--- /dev/null
+From e817ab2c6014a79269c881ba6704088a67795ad2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Nov 2022 17:11:59 +0800
+Subject: 9p/fd: Use P9_HDRSZ for header size
+
+From: GUO Zihua <guozihua@huawei.com>
+
+[ Upstream commit 6854fadbeee10891ed74246bdc05031906b6c8cf ]
+
+Cleanup hardcoded header sizes to use P9_HDRSZ instead of '7'
+
+Link: https://lkml.kernel.org/r/20221117091159.31533-4-guozihua@huawei.com
+Signed-off-by: GUO Zihua <guozihua@huawei.com>
+Reviewed-by: Christian Schoenebeck <linux_oss@crudebyte.com>
+[Dominique: commit message adjusted to make sense after offset size
+adjustment got removed]
+Signed-off-by: Dominique Martinet <asmadeus@codewreck.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/9p/trans_fd.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
+index deb66635f0f3..e070a0b8e5ca 100644
+--- a/net/9p/trans_fd.c
++++ b/net/9p/trans_fd.c
+@@ -118,7 +118,7 @@ struct p9_conn {
+ struct list_head unsent_req_list;
+ struct p9_req_t *rreq;
+ struct p9_req_t *wreq;
+- char tmp_buf[7];
++ char tmp_buf[P9_HDRSZ];
+ struct p9_fcall rc;
+ int wpos;
+ int wsize;
+@@ -291,7 +291,7 @@ static void p9_read_work(struct work_struct *work)
+ if (!m->rc.sdata) {
+ m->rc.sdata = m->tmp_buf;
+ m->rc.offset = 0;
+- m->rc.capacity = 7; /* start by reading header */
++ m->rc.capacity = P9_HDRSZ; /* start by reading header */
+ }
+
+ clear_bit(Rpending, &m->wsched);
+@@ -314,7 +314,7 @@ static void p9_read_work(struct work_struct *work)
+ p9_debug(P9_DEBUG_TRANS, "got new header\n");
+
+ /* Header size */
+- m->rc.size = 7;
++ m->rc.size = P9_HDRSZ;
+ err = p9_parse_header(&m->rc, &m->rc.size, NULL, NULL, 0);
+ if (err) {
+ p9_debug(P9_DEBUG_ERROR,
+--
+2.35.1
+
--- /dev/null
+From b5b32fd794cbd41eaa5fbcace2be39beef2eea56 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Nov 2022 22:44:41 +0900
+Subject: 9p/xen: check logical size for buffer size
+
+From: Dominique Martinet <asmadeus@codewreck.org>
+
+[ Upstream commit 391c18cf776eb4569ecda1f7794f360fe0a45a26 ]
+
+trans_xen did not check the data fits into the buffer before copying
+from the xen ring, but we probably should.
+Add a check that just skips the request and returns an error to
+userspace if it did not fit.
+
+Tested-by: Stefano Stabellini <sstabellini@kernel.org>
+Reviewed-by: Christian Schoenebeck <linux_oss@crudebyte.com>
+Link: https://lkml.kernel.org/r/20221118135542.63400-1-asmadeus@codewreck.org
+Signed-off-by: Dominique Martinet <asmadeus@codewreck.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/9p/trans_xen.c | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
+index 432ac5a16f2e..6c8a33f98f09 100644
+--- a/net/9p/trans_xen.c
++++ b/net/9p/trans_xen.c
+@@ -231,6 +231,14 @@ static void p9_xen_response(struct work_struct *work)
+ continue;
+ }
+
++ if (h.size > req->rc.capacity) {
++ dev_warn(&priv->dev->dev,
++ "requested packet size too big: %d for tag %d with capacity %zd\n",
++ h.size, h.tag, req->rc.capacity);
++ req->status = REQ_STATUS_ERROR;
++ goto recv_error;
++ }
++
+ memcpy(&req->rc, &h, sizeof(h));
+ req->rc.offset = 0;
+
+@@ -240,6 +248,7 @@ static void p9_xen_response(struct work_struct *work)
+ masked_prod, &masked_cons,
+ XEN_9PFS_RING_SIZE(ring));
+
++recv_error:
+ virt_mb();
+ cons += h.size;
+ ring->intf->in_cons = cons;
+--
+2.35.1
+
--- /dev/null
+From 621d8a45725353c81804715e0391a1762b803b4e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Nov 2022 15:23:50 -0800
+Subject: ALSA: seq: Fix function prototype mismatch in
+ snd_seq_expand_var_event
+
+From: Kees Cook <keescook@chromium.org>
+
+[ Upstream commit 05530ef7cf7c7d700f6753f058999b1b5099a026 ]
+
+With clang's kernel control flow integrity (kCFI, CONFIG_CFI_CLANG),
+indirect call targets are validated against the expected function
+pointer prototype to make sure the call target is valid to help mitigate
+ROP attacks. If they are not identical, there is a failure at run time,
+which manifests as either a kernel panic or thread getting killed.
+
+seq_copy_in_user() and seq_copy_in_kernel() did not have prototypes
+matching snd_seq_dump_func_t. Adjust this and remove the casts. There
+are no resulting binary output differences.
+
+This was found as a result of Clang's new -Wcast-function-type-strict
+flag, which is more sensitive than the simpler -Wcast-function-type,
+which only checks for type width mismatches.
+
+Reported-by: kernel test robot <lkp@intel.com>
+Link: https://lore.kernel.org/lkml/202211041527.HD8TLSE1-lkp@intel.com
+Cc: Jaroslav Kysela <perex@perex.cz>
+Cc: Takashi Iwai <tiwai@suse.com>
+Cc: "Gustavo A. R. Silva" <gustavoars@kernel.org>
+Cc: alsa-devel@alsa-project.org
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Link: https://lore.kernel.org/r/20221118232346.never.380-kees@kernel.org
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/core/seq/seq_memory.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+diff --git a/sound/core/seq/seq_memory.c b/sound/core/seq/seq_memory.c
+index 65db1a7c77b7..bb76a2dd0a2f 100644
+--- a/sound/core/seq/seq_memory.c
++++ b/sound/core/seq/seq_memory.c
+@@ -112,15 +112,19 @@ EXPORT_SYMBOL(snd_seq_dump_var_event);
+ * expand the variable length event to linear buffer space.
+ */
+
+-static int seq_copy_in_kernel(char **bufptr, const void *src, int size)
++static int seq_copy_in_kernel(void *ptr, void *src, int size)
+ {
++ char **bufptr = ptr;
++
+ memcpy(*bufptr, src, size);
+ *bufptr += size;
+ return 0;
+ }
+
+-static int seq_copy_in_user(char __user **bufptr, const void *src, int size)
++static int seq_copy_in_user(void *ptr, void *src, int size)
+ {
++ char __user **bufptr = ptr;
++
+ if (copy_to_user(*bufptr, src, size))
+ return -EFAULT;
+ *bufptr += size;
+@@ -149,8 +153,7 @@ int snd_seq_expand_var_event(const struct snd_seq_event *event, int count, char
+ return newlen;
+ }
+ err = snd_seq_dump_var_event(event,
+- in_kernel ? (snd_seq_dump_func_t)seq_copy_in_kernel :
+- (snd_seq_dump_func_t)seq_copy_in_user,
++ in_kernel ? seq_copy_in_kernel : seq_copy_in_user,
+ &buf);
+ return err < 0 ? err : newlen;
+ }
+--
+2.35.1
+
--- /dev/null
+From 8b3c105761d6cae81a31e82cdbda80b22fb4c1b7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 26 Sep 2022 16:09:12 +0100
+Subject: ARM: 9251/1: perf: Fix stacktraces for tracepoint events in THUMB2
+ kernels
+
+From: Tomislav Novak <tnovak@fb.com>
+
+[ Upstream commit 612695bccfdbd52004551308a55bae410e7cd22f ]
+
+Store the frame address where arm_get_current_stackframe() looks for it
+(ARM_r7 instead of ARM_fp if CONFIG_THUMB2_KERNEL=y). Otherwise frame->fp
+gets set to 0, causing unwind_frame() to fail.
+
+ # bpftrace -e 't:sched:sched_switch { @[kstack] = count(); exit(); }'
+ Attaching 1 probe...
+ @[
+ __schedule+1059
+ ]: 1
+
+A typical first unwind instruction is 0x97 (SP = R7), so after executing
+it SP ends up being 0 and -URC_FAILURE is returned.
+
+ unwind_frame(pc = ac9da7d7 lr = 00000000 sp = c69bdda0 fp = 00000000)
+ unwind_find_idx(ac9da7d7)
+ unwind_exec_insn: insn = 00000097
+ unwind_exec_insn: fp = 00000000 sp = 00000000 lr = 00000000 pc = 00000000
+
+With this patch:
+
+ # bpftrace -e 't:sched:sched_switch { @[kstack] = count(); exit(); }'
+ Attaching 1 probe...
+ @[
+ __schedule+1059
+ __schedule+1059
+ schedule+79
+ schedule_hrtimeout_range_clock+163
+ schedule_hrtimeout_range+17
+ ep_poll+471
+ SyS_epoll_wait+111
+ sys_epoll_pwait+231
+ __ret_fast_syscall+1
+ ]: 1
+
+Link: https://lore.kernel.org/r/20220920230728.2617421-1-tnovak@fb.com/
+
+Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Tomislav Novak <tnovak@fb.com>
+Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/include/asm/perf_event.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h
+index fe87397c3d8c..bdbc1e590891 100644
+--- a/arch/arm/include/asm/perf_event.h
++++ b/arch/arm/include/asm/perf_event.h
+@@ -17,7 +17,7 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
+
+ #define perf_arch_fetch_caller_regs(regs, __ip) { \
+ (regs)->ARM_pc = (__ip); \
+- (regs)->ARM_fp = (unsigned long) __builtin_frame_address(0); \
++ frame_pointer((regs)) = (unsigned long) __builtin_frame_address(0); \
+ (regs)->ARM_sp = current_stack_pointer; \
+ (regs)->ARM_cpsr = SVC_MODE; \
+ }
+--
+2.35.1
+
--- /dev/null
+From 528c862595d7d443fc942544454319823b2e5810 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 4 Nov 2022 21:46:18 +0100
+Subject: ARM: 9266/1: mm: fix no-MMU ZERO_PAGE() implementation
+
+From: Giulio Benetti <giulio.benetti@benettiengineering.com>
+
+[ Upstream commit 340a982825f76f1cff0daa605970fe47321b5ee7 ]
+
+Currently, in no-MMU SoCs (i.e. i.MXRT), ZERO_PAGE(vaddr) expands to
+```
+virt_to_page(0)
+```
+that in turn expands to:
+```
+pfn_to_page(virt_to_pfn(0))
+```
+and then virt_to_pfn(0) to:
+```
+ ((((unsigned long)(0) - PAGE_OFFSET) >> PAGE_SHIFT) +
+ PHYS_PFN_OFFSET)
+```
+where PAGE_OFFSET and PHYS_PFN_OFFSET are the DRAM offset(0x80000000) and
+PAGE_SHIFT is 12. This way we obtain 16MB(0x01000000) summed to the base of
+DRAM(0x80000000).
+When ZERO_PAGE(0) is then used, for example in bio_add_page(), the page
+gets an address that is out of DRAM bounds.
+So instead of using fake virtual page 0 let's allocate a dedicated
+zero_page during paging_init() and assign it to a global 'struct page *
+empty_zero_page' the same way mmu.c does and it's the same approach used
+in m68k with commit dc068f462179 as discussed here[0]. Then let's move
+ZERO_PAGE() definition to the top of pgtable.h to be in common between
+mmu.c and nommu.c.
+
+[0]: https://lore.kernel.org/linux-m68k/2a462b23-5b8e-bbf4-ec7d-778434a3b9d7@google.com/T/#m1266ceb63ad140743174d6b3070364d3c9a5179b
+
+Signed-off-by: Giulio Benetti <giulio.benetti@benettiengineering.com>
+Reviewed-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/include/asm/pgtable-nommu.h | 6 ------
+ arch/arm/include/asm/pgtable.h | 16 +++++++++-------
+ arch/arm/mm/nommu.c | 19 +++++++++++++++++++
+ 3 files changed, 28 insertions(+), 13 deletions(-)
+
+diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h
+index d16aba48fa0a..090011394477 100644
+--- a/arch/arm/include/asm/pgtable-nommu.h
++++ b/arch/arm/include/asm/pgtable-nommu.h
+@@ -44,12 +44,6 @@
+
+ typedef pte_t *pte_addr_t;
+
+-/*
+- * ZERO_PAGE is a global shared page that is always zero: used
+- * for zero-mapped memory areas etc..
+- */
+-#define ZERO_PAGE(vaddr) (virt_to_page(0))
+-
+ /*
+ * Mark the prot value as uncacheable and unbufferable.
+ */
+diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
+index c02f24400369..d38d503493cb 100644
+--- a/arch/arm/include/asm/pgtable.h
++++ b/arch/arm/include/asm/pgtable.h
+@@ -10,6 +10,15 @@
+ #include <linux/const.h>
+ #include <asm/proc-fns.h>
+
++#ifndef __ASSEMBLY__
++/*
++ * ZERO_PAGE is a global shared page that is always zero: used
++ * for zero-mapped memory areas etc..
++ */
++extern struct page *empty_zero_page;
++#define ZERO_PAGE(vaddr) (empty_zero_page)
++#endif
++
+ #ifndef CONFIG_MMU
+
+ #include <asm-generic/pgtable-nopud.h>
+@@ -156,13 +165,6 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+ #define __S111 __PAGE_SHARED_EXEC
+
+ #ifndef __ASSEMBLY__
+-/*
+- * ZERO_PAGE is a global shared page that is always zero: used
+- * for zero-mapped memory areas etc..
+- */
+-extern struct page *empty_zero_page;
+-#define ZERO_PAGE(vaddr) (empty_zero_page)
+-
+
+ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
+
+diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
+index 8b3d7191e2b8..959f05701738 100644
+--- a/arch/arm/mm/nommu.c
++++ b/arch/arm/mm/nommu.c
+@@ -26,6 +26,13 @@
+
+ unsigned long vectors_base;
+
++/*
++ * empty_zero_page is a special page that is used for
++ * zero-initialized data and COW.
++ */
++struct page *empty_zero_page;
++EXPORT_SYMBOL(empty_zero_page);
++
+ #ifdef CONFIG_ARM_MPU
+ struct mpu_rgn_info mpu_rgn_info;
+ #endif
+@@ -148,9 +155,21 @@ void __init adjust_lowmem_bounds(void)
+ */
+ void __init paging_init(const struct machine_desc *mdesc)
+ {
++ void *zero_page;
++
+ early_trap_init((void *)vectors_base);
+ mpu_setup();
++
++ /* allocate the zero page. */
++ zero_page = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
++ if (!zero_page)
++ panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
++ __func__, PAGE_SIZE, PAGE_SIZE);
++
+ bootmem_init();
++
++ empty_zero_page = virt_to_page(zero_page);
++ flush_dcache_page(empty_zero_page);
+ }
+
+ /*
+--
+2.35.1
+
--- /dev/null
+From d4c7c13a6cd1aed237bee68b3450766db819e289 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 30 Oct 2022 21:56:29 +0100
+Subject: ARM: dts: rockchip: disable arm_global_timer on rk3066 and rk3188
+
+From: Johan Jonker <jbx6244@gmail.com>
+
+[ Upstream commit da74858a475782a3f16470907814c8cc5950ad68 ]
+
+The clock source and the sched_clock provided by the arm_global_timer
+on Rockchip rk3066a/rk3188 are quite unstable because their rates
+depend on the CPU frequency.
+
+Recent changes to the arm_global_timer driver make it impossible to use.
+
+On the other side, the arm_global_timer has a higher rating than the
+ROCKCHIP_TIMER, it will be selected by default by the time framework
+while we want to use the stable Rockchip clock source.
+
+Keep the arm_global_timer disabled in order to have the
+DW_APB_TIMER (rk3066a) or ROCKCHIP_TIMER (rk3188) selected by default.
+
+Signed-off-by: Johan Jonker <jbx6244@gmail.com>
+Link: https://lore.kernel.org/r/f275ca8d-fd0a-26e5-b978-b7f3df815e0a@gmail.com
+Signed-off-by: Heiko Stuebner <heiko@sntech.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/boot/dts/rk3188.dtsi | 1 -
+ arch/arm/boot/dts/rk3xxx.dtsi | 7 +++++++
+ 2 files changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/arch/arm/boot/dts/rk3188.dtsi b/arch/arm/boot/dts/rk3188.dtsi
+index a837a9a34e3e..ddf23748ead4 100644
+--- a/arch/arm/boot/dts/rk3188.dtsi
++++ b/arch/arm/boot/dts/rk3188.dtsi
+@@ -630,7 +630,6 @@
+
+ &global_timer {
+ interrupts = <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_EDGE_RISING)>;
+- status = "disabled";
+ };
+
+ &local_timer {
+diff --git a/arch/arm/boot/dts/rk3xxx.dtsi b/arch/arm/boot/dts/rk3xxx.dtsi
+index 859a7477909f..5edc46a5585c 100644
+--- a/arch/arm/boot/dts/rk3xxx.dtsi
++++ b/arch/arm/boot/dts/rk3xxx.dtsi
+@@ -111,6 +111,13 @@
+ reg = <0x1013c200 0x20>;
+ interrupts = <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_EDGE_RISING)>;
+ clocks = <&cru CORE_PERI>;
++ status = "disabled";
++ /* The clock source and the sched_clock provided by the arm_global_timer
++ * on Rockchip rk3066a/rk3188 are quite unstable because their rates
++ * depend on the CPU frequency.
++ * Keep the arm_global_timer disabled in order to have the
++ * DW_APB_TIMER (rk3066a) or ROCKCHIP_TIMER (rk3188) selected by default.
++ */
+ };
+
+ local_timer: local-timer@1013c600 {
+--
+2.35.1
+
--- /dev/null
+From 5b1b99a1508cbef88005366b4a729d2514113d53 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 27 Oct 2022 10:58:22 +0200
+Subject: ARM: dts: rockchip: fix ir-receiver node names
+
+From: Johan Jonker <jbx6244@gmail.com>
+
+[ Upstream commit dd847fe34cdf1e89afed1af24986359f13082bfb ]
+
+Fix ir-receiver node names on Rockchip boards,
+so that they match with regex: '^ir(-receiver)?(@[a-f0-9]+)?$'
+
+Signed-off-by: Johan Jonker <jbx6244@gmail.com>
+Link: https://lore.kernel.org/r/ea5af279-f44c-afea-023d-bb37f5a0d58d@gmail.com
+Signed-off-by: Heiko Stuebner <heiko@sntech.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/boot/dts/rk3188-radxarock.dts | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/arm/boot/dts/rk3188-radxarock.dts b/arch/arm/boot/dts/rk3188-radxarock.dts
+index b0fef82c0a71..39b913f8d701 100644
+--- a/arch/arm/boot/dts/rk3188-radxarock.dts
++++ b/arch/arm/boot/dts/rk3188-radxarock.dts
+@@ -67,7 +67,7 @@
+ #sound-dai-cells = <0>;
+ };
+
+- ir_recv: gpio-ir-receiver {
++ ir_recv: ir-receiver {
+ compatible = "gpio-ir-receiver";
+ gpios = <&gpio0 RK_PB2 GPIO_ACTIVE_LOW>;
+ pinctrl-names = "default";
+--
+2.35.1
+
--- /dev/null
+From 163ce1b02852814851e67f197a271748daab03c7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 24 Oct 2022 18:55:46 +0200
+Subject: arm: dts: rockchip: fix node name for hym8563 rtc
+
+From: Sebastian Reichel <sebastian.reichel@collabora.com>
+
+[ Upstream commit 17b57beafccb4569accbfc8c11390744cf59c021 ]
+
+Fix the node name for hym8563 in all arm rockchip devicetrees.
+
+Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
+Link: https://lore.kernel.org/r/20221024165549.74574-4-sebastian.reichel@collabora.com
+Signed-off-by: Heiko Stuebner <heiko@sntech.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/boot/dts/rk3036-evb.dts | 2 +-
+ arch/arm/boot/dts/rk3288-evb-act8846.dts | 2 +-
+ arch/arm/boot/dts/rk3288-firefly.dtsi | 2 +-
+ arch/arm/boot/dts/rk3288-miqi.dts | 2 +-
+ arch/arm/boot/dts/rk3288-rock2-square.dts | 2 +-
+ 5 files changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/arch/arm/boot/dts/rk3036-evb.dts b/arch/arm/boot/dts/rk3036-evb.dts
+index 2a7e6624efb9..ea23ba98625e 100644
+--- a/arch/arm/boot/dts/rk3036-evb.dts
++++ b/arch/arm/boot/dts/rk3036-evb.dts
+@@ -31,7 +31,7 @@
+ &i2c1 {
+ status = "okay";
+
+- hym8563: hym8563@51 {
++ hym8563: rtc@51 {
+ compatible = "haoyu,hym8563";
+ reg = <0x51>;
+ #clock-cells = <0>;
+diff --git a/arch/arm/boot/dts/rk3288-evb-act8846.dts b/arch/arm/boot/dts/rk3288-evb-act8846.dts
+index be695b8c1f67..8a635c243127 100644
+--- a/arch/arm/boot/dts/rk3288-evb-act8846.dts
++++ b/arch/arm/boot/dts/rk3288-evb-act8846.dts
+@@ -54,7 +54,7 @@
+ vin-supply = <&vcc_sys>;
+ };
+
+- hym8563@51 {
++ rtc@51 {
+ compatible = "haoyu,hym8563";
+ reg = <0x51>;
+
+diff --git a/arch/arm/boot/dts/rk3288-firefly.dtsi b/arch/arm/boot/dts/rk3288-firefly.dtsi
+index 7fb582302b32..c560afe3af78 100644
+--- a/arch/arm/boot/dts/rk3288-firefly.dtsi
++++ b/arch/arm/boot/dts/rk3288-firefly.dtsi
+@@ -233,7 +233,7 @@
+ vin-supply = <&vcc_sys>;
+ };
+
+- hym8563: hym8563@51 {
++ hym8563: rtc@51 {
+ compatible = "haoyu,hym8563";
+ reg = <0x51>;
+ #clock-cells = <0>;
+diff --git a/arch/arm/boot/dts/rk3288-miqi.dts b/arch/arm/boot/dts/rk3288-miqi.dts
+index cf54d5ffff2f..fe265a834e8e 100644
+--- a/arch/arm/boot/dts/rk3288-miqi.dts
++++ b/arch/arm/boot/dts/rk3288-miqi.dts
+@@ -157,7 +157,7 @@
+ vin-supply = <&vcc_sys>;
+ };
+
+- hym8563: hym8563@51 {
++ hym8563: rtc@51 {
+ compatible = "haoyu,hym8563";
+ reg = <0x51>;
+ #clock-cells = <0>;
+diff --git a/arch/arm/boot/dts/rk3288-rock2-square.dts b/arch/arm/boot/dts/rk3288-rock2-square.dts
+index c4d1d142d8c6..d5ef99ebbddc 100644
+--- a/arch/arm/boot/dts/rk3288-rock2-square.dts
++++ b/arch/arm/boot/dts/rk3288-rock2-square.dts
+@@ -165,7 +165,7 @@
+ };
+
+ &i2c0 {
+- hym8563: hym8563@51 {
++ hym8563: rtc@51 {
+ compatible = "haoyu,hym8563";
+ reg = <0x51>;
+ #clock-cells = <0>;
+--
+2.35.1
+
--- /dev/null
+From e5912fb7eb097679379662e4ac19cadcaf8ba3d5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 27 Oct 2022 01:31:37 +0200
+Subject: ARM: dts: rockchip: rk3188: fix lcdc1-rgb24 node name
+
+From: Johan Jonker <jbx6244@gmail.com>
+
+[ Upstream commit 11871e20bcb23c00966e785a124fb72bc8340af4 ]
+
+The lcdc1-rgb24 node name is out of line with the rest
+of the rk3188 lcdc1 node, so fix it.
+
+Signed-off-by: Johan Jonker <jbx6244@gmail.com>
+Link: https://lore.kernel.org/r/7b9c0a6f-626b-07e8-ae74-7e0f08b8d241@gmail.com
+Signed-off-by: Heiko Stuebner <heiko@sntech.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/boot/dts/rk3188.dtsi | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/arm/boot/dts/rk3188.dtsi b/arch/arm/boot/dts/rk3188.dtsi
+index b6bde9d12c2b..a837a9a34e3e 100644
+--- a/arch/arm/boot/dts/rk3188.dtsi
++++ b/arch/arm/boot/dts/rk3188.dtsi
+@@ -402,7 +402,7 @@
+ rockchip,pins = <2 RK_PD3 1 &pcfg_pull_none>;
+ };
+
+- lcdc1_rgb24: ldcd1-rgb24 {
++ lcdc1_rgb24: lcdc1-rgb24 {
+ rockchip,pins = <2 RK_PA0 1 &pcfg_pull_none>,
+ <2 RK_PA1 1 &pcfg_pull_none>,
+ <2 RK_PA2 1 &pcfg_pull_none>,
+--
+2.35.1
+
--- /dev/null
+From 76fabdbab581205aaa7da7864a6f8c029f745509 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 27 Oct 2022 10:59:10 +0200
+Subject: arm64: dts: rockchip: fix ir-receiver node names
+
+From: Johan Jonker <jbx6244@gmail.com>
+
+[ Upstream commit de0d04b9780a23eb928aedfb6f981285f78d58e5 ]
+
+Fix ir-receiver node names on Rockchip boards,
+so that they match with regex: '^ir(-receiver)?(@[a-f0-9]+)?$'
+
+Signed-off-by: Johan Jonker <jbx6244@gmail.com>
+Link: https://lore.kernel.org/r/e9764253-8ce8-150b-4820-41f03f845469@gmail.com
+Signed-off-by: Heiko Stuebner <heiko@sntech.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts
+index fbcb9531cc70..213c0759c4b8 100644
+--- a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts
++++ b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts
+@@ -13,7 +13,7 @@
+ stdout-path = "serial2:1500000n8";
+ };
+
+- ir_rx {
++ ir-receiver {
+ compatible = "gpio-ir-receiver";
+ gpios = <&gpio0 RK_PC0 GPIO_ACTIVE_HIGH>;
+ pinctrl-names = "default";
+--
+2.35.1
+
--- /dev/null
+From ffb3971e26f4690f2500ab9738472181039c199f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 24 Sep 2022 11:28:12 +0000
+Subject: arm64: dts: rockchip: keep I2S1 disabled for GPIO function on ROCK Pi
+ 4 series
+
+From: FUKAUMI Naoki <naoki@radxa.com>
+
+[ Upstream commit 849c19d14940b87332d5d59c7fc581d73f2099fd ]
+
+I2S1 pins are exposed on the 40-pin header on Radxa ROCK Pi 4 series.
+Their default function is GPIO, so I2S1 needs to be disabled.
+
+Signed-off-by: FUKAUMI Naoki <naoki@radxa.com>
+Link: https://lore.kernel.org/r/20220924112812.1219-1-naoki@radxa.com
+Signed-off-by: Heiko Stuebner <heiko@sntech.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi
+index f121203081b9..64df64339119 100644
+--- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi
++++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi
+@@ -448,7 +448,6 @@
+ &i2s1 {
+ rockchip,playback-channels = <2>;
+ rockchip,capture-channels = <2>;
+- status = "okay";
+ };
+
+ &i2s2 {
+--
+2.35.1
+
--- /dev/null
+From c21ed1784175f9bc14e302b6c710a05b1b188fce Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Nov 2022 12:01:13 +0530
+Subject: ASoC: soc-pcm: Add NULL check in BE reparenting
+
+From: Srinivasa Rao Mandadapu <quic_srivasam@quicinc.com>
+
+[ Upstream commit db8f91d424fe0ea6db337aca8bc05908bbce1498 ]
+
+Add NULL check in dpcm_be_reparent API, to handle
+kernel NULL pointer dereference error.
+The issue occurred in fuzzing test.
+
+Signed-off-by: Srinivasa Rao Mandadapu <quic_srivasam@quicinc.com>
+Link: https://lore.kernel.org/r/1669098673-29703-1-git-send-email-quic_srivasam@quicinc.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/soc-pcm.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
+index 0e2261ee07b6..fb874f924bbe 100644
+--- a/sound/soc/soc-pcm.c
++++ b/sound/soc/soc-pcm.c
+@@ -1154,6 +1154,8 @@ static void dpcm_be_reparent(struct snd_soc_pcm_runtime *fe,
+ return;
+
+ be_substream = snd_soc_dpcm_get_substream(be, stream);
++ if (!be_substream)
++ return;
+
+ for_each_dpcm_fe(be, stream, dpcm) {
+ if (dpcm->fe == fe)
+--
+2.35.1
+
--- /dev/null
+From 1515e509bb8fd1b1133f5c12ccfa326ea424a96e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Nov 2022 20:13:54 +0800
+Subject: ASoC: wm8962: Wait for updated value of WM8962_CLOCKING1 register
+
+From: Chancel Liu <chancel.liu@nxp.com>
+
+[ Upstream commit 3ca507bf99611c82dafced73e921c1b10ee12869 ]
+
+DSPCLK_DIV field in WM8962_CLOCKING1 register is used to generate
+correct frequency of LRCLK and BCLK. Sometimes the read-only value
+can't be updated timely after enabling SYSCLK. This results in wrong
+calculation values. Delay is introduced here to wait for newest value
+from register. The time of the delay should be at least 500~1000us
+according to test.
+
+Signed-off-by: Chancel Liu <chancel.liu@nxp.com>
+Acked-by: Charles Keepax <ckeepax@opensource.cirrus.com>
+Link: https://lore.kernel.org/r/20221109121354.123958-1-chancel.liu@nxp.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/codecs/wm8962.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c
+index 21574447650c..57aeded978c2 100644
+--- a/sound/soc/codecs/wm8962.c
++++ b/sound/soc/codecs/wm8962.c
+@@ -2489,6 +2489,14 @@ static void wm8962_configure_bclk(struct snd_soc_component *component)
+ snd_soc_component_update_bits(component, WM8962_CLOCKING2,
+ WM8962_SYSCLK_ENA_MASK, WM8962_SYSCLK_ENA);
+
++ /* DSPCLK_DIV field in WM8962_CLOCKING1 register is used to generate
++ * correct frequency of LRCLK and BCLK. Sometimes the read-only value
++ * can't be updated timely after enabling SYSCLK. This results in wrong
++ * calculation values. Delay is introduced here to wait for newest
++ * value from register. The time of the delay should be at least
++ * 500~1000us according to test.
++ */
++ usleep_range(500, 1000);
+ dspclk = snd_soc_component_read(component, WM8962_CLOCKING1);
+
+ if (snd_soc_component_get_bias_level(component) != SND_SOC_BIAS_ON)
+--
+2.35.1
+
--- /dev/null
+From 6c94634e8e0ed4750f2342a16b20fd351585fc7f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Nov 2022 16:29:44 +0000
+Subject: btrfs: send: avoid unaligned encoded writes when attempting to clone
+ range
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit a11452a3709e217492798cf3686ac2cc8eb3fb51 ]
+
+When trying to see if we can clone a file range, there are cases where we
+end up sending two write operations in case the inode from the source root
+has an i_size that is not sector size aligned and the length from the
+current offset to its i_size is less than the remaining length we are
+trying to clone.
+
+Issuing two write operations when we could instead issue a single write
+operation is not incorrect. However it is not optimal, especially if the
+extents are compressed and the flag BTRFS_SEND_FLAG_COMPRESSED was passed
+to the send ioctl. In that case we can end up sending an encoded write
+with an offset that is not sector size aligned, which makes the receiver
+fallback to decompressing the data and writing it using regular buffered
+IO (so re-compressing the data in case the fs is mounted with compression
+enabled), because encoded writes fail with -EINVAL when an offset is not
+sector size aligned.
+
+The following example, which triggered a bug in the receiver code for the
+fallback logic of decompressing + regular buffer IO and is fixed by the
+patchset referred in a Link at the bottom of this changelog, is an example
+where we have the non-optimal behaviour due to an unaligned encoded write:
+
+ $ cat test.sh
+ #!/bin/bash
+
+ DEV=/dev/sdj
+ MNT=/mnt/sdj
+
+ mkfs.btrfs -f $DEV > /dev/null
+ mount -o compress $DEV $MNT
+
+ # File foo has a size of 33K, not aligned to the sector size.
+ xfs_io -f -c "pwrite -S 0xab 0 33K" $MNT/foo
+
+ xfs_io -f -c "pwrite -S 0xcd 0 64K" $MNT/bar
+
+ # Now clone the first 32K of file bar into foo at offset 0.
+ xfs_io -c "reflink $MNT/bar 0 0 32K" $MNT/foo
+
+ # Snapshot the default subvolume and create a full send stream (v2).
+ btrfs subvolume snapshot -r $MNT $MNT/snap
+
+ btrfs send --compressed-data -f /tmp/test.send $MNT/snap
+
+ echo -e "\nFile bar in the original filesystem:"
+ od -A d -t x1 $MNT/snap/bar
+
+ umount $MNT
+ mkfs.btrfs -f $DEV > /dev/null
+ mount $DEV $MNT
+
+ echo -e "\nReceiving stream in a new filesystem..."
+ btrfs receive -f /tmp/test.send $MNT
+
+ echo -e "\nFile bar in the new filesystem:"
+ od -A d -t x1 $MNT/snap/bar
+
+ umount $MNT
+
+Before this patch, the send stream included one regular write and one
+encoded write for file 'bar', with the latter being not sector size aligned
+and causing the receiver to fallback to decompression + buffered writes.
+The output of the btrfs receive command in verbose mode (-vvv):
+
+ (...)
+ mkfile o258-7-0
+ rename o258-7-0 -> bar
+ utimes
+ clone bar - source=foo source offset=0 offset=0 length=32768
+ write bar - offset=32768 length=1024
+ encoded_write bar - offset=33792, len=4096, unencoded_offset=33792, unencoded_file_len=31744, unencoded_len=65536, compression=1, encryption=0
+ encoded_write bar - falling back to decompress and write due to errno 22 ("Invalid argument")
+ (...)
+
+This patch avoids the regular write followed by an unaligned encoded write
+so that we end up sending a single encoded write that is aligned. So after
+this patch the stream content is (output of btrfs receive -vvv):
+
+ (...)
+ mkfile o258-7-0
+ rename o258-7-0 -> bar
+ utimes
+ clone bar - source=foo source offset=0 offset=0 length=32768
+ encoded_write bar - offset=32768, len=4096, unencoded_offset=32768, unencoded_file_len=32768, unencoded_len=65536, compression=1, encryption=0
+ (...)
+
+So we get more optimal behaviour and avoid the silent data loss bug in
+versions of btrfs-progs affected by the bug referred by the Link tag
+below (btrfs-progs v5.19, v5.19.1, v6.0 and v6.0.1).
+
+Link: https://lore.kernel.org/linux-btrfs/cover.1668529099.git.fdmanana@suse.com/
+Reviewed-by: Boris Burkov <boris@bur.io>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/send.c | 24 +++++++++++++++++++++++-
+ 1 file changed, 23 insertions(+), 1 deletion(-)
+
+diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
+index 6b80dee17f49..4a6ba0997e39 100644
+--- a/fs/btrfs/send.c
++++ b/fs/btrfs/send.c
+@@ -5398,6 +5398,7 @@ static int clone_range(struct send_ctx *sctx,
+ u64 ext_len;
+ u64 clone_len;
+ u64 clone_data_offset;
++ bool crossed_src_i_size = false;
+
+ if (slot >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(clone_root->root, path);
+@@ -5454,8 +5455,10 @@ static int clone_range(struct send_ctx *sctx,
+ if (key.offset >= clone_src_i_size)
+ break;
+
+- if (key.offset + ext_len > clone_src_i_size)
++ if (key.offset + ext_len > clone_src_i_size) {
+ ext_len = clone_src_i_size - key.offset;
++ crossed_src_i_size = true;
++ }
+
+ clone_data_offset = btrfs_file_extent_offset(leaf, ei);
+ if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte) {
+@@ -5515,6 +5518,25 @@ static int clone_range(struct send_ctx *sctx,
+ ret = send_clone(sctx, offset, clone_len,
+ clone_root);
+ }
++ } else if (crossed_src_i_size && clone_len < len) {
++ /*
++ * If we are at i_size of the clone source inode and we
++ * can not clone from it, terminate the loop. This is
++ * to avoid sending two write operations, one with a
++ * length matching clone_len and the final one after
++ * this loop with a length of len - clone_len.
++ *
++ * When using encoded writes (BTRFS_SEND_FLAG_COMPRESSED
++ * was passed to the send ioctl), this helps avoid
++ * sending an encoded write for an offset that is not
++ * sector size aligned, in case the i_size of the source
++ * inode is not sector size aligned. That will make the
++ * receiver fallback to decompression of the data and
++ * writing it using regular buffered IO, therefore while
++ * not incorrect, it's not optimal due decompression and
++ * possible re-compression at the receiver.
++ */
++ break;
+ } else {
+ ret = send_extent_data(sctx, offset, clone_len);
+ }
+--
+2.35.1
+
--- /dev/null
+From 23441e9af6d7908d5a0414e955ccd881e14d00d2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Nov 2022 00:27:58 +0900
+Subject: fbcon: Use kzalloc() in fbcon_prepare_logo()
+
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+
+[ Upstream commit a6a00d7e8ffd78d1cdb7a43f1278f081038c638f ]
+
+A kernel built with syzbot's config file reported that
+
+ scr_memcpyw(q, save, array3_size(logo_lines, new_cols, 2))
+
+causes uninitialized "save" to be copied.
+
+ ----------
+ [drm] Initialized vgem 1.0.0 20120112 for vgem on minor 0
+ [drm] Initialized vkms 1.0.0 20180514 for vkms on minor 1
+ Console: switching to colour frame buffer device 128x48
+ =====================================================
+ BUG: KMSAN: uninit-value in do_update_region+0x4b8/0xba0
+ do_update_region+0x4b8/0xba0
+ update_region+0x40d/0x840
+ fbcon_switch+0x3364/0x35e0
+ redraw_screen+0xae3/0x18a0
+ do_bind_con_driver+0x1cb3/0x1df0
+ do_take_over_console+0x11cb/0x13f0
+ fbcon_fb_registered+0xacc/0xfd0
+ register_framebuffer+0x1179/0x1320
+ __drm_fb_helper_initial_config_and_unlock+0x23ad/0x2b40
+ drm_fbdev_client_hotplug+0xbea/0xda0
+ drm_fbdev_generic_setup+0x65e/0x9d0
+ vkms_init+0x9f3/0xc76
+ (...snipped...)
+
+ Uninit was stored to memory at:
+ fbcon_prepare_logo+0x143b/0x1940
+ fbcon_init+0x2c1b/0x31c0
+ visual_init+0x3e7/0x820
+ do_bind_con_driver+0x14a4/0x1df0
+ do_take_over_console+0x11cb/0x13f0
+ fbcon_fb_registered+0xacc/0xfd0
+ register_framebuffer+0x1179/0x1320
+ __drm_fb_helper_initial_config_and_unlock+0x23ad/0x2b40
+ drm_fbdev_client_hotplug+0xbea/0xda0
+ drm_fbdev_generic_setup+0x65e/0x9d0
+ vkms_init+0x9f3/0xc76
+ (...snipped...)
+
+ Uninit was created at:
+ __kmem_cache_alloc_node+0xb69/0x1020
+ __kmalloc+0x379/0x680
+ fbcon_prepare_logo+0x704/0x1940
+ fbcon_init+0x2c1b/0x31c0
+ visual_init+0x3e7/0x820
+ do_bind_con_driver+0x14a4/0x1df0
+ do_take_over_console+0x11cb/0x13f0
+ fbcon_fb_registered+0xacc/0xfd0
+ register_framebuffer+0x1179/0x1320
+ __drm_fb_helper_initial_config_and_unlock+0x23ad/0x2b40
+ drm_fbdev_client_hotplug+0xbea/0xda0
+ drm_fbdev_generic_setup+0x65e/0x9d0
+ vkms_init+0x9f3/0xc76
+ (...snipped...)
+
+ CPU: 2 PID: 1 Comm: swapper/0 Not tainted 6.1.0-rc4-00356-g8f2975c2bb4c #924
+ Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
+ ----------
+
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
+Link: https://patchwork.freedesktop.org/patch/msgid/cad03d25-0ea0-32c4-8173-fd1895314bce@I-love.SAKURA.ne.jp
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/video/fbdev/core/fbcon.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
+index 2618d3beef64..27828435dd4f 100644
+--- a/drivers/video/fbdev/core/fbcon.c
++++ b/drivers/video/fbdev/core/fbcon.c
+@@ -609,7 +609,7 @@ static void fbcon_prepare_logo(struct vc_data *vc, struct fb_info *info,
+ if (scr_readw(r) != vc->vc_video_erase_char)
+ break;
+ if (r != q && new_rows >= rows + logo_lines) {
+- save = kmalloc(array3_size(logo_lines, new_cols, 2),
++ save = kzalloc(array3_size(logo_lines, new_cols, 2),
+ GFP_KERNEL);
+ if (save) {
+ int i = cols < new_cols ? cols : new_cols;
+--
+2.35.1
+
--- /dev/null
+From e5e26144a62a3037222e7eb80ffe8ed2d249ca26 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 7 Dec 2022 13:04:34 +0000
+Subject: media: videobuf2-core: take mmap_lock in vb2_get_unmapped_area()
+
+From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+
+[ Upstream commit 098e5edc5d048a8df8691fd9fde895af100be42b ]
+
+While vb2_mmap took the mmap_lock mutex, vb2_get_unmapped_area didn't.
+Add this.
+
+Also take this opportunity to move the 'q->memory != VB2_MEMORY_MMAP'
+check and vb2_fileio_is_active() check into __find_plane_by_offset() so
+both vb2_mmap and vb2_get_unmapped_area do the same checks.
+
+Since q->memory is checked while mmap_lock is held, also take that lock
+in reqbufs and create_bufs when it is set, and set it back to
+MEMORY_UNKNOWN on error.
+
+Fixes: f035eb4e976e ("[media] videobuf2: fix lockdep warning")
+Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+Acked-by: Tomasz Figa <tfiga@chromium.org>
+Reviewed-by: Ricardo Ribalda <ribalda@chromium.org>
+Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../media/common/videobuf2/videobuf2-core.c | 102 +++++++++++++-----
+ 1 file changed, 73 insertions(+), 29 deletions(-)
+
+diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c
+index 72350343a56a..3bafde87a125 100644
+--- a/drivers/media/common/videobuf2/videobuf2-core.c
++++ b/drivers/media/common/videobuf2/videobuf2-core.c
+@@ -787,7 +787,13 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory,
+ num_buffers = max_t(unsigned int, *count, q->min_buffers_needed);
+ num_buffers = min_t(unsigned int, num_buffers, VB2_MAX_FRAME);
+ memset(q->alloc_devs, 0, sizeof(q->alloc_devs));
++ /*
++ * Set this now to ensure that drivers see the correct q->memory value
++ * in the queue_setup op.
++ */
++ mutex_lock(&q->mmap_lock);
+ q->memory = memory;
++ mutex_unlock(&q->mmap_lock);
+
+ /*
+ * Ask the driver how many buffers and planes per buffer it requires.
+@@ -796,22 +802,27 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory,
+ ret = call_qop(q, queue_setup, q, &num_buffers, &num_planes,
+ plane_sizes, q->alloc_devs);
+ if (ret)
+- return ret;
++ goto error;
+
+ /* Check that driver has set sane values */
+- if (WARN_ON(!num_planes))
+- return -EINVAL;
++ if (WARN_ON(!num_planes)) {
++ ret = -EINVAL;
++ goto error;
++ }
+
+ for (i = 0; i < num_planes; i++)
+- if (WARN_ON(!plane_sizes[i]))
+- return -EINVAL;
++ if (WARN_ON(!plane_sizes[i])) {
++ ret = -EINVAL;
++ goto error;
++ }
+
+ /* Finally, allocate buffers and video memory */
+ allocated_buffers =
+ __vb2_queue_alloc(q, memory, num_buffers, num_planes, plane_sizes);
+ if (allocated_buffers == 0) {
+ dprintk(q, 1, "memory allocation failed\n");
+- return -ENOMEM;
++ ret = -ENOMEM;
++ goto error;
+ }
+
+ /*
+@@ -852,7 +863,8 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory,
+ if (ret < 0) {
+ /*
+ * Note: __vb2_queue_free() will subtract 'allocated_buffers'
+- * from q->num_buffers.
++ * from q->num_buffers and it will reset q->memory to
++ * VB2_MEMORY_UNKNOWN.
+ */
+ __vb2_queue_free(q, allocated_buffers);
+ mutex_unlock(&q->mmap_lock);
+@@ -868,6 +880,12 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory,
+ q->waiting_for_buffers = !q->is_output;
+
+ return 0;
++
++error:
++ mutex_lock(&q->mmap_lock);
++ q->memory = VB2_MEMORY_UNKNOWN;
++ mutex_unlock(&q->mmap_lock);
++ return ret;
+ }
+ EXPORT_SYMBOL_GPL(vb2_core_reqbufs);
+
+@@ -878,6 +896,7 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
+ {
+ unsigned int num_planes = 0, num_buffers, allocated_buffers;
+ unsigned plane_sizes[VB2_MAX_PLANES] = { };
++ bool no_previous_buffers = !q->num_buffers;
+ int ret;
+
+ if (q->num_buffers == VB2_MAX_FRAME) {
+@@ -885,13 +904,19 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
+ return -ENOBUFS;
+ }
+
+- if (!q->num_buffers) {
++ if (no_previous_buffers) {
+ if (q->waiting_in_dqbuf && *count) {
+ dprintk(q, 1, "another dup()ped fd is waiting for a buffer\n");
+ return -EBUSY;
+ }
+ memset(q->alloc_devs, 0, sizeof(q->alloc_devs));
++ /*
++ * Set this now to ensure that drivers see the correct q->memory
++ * value in the queue_setup op.
++ */
++ mutex_lock(&q->mmap_lock);
+ q->memory = memory;
++ mutex_unlock(&q->mmap_lock);
+ q->waiting_for_buffers = !q->is_output;
+ } else {
+ if (q->memory != memory) {
+@@ -914,14 +939,15 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
+ ret = call_qop(q, queue_setup, q, &num_buffers,
+ &num_planes, plane_sizes, q->alloc_devs);
+ if (ret)
+- return ret;
++ goto error;
+
+ /* Finally, allocate buffers and video memory */
+ allocated_buffers = __vb2_queue_alloc(q, memory, num_buffers,
+ num_planes, plane_sizes);
+ if (allocated_buffers == 0) {
+ dprintk(q, 1, "memory allocation failed\n");
+- return -ENOMEM;
++ ret = -ENOMEM;
++ goto error;
+ }
+
+ /*
+@@ -952,7 +978,8 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
+ if (ret < 0) {
+ /*
+ * Note: __vb2_queue_free() will subtract 'allocated_buffers'
+- * from q->num_buffers.
++ * from q->num_buffers and it will reset q->memory to
++ * VB2_MEMORY_UNKNOWN.
+ */
+ __vb2_queue_free(q, allocated_buffers);
+ mutex_unlock(&q->mmap_lock);
+@@ -967,6 +994,14 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
+ *count = allocated_buffers;
+
+ return 0;
++
++error:
++ if (no_previous_buffers) {
++ mutex_lock(&q->mmap_lock);
++ q->memory = VB2_MEMORY_UNKNOWN;
++ mutex_unlock(&q->mmap_lock);
++ }
++ return ret;
+ }
+ EXPORT_SYMBOL_GPL(vb2_core_create_bufs);
+
+@@ -2120,6 +2155,22 @@ static int __find_plane_by_offset(struct vb2_queue *q, unsigned long off,
+ struct vb2_buffer *vb;
+ unsigned int buffer, plane;
+
++ /*
++ * Sanity checks to ensure the lock is held, MEMORY_MMAP is
++ * used and fileio isn't active.
++ */
++ lockdep_assert_held(&q->mmap_lock);
++
++ if (q->memory != VB2_MEMORY_MMAP) {
++ dprintk(q, 1, "queue is not currently set up for mmap\n");
++ return -EINVAL;
++ }
++
++ if (vb2_fileio_is_active(q)) {
++ dprintk(q, 1, "file io in progress\n");
++ return -EBUSY;
++ }
++
+ /*
+ * Go over all buffers and their planes, comparing the given offset
+ * with an offset assigned to each plane. If a match is found,
+@@ -2219,11 +2270,6 @@ int vb2_mmap(struct vb2_queue *q, struct vm_area_struct *vma)
+ int ret;
+ unsigned long length;
+
+- if (q->memory != VB2_MEMORY_MMAP) {
+- dprintk(q, 1, "queue is not currently set up for mmap\n");
+- return -EINVAL;
+- }
+-
+ /*
+ * Check memory area access mode.
+ */
+@@ -2245,14 +2291,9 @@ int vb2_mmap(struct vb2_queue *q, struct vm_area_struct *vma)
+
+ mutex_lock(&q->mmap_lock);
+
+- if (vb2_fileio_is_active(q)) {
+- dprintk(q, 1, "mmap: file io in progress\n");
+- ret = -EBUSY;
+- goto unlock;
+- }
+-
+ /*
+- * Find the plane corresponding to the offset passed by userspace.
++ * Find the plane corresponding to the offset passed by userspace. This
++ * will return an error if not MEMORY_MMAP or file I/O is in progress.
+ */
+ ret = __find_plane_by_offset(q, off, &buffer, &plane);
+ if (ret)
+@@ -2305,22 +2346,25 @@ unsigned long vb2_get_unmapped_area(struct vb2_queue *q,
+ void *vaddr;
+ int ret;
+
+- if (q->memory != VB2_MEMORY_MMAP) {
+- dprintk(q, 1, "queue is not currently set up for mmap\n");
+- return -EINVAL;
+- }
++ mutex_lock(&q->mmap_lock);
+
+ /*
+- * Find the plane corresponding to the offset passed by userspace.
++ * Find the plane corresponding to the offset passed by userspace. This
++ * will return an error if not MEMORY_MMAP or file I/O is in progress.
+ */
+ ret = __find_plane_by_offset(q, off, &buffer, &plane);
+ if (ret)
+- return ret;
++ goto unlock;
+
+ vb = q->bufs[buffer];
+
+ vaddr = vb2_plane_vaddr(vb, plane);
++ mutex_unlock(&q->mmap_lock);
+ return vaddr ? (unsigned long)vaddr : -EINVAL;
++
++unlock:
++ mutex_unlock(&q->mmap_lock);
++ return ret;
+ }
+ EXPORT_SYMBOL_GPL(vb2_get_unmapped_area);
+ #endif
+--
+2.35.1
+
--- /dev/null
+From 60accdd3d3a54a9d28d0f2d39ec740df38c167fa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Mar 2022 14:45:41 -0700
+Subject: mm: __isolate_lru_page_prepare() in isolate_migratepages_block()
+
+From: Hugh Dickins <hughd@google.com>
+
+[ Upstream commit 89f6c88a6ab4a11deb14c270f7f1454cda4f73d6 ]
+
+__isolate_lru_page_prepare() conflates two unrelated functions, with the
+flags to one disjoint from the flags to the other; and hides some of the
+important checks outside of isolate_migratepages_block(), where the
+sequence is better to be visible. It comes from the days of lumpy
+reclaim, before compaction, when the combination made more sense.
+
+Move what's needed by mm/compaction.c isolate_migratepages_block() inline
+there, and what's needed by mm/vmscan.c isolate_lru_pages() inline there.
+
+Shorten "isolate_mode" to "mode", so the sequence of conditions is easier
+to read. Declare a "mapping" variable, to save one call to page_mapping()
+(but not another: calling again after page is locked is necessary).
+Simplify isolate_lru_pages() with a "move_to" list pointer.
+
+Link: https://lkml.kernel.org/r/879d62a8-91cc-d3c6-fb3b-69768236df68@google.com
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: David Rientjes <rientjes@google.com>
+Reviewed-by: Alex Shi <alexs@kernel.org>
+Cc: Alexander Duyck <alexander.duyck@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Stable-dep-of: 829ae0f81ce0 ("mm: migrate: fix THP's mapcount on isolation")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/swap.h | 1 -
+ mm/compaction.c | 51 +++++++++++++++++++---
+ mm/vmscan.c | 101 ++++++++-----------------------------------
+ 3 files changed, 62 insertions(+), 91 deletions(-)
+
+diff --git a/include/linux/swap.h b/include/linux/swap.h
+index 394d5de5d4b4..a502928c29c5 100644
+--- a/include/linux/swap.h
++++ b/include/linux/swap.h
+@@ -358,7 +358,6 @@ extern void lru_cache_add_inactive_or_unevictable(struct page *page,
+ extern unsigned long zone_reclaimable_pages(struct zone *zone);
+ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
+ gfp_t gfp_mask, nodemask_t *mask);
+-extern bool __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode);
+ extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
+ unsigned long nr_pages,
+ gfp_t gfp_mask,
+diff --git a/mm/compaction.c b/mm/compaction.c
+index ea46aadc7c21..57ce6b001b10 100644
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -784,7 +784,7 @@ static bool too_many_isolated(pg_data_t *pgdat)
+ * @cc: Compaction control structure.
+ * @low_pfn: The first PFN to isolate
+ * @end_pfn: The one-past-the-last PFN to isolate, within same pageblock
+- * @isolate_mode: Isolation mode to be used.
++ * @mode: Isolation mode to be used.
+ *
+ * Isolate all pages that can be migrated from the range specified by
+ * [low_pfn, end_pfn). The range is expected to be within same pageblock.
+@@ -798,7 +798,7 @@ static bool too_many_isolated(pg_data_t *pgdat)
+ */
+ static unsigned long
+ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
+- unsigned long end_pfn, isolate_mode_t isolate_mode)
++ unsigned long end_pfn, isolate_mode_t mode)
+ {
+ pg_data_t *pgdat = cc->zone->zone_pgdat;
+ unsigned long nr_scanned = 0, nr_isolated = 0;
+@@ -806,6 +806,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
+ unsigned long flags = 0;
+ bool locked = false;
+ struct page *page = NULL, *valid_page = NULL;
++ struct address_space *mapping;
+ unsigned long start_pfn = low_pfn;
+ bool skip_on_failure = false;
+ unsigned long next_skip_pfn = 0;
+@@ -949,7 +950,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
+ locked = false;
+ }
+
+- if (!isolate_movable_page(page, isolate_mode))
++ if (!isolate_movable_page(page, mode))
+ goto isolate_success;
+ }
+
+@@ -961,15 +962,15 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
+ * so avoid taking lru_lock and isolating it unnecessarily in an
+ * admittedly racy check.
+ */
+- if (!page_mapping(page) &&
+- page_count(page) > page_mapcount(page))
++ mapping = page_mapping(page);
++ if (!mapping && page_count(page) > page_mapcount(page))
+ goto isolate_fail;
+
+ /*
+ * Only allow to migrate anonymous pages in GFP_NOFS context
+ * because those do not depend on fs locks.
+ */
+- if (!(cc->gfp_mask & __GFP_FS) && page_mapping(page))
++ if (!(cc->gfp_mask & __GFP_FS) && mapping)
+ goto isolate_fail;
+
+ /*
+@@ -980,9 +981,45 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
+ if (unlikely(!get_page_unless_zero(page)))
+ goto isolate_fail;
+
+- if (!__isolate_lru_page_prepare(page, isolate_mode))
++ /* Only take pages on LRU: a check now makes later tests safe */
++ if (!PageLRU(page))
++ goto isolate_fail_put;
++
++ /* Compaction might skip unevictable pages but CMA takes them */
++ if (!(mode & ISOLATE_UNEVICTABLE) && PageUnevictable(page))
++ goto isolate_fail_put;
++
++ /*
++ * To minimise LRU disruption, the caller can indicate with
++ * ISOLATE_ASYNC_MIGRATE that it only wants to isolate pages
++ * it will be able to migrate without blocking - clean pages
++ * for the most part. PageWriteback would require blocking.
++ */
++ if ((mode & ISOLATE_ASYNC_MIGRATE) && PageWriteback(page))
+ goto isolate_fail_put;
+
++ if ((mode & ISOLATE_ASYNC_MIGRATE) && PageDirty(page)) {
++ bool migrate_dirty;
++
++ /*
++ * Only pages without mappings or that have a
++ * ->migratepage callback are possible to migrate
++ * without blocking. However, we can be racing with
++ * truncation so it's necessary to lock the page
++ * to stabilise the mapping as truncation holds
++ * the page lock until after the page is removed
++ * from the page cache.
++ */
++ if (!trylock_page(page))
++ goto isolate_fail_put;
++
++ mapping = page_mapping(page);
++ migrate_dirty = !mapping || mapping->a_ops->migratepage;
++ unlock_page(page);
++ if (!migrate_dirty)
++ goto isolate_fail_put;
++ }
++
+ /* Try isolate the page */
+ if (!TestClearPageLRU(page))
+ goto isolate_fail_put;
+diff --git a/mm/vmscan.c b/mm/vmscan.c
+index 00a47845a15b..9cba0f890b33 100644
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -1535,69 +1535,6 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
+ return nr_reclaimed;
+ }
+
+-/*
+- * Attempt to remove the specified page from its LRU. Only take this page
+- * if it is of the appropriate PageActive status. Pages which are being
+- * freed elsewhere are also ignored.
+- *
+- * page: page to consider
+- * mode: one of the LRU isolation modes defined above
+- *
+- * returns true on success, false on failure.
+- */
+-bool __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode)
+-{
+- /* Only take pages on the LRU. */
+- if (!PageLRU(page))
+- return false;
+-
+- /* Compaction should not handle unevictable pages but CMA can do so */
+- if (PageUnevictable(page) && !(mode & ISOLATE_UNEVICTABLE))
+- return false;
+-
+- /*
+- * To minimise LRU disruption, the caller can indicate that it only
+- * wants to isolate pages it will be able to operate on without
+- * blocking - clean pages for the most part.
+- *
+- * ISOLATE_ASYNC_MIGRATE is used to indicate that it only wants to pages
+- * that it is possible to migrate without blocking
+- */
+- if (mode & ISOLATE_ASYNC_MIGRATE) {
+- /* All the caller can do on PageWriteback is block */
+- if (PageWriteback(page))
+- return false;
+-
+- if (PageDirty(page)) {
+- struct address_space *mapping;
+- bool migrate_dirty;
+-
+- /*
+- * Only pages without mappings or that have a
+- * ->migratepage callback are possible to migrate
+- * without blocking. However, we can be racing with
+- * truncation so it's necessary to lock the page
+- * to stabilise the mapping as truncation holds
+- * the page lock until after the page is removed
+- * from the page cache.
+- */
+- if (!trylock_page(page))
+- return false;
+-
+- mapping = page_mapping(page);
+- migrate_dirty = !mapping || mapping->a_ops->migratepage;
+- unlock_page(page);
+- if (!migrate_dirty)
+- return false;
+- }
+- }
+-
+- if ((mode & ISOLATE_UNMAPPED) && page_mapped(page))
+- return false;
+-
+- return true;
+-}
+-
+ /*
+ * Update LRU sizes after isolating pages. The LRU size updates must
+ * be complete before mem_cgroup_update_lru_size due to a sanity check.
+@@ -1647,11 +1584,11 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
+ unsigned long skipped = 0;
+ unsigned long scan, total_scan, nr_pages;
+ LIST_HEAD(pages_skipped);
+- isolate_mode_t mode = (sc->may_unmap ? 0 : ISOLATE_UNMAPPED);
+
+ total_scan = 0;
+ scan = 0;
+ while (scan < nr_to_scan && !list_empty(src)) {
++ struct list_head *move_to = src;
+ struct page *page;
+
+ page = lru_to_page(src);
+@@ -1661,9 +1598,9 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
+ total_scan += nr_pages;
+
+ if (page_zonenum(page) > sc->reclaim_idx) {
+- list_move(&page->lru, &pages_skipped);
+ nr_skipped[page_zonenum(page)] += nr_pages;
+- continue;
++ move_to = &pages_skipped;
++ goto move;
+ }
+
+ /*
+@@ -1671,37 +1608,34 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
+ * return with no isolated pages if the LRU mostly contains
+ * ineligible pages. This causes the VM to not reclaim any
+ * pages, triggering a premature OOM.
+- *
+- * Account all tail pages of THP. This would not cause
+- * premature OOM since __isolate_lru_page() returns -EBUSY
+- * only when the page is being freed somewhere else.
++ * Account all tail pages of THP.
+ */
+ scan += nr_pages;
+- if (!__isolate_lru_page_prepare(page, mode)) {
+- /* It is being freed elsewhere */
+- list_move(&page->lru, src);
+- continue;
+- }
++
++ if (!PageLRU(page))
++ goto move;
++ if (!sc->may_unmap && page_mapped(page))
++ goto move;
++
+ /*
+ * Be careful not to clear PageLRU until after we're
+ * sure the page is not being freed elsewhere -- the
+ * page release code relies on it.
+ */
+- if (unlikely(!get_page_unless_zero(page))) {
+- list_move(&page->lru, src);
+- continue;
+- }
++ if (unlikely(!get_page_unless_zero(page)))
++ goto move;
+
+ if (!TestClearPageLRU(page)) {
+ /* Another thread is already isolating this page */
+ put_page(page);
+- list_move(&page->lru, src);
+- continue;
++ goto move;
+ }
+
+ nr_taken += nr_pages;
+ nr_zone_taken[page_zonenum(page)] += nr_pages;
+- list_move(&page->lru, dst);
++ move_to = dst;
++move:
++ list_move(&page->lru, move_to);
+ }
+
+ /*
+@@ -1725,7 +1659,8 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
+ }
+ *nr_scanned = total_scan;
+ trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan,
+- total_scan, skipped, nr_taken, mode, lru);
++ total_scan, skipped, nr_taken,
++ sc->may_unmap ? 0 : ISOLATE_UNMAPPED, lru);
+ update_lru_sizes(lruvec, lru, nr_zone_taken);
+ return nr_taken;
+ }
+--
+2.35.1
+
--- /dev/null
+From ef33d369381db1a4df5a9f474b014cc4e4664606 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Dec 2020 12:34:20 -0800
+Subject: mm/compaction: do page isolation first in compaction
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Alex Shi <alex.shi@linux.alibaba.com>
+
+[ Upstream commit 9df41314390b81a541ca6e84c8340bad0959e4b5 ]
+
+Currently, compaction would get the lru_lock and then do page isolation
+which works fine with pgdat->lru_lock, since any page isolation would
+compete for the lru_lock. If we want to change to memcg lru_lock, we have
+to isolate the page before getting lru_lock, thus isolation would block
+page's memcg change which relies on page isolation too. Then we could
+safely use per memcg lru_lock later.
+
+The new page isolation uses the previously introduced TestClearPageLRU() +
+pgdat lru locking, which will be changed to memcg lru lock later.
+
+Hugh Dickins <hughd@google.com> fixed the following bugs in an early
+version of this patch:
+
+Fix lots of crashes under compaction load: isolate_migratepages_block()
+must clean up appropriately when rejecting a page, setting PageLRU again
+if it had been cleared; and a put_page() after get_page_unless_zero()
+cannot safely be done while holding locked_lruvec - it may turn out to be
+the final put_page(), which will take an lruvec lock when PageLRU.
+
+And move __isolate_lru_page_prepare back after get_page_unless_zero to
+make trylock_page() safe: trylock_page() is not safe to use at this time:
+its setting PG_locked can race with the page being freed or allocated
+("Bad page"), and can also erase flags being set by one of those "sole
+owners" of a freshly allocated page who use non-atomic __SetPageFlag().
+
+Link: https://lkml.kernel.org/r/1604566549-62481-16-git-send-email-alex.shi@linux.alibaba.com
+Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
+Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
+Acked-by: Hugh Dickins <hughd@google.com>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Alexander Duyck <alexander.duyck@gmail.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Cc: "Chen, Rong A" <rong.a.chen@intel.com>
+Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
+Cc: "Huang, Ying" <ying.huang@intel.com>
+Cc: Jann Horn <jannh@google.com>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Kirill A. Shutemov <kirill@shutemov.name>
+Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Mika Penttilä <mika.penttila@nextfour.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Shakeel Butt <shakeelb@google.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
+Cc: Wei Yang <richard.weiyang@gmail.com>
+Cc: Yang Shi <yang.shi@linux.alibaba.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Stable-dep-of: 829ae0f81ce0 ("mm: migrate: fix THP's mapcount on isolation")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/swap.h | 2 +-
+ mm/compaction.c | 42 +++++++++++++++++++++++++++++++++---------
+ mm/vmscan.c | 43 ++++++++++++++++++++++---------------------
+ 3 files changed, 56 insertions(+), 31 deletions(-)
+
+diff --git a/include/linux/swap.h b/include/linux/swap.h
+index fbc6805358da..3577d3a6ec37 100644
+--- a/include/linux/swap.h
++++ b/include/linux/swap.h
+@@ -358,7 +358,7 @@ extern void lru_cache_add_inactive_or_unevictable(struct page *page,
+ extern unsigned long zone_reclaimable_pages(struct zone *zone);
+ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
+ gfp_t gfp_mask, nodemask_t *mask);
+-extern int __isolate_lru_page(struct page *page, isolate_mode_t mode);
++extern int __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode);
+ extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
+ unsigned long nr_pages,
+ gfp_t gfp_mask,
+diff --git a/mm/compaction.c b/mm/compaction.c
+index 8dfbe86bd74f..ba3e907f03b7 100644
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -890,6 +890,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
+ if (!valid_page && IS_ALIGNED(low_pfn, pageblock_nr_pages)) {
+ if (!cc->ignore_skip_hint && get_pageblock_skip(page)) {
+ low_pfn = end_pfn;
++ page = NULL;
+ goto isolate_abort;
+ }
+ valid_page = page;
+@@ -971,6 +972,21 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
+ if (!(cc->gfp_mask & __GFP_FS) && page_mapping(page))
+ goto isolate_fail;
+
++ /*
++ * Be careful not to clear PageLRU until after we're
++ * sure the page is not being freed elsewhere -- the
++ * page release code relies on it.
++ */
++ if (unlikely(!get_page_unless_zero(page)))
++ goto isolate_fail;
++
++ if (__isolate_lru_page_prepare(page, isolate_mode) != 0)
++ goto isolate_fail_put;
++
++ /* Try isolate the page */
++ if (!TestClearPageLRU(page))
++ goto isolate_fail_put;
++
+ /* If we already hold the lock, we can skip some rechecking */
+ if (!locked) {
+ locked = compact_lock_irqsave(&pgdat->lru_lock,
+@@ -983,10 +999,6 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
+ goto isolate_abort;
+ }
+
+- /* Recheck PageLRU and PageCompound under lock */
+- if (!PageLRU(page))
+- goto isolate_fail;
+-
+ /*
+ * Page become compound since the non-locked check,
+ * and it's on LRU. It can only be a THP so the order
+@@ -994,16 +1006,13 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
+ */
+ if (unlikely(PageCompound(page) && !cc->alloc_contig)) {
+ low_pfn += compound_nr(page) - 1;
+- goto isolate_fail;
++ SetPageLRU(page);
++ goto isolate_fail_put;
+ }
+ }
+
+ lruvec = mem_cgroup_page_lruvec(page, pgdat);
+
+- /* Try isolate the page */
+- if (__isolate_lru_page(page, isolate_mode) != 0)
+- goto isolate_fail;
+-
+ /* The whole page is taken off the LRU; skip the tail pages. */
+ if (PageCompound(page))
+ low_pfn += compound_nr(page) - 1;
+@@ -1032,6 +1041,15 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
+ }
+
+ continue;
++
++isolate_fail_put:
++ /* Avoid potential deadlock in freeing page under lru_lock */
++ if (locked) {
++ spin_unlock_irqrestore(&pgdat->lru_lock, flags);
++ locked = false;
++ }
++ put_page(page);
++
+ isolate_fail:
+ if (!skip_on_failure)
+ continue;
+@@ -1068,9 +1086,15 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
+ if (unlikely(low_pfn > end_pfn))
+ low_pfn = end_pfn;
+
++ page = NULL;
++
+ isolate_abort:
+ if (locked)
+ spin_unlock_irqrestore(&pgdat->lru_lock, flags);
++ if (page) {
++ SetPageLRU(page);
++ put_page(page);
++ }
+
+ /*
+ * Updated the cached scanner pfn once the pageblock has been scanned
+diff --git a/mm/vmscan.c b/mm/vmscan.c
+index 8d62eedfc794..5ada402c8d95 100644
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -1545,7 +1545,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
+ *
+ * returns 0 on success, -ve errno on failure.
+ */
+-int __isolate_lru_page(struct page *page, isolate_mode_t mode)
++int __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode)
+ {
+ int ret = -EBUSY;
+
+@@ -1597,22 +1597,9 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode)
+ if ((mode & ISOLATE_UNMAPPED) && page_mapped(page))
+ return ret;
+
+- if (likely(get_page_unless_zero(page))) {
+- /*
+- * Be careful not to clear PageLRU until after we're
+- * sure the page is not being freed elsewhere -- the
+- * page release code relies on it.
+- */
+- if (TestClearPageLRU(page))
+- ret = 0;
+- else
+- put_page(page);
+- }
+-
+- return ret;
++ return 0;
+ }
+
+-
+ /*
+ * Update LRU sizes after isolating pages. The LRU size updates must
+ * be complete before mem_cgroup_update_lru_size due to a sanity check.
+@@ -1692,20 +1679,34 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
+ * only when the page is being freed somewhere else.
+ */
+ scan += nr_pages;
+- switch (__isolate_lru_page(page, mode)) {
++ switch (__isolate_lru_page_prepare(page, mode)) {
+ case 0:
++ /*
++ * Be careful not to clear PageLRU until after we're
++ * sure the page is not being freed elsewhere -- the
++ * page release code relies on it.
++ */
++ if (unlikely(!get_page_unless_zero(page)))
++ goto busy;
++
++ if (!TestClearPageLRU(page)) {
++ /*
++ * This page may in other isolation path,
++ * but we still hold lru_lock.
++ */
++ put_page(page);
++ goto busy;
++ }
++
+ nr_taken += nr_pages;
+ nr_zone_taken[page_zonenum(page)] += nr_pages;
+ list_move(&page->lru, dst);
+ break;
+
+- case -EBUSY:
++ default:
++busy:
+ /* else it is being freed elsewhere */
+ list_move(&page->lru, src);
+- continue;
+-
+- default:
+- BUG();
+ }
+ }
+
+--
+2.35.1
+
--- /dev/null
+From 2b6a220c2d4c90dda4ae1ed3aa0251eb152b8825 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 6 Dec 2022 18:16:04 +0100
+Subject: mm/khugepaged: fix GUP-fast interaction by sending IPI
+
+From: Jann Horn <jannh@google.com>
+
+commit 2ba99c5e08812494bc57f319fb562f527d9bacd8 upstream.
+
+Since commit 70cbc3cc78a99 ("mm: gup: fix the fast GUP race against THP
+collapse"), the lockless_pages_from_mm() fastpath rechecks the pmd_t to
+ensure that the page table was not removed by khugepaged in between.
+
+However, lockless_pages_from_mm() still requires that the page table is
+not concurrently freed. Fix it by sending IPIs (if the architecture uses
+semi-RCU-style page table freeing) before freeing/reusing page tables.
+
+Link: https://lkml.kernel.org/r/20221129154730.2274278-2-jannh@google.com
+Link: https://lkml.kernel.org/r/20221128180252.1684965-2-jannh@google.com
+Link: https://lkml.kernel.org/r/20221125213714.4115729-2-jannh@google.com
+Fixes: ba76149f47d8 ("thp: khugepaged")
+Signed-off-by: Jann Horn <jannh@google.com>
+Reviewed-by: Yang Shi <shy828301@gmail.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: John Hubbard <jhubbard@nvidia.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+[manual backport: two of the three places in khugepaged that can free
+ptes were refactored into a common helper between 5.15 and 6.0]
+Signed-off-by: Jann Horn <jannh@google.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/asm-generic/tlb.h | 4 ++++
+ mm/khugepaged.c | 3 +++
+ mm/mmu_gather.c | 4 +---
+ 3 files changed, 8 insertions(+), 3 deletions(-)
+
+diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
+index a0c4b99d2899..f40c9534f20b 100644
+--- a/include/asm-generic/tlb.h
++++ b/include/asm-generic/tlb.h
+@@ -205,12 +205,16 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
+ #define tlb_needs_table_invalidate() (true)
+ #endif
+
++void tlb_remove_table_sync_one(void);
++
+ #else
+
+ #ifdef tlb_needs_table_invalidate
+ #error tlb_needs_table_invalidate() requires MMU_GATHER_RCU_TABLE_FREE
+ #endif
+
++static inline void tlb_remove_table_sync_one(void) { }
++
+ #endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */
+
+
+diff --git a/mm/khugepaged.c b/mm/khugepaged.c
+index 014e8b259313..0268b549bd60 100644
+--- a/mm/khugepaged.c
++++ b/mm/khugepaged.c
+@@ -1154,6 +1154,7 @@ static void collapse_huge_page(struct mm_struct *mm,
+ _pmd = pmdp_collapse_flush(vma, address, pmd);
+ spin_unlock(pmd_ptl);
+ mmu_notifier_invalidate_range_end(&range);
++ tlb_remove_table_sync_one();
+
+ spin_lock(pte_ptl);
+ isolated = __collapse_huge_page_isolate(vma, address, pte,
+@@ -1538,6 +1539,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
+ /* step 4: collapse pmd */
+ _pmd = pmdp_collapse_flush(vma, haddr, pmd);
+ mm_dec_nr_ptes(mm);
++ tlb_remove_table_sync_one();
+ pte_free(mm, pmd_pgtable(_pmd));
+
+ i_mmap_unlock_write(vma->vm_file->f_mapping);
+@@ -1625,6 +1627,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
+ /* assume page table is clear */
+ _pmd = pmdp_collapse_flush(vma, addr, pmd);
+ mm_dec_nr_ptes(mm);
++ tlb_remove_table_sync_one();
+ pte_free(mm, pmd_pgtable(_pmd));
+ }
+ mmap_write_unlock(mm);
+diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
+index 03c33c93a582..205fdbb5792a 100644
+--- a/mm/mmu_gather.c
++++ b/mm/mmu_gather.c
+@@ -139,7 +139,7 @@ static void tlb_remove_table_smp_sync(void *arg)
+ /* Simply deliver the interrupt */
+ }
+
+-static void tlb_remove_table_sync_one(void)
++void tlb_remove_table_sync_one(void)
+ {
+ /*
+ * This isn't an RCU grace period and hence the page-tables cannot be
+@@ -163,8 +163,6 @@ static void tlb_remove_table_free(struct mmu_table_batch *batch)
+
+ #else /* !CONFIG_MMU_GATHER_RCU_TABLE_FREE */
+
+-static void tlb_remove_table_sync_one(void) { }
+-
+ static void tlb_remove_table_free(struct mmu_table_batch *batch)
+ {
+ __tlb_remove_table_free(batch);
+--
+2.35.1
+
--- /dev/null
+From 8e1c95908e48c5198348debed6c347698ecc9ec1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 6 Dec 2022 18:16:05 +0100
+Subject: mm/khugepaged: invoke MMU notifiers in shmem/file collapse paths
+
+From: Jann Horn <jannh@google.com>
+
+commit f268f6cf875f3220afc77bdd0bf1bb136eb54db9 upstream.
+
+Any codepath that zaps page table entries must invoke MMU notifiers to
+ensure that secondary MMUs (like KVM) don't keep accessing pages which
+aren't mapped anymore. Secondary MMUs don't hold their own references to
+pages that are mirrored over, so failing to notify them can lead to page
+use-after-free.
+
+I'm marking this as addressing an issue introduced in commit f3f0e1d2150b
+("khugepaged: add support of collapse for tmpfs/shmem pages"), but most of
+the security impact of this only came in commit 27e1f8273113 ("khugepaged:
+enable collapse pmd for pte-mapped THP"), which actually omitted flushes
+for the removal of present PTEs, not just for the removal of empty page
+tables.
+
+Link: https://lkml.kernel.org/r/20221129154730.2274278-3-jannh@google.com
+Link: https://lkml.kernel.org/r/20221128180252.1684965-3-jannh@google.com
+Link: https://lkml.kernel.org/r/20221125213714.4115729-3-jannh@google.com
+Fixes: f3f0e1d2150b ("khugepaged: add support of collapse for tmpfs/shmem pages")
+Signed-off-by: Jann Horn <jannh@google.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Yang Shi <shy828301@gmail.com>
+Cc: John Hubbard <jhubbard@nvidia.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+[manual backport: this code was refactored from two copies into a common
+helper between 5.15 and 6.0]
+Signed-off-by: Jann Horn <jannh@google.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/khugepaged.c | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+diff --git a/mm/khugepaged.c b/mm/khugepaged.c
+index 0268b549bd60..0eb3adf4ff68 100644
+--- a/mm/khugepaged.c
++++ b/mm/khugepaged.c
+@@ -1444,6 +1444,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
+ spinlock_t *ptl;
+ int count = 0;
+ int i;
++ struct mmu_notifier_range range;
+
+ if (!vma || !vma->vm_file ||
+ vma->vm_start > haddr || vma->vm_end < haddr + HPAGE_PMD_SIZE)
+@@ -1537,9 +1538,13 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
+ }
+
+ /* step 4: collapse pmd */
++ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm, haddr,
++ haddr + HPAGE_PMD_SIZE);
++ mmu_notifier_invalidate_range_start(&range);
+ _pmd = pmdp_collapse_flush(vma, haddr, pmd);
+ mm_dec_nr_ptes(mm);
+ tlb_remove_table_sync_one();
++ mmu_notifier_invalidate_range_end(&range);
+ pte_free(mm, pmd_pgtable(_pmd));
+
+ i_mmap_unlock_write(vma->vm_file->f_mapping);
+@@ -1624,11 +1629,19 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
+ */
+ if (mmap_write_trylock(mm)) {
+ if (!khugepaged_test_exit(mm)) {
++ struct mmu_notifier_range range;
++
++ mmu_notifier_range_init(&range,
++ MMU_NOTIFY_CLEAR, 0,
++ NULL, mm, addr,
++ addr + HPAGE_PMD_SIZE);
++ mmu_notifier_invalidate_range_start(&range);
+ /* assume page table is clear */
+ _pmd = pmdp_collapse_flush(vma, addr, pmd);
+ mm_dec_nr_ptes(mm);
+ tlb_remove_table_sync_one();
+ pte_free(mm, pmd_pgtable(_pmd));
++ mmu_notifier_invalidate_range_end(&range);
+ }
+ mmap_write_unlock(mm);
+ } else {
+--
+2.35.1
+
--- /dev/null
+From fb456f2c893540f9a10c07cf05d86bc67bea8359 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 6 Dec 2022 18:16:06 +0100
+Subject: mm/khugepaged: take the right locks for page table retraction
+
+From: Jann Horn <jannh@google.com>
+
+commit 8d3c106e19e8d251da31ff4cc7462e4565d65084 upstream.
+
+pagetable walks on address ranges mapped by VMAs can be done under the
+mmap lock, the lock of an anon_vma attached to the VMA, or the lock of the
+VMA's address_space. Only one of these needs to be held, and it does not
+need to be held in exclusive mode.
+
+Under those circumstances, the rules for concurrent access to page table
+entries are:
+
+ - Terminal page table entries (entries that don't point to another page
+ table) can be arbitrarily changed under the page table lock, with the
+ exception that they always need to be consistent for
+ hardware page table walks and lockless_pages_from_mm().
+ This includes that they can be changed into non-terminal entries.
+ - Non-terminal page table entries (which point to another page table)
+ can not be modified; readers are allowed to READ_ONCE() an entry, verify
+ that it is non-terminal, and then assume that its value will stay as-is.
+
+Retracting a page table involves modifying a non-terminal entry, so
+page-table-level locks are insufficient to protect against concurrent page
+table traversal; it requires taking all the higher-level locks under which
+it is possible to start a page walk in the relevant range in exclusive
+mode.
+
+The collapse_huge_page() path for anonymous THP already follows this rule,
+but the shmem/file THP path was getting it wrong, making it possible for
+concurrent rmap-based operations to cause corruption.
+
+Link: https://lkml.kernel.org/r/20221129154730.2274278-1-jannh@google.com
+Link: https://lkml.kernel.org/r/20221128180252.1684965-1-jannh@google.com
+Link: https://lkml.kernel.org/r/20221125213714.4115729-1-jannh@google.com
+Fixes: 27e1f8273113 ("khugepaged: enable collapse pmd for pte-mapped THP")
+Signed-off-by: Jann Horn <jannh@google.com>
+Reviewed-by: Yang Shi <shy828301@gmail.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: John Hubbard <jhubbard@nvidia.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+[manual backport: this code was refactored from two copies into a common
+helper between 5.15 and 6.0]
+Signed-off-by: Jann Horn <jannh@google.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/khugepaged.c | 31 ++++++++++++++++++++++++++-----
+ 1 file changed, 26 insertions(+), 5 deletions(-)
+
+diff --git a/mm/khugepaged.c b/mm/khugepaged.c
+index cf4dceb9682b..014e8b259313 100644
+--- a/mm/khugepaged.c
++++ b/mm/khugepaged.c
+@@ -1457,6 +1457,14 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
+ if (!hugepage_vma_check(vma, vma->vm_flags | VM_HUGEPAGE))
+ return;
+
++ /*
++ * Symmetry with retract_page_tables(): Exclude MAP_PRIVATE mappings
++ * that got written to. Without this, we'd have to also lock the
++ * anon_vma if one exists.
++ */
++ if (vma->anon_vma)
++ return;
++
+ hpage = find_lock_page(vma->vm_file->f_mapping,
+ linear_page_index(vma, haddr));
+ if (!hpage)
+@@ -1469,6 +1477,19 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
+ if (!pmd)
+ goto drop_hpage;
+
++ /*
++ * We need to lock the mapping so that from here on, only GUP-fast and
++ * hardware page walks can access the parts of the page tables that
++ * we're operating on.
++ */
++ i_mmap_lock_write(vma->vm_file->f_mapping);
++
++ /*
++ * This spinlock should be unnecessary: Nobody else should be accessing
++ * the page tables under spinlock protection here, only
++ * lockless_pages_from_mm() and the hardware page walker can access page
++ * tables while all the high-level locks are held in write mode.
++ */
+ start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl);
+
+ /* step 1: check all mapped PTEs are to the right huge page */
+@@ -1515,12 +1536,12 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
+ }
+
+ /* step 4: collapse pmd */
+- ptl = pmd_lock(vma->vm_mm, pmd);
+ _pmd = pmdp_collapse_flush(vma, haddr, pmd);
+- spin_unlock(ptl);
+ mm_dec_nr_ptes(mm);
+ pte_free(mm, pmd_pgtable(_pmd));
+
++ i_mmap_unlock_write(vma->vm_file->f_mapping);
++
+ drop_hpage:
+ unlock_page(hpage);
+ put_page(hpage);
+@@ -1528,6 +1549,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
+
+ abort:
+ pte_unmap_unlock(start_pte, ptl);
++ i_mmap_unlock_write(vma->vm_file->f_mapping);
+ goto drop_hpage;
+ }
+
+@@ -1577,7 +1599,8 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
+ * An alternative would be drop the check, but check that page
+ * table is clear before calling pmdp_collapse_flush() under
+ * ptl. It has higher chance to recover THP for the VMA, but
+- * has higher cost too.
++ * has higher cost too. It would also probably require locking
++ * the anon_vma.
+ */
+ if (vma->anon_vma)
+ continue;
+@@ -1599,10 +1622,8 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
+ */
+ if (mmap_write_trylock(mm)) {
+ if (!khugepaged_test_exit(mm)) {
+- spinlock_t *ptl = pmd_lock(mm, pmd);
+ /* assume page table is clear */
+ _pmd = pmdp_collapse_flush(vma, addr, pmd);
+- spin_unlock(ptl);
+ mm_dec_nr_ptes(mm);
+ pte_free(mm, pmd_pgtable(_pmd));
+ }
+--
+2.35.1
+
--- /dev/null
+From d84c0415a11eafaa01336ef3fa61f707986b5656 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Dec 2020 12:34:16 -0800
+Subject: mm/lru: introduce TestClearPageLRU()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Alex Shi <alex.shi@linux.alibaba.com>
+
+[ Upstream commit d25b5bd8a8f420b15517c19c4626c0c009f72a63 ]
+
+Currently lru_lock still guards both the lru list and the page's lru bit,
+which is ok. But if we want to use a specific lruvec lock on the page, we
+need to pin down the page's lruvec/memcg during locking. Just taking the
+lruvec lock first may be undermined by the page's memcg charge/migration.
+To fix this problem, we will clear the lru bit outside of the lock and use
+it as a pin-down action to block page isolation in the memcg-changing path.
+
+So now the standard steps of page isolation are as follows:
+ 1, get_page(); #pin the page avoid to be free
+ 2, TestClearPageLRU(); #block other isolation like memcg change
+ 3, spin_lock on lru_lock; #serialize lru list access
+ 4, delete page from lru list;
+
+This patch starts with the first part: TestClearPageLRU, which combines
+the PageLRU check and ClearPageLRU into a macro func TestClearPageLRU. This
+function will be used as a page isolation precondition to prevent other
+isolations somewhere else. Then there may be !PageLRU pages on the lru list,
+so we need to remove the BUG() checking accordingly.
+
+There are 2 rules for the lru bit now:
+1, the lru bit still indicate if a page on lru list, just in some
+ temporary moment(isolating), the page may have no lru bit when
+ it's on lru list. but the page still must be on lru list when the
+ lru bit set.
+2, have to remove lru bit before delete it from lru list.
+
+As Andrew Morton mentioned, this change would dirty a cacheline for a page
+which isn't on the LRU. But the loss would be acceptable according to Rong
+Chen's <rong.a.chen@intel.com> report:
+https://lore.kernel.org/lkml/20200304090301.GB5972@shao2-debian/
+
+Link: https://lkml.kernel.org/r/1604566549-62481-15-git-send-email-alex.shi@linux.alibaba.com
+Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
+Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
+Acked-by: Hugh Dickins <hughd@google.com>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
+Cc: Alexander Duyck <alexander.duyck@gmail.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
+Cc: "Huang, Ying" <ying.huang@intel.com>
+Cc: Jann Horn <jannh@google.com>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Kirill A. Shutemov <kirill@shutemov.name>
+Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Mika Penttilä <mika.penttila@nextfour.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Shakeel Butt <shakeelb@google.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Wei Yang <richard.weiyang@gmail.com>
+Cc: Yang Shi <yang.shi@linux.alibaba.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Stable-dep-of: 829ae0f81ce0 ("mm: migrate: fix THP's mapcount on isolation")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/page-flags.h | 1 +
+ mm/mlock.c | 3 +--
+ mm/vmscan.c | 39 +++++++++++++++++++-------------------
+ 3 files changed, 21 insertions(+), 22 deletions(-)
+
+diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
+index 4f6ba9379112..14a0cac9e099 100644
+--- a/include/linux/page-flags.h
++++ b/include/linux/page-flags.h
+@@ -335,6 +335,7 @@ PAGEFLAG(Referenced, referenced, PF_HEAD)
+ PAGEFLAG(Dirty, dirty, PF_HEAD) TESTSCFLAG(Dirty, dirty, PF_HEAD)
+ __CLEARPAGEFLAG(Dirty, dirty, PF_HEAD)
+ PAGEFLAG(LRU, lru, PF_HEAD) __CLEARPAGEFLAG(LRU, lru, PF_HEAD)
++ TESTCLEARFLAG(LRU, lru, PF_HEAD)
+ PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD)
+ TESTCLEARFLAG(Active, active, PF_HEAD)
+ PAGEFLAG(Workingset, workingset, PF_HEAD)
+diff --git a/mm/mlock.c b/mm/mlock.c
+index d487aa864e86..7b0e6334be6f 100644
+--- a/mm/mlock.c
++++ b/mm/mlock.c
+@@ -276,10 +276,9 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
+ * We already have pin from follow_page_mask()
+ * so we can spare the get_page() here.
+ */
+- if (PageLRU(page)) {
++ if (TestClearPageLRU(page)) {
+ struct lruvec *lruvec;
+
+- ClearPageLRU(page);
+ lruvec = mem_cgroup_page_lruvec(page,
+ page_pgdat(page));
+ del_page_from_lru_list(page, lruvec,
+diff --git a/mm/vmscan.c b/mm/vmscan.c
+index 51ccd80e70b6..8d62eedfc794 100644
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -1547,7 +1547,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
+ */
+ int __isolate_lru_page(struct page *page, isolate_mode_t mode)
+ {
+- int ret = -EINVAL;
++ int ret = -EBUSY;
+
+ /* Only take pages on the LRU. */
+ if (!PageLRU(page))
+@@ -1557,8 +1557,6 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode)
+ if (PageUnevictable(page) && !(mode & ISOLATE_UNEVICTABLE))
+ return ret;
+
+- ret = -EBUSY;
+-
+ /*
+ * To minimise LRU disruption, the caller can indicate that it only
+ * wants to isolate pages it will be able to operate on without
+@@ -1605,8 +1603,10 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode)
+ * sure the page is not being freed elsewhere -- the
+ * page release code relies on it.
+ */
+- ClearPageLRU(page);
+- ret = 0;
++ if (TestClearPageLRU(page))
++ ret = 0;
++ else
++ put_page(page);
+ }
+
+ return ret;
+@@ -1672,8 +1672,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
+ page = lru_to_page(src);
+ prefetchw_prev_lru_page(page, src, flags);
+
+- VM_BUG_ON_PAGE(!PageLRU(page), page);
+-
+ nr_pages = compound_nr(page);
+ total_scan += nr_pages;
+
+@@ -1770,21 +1768,18 @@ int isolate_lru_page(struct page *page)
+ VM_BUG_ON_PAGE(!page_count(page), page);
+ WARN_RATELIMIT(PageTail(page), "trying to isolate tail page");
+
+- if (PageLRU(page)) {
++ if (TestClearPageLRU(page)) {
+ pg_data_t *pgdat = page_pgdat(page);
+ struct lruvec *lruvec;
+
+- spin_lock_irq(&pgdat->lru_lock);
++ get_page(page);
+ lruvec = mem_cgroup_page_lruvec(page, pgdat);
+- if (PageLRU(page)) {
+- int lru = page_lru(page);
+- get_page(page);
+- ClearPageLRU(page);
+- del_page_from_lru_list(page, lruvec, lru);
+- ret = 0;
+- }
++ spin_lock_irq(&pgdat->lru_lock);
++ del_page_from_lru_list(page, lruvec, page_lru(page));
+ spin_unlock_irq(&pgdat->lru_lock);
++ ret = 0;
+ }
++
+ return ret;
+ }
+
+@@ -4291,6 +4286,10 @@ void check_move_unevictable_pages(struct pagevec *pvec)
+ nr_pages = thp_nr_pages(page);
+ pgscanned += nr_pages;
+
++ /* block memcg migration during page moving between lru */
++ if (!TestClearPageLRU(page))
++ continue;
++
+ if (pagepgdat != pgdat) {
+ if (pgdat)
+ spin_unlock_irq(&pgdat->lru_lock);
+@@ -4299,10 +4298,7 @@ void check_move_unevictable_pages(struct pagevec *pvec)
+ }
+ lruvec = mem_cgroup_page_lruvec(page, pgdat);
+
+- if (!PageLRU(page) || !PageUnevictable(page))
+- continue;
+-
+- if (page_evictable(page)) {
++ if (page_evictable(page) && PageUnevictable(page)) {
+ enum lru_list lru = page_lru_base_type(page);
+
+ VM_BUG_ON_PAGE(PageActive(page), page);
+@@ -4311,12 +4307,15 @@ void check_move_unevictable_pages(struct pagevec *pvec)
+ add_page_to_lru_list(page, lruvec, lru);
+ pgrescued += nr_pages;
+ }
++ SetPageLRU(page);
+ }
+
+ if (pgdat) {
+ __count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued);
+ __count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned);
+ spin_unlock_irq(&pgdat->lru_lock);
++ } else if (pgscanned) {
++ count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned);
+ }
+ }
+ EXPORT_SYMBOL_GPL(check_move_unevictable_pages);
+--
+2.35.1
+
--- /dev/null
+From 91f25a9aa0bb126c81ed361cef0f8608ac4c3f15 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 24 Nov 2022 17:55:23 +0800
+Subject: mm: migrate: fix THP's mapcount on isolation
+
+From: Gavin Shan <gshan@redhat.com>
+
+[ Upstream commit 829ae0f81ce093d674ff2256f66a714753e9ce32 ]
+
+The issue is reported when removing memory through a virtio_mem device. A
+transparent huge page that has experienced a copy-on-write fault is wrongly
+regarded as pinned. The transparent huge page thus escapes being
+isolated in isolate_migratepages_block(). The transparent huge page can't
+be migrated and the corresponding memory block can't be put into offline
+state.
+
+Fix it by replacing page_mapcount() with total_mapcount(). With this, the
+transparent huge page can be isolated and migrated, and the memory block
+can be put into offline state. Besides, the page's refcount is increased
+a bit earlier to prevent the page from being released while the check runs.
+
+Link: https://lkml.kernel.org/r/20221124095523.31061-1-gshan@redhat.com
+Fixes: 1da2f328fa64 ("mm,thp,compaction,cma: allow THP migration for CMA allocations")
+Signed-off-by: Gavin Shan <gshan@redhat.com>
+Reported-by: Zhenyu Zhang <zhenyzha@redhat.com>
+Tested-by: Zhenyu Zhang <zhenyzha@redhat.com>
+Suggested-by: David Hildenbrand <david@redhat.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: William Kucharski <william.kucharski@oracle.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: <stable@vger.kernel.org> [5.7+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/compaction.c | 22 +++++++++++-----------
+ 1 file changed, 11 insertions(+), 11 deletions(-)
+
+diff --git a/mm/compaction.c b/mm/compaction.c
+index 57ce6b001b10..54d1041560c7 100644
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -957,29 +957,29 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
+ goto isolate_fail;
+ }
+
++ /*
++ * Be careful not to clear PageLRU until after we're
++ * sure the page is not being freed elsewhere -- the
++ * page release code relies on it.
++ */
++ if (unlikely(!get_page_unless_zero(page)))
++ goto isolate_fail;
++
+ /*
+ * Migration will fail if an anonymous page is pinned in memory,
+ * so avoid taking lru_lock and isolating it unnecessarily in an
+ * admittedly racy check.
+ */
+ mapping = page_mapping(page);
+- if (!mapping && page_count(page) > page_mapcount(page))
+- goto isolate_fail;
++ if (!mapping && (page_count(page) - 1) > total_mapcount(page))
++ goto isolate_fail_put;
+
+ /*
+ * Only allow to migrate anonymous pages in GFP_NOFS context
+ * because those do not depend on fs locks.
+ */
+ if (!(cc->gfp_mask & __GFP_FS) && mapping)
+- goto isolate_fail;
+-
+- /*
+- * Be careful not to clear PageLRU until after we're
+- * sure the page is not being freed elsewhere -- the
+- * page release code relies on it.
+- */
+- if (unlikely(!get_page_unless_zero(page)))
+- goto isolate_fail;
++ goto isolate_fail_put;
+
+ /* Only take pages on LRU: a check now makes later tests safe */
+ if (!PageLRU(page))
+--
+2.35.1
+
--- /dev/null
+From 6649227e330b37c9583146cd7446b41771b3a7f1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Dec 2020 12:34:11 -0800
+Subject: mm/mlock: remove __munlock_isolate_lru_page()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Alex Shi <alex.shi@linux.alibaba.com>
+
+[ Upstream commit 13805a88a9bd3fb37f33dd8972d904de62796f3d ]
+
+__munlock_isolate_lru_page() only has one caller, remove it to clean up
+and simplify code.
+
+Link: https://lkml.kernel.org/r/1604566549-62481-14-git-send-email-alex.shi@linux.alibaba.com
+Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
+Acked-by: Hugh Dickins <hughd@google.com>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Alexander Duyck <alexander.duyck@gmail.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Cc: "Chen, Rong A" <rong.a.chen@intel.com>
+Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
+Cc: "Huang, Ying" <ying.huang@intel.com>
+Cc: Jann Horn <jannh@google.com>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: Kirill A. Shutemov <kirill@shutemov.name>
+Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Mika Penttilä <mika.penttila@nextfour.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Shakeel Butt <shakeelb@google.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
+Cc: Wei Yang <richard.weiyang@gmail.com>
+Cc: Yang Shi <yang.shi@linux.alibaba.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Stable-dep-of: 829ae0f81ce0 ("mm: migrate: fix THP's mapcount on isolation")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/mlock.c | 31 +++++++++----------------------
+ 1 file changed, 9 insertions(+), 22 deletions(-)
+
+diff --git a/mm/mlock.c b/mm/mlock.c
+index 796c726a0407..d487aa864e86 100644
+--- a/mm/mlock.c
++++ b/mm/mlock.c
+@@ -105,26 +105,6 @@ void mlock_vma_page(struct page *page)
+ }
+ }
+
+-/*
+- * Isolate a page from LRU with optional get_page() pin.
+- * Assumes lru_lock already held and page already pinned.
+- */
+-static bool __munlock_isolate_lru_page(struct page *page, bool getpage)
+-{
+- if (PageLRU(page)) {
+- struct lruvec *lruvec;
+-
+- lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
+- if (getpage)
+- get_page(page);
+- ClearPageLRU(page);
+- del_page_from_lru_list(page, lruvec, page_lru(page));
+- return true;
+- }
+-
+- return false;
+-}
+-
+ /*
+ * Finish munlock after successful page isolation
+ *
+@@ -296,9 +276,16 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
+ * We already have pin from follow_page_mask()
+ * so we can spare the get_page() here.
+ */
+- if (__munlock_isolate_lru_page(page, false))
++ if (PageLRU(page)) {
++ struct lruvec *lruvec;
++
++ ClearPageLRU(page);
++ lruvec = mem_cgroup_page_lruvec(page,
++ page_pgdat(page));
++ del_page_from_lru_list(page, lruvec,
++ page_lru(page));
+ continue;
+- else
++ } else
+ __munlock_isolation_failed(page);
+ } else {
+ delta_munlocked++;
+--
+2.35.1
+
--- /dev/null
+From b824ddafd0a14e7a943171ce5903b83057e0c587 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Dec 2020 12:34:07 -0800
+Subject: mm/mlock: remove lru_lock on TestClearPageMlocked
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Alex Shi <alex.shi@linux.alibaba.com>
+
+[ Upstream commit 3db19aa39bac33f2e850fa1ddd67be29b192e51f ]
+
+In the func munlock_vma_page, comments mentioned that lru_lock is needed
+for serialization with split_huge_pages. But the page must be PageLocked,
+as must pages in the split_huge_page series of funcs. Thus PageLocked is
+enough to serialize both funcs.
+
+Furthermore, Hugh Dickins pointed out: before splitting in
+split_huge_page_to_list, the page was unmap_page()ed to remove pmd/ptes,
+which protects the page from munlock. Thus, there is no need to guard
+__split_huge_page_tail for mlock clearing; just keep the lru_lock there
+for isolation purposes.
+
+LKP found a preempt issue on __mod_zone_page_state, which needs to be
+changed to mod_zone_page_state. Thanks!
+
+Link: https://lkml.kernel.org/r/1604566549-62481-13-git-send-email-alex.shi@linux.alibaba.com
+Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
+Acked-by: Hugh Dickins <hughd@google.com>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Alexander Duyck <alexander.duyck@gmail.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Cc: "Chen, Rong A" <rong.a.chen@intel.com>
+Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
+Cc: "Huang, Ying" <ying.huang@intel.com>
+Cc: Jann Horn <jannh@google.com>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: Kirill A. Shutemov <kirill@shutemov.name>
+Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Mika Penttilä <mika.penttila@nextfour.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Shakeel Butt <shakeelb@google.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
+Cc: Wei Yang <richard.weiyang@gmail.com>
+Cc: Yang Shi <yang.shi@linux.alibaba.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Stable-dep-of: 829ae0f81ce0 ("mm: migrate: fix THP's mapcount on isolation")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/mlock.c | 26 +++++---------------------
+ 1 file changed, 5 insertions(+), 21 deletions(-)
+
+diff --git a/mm/mlock.c b/mm/mlock.c
+index 884b1216da6a..796c726a0407 100644
+--- a/mm/mlock.c
++++ b/mm/mlock.c
+@@ -187,40 +187,24 @@ static void __munlock_isolation_failed(struct page *page)
+ unsigned int munlock_vma_page(struct page *page)
+ {
+ int nr_pages;
+- pg_data_t *pgdat = page_pgdat(page);
+
+ /* For try_to_munlock() and to serialize with page migration */
+ BUG_ON(!PageLocked(page));
+-
+ VM_BUG_ON_PAGE(PageTail(page), page);
+
+- /*
+- * Serialize with any parallel __split_huge_page_refcount() which
+- * might otherwise copy PageMlocked to part of the tail pages before
+- * we clear it in the head page. It also stabilizes thp_nr_pages().
+- */
+- spin_lock_irq(&pgdat->lru_lock);
+-
+ if (!TestClearPageMlocked(page)) {
+ /* Potentially, PTE-mapped THP: do not skip the rest PTEs */
+- nr_pages = 1;
+- goto unlock_out;
++ return 0;
+ }
+
+ nr_pages = thp_nr_pages(page);
+- __mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
++ mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
+
+- if (__munlock_isolate_lru_page(page, true)) {
+- spin_unlock_irq(&pgdat->lru_lock);
++ if (!isolate_lru_page(page))
+ __munlock_isolated_page(page);
+- goto out;
+- }
+- __munlock_isolation_failed(page);
+-
+-unlock_out:
+- spin_unlock_irq(&pgdat->lru_lock);
++ else
++ __munlock_isolation_failed(page);
+
+-out:
+ return nr_pages - 1;
+ }
+
+--
+2.35.1
+
--- /dev/null
+From af78db2daeeeec6283747a8d591daf6df57e1961 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 24 Feb 2021 12:08:01 -0800
+Subject: mm/vmscan: __isolate_lru_page_prepare() cleanup
+
+From: Alex Shi <alex.shi@linux.alibaba.com>
+
+[ Upstream commit c2135f7c570bc274035834848d9bf46ea89ba763 ]
+
+The function just returns 2 results, so using a 'switch' to deal with its
+result is unnecessary. Also simplify it to a bool func as Vlastimil
+suggested.
+
+Also remove 'goto' by reusing list_move(), and take Matthew Wilcox's
+suggestion to update comments in function.
+
+Link: https://lkml.kernel.org/r/728874d7-2d93-4049-68c1-dcc3b2d52ccd@linux.alibaba.com
+Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
+Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Yu Zhao <yuzhao@google.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Stable-dep-of: 829ae0f81ce0 ("mm: migrate: fix THP's mapcount on isolation")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/swap.h | 2 +-
+ mm/compaction.c | 2 +-
+ mm/vmscan.c | 68 ++++++++++++++++++++------------------------
+ 3 files changed, 33 insertions(+), 39 deletions(-)
+
+diff --git a/include/linux/swap.h b/include/linux/swap.h
+index 3577d3a6ec37..394d5de5d4b4 100644
+--- a/include/linux/swap.h
++++ b/include/linux/swap.h
+@@ -358,7 +358,7 @@ extern void lru_cache_add_inactive_or_unevictable(struct page *page,
+ extern unsigned long zone_reclaimable_pages(struct zone *zone);
+ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
+ gfp_t gfp_mask, nodemask_t *mask);
+-extern int __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode);
++extern bool __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode);
+ extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
+ unsigned long nr_pages,
+ gfp_t gfp_mask,
+diff --git a/mm/compaction.c b/mm/compaction.c
+index ba3e907f03b7..ea46aadc7c21 100644
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -980,7 +980,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
+ if (unlikely(!get_page_unless_zero(page)))
+ goto isolate_fail;
+
+- if (__isolate_lru_page_prepare(page, isolate_mode) != 0)
++ if (!__isolate_lru_page_prepare(page, isolate_mode))
+ goto isolate_fail_put;
+
+ /* Try isolate the page */
+diff --git a/mm/vmscan.c b/mm/vmscan.c
+index 5ada402c8d95..00a47845a15b 100644
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -1543,19 +1543,17 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
+ * page: page to consider
+ * mode: one of the LRU isolation modes defined above
+ *
+- * returns 0 on success, -ve errno on failure.
++ * returns true on success, false on failure.
+ */
+-int __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode)
++bool __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode)
+ {
+- int ret = -EBUSY;
+-
+ /* Only take pages on the LRU. */
+ if (!PageLRU(page))
+- return ret;
++ return false;
+
+ /* Compaction should not handle unevictable pages but CMA can do so */
+ if (PageUnevictable(page) && !(mode & ISOLATE_UNEVICTABLE))
+- return ret;
++ return false;
+
+ /*
+ * To minimise LRU disruption, the caller can indicate that it only
+@@ -1568,7 +1566,7 @@ int __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode)
+ if (mode & ISOLATE_ASYNC_MIGRATE) {
+ /* All the caller can do on PageWriteback is block */
+ if (PageWriteback(page))
+- return ret;
++ return false;
+
+ if (PageDirty(page)) {
+ struct address_space *mapping;
+@@ -1584,20 +1582,20 @@ int __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode)
+ * from the page cache.
+ */
+ if (!trylock_page(page))
+- return ret;
++ return false;
+
+ mapping = page_mapping(page);
+ migrate_dirty = !mapping || mapping->a_ops->migratepage;
+ unlock_page(page);
+ if (!migrate_dirty)
+- return ret;
++ return false;
+ }
+ }
+
+ if ((mode & ISOLATE_UNMAPPED) && page_mapped(page))
+- return ret;
++ return false;
+
+- return 0;
++ return true;
+ }
+
+ /*
+@@ -1679,35 +1677,31 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
+ * only when the page is being freed somewhere else.
+ */
+ scan += nr_pages;
+- switch (__isolate_lru_page_prepare(page, mode)) {
+- case 0:
+- /*
+- * Be careful not to clear PageLRU until after we're
+- * sure the page is not being freed elsewhere -- the
+- * page release code relies on it.
+- */
+- if (unlikely(!get_page_unless_zero(page)))
+- goto busy;
+-
+- if (!TestClearPageLRU(page)) {
+- /*
+- * This page may in other isolation path,
+- * but we still hold lru_lock.
+- */
+- put_page(page);
+- goto busy;
+- }
+-
+- nr_taken += nr_pages;
+- nr_zone_taken[page_zonenum(page)] += nr_pages;
+- list_move(&page->lru, dst);
+- break;
++ if (!__isolate_lru_page_prepare(page, mode)) {
++ /* It is being freed elsewhere */
++ list_move(&page->lru, src);
++ continue;
++ }
++ /*
++ * Be careful not to clear PageLRU until after we're
++ * sure the page is not being freed elsewhere -- the
++ * page release code relies on it.
++ */
++ if (unlikely(!get_page_unless_zero(page))) {
++ list_move(&page->lru, src);
++ continue;
++ }
+
+- default:
+-busy:
+- /* else it is being freed elsewhere */
++ if (!TestClearPageLRU(page)) {
++ /* Another thread is already isolating this page */
++ put_page(page);
+ list_move(&page->lru, src);
++ continue;
+ }
++
++ nr_taken += nr_pages;
++ nr_zone_taken[page_zonenum(page)] += nr_pages;
++ list_move(&page->lru, dst);
+ }
+
+ /*
+--
+2.35.1
+
--- /dev/null
+From f7759205e536a72dd8f59d500166f51408c42e0f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 21 Nov 2022 13:54:55 +0100
+Subject: net: usb: qmi_wwan: add u-blox 0x1342 composition
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Davide Tronchin <davide.tronchin.94@gmail.com>
+
+[ Upstream commit a487069e11b6527373f7c6f435d8998051d0b5d9 ]
+
+Add RmNet support for LARA-L6.
+
+LARA-L6 module can be configured (by AT interface) in three different
+USB modes:
+* Default mode (Vendor ID: 0x1546 Product ID: 0x1341) with 4 serial
+interfaces
+* RmNet mode (Vendor ID: 0x1546 Product ID: 0x1342) with 4 serial
+interfaces and 1 RmNet virtual network interface
+* CDC-ECM mode (Vendor ID: 0x1546 Product ID: 0x1343) with 4 serial
+interfaces and 1 CDC-ECM virtual network interface
+
+In RmNet mode LARA-L6 exposes the following interfaces:
+If 0: Diagnostic
+If 1: AT parser
+If 2: AT parser
+If 3: AT parser/alternative functions
+If 4: RMNET interface
+
+Signed-off-by: Davide Tronchin <davide.tronchin.94@gmail.com>
+Acked-by: Bjørn Mork <bjorn@mork.no>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/usb/qmi_wwan.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
+index 7313e6e03c12..bce151e3706a 100644
+--- a/drivers/net/usb/qmi_wwan.c
++++ b/drivers/net/usb/qmi_wwan.c
+@@ -1352,6 +1352,7 @@ static const struct usb_device_id products[] = {
+ {QMI_FIXED_INTF(0x0489, 0xe0b4, 0)}, /* Foxconn T77W968 LTE */
+ {QMI_FIXED_INTF(0x0489, 0xe0b5, 0)}, /* Foxconn T77W968 LTE with eSIM support*/
+ {QMI_FIXED_INTF(0x2692, 0x9025, 4)}, /* Cellient MPL200 (rebranded Qualcomm 05c6:9025) */
++ {QMI_QUIRK_SET_DTR(0x1546, 0x1342, 4)}, /* u-blox LARA-L6 */
+
+ /* 4. Gobi 1000 devices */
+ {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */
+--
+2.35.1
+
--- /dev/null
+From 19b51875859c93759b9e767950f8d2b937384249 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Nov 2022 14:10:35 +0100
+Subject: regulator: slg51000: Wait after asserting CS pin
+
+From: Konrad Dybcio <konrad.dybcio@linaro.org>
+
+[ Upstream commit 0b24dfa587c6cc7484cfb170da5c7dd73451f670 ]
+
+Sony's downstream driver [1], among some other changes, adds a
+seemingly random 10ms usleep_range, which turned out to be necessary
+for the hardware to function properly on at least Sony Xperia 1 IV.
+Without this, I2C transactions with the SLG51000 straight up fail.
+
+Relax (10-10ms -> 10-11ms) and add the aforementioned sleep to make
+sure the hardware has some time to wake up.
+
+(nagara-2.0.0-mlc/vendor/semc/hardware/camera-kernel-module/)
+[1] https://developer.sony.com/file/download/open-source-archive-for-64-0-m-4-29/
+
+Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
+Link: https://lore.kernel.org/r/20221118131035.54874-1-konrad.dybcio@linaro.org
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/regulator/slg51000-regulator.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/regulator/slg51000-regulator.c b/drivers/regulator/slg51000-regulator.c
+index 75a941fb3c2b..1b2eee95ad3f 100644
+--- a/drivers/regulator/slg51000-regulator.c
++++ b/drivers/regulator/slg51000-regulator.c
+@@ -457,6 +457,8 @@ static int slg51000_i2c_probe(struct i2c_client *client)
+ chip->cs_gpiod = cs_gpiod;
+ }
+
++ usleep_range(10000, 11000);
++
+ i2c_set_clientdata(client, chip);
+ chip->chip_irq = client->irq;
+ chip->dev = dev;
+--
+2.35.1
+
--- /dev/null
+From 6d822387a0acf36069221010ef3e9a99131f4167 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 20 Nov 2022 23:12:08 +0100
+Subject: regulator: twl6030: fix get status of twl6032 regulators
+
+From: Andreas Kemnade <andreas@kemnade.info>
+
+[ Upstream commit 31a6297b89aabc81b274c093a308a7f5b55081a7 ]
+
+Status is reported as always off in the 6032 case. Status
+reporting now matches the logic in the setters. One of
+the differences to the 6030 is that there are no groups,
+therefore the state needs to be read out in the lower bits.
+
+Signed-off-by: Andreas Kemnade <andreas@kemnade.info>
+Link: https://lore.kernel.org/r/20221120221208.3093727-3-andreas@kemnade.info
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/regulator/twl6030-regulator.c | 15 +++++++++++----
+ 1 file changed, 11 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/regulator/twl6030-regulator.c b/drivers/regulator/twl6030-regulator.c
+index 7c7e3648ea4b..f3856750944f 100644
+--- a/drivers/regulator/twl6030-regulator.c
++++ b/drivers/regulator/twl6030-regulator.c
+@@ -67,6 +67,7 @@ struct twlreg_info {
+ #define TWL6030_CFG_STATE_SLEEP 0x03
+ #define TWL6030_CFG_STATE_GRP_SHIFT 5
+ #define TWL6030_CFG_STATE_APP_SHIFT 2
++#define TWL6030_CFG_STATE_MASK 0x03
+ #define TWL6030_CFG_STATE_APP_MASK (0x03 << TWL6030_CFG_STATE_APP_SHIFT)
+ #define TWL6030_CFG_STATE_APP(v) (((v) & TWL6030_CFG_STATE_APP_MASK) >>\
+ TWL6030_CFG_STATE_APP_SHIFT)
+@@ -128,13 +129,14 @@ static int twl6030reg_is_enabled(struct regulator_dev *rdev)
+ if (grp < 0)
+ return grp;
+ grp &= P1_GRP_6030;
++ val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE);
++ val = TWL6030_CFG_STATE_APP(val);
+ } else {
++ val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE);
++ val &= TWL6030_CFG_STATE_MASK;
+ grp = 1;
+ }
+
+- val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE);
+- val = TWL6030_CFG_STATE_APP(val);
+-
+ return grp && (val == TWL6030_CFG_STATE_ON);
+ }
+
+@@ -187,7 +189,12 @@ static int twl6030reg_get_status(struct regulator_dev *rdev)
+
+ val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE);
+
+- switch (TWL6030_CFG_STATE_APP(val)) {
++ if (info->features & TWL6032_SUBCLASS)
++ val &= TWL6030_CFG_STATE_MASK;
++ else
++ val = TWL6030_CFG_STATE_APP(val);
++
++ switch (val) {
+ case TWL6030_CFG_STATE_ON:
+ return REGULATOR_STATUS_NORMAL;
+
+--
+2.35.1
+
--- /dev/null
+From 7a3b77a291c1fb84974f59d34a2d5cf044f52d8f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 10 Dec 2021 21:01:25 +0100
+Subject: rtc: Check return value from mc146818_get_time()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mateusz Jończyk <mat.jonczyk@o2.pl>
+
+[ Upstream commit 0dd8d6cb9eddfe637bcd821bbfd40ebd5a0737b9 ]
+
+There are 4 users of mc146818_get_time() and none of them was checking
+the return value from this function. Change this.
+
+Print the appropriate warnings in callers of mc146818_get_time() instead
+of in the function mc146818_get_time() itself, in order not to add
+strings to rtc-mc146818-lib.c, which is kind of a library.
+
+The callers of alpha_rtc_read_time() and cmos_read_time() may use the
+contents of (struct rtc_time *) even when the functions return a failure
+code. Therefore, set the contents of (struct rtc_time *) to 0x00,
+which looks more sensible than 0xff and aligns with the (possibly
+stale?) comment in cmos_read_time:
+
+ /*
+ * If pm_trace abused the RTC for storage, set the timespec to 0,
+ * which tells the caller that this RTC value is unusable.
+ */
+
+For consistency, do this in mc146818_get_time().
+
+Note: hpet_rtc_interrupt() may call mc146818_get_time() many times a
+second. It is very unlikely, though, that the RTC suddenly stops
+working and mc146818_get_time() would consistently fail.
+
+Only compile-tested on alpha.
+
+Signed-off-by: Mateusz Jończyk <mat.jonczyk@o2.pl>
+Cc: Richard Henderson <rth@twiddle.net>
+Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
+Cc: Matt Turner <mattst88@gmail.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Alessandro Zummo <a.zummo@towertech.it>
+Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Cc: linux-alpha@vger.kernel.org
+Cc: x86@kernel.org
+Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Link: https://lore.kernel.org/r/20211210200131.153887-4-mat.jonczyk@o2.pl
+Stable-dep-of: cd17420ebea5 ("rtc: cmos: avoid UIP when writing alarm time")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/alpha/kernel/rtc.c | 7 ++++++-
+ arch/x86/kernel/hpet.c | 8 ++++++--
+ drivers/base/power/trace.c | 6 +++++-
+ drivers/rtc/rtc-cmos.c | 9 ++++++++-
+ drivers/rtc/rtc-mc146818-lib.c | 2 +-
+ 5 files changed, 26 insertions(+), 6 deletions(-)
+
+diff --git a/arch/alpha/kernel/rtc.c b/arch/alpha/kernel/rtc.c
+index 1b1d5963ac55..48ffbfbd0624 100644
+--- a/arch/alpha/kernel/rtc.c
++++ b/arch/alpha/kernel/rtc.c
+@@ -80,7 +80,12 @@ init_rtc_epoch(void)
+ static int
+ alpha_rtc_read_time(struct device *dev, struct rtc_time *tm)
+ {
+- mc146818_get_time(tm);
++ int ret = mc146818_get_time(tm);
++
++ if (ret < 0) {
++ dev_err_ratelimited(dev, "unable to read current time\n");
++ return ret;
++ }
+
+ /* Adjust for non-default epochs. It's easier to depend on the
+ generic __get_rtc_time and adjust the epoch here than create
+diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
+index 4ab7a9757e52..574df24a8e5a 100644
+--- a/arch/x86/kernel/hpet.c
++++ b/arch/x86/kernel/hpet.c
+@@ -1325,8 +1325,12 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
+ hpet_rtc_timer_reinit();
+ memset(&curr_time, 0, sizeof(struct rtc_time));
+
+- if (hpet_rtc_flags & (RTC_UIE | RTC_AIE))
+- mc146818_get_time(&curr_time);
++ if (hpet_rtc_flags & (RTC_UIE | RTC_AIE)) {
++ if (unlikely(mc146818_get_time(&curr_time) < 0)) {
++ pr_err_ratelimited("unable to read current time from RTC\n");
++ return IRQ_HANDLED;
++ }
++ }
+
+ if (hpet_rtc_flags & RTC_UIE &&
+ curr_time.tm_sec != hpet_prev_update_sec) {
+diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c
+index 94665037f4a3..72b7a92337b1 100644
+--- a/drivers/base/power/trace.c
++++ b/drivers/base/power/trace.c
+@@ -120,7 +120,11 @@ static unsigned int read_magic_time(void)
+ struct rtc_time time;
+ unsigned int val;
+
+- mc146818_get_time(&time);
++ if (mc146818_get_time(&time) < 0) {
++ pr_err("Unable to read current time from RTC\n");
++ return 0;
++ }
++
+ pr_info("RTC time: %ptRt, date: %ptRd\n", &time, &time);
+ val = time.tm_year; /* 100 years */
+ if (val > 100)
+diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
+index ed4f512eabf0..f8358bb2ae31 100644
+--- a/drivers/rtc/rtc-cmos.c
++++ b/drivers/rtc/rtc-cmos.c
+@@ -222,6 +222,8 @@ static inline void cmos_write_bank2(unsigned char val, unsigned char addr)
+
+ static int cmos_read_time(struct device *dev, struct rtc_time *t)
+ {
++ int ret;
++
+ /*
+ * If pm_trace abused the RTC for storage, set the timespec to 0,
+ * which tells the caller that this RTC value is unusable.
+@@ -229,7 +231,12 @@ static int cmos_read_time(struct device *dev, struct rtc_time *t)
+ if (!pm_trace_rtc_valid())
+ return -EIO;
+
+- mc146818_get_time(t);
++ ret = mc146818_get_time(t);
++ if (ret < 0) {
++ dev_err_ratelimited(dev, "unable to read current time\n");
++ return ret;
++ }
++
+ return 0;
+ }
+
+diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c
+index 6262f0680f13..3ae5c690f22b 100644
+--- a/drivers/rtc/rtc-mc146818-lib.c
++++ b/drivers/rtc/rtc-mc146818-lib.c
+@@ -24,7 +24,7 @@ unsigned int mc146818_get_time(struct rtc_time *time)
+ /* Ensure that the RTC is accessible. Bit 6 must be 0! */
+ if (WARN_ON_ONCE((CMOS_READ(RTC_VALID) & 0x40) != 0)) {
+ spin_unlock_irqrestore(&rtc_lock, flags);
+- memset(time, 0xff, sizeof(*time));
++ memset(time, 0, sizeof(*time));
+ return -EIO;
+ }
+
+--
+2.35.1
+
--- /dev/null
+From fcac9c587c7c5343a87cdce5d46cd47ca1057c21 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 10 Dec 2021 21:01:30 +0100
+Subject: rtc: cmos: avoid UIP when reading alarm time
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mateusz Jończyk <mat.jonczyk@o2.pl>
+
+[ Upstream commit cdedc45c579faf8cc6608d3ef81576ee0d512aa4 ]
+
+Some Intel chipsets disconnect the time and date RTC registers when the
+clock update is in progress: during this time reads may return bogus
+values and writes fail silently. This includes the RTC alarm registers.
+[1]
+
+cmos_read_alarm() did not account for that, which caused alarm time
+reads to sometimes return bogus values. This can be shown with a test
+patch that I am attaching to this patch series.
+
+Fix this, by using mc146818_avoid_UIP().
+
+[1] 7th Generation Intel ® Processor Family I/O for U/Y Platforms [...]
+Datasheet, Volume 1 of 2 (Intel's Document Number: 334658-006)
+Page 208
+https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/7th-and-8th-gen-core-family-mobile-u-y-processor-lines-i-o-datasheet-vol-1.pdf
+ "If a RAM read from the ten time and date bytes is attempted
+ during an update cycle, the value read do not necessarily
+ represent the true contents of those locations. Any RAM writes
+ under the same conditions are ignored."
+
+Signed-off-by: Mateusz Jończyk <mat.jonczyk@o2.pl>
+Cc: Alessandro Zummo <a.zummo@towertech.it>
+Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Link: https://lore.kernel.org/r/20211210200131.153887-9-mat.jonczyk@o2.pl
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/rtc/rtc-cmos.c | 72 ++++++++++++++++++++++++++++--------------
+ 1 file changed, 49 insertions(+), 23 deletions(-)
+
+diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
+index 601e3967e1f0..d419eb988b22 100644
+--- a/drivers/rtc/rtc-cmos.c
++++ b/drivers/rtc/rtc-cmos.c
+@@ -249,10 +249,46 @@ static int cmos_set_time(struct device *dev, struct rtc_time *t)
+ return mc146818_set_time(t);
+ }
+
++struct cmos_read_alarm_callback_param {
++ struct cmos_rtc *cmos;
++ struct rtc_time *time;
++ unsigned char rtc_control;
++};
++
++static void cmos_read_alarm_callback(unsigned char __always_unused seconds,
++ void *param_in)
++{
++ struct cmos_read_alarm_callback_param *p =
++ (struct cmos_read_alarm_callback_param *)param_in;
++ struct rtc_time *time = p->time;
++
++ time->tm_sec = CMOS_READ(RTC_SECONDS_ALARM);
++ time->tm_min = CMOS_READ(RTC_MINUTES_ALARM);
++ time->tm_hour = CMOS_READ(RTC_HOURS_ALARM);
++
++ if (p->cmos->day_alrm) {
++ /* ignore upper bits on readback per ACPI spec */
++ time->tm_mday = CMOS_READ(p->cmos->day_alrm) & 0x3f;
++ if (!time->tm_mday)
++ time->tm_mday = -1;
++
++ if (p->cmos->mon_alrm) {
++ time->tm_mon = CMOS_READ(p->cmos->mon_alrm);
++ if (!time->tm_mon)
++ time->tm_mon = -1;
++ }
++ }
++
++ p->rtc_control = CMOS_READ(RTC_CONTROL);
++}
++
+ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t)
+ {
+ struct cmos_rtc *cmos = dev_get_drvdata(dev);
+- unsigned char rtc_control;
++ struct cmos_read_alarm_callback_param p = {
++ .cmos = cmos,
++ .time = &t->time,
++ };
+
+ /* This not only a rtc_op, but also called directly */
+ if (!is_valid_irq(cmos->irq))
+@@ -263,28 +299,18 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t)
+ * the future.
+ */
+
+- spin_lock_irq(&rtc_lock);
+- t->time.tm_sec = CMOS_READ(RTC_SECONDS_ALARM);
+- t->time.tm_min = CMOS_READ(RTC_MINUTES_ALARM);
+- t->time.tm_hour = CMOS_READ(RTC_HOURS_ALARM);
+-
+- if (cmos->day_alrm) {
+- /* ignore upper bits on readback per ACPI spec */
+- t->time.tm_mday = CMOS_READ(cmos->day_alrm) & 0x3f;
+- if (!t->time.tm_mday)
+- t->time.tm_mday = -1;
+-
+- if (cmos->mon_alrm) {
+- t->time.tm_mon = CMOS_READ(cmos->mon_alrm);
+- if (!t->time.tm_mon)
+- t->time.tm_mon = -1;
+- }
+- }
+-
+- rtc_control = CMOS_READ(RTC_CONTROL);
+- spin_unlock_irq(&rtc_lock);
++ /* Some Intel chipsets disconnect the alarm registers when the clock
++ * update is in progress - during this time reads return bogus values
++ * and writes may fail silently. See for example "7th Generation Intel®
++ * Processor Family I/O for U/Y Platforms [...] Datasheet", section
++ * 27.7.1
++ *
++ * Use the mc146818_avoid_UIP() function to avoid this.
++ */
++ if (!mc146818_avoid_UIP(cmos_read_alarm_callback, &p))
++ return -EIO;
+
+- if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
++ if (!(p.rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
+ if (((unsigned)t->time.tm_sec) < 0x60)
+ t->time.tm_sec = bcd2bin(t->time.tm_sec);
+ else
+@@ -313,7 +339,7 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t)
+ }
+ }
+
+- t->enabled = !!(rtc_control & RTC_AIE);
++ t->enabled = !!(p.rtc_control & RTC_AIE);
+ t->pending = 0;
+
+ return 0;
+--
+2.35.1
+
--- /dev/null
+From 84f76456ea301647d1e114c5f17b16c62b8d588f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 10 Dec 2021 21:01:31 +0100
+Subject: rtc: cmos: avoid UIP when writing alarm time
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mateusz Jończyk <mat.jonczyk@o2.pl>
+
+[ Upstream commit cd17420ebea580c22dd3a93f7237de3d2cfafc37 ]
+
+Some Intel chipsets disconnect the time and date RTC registers when the
+clock update is in progress: during this time reads may return bogus
+values and writes fail silently. This includes the RTC alarm registers.
+[1]
+
+cmos_set_alarm() did not account for that; fix it.
+
+[1] 7th Generation Intel ® Processor Family I/O for U/Y Platforms [...]
+Datasheet, Volume 1 of 2 (Intel's Document Number: 334658-006)
+Page 208
+https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/7th-and-8th-gen-core-family-mobile-u-y-processor-lines-i-o-datasheet-vol-1.pdf
+ "If a RAM read from the ten time and date bytes is attempted
+ during an update cycle, the value read do not necessarily
+ represent the true contents of those locations. Any RAM writes
+ under the same conditions are ignored."
+
+Signed-off-by: Mateusz Jończyk <mat.jonczyk@o2.pl>
+Cc: Alessandro Zummo <a.zummo@towertech.it>
+Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Link: https://lore.kernel.org/r/20211210200131.153887-10-mat.jonczyk@o2.pl
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/rtc/rtc-cmos.c | 107 +++++++++++++++++++++++++----------------
+ 1 file changed, 66 insertions(+), 41 deletions(-)
+
+diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
+index 93ffb9eaf63a..601e3967e1f0 100644
+--- a/drivers/rtc/rtc-cmos.c
++++ b/drivers/rtc/rtc-cmos.c
+@@ -444,10 +444,57 @@ static int cmos_validate_alarm(struct device *dev, struct rtc_wkalrm *t)
+ return 0;
+ }
+
++struct cmos_set_alarm_callback_param {
++ struct cmos_rtc *cmos;
++ unsigned char mon, mday, hrs, min, sec;
++ struct rtc_wkalrm *t;
++};
++
++/* Note: this function may be executed by mc146818_avoid_UIP() more than
++ * once
++ */
++static void cmos_set_alarm_callback(unsigned char __always_unused seconds,
++ void *param_in)
++{
++ struct cmos_set_alarm_callback_param *p =
++ (struct cmos_set_alarm_callback_param *)param_in;
++
++ /* next rtc irq must not be from previous alarm setting */
++ cmos_irq_disable(p->cmos, RTC_AIE);
++
++ /* update alarm */
++ CMOS_WRITE(p->hrs, RTC_HOURS_ALARM);
++ CMOS_WRITE(p->min, RTC_MINUTES_ALARM);
++ CMOS_WRITE(p->sec, RTC_SECONDS_ALARM);
++
++ /* the system may support an "enhanced" alarm */
++ if (p->cmos->day_alrm) {
++ CMOS_WRITE(p->mday, p->cmos->day_alrm);
++ if (p->cmos->mon_alrm)
++ CMOS_WRITE(p->mon, p->cmos->mon_alrm);
++ }
++
++ if (use_hpet_alarm()) {
++ /*
++ * FIXME the HPET alarm glue currently ignores day_alrm
++ * and mon_alrm ...
++ */
++ hpet_set_alarm_time(p->t->time.tm_hour, p->t->time.tm_min,
++ p->t->time.tm_sec);
++ }
++
++ if (p->t->enabled)
++ cmos_irq_enable(p->cmos, RTC_AIE);
++}
++
+ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
+ {
+ struct cmos_rtc *cmos = dev_get_drvdata(dev);
+- unsigned char mon, mday, hrs, min, sec, rtc_control;
++ struct cmos_set_alarm_callback_param p = {
++ .cmos = cmos,
++ .t = t
++ };
++ unsigned char rtc_control;
+ int ret;
+
+ /* This not only a rtc_op, but also called directly */
+@@ -458,11 +505,11 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
+ if (ret < 0)
+ return ret;
+
+- mon = t->time.tm_mon + 1;
+- mday = t->time.tm_mday;
+- hrs = t->time.tm_hour;
+- min = t->time.tm_min;
+- sec = t->time.tm_sec;
++ p.mon = t->time.tm_mon + 1;
++ p.mday = t->time.tm_mday;
++ p.hrs = t->time.tm_hour;
++ p.min = t->time.tm_min;
++ p.sec = t->time.tm_sec;
+
+ spin_lock_irq(&rtc_lock);
+ rtc_control = CMOS_READ(RTC_CONTROL);
+@@ -470,43 +517,21 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
+
+ if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
+ /* Writing 0xff means "don't care" or "match all". */
+- mon = (mon <= 12) ? bin2bcd(mon) : 0xff;
+- mday = (mday >= 1 && mday <= 31) ? bin2bcd(mday) : 0xff;
+- hrs = (hrs < 24) ? bin2bcd(hrs) : 0xff;
+- min = (min < 60) ? bin2bcd(min) : 0xff;
+- sec = (sec < 60) ? bin2bcd(sec) : 0xff;
++ p.mon = (p.mon <= 12) ? bin2bcd(p.mon) : 0xff;
++ p.mday = (p.mday >= 1 && p.mday <= 31) ? bin2bcd(p.mday) : 0xff;
++ p.hrs = (p.hrs < 24) ? bin2bcd(p.hrs) : 0xff;
++ p.min = (p.min < 60) ? bin2bcd(p.min) : 0xff;
++ p.sec = (p.sec < 60) ? bin2bcd(p.sec) : 0xff;
+ }
+
+- spin_lock_irq(&rtc_lock);
+-
+- /* next rtc irq must not be from previous alarm setting */
+- cmos_irq_disable(cmos, RTC_AIE);
+-
+- /* update alarm */
+- CMOS_WRITE(hrs, RTC_HOURS_ALARM);
+- CMOS_WRITE(min, RTC_MINUTES_ALARM);
+- CMOS_WRITE(sec, RTC_SECONDS_ALARM);
+-
+- /* the system may support an "enhanced" alarm */
+- if (cmos->day_alrm) {
+- CMOS_WRITE(mday, cmos->day_alrm);
+- if (cmos->mon_alrm)
+- CMOS_WRITE(mon, cmos->mon_alrm);
+- }
+-
+- if (use_hpet_alarm()) {
+- /*
+- * FIXME the HPET alarm glue currently ignores day_alrm
+- * and mon_alrm ...
+- */
+- hpet_set_alarm_time(t->time.tm_hour, t->time.tm_min,
+- t->time.tm_sec);
+- }
+-
+- if (t->enabled)
+- cmos_irq_enable(cmos, RTC_AIE);
+-
+- spin_unlock_irq(&rtc_lock);
++ /*
++ * Some Intel chipsets disconnect the alarm registers when the clock
++ * update is in progress - during this time writes fail silently.
++ *
++ * Use mc146818_avoid_UIP() to avoid this.
++ */
++ if (!mc146818_avoid_UIP(cmos_set_alarm_callback, &p))
++ return -EIO;
+
+ cmos->alarm_expires = rtc_tm_to_time64(&t->time);
+
+--
+2.35.1
+
--- /dev/null
+From 74d206aa5712c743dac13679d2cf585f3d88199d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 16 Jul 2021 23:04:37 +0200
+Subject: rtc: cmos: remove stale REVISIT comments
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mateusz Jończyk <mat.jonczyk@o2.pl>
+
+[ Upstream commit e1aba37569f0aa9c993f740828871e48eea79f98 ]
+
+It appears mc146818_get_time() and mc146818_set_time() now correctly
+use the century register as specified in the ACPI FADT table. It is not
+clear what else could be done here.
+
+These comments were introduced by
+ commit 7be2c7c96aff ("[PATCH] RTC framework driver for CMOS RTCs")
+in 2007, which originally referenced function get_rtc_time() in
+include/asm-generic/rtc.h .
+
+Signed-off-by: Mateusz Jończyk <mat.jonczyk@o2.pl>
+Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Link: https://lore.kernel.org/r/20210716210437.29622-1-mat.jonczyk@o2.pl
+Stable-dep-of: cd17420ebea5 ("rtc: cmos: avoid UIP when writing alarm time")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/rtc/rtc-cmos.c | 8 +-------
+ 1 file changed, 1 insertion(+), 7 deletions(-)
+
+diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
+index 8e8ce40f6440..ed4f512eabf0 100644
+--- a/drivers/rtc/rtc-cmos.c
++++ b/drivers/rtc/rtc-cmos.c
+@@ -229,19 +229,13 @@ static int cmos_read_time(struct device *dev, struct rtc_time *t)
+ if (!pm_trace_rtc_valid())
+ return -EIO;
+
+- /* REVISIT: if the clock has a "century" register, use
+- * that instead of the heuristic in mc146818_get_time().
+- * That'll make Y3K compatility (year > 2070) easy!
+- */
+ mc146818_get_time(t);
+ return 0;
+ }
+
+ static int cmos_set_time(struct device *dev, struct rtc_time *t)
+ {
+- /* REVISIT: set the "century" register if available
+- *
+- * NOTE: this ignores the issue whereby updating the seconds
++ /* NOTE: this ignores the issue whereby updating the seconds
+ * takes effect exactly 500ms after we write the register.
+ * (Also queueing and other delays before we get this far.)
+ */
+--
+2.35.1
+
--- /dev/null
+From ad5b5459965ce741f5ad888fa23e74271b21b8c9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Feb 2021 20:39:36 +0800
+Subject: rtc: cmos: Replace spin_lock_irqsave with spin_lock in hard IRQ
+
+From: Xiaofei Tan <tanxiaofei@huawei.com>
+
+[ Upstream commit 6950d046eb6eabbc271fda416460c05f7a85698a ]
+
+It is redundant to do irqsave and irqrestore in hardIRQ context, where
+it is already in an irq-disabled context.
+
+Signed-off-by: Xiaofei Tan <tanxiaofei@huawei.com>
+Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Link: https://lore.kernel.org/r/1612355981-6764-2-git-send-email-tanxiaofei@huawei.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/rtc/rtc-cmos.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
+index d419eb988b22..21f2bdd025b6 100644
+--- a/drivers/rtc/rtc-cmos.c
++++ b/drivers/rtc/rtc-cmos.c
+@@ -704,11 +704,10 @@ static struct cmos_rtc cmos_rtc;
+
+ static irqreturn_t cmos_interrupt(int irq, void *p)
+ {
+- unsigned long flags;
+ u8 irqstat;
+ u8 rtc_control;
+
+- spin_lock_irqsave(&rtc_lock, flags);
++ spin_lock(&rtc_lock);
+
+ /* When the HPET interrupt handler calls us, the interrupt
+ * status is passed as arg1 instead of the irq number. But
+@@ -742,7 +741,7 @@ static irqreturn_t cmos_interrupt(int irq, void *p)
+ hpet_mask_rtc_irq_bit(RTC_AIE);
+ CMOS_READ(RTC_INTR_FLAGS);
+ }
+- spin_unlock_irqrestore(&rtc_lock, flags);
++ spin_unlock(&rtc_lock);
+
+ if (is_intr(irqstat)) {
+ rtc_update_irq(p, 1, irqstat);
+--
+2.35.1
+
--- /dev/null
+From 9ef93cd13386ac610b618b84cd2cc715272b215e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Jan 2021 18:02:11 +0100
+Subject: rtc: mc146818: Detect and handle broken RTCs
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+[ Upstream commit 211e5db19d15a721b2953ea54b8f26c2963720eb ]
+
+The recent fix for handling the UIP bit unearthed another issue in the RTC
+code. If the RTC is advertised but the readout is straight 0xFF because
+it's not available, the old code just proceeded with crappy values, but the
+new code hangs because it waits for the UIP bit to become low.
+
+Add a sanity check in the RTC CMOS probe function which reads the RTC_VALID
+register (Register D) which should have bit 0-6 cleared. If that's not the
+case then fail to register the CMOS.
+
+Add the same check to mc146818_get_time(), warn once when the condition
+is true and invalidate the rtc_time data.
+
+Reported-by: Mickaël Salaün <mic@digikod.net>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Mickaël Salaün <mic@linux.microsoft.com>
+Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Link: https://lore.kernel.org/r/87tur3fx7w.fsf@nanos.tec.linutronix.de
+Stable-dep-of: cd17420ebea5 ("rtc: cmos: avoid UIP when writing alarm time")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/rtc/rtc-cmos.c | 8 ++++++++
+ drivers/rtc/rtc-mc146818-lib.c | 7 +++++++
+ 2 files changed, 15 insertions(+)
+
+diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
+index 58c6382a2807..cce4b62ffdd0 100644
+--- a/drivers/rtc/rtc-cmos.c
++++ b/drivers/rtc/rtc-cmos.c
+@@ -808,6 +808,14 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
+
+ spin_lock_irq(&rtc_lock);
+
++ /* Ensure that the RTC is accessible. Bit 0-6 must be 0! */
++ if ((CMOS_READ(RTC_VALID) & 0x7f) != 0) {
++ spin_unlock_irq(&rtc_lock);
++ dev_warn(dev, "not accessible\n");
++ retval = -ENXIO;
++ goto cleanup1;
++ }
++
+ if (!(flags & CMOS_RTC_FLAGS_NOFREQ)) {
+ /* force periodic irq to CMOS reset default of 1024Hz;
+ *
+diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c
+index 8364e4141670..7f01dc41271d 100644
+--- a/drivers/rtc/rtc-mc146818-lib.c
++++ b/drivers/rtc/rtc-mc146818-lib.c
+@@ -21,6 +21,13 @@ unsigned int mc146818_get_time(struct rtc_time *time)
+
+ again:
+ spin_lock_irqsave(&rtc_lock, flags);
++ /* Ensure that the RTC is accessible. Bit 0-6 must be 0! */
++ if (WARN_ON_ONCE((CMOS_READ(RTC_VALID) & 0x7f) != 0)) {
++ spin_unlock_irqrestore(&rtc_lock, flags);
++ memset(time, 0xff, sizeof(*time));
++ return 0;
++ }
++
+ /*
+ * Check whether there is an update in progress during which the
+ * readout is unspecified. The maximum update time is ~2ms. Poll
+--
+2.35.1
+
--- /dev/null
+From 4ef1e2e1376fa1358435ab952fe3ad29ae1082a6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 1 Feb 2021 20:24:17 +0100
+Subject: rtc: mc146818: Dont test for bit 0-5 in Register D
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+[ Upstream commit ebb22a05943666155e6da04407cc6e913974c78c ]
+
+The recent change to validate the RTC turned out to be overly tight.
+
+While it cures the problem on the reporters machine it breaks machines
+with Intel chipsets which use bit 0-5 of the D register. So check only
+for bit 6 being 0 which is the case on these Intel machines as well.
+
+Fixes: 211e5db19d15 ("rtc: mc146818: Detect and handle broken RTCs")
+Reported-by: Serge Belyshev <belyshev@depni.sinp.msu.ru>
+Reported-by: Dirk Gouders <dirk@gouders.net>
+Reported-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Dirk Gouders <dirk@gouders.net>
+Tested-by: Len Brown <len.brown@intel.com>
+Tested-by: Borislav Petkov <bp@suse.de>
+Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Link: https://lore.kernel.org/r/87zh0nbnha.fsf@nanos.tec.linutronix.de
+Stable-dep-of: cd17420ebea5 ("rtc: cmos: avoid UIP when writing alarm time")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/rtc/rtc-cmos.c | 4 ++--
+ drivers/rtc/rtc-mc146818-lib.c | 4 ++--
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
+index cce4b62ffdd0..8e8ce40f6440 100644
+--- a/drivers/rtc/rtc-cmos.c
++++ b/drivers/rtc/rtc-cmos.c
+@@ -808,8 +808,8 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
+
+ spin_lock_irq(&rtc_lock);
+
+- /* Ensure that the RTC is accessible. Bit 0-6 must be 0! */
+- if ((CMOS_READ(RTC_VALID) & 0x7f) != 0) {
++ /* Ensure that the RTC is accessible. Bit 6 must be 0! */
++ if ((CMOS_READ(RTC_VALID) & 0x40) != 0) {
+ spin_unlock_irq(&rtc_lock);
+ dev_warn(dev, "not accessible\n");
+ retval = -ENXIO;
+diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c
+index 7f01dc41271d..6ed2cd5d2bba 100644
+--- a/drivers/rtc/rtc-mc146818-lib.c
++++ b/drivers/rtc/rtc-mc146818-lib.c
+@@ -21,8 +21,8 @@ unsigned int mc146818_get_time(struct rtc_time *time)
+
+ again:
+ spin_lock_irqsave(&rtc_lock, flags);
+- /* Ensure that the RTC is accessible. Bit 0-6 must be 0! */
+- if (WARN_ON_ONCE((CMOS_READ(RTC_VALID) & 0x7f) != 0)) {
++ /* Ensure that the RTC is accessible. Bit 6 must be 0! */
++ if (WARN_ON_ONCE((CMOS_READ(RTC_VALID) & 0x40) != 0)) {
+ spin_unlock_irqrestore(&rtc_lock, flags);
+ memset(time, 0xff, sizeof(*time));
+ return 0;
+--
+2.35.1
+
--- /dev/null
+From dcb86a8ba704e221a6401c14de2f0d35b725235f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 10 Dec 2021 21:01:24 +0100
+Subject: rtc: mc146818-lib: change return values of mc146818_get_time()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mateusz Jończyk <mat.jonczyk@o2.pl>
+
+[ Upstream commit d35786b3a28dee20b12962ae2dd365892a99ed1a ]
+
+No function is checking mc146818_get_time() return values yet, so
+correct them to make them more customary.
+
+Signed-off-by: Mateusz Jończyk <mat.jonczyk@o2.pl>
+Cc: Alessandro Zummo <a.zummo@towertech.it>
+Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Link: https://lore.kernel.org/r/20211210200131.153887-3-mat.jonczyk@o2.pl
+Stable-dep-of: cd17420ebea5 ("rtc: cmos: avoid UIP when writing alarm time")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/rtc/rtc-mc146818-lib.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c
+index 6ed2cd5d2bba..6262f0680f13 100644
+--- a/drivers/rtc/rtc-mc146818-lib.c
++++ b/drivers/rtc/rtc-mc146818-lib.c
+@@ -25,7 +25,7 @@ unsigned int mc146818_get_time(struct rtc_time *time)
+ if (WARN_ON_ONCE((CMOS_READ(RTC_VALID) & 0x40) != 0)) {
+ spin_unlock_irqrestore(&rtc_lock, flags);
+ memset(time, 0xff, sizeof(*time));
+- return 0;
++ return -EIO;
+ }
+
+ /*
+@@ -116,7 +116,7 @@ unsigned int mc146818_get_time(struct rtc_time *time)
+
+ time->tm_mon--;
+
+- return RTC_24H;
++ return 0;
+ }
+ EXPORT_SYMBOL_GPL(mc146818_get_time);
+
+--
+2.35.1
+
--- /dev/null
+From 0f4373bd6d8f06e9d571ae76ea8f6be4d684344b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 10 Dec 2021 21:01:27 +0100
+Subject: rtc: mc146818-lib: extract mc146818_avoid_UIP
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mateusz Jończyk <mat.jonczyk@o2.pl>
+
+[ Upstream commit ec5895c0f2d87b9bf4185db1915e40fa6fcfc0ac ]
+
+Function mc146818_get_time() contains an elaborate mechanism of reading
+the RTC time while no RTC update is in progress. It turns out that
+reading the RTC alarm clock also requires avoiding the RTC update.
+Therefore, the mechanism in mc146818_get_time() should be reused - so
+extract it into a separate function.
+
+The logic in mc146818_avoid_UIP() is same as in mc146818_get_time()
+except that after every
+
+ if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) {
+
+there is now "mdelay(1)".
+
+To avoid producing a very unreadable patch, mc146818_get_time() will be
+refactored to use mc146818_avoid_UIP() in the next patch.
+
+Signed-off-by: Mateusz Jończyk <mat.jonczyk@o2.pl>
+Cc: Alessandro Zummo <a.zummo@towertech.it>
+Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Link: https://lore.kernel.org/r/20211210200131.153887-6-mat.jonczyk@o2.pl
+Stable-dep-of: cd17420ebea5 ("rtc: cmos: avoid UIP when writing alarm time")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/rtc/rtc-mc146818-lib.c | 70 ++++++++++++++++++++++++++++++++++
+ include/linux/mc146818rtc.h | 3 ++
+ 2 files changed, 73 insertions(+)
+
+diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c
+index 94df6056c5c0..46527a5d3912 100644
+--- a/drivers/rtc/rtc-mc146818-lib.c
++++ b/drivers/rtc/rtc-mc146818-lib.c
+@@ -8,6 +8,76 @@
+ #include <linux/acpi.h>
+ #endif
+
++/*
++ * Execute a function while the UIP (Update-in-progress) bit of the RTC is
++ * unset.
++ *
++ * Warning: callback may be executed more then once.
++ */
++bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param),
++ void *param)
++{
++ int i;
++ unsigned long flags;
++ unsigned char seconds;
++
++ for (i = 0; i < 10; i++) {
++ spin_lock_irqsave(&rtc_lock, flags);
++
++ /*
++ * Check whether there is an update in progress during which the
++ * readout is unspecified. The maximum update time is ~2ms. Poll
++ * every msec for completion.
++ *
++ * Store the second value before checking UIP so a long lasting
++ * NMI which happens to hit after the UIP check cannot make
++ * an update cycle invisible.
++ */
++ seconds = CMOS_READ(RTC_SECONDS);
++
++ if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) {
++ spin_unlock_irqrestore(&rtc_lock, flags);
++ mdelay(1);
++ continue;
++ }
++
++ /* Revalidate the above readout */
++ if (seconds != CMOS_READ(RTC_SECONDS)) {
++ spin_unlock_irqrestore(&rtc_lock, flags);
++ continue;
++ }
++
++ if (callback)
++ callback(seconds, param);
++
++ /*
++ * Check for the UIP bit again. If it is set now then
++ * the above values may contain garbage.
++ */
++ if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) {
++ spin_unlock_irqrestore(&rtc_lock, flags);
++ mdelay(1);
++ continue;
++ }
++
++ /*
++ * A NMI might have interrupted the above sequence so check
++ * whether the seconds value has changed which indicates that
++ * the NMI took longer than the UIP bit was set. Unlikely, but
++ * possible and there is also virt...
++ */
++ if (seconds != CMOS_READ(RTC_SECONDS)) {
++ spin_unlock_irqrestore(&rtc_lock, flags);
++ continue;
++ }
++ spin_unlock_irqrestore(&rtc_lock, flags);
++
++ return true;
++ }
++ return false;
++}
++EXPORT_SYMBOL_GPL(mc146818_avoid_UIP);
++
+ /*
+ * If the UIP (Update-in-progress) bit of the RTC is set for more then
+ * 10ms, the RTC is apparently broken or not present.
+diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h
+index c246ce191915..fb042e0e7d76 100644
+--- a/include/linux/mc146818rtc.h
++++ b/include/linux/mc146818rtc.h
+@@ -129,4 +129,7 @@ bool mc146818_does_rtc_work(void);
+ unsigned int mc146818_get_time(struct rtc_time *time);
+ int mc146818_set_time(struct rtc_time *time);
+
++bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param),
++ void *param);
++
+ #endif /* _MC146818RTC_H */
+--
+2.35.1
+
--- /dev/null
+From 453e0ae9b265b8eb55cb8d8d60e86f4757a919b8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 10 Dec 2021 21:01:26 +0100
+Subject: rtc: mc146818-lib: fix RTC presence check
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mateusz Jończyk <mat.jonczyk@o2.pl>
+
+[ Upstream commit ea6fa4961aab8f90a8aa03575a98b4bda368d4b6 ]
+
+To prevent an infinite loop in mc146818_get_time(),
+commit 211e5db19d15 ("rtc: mc146818: Detect and handle broken RTCs")
+added a check for RTC availability. Together with a later fix, it
+checked if bit 6 in register 0x0d is cleared.
+
+This, however, caused a false negative on a motherboard with an AMD
+SB710 southbridge; according to the specification [1], bit 6 of register
+0x0d of this chipset is a scratchbit. This caused a regression in Linux
+5.11 - the RTC was determined broken by the kernel and not used by
+rtc-cmos.c [3]. This problem was also reported in Fedora [4].
+
+As a better alternative, check whether the UIP ("Update-in-progress")
+bit is set for longer than 10ms. If that is the case, then apparently
+the RTC is either absent (and all register reads return 0xff) or broken.
+Also limit the number of loop iterations in mc146818_get_time() to 10 to
+prevent an infinite loop there.
+
+The functions mc146818_get_time() and mc146818_does_rtc_work() will be
+refactored later in this patch series, in order to fix a separate
+problem with reading / setting the RTC alarm time. This is done to
+avoid confusion about what is being fixed when.
+
+In a previous approach to this problem, I implemented a check whether
+the RTC_HOURS register contains a value <= 24. This, however, sometimes
+did not work correctly on my Intel Kaby Lake laptop. According to
+Intel's documentation [2], "the time and date RAM locations (0-9) are
+disconnected from the external bus" during the update cycle so reading
+this register without checking the UIP bit is incorrect.
+
+[1] AMD SB700/710/750 Register Reference Guide, page 308,
+https://developer.amd.com/wordpress/media/2012/10/43009_sb7xx_rrg_pub_1.00.pdf
+
+[2] 7th Generation Intel ® Processor Family I/O for U/Y Platforms [...] Datasheet
+Volume 1 of 2, page 209
+Intel's Document Number: 334658-006,
+https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/7th-and-8th-gen-core-family-mobile-u-y-processor-lines-i-o-datasheet-vol-1.pdf
+
+[3] Functions in arch/x86/kernel/rtc.c apparently were using it.
+
+[4] https://bugzilla.redhat.com/show_bug.cgi?id=1936688
+
+Fixes: 211e5db19d15 ("rtc: mc146818: Detect and handle broken RTCs")
+Fixes: ebb22a059436 ("rtc: mc146818: Dont test for bit 0-5 in Register D")
+Signed-off-by: Mateusz Jończyk <mat.jonczyk@o2.pl>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Alessandro Zummo <a.zummo@towertech.it>
+Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Link: https://lore.kernel.org/r/20211210200131.153887-5-mat.jonczyk@o2.pl
+Stable-dep-of: cd17420ebea5 ("rtc: cmos: avoid UIP when writing alarm time")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/rtc/rtc-cmos.c | 10 ++++------
+ drivers/rtc/rtc-mc146818-lib.c | 34 ++++++++++++++++++++++++++++++----
+ include/linux/mc146818rtc.h | 1 +
+ 3 files changed, 35 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
+index f8358bb2ae31..93ffb9eaf63a 100644
+--- a/drivers/rtc/rtc-cmos.c
++++ b/drivers/rtc/rtc-cmos.c
+@@ -807,16 +807,14 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
+
+ rename_region(ports, dev_name(&cmos_rtc.rtc->dev));
+
+- spin_lock_irq(&rtc_lock);
+-
+- /* Ensure that the RTC is accessible. Bit 6 must be 0! */
+- if ((CMOS_READ(RTC_VALID) & 0x40) != 0) {
+- spin_unlock_irq(&rtc_lock);
+- dev_warn(dev, "not accessible\n");
++ if (!mc146818_does_rtc_work()) {
++ dev_warn(dev, "broken or not accessible\n");
+ retval = -ENXIO;
+ goto cleanup1;
+ }
+
++ spin_lock_irq(&rtc_lock);
++
+ if (!(flags & CMOS_RTC_FLAGS_NOFREQ)) {
+ /* force periodic irq to CMOS reset default of 1024Hz;
+ *
+diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c
+index 3ae5c690f22b..94df6056c5c0 100644
+--- a/drivers/rtc/rtc-mc146818-lib.c
++++ b/drivers/rtc/rtc-mc146818-lib.c
+@@ -8,10 +8,36 @@
+ #include <linux/acpi.h>
+ #endif
+
++/*
++ * If the UIP (Update-in-progress) bit of the RTC is set for more then
++ * 10ms, the RTC is apparently broken or not present.
++ */
++bool mc146818_does_rtc_work(void)
++{
++ int i;
++ unsigned char val;
++ unsigned long flags;
++
++ for (i = 0; i < 10; i++) {
++ spin_lock_irqsave(&rtc_lock, flags);
++ val = CMOS_READ(RTC_FREQ_SELECT);
++ spin_unlock_irqrestore(&rtc_lock, flags);
++
++ if ((val & RTC_UIP) == 0)
++ return true;
++
++ mdelay(1);
++ }
++
++ return false;
++}
++EXPORT_SYMBOL_GPL(mc146818_does_rtc_work);
++
+ unsigned int mc146818_get_time(struct rtc_time *time)
+ {
+ unsigned char ctrl;
+ unsigned long flags;
++ unsigned int iter_count = 0;
+ unsigned char century = 0;
+ bool retry;
+
+@@ -20,13 +46,13 @@ unsigned int mc146818_get_time(struct rtc_time *time)
+ #endif
+
+ again:
+- spin_lock_irqsave(&rtc_lock, flags);
+- /* Ensure that the RTC is accessible. Bit 6 must be 0! */
+- if (WARN_ON_ONCE((CMOS_READ(RTC_VALID) & 0x40) != 0)) {
+- spin_unlock_irqrestore(&rtc_lock, flags);
++ if (iter_count > 10) {
+ memset(time, 0, sizeof(*time));
+ return -EIO;
+ }
++ iter_count++;
++
++ spin_lock_irqsave(&rtc_lock, flags);
+
+ /*
+ * Check whether there is an update in progress during which the
+diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h
+index 1e0205811394..c246ce191915 100644
+--- a/include/linux/mc146818rtc.h
++++ b/include/linux/mc146818rtc.h
+@@ -125,6 +125,7 @@ struct cmos_rtc_board_info {
+ #define RTC_IO_EXTENT_USED RTC_IO_EXTENT
+ #endif /* ARCH_RTC_LOCATION */
+
++bool mc146818_does_rtc_work(void);
+ unsigned int mc146818_get_time(struct rtc_time *time);
+ int mc146818_set_time(struct rtc_time *time);
+
+--
+2.35.1
+
--- /dev/null
+From fff644f7c81182036f455c49f0d146c802e4ee08 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 6 Dec 2020 22:46:14 +0100
+Subject: rtc: mc146818: Prevent reading garbage
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+[ Upstream commit 05a0302c35481e9b47fb90ba40922b0a4cae40d8 ]
+
+The MC146818 driver is prone to read garbage from the RTC. There are
+several issues all related to the update cycle of the MC146818. The chip
+increments seconds obviously once per second and indicates that by a bit in
+a register. The bit goes high 244us before the actual update starts. During
+the update the readout of the time values is undefined.
+
+The code just checks whether the update in progress bit (UIP) is set before
+reading the clock. If it's set it waits arbitrary 20ms before retrying,
+which is ample because the maximum update time is ~2ms.
+
+But this check does not guarantee that the UIP bit goes high and the actual
+update happens during the readout. So the following can happen
+
+ 0.997 UIP = False
+ -> Interrupt/NMI/preemption
+ 0.998 UIP -> True
+ 0.999 Readout <- Undefined
+
+To prevent this rework the code so it checks UIP before and after the
+readout and if set after the readout try again.
+
+But that's not enough to cover the following:
+
+ 0.997 UIP = False
+ Readout seconds
+ -> NMI (or vCPU scheduled out)
+ 0.998 UIP -> True
+ update completes
+ UIP -> False
+ 1.000 Readout minutes,....
+ UIP check succeeds
+
+That can make the readout wrong up to 59 seconds.
+
+To prevent this, read the seconds value before the first UIP check,
+validate it after checking UIP and after reading out the rest.
+
+It's amazing that the original i386 code had this actually correct and
+the generic implementation of the MC146818 driver got it wrong in 2002 and
+it stayed that way until today.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Link: https://lore.kernel.org/r/20201206220541.594826678@linutronix.de
+Stable-dep-of: cd17420ebea5 ("rtc: cmos: avoid UIP when writing alarm time")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/rtc/rtc-mc146818-lib.c | 64 +++++++++++++++++++++-------------
+ 1 file changed, 39 insertions(+), 25 deletions(-)
+
+diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c
+index b036ff33fbe6..8364e4141670 100644
+--- a/drivers/rtc/rtc-mc146818-lib.c
++++ b/drivers/rtc/rtc-mc146818-lib.c
+@@ -8,41 +8,41 @@
+ #include <linux/acpi.h>
+ #endif
+
+-/*
+- * Returns true if a clock update is in progress
+- */
+-static inline unsigned char mc146818_is_updating(void)
+-{
+- unsigned char uip;
+- unsigned long flags;
+-
+- spin_lock_irqsave(&rtc_lock, flags);
+- uip = (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP);
+- spin_unlock_irqrestore(&rtc_lock, flags);
+- return uip;
+-}
+-
+ unsigned int mc146818_get_time(struct rtc_time *time)
+ {
+ unsigned char ctrl;
+ unsigned long flags;
+ unsigned char century = 0;
++ bool retry;
+
+ #ifdef CONFIG_MACH_DECSTATION
+ unsigned int real_year;
+ #endif
+
++again:
++ spin_lock_irqsave(&rtc_lock, flags);
+ /*
+- * read RTC once any update in progress is done. The update
+- * can take just over 2ms. We wait 20ms. There is no need to
+- * to poll-wait (up to 1s - eeccch) for the falling edge of RTC_UIP.
+- * If you need to know *exactly* when a second has started, enable
+- * periodic update complete interrupts, (via ioctl) and then
+- * immediately read /dev/rtc which will block until you get the IRQ.
+- * Once the read clears, read the RTC time (again via ioctl). Easy.
++ * Check whether there is an update in progress during which the
++ * readout is unspecified. The maximum update time is ~2ms. Poll
++ * every msec for completion.
++ *
++ * Store the second value before checking UIP so a long lasting NMI
++ * which happens to hit after the UIP check cannot make an update
++ * cycle invisible.
+ */
+- if (mc146818_is_updating())
+- mdelay(20);
++ time->tm_sec = CMOS_READ(RTC_SECONDS);
++
++ if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) {
++ spin_unlock_irqrestore(&rtc_lock, flags);
++ mdelay(1);
++ goto again;
++ }
++
++ /* Revalidate the above readout */
++ if (time->tm_sec != CMOS_READ(RTC_SECONDS)) {
++ spin_unlock_irqrestore(&rtc_lock, flags);
++ goto again;
++ }
+
+ /*
+ * Only the values that we read from the RTC are set. We leave
+@@ -50,8 +50,6 @@ unsigned int mc146818_get_time(struct rtc_time *time)
+ * RTC has RTC_DAY_OF_WEEK, we ignore it, as it is only updated
+ * by the RTC when initially set to a non-zero value.
+ */
+- spin_lock_irqsave(&rtc_lock, flags);
+- time->tm_sec = CMOS_READ(RTC_SECONDS);
+ time->tm_min = CMOS_READ(RTC_MINUTES);
+ time->tm_hour = CMOS_READ(RTC_HOURS);
+ time->tm_mday = CMOS_READ(RTC_DAY_OF_MONTH);
+@@ -66,8 +64,24 @@ unsigned int mc146818_get_time(struct rtc_time *time)
+ century = CMOS_READ(acpi_gbl_FADT.century);
+ #endif
+ ctrl = CMOS_READ(RTC_CONTROL);
++ /*
++ * Check for the UIP bit again. If it is set now then
++ * the above values may contain garbage.
++ */
++ retry = CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP;
++ /*
++ * A NMI might have interrupted the above sequence so check whether
++ * the seconds value has changed which indicates that the NMI took
++ * longer than the UIP bit was set. Unlikely, but possible and
++ * there is also virt...
++ */
++ retry |= time->tm_sec != CMOS_READ(RTC_SECONDS);
++
+ spin_unlock_irqrestore(&rtc_lock, flags);
+
++ if (retry)
++ goto again;
++
+ if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
+ {
+ time->tm_sec = bcd2bin(time->tm_sec);
+--
+2.35.1
+
--- /dev/null
+From 454cde155ee62939e82a056e9fbb2bb5ab8190e8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 6 Dec 2020 22:46:15 +0100
+Subject: rtc: mc146818: Reduce spinlock section in mc146818_set_time()
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+[ Upstream commit dcf257e92622ba0e25fdc4b6699683e7ae67e2a1 ]
+
+No need to hold the lock and disable interrupts for doing math.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Link: https://lore.kernel.org/r/20201206220541.709243630@linutronix.de
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/rtc/rtc-mc146818-lib.c | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c
+index 46527a5d3912..1ca866461d10 100644
+--- a/drivers/rtc/rtc-mc146818-lib.c
++++ b/drivers/rtc/rtc-mc146818-lib.c
+@@ -249,7 +249,6 @@ int mc146818_set_time(struct rtc_time *time)
+ if (yrs > 255) /* They are unsigned */
+ return -EINVAL;
+
+- spin_lock_irqsave(&rtc_lock, flags);
+ #ifdef CONFIG_MACH_DECSTATION
+ real_yrs = yrs;
+ leap_yr = ((!((yrs + 1900) % 4) && ((yrs + 1900) % 100)) ||
+@@ -278,10 +277,8 @@ int mc146818_set_time(struct rtc_time *time)
+ /* These limits and adjustments are independent of
+ * whether the chip is in binary mode or not.
+ */
+- if (yrs > 169) {
+- spin_unlock_irqrestore(&rtc_lock, flags);
++ if (yrs > 169)
+ return -EINVAL;
+- }
+
+ if (yrs >= 100)
+ yrs -= 100;
+@@ -297,6 +294,7 @@ int mc146818_set_time(struct rtc_time *time)
+ century = bin2bcd(century);
+ }
+
++ spin_lock_irqsave(&rtc_lock, flags);
+ save_control = CMOS_READ(RTC_CONTROL);
+ CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
+ save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
+--
+2.35.1
+
--- /dev/null
+mm-mlock-remove-lru_lock-on-testclearpagemlocked.patch
+mm-mlock-remove-__munlock_isolate_lru_page.patch
+mm-lru-introduce-testclearpagelru.patch
+mm-compaction-do-page-isolation-first-in-compaction.patch
+mm-vmscan-__isolate_lru_page_prepare-cleanup.patch
+mm-__isolate_lru_page_prepare-in-isolate_migratepage.patch
+mm-migrate-fix-thp-s-mapcount-on-isolation.patch
+arm64-dts-rockchip-keep-i2s1-disabled-for-gpio-funct.patch
+arm-dts-rockchip-fix-node-name-for-hym8563-rtc.patch
+arm-dts-rockchip-fix-ir-receiver-node-names.patch
+arm64-dts-rockchip-fix-ir-receiver-node-names.patch
+arm-dts-rockchip-rk3188-fix-lcdc1-rgb24-node-name.patch
+arm-9251-1-perf-fix-stacktraces-for-tracepoint-event.patch
+arm-9266-1-mm-fix-no-mmu-zero_page-implementation.patch
+asoc-wm8962-wait-for-updated-value-of-wm8962_clockin.patch
+arm-dts-rockchip-disable-arm_global_timer-on-rk3066-.patch
+9p-fd-use-p9_hdrsz-for-header-size.patch
+regulator-slg51000-wait-after-asserting-cs-pin.patch
+alsa-seq-fix-function-prototype-mismatch-in-snd_seq_.patch
+btrfs-send-avoid-unaligned-encoded-writes-when-attem.patch
+asoc-soc-pcm-add-null-check-in-be-reparenting.patch
+regulator-twl6030-fix-get-status-of-twl6032-regulato.patch
+fbcon-use-kzalloc-in-fbcon_prepare_logo.patch
+usb-dwc3-gadget-disable-gusb2phycfg.susphy-for-end-t.patch
+9p-xen-check-logical-size-for-buffer-size.patch
+net-usb-qmi_wwan-add-u-blox-0x1342-composition.patch
+mm-khugepaged-take-the-right-locks-for-page-table-re.patch
+mm-khugepaged-fix-gup-fast-interaction-by-sending-ip.patch
+mm-khugepaged-invoke-mmu-notifiers-in-shmem-file-col.patch
+rtc-mc146818-prevent-reading-garbage.patch
+rtc-mc146818-detect-and-handle-broken-rtcs.patch
+rtc-mc146818-dont-test-for-bit-0-5-in-register-d.patch
+rtc-cmos-remove-stale-revisit-comments.patch
+rtc-mc146818-lib-change-return-values-of-mc146818_ge.patch
+rtc-check-return-value-from-mc146818_get_time.patch
+rtc-mc146818-lib-fix-rtc-presence-check.patch
+rtc-mc146818-lib-extract-mc146818_avoid_uip.patch
+rtc-cmos-avoid-uip-when-writing-alarm-time.patch
+rtc-cmos-avoid-uip-when-reading-alarm-time.patch
+rtc-cmos-replace-spin_lock_irqsave-with-spin_lock-in.patch
+rtc-mc146818-reduce-spinlock-section-in-mc146818_set.patch
+xen-netback-ensure-protocol-headers-don-t-fall-in-th.patch
+xen-netback-do-some-code-cleanup.patch
+xen-netback-don-t-call-kfree_skb-with-interrupts-dis.patch
+media-videobuf2-core-take-mmap_lock-in-vb2_get_unmap.patch
--- /dev/null
+From 7b5d9450baa4cd437176ba3ecc980430f344197c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Nov 2022 17:58:50 -0800
+Subject: usb: dwc3: gadget: Disable GUSB2PHYCFG.SUSPHY for End Transfer
+
+From: Thinh Nguyen <Thinh.Nguyen@synopsys.com>
+
+[ Upstream commit 3aa07f72894d209fcf922ad686cbb28cf005aaad ]
+
+If there's a disconnection while operating in eSS, there may be a delay
+in VBUS drop response from the connector. In that case, the internal
+link state may drop to operate in usb2 speed while the controller thinks
+the VBUS is still high. The driver must make sure to disable
+GUSB2PHYCFG.SUSPHY when sending endpoint command while in usb2 speed.
+The End Transfer command may be called, and only that command needs to
+go through at this point. Let's keep it simple and unconditionally
+disable GUSB2PHYCFG.SUSPHY whenever we issue the command.
+
+This scenario is not seen in real hardware. In a rare case, our
+prototype type-c controller/interface may have a slow response
+triggering this issue.
+
+Signed-off-by: Thinh Nguyen <Thinh.Nguyen@synopsys.com>
+Link: https://lore.kernel.org/r/5651117207803c26e2f22ddf4e5ce9e865dcf7c7.1668045468.git.Thinh.Nguyen@synopsys.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/dwc3/gadget.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
+index a9a43d649478..28a1194f849f 100644
+--- a/drivers/usb/dwc3/gadget.c
++++ b/drivers/usb/dwc3/gadget.c
+@@ -291,7 +291,8 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned int cmd,
+ *
+ * DWC_usb3 3.30a and DWC_usb31 1.90a programming guide section 3.2.2
+ */
+- if (dwc->gadget->speed <= USB_SPEED_HIGH) {
++ if (dwc->gadget->speed <= USB_SPEED_HIGH ||
++ DWC3_DEPCMD_CMD(cmd) == DWC3_DEPCMD_ENDTRANSFER) {
+ reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0));
+ if (unlikely(reg & DWC3_GUSB2PHYCFG_SUSPHY)) {
+ saved_config |= DWC3_GUSB2PHYCFG_SUSPHY;
+--
+2.35.1
+
--- /dev/null
+From 603ae7055010466a085118b324f735574c43aa56 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 8 Jun 2022 06:37:26 +0200
+Subject: xen/netback: do some code cleanup
+
+From: Juergen Gross <jgross@suse.com>
+
+[ Upstream commit 5834e72eda0b7e5767eb107259d98eef19ebd11f ]
+
+Remove some unused macros and functions, make local functions static.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Acked-by: Wei Liu <wei.liu@kernel.org>
+Link: https://lore.kernel.org/r/20220608043726.9380-1-jgross@suse.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 74e7e1efdad4 ("xen/netback: don't call kfree_skb() with interrupts disabled")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/xen-netback/common.h | 12 ------------
+ drivers/net/xen-netback/interface.c | 16 +---------------
+ drivers/net/xen-netback/netback.c | 4 +++-
+ drivers/net/xen-netback/rx.c | 2 +-
+ 4 files changed, 5 insertions(+), 29 deletions(-)
+
+diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
+index 6a9178896c90..945647128c0e 100644
+--- a/drivers/net/xen-netback/common.h
++++ b/drivers/net/xen-netback/common.h
+@@ -48,7 +48,6 @@
+ #include <linux/debugfs.h>
+
+ typedef unsigned int pending_ring_idx_t;
+-#define INVALID_PENDING_RING_IDX (~0U)
+
+ struct pending_tx_info {
+ struct xen_netif_tx_request req; /* tx request */
+@@ -82,8 +81,6 @@ struct xenvif_rx_meta {
+ /* Discriminate from any valid pending_idx value. */
+ #define INVALID_PENDING_IDX 0xFFFF
+
+-#define MAX_BUFFER_OFFSET XEN_PAGE_SIZE
+-
+ #define MAX_PENDING_REQS XEN_NETIF_TX_RING_SIZE
+
+ /* The maximum number of frags is derived from the size of a grant (same
+@@ -367,11 +364,6 @@ void xenvif_free(struct xenvif *vif);
+ int xenvif_xenbus_init(void);
+ void xenvif_xenbus_fini(void);
+
+-int xenvif_schedulable(struct xenvif *vif);
+-
+-int xenvif_queue_stopped(struct xenvif_queue *queue);
+-void xenvif_wake_queue(struct xenvif_queue *queue);
+-
+ /* (Un)Map communication rings. */
+ void xenvif_unmap_frontend_data_rings(struct xenvif_queue *queue);
+ int xenvif_map_frontend_data_rings(struct xenvif_queue *queue,
+@@ -394,7 +386,6 @@ int xenvif_dealloc_kthread(void *data);
+ irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data);
+
+ bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread);
+-void xenvif_rx_action(struct xenvif_queue *queue);
+ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb);
+
+ void xenvif_carrier_on(struct xenvif *vif);
+@@ -402,9 +393,6 @@ void xenvif_carrier_on(struct xenvif *vif);
+ /* Callback from stack when TX packet can be released */
+ void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success);
+
+-/* Unmap a pending page and release it back to the guest */
+-void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx);
+-
+ static inline pending_ring_idx_t nr_pending_reqs(struct xenvif_queue *queue)
+ {
+ return MAX_PENDING_REQS -
+diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
+index 7ce9807fc24c..645a804ab788 100644
+--- a/drivers/net/xen-netback/interface.c
++++ b/drivers/net/xen-netback/interface.c
+@@ -70,7 +70,7 @@ void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue)
+ wake_up(&queue->dealloc_wq);
+ }
+
+-int xenvif_schedulable(struct xenvif *vif)
++static int xenvif_schedulable(struct xenvif *vif)
+ {
+ return netif_running(vif->dev) &&
+ test_bit(VIF_STATUS_CONNECTED, &vif->status) &&
+@@ -178,20 +178,6 @@ irqreturn_t xenvif_interrupt(int irq, void *dev_id)
+ return IRQ_HANDLED;
+ }
+
+-int xenvif_queue_stopped(struct xenvif_queue *queue)
+-{
+- struct net_device *dev = queue->vif->dev;
+- unsigned int id = queue->id;
+- return netif_tx_queue_stopped(netdev_get_tx_queue(dev, id));
+-}
+-
+-void xenvif_wake_queue(struct xenvif_queue *queue)
+-{
+- struct net_device *dev = queue->vif->dev;
+- unsigned int id = queue->id;
+- netif_tx_wake_queue(netdev_get_tx_queue(dev, id));
+-}
+-
+ static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb,
+ struct net_device *sb_dev)
+ {
+diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
+index 06fd61b71d37..fed0f7458e18 100644
+--- a/drivers/net/xen-netback/netback.c
++++ b/drivers/net/xen-netback/netback.c
+@@ -112,6 +112,8 @@ static void make_tx_response(struct xenvif_queue *queue,
+ s8 st);
+ static void push_tx_responses(struct xenvif_queue *queue);
+
++static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx);
++
+ static inline int tx_work_todo(struct xenvif_queue *queue);
+
+ static inline unsigned long idx_to_pfn(struct xenvif_queue *queue,
+@@ -1440,7 +1442,7 @@ static void push_tx_responses(struct xenvif_queue *queue)
+ notify_remote_via_irq(queue->tx_irq);
+ }
+
+-void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx)
++static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx)
+ {
+ int ret;
+ struct gnttab_unmap_grant_ref tx_unmap_op;
+diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c
+index a0335407be42..932762177110 100644
+--- a/drivers/net/xen-netback/rx.c
++++ b/drivers/net/xen-netback/rx.c
+@@ -486,7 +486,7 @@ static void xenvif_rx_skb(struct xenvif_queue *queue)
+
+ #define RX_BATCH_SIZE 64
+
+-void xenvif_rx_action(struct xenvif_queue *queue)
++static void xenvif_rx_action(struct xenvif_queue *queue)
+ {
+ struct sk_buff_head completed_skbs;
+ unsigned int work_done = 0;
+--
+2.35.1
+
--- /dev/null
+From 6156d032440fa7a7dcdd503d76769bc4f8389d4a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 6 Dec 2022 08:54:24 +0100
+Subject: xen/netback: don't call kfree_skb() with interrupts disabled
+
+From: Juergen Gross <jgross@suse.com>
+
+[ Upstream commit 74e7e1efdad45580cc3839f2a155174cf158f9b5 ]
+
+It is not allowed to call kfree_skb() from hardware interrupt
+context or with interrupts being disabled. So remove kfree_skb()
+from the spin_lock_irqsave() section and use the already existing
+"drop" label in xenvif_start_xmit() for dropping the SKB. At the
+same time replace the dev_kfree_skb() call there with a call of
+dev_kfree_skb_any(), as xenvif_start_xmit() can be called with
+disabled interrupts.
+
+This is XSA-424 / CVE-2022-42328 / CVE-2022-42329.
+
+Fixes: be81992f9086 ("xen/netback: don't queue unlimited number of packages")
+Reported-by: Yang Yingliang <yangyingliang@huawei.com>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/xen-netback/common.h | 2 +-
+ drivers/net/xen-netback/interface.c | 6 ++++--
+ drivers/net/xen-netback/rx.c | 8 +++++---
+ 3 files changed, 10 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
+index 945647128c0e..1ba974969216 100644
+--- a/drivers/net/xen-netback/common.h
++++ b/drivers/net/xen-netback/common.h
+@@ -386,7 +386,7 @@ int xenvif_dealloc_kthread(void *data);
+ irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data);
+
+ bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread);
+-void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb);
++bool xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb);
+
+ void xenvif_carrier_on(struct xenvif *vif);
+
+diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
+index 645a804ab788..97cf5bc48902 100644
+--- a/drivers/net/xen-netback/interface.c
++++ b/drivers/net/xen-netback/interface.c
+@@ -255,14 +255,16 @@ xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE)
+ skb_clear_hash(skb);
+
+- xenvif_rx_queue_tail(queue, skb);
++ if (!xenvif_rx_queue_tail(queue, skb))
++ goto drop;
++
+ xenvif_kick_thread(queue);
+
+ return NETDEV_TX_OK;
+
+ drop:
+ vif->dev->stats.tx_dropped++;
+- dev_kfree_skb(skb);
++ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+ }
+
+diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c
+index 932762177110..0ba754ebc5ba 100644
+--- a/drivers/net/xen-netback/rx.c
++++ b/drivers/net/xen-netback/rx.c
+@@ -82,9 +82,10 @@ static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue)
+ return false;
+ }
+
+-void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb)
++bool xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb)
+ {
+ unsigned long flags;
++ bool ret = true;
+
+ spin_lock_irqsave(&queue->rx_queue.lock, flags);
+
+@@ -92,8 +93,7 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb)
+ struct net_device *dev = queue->vif->dev;
+
+ netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));
+- kfree_skb(skb);
+- queue->vif->dev->stats.rx_dropped++;
++ ret = false;
+ } else {
+ if (skb_queue_empty(&queue->rx_queue))
+ xenvif_update_needed_slots(queue, skb);
+@@ -104,6 +104,8 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb)
+ }
+
+ spin_unlock_irqrestore(&queue->rx_queue.lock, flags);
++
++ return ret;
+ }
+
+ static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue)
+--
+2.35.1
+
--- /dev/null
+From 18dc22277fdfe02b0cfaa39d8737dc56df6a13b9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Nov 2022 09:16:59 +0000
+Subject: xen/netback: Ensure protocol headers don't fall in the non-linear
+ area
+
+From: Ross Lagerwall <ross.lagerwall@citrix.com>
+
+[ Upstream commit ad7f402ae4f466647c3a669b8a6f3e5d4271c84a ]
+
+In some cases, the frontend may send a packet where the protocol headers
+are spread across multiple slots. This would result in netback creating
+an skb where the protocol headers spill over into the non-linear area.
+Some drivers and NICs don't handle this properly resulting in an
+interface reset or worse.
+
+This issue was introduced by the removal of an unconditional skb pull in
+the tx path to improve performance. Fix this without reintroducing the
+pull by setting up grant copy ops for as many slots as needed to reach
+the XEN_NETBACK_TX_COPY_LEN size. Adjust the rest of the code to handle
+multiple copy operations per skb.
+
+This is XSA-423 / CVE-2022-3643.
+
+Fixes: 7e5d7753956b ("xen-netback: remove unconditional __pskb_pull_tail() in guest Tx path")
+Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
+Reviewed-by: Paul Durrant <paul@xen.org>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/xen-netback/netback.c | 223 ++++++++++++++++--------------
+ 1 file changed, 123 insertions(+), 100 deletions(-)
+
+diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
+index b0cbc7fead74..06fd61b71d37 100644
+--- a/drivers/net/xen-netback/netback.c
++++ b/drivers/net/xen-netback/netback.c
+@@ -330,10 +330,13 @@ static int xenvif_count_requests(struct xenvif_queue *queue,
+
+
+ struct xenvif_tx_cb {
+- u16 pending_idx;
++ u16 copy_pending_idx[XEN_NETBK_LEGACY_SLOTS_MAX + 1];
++ u8 copy_count;
+ };
+
+ #define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
++#define copy_pending_idx(skb, i) (XENVIF_TX_CB(skb)->copy_pending_idx[i])
++#define copy_count(skb) (XENVIF_TX_CB(skb)->copy_count)
+
+ static inline void xenvif_tx_create_map_op(struct xenvif_queue *queue,
+ u16 pending_idx,
+@@ -368,31 +371,93 @@ static inline struct sk_buff *xenvif_alloc_skb(unsigned int size)
+ return skb;
+ }
+
+-static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *queue,
+- struct sk_buff *skb,
+- struct xen_netif_tx_request *txp,
+- struct gnttab_map_grant_ref *gop,
+- unsigned int frag_overflow,
+- struct sk_buff *nskb)
++static void xenvif_get_requests(struct xenvif_queue *queue,
++ struct sk_buff *skb,
++ struct xen_netif_tx_request *first,
++ struct xen_netif_tx_request *txfrags,
++ unsigned *copy_ops,
++ unsigned *map_ops,
++ unsigned int frag_overflow,
++ struct sk_buff *nskb,
++ unsigned int extra_count,
++ unsigned int data_len)
+ {
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+ skb_frag_t *frags = shinfo->frags;
+- u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
+- int start;
++ u16 pending_idx;
+ pending_ring_idx_t index;
+ unsigned int nr_slots;
++ struct gnttab_copy *cop = queue->tx_copy_ops + *copy_ops;
++ struct gnttab_map_grant_ref *gop = queue->tx_map_ops + *map_ops;
++ struct xen_netif_tx_request *txp = first;
++
++ nr_slots = shinfo->nr_frags + 1;
++
++ copy_count(skb) = 0;
++
++ /* Create copy ops for exactly data_len bytes into the skb head. */
++ __skb_put(skb, data_len);
++ while (data_len > 0) {
++ int amount = data_len > txp->size ? txp->size : data_len;
++
++ cop->source.u.ref = txp->gref;
++ cop->source.domid = queue->vif->domid;
++ cop->source.offset = txp->offset;
++
++ cop->dest.domid = DOMID_SELF;
++ cop->dest.offset = (offset_in_page(skb->data +
++ skb_headlen(skb) -
++ data_len)) & ~XEN_PAGE_MASK;
++ cop->dest.u.gmfn = virt_to_gfn(skb->data + skb_headlen(skb)
++ - data_len);
++
++ cop->len = amount;
++ cop->flags = GNTCOPY_source_gref;
+
+- nr_slots = shinfo->nr_frags;
++ index = pending_index(queue->pending_cons);
++ pending_idx = queue->pending_ring[index];
++ callback_param(queue, pending_idx).ctx = NULL;
++ copy_pending_idx(skb, copy_count(skb)) = pending_idx;
++ copy_count(skb)++;
++
++ cop++;
++ data_len -= amount;
+
+- /* Skip first skb fragment if it is on same page as header fragment. */
+- start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
++ if (amount == txp->size) {
++ /* The copy op covered the full tx_request */
++
++ memcpy(&queue->pending_tx_info[pending_idx].req,
++ txp, sizeof(*txp));
++ queue->pending_tx_info[pending_idx].extra_count =
++ (txp == first) ? extra_count : 0;
++
++ if (txp == first)
++ txp = txfrags;
++ else
++ txp++;
++ queue->pending_cons++;
++ nr_slots--;
++ } else {
++ /* The copy op partially covered the tx_request.
++ * The remainder will be mapped.
++ */
++ txp->offset += amount;
++ txp->size -= amount;
++ }
++ }
+
+- for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots;
+- shinfo->nr_frags++, txp++, gop++) {
++ for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots;
++ shinfo->nr_frags++, gop++) {
+ index = pending_index(queue->pending_cons++);
+ pending_idx = queue->pending_ring[index];
+- xenvif_tx_create_map_op(queue, pending_idx, txp, 0, gop);
++ xenvif_tx_create_map_op(queue, pending_idx, txp,
++ txp == first ? extra_count : 0, gop);
+ frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx);
++
++ if (txp == first)
++ txp = txfrags;
++ else
++ txp++;
+ }
+
+ if (frag_overflow) {
+@@ -413,7 +478,8 @@ static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *que
+ skb_shinfo(skb)->frag_list = nskb;
+ }
+
+- return gop;
++ (*copy_ops) = cop - queue->tx_copy_ops;
++ (*map_ops) = gop - queue->tx_map_ops;
+ }
+
+ static inline void xenvif_grant_handle_set(struct xenvif_queue *queue,
+@@ -449,7 +515,7 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue,
+ struct gnttab_copy **gopp_copy)
+ {
+ struct gnttab_map_grant_ref *gop_map = *gopp_map;
+- u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
++ u16 pending_idx;
+ /* This always points to the shinfo of the skb being checked, which
+ * could be either the first or the one on the frag_list
+ */
+@@ -460,24 +526,37 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue,
+ struct skb_shared_info *first_shinfo = NULL;
+ int nr_frags = shinfo->nr_frags;
+ const bool sharedslot = nr_frags &&
+- frag_get_pending_idx(&shinfo->frags[0]) == pending_idx;
++ frag_get_pending_idx(&shinfo->frags[0]) ==
++ copy_pending_idx(skb, copy_count(skb) - 1);
+ int i, err;
+
+- /* Check status of header. */
+- err = (*gopp_copy)->status;
+- if (unlikely(err)) {
+- if (net_ratelimit())
+- netdev_dbg(queue->vif->dev,
+- "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n",
+- (*gopp_copy)->status,
+- pending_idx,
+- (*gopp_copy)->source.u.ref);
+- /* The first frag might still have this slot mapped */
+- if (!sharedslot)
+- xenvif_idx_release(queue, pending_idx,
+- XEN_NETIF_RSP_ERROR);
++ for (i = 0; i < copy_count(skb); i++) {
++ int newerr;
++
++ /* Check status of header. */
++ pending_idx = copy_pending_idx(skb, i);
++
++ newerr = (*gopp_copy)->status;
++ if (likely(!newerr)) {
++ /* The first frag might still have this slot mapped */
++ if (i < copy_count(skb) - 1 || !sharedslot)
++ xenvif_idx_release(queue, pending_idx,
++ XEN_NETIF_RSP_OKAY);
++ } else {
++ err = newerr;
++ if (net_ratelimit())
++ netdev_dbg(queue->vif->dev,
++ "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n",
++ (*gopp_copy)->status,
++ pending_idx,
++ (*gopp_copy)->source.u.ref);
++ /* The first frag might still have this slot mapped */
++ if (i < copy_count(skb) - 1 || !sharedslot)
++ xenvif_idx_release(queue, pending_idx,
++ XEN_NETIF_RSP_ERROR);
++ }
++ (*gopp_copy)++;
+ }
+- (*gopp_copy)++;
+
+ check_frags:
+ for (i = 0; i < nr_frags; i++, gop_map++) {
+@@ -524,14 +603,6 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue,
+ if (err)
+ continue;
+
+- /* First error: if the header haven't shared a slot with the
+- * first frag, release it as well.
+- */
+- if (!sharedslot)
+- xenvif_idx_release(queue,
+- XENVIF_TX_CB(skb)->pending_idx,
+- XEN_NETIF_RSP_OKAY);
+-
+ /* Invalidate preceding fragments of this skb. */
+ for (j = 0; j < i; j++) {
+ pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
+@@ -801,7 +872,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
+ unsigned *copy_ops,
+ unsigned *map_ops)
+ {
+- struct gnttab_map_grant_ref *gop = queue->tx_map_ops;
+ struct sk_buff *skb, *nskb;
+ int ret;
+ unsigned int frag_overflow;
+@@ -883,8 +953,12 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
+ continue;
+ }
+
++ data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN) ?
++ XEN_NETBACK_TX_COPY_LEN : txreq.size;
++
+ ret = xenvif_count_requests(queue, &txreq, extra_count,
+ txfrags, work_to_do);
++
+ if (unlikely(ret < 0))
+ break;
+
+@@ -910,9 +984,8 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
+ index = pending_index(queue->pending_cons);
+ pending_idx = queue->pending_ring[index];
+
+- data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN &&
+- ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
+- XEN_NETBACK_TX_COPY_LEN : txreq.size;
++ if (ret >= XEN_NETBK_LEGACY_SLOTS_MAX - 1 && data_len < txreq.size)
++ data_len = txreq.size;
+
+ skb = xenvif_alloc_skb(data_len);
+ if (unlikely(skb == NULL)) {
+@@ -923,8 +996,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
+ }
+
+ skb_shinfo(skb)->nr_frags = ret;
+- if (data_len < txreq.size)
+- skb_shinfo(skb)->nr_frags++;
+ /* At this point shinfo->nr_frags is in fact the number of
+ * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
+ */
+@@ -986,54 +1057,19 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
+ type);
+ }
+
+- XENVIF_TX_CB(skb)->pending_idx = pending_idx;
+-
+- __skb_put(skb, data_len);
+- queue->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref;
+- queue->tx_copy_ops[*copy_ops].source.domid = queue->vif->domid;
+- queue->tx_copy_ops[*copy_ops].source.offset = txreq.offset;
+-
+- queue->tx_copy_ops[*copy_ops].dest.u.gmfn =
+- virt_to_gfn(skb->data);
+- queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF;
+- queue->tx_copy_ops[*copy_ops].dest.offset =
+- offset_in_page(skb->data) & ~XEN_PAGE_MASK;
+-
+- queue->tx_copy_ops[*copy_ops].len = data_len;
+- queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref;
+-
+- (*copy_ops)++;
+-
+- if (data_len < txreq.size) {
+- frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
+- pending_idx);
+- xenvif_tx_create_map_op(queue, pending_idx, &txreq,
+- extra_count, gop);
+- gop++;
+- } else {
+- frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
+- INVALID_PENDING_IDX);
+- memcpy(&queue->pending_tx_info[pending_idx].req,
+- &txreq, sizeof(txreq));
+- queue->pending_tx_info[pending_idx].extra_count =
+- extra_count;
+- }
+-
+- queue->pending_cons++;
+-
+- gop = xenvif_get_requests(queue, skb, txfrags, gop,
+- frag_overflow, nskb);
++ xenvif_get_requests(queue, skb, &txreq, txfrags, copy_ops,
++ map_ops, frag_overflow, nskb, extra_count,
++ data_len);
+
+ __skb_queue_tail(&queue->tx_queue, skb);
+
+ queue->tx.req_cons = idx;
+
+- if (((gop-queue->tx_map_ops) >= ARRAY_SIZE(queue->tx_map_ops)) ||
++ if ((*map_ops >= ARRAY_SIZE(queue->tx_map_ops)) ||
+ (*copy_ops >= ARRAY_SIZE(queue->tx_copy_ops)))
+ break;
+ }
+
+- (*map_ops) = gop - queue->tx_map_ops;
+ return;
+ }
+
+@@ -1112,9 +1148,8 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
+ while ((skb = __skb_dequeue(&queue->tx_queue)) != NULL) {
+ struct xen_netif_tx_request *txp;
+ u16 pending_idx;
+- unsigned data_len;
+
+- pending_idx = XENVIF_TX_CB(skb)->pending_idx;
++ pending_idx = copy_pending_idx(skb, 0);
+ txp = &queue->pending_tx_info[pending_idx].req;
+
+ /* Check the remap error code. */
+@@ -1133,18 +1168,6 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
+ continue;
+ }
+
+- data_len = skb->len;
+- callback_param(queue, pending_idx).ctx = NULL;
+- if (data_len < txp->size) {
+- /* Append the packet payload as a fragment. */
+- txp->offset += data_len;
+- txp->size -= data_len;
+- } else {
+- /* Schedule a response immediately. */
+- xenvif_idx_release(queue, pending_idx,
+- XEN_NETIF_RSP_OKAY);
+- }
+-
+ if (txp->flags & XEN_NETTXF_csum_blank)
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ else if (txp->flags & XEN_NETTXF_data_validated)
+@@ -1330,7 +1353,7 @@ static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue)
+ /* Called after netfront has transmitted */
+ int xenvif_tx_action(struct xenvif_queue *queue, int budget)
+ {
+- unsigned nr_mops, nr_cops = 0;
++ unsigned nr_mops = 0, nr_cops = 0;
+ int work_done, ret;
+
+ if (unlikely(!tx_work_todo(queue)))
+--
+2.35.1
+