]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.10
authorSasha Levin <sashal@kernel.org>
Thu, 8 Dec 2022 13:53:35 +0000 (08:53 -0500)
committerSasha Levin <sashal@kernel.org>
Thu, 8 Dec 2022 13:53:35 +0000 (08:53 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
46 files changed:
queue-5.10/9p-fd-use-p9_hdrsz-for-header-size.patch [new file with mode: 0644]
queue-5.10/9p-xen-check-logical-size-for-buffer-size.patch [new file with mode: 0644]
queue-5.10/alsa-seq-fix-function-prototype-mismatch-in-snd_seq_.patch [new file with mode: 0644]
queue-5.10/arm-9251-1-perf-fix-stacktraces-for-tracepoint-event.patch [new file with mode: 0644]
queue-5.10/arm-9266-1-mm-fix-no-mmu-zero_page-implementation.patch [new file with mode: 0644]
queue-5.10/arm-dts-rockchip-disable-arm_global_timer-on-rk3066-.patch [new file with mode: 0644]
queue-5.10/arm-dts-rockchip-fix-ir-receiver-node-names.patch [new file with mode: 0644]
queue-5.10/arm-dts-rockchip-fix-node-name-for-hym8563-rtc.patch [new file with mode: 0644]
queue-5.10/arm-dts-rockchip-rk3188-fix-lcdc1-rgb24-node-name.patch [new file with mode: 0644]
queue-5.10/arm64-dts-rockchip-fix-ir-receiver-node-names.patch [new file with mode: 0644]
queue-5.10/arm64-dts-rockchip-keep-i2s1-disabled-for-gpio-funct.patch [new file with mode: 0644]
queue-5.10/asoc-soc-pcm-add-null-check-in-be-reparenting.patch [new file with mode: 0644]
queue-5.10/asoc-wm8962-wait-for-updated-value-of-wm8962_clockin.patch [new file with mode: 0644]
queue-5.10/btrfs-send-avoid-unaligned-encoded-writes-when-attem.patch [new file with mode: 0644]
queue-5.10/fbcon-use-kzalloc-in-fbcon_prepare_logo.patch [new file with mode: 0644]
queue-5.10/media-videobuf2-core-take-mmap_lock-in-vb2_get_unmap.patch [new file with mode: 0644]
queue-5.10/mm-__isolate_lru_page_prepare-in-isolate_migratepage.patch [new file with mode: 0644]
queue-5.10/mm-compaction-do-page-isolation-first-in-compaction.patch [new file with mode: 0644]
queue-5.10/mm-khugepaged-fix-gup-fast-interaction-by-sending-ip.patch [new file with mode: 0644]
queue-5.10/mm-khugepaged-invoke-mmu-notifiers-in-shmem-file-col.patch [new file with mode: 0644]
queue-5.10/mm-khugepaged-take-the-right-locks-for-page-table-re.patch [new file with mode: 0644]
queue-5.10/mm-lru-introduce-testclearpagelru.patch [new file with mode: 0644]
queue-5.10/mm-migrate-fix-thp-s-mapcount-on-isolation.patch [new file with mode: 0644]
queue-5.10/mm-mlock-remove-__munlock_isolate_lru_page.patch [new file with mode: 0644]
queue-5.10/mm-mlock-remove-lru_lock-on-testclearpagemlocked.patch [new file with mode: 0644]
queue-5.10/mm-vmscan-__isolate_lru_page_prepare-cleanup.patch [new file with mode: 0644]
queue-5.10/net-usb-qmi_wwan-add-u-blox-0x1342-composition.patch [new file with mode: 0644]
queue-5.10/regulator-slg51000-wait-after-asserting-cs-pin.patch [new file with mode: 0644]
queue-5.10/regulator-twl6030-fix-get-status-of-twl6032-regulato.patch [new file with mode: 0644]
queue-5.10/rtc-check-return-value-from-mc146818_get_time.patch [new file with mode: 0644]
queue-5.10/rtc-cmos-avoid-uip-when-reading-alarm-time.patch [new file with mode: 0644]
queue-5.10/rtc-cmos-avoid-uip-when-writing-alarm-time.patch [new file with mode: 0644]
queue-5.10/rtc-cmos-remove-stale-revisit-comments.patch [new file with mode: 0644]
queue-5.10/rtc-cmos-replace-spin_lock_irqsave-with-spin_lock-in.patch [new file with mode: 0644]
queue-5.10/rtc-mc146818-detect-and-handle-broken-rtcs.patch [new file with mode: 0644]
queue-5.10/rtc-mc146818-dont-test-for-bit-0-5-in-register-d.patch [new file with mode: 0644]
queue-5.10/rtc-mc146818-lib-change-return-values-of-mc146818_ge.patch [new file with mode: 0644]
queue-5.10/rtc-mc146818-lib-extract-mc146818_avoid_uip.patch [new file with mode: 0644]
queue-5.10/rtc-mc146818-lib-fix-rtc-presence-check.patch [new file with mode: 0644]
queue-5.10/rtc-mc146818-prevent-reading-garbage.patch [new file with mode: 0644]
queue-5.10/rtc-mc146818-reduce-spinlock-section-in-mc146818_set.patch [new file with mode: 0644]
queue-5.10/series [new file with mode: 0644]
queue-5.10/usb-dwc3-gadget-disable-gusb2phycfg.susphy-for-end-t.patch [new file with mode: 0644]
queue-5.10/xen-netback-do-some-code-cleanup.patch [new file with mode: 0644]
queue-5.10/xen-netback-don-t-call-kfree_skb-with-interrupts-dis.patch [new file with mode: 0644]
queue-5.10/xen-netback-ensure-protocol-headers-don-t-fall-in-th.patch [new file with mode: 0644]

diff --git a/queue-5.10/9p-fd-use-p9_hdrsz-for-header-size.patch b/queue-5.10/9p-fd-use-p9_hdrsz-for-header-size.patch
new file mode 100644 (file)
index 0000000..04c1394
--- /dev/null
@@ -0,0 +1,56 @@
+From e817ab2c6014a79269c881ba6704088a67795ad2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Nov 2022 17:11:59 +0800
+Subject: 9p/fd: Use P9_HDRSZ for header size
+
+From: GUO Zihua <guozihua@huawei.com>
+
+[ Upstream commit 6854fadbeee10891ed74246bdc05031906b6c8cf ]
+
+Cleanup hardcoded header sizes to use P9_HDRSZ instead of '7'
+
+Link: https://lkml.kernel.org/r/20221117091159.31533-4-guozihua@huawei.com
+Signed-off-by: GUO Zihua <guozihua@huawei.com>
+Reviewed-by: Christian Schoenebeck <linux_oss@crudebyte.com>
+[Dominique: commit message adjusted to make sense after offset size
+adjustment got removed]
+Signed-off-by: Dominique Martinet <asmadeus@codewreck.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/9p/trans_fd.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
+index deb66635f0f3..e070a0b8e5ca 100644
+--- a/net/9p/trans_fd.c
++++ b/net/9p/trans_fd.c
+@@ -118,7 +118,7 @@ struct p9_conn {
+       struct list_head unsent_req_list;
+       struct p9_req_t *rreq;
+       struct p9_req_t *wreq;
+-      char tmp_buf[7];
++      char tmp_buf[P9_HDRSZ];
+       struct p9_fcall rc;
+       int wpos;
+       int wsize;
+@@ -291,7 +291,7 @@ static void p9_read_work(struct work_struct *work)
+       if (!m->rc.sdata) {
+               m->rc.sdata = m->tmp_buf;
+               m->rc.offset = 0;
+-              m->rc.capacity = 7; /* start by reading header */
++              m->rc.capacity = P9_HDRSZ; /* start by reading header */
+       }
+       clear_bit(Rpending, &m->wsched);
+@@ -314,7 +314,7 @@ static void p9_read_work(struct work_struct *work)
+               p9_debug(P9_DEBUG_TRANS, "got new header\n");
+               /* Header size */
+-              m->rc.size = 7;
++              m->rc.size = P9_HDRSZ;
+               err = p9_parse_header(&m->rc, &m->rc.size, NULL, NULL, 0);
+               if (err) {
+                       p9_debug(P9_DEBUG_ERROR,
+-- 
+2.35.1
+
diff --git a/queue-5.10/9p-xen-check-logical-size-for-buffer-size.patch b/queue-5.10/9p-xen-check-logical-size-for-buffer-size.patch
new file mode 100644 (file)
index 0000000..eff3d92
--- /dev/null
@@ -0,0 +1,53 @@
+From b5b32fd794cbd41eaa5fbcace2be39beef2eea56 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Nov 2022 22:44:41 +0900
+Subject: 9p/xen: check logical size for buffer size
+
+From: Dominique Martinet <asmadeus@codewreck.org>
+
+[ Upstream commit 391c18cf776eb4569ecda1f7794f360fe0a45a26 ]
+
+trans_xen did not check the data fits into the buffer before copying
+from the xen ring, but we probably should.
+Add a check that just skips the request and return an error to
+userspace if it did not fit
+
+Tested-by: Stefano Stabellini <sstabellini@kernel.org>
+Reviewed-by: Christian Schoenebeck <linux_oss@crudebyte.com>
+Link: https://lkml.kernel.org/r/20221118135542.63400-1-asmadeus@codewreck.org
+Signed-off-by: Dominique Martinet <asmadeus@codewreck.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/9p/trans_xen.c | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
+index 432ac5a16f2e..6c8a33f98f09 100644
+--- a/net/9p/trans_xen.c
++++ b/net/9p/trans_xen.c
+@@ -231,6 +231,14 @@ static void p9_xen_response(struct work_struct *work)
+                       continue;
+               }
++              if (h.size > req->rc.capacity) {
++                      dev_warn(&priv->dev->dev,
++                               "requested packet size too big: %d for tag %d with capacity %zd\n",
++                               h.size, h.tag, req->rc.capacity);
++                      req->status = REQ_STATUS_ERROR;
++                      goto recv_error;
++              }
++
+               memcpy(&req->rc, &h, sizeof(h));
+               req->rc.offset = 0;
+@@ -240,6 +248,7 @@ static void p9_xen_response(struct work_struct *work)
+                                    masked_prod, &masked_cons,
+                                    XEN_9PFS_RING_SIZE(ring));
++recv_error:
+               virt_mb();
+               cons += h.size;
+               ring->intf->in_cons = cons;
+-- 
+2.35.1
+
diff --git a/queue-5.10/alsa-seq-fix-function-prototype-mismatch-in-snd_seq_.patch b/queue-5.10/alsa-seq-fix-function-prototype-mismatch-in-snd_seq_.patch
new file mode 100644 (file)
index 0000000..23b6e64
--- /dev/null
@@ -0,0 +1,77 @@
+From 621d8a45725353c81804715e0391a1762b803b4e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Nov 2022 15:23:50 -0800
+Subject: ALSA: seq: Fix function prototype mismatch in
+ snd_seq_expand_var_event
+
+From: Kees Cook <keescook@chromium.org>
+
+[ Upstream commit 05530ef7cf7c7d700f6753f058999b1b5099a026 ]
+
+With clang's kernel control flow integrity (kCFI, CONFIG_CFI_CLANG),
+indirect call targets are validated against the expected function
+pointer prototype to make sure the call target is valid to help mitigate
+ROP attacks. If they are not identical, there is a failure at run time,
+which manifests as either a kernel panic or thread getting killed.
+
+seq_copy_in_user() and seq_copy_in_kernel() did not have prototypes
+matching snd_seq_dump_func_t. Adjust this and remove the casts. There
+are not resulting binary output differences.
+
+This was found as a result of Clang's new -Wcast-function-type-strict
+flag, which is more sensitive than the simpler -Wcast-function-type,
+which only checks for type width mismatches.
+
+Reported-by: kernel test robot <lkp@intel.com>
+Link: https://lore.kernel.org/lkml/202211041527.HD8TLSE1-lkp@intel.com
+Cc: Jaroslav Kysela <perex@perex.cz>
+Cc: Takashi Iwai <tiwai@suse.com>
+Cc: "Gustavo A. R. Silva" <gustavoars@kernel.org>
+Cc: alsa-devel@alsa-project.org
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Link: https://lore.kernel.org/r/20221118232346.never.380-kees@kernel.org
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/core/seq/seq_memory.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+diff --git a/sound/core/seq/seq_memory.c b/sound/core/seq/seq_memory.c
+index 65db1a7c77b7..bb76a2dd0a2f 100644
+--- a/sound/core/seq/seq_memory.c
++++ b/sound/core/seq/seq_memory.c
+@@ -112,15 +112,19 @@ EXPORT_SYMBOL(snd_seq_dump_var_event);
+  * expand the variable length event to linear buffer space.
+  */
+-static int seq_copy_in_kernel(char **bufptr, const void *src, int size)
++static int seq_copy_in_kernel(void *ptr, void *src, int size)
+ {
++      char **bufptr = ptr;
++
+       memcpy(*bufptr, src, size);
+       *bufptr += size;
+       return 0;
+ }
+-static int seq_copy_in_user(char __user **bufptr, const void *src, int size)
++static int seq_copy_in_user(void *ptr, void *src, int size)
+ {
++      char __user **bufptr = ptr;
++
+       if (copy_to_user(*bufptr, src, size))
+               return -EFAULT;
+       *bufptr += size;
+@@ -149,8 +153,7 @@ int snd_seq_expand_var_event(const struct snd_seq_event *event, int count, char
+               return newlen;
+       }
+       err = snd_seq_dump_var_event(event,
+-                                   in_kernel ? (snd_seq_dump_func_t)seq_copy_in_kernel :
+-                                   (snd_seq_dump_func_t)seq_copy_in_user,
++                                   in_kernel ? seq_copy_in_kernel : seq_copy_in_user,
+                                    &buf);
+       return err < 0 ? err : newlen;
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.10/arm-9251-1-perf-fix-stacktraces-for-tracepoint-event.patch b/queue-5.10/arm-9251-1-perf-fix-stacktraces-for-tracepoint-event.patch
new file mode 100644 (file)
index 0000000..3251fda
--- /dev/null
@@ -0,0 +1,70 @@
+From 8b3c105761d6cae81a31e82cdbda80b22fb4c1b7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 26 Sep 2022 16:09:12 +0100
+Subject: ARM: 9251/1: perf: Fix stacktraces for tracepoint events in THUMB2
+ kernels
+
+From: Tomislav Novak <tnovak@fb.com>
+
+[ Upstream commit 612695bccfdbd52004551308a55bae410e7cd22f ]
+
+Store the frame address where arm_get_current_stackframe() looks for it
+(ARM_r7 instead of ARM_fp if CONFIG_THUMB2_KERNEL=y). Otherwise frame->fp
+gets set to 0, causing unwind_frame() to fail.
+
+  # bpftrace -e 't:sched:sched_switch { @[kstack] = count(); exit(); }'
+  Attaching 1 probe...
+  @[
+      __schedule+1059
+  ]: 1
+
+A typical first unwind instruction is 0x97 (SP = R7), so after executing
+it SP ends up being 0 and -URC_FAILURE is returned.
+
+  unwind_frame(pc = ac9da7d7 lr = 00000000 sp = c69bdda0 fp = 00000000)
+  unwind_find_idx(ac9da7d7)
+  unwind_exec_insn: insn = 00000097
+  unwind_exec_insn: fp = 00000000 sp = 00000000 lr = 00000000 pc = 00000000
+
+With this patch:
+
+  # bpftrace -e 't:sched:sched_switch { @[kstack] = count(); exit(); }'
+  Attaching 1 probe...
+  @[
+      __schedule+1059
+      __schedule+1059
+      schedule+79
+      schedule_hrtimeout_range_clock+163
+      schedule_hrtimeout_range+17
+      ep_poll+471
+      SyS_epoll_wait+111
+      sys_epoll_pwait+231
+      __ret_fast_syscall+1
+  ]: 1
+
+Link: https://lore.kernel.org/r/20220920230728.2617421-1-tnovak@fb.com/
+
+Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Tomislav Novak <tnovak@fb.com>
+Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/include/asm/perf_event.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h
+index fe87397c3d8c..bdbc1e590891 100644
+--- a/arch/arm/include/asm/perf_event.h
++++ b/arch/arm/include/asm/perf_event.h
+@@ -17,7 +17,7 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
+ #define perf_arch_fetch_caller_regs(regs, __ip) { \
+       (regs)->ARM_pc = (__ip); \
+-      (regs)->ARM_fp = (unsigned long) __builtin_frame_address(0); \
++      frame_pointer((regs)) = (unsigned long) __builtin_frame_address(0); \
+       (regs)->ARM_sp = current_stack_pointer; \
+       (regs)->ARM_cpsr = SVC_MODE; \
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.10/arm-9266-1-mm-fix-no-mmu-zero_page-implementation.patch b/queue-5.10/arm-9266-1-mm-fix-no-mmu-zero_page-implementation.patch
new file mode 100644 (file)
index 0000000..f0027de
--- /dev/null
@@ -0,0 +1,141 @@
+From 528c862595d7d443fc942544454319823b2e5810 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 4 Nov 2022 21:46:18 +0100
+Subject: ARM: 9266/1: mm: fix no-MMU ZERO_PAGE() implementation
+
+From: Giulio Benetti <giulio.benetti@benettiengineering.com>
+
+[ Upstream commit 340a982825f76f1cff0daa605970fe47321b5ee7 ]
+
+Actually in no-MMU SoCs(i.e. i.MXRT) ZERO_PAGE(vaddr) expands to
+```
+virt_to_page(0)
+```
+that in order expands to:
+```
+pfn_to_page(virt_to_pfn(0))
+```
+and then virt_to_pfn(0) to:
+```
+        ((((unsigned long)(0) - PAGE_OFFSET) >> PAGE_SHIFT) +
+         PHYS_PFN_OFFSET)
+```
+where PAGE_OFFSET and PHYS_PFN_OFFSET are the DRAM offset(0x80000000) and
+PAGE_SHIFT is 12. This way we obtain 16MB(0x01000000) summed to the base of
+DRAM(0x80000000).
+When ZERO_PAGE(0) is then used, for example in bio_add_page(), the page
+gets an address that is out of DRAM bounds.
+So instead of using fake virtual page 0 let's allocate a dedicated
+zero_page during paging_init() and assign it to a global 'struct page *
+empty_zero_page' the same way mmu.c does and it's the same approach used
+in m68k with commit dc068f462179 as discussed here[0]. Then let's move
+ZERO_PAGE() definition to the top of pgtable.h to be in common between
+mmu.c and nommu.c.
+
+[0]: https://lore.kernel.org/linux-m68k/2a462b23-5b8e-bbf4-ec7d-778434a3b9d7@google.com/T/#m1266ceb63
+ad140743174d6b3070364d3c9a5179b
+
+Signed-off-by: Giulio Benetti <giulio.benetti@benettiengineering.com>
+Reviewed-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/include/asm/pgtable-nommu.h |  6 ------
+ arch/arm/include/asm/pgtable.h       | 16 +++++++++-------
+ arch/arm/mm/nommu.c                  | 19 +++++++++++++++++++
+ 3 files changed, 28 insertions(+), 13 deletions(-)
+
+diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h
+index d16aba48fa0a..090011394477 100644
+--- a/arch/arm/include/asm/pgtable-nommu.h
++++ b/arch/arm/include/asm/pgtable-nommu.h
+@@ -44,12 +44,6 @@
+ typedef pte_t *pte_addr_t;
+-/*
+- * ZERO_PAGE is a global shared page that is always zero: used
+- * for zero-mapped memory areas etc..
+- */
+-#define ZERO_PAGE(vaddr)      (virt_to_page(0))
+-
+ /*
+  * Mark the prot value as uncacheable and unbufferable.
+  */
+diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
+index c02f24400369..d38d503493cb 100644
+--- a/arch/arm/include/asm/pgtable.h
++++ b/arch/arm/include/asm/pgtable.h
+@@ -10,6 +10,15 @@
+ #include <linux/const.h>
+ #include <asm/proc-fns.h>
++#ifndef __ASSEMBLY__
++/*
++ * ZERO_PAGE is a global shared page that is always zero: used
++ * for zero-mapped memory areas etc..
++ */
++extern struct page *empty_zero_page;
++#define ZERO_PAGE(vaddr)      (empty_zero_page)
++#endif
++
+ #ifndef CONFIG_MMU
+ #include <asm-generic/pgtable-nopud.h>
+@@ -156,13 +165,6 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+ #define __S111  __PAGE_SHARED_EXEC
+ #ifndef __ASSEMBLY__
+-/*
+- * ZERO_PAGE is a global shared page that is always zero: used
+- * for zero-mapped memory areas etc..
+- */
+-extern struct page *empty_zero_page;
+-#define ZERO_PAGE(vaddr)      (empty_zero_page)
+-
+ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
+diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
+index 8b3d7191e2b8..959f05701738 100644
+--- a/arch/arm/mm/nommu.c
++++ b/arch/arm/mm/nommu.c
+@@ -26,6 +26,13 @@
+ unsigned long vectors_base;
++/*
++ * empty_zero_page is a special page that is used for
++ * zero-initialized data and COW.
++ */
++struct page *empty_zero_page;
++EXPORT_SYMBOL(empty_zero_page);
++
+ #ifdef CONFIG_ARM_MPU
+ struct mpu_rgn_info mpu_rgn_info;
+ #endif
+@@ -148,9 +155,21 @@ void __init adjust_lowmem_bounds(void)
+  */
+ void __init paging_init(const struct machine_desc *mdesc)
+ {
++      void *zero_page;
++
+       early_trap_init((void *)vectors_base);
+       mpu_setup();
++
++      /* allocate the zero page. */
++      zero_page = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
++      if (!zero_page)
++              panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
++                    __func__, PAGE_SIZE, PAGE_SIZE);
++
+       bootmem_init();
++
++      empty_zero_page = virt_to_page(zero_page);
++      flush_dcache_page(empty_zero_page);
+ }
+ /*
+-- 
+2.35.1
+
diff --git a/queue-5.10/arm-dts-rockchip-disable-arm_global_timer-on-rk3066-.patch b/queue-5.10/arm-dts-rockchip-disable-arm_global_timer-on-rk3066-.patch
new file mode 100644 (file)
index 0000000..eb4cc63
--- /dev/null
@@ -0,0 +1,64 @@
+From d4c7c13a6cd1aed237bee68b3450766db819e289 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 30 Oct 2022 21:56:29 +0100
+Subject: ARM: dts: rockchip: disable arm_global_timer on rk3066 and rk3188
+
+From: Johan Jonker <jbx6244@gmail.com>
+
+[ Upstream commit da74858a475782a3f16470907814c8cc5950ad68 ]
+
+The clock source and the sched_clock provided by the arm_global_timer
+on Rockchip rk3066a/rk3188 are quite unstable because their rates
+depend on the CPU frequency.
+
+Recent changes to the arm_global_timer driver makes it impossible to use.
+
+On the other side, the arm_global_timer has a higher rating than the
+ROCKCHIP_TIMER, it will be selected by default by the time framework
+while we want to use the stable Rockchip clock source.
+
+Keep the arm_global_timer disabled in order to have the
+DW_APB_TIMER (rk3066a) or ROCKCHIP_TIMER (rk3188) selected by default.
+
+Signed-off-by: Johan Jonker <jbx6244@gmail.com>
+Link: https://lore.kernel.org/r/f275ca8d-fd0a-26e5-b978-b7f3df815e0a@gmail.com
+Signed-off-by: Heiko Stuebner <heiko@sntech.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/boot/dts/rk3188.dtsi | 1 -
+ arch/arm/boot/dts/rk3xxx.dtsi | 7 +++++++
+ 2 files changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/arch/arm/boot/dts/rk3188.dtsi b/arch/arm/boot/dts/rk3188.dtsi
+index a837a9a34e3e..ddf23748ead4 100644
+--- a/arch/arm/boot/dts/rk3188.dtsi
++++ b/arch/arm/boot/dts/rk3188.dtsi
+@@ -630,7 +630,6 @@
+ &global_timer {
+       interrupts = <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_EDGE_RISING)>;
+-      status = "disabled";
+ };
+ &local_timer {
+diff --git a/arch/arm/boot/dts/rk3xxx.dtsi b/arch/arm/boot/dts/rk3xxx.dtsi
+index 859a7477909f..5edc46a5585c 100644
+--- a/arch/arm/boot/dts/rk3xxx.dtsi
++++ b/arch/arm/boot/dts/rk3xxx.dtsi
+@@ -111,6 +111,13 @@
+               reg = <0x1013c200 0x20>;
+               interrupts = <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_EDGE_RISING)>;
+               clocks = <&cru CORE_PERI>;
++              status = "disabled";
++              /* The clock source and the sched_clock provided by the arm_global_timer
++               * on Rockchip rk3066a/rk3188 are quite unstable because their rates
++               * depend on the CPU frequency.
++               * Keep the arm_global_timer disabled in order to have the
++               * DW_APB_TIMER (rk3066a) or ROCKCHIP_TIMER (rk3188) selected by default.
++               */
+       };
+       local_timer: local-timer@1013c600 {
+-- 
+2.35.1
+
diff --git a/queue-5.10/arm-dts-rockchip-fix-ir-receiver-node-names.patch b/queue-5.10/arm-dts-rockchip-fix-ir-receiver-node-names.patch
new file mode 100644 (file)
index 0000000..c2c34ad
--- /dev/null
@@ -0,0 +1,36 @@
+From 5b1b99a1508cbef88005366b4a729d2514113d53 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 27 Oct 2022 10:58:22 +0200
+Subject: ARM: dts: rockchip: fix ir-receiver node names
+
+From: Johan Jonker <jbx6244@gmail.com>
+
+[ Upstream commit dd847fe34cdf1e89afed1af24986359f13082bfb ]
+
+Fix ir-receiver node names on Rockchip boards,
+so that they match with regex: '^ir(-receiver)?(@[a-f0-9]+)?$'
+
+Signed-off-by: Johan Jonker <jbx6244@gmail.com>
+Link: https://lore.kernel.org/r/ea5af279-f44c-afea-023d-bb37f5a0d58d@gmail.com
+Signed-off-by: Heiko Stuebner <heiko@sntech.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/boot/dts/rk3188-radxarock.dts | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/arm/boot/dts/rk3188-radxarock.dts b/arch/arm/boot/dts/rk3188-radxarock.dts
+index b0fef82c0a71..39b913f8d701 100644
+--- a/arch/arm/boot/dts/rk3188-radxarock.dts
++++ b/arch/arm/boot/dts/rk3188-radxarock.dts
+@@ -67,7 +67,7 @@
+               #sound-dai-cells = <0>;
+       };
+-      ir_recv: gpio-ir-receiver {
++      ir_recv: ir-receiver {
+               compatible = "gpio-ir-receiver";
+               gpios = <&gpio0 RK_PB2 GPIO_ACTIVE_LOW>;
+               pinctrl-names = "default";
+-- 
+2.35.1
+
diff --git a/queue-5.10/arm-dts-rockchip-fix-node-name-for-hym8563-rtc.patch b/queue-5.10/arm-dts-rockchip-fix-node-name-for-hym8563-rtc.patch
new file mode 100644 (file)
index 0000000..77fc21c
--- /dev/null
@@ -0,0 +1,91 @@
+From 163ce1b02852814851e67f197a271748daab03c7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 24 Oct 2022 18:55:46 +0200
+Subject: arm: dts: rockchip: fix node name for hym8563 rtc
+
+From: Sebastian Reichel <sebastian.reichel@collabora.com>
+
+[ Upstream commit 17b57beafccb4569accbfc8c11390744cf59c021 ]
+
+Fix the node name for hym8563 in all arm rockchip devicetrees.
+
+Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
+Link: https://lore.kernel.org/r/20221024165549.74574-4-sebastian.reichel@collabora.com
+Signed-off-by: Heiko Stuebner <heiko@sntech.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/boot/dts/rk3036-evb.dts          | 2 +-
+ arch/arm/boot/dts/rk3288-evb-act8846.dts  | 2 +-
+ arch/arm/boot/dts/rk3288-firefly.dtsi     | 2 +-
+ arch/arm/boot/dts/rk3288-miqi.dts         | 2 +-
+ arch/arm/boot/dts/rk3288-rock2-square.dts | 2 +-
+ 5 files changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/arch/arm/boot/dts/rk3036-evb.dts b/arch/arm/boot/dts/rk3036-evb.dts
+index 2a7e6624efb9..ea23ba98625e 100644
+--- a/arch/arm/boot/dts/rk3036-evb.dts
++++ b/arch/arm/boot/dts/rk3036-evb.dts
+@@ -31,7 +31,7 @@
+ &i2c1 {
+       status = "okay";
+-      hym8563: hym8563@51 {
++      hym8563: rtc@51 {
+               compatible = "haoyu,hym8563";
+               reg = <0x51>;
+               #clock-cells = <0>;
+diff --git a/arch/arm/boot/dts/rk3288-evb-act8846.dts b/arch/arm/boot/dts/rk3288-evb-act8846.dts
+index be695b8c1f67..8a635c243127 100644
+--- a/arch/arm/boot/dts/rk3288-evb-act8846.dts
++++ b/arch/arm/boot/dts/rk3288-evb-act8846.dts
+@@ -54,7 +54,7 @@
+               vin-supply = <&vcc_sys>;
+       };
+-      hym8563@51 {
++      rtc@51 {
+               compatible = "haoyu,hym8563";
+               reg = <0x51>;
+diff --git a/arch/arm/boot/dts/rk3288-firefly.dtsi b/arch/arm/boot/dts/rk3288-firefly.dtsi
+index 7fb582302b32..c560afe3af78 100644
+--- a/arch/arm/boot/dts/rk3288-firefly.dtsi
++++ b/arch/arm/boot/dts/rk3288-firefly.dtsi
+@@ -233,7 +233,7 @@
+               vin-supply = <&vcc_sys>;
+       };
+-      hym8563: hym8563@51 {
++      hym8563: rtc@51 {
+               compatible = "haoyu,hym8563";
+               reg = <0x51>;
+               #clock-cells = <0>;
+diff --git a/arch/arm/boot/dts/rk3288-miqi.dts b/arch/arm/boot/dts/rk3288-miqi.dts
+index cf54d5ffff2f..fe265a834e8e 100644
+--- a/arch/arm/boot/dts/rk3288-miqi.dts
++++ b/arch/arm/boot/dts/rk3288-miqi.dts
+@@ -157,7 +157,7 @@
+               vin-supply = <&vcc_sys>;
+       };
+-      hym8563: hym8563@51 {
++      hym8563: rtc@51 {
+               compatible = "haoyu,hym8563";
+               reg = <0x51>;
+               #clock-cells = <0>;
+diff --git a/arch/arm/boot/dts/rk3288-rock2-square.dts b/arch/arm/boot/dts/rk3288-rock2-square.dts
+index c4d1d142d8c6..d5ef99ebbddc 100644
+--- a/arch/arm/boot/dts/rk3288-rock2-square.dts
++++ b/arch/arm/boot/dts/rk3288-rock2-square.dts
+@@ -165,7 +165,7 @@
+ };
+ &i2c0 {
+-      hym8563: hym8563@51 {
++      hym8563: rtc@51 {
+               compatible = "haoyu,hym8563";
+               reg = <0x51>;
+               #clock-cells = <0>;
+-- 
+2.35.1
+
diff --git a/queue-5.10/arm-dts-rockchip-rk3188-fix-lcdc1-rgb24-node-name.patch b/queue-5.10/arm-dts-rockchip-rk3188-fix-lcdc1-rgb24-node-name.patch
new file mode 100644 (file)
index 0000000..bda3a6b
--- /dev/null
@@ -0,0 +1,36 @@
+From e5912fb7eb097679379662e4ac19cadcaf8ba3d5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 27 Oct 2022 01:31:37 +0200
+Subject: ARM: dts: rockchip: rk3188: fix lcdc1-rgb24 node name
+
+From: Johan Jonker <jbx6244@gmail.com>
+
+[ Upstream commit 11871e20bcb23c00966e785a124fb72bc8340af4 ]
+
+The lcdc1-rgb24 node name is out of line with the rest
+of the rk3188 lcdc1 node, so fix it.
+
+Signed-off-by: Johan Jonker <jbx6244@gmail.com>
+Link: https://lore.kernel.org/r/7b9c0a6f-626b-07e8-ae74-7e0f08b8d241@gmail.com
+Signed-off-by: Heiko Stuebner <heiko@sntech.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/boot/dts/rk3188.dtsi | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/arm/boot/dts/rk3188.dtsi b/arch/arm/boot/dts/rk3188.dtsi
+index b6bde9d12c2b..a837a9a34e3e 100644
+--- a/arch/arm/boot/dts/rk3188.dtsi
++++ b/arch/arm/boot/dts/rk3188.dtsi
+@@ -402,7 +402,7 @@
+                               rockchip,pins = <2 RK_PD3 1 &pcfg_pull_none>;
+                       };
+-                      lcdc1_rgb24: ldcd1-rgb24 {
++                      lcdc1_rgb24: lcdc1-rgb24 {
+                               rockchip,pins = <2 RK_PA0 1 &pcfg_pull_none>,
+                                               <2 RK_PA1 1 &pcfg_pull_none>,
+                                               <2 RK_PA2 1 &pcfg_pull_none>,
+-- 
+2.35.1
+
diff --git a/queue-5.10/arm64-dts-rockchip-fix-ir-receiver-node-names.patch b/queue-5.10/arm64-dts-rockchip-fix-ir-receiver-node-names.patch
new file mode 100644 (file)
index 0000000..ea964ef
--- /dev/null
@@ -0,0 +1,36 @@
+From 76fabdbab581205aaa7da7864a6f8c029f745509 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 27 Oct 2022 10:59:10 +0200
+Subject: arm64: dts: rockchip: fix ir-receiver node names
+
+From: Johan Jonker <jbx6244@gmail.com>
+
+[ Upstream commit de0d04b9780a23eb928aedfb6f981285f78d58e5 ]
+
+Fix ir-receiver node names on Rockchip boards,
+so that they match with regex: '^ir(-receiver)?(@[a-f0-9]+)?$'
+
+Signed-off-by: Johan Jonker <jbx6244@gmail.com>
+Link: https://lore.kernel.org/r/e9764253-8ce8-150b-4820-41f03f845469@gmail.com
+Signed-off-by: Heiko Stuebner <heiko@sntech.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts
+index fbcb9531cc70..213c0759c4b8 100644
+--- a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts
++++ b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts
+@@ -13,7 +13,7 @@
+               stdout-path = "serial2:1500000n8";
+       };
+-      ir_rx {
++      ir-receiver {
+               compatible = "gpio-ir-receiver";
+               gpios = <&gpio0 RK_PC0 GPIO_ACTIVE_HIGH>;
+               pinctrl-names = "default";
+-- 
+2.35.1
+
diff --git a/queue-5.10/arm64-dts-rockchip-keep-i2s1-disabled-for-gpio-funct.patch b/queue-5.10/arm64-dts-rockchip-keep-i2s1-disabled-for-gpio-funct.patch
new file mode 100644 (file)
index 0000000..74d0ee3
--- /dev/null
@@ -0,0 +1,36 @@
+From ffb3971e26f4690f2500ab9738472181039c199f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 24 Sep 2022 11:28:12 +0000
+Subject: arm64: dts: rockchip: keep I2S1 disabled for GPIO function on ROCK Pi
+ 4 series
+
+From: FUKAUMI Naoki <naoki@radxa.com>
+
+[ Upstream commit 849c19d14940b87332d5d59c7fc581d73f2099fd ]
+
+I2S1 pins are exposed on 40-pin header on Radxa ROCK Pi 4 series.
+their default function is GPIO, so I2S1 need to be disabled.
+
+Signed-off-by: FUKAUMI Naoki <naoki@radxa.com>
+Link: https://lore.kernel.org/r/20220924112812.1219-1-naoki@radxa.com
+Signed-off-by: Heiko Stuebner <heiko@sntech.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi
+index f121203081b9..64df64339119 100644
+--- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi
++++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi
+@@ -448,7 +448,6 @@
+ &i2s1 {
+       rockchip,playback-channels = <2>;
+       rockchip,capture-channels = <2>;
+-      status = "okay";
+ };
+ &i2s2 {
+-- 
+2.35.1
+
diff --git a/queue-5.10/asoc-soc-pcm-add-null-check-in-be-reparenting.patch b/queue-5.10/asoc-soc-pcm-add-null-check-in-be-reparenting.patch
new file mode 100644 (file)
index 0000000..0e2df53
--- /dev/null
@@ -0,0 +1,37 @@
+From c21ed1784175f9bc14e302b6c710a05b1b188fce Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Nov 2022 12:01:13 +0530
+Subject: ASoC: soc-pcm: Add NULL check in BE reparenting
+
+From: Srinivasa Rao Mandadapu <quic_srivasam@quicinc.com>
+
+[ Upstream commit db8f91d424fe0ea6db337aca8bc05908bbce1498 ]
+
+Add NULL check in dpcm_be_reparent API, to handle
+kernel NULL pointer dereference error.
+The issue occurred in fuzzing test.
+
+Signed-off-by: Srinivasa Rao Mandadapu <quic_srivasam@quicinc.com>
+Link: https://lore.kernel.org/r/1669098673-29703-1-git-send-email-quic_srivasam@quicinc.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/soc-pcm.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
+index 0e2261ee07b6..fb874f924bbe 100644
+--- a/sound/soc/soc-pcm.c
++++ b/sound/soc/soc-pcm.c
+@@ -1154,6 +1154,8 @@ static void dpcm_be_reparent(struct snd_soc_pcm_runtime *fe,
+               return;
+       be_substream = snd_soc_dpcm_get_substream(be, stream);
++      if (!be_substream)
++              return;
+       for_each_dpcm_fe(be, stream, dpcm) {
+               if (dpcm->fe == fe)
+-- 
+2.35.1
+
diff --git a/queue-5.10/asoc-wm8962-wait-for-updated-value-of-wm8962_clockin.patch b/queue-5.10/asoc-wm8962-wait-for-updated-value-of-wm8962_clockin.patch
new file mode 100644 (file)
index 0000000..3f17d77
--- /dev/null
@@ -0,0 +1,47 @@
+From 1515e509bb8fd1b1133f5c12ccfa326ea424a96e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Nov 2022 20:13:54 +0800
+Subject: ASoC: wm8962: Wait for updated value of WM8962_CLOCKING1 register
+
+From: Chancel Liu <chancel.liu@nxp.com>
+
+[ Upstream commit 3ca507bf99611c82dafced73e921c1b10ee12869 ]
+
+DSPCLK_DIV field in WM8962_CLOCKING1 register is used to generate
+correct frequency of LRCLK and BCLK. Sometimes the read-only value
+can't be updated timely after enabling SYSCLK. This results in wrong
+calculation values. Delay is introduced here to wait for newest value
+from register. The time of the delay should be at least 500~1000us
+according to test.
+
+Signed-off-by: Chancel Liu <chancel.liu@nxp.com>
+Acked-by: Charles Keepax <ckeepax@opensource.cirrus.com>
+Link: https://lore.kernel.org/r/20221109121354.123958-1-chancel.liu@nxp.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/codecs/wm8962.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c
+index 21574447650c..57aeded978c2 100644
+--- a/sound/soc/codecs/wm8962.c
++++ b/sound/soc/codecs/wm8962.c
+@@ -2489,6 +2489,14 @@ static void wm8962_configure_bclk(struct snd_soc_component *component)
+               snd_soc_component_update_bits(component, WM8962_CLOCKING2,
+                               WM8962_SYSCLK_ENA_MASK, WM8962_SYSCLK_ENA);
++      /* DSPCLK_DIV field in WM8962_CLOCKING1 register is used to generate
++       * correct frequency of LRCLK and BCLK. Sometimes the read-only value
++       * can't be updated timely after enabling SYSCLK. This results in wrong
++       * calculation values. Delay is introduced here to wait for newest
++       * value from register. The time of the delay should be at least
++       * 500~1000us according to test.
++       */
++      usleep_range(500, 1000);
+       dspclk = snd_soc_component_read(component, WM8962_CLOCKING1);
+       if (snd_soc_component_get_bias_level(component) != SND_SOC_BIAS_ON)
+-- 
+2.35.1
+
diff --git a/queue-5.10/btrfs-send-avoid-unaligned-encoded-writes-when-attem.patch b/queue-5.10/btrfs-send-avoid-unaligned-encoded-writes-when-attem.patch
new file mode 100644 (file)
index 0000000..9799427
--- /dev/null
@@ -0,0 +1,161 @@
+From 6c94634e8e0ed4750f2342a16b20fd351585fc7f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Nov 2022 16:29:44 +0000
+Subject: btrfs: send: avoid unaligned encoded writes when attempting to clone
+ range
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit a11452a3709e217492798cf3686ac2cc8eb3fb51 ]
+
+When trying to see if we can clone a file range, there are cases where we
+end up sending two write operations in case the inode from the source root
+has an i_size that is not sector size aligned and the length from the
+current offset to its i_size is less than the remaining length we are
+trying to clone.
+
+Issuing two write operations when we could instead issue a single write
+operation is not incorrect. However it is not optimal, specially if the
+extents are compressed and the flag BTRFS_SEND_FLAG_COMPRESSED was passed
+to the send ioctl. In that case we can end up sending an encoded write
+with an offset that is not sector size aligned, which makes the receiver
+fallback to decompressing the data and writing it using regular buffered
+IO (so re-compressing the data in case the fs is mounted with compression
+enabled), because encoded writes fail with -EINVAL when an offset is not
+sector size aligned.
+
+The following example, which triggered a bug in the receiver code for the
+fallback logic of decompressing + regular buffer IO and is fixed by the
+patchset referred in a Link at the bottom of this changelog, is an example
+where we have the non-optimal behaviour due to an unaligned encoded write:
+
+   $ cat test.sh
+   #!/bin/bash
+
+   DEV=/dev/sdj
+   MNT=/mnt/sdj
+
+   mkfs.btrfs -f $DEV > /dev/null
+   mount -o compress $DEV $MNT
+
+   # File foo has a size of 33K, not aligned to the sector size.
+   xfs_io -f -c "pwrite -S 0xab 0 33K" $MNT/foo
+
+   xfs_io -f -c "pwrite -S 0xcd 0 64K" $MNT/bar
+
+   # Now clone the first 32K of file bar into foo at offset 0.
+   xfs_io -c "reflink $MNT/bar 0 0 32K" $MNT/foo
+
+   # Snapshot the default subvolume and create a full send stream (v2).
+   btrfs subvolume snapshot -r $MNT $MNT/snap
+
+   btrfs send --compressed-data -f /tmp/test.send $MNT/snap
+
+   echo -e "\nFile bar in the original filesystem:"
+   od -A d -t x1 $MNT/snap/bar
+
+   umount $MNT
+   mkfs.btrfs -f $DEV > /dev/null
+   mount $DEV $MNT
+
+   echo -e "\nReceiving stream in a new filesystem..."
+   btrfs receive -f /tmp/test.send $MNT
+
+   echo -e "\nFile bar in the new filesystem:"
+   od -A d -t x1 $MNT/snap/bar
+
+   umount $MNT
+
+Before this patch, the send stream included one regular write and one
+encoded write for file 'bar', with the later being not sector size aligned
+and causing the receiver to fallback to decompression + buffered writes.
+The output of the btrfs receive command in verbose mode (-vvv):
+
+   (...)
+   mkfile o258-7-0
+   rename o258-7-0 -> bar
+   utimes
+   clone bar - source=foo source offset=0 offset=0 length=32768
+   write bar - offset=32768 length=1024
+   encoded_write bar - offset=33792, len=4096, unencoded_offset=33792, unencoded_file_len=31744, unencoded_len=65536, compression=1, encryption=0
+   encoded_write bar - falling back to decompress and write due to errno 22 ("Invalid argument")
+   (...)
+
+This patch avoids the regular write followed by an unaligned encoded write
+so that we end up sending a single encoded write that is aligned. So after
+this patch the stream content is (output of btrfs receive -vvv):
+
+   (...)
+   mkfile o258-7-0
+   rename o258-7-0 -> bar
+   utimes
+   clone bar - source=foo source offset=0 offset=0 length=32768
+   encoded_write bar - offset=32768, len=4096, unencoded_offset=32768, unencoded_file_len=32768, unencoded_len=65536, compression=1, encryption=0
+   (...)
+
+So we get more optimal behaviour and avoid the silent data loss bug in
+versions of btrfs-progs affected by the bug referred by the Link tag
+below (btrfs-progs v5.19, v5.19.1, v6.0 and v6.0.1).
+
+Link: https://lore.kernel.org/linux-btrfs/cover.1668529099.git.fdmanana@suse.com/
+Reviewed-by: Boris Burkov <boris@bur.io>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/send.c | 24 +++++++++++++++++++++++-
+ 1 file changed, 23 insertions(+), 1 deletion(-)
+
+diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
+index 6b80dee17f49..4a6ba0997e39 100644
+--- a/fs/btrfs/send.c
++++ b/fs/btrfs/send.c
+@@ -5398,6 +5398,7 @@ static int clone_range(struct send_ctx *sctx,
+               u64 ext_len;
+               u64 clone_len;
+               u64 clone_data_offset;
++              bool crossed_src_i_size = false;
+               if (slot >= btrfs_header_nritems(leaf)) {
+                       ret = btrfs_next_leaf(clone_root->root, path);
+@@ -5454,8 +5455,10 @@ static int clone_range(struct send_ctx *sctx,
+               if (key.offset >= clone_src_i_size)
+                       break;
+-              if (key.offset + ext_len > clone_src_i_size)
++              if (key.offset + ext_len > clone_src_i_size) {
+                       ext_len = clone_src_i_size - key.offset;
++                      crossed_src_i_size = true;
++              }
+               clone_data_offset = btrfs_file_extent_offset(leaf, ei);
+               if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte) {
+@@ -5515,6 +5518,25 @@ static int clone_range(struct send_ctx *sctx,
+                               ret = send_clone(sctx, offset, clone_len,
+                                                clone_root);
+                       }
++              } else if (crossed_src_i_size && clone_len < len) {
++                      /*
++                       * If we are at i_size of the clone source inode and we
++                       * can not clone from it, terminate the loop. This is
++                       * to avoid sending two write operations, one with a
++                       * length matching clone_len and the final one after
++                       * this loop with a length of len - clone_len.
++                       *
++                       * When using encoded writes (BTRFS_SEND_FLAG_COMPRESSED
++                       * was passed to the send ioctl), this helps avoid
++                       * sending an encoded write for an offset that is not
++                       * sector size aligned, in case the i_size of the source
++                       * inode is not sector size aligned. That will make the
++                       * receiver fallback to decompression of the data and
++                       * writing it using regular buffered IO, therefore while
++                       * not incorrect, it's not optimal due decompression and
++                       * possible re-compression at the receiver.
++                       */
++                      break;
+               } else {
+                       ret = send_extent_data(sctx, offset, clone_len);
+               }
+-- 
+2.35.1
+
diff --git a/queue-5.10/fbcon-use-kzalloc-in-fbcon_prepare_logo.patch b/queue-5.10/fbcon-use-kzalloc-in-fbcon_prepare_logo.patch
new file mode 100644 (file)
index 0000000..c1a101f
--- /dev/null
@@ -0,0 +1,93 @@
+From 23441e9af6d7908d5a0414e955ccd881e14d00d2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Nov 2022 00:27:58 +0900
+Subject: fbcon: Use kzalloc() in fbcon_prepare_logo()
+
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+
+[ Upstream commit a6a00d7e8ffd78d1cdb7a43f1278f081038c638f ]
+
+A kernel built with syzbot's config file reported that
+
+  scr_memcpyw(q, save, array3_size(logo_lines, new_cols, 2))
+
+causes uninitialized "save" to be copied.
+
+  ----------
+  [drm] Initialized vgem 1.0.0 20120112 for vgem on minor 0
+  [drm] Initialized vkms 1.0.0 20180514 for vkms on minor 1
+  Console: switching to colour frame buffer device 128x48
+  =====================================================
+  BUG: KMSAN: uninit-value in do_update_region+0x4b8/0xba0
+   do_update_region+0x4b8/0xba0
+   update_region+0x40d/0x840
+   fbcon_switch+0x3364/0x35e0
+   redraw_screen+0xae3/0x18a0
+   do_bind_con_driver+0x1cb3/0x1df0
+   do_take_over_console+0x11cb/0x13f0
+   fbcon_fb_registered+0xacc/0xfd0
+   register_framebuffer+0x1179/0x1320
+   __drm_fb_helper_initial_config_and_unlock+0x23ad/0x2b40
+   drm_fbdev_client_hotplug+0xbea/0xda0
+   drm_fbdev_generic_setup+0x65e/0x9d0
+   vkms_init+0x9f3/0xc76
+   (...snipped...)
+
+  Uninit was stored to memory at:
+   fbcon_prepare_logo+0x143b/0x1940
+   fbcon_init+0x2c1b/0x31c0
+   visual_init+0x3e7/0x820
+   do_bind_con_driver+0x14a4/0x1df0
+   do_take_over_console+0x11cb/0x13f0
+   fbcon_fb_registered+0xacc/0xfd0
+   register_framebuffer+0x1179/0x1320
+   __drm_fb_helper_initial_config_and_unlock+0x23ad/0x2b40
+   drm_fbdev_client_hotplug+0xbea/0xda0
+   drm_fbdev_generic_setup+0x65e/0x9d0
+   vkms_init+0x9f3/0xc76
+   (...snipped...)
+
+  Uninit was created at:
+   __kmem_cache_alloc_node+0xb69/0x1020
+   __kmalloc+0x379/0x680
+   fbcon_prepare_logo+0x704/0x1940
+   fbcon_init+0x2c1b/0x31c0
+   visual_init+0x3e7/0x820
+   do_bind_con_driver+0x14a4/0x1df0
+   do_take_over_console+0x11cb/0x13f0
+   fbcon_fb_registered+0xacc/0xfd0
+   register_framebuffer+0x1179/0x1320
+   __drm_fb_helper_initial_config_and_unlock+0x23ad/0x2b40
+   drm_fbdev_client_hotplug+0xbea/0xda0
+   drm_fbdev_generic_setup+0x65e/0x9d0
+   vkms_init+0x9f3/0xc76
+   (...snipped...)
+
+  CPU: 2 PID: 1 Comm: swapper/0 Not tainted 6.1.0-rc4-00356-g8f2975c2bb4c #924
+  Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
+  ----------
+
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
+Link: https://patchwork.freedesktop.org/patch/msgid/cad03d25-0ea0-32c4-8173-fd1895314bce@I-love.SAKURA.ne.jp
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/video/fbdev/core/fbcon.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
+index 2618d3beef64..27828435dd4f 100644
+--- a/drivers/video/fbdev/core/fbcon.c
++++ b/drivers/video/fbdev/core/fbcon.c
+@@ -609,7 +609,7 @@ static void fbcon_prepare_logo(struct vc_data *vc, struct fb_info *info,
+               if (scr_readw(r) != vc->vc_video_erase_char)
+                       break;
+       if (r != q && new_rows >= rows + logo_lines) {
+-              save = kmalloc(array3_size(logo_lines, new_cols, 2),
++              save = kzalloc(array3_size(logo_lines, new_cols, 2),
+                              GFP_KERNEL);
+               if (save) {
+                       int i = cols < new_cols ? cols : new_cols;
+-- 
+2.35.1
+
diff --git a/queue-5.10/media-videobuf2-core-take-mmap_lock-in-vb2_get_unmap.patch b/queue-5.10/media-videobuf2-core-take-mmap_lock-in-vb2_get_unmap.patch
new file mode 100644 (file)
index 0000000..a357c01
--- /dev/null
@@ -0,0 +1,265 @@
+From e5e26144a62a3037222e7eb80ffe8ed2d249ca26 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 7 Dec 2022 13:04:34 +0000
+Subject: media: videobuf2-core: take mmap_lock in vb2_get_unmapped_area()
+
+From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+
+[ Upstream commit 098e5edc5d048a8df8691fd9fde895af100be42b ]
+
+While vb2_mmap took the mmap_lock mutex, vb2_get_unmapped_area didn't.
+Add this.
+
+Also take this opportunity to move the 'q->memory != VB2_MEMORY_MMAP'
+check and vb2_fileio_is_active() check into __find_plane_by_offset() so
+both vb2_mmap and vb2_get_unmapped_area do the same checks.
+
+Since q->memory is checked while mmap_lock is held, also take that lock
+in reqbufs and create_bufs when it is set, and set it back to
+MEMORY_UNKNOWN on error.
+
+Fixes: f035eb4e976e ("[media] videobuf2: fix lockdep warning")
+Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+Acked-by: Tomasz Figa <tfiga@chromium.org>
+Reviewed-by: Ricardo Ribalda <ribalda@chromium.org>
+Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../media/common/videobuf2/videobuf2-core.c   | 102 +++++++++++++-----
+ 1 file changed, 73 insertions(+), 29 deletions(-)
+
+diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c
+index 72350343a56a..3bafde87a125 100644
+--- a/drivers/media/common/videobuf2/videobuf2-core.c
++++ b/drivers/media/common/videobuf2/videobuf2-core.c
+@@ -787,7 +787,13 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory,
+       num_buffers = max_t(unsigned int, *count, q->min_buffers_needed);
+       num_buffers = min_t(unsigned int, num_buffers, VB2_MAX_FRAME);
+       memset(q->alloc_devs, 0, sizeof(q->alloc_devs));
++      /*
++       * Set this now to ensure that drivers see the correct q->memory value
++       * in the queue_setup op.
++       */
++      mutex_lock(&q->mmap_lock);
+       q->memory = memory;
++      mutex_unlock(&q->mmap_lock);
+       /*
+        * Ask the driver how many buffers and planes per buffer it requires.
+@@ -796,22 +802,27 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory,
+       ret = call_qop(q, queue_setup, q, &num_buffers, &num_planes,
+                      plane_sizes, q->alloc_devs);
+       if (ret)
+-              return ret;
++              goto error;
+       /* Check that driver has set sane values */
+-      if (WARN_ON(!num_planes))
+-              return -EINVAL;
++      if (WARN_ON(!num_planes)) {
++              ret = -EINVAL;
++              goto error;
++      }
+       for (i = 0; i < num_planes; i++)
+-              if (WARN_ON(!plane_sizes[i]))
+-                      return -EINVAL;
++              if (WARN_ON(!plane_sizes[i])) {
++                      ret = -EINVAL;
++                      goto error;
++              }
+       /* Finally, allocate buffers and video memory */
+       allocated_buffers =
+               __vb2_queue_alloc(q, memory, num_buffers, num_planes, plane_sizes);
+       if (allocated_buffers == 0) {
+               dprintk(q, 1, "memory allocation failed\n");
+-              return -ENOMEM;
++              ret = -ENOMEM;
++              goto error;
+       }
+       /*
+@@ -852,7 +863,8 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory,
+       if (ret < 0) {
+               /*
+                * Note: __vb2_queue_free() will subtract 'allocated_buffers'
+-               * from q->num_buffers.
++               * from q->num_buffers and it will reset q->memory to
++               * VB2_MEMORY_UNKNOWN.
+                */
+               __vb2_queue_free(q, allocated_buffers);
+               mutex_unlock(&q->mmap_lock);
+@@ -868,6 +880,12 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory,
+       q->waiting_for_buffers = !q->is_output;
+       return 0;
++
++error:
++      mutex_lock(&q->mmap_lock);
++      q->memory = VB2_MEMORY_UNKNOWN;
++      mutex_unlock(&q->mmap_lock);
++      return ret;
+ }
+ EXPORT_SYMBOL_GPL(vb2_core_reqbufs);
+@@ -878,6 +896,7 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
+ {
+       unsigned int num_planes = 0, num_buffers, allocated_buffers;
+       unsigned plane_sizes[VB2_MAX_PLANES] = { };
++      bool no_previous_buffers = !q->num_buffers;
+       int ret;
+       if (q->num_buffers == VB2_MAX_FRAME) {
+@@ -885,13 +904,19 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
+               return -ENOBUFS;
+       }
+-      if (!q->num_buffers) {
++      if (no_previous_buffers) {
+               if (q->waiting_in_dqbuf && *count) {
+                       dprintk(q, 1, "another dup()ped fd is waiting for a buffer\n");
+                       return -EBUSY;
+               }
+               memset(q->alloc_devs, 0, sizeof(q->alloc_devs));
++              /*
++               * Set this now to ensure that drivers see the correct q->memory
++               * value in the queue_setup op.
++               */
++              mutex_lock(&q->mmap_lock);
+               q->memory = memory;
++              mutex_unlock(&q->mmap_lock);
+               q->waiting_for_buffers = !q->is_output;
+       } else {
+               if (q->memory != memory) {
+@@ -914,14 +939,15 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
+       ret = call_qop(q, queue_setup, q, &num_buffers,
+                      &num_planes, plane_sizes, q->alloc_devs);
+       if (ret)
+-              return ret;
++              goto error;
+       /* Finally, allocate buffers and video memory */
+       allocated_buffers = __vb2_queue_alloc(q, memory, num_buffers,
+                               num_planes, plane_sizes);
+       if (allocated_buffers == 0) {
+               dprintk(q, 1, "memory allocation failed\n");
+-              return -ENOMEM;
++              ret = -ENOMEM;
++              goto error;
+       }
+       /*
+@@ -952,7 +978,8 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
+       if (ret < 0) {
+               /*
+                * Note: __vb2_queue_free() will subtract 'allocated_buffers'
+-               * from q->num_buffers.
++               * from q->num_buffers and it will reset q->memory to
++               * VB2_MEMORY_UNKNOWN.
+                */
+               __vb2_queue_free(q, allocated_buffers);
+               mutex_unlock(&q->mmap_lock);
+@@ -967,6 +994,14 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
+       *count = allocated_buffers;
+       return 0;
++
++error:
++      if (no_previous_buffers) {
++              mutex_lock(&q->mmap_lock);
++              q->memory = VB2_MEMORY_UNKNOWN;
++              mutex_unlock(&q->mmap_lock);
++      }
++      return ret;
+ }
+ EXPORT_SYMBOL_GPL(vb2_core_create_bufs);
+@@ -2120,6 +2155,22 @@ static int __find_plane_by_offset(struct vb2_queue *q, unsigned long off,
+       struct vb2_buffer *vb;
+       unsigned int buffer, plane;
++      /*
++       * Sanity checks to ensure the lock is held, MEMORY_MMAP is
++       * used and fileio isn't active.
++       */
++      lockdep_assert_held(&q->mmap_lock);
++
++      if (q->memory != VB2_MEMORY_MMAP) {
++              dprintk(q, 1, "queue is not currently set up for mmap\n");
++              return -EINVAL;
++      }
++
++      if (vb2_fileio_is_active(q)) {
++              dprintk(q, 1, "file io in progress\n");
++              return -EBUSY;
++      }
++
+       /*
+        * Go over all buffers and their planes, comparing the given offset
+        * with an offset assigned to each plane. If a match is found,
+@@ -2219,11 +2270,6 @@ int vb2_mmap(struct vb2_queue *q, struct vm_area_struct *vma)
+       int ret;
+       unsigned long length;
+-      if (q->memory != VB2_MEMORY_MMAP) {
+-              dprintk(q, 1, "queue is not currently set up for mmap\n");
+-              return -EINVAL;
+-      }
+-
+       /*
+        * Check memory area access mode.
+        */
+@@ -2245,14 +2291,9 @@ int vb2_mmap(struct vb2_queue *q, struct vm_area_struct *vma)
+       mutex_lock(&q->mmap_lock);
+-      if (vb2_fileio_is_active(q)) {
+-              dprintk(q, 1, "mmap: file io in progress\n");
+-              ret = -EBUSY;
+-              goto unlock;
+-      }
+-
+       /*
+-       * Find the plane corresponding to the offset passed by userspace.
++       * Find the plane corresponding to the offset passed by userspace. This
++       * will return an error if not MEMORY_MMAP or file I/O is in progress.
+        */
+       ret = __find_plane_by_offset(q, off, &buffer, &plane);
+       if (ret)
+@@ -2305,22 +2346,25 @@ unsigned long vb2_get_unmapped_area(struct vb2_queue *q,
+       void *vaddr;
+       int ret;
+-      if (q->memory != VB2_MEMORY_MMAP) {
+-              dprintk(q, 1, "queue is not currently set up for mmap\n");
+-              return -EINVAL;
+-      }
++      mutex_lock(&q->mmap_lock);
+       /*
+-       * Find the plane corresponding to the offset passed by userspace.
++       * Find the plane corresponding to the offset passed by userspace. This
++       * will return an error if not MEMORY_MMAP or file I/O is in progress.
+        */
+       ret = __find_plane_by_offset(q, off, &buffer, &plane);
+       if (ret)
+-              return ret;
++              goto unlock;
+       vb = q->bufs[buffer];
+       vaddr = vb2_plane_vaddr(vb, plane);
++      mutex_unlock(&q->mmap_lock);
+       return vaddr ? (unsigned long)vaddr : -EINVAL;
++
++unlock:
++      mutex_unlock(&q->mmap_lock);
++      return ret;
+ }
+ EXPORT_SYMBOL_GPL(vb2_get_unmapped_area);
+ #endif
+-- 
+2.35.1
+
diff --git a/queue-5.10/mm-__isolate_lru_page_prepare-in-isolate_migratepage.patch b/queue-5.10/mm-__isolate_lru_page_prepare-in-isolate_migratepage.patch
new file mode 100644 (file)
index 0000000..2a42e3a
--- /dev/null
@@ -0,0 +1,318 @@
+From 60accdd3d3a54a9d28d0f2d39ec740df38c167fa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Mar 2022 14:45:41 -0700
+Subject: mm: __isolate_lru_page_prepare() in isolate_migratepages_block()
+
+From: Hugh Dickins <hughd@google.com>
+
+[ Upstream commit 89f6c88a6ab4a11deb14c270f7f1454cda4f73d6 ]
+
+__isolate_lru_page_prepare() conflates two unrelated functions, with the
+flags to one disjoint from the flags to the other; and hides some of the
+important checks outside of isolate_migratepages_block(), where the
+sequence is better to be visible.  It comes from the days of lumpy
+reclaim, before compaction, when the combination made more sense.
+
+Move what's needed by mm/compaction.c isolate_migratepages_block() inline
+there, and what's needed by mm/vmscan.c isolate_lru_pages() inline there.
+
+Shorten "isolate_mode" to "mode", so the sequence of conditions is easier
+to read.  Declare a "mapping" variable, to save one call to page_mapping()
+(but not another: calling again after page is locked is necessary).
+Simplify isolate_lru_pages() with a "move_to" list pointer.
+
+Link: https://lkml.kernel.org/r/879d62a8-91cc-d3c6-fb3b-69768236df68@google.com
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: David Rientjes <rientjes@google.com>
+Reviewed-by: Alex Shi <alexs@kernel.org>
+Cc: Alexander Duyck <alexander.duyck@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Stable-dep-of: 829ae0f81ce0 ("mm: migrate: fix THP's mapcount on isolation")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/swap.h |   1 -
+ mm/compaction.c      |  51 +++++++++++++++++++---
+ mm/vmscan.c          | 101 ++++++++-----------------------------------
+ 3 files changed, 62 insertions(+), 91 deletions(-)
+
+diff --git a/include/linux/swap.h b/include/linux/swap.h
+index 394d5de5d4b4..a502928c29c5 100644
+--- a/include/linux/swap.h
++++ b/include/linux/swap.h
+@@ -358,7 +358,6 @@ extern void lru_cache_add_inactive_or_unevictable(struct page *page,
+ extern unsigned long zone_reclaimable_pages(struct zone *zone);
+ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
+                                       gfp_t gfp_mask, nodemask_t *mask);
+-extern bool __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode);
+ extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
+                                                 unsigned long nr_pages,
+                                                 gfp_t gfp_mask,
+diff --git a/mm/compaction.c b/mm/compaction.c
+index ea46aadc7c21..57ce6b001b10 100644
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -784,7 +784,7 @@ static bool too_many_isolated(pg_data_t *pgdat)
+  * @cc:               Compaction control structure.
+  * @low_pfn:  The first PFN to isolate
+  * @end_pfn:  The one-past-the-last PFN to isolate, within same pageblock
+- * @isolate_mode: Isolation mode to be used.
++ * @mode:     Isolation mode to be used.
+  *
+  * Isolate all pages that can be migrated from the range specified by
+  * [low_pfn, end_pfn). The range is expected to be within same pageblock.
+@@ -798,7 +798,7 @@ static bool too_many_isolated(pg_data_t *pgdat)
+  */
+ static unsigned long
+ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
+-                      unsigned long end_pfn, isolate_mode_t isolate_mode)
++                      unsigned long end_pfn, isolate_mode_t mode)
+ {
+       pg_data_t *pgdat = cc->zone->zone_pgdat;
+       unsigned long nr_scanned = 0, nr_isolated = 0;
+@@ -806,6 +806,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
+       unsigned long flags = 0;
+       bool locked = false;
+       struct page *page = NULL, *valid_page = NULL;
++      struct address_space *mapping;
+       unsigned long start_pfn = low_pfn;
+       bool skip_on_failure = false;
+       unsigned long next_skip_pfn = 0;
+@@ -949,7 +950,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
+                                       locked = false;
+                               }
+-                              if (!isolate_movable_page(page, isolate_mode))
++                              if (!isolate_movable_page(page, mode))
+                                       goto isolate_success;
+                       }
+@@ -961,15 +962,15 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
+                * so avoid taking lru_lock and isolating it unnecessarily in an
+                * admittedly racy check.
+                */
+-              if (!page_mapping(page) &&
+-                  page_count(page) > page_mapcount(page))
++              mapping = page_mapping(page);
++              if (!mapping && page_count(page) > page_mapcount(page))
+                       goto isolate_fail;
+               /*
+                * Only allow to migrate anonymous pages in GFP_NOFS context
+                * because those do not depend on fs locks.
+                */
+-              if (!(cc->gfp_mask & __GFP_FS) && page_mapping(page))
++              if (!(cc->gfp_mask & __GFP_FS) && mapping)
+                       goto isolate_fail;
+               /*
+@@ -980,9 +981,45 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
+               if (unlikely(!get_page_unless_zero(page)))
+                       goto isolate_fail;
+-              if (!__isolate_lru_page_prepare(page, isolate_mode))
++              /* Only take pages on LRU: a check now makes later tests safe */
++              if (!PageLRU(page))
++                      goto isolate_fail_put;
++
++              /* Compaction might skip unevictable pages but CMA takes them */
++              if (!(mode & ISOLATE_UNEVICTABLE) && PageUnevictable(page))
++                      goto isolate_fail_put;
++
++              /*
++               * To minimise LRU disruption, the caller can indicate with
++               * ISOLATE_ASYNC_MIGRATE that it only wants to isolate pages
++               * it will be able to migrate without blocking - clean pages
++               * for the most part.  PageWriteback would require blocking.
++               */
++              if ((mode & ISOLATE_ASYNC_MIGRATE) && PageWriteback(page))
+                       goto isolate_fail_put;
++              if ((mode & ISOLATE_ASYNC_MIGRATE) && PageDirty(page)) {
++                      bool migrate_dirty;
++
++                      /*
++                       * Only pages without mappings or that have a
++                       * ->migratepage callback are possible to migrate
++                       * without blocking. However, we can be racing with
++                       * truncation so it's necessary to lock the page
++                       * to stabilise the mapping as truncation holds
++                       * the page lock until after the page is removed
++                       * from the page cache.
++                       */
++                      if (!trylock_page(page))
++                              goto isolate_fail_put;
++
++                      mapping = page_mapping(page);
++                      migrate_dirty = !mapping || mapping->a_ops->migratepage;
++                      unlock_page(page);
++                      if (!migrate_dirty)
++                              goto isolate_fail_put;
++              }
++
+               /* Try isolate the page */
+               if (!TestClearPageLRU(page))
+                       goto isolate_fail_put;
+diff --git a/mm/vmscan.c b/mm/vmscan.c
+index 00a47845a15b..9cba0f890b33 100644
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -1535,69 +1535,6 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
+       return nr_reclaimed;
+ }
+-/*
+- * Attempt to remove the specified page from its LRU.  Only take this page
+- * if it is of the appropriate PageActive status.  Pages which are being
+- * freed elsewhere are also ignored.
+- *
+- * page:      page to consider
+- * mode:      one of the LRU isolation modes defined above
+- *
+- * returns true on success, false on failure.
+- */
+-bool __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode)
+-{
+-      /* Only take pages on the LRU. */
+-      if (!PageLRU(page))
+-              return false;
+-
+-      /* Compaction should not handle unevictable pages but CMA can do so */
+-      if (PageUnevictable(page) && !(mode & ISOLATE_UNEVICTABLE))
+-              return false;
+-
+-      /*
+-       * To minimise LRU disruption, the caller can indicate that it only
+-       * wants to isolate pages it will be able to operate on without
+-       * blocking - clean pages for the most part.
+-       *
+-       * ISOLATE_ASYNC_MIGRATE is used to indicate that it only wants to pages
+-       * that it is possible to migrate without blocking
+-       */
+-      if (mode & ISOLATE_ASYNC_MIGRATE) {
+-              /* All the caller can do on PageWriteback is block */
+-              if (PageWriteback(page))
+-                      return false;
+-
+-              if (PageDirty(page)) {
+-                      struct address_space *mapping;
+-                      bool migrate_dirty;
+-
+-                      /*
+-                       * Only pages without mappings or that have a
+-                       * ->migratepage callback are possible to migrate
+-                       * without blocking. However, we can be racing with
+-                       * truncation so it's necessary to lock the page
+-                       * to stabilise the mapping as truncation holds
+-                       * the page lock until after the page is removed
+-                       * from the page cache.
+-                       */
+-                      if (!trylock_page(page))
+-                              return false;
+-
+-                      mapping = page_mapping(page);
+-                      migrate_dirty = !mapping || mapping->a_ops->migratepage;
+-                      unlock_page(page);
+-                      if (!migrate_dirty)
+-                              return false;
+-              }
+-      }
+-
+-      if ((mode & ISOLATE_UNMAPPED) && page_mapped(page))
+-              return false;
+-
+-      return true;
+-}
+-
+ /*
+  * Update LRU sizes after isolating pages. The LRU size updates must
+  * be complete before mem_cgroup_update_lru_size due to a sanity check.
+@@ -1647,11 +1584,11 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
+       unsigned long skipped = 0;
+       unsigned long scan, total_scan, nr_pages;
+       LIST_HEAD(pages_skipped);
+-      isolate_mode_t mode = (sc->may_unmap ? 0 : ISOLATE_UNMAPPED);
+       total_scan = 0;
+       scan = 0;
+       while (scan < nr_to_scan && !list_empty(src)) {
++              struct list_head *move_to = src;
+               struct page *page;
+               page = lru_to_page(src);
+@@ -1661,9 +1598,9 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
+               total_scan += nr_pages;
+               if (page_zonenum(page) > sc->reclaim_idx) {
+-                      list_move(&page->lru, &pages_skipped);
+                       nr_skipped[page_zonenum(page)] += nr_pages;
+-                      continue;
++                      move_to = &pages_skipped;
++                      goto move;
+               }
+               /*
+@@ -1671,37 +1608,34 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
+                * return with no isolated pages if the LRU mostly contains
+                * ineligible pages.  This causes the VM to not reclaim any
+                * pages, triggering a premature OOM.
+-               *
+-               * Account all tail pages of THP.  This would not cause
+-               * premature OOM since __isolate_lru_page() returns -EBUSY
+-               * only when the page is being freed somewhere else.
++               * Account all tail pages of THP.
+                */
+               scan += nr_pages;
+-              if (!__isolate_lru_page_prepare(page, mode)) {
+-                      /* It is being freed elsewhere */
+-                      list_move(&page->lru, src);
+-                      continue;
+-              }
++
++              if (!PageLRU(page))
++                      goto move;
++              if (!sc->may_unmap && page_mapped(page))
++                      goto move;
++
+               /*
+                * Be careful not to clear PageLRU until after we're
+                * sure the page is not being freed elsewhere -- the
+                * page release code relies on it.
+                */
+-              if (unlikely(!get_page_unless_zero(page))) {
+-                      list_move(&page->lru, src);
+-                      continue;
+-              }
++              if (unlikely(!get_page_unless_zero(page)))
++                      goto move;
+               if (!TestClearPageLRU(page)) {
+                       /* Another thread is already isolating this page */
+                       put_page(page);
+-                      list_move(&page->lru, src);
+-                      continue;
++                      goto move;
+               }
+               nr_taken += nr_pages;
+               nr_zone_taken[page_zonenum(page)] += nr_pages;
+-              list_move(&page->lru, dst);
++              move_to = dst;
++move:
++              list_move(&page->lru, move_to);
+       }
+       /*
+@@ -1725,7 +1659,8 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
+       }
+       *nr_scanned = total_scan;
+       trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan,
+-                                  total_scan, skipped, nr_taken, mode, lru);
++                                  total_scan, skipped, nr_taken,
++                                  sc->may_unmap ? 0 : ISOLATE_UNMAPPED, lru);
+       update_lru_sizes(lruvec, lru, nr_zone_taken);
+       return nr_taken;
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.10/mm-compaction-do-page-isolation-first-in-compaction.patch b/queue-5.10/mm-compaction-do-page-isolation-first-in-compaction.patch
new file mode 100644 (file)
index 0000000..45d5ebf
--- /dev/null
@@ -0,0 +1,266 @@
+From ef33d369381db1a4df5a9f474b014cc4e4664606 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Dec 2020 12:34:20 -0800
+Subject: mm/compaction: do page isolation first in compaction
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Alex Shi <alex.shi@linux.alibaba.com>
+
+[ Upstream commit 9df41314390b81a541ca6e84c8340bad0959e4b5 ]
+
+Currently, compaction would get the lru_lock and then do page isolation
+which works fine with pgdat->lru_lock, since any page isoltion would
+compete for the lru_lock.  If we want to change to memcg lru_lock, we have
+to isolate the page before getting lru_lock, thus isoltion would block
+page's memcg change which relay on page isoltion too.  Then we could
+safely use per memcg lru_lock later.
+
+The new page isolation use previous introduced TestClearPageLRU() + pgdat
+lru locking which will be changed to memcg lru lock later.
+
+Hugh Dickins <hughd@google.com> fixed following bugs in this patch's early
+version:
+
+Fix lots of crashes under compaction load: isolate_migratepages_block()
+must clean up appropriately when rejecting a page, setting PageLRU again
+if it had been cleared; and a put_page() after get_page_unless_zero()
+cannot safely be done while holding locked_lruvec - it may turn out to be
+the final put_page(), which will take an lruvec lock when PageLRU.
+
+And move __isolate_lru_page_prepare back after get_page_unless_zero to
+make trylock_page() safe: trylock_page() is not safe to use at this time:
+its setting PG_locked can race with the page being freed or allocated
+("Bad page"), and can also erase flags being set by one of those "sole
+owners" of a freshly allocated page who use non-atomic __SetPageFlag().
+
+Link: https://lkml.kernel.org/r/1604566549-62481-16-git-send-email-alex.shi@linux.alibaba.com
+Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
+Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
+Acked-by: Hugh Dickins <hughd@google.com>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Alexander Duyck <alexander.duyck@gmail.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Cc: "Chen, Rong A" <rong.a.chen@intel.com>
+Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
+Cc: "Huang, Ying" <ying.huang@intel.com>
+Cc: Jann Horn <jannh@google.com>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Kirill A. Shutemov <kirill@shutemov.name>
+Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Mika Penttilä <mika.penttila@nextfour.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Shakeel Butt <shakeelb@google.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
+Cc: Wei Yang <richard.weiyang@gmail.com>
+Cc: Yang Shi <yang.shi@linux.alibaba.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Stable-dep-of: 829ae0f81ce0 ("mm: migrate: fix THP's mapcount on isolation")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/swap.h |  2 +-
+ mm/compaction.c      | 42 +++++++++++++++++++++++++++++++++---------
+ mm/vmscan.c          | 43 ++++++++++++++++++++++---------------------
+ 3 files changed, 56 insertions(+), 31 deletions(-)
+
+diff --git a/include/linux/swap.h b/include/linux/swap.h
+index fbc6805358da..3577d3a6ec37 100644
+--- a/include/linux/swap.h
++++ b/include/linux/swap.h
+@@ -358,7 +358,7 @@ extern void lru_cache_add_inactive_or_unevictable(struct page *page,
+ extern unsigned long zone_reclaimable_pages(struct zone *zone);
+ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
+                                       gfp_t gfp_mask, nodemask_t *mask);
+-extern int __isolate_lru_page(struct page *page, isolate_mode_t mode);
++extern int __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode);
+ extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
+                                                 unsigned long nr_pages,
+                                                 gfp_t gfp_mask,
+diff --git a/mm/compaction.c b/mm/compaction.c
+index 8dfbe86bd74f..ba3e907f03b7 100644
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -890,6 +890,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
+               if (!valid_page && IS_ALIGNED(low_pfn, pageblock_nr_pages)) {
+                       if (!cc->ignore_skip_hint && get_pageblock_skip(page)) {
+                               low_pfn = end_pfn;
++                              page = NULL;
+                               goto isolate_abort;
+                       }
+                       valid_page = page;
+@@ -971,6 +972,21 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
+               if (!(cc->gfp_mask & __GFP_FS) && page_mapping(page))
+                       goto isolate_fail;
++              /*
++               * Be careful not to clear PageLRU until after we're
++               * sure the page is not being freed elsewhere -- the
++               * page release code relies on it.
++               */
++              if (unlikely(!get_page_unless_zero(page)))
++                      goto isolate_fail;
++
++              if (__isolate_lru_page_prepare(page, isolate_mode) != 0)
++                      goto isolate_fail_put;
++
++              /* Try isolate the page */
++              if (!TestClearPageLRU(page))
++                      goto isolate_fail_put;
++
+               /* If we already hold the lock, we can skip some rechecking */
+               if (!locked) {
+                       locked = compact_lock_irqsave(&pgdat->lru_lock,
+@@ -983,10 +999,6 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
+                                       goto isolate_abort;
+                       }
+-                      /* Recheck PageLRU and PageCompound under lock */
+-                      if (!PageLRU(page))
+-                              goto isolate_fail;
+-
+                       /*
+                        * Page become compound since the non-locked check,
+                        * and it's on LRU. It can only be a THP so the order
+@@ -994,16 +1006,13 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
+                        */
+                       if (unlikely(PageCompound(page) && !cc->alloc_contig)) {
+                               low_pfn += compound_nr(page) - 1;
+-                              goto isolate_fail;
++                              SetPageLRU(page);
++                              goto isolate_fail_put;
+                       }
+               }
+               lruvec = mem_cgroup_page_lruvec(page, pgdat);
+-              /* Try isolate the page */
+-              if (__isolate_lru_page(page, isolate_mode) != 0)
+-                      goto isolate_fail;
+-
+               /* The whole page is taken off the LRU; skip the tail pages. */
+               if (PageCompound(page))
+                       low_pfn += compound_nr(page) - 1;
+@@ -1032,6 +1041,15 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
+               }
+               continue;
++
++isolate_fail_put:
++              /* Avoid potential deadlock in freeing page under lru_lock */
++              if (locked) {
++                      spin_unlock_irqrestore(&pgdat->lru_lock, flags);
++                      locked = false;
++              }
++              put_page(page);
++
+ isolate_fail:
+               if (!skip_on_failure)
+                       continue;
+@@ -1068,9 +1086,15 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
+       if (unlikely(low_pfn > end_pfn))
+               low_pfn = end_pfn;
++      page = NULL;
++
+ isolate_abort:
+       if (locked)
+               spin_unlock_irqrestore(&pgdat->lru_lock, flags);
++      if (page) {
++              SetPageLRU(page);
++              put_page(page);
++      }
+       /*
+        * Updated the cached scanner pfn once the pageblock has been scanned
+diff --git a/mm/vmscan.c b/mm/vmscan.c
+index 8d62eedfc794..5ada402c8d95 100644
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -1545,7 +1545,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
+  *
+  * returns 0 on success, -ve errno on failure.
+  */
+-int __isolate_lru_page(struct page *page, isolate_mode_t mode)
++int __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode)
+ {
+       int ret = -EBUSY;
+@@ -1597,22 +1597,9 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode)
+       if ((mode & ISOLATE_UNMAPPED) && page_mapped(page))
+               return ret;
+-      if (likely(get_page_unless_zero(page))) {
+-              /*
+-               * Be careful not to clear PageLRU until after we're
+-               * sure the page is not being freed elsewhere -- the
+-               * page release code relies on it.
+-               */
+-              if (TestClearPageLRU(page))
+-                      ret = 0;
+-              else
+-                      put_page(page);
+-      }
+-
+-      return ret;
++      return 0;
+ }
+-
+ /*
+  * Update LRU sizes after isolating pages. The LRU size updates must
+  * be complete before mem_cgroup_update_lru_size due to a sanity check.
+@@ -1692,20 +1679,34 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
+                * only when the page is being freed somewhere else.
+                */
+               scan += nr_pages;
+-              switch (__isolate_lru_page(page, mode)) {
++              switch (__isolate_lru_page_prepare(page, mode)) {
+               case 0:
++                      /*
++                       * Be careful not to clear PageLRU until after we're
++                       * sure the page is not being freed elsewhere -- the
++                       * page release code relies on it.
++                       */
++                      if (unlikely(!get_page_unless_zero(page)))
++                              goto busy;
++
++                      if (!TestClearPageLRU(page)) {
++                              /*
++                               * This page may in other isolation path,
++                               * but we still hold lru_lock.
++                               */
++                              put_page(page);
++                              goto busy;
++                      }
++
+                       nr_taken += nr_pages;
+                       nr_zone_taken[page_zonenum(page)] += nr_pages;
+                       list_move(&page->lru, dst);
+                       break;
+-              case -EBUSY:
++              default:
++busy:
+                       /* else it is being freed elsewhere */
+                       list_move(&page->lru, src);
+-                      continue;
+-
+-              default:
+-                      BUG();
+               }
+       }
+-- 
+2.35.1
+
diff --git a/queue-5.10/mm-khugepaged-fix-gup-fast-interaction-by-sending-ip.patch b/queue-5.10/mm-khugepaged-fix-gup-fast-interaction-by-sending-ip.patch
new file mode 100644 (file)
index 0000000..ad4d743
--- /dev/null
@@ -0,0 +1,112 @@
+From 2b6a220c2d4c90dda4ae1ed3aa0251eb152b8825 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 6 Dec 2022 18:16:04 +0100
+Subject: mm/khugepaged: fix GUP-fast interaction by sending IPI
+
+From: Jann Horn <jannh@google.com>
+
+commit 2ba99c5e08812494bc57f319fb562f527d9bacd8 upstream.
+
+Since commit 70cbc3cc78a99 ("mm: gup: fix the fast GUP race against THP
+collapse"), the lockless_pages_from_mm() fastpath rechecks the pmd_t to
+ensure that the page table was not removed by khugepaged in between.
+
+However, lockless_pages_from_mm() still requires that the page table is
+not concurrently freed.  Fix it by sending IPIs (if the architecture uses
+semi-RCU-style page table freeing) before freeing/reusing page tables.
+
+Link: https://lkml.kernel.org/r/20221129154730.2274278-2-jannh@google.com
+Link: https://lkml.kernel.org/r/20221128180252.1684965-2-jannh@google.com
+Link: https://lkml.kernel.org/r/20221125213714.4115729-2-jannh@google.com
+Fixes: ba76149f47d8 ("thp: khugepaged")
+Signed-off-by: Jann Horn <jannh@google.com>
+Reviewed-by: Yang Shi <shy828301@gmail.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: John Hubbard <jhubbard@nvidia.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+[manual backport: two of the three places in khugepaged that can free
+ptes were refactored into a common helper between 5.15 and 6.0]
+Signed-off-by: Jann Horn <jannh@google.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/asm-generic/tlb.h | 4 ++++
+ mm/khugepaged.c           | 3 +++
+ mm/mmu_gather.c           | 4 +---
+ 3 files changed, 8 insertions(+), 3 deletions(-)
+
+diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
+index a0c4b99d2899..f40c9534f20b 100644
+--- a/include/asm-generic/tlb.h
++++ b/include/asm-generic/tlb.h
+@@ -205,12 +205,16 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
+ #define tlb_needs_table_invalidate() (true)
+ #endif
++void tlb_remove_table_sync_one(void);
++
+ #else
+ #ifdef tlb_needs_table_invalidate
+ #error tlb_needs_table_invalidate() requires MMU_GATHER_RCU_TABLE_FREE
+ #endif
++static inline void tlb_remove_table_sync_one(void) { }
++
+ #endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */
+diff --git a/mm/khugepaged.c b/mm/khugepaged.c
+index 014e8b259313..0268b549bd60 100644
+--- a/mm/khugepaged.c
++++ b/mm/khugepaged.c
+@@ -1154,6 +1154,7 @@ static void collapse_huge_page(struct mm_struct *mm,
+       _pmd = pmdp_collapse_flush(vma, address, pmd);
+       spin_unlock(pmd_ptl);
+       mmu_notifier_invalidate_range_end(&range);
++      tlb_remove_table_sync_one();
+       spin_lock(pte_ptl);
+       isolated = __collapse_huge_page_isolate(vma, address, pte,
+@@ -1538,6 +1539,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
+       /* step 4: collapse pmd */
+       _pmd = pmdp_collapse_flush(vma, haddr, pmd);
+       mm_dec_nr_ptes(mm);
++      tlb_remove_table_sync_one();
+       pte_free(mm, pmd_pgtable(_pmd));
+       i_mmap_unlock_write(vma->vm_file->f_mapping);
+@@ -1625,6 +1627,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
+                               /* assume page table is clear */
+                               _pmd = pmdp_collapse_flush(vma, addr, pmd);
+                               mm_dec_nr_ptes(mm);
++                              tlb_remove_table_sync_one();
+                               pte_free(mm, pmd_pgtable(_pmd));
+                       }
+                       mmap_write_unlock(mm);
+diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
+index 03c33c93a582..205fdbb5792a 100644
+--- a/mm/mmu_gather.c
++++ b/mm/mmu_gather.c
+@@ -139,7 +139,7 @@ static void tlb_remove_table_smp_sync(void *arg)
+       /* Simply deliver the interrupt */
+ }
+-static void tlb_remove_table_sync_one(void)
++void tlb_remove_table_sync_one(void)
+ {
+       /*
+        * This isn't an RCU grace period and hence the page-tables cannot be
+@@ -163,8 +163,6 @@ static void tlb_remove_table_free(struct mmu_table_batch *batch)
+ #else /* !CONFIG_MMU_GATHER_RCU_TABLE_FREE */
+-static void tlb_remove_table_sync_one(void) { }
+-
+ static void tlb_remove_table_free(struct mmu_table_batch *batch)
+ {
+       __tlb_remove_table_free(batch);
+-- 
+2.35.1
+
diff --git a/queue-5.10/mm-khugepaged-invoke-mmu-notifiers-in-shmem-file-col.patch b/queue-5.10/mm-khugepaged-invoke-mmu-notifiers-in-shmem-file-col.patch
new file mode 100644 (file)
index 0000000..1d0ad3f
--- /dev/null
@@ -0,0 +1,90 @@
+From 8e1c95908e48c5198348debed6c347698ecc9ec1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 6 Dec 2022 18:16:05 +0100
+Subject: mm/khugepaged: invoke MMU notifiers in shmem/file collapse paths
+
+From: Jann Horn <jannh@google.com>
+
+commit f268f6cf875f3220afc77bdd0bf1bb136eb54db9 upstream.
+
+Any codepath that zaps page table entries must invoke MMU notifiers to
+ensure that secondary MMUs (like KVM) don't keep accessing pages which
+aren't mapped anymore.  Secondary MMUs don't hold their own references to
+pages that are mirrored over, so failing to notify them can lead to page
+use-after-free.
+
+I'm marking this as addressing an issue introduced in commit f3f0e1d2150b
+("khugepaged: add support of collapse for tmpfs/shmem pages"), but most of
+the security impact of this only came in commit 27e1f8273113 ("khugepaged:
+enable collapse pmd for pte-mapped THP"), which actually omitted flushes
+for the removal of present PTEs, not just for the removal of empty page
+tables.
+
+Link: https://lkml.kernel.org/r/20221129154730.2274278-3-jannh@google.com
+Link: https://lkml.kernel.org/r/20221128180252.1684965-3-jannh@google.com
+Link: https://lkml.kernel.org/r/20221125213714.4115729-3-jannh@google.com
+Fixes: f3f0e1d2150b ("khugepaged: add support of collapse for tmpfs/shmem pages")
+Signed-off-by: Jann Horn <jannh@google.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Yang Shi <shy828301@gmail.com>
+Cc: John Hubbard <jhubbard@nvidia.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+[manual backport: this code was refactored from two copies into a common
+helper between 5.15 and 6.0]
+Signed-off-by: Jann Horn <jannh@google.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/khugepaged.c | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+diff --git a/mm/khugepaged.c b/mm/khugepaged.c
+index 0268b549bd60..0eb3adf4ff68 100644
+--- a/mm/khugepaged.c
++++ b/mm/khugepaged.c
+@@ -1444,6 +1444,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
+       spinlock_t *ptl;
+       int count = 0;
+       int i;
++      struct mmu_notifier_range range;
+       if (!vma || !vma->vm_file ||
+           vma->vm_start > haddr || vma->vm_end < haddr + HPAGE_PMD_SIZE)
+@@ -1537,9 +1538,13 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
+       }
+       /* step 4: collapse pmd */
++      mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm, haddr,
++                              haddr + HPAGE_PMD_SIZE);
++      mmu_notifier_invalidate_range_start(&range);
+       _pmd = pmdp_collapse_flush(vma, haddr, pmd);
+       mm_dec_nr_ptes(mm);
+       tlb_remove_table_sync_one();
++      mmu_notifier_invalidate_range_end(&range);
+       pte_free(mm, pmd_pgtable(_pmd));
+       i_mmap_unlock_write(vma->vm_file->f_mapping);
+@@ -1624,11 +1629,19 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
+                */
+               if (mmap_write_trylock(mm)) {
+                       if (!khugepaged_test_exit(mm)) {
++                              struct mmu_notifier_range range;
++
++                              mmu_notifier_range_init(&range,
++                                                      MMU_NOTIFY_CLEAR, 0,
++                                                      NULL, mm, addr,
++                                                      addr + HPAGE_PMD_SIZE);
++                              mmu_notifier_invalidate_range_start(&range);
+                               /* assume page table is clear */
+                               _pmd = pmdp_collapse_flush(vma, addr, pmd);
+                               mm_dec_nr_ptes(mm);
+                               tlb_remove_table_sync_one();
+                               pte_free(mm, pmd_pgtable(_pmd));
++                              mmu_notifier_invalidate_range_end(&range);
+                       }
+                       mmap_write_unlock(mm);
+               } else {
+-- 
+2.35.1
+
diff --git a/queue-5.10/mm-khugepaged-take-the-right-locks-for-page-table-re.patch b/queue-5.10/mm-khugepaged-take-the-right-locks-for-page-table-re.patch
new file mode 100644 (file)
index 0000000..c973f9c
--- /dev/null
@@ -0,0 +1,141 @@
+From fb456f2c893540f9a10c07cf05d86bc67bea8359 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 6 Dec 2022 18:16:06 +0100
+Subject: mm/khugepaged: take the right locks for page table retraction
+
+From: Jann Horn <jannh@google.com>
+
+commit 8d3c106e19e8d251da31ff4cc7462e4565d65084 upstream.
+
+pagetable walks on address ranges mapped by VMAs can be done under the
+mmap lock, the lock of an anon_vma attached to the VMA, or the lock of the
+VMA's address_space.  Only one of these needs to be held, and it does not
+need to be held in exclusive mode.
+
+Under those circumstances, the rules for concurrent access to page table
+entries are:
+
+ - Terminal page table entries (entries that don't point to another page
+   table) can be arbitrarily changed under the page table lock, with the
+   exception that they always need to be consistent for
+   hardware page table walks and lockless_pages_from_mm().
+   This includes that they can be changed into non-terminal entries.
+ - Non-terminal page table entries (which point to another page table)
+   can not be modified; readers are allowed to READ_ONCE() an entry, verify
+   that it is non-terminal, and then assume that its value will stay as-is.
+
+Retracting a page table involves modifying a non-terminal entry, so
+page-table-level locks are insufficient to protect against concurrent page
+table traversal; it requires taking all the higher-level locks under which
+it is possible to start a page walk in the relevant range in exclusive
+mode.
+
+The collapse_huge_page() path for anonymous THP already follows this rule,
+but the shmem/file THP path was getting it wrong, making it possible for
+concurrent rmap-based operations to cause corruption.
+
+Link: https://lkml.kernel.org/r/20221129154730.2274278-1-jannh@google.com
+Link: https://lkml.kernel.org/r/20221128180252.1684965-1-jannh@google.com
+Link: https://lkml.kernel.org/r/20221125213714.4115729-1-jannh@google.com
+Fixes: 27e1f8273113 ("khugepaged: enable collapse pmd for pte-mapped THP")
+Signed-off-by: Jann Horn <jannh@google.com>
+Reviewed-by: Yang Shi <shy828301@gmail.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: John Hubbard <jhubbard@nvidia.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+[manual backport: this code was refactored from two copies into a common
+helper between 5.15 and 6.0]
+Signed-off-by: Jann Horn <jannh@google.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/khugepaged.c | 31 ++++++++++++++++++++++++++-----
+ 1 file changed, 26 insertions(+), 5 deletions(-)
+
+diff --git a/mm/khugepaged.c b/mm/khugepaged.c
+index cf4dceb9682b..014e8b259313 100644
+--- a/mm/khugepaged.c
++++ b/mm/khugepaged.c
+@@ -1457,6 +1457,14 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
+       if (!hugepage_vma_check(vma, vma->vm_flags | VM_HUGEPAGE))
+               return;
++      /*
++       * Symmetry with retract_page_tables(): Exclude MAP_PRIVATE mappings
++       * that got written to. Without this, we'd have to also lock the
++       * anon_vma if one exists.
++       */
++      if (vma->anon_vma)
++              return;
++
+       hpage = find_lock_page(vma->vm_file->f_mapping,
+                              linear_page_index(vma, haddr));
+       if (!hpage)
+@@ -1469,6 +1477,19 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
+       if (!pmd)
+               goto drop_hpage;
++      /*
++       * We need to lock the mapping so that from here on, only GUP-fast and
++       * hardware page walks can access the parts of the page tables that
++       * we're operating on.
++       */
++      i_mmap_lock_write(vma->vm_file->f_mapping);
++
++      /*
++       * This spinlock should be unnecessary: Nobody else should be accessing
++       * the page tables under spinlock protection here, only
++       * lockless_pages_from_mm() and the hardware page walker can access page
++       * tables while all the high-level locks are held in write mode.
++       */
+       start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl);
+       /* step 1: check all mapped PTEs are to the right huge page */
+@@ -1515,12 +1536,12 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
+       }
+       /* step 4: collapse pmd */
+-      ptl = pmd_lock(vma->vm_mm, pmd);
+       _pmd = pmdp_collapse_flush(vma, haddr, pmd);
+-      spin_unlock(ptl);
+       mm_dec_nr_ptes(mm);
+       pte_free(mm, pmd_pgtable(_pmd));
++      i_mmap_unlock_write(vma->vm_file->f_mapping);
++
+ drop_hpage:
+       unlock_page(hpage);
+       put_page(hpage);
+@@ -1528,6 +1549,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
+ abort:
+       pte_unmap_unlock(start_pte, ptl);
++      i_mmap_unlock_write(vma->vm_file->f_mapping);
+       goto drop_hpage;
+ }
+@@ -1577,7 +1599,8 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
+                * An alternative would be drop the check, but check that page
+                * table is clear before calling pmdp_collapse_flush() under
+                * ptl. It has higher chance to recover THP for the VMA, but
+-               * has higher cost too.
++               * has higher cost too. It would also probably require locking
++               * the anon_vma.
+                */
+               if (vma->anon_vma)
+                       continue;
+@@ -1599,10 +1622,8 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
+                */
+               if (mmap_write_trylock(mm)) {
+                       if (!khugepaged_test_exit(mm)) {
+-                              spinlock_t *ptl = pmd_lock(mm, pmd);
+                               /* assume page table is clear */
+                               _pmd = pmdp_collapse_flush(vma, addr, pmd);
+-                              spin_unlock(ptl);
+                               mm_dec_nr_ptes(mm);
+                               pte_free(mm, pmd_pgtable(_pmd));
+                       }
+-- 
+2.35.1
+
diff --git a/queue-5.10/mm-lru-introduce-testclearpagelru.patch b/queue-5.10/mm-lru-introduce-testclearpagelru.patch
new file mode 100644 (file)
index 0000000..abe6301
--- /dev/null
@@ -0,0 +1,223 @@
+From d84c0415a11eafaa01336ef3fa61f707986b5656 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Dec 2020 12:34:16 -0800
+Subject: mm/lru: introduce TestClearPageLRU()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Alex Shi <alex.shi@linux.alibaba.com>
+
+[ Upstream commit d25b5bd8a8f420b15517c19c4626c0c009f72a63 ]
+
+Currently lru_lock still guards both lru list and page's lru bit, that's
+ok.  but if we want to use specific lruvec lock on the page, we need to
+pin down the page's lruvec/memcg during locking.  Just taking lruvec lock
+first may be undermined by the page's memcg charge/migration.  To fix this
+problem, we will clear the lru bit out of locking and use it as pin down
+action to block the page isolation in memcg changing.
+
+So now a standard steps of page isolation is following:
+       1, get_page();         #pin the page avoid to be free
+       2, TestClearPageLRU(); #block other isolation like memcg change
+       3, spin_lock on lru_lock; #serialize lru list access
+       4, delete page from lru list;
+
+This patch start with the first part: TestClearPageLRU, which combines
+PageLRU check and ClearPageLRU into a macro func TestClearPageLRU.  This
+function will be used as page isolation precondition to prevent other
+isolations some where else.  Then there are may !PageLRU page on lru list,
+need to remove BUG() checking accordingly.
+
+There 2 rules for lru bit now:
+1, the lru bit still indicate if a page on lru list, just in some
+   temporary moment(isolating), the page may have no lru bit when
+   it's on lru list.  but the page still must be on lru list when the
+   lru bit set.
+2, have to remove lru bit before delete it from lru list.
+
+As Andrew Morton mentioned this change would dirty cacheline for a page
+which isn't on the LRU.  But the loss would be acceptable in Rong Chen
+<rong.a.chen@intel.com> report:
+https://lore.kernel.org/lkml/20200304090301.GB5972@shao2-debian/
+
+Link: https://lkml.kernel.org/r/1604566549-62481-15-git-send-email-alex.shi@linux.alibaba.com
+Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
+Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
+Acked-by: Hugh Dickins <hughd@google.com>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
+Cc: Alexander Duyck <alexander.duyck@gmail.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
+Cc: "Huang, Ying" <ying.huang@intel.com>
+Cc: Jann Horn <jannh@google.com>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Kirill A. Shutemov <kirill@shutemov.name>
+Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Mika Penttilä <mika.penttila@nextfour.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Shakeel Butt <shakeelb@google.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Wei Yang <richard.weiyang@gmail.com>
+Cc: Yang Shi <yang.shi@linux.alibaba.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Stable-dep-of: 829ae0f81ce0 ("mm: migrate: fix THP's mapcount on isolation")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/page-flags.h |  1 +
+ mm/mlock.c                 |  3 +--
+ mm/vmscan.c                | 39 +++++++++++++++++++-------------------
+ 3 files changed, 21 insertions(+), 22 deletions(-)
+
+diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
+index 4f6ba9379112..14a0cac9e099 100644
+--- a/include/linux/page-flags.h
++++ b/include/linux/page-flags.h
+@@ -335,6 +335,7 @@ PAGEFLAG(Referenced, referenced, PF_HEAD)
+ PAGEFLAG(Dirty, dirty, PF_HEAD) TESTSCFLAG(Dirty, dirty, PF_HEAD)
+       __CLEARPAGEFLAG(Dirty, dirty, PF_HEAD)
+ PAGEFLAG(LRU, lru, PF_HEAD) __CLEARPAGEFLAG(LRU, lru, PF_HEAD)
++      TESTCLEARFLAG(LRU, lru, PF_HEAD)
+ PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD)
+       TESTCLEARFLAG(Active, active, PF_HEAD)
+ PAGEFLAG(Workingset, workingset, PF_HEAD)
+diff --git a/mm/mlock.c b/mm/mlock.c
+index d487aa864e86..7b0e6334be6f 100644
+--- a/mm/mlock.c
++++ b/mm/mlock.c
+@@ -276,10 +276,9 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
+                        * We already have pin from follow_page_mask()
+                        * so we can spare the get_page() here.
+                        */
+-                      if (PageLRU(page)) {
++                      if (TestClearPageLRU(page)) {
+                               struct lruvec *lruvec;
+-                              ClearPageLRU(page);
+                               lruvec = mem_cgroup_page_lruvec(page,
+                                                       page_pgdat(page));
+                               del_page_from_lru_list(page, lruvec,
+diff --git a/mm/vmscan.c b/mm/vmscan.c
+index 51ccd80e70b6..8d62eedfc794 100644
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -1547,7 +1547,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
+  */
+ int __isolate_lru_page(struct page *page, isolate_mode_t mode)
+ {
+-      int ret = -EINVAL;
++      int ret = -EBUSY;
+       /* Only take pages on the LRU. */
+       if (!PageLRU(page))
+@@ -1557,8 +1557,6 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode)
+       if (PageUnevictable(page) && !(mode & ISOLATE_UNEVICTABLE))
+               return ret;
+-      ret = -EBUSY;
+-
+       /*
+        * To minimise LRU disruption, the caller can indicate that it only
+        * wants to isolate pages it will be able to operate on without
+@@ -1605,8 +1603,10 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode)
+                * sure the page is not being freed elsewhere -- the
+                * page release code relies on it.
+                */
+-              ClearPageLRU(page);
+-              ret = 0;
++              if (TestClearPageLRU(page))
++                      ret = 0;
++              else
++                      put_page(page);
+       }
+       return ret;
+@@ -1672,8 +1672,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
+               page = lru_to_page(src);
+               prefetchw_prev_lru_page(page, src, flags);
+-              VM_BUG_ON_PAGE(!PageLRU(page), page);
+-
+               nr_pages = compound_nr(page);
+               total_scan += nr_pages;
+@@ -1770,21 +1768,18 @@ int isolate_lru_page(struct page *page)
+       VM_BUG_ON_PAGE(!page_count(page), page);
+       WARN_RATELIMIT(PageTail(page), "trying to isolate tail page");
+-      if (PageLRU(page)) {
++      if (TestClearPageLRU(page)) {
+               pg_data_t *pgdat = page_pgdat(page);
+               struct lruvec *lruvec;
+-              spin_lock_irq(&pgdat->lru_lock);
++              get_page(page);
+               lruvec = mem_cgroup_page_lruvec(page, pgdat);
+-              if (PageLRU(page)) {
+-                      int lru = page_lru(page);
+-                      get_page(page);
+-                      ClearPageLRU(page);
+-                      del_page_from_lru_list(page, lruvec, lru);
+-                      ret = 0;
+-              }
++              spin_lock_irq(&pgdat->lru_lock);
++              del_page_from_lru_list(page, lruvec, page_lru(page));
+               spin_unlock_irq(&pgdat->lru_lock);
++              ret = 0;
+       }
++
+       return ret;
+ }
+@@ -4291,6 +4286,10 @@ void check_move_unevictable_pages(struct pagevec *pvec)
+               nr_pages = thp_nr_pages(page);
+               pgscanned += nr_pages;
++              /* block memcg migration during page moving between lru */
++              if (!TestClearPageLRU(page))
++                      continue;
++
+               if (pagepgdat != pgdat) {
+                       if (pgdat)
+                               spin_unlock_irq(&pgdat->lru_lock);
+@@ -4299,10 +4298,7 @@ void check_move_unevictable_pages(struct pagevec *pvec)
+               }
+               lruvec = mem_cgroup_page_lruvec(page, pgdat);
+-              if (!PageLRU(page) || !PageUnevictable(page))
+-                      continue;
+-
+-              if (page_evictable(page)) {
++              if (page_evictable(page) && PageUnevictable(page)) {
+                       enum lru_list lru = page_lru_base_type(page);
+                       VM_BUG_ON_PAGE(PageActive(page), page);
+@@ -4311,12 +4307,15 @@ void check_move_unevictable_pages(struct pagevec *pvec)
+                       add_page_to_lru_list(page, lruvec, lru);
+                       pgrescued += nr_pages;
+               }
++              SetPageLRU(page);
+       }
+       if (pgdat) {
+               __count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued);
+               __count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned);
+               spin_unlock_irq(&pgdat->lru_lock);
++      } else if (pgscanned) {
++              count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned);
+       }
+ }
+ EXPORT_SYMBOL_GPL(check_move_unevictable_pages);
+-- 
+2.35.1
+
diff --git a/queue-5.10/mm-migrate-fix-thp-s-mapcount-on-isolation.patch b/queue-5.10/mm-migrate-fix-thp-s-mapcount-on-isolation.patch
new file mode 100644 (file)
index 0000000..21b8107
--- /dev/null
@@ -0,0 +1,89 @@
+From 91f25a9aa0bb126c81ed361cef0f8608ac4c3f15 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 24 Nov 2022 17:55:23 +0800
+Subject: mm: migrate: fix THP's mapcount on isolation
+
+From: Gavin Shan <gshan@redhat.com>
+
+[ Upstream commit 829ae0f81ce093d674ff2256f66a714753e9ce32 ]
+
+The issue is reported when removing memory through virtio_mem device.  The
+transparent huge page, experienced copy-on-write fault, is wrongly
+regarded as pinned.  The transparent huge page is escaped from being
+isolated in isolate_migratepages_block().  The transparent huge page can't
+be migrated and the corresponding memory block can't be put into offline
+state.
+
+Fix it by replacing page_mapcount() with total_mapcount().  With this, the
+transparent huge page can be isolated and migrated, and the memory block
+can be put into offline state.  Besides, The page's refcount is increased
+a bit earlier to avoid the page is released when the check is executed.
+
+Link: https://lkml.kernel.org/r/20221124095523.31061-1-gshan@redhat.com
+Fixes: 1da2f328fa64 ("mm,thp,compaction,cma: allow THP migration for CMA allocations")
+Signed-off-by: Gavin Shan <gshan@redhat.com>
+Reported-by: Zhenyu Zhang <zhenyzha@redhat.com>
+Tested-by: Zhenyu Zhang <zhenyzha@redhat.com>
+Suggested-by: David Hildenbrand <david@redhat.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: William Kucharski <william.kucharski@oracle.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: <stable@vger.kernel.org>   [5.7+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/compaction.c | 22 +++++++++++-----------
+ 1 file changed, 11 insertions(+), 11 deletions(-)
+
+diff --git a/mm/compaction.c b/mm/compaction.c
+index 57ce6b001b10..54d1041560c7 100644
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -957,29 +957,29 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
+                       goto isolate_fail;
+               }
++              /*
++               * Be careful not to clear PageLRU until after we're
++               * sure the page is not being freed elsewhere -- the
++               * page release code relies on it.
++               */
++              if (unlikely(!get_page_unless_zero(page)))
++                      goto isolate_fail;
++
+               /*
+                * Migration will fail if an anonymous page is pinned in memory,
+                * so avoid taking lru_lock and isolating it unnecessarily in an
+                * admittedly racy check.
+                */
+               mapping = page_mapping(page);
+-              if (!mapping && page_count(page) > page_mapcount(page))
+-                      goto isolate_fail;
++              if (!mapping && (page_count(page) - 1) > total_mapcount(page))
++                      goto isolate_fail_put;
+               /*
+                * Only allow to migrate anonymous pages in GFP_NOFS context
+                * because those do not depend on fs locks.
+                */
+               if (!(cc->gfp_mask & __GFP_FS) && mapping)
+-                      goto isolate_fail;
+-
+-              /*
+-               * Be careful not to clear PageLRU until after we're
+-               * sure the page is not being freed elsewhere -- the
+-               * page release code relies on it.
+-               */
+-              if (unlikely(!get_page_unless_zero(page)))
+-                      goto isolate_fail;
++                      goto isolate_fail_put;
+               /* Only take pages on LRU: a check now makes later tests safe */
+               if (!PageLRU(page))
+-- 
+2.35.1
+
diff --git a/queue-5.10/mm-mlock-remove-__munlock_isolate_lru_page.patch b/queue-5.10/mm-mlock-remove-__munlock_isolate_lru_page.patch
new file mode 100644 (file)
index 0000000..b5a8094
--- /dev/null
@@ -0,0 +1,104 @@
+From 6649227e330b37c9583146cd7446b41771b3a7f1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Dec 2020 12:34:11 -0800
+Subject: mm/mlock: remove __munlock_isolate_lru_page()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Alex Shi <alex.shi@linux.alibaba.com>
+
+[ Upstream commit 13805a88a9bd3fb37f33dd8972d904de62796f3d ]
+
+__munlock_isolate_lru_page() only has one caller, remove it to clean up
+and simplify code.
+
+Link: https://lkml.kernel.org/r/1604566549-62481-14-git-send-email-alex.shi@linux.alibaba.com
+Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
+Acked-by: Hugh Dickins <hughd@google.com>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Alexander Duyck <alexander.duyck@gmail.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Cc: "Chen, Rong A" <rong.a.chen@intel.com>
+Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
+Cc: "Huang, Ying" <ying.huang@intel.com>
+Cc: Jann Horn <jannh@google.com>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: Kirill A. Shutemov <kirill@shutemov.name>
+Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Mika Penttilä <mika.penttila@nextfour.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Shakeel Butt <shakeelb@google.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
+Cc: Wei Yang <richard.weiyang@gmail.com>
+Cc: Yang Shi <yang.shi@linux.alibaba.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Stable-dep-of: 829ae0f81ce0 ("mm: migrate: fix THP's mapcount on isolation")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/mlock.c | 31 +++++++++----------------------
+ 1 file changed, 9 insertions(+), 22 deletions(-)
+
+diff --git a/mm/mlock.c b/mm/mlock.c
+index 796c726a0407..d487aa864e86 100644
+--- a/mm/mlock.c
++++ b/mm/mlock.c
+@@ -105,26 +105,6 @@ void mlock_vma_page(struct page *page)
+       }
+ }
+-/*
+- * Isolate a page from LRU with optional get_page() pin.
+- * Assumes lru_lock already held and page already pinned.
+- */
+-static bool __munlock_isolate_lru_page(struct page *page, bool getpage)
+-{
+-      if (PageLRU(page)) {
+-              struct lruvec *lruvec;
+-
+-              lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
+-              if (getpage)
+-                      get_page(page);
+-              ClearPageLRU(page);
+-              del_page_from_lru_list(page, lruvec, page_lru(page));
+-              return true;
+-      }
+-
+-      return false;
+-}
+-
+ /*
+  * Finish munlock after successful page isolation
+  *
+@@ -296,9 +276,16 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
+                        * We already have pin from follow_page_mask()
+                        * so we can spare the get_page() here.
+                        */
+-                      if (__munlock_isolate_lru_page(page, false))
++                      if (PageLRU(page)) {
++                              struct lruvec *lruvec;
++
++                              ClearPageLRU(page);
++                              lruvec = mem_cgroup_page_lruvec(page,
++                                                      page_pgdat(page));
++                              del_page_from_lru_list(page, lruvec,
++                                                      page_lru(page));
+                               continue;
+-                      else
++                      } else
+                               __munlock_isolation_failed(page);
+               } else {
+                       delta_munlocked++;
+-- 
+2.35.1
+
diff --git a/queue-5.10/mm-mlock-remove-lru_lock-on-testclearpagemlocked.patch b/queue-5.10/mm-mlock-remove-lru_lock-on-testclearpagemlocked.patch
new file mode 100644 (file)
index 0000000..2efbb62
--- /dev/null
@@ -0,0 +1,115 @@
+From b824ddafd0a14e7a943171ce5903b83057e0c587 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Dec 2020 12:34:07 -0800
+Subject: mm/mlock: remove lru_lock on TestClearPageMlocked
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Alex Shi <alex.shi@linux.alibaba.com>
+
+[ Upstream commit 3db19aa39bac33f2e850fa1ddd67be29b192e51f ]
+
+In the func munlock_vma_page, comments mentained lru_lock needed for
+serialization with split_huge_pages.  But the page must be PageLocked as
+well as pages in split_huge_page series funcs.  Thus the PageLocked is
+enough to serialize both funcs.
+
+Further more, Hugh Dickins pointed: before splitting in
+split_huge_page_to_list, the page was unmap_page() to remove pmd/ptes
+which protect the page from munlock.  Thus, no needs to guard
+__split_huge_page_tail for mlock clean, just keep the lru_lock there for
+isolation purpose.
+
+LKP found a preempt issue on __mod_zone_page_state which need change to
+mod_zone_page_state.  Thanks!
+
+Link: https://lkml.kernel.org/r/1604566549-62481-13-git-send-email-alex.shi@linux.alibaba.com
+Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
+Acked-by: Hugh Dickins <hughd@google.com>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Alexander Duyck <alexander.duyck@gmail.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Cc: "Chen, Rong A" <rong.a.chen@intel.com>
+Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
+Cc: "Huang, Ying" <ying.huang@intel.com>
+Cc: Jann Horn <jannh@google.com>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: Kirill A. Shutemov <kirill@shutemov.name>
+Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Mika Penttilä <mika.penttila@nextfour.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Shakeel Butt <shakeelb@google.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
+Cc: Wei Yang <richard.weiyang@gmail.com>
+Cc: Yang Shi <yang.shi@linux.alibaba.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Stable-dep-of: 829ae0f81ce0 ("mm: migrate: fix THP's mapcount on isolation")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/mlock.c | 26 +++++---------------------
+ 1 file changed, 5 insertions(+), 21 deletions(-)
+
+diff --git a/mm/mlock.c b/mm/mlock.c
+index 884b1216da6a..796c726a0407 100644
+--- a/mm/mlock.c
++++ b/mm/mlock.c
+@@ -187,40 +187,24 @@ static void __munlock_isolation_failed(struct page *page)
+ unsigned int munlock_vma_page(struct page *page)
+ {
+       int nr_pages;
+-      pg_data_t *pgdat = page_pgdat(page);
+       /* For try_to_munlock() and to serialize with page migration */
+       BUG_ON(!PageLocked(page));
+-
+       VM_BUG_ON_PAGE(PageTail(page), page);
+-      /*
+-       * Serialize with any parallel __split_huge_page_refcount() which
+-       * might otherwise copy PageMlocked to part of the tail pages before
+-       * we clear it in the head page. It also stabilizes thp_nr_pages().
+-       */
+-      spin_lock_irq(&pgdat->lru_lock);
+-
+       if (!TestClearPageMlocked(page)) {
+               /* Potentially, PTE-mapped THP: do not skip the rest PTEs */
+-              nr_pages = 1;
+-              goto unlock_out;
++              return 0;
+       }
+       nr_pages = thp_nr_pages(page);
+-      __mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
++      mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
+-      if (__munlock_isolate_lru_page(page, true)) {
+-              spin_unlock_irq(&pgdat->lru_lock);
++      if (!isolate_lru_page(page))
+               __munlock_isolated_page(page);
+-              goto out;
+-      }
+-      __munlock_isolation_failed(page);
+-
+-unlock_out:
+-      spin_unlock_irq(&pgdat->lru_lock);
++      else
++              __munlock_isolation_failed(page);
+-out:
+       return nr_pages - 1;
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.10/mm-vmscan-__isolate_lru_page_prepare-cleanup.patch b/queue-5.10/mm-vmscan-__isolate_lru_page_prepare-cleanup.patch
new file mode 100644 (file)
index 0000000..5ab6584
--- /dev/null
@@ -0,0 +1,183 @@
+From af78db2daeeeec6283747a8d591daf6df57e1961 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 24 Feb 2021 12:08:01 -0800
+Subject: mm/vmscan: __isolate_lru_page_prepare() cleanup
+
+From: Alex Shi <alex.shi@linux.alibaba.com>
+
+[ Upstream commit c2135f7c570bc274035834848d9bf46ea89ba763 ]
+
+The function just returns 2 results, so using a 'switch' to deal with its
+result is unnecessary.  Also simplify it to a bool func as Vlastimil
+suggested.
+
+Also remove 'goto' by reusing list_move(), and take Matthew Wilcox's
+suggestion to update comments in function.
+
+Link: https://lkml.kernel.org/r/728874d7-2d93-4049-68c1-dcc3b2d52ccd@linux.alibaba.com
+Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
+Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Yu Zhao <yuzhao@google.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Stable-dep-of: 829ae0f81ce0 ("mm: migrate: fix THP's mapcount on isolation")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/swap.h |  2 +-
+ mm/compaction.c      |  2 +-
+ mm/vmscan.c          | 68 ++++++++++++++++++++------------------------
+ 3 files changed, 33 insertions(+), 39 deletions(-)
+
+diff --git a/include/linux/swap.h b/include/linux/swap.h
+index 3577d3a6ec37..394d5de5d4b4 100644
+--- a/include/linux/swap.h
++++ b/include/linux/swap.h
+@@ -358,7 +358,7 @@ extern void lru_cache_add_inactive_or_unevictable(struct page *page,
+ extern unsigned long zone_reclaimable_pages(struct zone *zone);
+ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
+                                       gfp_t gfp_mask, nodemask_t *mask);
+-extern int __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode);
++extern bool __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode);
+ extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
+                                                 unsigned long nr_pages,
+                                                 gfp_t gfp_mask,
+diff --git a/mm/compaction.c b/mm/compaction.c
+index ba3e907f03b7..ea46aadc7c21 100644
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -980,7 +980,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
+               if (unlikely(!get_page_unless_zero(page)))
+                       goto isolate_fail;
+-              if (__isolate_lru_page_prepare(page, isolate_mode) != 0)
++              if (!__isolate_lru_page_prepare(page, isolate_mode))
+                       goto isolate_fail_put;
+               /* Try isolate the page */
+diff --git a/mm/vmscan.c b/mm/vmscan.c
+index 5ada402c8d95..00a47845a15b 100644
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -1543,19 +1543,17 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
+  * page:      page to consider
+  * mode:      one of the LRU isolation modes defined above
+  *
+- * returns 0 on success, -ve errno on failure.
++ * returns true on success, false on failure.
+  */
+-int __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode)
++bool __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode)
+ {
+-      int ret = -EBUSY;
+-
+       /* Only take pages on the LRU. */
+       if (!PageLRU(page))
+-              return ret;
++              return false;
+       /* Compaction should not handle unevictable pages but CMA can do so */
+       if (PageUnevictable(page) && !(mode & ISOLATE_UNEVICTABLE))
+-              return ret;
++              return false;
+       /*
+        * To minimise LRU disruption, the caller can indicate that it only
+@@ -1568,7 +1566,7 @@ int __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode)
+       if (mode & ISOLATE_ASYNC_MIGRATE) {
+               /* All the caller can do on PageWriteback is block */
+               if (PageWriteback(page))
+-                      return ret;
++                      return false;
+               if (PageDirty(page)) {
+                       struct address_space *mapping;
+@@ -1584,20 +1582,20 @@ int __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode)
+                        * from the page cache.
+                        */
+                       if (!trylock_page(page))
+-                              return ret;
++                              return false;
+                       mapping = page_mapping(page);
+                       migrate_dirty = !mapping || mapping->a_ops->migratepage;
+                       unlock_page(page);
+                       if (!migrate_dirty)
+-                              return ret;
++                              return false;
+               }
+       }
+       if ((mode & ISOLATE_UNMAPPED) && page_mapped(page))
+-              return ret;
++              return false;
+-      return 0;
++      return true;
+ }
+ /*
+@@ -1679,35 +1677,31 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
+                * only when the page is being freed somewhere else.
+                */
+               scan += nr_pages;
+-              switch (__isolate_lru_page_prepare(page, mode)) {
+-              case 0:
+-                      /*
+-                       * Be careful not to clear PageLRU until after we're
+-                       * sure the page is not being freed elsewhere -- the
+-                       * page release code relies on it.
+-                       */
+-                      if (unlikely(!get_page_unless_zero(page)))
+-                              goto busy;
+-
+-                      if (!TestClearPageLRU(page)) {
+-                              /*
+-                               * This page may in other isolation path,
+-                               * but we still hold lru_lock.
+-                               */
+-                              put_page(page);
+-                              goto busy;
+-                      }
+-
+-                      nr_taken += nr_pages;
+-                      nr_zone_taken[page_zonenum(page)] += nr_pages;
+-                      list_move(&page->lru, dst);
+-                      break;
++              if (!__isolate_lru_page_prepare(page, mode)) {
++                      /* It is being freed elsewhere */
++                      list_move(&page->lru, src);
++                      continue;
++              }
++              /*
++               * Be careful not to clear PageLRU until after we're
++               * sure the page is not being freed elsewhere -- the
++               * page release code relies on it.
++               */
++              if (unlikely(!get_page_unless_zero(page))) {
++                      list_move(&page->lru, src);
++                      continue;
++              }
+-              default:
+-busy:
+-                      /* else it is being freed elsewhere */
++              if (!TestClearPageLRU(page)) {
++                      /* Another thread is already isolating this page */
++                      put_page(page);
+                       list_move(&page->lru, src);
++                      continue;
+               }
++
++              nr_taken += nr_pages;
++              nr_zone_taken[page_zonenum(page)] += nr_pages;
++              list_move(&page->lru, dst);
+       }
+       /*
+-- 
+2.35.1
+
diff --git a/queue-5.10/net-usb-qmi_wwan-add-u-blox-0x1342-composition.patch b/queue-5.10/net-usb-qmi_wwan-add-u-blox-0x1342-composition.patch
new file mode 100644 (file)
index 0000000..aa18401
--- /dev/null
@@ -0,0 +1,53 @@
+From f7759205e536a72dd8f59d500166f51408c42e0f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 21 Nov 2022 13:54:55 +0100
+Subject: net: usb: qmi_wwan: add u-blox 0x1342 composition
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Davide Tronchin <davide.tronchin.94@gmail.com>
+
+[ Upstream commit a487069e11b6527373f7c6f435d8998051d0b5d9 ]
+
+Add RmNet support for LARA-L6.
+
+LARA-L6 module can be configured (by AT interface) in three different
+USB modes:
+* Default mode (Vendor ID: 0x1546 Product ID: 0x1341) with 4 serial
+interfaces
+* RmNet mode (Vendor ID: 0x1546 Product ID: 0x1342) with 4 serial
+interfaces and 1 RmNet virtual network interface
+* CDC-ECM mode (Vendor ID: 0x1546 Product ID: 0x1343) with 4 serial
+interface and 1 CDC-ECM virtual network interface
+
+In RmNet mode LARA-L6 exposes the following interfaces:
+If 0: Diagnostic
+If 1: AT parser
+If 2: AT parser
+If 3: AT parset/alternative functions
+If 4: RMNET interface
+
+Signed-off-by: Davide Tronchin <davide.tronchin.94@gmail.com>
+Acked-by: Bjørn Mork <bjorn@mork.no>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/usb/qmi_wwan.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
+index 7313e6e03c12..bce151e3706a 100644
+--- a/drivers/net/usb/qmi_wwan.c
++++ b/drivers/net/usb/qmi_wwan.c
+@@ -1352,6 +1352,7 @@ static const struct usb_device_id products[] = {
+       {QMI_FIXED_INTF(0x0489, 0xe0b4, 0)},    /* Foxconn T77W968 LTE */
+       {QMI_FIXED_INTF(0x0489, 0xe0b5, 0)},    /* Foxconn T77W968 LTE with eSIM support*/
+       {QMI_FIXED_INTF(0x2692, 0x9025, 4)},    /* Cellient MPL200 (rebranded Qualcomm 05c6:9025) */
++      {QMI_QUIRK_SET_DTR(0x1546, 0x1342, 4)}, /* u-blox LARA-L6 */
+       /* 4. Gobi 1000 devices */
+       {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)},    /* Acer Gobi Modem Device */
+-- 
+2.35.1
+
diff --git a/queue-5.10/regulator-slg51000-wait-after-asserting-cs-pin.patch b/queue-5.10/regulator-slg51000-wait-after-asserting-cs-pin.patch
new file mode 100644 (file)
index 0000000..d8b495c
--- /dev/null
@@ -0,0 +1,44 @@
+From 19b51875859c93759b9e767950f8d2b937384249 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Nov 2022 14:10:35 +0100
+Subject: regulator: slg51000: Wait after asserting CS pin
+
+From: Konrad Dybcio <konrad.dybcio@linaro.org>
+
+[ Upstream commit 0b24dfa587c6cc7484cfb170da5c7dd73451f670 ]
+
+Sony's downstream driver [1], among some other changes, adds a
+seemingly random 10ms usleep_range, which turned out to be necessary
+for the hardware to function properly on at least Sony Xperia 1 IV.
+Without this, I2C transactions with the SLG51000 straight up fail.
+
+Relax (10-10ms -> 10-11ms) and add the aforementioned sleep to make
+sure the hardware has some time to wake up.
+
+(nagara-2.0.0-mlc/vendor/semc/hardware/camera-kernel-module/)
+[1] https://developer.sony.com/file/download/open-source-archive-for-64-0-m-4-29/
+
+Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
+Link: https://lore.kernel.org/r/20221118131035.54874-1-konrad.dybcio@linaro.org
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/regulator/slg51000-regulator.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/regulator/slg51000-regulator.c b/drivers/regulator/slg51000-regulator.c
+index 75a941fb3c2b..1b2eee95ad3f 100644
+--- a/drivers/regulator/slg51000-regulator.c
++++ b/drivers/regulator/slg51000-regulator.c
+@@ -457,6 +457,8 @@ static int slg51000_i2c_probe(struct i2c_client *client)
+               chip->cs_gpiod = cs_gpiod;
+       }
++      usleep_range(10000, 11000);
++
+       i2c_set_clientdata(client, chip);
+       chip->chip_irq = client->irq;
+       chip->dev = dev;
+-- 
+2.35.1
+
diff --git a/queue-5.10/regulator-twl6030-fix-get-status-of-twl6032-regulato.patch b/queue-5.10/regulator-twl6030-fix-get-status-of-twl6032-regulato.patch
new file mode 100644 (file)
index 0000000..f4dc138
--- /dev/null
@@ -0,0 +1,69 @@
+From 6d822387a0acf36069221010ef3e9a99131f4167 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 20 Nov 2022 23:12:08 +0100
+Subject: regulator: twl6030: fix get status of twl6032 regulators
+
+From: Andreas Kemnade <andreas@kemnade.info>
+
+[ Upstream commit 31a6297b89aabc81b274c093a308a7f5b55081a7 ]
+
+Status is reported as always off in the 6032 case. Status
+reporting now matches the logic in the setters. Once of
+the differences to the 6030 is that there are no groups,
+therefore the state needs to be read out in the lower bits.
+
+Signed-off-by: Andreas Kemnade <andreas@kemnade.info>
+Link: https://lore.kernel.org/r/20221120221208.3093727-3-andreas@kemnade.info
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/regulator/twl6030-regulator.c | 15 +++++++++++----
+ 1 file changed, 11 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/regulator/twl6030-regulator.c b/drivers/regulator/twl6030-regulator.c
+index 7c7e3648ea4b..f3856750944f 100644
+--- a/drivers/regulator/twl6030-regulator.c
++++ b/drivers/regulator/twl6030-regulator.c
+@@ -67,6 +67,7 @@ struct twlreg_info {
+ #define TWL6030_CFG_STATE_SLEEP       0x03
+ #define TWL6030_CFG_STATE_GRP_SHIFT   5
+ #define TWL6030_CFG_STATE_APP_SHIFT   2
++#define TWL6030_CFG_STATE_MASK                0x03
+ #define TWL6030_CFG_STATE_APP_MASK    (0x03 << TWL6030_CFG_STATE_APP_SHIFT)
+ #define TWL6030_CFG_STATE_APP(v)      (((v) & TWL6030_CFG_STATE_APP_MASK) >>\
+                                               TWL6030_CFG_STATE_APP_SHIFT)
+@@ -128,13 +129,14 @@ static int twl6030reg_is_enabled(struct regulator_dev *rdev)
+               if (grp < 0)
+                       return grp;
+               grp &= P1_GRP_6030;
++              val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE);
++              val = TWL6030_CFG_STATE_APP(val);
+       } else {
++              val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE);
++              val &= TWL6030_CFG_STATE_MASK;
+               grp = 1;
+       }
+-      val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE);
+-      val = TWL6030_CFG_STATE_APP(val);
+-
+       return grp && (val == TWL6030_CFG_STATE_ON);
+ }
+@@ -187,7 +189,12 @@ static int twl6030reg_get_status(struct regulator_dev *rdev)
+       val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE);
+-      switch (TWL6030_CFG_STATE_APP(val)) {
++      if (info->features & TWL6032_SUBCLASS)
++              val &= TWL6030_CFG_STATE_MASK;
++      else
++              val = TWL6030_CFG_STATE_APP(val);
++
++      switch (val) {
+       case TWL6030_CFG_STATE_ON:
+               return REGULATOR_STATUS_NORMAL;
+-- 
+2.35.1
+
diff --git a/queue-5.10/rtc-check-return-value-from-mc146818_get_time.patch b/queue-5.10/rtc-check-return-value-from-mc146818_get_time.patch
new file mode 100644 (file)
index 0000000..7a3466f
--- /dev/null
@@ -0,0 +1,159 @@
+From 7a3b77a291c1fb84974f59d34a2d5cf044f52d8f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 10 Dec 2021 21:01:25 +0100
+Subject: rtc: Check return value from mc146818_get_time()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mateusz Jończyk <mat.jonczyk@o2.pl>
+
+[ Upstream commit 0dd8d6cb9eddfe637bcd821bbfd40ebd5a0737b9 ]
+
+There are 4 users of mc146818_get_time() and none of them was checking
+the return value from this function. Change this.
+
+Print the appropriate warnings in callers of mc146818_get_time() instead
+of in the function mc146818_get_time() itself, in order not to add
+strings to rtc-mc146818-lib.c, which is kind of a library.
+
+The callers of alpha_rtc_read_time() and cmos_read_time() may use the
+contents of (struct rtc_time *) even when the functions return a failure
+code. Therefore, set the contents of (struct rtc_time *) to 0x00,
+which looks more sensible then 0xff and aligns with the (possibly
+stale?) comment in cmos_read_time:
+
+       /*
+        * If pm_trace abused the RTC for storage, set the timespec to 0,
+        * which tells the caller that this RTC value is unusable.
+        */
+
+For consistency, do this in mc146818_get_time().
+
+Note: hpet_rtc_interrupt() may call mc146818_get_time() many times a
+second. It is very unlikely, though, that the RTC suddenly stops
+working and mc146818_get_time() would consistently fail.
+
+Only compile-tested on alpha.
+
+Signed-off-by: Mateusz Jończyk <mat.jonczyk@o2.pl>
+Cc: Richard Henderson <rth@twiddle.net>
+Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
+Cc: Matt Turner <mattst88@gmail.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Alessandro Zummo <a.zummo@towertech.it>
+Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Cc: linux-alpha@vger.kernel.org
+Cc: x86@kernel.org
+Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Link: https://lore.kernel.org/r/20211210200131.153887-4-mat.jonczyk@o2.pl
+Stable-dep-of: cd17420ebea5 ("rtc: cmos: avoid UIP when writing alarm time")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/alpha/kernel/rtc.c        | 7 ++++++-
+ arch/x86/kernel/hpet.c         | 8 ++++++--
+ drivers/base/power/trace.c     | 6 +++++-
+ drivers/rtc/rtc-cmos.c         | 9 ++++++++-
+ drivers/rtc/rtc-mc146818-lib.c | 2 +-
+ 5 files changed, 26 insertions(+), 6 deletions(-)
+
+diff --git a/arch/alpha/kernel/rtc.c b/arch/alpha/kernel/rtc.c
+index 1b1d5963ac55..48ffbfbd0624 100644
+--- a/arch/alpha/kernel/rtc.c
++++ b/arch/alpha/kernel/rtc.c
+@@ -80,7 +80,12 @@ init_rtc_epoch(void)
+ static int
+ alpha_rtc_read_time(struct device *dev, struct rtc_time *tm)
+ {
+-      mc146818_get_time(tm);
++      int ret = mc146818_get_time(tm);
++
++      if (ret < 0) {
++              dev_err_ratelimited(dev, "unable to read current time\n");
++              return ret;
++      }
+       /* Adjust for non-default epochs.  It's easier to depend on the
+          generic __get_rtc_time and adjust the epoch here than create
+diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
+index 4ab7a9757e52..574df24a8e5a 100644
+--- a/arch/x86/kernel/hpet.c
++++ b/arch/x86/kernel/hpet.c
+@@ -1325,8 +1325,12 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
+       hpet_rtc_timer_reinit();
+       memset(&curr_time, 0, sizeof(struct rtc_time));
+-      if (hpet_rtc_flags & (RTC_UIE | RTC_AIE))
+-              mc146818_get_time(&curr_time);
++      if (hpet_rtc_flags & (RTC_UIE | RTC_AIE)) {
++              if (unlikely(mc146818_get_time(&curr_time) < 0)) {
++                      pr_err_ratelimited("unable to read current time from RTC\n");
++                      return IRQ_HANDLED;
++              }
++      }
+       if (hpet_rtc_flags & RTC_UIE &&
+           curr_time.tm_sec != hpet_prev_update_sec) {
+diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c
+index 94665037f4a3..72b7a92337b1 100644
+--- a/drivers/base/power/trace.c
++++ b/drivers/base/power/trace.c
+@@ -120,7 +120,11 @@ static unsigned int read_magic_time(void)
+       struct rtc_time time;
+       unsigned int val;
+-      mc146818_get_time(&time);
++      if (mc146818_get_time(&time) < 0) {
++              pr_err("Unable to read current time from RTC\n");
++              return 0;
++      }
++
+       pr_info("RTC time: %ptRt, date: %ptRd\n", &time, &time);
+       val = time.tm_year;                             /* 100 years */
+       if (val > 100)
+diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
+index ed4f512eabf0..f8358bb2ae31 100644
+--- a/drivers/rtc/rtc-cmos.c
++++ b/drivers/rtc/rtc-cmos.c
+@@ -222,6 +222,8 @@ static inline void cmos_write_bank2(unsigned char val, unsigned char addr)
+ static int cmos_read_time(struct device *dev, struct rtc_time *t)
+ {
++      int ret;
++
+       /*
+        * If pm_trace abused the RTC for storage, set the timespec to 0,
+        * which tells the caller that this RTC value is unusable.
+@@ -229,7 +231,12 @@ static int cmos_read_time(struct device *dev, struct rtc_time *t)
+       if (!pm_trace_rtc_valid())
+               return -EIO;
+-      mc146818_get_time(t);
++      ret = mc146818_get_time(t);
++      if (ret < 0) {
++              dev_err_ratelimited(dev, "unable to read current time\n");
++              return ret;
++      }
++
+       return 0;
+ }
+diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c
+index 6262f0680f13..3ae5c690f22b 100644
+--- a/drivers/rtc/rtc-mc146818-lib.c
++++ b/drivers/rtc/rtc-mc146818-lib.c
+@@ -24,7 +24,7 @@ unsigned int mc146818_get_time(struct rtc_time *time)
+       /* Ensure that the RTC is accessible. Bit 6 must be 0! */
+       if (WARN_ON_ONCE((CMOS_READ(RTC_VALID) & 0x40) != 0)) {
+               spin_unlock_irqrestore(&rtc_lock, flags);
+-              memset(time, 0xff, sizeof(*time));
++              memset(time, 0, sizeof(*time));
+               return -EIO;
+       }
+-- 
+2.35.1
+
diff --git a/queue-5.10/rtc-cmos-avoid-uip-when-reading-alarm-time.patch b/queue-5.10/rtc-cmos-avoid-uip-when-reading-alarm-time.patch
new file mode 100644 (file)
index 0000000..7d1705a
--- /dev/null
@@ -0,0 +1,146 @@
+From fcac9c587c7c5343a87cdce5d46cd47ca1057c21 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 10 Dec 2021 21:01:30 +0100
+Subject: rtc: cmos: avoid UIP when reading alarm time
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mateusz Jończyk <mat.jonczyk@o2.pl>
+
+[ Upstream commit cdedc45c579faf8cc6608d3ef81576ee0d512aa4 ]
+
+Some Intel chipsets disconnect the time and date RTC registers when the
+clock update is in progress: during this time reads may return bogus
+values and writes fail silently. This includes the RTC alarm registers.
+[1]
+
+cmos_read_alarm() did not take account for that, which caused alarm time
+reads to sometimes return bogus values. This can be shown with a test
+patch that I am attaching to this patch series.
+
+Fix this, by using mc146818_avoid_UIP().
+
+[1] 7th Generation Intel ® Processor Family I/O for U/Y Platforms [...]
+Datasheet, Volume 1 of 2 (Intel's Document Number: 334658-006)
+Page 208
+https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/7th-and-8th-gen-core-family-mobile-u-y-processor-lines-i-o-datasheet-vol-1.pdf
+        "If a RAM read from the ten time and date bytes is attempted
+        during an update cycle, the value read do not necessarily
+        represent the true contents of those locations. Any RAM writes
+        under the same conditions are ignored."
+
+Signed-off-by: Mateusz Jończyk <mat.jonczyk@o2.pl>
+Cc: Alessandro Zummo <a.zummo@towertech.it>
+Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Link: https://lore.kernel.org/r/20211210200131.153887-9-mat.jonczyk@o2.pl
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/rtc/rtc-cmos.c | 72 ++++++++++++++++++++++++++++--------------
+ 1 file changed, 49 insertions(+), 23 deletions(-)
+
+diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
+index 601e3967e1f0..d419eb988b22 100644
+--- a/drivers/rtc/rtc-cmos.c
++++ b/drivers/rtc/rtc-cmos.c
+@@ -249,10 +249,46 @@ static int cmos_set_time(struct device *dev, struct rtc_time *t)
+       return mc146818_set_time(t);
+ }
++struct cmos_read_alarm_callback_param {
++      struct cmos_rtc *cmos;
++      struct rtc_time *time;
++      unsigned char   rtc_control;
++};
++
++static void cmos_read_alarm_callback(unsigned char __always_unused seconds,
++                                   void *param_in)
++{
++      struct cmos_read_alarm_callback_param *p =
++              (struct cmos_read_alarm_callback_param *)param_in;
++      struct rtc_time *time = p->time;
++
++      time->tm_sec = CMOS_READ(RTC_SECONDS_ALARM);
++      time->tm_min = CMOS_READ(RTC_MINUTES_ALARM);
++      time->tm_hour = CMOS_READ(RTC_HOURS_ALARM);
++
++      if (p->cmos->day_alrm) {
++              /* ignore upper bits on readback per ACPI spec */
++              time->tm_mday = CMOS_READ(p->cmos->day_alrm) & 0x3f;
++              if (!time->tm_mday)
++                      time->tm_mday = -1;
++
++              if (p->cmos->mon_alrm) {
++                      time->tm_mon = CMOS_READ(p->cmos->mon_alrm);
++                      if (!time->tm_mon)
++                              time->tm_mon = -1;
++              }
++      }
++
++      p->rtc_control = CMOS_READ(RTC_CONTROL);
++}
++
+ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t)
+ {
+       struct cmos_rtc *cmos = dev_get_drvdata(dev);
+-      unsigned char   rtc_control;
++      struct cmos_read_alarm_callback_param p = {
++              .cmos = cmos,
++              .time = &t->time,
++      };
+       /* This not only a rtc_op, but also called directly */
+       if (!is_valid_irq(cmos->irq))
+@@ -263,28 +299,18 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t)
+        * the future.
+        */
+-      spin_lock_irq(&rtc_lock);
+-      t->time.tm_sec = CMOS_READ(RTC_SECONDS_ALARM);
+-      t->time.tm_min = CMOS_READ(RTC_MINUTES_ALARM);
+-      t->time.tm_hour = CMOS_READ(RTC_HOURS_ALARM);
+-
+-      if (cmos->day_alrm) {
+-              /* ignore upper bits on readback per ACPI spec */
+-              t->time.tm_mday = CMOS_READ(cmos->day_alrm) & 0x3f;
+-              if (!t->time.tm_mday)
+-                      t->time.tm_mday = -1;
+-
+-              if (cmos->mon_alrm) {
+-                      t->time.tm_mon = CMOS_READ(cmos->mon_alrm);
+-                      if (!t->time.tm_mon)
+-                              t->time.tm_mon = -1;
+-              }
+-      }
+-
+-      rtc_control = CMOS_READ(RTC_CONTROL);
+-      spin_unlock_irq(&rtc_lock);
++      /* Some Intel chipsets disconnect the alarm registers when the clock
++       * update is in progress - during this time reads return bogus values
++       * and writes may fail silently. See for example "7th Generation Intel®
++       * Processor Family I/O for U/Y Platforms [...] Datasheet", section
++       * 27.7.1
++       *
++       * Use the mc146818_avoid_UIP() function to avoid this.
++       */
++      if (!mc146818_avoid_UIP(cmos_read_alarm_callback, &p))
++              return -EIO;
+-      if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
++      if (!(p.rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
+               if (((unsigned)t->time.tm_sec) < 0x60)
+                       t->time.tm_sec = bcd2bin(t->time.tm_sec);
+               else
+@@ -313,7 +339,7 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t)
+               }
+       }
+-      t->enabled = !!(rtc_control & RTC_AIE);
++      t->enabled = !!(p.rtc_control & RTC_AIE);
+       t->pending = 0;
+       return 0;
+-- 
+2.35.1
+
diff --git a/queue-5.10/rtc-cmos-avoid-uip-when-writing-alarm-time.patch b/queue-5.10/rtc-cmos-avoid-uip-when-writing-alarm-time.patch
new file mode 100644 (file)
index 0000000..d47b334
--- /dev/null
@@ -0,0 +1,178 @@
+From 84f76456ea301647d1e114c5f17b16c62b8d588f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 10 Dec 2021 21:01:31 +0100
+Subject: rtc: cmos: avoid UIP when writing alarm time
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mateusz Jończyk <mat.jonczyk@o2.pl>
+
+[ Upstream commit cd17420ebea580c22dd3a93f7237de3d2cfafc37 ]
+
+Some Intel chipsets disconnect the time and date RTC registers when the
+clock update is in progress: during this time reads may return bogus
+values and writes fail silently. This includes the RTC alarm registers.
+[1]
+
+cmos_set_alarm() did not take account for that, fix it.
+
+[1] 7th Generation Intel ® Processor Family I/O for U/Y Platforms [...]
+Datasheet, Volume 1 of 2 (Intel's Document Number: 334658-006)
+Page 208
+https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/7th-and-8th-gen-core-family-mobile-u-y-processor-lines-i-o-datasheet-vol-1.pdf
+        "If a RAM read from the ten time and date bytes is attempted
+        during an update cycle, the value read do not necessarily
+        represent the true contents of those locations. Any RAM writes
+        under the same conditions are ignored."
+
+Signed-off-by: Mateusz Jończyk <mat.jonczyk@o2.pl>
+Cc: Alessandro Zummo <a.zummo@towertech.it>
+Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Link: https://lore.kernel.org/r/20211210200131.153887-10-mat.jonczyk@o2.pl
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/rtc/rtc-cmos.c | 107 +++++++++++++++++++++++++----------------
+ 1 file changed, 66 insertions(+), 41 deletions(-)
+
+diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
+index 93ffb9eaf63a..601e3967e1f0 100644
+--- a/drivers/rtc/rtc-cmos.c
++++ b/drivers/rtc/rtc-cmos.c
+@@ -444,10 +444,57 @@ static int cmos_validate_alarm(struct device *dev, struct rtc_wkalrm *t)
+       return 0;
+ }
++struct cmos_set_alarm_callback_param {
++      struct cmos_rtc *cmos;
++      unsigned char mon, mday, hrs, min, sec;
++      struct rtc_wkalrm *t;
++};
++
++/* Note: this function may be executed by mc146818_avoid_UIP() more then
++ *     once
++ */
++static void cmos_set_alarm_callback(unsigned char __always_unused seconds,
++                                  void *param_in)
++{
++      struct cmos_set_alarm_callback_param *p =
++              (struct cmos_set_alarm_callback_param *)param_in;
++
++      /* next rtc irq must not be from previous alarm setting */
++      cmos_irq_disable(p->cmos, RTC_AIE);
++
++      /* update alarm */
++      CMOS_WRITE(p->hrs, RTC_HOURS_ALARM);
++      CMOS_WRITE(p->min, RTC_MINUTES_ALARM);
++      CMOS_WRITE(p->sec, RTC_SECONDS_ALARM);
++
++      /* the system may support an "enhanced" alarm */
++      if (p->cmos->day_alrm) {
++              CMOS_WRITE(p->mday, p->cmos->day_alrm);
++              if (p->cmos->mon_alrm)
++                      CMOS_WRITE(p->mon, p->cmos->mon_alrm);
++      }
++
++      if (use_hpet_alarm()) {
++              /*
++               * FIXME the HPET alarm glue currently ignores day_alrm
++               * and mon_alrm ...
++               */
++              hpet_set_alarm_time(p->t->time.tm_hour, p->t->time.tm_min,
++                                  p->t->time.tm_sec);
++      }
++
++      if (p->t->enabled)
++              cmos_irq_enable(p->cmos, RTC_AIE);
++}
++
+ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
+ {
+       struct cmos_rtc *cmos = dev_get_drvdata(dev);
+-      unsigned char mon, mday, hrs, min, sec, rtc_control;
++      struct cmos_set_alarm_callback_param p = {
++              .cmos = cmos,
++              .t = t
++      };
++      unsigned char rtc_control;
+       int ret;
+       /* This not only a rtc_op, but also called directly */
+@@ -458,11 +505,11 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
+       if (ret < 0)
+               return ret;
+-      mon = t->time.tm_mon + 1;
+-      mday = t->time.tm_mday;
+-      hrs = t->time.tm_hour;
+-      min = t->time.tm_min;
+-      sec = t->time.tm_sec;
++      p.mon = t->time.tm_mon + 1;
++      p.mday = t->time.tm_mday;
++      p.hrs = t->time.tm_hour;
++      p.min = t->time.tm_min;
++      p.sec = t->time.tm_sec;
+       spin_lock_irq(&rtc_lock);
+       rtc_control = CMOS_READ(RTC_CONTROL);
+@@ -470,43 +517,21 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
+       if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
+               /* Writing 0xff means "don't care" or "match all".  */
+-              mon = (mon <= 12) ? bin2bcd(mon) : 0xff;
+-              mday = (mday >= 1 && mday <= 31) ? bin2bcd(mday) : 0xff;
+-              hrs = (hrs < 24) ? bin2bcd(hrs) : 0xff;
+-              min = (min < 60) ? bin2bcd(min) : 0xff;
+-              sec = (sec < 60) ? bin2bcd(sec) : 0xff;
++              p.mon = (p.mon <= 12) ? bin2bcd(p.mon) : 0xff;
++              p.mday = (p.mday >= 1 && p.mday <= 31) ? bin2bcd(p.mday) : 0xff;
++              p.hrs = (p.hrs < 24) ? bin2bcd(p.hrs) : 0xff;
++              p.min = (p.min < 60) ? bin2bcd(p.min) : 0xff;
++              p.sec = (p.sec < 60) ? bin2bcd(p.sec) : 0xff;
+       }
+-      spin_lock_irq(&rtc_lock);
+-
+-      /* next rtc irq must not be from previous alarm setting */
+-      cmos_irq_disable(cmos, RTC_AIE);
+-
+-      /* update alarm */
+-      CMOS_WRITE(hrs, RTC_HOURS_ALARM);
+-      CMOS_WRITE(min, RTC_MINUTES_ALARM);
+-      CMOS_WRITE(sec, RTC_SECONDS_ALARM);
+-
+-      /* the system may support an "enhanced" alarm */
+-      if (cmos->day_alrm) {
+-              CMOS_WRITE(mday, cmos->day_alrm);
+-              if (cmos->mon_alrm)
+-                      CMOS_WRITE(mon, cmos->mon_alrm);
+-      }
+-
+-      if (use_hpet_alarm()) {
+-              /*
+-               * FIXME the HPET alarm glue currently ignores day_alrm
+-               * and mon_alrm ...
+-               */
+-              hpet_set_alarm_time(t->time.tm_hour, t->time.tm_min,
+-                                  t->time.tm_sec);
+-      }
+-
+-      if (t->enabled)
+-              cmos_irq_enable(cmos, RTC_AIE);
+-
+-      spin_unlock_irq(&rtc_lock);
++      /*
++       * Some Intel chipsets disconnect the alarm registers when the clock
++       * update is in progress - during this time writes fail silently.
++       *
++       * Use mc146818_avoid_UIP() to avoid this.
++       */
++      if (!mc146818_avoid_UIP(cmos_set_alarm_callback, &p))
++              return -EIO;
+       cmos->alarm_expires = rtc_tm_to_time64(&t->time);
+-- 
+2.35.1
+
diff --git a/queue-5.10/rtc-cmos-remove-stale-revisit-comments.patch b/queue-5.10/rtc-cmos-remove-stale-revisit-comments.patch
new file mode 100644 (file)
index 0000000..38784cc
--- /dev/null
@@ -0,0 +1,58 @@
+From 74d206aa5712c743dac13679d2cf585f3d88199d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 16 Jul 2021 23:04:37 +0200
+Subject: rtc: cmos: remove stale REVISIT comments
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mateusz Jończyk <mat.jonczyk@o2.pl>
+
+[ Upstream commit e1aba37569f0aa9c993f740828871e48eea79f98 ]
+
+It appears mc146818_get_time() and mc146818_set_time() now correctly
+use the century register as specified in the ACPI FADT table. It is not
+clear what else could be done here.
+
+These comments were introduced by
+        commit 7be2c7c96aff ("[PATCH] RTC framework driver for CMOS RTCs")
+in 2007, which originally referenced function get_rtc_time() in
+include/asm-generic/rtc.h .
+
+Signed-off-by: Mateusz Jończyk <mat.jonczyk@o2.pl>
+Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Link: https://lore.kernel.org/r/20210716210437.29622-1-mat.jonczyk@o2.pl
+Stable-dep-of: cd17420ebea5 ("rtc: cmos: avoid UIP when writing alarm time")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/rtc/rtc-cmos.c | 8 +-------
+ 1 file changed, 1 insertion(+), 7 deletions(-)
+
+diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
+index 8e8ce40f6440..ed4f512eabf0 100644
+--- a/drivers/rtc/rtc-cmos.c
++++ b/drivers/rtc/rtc-cmos.c
+@@ -229,19 +229,13 @@ static int cmos_read_time(struct device *dev, struct rtc_time *t)
+       if (!pm_trace_rtc_valid())
+               return -EIO;
+-      /* REVISIT:  if the clock has a "century" register, use
+-       * that instead of the heuristic in mc146818_get_time().
+-       * That'll make Y3K compatility (year > 2070) easy!
+-       */
+       mc146818_get_time(t);
+       return 0;
+ }
+ static int cmos_set_time(struct device *dev, struct rtc_time *t)
+ {
+-      /* REVISIT:  set the "century" register if available
+-       *
+-       * NOTE: this ignores the issue whereby updating the seconds
++      /* NOTE: this ignores the issue whereby updating the seconds
+        * takes effect exactly 500ms after we write the register.
+        * (Also queueing and other delays before we get this far.)
+        */
+-- 
+2.35.1
+
diff --git a/queue-5.10/rtc-cmos-replace-spin_lock_irqsave-with-spin_lock-in.patch b/queue-5.10/rtc-cmos-replace-spin_lock_irqsave-with-spin_lock-in.patch
new file mode 100644 (file)
index 0000000..7a65017
--- /dev/null
@@ -0,0 +1,49 @@
+From ad5b5459965ce741f5ad888fa23e74271b21b8c9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Feb 2021 20:39:36 +0800
+Subject: rtc: cmos: Replace spin_lock_irqsave with spin_lock in hard IRQ
+
+From: Xiaofei Tan <tanxiaofei@huawei.com>
+
+[ Upstream commit 6950d046eb6eabbc271fda416460c05f7a85698a ]
+
+It is redundant to do irqsave and irqrestore in hardIRQ context, where
+it has been in a irq-disabled context.
+
+Signed-off-by: Xiaofei Tan <tanxiaofei@huawei.com>
+Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Link: https://lore.kernel.org/r/1612355981-6764-2-git-send-email-tanxiaofei@huawei.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/rtc/rtc-cmos.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
+index d419eb988b22..21f2bdd025b6 100644
+--- a/drivers/rtc/rtc-cmos.c
++++ b/drivers/rtc/rtc-cmos.c
+@@ -704,11 +704,10 @@ static struct cmos_rtc   cmos_rtc;
+ static irqreturn_t cmos_interrupt(int irq, void *p)
+ {
+-      unsigned long   flags;
+       u8              irqstat;
+       u8              rtc_control;
+-      spin_lock_irqsave(&rtc_lock, flags);
++      spin_lock(&rtc_lock);
+       /* When the HPET interrupt handler calls us, the interrupt
+        * status is passed as arg1 instead of the irq number.  But
+@@ -742,7 +741,7 @@ static irqreturn_t cmos_interrupt(int irq, void *p)
+                       hpet_mask_rtc_irq_bit(RTC_AIE);
+               CMOS_READ(RTC_INTR_FLAGS);
+       }
+-      spin_unlock_irqrestore(&rtc_lock, flags);
++      spin_unlock(&rtc_lock);
+       if (is_intr(irqstat)) {
+               rtc_update_irq(p, 1, irqstat);
+-- 
+2.35.1
+
diff --git a/queue-5.10/rtc-mc146818-detect-and-handle-broken-rtcs.patch b/queue-5.10/rtc-mc146818-detect-and-handle-broken-rtcs.patch
new file mode 100644 (file)
index 0000000..f3cbc3a
--- /dev/null
@@ -0,0 +1,76 @@
+From 9ef93cd13386ac610b618b84cd2cc715272b215e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Jan 2021 18:02:11 +0100
+Subject: rtc: mc146818: Detect and handle broken RTCs
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+[ Upstream commit 211e5db19d15a721b2953ea54b8f26c2963720eb ]
+
+The recent fix for handling the UIP bit unearthed another issue in the RTC
+code. If the RTC is advertised but the readout is straight 0xFF because
+it's not available, the old code just proceeded with crappy values, but the
+new code hangs because it waits for the UIP bit to become low.
+
+Add a sanity check in the RTC CMOS probe function which reads the RTC_VALID
+register (Register D) which should have bit 0-6 cleared. If that's not the
+case then fail to register the CMOS.
+
+Add the same check to mc146818_get_time(), warn once when the condition
+is true and invalidate the rtc_time data.
+
+Reported-by: Mickaël Salaün <mic@digikod.net>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Mickaël Salaün <mic@linux.microsoft.com>
+Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Link: https://lore.kernel.org/r/87tur3fx7w.fsf@nanos.tec.linutronix.de
+Stable-dep-of: cd17420ebea5 ("rtc: cmos: avoid UIP when writing alarm time")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/rtc/rtc-cmos.c         | 8 ++++++++
+ drivers/rtc/rtc-mc146818-lib.c | 7 +++++++
+ 2 files changed, 15 insertions(+)
+
+diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
+index 58c6382a2807..cce4b62ffdd0 100644
+--- a/drivers/rtc/rtc-cmos.c
++++ b/drivers/rtc/rtc-cmos.c
+@@ -808,6 +808,14 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
+       spin_lock_irq(&rtc_lock);
++      /* Ensure that the RTC is accessible. Bit 0-6 must be 0! */
++      if ((CMOS_READ(RTC_VALID) & 0x7f) != 0) {
++              spin_unlock_irq(&rtc_lock);
++              dev_warn(dev, "not accessible\n");
++              retval = -ENXIO;
++              goto cleanup1;
++      }
++
+       if (!(flags & CMOS_RTC_FLAGS_NOFREQ)) {
+               /* force periodic irq to CMOS reset default of 1024Hz;
+                *
+diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c
+index 8364e4141670..7f01dc41271d 100644
+--- a/drivers/rtc/rtc-mc146818-lib.c
++++ b/drivers/rtc/rtc-mc146818-lib.c
+@@ -21,6 +21,13 @@ unsigned int mc146818_get_time(struct rtc_time *time)
+ again:
+       spin_lock_irqsave(&rtc_lock, flags);
++      /* Ensure that the RTC is accessible. Bit 0-6 must be 0! */
++      if (WARN_ON_ONCE((CMOS_READ(RTC_VALID) & 0x7f) != 0)) {
++              spin_unlock_irqrestore(&rtc_lock, flags);
++              memset(time, 0xff, sizeof(*time));
++              return 0;
++      }
++
+       /*
+        * Check whether there is an update in progress during which the
+        * readout is unspecified. The maximum update time is ~2ms. Poll
+-- 
+2.35.1
+
diff --git a/queue-5.10/rtc-mc146818-dont-test-for-bit-0-5-in-register-d.patch b/queue-5.10/rtc-mc146818-dont-test-for-bit-0-5-in-register-d.patch
new file mode 100644 (file)
index 0000000..553d0f8
--- /dev/null
@@ -0,0 +1,65 @@
+From 4ef1e2e1376fa1358435ab952fe3ad29ae1082a6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 1 Feb 2021 20:24:17 +0100
+Subject: rtc: mc146818: Dont test for bit 0-5 in Register D
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+[ Upstream commit ebb22a05943666155e6da04407cc6e913974c78c ]
+
+The recent change to validate the RTC turned out to be overly tight.
+
+While it cures the problem on the reporters machine it breaks machines
+with Intel chipsets which use bit 0-5 of the D register. So check only
+for bit 6 being 0 which is the case on these Intel machines as well.
+
+Fixes: 211e5db19d15 ("rtc: mc146818: Detect and handle broken RTCs")
+Reported-by: Serge Belyshev <belyshev@depni.sinp.msu.ru>
+Reported-by: Dirk Gouders <dirk@gouders.net>
+Reported-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Dirk Gouders <dirk@gouders.net>
+Tested-by: Len Brown <len.brown@intel.com>
+Tested-by: Borislav Petkov <bp@suse.de>
+Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Link: https://lore.kernel.org/r/87zh0nbnha.fsf@nanos.tec.linutronix.de
+Stable-dep-of: cd17420ebea5 ("rtc: cmos: avoid UIP when writing alarm time")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/rtc/rtc-cmos.c         | 4 ++--
+ drivers/rtc/rtc-mc146818-lib.c | 4 ++--
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
+index cce4b62ffdd0..8e8ce40f6440 100644
+--- a/drivers/rtc/rtc-cmos.c
++++ b/drivers/rtc/rtc-cmos.c
+@@ -808,8 +808,8 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
+       spin_lock_irq(&rtc_lock);
+-      /* Ensure that the RTC is accessible. Bit 0-6 must be 0! */
+-      if ((CMOS_READ(RTC_VALID) & 0x7f) != 0) {
++      /* Ensure that the RTC is accessible. Bit 6 must be 0! */
++      if ((CMOS_READ(RTC_VALID) & 0x40) != 0) {
+               spin_unlock_irq(&rtc_lock);
+               dev_warn(dev, "not accessible\n");
+               retval = -ENXIO;
+diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c
+index 7f01dc41271d..6ed2cd5d2bba 100644
+--- a/drivers/rtc/rtc-mc146818-lib.c
++++ b/drivers/rtc/rtc-mc146818-lib.c
+@@ -21,8 +21,8 @@ unsigned int mc146818_get_time(struct rtc_time *time)
+ again:
+       spin_lock_irqsave(&rtc_lock, flags);
+-      /* Ensure that the RTC is accessible. Bit 0-6 must be 0! */
+-      if (WARN_ON_ONCE((CMOS_READ(RTC_VALID) & 0x7f) != 0)) {
++      /* Ensure that the RTC is accessible. Bit 6 must be 0! */
++      if (WARN_ON_ONCE((CMOS_READ(RTC_VALID) & 0x40) != 0)) {
+               spin_unlock_irqrestore(&rtc_lock, flags);
+               memset(time, 0xff, sizeof(*time));
+               return 0;
+-- 
+2.35.1
+
diff --git a/queue-5.10/rtc-mc146818-lib-change-return-values-of-mc146818_ge.patch b/queue-5.10/rtc-mc146818-lib-change-return-values-of-mc146818_ge.patch
new file mode 100644 (file)
index 0000000..f764649
--- /dev/null
@@ -0,0 +1,51 @@
+From dcb86a8ba704e221a6401c14de2f0d35b725235f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 10 Dec 2021 21:01:24 +0100
+Subject: rtc: mc146818-lib: change return values of mc146818_get_time()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mateusz Jończyk <mat.jonczyk@o2.pl>
+
+[ Upstream commit d35786b3a28dee20b12962ae2dd365892a99ed1a ]
+
+No function is checking mc146818_get_time() return values yet, so
+correct them to make them more customary.
+
+Signed-off-by: Mateusz Jończyk <mat.jonczyk@o2.pl>
+Cc: Alessandro Zummo <a.zummo@towertech.it>
+Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Link: https://lore.kernel.org/r/20211210200131.153887-3-mat.jonczyk@o2.pl
+Stable-dep-of: cd17420ebea5 ("rtc: cmos: avoid UIP when writing alarm time")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/rtc/rtc-mc146818-lib.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c
+index 6ed2cd5d2bba..6262f0680f13 100644
+--- a/drivers/rtc/rtc-mc146818-lib.c
++++ b/drivers/rtc/rtc-mc146818-lib.c
+@@ -25,7 +25,7 @@ unsigned int mc146818_get_time(struct rtc_time *time)
+       if (WARN_ON_ONCE((CMOS_READ(RTC_VALID) & 0x40) != 0)) {
+               spin_unlock_irqrestore(&rtc_lock, flags);
+               memset(time, 0xff, sizeof(*time));
+-              return 0;
++              return -EIO;
+       }
+       /*
+@@ -116,7 +116,7 @@ unsigned int mc146818_get_time(struct rtc_time *time)
+       time->tm_mon--;
+-      return RTC_24H;
++      return 0;
+ }
+ EXPORT_SYMBOL_GPL(mc146818_get_time);
+-- 
+2.35.1
+
diff --git a/queue-5.10/rtc-mc146818-lib-extract-mc146818_avoid_uip.patch b/queue-5.10/rtc-mc146818-lib-extract-mc146818_avoid_uip.patch
new file mode 100644 (file)
index 0000000..508c8a6
--- /dev/null
@@ -0,0 +1,136 @@
+From 0f4373bd6d8f06e9d571ae76ea8f6be4d684344b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 10 Dec 2021 21:01:27 +0100
+Subject: rtc: mc146818-lib: extract mc146818_avoid_UIP
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mateusz Jończyk <mat.jonczyk@o2.pl>
+
+[ Upstream commit ec5895c0f2d87b9bf4185db1915e40fa6fcfc0ac ]
+
+Function mc146818_get_time() contains an elaborate mechanism of reading
+the RTC time while no RTC update is in progress. It turns out that
+reading the RTC alarm clock also requires avoiding the RTC update.
+Therefore, the mechanism in mc146818_get_time() should be reused - so
+extract it into a separate function.
+
+The logic in mc146818_avoid_UIP() is same as in mc146818_get_time()
+except that after every
+
+        if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) {
+
+there is now "mdelay(1)".
+
+To avoid producing a very unreadable patch, mc146818_get_time() will be
+refactored to use mc146818_avoid_UIP() in the next patch.
+
+Signed-off-by: Mateusz Jończyk <mat.jonczyk@o2.pl>
+Cc: Alessandro Zummo <a.zummo@towertech.it>
+Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Link: https://lore.kernel.org/r/20211210200131.153887-6-mat.jonczyk@o2.pl
+Stable-dep-of: cd17420ebea5 ("rtc: cmos: avoid UIP when writing alarm time")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/rtc/rtc-mc146818-lib.c | 70 ++++++++++++++++++++++++++++++++++
+ include/linux/mc146818rtc.h    |  3 ++
+ 2 files changed, 73 insertions(+)
+
+diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c
+index 94df6056c5c0..46527a5d3912 100644
+--- a/drivers/rtc/rtc-mc146818-lib.c
++++ b/drivers/rtc/rtc-mc146818-lib.c
+@@ -8,6 +8,76 @@
+ #include <linux/acpi.h>
+ #endif
++/*
++ * Execute a function while the UIP (Update-in-progress) bit of the RTC is
++ * unset.
++ *
++ * Warning: callback may be executed more then once.
++ */
++bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param),
++                      void *param)
++{
++      int i;
++      unsigned long flags;
++      unsigned char seconds;
++
++      for (i = 0; i < 10; i++) {
++              spin_lock_irqsave(&rtc_lock, flags);
++
++              /*
++               * Check whether there is an update in progress during which the
++               * readout is unspecified. The maximum update time is ~2ms. Poll
++               * every msec for completion.
++               *
++               * Store the second value before checking UIP so a long lasting
++               * NMI which happens to hit after the UIP check cannot make
++               * an update cycle invisible.
++               */
++              seconds = CMOS_READ(RTC_SECONDS);
++
++              if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) {
++                      spin_unlock_irqrestore(&rtc_lock, flags);
++                      mdelay(1);
++                      continue;
++              }
++
++              /* Revalidate the above readout */
++              if (seconds != CMOS_READ(RTC_SECONDS)) {
++                      spin_unlock_irqrestore(&rtc_lock, flags);
++                      continue;
++              }
++
++              if (callback)
++                      callback(seconds, param);
++
++              /*
++               * Check for the UIP bit again. If it is set now then
++               * the above values may contain garbage.
++               */
++              if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) {
++                      spin_unlock_irqrestore(&rtc_lock, flags);
++                      mdelay(1);
++                      continue;
++              }
++
++              /*
++               * A NMI might have interrupted the above sequence so check
++               * whether the seconds value has changed which indicates that
++               * the NMI took longer than the UIP bit was set. Unlikely, but
++               * possible and there is also virt...
++               */
++              if (seconds != CMOS_READ(RTC_SECONDS)) {
++                      spin_unlock_irqrestore(&rtc_lock, flags);
++                      continue;
++              }
++              spin_unlock_irqrestore(&rtc_lock, flags);
++
++              return true;
++      }
++      return false;
++}
++EXPORT_SYMBOL_GPL(mc146818_avoid_UIP);
++
+ /*
+  * If the UIP (Update-in-progress) bit of the RTC is set for more then
+  * 10ms, the RTC is apparently broken or not present.
+diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h
+index c246ce191915..fb042e0e7d76 100644
+--- a/include/linux/mc146818rtc.h
++++ b/include/linux/mc146818rtc.h
+@@ -129,4 +129,7 @@ bool mc146818_does_rtc_work(void);
+ unsigned int mc146818_get_time(struct rtc_time *time);
+ int mc146818_set_time(struct rtc_time *time);
++bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param),
++                      void *param);
++
+ #endif /* _MC146818RTC_H */
+-- 
+2.35.1
+
diff --git a/queue-5.10/rtc-mc146818-lib-fix-rtc-presence-check.patch b/queue-5.10/rtc-mc146818-lib-fix-rtc-presence-check.patch
new file mode 100644 (file)
index 0000000..75a8874
--- /dev/null
@@ -0,0 +1,168 @@
+From 453e0ae9b265b8eb55cb8d8d60e86f4757a919b8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 10 Dec 2021 21:01:26 +0100
+Subject: rtc: mc146818-lib: fix RTC presence check
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mateusz Jończyk <mat.jonczyk@o2.pl>
+
+[ Upstream commit ea6fa4961aab8f90a8aa03575a98b4bda368d4b6 ]
+
+To prevent an infinite loop in mc146818_get_time(),
+commit 211e5db19d15 ("rtc: mc146818: Detect and handle broken RTCs")
+added a check for RTC availability. Together with a later fix, it
+checked if bit 6 in register 0x0d is cleared.
+
+This, however, caused a false negative on a motherboard with an AMD
+SB710 southbridge; according to the specification [1], bit 6 of register
+0x0d of this chipset is a scratchbit. This caused a regression in Linux
+5.11 - the RTC was determined broken by the kernel and not used by
+rtc-cmos.c [3]. This problem was also reported in Fedora [4].
+
+As a better alternative, check whether the UIP ("Update-in-progress")
+bit is set for longer then 10ms. If that is the case, then apparently
+the RTC is either absent (and all register reads return 0xff) or broken.
+Also limit the number of loop iterations in mc146818_get_time() to 10 to
+prevent an infinite loop there.
+
+The functions mc146818_get_time() and mc146818_does_rtc_work() will be
+refactored later in this patch series, in order to fix a separate
+problem with reading / setting the RTC alarm time. This is done so to
+avoid a confusion about what is being fixed when.
+
+In a previous approach to this problem, I implemented a check whether
+the RTC_HOURS register contains a value <= 24. This, however, sometimes
+did not work correctly on my Intel Kaby Lake laptop. According to
+Intel's documentation [2], "the time and date RAM locations (0-9) are
+disconnected from the external bus" during the update cycle so reading
+this register without checking the UIP bit is incorrect.
+
+[1] AMD SB700/710/750 Register Reference Guide, page 308,
+https://developer.amd.com/wordpress/media/2012/10/43009_sb7xx_rrg_pub_1.00.pdf
+
+[2] 7th Generation Intel ® Processor Family I/O for U/Y Platforms [...] Datasheet
+Volume 1 of 2, page 209
+Intel's Document Number: 334658-006,
+https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/7th-and-8th-gen-core-family-mobile-u-y-processor-lines-i-o-datasheet-vol-1.pdf
+
+[3] Functions in arch/x86/kernel/rtc.c apparently were using it.
+
+[4] https://bugzilla.redhat.com/show_bug.cgi?id=1936688
+
+Fixes: 211e5db19d15 ("rtc: mc146818: Detect and handle broken RTCs")
+Fixes: ebb22a059436 ("rtc: mc146818: Dont test for bit 0-5 in Register D")
+Signed-off-by: Mateusz Jończyk <mat.jonczyk@o2.pl>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Alessandro Zummo <a.zummo@towertech.it>
+Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Link: https://lore.kernel.org/r/20211210200131.153887-5-mat.jonczyk@o2.pl
+Stable-dep-of: cd17420ebea5 ("rtc: cmos: avoid UIP when writing alarm time")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/rtc/rtc-cmos.c         | 10 ++++------
+ drivers/rtc/rtc-mc146818-lib.c | 34 ++++++++++++++++++++++++++++++----
+ include/linux/mc146818rtc.h    |  1 +
+ 3 files changed, 35 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
+index f8358bb2ae31..93ffb9eaf63a 100644
+--- a/drivers/rtc/rtc-cmos.c
++++ b/drivers/rtc/rtc-cmos.c
+@@ -807,16 +807,14 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
+       rename_region(ports, dev_name(&cmos_rtc.rtc->dev));
+-      spin_lock_irq(&rtc_lock);
+-
+-      /* Ensure that the RTC is accessible. Bit 6 must be 0! */
+-      if ((CMOS_READ(RTC_VALID) & 0x40) != 0) {
+-              spin_unlock_irq(&rtc_lock);
+-              dev_warn(dev, "not accessible\n");
++      if (!mc146818_does_rtc_work()) {
++              dev_warn(dev, "broken or not accessible\n");
+               retval = -ENXIO;
+               goto cleanup1;
+       }
++      spin_lock_irq(&rtc_lock);
++
+       if (!(flags & CMOS_RTC_FLAGS_NOFREQ)) {
+               /* force periodic irq to CMOS reset default of 1024Hz;
+                *
+diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c
+index 3ae5c690f22b..94df6056c5c0 100644
+--- a/drivers/rtc/rtc-mc146818-lib.c
++++ b/drivers/rtc/rtc-mc146818-lib.c
+@@ -8,10 +8,36 @@
+ #include <linux/acpi.h>
+ #endif
++/*
++ * If the UIP (Update-in-progress) bit of the RTC is set for more then
++ * 10ms, the RTC is apparently broken or not present.
++ */
++bool mc146818_does_rtc_work(void)
++{
++      int i;
++      unsigned char val;
++      unsigned long flags;
++
++      for (i = 0; i < 10; i++) {
++              spin_lock_irqsave(&rtc_lock, flags);
++              val = CMOS_READ(RTC_FREQ_SELECT);
++              spin_unlock_irqrestore(&rtc_lock, flags);
++
++              if ((val & RTC_UIP) == 0)
++                      return true;
++
++              mdelay(1);
++      }
++
++      return false;
++}
++EXPORT_SYMBOL_GPL(mc146818_does_rtc_work);
++
+ unsigned int mc146818_get_time(struct rtc_time *time)
+ {
+       unsigned char ctrl;
+       unsigned long flags;
++      unsigned int iter_count = 0;
+       unsigned char century = 0;
+       bool retry;
+@@ -20,13 +46,13 @@ unsigned int mc146818_get_time(struct rtc_time *time)
+ #endif
+ again:
+-      spin_lock_irqsave(&rtc_lock, flags);
+-      /* Ensure that the RTC is accessible. Bit 6 must be 0! */
+-      if (WARN_ON_ONCE((CMOS_READ(RTC_VALID) & 0x40) != 0)) {
+-              spin_unlock_irqrestore(&rtc_lock, flags);
++      if (iter_count > 10) {
+               memset(time, 0, sizeof(*time));
+               return -EIO;
+       }
++      iter_count++;
++
++      spin_lock_irqsave(&rtc_lock, flags);
+       /*
+        * Check whether there is an update in progress during which the
+diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h
+index 1e0205811394..c246ce191915 100644
+--- a/include/linux/mc146818rtc.h
++++ b/include/linux/mc146818rtc.h
+@@ -125,6 +125,7 @@ struct cmos_rtc_board_info {
+ #define RTC_IO_EXTENT_USED      RTC_IO_EXTENT
+ #endif /* ARCH_RTC_LOCATION */
++bool mc146818_does_rtc_work(void);
+ unsigned int mc146818_get_time(struct rtc_time *time);
+ int mc146818_set_time(struct rtc_time *time);
+-- 
+2.35.1
+
diff --git a/queue-5.10/rtc-mc146818-prevent-reading-garbage.patch b/queue-5.10/rtc-mc146818-prevent-reading-garbage.patch
new file mode 100644 (file)
index 0000000..9742058
--- /dev/null
@@ -0,0 +1,165 @@
+From fff644f7c81182036f455c49f0d146c802e4ee08 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 6 Dec 2020 22:46:14 +0100
+Subject: rtc: mc146818: Prevent reading garbage
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+[ Upstream commit 05a0302c35481e9b47fb90ba40922b0a4cae40d8 ]
+
+The MC146818 driver is prone to read garbage from the RTC. There are
+several issues all related to the update cycle of the MC146818. The chip
+increments seconds obviously once per second and indicates that by a bit in
+a register. The bit goes high 244us before the actual update starts. During
+the update the readout of the time values is undefined.
+
+The code just checks whether the update in progress bit (UIP) is set before
+reading the clock. If it's set it waits arbitrary 20ms before retrying,
+which is ample because the maximum update time is ~2ms.
+
+But this check does not guarantee that the UIP bit goes high and the actual
+update happens during the readout. So the following can happen
+
+ 0.997                UIP = False
+   -> Interrupt/NMI/preemption
+ 0.998        UIP -> True
+ 0.999        Readout  <- Undefined
+
+To prevent this rework the code so it checks UIP before and after the
+readout and if set after the readout try again.
+
+But that's not enough to cover the following:
+
+ 0.997                UIP = False
+              Readout seconds
+   -> NMI (or vCPU scheduled out)
+ 0.998        UIP -> True
+              update completes
+              UIP -> False
+ 1.000        Readout  minutes,....
+              UIP check succeeds
+
+That can make the readout wrong up to 59 seconds.
+
+To prevent this, read the seconds value before the first UIP check,
+validate it after checking UIP and after reading out the rest.
+
+It's amazing that the original i386 code had this actually correct and
+the generic implementation of the MC146818 driver got it wrong in 2002 and
+it stayed that way until today.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Link: https://lore.kernel.org/r/20201206220541.594826678@linutronix.de
+Stable-dep-of: cd17420ebea5 ("rtc: cmos: avoid UIP when writing alarm time")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/rtc/rtc-mc146818-lib.c | 64 +++++++++++++++++++++-------------
+ 1 file changed, 39 insertions(+), 25 deletions(-)
+
+diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c
+index b036ff33fbe6..8364e4141670 100644
+--- a/drivers/rtc/rtc-mc146818-lib.c
++++ b/drivers/rtc/rtc-mc146818-lib.c
+@@ -8,41 +8,41 @@
+ #include <linux/acpi.h>
+ #endif
+-/*
+- * Returns true if a clock update is in progress
+- */
+-static inline unsigned char mc146818_is_updating(void)
+-{
+-      unsigned char uip;
+-      unsigned long flags;
+-
+-      spin_lock_irqsave(&rtc_lock, flags);
+-      uip = (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP);
+-      spin_unlock_irqrestore(&rtc_lock, flags);
+-      return uip;
+-}
+-
+ unsigned int mc146818_get_time(struct rtc_time *time)
+ {
+       unsigned char ctrl;
+       unsigned long flags;
+       unsigned char century = 0;
++      bool retry;
+ #ifdef CONFIG_MACH_DECSTATION
+       unsigned int real_year;
+ #endif
++again:
++      spin_lock_irqsave(&rtc_lock, flags);
+       /*
+-       * read RTC once any update in progress is done. The update
+-       * can take just over 2ms. We wait 20ms. There is no need to
+-       * to poll-wait (up to 1s - eeccch) for the falling edge of RTC_UIP.
+-       * If you need to know *exactly* when a second has started, enable
+-       * periodic update complete interrupts, (via ioctl) and then
+-       * immediately read /dev/rtc which will block until you get the IRQ.
+-       * Once the read clears, read the RTC time (again via ioctl). Easy.
++       * Check whether there is an update in progress during which the
++       * readout is unspecified. The maximum update time is ~2ms. Poll
++       * every msec for completion.
++       *
++       * Store the second value before checking UIP so a long lasting NMI
++       * which happens to hit after the UIP check cannot make an update
++       * cycle invisible.
+        */
+-      if (mc146818_is_updating())
+-              mdelay(20);
++      time->tm_sec = CMOS_READ(RTC_SECONDS);
++
++      if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) {
++              spin_unlock_irqrestore(&rtc_lock, flags);
++              mdelay(1);
++              goto again;
++      }
++
++      /* Revalidate the above readout */
++      if (time->tm_sec != CMOS_READ(RTC_SECONDS)) {
++              spin_unlock_irqrestore(&rtc_lock, flags);
++              goto again;
++      }
+       /*
+        * Only the values that we read from the RTC are set. We leave
+@@ -50,8 +50,6 @@ unsigned int mc146818_get_time(struct rtc_time *time)
+        * RTC has RTC_DAY_OF_WEEK, we ignore it, as it is only updated
+        * by the RTC when initially set to a non-zero value.
+        */
+-      spin_lock_irqsave(&rtc_lock, flags);
+-      time->tm_sec = CMOS_READ(RTC_SECONDS);
+       time->tm_min = CMOS_READ(RTC_MINUTES);
+       time->tm_hour = CMOS_READ(RTC_HOURS);
+       time->tm_mday = CMOS_READ(RTC_DAY_OF_MONTH);
+@@ -66,8 +64,24 @@ unsigned int mc146818_get_time(struct rtc_time *time)
+               century = CMOS_READ(acpi_gbl_FADT.century);
+ #endif
+       ctrl = CMOS_READ(RTC_CONTROL);
++      /*
++       * Check for the UIP bit again. If it is set now then
++       * the above values may contain garbage.
++       */
++      retry = CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP;
++      /*
++       * A NMI might have interrupted the above sequence so check whether
++       * the seconds value has changed which indicates that the NMI took
++       * longer than the UIP bit was set. Unlikely, but possible and
++       * there is also virt...
++       */
++      retry |= time->tm_sec != CMOS_READ(RTC_SECONDS);
++
+       spin_unlock_irqrestore(&rtc_lock, flags);
++      if (retry)
++              goto again;
++
+       if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
+       {
+               time->tm_sec = bcd2bin(time->tm_sec);
+-- 
+2.35.1
+
diff --git a/queue-5.10/rtc-mc146818-reduce-spinlock-section-in-mc146818_set.patch b/queue-5.10/rtc-mc146818-reduce-spinlock-section-in-mc146818_set.patch
new file mode 100644 (file)
index 0000000..239656f
--- /dev/null
@@ -0,0 +1,54 @@
+From 454cde155ee62939e82a056e9fbb2bb5ab8190e8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 6 Dec 2020 22:46:15 +0100
+Subject: rtc: mc146818: Reduce spinlock section in mc146818_set_time()
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+[ Upstream commit dcf257e92622ba0e25fdc4b6699683e7ae67e2a1 ]
+
+No need to hold the lock and disable interrupts for doing math.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Link: https://lore.kernel.org/r/20201206220541.709243630@linutronix.de
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/rtc/rtc-mc146818-lib.c | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c
+index 46527a5d3912..1ca866461d10 100644
+--- a/drivers/rtc/rtc-mc146818-lib.c
++++ b/drivers/rtc/rtc-mc146818-lib.c
+@@ -249,7 +249,6 @@ int mc146818_set_time(struct rtc_time *time)
+       if (yrs > 255)  /* They are unsigned */
+               return -EINVAL;
+-      spin_lock_irqsave(&rtc_lock, flags);
+ #ifdef CONFIG_MACH_DECSTATION
+       real_yrs = yrs;
+       leap_yr = ((!((yrs + 1900) % 4) && ((yrs + 1900) % 100)) ||
+@@ -278,10 +277,8 @@ int mc146818_set_time(struct rtc_time *time)
+       /* These limits and adjustments are independent of
+        * whether the chip is in binary mode or not.
+        */
+-      if (yrs > 169) {
+-              spin_unlock_irqrestore(&rtc_lock, flags);
++      if (yrs > 169)
+               return -EINVAL;
+-      }
+       if (yrs >= 100)
+               yrs -= 100;
+@@ -297,6 +294,7 @@ int mc146818_set_time(struct rtc_time *time)
+               century = bin2bcd(century);
+       }
++      spin_lock_irqsave(&rtc_lock, flags);
+       save_control = CMOS_READ(RTC_CONTROL);
+       CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
+       save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
+-- 
+2.35.1
+
diff --git a/queue-5.10/series b/queue-5.10/series
new file mode 100644 (file)
index 0000000..ac2bfc8
--- /dev/null
@@ -0,0 +1,45 @@
+mm-mlock-remove-lru_lock-on-testclearpagemlocked.patch
+mm-mlock-remove-__munlock_isolate_lru_page.patch
+mm-lru-introduce-testclearpagelru.patch
+mm-compaction-do-page-isolation-first-in-compaction.patch
+mm-vmscan-__isolate_lru_page_prepare-cleanup.patch
+mm-__isolate_lru_page_prepare-in-isolate_migratepage.patch
+mm-migrate-fix-thp-s-mapcount-on-isolation.patch
+arm64-dts-rockchip-keep-i2s1-disabled-for-gpio-funct.patch
+arm-dts-rockchip-fix-node-name-for-hym8563-rtc.patch
+arm-dts-rockchip-fix-ir-receiver-node-names.patch
+arm64-dts-rockchip-fix-ir-receiver-node-names.patch
+arm-dts-rockchip-rk3188-fix-lcdc1-rgb24-node-name.patch
+arm-9251-1-perf-fix-stacktraces-for-tracepoint-event.patch
+arm-9266-1-mm-fix-no-mmu-zero_page-implementation.patch
+asoc-wm8962-wait-for-updated-value-of-wm8962_clockin.patch
+arm-dts-rockchip-disable-arm_global_timer-on-rk3066-.patch
+9p-fd-use-p9_hdrsz-for-header-size.patch
+regulator-slg51000-wait-after-asserting-cs-pin.patch
+alsa-seq-fix-function-prototype-mismatch-in-snd_seq_.patch
+btrfs-send-avoid-unaligned-encoded-writes-when-attem.patch
+asoc-soc-pcm-add-null-check-in-be-reparenting.patch
+regulator-twl6030-fix-get-status-of-twl6032-regulato.patch
+fbcon-use-kzalloc-in-fbcon_prepare_logo.patch
+usb-dwc3-gadget-disable-gusb2phycfg.susphy-for-end-t.patch
+9p-xen-check-logical-size-for-buffer-size.patch
+net-usb-qmi_wwan-add-u-blox-0x1342-composition.patch
+mm-khugepaged-take-the-right-locks-for-page-table-re.patch
+mm-khugepaged-fix-gup-fast-interaction-by-sending-ip.patch
+mm-khugepaged-invoke-mmu-notifiers-in-shmem-file-col.patch
+rtc-mc146818-prevent-reading-garbage.patch
+rtc-mc146818-detect-and-handle-broken-rtcs.patch
+rtc-mc146818-dont-test-for-bit-0-5-in-register-d.patch
+rtc-cmos-remove-stale-revisit-comments.patch
+rtc-mc146818-lib-change-return-values-of-mc146818_ge.patch
+rtc-check-return-value-from-mc146818_get_time.patch
+rtc-mc146818-lib-fix-rtc-presence-check.patch
+rtc-mc146818-lib-extract-mc146818_avoid_uip.patch
+rtc-cmos-avoid-uip-when-writing-alarm-time.patch
+rtc-cmos-avoid-uip-when-reading-alarm-time.patch
+rtc-cmos-replace-spin_lock_irqsave-with-spin_lock-in.patch
+rtc-mc146818-reduce-spinlock-section-in-mc146818_set.patch
+xen-netback-ensure-protocol-headers-don-t-fall-in-th.patch
+xen-netback-do-some-code-cleanup.patch
+xen-netback-don-t-call-kfree_skb-with-interrupts-dis.patch
+media-videobuf2-core-take-mmap_lock-in-vb2_get_unmap.patch
diff --git a/queue-5.10/usb-dwc3-gadget-disable-gusb2phycfg.susphy-for-end-t.patch b/queue-5.10/usb-dwc3-gadget-disable-gusb2phycfg.susphy-for-end-t.patch
new file mode 100644 (file)
index 0000000..e5ae827
--- /dev/null
@@ -0,0 +1,47 @@
+From 7b5d9450baa4cd437176ba3ecc980430f344197c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Nov 2022 17:58:50 -0800
+Subject: usb: dwc3: gadget: Disable GUSB2PHYCFG.SUSPHY for End Transfer
+
+From: Thinh Nguyen <Thinh.Nguyen@synopsys.com>
+
+[ Upstream commit 3aa07f72894d209fcf922ad686cbb28cf005aaad ]
+
+If there's a disconnection while operating in eSS, there may be a delay
+in VBUS drop response from the connector. In that case, the internal
+link state may drop to operate in usb2 speed while the controller thinks
+the VBUS is still high. The driver must make sure to disable
+GUSB2PHYCFG.SUSPHY when sending endpoint command while in usb2 speed.
+The End Transfer command may be called, and only that command needs to
+go through at this point. Let's keep it simple and unconditionally
+disable GUSB2PHYCFG.SUSPHY whenever we issue the command.
+
+This scenario is not seen in real hardware. In a rare case, our
+prototype type-c controller/interface may have a slow response
+triggerring this issue.
+
+Signed-off-by: Thinh Nguyen <Thinh.Nguyen@synopsys.com>
+Link: https://lore.kernel.org/r/5651117207803c26e2f22ddf4e5ce9e865dcf7c7.1668045468.git.Thinh.Nguyen@synopsys.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/dwc3/gadget.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
+index a9a43d649478..28a1194f849f 100644
+--- a/drivers/usb/dwc3/gadget.c
++++ b/drivers/usb/dwc3/gadget.c
+@@ -291,7 +291,8 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned int cmd,
+        *
+        * DWC_usb3 3.30a and DWC_usb31 1.90a programming guide section 3.2.2
+        */
+-      if (dwc->gadget->speed <= USB_SPEED_HIGH) {
++      if (dwc->gadget->speed <= USB_SPEED_HIGH ||
++          DWC3_DEPCMD_CMD(cmd) == DWC3_DEPCMD_ENDTRANSFER) {
+               reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0));
+               if (unlikely(reg & DWC3_GUSB2PHYCFG_SUSPHY)) {
+                       saved_config |= DWC3_GUSB2PHYCFG_SUSPHY;
+-- 
+2.35.1
+
diff --git a/queue-5.10/xen-netback-do-some-code-cleanup.patch b/queue-5.10/xen-netback-do-some-code-cleanup.patch
new file mode 100644 (file)
index 0000000..98f77fd
--- /dev/null
@@ -0,0 +1,147 @@
+From 603ae7055010466a085118b324f735574c43aa56 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 8 Jun 2022 06:37:26 +0200
+Subject: xen/netback: do some code cleanup
+
+From: Juergen Gross <jgross@suse.com>
+
+[ Upstream commit 5834e72eda0b7e5767eb107259d98eef19ebd11f ]
+
+Remove some unused macros and functions, make local functions static.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Acked-by: Wei Liu <wei.liu@kernel.org>
+Link: https://lore.kernel.org/r/20220608043726.9380-1-jgross@suse.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 74e7e1efdad4 ("xen/netback: don't call kfree_skb() with interrupts disabled")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/xen-netback/common.h    | 12 ------------
+ drivers/net/xen-netback/interface.c | 16 +---------------
+ drivers/net/xen-netback/netback.c   |  4 +++-
+ drivers/net/xen-netback/rx.c        |  2 +-
+ 4 files changed, 5 insertions(+), 29 deletions(-)
+
+diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
+index 6a9178896c90..945647128c0e 100644
+--- a/drivers/net/xen-netback/common.h
++++ b/drivers/net/xen-netback/common.h
+@@ -48,7 +48,6 @@
+ #include <linux/debugfs.h>
+ typedef unsigned int pending_ring_idx_t;
+-#define INVALID_PENDING_RING_IDX (~0U)
+ struct pending_tx_info {
+       struct xen_netif_tx_request req; /* tx request */
+@@ -82,8 +81,6 @@ struct xenvif_rx_meta {
+ /* Discriminate from any valid pending_idx value. */
+ #define INVALID_PENDING_IDX 0xFFFF
+-#define MAX_BUFFER_OFFSET XEN_PAGE_SIZE
+-
+ #define MAX_PENDING_REQS XEN_NETIF_TX_RING_SIZE
+ /* The maximum number of frags is derived from the size of a grant (same
+@@ -367,11 +364,6 @@ void xenvif_free(struct xenvif *vif);
+ int xenvif_xenbus_init(void);
+ void xenvif_xenbus_fini(void);
+-int xenvif_schedulable(struct xenvif *vif);
+-
+-int xenvif_queue_stopped(struct xenvif_queue *queue);
+-void xenvif_wake_queue(struct xenvif_queue *queue);
+-
+ /* (Un)Map communication rings. */
+ void xenvif_unmap_frontend_data_rings(struct xenvif_queue *queue);
+ int xenvif_map_frontend_data_rings(struct xenvif_queue *queue,
+@@ -394,7 +386,6 @@ int xenvif_dealloc_kthread(void *data);
+ irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data);
+ bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread);
+-void xenvif_rx_action(struct xenvif_queue *queue);
+ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb);
+ void xenvif_carrier_on(struct xenvif *vif);
+@@ -402,9 +393,6 @@ void xenvif_carrier_on(struct xenvif *vif);
+ /* Callback from stack when TX packet can be released */
+ void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success);
+-/* Unmap a pending page and release it back to the guest */
+-void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx);
+-
+ static inline pending_ring_idx_t nr_pending_reqs(struct xenvif_queue *queue)
+ {
+       return MAX_PENDING_REQS -
+diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
+index 7ce9807fc24c..645a804ab788 100644
+--- a/drivers/net/xen-netback/interface.c
++++ b/drivers/net/xen-netback/interface.c
+@@ -70,7 +70,7 @@ void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue)
+       wake_up(&queue->dealloc_wq);
+ }
+-int xenvif_schedulable(struct xenvif *vif)
++static int xenvif_schedulable(struct xenvif *vif)
+ {
+       return netif_running(vif->dev) &&
+               test_bit(VIF_STATUS_CONNECTED, &vif->status) &&
+@@ -178,20 +178,6 @@ irqreturn_t xenvif_interrupt(int irq, void *dev_id)
+       return IRQ_HANDLED;
+ }
+-int xenvif_queue_stopped(struct xenvif_queue *queue)
+-{
+-      struct net_device *dev = queue->vif->dev;
+-      unsigned int id = queue->id;
+-      return netif_tx_queue_stopped(netdev_get_tx_queue(dev, id));
+-}
+-
+-void xenvif_wake_queue(struct xenvif_queue *queue)
+-{
+-      struct net_device *dev = queue->vif->dev;
+-      unsigned int id = queue->id;
+-      netif_tx_wake_queue(netdev_get_tx_queue(dev, id));
+-}
+-
+ static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb,
+                              struct net_device *sb_dev)
+ {
+diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
+index 06fd61b71d37..fed0f7458e18 100644
+--- a/drivers/net/xen-netback/netback.c
++++ b/drivers/net/xen-netback/netback.c
+@@ -112,6 +112,8 @@ static void make_tx_response(struct xenvif_queue *queue,
+                            s8       st);
+ static void push_tx_responses(struct xenvif_queue *queue);
++static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx);
++
+ static inline int tx_work_todo(struct xenvif_queue *queue);
+ static inline unsigned long idx_to_pfn(struct xenvif_queue *queue,
+@@ -1440,7 +1442,7 @@ static void push_tx_responses(struct xenvif_queue *queue)
+               notify_remote_via_irq(queue->tx_irq);
+ }
+-void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx)
++static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx)
+ {
+       int ret;
+       struct gnttab_unmap_grant_ref tx_unmap_op;
+diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c
+index a0335407be42..932762177110 100644
+--- a/drivers/net/xen-netback/rx.c
++++ b/drivers/net/xen-netback/rx.c
+@@ -486,7 +486,7 @@ static void xenvif_rx_skb(struct xenvif_queue *queue)
+ #define RX_BATCH_SIZE 64
+-void xenvif_rx_action(struct xenvif_queue *queue)
++static void xenvif_rx_action(struct xenvif_queue *queue)
+ {
+       struct sk_buff_head completed_skbs;
+       unsigned int work_done = 0;
+-- 
+2.35.1
+
diff --git a/queue-5.10/xen-netback-don-t-call-kfree_skb-with-interrupts-dis.patch b/queue-5.10/xen-netback-don-t-call-kfree_skb-with-interrupts-dis.patch
new file mode 100644 (file)
index 0000000..0410277
--- /dev/null
@@ -0,0 +1,105 @@
+From 6156d032440fa7a7dcdd503d76769bc4f8389d4a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 6 Dec 2022 08:54:24 +0100
+Subject: xen/netback: don't call kfree_skb() with interrupts disabled
+
+From: Juergen Gross <jgross@suse.com>
+
+[ Upstream commit 74e7e1efdad45580cc3839f2a155174cf158f9b5 ]
+
+It is not allowed to call kfree_skb() from hardware interrupt
+context or with interrupts being disabled. So remove kfree_skb()
+from the spin_lock_irqsave() section and use the already existing
+"drop" label in xenvif_start_xmit() for dropping the SKB. At the
+same time replace the dev_kfree_skb() call there with a call of
+dev_kfree_skb_any(), as xenvif_start_xmit() can be called with
+disabled interrupts.
+
+This is XSA-424 / CVE-2022-42328 / CVE-2022-42329.
+
+Fixes: be81992f9086 ("xen/netback: don't queue unlimited number of packages")
+Reported-by: Yang Yingliang <yangyingliang@huawei.com>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/xen-netback/common.h    | 2 +-
+ drivers/net/xen-netback/interface.c | 6 ++++--
+ drivers/net/xen-netback/rx.c        | 8 +++++---
+ 3 files changed, 10 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
+index 945647128c0e..1ba974969216 100644
+--- a/drivers/net/xen-netback/common.h
++++ b/drivers/net/xen-netback/common.h
+@@ -386,7 +386,7 @@ int xenvif_dealloc_kthread(void *data);
+ irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data);
+ bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread);
+-void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb);
++bool xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb);
+ void xenvif_carrier_on(struct xenvif *vif);
+diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
+index 645a804ab788..97cf5bc48902 100644
+--- a/drivers/net/xen-netback/interface.c
++++ b/drivers/net/xen-netback/interface.c
+@@ -255,14 +255,16 @@ xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
+       if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE)
+               skb_clear_hash(skb);
+-      xenvif_rx_queue_tail(queue, skb);
++      if (!xenvif_rx_queue_tail(queue, skb))
++              goto drop;
++
+       xenvif_kick_thread(queue);
+       return NETDEV_TX_OK;
+  drop:
+       vif->dev->stats.tx_dropped++;
+-      dev_kfree_skb(skb);
++      dev_kfree_skb_any(skb);
+       return NETDEV_TX_OK;
+ }
+diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c
+index 932762177110..0ba754ebc5ba 100644
+--- a/drivers/net/xen-netback/rx.c
++++ b/drivers/net/xen-netback/rx.c
+@@ -82,9 +82,10 @@ static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue)
+       return false;
+ }
+-void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb)
++bool xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb)
+ {
+       unsigned long flags;
++      bool ret = true;
+       spin_lock_irqsave(&queue->rx_queue.lock, flags);
+@@ -92,8 +93,7 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb)
+               struct net_device *dev = queue->vif->dev;
+               netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));
+-              kfree_skb(skb);
+-              queue->vif->dev->stats.rx_dropped++;
++              ret = false;
+       } else {
+               if (skb_queue_empty(&queue->rx_queue))
+                       xenvif_update_needed_slots(queue, skb);
+@@ -104,6 +104,8 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb)
+       }
+       spin_unlock_irqrestore(&queue->rx_queue.lock, flags);
++
++      return ret;
+ }
+ static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue)
+-- 
+2.35.1
+
diff --git a/queue-5.10/xen-netback-ensure-protocol-headers-don-t-fall-in-th.patch b/queue-5.10/xen-netback-ensure-protocol-headers-don-t-fall-in-th.patch
new file mode 100644 (file)
index 0000000..b8c4123
--- /dev/null
@@ -0,0 +1,390 @@
+From 18dc22277fdfe02b0cfaa39d8737dc56df6a13b9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Nov 2022 09:16:59 +0000
+Subject: xen/netback: Ensure protocol headers don't fall in the non-linear
+ area
+
+From: Ross Lagerwall <ross.lagerwall@citrix.com>
+
+[ Upstream commit ad7f402ae4f466647c3a669b8a6f3e5d4271c84a ]
+
+In some cases, the frontend may send a packet where the protocol headers
+are spread across multiple slots. This would result in netback creating
+an skb where the protocol headers spill over into the non-linear area.
+Some drivers and NICs don't handle this properly resulting in an
+interface reset or worse.
+
+This issue was introduced by the removal of an unconditional skb pull in
+the tx path to improve performance.  Fix this without reintroducing the
+pull by setting up grant copy ops for as many slots as needed to reach
+the XEN_NETBACK_TX_COPY_LEN size. Adjust the rest of the code to handle
+multiple copy operations per skb.
+
+This is XSA-423 / CVE-2022-3643.
+
+Fixes: 7e5d7753956b ("xen-netback: remove unconditional __pskb_pull_tail() in guest Tx path")
+Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
+Reviewed-by: Paul Durrant <paul@xen.org>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/xen-netback/netback.c | 223 ++++++++++++++++--------------
+ 1 file changed, 123 insertions(+), 100 deletions(-)
+
+diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
+index b0cbc7fead74..06fd61b71d37 100644
+--- a/drivers/net/xen-netback/netback.c
++++ b/drivers/net/xen-netback/netback.c
+@@ -330,10 +330,13 @@ static int xenvif_count_requests(struct xenvif_queue *queue,
+ struct xenvif_tx_cb {
+-      u16 pending_idx;
++      u16 copy_pending_idx[XEN_NETBK_LEGACY_SLOTS_MAX + 1];
++      u8 copy_count;
+ };
+ #define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
++#define copy_pending_idx(skb, i) (XENVIF_TX_CB(skb)->copy_pending_idx[i])
++#define copy_count(skb) (XENVIF_TX_CB(skb)->copy_count)
+ static inline void xenvif_tx_create_map_op(struct xenvif_queue *queue,
+                                          u16 pending_idx,
+@@ -368,31 +371,93 @@ static inline struct sk_buff *xenvif_alloc_skb(unsigned int size)
+       return skb;
+ }
+-static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *queue,
+-                                                      struct sk_buff *skb,
+-                                                      struct xen_netif_tx_request *txp,
+-                                                      struct gnttab_map_grant_ref *gop,
+-                                                      unsigned int frag_overflow,
+-                                                      struct sk_buff *nskb)
++static void xenvif_get_requests(struct xenvif_queue *queue,
++                              struct sk_buff *skb,
++                              struct xen_netif_tx_request *first,
++                              struct xen_netif_tx_request *txfrags,
++                              unsigned *copy_ops,
++                              unsigned *map_ops,
++                              unsigned int frag_overflow,
++                              struct sk_buff *nskb,
++                              unsigned int extra_count,
++                              unsigned int data_len)
+ {
+       struct skb_shared_info *shinfo = skb_shinfo(skb);
+       skb_frag_t *frags = shinfo->frags;
+-      u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
+-      int start;
++      u16 pending_idx;
+       pending_ring_idx_t index;
+       unsigned int nr_slots;
++      struct gnttab_copy *cop = queue->tx_copy_ops + *copy_ops;
++      struct gnttab_map_grant_ref *gop = queue->tx_map_ops + *map_ops;
++      struct xen_netif_tx_request *txp = first;
++
++      nr_slots = shinfo->nr_frags + 1;
++
++      copy_count(skb) = 0;
++
++      /* Create copy ops for exactly data_len bytes into the skb head. */
++      __skb_put(skb, data_len);
++      while (data_len > 0) {
++              int amount = data_len > txp->size ? txp->size : data_len;
++
++              cop->source.u.ref = txp->gref;
++              cop->source.domid = queue->vif->domid;
++              cop->source.offset = txp->offset;
++
++              cop->dest.domid = DOMID_SELF;
++              cop->dest.offset = (offset_in_page(skb->data +
++                                                 skb_headlen(skb) -
++                                                 data_len)) & ~XEN_PAGE_MASK;
++              cop->dest.u.gmfn = virt_to_gfn(skb->data + skb_headlen(skb)
++                                             - data_len);
++
++              cop->len = amount;
++              cop->flags = GNTCOPY_source_gref;
+-      nr_slots = shinfo->nr_frags;
++              index = pending_index(queue->pending_cons);
++              pending_idx = queue->pending_ring[index];
++              callback_param(queue, pending_idx).ctx = NULL;
++              copy_pending_idx(skb, copy_count(skb)) = pending_idx;
++              copy_count(skb)++;
++
++              cop++;
++              data_len -= amount;
+-      /* Skip first skb fragment if it is on same page as header fragment. */
+-      start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
++              if (amount == txp->size) {
++                      /* The copy op covered the full tx_request */
++
++                      memcpy(&queue->pending_tx_info[pending_idx].req,
++                             txp, sizeof(*txp));
++                      queue->pending_tx_info[pending_idx].extra_count =
++                              (txp == first) ? extra_count : 0;
++
++                      if (txp == first)
++                              txp = txfrags;
++                      else
++                              txp++;
++                      queue->pending_cons++;
++                      nr_slots--;
++              } else {
++                      /* The copy op partially covered the tx_request.
++                       * The remainder will be mapped.
++                       */
++                      txp->offset += amount;
++                      txp->size -= amount;
++              }
++      }
+-      for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots;
+-           shinfo->nr_frags++, txp++, gop++) {
++      for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots;
++           shinfo->nr_frags++, gop++) {
+               index = pending_index(queue->pending_cons++);
+               pending_idx = queue->pending_ring[index];
+-              xenvif_tx_create_map_op(queue, pending_idx, txp, 0, gop);
++              xenvif_tx_create_map_op(queue, pending_idx, txp,
++                                      txp == first ? extra_count : 0, gop);
+               frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx);
++
++              if (txp == first)
++                      txp = txfrags;
++              else
++                      txp++;
+       }
+       if (frag_overflow) {
+@@ -413,7 +478,8 @@ static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *que
+               skb_shinfo(skb)->frag_list = nskb;
+       }
+-      return gop;
++      (*copy_ops) = cop - queue->tx_copy_ops;
++      (*map_ops) = gop - queue->tx_map_ops;
+ }
+ static inline void xenvif_grant_handle_set(struct xenvif_queue *queue,
+@@ -449,7 +515,7 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue,
+                              struct gnttab_copy **gopp_copy)
+ {
+       struct gnttab_map_grant_ref *gop_map = *gopp_map;
+-      u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
++      u16 pending_idx;
+       /* This always points to the shinfo of the skb being checked, which
+        * could be either the first or the one on the frag_list
+        */
+@@ -460,24 +526,37 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue,
+       struct skb_shared_info *first_shinfo = NULL;
+       int nr_frags = shinfo->nr_frags;
+       const bool sharedslot = nr_frags &&
+-                              frag_get_pending_idx(&shinfo->frags[0]) == pending_idx;
++                              frag_get_pending_idx(&shinfo->frags[0]) ==
++                                  copy_pending_idx(skb, copy_count(skb) - 1);
+       int i, err;
+-      /* Check status of header. */
+-      err = (*gopp_copy)->status;
+-      if (unlikely(err)) {
+-              if (net_ratelimit())
+-                      netdev_dbg(queue->vif->dev,
+-                                 "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n",
+-                                 (*gopp_copy)->status,
+-                                 pending_idx,
+-                                 (*gopp_copy)->source.u.ref);
+-              /* The first frag might still have this slot mapped */
+-              if (!sharedslot)
+-                      xenvif_idx_release(queue, pending_idx,
+-                                         XEN_NETIF_RSP_ERROR);
++      for (i = 0; i < copy_count(skb); i++) {
++              int newerr;
++
++              /* Check status of header. */
++              pending_idx = copy_pending_idx(skb, i);
++
++              newerr = (*gopp_copy)->status;
++              if (likely(!newerr)) {
++                      /* The first frag might still have this slot mapped */
++                      if (i < copy_count(skb) - 1 || !sharedslot)
++                              xenvif_idx_release(queue, pending_idx,
++                                                 XEN_NETIF_RSP_OKAY);
++              } else {
++                      err = newerr;
++                      if (net_ratelimit())
++                              netdev_dbg(queue->vif->dev,
++                                         "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n",
++                                         (*gopp_copy)->status,
++                                         pending_idx,
++                                         (*gopp_copy)->source.u.ref);
++                      /* The first frag might still have this slot mapped */
++                      if (i < copy_count(skb) - 1 || !sharedslot)
++                              xenvif_idx_release(queue, pending_idx,
++                                                 XEN_NETIF_RSP_ERROR);
++              }
++              (*gopp_copy)++;
+       }
+-      (*gopp_copy)++;
+ check_frags:
+       for (i = 0; i < nr_frags; i++, gop_map++) {
+@@ -524,14 +603,6 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue,
+               if (err)
+                       continue;
+-              /* First error: if the header haven't shared a slot with the
+-               * first frag, release it as well.
+-               */
+-              if (!sharedslot)
+-                      xenvif_idx_release(queue,
+-                                         XENVIF_TX_CB(skb)->pending_idx,
+-                                         XEN_NETIF_RSP_OKAY);
+-
+               /* Invalidate preceding fragments of this skb. */
+               for (j = 0; j < i; j++) {
+                       pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
+@@ -801,7 +872,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
+                                    unsigned *copy_ops,
+                                    unsigned *map_ops)
+ {
+-      struct gnttab_map_grant_ref *gop = queue->tx_map_ops;
+       struct sk_buff *skb, *nskb;
+       int ret;
+       unsigned int frag_overflow;
+@@ -883,8 +953,12 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
+                       continue;
+               }
++              data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN) ?
++                      XEN_NETBACK_TX_COPY_LEN : txreq.size;
++
+               ret = xenvif_count_requests(queue, &txreq, extra_count,
+                                           txfrags, work_to_do);
++
+               if (unlikely(ret < 0))
+                       break;
+@@ -910,9 +984,8 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
+               index = pending_index(queue->pending_cons);
+               pending_idx = queue->pending_ring[index];
+-              data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN &&
+-                          ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
+-                      XEN_NETBACK_TX_COPY_LEN : txreq.size;
++              if (ret >= XEN_NETBK_LEGACY_SLOTS_MAX - 1 && data_len < txreq.size)
++                      data_len = txreq.size;
+               skb = xenvif_alloc_skb(data_len);
+               if (unlikely(skb == NULL)) {
+@@ -923,8 +996,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
+               }
+               skb_shinfo(skb)->nr_frags = ret;
+-              if (data_len < txreq.size)
+-                      skb_shinfo(skb)->nr_frags++;
+               /* At this point shinfo->nr_frags is in fact the number of
+                * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
+                */
+@@ -986,54 +1057,19 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
+                                            type);
+               }
+-              XENVIF_TX_CB(skb)->pending_idx = pending_idx;
+-
+-              __skb_put(skb, data_len);
+-              queue->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref;
+-              queue->tx_copy_ops[*copy_ops].source.domid = queue->vif->domid;
+-              queue->tx_copy_ops[*copy_ops].source.offset = txreq.offset;
+-
+-              queue->tx_copy_ops[*copy_ops].dest.u.gmfn =
+-                      virt_to_gfn(skb->data);
+-              queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF;
+-              queue->tx_copy_ops[*copy_ops].dest.offset =
+-                      offset_in_page(skb->data) & ~XEN_PAGE_MASK;
+-
+-              queue->tx_copy_ops[*copy_ops].len = data_len;
+-              queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref;
+-
+-              (*copy_ops)++;
+-
+-              if (data_len < txreq.size) {
+-                      frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
+-                                           pending_idx);
+-                      xenvif_tx_create_map_op(queue, pending_idx, &txreq,
+-                                              extra_count, gop);
+-                      gop++;
+-              } else {
+-                      frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
+-                                           INVALID_PENDING_IDX);
+-                      memcpy(&queue->pending_tx_info[pending_idx].req,
+-                             &txreq, sizeof(txreq));
+-                      queue->pending_tx_info[pending_idx].extra_count =
+-                              extra_count;
+-              }
+-
+-              queue->pending_cons++;
+-
+-              gop = xenvif_get_requests(queue, skb, txfrags, gop,
+-                                        frag_overflow, nskb);
++              xenvif_get_requests(queue, skb, &txreq, txfrags, copy_ops,
++                                  map_ops, frag_overflow, nskb, extra_count,
++                                  data_len);
+               __skb_queue_tail(&queue->tx_queue, skb);
+               queue->tx.req_cons = idx;
+-              if (((gop-queue->tx_map_ops) >= ARRAY_SIZE(queue->tx_map_ops)) ||
++              if ((*map_ops >= ARRAY_SIZE(queue->tx_map_ops)) ||
+                   (*copy_ops >= ARRAY_SIZE(queue->tx_copy_ops)))
+                       break;
+       }
+-      (*map_ops) = gop - queue->tx_map_ops;
+       return;
+ }
+@@ -1112,9 +1148,8 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
+       while ((skb = __skb_dequeue(&queue->tx_queue)) != NULL) {
+               struct xen_netif_tx_request *txp;
+               u16 pending_idx;
+-              unsigned data_len;
+-              pending_idx = XENVIF_TX_CB(skb)->pending_idx;
++              pending_idx = copy_pending_idx(skb, 0);
+               txp = &queue->pending_tx_info[pending_idx].req;
+               /* Check the remap error code. */
+@@ -1133,18 +1168,6 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
+                       continue;
+               }
+-              data_len = skb->len;
+-              callback_param(queue, pending_idx).ctx = NULL;
+-              if (data_len < txp->size) {
+-                      /* Append the packet payload as a fragment. */
+-                      txp->offset += data_len;
+-                      txp->size -= data_len;
+-              } else {
+-                      /* Schedule a response immediately. */
+-                      xenvif_idx_release(queue, pending_idx,
+-                                         XEN_NETIF_RSP_OKAY);
+-              }
+-
+               if (txp->flags & XEN_NETTXF_csum_blank)
+                       skb->ip_summed = CHECKSUM_PARTIAL;
+               else if (txp->flags & XEN_NETTXF_data_validated)
+@@ -1330,7 +1353,7 @@ static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue)
+ /* Called after netfront has transmitted */
+ int xenvif_tx_action(struct xenvif_queue *queue, int budget)
+ {
+-      unsigned nr_mops, nr_cops = 0;
++      unsigned nr_mops = 0, nr_cops = 0;
+       int work_done, ret;
+       if (unlikely(!tx_work_todo(queue)))
+-- 
+2.35.1
+