--- /dev/null
+From 1a3e1f40962c445b997151a542314f3c6097f8c3 Mon Sep 17 00:00:00 2001
+From: Johannes Weiner <hannes@cmpxchg.org>
+Date: Thu, 6 Aug 2020 23:20:45 -0700
+Subject: mm: memcontrol: decouple reference counting from page accounting
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+commit 1a3e1f40962c445b997151a542314f3c6097f8c3 upstream.
+
+The reference counting of a memcg is currently coupled directly to how
+many 4k pages are charged to it. This doesn't work well with Roman's new
+slab controller, which maintains pools of objects and doesn't want to keep
+an extra balance sheet for the pages backing those objects.
+
+This unusual refcounting design (reference counts usually track pointers
+to an object) is only for historical reasons: memcg used to not take any
+css references and simply stalled offlining until all charges had been
+reparented and the page counters had dropped to zero. When we got rid of
+the reparenting requirement, the simple mechanical translation was to take
+a reference for every charge.
+
+More historical context can be found in commit e8ea14cc6ead ("mm:
+memcontrol: take a css reference for each charged page"), commit
+64f219938941 ("mm: memcontrol: remove obsolete kmemcg pinning tricks") and
+commit b2052564e66d ("mm: memcontrol: continue cache reclaim from offlined
+groups").
+
+The new slab controller exposes the limitations in this scheme, so let's
+switch it to a more idiomatic reference counting model based on actual
+kernel pointers to the memcg:
+
+- The per-cpu stock holds a reference to the memcg it's caching
+
+- User pages hold a reference for their page->mem_cgroup. Transparent
+ huge pages will no longer acquire tail references in advance, we'll
+ get them if needed during the split.
+
+- Kernel pages hold a reference for their page->mem_cgroup
+
+- Pages allocated in the root cgroup will acquire and release css
+ references for simplicity. css_get() and css_put() optimize that.
+
+- The current memcg_charge_slab() already hacked around the per-charge
+ references; this change gets rid of that as well.
+
+- tcp accounting will handle the reference in mem_cgroup_sk_{alloc,free}
+
+Roman:
+1) Rebased on top of the current mm tree: added css_get() in
+ mem_cgroup_charge(), dropped mem_cgroup_try_charge() part
+2) I've reformatted commit references in the commit log to make
+ checkpatch.pl happy.
+
+[hughd@google.com: remove css_put_many() from __mem_cgroup_clear_mc()]
+ Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2007302011450.2347@eggly.anvils
+
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Signed-off-by: Roman Gushchin <guro@fb.com>
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Reviewed-by: Shakeel Butt <shakeelb@google.com>
+Acked-by: Roman Gushchin <guro@fb.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Christoph Lameter <cl@linux.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Link: http://lkml.kernel.org/r/20200623174037.3951353-6-guro@fb.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Fixes: cdec2e4265df ("memcg: coalesce charging via percpu storage")
+Signed-off-by: GONG, Ruiqi <gongruiqi1@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memcontrol.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -2214,6 +2214,9 @@ static void drain_stock(struct memcg_sto
+ {
+ struct mem_cgroup *old = stock->cached;
+
++ if (!old)
++ return;
++
+ if (stock->nr_pages) {
+ page_counter_uncharge(&old->memory, stock->nr_pages);
+ if (do_memsw_account())
+@@ -2221,6 +2224,8 @@ static void drain_stock(struct memcg_sto
+ css_put_many(&old->css, stock->nr_pages);
+ stock->nr_pages = 0;
+ }
++
++ css_put(&old->css);
+ stock->cached = NULL;
+ }
+
+@@ -2256,6 +2261,7 @@ static void refill_stock(struct mem_cgro
+ stock = this_cpu_ptr(&memcg_stock);
+ if (stock->cached != memcg) { /* reset if necessary */
+ drain_stock(stock);
++ css_get(&memcg->css);
+ stock->cached = memcg;
+ }
+ stock->nr_pages += nr_pages;
--- /dev/null
+From 5bc09b397cbf1221f8a8aacb1152650c9195b02b Mon Sep 17 00:00:00 2001
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Date: Sun, 4 Feb 2024 01:16:45 +0900
+Subject: nilfs2: fix potential bug in end_buffer_async_write
+
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+
+commit 5bc09b397cbf1221f8a8aacb1152650c9195b02b upstream.
+
+According to a syzbot report, end_buffer_async_write(), which handles the
+completion of block device writes, may detect an abnormal condition of the
+buffer's async_write flag and cause a BUG_ON failure when using nilfs2.
+
+Nilfs2 itself does not use end_buffer_async_write(). But, the async_write
+flag is now used as a marker by commit 7f42ec394156 ("nilfs2: fix issue
+with race condition of competition between segments for dirty blocks") as
+a means of resolving double list insertion of dirty blocks in
+nilfs_lookup_dirty_data_buffers() and nilfs_lookup_node_buffers() and the
+resulting crash.
+
+This modification is safe as long as it is used for file data and b-tree
+node blocks where the page caches are independent. However, it was
+irrelevant and redundant to also introduce async_write for segment summary
+and super root blocks that share buffers with the backing device. This
+led to the possibility that the BUG_ON check in end_buffer_async_write
+would fail as described above, if independent writebacks of the backing
+device occurred in parallel.
+
+The use of async_write for segment summary buffers has already been
+removed in a previous change.
+
+Fix this issue by removing the manipulation of the async_write flag for
+the remaining super root block buffer.
+
+Link: https://lkml.kernel.org/r/20240203161645.4992-1-konishi.ryusuke@gmail.com
+Fixes: 7f42ec394156 ("nilfs2: fix issue with race condition of competition between segments for dirty blocks")
+Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Reported-by: syzbot+5c04210f7c7f897c1e7f@syzkaller.appspotmail.com
+Closes: https://lkml.kernel.org/r/00000000000019a97c05fd42f8c8@google.com
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nilfs2/segment.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/fs/nilfs2/segment.c
++++ b/fs/nilfs2/segment.c
+@@ -1702,7 +1702,6 @@ static void nilfs_segctor_prepare_write(
+
+ list_for_each_entry(bh, &segbuf->sb_payload_buffers,
+ b_assoc_buffers) {
+- set_buffer_async_write(bh);
+ if (bh == segbuf->sb_super_root) {
+ if (bh->b_page != bd_page) {
+ lock_page(bd_page);
+@@ -1713,6 +1712,7 @@ static void nilfs_segctor_prepare_write(
+ }
+ break;
+ }
++ set_buffer_async_write(bh);
+ if (bh->b_page != fs_page) {
+ nilfs_begin_page_io(fs_page);
+ fs_page = bh->b_page;
+@@ -1798,7 +1798,6 @@ static void nilfs_abort_logs(struct list
+
+ list_for_each_entry(bh, &segbuf->sb_payload_buffers,
+ b_assoc_buffers) {
+- clear_buffer_async_write(bh);
+ if (bh == segbuf->sb_super_root) {
+ clear_buffer_uptodate(bh);
+ if (bh->b_page != bd_page) {
+@@ -1807,6 +1806,7 @@ static void nilfs_abort_logs(struct list
+ }
+ break;
+ }
++ clear_buffer_async_write(bh);
+ if (bh->b_page != fs_page) {
+ nilfs_end_page_io(fs_page, err);
+ fs_page = bh->b_page;
+@@ -1894,8 +1894,9 @@ static void nilfs_segctor_complete_write
+ BIT(BH_Delay) | BIT(BH_NILFS_Volatile) |
+ BIT(BH_NILFS_Redirected));
+
+- set_mask_bits(&bh->b_state, clear_bits, set_bits);
+ if (bh == segbuf->sb_super_root) {
++ set_buffer_uptodate(bh);
++ clear_buffer_dirty(bh);
+ if (bh->b_page != bd_page) {
+ end_page_writeback(bd_page);
+ bd_page = bh->b_page;
+@@ -1903,6 +1904,7 @@ static void nilfs_segctor_complete_write
+ update_sr = true;
+ break;
+ }
++ set_mask_bits(&bh->b_state, clear_bits, set_bits);
+ if (bh->b_page != fs_page) {
+ nilfs_end_page_io(fs_page, 0);
+ fs_page = bh->b_page;
--- /dev/null
+From 5124a0a549857c4b87173280e192eea24dea72ad Mon Sep 17 00:00:00 2001
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Date: Fri, 27 Jan 2023 01:41:14 +0900
+Subject: nilfs2: replace WARN_ONs for invalid DAT metadata block requests
+
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+
+commit 5124a0a549857c4b87173280e192eea24dea72ad upstream.
+
+If DAT metadata file block access fails due to corruption of the DAT file
+or abnormal virtual block numbers held by b-trees or inodes, a kernel
+warning is generated.
+
+This replaces the WARN_ONs by error output, so that a kernel, booted with
+panic_on_warn, does not panic. This patch also replaces the detected
+return code -ENOENT with another internal code -EINVAL to notify the bmap
+layer of metadata corruption. When the bmap layer sees -EINVAL, it
+handles the abnormal situation with nilfs_bmap_convert_error() and finally
+returns code -EIO as it should.
+
+Link: https://lkml.kernel.org/r/0000000000005cc3d205ea23ddcf@google.com
+Link: https://lkml.kernel.org/r/20230126164114.6911-1-konishi.ryusuke@gmail.com
+Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Reported-by: <syzbot+5d5d25f90f195a3cfcb4@syzkaller.appspotmail.com>
+Tested-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nilfs2/dat.c | 27 +++++++++++++++++----------
+ 1 file changed, 17 insertions(+), 10 deletions(-)
+
+--- a/fs/nilfs2/dat.c
++++ b/fs/nilfs2/dat.c
+@@ -40,8 +40,21 @@ static inline struct nilfs_dat_info *NIL
+ static int nilfs_dat_prepare_entry(struct inode *dat,
+ struct nilfs_palloc_req *req, int create)
+ {
+- return nilfs_palloc_get_entry_block(dat, req->pr_entry_nr,
+- create, &req->pr_entry_bh);
++ int ret;
++
++ ret = nilfs_palloc_get_entry_block(dat, req->pr_entry_nr,
++ create, &req->pr_entry_bh);
++ if (unlikely(ret == -ENOENT)) {
++ nilfs_err(dat->i_sb,
++ "DAT doesn't have a block to manage vblocknr = %llu",
++ (unsigned long long)req->pr_entry_nr);
++ /*
++ * Return internal code -EINVAL to notify bmap layer of
++ * metadata corruption.
++ */
++ ret = -EINVAL;
++ }
++ return ret;
+ }
+
+ static void nilfs_dat_commit_entry(struct inode *dat,
+@@ -123,11 +136,7 @@ static void nilfs_dat_commit_free(struct
+
+ int nilfs_dat_prepare_start(struct inode *dat, struct nilfs_palloc_req *req)
+ {
+- int ret;
+-
+- ret = nilfs_dat_prepare_entry(dat, req, 0);
+- WARN_ON(ret == -ENOENT);
+- return ret;
++ return nilfs_dat_prepare_entry(dat, req, 0);
+ }
+
+ void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req,
+@@ -154,10 +163,8 @@ int nilfs_dat_prepare_end(struct inode *
+ int ret;
+
+ ret = nilfs_dat_prepare_entry(dat, req, 0);
+- if (ret < 0) {
+- WARN_ON(ret == -ENOENT);
++ if (ret < 0)
+ return ret;
+- }
+
+ kaddr = kmap_atomic(req->pr_entry_bh->b_page);
+ entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
--- /dev/null
+From 944d5fe50f3f03daacfea16300e656a1691c4a23 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linuxfoundation.org>
+Date: Sun, 4 Feb 2024 15:25:12 +0000
+Subject: sched/membarrier: reduce the ability to hammer on sys_membarrier
+
+From: Linus Torvalds <torvalds@linuxfoundation.org>
+
+commit 944d5fe50f3f03daacfea16300e656a1691c4a23 upstream.
+
+On some systems, sys_membarrier can be very expensive, causing overall
+slowdowns for everything. So put a lock on the path to serialize the
+accesses, so that it cannot be called at too high a frequency and
+saturate the machine.
+
+Reviewed-and-tested-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Acked-by: Borislav Petkov <bp@alien8.de>
+Fixes: 22e4ebb97582 ("membarrier: Provide expedited private command")
+Fixes: c5f58bd58f43 ("membarrier: Provide GLOBAL_EXPEDITED command")
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+[ converted to explicit mutex_*() calls - cleanup.h is not in this stable
+ branch - gregkh ]
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sched/membarrier.c | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/kernel/sched/membarrier.c
++++ b/kernel/sched/membarrier.c
+@@ -25,6 +25,8 @@
+ | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED \
+ | MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK)
+
++static DEFINE_MUTEX(membarrier_ipi_mutex);
++
+ static void ipi_mb(void *info)
+ {
+ smp_mb(); /* IPIs should be serializing but paranoid. */
+@@ -97,6 +99,7 @@ static int membarrier_global_expedited(v
+ if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
+ return -ENOMEM;
+
++ mutex_lock(&membarrier_ipi_mutex);
+ cpus_read_lock();
+ rcu_read_lock();
+ for_each_online_cpu(cpu) {
+@@ -143,6 +146,8 @@ static int membarrier_global_expedited(v
+ * rq->curr modification in scheduler.
+ */
+ smp_mb(); /* exit from system call is not a mb */
++ mutex_unlock(&membarrier_ipi_mutex);
++
+ return 0;
+ }
+
+@@ -178,6 +183,7 @@ static int membarrier_private_expedited(
+ if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
+ return -ENOMEM;
+
++ mutex_lock(&membarrier_ipi_mutex);
+ cpus_read_lock();
+ rcu_read_lock();
+ for_each_online_cpu(cpu) {
+@@ -212,6 +218,7 @@ static int membarrier_private_expedited(
+ * rq->curr modification in scheduler.
+ */
+ smp_mb(); /* exit from system call is not a mb */
++ mutex_unlock(&membarrier_ipi_mutex);
+
+ return 0;
+ }
+@@ -253,6 +260,7 @@ static int sync_runqueues_membarrier_sta
+ * between threads which are users of @mm has its membarrier state
+ * updated.
+ */
++ mutex_lock(&membarrier_ipi_mutex);
+ cpus_read_lock();
+ rcu_read_lock();
+ for_each_online_cpu(cpu) {
+@@ -269,6 +277,7 @@ static int sync_runqueues_membarrier_sta
+
+ free_cpumask_var(tmpmask);
+ cpus_read_unlock();
++ mutex_unlock(&membarrier_ipi_mutex);
+
+ return 0;
+ }
netfilter-ipset-fix-performance-regression-in-swap-operation.patch
netfilter-ipset-missing-gc-cancellations-fixed.patch
net-prevent-mss-overflow-in-skb_segment.patch
+sched-membarrier-reduce-the-ability-to-hammer-on-sys_membarrier.patch
+mm-memcontrol-decouple-reference-counting-from-page-accounting.patch
+nilfs2-fix-potential-bug-in-end_buffer_async_write.patch
+nilfs2-replace-warn_ons-for-invalid-dat-metadata-block-requests.patch