--- /dev/null
+From foo@baz Wed Dec 30 04:31:08 PM CET 2020
+From: Takashi Iwai <tiwai@suse.de>
+Date: Fri, 13 Dec 2019 09:51:11 +0100
+Subject: ALSA: hda/ca0132 - Fix work handling in delayed HP detection
+
+From: Takashi Iwai <tiwai@suse.de>
+
+commit 42fb6b1d41eb5905d77c06cad2e87b70289bdb76 upstream
+
+CA0132 has delayed HP jack detection code that is invoked from the
+unsol handler, but it does a few weird things: it cancels the work from
+inside the work handler, and yet it misses the cancel-sync call at
+(runtime-)suspend. This patch addresses those issues.
+
+Fixes: 15c2b3cc09a3 ("ALSA: hda/ca0132 - Fix possible workqueue stall")
+Cc: <stable@vger.kernel.org>
+Link: https://lore.kernel.org/r/20191213085111.22855-4-tiwai@suse.de
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+[sudip: adjust context]
+Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ sound/pci/hda/patch_ca0132.c | 16 ++++++++++++++--
+ 1 file changed, 14 insertions(+), 2 deletions(-)
+
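+A minimal sketch of the pattern behind the suspend part of the fix, using
+hypothetical my_spec/hp_work names rather than the driver's own code:
+cancel_delayed_work() only removes a not-yet-running instance from the
+queue, while cancel_delayed_work_sync() also waits for a handler that is
+already executing, which is what a (runtime-)suspend path needs.
+
+	static int my_codec_suspend(struct hda_codec *codec)
+	{
+		struct my_spec *spec = codec->spec;
+
+		/* wait until any scheduled HP-detection work has finished */
+		cancel_delayed_work_sync(&spec->hp_work);
+		return 0;
+	}
+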
+--- a/sound/pci/hda/patch_ca0132.c
++++ b/sound/pci/hda/patch_ca0132.c
+@@ -4443,11 +4443,10 @@ static void hp_callback(struct hda_codec
+ /* Delay enabling the HP amp, to let the mic-detection
+ * state machine run.
+ */
+- cancel_delayed_work(&spec->unsol_hp_work);
+- schedule_delayed_work(&spec->unsol_hp_work, msecs_to_jiffies(500));
+ tbl = snd_hda_jack_tbl_get(codec, cb->nid);
+ if (tbl)
+ tbl->block_report = 1;
++ schedule_delayed_work(&spec->unsol_hp_work, msecs_to_jiffies(500));
+ }
+
+ static void amic_callback(struct hda_codec *codec, struct hda_jack_callback *cb)
+@@ -4625,12 +4624,25 @@ static void ca0132_free(struct hda_codec
+ kfree(codec->spec);
+ }
+
++#ifdef CONFIG_PM
++static int ca0132_suspend(struct hda_codec *codec)
++{
++ struct ca0132_spec *spec = codec->spec;
++
++ cancel_delayed_work_sync(&spec->unsol_hp_work);
++ return 0;
++}
++#endif
++
+ static const struct hda_codec_ops ca0132_patch_ops = {
+ .build_controls = ca0132_build_controls,
+ .build_pcms = ca0132_build_pcms,
+ .init = ca0132_init,
+ .free = ca0132_free,
+ .unsol_event = snd_hda_jack_unsol_event,
++#ifdef CONFIG_PM
++ .suspend = ca0132_suspend,
++#endif
+ };
+
+ static void ca0132_config(struct hda_codec *codec)
--- /dev/null
+From foo@baz Wed Dec 30 04:31:08 PM CET 2020
+From: Johan Hovold <johan@kernel.org>
+Date: Tue, 14 Jan 2020 09:39:53 +0100
+Subject: ALSA: usb-audio: fix sync-ep altsetting sanity check
+
+From: Johan Hovold <johan@kernel.org>
+
+commit 5d1b71226dc4d44b4b65766fa9d74492f9d4587b upstream
+
+The altsetting sanity check in set_sync_ep_implicit_fb_quirk() was
+checking for there to be at least one altsetting but then went on to
+access the second one, which may not exist.
+
+This could lead to random slab data being used to initialise the sync
+endpoint in snd_usb_add_endpoint().
+
+Fixes: c75a8a7ae565 ("ALSA: snd-usb: add support for implicit feedback")
+Fixes: ca10a7ebdff1 ("ALSA: usb-audio: FT C400 sync playback EP to capture EP")
+Fixes: 5e35dc0338d8 ("ALSA: usb-audio: add implicit fb quirk for Behringer UFX1204")
+Fixes: 17f08b0d9aaf ("ALSA: usb-audio: add implicit fb quirk for Axe-Fx II")
+Fixes: 103e9625647a ("ALSA: usb-audio: simplify set_sync_ep_implicit_fb_quirk")
+Cc: stable <stable@vger.kernel.org> # 3.5
+Signed-off-by: Johan Hovold <johan@kernel.org>
+Link: https://lore.kernel.org/r/20200114083953.1106-1-johan@kernel.org
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ sound/usb/pcm.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
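+The bounds reasoning in one place (a sketch of the fixed check, not new
+code): altsetting[] holds num_altsetting entries, indices 0 through
+num_altsetting - 1, so dereferencing altsetting[1] requires at least two
+entries; testing only for a non-empty array is off by one.
+
+	iface = usb_ifnum_to_if(dev, ifnum);
+	if (!iface || iface->num_altsetting < 2)
+		return -EINVAL;
+	alts = &iface->altsetting[1];	/* now known to exist */
+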
+--- a/sound/usb/pcm.c
++++ b/sound/usb/pcm.c
+@@ -369,7 +369,7 @@ static int set_sync_ep_implicit_fb_quirk
+ add_sync_ep_from_ifnum:
+ iface = usb_ifnum_to_if(dev, ifnum);
+
+- if (!iface || iface->num_altsetting == 0)
++ if (!iface || iface->num_altsetting < 2)
+ return -EINVAL;
+
+ alts = &iface->altsetting[1];
--- /dev/null
+From foo@baz Wed Dec 30 04:31:08 PM CET 2020
+From: Alberto Aguirre <albaguirre@gmail.com>
+Date: Wed, 18 Apr 2018 09:35:34 -0500
+Subject: ALSA: usb-audio: simplify set_sync_ep_implicit_fb_quirk
+
+From: Alberto Aguirre <albaguirre@gmail.com>
+
+commit 103e9625647ad74d201e26fb74afcd8479142a37 upstream
+
+Signed-off-by: Alberto Aguirre <albaguirre@gmail.com>
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ sound/usb/pcm.c | 52 ++++++++++++++++++++--------------------------------
+ 1 file changed, 20 insertions(+), 32 deletions(-)
+
+--- a/sound/usb/pcm.c
++++ b/sound/usb/pcm.c
+@@ -324,6 +324,7 @@ static int set_sync_ep_implicit_fb_quirk
+ struct usb_host_interface *alts;
+ struct usb_interface *iface;
+ unsigned int ep;
++ unsigned int ifnum;
+
+ /* Implicit feedback sync EPs consumers are always playback EPs */
+ if (subs->direction != SNDRV_PCM_STREAM_PLAYBACK)
+@@ -334,44 +335,23 @@ static int set_sync_ep_implicit_fb_quirk
+ case USB_ID(0x0763, 0x2031): /* M-Audio Fast Track C600 */
+ case USB_ID(0x22f0, 0x0006): /* Allen&Heath Qu-16 */
+ ep = 0x81;
+- iface = usb_ifnum_to_if(dev, 3);
+-
+- if (!iface || iface->num_altsetting == 0)
+- return -EINVAL;
+-
+- alts = &iface->altsetting[1];
+- goto add_sync_ep;
+- break;
++ ifnum = 3;
++ goto add_sync_ep_from_ifnum;
+ case USB_ID(0x0763, 0x2080): /* M-Audio FastTrack Ultra */
+ case USB_ID(0x0763, 0x2081):
+ ep = 0x81;
+- iface = usb_ifnum_to_if(dev, 2);
+-
+- if (!iface || iface->num_altsetting == 0)
+- return -EINVAL;
+-
+- alts = &iface->altsetting[1];
+- goto add_sync_ep;
+- case USB_ID(0x2466, 0x8003):
++ ifnum = 2;
++ goto add_sync_ep_from_ifnum;
++ case USB_ID(0x2466, 0x8003): /* Fractal Audio Axe-Fx II */
+ ep = 0x86;
+- iface = usb_ifnum_to_if(dev, 2);
+-
+- if (!iface || iface->num_altsetting == 0)
+- return -EINVAL;
+-
+- alts = &iface->altsetting[1];
+- goto add_sync_ep;
+- case USB_ID(0x1397, 0x0002):
++ ifnum = 2;
++ goto add_sync_ep_from_ifnum;
++ case USB_ID(0x1397, 0x0002): /* Behringer UFX1204 */
+ ep = 0x81;
+- iface = usb_ifnum_to_if(dev, 1);
+-
+- if (!iface || iface->num_altsetting == 0)
+- return -EINVAL;
+-
+- alts = &iface->altsetting[1];
+- goto add_sync_ep;
+-
++ ifnum = 1;
++ goto add_sync_ep_from_ifnum;
+ }
++
+ if (attr == USB_ENDPOINT_SYNC_ASYNC &&
+ altsd->bInterfaceClass == USB_CLASS_VENDOR_SPEC &&
+ altsd->bInterfaceProtocol == 2 &&
+@@ -386,6 +366,14 @@ static int set_sync_ep_implicit_fb_quirk
+ /* No quirk */
+ return 0;
+
++add_sync_ep_from_ifnum:
++ iface = usb_ifnum_to_if(dev, ifnum);
++
++ if (!iface || iface->num_altsetting == 0)
++ return -EINVAL;
++
++ alts = &iface->altsetting[1];
++
+ add_sync_ep:
+ subs->sync_endpoint = snd_usb_add_endpoint(subs->stream->chip,
+ alts, ep, !subs->direction,
--- /dev/null
+From 93decc563637c4288380912eac0eb42fb246cc04 Mon Sep 17 00:00:00 2001
+From: Kevin Vigor <kvigor@gmail.com>
+Date: Fri, 6 Nov 2020 14:20:34 -0800
+Subject: md/raid10: initialize r10_bio->read_slot before use.
+
+From: Kevin Vigor <kvigor@gmail.com>
+
+commit 93decc563637c4288380912eac0eb42fb246cc04 upstream.
+
+In __make_request() a new r10bio is allocated and passed to
+raid10_read_request(). The read_slot member of the bio is not
+initialized, and raid10_read_request() uses it to index an array.
+This leads to occasional panics.
+
+Fix by initializing the field to an invalid value and checking for a
+valid value in raid10_read_request().
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Kevin Vigor <kvigor@gmail.com>
+Signed-off-by: Song Liu <songliubraving@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+
+---
+ drivers/md/raid10.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
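+The fix follows the usual sentinel pattern (sketched below from the hunks
+that follow): a freshly allocated r10bio carries no read slot, and only a
+non-negative slot, i.e. an error retry, may be used to index devs[].
+
+	r10_bio->read_slot = -1;	/* new request, not a retry */
+
+	/* later, in raid10_read_request(), the guard becomes: */
+	if (slot >= 0 && r10_bio->devs[slot].rdev) {
+		/* error-retry path: slot is known to be valid here */
+	}
+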
+--- a/drivers/md/raid10.c
++++ b/drivers/md/raid10.c
+@@ -1120,7 +1120,7 @@ static void raid10_read_request(struct m
+ struct md_rdev *err_rdev = NULL;
+ gfp_t gfp = GFP_NOIO;
+
+- if (r10_bio->devs[slot].rdev) {
++ if (slot >= 0 && r10_bio->devs[slot].rdev) {
+ /*
+ * This is an error retry, but we cannot
+ * safely dereference the rdev in the r10_bio,
+@@ -1513,6 +1513,7 @@ static void __make_request(struct mddev
+ r10_bio->mddev = mddev;
+ r10_bio->sector = bio->bi_iter.bi_sector;
+ r10_bio->state = 0;
++ r10_bio->read_slot = -1;
+ memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * conf->copies);
+
+ if (bio_data_dir(bio) == READ)
--- /dev/null
+From foo@baz Wed Dec 30 04:34:33 PM CET 2020
+From: Shaoying Xu <shaoyi@amazon.com>
+Date: Tue, 29 Dec 2020 02:33:42 +0000
+Subject: mm: memcontrol: eliminate raw access to stat and event counters
+To: <stable@vger.kernel.org>
+Cc: <shaoyi@amazon.com>, <surajjs@amazon.com>
+Message-ID: <20201229023342.GA24991@amazon.com>
+Content-Disposition: inline
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+commit c9019e9bf42e66d028d70d2da6206cad4dd9250d upstream
+
+Replace all raw 'this_cpu_' modifications of the stat and event per-cpu
+counters with API functions such as mod_memcg_state().
+
+This makes the code easier to read, but is also in preparation for the
+next patch, which changes the per-cpu implementation of those counters.
+
+Link: http://lkml.kernel.org/r/20171103153336.24044-1-hannes@cmpxchg.org
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Shaoying Xu <shaoyi@amazon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/memcontrol.h | 31 +++++++++++++++--------
+ mm/memcontrol.c | 59 +++++++++++++++++++--------------------------
+ 2 files changed, 45 insertions(+), 45 deletions(-)
+
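+What the conversion looks like at a typical call site (taken from the
+hunks below; MEMCG_RSS is one of the enum memcg_stat_item counters):
+
+	/* before: open-coded per-cpu arithmetic on the raw counter array */
+	__this_cpu_add(memcg->stat->count[MEMCG_RSS], nr_pages);
+
+	/* after: the accessor carries the same semantics */
+	__mod_memcg_state(memcg, MEMCG_RSS, nr_pages);
+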
+--- a/include/linux/memcontrol.h
++++ b/include/linux/memcontrol.h
+@@ -272,13 +272,6 @@ static inline bool mem_cgroup_disabled(v
+ return !cgroup_subsys_enabled(memory_cgrp_subsys);
+ }
+
+-static inline void mem_cgroup_event(struct mem_cgroup *memcg,
+- enum memcg_event_item event)
+-{
+- this_cpu_inc(memcg->stat->events[event]);
+- cgroup_file_notify(&memcg->events_file);
+-}
+-
+ bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg);
+
+ int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
+@@ -627,15 +620,23 @@ unsigned long mem_cgroup_soft_limit_recl
+ gfp_t gfp_mask,
+ unsigned long *total_scanned);
+
++/* idx can be of type enum memcg_event_item or vm_event_item */
++static inline void __count_memcg_events(struct mem_cgroup *memcg,
++ int idx, unsigned long count)
++{
++ if (!mem_cgroup_disabled())
++ __this_cpu_add(memcg->stat->events[idx], count);
++}
++
++/* idx can be of type enum memcg_event_item or vm_event_item */
+ static inline void count_memcg_events(struct mem_cgroup *memcg,
+- enum vm_event_item idx,
+- unsigned long count)
++ int idx, unsigned long count)
+ {
+ if (!mem_cgroup_disabled())
+ this_cpu_add(memcg->stat->events[idx], count);
+ }
+
+-/* idx can be of type enum memcg_stat_item or node_stat_item */
++/* idx can be of type enum memcg_event_item or vm_event_item */
+ static inline void count_memcg_page_event(struct page *page,
+ int idx)
+ {
+@@ -654,12 +655,20 @@ static inline void count_memcg_event_mm(
+ rcu_read_lock();
+ memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
+ if (likely(memcg)) {
+- this_cpu_inc(memcg->stat->events[idx]);
++ count_memcg_events(memcg, idx, 1);
+ if (idx == OOM_KILL)
+ cgroup_file_notify(&memcg->events_file);
+ }
+ rcu_read_unlock();
+ }
++
++static inline void mem_cgroup_event(struct mem_cgroup *memcg,
++ enum memcg_event_item event)
++{
++ count_memcg_events(memcg, event, 1);
++ cgroup_file_notify(&memcg->events_file);
++}
++
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ void mem_cgroup_split_huge_fixup(struct page *head);
+ #endif
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -586,23 +586,23 @@ static void mem_cgroup_charge_statistics
+ * counted as CACHE even if it's on ANON LRU.
+ */
+ if (PageAnon(page))
+- __this_cpu_add(memcg->stat->count[MEMCG_RSS], nr_pages);
++ __mod_memcg_state(memcg, MEMCG_RSS, nr_pages);
+ else {
+- __this_cpu_add(memcg->stat->count[MEMCG_CACHE], nr_pages);
++ __mod_memcg_state(memcg, MEMCG_CACHE, nr_pages);
+ if (PageSwapBacked(page))
+- __this_cpu_add(memcg->stat->count[NR_SHMEM], nr_pages);
++ __mod_memcg_state(memcg, NR_SHMEM, nr_pages);
+ }
+
+ if (compound) {
+ VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+- __this_cpu_add(memcg->stat->count[MEMCG_RSS_HUGE], nr_pages);
++ __mod_memcg_state(memcg, MEMCG_RSS_HUGE, nr_pages);
+ }
+
+ /* pagein of a big page is an event. So, ignore page size */
+ if (nr_pages > 0)
+- __this_cpu_inc(memcg->stat->events[PGPGIN]);
++ __count_memcg_events(memcg, PGPGIN, 1);
+ else {
+- __this_cpu_inc(memcg->stat->events[PGPGOUT]);
++ __count_memcg_events(memcg, PGPGOUT, 1);
+ nr_pages = -nr_pages; /* for event */
+ }
+
+@@ -2444,18 +2444,11 @@ void mem_cgroup_split_huge_fixup(struct
+ for (i = 1; i < HPAGE_PMD_NR; i++)
+ head[i].mem_cgroup = head->mem_cgroup;
+
+- __this_cpu_sub(head->mem_cgroup->stat->count[MEMCG_RSS_HUGE],
+- HPAGE_PMD_NR);
++ __mod_memcg_state(head->mem_cgroup, MEMCG_RSS_HUGE, -HPAGE_PMD_NR);
+ }
+ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+ #ifdef CONFIG_MEMCG_SWAP
+-static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
+- int nr_entries)
+-{
+- this_cpu_add(memcg->stat->count[MEMCG_SWAP], nr_entries);
+-}
+-
+ /**
+ * mem_cgroup_move_swap_account - move swap charge and swap_cgroup's record.
+ * @entry: swap entry to be moved
+@@ -2479,8 +2472,8 @@ static int mem_cgroup_move_swap_account(
+ new_id = mem_cgroup_id(to);
+
+ if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) {
+- mem_cgroup_swap_statistics(from, -1);
+- mem_cgroup_swap_statistics(to, 1);
++ mod_memcg_state(from, MEMCG_SWAP, -1);
++ mod_memcg_state(to, MEMCG_SWAP, 1);
+ return 0;
+ }
+ return -EINVAL;
+@@ -4632,8 +4625,8 @@ static int mem_cgroup_move_account(struc
+ spin_lock_irqsave(&from->move_lock, flags);
+
+ if (!anon && page_mapped(page)) {
+- __this_cpu_sub(from->stat->count[NR_FILE_MAPPED], nr_pages);
+- __this_cpu_add(to->stat->count[NR_FILE_MAPPED], nr_pages);
++ __mod_memcg_state(from, NR_FILE_MAPPED, -nr_pages);
++ __mod_memcg_state(to, NR_FILE_MAPPED, nr_pages);
+ }
+
+ /*
+@@ -4645,16 +4638,14 @@ static int mem_cgroup_move_account(struc
+ struct address_space *mapping = page_mapping(page);
+
+ if (mapping_cap_account_dirty(mapping)) {
+- __this_cpu_sub(from->stat->count[NR_FILE_DIRTY],
+- nr_pages);
+- __this_cpu_add(to->stat->count[NR_FILE_DIRTY],
+- nr_pages);
++ __mod_memcg_state(from, NR_FILE_DIRTY, -nr_pages);
++ __mod_memcg_state(to, NR_FILE_DIRTY, nr_pages);
+ }
+ }
+
+ if (PageWriteback(page)) {
+- __this_cpu_sub(from->stat->count[NR_WRITEBACK], nr_pages);
+- __this_cpu_add(to->stat->count[NR_WRITEBACK], nr_pages);
++ __mod_memcg_state(from, NR_WRITEBACK, -nr_pages);
++ __mod_memcg_state(to, NR_WRITEBACK, nr_pages);
+ }
+
+ /*
+@@ -5690,11 +5681,11 @@ static void uncharge_batch(const struct
+ }
+
+ local_irq_save(flags);
+- __this_cpu_sub(ug->memcg->stat->count[MEMCG_RSS], ug->nr_anon);
+- __this_cpu_sub(ug->memcg->stat->count[MEMCG_CACHE], ug->nr_file);
+- __this_cpu_sub(ug->memcg->stat->count[MEMCG_RSS_HUGE], ug->nr_huge);
+- __this_cpu_sub(ug->memcg->stat->count[NR_SHMEM], ug->nr_shmem);
+- __this_cpu_add(ug->memcg->stat->events[PGPGOUT], ug->pgpgout);
++ __mod_memcg_state(ug->memcg, MEMCG_RSS, -ug->nr_anon);
++ __mod_memcg_state(ug->memcg, MEMCG_CACHE, -ug->nr_file);
++ __mod_memcg_state(ug->memcg, MEMCG_RSS_HUGE, -ug->nr_huge);
++ __mod_memcg_state(ug->memcg, NR_SHMEM, -ug->nr_shmem);
++ __count_memcg_events(ug->memcg, PGPGOUT, ug->pgpgout);
+ __this_cpu_add(ug->memcg->stat->nr_page_events, nr_pages);
+ memcg_check_events(ug->memcg, ug->dummy_page);
+ local_irq_restore(flags);
+@@ -5926,7 +5917,7 @@ bool mem_cgroup_charge_skmem(struct mem_
+ if (in_softirq())
+ gfp_mask = GFP_NOWAIT;
+
+- this_cpu_add(memcg->stat->count[MEMCG_SOCK], nr_pages);
++ mod_memcg_state(memcg, MEMCG_SOCK, nr_pages);
+
+ if (try_charge(memcg, gfp_mask, nr_pages) == 0)
+ return true;
+@@ -5947,7 +5938,7 @@ void mem_cgroup_uncharge_skmem(struct me
+ return;
+ }
+
+- this_cpu_sub(memcg->stat->count[MEMCG_SOCK], nr_pages);
++ mod_memcg_state(memcg, MEMCG_SOCK, -nr_pages);
+
+ refill_stock(memcg, nr_pages);
+ }
+@@ -6071,7 +6062,7 @@ void mem_cgroup_swapout(struct page *pag
+ oldid = swap_cgroup_record(entry, mem_cgroup_id(swap_memcg),
+ nr_entries);
+ VM_BUG_ON_PAGE(oldid, page);
+- mem_cgroup_swap_statistics(swap_memcg, nr_entries);
++ mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries);
+
+ page->mem_cgroup = NULL;
+
+@@ -6137,7 +6128,7 @@ int mem_cgroup_try_charge_swap(struct pa
+ mem_cgroup_id_get_many(memcg, nr_pages - 1);
+ oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg), nr_pages);
+ VM_BUG_ON_PAGE(oldid, page);
+- mem_cgroup_swap_statistics(memcg, nr_pages);
++ mod_memcg_state(memcg, MEMCG_SWAP, nr_pages);
+
+ return 0;
+ }
+@@ -6165,7 +6156,7 @@ void mem_cgroup_uncharge_swap(swp_entry_
+ else
+ page_counter_uncharge(&memcg->memsw, nr_pages);
+ }
+- mem_cgroup_swap_statistics(memcg, -nr_pages);
++ mod_memcg_state(memcg, MEMCG_SWAP, -nr_pages);
+ mem_cgroup_id_put_many(memcg, nr_pages);
+ }
+ rcu_read_unlock();
--- /dev/null
+From foo@baz Wed Dec 30 04:34:33 PM CET 2020
+From: Shaoying Xu <shaoyi@amazon.com>
+Date: Tue, 29 Dec 2020 02:38:22 +0000
+Subject: mm: memcontrol: fix excessive complexity in memory.stat reporting
+To: <stable@vger.kernel.org>
+Cc: <shaoyi@amazon.com>, <surajjs@amazon.com>
+Message-ID: <20201229023822.GA25663@amazon.com>
+Content-Disposition: inline
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+commit a983b5ebee57209c99f68c8327072f25e0e6e3da upstream
+
+We've seen memory.stat reads in top-level cgroups take up to fourteen
+seconds during a userspace bug that created tens of thousands of ghost
+cgroups pinned by lingering page cache.
+
+Even with a more reasonable number of cgroups, aggregating memory.stat
+is unnecessarily heavy. The complexity is this:
+
+ nr_cgroups * nr_stat_items * nr_possible_cpus
+
+where the stat items are ~70 at this point. With 128 cgroups and 128
+CPUs - decent, not enormous setups - reading the top-level memory.stat
+has to aggregate over a million per-cpu counters. This doesn't scale.
+
+Instead of spreading the source of truth across all CPUs, use the
+per-cpu counters merely to batch updates to shared atomic counters.
+
+This is the same as the per-cpu stocks we use for charging memory to the
+shared atomic page_counters, and also the way the global vmstat counters
+are implemented.
+
+Vmstat has elaborate spilling thresholds that depend on the number of
+CPUs, amount of memory, and memory pressure - carefully balancing the
+cost of counter updates with the amount of per-cpu error. That's
+because the vmstat counters are system-wide, but also used for decisions
+inside the kernel (e.g. NR_FREE_PAGES in the allocator). Neither is
+true for the memory controller.
+
+Use the same static batch size we already use for page_counter updates
+during charging. The per-cpu error in the stats will be 128k, which is
+an acceptable ratio of cores to memory accounting granularity.
+
+[hannes@cmpxchg.org: fix warning in __this_cpu_xchg() calls]
+ Link: http://lkml.kernel.org/r/20171201135750.GB8097@cmpxchg.org
+Link: http://lkml.kernel.org/r/20171103153336.24044-3-hannes@cmpxchg.org
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: stable@vger.kernel.org
+[shaoyi@amazon.com: resolved the conflict brought by commit 17ffa29c355658c8e9b19f56cbf0388500ca7905 in mm/memcontrol.c by contextual fix]
+Signed-off-by: Shaoying Xu <shaoyi@amazon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/memcontrol.h | 96 +++++++++++++++++++++++++++---------------
+ mm/memcontrol.c | 101 ++++++++++++++++++++++-----------------------
+ 2 files changed, 113 insertions(+), 84 deletions(-)
+
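+The batching scheme in isolation (a sketch of the new __mod_memcg_state()
+body from the hunk below): each CPU accumulates a small signed delta and
+only folds it into the shared atomic counter once it exceeds
+MEMCG_CHARGE_BATCH (32), so with 4K pages the per-cpu error for the
+page-counted stats stays within the 128k mentioned above.
+
+	x = val + __this_cpu_read(memcg->stat_cpu->count[idx]);
+	if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
+		atomic_long_add(x, &memcg->stat[idx]);	/* flush to shared counter */
+		x = 0;
+	}
+	__this_cpu_write(memcg->stat_cpu->count[idx], x);	/* keep the remainder */
+
+Readers then do a single atomic_long_read() per counter instead of summing
+one slot per possible CPU, turning the memory.stat read cost from
+nr_cgroups * nr_stat_items * nr_possible_cpus into nr_cgroups * nr_stat_items.
+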
+--- a/include/linux/memcontrol.h
++++ b/include/linux/memcontrol.h
+@@ -108,7 +108,10 @@ struct lruvec_stat {
+ */
+ struct mem_cgroup_per_node {
+ struct lruvec lruvec;
+- struct lruvec_stat __percpu *lruvec_stat;
++
++ struct lruvec_stat __percpu *lruvec_stat_cpu;
++ atomic_long_t lruvec_stat[NR_VM_NODE_STAT_ITEMS];
++
+ unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
+
+ struct mem_cgroup_reclaim_iter iter[DEF_PRIORITY + 1];
+@@ -227,10 +230,10 @@ struct mem_cgroup {
+ spinlock_t move_lock;
+ struct task_struct *move_lock_task;
+ unsigned long move_lock_flags;
+- /*
+- * percpu counter.
+- */
+- struct mem_cgroup_stat_cpu __percpu *stat;
++
++ struct mem_cgroup_stat_cpu __percpu *stat_cpu;
++ atomic_long_t stat[MEMCG_NR_STAT];
++ atomic_long_t events[MEMCG_NR_EVENTS];
+
+ unsigned long socket_pressure;
+
+@@ -265,6 +268,12 @@ struct mem_cgroup {
+ /* WARNING: nodeinfo must be the last member here */
+ };
+
++/*
++ * size of first charge trial. "32" comes from vmscan.c's magic value.
++ * TODO: maybe necessary to use big numbers in big irons.
++ */
++#define MEMCG_CHARGE_BATCH 32U
++
+ extern struct mem_cgroup *root_mem_cgroup;
+
+ static inline bool mem_cgroup_disabled(void)
+@@ -485,32 +494,38 @@ void unlock_page_memcg(struct page *page
+ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
+ int idx)
+ {
+- long val = 0;
+- int cpu;
+-
+- for_each_possible_cpu(cpu)
+- val += per_cpu(memcg->stat->count[idx], cpu);
+-
+- if (val < 0)
+- val = 0;
+-
+- return val;
++ long x = atomic_long_read(&memcg->stat[idx]);
++#ifdef CONFIG_SMP
++ if (x < 0)
++ x = 0;
++#endif
++ return x;
+ }
+
+ /* idx can be of type enum memcg_stat_item or node_stat_item */
+ static inline void __mod_memcg_state(struct mem_cgroup *memcg,
+ int idx, int val)
+ {
+- if (!mem_cgroup_disabled())
+- __this_cpu_add(memcg->stat->count[idx], val);
++ long x;
++
++ if (mem_cgroup_disabled())
++ return;
++
++ x = val + __this_cpu_read(memcg->stat_cpu->count[idx]);
++ if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
++ atomic_long_add(x, &memcg->stat[idx]);
++ x = 0;
++ }
++ __this_cpu_write(memcg->stat_cpu->count[idx], x);
+ }
+
+ /* idx can be of type enum memcg_stat_item or node_stat_item */
+ static inline void mod_memcg_state(struct mem_cgroup *memcg,
+ int idx, int val)
+ {
+- if (!mem_cgroup_disabled())
+- this_cpu_add(memcg->stat->count[idx], val);
++ preempt_disable();
++ __mod_memcg_state(memcg, idx, val);
++ preempt_enable();
+ }
+
+ /**
+@@ -548,26 +563,25 @@ static inline unsigned long lruvec_page_
+ enum node_stat_item idx)
+ {
+ struct mem_cgroup_per_node *pn;
+- long val = 0;
+- int cpu;
++ long x;
+
+ if (mem_cgroup_disabled())
+ return node_page_state(lruvec_pgdat(lruvec), idx);
+
+ pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+- for_each_possible_cpu(cpu)
+- val += per_cpu(pn->lruvec_stat->count[idx], cpu);
+-
+- if (val < 0)
+- val = 0;
+-
+- return val;
++ x = atomic_long_read(&pn->lruvec_stat[idx]);
++#ifdef CONFIG_SMP
++ if (x < 0)
++ x = 0;
++#endif
++ return x;
+ }
+
+ static inline void __mod_lruvec_state(struct lruvec *lruvec,
+ enum node_stat_item idx, int val)
+ {
+ struct mem_cgroup_per_node *pn;
++ long x;
+
+ /* Update node */
+ __mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
+@@ -581,7 +595,12 @@ static inline void __mod_lruvec_state(st
+ __mod_memcg_state(pn->memcg, idx, val);
+
+ /* Update lruvec */
+- __this_cpu_add(pn->lruvec_stat->count[idx], val);
++ x = val + __this_cpu_read(pn->lruvec_stat_cpu->count[idx]);
++ if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
++ atomic_long_add(x, &pn->lruvec_stat[idx]);
++ x = 0;
++ }
++ __this_cpu_write(pn->lruvec_stat_cpu->count[idx], x);
+ }
+
+ static inline void mod_lruvec_state(struct lruvec *lruvec,
+@@ -624,16 +643,25 @@ unsigned long mem_cgroup_soft_limit_recl
+ static inline void __count_memcg_events(struct mem_cgroup *memcg,
+ int idx, unsigned long count)
+ {
+- if (!mem_cgroup_disabled())
+- __this_cpu_add(memcg->stat->events[idx], count);
++ unsigned long x;
++
++ if (mem_cgroup_disabled())
++ return;
++
++ x = count + __this_cpu_read(memcg->stat_cpu->events[idx]);
++ if (unlikely(x > MEMCG_CHARGE_BATCH)) {
++ atomic_long_add(x, &memcg->events[idx]);
++ x = 0;
++ }
++ __this_cpu_write(memcg->stat_cpu->events[idx], x);
+ }
+
+-/* idx can be of type enum memcg_event_item or vm_event_item */
+ static inline void count_memcg_events(struct mem_cgroup *memcg,
+ int idx, unsigned long count)
+ {
+- if (!mem_cgroup_disabled())
+- this_cpu_add(memcg->stat->events[idx], count);
++ preempt_disable();
++ __count_memcg_events(memcg, idx, count);
++ preempt_enable();
+ }
+
+ /* idx can be of type enum memcg_event_item or vm_event_item */
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -542,39 +542,10 @@ mem_cgroup_largest_soft_limit_node(struc
+ return mz;
+ }
+
+-/*
+- * Return page count for single (non recursive) @memcg.
+- *
+- * Implementation Note: reading percpu statistics for memcg.
+- *
+- * Both of vmstat[] and percpu_counter has threshold and do periodic
+- * synchronization to implement "quick" read. There are trade-off between
+- * reading cost and precision of value. Then, we may have a chance to implement
+- * a periodic synchronization of counter in memcg's counter.
+- *
+- * But this _read() function is used for user interface now. The user accounts
+- * memory usage by memory cgroup and he _always_ requires exact value because
+- * he accounts memory. Even if we provide quick-and-fuzzy read, we always
+- * have to visit all online cpus and make sum. So, for now, unnecessary
+- * synchronization is not implemented. (just implemented for cpu hotplug)
+- *
+- * If there are kernel internal actions which can make use of some not-exact
+- * value, and reading all cpu value can be performance bottleneck in some
+- * common workload, threshold and synchronization as vmstat[] should be
+- * implemented.
+- *
+- * The parameter idx can be of type enum memcg_event_item or vm_event_item.
+- */
+-
+ static unsigned long memcg_sum_events(struct mem_cgroup *memcg,
+ int event)
+ {
+- unsigned long val = 0;
+- int cpu;
+-
+- for_each_possible_cpu(cpu)
+- val += per_cpu(memcg->stat->events[event], cpu);
+- return val;
++ return atomic_long_read(&memcg->events[event]);
+ }
+
+ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
+@@ -606,7 +577,7 @@ static void mem_cgroup_charge_statistics
+ nr_pages = -nr_pages; /* for event */
+ }
+
+- __this_cpu_add(memcg->stat->nr_page_events, nr_pages);
++ __this_cpu_add(memcg->stat_cpu->nr_page_events, nr_pages);
+ }
+
+ unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
+@@ -642,8 +613,8 @@ static bool mem_cgroup_event_ratelimit(s
+ {
+ unsigned long val, next;
+
+- val = __this_cpu_read(memcg->stat->nr_page_events);
+- next = __this_cpu_read(memcg->stat->targets[target]);
++ val = __this_cpu_read(memcg->stat_cpu->nr_page_events);
++ next = __this_cpu_read(memcg->stat_cpu->targets[target]);
+ /* from time_after() in jiffies.h */
+ if ((long)(next - val) < 0) {
+ switch (target) {
+@@ -659,7 +630,7 @@ static bool mem_cgroup_event_ratelimit(s
+ default:
+ break;
+ }
+- __this_cpu_write(memcg->stat->targets[target], next);
++ __this_cpu_write(memcg->stat_cpu->targets[target], next);
+ return true;
+ }
+ return false;
+@@ -1726,11 +1697,6 @@ void unlock_page_memcg(struct page *page
+ }
+ EXPORT_SYMBOL(unlock_page_memcg);
+
+-/*
+- * size of first charge trial. "32" comes from vmscan.c's magic value.
+- * TODO: maybe necessary to use big numbers in big irons.
+- */
+-#define CHARGE_BATCH 32U
+ struct memcg_stock_pcp {
+ struct mem_cgroup *cached; /* this never be root cgroup */
+ unsigned int nr_pages;
+@@ -1758,7 +1724,7 @@ static bool consume_stock(struct mem_cgr
+ unsigned long flags;
+ bool ret = false;
+
+- if (nr_pages > CHARGE_BATCH)
++ if (nr_pages > MEMCG_CHARGE_BATCH)
+ return ret;
+
+ local_irq_save(flags);
+@@ -1827,7 +1793,7 @@ static void refill_stock(struct mem_cgro
+ }
+ stock->nr_pages += nr_pages;
+
+- if (stock->nr_pages > CHARGE_BATCH)
++ if (stock->nr_pages > MEMCG_CHARGE_BATCH)
+ drain_stock(stock);
+
+ local_irq_restore(flags);
+@@ -1877,9 +1843,44 @@ static void drain_all_stock(struct mem_c
+ static int memcg_hotplug_cpu_dead(unsigned int cpu)
+ {
+ struct memcg_stock_pcp *stock;
++ struct mem_cgroup *memcg;
+
+ stock = &per_cpu(memcg_stock, cpu);
+ drain_stock(stock);
++
++ for_each_mem_cgroup(memcg) {
++ int i;
++
++ for (i = 0; i < MEMCG_NR_STAT; i++) {
++ int nid;
++ long x;
++
++ x = this_cpu_xchg(memcg->stat_cpu->count[i], 0);
++ if (x)
++ atomic_long_add(x, &memcg->stat[i]);
++
++ if (i >= NR_VM_NODE_STAT_ITEMS)
++ continue;
++
++ for_each_node(nid) {
++ struct mem_cgroup_per_node *pn;
++
++ pn = mem_cgroup_nodeinfo(memcg, nid);
++ x = this_cpu_xchg(pn->lruvec_stat_cpu->count[i], 0);
++ if (x)
++ atomic_long_add(x, &pn->lruvec_stat[i]);
++ }
++ }
++
++ for (i = 0; i < MEMCG_NR_EVENTS; i++) {
++ long x;
++
++ x = this_cpu_xchg(memcg->stat_cpu->events[i], 0);
++ if (x)
++ atomic_long_add(x, &memcg->events[i]);
++ }
++ }
++
+ return 0;
+ }
+
+@@ -1900,7 +1901,7 @@ static void high_work_func(struct work_s
+ struct mem_cgroup *memcg;
+
+ memcg = container_of(work, struct mem_cgroup, high_work);
+- reclaim_high(memcg, CHARGE_BATCH, GFP_KERNEL);
++ reclaim_high(memcg, MEMCG_CHARGE_BATCH, GFP_KERNEL);
+ }
+
+ /*
+@@ -1924,7 +1925,7 @@ void mem_cgroup_handle_over_high(void)
+ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
+ unsigned int nr_pages)
+ {
+- unsigned int batch = max(CHARGE_BATCH, nr_pages);
++ unsigned int batch = max(MEMCG_CHARGE_BATCH, nr_pages);
+ int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
+ struct mem_cgroup *mem_over_limit;
+ struct page_counter *counter;
+@@ -4203,8 +4204,8 @@ static int alloc_mem_cgroup_per_node_inf
+ if (!pn)
+ return 1;
+
+- pn->lruvec_stat = alloc_percpu(struct lruvec_stat);
+- if (!pn->lruvec_stat) {
++ pn->lruvec_stat_cpu = alloc_percpu(struct lruvec_stat);
++ if (!pn->lruvec_stat_cpu) {
+ kfree(pn);
+ return 1;
+ }
+@@ -4225,7 +4226,7 @@ static void free_mem_cgroup_per_node_inf
+ if (!pn)
+ return;
+
+- free_percpu(pn->lruvec_stat);
++ free_percpu(pn->lruvec_stat_cpu);
+ kfree(pn);
+ }
+
+@@ -4235,7 +4236,7 @@ static void __mem_cgroup_free(struct mem
+
+ for_each_node(node)
+ free_mem_cgroup_per_node_info(memcg, node);
+- free_percpu(memcg->stat);
++ free_percpu(memcg->stat_cpu);
+ kfree(memcg);
+ }
+
+@@ -4264,8 +4265,8 @@ static struct mem_cgroup *mem_cgroup_all
+ if (memcg->id.id < 0)
+ goto fail;
+
+- memcg->stat = alloc_percpu(struct mem_cgroup_stat_cpu);
+- if (!memcg->stat)
++ memcg->stat_cpu = alloc_percpu(struct mem_cgroup_stat_cpu);
++ if (!memcg->stat_cpu)
+ goto fail;
+
+ for_each_node(node)
+@@ -5686,7 +5687,7 @@ static void uncharge_batch(const struct
+ __mod_memcg_state(ug->memcg, MEMCG_RSS_HUGE, -ug->nr_huge);
+ __mod_memcg_state(ug->memcg, NR_SHMEM, -ug->nr_shmem);
+ __count_memcg_events(ug->memcg, PGPGOUT, ug->pgpgout);
+- __this_cpu_add(ug->memcg->stat->nr_page_events, nr_pages);
++ __this_cpu_add(ug->memcg->stat_cpu->nr_page_events, nr_pages);
+ memcg_check_events(ug->memcg, ug->dummy_page);
+ local_irq_restore(flags);
+
--- /dev/null
+From foo@baz Wed Dec 30 04:34:33 PM CET 2020
+From: Shaoying Xu <shaoyi@amazon.com>
+Date: Tue, 29 Dec 2020 02:35:58 +0000
+Subject: mm: memcontrol: implement lruvec stat functions on top of each other
+To: <stable@vger.kernel.org>
+Cc: <shaoyi@amazon.com>, <surajjs@amazon.com>
+Message-ID: <20201229023558.GA25485@amazon.com>
+Content-Disposition: inline
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+commit 284542656e22c43fdada8c8cc0ca9ede8453eed7 upstream
+
+The implementations of the lruvec stat functions and their variants (for
+accounting through a page, or accounting from a preemptible context) are
+mostly identical and needlessly repetitive.
+
+Implement the lruvec_page functions by looking up the page's lruvec and
+then using the lruvec function.
+
+Implement the functions for preemptible contexts by disabling preemption
+before calling the atomic context functions.
+
+Link: http://lkml.kernel.org/r/20171103153336.24044-2-hannes@cmpxchg.org
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Shaoying Xu <shaoyi@amazon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/memcontrol.h | 44 ++++++++++++++++++++++----------------------
+ 1 file changed, 22 insertions(+), 22 deletions(-)
+
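+The layering, in short (sketch of the resulting helpers; the full hunk is
+below): the preemptible variant is a thin wrapper that disables preemption
+around the atomic-context variant, and the page variant resolves the page
+to its lruvec before delegating.
+
+	static inline void mod_lruvec_state(struct lruvec *lruvec,
+					    enum node_stat_item idx, int val)
+	{
+		preempt_disable();
+		__mod_lruvec_state(lruvec, idx, val);
+		preempt_enable();
+	}
+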
+--- a/include/linux/memcontrol.h
++++ b/include/linux/memcontrol.h
+@@ -569,51 +569,51 @@ static inline void __mod_lruvec_state(st
+ {
+ struct mem_cgroup_per_node *pn;
+
++ /* Update node */
+ __mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
++
+ if (mem_cgroup_disabled())
+ return;
++
+ pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
++
++ /* Update memcg */
+ __mod_memcg_state(pn->memcg, idx, val);
++
++ /* Update lruvec */
+ __this_cpu_add(pn->lruvec_stat->count[idx], val);
+ }
+
+ static inline void mod_lruvec_state(struct lruvec *lruvec,
+ enum node_stat_item idx, int val)
+ {
+- struct mem_cgroup_per_node *pn;
+-
+- mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
+- if (mem_cgroup_disabled())
+- return;
+- pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+- mod_memcg_state(pn->memcg, idx, val);
+- this_cpu_add(pn->lruvec_stat->count[idx], val);
++ preempt_disable();
++ __mod_lruvec_state(lruvec, idx, val);
++ preempt_enable();
+ }
+
+ static inline void __mod_lruvec_page_state(struct page *page,
+ enum node_stat_item idx, int val)
+ {
+- struct mem_cgroup_per_node *pn;
++ pg_data_t *pgdat = page_pgdat(page);
++ struct lruvec *lruvec;
+
+- __mod_node_page_state(page_pgdat(page), idx, val);
+- if (mem_cgroup_disabled() || !page->mem_cgroup)
++ /* Untracked pages have no memcg, no lruvec. Update only the node */
++ if (!page->mem_cgroup) {
++ __mod_node_page_state(pgdat, idx, val);
+ return;
+- __mod_memcg_state(page->mem_cgroup, idx, val);
+- pn = page->mem_cgroup->nodeinfo[page_to_nid(page)];
+- __this_cpu_add(pn->lruvec_stat->count[idx], val);
++ }
++
++ lruvec = mem_cgroup_lruvec(pgdat, page->mem_cgroup);
++ __mod_lruvec_state(lruvec, idx, val);
+ }
+
+ static inline void mod_lruvec_page_state(struct page *page,
+ enum node_stat_item idx, int val)
+ {
+- struct mem_cgroup_per_node *pn;
+-
+- mod_node_page_state(page_pgdat(page), idx, val);
+- if (mem_cgroup_disabled() || !page->mem_cgroup)
+- return;
+- mod_memcg_state(page->mem_cgroup, idx, val);
+- pn = page->mem_cgroup->nodeinfo[page_to_nid(page)];
+- this_cpu_add(pn->lruvec_stat->count[idx], val);
++ preempt_disable();
++ __mod_lruvec_page_state(page, idx, val);
++ preempt_enable();
+ }
+
+ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
 x86-entry-64-add-instruction-suffix.patch
+md-raid10-initialize-r10_bio-read_slot-before-use.patch
+alsa-hda-ca0132-fix-work-handling-in-delayed-hp-detection.patch
+alsa-usb-audio-simplify-set_sync_ep_implicit_fb_quirk.patch
+alsa-usb-audio-fix-sync-ep-altsetting-sanity-check.patch
+mm-memcontrol-eliminate-raw-access-to-stat-and-event-counters.patch
+mm-memcontrol-implement-lruvec-stat-functions-on-top-of-each-other.patch
+mm-memcontrol-fix-excessive-complexity-in-memory.stat-reporting.patch