From: Greg Kroah-Hartman Date: Wed, 30 Dec 2020 15:37:01 +0000 (+0100) Subject: 4.14-stable patches X-Git-Tag: v4.19.165~47 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=c8564a788d9747699391dbdf107144a6f239d7a2;p=thirdparty%2Fkernel%2Fstable-queue.git 4.14-stable patches added patches: alsa-hda-ca0132-fix-work-handling-in-delayed-hp-detection.patch alsa-usb-audio-fix-sync-ep-altsetting-sanity-check.patch alsa-usb-audio-simplify-set_sync_ep_implicit_fb_quirk.patch md-raid10-initialize-r10_bio-read_slot-before-use.patch mm-memcontrol-eliminate-raw-access-to-stat-and-event-counters.patch mm-memcontrol-fix-excessive-complexity-in-memory.stat-reporting.patch mm-memcontrol-implement-lruvec-stat-functions-on-top-of-each-other.patch --- diff --git a/queue-4.14/alsa-hda-ca0132-fix-work-handling-in-delayed-hp-detection.patch b/queue-4.14/alsa-hda-ca0132-fix-work-handling-in-delayed-hp-detection.patch new file mode 100644 index 00000000000..7db30e3bc6b --- /dev/null +++ b/queue-4.14/alsa-hda-ca0132-fix-work-handling-in-delayed-hp-detection.patch @@ -0,0 +1,66 @@ +From foo@baz Wed Dec 30 04:31:08 PM CET 2020 +From: Takashi Iwai +Date: Fri, 13 Dec 2019 09:51:11 +0100 +Subject: ALSA: hda/ca0132 - Fix work handling in delayed HP detection + +From: Takashi Iwai + +commit 42fb6b1d41eb5905d77c06cad2e87b70289bdb76 upstream + +CA0132 has the delayed HP jack detection code that is invoked from the +unsol handler, but it does a few weird things: it contains the cancel +of a work inside the work handler, and yet it misses the cancel-sync +call at (runtime-)suspend. This patch addresses those issues. + +Fixes: 15c2b3cc09a3 ("ALSA: hda/ca0132 - Fix possible workqueue stall") +Cc: +Link: https://lore.kernel.org/r/20191213085111.22855-4-tiwai@suse.de +Signed-off-by: Takashi Iwai +[sudip: adjust context] +Signed-off-by: Sudip Mukherjee +Signed-off-by: Greg Kroah-Hartman +--- + sound/pci/hda/patch_ca0132.c | 16 ++++++++++++++-- + 1 file changed, 14 insertions(+), 2 deletions(-) + +--- a/sound/pci/hda/patch_ca0132.c ++++ b/sound/pci/hda/patch_ca0132.c +@@ -4443,11 +4443,10 @@ static void hp_callback(struct hda_codec + /* Delay enabling the HP amp, to let the mic-detection + * state machine run. + */ +- cancel_delayed_work(&spec->unsol_hp_work); +- schedule_delayed_work(&spec->unsol_hp_work, msecs_to_jiffies(500)); + tbl = snd_hda_jack_tbl_get(codec, cb->nid); + if (tbl) + tbl->block_report = 1; ++ schedule_delayed_work(&spec->unsol_hp_work, msecs_to_jiffies(500)); + } + + static void amic_callback(struct hda_codec *codec, struct hda_jack_callback *cb) +@@ -4625,12 +4624,25 @@ static void ca0132_free(struct hda_codec + kfree(codec->spec); + } + ++#ifdef CONFIG_PM ++static int ca0132_suspend(struct hda_codec *codec) ++{ ++ struct ca0132_spec *spec = codec->spec; ++ ++ cancel_delayed_work_sync(&spec->unsol_hp_work); ++ return 0; ++} ++#endif ++ + static const struct hda_codec_ops ca0132_patch_ops = { + .build_controls = ca0132_build_controls, + .build_pcms = ca0132_build_pcms, + .init = ca0132_init, + .free = ca0132_free, + .unsol_event = snd_hda_jack_unsol_event, ++#ifdef CONFIG_PM ++ .suspend = ca0132_suspend, ++#endif + }; + + static void ca0132_config(struct hda_codec *codec) diff --git a/queue-4.14/alsa-usb-audio-fix-sync-ep-altsetting-sanity-check.patch b/queue-4.14/alsa-usb-audio-fix-sync-ep-altsetting-sanity-check.patch new file mode 100644 index 00000000000..d50c320a8ff --- /dev/null +++ b/queue-4.14/alsa-usb-audio-fix-sync-ep-altsetting-sanity-check.patch @@ -0,0 +1,42 @@ +From foo@baz Wed Dec 30 04:31:08 PM CET 2020 +From: Johan Hovold +Date: Tue, 14 Jan 2020 09:39:53 +0100 +Subject: ALSA: usb-audio: fix sync-ep altsetting sanity check + +From: Johan Hovold + +commit 5d1b71226dc4d44b4b65766fa9d74492f9d4587b upstream + +The altsetting sanity check in set_sync_ep_implicit_fb_quirk() was +checking for there to be at least one altsetting but then went on to +access the second one, which may not exist. + +This could lead to random slab data being used to initialise the sync +endpoint in snd_usb_add_endpoint(). + +Fixes: c75a8a7ae565 ("ALSA: snd-usb: add support for implicit feedback") +Fixes: ca10a7ebdff1 ("ALSA: usb-audio: FT C400 sync playback EP to capture EP") +Fixes: 5e35dc0338d8 ("ALSA: usb-audio: add implicit fb quirk for Behringer UFX1204") +Fixes: 17f08b0d9aaf ("ALSA: usb-audio: add implicit fb quirk for Axe-Fx II") +Fixes: 103e9625647a ("ALSA: usb-audio: simplify set_sync_ep_implicit_fb_quirk") +Cc: stable # 3.5 +Signed-off-by: Johan Hovold +Link: https://lore.kernel.org/r/20200114083953.1106-1-johan@kernel.org +Signed-off-by: Takashi Iwai +Signed-off-by: Sudip Mukherjee +Signed-off-by: Greg Kroah-Hartman +--- + sound/usb/pcm.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/sound/usb/pcm.c ++++ b/sound/usb/pcm.c +@@ -369,7 +369,7 @@ static int set_sync_ep_implicit_fb_quirk + add_sync_ep_from_ifnum: + iface = usb_ifnum_to_if(dev, ifnum); + +- if (!iface || iface->num_altsetting == 0) ++ if (!iface || iface->num_altsetting < 2) + return -EINVAL; + + alts = &iface->altsetting[1]; diff --git a/queue-4.14/alsa-usb-audio-simplify-set_sync_ep_implicit_fb_quirk.patch b/queue-4.14/alsa-usb-audio-simplify-set_sync_ep_implicit_fb_quirk.patch new file mode 100644 index 00000000000..3f112451b55 --- /dev/null +++ b/queue-4.14/alsa-usb-audio-simplify-set_sync_ep_implicit_fb_quirk.patch @@ -0,0 +1,98 @@ +From foo@baz Wed Dec 30 04:31:08 PM CET 2020 +From: Alberto Aguirre +Date: Wed, 18 Apr 2018 09:35:34 -0500 +Subject: ALSA: usb-audio: simplify set_sync_ep_implicit_fb_quirk + +From: Alberto Aguirre + +commit 103e9625647ad74d201e26fb74afcd8479142a37 upstream + +Signed-off-by: Alberto Aguirre +Signed-off-by: Takashi Iwai +Signed-off-by: Sudip Mukherjee +Signed-off-by: Greg Kroah-Hartman +--- + sound/usb/pcm.c | 52 ++++++++++++++++++++-------------------------------- + 1 file changed, 20 insertions(+), 32 deletions(-) + +--- a/sound/usb/pcm.c ++++ b/sound/usb/pcm.c +@@ -324,6 +324,7 @@ static int set_sync_ep_implicit_fb_quirk + struct usb_host_interface *alts; + struct usb_interface *iface; + unsigned int ep; ++ unsigned int ifnum; + + /* Implicit feedback sync EPs consumers are always playback EPs */ + if (subs->direction != SNDRV_PCM_STREAM_PLAYBACK) +@@ -334,44 +335,23 @@ static int set_sync_ep_implicit_fb_quirk + case USB_ID(0x0763, 0x2031): /* M-Audio Fast Track C600 */ + case USB_ID(0x22f0, 0x0006): /* Allen&Heath Qu-16 */ + ep = 0x81; +- iface = usb_ifnum_to_if(dev, 3); +- +- if (!iface || iface->num_altsetting == 0) +- return -EINVAL; +- +- alts = &iface->altsetting[1]; +- goto add_sync_ep; +- break; ++ ifnum = 3; ++ goto add_sync_ep_from_ifnum; + case USB_ID(0x0763, 0x2080): /* M-Audio FastTrack Ultra */ + case USB_ID(0x0763, 0x2081): + ep = 0x81; +- iface = usb_ifnum_to_if(dev, 2); +- +- if (!iface || iface->num_altsetting == 0) +- return -EINVAL; +- +- alts = &iface->altsetting[1]; +- goto add_sync_ep; +- case USB_ID(0x2466, 0x8003): ++ ifnum = 2; ++ goto add_sync_ep_from_ifnum; ++ case USB_ID(0x2466, 0x8003): /* Fractal Audio Axe-Fx II */ + ep = 0x86; +- iface = usb_ifnum_to_if(dev, 2); +- +- if (!iface || iface->num_altsetting == 0) +- return -EINVAL; +- +- alts = &iface->altsetting[1]; +- goto add_sync_ep; +- case USB_ID(0x1397, 0x0002): ++ ifnum = 2; ++ goto add_sync_ep_from_ifnum; ++ case USB_ID(0x1397, 0x0002): /* Behringer UFX1204 */ + ep = 0x81; +- iface = usb_ifnum_to_if(dev, 1); +- +- if (!iface || iface->num_altsetting == 0) +- return -EINVAL; +- +- alts = &iface->altsetting[1]; +- goto add_sync_ep; +- ++ ifnum = 1; ++ goto add_sync_ep_from_ifnum; + } ++ + if (attr == USB_ENDPOINT_SYNC_ASYNC && + altsd->bInterfaceClass == USB_CLASS_VENDOR_SPEC && + altsd->bInterfaceProtocol == 2 && +@@ -386,6 +366,14 @@ static int set_sync_ep_implicit_fb_quirk + /* No quirk */ + return 0; + ++add_sync_ep_from_ifnum: ++ iface = usb_ifnum_to_if(dev, ifnum); ++ ++ if (!iface || iface->num_altsetting == 0) ++ return -EINVAL; ++ ++ alts = &iface->altsetting[1]; ++ + add_sync_ep: + subs->sync_endpoint = snd_usb_add_endpoint(subs->stream->chip, + alts, ep, !subs->direction, diff --git a/queue-4.14/md-raid10-initialize-r10_bio-read_slot-before-use.patch b/queue-4.14/md-raid10-initialize-r10_bio-read_slot-before-use.patch new file mode 100644 index 00000000000..5c463a8dfeb --- /dev/null +++ b/queue-4.14/md-raid10-initialize-r10_bio-read_slot-before-use.patch @@ -0,0 +1,46 @@ +From 93decc563637c4288380912eac0eb42fb246cc04 Mon Sep 17 00:00:00 2001 +From: Kevin Vigor +Date: Fri, 6 Nov 2020 14:20:34 -0800 +Subject: md/raid10: initialize r10_bio->read_slot before use. + +From: Kevin Vigor + +commit 93decc563637c4288380912eac0eb42fb246cc04 upstream. + +In __make_request() a new r10bio is allocated and passed to +raid10_read_request(). The read_slot member of the bio is not +initialized, and the raid10_read_request() uses it to index an +array. This leads to occasional panics. + +Fix by initializing the field to invalid value and checking for +valid value in raid10_read_request(). + +Cc: stable@vger.kernel.org +Signed-off-by: Kevin Vigor +Signed-off-by: Song Liu +Signed-off-by: Greg Kroah-Hartman + + +--- + drivers/md/raid10.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/md/raid10.c ++++ b/drivers/md/raid10.c +@@ -1120,7 +1120,7 @@ static void raid10_read_request(struct m + struct md_rdev *err_rdev = NULL; + gfp_t gfp = GFP_NOIO; + +- if (r10_bio->devs[slot].rdev) { ++ if (slot >= 0 && r10_bio->devs[slot].rdev) { + /* + * This is an error retry, but we cannot + * safely dereference the rdev in the r10_bio, +@@ -1513,6 +1513,7 @@ static void __make_request(struct mddev + r10_bio->mddev = mddev; + r10_bio->sector = bio->bi_iter.bi_sector; + r10_bio->state = 0; ++ r10_bio->read_slot = -1; + memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * conf->copies); + + if (bio_data_dir(bio) == READ) diff --git a/queue-4.14/mm-memcontrol-eliminate-raw-access-to-stat-and-event-counters.patch b/queue-4.14/mm-memcontrol-eliminate-raw-access-to-stat-and-event-counters.patch new file mode 100644 index 00000000000..137cff4b077 --- /dev/null +++ b/queue-4.14/mm-memcontrol-eliminate-raw-access-to-stat-and-event-counters.patch @@ -0,0 +1,255 @@ +From foo@baz Wed Dec 30 04:34:33 PM CET 2020 +From: Shaoying Xu +Date: Tue, 29 Dec 2020 02:33:42 +0000 +Subject: mm: memcontrol: eliminate raw access to stat and event counters +To: +Cc: , +Message-ID: <20201229023342.GA24991@amazon.com> +Content-Disposition: inline + +From: Johannes Weiner + +commit c9019e9bf42e66d028d70d2da6206cad4dd9250d upstream + +Replace all raw 'this_cpu_' modifications of the stat and event per-cpu +counters with API functions such as mod_memcg_state(). + +This makes the code easier to read, but is also in preparation for the +next patch, which changes the per-cpu implementation of those counters. + +Link: http://lkml.kernel.org/r/20171103153336.24044-1-hannes@cmpxchg.org +Signed-off-by: Johannes Weiner +Acked-by: Vladimir Davydov +Cc: Michal Hocko +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Cc: +Signed-off-by: Shaoying Xu +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/memcontrol.h | 31 +++++++++++++++-------- + mm/memcontrol.c | 59 +++++++++++++++++++-------------------------- + 2 files changed, 45 insertions(+), 45 deletions(-) + +--- a/include/linux/memcontrol.h ++++ b/include/linux/memcontrol.h +@@ -272,13 +272,6 @@ static inline bool mem_cgroup_disabled(v + return !cgroup_subsys_enabled(memory_cgrp_subsys); + } + +-static inline void mem_cgroup_event(struct mem_cgroup *memcg, +- enum memcg_event_item event) +-{ +- this_cpu_inc(memcg->stat->events[event]); +- cgroup_file_notify(&memcg->events_file); +-} +- + bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg); + + int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm, +@@ -627,15 +620,23 @@ unsigned long mem_cgroup_soft_limit_recl + gfp_t gfp_mask, + unsigned long *total_scanned); + ++/* idx can be of type enum memcg_event_item or vm_event_item */ ++static inline void __count_memcg_events(struct mem_cgroup *memcg, ++ int idx, unsigned long count) ++{ ++ if (!mem_cgroup_disabled()) ++ __this_cpu_add(memcg->stat->events[idx], count); ++} ++ ++/* idx can be of type enum memcg_event_item or vm_event_item */ + static inline void count_memcg_events(struct mem_cgroup *memcg, +- enum vm_event_item idx, +- unsigned long count) ++ int idx, unsigned long count) + { + if (!mem_cgroup_disabled()) + this_cpu_add(memcg->stat->events[idx], count); + } + +-/* idx can be of type enum memcg_stat_item or node_stat_item */ ++/* idx can be of type enum memcg_event_item or vm_event_item */ + static inline void count_memcg_page_event(struct page *page, + int idx) + { +@@ -654,12 +655,20 @@ static inline void count_memcg_event_mm( + rcu_read_lock(); + memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); + if (likely(memcg)) { +- this_cpu_inc(memcg->stat->events[idx]); ++ count_memcg_events(memcg, idx, 1); + if (idx == OOM_KILL) + cgroup_file_notify(&memcg->events_file); + } + rcu_read_unlock(); + } ++ ++static inline void mem_cgroup_event(struct mem_cgroup *memcg, ++ enum memcg_event_item event) ++{ ++ count_memcg_events(memcg, event, 1); ++ cgroup_file_notify(&memcg->events_file); ++} ++ + #ifdef CONFIG_TRANSPARENT_HUGEPAGE + void mem_cgroup_split_huge_fixup(struct page *head); + #endif +--- a/mm/memcontrol.c ++++ b/mm/memcontrol.c +@@ -586,23 +586,23 @@ static void mem_cgroup_charge_statistics + * counted as CACHE even if it's on ANON LRU. + */ + if (PageAnon(page)) +- __this_cpu_add(memcg->stat->count[MEMCG_RSS], nr_pages); ++ __mod_memcg_state(memcg, MEMCG_RSS, nr_pages); + else { +- __this_cpu_add(memcg->stat->count[MEMCG_CACHE], nr_pages); ++ __mod_memcg_state(memcg, MEMCG_CACHE, nr_pages); + if (PageSwapBacked(page)) +- __this_cpu_add(memcg->stat->count[NR_SHMEM], nr_pages); ++ __mod_memcg_state(memcg, NR_SHMEM, nr_pages); + } + + if (compound) { + VM_BUG_ON_PAGE(!PageTransHuge(page), page); +- __this_cpu_add(memcg->stat->count[MEMCG_RSS_HUGE], nr_pages); ++ __mod_memcg_state(memcg, MEMCG_RSS_HUGE, nr_pages); + } + + /* pagein of a big page is an event. So, ignore page size */ + if (nr_pages > 0) +- __this_cpu_inc(memcg->stat->events[PGPGIN]); ++ __count_memcg_events(memcg, PGPGIN, 1); + else { +- __this_cpu_inc(memcg->stat->events[PGPGOUT]); ++ __count_memcg_events(memcg, PGPGOUT, 1); + nr_pages = -nr_pages; /* for event */ + } + +@@ -2444,18 +2444,11 @@ void mem_cgroup_split_huge_fixup(struct + for (i = 1; i < HPAGE_PMD_NR; i++) + head[i].mem_cgroup = head->mem_cgroup; + +- __this_cpu_sub(head->mem_cgroup->stat->count[MEMCG_RSS_HUGE], +- HPAGE_PMD_NR); ++ __mod_memcg_state(head->mem_cgroup, MEMCG_RSS_HUGE, -HPAGE_PMD_NR); + } + #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + + #ifdef CONFIG_MEMCG_SWAP +-static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg, +- int nr_entries) +-{ +- this_cpu_add(memcg->stat->count[MEMCG_SWAP], nr_entries); +-} +- + /** + * mem_cgroup_move_swap_account - move swap charge and swap_cgroup's record. + * @entry: swap entry to be moved +@@ -2479,8 +2472,8 @@ static int mem_cgroup_move_swap_account( + new_id = mem_cgroup_id(to); + + if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) { +- mem_cgroup_swap_statistics(from, -1); +- mem_cgroup_swap_statistics(to, 1); ++ mod_memcg_state(from, MEMCG_SWAP, -1); ++ mod_memcg_state(to, MEMCG_SWAP, 1); + return 0; + } + return -EINVAL; +@@ -4632,8 +4625,8 @@ static int mem_cgroup_move_account(struc + spin_lock_irqsave(&from->move_lock, flags); + + if (!anon && page_mapped(page)) { +- __this_cpu_sub(from->stat->count[NR_FILE_MAPPED], nr_pages); +- __this_cpu_add(to->stat->count[NR_FILE_MAPPED], nr_pages); ++ __mod_memcg_state(from, NR_FILE_MAPPED, -nr_pages); ++ __mod_memcg_state(to, NR_FILE_MAPPED, nr_pages); + } + + /* +@@ -4645,16 +4638,14 @@ static int mem_cgroup_move_account(struc + struct address_space *mapping = page_mapping(page); + + if (mapping_cap_account_dirty(mapping)) { +- __this_cpu_sub(from->stat->count[NR_FILE_DIRTY], +- nr_pages); +- __this_cpu_add(to->stat->count[NR_FILE_DIRTY], +- nr_pages); ++ __mod_memcg_state(from, NR_FILE_DIRTY, -nr_pages); ++ __mod_memcg_state(to, NR_FILE_DIRTY, nr_pages); + } + } + + if (PageWriteback(page)) { +- __this_cpu_sub(from->stat->count[NR_WRITEBACK], nr_pages); +- __this_cpu_add(to->stat->count[NR_WRITEBACK], nr_pages); ++ __mod_memcg_state(from, NR_WRITEBACK, -nr_pages); ++ __mod_memcg_state(to, NR_WRITEBACK, nr_pages); + } + + /* +@@ -5690,11 +5681,11 @@ static void uncharge_batch(const struct + } + + local_irq_save(flags); +- __this_cpu_sub(ug->memcg->stat->count[MEMCG_RSS], ug->nr_anon); +- __this_cpu_sub(ug->memcg->stat->count[MEMCG_CACHE], ug->nr_file); +- __this_cpu_sub(ug->memcg->stat->count[MEMCG_RSS_HUGE], ug->nr_huge); +- __this_cpu_sub(ug->memcg->stat->count[NR_SHMEM], ug->nr_shmem); +- __this_cpu_add(ug->memcg->stat->events[PGPGOUT], ug->pgpgout); ++ __mod_memcg_state(ug->memcg, MEMCG_RSS, -ug->nr_anon); ++ __mod_memcg_state(ug->memcg, MEMCG_CACHE, -ug->nr_file); ++ __mod_memcg_state(ug->memcg, MEMCG_RSS_HUGE, -ug->nr_huge); ++ __mod_memcg_state(ug->memcg, NR_SHMEM, -ug->nr_shmem); ++ __count_memcg_events(ug->memcg, PGPGOUT, ug->pgpgout); + __this_cpu_add(ug->memcg->stat->nr_page_events, nr_pages); + memcg_check_events(ug->memcg, ug->dummy_page); + local_irq_restore(flags); +@@ -5926,7 +5917,7 @@ bool mem_cgroup_charge_skmem(struct mem_ + if (in_softirq()) + gfp_mask = GFP_NOWAIT; + +- this_cpu_add(memcg->stat->count[MEMCG_SOCK], nr_pages); ++ mod_memcg_state(memcg, MEMCG_SOCK, nr_pages); + + if (try_charge(memcg, gfp_mask, nr_pages) == 0) + return true; +@@ -5947,7 +5938,7 @@ void mem_cgroup_uncharge_skmem(struct me + return; + } + +- this_cpu_sub(memcg->stat->count[MEMCG_SOCK], nr_pages); ++ mod_memcg_state(memcg, MEMCG_SOCK, -nr_pages); + + refill_stock(memcg, nr_pages); + } +@@ -6071,7 +6062,7 @@ void mem_cgroup_swapout(struct page *pag + oldid = swap_cgroup_record(entry, mem_cgroup_id(swap_memcg), + nr_entries); + VM_BUG_ON_PAGE(oldid, page); +- mem_cgroup_swap_statistics(swap_memcg, nr_entries); ++ mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries); + + page->mem_cgroup = NULL; + +@@ -6137,7 +6128,7 @@ int mem_cgroup_try_charge_swap(struct pa + mem_cgroup_id_get_many(memcg, nr_pages - 1); + oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg), nr_pages); + VM_BUG_ON_PAGE(oldid, page); +- mem_cgroup_swap_statistics(memcg, nr_pages); ++ mod_memcg_state(memcg, MEMCG_SWAP, nr_pages); + + return 0; + } +@@ -6165,7 +6156,7 @@ void mem_cgroup_uncharge_swap(swp_entry_ + else + page_counter_uncharge(&memcg->memsw, nr_pages); + } +- mem_cgroup_swap_statistics(memcg, -nr_pages); ++ mod_memcg_state(memcg, MEMCG_SWAP, -nr_pages); + mem_cgroup_id_put_many(memcg, nr_pages); + } + rcu_read_unlock(); diff --git a/queue-4.14/mm-memcontrol-fix-excessive-complexity-in-memory.stat-reporting.patch b/queue-4.14/mm-memcontrol-fix-excessive-complexity-in-memory.stat-reporting.patch new file mode 100644 index 00000000000..1e19a75fe9f --- /dev/null +++ b/queue-4.14/mm-memcontrol-fix-excessive-complexity-in-memory.stat-reporting.patch @@ -0,0 +1,450 @@ +From foo@baz Wed Dec 30 04:34:33 PM CET 2020 +From: Shaoying Xu +Date: Tue, 29 Dec 2020 02:38:22 +0000 +Subject: mm: memcontrol: fix excessive complexity in memory.stat reporting +To: +Cc: , +Message-ID: <20201229023822.GA25663@amazon.com> +Content-Disposition: inline + +From: Johannes Weiner + +commit a983b5ebee57209c99f68c8327072f25e0e6e3da upstream + +We've seen memory.stat reads in top-level cgroups take up to fourteen +seconds during a userspace bug that created tens of thousands of ghost +cgroups pinned by lingering page cache. + +Even with a more reasonable number of cgroups, aggregating memory.stat +is unnecessarily heavy. The complexity is this: + + nr_cgroups * nr_stat_items * nr_possible_cpus + +where the stat items are ~70 at this point. With 128 cgroups and 128 +CPUs - decent, not enormous setups - reading the top-level memory.stat +has to aggregate over a million per-cpu counters. This doesn't scale. + +Instead of spreading the source of truth across all CPUs, use the +per-cpu counters merely to batch updates to shared atomic counters. + +This is the same as the per-cpu stocks we use for charging memory to the +shared atomic page_counters, and also the way the global vmstat counters +are implemented. + +Vmstat has elaborate spilling thresholds that depend on the number of +CPUs, amount of memory, and memory pressure - carefully balancing the +cost of counter updates with the amount of per-cpu error. That's +because the vmstat counters are system-wide, but also used for decisions +inside the kernel (e.g. NR_FREE_PAGES in the allocator). Neither is +true for the memory controller. + +Use the same static batch size we already use for page_counter updates +during charging. The per-cpu error in the stats will be 128k, which is +an acceptable ratio of cores to memory accounting granularity. + +[hannes@cmpxchg.org: fix warning in __this_cpu_xchg() calls] + Link: http://lkml.kernel.org/r/20171201135750.GB8097@cmpxchg.org +Link: http://lkml.kernel.org/r/20171103153336.24044-3-hannes@cmpxchg.org +Signed-off-by: Johannes Weiner +Acked-by: Vladimir Davydov +Cc: Michal Hocko +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Cc: stable@vger.kernel.org +[shaoyi@amazon.com: resolved the conflict brought by commit 17ffa29c355658c8e9b19f56cbf0388500ca7905 in mm/memcontrol.c by contextual fix] +Signed-off-by: Shaoying Xu +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/memcontrol.h | 96 +++++++++++++++++++++++++++--------------- + mm/memcontrol.c | 101 ++++++++++++++++++++++----------------------- + 2 files changed, 113 insertions(+), 84 deletions(-) + +--- a/include/linux/memcontrol.h ++++ b/include/linux/memcontrol.h +@@ -108,7 +108,10 @@ struct lruvec_stat { + */ + struct mem_cgroup_per_node { + struct lruvec lruvec; +- struct lruvec_stat __percpu *lruvec_stat; ++ ++ struct lruvec_stat __percpu *lruvec_stat_cpu; ++ atomic_long_t lruvec_stat[NR_VM_NODE_STAT_ITEMS]; ++ + unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS]; + + struct mem_cgroup_reclaim_iter iter[DEF_PRIORITY + 1]; +@@ -227,10 +230,10 @@ struct mem_cgroup { + spinlock_t move_lock; + struct task_struct *move_lock_task; + unsigned long move_lock_flags; +- /* +- * percpu counter. +- */ +- struct mem_cgroup_stat_cpu __percpu *stat; ++ ++ struct mem_cgroup_stat_cpu __percpu *stat_cpu; ++ atomic_long_t stat[MEMCG_NR_STAT]; ++ atomic_long_t events[MEMCG_NR_EVENTS]; + + unsigned long socket_pressure; + +@@ -265,6 +268,12 @@ struct mem_cgroup { + /* WARNING: nodeinfo must be the last member here */ + }; + ++/* ++ * size of first charge trial. "32" comes from vmscan.c's magic value. ++ * TODO: maybe necessary to use big numbers in big irons. ++ */ ++#define MEMCG_CHARGE_BATCH 32U ++ + extern struct mem_cgroup *root_mem_cgroup; + + static inline bool mem_cgroup_disabled(void) +@@ -485,32 +494,38 @@ void unlock_page_memcg(struct page *page + static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, + int idx) + { +- long val = 0; +- int cpu; +- +- for_each_possible_cpu(cpu) +- val += per_cpu(memcg->stat->count[idx], cpu); +- +- if (val < 0) +- val = 0; +- +- return val; ++ long x = atomic_long_read(&memcg->stat[idx]); ++#ifdef CONFIG_SMP ++ if (x < 0) ++ x = 0; ++#endif ++ return x; + } + + /* idx can be of type enum memcg_stat_item or node_stat_item */ + static inline void __mod_memcg_state(struct mem_cgroup *memcg, + int idx, int val) + { +- if (!mem_cgroup_disabled()) +- __this_cpu_add(memcg->stat->count[idx], val); ++ long x; ++ ++ if (mem_cgroup_disabled()) ++ return; ++ ++ x = val + __this_cpu_read(memcg->stat_cpu->count[idx]); ++ if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) { ++ atomic_long_add(x, &memcg->stat[idx]); ++ x = 0; ++ } ++ __this_cpu_write(memcg->stat_cpu->count[idx], x); + } + + /* idx can be of type enum memcg_stat_item or node_stat_item */ + static inline void mod_memcg_state(struct mem_cgroup *memcg, + int idx, int val) + { +- if (!mem_cgroup_disabled()) +- this_cpu_add(memcg->stat->count[idx], val); ++ preempt_disable(); ++ __mod_memcg_state(memcg, idx, val); ++ preempt_enable(); + } + + /** +@@ -548,26 +563,25 @@ static inline unsigned long lruvec_page_ + enum node_stat_item idx) + { + struct mem_cgroup_per_node *pn; +- long val = 0; +- int cpu; ++ long x; + + if (mem_cgroup_disabled()) + return node_page_state(lruvec_pgdat(lruvec), idx); + + pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); +- for_each_possible_cpu(cpu) +- val += per_cpu(pn->lruvec_stat->count[idx], cpu); +- +- if (val < 0) +- val = 0; +- +- return val; ++ x = atomic_long_read(&pn->lruvec_stat[idx]); ++#ifdef CONFIG_SMP ++ if (x < 0) ++ x = 0; ++#endif ++ return x; + } + + static inline void __mod_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx, int val) + { + struct mem_cgroup_per_node *pn; ++ long x; + + /* Update node */ + __mod_node_page_state(lruvec_pgdat(lruvec), idx, val); +@@ -581,7 +595,12 @@ static inline void __mod_lruvec_state(st + __mod_memcg_state(pn->memcg, idx, val); + + /* Update lruvec */ +- __this_cpu_add(pn->lruvec_stat->count[idx], val); ++ x = val + __this_cpu_read(pn->lruvec_stat_cpu->count[idx]); ++ if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) { ++ atomic_long_add(x, &pn->lruvec_stat[idx]); ++ x = 0; ++ } ++ __this_cpu_write(pn->lruvec_stat_cpu->count[idx], x); + } + + static inline void mod_lruvec_state(struct lruvec *lruvec, +@@ -624,16 +643,25 @@ unsigned long mem_cgroup_soft_limit_recl + static inline void __count_memcg_events(struct mem_cgroup *memcg, + int idx, unsigned long count) + { +- if (!mem_cgroup_disabled()) +- __this_cpu_add(memcg->stat->events[idx], count); ++ unsigned long x; ++ ++ if (mem_cgroup_disabled()) ++ return; ++ ++ x = count + __this_cpu_read(memcg->stat_cpu->events[idx]); ++ if (unlikely(x > MEMCG_CHARGE_BATCH)) { ++ atomic_long_add(x, &memcg->events[idx]); ++ x = 0; ++ } ++ __this_cpu_write(memcg->stat_cpu->events[idx], x); + } + +-/* idx can be of type enum memcg_event_item or vm_event_item */ + static inline void count_memcg_events(struct mem_cgroup *memcg, + int idx, unsigned long count) + { +- if (!mem_cgroup_disabled()) +- this_cpu_add(memcg->stat->events[idx], count); ++ preempt_disable(); ++ __count_memcg_events(memcg, idx, count); ++ preempt_enable(); + } + + /* idx can be of type enum memcg_event_item or vm_event_item */ +--- a/mm/memcontrol.c ++++ b/mm/memcontrol.c +@@ -542,39 +542,10 @@ mem_cgroup_largest_soft_limit_node(struc + return mz; + } + +-/* +- * Return page count for single (non recursive) @memcg. +- * +- * Implementation Note: reading percpu statistics for memcg. +- * +- * Both of vmstat[] and percpu_counter has threshold and do periodic +- * synchronization to implement "quick" read. There are trade-off between +- * reading cost and precision of value. Then, we may have a chance to implement +- * a periodic synchronization of counter in memcg's counter. +- * +- * But this _read() function is used for user interface now. The user accounts +- * memory usage by memory cgroup and he _always_ requires exact value because +- * he accounts memory. Even if we provide quick-and-fuzzy read, we always +- * have to visit all online cpus and make sum. So, for now, unnecessary +- * synchronization is not implemented. (just implemented for cpu hotplug) +- * +- * If there are kernel internal actions which can make use of some not-exact +- * value, and reading all cpu value can be performance bottleneck in some +- * common workload, threshold and synchronization as vmstat[] should be +- * implemented. +- * +- * The parameter idx can be of type enum memcg_event_item or vm_event_item. +- */ +- + static unsigned long memcg_sum_events(struct mem_cgroup *memcg, + int event) + { +- unsigned long val = 0; +- int cpu; +- +- for_each_possible_cpu(cpu) +- val += per_cpu(memcg->stat->events[event], cpu); +- return val; ++ return atomic_long_read(&memcg->events[event]); + } + + static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg, +@@ -606,7 +577,7 @@ static void mem_cgroup_charge_statistics + nr_pages = -nr_pages; /* for event */ + } + +- __this_cpu_add(memcg->stat->nr_page_events, nr_pages); ++ __this_cpu_add(memcg->stat_cpu->nr_page_events, nr_pages); + } + + unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, +@@ -642,8 +613,8 @@ static bool mem_cgroup_event_ratelimit(s + { + unsigned long val, next; + +- val = __this_cpu_read(memcg->stat->nr_page_events); +- next = __this_cpu_read(memcg->stat->targets[target]); ++ val = __this_cpu_read(memcg->stat_cpu->nr_page_events); ++ next = __this_cpu_read(memcg->stat_cpu->targets[target]); + /* from time_after() in jiffies.h */ + if ((long)(next - val) < 0) { + switch (target) { +@@ -659,7 +630,7 @@ static bool mem_cgroup_event_ratelimit(s + default: + break; + } +- __this_cpu_write(memcg->stat->targets[target], next); ++ __this_cpu_write(memcg->stat_cpu->targets[target], next); + return true; + } + return false; +@@ -1726,11 +1697,6 @@ void unlock_page_memcg(struct page *page + } + EXPORT_SYMBOL(unlock_page_memcg); + +-/* +- * size of first charge trial. "32" comes from vmscan.c's magic value. +- * TODO: maybe necessary to use big numbers in big irons. +- */ +-#define CHARGE_BATCH 32U + struct memcg_stock_pcp { + struct mem_cgroup *cached; /* this never be root cgroup */ + unsigned int nr_pages; +@@ -1758,7 +1724,7 @@ static bool consume_stock(struct mem_cgr + unsigned long flags; + bool ret = false; + +- if (nr_pages > CHARGE_BATCH) ++ if (nr_pages > MEMCG_CHARGE_BATCH) + return ret; + + local_irq_save(flags); +@@ -1827,7 +1793,7 @@ static void refill_stock(struct mem_cgro + } + stock->nr_pages += nr_pages; + +- if (stock->nr_pages > CHARGE_BATCH) ++ if (stock->nr_pages > MEMCG_CHARGE_BATCH) + drain_stock(stock); + + local_irq_restore(flags); +@@ -1877,9 +1843,44 @@ static void drain_all_stock(struct mem_c + static int memcg_hotplug_cpu_dead(unsigned int cpu) + { + struct memcg_stock_pcp *stock; ++ struct mem_cgroup *memcg; + + stock = &per_cpu(memcg_stock, cpu); + drain_stock(stock); ++ ++ for_each_mem_cgroup(memcg) { ++ int i; ++ ++ for (i = 0; i < MEMCG_NR_STAT; i++) { ++ int nid; ++ long x; ++ ++ x = this_cpu_xchg(memcg->stat_cpu->count[i], 0); ++ if (x) ++ atomic_long_add(x, &memcg->stat[i]); ++ ++ if (i >= NR_VM_NODE_STAT_ITEMS) ++ continue; ++ ++ for_each_node(nid) { ++ struct mem_cgroup_per_node *pn; ++ ++ pn = mem_cgroup_nodeinfo(memcg, nid); ++ x = this_cpu_xchg(pn->lruvec_stat_cpu->count[i], 0); ++ if (x) ++ atomic_long_add(x, &pn->lruvec_stat[i]); ++ } ++ } ++ ++ for (i = 0; i < MEMCG_NR_EVENTS; i++) { ++ long x; ++ ++ x = this_cpu_xchg(memcg->stat_cpu->events[i], 0); ++ if (x) ++ atomic_long_add(x, &memcg->events[i]); ++ } ++ } ++ + return 0; + } + +@@ -1900,7 +1901,7 @@ static void high_work_func(struct work_s + struct mem_cgroup *memcg; + + memcg = container_of(work, struct mem_cgroup, high_work); +- reclaim_high(memcg, CHARGE_BATCH, GFP_KERNEL); ++ reclaim_high(memcg, MEMCG_CHARGE_BATCH, GFP_KERNEL); + } + + /* +@@ -1924,7 +1925,7 @@ void mem_cgroup_handle_over_high(void) + static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, + unsigned int nr_pages) + { +- unsigned int batch = max(CHARGE_BATCH, nr_pages); ++ unsigned int batch = max(MEMCG_CHARGE_BATCH, nr_pages); + int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; + struct mem_cgroup *mem_over_limit; + struct page_counter *counter; +@@ -4203,8 +4204,8 @@ static int alloc_mem_cgroup_per_node_inf + if (!pn) + return 1; + +- pn->lruvec_stat = alloc_percpu(struct lruvec_stat); +- if (!pn->lruvec_stat) { ++ pn->lruvec_stat_cpu = alloc_percpu(struct lruvec_stat); ++ if (!pn->lruvec_stat_cpu) { + kfree(pn); + return 1; + } +@@ -4225,7 +4226,7 @@ static void free_mem_cgroup_per_node_inf + if (!pn) + return; + +- free_percpu(pn->lruvec_stat); ++ free_percpu(pn->lruvec_stat_cpu); + kfree(pn); + } + +@@ -4235,7 +4236,7 @@ static void __mem_cgroup_free(struct mem + + for_each_node(node) + free_mem_cgroup_per_node_info(memcg, node); +- free_percpu(memcg->stat); ++ free_percpu(memcg->stat_cpu); + kfree(memcg); + } + +@@ -4264,8 +4265,8 @@ static struct mem_cgroup *mem_cgroup_all + if (memcg->id.id < 0) + goto fail; + +- memcg->stat = alloc_percpu(struct mem_cgroup_stat_cpu); +- if (!memcg->stat) ++ memcg->stat_cpu = alloc_percpu(struct mem_cgroup_stat_cpu); ++ if (!memcg->stat_cpu) + goto fail; + + for_each_node(node) +@@ -5686,7 +5687,7 @@ static void uncharge_batch(const struct + __mod_memcg_state(ug->memcg, MEMCG_RSS_HUGE, -ug->nr_huge); + __mod_memcg_state(ug->memcg, NR_SHMEM, -ug->nr_shmem); + __count_memcg_events(ug->memcg, PGPGOUT, ug->pgpgout); +- __this_cpu_add(ug->memcg->stat->nr_page_events, nr_pages); ++ __this_cpu_add(ug->memcg->stat_cpu->nr_page_events, nr_pages); + memcg_check_events(ug->memcg, ug->dummy_page); + local_irq_restore(flags); + diff --git a/queue-4.14/mm-memcontrol-implement-lruvec-stat-functions-on-top-of-each-other.patch b/queue-4.14/mm-memcontrol-implement-lruvec-stat-functions-on-top-of-each-other.patch new file mode 100644 index 00000000000..a8213a5223c --- /dev/null +++ b/queue-4.14/mm-memcontrol-implement-lruvec-stat-functions-on-top-of-each-other.patch @@ -0,0 +1,112 @@ +From foo@baz Wed Dec 30 04:34:33 PM CET 2020 +From: Shaoying Xu +Date: Tue, 29 Dec 2020 02:35:58 +0000 +Subject: mm: memcontrol: implement lruvec stat functions on top of each other +To: +Cc: , +Message-ID: <20201229023558.GA25485@amazon.com> +Content-Disposition: inline + +From: Johannes Weiner + +commit 284542656e22c43fdada8c8cc0ca9ede8453eed7 upstream + +The implementation of the lruvec stat functions and their variants for +accounting through a page, or accounting from a preemptible context, are +mostly identical and needlessly repetitive. + +Implement the lruvec_page functions by looking up the page's lruvec and +then using the lruvec function. + +Implement the functions for preemptible contexts by disabling preemption +before calling the atomic context functions. + +Link: http://lkml.kernel.org/r/20171103153336.24044-2-hannes@cmpxchg.org +Signed-off-by: Johannes Weiner +Acked-by: Vladimir Davydov +Cc: Michal Hocko +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Cc: stable@vger.kernel.org +Signed-off-by: Shaoying Xu +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/memcontrol.h | 44 ++++++++++++++++++++++---------------------- + 1 file changed, 22 insertions(+), 22 deletions(-) + +--- a/include/linux/memcontrol.h ++++ b/include/linux/memcontrol.h +@@ -569,51 +569,51 @@ static inline void __mod_lruvec_state(st + { + struct mem_cgroup_per_node *pn; + ++ /* Update node */ + __mod_node_page_state(lruvec_pgdat(lruvec), idx, val); ++ + if (mem_cgroup_disabled()) + return; ++ + pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); ++ ++ /* Update memcg */ + __mod_memcg_state(pn->memcg, idx, val); ++ ++ /* Update lruvec */ + __this_cpu_add(pn->lruvec_stat->count[idx], val); + } + + static inline void mod_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx, int val) + { +- struct mem_cgroup_per_node *pn; +- +- mod_node_page_state(lruvec_pgdat(lruvec), idx, val); +- if (mem_cgroup_disabled()) +- return; +- pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); +- mod_memcg_state(pn->memcg, idx, val); +- this_cpu_add(pn->lruvec_stat->count[idx], val); ++ preempt_disable(); ++ __mod_lruvec_state(lruvec, idx, val); ++ preempt_enable(); + } + + static inline void __mod_lruvec_page_state(struct page *page, + enum node_stat_item idx, int val) + { +- struct mem_cgroup_per_node *pn; ++ pg_data_t *pgdat = page_pgdat(page); ++ struct lruvec *lruvec; + +- __mod_node_page_state(page_pgdat(page), idx, val); +- if (mem_cgroup_disabled() || !page->mem_cgroup) ++ /* Untracked pages have no memcg, no lruvec. Update only the node */ ++ if (!page->mem_cgroup) { ++ __mod_node_page_state(pgdat, idx, val); + return; +- __mod_memcg_state(page->mem_cgroup, idx, val); +- pn = page->mem_cgroup->nodeinfo[page_to_nid(page)]; +- __this_cpu_add(pn->lruvec_stat->count[idx], val); ++ } ++ ++ lruvec = mem_cgroup_lruvec(pgdat, page->mem_cgroup); ++ __mod_lruvec_state(lruvec, idx, val); + } + + static inline void mod_lruvec_page_state(struct page *page, + enum node_stat_item idx, int val) + { +- struct mem_cgroup_per_node *pn; +- +- mod_node_page_state(page_pgdat(page), idx, val); +- if (mem_cgroup_disabled() || !page->mem_cgroup) +- return; +- mod_memcg_state(page->mem_cgroup, idx, val); +- pn = page->mem_cgroup->nodeinfo[page_to_nid(page)]; +- this_cpu_add(pn->lruvec_stat->count[idx], val); ++ preempt_disable(); ++ __mod_lruvec_page_state(page, idx, val); ++ preempt_enable(); + } + + unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, diff --git a/queue-4.14/series b/queue-4.14/series index 815e2355cb4..6ffa5407677 100644 --- a/queue-4.14/series +++ b/queue-4.14/series @@ -1 +1,8 @@ x86-entry-64-add-instruction-suffix.patch +md-raid10-initialize-r10_bio-read_slot-before-use.patch +alsa-hda-ca0132-fix-work-handling-in-delayed-hp-detection.patch +alsa-usb-audio-simplify-set_sync_ep_implicit_fb_quirk.patch +alsa-usb-audio-fix-sync-ep-altsetting-sanity-check.patch +mm-memcontrol-eliminate-raw-access-to-stat-and-event-counters.patch +mm-memcontrol-implement-lruvec-stat-functions-on-top-of-each-other.patch +mm-memcontrol-fix-excessive-complexity-in-memory.stat-reporting.patch