From: Greg Kroah-Hartman
Date: Mon, 15 Apr 2019 11:34:33 +0000 (+0200)
Subject: 4.14-stable patches
X-Git-Tag: v4.9.169~24
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=83d090bd4e9c6cfa2b44f26a2c6dc78e7f628552;p=thirdparty%2Fkernel%2Fstable-queue.git

4.14-stable patches

added patches:
asoc-fsl_esai-fix-channel-swap-issue-when-stream-starts.patch
mm-writeback-use-exact-memcg-dirty-counts.patch
---

diff --git a/queue-4.14/asoc-fsl_esai-fix-channel-swap-issue-when-stream-starts.patch b/queue-4.14/asoc-fsl_esai-fix-channel-swap-issue-when-stream-starts.patch
new file mode 100644
index 00000000000..a00cb95a080
--- /dev/null
+++ b/queue-4.14/asoc-fsl_esai-fix-channel-swap-issue-when-stream-starts.patch
@@ -0,0 +1,130 @@
+From 0ff4e8c61b794a4bf6c854ab071a1abaaa80f358 Mon Sep 17 00:00:00 2001
+From: "S.j. Wang"
+Date: Wed, 27 Feb 2019 06:31:12 +0000
+Subject: ASoC: fsl_esai: fix channel swap issue when stream starts
+
+From: S.j. Wang
+
+commit 0ff4e8c61b794a4bf6c854ab071a1abaaa80f358 upstream.
+
+There is a very low possibility (< 0.1%) of a channel swap at the
+beginning of a stream when multiple output/input pins are enabled.
+The issue is that, with the normal enable flow, the hardware cannot
+send data to the correct pins at the beginning.
+
+This is a hardware issue, but there is no erratum for it. The
+workaround is: each time playback/recording starts, first clear
+xSMA/xSMB, then enable TE/RE, then enable xSMB and xSMA (xSMB must be
+enabled before xSMA). This makes xSMA the register that triggers the
+start, whereas previously the xCR_TE or xCR_RE bit started the stream.
+
+Fixes: 43d24e76b698 ("ASoC: fsl_esai: Add ESAI CPU DAI driver")
+Cc: stable@vger.kernel.org
+Reviewed-by: Fabio Estevam
+Acked-by: Nicolin Chen
+Signed-off-by: Shengjiu Wang
+Signed-off-by: Mark Brown
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ sound/soc/fsl/fsl_esai.c |   47 +++++++++++++++++++++++++++++++++++++----------
+ 1 file changed, 37 insertions(+), 10 deletions(-)
+
+--- a/sound/soc/fsl/fsl_esai.c
++++ b/sound/soc/fsl/fsl_esai.c
+@@ -58,6 +58,8 @@ struct fsl_esai {
+ 	u32 fifo_depth;
+ 	u32 slot_width;
+ 	u32 slots;
++	u32 tx_mask;
++	u32 rx_mask;
+ 	u32 hck_rate[2];
+ 	u32 sck_rate[2];
+ 	bool hck_dir[2];
+@@ -358,21 +360,13 @@ static int fsl_esai_set_dai_tdm_slot(str
+ 	regmap_update_bits(esai_priv->regmap, REG_ESAI_TCCR,
+ 			   ESAI_xCCR_xDC_MASK, ESAI_xCCR_xDC(slots));
+ 
+-	regmap_update_bits(esai_priv->regmap, REG_ESAI_TSMA,
+-			   ESAI_xSMA_xS_MASK, ESAI_xSMA_xS(tx_mask));
+-	regmap_update_bits(esai_priv->regmap, REG_ESAI_TSMB,
+-			   ESAI_xSMB_xS_MASK, ESAI_xSMB_xS(tx_mask));
+-
+ 	regmap_update_bits(esai_priv->regmap, REG_ESAI_RCCR,
+ 			   ESAI_xCCR_xDC_MASK, ESAI_xCCR_xDC(slots));
+ 
+-	regmap_update_bits(esai_priv->regmap, REG_ESAI_RSMA,
+-			   ESAI_xSMA_xS_MASK, ESAI_xSMA_xS(rx_mask));
+-	regmap_update_bits(esai_priv->regmap, REG_ESAI_RSMB,
+-			   ESAI_xSMB_xS_MASK, ESAI_xSMB_xS(rx_mask));
+-
+ 	esai_priv->slot_width = slot_width;
+ 	esai_priv->slots = slots;
++	esai_priv->tx_mask = tx_mask;
++	esai_priv->rx_mask = rx_mask;
+ 
+ 	return 0;
+ }
+@@ -593,6 +587,7 @@ static int fsl_esai_trigger(struct snd_p
+ 	bool tx = substream->stream == SNDRV_PCM_STREAM_PLAYBACK;
+ 	u8 i, channels = substream->runtime->channels;
+ 	u32 pins = DIV_ROUND_UP(channels, esai_priv->slots);
++	u32 mask;
+ 
+ 	switch (cmd) {
+ 	case SNDRV_PCM_TRIGGER_START:
+@@ -605,15 +600,38 @@ static int fsl_esai_trigger(struct snd_p
+ 		for (i = 0; tx && i < channels; i++)
+ 			regmap_write(esai_priv->regmap, REG_ESAI_ETDR, 0x0);
+ 
++		/*
++		 * When TE/RE is set at the end of the enable flow, there
++		 * will be a channel swap issue in the multi data line case.
++		 * To work around this issue, we switch the bit
++		 * enablement sequence to the sequence below:
++		 * 1) clear xSMB & xSMA: done in probe and in the
++		 *    stop state.
++		 * 2) set TE/RE
++		 * 3) set xSMB
++		 * 4) set xSMA: xSMA is the last one in this flow, and it
++		 *    will trigger the esai to start.
++		 */
+ 		regmap_update_bits(esai_priv->regmap, REG_ESAI_xCR(tx),
+ 				   tx ? ESAI_xCR_TE_MASK : ESAI_xCR_RE_MASK,
+ 				   tx ? ESAI_xCR_TE(pins) : ESAI_xCR_RE(pins));
++		mask = tx ? esai_priv->tx_mask : esai_priv->rx_mask;
++
++		regmap_update_bits(esai_priv->regmap, REG_ESAI_xSMB(tx),
++				   ESAI_xSMB_xS_MASK, ESAI_xSMB_xS(mask));
++		regmap_update_bits(esai_priv->regmap, REG_ESAI_xSMA(tx),
++				   ESAI_xSMA_xS_MASK, ESAI_xSMA_xS(mask));
++
+ 		break;
+ 	case SNDRV_PCM_TRIGGER_SUSPEND:
+ 	case SNDRV_PCM_TRIGGER_STOP:
+ 	case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
+ 		regmap_update_bits(esai_priv->regmap, REG_ESAI_xCR(tx),
+ 				   tx ? ESAI_xCR_TE_MASK : ESAI_xCR_RE_MASK, 0);
++		regmap_update_bits(esai_priv->regmap, REG_ESAI_xSMA(tx),
++				   ESAI_xSMA_xS_MASK, 0);
++		regmap_update_bits(esai_priv->regmap, REG_ESAI_xSMB(tx),
++				   ESAI_xSMB_xS_MASK, 0);
+ 
+ 		/* Disable and reset FIFO */
+ 		regmap_update_bits(esai_priv->regmap, REG_ESAI_xFCR(tx),
+@@ -903,6 +921,15 @@ static int fsl_esai_probe(struct platfor
+ 		return ret;
+ 	}
+ 
++	esai_priv->tx_mask = 0xFFFFFFFF;
++	esai_priv->rx_mask = 0xFFFFFFFF;
++
++	/* Clear the TSMA, TSMB, RSMA, RSMB */
++	regmap_write(esai_priv->regmap, REG_ESAI_TSMA, 0);
++	regmap_write(esai_priv->regmap, REG_ESAI_TSMB, 0);
++	regmap_write(esai_priv->regmap, REG_ESAI_RSMA, 0);
++	regmap_write(esai_priv->regmap, REG_ESAI_RSMB, 0);
++
+ 	ret = devm_snd_soc_register_component(&pdev->dev, &fsl_esai_component,
+ 					      &fsl_esai_dai, 1);
+ 	if (ret) {
diff --git a/queue-4.14/mm-writeback-use-exact-memcg-dirty-counts.patch b/queue-4.14/mm-writeback-use-exact-memcg-dirty-counts.patch
new file mode 100644
index 00000000000..c647b461f32
--- /dev/null
+++ b/queue-4.14/mm-writeback-use-exact-memcg-dirty-counts.patch
@@ -0,0 +1,223 @@
+From 0b3d6e6f2dd0a7b697b1aa8c167265908940624b Mon Sep 17 00:00:00 2001
+From: Greg Thelen
+Date: Fri, 5 Apr 2019 18:39:18 -0700
+Subject: mm: writeback: use exact memcg dirty counts
+
+From: Greg Thelen
+
+commit 0b3d6e6f2dd0a7b697b1aa8c167265908940624b upstream.
+
+Since commit a983b5ebee57 ("mm: memcontrol: fix excessive complexity in
+memory.stat reporting") memcg dirty and writeback counters are managed
+as:
+
+  1) per-memcg per-cpu values in the range [-32..32]
+
+  2) a per-memcg atomic counter
+
+When a per-cpu counter cannot fit in [-32..32] it's flushed to the
+atomic. Stat readers only check the atomic. Thus readers such as
+balance_dirty_pages() may see a nontrivial error margin: 32 pages per
+cpu.
+
+Assuming 100 cpus:
+   4k x86 page_size:  13 MiB error per memcg
+  64k ppc page_size: 200 MiB error per memcg
+
+Considering that dirty+writeback are used together for some decisions,
+the errors double.
+
+This inaccuracy can lead to undeserved oom kills. One nasty case is
+when all per-cpu counters hold positive values offsetting an atomic
+negative value (i.e. per_cpu[*]=32, atomic=n_cpu*-32).
+balance_dirty_pages() only consults the atomic and does not consider
+throttling the next n_cpu*32 dirty pages. If the file_lru is in the
+13..200 MiB range then there's absolutely no dirty throttling, which
+burdens vmscan with only dirty+writeback pages and thus leads to oom
+kills.
+
+It could be argued that tiny containers are not supported, but it's more
+subtle.
+It's the amount of space available for the file lru that matters. If a
+container has memory.max-200MiB of non-reclaimable memory, then it will
+also suffer such oom kills on a 100 cpu machine.
+
+The following test reliably ooms without this patch. This patch avoids
+oom kills.
+
+  $ cat test
+  mount -t cgroup2 none /dev/cgroup
+  cd /dev/cgroup
+  echo +io +memory > cgroup.subtree_control
+  mkdir test
+  cd test
+  echo 10M > memory.max
+  (echo $BASHPID > cgroup.procs && exec /memcg-writeback-stress /foo)
+  (echo $BASHPID > cgroup.procs && exec dd if=/dev/zero of=/foo bs=2M count=100)
+
+  $ cat memcg-writeback-stress.c
+  /*
+   * Dirty pages from all but one cpu.
+   * Clean pages from the non dirtying cpu.
+   * This is to stress per cpu counter imbalance.
+   * On a 100 cpu machine:
+   * - per memcg per cpu dirty count is 32 pages for each of 99 cpus
+   * - per memcg atomic is -99*32 pages
+   * - thus the complete dirty limit: sum of all counters 0
+   * - balance_dirty_pages() only sees atomic count -99*32 pages, which
+   *   it max()s to 0.
+   * - So a workload can dirty -99*32 pages before balance_dirty_pages()
+   *   cares.
+   */
+  #define _GNU_SOURCE
+  #include <err.h>
+  #include <fcntl.h>
+  #include <sched.h>
+  #include <stdlib.h>
+  #include <stdio.h>
+  #include <sys/stat.h>
+  #include <sys/sysinfo.h>
+  #include <sys/types.h>
+  #include <unistd.h>
+
+  static char *buf;
+  static int bufSize;
+
+  static void set_affinity(int cpu)
+  {
+  	cpu_set_t affinity;
+
+  	CPU_ZERO(&affinity);
+  	CPU_SET(cpu, &affinity);
+  	if (sched_setaffinity(0, sizeof(affinity), &affinity))
+  		err(1, "sched_setaffinity");
+  }
+
+  static void dirty_on(int output_fd, int cpu)
+  {
+  	int i, wrote;
+
+  	set_affinity(cpu);
+  	for (i = 0; i < 32; i++) {
+  		for (wrote = 0; wrote < bufSize; ) {
+  			int ret = write(output_fd, buf+wrote, bufSize-wrote);
+  			if (ret == -1)
+  				err(1, "write");
+  			wrote += ret;
+  		}
+  	}
+  }
+
+  int main(int argc, char **argv)
+  {
+  	int cpu, flush_cpu = 1, output_fd;
+  	const char *output;
+
+  	if (argc != 2)
+  		errx(1, "usage: output_file");
+
+  	output = argv[1];
+  	bufSize = getpagesize();
+  	buf = malloc(getpagesize());
+  	if (buf == NULL)
+  		errx(1, "malloc failed");
+
+  	output_fd = open(output, O_CREAT|O_RDWR);
+  	if (output_fd == -1)
+  		err(1, "open(%s)", output);
+
+  	for (cpu = 0; cpu < get_nprocs(); cpu++) {
+  		if (cpu != flush_cpu)
+  			dirty_on(output_fd, cpu);
+  	}
+
+  	set_affinity(flush_cpu);
+  	if (fsync(output_fd))
+  		err(1, "fsync(%s)", output);
+  	if (close(output_fd))
+  		err(1, "close(%s)", output);
+  	free(buf);
+  }
+
+Make balance_dirty_pages() and wb_over_bg_thresh() work harder to
+collect exact per memcg counters. This avoids the aforementioned oom
+kills.
+
+This does not affect the overhead of memory.stat, which still reads the
+single atomic counter.
+
+Why not use percpu_counter? memcg already handles cpus going offline, so
+no need for that overhead from percpu_counter. And the percpu_counter
+spinlocks are more heavyweight than is required.
+
+It probably also makes sense to use exact dirty and writeback counters
+in memcg oom reports. But that is saved for later.
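+
+To make the error model concrete, here is a minimal user-space sketch
+of the split counter (the names shared_atomic, per_cpu and
+account_page are invented for this illustration; this is not kernel
+code). Each "cpu" batches up to 32 events locally and folds them into
+the shared atomic only on overflow, so a reader that consults just the
+atomic can under-report by nr_cpus*32 pages, while a reader that also
+folds in the per-cpu remainders sees the exact count:
+
+  #include <stdio.h>
+
+  #define NR_CPUS 100
+  #define BATCH 32
+
+  static long shared_atomic;    /* models the per-memcg atomic */
+  static long per_cpu[NR_CPUS]; /* models the per-cpu deltas */
+
+  /* Account one dirtied page on @cpu; flush the batch on overflow. */
+  static void account_page(int cpu)
+  {
+  	if (++per_cpu[cpu] > BATCH) {
+  		shared_atomic += per_cpu[cpu];
+  		per_cpu[cpu] = 0;
+  	}
+  }
+
+  /* Approximate read: only the atomic, clamped to 0. */
+  static long approx_read(void)
+  {
+  	return shared_atomic < 0 ? 0 : shared_atomic;
+  }
+
+  /* Exact read: atomic plus all per-cpu remainders, clamped to 0. */
+  static long exact_read(void)
+  {
+  	long x = shared_atomic;
+  	int cpu;
+
+  	for (cpu = 0; cpu < NR_CPUS; cpu++)
+  		x += per_cpu[cpu];
+  	return x < 0 ? 0 : x;
+  }
+
+  int main(void)
+  {
+  	int cpu, i;
+
+  	/* 99 cpus each dirty 32 pages: nothing overflows to the atomic. */
+  	for (cpu = 1; cpu < NR_CPUS; cpu++)
+  		for (i = 0; i < BATCH; i++)
+  			account_page(cpu);
+
+  	/* Prints: approx 0 pages, exact 3168 pages (~12 MiB at 4k). */
+  	printf("approx %ld pages, exact %ld pages\n",
+  	       approx_read(), exact_read());
+  	return 0;
+  }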
+
+Link: http://lkml.kernel.org/r/20190329174609.164344-1-gthelen@google.com
+Signed-off-by: Greg Thelen
+Reviewed-by: Roman Gushchin
+Acked-by: Johannes Weiner
+Cc: Michal Hocko
+Cc: Vladimir Davydov
+Cc: Tejun Heo
+Cc: stable@vger.kernel.org [4.16+]
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ include/linux/memcontrol.h |    5 ++++-
+ mm/memcontrol.c            |   20 ++++++++++++++++++--
+ 2 files changed, 22 insertions(+), 3 deletions(-)
+
+--- a/include/linux/memcontrol.h
++++ b/include/linux/memcontrol.h
+@@ -488,7 +488,10 @@ struct mem_cgroup *lock_page_memcg(struc
+ void __unlock_page_memcg(struct mem_cgroup *memcg);
+ void unlock_page_memcg(struct page *page);
+ 
+-/* idx can be of type enum memcg_stat_item or node_stat_item */
++/*
++ * idx can be of type enum memcg_stat_item or node_stat_item.
++ * Keep in sync with memcg_exact_page_state().
++ */
+ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
+ 					     int idx)
+ {
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -3671,6 +3671,22 @@ struct wb_domain *mem_cgroup_wb_domain(s
+ 	return &memcg->cgwb_domain;
+ }
+ 
++/*
++ * idx can be of type enum memcg_stat_item or node_stat_item.
++ * Keep in sync with memcg_page_state().
++ */
++static unsigned long memcg_exact_page_state(struct mem_cgroup *memcg, int idx)
++{
++	long x = atomic_long_read(&memcg->stat[idx]);
++	int cpu;
++
++	for_each_online_cpu(cpu)
++		x += per_cpu_ptr(memcg->stat_cpu, cpu)->count[idx];
++	if (x < 0)
++		x = 0;
++	return x;
++}
++
+ /**
+  * mem_cgroup_wb_stats - retrieve writeback related stats from its memcg
+  * @wb: bdi_writeback in question
+@@ -3696,10 +3712,10 @@ void mem_cgroup_wb_stats(struct bdi_writ
+ 	struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css);
+ 	struct mem_cgroup *parent;
+ 
+-	*pdirty = memcg_page_state(memcg, NR_FILE_DIRTY);
++	*pdirty = memcg_exact_page_state(memcg, NR_FILE_DIRTY);
+ 
+ 	/* this should eventually include NR_UNSTABLE_NFS */
+-	*pwriteback = memcg_page_state(memcg, NR_WRITEBACK);
++	*pwriteback = memcg_exact_page_state(memcg, NR_WRITEBACK);
+ 	*pfilepages = mem_cgroup_nr_lru_pages(memcg, (1 << LRU_INACTIVE_FILE) |
+ 					      (1 << LRU_ACTIVE_FILE));
+ 	*pheadroom = PAGE_COUNTER_MAX;
diff --git a/queue-4.14/series b/queue-4.14/series
index 5959ccb9cd5..fff7524b0a9 100644
--- a/queue-4.14/series
+++ b/queue-4.14/series
@@ -42,3 +42,5 @@ parisc-regs_return_value-should-return-gpr28.patch
 alarmtimer-return-correct-remaining-time.patch
 drm-udl-add-a-release-method-and-delay-modeset-teardown.patch
 include-linux-bitrev.h-fix-constant-bitrev.patch
+mm-writeback-use-exact-memcg-dirty-counts.patch
+asoc-fsl_esai-fix-channel-swap-issue-when-stream-starts.patch