--- /dev/null
+From 0ff4e8c61b794a4bf6c854ab071a1abaaa80f358 Mon Sep 17 00:00:00 2001
+From: "S.j. Wang" <shengjiu.wang@nxp.com>
+Date: Wed, 27 Feb 2019 06:31:12 +0000
+Subject: ASoC: fsl_esai: fix channel swap issue when stream starts
+
+From: S.j. Wang <shengjiu.wang@nxp.com>
+
+commit 0ff4e8c61b794a4bf6c854ab071a1abaaa80f358 upstream.
+
+There is a very low probability (< 0.1%) that a channel swap happens
+at the beginning of a stream when multiple output/input pins are
+enabled. The issue is that, with the normal enable flow, the hardware
+may not route data to the correct pins at start-up.
+
+This is a hardware issue, but there is no erratum for it. The
+workaround flow is: each time playback/recording starts, first clear
+xSMA/xSMB, then enable TE/RE, then enable xSMB and xSMA (xSMB must be
+enabled before xSMA). This makes xSMA the trigger-start register,
+whereas previously xCR_TE or xCR_RE was the bit that started the
+stream.
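+
+Condensed, the start sequence in the trigger path becomes the
+following (a sketch of the diff below; step 1, clearing xSMA/xSMB,
+has already been done at probe time and in the stop path):
+
+ /* 2) enable TE/RE: the pins are enabled, but no slot is unmasked yet */
+ regmap_update_bits(esai_priv->regmap, REG_ESAI_xCR(tx),
+                    tx ? ESAI_xCR_TE_MASK : ESAI_xCR_RE_MASK,
+                    tx ? ESAI_xCR_TE(pins) : ESAI_xCR_RE(pins));
+
+ mask = tx ? esai_priv->tx_mask : esai_priv->rx_mask;
+
+ /* 3) set xSMB first... */
+ regmap_update_bits(esai_priv->regmap, REG_ESAI_xSMB(tx),
+                    ESAI_xSMB_xS_MASK, ESAI_xSMB_xS(mask));
+ /* 4) ...then xSMA, which actually starts the transfer */
+ regmap_update_bits(esai_priv->regmap, REG_ESAI_xSMA(tx),
+                    ESAI_xSMA_xS_MASK, ESAI_xSMA_xS(mask));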
+
+Fixes: 43d24e76b698 ("ASoC: fsl_esai: Add ESAI CPU DAI driver")
+Cc: <stable@vger.kernel.org>
+Reviewed-by: Fabio Estevam <festevam@gmail.com>
+Acked-by: Nicolin Chen <nicoleotsuka@gmail.com>
+Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ sound/soc/fsl/fsl_esai.c | 47 +++++++++++++++++++++++++++++++++++++----------
+ 1 file changed, 37 insertions(+), 10 deletions(-)
+
+--- a/sound/soc/fsl/fsl_esai.c
++++ b/sound/soc/fsl/fsl_esai.c
+@@ -58,6 +58,8 @@ struct fsl_esai {
+ u32 fifo_depth;
+ u32 slot_width;
+ u32 slots;
++ u32 tx_mask;
++ u32 rx_mask;
+ u32 hck_rate[2];
+ u32 sck_rate[2];
+ bool hck_dir[2];
+@@ -358,21 +360,13 @@ static int fsl_esai_set_dai_tdm_slot(str
+ regmap_update_bits(esai_priv->regmap, REG_ESAI_TCCR,
+ ESAI_xCCR_xDC_MASK, ESAI_xCCR_xDC(slots));
+
+- regmap_update_bits(esai_priv->regmap, REG_ESAI_TSMA,
+- ESAI_xSMA_xS_MASK, ESAI_xSMA_xS(tx_mask));
+- regmap_update_bits(esai_priv->regmap, REG_ESAI_TSMB,
+- ESAI_xSMB_xS_MASK, ESAI_xSMB_xS(tx_mask));
+-
+ regmap_update_bits(esai_priv->regmap, REG_ESAI_RCCR,
+ ESAI_xCCR_xDC_MASK, ESAI_xCCR_xDC(slots));
+
+- regmap_update_bits(esai_priv->regmap, REG_ESAI_RSMA,
+- ESAI_xSMA_xS_MASK, ESAI_xSMA_xS(rx_mask));
+- regmap_update_bits(esai_priv->regmap, REG_ESAI_RSMB,
+- ESAI_xSMB_xS_MASK, ESAI_xSMB_xS(rx_mask));
+-
+ esai_priv->slot_width = slot_width;
+ esai_priv->slots = slots;
++ esai_priv->tx_mask = tx_mask;
++ esai_priv->rx_mask = rx_mask;
+
+ return 0;
+ }
+@@ -593,6 +587,7 @@ static int fsl_esai_trigger(struct snd_p
+ bool tx = substream->stream == SNDRV_PCM_STREAM_PLAYBACK;
+ u8 i, channels = substream->runtime->channels;
+ u32 pins = DIV_ROUND_UP(channels, esai_priv->slots);
++ u32 mask;
+
+ switch (cmd) {
+ case SNDRV_PCM_TRIGGER_START:
+@@ -605,15 +600,38 @@ static int fsl_esai_trigger(struct snd_p
+ for (i = 0; tx && i < channels; i++)
+ regmap_write(esai_priv->regmap, REG_ESAI_ETDR, 0x0);
+
++ /*
++  * If TE/RE is set at the end of the enablement flow, a channel
++  * swap may occur in the multiple data line case. To work around
++  * this hardware issue, switch the bit enablement sequence to the
++  * sequence below:
++  * 1) clear xSMB & xSMA: done at probe time and in the
++  * stop state.
++  * 2) set TE/RE
++  * 3) set xSMB
++  * 4) set xSMA: xSMA is the last step in this flow, and it
++  * triggers ESAI to start.
++  */
+ regmap_update_bits(esai_priv->regmap, REG_ESAI_xCR(tx),
+ tx ? ESAI_xCR_TE_MASK : ESAI_xCR_RE_MASK,
+ tx ? ESAI_xCR_TE(pins) : ESAI_xCR_RE(pins));
++ mask = tx ? esai_priv->tx_mask : esai_priv->rx_mask;
++
++ regmap_update_bits(esai_priv->regmap, REG_ESAI_xSMB(tx),
++ ESAI_xSMB_xS_MASK, ESAI_xSMB_xS(mask));
++ regmap_update_bits(esai_priv->regmap, REG_ESAI_xSMA(tx),
++ ESAI_xSMA_xS_MASK, ESAI_xSMA_xS(mask));
++
+ break;
+ case SNDRV_PCM_TRIGGER_SUSPEND:
+ case SNDRV_PCM_TRIGGER_STOP:
+ case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
+ regmap_update_bits(esai_priv->regmap, REG_ESAI_xCR(tx),
+ tx ? ESAI_xCR_TE_MASK : ESAI_xCR_RE_MASK, 0);
++ regmap_update_bits(esai_priv->regmap, REG_ESAI_xSMA(tx),
++ ESAI_xSMA_xS_MASK, 0);
++ regmap_update_bits(esai_priv->regmap, REG_ESAI_xSMB(tx),
++ ESAI_xSMB_xS_MASK, 0);
+
+ /* Disable and reset FIFO */
+ regmap_update_bits(esai_priv->regmap, REG_ESAI_xFCR(tx),
+@@ -903,6 +921,15 @@ static int fsl_esai_probe(struct platfor
+ return ret;
+ }
+
++ esai_priv->tx_mask = 0xFFFFFFFF;
++ esai_priv->rx_mask = 0xFFFFFFFF;
++
++ /* Clear the TSMA, TSMB, RSMA, RSMB */
++ regmap_write(esai_priv->regmap, REG_ESAI_TSMA, 0);
++ regmap_write(esai_priv->regmap, REG_ESAI_TSMB, 0);
++ regmap_write(esai_priv->regmap, REG_ESAI_RSMA, 0);
++ regmap_write(esai_priv->regmap, REG_ESAI_RSMB, 0);
++
+ ret = devm_snd_soc_register_component(&pdev->dev, &fsl_esai_component,
+ &fsl_esai_dai, 1);
+ if (ret) {
--- /dev/null
+From 0b3d6e6f2dd0a7b697b1aa8c167265908940624b Mon Sep 17 00:00:00 2001
+From: Greg Thelen <gthelen@google.com>
+Date: Fri, 5 Apr 2019 18:39:18 -0700
+Subject: mm: writeback: use exact memcg dirty counts
+
+From: Greg Thelen <gthelen@google.com>
+
+commit 0b3d6e6f2dd0a7b697b1aa8c167265908940624b upstream.
+
+Since commit a983b5ebee57 ("mm: memcontrol: fix excessive complexity in
+memory.stat reporting") memcg dirty and writeback counters are managed
+as:
+
+ 1) per-memcg per-cpu values in range of [-32..32]
+
+ 2) per-memcg atomic counter
+
+When a per-cpu counter cannot fit in [-32..32] it's flushed to the
+atomic. Stat readers only check the atomic. Thus readers such as
+balance_dirty_pages() may see a nontrivial error margin: 32 pages per
+cpu.
+
+Assuming 100 cpus:
+   4k x86 page_size: 100 * 32 * 4KiB  =  13 MiB error per memcg
+  64k ppc page_size: 100 * 32 * 64KiB = 200 MiB error per memcg
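+
+A minimal single-threaded userspace model of this scheme may make it
+concrete (names are illustrative only; in the kernel the writer path
+is roughly __mod_memcg_state(), with MEMCG_CHARGE_BATCH as the
+32-page batch):
+
+ #include <stdatomic.h>
+ #include <stdio.h>
+
+ #define NCPU  100
+ #define BATCH 32
+
+ static atomic_long total;   /* per-memcg atomic counter */
+ static long pcpu[NCPU];     /* per-cpu deltas in [-BATCH..BATCH] */
+
+ static void mod_stat(int cpu, long val)
+ {
+         long x = pcpu[cpu] + val;
+
+         if (x > BATCH || x < -BATCH) {
+                 atomic_fetch_add(&total, x); /* flush to the atomic */
+                 x = 0;
+         }
+         pcpu[cpu] = x;
+ }
+
+ /* approximate read: atomic only, up to NCPU*BATCH pages off */
+ static long read_approx(void) { return atomic_load(&total); }
+
+ /* exact read: fold in the per-cpu deltas, clamp at zero */
+ static long read_exact(void)
+ {
+         long x = atomic_load(&total);
+
+         for (int cpu = 0; cpu < NCPU; cpu++)
+                 x += pcpu[cpu];
+         return x < 0 ? 0 : x;
+ }
+
+ int main(void)
+ {
+         /* the nasty case described below: 99 cpus each hold +BATCH
+          * locally while earlier flushes left the atomic negative */
+         for (int cpu = 1; cpu < NCPU; cpu++)
+                 mod_stat(cpu, BATCH);
+         atomic_store(&total, -(long)(NCPU - 1) * BATCH);
+
+         /* prints approx=-3168 exact=0 */
+         printf("approx=%ld exact=%ld\n", read_approx(), read_exact());
+         return 0;
+ }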
+
+Considering that dirty and writeback are used together in some
+decisions, the errors double.
+
+This inaccuracy can lead to undeserved oom kills. One nasty case is
+when all per-cpu counters hold positive values offsetting a negative
+atomic value (i.e. per_cpu[*]=32, atomic=n_cpu*-32).
+balance_dirty_pages() only consults the atomic, so it does not
+consider throttling the next n_cpu*32 dirty pages. If the file lru is
+in the 13..200 MiB range then there is no dirty throttling at all,
+which leaves vmscan facing only dirty+writeback pages, so it resorts
+to oom kill.
+
+It could be argued that tiny containers are not supported, but it's
+more subtle than that. It's the amount of space available for the
+file lru that matters. If a container has memory.max - 200 MiB of
+non-reclaimable memory, then it will suffer the same oom kills on a
+100 cpu machine.
+
+The following test reliably ooms without this patch. This patch avoids
+oom kills.
+
+ $ cat test
+ mount -t cgroup2 none /dev/cgroup
+ cd /dev/cgroup
+ echo +io +memory > cgroup.subtree_control
+ mkdir test
+ cd test
+ echo 10M > memory.max
+ (echo $BASHPID > cgroup.procs && exec /memcg-writeback-stress /foo)
+ (echo $BASHPID > cgroup.procs && exec dd if=/dev/zero of=/foo bs=2M count=100)
+
+ $ cat memcg-writeback-stress.c
+ /*
+  * Dirty pages from all but one cpu.
+  * Clean pages from the non-dirtying cpu.
+  * This is to stress per-cpu counter imbalance.
+  * On a 100 cpu machine:
+  * - per-memcg per-cpu dirty count is 32 pages for each of 99 cpus
+  * - per-memcg atomic is -99*32 pages
+  * - thus the exact dirty count (sum of all counters) is 0
+  * - balance_dirty_pages() only sees the atomic count of -99*32
+  *   pages, which it max()s to 0.
+  * - So a workload can dirty 99*32 pages before balance_dirty_pages()
+  *   cares.
+  */
+ #define _GNU_SOURCE
+ #include <err.h>
+ #include <fcntl.h>
+ #include <sched.h>
+ #include <stdlib.h>
+ #include <stdio.h>
+ #include <sys/stat.h>
+ #include <sys/sysinfo.h>
+ #include <sys/types.h>
+ #include <unistd.h>
+
+ static char *buf;
+ static int bufSize;
+
+ static void set_affinity(int cpu)
+ {
+         cpu_set_t affinity;
+
+         CPU_ZERO(&affinity);
+         CPU_SET(cpu, &affinity);
+         if (sched_setaffinity(0, sizeof(affinity), &affinity))
+                 err(1, "sched_setaffinity");
+ }
+
+ static void dirty_on(int output_fd, int cpu)
+ {
+         int i, wrote;
+
+         set_affinity(cpu);
+         for (i = 0; i < 32; i++) {
+                 for (wrote = 0; wrote < bufSize; ) {
+                         int ret = write(output_fd, buf+wrote, bufSize-wrote);
+                         if (ret == -1)
+                                 err(1, "write");
+                         wrote += ret;
+                 }
+         }
+ }
+
+ int main(int argc, char **argv)
+ {
+         int cpu, flush_cpu = 1, output_fd;
+         const char *output;
+
+         if (argc != 2)
+                 errx(1, "usage: output_file");
+
+         output = argv[1];
+         bufSize = getpagesize();
+         buf = malloc(getpagesize());
+         if (buf == NULL)
+                 errx(1, "malloc failed");
+
+         /* O_CREAT requires a mode argument */
+         output_fd = open(output, O_CREAT|O_RDWR, 0644);
+         if (output_fd == -1)
+                 err(1, "open(%s)", output);
+
+         for (cpu = 0; cpu < get_nprocs(); cpu++) {
+                 if (cpu != flush_cpu)
+                         dirty_on(output_fd, cpu);
+         }
+
+         set_affinity(flush_cpu);
+         if (fsync(output_fd))
+                 err(1, "fsync(%s)", output);
+         if (close(output_fd))
+                 err(1, "close(%s)", output);
+         free(buf);
+ }
+
+Make balance_dirty_pages() and wb_over_bg_thresh() work harder to
+collect exact per memcg counters. This avoids the aforementioned oom
+kills.
+
+This does not affect the overhead of memory.stat, which still reads the
+single atomic counter.
+
+Why not use percpu_counter? memcg already handles cpus going offline,
+so there is no need for that overhead from percpu_counter. And the
+percpu_counter spinlocks are more heavyweight than is required.
+
+It probably also makes sense to use exact dirty and writeback counters
+in memcg oom reports. But that is saved for later.
+
+Link: http://lkml.kernel.org/r/20190329174609.164344-1-gthelen@google.com
+Signed-off-by: Greg Thelen <gthelen@google.com>
+Reviewed-by: Roman Gushchin <guro@fb.com>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: <stable@vger.kernel.org> [4.16+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/memcontrol.h | 5 ++++-
+ mm/memcontrol.c | 20 ++++++++++++++++++--
+ 2 files changed, 22 insertions(+), 3 deletions(-)
+
+--- a/include/linux/memcontrol.h
++++ b/include/linux/memcontrol.h
+@@ -488,7 +488,10 @@ struct mem_cgroup *lock_page_memcg(struc
+ void __unlock_page_memcg(struct mem_cgroup *memcg);
+ void unlock_page_memcg(struct page *page);
+
+-/* idx can be of type enum memcg_stat_item or node_stat_item */
++/*
++ * idx can be of type enum memcg_stat_item or node_stat_item.
++ * Keep in sync with memcg_exact_page_state().
++ */
+ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
+ int idx)
+ {
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -3671,6 +3671,22 @@ struct wb_domain *mem_cgroup_wb_domain(s
+ return &memcg->cgwb_domain;
+ }
+
++/*
++ * idx can be of type enum memcg_stat_item or node_stat_item.
++ * Keep in sync with memcg_exact_page().
++ */
++static unsigned long memcg_exact_page_state(struct mem_cgroup *memcg, int idx)
++{
++ long x = atomic_long_read(&memcg->stat[idx]);
++ int cpu;
++
++ for_each_online_cpu(cpu)
++ x += per_cpu_ptr(memcg->stat_cpu, cpu)->count[idx];
++ if (x < 0)
++ x = 0;
++ return x;
++}
++
+ /**
+ * mem_cgroup_wb_stats - retrieve writeback related stats from its memcg
+ * @wb: bdi_writeback in question
+@@ -3696,10 +3712,10 @@ void mem_cgroup_wb_stats(struct bdi_writ
+ struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css);
+ struct mem_cgroup *parent;
+
+- *pdirty = memcg_page_state(memcg, NR_FILE_DIRTY);
++ *pdirty = memcg_exact_page_state(memcg, NR_FILE_DIRTY);
+
+ /* this should eventually include NR_UNSTABLE_NFS */
+- *pwriteback = memcg_page_state(memcg, NR_WRITEBACK);
++ *pwriteback = memcg_exact_page_state(memcg, NR_WRITEBACK);
+ *pfilepages = mem_cgroup_nr_lru_pages(memcg, (1 << LRU_INACTIVE_FILE) |
+ (1 << LRU_ACTIVE_FILE));
+ *pheadroom = PAGE_COUNTER_MAX;