]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.3-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 8 Oct 2019 07:44:23 +0000 (09:44 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 8 Oct 2019 07:44:23 +0000 (09:44 +0200)
added patches:
asoc-define-a-set-of-dapm-pre-post-up-events.patch
asoc-sgtl5000-improve-vag-power-and-mute-control.patch
can-mcp251x-mcp251x_hw_reset-allow-more-time-after-a-reset.patch
crypto-caam-fix-concurrency-issue-in-givencrypt-descriptor.patch
crypto-caam-qi-fix-error-handling-in-ern-handler.patch
crypto-cavium-zip-add-missing-single_release.patch
crypto-ccree-account-for-tee-not-ready-to-report.patch
crypto-ccree-use-the-full-crypt-length-value.patch
crypto-qat-silence-smp_processor_id-warning.patch
crypto-skcipher-unmap-pages-after-an-external-error.patch
kvm-ppc-book3s-enable-xive-native-capability-only-if-opal-has-required-functions.patch
kvm-ppc-book3s-hv-check-for-mmu-ready-on-piggybacked-virtual-cores.patch
kvm-ppc-book3s-hv-don-t-lose-pending-doorbell-request-on-migration-on-p9.patch
kvm-ppc-book3s-hv-don-t-push-xive-context-when-not-using-xive-device.patch
kvm-ppc-book3s-hv-fix-race-in-re-enabling-xive-escalation-interrupts.patch
kvm-ppc-book3s-hv-xive-free-escalation-interrupts-before-disabling-the-vp.patch
kvm-s390-fix-__insn32_query-inline-assembly.patch
kvm-s390-test-for-bad-access-register-and-size-at-the-start-of-s390_mem_op.patch
kvm-x86-fix-userspace-set-invalid-cr4.patch
mips-treat-loongson-extensions-as-ases.patch
nbd-fix-max-number-of-supported-devs.patch
pm-devfreq-tegra-fix-khz-to-hz-conversion.patch
power-supply-sbs-battery-only-return-health-when-battery-present.patch
power-supply-sbs-battery-use-correct-flags-field.patch
powerpc-32s-fix-boot-failure-with-debug_pagealloc-without-kasan.patch
powerpc-603-fix-handling-of-the-dirty-flag.patch
powerpc-book3s64-mm-don-t-do-tlbie-fixup-for-some-hardware-revisions.patch
powerpc-book3s64-radix-rename-cpu_ftr_p9_tlbie_bug-feature-flag.patch
powerpc-kasan-fix-parallel-loading-of-modules.patch
powerpc-kasan-fix-shadow-area-set-up-for-modules.patch
powerpc-mce-fix-mce-handling-for-huge-pages.patch
powerpc-mce-schedule-work-from-irq_work.patch
powerpc-mm-add-a-helper-to-select-page_kernel_ro-or-page_readonly.patch
powerpc-mm-fix-an-oops-in-kasan_mmu_init.patch
powerpc-mm-fixup-tlbie-vs-mtpidr-mtlpidr-ordering-issue-on-power9.patch
powerpc-powernv-ioda-fix-race-in-tce-level-allocation.patch
powerpc-powernv-restrict-opal-symbol-map-to-only-be-readable-by-root.patch
powerpc-pseries-fix-cpu_hotplug_lock-acquisition-in-resize_hpt.patch
powerpc-ptdump-fix-addresses-display-on-ppc32.patch
powerpc-xive-implement-get_irqchip_state-method-for-xive-to-fix-shutdown-race.patch
revert-s390-dasd-add-discard-support-for-ese-volumes.patch
s390-cio-avoid-calling-strlen-on-null-pointer.patch
s390-cio-exclude-subchannels-with-no-parent-from-pseudo-check.patch
s390-dasd-fix-error-handling-during-online-processing.patch
s390-process-avoid-potential-reading-of-freed-stack.patch
s390-sclp-fix-bit-checked-for-has_sipl.patch
s390-topology-avoid-firing-events-before-kobjs-are-created.patch
timer-read-jiffies-once-when-forwarding-base-clk.patch
tools-lib-traceevent-do-not-free-tep-cmdlines-in-add_new_comm-on-failure.patch
tools-lib-traceevent-fix-robust-test-of-do_generate_dynamic_list_file.patch
tracing-make-sure-variable-reference-alias-has-correct-var_ref_idx.patch
usercopy-avoid-highmem-pfn-warning.patch

53 files changed:
queue-5.3/asoc-define-a-set-of-dapm-pre-post-up-events.patch [new file with mode: 0644]
queue-5.3/asoc-sgtl5000-improve-vag-power-and-mute-control.patch [new file with mode: 0644]
queue-5.3/can-mcp251x-mcp251x_hw_reset-allow-more-time-after-a-reset.patch [new file with mode: 0644]
queue-5.3/crypto-caam-fix-concurrency-issue-in-givencrypt-descriptor.patch [new file with mode: 0644]
queue-5.3/crypto-caam-qi-fix-error-handling-in-ern-handler.patch [new file with mode: 0644]
queue-5.3/crypto-cavium-zip-add-missing-single_release.patch [new file with mode: 0644]
queue-5.3/crypto-ccree-account-for-tee-not-ready-to-report.patch [new file with mode: 0644]
queue-5.3/crypto-ccree-use-the-full-crypt-length-value.patch [new file with mode: 0644]
queue-5.3/crypto-qat-silence-smp_processor_id-warning.patch [new file with mode: 0644]
queue-5.3/crypto-skcipher-unmap-pages-after-an-external-error.patch [new file with mode: 0644]
queue-5.3/kvm-ppc-book3s-enable-xive-native-capability-only-if-opal-has-required-functions.patch [new file with mode: 0644]
queue-5.3/kvm-ppc-book3s-hv-check-for-mmu-ready-on-piggybacked-virtual-cores.patch [new file with mode: 0644]
queue-5.3/kvm-ppc-book3s-hv-don-t-lose-pending-doorbell-request-on-migration-on-p9.patch [new file with mode: 0644]
queue-5.3/kvm-ppc-book3s-hv-don-t-push-xive-context-when-not-using-xive-device.patch [new file with mode: 0644]
queue-5.3/kvm-ppc-book3s-hv-fix-race-in-re-enabling-xive-escalation-interrupts.patch [new file with mode: 0644]
queue-5.3/kvm-ppc-book3s-hv-xive-free-escalation-interrupts-before-disabling-the-vp.patch [new file with mode: 0644]
queue-5.3/kvm-s390-fix-__insn32_query-inline-assembly.patch [new file with mode: 0644]
queue-5.3/kvm-s390-test-for-bad-access-register-and-size-at-the-start-of-s390_mem_op.patch [new file with mode: 0644]
queue-5.3/kvm-x86-fix-userspace-set-invalid-cr4.patch [new file with mode: 0644]
queue-5.3/mips-treat-loongson-extensions-as-ases.patch [new file with mode: 0644]
queue-5.3/nbd-fix-max-number-of-supported-devs.patch [new file with mode: 0644]
queue-5.3/pm-devfreq-tegra-fix-khz-to-hz-conversion.patch [new file with mode: 0644]
queue-5.3/power-supply-sbs-battery-only-return-health-when-battery-present.patch [new file with mode: 0644]
queue-5.3/power-supply-sbs-battery-use-correct-flags-field.patch [new file with mode: 0644]
queue-5.3/powerpc-32s-fix-boot-failure-with-debug_pagealloc-without-kasan.patch [new file with mode: 0644]
queue-5.3/powerpc-603-fix-handling-of-the-dirty-flag.patch [new file with mode: 0644]
queue-5.3/powerpc-book3s64-mm-don-t-do-tlbie-fixup-for-some-hardware-revisions.patch [new file with mode: 0644]
queue-5.3/powerpc-book3s64-radix-rename-cpu_ftr_p9_tlbie_bug-feature-flag.patch [new file with mode: 0644]
queue-5.3/powerpc-kasan-fix-parallel-loading-of-modules.patch [new file with mode: 0644]
queue-5.3/powerpc-kasan-fix-shadow-area-set-up-for-modules.patch [new file with mode: 0644]
queue-5.3/powerpc-mce-fix-mce-handling-for-huge-pages.patch [new file with mode: 0644]
queue-5.3/powerpc-mce-schedule-work-from-irq_work.patch [new file with mode: 0644]
queue-5.3/powerpc-mm-add-a-helper-to-select-page_kernel_ro-or-page_readonly.patch [new file with mode: 0644]
queue-5.3/powerpc-mm-fix-an-oops-in-kasan_mmu_init.patch [new file with mode: 0644]
queue-5.3/powerpc-mm-fixup-tlbie-vs-mtpidr-mtlpidr-ordering-issue-on-power9.patch [new file with mode: 0644]
queue-5.3/powerpc-powernv-ioda-fix-race-in-tce-level-allocation.patch [new file with mode: 0644]
queue-5.3/powerpc-powernv-restrict-opal-symbol-map-to-only-be-readable-by-root.patch [new file with mode: 0644]
queue-5.3/powerpc-pseries-fix-cpu_hotplug_lock-acquisition-in-resize_hpt.patch [new file with mode: 0644]
queue-5.3/powerpc-ptdump-fix-addresses-display-on-ppc32.patch [new file with mode: 0644]
queue-5.3/powerpc-xive-implement-get_irqchip_state-method-for-xive-to-fix-shutdown-race.patch [new file with mode: 0644]
queue-5.3/revert-s390-dasd-add-discard-support-for-ese-volumes.patch [new file with mode: 0644]
queue-5.3/s390-cio-avoid-calling-strlen-on-null-pointer.patch [new file with mode: 0644]
queue-5.3/s390-cio-exclude-subchannels-with-no-parent-from-pseudo-check.patch [new file with mode: 0644]
queue-5.3/s390-dasd-fix-error-handling-during-online-processing.patch [new file with mode: 0644]
queue-5.3/s390-process-avoid-potential-reading-of-freed-stack.patch [new file with mode: 0644]
queue-5.3/s390-sclp-fix-bit-checked-for-has_sipl.patch [new file with mode: 0644]
queue-5.3/s390-topology-avoid-firing-events-before-kobjs-are-created.patch [new file with mode: 0644]
queue-5.3/series [new file with mode: 0644]
queue-5.3/timer-read-jiffies-once-when-forwarding-base-clk.patch [new file with mode: 0644]
queue-5.3/tools-lib-traceevent-do-not-free-tep-cmdlines-in-add_new_comm-on-failure.patch [new file with mode: 0644]
queue-5.3/tools-lib-traceevent-fix-robust-test-of-do_generate_dynamic_list_file.patch [new file with mode: 0644]
queue-5.3/tracing-make-sure-variable-reference-alias-has-correct-var_ref_idx.patch [new file with mode: 0644]
queue-5.3/usercopy-avoid-highmem-pfn-warning.patch [new file with mode: 0644]

diff --git a/queue-5.3/asoc-define-a-set-of-dapm-pre-post-up-events.patch b/queue-5.3/asoc-define-a-set-of-dapm-pre-post-up-events.patch
new file mode 100644 (file)
index 0000000..1ebc789
--- /dev/null
@@ -0,0 +1,36 @@
+From cfc8f568aada98f9608a0a62511ca18d647613e2 Mon Sep 17 00:00:00 2001
+From: Oleksandr Suvorov <oleksandr.suvorov@toradex.com>
+Date: Fri, 19 Jul 2019 10:05:30 +0000
+Subject: ASoC: Define a set of DAPM pre/post-up events
+
+From: Oleksandr Suvorov <oleksandr.suvorov@toradex.com>
+
+commit cfc8f568aada98f9608a0a62511ca18d647613e2 upstream.
+
+Prepare to use SND_SOC_DAPM_PRE_POST_PMU definition to
+reduce coming code size and make it more readable.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Oleksandr Suvorov <oleksandr.suvorov@toradex.com>
+Reviewed-by: Marcel Ziswiler <marcel.ziswiler@toradex.com>
+Reviewed-by: Igor Opaniuk <igor.opaniuk@toradex.com>
+Reviewed-by: Fabio Estevam <festevam@gmail.com>
+Link: https://lore.kernel.org/r/20190719100524.23300-2-oleksandr.suvorov@toradex.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/sound/soc-dapm.h |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/include/sound/soc-dapm.h
++++ b/include/sound/soc-dapm.h
+@@ -353,6 +353,8 @@ struct device;
+ #define SND_SOC_DAPM_WILL_PMD   0x80    /* called at start of sequence */
+ #define SND_SOC_DAPM_PRE_POST_PMD \
+                               (SND_SOC_DAPM_PRE_PMD | SND_SOC_DAPM_POST_PMD)
++#define SND_SOC_DAPM_PRE_POST_PMU \
++                              (SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU)
+ /* convenience event type detection */
+ #define SND_SOC_DAPM_EVENT_ON(e)      \
diff --git a/queue-5.3/asoc-sgtl5000-improve-vag-power-and-mute-control.patch b/queue-5.3/asoc-sgtl5000-improve-vag-power-and-mute-control.patch
new file mode 100644 (file)
index 0000000..159d31a
--- /dev/null
@@ -0,0 +1,332 @@
+From b1f373a11d25fc9a5f7679c9b85799fe09b0dc4a Mon Sep 17 00:00:00 2001
+From: Oleksandr Suvorov <oleksandr.suvorov@toradex.com>
+Date: Fri, 19 Jul 2019 10:05:31 +0000
+Subject: ASoC: sgtl5000: Improve VAG power and mute control
+
+From: Oleksandr Suvorov <oleksandr.suvorov@toradex.com>
+
+commit b1f373a11d25fc9a5f7679c9b85799fe09b0dc4a upstream.
+
+VAG power control is improved to fit the manual [1]. This patch fixes as
+minimum one bug: if customer muxes Headphone to Line-In right after boot,
+the VAG power remains off that leads to poor sound quality from line-in.
+
+I.e. after boot:
+  - Connect sound source to Line-In jack;
+  - Connect headphone to HP jack;
+  - Run following commands:
+  $ amixer set 'Headphone' 80%
+  $ amixer set 'Headphone Mux' LINE_IN
+
+Change VAG power on/off control according to the following algorithm:
+  - turn VAG power ON on the 1st incoming event.
+  - keep it ON if there is any active VAG consumer (ADC/DAC/HP/Line-In).
+  - turn VAG power OFF when there is the latest consumer's pre-down event
+    come.
+  - always delay after VAG power OFF to avoid pop.
+  - delay after VAG power ON if the initiative consumer is Line-In, this
+    prevents pop during line-in muxing.
+
+According to the data sheet [1], to avoid any pops/clicks,
+the outputs should be muted during input/output
+routing changes.
+
+[1] https://www.nxp.com/docs/en/data-sheet/SGTL5000.pdf
+
+Cc: stable@vger.kernel.org
+Fixes: 9b34e6cc3bc2 ("ASoC: Add Freescale SGTL5000 codec support")
+Signed-off-by: Oleksandr Suvorov <oleksandr.suvorov@toradex.com>
+Reviewed-by: Marcel Ziswiler <marcel.ziswiler@toradex.com>
+Reviewed-by: Fabio Estevam <festevam@gmail.com>
+Reviewed-by: Cezary Rojewski <cezary.rojewski@intel.com>
+Link: https://lore.kernel.org/r/20190719100524.23300-3-oleksandr.suvorov@toradex.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ sound/soc/codecs/sgtl5000.c |  224 ++++++++++++++++++++++++++++++++++++++------
+ 1 file changed, 194 insertions(+), 30 deletions(-)
+
+--- a/sound/soc/codecs/sgtl5000.c
++++ b/sound/soc/codecs/sgtl5000.c
+@@ -31,6 +31,13 @@
+ #define SGTL5000_DAP_REG_OFFSET       0x0100
+ #define SGTL5000_MAX_REG_OFFSET       0x013A
++/* Delay for the VAG ramp up */
++#define SGTL5000_VAG_POWERUP_DELAY 500 /* ms */
++/* Delay for the VAG ramp down */
++#define SGTL5000_VAG_POWERDOWN_DELAY 500 /* ms */
++
++#define SGTL5000_OUTPUTS_MUTE (SGTL5000_HP_MUTE | SGTL5000_LINE_OUT_MUTE)
++
+ /* default value of sgtl5000 registers */
+ static const struct reg_default sgtl5000_reg_defaults[] = {
+       { SGTL5000_CHIP_DIG_POWER,              0x0000 },
+@@ -123,6 +130,13 @@ enum  {
+       I2S_SCLK_STRENGTH_HIGH,
+ };
++enum {
++      HP_POWER_EVENT,
++      DAC_POWER_EVENT,
++      ADC_POWER_EVENT,
++      LAST_POWER_EVENT = ADC_POWER_EVENT
++};
++
+ /* sgtl5000 private structure in codec */
+ struct sgtl5000_priv {
+       int sysclk;     /* sysclk rate */
+@@ -137,8 +151,109 @@ struct sgtl5000_priv {
+       u8 micbias_voltage;
+       u8 lrclk_strength;
+       u8 sclk_strength;
++      u16 mute_state[LAST_POWER_EVENT + 1];
+ };
++static inline int hp_sel_input(struct snd_soc_component *component)
++{
++      return (snd_soc_component_read32(component, SGTL5000_CHIP_ANA_CTRL) &
++              SGTL5000_HP_SEL_MASK) >> SGTL5000_HP_SEL_SHIFT;
++}
++
++static inline u16 mute_output(struct snd_soc_component *component,
++                            u16 mute_mask)
++{
++      u16 mute_reg = snd_soc_component_read32(component,
++                                            SGTL5000_CHIP_ANA_CTRL);
++
++      snd_soc_component_update_bits(component, SGTL5000_CHIP_ANA_CTRL,
++                          mute_mask, mute_mask);
++      return mute_reg;
++}
++
++static inline void restore_output(struct snd_soc_component *component,
++                                u16 mute_mask, u16 mute_reg)
++{
++      snd_soc_component_update_bits(component, SGTL5000_CHIP_ANA_CTRL,
++              mute_mask, mute_reg);
++}
++
++static void vag_power_on(struct snd_soc_component *component, u32 source)
++{
++      if (snd_soc_component_read32(component, SGTL5000_CHIP_ANA_POWER) &
++          SGTL5000_VAG_POWERUP)
++              return;
++
++      snd_soc_component_update_bits(component, SGTL5000_CHIP_ANA_POWER,
++                          SGTL5000_VAG_POWERUP, SGTL5000_VAG_POWERUP);
++
++      /* When VAG powering on to get local loop from Line-In, the sleep
++       * is required to avoid loud pop.
++       */
++      if (hp_sel_input(component) == SGTL5000_HP_SEL_LINE_IN &&
++          source == HP_POWER_EVENT)
++              msleep(SGTL5000_VAG_POWERUP_DELAY);
++}
++
++static int vag_power_consumers(struct snd_soc_component *component,
++                             u16 ana_pwr_reg, u32 source)
++{
++      int consumers = 0;
++
++      /* count dac/adc consumers unconditional */
++      if (ana_pwr_reg & SGTL5000_DAC_POWERUP)
++              consumers++;
++      if (ana_pwr_reg & SGTL5000_ADC_POWERUP)
++              consumers++;
++
++      /*
++       * If the event comes from HP and Line-In is selected,
++       * current action is 'DAC to be powered down'.
++       * As HP_POWERUP is not set when HP muxed to line-in,
++       * we need to keep VAG power ON.
++       */
++      if (source == HP_POWER_EVENT) {
++              if (hp_sel_input(component) == SGTL5000_HP_SEL_LINE_IN)
++                      consumers++;
++      } else {
++              if (ana_pwr_reg & SGTL5000_HP_POWERUP)
++                      consumers++;
++      }
++
++      return consumers;
++}
++
++static void vag_power_off(struct snd_soc_component *component, u32 source)
++{
++      u16 ana_pwr = snd_soc_component_read32(component,
++                                           SGTL5000_CHIP_ANA_POWER);
++
++      if (!(ana_pwr & SGTL5000_VAG_POWERUP))
++              return;
++
++      /*
++       * This function calls when any of VAG power consumers is disappearing.
++       * Thus, if there is more than one consumer at the moment, as minimum
++       * one consumer will definitely stay after the end of the current
++       * event.
++       * Don't clear VAG_POWERUP if 2 or more consumers of VAG present:
++       * - LINE_IN (for HP events) / HP (for DAC/ADC events)
++       * - DAC
++       * - ADC
++       * (the current consumer is disappearing right now)
++       */
++      if (vag_power_consumers(component, ana_pwr, source) >= 2)
++              return;
++
++      snd_soc_component_update_bits(component, SGTL5000_CHIP_ANA_POWER,
++              SGTL5000_VAG_POWERUP, 0);
++      /* In power down case, we need wait 400-1000 ms
++       * when VAG fully ramped down.
++       * As longer we wait, as smaller pop we've got.
++       */
++      msleep(SGTL5000_VAG_POWERDOWN_DELAY);
++}
++
+ /*
+  * mic_bias power on/off share the same register bits with
+  * output impedance of mic bias, when power on mic bias, we
+@@ -170,36 +285,46 @@ static int mic_bias_event(struct snd_soc
+       return 0;
+ }
+-/*
+- * As manual described, ADC/DAC only works when VAG powerup,
+- * So enabled VAG before ADC/DAC up.
+- * In power down case, we need wait 400ms when vag fully ramped down.
+- */
+-static int power_vag_event(struct snd_soc_dapm_widget *w,
+-      struct snd_kcontrol *kcontrol, int event)
++static int vag_and_mute_control(struct snd_soc_component *component,
++                               int event, int event_source)
+ {
+-      struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm);
+-      const u32 mask = SGTL5000_DAC_POWERUP | SGTL5000_ADC_POWERUP;
++      static const u16 mute_mask[] = {
++              /*
++               * Mask for HP_POWER_EVENT.
++               * Muxing Headphones have to be wrapped with mute/unmute
++               * headphones only.
++               */
++              SGTL5000_HP_MUTE,
++              /*
++               * Masks for DAC_POWER_EVENT/ADC_POWER_EVENT.
++               * Muxing DAC or ADC block have to wrapped with mute/unmute
++               * both headphones and line-out.
++               */
++              SGTL5000_OUTPUTS_MUTE,
++              SGTL5000_OUTPUTS_MUTE
++      };
++
++      struct sgtl5000_priv *sgtl5000 =
++              snd_soc_component_get_drvdata(component);
+       switch (event) {
++      case SND_SOC_DAPM_PRE_PMU:
++              sgtl5000->mute_state[event_source] =
++                      mute_output(component, mute_mask[event_source]);
++              break;
+       case SND_SOC_DAPM_POST_PMU:
+-              snd_soc_component_update_bits(component, SGTL5000_CHIP_ANA_POWER,
+-                      SGTL5000_VAG_POWERUP, SGTL5000_VAG_POWERUP);
+-              msleep(400);
++              vag_power_on(component, event_source);
++              restore_output(component, mute_mask[event_source],
++                             sgtl5000->mute_state[event_source]);
+               break;
+-
+       case SND_SOC_DAPM_PRE_PMD:
+-              /*
+-               * Don't clear VAG_POWERUP, when both DAC and ADC are
+-               * operational to prevent inadvertently starving the
+-               * other one of them.
+-               */
+-              if ((snd_soc_component_read32(component, SGTL5000_CHIP_ANA_POWER) &
+-                              mask) != mask) {
+-                      snd_soc_component_update_bits(component, SGTL5000_CHIP_ANA_POWER,
+-                              SGTL5000_VAG_POWERUP, 0);
+-                      msleep(400);
+-              }
++              sgtl5000->mute_state[event_source] =
++                      mute_output(component, mute_mask[event_source]);
++              vag_power_off(component, event_source);
++              break;
++      case SND_SOC_DAPM_POST_PMD:
++              restore_output(component, mute_mask[event_source],
++                             sgtl5000->mute_state[event_source]);
+               break;
+       default:
+               break;
+@@ -208,6 +333,41 @@ static int power_vag_event(struct snd_so
+       return 0;
+ }
++/*
++ * Mute Headphone when power it up/down.
++ * Control VAG power on HP power path.
++ */
++static int headphone_pga_event(struct snd_soc_dapm_widget *w,
++      struct snd_kcontrol *kcontrol, int event)
++{
++      struct snd_soc_component *component =
++              snd_soc_dapm_to_component(w->dapm);
++
++      return vag_and_mute_control(component, event, HP_POWER_EVENT);
++}
++
++/* As manual describes, ADC/DAC powering up/down requires
++ * to mute outputs to avoid pops.
++ * Control VAG power on ADC/DAC power path.
++ */
++static int adc_updown_depop(struct snd_soc_dapm_widget *w,
++      struct snd_kcontrol *kcontrol, int event)
++{
++      struct snd_soc_component *component =
++              snd_soc_dapm_to_component(w->dapm);
++
++      return vag_and_mute_control(component, event, ADC_POWER_EVENT);
++}
++
++static int dac_updown_depop(struct snd_soc_dapm_widget *w,
++      struct snd_kcontrol *kcontrol, int event)
++{
++      struct snd_soc_component *component =
++              snd_soc_dapm_to_component(w->dapm);
++
++      return vag_and_mute_control(component, event, DAC_POWER_EVENT);
++}
++
+ /* input sources for ADC */
+ static const char *adc_mux_text[] = {
+       "MIC_IN", "LINE_IN"
+@@ -280,7 +440,10 @@ static const struct snd_soc_dapm_widget
+                           mic_bias_event,
+                           SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_PRE_PMD),
+-      SND_SOC_DAPM_PGA("HP", SGTL5000_CHIP_ANA_POWER, 4, 0, NULL, 0),
++      SND_SOC_DAPM_PGA_E("HP", SGTL5000_CHIP_ANA_POWER, 4, 0, NULL, 0,
++                         headphone_pga_event,
++                         SND_SOC_DAPM_PRE_POST_PMU |
++                         SND_SOC_DAPM_PRE_POST_PMD),
+       SND_SOC_DAPM_PGA("LO", SGTL5000_CHIP_ANA_POWER, 0, 0, NULL, 0),
+       SND_SOC_DAPM_MUX("Capture Mux", SND_SOC_NOPM, 0, 0, &adc_mux),
+@@ -301,11 +464,12 @@ static const struct snd_soc_dapm_widget
+                               0, SGTL5000_CHIP_DIG_POWER,
+                               1, 0),
+-      SND_SOC_DAPM_ADC("ADC", "Capture", SGTL5000_CHIP_ANA_POWER, 1, 0),
+-      SND_SOC_DAPM_DAC("DAC", "Playback", SGTL5000_CHIP_ANA_POWER, 3, 0),
+-
+-      SND_SOC_DAPM_PRE("VAG_POWER_PRE", power_vag_event),
+-      SND_SOC_DAPM_POST("VAG_POWER_POST", power_vag_event),
++      SND_SOC_DAPM_ADC_E("ADC", "Capture", SGTL5000_CHIP_ANA_POWER, 1, 0,
++                         adc_updown_depop, SND_SOC_DAPM_PRE_POST_PMU |
++                         SND_SOC_DAPM_PRE_POST_PMD),
++      SND_SOC_DAPM_DAC_E("DAC", "Playback", SGTL5000_CHIP_ANA_POWER, 3, 0,
++                         dac_updown_depop, SND_SOC_DAPM_PRE_POST_PMU |
++                         SND_SOC_DAPM_PRE_POST_PMD),
+ };
+ /* routes for sgtl5000 */
diff --git a/queue-5.3/can-mcp251x-mcp251x_hw_reset-allow-more-time-after-a-reset.patch b/queue-5.3/can-mcp251x-mcp251x_hw_reset-allow-more-time-after-a-reset.patch
new file mode 100644 (file)
index 0000000..9f58cd8
--- /dev/null
@@ -0,0 +1,57 @@
+From d84ea2123f8d27144e3f4d58cd88c9c6ddc799de Mon Sep 17 00:00:00 2001
+From: Marc Kleine-Budde <mkl@pengutronix.de>
+Date: Tue, 13 Aug 2019 16:01:02 +0200
+Subject: can: mcp251x: mcp251x_hw_reset(): allow more time after a reset
+
+From: Marc Kleine-Budde <mkl@pengutronix.de>
+
+commit d84ea2123f8d27144e3f4d58cd88c9c6ddc799de upstream.
+
+Some boards take longer than 5ms to power up after a reset, so allow
+some retries attempts before giving up.
+
+Fixes: ff06d611a31c ("can: mcp251x: Improve mcp251x_hw_reset()")
+Cc: linux-stable <stable@vger.kernel.org>
+Tested-by: Sean Nyekjaer <sean@geanix.com>
+Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/can/spi/mcp251x.c |   19 ++++++++++++++-----
+ 1 file changed, 14 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/can/spi/mcp251x.c
++++ b/drivers/net/can/spi/mcp251x.c
+@@ -612,7 +612,7 @@ static int mcp251x_setup(struct net_devi
+ static int mcp251x_hw_reset(struct spi_device *spi)
+ {
+       struct mcp251x_priv *priv = spi_get_drvdata(spi);
+-      u8 reg;
++      unsigned long timeout;
+       int ret;
+       /* Wait for oscillator startup timer after power up */
+@@ -626,10 +626,19 @@ static int mcp251x_hw_reset(struct spi_d
+       /* Wait for oscillator startup timer after reset */
+       mdelay(MCP251X_OST_DELAY_MS);
+-      reg = mcp251x_read_reg(spi, CANSTAT);
+-      if ((reg & CANCTRL_REQOP_MASK) != CANCTRL_REQOP_CONF)
+-              return -ENODEV;
+-
++      /* Wait for reset to finish */
++      timeout = jiffies + HZ;
++      while ((mcp251x_read_reg(spi, CANSTAT) & CANCTRL_REQOP_MASK) !=
++             CANCTRL_REQOP_CONF) {
++              usleep_range(MCP251X_OST_DELAY_MS * 1000,
++                           MCP251X_OST_DELAY_MS * 1000 * 2);
++
++              if (time_after(jiffies, timeout)) {
++                      dev_err(&spi->dev,
++                              "MCP251x didn't enter in conf mode after reset\n");
++                      return -EBUSY;
++              }
++      }
+       return 0;
+ }
diff --git a/queue-5.3/crypto-caam-fix-concurrency-issue-in-givencrypt-descriptor.patch b/queue-5.3/crypto-caam-fix-concurrency-issue-in-givencrypt-descriptor.patch
new file mode 100644 (file)
index 0000000..2e2f882
--- /dev/null
@@ -0,0 +1,95 @@
+From 48f89d2a2920166c35b1c0b69917dbb0390ebec7 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Horia=20Geant=C4=83?= <horia.geanta@nxp.com>
+Date: Tue, 30 Jul 2019 08:48:33 +0300
+Subject: crypto: caam - fix concurrency issue in givencrypt descriptor
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Horia Geantă <horia.geanta@nxp.com>
+
+commit 48f89d2a2920166c35b1c0b69917dbb0390ebec7 upstream.
+
+IV transfer from ofifo to class2 (set up at [29][30]) is not guaranteed
+to be scheduled before the data transfer from ofifo to external memory
+(set up at [38]:
+
+[29] 10FA0004           ld: ind-nfifo (len=4) imm
+[30] 81F00010               <nfifo_entry: ofifo->class2 type=msg len=16>
+[31] 14820004           ld: ccb2-datasz len=4 offs=0 imm
+[32] 00000010               data:0x00000010
+[33] 8210010D    operation: cls1-op aes cbc init-final enc
+[34] A8080B04         math: (seqin + math0)->vseqout len=4
+[35] 28000010    seqfifold: skip len=16
+[36] A8080A04         math: (seqin + math0)->vseqin len=4
+[37] 2F1E0000    seqfifold: both msg1->2-last2-last1 len=vseqinsz
+[38] 69300000   seqfifostr: msg len=vseqoutsz
+[39] 5C20000C      seqstr: ccb2 ctx len=12 offs=0
+
+If ofifo -> external memory transfer happens first, DECO will hang
+(issuing a Watchdog Timeout error, if WDOG is enabled) waiting for
+data availability in ofifo for the ofifo -> c2 ififo transfer.
+
+Make sure IV transfer happens first by waiting for all CAAM internal
+transfers to end before starting payload transfer.
+
+New descriptor with jump command inserted at [37]:
+
+[..]
+[36] A8080A04         math: (seqin + math0)->vseqin len=4
+[37] A1000401         jump: jsl1 all-match[!nfifopend] offset=[01] local->[38]
+[38] 2F1E0000    seqfifold: both msg1->2-last2-last1 len=vseqinsz
+[39] 69300000   seqfifostr: msg len=vseqoutsz
+[40] 5C20000C      seqstr: ccb2 ctx len=12 offs=0
+
+[Note: the issue is present in the descriptor from the very beginning
+(cf. Fixes tag). However I've marked it v4.19+ since it's the oldest
+maintained kernel that the patch applies clean against.]
+
+Cc: <stable@vger.kernel.org> # v4.19+
+Fixes: 1acebad3d8db8 ("crypto: caam - faster aead implementation")
+Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/crypto/caam/caamalg_desc.c |    9 +++++++++
+ drivers/crypto/caam/caamalg_desc.h |    2 +-
+ 2 files changed, 10 insertions(+), 1 deletion(-)
+
+--- a/drivers/crypto/caam/caamalg_desc.c
++++ b/drivers/crypto/caam/caamalg_desc.c
+@@ -503,6 +503,7 @@ void cnstr_shdsc_aead_givencap(u32 * con
+                              const bool is_qi, int era)
+ {
+       u32 geniv, moveiv;
++      u32 *wait_cmd;
+       /* Note: Context registers are saved. */
+       init_sh_desc_key_aead(desc, cdata, adata, is_rfc3686, nonce, era);
+@@ -598,6 +599,14 @@ copy_iv:
+       /* Will read cryptlen */
+       append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
++
++      /*
++       * Wait for IV transfer (ofifo -> class2) to finish before starting
++       * ciphertext transfer (ofifo -> external memory).
++       */
++      wait_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | JUMP_COND_NIFP);
++      set_jump_tgt_here(desc, wait_cmd);
++
+       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_BOTH | KEY_VLF |
+                            FIFOLD_TYPE_MSG1OUT2 | FIFOLD_TYPE_LASTBOTH);
+       append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF);
+--- a/drivers/crypto/caam/caamalg_desc.h
++++ b/drivers/crypto/caam/caamalg_desc.h
+@@ -12,7 +12,7 @@
+ #define DESC_AEAD_BASE                        (4 * CAAM_CMD_SZ)
+ #define DESC_AEAD_ENC_LEN             (DESC_AEAD_BASE + 11 * CAAM_CMD_SZ)
+ #define DESC_AEAD_DEC_LEN             (DESC_AEAD_BASE + 15 * CAAM_CMD_SZ)
+-#define DESC_AEAD_GIVENC_LEN          (DESC_AEAD_ENC_LEN + 7 * CAAM_CMD_SZ)
++#define DESC_AEAD_GIVENC_LEN          (DESC_AEAD_ENC_LEN + 8 * CAAM_CMD_SZ)
+ #define DESC_QI_AEAD_ENC_LEN          (DESC_AEAD_ENC_LEN + 3 * CAAM_CMD_SZ)
+ #define DESC_QI_AEAD_DEC_LEN          (DESC_AEAD_DEC_LEN + 3 * CAAM_CMD_SZ)
+ #define DESC_QI_AEAD_GIVENC_LEN               (DESC_AEAD_GIVENC_LEN + 3 * CAAM_CMD_SZ)
diff --git a/queue-5.3/crypto-caam-qi-fix-error-handling-in-ern-handler.patch b/queue-5.3/crypto-caam-qi-fix-error-handling-in-ern-handler.patch
new file mode 100644 (file)
index 0000000..c42aba9
--- /dev/null
@@ -0,0 +1,73 @@
+From 51fab3d73054ca5b06b26e20edac0486b052c6f4 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Horia=20Geant=C4=83?= <horia.geanta@nxp.com>
+Date: Wed, 31 Jul 2019 16:08:02 +0300
+Subject: crypto: caam/qi - fix error handling in ERN handler
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Horia Geantă <horia.geanta@nxp.com>
+
+commit 51fab3d73054ca5b06b26e20edac0486b052c6f4 upstream.
+
+ERN handler calls the caam/qi frontend "done" callback with a status
+of -EIO. This is incorrect, since the callback expects a status value
+meaningful for the crypto engine - hence the cryptic messages
+like the one below:
+platform caam_qi: 15: unknown error source
+
+Fix this by providing the callback with:
+-the status returned by the crypto engine (fd[status]) in case
+it contains an error, OR
+-a QI "No error" code otherwise; this will trigger the message:
+platform caam_qi: 50000000: Queue Manager Interface: No error
+which is fine, since QMan driver provides details about the cause of
+failure
+
+Cc: <stable@vger.kernel.org> # v5.1+
+Fixes: 67c2315def06 ("crypto: caam - add Queue Interface (QI) backend support")
+Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
+Reviewed-by: Iuliana Prodan <iuliana.prodan@nxp.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/crypto/caam/error.c |    1 +
+ drivers/crypto/caam/qi.c    |    5 ++++-
+ drivers/crypto/caam/regs.h  |    1 +
+ 3 files changed, 6 insertions(+), 1 deletion(-)
+
+--- a/drivers/crypto/caam/error.c
++++ b/drivers/crypto/caam/error.c
+@@ -118,6 +118,7 @@ static const struct {
+       u8 value;
+       const char *error_text;
+ } qi_error_list[] = {
++      { 0x00, "No error" },
+       { 0x1F, "Job terminated by FQ or ICID flush" },
+       { 0x20, "FD format error"},
+       { 0x21, "FD command format error"},
+--- a/drivers/crypto/caam/qi.c
++++ b/drivers/crypto/caam/qi.c
+@@ -163,7 +163,10 @@ static void caam_fq_ern_cb(struct qman_p
+       dma_unmap_single(drv_req->drv_ctx->qidev, qm_fd_addr(fd),
+                        sizeof(drv_req->fd_sgt), DMA_BIDIRECTIONAL);
+-      drv_req->cbk(drv_req, -EIO);
++      if (fd->status)
++              drv_req->cbk(drv_req, be32_to_cpu(fd->status));
++      else
++              drv_req->cbk(drv_req, JRSTA_SSRC_QI);
+ }
+ static struct qman_fq *create_caam_req_fq(struct device *qidev,
+--- a/drivers/crypto/caam/regs.h
++++ b/drivers/crypto/caam/regs.h
+@@ -641,6 +641,7 @@ struct caam_job_ring {
+ #define JRSTA_SSRC_CCB_ERROR        0x20000000
+ #define JRSTA_SSRC_JUMP_HALT_USER   0x30000000
+ #define JRSTA_SSRC_DECO             0x40000000
++#define JRSTA_SSRC_QI               0x50000000
+ #define JRSTA_SSRC_JRERROR          0x60000000
+ #define JRSTA_SSRC_JUMP_HALT_CC     0x70000000
diff --git a/queue-5.3/crypto-cavium-zip-add-missing-single_release.patch b/queue-5.3/crypto-cavium-zip-add-missing-single_release.patch
new file mode 100644 (file)
index 0000000..69a166e
--- /dev/null
@@ -0,0 +1,48 @@
+From c552ffb5c93d9d65aaf34f5f001c4e7e8484ced1 Mon Sep 17 00:00:00 2001
+From: Wei Yongjun <weiyongjun1@huawei.com>
+Date: Wed, 4 Sep 2019 14:18:09 +0000
+Subject: crypto: cavium/zip - Add missing single_release()
+
+From: Wei Yongjun <weiyongjun1@huawei.com>
+
+commit c552ffb5c93d9d65aaf34f5f001c4e7e8484ced1 upstream.
+
+When using single_open() for opening, single_release() should be
+used instead of seq_release(), otherwise there is a memory leak.
+
+Fixes: 09ae5d37e093 ("crypto: zip - Add Compression/Decompression statistics")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/crypto/cavium/zip/zip_main.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/crypto/cavium/zip/zip_main.c
++++ b/drivers/crypto/cavium/zip/zip_main.c
+@@ -593,6 +593,7 @@ static const struct file_operations zip_
+       .owner = THIS_MODULE,
+       .open  = zip_stats_open,
+       .read  = seq_read,
++      .release = single_release,
+ };
+ static int zip_clear_open(struct inode *inode, struct file *file)
+@@ -604,6 +605,7 @@ static const struct file_operations zip_
+       .owner = THIS_MODULE,
+       .open  = zip_clear_open,
+       .read  = seq_read,
++      .release = single_release,
+ };
+ static int zip_regs_open(struct inode *inode, struct file *file)
+@@ -615,6 +617,7 @@ static const struct file_operations zip_
+       .owner = THIS_MODULE,
+       .open  = zip_regs_open,
+       .read  = seq_read,
++      .release = single_release,
+ };
+ /* Root directory for thunderx_zip debugfs entry */
diff --git a/queue-5.3/crypto-ccree-account-for-tee-not-ready-to-report.patch b/queue-5.3/crypto-ccree-account-for-tee-not-ready-to-report.patch
new file mode 100644 (file)
index 0000000..4602d6c
--- /dev/null
@@ -0,0 +1,42 @@
+From 76a95bd8f9e10cade9c4c8df93b5c20ff45dc0f5 Mon Sep 17 00:00:00 2001
+From: Gilad Ben-Yossef <gilad@benyossef.com>
+Date: Tue, 2 Jul 2019 14:39:19 +0300
+Subject: crypto: ccree - account for TEE not ready to report
+
+From: Gilad Ben-Yossef <gilad@benyossef.com>
+
+commit 76a95bd8f9e10cade9c4c8df93b5c20ff45dc0f5 upstream.
+
+When ccree driver runs it checks the state of the Trusted Execution
+Environment CryptoCell driver before proceeding. We did not account
+for cases where the TEE side is not ready or not available at all.
+Fix it by only considering TEE error state after sync with the TEE
+side driver.
+
+Signed-off-by: Gilad Ben-Yossef <gilad@benyossef.com>
+Fixes: ab8ec9658f5a ("crypto: ccree - add FIPS support")
+CC: stable@vger.kernel.org # v4.17+
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/crypto/ccree/cc_fips.c |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/drivers/crypto/ccree/cc_fips.c
++++ b/drivers/crypto/ccree/cc_fips.c
+@@ -21,7 +21,13 @@ static bool cc_get_tee_fips_status(struc
+       u32 reg;
+       reg = cc_ioread(drvdata, CC_REG(GPR_HOST));
+-      return (reg == (CC_FIPS_SYNC_TEE_STATUS | CC_FIPS_SYNC_MODULE_OK));
++      /* Did the TEE report status? */
++      if (reg & CC_FIPS_SYNC_TEE_STATUS)
++              /* Yes. Is it OK? */
++              return (reg & CC_FIPS_SYNC_MODULE_OK);
++
++      /* No. It's either not in use or will be reported later */
++      return true;
+ }
+ /*
diff --git a/queue-5.3/crypto-ccree-use-the-full-crypt-length-value.patch b/queue-5.3/crypto-ccree-use-the-full-crypt-length-value.patch
new file mode 100644 (file)
index 0000000..a93338b
--- /dev/null
@@ -0,0 +1,34 @@
+From 7a4be6c113c1f721818d1e3722a9015fe393295c Mon Sep 17 00:00:00 2001
+From: Gilad Ben-Yossef <gilad@benyossef.com>
+Date: Mon, 29 Jul 2019 13:40:18 +0300
+Subject: crypto: ccree - use the full crypt length value
+
+From: Gilad Ben-Yossef <gilad@benyossef.com>
+
+commit 7a4be6c113c1f721818d1e3722a9015fe393295c upstream.
+
+In case of AEAD decryption verifcation error we were using the
+wrong value to zero out the plaintext buffer leaving the end of
+the buffer with the false plaintext.
+
+Signed-off-by: Gilad Ben-Yossef <gilad@benyossef.com>
+Fixes: ff27e85a85bb ("crypto: ccree - add AEAD support")
+CC: stable@vger.kernel.org # v4.17+
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/crypto/ccree/cc_aead.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/crypto/ccree/cc_aead.c
++++ b/drivers/crypto/ccree/cc_aead.c
+@@ -236,7 +236,7 @@ static void cc_aead_complete(struct devi
+                       /* In case of payload authentication failure, MUST NOT
+                        * revealed the decrypted message --> zero its memory.
+                        */
+-                      cc_zero_sgl(areq->dst, areq_ctx->cryptlen);
++                      cc_zero_sgl(areq->dst, areq->cryptlen);
+                       err = -EBADMSG;
+               }
+       } else { /*ENCRYPT*/
diff --git a/queue-5.3/crypto-qat-silence-smp_processor_id-warning.patch b/queue-5.3/crypto-qat-silence-smp_processor_id-warning.patch
new file mode 100644 (file)
index 0000000..569785b
--- /dev/null
@@ -0,0 +1,68 @@
+From 1b82feb6c5e1996513d0fb0bbb475417088b4954 Mon Sep 17 00:00:00 2001
+From: Alexander Sverdlin <alexander.sverdlin@nokia.com>
+Date: Tue, 23 Jul 2019 07:24:01 +0000
+Subject: crypto: qat - Silence smp_processor_id() warning
+
+From: Alexander Sverdlin <alexander.sverdlin@nokia.com>
+
+commit 1b82feb6c5e1996513d0fb0bbb475417088b4954 upstream.
+
+It seems that smp_processor_id() is only used for a best-effort
+load-balancing, refer to qat_crypto_get_instance_node(). It's not feasible
+to disable preemption for the duration of the crypto requests. Therefore,
+just silence the warning. This commit is similar to e7a9b05ca4
+("crypto: cavium - Fix smp_processor_id() warnings").
+
+Silences the following splat:
+BUG: using smp_processor_id() in preemptible [00000000] code: cryptomgr_test/2904
+caller is qat_alg_ablkcipher_setkey+0x300/0x4a0 [intel_qat]
+CPU: 1 PID: 2904 Comm: cryptomgr_test Tainted: P           O    4.14.69 #1
+...
+Call Trace:
+ dump_stack+0x5f/0x86
+ check_preemption_disabled+0xd3/0xe0
+ qat_alg_ablkcipher_setkey+0x300/0x4a0 [intel_qat]
+ skcipher_setkey_ablkcipher+0x2b/0x40
+ __test_skcipher+0x1f3/0xb20
+ ? cpumask_next_and+0x26/0x40
+ ? find_busiest_group+0x10e/0x9d0
+ ? preempt_count_add+0x49/0xa0
+ ? try_module_get+0x61/0xf0
+ ? crypto_mod_get+0x15/0x30
+ ? __kmalloc+0x1df/0x1f0
+ ? __crypto_alloc_tfm+0x116/0x180
+ ? crypto_skcipher_init_tfm+0xa6/0x180
+ ? crypto_create_tfm+0x4b/0xf0
+ test_skcipher+0x21/0xa0
+ alg_test_skcipher+0x3f/0xa0
+ alg_test.part.6+0x126/0x2a0
+ ? finish_task_switch+0x21b/0x260
+ ? __schedule+0x1e9/0x800
+ ? __wake_up_common+0x8d/0x140
+ cryptomgr_test+0x40/0x50
+ kthread+0xff/0x130
+ ? cryptomgr_notify+0x540/0x540
+ ? kthread_create_on_node+0x70/0x70
+ ret_from_fork+0x24/0x50
+
+Fixes: ed8ccaef52 ("crypto: qat - Add support for SRIOV")
+Cc: stable@vger.kernel.org
+Signed-off-by: Alexander Sverdlin <alexander.sverdlin@nokia.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/crypto/qat/qat_common/adf_common_drv.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/crypto/qat/qat_common/adf_common_drv.h
++++ b/drivers/crypto/qat/qat_common/adf_common_drv.h
+@@ -95,7 +95,7 @@ struct service_hndl {
+ static inline int get_current_node(void)
+ {
+-      return topology_physical_package_id(smp_processor_id());
++      return topology_physical_package_id(raw_smp_processor_id());
+ }
+ int adf_service_register(struct service_hndl *service);
diff --git a/queue-5.3/crypto-skcipher-unmap-pages-after-an-external-error.patch b/queue-5.3/crypto-skcipher-unmap-pages-after-an-external-error.patch
new file mode 100644 (file)
index 0000000..499a8b5
--- /dev/null
@@ -0,0 +1,121 @@
+From 0ba3c026e685573bd3534c17e27da7c505ac99c4 Mon Sep 17 00:00:00 2001
+From: Herbert Xu <herbert@gondor.apana.org.au>
+Date: Fri, 6 Sep 2019 13:13:06 +1000
+Subject: crypto: skcipher - Unmap pages after an external error
+
+From: Herbert Xu <herbert@gondor.apana.org.au>
+
+commit 0ba3c026e685573bd3534c17e27da7c505ac99c4 upstream.
+
+skcipher_walk_done may be called with an error by internal or
+external callers.  For those internal callers we shouldn't unmap
+pages but for external callers we must unmap any pages that are
+in use.
+
+This patch distinguishes between the two cases by checking whether
+walk->nbytes is zero or not.  For internal callers, we now set
+walk->nbytes to zero prior to the call.  For external callers,
+walk->nbytes has always been non-zero (as zero is used to indicate
+the termination of a walk).
+
+Reported-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+Fixes: 5cde0af2a982 ("[CRYPTO] cipher: Added block cipher type")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ crypto/skcipher.c |   42 +++++++++++++++++++++++-------------------
+ 1 file changed, 23 insertions(+), 19 deletions(-)
+
+--- a/crypto/skcipher.c
++++ b/crypto/skcipher.c
+@@ -90,7 +90,7 @@ static inline u8 *skcipher_get_spot(u8 *
+       return max(start, end_page);
+ }
+-static void skcipher_done_slow(struct skcipher_walk *walk, unsigned int bsize)
++static int skcipher_done_slow(struct skcipher_walk *walk, unsigned int bsize)
+ {
+       u8 *addr;
+@@ -98,19 +98,21 @@ static void skcipher_done_slow(struct sk
+       addr = skcipher_get_spot(addr, bsize);
+       scatterwalk_copychunks(addr, &walk->out, bsize,
+                              (walk->flags & SKCIPHER_WALK_PHYS) ? 2 : 1);
++      return 0;
+ }
+ int skcipher_walk_done(struct skcipher_walk *walk, int err)
+ {
+-      unsigned int n; /* bytes processed */
+-      bool more;
++      unsigned int n = walk->nbytes;
++      unsigned int nbytes = 0;
+-      if (unlikely(err < 0))
++      if (!n)
+               goto finish;
+-      n = walk->nbytes - err;
+-      walk->total -= n;
+-      more = (walk->total != 0);
++      if (likely(err >= 0)) {
++              n -= err;
++              nbytes = walk->total - n;
++      }
+       if (likely(!(walk->flags & (SKCIPHER_WALK_PHYS |
+                                   SKCIPHER_WALK_SLOW |
+@@ -126,7 +128,7 @@ unmap_src:
+               memcpy(walk->dst.virt.addr, walk->page, n);
+               skcipher_unmap_dst(walk);
+       } else if (unlikely(walk->flags & SKCIPHER_WALK_SLOW)) {
+-              if (err) {
++              if (err > 0) {
+                       /*
+                        * Didn't process all bytes.  Either the algorithm is
+                        * broken, or this was the last step and it turned out
+@@ -134,27 +136,29 @@ unmap_src:
+                        * the algorithm requires it.
+                        */
+                       err = -EINVAL;
+-                      goto finish;
+-              }
+-              skcipher_done_slow(walk, n);
+-              goto already_advanced;
++                      nbytes = 0;
++              } else
++                      n = skcipher_done_slow(walk, n);
+       }
++      if (err > 0)
++              err = 0;
++
++      walk->total = nbytes;
++      walk->nbytes = 0;
++
+       scatterwalk_advance(&walk->in, n);
+       scatterwalk_advance(&walk->out, n);
+-already_advanced:
+-      scatterwalk_done(&walk->in, 0, more);
+-      scatterwalk_done(&walk->out, 1, more);
++      scatterwalk_done(&walk->in, 0, nbytes);
++      scatterwalk_done(&walk->out, 1, nbytes);
+-      if (more) {
++      if (nbytes) {
+               crypto_yield(walk->flags & SKCIPHER_WALK_SLEEP ?
+                            CRYPTO_TFM_REQ_MAY_SLEEP : 0);
+               return skcipher_walk_next(walk);
+       }
+-      err = 0;
+-finish:
+-      walk->nbytes = 0;
++finish:
+       /* Short-circuit for the common/fast path. */
+       if (!((unsigned long)walk->buffer | (unsigned long)walk->page))
+               goto out;
diff --git a/queue-5.3/kvm-ppc-book3s-enable-xive-native-capability-only-if-opal-has-required-functions.patch b/queue-5.3/kvm-ppc-book3s-enable-xive-native-capability-only-if-opal-has-required-functions.patch
new file mode 100644 (file)
index 0000000..90aa872
--- /dev/null
@@ -0,0 +1,122 @@
+From 2ad7a27deaf6d78545d97ab80874584f6990360e Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Mon, 26 Aug 2019 16:21:21 +1000
+Subject: KVM: PPC: Book3S: Enable XIVE native capability only if OPAL has required functions
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Paul Mackerras <paulus@ozlabs.org>
+
+commit 2ad7a27deaf6d78545d97ab80874584f6990360e upstream.
+
+There are some POWER9 machines where the OPAL firmware does not support
+the OPAL_XIVE_GET_QUEUE_STATE and OPAL_XIVE_SET_QUEUE_STATE calls.
+The impact of this is that a guest using XIVE natively will not be able
+to be migrated successfully.  On the source side, the get_attr operation
+on the KVM native device for the KVM_DEV_XIVE_GRP_EQ_CONFIG attribute
+will fail; on the destination side, the set_attr operation for the same
+attribute will fail.
+
+This adds tests for the existence of the OPAL get/set queue state
+functions, and if they are not supported, the XIVE-native KVM device
+is not created and the KVM_CAP_PPC_IRQ_XIVE capability returns false.
+Userspace can then either provide a software emulation of XIVE, or
+else tell the guest that it does not have a XIVE controller available
+to it.
+
+Cc: stable@vger.kernel.org # v5.2+
+Fixes: 3fab2d10588e ("KVM: PPC: Book3S HV: XIVE: Activate XIVE exploitation mode")
+Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
+Reviewed-by: Cédric Le Goater <clg@kaod.org>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/include/asm/kvm_ppc.h    |    1 +
+ arch/powerpc/include/asm/xive.h       |    1 +
+ arch/powerpc/kvm/book3s.c             |    8 +++++---
+ arch/powerpc/kvm/book3s_xive_native.c |    5 +++++
+ arch/powerpc/kvm/powerpc.c            |    3 ++-
+ arch/powerpc/sysdev/xive/native.c     |    7 +++++++
+ 6 files changed, 21 insertions(+), 4 deletions(-)
+
+--- a/arch/powerpc/include/asm/kvm_ppc.h
++++ b/arch/powerpc/include/asm/kvm_ppc.h
+@@ -598,6 +598,7 @@ extern int kvmppc_xive_native_get_vp(str
+                                    union kvmppc_one_reg *val);
+ extern int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu,
+                                    union kvmppc_one_reg *val);
++extern bool kvmppc_xive_native_supported(void);
+ #else
+ static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
+--- a/arch/powerpc/include/asm/xive.h
++++ b/arch/powerpc/include/asm/xive.h
+@@ -127,6 +127,7 @@ extern int xive_native_get_queue_state(u
+ extern int xive_native_set_queue_state(u32 vp_id, uint32_t prio, u32 qtoggle,
+                                      u32 qindex);
+ extern int xive_native_get_vp_state(u32 vp_id, u64 *out_state);
++extern bool xive_native_has_queue_state_support(void);
+ #else
+--- a/arch/powerpc/kvm/book3s.c
++++ b/arch/powerpc/kvm/book3s.c
+@@ -1083,9 +1083,11 @@ static int kvmppc_book3s_init(void)
+       if (xics_on_xive()) {
+               kvmppc_xive_init_module();
+               kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS);
+-              kvmppc_xive_native_init_module();
+-              kvm_register_device_ops(&kvm_xive_native_ops,
+-                                      KVM_DEV_TYPE_XIVE);
++              if (kvmppc_xive_native_supported()) {
++                      kvmppc_xive_native_init_module();
++                      kvm_register_device_ops(&kvm_xive_native_ops,
++                                              KVM_DEV_TYPE_XIVE);
++              }
+       } else
+ #endif
+               kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS);
+--- a/arch/powerpc/kvm/book3s_xive_native.c
++++ b/arch/powerpc/kvm/book3s_xive_native.c
+@@ -1171,6 +1171,11 @@ int kvmppc_xive_native_set_vp(struct kvm
+       return 0;
+ }
++bool kvmppc_xive_native_supported(void)
++{
++      return xive_native_has_queue_state_support();
++}
++
+ static int xive_native_debug_show(struct seq_file *m, void *private)
+ {
+       struct kvmppc_xive *xive = m->private;
+--- a/arch/powerpc/kvm/powerpc.c
++++ b/arch/powerpc/kvm/powerpc.c
+@@ -561,7 +561,8 @@ int kvm_vm_ioctl_check_extension(struct
+                * a POWER9 processor) and the PowerNV platform, as
+                * nested is not yet supported.
+                */
+-              r = xive_enabled() && !!cpu_has_feature(CPU_FTR_HVMODE);
++              r = xive_enabled() && !!cpu_has_feature(CPU_FTR_HVMODE) &&
++                      kvmppc_xive_native_supported();
+               break;
+ #endif
+--- a/arch/powerpc/sysdev/xive/native.c
++++ b/arch/powerpc/sysdev/xive/native.c
+@@ -811,6 +811,13 @@ int xive_native_set_queue_state(u32 vp_i
+ }
+ EXPORT_SYMBOL_GPL(xive_native_set_queue_state);
++bool xive_native_has_queue_state_support(void)
++{
++      return opal_check_token(OPAL_XIVE_GET_QUEUE_STATE) &&
++              opal_check_token(OPAL_XIVE_SET_QUEUE_STATE);
++}
++EXPORT_SYMBOL_GPL(xive_native_has_queue_state_support);
++
+ int xive_native_get_vp_state(u32 vp_id, u64 *out_state)
+ {
+       __be64 state;
diff --git a/queue-5.3/kvm-ppc-book3s-hv-check-for-mmu-ready-on-piggybacked-virtual-cores.patch b/queue-5.3/kvm-ppc-book3s-hv-check-for-mmu-ready-on-piggybacked-virtual-cores.patch
new file mode 100644 (file)
index 0000000..6483604
--- /dev/null
@@ -0,0 +1,71 @@
+From d28eafc5a64045c78136162af9d4ba42f8230080 Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Tue, 27 Aug 2019 11:31:37 +1000
+Subject: KVM: PPC: Book3S HV: Check for MMU ready on piggybacked virtual cores
+
+From: Paul Mackerras <paulus@ozlabs.org>
+
+commit d28eafc5a64045c78136162af9d4ba42f8230080 upstream.
+
+When we are running multiple vcores on the same physical core, they
+could be from different VMs and so it is possible that one of the
+VMs could have its arch.mmu_ready flag cleared (for example by a
+concurrent HPT resize) when we go to run it on a physical core.
+We currently check the arch.mmu_ready flag for the primary vcore
+but not the flags for the other vcores that will be run alongside
+it.  This adds that check, and also a check when we select the
+secondary vcores from the preempted vcores list.
+
+Cc: stable@vger.kernel.org # v4.14+
+Fixes: 38c53af85306 ("KVM: PPC: Book3S HV: Fix exclusion between HPT resizing and other HPT updates")
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/kvm/book3s_hv.c |   15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+--- a/arch/powerpc/kvm/book3s_hv.c
++++ b/arch/powerpc/kvm/book3s_hv.c
+@@ -2860,7 +2860,7 @@ static void collect_piggybacks(struct co
+               if (!spin_trylock(&pvc->lock))
+                       continue;
+               prepare_threads(pvc);
+-              if (!pvc->n_runnable) {
++              if (!pvc->n_runnable || !pvc->kvm->arch.mmu_ready) {
+                       list_del_init(&pvc->preempt_list);
+                       if (pvc->runner == NULL) {
+                               pvc->vcore_state = VCORE_INACTIVE;
+@@ -2881,15 +2881,20 @@ static void collect_piggybacks(struct co
+       spin_unlock(&lp->lock);
+ }
+-static bool recheck_signals(struct core_info *cip)
++static bool recheck_signals_and_mmu(struct core_info *cip)
+ {
+       int sub, i;
+       struct kvm_vcpu *vcpu;
++      struct kvmppc_vcore *vc;
+-      for (sub = 0; sub < cip->n_subcores; ++sub)
+-              for_each_runnable_thread(i, vcpu, cip->vc[sub])
++      for (sub = 0; sub < cip->n_subcores; ++sub) {
++              vc = cip->vc[sub];
++              if (!vc->kvm->arch.mmu_ready)
++                      return true;
++              for_each_runnable_thread(i, vcpu, vc)
+                       if (signal_pending(vcpu->arch.run_task))
+                               return true;
++      }
+       return false;
+ }
+@@ -3119,7 +3124,7 @@ static noinline void kvmppc_run_core(str
+       local_irq_disable();
+       hard_irq_disable();
+       if (lazy_irq_pending() || need_resched() ||
+-          recheck_signals(&core_info) || !vc->kvm->arch.mmu_ready) {
++          recheck_signals_and_mmu(&core_info)) {
+               local_irq_enable();
+               vc->vcore_state = VCORE_INACTIVE;
+               /* Unlock all except the primary vcore */
diff --git a/queue-5.3/kvm-ppc-book3s-hv-don-t-lose-pending-doorbell-request-on-migration-on-p9.patch b/queue-5.3/kvm-ppc-book3s-hv-don-t-lose-pending-doorbell-request-on-migration-on-p9.patch
new file mode 100644 (file)
index 0000000..9259466
--- /dev/null
@@ -0,0 +1,52 @@
+From ff42df49e75f053a8a6b4c2533100cdcc23afe69 Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Tue, 27 Aug 2019 11:35:40 +1000
+Subject: KVM: PPC: Book3S HV: Don't lose pending doorbell request on migration on P9
+
+From: Paul Mackerras <paulus@ozlabs.org>
+
+commit ff42df49e75f053a8a6b4c2533100cdcc23afe69 upstream.
+
+On POWER9, when userspace reads the value of the DPDES register on a
+vCPU, it is possible for 0 to be returned although there is a doorbell
+interrupt pending for the vCPU.  This can lead to a doorbell interrupt
+being lost across migration.  If the guest kernel uses doorbell
+interrupts for IPIs, then it could malfunction because of the lost
+interrupt.
+
+This happens because a newly-generated doorbell interrupt is signalled
+by setting vcpu->arch.doorbell_request to 1; the DPDES value in
+vcpu->arch.vcore->dpdes is not updated, because it can only be updated
+when holding the vcpu mutex, in order to avoid races.
+
+To fix this, we OR in vcpu->arch.doorbell_request when reading the
+DPDES value.
+
+Cc: stable@vger.kernel.org # v4.13+
+Fixes: 579006944e0d ("KVM: PPC: Book3S HV: Virtualize doorbell facility on POWER9")
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Tested-by: Alexey Kardashevskiy <aik@ozlabs.ru>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/kvm/book3s_hv.c |    9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/arch/powerpc/kvm/book3s_hv.c
++++ b/arch/powerpc/kvm/book3s_hv.c
+@@ -1678,7 +1678,14 @@ static int kvmppc_get_one_reg_hv(struct
+               *val = get_reg_val(id, vcpu->arch.pspb);
+               break;
+       case KVM_REG_PPC_DPDES:
+-              *val = get_reg_val(id, vcpu->arch.vcore->dpdes);
++              /*
++               * On POWER9, where we are emulating msgsndp etc.,
++               * we return 1 bit for each vcpu, which can come from
++               * either vcore->dpdes or doorbell_request.
++               * On POWER8, doorbell_request is 0.
++               */
++              *val = get_reg_val(id, vcpu->arch.vcore->dpdes |
++                                 vcpu->arch.doorbell_request);
+               break;
+       case KVM_REG_PPC_VTB:
+               *val = get_reg_val(id, vcpu->arch.vcore->vtb);
diff --git a/queue-5.3/kvm-ppc-book3s-hv-don-t-push-xive-context-when-not-using-xive-device.patch b/queue-5.3/kvm-ppc-book3s-hv-don-t-push-xive-context-when-not-using-xive-device.patch
new file mode 100644 (file)
index 0000000..9372970
--- /dev/null
@@ -0,0 +1,89 @@
+From 8d4ba9c931bc384bcc6889a43915aaaf19d3e499 Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Tue, 13 Aug 2019 20:01:00 +1000
+Subject: KVM: PPC: Book3S HV: Don't push XIVE context when not using XIVE device
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Paul Mackerras <paulus@ozlabs.org>
+
+commit 8d4ba9c931bc384bcc6889a43915aaaf19d3e499 upstream.
+
+At present, when running a guest on POWER9 using HV KVM but not using
+an in-kernel interrupt controller (XICS or XIVE), for example if QEMU
+is run with the kernel_irqchip=off option, the guest entry code goes
+ahead and tries to load the guest context into the XIVE hardware, even
+though no context has been set up.
+
+To fix this, we check that the "CAM word" is non-zero before pushing
+it to the hardware.  The CAM word is initialized to a non-zero value
+in kvmppc_xive_connect_vcpu() and kvmppc_xive_native_connect_vcpu(),
+and is now cleared in kvmppc_xive_{,native_}cleanup_vcpu.
+
+Fixes: 5af50993850a ("KVM: PPC: Book3S HV: Native usage of the XIVE interrupt controller")
+Cc: stable@vger.kernel.org # v4.12+
+Reported-by: Cédric Le Goater <clg@kaod.org>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Reviewed-by: Cédric Le Goater <clg@kaod.org>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20190813100100.GC9567@blackberry
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/kvm/book3s_hv_rmhandlers.S |    2 ++
+ arch/powerpc/kvm/book3s_xive.c          |   11 ++++++++++-
+ arch/powerpc/kvm/book3s_xive_native.c   |    3 +++
+ 3 files changed, 15 insertions(+), 1 deletion(-)
+
+--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
++++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+@@ -942,6 +942,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_3
+       ld      r11, VCPU_XIVE_SAVED_STATE(r4)
+       li      r9, TM_QW1_OS
+       lwz     r8, VCPU_XIVE_CAM_WORD(r4)
++      cmpwi   r8, 0
++      beq     no_xive
+       li      r7, TM_QW1_OS + TM_WORD2
+       mfmsr   r0
+       andi.   r0, r0, MSR_DR          /* in real mode? */
+--- a/arch/powerpc/kvm/book3s_xive.c
++++ b/arch/powerpc/kvm/book3s_xive.c
+@@ -67,8 +67,14 @@ void kvmppc_xive_push_vcpu(struct kvm_vc
+       void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
+       u64 pq;
+-      if (!tima)
++      /*
++       * Nothing to do if the platform doesn't have a XIVE
++       * or this vCPU doesn't have its own XIVE context
++       * (e.g. because it's not using an in-kernel interrupt controller).
++       */
++      if (!tima || !vcpu->arch.xive_cam_word)
+               return;
++
+       eieio();
+       __raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS);
+       __raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2);
+@@ -1146,6 +1152,9 @@ void kvmppc_xive_cleanup_vcpu(struct kvm
+       /* Disable the VP */
+       xive_native_disable_vp(xc->vp_id);
++      /* Clear the cam word so guest entry won't try to push context */
++      vcpu->arch.xive_cam_word = 0;
++
+       /* Free the queues */
+       for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+               struct xive_q *q = &xc->queues[i];
+--- a/arch/powerpc/kvm/book3s_xive_native.c
++++ b/arch/powerpc/kvm/book3s_xive_native.c
+@@ -81,6 +81,9 @@ void kvmppc_xive_native_cleanup_vcpu(str
+       /* Disable the VP */
+       xive_native_disable_vp(xc->vp_id);
++      /* Clear the cam word so guest entry won't try to push context */
++      vcpu->arch.xive_cam_word = 0;
++
+       /* Free the queues */
+       for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+               kvmppc_xive_native_cleanup_queue(vcpu, i);
diff --git a/queue-5.3/kvm-ppc-book3s-hv-fix-race-in-re-enabling-xive-escalation-interrupts.patch b/queue-5.3/kvm-ppc-book3s-hv-fix-race-in-re-enabling-xive-escalation-interrupts.patch
new file mode 100644 (file)
index 0000000..21df931
--- /dev/null
@@ -0,0 +1,111 @@
+From 959c5d5134786b4988b6fdd08e444aa67d1667ed Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Tue, 13 Aug 2019 20:03:49 +1000
+Subject: KVM: PPC: Book3S HV: Fix race in re-enabling XIVE escalation interrupts
+
+From: Paul Mackerras <paulus@ozlabs.org>
+
+commit 959c5d5134786b4988b6fdd08e444aa67d1667ed upstream.
+
+Escalation interrupts are interrupts sent to the host by the XIVE
+hardware when it has an interrupt to deliver to a guest VCPU but that
+VCPU is not running anywhere in the system.  Hence we disable the
+escalation interrupt for the VCPU being run when we enter the guest
+and re-enable it when the guest does an H_CEDE hypercall indicating
+it is idle.
+
+It is possible that an escalation interrupt gets generated just as we
+are entering the guest.  In that case the escalation interrupt may be
+using a queue entry in one of the interrupt queues, and that queue
+entry may not have been processed when the guest exits with an H_CEDE.
+The existing entry code detects this situation and does not clear the
+vcpu->arch.xive_esc_on flag as an indication that there is a pending
+queue entry (if the queue entry gets processed, xive_esc_irq() will
+clear the flag).  There is a comment in the code saying that if the
+flag is still set on H_CEDE, we have to abort the cede rather than
+re-enabling the escalation interrupt, lest we end up with two
+occurrences of the escalation interrupt in the interrupt queue.
+
+However, the exit code doesn't do that; it aborts the cede in the sense
+that vcpu->arch.ceded gets cleared, but it still enables the escalation
+interrupt by setting the source's PQ bits to 00.  Instead we need to
+set the PQ bits to 10, indicating that an interrupt has been triggered.
+We also need to avoid setting vcpu->arch.xive_esc_on in this case
+(i.e. vcpu->arch.xive_esc_on seen to be set on H_CEDE) because
+xive_esc_irq() will run at some point and clear it, and if we race with
+that we may end up with an incorrect result (i.e. xive_esc_on set when
+the escalation interrupt has just been handled).
+
+It is extremely unlikely that having two queue entries would cause
+observable problems; theoretically it could cause queue overflow, but
+the CPU would have to have thousands of interrupts targetted to it for
+that to be possible.  However, this fix will also make it possible to
+determine accurately whether there is an unhandled escalation
+interrupt in the queue, which will be needed by the following patch.
+
+Fixes: 9b9b13a6d153 ("KVM: PPC: Book3S HV: Keep XIVE escalation interrupt masked unless ceded")
+Cc: stable@vger.kernel.org # v4.16+
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20190813100349.GD9567@blackberry
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/kvm/book3s_hv_rmhandlers.S |   36 ++++++++++++++++++++------------
+ 1 file changed, 23 insertions(+), 13 deletions(-)
+
+--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
++++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+@@ -2833,29 +2833,39 @@ kvm_cede_prodded:
+ kvm_cede_exit:
+       ld      r9, HSTATE_KVM_VCPU(r13)
+ #ifdef CONFIG_KVM_XICS
+-      /* Abort if we still have a pending escalation */
++      /* are we using XIVE with single escalation? */
++      ld      r10, VCPU_XIVE_ESC_VADDR(r9)
++      cmpdi   r10, 0
++      beq     3f
++      li      r6, XIVE_ESB_SET_PQ_00
++      /*
++       * If we still have a pending escalation, abort the cede,
++       * and we must set PQ to 10 rather than 00 so that we don't
++       * potentially end up with two entries for the escalation
++       * interrupt in the XIVE interrupt queue.  In that case
++       * we also don't want to set xive_esc_on to 1 here in
++       * case we race with xive_esc_irq().
++       */
+       lbz     r5, VCPU_XIVE_ESC_ON(r9)
+       cmpwi   r5, 0
+-      beq     1f
++      beq     4f
+       li      r0, 0
+       stb     r0, VCPU_CEDED(r9)
+-1:    /* Enable XIVE escalation */
+-      li      r5, XIVE_ESB_SET_PQ_00
++      li      r6, XIVE_ESB_SET_PQ_10
++      b       5f
++4:    li      r0, 1
++      stb     r0, VCPU_XIVE_ESC_ON(r9)
++      /* make sure store to xive_esc_on is seen before xive_esc_irq runs */
++      sync
++5:    /* Enable XIVE escalation */
+       mfmsr   r0
+       andi.   r0, r0, MSR_DR          /* in real mode? */
+       beq     1f
+-      ld      r10, VCPU_XIVE_ESC_VADDR(r9)
+-      cmpdi   r10, 0
+-      beq     3f
+-      ldx     r0, r10, r5
++      ldx     r0, r10, r6
+       b       2f
+ 1:    ld      r10, VCPU_XIVE_ESC_RADDR(r9)
+-      cmpdi   r10, 0
+-      beq     3f
+-      ldcix   r0, r10, r5
++      ldcix   r0, r10, r6
+ 2:    sync
+-      li      r0, 1
+-      stb     r0, VCPU_XIVE_ESC_ON(r9)
+ #endif /* CONFIG_KVM_XICS */
+ 3:    b       guest_exit_cont
diff --git a/queue-5.3/kvm-ppc-book3s-hv-xive-free-escalation-interrupts-before-disabling-the-vp.patch b/queue-5.3/kvm-ppc-book3s-hv-xive-free-escalation-interrupts-before-disabling-the-vp.patch
new file mode 100644 (file)
index 0000000..8108b6a
--- /dev/null
@@ -0,0 +1,111 @@
+From 237aed48c642328ff0ab19b63423634340224a06 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@kaod.org>
+Date: Tue, 6 Aug 2019 19:25:38 +0200
+Subject: KVM: PPC: Book3S HV: XIVE: Free escalation interrupts before disabling the VP
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Cédric Le Goater <clg@kaod.org>
+
+commit 237aed48c642328ff0ab19b63423634340224a06 upstream.
+
+When a vCPU is brought done, the XIVE VP (Virtual Processor) is first
+disabled and then the event notification queues are freed. When freeing
+the queues, we check for possible escalation interrupts and free them
+also.
+
+But when a XIVE VP is disabled, the underlying XIVE ENDs also are
+disabled in OPAL. When an END (Event Notification Descriptor) is
+disabled, its ESB pages (ESn and ESe) are disabled and loads return all
+1s. Which means that any access on the ESB page of the escalation
+interrupt will return invalid values.
+
+When an interrupt is freed, the shutdown handler computes a 'saved_p'
+field from the value returned by a load in xive_do_source_set_mask().
+This value is incorrect for escalation interrupts for the reason
+described above.
+
+This has no impact on Linux/KVM today because we don't make use of it
+but we will introduce in future changes a xive_get_irqchip_state()
+handler. This handler will use the 'saved_p' field to return the state
+of an interrupt and 'saved_p' being incorrect, softlockup will occur.
+
+Fix the vCPU cleanup sequence by first freeing the escalation interrupts
+if any, then disable the XIVE VP and last free the queues.
+
+Fixes: 90c73795afa2 ("KVM: PPC: Book3S HV: Add a new KVM device for the XIVE native exploitation mode")
+Fixes: 5af50993850a ("KVM: PPC: Book3S HV: Native usage of the XIVE interrupt controller")
+Cc: stable@vger.kernel.org # v4.12+
+Signed-off-by: Cédric Le Goater <clg@kaod.org>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20190806172538.5087-1-clg@kaod.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/kvm/book3s_xive.c        |   18 ++++++++++--------
+ arch/powerpc/kvm/book3s_xive_native.c |   12 +++++++-----
+ 2 files changed, 17 insertions(+), 13 deletions(-)
+
+--- a/arch/powerpc/kvm/book3s_xive.c
++++ b/arch/powerpc/kvm/book3s_xive.c
+@@ -1134,20 +1134,22 @@ void kvmppc_xive_cleanup_vcpu(struct kvm
+       /* Mask the VP IPI */
+       xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_01);
+-      /* Disable the VP */
+-      xive_native_disable_vp(xc->vp_id);
+-
+-      /* Free the queues & associated interrupts */
++      /* Free escalations */
+       for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+-              struct xive_q *q = &xc->queues[i];
+-
+-              /* Free the escalation irq */
+               if (xc->esc_virq[i]) {
+                       free_irq(xc->esc_virq[i], vcpu);
+                       irq_dispose_mapping(xc->esc_virq[i]);
+                       kfree(xc->esc_virq_names[i]);
+               }
+-              /* Free the queue */
++      }
++
++      /* Disable the VP */
++      xive_native_disable_vp(xc->vp_id);
++
++      /* Free the queues */
++      for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
++              struct xive_q *q = &xc->queues[i];
++
+               xive_native_disable_queue(xc->vp_id, q, i);
+               if (q->qpage) {
+                       free_pages((unsigned long)q->qpage,
+--- a/arch/powerpc/kvm/book3s_xive_native.c
++++ b/arch/powerpc/kvm/book3s_xive_native.c
+@@ -67,10 +67,7 @@ void kvmppc_xive_native_cleanup_vcpu(str
+       xc->valid = false;
+       kvmppc_xive_disable_vcpu_interrupts(vcpu);
+-      /* Disable the VP */
+-      xive_native_disable_vp(xc->vp_id);
+-
+-      /* Free the queues & associated interrupts */
++      /* Free escalations */
+       for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+               /* Free the escalation irq */
+               if (xc->esc_virq[i]) {
+@@ -79,8 +76,13 @@ void kvmppc_xive_native_cleanup_vcpu(str
+                       kfree(xc->esc_virq_names[i]);
+                       xc->esc_virq[i] = 0;
+               }
++      }
+-              /* Free the queue */
++      /* Disable the VP */
++      xive_native_disable_vp(xc->vp_id);
++
++      /* Free the queues */
++      for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+               kvmppc_xive_native_cleanup_queue(vcpu, i);
+       }
diff --git a/queue-5.3/kvm-s390-fix-__insn32_query-inline-assembly.patch b/queue-5.3/kvm-s390-fix-__insn32_query-inline-assembly.patch
new file mode 100644 (file)
index 0000000..25d962a
--- /dev/null
@@ -0,0 +1,49 @@
+From b1c41ac3ce569b04644bb1e3fd28926604637da3 Mon Sep 17 00:00:00 2001
+From: Heiko Carstens <heiko.carstens@de.ibm.com>
+Date: Wed, 2 Oct 2019 14:24:47 +0200
+Subject: KVM: s390: fix __insn32_query() inline assembly
+
+From: Heiko Carstens <heiko.carstens@de.ibm.com>
+
+commit b1c41ac3ce569b04644bb1e3fd28926604637da3 upstream.
+
+The inline assembly constraints of __insn32_query() tell the compiler
+that only the first byte of "query" is being written to. Intended was
+probably that 32 bytes are written to.
+
+Fix and simplify the code and just use a "memory" clobber.
+
+Fixes: d668139718a9 ("KVM: s390: provide query function for instructions returning 32 byte")
+Cc: stable@vger.kernel.org # v5.2+
+Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/kvm/kvm-s390.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/s390/kvm/kvm-s390.c
++++ b/arch/s390/kvm/kvm-s390.c
+@@ -332,7 +332,7 @@ static inline int plo_test_bit(unsigned
+       return cc == 0;
+ }
+-static inline void __insn32_query(unsigned int opcode, u8 query[32])
++static inline void __insn32_query(unsigned int opcode, u8 *query)
+ {
+       register unsigned long r0 asm("0") = 0; /* query function */
+       register unsigned long r1 asm("1") = (unsigned long) query;
+@@ -340,9 +340,9 @@ static inline void __insn32_query(unsign
+       asm volatile(
+               /* Parameter regs are ignored */
+               "       .insn   rrf,%[opc] << 16,2,4,6,0\n"
+-              : "=m" (*query)
++              :
+               : "d" (r0), "a" (r1), [opc] "i" (opcode)
+-              : "cc");
++              : "cc", "memory");
+ }
+ #define INSN_SORTL 0xb938
diff --git a/queue-5.3/kvm-s390-test-for-bad-access-register-and-size-at-the-start-of-s390_mem_op.patch b/queue-5.3/kvm-s390-test-for-bad-access-register-and-size-at-the-start-of-s390_mem_op.patch
new file mode 100644 (file)
index 0000000..b890b7a
--- /dev/null
@@ -0,0 +1,50 @@
+From a13b03bbb4575b350b46090af4dfd30e735aaed1 Mon Sep 17 00:00:00 2001
+From: Thomas Huth <thuth@redhat.com>
+Date: Thu, 29 Aug 2019 14:25:17 +0200
+Subject: KVM: s390: Test for bad access register and size at the start of S390_MEM_OP
+
+From: Thomas Huth <thuth@redhat.com>
+
+commit a13b03bbb4575b350b46090af4dfd30e735aaed1 upstream.
+
+If the KVM_S390_MEM_OP ioctl is called with an access register >= 16,
+then there is certainly a bug in the calling userspace application.
+We check for wrong access registers, but only if the vCPU was already
+in the access register mode before (i.e. the SIE block has recorded
+it). The check is also buried somewhere deep in the calling chain (in
+the function ar_translation()), so this is somewhat hard to find.
+
+It's better to always report an error to the userspace in case this
+field is set wrong, and it's safer in the KVM code if we block wrong
+values here early instead of relying on a check somewhere deep down
+the calling chain, so let's add another check to kvm_s390_guest_mem_op()
+directly.
+
+We also should check that the "size" is non-zero here (thanks to Janosch
+Frank for the hint!). If we do not check the size, we could call vmalloc()
+with this 0 value, and this will cause a kernel warning.
+
+Signed-off-by: Thomas Huth <thuth@redhat.com>
+Link: https://lkml.kernel.org/r/20190829122517.31042-1-thuth@redhat.com
+Reviewed-by: Cornelia Huck <cohuck@redhat.com>
+Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/kvm/kvm-s390.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/s390/kvm/kvm-s390.c
++++ b/arch/s390/kvm/kvm-s390.c
+@@ -4257,7 +4257,7 @@ static long kvm_s390_guest_mem_op(struct
+       const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
+                                   | KVM_S390_MEMOP_F_CHECK_ONLY;
+-      if (mop->flags & ~supported_flags)
++      if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
+               return -EINVAL;
+       if (mop->size > MEM_OP_MAX_SIZE)
diff --git a/queue-5.3/kvm-x86-fix-userspace-set-invalid-cr4.patch b/queue-5.3/kvm-x86-fix-userspace-set-invalid-cr4.patch
new file mode 100644 (file)
index 0000000..c467515
--- /dev/null
@@ -0,0 +1,123 @@
+From 3ca94192278ca8de169d78c085396c424be123b3 Mon Sep 17 00:00:00 2001
+From: Wanpeng Li <wanpengli@tencent.com>
+Date: Wed, 18 Sep 2019 17:50:10 +0800
+Subject: KVM: X86: Fix userspace set invalid CR4
+
+From: Wanpeng Li <wanpengli@tencent.com>
+
+commit 3ca94192278ca8de169d78c085396c424be123b3 upstream.
+
+Reported by syzkaller:
+
+       WARNING: CPU: 0 PID: 6544 at /home/kernel/data/kvm/arch/x86/kvm//vmx/vmx.c:4689 handle_desc+0x37/0x40 [kvm_intel]
+       CPU: 0 PID: 6544 Comm: a.out Tainted: G           OE     5.3.0-rc4+ #4
+       RIP: 0010:handle_desc+0x37/0x40 [kvm_intel]
+       Call Trace:
+        vmx_handle_exit+0xbe/0x6b0 [kvm_intel]
+        vcpu_enter_guest+0x4dc/0x18d0 [kvm]
+        kvm_arch_vcpu_ioctl_run+0x407/0x660 [kvm]
+        kvm_vcpu_ioctl+0x3ad/0x690 [kvm]
+        do_vfs_ioctl+0xa2/0x690
+        ksys_ioctl+0x6d/0x80
+        __x64_sys_ioctl+0x1a/0x20
+        do_syscall_64+0x74/0x720
+        entry_SYSCALL_64_after_hwframe+0x49/0xbe
+
+When CR4.UMIP is set, guest should have UMIP cpuid flag. Current
+kvm set_sregs function doesn't have such check when userspace inputs
+sregs values. SECONDARY_EXEC_DESC is enabled on writes to CR4.UMIP
+in vmx_set_cr4 though guest doesn't have UMIP cpuid flag. The testcast
+triggers handle_desc warning when executing ltr instruction since
+guest architectural CR4 doesn't set UMIP. This patch fixes it by
+adding valid CR4 and CPUID combination checking in __set_sregs.
+
+syzkaller source: https://syzkaller.appspot.com/x/repro.c?x=138efb99600000
+
+Reported-by: syzbot+0f1819555fbdce992df9@syzkaller.appspotmail.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
+Reviewed-by: Sean Christopherson <sean.j.christopherson@intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/x86.c |   38 +++++++++++++++++++++-----------------
+ 1 file changed, 21 insertions(+), 17 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -884,34 +884,42 @@ int kvm_set_xcr(struct kvm_vcpu *vcpu, u
+ }
+ EXPORT_SYMBOL_GPL(kvm_set_xcr);
+-int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
++static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+ {
+-      unsigned long old_cr4 = kvm_read_cr4(vcpu);
+-      unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
+-                                 X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE;
+-
+       if (cr4 & CR4_RESERVED_BITS)
+-              return 1;
++              return -EINVAL;
+       if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE))
+-              return 1;
++              return -EINVAL;
+       if (!guest_cpuid_has(vcpu, X86_FEATURE_SMEP) && (cr4 & X86_CR4_SMEP))
+-              return 1;
++              return -EINVAL;
+       if (!guest_cpuid_has(vcpu, X86_FEATURE_SMAP) && (cr4 & X86_CR4_SMAP))
+-              return 1;
++              return -EINVAL;
+       if (!guest_cpuid_has(vcpu, X86_FEATURE_FSGSBASE) && (cr4 & X86_CR4_FSGSBASE))
+-              return 1;
++              return -EINVAL;
+       if (!guest_cpuid_has(vcpu, X86_FEATURE_PKU) && (cr4 & X86_CR4_PKE))
+-              return 1;
++              return -EINVAL;
+       if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57))
+-              return 1;
++              return -EINVAL;
+       if (!guest_cpuid_has(vcpu, X86_FEATURE_UMIP) && (cr4 & X86_CR4_UMIP))
++              return -EINVAL;
++
++      return 0;
++}
++
++int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
++{
++      unsigned long old_cr4 = kvm_read_cr4(vcpu);
++      unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
++                                 X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE;
++
++      if (kvm_valid_cr4(vcpu, cr4))
+               return 1;
+       if (is_long_mode(vcpu)) {
+@@ -8598,10 +8606,6 @@ EXPORT_SYMBOL_GPL(kvm_task_switch);
+ static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+ {
+-      if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
+-                      (sregs->cr4 & X86_CR4_OSXSAVE))
+-              return  -EINVAL;
+-
+       if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) {
+               /*
+                * When EFER.LME and CR0.PG are set, the processor is in
+@@ -8620,7 +8624,7 @@ static int kvm_valid_sregs(struct kvm_vc
+                       return -EINVAL;
+       }
+-      return 0;
++      return kvm_valid_cr4(vcpu, sregs->cr4);
+ }
+ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
diff --git a/queue-5.3/mips-treat-loongson-extensions-as-ases.patch b/queue-5.3/mips-treat-loongson-extensions-as-ases.patch
new file mode 100644 (file)
index 0000000..eab654e
--- /dev/null
@@ -0,0 +1,107 @@
+From d2f965549006acb865c4638f1f030ebcefdc71f6 Mon Sep 17 00:00:00 2001
+From: Jiaxun Yang <jiaxun.yang@flygoat.com>
+Date: Wed, 29 May 2019 16:42:59 +0800
+Subject: MIPS: Treat Loongson Extensions as ASEs
+
+From: Jiaxun Yang <jiaxun.yang@flygoat.com>
+
+commit d2f965549006acb865c4638f1f030ebcefdc71f6 upstream.
+
+Recently, binutils had split Loongson-3 Extensions into four ASEs:
+MMI, CAM, EXT, EXT2. This patch do the samething in kernel and expose
+them in cpuinfo so applications can probe supported ASEs at runtime.
+
+Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
+Cc: Huacai Chen <chenhc@lemote.com>
+Cc: Yunqiang Su <ysu@wavecomp.com>
+Cc: stable@vger.kernel.org # v4.14+
+Signed-off-by: Paul Burton <paul.burton@mips.com>
+Cc: linux-mips@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/mips/include/asm/cpu-features.h |   16 ++++++++++++++++
+ arch/mips/include/asm/cpu.h          |    4 ++++
+ arch/mips/kernel/cpu-probe.c         |    6 ++++++
+ arch/mips/kernel/proc.c              |    4 ++++
+ 4 files changed, 30 insertions(+)
+
+--- a/arch/mips/include/asm/cpu-features.h
++++ b/arch/mips/include/asm/cpu-features.h
+@@ -397,6 +397,22 @@
+ #define cpu_has_dsp3          __ase(MIPS_ASE_DSP3)
+ #endif
++#ifndef cpu_has_loongson_mmi
++#define cpu_has_loongson_mmi          __ase(MIPS_ASE_LOONGSON_MMI)
++#endif
++
++#ifndef cpu_has_loongson_cam
++#define cpu_has_loongson_cam          __ase(MIPS_ASE_LOONGSON_CAM)
++#endif
++
++#ifndef cpu_has_loongson_ext
++#define cpu_has_loongson_ext          __ase(MIPS_ASE_LOONGSON_EXT)
++#endif
++
++#ifndef cpu_has_loongson_ext2
++#define cpu_has_loongson_ext2         __ase(MIPS_ASE_LOONGSON_EXT2)
++#endif
++
+ #ifndef cpu_has_mipsmt
+ #define cpu_has_mipsmt                __isa_lt_and_ase(6, MIPS_ASE_MIPSMT)
+ #endif
+--- a/arch/mips/include/asm/cpu.h
++++ b/arch/mips/include/asm/cpu.h
+@@ -433,5 +433,9 @@ enum cpu_type_enum {
+ #define MIPS_ASE_MSA          0x00000100 /* MIPS SIMD Architecture */
+ #define MIPS_ASE_DSP3         0x00000200 /* Signal Processing ASE Rev 3*/
+ #define MIPS_ASE_MIPS16E2     0x00000400 /* MIPS16e2 */
++#define MIPS_ASE_LOONGSON_MMI 0x00000800 /* Loongson MultiMedia extensions Instructions */
++#define MIPS_ASE_LOONGSON_CAM 0x00001000 /* Loongson CAM */
++#define MIPS_ASE_LOONGSON_EXT 0x00002000 /* Loongson EXTensions */
++#define MIPS_ASE_LOONGSON_EXT2        0x00004000 /* Loongson EXTensions R2 */
+ #endif /* _ASM_CPU_H */
+--- a/arch/mips/kernel/cpu-probe.c
++++ b/arch/mips/kernel/cpu-probe.c
+@@ -1573,6 +1573,8 @@ static inline void cpu_probe_legacy(stru
+                       __cpu_name[cpu] = "ICT Loongson-3";
+                       set_elf_platform(cpu, "loongson3a");
+                       set_isa(c, MIPS_CPU_ISA_M64R1);
++                      c->ases |= (MIPS_ASE_LOONGSON_MMI | MIPS_ASE_LOONGSON_CAM |
++                              MIPS_ASE_LOONGSON_EXT);
+                       break;
+               case PRID_REV_LOONGSON3B_R1:
+               case PRID_REV_LOONGSON3B_R2:
+@@ -1580,6 +1582,8 @@ static inline void cpu_probe_legacy(stru
+                       __cpu_name[cpu] = "ICT Loongson-3";
+                       set_elf_platform(cpu, "loongson3b");
+                       set_isa(c, MIPS_CPU_ISA_M64R1);
++                      c->ases |= (MIPS_ASE_LOONGSON_MMI | MIPS_ASE_LOONGSON_CAM |
++                              MIPS_ASE_LOONGSON_EXT);
+                       break;
+               }
+@@ -1946,6 +1950,8 @@ static inline void cpu_probe_loongson(st
+               decode_configs(c);
+               c->options |= MIPS_CPU_FTLB | MIPS_CPU_TLBINV | MIPS_CPU_LDPTE;
+               c->writecombine = _CACHE_UNCACHED_ACCELERATED;
++              c->ases |= (MIPS_ASE_LOONGSON_MMI | MIPS_ASE_LOONGSON_CAM |
++                      MIPS_ASE_LOONGSON_EXT | MIPS_ASE_LOONGSON_EXT2);
+               break;
+       default:
+               panic("Unknown Loongson Processor ID!");
+--- a/arch/mips/kernel/proc.c
++++ b/arch/mips/kernel/proc.c
+@@ -124,6 +124,10 @@ static int show_cpuinfo(struct seq_file
+       if (cpu_has_eva)        seq_printf(m, "%s", " eva");
+       if (cpu_has_htw)        seq_printf(m, "%s", " htw");
+       if (cpu_has_xpa)        seq_printf(m, "%s", " xpa");
++      if (cpu_has_loongson_mmi)       seq_printf(m, "%s", " loongson-mmi");
++      if (cpu_has_loongson_cam)       seq_printf(m, "%s", " loongson-cam");
++      if (cpu_has_loongson_ext)       seq_printf(m, "%s", " loongson-ext");
++      if (cpu_has_loongson_ext2)      seq_printf(m, "%s", " loongson-ext2");
+       seq_printf(m, "\n");
+       if (cpu_has_mmips) {
diff --git a/queue-5.3/nbd-fix-max-number-of-supported-devs.patch b/queue-5.3/nbd-fix-max-number-of-supported-devs.patch
new file mode 100644 (file)
index 0000000..89a14de
--- /dev/null
@@ -0,0 +1,159 @@
+From e9e006f5fcf2bab59149cb38a48a4817c1b538b4 Mon Sep 17 00:00:00 2001
+From: Mike Christie <mchristi@redhat.com>
+Date: Sun, 4 Aug 2019 14:10:06 -0500
+Subject: nbd: fix max number of supported devs
+
+From: Mike Christie <mchristi@redhat.com>
+
+commit e9e006f5fcf2bab59149cb38a48a4817c1b538b4 upstream.
+
+This fixes a bug added in 4.10 with commit:
+
+commit 9561a7ade0c205bc2ee035a2ac880478dcc1a024
+Author: Josef Bacik <jbacik@fb.com>
+Date:   Tue Nov 22 14:04:40 2016 -0500
+
+    nbd: add multi-connection support
+
+that limited the number of devices to 256. Before the patch we could
+create 1000s of devices, but the patch switched us from using our
+own thread to using a work queue which has a default limit of 256
+active works.
+
+The problem is that our recv_work function sits in a loop until
+disconnection but only handles IO for one connection. The work is
+started when the connection is started/restarted, but if we end up
+creating 257 or more connections, the queue_work call just queues
+connection257+'s recv_work and that waits for connection 1 - 256's
+recv_work to be disconnected and that work instance completing.
+
+Instead of reverting back to kthreads, this has us allocate a
+workqueue_struct per device, so we can block in the work.
+
+Cc: stable@vger.kernel.org
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Mike Christie <mchristi@redhat.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/nbd.c |   39 +++++++++++++++++++++++++--------------
+ 1 file changed, 25 insertions(+), 14 deletions(-)
+
+--- a/drivers/block/nbd.c
++++ b/drivers/block/nbd.c
+@@ -108,6 +108,7 @@ struct nbd_device {
+       struct nbd_config *config;
+       struct mutex config_lock;
+       struct gendisk *disk;
++      struct workqueue_struct *recv_workq;
+       struct list_head list;
+       struct task_struct *task_recv;
+@@ -138,7 +139,6 @@ static struct dentry *nbd_dbg_dir;
+ static unsigned int nbds_max = 16;
+ static int max_part = 16;
+-static struct workqueue_struct *recv_workqueue;
+ static int part_shift;
+ static int nbd_dev_dbg_init(struct nbd_device *nbd);
+@@ -1038,7 +1038,7 @@ static int nbd_reconnect_socket(struct n
+               /* We take the tx_mutex in an error path in the recv_work, so we
+                * need to queue_work outside of the tx_mutex.
+                */
+-              queue_work(recv_workqueue, &args->work);
++              queue_work(nbd->recv_workq, &args->work);
+               atomic_inc(&config->live_connections);
+               wake_up(&config->conn_wait);
+@@ -1139,6 +1139,10 @@ static void nbd_config_put(struct nbd_de
+               kfree(nbd->config);
+               nbd->config = NULL;
++              if (nbd->recv_workq)
++                      destroy_workqueue(nbd->recv_workq);
++              nbd->recv_workq = NULL;
++
+               nbd->tag_set.timeout = 0;
+               nbd->disk->queue->limits.discard_granularity = 0;
+               nbd->disk->queue->limits.discard_alignment = 0;
+@@ -1167,6 +1171,14 @@ static int nbd_start_device(struct nbd_d
+               return -EINVAL;
+       }
++      nbd->recv_workq = alloc_workqueue("knbd%d-recv",
++                                        WQ_MEM_RECLAIM | WQ_HIGHPRI |
++                                        WQ_UNBOUND, 0, nbd->index);
++      if (!nbd->recv_workq) {
++              dev_err(disk_to_dev(nbd->disk), "Could not allocate knbd recv work queue.\n");
++              return -ENOMEM;
++      }
++
+       blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections);
+       nbd->task_recv = current;
+@@ -1197,7 +1209,7 @@ static int nbd_start_device(struct nbd_d
+               INIT_WORK(&args->work, recv_work);
+               args->nbd = nbd;
+               args->index = i;
+-              queue_work(recv_workqueue, &args->work);
++              queue_work(nbd->recv_workq, &args->work);
+       }
+       nbd_size_update(nbd);
+       return error;
+@@ -1217,8 +1229,10 @@ static int nbd_start_device_ioctl(struct
+       mutex_unlock(&nbd->config_lock);
+       ret = wait_event_interruptible(config->recv_wq,
+                                        atomic_read(&config->recv_threads) == 0);
+-      if (ret)
++      if (ret) {
+               sock_shutdown(nbd);
++              flush_workqueue(nbd->recv_workq);
++      }
+       mutex_lock(&nbd->config_lock);
+       nbd_bdev_reset(bdev);
+       /* user requested, ignore socket errors */
+@@ -1877,6 +1891,12 @@ static void nbd_disconnect_and_put(struc
+       nbd_disconnect(nbd);
+       nbd_clear_sock(nbd);
+       mutex_unlock(&nbd->config_lock);
++      /*
++       * Make sure recv thread has finished, so it does not drop the last
++       * config ref and try to destroy the workqueue from inside the work
++       * queue.
++       */
++      flush_workqueue(nbd->recv_workq);
+       if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
+                              &nbd->config->runtime_flags))
+               nbd_config_put(nbd);
+@@ -2263,20 +2283,12 @@ static int __init nbd_init(void)
+       if (nbds_max > 1UL << (MINORBITS - part_shift))
+               return -EINVAL;
+-      recv_workqueue = alloc_workqueue("knbd-recv",
+-                                       WQ_MEM_RECLAIM | WQ_HIGHPRI |
+-                                       WQ_UNBOUND, 0);
+-      if (!recv_workqueue)
+-              return -ENOMEM;
+-      if (register_blkdev(NBD_MAJOR, "nbd")) {
+-              destroy_workqueue(recv_workqueue);
++      if (register_blkdev(NBD_MAJOR, "nbd"))
+               return -EIO;
+-      }
+       if (genl_register_family(&nbd_genl_family)) {
+               unregister_blkdev(NBD_MAJOR, "nbd");
+-              destroy_workqueue(recv_workqueue);
+               return -EINVAL;
+       }
+       nbd_dbg_init();
+@@ -2318,7 +2330,6 @@ static void __exit nbd_cleanup(void)
+       idr_destroy(&nbd_index_idr);
+       genl_unregister_family(&nbd_genl_family);
+-      destroy_workqueue(recv_workqueue);
+       unregister_blkdev(NBD_MAJOR, "nbd");
+ }
diff --git a/queue-5.3/pm-devfreq-tegra-fix-khz-to-hz-conversion.patch b/queue-5.3/pm-devfreq-tegra-fix-khz-to-hz-conversion.patch
new file mode 100644 (file)
index 0000000..ee1908b
--- /dev/null
@@ -0,0 +1,75 @@
+From 62bacb06b9f08965c4ef10e17875450490c948c0 Mon Sep 17 00:00:00 2001
+From: Dmitry Osipenko <digetx@gmail.com>
+Date: Thu, 2 May 2019 02:38:00 +0300
+Subject: PM / devfreq: tegra: Fix kHz to Hz conversion
+
+From: Dmitry Osipenko <digetx@gmail.com>
+
+commit 62bacb06b9f08965c4ef10e17875450490c948c0 upstream.
+
+The kHz to Hz is incorrectly converted in a few places in the code,
+this results in a wrong frequency being calculated because devfreq core
+uses OPP frequencies that are given in Hz to clamp the rate, while
+tegra-devfreq gives to the core value in kHz and then it also expects to
+receive value in kHz from the core. In a result memory freq is always set
+to a value which is close to ULONG_MAX because of the bug. Hence the EMC
+frequency is always capped to the maximum and the driver doesn't do
+anything useful. This patch was tested on Tegra30 and Tegra124 SoC's, EMC
+frequency scaling works properly now.
+
+Cc: <stable@vger.kernel.org> # 4.14+
+Tested-by: Steev Klimaszewski <steev@kali.org>
+Reviewed-by: Chanwoo Choi <cw00.choi@samsung.com>
+Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
+Acked-by: Thierry Reding <treding@nvidia.com>
+Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/devfreq/tegra-devfreq.c |   12 +++++-------
+ 1 file changed, 5 insertions(+), 7 deletions(-)
+
+--- a/drivers/devfreq/tegra-devfreq.c
++++ b/drivers/devfreq/tegra-devfreq.c
+@@ -474,11 +474,11 @@ static int tegra_devfreq_target(struct d
+ {
+       struct tegra_devfreq *tegra = dev_get_drvdata(dev);
+       struct dev_pm_opp *opp;
+-      unsigned long rate = *freq * KHZ;
++      unsigned long rate;
+-      opp = devfreq_recommended_opp(dev, &rate, flags);
++      opp = devfreq_recommended_opp(dev, freq, flags);
+       if (IS_ERR(opp)) {
+-              dev_err(dev, "Failed to find opp for %lu KHz\n", *freq);
++              dev_err(dev, "Failed to find opp for %lu Hz\n", *freq);
+               return PTR_ERR(opp);
+       }
+       rate = dev_pm_opp_get_freq(opp);
+@@ -487,8 +487,6 @@ static int tegra_devfreq_target(struct d
+       clk_set_min_rate(tegra->emc_clock, rate);
+       clk_set_rate(tegra->emc_clock, 0);
+-      *freq = rate;
+-
+       return 0;
+ }
+@@ -498,7 +496,7 @@ static int tegra_devfreq_get_dev_status(
+       struct tegra_devfreq *tegra = dev_get_drvdata(dev);
+       struct tegra_devfreq_device *actmon_dev;
+-      stat->current_frequency = tegra->cur_freq;
++      stat->current_frequency = tegra->cur_freq * KHZ;
+       /* To be used by the tegra governor */
+       stat->private_data = tegra;
+@@ -553,7 +551,7 @@ static int tegra_governor_get_target(str
+               target_freq = max(target_freq, dev->target_freq);
+       }
+-      *freq = target_freq;
++      *freq = target_freq * KHZ;
+       return 0;
+ }
diff --git a/queue-5.3/power-supply-sbs-battery-only-return-health-when-battery-present.patch b/queue-5.3/power-supply-sbs-battery-only-return-health-when-battery-present.patch
new file mode 100644 (file)
index 0000000..e583b82
--- /dev/null
@@ -0,0 +1,74 @@
+From fe55e770327363304c4111423e6f7ff3c650136d Mon Sep 17 00:00:00 2001
+From: Michael Nosthoff <committed@heine.so>
+Date: Fri, 16 Aug 2019 09:58:42 +0200
+Subject: power: supply: sbs-battery: only return health when battery present
+
+From: Michael Nosthoff <committed@heine.so>
+
+commit fe55e770327363304c4111423e6f7ff3c650136d upstream.
+
+when the battery is set to sbs-mode and  no gpio detection is enabled
+"health" is always returning a value even when the battery is not present.
+All other fields return "not present".
+This leads to a scenario where the driver is constantly switching between
+"present" and "not present" state. This generates a lot of constant
+traffic on the i2c.
+
+This commit changes the response of "health" to an error when the battery
+is not responding leading to a consistent "not present" state.
+
+Fixes: 76b16f4cdfb8 ("power: supply: sbs-battery: don't assume MANUFACTURER_DATA formats")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Michael Nosthoff <committed@heine.so>
+Reviewed-by: Brian Norris <briannorris@chromium.org>
+Tested-by: Brian Norris <briannorris@chromium.org>
+Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/power/supply/sbs-battery.c |   25 ++++++++++++++++---------
+ 1 file changed, 16 insertions(+), 9 deletions(-)
+
+--- a/drivers/power/supply/sbs-battery.c
++++ b/drivers/power/supply/sbs-battery.c
+@@ -314,17 +314,22 @@ static int sbs_get_battery_presence_and_
+ {
+       int ret;
+-      if (psp == POWER_SUPPLY_PROP_PRESENT) {
+-              /* Dummy command; if it succeeds, battery is present. */
+-              ret = sbs_read_word_data(client, sbs_data[REG_STATUS].addr);
+-              if (ret < 0)
+-                      val->intval = 0; /* battery disconnected */
+-              else
+-                      val->intval = 1; /* battery present */
+-      } else { /* POWER_SUPPLY_PROP_HEALTH */
++      /* Dummy command; if it succeeds, battery is present. */
++      ret = sbs_read_word_data(client, sbs_data[REG_STATUS].addr);
++
++      if (ret < 0) { /* battery not present*/
++              if (psp == POWER_SUPPLY_PROP_PRESENT) {
++                      val->intval = 0;
++                      return 0;
++              }
++              return ret;
++      }
++
++      if (psp == POWER_SUPPLY_PROP_PRESENT)
++              val->intval = 1; /* battery present */
++      else /* POWER_SUPPLY_PROP_HEALTH */
+               /* SBS spec doesn't have a general health command. */
+               val->intval = POWER_SUPPLY_HEALTH_UNKNOWN;
+-      }
+       return 0;
+ }
+@@ -626,6 +631,8 @@ static int sbs_get_property(struct power
+               else
+                       ret = sbs_get_battery_presence_and_health(client, psp,
+                                                                 val);
++
++              /* this can only be true if no gpio is used */
+               if (psp == POWER_SUPPLY_PROP_PRESENT)
+                       return 0;
+               break;
diff --git a/queue-5.3/power-supply-sbs-battery-use-correct-flags-field.patch b/queue-5.3/power-supply-sbs-battery-use-correct-flags-field.patch
new file mode 100644 (file)
index 0000000..e4270a8
--- /dev/null
@@ -0,0 +1,37 @@
+From 99956a9e08251a1234434b492875b1eaff502a12 Mon Sep 17 00:00:00 2001
+From: Michael Nosthoff <committed@heine.so>
+Date: Fri, 16 Aug 2019 09:37:42 +0200
+Subject: power: supply: sbs-battery: use correct flags field
+
+From: Michael Nosthoff <committed@heine.so>
+
+commit 99956a9e08251a1234434b492875b1eaff502a12 upstream.
+
+the type flag is stored in the chip->flags field not in the
+client->flags field. This currently leads to never using the ti
+specific health function as client->flags doesn't use that bit.
+So it's always falling back to the general one.
+
+Fixes: 76b16f4cdfb8 ("power: supply: sbs-battery: don't assume MANUFACTURER_DATA formats")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Michael Nosthoff <committed@heine.so>
+Reviewed-by: Brian Norris <briannorris@chromium.org>
+Reviewed-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
+Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/power/supply/sbs-battery.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/power/supply/sbs-battery.c
++++ b/drivers/power/supply/sbs-battery.c
+@@ -620,7 +620,7 @@ static int sbs_get_property(struct power
+       switch (psp) {
+       case POWER_SUPPLY_PROP_PRESENT:
+       case POWER_SUPPLY_PROP_HEALTH:
+-              if (client->flags & SBS_FLAGS_TI_BQ20Z75)
++              if (chip->flags & SBS_FLAGS_TI_BQ20Z75)
+                       ret = sbs_get_ti_battery_presence_and_health(client,
+                                                                    psp, val);
+               else
diff --git a/queue-5.3/powerpc-32s-fix-boot-failure-with-debug_pagealloc-without-kasan.patch b/queue-5.3/powerpc-32s-fix-boot-failure-with-debug_pagealloc-without-kasan.patch
new file mode 100644 (file)
index 0000000..271b26f
--- /dev/null
@@ -0,0 +1,61 @@
+From 9d6d712fbf7766f21c838940eebcd7b4d476c5e6 Mon Sep 17 00:00:00 2001
+From: Christophe Leroy <christophe.leroy@c-s.fr>
+Date: Wed, 14 Aug 2019 10:02:20 +0000
+Subject: powerpc/32s: Fix boot failure with DEBUG_PAGEALLOC without KASAN.
+
+From: Christophe Leroy <christophe.leroy@c-s.fr>
+
+commit 9d6d712fbf7766f21c838940eebcd7b4d476c5e6 upstream.
+
+When KASAN is selected, the definitive hash table has to be
+set up later, but there is already an early temporary one.
+
+When KASAN is not selected, there is no early hash table,
+so the setup of the definitive hash table cannot be delayed.
+
+Fixes: 72f208c6a8f7 ("powerpc/32s: move hash code patching out of MMU_init_hw()")
+Cc: stable@vger.kernel.org # v5.2+
+Reported-by: Jonathan Neuschafer <j.neuschaefer@gmx.net>
+Tested-by: Jonathan Neuschafer <j.neuschaefer@gmx.net>
+Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/b7860c5e1e784d6b96ba67edf47dd6cbc2e78ab6.1565776892.git.christophe.leroy@c-s.fr
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/kernel/head_32.S  |    2 ++
+ arch/powerpc/mm/book3s32/mmu.c |    9 +++++++++
+ 2 files changed, 11 insertions(+)
+
+--- a/arch/powerpc/kernel/head_32.S
++++ b/arch/powerpc/kernel/head_32.S
+@@ -897,9 +897,11 @@ start_here:
+       bl      machine_init
+       bl      __save_cpu_setup
+       bl      MMU_init
++#ifdef CONFIG_KASAN
+ BEGIN_MMU_FTR_SECTION
+       bl      MMU_init_hw_patch
+ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
++#endif
+ /*
+  * Go back to running unmapped so we can load up new values
+--- a/arch/powerpc/mm/book3s32/mmu.c
++++ b/arch/powerpc/mm/book3s32/mmu.c
+@@ -358,6 +358,15 @@ void __init MMU_init_hw(void)
+       hash_mb2 = hash_mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg;
+       if (lg_n_hpteg > 16)
+               hash_mb2 = 16 - LG_HPTEG_SIZE;
++
++      /*
++       * When KASAN is selected, there is already an early temporary hash
++       * table and the switch to the final hash table is done later.
++       */
++      if (IS_ENABLED(CONFIG_KASAN))
++              return;
++
++      MMU_init_hw_patch();
+ }
+ void __init MMU_init_hw_patch(void)
diff --git a/queue-5.3/powerpc-603-fix-handling-of-the-dirty-flag.patch b/queue-5.3/powerpc-603-fix-handling-of-the-dirty-flag.patch
new file mode 100644 (file)
index 0000000..c48e454
--- /dev/null
@@ -0,0 +1,50 @@
+From 415480dce2ef03bb8335deebd2f402f475443ce0 Mon Sep 17 00:00:00 2001
+From: Christophe Leroy <christophe.leroy@c-s.fr>
+Date: Mon, 19 Aug 2019 06:40:25 +0000
+Subject: powerpc/603: Fix handling of the DIRTY flag
+
+From: Christophe Leroy <christophe.leroy@c-s.fr>
+
+commit 415480dce2ef03bb8335deebd2f402f475443ce0 upstream.
+
+If a page is already mapped RW without the DIRTY flag, the DIRTY
+flag is never set and a TLB store miss exception is taken forever.
+
+This is easily reproduced with the following app:
+
+void main(void)
+{
+       volatile char *ptr = mmap(0, 4096, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+
+       *ptr = *ptr;
+}
+
+When DIRTY flag is not set, bail out of TLB miss handler and take
+a minor page fault which will set the DIRTY flag.
+
+Fixes: f8b58c64eaef ("powerpc/603: let's handle PAGE_DIRTY directly")
+Cc: stable@vger.kernel.org # v5.1+
+Reported-by: Doug Crawford <doug.crawford@intelight-its.com>
+Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/80432f71194d7ee75b2f5043ecf1501cf1cca1f3.1566196646.git.christophe.leroy@c-s.fr
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/kernel/head_32.S |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/powerpc/kernel/head_32.S
++++ b/arch/powerpc/kernel/head_32.S
+@@ -557,9 +557,9 @@ DataStoreTLBMiss:
+       cmplw   0,r1,r3
+       mfspr   r2, SPRN_SPRG_PGDIR
+ #ifdef CONFIG_SWAP
+-      li      r1, _PAGE_RW | _PAGE_PRESENT | _PAGE_ACCESSED
++      li      r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED
+ #else
+-      li      r1, _PAGE_RW | _PAGE_PRESENT
++      li      r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT
+ #endif
+       bge-    112f
+       lis     r2, (swapper_pg_dir - PAGE_OFFSET)@ha   /* if kernel address, use */
diff --git a/queue-5.3/powerpc-book3s64-mm-don-t-do-tlbie-fixup-for-some-hardware-revisions.patch b/queue-5.3/powerpc-book3s64-mm-don-t-do-tlbie-fixup-for-some-hardware-revisions.patch
new file mode 100644 (file)
index 0000000..d569109
--- /dev/null
@@ -0,0 +1,81 @@
+From 677733e296b5c7a37c47da391fc70a43dc40bd67 Mon Sep 17 00:00:00 2001
+From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
+Date: Tue, 24 Sep 2019 09:22:51 +0530
+Subject: powerpc/book3s64/mm: Don't do tlbie fixup for some hardware revisions
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+
+commit 677733e296b5c7a37c47da391fc70a43dc40bd67 upstream.
+
+The store ordering vs tlbie issue mentioned in commit
+a5d4b5891c2f ("powerpc/mm: Fixup tlbie vs store ordering issue on
+POWER9") is fixed for Nimbus 2.3 and Cumulus 1.3 revisions. We don't
+need to apply the fixup if we are running on them
+
+We can only do this on PowerNV. On pseries guest with KVM we still
+don't support redoing the feature fixup after migration. So we should
+be enabling all the workarounds needed, because whe can possibly
+migrate between DD 2.3 and DD 2.2
+
+Fixes: a5d4b5891c2f ("powerpc/mm: Fixup tlbie vs store ordering issue on POWER9")
+Cc: stable@vger.kernel.org # v4.16+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20190924035254.24612-1-aneesh.kumar@linux.ibm.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/kernel/dt_cpu_ftrs.c |   30 ++++++++++++++++++++++++++++--
+ 1 file changed, 28 insertions(+), 2 deletions(-)
+
+--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
++++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
+@@ -691,9 +691,35 @@ static bool __init cpufeatures_process_f
+       return true;
+ }
++/*
++ * Handle POWER9 broadcast tlbie invalidation issue using
++ * cpu feature flag.
++ */
++static __init void update_tlbie_feature_flag(unsigned long pvr)
++{
++      if (PVR_VER(pvr) == PVR_POWER9) {
++              /*
++               * Set the tlbie feature flag for anything below
++               * Nimbus DD 2.3 and Cumulus DD 1.3
++               */
++              if ((pvr & 0xe000) == 0) {
++                      /* Nimbus */
++                      if ((pvr & 0xfff) < 0x203)
++                              cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG;
++              } else if ((pvr & 0xc000) == 0) {
++                      /* Cumulus */
++                      if ((pvr & 0xfff) < 0x103)
++                              cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG;
++              } else {
++                      WARN_ONCE(1, "Unknown PVR");
++                      cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG;
++              }
++      }
++}
++
+ static __init void cpufeatures_cpu_quirks(void)
+ {
+-      int version = mfspr(SPRN_PVR);
++      unsigned long version = mfspr(SPRN_PVR);
+       /*
+        * Not all quirks can be derived from the cpufeatures device tree.
+@@ -712,10 +738,10 @@ static __init void cpufeatures_cpu_quirk
+       if ((version & 0xffff0000) == 0x004e0000) {
+               cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR);
+-              cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG;
+               cur_cpu_spec->cpu_features |= CPU_FTR_P9_TIDR;
+       }
++      update_tlbie_feature_flag(version);
+       /*
+        * PKEY was not in the initial base or feature node
+        * specification, but it should become optional in the next
diff --git a/queue-5.3/powerpc-book3s64-radix-rename-cpu_ftr_p9_tlbie_bug-feature-flag.patch b/queue-5.3/powerpc-book3s64-radix-rename-cpu_ftr_p9_tlbie_bug-feature-flag.patch
new file mode 100644 (file)
index 0000000..91852cc
--- /dev/null
@@ -0,0 +1,110 @@
+From 09ce98cacd51fcd0fa0af2f79d1e1d3192f4cbb0 Mon Sep 17 00:00:00 2001
+From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
+Date: Tue, 24 Sep 2019 09:22:52 +0530
+Subject: powerpc/book3s64/radix: Rename CPU_FTR_P9_TLBIE_BUG feature flag
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+
+commit 09ce98cacd51fcd0fa0af2f79d1e1d3192f4cbb0 upstream.
+
+Rename the #define to indicate this is related to store vs tlbie
+ordering issue. In the next patch, we will be adding another feature
+flag that is used to handles ERAT flush vs tlbie ordering issue.
+
+Fixes: a5d4b5891c2f ("powerpc/mm: Fixup tlbie vs store ordering issue on POWER9")
+Cc: stable@vger.kernel.org # v4.16+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20190924035254.24612-2-aneesh.kumar@linux.ibm.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/include/asm/cputable.h    |    4 ++--
+ arch/powerpc/kernel/dt_cpu_ftrs.c      |    6 +++---
+ arch/powerpc/kvm/book3s_hv_rm_mmu.c    |    2 +-
+ arch/powerpc/mm/book3s64/hash_native.c |    2 +-
+ arch/powerpc/mm/book3s64/radix_tlb.c   |    4 ++--
+ 5 files changed, 9 insertions(+), 9 deletions(-)
+
+--- a/arch/powerpc/include/asm/cputable.h
++++ b/arch/powerpc/include/asm/cputable.h
+@@ -213,7 +213,7 @@ static inline void cpu_feature_keys_init
+ #define CPU_FTR_POWER9_DD2_1          LONG_ASM_CONST(0x0000080000000000)
+ #define CPU_FTR_P9_TM_HV_ASSIST               LONG_ASM_CONST(0x0000100000000000)
+ #define CPU_FTR_P9_TM_XER_SO_BUG      LONG_ASM_CONST(0x0000200000000000)
+-#define CPU_FTR_P9_TLBIE_BUG          LONG_ASM_CONST(0x0000400000000000)
++#define CPU_FTR_P9_TLBIE_STQ_BUG      LONG_ASM_CONST(0x0000400000000000)
+ #define CPU_FTR_P9_TIDR                       LONG_ASM_CONST(0x0000800000000000)
+ #ifndef __ASSEMBLY__
+@@ -461,7 +461,7 @@ static inline void cpu_feature_keys_init
+           CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
+           CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
+           CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \
+-          CPU_FTR_P9_TLBIE_BUG | CPU_FTR_P9_TIDR)
++          CPU_FTR_P9_TLBIE_STQ_BUG | CPU_FTR_P9_TIDR)
+ #define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9
+ #define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1)
+ #define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \
+--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
++++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
+@@ -705,14 +705,14 @@ static __init void update_tlbie_feature_
+               if ((pvr & 0xe000) == 0) {
+                       /* Nimbus */
+                       if ((pvr & 0xfff) < 0x203)
+-                              cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG;
++                              cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
+               } else if ((pvr & 0xc000) == 0) {
+                       /* Cumulus */
+                       if ((pvr & 0xfff) < 0x103)
+-                              cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG;
++                              cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
+               } else {
+                       WARN_ONCE(1, "Unknown PVR");
+-                      cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG;
++                      cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
+               }
+       }
+ }
+--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
++++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+@@ -451,7 +451,7 @@ static void do_tlbies(struct kvm *kvm, u
+                                    "r" (rbvalues[i]), "r" (kvm->arch.lpid));
+               }
+-              if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
++              if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+                       /*
+                        * Need the extra ptesync to make sure we don't
+                        * re-order the tlbie
+--- a/arch/powerpc/mm/book3s64/hash_native.c
++++ b/arch/powerpc/mm/book3s64/hash_native.c
+@@ -199,7 +199,7 @@ static inline unsigned long  ___tlbie(un
+ static inline void fixup_tlbie(unsigned long vpn, int psize, int apsize, int ssize)
+ {
+-      if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
++      if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+               /* Need the extra ptesync to ensure we don't reorder tlbie*/
+               asm volatile("ptesync": : :"memory");
+               ___tlbie(vpn, psize, apsize, ssize);
+--- a/arch/powerpc/mm/book3s64/radix_tlb.c
++++ b/arch/powerpc/mm/book3s64/radix_tlb.c
+@@ -216,7 +216,7 @@ static inline void fixup_tlbie(void)
+       unsigned long pid = 0;
+       unsigned long va = ((1UL << 52) - 1);
+-      if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
++      if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+               asm volatile("ptesync": : :"memory");
+               __tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
+       }
+@@ -226,7 +226,7 @@ static inline void fixup_tlbie_lpid(unsi
+ {
+       unsigned long va = ((1UL << 52) - 1);
+-      if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
++      if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+               asm volatile("ptesync": : :"memory");
+               __tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
+       }
diff --git a/queue-5.3/powerpc-kasan-fix-parallel-loading-of-modules.patch b/queue-5.3/powerpc-kasan-fix-parallel-loading-of-modules.patch
new file mode 100644 (file)
index 0000000..ced38bd
--- /dev/null
@@ -0,0 +1,74 @@
+From 45ff3c55958542c3b76075d59741297b8cb31cbb Mon Sep 17 00:00:00 2001
+From: Christophe Leroy <christophe.leroy@c-s.fr>
+Date: Fri, 9 Aug 2019 14:58:09 +0000
+Subject: powerpc/kasan: Fix parallel loading of modules.
+
+From: Christophe Leroy <christophe.leroy@c-s.fr>
+
+commit 45ff3c55958542c3b76075d59741297b8cb31cbb upstream.
+
+Parallel loading of modules may lead to bad setup of shadow page table
+entries.
+
+First, lets align modules so that two modules never share the same
+shadow page.
+
+Second, ensure that two modules cannot allocate two page tables for
+the same PMD entry at the same time. This is done by using
+init_mm.page_table_lock in the same way as __pte_alloc_kernel()
+
+Fixes: 2edb16efc899 ("powerpc/32: Add KASAN support")
+Cc: stable@vger.kernel.org # v5.2+
+Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/c97284f912128cbc3f2fe09d68e90e65fb3e6026.1565361876.git.christophe.leroy@c-s.fr
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/mm/kasan/kasan_init_32.c |   21 +++++++++++++++++++--
+ 1 file changed, 19 insertions(+), 2 deletions(-)
+
+--- a/arch/powerpc/mm/kasan/kasan_init_32.c
++++ b/arch/powerpc/mm/kasan/kasan_init_32.c
+@@ -5,6 +5,7 @@
+ #include <linux/kasan.h>
+ #include <linux/printk.h>
+ #include <linux/memblock.h>
++#include <linux/moduleloader.h>
+ #include <linux/sched/task.h>
+ #include <linux/vmalloc.h>
+ #include <asm/pgalloc.h>
+@@ -46,7 +47,19 @@ static int __ref kasan_init_shadow_page_
+                       kasan_populate_pte(new, PAGE_READONLY);
+               else
+                       kasan_populate_pte(new, PAGE_KERNEL_RO);
+-              pmd_populate_kernel(&init_mm, pmd, new);
++
++              smp_wmb(); /* See comment in __pte_alloc */
++
++              spin_lock(&init_mm.page_table_lock);
++                      /* Has another populated it ? */
++              if (likely((void *)pmd_page_vaddr(*pmd) == kasan_early_shadow_pte)) {
++                      pmd_populate_kernel(&init_mm, pmd, new);
++                      new = NULL;
++              }
++              spin_unlock(&init_mm.page_table_lock);
++
++              if (new && slab_is_available())
++                      pte_free_kernel(&init_mm, new);
+       }
+       return 0;
+ }
+@@ -137,7 +150,11 @@ void __init kasan_init(void)
+ #ifdef CONFIG_MODULES
+ void *module_alloc(unsigned long size)
+ {
+-      void *base = vmalloc_exec(size);
++      void *base;
++
++      base = __vmalloc_node_range(size, MODULE_ALIGN, VMALLOC_START, VMALLOC_END,
++                                  GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS,
++                                  NUMA_NO_NODE, __builtin_return_address(0));
+       if (!base)
+               return NULL;
diff --git a/queue-5.3/powerpc-kasan-fix-shadow-area-set-up-for-modules.patch b/queue-5.3/powerpc-kasan-fix-shadow-area-set-up-for-modules.patch
new file mode 100644 (file)
index 0000000..51c6b7c
--- /dev/null
@@ -0,0 +1,40 @@
+From 663c0c9496a69f80011205ba3194049bcafd681d Mon Sep 17 00:00:00 2001
+From: Christophe Leroy <christophe.leroy@c-s.fr>
+Date: Fri, 9 Aug 2019 14:58:10 +0000
+Subject: powerpc/kasan: Fix shadow area set up for modules.
+
+From: Christophe Leroy <christophe.leroy@c-s.fr>
+
+commit 663c0c9496a69f80011205ba3194049bcafd681d upstream.
+
+When loading modules, from time to time an Oops is encountered during
+the init of shadow area for globals. This is due to the last page not
+always being mapped depending on the exact distance between the start
+and the end of the shadow area and the alignment with the page
+addresses.
+
+Fix this by aligning the starting address with the page address.
+
+Fixes: 2edb16efc899 ("powerpc/32: Add KASAN support")
+Cc: stable@vger.kernel.org # v5.2+
+Reported-by: Erhard F. <erhard_f@mailbox.org>
+Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/4f887e9b77d0d725cbb52035c7ece485c1c5fc14.1565361881.git.christophe.leroy@c-s.fr
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/mm/kasan/kasan_init_32.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/powerpc/mm/kasan/kasan_init_32.c
++++ b/arch/powerpc/mm/kasan/kasan_init_32.c
+@@ -87,7 +87,7 @@ static int __ref kasan_init_region(void
+       if (!slab_is_available())
+               block = memblock_alloc(k_end - k_start, PAGE_SIZE);
+-      for (k_cur = k_start; k_cur < k_end; k_cur += PAGE_SIZE) {
++      for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) {
+               pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur);
+               void *va = block ? block + k_cur - k_start : kasan_get_one_page();
+               pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL);
diff --git a/queue-5.3/powerpc-mce-fix-mce-handling-for-huge-pages.patch b/queue-5.3/powerpc-mce-fix-mce-handling-for-huge-pages.patch
new file mode 100644 (file)
index 0000000..2a2d31f
--- /dev/null
@@ -0,0 +1,81 @@
+From 99ead78afd1128bfcebe7f88f3b102fb2da09aee Mon Sep 17 00:00:00 2001
+From: Balbir Singh <bsingharora@gmail.com>
+Date: Tue, 20 Aug 2019 13:43:47 +0530
+Subject: powerpc/mce: Fix MCE handling for huge pages
+
+From: Balbir Singh <bsingharora@gmail.com>
+
+commit 99ead78afd1128bfcebe7f88f3b102fb2da09aee upstream.
+
+The current code would fail on huge pages addresses, since the shift would
+be incorrect. Use the correct page shift value returned by
+__find_linux_pte() to get the correct physical address. The code is more
+generic and can handle both regular and compound pages.
+
+Fixes: ba41e1e1ccb9 ("powerpc/mce: Hookup derror (load/store) UE errors")
+Signed-off-by: Balbir Singh <bsingharora@gmail.com>
+[arbab@linux.ibm.com: Fixup pseries_do_memory_failure()]
+Signed-off-by: Reza Arbab <arbab@linux.ibm.com>
+Tested-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
+Cc: stable@vger.kernel.org # v4.15+
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20190820081352.8641-3-santosh@fossix.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/kernel/mce_power.c |   19 +++++++++++++------
+ 1 file changed, 13 insertions(+), 6 deletions(-)
+
+--- a/arch/powerpc/kernel/mce_power.c
++++ b/arch/powerpc/kernel/mce_power.c
+@@ -26,6 +26,7 @@
+ unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
+ {
+       pte_t *ptep;
++      unsigned int shift;
+       unsigned long flags;
+       struct mm_struct *mm;
+@@ -35,13 +36,18 @@ unsigned long addr_to_pfn(struct pt_regs
+               mm = &init_mm;
+       local_irq_save(flags);
+-      if (mm == current->mm)
+-              ptep = find_current_mm_pte(mm->pgd, addr, NULL, NULL);
+-      else
+-              ptep = find_init_mm_pte(addr, NULL);
++      ptep = __find_linux_pte(mm->pgd, addr, NULL, &shift);
+       local_irq_restore(flags);
++
+       if (!ptep || pte_special(*ptep))
+               return ULONG_MAX;
++
++      if (shift > PAGE_SHIFT) {
++              unsigned long rpnmask = (1ul << shift) - PAGE_SIZE;
++
++              return pte_pfn(__pte(pte_val(*ptep) | (addr & rpnmask)));
++      }
++
+       return pte_pfn(*ptep);
+ }
+@@ -344,7 +350,7 @@ static const struct mce_derror_table mce
+   MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
+ { 0, false, 0, 0, 0, 0, 0 } };
+-static int mce_find_instr_ea_and_pfn(struct pt_regs *regs, uint64_t *addr,
++static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr,
+                                       uint64_t *phys_addr)
+ {
+       /*
+@@ -541,7 +547,8 @@ static int mce_handle_derror(struct pt_r
+                        * kernel/exception-64s.h
+                        */
+                       if (get_paca()->in_mce < MAX_MCE_DEPTH)
+-                              mce_find_instr_ea_and_pfn(regs, addr, phys_addr);
++                              mce_find_instr_ea_and_phys(regs, addr,
++                                                         phys_addr);
+               }
+               found = 1;
+       }
diff --git a/queue-5.3/powerpc-mce-schedule-work-from-irq_work.patch b/queue-5.3/powerpc-mce-schedule-work-from-irq_work.patch
new file mode 100644 (file)
index 0000000..7ea39e7
--- /dev/null
@@ -0,0 +1,67 @@
+From b5bda6263cad9a927e1a4edb7493d542da0c1410 Mon Sep 17 00:00:00 2001
+From: Santosh Sivaraj <santosh@fossix.org>
+Date: Tue, 20 Aug 2019 13:43:46 +0530
+Subject: powerpc/mce: Schedule work from irq_work
+
+From: Santosh Sivaraj <santosh@fossix.org>
+
+commit b5bda6263cad9a927e1a4edb7493d542da0c1410 upstream.
+
+schedule_work() cannot be called from MCE exception context as MCE can
+interrupt even in interrupt disabled context.
+
+Fixes: 733e4a4c4467 ("powerpc/mce: hookup memory_failure for UE errors")
+Cc: stable@vger.kernel.org # v4.15+
+Reviewed-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
+Acked-by: Balbir Singh <bsingharora@gmail.com>
+Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20190820081352.8641-2-santosh@fossix.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/kernel/mce.c |   11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+--- a/arch/powerpc/kernel/mce.c
++++ b/arch/powerpc/kernel/mce.c
+@@ -33,6 +33,7 @@ static DEFINE_PER_CPU(struct machine_che
+                                       mce_ue_event_queue);
+ static void machine_check_process_queued_event(struct irq_work *work);
++static void machine_check_ue_irq_work(struct irq_work *work);
+ void machine_check_ue_event(struct machine_check_event *evt);
+ static void machine_process_ue_event(struct work_struct *work);
+@@ -40,6 +41,10 @@ static struct irq_work mce_event_process
+         .func = machine_check_process_queued_event,
+ };
++static struct irq_work mce_ue_event_irq_work = {
++      .func = machine_check_ue_irq_work,
++};
++
+ DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
+ static void mce_set_error_info(struct machine_check_event *mce,
+@@ -199,6 +204,10 @@ void release_mce_event(void)
+       get_mce_event(NULL, true);
+ }
++static void machine_check_ue_irq_work(struct irq_work *work)
++{
++      schedule_work(&mce_ue_event_work);
++}
+ /*
+  * Queue up the MCE event which then can be handled later.
+@@ -216,7 +225,7 @@ void machine_check_ue_event(struct machi
+       memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));
+       /* Queue work to process this event later. */
+-      schedule_work(&mce_ue_event_work);
++      irq_work_queue(&mce_ue_event_irq_work);
+ }
+ /*
diff --git a/queue-5.3/powerpc-mm-add-a-helper-to-select-page_kernel_ro-or-page_readonly.patch b/queue-5.3/powerpc-mm-add-a-helper-to-select-page_kernel_ro-or-page_readonly.patch
new file mode 100644 (file)
index 0000000..e75b849
--- /dev/null
@@ -0,0 +1,76 @@
+From 4c0f5d1eb4072871c34530358df45f05ab80edd6 Mon Sep 17 00:00:00 2001
+From: Christophe Leroy <christophe.leroy@c-s.fr>
+Date: Wed, 21 Aug 2019 10:20:00 +0000
+Subject: powerpc/mm: Add a helper to select PAGE_KERNEL_RO or PAGE_READONLY
+
+From: Christophe Leroy <christophe.leroy@c-s.fr>
+
+commit 4c0f5d1eb4072871c34530358df45f05ab80edd6 upstream.
+
+In a couple of places there is a need to select whether read-only
+protection of shadow pages is performed with PAGE_KERNEL_RO or with
+PAGE_READONLY.
+
+Add a helper to avoid duplicating the choice.
+
+Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
+Cc: stable@vger.kernel.org
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/9f33f44b9cd741c4a02b3dce7b8ef9438fe2cd2a.1566382750.git.christophe.leroy@c-s.fr
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/mm/kasan/kasan_init_32.c |   21 +++++++++++++--------
+ 1 file changed, 13 insertions(+), 8 deletions(-)
+
+--- a/arch/powerpc/mm/kasan/kasan_init_32.c
++++ b/arch/powerpc/mm/kasan/kasan_init_32.c
+@@ -12,6 +12,14 @@
+ #include <asm/code-patching.h>
+ #include <mm/mmu_decl.h>
++static pgprot_t kasan_prot_ro(void)
++{
++      if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
++              return PAGE_READONLY;
++
++      return PAGE_KERNEL_RO;
++}
++
+ static void kasan_populate_pte(pte_t *ptep, pgprot_t prot)
+ {
+       unsigned long va = (unsigned long)kasan_early_shadow_page;
+@@ -26,6 +34,7 @@ static int __ref kasan_init_shadow_page_
+ {
+       pmd_t *pmd;
+       unsigned long k_cur, k_next;
++      pgprot_t prot = kasan_prot_ro();
+       pmd = pmd_offset(pud_offset(pgd_offset_k(k_start), k_start), k_start);
+@@ -43,10 +52,7 @@ static int __ref kasan_init_shadow_page_
+               if (!new)
+                       return -ENOMEM;
+-              if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
+-                      kasan_populate_pte(new, PAGE_READONLY);
+-              else
+-                      kasan_populate_pte(new, PAGE_KERNEL_RO);
++              kasan_populate_pte(new, prot);
+               smp_wmb(); /* See comment in __pte_alloc */
+@@ -103,10 +109,9 @@ static int __ref kasan_init_region(void
+ static void __init kasan_remap_early_shadow_ro(void)
+ {
+-      if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
+-              kasan_populate_pte(kasan_early_shadow_pte, PAGE_READONLY);
+-      else
+-              kasan_populate_pte(kasan_early_shadow_pte, PAGE_KERNEL_RO);
++      pgprot_t prot = kasan_prot_ro();
++
++      kasan_populate_pte(kasan_early_shadow_pte, prot);
+       flush_tlb_kernel_range(KASAN_SHADOW_START, KASAN_SHADOW_END);
+ }
diff --git a/queue-5.3/powerpc-mm-fix-an-oops-in-kasan_mmu_init.patch b/queue-5.3/powerpc-mm-fix-an-oops-in-kasan_mmu_init.patch
new file mode 100644 (file)
index 0000000..efe07f8
--- /dev/null
@@ -0,0 +1,85 @@
+From cbd18991e24fea2c31da3bb117c83e4a3538cd11 Mon Sep 17 00:00:00 2001
+From: Christophe Leroy <christophe.leroy@c-s.fr>
+Date: Wed, 21 Aug 2019 10:20:11 +0000
+Subject: powerpc/mm: Fix an Oops in kasan_mmu_init()
+
+From: Christophe Leroy <christophe.leroy@c-s.fr>
+
+commit cbd18991e24fea2c31da3bb117c83e4a3538cd11 upstream.
+
+Uncompressing Kernel Image ... OK
+   Loading Device Tree to 01ff7000, end 01fff74f ... OK
+[    0.000000] printk: bootconsole [udbg0] enabled
+[    0.000000] BUG: Unable to handle kernel data access at 0xf818c000
+[    0.000000] Faulting instruction address: 0xc0013c7c
+[    0.000000] Thread overran stack, or stack corrupted
+[    0.000000] Oops: Kernel access of bad area, sig: 11 [#1]
+[    0.000000] BE PAGE_SIZE=16K PREEMPT
+[    0.000000] Modules linked in:
+[    0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.3.0-rc4-s3k-dev-00743-g5abe4a3e8fd3-dirty #2080
+[    0.000000] NIP:  c0013c7c LR: c0013310 CTR: 00000000
+[    0.000000] REGS: c0c5ff38 TRAP: 0300   Not tainted  (5.3.0-rc4-s3k-dev-00743-g5abe4a3e8fd3-dirty)
+[    0.000000] MSR:  00001032 <ME,IR,DR,RI>  CR: 99033955  XER: 80002100
+[    0.000000] DAR: f818c000 DSISR: 82000000
+[    0.000000] GPR00: c0013310 c0c5fff0 c0ad6ac0 c0c600c0 f818c031 82000000 00000000 ffffffff
+[    0.000000] GPR08: 00000000 f1f1f1f1 c0013c2c c0013304 99033955 00400008 00000000 07ff9598
+[    0.000000] GPR16: 00000000 07ffb94c 00000000 00000000 00000000 00000000 00000000 f818cfb2
+[    0.000000] GPR24: 00000000 00000000 00001000 ffffffff 00000000 c07dbf80 00000000 f818c000
+[    0.000000] NIP [c0013c7c] do_page_fault+0x50/0x904
+[    0.000000] LR [c0013310] handle_page_fault+0xc/0x38
+[    0.000000] Call Trace:
+[    0.000000] Instruction dump:
+[    0.000000] be010080 91410014 553fe8fe 3d40c001 3d20f1f1 7d800026 394a3c2c 3fffe000
+[    0.000000] 6129f1f1 900100c4 9181007c 91410018 <913f0000> 3d2001f4 6129f4f4 913f0004
+
+Don't map the early shadow page read-only yet when creating the new
+page tables for the real shadow memory, otherwise the memblock
+allocations that immediately follows to create the real shadow pages
+that are about to replace the early shadow page trigger a page fault
+if they fall into the region being worked on at the moment.
+
+Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
+Fixes: 2edb16efc899 ("powerpc/32: Add KASAN support")
+Cc: stable@vger.kernel.org
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/fe86886fb8db44360417cee0dc515ad47ca6ef72.1566382750.git.christophe.leroy@c-s.fr
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/mm/kasan/kasan_init_32.c |   15 ++++++++++++++-
+ 1 file changed, 14 insertions(+), 1 deletion(-)
+
+--- a/arch/powerpc/mm/kasan/kasan_init_32.c
++++ b/arch/powerpc/mm/kasan/kasan_init_32.c
+@@ -34,7 +34,7 @@ static int __ref kasan_init_shadow_page_
+ {
+       pmd_t *pmd;
+       unsigned long k_cur, k_next;
+-      pgprot_t prot = kasan_prot_ro();
++      pgprot_t prot = slab_is_available() ? kasan_prot_ro() : PAGE_KERNEL;
+       pmd = pmd_offset(pud_offset(pgd_offset_k(k_start), k_start), k_start);
+@@ -110,9 +110,22 @@ static int __ref kasan_init_region(void
+ static void __init kasan_remap_early_shadow_ro(void)
+ {
+       pgprot_t prot = kasan_prot_ro();
++      unsigned long k_start = KASAN_SHADOW_START;
++      unsigned long k_end = KASAN_SHADOW_END;
++      unsigned long k_cur;
++      phys_addr_t pa = __pa(kasan_early_shadow_page);
+       kasan_populate_pte(kasan_early_shadow_pte, prot);
++      for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) {
++              pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur);
++              pte_t *ptep = pte_offset_kernel(pmd, k_cur);
++
++              if ((pte_val(*ptep) & PTE_RPN_MASK) != pa)
++                      continue;
++
++              __set_pte_at(&init_mm, k_cur, ptep, pfn_pte(PHYS_PFN(pa), prot), 0);
++      }
+       flush_tlb_kernel_range(KASAN_SHADOW_START, KASAN_SHADOW_END);
+ }
diff --git a/queue-5.3/powerpc-mm-fixup-tlbie-vs-mtpidr-mtlpidr-ordering-issue-on-power9.patch b/queue-5.3/powerpc-mm-fixup-tlbie-vs-mtpidr-mtlpidr-ordering-issue-on-power9.patch
new file mode 100644 (file)
index 0000000..5419496
--- /dev/null
@@ -0,0 +1,347 @@
+From 047e6575aec71d75b765c22111820c4776cd1c43 Mon Sep 17 00:00:00 2001
+From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
+Date: Tue, 24 Sep 2019 09:22:53 +0530
+Subject: powerpc/mm: Fixup tlbie vs mtpidr/mtlpidr ordering issue on POWER9
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+
+commit 047e6575aec71d75b765c22111820c4776cd1c43 upstream.
+
+On POWER9, under some circumstances, a broadcast TLB invalidation will
+fail to invalidate the ERAT cache on some threads when there are
+parallel mtpidr/mtlpidr happening on other threads of the same core.
+This can cause stores to continue to go to a page after it's unmapped.
+
+The workaround is to force an ERAT flush using PID=0 or LPID=0 tlbie
+flush. This additional TLB flush will cause the ERAT cache
+invalidation. Since we are using PID=0 or LPID=0, we don't get
+filtered out by the TLB snoop filtering logic.
+
+We need to still follow this up with another tlbie to take care of
+store vs tlbie ordering issue explained in commit:
+a5d4b5891c2f ("powerpc/mm: Fixup tlbie vs store ordering issue on
+POWER9"). The presence of ERAT cache implies we can still get new
+stores and they may miss store queue marking flush.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20190924035254.24612-3-aneesh.kumar@linux.ibm.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/include/asm/cputable.h    |    3 -
+ arch/powerpc/kernel/dt_cpu_ftrs.c      |    2 
+ arch/powerpc/kvm/book3s_hv_rm_mmu.c    |   42 +++++++++++++----
+ arch/powerpc/mm/book3s64/hash_native.c |   29 ++++++++++-
+ arch/powerpc/mm/book3s64/radix_tlb.c   |   80 +++++++++++++++++++++++++++++----
+ 5 files changed, 134 insertions(+), 22 deletions(-)
+
+--- a/arch/powerpc/include/asm/cputable.h
++++ b/arch/powerpc/include/asm/cputable.h
+@@ -215,6 +215,7 @@ static inline void cpu_feature_keys_init
+ #define CPU_FTR_P9_TM_XER_SO_BUG      LONG_ASM_CONST(0x0000200000000000)
+ #define CPU_FTR_P9_TLBIE_STQ_BUG      LONG_ASM_CONST(0x0000400000000000)
+ #define CPU_FTR_P9_TIDR                       LONG_ASM_CONST(0x0000800000000000)
++#define CPU_FTR_P9_TLBIE_ERAT_BUG     LONG_ASM_CONST(0x0001000000000000)
+ #ifndef __ASSEMBLY__
+@@ -461,7 +462,7 @@ static inline void cpu_feature_keys_init
+           CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
+           CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
+           CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \
+-          CPU_FTR_P9_TLBIE_STQ_BUG | CPU_FTR_P9_TIDR)
++          CPU_FTR_P9_TLBIE_STQ_BUG | CPU_FTR_P9_TLBIE_ERAT_BUG | CPU_FTR_P9_TIDR)
+ #define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9
+ #define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1)
+ #define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \
+--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
++++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
+@@ -714,6 +714,8 @@ static __init void update_tlbie_feature_
+                       WARN_ONCE(1, "Unknown PVR");
+                       cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
+               }
++
++              cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_ERAT_BUG;
+       }
+ }
+--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
++++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+@@ -433,6 +433,37 @@ static inline int is_mmio_hpte(unsigned
+               (HPTE_R_KEY_HI | HPTE_R_KEY_LO));
+ }
++static inline void fixup_tlbie_lpid(unsigned long rb_value, unsigned long lpid)
++{
++
++      if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
++              /* Radix flush for a hash guest */
++
++              unsigned long rb,rs,prs,r,ric;
++
++              rb = PPC_BIT(52); /* IS = 2 */
++              rs = 0;  /* lpid = 0 */
++              prs = 0; /* partition scoped */
++              r = 1;   /* radix format */
++              ric = 0; /* RIC_FLSUH_TLB */
++
++              /*
++               * Need the extra ptesync to make sure we don't
++               * re-order the tlbie
++               */
++              asm volatile("ptesync": : :"memory");
++              asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
++                           : : "r"(rb), "i"(r), "i"(prs),
++                             "i"(ric), "r"(rs) : "memory");
++      }
++
++      if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
++              asm volatile("ptesync": : :"memory");
++              asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
++                           "r" (rb_value), "r" (lpid));
++      }
++}
++
+ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
+                     long npages, int global, bool need_sync)
+ {
+@@ -451,16 +482,7 @@ static void do_tlbies(struct kvm *kvm, u
+                                    "r" (rbvalues[i]), "r" (kvm->arch.lpid));
+               }
+-              if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+-                      /*
+-                       * Need the extra ptesync to make sure we don't
+-                       * re-order the tlbie
+-                       */
+-                      asm volatile("ptesync": : :"memory");
+-                      asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
+-                                   "r" (rbvalues[0]), "r" (kvm->arch.lpid));
+-              }
+-
++              fixup_tlbie_lpid(rbvalues[i - 1], kvm->arch.lpid);
+               asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+       } else {
+               if (need_sync)
+--- a/arch/powerpc/mm/book3s64/hash_native.c
++++ b/arch/powerpc/mm/book3s64/hash_native.c
+@@ -197,8 +197,31 @@ static inline unsigned long  ___tlbie(un
+       return va;
+ }
+-static inline void fixup_tlbie(unsigned long vpn, int psize, int apsize, int ssize)
++static inline void fixup_tlbie_vpn(unsigned long vpn, int psize,
++                                 int apsize, int ssize)
+ {
++      if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
++              /* Radix flush for a hash guest */
++
++              unsigned long rb,rs,prs,r,ric;
++
++              rb = PPC_BIT(52); /* IS = 2 */
++              rs = 0;  /* lpid = 0 */
++              prs = 0; /* partition scoped */
++              r = 1;   /* radix format */
++              ric = 0; /* RIC_FLSUH_TLB */
++
++              /*
++               * Need the extra ptesync to make sure we don't
++               * re-order the tlbie
++               */
++              asm volatile("ptesync": : :"memory");
++              asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
++                           : : "r"(rb), "i"(r), "i"(prs),
++                             "i"(ric), "r"(rs) : "memory");
++      }
++
++
+       if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+               /* Need the extra ptesync to ensure we don't reorder tlbie*/
+               asm volatile("ptesync": : :"memory");
+@@ -283,7 +306,7 @@ static inline void tlbie(unsigned long v
+               asm volatile("ptesync": : :"memory");
+       } else {
+               __tlbie(vpn, psize, apsize, ssize);
+-              fixup_tlbie(vpn, psize, apsize, ssize);
++              fixup_tlbie_vpn(vpn, psize, apsize, ssize);
+               asm volatile("eieio; tlbsync; ptesync": : :"memory");
+       }
+       if (lock_tlbie && !use_local)
+@@ -856,7 +879,7 @@ static void native_flush_hash_range(unsi
+               /*
+                * Just do one more with the last used values.
+                */
+-              fixup_tlbie(vpn, psize, psize, ssize);
++              fixup_tlbie_vpn(vpn, psize, psize, ssize);
+               asm volatile("eieio; tlbsync; ptesync":::"memory");
+               if (lock_tlbie)
+--- a/arch/powerpc/mm/book3s64/radix_tlb.c
++++ b/arch/powerpc/mm/book3s64/radix_tlb.c
+@@ -211,21 +211,82 @@ static __always_inline void __tlbie_lpid
+       trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
+ }
+-static inline void fixup_tlbie(void)
++
++static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
++                                unsigned long ap)
+ {
+-      unsigned long pid = 0;
++      if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
++              asm volatile("ptesync": : :"memory");
++              __tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
++      }
++
++      if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
++              asm volatile("ptesync": : :"memory");
++              __tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
++      }
++}
++
++static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
++                                      unsigned long ap)
++{
++      if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
++              asm volatile("ptesync": : :"memory");
++              __tlbie_pid(0, RIC_FLUSH_TLB);
++      }
++
++      if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
++              asm volatile("ptesync": : :"memory");
++              __tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
++      }
++}
++
++static inline void fixup_tlbie_pid(unsigned long pid)
++{
++      /*
++       * We can use any address for the invalidation, pick one which is
++       * probably unused as an optimisation.
++       */
+       unsigned long va = ((1UL << 52) - 1);
++      if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
++              asm volatile("ptesync": : :"memory");
++              __tlbie_pid(0, RIC_FLUSH_TLB);
++      }
++
+       if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+               asm volatile("ptesync": : :"memory");
+               __tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
+       }
+ }
++
++static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
++                                     unsigned long ap)
++{
++      if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
++              asm volatile("ptesync": : :"memory");
++              __tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
++      }
++
++      if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
++              asm volatile("ptesync": : :"memory");
++              __tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
++      }
++}
++
+ static inline void fixup_tlbie_lpid(unsigned long lpid)
+ {
++      /*
++       * We can use any address for the invalidation, pick one which is
++       * probably unused as an optimisation.
++       */
+       unsigned long va = ((1UL << 52) - 1);
++      if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
++              asm volatile("ptesync": : :"memory");
++              __tlbie_lpid(0, RIC_FLUSH_TLB);
++      }
++
+       if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+               asm volatile("ptesync": : :"memory");
+               __tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
+@@ -273,6 +334,7 @@ static inline void _tlbie_pid(unsigned l
+       switch (ric) {
+       case RIC_FLUSH_TLB:
+               __tlbie_pid(pid, RIC_FLUSH_TLB);
++              fixup_tlbie_pid(pid);
+               break;
+       case RIC_FLUSH_PWC:
+               __tlbie_pid(pid, RIC_FLUSH_PWC);
+@@ -280,8 +342,8 @@ static inline void _tlbie_pid(unsigned l
+       case RIC_FLUSH_ALL:
+       default:
+               __tlbie_pid(pid, RIC_FLUSH_ALL);
++              fixup_tlbie_pid(pid);
+       }
+-      fixup_tlbie();
+       asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+@@ -325,6 +387,7 @@ static inline void _tlbie_lpid(unsigned
+       switch (ric) {
+       case RIC_FLUSH_TLB:
+               __tlbie_lpid(lpid, RIC_FLUSH_TLB);
++              fixup_tlbie_lpid(lpid);
+               break;
+       case RIC_FLUSH_PWC:
+               __tlbie_lpid(lpid, RIC_FLUSH_PWC);
+@@ -332,8 +395,8 @@ static inline void _tlbie_lpid(unsigned
+       case RIC_FLUSH_ALL:
+       default:
+               __tlbie_lpid(lpid, RIC_FLUSH_ALL);
++              fixup_tlbie_lpid(lpid);
+       }
+-      fixup_tlbie_lpid(lpid);
+       asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+@@ -407,6 +470,8 @@ static inline void __tlbie_va_range(unsi
+       for (addr = start; addr < end; addr += page_size)
+               __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
++
++      fixup_tlbie_va_range(addr - page_size, pid, ap);
+ }
+ static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
+@@ -416,7 +481,7 @@ static __always_inline void _tlbie_va(un
+       asm volatile("ptesync": : :"memory");
+       __tlbie_va(va, pid, ap, ric);
+-      fixup_tlbie();
++      fixup_tlbie_va(va, pid, ap);
+       asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+@@ -427,7 +492,7 @@ static __always_inline void _tlbie_lpid_
+       asm volatile("ptesync": : :"memory");
+       __tlbie_lpid_va(va, lpid, ap, ric);
+-      fixup_tlbie_lpid(lpid);
++      fixup_tlbie_lpid_va(va, lpid, ap);
+       asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+@@ -439,7 +504,6 @@ static inline void _tlbie_va_range(unsig
+       if (also_pwc)
+               __tlbie_pid(pid, RIC_FLUSH_PWC);
+       __tlbie_va_range(start, end, pid, page_size, psize);
+-      fixup_tlbie();
+       asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+@@ -775,7 +839,7 @@ is_local:
+                       if (gflush)
+                               __tlbie_va_range(gstart, gend, pid,
+                                               PUD_SIZE, MMU_PAGE_1G);
+-                      fixup_tlbie();
++
+                       asm volatile("eieio; tlbsync; ptesync": : :"memory");
+               }
+       }
diff --git a/queue-5.3/powerpc-powernv-ioda-fix-race-in-tce-level-allocation.patch b/queue-5.3/powerpc-powernv-ioda-fix-race-in-tce-level-allocation.patch
new file mode 100644 (file)
index 0000000..a1b441f
--- /dev/null
@@ -0,0 +1,72 @@
+From 56090a3902c80c296e822d11acdb6a101b322c52 Mon Sep 17 00:00:00 2001
+From: Alexey Kardashevskiy <aik@ozlabs.ru>
+Date: Thu, 18 Jul 2019 15:11:36 +1000
+Subject: powerpc/powernv/ioda: Fix race in TCE level allocation
+
+From: Alexey Kardashevskiy <aik@ozlabs.ru>
+
+commit 56090a3902c80c296e822d11acdb6a101b322c52 upstream.
+
+pnv_tce() returns a pointer to a TCE entry and originally a TCE table
+would be pre-allocated. For the default case of 2GB window the table
+needs only a single level and that is fine. However if more levels are
+requested, it is possible to get a race when 2 threads want a pointer
+to a TCE entry from the same page of TCEs.
+
+This adds cmpxchg to handle the race. Note that once TCE is non-zero,
+it cannot become zero again.
+
+Fixes: a68bd1267b72 ("powerpc/powernv/ioda: Allocate indirect TCE levels on demand")
+CC: stable@vger.kernel.org # v4.19+
+Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20190718051139.74787-2-aik@ozlabs.ru
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/platforms/powernv/pci-ioda-tce.c |   18 +++++++++++++-----
+ 1 file changed, 13 insertions(+), 5 deletions(-)
+
+--- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c
++++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
+@@ -49,6 +49,9 @@ static __be64 *pnv_alloc_tce_level(int n
+       return addr;
+ }
++static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
++              unsigned long size, unsigned int levels);
++
+ static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc)
+ {
+       __be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base;
+@@ -58,9 +61,9 @@ static __be64 *pnv_tce(struct iommu_tabl
+       while (level) {
+               int n = (idx & mask) >> (level * shift);
+-              unsigned long tce;
++              unsigned long oldtce, tce = be64_to_cpu(READ_ONCE(tmp[n]));
+-              if (tmp[n] == 0) {
++              if (!tce) {
+                       __be64 *tmp2;
+                       if (!alloc)
+@@ -71,10 +74,15 @@ static __be64 *pnv_tce(struct iommu_tabl
+                       if (!tmp2)
+                               return NULL;
+-                      tmp[n] = cpu_to_be64(__pa(tmp2) |
+-                                      TCE_PCI_READ | TCE_PCI_WRITE);
++                      tce = __pa(tmp2) | TCE_PCI_READ | TCE_PCI_WRITE;
++                      oldtce = be64_to_cpu(cmpxchg(&tmp[n], 0,
++                                      cpu_to_be64(tce)));
++                      if (oldtce) {
++                              pnv_pci_ioda2_table_do_free_pages(tmp2,
++                                      ilog2(tbl->it_level_size) + 3, 1);
++                              tce = oldtce;
++                      }
+               }
+-              tce = be64_to_cpu(tmp[n]);
+               tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE));
+               idx &= ~mask;
diff --git a/queue-5.3/powerpc-powernv-restrict-opal-symbol-map-to-only-be-readable-by-root.patch b/queue-5.3/powerpc-powernv-restrict-opal-symbol-map-to-only-be-readable-by-root.patch
new file mode 100644 (file)
index 0000000..81133a2
--- /dev/null
@@ -0,0 +1,54 @@
+From e7de4f7b64c23e503a8c42af98d56f2a7462bd6d Mon Sep 17 00:00:00 2001
+From: Andrew Donnellan <ajd@linux.ibm.com>
+Date: Fri, 3 May 2019 17:52:53 +1000
+Subject: powerpc/powernv: Restrict OPAL symbol map to only be readable by root
+
+From: Andrew Donnellan <ajd@linux.ibm.com>
+
+commit e7de4f7b64c23e503a8c42af98d56f2a7462bd6d upstream.
+
+Currently the OPAL symbol map is globally readable, which seems bad as
+it contains physical addresses.
+
+Restrict it to root.
+
+Fixes: c8742f85125d ("powerpc/powernv: Expose OPAL firmware symbol map")
+Cc: stable@vger.kernel.org # v3.19+
+Suggested-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Andrew Donnellan <ajd@linux.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20190503075253.22798-1-ajd@linux.ibm.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/platforms/powernv/opal.c |   11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/arch/powerpc/platforms/powernv/opal.c
++++ b/arch/powerpc/platforms/powernv/opal.c
+@@ -705,7 +705,10 @@ static ssize_t symbol_map_read(struct fi
+                                      bin_attr->size);
+ }
+-static BIN_ATTR_RO(symbol_map, 0);
++static struct bin_attribute symbol_map_attr = {
++      .attr = {.name = "symbol_map", .mode = 0400},
++      .read = symbol_map_read
++};
+ static void opal_export_symmap(void)
+ {
+@@ -722,10 +725,10 @@ static void opal_export_symmap(void)
+               return;
+       /* Setup attributes */
+-      bin_attr_symbol_map.private = __va(be64_to_cpu(syms[0]));
+-      bin_attr_symbol_map.size = be64_to_cpu(syms[1]);
++      symbol_map_attr.private = __va(be64_to_cpu(syms[0]));
++      symbol_map_attr.size = be64_to_cpu(syms[1]);
+-      rc = sysfs_create_bin_file(opal_kobj, &bin_attr_symbol_map);
++      rc = sysfs_create_bin_file(opal_kobj, &symbol_map_attr);
+       if (rc)
+               pr_warn("Error %d creating OPAL symbols file\n", rc);
+ }
diff --git a/queue-5.3/powerpc-pseries-fix-cpu_hotplug_lock-acquisition-in-resize_hpt.patch b/queue-5.3/powerpc-pseries-fix-cpu_hotplug_lock-acquisition-in-resize_hpt.patch
new file mode 100644 (file)
index 0000000..caa3b53
--- /dev/null
@@ -0,0 +1,170 @@
+From c784be435d5dae28d3b03db31753dd7a18733f0c Mon Sep 17 00:00:00 2001
+From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
+Date: Wed, 15 May 2019 13:15:52 +0530
+Subject: powerpc/pseries: Fix cpu_hotplug_lock acquisition in resize_hpt()
+
+From: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
+
+commit c784be435d5dae28d3b03db31753dd7a18733f0c upstream.
+
+The calls to arch_add_memory()/arch_remove_memory() are always made
+with the read-side cpu_hotplug_lock acquired via memory_hotplug_begin().
+On pSeries, arch_add_memory()/arch_remove_memory() eventually call
+resize_hpt() which in turn calls stop_machine() which acquires the
+read-side cpu_hotplug_lock again, thereby resulting in the recursive
+acquisition of this lock.
+
+In the absence of CONFIG_PROVE_LOCKING, we hadn't observed a system
+lockup during a memory hotplug operation because cpus_read_lock() is a
+per-cpu rwsem read, which, in the fast-path (in the absence of the
+writer, which in our case is a CPU-hotplug operation) simply
+increments the read_count on the semaphore. Thus a recursive read in
+the fast-path doesn't cause any problems.
+
+However, we can hit this problem in practice if there is a concurrent
+CPU-Hotplug operation in progress which is waiting to acquire the
+write-side of the lock. This will cause the second recursive read to
+block until the writer finishes. While the writer is blocked since the
+first read holds the lock. Thus both the reader as well as the writers
+fail to make any progress thereby blocking both CPU-Hotplug as well as
+Memory Hotplug operations.
+
+Memory-Hotplug                         CPU-Hotplug
+CPU 0                                  CPU 1
+------                                  ------
+
+1. down_read(cpu_hotplug_lock.rw_sem)
+   [memory_hotplug_begin]
+                                       2. down_write(cpu_hotplug_lock.rw_sem)
+                                       [cpu_up/cpu_down]
+3. down_read(cpu_hotplug_lock.rw_sem)
+   [stop_machine()]
+
+Lockdep complains as follows in these code-paths.
+
+ swapper/0/1 is trying to acquire lock:
+ (____ptrval____) (cpu_hotplug_lock.rw_sem){++++}, at: stop_machine+0x2c/0x60
+
+but task is already holding lock:
+(____ptrval____) (cpu_hotplug_lock.rw_sem){++++}, at: mem_hotplug_begin+0x20/0x50
+
+ other info that might help us debug this:
+  Possible unsafe locking scenario:
+
+        CPU0
+        ----
+   lock(cpu_hotplug_lock.rw_sem);
+   lock(cpu_hotplug_lock.rw_sem);
+
+  *** DEADLOCK ***
+
+  May be due to missing lock nesting notation
+
+ 3 locks held by swapper/0/1:
+  #0: (____ptrval____) (&dev->mutex){....}, at: __driver_attach+0x12c/0x1b0
+  #1: (____ptrval____) (cpu_hotplug_lock.rw_sem){++++}, at: mem_hotplug_begin+0x20/0x50
+  #2: (____ptrval____) (mem_hotplug_lock.rw_sem){++++}, at: percpu_down_write+0x54/0x1a0
+
+stack backtrace:
+ CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.0.0-rc5-58373-gbc99402235f3-dirty #166
+ Call Trace:
+   dump_stack+0xe8/0x164 (unreliable)
+   __lock_acquire+0x1110/0x1c70
+   lock_acquire+0x240/0x290
+   cpus_read_lock+0x64/0xf0
+   stop_machine+0x2c/0x60
+   pseries_lpar_resize_hpt+0x19c/0x2c0
+   resize_hpt_for_hotplug+0x70/0xd0
+   arch_add_memory+0x58/0xfc
+   devm_memremap_pages+0x5e8/0x8f0
+   pmem_attach_disk+0x764/0x830
+   nvdimm_bus_probe+0x118/0x240
+   really_probe+0x230/0x4b0
+   driver_probe_device+0x16c/0x1e0
+   __driver_attach+0x148/0x1b0
+   bus_for_each_dev+0x90/0x130
+   driver_attach+0x34/0x50
+   bus_add_driver+0x1a8/0x360
+   driver_register+0x108/0x170
+   __nd_driver_register+0xd0/0xf0
+   nd_pmem_driver_init+0x34/0x48
+   do_one_initcall+0x1e0/0x45c
+   kernel_init_freeable+0x540/0x64c
+   kernel_init+0x2c/0x160
+   ret_from_kernel_thread+0x5c/0x68
+
+Fix this issue by
+  1) Requiring all the calls to pseries_lpar_resize_hpt() be made
+     with cpu_hotplug_lock held.
+
+  2) In pseries_lpar_resize_hpt() invoke stop_machine_cpuslocked()
+     as a consequence of 1)
+
+  3) To satisfy 1), in hpt_order_set(), call mmu_hash_ops.resize_hpt()
+     with cpu_hotplug_lock held.
+
+Fixes: dbcf929c0062 ("powerpc/pseries: Add support for hash table resizing")
+Cc: stable@vger.kernel.org # v4.11+
+Reported-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/1557906352-29048-1-git-send-email-ego@linux.vnet.ibm.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/mm/book3s64/hash_utils.c |    9 ++++++++-
+ arch/powerpc/platforms/pseries/lpar.c |    8 ++++++--
+ 2 files changed, 14 insertions(+), 3 deletions(-)
+
+--- a/arch/powerpc/mm/book3s64/hash_utils.c
++++ b/arch/powerpc/mm/book3s64/hash_utils.c
+@@ -34,6 +34,7 @@
+ #include <linux/libfdt.h>
+ #include <linux/pkeys.h>
+ #include <linux/hugetlb.h>
++#include <linux/cpu.h>
+ #include <asm/debugfs.h>
+ #include <asm/processor.h>
+@@ -1931,10 +1932,16 @@ static int hpt_order_get(void *data, u64
+ static int hpt_order_set(void *data, u64 val)
+ {
++      int ret;
++
+       if (!mmu_hash_ops.resize_hpt)
+               return -ENODEV;
+-      return mmu_hash_ops.resize_hpt(val);
++      cpus_read_lock();
++      ret = mmu_hash_ops.resize_hpt(val);
++      cpus_read_unlock();
++
++      return ret;
+ }
+ DEFINE_DEBUGFS_ATTRIBUTE(fops_hpt_order, hpt_order_get, hpt_order_set, "%llu\n");
+--- a/arch/powerpc/platforms/pseries/lpar.c
++++ b/arch/powerpc/platforms/pseries/lpar.c
+@@ -1413,7 +1413,10 @@ static int pseries_lpar_resize_hpt_commi
+       return 0;
+ }
+-/* Must be called in user context */
++/*
++ * Must be called in process context. The caller must hold the
++ * cpus_lock.
++ */
+ static int pseries_lpar_resize_hpt(unsigned long shift)
+ {
+       struct hpt_resize_state state = {
+@@ -1467,7 +1470,8 @@ static int pseries_lpar_resize_hpt(unsig
+       t1 = ktime_get();
+-      rc = stop_machine(pseries_lpar_resize_hpt_commit, &state, NULL);
++      rc = stop_machine_cpuslocked(pseries_lpar_resize_hpt_commit,
++                                   &state, NULL);
+       t2 = ktime_get();
diff --git a/queue-5.3/powerpc-ptdump-fix-addresses-display-on-ppc32.patch b/queue-5.3/powerpc-ptdump-fix-addresses-display-on-ppc32.patch
new file mode 100644 (file)
index 0000000..5eec2c7
--- /dev/null
@@ -0,0 +1,38 @@
+From 7c7a532ba3fc51bf9527d191fb410786c1fdc73c Mon Sep 17 00:00:00 2001
+From: Christophe Leroy <christophe.leroy@c-s.fr>
+Date: Wed, 14 Aug 2019 12:36:09 +0000
+Subject: powerpc/ptdump: Fix addresses display on PPC32
+
+From: Christophe Leroy <christophe.leroy@c-s.fr>
+
+commit 7c7a532ba3fc51bf9527d191fb410786c1fdc73c upstream.
+
+Commit 453d87f6a8ae ("powerpc/mm: Warn if W+X pages found on boot")
+wrongly changed KERN_VIRT_START from 0 to PAGE_OFFSET, leading to a
+shift in the displayed addresses.
+
+Let's revert that change to resync walk_pagetables()'s addr val and
+pgd_t pointer for PPC32.
+
+Fixes: 453d87f6a8ae ("powerpc/mm: Warn if W+X pages found on boot")
+Cc: stable@vger.kernel.org # v5.2+
+Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/eb4d626514e22f85814830012642329018ef6af9.1565786091.git.christophe.leroy@c-s.fr
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/mm/ptdump/ptdump.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/powerpc/mm/ptdump/ptdump.c
++++ b/arch/powerpc/mm/ptdump/ptdump.c
+@@ -27,7 +27,7 @@
+ #include "ptdump.h"
+ #ifdef CONFIG_PPC32
+-#define KERN_VIRT_START       PAGE_OFFSET
++#define KERN_VIRT_START       0
+ #endif
+ /*
diff --git a/queue-5.3/powerpc-xive-implement-get_irqchip_state-method-for-xive-to-fix-shutdown-race.patch b/queue-5.3/powerpc-xive-implement-get_irqchip_state-method-for-xive-to-fix-shutdown-race.patch
new file mode 100644 (file)
index 0000000..b521d00
--- /dev/null
@@ -0,0 +1,390 @@
+From da15c03b047dca891d37b9f4ef9ca14d84a6484f Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Tue, 13 Aug 2019 20:06:48 +1000
+Subject: powerpc/xive: Implement get_irqchip_state method for XIVE to fix shutdown race
+
+From: Paul Mackerras <paulus@ozlabs.org>
+
+commit da15c03b047dca891d37b9f4ef9ca14d84a6484f upstream.
+
+Testing has revealed the existence of a race condition where a XIVE
+interrupt being shut down can be in one of the XIVE interrupt queues
+(of which there are up to 8 per CPU, one for each priority) at the
+point where free_irq() is called.  If this happens, the queue read can
+return an interrupt number which has been shut down.  This can lead to various
+symptoms:
+
+- irq_to_desc(irq) can be NULL.  In this case, no end-of-interrupt
+  function gets called, resulting in the CPU's elevated interrupt
+  priority (numerically lowered CPPR) never getting reset.  That then
+  means that the CPU stops processing interrupts, causing device
+  timeouts and other errors in various device drivers.
+
+- The irq descriptor or related data structures can be in the process
+  of being freed as the interrupt code is using them.  This typically
+  leads to crashes due to bad pointer dereferences.
+
+This race is basically what commit 62e0468650c3 ("genirq: Add optional
+hardware synchronization for shutdown", 2019-06-28) is intended to
+fix, given a get_irqchip_state() method for the interrupt controller
+being used.  It works by polling the interrupt controller when an
+interrupt is being freed until the controller says it is not pending.
+
+With XIVE, the PQ bits of the interrupt source indicate the state of
+the interrupt source, and in particular the P bit goes from 0 to 1 at
+the point where the hardware writes an entry into the interrupt queue
+that this interrupt is directed towards.  Normally, the code will then
+process the interrupt and do an end-of-interrupt (EOI) operation which
+will reset PQ to 00 (assuming another interrupt hasn't been generated
+in the meantime).  However, there are situations where the code resets
+P even though a queue entry exists (for example, by setting PQ to 01,
+which disables the interrupt source), and also situations where the
+code leaves P at 1 after removing the queue entry (for example, this
+is done for escalation interrupts so they cannot fire again until
+they are explicitly re-enabled).
+
+The code already has a 'saved_p' flag for the interrupt source which
+indicates that a queue entry exists, although it isn't maintained
+consistently.  This patch adds a 'stale_p' flag to indicate that
+P has been left at 1 after processing a queue entry, and adds code
+to set and clear saved_p and stale_p as necessary to maintain a
+consistent indication of whether a queue entry may or may not exist.
+
+With this, we can implement xive_get_irqchip_state() by looking at
+stale_p, saved_p and the ESB PQ bits for the interrupt.
+
+There is some additional code to handle escalation interrupts
+properly, because they are enabled and disabled in KVM assembly code,
+which does not have access to the xive_irq_data struct for the
+escalation interrupt.  Hence, stale_p may be incorrect when the
+escalation interrupt is freed in kvmppc_xive_{,native_}cleanup_vcpu().
+Fortunately, we can fix it up by looking at vcpu->arch.xive_esc_on,
+with some careful attention to barriers in order to ensure the correct
+result if xive_esc_irq() races with kvmppc_xive_cleanup_vcpu().
+
+Finally, this adds code to make noise on the console (pr_crit and
+WARN_ON(1)) if we find an interrupt queue entry for an interrupt
+which does not have a descriptor.  While this won't catch the race
+reliably, if it does get triggered it will be an indication that
+the race is occurring and needs to be debugged.
+
+Fixes: 243e25112d06 ("powerpc/xive: Native exploitation of the XIVE interrupt controller")
+Cc: stable@vger.kernel.org # v4.12+
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20190813100648.GE9567@blackberry
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/include/asm/xive.h       |    8 +++
+ arch/powerpc/kvm/book3s_xive.c        |   31 ++++++++++++
+ arch/powerpc/kvm/book3s_xive.h        |    2 
+ arch/powerpc/kvm/book3s_xive_native.c |    3 +
+ arch/powerpc/sysdev/xive/common.c     |   87 +++++++++++++++++++++++++---------
+ 5 files changed, 108 insertions(+), 23 deletions(-)
+
+--- a/arch/powerpc/include/asm/xive.h
++++ b/arch/powerpc/include/asm/xive.h
+@@ -46,7 +46,15 @@ struct xive_irq_data {
+       /* Setup/used by frontend */
+       int target;
++      /*
++       * saved_p means that there is a queue entry for this interrupt
++       * in some CPU's queue (not including guest vcpu queues), even
++       * if P is not set in the source ESB.
++       * stale_p means that there is no queue entry for this interrupt
++       * in some CPU's queue, even if P is set in the source ESB.
++       */
+       bool saved_p;
++      bool stale_p;
+ };
+ #define XIVE_IRQ_FLAG_STORE_EOI       0x01
+ #define XIVE_IRQ_FLAG_LSI     0x02
+--- a/arch/powerpc/kvm/book3s_xive.c
++++ b/arch/powerpc/kvm/book3s_xive.c
+@@ -166,6 +166,9 @@ static irqreturn_t xive_esc_irq(int irq,
+        */
+       vcpu->arch.xive_esc_on = false;
++      /* This orders xive_esc_on = false vs. subsequent stale_p = true */
++      smp_wmb();      /* goes with smp_mb() in cleanup_single_escalation */
++
+       return IRQ_HANDLED;
+ }
+@@ -1119,6 +1122,31 @@ void kvmppc_xive_disable_vcpu_interrupts
+       vcpu->arch.xive_esc_raddr = 0;
+ }
++/*
++ * In single escalation mode, the escalation interrupt is marked so
++ * that EOI doesn't re-enable it, but just sets the stale_p flag to
++ * indicate that the P bit has already been dealt with.  However, the
++ * assembly code that enters the guest sets PQ to 00 without clearing
++ * stale_p (because it has no easy way to address it).  Hence we have
++ * to adjust stale_p before shutting down the interrupt.
++ */
++void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu,
++                                  struct kvmppc_xive_vcpu *xc, int irq)
++{
++      struct irq_data *d = irq_get_irq_data(irq);
++      struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
++
++      /*
++       * This slightly odd sequence gives the right result
++       * (i.e. stale_p set if xive_esc_on is false) even if
++       * we race with xive_esc_irq() and xive_irq_eoi().
++       */
++      xd->stale_p = false;
++      smp_mb();               /* paired with smb_wmb in xive_esc_irq */
++      if (!vcpu->arch.xive_esc_on)
++              xd->stale_p = true;
++}
++
+ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
+ {
+       struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+@@ -1143,6 +1171,9 @@ void kvmppc_xive_cleanup_vcpu(struct kvm
+       /* Free escalations */
+       for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+               if (xc->esc_virq[i]) {
++                      if (xc->xive->single_escalation)
++                              xive_cleanup_single_escalation(vcpu, xc,
++                                                      xc->esc_virq[i]);
+                       free_irq(xc->esc_virq[i], vcpu);
+                       irq_dispose_mapping(xc->esc_virq[i]);
+                       kfree(xc->esc_virq_names[i]);
+--- a/arch/powerpc/kvm/book3s_xive.h
++++ b/arch/powerpc/kvm/book3s_xive.h
+@@ -282,6 +282,8 @@ int kvmppc_xive_select_target(struct kvm
+ int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
+                                 bool single_escalation);
+ struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type);
++void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu,
++                                  struct kvmppc_xive_vcpu *xc, int irq);
+ #endif /* CONFIG_KVM_XICS */
+ #endif /* _KVM_PPC_BOOK3S_XICS_H */
+--- a/arch/powerpc/kvm/book3s_xive_native.c
++++ b/arch/powerpc/kvm/book3s_xive_native.c
+@@ -71,6 +71,9 @@ void kvmppc_xive_native_cleanup_vcpu(str
+       for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+               /* Free the escalation irq */
+               if (xc->esc_virq[i]) {
++                      if (xc->xive->single_escalation)
++                              xive_cleanup_single_escalation(vcpu, xc,
++                                                      xc->esc_virq[i]);
+                       free_irq(xc->esc_virq[i], vcpu);
+                       irq_dispose_mapping(xc->esc_virq[i]);
+                       kfree(xc->esc_virq_names[i]);
+--- a/arch/powerpc/sysdev/xive/common.c
++++ b/arch/powerpc/sysdev/xive/common.c
+@@ -135,7 +135,7 @@ static u32 xive_read_eq(struct xive_q *q
+ static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek)
+ {
+       u32 irq = 0;
+-      u8 prio;
++      u8 prio = 0;
+       /* Find highest pending priority */
+       while (xc->pending_prio != 0) {
+@@ -148,8 +148,19 @@ static u32 xive_scan_interrupts(struct x
+               irq = xive_read_eq(&xc->queue[prio], just_peek);
+               /* Found something ? That's it */
+-              if (irq)
+-                      break;
++              if (irq) {
++                      if (just_peek || irq_to_desc(irq))
++                              break;
++                      /*
++                       * We should never get here; if we do then we must
++                       * have failed to synchronize the interrupt properly
++                       * when shutting it down.
++                       */
++                      pr_crit("xive: got interrupt %d without descriptor, dropping\n",
++                              irq);
++                      WARN_ON(1);
++                      continue;
++              }
+               /* Clear pending bits */
+               xc->pending_prio &= ~(1 << prio);
+@@ -307,6 +318,7 @@ static void xive_do_queue_eoi(struct xiv
+  */
+ static void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
+ {
++      xd->stale_p = false;
+       /* If the XIVE supports the new "store EOI facility, use it */
+       if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
+               xive_esb_write(xd, XIVE_ESB_STORE_EOI, 0);
+@@ -350,7 +362,7 @@ static void xive_do_source_eoi(u32 hw_ir
+       }
+ }
+-/* irq_chip eoi callback */
++/* irq_chip eoi callback, called with irq descriptor lock held */
+ static void xive_irq_eoi(struct irq_data *d)
+ {
+       struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+@@ -366,6 +378,8 @@ static void xive_irq_eoi(struct irq_data
+       if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) &&
+           !(xd->flags & XIVE_IRQ_NO_EOI))
+               xive_do_source_eoi(irqd_to_hwirq(d), xd);
++      else
++              xd->stale_p = true;
+       /*
+        * Clear saved_p to indicate that it's no longer occupying
+@@ -397,11 +411,16 @@ static void xive_do_source_set_mask(stru
+        */
+       if (mask) {
+               val = xive_esb_read(xd, XIVE_ESB_SET_PQ_01);
+-              xd->saved_p = !!(val & XIVE_ESB_VAL_P);
+-      } else if (xd->saved_p)
++              if (!xd->stale_p && !!(val & XIVE_ESB_VAL_P))
++                      xd->saved_p = true;
++              xd->stale_p = false;
++      } else if (xd->saved_p) {
+               xive_esb_read(xd, XIVE_ESB_SET_PQ_10);
+-      else
++              xd->saved_p = false;
++      } else {
+               xive_esb_read(xd, XIVE_ESB_SET_PQ_00);
++              xd->stale_p = false;
++      }
+ }
+ /*
+@@ -541,6 +560,8 @@ static unsigned int xive_irq_startup(str
+       unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+       int target, rc;
++      xd->saved_p = false;
++      xd->stale_p = false;
+       pr_devel("xive_irq_startup: irq %d [0x%x] data @%p\n",
+                d->irq, hw_irq, d);
+@@ -587,6 +608,7 @@ static unsigned int xive_irq_startup(str
+       return 0;
+ }
++/* called with irq descriptor lock held */
+ static void xive_irq_shutdown(struct irq_data *d)
+ {
+       struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+@@ -602,16 +624,6 @@ static void xive_irq_shutdown(struct irq
+       xive_do_source_set_mask(xd, true);
+       /*
+-       * The above may have set saved_p. We clear it otherwise it
+-       * will prevent re-enabling later on. It is ok to forget the
+-       * fact that the interrupt might be in a queue because we are
+-       * accounting that already in xive_dec_target_count() and will
+-       * be re-routing it to a new queue with proper accounting when
+-       * it's started up again
+-       */
+-      xd->saved_p = false;
+-
+-      /*
+        * Mask the interrupt in HW in the IVT/EAS and set the number
+        * to be the "bad" IRQ number
+        */
+@@ -797,6 +809,10 @@ static int xive_irq_retrigger(struct irq
+       return 1;
+ }
++/*
++ * Caller holds the irq descriptor lock, so this won't be called
++ * concurrently with xive_get_irqchip_state on the same interrupt.
++ */
+ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
+ {
+       struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+@@ -820,6 +836,10 @@ static int xive_irq_set_vcpu_affinity(st
+               /* Set it to PQ=10 state to prevent further sends */
+               pq = xive_esb_read(xd, XIVE_ESB_SET_PQ_10);
++              if (!xd->stale_p) {
++                      xd->saved_p = !!(pq & XIVE_ESB_VAL_P);
++                      xd->stale_p = !xd->saved_p;
++              }
+               /* No target ? nothing to do */
+               if (xd->target == XIVE_INVALID_TARGET) {
+@@ -827,7 +847,7 @@ static int xive_irq_set_vcpu_affinity(st
+                        * An untargetted interrupt should have been
+                        * also masked at the source
+                        */
+-                      WARN_ON(pq & 2);
++                      WARN_ON(xd->saved_p);
+                       return 0;
+               }
+@@ -847,9 +867,8 @@ static int xive_irq_set_vcpu_affinity(st
+                * This saved_p is cleared by the host EOI, when we know
+                * for sure the queue slot is no longer in use.
+                */
+-              if (pq & 2) {
+-                      pq = xive_esb_read(xd, XIVE_ESB_SET_PQ_11);
+-                      xd->saved_p = true;
++              if (xd->saved_p) {
++                      xive_esb_read(xd, XIVE_ESB_SET_PQ_11);
+                       /*
+                        * Sync the XIVE source HW to ensure the interrupt
+@@ -862,8 +881,7 @@ static int xive_irq_set_vcpu_affinity(st
+                        */
+                       if (xive_ops->sync_source)
+                               xive_ops->sync_source(hw_irq);
+-              } else
+-                      xd->saved_p = false;
++              }
+       } else {
+               irqd_clr_forwarded_to_vcpu(d);
+@@ -914,6 +932,23 @@ static int xive_irq_set_vcpu_affinity(st
+       return 0;
+ }
++/* Called with irq descriptor lock held. */
++static int xive_get_irqchip_state(struct irq_data *data,
++                                enum irqchip_irq_state which, bool *state)
++{
++      struct xive_irq_data *xd = irq_data_get_irq_handler_data(data);
++
++      switch (which) {
++      case IRQCHIP_STATE_ACTIVE:
++              *state = !xd->stale_p &&
++                       (xd->saved_p ||
++                        !!(xive_esb_read(xd, XIVE_ESB_GET) & XIVE_ESB_VAL_P));
++              return 0;
++      default:
++              return -EINVAL;
++      }
++}
++
+ static struct irq_chip xive_irq_chip = {
+       .name = "XIVE-IRQ",
+       .irq_startup = xive_irq_startup,
+@@ -925,6 +960,7 @@ static struct irq_chip xive_irq_chip = {
+       .irq_set_type = xive_irq_set_type,
+       .irq_retrigger = xive_irq_retrigger,
+       .irq_set_vcpu_affinity = xive_irq_set_vcpu_affinity,
++      .irq_get_irqchip_state = xive_get_irqchip_state,
+ };
+ bool is_xive_irq(struct irq_chip *chip)
+@@ -1338,6 +1374,11 @@ static void xive_flush_cpu_queue(unsigne
+               xd = irq_desc_get_handler_data(desc);
+               /*
++               * Clear saved_p to indicate that it's no longer pending
++               */
++              xd->saved_p = false;
++
++              /*
+                * For LSIs, we EOI, this will cause a resend if it's
+                * still asserted. Otherwise do an MSI retrigger.
+                */
diff --git a/queue-5.3/revert-s390-dasd-add-discard-support-for-ese-volumes.patch b/queue-5.3/revert-s390-dasd-add-discard-support-for-ese-volumes.patch
new file mode 100644 (file)
index 0000000..9b98493
--- /dev/null
@@ -0,0 +1,139 @@
+From 964ce509e2ded52c1a61ad86044cc4d70abd9eb8 Mon Sep 17 00:00:00 2001
+From: Stefan Haberland <sth@linux.ibm.com>
+Date: Tue, 1 Oct 2019 17:34:39 +0200
+Subject: Revert "s390/dasd: Add discard support for ESE volumes"
+
+From: Stefan Haberland <sth@linux.ibm.com>
+
+commit 964ce509e2ded52c1a61ad86044cc4d70abd9eb8 upstream.
+
+This reverts commit 7e64db1597fe114b83fe17d0ba96c6aa5fca419a.
+
+The thin provisioning feature introduces an IOCTL and the discard support
+to allow userspace tools and filesystems to release unused and previously
+allocated space respectively.
+
+During some internal performance improvements and further tests, the
+release of allocated space revealed some issues that may lead to data
+corruption in some configurations when filesystems are mounted with
+discard support enabled.
+
+While we're working on a fix and trying to clarify the situation,
+this commit reverts the discard support for ESE volumes to prevent
+potential data corruption.
+
+Cc: <stable@vger.kernel.org> # 5.3
+Signed-off-by: Stefan Haberland <sth@linux.ibm.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/s390/block/dasd_eckd.c |   57 ++---------------------------------------
+ 1 file changed, 3 insertions(+), 54 deletions(-)
+
+--- a/drivers/s390/block/dasd_eckd.c
++++ b/drivers/s390/block/dasd_eckd.c
+@@ -2055,9 +2055,6 @@ dasd_eckd_check_characteristics(struct d
+       if (readonly)
+               set_bit(DASD_FLAG_DEVICE_RO, &device->flags);
+-      if (dasd_eckd_is_ese(device))
+-              dasd_set_feature(device->cdev, DASD_FEATURE_DISCARD, 1);
+-
+       dev_info(&device->cdev->dev, "New DASD %04X/%02X (CU %04X/%02X) "
+                "with %d cylinders, %d heads, %d sectors%s\n",
+                private->rdc_data.dev_type,
+@@ -3691,14 +3688,6 @@ static int dasd_eckd_release_space(struc
+               return -EINVAL;
+ }
+-static struct dasd_ccw_req *
+-dasd_eckd_build_cp_discard(struct dasd_device *device, struct dasd_block *block,
+-                         struct request *req, sector_t first_trk,
+-                         sector_t last_trk)
+-{
+-      return dasd_eckd_dso_ras(device, block, req, first_trk, last_trk, 1);
+-}
+-
+ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_single(
+                                              struct dasd_device *startdev,
+                                              struct dasd_block *block,
+@@ -4443,10 +4432,6 @@ static struct dasd_ccw_req *dasd_eckd_bu
+       cmdwtd = private->features.feature[12] & 0x40;
+       use_prefix = private->features.feature[8] & 0x01;
+-      if (req_op(req) == REQ_OP_DISCARD)
+-              return dasd_eckd_build_cp_discard(startdev, block, req,
+-                                                first_trk, last_trk);
+-
+       cqr = NULL;
+       if (cdlspecial || dasd_page_cache) {
+               /* do nothing, just fall through to the cmd mode single case */
+@@ -4725,14 +4710,12 @@ static struct dasd_ccw_req *dasd_eckd_bu
+                                                    struct dasd_block *block,
+                                                    struct request *req)
+ {
+-      struct dasd_device *startdev = NULL;
+       struct dasd_eckd_private *private;
+-      struct dasd_ccw_req *cqr;
++      struct dasd_device *startdev;
+       unsigned long flags;
++      struct dasd_ccw_req *cqr;
+-      /* Discard requests can only be processed on base devices */
+-      if (req_op(req) != REQ_OP_DISCARD)
+-              startdev = dasd_alias_get_start_dev(base);
++      startdev = dasd_alias_get_start_dev(base);
+       if (!startdev)
+               startdev = base;
+       private = startdev->private;
+@@ -6513,20 +6496,8 @@ static void dasd_eckd_setup_blk_queue(st
+       unsigned int logical_block_size = block->bp_block;
+       struct request_queue *q = block->request_queue;
+       struct dasd_device *device = block->base;
+-      struct dasd_eckd_private *private;
+-      unsigned int max_discard_sectors;
+-      unsigned int max_bytes;
+-      unsigned int ext_bytes; /* Extent Size in Bytes */
+-      int recs_per_trk;
+-      int trks_per_cyl;
+-      int ext_limit;
+-      int ext_size; /* Extent Size in Cylinders */
+       int max;
+-      private = device->private;
+-      trks_per_cyl = private->rdc_data.trk_per_cyl;
+-      recs_per_trk = recs_per_track(&private->rdc_data, 0, logical_block_size);
+-
+       if (device->features & DASD_FEATURE_USERAW) {
+               /*
+                * the max_blocks value for raw_track access is 256
+@@ -6547,28 +6518,6 @@ static void dasd_eckd_setup_blk_queue(st
+       /* With page sized segments each segment can be translated into one idaw/tidaw */
+       blk_queue_max_segment_size(q, PAGE_SIZE);
+       blk_queue_segment_boundary(q, PAGE_SIZE - 1);
+-
+-      if (dasd_eckd_is_ese(device)) {
+-              /*
+-               * Depending on the extent size, up to UINT_MAX bytes can be
+-               * accepted. However, neither DASD_ECKD_RAS_EXTS_MAX nor the
+-               * device limits should be exceeded.
+-               */
+-              ext_size = dasd_eckd_ext_size(device);
+-              ext_limit = min(private->real_cyl / ext_size, DASD_ECKD_RAS_EXTS_MAX);
+-              ext_bytes = ext_size * trks_per_cyl * recs_per_trk *
+-                      logical_block_size;
+-              max_bytes = UINT_MAX - (UINT_MAX % ext_bytes);
+-              if (max_bytes / ext_bytes > ext_limit)
+-                      max_bytes = ext_bytes * ext_limit;
+-
+-              max_discard_sectors = max_bytes / 512;
+-
+-              blk_queue_max_discard_sectors(q, max_discard_sectors);
+-              blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
+-              q->limits.discard_granularity = ext_bytes;
+-              q->limits.discard_alignment = ext_bytes;
+-      }
+ }
+ static struct ccw_driver dasd_eckd_driver = {
diff --git a/queue-5.3/s390-cio-avoid-calling-strlen-on-null-pointer.patch b/queue-5.3/s390-cio-avoid-calling-strlen-on-null-pointer.patch
new file mode 100644 (file)
index 0000000..e8c4413
--- /dev/null
@@ -0,0 +1,55 @@
+From ea298e6ee8b34b3ed4366be7eb799d0650ebe555 Mon Sep 17 00:00:00 2001
+From: Vasily Gorbik <gor@linux.ibm.com>
+Date: Tue, 17 Sep 2019 20:04:04 +0200
+Subject: s390/cio: avoid calling strlen on null pointer
+
+From: Vasily Gorbik <gor@linux.ibm.com>
+
+commit ea298e6ee8b34b3ed4366be7eb799d0650ebe555 upstream.
+
+Fix the following kasan finding:
+BUG: KASAN: global-out-of-bounds in ccwgroup_create_dev+0x850/0x1140
+Read of size 1 at addr 0000000000000000 by task systemd-udevd.r/561
+
+CPU: 30 PID: 561 Comm: systemd-udevd.r Tainted: G    B
+Hardware name: IBM 3906 M04 704 (LPAR)
+Call Trace:
+([<0000000231b3db7e>] show_stack+0x14e/0x1a8)
+ [<0000000233826410>] dump_stack+0x1d0/0x218
+ [<000000023216fac4>] print_address_description+0x64/0x380
+ [<000000023216f5a8>] __kasan_report+0x138/0x168
+ [<00000002331b8378>] ccwgroup_create_dev+0x850/0x1140
+ [<00000002332b618a>] group_store+0x3a/0x50
+ [<00000002323ac706>] kernfs_fop_write+0x246/0x3b8
+ [<00000002321d409a>] vfs_write+0x132/0x450
+ [<00000002321d47da>] ksys_write+0x122/0x208
+ [<0000000233877102>] system_call+0x2a6/0x2c8
+
+Triggered by:
+openat(AT_FDCWD, "/sys/bus/ccwgroup/drivers/qeth/group",
+               O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, 0666) = 16
+write(16, "0.0.bd00,0.0.bd01,0.0.bd02", 26) = 26
+
+The problem is that __get_next_id in ccwgroup_create_dev might set "buf"
+buffer pointer to NULL and explicit check for that is required.
+
+Cc: stable@vger.kernel.org
+Reviewed-by: Sebastian Ott <sebott@linux.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/s390/cio/ccwgroup.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/s390/cio/ccwgroup.c
++++ b/drivers/s390/cio/ccwgroup.c
+@@ -372,7 +372,7 @@ int ccwgroup_create_dev(struct device *p
+               goto error;
+       }
+       /* Check for trailing stuff. */
+-      if (i == num_devices && strlen(buf) > 0) {
++      if (i == num_devices && buf && strlen(buf) > 0) {
+               rc = -EINVAL;
+               goto error;
+       }
diff --git a/queue-5.3/s390-cio-exclude-subchannels-with-no-parent-from-pseudo-check.patch b/queue-5.3/s390-cio-exclude-subchannels-with-no-parent-from-pseudo-check.patch
new file mode 100644 (file)
index 0000000..f1ae804
--- /dev/null
@@ -0,0 +1,54 @@
+From ab5758848039de9a4b249d46e4ab591197eebaf2 Mon Sep 17 00:00:00 2001
+From: Vasily Gorbik <gor@linux.ibm.com>
+Date: Thu, 19 Sep 2019 15:55:17 +0200
+Subject: s390/cio: exclude subchannels with no parent from pseudo check
+
+From: Vasily Gorbik <gor@linux.ibm.com>
+
+commit ab5758848039de9a4b249d46e4ab591197eebaf2 upstream.
+
+ccw console is created early in start_kernel and used before css is
+initialized or ccw console subchannel is registered. Until then console
+subchannel does not have a parent. For that reason assume subchannels
+with no parent are not pseudo subchannels. This fixes the following
+kasan finding:
+
+BUG: KASAN: global-out-of-bounds in sch_is_pseudo_sch+0x8e/0x98
+Read of size 8 at addr 00000000000005e8 by task swapper/0/0
+
+CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.3.0-rc8-07370-g6ac43dd12538 #2
+Hardware name: IBM 2964 NC9 702 (z/VM 6.4.0)
+Call Trace:
+([<000000000012cd76>] show_stack+0x14e/0x1e0)
+ [<0000000001f7fb44>] dump_stack+0x1a4/0x1f8
+ [<00000000007d7afc>] print_address_description+0x64/0x3c8
+ [<00000000007d75f6>] __kasan_report+0x14e/0x180
+ [<00000000018a2986>] sch_is_pseudo_sch+0x8e/0x98
+ [<000000000189b950>] cio_enable_subchannel+0x1d0/0x510
+ [<00000000018cac7c>] ccw_device_recognition+0x12c/0x188
+ [<0000000002ceb1a8>] ccw_device_enable_console+0x138/0x340
+ [<0000000002cf1cbe>] con3215_init+0x25e/0x300
+ [<0000000002c8770a>] console_init+0x68a/0x9b8
+ [<0000000002c6a3d6>] start_kernel+0x4fe/0x728
+ [<0000000000100070>] startup_continue+0x70/0xd0
+
+Cc: stable@vger.kernel.org
+Reviewed-by: Sebastian Ott <sebott@linux.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/s390/cio/css.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/s390/cio/css.c
++++ b/drivers/s390/cio/css.c
+@@ -1388,6 +1388,8 @@ device_initcall(cio_settle_init);
+ int sch_is_pseudo_sch(struct subchannel *sch)
+ {
++      if (!sch->dev.parent)
++              return 0;
+       return sch == to_css(sch->dev.parent)->pseudo_subchannel;
+ }
diff --git a/queue-5.3/s390-dasd-fix-error-handling-during-online-processing.patch b/queue-5.3/s390-dasd-fix-error-handling-during-online-processing.patch
new file mode 100644 (file)
index 0000000..a12e1b1
--- /dev/null
@@ -0,0 +1,94 @@
+From dd45483981ac62f432e073fea6e5e11200b9070d Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Jan=20H=C3=B6ppner?= <hoeppner@linux.ibm.com>
+Date: Tue, 1 Oct 2019 17:34:38 +0200
+Subject: s390/dasd: Fix error handling during online processing
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jan Höppner <hoeppner@linux.ibm.com>
+
+commit dd45483981ac62f432e073fea6e5e11200b9070d upstream.
+
+It is possible that the CCW commands for reading volume and extent pool
+information are not supported, either by the storage server (for
+dedicated DASDs) or by z/VM (for virtual devices, such as MDISKs).
+
+As a command reject will occur in such a case, the current error
+handling leads to a failing online processing and thus the DASD can't be
+used at all.
+
+Since the data being read is not essential for a fully operational
+DASD, the error handling can be removed. Information about the failing
+command is sent to the s390dbf debug feature.
+
+Fixes: c729696bcf8b ("s390/dasd: Recognise data for ESE volumes")
+Cc: <stable@vger.kernel.org> # 5.3
+Reported-by: Frank Heimes <frank.heimes@canonical.com>
+Signed-off-by: Jan Höppner <hoeppner@linux.ibm.com>
+Signed-off-by: Stefan Haberland <sth@linux.ibm.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/s390/block/dasd_eckd.c |   24 ++++++++----------------
+ 1 file changed, 8 insertions(+), 16 deletions(-)
+
+--- a/drivers/s390/block/dasd_eckd.c
++++ b/drivers/s390/block/dasd_eckd.c
+@@ -1553,8 +1553,8 @@ static int dasd_eckd_read_vol_info(struc
+       if (rc == 0) {
+               memcpy(&private->vsq, vsq, sizeof(*vsq));
+       } else {
+-              dev_warn(&device->cdev->dev,
+-                       "Reading the volume storage information failed with rc=%d\n", rc);
++              DBF_EVENT_DEVID(DBF_WARNING, device->cdev,
++                              "Reading the volume storage information failed with rc=%d", rc);
+       }
+       if (useglobal)
+@@ -1737,8 +1737,8 @@ static int dasd_eckd_read_ext_pool_info(
+       if (rc == 0) {
+               dasd_eckd_cpy_ext_pool_data(device, lcq);
+       } else {
+-              dev_warn(&device->cdev->dev,
+-                       "Reading the logical configuration failed with rc=%d\n", rc);
++              DBF_EVENT_DEVID(DBF_WARNING, device->cdev,
++                              "Reading the logical configuration failed with rc=%d", rc);
+       }
+       dasd_sfree_request(cqr, cqr->memdev);
+@@ -2020,14 +2020,10 @@ dasd_eckd_check_characteristics(struct d
+       dasd_eckd_read_features(device);
+       /* Read Volume Information */
+-      rc = dasd_eckd_read_vol_info(device);
+-      if (rc)
+-              goto out_err3;
++      dasd_eckd_read_vol_info(device);
+       /* Read Extent Pool Information */
+-      rc = dasd_eckd_read_ext_pool_info(device);
+-      if (rc)
+-              goto out_err3;
++      dasd_eckd_read_ext_pool_info(device);
+       /* Read Device Characteristics */
+       rc = dasd_generic_read_dev_chars(device, DASD_ECKD_MAGIC,
+@@ -5663,14 +5659,10 @@ static int dasd_eckd_restore_device(stru
+       dasd_eckd_read_features(device);
+       /* Read Volume Information */
+-      rc = dasd_eckd_read_vol_info(device);
+-      if (rc)
+-              goto out_err2;
++      dasd_eckd_read_vol_info(device);
+       /* Read Extent Pool Information */
+-      rc = dasd_eckd_read_ext_pool_info(device);
+-      if (rc)
+-              goto out_err2;
++      dasd_eckd_read_ext_pool_info(device);
+       /* Read Device Characteristics */
+       rc = dasd_generic_read_dev_chars(device, DASD_ECKD_MAGIC,
diff --git a/queue-5.3/s390-process-avoid-potential-reading-of-freed-stack.patch b/queue-5.3/s390-process-avoid-potential-reading-of-freed-stack.patch
new file mode 100644 (file)
index 0000000..72a4a97
--- /dev/null
@@ -0,0 +1,62 @@
+From 8769f610fe6d473e5e8e221709c3ac402037da6c Mon Sep 17 00:00:00 2001
+From: Vasily Gorbik <gor@linux.ibm.com>
+Date: Tue, 13 Aug 2019 20:11:08 +0200
+Subject: s390/process: avoid potential reading of freed stack
+
+From: Vasily Gorbik <gor@linux.ibm.com>
+
+commit 8769f610fe6d473e5e8e221709c3ac402037da6c upstream.
+
+With THREAD_INFO_IN_TASK (which is selected on s390) task's stack usage
+is refcounted and should always be protected by get/put when touching
+other task's stack to avoid race conditions with task's destruction code.
+
+Fixes: d5c352cdd022 ("s390: move thread_info into task_struct")
+Cc: stable@vger.kernel.org # v4.10+
+Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/kernel/process.c |   22 ++++++++++++++++------
+ 1 file changed, 16 insertions(+), 6 deletions(-)
+
+--- a/arch/s390/kernel/process.c
++++ b/arch/s390/kernel/process.c
+@@ -184,20 +184,30 @@ unsigned long get_wchan(struct task_stru
+       if (!p || p == current || p->state == TASK_RUNNING || !task_stack_page(p))
+               return 0;
++
++      if (!try_get_task_stack(p))
++              return 0;
++
+       low = task_stack_page(p);
+       high = (struct stack_frame *) task_pt_regs(p);
+       sf = (struct stack_frame *) p->thread.ksp;
+-      if (sf <= low || sf > high)
+-              return 0;
++      if (sf <= low || sf > high) {
++              return_address = 0;
++              goto out;
++      }
+       for (count = 0; count < 16; count++) {
+               sf = (struct stack_frame *) sf->back_chain;
+-              if (sf <= low || sf > high)
+-                      return 0;
++              if (sf <= low || sf > high) {
++                      return_address = 0;
++                      goto out;
++              }
+               return_address = sf->gprs[8];
+               if (!in_sched_functions(return_address))
+-                      return return_address;
++                      goto out;
+       }
+-      return 0;
++out:
++      put_task_stack(p);
++      return return_address;
+ }
+ unsigned long arch_align_stack(unsigned long sp)
diff --git a/queue-5.3/s390-sclp-fix-bit-checked-for-has_sipl.patch b/queue-5.3/s390-sclp-fix-bit-checked-for-has_sipl.patch
new file mode 100644 (file)
index 0000000..578674c
--- /dev/null
@@ -0,0 +1,31 @@
+From 4df9a82549cfed5b52da21e7d007b79b2ea1769a Mon Sep 17 00:00:00 2001
+From: Philipp Rudo <prudo@linux.ibm.com>
+Date: Thu, 29 Aug 2019 15:38:37 +0200
+Subject: s390/sclp: Fix bit checked for has_sipl
+
+From: Philipp Rudo <prudo@linux.ibm.com>
+
+commit 4df9a82549cfed5b52da21e7d007b79b2ea1769a upstream.
+
+Fixes: c9896acc7851 ("s390/ipl: Provide has_secure sysfs attribute")
+Cc: stable@vger.kernel.org # 5.2+
+Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
+Signed-off-by: Philipp Rudo <prudo@linux.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/s390/char/sclp_early.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/s390/char/sclp_early.c
++++ b/drivers/s390/char/sclp_early.c
+@@ -40,7 +40,7 @@ static void __init sclp_early_facilities
+       sclp.has_gisaf = !!(sccb->fac118 & 0x08);
+       sclp.has_hvs = !!(sccb->fac119 & 0x80);
+       sclp.has_kss = !!(sccb->fac98 & 0x01);
+-      sclp.has_sipl = !!(sccb->cbl & 0x02);
++      sclp.has_sipl = !!(sccb->cbl & 0x4000);
+       if (sccb->fac85 & 0x02)
+               S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP;
+       if (sccb->fac91 & 0x40)
diff --git a/queue-5.3/s390-topology-avoid-firing-events-before-kobjs-are-created.patch b/queue-5.3/s390-topology-avoid-firing-events-before-kobjs-are-created.patch
new file mode 100644 (file)
index 0000000..032e25f
--- /dev/null
@@ -0,0 +1,61 @@
+From f3122a79a1b0a113d3aea748e0ec26f2cb2889de Mon Sep 17 00:00:00 2001
+From: Vasily Gorbik <gor@linux.ibm.com>
+Date: Tue, 17 Sep 2019 22:59:03 +0200
+Subject: s390/topology: avoid firing events before kobjs are created
+
+From: Vasily Gorbik <gor@linux.ibm.com>
+
+commit f3122a79a1b0a113d3aea748e0ec26f2cb2889de upstream.
+
+arch_update_cpu_topology is first called from:
+kernel_init_freeable->sched_init_smp->sched_init_domains
+
+even before cpus have been registered in:
+kernel_init_freeable->do_one_initcall->s390_smp_init
+
+Do not trigger kobject_uevent change events until cpu devices are
+actually created. Fixes the following kasan findings:
+
+BUG: KASAN: global-out-of-bounds in kobject_uevent_env+0xb40/0xee0
+Read of size 8 at addr 0000000000000020 by task swapper/0/1
+
+BUG: KASAN: global-out-of-bounds in kobject_uevent_env+0xb36/0xee0
+Read of size 8 at addr 0000000000000018 by task swapper/0/1
+
+CPU: 0 PID: 1 Comm: swapper/0 Tainted: G    B
+Hardware name: IBM 3906 M04 704 (LPAR)
+Call Trace:
+([<0000000143c6db7e>] show_stack+0x14e/0x1a8)
+ [<0000000145956498>] dump_stack+0x1d0/0x218
+ [<000000014429fb4c>] print_address_description+0x64/0x380
+ [<000000014429f630>] __kasan_report+0x138/0x168
+ [<0000000145960b96>] kobject_uevent_env+0xb36/0xee0
+ [<0000000143c7c47c>] arch_update_cpu_topology+0x104/0x108
+ [<0000000143df9e22>] sched_init_domains+0x62/0xe8
+ [<000000014644c94a>] sched_init_smp+0x3a/0xc0
+ [<0000000146433a20>] kernel_init_freeable+0x558/0x958
+ [<000000014599002a>] kernel_init+0x22/0x160
+ [<00000001459a71d4>] ret_from_fork+0x28/0x30
+ [<00000001459a71dc>] kernel_thread_starter+0x0/0x10
+
+Cc: stable@vger.kernel.org
+Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/kernel/topology.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/s390/kernel/topology.c
++++ b/arch/s390/kernel/topology.c
+@@ -311,7 +311,8 @@ int arch_update_cpu_topology(void)
+       on_each_cpu(__arch_update_dedicated_flag, NULL, 0);
+       for_each_online_cpu(cpu) {
+               dev = get_cpu_device(cpu);
+-              kobject_uevent(&dev->kobj, KOBJ_CHANGE);
++              if (dev)
++                      kobject_uevent(&dev->kobj, KOBJ_CHANGE);
+       }
+       return rc;
+ }
diff --git a/queue-5.3/series b/queue-5.3/series
new file mode 100644 (file)
index 0000000..b1c703d
--- /dev/null
@@ -0,0 +1,52 @@
+s390-process-avoid-potential-reading-of-freed-stack.patch
+s390-sclp-fix-bit-checked-for-has_sipl.patch
+kvm-s390-test-for-bad-access-register-and-size-at-the-start-of-s390_mem_op.patch
+s390-topology-avoid-firing-events-before-kobjs-are-created.patch
+s390-cio-avoid-calling-strlen-on-null-pointer.patch
+s390-cio-exclude-subchannels-with-no-parent-from-pseudo-check.patch
+s390-dasd-fix-error-handling-during-online-processing.patch
+revert-s390-dasd-add-discard-support-for-ese-volumes.patch
+kvm-s390-fix-__insn32_query-inline-assembly.patch
+kvm-ppc-book3s-enable-xive-native-capability-only-if-opal-has-required-functions.patch
+kvm-ppc-book3s-hv-xive-free-escalation-interrupts-before-disabling-the-vp.patch
+kvm-ppc-book3s-hv-don-t-push-xive-context-when-not-using-xive-device.patch
+kvm-ppc-book3s-hv-fix-race-in-re-enabling-xive-escalation-interrupts.patch
+kvm-ppc-book3s-hv-check-for-mmu-ready-on-piggybacked-virtual-cores.patch
+kvm-ppc-book3s-hv-don-t-lose-pending-doorbell-request-on-migration-on-p9.patch
+kvm-x86-fix-userspace-set-invalid-cr4.patch
+nbd-fix-max-number-of-supported-devs.patch
+pm-devfreq-tegra-fix-khz-to-hz-conversion.patch
+asoc-define-a-set-of-dapm-pre-post-up-events.patch
+asoc-sgtl5000-improve-vag-power-and-mute-control.patch
+powerpc-xive-implement-get_irqchip_state-method-for-xive-to-fix-shutdown-race.patch
+powerpc-mce-fix-mce-handling-for-huge-pages.patch
+powerpc-mce-schedule-work-from-irq_work.patch
+powerpc-603-fix-handling-of-the-dirty-flag.patch
+powerpc-32s-fix-boot-failure-with-debug_pagealloc-without-kasan.patch
+powerpc-ptdump-fix-addresses-display-on-ppc32.patch
+powerpc-powernv-restrict-opal-symbol-map-to-only-be-readable-by-root.patch
+powerpc-pseries-fix-cpu_hotplug_lock-acquisition-in-resize_hpt.patch
+powerpc-powernv-ioda-fix-race-in-tce-level-allocation.patch
+powerpc-kasan-fix-parallel-loading-of-modules.patch
+powerpc-kasan-fix-shadow-area-set-up-for-modules.patch
+powerpc-book3s64-mm-don-t-do-tlbie-fixup-for-some-hardware-revisions.patch
+powerpc-book3s64-radix-rename-cpu_ftr_p9_tlbie_bug-feature-flag.patch
+powerpc-mm-add-a-helper-to-select-page_kernel_ro-or-page_readonly.patch
+powerpc-mm-fix-an-oops-in-kasan_mmu_init.patch
+powerpc-mm-fixup-tlbie-vs-mtpidr-mtlpidr-ordering-issue-on-power9.patch
+can-mcp251x-mcp251x_hw_reset-allow-more-time-after-a-reset.patch
+tools-lib-traceevent-fix-robust-test-of-do_generate_dynamic_list_file.patch
+tools-lib-traceevent-do-not-free-tep-cmdlines-in-add_new_comm-on-failure.patch
+crypto-qat-silence-smp_processor_id-warning.patch
+crypto-skcipher-unmap-pages-after-an-external-error.patch
+crypto-cavium-zip-add-missing-single_release.patch
+crypto-caam-qi-fix-error-handling-in-ern-handler.patch
+crypto-caam-fix-concurrency-issue-in-givencrypt-descriptor.patch
+crypto-ccree-account-for-tee-not-ready-to-report.patch
+crypto-ccree-use-the-full-crypt-length-value.patch
+mips-treat-loongson-extensions-as-ases.patch
+power-supply-sbs-battery-use-correct-flags-field.patch
+power-supply-sbs-battery-only-return-health-when-battery-present.patch
+tracing-make-sure-variable-reference-alias-has-correct-var_ref_idx.patch
+usercopy-avoid-highmem-pfn-warning.patch
+timer-read-jiffies-once-when-forwarding-base-clk.patch
diff --git a/queue-5.3/timer-read-jiffies-once-when-forwarding-base-clk.patch b/queue-5.3/timer-read-jiffies-once-when-forwarding-base-clk.patch
new file mode 100644 (file)
index 0000000..86e1756
--- /dev/null
@@ -0,0 +1,78 @@
+From e430d802d6a3aaf61bd3ed03d9404888a29b9bf9 Mon Sep 17 00:00:00 2001
+From: Li RongQing <lirongqing@baidu.com>
+Date: Thu, 19 Sep 2019 20:04:47 +0800
+Subject: timer: Read jiffies once when forwarding base clk
+
+From: Li RongQing <lirongqing@baidu.com>
+
+commit e430d802d6a3aaf61bd3ed03d9404888a29b9bf9 upstream.
+
+The timer delayed for more than 3 seconds warning was triggered during
+testing.
+
+  Workqueue: events_unbound sched_tick_remote
+  RIP: 0010:sched_tick_remote+0xee/0x100
+  ...
+  Call Trace:
+   process_one_work+0x18c/0x3a0
+   worker_thread+0x30/0x380
+   kthread+0x113/0x130
+   ret_from_fork+0x22/0x40
+
+The reason is that the code in collect_expired_timers() uses jiffies
+unprotected:
+
+    if (next_event > jiffies)
+        base->clk = jiffies;
+
+As the compiler is allowed to reload the value base->clk can advance
+between the check and the store and in the worst case advance farther than
+next event. That causes the timer expiry to be delayed until the wheel
+pointer wraps around.
+
+Convert the code to use READ_ONCE()
+
+Fixes: 236968383cf5 ("timers: Optimize collect_expired_timers() for NOHZ")
+Signed-off-by: Li RongQing <lirongqing@baidu.com>
+Signed-off-by: Liang ZhiCheng <liangzhicheng@baidu.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/1568894687-14499-1-git-send-email-lirongqing@baidu.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/time/timer.c |    8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/kernel/time/timer.c
++++ b/kernel/time/timer.c
+@@ -1593,24 +1593,26 @@ void timer_clear_idle(void)
+ static int collect_expired_timers(struct timer_base *base,
+                                 struct hlist_head *heads)
+ {
++      unsigned long now = READ_ONCE(jiffies);
++
+       /*
+        * NOHZ optimization. After a long idle sleep we need to forward the
+        * base to current jiffies. Avoid a loop by searching the bitfield for
+        * the next expiring timer.
+        */
+-      if ((long)(jiffies - base->clk) > 2) {
++      if ((long)(now - base->clk) > 2) {
+               unsigned long next = __next_timer_interrupt(base);
+               /*
+                * If the next timer is ahead of time forward to current
+                * jiffies, otherwise forward to the next expiry time:
+                */
+-              if (time_after(next, jiffies)) {
++              if (time_after(next, now)) {
+                       /*
+                        * The call site will increment base->clk and then
+                        * terminate the expiry loop immediately.
+                        */
+-                      base->clk = jiffies;
++                      base->clk = now;
+                       return 0;
+               }
+               base->clk = next;
diff --git a/queue-5.3/tools-lib-traceevent-do-not-free-tep-cmdlines-in-add_new_comm-on-failure.patch b/queue-5.3/tools-lib-traceevent-do-not-free-tep-cmdlines-in-add_new_comm-on-failure.patch
new file mode 100644 (file)
index 0000000..47728a0
--- /dev/null
@@ -0,0 +1,52 @@
+From b0215e2d6a18d8331b2d4a8b38ccf3eff783edb1 Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
+Date: Wed, 28 Aug 2019 15:05:28 -0400
+Subject: tools lib traceevent: Do not free tep->cmdlines in add_new_comm() on failure
+
+From: Steven Rostedt (VMware) <rostedt@goodmis.org>
+
+commit b0215e2d6a18d8331b2d4a8b38ccf3eff783edb1 upstream.
+
+If the re-allocation of tep->cmdlines succeeds, then the previous
+allocation of tep->cmdlines will be freed. If we later fail in
+add_new_comm(), we must not free cmdlines, and also should assign
+tep->cmdlines to the new allocation. Otherwise when freeing tep, the
+tep->cmdlines will be pointing to garbage.
+
+Fixes: a6d2a61ac653a ("tools lib traceevent: Remove some die() calls")
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Jiri Olsa <jolsa@redhat.com>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: linux-trace-devel@vger.kernel.org
+Cc: stable@vger.kernel.org
+Link: http://lkml.kernel.org/r/20190828191819.970121417@goodmis.org
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/lib/traceevent/event-parse.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/tools/lib/traceevent/event-parse.c
++++ b/tools/lib/traceevent/event-parse.c
+@@ -269,10 +269,10 @@ static int add_new_comm(struct tep_handl
+               errno = ENOMEM;
+               return -1;
+       }
++      tep->cmdlines = cmdlines;
+       cmdlines[tep->cmdline_count].comm = strdup(comm);
+       if (!cmdlines[tep->cmdline_count].comm) {
+-              free(cmdlines);
+               errno = ENOMEM;
+               return -1;
+       }
+@@ -283,7 +283,6 @@ static int add_new_comm(struct tep_handl
+               tep->cmdline_count++;
+       qsort(cmdlines, tep->cmdline_count, sizeof(*cmdlines), cmdline_cmp);
+-      tep->cmdlines = cmdlines;
+       return 0;
+ }
diff --git a/queue-5.3/tools-lib-traceevent-fix-robust-test-of-do_generate_dynamic_list_file.patch b/queue-5.3/tools-lib-traceevent-fix-robust-test-of-do_generate_dynamic_list_file.patch
new file mode 100644 (file)
index 0000000..65f18a5
--- /dev/null
@@ -0,0 +1,55 @@
+From 82a2f88458d70704be843961e10b5cef9a6e95d3 Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
+Date: Mon, 5 Aug 2019 13:01:50 -0400
+Subject: tools lib traceevent: Fix "robust" test of do_generate_dynamic_list_file
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Steven Rostedt (VMware) <rostedt@goodmis.org>
+
+commit 82a2f88458d70704be843961e10b5cef9a6e95d3 upstream.
+
+The tools/lib/traceevent/Makefile had a test added to it to detect a failure
+of the "nm" when making the dynamic list file (whatever that is). The
+problem is that the test sorts the values "U W w" and some versions of sort
+will place "w" ahead of "W" (even though it has a higher ASCII value, and
+break the test.
+
+Add 'tr "w" "W"' to merge the two and not worry about the ordering.
+
+Reported-by: Tzvetomir Stoyanov <tstoyanov@vmware.com>
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: David Carrillo-Cisneros <davidcc@google.com>
+Cc: He Kuang <hekuang@huawei.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Michal Marek <mmarek@suse.com>
+Cc: Paul Turner <pjt@google.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Stephane Eranian <eranian@google.com>
+Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Cc: Wang Nan <wangnan0@huawei.com>
+Cc: stable@vger.kernel.org
+Fixes: 6467753d61399 ("tools lib traceevent: Robustify do_generate_dynamic_list_file")
+Link: http://lkml.kernel.org/r/20190805130150.25acfeb1@gandalf.local.home
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/lib/traceevent/Makefile |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/tools/lib/traceevent/Makefile
++++ b/tools/lib/traceevent/Makefile
+@@ -266,8 +266,8 @@ endef
+ define do_generate_dynamic_list_file
+       symbol_type=`$(NM) -u -D $1 | awk 'NF>1 {print $$1}' | \
+-      xargs echo "U W w" | tr ' ' '\n' | sort -u | xargs echo`;\
+-      if [ "$$symbol_type" = "U W w" ];then                           \
++      xargs echo "U w W" | tr 'w ' 'W\n' | sort -u | xargs echo`;\
++      if [ "$$symbol_type" = "U W" ];then                             \
+               (echo '{';                                              \
+               $(NM) -u -D $1 | awk 'NF>1 {print "\t"$$2";"}' | sort -u;\
+               echo '};';                                              \
diff --git a/queue-5.3/tracing-make-sure-variable-reference-alias-has-correct-var_ref_idx.patch b/queue-5.3/tracing-make-sure-variable-reference-alias-has-correct-var_ref_idx.patch
new file mode 100644 (file)
index 0000000..f1dc504
--- /dev/null
@@ -0,0 +1,97 @@
+From 17f8607a1658a8e70415eef67909f990d13017b5 Mon Sep 17 00:00:00 2001
+From: Tom Zanussi <zanussi@kernel.org>
+Date: Sun, 1 Sep 2019 17:02:01 -0500
+Subject: tracing: Make sure variable reference alias has correct var_ref_idx
+
+From: Tom Zanussi <zanussi@kernel.org>
+
+commit 17f8607a1658a8e70415eef67909f990d13017b5 upstream.
+
+Original changelog from Steve Rostedt (except last sentence which
+explains the problem, and the Fixes: tag):
+
+I performed a three way histogram with the following commands:
+
+echo 'irq_lat u64 lat pid_t pid' > synthetic_events
+echo 'wake_lat u64 lat u64 irqlat pid_t pid' >> synthetic_events
+echo 'hist:keys=common_pid:irqts=common_timestamp.usecs if function == 0xffffffff81200580' > events/timer/hrtimer_start/trigger
+echo 'hist:keys=common_pid:lat=common_timestamp.usecs-$irqts:onmatch(timer.hrtimer_start).irq_lat($lat,pid) if common_flags & 1' > events/sched/sched_waking/trigger
+echo 'hist:keys=pid:wakets=common_timestamp.usecs,irqlat=lat' > events/synthetic/irq_lat/trigger
+echo 'hist:keys=next_pid:lat=common_timestamp.usecs-$wakets,irqlat=$irqlat:onmatch(synthetic.irq_lat).wake_lat($lat,$irqlat,next_pid)' > events/sched/sched_switch/trigger
+echo 1 > events/synthetic/wake_lat/enable
+
+Basically I wanted to see:
+
+ hrtimer_start (calling function tick_sched_timer)
+
+Note:
+
+  # grep tick_sched_timer /proc/kallsyms
+ffffffff81200580 t tick_sched_timer
+
+And save the time of that, and then record sched_waking if it is called
+in interrupt context and with the same pid as the hrtimer_start, it
+will record the latency between that and the waking event.
+
+I then look at when the task that is woken is scheduled in, and record
+the latency between the wakeup and the task running.
+
+At the end, the wake_lat synthetic event will show the wakeup to
+scheduled latency, as well as the irq latency from hrtimer_start to
+the wakeup. The problem is that I found this:
+
+          <idle>-0     [007] d...   190.485261: wake_lat: lat=27 irqlat=190485230 pid=698
+          <idle>-0     [005] d...   190.485283: wake_lat: lat=40 irqlat=190485239 pid=10
+          <idle>-0     [002] d...   190.488327: wake_lat: lat=56 irqlat=190488266 pid=335
+          <idle>-0     [005] d...   190.489330: wake_lat: lat=64 irqlat=190489262 pid=10
+          <idle>-0     [003] d...   190.490312: wake_lat: lat=43 irqlat=190490265 pid=77
+          <idle>-0     [005] d...   190.493322: wake_lat: lat=54 irqlat=190493262 pid=10
+          <idle>-0     [005] d...   190.497305: wake_lat: lat=35 irqlat=190497267 pid=10
+          <idle>-0     [005] d...   190.501319: wake_lat: lat=50 irqlat=190501264 pid=10
+
+The irqlat seemed quite large! Investigating this further, if I had
+enabled the irq_lat synthetic event, I noticed this:
+
+          <idle>-0     [002] d.s.   249.429308: irq_lat: lat=164968 pid=335
+          <idle>-0     [002] d...   249.429369: wake_lat: lat=55 irqlat=249429308 pid=335
+
+Notice that the timestamp of the irq_lat "249.429308" is awfully
+similar to the reported irqlat variable. In fact, all instances were
+like this. It appeared that:
+
+  irqlat=$irqlat
+
+Wasn't assigning the old $irqlat to the new irqlat variable, but
+instead was assigning the $irqts to it.
+
+The issue is that assigning the old $irqlat to the new irqlat variable
+creates a variable reference alias, but the alias creation code
+forgets to make sure the alias uses the same var_ref_idx to access the
+reference.
+
+Link: http://lkml.kernel.org/r/1567375321.5282.12.camel@kernel.org
+
+Cc: Linux Trace Devel <linux-trace-devel@vger.kernel.org>
+Cc: linux-rt-users <linux-rt-users@vger.kernel.org>
+Cc: stable@vger.kernel.org
+Fixes: 7e8b88a30b085 ("tracing: Add hist trigger support for variable reference aliases")
+Reported-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Tom Zanussi <zanussi@kernel.org>
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/trace/trace_events_hist.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/kernel/trace/trace_events_hist.c
++++ b/kernel/trace/trace_events_hist.c
+@@ -2785,6 +2785,8 @@ static struct hist_field *create_alias(s
+               return NULL;
+       }
++      alias->var_ref_idx = var_ref->var_ref_idx;
++
+       return alias;
+ }
diff --git a/queue-5.3/usercopy-avoid-highmem-pfn-warning.patch b/queue-5.3/usercopy-avoid-highmem-pfn-warning.patch
new file mode 100644 (file)
index 0000000..1039bba
--- /dev/null
@@ -0,0 +1,88 @@
+From 314eed30ede02fa925990f535652254b5bad6b65 Mon Sep 17 00:00:00 2001
+From: Kees Cook <keescook@chromium.org>
+Date: Tue, 17 Sep 2019 11:00:25 -0700
+Subject: usercopy: Avoid HIGHMEM pfn warning
+
+From: Kees Cook <keescook@chromium.org>
+
+commit 314eed30ede02fa925990f535652254b5bad6b65 upstream.
+
+When running on a system with >512MB RAM with a 32-bit kernel built with:
+
+       CONFIG_DEBUG_VIRTUAL=y
+       CONFIG_HIGHMEM=y
+       CONFIG_HARDENED_USERCOPY=y
+
+all execve()s will fail due to argv copying into kmap()ed pages, and on
+usercopy checking the calls ultimately of virt_to_page() will be looking
+for "bad" kmap (highmem) pointers due to CONFIG_DEBUG_VIRTUAL=y:
+
+ ------------[ cut here ]------------
+ kernel BUG at ../arch/x86/mm/physaddr.c:83!
+ invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC
+ CPU: 1 PID: 1 Comm: swapper/0 Not tainted 5.3.0-rc8 #6
+ Hardware name: Dell Inc. Inspiron 1318/0C236D, BIOS A04 01/15/2009
+ EIP: __phys_addr+0xaf/0x100
+ ...
+ Call Trace:
+  __check_object_size+0xaf/0x3c0
+  ? __might_sleep+0x80/0xa0
+  copy_strings+0x1c2/0x370
+  copy_strings_kernel+0x2b/0x40
+  __do_execve_file+0x4ca/0x810
+  ? kmem_cache_alloc+0x1c7/0x370
+  do_execve+0x1b/0x20
+  ...
+
+The check is from arch/x86/mm/physaddr.c:
+
+       VIRTUAL_BUG_ON((phys_addr >> PAGE_SHIFT) > max_low_pfn);
+
+Due to the kmap() in fs/exec.c:
+
+               kaddr = kmap(kmapped_page);
+       ...
+       if (copy_from_user(kaddr+offset, str, bytes_to_copy)) ...
+
+Now we can fetch the correct page to avoid the pfn check. In both cases,
+hardened usercopy will need to walk the page-span checker (if enabled)
+to do sanity checking.
+
+Reported-by: Randy Dunlap <rdunlap@infradead.org>
+Tested-by: Randy Dunlap <rdunlap@infradead.org>
+Fixes: f5509cc18daa ("mm: Hardened usercopy")
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Link: https://lore.kernel.org/r/201909171056.7F2FFD17@keescook
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/usercopy.c |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/mm/usercopy.c
++++ b/mm/usercopy.c
+@@ -11,6 +11,7 @@
+ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+ #include <linux/mm.h>
++#include <linux/highmem.h>
+ #include <linux/slab.h>
+ #include <linux/sched.h>
+ #include <linux/sched/task.h>
+@@ -227,7 +228,12 @@ static inline void check_heap_object(con
+       if (!virt_addr_valid(ptr))
+               return;
+-      page = virt_to_head_page(ptr);
++      /*
++       * When CONFIG_HIGHMEM=y, kmap_to_page() will give either the
++       * highmem page or fallback to virt_to_page(). The following
++       * is effectively a highmem-aware virt_to_head_page().
++       */
++      page = compound_head(kmap_to_page((void *)ptr));
+       if (PageSlab(page)) {
+               /* Check slab allocator for flags and size. */