From: Greg Kroah-Hartman Date: Fri, 8 Nov 2019 16:56:24 +0000 (+0100) Subject: 4.14-stable patches X-Git-Tag: v4.4.200~3 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=b051d508df02cf751a5d235f2ae6ab1079b41f47;p=thirdparty%2Fkernel%2Fstable-queue.git 4.14-stable patches added patches: iio-adc-stm32-adc-fix-a-race-when-using-several-adcs-with-dma-and-irq.patch iio-adc-stm32-adc-move-registers-definitions.patch powerpc-book3s64-mm-don-t-do-tlbie-fixup-for-some-hardware-revisions.patch powerpc-book3s64-radix-rename-cpu_ftr_p9_tlbie_bug-feature-flag.patch powerpc-mm-fixup-tlbie-vs-mtpidr-mtlpidr-ordering-issue-on-power9.patch powerpc-mm-fixup-tlbie-vs-store-ordering-issue-on-power9.patch selftests-powerpc-add-test-case-for-tlbie-vs-mtpidr-ordering-issue.patch selftests-powerpc-fix-compile-error-on-tlbie_test-due-to-newer-gcc.patch --- diff --git a/queue-4.14/iio-adc-stm32-adc-fix-a-race-when-using-several-adcs-with-dma-and-irq.patch b/queue-4.14/iio-adc-stm32-adc-fix-a-race-when-using-several-adcs-with-dma-and-irq.patch new file mode 100644 index 00000000000..ef94b70f6d7 --- /dev/null +++ b/queue-4.14/iio-adc-stm32-adc-fix-a-race-when-using-several-adcs-with-dma-and-irq.patch @@ -0,0 +1,130 @@ +From dcb10920179ab74caf88a6f2afadecfc2743b910 Mon Sep 17 00:00:00 2001 +From: Fabrice Gasnier +Date: Tue, 17 Sep 2019 14:38:16 +0200 +Subject: iio: adc: stm32-adc: fix a race when using several adcs with dma and irq + +From: Fabrice Gasnier + +commit dcb10920179ab74caf88a6f2afadecfc2743b910 upstream. + +End of conversion may be handled by using IRQ or DMA. There may be a +race when two conversions complete at the same time on several ADCs. +EOC can be read as 'set' for several ADCs, with: +- an ADC configured to use IRQs. EOCIE bit is set. The handler is normally + called in this case. +- an ADC configured to use DMA. EOCIE bit isn't set. EOC triggers the DMA + request instead. It's then automatically cleared by DMA read. But the + handler gets called due to status bit is temporarily set (IRQ triggered + by the other ADC). +So both EOC status bit in CSR and EOCIE control bit must be checked +before invoking the interrupt handler (e.g. call ISR only for +IRQ-enabled ADCs). 
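+
+Condensed from the stm32_adc_irq_handler() hunk below (the helper and
+mask names are those introduced by this patch), the handler now gates
+each dispatch on both bits:
+
+	/* Dispatch only if this ADC has EOC set *and* EOCIE enabled */
+	if (status & priv->cfg->regs->eoc1_msk &&
+	    stm32_adc_eoc_enabled(priv, 0))
+		generic_handle_irq(irq_find_mapping(priv->domain, 0));
+
+where stm32_adc_eoc_enabled() reads the instance's interrupt enable
+register (CR1 on STM32F4, IER on STM32H7) at its per-instance offset
+and tests the eocie_msk bit.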
+ +Fixes: 2763ea0585c9 ("iio: adc: stm32: add optional dma support") + +Signed-off-by: Fabrice Gasnier +Cc: +Signed-off-by: Jonathan Cameron +Signed-off-by: Greg Kroah-Hartman + + +--- + drivers/iio/adc/stm32-adc-core.c | 43 ++++++++++++++++++++++++++++++++++++--- + drivers/iio/adc/stm32-adc-core.h | 1 + 2 files changed, 41 insertions(+), 3 deletions(-) + +--- a/drivers/iio/adc/stm32-adc-core.c ++++ b/drivers/iio/adc/stm32-adc-core.c +@@ -45,12 +45,16 @@ + * @eoc1: adc1 end of conversion flag in @csr + * @eoc2: adc2 end of conversion flag in @csr + * @eoc3: adc3 end of conversion flag in @csr ++ * @ier: interrupt enable register offset for each adc ++ * @eocie_msk: end of conversion interrupt enable mask in @ier + */ + struct stm32_adc_common_regs { + u32 csr; + u32 eoc1_msk; + u32 eoc2_msk; + u32 eoc3_msk; ++ u32 ier; ++ u32 eocie_msk; + }; + + struct stm32_adc_priv; +@@ -244,6 +248,8 @@ static const struct stm32_adc_common_reg + .eoc1_msk = STM32F4_EOC1, + .eoc2_msk = STM32F4_EOC2, + .eoc3_msk = STM32F4_EOC3, ++ .ier = STM32F4_ADC_CR1, ++ .eocie_msk = STM32F4_EOCIE, + }; + + /* STM32H7 common registers definitions */ +@@ -251,8 +257,24 @@ static const struct stm32_adc_common_reg + .csr = STM32H7_ADC_CSR, + .eoc1_msk = STM32H7_EOC_MST, + .eoc2_msk = STM32H7_EOC_SLV, ++ .ier = STM32H7_ADC_IER, ++ .eocie_msk = STM32H7_EOCIE, + }; + ++static const unsigned int stm32_adc_offset[STM32_ADC_MAX_ADCS] = { ++ 0, STM32_ADC_OFFSET, STM32_ADC_OFFSET * 2, ++}; ++ ++static unsigned int stm32_adc_eoc_enabled(struct stm32_adc_priv *priv, ++ unsigned int adc) ++{ ++ u32 ier, offset = stm32_adc_offset[adc]; ++ ++ ier = readl_relaxed(priv->common.base + offset + priv->cfg->regs->ier); ++ ++ return ier & priv->cfg->regs->eocie_msk; ++} ++ + /* ADC common interrupt for all instances */ + static void stm32_adc_irq_handler(struct irq_desc *desc) + { +@@ -263,13 +285,28 @@ static void stm32_adc_irq_handler(struct + chained_irq_enter(chip, desc); + status = readl_relaxed(priv->common.base + priv->cfg->regs->csr); + +- if (status & priv->cfg->regs->eoc1_msk) ++ /* ++ * End of conversion may be handled by using IRQ or DMA. There may be a ++ * race here when two conversions complete at the same time on several ++ * ADCs. EOC may be read 'set' for several ADCs, with: ++ * - an ADC configured to use DMA (EOC triggers the DMA request, and ++ * is then automatically cleared by DR read in hardware) ++ * - an ADC configured to use IRQs (EOCIE bit is set. The handler must ++ * be called in this case) ++ * So both EOC status bit in CSR and EOCIE control bit must be checked ++ * before invoking the interrupt handler (e.g. call ISR only for ++ * IRQ-enabled ADCs). 
++ */ ++ if (status & priv->cfg->regs->eoc1_msk && ++ stm32_adc_eoc_enabled(priv, 0)) + generic_handle_irq(irq_find_mapping(priv->domain, 0)); + +- if (status & priv->cfg->regs->eoc2_msk) ++ if (status & priv->cfg->regs->eoc2_msk && ++ stm32_adc_eoc_enabled(priv, 1)) + generic_handle_irq(irq_find_mapping(priv->domain, 1)); + +- if (status & priv->cfg->regs->eoc3_msk) ++ if (status & priv->cfg->regs->eoc3_msk && ++ stm32_adc_eoc_enabled(priv, 2)) + generic_handle_irq(irq_find_mapping(priv->domain, 2)); + + chained_irq_exit(chip, desc); +--- a/drivers/iio/adc/stm32-adc-core.h ++++ b/drivers/iio/adc/stm32-adc-core.h +@@ -37,6 +37,7 @@ + * -------------------------------------------------------- + */ + #define STM32_ADC_MAX_ADCS 3 ++#define STM32_ADC_OFFSET 0x100 + #define STM32_ADCX_COMN_OFFSET 0x300 + + /* STM32F4 - Registers for each ADC instance */ diff --git a/queue-4.14/iio-adc-stm32-adc-move-registers-definitions.patch b/queue-4.14/iio-adc-stm32-adc-move-registers-definitions.patch new file mode 100644 index 00000000000..20acb9379f5 --- /dev/null +++ b/queue-4.14/iio-adc-stm32-adc-move-registers-definitions.patch @@ -0,0 +1,327 @@ +From 31922f62bb527d749b99dbc776e514bcba29b7fe Mon Sep 17 00:00:00 2001 +From: Fabrice Gasnier +Date: Tue, 17 Sep 2019 14:38:15 +0200 +Subject: iio: adc: stm32-adc: move registers definitions + +From: Fabrice Gasnier + +commit 31922f62bb527d749b99dbc776e514bcba29b7fe upstream. + +Move STM32 ADC registers definitions to common header. +This is precursor patch to: +- iio: adc: stm32-adc: fix a race when using several adcs with dma and irq + +It keeps registers definitions as a whole block, to ease readability and +allow simple access path to EOC bits (readl) in stm32-adc-core driver. + +Fixes: 2763ea0585c9 ("iio: adc: stm32: add optional dma support") + +Signed-off-by: Fabrice Gasnier +Cc: +Signed-off-by: Jonathan Cameron +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/iio/adc/stm32-adc-core.c | 27 ------- + drivers/iio/adc/stm32-adc-core.h | 134 +++++++++++++++++++++++++++++++++++++++ + drivers/iio/adc/stm32-adc.c | 107 ------------------------------- + 3 files changed, 134 insertions(+), 134 deletions(-) + +--- a/drivers/iio/adc/stm32-adc-core.c ++++ b/drivers/iio/adc/stm32-adc-core.c +@@ -33,36 +33,9 @@ + + #include "stm32-adc-core.h" + +-/* STM32F4 - common registers for all ADC instances: 1, 2 & 3 */ +-#define STM32F4_ADC_CSR (STM32_ADCX_COMN_OFFSET + 0x00) +-#define STM32F4_ADC_CCR (STM32_ADCX_COMN_OFFSET + 0x04) +- +-/* STM32F4_ADC_CSR - bit fields */ +-#define STM32F4_EOC3 BIT(17) +-#define STM32F4_EOC2 BIT(9) +-#define STM32F4_EOC1 BIT(1) +- +-/* STM32F4_ADC_CCR - bit fields */ +-#define STM32F4_ADC_ADCPRE_SHIFT 16 +-#define STM32F4_ADC_ADCPRE_MASK GENMASK(17, 16) +- + /* STM32 F4 maximum analog clock rate (from datasheet) */ + #define STM32F4_ADC_MAX_CLK_RATE 36000000 + +-/* STM32H7 - common registers for all ADC instances */ +-#define STM32H7_ADC_CSR (STM32_ADCX_COMN_OFFSET + 0x00) +-#define STM32H7_ADC_CCR (STM32_ADCX_COMN_OFFSET + 0x08) +- +-/* STM32H7_ADC_CSR - bit fields */ +-#define STM32H7_EOC_SLV BIT(18) +-#define STM32H7_EOC_MST BIT(2) +- +-/* STM32H7_ADC_CCR - bit fields */ +-#define STM32H7_PRESC_SHIFT 18 +-#define STM32H7_PRESC_MASK GENMASK(21, 18) +-#define STM32H7_CKMODE_SHIFT 16 +-#define STM32H7_CKMODE_MASK GENMASK(17, 16) +- + /* STM32 H7 maximum analog clock rate (from datasheet) */ + #define STM32H7_ADC_MAX_CLK_RATE 36000000 + +--- a/drivers/iio/adc/stm32-adc-core.h ++++ b/drivers/iio/adc/stm32-adc-core.h +@@ -39,6 +39,140 @@ + 
#define STM32_ADC_MAX_ADCS 3 + #define STM32_ADCX_COMN_OFFSET 0x300 + ++/* STM32F4 - Registers for each ADC instance */ ++#define STM32F4_ADC_SR 0x00 ++#define STM32F4_ADC_CR1 0x04 ++#define STM32F4_ADC_CR2 0x08 ++#define STM32F4_ADC_SMPR1 0x0C ++#define STM32F4_ADC_SMPR2 0x10 ++#define STM32F4_ADC_HTR 0x24 ++#define STM32F4_ADC_LTR 0x28 ++#define STM32F4_ADC_SQR1 0x2C ++#define STM32F4_ADC_SQR2 0x30 ++#define STM32F4_ADC_SQR3 0x34 ++#define STM32F4_ADC_JSQR 0x38 ++#define STM32F4_ADC_JDR1 0x3C ++#define STM32F4_ADC_JDR2 0x40 ++#define STM32F4_ADC_JDR3 0x44 ++#define STM32F4_ADC_JDR4 0x48 ++#define STM32F4_ADC_DR 0x4C ++ ++/* STM32F4 - common registers for all ADC instances: 1, 2 & 3 */ ++#define STM32F4_ADC_CSR (STM32_ADCX_COMN_OFFSET + 0x00) ++#define STM32F4_ADC_CCR (STM32_ADCX_COMN_OFFSET + 0x04) ++ ++/* STM32F4_ADC_SR - bit fields */ ++#define STM32F4_STRT BIT(4) ++#define STM32F4_EOC BIT(1) ++ ++/* STM32F4_ADC_CR1 - bit fields */ ++#define STM32F4_RES_SHIFT 24 ++#define STM32F4_RES_MASK GENMASK(25, 24) ++#define STM32F4_SCAN BIT(8) ++#define STM32F4_EOCIE BIT(5) ++ ++/* STM32F4_ADC_CR2 - bit fields */ ++#define STM32F4_SWSTART BIT(30) ++#define STM32F4_EXTEN_SHIFT 28 ++#define STM32F4_EXTEN_MASK GENMASK(29, 28) ++#define STM32F4_EXTSEL_SHIFT 24 ++#define STM32F4_EXTSEL_MASK GENMASK(27, 24) ++#define STM32F4_EOCS BIT(10) ++#define STM32F4_DDS BIT(9) ++#define STM32F4_DMA BIT(8) ++#define STM32F4_ADON BIT(0) ++ ++/* STM32F4_ADC_CSR - bit fields */ ++#define STM32F4_EOC3 BIT(17) ++#define STM32F4_EOC2 BIT(9) ++#define STM32F4_EOC1 BIT(1) ++ ++/* STM32F4_ADC_CCR - bit fields */ ++#define STM32F4_ADC_ADCPRE_SHIFT 16 ++#define STM32F4_ADC_ADCPRE_MASK GENMASK(17, 16) ++ ++/* STM32H7 - Registers for each ADC instance */ ++#define STM32H7_ADC_ISR 0x00 ++#define STM32H7_ADC_IER 0x04 ++#define STM32H7_ADC_CR 0x08 ++#define STM32H7_ADC_CFGR 0x0C ++#define STM32H7_ADC_SMPR1 0x14 ++#define STM32H7_ADC_SMPR2 0x18 ++#define STM32H7_ADC_PCSEL 0x1C ++#define STM32H7_ADC_SQR1 0x30 ++#define STM32H7_ADC_SQR2 0x34 ++#define STM32H7_ADC_SQR3 0x38 ++#define STM32H7_ADC_SQR4 0x3C ++#define STM32H7_ADC_DR 0x40 ++#define STM32H7_ADC_CALFACT 0xC4 ++#define STM32H7_ADC_CALFACT2 0xC8 ++ ++/* STM32H7 - common registers for all ADC instances */ ++#define STM32H7_ADC_CSR (STM32_ADCX_COMN_OFFSET + 0x00) ++#define STM32H7_ADC_CCR (STM32_ADCX_COMN_OFFSET + 0x08) ++ ++/* STM32H7_ADC_ISR - bit fields */ ++#define STM32H7_EOC BIT(2) ++#define STM32H7_ADRDY BIT(0) ++ ++/* STM32H7_ADC_IER - bit fields */ ++#define STM32H7_EOCIE STM32H7_EOC ++ ++/* STM32H7_ADC_CR - bit fields */ ++#define STM32H7_ADCAL BIT(31) ++#define STM32H7_ADCALDIF BIT(30) ++#define STM32H7_DEEPPWD BIT(29) ++#define STM32H7_ADVREGEN BIT(28) ++#define STM32H7_LINCALRDYW6 BIT(27) ++#define STM32H7_LINCALRDYW5 BIT(26) ++#define STM32H7_LINCALRDYW4 BIT(25) ++#define STM32H7_LINCALRDYW3 BIT(24) ++#define STM32H7_LINCALRDYW2 BIT(23) ++#define STM32H7_LINCALRDYW1 BIT(22) ++#define STM32H7_ADCALLIN BIT(16) ++#define STM32H7_BOOST BIT(8) ++#define STM32H7_ADSTP BIT(4) ++#define STM32H7_ADSTART BIT(2) ++#define STM32H7_ADDIS BIT(1) ++#define STM32H7_ADEN BIT(0) ++ ++/* STM32H7_ADC_CFGR bit fields */ ++#define STM32H7_EXTEN_SHIFT 10 ++#define STM32H7_EXTEN_MASK GENMASK(11, 10) ++#define STM32H7_EXTSEL_SHIFT 5 ++#define STM32H7_EXTSEL_MASK GENMASK(9, 5) ++#define STM32H7_RES_SHIFT 2 ++#define STM32H7_RES_MASK GENMASK(4, 2) ++#define STM32H7_DMNGT_SHIFT 0 ++#define STM32H7_DMNGT_MASK GENMASK(1, 0) ++ ++enum stm32h7_adc_dmngt { ++ STM32H7_DMNGT_DR_ONLY, /* Regular 
data in DR only */ ++ STM32H7_DMNGT_DMA_ONESHOT, /* DMA one shot mode */ ++ STM32H7_DMNGT_DFSDM, /* DFSDM mode */ ++ STM32H7_DMNGT_DMA_CIRC, /* DMA circular mode */ ++}; ++ ++/* STM32H7_ADC_CALFACT - bit fields */ ++#define STM32H7_CALFACT_D_SHIFT 16 ++#define STM32H7_CALFACT_D_MASK GENMASK(26, 16) ++#define STM32H7_CALFACT_S_SHIFT 0 ++#define STM32H7_CALFACT_S_MASK GENMASK(10, 0) ++ ++/* STM32H7_ADC_CALFACT2 - bit fields */ ++#define STM32H7_LINCALFACT_SHIFT 0 ++#define STM32H7_LINCALFACT_MASK GENMASK(29, 0) ++ ++/* STM32H7_ADC_CSR - bit fields */ ++#define STM32H7_EOC_SLV BIT(18) ++#define STM32H7_EOC_MST BIT(2) ++ ++/* STM32H7_ADC_CCR - bit fields */ ++#define STM32H7_PRESC_SHIFT 18 ++#define STM32H7_PRESC_MASK GENMASK(21, 18) ++#define STM32H7_CKMODE_SHIFT 16 ++#define STM32H7_CKMODE_MASK GENMASK(17, 16) ++ + /** + * struct stm32_adc_common - stm32 ADC driver common data (for all instances) + * @base: control registers base cpu addr +--- a/drivers/iio/adc/stm32-adc.c ++++ b/drivers/iio/adc/stm32-adc.c +@@ -40,113 +40,6 @@ + + #include "stm32-adc-core.h" + +-/* STM32F4 - Registers for each ADC instance */ +-#define STM32F4_ADC_SR 0x00 +-#define STM32F4_ADC_CR1 0x04 +-#define STM32F4_ADC_CR2 0x08 +-#define STM32F4_ADC_SMPR1 0x0C +-#define STM32F4_ADC_SMPR2 0x10 +-#define STM32F4_ADC_HTR 0x24 +-#define STM32F4_ADC_LTR 0x28 +-#define STM32F4_ADC_SQR1 0x2C +-#define STM32F4_ADC_SQR2 0x30 +-#define STM32F4_ADC_SQR3 0x34 +-#define STM32F4_ADC_JSQR 0x38 +-#define STM32F4_ADC_JDR1 0x3C +-#define STM32F4_ADC_JDR2 0x40 +-#define STM32F4_ADC_JDR3 0x44 +-#define STM32F4_ADC_JDR4 0x48 +-#define STM32F4_ADC_DR 0x4C +- +-/* STM32F4_ADC_SR - bit fields */ +-#define STM32F4_STRT BIT(4) +-#define STM32F4_EOC BIT(1) +- +-/* STM32F4_ADC_CR1 - bit fields */ +-#define STM32F4_RES_SHIFT 24 +-#define STM32F4_RES_MASK GENMASK(25, 24) +-#define STM32F4_SCAN BIT(8) +-#define STM32F4_EOCIE BIT(5) +- +-/* STM32F4_ADC_CR2 - bit fields */ +-#define STM32F4_SWSTART BIT(30) +-#define STM32F4_EXTEN_SHIFT 28 +-#define STM32F4_EXTEN_MASK GENMASK(29, 28) +-#define STM32F4_EXTSEL_SHIFT 24 +-#define STM32F4_EXTSEL_MASK GENMASK(27, 24) +-#define STM32F4_EOCS BIT(10) +-#define STM32F4_DDS BIT(9) +-#define STM32F4_DMA BIT(8) +-#define STM32F4_ADON BIT(0) +- +-/* STM32H7 - Registers for each ADC instance */ +-#define STM32H7_ADC_ISR 0x00 +-#define STM32H7_ADC_IER 0x04 +-#define STM32H7_ADC_CR 0x08 +-#define STM32H7_ADC_CFGR 0x0C +-#define STM32H7_ADC_SMPR1 0x14 +-#define STM32H7_ADC_SMPR2 0x18 +-#define STM32H7_ADC_PCSEL 0x1C +-#define STM32H7_ADC_SQR1 0x30 +-#define STM32H7_ADC_SQR2 0x34 +-#define STM32H7_ADC_SQR3 0x38 +-#define STM32H7_ADC_SQR4 0x3C +-#define STM32H7_ADC_DR 0x40 +-#define STM32H7_ADC_CALFACT 0xC4 +-#define STM32H7_ADC_CALFACT2 0xC8 +- +-/* STM32H7_ADC_ISR - bit fields */ +-#define STM32H7_EOC BIT(2) +-#define STM32H7_ADRDY BIT(0) +- +-/* STM32H7_ADC_IER - bit fields */ +-#define STM32H7_EOCIE STM32H7_EOC +- +-/* STM32H7_ADC_CR - bit fields */ +-#define STM32H7_ADCAL BIT(31) +-#define STM32H7_ADCALDIF BIT(30) +-#define STM32H7_DEEPPWD BIT(29) +-#define STM32H7_ADVREGEN BIT(28) +-#define STM32H7_LINCALRDYW6 BIT(27) +-#define STM32H7_LINCALRDYW5 BIT(26) +-#define STM32H7_LINCALRDYW4 BIT(25) +-#define STM32H7_LINCALRDYW3 BIT(24) +-#define STM32H7_LINCALRDYW2 BIT(23) +-#define STM32H7_LINCALRDYW1 BIT(22) +-#define STM32H7_ADCALLIN BIT(16) +-#define STM32H7_BOOST BIT(8) +-#define STM32H7_ADSTP BIT(4) +-#define STM32H7_ADSTART BIT(2) +-#define STM32H7_ADDIS BIT(1) +-#define STM32H7_ADEN BIT(0) +- +-/* 
STM32H7_ADC_CFGR bit fields */ +-#define STM32H7_EXTEN_SHIFT 10 +-#define STM32H7_EXTEN_MASK GENMASK(11, 10) +-#define STM32H7_EXTSEL_SHIFT 5 +-#define STM32H7_EXTSEL_MASK GENMASK(9, 5) +-#define STM32H7_RES_SHIFT 2 +-#define STM32H7_RES_MASK GENMASK(4, 2) +-#define STM32H7_DMNGT_SHIFT 0 +-#define STM32H7_DMNGT_MASK GENMASK(1, 0) +- +-enum stm32h7_adc_dmngt { +- STM32H7_DMNGT_DR_ONLY, /* Regular data in DR only */ +- STM32H7_DMNGT_DMA_ONESHOT, /* DMA one shot mode */ +- STM32H7_DMNGT_DFSDM, /* DFSDM mode */ +- STM32H7_DMNGT_DMA_CIRC, /* DMA circular mode */ +-}; +- +-/* STM32H7_ADC_CALFACT - bit fields */ +-#define STM32H7_CALFACT_D_SHIFT 16 +-#define STM32H7_CALFACT_D_MASK GENMASK(26, 16) +-#define STM32H7_CALFACT_S_SHIFT 0 +-#define STM32H7_CALFACT_S_MASK GENMASK(10, 0) +- +-/* STM32H7_ADC_CALFACT2 - bit fields */ +-#define STM32H7_LINCALFACT_SHIFT 0 +-#define STM32H7_LINCALFACT_MASK GENMASK(29, 0) +- + /* Number of linear calibration shadow registers / LINCALRDYW control bits */ + #define STM32H7_LINCALFACT_NUM 6 + diff --git a/queue-4.14/powerpc-book3s64-mm-don-t-do-tlbie-fixup-for-some-hardware-revisions.patch b/queue-4.14/powerpc-book3s64-mm-don-t-do-tlbie-fixup-for-some-hardware-revisions.patch new file mode 100644 index 00000000000..0bc57ace07f --- /dev/null +++ b/queue-4.14/powerpc-book3s64-mm-don-t-do-tlbie-fixup-for-some-hardware-revisions.patch @@ -0,0 +1,83 @@ +From foo@baz Fri 08 Nov 2019 05:55:09 PM CET +From: Sandipan Das +Date: Thu, 17 Oct 2019 13:35:01 +0530 +Subject: powerpc/book3s64/mm: Don't do tlbie fixup for some hardware revisions +To: gregkh@linuxfoundation.org +Cc: stable@vger.kernel.org, aneesh.kumar@linux.ibm.com, mpe@ellerman.id.au, linuxppc-dev@lists.ozlabs.org +Message-ID: <20191017080505.8348-2-sandipan@linux.ibm.com> + +From: "Aneesh Kumar K.V" + +commit 677733e296b5c7a37c47da391fc70a43dc40bd67 upstream. + +The store ordering vs tlbie issue mentioned in commit +a5d4b5891c2f ("powerpc/mm: Fixup tlbie vs store ordering issue on +POWER9") is fixed for Nimbus 2.3 and Cumulus 1.3 revisions. We don't +need to apply the fixup if we are running on them + +We can only do this on PowerNV. On pseries guest with kvm we still +don't support redoing the feature fixup after migration. So we should +be enabling all the workarounds needed, because whe can possibly +migrate between DD 2.3 and DD 2.2 + +Cc: stable@vger.kernel.org # v4.14 +Fixes: a5d4b5891c2f ("powerpc/mm: Fixup tlbie vs store ordering issue on POWER9") +Signed-off-by: Aneesh Kumar K.V +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20190924035254.24612-1-aneesh.kumar@linux.ibm.com +[sandipan: Backported to v4.14] +Signed-off-by: Sandipan Das +Signed-off-by: Greg Kroah-Hartman +--- + arch/powerpc/kernel/dt_cpu_ftrs.c | 31 ++++++++++++++++++++++++++++--- + 1 file changed, 28 insertions(+), 3 deletions(-) + +--- a/arch/powerpc/kernel/dt_cpu_ftrs.c ++++ b/arch/powerpc/kernel/dt_cpu_ftrs.c +@@ -733,9 +733,35 @@ static bool __init cpufeatures_process_f + return true; + } + ++/* ++ * Handle POWER9 broadcast tlbie invalidation issue using ++ * cpu feature flag. 
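++ *
++ * The low 12 bits of the PVR encode the DD revision (e.g. 0x203 is
++ * DD 2.3 and 0x103 is DD 1.3), which is what the comparisons below
++ * check against.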
++ */ ++static __init void update_tlbie_feature_flag(unsigned long pvr) ++{ ++ if (PVR_VER(pvr) == PVR_POWER9) { ++ /* ++ * Set the tlbie feature flag for anything below ++ * Nimbus DD 2.3 and Cumulus DD 1.3 ++ */ ++ if ((pvr & 0xe000) == 0) { ++ /* Nimbus */ ++ if ((pvr & 0xfff) < 0x203) ++ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG; ++ } else if ((pvr & 0xc000) == 0) { ++ /* Cumulus */ ++ if ((pvr & 0xfff) < 0x103) ++ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG; ++ } else { ++ WARN_ONCE(1, "Unknown PVR"); ++ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG; ++ } ++ } ++} ++ + static __init void cpufeatures_cpu_quirks(void) + { +- int version = mfspr(SPRN_PVR); ++ unsigned long version = mfspr(SPRN_PVR); + + /* + * Not all quirks can be derived from the cpufeatures device tree. +@@ -743,8 +769,7 @@ static __init void cpufeatures_cpu_quirk + if ((version & 0xffffff00) == 0x004e0100) + cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1; + +- if ((version & 0xffff0000) == 0x004e0000) +- cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG; ++ update_tlbie_feature_flag(version); + } + + static void __init cpufeatures_setup_finished(void) diff --git a/queue-4.14/powerpc-book3s64-radix-rename-cpu_ftr_p9_tlbie_bug-feature-flag.patch b/queue-4.14/powerpc-book3s64-radix-rename-cpu_ftr_p9_tlbie_bug-feature-flag.patch new file mode 100644 index 00000000000..ca16b44695a --- /dev/null +++ b/queue-4.14/powerpc-book3s64-radix-rename-cpu_ftr_p9_tlbie_bug-feature-flag.patch @@ -0,0 +1,117 @@ +From foo@baz Fri 08 Nov 2019 05:55:09 PM CET +From: Sandipan Das +Date: Thu, 17 Oct 2019 13:35:02 +0530 +Subject: powerpc/book3s64/radix: Rename CPU_FTR_P9_TLBIE_BUG feature flag +To: gregkh@linuxfoundation.org +Cc: stable@vger.kernel.org, aneesh.kumar@linux.ibm.com, mpe@ellerman.id.au, linuxppc-dev@lists.ozlabs.org +Message-ID: <20191017080505.8348-3-sandipan@linux.ibm.com> + +From: "Aneesh Kumar K.V" + +commit 09ce98cacd51fcd0fa0af2f79d1e1d3192f4cbb0 upstream. + +Rename the #define to indicate this is related to store vs tlbie +ordering issue. In the next patch, we will be adding another feature +flag that is used to handles ERAT flush vs tlbie ordering issue. 
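+
+The rename itself is mechanical; as in the hash_native_64.c hunk below,
+callers simply test the new flag name:
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+		/* Need the extra ptesync to ensure we don't reorder tlbie */
+		asm volatile("ptesync": : :"memory");
+		___tlbie(vpn, psize, apsize, ssize);
+	}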
+ +Cc: stable@vger.kernel.org # v4.14 +Fixes: a5d4b5891c2f ("powerpc/mm: Fixup tlbie vs store ordering issue on POWER9") +Signed-off-by: Aneesh Kumar K.V +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20190924035254.24612-2-aneesh.kumar@linux.ibm.com +[sandipan: Backported to v4.14] +Signed-off-by: Sandipan Das +Signed-off-by: Greg Kroah-Hartman +--- + arch/powerpc/include/asm/cputable.h | 4 ++-- + arch/powerpc/kernel/dt_cpu_ftrs.c | 6 +++--- + arch/powerpc/kvm/book3s_64_mmu_radix.c | 2 +- + arch/powerpc/kvm/book3s_hv_rm_mmu.c | 2 +- + arch/powerpc/mm/hash_native_64.c | 2 +- + arch/powerpc/mm/tlb-radix.c | 2 +- + 6 files changed, 9 insertions(+), 9 deletions(-) + +--- a/arch/powerpc/include/asm/cputable.h ++++ b/arch/powerpc/include/asm/cputable.h +@@ -215,7 +215,7 @@ enum { + #define CPU_FTR_DAWR LONG_ASM_CONST(0x0400000000000000) + #define CPU_FTR_DABRX LONG_ASM_CONST(0x0800000000000000) + #define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000000000000000) +-#define CPU_FTR_P9_TLBIE_BUG LONG_ASM_CONST(0x2000000000000000) ++#define CPU_FTR_P9_TLBIE_STQ_BUG LONG_ASM_CONST(0x0000400000000000) + #define CPU_FTR_POWER9_DD1 LONG_ASM_CONST(0x4000000000000000) + + #ifndef __ASSEMBLY__ +@@ -477,7 +477,7 @@ enum { + CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ + CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ + CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | \ +- CPU_FTR_P9_TLBIE_BUG) ++ CPU_FTR_P9_TLBIE_STQ_BUG) + #define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \ + (~CPU_FTR_SAO)) + #define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ +--- a/arch/powerpc/kernel/dt_cpu_ftrs.c ++++ b/arch/powerpc/kernel/dt_cpu_ftrs.c +@@ -747,14 +747,14 @@ static __init void update_tlbie_feature_ + if ((pvr & 0xe000) == 0) { + /* Nimbus */ + if ((pvr & 0xfff) < 0x203) +- cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG; ++ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG; + } else if ((pvr & 0xc000) == 0) { + /* Cumulus */ + if ((pvr & 0xfff) < 0x103) +- cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG; ++ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG; + } else { + WARN_ONCE(1, "Unknown PVR"); +- cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG; ++ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG; + } + } + } +--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c ++++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c +@@ -160,7 +160,7 @@ static void kvmppc_radix_tlbie_page(stru + asm volatile("ptesync": : :"memory"); + asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1) + : : "r" (addr), "r" (kvm->arch.lpid) : "memory"); +- if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) ++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) + asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1) + : : "r" (addr), "r" (kvm->arch.lpid) : "memory"); + asm volatile("ptesync": : :"memory"); +--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c ++++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c +@@ -449,7 +449,7 @@ static void do_tlbies(struct kvm *kvm, u + "r" (rbvalues[i]), "r" (kvm->arch.lpid)); + } + +- if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { ++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { + /* + * Need the extra ptesync to make sure we don't + * re-order the tlbie +--- a/arch/powerpc/mm/hash_native_64.c ++++ b/arch/powerpc/mm/hash_native_64.c +@@ -106,7 +106,7 @@ static inline unsigned long ___tlbie(un + + static inline void fixup_tlbie(unsigned long vpn, int psize, int apsize, int ssize) + { +- if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { ++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { + /* Need the 
extra ptesync to ensure we don't reorder tlbie*/ + asm volatile("ptesync": : :"memory"); + ___tlbie(vpn, psize, apsize, ssize); +--- a/arch/powerpc/mm/tlb-radix.c ++++ b/arch/powerpc/mm/tlb-radix.c +@@ -44,7 +44,7 @@ static inline void fixup_tlbie(void) + unsigned long pid = 0; + unsigned long va = ((1UL << 52) - 1); + +- if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { ++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { + asm volatile("ptesync": : :"memory"); + __tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB); + } diff --git a/queue-4.14/powerpc-mm-fixup-tlbie-vs-mtpidr-mtlpidr-ordering-issue-on-power9.patch b/queue-4.14/powerpc-mm-fixup-tlbie-vs-mtpidr-mtlpidr-ordering-issue-on-power9.patch new file mode 100644 index 00000000000..158b62da793 --- /dev/null +++ b/queue-4.14/powerpc-mm-fixup-tlbie-vs-mtpidr-mtlpidr-ordering-issue-on-power9.patch @@ -0,0 +1,287 @@ +From foo@baz Fri 08 Nov 2019 05:55:09 PM CET +From: Sandipan Das +Date: Thu, 17 Oct 2019 13:35:03 +0530 +Subject: powerpc/mm: Fixup tlbie vs mtpidr/mtlpidr ordering issue on POWER9 +To: gregkh@linuxfoundation.org +Cc: stable@vger.kernel.org, aneesh.kumar@linux.ibm.com, mpe@ellerman.id.au, linuxppc-dev@lists.ozlabs.org +Message-ID: <20191017080505.8348-4-sandipan@linux.ibm.com> + +From: "Aneesh Kumar K.V" + +commit 047e6575aec71d75b765c22111820c4776cd1c43 upstream. + +On POWER9, under some circumstances, a broadcast TLB invalidation will +fail to invalidate the ERAT cache on some threads when there are +parallel mtpidr/mtlpidr happening on other threads of the same core. +This can cause stores to continue to go to a page after it's unmapped. + +The workaround is to force an ERAT flush using PID=0 or LPID=0 tlbie +flush. This additional TLB flush will cause the ERAT cache +invalidation. Since we are using PID=0 or LPID=0, we don't get +filtered out by the TLB snoop filtering logic. + +We need to still follow this up with another tlbie to take care of +store vs tlbie ordering issue explained in commit: +a5d4b5891c2f ("powerpc/mm: Fixup tlbie vs store ordering issue on +POWER9"). The presence of ERAT cache implies we can still get new +stores and they may miss store queue marking flush. 
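+
+Condensed from fixup_tlbie_pid() in the tlb-radix.c hunk below, the
+workaround therefore stacks both fixups: a PID=0 flush that forces the
+ERAT invalidation, followed by the existing extra tlbie for the store
+ordering issue:
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+		asm volatile("ptesync": : :"memory");
+		__tlbie_pid(0, RIC_FLUSH_TLB);
+	}
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+		asm volatile("ptesync": : :"memory");
+		__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
+	}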
+ +Cc: stable@vger.kernel.org # v4.14 +Signed-off-by: Aneesh Kumar K.V +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20190924035254.24612-3-aneesh.kumar@linux.ibm.com +[sandipan: Backported to v4.14] +Signed-off-by: Sandipan Das +Signed-off-by: Greg Kroah-Hartman +--- + arch/powerpc/include/asm/cputable.h | 3 + + arch/powerpc/kernel/dt_cpu_ftrs.c | 2 + + arch/powerpc/kvm/book3s_hv_rm_mmu.c | 42 +++++++++++++++++------ + arch/powerpc/mm/hash_native_64.c | 28 +++++++++++++-- + arch/powerpc/mm/tlb-radix.c | 65 ++++++++++++++++++++++++++++++------ + 5 files changed, 116 insertions(+), 24 deletions(-) + +--- a/arch/powerpc/include/asm/cputable.h ++++ b/arch/powerpc/include/asm/cputable.h +@@ -217,6 +217,7 @@ enum { + #define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000000000000000) + #define CPU_FTR_P9_TLBIE_STQ_BUG LONG_ASM_CONST(0x0000400000000000) + #define CPU_FTR_POWER9_DD1 LONG_ASM_CONST(0x4000000000000000) ++#define CPU_FTR_P9_TLBIE_ERAT_BUG LONG_ASM_CONST(0x0001000000000000) + + #ifndef __ASSEMBLY__ + +@@ -477,7 +478,7 @@ enum { + CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ + CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ + CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | \ +- CPU_FTR_P9_TLBIE_STQ_BUG) ++ CPU_FTR_P9_TLBIE_STQ_BUG | CPU_FTR_P9_TLBIE_ERAT_BUG) + #define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \ + (~CPU_FTR_SAO)) + #define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ +--- a/arch/powerpc/kernel/dt_cpu_ftrs.c ++++ b/arch/powerpc/kernel/dt_cpu_ftrs.c +@@ -756,6 +756,8 @@ static __init void update_tlbie_feature_ + WARN_ONCE(1, "Unknown PVR"); + cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG; + } ++ ++ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_ERAT_BUG; + } + } + +--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c ++++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c +@@ -429,6 +429,37 @@ static inline int try_lock_tlbie(unsigne + return old == 0; + } + ++static inline void fixup_tlbie_lpid(unsigned long rb_value, unsigned long lpid) ++{ ++ ++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { ++ /* Radix flush for a hash guest */ ++ ++ unsigned long rb,rs,prs,r,ric; ++ ++ rb = PPC_BIT(52); /* IS = 2 */ ++ rs = 0; /* lpid = 0 */ ++ prs = 0; /* partition scoped */ ++ r = 1; /* radix format */ ++ ric = 0; /* RIC_FLSUH_TLB */ ++ ++ /* ++ * Need the extra ptesync to make sure we don't ++ * re-order the tlbie ++ */ ++ asm volatile("ptesync": : :"memory"); ++ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) ++ : : "r"(rb), "i"(r), "i"(prs), ++ "i"(ric), "r"(rs) : "memory"); ++ } ++ ++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { ++ asm volatile("ptesync": : :"memory"); ++ asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : : ++ "r" (rb_value), "r" (lpid)); ++ } ++} ++ + static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, + long npages, int global, bool need_sync) + { +@@ -449,16 +480,7 @@ static void do_tlbies(struct kvm *kvm, u + "r" (rbvalues[i]), "r" (kvm->arch.lpid)); + } + +- if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { +- /* +- * Need the extra ptesync to make sure we don't +- * re-order the tlbie +- */ +- asm volatile("ptesync": : :"memory"); +- asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : : +- "r" (rbvalues[0]), "r" (kvm->arch.lpid)); +- } +- ++ fixup_tlbie_lpid(rbvalues[i - 1], kvm->arch.lpid); + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); + kvm->arch.tlbie_lock = 0; + } else { +--- a/arch/powerpc/mm/hash_native_64.c ++++ b/arch/powerpc/mm/hash_native_64.c +@@ -104,8 +104,30 @@ static inline unsigned long 
___tlbie(un + return va; + } + +-static inline void fixup_tlbie(unsigned long vpn, int psize, int apsize, int ssize) ++static inline void fixup_tlbie_vpn(unsigned long vpn, int psize, ++ int apsize, int ssize) + { ++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { ++ /* Radix flush for a hash guest */ ++ ++ unsigned long rb,rs,prs,r,ric; ++ ++ rb = PPC_BIT(52); /* IS = 2 */ ++ rs = 0; /* lpid = 0 */ ++ prs = 0; /* partition scoped */ ++ r = 1; /* radix format */ ++ ric = 0; /* RIC_FLSUH_TLB */ ++ ++ /* ++ * Need the extra ptesync to make sure we don't ++ * re-order the tlbie ++ */ ++ asm volatile("ptesync": : :"memory"); ++ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) ++ : : "r"(rb), "i"(r), "i"(prs), ++ "i"(ric), "r"(rs) : "memory"); ++ } ++ + if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { + /* Need the extra ptesync to ensure we don't reorder tlbie*/ + asm volatile("ptesync": : :"memory"); +@@ -190,7 +212,7 @@ static inline void tlbie(unsigned long v + asm volatile("ptesync": : :"memory"); + } else { + __tlbie(vpn, psize, apsize, ssize); +- fixup_tlbie(vpn, psize, apsize, ssize); ++ fixup_tlbie_vpn(vpn, psize, apsize, ssize); + asm volatile("eieio; tlbsync; ptesync": : :"memory"); + } + if (lock_tlbie && !use_local) +@@ -759,7 +781,7 @@ static void native_flush_hash_range(unsi + /* + * Just do one more with the last used values. + */ +- fixup_tlbie(vpn, psize, psize, ssize); ++ fixup_tlbie_vpn(vpn, psize, psize, ssize); + asm volatile("eieio; tlbsync; ptesync":::"memory"); + + if (lock_tlbie) +--- a/arch/powerpc/mm/tlb-radix.c ++++ b/arch/powerpc/mm/tlb-radix.c +@@ -39,14 +39,18 @@ static inline void __tlbie_va(unsigned l + trace_tlbie(0, 0, rb, rs, ric, prs, r); + } + +-static inline void fixup_tlbie(void) ++ ++static inline void fixup_tlbie_va(unsigned long va, unsigned long pid, ++ unsigned long ap) + { +- unsigned long pid = 0; +- unsigned long va = ((1UL << 52) - 1); ++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { ++ asm volatile("ptesync": : :"memory"); ++ __tlbie_va(va, 0, ap, RIC_FLUSH_TLB); ++ } + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { + asm volatile("ptesync": : :"memory"); +- __tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB); ++ __tlbie_va(va, pid, ap, RIC_FLUSH_TLB); + } + } + +@@ -95,23 +99,64 @@ static inline void _tlbiel_pid(unsigned + asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory"); + } + +-static inline void _tlbie_pid(unsigned long pid, unsigned long ric) ++static inline void __tlbie_pid(unsigned long pid, unsigned long ric) + { + unsigned long rb,rs,prs,r; + + rb = PPC_BIT(53); /* IS = 1 */ + rs = pid << PPC_BITLSHIFT(31); + prs = 1; /* process scoped */ +- r = 1; /* raidx format */ ++ r = 1; /* radix format */ + +- asm volatile("ptesync": : :"memory"); + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); +- fixup_tlbie(); +- asm volatile("eieio; tlbsync; ptesync": : :"memory"); + trace_tlbie(0, 0, rb, rs, ric, prs, r); + } + ++static inline void fixup_tlbie_pid(unsigned long pid) ++{ ++ /* ++ * We can use any address for the invalidation, pick one which is ++ * probably unused as an optimisation. 
++ */ ++ unsigned long va = ((1UL << 52) - 1); ++ ++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { ++ asm volatile("ptesync": : :"memory"); ++ __tlbie_pid(0, RIC_FLUSH_TLB); ++ } ++ ++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { ++ asm volatile("ptesync": : :"memory"); ++ __tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB); ++ } ++} ++ ++static inline void _tlbie_pid(unsigned long pid, unsigned long ric) ++{ ++ asm volatile("ptesync": : :"memory"); ++ ++ /* ++ * Workaround the fact that the "ric" argument to __tlbie_pid ++ * must be a compile-time contraint to match the "i" constraint ++ * in the asm statement. ++ */ ++ switch (ric) { ++ case RIC_FLUSH_TLB: ++ __tlbie_pid(pid, RIC_FLUSH_TLB); ++ fixup_tlbie_pid(pid); ++ break; ++ case RIC_FLUSH_PWC: ++ __tlbie_pid(pid, RIC_FLUSH_PWC); ++ break; ++ case RIC_FLUSH_ALL: ++ default: ++ __tlbie_pid(pid, RIC_FLUSH_ALL); ++ fixup_tlbie_pid(pid); ++ } ++ asm volatile("eieio; tlbsync; ptesync": : :"memory"); ++} ++ + static inline void _tlbiel_va(unsigned long va, unsigned long pid, + unsigned long ap, unsigned long ric) + { +@@ -135,7 +180,7 @@ static inline void _tlbie_va(unsigned lo + { + asm volatile("ptesync": : :"memory"); + __tlbie_va(va, pid, ap, ric); +- fixup_tlbie(); ++ fixup_tlbie_va(va, pid, ap); + asm volatile("eieio; tlbsync; ptesync": : :"memory"); + } + diff --git a/queue-4.14/powerpc-mm-fixup-tlbie-vs-store-ordering-issue-on-power9.patch b/queue-4.14/powerpc-mm-fixup-tlbie-vs-store-ordering-issue-on-power9.patch new file mode 100644 index 00000000000..40c15da95bb --- /dev/null +++ b/queue-4.14/powerpc-mm-fixup-tlbie-vs-store-ordering-issue-on-power9.patch @@ -0,0 +1,226 @@ +From foo@baz Fri 08 Nov 2019 05:55:09 PM CET +From: Sandipan Das +Date: Thu, 17 Oct 2019 13:35:00 +0530 +Subject: powerpc/mm: Fixup tlbie vs store ordering issue on POWER9 +To: gregkh@linuxfoundation.org +Cc: stable@vger.kernel.org, aneesh.kumar@linux.ibm.com, mpe@ellerman.id.au, linuxppc-dev@lists.ozlabs.org, "Aneesh Kumar K.V" +Message-ID: <20191017080505.8348-1-sandipan@linux.ibm.com> + +From: "Aneesh Kumar K.V" + +commit a5d4b5891c2f1f865a2def1eb0030f534e77ff86 upstream. + +On POWER9, under some circumstances, a broadcast TLB invalidation +might complete before all previous stores have drained, potentially +allowing stale stores from becoming visible after the invalidation. +This works around it by doubling up those TLB invalidations which was +verified by HW to be sufficient to close the risk window. + +This will be documented in a yet-to-be-published errata. + +Cc: stable@vger.kernel.org # v4.14 +Fixes: 1a472c9dba6b ("powerpc/mm/radix: Add tlbflush routines") +Signed-off-by: Aneesh Kumar K.V +[mpe: Enable the feature in the DT CPU features code for all Power9, + rename the feature to CPU_FTR_P9_TLBIE_BUG per benh.] 
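+
+As the book3s_64_mmu_radix.c hunk below shows, the workaround is simply
+to issue the invalidation a second time on affected CPUs:
+
+	asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1)
+		     : : "r" (addr), "r" (kvm->arch.lpid) : "memory");
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG))
+		asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1)
+			     : : "r" (addr), "r" (kvm->arch.lpid) : "memory");
+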
+Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20180323045627.16800-3-aneesh.kumar@linux.vnet.ibm.com/ +[sandipan: Backported to v4.14] +Signed-off-by: Sandipan Das +Signed-off-by: Greg Kroah-Hartman +--- + arch/powerpc/include/asm/cputable.h | 4 ++- + arch/powerpc/kernel/dt_cpu_ftrs.c | 3 ++ + arch/powerpc/kvm/book3s_64_mmu_radix.c | 3 ++ + arch/powerpc/kvm/book3s_hv_rm_mmu.c | 11 ++++++++ + arch/powerpc/mm/hash_native_64.c | 16 ++++++++++++ + arch/powerpc/mm/pgtable_64.c | 1 + arch/powerpc/mm/tlb-radix.c | 41 ++++++++++++++++++++++++--------- + 7 files changed, 66 insertions(+), 13 deletions(-) + +--- a/arch/powerpc/include/asm/cputable.h ++++ b/arch/powerpc/include/asm/cputable.h +@@ -215,6 +215,7 @@ enum { + #define CPU_FTR_DAWR LONG_ASM_CONST(0x0400000000000000) + #define CPU_FTR_DABRX LONG_ASM_CONST(0x0800000000000000) + #define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000000000000000) ++#define CPU_FTR_P9_TLBIE_BUG LONG_ASM_CONST(0x2000000000000000) + #define CPU_FTR_POWER9_DD1 LONG_ASM_CONST(0x4000000000000000) + + #ifndef __ASSEMBLY__ +@@ -475,7 +476,8 @@ enum { + CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ + CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ + CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ +- CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300) ++ CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | \ ++ CPU_FTR_P9_TLBIE_BUG) + #define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \ + (~CPU_FTR_SAO)) + #define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ +--- a/arch/powerpc/kernel/dt_cpu_ftrs.c ++++ b/arch/powerpc/kernel/dt_cpu_ftrs.c +@@ -742,6 +742,9 @@ static __init void cpufeatures_cpu_quirk + */ + if ((version & 0xffffff00) == 0x004e0100) + cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1; ++ ++ if ((version & 0xffff0000) == 0x004e0000) ++ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG; + } + + static void __init cpufeatures_setup_finished(void) +--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c ++++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c +@@ -160,6 +160,9 @@ static void kvmppc_radix_tlbie_page(stru + asm volatile("ptesync": : :"memory"); + asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1) + : : "r" (addr), "r" (kvm->arch.lpid) : "memory"); ++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) ++ asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1) ++ : : "r" (addr), "r" (kvm->arch.lpid) : "memory"); + asm volatile("ptesync": : :"memory"); + } + +--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c ++++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c +@@ -448,6 +448,17 @@ static void do_tlbies(struct kvm *kvm, u + asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : : + "r" (rbvalues[i]), "r" (kvm->arch.lpid)); + } ++ ++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { ++ /* ++ * Need the extra ptesync to make sure we don't ++ * re-order the tlbie ++ */ ++ asm volatile("ptesync": : :"memory"); ++ asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : : ++ "r" (rbvalues[0]), "r" (kvm->arch.lpid)); ++ } ++ + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); + kvm->arch.tlbie_lock = 0; + } else { +--- a/arch/powerpc/mm/hash_native_64.c ++++ b/arch/powerpc/mm/hash_native_64.c +@@ -104,6 +104,15 @@ static inline unsigned long ___tlbie(un + return va; + } + ++static inline void fixup_tlbie(unsigned long vpn, int psize, int apsize, int ssize) ++{ ++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { ++ /* Need the extra ptesync to ensure we don't reorder tlbie*/ ++ asm volatile("ptesync": : :"memory"); ++ ___tlbie(vpn, psize, apsize, ssize); ++ } ++} ++ + static inline 
void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) + { + unsigned long rb; +@@ -181,6 +190,7 @@ static inline void tlbie(unsigned long v + asm volatile("ptesync": : :"memory"); + } else { + __tlbie(vpn, psize, apsize, ssize); ++ fixup_tlbie(vpn, psize, apsize, ssize); + asm volatile("eieio; tlbsync; ptesync": : :"memory"); + } + if (lock_tlbie && !use_local) +@@ -674,7 +684,7 @@ static void native_hpte_clear(void) + */ + static void native_flush_hash_range(unsigned long number, int local) + { +- unsigned long vpn; ++ unsigned long vpn = 0; + unsigned long hash, index, hidx, shift, slot; + struct hash_pte *hptep; + unsigned long hpte_v; +@@ -746,6 +756,10 @@ static void native_flush_hash_range(unsi + __tlbie(vpn, psize, psize, ssize); + } pte_iterate_hashed_end(); + } ++ /* ++ * Just do one more with the last used values. ++ */ ++ fixup_tlbie(vpn, psize, psize, ssize); + asm volatile("eieio; tlbsync; ptesync":::"memory"); + + if (lock_tlbie) +--- a/arch/powerpc/mm/pgtable_64.c ++++ b/arch/powerpc/mm/pgtable_64.c +@@ -491,6 +491,7 @@ void mmu_partition_table_set_entry(unsig + "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); + trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0); + } ++ /* do we need fixup here ?*/ + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); + } + EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry); +--- a/arch/powerpc/mm/tlb-radix.c ++++ b/arch/powerpc/mm/tlb-radix.c +@@ -23,6 +23,33 @@ + #define RIC_FLUSH_PWC 1 + #define RIC_FLUSH_ALL 2 + ++static inline void __tlbie_va(unsigned long va, unsigned long pid, ++ unsigned long ap, unsigned long ric) ++{ ++ unsigned long rb,rs,prs,r; ++ ++ rb = va & ~(PPC_BITMASK(52, 63)); ++ rb |= ap << PPC_BITLSHIFT(58); ++ rs = pid << PPC_BITLSHIFT(31); ++ prs = 1; /* process scoped */ ++ r = 1; /* raidx format */ ++ ++ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) ++ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); ++ trace_tlbie(0, 0, rb, rs, ric, prs, r); ++} ++ ++static inline void fixup_tlbie(void) ++{ ++ unsigned long pid = 0; ++ unsigned long va = ((1UL << 52) - 1); ++ ++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { ++ asm volatile("ptesync": : :"memory"); ++ __tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB); ++ } ++} ++ + static inline void __tlbiel_pid(unsigned long pid, int set, + unsigned long ric) + { +@@ -80,6 +107,7 @@ static inline void _tlbie_pid(unsigned l + asm volatile("ptesync": : :"memory"); + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); ++ fixup_tlbie(); + asm volatile("eieio; tlbsync; ptesync": : :"memory"); + trace_tlbie(0, 0, rb, rs, ric, prs, r); + } +@@ -105,19 +133,10 @@ static inline void _tlbiel_va(unsigned l + static inline void _tlbie_va(unsigned long va, unsigned long pid, + unsigned long ap, unsigned long ric) + { +- unsigned long rb,rs,prs,r; +- +- rb = va & ~(PPC_BITMASK(52, 63)); +- rb |= ap << PPC_BITLSHIFT(58); +- rs = pid << PPC_BITLSHIFT(31); +- prs = 1; /* process scoped */ +- r = 1; /* raidx format */ +- + asm volatile("ptesync": : :"memory"); +- asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) +- : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); ++ __tlbie_va(va, pid, ap, ric); ++ fixup_tlbie(); + asm volatile("eieio; tlbsync; ptesync": : :"memory"); +- trace_tlbie(0, 0, rb, rs, ric, prs, r); + } + + /* diff --git a/queue-4.14/selftests-powerpc-add-test-case-for-tlbie-vs-mtpidr-ordering-issue.patch 
b/queue-4.14/selftests-powerpc-add-test-case-for-tlbie-vs-mtpidr-ordering-issue.patch new file mode 100644 index 00000000000..29c15c37dc6 --- /dev/null +++ b/queue-4.14/selftests-powerpc-add-test-case-for-tlbie-vs-mtpidr-ordering-issue.patch @@ -0,0 +1,778 @@ +From foo@baz Fri 08 Nov 2019 05:55:09 PM CET +From: Sandipan Das +Date: Thu, 17 Oct 2019 13:35:04 +0530 +Subject: selftests/powerpc: Add test case for tlbie vs mtpidr ordering issue +To: gregkh@linuxfoundation.org +Cc: stable@vger.kernel.org, aneesh.kumar@linux.ibm.com, mpe@ellerman.id.au, linuxppc-dev@lists.ozlabs.org +Message-ID: <20191017080505.8348-5-sandipan@linux.ibm.com> + +From: "Aneesh Kumar K.V" + +commit 93cad5f789951eaa27c3392b15294b4e51253944 upstream. + +Cc: stable@vger.kernel.org # v4.14 +Signed-off-by: Aneesh Kumar K.V +[mpe: Some minor fixes to make it build] +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20190924035254.24612-4-aneesh.kumar@linux.ibm.com +[sandipan: Backported to v4.14] +Signed-off-by: Sandipan Das +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/powerpc/mm/Makefile | 2 + tools/testing/selftests/powerpc/mm/tlbie_test.c | 734 ++++++++++++++++++++++++ + 2 files changed, 736 insertions(+) + create mode 100644 tools/testing/selftests/powerpc/mm/tlbie_test.c + +--- a/tools/testing/selftests/powerpc/mm/Makefile ++++ b/tools/testing/selftests/powerpc/mm/Makefile +@@ -3,6 +3,7 @@ noarg: + $(MAKE) -C ../ + + TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao ++TEST_GEN_PROGS_EXTENDED := tlbie_test + TEST_GEN_FILES := tempfile + + include ../../lib.mk +@@ -14,3 +15,4 @@ $(OUTPUT)/prot_sao: ../utils.c + $(OUTPUT)/tempfile: + dd if=/dev/zero of=$@ bs=64k count=1 + ++$(OUTPUT)/tlbie_test: LDLIBS += -lpthread +--- /dev/null ++++ b/tools/testing/selftests/powerpc/mm/tlbie_test.c +@@ -0,0 +1,734 @@ ++// SPDX-License-Identifier: GPL-2.0 ++ ++/* ++ * Copyright 2019, Nick Piggin, Gautham R. Shenoy, Aneesh Kumar K.V, IBM Corp. ++ */ ++ ++/* ++ * ++ * Test tlbie/mtpidr race. We have 4 threads doing flush/load/compare/store ++ * sequence in a loop. The same threads also rung a context switch task ++ * that does sched_yield() in loop. ++ * ++ * The snapshot thread mark the mmap area PROT_READ in between, make a copy ++ * and copy it back to the original area. This helps us to detect if any ++ * store continued to happen after we marked the memory PROT_READ. ++ */ ++ ++#define _GNU_SOURCE ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static inline void dcbf(volatile unsigned int *addr) ++{ ++ __asm__ __volatile__ ("dcbf %y0; sync" : : "Z"(*(unsigned char *)addr) : "memory"); ++} ++ ++static void err_msg(char *msg) ++{ ++ ++ time_t now; ++ time(&now); ++ printf("=================================\n"); ++ printf(" Error: %s\n", msg); ++ printf(" %s", ctime(&now)); ++ printf("=================================\n"); ++ exit(1); ++} ++ ++static char *map1; ++static char *map2; ++static pid_t rim_process_pid; ++ ++/* ++ * A "rim-sequence" is defined to be the sequence of the following ++ * operations performed on a memory word: ++ * 1) FLUSH the contents of that word. ++ * 2) LOAD the contents of that word. ++ * 3) COMPARE the contents of that word with the content that was ++ * previously stored at that word ++ * 4) STORE new content into that word. 
++ * ++ * The threads in this test that perform the rim-sequence are termed ++ * as rim_threads. ++ */ ++ ++/* ++ * A "corruption" is defined to be the failed COMPARE operation in a ++ * rim-sequence. ++ * ++ * A rim_thread that detects a corruption informs about it to all the ++ * other rim_threads, and the mem_snapshot thread. ++ */ ++static volatile unsigned int corruption_found; ++ ++/* ++ * This defines the maximum number of rim_threads in this test. ++ * ++ * The THREAD_ID_BITS denote the number of bits required ++ * to represent the thread_ids [0..MAX_THREADS - 1]. ++ * We are being a bit paranoid here and set it to 8 bits, ++ * though 6 bits suffice. ++ * ++ */ ++#define MAX_THREADS 64 ++#define THREAD_ID_BITS 8 ++#define THREAD_ID_MASK ((1 << THREAD_ID_BITS) - 1) ++static unsigned int rim_thread_ids[MAX_THREADS]; ++static pthread_t rim_threads[MAX_THREADS]; ++ ++ ++/* ++ * Each rim_thread works on an exclusive "chunk" of size ++ * RIM_CHUNK_SIZE. ++ * ++ * The ith rim_thread works on the ith chunk. ++ * ++ * The ith chunk begins at ++ * map1 + (i * RIM_CHUNK_SIZE) ++ */ ++#define RIM_CHUNK_SIZE 1024 ++#define BITS_PER_BYTE 8 ++#define WORD_SIZE (sizeof(unsigned int)) ++#define WORD_BITS (WORD_SIZE * BITS_PER_BYTE) ++#define WORDS_PER_CHUNK (RIM_CHUNK_SIZE/WORD_SIZE) ++ ++static inline char *compute_chunk_start_addr(unsigned int thread_id) ++{ ++ char *chunk_start; ++ ++ chunk_start = (char *)((unsigned long)map1 + ++ (thread_id * RIM_CHUNK_SIZE)); ++ ++ return chunk_start; ++} ++ ++/* ++ * The "word-offset" of a word-aligned address inside a chunk, is ++ * defined to be the number of words that precede the address in that ++ * chunk. ++ * ++ * WORD_OFFSET_BITS denote the number of bits required to represent ++ * the word-offsets of all the word-aligned addresses of a chunk. ++ */ ++#define WORD_OFFSET_BITS (__builtin_ctz(WORDS_PER_CHUNK)) ++#define WORD_OFFSET_MASK ((1 << WORD_OFFSET_BITS) - 1) ++ ++static inline unsigned int compute_word_offset(char *start, unsigned int *addr) ++{ ++ unsigned int delta_bytes, ret; ++ delta_bytes = (unsigned long)addr - (unsigned long)start; ++ ++ ret = delta_bytes/WORD_SIZE; ++ ++ return ret; ++} ++ ++/* ++ * A "sweep" is defined to be the sequential execution of the ++ * rim-sequence by a rim_thread on its chunk one word at a time, ++ * starting from the first word of its chunk and ending with the last ++ * word of its chunk. ++ * ++ * Each sweep of a rim_thread is uniquely identified by a sweep_id. ++ * SWEEP_ID_BITS denote the number of bits required to represent ++ * the sweep_ids of rim_threads. ++ * ++ * As to why SWEEP_ID_BITS are computed as a function of THREAD_ID_BITS, ++ * WORD_OFFSET_BITS, and WORD_BITS, see the "store-pattern" below. ++ */ ++#define SWEEP_ID_BITS (WORD_BITS - (THREAD_ID_BITS + WORD_OFFSET_BITS)) ++#define SWEEP_ID_MASK ((1 << SWEEP_ID_BITS) - 1) ++ ++/* ++ * A "store-pattern" is the word-pattern that is stored into a word ++ * location in the 4)STORE step of the rim-sequence. ++ * ++ * In the store-pattern, we shall encode: ++ * ++ * - The thread-id of the rim_thread performing the store ++ * (The most significant THREAD_ID_BITS) ++ * ++ * - The word-offset of the address into which the store is being ++ * performed (The next WORD_OFFSET_BITS) ++ * ++ * - The sweep_id of the current sweep in which the store is ++ * being performed. 
(The lower SWEEP_ID_BITS) ++ * ++ * Store Pattern: 32 bits ++ * |------------------|--------------------|---------------------------------| ++ * | Thread id | Word offset | sweep_id | ++ * |------------------|--------------------|---------------------------------| ++ * THREAD_ID_BITS WORD_OFFSET_BITS SWEEP_ID_BITS ++ * ++ * In the store pattern, the (Thread-id + Word-offset) uniquely identify the ++ * address to which the store is being performed i.e, ++ * address == map1 + ++ * (Thread-id * RIM_CHUNK_SIZE) + (Word-offset * WORD_SIZE) ++ * ++ * And the sweep_id in the store pattern identifies the time when the ++ * store was performed by the rim_thread. ++ * ++ * We shall use this property in the 3)COMPARE step of the ++ * rim-sequence. ++ */ ++#define SWEEP_ID_SHIFT 0 ++#define WORD_OFFSET_SHIFT (SWEEP_ID_BITS) ++#define THREAD_ID_SHIFT (WORD_OFFSET_BITS + SWEEP_ID_BITS) ++ ++/* ++ * Compute the store pattern for a given thread with id @tid, at ++ * location @addr in the sweep identified by @sweep_id ++ */ ++static inline unsigned int compute_store_pattern(unsigned int tid, ++ unsigned int *addr, ++ unsigned int sweep_id) ++{ ++ unsigned int ret = 0; ++ char *start = compute_chunk_start_addr(tid); ++ unsigned int word_offset = compute_word_offset(start, addr); ++ ++ ret += (tid & THREAD_ID_MASK) << THREAD_ID_SHIFT; ++ ret += (word_offset & WORD_OFFSET_MASK) << WORD_OFFSET_SHIFT; ++ ret += (sweep_id & SWEEP_ID_MASK) << SWEEP_ID_SHIFT; ++ return ret; ++} ++ ++/* Extract the thread-id from the given store-pattern */ ++static inline unsigned int extract_tid(unsigned int pattern) ++{ ++ unsigned int ret; ++ ++ ret = (pattern >> THREAD_ID_SHIFT) & THREAD_ID_MASK; ++ return ret; ++} ++ ++/* Extract the word-offset from the given store-pattern */ ++static inline unsigned int extract_word_offset(unsigned int pattern) ++{ ++ unsigned int ret; ++ ++ ret = (pattern >> WORD_OFFSET_SHIFT) & WORD_OFFSET_MASK; ++ ++ return ret; ++} ++ ++/* Extract the sweep-id from the given store-pattern */ ++static inline unsigned int extract_sweep_id(unsigned int pattern) ++ ++{ ++ unsigned int ret; ++ ++ ret = (pattern >> SWEEP_ID_SHIFT) & SWEEP_ID_MASK; ++ ++ return ret; ++} ++ ++/************************************************************ ++ * * ++ * Logging the output of the verification * ++ * * ++ ************************************************************/ ++#define LOGDIR_NAME_SIZE 100 ++static char logdir[LOGDIR_NAME_SIZE]; ++ ++static FILE *fp[MAX_THREADS]; ++static const char logfilename[] ="Thread-%02d-Chunk"; ++ ++static inline void start_verification_log(unsigned int tid, ++ unsigned int *addr, ++ unsigned int cur_sweep_id, ++ unsigned int prev_sweep_id) ++{ ++ FILE *f; ++ char logfile[30]; ++ char path[LOGDIR_NAME_SIZE + 30]; ++ char separator[2] = "/"; ++ char *chunk_start = compute_chunk_start_addr(tid); ++ unsigned int size = RIM_CHUNK_SIZE; ++ ++ sprintf(logfile, logfilename, tid); ++ strcpy(path, logdir); ++ strcat(path, separator); ++ strcat(path, logfile); ++ f = fopen(path, "w"); ++ ++ if (!f) { ++ err_msg("Unable to create logfile\n"); ++ } ++ ++ fp[tid] = f; ++ ++ fprintf(f, "----------------------------------------------------------\n"); ++ fprintf(f, "PID = %d\n", rim_process_pid); ++ fprintf(f, "Thread id = %02d\n", tid); ++ fprintf(f, "Chunk Start Addr = 0x%016lx\n", (unsigned long)chunk_start); ++ fprintf(f, "Chunk Size = %d\n", size); ++ fprintf(f, "Next Store Addr = 0x%016lx\n", (unsigned long)addr); ++ fprintf(f, "Current sweep-id = 0x%08x\n", cur_sweep_id); ++ fprintf(f, "Previous 
sweep-id = 0x%08x\n", prev_sweep_id); ++ fprintf(f, "----------------------------------------------------------\n"); ++} ++ ++static inline void log_anamoly(unsigned int tid, unsigned int *addr, ++ unsigned int expected, unsigned int observed) ++{ ++ FILE *f = fp[tid]; ++ ++ fprintf(f, "Thread %02d: Addr 0x%lx: Expected 0x%x, Observed 0x%x\n", ++ tid, (unsigned long)addr, expected, observed); ++ fprintf(f, "Thread %02d: Expected Thread id = %02d\n", tid, extract_tid(expected)); ++ fprintf(f, "Thread %02d: Observed Thread id = %02d\n", tid, extract_tid(observed)); ++ fprintf(f, "Thread %02d: Expected Word offset = %03d\n", tid, extract_word_offset(expected)); ++ fprintf(f, "Thread %02d: Observed Word offset = %03d\n", tid, extract_word_offset(observed)); ++ fprintf(f, "Thread %02d: Expected sweep-id = 0x%x\n", tid, extract_sweep_id(expected)); ++ fprintf(f, "Thread %02d: Observed sweep-id = 0x%x\n", tid, extract_sweep_id(observed)); ++ fprintf(f, "----------------------------------------------------------\n"); ++} ++ ++static inline void end_verification_log(unsigned int tid, unsigned nr_anamolies) ++{ ++ FILE *f = fp[tid]; ++ char logfile[30]; ++ char path[LOGDIR_NAME_SIZE + 30]; ++ char separator[] = "/"; ++ ++ fclose(f); ++ ++ if (nr_anamolies == 0) { ++ remove(path); ++ return; ++ } ++ ++ sprintf(logfile, logfilename, tid); ++ strcpy(path, logdir); ++ strcat(path, separator); ++ strcat(path, logfile); ++ ++ printf("Thread %02d chunk has %d corrupted words. For details check %s\n", ++ tid, nr_anamolies, path); ++} ++ ++/* ++ * When a COMPARE step of a rim-sequence fails, the rim_thread informs ++ * everyone else via the shared_memory pointed to by ++ * corruption_found variable. On seeing this, every thread verifies the ++ * content of its chunk as follows. ++ * ++ * Suppose a thread identified with @tid was about to store (but not ++ * yet stored) to @next_store_addr in its current sweep identified ++ * @cur_sweep_id. Let @prev_sweep_id indicate the previous sweep_id. ++ * ++ * This implies that for all the addresses @addr < @next_store_addr, ++ * Thread @tid has already performed a store as part of its current ++ * sweep. Hence we expect the content of such @addr to be: ++ * |-------------------------------------------------| ++ * | tid | word_offset(addr) | cur_sweep_id | ++ * |-------------------------------------------------| ++ * ++ * Since Thread @tid is yet to perform stores on address ++ * @next_store_addr and above, we expect the content of such an ++ * address @addr to be: ++ * |-------------------------------------------------| ++ * | tid | word_offset(addr) | prev_sweep_id | ++ * |-------------------------------------------------| ++ * ++ * The verifier function @verify_chunk does this verification and logs ++ * any anamolies that it finds. 
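++ *
++ * For example, with the defaults above (THREAD_ID_BITS = 8,
++ * WORD_OFFSET_BITS = 8, hence SWEEP_ID_BITS = 16), thread 3 storing
++ * to word-offset 5 in sweep 2 writes (3 << 24) | (5 << 16) | 2,
++ * i.e. the pattern 0x03050002.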
++ */ ++static void verify_chunk(unsigned int tid, unsigned int *next_store_addr, ++ unsigned int cur_sweep_id, ++ unsigned int prev_sweep_id) ++{ ++ unsigned int *iter_ptr; ++ unsigned int size = RIM_CHUNK_SIZE; ++ unsigned int expected; ++ unsigned int observed; ++ char *chunk_start = compute_chunk_start_addr(tid); ++ ++ int nr_anamolies = 0; ++ ++ start_verification_log(tid, next_store_addr, ++ cur_sweep_id, prev_sweep_id); ++ ++ for (iter_ptr = (unsigned int *)chunk_start; ++ (unsigned long)iter_ptr < (unsigned long)chunk_start + size; ++ iter_ptr++) { ++ unsigned int expected_sweep_id; ++ ++ if (iter_ptr < next_store_addr) { ++ expected_sweep_id = cur_sweep_id; ++ } else { ++ expected_sweep_id = prev_sweep_id; ++ } ++ ++ expected = compute_store_pattern(tid, iter_ptr, expected_sweep_id); ++ ++ dcbf((volatile unsigned int*)iter_ptr); //Flush before reading ++ observed = *iter_ptr; ++ ++ if (observed != expected) { ++ nr_anamolies++; ++ log_anamoly(tid, iter_ptr, expected, observed); ++ } ++ } ++ ++ end_verification_log(tid, nr_anamolies); ++} ++ ++static void set_pthread_cpu(pthread_t th, int cpu) ++{ ++ cpu_set_t run_cpu_mask; ++ struct sched_param param; ++ ++ CPU_ZERO(&run_cpu_mask); ++ CPU_SET(cpu, &run_cpu_mask); ++ pthread_setaffinity_np(th, sizeof(cpu_set_t), &run_cpu_mask); ++ ++ param.sched_priority = 1; ++ if (0 && sched_setscheduler(0, SCHED_FIFO, ¶m) == -1) { ++ /* haven't reproduced with this setting, it kills random preemption which may be a factor */ ++ fprintf(stderr, "could not set SCHED_FIFO, run as root?\n"); ++ } ++} ++ ++static void set_mycpu(int cpu) ++{ ++ cpu_set_t run_cpu_mask; ++ struct sched_param param; ++ ++ CPU_ZERO(&run_cpu_mask); ++ CPU_SET(cpu, &run_cpu_mask); ++ sched_setaffinity(0, sizeof(cpu_set_t), &run_cpu_mask); ++ ++ param.sched_priority = 1; ++ if (0 && sched_setscheduler(0, SCHED_FIFO, ¶m) == -1) { ++ fprintf(stderr, "could not set SCHED_FIFO, run as root?\n"); ++ } ++} ++ ++static volatile int segv_wait; ++ ++static void segv_handler(int signo, siginfo_t *info, void *extra) ++{ ++ while (segv_wait) { ++ sched_yield(); ++ } ++ ++} ++ ++static void set_segv_handler(void) ++{ ++ struct sigaction sa; ++ ++ sa.sa_flags = SA_SIGINFO; ++ sa.sa_sigaction = segv_handler; ++ ++ if (sigaction(SIGSEGV, &sa, NULL) == -1) { ++ perror("sigaction"); ++ exit(EXIT_FAILURE); ++ } ++} ++ ++int timeout = 0; ++/* ++ * This function is executed by every rim_thread. ++ * ++ * This function performs sweeps over the exclusive chunks of the ++ * rim_threads executing the rim-sequence one word at a time. 
++ */
++static void *rim_fn(void *arg)
++{
++	unsigned int tid = *((unsigned int *)arg);
++
++	int size = RIM_CHUNK_SIZE;
++	char *chunk_start = compute_chunk_start_addr(tid);
++
++	unsigned int prev_sweep_id;
++	unsigned int cur_sweep_id = 0;
++
++	/* word access */
++	unsigned int pattern = cur_sweep_id;
++	unsigned int *pattern_ptr = &pattern;
++	unsigned int *w_ptr, read_data;
++
++	set_segv_handler();
++
++	/*
++	 * Let us initialize the chunk:
++	 *
++	 * Each word-aligned address addr in the chunk
++	 * is initialized to:
++	 *    |-------------------------------------------------|
++	 *    | tid   | word_offset(addr) |         0           |
++	 *    |-------------------------------------------------|
++	 */
++	for (w_ptr = (unsigned int *)chunk_start;
++	     (unsigned long)w_ptr < (unsigned long)(chunk_start) + size;
++	     w_ptr++) {
++
++		*pattern_ptr = compute_store_pattern(tid, w_ptr, cur_sweep_id);
++		*w_ptr = *pattern_ptr;
++	}
++
++	while (!corruption_found && !timeout) {
++		prev_sweep_id = cur_sweep_id;
++		cur_sweep_id = cur_sweep_id + 1;
++
++		for (w_ptr = (unsigned int *)chunk_start;
++		     (unsigned long)w_ptr < (unsigned long)(chunk_start) + size;
++		     w_ptr++) {
++			unsigned int old_pattern;
++
++			/*
++			 * Compute the pattern that we would have
++			 * stored at this location in the previous
++			 * sweep.
++			 */
++			old_pattern = compute_store_pattern(tid, w_ptr, prev_sweep_id);
++
++			/*
++			 * FLUSH: Ensure that we flush the contents of
++			 * the cache before loading.
++			 */
++			dcbf((volatile unsigned int *)w_ptr); // Flush
++
++			/* LOAD: Read the value */
++			read_data = *w_ptr; // Load
++
++			/*
++			 * COMPARE: Is it the same as what we had stored
++			 * in the previous sweep? It had better be!
++			 */
++			if (read_data != old_pattern) {
++				/* No it isn't! Tell everyone */
++				corruption_found = 1;
++			}
++
++			/*
++			 * Before performing a store, let us check if
++			 * any rim_thread has found a corruption.
++			 */
++			if (corruption_found || timeout) {
++				/*
++				 * Yes. Someone (including us!) has found
++				 * a corruption :(
++				 *
++				 * Let us verify that our chunk is
++				 * correct.
++				 */
++				/* But first, let us allow the dust to settle down! */
++				verify_chunk(tid, w_ptr, cur_sweep_id, prev_sweep_id);
++
++				return NULL;
++			}
++
++			/*
++			 * Compute the new pattern that we are going
++			 * to write to this location.
++			 */
++			*pattern_ptr = compute_store_pattern(tid, w_ptr, cur_sweep_id);
++
++			/*
++			 * STORE: Now let us write this pattern into
++			 * the location.
++			 */
++			*w_ptr = *pattern_ptr;
++		}
++	}
++
++	return NULL;
++}
++
++static unsigned long start_cpu = 0;
++static unsigned long nrthreads = 4;
++
++static pthread_t mem_snapshot_thread;
++
++static void *mem_snapshot_fn(void *arg)
++{
++	int page_size = getpagesize();
++	size_t size = page_size;
++	void *tmp = malloc(size);
++
++	while (!corruption_found && !timeout) {
++		/* Stop memory migration once corruption is found */
++		segv_wait = 1;
++
++		mprotect(map1, size, PROT_READ);
++
++		/*
++		 * Load from the working alias (map1). Loading from map2
++		 * also fails.
++		 */
++		memcpy(tmp, map1, size);
++
++		/*
++		 * Stores must go via map2 which has write permissions, but
++		 * the corrupted data tends to be seen in the snapshot buffer,
++		 * so corruption does not appear to be introduced at the
++		 * copy-back via map2 alias here.
++ */
++		memcpy(map2, tmp, size);
++		/*
++		 * Before releasing other threads, must ensure the copy
++		 * back via map2 is complete.
++		 */
++		asm volatile("sync" ::: "memory");
++		mprotect(map1, size, PROT_READ|PROT_WRITE);
++		asm volatile("sync" ::: "memory");
++		segv_wait = 0;
++
++		usleep(1); /* This value makes a big difference */
++	}
++
++	return NULL;
++}
++
++void alrm_sighandler(int sig)
++{
++	timeout = 1;
++}
++
++int main(int argc, char *argv[])
++{
++	int c;
++	int page_size = getpagesize();
++	time_t now;
++	int i, dir_error;
++	pthread_attr_t attr;
++	key_t shm_key = (key_t) getpid();
++	int shmid, run_time = 20 * 60;
++	struct sigaction sa_alrm;
++
++	snprintf(logdir, LOGDIR_NAME_SIZE,
++		 "/tmp/logdir-%u", (unsigned int)getpid());
++	while ((c = getopt(argc, argv, "r:hn:l:t:")) != -1) {
++		switch (c) {
++		case 'r':
++			start_cpu = strtoul(optarg, NULL, 10);
++			break;
++		case 'h':
++			printf("%s [-r <start_cpu>] [-n <nrthreads>] [-l <logdir>] [-t <run_time>]\n", argv[0]);
++			exit(0);
++			break;
++		case 'n':
++			nrthreads = strtoul(optarg, NULL, 10);
++			break;
++		case 'l':
++			strncpy(logdir, optarg, LOGDIR_NAME_SIZE);
++			break;
++		case 't':
++			run_time = strtoul(optarg, NULL, 10);
++			break;
++		default:
++			printf("invalid option\n");
++			exit(0);
++			break;
++		}
++	}
++
++	if (nrthreads > MAX_THREADS)
++		nrthreads = MAX_THREADS;
++
++	shmid = shmget(shm_key, page_size, IPC_CREAT|0666);
++	if (shmid < 0) {
++		err_msg("Failed shmget\n");
++	}
++
++	map1 = shmat(shmid, NULL, 0);
++	if (map1 == (void *) -1) {
++		err_msg("Failed shmat");
++	}
++
++	map2 = shmat(shmid, NULL, 0);
++	if (map2 == (void *) -1) {
++		err_msg("Failed shmat");
++	}
++
++	dir_error = mkdir(logdir, 0755);
++
++	if (dir_error) {
++		err_msg("Failed mkdir");
++	}
++
++	printf("start_cpu list:%lu\n", start_cpu);
++	printf("number of worker threads:%lu + 1 snapshot thread\n", nrthreads);
++	printf("Allocated address:0x%016lx + secondary map:0x%016lx\n", (unsigned long)map1, (unsigned long)map2);
++	printf("logdir at : %s\n", logdir);
++	printf("Timeout: %d seconds\n", run_time);
++
++	time(&now);
++	printf("=================================\n");
++	printf("     Starting Test\n");
++	printf("     %s", ctime(&now));
++	printf("=================================\n");
++
++	for (i = 0; i < nrthreads; i++) {
++		if (1 && !fork()) {
++			prctl(PR_SET_PDEATHSIG, SIGKILL);
++			set_mycpu(start_cpu + i);
++			for (;;)
++				sched_yield();
++			exit(0);
++		}
++	}
++
++	sa_alrm.sa_handler = &alrm_sighandler;
++	sigemptyset(&sa_alrm.sa_mask);
++	sa_alrm.sa_flags = 0;
++
++	if (sigaction(SIGALRM, &sa_alrm, 0) == -1) {
++		err_msg("Failed signal handler registration\n");
++	}
++
++	alarm(run_time);
++
++	pthread_attr_init(&attr);
++	for (i = 0; i < nrthreads; i++) {
++		rim_thread_ids[i] = i;
++		pthread_create(&rim_threads[i], &attr, rim_fn, &rim_thread_ids[i]);
++		set_pthread_cpu(rim_threads[i], start_cpu + i);
++	}
++
++	pthread_create(&mem_snapshot_thread, &attr, mem_snapshot_fn, map1);
++	set_pthread_cpu(mem_snapshot_thread, start_cpu + i);
++
++	pthread_join(mem_snapshot_thread, NULL);
++	for (i = 0; i < nrthreads; i++) {
++		pthread_join(rim_threads[i], NULL);
++	}
++
++	if (!timeout) {
++		time(&now);
++		printf("=================================\n");
++		printf("      Data Corruption Detected\n");
++		printf("      %s", ctime(&now));
++		printf("      See logfiles in %s\n", logdir);
++		printf("=================================\n");
++		return 1;
++	}
++	return 0;
++}
diff --git a/queue-4.14/selftests-powerpc-fix-compile-error-on-tlbie_test-due-to-newer-gcc.patch
b/queue-4.14/selftests-powerpc-fix-compile-error-on-tlbie_test-due-to-newer-gcc.patch new file mode 100644 index 00000000000..7a2ee950992 --- /dev/null +++ b/queue-4.14/selftests-powerpc-fix-compile-error-on-tlbie_test-due-to-newer-gcc.patch @@ -0,0 +1,46 @@ +From foo@baz Fri 08 Nov 2019 05:55:09 PM CET +From: Sandipan Das +Date: Thu, 17 Oct 2019 13:35:05 +0530 +Subject: selftests/powerpc: Fix compile error on tlbie_test due to newer gcc +To: gregkh@linuxfoundation.org +Cc: stable@vger.kernel.org, aneesh.kumar@linux.ibm.com, mpe@ellerman.id.au, linuxppc-dev@lists.ozlabs.org, "Desnes A. Nunes do Rosario" +Message-ID: <20191017080505.8348-6-sandipan@linux.ibm.com> + +From: Desnes A. Nunes do Rosario + +commit 5b216ea1c40cf06eead15054c70e238c9bd4729e upstream. + +Newer versions of GCC (>= 9) demand that the size of the string to be +copied must be explicitly smaller than the size of the destination. +Thus, the NULL char has to be taken into account on strncpy. + +This will avoid the following compiling error: + + tlbie_test.c: In function 'main': + tlbie_test.c:639:4: error: 'strncpy' specified bound 100 equals destination size + strncpy(logdir, optarg, LOGDIR_NAME_SIZE); + ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + cc1: all warnings being treated as errors + +Cc: stable@vger.kernel.org # v4.14 +Signed-off-by: Desnes A. Nunes do Rosario +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20191003211010.9711-1-desnesn@linux.ibm.com +[sandipan: Backported to v4.14] +Signed-off-by: Sandipan Das +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/powerpc/mm/tlbie_test.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/testing/selftests/powerpc/mm/tlbie_test.c ++++ b/tools/testing/selftests/powerpc/mm/tlbie_test.c +@@ -636,7 +636,7 @@ int main(int argc, char *argv[]) + nrthreads = strtoul(optarg, NULL, 10); + break; + case 'l': +- strncpy(logdir, optarg, LOGDIR_NAME_SIZE); ++ strncpy(logdir, optarg, LOGDIR_NAME_SIZE - 1); + break; + case 't': + run_time = strtoul(optarg, NULL, 10); diff --git a/queue-4.14/series b/queue-4.14/series index d65f6e94321..50121c9ebd5 100644 --- a/queue-4.14/series +++ b/queue-4.14/series @@ -52,3 +52,11 @@ sched-wake_q-fix-wakeup-ordering-for-wake_q.patch kbuild-use-fmacro-prefix-map-to-make-__file__-a-rela.patch kbuild-add-fcf-protection-none-when-using-retpoline-.patch platform-x86-pmc_atom-add-siemens-simatic-ipc227e-to-critclk_systems-dmi-table.patch +iio-adc-stm32-adc-move-registers-definitions.patch +iio-adc-stm32-adc-fix-a-race-when-using-several-adcs-with-dma-and-irq.patch +powerpc-mm-fixup-tlbie-vs-store-ordering-issue-on-power9.patch +powerpc-book3s64-mm-don-t-do-tlbie-fixup-for-some-hardware-revisions.patch +powerpc-book3s64-radix-rename-cpu_ftr_p9_tlbie_bug-feature-flag.patch +powerpc-mm-fixup-tlbie-vs-mtpidr-mtlpidr-ordering-issue-on-power9.patch +selftests-powerpc-add-test-case-for-tlbie-vs-mtpidr-ordering-issue.patch +selftests-powerpc-fix-compile-error-on-tlbie_test-due-to-newer-gcc.patch