4.9-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Thu, 21 Dec 2017 08:40:09 +0000 (09:40 +0100)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Thu, 21 Dec 2017 08:40:09 +0000 (09:40 +0100)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 21 Dec 2017 08:40:09 +0000 (09:40 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 21 Dec 2017 08:40:09 +0000 (09:40 +0100)
diff --git a/queue-4.9/alsa-hda-add-support-for-docking-station-for-hp-820-g2.patch b/queue-4.9/alsa-hda-add-support-for-docking-station-for-hp-820-g2.patch

new file mode 100644 (file)

index 0000000..f9cfa54
--- /dev/null
+++ b/queue-4.9/alsa-hda-add-support-for-docking-station-for-hp-820-g2.patch
@@ -0,0 +1,65 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Jaroslav Kysela <perex@perex.cz>
+Date: Thu, 9 Mar 2017 13:29:13 +0100
+Subject: ALSA: hda - add support for docking station for HP 820 G2
+
+From: Jaroslav Kysela <perex@perex.cz>
+
+
+[ Upstream commit 04d5466a976b096364a39a63ac264c1b3a5f8fa1 ]
+
+This tested patch adds missing initialization for Line-In/Out PINs for
+the docking station for HP 820 G2.
+
+Signed-off-by: Jaroslav Kysela <perex@perex.cz>
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ sound/pci/hda/patch_realtek.c |   14 +++++++++++++-
+ 1 file changed, 13 insertions(+), 1 deletion(-)
+
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -4854,6 +4854,7 @@ enum {
+       ALC286_FIXUP_HP_GPIO_LED,
+       ALC280_FIXUP_HP_GPIO2_MIC_HOTKEY,
+       ALC280_FIXUP_HP_DOCK_PINS,
++      ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED,
+       ALC280_FIXUP_HP_9480M,
+       ALC288_FIXUP_DELL_HEADSET_MODE,
+       ALC288_FIXUP_DELL1_MIC_NO_PRESENCE,
+@@ -5394,6 +5395,16 @@ static const struct hda_fixup alc269_fix
+               .chained = true,
+               .chain_id = ALC280_FIXUP_HP_GPIO4
+       },
++      [ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED] = {
++              .type = HDA_FIXUP_PINS,
++              .v.pins = (const struct hda_pintbl[]) {
++                      { 0x1b, 0x21011020 }, /* line-out */
++                      { 0x18, 0x2181103f }, /* line-in */
++                      { },
++              },
++              .chained = true,
++              .chain_id = ALC269_FIXUP_HP_GPIO_MIC1_LED
++      },
+       [ALC280_FIXUP_HP_9480M] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc280_fixup_hp_9480m,
+@@ -5646,7 +5657,7 @@ static const struct snd_pci_quirk alc269
+       SND_PCI_QUIRK(0x103c, 0x2256, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED),
+       SND_PCI_QUIRK(0x103c, 0x2257, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED),
+       SND_PCI_QUIRK(0x103c, 0x2259, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED),
+-      SND_PCI_QUIRK(0x103c, 0x225a, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED),
++      SND_PCI_QUIRK(0x103c, 0x225a, "HP", ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED),
+       SND_PCI_QUIRK(0x103c, 0x2260, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+       SND_PCI_QUIRK(0x103c, 0x2263, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+       SND_PCI_QUIRK(0x103c, 0x2264, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+@@ -5812,6 +5823,7 @@ static const struct hda_model_fixup alc2
+       {.id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC, .name = "headset-mode-no-hp-mic"},
+       {.id = ALC269_FIXUP_LENOVO_DOCK, .name = "lenovo-dock"},
+       {.id = ALC269_FIXUP_HP_GPIO_LED, .name = "hp-gpio-led"},
++      {.id = ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED, .name = "hp-dock-gpio-mic1-led"},
+       {.id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, .name = "dell-headset-multi"},
+       {.id = ALC269_FIXUP_DELL2_MIC_NO_PRESENCE, .name = "dell-headset-dock"},
+       {.id = ALC283_FIXUP_CHROME_BOOK, .name = "alc283-dac-wcaps"},
diff --git a/queue-4.9/alsa-hda-add-support-for-docking-station-for-hp-840-g3.patch b/queue-4.9/alsa-hda-add-support-for-docking-station-for-hp-840-g3.patch

new file mode 100644 (file)

index 0000000..6664224
--- /dev/null
+++ b/queue-4.9/alsa-hda-add-support-for-docking-station-for-hp-840-g3.patch
@@ -0,0 +1,62 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Jaroslav Kysela <perex@perex.cz>
+Date: Thu, 9 Mar 2017 13:30:09 +0100
+Subject: ALSA: hda - add support for docking station for HP 840 G3
+
+From: Jaroslav Kysela <perex@perex.cz>
+
+
+[ Upstream commit cc3a47a248d7791ef0d2c81a35c46769e55e4c6c ]
+
+This tested patch adds missing initialization for Line-In/Out PINs for
+the docking station for HP 840 G3.
+
+Signed-off-by: Jaroslav Kysela <perex@perex.cz>
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ sound/pci/hda/patch_conexant.c |   11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+--- a/sound/pci/hda/patch_conexant.c
++++ b/sound/pci/hda/patch_conexant.c
+@@ -261,6 +261,7 @@ enum {
+       CXT_FIXUP_HP_530,
+       CXT_FIXUP_CAP_MIX_AMP_5047,
+       CXT_FIXUP_MUTE_LED_EAPD,
++      CXT_FIXUP_HP_DOCK,
+       CXT_FIXUP_HP_SPECTRE,
+       CXT_FIXUP_HP_GATE_MIC,
+ };
+@@ -778,6 +779,14 @@ static const struct hda_fixup cxt_fixups
+               .type = HDA_FIXUP_FUNC,
+               .v.func = cxt_fixup_mute_led_eapd,
+       },
++      [CXT_FIXUP_HP_DOCK] = {
++              .type = HDA_FIXUP_PINS,
++              .v.pins = (const struct hda_pintbl[]) {
++                      { 0x16, 0x21011020 }, /* line-out */
++                      { 0x18, 0x2181103f }, /* line-in */
++                      { }
++              }
++      },
+       [CXT_FIXUP_HP_SPECTRE] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+@@ -839,6 +848,7 @@ static const struct snd_pci_quirk cxt506
+       SND_PCI_QUIRK(0x1025, 0x0543, "Acer Aspire One 522", CXT_FIXUP_STEREO_DMIC),
+       SND_PCI_QUIRK(0x1025, 0x054c, "Acer Aspire 3830TG", CXT_FIXUP_ASPIRE_DMIC),
+       SND_PCI_QUIRK(0x1025, 0x054f, "Acer Aspire 4830T", CXT_FIXUP_ASPIRE_DMIC),
++      SND_PCI_QUIRK(0x103c, 0x8079, "HP EliteBook 840 G3", CXT_FIXUP_HP_DOCK),
+       SND_PCI_QUIRK(0x103c, 0x8174, "HP Spectre x360", CXT_FIXUP_HP_SPECTRE),
+       SND_PCI_QUIRK(0x103c, 0x8115, "HP Z1 Gen3", CXT_FIXUP_HP_GATE_MIC),
+       SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN),
+@@ -872,6 +882,7 @@ static const struct hda_model_fixup cxt5
+       { .id = CXT_PINCFG_LEMOTE_A1205, .name = "lemote-a1205" },
+       { .id = CXT_FIXUP_OLPC_XO, .name = "olpc-xo" },
+       { .id = CXT_FIXUP_MUTE_LED_EAPD, .name = "mute-led-eapd" },
++      { .id = CXT_FIXUP_HP_DOCK, .name = "hp-dock" },
+       {}
+ };
+ 
diff --git a/queue-4.9/arm-dma-mapping-disallow-dma_get_sgtable-for-non-kernel-managed-memory.patch b/queue-4.9/arm-dma-mapping-disallow-dma_get_sgtable-for-non-kernel-managed-memory.patch

new file mode 100644 (file)

index 0000000..7d6fcc7
--- /dev/null
+++ b/queue-4.9/arm-dma-mapping-disallow-dma_get_sgtable-for-non-kernel-managed-memory.patch
@@ -0,0 +1,65 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Wed, 29 Mar 2017 17:12:47 +0100
+Subject: ARM: dma-mapping: disallow dma_get_sgtable() for non-kernel managed memory
+
+From: Russell King <rmk+kernel@armlinux.org.uk>
+
+
+[ Upstream commit 916a008b4b8ecc02fbd035cfb133773dba1ff3d7 ]
+
+dma_get_sgtable() tries to create a scatterlist table containing valid
+struct page pointers for the coherent memory allocation passed in to it.
+
+However, memory can be declared via dma_declare_coherent_memory(), or
+via other reservation schemes which means that coherent memory is not
+guaranteed to be backed by struct pages.  In such cases, the resulting
+scatterlist table contains pointers to invalid pages, which causes
+a kernel oops later.
+
+This patch adds detection of such memory, and refuses to create a
+scatterlist table for such memory.
+
+Reported-by: Shuah Khan <shuahkhan@gmail.com>
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm/mm/dma-mapping.c |   20 +++++++++++++++++++-
+ 1 file changed, 19 insertions(+), 1 deletion(-)
+
+--- a/arch/arm/mm/dma-mapping.c
++++ b/arch/arm/mm/dma-mapping.c
+@@ -930,13 +930,31 @@ static void arm_coherent_dma_free(struct
+       __arm_dma_free(dev, size, cpu_addr, handle, attrs, true);
+ }
+ 
++/*
++ * The whole dma_get_sgtable() idea is fundamentally unsafe - it seems
++ * that the intention is to allow exporting memory allocated via the
++ * coherent DMA APIs through the dma_buf API, which only accepts a
++ * scattertable.  This presents a couple of problems:
++ * 1. Not all memory allocated via the coherent DMA APIs is backed by
++ *    a struct page
++ * 2. Passing coherent DMA memory into the streaming APIs is not allowed
++ *    as we will try to flush the memory through a different alias to that
++ *    actually being used (and the flushes are redundant.)
++ */
+ int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
+                void *cpu_addr, dma_addr_t handle, size_t size,
+                unsigned long attrs)
+ {
+-      struct page *page = pfn_to_page(dma_to_pfn(dev, handle));
++      unsigned long pfn = dma_to_pfn(dev, handle);
++      struct page *page;
+       int ret;
+ 
++      /* If the PFN is not valid, we do not have a struct page */
++      if (!pfn_valid(pfn))
++              return -ENXIO;
++
++      page = pfn_to_page(pfn);
++
+       ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
+       if (unlikely(ret))
+               return ret;
diff --git a/queue-4.9/arm-dts-am335x-evmsk-adjust-mmc2-param-to-allow-suspend.patch b/queue-4.9/arm-dts-am335x-evmsk-adjust-mmc2-param-to-allow-suspend.patch

new file mode 100644 (file)

index 0000000..96635b2
--- /dev/null
+++ b/queue-4.9/arm-dts-am335x-evmsk-adjust-mmc2-param-to-allow-suspend.patch
@@ -0,0 +1,31 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: "Reizer, Eyal" <eyalr@ti.com>
+Date: Sun, 26 Mar 2017 08:53:10 +0000
+Subject: ARM: dts: am335x-evmsk: adjust mmc2 param to allow suspend
+
+From: "Reizer, Eyal" <eyalr@ti.com>
+
+
+[ Upstream commit 9bcf53f34a2c1cebc45cc12e273dcd5f51fbc099 ]
+
+mmc2 used for wl12xx was missing the keep-power-in-suspend
+parameter. As a result the board couldn't reach suspend state.
+
+Signed-off-by: Eyal Reizer <eyalr@ti.com>
+Signed-off-by: Tony Lindgren <tony@atomide.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm/boot/dts/am335x-evmsk.dts |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/arm/boot/dts/am335x-evmsk.dts
++++ b/arch/arm/boot/dts/am335x-evmsk.dts
+@@ -668,6 +668,7 @@
+       ti,non-removable;
+       bus-width = <4>;
+       cap-power-off-card;
++      keep-power-in-suspend;
+       pinctrl-names = "default";
+       pinctrl-0 = <&mmc2_pins>;
+ 
diff --git a/queue-4.9/arm-dts-ti-fix-pci-bus-dtc-warnings.patch b/queue-4.9/arm-dts-ti-fix-pci-bus-dtc-warnings.patch

new file mode 100644 (file)

index 0000000..3e121aa
--- /dev/null
+++ b/queue-4.9/arm-dts-ti-fix-pci-bus-dtc-warnings.patch
@@ -0,0 +1,41 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Rob Herring <robh@kernel.org>
+Date: Tue, 21 Mar 2017 21:03:01 -0500
+Subject: ARM: dts: ti: fix PCI bus dtc warnings
+
+From: Rob Herring <robh@kernel.org>
+
+
+[ Upstream commit 7d79f6098d82f8c09914d7799bc96891ad9c3baf ]
+
+dtc recently added PCI bus checks. Fix these warnings.
+
+Signed-off-by: Rob Herring <robh@kernel.org>
+Cc: "Benoît Cousson" <bcousson@baylibre.com>
+Cc: Tony Lindgren <tony@atomide.com>
+Cc: linux-omap@vger.kernel.org
+Signed-off-by: Tony Lindgren <tony@atomide.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm/boot/dts/dra7.dtsi |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/arm/boot/dts/dra7.dtsi
++++ b/arch/arm/boot/dts/dra7.dtsi
+@@ -282,6 +282,7 @@
+                               device_type = "pci";
+                               ranges = <0x81000000 0 0          0x03000 0 0x00010000
+                                         0x82000000 0 0x20013000 0x13000 0 0xffed000>;
++                              bus-range = <0x00 0xff>;
+                               #interrupt-cells = <1>;
+                               num-lanes = <1>;
+                               linux,pci-domain = <0>;
+@@ -318,6 +319,7 @@
+                               device_type = "pci";
+                               ranges = <0x81000000 0 0          0x03000 0 0x00010000
+                                         0x82000000 0 0x30013000 0x13000 0 0xffed000>;
++                              bus-range = <0x00 0xff>;
+                               #interrupt-cells = <1>;
+                               num-lanes = <1>;
+                               linux,pci-domain = <1>;
diff --git a/queue-4.9/arm-kprobes-align-stack-to-8-bytes-in-test-code.patch b/queue-4.9/arm-kprobes-align-stack-to-8-bytes-in-test-code.patch

new file mode 100644 (file)

index 0000000..277f47e
--- /dev/null
+++ b/queue-4.9/arm-kprobes-align-stack-to-8-bytes-in-test-code.patch
@@ -0,0 +1,72 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Jon Medhurst <tixy@linaro.org>
+Date: Thu, 2 Mar 2017 13:04:09 +0000
+Subject: arm: kprobes: Align stack to 8-bytes in test code
+
+From: Jon Medhurst <tixy@linaro.org>
+
+
+[ Upstream commit 974310d047f3c7788a51d10c8d255eebdb1fa857 ]
+
+kprobes test cases need to have a stack that is aligned to an 8-byte
+boundary because they call other functions (and the ARM ABI mandates
+that alignment) and because test cases include 64-bit accesses to the
+stack. Unfortunately, GCC doesn't ensure this alignment for inline
+assembler and for the code in question seems to always misalign it by
+pushing just the LR register onto the stack. We therefore need to
+explicitly perform stack alignment at the start of each test case.
+
+Without this fix, some test cases will generate alignment faults on
+systems where alignment is enforced. Even if the kernel is configured to
+handle these faults in software, triggering them is ugly. It also
+exposes limitations in the fault handling code which doesn't cope with
+writes to the stack. E.g. when handling this instruction
+
+   strd r6, [sp, #-64]!
+
+the fault handling code will write to a stack location below the SP
+value at the point the fault occurred, which coincides with where the
+exception handler has pushed the saved register context. This results in
+corruption of those registers.
+
+Signed-off-by: Jon Medhurst <tixy@linaro.org>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm/probes/kprobes/test-core.c |   11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+--- a/arch/arm/probes/kprobes/test-core.c
++++ b/arch/arm/probes/kprobes/test-core.c
+@@ -976,7 +976,10 @@ static void coverage_end(void)
+ void __naked __kprobes_test_case_start(void)
+ {
+       __asm__ __volatile__ (
+-              "stmdb  sp!, {r4-r11}                           \n\t"
++              "mov    r2, sp                                  \n\t"
++              "bic    r3, r2, #7                              \n\t"
++              "mov    sp, r3                                  \n\t"
++              "stmdb  sp!, {r2-r11}                           \n\t"
+               "sub    sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t"
+               "bic    r0, lr, #1  @ r0 = inline data          \n\t"
+               "mov    r1, sp                                  \n\t"
+@@ -996,7 +999,8 @@ void __naked __kprobes_test_case_end_32(
+               "movne  pc, r0                                  \n\t"
+               "mov    r0, r4                                  \n\t"
+               "add    sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t"
+-              "ldmia  sp!, {r4-r11}                           \n\t"
++              "ldmia  sp!, {r2-r11}                           \n\t"
++              "mov    sp, r2                                  \n\t"
+               "mov    pc, r0                                  \n\t"
+       );
+ }
+@@ -1012,7 +1016,8 @@ void __naked __kprobes_test_case_end_16(
+               "bxne   r0                                      \n\t"
+               "mov    r0, r4                                  \n\t"
+               "add    sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t"
+-              "ldmia  sp!, {r4-r11}                           \n\t"
++              "ldmia  sp!, {r2-r11}                           \n\t"
++              "mov    sp, r2                                  \n\t"
+               "bx     r0                                      \n\t"
+       );
+ }
diff --git a/queue-4.9/arm-kprobes-fix-the-return-address-of-multiple-kretprobes.patch b/queue-4.9/arm-kprobes-fix-the-return-address-of-multiple-kretprobes.patch

new file mode 100644 (file)

index 0000000..d33d548
--- /dev/null
+++ b/queue-4.9/arm-kprobes-fix-the-return-address-of-multiple-kretprobes.patch
@@ -0,0 +1,99 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Masami Hiramatsu <mhiramat@kernel.org>
+Date: Tue, 14 Feb 2017 00:05:59 +0900
+Subject: arm: kprobes: Fix the return address of multiple kretprobes
+
+From: Masami Hiramatsu <mhiramat@kernel.org>
+
+
+[ Upstream commit 06553175f585b52509c7df37d6f4a50aacb7b211 ]
+
+This is the arm port of commit 737480a0d525 ("kprobes/x86:
+Fix the return address of multiple kretprobes").
+
+Fix the return address of subsequent kretprobes when multiple
+kretprobes are set on the same function.
+
+For example:
+
+  # cd /sys/kernel/debug/tracing
+  # echo "r:event1 sys_symlink" > kprobe_events
+  # echo "r:event2 sys_symlink" >> kprobe_events
+  # echo 1 > events/kprobes/enable
+  # ln -s /tmp/foo /tmp/bar
+
+ (without this patch)
+
+  # cat trace | grep -v ^#
+              ln-82    [000] dn.2    68.446525: event1: (kretprobe_trampoline+0x0/0x18 <- SyS_symlink)
+              ln-82    [000] dn.2    68.447831: event2: (ret_fast_syscall+0x0/0x1c <- SyS_symlink)
+
+ (with this patch)
+
+  # cat trace | grep -v ^#
+              ln-81    [000] dn.1    39.463469: event1: (ret_fast_syscall+0x0/0x1c <- SyS_symlink)
+              ln-81    [000] dn.1    39.464701: event2: (ret_fast_syscall+0x0/0x1c <- SyS_symlink)
+
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: KUMANO Syuhei <kumano.prog@gmail.com>
+Signed-off-by: Jon Medhurst <tixy@linaro.org>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm/probes/kprobes/core.c |   24 ++++++++++++++++++++++--
+ 1 file changed, 22 insertions(+), 2 deletions(-)
+
+--- a/arch/arm/probes/kprobes/core.c
++++ b/arch/arm/probes/kprobes/core.c
+@@ -433,6 +433,7 @@ static __used __kprobes void *trampoline
+       struct hlist_node *tmp;
+       unsigned long flags, orig_ret_address = 0;
+       unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
++      kprobe_opcode_t *correct_ret_addr = NULL;
+ 
+       INIT_HLIST_HEAD(&empty_rp);
+       kretprobe_hash_lock(current, &head, &flags);
+@@ -455,14 +456,34 @@ static __used __kprobes void *trampoline
+                       /* another task is sharing our hash bucket */
+                       continue;
+ 
++              orig_ret_address = (unsigned long)ri->ret_addr;
++
++              if (orig_ret_address != trampoline_address)
++                      /*
++                       * This is the real return address. Any other
++                       * instances associated with this task are for
++                       * other calls deeper on the call stack
++                       */
++                      break;
++      }
++
++      kretprobe_assert(ri, orig_ret_address, trampoline_address);
++
++      correct_ret_addr = ri->ret_addr;
++      hlist_for_each_entry_safe(ri, tmp, head, hlist) {
++              if (ri->task != current)
++                      /* another task is sharing our hash bucket */
++                      continue;
++
++              orig_ret_address = (unsigned long)ri->ret_addr;
+               if (ri->rp && ri->rp->handler) {
+                       __this_cpu_write(current_kprobe, &ri->rp->kp);
+                       get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
++                      ri->ret_addr = correct_ret_addr;
+                       ri->rp->handler(ri, regs);
+                       __this_cpu_write(current_kprobe, NULL);
+               }
+ 
+-              orig_ret_address = (unsigned long)ri->ret_addr;
+               recycle_rp_inst(ri, &empty_rp);
+ 
+               if (orig_ret_address != trampoline_address)
+@@ -474,7 +495,6 @@ static __used __kprobes void *trampoline
+                       break;
+       }
+ 
+-      kretprobe_assert(ri, orig_ret_address, trampoline_address);
+       kretprobe_hash_unlock(current, &flags);
+ 
+       hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
diff --git a/queue-4.9/asoc-img-parallel-out-add-pm_runtime_get-put-to-set_fmt-callback.patch b/queue-4.9/asoc-img-parallel-out-add-pm_runtime_get-put-to-set_fmt-callback.patch

new file mode 100644 (file)

index 0000000..80f9770
--- /dev/null
+++ b/queue-4.9/asoc-img-parallel-out-add-pm_runtime_get-put-to-set_fmt-callback.patch
@@ -0,0 +1,37 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Ed Blake <ed.blake@sondrel.com>
+Date: Mon, 2 Oct 2017 11:00:33 +0100
+Subject: ASoC: img-parallel-out: Add pm_runtime_get/put to set_fmt callback
+
+From: Ed Blake <ed.blake@sondrel.com>
+
+
+[ Upstream commit c70458890ff15d858bd347fa9f563818bcd6e457 ]
+
+Add pm_runtime_get_sync and pm_runtime_put calls to set_fmt callback
+function. This fixes a bus error during boot when CONFIG_SUSPEND is
+defined when this function gets called while the device is runtime
+disabled and device registers are accessed while the clock is disabled.
+
+Signed-off-by: Ed Blake <ed.blake@sondrel.com>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ sound/soc/img/img-parallel-out.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/sound/soc/img/img-parallel-out.c
++++ b/sound/soc/img/img-parallel-out.c
+@@ -166,9 +166,11 @@ static int img_prl_out_set_fmt(struct sn
+               return -EINVAL;
+       }
+ 
++      pm_runtime_get_sync(prl->dev);
+       reg = img_prl_out_readl(prl, IMG_PRL_OUT_CTL);
+       reg = (reg & ~IMG_PRL_OUT_CTL_EDGE_MASK) | control_set;
+       img_prl_out_writel(prl, reg, IMG_PRL_OUT_CTL);
++      pm_runtime_put(prl->dev);
+ 
+       return 0;
+ }
diff --git a/queue-4.9/asoc-sti-fix-reader-substream-pointer-set.patch b/queue-4.9/asoc-sti-fix-reader-substream-pointer-set.patch

new file mode 100644 (file)

index 0000000..cd697d9
--- /dev/null
+++ b/queue-4.9/asoc-sti-fix-reader-substream-pointer-set.patch
@@ -0,0 +1,40 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Arnaud Pouliquen <arnaud.pouliquen@st.com>
+Date: Thu, 23 Mar 2017 19:39:54 +0100
+Subject: ASoC: STI: Fix reader substream pointer set
+
+From: Arnaud Pouliquen <arnaud.pouliquen@st.com>
+
+
+[ Upstream commit 3c9d3f1bc2defd418b5933bbc928096c9c686d3b ]
+
+reader->substream is used in the IRQ handler for the error case but is never set.
+Set it to the PCM substream on DAI startup and clear it on DAI shutdown.
+
+Signed-off-by: Arnaud Pouliquen <arnaud.pouliquen@st.com>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ sound/soc/sti/uniperif_reader.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/sound/soc/sti/uniperif_reader.c
++++ b/sound/soc/sti/uniperif_reader.c
+@@ -364,6 +364,8 @@ static int uni_reader_startup(struct snd
+       struct uniperif *reader = priv->dai_data.uni;
+       int ret;
+ 
++      reader->substream = substream;
++
+       if (!UNIPERIF_TYPE_IS_TDM(reader))
+               return 0;
+ 
+@@ -393,6 +395,7 @@ static void uni_reader_shutdown(struct s
+               /* Stop the reader */
+               uni_reader_stop(reader);
+       }
++      reader->substream = NULL;
+ }
+ 
+ static const struct snd_soc_dai_ops uni_reader_dai_ops = {
diff --git a/queue-4.9/backlight-pwm_bl-fix-overflow-condition.patch b/queue-4.9/backlight-pwm_bl-fix-overflow-condition.patch

new file mode 100644 (file)

index 0000000..228eb5c
--- /dev/null
+++ b/queue-4.9/backlight-pwm_bl-fix-overflow-condition.patch
@@ -0,0 +1,46 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Derek Basehore <dbasehore@chromium.org>
+Date: Tue, 29 Aug 2017 13:34:34 -0700
+Subject: backlight: pwm_bl: Fix overflow condition
+
+From: Derek Basehore <dbasehore@chromium.org>
+
+
+[ Upstream commit 5d0c49acebc9488e37db95f1d4a55644e545ffe7 ]
+
+This fixes an overflow condition that can happen with high max
+brightness and period values in compute_duty_cycle. This fixes it by
+using a 64 bit variable for computing the duty cycle.
+
+Signed-off-by: Derek Basehore <dbasehore@chromium.org>
+Acked-by: Thierry Reding <thierry.reding@gmail.com>
+Reviewed-by: Brian Norris <briannorris@chromium.org>
+Signed-off-by: Lee Jones <lee.jones@linaro.org>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/video/backlight/pwm_bl.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/drivers/video/backlight/pwm_bl.c
++++ b/drivers/video/backlight/pwm_bl.c
+@@ -79,14 +79,17 @@ static void pwm_backlight_power_off(stru
+ static int compute_duty_cycle(struct pwm_bl_data *pb, int brightness)
+ {
+       unsigned int lth = pb->lth_brightness;
+-      int duty_cycle;
++      u64 duty_cycle;
+ 
+       if (pb->levels)
+               duty_cycle = pb->levels[brightness];
+       else
+               duty_cycle = brightness;
+ 
+-      return (duty_cycle * (pb->period - lth) / pb->scale) + lth;
++      duty_cycle *= pb->period - lth;
++      do_div(duty_cycle, pb->scale);
++
++      return duty_cycle + lth;
+ }
+ 
+ static int pwm_backlight_update_status(struct backlight_device *bl)
diff --git a/queue-4.9/bna-avoid-writing-uninitialized-data-into-hw-registers.patch b/queue-4.9/bna-avoid-writing-uninitialized-data-into-hw-registers.patch

new file mode 100644 (file)

index 0000000..ae9d20c
--- /dev/null
+++ b/queue-4.9/bna-avoid-writing-uninitialized-data-into-hw-registers.patch
@@ -0,0 +1,69 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Arnd Bergmann <arnd@arndb.de>
+Date: Thu, 23 Mar 2017 17:07:26 +0100
+Subject: bna: avoid writing uninitialized data into hw registers
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+
+[ Upstream commit a5af83925363eb85d467933e3d6ec5a87001eb7c ]
+
+The latest gcc-7 snapshot warns about bfa_ioc_send_enable/bfa_ioc_send_disable
+writing undefined values into the hardware registers:
+
+drivers/net/ethernet/brocade/bna/bfa_ioc.c: In function 'bfa_iocpf_sm_disabling_entry':
+arch/arm/include/asm/io.h:109:22: error: '*((void *)&disable_req+4)' is used uninitialized in this function [-Werror=uninitialized]
+arch/arm/include/asm/io.h:109:22: error: '*((void *)&disable_req+8)' is used uninitialized in this function [-Werror=uninitialized]
+
+The two functions look like they should do the same thing, but only one
+of them initializes the time stamp and clscode field. The fact that we
+only get a warning for one of the two functions seems to be arbitrary,
+based on the inlining decisions in the compiler.
+
+To address this, I'm making both functions do the same thing:
+
+- set the clscode from the ioc structure in both
+- set the time stamp from ktime_get_real_seconds (which also
+  avoids the signed-integer overflow in 2038 and extends the
+  well-defined behavior until 2106).
+- zero-fill the reserved field
+
+Fixes: 8b230ed8ec96 ("bna: Brocade 10Gb Ethernet device driver")
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/brocade/bna/bfa_ioc.c |   10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/brocade/bna/bfa_ioc.c
++++ b/drivers/net/ethernet/brocade/bna/bfa_ioc.c
+@@ -1930,13 +1930,13 @@ static void
+ bfa_ioc_send_enable(struct bfa_ioc *ioc)
+ {
+       struct bfi_ioc_ctrl_req enable_req;
+-      struct timeval tv;
+ 
+       bfi_h2i_set(enable_req.mh, BFI_MC_IOC, BFI_IOC_H2I_ENABLE_REQ,
+                   bfa_ioc_portid(ioc));
+       enable_req.clscode = htons(ioc->clscode);
+-      do_gettimeofday(&tv);
+-      enable_req.tv_sec = ntohl(tv.tv_sec);
++      enable_req.rsvd = htons(0);
++      /* overflow in 2106 */
++      enable_req.tv_sec = ntohl(ktime_get_real_seconds());
+       bfa_ioc_mbox_send(ioc, &enable_req, sizeof(struct bfi_ioc_ctrl_req));
+ }
+ 
+@@ -1947,6 +1947,10 @@ bfa_ioc_send_disable(struct bfa_ioc *ioc
+ 
+       bfi_h2i_set(disable_req.mh, BFI_MC_IOC, BFI_IOC_H2I_DISABLE_REQ,
+                   bfa_ioc_portid(ioc));
++      disable_req.clscode = htons(ioc->clscode);
++      disable_req.rsvd = htons(0);
++      /* overflow in 2106 */
++      disable_req.tv_sec = ntohl(ktime_get_real_seconds());
+       bfa_ioc_mbox_send(ioc, &disable_req, sizeof(struct bfi_ioc_ctrl_req));
+ }
+ 
diff --git a/queue-4.9/bna-integer-overflow-bug-in-debugfs.patch b/queue-4.9/bna-integer-overflow-bug-in-debugfs.patch

new file mode 100644 (file)

index 0000000..d62c602
--- /dev/null
+++ b/queue-4.9/bna-integer-overflow-bug-in-debugfs.patch
@@ -0,0 +1,38 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Fri, 17 Mar 2017 23:52:35 +0300
+Subject: bna: integer overflow bug in debugfs
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+
+[ Upstream commit 13e2d5187f6b965ba3556caedb914baf81b98ed2 ]
+
+We could allocate less memory than intended because we do:
+
+       bnad->regdata = kzalloc(len << 2, GFP_KERNEL);
+
+The shift can overflow leading to a crash.  This is debugfs code so the
+impact is very small.
+
+Fixes: 7afc5dbde091 ("bna: Add debugfs interface.")
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Acked-by: Rasesh Mody <rasesh.mody@cavium.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/brocade/bna/bnad_debugfs.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c
++++ b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c
+@@ -325,7 +325,7 @@ bnad_debugfs_write_regrd(struct file *fi
+               return PTR_ERR(kern_buf);
+ 
+       rc = sscanf(kern_buf, "%x:%x", &addr, &len);
+-      if (rc < 2) {
++      if (rc < 2 || len > UINT_MAX >> 2) {
+               netdev_warn(bnad->netdev, "failed to read user buffer\n");
+               kfree(kern_buf);
+               return -EINVAL;
diff --git a/queue-4.9/bnxt_en-fix-null-pointer-dereference-in-reopen-failure-path.patch b/queue-4.9/bnxt_en-fix-null-pointer-dereference-in-reopen-failure-path.patch

new file mode 100644 (file)

index 0000000..6b69c2e
--- /dev/null
+++ b/queue-4.9/bnxt_en-fix-null-pointer-dereference-in-reopen-failure-path.patch
@@ -0,0 +1,62 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Sankar Patchineelam <sankar.patchineelam@broadcom.com>
+Date: Tue, 28 Mar 2017 19:47:29 -0400
+Subject: bnxt_en: Fix NULL pointer dereference in reopen failure path
+
+From: Sankar Patchineelam <sankar.patchineelam@broadcom.com>
+
+
+[ Upstream commit 2247925f0942dc4e7c09b1cde45ca18461d94c5f ]
+
+Net device reset can fail when the h/w or f/w is in a bad state.
+Subsequent netdevice open fails in bnxt_hwrm_stat_ctx_alloc().
+The cleanup invokes bnxt_hwrm_resource_free() which in turn
+calls bnxt_disable_int().  In this routine, the code segment
+
+if (ring->fw_ring_id != INVALID_HW_RING_ID)
+   BNXT_CP_DB(cpr->cp_doorbell, cpr->cp_raw_cons);
+
+results in NULL pointer dereference as cpr->cp_doorbell is not yet
+initialized, and fw_ring_id is zero.
+
+The fix is to initialize cpr fw_ring_id to INVALID_HW_RING_ID before
+bnxt_init_chip() is invoked.
+
+Signed-off-by: Sankar Patchineelam <sankar.patchineelam@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c |   13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -2381,6 +2381,18 @@ static int bnxt_init_one_rx_ring(struct
+       return 0;
+ }
+ 
++static void bnxt_init_cp_rings(struct bnxt *bp)
++{
++      int i;
++
++      for (i = 0; i < bp->cp_nr_rings; i++) {
++              struct bnxt_cp_ring_info *cpr = &bp->bnapi[i]->cp_ring;
++              struct bnxt_ring_struct *ring = &cpr->cp_ring_struct;
++
++              ring->fw_ring_id = INVALID_HW_RING_ID;
++      }
++}
++
+ static int bnxt_init_rx_rings(struct bnxt *bp)
+ {
+       int i, rc = 0;
+@@ -4700,6 +4712,7 @@ static int bnxt_shutdown_nic(struct bnxt
+ 
+ static int bnxt_init_nic(struct bnxt *bp, bool irq_re_init)
+ {
++      bnxt_init_cp_rings(bp);
+       bnxt_init_rx_rings(bp);
+       bnxt_init_tx_rings(bp);
+       bnxt_init_ring_grps(bp, irq_re_init);
diff --git a/queue-4.9/btrfs-fix-an-integer-overflow-check.patch b/queue-4.9/btrfs-fix-an-integer-overflow-check.patch

new file mode 100644 (file)

index 0000000..dd796b5
--- /dev/null
+++ b/queue-4.9/btrfs-fix-an-integer-overflow-check.patch
@@ -0,0 +1,52 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Fri, 17 Mar 2017 23:51:20 +0300
+Subject: Btrfs: fix an integer overflow check
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+
+[ Upstream commit 457ae7268b29c33dee1c0feb143a15f6029d177b ]
+
+This isn't super serious because you need CAP_SYS_ADMIN to run this code.
+
+I added this integer overflow check last year but apparently I am
+rubbish at writing integer overflow checks...  There are two issues.
+First, access_ok() works on unsigned long type and not u64 so on 32 bit
+systems the access_ok() could be checking a truncated size.  The other
+issue is that we should be using a stricter limit so we don't overflow
+the kzalloc() setting ctx->clone_roots later in the function after the
+access_ok():
+
+       alloc_size = sizeof(struct clone_root) * (arg->clone_sources_count + 1);
+       sctx->clone_roots = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN);
+
+Fixes: f5ecec3ce21f ("btrfs: send: silence an integer overflow warning")
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+[ added comment ]
+Signed-off-by: David Sterba <dsterba@suse.com>
+
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/send.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/send.c
++++ b/fs/btrfs/send.c
+@@ -6196,8 +6196,13 @@ long btrfs_ioctl_send(struct file *mnt_f
+               goto out;
+       }
+ 
++      /*
++       * Check that we don't overflow at later allocations, we request
++       * clone_sources_count + 1 items, and compare to unsigned long inside
++       * access_ok.
++       */
+       if (arg->clone_sources_count >
+-          ULLONG_MAX / sizeof(*arg->clone_sources)) {
++          ULONG_MAX / sizeof(struct clone_root) - 1) {
+               ret = -EINVAL;
+               goto out;
+       }
diff --git a/queue-4.9/clk-sunxi-ng-sun6i-rename-hdmi-ddc-clock-to-avoid-name-collision.patch b/queue-4.9/clk-sunxi-ng-sun6i-rename-hdmi-ddc-clock-to-avoid-name-collision.patch

new file mode 100644 (file)

index 0000000..23f961a
--- /dev/null
+++ b/queue-4.9/clk-sunxi-ng-sun6i-rename-hdmi-ddc-clock-to-avoid-name-collision.patch
@@ -0,0 +1,36 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Chen-Yu Tsai <wens@csie.org>
+Date: Fri, 29 Sep 2017 16:22:54 +0800
+Subject: clk: sunxi-ng: sun6i: Rename HDMI DDC clock to avoid name collision
+
+From: Chen-Yu Tsai <wens@csie.org>
+
+
+[ Upstream commit 7f3ed79188f2f094d0ee366fa858857fb7f511ba ]
+
+The HDMI DDC clock found in the CCU is the parent of the actual DDC
+clock within the HDMI controller. That clock is also named "hdmi-ddc".
+
+Rename the one in the CCU to "ddc". This makes more sense than renaming
+the one in the HDMI controller to something else.
+
+Fixes: c6e6c96d8fa6 ("clk: sunxi-ng: Add A31/A31s clocks")
+Signed-off-by: Chen-Yu Tsai <wens@csie.org>
+Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/clk/sunxi-ng/ccu-sun6i-a31.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c
++++ b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c
+@@ -608,7 +608,7 @@ static SUNXI_CCU_M_WITH_MUX_GATE(hdmi_cl
+                                0x150, 0, 4, 24, 2, BIT(31),
+                                CLK_SET_RATE_PARENT);
+ 
+-static SUNXI_CCU_GATE(hdmi_ddc_clk, "hdmi-ddc", "osc24M", 0x150, BIT(30), 0);
++static SUNXI_CCU_GATE(hdmi_ddc_clk, "ddc", "osc24M", 0x150, BIT(30), 0);
+ 
+ static SUNXI_CCU_GATE(ps_clk, "ps", "lcd1-ch1", 0x140, BIT(31), 0);
+ 
diff --git a/queue-4.9/cpufreq-fix-creation-of-symbolic-links-to-policy-directories.patch b/queue-4.9/cpufreq-fix-creation-of-symbolic-links-to-policy-directories.patch

new file mode 100644 (file)

index 0000000..9c61c4a
--- /dev/null
+++ b/queue-4.9/cpufreq-fix-creation-of-symbolic-links-to-policy-directories.patch
@@ -0,0 +1,115 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
+Date: Mon, 27 Mar 2017 19:33:09 +0200
+Subject: cpufreq: Fix creation of symbolic links to policy directories
+
+From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
+
+
+[ Upstream commit 2f0ba790df51721794c11abc7a076d407392f648 ]
+
+The cpufreq core only tries to create symbolic links from CPU
+directories in sysfs to policy directories in cpufreq_add_dev(),
+either when a given CPU is registered or when the cpufreq driver
+is registered, whichever happens first.  That is not sufficient,
+however, because cpufreq_add_dev() may be called for an offline CPU
+whose policy object has not been created yet and, quite obviously,
+the symbolic link cannot be added in that case.
+
+Fix that by making cpufreq_online() attempt to add symbolic links to
+policy objects for the CPUs in the related_cpus mask of every new
+policy object created by it.
+
+The cpufreq_driver_lock locking around the for_each_cpu() loop
+in cpufreq_online() is dropped, because it is not necessary and the
+code is somewhat simpler without it.  Moreover, failures to create
+a symbolic link will not be regarded as hard errors any more and
+the CPUs without those links will not be taken offline automatically,
+but that should not be problematic in practice.
+
+Reported-and-tested-by: Prashanth Prakash <pprakash@codeaurora.org>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: 4.9+ <stable@vger.kernel.org> # 4.9+
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/cpufreq/cpufreq.c |   38 +++++++++++++++++++++-----------------
+ 1 file changed, 21 insertions(+), 17 deletions(-)
+
+--- a/drivers/cpufreq/cpufreq.c
++++ b/drivers/cpufreq/cpufreq.c
+@@ -918,11 +918,19 @@ static struct kobj_type ktype_cpufreq =
+       .release        = cpufreq_sysfs_release,
+ };
+ 
+-static int add_cpu_dev_symlink(struct cpufreq_policy *policy,
+-                             struct device *dev)
++static void add_cpu_dev_symlink(struct cpufreq_policy *policy, unsigned int cpu)
+ {
++      struct device *dev = get_cpu_device(cpu);
++
++      if (!dev)
++              return;
++
++      if (cpumask_test_and_set_cpu(cpu, policy->real_cpus))
++              return;
++
+       dev_dbg(dev, "%s: Adding symlink\n", __func__);
+-      return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
++      if (sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq"))
++              dev_err(dev, "cpufreq symlink creation failed\n");
+ }
+ 
+ static void remove_cpu_dev_symlink(struct cpufreq_policy *policy,
+@@ -1184,10 +1192,10 @@ static int cpufreq_online(unsigned int c
+               policy->user_policy.min = policy->min;
+               policy->user_policy.max = policy->max;
+ 
+-              write_lock_irqsave(&cpufreq_driver_lock, flags);
+-              for_each_cpu(j, policy->related_cpus)
++              for_each_cpu(j, policy->related_cpus) {
+                       per_cpu(cpufreq_cpu_data, j) = policy;
+-              write_unlock_irqrestore(&cpufreq_driver_lock, flags);
++                      add_cpu_dev_symlink(policy, j);
++              }
+       } else {
+               policy->min = policy->user_policy.min;
+               policy->max = policy->user_policy.max;
+@@ -1284,13 +1292,15 @@ out_exit_policy:
+ 
+       if (cpufreq_driver->exit)
+               cpufreq_driver->exit(policy);
++
++      for_each_cpu(j, policy->real_cpus)
++              remove_cpu_dev_symlink(policy, get_cpu_device(j));
++
+ out_free_policy:
+       cpufreq_policy_free(policy, !new_policy);
+       return ret;
+ }
+ 
+-static int cpufreq_offline(unsigned int cpu);
+-
+ /**
+  * cpufreq_add_dev - the cpufreq interface for a CPU device.
+  * @dev: CPU device.
+@@ -1312,16 +1322,10 @@ static int cpufreq_add_dev(struct device
+ 
+       /* Create sysfs link on CPU registration */
+       policy = per_cpu(cpufreq_cpu_data, cpu);
+-      if (!policy || cpumask_test_and_set_cpu(cpu, policy->real_cpus))
+-              return 0;
++      if (policy)
++              add_cpu_dev_symlink(policy, cpu);
+ 
+-      ret = add_cpu_dev_symlink(policy, dev);
+-      if (ret) {
+-              cpumask_clear_cpu(cpu, policy->real_cpus);
+-              cpufreq_offline(cpu);
+-      }
+-
+-      return ret;
++      return 0;
+ }
+ 
+ static int cpufreq_offline(unsigned int cpu)
diff --git a/queue-4.9/cpuidle-fix-broadcast-control-when-broadcast-can-not-be-entered.patch b/queue-4.9/cpuidle-fix-broadcast-control-when-broadcast-can-not-be-entered.patch

new file mode 100644 (file)

index 0000000..6777cba
--- /dev/null
+++ b/queue-4.9/cpuidle-fix-broadcast-control-when-broadcast-can-not-be-entered.patch
@@ -0,0 +1,39 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Fri, 1 Sep 2017 14:29:56 +1000
+Subject: cpuidle: fix broadcast control when broadcast can not be entered
+
+From: Nicholas Piggin <npiggin@gmail.com>
+
+
+[ Upstream commit f187851b9b4a76952b1158b86434563dd2031103 ]
+
+When failing to enter broadcast timer mode for an idle state that
+requires it, a new state is selected that does not require broadcast,
+but the broadcast variable remains set. This causes
+tick_broadcast_exit to be called despite not having entered broadcast
+mode.
+
+This causes the WARN_ON_ONCE(!irqs_disabled()) to trigger in some
+cases. It does not appear to cause problems for code today, but seems
+to violate the interface so should be fixed.
+
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/cpuidle/cpuidle.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/cpuidle/cpuidle.c
++++ b/drivers/cpuidle/cpuidle.c
+@@ -189,6 +189,7 @@ int cpuidle_enter_state(struct cpuidle_d
+                       return -EBUSY;
+               }
+               target_state = &drv->states[index];
++              broadcast = false;
+       }
+ 
+       /* Take note of the planned idle state. */
diff --git a/queue-4.9/cpuidle-powernv-pass-correct-drv-cpumask-for-registration.patch b/queue-4.9/cpuidle-powernv-pass-correct-drv-cpumask-for-registration.patch

new file mode 100644 (file)

index 0000000..57379af
--- /dev/null
+++ b/queue-4.9/cpuidle-powernv-pass-correct-drv-cpumask-for-registration.patch
@@ -0,0 +1,94 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
+Date: Thu, 23 Mar 2017 20:52:46 +0530
+Subject: cpuidle: powernv: Pass correct drv->cpumask for registration
+
+From: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
+
+
+[ Upstream commit 293d264f13cbde328d5477f49e3103edbc1dc191 ]
+
+drv->cpumask defaults to cpu_possible_mask in __cpuidle_driver_init().
+On PowerNV platform cpu_present could be less than cpu_possible in cases
+where firmware detects the cpu, but it is not available to the OS.  When
+CONFIG_HOTPLUG_CPU=n, such cpus are not hotpluggable at runtime and hence
+we skip creating cpu_device.
+
+This breaks cpuidle on powernv where register_cpu() is not called for
+cpus in cpu_possible_mask that cannot be hot-added at runtime.
+
+Trying cpuidle_register_device() on cpu without cpu_device will cause
+crash like this:
+
+cpu 0xf: Vector: 380 (Data SLB Access) at [c000000ff1503490]
+    pc: c00000000022c8bc: string+0x34/0x60
+    lr: c00000000022ed78: vsnprintf+0x284/0x42c
+    sp: c000000ff1503710
+   msr: 9000000000009033
+   dar: 6000000060000000
+  current = 0xc000000ff1480000
+  paca    = 0xc00000000fe82d00   softe: 0        irq_happened: 0x01
+    pid   = 1, comm = swapper/8
+Linux version 4.11.0-rc2 (sv@sagarika) (gcc version 4.9.4
+(Buildroot 2017.02-00004-gc28573e) ) #15 SMP Fri Mar 17 19:32:02 IST 2017
+enter ? for help
+[link register   ] c00000000022ed78 vsnprintf+0x284/0x42c
+[c000000ff1503710] c00000000022ebb8 vsnprintf+0xc4/0x42c (unreliable)
+[c000000ff1503800] c00000000022ef40 vscnprintf+0x20/0x44
+[c000000ff1503830] c0000000000ab61c vprintk_emit+0x94/0x2cc
+[c000000ff15038a0] c0000000000acc9c vprintk_func+0x60/0x74
+[c000000ff15038c0] c000000000619694 printk+0x38/0x4c
+[c000000ff15038e0] c000000000224950 kobject_get+0x40/0x60
+[c000000ff1503950] c00000000022507c kobject_add_internal+0x60/0x2c4
+[c000000ff15039e0] c000000000225350 kobject_init_and_add+0x70/0x78
+[c000000ff1503a60] c00000000053c288 cpuidle_add_sysfs+0x9c/0xe0
+[c000000ff1503ae0] c00000000053aeac cpuidle_register_device+0xd4/0x12c
+[c000000ff1503b30] c00000000053b108 cpuidle_register+0x98/0xcc
+[c000000ff1503bc0] c00000000085eaf0 powernv_processor_idle_init+0x140/0x1e0
+[c000000ff1503c60] c00000000000cd60 do_one_initcall+0xc0/0x15c
+[c000000ff1503d20] c000000000833e84 kernel_init_freeable+0x1a0/0x25c
+[c000000ff1503dc0] c00000000000d478 kernel_init+0x24/0x12c
+[c000000ff1503e30] c00000000000b564 ret_from_kernel_thread+0x5c/0x78
+
+This patch fixes the bug by passing correct cpumask from
+powernv-cpuidle driver.
+
+Signed-off-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
+Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
+Acked-by: Michael Ellerman <mpe@ellerman.id.au>
+[ rjw: Comment massage ]
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/cpuidle/cpuidle-powernv.c |   18 ++++++++++++++++++
+ 1 file changed, 18 insertions(+)
+
+--- a/drivers/cpuidle/cpuidle-powernv.c
++++ b/drivers/cpuidle/cpuidle-powernv.c
+@@ -164,6 +164,24 @@ static int powernv_cpuidle_driver_init(v
+               drv->state_count += 1;
+       }
+ 
++      /*
++       * On the PowerNV platform cpu_present may be less than cpu_possible in
++       * cases when firmware detects the CPU, but it is not available to the
++       * OS.  If CONFIG_HOTPLUG_CPU=n, then such CPUs are not hotplugable at
++       * run time and hence cpu_devices are not created for those CPUs by the
++       * generic topology_init().
++       *
++       * drv->cpumask defaults to cpu_possible_mask in
++       * __cpuidle_driver_init().  This breaks cpuidle on PowerNV where
++       * cpu_devices are not created for CPUs in cpu_possible_mask that
++       * cannot be hot-added later at run time.
++       *
++       * Trying cpuidle_register_device() on a CPU without a cpu_device is
++       * incorrect, so pass a correct CPU mask to the generic cpuidle driver.
++       */
++
++      drv->cpumask = (struct cpumask *)cpu_present_mask;
++
+       return 0;
+ }
+ 
diff --git a/queue-4.9/cpuidle-validate-cpu_dev-in-cpuidle_add_sysfs.patch b/queue-4.9/cpuidle-validate-cpu_dev-in-cpuidle_add_sysfs.patch

new file mode 100644 (file)

index 0000000..2fbdffe
--- /dev/null
+++ b/queue-4.9/cpuidle-validate-cpu_dev-in-cpuidle_add_sysfs.patch
@@ -0,0 +1,79 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
+Date: Sun, 19 Mar 2017 00:51:59 +0530
+Subject: cpuidle: Validate cpu_dev in cpuidle_add_sysfs()
+
+From: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
+
+
+[ Upstream commit ad0a45fd9c14feebd000b6e84189d0edff265170 ]
+
+If a given cpu is not in cpu_present and cpu hotplug
+is disabled, arch can skip setting up the cpu_dev.
+
+Arch cpuidle driver should pass correct cpu mask
+for registration, but failing to do so by the driver
+causes error to propagate and crash like this:
+
+[   30.076045] Unable to handle kernel paging request for data at address 0x00000048
+[   30.076100] Faulting instruction address: 0xc0000000007b2f30
+cpu 0x4d: Vector: 300 (Data Access) at [c000003feb18b670]
+    pc: c0000000007b2f30: kobject_get+0x20/0x70
+    lr: c0000000007b3c94: kobject_add_internal+0x54/0x3f0
+    sp: c000003feb18b8f0
+   msr: 9000000000009033
+   dar: 48
+ dsisr: 40000000
+  current = 0xc000003fd2ed8300
+  paca    = 0xc00000000fbab500   softe: 0        irq_happened: 0x01
+    pid   = 1, comm = swapper/0
+Linux version 4.11.0-rc2-svaidy+ (sv@sagarika) (gcc version 6.2.0
+20161005 (Ubuntu 6.2.0-5ubuntu12) ) #10 SMP Sun Mar 19 00:08:09 IST 2017
+enter ? for help
+[c000003feb18b960] c0000000007b3c94 kobject_add_internal+0x54/0x3f0
+[c000003feb18b9f0] c0000000007b43a4 kobject_init_and_add+0x64/0xa0
+[c000003feb18ba70] c000000000e284f4 cpuidle_add_sysfs+0xb4/0x130
+[c000003feb18baf0] c000000000e26038 cpuidle_register_device+0x118/0x1c0
+[c000003feb18bb30] c000000000e26c48 cpuidle_register+0x78/0x120
+[c000003feb18bbc0] c00000000168fd9c powernv_processor_idle_init+0x110/0x1c4
+[c000003feb18bc40] c00000000000cff8 do_one_initcall+0x68/0x1d0
+[c000003feb18bd00] c0000000016242f4 kernel_init_freeable+0x280/0x360
+[c000003feb18bdc0] c00000000000d864 kernel_init+0x24/0x160
+[c000003feb18be30] c00000000000b4e8 ret_from_kernel_thread+0x5c/0x74
+
+Validating cpu_dev fixes the crash and reports correct error message like:
+
+[   30.163506] Failed to register cpuidle device for cpu136
+[   30.173329] Registration of powernv driver failed.
+
+Signed-off-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
+[ rjw: Comment massage ]
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/cpuidle/sysfs.c |   12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+--- a/drivers/cpuidle/sysfs.c
++++ b/drivers/cpuidle/sysfs.c
+@@ -613,6 +613,18 @@ int cpuidle_add_sysfs(struct cpuidle_dev
+       struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu);
+       int error;
+ 
++      /*
++       * Return if cpu_device is not setup for this CPU.
++       *
++       * This could happen if the arch did not set up cpu_device
++       * since this CPU is not in cpu_present mask and the
++       * driver did not send a correct CPU mask during registration.
++       * Without this check we would end up passing bogus
++       * value for &cpu_dev->kobj in kobject_init_and_add()
++       */
++      if (!cpu_dev)
++              return -ENODEV;
++
+       kdev = kzalloc(sizeof(*kdev), GFP_KERNEL);
+       if (!kdev)
+               return -ENOMEM;
diff --git a/queue-4.9/crypto-crypto4xx-increase-context-and-scatter-ring-buffer-elements.patch b/queue-4.9/crypto-crypto4xx-increase-context-and-scatter-ring-buffer-elements.patch

new file mode 100644 (file)

index 0000000..37bf81d
--- /dev/null
+++ b/queue-4.9/crypto-crypto4xx-increase-context-and-scatter-ring-buffer-elements.patch
@@ -0,0 +1,57 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Christian Lamparter <chunkeey@gmail.com>
+Date: Wed, 4 Oct 2017 01:00:08 +0200
+Subject: crypto: crypto4xx - increase context and scatter ring buffer elements
+
+From: Christian Lamparter <chunkeey@gmail.com>
+
+
+[ Upstream commit 778f81d6cdb7d25360f082ac0384d5103f04eca5 ]
+
+If crypto4xx is used in conjunction with dm-crypt, the available
+ring buffer elements are not enough to handle the load properly.
+
+On an aes-cbc-essiv:sha256 encrypted swap partition the read
+performance is abysmal: (tested with hdparm -t)
+
+/dev/mapper/swap_crypt:
+ Timing buffered disk reads:  14 MB in  3.68 seconds =   3.81 MB/sec
+
+The patch increases both PPC4XX_NUM_SD and PPC4XX_NUM_PD to 256.
+This improves the performance considerably:
+
+/dev/mapper/swap_crypt:
+ Timing buffered disk reads: 104 MB in  3.03 seconds =  34.31 MB/sec
+
+Furthermore, PPC4XX_LAST_SD, PPC4XX_LAST_GD and PPC4XX_LAST_PD
+can be easily calculated from their respective PPC4XX_NUM_*
+constant.
+
+Signed-off-by: Christian Lamparter <chunkeey@gmail.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/crypto/amcc/crypto4xx_core.h |   10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/drivers/crypto/amcc/crypto4xx_core.h
++++ b/drivers/crypto/amcc/crypto4xx_core.h
+@@ -34,12 +34,12 @@
+ #define PPC405EX_CE_RESET                       0x00000008
+ 
+ #define CRYPTO4XX_CRYPTO_PRIORITY             300
+-#define PPC4XX_LAST_PD                                63
+-#define PPC4XX_NUM_PD                         64
+-#define PPC4XX_LAST_GD                                1023
++#define PPC4XX_NUM_PD                         256
++#define PPC4XX_LAST_PD                                (PPC4XX_NUM_PD - 1)
+ #define PPC4XX_NUM_GD                         1024
+-#define PPC4XX_LAST_SD                                63
+-#define PPC4XX_NUM_SD                         64
++#define PPC4XX_LAST_GD                                (PPC4XX_NUM_GD - 1)
++#define PPC4XX_NUM_SD                         256
++#define PPC4XX_LAST_SD                                (PPC4XX_NUM_SD - 1)
+ #define PPC4XX_SD_BUFFER_SIZE                 2048
+ 
+ #define PD_ENTRY_INUSE                                1
diff --git a/queue-4.9/crypto-deadlock-between-crypto_alg_sem-rtnl_mutex-genl_mutex.patch b/queue-4.9/crypto-deadlock-between-crypto_alg_sem-rtnl_mutex-genl_mutex.patch

new file mode 100644 (file)

index 0000000..cadbe97
--- /dev/null
+++ b/queue-4.9/crypto-deadlock-between-crypto_alg_sem-rtnl_mutex-genl_mutex.patch
@@ -0,0 +1,95 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Herbert Xu <herbert@gondor.apana.org.au>
+Date: Tue, 14 Mar 2017 18:25:57 +0800
+Subject: crypto: deadlock between crypto_alg_sem/rtnl_mutex/genl_mutex
+
+From: Herbert Xu <herbert@gondor.apana.org.au>
+
+
+[ Upstream commit 8a0f5ccfb33b0b8b51de65b7b3bf342ba10b4fb6 ]
+
+On Tue, Mar 14, 2017 at 10:44:10AM +0100, Dmitry Vyukov wrote:
+>
+> Yes, please.
+> Disregarding some reports is not a good way long term.
+
+Please try this patch.
+
+---8<---
+Subject: netlink: Annotate nlk cb_mutex by protocol
+
+Currently all occurrences of nlk->cb_mutex are annotated by lockdep
+as a single class.  This causes a false lockdep cycle involving
+genl and crypto_user.
+
+This patch fixes it by dividing cb_mutex into individual classes
+based on the netlink protocol.  As genl and crypto_user do not
+use the same netlink protocol this breaks the false dependency
+loop.
+
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netlink/af_netlink.c |   41 +++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 41 insertions(+)
+
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -96,6 +96,44 @@ EXPORT_SYMBOL_GPL(nl_table);
+ 
+ static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
+ 
++static struct lock_class_key nlk_cb_mutex_keys[MAX_LINKS];
++
++static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = {
++      "nlk_cb_mutex-ROUTE",
++      "nlk_cb_mutex-1",
++      "nlk_cb_mutex-USERSOCK",
++      "nlk_cb_mutex-FIREWALL",
++      "nlk_cb_mutex-SOCK_DIAG",
++      "nlk_cb_mutex-NFLOG",
++      "nlk_cb_mutex-XFRM",
++      "nlk_cb_mutex-SELINUX",
++      "nlk_cb_mutex-ISCSI",
++      "nlk_cb_mutex-AUDIT",
++      "nlk_cb_mutex-FIB_LOOKUP",
++      "nlk_cb_mutex-CONNECTOR",
++      "nlk_cb_mutex-NETFILTER",
++      "nlk_cb_mutex-IP6_FW",
++      "nlk_cb_mutex-DNRTMSG",
++      "nlk_cb_mutex-KOBJECT_UEVENT",
++      "nlk_cb_mutex-GENERIC",
++      "nlk_cb_mutex-17",
++      "nlk_cb_mutex-SCSITRANSPORT",
++      "nlk_cb_mutex-ECRYPTFS",
++      "nlk_cb_mutex-RDMA",
++      "nlk_cb_mutex-CRYPTO",
++      "nlk_cb_mutex-SMC",
++      "nlk_cb_mutex-23",
++      "nlk_cb_mutex-24",
++      "nlk_cb_mutex-25",
++      "nlk_cb_mutex-26",
++      "nlk_cb_mutex-27",
++      "nlk_cb_mutex-28",
++      "nlk_cb_mutex-29",
++      "nlk_cb_mutex-30",
++      "nlk_cb_mutex-31",
++      "nlk_cb_mutex-MAX_LINKS"
++};
++
+ static int netlink_dump(struct sock *sk);
+ static void netlink_skb_destructor(struct sk_buff *skb);
+ 
+@@ -585,6 +623,9 @@ static int __netlink_create(struct net *
+       } else {
+               nlk->cb_mutex = &nlk->cb_def_mutex;
+               mutex_init(nlk->cb_mutex);
++              lockdep_set_class_and_name(nlk->cb_mutex,
++                                         nlk_cb_mutex_keys + protocol,
++                                         nlk_cb_mutex_key_strings[protocol]);
+       }
+       init_waitqueue_head(&nlk->wait);
+ 
diff --git a/queue-4.9/fm10k-ensure-we-process-sm-mbx-when-processing-vf-mbx.patch b/queue-4.9/fm10k-ensure-we-process-sm-mbx-when-processing-vf-mbx.patch

new file mode 100644 (file)

index 0000000..31c6b56
--- /dev/null
+++ b/queue-4.9/fm10k-ensure-we-process-sm-mbx-when-processing-vf-mbx.patch
@@ -0,0 +1,41 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Jacob Keller <jacob.e.keller@intel.com>
+Date: Mon, 2 Oct 2017 07:17:50 -0700
+Subject: fm10k: ensure we process SM mbx when processing VF mbx
+
+From: Jacob Keller <jacob.e.keller@intel.com>
+
+
+[ Upstream commit 17a91809942ca32c70026d2d5ba3348a2c4fdf8f ]
+
+When we process VF mailboxes, the driver is likely going to also queue
+up messages to the switch manager. This process merely queues up the
+FIFO, but doesn't actually begin the transmission process. Because we
+hold the mailbox lock during this VF processing, the PF<->SM mailbox is
+not getting processed at this time. Ensure that we actually process the
+PF<->SM mailbox in between each PF<->VF mailbox.
+
+This should ensure prompt transmission of the messages queued up after
+each VF message is received and handled.
+
+Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
+Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
+Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/intel/fm10k/fm10k_iov.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
++++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
+@@ -126,6 +126,9 @@ process_mbx:
+               struct fm10k_mbx_info *mbx = &vf_info->mbx;
+               u16 glort = vf_info->glort;
+ 
++              /* process the SM mailbox first to drain outgoing messages */
++              hw->mbx.ops.process(hw, &hw->mbx);
++
+               /* verify port mapping is valid, if not reset port */
+               if (vf_info->vf_flags && !fm10k_glort_valid_pf(hw, glort))
+                       hw->iov.ops.reset_lport(hw, vf_info);
diff --git a/queue-4.9/fm10k-fix-mis-ordered-parameters-in-declaration-for-.ndo_set_vf_bw.patch b/queue-4.9/fm10k-fix-mis-ordered-parameters-in-declaration-for-.ndo_set_vf_bw.patch

new file mode 100644 (file)

index 0000000..48b666e
--- /dev/null
+++ b/queue-4.9/fm10k-fix-mis-ordered-parameters-in-declaration-for-.ndo_set_vf_bw.patch
@@ -0,0 +1,76 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Jacob Keller <jacob.e.keller@intel.com>
+Date: Fri, 11 Aug 2017 11:14:58 -0700
+Subject: fm10k: fix mis-ordered parameters in declaration for .ndo_set_vf_bw
+
+From: Jacob Keller <jacob.e.keller@intel.com>
+
+
+[ Upstream commit 3e256ac5b1ec307e5dd5a4c99fbdbc651446c738 ]
+
+We've had support for setting both a minimum and maximum bandwidth via
+.ndo_set_vf_bw since commit 883a9ccbae56 ("fm10k: Add support for SR-IOV
+to driver", 2014-09-20).
+
+Likely because we do not support minimum rates, the declaration
+mis-ordered the "unused" parameter, which causes warnings when analyzed
+with cppcheck.
+
+Fix this warning by properly declaring the min_rate and max_rate
+variables in the declaration and definition (rather than using
+"unused"). Also rename "rate" to max_rate so as to clarify that we only
+support setting the maximum rate.
+
+Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
+Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
+Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/intel/fm10k/fm10k.h     |    4 ++--
+ drivers/net/ethernet/intel/fm10k/fm10k_iov.c |    9 +++++----
+ 2 files changed, 7 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/ethernet/intel/fm10k/fm10k.h
++++ b/drivers/net/ethernet/intel/fm10k/fm10k.h
+@@ -508,8 +508,8 @@ s32 fm10k_iov_update_pvid(struct fm10k_i
+ int fm10k_ndo_set_vf_mac(struct net_device *netdev, int vf_idx, u8 *mac);
+ int fm10k_ndo_set_vf_vlan(struct net_device *netdev,
+                         int vf_idx, u16 vid, u8 qos, __be16 vlan_proto);
+-int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, int rate,
+-                      int unused);
++int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx,
++                      int __always_unused min_rate, int max_rate);
+ int fm10k_ndo_get_vf_config(struct net_device *netdev,
+                           int vf_idx, struct ifla_vf_info *ivi);
+ 
+--- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
++++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
+@@ -482,7 +482,7 @@ int fm10k_ndo_set_vf_vlan(struct net_dev
+ }
+ 
+ int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx,
+-                      int __always_unused unused, int rate)
++                      int __always_unused min_rate, int max_rate)
+ {
+       struct fm10k_intfc *interface = netdev_priv(netdev);
+       struct fm10k_iov_data *iov_data = interface->iov_data;
+@@ -493,14 +493,15 @@ int fm10k_ndo_set_vf_bw(struct net_devic
+               return -EINVAL;
+ 
+       /* rate limit cannot be less than 10Mbs or greater than link speed */
+-      if (rate && ((rate < FM10K_VF_TC_MIN) || rate > FM10K_VF_TC_MAX))
++      if (max_rate &&
++          (max_rate < FM10K_VF_TC_MIN || max_rate > FM10K_VF_TC_MAX))
+               return -EINVAL;
+ 
+       /* store values */
+-      iov_data->vf_info[vf_idx].rate = rate;
++      iov_data->vf_info[vf_idx].rate = max_rate;
+ 
+       /* update hardware configuration */
+-      hw->iov.ops.configure_tc(hw, vf_idx, rate);
++      hw->iov.ops.configure_tc(hw, vf_idx, max_rate);
+ 
+       return 0;
+ }
diff --git a/queue-4.9/hid-corsair-add-driver-scimitar-pro-rgb-gaming-mouse-1b1c-1b3e-support-to-hid-corsair.patch b/queue-4.9/hid-corsair-add-driver-scimitar-pro-rgb-gaming-mouse-1b1c-1b3e-support-to-hid-corsair.patch

new file mode 100644 (file)

index 0000000..24b0647
--- /dev/null
+++ b/queue-4.9/hid-corsair-add-driver-scimitar-pro-rgb-gaming-mouse-1b1c-1b3e-support-to-hid-corsair.patch
@@ -0,0 +1,126 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Oscar Campos <oscar.campos@member.fsf.org>
+Date: Mon, 6 Mar 2017 21:02:39 +0000
+Subject: HID: corsair: Add driver Scimitar Pro RGB gaming mouse 1b1c:1b3e support to hid-corsair
+
+From: Oscar Campos <oscar.campos@member.fsf.org>
+
+
+[ Upstream commit 01adc47e885f1127b29d76d0dfb21d8262f9d6b4 ]
+
+This mouse sold by Corsair as Scimitar PRO RGB defines two consecutive
+Logical Minimum items in its Application (Consumer.0001) report making
+it non parseable. This patch fixes the report descriptor overriding
+byte 77 in rdesc from 0x16 (Logical Minimum with 16 bits value) to 0x26
+(Logical Maximum with 16 bits value).
+
+Signed-off-by: Oscar Campos <oscar.campos@member.fsf.org>
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/hid/Kconfig       |    1 
+ drivers/hid/hid-core.c    |    1 
+ drivers/hid/hid-corsair.c |   47 ++++++++++++++++++++++++++++++++++++++++++++++
+ 3 files changed, 49 insertions(+)
+
+--- a/drivers/hid/Kconfig
++++ b/drivers/hid/Kconfig
+@@ -190,6 +190,7 @@ config HID_CORSAIR
+ 
+       Supported devices:
+       - Vengeance K90
++      - Scimitar PRO RGB
+ 
+ config HID_PRODIKEYS
+       tristate "Prodikeys PC-MIDI Keyboard support"
+--- a/drivers/hid/hid-core.c
++++ b/drivers/hid/hid-core.c
+@@ -1872,6 +1872,7 @@ static const struct hid_device_id hid_ha
+       { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90) },
++      { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_PRODIKEYS_PCMIDI) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_CYGNAL_CP2112) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_1) },
+--- a/drivers/hid/hid-corsair.c
++++ b/drivers/hid/hid-corsair.c
+@@ -3,8 +3,10 @@
+  *
+  * Supported devices:
+  *  - Vengeance K90 Keyboard
++ *  - Scimitar PRO RGB Gaming Mouse
+  *
+  * Copyright (c) 2015 Clement Vuchener
++ * Copyright (c) 2017 Oscar Campos
+  */
+ 
+ /*
+@@ -670,10 +672,51 @@ static int corsair_input_mapping(struct
+       return 0;
+ }
+ 
++/*
++ * The report descriptor of Corsair Scimitar RGB Pro gaming mouse is
++ * non parseable as they define two consecutive Logical Minimum for
++ * the Usage Page (Consumer) in rdescs bytes 75 and 77 being 77 0x16
++ * that should be obviously 0x26 for Logical Maximum of 16 bits. This
++ * prevents proper parsing of the report descriptor due to Logical
++ * Minimum being larger than Logical Maximum.
++ *
++ * This driver fixes the report descriptor for:
++ * - USB ID 1b1c:1b3e, sold as Scimitar RGB Pro Gaming mouse
++ */
++
++static __u8 *corsair_mouse_report_fixup(struct hid_device *hdev, __u8 *rdesc,
++        unsigned int *rsize)
++{
++      struct usb_interface *intf = to_usb_interface(hdev->dev.parent);
++
++      if (intf->cur_altsetting->desc.bInterfaceNumber == 1) {
++              /*
++               * Corsair Scimitar RGB Pro report descriptor is broken and
++               * defines two different Logical Minimum for the Consumer
++               * Application. The byte 77 should be a 0x26 defining a 16
++               * bits integer for the Logical Maximum but it is a 0x16
++               * instead (Logical Minimum)
++               */
++              switch (hdev->product) {
++              case USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB:
++                      if (*rsize >= 172 && rdesc[75] == 0x15 && rdesc[77] == 0x16
++                      && rdesc[78] == 0xff && rdesc[79] == 0x0f) {
++                              hid_info(hdev, "Fixing up report descriptor\n");
++                              rdesc[77] = 0x26;
++                      }
++                      break;
++              }
++
++      }
++      return rdesc;
++}
++
+ static const struct hid_device_id corsair_devices[] = {
+       { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90),
+               .driver_data = CORSAIR_USE_K90_MACRO |
+                              CORSAIR_USE_K90_BACKLIGHT },
++      { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR,
++            USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB) },
+       {}
+ };
+ 
+@@ -686,10 +729,14 @@ static struct hid_driver corsair_driver
+       .event = corsair_event,
+       .remove = corsair_remove,
+       .input_mapping = corsair_input_mapping,
++      .report_fixup = corsair_mouse_report_fixup,
+ };
+ 
+ module_hid_driver(corsair_driver);
+ 
+ MODULE_LICENSE("GPL");
++/* Original K90 driver author */
+ MODULE_AUTHOR("Clement Vuchener");
++/* Scimitar PRO RGB driver author */
++MODULE_AUTHOR("Oscar Campos");
+ MODULE_DESCRIPTION("HID driver for Corsair devices");
diff --git a/queue-4.9/hid-corsair-support-for-k65-k70-rapidfire-and-scimitar-pro-rgb.patch b/queue-4.9/hid-corsair-support-for-k65-k70-rapidfire-and-scimitar-pro-rgb.patch

new file mode 100644 (file)

index 0000000..acaa5a7
--- /dev/null
+++ b/queue-4.9/hid-corsair-support-for-k65-k70-rapidfire-and-scimitar-pro-rgb.patch
@@ -0,0 +1,52 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Oscar Campos <oscar.campos@member.fsf.org>
+Date: Fri, 10 Feb 2017 18:23:00 +0000
+Subject: HID: corsair: support for K65-K70 Rapidfire and Scimitar Pro RGB
+
+From: Oscar Campos <oscar.campos@member.fsf.org>
+
+
+[ Upstream commit deaba636997557fce46ca7bcb509bff5ea1b0558 ]
+
+Add quirks for several corsair gaming devices to avoid long delays on
+report initialization
+
+Supported devices:
+
+ - Corsair K65RGB Rapidfire Gaming Keyboard
+ - Corsair K70RGB Rapidfire Gaming Keyboard
+ - Corsair Scimitar Pro RGB Gaming Mouse
+
+Signed-off-by: Oscar Campos <oscar.campos@member.fsf.org>
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/hid/hid-ids.h           |    3 +++
+ drivers/hid/usbhid/hid-quirks.c |    3 +++
+ 2 files changed, 6 insertions(+)
+
+--- a/drivers/hid/hid-ids.h
++++ b/drivers/hid/hid-ids.h
+@@ -277,6 +277,9 @@
+ #define USB_DEVICE_ID_CORSAIR_K70RGB    0x1b13
+ #define USB_DEVICE_ID_CORSAIR_STRAFE    0x1b15
+ #define USB_DEVICE_ID_CORSAIR_K65RGB    0x1b17
++#define USB_DEVICE_ID_CORSAIR_K70RGB_RAPIDFIRE  0x1b38
++#define USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE  0x1b39
++#define USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB  0x1b3e
+ 
+ #define USB_VENDOR_ID_CREATIVELABS    0x041e
+ #define USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51    0x322c
+--- a/drivers/hid/usbhid/hid-quirks.c
++++ b/drivers/hid/usbhid/hid-quirks.c
+@@ -80,6 +80,9 @@ static const struct hid_blacklist {
+       { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB, HID_QUIRK_NO_INIT_REPORTS },
+       { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB, HID_QUIRK_NO_INIT_REPORTS },
+       { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_STRAFE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
++      { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB_RAPIDFIRE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
++      { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
++      { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
+       { USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51, HID_QUIRK_NOGET },
+       { USB_VENDOR_ID_DELL, USB_DEVICE_ID_DELL_PIXART_USB_OPTICAL_MOUSE, HID_QUIRK_ALWAYS_POLL },
+       { USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET },
diff --git a/queue-4.9/hid-xinmo-fix-for-out-of-range-for-tht-2p-arcade-controller.patch b/queue-4.9/hid-xinmo-fix-for-out-of-range-for-tht-2p-arcade-controller.patch

new file mode 100644 (file)

index 0000000..4836348
--- /dev/null
+++ b/queue-4.9/hid-xinmo-fix-for-out-of-range-for-tht-2p-arcade-controller.patch
@@ -0,0 +1,56 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Peter Stein <peter@stuntstein.dk>
+Date: Fri, 17 Feb 2017 00:00:50 -0800
+Subject: HID: xinmo: fix for out of range for THT 2P arcade controller.
+
+From: Peter Stein <peter@stuntstein.dk>
+
+
+[ Upstream commit 9257821c5a1dc57ef3a37f7cbcebaf548395c964 ]
+
+There is a new clone of the XIN MO arcade controller which has same issue with
+out of range like the original.  This fix will solve the issue where 2
+directions on the joystick are not recognized by the new THT 2P arcade
+controller with device ID 0x75e1.  In detail, the new device ID is added to the
+hid-ids list and the hid-xinmo source code.
+
+Signed-off-by: Peter Stein <peter@stuntstein.dk>
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/hid/hid-core.c  |    1 +
+ drivers/hid/hid-ids.h   |    1 +
+ drivers/hid/hid-xinmo.c |    1 +
+ 3 files changed, 3 insertions(+)
+
+--- a/drivers/hid/hid-core.c
++++ b/drivers/hid/hid-core.c
+@@ -2107,6 +2107,7 @@ static const struct hid_device_id hid_ha
+       { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SIRIUS_BATTERY_FREE_TABLET) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_X_TENSIONS, USB_DEVICE_ID_SPEEDLINK_VAD_CEZANNE) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_XIN_MO_DUAL_ARCADE) },
++      { HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_THT_2P_ARCADE) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0005) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0030) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_ZYDACRON, USB_DEVICE_ID_ZYDACRON_REMOTE_CONTROL) },
+--- a/drivers/hid/hid-ids.h
++++ b/drivers/hid/hid-ids.h
+@@ -1080,6 +1080,7 @@
+ 
+ #define USB_VENDOR_ID_XIN_MO                  0x16c0
+ #define USB_DEVICE_ID_XIN_MO_DUAL_ARCADE      0x05e1
++#define USB_DEVICE_ID_THT_2P_ARCADE           0x75e1
+ 
+ #define USB_VENDOR_ID_XIROKU          0x1477
+ #define USB_DEVICE_ID_XIROKU_SPX      0x1006
+--- a/drivers/hid/hid-xinmo.c
++++ b/drivers/hid/hid-xinmo.c
+@@ -46,6 +46,7 @@ static int xinmo_event(struct hid_device
+ 
+ static const struct hid_device_id xinmo_devices[] = {
+       { HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_XIN_MO_DUAL_ARCADE) },
++      { HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_THT_2P_ARCADE) },
+       { }
+ };
+ 
diff --git a/queue-4.9/hwmon-asus_atk0110-fix-uninitialized-data-access.patch b/queue-4.9/hwmon-asus_atk0110-fix-uninitialized-data-access.patch

new file mode 100644 (file)

index 0000000..db3afb7
--- /dev/null
+++ b/queue-4.9/hwmon-asus_atk0110-fix-uninitialized-data-access.patch
@@ -0,0 +1,42 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Arnd Bergmann <arnd@arndb.de>
+Date: Thu, 23 Mar 2017 16:03:11 +0100
+Subject: hwmon: (asus_atk0110) fix uninitialized data access
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+
+[ Upstream commit a2125d02443e9a4e68bcfd9f8004fa23239e8329 ]
+
+The latest gcc-7 snapshot adds a warning to point out that when
+atk_read_value_old or atk_read_value_new fails, we copy
+uninitialized data into sensor->cached_value:
+
+drivers/hwmon/asus_atk0110.c: In function 'atk_input_show':
+drivers/hwmon/asus_atk0110.c:651:26: error: 'value' may be used uninitialized in this function [-Werror=maybe-uninitialized]
+
+Adding an error check avoids this. All versions of the driver
+are affected.
+
+Fixes: 2c03d07ad54d ("hwmon: Add Asus ATK0110 support")
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Reviewed-by: Luca Tettamanti <kronos.it@gmail.com>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/hwmon/asus_atk0110.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/hwmon/asus_atk0110.c
++++ b/drivers/hwmon/asus_atk0110.c
+@@ -646,6 +646,9 @@ static int atk_read_value(struct atk_sen
+               else
+                       err = atk_read_value_new(sensor, value);
+ 
++              if (err)
++                      return err;
++
+               sensor->is_valid = true;
+               sensor->last_updated = jiffies;
+               sensor->cached_value = *value;
diff --git a/queue-4.9/hwmon-max31790-set-correct-pwm-value.patch b/queue-4.9/hwmon-max31790-set-correct-pwm-value.patch

new file mode 100644 (file)

index 0000000..0643fe0
--- /dev/null
+++ b/queue-4.9/hwmon-max31790-set-correct-pwm-value.patch
@@ -0,0 +1,33 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Alex Hemme <ahemme@cisco.com>
+Date: Tue, 7 Mar 2017 14:38:29 -0500
+Subject: hwmon: (max31790) Set correct PWM value
+
+From: Alex Hemme <ahemme@cisco.com>
+
+
+[ Upstream commit dd7406dd334a98ada3ff5371847a3eeb4ba16313 ]
+
+Traced fans not spinning to incorrect PWM value being written.
+The passed in value was written instead of the calculated value.
+
+Fixes: 54187ff9d766 ("hwmon: (max31790) Convert to use new hwmon registration API")
+Signed-off-by: Alex Hemme <ahemme@cisco.com>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/hwmon/max31790.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/hwmon/max31790.c
++++ b/drivers/hwmon/max31790.c
+@@ -311,7 +311,7 @@ static int max31790_write_pwm(struct dev
+               data->pwm[channel] = val << 8;
+               err = i2c_smbus_write_word_swapped(client,
+                                                  MAX31790_REG_PWMOUT(channel),
+-                                                 val);
++                                                 data->pwm[channel]);
+               break;
+       case hwmon_pwm_enable:
+               fan_config = data->fan_config[channel];
diff --git a/queue-4.9/i2c-mux-pca954x-add-missing-pca9546-definition-to-chip_desc.patch b/queue-4.9/i2c-mux-pca954x-add-missing-pca9546-definition-to-chip_desc.patch

new file mode 100644 (file)

index 0000000..de41402
--- /dev/null
+++ b/queue-4.9/i2c-mux-pca954x-add-missing-pca9546-definition-to-chip_desc.patch
@@ -0,0 +1,44 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Mike Looijmans <mike.looijmans@topic.nl>
+Date: Thu, 23 Mar 2017 10:00:36 +0100
+Subject: i2c: mux: pca954x: Add missing pca9546 definition to chip_desc
+
+From: Mike Looijmans <mike.looijmans@topic.nl>
+
+
+[ Upstream commit dbe4d69d252e9e65c6c46826980b77b11a142065 ]
+
+The spec for the pca9546 was missing. This chip is the same as the pca9545
+except that it lacks interrupt lines. While the i2c_device_id table mapped
+the pca9546 to the pca9545 definition the compatible table did not.
+
+Signed-off-by: Mike Looijmans <mike.looijmans@topic.nl>
+Signed-off-by: Peter Rosin <peda@axentia.se>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/i2c/muxes/i2c-mux-pca954x.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/drivers/i2c/muxes/i2c-mux-pca954x.c
++++ b/drivers/i2c/muxes/i2c-mux-pca954x.c
+@@ -96,6 +96,10 @@ static const struct chip_desc chips[] =
+               .nchans = 4,
+               .muxtype = pca954x_isswi,
+       },
++      [pca_9546] = {
++              .nchans = 4,
++              .muxtype = pca954x_isswi,
++      },
+       [pca_9547] = {
+               .nchans = 8,
+               .enable = 0x8,
+@@ -113,7 +117,7 @@ static const struct i2c_device_id pca954
+       { "pca9543", pca_9543 },
+       { "pca9544", pca_9544 },
+       { "pca9545", pca_9545 },
+-      { "pca9546", pca_9545 },
++      { "pca9546", pca_9546 },
+       { "pca9547", pca_9547 },
+       { "pca9548", pca_9548 },
+       { }
diff --git a/queue-4.9/i40e-do-not-enable-napi-on-q_vectors-that-have-no-rings.patch b/queue-4.9/i40e-do-not-enable-napi-on-q_vectors-that-have-no-rings.patch

new file mode 100644 (file)

index 0000000..e116503
--- /dev/null
+++ b/queue-4.9/i40e-do-not-enable-napi-on-q_vectors-that-have-no-rings.patch
@@ -0,0 +1,58 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Alexander Duyck <alexander.h.duyck@intel.com>
+Date: Fri, 24 Mar 2017 15:01:42 -0700
+Subject: i40e: Do not enable NAPI on q_vectors that have no rings
+
+From: Alexander Duyck <alexander.h.duyck@intel.com>
+
+
+[ Upstream commit 13a8cd191a2b470cfd435b3b57dbd21aa65ff78c ]
+
+When testing the epoll w/ busy poll code I found that I could get into a
+state where the i40e driver had q_vectors w/ active NAPI that had no rings.
+This was resulting in a divide by zero error.  To correct it I am updating
+the driver code so that we only support NAPI on q_vectors that have 1 or
+more rings allocated to them.
+
+Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
+Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
+Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/intel/i40e/i40e_main.c |   16 ++++++++++++----
+ 1 file changed, 12 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
+@@ -4217,8 +4217,12 @@ static void i40e_napi_enable_all(struct
+       if (!vsi->netdev)
+               return;
+ 
+-      for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++)
+-              napi_enable(&vsi->q_vectors[q_idx]->napi);
++      for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) {
++              struct i40e_q_vector *q_vector = vsi->q_vectors[q_idx];
++
++              if (q_vector->rx.ring || q_vector->tx.ring)
++                      napi_enable(&q_vector->napi);
++      }
+ }
+ 
+ /**
+@@ -4232,8 +4236,12 @@ static void i40e_napi_disable_all(struct
+       if (!vsi->netdev)
+               return;
+ 
+-      for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++)
+-              napi_disable(&vsi->q_vectors[q_idx]->napi);
++      for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) {
++              struct i40e_q_vector *q_vector = vsi->q_vectors[q_idx];
++
++              if (q_vector->rx.ring || q_vector->tx.ring)
++                      napi_disable(&q_vector->napi);
++      }
+ }
+ 
+ /**
diff --git a/queue-4.9/i40iw-receive-netdev-events-post-inet_notifier-state.patch b/queue-4.9/i40iw-receive-netdev-events-post-inet_notifier-state.patch

new file mode 100644 (file)

index 0000000..5d26747
--- /dev/null
+++ b/queue-4.9/i40iw-receive-netdev-events-post-inet_notifier-state.patch
@@ -0,0 +1,59 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Shiraz Saleem <shiraz.saleem@intel.com>
+Date: Fri, 17 Mar 2017 18:30:07 -0500
+Subject: i40iw: Receive netdev events post INET_NOTIFIER state
+
+From: Shiraz Saleem <shiraz.saleem@intel.com>
+
+
+[ Upstream commit 871a8623d3b40221ad1103aff715dfee0aa4dacf ]
+
+Netdev notification events are de-registered only when all
+client iwdev instances are removed. If a single client is closed
+and re-opened, netdev events could arrive even before the Control
+Queue-Pair (CQP) is created, causing a NULL pointer dereference crash
+in i40iw_get_cqp_request. Fix this by allowing netdev event
+notification only after we have reached the INET_NOTIFIER state with
+respect to device initialization.
+
+Reported-by: Stefan Assmann <sassmann@redhat.com>
+Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
+Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/hw/i40iw/i40iw_utils.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/drivers/infiniband/hw/i40iw/i40iw_utils.c
++++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c
+@@ -159,6 +159,9 @@ int i40iw_inetaddr_event(struct notifier
+               return NOTIFY_DONE;
+ 
+       iwdev = &hdl->device;
++      if (iwdev->init_state < INET_NOTIFIER)
++              return NOTIFY_DONE;
++
+       netdev = iwdev->ldev->netdev;
+       upper_dev = netdev_master_upper_dev_get(netdev);
+       if (netdev != event_netdev)
+@@ -231,6 +234,9 @@ int i40iw_inet6addr_event(struct notifie
+               return NOTIFY_DONE;
+ 
+       iwdev = &hdl->device;
++      if (iwdev->init_state < INET_NOTIFIER)
++              return NOTIFY_DONE;
++
+       netdev = iwdev->ldev->netdev;
+       if (netdev != event_netdev)
+               return NOTIFY_DONE;
+@@ -280,6 +286,8 @@ int i40iw_net_event(struct notifier_bloc
+               if (!iwhdl)
+                       return NOTIFY_DONE;
+               iwdev = &iwhdl->device;
++              if (iwdev->init_state < INET_NOTIFIER)
++                      return NOTIFY_DONE;
+               p = (__be32 *)neigh->primary_key;
+               i40iw_copy_ip_ntohl(local_ipaddr, p);
+               if (neigh->nud_state & NUD_VALID) {
diff --git a/queue-4.9/ib-core-protect-against-self-requeue-of-a-cq-work-item.patch b/queue-4.9/ib-core-protect-against-self-requeue-of-a-cq-work-item.patch

new file mode 100644 (file)

index 0000000..925863b
--- /dev/null
+++ b/queue-4.9/ib-core-protect-against-self-requeue-of-a-cq-work-item.patch
@@ -0,0 +1,37 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Sagi Grimberg <sagi@grimberg.me>
+Date: Wed, 8 Mar 2017 22:00:52 +0200
+Subject: IB/core: Protect against self-requeue of a cq work item
+
+From: Sagi Grimberg <sagi@grimberg.me>
+
+
+[ Upstream commit 86f46aba8d1ac3ed0904542158a9b9cb9c7a143c ]
+
+We need to make sure that the cq work item does not
+run when we are destroying the cq. Unlike flush_work,
+cancel_work_sync protects against self-requeue of the
+work item (which we can do in ib_cq_poll_work).
+
+Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>
+Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/core/cq.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/infiniband/core/cq.c
++++ b/drivers/infiniband/core/cq.c
+@@ -196,7 +196,7 @@ void ib_free_cq(struct ib_cq *cq)
+               irq_poll_disable(&cq->iop);
+               break;
+       case IB_POLL_WORKQUEUE:
+-              flush_work(&cq->work);
++              cancel_work_sync(&cq->work);
+               break;
+       default:
+               WARN_ON_ONCE(1);
diff --git a/queue-4.9/ib-rxe-check-for-allocation-failure-on-elem.patch b/queue-4.9/ib-rxe-check-for-allocation-failure-on-elem.patch

new file mode 100644 (file)

index 0000000..8c9ef8d
--- /dev/null
+++ b/queue-4.9/ib-rxe-check-for-allocation-failure-on-elem.patch
@@ -0,0 +1,36 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Colin Ian King <colin.king@canonical.com>
+Date: Fri, 8 Sep 2017 15:37:45 +0100
+Subject: IB/rxe: check for allocation failure on elem
+
+From: Colin Ian King <colin.king@canonical.com>
+
+
+[ Upstream commit 4831ca9e4a8e48cb27e0a792f73250390827a228 ]
+
+The allocation for elem may fail (especially because we're using
+GFP_ATOMIC) so best to check for a null return.  This fixes a potential
+null pointer dereference when assigning elem->pool.
+
+Detected by CoverityScan CID#1357507 ("Dereference null return value")
+
+Fixes: 8700e3e7c485 ("Soft RoCE driver")
+Signed-off-by: Colin Ian King <colin.king@canonical.com>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/sw/rxe/rxe_pool.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/infiniband/sw/rxe/rxe_pool.c
++++ b/drivers/infiniband/sw/rxe/rxe_pool.c
+@@ -412,6 +412,8 @@ void *rxe_alloc(struct rxe_pool *pool)
+       elem = kmem_cache_zalloc(pool_cache(pool),
+                                (pool->flags & RXE_POOL_ATOMIC) ?
+                                GFP_ATOMIC : GFP_KERNEL);
++      if (!elem)
++              return NULL;
+ 
+       elem->pool = pool;
+       kref_init(&elem->ref_cnt);
diff --git a/queue-4.9/ib-rxe-double-free-on-error.patch b/queue-4.9/ib-rxe-double-free-on-error.patch

new file mode 100644 (file)

index 0000000..a7ce135
--- /dev/null
+++ b/queue-4.9/ib-rxe-double-free-on-error.patch
@@ -0,0 +1,37 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Wed, 8 Mar 2017 08:21:52 +0300
+Subject: IB/rxe: double free on error
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+
+[ Upstream commit ded260235308f340b979258a4c736e06ba12c747 ]
+
+"goto err;" has its own kfree_skb() call so it's a double free.  We
+only need to free on the "goto exit;" path.
+
+Fixes: 8700e3e7c485 ("Soft RoCE driver")
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/sw/rxe/rxe_req.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/infiniband/sw/rxe/rxe_req.c
++++ b/drivers/infiniband/sw/rxe/rxe_req.c
+@@ -726,11 +726,11 @@ next_wqe:
+       ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb);
+       if (ret) {
+               qp->need_req_skb = 1;
+-              kfree_skb(skb);
+ 
+               rollback_state(wqe, qp, &rollback_wqe, rollback_psn);
+ 
+               if (ret == -EAGAIN) {
++                      kfree_skb(skb);
+                       rxe_run_task(&qp->req.task, 1);
+                       goto exit;
+               }
diff --git a/queue-4.9/ib-rxe-increment-msn-only-when-completing-a-request.patch b/queue-4.9/ib-rxe-increment-msn-only-when-completing-a-request.patch

new file mode 100644 (file)

index 0000000..017d6b3
--- /dev/null
+++ b/queue-4.9/ib-rxe-increment-msn-only-when-completing-a-request.patch
@@ -0,0 +1,56 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: David Marchand <david.marchand@6wind.com>
+Date: Fri, 24 Feb 2017 15:38:26 +0100
+Subject: IB/rxe: increment msn only when completing a request
+
+From: David Marchand <david.marchand@6wind.com>
+
+
+[ Upstream commit 9fcd67d1772c43d2f23e8fca56acc7219e991676 ]
+
+According to C9-147, MSN should only be incremented when the last packet of
+a multi packet request has been received.
+
+"Logically, the requester associates a sequential Send Sequence Number
+(SSN) with each WQE posted to the send queue. The SSN bears a one-
+to-one relationship to the MSN returned by the responder in each re-
+sponse packet. Therefore, when the requester receives a response, it in-
+terprets the MSN as representing the SSN of the most recent request
+completed by the responder to determine which send WQE(s) can be
+completed."
+
+Fixes: 8700e3e7c485 ("Soft RoCE driver")
+
+Signed-off-by: David Marchand <david.marchand@6wind.com>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/sw/rxe/rxe_resp.c |    9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+--- a/drivers/infiniband/sw/rxe/rxe_resp.c
++++ b/drivers/infiniband/sw/rxe/rxe_resp.c
+@@ -799,18 +799,17 @@ static enum resp_states execute(struct r
+               /* Unreachable */
+               WARN_ON(1);
+ 
+-      /* We successfully processed this new request. */
+-      qp->resp.msn++;
+-
+       /* next expected psn, read handles this separately */
+       qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
+ 
+       qp->resp.opcode = pkt->opcode;
+       qp->resp.status = IB_WC_SUCCESS;
+ 
+-      if (pkt->mask & RXE_COMP_MASK)
++      if (pkt->mask & RXE_COMP_MASK) {
++              /* We successfully processed this new request. */
++              qp->resp.msn++;
+               return RESPST_COMPLETE;
+-      else if (qp_type(qp) == IB_QPT_RC)
++      } else if (qp_type(qp) == IB_QPT_RC)
+               return RESPST_ACKNOWLEDGE;
+       else
+               return RESPST_CLEANUP;
diff --git a/queue-4.9/igb-check-memory-allocation-failure.patch b/queue-4.9/igb-check-memory-allocation-failure.patch

new file mode 100644 (file)

index 0000000..ec5506e
--- /dev/null
+++ b/queue-4.9/igb-check-memory-allocation-failure.patch
@@ -0,0 +1,36 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+Date: Sun, 27 Aug 2017 08:39:51 +0200
+Subject: igb: check memory allocation failure
+
+From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+
+
+[ Upstream commit 18eb86362a52f0af933cc0fd5e37027317eb2d1c ]
+
+Check memory allocation failures and return -ENOMEM in such cases, as
+already done for other memory allocations in this function.
+
+This avoids NULL pointers dereference.
+
+Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+Tested-by: Aaron Brown <aaron.f.brown@intel.com>
+Acked-by: PJ Waskiewicz <peter.waskiewicz.jr@intel.com>
+Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/intel/igb/igb_main.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/ethernet/intel/igb/igb_main.c
++++ b/drivers/net/ethernet/intel/igb/igb_main.c
+@@ -3102,6 +3102,8 @@ static int igb_sw_init(struct igb_adapte
+       /* Setup and initialize a copy of the hw vlan table array */
+       adapter->shadow_vfta = kcalloc(E1000_VLAN_FILTER_TBL_SIZE, sizeof(u32),
+                                      GFP_ATOMIC);
++      if (!adapter->shadow_vfta)
++              return -ENOMEM;
+ 
+       /* This call may decrease the number of queues */
+       if (igb_init_interrupt_scheme(adapter, true)) {
diff --git a/queue-4.9/inet-frag-release-spinlock-before-calling-icmp_send.patch b/queue-4.9/inet-frag-release-spinlock-before-calling-icmp_send.patch

new file mode 100644 (file)

index 0000000..807b3c8
--- /dev/null
+++ b/queue-4.9/inet-frag-release-spinlock-before-calling-icmp_send.patch
@@ -0,0 +1,334 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 22 Mar 2017 08:57:15 -0700
+Subject: inet: frag: release spinlock before calling icmp_send()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit ec4fbd64751de18729eaa816ec69e4b504b5a7a2 ]
+
+Dmitry reported a lockdep splat [1] (false positive) that we can fix
+by releasing the spinlock before calling icmp_send() from ip_expire()
+
+This is a false positive because sending an ICMP message can not
+possibly re-enter the IP frag engine.
+
+[1]
+[ INFO: possible circular locking dependency detected ]
+4.10.0+ #29 Not tainted
+-------------------------------------------------------
+modprobe/12392 is trying to acquire lock:
+ (_xmit_ETHER#2){+.-...}, at: [<ffffffff837a8182>] spin_lock
+include/linux/spinlock.h:299 [inline]
+ (_xmit_ETHER#2){+.-...}, at: [<ffffffff837a8182>] __netif_tx_lock
+include/linux/netdevice.h:3486 [inline]
+ (_xmit_ETHER#2){+.-...}, at: [<ffffffff837a8182>]
+sch_direct_xmit+0x282/0x6d0 net/sched/sch_generic.c:180
+
+but task is already holding lock:
+ (&(&q->lock)->rlock){+.-...}, at: [<ffffffff8389a4d1>] spin_lock
+include/linux/spinlock.h:299 [inline]
+ (&(&q->lock)->rlock){+.-...}, at: [<ffffffff8389a4d1>]
+ip_expire+0x51/0x6c0 net/ipv4/ip_fragment.c:201
+
+which lock already depends on the new lock.
+
+the existing dependency chain (in reverse order) is:
+
+-> #1 (&(&q->lock)->rlock){+.-...}:
+       validate_chain kernel/locking/lockdep.c:2267 [inline]
+       __lock_acquire+0x2149/0x3430 kernel/locking/lockdep.c:3340
+       lock_acquire+0x2a1/0x630 kernel/locking/lockdep.c:3755
+       __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
+       _raw_spin_lock+0x33/0x50 kernel/locking/spinlock.c:151
+       spin_lock include/linux/spinlock.h:299 [inline]
+       ip_defrag+0x3a2/0x4130 net/ipv4/ip_fragment.c:669
+       ip_check_defrag+0x4e3/0x8b0 net/ipv4/ip_fragment.c:713
+       packet_rcv_fanout+0x282/0x800 net/packet/af_packet.c:1459
+       deliver_skb net/core/dev.c:1834 [inline]
+       dev_queue_xmit_nit+0x294/0xa90 net/core/dev.c:1890
+       xmit_one net/core/dev.c:2903 [inline]
+       dev_hard_start_xmit+0x16b/0xab0 net/core/dev.c:2923
+       sch_direct_xmit+0x31f/0x6d0 net/sched/sch_generic.c:182
+       __dev_xmit_skb net/core/dev.c:3092 [inline]
+       __dev_queue_xmit+0x13e5/0x1e60 net/core/dev.c:3358
+       dev_queue_xmit+0x17/0x20 net/core/dev.c:3423
+       neigh_resolve_output+0x6b9/0xb10 net/core/neighbour.c:1308
+       neigh_output include/net/neighbour.h:478 [inline]
+       ip_finish_output2+0x8b8/0x15a0 net/ipv4/ip_output.c:228
+       ip_do_fragment+0x1d93/0x2720 net/ipv4/ip_output.c:672
+       ip_fragment.constprop.54+0x145/0x200 net/ipv4/ip_output.c:545
+       ip_finish_output+0x82d/0xe10 net/ipv4/ip_output.c:314
+       NF_HOOK_COND include/linux/netfilter.h:246 [inline]
+       ip_output+0x1f0/0x7a0 net/ipv4/ip_output.c:404
+       dst_output include/net/dst.h:486 [inline]
+       ip_local_out+0x95/0x170 net/ipv4/ip_output.c:124
+       ip_send_skb+0x3c/0xc0 net/ipv4/ip_output.c:1492
+       ip_push_pending_frames+0x64/0x80 net/ipv4/ip_output.c:1512
+       raw_sendmsg+0x26de/0x3a00 net/ipv4/raw.c:655
+       inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:761
+       sock_sendmsg_nosec net/socket.c:633 [inline]
+       sock_sendmsg+0xca/0x110 net/socket.c:643
+       ___sys_sendmsg+0x4a3/0x9f0 net/socket.c:1985
+       __sys_sendmmsg+0x25c/0x750 net/socket.c:2075
+       SYSC_sendmmsg net/socket.c:2106 [inline]
+       SyS_sendmmsg+0x35/0x60 net/socket.c:2101
+       do_syscall_64+0x2e8/0x930 arch/x86/entry/common.c:281
+       return_from_SYSCALL_64+0x0/0x7a
+
+-> #0 (_xmit_ETHER#2){+.-...}:
+       check_prev_add kernel/locking/lockdep.c:1830 [inline]
+       check_prevs_add+0xa8f/0x19f0 kernel/locking/lockdep.c:1940
+       validate_chain kernel/locking/lockdep.c:2267 [inline]
+       __lock_acquire+0x2149/0x3430 kernel/locking/lockdep.c:3340
+       lock_acquire+0x2a1/0x630 kernel/locking/lockdep.c:3755
+       __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
+       _raw_spin_lock+0x33/0x50 kernel/locking/spinlock.c:151
+       spin_lock include/linux/spinlock.h:299 [inline]
+       __netif_tx_lock include/linux/netdevice.h:3486 [inline]
+       sch_direct_xmit+0x282/0x6d0 net/sched/sch_generic.c:180
+       __dev_xmit_skb net/core/dev.c:3092 [inline]
+       __dev_queue_xmit+0x13e5/0x1e60 net/core/dev.c:3358
+       dev_queue_xmit+0x17/0x20 net/core/dev.c:3423
+       neigh_hh_output include/net/neighbour.h:468 [inline]
+       neigh_output include/net/neighbour.h:476 [inline]
+       ip_finish_output2+0xf6c/0x15a0 net/ipv4/ip_output.c:228
+       ip_finish_output+0xa29/0xe10 net/ipv4/ip_output.c:316
+       NF_HOOK_COND include/linux/netfilter.h:246 [inline]
+       ip_output+0x1f0/0x7a0 net/ipv4/ip_output.c:404
+       dst_output include/net/dst.h:486 [inline]
+       ip_local_out+0x95/0x170 net/ipv4/ip_output.c:124
+       ip_send_skb+0x3c/0xc0 net/ipv4/ip_output.c:1492
+       ip_push_pending_frames+0x64/0x80 net/ipv4/ip_output.c:1512
+       icmp_push_reply+0x372/0x4d0 net/ipv4/icmp.c:394
+       icmp_send+0x156c/0x1c80 net/ipv4/icmp.c:754
+       ip_expire+0x40e/0x6c0 net/ipv4/ip_fragment.c:239
+       call_timer_fn+0x241/0x820 kernel/time/timer.c:1268
+       expire_timers kernel/time/timer.c:1307 [inline]
+       __run_timers+0x960/0xcf0 kernel/time/timer.c:1601
+       run_timer_softirq+0x21/0x80 kernel/time/timer.c:1614
+       __do_softirq+0x31f/0xbe7 kernel/softirq.c:284
+       invoke_softirq kernel/softirq.c:364 [inline]
+       irq_exit+0x1cc/0x200 kernel/softirq.c:405
+       exiting_irq arch/x86/include/asm/apic.h:657 [inline]
+       smp_apic_timer_interrupt+0x76/0xa0 arch/x86/kernel/apic/apic.c:962
+       apic_timer_interrupt+0x93/0xa0 arch/x86/entry/entry_64.S:707
+       __read_once_size include/linux/compiler.h:254 [inline]
+       atomic_read arch/x86/include/asm/atomic.h:26 [inline]
+       rcu_dynticks_curr_cpu_in_eqs kernel/rcu/tree.c:350 [inline]
+       __rcu_is_watching kernel/rcu/tree.c:1133 [inline]
+       rcu_is_watching+0x83/0x110 kernel/rcu/tree.c:1147
+       rcu_read_lock_held+0x87/0xc0 kernel/rcu/update.c:293
+       radix_tree_deref_slot include/linux/radix-tree.h:238 [inline]
+       filemap_map_pages+0x6d4/0x1570 mm/filemap.c:2335
+       do_fault_around mm/memory.c:3231 [inline]
+       do_read_fault mm/memory.c:3265 [inline]
+       do_fault+0xbd5/0x2080 mm/memory.c:3370
+       handle_pte_fault mm/memory.c:3600 [inline]
+       __handle_mm_fault+0x1062/0x2cb0 mm/memory.c:3714
+       handle_mm_fault+0x1e2/0x480 mm/memory.c:3751
+       __do_page_fault+0x4f6/0xb60 arch/x86/mm/fault.c:1397
+       do_page_fault+0x54/0x70 arch/x86/mm/fault.c:1460
+       page_fault+0x28/0x30 arch/x86/entry/entry_64.S:1011
+
+other info that might help us debug this:
+
+ Possible unsafe locking scenario:
+
+       CPU0                    CPU1
+       ----                    ----
+  lock(&(&q->lock)->rlock);
+                               lock(_xmit_ETHER#2);
+                               lock(&(&q->lock)->rlock);
+  lock(_xmit_ETHER#2);
+
+ *** DEADLOCK ***
+
+10 locks held by modprobe/12392:
+ #0:  (&mm->mmap_sem){++++++}, at: [<ffffffff81329758>]
+__do_page_fault+0x2b8/0xb60 arch/x86/mm/fault.c:1336
+ #1:  (rcu_read_lock){......}, at: [<ffffffff8188cab6>]
+filemap_map_pages+0x1e6/0x1570 mm/filemap.c:2324
+ #2:  (&(ptlock_ptr(page))->rlock#2){+.+...}, at: [<ffffffff81984a78>]
+spin_lock include/linux/spinlock.h:299 [inline]
+ #2:  (&(ptlock_ptr(page))->rlock#2){+.+...}, at: [<ffffffff81984a78>]
+pte_alloc_one_map mm/memory.c:2944 [inline]
+ #2:  (&(ptlock_ptr(page))->rlock#2){+.+...}, at: [<ffffffff81984a78>]
+alloc_set_pte+0x13b8/0x1b90 mm/memory.c:3072
+ #3:  (((&q->timer))){+.-...}, at: [<ffffffff81627e72>]
+lockdep_copy_map include/linux/lockdep.h:175 [inline]
+ #3:  (((&q->timer))){+.-...}, at: [<ffffffff81627e72>]
+call_timer_fn+0x1c2/0x820 kernel/time/timer.c:1258
+ #4:  (&(&q->lock)->rlock){+.-...}, at: [<ffffffff8389a4d1>] spin_lock
+include/linux/spinlock.h:299 [inline]
+ #4:  (&(&q->lock)->rlock){+.-...}, at: [<ffffffff8389a4d1>]
+ip_expire+0x51/0x6c0 net/ipv4/ip_fragment.c:201
+ #5:  (rcu_read_lock){......}, at: [<ffffffff8389a633>]
+ip_expire+0x1b3/0x6c0 net/ipv4/ip_fragment.c:216
+ #6:  (slock-AF_INET){+.-...}, at: [<ffffffff839b3313>] spin_trylock
+include/linux/spinlock.h:309 [inline]
+ #6:  (slock-AF_INET){+.-...}, at: [<ffffffff839b3313>] icmp_xmit_lock
+net/ipv4/icmp.c:219 [inline]
+ #6:  (slock-AF_INET){+.-...}, at: [<ffffffff839b3313>]
+icmp_send+0x803/0x1c80 net/ipv4/icmp.c:681
+ #7:  (rcu_read_lock_bh){......}, at: [<ffffffff838ab9a1>]
+ip_finish_output2+0x2c1/0x15a0 net/ipv4/ip_output.c:198
+ #8:  (rcu_read_lock_bh){......}, at: [<ffffffff836d1dee>]
+__dev_queue_xmit+0x23e/0x1e60 net/core/dev.c:3324
+ #9:  (dev->qdisc_running_key ?: &qdisc_running_key){+.....}, at:
+[<ffffffff836d3a27>] dev_queue_xmit+0x17/0x20 net/core/dev.c:3423
+
+stack backtrace:
+CPU: 0 PID: 12392 Comm: modprobe Not tainted 4.10.0+ #29
+Hardware name: Google Google Compute Engine/Google Compute Engine,
+BIOS Google 01/01/2011
+Call Trace:
+ <IRQ>
+ __dump_stack lib/dump_stack.c:16 [inline]
+ dump_stack+0x2ee/0x3ef lib/dump_stack.c:52
+ print_circular_bug+0x307/0x3b0 kernel/locking/lockdep.c:1204
+ check_prev_add kernel/locking/lockdep.c:1830 [inline]
+ check_prevs_add+0xa8f/0x19f0 kernel/locking/lockdep.c:1940
+ validate_chain kernel/locking/lockdep.c:2267 [inline]
+ __lock_acquire+0x2149/0x3430 kernel/locking/lockdep.c:3340
+ lock_acquire+0x2a1/0x630 kernel/locking/lockdep.c:3755
+ __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
+ _raw_spin_lock+0x33/0x50 kernel/locking/spinlock.c:151
+ spin_lock include/linux/spinlock.h:299 [inline]
+ __netif_tx_lock include/linux/netdevice.h:3486 [inline]
+ sch_direct_xmit+0x282/0x6d0 net/sched/sch_generic.c:180
+ __dev_xmit_skb net/core/dev.c:3092 [inline]
+ __dev_queue_xmit+0x13e5/0x1e60 net/core/dev.c:3358
+ dev_queue_xmit+0x17/0x20 net/core/dev.c:3423
+ neigh_hh_output include/net/neighbour.h:468 [inline]
+ neigh_output include/net/neighbour.h:476 [inline]
+ ip_finish_output2+0xf6c/0x15a0 net/ipv4/ip_output.c:228
+ ip_finish_output+0xa29/0xe10 net/ipv4/ip_output.c:316
+ NF_HOOK_COND include/linux/netfilter.h:246 [inline]
+ ip_output+0x1f0/0x7a0 net/ipv4/ip_output.c:404
+ dst_output include/net/dst.h:486 [inline]
+ ip_local_out+0x95/0x170 net/ipv4/ip_output.c:124
+ ip_send_skb+0x3c/0xc0 net/ipv4/ip_output.c:1492
+ ip_push_pending_frames+0x64/0x80 net/ipv4/ip_output.c:1512
+ icmp_push_reply+0x372/0x4d0 net/ipv4/icmp.c:394
+ icmp_send+0x156c/0x1c80 net/ipv4/icmp.c:754
+ ip_expire+0x40e/0x6c0 net/ipv4/ip_fragment.c:239
+ call_timer_fn+0x241/0x820 kernel/time/timer.c:1268
+ expire_timers kernel/time/timer.c:1307 [inline]
+ __run_timers+0x960/0xcf0 kernel/time/timer.c:1601
+ run_timer_softirq+0x21/0x80 kernel/time/timer.c:1614
+ __do_softirq+0x31f/0xbe7 kernel/softirq.c:284
+ invoke_softirq kernel/softirq.c:364 [inline]
+ irq_exit+0x1cc/0x200 kernel/softirq.c:405
+ exiting_irq arch/x86/include/asm/apic.h:657 [inline]
+ smp_apic_timer_interrupt+0x76/0xa0 arch/x86/kernel/apic/apic.c:962
+ apic_timer_interrupt+0x93/0xa0 arch/x86/entry/entry_64.S:707
+RIP: 0010:__read_once_size include/linux/compiler.h:254 [inline]
+RIP: 0010:atomic_read arch/x86/include/asm/atomic.h:26 [inline]
+RIP: 0010:rcu_dynticks_curr_cpu_in_eqs kernel/rcu/tree.c:350 [inline]
+RIP: 0010:__rcu_is_watching kernel/rcu/tree.c:1133 [inline]
+RIP: 0010:rcu_is_watching+0x83/0x110 kernel/rcu/tree.c:1147
+RSP: 0000:ffff8801c391f120 EFLAGS: 00000a03 ORIG_RAX: ffffffffffffff10
+RAX: dffffc0000000000 RBX: ffff8801c391f148 RCX: 0000000000000000
+RDX: 0000000000000000 RSI: 000055edd4374000 RDI: ffff8801dbe1ae0c
+RBP: ffff8801c391f1a0 R08: 0000000000000002 R09: 0000000000000000
+R10: dffffc0000000000 R11: 0000000000000002 R12: 1ffff10038723e25
+R13: ffff8801dbe1ae00 R14: ffff8801c391f680 R15: dffffc0000000000
+ </IRQ>
+ rcu_read_lock_held+0x87/0xc0 kernel/rcu/update.c:293
+ radix_tree_deref_slot include/linux/radix-tree.h:238 [inline]
+ filemap_map_pages+0x6d4/0x1570 mm/filemap.c:2335
+ do_fault_around mm/memory.c:3231 [inline]
+ do_read_fault mm/memory.c:3265 [inline]
+ do_fault+0xbd5/0x2080 mm/memory.c:3370
+ handle_pte_fault mm/memory.c:3600 [inline]
+ __handle_mm_fault+0x1062/0x2cb0 mm/memory.c:3714
+ handle_mm_fault+0x1e2/0x480 mm/memory.c:3751
+ __do_page_fault+0x4f6/0xb60 arch/x86/mm/fault.c:1397
+ do_page_fault+0x54/0x70 arch/x86/mm/fault.c:1460
+ page_fault+0x28/0x30 arch/x86/entry/entry_64.S:1011
+RIP: 0033:0x7f83172f2786
+RSP: 002b:00007fffe859ae80 EFLAGS: 00010293
+RAX: 000055edd4373040 RBX: 00007f83175111c8 RCX: 000055edd4373238
+RDX: 0000000000000000 RSI: 0000000000000000 RDI: 00007f8317510970
+RBP: 00007fffe859afd0 R08: 0000000000000009 R09: 0000000000000000
+R10: 0000000000000064 R11: 0000000000000000 R12: 000055edd4373040
+R13: 0000000000000000 R14: 00007fffe859afe8 R15: 0000000000000000
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_fragment.c |   25 +++++++++++++++++--------
+ 1 file changed, 17 insertions(+), 8 deletions(-)
+
+--- a/net/ipv4/ip_fragment.c
++++ b/net/ipv4/ip_fragment.c
+@@ -198,6 +198,7 @@ static void ip_expire(unsigned long arg)
+       qp = container_of((struct inet_frag_queue *) arg, struct ipq, q);
+       net = container_of(qp->q.net, struct net, ipv4.frags);
+ 
++      rcu_read_lock();
+       spin_lock(&qp->q.lock);
+ 
+       if (qp->q.flags & INET_FRAG_COMPLETE)
+@@ -207,7 +208,7 @@ static void ip_expire(unsigned long arg)
+       __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
+ 
+       if (!inet_frag_evicting(&qp->q)) {
+-              struct sk_buff *head = qp->q.fragments;
++              struct sk_buff *clone, *head = qp->q.fragments;
+               const struct iphdr *iph;
+               int err;
+ 
+@@ -216,32 +217,40 @@ static void ip_expire(unsigned long arg)
+               if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments)
+                       goto out;
+ 
+-              rcu_read_lock();
+               head->dev = dev_get_by_index_rcu(net, qp->iif);
+               if (!head->dev)
+-                      goto out_rcu_unlock;
++                      goto out;
++
+ 
+               /* skb has no dst, perform route lookup again */
+               iph = ip_hdr(head);
+               err = ip_route_input_noref(head, iph->daddr, iph->saddr,
+                                          iph->tos, head->dev);
+               if (err)
+-                      goto out_rcu_unlock;
++                      goto out;
+ 
+               /* Only an end host needs to send an ICMP
+                * "Fragment Reassembly Timeout" message, per RFC792.
+                */
+               if (frag_expire_skip_icmp(qp->user) &&
+                   (skb_rtable(head)->rt_type != RTN_LOCAL))
+-                      goto out_rcu_unlock;
++                      goto out;
++
++              clone = skb_clone(head, GFP_ATOMIC);
+ 
+               /* Send an ICMP "Fragment Reassembly Timeout" message. */
+-              icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
+-out_rcu_unlock:
+-              rcu_read_unlock();
++              if (clone) {
++                      spin_unlock(&qp->q.lock);
++                      icmp_send(clone, ICMP_TIME_EXCEEDED,
++                                ICMP_EXC_FRAGTIME, 0);
++                      consume_skb(clone);
++                      goto out_rcu_unlock;
++              }
+       }
+ out:
+       spin_unlock(&qp->q.lock);
++out_rcu_unlock:
++      rcu_read_unlock();
+       ipq_put(qp);
+ }
+ 
diff --git a/queue-4.9/infiniband-fix-alignment-of-mmap-cookies-to-support-vipt-caching.patch b/queue-4.9/infiniband-fix-alignment-of-mmap-cookies-to-support-vipt-caching.patch

new file mode 100644 (file)

index 0000000..9f4fda1
--- /dev/null
+++ b/queue-4.9/infiniband-fix-alignment-of-mmap-cookies-to-support-vipt-caching.patch
@@ -0,0 +1,58 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
+Date: Fri, 10 Mar 2017 11:34:20 -0700
+Subject: infiniband: Fix alignment of mmap cookies to support VIPT caching
+
+From: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
+
+
+[ Upstream commit cb8864559631754ac93d5734b165ccd0cad4728c ]
+
+When vmalloc_user is used to create memory that is supposed to be mmap'd
+to user space, it is necessary for the mmap cookie (eg the offset) to be
+aligned to SHMLBA.
+
+This creates a situation where all virtual mappings of the same physical
+page share the same virtual cache index and guarantees VIPT coherence.
+Otherwise the cache is non-coherent and the kernel will not see writes
+by userspace when reading the shared page (or vice-versa).
+
+Reported-by: Josh Beavers <josh.beavers@gmail.com>
+Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/sw/rdmavt/mmap.c  |    4 ++--
+ drivers/infiniband/sw/rxe/rxe_mmap.c |    4 ++--
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/infiniband/sw/rdmavt/mmap.c
++++ b/drivers/infiniband/sw/rdmavt/mmap.c
+@@ -170,9 +170,9 @@ struct rvt_mmap_info *rvt_create_mmap_in
+ 
+       spin_lock_irq(&rdi->mmap_offset_lock);
+       if (rdi->mmap_offset == 0)
+-              rdi->mmap_offset = PAGE_SIZE;
++              rdi->mmap_offset = ALIGN(PAGE_SIZE, SHMLBA);
+       ip->offset = rdi->mmap_offset;
+-      rdi->mmap_offset += size;
++      rdi->mmap_offset += ALIGN(size, SHMLBA);
+       spin_unlock_irq(&rdi->mmap_offset_lock);
+ 
+       INIT_LIST_HEAD(&ip->pending_mmaps);
+--- a/drivers/infiniband/sw/rxe/rxe_mmap.c
++++ b/drivers/infiniband/sw/rxe/rxe_mmap.c
+@@ -156,10 +156,10 @@ struct rxe_mmap_info *rxe_create_mmap_in
+       spin_lock_bh(&rxe->mmap_offset_lock);
+ 
+       if (rxe->mmap_offset == 0)
+-              rxe->mmap_offset = PAGE_SIZE;
++              rxe->mmap_offset = ALIGN(PAGE_SIZE, SHMLBA);
+ 
+       ip->info.offset = rxe->mmap_offset;
+-      rxe->mmap_offset += size;
++      rxe->mmap_offset += ALIGN(size, SHMLBA);
+ 
+       spin_unlock_bh(&rxe->mmap_offset_lock);
+ 
diff --git a/queue-4.9/iommu-exynos-workaround-flpd-cache-flush-issues-for-sysmmu-v5.patch b/queue-4.9/iommu-exynos-workaround-flpd-cache-flush-issues-for-sysmmu-v5.patch

new file mode 100644 (file)

index 0000000..bd5dea2
--- /dev/null
+++ b/queue-4.9/iommu-exynos-workaround-flpd-cache-flush-issues-for-sysmmu-v5.patch
@@ -0,0 +1,42 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Marek Szyprowski <m.szyprowski@samsung.com>
+Date: Mon, 20 Mar 2017 10:17:57 +0100
+Subject: iommu/exynos: Workaround FLPD cache flush issues for SYSMMU v5
+
+From: Marek Szyprowski <m.szyprowski@samsung.com>
+
+
+[ Upstream commit cd37a296a9f890586665bb8974a8b17ee2f17d6d ]
+
+For some unknown reasons, in some cases, FLPD cache invalidation doesn't
+work properly with SYSMMU v5 controllers found in Exynos5433 SoCs. This
+can be observed by a firmware crash during initialization phase of MFC
+video decoder available in the mentioned SoCs when IOMMU support is
+enabled. To work around this issue, perform a full TLB/FLPD invalidation
+when replacing any first level page descriptors on SYSMMU v5.
+
+Fixes: 740a01eee9ada ("iommu/exynos: Add support for v5 SYSMMU")
+CC: stable@vger.kernel.org # v4.10+
+Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
+Tested-by: Andrzej Hajda <a.hajda@samsung.com>
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/iommu/exynos-iommu.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/iommu/exynos-iommu.c
++++ b/drivers/iommu/exynos-iommu.c
+@@ -543,7 +543,10 @@ static void sysmmu_tlb_invalidate_flpdca
+       if (is_sysmmu_active(data) && data->version >= MAKE_MMU_VER(3, 3)) {
+               clk_enable(data->clk_master);
+               if (sysmmu_block(data)) {
+-                      __sysmmu_tlb_invalidate_entry(data, iova, 1);
++                      if (data->version >= MAKE_MMU_VER(5, 0))
++                              __sysmmu_tlb_invalidate(data);
++                      else
++                              __sysmmu_tlb_invalidate_entry(data, iova, 1);
+                       sysmmu_unblock(data);
+               }
+               clk_disable(data->clk_master);
diff --git a/queue-4.9/irda-vlsi_ir-fix-check-for-dma-mapping-errors.patch b/queue-4.9/irda-vlsi_ir-fix-check-for-dma-mapping-errors.patch

new file mode 100644 (file)

index 0000000..aca7bda
--- /dev/null
+++ b/queue-4.9/irda-vlsi_ir-fix-check-for-dma-mapping-errors.patch
@@ -0,0 +1,47 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Alexey Khoroshilov <khoroshilov@ispras.ru>
+Date: Sat, 25 Mar 2017 01:48:08 +0300
+Subject: irda: vlsi_ir: fix check for DMA mapping errors
+
+From: Alexey Khoroshilov <khoroshilov@ispras.ru>
+
+
+[ Upstream commit 6ac3b77a6ffff7513ff86b684aa256ea01c0e5b5 ]
+
+vlsi_alloc_ring() checks for DMA mapping errors by comparing
+returned address with zero, while pci_dma_mapping_error() should be used.
+
+Found by Linux Driver Verification project (linuxtesting.org).
+
+Signed-off-by: Alexey Khoroshilov <khoroshilov@ispras.ru>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/irda/vlsi_ir.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/irda/vlsi_ir.c
++++ b/drivers/net/irda/vlsi_ir.c
+@@ -418,8 +418,9 @@ static struct vlsi_ring *vlsi_alloc_ring
+               memset(rd, 0, sizeof(*rd));
+               rd->hw = hwmap + i;
+               rd->buf = kmalloc(len, GFP_KERNEL|GFP_DMA);
+-              if (rd->buf == NULL ||
+-                  !(busaddr = pci_map_single(pdev, rd->buf, len, dir))) {
++              if (rd->buf)
++                      busaddr = pci_map_single(pdev, rd->buf, len, dir);
++              if (rd->buf == NULL || pci_dma_mapping_error(pdev, busaddr)) {
+                       if (rd->buf) {
+                               net_err_ratelimited("%s: failed to create PCI-MAP for %p\n",
+                                                   __func__, rd->buf);
+@@ -430,8 +431,7 @@ static struct vlsi_ring *vlsi_alloc_ring
+                               rd = r->rd + j;
+                               busaddr = rd_get_addr(rd);
+                               rd_set_addr_status(rd, 0, 0);
+-                              if (busaddr)
+-                                      pci_unmap_single(pdev, busaddr, len, dir);
++                              pci_unmap_single(pdev, busaddr, len, dir);
+                               kfree(rd->buf);
+                               rd->buf = NULL;
+                       }
diff --git a/queue-4.9/isdn-kcapi-avoid-uninitialized-data.patch b/queue-4.9/isdn-kcapi-avoid-uninitialized-data.patch

new file mode 100644 (file)

index 0000000..6a126c6
--- /dev/null
+++ b/queue-4.9/isdn-kcapi-avoid-uninitialized-data.patch
@@ -0,0 +1,42 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Arnd Bergmann <arnd@arndb.de>
+Date: Tue, 28 Mar 2017 12:11:07 +0200
+Subject: isdn: kcapi: avoid uninitialized data
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+
+[ Upstream commit af109a2cf6a9a6271fa420ae2d64d72d86c92b7d ]
+
+gcc-7 points out that the AVMB1_ADDCARD ioctl results in an uninitialized
+value ending up in the cardnr parameter:
+
+drivers/isdn/capi/kcapi.c: In function 'old_capi_manufacturer':
+drivers/isdn/capi/kcapi.c:1042:24: error: 'cdef.cardnr' may be used uninitialized in this function [-Werror=maybe-uninitialized]
+   cparams.cardnr = cdef.cardnr;
+
+This has been broken since before the start of the git history, so
+either the value is not used for anything important, or the ioctl
+command doesn't get called in practice.
+
+Setting the cardnr to zero avoids the warning and makes sure
+we have consistent behavior.
+
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/isdn/capi/kcapi.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/isdn/capi/kcapi.c
++++ b/drivers/isdn/capi/kcapi.c
+@@ -1032,6 +1032,7 @@ static int old_capi_manufacturer(unsigne
+                                                    sizeof(avmb1_carddef))))
+                               return -EFAULT;
+                       cdef.cardtype = AVM_CARDTYPE_B1;
++                      cdef.cardnr = 0;
+               } else {
+                       if ((retval = copy_from_user(&cdef, data,
+                                                    sizeof(avmb1_extcarddef))))
diff --git a/queue-4.9/ixgbe-fix-use-of-uninitialized-padding.patch b/queue-4.9/ixgbe-fix-use-of-uninitialized-padding.patch

new file mode 100644 (file)

index 0000000..c9fa4e9
--- /dev/null
+++ b/queue-4.9/ixgbe-fix-use-of-uninitialized-padding.patch
@@ -0,0 +1,53 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Emil Tantilov <emil.s.tantilov@intel.com>
+Date: Mon, 11 Sep 2017 14:21:31 -0700
+Subject: ixgbe: fix use of uninitialized padding
+
+From: Emil Tantilov <emil.s.tantilov@intel.com>
+
+
+[ Upstream commit dcfd6b839c998bc9838e2a47f44f37afbdf3099c ]
+
+This patch is resolving Coverity hits where padding in a structure could
+be used uninitialized.
+
+- Initialize fw_cmd.pad/2 before ixgbe_calculate_checksum()
+
+- Initialize buffer.pad2/3 before ixgbe_hic_unlocked()
+
+Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
+Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
+Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/intel/ixgbe/ixgbe_common.c |    4 ++--
+ drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c   |    2 ++
+ 2 files changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+@@ -3696,10 +3696,10 @@ s32 ixgbe_set_fw_drv_ver_generic(struct
+       fw_cmd.ver_build = build;
+       fw_cmd.ver_sub = sub;
+       fw_cmd.hdr.checksum = 0;
+-      fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd,
+-                              (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len));
+       fw_cmd.pad = 0;
+       fw_cmd.pad2 = 0;
++      fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd,
++                              (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len));
+ 
+       for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) {
+               ret_val = ixgbe_host_interface_command(hw, &fw_cmd,
+--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+@@ -617,6 +617,8 @@ static s32 ixgbe_read_ee_hostif_buffer_X
+               /* convert offset from words to bytes */
+               buffer.address = cpu_to_be32((offset + current_word) * 2);
+               buffer.length = cpu_to_be16(words_to_read * 2);
++              buffer.pad2 = 0;
++              buffer.pad3 = 0;
+ 
+               status = ixgbe_host_interface_command(hw, &buffer,
+                                                     sizeof(buffer),
diff --git a/queue-4.9/kvm-fix-usage-of-uninit-spinlock-in-avic_vm_destroy.patch b/queue-4.9/kvm-fix-usage-of-uninit-spinlock-in-avic_vm_destroy.patch

new file mode 100644 (file)

index 0000000..e8c38d9
--- /dev/null
+++ b/queue-4.9/kvm-fix-usage-of-uninit-spinlock-in-avic_vm_destroy.patch
@@ -0,0 +1,46 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Dmitry Vyukov <dvyukov@google.com>
+Date: Tue, 24 Jan 2017 14:06:48 +0100
+Subject: kvm: fix usage of uninit spinlock in avic_vm_destroy()
+
+From: Dmitry Vyukov <dvyukov@google.com>
+
+
+[ Upstream commit 3863dff0c3dd72984395c93b12383b393c5c3989 ]
+
+If avic is not enabled, avic_vm_init() does nothing and returns early.
+However, avic_vm_destroy() still tries to destroy what hasn't been created.
+The only bad consequence of this now is that avic_vm_destroy() uses
+svm_vm_data_hash_lock that hasn't been initialized (and is not meant
+to be used at all if avic is not enabled).
+
+Return early from avic_vm_destroy() if avic is not enabled.
+It has nothing to destroy.
+
+Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
+Cc: Joerg Roedel <joro@8bytes.org>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: "Radim Krčmář" <rkrcmar@redhat.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: kvm@vger.kernel.org
+Cc: syzkaller@googlegroups.com
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -1382,6 +1382,9 @@ static void avic_vm_destroy(struct kvm *
+       unsigned long flags;
+       struct kvm_arch *vm_data = &kvm->arch;
+ 
++      if (!avic)
++              return;
++
+       avic_free_vm_id(vm_data->avic_vm_id);
+ 
+       if (vm_data->avic_logical_id_table_page)
diff --git a/queue-4.9/kvm-mm-account-kvm-related-kmem-slabs-to-kmemcg.patch b/queue-4.9/kvm-mm-account-kvm-related-kmem-slabs-to-kmemcg.patch

new file mode 100644 (file)

index 0000000..15921d8
--- /dev/null
+++ b/queue-4.9/kvm-mm-account-kvm-related-kmem-slabs-to-kmemcg.patch
@@ -0,0 +1,56 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Shakeel Butt <shakeelb@google.com>
+Date: Thu, 5 Oct 2017 18:07:24 -0700
+Subject: kvm, mm: account kvm related kmem slabs to kmemcg
+
+From: Shakeel Butt <shakeelb@google.com>
+
+
+[ Upstream commit 46bea48ac241fe0b413805952dda74dd0c09ba8b ]
+
+The kvm slabs can consume a significant amount of system memory
+and indeed in our production environment we have observed that
+a lot of machines are spending a significant amount of memory that
+can not be left as system memory overhead. Also the allocations
+from these slabs can be triggered directly by user space applications
+which have access to kvm and thus a buggy application can leak
+such memory. So, these caches should be accounted to kmemcg.
+
+Signed-off-by: Shakeel Butt <shakeelb@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu.c  |    4 ++--
+ virt/kvm/kvm_main.c |    2 +-
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -5052,13 +5052,13 @@ int kvm_mmu_module_init(void)
+ {
+       pte_list_desc_cache = kmem_cache_create("pte_list_desc",
+                                           sizeof(struct pte_list_desc),
+-                                          0, 0, NULL);
++                                          0, SLAB_ACCOUNT, NULL);
+       if (!pte_list_desc_cache)
+               goto nomem;
+ 
+       mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header",
+                                                 sizeof(struct kvm_mmu_page),
+-                                                0, 0, NULL);
++                                                0, SLAB_ACCOUNT, NULL);
+       if (!mmu_page_header_cache)
+               goto nomem;
+ 
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -3904,7 +3904,7 @@ int kvm_init(void *opaque, unsigned vcpu
+       if (!vcpu_align)
+               vcpu_align = __alignof__(struct kvm_vcpu);
+       kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, vcpu_align,
+-                                         0, NULL);
++                                         SLAB_ACCOUNT, NULL);
+       if (!kvm_vcpu_cache) {
+               r = -ENOMEM;
+               goto out_free_3;
diff --git a/queue-4.9/kvm-nvmx-fix-host_cr3-host_cr4-cache.patch b/queue-4.9/kvm-nvmx-fix-host_cr3-host_cr4-cache.patch

new file mode 100644 (file)

index 0000000..9228412
--- /dev/null
+++ b/queue-4.9/kvm-nvmx-fix-host_cr3-host_cr4-cache.patch
@@ -0,0 +1,83 @@
+From 44889942b6eb356eab27ce25fe10701adfec7776 Mon Sep 17 00:00:00 2001
+From: Ladi Prosek <lprosek@redhat.com>
+Date: Fri, 22 Sep 2017 07:53:15 +0200
+Subject: KVM: nVMX: fix HOST_CR3/HOST_CR4 cache
+
+From: Ladi Prosek <lprosek@redhat.com>
+
+commit 44889942b6eb356eab27ce25fe10701adfec7776 upstream.
+
+For nested virt we maintain multiple VMCS that can run on a vCPU. So it is
+incorrect to keep vmcs_host_cr3 and vmcs_host_cr4, whose purpose is caching
+the value of the rarely changing HOST_CR3 and HOST_CR4 VMCS fields, in
+vCPU-wide data structures.
+
+Hyper-V nested on KVM runs into this consistently for me with PCID enabled.
+CR3 is updated with a new value, unlikely(cr3 != vmx->host_state.vmcs_host_cr3)
+fires, and the currently loaded VMCS is updated. Then we switch from L2 to
+L1 and the next exit reverts CR3 to its old value.
+
+Fixes: d6e41f1151fe ("x86/mm, KVM: Teach KVM's VMX code that CR3 isn't a constant")
+Signed-off-by: Ladi Prosek <lprosek@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/vmx.c |   16 ++++++++--------
+ 1 file changed, 8 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -191,6 +191,8 @@ struct loaded_vmcs {
+       struct vmcs *shadow_vmcs;
+       int cpu;
+       int launched;
++      unsigned long vmcs_host_cr3;    /* May not match real cr3 */
++      unsigned long vmcs_host_cr4;    /* May not match real cr4 */
+       struct list_head loaded_vmcss_on_cpu_link;
+ };
+ 
+@@ -573,8 +575,6 @@ struct vcpu_vmx {
+               int           gs_ldt_reload_needed;
+               int           fs_reload_needed;
+               u64           msr_host_bndcfgs;
+-              unsigned long vmcs_host_cr3;    /* May not match real cr3 */
+-              unsigned long vmcs_host_cr4;    /* May not match real cr4 */
+       } host_state;
+       struct {
+               int vm86_active;
+@@ -4871,12 +4871,12 @@ static void vmx_set_constant_host_state(
+        */
+       cr3 = read_cr3();
+       vmcs_writel(HOST_CR3, cr3);             /* 22.2.3  FIXME: shadow tables */
+-      vmx->host_state.vmcs_host_cr3 = cr3;
++      vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
+ 
+       /* Save the most likely value for this task's CR4 in the VMCS. */
+       cr4 = cr4_read_shadow();
+       vmcs_writel(HOST_CR4, cr4);                     /* 22.2.3, 22.2.5 */
+-      vmx->host_state.vmcs_host_cr4 = cr4;
++      vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
+ 
+       vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS);  /* 22.2.4 */
+ #ifdef CONFIG_X86_64
+@@ -8874,15 +8874,15 @@ static void __noclone vmx_vcpu_run(struc
+               vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
+ 
+       cr3 = __get_current_cr3_fast();
+-      if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) {
++      if (unlikely(cr3 != vmx->loaded_vmcs->vmcs_host_cr3)) {
+               vmcs_writel(HOST_CR3, cr3);
+-              vmx->host_state.vmcs_host_cr3 = cr3;
++              vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
+       }
+ 
+       cr4 = cr4_read_shadow();
+-      if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) {
++      if (unlikely(cr4 != vmx->loaded_vmcs->vmcs_host_cr4)) {
+               vmcs_writel(HOST_CR4, cr4);
+-              vmx->host_state.vmcs_host_cr4 = cr4;
++              vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
+       }
+ 
+       /* When single-stepping over STI and MOV SS, we must clear the
diff --git a/queue-4.9/kvm-pci-assign-do-not-map-smm-memory-slot-pages-in-vt-d-page-tables.patch b/queue-4.9/kvm-pci-assign-do-not-map-smm-memory-slot-pages-in-vt-d-page-tables.patch

new file mode 100644 (file)

index 0000000..4eb5f3d
--- /dev/null
+++ b/queue-4.9/kvm-pci-assign-do-not-map-smm-memory-slot-pages-in-vt-d-page-tables.patch
@@ -0,0 +1,34 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: "Herongguang (Stephen)" <herongguang.he@huawei.com>
+Date: Mon, 27 Mar 2017 15:21:17 +0800
+Subject: KVM: pci-assign: do not map smm memory slot pages in vt-d page tables
+
+From: "Herongguang (Stephen)" <herongguang.he@huawei.com>
+
+
+[ Upstream commit 0292e169b2d9c8377a168778f0b16eadb1f578fd ]
+
+Otherwise the VM memory pages are not put, and are thus leaked, in
+kvm_iommu_unmap_memslots() when the VM is destroyed.
+
+This is consistent with current vfio implementation.
+
+Signed-off-by: herongguang <herongguang.he@huawei.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ virt/kvm/kvm_main.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -1060,7 +1060,7 @@ int __kvm_set_memory_region(struct kvm *
+        * changes) is disallowed above, so any other attribute changes getting
+        * here can be skipped.
+        */
+-      if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
++      if (as_id == 0 && (change == KVM_MR_CREATE || change == KVM_MR_MOVE)) {
+               r = kvm_iommu_map_pages(kvm, &new);
+               return r;
+       }
diff --git a/queue-4.9/kvm-vmx-fix-enable-vpid-conditions.patch b/queue-4.9/kvm-vmx-fix-enable-vpid-conditions.patch

new file mode 100644 (file)

index 0000000..54a821e
--- /dev/null
+++ b/queue-4.9/kvm-vmx-fix-enable-vpid-conditions.patch
@@ -0,0 +1,85 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Wanpeng Li <wanpeng.li@hotmail.com>
+Date: Thu, 23 Mar 2017 05:30:08 -0700
+Subject: KVM: VMX: Fix enable VPID conditions
+
+From: Wanpeng Li <wanpeng.li@hotmail.com>
+
+
+[ Upstream commit 08d839c4b134b8328ec42f2157a9ca4b93227c03 ]
+
+This can be reproduced by running L2 on L1 with VPID disabled on L0.
+Without commit "KVM: nVMX: Fix nested VPID vmx exec control", L2
+crashes as below:
+
+KVM: entry failed, hardware error 0x7
+EAX=00000000 EBX=00000000 ECX=00000000 EDX=000306c3
+ESI=00000000 EDI=00000000 EBP=00000000 ESP=00000000
+EIP=0000fff0 EFL=00000002 [-------] CPL=0 II=0 A20=1 SMM=0 HLT=0
+ES =0000 00000000 0000ffff 00009300
+CS =f000 ffff0000 0000ffff 00009b00
+SS =0000 00000000 0000ffff 00009300
+DS =0000 00000000 0000ffff 00009300
+FS =0000 00000000 0000ffff 00009300
+GS =0000 00000000 0000ffff 00009300
+LDT=0000 00000000 0000ffff 00008200
+TR =0000 00000000 0000ffff 00008b00
+GDT=     00000000 0000ffff
+IDT=     00000000 0000ffff
+CR0=60000010 CR2=00000000 CR3=00000000 CR4=00000000
+DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000 DR3=0000000000000000
+DR6=00000000ffff0ff0 DR7=0000000000000400
+EFER=0000000000000000
+
+Reference SDM 30.3 INVVPID:
+
+Protected Mode Exceptions
+- #UD
+  - If not in VMX operation.
+  - If the logical processor does not support VPIDs (IA32_VMX_PROCBASED_CTLS2[37]=0).
+  - If the logical processor supports VPIDs (IA32_VMX_PROCBASED_CTLS2[37]=1) but does
+    not support the INVVPID instruction (IA32_VMX_EPT_VPID_CAP[32]=0).
+
+So we should check both VPID enable bit in vmx exec control and INVVPID support bit
+in vmx capability MSRs to enable VPID. This patch adds the guarantee to not enable
+VPID if either INVVPID or single-context/all-context invalidation is not exposed in
+vmx capability MSRs.
+
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Jim Mattson <jmattson@google.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Radim Krčmář <rkrcmar@redhat.com>
+Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx.c |    9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -1201,6 +1201,11 @@ static inline bool cpu_has_vmx_invvpid_g
+       return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
+ }
+ 
++static inline bool cpu_has_vmx_invvpid(void)
++{
++      return vmx_capability.vpid & VMX_VPID_INVVPID_BIT;
++}
++
+ static inline bool cpu_has_vmx_ept(void)
+ {
+       return vmcs_config.cpu_based_2nd_exec_ctrl &
+@@ -6445,8 +6450,10 @@ static __init int hardware_setup(void)
+       if (boot_cpu_has(X86_FEATURE_NX))
+               kvm_enable_efer_bits(EFER_NX);
+ 
+-      if (!cpu_has_vmx_vpid())
++      if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() ||
++              !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
+               enable_vpid = 0;
++
+       if (!cpu_has_vmx_shadow_vmcs())
+               enable_shadow_vmcs = 0;
+       if (enable_shadow_vmcs)
diff --git a/queue-4.9/kvm-vmx-flush-tlb-when-the-apic-access-address-changes.patch b/queue-4.9/kvm-vmx-flush-tlb-when-the-apic-access-address-changes.patch

new file mode 100644 (file)

index 0000000..3ef7d41
--- /dev/null
+++ b/queue-4.9/kvm-vmx-flush-tlb-when-the-apic-access-address-changes.patch
@@ -0,0 +1,88 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Jim Mattson <jmattson@google.com>
+Date: Thu, 16 Mar 2017 13:53:59 -0700
+Subject: kvm: vmx: Flush TLB when the APIC-access address changes
+
+From: Jim Mattson <jmattson@google.com>
+
+
+[ Upstream commit fb6c8198431311027c3434d4e94ab8bc040f7aea ]
+
+Quoting from the Intel SDM, volume 3, section 28.3.3.4: Guidelines for
+Use of the INVEPT Instruction:
+
+If EPT was in use on a logical processor at one time with EPTP X, it
+is recommended that software use the INVEPT instruction with the
+"single-context" INVEPT type and with EPTP X in the INVEPT descriptor
+before a VM entry on the same logical processor that enables EPT with
+EPTP X and either (a) the "virtualize APIC accesses" VM-execution
+control was changed from 0 to 1; or (b) the value of the APIC-access
+address was changed.
+
+In the nested case, the burden falls on L1, unless L0 enables EPT in
+vmcs02 when L1 doesn't enable EPT in vmcs12.
+
+Signed-off-by: Jim Mattson <jmattson@google.com>
+Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx.c |   18 +++++++++++++++++-
+ 1 file changed, 17 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -3818,6 +3818,12 @@ static void vmx_flush_tlb(struct kvm_vcp
+       __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid);
+ }
+ 
++static void vmx_flush_tlb_ept_only(struct kvm_vcpu *vcpu)
++{
++      if (enable_ept)
++              vmx_flush_tlb(vcpu);
++}
++
+ static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
+ {
+       ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
+@@ -8505,6 +8511,7 @@ static void vmx_set_virtual_x2apic_mode(
+       } else {
+               sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
+               sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
++              vmx_flush_tlb_ept_only(vcpu);
+       }
+       vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
+ 
+@@ -8530,8 +8537,10 @@ static void vmx_set_apic_access_page_add
+        */
+       if (!is_guest_mode(vcpu) ||
+           !nested_cpu_has2(get_vmcs12(&vmx->vcpu),
+-                           SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
++                           SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
+               vmcs_write64(APIC_ACCESS_ADDR, hpa);
++              vmx_flush_tlb_ept_only(vcpu);
++      }
+ }
+ 
+ static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
+@@ -10110,6 +10119,9 @@ static void prepare_vmcs02(struct kvm_vc
+       if (nested_cpu_has_ept(vmcs12)) {
+               kvm_mmu_unload(vcpu);
+               nested_ept_init_mmu_context(vcpu);
++      } else if (nested_cpu_has2(vmcs12,
++                                 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
++              vmx_flush_tlb_ept_only(vcpu);
+       }
+ 
+       if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)
+@@ -10850,6 +10862,10 @@ static void nested_vmx_vmexit(struct kvm
+               vmx->nested.change_vmcs01_virtual_x2apic_mode = false;
+               vmx_set_virtual_x2apic_mode(vcpu,
+                               vcpu->arch.apic_base & X2APIC_ENABLE);
++      } else if (!nested_cpu_has_ept(vmcs12) &&
++                 nested_cpu_has2(vmcs12,
++                                 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
++              vmx_flush_tlb_ept_only(vcpu);
+       }
+ 
+       /* This is needed for same reason as it was needed in prepare_vmcs02 */
diff --git a/queue-4.9/kvm-x86-correct-async-page-present-tracepoint.patch b/queue-4.9/kvm-x86-correct-async-page-present-tracepoint.patch

new file mode 100644 (file)

index 0000000..ce7c5c2
--- /dev/null
+++ b/queue-4.9/kvm-x86-correct-async-page-present-tracepoint.patch
@@ -0,0 +1,51 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Wanpeng Li <wanpeng.li@hotmail.com>
+Date: Mon, 20 Mar 2017 21:18:55 -0700
+Subject: KVM: x86: correct async page present tracepoint
+
+From: Wanpeng Li <wanpeng.li@hotmail.com>
+
+
+[ Upstream commit 24dccf83a121b8a4ad5c2ad383a8184ef6c266ee ]
+
+After async pf setup successfully, there is a broadcast wakeup w/ special
+token 0xffffffff which tells vCPU that it should wake up all processes
+waiting for APFs though there is no real process waiting at the moment.
+
+The async page present tracepoint is printed prematurely and fails to catch the
+special token setup. This patch fixes it by moving the async page present
+tracepoint after the special token setup.
+
+Before patch:
+
+qemu-system-x86-8499  [006] ...1  5973.473292: kvm_async_pf_ready: token 0x0 gva 0x0
+
+After patch:
+
+qemu-system-x86-8499  [006] ...1  5973.473292: kvm_async_pf_ready: token 0xffffffff gva 0x0
+
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Radim Krčmář <rkrcmar@redhat.com>
+Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -8443,11 +8443,11 @@ void kvm_arch_async_page_present(struct
+ {
+       struct x86_exception fault;
+ 
+-      trace_kvm_async_pf_ready(work->arch.token, work->gva);
+       if (work->wakeup_all)
+               work->arch.token = ~0; /* broadcast wakeup */
+       else
+               kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
++      trace_kvm_async_pf_ready(work->arch.token, work->gva);
+ 
+       if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) &&
+           !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
diff --git a/queue-4.9/mm-x86-mm-make-the-batched-unmap-tlb-flush-api-more-generic.patch b/queue-4.9/mm-x86-mm-make-the-batched-unmap-tlb-flush-api-more-generic.patch

new file mode 100644 (file)

index 0000000..ed23e79
--- /dev/null
+++ b/queue-4.9/mm-x86-mm-make-the-batched-unmap-tlb-flush-api-more-generic.patch
@@ -0,0 +1,183 @@
+From e73ad5ff2f76da25390e9607cb549691639330c3 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 22 May 2017 15:30:03 -0700
+Subject: mm, x86/mm: Make the batched unmap TLB flush API more generic
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit e73ad5ff2f76da25390e9607cb549691639330c3 upstream.
+
+try_to_unmap_flush() used to open-code a rather x86-centric flush
+sequence: local_flush_tlb() + flush_tlb_others().  Rearrange the
+code so that the arch (only x86 for now) provides
+arch_tlbbatch_add_mm() and arch_tlbbatch_flush() and the core code
+calls those functions instead.
+
+I'll want this for x86 because, to enable address space ids, I can't
+support the flush_tlb_others() mode used by the existing
+try_to_unmap_flush() implementation with good performance.  I can
+support the new API fairly easily, though.
+
+I imagine that other architectures may be in a similar position.
+Architectures with strong remote flush primitives (arm64?) may have
+even worse performance problems with flush_tlb_others() the way that
+try_to_unmap_flush() uses it.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Acked-by: Kees Cook <keescook@chromium.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Nadav Amit <nadav.amit@gmail.com>
+Cc: Nadav Amit <namit@vmware.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Sasha Levin <sasha.levin@oracle.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/19f25a8581f9fb77876b7ff3b001f89835e34ea3.1495492063.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Eduardo Valentin <eduval@amazon.com>
+Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/tlbbatch.h |   16 ++++++++++++++++
+ arch/x86/include/asm/tlbflush.h |    8 ++++++++
+ arch/x86/mm/tlb.c               |   17 +++++++++++++++++
+ include/linux/sched.h           |   15 +++++++++++----
+ mm/rmap.c                       |   16 ++--------------
+ 5 files changed, 54 insertions(+), 18 deletions(-)
+ create mode 100644 arch/x86/include/asm/tlbbatch.h
+
+--- /dev/null
++++ b/arch/x86/include/asm/tlbbatch.h
+@@ -0,0 +1,16 @@
++#ifndef _ARCH_X86_TLBBATCH_H
++#define _ARCH_X86_TLBBATCH_H
++
++#include <linux/cpumask.h>
++
++#ifdef CONFIG_SMP
++struct arch_tlbflush_unmap_batch {
++      /*
++       * Each bit set is a CPU that potentially has a TLB entry for one of
++       * the PFNs being flushed.
++       */
++      struct cpumask cpumask;
++};
++#endif
++
++#endif /* _ARCH_X86_TLBBATCH_H */
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -327,6 +327,14 @@ static inline void reset_lazy_tlbstate(v
+       this_cpu_write(cpu_tlbstate.active_mm, &init_mm);
+ }
+ 
++static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
++                                      struct mm_struct *mm)
++{
++      cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
++}
++
++extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
++
+ #endif        /* SMP */
+ 
+ #ifndef CONFIG_PARAVIRT
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -410,6 +410,23 @@ void flush_tlb_kernel_range(unsigned lon
+       }
+ }
+ 
++void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
++{
++      int cpu = get_cpu();
++
++      if (cpumask_test_cpu(cpu, &batch->cpumask)) {
++              count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
++              local_flush_tlb();
++              trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
++      }
++
++      if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
++              flush_tlb_others(&batch->cpumask, NULL, 0, TLB_FLUSH_ALL);
++      cpumask_clear(&batch->cpumask);
++
++      put_cpu();
++}
++
+ static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf,
+                            size_t count, loff_t *ppos)
+ {
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1463,15 +1463,22 @@ enum perf_event_task_context {
+       perf_nr_task_contexts,
+ };
+ 
++#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
++#include <asm/tlbbatch.h>
++#endif
++
+ /* Track pages that require TLB flushes */
+ struct tlbflush_unmap_batch {
+       /*
+-       * Each bit set is a CPU that potentially has a TLB entry for one of
+-       * the PFNs being flushed. See set_tlb_ubc_flush_pending().
++       * The arch code makes the following promise: generic code can modify a
++       * PTE, then call arch_tlbbatch_add_mm() (which internally provides all
++       * needed barriers), then call arch_tlbbatch_flush(), and the entries
++       * will be flushed on all CPUs by the time that arch_tlbbatch_flush()
++       * returns.
+        */
+-      struct cpumask cpumask;
++      struct arch_tlbflush_unmap_batch arch;
+ 
+-      /* True if any bit in cpumask is set */
++      /* True if a flush is needed. */
+       bool flush_required;
+ 
+       /*
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -578,25 +578,13 @@ void page_unlock_anon_vma_read(struct an
+ void try_to_unmap_flush(void)
+ {
+       struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
+-      int cpu;
+ 
+       if (!tlb_ubc->flush_required)
+               return;
+ 
+-      cpu = get_cpu();
+-
+-      if (cpumask_test_cpu(cpu, &tlb_ubc->cpumask)) {
+-              count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+-              local_flush_tlb();
+-              trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
+-      }
+-
+-      if (cpumask_any_but(&tlb_ubc->cpumask, cpu) < nr_cpu_ids)
+-              flush_tlb_others(&tlb_ubc->cpumask, NULL, 0, TLB_FLUSH_ALL);
+-      cpumask_clear(&tlb_ubc->cpumask);
++      arch_tlbbatch_flush(&tlb_ubc->arch);
+       tlb_ubc->flush_required = false;
+       tlb_ubc->writable = false;
+-      put_cpu();
+ }
+ 
+ /* Flush iff there are potentially writable TLB entries that can race with IO */
+@@ -613,7 +601,7 @@ static void set_tlb_ubc_flush_pending(st
+ {
+       struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
+ 
+-      cpumask_or(&tlb_ubc->cpumask, &tlb_ubc->cpumask, mm_cpumask(mm));
++      arch_tlbbatch_add_mm(&tlb_ubc->arch, mm);
+       tlb_ubc->flush_required = true;
+ 
+       /*
diff --git a/queue-4.9/nbd-set-queue-timeout-properly.patch b/queue-4.9/nbd-set-queue-timeout-properly.patch

new file mode 100644 (file)

index 0000000..eaa1977
--- /dev/null
+++ b/queue-4.9/nbd-set-queue-timeout-properly.patch
@@ -0,0 +1,35 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Josef Bacik <jbacik@fb.com>
+Date: Fri, 24 Mar 2017 14:08:28 -0400
+Subject: nbd: set queue timeout properly
+
+From: Josef Bacik <jbacik@fb.com>
+
+
+[ Upstream commit f8586855031a1d6b243f013c3082631346fddfad ]
+
+We can't just set the timeout on the tagset, we have to set it on the
+queue as it would have been setup already at this point.
+
+Signed-off-by: Josef Bacik <jbacik@fb.com>
+Signed-off-by: Jens Axboe <axboe@fb.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/block/nbd.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/block/nbd.c
++++ b/drivers/block/nbd.c
+@@ -654,7 +654,10 @@ static int __nbd_ioctl(struct block_devi
+               return nbd_size_set(nbd, bdev, nbd->blksize, arg);
+ 
+       case NBD_SET_TIMEOUT:
+-              nbd->tag_set.timeout = arg * HZ;
++              if (arg) {
++                      nbd->tag_set.timeout = arg * HZ;
++                      blk_queue_rq_timeout(nbd->disk->queue, arg * HZ);
++              }
+               return 0;
+ 
+       case NBD_SET_FLAGS:
diff --git a/queue-4.9/net-do-not-allow-negative-values-for-busy_read-and-busy_poll-sysctl-interfaces.patch b/queue-4.9/net-do-not-allow-negative-values-for-busy_read-and-busy_poll-sysctl-interfaces.patch

new file mode 100644 (file)

index 0000000..01e4b8c
--- /dev/null
+++ b/queue-4.9/net-do-not-allow-negative-values-for-busy_read-and-busy_poll-sysctl-interfaces.patch
@@ -0,0 +1,51 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Alexander Duyck <alexander.h.duyck@intel.com>
+Date: Fri, 24 Mar 2017 09:38:03 -0700
+Subject: net: Do not allow negative values for busy_read and busy_poll sysctl interfaces
+
+From: Alexander Duyck <alexander.h.duyck@intel.com>
+
+
+[ Upstream commit 95f255211396958c718aef8c45e3923b5211ea7b ]
+
+This change basically codifies what I think were already the limitations on
+the busy_poll and busy_read sysctl interfaces.  We weren't checking the
+lower bounds and as such could input negative values. The behavior when
+that was used was dependent on the architecture. In order to prevent any
+issues with that I am just disabling support for values less than 0 since
+this way we don't have to worry about any odd behaviors.
+
+By limiting the sysctl values this way it also makes it consistent with how
+we handle the SO_BUSY_POLL socket option since the value appears to be
+reported as a signed integer value and negative values are rejected.
+
+Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sysctl_net_core.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/core/sysctl_net_core.c
++++ b/net/core/sysctl_net_core.c
+@@ -369,14 +369,16 @@ static struct ctl_table net_core_table[]
+               .data           = &sysctl_net_busy_poll,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+-              .proc_handler   = proc_dointvec
++              .proc_handler   = proc_dointvec_minmax,
++              .extra1         = &zero,
+       },
+       {
+               .procname       = "busy_read",
+               .data           = &sysctl_net_busy_read,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+-              .proc_handler   = proc_dointvec
++              .proc_handler   = proc_dointvec_minmax,
++              .extra1         = &zero,
+       },
+ #endif
+ #ifdef CONFIG_NET_SCHED
diff --git a/queue-4.9/net-ipconfig-fix-ic_close_devs-use-after-free.patch b/queue-4.9/net-ipconfig-fix-ic_close_devs-use-after-free.patch

new file mode 100644 (file)

index 0000000..7b256a6
--- /dev/null
+++ b/queue-4.9/net-ipconfig-fix-ic_close_devs-use-after-free.patch
@@ -0,0 +1,99 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Mark Rutland <mark.rutland@arm.com>
+Date: Mon, 27 Mar 2017 18:00:14 +0100
+Subject: net: ipconfig: fix ic_close_devs() use-after-free
+
+From: Mark Rutland <mark.rutland@arm.com>
+
+
+[ Upstream commit ffefb6f4d6ad699a2b5484241bc46745a53235d0 ]
+
+Our chosen ic_dev may be anywhere in our list of ic_devs, and we may
+free it before attempting to close others. When we compare d->dev and
+ic_dev->dev, we're potentially dereferencing memory returned to the
+allocator. This causes KASAN to scream for each subsequent ic_dev we
+check.
+
+As there's a 1-1 mapping between ic_devs and netdevs, we can instead
+compare d and ic_dev directly, which implicitly handles the !ic_dev
+case, and avoids the use-after-free. The ic_dev pointer may be stale,
+but we will not dereference it.
+
+Original splat:
+
+[    6.487446] ==================================================================
+[    6.494693] BUG: KASAN: use-after-free in ic_close_devs+0xc4/0x154 at addr ffff800367efa708
+[    6.503013] Read of size 8 by task swapper/0/1
+[    6.507452] CPU: 5 PID: 1 Comm: swapper/0 Not tainted 4.11.0-rc3-00002-gda42158 #8
+[    6.514993] Hardware name: AppliedMicro Mustang/Mustang, BIOS 3.05.05-beta_rc Jan 27 2016
+[    6.523138] Call trace:
+[    6.525590] [<ffff200008094778>] dump_backtrace+0x0/0x570
+[    6.530976] [<ffff200008094d08>] show_stack+0x20/0x30
+[    6.536017] [<ffff200008bee928>] dump_stack+0x120/0x188
+[    6.541231] [<ffff20000856d5e4>] kasan_object_err+0x24/0xa0
+[    6.546790] [<ffff20000856d924>] kasan_report_error+0x244/0x738
+[    6.552695] [<ffff20000856dfec>] __asan_report_load8_noabort+0x54/0x80
+[    6.559204] [<ffff20000aae86ac>] ic_close_devs+0xc4/0x154
+[    6.564590] [<ffff20000aaedbac>] ip_auto_config+0x2ed4/0x2f1c
+[    6.570321] [<ffff200008084b04>] do_one_initcall+0xcc/0x370
+[    6.575882] [<ffff20000aa31de8>] kernel_init_freeable+0x5f8/0x6c4
+[    6.581959] [<ffff20000a16df00>] kernel_init+0x18/0x190
+[    6.587171] [<ffff200008084710>] ret_from_fork+0x10/0x40
+[    6.592468] Object at ffff800367efa700, in cache kmalloc-128 size: 128
+[    6.598969] Allocated:
+[    6.601324] PID = 1
+[    6.603427]  save_stack_trace_tsk+0x0/0x418
+[    6.607603]  save_stack_trace+0x20/0x30
+[    6.611430]  kasan_kmalloc+0xd8/0x188
+[    6.615087]  ip_auto_config+0x8c4/0x2f1c
+[    6.619002]  do_one_initcall+0xcc/0x370
+[    6.622832]  kernel_init_freeable+0x5f8/0x6c4
+[    6.627178]  kernel_init+0x18/0x190
+[    6.630660]  ret_from_fork+0x10/0x40
+[    6.634223] Freed:
+[    6.636233] PID = 1
+[    6.638334]  save_stack_trace_tsk+0x0/0x418
+[    6.642510]  save_stack_trace+0x20/0x30
+[    6.646337]  kasan_slab_free+0x88/0x178
+[    6.650167]  kfree+0xb8/0x478
+[    6.653131]  ic_close_devs+0x130/0x154
+[    6.656875]  ip_auto_config+0x2ed4/0x2f1c
+[    6.660875]  do_one_initcall+0xcc/0x370
+[    6.664705]  kernel_init_freeable+0x5f8/0x6c4
+[    6.669051]  kernel_init+0x18/0x190
+[    6.672534]  ret_from_fork+0x10/0x40
+[    6.676098] Memory state around the buggy address:
+[    6.680880]  ffff800367efa600: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+[    6.688078]  ffff800367efa680: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+[    6.695276] >ffff800367efa700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[    6.702469]                       ^
+[    6.705952]  ffff800367efa780: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+[    6.713149]  ffff800367efa800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[    6.720343] ==================================================================
+[    6.727536] Disabling lock debugging due to kernel taint
+
+Signed-off-by: Mark Rutland <mark.rutland@arm.com>
+Cc: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+Cc: David S. Miller <davem@davemloft.net>
+Cc: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
+Cc: James Morris <jmorris@namei.org>
+Cc: Patrick McHardy <kaber@trash.net>
+Cc: netdev@vger.kernel.org
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ipconfig.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/ipconfig.c
++++ b/net/ipv4/ipconfig.c
+@@ -306,7 +306,7 @@ static void __init ic_close_devs(void)
+       while ((d = next)) {
+               next = d->next;
+               dev = d->dev;
+-              if ((!ic_dev || dev != ic_dev->dev) && !netdev_uses_dsa(dev)) {
++              if (d != ic_dev && !netdev_uses_dsa(dev)) {
+                       pr_debug("IP-Config: Downing %s\n", dev->name);
+                       dev_change_flags(dev, d->flags);
+               }
diff --git a/queue-4.9/net-ipv6-send-ns-for-dad-when-link-operationally-up.patch b/queue-4.9/net-ipv6-send-ns-for-dad-when-link-operationally-up.patch

new file mode 100644 (file)

index 0000000..9e9e96e
--- /dev/null
+++ b/queue-4.9/net-ipv6-send-ns-for-dad-when-link-operationally-up.patch
@@ -0,0 +1,73 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Mike Manning <mmanning@brocade.com>
+Date: Mon, 25 Sep 2017 22:01:36 +0100
+Subject: net: ipv6: send NS for DAD when link operationally up
+
+From: Mike Manning <mmanning@brocade.com>
+
+
+[ Upstream commit 1f372c7bfb23286d2bf4ce0423ab488e86b74bb2 ]
+
+The NS for DAD are sent on admin up as long as a valid qdisc is found.
+A race condition exists by which these packets will not egress the
+interface if the operational state of the lower device is not yet up.
+The solution is to delay DAD until the link is operationally up
+according to RFC2863. Rather than only doing this, follow the existing
+code checks by deferring IPv6 device initialization altogether. The fix
+allows DAD on devices like tunnels that are controlled by userspace
+control plane. The fix has no impact on regular deployments, but means
+that there is no IPv6 connectivity until the port has been opened in
+the case of port-based network access control, which should be
+desirable.
+
+Signed-off-by: Mike Manning <mmanning@brocade.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/addrconf.c |   12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -286,10 +286,10 @@ static struct ipv6_devconf ipv6_devconf_
+       .keep_addr_on_down      = 0,
+ };
+ 
+-/* Check if a valid qdisc is available */
+-static inline bool addrconf_qdisc_ok(const struct net_device *dev)
++/* Check if link is ready: is it up and is a valid qdisc available */
++static inline bool addrconf_link_ready(const struct net_device *dev)
+ {
+-      return !qdisc_tx_is_noop(dev);
++      return netif_oper_up(dev) && !qdisc_tx_is_noop(dev);
+ }
+ 
+ static void addrconf_del_rs_timer(struct inet6_dev *idev)
+@@ -434,7 +434,7 @@ static struct inet6_dev *ipv6_add_dev(st
+ 
+       ndev->token = in6addr_any;
+ 
+-      if (netif_running(dev) && addrconf_qdisc_ok(dev))
++      if (netif_running(dev) && addrconf_link_ready(dev))
+               ndev->if_flags |= IF_READY;
+ 
+       ipv6_mc_init_dev(ndev);
+@@ -3368,7 +3368,7 @@ static int addrconf_notify(struct notifi
+                       /* restore routes for permanent addresses */
+                       addrconf_permanent_addr(dev);
+ 
+-                      if (!addrconf_qdisc_ok(dev)) {
++                      if (!addrconf_link_ready(dev)) {
+                               /* device is not ready yet. */
+                               pr_info("ADDRCONF(NETDEV_UP): %s: link is not ready\n",
+                                       dev->name);
+@@ -3383,7 +3383,7 @@ static int addrconf_notify(struct notifi
+                               run_pending = 1;
+                       }
+               } else if (event == NETDEV_CHANGE) {
+-                      if (!addrconf_qdisc_ok(dev)) {
++                      if (!addrconf_link_ready(dev)) {
+                               /* device is still not ready. */
+                               break;
+                       }
diff --git a/queue-4.9/net-moxa-fix-tx-overrun-memory-leak.patch b/queue-4.9/net-moxa-fix-tx-overrun-memory-leak.patch

new file mode 100644 (file)

index 0000000..1419d9d
--- /dev/null
+++ b/queue-4.9/net-moxa-fix-tx-overrun-memory-leak.patch
@@ -0,0 +1,98 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Jonas Jensen <jonas.jensen@gmail.com>
+Date: Tue, 28 Mar 2017 12:12:38 +0200
+Subject: net: moxa: fix TX overrun memory leak
+
+From: Jonas Jensen <jonas.jensen@gmail.com>
+
+
+[ Upstream commit c2b341a620018d4eaeb0e85c16274ac4e5f153d4 ]
+
+moxart_mac_start_xmit() doesn't care where tx_tail is, tx_head can
+catch and pass tx_tail, which is bad because moxart_tx_finished()
+isn't guaranteed to catch up on freeing resources from tx_tail.
+
+Add a check in moxart_mac_start_xmit() stopping the queue at the
+end of the circular buffer. Also add a check in moxart_tx_finished()
+waking the queue if the buffer has TX_WAKE_THRESHOLD or more
+free descriptors.
+
+While we're at it, move spin_lock_irq() to happen before our
+descriptor pointer is assigned in moxart_mac_start_xmit().
+
+Addresses https://bugzilla.kernel.org/show_bug.cgi?id=99451
+
+Signed-off-by: Jonas Jensen <jonas.jensen@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/moxa/moxart_ether.c |   20 ++++++++++++++++++--
+ drivers/net/ethernet/moxa/moxart_ether.h |    1 +
+ 2 files changed, 19 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/moxa/moxart_ether.c
++++ b/drivers/net/ethernet/moxa/moxart_ether.c
+@@ -25,6 +25,7 @@
+ #include <linux/of_irq.h>
+ #include <linux/crc32.h>
+ #include <linux/crc32c.h>
++#include <linux/circ_buf.h>
+ 
+ #include "moxart_ether.h"
+ 
+@@ -278,6 +279,13 @@ rx_next:
+       return rx;
+ }
+ 
++static int moxart_tx_queue_space(struct net_device *ndev)
++{
++      struct moxart_mac_priv_t *priv = netdev_priv(ndev);
++
++      return CIRC_SPACE(priv->tx_head, priv->tx_tail, TX_DESC_NUM);
++}
++
+ static void moxart_tx_finished(struct net_device *ndev)
+ {
+       struct moxart_mac_priv_t *priv = netdev_priv(ndev);
+@@ -297,6 +305,9 @@ static void moxart_tx_finished(struct ne
+               tx_tail = TX_NEXT(tx_tail);
+       }
+       priv->tx_tail = tx_tail;
++      if (netif_queue_stopped(ndev) &&
++          moxart_tx_queue_space(ndev) >= TX_WAKE_THRESHOLD)
++              netif_wake_queue(ndev);
+ }
+ 
+ static irqreturn_t moxart_mac_interrupt(int irq, void *dev_id)
+@@ -324,13 +335,18 @@ static int moxart_mac_start_xmit(struct
+       struct moxart_mac_priv_t *priv = netdev_priv(ndev);
+       void *desc;
+       unsigned int len;
+-      unsigned int tx_head = priv->tx_head;
++      unsigned int tx_head;
+       u32 txdes1;
+       int ret = NETDEV_TX_BUSY;
+ 
++      spin_lock_irq(&priv->txlock);
++
++      tx_head = priv->tx_head;
+       desc = priv->tx_desc_base + (TX_REG_DESC_SIZE * tx_head);
+ 
+-      spin_lock_irq(&priv->txlock);
++      if (moxart_tx_queue_space(ndev) == 1)
++              netif_stop_queue(ndev);
++
+       if (moxart_desc_read(desc + TX_REG_OFFSET_DESC0) & TX_DESC0_DMA_OWN) {
+               net_dbg_ratelimited("no TX space for packet\n");
+               priv->stats.tx_dropped++;
+--- a/drivers/net/ethernet/moxa/moxart_ether.h
++++ b/drivers/net/ethernet/moxa/moxart_ether.h
+@@ -59,6 +59,7 @@
+ #define TX_NEXT(N)            (((N) + 1) & (TX_DESC_NUM_MASK))
+ #define TX_BUF_SIZE           1600
+ #define TX_BUF_SIZE_MAX               (TX_DESC1_BUF_SIZE_MASK+1)
++#define TX_WAKE_THRESHOLD     16
+ 
+ #define RX_DESC_NUM           64
+ #define RX_DESC_NUM_MASK      (RX_DESC_NUM-1)
diff --git a/queue-4.9/net-phy-at803x-change-error-to-einval-for-invalid-mac.patch b/queue-4.9/net-phy-at803x-change-error-to-einval-for-invalid-mac.patch

new file mode 100644 (file)

index 0000000..ecfe1a6
--- /dev/null
+++ b/queue-4.9/net-phy-at803x-change-error-to-einval-for-invalid-mac.patch
@@ -0,0 +1,32 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Dan Murphy <dmurphy@ti.com>
+Date: Tue, 10 Oct 2017 12:42:56 -0500
+Subject: net: phy: at803x: Change error to EINVAL for invalid MAC
+
+From: Dan Murphy <dmurphy@ti.com>
+
+
+[ Upstream commit fc7556877d1748ac00958822a0a3bba1d4bd9e0d ]
+
+Change the return error code to EINVAL if the MAC
+address is not valid in the set_wol function.
+
+Signed-off-by: Dan Murphy <dmurphy@ti.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/at803x.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/phy/at803x.c
++++ b/drivers/net/phy/at803x.c
+@@ -166,7 +166,7 @@ static int at803x_set_wol(struct phy_dev
+               mac = (const u8 *) ndev->dev_addr;
+ 
+               if (!is_valid_ether_addr(mac))
+-                      return -EFAULT;
++                      return -EINVAL;
+ 
+               for (i = 0; i < 3; i++) {
+                       phy_write(phydev, AT803X_MMD_ACCESS_CONTROL,
diff --git a/queue-4.9/net-qmi_wwan-add-usb-ids-for-mdm6600-modem-on-motorola-droid-4.patch b/queue-4.9/net-qmi_wwan-add-usb-ids-for-mdm6600-modem-on-motorola-droid-4.patch

new file mode 100644 (file)

index 0000000..9ef9c6c
--- /dev/null
+++ b/queue-4.9/net-qmi_wwan-add-usb-ids-for-mdm6600-modem-on-motorola-droid-4.patch
@@ -0,0 +1,37 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Tony Lindgren <tony@atomide.com>
+Date: Sun, 19 Mar 2017 09:19:57 -0700
+Subject: net: qmi_wwan: Add USB IDs for MDM6600 modem on Motorola Droid 4
+
+From: Tony Lindgren <tony@atomide.com>
+
+
+[ Upstream commit 4071898bf0f4d79ff353db327af2a15123272548 ]
+
+This gets qmicli working with the MDM6600 modem.
+
+Cc: Bjørn Mork <bjorn@mork.no>
+Reviewed-by: Sebastian Reichel <sre@kernel.org>
+Tested-by: Sebastian Reichel <sre@kernel.org>
+Signed-off-by: Tony Lindgren <tony@atomide.com>
+Acked-by: Bjørn Mork <bjorn@mork.no>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/qmi_wwan.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/net/usb/qmi_wwan.c
++++ b/drivers/net/usb/qmi_wwan.c
+@@ -582,6 +582,10 @@ static const struct usb_device_id produc
+               USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, USB_CLASS_VENDOR_SPEC, 0x01, 0x69),
+               .driver_info        = (unsigned long)&qmi_wwan_info,
+       },
++      {       /* Motorola Mapphone devices with MDM6600 */
++              USB_VENDOR_AND_INTERFACE_INFO(0x22b8, USB_CLASS_VENDOR_SPEC, 0xfb, 0xff),
++              .driver_info        = (unsigned long)&qmi_wwan_info,
++      },
+ 
+       /* 2. Combined interface devices matching on class+protocol */
+       {       /* Huawei E367 and possibly others in "Windows mode" */
diff --git a/queue-4.9/netfilter-nf_nat_snmp-fix-panic-when-snmp_trap_helper-fails-to-register.patch b/queue-4.9/netfilter-nf_nat_snmp-fix-panic-when-snmp_trap_helper-fails-to-register.patch

new file mode 100644 (file)

index 0000000..eeff052
--- /dev/null
+++ b/queue-4.9/netfilter-nf_nat_snmp-fix-panic-when-snmp_trap_helper-fails-to-register.patch
@@ -0,0 +1,65 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Gao Feng <fgao@ikuai8.com>
+Date: Sat, 25 Mar 2017 18:24:36 +0800
+Subject: netfilter: nf_nat_snmp: Fix panic when snmp_trap_helper fails to register
+
+From: Gao Feng <fgao@ikuai8.com>
+
+
+[ Upstream commit 75c689dca98851d65ef5a27e5ce26b625b68751c ]
+
+In the commit 93557f53e1fb ("netfilter: nf_conntrack: nf_conntrack snmp
+helper"), the snmp_helper is replaced by nf_nat_snmp_hook. So the
+snmp_helper is never registered. But the error path still tries to unregister
+the snmp_helper, which could cause a panic.
+
+Now remove the useless snmp_helper and the unregister call in the
+error handler.
+
+Fixes: 93557f53e1fb ("netfilter: nf_conntrack: nf_conntrack snmp helper")
+Signed-off-by: Gao Feng <fgao@ikuai8.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/netfilter/nf_nat_snmp_basic.c |   19 +------------------
+ 1 file changed, 1 insertion(+), 18 deletions(-)
+
+--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
++++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
+@@ -1260,16 +1260,6 @@ static const struct nf_conntrack_expect_
+       .timeout        = 180,
+ };
+ 
+-static struct nf_conntrack_helper snmp_helper __read_mostly = {
+-      .me                     = THIS_MODULE,
+-      .help                   = help,
+-      .expect_policy          = &snmp_exp_policy,
+-      .name                   = "snmp",
+-      .tuple.src.l3num        = AF_INET,
+-      .tuple.src.u.udp.port   = cpu_to_be16(SNMP_PORT),
+-      .tuple.dst.protonum     = IPPROTO_UDP,
+-};
+-
+ static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
+       .me                     = THIS_MODULE,
+       .help                   = help,
+@@ -1288,17 +1278,10 @@ static struct nf_conntrack_helper snmp_t
+ 
+ static int __init nf_nat_snmp_basic_init(void)
+ {
+-      int ret = 0;
+-
+       BUG_ON(nf_nat_snmp_hook != NULL);
+       RCU_INIT_POINTER(nf_nat_snmp_hook, help);
+ 
+-      ret = nf_conntrack_helper_register(&snmp_trap_helper);
+-      if (ret < 0) {
+-              nf_conntrack_helper_unregister(&snmp_helper);
+-              return ret;
+-      }
+-      return ret;
++      return nf_conntrack_helper_register(&snmp_trap_helper);
+ }
+ 
+ static void __exit nf_nat_snmp_basic_fini(void)
diff --git a/queue-4.9/netfilter-nfnetlink_queue-fix-secctx-memory-leak.patch b/queue-4.9/netfilter-nfnetlink_queue-fix-secctx-memory-leak.patch

new file mode 100644 (file)

index 0000000..0c377cd
--- /dev/null
+++ b/queue-4.9/netfilter-nfnetlink_queue-fix-secctx-memory-leak.patch
@@ -0,0 +1,60 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Liping Zhang <zlpnobody@gmail.com>
+Date: Tue, 28 Mar 2017 22:59:25 +0800
+Subject: netfilter: nfnetlink_queue: fix secctx memory leak
+
+From: Liping Zhang <zlpnobody@gmail.com>
+
+
+[ Upstream commit 77c1c03c5b8ef28e55bb0aff29b1e006037ca645 ]
+
+We must call security_release_secctx to free the memory returned by
+security_secid_to_secctx, otherwise memory may be leaked forever.
+
+Fixes: ef493bd930ae ("netfilter: nfnetlink_queue: add security context information")
+Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nfnetlink_queue.c |    9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/net/netfilter/nfnetlink_queue.c
++++ b/net/netfilter/nfnetlink_queue.c
+@@ -443,7 +443,7 @@ nfqnl_build_packet_message(struct net *n
+       skb = alloc_skb(size, GFP_ATOMIC);
+       if (!skb) {
+               skb_tx_error(entskb);
+-              return NULL;
++              goto nlmsg_failure;
+       }
+ 
+       nlh = nlmsg_put(skb, 0, 0,
+@@ -452,7 +452,7 @@ nfqnl_build_packet_message(struct net *n
+       if (!nlh) {
+               skb_tx_error(entskb);
+               kfree_skb(skb);
+-              return NULL;
++              goto nlmsg_failure;
+       }
+       nfmsg = nlmsg_data(nlh);
+       nfmsg->nfgen_family = entry->state.pf;
+@@ -598,12 +598,17 @@ nfqnl_build_packet_message(struct net *n
+       }
+ 
+       nlh->nlmsg_len = skb->len;
++      if (seclen)
++              security_release_secctx(secdata, seclen);
+       return skb;
+ 
+ nla_put_failure:
+       skb_tx_error(entskb);
+       kfree_skb(skb);
+       net_err_ratelimited("nf_queue: error creating packet message\n");
++nlmsg_failure:
++      if (seclen)
++              security_release_secctx(secdata, seclen);
+       return NULL;
+ }
+ 
diff --git a/queue-4.9/netfilter-nfnl_cthelper-fix-a-race-when-walk-the-nf_ct_helper_hash-table.patch b/queue-4.9/netfilter-nfnl_cthelper-fix-a-race-when-walk-the-nf_ct_helper_hash-table.patch

new file mode 100644 (file)

index 0000000..2fd2caf
--- /dev/null
+++ b/queue-4.9/netfilter-nfnl_cthelper-fix-a-race-when-walk-the-nf_ct_helper_hash-table.patch
@@ -0,0 +1,326 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Liping Zhang <zlpnobody@gmail.com>
+Date: Sat, 25 Mar 2017 12:09:15 +0800
+Subject: netfilter: nfnl_cthelper: fix a race when walk the nf_ct_helper_hash table
+
+From: Liping Zhang <zlpnobody@gmail.com>
+
+
+[ Upstream commit 83d90219a5df8d950855ce73229a97b63605c317 ]
+
+The nf_ct_helper_hash table is protected by nf_ct_helper_mutex, while
+nfct_helper operation is protected by nfnl_lock(NFNL_SUBSYS_CTHELPER).
+So it's possible that one CPU is walking the nf_ct_helper_hash for
+cthelper add/get/del while another CPU is doing nf_conntrack_helpers_unregister
+at the same time. This is dangerous, and may cause a use-after-free error.
+
+Note, the delete operation will flush all cthelpers added via nfnetlink, so
+using RCU for protection is not easy.
+
+Now introduce a dummy list to record all the cthelpers added via
+nfnetlink, then we can walk the dummy list instead of walking the
+nf_ct_helper_hash. Also, keep nfnl_cthelper_dump_table unchanged, it
+may be invoked without nfnl_lock(NFNL_SUBSYS_CTHELPER) held.
+
+Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nfnetlink_cthelper.c |  185 +++++++++++++++++--------------------
+ 1 file changed, 85 insertions(+), 100 deletions(-)
+
+--- a/net/netfilter/nfnetlink_cthelper.c
++++ b/net/netfilter/nfnetlink_cthelper.c
+@@ -32,6 +32,13 @@ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+ MODULE_DESCRIPTION("nfnl_cthelper: User-space connection tracking helpers");
+ 
++struct nfnl_cthelper {
++      struct list_head                list;
++      struct nf_conntrack_helper      helper;
++};
++
++static LIST_HEAD(nfnl_cthelper_list);
++
+ static int
+ nfnl_userspace_cthelper(struct sk_buff *skb, unsigned int protoff,
+                       struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+@@ -205,14 +212,16 @@ nfnl_cthelper_create(const struct nlattr
+                    struct nf_conntrack_tuple *tuple)
+ {
+       struct nf_conntrack_helper *helper;
++      struct nfnl_cthelper *nfcth;
+       int ret;
+ 
+       if (!tb[NFCTH_TUPLE] || !tb[NFCTH_POLICY] || !tb[NFCTH_PRIV_DATA_LEN])
+               return -EINVAL;
+ 
+-      helper = kzalloc(sizeof(struct nf_conntrack_helper), GFP_KERNEL);
+-      if (helper == NULL)
++      nfcth = kzalloc(sizeof(*nfcth), GFP_KERNEL);
++      if (nfcth == NULL)
+               return -ENOMEM;
++      helper = &nfcth->helper;
+ 
+       ret = nfnl_cthelper_parse_expect_policy(helper, tb[NFCTH_POLICY]);
+       if (ret < 0)
+@@ -249,11 +258,12 @@ nfnl_cthelper_create(const struct nlattr
+       if (ret < 0)
+               goto err2;
+ 
++      list_add_tail(&nfcth->list, &nfnl_cthelper_list);
+       return 0;
+ err2:
+       kfree(helper->expect_policy);
+ err1:
+-      kfree(helper);
++      kfree(nfcth);
+       return ret;
+ }
+ 
+@@ -379,7 +389,8 @@ static int nfnl_cthelper_new(struct net
+       const char *helper_name;
+       struct nf_conntrack_helper *cur, *helper = NULL;
+       struct nf_conntrack_tuple tuple;
+-      int ret = 0, i;
++      struct nfnl_cthelper *nlcth;
++      int ret = 0;
+ 
+       if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE])
+               return -EINVAL;
+@@ -390,31 +401,22 @@ static int nfnl_cthelper_new(struct net
+       if (ret < 0)
+               return ret;
+ 
+-      rcu_read_lock();
+-      for (i = 0; i < nf_ct_helper_hsize && !helper; i++) {
+-              hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) {
++      list_for_each_entry(nlcth, &nfnl_cthelper_list, list) {
++              cur = &nlcth->helper;
+ 
+-                      /* skip non-userspace conntrack helpers. */
+-                      if (!(cur->flags & NF_CT_HELPER_F_USERSPACE))
+-                              continue;
++              if (strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN))
++                      continue;
+ 
+-                      if (strncmp(cur->name, helper_name,
+-                                      NF_CT_HELPER_NAME_LEN) != 0)
+-                              continue;
++              if ((tuple.src.l3num != cur->tuple.src.l3num ||
++                   tuple.dst.protonum != cur->tuple.dst.protonum))
++                      continue;
+ 
+-                      if ((tuple.src.l3num != cur->tuple.src.l3num ||
+-                           tuple.dst.protonum != cur->tuple.dst.protonum))
+-                              continue;
++              if (nlh->nlmsg_flags & NLM_F_EXCL)
++                      return -EEXIST;
+ 
+-                      if (nlh->nlmsg_flags & NLM_F_EXCL) {
+-                              ret = -EEXIST;
+-                              goto err;
+-                      }
+-                      helper = cur;
+-                      break;
+-              }
++              helper = cur;
++              break;
+       }
+-      rcu_read_unlock();
+ 
+       if (helper == NULL)
+               ret = nfnl_cthelper_create(tb, &tuple);
+@@ -422,9 +424,6 @@ static int nfnl_cthelper_new(struct net
+               ret = nfnl_cthelper_update(tb, helper);
+ 
+       return ret;
+-err:
+-      rcu_read_unlock();
+-      return ret;
+ }
+ 
+ static int
+@@ -588,11 +587,12 @@ static int nfnl_cthelper_get(struct net
+                            struct sk_buff *skb, const struct nlmsghdr *nlh,
+                            const struct nlattr * const tb[])
+ {
+-      int ret = -ENOENT, i;
++      int ret = -ENOENT;
+       struct nf_conntrack_helper *cur;
+       struct sk_buff *skb2;
+       char *helper_name = NULL;
+       struct nf_conntrack_tuple tuple;
++      struct nfnl_cthelper *nlcth;
+       bool tuple_set = false;
+ 
+       if (nlh->nlmsg_flags & NLM_F_DUMP) {
+@@ -613,45 +613,39 @@ static int nfnl_cthelper_get(struct net
+               tuple_set = true;
+       }
+ 
+-      for (i = 0; i < nf_ct_helper_hsize; i++) {
+-              hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) {
+-
+-                      /* skip non-userspace conntrack helpers. */
+-                      if (!(cur->flags & NF_CT_HELPER_F_USERSPACE))
+-                              continue;
+-
+-                      if (helper_name && strncmp(cur->name, helper_name,
+-                                              NF_CT_HELPER_NAME_LEN) != 0) {
+-                              continue;
+-                      }
+-                      if (tuple_set &&
+-                          (tuple.src.l3num != cur->tuple.src.l3num ||
+-                           tuple.dst.protonum != cur->tuple.dst.protonum))
+-                              continue;
+-
+-                      skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+-                      if (skb2 == NULL) {
+-                              ret = -ENOMEM;
+-                              break;
+-                      }
++      list_for_each_entry(nlcth, &nfnl_cthelper_list, list) {
++              cur = &nlcth->helper;
++              if (helper_name &&
++                  strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN))
++                      continue;
++
++              if (tuple_set &&
++                  (tuple.src.l3num != cur->tuple.src.l3num ||
++                   tuple.dst.protonum != cur->tuple.dst.protonum))
++                      continue;
++
++              skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
++              if (skb2 == NULL) {
++                      ret = -ENOMEM;
++                      break;
++              }
+ 
+-                      ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid,
+-                                              nlh->nlmsg_seq,
+-                                              NFNL_MSG_TYPE(nlh->nlmsg_type),
+-                                              NFNL_MSG_CTHELPER_NEW, cur);
+-                      if (ret <= 0) {
+-                              kfree_skb(skb2);
+-                              break;
+-                      }
++              ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid,
++                                            nlh->nlmsg_seq,
++                                            NFNL_MSG_TYPE(nlh->nlmsg_type),
++                                            NFNL_MSG_CTHELPER_NEW, cur);
++              if (ret <= 0) {
++                      kfree_skb(skb2);
++                      break;
++              }
+ 
+-                      ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid,
+-                                              MSG_DONTWAIT);
+-                      if (ret > 0)
+-                              ret = 0;
++              ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid,
++                                    MSG_DONTWAIT);
++              if (ret > 0)
++                      ret = 0;
+ 
+-                      /* this avoids a loop in nfnetlink. */
+-                      return ret == -EAGAIN ? -ENOBUFS : ret;
+-              }
++              /* this avoids a loop in nfnetlink. */
++              return ret == -EAGAIN ? -ENOBUFS : ret;
+       }
+       return ret;
+ }
+@@ -662,10 +656,10 @@ static int nfnl_cthelper_del(struct net
+ {
+       char *helper_name = NULL;
+       struct nf_conntrack_helper *cur;
+-      struct hlist_node *tmp;
+       struct nf_conntrack_tuple tuple;
+       bool tuple_set = false, found = false;
+-      int i, j = 0, ret;
++      struct nfnl_cthelper *nlcth, *n;
++      int j = 0, ret;
+ 
+       if (tb[NFCTH_NAME])
+               helper_name = nla_data(tb[NFCTH_NAME]);
+@@ -678,30 +672,27 @@ static int nfnl_cthelper_del(struct net
+               tuple_set = true;
+       }
+ 
+-      for (i = 0; i < nf_ct_helper_hsize; i++) {
+-              hlist_for_each_entry_safe(cur, tmp, &nf_ct_helper_hash[i],
+-                                                              hnode) {
+-                      /* skip non-userspace conntrack helpers. */
+-                      if (!(cur->flags & NF_CT_HELPER_F_USERSPACE))
+-                              continue;
+-
+-                      j++;
+-
+-                      if (helper_name && strncmp(cur->name, helper_name,
+-                                              NF_CT_HELPER_NAME_LEN) != 0) {
+-                              continue;
+-                      }
+-                      if (tuple_set &&
+-                          (tuple.src.l3num != cur->tuple.src.l3num ||
+-                           tuple.dst.protonum != cur->tuple.dst.protonum))
+-                              continue;
++      list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) {
++              cur = &nlcth->helper;
++              j++;
++
++              if (helper_name &&
++                  strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN))
++                      continue;
++
++              if (tuple_set &&
++                  (tuple.src.l3num != cur->tuple.src.l3num ||
++                   tuple.dst.protonum != cur->tuple.dst.protonum))
++                      continue;
++
++              found = true;
++              nf_conntrack_helper_unregister(cur);
++              kfree(cur->expect_policy);
+ 
+-                      found = true;
+-                      nf_conntrack_helper_unregister(cur);
+-                      kfree(cur->expect_policy);
+-                      kfree(cur);
+-              }
++              list_del(&nlcth->list);
++              kfree(nlcth);
+       }
++
+       /* Make sure we return success if we flush and there is no helpers */
+       return (found || j == 0) ? 0 : -ENOENT;
+ }
+@@ -750,22 +741,16 @@ err_out:
+ static void __exit nfnl_cthelper_exit(void)
+ {
+       struct nf_conntrack_helper *cur;
+-      struct hlist_node *tmp;
+-      int i;
++      struct nfnl_cthelper *nlcth, *n;
+ 
+       nfnetlink_subsys_unregister(&nfnl_cthelper_subsys);
+ 
+-      for (i=0; i<nf_ct_helper_hsize; i++) {
+-              hlist_for_each_entry_safe(cur, tmp, &nf_ct_helper_hash[i],
+-                                                                      hnode) {
+-                      /* skip non-userspace conntrack helpers. */
+-                      if (!(cur->flags & NF_CT_HELPER_F_USERSPACE))
+-                              continue;
++      list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) {
++              cur = &nlcth->helper;
+ 
+-                      nf_conntrack_helper_unregister(cur);
+-                      kfree(cur->expect_policy);
+-                      kfree(cur);
+-              }
++              nf_conntrack_helper_unregister(cur);
++              kfree(cur->expect_policy);
++              kfree(nlcth);
+       }
+ }
+ 
diff --git a/queue-4.9/netfilter-nfnl_cthelper-fix-memory-leak.patch b/queue-4.9/netfilter-nfnl_cthelper-fix-memory-leak.patch

new file mode 100644 (file)

index 0000000..3f75945
--- /dev/null
+++ b/queue-4.9/netfilter-nfnl_cthelper-fix-memory-leak.patch
@@ -0,0 +1,64 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Jeffy Chen <jeffy.chen@rock-chips.com>
+Date: Tue, 21 Mar 2017 15:07:10 +0800
+Subject: netfilter: nfnl_cthelper: Fix memory leak
+
+From: Jeffy Chen <jeffy.chen@rock-chips.com>
+
+
+[ Upstream commit f83bf8da1135ca635aac8f062cad3f001fcf3a26 ]
+
+We have memory leaks of nf_conntrack_helper & expect_policy.
+
+Signed-off-by: Jeffy Chen <jeffy.chen@rock-chips.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nfnetlink_cthelper.c |   12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+--- a/net/netfilter/nfnetlink_cthelper.c
++++ b/net/netfilter/nfnetlink_cthelper.c
+@@ -216,7 +216,7 @@ nfnl_cthelper_create(const struct nlattr
+ 
+       ret = nfnl_cthelper_parse_expect_policy(helper, tb[NFCTH_POLICY]);
+       if (ret < 0)
+-              goto err;
++              goto err1;
+ 
+       strncpy(helper->name, nla_data(tb[NFCTH_NAME]), NF_CT_HELPER_NAME_LEN);
+       helper->data_len = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN]));
+@@ -247,10 +247,12 @@ nfnl_cthelper_create(const struct nlattr
+ 
+       ret = nf_conntrack_helper_register(helper);
+       if (ret < 0)
+-              goto err;
++              goto err2;
+ 
+       return 0;
+-err:
++err2:
++      kfree(helper->expect_policy);
++err1:
+       kfree(helper);
+       return ret;
+ }
+@@ -696,6 +698,8 @@ static int nfnl_cthelper_del(struct net
+ 
+                       found = true;
+                       nf_conntrack_helper_unregister(cur);
++                      kfree(cur->expect_policy);
++                      kfree(cur);
+               }
+       }
+       /* Make sure we return success if we flush and there is no helpers */
+@@ -759,6 +763,8 @@ static void __exit nfnl_cthelper_exit(vo
+                               continue;
+ 
+                       nf_conntrack_helper_unregister(cur);
++                      kfree(cur->expect_policy);
++                      kfree(cur);
+               }
+       }
+ }
diff --git a/queue-4.9/netfilter-nfnl_cthelper-fix-runtime-expectation-policy-updates.patch b/queue-4.9/netfilter-nfnl_cthelper-fix-runtime-expectation-policy-updates.patch

new file mode 100644 (file)

index 0000000..2bdb494
--- /dev/null
+++ b/queue-4.9/netfilter-nfnl_cthelper-fix-runtime-expectation-policy-updates.patch
@@ -0,0 +1,123 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Tue, 21 Mar 2017 13:32:37 +0100
+Subject: netfilter: nfnl_cthelper: fix runtime expectation policy updates
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+
+[ Upstream commit 2c422257550f123049552b39f7af6e3428a60f43 ]
+
+We only allow runtime updates of expectation policies for timeout and
+maximum number of expectations, otherwise reject the update.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Acked-by: Liping Zhang <zlpnobody@gmail.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nfnetlink_cthelper.c |   86 ++++++++++++++++++++++++++++++++++++-
+ 1 file changed, 84 insertions(+), 2 deletions(-)
+
+--- a/net/netfilter/nfnetlink_cthelper.c
++++ b/net/netfilter/nfnetlink_cthelper.c
+@@ -256,6 +256,89 @@ err:
+ }
+ 
+ static int
++nfnl_cthelper_update_policy_one(const struct nf_conntrack_expect_policy *policy,
++                              struct nf_conntrack_expect_policy *new_policy,
++                              const struct nlattr *attr)
++{
++      struct nlattr *tb[NFCTH_POLICY_MAX + 1];
++      int err;
++
++      err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr,
++                             nfnl_cthelper_expect_pol);
++      if (err < 0)
++              return err;
++
++      if (!tb[NFCTH_POLICY_NAME] ||
++          !tb[NFCTH_POLICY_EXPECT_MAX] ||
++          !tb[NFCTH_POLICY_EXPECT_TIMEOUT])
++              return -EINVAL;
++
++      if (nla_strcmp(tb[NFCTH_POLICY_NAME], policy->name))
++              return -EBUSY;
++
++      new_policy->max_expected =
++              ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX]));
++      new_policy->timeout =
++              ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_TIMEOUT]));
++
++      return 0;
++}
++
++static int nfnl_cthelper_update_policy_all(struct nlattr *tb[],
++                                         struct nf_conntrack_helper *helper)
++{
++      struct nf_conntrack_expect_policy new_policy[helper->expect_class_max + 1];
++      struct nf_conntrack_expect_policy *policy;
++      int i, err;
++
++      /* Check first that all policy attributes are well-formed, so we don't
++       * leave things in inconsistent state on errors.
++       */
++      for (i = 0; i < helper->expect_class_max + 1; i++) {
++
++              if (!tb[NFCTH_POLICY_SET + i])
++                      return -EINVAL;
++
++              err = nfnl_cthelper_update_policy_one(&helper->expect_policy[i],
++                                                    &new_policy[i],
++                                                    tb[NFCTH_POLICY_SET + i]);
++              if (err < 0)
++                      return err;
++      }
++      /* Now we can safely update them. */
++      for (i = 0; i < helper->expect_class_max + 1; i++) {
++              policy = (struct nf_conntrack_expect_policy *)
++                              &helper->expect_policy[i];
++              policy->max_expected = new_policy->max_expected;
++              policy->timeout = new_policy->timeout;
++      }
++
++      return 0;
++}
++
++static int nfnl_cthelper_update_policy(struct nf_conntrack_helper *helper,
++                                     const struct nlattr *attr)
++{
++      struct nlattr *tb[NFCTH_POLICY_SET_MAX + 1];
++      unsigned int class_max;
++      int err;
++
++      err = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr,
++                             nfnl_cthelper_expect_policy_set);
++      if (err < 0)
++              return err;
++
++      if (!tb[NFCTH_POLICY_SET_NUM])
++              return -EINVAL;
++
++      class_max = ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM]));
++      if (helper->expect_class_max + 1 != class_max)
++              return -EBUSY;
++
++      return nfnl_cthelper_update_policy_all(tb, helper);
++}
++
++static int
+ nfnl_cthelper_update(const struct nlattr * const tb[],
+                    struct nf_conntrack_helper *helper)
+ {
+@@ -265,8 +348,7 @@ nfnl_cthelper_update(const struct nlattr
+               return -EBUSY;
+ 
+       if (tb[NFCTH_POLICY]) {
+-              ret = nfnl_cthelper_parse_expect_policy(helper,
+-                                                      tb[NFCTH_POLICY]);
++              ret = nfnl_cthelper_update_policy(helper, tb[NFCTH_POLICY]);
+               if (ret < 0)
+                       return ret;
+       }
diff --git a/queue-4.9/nvme-loop-handle-cpu-unplug-when-re-establishing-the-controller.patch b/queue-4.9/nvme-loop-handle-cpu-unplug-when-re-establishing-the-controller.patch

new file mode 100644 (file)

index 0000000..ec710bb
--- /dev/null
+++ b/queue-4.9/nvme-loop-handle-cpu-unplug-when-re-establishing-the-controller.patch
@@ -0,0 +1,182 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Sagi Grimberg <sagi@grimberg.me>
+Date: Mon, 13 Mar 2017 13:27:51 +0200
+Subject: nvme-loop: handle cpu unplug when re-establishing the controller
+
+From: Sagi Grimberg <sagi@grimberg.me>
+
+
+[ Upstream commit 945dd5bacc8978439af276976b5dcbbd42333dbc ]
+
+If a cpu unplug event has occurred, we need to take the minimum
+of the provided nr_io_queues and the number of online cpus,
+otherwise we won't be able to connect them as blk-mq mapping
+won't dispatch to those queues.
+
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/nvme/target/loop.c |   88 +++++++++++++++++++++++++--------------------
+ 1 file changed, 50 insertions(+), 38 deletions(-)
+
+--- a/drivers/nvme/target/loop.c
++++ b/drivers/nvme/target/loop.c
+@@ -223,8 +223,6 @@ static void nvme_loop_submit_async_event
+ static int nvme_loop_init_iod(struct nvme_loop_ctrl *ctrl,
+               struct nvme_loop_iod *iod, unsigned int queue_idx)
+ {
+-      BUG_ON(queue_idx >= ctrl->queue_count);
+-
+       iod->req.cmd = &iod->cmd;
+       iod->req.rsp = &iod->rsp;
+       iod->queue = &ctrl->queues[queue_idx];
+@@ -314,6 +312,43 @@ free_ctrl:
+       kfree(ctrl);
+ }
+ 
++static void nvme_loop_destroy_io_queues(struct nvme_loop_ctrl *ctrl)
++{
++      int i;
++
++      for (i = 1; i < ctrl->queue_count; i++)
++              nvmet_sq_destroy(&ctrl->queues[i].nvme_sq);
++}
++
++static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl)
++{
++      struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
++      unsigned int nr_io_queues;
++      int ret, i;
++
++      nr_io_queues = min(opts->nr_io_queues, num_online_cpus());
++      ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
++      if (ret || !nr_io_queues)
++              return ret;
++
++      dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", nr_io_queues);
++
++      for (i = 1; i <= nr_io_queues; i++) {
++              ctrl->queues[i].ctrl = ctrl;
++              ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq);
++              if (ret)
++                      goto out_destroy_queues;
++
++              ctrl->queue_count++;
++      }
++
++      return 0;
++
++out_destroy_queues:
++      nvme_loop_destroy_io_queues(ctrl);
++      return ret;
++}
++
+ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
+ {
+       int error;
+@@ -385,17 +420,13 @@ out_free_sq:
+ 
+ static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl)
+ {
+-      int i;
+-
+       nvme_stop_keep_alive(&ctrl->ctrl);
+ 
+       if (ctrl->queue_count > 1) {
+               nvme_stop_queues(&ctrl->ctrl);
+               blk_mq_tagset_busy_iter(&ctrl->tag_set,
+                                       nvme_cancel_request, &ctrl->ctrl);
+-
+-              for (i = 1; i < ctrl->queue_count; i++)
+-                      nvmet_sq_destroy(&ctrl->queues[i].nvme_sq);
++              nvme_loop_destroy_io_queues(ctrl);
+       }
+ 
+       if (ctrl->ctrl.state == NVME_CTRL_LIVE)
+@@ -467,19 +498,14 @@ static void nvme_loop_reset_ctrl_work(st
+       if (ret)
+               goto out_disable;
+ 
+-      for (i = 1; i <= ctrl->ctrl.opts->nr_io_queues; i++) {
+-              ctrl->queues[i].ctrl = ctrl;
+-              ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq);
+-              if (ret)
+-                      goto out_free_queues;
+-
+-              ctrl->queue_count++;
+-      }
++      ret = nvme_loop_init_io_queues(ctrl);
++      if (ret)
++              goto out_destroy_admin;
+ 
+-      for (i = 1; i <= ctrl->ctrl.opts->nr_io_queues; i++) {
++      for (i = 1; i < ctrl->queue_count; i++) {
+               ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
+               if (ret)
+-                      goto out_free_queues;
++                      goto out_destroy_io;
+       }
+ 
+       changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
+@@ -492,9 +518,9 @@ static void nvme_loop_reset_ctrl_work(st
+ 
+       return;
+ 
+-out_free_queues:
+-      for (i = 1; i < ctrl->queue_count; i++)
+-              nvmet_sq_destroy(&ctrl->queues[i].nvme_sq);
++out_destroy_io:
++      nvme_loop_destroy_io_queues(ctrl);
++out_destroy_admin:
+       nvme_loop_destroy_admin_queue(ctrl);
+ out_disable:
+       dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
+@@ -533,25 +559,12 @@ static const struct nvme_ctrl_ops nvme_l
+ 
+ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
+ {
+-      struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
+       int ret, i;
+ 
+-      ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues);
+-      if (ret || !opts->nr_io_queues)
++      ret = nvme_loop_init_io_queues(ctrl);
++      if (ret)
+               return ret;
+ 
+-      dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n",
+-              opts->nr_io_queues);
+-
+-      for (i = 1; i <= opts->nr_io_queues; i++) {
+-              ctrl->queues[i].ctrl = ctrl;
+-              ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq);
+-              if (ret)
+-                      goto out_destroy_queues;
+-
+-              ctrl->queue_count++;
+-      }
+-
+       memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
+       ctrl->tag_set.ops = &nvme_loop_mq_ops;
+       ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
+@@ -575,7 +588,7 @@ static int nvme_loop_create_io_queues(st
+               goto out_free_tagset;
+       }
+ 
+-      for (i = 1; i <= opts->nr_io_queues; i++) {
++      for (i = 1; i < ctrl->queue_count; i++) {
+               ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
+               if (ret)
+                       goto out_cleanup_connect_q;
+@@ -588,8 +601,7 @@ out_cleanup_connect_q:
+ out_free_tagset:
+       blk_mq_free_tag_set(&ctrl->tag_set);
+ out_destroy_queues:
+-      for (i = 1; i < ctrl->queue_count; i++)
+-              nvmet_sq_destroy(&ctrl->queues[i].nvme_sq);
++      nvme_loop_destroy_io_queues(ctrl);
+       return ret;
+ }
+ 
diff --git a/queue-4.9/pci-aer-report-non-fatal-errors-only-to-the-affected-endpoint.patch b/queue-4.9/pci-aer-report-non-fatal-errors-only-to-the-affected-endpoint.patch

new file mode 100644 (file)

index 0000000..f3c8e55
--- /dev/null
+++ b/queue-4.9/pci-aer-report-non-fatal-errors-only-to-the-affected-endpoint.patch
@@ -0,0 +1,69 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Gabriele Paoloni <gabriele.paoloni@huawei.com>
+Date: Thu, 28 Sep 2017 15:33:05 +0100
+Subject: PCI/AER: Report non-fatal errors only to the affected endpoint
+
+From: Gabriele Paoloni <gabriele.paoloni@huawei.com>
+
+
+[ Upstream commit 86acc790717fb60fb51ea3095084e331d8711c74 ]
+
+Previously, if a non-fatal error was reported by an endpoint, we
+called report_error_detected() for the endpoint, every sibling on the
+bus, and their descendents.  If any of them did not implement the
+.error_detected() method, do_recovery() failed, leaving all these
+devices unrecovered.
+
+For example, the system described in the bugzilla below has two devices:
+
+  0000:74:02.0 [19e5:a230] SAS controller, driver has .error_detected()
+  0000:74:03.0 [19e5:a235] SATA controller, driver lacks .error_detected()
+
+When a device such as 74:02.0 reported a non-fatal error, do_recovery()
+failed because 74:03.0 lacked an .error_detected() method.  But per PCIe
+r3.1, sec 6.2.2.2.2, such an error does not compromise the Link and
+does not affect 74:03.0:
+
+  Non-fatal errors are uncorrectable errors which cause a particular
+  transaction to be unreliable but the Link is otherwise fully functional.
+  Isolating Non-fatal from Fatal errors provides Requester/Receiver logic
+  in a device or system management software the opportunity to recover from
+  the error without resetting the components on the Link and disturbing
+  other transactions in progress.  Devices not associated with the
+  transaction in error are not impacted by the error.
+
+Report non-fatal errors only to the endpoint that reported them.  We really
+want to check for AER_NONFATAL here, but the current code structure doesn't
+allow that.  Looking for pci_channel_io_normal is the best we can do now.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=197055
+Fixes: 6c2b374d7485 ("PCI-Express AER implemetation: AER core and aerdriver")
+Signed-off-by: Gabriele Paoloni <gabriele.paoloni@huawei.com>
+Signed-off-by: Dongdong Liu <liudongdong3@huawei.com>
+[bhelgaas: changelog]
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/pci/pcie/aer/aerdrv_core.c |    9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/drivers/pci/pcie/aer/aerdrv_core.c
++++ b/drivers/pci/pcie/aer/aerdrv_core.c
+@@ -390,7 +390,14 @@ static pci_ers_result_t broadcast_error_
+                * If the error is reported by an end point, we think this
+                * error is related to the upstream link of the end point.
+                */
+-              pci_walk_bus(dev->bus, cb, &result_data);
++              if (state == pci_channel_io_normal)
++                      /*
++                       * the error is non fatal so the bus is ok, just invoke
++                       * the callback for the function that logged the error.
++                       */
++                      cb(dev, &result_data);
++              else
++                      pci_walk_bus(dev->bus, cb, &result_data);
+       }
+ 
+       return result_data.result;
diff --git a/queue-4.9/pci-avoid-bus-reset-if-bridge-itself-is-broken.patch b/queue-4.9/pci-avoid-bus-reset-if-bridge-itself-is-broken.patch

new file mode 100644 (file)

index 0000000..0c96e48
--- /dev/null
+++ b/queue-4.9/pci-avoid-bus-reset-if-bridge-itself-is-broken.patch
@@ -0,0 +1,47 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: David Daney <david.daney@cavium.com>
+Date: Fri, 8 Sep 2017 10:10:31 +0200
+Subject: PCI: Avoid bus reset if bridge itself is broken
+
+From: David Daney <david.daney@cavium.com>
+
+
+[ Upstream commit 357027786f3523d26f42391aa4c075b8495e5d28 ]
+
+When checking to see if a PCI bus can safely be reset, we previously
+checked to see if any of the children had their PCI_DEV_FLAGS_NO_BUS_RESET
+flag set.  Children marked with that flag are known not to behave well
+after a bus reset.
+
+Some PCIe root port bridges also do not behave well after a bus reset,
+sometimes causing the devices behind the bridge to become unusable.
+
+Add a check for PCI_DEV_FLAGS_NO_BUS_RESET being set in the bridge device
+to allow these bridges to be flagged, and prevent their secondary buses
+from being reset.
+
+Signed-off-by: David Daney <david.daney@cavium.com>
+[jglauber@cavium.com: fixed typo]
+Signed-off-by: Jan Glauber <jglauber@cavium.com>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
+
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/pci/pci.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/pci/pci.c
++++ b/drivers/pci/pci.c
+@@ -4214,6 +4214,10 @@ static bool pci_bus_resetable(struct pci
+ {
+       struct pci_dev *dev;
+ 
++
++      if (bus->self && (bus->self->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET))
++              return false;
++
+       list_for_each_entry(dev, &bus->devices, bus_list) {
+               if (dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET ||
+                   (dev->subordinate && !pci_bus_resetable(dev->subordinate)))
diff --git a/queue-4.9/pci-create-sr-iov-virtfn-physfn-links-before-attaching-driver.patch b/queue-4.9/pci-create-sr-iov-virtfn-physfn-links-before-attaching-driver.patch

new file mode 100644 (file)

index 0000000..06b7038
--- /dev/null
+++ b/queue-4.9/pci-create-sr-iov-virtfn-physfn-links-before-attaching-driver.patch
@@ -0,0 +1,44 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Stuart Hayes <stuart.w.hayes@gmail.com>
+Date: Wed, 4 Oct 2017 10:57:52 -0500
+Subject: PCI: Create SR-IOV virtfn/physfn links before attaching driver
+
+From: Stuart Hayes <stuart.w.hayes@gmail.com>
+
+
+[ Upstream commit 27d6162944b9b34c32cd5841acd21786637ee743 ]
+
+When creating virtual functions, create the "virtfn%u" and "physfn" links
+in sysfs *before* attaching the driver instead of after.  When we attach
+the driver to the new virtual network interface first, there is a race when
+the driver attaches to the new device, sends out an "add" udev event, and the
+network interface naming software (biosdevname or systemd, for example)
+tries to look at these links.
+
+Signed-off-by: Stuart Hayes <stuart.w.hayes@gmail.com>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/pci/iov.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/pci/iov.c
++++ b/drivers/pci/iov.c
+@@ -164,7 +164,6 @@ int pci_iov_add_virtfn(struct pci_dev *d
+       pci_device_add(virtfn, virtfn->bus);
+       mutex_unlock(&iov->dev->sriov->lock);
+ 
+-      pci_bus_add_device(virtfn);
+       sprintf(buf, "virtfn%u", id);
+       rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf);
+       if (rc)
+@@ -175,6 +174,8 @@ int pci_iov_add_virtfn(struct pci_dev *d
+ 
+       kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE);
+ 
++      pci_bus_add_device(virtfn);
++
+       return 0;
+ 
+ failed2:
diff --git a/queue-4.9/percpu-don-t-forget-to-free-the-temporary-struct-pcpu_alloc_info.patch b/queue-4.9/percpu-don-t-forget-to-free-the-temporary-struct-pcpu_alloc_info.patch

new file mode 100644 (file)

index 0000000..9ac0b2e
--- /dev/null
+++ b/queue-4.9/percpu-don-t-forget-to-free-the-temporary-struct-pcpu_alloc_info.patch
@@ -0,0 +1,43 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Nicolas Pitre <nicolas.pitre@linaro.org>
+Date: Tue, 3 Oct 2017 18:29:49 -0400
+Subject: percpu: don't forget to free the temporary struct pcpu_alloc_info
+
+From: Nicolas Pitre <nicolas.pitre@linaro.org>
+
+
+[ Upstream commit 438a50618095061920d3a30d4c5ca1ef2e0ff860 ]
+
+Unlike the SMP case, the !SMP case does not free the memory for struct
+pcpu_alloc_info allocated in setup_per_cpu_areas(). And to give it a
+chance of being reused by the page allocator later, align it to a page
+boundary just like its size.
+
+Signed-off-by: Nicolas Pitre <nico@linaro.org>
+Acked-by: Dennis Zhou <dennisszhou@gmail.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/percpu.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/mm/percpu.c
++++ b/mm/percpu.c
+@@ -1400,7 +1400,7 @@ struct pcpu_alloc_info * __init pcpu_all
+                         __alignof__(ai->groups[0].cpu_map[0]));
+       ai_size = base_size + nr_units * sizeof(ai->groups[0].cpu_map[0]);
+ 
+-      ptr = memblock_virt_alloc_nopanic(PFN_ALIGN(ai_size), 0);
++      ptr = memblock_virt_alloc_nopanic(PFN_ALIGN(ai_size), PAGE_SIZE);
+       if (!ptr)
+               return NULL;
+       ai = ptr;
+@@ -2264,6 +2264,7 @@ void __init setup_per_cpu_areas(void)
+ 
+       if (pcpu_setup_first_chunk(ai, fc) < 0)
+               panic("Failed to initialize percpu areas.");
++      pcpu_free_alloc_info(ai);
+ }
+ 
+ #endif        /* CONFIG_SMP */
diff --git a/queue-4.9/pinctrl-st-add-irq_request-release_resources-callbacks.patch b/queue-4.9/pinctrl-st-add-irq_request-release_resources-callbacks.patch

new file mode 100644 (file)

index 0000000..b6fd400
--- /dev/null
+++ b/queue-4.9/pinctrl-st-add-irq_request-release_resources-callbacks.patch
@@ -0,0 +1,75 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Patrice Chotard <patrice.chotard@st.com>
+Date: Thu, 16 Mar 2017 18:26:02 +0100
+Subject: pinctrl: st: add irq_request/release_resources callbacks
+
+From: Patrice Chotard <patrice.chotard@st.com>
+
+
+[ Upstream commit e855fa9a65c40788b5069abb0d094537daa22e05 ]
+
+When using a GPIO as an IRQ source, the GPIO must be configured
+as an input. Callbacks dedicated to this were missing in the
+pinctrl-st driver.
+
+This fixes the following kernel error when trying to lock a gpio
+as IRQ:
+
+[    7.521095] gpio gpiochip7: (PIO11): gpiochip_lock_as_irq: tried to flag a GPIO set as output for IRQ
+[    7.526018] gpio gpiochip7: (PIO11): unable to lock HW IRQ 6 for IRQ
+[    7.529405] genirq: Failed to request resources for 0-0053 (irq 81) on irqchip GPIO
+
+Signed-off-by: Patrice Chotard <patrice.chotard@st.com>
+Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/pinctrl/pinctrl-st.c |   30 ++++++++++++++++++++++++------
+ 1 file changed, 24 insertions(+), 6 deletions(-)
+
+--- a/drivers/pinctrl/pinctrl-st.c
++++ b/drivers/pinctrl/pinctrl-st.c
+@@ -1285,6 +1285,22 @@ static void st_gpio_irq_unmask(struct ir
+       writel(BIT(d->hwirq), bank->base + REG_PIO_SET_PMASK);
+ }
+ 
++static int st_gpio_irq_request_resources(struct irq_data *d)
++{
++      struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
++
++      st_gpio_direction_input(gc, d->hwirq);
++
++      return gpiochip_lock_as_irq(gc, d->hwirq);
++}
++
++static void st_gpio_irq_release_resources(struct irq_data *d)
++{
++      struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
++
++      gpiochip_unlock_as_irq(gc, d->hwirq);
++}
++
+ static int st_gpio_irq_set_type(struct irq_data *d, unsigned type)
+ {
+       struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+@@ -1438,12 +1454,14 @@ static struct gpio_chip st_gpio_template
+ };
+ 
+ static struct irq_chip st_gpio_irqchip = {
+-      .name           = "GPIO",
+-      .irq_disable    = st_gpio_irq_mask,
+-      .irq_mask       = st_gpio_irq_mask,
+-      .irq_unmask     = st_gpio_irq_unmask,
+-      .irq_set_type   = st_gpio_irq_set_type,
+-      .flags          = IRQCHIP_SKIP_SET_WAKE,
++      .name                   = "GPIO",
++      .irq_request_resources  = st_gpio_irq_request_resources,
++      .irq_release_resources  = st_gpio_irq_release_resources,
++      .irq_disable            = st_gpio_irq_mask,
++      .irq_mask               = st_gpio_irq_mask,
++      .irq_unmask             = st_gpio_irq_unmask,
++      .irq_set_type           = st_gpio_irq_set_type,
++      .flags                  = IRQCHIP_SKIP_SET_WAKE,
+ };
+ 
+ static int st_gpiolib_register_bank(struct st_pinctrl *info,
diff --git a/queue-4.9/pm-opp-move-error-message-to-debug-level.patch b/queue-4.9/pm-opp-move-error-message-to-debug-level.patch

new file mode 100644 (file)

index 0000000..6d8ed68
--- /dev/null
+++ b/queue-4.9/pm-opp-move-error-message-to-debug-level.patch
@@ -0,0 +1,46 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Fabio Estevam <fabio.estevam@nxp.com>
+Date: Fri, 29 Sep 2017 14:39:49 -0300
+Subject: PM / OPP: Move error message to debug level
+
+From: Fabio Estevam <fabio.estevam@nxp.com>
+
+
+[ Upstream commit 035ed07208dc501d023873447113f3f178592156 ]
+
+On some i.MX6 platforms which do not have speed grading
+check, opp table will not be created in platform code,
+so cpufreq driver prints the following error message:
+
+cpu cpu0: dev_pm_opp_get_opp_count: OPP table not found (-19)
+
+However, this is not really an error in this case because the
+imx6q-cpufreq driver first calls dev_pm_opp_get_opp_count()
+and if it fails, it means that platform code does not provide
+OPP and then dev_pm_opp_of_add_table() will be called.
+
+In order to avoid such confusing error message, move it to
+debug level.
+
+It is up to the caller of dev_pm_opp_get_opp_count() to check its
+return value and decide if it will print an error or not.
+
+Signed-off-by: Fabio Estevam <fabio.estevam@nxp.com>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/base/power/opp/core.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/base/power/opp/core.c
++++ b/drivers/base/power/opp/core.c
+@@ -331,7 +331,7 @@ int dev_pm_opp_get_opp_count(struct devi
+       opp_table = _find_opp_table(dev);
+       if (IS_ERR(opp_table)) {
+               count = PTR_ERR(opp_table);
+-              dev_err(dev, "%s: OPP table not found (%d)\n",
++              dev_dbg(dev, "%s: OPP table not found (%d)\n",
+                       __func__, count);
+               goto out_unlock;
+       }
diff --git a/queue-4.9/r8152-fix-the-list-rx_done-may-be-used-without-initialization.patch b/queue-4.9/r8152-fix-the-list-rx_done-may-be-used-without-initialization.patch

new file mode 100644 (file)

index 0000000..45fb313
--- /dev/null
+++ b/queue-4.9/r8152-fix-the-list-rx_done-may-be-used-without-initialization.patch
@@ -0,0 +1,37 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: hayeswang <hayeswang@realtek.com>
+Date: Tue, 14 Mar 2017 14:15:20 +0800
+Subject: r8152: fix the list rx_done may be used without initialization
+
+From: hayeswang <hayeswang@realtek.com>
+
+
+[ Upstream commit 98d068ab52b4b11d403995ed14154660797e7136 ]
+
+The list rx_done is only initialized when the link comes up.
+Therefore, if napi is scheduled before the link has ever come up,
+the following kernel panic happens.
+
+       BUG: unable to handle kernel NULL pointer dereference at 000000000000008
+       IP: [<ffffffffc085efde>] r8152_poll+0xe1e/0x1210 [r8152]
+       PGD 0
+       Oops: 0002 [#1] SMP
+
+Signed-off-by: Hayes Wang <hayeswang@realtek.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/r8152.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/usb/r8152.c
++++ b/drivers/net/usb/r8152.c
+@@ -1362,6 +1362,7 @@ static int alloc_all_mem(struct r8152 *t
+       spin_lock_init(&tp->rx_lock);
+       spin_lock_init(&tp->tx_lock);
+       INIT_LIST_HEAD(&tp->tx_free);
++      INIT_LIST_HEAD(&tp->rx_done);
+       skb_queue_head_init(&tp->tx_queue);
+       skb_queue_head_init(&tp->rx_queue);
+ 
diff --git a/queue-4.9/r8152-fix-the-rx-early-size-of-rtl8153.patch b/queue-4.9/r8152-fix-the-rx-early-size-of-rtl8153.patch

new file mode 100644 (file)

index 0000000..64698f2
--- /dev/null
+++ b/queue-4.9/r8152-fix-the-rx-early-size-of-rtl8153.patch
@@ -0,0 +1,53 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: hayeswang <hayeswang@realtek.com>
+Date: Mon, 20 Mar 2017 16:13:45 +0800
+Subject: r8152: fix the rx early size of RTL8153
+
+From: hayeswang <hayeswang@realtek.com>
+
+
+[ Upstream commit b20cb60e2b865638459e6ec82ad3536d3734e555 ]
+
+revert commit a59e6d815226 ("r8152: correct the rx early size") and
+fix the rx early size as
+
+       (rx buffer size - rx packet size - rx desc size - alignment) / 4
+
+Signed-off-by: Hayes Wang <hayeswang@realtek.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/r8152.c |    7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/usb/r8152.c
++++ b/drivers/net/usb/r8152.c
+@@ -32,7 +32,7 @@
+ #define NETNEXT_VERSION               "08"
+ 
+ /* Information for net */
+-#define NET_VERSION           "8"
++#define NET_VERSION           "9"
+ 
+ #define DRIVER_VERSION                "v1." NETNEXT_VERSION "." NET_VERSION
+ #define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_swsd@realtek.com>"
+@@ -501,6 +501,8 @@ enum rtl_register_content {
+ #define RTL8153_RMS           RTL8153_MAX_PACKET
+ #define RTL8152_TX_TIMEOUT    (5 * HZ)
+ #define RTL8152_NAPI_WEIGHT   64
++#define rx_reserved_size(x)   ((x) + VLAN_ETH_HLEN + CRC_SIZE + \
++                               sizeof(struct rx_desc) + RX_ALIGN)
+ 
+ /* rtl8152 flags */
+ enum rtl8152_flags {
+@@ -2253,8 +2255,7 @@ static void r8153_set_rx_early_timeout(s
+ 
+ static void r8153_set_rx_early_size(struct r8152 *tp)
+ {
+-      u32 mtu = tp->netdev->mtu;
+-      u32 ocp_data = (agg_buf_sz - mtu - VLAN_ETH_HLEN - VLAN_HLEN) / 8;
++      u32 ocp_data = (agg_buf_sz - rx_reserved_size(tp->netdev->mtu)) / 4;
+ 
+       ocp_write_word(tp, MCU_TYPE_USB, USB_RX_EARLY_SIZE, ocp_data);
+ }
diff --git a/queue-4.9/r8152-prevent-the-driver-from-transmitting-packets-with-carrier-off.patch b/queue-4.9/r8152-prevent-the-driver-from-transmitting-packets-with-carrier-off.patch

new file mode 100644 (file)

index 0000000..7983645
--- /dev/null
+++ b/queue-4.9/r8152-prevent-the-driver-from-transmitting-packets-with-carrier-off.patch
@@ -0,0 +1,71 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: hayeswang <hayeswang@realtek.com>
+Date: Thu, 23 Mar 2017 19:14:19 +0800
+Subject: r8152: prevent the driver from transmitting packets with carrier off
+
+From: hayeswang <hayeswang@realtek.com>
+
+
+[ Upstream commit 2f25abe6bac573928a990ccbdac75873add8127e ]
+
+The link status may change during autosuspend. After
+autoresume, the driver may try to transmit packets while the device
+is carrier off, because the interrupt transfer hasn't updated the
+link status yet. If the device is in ALDPS mode, the device
+would stop working.
+
+Another similar case is
+ 1. unplug the cable.
+ 2. interrupt transfer queue a work_queue for linking change.
+ 3. device enters the ALDPS mode.
+ 4. a tx occurs before the work_queue is called.
+
+Signed-off-by: Hayes Wang <hayeswang@realtek.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/r8152.c |   18 ++++++++++++++++--
+ 1 file changed, 16 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/usb/r8152.c
++++ b/drivers/net/usb/r8152.c
+@@ -1294,6 +1294,7 @@ static void intr_callback(struct urb *ur
+               }
+       } else {
+               if (netif_carrier_ok(tp->netdev)) {
++                      netif_stop_queue(tp->netdev);
+                       set_bit(RTL8152_LINK_CHG, &tp->flags);
+                       schedule_delayed_work(&tp->schedule, 0);
+               }
+@@ -3167,6 +3168,9 @@ static void set_carrier(struct r8152 *tp
+                       napi_enable(&tp->napi);
+                       netif_wake_queue(netdev);
+                       netif_info(tp, link, netdev, "carrier on\n");
++              } else if (netif_queue_stopped(netdev) &&
++                         skb_queue_len(&tp->tx_queue) < tp->tx_qlen) {
++                      netif_wake_queue(netdev);
+               }
+       } else {
+               if (netif_carrier_ok(netdev)) {
+@@ -3700,8 +3704,18 @@ static int rtl8152_resume(struct usb_int
+                       tp->rtl_ops.autosuspend_en(tp, false);
+                       napi_disable(&tp->napi);
+                       set_bit(WORK_ENABLE, &tp->flags);
+-                      if (netif_carrier_ok(tp->netdev))
+-                              rtl_start_rx(tp);
++
++                      if (netif_carrier_ok(tp->netdev)) {
++                              if (rtl8152_get_speed(tp) & LINK_STATUS) {
++                                      rtl_start_rx(tp);
++                              } else {
++                                      netif_carrier_off(tp->netdev);
++                                      tp->rtl_ops.disable(tp);
++                                      netif_info(tp, link, tp->netdev,
++                                                 "linking down\n");
++                              }
++                      }
++
+                       napi_enable(&tp->napi);
+                       clear_bit(SELECTIVE_SUSPEND, &tp->flags);
+                       smp_mb__after_atomic();
diff --git a/queue-4.9/rdma-iser-fix-possible-mr-leak-on-device-removal-event.patch b/queue-4.9/rdma-iser-fix-possible-mr-leak-on-device-removal-event.patch

new file mode 100644 (file)

index 0000000..9dcd78c
--- /dev/null
+++ b/queue-4.9/rdma-iser-fix-possible-mr-leak-on-device-removal-event.patch
@@ -0,0 +1,83 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Sagi Grimberg <sagi@grimberg.me>
+Date: Mon, 27 Feb 2017 20:16:33 +0200
+Subject: RDMA/iser: Fix possible mr leak on device removal event
+
+From: Sagi Grimberg <sagi@grimberg.me>
+
+
+[ Upstream commit ea174c9573b0e0c8bc1a7a90fe9360ccb7aa9cbb ]
+
+When the rdma device is removed, we must cleanup all
+the rdma resources within the DEVICE_REMOVAL event
+handler to let the device teardown gracefully. When
+this happens with live I/O, some memory regions are
+occupied. Thus, track them too and dereg all the mr's.
+
+We are safe with mr access by iscsi_iser_cleanup_task.
+
+Reported-by: Raju Rangoju <rajur@chelsio.com>
+Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
+Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
+Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/ulp/iser/iscsi_iser.h |    2 ++
+ drivers/infiniband/ulp/iser/iser_verbs.c |    8 +++++---
+ 2 files changed, 7 insertions(+), 3 deletions(-)
+
+--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
++++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
+@@ -430,6 +430,7 @@ struct iser_fr_desc {
+       struct list_head                  list;
+       struct iser_reg_resources         rsc;
+       struct iser_pi_context           *pi_ctx;
++      struct list_head                  all_list;
+ };
+ 
+ /**
+@@ -443,6 +444,7 @@ struct iser_fr_pool {
+       struct list_head        list;
+       spinlock_t              lock;
+       int                     size;
++      struct list_head        all_list;
+ };
+ 
+ /**
+--- a/drivers/infiniband/ulp/iser/iser_verbs.c
++++ b/drivers/infiniband/ulp/iser/iser_verbs.c
+@@ -362,6 +362,7 @@ int iser_alloc_fastreg_pool(struct ib_co
+       int i, ret;
+ 
+       INIT_LIST_HEAD(&fr_pool->list);
++      INIT_LIST_HEAD(&fr_pool->all_list);
+       spin_lock_init(&fr_pool->lock);
+       fr_pool->size = 0;
+       for (i = 0; i < cmds_max; i++) {
+@@ -373,6 +374,7 @@ int iser_alloc_fastreg_pool(struct ib_co
+               }
+ 
+               list_add_tail(&desc->list, &fr_pool->list);
++              list_add_tail(&desc->all_list, &fr_pool->all_list);
+               fr_pool->size++;
+       }
+ 
+@@ -392,13 +394,13 @@ void iser_free_fastreg_pool(struct ib_co
+       struct iser_fr_desc *desc, *tmp;
+       int i = 0;
+ 
+-      if (list_empty(&fr_pool->list))
++      if (list_empty(&fr_pool->all_list))
+               return;
+ 
+       iser_info("freeing conn %p fr pool\n", ib_conn);
+ 
+-      list_for_each_entry_safe(desc, tmp, &fr_pool->list, list) {
+-              list_del(&desc->list);
++      list_for_each_entry_safe(desc, tmp, &fr_pool->all_list, all_list) {
++              list_del(&desc->all_list);
+               iser_free_reg_res(&desc->rsc);
+               if (desc->pi_ctx)
+                       iser_free_pi_ctx(desc->pi_ctx);
diff --git a/queue-4.9/rtc-pl031-make-interrupt-optional.patch b/queue-4.9/rtc-pl031-make-interrupt-optional.patch

new file mode 100644 (file)

index 0000000..c92c720
--- /dev/null
+++ b/queue-4.9/rtc-pl031-make-interrupt-optional.patch
@@ -0,0 +1,53 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Fri, 29 Sep 2017 11:22:15 +0100
+Subject: rtc: pl031: make interrupt optional
+
+From: Russell King <rmk+kernel@armlinux.org.uk>
+
+
+[ Upstream commit 5b64a2965dfdfca8039e93303c64e2b15c19ff0c ]
+
+On some platforms, the interrupt for the PL031 is optional.  Avoid
+trying to claim the interrupt if it's not specified.
+
+Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/rtc/rtc-pl031.c |   14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+--- a/drivers/rtc/rtc-pl031.c
++++ b/drivers/rtc/rtc-pl031.c
+@@ -308,7 +308,8 @@ static int pl031_remove(struct amba_devi
+ 
+       dev_pm_clear_wake_irq(&adev->dev);
+       device_init_wakeup(&adev->dev, false);
+-      free_irq(adev->irq[0], ldata);
++      if (adev->irq[0])
++              free_irq(adev->irq[0], ldata);
+       rtc_device_unregister(ldata->rtc);
+       iounmap(ldata->base);
+       kfree(ldata);
+@@ -381,12 +382,13 @@ static int pl031_probe(struct amba_devic
+               goto out_no_rtc;
+       }
+ 
+-      if (request_irq(adev->irq[0], pl031_interrupt,
+-                      vendor->irqflags, "rtc-pl031", ldata)) {
+-              ret = -EIO;
+-              goto out_no_irq;
++      if (adev->irq[0]) {
++              ret = request_irq(adev->irq[0], pl031_interrupt,
++                                vendor->irqflags, "rtc-pl031", ldata);
++              if (ret)
++                      goto out_no_irq;
++              dev_pm_set_wake_irq(&adev->dev, adev->irq[0]);
+       }
+-      dev_pm_set_wake_irq(&adev->dev, adev->irq[0]);
+       return 0;
+ 
+ out_no_irq:
diff --git a/queue-4.9/rtc-set-the-alarm-to-the-next-expiring-timer.patch b/queue-4.9/rtc-set-the-alarm-to-the-next-expiring-timer.patch

new file mode 100644 (file)

index 0000000..202887a
--- /dev/null
+++ b/queue-4.9/rtc-set-the-alarm-to-the-next-expiring-timer.patch
@@ -0,0 +1,35 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
+Date: Thu, 28 Sep 2017 13:53:27 +0200
+Subject: rtc: set the alarm to the next expiring timer
+
+From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
+
+
+[ Upstream commit 74717b28cb32e1ad3c1042cafd76b264c8c0f68d ]
+
+If there is any non expired timer in the queue, the RTC alarm is never set.
+This is an issue when adding a timer that expires before the next non
+expired timer.
+
+Ensure the RTC alarm is set in that case.
+
+Fixes: 2b2f5ff00f63 ("rtc: interface: ignore expired timers when enqueuing new timers")
+Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/rtc/interface.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/rtc/interface.c
++++ b/drivers/rtc/interface.c
+@@ -772,7 +772,7 @@ static int rtc_timer_enqueue(struct rtc_
+       }
+ 
+       timerqueue_add(&rtc->timerqueue, &timer->node);
+-      if (!next) {
++      if (!next || ktime_before(timer->node.expires, next->expires)) {
+               struct rtc_wkalrm alarm;
+               int err;
+               alarm.time = rtc_ktime_to_tm(timer->node.expires);
diff --git a/queue-4.9/s390-qeth-no-eth-header-for-outbound-af_iucv.patch b/queue-4.9/s390-qeth-no-eth-header-for-outbound-af_iucv.patch

new file mode 100644 (file)

index 0000000..2cbbb2f
--- /dev/null
+++ b/queue-4.9/s390-qeth-no-eth-header-for-outbound-af_iucv.patch
@@ -0,0 +1,78 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Date: Thu, 23 Mar 2017 14:55:09 +0100
+Subject: s390/qeth: no ETH header for outbound AF_IUCV
+
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+
+
+[ Upstream commit acd9776b5c45ef02d1a210969a6fcc058afb76e3 ]
+
+With AF_IUCV traffic, the skb passed to hard_start_xmit() has a 14 byte
+slot at skb->data, intended for an ETH header. qeth_l3_fill_af_iucv_hdr()
+fills this ETH header... and then immediately moves it to the
+skb's headroom, where it disappears and is never seen again.
+
+But it's still possible for us to return NETDEV_TX_BUSY after the skb has
+been modified. Since we didn't get a private copy of the skb, the next
+time the skb is delivered to hard_start_xmit() it no longer has the
+expected layout (we moved the ETH header to the headroom, so skb->data
+now starts at the IUCV_TRANS header). So when qeth_l3_fill_af_iucv_hdr()
+does another round of rebuilding, the resulting qeth header ends up
+all wrong. On transmission, the buffer is then rejected by
+the HiperSockets device with SBALF15 = x'04'.
+When this error is passed back to af_iucv as TX_NOTIFY_UNREACHABLE, it
+tears down the offending socket.
+
+As the ETH header for AF_IUCV serves no purpose, just align the code to
+what we do for IP traffic on L3 HiperSockets: keep the ETH header at
+skb->data, and pass down data_offset = ETH_HLEN to qeth_fill_buffer().
+When mapping the payload into the SBAL elements, the ETH header is then
+stripped off. This avoids the skb manipulations in
+qeth_l3_fill_af_iucv_hdr(), and any buffer re-entering hard_start_xmit()
+after NETDEV_TX_BUSY is now processed properly.
+
+Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/s390/net/qeth_l3_main.c |   15 ++++-----------
+ 1 file changed, 4 insertions(+), 11 deletions(-)
+
+--- a/drivers/s390/net/qeth_l3_main.c
++++ b/drivers/s390/net/qeth_l3_main.c
+@@ -2612,17 +2612,13 @@ static void qeth_l3_fill_af_iucv_hdr(str
+       char daddr[16];
+       struct af_iucv_trans_hdr *iucv_hdr;
+ 
+-      skb_pull(skb, 14);
+-      card->dev->header_ops->create(skb, card->dev, 0,
+-                                    card->dev->dev_addr, card->dev->dev_addr,
+-                                    card->dev->addr_len);
+-      skb_pull(skb, 14);
+-      iucv_hdr = (struct af_iucv_trans_hdr *)skb->data;
+       memset(hdr, 0, sizeof(struct qeth_hdr));
+       hdr->hdr.l3.id = QETH_HEADER_TYPE_LAYER3;
+       hdr->hdr.l3.ext_flags = 0;
+-      hdr->hdr.l3.length = skb->len;
++      hdr->hdr.l3.length = skb->len - ETH_HLEN;
+       hdr->hdr.l3.flags = QETH_HDR_IPV6 | QETH_CAST_UNICAST;
++
++      iucv_hdr = (struct af_iucv_trans_hdr *) (skb->data + ETH_HLEN);
+       memset(daddr, 0, sizeof(daddr));
+       daddr[0] = 0xfe;
+       daddr[1] = 0x80;
+@@ -2826,10 +2822,7 @@ static int qeth_l3_hard_start_xmit(struc
+       if ((card->info.type == QETH_CARD_TYPE_IQD) &&
+           !skb_is_nonlinear(skb)) {
+               new_skb = skb;
+-              if (new_skb->protocol == ETH_P_AF_IUCV)
+-                      data_offset = 0;
+-              else
+-                      data_offset = ETH_HLEN;
++              data_offset = ETH_HLEN;
+               hdr = kmem_cache_alloc(qeth_core_header_cache, GFP_ATOMIC);
+               if (!hdr)
+                       goto tx_drop;
diff --git a/queue-4.9/s390-qeth-size-calculation-outbound-buffers.patch b/queue-4.9/s390-qeth-size-calculation-outbound-buffers.patch

new file mode 100644 (file)

index 0000000..9732dc1
--- /dev/null
+++ b/queue-4.9/s390-qeth-size-calculation-outbound-buffers.patch
@@ -0,0 +1,118 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Date: Thu, 23 Mar 2017 14:55:08 +0100
+Subject: s390/qeth: size calculation outbound buffers
+
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+
+
+[ Upstream commit 7d969d2e8890f546c8cec634b3aa5f57d4eef883 ]
+
+Depending on the device type, hard_start_xmit() builds different output
+buffer formats. For instance with HiperSockets, on both L2 and L3 we
+strip the ETH header from the skb - L3 doesn't need it, and L2 carries
+it in the buffer's header element.
+For this, we pass data_offset = ETH_HLEN all the way down to
+__qeth_fill_buffer(), where skb->data is then adjusted accordingly.
+But the initial size calculation still considers the *full* skb length
+(including the ETH header). So qeth_get_elements_no() can erroneously
+reject a skb as too big, even though it would actually fit into an
+output buffer once the ETH header has been trimmed off later.
+
+Fix this by passing an additional offset to qeth_get_elements_no(),
+that indicates where in the skb the on-wire data actually begins.
+Since the current code uses data_offset=-1 for some special handling
+on OSA, we need to clamp data_offset to 0...
+
+On HiperSockets this helps when sending ~MTU-size skbs with weird page
+alignment. No change for OSA or AF_IUCV.
+
+Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/s390/net/qeth_core.h      |    3 ++-
+ drivers/s390/net/qeth_core_main.c |    5 +++--
+ drivers/s390/net/qeth_l2_main.c   |    5 +++--
+ drivers/s390/net/qeth_l3_main.c   |    5 +++--
+ 4 files changed, 11 insertions(+), 7 deletions(-)
+
+--- a/drivers/s390/net/qeth_core.h
++++ b/drivers/s390/net/qeth_core.h
+@@ -969,7 +969,8 @@ int qeth_bridgeport_query_ports(struct q
+ int qeth_bridgeport_setrole(struct qeth_card *card, enum qeth_sbp_roles role);
+ int qeth_bridgeport_an_set(struct qeth_card *card, int enable);
+ int qeth_get_priority_queue(struct qeth_card *, struct sk_buff *, int, int);
+-int qeth_get_elements_no(struct qeth_card *, struct sk_buff *, int);
++int qeth_get_elements_no(struct qeth_card *card, struct sk_buff *skb,
++                       int extra_elems, int data_offset);
+ int qeth_get_elements_for_frags(struct sk_buff *);
+ int qeth_do_send_packet_fast(struct qeth_card *, struct qeth_qdio_out_q *,
+                       struct sk_buff *, struct qeth_hdr *, int, int, int);
+--- a/drivers/s390/net/qeth_core_main.c
++++ b/drivers/s390/net/qeth_core_main.c
+@@ -3842,6 +3842,7 @@ EXPORT_SYMBOL_GPL(qeth_get_elements_for_
+  * @card:                     qeth card structure, to check max. elems.
+  * @skb:                      SKB address
+  * @extra_elems:              extra elems needed, to check against max.
++ * @data_offset:              range starts at skb->data + data_offset
+  *
+  * Returns the number of pages, and thus QDIO buffer elements, needed to cover
+  * skb data, including linear part and fragments. Checks if the result plus
+@@ -3849,10 +3850,10 @@ EXPORT_SYMBOL_GPL(qeth_get_elements_for_
+  * Note: extra_elems is not included in the returned result.
+  */
+ int qeth_get_elements_no(struct qeth_card *card,
+-                   struct sk_buff *skb, int extra_elems)
++                   struct sk_buff *skb, int extra_elems, int data_offset)
+ {
+       int elements = qeth_get_elements_for_range(
+-                              (addr_t)skb->data,
++                              (addr_t)skb->data + data_offset,
+                               (addr_t)skb->data + skb_headlen(skb)) +
+                       qeth_get_elements_for_frags(skb);
+ 
+--- a/drivers/s390/net/qeth_l2_main.c
++++ b/drivers/s390/net/qeth_l2_main.c
+@@ -865,7 +865,7 @@ static int qeth_l2_hard_start_xmit(struc
+        * chaining we can not send long frag lists
+        */
+       if ((card->info.type != QETH_CARD_TYPE_IQD) &&
+-          !qeth_get_elements_no(card, new_skb, 0)) {
++          !qeth_get_elements_no(card, new_skb, 0, 0)) {
+               int lin_rc = skb_linearize(new_skb);
+ 
+               if (card->options.performance_stats) {
+@@ -910,7 +910,8 @@ static int qeth_l2_hard_start_xmit(struc
+               }
+       }
+ 
+-      elements = qeth_get_elements_no(card, new_skb, elements_needed);
++      elements = qeth_get_elements_no(card, new_skb, elements_needed,
++                                      (data_offset > 0) ? data_offset : 0);
+       if (!elements) {
+               if (data_offset >= 0)
+                       kmem_cache_free(qeth_core_header_cache, hdr);
+--- a/drivers/s390/net/qeth_l3_main.c
++++ b/drivers/s390/net/qeth_l3_main.c
+@@ -2870,7 +2870,7 @@ static int qeth_l3_hard_start_xmit(struc
+        */
+       if ((card->info.type != QETH_CARD_TYPE_IQD) &&
+           ((use_tso && !qeth_l3_get_elements_no_tso(card, new_skb, 1)) ||
+-           (!use_tso && !qeth_get_elements_no(card, new_skb, 0)))) {
++           (!use_tso && !qeth_get_elements_no(card, new_skb, 0, 0)))) {
+               int lin_rc = skb_linearize(new_skb);
+ 
+               if (card->options.performance_stats) {
+@@ -2912,7 +2912,8 @@ static int qeth_l3_hard_start_xmit(struc
+ 
+       elements = use_tso ?
+                  qeth_l3_get_elements_no_tso(card, new_skb, hdr_elements) :
+-                 qeth_get_elements_no(card, new_skb, hdr_elements);
++                 qeth_get_elements_no(card, new_skb, hdr_elements,
++                                      (data_offset > 0) ? data_offset : 0);
+       if (!elements) {
+               if (data_offset >= 0)
+                       kmem_cache_free(qeth_core_header_cache, hdr);
diff --git a/queue-4.9/sch_dsmark-fix-invalid-skb_cow-usage.patch b/queue-4.9/sch_dsmark-fix-invalid-skb_cow-usage.patch

new file mode 100644 (file)

index 0000000..ce38521
--- /dev/null
+++ b/queue-4.9/sch_dsmark-fix-invalid-skb_cow-usage.patch
@@ -0,0 +1,53 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 17 Mar 2017 08:05:28 -0700
+Subject: sch_dsmark: fix invalid skb_cow() usage
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit aea92fb2e09e29653b023d4254ac9fbf94221538 ]
+
+skb_cow(skb, sizeof(ip header)) is not very helpful in this context.
+
+First we need to use pskb_may_pull() to make sure the ip header
+is in skb linear part, then use skb_try_make_writable() to
+address clones issues.
+
+Fixes: 4c30719f4f55 ("[PKT_SCHED] dsmark: handle cloned and non-linear skb's")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_dsmark.c |   10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+--- a/net/sched/sch_dsmark.c
++++ b/net/sched/sch_dsmark.c
+@@ -200,9 +200,13 @@ static int dsmark_enqueue(struct sk_buff
+       pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p);
+ 
+       if (p->set_tc_index) {
++              int wlen = skb_network_offset(skb);
++
+               switch (tc_skb_protocol(skb)) {
+               case htons(ETH_P_IP):
+-                      if (skb_cow_head(skb, sizeof(struct iphdr)))
++                      wlen += sizeof(struct iphdr);
++                      if (!pskb_may_pull(skb, wlen) ||
++                          skb_try_make_writable(skb, wlen))
+                               goto drop;
+ 
+                       skb->tc_index = ipv4_get_dsfield(ip_hdr(skb))
+@@ -210,7 +214,9 @@ static int dsmark_enqueue(struct sk_buff
+                       break;
+ 
+               case htons(ETH_P_IPV6):
+-                      if (skb_cow_head(skb, sizeof(struct ipv6hdr)))
++                      wlen += sizeof(struct ipv6hdr);
++                      if (!pskb_may_pull(skb, wlen) ||
++                          skb_try_make_writable(skb, wlen))
+                               goto drop;
+ 
+                       skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb))
diff --git a/queue-4.9/scsi-cxgb4i-fix-tx-skb-leak.patch b/queue-4.9/scsi-cxgb4i-fix-tx-skb-leak.patch

new file mode 100644 (file)

index 0000000..e6b5551
--- /dev/null
+++ b/queue-4.9/scsi-cxgb4i-fix-tx-skb-leak.patch
@@ -0,0 +1,32 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Varun Prakash <varun@chelsio.com>
+Date: Wed, 11 Oct 2017 19:33:07 +0530
+Subject: scsi: cxgb4i: fix Tx skb leak
+
+From: Varun Prakash <varun@chelsio.com>
+
+
+[ Upstream commit 9b3a081fb62158b50bcc90522ca2423017544367 ]
+
+In case of connection reset Tx skb queue can have some skbs which are
+not transmitted so purge Tx skb queue in release_offload_resources() to
+avoid skb leak.
+
+Signed-off-by: Varun Prakash <varun@chelsio.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/cxgbi/cxgb4i/cxgb4i.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
++++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
+@@ -1347,6 +1347,7 @@ static void release_offload_resources(st
+               csk, csk->state, csk->flags, csk->tid);
+ 
+       cxgbi_sock_free_cpl_skbs(csk);
++      cxgbi_sock_purge_write_queue(csk);
+       if (csk->wr_cred != csk->wr_max_cred) {
+               cxgbi_sock_purge_wr_queue(csk);
+               cxgbi_sock_reset_wr_list(csk);
diff --git a/queue-4.9/scsi-lpfc-fix-pt2pt-prli-reject.patch b/queue-4.9/scsi-lpfc-fix-pt2pt-prli-reject.patch

new file mode 100644 (file)

index 0000000..58b9851
--- /dev/null
+++ b/queue-4.9/scsi-lpfc-fix-pt2pt-prli-reject.patch
@@ -0,0 +1,39 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Dick Kennedy <dick.kennedy@broadcom.com>
+Date: Thu, 23 Mar 2017 08:47:18 -0400
+Subject: scsi: lpfc: Fix PT2PT PRLI reject
+
+From: Dick Kennedy <dick.kennedy@broadcom.com>
+
+
+[ Upstream commit a71e3cdcfce4880a4578915e110e3eaed1659765 ]
+
+lpfc cannot establish connection with targets that send PRLI in P2P
+configurations.
+
+If lpfc rejects a PRLI that is sent from a target, the target will not
+resend and will reject the PRLI sent from the initiator.
+
+[mkp: applied by hand]
+
+Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
+Signed-off-by: James Smart <james.smart@broadcom.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/lpfc/lpfc_els.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/scsi/lpfc/lpfc_els.c
++++ b/drivers/scsi/lpfc/lpfc_els.c
+@@ -7782,7 +7782,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *p
+                       did, vport->port_state, ndlp->nlp_flag);
+ 
+               phba->fc_stat.elsRcvPRLI++;
+-              if (vport->port_state < LPFC_DISC_AUTH) {
++              if ((vport->port_state < LPFC_DISC_AUTH) &&
++                  (vport->fc_flag & FC_FABRIC)) {
+                       rjt_err = LSRJT_UNABLE_TPC;
+                       rjt_exp = LSEXP_NOTHING_MORE;
+                       break;
diff --git a/queue-4.9/scsi-lpfc-fix-secure-firmware-updates.patch b/queue-4.9/scsi-lpfc-fix-secure-firmware-updates.patch

new file mode 100644 (file)

index 0000000..50abc72
--- /dev/null
+++ b/queue-4.9/scsi-lpfc-fix-secure-firmware-updates.patch
@@ -0,0 +1,37 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Dick Kennedy <dick.kennedy@broadcom.com>
+Date: Fri, 29 Sep 2017 17:34:42 -0700
+Subject: scsi: lpfc: Fix secure firmware updates
+
+From: Dick Kennedy <dick.kennedy@broadcom.com>
+
+
+[ Upstream commit 184fc2b9a8bcbda9c14d0a1e7fbecfc028c7702e ]
+
+Firmware update fails with: status x17 add_status x56 on the final write
+
+If multiple DMA buffers are used for the download, some firmware revs
+have difficulty with signatures and crcs split across the dma buffer
+boundaries.  Resolve by making all writes be a single 4k page in length.
+
+Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
+Signed-off-by: James Smart <james.smart@broadcom.com>
+Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/lpfc/lpfc_hw4.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/scsi/lpfc/lpfc_hw4.h
++++ b/drivers/scsi/lpfc/lpfc_hw4.h
+@@ -3232,7 +3232,7 @@ struct lpfc_mbx_get_port_name {
+ #define MB_CEQ_STATUS_QUEUE_FLUSHING          0x4
+ #define MB_CQE_STATUS_DMA_FAILED              0x5
+ 
+-#define LPFC_MBX_WR_CONFIG_MAX_BDE            8
++#define LPFC_MBX_WR_CONFIG_MAX_BDE            1
+ struct lpfc_mbx_wr_object {
+       struct mbox_header header;
+       union {
diff --git a/queue-4.9/scsi-lpfc-plogi-failures-during-npiv-testing.patch b/queue-4.9/scsi-lpfc-plogi-failures-during-npiv-testing.patch

new file mode 100644 (file)

index 0000000..7bef84b
--- /dev/null
+++ b/queue-4.9/scsi-lpfc-plogi-failures-during-npiv-testing.patch
@@ -0,0 +1,39 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Dick Kennedy <dick.kennedy@broadcom.com>
+Date: Fri, 29 Sep 2017 17:34:32 -0700
+Subject: scsi: lpfc: PLOGI failures during NPIV testing
+
+From: Dick Kennedy <dick.kennedy@broadcom.com>
+
+
+[ Upstream commit e8bcf0ae4c0346fdc78ebefe0eefcaa6a6622d38 ]
+
+Local Reject/Invalid RPI errors seen during discovery.
+
+Temporary RPI cleanup was occurring regardless of SLI rev. It's only
+necessary on SLI-4.
+
+Adjust the test for whether cleanup is necessary.
+
+Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
+Signed-off-by: James Smart <james.smart@broadcom.com>
+Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/lpfc/lpfc_hbadisc.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
++++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
+@@ -4784,7 +4784,8 @@ lpfc_nlp_remove(struct lpfc_vport *vport
+       lpfc_cancel_retry_delay_tmo(vport, ndlp);
+       if ((ndlp->nlp_flag & NLP_DEFER_RM) &&
+           !(ndlp->nlp_flag & NLP_REG_LOGIN_SEND) &&
+-          !(ndlp->nlp_flag & NLP_RPI_REGISTERED)) {
++          !(ndlp->nlp_flag & NLP_RPI_REGISTERED) &&
++          phba->sli_rev != LPFC_SLI_REV4) {
+               /* For this case we need to cleanup the default rpi
+                * allocated by the firmware.
+                */
diff --git a/queue-4.9/scsi-mpt3sas-fix-io-error-occurs-on-pulling-out-a-drive-from-raid1-volume-created-on-two-sata-drive.patch b/queue-4.9/scsi-mpt3sas-fix-io-error-occurs-on-pulling-out-a-drive-from-raid1-volume-created-on-two-sata-drive.patch

new file mode 100644 (file)

index 0000000..648691f
--- /dev/null
+++ b/queue-4.9/scsi-mpt3sas-fix-io-error-occurs-on-pulling-out-a-drive-from-raid1-volume-created-on-two-sata-drive.patch
@@ -0,0 +1,43 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Sreekanth Reddy <sreekanth.reddy@broadcom.com>
+Date: Tue, 10 Oct 2017 18:41:18 +0530
+Subject: scsi: mpt3sas: Fix IO error occurs on pulling out a drive from RAID1 volume created on two SATA drive
+
+From: Sreekanth Reddy <sreekanth.reddy@broadcom.com>
+
+
+[ Upstream commit 2ce9a3645299ba1752873d333d73f67620f4550b ]
+
+Whenever an I/O for a RAID volume fails with IOCStatus
+MPI2_IOCSTATUS_SCSI_IOC_TERMINATED and SCSIState equal to
+(MPI2_SCSI_STATE_TERMINATED | MPI2_SCSI_STATE_NO_SCSI_STATUS) then
+return the I/O to the SCSI midlayer with "DID_RESET" (i.e. retry the I/O
+indefinitely) set in the host byte.
+
+Previously, the driver was completing the I/O with "DID_SOFT_ERROR"
+which causes the I/O to be quickly retried. However, firmware needed
+more time and hence I/Os were failing.
+
+Signed-off-by: Sreekanth Reddy <Sreekanth.Reddy@broadcom.com>
+Reviewed-by: Tomas Henzl <thenzl@redhat.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/mpt3sas/mpt3sas_scsih.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
++++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+@@ -4770,6 +4770,11 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *i
+               } else if (log_info == VIRTUAL_IO_FAILED_RETRY) {
+                       scmd->result = DID_RESET << 16;
+                       break;
++              } else if ((scmd->device->channel == RAID_CHANNEL) &&
++                 (scsi_state == (MPI2_SCSI_STATE_TERMINATED |
++                 MPI2_SCSI_STATE_NO_SCSI_STATUS))) {
++                      scmd->result = DID_RESET << 16;
++                      break;
+               }
+               scmd->result = DID_SOFT_ERROR << 16;
+               break;
diff --git a/queue-4.9/sctp-out_qlen-should-be-updated-when-pruning-unsent-queue.patch b/queue-4.9/sctp-out_qlen-should-be-updated-when-pruning-unsent-queue.patch

new file mode 100644 (file)

index 0000000..7ed76f2
--- /dev/null
+++ b/queue-4.9/sctp-out_qlen-should-be-updated-when-pruning-unsent-queue.patch
@@ -0,0 +1,58 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Sat, 18 Mar 2017 20:03:59 +0800
+Subject: sctp: out_qlen should be updated when pruning unsent queue
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit 23bb09cfbe04076ef647da3889a5a5ab6cbe6f15 ]
+
+This patch is to fix the issue that sctp_prsctp_prune_unsent forgot
+to update q->out_qlen when removing a chunk from the unsent queue.
+
+Fixes: 8dbdf1f5b09c ("sctp: implement prsctp PRIO policy")
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/outqueue.c |   11 +++++------
+ 1 file changed, 5 insertions(+), 6 deletions(-)
+
+--- a/net/sctp/outqueue.c
++++ b/net/sctp/outqueue.c
+@@ -382,17 +382,18 @@ static int sctp_prsctp_prune_sent(struct
+ }
+ 
+ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
+-                                  struct sctp_sndrcvinfo *sinfo,
+-                                  struct list_head *queue, int msg_len)
++                                  struct sctp_sndrcvinfo *sinfo, int msg_len)
+ {
++      struct sctp_outq *q = &asoc->outqueue;
+       struct sctp_chunk *chk, *temp;
+ 
+-      list_for_each_entry_safe(chk, temp, queue, list) {
++      list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) {
+               if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
+                   chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
+                       continue;
+ 
+               list_del_init(&chk->list);
++              q->out_qlen -= chk->skb->len;
+               asoc->sent_cnt_removable--;
+               asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
+ 
+@@ -431,9 +432,7 @@ void sctp_prsctp_prune(struct sctp_assoc
+                       return;
+       }
+ 
+-      sctp_prsctp_prune_unsent(asoc, sinfo,
+-                               &asoc->outqueue.out_chunk_list,
+-                               msg_len);
++      sctp_prsctp_prune_unsent(asoc, sinfo, msg_len);
+ }
+ 
+ /* Mark all the eligible packets on a transport for retransmission.  */
diff --git a/queue-4.9/series b/queue-4.9/series

index 8ef57ee9089da8bb4fdf92e1ba24623c11f4d15c..1fd3d64ae598883fc20b6f07435f18a4673e4a32 100644 (file)
--- a/queue-4.9/series
+++ b/queue-4.9/series
@@ -1,2 +1,114 @@
  cxl-check-if-vphb-exists-before-iterating-over-afu-devices.patch
  arm64-initialise-high_memory-global-variable-earlier.patch
+x86-entry-unwind-create-stack-frames-for-saved-interrupt-registers.patch
+x86-mm-reimplement-flush_tlb_page-using-flush_tlb_mm_range.patch
+x86-mm-reduce-indentation-in-flush_tlb_func.patch
+mm-x86-mm-make-the-batched-unmap-tlb-flush-api-more-generic.patch
+x86-mm-pass-flush_tlb_info-to-flush_tlb_others-etc.patch
+x86-mm-change-the-leave_mm-condition-for-local-tlb-flushes.patch
+x86-mm-refactor-flush_tlb_mm_range-to-merge-local-and-remote-cases.patch
+x86-mm-use-new-merged-flush-logic-in-arch_tlbbatch_flush.patch
+x86-mm-remove-the-up-asm-tlbflush.h-code-always-use-the-formerly-smp-code.patch
+x86-mm-rework-lazy-tlb-to-track-the-actual-loaded-mm.patch
+x86-mm-be-more-consistent-wrt-page_shift-vs-page_size-in-tlb-flush-code.patch
+x86-mm-kvm-teach-kvm-s-vmx-code-that-cr3-isn-t-a-constant.patch
+kvm-nvmx-fix-host_cr3-host_cr4-cache.patch
+alsa-hda-add-support-for-docking-station-for-hp-820-g2.patch
+alsa-hda-add-support-for-docking-station-for-hp-840-g3.patch
+kvm-fix-usage-of-uninit-spinlock-in-avic_vm_destroy.patch
+hid-corsair-support-for-k65-k70-rapidfire-and-scimitar-pro-rgb.patch
+hid-corsair-add-driver-scimitar-pro-rgb-gaming-mouse-1b1c-1b3e-support-to-hid-corsair.patch
+arm-kprobes-fix-the-return-address-of-multiple-kretprobes.patch
+arm-kprobes-align-stack-to-8-bytes-in-test-code.patch
+nvme-loop-handle-cpu-unplug-when-re-establishing-the-controller.patch
+cpuidle-validate-cpu_dev-in-cpuidle_add_sysfs.patch
+r8152-fix-the-list-rx_done-may-be-used-without-initialization.patch
+crypto-deadlock-between-crypto_alg_sem-rtnl_mutex-genl_mutex.patch
+vsock-track-pkt-owner-vsock.patch
+vhost-vsock-add-pkt-cancel-capability.patch
+vsock-cancel-packets-when-failing-to-connect.patch
+sch_dsmark-fix-invalid-skb_cow-usage.patch
+bna-integer-overflow-bug-in-debugfs.patch
+sctp-out_qlen-should-be-updated-when-pruning-unsent-queue.patch
+net-qmi_wwan-add-usb-ids-for-mdm6600-modem-on-motorola-droid-4.patch
+hwmon-max31790-set-correct-pwm-value.patch
+usb-gadget-f_uvc-sanity-check-wmaxpacketsize-for-superspeed.patch
+usb-gadget-udc-remove-pointer-dereference-after-free.patch
+netfilter-nfnl_cthelper-fix-runtime-expectation-policy-updates.patch
+netfilter-nfnl_cthelper-fix-memory-leak.patch
+iommu-exynos-workaround-flpd-cache-flush-issues-for-sysmmu-v5.patch
+r8152-fix-the-rx-early-size-of-rtl8153.patch
+tipc-fix-nametbl-deadlock-at-tipc_nametbl_unsubscribe.patch
+inet-frag-release-spinlock-before-calling-icmp_send.patch
+pinctrl-st-add-irq_request-release_resources-callbacks.patch
+scsi-lpfc-fix-pt2pt-prli-reject.patch
+kvm-vmx-flush-tlb-when-the-apic-access-address-changes.patch
+kvm-x86-correct-async-page-present-tracepoint.patch
+kvm-vmx-fix-enable-vpid-conditions.patch
+arm-dts-ti-fix-pci-bus-dtc-warnings.patch
+hwmon-asus_atk0110-fix-uninitialized-data-access.patch
+i2c-mux-pca954x-add-missing-pca9546-definition-to-chip_desc.patch
+hid-xinmo-fix-for-out-of-range-for-tht-2p-arcade-controller.patch
+asoc-sti-fix-reader-substream-pointer-set.patch
+r8152-prevent-the-driver-from-transmitting-packets-with-carrier-off.patch
+s390-qeth-size-calculation-outbound-buffers.patch
+s390-qeth-no-eth-header-for-outbound-af_iucv.patch
+bna-avoid-writing-uninitialized-data-into-hw-registers.patch
+i40iw-receive-netdev-events-post-inet_notifier-state.patch
+ib-core-protect-against-self-requeue-of-a-cq-work-item.patch
+infiniband-fix-alignment-of-mmap-cookies-to-support-vipt-caching.patch
+nbd-set-queue-timeout-properly.patch
+net-do-not-allow-negative-values-for-busy_read-and-busy_poll-sysctl-interfaces.patch
+ib-rxe-double-free-on-error.patch
+ib-rxe-increment-msn-only-when-completing-a-request.patch
+i40e-do-not-enable-napi-on-q_vectors-that-have-no-rings.patch
+rdma-iser-fix-possible-mr-leak-on-device-removal-event.patch
+irda-vlsi_ir-fix-check-for-dma-mapping-errors.patch
+netfilter-nfnl_cthelper-fix-a-race-when-walk-the-nf_ct_helper_hash-table.patch
+netfilter-nf_nat_snmp-fix-panic-when-snmp_trap_helper-fails-to-register.patch
+arm-dts-am335x-evmsk-adjust-mmc2-param-to-allow-suspend.patch
+cpufreq-fix-creation-of-symbolic-links-to-policy-directories.patch
+net-ipconfig-fix-ic_close_devs-use-after-free.patch
+kvm-pci-assign-do-not-map-smm-memory-slot-pages-in-vt-d-page-tables.patch
+virtio-balloon-use-actual-number-of-stats-for-stats-queue-buffers.patch
+virtio_balloon-prevent-uninitialized-variable-use.patch
+isdn-kcapi-avoid-uninitialized-data.patch
+net-moxa-fix-tx-overrun-memory-leak.patch
+xhci-plat-register-shutdown-for-xhci_plat.patch
+netfilter-nfnetlink_queue-fix-secctx-memory-leak.patch
+btrfs-fix-an-integer-overflow-check.patch
+arm-dma-mapping-disallow-dma_get_sgtable-for-non-kernel-managed-memory.patch
+cpuidle-powernv-pass-correct-drv-cpumask-for-registration.patch
+bnxt_en-fix-null-pointer-dereference-in-reopen-failure-path.patch
+backlight-pwm_bl-fix-overflow-condition.patch
+crypto-crypto4xx-increase-context-and-scatter-ring-buffer-elements.patch
+rtc-pl031-make-interrupt-optional.patch
+kvm-mm-account-kvm-related-kmem-slabs-to-kmemcg.patch
+net-phy-at803x-change-error-to-einval-for-invalid-mac.patch
+pci-avoid-bus-reset-if-bridge-itself-is-broken.patch
+scsi-cxgb4i-fix-tx-skb-leak.patch
+scsi-mpt3sas-fix-io-error-occurs-on-pulling-out-a-drive-from-raid1-volume-created-on-two-sata-drive.patch
+pci-create-sr-iov-virtfn-physfn-links-before-attaching-driver.patch
+pm-opp-move-error-message-to-debug-level.patch
+igb-check-memory-allocation-failure.patch
+ixgbe-fix-use-of-uninitialized-padding.patch
+ib-rxe-check-for-allocation-failure-on-elem.patch
+pci-aer-report-non-fatal-errors-only-to-the-affected-endpoint.patch
+tracing-exclude-generic-fields-from-histograms.patch
+percpu-don-t-forget-to-free-the-temporary-struct-pcpu_alloc_info.patch
+asoc-img-parallel-out-add-pm_runtime_get-put-to-set_fmt-callback.patch
+fm10k-fix-mis-ordered-parameters-in-declaration-for-.ndo_set_vf_bw.patch
+scsi-lpfc-fix-secure-firmware-updates.patch
+scsi-lpfc-plogi-failures-during-npiv-testing.patch
+vfio-pci-virtualize-maximum-payload-size.patch
+fm10k-ensure-we-process-sm-mbx-when-processing-vf-mbx.patch
+net-ipv6-send-ns-for-dad-when-link-operationally-up.patch
+staging-greybus-light-release-memory-obtained-by-kasprintf.patch
+clk-sunxi-ng-sun6i-rename-hdmi-ddc-clock-to-avoid-name-collision.patch
+tcp-fix-under-evaluated-ssthresh-in-tcp-vegas.patch
+rtc-set-the-alarm-to-the-next-expiring-timer.patch
+cpuidle-fix-broadcast-control-when-broadcast-can-not-be-entered.patch
+x86-kvm-vmx-simplify-segment_base.patch
+x86-unify-tss_struct.patch
+x86-kvm-vmx-defer-tr-reload-after-vm-exit.patch
+x86-kvm-vmx-remove-unused-variable-in-segment_base.patch
diff --git a/queue-4.9/staging-greybus-light-release-memory-obtained-by-kasprintf.patch b/queue-4.9/staging-greybus-light-release-memory-obtained-by-kasprintf.patch

new file mode 100644 (file)

index 0000000..6af7868
--- /dev/null
+++ b/queue-4.9/staging-greybus-light-release-memory-obtained-by-kasprintf.patch
@@ -0,0 +1,32 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Arvind Yadav <arvind.yadav.cs@gmail.com>
+Date: Sat, 23 Sep 2017 13:25:30 +0530
+Subject: staging: greybus: light: Release memory obtained by kasprintf
+
+From: Arvind Yadav <arvind.yadav.cs@gmail.com>
+
+
+[ Upstream commit 04820da21050b35eed68aa046115d810163ead0c ]
+
+Free memory region, if gb_lights_channel_config is not successful.
+
+Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
+Reviewed-by: Rui Miguel Silva <rmfrfs@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/staging/greybus/light.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/staging/greybus/light.c
++++ b/drivers/staging/greybus/light.c
+@@ -924,6 +924,8 @@ static void __gb_lights_led_unregister(s
+               return;
+ 
+       led_classdev_unregister(cdev);
++      kfree(cdev->name);
++      cdev->name = NULL;
+       channel->led = NULL;
+ }
+ 
diff --git a/queue-4.9/tcp-fix-under-evaluated-ssthresh-in-tcp-vegas.patch b/queue-4.9/tcp-fix-under-evaluated-ssthresh-in-tcp-vegas.patch

new file mode 100644 (file)

index 0000000..51f5f1d
--- /dev/null
+++ b/queue-4.9/tcp-fix-under-evaluated-ssthresh-in-tcp-vegas.patch
@@ -0,0 +1,33 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Hoang Tran <tranviethoang.vn@gmail.com>
+Date: Wed, 27 Sep 2017 18:30:58 +0200
+Subject: tcp: fix under-evaluated ssthresh in TCP Vegas
+
+From: Hoang Tran <tranviethoang.vn@gmail.com>
+
+
+[ Upstream commit cf5d74b85ef40c202c76d90959db4d850f301b95 ]
+
+With the commit 76174004a0f19785 (tcp: do not slow start when cwnd equals
+ssthresh), the comparison to the reduced cwnd in tcp_vegas_ssthresh() would
+under-evaluate the ssthresh.
+
+Signed-off-by: Hoang Tran <hoang.tran@uclouvain.be>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_vegas.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp_vegas.c
++++ b/net/ipv4/tcp_vegas.c
+@@ -158,7 +158,7 @@ EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event);
+ 
+ static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp)
+ {
+-      return  min(tp->snd_ssthresh, tp->snd_cwnd-1);
++      return  min(tp->snd_ssthresh, tp->snd_cwnd);
+ }
+ 
+ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
diff --git a/queue-4.9/tipc-fix-nametbl-deadlock-at-tipc_nametbl_unsubscribe.patch b/queue-4.9/tipc-fix-nametbl-deadlock-at-tipc_nametbl_unsubscribe.patch

new file mode 100644 (file)

index 0000000..28b7d55
--- /dev/null
+++ b/queue-4.9/tipc-fix-nametbl-deadlock-at-tipc_nametbl_unsubscribe.patch
@@ -0,0 +1,102 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Ying Xue <ying.xue@windriver.com>
+Date: Tue, 21 Mar 2017 10:47:49 +0100
+Subject: tipc: fix nametbl deadlock at tipc_nametbl_unsubscribe
+
+From: Ying Xue <ying.xue@windriver.com>
+
+
+[ Upstream commit 557d054c01da0337ca81de9e9d9206d57245b57e ]
+
+Until now, tipc_nametbl_unsubscribe() is called at subscriptions
+reference count cleanup. Usually the subscriptions cleanup is
+called at subscription timeout or at subscription cancel or at
+subscriber delete.
+
+We have ignored the possibility of this being called from other
+locations, which causes deadlock as we try to grab the
+tn->nametbl_lock while holding it already.
+
+   CPU1:                             CPU2:
+----------                     ----------------
+tipc_nametbl_publish
+spin_lock_bh(&tn->nametbl_lock)
+tipc_nametbl_insert_publ
+tipc_nameseq_insert_publ
+tipc_subscrp_report_overlap
+tipc_subscrp_get
+tipc_subscrp_send_event
+                             tipc_close_conn
+                             tipc_subscrb_release_cb
+                             tipc_subscrb_delete
+                             tipc_subscrp_put
+tipc_subscrp_put
+tipc_subscrp_kref_release
+tipc_nametbl_unsubscribe
+spin_lock_bh(&tn->nametbl_lock)
+<<grab nametbl_lock again>>
+
+   CPU1:                              CPU2:
+----------                     ----------------
+tipc_nametbl_stop
+spin_lock_bh(&tn->nametbl_lock)
+tipc_purge_publications
+tipc_nameseq_remove_publ
+tipc_subscrp_report_overlap
+tipc_subscrp_get
+tipc_subscrp_send_event
+                             tipc_close_conn
+                             tipc_subscrb_release_cb
+                             tipc_subscrb_delete
+                             tipc_subscrp_put
+tipc_subscrp_put
+tipc_subscrp_kref_release
+tipc_nametbl_unsubscribe
+spin_lock_bh(&tn->nametbl_lock)
+<<grab nametbl_lock again>>
+
+In this commit, we advance the calling of tipc_nametbl_unsubscribe()
+from the refcount cleanup to the intended callers.
+
+Fixes: d094c4d5f5c7 ("tipc: add subscription refcount to avoid invalid delete")
+Reported-by: John Thompson <thompa.atl@gmail.com>
+Acked-by: Jon Maloy <jon.maloy@ericsson.com>
+Signed-off-by: Ying Xue <ying.xue@windriver.com>
+Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tipc/subscr.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/net/tipc/subscr.c
++++ b/net/tipc/subscr.c
+@@ -141,6 +141,11 @@ void tipc_subscrp_report_overlap(struct
+ static void tipc_subscrp_timeout(unsigned long data)
+ {
+       struct tipc_subscription *sub = (struct tipc_subscription *)data;
++      struct tipc_subscriber *subscriber = sub->subscriber;
++
++      spin_lock_bh(&subscriber->lock);
++      tipc_nametbl_unsubscribe(sub);
++      spin_unlock_bh(&subscriber->lock);
+ 
+       /* Notify subscriber of timeout */
+       tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper,
+@@ -173,7 +178,6 @@ static void tipc_subscrp_kref_release(st
+       struct tipc_subscriber *subscriber = sub->subscriber;
+ 
+       spin_lock_bh(&subscriber->lock);
+-      tipc_nametbl_unsubscribe(sub);
+       list_del(&sub->subscrp_list);
+       atomic_dec(&tn->subscription_count);
+       spin_unlock_bh(&subscriber->lock);
+@@ -205,6 +209,7 @@ static void tipc_subscrb_subscrp_delete(
+               if (s && memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr)))
+                       continue;
+ 
++              tipc_nametbl_unsubscribe(sub);
+               tipc_subscrp_get(sub);
+               spin_unlock_bh(&subscriber->lock);
+               tipc_subscrp_delete(sub);
diff --git a/queue-4.9/tracing-exclude-generic-fields-from-histograms.patch b/queue-4.9/tracing-exclude-generic-fields-from-histograms.patch

new file mode 100644 (file)

index 0000000..7e26365
--- /dev/null
+++ b/queue-4.9/tracing-exclude-generic-fields-from-histograms.patch
@@ -0,0 +1,47 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Tom Zanussi <tom.zanussi@linux.intel.com>
+Date: Fri, 22 Sep 2017 14:58:17 -0500
+Subject: tracing: Exclude 'generic fields' from histograms
+
+From: Tom Zanussi <tom.zanussi@linux.intel.com>
+
+
+[ Upstream commit a15f7fc20389a8827d5859907568b201234d4b79 ]
+
+There are a small number of 'generic fields' (comm/COMM/cpu/CPU) that
+are found by trace_find_event_field() but are only meant for
+filtering.  Specifically, unlike normal fields, they have a size
+of 0 and thus wreak havoc when used as a histogram key.
+
+Exclude these (return -EINVAL) when used as histogram keys.
+
+Link: http://lkml.kernel.org/r/956154cbc3e8a4f0633d619b886c97f0f0edf7b4.1506105045.git.tom.zanussi@linux.intel.com
+
+Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/trace_events_hist.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/kernel/trace/trace_events_hist.c
++++ b/kernel/trace/trace_events_hist.c
+@@ -449,7 +449,7 @@ static int create_val_field(struct hist_
+       }
+ 
+       field = trace_find_event_field(file->event_call, field_name);
+-      if (!field) {
++      if (!field || !field->size) {
+               ret = -EINVAL;
+               goto out;
+       }
+@@ -547,7 +547,7 @@ static int create_key_field(struct hist_
+               }
+ 
+               field = trace_find_event_field(file->event_call, field_name);
+-              if (!field) {
++              if (!field || !field->size) {
+                       ret = -EINVAL;
+                       goto out;
+               }
diff --git a/queue-4.9/usb-gadget-f_uvc-sanity-check-wmaxpacketsize-for-superspeed.patch b/queue-4.9/usb-gadget-f_uvc-sanity-check-wmaxpacketsize-for-superspeed.patch

new file mode 100644 (file)

index 0000000..0f696e1
--- /dev/null
+++ b/queue-4.9/usb-gadget-f_uvc-sanity-check-wmaxpacketsize-for-superspeed.patch
@@ -0,0 +1,40 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Roger Quadros <rogerq@ti.com>
+Date: Wed, 8 Mar 2017 16:05:44 +0200
+Subject: usb: gadget: f_uvc: Sanity check wMaxPacketSize for SuperSpeed
+
+From: Roger Quadros <rogerq@ti.com>
+
+
+[ Upstream commit 16bb05d98c904a4f6c5ce7e2d992299f794acbf2 ]
+
+As per USB3.0 Specification "Table 9-20. Standard Endpoint Descriptor",
+for interrupt and isochronous endpoints, wMaxPacketSize must be set to
+1024 if the endpoint defines bMaxBurst to be greater than zero.
+
+Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+Signed-off-by: Roger Quadros <rogerq@ti.com>
+Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/usb/gadget/function/f_uvc.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/drivers/usb/gadget/function/f_uvc.c
++++ b/drivers/usb/gadget/function/f_uvc.c
+@@ -594,6 +594,14 @@ uvc_function_bind(struct usb_configurati
+       opts->streaming_maxpacket = clamp(opts->streaming_maxpacket, 1U, 3072U);
+       opts->streaming_maxburst = min(opts->streaming_maxburst, 15U);
+ 
++      /* For SS, wMaxPacketSize has to be 1024 if bMaxBurst is not 0 */
++      if (opts->streaming_maxburst &&
++          (opts->streaming_maxpacket % 1024) != 0) {
++              opts->streaming_maxpacket = roundup(opts->streaming_maxpacket, 1024);
++              INFO(cdev, "overriding streaming_maxpacket to %d\n",
++                   opts->streaming_maxpacket);
++      }
++
+       /* Fill in the FS/HS/SS Video Streaming specific descriptors from the
+        * module parameters.
+        *
diff --git a/queue-4.9/usb-gadget-udc-remove-pointer-dereference-after-free.patch b/queue-4.9/usb-gadget-udc-remove-pointer-dereference-after-free.patch

new file mode 100644 (file)

index 0000000..85f87d9
--- /dev/null
+++ b/queue-4.9/usb-gadget-udc-remove-pointer-dereference-after-free.patch
@@ -0,0 +1,32 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
+Date: Fri, 10 Mar 2017 15:39:32 -0600
+Subject: usb: gadget: udc: remove pointer dereference after free
+
+From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
+
+
+[ Upstream commit 1f459262b0e1649a1e5ad12fa4c66eb76c2220ce ]
+
+Remove pointer dereference after free.
+
+Addresses-Coverity-ID: 1091173
+Acked-by: Michal Nazarewicz <mina86@mina86.com>
+Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
+Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/usb/gadget/udc/pch_udc.c |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/drivers/usb/gadget/udc/pch_udc.c
++++ b/drivers/usb/gadget/udc/pch_udc.c
+@@ -1523,7 +1523,6 @@ static void pch_udc_free_dma_chain(struc
+               td = phys_to_virt(addr);
+               addr2 = (dma_addr_t)td->next;
+               pci_pool_free(dev->data_requests, td, addr);
+-              td->next = 0x00;
+               addr = addr2;
+       }
+       req->chain_len = 1;
diff --git a/queue-4.9/vfio-pci-virtualize-maximum-payload-size.patch b/queue-4.9/vfio-pci-virtualize-maximum-payload-size.patch

new file mode 100644 (file)

index 0000000..0e2196d
--- /dev/null
+++ b/queue-4.9/vfio-pci-virtualize-maximum-payload-size.patch
@@ -0,0 +1,47 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Alex Williamson <alex.williamson@redhat.com>
+Date: Mon, 2 Oct 2017 12:39:09 -0600
+Subject: vfio/pci: Virtualize Maximum Payload Size
+
+From: Alex Williamson <alex.williamson@redhat.com>
+
+
+[ Upstream commit 523184972b282cd9ca17a76f6ca4742394856818 ]
+
+With virtual PCI-Express chipsets, we now see userspace/guest drivers
+trying to match the physical MPS setting to a virtual downstream port.
+Of course a lone physical device surrounded by virtual interconnects
+cannot make a correct decision for a proper MPS setting.  Instead,
+let's virtualize the MPS control register so that writes through to
+hardware are disallowed.  Userspace drivers like QEMU assume they can
+write anything to the device and we'll filter out anything dangerous.
+Since mismatched MPS can lead to AER and other faults, let's add it
+to the kernel side rather than relying on userspace virtualization to
+handle it.
+
+Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
+Reviewed-by: Eric Auger <eric.auger@redhat.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vfio/pci/vfio_pci_config.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/vfio/pci/vfio_pci_config.c
++++ b/drivers/vfio/pci/vfio_pci_config.c
+@@ -851,11 +851,13 @@ static int __init init_pci_cap_exp_perm(
+ 
+       /*
+        * Allow writes to device control fields, except devctl_phantom,
+-       * which could confuse IOMMU, and the ARI bit in devctl2, which
++       * which could confuse IOMMU, MPS, which can break communication
++       * with other physical devices, and the ARI bit in devctl2, which
+        * is set at probe time.  FLR gets virtualized via our writefn.
+        */
+       p_setw(perm, PCI_EXP_DEVCTL,
+-             PCI_EXP_DEVCTL_BCR_FLR, ~PCI_EXP_DEVCTL_PHANTOM);
++             PCI_EXP_DEVCTL_BCR_FLR | PCI_EXP_DEVCTL_PAYLOAD,
++             ~PCI_EXP_DEVCTL_PHANTOM);
+       p_setw(perm, PCI_EXP_DEVCTL2, NO_VIRT, ~PCI_EXP_DEVCTL2_ARI);
+       return 0;
+ }
diff --git a/queue-4.9/vhost-vsock-add-pkt-cancel-capability.patch b/queue-4.9/vhost-vsock-add-pkt-cancel-capability.patch

new file mode 100644 (file)

index 0000000..7f5dd32
--- /dev/null
+++ b/queue-4.9/vhost-vsock-add-pkt-cancel-capability.patch
@@ -0,0 +1,92 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Peng Tao <bergwolf@gmail.com>
+Date: Wed, 15 Mar 2017 09:32:15 +0800
+Subject: vhost-vsock: add pkt cancel capability
+
+From: Peng Tao <bergwolf@gmail.com>
+
+
+[ Upstream commit 16320f363ae128d9b9c70e60f00f2a572f57c23d ]
+
+To allow canceling all packets of a connection.
+
+Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
+Reviewed-by: Jorgen Hansen <jhansen@vmware.com>
+Signed-off-by: Peng Tao <bergwolf@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vhost/vsock.c  |   41 +++++++++++++++++++++++++++++++++++++++++
+ include/net/af_vsock.h |    3 +++
+ 2 files changed, 44 insertions(+)
+
+--- a/drivers/vhost/vsock.c
++++ b/drivers/vhost/vsock.c
+@@ -218,6 +218,46 @@ vhost_transport_send_pkt(struct virtio_v
+       return len;
+ }
+ 
++static int
++vhost_transport_cancel_pkt(struct vsock_sock *vsk)
++{
++      struct vhost_vsock *vsock;
++      struct virtio_vsock_pkt *pkt, *n;
++      int cnt = 0;
++      LIST_HEAD(freeme);
++
++      /* Find the vhost_vsock according to guest context id  */
++      vsock = vhost_vsock_get(vsk->remote_addr.svm_cid);
++      if (!vsock)
++              return -ENODEV;
++
++      spin_lock_bh(&vsock->send_pkt_list_lock);
++      list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) {
++              if (pkt->vsk != vsk)
++                      continue;
++              list_move(&pkt->list, &freeme);
++      }
++      spin_unlock_bh(&vsock->send_pkt_list_lock);
++
++      list_for_each_entry_safe(pkt, n, &freeme, list) {
++              if (pkt->reply)
++                      cnt++;
++              list_del(&pkt->list);
++              virtio_transport_free_pkt(pkt);
++      }
++
++      if (cnt) {
++              struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
++              int new_cnt;
++
++              new_cnt = atomic_sub_return(cnt, &vsock->queued_replies);
++              if (new_cnt + cnt >= tx_vq->num && new_cnt < tx_vq->num)
++                      vhost_poll_queue(&tx_vq->poll);
++      }
++
++      return 0;
++}
++
+ static struct virtio_vsock_pkt *
+ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
+                     unsigned int out, unsigned int in)
+@@ -669,6 +709,7 @@ static struct virtio_transport vhost_tra
+               .release                  = virtio_transport_release,
+               .connect                  = virtio_transport_connect,
+               .shutdown                 = virtio_transport_shutdown,
++              .cancel_pkt               = vhost_transport_cancel_pkt,
+ 
+               .dgram_enqueue            = virtio_transport_dgram_enqueue,
+               .dgram_dequeue            = virtio_transport_dgram_dequeue,
+--- a/include/net/af_vsock.h
++++ b/include/net/af_vsock.h
+@@ -100,6 +100,9 @@ struct vsock_transport {
+       void (*destruct)(struct vsock_sock *);
+       void (*release)(struct vsock_sock *);
+ 
++      /* Cancel all pending packets sent on vsock. */
++      int (*cancel_pkt)(struct vsock_sock *vsk);
++
+       /* Connections. */
+       int (*connect)(struct vsock_sock *);
+ 
diff --git a/queue-4.9/virtio-balloon-use-actual-number-of-stats-for-stats-queue-buffers.patch b/queue-4.9/virtio-balloon-use-actual-number-of-stats-for-stats-queue-buffers.patch

new file mode 100644 (file)

index 0000000..d13f10e
--- /dev/null
+++ b/queue-4.9/virtio-balloon-use-actual-number-of-stats-for-stats-queue-buffers.patch
@@ -0,0 +1,91 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Ladi Prosek <lprosek@redhat.com>
+Date: Tue, 28 Mar 2017 18:46:58 +0200
+Subject: virtio-balloon: use actual number of stats for stats queue buffers
+
+From: Ladi Prosek <lprosek@redhat.com>
+
+
+[ Upstream commit 9646b26e85896ef0256e66649f7937f774dc18a6 ]
+
+The virtio balloon driver contained a not-so-obvious invariant that
+update_balloon_stats has to update exactly VIRTIO_BALLOON_S_NR counters
+in order to send valid stats to the host. This commit fixes it by having
+update_balloon_stats return the actual number of counters, and its
+callers use it when pushing buffers to the stats virtqueue.
+
+Note that it is still out of spec to change the number of counters
+at run-time. "Driver MUST supply the same subset of statistics in all
+buffers submitted to the statsq."
+
+Suggested-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Ladi Prosek <lprosek@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/virtio/virtio_balloon.c |   17 ++++++++++-------
+ 1 file changed, 10 insertions(+), 7 deletions(-)
+
+--- a/drivers/virtio/virtio_balloon.c
++++ b/drivers/virtio/virtio_balloon.c
+@@ -241,11 +241,11 @@ static inline void update_stat(struct vi
+ 
+ #define pages_to_bytes(x) ((u64)(x) << PAGE_SHIFT)
+ 
+-static void update_balloon_stats(struct virtio_balloon *vb)
++static unsigned int update_balloon_stats(struct virtio_balloon *vb)
+ {
+       unsigned long events[NR_VM_EVENT_ITEMS];
+       struct sysinfo i;
+-      int idx = 0;
++      unsigned int idx = 0;
+       long available;
+ 
+       all_vm_events(events);
+@@ -265,6 +265,8 @@ static void update_balloon_stats(struct
+                               pages_to_bytes(i.totalram));
+       update_stat(vb, idx++, VIRTIO_BALLOON_S_AVAIL,
+                               pages_to_bytes(available));
++
++      return idx;
+ }
+ 
+ /*
+@@ -290,14 +292,14 @@ static void stats_handle_request(struct
+ {
+       struct virtqueue *vq;
+       struct scatterlist sg;
+-      unsigned int len;
++      unsigned int len, num_stats;
+ 
+-      update_balloon_stats(vb);
++      num_stats = update_balloon_stats(vb);
+ 
+       vq = vb->stats_vq;
+       if (!virtqueue_get_buf(vq, &len))
+               return;
+-      sg_init_one(&sg, vb->stats, sizeof(vb->stats));
++      sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats);
+       virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);
+       virtqueue_kick(vq);
+ }
+@@ -421,15 +423,16 @@ static int init_vqs(struct virtio_balloo
+       vb->deflate_vq = vqs[1];
+       if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
+               struct scatterlist sg;
++              unsigned int num_stats;
+               vb->stats_vq = vqs[2];
+ 
+               /*
+                * Prime this virtqueue with one buffer so the hypervisor can
+                * use it to signal us later (it can't be broken yet!).
+                */
+-              update_balloon_stats(vb);
++              num_stats = update_balloon_stats(vb);
+ 
+-              sg_init_one(&sg, vb->stats, sizeof vb->stats);
++              sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats);
+               if (virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, GFP_KERNEL)
+                   < 0)
+                       BUG();
diff --git a/queue-4.9/virtio_balloon-prevent-uninitialized-variable-use.patch b/queue-4.9/virtio_balloon-prevent-uninitialized-variable-use.patch

new file mode 100644 (file)

index 0000000..356fcfd
--- /dev/null
+++ b/queue-4.9/virtio_balloon-prevent-uninitialized-variable-use.patch
@@ -0,0 +1,50 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Arnd Bergmann <arnd@arndb.de>
+Date: Tue, 28 Mar 2017 18:46:59 +0200
+Subject: virtio_balloon: prevent uninitialized variable use
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+
+[ Upstream commit f0bb2d50dfcc519f06f901aac88502be6ff1df2c ]
+
+The latest gcc-7.0.1 snapshot reports a new warning:
+
+virtio/virtio_balloon.c: In function 'update_balloon_stats':
+virtio/virtio_balloon.c:258:26: error: 'events[2]' is used uninitialized in this function [-Werror=uninitialized]
+virtio/virtio_balloon.c:260:26: error: 'events[3]' is used uninitialized in this function [-Werror=uninitialized]
+virtio/virtio_balloon.c:261:56: error: 'events[18]' is used uninitialized in this function [-Werror=uninitialized]
+virtio/virtio_balloon.c:262:56: error: 'events[17]' is used uninitialized in this function [-Werror=uninitialized]
+
+This seems absolutely right, so we should add an extra check to
+prevent copying uninitialized stack data into the statistics.
+From all I can tell, this has been broken since the statistics code
+was originally added in 2.6.34.
+
+Fixes: 9564e138b1f6 ("virtio: Add memory statistics reporting to the balloon driver (V4)")
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Ladi Prosek <lprosek@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/virtio/virtio_balloon.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/virtio/virtio_balloon.c
++++ b/drivers/virtio/virtio_balloon.c
+@@ -253,12 +253,14 @@ static unsigned int update_balloon_stats
+ 
+       available = si_mem_available();
+ 
++#ifdef CONFIG_VM_EVENT_COUNTERS
+       update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN,
+                               pages_to_bytes(events[PSWPIN]));
+       update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT,
+                               pages_to_bytes(events[PSWPOUT]));
+       update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]);
+       update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
++#endif
+       update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE,
+                               pages_to_bytes(i.freeram));
+       update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT,
diff --git a/queue-4.9/vsock-cancel-packets-when-failing-to-connect.patch b/queue-4.9/vsock-cancel-packets-when-failing-to-connect.patch

new file mode 100644 (file)

index 0000000..96d670c
--- /dev/null
+++ b/queue-4.9/vsock-cancel-packets-when-failing-to-connect.patch
@@ -0,0 +1,72 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Peng Tao <bergwolf@gmail.com>
+Date: Wed, 15 Mar 2017 09:32:17 +0800
+Subject: vsock: cancel packets when failing to connect
+
+From: Peng Tao <bergwolf@gmail.com>
+
+
+[ Upstream commit 380feae0def7e6a115124a3219c3ec9b654dca32 ]
+
+Otherwise we'll leave the packets queued until the vsock device is released.
+E.g., if the guest is slow to start up, resulting in ETIMEDOUT on connect, the
+guest will get the connect requests from failed host sockets.
+
+Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
+Reviewed-by: Jorgen Hansen <jhansen@vmware.com>
+Signed-off-by: Peng Tao <bergwolf@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/vmw_vsock/af_vsock.c |   14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+--- a/net/vmw_vsock/af_vsock.c
++++ b/net/vmw_vsock/af_vsock.c
+@@ -1101,10 +1101,19 @@ static const struct proto_ops vsock_dgra
+       .sendpage = sock_no_sendpage,
+ };
+ 
++static int vsock_transport_cancel_pkt(struct vsock_sock *vsk)
++{
++      if (!transport->cancel_pkt)
++              return -EOPNOTSUPP;
++
++      return transport->cancel_pkt(vsk);
++}
++
+ static void vsock_connect_timeout(struct work_struct *work)
+ {
+       struct sock *sk;
+       struct vsock_sock *vsk;
++      int cancel = 0;
+ 
+       vsk = container_of(work, struct vsock_sock, dwork.work);
+       sk = sk_vsock(vsk);
+@@ -1115,8 +1124,11 @@ static void vsock_connect_timeout(struct
+               sk->sk_state = SS_UNCONNECTED;
+               sk->sk_err = ETIMEDOUT;
+               sk->sk_error_report(sk);
++              cancel = 1;
+       }
+       release_sock(sk);
++      if (cancel)
++              vsock_transport_cancel_pkt(vsk);
+ 
+       sock_put(sk);
+ }
+@@ -1223,11 +1235,13 @@ static int vsock_stream_connect(struct s
+                       err = sock_intr_errno(timeout);
+                       sk->sk_state = SS_UNCONNECTED;
+                       sock->state = SS_UNCONNECTED;
++                      vsock_transport_cancel_pkt(vsk);
+                       goto out_wait;
+               } else if (timeout == 0) {
+                       err = -ETIMEDOUT;
+                       sk->sk_state = SS_UNCONNECTED;
+                       sock->state = SS_UNCONNECTED;
++                      vsock_transport_cancel_pkt(vsk);
+                       goto out_wait;
+               }
+ 
diff --git a/queue-4.9/vsock-track-pkt-owner-vsock.patch b/queue-4.9/vsock-track-pkt-owner-vsock.patch

new file mode 100644 (file)

index 0000000..40a47fa
--- /dev/null
+++ b/queue-4.9/vsock-track-pkt-owner-vsock.patch
@@ -0,0 +1,98 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Peng Tao <bergwolf@gmail.com>
+Date: Wed, 15 Mar 2017 09:32:14 +0800
+Subject: vsock: track pkt owner vsock
+
+From: Peng Tao <bergwolf@gmail.com>
+
+
+[ Upstream commit 36d277bac8080202684e67162ebb157f16631581 ]
+
+So that we can cancel a queued pkt later if necessary.
+
+Signed-off-by: Peng Tao <bergwolf@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/virtio_vsock.h            |    3 +++
+ net/vmw_vsock/virtio_transport_common.c |    7 +++++++
+ 2 files changed, 10 insertions(+)
+
+--- a/include/linux/virtio_vsock.h
++++ b/include/linux/virtio_vsock.h
+@@ -48,6 +48,8 @@ struct virtio_vsock_pkt {
+       struct virtio_vsock_hdr hdr;
+       struct work_struct work;
+       struct list_head list;
++      /* socket refcnt not held, only use for cancellation */
++      struct vsock_sock *vsk;
+       void *buf;
+       u32 len;
+       u32 off;
+@@ -56,6 +58,7 @@ struct virtio_vsock_pkt {
+ 
+ struct virtio_vsock_pkt_info {
+       u32 remote_cid, remote_port;
++      struct vsock_sock *vsk;
+       struct msghdr *msg;
+       u32 pkt_len;
+       u16 type;
+--- a/net/vmw_vsock/virtio_transport_common.c
++++ b/net/vmw_vsock/virtio_transport_common.c
+@@ -57,6 +57,7 @@ virtio_transport_alloc_pkt(struct virtio
+       pkt->len                = len;
+       pkt->hdr.len            = cpu_to_le32(len);
+       pkt->reply              = info->reply;
++      pkt->vsk                = info->vsk;
+ 
+       if (info->msg && len > 0) {
+               pkt->buf = kmalloc(len, GFP_KERNEL);
+@@ -180,6 +181,7 @@ static int virtio_transport_send_credit_
+       struct virtio_vsock_pkt_info info = {
+               .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE,
+               .type = type,
++              .vsk = vsk,
+       };
+ 
+       return virtio_transport_send_pkt_info(vsk, &info);
+@@ -519,6 +521,7 @@ int virtio_transport_connect(struct vsoc
+       struct virtio_vsock_pkt_info info = {
+               .op = VIRTIO_VSOCK_OP_REQUEST,
+               .type = VIRTIO_VSOCK_TYPE_STREAM,
++              .vsk = vsk,
+       };
+ 
+       return virtio_transport_send_pkt_info(vsk, &info);
+@@ -534,6 +537,7 @@ int virtio_transport_shutdown(struct vso
+                         VIRTIO_VSOCK_SHUTDOWN_RCV : 0) |
+                        (mode & SEND_SHUTDOWN ?
+                         VIRTIO_VSOCK_SHUTDOWN_SEND : 0),
++              .vsk = vsk,
+       };
+ 
+       return virtio_transport_send_pkt_info(vsk, &info);
+@@ -560,6 +564,7 @@ virtio_transport_stream_enqueue(struct v
+               .type = VIRTIO_VSOCK_TYPE_STREAM,
+               .msg = msg,
+               .pkt_len = len,
++              .vsk = vsk,
+       };
+ 
+       return virtio_transport_send_pkt_info(vsk, &info);
+@@ -581,6 +586,7 @@ static int virtio_transport_reset(struct
+               .op = VIRTIO_VSOCK_OP_RST,
+               .type = VIRTIO_VSOCK_TYPE_STREAM,
+               .reply = !!pkt,
++              .vsk = vsk,
+       };
+ 
+       /* Send RST only if the original pkt is not a RST pkt */
+@@ -826,6 +832,7 @@ virtio_transport_send_response(struct vs
+               .remote_cid = le64_to_cpu(pkt->hdr.src_cid),
+               .remote_port = le32_to_cpu(pkt->hdr.src_port),
+               .reply = true,
++              .vsk = vsk,
+       };
+ 
+       return virtio_transport_send_pkt_info(vsk, &info);
diff --git a/queue-4.9/x86-entry-unwind-create-stack-frames-for-saved-interrupt-registers.patch b/queue-4.9/x86-entry-unwind-create-stack-frames-for-saved-interrupt-registers.patch

new file mode 100644 (file)

index 0000000..16962a0
--- /dev/null
+++ b/queue-4.9/x86-entry-unwind-create-stack-frames-for-saved-interrupt-registers.patch
@@ -0,0 +1,430 @@
+From 946c191161cef10c667b5ee3179db1714fa5b7c0 Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Thu, 20 Oct 2016 11:34:40 -0500
+Subject: x86/entry/unwind: Create stack frames for saved interrupt registers
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit 946c191161cef10c667b5ee3179db1714fa5b7c0 upstream.
+
+With frame pointers, when a task is interrupted, its stack is no longer
+completely reliable because the function could have been interrupted
+before it had a chance to save the previous frame pointer on the stack.
+So the caller of the interrupted function could get skipped by a stack
+trace.
+
+This is problematic for live patching, which needs to know whether a
+stack trace of a sleeping task can be relied upon.  There's currently no
+way to detect if a sleeping task was interrupted by a page fault
+exception or preemption before it went to sleep.
+
+Another issue is that when dumping the stack of an interrupted task, the
+unwinder has no way of knowing where the saved pt_regs registers are, so
+it can't print them.
+
+This solves those issues by encoding the pt_regs pointer in the frame
+pointer on entry from an interrupt or an exception.
+
+This patch also updates the unwinder to be able to decode it, because
+otherwise the unwinder would be broken by this change.
+
+Note that this causes a change in the behavior of the unwinder: each
+instance of a pt_regs on the stack is now considered a "frame".  So
+callers of unwind_get_return_address() will now get an occasional
+'regs->ip' address that would have previously been skipped over.
+
+Suggested-by: Andy Lutomirski <luto@amacapital.net>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/8b9f84a21e39d249049e0547b559ff8da0df0988.1476973742.git.jpoimboe@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Eduardo Valentin <eduval@amazon.com>
+Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/calling.h       |   20 ++++++++++
+ arch/x86/entry/entry_32.S      |   33 +++++++++++++++--
+ arch/x86/entry/entry_64.S      |   10 +++--
+ arch/x86/include/asm/unwind.h  |   16 ++++++++
+ arch/x86/kernel/unwind_frame.c |   76 ++++++++++++++++++++++++++++++++++++-----
+ 5 files changed, 139 insertions(+), 16 deletions(-)
+
+--- a/arch/x86/entry/calling.h
++++ b/arch/x86/entry/calling.h
+@@ -201,6 +201,26 @@ For 32-bit we have the following convent
+       .byte 0xf1
+       .endm
+ 
++/*
++ * This is a sneaky trick to help the unwinder find pt_regs on the stack.  The
++ * frame pointer is replaced with an encoded pointer to pt_regs.  The encoding
++ * is just setting the LSB, which makes it an invalid stack address and is also
++ * a signal to the unwinder that it's a pt_regs pointer in disguise.
++ *
++ * NOTE: This macro must be used *after* SAVE_EXTRA_REGS because it corrupts
++ * the original rbp.
++ */
++.macro ENCODE_FRAME_POINTER ptregs_offset=0
++#ifdef CONFIG_FRAME_POINTER
++      .if \ptregs_offset
++              leaq \ptregs_offset(%rsp), %rbp
++      .else
++              mov %rsp, %rbp
++      .endif
++      orq     $0x1, %rbp
++#endif
++.endm
++
+ #endif /* CONFIG_X86_64 */
+ 
+ /*
+--- a/arch/x86/entry/entry_32.S
++++ b/arch/x86/entry/entry_32.S
+@@ -175,6 +175,22 @@
+       SET_KERNEL_GS %edx
+ .endm
+ 
++/*
++ * This is a sneaky trick to help the unwinder find pt_regs on the stack.  The
++ * frame pointer is replaced with an encoded pointer to pt_regs.  The encoding
++ * is just setting the LSB, which makes it an invalid stack address and is also
++ * a signal to the unwinder that it's a pt_regs pointer in disguise.
++ *
++ * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the
++ * original ebp.
++ */
++.macro ENCODE_FRAME_POINTER
++#ifdef CONFIG_FRAME_POINTER
++      mov %esp, %ebp
++      orl $0x1, %ebp
++#endif
++.endm
++
+ .macro RESTORE_INT_REGS
+       popl    %ebx
+       popl    %ecx
+@@ -624,6 +640,7 @@ common_interrupt:
+       ASM_CLAC
+       addl    $-0x80, (%esp)                  /* Adjust vector into the [-256, -1] range */
+       SAVE_ALL
++      ENCODE_FRAME_POINTER
+       TRACE_IRQS_OFF
+       movl    %esp, %eax
+       call    do_IRQ
+@@ -635,6 +652,7 @@ ENTRY(name)                                \
+       ASM_CLAC;                       \
+       pushl   $~(nr);                 \
+       SAVE_ALL;                       \
++      ENCODE_FRAME_POINTER;           \
+       TRACE_IRQS_OFF                  \
+       movl    %esp, %eax;             \
+       call    fn;                     \
+@@ -769,6 +787,7 @@ END(spurious_interrupt_bug)
+ ENTRY(xen_hypervisor_callback)
+       pushl   $-1                             /* orig_ax = -1 => not a system call */
+       SAVE_ALL
++      ENCODE_FRAME_POINTER
+       TRACE_IRQS_OFF
+ 
+       /*
+@@ -823,6 +842,7 @@ ENTRY(xen_failsafe_callback)
+       jmp     iret_exc
+ 5:    pushl   $-1                             /* orig_ax = -1 => not a system call */
+       SAVE_ALL
++      ENCODE_FRAME_POINTER
+       jmp     ret_from_exception
+ 
+ .section .fixup, "ax"
+@@ -1047,6 +1067,7 @@ error_code:
+       pushl   %edx
+       pushl   %ecx
+       pushl   %ebx
++      ENCODE_FRAME_POINTER
+       cld
+       movl    $(__KERNEL_PERCPU), %ecx
+       movl    %ecx, %fs
+@@ -1079,6 +1100,7 @@ ENTRY(debug)
+       ASM_CLAC
+       pushl   $-1                             # mark this as an int
+       SAVE_ALL
++      ENCODE_FRAME_POINTER
+       xorl    %edx, %edx                      # error code 0
+       movl    %esp, %eax                      # pt_regs pointer
+ 
+@@ -1094,11 +1116,11 @@ ENTRY(debug)
+ 
+ .Ldebug_from_sysenter_stack:
+       /* We're on the SYSENTER stack.  Switch off. */
+-      movl    %esp, %ebp
++      movl    %esp, %ebx
+       movl    PER_CPU_VAR(cpu_current_top_of_stack), %esp
+       TRACE_IRQS_OFF
+       call    do_debug
+-      movl    %ebp, %esp
++      movl    %ebx, %esp
+       jmp     ret_from_exception
+ END(debug)
+ 
+@@ -1121,6 +1143,7 @@ ENTRY(nmi)
+ 
+       pushl   %eax                            # pt_regs->orig_ax
+       SAVE_ALL
++      ENCODE_FRAME_POINTER
+       xorl    %edx, %edx                      # zero error code
+       movl    %esp, %eax                      # pt_regs pointer
+ 
+@@ -1139,10 +1162,10 @@ ENTRY(nmi)
+        * We're on the SYSENTER stack.  Switch off.  No one (not even debug)
+        * is using the thread stack right now, so it's safe for us to use it.
+        */
+-      movl    %esp, %ebp
++      movl    %esp, %ebx
+       movl    PER_CPU_VAR(cpu_current_top_of_stack), %esp
+       call    do_nmi
+-      movl    %ebp, %esp
++      movl    %ebx, %esp
+       jmp     restore_all_notrace
+ 
+ #ifdef CONFIG_X86_ESPFIX32
+@@ -1159,6 +1182,7 @@ nmi_espfix_stack:
+       .endr
+       pushl   %eax
+       SAVE_ALL
++      ENCODE_FRAME_POINTER
+       FIXUP_ESPFIX_STACK                      # %eax == %esp
+       xorl    %edx, %edx                      # zero error code
+       call    do_nmi
+@@ -1172,6 +1196,7 @@ ENTRY(int3)
+       ASM_CLAC
+       pushl   $-1                             # mark this as an int
+       SAVE_ALL
++      ENCODE_FRAME_POINTER
+       TRACE_IRQS_OFF
+       xorl    %edx, %edx                      # zero error code
+       movl    %esp, %eax                      # pt_regs pointer
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -469,6 +469,7 @@ END(irq_entries_start)
+       ALLOC_PT_GPREGS_ON_STACK
+       SAVE_C_REGS
+       SAVE_EXTRA_REGS
++      ENCODE_FRAME_POINTER
+ 
+       testb   $3, CS(%rsp)
+       jz      1f
+@@ -985,6 +986,7 @@ ENTRY(xen_failsafe_callback)
+       ALLOC_PT_GPREGS_ON_STACK
+       SAVE_C_REGS
+       SAVE_EXTRA_REGS
++      ENCODE_FRAME_POINTER
+       jmp     error_exit
+ END(xen_failsafe_callback)
+ 
+@@ -1028,6 +1030,7 @@ ENTRY(paranoid_entry)
+       cld
+       SAVE_C_REGS 8
+       SAVE_EXTRA_REGS 8
++      ENCODE_FRAME_POINTER 8
+       movl    $1, %ebx
+       movl    $MSR_GS_BASE, %ecx
+       rdmsr
+@@ -1075,6 +1078,7 @@ ENTRY(error_entry)
+       cld
+       SAVE_C_REGS 8
+       SAVE_EXTRA_REGS 8
++      ENCODE_FRAME_POINTER 8
+       xorl    %ebx, %ebx
+       testb   $3, CS+8(%rsp)
+       jz      .Lerror_kernelspace
+@@ -1259,6 +1263,7 @@ ENTRY(nmi)
+       pushq   %r13            /* pt_regs->r13 */
+       pushq   %r14            /* pt_regs->r14 */
+       pushq   %r15            /* pt_regs->r15 */
++      ENCODE_FRAME_POINTER
+ 
+       /*
+        * At this point we no longer need to worry about stack damage
+@@ -1272,11 +1277,10 @@ ENTRY(nmi)
+ 
+       /*
+        * Return back to user mode.  We must *not* do the normal exit
+-       * work, because we don't want to enable interrupts.  Fortunately,
+-       * do_nmi doesn't modify pt_regs.
++       * work, because we don't want to enable interrupts.
+        */
+       SWAPGS
+-      jmp     restore_c_regs_and_iret
++      jmp     restore_regs_and_iret
+ 
+ .Lnmi_from_kernel:
+       /*
+--- a/arch/x86/include/asm/unwind.h
++++ b/arch/x86/include/asm/unwind.h
+@@ -13,6 +13,7 @@ struct unwind_state {
+       int graph_idx;
+ #ifdef CONFIG_FRAME_POINTER
+       unsigned long *bp;
++      struct pt_regs *regs;
+ #else
+       unsigned long *sp;
+ #endif
+@@ -47,7 +48,15 @@ unsigned long *unwind_get_return_address
+       if (unwind_done(state))
+               return NULL;
+ 
+-      return state->bp + 1;
++      return state->regs ? &state->regs->ip : state->bp + 1;
++}
++
++static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
++{
++      if (unwind_done(state))
++              return NULL;
++
++      return state->regs;
+ }
+ 
+ #else /* !CONFIG_FRAME_POINTER */
+@@ -57,6 +66,11 @@ unsigned long *unwind_get_return_address
+ {
+       return NULL;
+ }
++
++static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
++{
++      return NULL;
++}
+ 
+ #endif /* CONFIG_FRAME_POINTER */
+ 
+--- a/arch/x86/kernel/unwind_frame.c
++++ b/arch/x86/kernel/unwind_frame.c
+@@ -14,6 +14,9 @@ unsigned long unwind_get_return_address(
+       if (unwind_done(state))
+               return 0;
+ 
++      if (state->regs && user_mode(state->regs))
++              return 0;
++
+       addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, *addr_p,
+                                    addr_p);
+ 
+@@ -21,6 +24,20 @@ unsigned long unwind_get_return_address(
+ }
+ EXPORT_SYMBOL_GPL(unwind_get_return_address);
+ 
++/*
++ * This determines if the frame pointer actually contains an encoded pointer to
++ * pt_regs on the stack.  See ENCODE_FRAME_POINTER.
++ */
++static struct pt_regs *decode_frame_pointer(unsigned long *bp)
++{
++      unsigned long regs = (unsigned long)bp;
++
++      if (!(regs & 0x1))
++              return NULL;
++
++      return (struct pt_regs *)(regs & ~0x1);
++}
++
+ static bool update_stack_state(struct unwind_state *state, void *addr,
+                              size_t len)
+ {
+@@ -43,26 +60,59 @@ static bool update_stack_state(struct un
+ 
+ bool unwind_next_frame(struct unwind_state *state)
+ {
+-      unsigned long *next_bp;
++      struct pt_regs *regs;
++      unsigned long *next_bp, *next_frame;
++      size_t next_len;
+ 
+       if (unwind_done(state))
+               return false;
+ 
+-      next_bp = (unsigned long *)*state->bp;
++      /* have we reached the end? */
++      if (state->regs && user_mode(state->regs))
++              goto the_end;
++
++      /* get the next frame pointer */
++      if (state->regs)
++              next_bp = (unsigned long *)state->regs->bp;
++      else
++              next_bp = (unsigned long *)*state->bp;
++
++      /* is the next frame pointer an encoded pointer to pt_regs? */
++      regs = decode_frame_pointer(next_bp);
++      if (regs) {
++              next_frame = (unsigned long *)regs;
++              next_len = sizeof(*regs);
++      } else {
++              next_frame = next_bp;
++              next_len = FRAME_HEADER_SIZE;
++      }
+ 
+       /* make sure the next frame's data is accessible */
+-      if (!update_stack_state(state, next_bp, FRAME_HEADER_SIZE))
++      if (!update_stack_state(state, next_frame, next_len))
+               return false;
+-
+       /* move to the next frame */
+-      state->bp = next_bp;
++      if (regs) {
++              state->regs = regs;
++              state->bp = NULL;
++      } else {
++              state->bp = next_bp;
++              state->regs = NULL;
++      }
++
+       return true;
++
++the_end:
++      state->stack_info.type = STACK_TYPE_UNKNOWN;
++      return false;
+ }
+ EXPORT_SYMBOL_GPL(unwind_next_frame);
+ 
+ void __unwind_start(struct unwind_state *state, struct task_struct *task,
+                   struct pt_regs *regs, unsigned long *first_frame)
+ {
++      unsigned long *bp, *frame;
++      size_t len;
++
+       memset(state, 0, sizeof(*state));
+       state->task = task;
+ 
+@@ -73,12 +123,22 @@ void __unwind_start(struct unwind_state
+       }
+ 
+       /* set up the starting stack frame */
+-      state->bp = get_frame_pointer(task, regs);
++      bp = get_frame_pointer(task, regs);
++      regs = decode_frame_pointer(bp);
++      if (regs) {
++              state->regs = regs;
++              frame = (unsigned long *)regs;
++              len = sizeof(*regs);
++      } else {
++              state->bp = bp;
++              frame = bp;
++              len = FRAME_HEADER_SIZE;
++      }
+ 
+       /* initialize stack info and make sure the frame data is accessible */
+-      get_stack_info(state->bp, state->task, &state->stack_info,
++      get_stack_info(frame, state->task, &state->stack_info,
+                      &state->stack_mask);
+-      update_stack_state(state, state->bp, FRAME_HEADER_SIZE);
++      update_stack_state(state, frame, len);
+ 
+       /*
+        * The caller can provide the address of the first frame directly
diff --git a/queue-4.9/x86-kvm-vmx-defer-tr-reload-after-vm-exit.patch b/queue-4.9/x86-kvm-vmx-defer-tr-reload-after-vm-exit.patch

new file mode 100644 (file)

index 0000000..340ab94
--- /dev/null
+++ b/queue-4.9/x86-kvm-vmx-defer-tr-reload-after-vm-exit.patch
@@ -0,0 +1,211 @@
+From b7ffc44d5b2ea163899d09289ca7743d5c32e926 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 20 Feb 2017 08:56:14 -0800
+Subject: x86/kvm/vmx: Defer TR reload after VM exit
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit b7ffc44d5b2ea163899d09289ca7743d5c32e926 upstream.
+
+Intel's VMX is daft and resets the hidden TSS limit register to 0x67
+on VMX reload, and the 0x67 is not configurable.  KVM currently
+reloads TR using the LTR instruction on every exit, but this is quite
+slow because LTR is serializing.
+
+The 0x67 limit is entirely harmless unless ioperm() is in use, so
+defer the reload until a task using ioperm() is actually running.
+
+Here's some poorly done benchmarking using kvm-unit-tests:
+
+Before:
+
+cpuid 1313
+vmcall 1195
+mov_from_cr8 11
+mov_to_cr8 17
+inl_from_pmtimer 6770
+inl_from_qemu 6856
+inl_from_kernel 2435
+outl_to_kernel 1402
+
+After:
+
+cpuid 1291
+vmcall 1181
+mov_from_cr8 11
+mov_to_cr8 16
+inl_from_pmtimer 6457
+inl_from_qemu 6209
+inl_from_kernel 2339
+outl_to_kernel 1391
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+[Force-reload TR in invalidate_tss_limit. - Paolo]
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Eduardo Valentin <eduval@amazon.com>
+Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/desc.h |   48 ++++++++++++++++++++++++++++++++++++++++++++
+ arch/x86/kernel/ioport.c    |    5 ++++
+ arch/x86/kernel/process.c   |   10 +++++++++
+ arch/x86/kvm/vmx.c          |   23 ++++++++-------------
+ 4 files changed, 72 insertions(+), 14 deletions(-)
+
+--- a/arch/x86/include/asm/desc.h
++++ b/arch/x86/include/asm/desc.h
+@@ -213,6 +213,54 @@ static inline void native_load_tr_desc(v
+       asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
+ }
+ 
++static inline void force_reload_TR(void)
++{
++      struct desc_struct *d = get_cpu_gdt_table(smp_processor_id());
++      tss_desc tss;
++
++      memcpy(&tss, &d[GDT_ENTRY_TSS], sizeof(tss_desc));
++
++      /*
++       * LTR requires an available TSS, and the TSS is currently
++       * busy.  Make it be available so that LTR will work.
++       */
++      tss.type = DESC_TSS;
++      write_gdt_entry(d, GDT_ENTRY_TSS, &tss, DESC_TSS);
++
++      load_TR_desc();
++}
++
++DECLARE_PER_CPU(bool, need_tr_refresh);
++
++static inline void refresh_TR(void)
++{
++      WARN_ON(preemptible());
++
++      if (unlikely(this_cpu_read(need_tr_refresh))) {
++              force_reload_TR();
++              this_cpu_write(need_tr_refresh, false);
++      }
++}
++
++/*
++ * If you do something evil that corrupts the cached TSS limit (I'm looking
++ * at you, VMX exits), call this function.
++ *
++ * The optimization here is that the TSS limit only matters for Linux if the
++ * IO bitmap is in use.  If the TSS limit gets forced to its minimum value,
++ * everything works except that IO bitmap will be ignored and all CPL 3 IO
++ * instructions will #GP, which is exactly what we want for normal tasks.
++ */
++static inline void invalidate_tss_limit(void)
++{
++      WARN_ON(preemptible());
++
++      if (unlikely(test_thread_flag(TIF_IO_BITMAP)))
++              force_reload_TR();
++      else
++              this_cpu_write(need_tr_refresh, true);
++}
++
+ static inline void native_load_gdt(const struct desc_ptr *dtr)
+ {
+       asm volatile("lgdt %0"::"m" (*dtr));
+--- a/arch/x86/kernel/ioport.c
++++ b/arch/x86/kernel/ioport.c
+@@ -16,6 +16,7 @@
+ #include <linux/syscalls.h>
+ #include <linux/bitmap.h>
+ #include <asm/syscalls.h>
++#include <asm/desc.h>
+ 
+ /*
+  * this changes the io permissions bitmap in the current task.
+@@ -45,6 +46,10 @@ asmlinkage long sys_ioperm(unsigned long
+               memset(bitmap, 0xff, IO_BITMAP_BYTES);
+               t->io_bitmap_ptr = bitmap;
+               set_thread_flag(TIF_IO_BITMAP);
++
++              preempt_disable();
++              refresh_TR();
++              preempt_enable();
+       }
+ 
+       /*
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -33,6 +33,7 @@
+ #include <asm/mce.h>
+ #include <asm/vm86.h>
+ #include <asm/switch_to.h>
++#include <asm/desc.h>
+ 
+ /*
+  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
+@@ -82,6 +83,9 @@ void idle_notifier_unregister(struct not
+ EXPORT_SYMBOL_GPL(idle_notifier_unregister);
+ #endif
+ 
++DEFINE_PER_CPU(bool, need_tr_refresh);
++EXPORT_PER_CPU_SYMBOL_GPL(need_tr_refresh);
++
+ /*
+  * this gets called so that we can store lazy state into memory and copy the
+  * current task into the new thread.
+@@ -227,6 +231,12 @@ void __switch_to_xtra(struct task_struct
+                */
+               memcpy(tss->io_bitmap, next->io_bitmap_ptr,
+                      max(prev->io_bitmap_max, next->io_bitmap_max));
++
++              /*
++               * Make sure that the TSS limit is correct for the CPU
++               * to notice the IO bitmap.
++               */
++              refresh_TR();
+       } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
+               /*
+                * Clear any possible leftover bits:
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -1959,19 +1959,6 @@ static void add_atomic_switch_msr(struct
+       m->host[i].value = host_val;
+ }
+ 
+-static void reload_tss(void)
+-{
+-      /*
+-       * VT restores TR but not its size.  Useless.
+-       */
+-      struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
+-      struct desc_struct *descs;
+-
+-      descs = (void *)gdt->address;
+-      descs[GDT_ENTRY_TSS].type = 9; /* available TSS */
+-      load_TR_desc();
+-}
+-
+ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
+ {
+       u64 guest_efer = vmx->vcpu.arch.efer;
+@@ -2141,7 +2128,7 @@ static void __vmx_load_host_state(struct
+               loadsegment(es, vmx->host_state.es_sel);
+       }
+ #endif
+-      reload_tss();
++      invalidate_tss_limit();
+ #ifdef CONFIG_X86_64
+       wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+ #endif
+@@ -2265,6 +2252,14 @@ static void vmx_vcpu_load(struct kvm_vcp
+               vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */
+               vmcs_writel(HOST_GDTR_BASE, gdt->address);   /* 22.2.4 */
+ 
++              /*
++               * VM exits change the host TR limit to 0x67 after a VM
++               * exit.  This is okay, since 0x67 covers everything except
++               * the IO bitmap and we have code to handle the IO bitmap
++               * being lost after a VM exit.
++               */
++              BUILD_BUG_ON(IO_BITMAP_OFFSET - 1 != 0x67);
++
+               rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
+               vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
+ 
diff --git a/queue-4.9/x86-kvm-vmx-remove-unused-variable-in-segment_base.patch b/queue-4.9/x86-kvm-vmx-remove-unused-variable-in-segment_base.patch

new file mode 100644 (file)

index 0000000..cfcf512
--- /dev/null
+++ b/queue-4.9/x86-kvm-vmx-remove-unused-variable-in-segment_base.patch
@@ -0,0 +1,35 @@
+From 0fce546f9f07b94ccc9de09cf48d35e18946d2fa Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Lefaure?= <jeremy.lefaure@lse.epita.fr>
+Date: Sat, 25 Feb 2017 17:46:53 -0500
+Subject: x86/kvm/vmx: remove unused variable in segment_base()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jérémy Lefaure <jeremy.lefaure@lse.epita.fr>
+
+commit 0fce546f9f07b94ccc9de09cf48d35e18946d2fa upstream.
+
+The pointer 'struct desc_struct *d' is unused since commit 8c2e41f7ae12
+("x86/kvm/vmx: Simplify segment_base()") so let's remove it.
+
+Signed-off-by: Jérémy Lefaure <jeremy.lefaure@lse.epita.fr>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
+Signed-off-by: Eduardo Valentin <eduval@amazon.com>
+Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx.c |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -2016,7 +2016,6 @@ static bool update_transition_efer(struc
+ static unsigned long segment_base(u16 selector)
+ {
+       struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
+-      struct desc_struct *d;
+       struct desc_struct *table;
+       unsigned long v;
+ 
diff --git a/queue-4.9/x86-kvm-vmx-simplify-segment_base.patch b/queue-4.9/x86-kvm-vmx-simplify-segment_base.patch

new file mode 100644 (file)

index 0000000..ab2b7b7
--- /dev/null
+++ b/queue-4.9/x86-kvm-vmx-simplify-segment_base.patch
@@ -0,0 +1,72 @@
+From 8c2e41f7ae1234c192ef497472ad306227c77c03 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 20 Feb 2017 08:56:12 -0800
+Subject: x86/kvm/vmx: Simplify segment_base()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 8c2e41f7ae1234c192ef497472ad306227c77c03 upstream.
+
+Use actual pointer types for pointers (instead of unsigned long) and
+replace hardcoded constants with the appropriate self-documenting
+macros.
+
+The function is still a bit messy, but this seems a lot better than
+before to me.
+
+This is mostly borrowed from a patch by Thomas Garnier.
+
+Cc: Thomas Garnier <thgarnie@google.com>
+Cc: Jim Mattson <jmattson@google.com>
+Cc: Radim Krčmář <rkrcmar@redhat.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Eduardo Valentin <eduval@amazon.com>
+Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx.c |   19 +++++++------------
+ 1 file changed, 7 insertions(+), 12 deletions(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -2030,28 +2030,23 @@ static unsigned long segment_base(u16 se
+ {
+       struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
+       struct desc_struct *d;
+-      unsigned long table_base;
++      struct desc_struct *table;
+       unsigned long v;
+ 
+-      if (!(selector & ~3))
++      if (!(selector & ~SEGMENT_RPL_MASK))
+               return 0;
+ 
+-      table_base = gdt->address;
++      table = (struct desc_struct *)gdt->address;
+ 
+-      if (selector & 4) {           /* from ldt */
++      if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT) {
+               u16 ldt_selector = kvm_read_ldt();
+ 
+-              if (!(ldt_selector & ~3))
++              if (!(ldt_selector & ~SEGMENT_RPL_MASK))
+                       return 0;
+ 
+-              table_base = segment_base(ldt_selector);
++              table = (struct desc_struct *)segment_base(ldt_selector);
+       }
+-      d = (struct desc_struct *)(table_base + (selector & ~7));
+-      v = get_desc_base(d);
+-#ifdef CONFIG_X86_64
+-       if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
+-               v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32;
+-#endif
++      v = get_desc_base(&table[selector >> 3]);
+       return v;
+ }
+ 
diff --git a/queue-4.9/x86-mm-be-more-consistent-wrt-page_shift-vs-page_size-in-tlb-flush-code.patch b/queue-4.9/x86-mm-be-more-consistent-wrt-page_shift-vs-page_size-in-tlb-flush-code.patch

new file mode 100644 (file)

index 0000000..472a6d5
--- /dev/null
+++ b/queue-4.9/x86-mm-be-more-consistent-wrt-page_shift-vs-page_size-in-tlb-flush-code.patch
@@ -0,0 +1,57 @@
+From be4ffc0d787fafb22b89a2f29e71fea3b119205e Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sun, 28 May 2017 10:00:16 -0700
+Subject: x86/mm: Be more consistent wrt PAGE_SHIFT vs PAGE_SIZE in tlb flush code
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit be4ffc0d787fafb22b89a2f29e71fea3b119205e upstream.
+
+Nadav pointed out that some code used PAGE_SIZE and other code used
+PAGE_SHIFT.  Use PAGE_SHIFT instead of multiplying or dividing by
+PAGE_SIZE.
+
+Requested-by: Nadav Amit <nadav.amit@gmail.com>
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Nadav Amit <nadav.amit@gmail.com>
+Cc: Nadav Amit <namit@vmware.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Eduardo Valentin <eduval@amazon.com>
+Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/tlb.c |    5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -220,8 +220,7 @@ static void flush_tlb_func_common(const
+               trace_tlb_flush(reason, TLB_FLUSH_ALL);
+       } else {
+               unsigned long addr;
+-              unsigned long nr_pages =
+-                      (f->end - f->start) / PAGE_SIZE;
++              unsigned long nr_pages = (f->end - f->start) >> PAGE_SHIFT;
+               addr = f->start;
+               while (addr < f->end) {
+                       __flush_tlb_single(addr);
+@@ -374,7 +373,7 @@ void flush_tlb_kernel_range(unsigned lon
+ 
+       /* Balance as user space task's flush, a bit conservative */
+       if (end == TLB_FLUSH_ALL ||
+-          (end - start) > tlb_single_page_flush_ceiling * PAGE_SIZE) {
++          (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) {
+               on_each_cpu(do_flush_tlb_all, NULL, 1);
+       } else {
+               struct flush_tlb_info info;
diff --git a/queue-4.9/x86-mm-change-the-leave_mm-condition-for-local-tlb-flushes.patch b/queue-4.9/x86-mm-change-the-leave_mm-condition-for-local-tlb-flushes.patch

new file mode 100644 (file)

index 0000000..dbdfdf6
--- /dev/null
+++ b/queue-4.9/x86-mm-change-the-leave_mm-condition-for-local-tlb-flushes.patch
@@ -0,0 +1,53 @@
+From 59f537c1dea04287165bb11407921e095250dc80 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sun, 28 May 2017 10:00:11 -0700
+Subject: x86/mm: Change the leave_mm() condition for local TLB flushes
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 59f537c1dea04287165bb11407921e095250dc80 upstream.
+
+On a remote TLB flush, we leave_mm() if we're TLBSTATE_LAZY.  For a
+local flush_tlb_mm_range(), we leave_mm() if !current->mm.  These
+are approximately the same condition -- the scheduler sets lazy TLB
+mode when switching to a thread with no mm.
+
+I'm about to merge the local and remote flush code, but for ease of
+verifying and bisecting the patch, I want the local and remote flush
+behavior to match first.  This patch changes the local code to match
+the remote code.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Acked-by: Rik van Riel <riel@redhat.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Nadav Amit <nadav.amit@gmail.com>
+Cc: Nadav Amit <namit@vmware.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Eduardo Valentin <eduval@amazon.com>
+Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/tlb.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -328,7 +328,7 @@ void flush_tlb_mm_range(struct mm_struct
+               goto out;
+       }
+ 
+-      if (!current->mm) {
++      if (this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) {
+               leave_mm(smp_processor_id());
+ 
+               /* Synchronize with switch_mm. */
diff --git a/queue-4.9/x86-mm-kvm-teach-kvm-s-vmx-code-that-cr3-isn-t-a-constant.patch b/queue-4.9/x86-mm-kvm-teach-kvm-s-vmx-code-that-cr3-isn-t-a-constant.patch

new file mode 100644 (file)

index 0000000..55191ad
--- /dev/null
+++ b/queue-4.9/x86-mm-kvm-teach-kvm-s-vmx-code-that-cr3-isn-t-a-constant.patch
@@ -0,0 +1,144 @@
+From d6e41f1151feeb118eee776c09323aceb4a415d9 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sun, 28 May 2017 10:00:17 -0700
+Subject: x86/mm, KVM: Teach KVM's VMX code that CR3 isn't a constant
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit d6e41f1151feeb118eee776c09323aceb4a415d9 upstream.
+
+When PCID is enabled, CR3's PCID bits can change during context
+switches, so KVM won't be able to treat CR3 as a per-mm constant any
+more.
+
+I structured this like the existing CR4 handling.  Under ordinary
+circumstances (PCID disabled or if the current PCID and the value
+that's already in the VMCS match), then we won't do an extra VMCS
+write, and we'll never do an extra direct CR3 read.  The overhead
+should be minimal.
+
+I disallowed using the new helper in non-atomic context because
+PCID support will cause CR3 to stop being constant in non-atomic
+process context.
+
+(Frankly, it also scares me a bit that KVM ever treated CR3 as
+constant, but it looks like it was okay before.)
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Nadav Amit <nadav.amit@gmail.com>
+Cc: Nadav Amit <namit@vmware.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Radim Krčmář <rkrcmar@redhat.com>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: kvm@vger.kernel.org
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Eduardo Valentin <eduval@amazon.com>
+Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/mmu_context.h |   19 +++++++++++++++++++
+ arch/x86/kvm/vmx.c                 |   25 +++++++++++++++++++++----
+ 2 files changed, 40 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -268,4 +268,23 @@ static inline bool arch_pte_access_permi
+ {
+       return __pkru_allows_pkey(pte_flags_pkey(pte_flags(pte)), write);
+ }
++
++/*
++ * This can be used from process context to figure out what the value of
++ * CR3 is without needing to do a (slow) read_cr3().
++ *
++ * It's intended to be used for code like KVM that sneakily changes CR3
++ * and needs to restore it.  It needs to be used very carefully.
++ */
++static inline unsigned long __get_current_cr3_fast(void)
++{
++      unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);
++
++      /* For now, be very restrictive about when this can be called. */
++      VM_WARN_ON(in_nmi() || !in_atomic());
++
++      VM_BUG_ON(cr3 != read_cr3());
++      return cr3;
++}
++
+ #endif /* _ASM_X86_MMU_CONTEXT_H */
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -48,6 +48,7 @@
+ #include <asm/kexec.h>
+ #include <asm/apic.h>
+ #include <asm/irq_remapping.h>
++#include <asm/mmu_context.h>
+ 
+ #include "trace.h"
+ #include "pmu.h"
+@@ -572,6 +573,7 @@ struct vcpu_vmx {
+               int           gs_ldt_reload_needed;
+               int           fs_reload_needed;
+               u64           msr_host_bndcfgs;
++              unsigned long vmcs_host_cr3;    /* May not match real cr3 */
+               unsigned long vmcs_host_cr4;    /* May not match real cr4 */
+       } host_state;
+       struct {
+@@ -4857,10 +4859,19 @@ static void vmx_set_constant_host_state(
+       u32 low32, high32;
+       unsigned long tmpl;
+       struct desc_ptr dt;
+-      unsigned long cr4;
++      unsigned long cr0, cr3, cr4;
+ 
+-      vmcs_writel(HOST_CR0, read_cr0() & ~X86_CR0_TS);  /* 22.2.3 */
+-      vmcs_writel(HOST_CR3, read_cr3());  /* 22.2.3  FIXME: shadow tables */
++      cr0 = read_cr0();
++      WARN_ON(cr0 & X86_CR0_TS);
++      vmcs_writel(HOST_CR0, cr0);  /* 22.2.3 */
++
++      /*
++       * Save the most likely value for this task's CR3 in the VMCS.
++       * We can't use __get_current_cr3_fast() because we're not atomic.
++       */
++      cr3 = read_cr3();
++      vmcs_writel(HOST_CR3, cr3);             /* 22.2.3  FIXME: shadow tables */
++      vmx->host_state.vmcs_host_cr3 = cr3;
+ 
+       /* Save the most likely value for this task's CR4 in the VMCS. */
+       cr4 = cr4_read_shadow();
+@@ -8836,7 +8847,7 @@ void vmx_arm_hv_timer(struct kvm_vcpu *v
+ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
+ {
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+-      unsigned long debugctlmsr, cr4;
++      unsigned long debugctlmsr, cr3, cr4;
+ 
+       /* Record the guest's net vcpu time for enforced NMI injections. */
+       if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
+@@ -8862,6 +8873,12 @@ static void __noclone vmx_vcpu_run(struc
+       if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
+               vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
+ 
++      cr3 = __get_current_cr3_fast();
++      if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) {
++              vmcs_writel(HOST_CR3, cr3);
++              vmx->host_state.vmcs_host_cr3 = cr3;
++      }
++
+       cr4 = cr4_read_shadow();
+       if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) {
+               vmcs_writel(HOST_CR4, cr4);
diff --git a/queue-4.9/x86-mm-pass-flush_tlb_info-to-flush_tlb_others-etc.patch b/queue-4.9/x86-mm-pass-flush_tlb_info-to-flush_tlb_others-etc.patch

new file mode 100644 (file)

index 0000000..ca4bea7
--- /dev/null
+++ b/queue-4.9/x86-mm-pass-flush_tlb_info-to-flush_tlb_others-etc.patch
@@ -0,0 +1,422 @@
+From a2055abe9c6789cedef29abbdaa488a087faccc3 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sun, 28 May 2017 10:00:10 -0700
+Subject: x86/mm: Pass flush_tlb_info to flush_tlb_others() etc
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit a2055abe9c6789cedef29abbdaa488a087faccc3 upstream.
+
+Rather than passing all the contents of flush_tlb_info to
+flush_tlb_others(), pass a pointer to the structure directly. For
+consistency, this also removes the unnecessary cpu parameter from
+uv_flush_tlb_others() to make its signature match the other
+*flush_tlb_others() functions.
+
+This serves two purposes:
+
+ - It will dramatically simplify future patches that change struct
+   flush_tlb_info, which I'm planning to do.
+
+ - struct flush_tlb_info is an adequate description of what to do
+   for a local flush, too, so by reusing it we can remove duplicated
+   code between local and remote flushes in a future patch.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Acked-by: Rik van Riel <riel@redhat.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Nadav Amit <nadav.amit@gmail.com>
+Cc: Nadav Amit <namit@vmware.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-mm@kvack.org
+[ Fix build warning. ]
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Eduardo Valentin <eduval@amazon.com>
+Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/paravirt.h       |    6 --
+ arch/x86/include/asm/paravirt_types.h |    5 --
+ arch/x86/include/asm/tlbflush.h       |   19 +++++---
+ arch/x86/include/asm/uv/uv.h          |   11 ++---
+ arch/x86/mm/tlb.c                     |   72 ++++++++++++++++++----------------
+ arch/x86/platform/uv/tlb_uv.c         |   10 +---
+ arch/x86/xen/mmu.c                    |   10 ++--
+ 7 files changed, 68 insertions(+), 65 deletions(-)
+
+--- a/arch/x86/include/asm/paravirt.h
++++ b/arch/x86/include/asm/paravirt.h
+@@ -317,11 +317,9 @@ static inline void __flush_tlb_single(un
+ }
+ 
+ static inline void flush_tlb_others(const struct cpumask *cpumask,
+-                                  struct mm_struct *mm,
+-                                  unsigned long start,
+-                                  unsigned long end)
++                                  const struct flush_tlb_info *info)
+ {
+-      PVOP_VCALL4(pv_mmu_ops.flush_tlb_others, cpumask, mm, start, end);
++      PVOP_VCALL2(pv_mmu_ops.flush_tlb_others, cpumask, info);
+ }
+ 
+ static inline int paravirt_pgd_alloc(struct mm_struct *mm)
+--- a/arch/x86/include/asm/paravirt_types.h
++++ b/arch/x86/include/asm/paravirt_types.h
+@@ -51,6 +51,7 @@ struct mm_struct;
+ struct desc_struct;
+ struct task_struct;
+ struct cpumask;
++struct flush_tlb_info;
+ 
+ /*
+  * Wrapper type for pointers to code which uses the non-standard
+@@ -225,9 +226,7 @@ struct pv_mmu_ops {
+       void (*flush_tlb_kernel)(void);
+       void (*flush_tlb_single)(unsigned long addr);
+       void (*flush_tlb_others)(const struct cpumask *cpus,
+-                               struct mm_struct *mm,
+-                               unsigned long start,
+-                               unsigned long end);
++                               const struct flush_tlb_info *info);
+ 
+       /* Hooks for allocating and freeing a pagetable top-level */
+       int  (*pgd_alloc)(struct mm_struct *mm);
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -211,12 +211,18 @@ static inline void __flush_tlb_one(unsig
+  *  - flush_tlb_page(vma, vmaddr) flushes one page
+  *  - flush_tlb_range(vma, start, end) flushes a range of pages
+  *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
+- *  - flush_tlb_others(cpumask, mm, start, end) flushes TLBs on other cpus
++ *  - flush_tlb_others(cpumask, info) flushes TLBs on other cpus
+  *
+  * ..but the i386 has somewhat limited tlb flushing capabilities,
+  * and page-granular flushes are available only on i486 and up.
+  */
+ 
++struct flush_tlb_info {
++      struct mm_struct *mm;
++      unsigned long start;
++      unsigned long end;
++};
++
+ #ifndef CONFIG_SMP
+ 
+ /* "_up" is for UniProcessor.
+@@ -275,9 +281,7 @@ static inline void flush_tlb_mm_range(st
+ }
+ 
+ static inline void native_flush_tlb_others(const struct cpumask *cpumask,
+-                                         struct mm_struct *mm,
+-                                         unsigned long start,
+-                                         unsigned long end)
++                                         const struct flush_tlb_info *info)
+ {
+ }
+ 
+@@ -315,8 +319,7 @@ static inline void flush_tlb_page(struct
+ }
+ 
+ void native_flush_tlb_others(const struct cpumask *cpumask,
+-                              struct mm_struct *mm,
+-                              unsigned long start, unsigned long end);
++                           const struct flush_tlb_info *info);
+ 
+ #define TLBSTATE_OK   1
+ #define TLBSTATE_LAZY 2
+@@ -338,8 +341,8 @@ extern void arch_tlbbatch_flush(struct a
+ #endif        /* SMP */
+ 
+ #ifndef CONFIG_PARAVIRT
+-#define flush_tlb_others(mask, mm, start, end)        \
+-      native_flush_tlb_others(mask, mm, start, end)
++#define flush_tlb_others(mask, info)  \
++      native_flush_tlb_others(mask, info)
+ #endif
+ 
+ #endif /* _ASM_X86_TLBFLUSH_H */
+--- a/arch/x86/include/asm/uv/uv.h
++++ b/arch/x86/include/asm/uv/uv.h
+@@ -1,6 +1,8 @@
+ #ifndef _ASM_X86_UV_UV_H
+ #define _ASM_X86_UV_UV_H
+ 
++#include <asm/tlbflush.h>
++
+ enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
+ 
+ struct cpumask;
+@@ -14,10 +16,7 @@ extern void uv_cpu_init(void);
+ extern void uv_nmi_init(void);
+ extern void uv_system_init(void);
+ extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
+-                                               struct mm_struct *mm,
+-                                               unsigned long start,
+-                                               unsigned long end,
+-                                               unsigned int cpu);
++                                               const struct flush_tlb_info *info);
+ 
+ #else /* X86_UV */
+ 
+@@ -26,8 +25,8 @@ static inline int is_uv_system(void) { r
+ static inline void uv_cpu_init(void)  { }
+ static inline void uv_system_init(void)       { }
+ static inline const struct cpumask *
+-uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm,
+-                  unsigned long start, unsigned long end, unsigned int cpu)
++uv_flush_tlb_others(const struct cpumask *cpumask,
++                  const struct flush_tlb_info *info)
+ { return cpumask; }
+ 
+ #endif        /* X86_UV */
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -30,12 +30,6 @@
+ 
+ #ifdef CONFIG_SMP
+ 
+-struct flush_tlb_info {
+-      struct mm_struct *flush_mm;
+-      unsigned long flush_start;
+-      unsigned long flush_end;
+-};
+-
+ /*
+  * We cannot call mmdrop() because we are in interrupt context,
+  * instead update mm->cpu_vm_mask.
+@@ -229,11 +223,11 @@ void switch_mm_irqs_off(struct mm_struct
+  */
+ static void flush_tlb_func(void *info)
+ {
+-      struct flush_tlb_info *f = info;
++      const struct flush_tlb_info *f = info;
+ 
+       inc_irq_stat(irq_tlb_count);
+ 
+-      if (f->flush_mm && f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm))
++      if (f->mm && f->mm != this_cpu_read(cpu_tlbstate.active_mm))
+               return;
+ 
+       count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
+@@ -243,15 +237,15 @@ static void flush_tlb_func(void *info)
+               return;
+       }
+ 
+-      if (f->flush_end == TLB_FLUSH_ALL) {
++      if (f->end == TLB_FLUSH_ALL) {
+               local_flush_tlb();
+               trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, TLB_FLUSH_ALL);
+       } else {
+               unsigned long addr;
+               unsigned long nr_pages =
+-                      (f->flush_end - f->flush_start) / PAGE_SIZE;
+-              addr = f->flush_start;
+-              while (addr < f->flush_end) {
++                      (f->end - f->start) / PAGE_SIZE;
++              addr = f->start;
++              while (addr < f->end) {
+                       __flush_tlb_single(addr);
+                       addr += PAGE_SIZE;
+               }
+@@ -260,38 +254,38 @@ static void flush_tlb_func(void *info)
+ }
+ 
+ void native_flush_tlb_others(const struct cpumask *cpumask,
+-                               struct mm_struct *mm, unsigned long start,
+-                               unsigned long end)
++                           const struct flush_tlb_info *info)
+ {
+-      struct flush_tlb_info info;
+-
+-      info.flush_mm = mm;
+-      info.flush_start = start;
+-      info.flush_end = end;
+-
+       count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
+-      if (end == TLB_FLUSH_ALL)
++      if (info->end == TLB_FLUSH_ALL)
+               trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
+       else
+               trace_tlb_flush(TLB_REMOTE_SEND_IPI,
+-                              (end - start) >> PAGE_SHIFT);
++                              (info->end - info->start) >> PAGE_SHIFT);
+ 
+       if (is_uv_system()) {
+               unsigned int cpu;
+ 
+               cpu = smp_processor_id();
+-              cpumask = uv_flush_tlb_others(cpumask, mm, start, end, cpu);
++              cpumask = uv_flush_tlb_others(cpumask, info);
+               if (cpumask)
+                       smp_call_function_many(cpumask, flush_tlb_func,
+-                                                              &info, 1);
++                                             (void *)info, 1);
+               return;
+       }
+-      smp_call_function_many(cpumask, flush_tlb_func, &info, 1);
++      smp_call_function_many(cpumask, flush_tlb_func,
++                             (void *)info, 1);
+ }
+ 
+ void flush_tlb_current_task(void)
+ {
+       struct mm_struct *mm = current->mm;
++      struct flush_tlb_info info = {
++              .mm = mm,
++              .start = 0UL,
++              .end = TLB_FLUSH_ALL,
++      };
++
+ 
+       preempt_disable();
+ 
+@@ -302,7 +296,7 @@ void flush_tlb_current_task(void)
+ 
+       trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
+       if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
+-              flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
++              flush_tlb_others(mm_cpumask(mm), &info);
+       preempt_enable();
+ }
+ 
+@@ -322,6 +316,7 @@ void flush_tlb_mm_range(struct mm_struct
+                               unsigned long end, unsigned long vmflag)
+ {
+       unsigned long addr;
++      struct flush_tlb_info info;
+       /* do a global flush by default */
+       unsigned long base_pages_to_flush = TLB_FLUSH_ALL;
+ 
+@@ -362,15 +357,20 @@ void flush_tlb_mm_range(struct mm_struct
+       }
+       trace_tlb_flush(TLB_LOCAL_MM_SHOOTDOWN, base_pages_to_flush);
+ out:
++      info.mm = mm;
+       if (base_pages_to_flush == TLB_FLUSH_ALL) {
+-              start = 0UL;
+-              end = TLB_FLUSH_ALL;
++              info.start = 0UL;
++              info.end = TLB_FLUSH_ALL;
++      } else {
++              info.start = start;
++              info.end = end;
+       }
+       if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
+-              flush_tlb_others(mm_cpumask(mm), mm, start, end);
++              flush_tlb_others(mm_cpumask(mm), &info);
+       preempt_enable();
+ }
+ 
++
+ static void do_flush_tlb_all(void *info)
+ {
+       count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
+@@ -391,7 +391,7 @@ static void do_kernel_range_flush(void *
+       unsigned long addr;
+ 
+       /* flush range by one by one 'invlpg' */
+-      for (addr = f->flush_start; addr < f->flush_end; addr += PAGE_SIZE)
++      for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
+               __flush_tlb_single(addr);
+ }
+ 
+@@ -404,14 +404,20 @@ void flush_tlb_kernel_range(unsigned lon
+               on_each_cpu(do_flush_tlb_all, NULL, 1);
+       } else {
+               struct flush_tlb_info info;
+-              info.flush_start = start;
+-              info.flush_end = end;
++              info.start = start;
++              info.end = end;
+               on_each_cpu(do_kernel_range_flush, &info, 1);
+       }
+ }
+ 
+ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
+ {
++      struct flush_tlb_info info = {
++              .mm = NULL,
++              .start = 0UL,
++              .end = TLB_FLUSH_ALL,
++      };
++
+       int cpu = get_cpu();
+ 
+       if (cpumask_test_cpu(cpu, &batch->cpumask)) {
+@@ -421,7 +427,7 @@ void arch_tlbbatch_flush(struct arch_tlb
+       }
+ 
+       if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
+-              flush_tlb_others(&batch->cpumask, NULL, 0, TLB_FLUSH_ALL);
++              flush_tlb_others(&batch->cpumask, &info);
+       cpumask_clear(&batch->cpumask);
+ 
+       put_cpu();
+--- a/arch/x86/platform/uv/tlb_uv.c
++++ b/arch/x86/platform/uv/tlb_uv.c
+@@ -1110,11 +1110,9 @@ static int set_distrib_bits(struct cpuma
+  * done.  The returned pointer is valid till preemption is re-enabled.
+  */
+ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
+-                                              struct mm_struct *mm,
+-                                              unsigned long start,
+-                                              unsigned long end,
+-                                              unsigned int cpu)
++                                        const struct flush_tlb_info *info)
+ {
++      unsigned int cpu = smp_processor_id();
+       int locals = 0;
+       int remotes = 0;
+       int hubs = 0;
+@@ -1171,8 +1169,8 @@ const struct cpumask *uv_flush_tlb_other
+ 
+       record_send_statistics(stat, locals, hubs, remotes, bau_desc);
+ 
+-      if (!end || (end - start) <= PAGE_SIZE)
+-              bau_desc->payload.address = start;
++      if (!info->end || (info->end - info->start) <= PAGE_SIZE)
++              bau_desc->payload.address = info->start;
+       else
+               bau_desc->payload.address = TLB_FLUSH_ALL;
+       bau_desc->payload.sending_cpu = cpu;
+--- a/arch/x86/xen/mmu.c
++++ b/arch/x86/xen/mmu.c
+@@ -1372,8 +1372,7 @@ static void xen_flush_tlb_single(unsigne
+ }
+ 
+ static void xen_flush_tlb_others(const struct cpumask *cpus,
+-                               struct mm_struct *mm, unsigned long start,
+-                               unsigned long end)
++                               const struct flush_tlb_info *info)
+ {
+       struct {
+               struct mmuext_op op;
+@@ -1385,7 +1384,7 @@ static void xen_flush_tlb_others(const s
+       } *args;
+       struct multicall_space mcs;
+ 
+-      trace_xen_mmu_flush_tlb_others(cpus, mm, start, end);
++      trace_xen_mmu_flush_tlb_others(cpus, info->mm, info->start, info->end);
+ 
+       if (cpumask_empty(cpus))
+               return;         /* nothing to do */
+@@ -1399,9 +1398,10 @@ static void xen_flush_tlb_others(const s
+       cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask));
+ 
+       args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
+-      if (end != TLB_FLUSH_ALL && (end - start) <= PAGE_SIZE) {
++      if (info->end != TLB_FLUSH_ALL &&
++          (info->end - info->start) <= PAGE_SIZE) {
+               args->op.cmd = MMUEXT_INVLPG_MULTI;
+-              args->op.arg1.linear_addr = start;
++              args->op.arg1.linear_addr = info->start;
+       }
+ 
+       MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);
diff --git a/queue-4.9/x86-mm-reduce-indentation-in-flush_tlb_func.patch b/queue-4.9/x86-mm-reduce-indentation-in-flush_tlb_func.patch

new file mode 100644 (file)

index 0000000..737f6dc
--- /dev/null
+++ b/queue-4.9/x86-mm-reduce-indentation-in-flush_tlb_func.patch
@@ -0,0 +1,81 @@
+From b3b90e5af7976e46541f5029a369c9c38c5e4cea Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 22 May 2017 15:30:02 -0700
+Subject: x86/mm: Reduce indentation in flush_tlb_func()
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit b3b90e5af7976e46541f5029a369c9c38c5e4cea upstream.
+
+The leave_mm() case can just exit the function early so we don't
+need to indent the entire remainder of the function.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Acked-by: Kees Cook <keescook@chromium.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Nadav Amit <nadav.amit@gmail.com>
+Cc: Nadav Amit <namit@vmware.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/97901ddcc9821d7bc7b296d2918d1179f08aaf22.1495492063.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Eduardo Valentin <eduval@amazon.com>
+Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/mm/tlb.c |   34 ++++++++++++++++++----------------
+ 1 file changed, 18 insertions(+), 16 deletions(-)
+
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -237,24 +237,26 @@ static void flush_tlb_func(void *info)
+               return;
+ 
+       count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
+-      if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
+-              if (f->flush_end == TLB_FLUSH_ALL) {
+-                      local_flush_tlb();
+-                      trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, TLB_FLUSH_ALL);
+-              } else {
+-                      unsigned long addr;
+-                      unsigned long nr_pages =
+-                              (f->flush_end - f->flush_start) / PAGE_SIZE;
+-                      addr = f->flush_start;
+-                      while (addr < f->flush_end) {
+-                              __flush_tlb_single(addr);
+-                              addr += PAGE_SIZE;
+-                      }
+-                      trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, nr_pages);
+-              }
+-      } else
++
++      if (this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) {
+               leave_mm(smp_processor_id());
++              return;
++      }
+ 
++      if (f->flush_end == TLB_FLUSH_ALL) {
++              local_flush_tlb();
++              trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, TLB_FLUSH_ALL);
++      } else {
++              unsigned long addr;
++              unsigned long nr_pages =
++                      (f->flush_end - f->flush_start) / PAGE_SIZE;
++              addr = f->flush_start;
++              while (addr < f->flush_end) {
++                      __flush_tlb_single(addr);
++                      addr += PAGE_SIZE;
++              }
++              trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, nr_pages);
++      }
+ }
+ 
+ void native_flush_tlb_others(const struct cpumask *cpumask,
diff --git a/queue-4.9/x86-mm-refactor-flush_tlb_mm_range-to-merge-local-and-remote-cases.patch b/queue-4.9/x86-mm-refactor-flush_tlb_mm_range-to-merge-local-and-remote-cases.patch

new file mode 100644 (file)

index 0000000..da217d4
--- /dev/null
+++ b/queue-4.9/x86-mm-refactor-flush_tlb_mm_range-to-merge-local-and-remote-cases.patch
@@ -0,0 +1,224 @@
+From 454bbad9793f59f5656ce5971ee473a8be736ef5 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sun, 28 May 2017 10:00:12 -0700
+Subject: x86/mm: Refactor flush_tlb_mm_range() to merge local and remote cases
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 454bbad9793f59f5656ce5971ee473a8be736ef5 upstream.
+
+The local flush path is very similar to the remote flush path.
+Merge them.
+
+This is intended to make no difference to behavior whatsoever.  It
+removes some code and will make future changes to the flushing
+mechanics simpler.
+
+This patch does remove one small optimization: flush_tlb_mm_range()
+now has an unconditional smp_mb() instead of using MOV to CR3 or
+INVLPG as a full barrier when applicable.  I think this is okay for
+a few reasons.  First, smp_mb() is quite cheap compared to the cost
+of a TLB flush.  Second, this rearrangement makes a bigger
+optimization available: with some work on the SMP function call
+code, we could do the local and remote flushes in parallel.  Third,
+I'm planning a rework of the TLB flush algorithm that will require
+an atomic operation at the beginning of each flush, and that
+operation will replace the smp_mb().
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Nadav Amit <nadav.amit@gmail.com>
+Cc: Nadav Amit <namit@vmware.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Eduardo Valentin <eduval@amazon.com>
+Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/tlbflush.h |    1 
+ arch/x86/mm/tlb.c               |  111 +++++++++++++++++-----------------------
+ 2 files changed, 48 insertions(+), 64 deletions(-)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -216,7 +216,6 @@ static inline void __flush_tlb_one(unsig
+  * ..but the i386 has somewhat limited tlb flushing capabilities,
+  * and page-granular flushes are available only on i486 and up.
+  */
+-
+ struct flush_tlb_info {
+       struct mm_struct *mm;
+       unsigned long start;
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -216,22 +216,9 @@ void switch_mm_irqs_off(struct mm_struct
+  * write/read ordering problems.
+  */
+ 
+-/*
+- * TLB flush funcation:
+- * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
+- * 2) Leave the mm if we are in the lazy tlb mode.
+- */
+-static void flush_tlb_func(void *info)
++static void flush_tlb_func_common(const struct flush_tlb_info *f,
++                                bool local, enum tlb_flush_reason reason)
+ {
+-      const struct flush_tlb_info *f = info;
+-
+-      inc_irq_stat(irq_tlb_count);
+-
+-      if (f->mm && f->mm != this_cpu_read(cpu_tlbstate.active_mm))
+-              return;
+-
+-      count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
+-
+       if (this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) {
+               leave_mm(smp_processor_id());
+               return;
+@@ -239,7 +226,9 @@ static void flush_tlb_func(void *info)
+ 
+       if (f->end == TLB_FLUSH_ALL) {
+               local_flush_tlb();
+-              trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, TLB_FLUSH_ALL);
++              if (local)
++                      count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
++              trace_tlb_flush(reason, TLB_FLUSH_ALL);
+       } else {
+               unsigned long addr;
+               unsigned long nr_pages =
+@@ -249,10 +238,32 @@ static void flush_tlb_func(void *info)
+                       __flush_tlb_single(addr);
+                       addr += PAGE_SIZE;
+               }
+-              trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, nr_pages);
++              if (local)
++                      count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_pages);
++              trace_tlb_flush(reason, nr_pages);
+       }
+ }
+ 
++static void flush_tlb_func_local(void *info, enum tlb_flush_reason reason)
++{
++      const struct flush_tlb_info *f = info;
++
++      flush_tlb_func_common(f, true, reason);
++}
++
++static void flush_tlb_func_remote(void *info)
++{
++      const struct flush_tlb_info *f = info;
++
++      inc_irq_stat(irq_tlb_count);
++
++      if (f->mm && f->mm != this_cpu_read(cpu_tlbstate.active_mm))
++              return;
++
++      count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
++      flush_tlb_func_common(f, false, TLB_REMOTE_SHOOTDOWN);
++}
++
+ void native_flush_tlb_others(const struct cpumask *cpumask,
+                            const struct flush_tlb_info *info)
+ {
+@@ -269,11 +280,11 @@ void native_flush_tlb_others(const struc
+               cpu = smp_processor_id();
+               cpumask = uv_flush_tlb_others(cpumask, info);
+               if (cpumask)
+-                      smp_call_function_many(cpumask, flush_tlb_func,
++                      smp_call_function_many(cpumask, flush_tlb_func_remote,
+                                              (void *)info, 1);
+               return;
+       }
+-      smp_call_function_many(cpumask, flush_tlb_func,
++      smp_call_function_many(cpumask, flush_tlb_func_remote,
+                              (void *)info, 1);
+ }
+ 
+@@ -315,59 +326,33 @@ static unsigned long tlb_single_page_flu
+ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
+                               unsigned long end, unsigned long vmflag)
+ {
+-      unsigned long addr;
+-      struct flush_tlb_info info;
+-      /* do a global flush by default */
+-      unsigned long base_pages_to_flush = TLB_FLUSH_ALL;
+-
+-      preempt_disable();
+-      if (current->active_mm != mm) {
+-              /* Synchronize with switch_mm. */
+-              smp_mb();
++      int cpu;
+ 
+-              goto out;
+-      }
+-
+-      if (this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) {
+-              leave_mm(smp_processor_id());
+-
+-              /* Synchronize with switch_mm. */
+-              smp_mb();
++      struct flush_tlb_info info = {
++              .mm = mm,
++      };
+ 
+-              goto out;
+-      }
++      cpu = get_cpu();
+ 
+-      if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
+-              base_pages_to_flush = (end - start) >> PAGE_SHIFT;
++      /* Synchronize with switch_mm. */
++      smp_mb();
+ 
+-      /*
+-       * Both branches below are implicit full barriers (MOV to CR or
+-       * INVLPG) that synchronize with switch_mm.
+-       */
+-      if (base_pages_to_flush > tlb_single_page_flush_ceiling) {
+-              base_pages_to_flush = TLB_FLUSH_ALL;
+-              count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+-              local_flush_tlb();
++      /* Should we flush just the requested range? */
++      if ((end != TLB_FLUSH_ALL) &&
++          !(vmflag & VM_HUGETLB) &&
++          ((end - start) >> PAGE_SHIFT) <= tlb_single_page_flush_ceiling) {
++              info.start = start;
++              info.end = end;
+       } else {
+-              /* flush range by one by one 'invlpg' */
+-              for (addr = start; addr < end;  addr += PAGE_SIZE) {
+-                      count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
+-                      __flush_tlb_single(addr);
+-              }
+-      }
+-      trace_tlb_flush(TLB_LOCAL_MM_SHOOTDOWN, base_pages_to_flush);
+-out:
+-      info.mm = mm;
+-      if (base_pages_to_flush == TLB_FLUSH_ALL) {
+               info.start = 0UL;
+               info.end = TLB_FLUSH_ALL;
+-      } else {
+-              info.start = start;
+-              info.end = end;
+       }
+-      if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
++
++      if (mm == current->active_mm)
++              flush_tlb_func_local(&info, TLB_LOCAL_MM_SHOOTDOWN);
++      if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
+               flush_tlb_others(mm_cpumask(mm), &info);
+-      preempt_enable();
++      put_cpu();
+ }
+ 
+ 
diff --git a/queue-4.9/x86-mm-reimplement-flush_tlb_page-using-flush_tlb_mm_range.patch b/queue-4.9/x86-mm-reimplement-flush_tlb_page-using-flush_tlb_mm_range.patch

new file mode 100644 (file)

index 0000000..afd15b7
--- /dev/null
+++ b/queue-4.9/x86-mm-reimplement-flush_tlb_page-using-flush_tlb_mm_range.patch
@@ -0,0 +1,104 @@
+From ca6c99c0794875c6d1db6e22f246699691ab7e6b Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 22 May 2017 15:30:01 -0700
+Subject: x86/mm: Reimplement flush_tlb_page() using flush_tlb_mm_range()
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit ca6c99c0794875c6d1db6e22f246699691ab7e6b upstream.
+
+flush_tlb_page() was very similar to flush_tlb_mm_range() except that
+it had a couple of issues:
+
+ - It was missing an smp_mb() in the case where
+   current->active_mm != mm.  (This is a longstanding bug reported by Nadav Amit)
+
+ - It was missing tracepoints and vm counter updates.
+
+The only reason that I can see for keeping it as a separate
+function is that it could avoid a few branches that
+flush_tlb_mm_range() needs to decide to flush just one page.  This
+hardly seems worthwhile.  If we decide we want to get rid of those
+branches again, a better way would be to introduce an
+__flush_tlb_mm_range() helper and make both flush_tlb_page() and
+flush_tlb_mm_range() use it.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Acked-by: Kees Cook <keescook@chromium.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Nadav Amit <nadav.amit@gmail.com>
+Cc: Nadav Amit <namit@vmware.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/3cc3847cf888d8907577569b8bac3f01992ef8f9.1495492063.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Eduardo Valentin <eduval@amazon.com>
+Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/tlbflush.h |    5 ++++-
+ arch/x86/mm/tlb.c               |   27 ---------------------------
+ 2 files changed, 4 insertions(+), 28 deletions(-)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -304,12 +304,15 @@ static inline void flush_tlb_kernel_rang
+ 
+ extern void flush_tlb_all(void);
+ extern void flush_tlb_current_task(void);
+-extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
+ extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
+                               unsigned long end, unsigned long vmflag);
+ extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
+ 
+ #define flush_tlb()   flush_tlb_current_task()
++static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
++{
++      flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, VM_NONE);
++}
+ 
+ void native_flush_tlb_others(const struct cpumask *cpumask,
+                               struct mm_struct *mm,
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -369,33 +369,6 @@ out:
+       preempt_enable();
+ }
+ 
+-void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
+-{
+-      struct mm_struct *mm = vma->vm_mm;
+-
+-      preempt_disable();
+-
+-      if (current->active_mm == mm) {
+-              if (current->mm) {
+-                      /*
+-                       * Implicit full barrier (INVLPG) that synchronizes
+-                       * with switch_mm.
+-                       */
+-                      __flush_tlb_one(start);
+-              } else {
+-                      leave_mm(smp_processor_id());
+-
+-                      /* Synchronize with switch_mm. */
+-                      smp_mb();
+-              }
+-      }
+-
+-      if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
+-              flush_tlb_others(mm_cpumask(mm), mm, start, start + PAGE_SIZE);
+-
+-      preempt_enable();
+-}
+-
+ static void do_flush_tlb_all(void *info)
+ {
+       count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
diff --git a/queue-4.9/x86-mm-remove-the-up-asm-tlbflush.h-code-always-use-the-formerly-smp-code.patch b/queue-4.9/x86-mm-remove-the-up-asm-tlbflush.h-code-always-use-the-formerly-smp-code.patch

new file mode 100644 (file)

index 0000000..13badda
--- /dev/null
+++ b/queue-4.9/x86-mm-remove-the-up-asm-tlbflush.h-code-always-use-the-formerly-smp-code.patch
@@ -0,0 +1,320 @@
+From ce4a4e565f5264909a18c733b864c3f74467f69e Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sun, 28 May 2017 10:00:14 -0700
+Subject: x86/mm: Remove the UP asm/tlbflush.h code, always use the (formerly) SMP code
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit ce4a4e565f5264909a18c733b864c3f74467f69e upstream.
+
+The UP asm/tlbflush.h generates somewhat nicer code than the SMP version.
+Aside from that, it's fallen quite a bit behind the SMP code:
+
+ - flush_tlb_mm_range() didn't flush individual pages if the range
+   was small.
+
+ - The lazy TLB code was much weaker.  This usually wouldn't matter,
+   but, if a kernel thread flushed its lazy "active_mm" more than
+   once (due to reclaim or similar), it wouldn't be unlazied and
+   would instead pointlessly flush repeatedly.
+
+ - Tracepoints were missing.
+
+Aside from that, simply having the UP code around was a maintenance
+burden, since it means that any change to the TLB flush code had to
+make sure not to break it.
+
+Simplify everything by deleting the UP code.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Nadav Amit <nadav.amit@gmail.com>
+Cc: Nadav Amit <namit@vmware.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Eduardo Valentin <eduval@amazon.com>
+Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/Kconfig                   |    2 
+ arch/x86/include/asm/hardirq.h     |    2 
+ arch/x86/include/asm/mmu.h         |    6 --
+ arch/x86/include/asm/mmu_context.h |    2 
+ arch/x86/include/asm/tlbbatch.h    |    2 
+ arch/x86/include/asm/tlbflush.h    |   81 -------------------------------------
+ arch/x86/mm/init.c                 |    2 
+ arch/x86/mm/tlb.c                  |   17 -------
+ 8 files changed, 5 insertions(+), 109 deletions(-)
+
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -45,7 +45,7 @@ config X86
+       select ARCH_USE_CMPXCHG_LOCKREF         if X86_64
+       select ARCH_USE_QUEUED_RWLOCKS
+       select ARCH_USE_QUEUED_SPINLOCKS
+-      select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP
++      select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+       select ARCH_WANTS_DYNAMIC_TASK_STRUCT
+       select ARCH_WANT_FRAME_POINTERS
+       select ARCH_WANT_IPC_PARSE_VERSION      if X86_32
+--- a/arch/x86/include/asm/hardirq.h
++++ b/arch/x86/include/asm/hardirq.h
+@@ -22,8 +22,8 @@ typedef struct {
+ #ifdef CONFIG_SMP
+       unsigned int irq_resched_count;
+       unsigned int irq_call_count;
+-      unsigned int irq_tlb_count;
+ #endif
++      unsigned int irq_tlb_count;
+ #ifdef CONFIG_X86_THERMAL_VECTOR
+       unsigned int irq_thermal_count;
+ #endif
+--- a/arch/x86/include/asm/mmu.h
++++ b/arch/x86/include/asm/mmu.h
+@@ -33,12 +33,6 @@ typedef struct {
+ #endif
+ } mm_context_t;
+ 
+-#ifdef CONFIG_SMP
+ void leave_mm(int cpu);
+-#else
+-static inline void leave_mm(int cpu)
+-{
+-}
+-#endif
+ 
+ #endif /* _ASM_X86_MMU_H */
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -99,10 +99,8 @@ static inline void load_mm_ldt(struct mm
+ 
+ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+ {
+-#ifdef CONFIG_SMP
+       if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
+               this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
+-#endif
+ }
+ 
+ static inline int init_new_context(struct task_struct *tsk,
+--- a/arch/x86/include/asm/tlbbatch.h
++++ b/arch/x86/include/asm/tlbbatch.h
+@@ -3,7 +3,6 @@
+ 
+ #include <linux/cpumask.h>
+ 
+-#ifdef CONFIG_SMP
+ struct arch_tlbflush_unmap_batch {
+       /*
+        * Each bit set is a CPU that potentially has a TLB entry for one of
+@@ -11,6 +10,5 @@ struct arch_tlbflush_unmap_batch {
+        */
+       struct cpumask cpumask;
+ };
+-#endif
+ 
+ #endif /* _ARCH_X86_TLBBATCH_H */
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -7,6 +7,7 @@
+ #include <asm/processor.h>
+ #include <asm/cpufeature.h>
+ #include <asm/special_insns.h>
++#include <asm/smp.h>
+ 
+ static inline void __invpcid(unsigned long pcid, unsigned long addr,
+                            unsigned long type)
+@@ -65,10 +66,8 @@ static inline void invpcid_flush_all_non
+ #endif
+ 
+ struct tlb_state {
+-#ifdef CONFIG_SMP
+       struct mm_struct *active_mm;
+       int state;
+-#endif
+ 
+       /*
+        * Access to this CR4 shadow and to H/W CR4 is protected by
+@@ -222,82 +221,6 @@ struct flush_tlb_info {
+       unsigned long end;
+ };
+ 
+-#ifndef CONFIG_SMP
+-
+-/* "_up" is for UniProcessor.
+- *
+- * This is a helper for other header functions.  *Not* intended to be called
+- * directly.  All global TLB flushes need to either call this, or to bump the
+- * vm statistics themselves.
+- */
+-static inline void __flush_tlb_up(void)
+-{
+-      count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+-      __flush_tlb();
+-}
+-
+-static inline void flush_tlb_all(void)
+-{
+-      count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+-      __flush_tlb_all();
+-}
+-
+-static inline void flush_tlb(void)
+-{
+-      __flush_tlb_up();
+-}
+-
+-static inline void local_flush_tlb(void)
+-{
+-      __flush_tlb_up();
+-}
+-
+-static inline void flush_tlb_mm(struct mm_struct *mm)
+-{
+-      if (mm == current->active_mm)
+-              __flush_tlb_up();
+-}
+-
+-static inline void flush_tlb_page(struct vm_area_struct *vma,
+-                                unsigned long addr)
+-{
+-      if (vma->vm_mm == current->active_mm)
+-              __flush_tlb_one(addr);
+-}
+-
+-static inline void flush_tlb_range(struct vm_area_struct *vma,
+-                                 unsigned long start, unsigned long end)
+-{
+-      if (vma->vm_mm == current->active_mm)
+-              __flush_tlb_up();
+-}
+-
+-static inline void flush_tlb_mm_range(struct mm_struct *mm,
+-         unsigned long start, unsigned long end, unsigned long vmflag)
+-{
+-      if (mm == current->active_mm)
+-              __flush_tlb_up();
+-}
+-
+-static inline void native_flush_tlb_others(const struct cpumask *cpumask,
+-                                         const struct flush_tlb_info *info)
+-{
+-}
+-
+-static inline void reset_lazy_tlbstate(void)
+-{
+-}
+-
+-static inline void flush_tlb_kernel_range(unsigned long start,
+-                                        unsigned long end)
+-{
+-      flush_tlb_all();
+-}
+-
+-#else  /* SMP */
+-
+-#include <asm/smp.h>
+-
+ #define local_flush_tlb() __flush_tlb()
+ 
+ #define flush_tlb_mm(mm)      flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL)
+@@ -337,8 +260,6 @@ static inline void arch_tlbbatch_add_mm(
+ 
+ extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
+ 
+-#endif        /* SMP */
+-
+ #ifndef CONFIG_PARAVIRT
+ #define flush_tlb_others(mask, info)  \
+       native_flush_tlb_others(mask, info)
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -764,10 +764,8 @@ void __init zone_sizes_init(void)
+ }
+ 
+ DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
+-#ifdef CONFIG_SMP
+       .active_mm = &init_mm,
+       .state = 0,
+-#endif
+       .cr4 = ~0UL,    /* fail hard if we screw up cr4 shadow initialization */
+ };
+ EXPORT_SYMBOL_GPL(cpu_tlbstate);
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -15,7 +15,7 @@
+ #include <linux/debugfs.h>
+ 
+ /*
+- *    Smarter SMP flushing macros.
++ *    TLB flushing, formerly SMP-only
+  *            c/o Linus Torvalds.
+  *
+  *    These mean you can really definitely utterly forget about
+@@ -28,8 +28,6 @@
+  *    Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
+  */
+ 
+-#ifdef CONFIG_SMP
+-
+ /*
+  * We cannot call mmdrop() because we are in interrupt context,
+  * instead update mm->cpu_vm_mask.
+@@ -53,8 +51,6 @@ void leave_mm(int cpu)
+ }
+ EXPORT_SYMBOL_GPL(leave_mm);
+ 
+-#endif /* CONFIG_SMP */
+-
+ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+              struct task_struct *tsk)
+ {
+@@ -85,10 +81,8 @@ void switch_mm_irqs_off(struct mm_struct
+                               set_pgd(pgd, init_mm.pgd[stack_pgd_index]);
+               }
+ 
+-#ifdef CONFIG_SMP
+               this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+               this_cpu_write(cpu_tlbstate.active_mm, next);
+-#endif
+ 
+               cpumask_set_cpu(cpu, mm_cpumask(next));
+ 
+@@ -146,9 +140,7 @@ void switch_mm_irqs_off(struct mm_struct
+               if (unlikely(prev->context.ldt != next->context.ldt))
+                       load_mm_ldt(next);
+ #endif
+-      }
+-#ifdef CONFIG_SMP
+-        else {
++      } else {
+               this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+               BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
+ 
+@@ -175,11 +167,8 @@ void switch_mm_irqs_off(struct mm_struct
+                       load_mm_ldt(next);
+               }
+       }
+-#endif
+ }
+ 
+-#ifdef CONFIG_SMP
+-
+ /*
+  * The flush IPI assumes that a thread switch happens in this order:
+  * [cpu0: the cpu that switches]
+@@ -459,5 +448,3 @@ static int __init create_tlb_single_page
+       return 0;
+ }
+ late_initcall(create_tlb_single_page_flush_ceiling);
+-
+-#endif /* CONFIG_SMP */
diff --git a/queue-4.9/x86-mm-rework-lazy-tlb-to-track-the-actual-loaded-mm.patch b/queue-4.9/x86-mm-rework-lazy-tlb-to-track-the-actual-loaded-mm.patch

new file mode 100644 (file)

index 0000000..7906046
--- /dev/null
+++ b/queue-4.9/x86-mm-rework-lazy-tlb-to-track-the-actual-loaded-mm.patch
@@ -0,0 +1,507 @@
+From 3d28ebceaffab40f30afa87e33331560148d7b8b Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sun, 28 May 2017 10:00:15 -0700
+Subject: x86/mm: Rework lazy TLB to track the actual loaded mm
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 3d28ebceaffab40f30afa87e33331560148d7b8b upstream.
+
+Lazy TLB state is currently managed in a rather baroque manner.
+AFAICT, there are three possible states:
+
+ - Non-lazy.  This means that we're running a user thread or a
+   kernel thread that has called use_mm().  current->mm ==
+   current->active_mm == cpu_tlbstate.active_mm and
+   cpu_tlbstate.state == TLBSTATE_OK.
+
+ - Lazy with user mm.  We're running a kernel thread without an mm
+   and we're borrowing an mm_struct.  We have current->mm == NULL,
+   current->active_mm == cpu_tlbstate.active_mm, cpu_tlbstate.state
+   != TLBSTATE_OK (i.e. TLBSTATE_LAZY or 0).  The current cpu is set
+   in mm_cpumask(current->active_mm).  CR3 points to
+   current->active_mm->pgd.  The TLB is up to date.
+
+ - Lazy with init_mm.  This happens when we call leave_mm().  We
+   have current->mm == NULL, current->active_mm ==
+   cpu_tlbstate.active_mm, but that mm is only relevant insofar as
+   the scheduler is tracking it for refcounting.  cpu_tlbstate.state
+   != TLBSTATE_OK.  The current cpu is clear in
+   mm_cpumask(current->active_mm).  CR3 points to swapper_pg_dir,
+   i.e. init_mm->pgd.
+
+This patch simplifies the situation.  Other than perf, x86 stops
+caring about current->active_mm at all.  We have
+cpu_tlbstate.loaded_mm pointing to the mm that CR3 references.  The
+TLB is always up to date for that mm.  leave_mm() just switches us
+to init_mm.  There are no longer any special cases for mm_cpumask,
+and switch_mm() switches mms without worrying about laziness.
+
+After this patch, cpu_tlbstate.state serves only to tell the TLB
+flush code whether it may switch to init_mm instead of doing a
+normal flush.
+
+This makes fairly extensive changes to xen_exit_mmap(), which used
+to look a bit like black magic.
+
+Perf is unchanged.  With or without this change, perf may behave a bit
+erratically if it tries to read user memory in kernel thread context.
+We should build on this patch to teach perf to never look at user
+memory when cpu_tlbstate.loaded_mm != current->mm.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Nadav Amit <nadav.amit@gmail.com>
+Cc: Nadav Amit <namit@vmware.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Eduardo Valentin <eduval@amazon.com>
+Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/events/core.c          |    3 
+ arch/x86/include/asm/tlbflush.h |   12 +-
+ arch/x86/kernel/ldt.c           |    7 -
+ arch/x86/mm/init.c              |    2 
+ arch/x86/mm/tlb.c               |  216 ++++++++++++++++++++--------------------
+ arch/x86/xen/mmu.c              |   51 ++++-----
+ 6 files changed, 147 insertions(+), 144 deletions(-)
+
+--- a/arch/x86/events/core.c
++++ b/arch/x86/events/core.c
+@@ -2100,8 +2100,7 @@ static int x86_pmu_event_init(struct per
+ 
+ static void refresh_pce(void *ignored)
+ {
+-      if (current->active_mm)
+-              load_mm_cr4(current->active_mm);
++      load_mm_cr4(this_cpu_read(cpu_tlbstate.loaded_mm));
+ }
+ 
+ static void x86_pmu_event_mapped(struct perf_event *event)
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -66,7 +66,13 @@ static inline void invpcid_flush_all_non
+ #endif
+ 
+ struct tlb_state {
+-      struct mm_struct *active_mm;
++      /*
++       * cpu_tlbstate.loaded_mm should match CR3 whenever interrupts
++       * are on.  This means that it may not match current->active_mm,
++       * which will contain the previous user mm when we're in lazy TLB
++       * mode even if we've already switched back to swapper_pg_dir.
++       */
++      struct mm_struct *loaded_mm;
+       int state;
+ 
+       /*
+@@ -249,7 +255,9 @@ void native_flush_tlb_others(const struc
+ static inline void reset_lazy_tlbstate(void)
+ {
+       this_cpu_write(cpu_tlbstate.state, 0);
+-      this_cpu_write(cpu_tlbstate.active_mm, &init_mm);
++      this_cpu_write(cpu_tlbstate.loaded_mm, &init_mm);
++
++      WARN_ON(read_cr3() != __pa_symbol(swapper_pg_dir));
+ }
+ 
+ static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
+--- a/arch/x86/kernel/ldt.c
++++ b/arch/x86/kernel/ldt.c
+@@ -23,14 +23,15 @@
+ #include <asm/syscalls.h>
+ 
+ /* context.lock is held for us, so we don't need any locking. */
+-static void flush_ldt(void *current_mm)
++static void flush_ldt(void *__mm)
+ {
++      struct mm_struct *mm = __mm;
+       mm_context_t *pc;
+ 
+-      if (current->active_mm != current_mm)
++      if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm)
+               return;
+ 
+-      pc = &current->active_mm->context;
++      pc = &mm->context;
+       set_ldt(pc->ldt->entries, pc->ldt->size);
+ }
+ 
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -764,7 +764,7 @@ void __init zone_sizes_init(void)
+ }
+ 
+ DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
+-      .active_mm = &init_mm,
++      .loaded_mm = &init_mm,
+       .state = 0,
+       .cr4 = ~0UL,    /* fail hard if we screw up cr4 shadow initialization */
+ };
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -28,26 +28,25 @@
+  *    Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
+  */
+ 
+-/*
+- * We cannot call mmdrop() because we are in interrupt context,
+- * instead update mm->cpu_vm_mask.
+- */
+ void leave_mm(int cpu)
+ {
+-      struct mm_struct *active_mm = this_cpu_read(cpu_tlbstate.active_mm);
++      struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
++
++      /*
++       * It's plausible that we're in lazy TLB mode while our mm is init_mm.
++       * If so, our callers still expect us to flush the TLB, but there
++       * aren't any user TLB entries in init_mm to worry about.
++       *
++       * This needs to happen before any other sanity checks due to
++       * intel_idle's shenanigans.
++       */
++      if (loaded_mm == &init_mm)
++              return;
++
+       if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
+               BUG();
+-      if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) {
+-              cpumask_clear_cpu(cpu, mm_cpumask(active_mm));
+-              load_cr3(swapper_pg_dir);
+-              /*
+-               * This gets called in the idle path where RCU
+-               * functions differently.  Tracing normally
+-               * uses RCU, so we have to call the tracepoint
+-               * specially here.
+-               */
+-              trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+-      }
++
++      switch_mm(NULL, &init_mm, NULL);
+ }
+ EXPORT_SYMBOL_GPL(leave_mm);
+ 
+@@ -65,108 +64,109 @@ void switch_mm_irqs_off(struct mm_struct
+                       struct task_struct *tsk)
+ {
+       unsigned cpu = smp_processor_id();
++      struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
+ 
+-      if (likely(prev != next)) {
+-              if (IS_ENABLED(CONFIG_VMAP_STACK)) {
+-                      /*
+-                       * If our current stack is in vmalloc space and isn't
+-                       * mapped in the new pgd, we'll double-fault.  Forcibly
+-                       * map it.
+-                       */
+-                      unsigned int stack_pgd_index = pgd_index(current_stack_pointer());
+-
+-                      pgd_t *pgd = next->pgd + stack_pgd_index;
+-
+-                      if (unlikely(pgd_none(*pgd)))
+-                              set_pgd(pgd, init_mm.pgd[stack_pgd_index]);
+-              }
++      /*
++       * NB: The scheduler will call us with prev == next when
++       * switching from lazy TLB mode to normal mode if active_mm
++       * isn't changing.  When this happens, there is no guarantee
++       * that CR3 (and hence cpu_tlbstate.loaded_mm) matches next.
++       *
++       * NB: leave_mm() calls us with prev == NULL and tsk == NULL.
++       */
+ 
+-              this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+-              this_cpu_write(cpu_tlbstate.active_mm, next);
++      this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+ 
+-              cpumask_set_cpu(cpu, mm_cpumask(next));
++      if (real_prev == next) {
++              /*
++               * There's nothing to do: we always keep the per-mm control
++               * regs in sync with cpu_tlbstate.loaded_mm.  Just
++               * sanity-check mm_cpumask.
++               */
++              if (WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(next))))
++                      cpumask_set_cpu(cpu, mm_cpumask(next));
++              return;
++      }
+ 
++      if (IS_ENABLED(CONFIG_VMAP_STACK)) {
+               /*
+-               * Re-load page tables.
+-               *
+-               * This logic has an ordering constraint:
+-               *
+-               *  CPU 0: Write to a PTE for 'next'
+-               *  CPU 0: load bit 1 in mm_cpumask.  if nonzero, send IPI.
+-               *  CPU 1: set bit 1 in next's mm_cpumask
+-               *  CPU 1: load from the PTE that CPU 0 writes (implicit)
+-               *
+-               * We need to prevent an outcome in which CPU 1 observes
+-               * the new PTE value and CPU 0 observes bit 1 clear in
+-               * mm_cpumask.  (If that occurs, then the IPI will never
+-               * be sent, and CPU 0's TLB will contain a stale entry.)
+-               *
+-               * The bad outcome can occur if either CPU's load is
+-               * reordered before that CPU's store, so both CPUs must
+-               * execute full barriers to prevent this from happening.
+-               *
+-               * Thus, switch_mm needs a full barrier between the
+-               * store to mm_cpumask and any operation that could load
+-               * from next->pgd.  TLB fills are special and can happen
+-               * due to instruction fetches or for no reason at all,
+-               * and neither LOCK nor MFENCE orders them.
+-               * Fortunately, load_cr3() is serializing and gives the
+-               * ordering guarantee we need.
+-               *
++               * If our current stack is in vmalloc space and isn't
++               * mapped in the new pgd, we'll double-fault.  Forcibly
++               * map it.
+                */
+-              load_cr3(next->pgd);
++              unsigned int stack_pgd_index = pgd_index(current_stack_pointer());
+ 
+-              trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
++              pgd_t *pgd = next->pgd + stack_pgd_index;
+ 
+-              /* Stop flush ipis for the previous mm */
+-              cpumask_clear_cpu(cpu, mm_cpumask(prev));
++              if (unlikely(pgd_none(*pgd)))
++                      set_pgd(pgd, init_mm.pgd[stack_pgd_index]);
++      }
+ 
+-              /* Load per-mm CR4 state */
+-              load_mm_cr4(next);
++      this_cpu_write(cpu_tlbstate.loaded_mm, next);
+ 
+-#ifdef CONFIG_MODIFY_LDT_SYSCALL
+-              /*
+-               * Load the LDT, if the LDT is different.
+-               *
+-               * It's possible that prev->context.ldt doesn't match
+-               * the LDT register.  This can happen if leave_mm(prev)
+-               * was called and then modify_ldt changed
+-               * prev->context.ldt but suppressed an IPI to this CPU.
+-               * In this case, prev->context.ldt != NULL, because we
+-               * never set context.ldt to NULL while the mm still
+-               * exists.  That means that next->context.ldt !=
+-               * prev->context.ldt, because mms never share an LDT.
+-               */
+-              if (unlikely(prev->context.ldt != next->context.ldt))
+-                      load_mm_ldt(next);
+-#endif
+-      } else {
+-              this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+-              BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
++      WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));
++      cpumask_set_cpu(cpu, mm_cpumask(next));
+ 
+-              if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {
+-                      /*
+-                       * On established mms, the mm_cpumask is only changed
+-                       * from irq context, from ptep_clear_flush() while in
+-                       * lazy tlb mode, and here. Irqs are blocked during
+-                       * schedule, protecting us from simultaneous changes.
+-                       */
+-                      cpumask_set_cpu(cpu, mm_cpumask(next));
++      /*
++       * Re-load page tables.
++       *
++       * This logic has an ordering constraint:
++       *
++       *  CPU 0: Write to a PTE for 'next'
++       *  CPU 0: load bit 1 in mm_cpumask.  if nonzero, send IPI.
++       *  CPU 1: set bit 1 in next's mm_cpumask
++       *  CPU 1: load from the PTE that CPU 0 writes (implicit)
++       *
++       * We need to prevent an outcome in which CPU 1 observes
++       * the new PTE value and CPU 0 observes bit 1 clear in
++       * mm_cpumask.  (If that occurs, then the IPI will never
++       * be sent, and CPU 0's TLB will contain a stale entry.)
++       *
++       * The bad outcome can occur if either CPU's load is
++       * reordered before that CPU's store, so both CPUs must
++       * execute full barriers to prevent this from happening.
++       *
++       * Thus, switch_mm needs a full barrier between the
++       * store to mm_cpumask and any operation that could load
++       * from next->pgd.  TLB fills are special and can happen
++       * due to instruction fetches or for no reason at all,
++       * and neither LOCK nor MFENCE orders them.
++       * Fortunately, load_cr3() is serializing and gives the
++       * ordering guarantee we need.
++       */
++      load_cr3(next->pgd);
++
++      /*
++       * This gets called via leave_mm() in the idle path where RCU
++       * functions differently.  Tracing normally uses RCU, so we have to
++       * call the tracepoint specially here.
++       */
++      trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
++
++      /* Stop flush ipis for the previous mm */
++      WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) &&
++                   real_prev != &init_mm);
++      cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
+ 
+-                      /*
+-                       * We were in lazy tlb mode and leave_mm disabled
+-                       * tlb flush IPI delivery. We must reload CR3
+-                       * to make sure to use no freed page tables.
+-                       *
+-                       * As above, load_cr3() is serializing and orders TLB
+-                       * fills with respect to the mm_cpumask write.
+-                       */
+-                      load_cr3(next->pgd);
+-                      trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+-                      load_mm_cr4(next);
+-                      load_mm_ldt(next);
+-              }
+-      }
++      /* Load per-mm CR4 state */
++      load_mm_cr4(next);
++
++#ifdef CONFIG_MODIFY_LDT_SYSCALL
++      /*
++       * Load the LDT, if the LDT is different.
++       *
++       * It's possible that prev->context.ldt doesn't match
++       * the LDT register.  This can happen if leave_mm(prev)
++       * was called and then modify_ldt changed
++       * prev->context.ldt but suppressed an IPI to this CPU.
++       * In this case, prev->context.ldt != NULL, because we
++       * never set context.ldt to NULL while the mm still
++       * exists.  That means that next->context.ldt !=
++       * prev->context.ldt, because mms never share an LDT.
++       */
++      if (unlikely(real_prev->context.ldt != next->context.ldt))
++              load_mm_ldt(next);
++#endif
+ }
+ 
+ /*
+@@ -246,7 +246,7 @@ static void flush_tlb_func_remote(void *
+ 
+       inc_irq_stat(irq_tlb_count);
+ 
+-      if (f->mm && f->mm != this_cpu_read(cpu_tlbstate.active_mm))
++      if (f->mm && f->mm != this_cpu_read(cpu_tlbstate.loaded_mm))
+               return;
+ 
+       count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
+@@ -337,7 +337,7 @@ void flush_tlb_mm_range(struct mm_struct
+               info.end = TLB_FLUSH_ALL;
+       }
+ 
+-      if (mm == current->active_mm)
++      if (mm == this_cpu_read(cpu_tlbstate.loaded_mm))
+               flush_tlb_func_local(&info, TLB_LOCAL_MM_SHOOTDOWN);
+       if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
+               flush_tlb_others(mm_cpumask(mm), &info);
+--- a/arch/x86/xen/mmu.c
++++ b/arch/x86/xen/mmu.c
+@@ -998,37 +998,32 @@ static void xen_dup_mmap(struct mm_struc
+       spin_unlock(&mm->page_table_lock);
+ }
+ 
+-
+-#ifdef CONFIG_SMP
+-/* Another cpu may still have their %cr3 pointing at the pagetable, so
+-   we need to repoint it somewhere else before we can unpin it. */
+-static void drop_other_mm_ref(void *info)
++static void drop_mm_ref_this_cpu(void *info)
+ {
+       struct mm_struct *mm = info;
+-      struct mm_struct *active_mm;
+-
+-      active_mm = this_cpu_read(cpu_tlbstate.active_mm);
+ 
+-      if (active_mm == mm && this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK)
++      if (this_cpu_read(cpu_tlbstate.loaded_mm) == mm)
+               leave_mm(smp_processor_id());
+ 
+-      /* If this cpu still has a stale cr3 reference, then make sure
+-         it has been flushed. */
++      /*
++       * If this cpu still has a stale cr3 reference, then make sure
++       * it has been flushed.
++       */
+       if (this_cpu_read(xen_current_cr3) == __pa(mm->pgd))
+-              load_cr3(swapper_pg_dir);
++              xen_mc_flush();
+ }
+ 
++#ifdef CONFIG_SMP
++/*
++ * Another cpu may still have their %cr3 pointing at the pagetable, so
++ * we need to repoint it somewhere else before we can unpin it.
++ */
+ static void xen_drop_mm_ref(struct mm_struct *mm)
+ {
+       cpumask_var_t mask;
+       unsigned cpu;
+ 
+-      if (current->active_mm == mm) {
+-              if (current->mm == mm)
+-                      load_cr3(swapper_pg_dir);
+-              else
+-                      leave_mm(smp_processor_id());
+-      }
++      drop_mm_ref_this_cpu(mm);
+ 
+       /* Get the "official" set of cpus referring to our pagetable. */
+       if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
+@@ -1036,31 +1031,31 @@ static void xen_drop_mm_ref(struct mm_st
+                       if (!cpumask_test_cpu(cpu, mm_cpumask(mm))
+                           && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
+                               continue;
+-                      smp_call_function_single(cpu, drop_other_mm_ref, mm, 1);
++                      smp_call_function_single(cpu, drop_mm_ref_this_cpu, mm, 1);
+               }
+               return;
+       }
+       cpumask_copy(mask, mm_cpumask(mm));
+ 
+-      /* It's possible that a vcpu may have a stale reference to our
+-         cr3, because its in lazy mode, and it hasn't yet flushed
+-         its set of pending hypercalls yet.  In this case, we can
+-         look at its actual current cr3 value, and force it to flush
+-         if needed. */
++      /*
++       * It's possible that a vcpu may have a stale reference to our
++       * cr3, because it's in lazy mode, and it hasn't yet flushed
++       * its set of pending hypercalls.  In this case, we can
++       * look at its actual current cr3 value, and force it to flush
++       * if needed.
++       */
+       for_each_online_cpu(cpu) {
+               if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
+                       cpumask_set_cpu(cpu, mask);
+       }
+ 
+-      if (!cpumask_empty(mask))
+-              smp_call_function_many(mask, drop_other_mm_ref, mm, 1);
++      smp_call_function_many(mask, drop_mm_ref_this_cpu, mm, 1);
+       free_cpumask_var(mask);
+ }
+ #else
+ static void xen_drop_mm_ref(struct mm_struct *mm)
+ {
+-      if (current->active_mm == mm)
+-              load_cr3(swapper_pg_dir);
++      drop_mm_ref_this_cpu(mm);
+ }
+ #endif
+ 
diff --git a/queue-4.9/x86-mm-use-new-merged-flush-logic-in-arch_tlbbatch_flush.patch b/queue-4.9/x86-mm-use-new-merged-flush-logic-in-arch_tlbbatch_flush.patch

new file mode 100644 (file)

index 0000000..459c28b
--- /dev/null
+++ b/queue-4.9/x86-mm-use-new-merged-flush-logic-in-arch_tlbbatch_flush.patch
@@ -0,0 +1,55 @@
+From 3f79e4c7c9c2f5c30751ea5c8dd9fd1d56b81947 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sun, 28 May 2017 10:00:13 -0700
+Subject: x86/mm: Use new merged flush logic in arch_tlbbatch_flush()
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 3f79e4c7c9c2f5c30751ea5c8dd9fd1d56b81947 upstream.
+
+Now there's only one copy of the local tlb flush logic for
+non-kernel pages on SMP kernels.
+
+The only functional change is that arch_tlbbatch_flush() will now
+leave_mm() on the local CPU if that CPU is in the batch and is in
+TLBSTATE_LAZY mode.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Nadav Amit <nadav.amit@gmail.com>
+Cc: Nadav Amit <namit@vmware.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Eduardo Valentin <eduval@amazon.com>
+Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/tlb.c |    8 ++------
+ 1 file changed, 2 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -405,12 +405,8 @@ void arch_tlbbatch_flush(struct arch_tlb
+ 
+       int cpu = get_cpu();
+ 
+-      if (cpumask_test_cpu(cpu, &batch->cpumask)) {
+-              count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+-              local_flush_tlb();
+-              trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
+-      }
+-
++      if (cpumask_test_cpu(cpu, &batch->cpumask))
++              flush_tlb_func_local(&info, TLB_LOCAL_SHOOTDOWN);
+       if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
+               flush_tlb_others(&batch->cpumask, &info);
+       cpumask_clear(&batch->cpumask);
diff --git a/queue-4.9/x86-unify-tss_struct.patch b/queue-4.9/x86-unify-tss_struct.patch

new file mode 100644 (file)

index 0000000..e86f87a
--- /dev/null
+++ b/queue-4.9/x86-unify-tss_struct.patch
@@ -0,0 +1,35 @@
+From ca241c75037b32e0216a68e39ad2801d04fa1f87 Mon Sep 17 00:00:00 2001
+From: Glauber de Oliveira Costa <gcosta@redhat.com>
+Date: Wed, 30 Jan 2008 13:31:31 +0100
+Subject: x86: unify tss_struct
+
+From: Glauber de Oliveira Costa <gcosta@redhat.com>
+
+commit ca241c75037b32e0216a68e39ad2801d04fa1f87 upstream.
+
+Although slightly different, the tss_struct is very similar in x86_64 and
+i386. The really different part, which matches the hardware view of it, is
+now called x86_hw_tss, and each of the architectures provides its own.
+It's then used as a field in the outer tss_struct.
+
+Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Eduardo Valentin <eduval@amazon.com>
+Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/processor.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -272,7 +272,7 @@ struct x86_hw_tss {
+       u16                     reserved5;
+       u16                     io_bitmap_base;
+ 
+-} __attribute__((packed)) ____cacheline_aligned;
++} __attribute__((packed));
+ #endif
+ 
+ /*
diff --git a/queue-4.9/xhci-plat-register-shutdown-for-xhci_plat.patch b/queue-4.9/xhci-plat-register-shutdown-for-xhci_plat.patch

new file mode 100644 (file)

index 0000000..cdaffeb
--- /dev/null
+++ b/queue-4.9/xhci-plat-register-shutdown-for-xhci_plat.patch
@@ -0,0 +1,33 @@
+From foo@baz Thu Dec 21 09:02:40 CET 2017
+From: Adam Wallis <awallis@codeaurora.org>
+Date: Tue, 28 Mar 2017 15:55:28 +0300
+Subject: xhci: plat: Register shutdown for xhci_plat
+
+From: Adam Wallis <awallis@codeaurora.org>
+
+
+[ Upstream commit b07c12517f2aed0add8ce18146bb426b14099392 ]
+
+Shutdown should be called for xhci_plat devices, especially in
+situations where kexec might be used, so that DMA transactions
+are stopped.
+
+Signed-off-by: Adam Wallis <awallis@codeaurora.org>
+Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/usb/host/xhci-plat.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/usb/host/xhci-plat.c
++++ b/drivers/usb/host/xhci-plat.c
+@@ -335,6 +335,7 @@ MODULE_DEVICE_TABLE(acpi, usb_xhci_acpi_
+ static struct platform_driver usb_xhci_driver = {
+       .probe  = xhci_plat_probe,
+       .remove = xhci_plat_remove,
++      .shutdown       = usb_hcd_platform_shutdown,
+       .driver = {
+               .name = "xhci-hcd",
+               .pm = DEV_PM_OPS,
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Thu, 21 Dec 2017 08:40:09 +0000 (09:40 +0100)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Thu, 21 Dec 2017 08:40:09 +0000 (09:40 +0100)
queue-4.9/alsa-hda-add-support-for-docking-station-for-hp-820-g2.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/alsa-hda-add-support-for-docking-station-for-hp-840-g3.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/arm-dma-mapping-disallow-dma_get_sgtable-for-non-kernel-managed-memory.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/arm-dts-am335x-evmsk-adjust-mmc2-param-to-allow-suspend.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/arm-dts-ti-fix-pci-bus-dtc-warnings.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/arm-kprobes-align-stack-to-8-bytes-in-test-code.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/arm-kprobes-fix-the-return-address-of-multiple-kretprobes.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/asoc-img-parallel-out-add-pm_runtime_get-put-to-set_fmt-callback.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/asoc-sti-fix-reader-substream-pointer-set.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/backlight-pwm_bl-fix-overflow-condition.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/bna-avoid-writing-uninitialized-data-into-hw-registers.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/bna-integer-overflow-bug-in-debugfs.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/bnxt_en-fix-null-pointer-dereference-in-reopen-failure-path.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/btrfs-fix-an-integer-overflow-check.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/clk-sunxi-ng-sun6i-rename-hdmi-ddc-clock-to-avoid-name-collision.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/cpufreq-fix-creation-of-symbolic-links-to-policy-directories.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/cpuidle-fix-broadcast-control-when-broadcast-can-not-be-entered.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/cpuidle-powernv-pass-correct-drv-cpumask-for-registration.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/cpuidle-validate-cpu_dev-in-cpuidle_add_sysfs.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/crypto-crypto4xx-increase-context-and-scatter-ring-buffer-elements.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/crypto-deadlock-between-crypto_alg_sem-rtnl_mutex-genl_mutex.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/fm10k-ensure-we-process-sm-mbx-when-processing-vf-mbx.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/fm10k-fix-mis-ordered-parameters-in-declaration-for-.ndo_set_vf_bw.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/hid-corsair-add-driver-scimitar-pro-rgb-gaming-mouse-1b1c-1b3e-support-to-hid-corsair.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/hid-corsair-support-for-k65-k70-rapidfire-and-scimitar-pro-rgb.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/hid-xinmo-fix-for-out-of-range-for-tht-2p-arcade-controller.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/hwmon-asus_atk0110-fix-uninitialized-data-access.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/hwmon-max31790-set-correct-pwm-value.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/i2c-mux-pca954x-add-missing-pca9546-definition-to-chip_desc.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/i40e-do-not-enable-napi-on-q_vectors-that-have-no-rings.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/i40iw-receive-netdev-events-post-inet_notifier-state.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/ib-core-protect-against-self-requeue-of-a-cq-work-item.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/ib-rxe-check-for-allocation-failure-on-elem.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/ib-rxe-double-free-on-error.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/ib-rxe-increment-msn-only-when-completing-a-request.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/igb-check-memory-allocation-failure.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/inet-frag-release-spinlock-before-calling-icmp_send.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/infiniband-fix-alignment-of-mmap-cookies-to-support-vipt-caching.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/iommu-exynos-workaround-flpd-cache-flush-issues-for-sysmmu-v5.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/irda-vlsi_ir-fix-check-for-dma-mapping-errors.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/isdn-kcapi-avoid-uninitialized-data.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/ixgbe-fix-use-of-uninitialized-padding.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/kvm-fix-usage-of-uninit-spinlock-in-avic_vm_destroy.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/kvm-mm-account-kvm-related-kmem-slabs-to-kmemcg.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/kvm-nvmx-fix-host_cr3-host_cr4-cache.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/kvm-pci-assign-do-not-map-smm-memory-slot-pages-in-vt-d-page-tables.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/kvm-vmx-fix-enable-vpid-conditions.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/kvm-vmx-flush-tlb-when-the-apic-access-address-changes.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/kvm-x86-correct-async-page-present-tracepoint.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/mm-x86-mm-make-the-batched-unmap-tlb-flush-api-more-generic.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/nbd-set-queue-timeout-properly.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/net-do-not-allow-negative-values-for-busy_read-and-busy_poll-sysctl-interfaces.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/net-ipconfig-fix-ic_close_devs-use-after-free.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/net-ipv6-send-ns-for-dad-when-link-operationally-up.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/net-moxa-fix-tx-overrun-memory-leak.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/net-phy-at803x-change-error-to-einval-for-invalid-mac.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/net-qmi_wwan-add-usb-ids-for-mdm6600-modem-on-motorola-droid-4.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/netfilter-nf_nat_snmp-fix-panic-when-snmp_trap_helper-fails-to-register.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/netfilter-nfnetlink_queue-fix-secctx-memory-leak.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/netfilter-nfnl_cthelper-fix-a-race-when-walk-the-nf_ct_helper_hash-table.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/netfilter-nfnl_cthelper-fix-memory-leak.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/netfilter-nfnl_cthelper-fix-runtime-expectation-policy-updates.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/nvme-loop-handle-cpu-unplug-when-re-establishing-the-controller.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/pci-aer-report-non-fatal-errors-only-to-the-affected-endpoint.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/pci-avoid-bus-reset-if-bridge-itself-is-broken.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/pci-create-sr-iov-virtfn-physfn-links-before-attaching-driver.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/percpu-don-t-forget-to-free-the-temporary-struct-pcpu_alloc_info.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/pinctrl-st-add-irq_request-release_resources-callbacks.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/pm-opp-move-error-message-to-debug-level.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/r8152-fix-the-list-rx_done-may-be-used-without-initialization.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/r8152-fix-the-rx-early-size-of-rtl8153.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/r8152-prevent-the-driver-from-transmitting-packets-with-carrier-off.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/rdma-iser-fix-possible-mr-leak-on-device-removal-event.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/rtc-pl031-make-interrupt-optional.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/rtc-set-the-alarm-to-the-next-expiring-timer.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/s390-qeth-no-eth-header-for-outbound-af_iucv.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/s390-qeth-size-calculation-outbound-buffers.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/sch_dsmark-fix-invalid-skb_cow-usage.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/scsi-cxgb4i-fix-tx-skb-leak.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/scsi-lpfc-fix-pt2pt-prli-reject.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/scsi-lpfc-fix-secure-firmware-updates.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/scsi-lpfc-plogi-failures-during-npiv-testing.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/scsi-mpt3sas-fix-io-error-occurs-on-pulling-out-a-drive-from-raid1-volume-created-on-two-sata-drive.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/sctp-out_qlen-should-be-updated-when-pruning-unsent-queue.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/series		patch \| blob \| blame \| history
queue-4.9/staging-greybus-light-release-memory-obtained-by-kasprintf.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/tcp-fix-under-evaluated-ssthresh-in-tcp-vegas.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/tipc-fix-nametbl-deadlock-at-tipc_nametbl_unsubscribe.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/tracing-exclude-generic-fields-from-histograms.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/usb-gadget-f_uvc-sanity-check-wmaxpacketsize-for-superspeed.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/usb-gadget-udc-remove-pointer-dereference-after-free.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/vfio-pci-virtualize-maximum-payload-size.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/vhost-vsock-add-pkt-cancel-capability.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/virtio-balloon-use-actual-number-of-stats-for-stats-queue-buffers.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/virtio_balloon-prevent-uninitialized-variable-use.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/vsock-cancel-packets-when-failing-to-connect.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/vsock-track-pkt-owner-vsock.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/x86-entry-unwind-create-stack-frames-for-saved-interrupt-registers.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/x86-kvm-vmx-defer-tr-reload-after-vm-exit.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/x86-kvm-vmx-remove-unused-variable-in-segment_base.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/x86-kvm-vmx-simplify-segment_base.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/x86-mm-be-more-consistent-wrt-page_shift-vs-page_size-in-tlb-flush-code.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/x86-mm-change-the-leave_mm-condition-for-local-tlb-flushes.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/x86-mm-kvm-teach-kvm-s-vmx-code-that-cr3-isn-t-a-constant.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/x86-mm-pass-flush_tlb_info-to-flush_tlb_others-etc.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/x86-mm-reduce-indentation-in-flush_tlb_func.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/x86-mm-refactor-flush_tlb_mm_range-to-merge-local-and-remote-cases.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/x86-mm-reimplement-flush_tlb_page-using-flush_tlb_mm_range.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/x86-mm-remove-the-up-asm-tlbflush.h-code-always-use-the-formerly-smp-code.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/x86-mm-rework-lazy-tlb-to-track-the-actual-loaded-mm.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/x86-mm-use-new-merged-flush-logic-in-arch_tlbbatch_flush.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/x86-unify-tss_struct.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/xhci-plat-register-shutdown-for-xhci_plat.patch	[new file with mode: 0644]	patch \| blob