git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.14-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 27 Dec 2017 15:25:44 +0000 (16:25 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 27 Dec 2017 15:25:44 +0000 (16:25 +0100)
added patches:
acpi-apei-erst-fix-missing-error-handling-in-erst_reader.patch
acpi-nfit-fix-health-event-notification.patch
alsa-hda-add-vendor-id-for-cannonlake-hdmi-codec.patch
alsa-hda-realtek-fix-dell-aio-lineout-issue.patch
alsa-rawmidi-avoid-racy-info-ioctl-via-ctl-device.patch
alsa-usb-audio-add-native-dsd-support-for-esoteric-d-05x.patch
alsa-usb-audio-fix-the-missing-ctl-name-suffix-at-parsing-su.patch
arch-mm-allow-arch_dup_mmap-to-fail.patch
arm64-kvm-prevent-restoring-stale-pmscr_el1-for-vcpu.patch
block-throttle-avoid-double-charge.patch
block-unalign-call_single_data-in-struct-request.patch
clk-sunxi-sun9i-mmc-implement-reset-callback-for-reset-controls.patch
crypto-af_alg-fix-race-accessing-cipher-request.patch
crypto-af_alg-wait-for-data-at-beginning-of-recvmsg.patch
crypto-mcryptd-protect-the-per-cpu-queue-with-a-lock.patch
crypto-skcipher-set-walk.iv-for-zero-length-inputs.patch
drm-i915-flush-pending-gtt-writes-before-unbinding.patch
drm-sun4i-fix-error-path-handling.patch
init-invoke-init_espfix_bsp-from-mm_init.patch
kvm-arm-arm64-fix-hyp-unmapping-going-off-limits.patch
kvm-mmu-fix-infinite-loop-when-there-is-no-available-mmu-page.patch
kvm-ppc-book3s-fix-xive-migration-of-pending-interrupts.patch
kvm-ppc-book3s-hv-fix-pending_pri-value-in-kvmppc_xive_get_icp.patch
kvm-x86-fix-load-rflags-w-o-the-fixed-bit.patch
kvm-x86-fix-rsm-when-pcid-is-non-zero.patch
libnvdimm-btt-fix-an-incompatibility-in-the-log-layout.patch
libnvdimm-dax-fix-1gb-aligned-namespaces-vs-physical-misalignment.patch
libnvdimm-pfn-fix-start_pad-handling-for-aligned-namespaces.patch
mfd-cros-ec-spi-don-t-send-first-message-too-soon.patch
mfd-twl4030-audio-fix-sibling-node-lookup.patch
mfd-twl6040-fix-child-node-lookup.patch
net-mvneta-clear-interface-link-status-on-port-disable.patch
net-mvneta-eliminate-wrong-call-to-handle-rx-descriptor-error.patch
net-mvneta-use-proper-rxq_number-in-loop-on-rx-queues.patch
parisc-align-os_hpmc_size-on-word-boundary.patch
parisc-fix-indenting-in-puts.patch
parisc-hide-diva-built-in-serial-aux-and-graphics-card.patch
pci-pm-force-devices-to-d0-in-pci_pm_thaw_noirq.patch
pinctrl-cherryview-mask-all-interrupts-on-intel_strago-based-systems.patch
powerpc-perf-dereference-bhrb-entries-safely.patch
revert-ipmi_si-fix-memory-leak-on-new_smi.patch
revert-parisc-re-enable-interrupts-early.patch
spi-a3700-fix-clk-prescaling-for-coefficient-over-15.patch
spi-xilinx-detect-stall-with-unknown-commands.patch
x86-cpu_entry_area-move-it-out-of-the-fixmap.patch
x86-cpu_entry_area-move-it-to-a-separate-unit.patch
x86-cpu_entry_area-prevent-wraparound-in-setup_cpu_entry_area_ptes-on-32bit.patch
x86-doc-remove-obvious-weirdnesses-from-the-x86-mm-layout-documentation.patch
x86-entry-rename-sysenter_stack-to-cpu_entry_area_entry_stack.patch
x86-kconfig-limit-nr_cpus-on-32-bit-to-a-sane-amount.patch
x86-ldt-prevent-ldt-inheritance-on-exec.patch
x86-ldt-rework-locking.patch
x86-microcode-dont-abuse-the-tlb-flush-interface.patch
x86-mm-64-improve-the-memory-map-documentation.patch
x86-mm-add-comments-to-clarify-which-tlb-flush-functions-are-supposed-to-flush-what.patch
x86-mm-create-asm-invpcid.h.patch
x86-mm-dump_pagetables-check-page_present-for-real.patch
x86-mm-dump_pagetables-make-the-address-hints-correct-and-readable.patch
x86-mm-move-the-cr3-construction-functions-to-tlbflush.h.patch
x86-mm-put-mmu-to-hardware-asid-translation-in-one-place.patch
x86-mm-remove-hard-coded-asid-limit-checks.patch
x86-mm-remove-superfluous-barriers.patch
x86-mm-use-__flush_tlb_one-for-kernel-memory.patch
x86-uv-use-the-right-tlb-flush-api.patch
x86-vsyscall-64-explicitly-set-_page_user-in-the-pagetable-hierarchy.patch
x86-vsyscall-64-warn-and-fail-vsyscall-emulation-in-native-mode.patch

67 files changed:
queue-4.14/acpi-apei-erst-fix-missing-error-handling-in-erst_reader.patch [new file with mode: 0644]
queue-4.14/acpi-nfit-fix-health-event-notification.patch [new file with mode: 0644]
queue-4.14/alsa-hda-add-vendor-id-for-cannonlake-hdmi-codec.patch [new file with mode: 0644]
queue-4.14/alsa-hda-realtek-fix-dell-aio-lineout-issue.patch [new file with mode: 0644]
queue-4.14/alsa-rawmidi-avoid-racy-info-ioctl-via-ctl-device.patch [new file with mode: 0644]
queue-4.14/alsa-usb-audio-add-native-dsd-support-for-esoteric-d-05x.patch [new file with mode: 0644]
queue-4.14/alsa-usb-audio-fix-the-missing-ctl-name-suffix-at-parsing-su.patch [new file with mode: 0644]
queue-4.14/arch-mm-allow-arch_dup_mmap-to-fail.patch [new file with mode: 0644]
queue-4.14/arm64-kvm-prevent-restoring-stale-pmscr_el1-for-vcpu.patch [new file with mode: 0644]
queue-4.14/block-throttle-avoid-double-charge.patch [new file with mode: 0644]
queue-4.14/block-unalign-call_single_data-in-struct-request.patch [new file with mode: 0644]
queue-4.14/clk-sunxi-sun9i-mmc-implement-reset-callback-for-reset-controls.patch [new file with mode: 0644]
queue-4.14/crypto-af_alg-fix-race-accessing-cipher-request.patch [new file with mode: 0644]
queue-4.14/crypto-af_alg-wait-for-data-at-beginning-of-recvmsg.patch [new file with mode: 0644]
queue-4.14/crypto-mcryptd-protect-the-per-cpu-queue-with-a-lock.patch [new file with mode: 0644]
queue-4.14/crypto-skcipher-set-walk.iv-for-zero-length-inputs.patch [new file with mode: 0644]
queue-4.14/drm-i915-flush-pending-gtt-writes-before-unbinding.patch [new file with mode: 0644]
queue-4.14/drm-sun4i-fix-error-path-handling.patch [new file with mode: 0644]
queue-4.14/init-invoke-init_espfix_bsp-from-mm_init.patch [new file with mode: 0644]
queue-4.14/kvm-arm-arm64-fix-hyp-unmapping-going-off-limits.patch [new file with mode: 0644]
queue-4.14/kvm-mmu-fix-infinite-loop-when-there-is-no-available-mmu-page.patch [new file with mode: 0644]
queue-4.14/kvm-ppc-book3s-fix-xive-migration-of-pending-interrupts.patch [new file with mode: 0644]
queue-4.14/kvm-ppc-book3s-hv-fix-pending_pri-value-in-kvmppc_xive_get_icp.patch [new file with mode: 0644]
queue-4.14/kvm-x86-fix-load-rflags-w-o-the-fixed-bit.patch [new file with mode: 0644]
queue-4.14/kvm-x86-fix-rsm-when-pcid-is-non-zero.patch [new file with mode: 0644]
queue-4.14/libnvdimm-btt-fix-an-incompatibility-in-the-log-layout.patch [new file with mode: 0644]
queue-4.14/libnvdimm-dax-fix-1gb-aligned-namespaces-vs-physical-misalignment.patch [new file with mode: 0644]
queue-4.14/libnvdimm-pfn-fix-start_pad-handling-for-aligned-namespaces.patch [new file with mode: 0644]
queue-4.14/mfd-cros-ec-spi-don-t-send-first-message-too-soon.patch [new file with mode: 0644]
queue-4.14/mfd-twl4030-audio-fix-sibling-node-lookup.patch [new file with mode: 0644]
queue-4.14/mfd-twl6040-fix-child-node-lookup.patch [new file with mode: 0644]
queue-4.14/net-mvneta-clear-interface-link-status-on-port-disable.patch [new file with mode: 0644]
queue-4.14/net-mvneta-eliminate-wrong-call-to-handle-rx-descriptor-error.patch [new file with mode: 0644]
queue-4.14/net-mvneta-use-proper-rxq_number-in-loop-on-rx-queues.patch [new file with mode: 0644]
queue-4.14/parisc-align-os_hpmc_size-on-word-boundary.patch [new file with mode: 0644]
queue-4.14/parisc-fix-indenting-in-puts.patch [new file with mode: 0644]
queue-4.14/parisc-hide-diva-built-in-serial-aux-and-graphics-card.patch [new file with mode: 0644]
queue-4.14/pci-pm-force-devices-to-d0-in-pci_pm_thaw_noirq.patch [new file with mode: 0644]
queue-4.14/pinctrl-cherryview-mask-all-interrupts-on-intel_strago-based-systems.patch [new file with mode: 0644]
queue-4.14/powerpc-perf-dereference-bhrb-entries-safely.patch [new file with mode: 0644]
queue-4.14/revert-ipmi_si-fix-memory-leak-on-new_smi.patch [new file with mode: 0644]
queue-4.14/revert-parisc-re-enable-interrupts-early.patch [new file with mode: 0644]
queue-4.14/series
queue-4.14/spi-a3700-fix-clk-prescaling-for-coefficient-over-15.patch [new file with mode: 0644]
queue-4.14/spi-xilinx-detect-stall-with-unknown-commands.patch [new file with mode: 0644]
queue-4.14/x86-cpu_entry_area-move-it-out-of-the-fixmap.patch [new file with mode: 0644]
queue-4.14/x86-cpu_entry_area-move-it-to-a-separate-unit.patch [new file with mode: 0644]
queue-4.14/x86-cpu_entry_area-prevent-wraparound-in-setup_cpu_entry_area_ptes-on-32bit.patch [new file with mode: 0644]
queue-4.14/x86-doc-remove-obvious-weirdnesses-from-the-x86-mm-layout-documentation.patch [new file with mode: 0644]
queue-4.14/x86-entry-rename-sysenter_stack-to-cpu_entry_area_entry_stack.patch [new file with mode: 0644]
queue-4.14/x86-kconfig-limit-nr_cpus-on-32-bit-to-a-sane-amount.patch [new file with mode: 0644]
queue-4.14/x86-ldt-prevent-ldt-inheritance-on-exec.patch [new file with mode: 0644]
queue-4.14/x86-ldt-rework-locking.patch [new file with mode: 0644]
queue-4.14/x86-microcode-dont-abuse-the-tlb-flush-interface.patch [new file with mode: 0644]
queue-4.14/x86-mm-64-improve-the-memory-map-documentation.patch [new file with mode: 0644]
queue-4.14/x86-mm-add-comments-to-clarify-which-tlb-flush-functions-are-supposed-to-flush-what.patch [new file with mode: 0644]
queue-4.14/x86-mm-create-asm-invpcid.h.patch [new file with mode: 0644]
queue-4.14/x86-mm-dump_pagetables-check-page_present-for-real.patch [new file with mode: 0644]
queue-4.14/x86-mm-dump_pagetables-make-the-address-hints-correct-and-readable.patch [new file with mode: 0644]
queue-4.14/x86-mm-move-the-cr3-construction-functions-to-tlbflush.h.patch [new file with mode: 0644]
queue-4.14/x86-mm-put-mmu-to-hardware-asid-translation-in-one-place.patch [new file with mode: 0644]
queue-4.14/x86-mm-remove-hard-coded-asid-limit-checks.patch [new file with mode: 0644]
queue-4.14/x86-mm-remove-superfluous-barriers.patch [new file with mode: 0644]
queue-4.14/x86-mm-use-__flush_tlb_one-for-kernel-memory.patch [new file with mode: 0644]
queue-4.14/x86-uv-use-the-right-tlb-flush-api.patch [new file with mode: 0644]
queue-4.14/x86-vsyscall-64-explicitly-set-_page_user-in-the-pagetable-hierarchy.patch [new file with mode: 0644]
queue-4.14/x86-vsyscall-64-warn-and-fail-vsyscall-emulation-in-native-mode.patch [new file with mode: 0644]

diff --git a/queue-4.14/acpi-apei-erst-fix-missing-error-handling-in-erst_reader.patch b/queue-4.14/acpi-apei-erst-fix-missing-error-handling-in-erst_reader.patch
new file mode 100644 (file)
index 0000000..82dd5ac
--- /dev/null
@@ -0,0 +1,53 @@
+From bb82e0b4a7e96494f0c1004ce50cec3d7b5fb3d1 Mon Sep 17 00:00:00 2001
+From: Takashi Iwai <tiwai@suse.de>
+Date: Thu, 14 Dec 2017 13:31:16 +0100
+Subject: ACPI: APEI / ERST: Fix missing error handling in erst_reader()
+
+From: Takashi Iwai <tiwai@suse.de>
+
+commit bb82e0b4a7e96494f0c1004ce50cec3d7b5fb3d1 upstream.
+
+The commit f6f828513290 ("pstore: pass allocated memory region back to
+caller") changed the check of the return value from erst_read() in
+erst_reader() in the following way:
+
+        if (len == -ENOENT)
+                goto skip;
+-       else if (len < 0) {
+-               rc = -1;
++       else if (len < sizeof(*rcd)) {
++               rc = -EIO;
+                goto out;
+
+This introduced another bug: since the comparison with sizeof() is
+cast to unsigned, a negative len value doesn't hit any longer.
+As a result, when an error is returned from erst_read(), the code
+falls through, and it may eventually lead to some weird thing like
+memory corruption.
+
+This patch adds the negative error value check more explicitly for
+addressing the issue.
+
+Fixes: f6f828513290 (pstore: pass allocated memory region back to caller)
+Tested-by: Jerry Tang <jtang@suse.com>
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Acked-by: Kees Cook <keescook@chromium.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/acpi/apei/erst.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/acpi/apei/erst.c
++++ b/drivers/acpi/apei/erst.c
+@@ -1007,7 +1007,7 @@ skip:
+       /* The record may be cleared by others, try read next record */
+       if (len == -ENOENT)
+               goto skip;
+-      else if (len < sizeof(*rcd)) {
++      else if (len < 0 || len < sizeof(*rcd)) {
+               rc = -EIO;
+               goto out;
+       }
diff --git a/queue-4.14/acpi-nfit-fix-health-event-notification.patch b/queue-4.14/acpi-nfit-fix-health-event-notification.patch
new file mode 100644 (file)
index 0000000..739f718
--- /dev/null
@@ -0,0 +1,60 @@
+From adf6895754e2503d994a765535fd1813f8834674 Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Thu, 30 Nov 2017 19:42:52 -0800
+Subject: acpi, nfit: fix health event notification
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit adf6895754e2503d994a765535fd1813f8834674 upstream.
+
+Integration testing with a BIOS that generates injected health event
+notifications fails to communicate those events to userspace. The nfit
+driver neglects to link the ACPI DIMM device with the necessary driver
+data so acpi_nvdimm_notify() fails this lookup:
+
+        nfit_mem = dev_get_drvdata(dev);
+        if (nfit_mem && nfit_mem->flags_attr)
+                sysfs_notify_dirent(nfit_mem->flags_attr);
+
+Add the necessary linkage when installing the notification handler and
+clean it up when the nfit driver instance is torn down.
+
+Cc: Toshi Kani <toshi.kani@hpe.com>
+Cc: Vishal Verma <vishal.l.verma@intel.com>
+Fixes: ba9c8dd3c222 ("acpi, nfit: add dimm device notification support")
+Reported-by: Daniel Osawa <daniel.k.osawa@intel.com>
+Tested-by: Daniel Osawa <daniel.k.osawa@intel.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/acpi/nfit/core.c |    9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/drivers/acpi/nfit/core.c
++++ b/drivers/acpi/nfit/core.c
+@@ -1457,6 +1457,11 @@ static int acpi_nfit_add_dimm(struct acp
+                               dev_name(&adev_dimm->dev));
+               return -ENXIO;
+       }
++      /*
++       * Record nfit_mem for the notification path to track back to
++       * the nfit sysfs attributes for this dimm device object.
++       */
++      dev_set_drvdata(&adev_dimm->dev, nfit_mem);
+       /*
+        * Until standardization materializes we need to consider 4
+@@ -1516,9 +1521,11 @@ static void shutdown_dimm_notify(void *d
+                       sysfs_put(nfit_mem->flags_attr);
+                       nfit_mem->flags_attr = NULL;
+               }
+-              if (adev_dimm)
++              if (adev_dimm) {
+                       acpi_remove_notify_handler(adev_dimm->handle,
+                                       ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify);
++                      dev_set_drvdata(&adev_dimm->dev, NULL);
++              }
+       }
+       mutex_unlock(&acpi_desc->init_mutex);
+ }
diff --git a/queue-4.14/alsa-hda-add-vendor-id-for-cannonlake-hdmi-codec.patch b/queue-4.14/alsa-hda-add-vendor-id-for-cannonlake-hdmi-codec.patch
new file mode 100644 (file)
index 0000000..ee0e8f7
--- /dev/null
@@ -0,0 +1,44 @@
+From 2b4584d00a6bc02b63ab3c7213060d41a74bdff1 Mon Sep 17 00:00:00 2001
+From: Guneshwor Singh <guneshwor.o.singh@intel.com>
+Date: Thu, 7 Dec 2017 18:06:20 +0530
+Subject: ALSA: hda - Add vendor id for Cannonlake HDMI codec
+
+From: Guneshwor Singh <guneshwor.o.singh@intel.com>
+
+commit 2b4584d00a6bc02b63ab3c7213060d41a74bdff1 upstream.
+
+Cannonlake HDMI codec has the same nid as Geminilake. This adds the
+codec entry for it.
+
+Signed-off-by: Guneshwor Singh <guneshwor.o.singh@intel.com>
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ sound/pci/hda/patch_hdmi.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/sound/pci/hda/patch_hdmi.c
++++ b/sound/pci/hda/patch_hdmi.c
+@@ -55,10 +55,11 @@ MODULE_PARM_DESC(static_hdmi_pcm, "Don't
+ #define is_kabylake(codec) ((codec)->core.vendor_id == 0x8086280b)
+ #define is_geminilake(codec) (((codec)->core.vendor_id == 0x8086280d) || \
+                               ((codec)->core.vendor_id == 0x80862800))
++#define is_cannonlake(codec) ((codec)->core.vendor_id == 0x8086280c)
+ #define is_haswell_plus(codec) (is_haswell(codec) || is_broadwell(codec) \
+                               || is_skylake(codec) || is_broxton(codec) \
+-                              || is_kabylake(codec)) || is_geminilake(codec)
+-
++                              || is_kabylake(codec)) || is_geminilake(codec) \
++                              || is_cannonlake(codec)
+ #define is_valleyview(codec) ((codec)->core.vendor_id == 0x80862882)
+ #define is_cherryview(codec) ((codec)->core.vendor_id == 0x80862883)
+ #define is_valleyview_plus(codec) (is_valleyview(codec) || is_cherryview(codec))
+@@ -3841,6 +3842,7 @@ HDA_CODEC_ENTRY(0x80862808, "Broadwell H
+ HDA_CODEC_ENTRY(0x80862809, "Skylake HDMI",   patch_i915_hsw_hdmi),
+ HDA_CODEC_ENTRY(0x8086280a, "Broxton HDMI",   patch_i915_hsw_hdmi),
+ HDA_CODEC_ENTRY(0x8086280b, "Kabylake HDMI",  patch_i915_hsw_hdmi),
++HDA_CODEC_ENTRY(0x8086280c, "Cannonlake HDMI",        patch_i915_glk_hdmi),
+ HDA_CODEC_ENTRY(0x8086280d, "Geminilake HDMI",        patch_i915_glk_hdmi),
+ HDA_CODEC_ENTRY(0x80862800, "Geminilake HDMI",        patch_i915_glk_hdmi),
+ HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI",        patch_generic_hdmi),
diff --git a/queue-4.14/alsa-hda-realtek-fix-dell-aio-lineout-issue.patch b/queue-4.14/alsa-hda-realtek-fix-dell-aio-lineout-issue.patch
new file mode 100644 (file)
index 0000000..6560d2c
--- /dev/null
@@ -0,0 +1,95 @@
+From 9226665159f0367ad08bc7d5dd194aeadb90316f Mon Sep 17 00:00:00 2001
+From: Kailang Yang <kailang@realtek.com>
+Date: Thu, 14 Dec 2017 15:28:58 +0800
+Subject: ALSA: hda/realtek - Fix Dell AIO LineOut issue
+
+From: Kailang Yang <kailang@realtek.com>
+
+commit 9226665159f0367ad08bc7d5dd194aeadb90316f upstream.
+
+Dell AIO had LineOut jack.
+Add LineOut verb into this patch.
+
+[ Additional notes:
+  the ALC274 codec seems requiring the fixed pin / DAC connections for
+  HP / line-out pins for enabling EQ for speakers; i.e. the HP / LO
+  pins expect to be connected with NID 0x03 while keeping the speaker
+  with NID 0x02.  However, by adding a new line-out pin, the
+  auto-parser assigns the NID 0x02 for HP/LO pins as primary outputs.
+  As an easy workaround, we provide the preferred_pairs[] to map
+  forcibly for these pins. -- tiwai ]
+
+Fixes: 75ee94b20b46 ("ALSA: hda - fix headset mic problem for Dell machines with alc274")
+Signed-off-by: Kailang Yang <kailang@realtek.com>
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ sound/pci/hda/patch_realtek.c |   35 ++++++++++++++++++++++++++++++++++-
+ 1 file changed, 34 insertions(+), 1 deletion(-)
+
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -5162,6 +5162,22 @@ static void alc233_alc662_fixup_lenovo_d
+       }
+ }
++/* Forcibly assign NID 0x03 to HP/LO while NID 0x02 to SPK for EQ */
++static void alc274_fixup_bind_dacs(struct hda_codec *codec,
++                                  const struct hda_fixup *fix, int action)
++{
++      struct alc_spec *spec = codec->spec;
++      static hda_nid_t preferred_pairs[] = {
++              0x21, 0x03, 0x1b, 0x03, 0x16, 0x02,
++              0
++      };
++
++      if (action != HDA_FIXUP_ACT_PRE_PROBE)
++              return;
++
++      spec->gen.preferred_dacs = preferred_pairs;
++}
++
+ /* for hda_fixup_thinkpad_acpi() */
+ #include "thinkpad_helper.c"
+@@ -5279,6 +5295,8 @@ enum {
+       ALC233_FIXUP_LENOVO_MULTI_CODECS,
+       ALC294_FIXUP_LENOVO_MIC_LOCATION,
+       ALC700_FIXUP_INTEL_REFERENCE,
++      ALC274_FIXUP_DELL_BIND_DACS,
++      ALC274_FIXUP_DELL_AIO_LINEOUT_VERB,
+ };
+ static const struct hda_fixup alc269_fixups[] = {
+@@ -6089,6 +6107,21 @@ static const struct hda_fixup alc269_fix
+                       {}
+               }
+       },
++      [ALC274_FIXUP_DELL_BIND_DACS] = {
++              .type = HDA_FIXUP_FUNC,
++              .v.func = alc274_fixup_bind_dacs,
++              .chained = true,
++              .chain_id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE
++      },
++      [ALC274_FIXUP_DELL_AIO_LINEOUT_VERB] = {
++              .type = HDA_FIXUP_PINS,
++              .v.pins = (const struct hda_pintbl[]) {
++                      { 0x1b, 0x0401102f },
++                      { }
++              },
++              .chained = true,
++              .chain_id = ALC274_FIXUP_DELL_BIND_DACS
++      },
+ };
+ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+@@ -6550,7 +6583,7 @@ static const struct snd_hda_pin_quirk al
+               {0x14, 0x90170110},
+               {0x1b, 0x90a70130},
+               {0x21, 0x03211020}),
+-      SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
++      SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB,
+               {0x12, 0xb7a60130},
+               {0x13, 0xb8a61140},
+               {0x16, 0x90170110},
diff --git a/queue-4.14/alsa-rawmidi-avoid-racy-info-ioctl-via-ctl-device.patch b/queue-4.14/alsa-rawmidi-avoid-racy-info-ioctl-via-ctl-device.patch
new file mode 100644 (file)
index 0000000..50548c1
--- /dev/null
@@ -0,0 +1,66 @@
+From c1cfd9025cc394fd137a01159d74335c5ac978ce Mon Sep 17 00:00:00 2001
+From: Takashi Iwai <tiwai@suse.de>
+Date: Thu, 14 Dec 2017 16:44:12 +0100
+Subject: ALSA: rawmidi: Avoid racy info ioctl via ctl device
+
+From: Takashi Iwai <tiwai@suse.de>
+
+commit c1cfd9025cc394fd137a01159d74335c5ac978ce upstream.
+
+The rawmidi also allows to obtaining the information via ioctl of ctl
+API.  It means that user can issue an ioctl to the rawmidi device even
+when it's being removed as long as the control device is present.
+Although the code has some protection via the global register_mutex,
+its range is limited to the search of the corresponding rawmidi
+object, and the mutex is already unlocked at accessing the rawmidi
+object.  This may lead to a use-after-free.
+
+For avoiding it, this patch widens the application of register_mutex
+to the whole snd_rawmidi_info_select() function.  We have another
+mutex per rawmidi object, but this operation isn't very hot path, so
+it shouldn't matter from the performance POV.
+
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ sound/core/rawmidi.c |   15 ++++++++++++---
+ 1 file changed, 12 insertions(+), 3 deletions(-)
+
+--- a/sound/core/rawmidi.c
++++ b/sound/core/rawmidi.c
+@@ -579,15 +579,14 @@ static int snd_rawmidi_info_user(struct
+       return 0;
+ }
+-int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info)
++static int __snd_rawmidi_info_select(struct snd_card *card,
++                                   struct snd_rawmidi_info *info)
+ {
+       struct snd_rawmidi *rmidi;
+       struct snd_rawmidi_str *pstr;
+       struct snd_rawmidi_substream *substream;
+-      mutex_lock(&register_mutex);
+       rmidi = snd_rawmidi_search(card, info->device);
+-      mutex_unlock(&register_mutex);
+       if (!rmidi)
+               return -ENXIO;
+       if (info->stream < 0 || info->stream > 1)
+@@ -603,6 +602,16 @@ int snd_rawmidi_info_select(struct snd_c
+       }
+       return -ENXIO;
+ }
++
++int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info)
++{
++      int ret;
++
++      mutex_lock(&register_mutex);
++      ret = __snd_rawmidi_info_select(card, info);
++      mutex_unlock(&register_mutex);
++      return ret;
++}
+ EXPORT_SYMBOL(snd_rawmidi_info_select);
+ static int snd_rawmidi_info_select_user(struct snd_card *card,
diff --git a/queue-4.14/alsa-usb-audio-add-native-dsd-support-for-esoteric-d-05x.patch b/queue-4.14/alsa-usb-audio-add-native-dsd-support-for-esoteric-d-05x.patch
new file mode 100644 (file)
index 0000000..cb3ca31
--- /dev/null
@@ -0,0 +1,55 @@
+From 866f7ed7d67936dcdbcddc111c8af878c918fe7c Mon Sep 17 00:00:00 2001
+From: Jussi Laako <jussi@sonarnerd.net>
+Date: Thu, 7 Dec 2017 12:58:33 +0200
+Subject: ALSA: usb-audio: Add native DSD support for Esoteric D-05X
+
+From: Jussi Laako <jussi@sonarnerd.net>
+
+commit 866f7ed7d67936dcdbcddc111c8af878c918fe7c upstream.
+
+Adds VID:PID of Esoteric D-05X to the TEAC device id's.
+Renames the is_teac_50X_dac() function to is_teac_dsd_dac() to cover
+broader device family from the same corporation sharing the same USB
+audio implementation.
+
+Signed-off-by: Jussi Laako <jussi@sonarnerd.net>
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ sound/usb/quirks.c |    7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/sound/usb/quirks.c
++++ b/sound/usb/quirks.c
+@@ -1172,10 +1172,11 @@ static bool is_marantz_denon_dac(unsigne
+ /* TEAC UD-501/UD-503/NT-503 USB DACs need a vendor cmd to switch
+  * between PCM/DOP and native DSD mode
+  */
+-static bool is_teac_50X_dac(unsigned int id)
++static bool is_teac_dsd_dac(unsigned int id)
+ {
+       switch (id) {
+       case USB_ID(0x0644, 0x8043): /* TEAC UD-501/UD-503/NT-503 */
++      case USB_ID(0x0644, 0x8044): /* Esoteric D-05X */
+               return true;
+       }
+       return false;
+@@ -1208,7 +1209,7 @@ int snd_usb_select_mode_quirk(struct snd
+                       break;
+               }
+               mdelay(20);
+-      } else if (is_teac_50X_dac(subs->stream->chip->usb_id)) {
++      } else if (is_teac_dsd_dac(subs->stream->chip->usb_id)) {
+               /* Vendor mode switch cmd is required. */
+               switch (fmt->altsetting) {
+               case 3: /* DSD mode (DSD_U32) requested */
+@@ -1398,7 +1399,7 @@ u64 snd_usb_interface_dsd_format_quirks(
+       }
+       /* TEAC devices with USB DAC functionality */
+-      if (is_teac_50X_dac(chip->usb_id)) {
++      if (is_teac_dsd_dac(chip->usb_id)) {
+               if (fp->altsetting == 3)
+                       return SNDRV_PCM_FMTBIT_DSD_U32_BE;
+       }
diff --git a/queue-4.14/alsa-usb-audio-fix-the-missing-ctl-name-suffix-at-parsing-su.patch b/queue-4.14/alsa-usb-audio-fix-the-missing-ctl-name-suffix-at-parsing-su.patch
new file mode 100644 (file)
index 0000000..b6653b3
--- /dev/null
@@ -0,0 +1,75 @@
+From 5a15f289ee87eaf33f13f08a4909ec99d837ec5f Mon Sep 17 00:00:00 2001
+From: Takashi Iwai <tiwai@suse.de>
+Date: Mon, 18 Dec 2017 23:36:57 +0100
+Subject: ALSA: usb-audio: Fix the missing ctl name suffix at parsing SU
+
+From: Takashi Iwai <tiwai@suse.de>
+
+commit 5a15f289ee87eaf33f13f08a4909ec99d837ec5f upstream.
+
+The commit 89b89d121ffc ("ALSA: usb-audio: Add check return value for
+usb_string()") added the check of the return value from
+snd_usb_copy_string_desc(), which is correct per se, but it introduced
+a regression.  In the original code, either the "Clock Source",
+"Playback Source" or "Capture Source" suffix is added after the
+terminal string, while the commit changed it to add the suffix only
+when get_term_name() is failing.  It ended up with an incorrect ctl
+name like "PCM" instead of "PCM Capture Source".
+
+Also, even the original code has a similar bug: when the ctl name is
+generated from snd_usb_copy_string_desc() for the given iSelector, it
+also doesn't put the suffix.
+
+This patch addresses these issues: the suffix is added always when no
+static mapping is found.  Also the patch tries to put more comments
+and cleans up the if/else block for better readability in order to
+avoid the same pitfall again.
+
+Fixes: 89b89d121ffc ("ALSA: usb-audio: Add check return value for usb_string()")
+Reported-and-tested-by: Mauro Santos <registo.mailling@gmail.com>
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ sound/usb/mixer.c |   27 ++++++++++++++++-----------
+ 1 file changed, 16 insertions(+), 11 deletions(-)
+
+--- a/sound/usb/mixer.c
++++ b/sound/usb/mixer.c
+@@ -2173,20 +2173,25 @@ static int parse_audio_selector_unit(str
+       kctl->private_value = (unsigned long)namelist;
+       kctl->private_free = usb_mixer_selector_elem_free;
+-      nameid = uac_selector_unit_iSelector(desc);
++      /* check the static mapping table at first */
+       len = check_mapped_name(map, kctl->id.name, sizeof(kctl->id.name));
+-      if (len)
+-              ;
+-      else if (nameid)
+-              len = snd_usb_copy_string_desc(state, nameid, kctl->id.name,
+-                                       sizeof(kctl->id.name));
+-      else
+-              len = get_term_name(state, &state->oterm,
+-                                  kctl->id.name, sizeof(kctl->id.name), 0);
+-
+       if (!len) {
+-              strlcpy(kctl->id.name, "USB", sizeof(kctl->id.name));
++              /* no mapping ? */
++              /* if iSelector is given, use it */
++              nameid = uac_selector_unit_iSelector(desc);
++              if (nameid)
++                      len = snd_usb_copy_string_desc(state, nameid,
++                                                     kctl->id.name,
++                                                     sizeof(kctl->id.name));
++              /* ... or pick up the terminal name at next */
++              if (!len)
++                      len = get_term_name(state, &state->oterm,
++                                  kctl->id.name, sizeof(kctl->id.name), 0);
++              /* ... or use the fixed string "USB" as the last resort */
++              if (!len)
++                      strlcpy(kctl->id.name, "USB", sizeof(kctl->id.name));
++              /* and add the proper suffix */
+               if (desc->bDescriptorSubtype == UAC2_CLOCK_SELECTOR)
+                       append_ctl_name(kctl, " Clock Source");
+               else if ((state->oterm.type & 0xff00) == 0x0100)
diff --git a/queue-4.14/arch-mm-allow-arch_dup_mmap-to-fail.patch b/queue-4.14/arch-mm-allow-arch_dup_mmap-to-fail.patch
new file mode 100644 (file)
index 0000000..68f3719
--- /dev/null
@@ -0,0 +1,136 @@
+From c10e83f598d08046dd1ebc8360d4bb12d802d51b Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 14 Dec 2017 12:27:29 +0100
+Subject: arch, mm: Allow arch_dup_mmap() to fail
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit c10e83f598d08046dd1ebc8360d4bb12d802d51b upstream.
+
+In order to sanitize the LDT initialization on x86 arch_dup_mmap() must be
+allowed to fail. Fix up all instances.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Andy Lutomirsky <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: dan.j.williams@intel.com
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: kirill.shutemov@linux.intel.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/include/asm/mmu_context.h   |    5 +++--
+ arch/um/include/asm/mmu_context.h        |    3 ++-
+ arch/unicore32/include/asm/mmu_context.h |    5 +++--
+ arch/x86/include/asm/mmu_context.h       |    4 ++--
+ include/asm-generic/mm_hooks.h           |    5 +++--
+ kernel/fork.c                            |    3 +--
+ 6 files changed, 14 insertions(+), 11 deletions(-)
+
+--- a/arch/powerpc/include/asm/mmu_context.h
++++ b/arch/powerpc/include/asm/mmu_context.h
+@@ -114,9 +114,10 @@ static inline void enter_lazy_tlb(struct
+ #endif
+ }
+-static inline void arch_dup_mmap(struct mm_struct *oldmm,
+-                               struct mm_struct *mm)
++static inline int arch_dup_mmap(struct mm_struct *oldmm,
++                              struct mm_struct *mm)
+ {
++      return 0;
+ }
+ static inline void arch_exit_mmap(struct mm_struct *mm)
+--- a/arch/um/include/asm/mmu_context.h
++++ b/arch/um/include/asm/mmu_context.h
+@@ -15,9 +15,10 @@ extern void uml_setup_stubs(struct mm_st
+ /*
+  * Needed since we do not use the asm-generic/mm_hooks.h:
+  */
+-static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
++static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
+ {
+       uml_setup_stubs(mm);
++      return 0;
+ }
+ extern void arch_exit_mmap(struct mm_struct *mm);
+ static inline void arch_unmap(struct mm_struct *mm,
+--- a/arch/unicore32/include/asm/mmu_context.h
++++ b/arch/unicore32/include/asm/mmu_context.h
+@@ -81,9 +81,10 @@ do { \
+       } \
+ } while (0)
+-static inline void arch_dup_mmap(struct mm_struct *oldmm,
+-                               struct mm_struct *mm)
++static inline int arch_dup_mmap(struct mm_struct *oldmm,
++                              struct mm_struct *mm)
+ {
++      return 0;
+ }
+ static inline void arch_unmap(struct mm_struct *mm,
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -176,10 +176,10 @@ do {                                             \
+ } while (0)
+ #endif
+-static inline void arch_dup_mmap(struct mm_struct *oldmm,
+-                               struct mm_struct *mm)
++static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
+ {
+       paravirt_arch_dup_mmap(oldmm, mm);
++      return 0;
+ }
+ static inline void arch_exit_mmap(struct mm_struct *mm)
+--- a/include/asm-generic/mm_hooks.h
++++ b/include/asm-generic/mm_hooks.h
+@@ -7,9 +7,10 @@
+ #ifndef _ASM_GENERIC_MM_HOOKS_H
+ #define _ASM_GENERIC_MM_HOOKS_H
+-static inline void arch_dup_mmap(struct mm_struct *oldmm,
+-                               struct mm_struct *mm)
++static inline int arch_dup_mmap(struct mm_struct *oldmm,
++                              struct mm_struct *mm)
+ {
++      return 0;
+ }
+ static inline void arch_exit_mmap(struct mm_struct *mm)
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -721,8 +721,7 @@ static __latent_entropy int dup_mmap(str
+                       goto out;
+       }
+       /* a new mm has just been created */
+-      arch_dup_mmap(oldmm, mm);
+-      retval = 0;
++      retval = arch_dup_mmap(oldmm, mm);
+ out:
+       up_write(&mm->mmap_sem);
+       flush_tlb_mm(oldmm);
diff --git a/queue-4.14/arm64-kvm-prevent-restoring-stale-pmscr_el1-for-vcpu.patch b/queue-4.14/arm64-kvm-prevent-restoring-stale-pmscr_el1-for-vcpu.patch
new file mode 100644 (file)
index 0000000..4f91167
--- /dev/null
@@ -0,0 +1,43 @@
+From bfe766cf65fb65e68c4764f76158718560bdcee5 Mon Sep 17 00:00:00 2001
+From: Julien Thierry <julien.thierry@arm.com>
+Date: Wed, 6 Dec 2017 17:09:49 +0000
+Subject: arm64: kvm: Prevent restoring stale PMSCR_EL1 for vcpu
+
+From: Julien Thierry <julien.thierry@arm.com>
+
+commit bfe766cf65fb65e68c4764f76158718560bdcee5 upstream.
+
+When VHE is not present, KVM needs to save and restore PMSCR_EL1 when
+possible. If SPE is used by the host, value of PMSCR_EL1 cannot be saved
+for the guest.
+If the host starts using SPE between two save+restore on the same vcpu,
+restore will write the value of PMSCR_EL1 read during the first save.
+
+Make sure __debug_save_spe_nvhe clears the value of the saved PMSCR_EL1
+when the guest cannot use SPE.
+
+Signed-off-by: Julien Thierry <julien.thierry@arm.com>
+Cc: Christoffer Dall <christoffer.dall@linaro.org>
+Cc: Marc Zyngier <marc.zyngier@arm.com>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Reviewed-by: Will Deacon <will.deacon@arm.com>
+Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
+Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/kvm/hyp/debug-sr.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/arm64/kvm/hyp/debug-sr.c
++++ b/arch/arm64/kvm/hyp/debug-sr.c
+@@ -84,6 +84,9 @@ static void __hyp_text __debug_save_spe_
+ {
+       u64 reg;
++      /* Clear pmscr in case of early return */
++      *pmscr_el1 = 0;
++
+       /* SPE present on this CPU? */
+       if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1),
+                                                 ID_AA64DFR0_PMSVER_SHIFT))
diff --git a/queue-4.14/block-throttle-avoid-double-charge.patch b/queue-4.14/block-throttle-avoid-double-charge.patch
new file mode 100644 (file)
index 0000000..16bf727
--- /dev/null
@@ -0,0 +1,111 @@
+From 111be883981748acc9a56e855c8336404a8e787c Mon Sep 17 00:00:00 2001
+From: Shaohua Li <shli@fb.com>
+Date: Wed, 20 Dec 2017 11:10:17 -0700
+Subject: block-throttle: avoid double charge
+
+From: Shaohua Li <shli@fb.com>
+
+commit 111be883981748acc9a56e855c8336404a8e787c upstream.
+
+If a bio is throttled and split after throttling, the bio could be
+resubmitted and enter the throttling again. This will cause part of the
+bio to be charged multiple times. If the cgroup has an IO limit, the
+double charge will significantly harm the performance. The bio split
+becomes quite common after arbitrary bio size change.
+
+To fix this, we always set the BIO_THROTTLED flag if a bio is throttled.
+If the bio is cloned/split, we copy the flag to new bio too to avoid a
+double charge. However, cloned bio could be directed to a new disk,
+keeping the flag would be a problem. The observation is we always set new disk
+for the bio in this case, so we can clear the flag in bio_set_dev().
+
+This issue exists for a long time, arbitrary bio size change just makes
+it worse, so this should go into stable at least since v4.2.
+
+V1-> V2: Not add extra field in bio based on discussion with Tejun
+
+Cc: Vivek Goyal <vgoyal@redhat.com>
+Acked-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Shaohua Li <shli@fb.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ block/bio.c               |    2 ++
+ block/blk-throttle.c      |    8 +-------
+ include/linux/bio.h       |    2 ++
+ include/linux/blk_types.h |    9 ++++-----
+ 4 files changed, 9 insertions(+), 12 deletions(-)
+
+--- a/block/bio.c
++++ b/block/bio.c
+@@ -599,6 +599,8 @@ void __bio_clone_fast(struct bio *bio, s
+       bio->bi_disk = bio_src->bi_disk;
+       bio->bi_partno = bio_src->bi_partno;
+       bio_set_flag(bio, BIO_CLONED);
++      if (bio_flagged(bio_src, BIO_THROTTLED))
++              bio_set_flag(bio, BIO_THROTTLED);
+       bio->bi_opf = bio_src->bi_opf;
+       bio->bi_write_hint = bio_src->bi_write_hint;
+       bio->bi_iter = bio_src->bi_iter;
+--- a/block/blk-throttle.c
++++ b/block/blk-throttle.c
+@@ -2223,13 +2223,7 @@ again:
+ out_unlock:
+       spin_unlock_irq(q->queue_lock);
+ out:
+-      /*
+-       * As multiple blk-throtls may stack in the same issue path, we
+-       * don't want bios to leave with the flag set.  Clear the flag if
+-       * being issued.
+-       */
+-      if (!throttled)
+-              bio_clear_flag(bio, BIO_THROTTLED);
++      bio_set_flag(bio, BIO_THROTTLED);
+ #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+       if (throttled || !td->track_bio_latency)
+--- a/include/linux/bio.h
++++ b/include/linux/bio.h
+@@ -504,6 +504,8 @@ extern unsigned int bvec_nr_vecs(unsigne
+ #define bio_set_dev(bio, bdev)                        \
+ do {                                          \
++      if ((bio)->bi_disk != (bdev)->bd_disk)  \
++              bio_clear_flag(bio, BIO_THROTTLED);\
+       (bio)->bi_disk = (bdev)->bd_disk;       \
+       (bio)->bi_partno = (bdev)->bd_partno;   \
+ } while (0)
+--- a/include/linux/blk_types.h
++++ b/include/linux/blk_types.h
+@@ -50,8 +50,6 @@ struct blk_issue_stat {
+ struct bio {
+       struct bio              *bi_next;       /* request queue link */
+       struct gendisk          *bi_disk;
+-      u8                      bi_partno;
+-      blk_status_t            bi_status;
+       unsigned int            bi_opf;         /* bottom bits req flags,
+                                                * top bits REQ_OP. Use
+                                                * accessors.
+@@ -59,8 +57,8 @@ struct bio {
+       unsigned short          bi_flags;       /* status, etc and bvec pool number */
+       unsigned short          bi_ioprio;
+       unsigned short          bi_write_hint;
+-
+-      struct bvec_iter        bi_iter;
++      blk_status_t            bi_status;
++      u8                      bi_partno;
+       /* Number of segments in this BIO after
+        * physical address coalescing is performed.
+@@ -74,8 +72,9 @@ struct bio {
+       unsigned int            bi_seg_front_size;
+       unsigned int            bi_seg_back_size;
+-      atomic_t                __bi_remaining;
++      struct bvec_iter        bi_iter;
++      atomic_t                __bi_remaining;
+       bio_end_io_t            *bi_end_io;
+       void                    *bi_private;
diff --git a/queue-4.14/block-unalign-call_single_data-in-struct-request.patch b/queue-4.14/block-unalign-call_single_data-in-struct-request.patch
new file mode 100644 (file)
index 0000000..4903f6e
--- /dev/null
@@ -0,0 +1,34 @@
+From 4ccafe032005e9b96acbef2e389a4de5b1254add Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 20 Dec 2017 13:13:58 -0700
+Subject: block: unalign call_single_data in struct request
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit 4ccafe032005e9b96acbef2e389a4de5b1254add upstream.
+
+A previous change blindly added massive alignment to the
+call_single_data structure in struct request. This ballooned it in size
+from 296 to 320 bytes on my setup, for no valid reason at all.
+
+Use the unaligned struct __call_single_data variant instead.
+
+Fixes: 966a967116e69 ("smp: Avoid using two cache lines for struct call_single_data")
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/blkdev.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/linux/blkdev.h
++++ b/include/linux/blkdev.h
+@@ -135,7 +135,7 @@ typedef __u32 __bitwise req_flags_t;
+ struct request {
+       struct list_head queuelist;
+       union {
+-              call_single_data_t csd;
++              struct __call_single_data csd;
+               u64 fifo_time;
+       };
diff --git a/queue-4.14/clk-sunxi-sun9i-mmc-implement-reset-callback-for-reset-controls.patch b/queue-4.14/clk-sunxi-sun9i-mmc-implement-reset-callback-for-reset-controls.patch
new file mode 100644 (file)
index 0000000..2bc0705
--- /dev/null
@@ -0,0 +1,60 @@
+From 61d2f2a05765a5f57149efbd93e3e81a83cbc2c1 Mon Sep 17 00:00:00 2001
+From: Chen-Yu Tsai <wens@csie.org>
+Date: Mon, 18 Dec 2017 11:57:51 +0800
+Subject: clk: sunxi: sun9i-mmc: Implement reset callback for reset controls
+
+From: Chen-Yu Tsai <wens@csie.org>
+
+commit 61d2f2a05765a5f57149efbd93e3e81a83cbc2c1 upstream.
+
+Our MMC host driver now issues a reset, instead of just deasserting
+the reset control, since commit c34eda69ad4c ("mmc: sunxi: Reset the
+device at probe time"). The sun9i-mmc clock driver does not support
+this, and will fail, which results in MMC not probing.
+
+This patch implements the reset callback by asserting the reset control,
+then deasserting it after a small delay.
+
+Fixes: 7a6fca879f59 ("clk: sunxi: Add driver for A80 MMC config clocks/resets")
+Signed-off-by: Chen-Yu Tsai <wens@csie.org>
+Acked-by: Philipp Zabel <p.zabel@pengutronix.de>
+Acked-by: Maxime Ripard <maxime.ripard@free-electrons.com>
+Signed-off-by: Michael Turquette <mturquette@baylibre.com>
+Link: lkml.kernel.org/r/20171218035751.20661-1-wens@csie.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/clk/sunxi/clk-sun9i-mmc.c |   12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+--- a/drivers/clk/sunxi/clk-sun9i-mmc.c
++++ b/drivers/clk/sunxi/clk-sun9i-mmc.c
+@@ -16,6 +16,7 @@
+ #include <linux/clk.h>
+ #include <linux/clk-provider.h>
++#include <linux/delay.h>
+ #include <linux/init.h>
+ #include <linux/of.h>
+ #include <linux/of_device.h>
+@@ -83,9 +84,20 @@ static int sun9i_mmc_reset_deassert(stru
+       return 0;
+ }
++static int sun9i_mmc_reset_reset(struct reset_controller_dev *rcdev,
++                               unsigned long id)
++{
++      sun9i_mmc_reset_assert(rcdev, id);
++      udelay(10);
++      sun9i_mmc_reset_deassert(rcdev, id);
++
++      return 0;
++}
++
+ static const struct reset_control_ops sun9i_mmc_reset_ops = {
+       .assert         = sun9i_mmc_reset_assert,
+       .deassert       = sun9i_mmc_reset_deassert,
++      .reset          = sun9i_mmc_reset_reset,
+ };
+ static int sun9i_a80_mmc_config_clk_probe(struct platform_device *pdev)
diff --git a/queue-4.14/crypto-af_alg-fix-race-accessing-cipher-request.patch b/queue-4.14/crypto-af_alg-fix-race-accessing-cipher-request.patch
new file mode 100644 (file)
index 0000000..3d060d1
--- /dev/null
@@ -0,0 +1,87 @@
+From d53c5135792319e095bb126bc43b2ee98586f7fe Mon Sep 17 00:00:00 2001
+From: Stephan Mueller <smueller@chronox.de>
+Date: Fri, 8 Dec 2017 11:50:37 +0100
+Subject: crypto: af_alg - fix race accessing cipher request
+
+From: Stephan Mueller <smueller@chronox.de>
+
+commit d53c5135792319e095bb126bc43b2ee98586f7fe upstream.
+
+When invoking an asynchronous cipher operation, the invocation of the
+callback may be performed before the subsequent operations in the
+initial code path are invoked. The callback deletes the cipher request
+data structure which implies that after the invocation of the
+asynchronous cipher operation, this data structure must not be accessed
+any more.
+
+The setting of the return code size with the request data structure must
+therefore be moved before the invocation of the asynchronous cipher
+operation.
+
+Fixes: e870456d8e7c ("crypto: algif_skcipher - overhaul memory management")
+Fixes: d887c52d6ae4 ("crypto: algif_aead - overhaul memory management")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Stephan Mueller <smueller@chronox.de>
+Acked-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ crypto/algif_aead.c     |   10 +++++-----
+ crypto/algif_skcipher.c |   10 +++++-----
+ 2 files changed, 10 insertions(+), 10 deletions(-)
+
+--- a/crypto/algif_aead.c
++++ b/crypto/algif_aead.c
+@@ -291,6 +291,10 @@ static int _aead_recvmsg(struct socket *
+               /* AIO operation */
+               sock_hold(sk);
+               areq->iocb = msg->msg_iocb;
++
++              /* Remember output size that will be generated. */
++              areq->outlen = outlen;
++
+               aead_request_set_callback(&areq->cra_u.aead_req,
+                                         CRYPTO_TFM_REQ_MAY_BACKLOG,
+                                         af_alg_async_cb, areq);
+@@ -298,12 +302,8 @@ static int _aead_recvmsg(struct socket *
+                                crypto_aead_decrypt(&areq->cra_u.aead_req);
+               /* AIO operation in progress */
+-              if (err == -EINPROGRESS || err == -EBUSY) {
+-                      /* Remember output size that will be generated. */
+-                      areq->outlen = outlen;
+-
++              if (err == -EINPROGRESS || err == -EBUSY)
+                       return -EIOCBQUEUED;
+-              }
+               sock_put(sk);
+       } else {
+--- a/crypto/algif_skcipher.c
++++ b/crypto/algif_skcipher.c
+@@ -125,6 +125,10 @@ static int _skcipher_recvmsg(struct sock
+               /* AIO operation */
+               sock_hold(sk);
+               areq->iocb = msg->msg_iocb;
++
++              /* Remember output size that will be generated. */
++              areq->outlen = len;
++
+               skcipher_request_set_callback(&areq->cra_u.skcipher_req,
+                                             CRYPTO_TFM_REQ_MAY_SLEEP,
+                                             af_alg_async_cb, areq);
+@@ -133,12 +137,8 @@ static int _skcipher_recvmsg(struct sock
+                       crypto_skcipher_decrypt(&areq->cra_u.skcipher_req);
+               /* AIO operation in progress */
+-              if (err == -EINPROGRESS || err == -EBUSY) {
+-                      /* Remember output size that will be generated. */
+-                      areq->outlen = len;
+-
++              if (err == -EINPROGRESS || err == -EBUSY)
+                       return -EIOCBQUEUED;
+-              }
+               sock_put(sk);
+       } else {
diff --git a/queue-4.14/crypto-af_alg-wait-for-data-at-beginning-of-recvmsg.patch b/queue-4.14/crypto-af_alg-wait-for-data-at-beginning-of-recvmsg.patch
new file mode 100644 (file)
index 0000000..d118d35
--- /dev/null
@@ -0,0 +1,75 @@
+From 11edb555966ed2c66c533d17c604f9d7e580a829 Mon Sep 17 00:00:00 2001
+From: Stephan Mueller <smueller@chronox.de>
+Date: Wed, 29 Nov 2017 12:02:23 +0100
+Subject: crypto: af_alg - wait for data at beginning of recvmsg
+
+From: Stephan Mueller <smueller@chronox.de>
+
+commit 11edb555966ed2c66c533d17c604f9d7e580a829 upstream.
+
+The wait for data is a non-atomic operation that can sleep and therefore
+potentially release the socket lock. The release of the socket lock
+allows another thread to modify the context data structure. The waiting
+operation for new data therefore must be called at the beginning of
+recvmsg. This prevents a race condition where checks of the members of
+the context data structure are performed by recvmsg while there is a
+potential for modification of these values.
+
+Fixes: e870456d8e7c ("crypto: algif_skcipher - overhaul memory management")
+Fixes: d887c52d6ae4 ("crypto: algif_aead - overhaul memory management")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Stephan Mueller <smueller@chronox.de>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ crypto/af_alg.c         |    6 ------
+ crypto/algif_aead.c     |    6 ++++++
+ crypto/algif_skcipher.c |    6 ++++++
+ 3 files changed, 12 insertions(+), 6 deletions(-)
+
+--- a/crypto/af_alg.c
++++ b/crypto/af_alg.c
+@@ -1165,12 +1165,6 @@ int af_alg_get_rsgl(struct sock *sk, str
+               if (!af_alg_readable(sk))
+                       break;
+-              if (!ctx->used) {
+-                      err = af_alg_wait_for_data(sk, flags);
+-                      if (err)
+-                              return err;
+-              }
+-
+               seglen = min_t(size_t, (maxsize - len),
+                              msg_data_left(msg));
+--- a/crypto/algif_aead.c
++++ b/crypto/algif_aead.c
+@@ -111,6 +111,12 @@ static int _aead_recvmsg(struct socket *
+       size_t usedpages = 0;           /* [in]  RX bufs to be used from user */
+       size_t processed = 0;           /* [in]  TX bufs to be consumed */
++      if (!ctx->used) {
++              err = af_alg_wait_for_data(sk, flags);
++              if (err)
++                      return err;
++      }
++
+       /*
+        * Data length provided by caller via sendmsg/sendpage that has not
+        * yet been processed.
+--- a/crypto/algif_skcipher.c
++++ b/crypto/algif_skcipher.c
+@@ -72,6 +72,12 @@ static int _skcipher_recvmsg(struct sock
+       int err = 0;
+       size_t len = 0;
++      if (!ctx->used) {
++              err = af_alg_wait_for_data(sk, flags);
++              if (err)
++                      return err;
++      }
++
+       /* Allocate cipher request for current operation. */
+       areq = af_alg_alloc_areq(sk, sizeof(struct af_alg_async_req) +
+                                    crypto_skcipher_reqsize(tfm));
diff --git a/queue-4.14/crypto-mcryptd-protect-the-per-cpu-queue-with-a-lock.patch b/queue-4.14/crypto-mcryptd-protect-the-per-cpu-queue-with-a-lock.patch
new file mode 100644 (file)
index 0000000..a79688a
--- /dev/null
@@ -0,0 +1,113 @@
+From 9abffc6f2efe46c3564c04312e52e07622d40e51 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Thu, 30 Nov 2017 13:39:27 +0100
+Subject: crypto: mcryptd - protect the per-CPU queue with a lock
+
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+
+commit 9abffc6f2efe46c3564c04312e52e07622d40e51 upstream.
+
+mcryptd_enqueue_request() grabs the per-CPU queue struct and protects
+access to it with disabled preemption. Then it schedules a worker on the
+same CPU. The worker in mcryptd_queue_worker() guards access to the same
+per-CPU variable with disabled preemption.
+
+If we take CPU-hotplug into account then it is possible that between
+queue_work_on() and the actual invocation of the worker the CPU goes
+down and the worker will be scheduled on _another_ CPU. And here the
+preempt_disable() protection does not work anymore. The easiest thing is
+to add a spin_lock() to guard access to the list.
+
+Another detail: mcryptd_queue_worker() is not processing more than
+MCRYPTD_BATCH invocations in a row. If there are still items left, then
+it will invoke queue_work() to proceed with more later. *I* would
+suggest to simply drop that check because it does not use a system
+workqueue and the workqueue is already marked as "CPU_INTENSIVE". And if
+preemption is required then the scheduler should do it.
+However if queue_work() is used then the work item is marked as CPU
+unbound. That means it will try to run on the local CPU but it may run
+on another CPU as well. Especially with CONFIG_DEBUG_WQ_FORCE_RR_CPU=y.
+Again, the preempt_disable() won't work here but lock which was
+introduced will help.
+In order to keep work-item on the local CPU (and avoid RR) I changed it
+to queue_work_on().
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ crypto/mcryptd.c         |   23 ++++++++++-------------
+ include/crypto/mcryptd.h |    1 +
+ 2 files changed, 11 insertions(+), 13 deletions(-)
+
+--- a/crypto/mcryptd.c
++++ b/crypto/mcryptd.c
+@@ -81,6 +81,7 @@ static int mcryptd_init_queue(struct mcr
+               pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue);
+               crypto_init_queue(&cpu_queue->queue, max_cpu_qlen);
+               INIT_WORK(&cpu_queue->work, mcryptd_queue_worker);
++              spin_lock_init(&cpu_queue->q_lock);
+       }
+       return 0;
+ }
+@@ -104,15 +105,16 @@ static int mcryptd_enqueue_request(struc
+       int cpu, err;
+       struct mcryptd_cpu_queue *cpu_queue;
+-      cpu = get_cpu();
+-      cpu_queue = this_cpu_ptr(queue->cpu_queue);
+-      rctx->tag.cpu = cpu;
++      cpu_queue = raw_cpu_ptr(queue->cpu_queue);
++      spin_lock(&cpu_queue->q_lock);
++      cpu = smp_processor_id();
++      rctx->tag.cpu = smp_processor_id();
+       err = crypto_enqueue_request(&cpu_queue->queue, request);
+       pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n",
+                cpu, cpu_queue, request);
++      spin_unlock(&cpu_queue->q_lock);
+       queue_work_on(cpu, kcrypto_wq, &cpu_queue->work);
+-      put_cpu();
+       return err;
+ }
+@@ -161,16 +163,11 @@ static void mcryptd_queue_worker(struct
+       cpu_queue = container_of(work, struct mcryptd_cpu_queue, work);
+       i = 0;
+       while (i < MCRYPTD_BATCH || single_task_running()) {
+-              /*
+-               * preempt_disable/enable is used to prevent
+-               * being preempted by mcryptd_enqueue_request()
+-               */
+-              local_bh_disable();
+-              preempt_disable();
++
++              spin_lock_bh(&cpu_queue->q_lock);
+               backlog = crypto_get_backlog(&cpu_queue->queue);
+               req = crypto_dequeue_request(&cpu_queue->queue);
+-              preempt_enable();
+-              local_bh_enable();
++              spin_unlock_bh(&cpu_queue->q_lock);
+               if (!req) {
+                       mcryptd_opportunistic_flush();
+@@ -185,7 +182,7 @@ static void mcryptd_queue_worker(struct
+               ++i;
+       }
+       if (cpu_queue->queue.qlen)
+-              queue_work(kcrypto_wq, &cpu_queue->work);
++              queue_work_on(smp_processor_id(), kcrypto_wq, &cpu_queue->work);
+ }
+ void mcryptd_flusher(struct work_struct *__work)
+--- a/include/crypto/mcryptd.h
++++ b/include/crypto/mcryptd.h
+@@ -27,6 +27,7 @@ static inline struct mcryptd_ahash *__mc
+ struct mcryptd_cpu_queue {
+       struct crypto_queue queue;
++      spinlock_t q_lock;
+       struct work_struct work;
+ };
diff --git a/queue-4.14/crypto-skcipher-set-walk.iv-for-zero-length-inputs.patch b/queue-4.14/crypto-skcipher-set-walk.iv-for-zero-length-inputs.patch
new file mode 100644 (file)
index 0000000..a38c45f
--- /dev/null
@@ -0,0 +1,80 @@
+From 2b4f27c36bcd46e820ddb9a8e6fe6a63fa4250b8 Mon Sep 17 00:00:00 2001
+From: Eric Biggers <ebiggers@google.com>
+Date: Wed, 29 Nov 2017 01:18:57 -0800
+Subject: crypto: skcipher - set walk.iv for zero-length inputs
+
+From: Eric Biggers <ebiggers@google.com>
+
+commit 2b4f27c36bcd46e820ddb9a8e6fe6a63fa4250b8 upstream.
+
+All the ChaCha20 algorithms as well as the ARM bit-sliced AES-XTS
+algorithms call skcipher_walk_virt(), then access the IV (walk.iv)
+before checking whether any bytes need to be processed (walk.nbytes).
+
+But if the input is empty, then skcipher_walk_virt() doesn't set the IV,
+and the algorithms crash trying to use the uninitialized IV pointer.
+
+Fix it by setting the IV earlier in skcipher_walk_virt().  Also fix it
+for the AEAD walk functions.
+
+This isn't a perfect solution because we can't actually align the IV to
+->cra_alignmask unless there are bytes to process, for one because the
+temporary buffer for the aligned IV is freed by skcipher_walk_done(),
+which is only called when there are bytes to process.  Thus, algorithms
+that require aligned IVs will still need to avoid accessing the IV when
+walk.nbytes == 0.  Still, many algorithms/architectures are fine with
+IVs having any alignment, and even for those that aren't, a misaligned
+pointer bug is much less severe than an uninitialized pointer bug.
+
+This change also matches the behavior of the older blkcipher_walk API.
+
+Fixes: 0cabf2af6f5a ("crypto: skcipher - Fix crash on zero-length input")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Eric Biggers <ebiggers@google.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ crypto/skcipher.c |   10 ++++------
+ 1 file changed, 4 insertions(+), 6 deletions(-)
+
+--- a/crypto/skcipher.c
++++ b/crypto/skcipher.c
+@@ -449,6 +449,8 @@ static int skcipher_walk_skcipher(struct
+       walk->total = req->cryptlen;
+       walk->nbytes = 0;
++      walk->iv = req->iv;
++      walk->oiv = req->iv;
+       if (unlikely(!walk->total))
+               return 0;
+@@ -456,9 +458,6 @@ static int skcipher_walk_skcipher(struct
+       scatterwalk_start(&walk->in, req->src);
+       scatterwalk_start(&walk->out, req->dst);
+-      walk->iv = req->iv;
+-      walk->oiv = req->iv;
+-
+       walk->flags &= ~SKCIPHER_WALK_SLEEP;
+       walk->flags |= req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+                      SKCIPHER_WALK_SLEEP : 0;
+@@ -510,6 +509,8 @@ static int skcipher_walk_aead_common(str
+       int err;
+       walk->nbytes = 0;
++      walk->iv = req->iv;
++      walk->oiv = req->iv;
+       if (unlikely(!walk->total))
+               return 0;
+@@ -525,9 +526,6 @@ static int skcipher_walk_aead_common(str
+       scatterwalk_done(&walk->in, 0, walk->total);
+       scatterwalk_done(&walk->out, 0, walk->total);
+-      walk->iv = req->iv;
+-      walk->oiv = req->iv;
+-
+       if (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP)
+               walk->flags |= SKCIPHER_WALK_SLEEP;
+       else
diff --git a/queue-4.14/drm-i915-flush-pending-gtt-writes-before-unbinding.patch b/queue-4.14/drm-i915-flush-pending-gtt-writes-before-unbinding.patch
new file mode 100644 (file)
index 0000000..5e5dcae
--- /dev/null
@@ -0,0 +1,56 @@
+From 2797c4a11f373b2545c2398ccb02e362ee66a142 Mon Sep 17 00:00:00 2001
+From: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon, 4 Dec 2017 13:25:13 +0000
+Subject: drm/i915: Flush pending GTT writes before unbinding
+
+From: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 2797c4a11f373b2545c2398ccb02e362ee66a142 upstream.
+
+From the shrinker paths, we want to relinquish the GPU and GGTT access to
+the object, releasing the backing storage back to the system for
+swapout. As a part of that process we would unpin the pages, marking
+them for access by the CPU (for the swapout/swapin). However, if that
+process was interrupted after unbinding the vma, we missed a flush of the
+inflight GGTT writes before we made that GTT space available again for
+reuse, with the prospect that we would redirect them to another page.
+
+The bug dates back to the introduction of multiple GGTT vma, but the
+code itself dates to commit 02bef8f98d26 ("drm/i915: Unbind closed vma
+for i915_gem_object_unbind()").
+
+Fixes: 02bef8f98d26 ("drm/i915: Unbind closed vma for i915_gem_object_unbind()")
+Fixes: c5ad54cf7dd8 ("drm/i915: Use partial view in mmap fault handler")
+Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
+Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20171204132513.7303-1-chris@chris-wilson.co.uk
+(cherry picked from commit 5888fc9eac3c2ff96e76aeeb865fdb46ab2d711e)
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/i915/i915_gem.c |    9 +--------
+ 1 file changed, 1 insertion(+), 8 deletions(-)
+
+--- a/drivers/gpu/drm/i915/i915_gem.c
++++ b/drivers/gpu/drm/i915/i915_gem.c
+@@ -325,17 +325,10 @@ int i915_gem_object_unbind(struct drm_i9
+        * must wait for all rendering to complete to the object (as unbinding
+        * must anyway), and retire the requests.
+        */
+-      ret = i915_gem_object_wait(obj,
+-                                 I915_WAIT_INTERRUPTIBLE |
+-                                 I915_WAIT_LOCKED |
+-                                 I915_WAIT_ALL,
+-                                 MAX_SCHEDULE_TIMEOUT,
+-                                 NULL);
++      ret = i915_gem_object_set_to_cpu_domain(obj, false);
+       if (ret)
+               return ret;
+-      i915_gem_retire_requests(to_i915(obj->base.dev));
+-
+       while ((vma = list_first_entry_or_null(&obj->vma_list,
+                                              struct i915_vma,
+                                              obj_link))) {
diff --git a/queue-4.14/drm-sun4i-fix-error-path-handling.patch b/queue-4.14/drm-sun4i-fix-error-path-handling.patch
new file mode 100644 (file)
index 0000000..fd1c6f0
--- /dev/null
@@ -0,0 +1,45 @@
+From 92411f6d7f1afcc95e54295d40e96a75385212ec Mon Sep 17 00:00:00 2001
+From: Maxime Ripard <maxime.ripard@free-electrons.com>
+Date: Thu, 7 Dec 2017 16:58:50 +0100
+Subject: drm/sun4i: Fix error path handling
+
+From: Maxime Ripard <maxime.ripard@free-electrons.com>
+
+commit 92411f6d7f1afcc95e54295d40e96a75385212ec upstream.
+
+The commit 4c7f16d14a33 ("drm/sun4i: Fix TCON clock and regmap
+initialization sequence") moved a bunch of logic around, but forgot to
+update the gotos after the introduction of the err_free_dotclock label.
+
+It means that if we fail later than the one introduced in that commit,
+we'll just jump to the old label, which doesn't free the clock we created. This
+will result in a breakage as soon as someone tries to do something with
+that clock, since its resources will have been long reclaimed.
+
+Fixes: 4c7f16d14a33 ("drm/sun4i: Fix TCON clock and regmap initialization sequence")
+Reviewed-by: Chen-Yu Tsai <wens@csie.org>
+Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/f83c1cebc731f0b4251f5ddd7b38c718cd79bb0b.1512662253.git-series.maxime.ripard@free-electrons.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/sun4i/sun4i_tcon.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/sun4i/sun4i_tcon.c
++++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c
+@@ -567,12 +567,12 @@ static int sun4i_tcon_bind(struct device
+       if (IS_ERR(tcon->crtc)) {
+               dev_err(dev, "Couldn't create our CRTC\n");
+               ret = PTR_ERR(tcon->crtc);
+-              goto err_free_clocks;
++              goto err_free_dotclock;
+       }
+       ret = sun4i_rgb_init(drm, tcon);
+       if (ret < 0)
+-              goto err_free_clocks;
++              goto err_free_dotclock;
+       list_add_tail(&tcon->list, &drv->tcon_list);
diff --git a/queue-4.14/init-invoke-init_espfix_bsp-from-mm_init.patch b/queue-4.14/init-invoke-init_espfix_bsp-from-mm_init.patch
new file mode 100644 (file)
index 0000000..ffc27c9
--- /dev/null
@@ -0,0 +1,108 @@
+From 613e396bc0d4c7604fba23256644e78454c68cf6 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 17 Dec 2017 10:56:29 +0100
+Subject: init: Invoke init_espfix_bsp() from mm_init()
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 613e396bc0d4c7604fba23256644e78454c68cf6 upstream.
+
+init_espfix_bsp() needs to be invoked before the page table isolation
+initialization. Move it into mm_init() which is the place where pti_init()
+will be added.
+
+While at it get rid of the #ifdeffery and provide proper stub functions.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/espfix.h |    7 ++++---
+ arch/x86/kernel/smpboot.c     |    6 +-----
+ include/asm-generic/pgtable.h |    5 +++++
+ init/main.c                   |    6 ++----
+ 4 files changed, 12 insertions(+), 12 deletions(-)
+
+--- a/arch/x86/include/asm/espfix.h
++++ b/arch/x86/include/asm/espfix.h
+@@ -2,7 +2,7 @@
+ #ifndef _ASM_X86_ESPFIX_H
+ #define _ASM_X86_ESPFIX_H
+-#ifdef CONFIG_X86_64
++#ifdef CONFIG_X86_ESPFIX64
+ #include <asm/percpu.h>
+@@ -11,7 +11,8 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned lon
+ extern void init_espfix_bsp(void);
+ extern void init_espfix_ap(int cpu);
+-
+-#endif /* CONFIG_X86_64 */
++#else
++static inline void init_espfix_ap(int cpu) { }
++#endif
+ #endif /* _ASM_X86_ESPFIX_H */
+--- a/arch/x86/kernel/smpboot.c
++++ b/arch/x86/kernel/smpboot.c
+@@ -990,12 +990,8 @@ static int do_boot_cpu(int apicid, int c
+       initial_code = (unsigned long)start_secondary;
+       initial_stack  = idle->thread.sp;
+-      /*
+-       * Enable the espfix hack for this CPU
+-      */
+-#ifdef CONFIG_X86_ESPFIX64
++      /* Enable the espfix hack for this CPU */
+       init_espfix_ap(cpu);
+-#endif
+       /* So we see what's up */
+       announce_cpu(cpu, apicid);
+--- a/include/asm-generic/pgtable.h
++++ b/include/asm-generic/pgtable.h
+@@ -1025,6 +1025,11 @@ static inline int pmd_clear_huge(pmd_t *
+ struct file;
+ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
+                       unsigned long size, pgprot_t *vma_prot);
++
++#ifndef CONFIG_X86_ESPFIX64
++static inline void init_espfix_bsp(void) { }
++#endif
++
+ #endif /* !__ASSEMBLY__ */
+ #ifndef io_remap_pfn_range
+--- a/init/main.c
++++ b/init/main.c
+@@ -504,6 +504,8 @@ static void __init mm_init(void)
+       pgtable_init();
+       vmalloc_init();
+       ioremap_huge_init();
++      /* Should be run before the first non-init thread is created */
++      init_espfix_bsp();
+ }
+ asmlinkage __visible void __init start_kernel(void)
+@@ -674,10 +676,6 @@ asmlinkage __visible void __init start_k
+       if (efi_enabled(EFI_RUNTIME_SERVICES))
+               efi_enter_virtual_mode();
+ #endif
+-#ifdef CONFIG_X86_ESPFIX64
+-      /* Should be run before the first non-init thread is created */
+-      init_espfix_bsp();
+-#endif
+       thread_stack_cache_init();
+       cred_init();
+       fork_init();
diff --git a/queue-4.14/kvm-arm-arm64-fix-hyp-unmapping-going-off-limits.patch b/queue-4.14/kvm-arm-arm64-fix-hyp-unmapping-going-off-limits.patch
new file mode 100644 (file)
index 0000000..3c5814b
--- /dev/null
@@ -0,0 +1,63 @@
+From 7839c672e58bf62da8f2f0197fefb442c02ba1dd Mon Sep 17 00:00:00 2001
+From: Marc Zyngier <marc.zyngier@arm.com>
+Date: Thu, 7 Dec 2017 11:45:45 +0000
+Subject: KVM: arm/arm64: Fix HYP unmapping going off limits
+
+From: Marc Zyngier <marc.zyngier@arm.com>
+
+commit 7839c672e58bf62da8f2f0197fefb442c02ba1dd upstream.
+
+When we unmap the HYP memory, we try to be clever and unmap one
+PGD at a time. If we start with a non-PGD aligned address and try
+to unmap a whole PGD, things go horribly wrong in unmap_hyp_range
+(addr and end can never match, and it all goes really badly as we
+keep incrementing pgd and parse random memory as page tables...).
+
+The obvious fix is to let unmap_hyp_range do what it does best,
+which is to iterate over a range.
+
+The size of the linear mapping, which begins at PAGE_OFFSET, can be
+easily calculated by subtracting PAGE_OFFSET from high_memory, because
+high_memory is defined as the linear map address of the last byte of
+DRAM, plus one.
+
+The size of the vmalloc region is given trivially by VMALLOC_END -
+VMALLOC_START.
+
+Reported-by: Andre Przywara <andre.przywara@arm.com>
+Tested-by: Andre Przywara <andre.przywara@arm.com>
+Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
+Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
+Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ virt/kvm/arm/mmu.c |   10 ++++------
+ 1 file changed, 4 insertions(+), 6 deletions(-)
+
+--- a/virt/kvm/arm/mmu.c
++++ b/virt/kvm/arm/mmu.c
+@@ -509,8 +509,6 @@ static void unmap_hyp_range(pgd_t *pgdp,
+  */
+ void free_hyp_pgds(void)
+ {
+-      unsigned long addr;
+-
+       mutex_lock(&kvm_hyp_pgd_mutex);
+       if (boot_hyp_pgd) {
+@@ -521,10 +519,10 @@ void free_hyp_pgds(void)
+       if (hyp_pgd) {
+               unmap_hyp_range(hyp_pgd, hyp_idmap_start, PAGE_SIZE);
+-              for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
+-                      unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE);
+-              for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
+-                      unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE);
++              unmap_hyp_range(hyp_pgd, kern_hyp_va(PAGE_OFFSET),
++                              (uintptr_t)high_memory - PAGE_OFFSET);
++              unmap_hyp_range(hyp_pgd, kern_hyp_va(VMALLOC_START),
++                              VMALLOC_END - VMALLOC_START);
+               free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
+               hyp_pgd = NULL;
diff --git a/queue-4.14/kvm-mmu-fix-infinite-loop-when-there-is-no-available-mmu-page.patch b/queue-4.14/kvm-mmu-fix-infinite-loop-when-there-is-no-available-mmu-page.patch
new file mode 100644 (file)
index 0000000..6dbac9c
--- /dev/null
@@ -0,0 +1,97 @@
+From ed52870f4676489124d8697fd00e6ae6c504e586 Mon Sep 17 00:00:00 2001
+From: Wanpeng Li <wanpeng.li@hotmail.com>
+Date: Mon, 4 Dec 2017 22:21:30 -0800
+Subject: KVM: MMU: Fix infinite loop when there is no available mmu page
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Wanpeng Li <wanpeng.li@hotmail.com>
+
+commit ed52870f4676489124d8697fd00e6ae6c504e586 upstream.
+
+The below test case can cause infinite loop in kvm when ept=0.
+
+    #include <unistd.h>
+    #include <sys/syscall.h>
+    #include <string.h>
+    #include <stdint.h>
+    #include <linux/kvm.h>
+    #include <fcntl.h>
+    #include <sys/ioctl.h>
+
+    long r[5];
+    int main()
+    {
+       r[2] = open("/dev/kvm", O_RDONLY);
+       r[3] = ioctl(r[2], KVM_CREATE_VM, 0);
+       r[4] = ioctl(r[3], KVM_CREATE_VCPU, 7);
+       ioctl(r[4], KVM_RUN, 0);
+    }
+
+It doesn't set up the memory regions, mmu_alloc_shadow/direct_roots() in
+kvm return 1 when kvm fails to allocate root page table which can result
+in the below infinite loop:
+
+    vcpu_run() {
+       for (;;) {
+               r = vcpu_enter_guest()::kvm_mmu_reload() returns 1
+               if (r <= 0)
+                       break;
+               if (need_resched())
+                       cond_resched();
+      }
+    }
+
+This patch fixes it by returning -ENOSPC when there is no available kvm mmu
+page for root page table.
+
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Radim Krčmář <rkrcmar@redhat.com>
+Fixes: 26eeb53cf0f (KVM: MMU: Bail out immediately if there is no available mmu page)
+Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/mmu.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -3382,7 +3382,7 @@ static int mmu_alloc_direct_roots(struct
+               spin_lock(&vcpu->kvm->mmu_lock);
+               if(make_mmu_pages_available(vcpu) < 0) {
+                       spin_unlock(&vcpu->kvm->mmu_lock);
+-                      return 1;
++                      return -ENOSPC;
+               }
+               sp = kvm_mmu_get_page(vcpu, 0, 0,
+                               vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL);
+@@ -3397,7 +3397,7 @@ static int mmu_alloc_direct_roots(struct
+                       spin_lock(&vcpu->kvm->mmu_lock);
+                       if (make_mmu_pages_available(vcpu) < 0) {
+                               spin_unlock(&vcpu->kvm->mmu_lock);
+-                              return 1;
++                              return -ENOSPC;
+                       }
+                       sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
+                                       i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL);
+@@ -3437,7 +3437,7 @@ static int mmu_alloc_shadow_roots(struct
+               spin_lock(&vcpu->kvm->mmu_lock);
+               if (make_mmu_pages_available(vcpu) < 0) {
+                       spin_unlock(&vcpu->kvm->mmu_lock);
+-                      return 1;
++                      return -ENOSPC;
+               }
+               sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
+                               vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL);
+@@ -3474,7 +3474,7 @@ static int mmu_alloc_shadow_roots(struct
+               spin_lock(&vcpu->kvm->mmu_lock);
+               if (make_mmu_pages_available(vcpu) < 0) {
+                       spin_unlock(&vcpu->kvm->mmu_lock);
+-                      return 1;
++                      return -ENOSPC;
+               }
+               sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL,
+                                     0, ACC_ALL);
diff --git a/queue-4.14/kvm-ppc-book3s-fix-xive-migration-of-pending-interrupts.patch b/queue-4.14/kvm-ppc-book3s-fix-xive-migration-of-pending-interrupts.patch
new file mode 100644 (file)
index 0000000..f73d236
--- /dev/null
@@ -0,0 +1,51 @@
+From dc1c4165d189350cb51bdd3057deb6ecd164beda Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@kaod.org>
+Date: Tue, 12 Dec 2017 12:02:04 +0000
+Subject: KVM: PPC: Book3S: fix XIVE migration of pending interrupts
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Cédric Le Goater <clg@kaod.org>
+
+commit dc1c4165d189350cb51bdd3057deb6ecd164beda upstream.
+
+When restoring a pending interrupt, we are setting the Q bit to force
+a retrigger in xive_finish_unmask(). But we also need to force an EOI
+in this case to reach the same initial state : P=1, Q=0.
+
+This can be done by not setting 'old_p' for pending interrupts which
+will inform xive_finish_unmask() that an EOI needs to be sent.
+
+Fixes: 5af50993850a ("KVM: PPC: Book3S HV: Native usage of the XIVE interrupt controller")
+Suggested-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Signed-off-by: Cédric Le Goater <clg@kaod.org>
+Reviewed-by: Laurent Vivier <lvivier@redhat.com>
+Tested-by: Laurent Vivier <lvivier@redhat.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/kvm/book3s_xive.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/powerpc/kvm/book3s_xive.c
++++ b/arch/powerpc/kvm/book3s_xive.c
+@@ -1558,7 +1558,7 @@ static int xive_set_source(struct kvmppc
+       /*
+        * Restore P and Q. If the interrupt was pending, we
+-       * force both P and Q, which will trigger a resend.
++       * force Q and !P, which will trigger a resend.
+        *
+        * That means that a guest that had both an interrupt
+        * pending (queued) and Q set will restore with only
+@@ -1566,7 +1566,7 @@ static int xive_set_source(struct kvmppc
+        * is perfectly fine as coalescing interrupts that haven't
+        * been presented yet is always allowed.
+        */
+-      if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING)
++      if (val & KVM_XICS_PRESENTED && !(val & KVM_XICS_PENDING))
+               state->old_p = true;
+       if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING)
+               state->old_q = true;
diff --git a/queue-4.14/kvm-ppc-book3s-hv-fix-pending_pri-value-in-kvmppc_xive_get_icp.patch b/queue-4.14/kvm-ppc-book3s-hv-fix-pending_pri-value-in-kvmppc_xive_get_icp.patch
new file mode 100644 (file)
index 0000000..934e379
--- /dev/null
@@ -0,0 +1,52 @@
+From 7333b5aca412d6ad02667b5a513485838a91b136 Mon Sep 17 00:00:00 2001
+From: Laurent Vivier <lvivier@redhat.com>
+Date: Tue, 12 Dec 2017 18:23:56 +0100
+Subject: KVM: PPC: Book3S HV: Fix pending_pri value in kvmppc_xive_get_icp()
+
+From: Laurent Vivier <lvivier@redhat.com>
+
+commit 7333b5aca412d6ad02667b5a513485838a91b136 upstream.
+
+When we migrate a VM from a POWER8 host (XICS) to a POWER9 host
+(XICS-on-XIVE), we have an error:
+
+qemu-kvm: Unable to restore KVM interrupt controller state \
+          (0xff000000) for CPU 0: Invalid argument
+
+This is because kvmppc_xics_set_icp() checks the new state
+is internally consistent, and especially:
+
+...
+   1129         if (xisr == 0) {
+   1130                 if (pending_pri != 0xff)
+   1131                         return -EINVAL;
+...
+
+On the other side, kvmppc_xive_get_icp() sets
+neither the pending_pri value nor the xisr value (set to 0)
+(and kvmppc_xive_set_icp() ignores the pending_pri value)
+
+As xisr is 0, pending_pri must be set to 0xff.
+
+Fixes: 5af50993850a ("KVM: PPC: Book3S HV: Native usage of the XIVE interrupt controller")
+Signed-off-by: Laurent Vivier <lvivier@redhat.com>
+Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/kvm/book3s_xive.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/powerpc/kvm/book3s_xive.c
++++ b/arch/powerpc/kvm/book3s_xive.c
+@@ -725,7 +725,8 @@ u64 kvmppc_xive_get_icp(struct kvm_vcpu
+       /* Return the per-cpu state for state saving/migration */
+       return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT |
+-             (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT;
++             (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT |
++             (u64)0xff << KVM_REG_PPC_ICP_PPRI_SHIFT;
+ }
+ int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
diff --git a/queue-4.14/kvm-x86-fix-load-rflags-w-o-the-fixed-bit.patch b/queue-4.14/kvm-x86-fix-load-rflags-w-o-the-fixed-bit.patch
new file mode 100644 (file)
index 0000000..f9c8233
--- /dev/null
@@ -0,0 +1,72 @@
+From d73235d17ba63b53dc0e1051dbc10a1f1be91b71 Mon Sep 17 00:00:00 2001
+From: Wanpeng Li <wanpeng.li@hotmail.com>
+Date: Thu, 7 Dec 2017 00:30:08 -0800
+Subject: KVM: X86: Fix load RFLAGS w/o the fixed bit
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Wanpeng Li <wanpeng.li@hotmail.com>
+
+commit d73235d17ba63b53dc0e1051dbc10a1f1be91b71 upstream.
+
+ *** Guest State ***
+ CR0: actual=0x0000000000000030, shadow=0x0000000060000010, gh_mask=fffffffffffffff7
+ CR4: actual=0x0000000000002050, shadow=0x0000000000000000, gh_mask=ffffffffffffe871
+ CR3 = 0x00000000fffbc000
+ RSP = 0x0000000000000000  RIP = 0x0000000000000000
+ RFLAGS=0x00000000         DR7 = 0x0000000000000400
+        ^^^^^^^^^^
+
+The failed vmentry is triggered by the following testcase when ept=Y:
+
+    #include <unistd.h>
+    #include <sys/syscall.h>
+    #include <string.h>
+    #include <stdint.h>
+    #include <linux/kvm.h>
+    #include <fcntl.h>
+    #include <sys/ioctl.h>
+
+    long r[5];
+    int main()
+    {
+       r[2] = open("/dev/kvm", O_RDONLY);
+       r[3] = ioctl(r[2], KVM_CREATE_VM, 0);
+       r[4] = ioctl(r[3], KVM_CREATE_VCPU, 7);
+       struct kvm_regs regs = {
+               .rflags = 0,
+       };
+       ioctl(r[4], KVM_SET_REGS, &regs);
+       ioctl(r[4], KVM_RUN, 0);
+    }
+
+X86 RFLAGS bit 1 is fixed to 1, but userspace can simply clear bit 1
+of RFLAGS with the KVM_SET_REGS ioctl, which results in a vmentry failure.
+This patch fixes it by ORing in X86_EFLAGS_FIXED during the ioctl.
+
+Suggested-by: Jim Mattson <jmattson@google.com>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Quan Xu <quan.xu0@gmail.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Radim Krčmář <rkrcmar@redhat.com>
+Cc: Jim Mattson <jmattson@google.com>
+Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/x86.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -7359,7 +7359,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct
+ #endif
+       kvm_rip_write(vcpu, regs->rip);
+-      kvm_set_rflags(vcpu, regs->rflags);
++      kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
+       vcpu->arch.exception.pending = false;
diff --git a/queue-4.14/kvm-x86-fix-rsm-when-pcid-is-non-zero.patch b/queue-4.14/kvm-x86-fix-rsm-when-pcid-is-non-zero.patch
new file mode 100644 (file)
index 0000000..da052ff
--- /dev/null
@@ -0,0 +1,114 @@
+From fae1a3e775cca8c3a9e0eb34443b310871a15a92 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Thu, 21 Dec 2017 00:49:14 +0100
+Subject: kvm: x86: fix RSM when PCID is non-zero
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit fae1a3e775cca8c3a9e0eb34443b310871a15a92 upstream.
+
+rsm_load_state_64() and rsm_enter_protected_mode() load CR3, then
+CR4 & ~PCIDE, then CR0, then CR4.
+
+However, setting CR4.PCIDE fails if CR3[11:0] != 0.  It's probably easier
+in the long run to replace rsm_enter_protected_mode() with an emulator
+callback that sets all the special registers (like KVM_SET_SREGS would
+do).  For now, set the PCID field of CR3 only after CR4.PCIDE is 1.
+
+Reported-by: Laszlo Ersek <lersek@redhat.com>
+Tested-by: Laszlo Ersek <lersek@redhat.com>
+Fixes: 660a5d517aaab9187f93854425c4c63f4a09195c
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/emulate.c |   32 +++++++++++++++++++++++++-------
+ 1 file changed, 25 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -2404,9 +2404,21 @@ static int rsm_load_seg_64(struct x86_em
+ }
+ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
+-                                   u64 cr0, u64 cr4)
++                                  u64 cr0, u64 cr3, u64 cr4)
+ {
+       int bad;
++      u64 pcid;
++
++      /* In order to later set CR4.PCIDE, CR3[11:0] must be zero.  */
++      pcid = 0;
++      if (cr4 & X86_CR4_PCIDE) {
++              pcid = cr3 & 0xfff;
++              cr3 &= ~0xfff;
++      }
++
++      bad = ctxt->ops->set_cr(ctxt, 3, cr3);
++      if (bad)
++              return X86EMUL_UNHANDLEABLE;
+       /*
+        * First enable PAE, long mode needs it before CR0.PG = 1 is set.
+@@ -2425,6 +2437,12 @@ static int rsm_enter_protected_mode(stru
+               bad = ctxt->ops->set_cr(ctxt, 4, cr4);
+               if (bad)
+                       return X86EMUL_UNHANDLEABLE;
++              if (pcid) {
++                      bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
++                      if (bad)
++                              return X86EMUL_UNHANDLEABLE;
++              }
++
+       }
+       return X86EMUL_CONTINUE;
+@@ -2435,11 +2453,11 @@ static int rsm_load_state_32(struct x86_
+       struct desc_struct desc;
+       struct desc_ptr dt;
+       u16 selector;
+-      u32 val, cr0, cr4;
++      u32 val, cr0, cr3, cr4;
+       int i;
+       cr0 =                      GET_SMSTATE(u32, smbase, 0x7ffc);
+-      ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8));
++      cr3 =                      GET_SMSTATE(u32, smbase, 0x7ff8);
+       ctxt->eflags =             GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
+       ctxt->_eip =               GET_SMSTATE(u32, smbase, 0x7ff0);
+@@ -2481,14 +2499,14 @@ static int rsm_load_state_32(struct x86_
+       ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
+-      return rsm_enter_protected_mode(ctxt, cr0, cr4);
++      return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
+ }
+ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
+ {
+       struct desc_struct desc;
+       struct desc_ptr dt;
+-      u64 val, cr0, cr4;
++      u64 val, cr0, cr3, cr4;
+       u32 base3;
+       u16 selector;
+       int i, r;
+@@ -2505,7 +2523,7 @@ static int rsm_load_state_64(struct x86_
+       ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
+       cr0 =                       GET_SMSTATE(u64, smbase, 0x7f58);
+-      ctxt->ops->set_cr(ctxt, 3,  GET_SMSTATE(u64, smbase, 0x7f50));
++      cr3 =                       GET_SMSTATE(u64, smbase, 0x7f50);
+       cr4 =                       GET_SMSTATE(u64, smbase, 0x7f48);
+       ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
+       val =                       GET_SMSTATE(u64, smbase, 0x7ed0);
+@@ -2533,7 +2551,7 @@ static int rsm_load_state_64(struct x86_
+       dt.address =                GET_SMSTATE(u64, smbase, 0x7e68);
+       ctxt->ops->set_gdt(ctxt, &dt);
+-      r = rsm_enter_protected_mode(ctxt, cr0, cr4);
++      r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
+       if (r != X86EMUL_CONTINUE)
+               return r;
diff --git a/queue-4.14/libnvdimm-btt-fix-an-incompatibility-in-the-log-layout.patch b/queue-4.14/libnvdimm-btt-fix-an-incompatibility-in-the-log-layout.patch
new file mode 100644 (file)
index 0000000..70bff7d
--- /dev/null
@@ -0,0 +1,433 @@
+From 24e3a7fb60a9187e5df90e5fa655ffc94b9c4f77 Mon Sep 17 00:00:00 2001
+From: Vishal Verma <vishal.l.verma@intel.com>
+Date: Mon, 18 Dec 2017 09:28:39 -0700
+Subject: libnvdimm, btt: Fix an incompatibility in the log layout
+
+From: Vishal Verma <vishal.l.verma@intel.com>
+
+commit 24e3a7fb60a9187e5df90e5fa655ffc94b9c4f77 upstream.
+
+Due to a spec misinterpretation, the Linux implementation of the BTT log
+area had a different padding scheme from other implementations, such as
+UEFI and NVML.
+
+This fixes the padding scheme, and defaults to it for new BTT layouts.
+We attempt to detect the padding scheme in use when probing for an
+existing BTT. If we detect the older/incompatible scheme, we continue
+using it.
+
+Reported-by: Juston Li <juston.li@intel.com>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Fixes: 5212e11fde4d ("nd_btt: atomic sector updates")
+Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/nvdimm/btt.c |  201 ++++++++++++++++++++++++++++++++++++++++++---------
+ drivers/nvdimm/btt.h |   45 +++++++++++
+ 2 files changed, 211 insertions(+), 35 deletions(-)
+
+--- a/drivers/nvdimm/btt.c
++++ b/drivers/nvdimm/btt.c
+@@ -210,12 +210,12 @@ static int btt_map_read(struct arena_inf
+       return ret;
+ }
+-static int btt_log_read_pair(struct arena_info *arena, u32 lane,
+-                      struct log_entry *ent)
++static int btt_log_group_read(struct arena_info *arena, u32 lane,
++                      struct log_group *log)
+ {
+       return arena_read_bytes(arena,
+-                      arena->logoff + (2 * lane * LOG_ENT_SIZE), ent,
+-                      2 * LOG_ENT_SIZE, 0);
++                      arena->logoff + (lane * LOG_GRP_SIZE), log,
++                      LOG_GRP_SIZE, 0);
+ }
+ static struct dentry *debugfs_root;
+@@ -255,6 +255,8 @@ static void arena_debugfs_init(struct ar
+       debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff);
+       debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off);
+       debugfs_create_x32("flags", S_IRUGO, d, &a->flags);
++      debugfs_create_u32("log_index_0", S_IRUGO, d, &a->log_index[0]);
++      debugfs_create_u32("log_index_1", S_IRUGO, d, &a->log_index[1]);
+ }
+ static void btt_debugfs_init(struct btt *btt)
+@@ -273,6 +275,11 @@ static void btt_debugfs_init(struct btt
+       }
+ }
++static u32 log_seq(struct log_group *log, int log_idx)
++{
++      return le32_to_cpu(log->ent[log_idx].seq);
++}
++
+ /*
+  * This function accepts two log entries, and uses the
+  * sequence number to find the 'older' entry.
+@@ -282,8 +289,10 @@ static void btt_debugfs_init(struct btt
+  *
+  * TODO The logic feels a bit kludge-y. make it better..
+  */
+-static int btt_log_get_old(struct log_entry *ent)
++static int btt_log_get_old(struct arena_info *a, struct log_group *log)
+ {
++      int idx0 = a->log_index[0];
++      int idx1 = a->log_index[1];
+       int old;
+       /*
+@@ -291,23 +300,23 @@ static int btt_log_get_old(struct log_en
+        * the next time, the following logic works out to put this
+        * (next) entry into [1]
+        */
+-      if (ent[0].seq == 0) {
+-              ent[0].seq = cpu_to_le32(1);
++      if (log_seq(log, idx0) == 0) {
++              log->ent[idx0].seq = cpu_to_le32(1);
+               return 0;
+       }
+-      if (ent[0].seq == ent[1].seq)
++      if (log_seq(log, idx0) == log_seq(log, idx1))
+               return -EINVAL;
+-      if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5)
++      if (log_seq(log, idx0) + log_seq(log, idx1) > 5)
+               return -EINVAL;
+-      if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) {
+-              if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1)
++      if (log_seq(log, idx0) < log_seq(log, idx1)) {
++              if ((log_seq(log, idx1) - log_seq(log, idx0)) == 1)
+                       old = 0;
+               else
+                       old = 1;
+       } else {
+-              if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1)
++              if ((log_seq(log, idx0) - log_seq(log, idx1)) == 1)
+                       old = 1;
+               else
+                       old = 0;
+@@ -327,17 +336,18 @@ static int btt_log_read(struct arena_inf
+ {
+       int ret;
+       int old_ent, ret_ent;
+-      struct log_entry log[2];
++      struct log_group log;
+-      ret = btt_log_read_pair(arena, lane, log);
++      ret = btt_log_group_read(arena, lane, &log);
+       if (ret)
+               return -EIO;
+-      old_ent = btt_log_get_old(log);
++      old_ent = btt_log_get_old(arena, &log);
+       if (old_ent < 0 || old_ent > 1) {
+               dev_err(to_dev(arena),
+                               "log corruption (%d): lane %d seq [%d, %d]\n",
+-                      old_ent, lane, log[0].seq, log[1].seq);
++                              old_ent, lane, log.ent[arena->log_index[0]].seq,
++                              log.ent[arena->log_index[1]].seq);
+               /* TODO set error state? */
+               return -EIO;
+       }
+@@ -345,7 +355,7 @@ static int btt_log_read(struct arena_inf
+       ret_ent = (old_flag ? old_ent : (1 - old_ent));
+       if (ent != NULL)
+-              memcpy(ent, &log[ret_ent], LOG_ENT_SIZE);
++              memcpy(ent, &log.ent[arena->log_index[ret_ent]], LOG_ENT_SIZE);
+       return ret_ent;
+ }
+@@ -359,17 +369,13 @@ static int __btt_log_write(struct arena_
+                       u32 sub, struct log_entry *ent, unsigned long flags)
+ {
+       int ret;
+-      /*
+-       * Ignore the padding in log_entry for calculating log_half.
+-       * The entry is 'committed' when we write the sequence number,
+-       * and we want to ensure that that is the last thing written.
+-       * We don't bother writing the padding as that would be extra
+-       * media wear and write amplification
+-       */
+-      unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2;
+-      u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE);
++      u32 group_slot = arena->log_index[sub];
++      unsigned int log_half = LOG_ENT_SIZE / 2;
+       void *src = ent;
++      u64 ns_off;
++      ns_off = arena->logoff + (lane * LOG_GRP_SIZE) +
++              (group_slot * LOG_ENT_SIZE);
+       /* split the 16B write into atomic, durable halves */
+       ret = arena_write_bytes(arena, ns_off, src, log_half, flags);
+       if (ret)
+@@ -452,7 +458,7 @@ static int btt_log_init(struct arena_inf
+ {
+       size_t logsize = arena->info2off - arena->logoff;
+       size_t chunk_size = SZ_4K, offset = 0;
+-      struct log_entry log;
++      struct log_entry ent;
+       void *zerobuf;
+       int ret;
+       u32 i;
+@@ -484,11 +490,11 @@ static int btt_log_init(struct arena_inf
+       }
+       for (i = 0; i < arena->nfree; i++) {
+-              log.lba = cpu_to_le32(i);
+-              log.old_map = cpu_to_le32(arena->external_nlba + i);
+-              log.new_map = cpu_to_le32(arena->external_nlba + i);
+-              log.seq = cpu_to_le32(LOG_SEQ_INIT);
+-              ret = __btt_log_write(arena, i, 0, &log, 0);
++              ent.lba = cpu_to_le32(i);
++              ent.old_map = cpu_to_le32(arena->external_nlba + i);
++              ent.new_map = cpu_to_le32(arena->external_nlba + i);
++              ent.seq = cpu_to_le32(LOG_SEQ_INIT);
++              ret = __btt_log_write(arena, i, 0, &ent, 0);
+               if (ret)
+                       goto free;
+       }
+@@ -593,6 +599,123 @@ static int btt_freelist_init(struct aren
+       return 0;
+ }
++static bool ent_is_padding(struct log_entry *ent)
++{
++      return (ent->lba == 0) && (ent->old_map == 0) && (ent->new_map == 0)
++              && (ent->seq == 0);
++}
++
++/*
++ * Detecting valid log indices: We read a log group (see the comments in btt.h
++ * for a description of a 'log_group' and its 'slots'), and iterate over its
++ * four slots. We expect that a padding slot will be all-zeroes, and use this
++ * to detect a padding slot vs. an actual entry.
++ *
++ * If a log_group is in the initial state, i.e. hasn't been used since the
++ * creation of this BTT layout, it will have three of the four slots with
++ * zeroes. We skip over these log_groups for the detection of log_index. If
++ * all log_groups are in the initial state (i.e. the BTT has never been
++ * written to), it is safe to assume the 'new format' of log entries in slots
++ * (0, 1).
++ */
++static int log_set_indices(struct arena_info *arena)
++{
++      bool idx_set = false, initial_state = true;
++      int ret, log_index[2] = {-1, -1};
++      u32 i, j, next_idx = 0;
++      struct log_group log;
++      u32 pad_count = 0;
++
++      for (i = 0; i < arena->nfree; i++) {
++              ret = btt_log_group_read(arena, i, &log);
++              if (ret < 0)
++                      return ret;
++
++              for (j = 0; j < 4; j++) {
++                      if (!idx_set) {
++                              if (ent_is_padding(&log.ent[j])) {
++                                      pad_count++;
++                                      continue;
++                              } else {
++                                      /* Skip if index has been recorded */
++                                      if ((next_idx == 1) &&
++                                              (j == log_index[0]))
++                                              continue;
++                                      /* valid entry, record index */
++                                      log_index[next_idx] = j;
++                                      next_idx++;
++                              }
++                              if (next_idx == 2) {
++                                      /* two valid entries found */
++                                      idx_set = true;
++                              } else if (next_idx > 2) {
++                                      /* too many valid indices */
++                                      return -ENXIO;
++                              }
++                      } else {
++                              /*
++                               * once the indices have been set, just verify
++                               * that all subsequent log groups are either in
++                               * their initial state or follow the same
++                               * indices.
++                               */
++                              if (j == log_index[0]) {
++                                      /* entry must be 'valid' */
++                                      if (ent_is_padding(&log.ent[j]))
++                                              return -ENXIO;
++                              } else if (j == log_index[1]) {
++                                      ;
++                                      /*
++                                       * log_index[1] can be padding if the
++                                       * lane never got used and it is still
++                                       * in the initial state (three 'padding'
++                                       * entries)
++                                       */
++                              } else {
++                                      /* entry must be invalid (padding) */
++                                      if (!ent_is_padding(&log.ent[j]))
++                                              return -ENXIO;
++                              }
++                      }
++              }
++              /*
++               * If any of the log_groups have more than one valid,
++               * non-padding entry, then we are no longer in the
++               * initial_state
++               */
++              if (pad_count < 3)
++                      initial_state = false;
++              pad_count = 0;
++      }
++
++      if (!initial_state && !idx_set)
++              return -ENXIO;
++
++      /*
++       * If all the entries in the log were in the initial state,
++       * assume new padding scheme
++       */
++      if (initial_state)
++              log_index[1] = 1;
++
++      /*
++       * Only allow the known permutations of log/padding indices,
++       * i.e. (0, 1), and (0, 2)
++       */
++      if ((log_index[0] == 0) && ((log_index[1] == 1) || (log_index[1] == 2)))
++              ; /* known index possibilities */
++      else {
++              dev_err(to_dev(arena), "Found an unknown padding scheme\n");
++              return -ENXIO;
++      }
++
++      arena->log_index[0] = log_index[0];
++      arena->log_index[1] = log_index[1];
++      dev_dbg(to_dev(arena), "log_index_0 = %d\n", log_index[0]);
++      dev_dbg(to_dev(arena), "log_index_1 = %d\n", log_index[1]);
++      return 0;
++}
++
+ static int btt_rtt_init(struct arena_info *arena)
+ {
+       arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL);
+@@ -649,8 +772,7 @@ static struct arena_info *alloc_arena(st
+       available -= 2 * BTT_PG_SIZE;
+       /* The log takes a fixed amount of space based on nfree */
+-      logsize = roundup(2 * arena->nfree * sizeof(struct log_entry),
+-                              BTT_PG_SIZE);
++      logsize = roundup(arena->nfree * LOG_GRP_SIZE, BTT_PG_SIZE);
+       available -= logsize;
+       /* Calculate optimal split between map and data area */
+@@ -667,6 +789,10 @@ static struct arena_info *alloc_arena(st
+       arena->mapoff = arena->dataoff + datasize;
+       arena->logoff = arena->mapoff + mapsize;
+       arena->info2off = arena->logoff + logsize;
++
++      /* Default log indices are (0,1) */
++      arena->log_index[0] = 0;
++      arena->log_index[1] = 1;
+       return arena;
+ }
+@@ -757,6 +883,13 @@ static int discover_arenas(struct btt *b
+               arena->external_lba_start = cur_nlba;
+               parse_arena_meta(arena, super, cur_off);
++              ret = log_set_indices(arena);
++              if (ret) {
++                      dev_err(to_dev(arena),
++                              "Unable to deduce log/padding indices\n");
++                      goto out;
++              }
++
+               mutex_init(&arena->err_lock);
+               ret = btt_freelist_init(arena);
+               if (ret)
+--- a/drivers/nvdimm/btt.h
++++ b/drivers/nvdimm/btt.h
+@@ -27,6 +27,7 @@
+ #define MAP_ERR_MASK (1 << MAP_ERR_SHIFT)
+ #define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT)))
+ #define MAP_ENT_NORMAL 0xC0000000
++#define LOG_GRP_SIZE sizeof(struct log_group)
+ #define LOG_ENT_SIZE sizeof(struct log_entry)
+ #define ARENA_MIN_SIZE (1UL << 24)    /* 16 MB */
+ #define ARENA_MAX_SIZE (1ULL << 39)   /* 512 GB */
+@@ -50,12 +51,52 @@ enum btt_init_state {
+       INIT_READY
+ };
++/*
++ * A log group represents one log 'lane', and consists of four log entries.
++ * Two of the four entries are valid entries, and the remaining two are
++ * padding. Due to an old bug in the padding location, we need to perform a
++ * test to determine the padding scheme being used, and use that scheme
++ * thereafter.
++ *
++ * In kernels prior to 4.15, 'log group' would have actual log entries at
++ * indices (0, 2) and padding at indices (1, 3), whereas the correct/updated
++ * format has log entries at indices (0, 1) and padding at indices (2, 3).
++ *
++ * Old (pre 4.15) format:
++ * +-----------------+-----------------+
++ * |      ent[0]     |      ent[1]     |
++ * |       16B       |       16B       |
++ * | lba/old/new/seq |       pad       |
++ * +-----------------------------------+
++ * |      ent[2]     |      ent[3]     |
++ * |       16B       |       16B       |
++ * | lba/old/new/seq |       pad       |
++ * +-----------------+-----------------+
++ *
++ * New format:
++ * +-----------------+-----------------+
++ * |      ent[0]     |      ent[1]     |
++ * |       16B       |       16B       |
++ * | lba/old/new/seq | lba/old/new/seq |
++ * +-----------------------------------+
++ * |      ent[2]     |      ent[3]     |
++ * |       16B       |       16B       |
++ * |       pad       |       pad       |
++ * +-----------------+-----------------+
++ *
++ * We detect during start-up which format is in use, and set
++ * arena->log_index[(0, 1)] with the detected format.
++ */
++
+ struct log_entry {
+       __le32 lba;
+       __le32 old_map;
+       __le32 new_map;
+       __le32 seq;
+-      __le64 padding[2];
++};
++
++struct log_group {
++      struct log_entry ent[4];
+ };
+ struct btt_sb {
+@@ -125,6 +166,7 @@ struct aligned_lock {
+  * @list:             List head for list of arenas
+  * @debugfs_dir:      Debugfs dentry
+  * @flags:            Arena flags - may signify error states.
++ * @log_index:                Indices of the valid log entries in a log_group
+  *
+  * arena_info is a per-arena handle. Once an arena is narrowed down for an
+  * IO, this struct is passed around for the duration of the IO.
+@@ -157,6 +199,7 @@ struct arena_info {
+       /* Arena flags */
+       u32 flags;
+       struct mutex err_lock;
++      int log_index[2];
+ };
+ /**
diff --git a/queue-4.14/libnvdimm-dax-fix-1gb-aligned-namespaces-vs-physical-misalignment.patch b/queue-4.14/libnvdimm-dax-fix-1gb-aligned-namespaces-vs-physical-misalignment.patch
new file mode 100644 (file)
index 0000000..d1fa242
--- /dev/null
@@ -0,0 +1,73 @@
+From 41fce90f26333c4fa82e8e43b9ace86c4e8a0120 Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Mon, 4 Dec 2017 14:07:43 -0800
+Subject: libnvdimm, dax: fix 1GB-aligned namespaces vs physical misalignment
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit 41fce90f26333c4fa82e8e43b9ace86c4e8a0120 upstream.
+
+The following namespace configuration attempt:
+
+    # ndctl create-namespace -e namespace0.0 -m devdax -a 1G -f
+    libndctl: ndctl_dax_enable: dax0.1: failed to enable
+      Error: namespace0.0: failed to enable
+
+    failed to reconfigure namespace: No such device or address
+
+...fails when the backing memory range is not physically aligned to 1G:
+
+    # cat /proc/iomem | grep Persistent
+    210000000-30fffffff : Persistent Memory (legacy)
+
+In the above example the 4G persistent memory range starts and ends on a
+256MB boundary.
+
+We handle this case correctly when needing to handle cases that violate
+section alignment (128MB) collisions against "System RAM", and we simply
+need to extend that padding/truncation for the 1GB alignment use case.
+
+Fixes: 315c562536c4 ("libnvdimm, pfn: add 'align' attribute...")
+Reported-and-tested-by: Jane Chu <jane.chu@oracle.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/nvdimm/pfn_devs.c |   15 ++++++++++++---
+ 1 file changed, 12 insertions(+), 3 deletions(-)
+
+--- a/drivers/nvdimm/pfn_devs.c
++++ b/drivers/nvdimm/pfn_devs.c
+@@ -582,6 +582,12 @@ static struct vmem_altmap *__nvdimm_setu
+       return altmap;
+ }
++static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys)
++{
++      return min_t(u64, PHYS_SECTION_ALIGN_DOWN(phys),
++                      ALIGN_DOWN(phys, nd_pfn->align));
++}
++
+ static int nd_pfn_init(struct nd_pfn *nd_pfn)
+ {
+       u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0;
+@@ -637,13 +643,16 @@ static int nd_pfn_init(struct nd_pfn *nd
+       start = nsio->res.start;
+       size = PHYS_SECTION_ALIGN_UP(start + size) - start;
+       if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
+-                              IORES_DESC_NONE) == REGION_MIXED) {
++                              IORES_DESC_NONE) == REGION_MIXED
++                      || !IS_ALIGNED(start + resource_size(&nsio->res),
++                              nd_pfn->align)) {
+               size = resource_size(&nsio->res);
+-              end_trunc = start + size - PHYS_SECTION_ALIGN_DOWN(start + size);
++              end_trunc = start + size - phys_pmem_align_down(nd_pfn,
++                              start + size);
+       }
+       if (start_pad + end_trunc)
+-              dev_info(&nd_pfn->dev, "%s section collision, truncate %d bytes\n",
++              dev_info(&nd_pfn->dev, "%s alignment collision, truncate %d bytes\n",
+                               dev_name(&ndns->dev), start_pad + end_trunc);
+       /*
diff --git a/queue-4.14/libnvdimm-pfn-fix-start_pad-handling-for-aligned-namespaces.patch b/queue-4.14/libnvdimm-pfn-fix-start_pad-handling-for-aligned-namespaces.patch
new file mode 100644 (file)
index 0000000..720ef68
--- /dev/null
@@ -0,0 +1,56 @@
+From 19deaa217bc04e83b59b5e8c8229eb0e53ad9efc Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Tue, 19 Dec 2017 15:07:10 -0800
+Subject: libnvdimm, pfn: fix start_pad handling for aligned namespaces
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit 19deaa217bc04e83b59b5e8c8229eb0e53ad9efc upstream.
+
+The alignment checks at pfn driver startup fail to properly account for
+the 'start_pad' in the case where the namespace is misaligned relative
+to its internal alignment. This is typically triggered in a 1G aligned
+namespace, but could theoretically trigger with small namespace
+alignments. When this triggers the kernel reports messages of the form:
+
+    dax2.1: bad offset: 0x3c000000 dax disabled align: 0x40000000
+
+Fixes: 1ee6667cd8d1 ("libnvdimm, pfn, dax: fix initialization vs autodetect...")
+Reported-by: Jane Chu <jane.chu@oracle.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/nvdimm/pfn_devs.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/nvdimm/pfn_devs.c
++++ b/drivers/nvdimm/pfn_devs.c
+@@ -364,9 +364,9 @@ struct device *nd_pfn_create(struct nd_r
+ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
+ {
+       u64 checksum, offset;
+-      unsigned long align;
+       enum nd_pfn_mode mode;
+       struct nd_namespace_io *nsio;
++      unsigned long align, start_pad;
+       struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
+       struct nd_namespace_common *ndns = nd_pfn->ndns;
+       const u8 *parent_uuid = nd_dev_to_uuid(&ndns->dev);
+@@ -410,6 +410,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pf
+       align = le32_to_cpu(pfn_sb->align);
+       offset = le64_to_cpu(pfn_sb->dataoff);
++      start_pad = le32_to_cpu(pfn_sb->start_pad);
+       if (align == 0)
+               align = 1UL << ilog2(offset);
+       mode = le32_to_cpu(pfn_sb->mode);
+@@ -468,7 +469,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pf
+               return -EBUSY;
+       }
+-      if ((align && !IS_ALIGNED(offset, align))
++      if ((align && !IS_ALIGNED(nsio->res.start + offset + start_pad, align))
+                       || !IS_ALIGNED(offset, PAGE_SIZE)) {
+               dev_err(&nd_pfn->dev,
+                               "bad offset: %#llx dax disabled align: %#lx\n",
diff --git a/queue-4.14/mfd-cros-ec-spi-don-t-send-first-message-too-soon.patch b/queue-4.14/mfd-cros-ec-spi-don-t-send-first-message-too-soon.patch
new file mode 100644 (file)
index 0000000..f3900b0
--- /dev/null
@@ -0,0 +1,46 @@
+From 15d8374874ded0bec37ef27f8301a6d54032c0e5 Mon Sep 17 00:00:00 2001
+From: Jon Hunter <jonathanh@nvidia.com>
+Date: Tue, 14 Nov 2017 14:43:27 +0000
+Subject: mfd: cros ec: spi: Don't send first message too soon
+
+From: Jon Hunter <jonathanh@nvidia.com>
+
+commit 15d8374874ded0bec37ef27f8301a6d54032c0e5 upstream.
+
+On the Tegra124 Nyan-Big chromebook the very first SPI message sent to
+the EC is failing.
+
+The Tegra SPI driver configures the SPI chip-selects to be active-high
+by default (and always has for many years). The EC SPI requires an
+active-low chip-select and so the Tegra chip-select is reconfigured to
+be active-low when the EC SPI driver calls spi_setup(). The problem is
+that if the first SPI message to the EC is sent too soon after
+reconfiguring the SPI chip-select, it fails.
+
+The EC SPI driver prevents back-to-back SPI messages being sent too
+soon by keeping track of the time the last transfer was sent via the
+variable 'last_transfer_ns'. To prevent the very first transfer being
+sent too soon, initialise the 'last_transfer_ns' variable after calling
+spi_setup() and before sending the first SPI message.
+
+Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
+Reviewed-by: Brian Norris <briannorris@chromium.org>
+Reviewed-by: Douglas Anderson <dianders@chromium.org>
+Acked-by: Benson Leung <bleung@chromium.org>
+Signed-off-by: Lee Jones <lee.jones@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/mfd/cros_ec_spi.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/mfd/cros_ec_spi.c
++++ b/drivers/mfd/cros_ec_spi.c
+@@ -667,6 +667,7 @@ static int cros_ec_spi_probe(struct spi_
+                          sizeof(struct ec_response_get_protocol_info);
+       ec_dev->dout_size = sizeof(struct ec_host_request);
++      ec_spi->last_transfer_ns = ktime_get_ns();
+       err = cros_ec_register(ec_dev);
+       if (err) {
diff --git a/queue-4.14/mfd-twl4030-audio-fix-sibling-node-lookup.patch b/queue-4.14/mfd-twl4030-audio-fix-sibling-node-lookup.patch
new file mode 100644 (file)
index 0000000..0058ece
--- /dev/null
@@ -0,0 +1,50 @@
+From 0a423772de2f3d7b00899987884f62f63ae00dcb Mon Sep 17 00:00:00 2001
+From: Johan Hovold <johan@kernel.org>
+Date: Sat, 11 Nov 2017 16:38:43 +0100
+Subject: mfd: twl4030-audio: Fix sibling-node lookup
+
+From: Johan Hovold <johan@kernel.org>
+
+commit 0a423772de2f3d7b00899987884f62f63ae00dcb upstream.
+
+A helper purported to look up a child node based on its name was using
+the wrong of-helper and ended up prematurely freeing the parent of-node
+while leaking any matching node.
+
+To make things worse, any matching node would not even necessarily be a
+child node as the whole device tree was searched depth-first starting at
+the parent.
+
+Fixes: 019a7e6b7b31 ("mfd: twl4030-audio: Add DT support")
+Signed-off-by: Johan Hovold <johan@kernel.org>
+Acked-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
+Signed-off-by: Lee Jones <lee.jones@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/mfd/twl4030-audio.c |    9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/drivers/mfd/twl4030-audio.c
++++ b/drivers/mfd/twl4030-audio.c
+@@ -159,13 +159,18 @@ unsigned int twl4030_audio_get_mclk(void
+ EXPORT_SYMBOL_GPL(twl4030_audio_get_mclk);
+ static bool twl4030_audio_has_codec(struct twl4030_audio_data *pdata,
+-                            struct device_node *node)
++                            struct device_node *parent)
+ {
++      struct device_node *node;
++
+       if (pdata && pdata->codec)
+               return true;
+-      if (of_find_node_by_name(node, "codec"))
++      node = of_get_child_by_name(parent, "codec");
++      if (node) {
++              of_node_put(node);
+               return true;
++      }
+       return false;
+ }
diff --git a/queue-4.14/mfd-twl6040-fix-child-node-lookup.patch b/queue-4.14/mfd-twl6040-fix-child-node-lookup.patch
new file mode 100644 (file)
index 0000000..333232f
--- /dev/null
@@ -0,0 +1,54 @@
+From 85e9b13cbb130a3209f21bd7933933399c389ffe Mon Sep 17 00:00:00 2001
+From: Johan Hovold <johan@kernel.org>
+Date: Sat, 11 Nov 2017 16:38:44 +0100
+Subject: mfd: twl6040: Fix child-node lookup
+
+From: Johan Hovold <johan@kernel.org>
+
+commit 85e9b13cbb130a3209f21bd7933933399c389ffe upstream.
+
+Fix child-node lookup during probe, which ended up searching the whole
+device tree depth-first starting at the parent rather than just matching
+on its children.
+
+To make things worse, the parent node was prematurely freed, while the
+child node was leaked.
+
+Note that the CONFIG_OF compile guard can be removed as
+of_get_child_by_name() provides a !CONFIG_OF implementation which always
+fails.
+
+Fixes: 37e13cecaa14 ("mfd: Add support for Device Tree to twl6040")
+Fixes: ca2cad6ae38e ("mfd: Fix twl6040 build failure")
+Signed-off-by: Johan Hovold <johan@kernel.org>
+Acked-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
+Signed-off-by: Lee Jones <lee.jones@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/mfd/twl6040.c |   12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+--- a/drivers/mfd/twl6040.c
++++ b/drivers/mfd/twl6040.c
+@@ -97,12 +97,16 @@ static struct reg_sequence twl6040_patch
+ };
+-static bool twl6040_has_vibra(struct device_node *node)
++static bool twl6040_has_vibra(struct device_node *parent)
+ {
+-#ifdef CONFIG_OF
+-      if (of_find_node_by_name(node, "vibra"))
++      struct device_node *node;
++
++      node = of_get_child_by_name(parent, "vibra");
++      if (node) {
++              of_node_put(node);
+               return true;
+-#endif
++      }
++
+       return false;
+ }
diff --git a/queue-4.14/net-mvneta-clear-interface-link-status-on-port-disable.patch b/queue-4.14/net-mvneta-clear-interface-link-status-on-port-disable.patch
new file mode 100644 (file)
index 0000000..778f78e
--- /dev/null
@@ -0,0 +1,38 @@
+From 4423c18e466afdfb02a36ee8b9f901d144b3c607 Mon Sep 17 00:00:00 2001
+From: Yelena Krivosheev <yelena@marvell.com>
+Date: Tue, 19 Dec 2017 17:59:45 +0100
+Subject: net: mvneta: clear interface link status on port disable
+
+From: Yelena Krivosheev <yelena@marvell.com>
+
+commit 4423c18e466afdfb02a36ee8b9f901d144b3c607 upstream.
+
+When the port connects to the PHY in polling mode (with a poll interval of
+1 sec), the port and PHY link status must be synchronized in order not to
+lose link change events.
+
+[gregory.clement@free-electrons.com: add fixes tag]
+Fixes: c5aff18204da ("net: mvneta: driver for Marvell Armada 370/XP network unit")
+Signed-off-by: Yelena Krivosheev <yelena@marvell.com>
+Tested-by: Dmitri Epshtein <dima@marvell.com>
+Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/ethernet/marvell/mvneta.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -1214,6 +1214,10 @@ static void mvneta_port_disable(struct m
+       val &= ~MVNETA_GMAC0_PORT_ENABLE;
+       mvreg_write(pp, MVNETA_GMAC_CTRL_0, val);
++      pp->link = 0;
++      pp->duplex = -1;
++      pp->speed = 0;
++
+       udelay(200);
+ }
diff --git a/queue-4.14/net-mvneta-eliminate-wrong-call-to-handle-rx-descriptor-error.patch b/queue-4.14/net-mvneta-eliminate-wrong-call-to-handle-rx-descriptor-error.patch
new file mode 100644 (file)
index 0000000..465395b
--- /dev/null
@@ -0,0 +1,38 @@
+From 2eecb2e04abb62ef8ea7b43e1a46bdb5b99d1bf8 Mon Sep 17 00:00:00 2001
+From: Yelena Krivosheev <yelena@marvell.com>
+Date: Tue, 19 Dec 2017 17:59:47 +0100
+Subject: net: mvneta: eliminate wrong call to handle rx descriptor error
+
+From: Yelena Krivosheev <yelena@marvell.com>
+
+commit 2eecb2e04abb62ef8ea7b43e1a46bdb5b99d1bf8 upstream.
+
+There are a few reasons why a received packet may be dropped in the
+mvneta_rx_swbm() function. mvneta_rx_error() should be called only if
+error bit [16] is set in the rx descriptor.
+
+[gregory.clement@free-electrons.com: add fixes tag]
+Fixes: dc35a10f68d3 ("net: mvneta: bm: add support for hardware buffer management")
+Signed-off-by: Yelena Krivosheev <yelena@marvell.com>
+Tested-by: Dmitri Epshtein <dima@marvell.com>
+Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/ethernet/marvell/mvneta.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -1962,9 +1962,9 @@ static int mvneta_rx_swbm(struct mvneta_
+               if (!mvneta_rxq_desc_is_first_last(rx_status) ||
+                   (rx_status & MVNETA_RXD_ERR_SUMMARY)) {
++                      mvneta_rx_error(pp, rx_desc);
+ err_drop_frame:
+                       dev->stats.rx_errors++;
+-                      mvneta_rx_error(pp, rx_desc);
+                       /* leave the descriptor untouched */
+                       continue;
+               }
diff --git a/queue-4.14/net-mvneta-use-proper-rxq_number-in-loop-on-rx-queues.patch b/queue-4.14/net-mvneta-use-proper-rxq_number-in-loop-on-rx-queues.patch
new file mode 100644 (file)
index 0000000..1f51008
--- /dev/null
@@ -0,0 +1,35 @@
+From ca5902a6547f662419689ca28b3c29a772446caa Mon Sep 17 00:00:00 2001
+From: Yelena Krivosheev <yelena@marvell.com>
+Date: Tue, 19 Dec 2017 17:59:46 +0100
+Subject: net: mvneta: use proper rxq_number in loop on rx queues
+
+From: Yelena Krivosheev <yelena@marvell.com>
+
+commit ca5902a6547f662419689ca28b3c29a772446caa upstream.
+
+When adding the RX queue association with each CPU, a typo was made in
+the mvneta_cleanup_rxqs() function. This patch fixes it.
+
+[gregory.clement@free-electrons.com: add commit log and fixes tag]
+Fixes: 2dcf75e2793c ("net: mvneta: Associate RX queues with each CPU")
+Signed-off-by: Yelena Krivosheev <yelena@marvell.com>
+Tested-by: Dmitri Epshtein <dima@marvell.com>
+Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/ethernet/marvell/mvneta.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -3015,7 +3015,7 @@ static void mvneta_cleanup_rxqs(struct m
+ {
+       int queue;
+-      for (queue = 0; queue < txq_number; queue++)
++      for (queue = 0; queue < rxq_number; queue++)
+               mvneta_rxq_deinit(pp, &pp->rxqs[queue]);
+ }
diff --git a/queue-4.14/parisc-align-os_hpmc_size-on-word-boundary.patch b/queue-4.14/parisc-align-os_hpmc_size-on-word-boundary.patch
new file mode 100644 (file)
index 0000000..35bc856
--- /dev/null
@@ -0,0 +1,30 @@
+From 0ed9d3de5f8f97e6efd5ca0e3377cab5f0451ead Mon Sep 17 00:00:00 2001
+From: Helge Deller <deller@gmx.de>
+Date: Tue, 12 Dec 2017 21:25:41 +0100
+Subject: parisc: Align os_hpmc_size on word boundary
+
+From: Helge Deller <deller@gmx.de>
+
+commit 0ed9d3de5f8f97e6efd5ca0e3377cab5f0451ead upstream.
+
+The os_hpmc_size variable sometimes wasn't aligned at word boundary and thus
+triggered the unaligned fault handler at startup.
+Fix it by aligning it properly.
+
+Signed-off-by: Helge Deller <deller@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/parisc/kernel/hpmc.S |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/parisc/kernel/hpmc.S
++++ b/arch/parisc/kernel/hpmc.S
+@@ -305,6 +305,7 @@ ENDPROC_CFI(os_hpmc)
+       __INITRODATA
++      .align 4
+       .export os_hpmc_size
+ os_hpmc_size:
+       .word .os_hpmc_end-.os_hpmc
diff --git a/queue-4.14/parisc-fix-indenting-in-puts.patch b/queue-4.14/parisc-fix-indenting-in-puts.patch
new file mode 100644 (file)
index 0000000..04aeef9
--- /dev/null
@@ -0,0 +1,35 @@
+From 203c110b39a89b48156c7450504e454fedb7f7f6 Mon Sep 17 00:00:00 2001
+From: Helge Deller <deller@gmx.de>
+Date: Tue, 12 Dec 2017 21:32:16 +0100
+Subject: parisc: Fix indenting in puts()
+
+From: Helge Deller <deller@gmx.de>
+
+commit 203c110b39a89b48156c7450504e454fedb7f7f6 upstream.
+
+Static analysis tools complain that we intended to have curly braces
+around this indent block. In this case this assumption is wrong, so fix
+the indenting.
+
+Fixes: 2f3c7b8137ef ("parisc: Add core code for self-extracting kernel")
+Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: Helge Deller <deller@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/parisc/boot/compressed/misc.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/parisc/boot/compressed/misc.c
++++ b/arch/parisc/boot/compressed/misc.c
+@@ -123,8 +123,8 @@ int puts(const char *s)
+       while ((nuline = strchr(s, '\n')) != NULL) {
+               if (nuline != s)
+                       pdc_iodc_print(s, nuline - s);
+-                      pdc_iodc_print("\r\n", 2);
+-                      s = nuline + 1;
++              pdc_iodc_print("\r\n", 2);
++              s = nuline + 1;
+       }
+       if (*s != '\0')
+               pdc_iodc_print(s, strlen(s));
diff --git a/queue-4.14/parisc-hide-diva-built-in-serial-aux-and-graphics-card.patch b/queue-4.14/parisc-hide-diva-built-in-serial-aux-and-graphics-card.patch
new file mode 100644 (file)
index 0000000..52685db
--- /dev/null
@@ -0,0 +1,60 @@
+From bcf3f1752a622f1372d3252d0fea8855d89812e7 Mon Sep 17 00:00:00 2001
+From: Helge Deller <deller@gmx.de>
+Date: Tue, 12 Dec 2017 21:52:26 +0100
+Subject: parisc: Hide Diva-built-in serial aux and graphics card
+
+From: Helge Deller <deller@gmx.de>
+
+commit bcf3f1752a622f1372d3252d0fea8855d89812e7 upstream.
+
+Diva GSP card has built-in serial AUX port and ATI graphic card which simply
+don't work and which both don't have external connectors.  User Guides even
+mention that those devices shouldn't be used.
+So, prevent that Linux drivers try to enable those devices.
+
+Signed-off-by: Helge Deller <deller@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/parisc/lba_pci.c |   33 +++++++++++++++++++++++++++++++++
+ 1 file changed, 33 insertions(+)
+
+--- a/drivers/parisc/lba_pci.c
++++ b/drivers/parisc/lba_pci.c
+@@ -1692,3 +1692,36 @@ void lba_set_iregs(struct parisc_device
+       iounmap(base_addr);
+ }
++
++/*
++ * The design of the Diva management card in rp34x0 machines (rp3410, rp3440)
++ * seems rushed, so that many built-in components simply don't work.
++ * The following quirks disable the serial AUX port and the built-in ATI RV100
++ * Radeon 7000 graphics card which both don't have any external connectors and
++ * thus are useless, and even worse, e.g. the AUX port occupies ttyS0 and as
++ * such makes those machines the only PARISC machines on which we can't use
++ * ttyS0 as boot console.
++ */
++static void quirk_diva_ati_card(struct pci_dev *dev)
++{
++      if (dev->subsystem_vendor != PCI_VENDOR_ID_HP ||
++          dev->subsystem_device != 0x1292)
++              return;
++
++      dev_info(&dev->dev, "Hiding Diva built-in ATI card");
++      dev->device = 0;
++}
++DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_RADEON_QY,
++      quirk_diva_ati_card);
++
++static void quirk_diva_aux_disable(struct pci_dev *dev)
++{
++      if (dev->subsystem_vendor != PCI_VENDOR_ID_HP ||
++          dev->subsystem_device != 0x1291)
++              return;
++
++      dev_info(&dev->dev, "Hiding Diva built-in AUX serial device");
++      dev->device = 0;
++}
++DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_DIVA_AUX,
++      quirk_diva_aux_disable);
diff --git a/queue-4.14/pci-pm-force-devices-to-d0-in-pci_pm_thaw_noirq.patch b/queue-4.14/pci-pm-force-devices-to-d0-in-pci_pm_thaw_noirq.patch
new file mode 100644 (file)
index 0000000..ff6cfae
--- /dev/null
@@ -0,0 +1,47 @@
+From 5839ee7389e893a31e4e3c9cf17b50d14103c902 Mon Sep 17 00:00:00 2001
+From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
+Date: Fri, 15 Dec 2017 03:07:18 +0100
+Subject: PCI / PM: Force devices to D0 in pci_pm_thaw_noirq()
+
+From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+commit 5839ee7389e893a31e4e3c9cf17b50d14103c902 upstream.
+
+It is incorrect to call pci_restore_state() for devices in low-power
+states (D1-D3), as that involves the restoration of MSI setup which
+requires MMIO to be operational and that is only the case in D0.
+
+However, pci_pm_thaw_noirq() may do that if the driver's "freeze"
+callbacks put the device into a low-power state, so fix it by making
+it force devices into D0 via pci_set_power_state() instead of trying
+to "update" their power state which is pointless.
+
+Fixes: e60514bd4485 (PCI/PM: Restore the status of PCI devices across hibernation)
+Reported-by: Thomas Gleixner <tglx@linutronix.de>
+Reported-by: Maarten Lankhorst <dev@mblankhorst.nl>
+Tested-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Maarten Lankhorst <dev@mblankhorst.nl>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Acked-by: Bjorn Helgaas <bhelgaas@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/pci/pci-driver.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/drivers/pci/pci-driver.c
++++ b/drivers/pci/pci-driver.c
+@@ -968,7 +968,12 @@ static int pci_pm_thaw_noirq(struct devi
+       if (pci_has_legacy_pm_support(pci_dev))
+               return pci_legacy_resume_early(dev);
+-      pci_update_current_state(pci_dev, PCI_D0);
++      /*
++       * pci_restore_state() requires the device to be in D0 (because of MSI
++       * restoration among other things), so force it into D0 in case the
++       * driver's "freeze" callbacks put it into a low-power state directly.
++       */
++      pci_set_power_state(pci_dev, PCI_D0);
+       pci_restore_state(pci_dev);
+       if (drv && drv->pm && drv->pm->thaw_noirq)
diff --git a/queue-4.14/pinctrl-cherryview-mask-all-interrupts-on-intel_strago-based-systems.patch b/queue-4.14/pinctrl-cherryview-mask-all-interrupts-on-intel_strago-based-systems.patch
new file mode 100644 (file)
index 0000000..5f413ea
--- /dev/null
@@ -0,0 +1,52 @@
+From d2b3c353595a855794f8b9df5b5bdbe8deb0c413 Mon Sep 17 00:00:00 2001
+From: Mika Westerberg <mika.westerberg@linux.intel.com>
+Date: Mon, 4 Dec 2017 12:11:02 +0300
+Subject: pinctrl: cherryview: Mask all interrupts on Intel_Strago based systems
+
+From: Mika Westerberg <mika.westerberg@linux.intel.com>
+
+commit d2b3c353595a855794f8b9df5b5bdbe8deb0c413 upstream.
+
+Guenter Roeck reported an interrupt storm on a prototype system which is
+based on Cyan Chromebook. The root cause turned out to be an incorrectly
+configured pin that triggers spurious interrupts. This will be fixed in
+coreboot but currently we need to prevent the interrupt storm from
+happening by masking all interrupts (but not GPEs) on those systems.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=197953
+Fixes: bcb48cca23ec ("pinctrl: cherryview: Do not mask all interrupts in probe")
+Reported-and-tested-by: Guenter Roeck <linux@roeck-us.net>
+Reported-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
+Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
+Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/pinctrl/intel/pinctrl-cherryview.c |   16 ++++++++++++++++
+ 1 file changed, 16 insertions(+)
+
+--- a/drivers/pinctrl/intel/pinctrl-cherryview.c
++++ b/drivers/pinctrl/intel/pinctrl-cherryview.c
+@@ -1620,6 +1620,22 @@ static int chv_gpio_probe(struct chv_pin
+                       clear_bit(i, chip->irq_valid_mask);
+       }
++      /*
++       * The same set of machines in chv_no_valid_mask[] have incorrectly
++       * configured GPIOs that generate spurious interrupts so we use
++       * this same list to apply another quirk for them.
++       *
++       * See also https://bugzilla.kernel.org/show_bug.cgi?id=197953.
++       */
++      if (!need_valid_mask) {
++              /*
++               * Mask all interrupts the community is able to generate
++               * but leave the ones that can only generate GPEs unmasked.
++               */
++              chv_writel(GENMASK(31, pctrl->community->nirqs),
++                         pctrl->regs + CHV_INTMASK);
++      }
++
+       /* Clear all interrupts */
+       chv_writel(0xffff, pctrl->regs + CHV_INTSTAT);
diff --git a/queue-4.14/powerpc-perf-dereference-bhrb-entries-safely.patch b/queue-4.14/powerpc-perf-dereference-bhrb-entries-safely.patch
new file mode 100644 (file)
index 0000000..39e45fc
--- /dev/null
@@ -0,0 +1,55 @@
+From f41d84dddc66b164ac16acf3f584c276146f1c48 Mon Sep 17 00:00:00 2001
+From: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
+Date: Tue, 12 Dec 2017 17:59:15 +0530
+Subject: powerpc/perf: Dereference BHRB entries safely
+
+From: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
+
+commit f41d84dddc66b164ac16acf3f584c276146f1c48 upstream.
+
+It's theoretically possible that branch instructions recorded in
+BHRB (Branch History Rolling Buffer) entries have already been
+unmapped before they are processed by the kernel. Hence, trying to
+dereference such memory location will result in a crash. eg:
+
+    Unable to handle kernel paging request for data at address 0xd000000019c41764
+    Faulting instruction address: 0xc000000000084a14
+    NIP [c000000000084a14] branch_target+0x4/0x70
+    LR [c0000000000eb828] record_and_restart+0x568/0x5c0
+    Call Trace:
+    [c0000000000eb3b4] record_and_restart+0xf4/0x5c0 (unreliable)
+    [c0000000000ec378] perf_event_interrupt+0x298/0x460
+    [c000000000027964] performance_monitor_exception+0x54/0x70
+    [c000000000009ba4] performance_monitor_common+0x114/0x120
+
+Fix it by dereferencing the addresses safely.
+
+Fixes: 691231846ceb ("powerpc/perf: Fix setting of "to" addresses for BHRB")
+Suggested-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
+Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+[mpe: Use probe_kernel_read() which is clearer, tweak change log]
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/perf/core-book3s.c |    8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/arch/powerpc/perf/core-book3s.c
++++ b/arch/powerpc/perf/core-book3s.c
+@@ -410,8 +410,12 @@ static __u64 power_pmu_bhrb_to(u64 addr)
+       int ret;
+       __u64 target;
+-      if (is_kernel_addr(addr))
+-              return branch_target((unsigned int *)addr);
++      if (is_kernel_addr(addr)) {
++              if (probe_kernel_read(&instr, (void *)addr, sizeof(instr)))
++                      return 0;
++
++              return branch_target(&instr);
++      }
+       /* Userspace: need copy instruction here then translate it */
+       pagefault_disable();
diff --git a/queue-4.14/revert-ipmi_si-fix-memory-leak-on-new_smi.patch b/queue-4.14/revert-ipmi_si-fix-memory-leak-on-new_smi.patch
new file mode 100644 (file)
index 0000000..fbdf08d
--- /dev/null
@@ -0,0 +1,73 @@
+From john.einar@gmail.com  Wed Dec 27 16:15:28 2017
+From: John Einar Reitan <john.einar@gmail.com>
+Date: Sun, 24 Dec 2017 00:03:44 +0100
+Subject: Revert "ipmi_si: fix memory leak on new_smi"
+To: stable@vger.kernel.org
+Cc: John Einar Reitan <john.einar@gmail.com>
+Message-ID: <20171223230344.2759-1-john.einar@gmail.com>
+
+From: John Einar Reitan <john.einar@gmail.com>
+
+This reverts commit c97e41076a298dbc4e910c33048e553658388eed, which
+incorrectly was taken from upstream c0a32fe13cd323ca9420500b16fd69589c9ba91e.
+
+The referenced memory leak doesn't exist on the 4.14 stable branch as
+the new logic of doing the kzalloc hasn't moved to this function.
+By adding this kfree we actually end up doing a double kfree, as all callers of
+add_smi do a kfree on error.
+
+Sample with SLAB_FREELIST_HARDENED=y:
+
+ipmi_si: Adding ACPI-specified kcs state machine
+IPMI System Interface driver.
+ipmi_si: probing via SPMI
+ipmi_si: SPMI: io 0xca2 regsize 1 spacing 1 irq 0
+(NULL device *): SPMI-specified kcs state machine: duplicate
+------------[ cut here ]------------
+kernel BUG at mm/slub.c:295!
+invalid opcode: 0000 [#1] SMP
+Modules linked in:
+CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.14.8-gentoo-r1 #5
+Hardware name: Supermicro X9SCL/X9SCM/X9SCL/X9SCM, BIOS 2.2 02/20/2015
+task: ffff88080c208000 task.stack: ffffc90000020000
+RIP: 0010:kfree+0xf5/0x157
+RSP: 0000:ffffc90000023e58 EFLAGS: 00010246
+RAX: ffff88080b2e6200 RBX: ffff88080b2e6200 RCX: ffff88080b2e6200
+RDX: 000000000000008e RSI: ffff88082fc1cd60 RDI: ffff88080c003080
+RBP: ffffc90000002808 R08: 000000000001cd60 R09: ffffffff814da10e
+R10: ffffea00202cb980 R11: 000000000000005c R12: ffffffff814da10e
+R13: 00000000ffffffed R14: ffffffff82317bd0 R15: 0000000000000003
+FS:  0000000000000000(0000) GS:ffff88082fc00000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000000000000000 CR3: 0000000002e09001 CR4: 00000000001606f0
+Call Trace:
+ init_ipmi_si+0x493/0x5c7
+ ? cleanup_ipmi_si+0x84/0x84
+ ? set_debug_rodata+0xc/0xc
+ ? kthread+0x4c/0x11c
+ do_one_initcall+0x94/0x13d
+ ? set_debug_rodata+0xc/0xc
+ kernel_init_freeable+0x112/0x18e
+ ? rest_init+0xa0/0xa0
+ kernel_init+0x5/0xe1
+ ret_from_fork+0x22/0x30
+Code: 24 18 49 8b 7a 30 48 8b 37 65 48 8b 56 08 65 48 03 35 3a 29 e2 7e 4c 3b 56 10 75 39 48 8b 0e 48 63 47 20 48 01 d8 48 39 cb 75 02 <0f> 0b 49 89 c0 4c 33
+ 87 40 01 00 00 4c 31 c1 48 89 08 48 8d 4a
+---[ end trace 4ac2e2c100842676 ]---
+
+Signed-off-by: John Einar Reitan <john.einar@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/char/ipmi/ipmi_si_intf.c |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/drivers/char/ipmi/ipmi_si_intf.c
++++ b/drivers/char/ipmi/ipmi_si_intf.c
+@@ -3469,7 +3469,6 @@ static int add_smi(struct smi_info *new_
+                                ipmi_addr_src_to_str(new_smi->addr_source),
+                                si_to_str[new_smi->si_type]);
+                       rv = -EBUSY;
+-                      kfree(new_smi);
+                       goto out_err;
+               }
+       }
diff --git a/queue-4.14/revert-parisc-re-enable-interrupts-early.patch b/queue-4.14/revert-parisc-re-enable-interrupts-early.patch
new file mode 100644 (file)
index 0000000..5f6934a
--- /dev/null
@@ -0,0 +1,79 @@
+From 9352aeada4d8d8753fc0e414fbfe8fdfcb68a12c Mon Sep 17 00:00:00 2001
+From: John David Anglin <dave.anglin@bell.net>
+Date: Mon, 13 Nov 2017 19:35:33 -0500
+Subject: Revert "parisc: Re-enable interrupts early"
+
+From: John David Anglin <dave.anglin@bell.net>
+
+commit 9352aeada4d8d8753fc0e414fbfe8fdfcb68a12c upstream.
+
+This reverts commit 5c38602d83e584047906b41b162ababd4db4106d.
+
+Interrupts can't be enabled early because the register saves are done on
+the thread stack prior to switching to the IRQ stack.  This caused stack
+overflows and the thread stack needed increasing to 32k.  Even then,
+stack overflows still occasionally occurred.
+
+Background:
+Even with a 32 kB thread stack, I have seen instances where the thread
+stack overflowed on the mx3210 buildd.  Detection of stack overflow only
+occurs when we have an external interrupt.  When an external interrupt
+occurs, we switch to the thread stack if we are not already on a kernel
+stack.  Then, registers and specials are saved to the kernel stack.
+
+The bug occurs in intr_return where interrupts are reenabled prior to
+returning from the interrupt.  This was done in case we need to schedule
+or deliver signals.  However, it introduces the possibility that
+multiple external interrupts may occur on the thread stack and cause a
+stack overflow.  These might not be detected and cause the kernel to
+misbehave in random ways.
+
+This patch changes the code back to only reenable interrupts when we are
+going to schedule or deliver signals.  As a result, we generally return
+from an interrupt before reenabling interrupts.  This minimizes the
+growth of the thread stack.
+
+Fixes: 5c38602d83e5 ("parisc: Re-enable interrupts early")
+Signed-off-by: John David Anglin <dave.anglin@bell.net>
+Signed-off-by: Helge Deller <deller@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/parisc/kernel/entry.S |   12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+--- a/arch/parisc/kernel/entry.S
++++ b/arch/parisc/kernel/entry.S
+@@ -878,9 +878,6 @@ ENTRY_CFI(syscall_exit_rfi)
+       STREG   %r19,PT_SR7(%r16)
+ intr_return:
+-      /* NOTE: Need to enable interrupts incase we schedule. */
+-      ssm     PSW_SM_I, %r0
+-
+       /* check for reschedule */
+       mfctl   %cr30,%r1
+       LDREG   TI_FLAGS(%r1),%r19      /* sched.h: TIF_NEED_RESCHED */
+@@ -907,6 +904,11 @@ intr_check_sig:
+       LDREG   PT_IASQ1(%r16), %r20
+       cmpib,COND(=),n 0,%r20,intr_restore /* backward */
++      /* NOTE: We need to enable interrupts if we have to deliver
++       * signals. We used to do this earlier but it caused kernel
++       * stack overflows. */
++      ssm     PSW_SM_I, %r0
++
+       copy    %r0, %r25                       /* long in_syscall = 0 */
+ #ifdef CONFIG_64BIT
+       ldo     -16(%r30),%r29                  /* Reference param save area */
+@@ -958,6 +960,10 @@ intr_do_resched:
+       cmpib,COND(=)   0, %r20, intr_do_preempt
+       nop
++      /* NOTE: We need to enable interrupts if we schedule.  We used
++       * to do this earlier but it caused kernel stack overflows. */
++      ssm     PSW_SM_I, %r0
++
+ #ifdef CONFIG_64BIT
+       ldo     -16(%r30),%r29          /* Reference param save area */
+ #endif
index 8ee37c749c49ea207e01e42d3067f2227320f7e8..35e79597dc19731bdcaf95039ba8dceafcf80622 100644 (file)
@@ -6,3 +6,69 @@ tools-headers-sync-objtool-uapi-header.patch
 objtool-fix-64-bit-build-on-32-bit-host.patch
 x86-decoder-fix-and-update-the-opcodes-map.patch
 x86-insn-eval-add-utility-functions-to-get-segment-selector.patch
+x86-kconfig-limit-nr_cpus-on-32-bit-to-a-sane-amount.patch
+x86-mm-dump_pagetables-check-page_present-for-real.patch
+x86-mm-dump_pagetables-make-the-address-hints-correct-and-readable.patch
+x86-vsyscall-64-explicitly-set-_page_user-in-the-pagetable-hierarchy.patch
+x86-vsyscall-64-warn-and-fail-vsyscall-emulation-in-native-mode.patch
+arch-mm-allow-arch_dup_mmap-to-fail.patch
+x86-ldt-rework-locking.patch
+x86-ldt-prevent-ldt-inheritance-on-exec.patch
+x86-mm-64-improve-the-memory-map-documentation.patch
+x86-doc-remove-obvious-weirdnesses-from-the-x86-mm-layout-documentation.patch
+x86-entry-rename-sysenter_stack-to-cpu_entry_area_entry_stack.patch
+x86-uv-use-the-right-tlb-flush-api.patch
+x86-microcode-dont-abuse-the-tlb-flush-interface.patch
+x86-mm-use-__flush_tlb_one-for-kernel-memory.patch
+x86-mm-remove-superfluous-barriers.patch
+x86-mm-add-comments-to-clarify-which-tlb-flush-functions-are-supposed-to-flush-what.patch
+x86-mm-move-the-cr3-construction-functions-to-tlbflush.h.patch
+x86-mm-remove-hard-coded-asid-limit-checks.patch
+x86-mm-put-mmu-to-hardware-asid-translation-in-one-place.patch
+x86-mm-create-asm-invpcid.h.patch
+x86-cpu_entry_area-move-it-to-a-separate-unit.patch
+x86-cpu_entry_area-move-it-out-of-the-fixmap.patch
+init-invoke-init_espfix_bsp-from-mm_init.patch
+x86-cpu_entry_area-prevent-wraparound-in-setup_cpu_entry_area_ptes-on-32bit.patch
+acpi-apei-erst-fix-missing-error-handling-in-erst_reader.patch
+acpi-nfit-fix-health-event-notification.patch
+crypto-skcipher-set-walk.iv-for-zero-length-inputs.patch
+crypto-mcryptd-protect-the-per-cpu-queue-with-a-lock.patch
+crypto-af_alg-wait-for-data-at-beginning-of-recvmsg.patch
+crypto-af_alg-fix-race-accessing-cipher-request.patch
+mfd-cros-ec-spi-don-t-send-first-message-too-soon.patch
+mfd-twl4030-audio-fix-sibling-node-lookup.patch
+mfd-twl6040-fix-child-node-lookup.patch
+alsa-rawmidi-avoid-racy-info-ioctl-via-ctl-device.patch
+alsa-hda-realtek-fix-dell-aio-lineout-issue.patch
+alsa-hda-add-vendor-id-for-cannonlake-hdmi-codec.patch
+alsa-usb-audio-add-native-dsd-support-for-esoteric-d-05x.patch
+alsa-usb-audio-fix-the-missing-ctl-name-suffix-at-parsing-su.patch
+pci-pm-force-devices-to-d0-in-pci_pm_thaw_noirq.patch
+block-unalign-call_single_data-in-struct-request.patch
+block-throttle-avoid-double-charge.patch
+parisc-align-os_hpmc_size-on-word-boundary.patch
+parisc-fix-indenting-in-puts.patch
+parisc-hide-diva-built-in-serial-aux-and-graphics-card.patch
+revert-parisc-re-enable-interrupts-early.patch
+spi-xilinx-detect-stall-with-unknown-commands.patch
+spi-a3700-fix-clk-prescaling-for-coefficient-over-15.patch
+pinctrl-cherryview-mask-all-interrupts-on-intel_strago-based-systems.patch
+arm64-kvm-prevent-restoring-stale-pmscr_el1-for-vcpu.patch
+kvm-arm-arm64-fix-hyp-unmapping-going-off-limits.patch
+kvm-ppc-book3s-fix-xive-migration-of-pending-interrupts.patch
+kvm-ppc-book3s-hv-fix-pending_pri-value-in-kvmppc_xive_get_icp.patch
+kvm-mmu-fix-infinite-loop-when-there-is-no-available-mmu-page.patch
+kvm-x86-fix-load-rflags-w-o-the-fixed-bit.patch
+kvm-x86-fix-rsm-when-pcid-is-non-zero.patch
+clk-sunxi-sun9i-mmc-implement-reset-callback-for-reset-controls.patch
+powerpc-perf-dereference-bhrb-entries-safely.patch
+drm-i915-flush-pending-gtt-writes-before-unbinding.patch
+drm-sun4i-fix-error-path-handling.patch
+libnvdimm-dax-fix-1gb-aligned-namespaces-vs-physical-misalignment.patch
+libnvdimm-btt-fix-an-incompatibility-in-the-log-layout.patch
+libnvdimm-pfn-fix-start_pad-handling-for-aligned-namespaces.patch
+net-mvneta-clear-interface-link-status-on-port-disable.patch
+net-mvneta-use-proper-rxq_number-in-loop-on-rx-queues.patch
+net-mvneta-eliminate-wrong-call-to-handle-rx-descriptor-error.patch
+revert-ipmi_si-fix-memory-leak-on-new_smi.patch
diff --git a/queue-4.14/spi-a3700-fix-clk-prescaling-for-coefficient-over-15.patch b/queue-4.14/spi-a3700-fix-clk-prescaling-for-coefficient-over-15.patch
new file mode 100644 (file)
index 0000000..1e80b60
--- /dev/null
@@ -0,0 +1,51 @@
+From 251c201bf4f8b5bf4f1ccb4f8920eed2e1f57580 Mon Sep 17 00:00:00 2001
+From: Maxime Chevallier <maxime.chevallier@smile.fr>
+Date: Mon, 27 Nov 2017 15:16:32 +0100
+Subject: spi: a3700: Fix clk prescaling for coefficient over 15
+
+From: Maxime Chevallier <maxime.chevallier@smile.fr>
+
+commit 251c201bf4f8b5bf4f1ccb4f8920eed2e1f57580 upstream.
+
+The Armada 3700 SPI controller has 2 ranges of prescaler coefficients.
+One ranging from 0 to 15 by steps of 1, and one ranging from 0 to 30 by
+steps of 2.
+
+This commit fixes the prescaler coefficients that are over 15 so that it
+uses the correct range of values. The prescaling coefficient is rounded
+to the upper value if it is odd.
+
+This was tested on Espressobin with spidev and a logic analyser.
+
+Signed-off-by: Maxime Chevallier <maxime.chevallier@smile.fr>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/spi/spi-armada-3700.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/drivers/spi/spi-armada-3700.c
++++ b/drivers/spi/spi-armada-3700.c
+@@ -79,6 +79,7 @@
+ #define A3700_SPI_BYTE_LEN            BIT(5)
+ #define A3700_SPI_CLK_PRESCALE                BIT(0)
+ #define A3700_SPI_CLK_PRESCALE_MASK   (0x1f)
++#define A3700_SPI_CLK_EVEN_OFFS               (0x10)
+ #define A3700_SPI_WFIFO_THRS_BIT      28
+ #define A3700_SPI_RFIFO_THRS_BIT      24
+@@ -220,6 +221,13 @@ static void a3700_spi_clock_set(struct a
+       prescale = DIV_ROUND_UP(clk_get_rate(a3700_spi->clk), speed_hz);
++      /* For prescaler values over 15, we can only set it by steps of 2.
++       * Starting from A3700_SPI_CLK_EVEN_OFFS, we set values from 0 up to
++       * 30. We only use this range from 16 to 30.
++       */
++      if (prescale > 15)
++              prescale = A3700_SPI_CLK_EVEN_OFFS + DIV_ROUND_UP(prescale, 2);
++
+       val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG);
+       val = val & ~A3700_SPI_CLK_PRESCALE_MASK;
diff --git a/queue-4.14/spi-xilinx-detect-stall-with-unknown-commands.patch b/queue-4.14/spi-xilinx-detect-stall-with-unknown-commands.patch
new file mode 100644 (file)
index 0000000..7df478f
--- /dev/null
@@ -0,0 +1,66 @@
+From 5a1314fa697fc65cefaba64cd4699bfc3e6882a6 Mon Sep 17 00:00:00 2001
+From: Ricardo Ribalda <ricardo.ribalda@gmail.com>
+Date: Tue, 21 Nov 2017 10:09:02 +0100
+Subject: spi: xilinx: Detect stall with Unknown commands
+
+From: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
+
+commit 5a1314fa697fc65cefaba64cd4699bfc3e6882a6 upstream.
+
+When the core is configured in C_SPI_MODE > 0, it integrates a
+lookup table that automatically configures the core in dual or quad mode
+based on the command (first byte on the tx fifo).
+
+Unfortunately, that list mode_?_memory_*.mif does not contain all the
+supported commands by the flash.
+
+Since 4.14 spi-nor automatically tries to probe the flash using SFDP
+(command 0x5a), and that command is not part of the list_mode table.
+
+With the right combination of C_SPI_MODE and C_SPI_MEMORY this leads
+into a stall that can only be recovered with a soft reset.
+
+This patch detects this kind of stall and returns -EIO to the caller on
+those commands. spi-nor can handle this error properly:
+
+m25p80 spi0.0: Detected stall. Check C_SPI_MODE and C_SPI_MEMORY. 0x21 0x2404
+m25p80 spi0.0: SPI transfer failed: -5
+spi_master spi0: failed to transfer one message from queue
+m25p80 spi0.0: s25sl064p (8192 Kbytes)
+
+Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/spi/spi-xilinx.c |   11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+--- a/drivers/spi/spi-xilinx.c
++++ b/drivers/spi/spi-xilinx.c
+@@ -271,6 +271,7 @@ static int xilinx_spi_txrx_bufs(struct s
+       while (remaining_words) {
+               int n_words, tx_words, rx_words;
+               u32 sr;
++              int stalled;
+               n_words = min(remaining_words, xspi->buffer_size);
+@@ -299,7 +300,17 @@ static int xilinx_spi_txrx_bufs(struct s
+               /* Read out all the data from the Rx FIFO */
+               rx_words = n_words;
++              stalled = 10;
+               while (rx_words) {
++                      if (rx_words == n_words && !(stalled--) &&
++                          !(sr & XSPI_SR_TX_EMPTY_MASK) &&
++                          (sr & XSPI_SR_RX_EMPTY_MASK)) {
++                              dev_err(&spi->dev,
++                                      "Detected stall. Check C_SPI_MODE and C_SPI_MEMORY\n");
++                              xspi_init_hw(xspi);
++                              return -EIO;
++                      }
++
+                       if ((sr & XSPI_SR_TX_EMPTY_MASK) && (rx_words > 1)) {
+                               xilinx_spi_rx(xspi);
+                               rx_words--;
diff --git a/queue-4.14/x86-cpu_entry_area-move-it-out-of-the-fixmap.patch b/queue-4.14/x86-cpu_entry_area-move-it-out-of-the-fixmap.patch
new file mode 100644 (file)
index 0000000..aa6880e
--- /dev/null
@@ -0,0 +1,551 @@
+From 92a0f81d89571e3e8759366e050ee05cc545ef99 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 20 Dec 2017 18:51:31 +0100
+Subject: x86/cpu_entry_area: Move it out of the fixmap
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 92a0f81d89571e3e8759366e050ee05cc545ef99 upstream.
+
+Put the cpu_entry_area into a separate P4D entry. The fixmap gets too big
+and 0-day already hit a case where the fixmap PTEs were cleared by
+cleanup_highmap().
+
+Aside from that, the fixmap API is a pain as it's all backwards.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ Documentation/x86/x86_64/mm.txt         |    2 
+ arch/x86/include/asm/cpu_entry_area.h   |   18 ++++++++
+ arch/x86/include/asm/desc.h             |    1 
+ arch/x86/include/asm/fixmap.h           |   32 ---------------
+ arch/x86/include/asm/pgtable_32_types.h |   15 +++++--
+ arch/x86/include/asm/pgtable_64_types.h |   47 +++++++++++++---------
+ arch/x86/kernel/dumpstack.c             |    1 
+ arch/x86/kernel/traps.c                 |    5 +-
+ arch/x86/mm/cpu_entry_area.c            |   66 ++++++++++++++++++++++++--------
+ arch/x86/mm/dump_pagetables.c           |    6 ++
+ arch/x86/mm/init_32.c                   |    6 ++
+ arch/x86/mm/kasan_init_64.c             |   29 +++++++-------
+ arch/x86/mm/pgtable_32.c                |    1 
+ arch/x86/xen/mmu_pv.c                   |    2 
+ 14 files changed, 143 insertions(+), 88 deletions(-)
+
+--- a/Documentation/x86/x86_64/mm.txt
++++ b/Documentation/x86/x86_64/mm.txt
+@@ -12,6 +12,7 @@ ffffea0000000000 - ffffeaffffffffff (=40
+ ... unused hole ...
+ ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
+ ... unused hole ...
++fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
+ ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
+ ... unused hole ...
+ ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
+@@ -35,6 +36,7 @@ ffd4000000000000 - ffd5ffffffffffff (=49
+ ... unused hole ...
+ ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
+ ... unused hole ...
++fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
+ ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
+ ... unused hole ...
+ ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
+--- a/arch/x86/include/asm/cpu_entry_area.h
++++ b/arch/x86/include/asm/cpu_entry_area.h
+@@ -43,10 +43,26 @@ struct cpu_entry_area {
+ };
+ #define CPU_ENTRY_AREA_SIZE   (sizeof(struct cpu_entry_area))
+-#define CPU_ENTRY_AREA_PAGES  (CPU_ENTRY_AREA_SIZE / PAGE_SIZE)
++#define CPU_ENTRY_AREA_TOT_SIZE       (CPU_ENTRY_AREA_SIZE * NR_CPUS)
+ DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
+ extern void setup_cpu_entry_areas(void);
++extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
++
++#define       CPU_ENTRY_AREA_RO_IDT           CPU_ENTRY_AREA_BASE
++#define CPU_ENTRY_AREA_PER_CPU                (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
++
++#define CPU_ENTRY_AREA_RO_IDT_VADDR   ((void *)CPU_ENTRY_AREA_RO_IDT)
++
++#define CPU_ENTRY_AREA_MAP_SIZE                       \
++      (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE)
++
++extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
++
++static inline struct entry_stack *cpu_entry_stack(int cpu)
++{
++      return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
++}
+ #endif
+--- a/arch/x86/include/asm/desc.h
++++ b/arch/x86/include/asm/desc.h
+@@ -7,6 +7,7 @@
+ #include <asm/mmu.h>
+ #include <asm/fixmap.h>
+ #include <asm/irq_vectors.h>
++#include <asm/cpu_entry_area.h>
+ #include <linux/smp.h>
+ #include <linux/percpu.h>
+--- a/arch/x86/include/asm/fixmap.h
++++ b/arch/x86/include/asm/fixmap.h
+@@ -25,7 +25,6 @@
+ #else
+ #include <uapi/asm/vsyscall.h>
+ #endif
+-#include <asm/cpu_entry_area.h>
+ /*
+  * We can't declare FIXADDR_TOP as variable for x86_64 because vsyscall
+@@ -84,7 +83,6 @@ enum fixed_addresses {
+       FIX_IO_APIC_BASE_0,
+       FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
+ #endif
+-      FIX_RO_IDT,     /* Virtual mapping for read-only IDT */
+ #ifdef CONFIG_X86_32
+       FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
+       FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
+@@ -100,9 +98,6 @@ enum fixed_addresses {
+ #ifdef        CONFIG_X86_INTEL_MID
+       FIX_LNW_VRTC,
+ #endif
+-      /* Fixmap entries to remap the GDTs, one per processor. */
+-      FIX_CPU_ENTRY_AREA_TOP,
+-      FIX_CPU_ENTRY_AREA_BOTTOM = FIX_CPU_ENTRY_AREA_TOP + (CPU_ENTRY_AREA_PAGES * NR_CPUS) - 1,
+ #ifdef CONFIG_ACPI_APEI_GHES
+       /* Used for GHES mapping from assorted contexts */
+@@ -143,7 +138,7 @@ enum fixed_addresses {
+ extern void reserve_top_address(unsigned long reserve);
+ #define FIXADDR_SIZE  (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
+-#define FIXADDR_START         (FIXADDR_TOP - FIXADDR_SIZE)
++#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
+ extern int fixmaps_set;
+@@ -191,30 +186,5 @@ void __init *early_memremap_decrypted_wp
+ void __early_set_fixmap(enum fixed_addresses idx,
+                       phys_addr_t phys, pgprot_t flags);
+-static inline unsigned int __get_cpu_entry_area_page_index(int cpu, int page)
+-{
+-      BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
+-
+-      return FIX_CPU_ENTRY_AREA_BOTTOM - cpu*CPU_ENTRY_AREA_PAGES - page;
+-}
+-
+-#define __get_cpu_entry_area_offset_index(cpu, offset) ({             \
+-      BUILD_BUG_ON(offset % PAGE_SIZE != 0);                          \
+-      __get_cpu_entry_area_page_index(cpu, offset / PAGE_SIZE);       \
+-      })
+-
+-#define get_cpu_entry_area_index(cpu, field)                          \
+-      __get_cpu_entry_area_offset_index((cpu), offsetof(struct cpu_entry_area, field))
+-
+-static inline struct cpu_entry_area *get_cpu_entry_area(int cpu)
+-{
+-      return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0));
+-}
+-
+-static inline struct entry_stack *cpu_entry_stack(int cpu)
+-{
+-      return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
+-}
+-
+ #endif /* !__ASSEMBLY__ */
+ #endif /* _ASM_X86_FIXMAP_H */
+--- a/arch/x86/include/asm/pgtable_32_types.h
++++ b/arch/x86/include/asm/pgtable_32_types.h
+@@ -38,13 +38,22 @@ extern bool __vmalloc_start_set; /* set
+ #define LAST_PKMAP 1024
+ #endif
+-#define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE * (LAST_PKMAP + 1))    \
+-                  & PMD_MASK)
++/*
++ * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c
++ * to avoid include recursion hell
++ */
++#define CPU_ENTRY_AREA_PAGES  (NR_CPUS * 40)
++
++#define CPU_ENTRY_AREA_BASE                           \
++      ((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK)
++
++#define PKMAP_BASE            \
++      ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
+ #ifdef CONFIG_HIGHMEM
+ # define VMALLOC_END  (PKMAP_BASE - 2 * PAGE_SIZE)
+ #else
+-# define VMALLOC_END  (FIXADDR_START - 2 * PAGE_SIZE)
++# define VMALLOC_END  (CPU_ENTRY_AREA_BASE - 2 * PAGE_SIZE)
+ #endif
+ #define MODULES_VADDR VMALLOC_START
+--- a/arch/x86/include/asm/pgtable_64_types.h
++++ b/arch/x86/include/asm/pgtable_64_types.h
+@@ -76,32 +76,41 @@ typedef struct { pteval_t pte; } pte_t;
+ #define PGDIR_MASK    (~(PGDIR_SIZE - 1))
+ /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
+-#define MAXMEM                _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
++#define MAXMEM                        _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
++
+ #ifdef CONFIG_X86_5LEVEL
+-#define VMALLOC_SIZE_TB _AC(16384, UL)
+-#define __VMALLOC_BASE        _AC(0xff92000000000000, UL)
+-#define __VMEMMAP_BASE        _AC(0xffd4000000000000, UL)
++# define VMALLOC_SIZE_TB      _AC(16384, UL)
++# define __VMALLOC_BASE               _AC(0xff92000000000000, UL)
++# define __VMEMMAP_BASE               _AC(0xffd4000000000000, UL)
+ #else
+-#define VMALLOC_SIZE_TB       _AC(32, UL)
+-#define __VMALLOC_BASE        _AC(0xffffc90000000000, UL)
+-#define __VMEMMAP_BASE        _AC(0xffffea0000000000, UL)
++# define VMALLOC_SIZE_TB      _AC(32, UL)
++# define __VMALLOC_BASE               _AC(0xffffc90000000000, UL)
++# define __VMEMMAP_BASE               _AC(0xffffea0000000000, UL)
+ #endif
++
+ #ifdef CONFIG_RANDOMIZE_MEMORY
+-#define VMALLOC_START vmalloc_base
+-#define VMEMMAP_START vmemmap_base
++# define VMALLOC_START                vmalloc_base
++# define VMEMMAP_START                vmemmap_base
+ #else
+-#define VMALLOC_START __VMALLOC_BASE
+-#define VMEMMAP_START __VMEMMAP_BASE
++# define VMALLOC_START                __VMALLOC_BASE
++# define VMEMMAP_START                __VMEMMAP_BASE
+ #endif /* CONFIG_RANDOMIZE_MEMORY */
+-#define VMALLOC_END   (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
+-#define MODULES_VADDR    (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
++
++#define VMALLOC_END           (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
++
++#define MODULES_VADDR         (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
+ /* The module sections ends with the start of the fixmap */
+-#define MODULES_END   __fix_to_virt(__end_of_fixed_addresses + 1)
+-#define MODULES_LEN   (MODULES_END - MODULES_VADDR)
+-#define ESPFIX_PGD_ENTRY _AC(-2, UL)
+-#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT)
+-#define EFI_VA_START   ( -4 * (_AC(1, UL) << 30))
+-#define EFI_VA_END     (-68 * (_AC(1, UL) << 30))
++#define MODULES_END           __fix_to_virt(__end_of_fixed_addresses + 1)
++#define MODULES_LEN           (MODULES_END - MODULES_VADDR)
++
++#define ESPFIX_PGD_ENTRY      _AC(-2, UL)
++#define ESPFIX_BASE_ADDR      (ESPFIX_PGD_ENTRY << P4D_SHIFT)
++
++#define CPU_ENTRY_AREA_PGD    _AC(-3, UL)
++#define CPU_ENTRY_AREA_BASE   (CPU_ENTRY_AREA_PGD << P4D_SHIFT)
++
++#define EFI_VA_START          ( -4 * (_AC(1, UL) << 30))
++#define EFI_VA_END            (-68 * (_AC(1, UL) << 30))
+ #define EARLY_DYNAMIC_PAGE_TABLES     64
+--- a/arch/x86/kernel/dumpstack.c
++++ b/arch/x86/kernel/dumpstack.c
+@@ -18,6 +18,7 @@
+ #include <linux/nmi.h>
+ #include <linux/sysfs.h>
++#include <asm/cpu_entry_area.h>
+ #include <asm/stacktrace.h>
+ #include <asm/unwind.h>
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -951,8 +951,9 @@ void __init trap_init(void)
+        * "sidt" instruction will not leak the location of the kernel, and
+        * to defend the IDT against arbitrary memory write vulnerabilities.
+        * It will be reloaded in cpu_init() */
+-      __set_fixmap(FIX_RO_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO);
+-      idt_descr.address = fix_to_virt(FIX_RO_IDT);
++      cea_set_pte(CPU_ENTRY_AREA_RO_IDT_VADDR, __pa_symbol(idt_table),
++                  PAGE_KERNEL_RO);
++      idt_descr.address = CPU_ENTRY_AREA_RO_IDT;
+       /*
+        * Should be a barrier for any external CPU state:
+--- a/arch/x86/mm/cpu_entry_area.c
++++ b/arch/x86/mm/cpu_entry_area.c
+@@ -15,11 +15,27 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char,
+       [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+ #endif
++struct cpu_entry_area *get_cpu_entry_area(int cpu)
++{
++      unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
++      BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
++
++      return (struct cpu_entry_area *) va;
++}
++EXPORT_SYMBOL(get_cpu_entry_area);
++
++void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
++{
++      unsigned long va = (unsigned long) cea_vaddr;
++
++      set_pte_vaddr(va, pfn_pte(pa >> PAGE_SHIFT, flags));
++}
++
+ static void __init
+-set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
++cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
+ {
+-      for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
+-              __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
++      for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE)
++              cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
+ }
+ /* Setup the fixmap mappings only once per-processor */
+@@ -47,10 +63,12 @@ static void __init setup_cpu_entry_area(
+       pgprot_t tss_prot = PAGE_KERNEL;
+ #endif
+-      __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
+-      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page),
+-                              per_cpu_ptr(&entry_stack_storage, cpu), 1,
+-                              PAGE_KERNEL);
++      cea_set_pte(&get_cpu_entry_area(cpu)->gdt, get_cpu_gdt_paddr(cpu),
++                  gdt_prot);
++
++      cea_map_percpu_pages(&get_cpu_entry_area(cpu)->entry_stack_page,
++                           per_cpu_ptr(&entry_stack_storage, cpu), 1,
++                           PAGE_KERNEL);
+       /*
+        * The Intel SDM says (Volume 3, 7.2.1):
+@@ -72,10 +90,9 @@ static void __init setup_cpu_entry_area(
+       BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
+                     offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
+       BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
+-      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
+-                              &per_cpu(cpu_tss_rw, cpu),
+-                              sizeof(struct tss_struct) / PAGE_SIZE,
+-                              tss_prot);
++      cea_map_percpu_pages(&get_cpu_entry_area(cpu)->tss,
++                           &per_cpu(cpu_tss_rw, cpu),
++                           sizeof(struct tss_struct) / PAGE_SIZE, tss_prot);
+ #ifdef CONFIG_X86_32
+       per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
+@@ -85,20 +102,37 @@ static void __init setup_cpu_entry_area(
+       BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
+       BUILD_BUG_ON(sizeof(exception_stacks) !=
+                    sizeof(((struct cpu_entry_area *)0)->exception_stacks));
+-      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
+-                              &per_cpu(exception_stacks, cpu),
+-                              sizeof(exception_stacks) / PAGE_SIZE,
+-                              PAGE_KERNEL);
++      cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks,
++                           &per_cpu(exception_stacks, cpu),
++                           sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL);
+-      __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
++      cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline,
+                    __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
+ #endif
+ }
++static __init void setup_cpu_entry_area_ptes(void)
++{
++#ifdef CONFIG_X86_32
++      unsigned long start, end;
++
++      BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE);
++      BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);
++
++      start = CPU_ENTRY_AREA_BASE;
++      end = start + CPU_ENTRY_AREA_MAP_SIZE;
++
++      for (; start < end; start += PMD_SIZE)
++              populate_extra_pte(start);
++#endif
++}
++
+ void __init setup_cpu_entry_areas(void)
+ {
+       unsigned int cpu;
++      setup_cpu_entry_area_ptes();
++
+       for_each_possible_cpu(cpu)
+               setup_cpu_entry_area(cpu);
+ }
+--- a/arch/x86/mm/dump_pagetables.c
++++ b/arch/x86/mm/dump_pagetables.c
+@@ -58,6 +58,7 @@ enum address_markers_idx {
+       KASAN_SHADOW_START_NR,
+       KASAN_SHADOW_END_NR,
+ #endif
++      CPU_ENTRY_AREA_NR,
+ #ifdef CONFIG_X86_ESPFIX64
+       ESPFIX_START_NR,
+ #endif
+@@ -81,6 +82,7 @@ static struct addr_marker address_marker
+       [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" },
+       [KASAN_SHADOW_END_NR]   = { KASAN_SHADOW_END,   "KASAN shadow end" },
+ #endif
++      [CPU_ENTRY_AREA_NR]     = { CPU_ENTRY_AREA_BASE,"CPU entry Area" },
+ #ifdef CONFIG_X86_ESPFIX64
+       [ESPFIX_START_NR]       = { ESPFIX_BASE_ADDR,   "ESPfix Area", 16 },
+ #endif
+@@ -104,6 +106,7 @@ enum address_markers_idx {
+ #ifdef CONFIG_HIGHMEM
+       PKMAP_BASE_NR,
+ #endif
++      CPU_ENTRY_AREA_NR,
+       FIXADDR_START_NR,
+       END_OF_SPACE_NR,
+ };
+@@ -116,6 +119,7 @@ static struct addr_marker address_marker
+ #ifdef CONFIG_HIGHMEM
+       [PKMAP_BASE_NR]         = { 0UL,                "Persistent kmap() Area" },
+ #endif
++      [CPU_ENTRY_AREA_NR]     = { 0UL,                "CPU entry area" },
+       [FIXADDR_START_NR]      = { 0UL,                "Fixmap area" },
+       [END_OF_SPACE_NR]       = { -1,                 NULL }
+ };
+@@ -541,8 +545,8 @@ static int __init pt_dump_init(void)
+       address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE;
+ # endif
+       address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
++      address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE;
+ #endif
+-
+       return 0;
+ }
+ __initcall(pt_dump_init);
+--- a/arch/x86/mm/init_32.c
++++ b/arch/x86/mm/init_32.c
+@@ -50,6 +50,7 @@
+ #include <asm/setup.h>
+ #include <asm/set_memory.h>
+ #include <asm/page_types.h>
++#include <asm/cpu_entry_area.h>
+ #include <asm/init.h>
+ #include "mm_internal.h"
+@@ -766,6 +767,7 @@ void __init mem_init(void)
+       mem_init_print_info(NULL);
+       printk(KERN_INFO "virtual kernel memory layout:\n"
+               "    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
++              "  cpu_entry : 0x%08lx - 0x%08lx   (%4ld kB)\n"
+ #ifdef CONFIG_HIGHMEM
+               "    pkmap   : 0x%08lx - 0x%08lx   (%4ld kB)\n"
+ #endif
+@@ -777,6 +779,10 @@ void __init mem_init(void)
+               FIXADDR_START, FIXADDR_TOP,
+               (FIXADDR_TOP - FIXADDR_START) >> 10,
++              CPU_ENTRY_AREA_BASE,
++              CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE,
++              CPU_ENTRY_AREA_MAP_SIZE >> 10,
++
+ #ifdef CONFIG_HIGHMEM
+               PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
+               (LAST_PKMAP*PAGE_SIZE) >> 10,
+--- a/arch/x86/mm/kasan_init_64.c
++++ b/arch/x86/mm/kasan_init_64.c
+@@ -15,6 +15,7 @@
+ #include <asm/tlbflush.h>
+ #include <asm/sections.h>
+ #include <asm/pgtable.h>
++#include <asm/cpu_entry_area.h>
+ extern struct range pfn_mapped[E820_MAX_ENTRIES];
+@@ -322,31 +323,33 @@ void __init kasan_init(void)
+               map_range(&pfn_mapped[i]);
+       }
+-      kasan_populate_zero_shadow(
+-              kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
+-              kasan_mem_to_shadow((void *)__START_KERNEL_map));
+-
+-      kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
+-                            (unsigned long)kasan_mem_to_shadow(_end),
+-                            early_pfn_to_nid(__pa(_stext)));
+-
+-      shadow_cpu_entry_begin = (void *)__fix_to_virt(FIX_CPU_ENTRY_AREA_BOTTOM);
++      shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE;
+       shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin);
+       shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin,
+                                               PAGE_SIZE);
+-      shadow_cpu_entry_end = (void *)(__fix_to_virt(FIX_CPU_ENTRY_AREA_TOP) + PAGE_SIZE);
++      shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE +
++                                      CPU_ENTRY_AREA_MAP_SIZE);
+       shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end);
+       shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end,
+                                       PAGE_SIZE);
+-      kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
+-                                 shadow_cpu_entry_begin);
++      kasan_populate_zero_shadow(
++              kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
++              shadow_cpu_entry_begin);
+       kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
+                             (unsigned long)shadow_cpu_entry_end, 0);
+-      kasan_populate_zero_shadow(shadow_cpu_entry_end, (void *)KASAN_SHADOW_END);
++      kasan_populate_zero_shadow(shadow_cpu_entry_end,
++                              kasan_mem_to_shadow((void *)__START_KERNEL_map));
++
++      kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
++                            (unsigned long)kasan_mem_to_shadow(_end),
++                            early_pfn_to_nid(__pa(_stext)));
++
++      kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
++                              (void *)KASAN_SHADOW_END);
+       load_cr3(init_top_pgt);
+       __flush_tlb_all();
+--- a/arch/x86/mm/pgtable_32.c
++++ b/arch/x86/mm/pgtable_32.c
+@@ -10,6 +10,7 @@
+ #include <linux/pagemap.h>
+ #include <linux/spinlock.h>
++#include <asm/cpu_entry_area.h>
+ #include <asm/pgtable.h>
+ #include <asm/pgalloc.h>
+ #include <asm/fixmap.h>
+--- a/arch/x86/xen/mmu_pv.c
++++ b/arch/x86/xen/mmu_pv.c
+@@ -2261,7 +2261,6 @@ static void xen_set_fixmap(unsigned idx,
+       switch (idx) {
+       case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
+-      case FIX_RO_IDT:
+ #ifdef CONFIG_X86_32
+       case FIX_WP_TEST:
+ # ifdef CONFIG_HIGHMEM
+@@ -2272,7 +2271,6 @@ static void xen_set_fixmap(unsigned idx,
+ #endif
+       case FIX_TEXT_POKE0:
+       case FIX_TEXT_POKE1:
+-      case FIX_CPU_ENTRY_AREA_TOP ... FIX_CPU_ENTRY_AREA_BOTTOM:
+               /* All local page mappings */
+               pte = pfn_pte(phys, prot);
+               break;
diff --git a/queue-4.14/x86-cpu_entry_area-move-it-to-a-separate-unit.patch b/queue-4.14/x86-cpu_entry_area-move-it-to-a-separate-unit.patch
new file mode 100644 (file)
index 0000000..85f4696
--- /dev/null
@@ -0,0 +1,377 @@
+From ed1bbc40a0d10e0c5c74fe7bdc6298295cf40255 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 20 Dec 2017 18:28:54 +0100
+Subject: x86/cpu_entry_area: Move it to a separate unit
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit ed1bbc40a0d10e0c5c74fe7bdc6298295cf40255 upstream.
+
+Separate the cpu_entry_area code out of cpu/common.c and the fixmap.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/cpu_entry_area.h |   52 +++++++++++++++++
+ arch/x86/include/asm/fixmap.h         |   41 -------------
+ arch/x86/kernel/cpu/common.c          |   94 ------------------------------
+ arch/x86/kernel/traps.c               |    1 
+ arch/x86/mm/Makefile                  |    2 
+ arch/x86/mm/cpu_entry_area.c          |  104 ++++++++++++++++++++++++++++++++++
+ 6 files changed, 159 insertions(+), 135 deletions(-)
+
+--- /dev/null
++++ b/arch/x86/include/asm/cpu_entry_area.h
+@@ -0,0 +1,52 @@
++// SPDX-License-Identifier: GPL-2.0
++
++#ifndef _ASM_X86_CPU_ENTRY_AREA_H
++#define _ASM_X86_CPU_ENTRY_AREA_H
++
++#include <linux/percpu-defs.h>
++#include <asm/processor.h>
++
++/*
++ * cpu_entry_area is a percpu region that contains things needed by the CPU
++ * and early entry/exit code.  Real types aren't used for all fields here
++ * to avoid circular header dependencies.
++ *
++ * Every field is a virtual alias of some other allocated backing store.
++ * There is no direct allocation of a struct cpu_entry_area.
++ */
++struct cpu_entry_area {
++      char gdt[PAGE_SIZE];
++
++      /*
++       * The GDT is just below entry_stack and thus serves (on x86_64) as
++       * a read-only guard page.
++       */
++      struct entry_stack_page entry_stack_page;
++
++      /*
++       * On x86_64, the TSS is mapped RO.  On x86_32, it's mapped RW because
++       * we need task switches to work, and task switches write to the TSS.
++       */
++      struct tss_struct tss;
++
++      char entry_trampoline[PAGE_SIZE];
++
++#ifdef CONFIG_X86_64
++      /*
++       * Exception stacks used for IST entries.
++       *
++       * In the future, this should have a separate slot for each stack
++       * with guard pages between them.
++       */
++      char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
++#endif
++};
++
++#define CPU_ENTRY_AREA_SIZE   (sizeof(struct cpu_entry_area))
++#define CPU_ENTRY_AREA_PAGES  (CPU_ENTRY_AREA_SIZE / PAGE_SIZE)
++
++DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
++
++extern void setup_cpu_entry_areas(void);
++
++#endif
+--- a/arch/x86/include/asm/fixmap.h
++++ b/arch/x86/include/asm/fixmap.h
+@@ -25,6 +25,7 @@
+ #else
+ #include <uapi/asm/vsyscall.h>
+ #endif
++#include <asm/cpu_entry_area.h>
+ /*
+  * We can't declare FIXADDR_TOP as variable for x86_64 because vsyscall
+@@ -45,46 +46,6 @@ extern unsigned long __FIXADDR_TOP;
+ #endif
+ /*
+- * cpu_entry_area is a percpu region in the fixmap that contains things
+- * needed by the CPU and early entry/exit code.  Real types aren't used
+- * for all fields here to avoid circular header dependencies.
+- *
+- * Every field is a virtual alias of some other allocated backing store.
+- * There is no direct allocation of a struct cpu_entry_area.
+- */
+-struct cpu_entry_area {
+-      char gdt[PAGE_SIZE];
+-
+-      /*
+-       * The GDT is just below entry_stack and thus serves (on x86_64) as
+-       * a a read-only guard page.
+-       */
+-      struct entry_stack_page entry_stack_page;
+-
+-      /*
+-       * On x86_64, the TSS is mapped RO.  On x86_32, it's mapped RW because
+-       * we need task switches to work, and task switches write to the TSS.
+-       */
+-      struct tss_struct tss;
+-
+-      char entry_trampoline[PAGE_SIZE];
+-
+-#ifdef CONFIG_X86_64
+-      /*
+-       * Exception stacks used for IST entries.
+-       *
+-       * In the future, this should have a separate slot for each stack
+-       * with guard pages between them.
+-       */
+-      char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
+-#endif
+-};
+-
+-#define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE)
+-
+-extern void setup_cpu_entry_areas(void);
+-
+-/*
+  * Here we define all the compile-time 'special' virtual
+  * addresses. The point is to have a constant address at
+  * compile time, but to set the physical address only
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -482,102 +482,8 @@ static const unsigned int exception_stac
+         [0 ... N_EXCEPTION_STACKS - 1]        = EXCEPTION_STKSZ,
+         [DEBUG_STACK - 1]                     = DEBUG_STKSZ
+ };
+-
+-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+-      [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+-#endif
+-
+-static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page,
+-                                 entry_stack_storage);
+-
+-static void __init
+-set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
+-{
+-      for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
+-              __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
+-}
+-
+-/* Setup the fixmap mappings only once per-processor */
+-static void __init setup_cpu_entry_area(int cpu)
+-{
+-#ifdef CONFIG_X86_64
+-      extern char _entry_trampoline[];
+-
+-      /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
+-      pgprot_t gdt_prot = PAGE_KERNEL_RO;
+-      pgprot_t tss_prot = PAGE_KERNEL_RO;
+-#else
+-      /*
+-       * On native 32-bit systems, the GDT cannot be read-only because
+-       * our double fault handler uses a task gate, and entering through
+-       * a task gate needs to change an available TSS to busy.  If the
+-       * GDT is read-only, that will triple fault.  The TSS cannot be
+-       * read-only because the CPU writes to it on task switches.
+-       *
+-       * On Xen PV, the GDT must be read-only because the hypervisor
+-       * requires it.
+-       */
+-      pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
+-              PAGE_KERNEL_RO : PAGE_KERNEL;
+-      pgprot_t tss_prot = PAGE_KERNEL;
+ #endif
+-      __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
+-      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page),
+-                              per_cpu_ptr(&entry_stack_storage, cpu), 1,
+-                              PAGE_KERNEL);
+-
+-      /*
+-       * The Intel SDM says (Volume 3, 7.2.1):
+-       *
+-       *  Avoid placing a page boundary in the part of the TSS that the
+-       *  processor reads during a task switch (the first 104 bytes). The
+-       *  processor may not correctly perform address translations if a
+-       *  boundary occurs in this area. During a task switch, the processor
+-       *  reads and writes into the first 104 bytes of each TSS (using
+-       *  contiguous physical addresses beginning with the physical address
+-       *  of the first byte of the TSS). So, after TSS access begins, if
+-       *  part of the 104 bytes is not physically contiguous, the processor
+-       *  will access incorrect information without generating a page-fault
+-       *  exception.
+-       *
+-       * There are also a lot of errata involving the TSS spanning a page
+-       * boundary.  Assert that we're not doing that.
+-       */
+-      BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
+-                    offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
+-      BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
+-      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
+-                              &per_cpu(cpu_tss_rw, cpu),
+-                              sizeof(struct tss_struct) / PAGE_SIZE,
+-                              tss_prot);
+-
+-#ifdef CONFIG_X86_32
+-      per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
+-#endif
+-
+-#ifdef CONFIG_X86_64
+-      BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
+-      BUILD_BUG_ON(sizeof(exception_stacks) !=
+-                   sizeof(((struct cpu_entry_area *)0)->exception_stacks));
+-      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
+-                              &per_cpu(exception_stacks, cpu),
+-                              sizeof(exception_stacks) / PAGE_SIZE,
+-                              PAGE_KERNEL);
+-
+-      __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
+-                   __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
+-#endif
+-}
+-
+-void __init setup_cpu_entry_areas(void)
+-{
+-      unsigned int cpu;
+-
+-      for_each_possible_cpu(cpu)
+-              setup_cpu_entry_area(cpu);
+-}
+-
+ /* Load the original GDT from the per-cpu structure */
+ void load_direct_gdt(int cpu)
+ {
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -52,6 +52,7 @@
+ #include <asm/traps.h>
+ #include <asm/desc.h>
+ #include <asm/fpu/internal.h>
++#include <asm/cpu_entry_area.h>
+ #include <asm/mce.h>
+ #include <asm/fixmap.h>
+ #include <asm/mach_traps.h>
+--- a/arch/x86/mm/Makefile
++++ b/arch/x86/mm/Makefile
+@@ -10,7 +10,7 @@ CFLAGS_REMOVE_mem_encrypt.o  = -pg
+ endif
+ obj-y :=  init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
+-          pat.o pgtable.o physaddr.o setup_nx.o tlb.o
++          pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o
+ # Make sure __phys_addr has no stackprotector
+ nostackp := $(call cc-option, -fno-stack-protector)
+--- /dev/null
++++ b/arch/x86/mm/cpu_entry_area.c
+@@ -0,0 +1,104 @@
++// SPDX-License-Identifier: GPL-2.0
++
++#include <linux/spinlock.h>
++#include <linux/percpu.h>
++
++#include <asm/cpu_entry_area.h>
++#include <asm/pgtable.h>
++#include <asm/fixmap.h>
++#include <asm/desc.h>
++
++static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
++
++#ifdef CONFIG_X86_64
++static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
++      [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
++#endif
++
++static void __init
++set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
++{
++      for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
++              __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
++}
++
++/* Setup the fixmap mappings only once per-processor */
++static void __init setup_cpu_entry_area(int cpu)
++{
++#ifdef CONFIG_X86_64
++      extern char _entry_trampoline[];
++
++      /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
++      pgprot_t gdt_prot = PAGE_KERNEL_RO;
++      pgprot_t tss_prot = PAGE_KERNEL_RO;
++#else
++      /*
++       * On native 32-bit systems, the GDT cannot be read-only because
++       * our double fault handler uses a task gate, and entering through
++       * a task gate needs to change an available TSS to busy.  If the
++       * GDT is read-only, that will triple fault.  The TSS cannot be
++       * read-only because the CPU writes to it on task switches.
++       *
++       * On Xen PV, the GDT must be read-only because the hypervisor
++       * requires it.
++       */
++      pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
++              PAGE_KERNEL_RO : PAGE_KERNEL;
++      pgprot_t tss_prot = PAGE_KERNEL;
++#endif
++
++      __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
++      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page),
++                              per_cpu_ptr(&entry_stack_storage, cpu), 1,
++                              PAGE_KERNEL);
++
++      /*
++       * The Intel SDM says (Volume 3, 7.2.1):
++       *
++       *  Avoid placing a page boundary in the part of the TSS that the
++       *  processor reads during a task switch (the first 104 bytes). The
++       *  processor may not correctly perform address translations if a
++       *  boundary occurs in this area. During a task switch, the processor
++       *  reads and writes into the first 104 bytes of each TSS (using
++       *  contiguous physical addresses beginning with the physical address
++       *  of the first byte of the TSS). So, after TSS access begins, if
++       *  part of the 104 bytes is not physically contiguous, the processor
++       *  will access incorrect information without generating a page-fault
++       *  exception.
++       *
++       * There are also a lot of errata involving the TSS spanning a page
++       * boundary.  Assert that we're not doing that.
++       */
++      BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
++                    offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
++      BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
++      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
++                              &per_cpu(cpu_tss_rw, cpu),
++                              sizeof(struct tss_struct) / PAGE_SIZE,
++                              tss_prot);
++
++#ifdef CONFIG_X86_32
++      per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
++#endif
++
++#ifdef CONFIG_X86_64
++      BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
++      BUILD_BUG_ON(sizeof(exception_stacks) !=
++                   sizeof(((struct cpu_entry_area *)0)->exception_stacks));
++      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
++                              &per_cpu(exception_stacks, cpu),
++                              sizeof(exception_stacks) / PAGE_SIZE,
++                              PAGE_KERNEL);
++
++      __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
++                   __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
++#endif
++}
++
++void __init setup_cpu_entry_areas(void)
++{
++      unsigned int cpu;
++
++      for_each_possible_cpu(cpu)
++              setup_cpu_entry_area(cpu);
++}
diff --git a/queue-4.14/x86-cpu_entry_area-prevent-wraparound-in-setup_cpu_entry_area_ptes-on-32bit.patch b/queue-4.14/x86-cpu_entry_area-prevent-wraparound-in-setup_cpu_entry_area_ptes-on-32bit.patch
new file mode 100644 (file)
index 0000000..8ddca47
--- /dev/null
@@ -0,0 +1,39 @@
+From f6c4fd506cb626e4346aa81688f255e593a7c5a0 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 23 Dec 2017 19:45:11 +0100
+Subject: x86/cpu_entry_area: Prevent wraparound in setup_cpu_entry_area_ptes() on 32bit
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit f6c4fd506cb626e4346aa81688f255e593a7c5a0 upstream.
+
+The loop which populates the CPU entry area PMDs can wrap around on 32bit
+machines when the number of CPUs is small.
+
+It worked wonderfully for NR_CPUS=64 for whatever reason and the moron who
+wrote that code did not bother to test it with !SMP.
+
+Check for the wraparound to fix it.
+
+Fixes: 92a0f81d8957 ("x86/cpu_entry_area: Move it out of the fixmap")
+Reported-by: kernel test robot <fengguang.wu@intel.com>
+Signed-off-by: Thomas "Feels stupid" Gleixner <tglx@linutronix.de>
+Tested-by: Borislav Petkov <bp@alien8.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/mm/cpu_entry_area.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/mm/cpu_entry_area.c
++++ b/arch/x86/mm/cpu_entry_area.c
+@@ -122,7 +122,8 @@ static __init void setup_cpu_entry_area_
+       start = CPU_ENTRY_AREA_BASE;
+       end = start + CPU_ENTRY_AREA_MAP_SIZE;
+-      for (; start < end; start += PMD_SIZE)
++      /* Careful here: start + PMD_SIZE might wrap around */
++      for (; start < end && start >= CPU_ENTRY_AREA_BASE; start += PMD_SIZE)
+               populate_extra_pte(start);
+ #endif
+ }
diff --git a/queue-4.14/x86-doc-remove-obvious-weirdnesses-from-the-x86-mm-layout-documentation.patch b/queue-4.14/x86-doc-remove-obvious-weirdnesses-from-the-x86-mm-layout-documentation.patch
new file mode 100644 (file)
index 0000000..f908551
--- /dev/null
@@ -0,0 +1,75 @@
+From e8ffe96e5933d417195268478479933d56213a3f Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 5 Dec 2017 13:34:54 +0100
+Subject: x86/doc: Remove obvious weirdnesses from the x86 MM layout documentation
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit e8ffe96e5933d417195268478479933d56213a3f upstream.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ Documentation/x86/x86_64/mm.txt |   12 +++---------
+ 1 file changed, 3 insertions(+), 9 deletions(-)
+
+--- a/Documentation/x86/x86_64/mm.txt
++++ b/Documentation/x86/x86_64/mm.txt
+@@ -1,6 +1,4 @@
+-<previous description obsolete, deleted>
+-
+ Virtual memory map with 4 level page tables:
+ 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm
+@@ -49,8 +47,9 @@ ffffffffffe00000 - ffffffffffffffff (=2
+ Architecture defines a 64-bit virtual address. Implementations can support
+ less. Currently supported are 48- and 57-bit virtual addresses. Bits 63
+-through to the most-significant implemented bit are set to either all ones
+-or all zero. This causes hole between user space and kernel addresses.
++through to the most-significant implemented bit are sign extended.
++This causes hole between user space and kernel addresses if you interpret them
++as unsigned.
+ The direct mapping covers all memory in the system up to the highest
+ memory address (this means in some cases it can also include PCI memory
+@@ -60,9 +59,6 @@ vmalloc space is lazily synchronized int
+ the processes using the page fault handler, with init_top_pgt as
+ reference.
+-Current X86-64 implementations support up to 46 bits of address space (64 TB),
+-which is our current limit. This expands into MBZ space in the page tables.
+-
+ We map EFI runtime services in the 'efi_pgd' PGD in a 64Gb large virtual
+ memory window (this size is arbitrary, it can be raised later if needed).
+ The mappings are not part of any other kernel PGD and are only available
+@@ -74,5 +70,3 @@ following fixmap section.
+ Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
+ physical memory, vmalloc/ioremap space and virtual memory map are randomized.
+ Their order is preserved but their base will be offset early at boot time.
+-
+--Andi Kleen, Jul 2004
diff --git a/queue-4.14/x86-entry-rename-sysenter_stack-to-cpu_entry_area_entry_stack.patch b/queue-4.14/x86-entry-rename-sysenter_stack-to-cpu_entry_area_entry_stack.patch
new file mode 100644 (file)
index 0000000..71d3d4b
--- /dev/null
@@ -0,0 +1,316 @@
+From 4fe2d8b11a370af286287a2661de9d4e6c9a145a Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Mon, 4 Dec 2017 17:25:07 -0800
+Subject: x86/entry: Rename SYSENTER_stack to CPU_ENTRY_AREA_entry_stack
+
+From: Dave Hansen <dave.hansen@linux.intel.com>
+
+commit 4fe2d8b11a370af286287a2661de9d4e6c9a145a upstream.
+
+If the kernel oopses while on the trampoline stack, it will print
+"<SYSENTER>" even if SYSENTER is not involved.  That is rather confusing.
+
+The "SYSENTER" stack is used for a lot more than SYSENTER now.  Give it a
+better string to display in stack dumps, and rename the kernel code to
+match.
+
+Also move the 32-bit code over to the new naming even though it still uses
+the entry stack only for SYSENTER.
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bp@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/entry_32.S         |   12 ++++++------
+ arch/x86/entry/entry_64.S         |    4 ++--
+ arch/x86/include/asm/fixmap.h     |    8 ++++----
+ arch/x86/include/asm/processor.h  |    6 +++---
+ arch/x86/include/asm/stacktrace.h |    4 ++--
+ arch/x86/kernel/asm-offsets.c     |    4 ++--
+ arch/x86/kernel/asm-offsets_32.c  |    2 +-
+ arch/x86/kernel/cpu/common.c      |   14 +++++++-------
+ arch/x86/kernel/dumpstack.c       |   10 +++++-----
+ arch/x86/kernel/dumpstack_32.c    |    6 +++---
+ arch/x86/kernel/dumpstack_64.c    |   12 +++++++++---
+ 11 files changed, 44 insertions(+), 38 deletions(-)
+
+--- a/arch/x86/entry/entry_32.S
++++ b/arch/x86/entry/entry_32.S
+@@ -942,9 +942,9 @@ ENTRY(debug)
+       /* Are we currently on the SYSENTER stack? */
+       movl    PER_CPU_VAR(cpu_entry_area), %ecx
+-      addl    $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
+-      subl    %eax, %ecx      /* ecx = (end of SYSENTER_stack) - esp */
+-      cmpl    $SIZEOF_SYSENTER_stack, %ecx
++      addl    $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
++      subl    %eax, %ecx      /* ecx = (end of entry_stack) - esp */
++      cmpl    $SIZEOF_entry_stack, %ecx
+       jb      .Ldebug_from_sysenter_stack
+       TRACE_IRQS_OFF
+@@ -986,9 +986,9 @@ ENTRY(nmi)
+       /* Are we currently on the SYSENTER stack? */
+       movl    PER_CPU_VAR(cpu_entry_area), %ecx
+-      addl    $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
+-      subl    %eax, %ecx      /* ecx = (end of SYSENTER_stack) - esp */
+-      cmpl    $SIZEOF_SYSENTER_stack, %ecx
++      addl    $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
++      subl    %eax, %ecx      /* ecx = (end of entry_stack) - esp */
++      cmpl    $SIZEOF_entry_stack, %ecx
+       jb      .Lnmi_from_sysenter_stack
+       /* Not on SYSENTER stack. */
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -154,8 +154,8 @@ END(native_usergs_sysret64)
+       _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
+ /* The top word of the SYSENTER stack is hot and is usable as scratch space. */
+-#define RSP_SCRATCH   CPU_ENTRY_AREA_SYSENTER_stack + \
+-                      SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA
++#define RSP_SCRATCH   CPU_ENTRY_AREA_entry_stack + \
++                      SIZEOF_entry_stack - 8 + CPU_ENTRY_AREA
+ ENTRY(entry_SYSCALL_64_trampoline)
+       UNWIND_HINT_EMPTY
+--- a/arch/x86/include/asm/fixmap.h
++++ b/arch/x86/include/asm/fixmap.h
+@@ -56,10 +56,10 @@ struct cpu_entry_area {
+       char gdt[PAGE_SIZE];
+       /*
+-       * The GDT is just below SYSENTER_stack and thus serves (on x86_64) as
++       * The GDT is just below entry_stack and thus serves (on x86_64) as
+        * a a read-only guard page.
+        */
+-      struct SYSENTER_stack_page SYSENTER_stack_page;
++      struct entry_stack_page entry_stack_page;
+       /*
+        * On x86_64, the TSS is mapped RO.  On x86_32, it's mapped RW because
+@@ -250,9 +250,9 @@ static inline struct cpu_entry_area *get
+       return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0));
+ }
+-static inline struct SYSENTER_stack *cpu_SYSENTER_stack(int cpu)
++static inline struct entry_stack *cpu_entry_stack(int cpu)
+ {
+-      return &get_cpu_entry_area(cpu)->SYSENTER_stack_page.stack;
++      return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
+ }
+ #endif /* !__ASSEMBLY__ */
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -336,12 +336,12 @@ struct x86_hw_tss {
+ #define IO_BITMAP_OFFSET              (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
+ #define INVALID_IO_BITMAP_OFFSET      0x8000
+-struct SYSENTER_stack {
++struct entry_stack {
+       unsigned long           words[64];
+ };
+-struct SYSENTER_stack_page {
+-      struct SYSENTER_stack stack;
++struct entry_stack_page {
++      struct entry_stack stack;
+ } __aligned(PAGE_SIZE);
+ struct tss_struct {
+--- a/arch/x86/include/asm/stacktrace.h
++++ b/arch/x86/include/asm/stacktrace.h
+@@ -16,7 +16,7 @@ enum stack_type {
+       STACK_TYPE_TASK,
+       STACK_TYPE_IRQ,
+       STACK_TYPE_SOFTIRQ,
+-      STACK_TYPE_SYSENTER,
++      STACK_TYPE_ENTRY,
+       STACK_TYPE_EXCEPTION,
+       STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1,
+ };
+@@ -29,7 +29,7 @@ struct stack_info {
+ bool in_task_stack(unsigned long *stack, struct task_struct *task,
+                  struct stack_info *info);
+-bool in_sysenter_stack(unsigned long *stack, struct stack_info *info);
++bool in_entry_stack(unsigned long *stack, struct stack_info *info);
+ int get_stack_info(unsigned long *stack, struct task_struct *task,
+                  struct stack_info *info, unsigned long *visit_mask);
+--- a/arch/x86/kernel/asm-offsets.c
++++ b/arch/x86/kernel/asm-offsets.c
+@@ -97,6 +97,6 @@ void common(void) {
+       /* Layout info for cpu_entry_area */
+       OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
+       OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
+-      OFFSET(CPU_ENTRY_AREA_SYSENTER_stack, cpu_entry_area, SYSENTER_stack_page);
+-      DEFINE(SIZEOF_SYSENTER_stack, sizeof(struct SYSENTER_stack));
++      OFFSET(CPU_ENTRY_AREA_entry_stack, cpu_entry_area, entry_stack_page);
++      DEFINE(SIZEOF_entry_stack, sizeof(struct entry_stack));
+ }
+--- a/arch/x86/kernel/asm-offsets_32.c
++++ b/arch/x86/kernel/asm-offsets_32.c
+@@ -48,7 +48,7 @@ void foo(void)
+       /* Offset from the sysenter stack to tss.sp0 */
+       DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) -
+-             offsetofend(struct cpu_entry_area, SYSENTER_stack_page.stack));
++             offsetofend(struct cpu_entry_area, entry_stack_page.stack));
+ #ifdef CONFIG_CC_STACKPROTECTOR
+       BLANK();
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -487,8 +487,8 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char,
+       [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+ #endif
+-static DEFINE_PER_CPU_PAGE_ALIGNED(struct SYSENTER_stack_page,
+-                                 SYSENTER_stack_storage);
++static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page,
++                                 entry_stack_storage);
+ static void __init
+ set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
+@@ -523,8 +523,8 @@ static void __init setup_cpu_entry_area(
+ #endif
+       __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
+-      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, SYSENTER_stack_page),
+-                              per_cpu_ptr(&SYSENTER_stack_storage, cpu), 1,
++      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page),
++                              per_cpu_ptr(&entry_stack_storage, cpu), 1,
+                               PAGE_KERNEL);
+       /*
+@@ -1323,7 +1323,7 @@ void enable_sep_cpu(void)
+       tss->x86_tss.ss1 = __KERNEL_CS;
+       wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
+-      wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1), 0);
++      wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1), 0);
+       wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
+       put_cpu();
+@@ -1440,7 +1440,7 @@ void syscall_init(void)
+        * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
+        */
+       wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
+-      wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
++      wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1));
+       wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
+ #else
+       wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
+@@ -1655,7 +1655,7 @@ void cpu_init(void)
+        */
+       set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
+       load_TR_desc();
+-      load_sp0((unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
++      load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
+       load_mm_ldt(&init_mm);
+--- a/arch/x86/kernel/dumpstack.c
++++ b/arch/x86/kernel/dumpstack.c
+@@ -43,9 +43,9 @@ bool in_task_stack(unsigned long *stack,
+       return true;
+ }
+-bool in_sysenter_stack(unsigned long *stack, struct stack_info *info)
++bool in_entry_stack(unsigned long *stack, struct stack_info *info)
+ {
+-      struct SYSENTER_stack *ss = cpu_SYSENTER_stack(smp_processor_id());
++      struct entry_stack *ss = cpu_entry_stack(smp_processor_id());
+       void *begin = ss;
+       void *end = ss + 1;
+@@ -53,7 +53,7 @@ bool in_sysenter_stack(unsigned long *st
+       if ((void *)stack < begin || (void *)stack >= end)
+               return false;
+-      info->type      = STACK_TYPE_SYSENTER;
++      info->type      = STACK_TYPE_ENTRY;
+       info->begin     = begin;
+       info->end       = end;
+       info->next_sp   = NULL;
+@@ -111,13 +111,13 @@ void show_trace_log_lvl(struct task_stru
+        * - task stack
+        * - interrupt stack
+        * - HW exception stacks (double fault, nmi, debug, mce)
+-       * - SYSENTER stack
++       * - entry stack
+        *
+        * x86-32 can have up to four stacks:
+        * - task stack
+        * - softirq stack
+        * - hardirq stack
+-       * - SYSENTER stack
++       * - entry stack
+        */
+       for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
+               const char *stack_name;
+--- a/arch/x86/kernel/dumpstack_32.c
++++ b/arch/x86/kernel/dumpstack_32.c
+@@ -26,8 +26,8 @@ const char *stack_type_name(enum stack_t
+       if (type == STACK_TYPE_SOFTIRQ)
+               return "SOFTIRQ";
+-      if (type == STACK_TYPE_SYSENTER)
+-              return "SYSENTER";
++      if (type == STACK_TYPE_ENTRY)
++              return "ENTRY_TRAMPOLINE";
+       return NULL;
+ }
+@@ -96,7 +96,7 @@ int get_stack_info(unsigned long *stack,
+       if (task != current)
+               goto unknown;
+-      if (in_sysenter_stack(stack, info))
++      if (in_entry_stack(stack, info))
+               goto recursion_check;
+       if (in_hardirq_stack(stack, info))
+--- a/arch/x86/kernel/dumpstack_64.c
++++ b/arch/x86/kernel/dumpstack_64.c
+@@ -37,8 +37,14 @@ const char *stack_type_name(enum stack_t
+       if (type == STACK_TYPE_IRQ)
+               return "IRQ";
+-      if (type == STACK_TYPE_SYSENTER)
+-              return "SYSENTER";
++      if (type == STACK_TYPE_ENTRY) {
++              /*
++               * On 64-bit, we have a generic entry stack that we
++               * use for all the kernel entry points, including
++               * SYSENTER.
++               */
++              return "ENTRY_TRAMPOLINE";
++      }
+       if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST)
+               return exception_stack_names[type - STACK_TYPE_EXCEPTION];
+@@ -118,7 +124,7 @@ int get_stack_info(unsigned long *stack,
+       if (in_irq_stack(stack, info))
+               goto recursion_check;
+-      if (in_sysenter_stack(stack, info))
++      if (in_entry_stack(stack, info))
+               goto recursion_check;
+       goto unknown;
diff --git a/queue-4.14/x86-kconfig-limit-nr_cpus-on-32-bit-to-a-sane-amount.patch b/queue-4.14/x86-kconfig-limit-nr_cpus-on-32-bit-to-a-sane-amount.patch
new file mode 100644 (file)
index 0000000..4440b30
--- /dev/null
@@ -0,0 +1,47 @@
+From 7bbcbd3d1cdcbacd0f9f8dc4c98d550972f1ca30 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 20 Dec 2017 18:02:34 +0100
+Subject: x86/Kconfig: Limit NR_CPUS on 32-bit to a sane amount
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 7bbcbd3d1cdcbacd0f9f8dc4c98d550972f1ca30 upstream.
+
+The recent cpu_entry_area changes fail to compile on 32-bit when BIGSMP=y
+and NR_CPUS=512, because the fixmap area becomes too big.
+
+Limit the number of CPUs with BIGSMP to 64, which is already way too big for
+32-bit, but it's at least a working limitation.
+
+We performed a quick survey of 32-bit-only machines that might be affected
+by this change negatively, but found none.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/Kconfig |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -925,7 +925,8 @@ config MAXSMP
+ config NR_CPUS
+       int "Maximum number of CPUs" if SMP && !MAXSMP
+       range 2 8 if SMP && X86_32 && !X86_BIGSMP
+-      range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK
++      range 2 64 if SMP && X86_32 && X86_BIGSMP
++      range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64
+       range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
+       default "1" if !SMP
+       default "8192" if MAXSMP
diff --git a/queue-4.14/x86-ldt-prevent-ldt-inheritance-on-exec.patch b/queue-4.14/x86-ldt-prevent-ldt-inheritance-on-exec.patch
new file mode 100644 (file)
index 0000000..f25c638
--- /dev/null
@@ -0,0 +1,164 @@
+From a4828f81037f491b2cc986595e3a969a6eeb2fb5 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 14 Dec 2017 12:27:31 +0100
+Subject: x86/ldt: Prevent LDT inheritance on exec
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit a4828f81037f491b2cc986595e3a969a6eeb2fb5 upstream.
+
+The LDT is inherited across fork() or exec(), but that makes no sense
+at all because exec() is supposed to start the process clean.
+
+The reason why this happens is that init_new_context_ldt() is called from
+init_new_context() which obviously needs to be called for both fork() and
+exec().
+
+It would be surprising if anything relies on that behaviour, so it seems to
+be safe to remove that misfeature.
+
+Split the context initialization into two parts. Clear the LDT pointer and
+initialize the mutex from the general context init and move the LDT
+duplication to arch_dup_mmap() which is only called on fork().
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Peter Zijlstra <peterz@infradead.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Andy Lutomirsky <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: dan.j.williams@intel.com
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: kirill.shutemov@linux.intel.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/mmu_context.h    |   21 ++++++++++++++-------
+ arch/x86/kernel/ldt.c                 |   18 +++++-------------
+ tools/testing/selftests/x86/ldt_gdt.c |    9 +++------
+ 3 files changed, 22 insertions(+), 26 deletions(-)
+
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -57,11 +57,17 @@ struct ldt_struct {
+ /*
+  * Used for LDT copy/destruction.
+  */
+-int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm);
++static inline void init_new_context_ldt(struct mm_struct *mm)
++{
++      mm->context.ldt = NULL;
++      init_rwsem(&mm->context.ldt_usr_sem);
++}
++int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm);
+ void destroy_context_ldt(struct mm_struct *mm);
+ #else /* CONFIG_MODIFY_LDT_SYSCALL */
+-static inline int init_new_context_ldt(struct task_struct *tsk,
+-                                     struct mm_struct *mm)
++static inline void init_new_context_ldt(struct mm_struct *mm) { }
++static inline int ldt_dup_context(struct mm_struct *oldmm,
++                                struct mm_struct *mm)
+ {
+       return 0;
+ }
+@@ -137,15 +143,16 @@ static inline int init_new_context(struc
+       mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
+       atomic64_set(&mm->context.tlb_gen, 0);
+-      #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
++#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+       if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
+               /* pkey 0 is the default and always allocated */
+               mm->context.pkey_allocation_map = 0x1;
+               /* -1 means unallocated or invalid */
+               mm->context.execute_only_pkey = -1;
+       }
+-      #endif
+-      return init_new_context_ldt(tsk, mm);
++#endif
++      init_new_context_ldt(mm);
++      return 0;
+ }
+ static inline void destroy_context(struct mm_struct *mm)
+ {
+@@ -181,7 +188,7 @@ do {                                               \
+ static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
+ {
+       paravirt_arch_dup_mmap(oldmm, mm);
+-      return 0;
++      return ldt_dup_context(oldmm, mm);
+ }
+ static inline void arch_exit_mmap(struct mm_struct *mm)
+--- a/arch/x86/kernel/ldt.c
++++ b/arch/x86/kernel/ldt.c
+@@ -131,28 +131,20 @@ static void free_ldt_struct(struct ldt_s
+ }
+ /*
+- * we do not have to muck with descriptors here, that is
+- * done in switch_mm() as needed.
++ * Called on fork from arch_dup_mmap(). Just copy the current LDT state,
++ * the new task is not running, so nothing can be installed.
+  */
+-int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm)
++int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm)
+ {
+       struct ldt_struct *new_ldt;
+-      struct mm_struct *old_mm;
+       int retval = 0;
+-      init_rwsem(&mm->context.ldt_usr_sem);
+-
+-      old_mm = current->mm;
+-      if (!old_mm) {
+-              mm->context.ldt = NULL;
++      if (!old_mm)
+               return 0;
+-      }
+       mutex_lock(&old_mm->context.lock);
+-      if (!old_mm->context.ldt) {
+-              mm->context.ldt = NULL;
++      if (!old_mm->context.ldt)
+               goto out_unlock;
+-      }
+       new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries);
+       if (!new_ldt) {
+--- a/tools/testing/selftests/x86/ldt_gdt.c
++++ b/tools/testing/selftests/x86/ldt_gdt.c
+@@ -627,13 +627,10 @@ static void do_multicpu_tests(void)
+ static int finish_exec_test(void)
+ {
+       /*
+-       * In a sensible world, this would be check_invalid_segment(0, 1);
+-       * For better or for worse, though, the LDT is inherited across exec.
+-       * We can probably change this safely, but for now we test it.
++       * Older kernel versions did inherit the LDT on exec() which is
++       * wrong because exec() starts from a clean state.
+        */
+-      check_valid_segment(0, 1,
+-                          AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB,
+-                          42, true);
++      check_invalid_segment(0, 1);
+       return nerrs ? 1 : 0;
+ }
diff --git a/queue-4.14/x86-ldt-rework-locking.patch b/queue-4.14/x86-ldt-rework-locking.patch
new file mode 100644 (file)
index 0000000..03dfa79
--- /dev/null
@@ -0,0 +1,186 @@
+From c2b3496bb30bd159e9de42e5c952e1f1f33c9a77 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 14 Dec 2017 12:27:30 +0100
+Subject: x86/ldt: Rework locking
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit c2b3496bb30bd159e9de42e5c952e1f1f33c9a77 upstream.
+
+The LDT is duplicated on fork() and on exec(), which is wrong as exec()
+should start from a clean state, i.e. without LDT. To fix this the LDT
+duplication code will be moved into arch_dup_mmap() which is only called
+for fork().
+
+This introduces a locking problem. arch_dup_mmap() holds mmap_sem of the
+parent process, but the LDT duplication code needs to acquire
+mm->context.lock to access the LDT data safely, which is the reverse lock
+order of write_ldt() where mmap_sem nests into context.lock.
+
+Solve this by introducing a new rw semaphore which serializes the
+read/write_ldt() syscall operations, and by using context.lock to protect
+the actual installation of the LDT descriptor.
+
+So context.lock stabilizes mm->context.ldt and can nest inside of the new
+semaphore or mmap_sem.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Andy Lutomirsky <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: dan.j.williams@intel.com
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: kirill.shutemov@linux.intel.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/mmu.h         |    4 +++-
+ arch/x86/include/asm/mmu_context.h |    2 ++
+ arch/x86/kernel/ldt.c              |   33 +++++++++++++++++++++------------
+ 3 files changed, 26 insertions(+), 13 deletions(-)
+
+--- a/arch/x86/include/asm/mmu.h
++++ b/arch/x86/include/asm/mmu.h
+@@ -3,6 +3,7 @@
+ #define _ASM_X86_MMU_H
+ #include <linux/spinlock.h>
++#include <linux/rwsem.h>
+ #include <linux/mutex.h>
+ #include <linux/atomic.h>
+@@ -27,7 +28,8 @@ typedef struct {
+       atomic64_t tlb_gen;
+ #ifdef CONFIG_MODIFY_LDT_SYSCALL
+-      struct ldt_struct *ldt;
++      struct rw_semaphore     ldt_usr_sem;
++      struct ldt_struct       *ldt;
+ #endif
+ #ifdef CONFIG_X86_64
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -132,6 +132,8 @@ void enter_lazy_tlb(struct mm_struct *mm
+ static inline int init_new_context(struct task_struct *tsk,
+                                  struct mm_struct *mm)
+ {
++      mutex_init(&mm->context.lock);
++
+       mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
+       atomic64_set(&mm->context.tlb_gen, 0);
+--- a/arch/x86/kernel/ldt.c
++++ b/arch/x86/kernel/ldt.c
+@@ -5,6 +5,11 @@
+  * Copyright (C) 2002 Andi Kleen
+  *
+  * This handles calls from both 32bit and 64bit mode.
++ *
++ * Lock order:
++ *    contex.ldt_usr_sem
++ *      mmap_sem
++ *        context.lock
+  */
+ #include <linux/errno.h>
+@@ -42,7 +47,7 @@ static void refresh_ldt_segments(void)
+ #endif
+ }
+-/* context.lock is held for us, so we don't need any locking. */
++/* context.lock is held by the task which issued the smp function call */
+ static void flush_ldt(void *__mm)
+ {
+       struct mm_struct *mm = __mm;
+@@ -99,15 +104,17 @@ static void finalize_ldt_struct(struct l
+       paravirt_alloc_ldt(ldt->entries, ldt->nr_entries);
+ }
+-/* context.lock is held */
+-static void install_ldt(struct mm_struct *current_mm,
+-                      struct ldt_struct *ldt)
++static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt)
+ {
++      mutex_lock(&mm->context.lock);
++
+       /* Synchronizes with READ_ONCE in load_mm_ldt. */
+-      smp_store_release(&current_mm->context.ldt, ldt);
++      smp_store_release(&mm->context.ldt, ldt);
+-      /* Activate the LDT for all CPUs using current_mm. */
+-      on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true);
++      /* Activate the LDT for all CPUs using currents mm. */
++      on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true);
++
++      mutex_unlock(&mm->context.lock);
+ }
+ static void free_ldt_struct(struct ldt_struct *ldt)
+@@ -133,7 +140,8 @@ int init_new_context_ldt(struct task_str
+       struct mm_struct *old_mm;
+       int retval = 0;
+-      mutex_init(&mm->context.lock);
++      init_rwsem(&mm->context.ldt_usr_sem);
++
+       old_mm = current->mm;
+       if (!old_mm) {
+               mm->context.ldt = NULL;
+@@ -180,7 +188,7 @@ static int read_ldt(void __user *ptr, un
+       unsigned long entries_size;
+       int retval;
+-      mutex_lock(&mm->context.lock);
++      down_read(&mm->context.ldt_usr_sem);
+       if (!mm->context.ldt) {
+               retval = 0;
+@@ -209,7 +217,7 @@ static int read_ldt(void __user *ptr, un
+       retval = bytecount;
+ out_unlock:
+-      mutex_unlock(&mm->context.lock);
++      up_read(&mm->context.ldt_usr_sem);
+       return retval;
+ }
+@@ -269,7 +277,8 @@ static int write_ldt(void __user *ptr, u
+                       ldt.avl = 0;
+       }
+-      mutex_lock(&mm->context.lock);
++      if (down_write_killable(&mm->context.ldt_usr_sem))
++              return -EINTR;
+       old_ldt       = mm->context.ldt;
+       old_nr_entries = old_ldt ? old_ldt->nr_entries : 0;
+@@ -291,7 +300,7 @@ static int write_ldt(void __user *ptr, u
+       error = 0;
+ out_unlock:
+-      mutex_unlock(&mm->context.lock);
++      up_write(&mm->context.ldt_usr_sem);
+ out:
+       return error;
+ }
diff --git a/queue-4.14/x86-microcode-dont-abuse-the-tlb-flush-interface.patch b/queue-4.14/x86-microcode-dont-abuse-the-tlb-flush-interface.patch
new file mode 100644 (file)
index 0000000..523807c
--- /dev/null
@@ -0,0 +1,115 @@
+From 23cb7d46f371844c004784ad9552a57446f73e5a Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 5 Dec 2017 13:34:51 +0100
+Subject: x86/microcode: Dont abuse the TLB-flush interface
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 23cb7d46f371844c004784ad9552a57446f73e5a upstream.
+
+Commit:
+
+  ec400ddeff20 ("x86/microcode_intel_early.c: Early update ucode on Intel's CPU")
+
+... grubbed into tlbflush internals without coherent explanation.
+
+Since it says it's a precaution and the SDM doesn't mention anything like
+this, take it out back.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: fenghua.yu@intel.com
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/tlbflush.h       |   19 ++++++-------------
+ arch/x86/kernel/cpu/microcode/intel.c |   13 -------------
+ 2 files changed, 6 insertions(+), 26 deletions(-)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -246,20 +246,9 @@ static inline void __native_flush_tlb(vo
+       preempt_enable();
+ }
+-static inline void __native_flush_tlb_global_irq_disabled(void)
+-{
+-      unsigned long cr4;
+-
+-      cr4 = this_cpu_read(cpu_tlbstate.cr4);
+-      /* clear PGE */
+-      native_write_cr4(cr4 & ~X86_CR4_PGE);
+-      /* write old PGE again and flush TLBs */
+-      native_write_cr4(cr4);
+-}
+-
+ static inline void __native_flush_tlb_global(void)
+ {
+-      unsigned long flags;
++      unsigned long cr4, flags;
+       if (static_cpu_has(X86_FEATURE_INVPCID)) {
+               /*
+@@ -277,7 +266,11 @@ static inline void __native_flush_tlb_gl
+        */
+       raw_local_irq_save(flags);
+-      __native_flush_tlb_global_irq_disabled();
++      cr4 = this_cpu_read(cpu_tlbstate.cr4);
++      /* toggle PGE */
++      native_write_cr4(cr4 ^ X86_CR4_PGE);
++      /* write old PGE again and flush TLBs */
++      native_write_cr4(cr4);
+       raw_local_irq_restore(flags);
+ }
+--- a/arch/x86/kernel/cpu/microcode/intel.c
++++ b/arch/x86/kernel/cpu/microcode/intel.c
+@@ -565,15 +565,6 @@ static void print_ucode(struct ucode_cpu
+ }
+ #else
+-/*
+- * Flush global tlb. We only do this in x86_64 where paging has been enabled
+- * already and PGE should be enabled as well.
+- */
+-static inline void flush_tlb_early(void)
+-{
+-      __native_flush_tlb_global_irq_disabled();
+-}
+-
+ static inline void print_ucode(struct ucode_cpu_info *uci)
+ {
+       struct microcode_intel *mc;
+@@ -602,10 +593,6 @@ static int apply_microcode_early(struct
+       if (rev != mc->hdr.rev)
+               return -1;
+-#ifdef CONFIG_X86_64
+-      /* Flush global tlb. This is precaution. */
+-      flush_tlb_early();
+-#endif
+       uci->cpu_sig.rev = rev;
+       if (early)
diff --git a/queue-4.14/x86-mm-64-improve-the-memory-map-documentation.patch b/queue-4.14/x86-mm-64-improve-the-memory-map-documentation.patch
new file mode 100644 (file)
index 0000000..c4fbdb3
--- /dev/null
@@ -0,0 +1,61 @@
+From 5a7ccf4754fb3660569a6de52ba7f7fc3dfaf280 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Tue, 12 Dec 2017 07:56:43 -0800
+Subject: x86/mm/64: Improve the memory map documentation
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 5a7ccf4754fb3660569a6de52ba7f7fc3dfaf280 upstream.
+
+The old docs had the vsyscall range wrong and were missing the fixmap.
+Fix both.
+
+There used to be 8 MB reserved for future vsyscalls, but that's long gone.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Kirill A. Shutemov <kirill@shutemov.name>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ Documentation/x86/x86_64/mm.txt |   10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/Documentation/x86/x86_64/mm.txt
++++ b/Documentation/x86/x86_64/mm.txt
+@@ -19,8 +19,9 @@ ffffff0000000000 - ffffff7fffffffff (=39
+ ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
+ ... unused hole ...
+ ffffffff80000000 - ffffffff9fffffff (=512 MB)  kernel text mapping, from phys 0
+-ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space (variable)
+-ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
++ffffffffa0000000 - [fixmap start]   (~1526 MB) module mapping space (variable)
++[fixmap start]   - ffffffffff5fffff kernel-internal fixmap range
++ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
+ ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
+ Virtual memory map with 5 level page tables:
+@@ -41,8 +42,9 @@ ffffff0000000000 - ffffff7fffffffff (=39
+ ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
+ ... unused hole ...
+ ffffffff80000000 - ffffffff9fffffff (=512 MB)  kernel text mapping, from phys 0
+-ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space
+-ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
++ffffffffa0000000 - [fixmap start]   (~1526 MB) module mapping space
++[fixmap start]   - ffffffffff5fffff kernel-internal fixmap range
++ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
+ ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
+ Architecture defines a 64-bit virtual address. Implementations can support
diff --git a/queue-4.14/x86-mm-add-comments-to-clarify-which-tlb-flush-functions-are-supposed-to-flush-what.patch b/queue-4.14/x86-mm-add-comments-to-clarify-which-tlb-flush-functions-are-supposed-to-flush-what.patch
new file mode 100644 (file)
index 0000000..ebe3f36
--- /dev/null
@@ -0,0 +1,102 @@
+From 3f67af51e56f291d7417d77c4f67cd774633c5e1 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 5 Dec 2017 13:34:52 +0100
+Subject: x86/mm: Add comments to clarify which TLB-flush functions are supposed to flush what
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 3f67af51e56f291d7417d77c4f67cd774633c5e1 upstream.
+
+Per popular request..
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/tlbflush.h |   23 +++++++++++++++++++++--
+ 1 file changed, 21 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -228,6 +228,9 @@ static inline void cr4_set_bits_and_upda
+ extern void initialize_tlbstate_and_flush(void);
++/*
++ * flush the entire current user mapping
++ */
+ static inline void __native_flush_tlb(void)
+ {
+       /*
+@@ -240,6 +243,9 @@ static inline void __native_flush_tlb(vo
+       preempt_enable();
+ }
++/*
++ * flush everything
++ */
+ static inline void __native_flush_tlb_global(void)
+ {
+       unsigned long cr4, flags;
+@@ -269,17 +275,27 @@ static inline void __native_flush_tlb_gl
+       raw_local_irq_restore(flags);
+ }
++/*
++ * flush one page in the user mapping
++ */
+ static inline void __native_flush_tlb_single(unsigned long addr)
+ {
+       asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
+ }
++/*
++ * flush everything
++ */
+ static inline void __flush_tlb_all(void)
+ {
+-      if (boot_cpu_has(X86_FEATURE_PGE))
++      if (boot_cpu_has(X86_FEATURE_PGE)) {
+               __flush_tlb_global();
+-      else
++      } else {
++              /*
++               * !PGE -> !PCID (setup_pcid()), thus every flush is total.
++               */
+               __flush_tlb();
++      }
+       /*
+        * Note: if we somehow had PCID but not PGE, then this wouldn't work --
+@@ -290,6 +306,9 @@ static inline void __flush_tlb_all(void)
+        */
+ }
++/*
++ * flush one page in the kernel mapping
++ */
+ static inline void __flush_tlb_one(unsigned long addr)
+ {
+       count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
diff --git a/queue-4.14/x86-mm-create-asm-invpcid.h.patch b/queue-4.14/x86-mm-create-asm-invpcid.h.patch
new file mode 100644 (file)
index 0000000..6d90e97
--- /dev/null
@@ -0,0 +1,155 @@
+From 1a3b0caeb77edeac5ce5fa05e6a61c474c9a9745 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 5 Dec 2017 13:34:47 +0100
+Subject: x86/mm: Create asm/invpcid.h
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 1a3b0caeb77edeac5ce5fa05e6a61c474c9a9745 upstream.
+
+Unclutter tlbflush.h a little.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/invpcid.h  |   53 ++++++++++++++++++++++++++++++++++++++++
+ arch/x86/include/asm/tlbflush.h |   49 ------------------------------------
+ 2 files changed, 54 insertions(+), 48 deletions(-)
+
+--- /dev/null
++++ b/arch/x86/include/asm/invpcid.h
+@@ -0,0 +1,53 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef _ASM_X86_INVPCID
++#define _ASM_X86_INVPCID
++
++static inline void __invpcid(unsigned long pcid, unsigned long addr,
++                           unsigned long type)
++{
++      struct { u64 d[2]; } desc = { { pcid, addr } };
++
++      /*
++       * The memory clobber is because the whole point is to invalidate
++       * stale TLB entries and, especially if we're flushing global
++       * mappings, we don't want the compiler to reorder any subsequent
++       * memory accesses before the TLB flush.
++       *
++       * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
++       * invpcid (%rcx), %rax in long mode.
++       */
++      asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
++                    : : "m" (desc), "a" (type), "c" (&desc) : "memory");
++}
++
++#define INVPCID_TYPE_INDIV_ADDR               0
++#define INVPCID_TYPE_SINGLE_CTXT      1
++#define INVPCID_TYPE_ALL_INCL_GLOBAL  2
++#define INVPCID_TYPE_ALL_NON_GLOBAL   3
++
++/* Flush all mappings for a given pcid and addr, not including globals. */
++static inline void invpcid_flush_one(unsigned long pcid,
++                                   unsigned long addr)
++{
++      __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
++}
++
++/* Flush all mappings for a given PCID, not including globals. */
++static inline void invpcid_flush_single_context(unsigned long pcid)
++{
++      __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
++}
++
++/* Flush all mappings, including globals, for all PCIDs. */
++static inline void invpcid_flush_all(void)
++{
++      __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
++}
++
++/* Flush all mappings for all PCIDs except globals. */
++static inline void invpcid_flush_all_nonglobals(void)
++{
++      __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
++}
++
++#endif /* _ASM_X86_INVPCID */
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -9,54 +9,7 @@
+ #include <asm/cpufeature.h>
+ #include <asm/special_insns.h>
+ #include <asm/smp.h>
+-
+-static inline void __invpcid(unsigned long pcid, unsigned long addr,
+-                           unsigned long type)
+-{
+-      struct { u64 d[2]; } desc = { { pcid, addr } };
+-
+-      /*
+-       * The memory clobber is because the whole point is to invalidate
+-       * stale TLB entries and, especially if we're flushing global
+-       * mappings, we don't want the compiler to reorder any subsequent
+-       * memory accesses before the TLB flush.
+-       *
+-       * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
+-       * invpcid (%rcx), %rax in long mode.
+-       */
+-      asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
+-                    : : "m" (desc), "a" (type), "c" (&desc) : "memory");
+-}
+-
+-#define INVPCID_TYPE_INDIV_ADDR               0
+-#define INVPCID_TYPE_SINGLE_CTXT      1
+-#define INVPCID_TYPE_ALL_INCL_GLOBAL  2
+-#define INVPCID_TYPE_ALL_NON_GLOBAL   3
+-
+-/* Flush all mappings for a given pcid and addr, not including globals. */
+-static inline void invpcid_flush_one(unsigned long pcid,
+-                                   unsigned long addr)
+-{
+-      __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
+-}
+-
+-/* Flush all mappings for a given PCID, not including globals. */
+-static inline void invpcid_flush_single_context(unsigned long pcid)
+-{
+-      __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
+-}
+-
+-/* Flush all mappings, including globals, for all PCIDs. */
+-static inline void invpcid_flush_all(void)
+-{
+-      __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
+-}
+-
+-/* Flush all mappings for all PCIDs except globals. */
+-static inline void invpcid_flush_all_nonglobals(void)
+-{
+-      __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
+-}
++#include <asm/invpcid.h>
+ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+ {
diff --git a/queue-4.14/x86-mm-dump_pagetables-check-page_present-for-real.patch b/queue-4.14/x86-mm-dump_pagetables-check-page_present-for-real.patch
new file mode 100644 (file)
index 0000000..6fcc63f
--- /dev/null
@@ -0,0 +1,47 @@
+From c05344947b37f7cda726e802457370bc6eac4d26 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 16 Dec 2017 01:14:39 +0100
+Subject: x86/mm/dump_pagetables: Check PAGE_PRESENT for real
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit c05344947b37f7cda726e802457370bc6eac4d26 upstream.
+
+The check for a present page in printk_prot():
+
+       if (!pgprot_val(prot)) {
+                /* Not present */
+
+is bogus. If a PTE is set to PAGE_NONE then the pgprot_val is not zero and
+the entry is decoded in bogus ways, e.g. as RX GLB. That is confusing when
+analyzing mapping correctness. Check for the present bit to make an
+informed decision.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/mm/dump_pagetables.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/mm/dump_pagetables.c
++++ b/arch/x86/mm/dump_pagetables.c
+@@ -140,7 +140,7 @@ static void printk_prot(struct seq_file
+       static const char * const level_name[] =
+               { "cr3", "pgd", "p4d", "pud", "pmd", "pte" };
+-      if (!pgprot_val(prot)) {
++      if (!(pr & _PAGE_PRESENT)) {
+               /* Not present */
+               pt_dump_cont_printf(m, dmsg, "                              ");
+       } else {
diff --git a/queue-4.14/x86-mm-dump_pagetables-make-the-address-hints-correct-and-readable.patch b/queue-4.14/x86-mm-dump_pagetables-make-the-address-hints-correct-and-readable.patch
new file mode 100644 (file)
index 0000000..331847c
--- /dev/null
@@ -0,0 +1,159 @@
+From 146122e24bdf208015d629babba673e28d090709 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 20 Dec 2017 18:07:42 +0100
+Subject: x86/mm/dump_pagetables: Make the address hints correct and readable
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 146122e24bdf208015d629babba673e28d090709 upstream.
+
+The address hints are a trainwreck. The array entry numbers have to be kept
+magically in sync with the actual hints, which is doomed as some of the
+array members are initialized at runtime via the entry numbers.
+
+Designated initializers have been around before this code was
+implemented....
+
+Use the entry numbers to populate the address hints array and add the
+missing bits and pieces. Split 32 and 64 bit for readability's sake.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/mm/dump_pagetables.c |   90 ++++++++++++++++++++++++------------------
+ 1 file changed, 53 insertions(+), 37 deletions(-)
+
+--- a/arch/x86/mm/dump_pagetables.c
++++ b/arch/x86/mm/dump_pagetables.c
+@@ -44,10 +44,12 @@ struct addr_marker {
+       unsigned long max_lines;
+ };
+-/* indices for address_markers; keep sync'd w/ address_markers below */
++/* Address space markers hints */
++
++#ifdef CONFIG_X86_64
++
+ enum address_markers_idx {
+       USER_SPACE_NR = 0,
+-#ifdef CONFIG_X86_64
+       KERNEL_SPACE_NR,
+       LOW_KERNEL_NR,
+       VMALLOC_START_NR,
+@@ -56,56 +58,70 @@ enum address_markers_idx {
+       KASAN_SHADOW_START_NR,
+       KASAN_SHADOW_END_NR,
+ #endif
+-# ifdef CONFIG_X86_ESPFIX64
++#ifdef CONFIG_X86_ESPFIX64
+       ESPFIX_START_NR,
+-# endif
++#endif
++#ifdef CONFIG_EFI
++      EFI_END_NR,
++#endif
+       HIGH_KERNEL_NR,
+       MODULES_VADDR_NR,
+       MODULES_END_NR,
+-#else
++      FIXADDR_START_NR,
++      END_OF_SPACE_NR,
++};
++
++static struct addr_marker address_markers[] = {
++      [USER_SPACE_NR]         = { 0,                  "User Space" },
++      [KERNEL_SPACE_NR]       = { (1UL << 63),        "Kernel Space" },
++      [LOW_KERNEL_NR]         = { 0UL,                "Low Kernel Mapping" },
++      [VMALLOC_START_NR]      = { 0UL,                "vmalloc() Area" },
++      [VMEMMAP_START_NR]      = { 0UL,                "Vmemmap" },
++#ifdef CONFIG_KASAN
++      [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" },
++      [KASAN_SHADOW_END_NR]   = { KASAN_SHADOW_END,   "KASAN shadow end" },
++#endif
++#ifdef CONFIG_X86_ESPFIX64
++      [ESPFIX_START_NR]       = { ESPFIX_BASE_ADDR,   "ESPfix Area", 16 },
++#endif
++#ifdef CONFIG_EFI
++      [EFI_END_NR]            = { EFI_VA_END,         "EFI Runtime Services" },
++#endif
++      [HIGH_KERNEL_NR]        = { __START_KERNEL_map, "High Kernel Mapping" },
++      [MODULES_VADDR_NR]      = { MODULES_VADDR,      "Modules" },
++      [MODULES_END_NR]        = { MODULES_END,        "End Modules" },
++      [FIXADDR_START_NR]      = { FIXADDR_START,      "Fixmap Area" },
++      [END_OF_SPACE_NR]       = { -1,                 NULL }
++};
++
++#else /* CONFIG_X86_64 */
++
++enum address_markers_idx {
++      USER_SPACE_NR = 0,
+       KERNEL_SPACE_NR,
+       VMALLOC_START_NR,
+       VMALLOC_END_NR,
+-# ifdef CONFIG_HIGHMEM
++#ifdef CONFIG_HIGHMEM
+       PKMAP_BASE_NR,
+-# endif
+-      FIXADDR_START_NR,
+ #endif
++      FIXADDR_START_NR,
++      END_OF_SPACE_NR,
+ };
+-/* Address space markers hints */
+ static struct addr_marker address_markers[] = {
+-      { 0, "User Space" },
+-#ifdef CONFIG_X86_64
+-      { 0x8000000000000000UL, "Kernel Space" },
+-      { 0/* PAGE_OFFSET */,   "Low Kernel Mapping" },
+-      { 0/* VMALLOC_START */, "vmalloc() Area" },
+-      { 0/* VMEMMAP_START */, "Vmemmap" },
+-#ifdef CONFIG_KASAN
+-      { KASAN_SHADOW_START,   "KASAN shadow" },
+-      { KASAN_SHADOW_END,     "KASAN shadow end" },
+-#endif
+-# ifdef CONFIG_X86_ESPFIX64
+-      { ESPFIX_BASE_ADDR,     "ESPfix Area", 16 },
+-# endif
+-# ifdef CONFIG_EFI
+-      { EFI_VA_END,           "EFI Runtime Services" },
+-# endif
+-      { __START_KERNEL_map,   "High Kernel Mapping" },
+-      { MODULES_VADDR,        "Modules" },
+-      { MODULES_END,          "End Modules" },
+-#else
+-      { PAGE_OFFSET,          "Kernel Mapping" },
+-      { 0/* VMALLOC_START */, "vmalloc() Area" },
+-      { 0/*VMALLOC_END*/,     "vmalloc() End" },
+-# ifdef CONFIG_HIGHMEM
+-      { 0/*PKMAP_BASE*/,      "Persistent kmap() Area" },
+-# endif
+-      { 0/*FIXADDR_START*/,   "Fixmap Area" },
++      [USER_SPACE_NR]         = { 0,                  "User Space" },
++      [KERNEL_SPACE_NR]       = { PAGE_OFFSET,        "Kernel Mapping" },
++      [VMALLOC_START_NR]      = { 0UL,                "vmalloc() Area" },
++      [VMALLOC_END_NR]        = { 0UL,                "vmalloc() End" },
++#ifdef CONFIG_HIGHMEM
++      [PKMAP_BASE_NR]         = { 0UL,                "Persistent kmap() Area" },
+ #endif
+-      { -1, NULL }            /* End of list */
++      [FIXADDR_START_NR]      = { 0UL,                "Fixmap area" },
++      [END_OF_SPACE_NR]       = { -1,                 NULL }
+ };
++#endif /* !CONFIG_X86_64 */
++
+ /* Multipliers for offsets within the PTEs */
+ #define PTE_LEVEL_MULT (PAGE_SIZE)
+ #define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
diff --git a/queue-4.14/x86-mm-move-the-cr3-construction-functions-to-tlbflush.h.patch b/queue-4.14/x86-mm-move-the-cr3-construction-functions-to-tlbflush.h.patch
new file mode 100644 (file)
index 0000000..65e1c00
--- /dev/null
@@ -0,0 +1,166 @@
+From 50fb83a62cf472dc53ba23bd3f7bd6c1b2b3b53e Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Mon, 4 Dec 2017 15:07:54 +0100
+Subject: x86/mm: Move the CR3 construction functions to tlbflush.h
+
+From: Dave Hansen <dave.hansen@linux.intel.com>
+
+commit 50fb83a62cf472dc53ba23bd3f7bd6c1b2b3b53e upstream.
+
+For flushing the TLB, the ASID which has been programmed into the hardware
+must be known.  That differs from what is in 'cpu_tlbstate'.
+
+Add functions to transform the 'cpu_tlbstate' values into the one
+programmed into the hardware (CR3).
+
+It's not easy to include mmu_context.h into tlbflush.h, so just move the
+CR3 building over to tlbflush.h.
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/mmu_context.h |   29 +----------------------------
+ arch/x86/include/asm/tlbflush.h    |   26 ++++++++++++++++++++++++++
+ arch/x86/mm/tlb.c                  |    8 ++++----
+ 3 files changed, 31 insertions(+), 32 deletions(-)
+
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -291,33 +291,6 @@ static inline bool arch_vma_access_permi
+ }
+ /*
+- * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID
+- * bits.  This serves two purposes.  It prevents a nasty situation in
+- * which PCID-unaware code saves CR3, loads some other value (with PCID
+- * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if
+- * the saved ASID was nonzero.  It also means that any bugs involving
+- * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger
+- * deterministically.
+- */
+-
+-static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
+-{
+-      if (static_cpu_has(X86_FEATURE_PCID)) {
+-              VM_WARN_ON_ONCE(asid > 4094);
+-              return __sme_pa(mm->pgd) | (asid + 1);
+-      } else {
+-              VM_WARN_ON_ONCE(asid != 0);
+-              return __sme_pa(mm->pgd);
+-      }
+-}
+-
+-static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
+-{
+-      VM_WARN_ON_ONCE(asid > 4094);
+-      return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH;
+-}
+-
+-/*
+  * This can be used from process context to figure out what the value of
+  * CR3 is without needing to do a (slow) __read_cr3().
+  *
+@@ -326,7 +299,7 @@ static inline unsigned long build_cr3_no
+  */
+ static inline unsigned long __get_current_cr3_fast(void)
+ {
+-      unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm),
++      unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
+               this_cpu_read(cpu_tlbstate.loaded_mm_asid));
+       /* For now, be very restrictive about when this can be called. */
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -69,6 +69,32 @@ static inline u64 inc_mm_tlb_gen(struct
+       return atomic64_inc_return(&mm->context.tlb_gen);
+ }
++/*
++ * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID bits.
++ * This serves two purposes.  It prevents a nasty situation in which
++ * PCID-unaware code saves CR3, loads some other value (with PCID == 0),
++ * and then restores CR3, thus corrupting the TLB for ASID 0 if the saved
++ * ASID was nonzero.  It also means that any bugs involving loading a
++ * PCID-enabled CR3 with CR4.PCIDE off will trigger deterministically.
++ */
++struct pgd_t;
++static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
++{
++      if (static_cpu_has(X86_FEATURE_PCID)) {
++              VM_WARN_ON_ONCE(asid > 4094);
++              return __sme_pa(pgd) | (asid + 1);
++      } else {
++              VM_WARN_ON_ONCE(asid != 0);
++              return __sme_pa(pgd);
++      }
++}
++
++static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
++{
++      VM_WARN_ON_ONCE(asid > 4094);
++      return __sme_pa(pgd) | (asid + 1) | CR3_NOFLUSH;
++}
++
+ #ifdef CONFIG_PARAVIRT
+ #include <asm/paravirt.h>
+ #else
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -128,7 +128,7 @@ void switch_mm_irqs_off(struct mm_struct
+        * isn't free.
+        */
+ #ifdef CONFIG_DEBUG_VM
+-      if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
++      if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
+               /*
+                * If we were to BUG here, we'd be very likely to kill
+                * the system so hard that we don't see the call trace.
+@@ -195,7 +195,7 @@ void switch_mm_irqs_off(struct mm_struct
+               if (need_flush) {
+                       this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
+                       this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
+-                      write_cr3(build_cr3(next, new_asid));
++                      write_cr3(build_cr3(next->pgd, new_asid));
+                       /*
+                        * NB: This gets called via leave_mm() in the idle path
+@@ -208,7 +208,7 @@ void switch_mm_irqs_off(struct mm_struct
+                       trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+               } else {
+                       /* The new ASID is already up to date. */
+-                      write_cr3(build_cr3_noflush(next, new_asid));
++                      write_cr3(build_cr3_noflush(next->pgd, new_asid));
+                       /* See above wrt _rcuidle. */
+                       trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
+@@ -288,7 +288,7 @@ void initialize_tlbstate_and_flush(void)
+               !(cr4_read_shadow() & X86_CR4_PCIDE));
+       /* Force ASID 0 and force a TLB flush. */
+-      write_cr3(build_cr3(mm, 0));
++      write_cr3(build_cr3(mm->pgd, 0));
+       /* Reinitialize tlbstate. */
+       this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
diff --git a/queue-4.14/x86-mm-put-mmu-to-hardware-asid-translation-in-one-place.patch b/queue-4.14/x86-mm-put-mmu-to-hardware-asid-translation-in-one-place.patch
new file mode 100644 (file)
index 0000000..781b161
--- /dev/null
@@ -0,0 +1,97 @@
+From dd95f1a4b5ca904c78e6a097091eb21436478abb Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Mon, 4 Dec 2017 15:07:56 +0100
+Subject: x86/mm: Put MMU to hardware ASID translation in one place
+
+From: Dave Hansen <dave.hansen@linux.intel.com>
+
+commit dd95f1a4b5ca904c78e6a097091eb21436478abb upstream.
+
+There are effectively two ASID types:
+
+ 1. The one stored in the mmu_context that goes from 0..5
+ 2. The one programmed into the hardware that goes from 1..6
+
+This consolidates the locations that convert between the two (by doing
+a +1) into a single place, which gives us a nice place to comment.
+PAGE_TABLE_ISOLATION will also need to, given an ASID, know which hardware
+ASID to flush for the userspace mapping.
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/tlbflush.h |   29 ++++++++++++++++++-----------
+ 1 file changed, 18 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -85,20 +85,26 @@ static inline u64 inc_mm_tlb_gen(struct
+  */
+ #define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_ASID_BITS) - 2)
+-/*
+- * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID bits.
+- * This serves two purposes.  It prevents a nasty situation in which
+- * PCID-unaware code saves CR3, loads some other value (with PCID == 0),
+- * and then restores CR3, thus corrupting the TLB for ASID 0 if the saved
+- * ASID was nonzero.  It also means that any bugs involving loading a
+- * PCID-enabled CR3 with CR4.PCIDE off will trigger deterministically.
+- */
++static inline u16 kern_pcid(u16 asid)
++{
++      VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
++      /*
++       * If PCID is on, ASID-aware code paths put the ASID+1 into the
++       * PCID bits.  This serves two purposes.  It prevents a nasty
++       * situation in which PCID-unaware code saves CR3, loads some other
++       * value (with PCID == 0), and then restores CR3, thus corrupting
++       * the TLB for ASID 0 if the saved ASID was nonzero.  It also means
++       * that any bugs involving loading a PCID-enabled CR3 with
++       * CR4.PCIDE off will trigger deterministically.
++       */
++      return asid + 1;
++}
++
+ struct pgd_t;
+ static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
+ {
+       if (static_cpu_has(X86_FEATURE_PCID)) {
+-              VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+-              return __sme_pa(pgd) | (asid + 1);
++              return __sme_pa(pgd) | kern_pcid(asid);
+       } else {
+               VM_WARN_ON_ONCE(asid != 0);
+               return __sme_pa(pgd);
+@@ -108,7 +114,8 @@ static inline unsigned long build_cr3(pg
+ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
+ {
+       VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+-      return __sme_pa(pgd) | (asid + 1) | CR3_NOFLUSH;
++      VM_WARN_ON_ONCE(!this_cpu_has(X86_FEATURE_PCID));
++      return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
+ }
+ #ifdef CONFIG_PARAVIRT
diff --git a/queue-4.14/x86-mm-remove-hard-coded-asid-limit-checks.patch b/queue-4.14/x86-mm-remove-hard-coded-asid-limit-checks.patch
new file mode 100644 (file)
index 0000000..3872b2f
--- /dev/null
@@ -0,0 +1,87 @@
+From cb0a9144a744e55207e24dcef812f05cd15a499a Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Mon, 4 Dec 2017 15:07:55 +0100
+Subject: x86/mm: Remove hard-coded ASID limit checks
+
+From: Dave Hansen <dave.hansen@linux.intel.com>
+
+commit cb0a9144a744e55207e24dcef812f05cd15a499a upstream.
+
+First, it's nice to remove the magic numbers.
+
+Second, PAGE_TABLE_ISOLATION is going to consume half of the available ASID
+space.  The space is currently unused, but add a comment to spell out this
+new restriction.
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/tlbflush.h |   20 ++++++++++++++++++--
+ 1 file changed, 18 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -69,6 +69,22 @@ static inline u64 inc_mm_tlb_gen(struct
+       return atomic64_inc_return(&mm->context.tlb_gen);
+ }
++/* There are 12 bits of space for ASIDS in CR3 */
++#define CR3_HW_ASID_BITS              12
++/*
++ * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for
++ * user/kernel switches
++ */
++#define PTI_CONSUMED_ASID_BITS                0
++
++#define CR3_AVAIL_ASID_BITS (CR3_HW_ASID_BITS - PTI_CONSUMED_ASID_BITS)
++/*
++ * ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid.  -1 below to account
++ * for them being zero-based.  Another -1 is because ASID 0 is reserved for
++ * use by non-PCID-aware users.
++ */
++#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_ASID_BITS) - 2)
++
+ /*
+  * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID bits.
+  * This serves two purposes.  It prevents a nasty situation in which
+@@ -81,7 +97,7 @@ struct pgd_t;
+ static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
+ {
+       if (static_cpu_has(X86_FEATURE_PCID)) {
+-              VM_WARN_ON_ONCE(asid > 4094);
++              VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+               return __sme_pa(pgd) | (asid + 1);
+       } else {
+               VM_WARN_ON_ONCE(asid != 0);
+@@ -91,7 +107,7 @@ static inline unsigned long build_cr3(pg
+ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
+ {
+-      VM_WARN_ON_ONCE(asid > 4094);
++      VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+       return __sme_pa(pgd) | (asid + 1) | CR3_NOFLUSH;
+ }
diff --git a/queue-4.14/x86-mm-remove-superfluous-barriers.patch b/queue-4.14/x86-mm-remove-superfluous-barriers.patch
new file mode 100644 (file)
index 0000000..871d7a7
--- /dev/null
@@ -0,0 +1,63 @@
+From b5fc6d943808b570bdfbec80f40c6b3855f1c48b Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 5 Dec 2017 13:34:46 +0100
+Subject: x86/mm: Remove superfluous barriers
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit b5fc6d943808b570bdfbec80f40c6b3855f1c48b upstream.
+
+atomic64_inc_return() already implies smp_mb() before and after.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/tlbflush.h |    8 +-------
+ 1 file changed, 1 insertion(+), 7 deletions(-)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -60,19 +60,13 @@ static inline void invpcid_flush_all_non
+ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+ {
+-      u64 new_tlb_gen;
+-
+       /*
+        * Bump the generation count.  This also serves as a full barrier
+        * that synchronizes with switch_mm(): callers are required to order
+        * their read of mm_cpumask after their writes to the paging
+        * structures.
+        */
+-      smp_mb__before_atomic();
+-      new_tlb_gen = atomic64_inc_return(&mm->context.tlb_gen);
+-      smp_mb__after_atomic();
+-
+-      return new_tlb_gen;
++      return atomic64_inc_return(&mm->context.tlb_gen);
+ }
+ #ifdef CONFIG_PARAVIRT
diff --git a/queue-4.14/x86-mm-use-__flush_tlb_one-for-kernel-memory.patch b/queue-4.14/x86-mm-use-__flush_tlb_one-for-kernel-memory.patch
new file mode 100644 (file)
index 0000000..0e76f8b
--- /dev/null
@@ -0,0 +1,52 @@
+From a501686b2923ce6f2ff2b1d0d50682c6411baf72 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 5 Dec 2017 13:34:49 +0100
+Subject: x86/mm: Use __flush_tlb_one() for kernel memory
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit a501686b2923ce6f2ff2b1d0d50682c6411baf72 upstream.
+
+__flush_tlb_single() is for user mappings, __flush_tlb_one() for
+kernel mappings.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/mm/tlb.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -551,7 +551,7 @@ static void do_kernel_range_flush(void *
+       /* flush range by one by one 'invlpg' */
+       for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
+-              __flush_tlb_single(addr);
++              __flush_tlb_one(addr);
+ }
+ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
diff --git a/queue-4.14/x86-uv-use-the-right-tlb-flush-api.patch b/queue-4.14/x86-uv-use-the-right-tlb-flush-api.patch
new file mode 100644 (file)
index 0000000..ae49812
--- /dev/null
@@ -0,0 +1,55 @@
+From 3e46e0f5ee3643a1239be9046c7ba6c66ca2b329 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 5 Dec 2017 13:34:50 +0100
+Subject: x86/uv: Use the right TLB-flush API
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 3e46e0f5ee3643a1239be9046c7ba6c66ca2b329 upstream.
+
+Since uv_flush_tlb_others() implements flush_tlb_others() which is
+about flushing user mappings, we should use __flush_tlb_single(),
+which is also about flushing user mappings.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Andrew Banman <abanman@hpe.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mike Travis <mike.travis@hpe.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/platform/uv/tlb_uv.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/platform/uv/tlb_uv.c
++++ b/arch/x86/platform/uv/tlb_uv.c
+@@ -299,7 +299,7 @@ static void bau_process_message(struct m
+               local_flush_tlb();
+               stat->d_alltlb++;
+       } else {
+-              __flush_tlb_one(msg->address);
++              __flush_tlb_single(msg->address);
+               stat->d_onetlb++;
+       }
+       stat->d_requestee++;
diff --git a/queue-4.14/x86-vsyscall-64-explicitly-set-_page_user-in-the-pagetable-hierarchy.patch b/queue-4.14/x86-vsyscall-64-explicitly-set-_page_user-in-the-pagetable-hierarchy.patch
new file mode 100644 (file)
index 0000000..f4b957a
--- /dev/null
@@ -0,0 +1,98 @@
+From 49275fef986abfb8b476e4708aaecc07e7d3e087 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sun, 10 Dec 2017 22:47:19 -0800
+Subject: x86/vsyscall/64: Explicitly set _PAGE_USER in the pagetable hierarchy
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 49275fef986abfb8b476e4708aaecc07e7d3e087 upstream.
+
+The kernel is very erratic as to which pagetables have _PAGE_USER set.  The
+vsyscall page gets lucky: it seems that all of the relevant pagetables are
+among the apparently arbitrary ones that set _PAGE_USER.  Rather than
+relying on chance, just explicitly set _PAGE_USER.
+
+This will let us clean up pagetable setup to stop setting _PAGE_USER.  The
+added code can also be reused by pagetable isolation to manage the
+_PAGE_USER bit in the usermode tables.
+
+[ tglx: Folded paravirt fix from Juergen Gross ]
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/vsyscall/vsyscall_64.c |   34 +++++++++++++++++++++++++++++++++-
+ 1 file changed, 33 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/entry/vsyscall/vsyscall_64.c
++++ b/arch/x86/entry/vsyscall/vsyscall_64.c
+@@ -37,6 +37,7 @@
+ #include <asm/unistd.h>
+ #include <asm/fixmap.h>
+ #include <asm/traps.h>
++#include <asm/paravirt.h>
+ #define CREATE_TRACE_POINTS
+ #include "vsyscall_trace.h"
+@@ -329,16 +330,47 @@ int in_gate_area_no_mm(unsigned long add
+       return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR;
+ }
++/*
++ * The VSYSCALL page is the only user-accessible page in the kernel address
++ * range.  Normally, the kernel page tables can have _PAGE_USER clear, but
++ * the tables covering VSYSCALL_ADDR need _PAGE_USER set if vsyscalls
++ * are enabled.
++ *
++ * Some day we may create a "minimal" vsyscall mode in which we emulate
++ * vsyscalls but leave the page not present.  If so, we skip calling
++ * this.
++ */
++static void __init set_vsyscall_pgtable_user_bits(void)
++{
++      pgd_t *pgd;
++      p4d_t *p4d;
++      pud_t *pud;
++      pmd_t *pmd;
++
++      pgd = pgd_offset_k(VSYSCALL_ADDR);
++      set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
++      p4d = p4d_offset(pgd, VSYSCALL_ADDR);
++#if CONFIG_PGTABLE_LEVELS >= 5
++      p4d->p4d |= _PAGE_USER;
++#endif
++      pud = pud_offset(p4d, VSYSCALL_ADDR);
++      set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER));
++      pmd = pmd_offset(pud, VSYSCALL_ADDR);
++      set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_USER));
++}
++
+ void __init map_vsyscall(void)
+ {
+       extern char __vsyscall_page;
+       unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
+-      if (vsyscall_mode != NONE)
++      if (vsyscall_mode != NONE) {
+               __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
+                            vsyscall_mode == NATIVE
+                            ? PAGE_KERNEL_VSYSCALL
+                            : PAGE_KERNEL_VVAR);
++              set_vsyscall_pgtable_user_bits();
++      }
+       BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
+                    (unsigned long)VSYSCALL_ADDR);
diff --git a/queue-4.14/x86-vsyscall-64-warn-and-fail-vsyscall-emulation-in-native-mode.patch b/queue-4.14/x86-vsyscall-64-warn-and-fail-vsyscall-emulation-in-native-mode.patch
new file mode 100644 (file)
index 0000000..bbe855e
--- /dev/null
@@ -0,0 +1,45 @@
+From 4831b779403a836158917d59a7ca880483c67378 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sun, 10 Dec 2017 22:47:20 -0800
+Subject: x86/vsyscall/64: Warn and fail vsyscall emulation in NATIVE mode
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 4831b779403a836158917d59a7ca880483c67378 upstream.
+
+If something goes wrong with pagetable setup, vsyscall=native will
+accidentally fall back to emulation.  Make it warn and fail so that we
+notice.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/vsyscall/vsyscall_64.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/x86/entry/vsyscall/vsyscall_64.c
++++ b/arch/x86/entry/vsyscall/vsyscall_64.c
+@@ -139,6 +139,10 @@ bool emulate_vsyscall(struct pt_regs *re
+       WARN_ON_ONCE(address != regs->ip);
++      /* This should be unreachable in NATIVE mode. */
++      if (WARN_ON(vsyscall_mode == NATIVE))
++              return false;
++
+       if (vsyscall_mode == NONE) {
+               warn_bad_vsyscall(KERN_INFO, regs,
+                                 "vsyscall attempted with vsyscall=none");