--- /dev/null
+From bb82e0b4a7e96494f0c1004ce50cec3d7b5fb3d1 Mon Sep 17 00:00:00 2001
+From: Takashi Iwai <tiwai@suse.de>
+Date: Thu, 14 Dec 2017 13:31:16 +0100
+Subject: ACPI: APEI / ERST: Fix missing error handling in erst_reader()
+
+From: Takashi Iwai <tiwai@suse.de>
+
+commit bb82e0b4a7e96494f0c1004ce50cec3d7b5fb3d1 upstream.
+
+The commit f6f828513290 ("pstore: pass allocated memory region back to
+caller") changed the check of the return value from erst_read() in
+erst_reader() in the following way:
+
+ if (len == -ENOENT)
+ goto skip;
+- else if (len < 0) {
+- rc = -1;
++ else if (len < sizeof(*rcd)) {
++ rc = -EIO;
+ goto out;
+
+This introduced another bug: since the comparison against sizeof() is
+done as unsigned, a negative len value no longer hits this branch.
+As a result, when an error is returned from erst_read(), the code
+falls through, and it may eventually lead to some weird thing like
+memory corruption.
+
+This patch adds the negative error value check more explicitly for
+addressing the issue.
+
+Fixes: f6f828513290 (pstore: pass allocated memory region back to caller)
+Tested-by: Jerry Tang <jtang@suse.com>
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Acked-by: Kees Cook <keescook@chromium.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/acpi/apei/erst.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/acpi/apei/erst.c
++++ b/drivers/acpi/apei/erst.c
+@@ -1007,7 +1007,7 @@ skip:
+ /* The record may be cleared by others, try read next record */
+ if (len == -ENOENT)
+ goto skip;
+- else if (len < sizeof(*rcd)) {
++ else if (len < 0 || len < sizeof(*rcd)) {
+ rc = -EIO;
+ goto out;
+ }
--- /dev/null
+From adf6895754e2503d994a765535fd1813f8834674 Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Thu, 30 Nov 2017 19:42:52 -0800
+Subject: acpi, nfit: fix health event notification
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit adf6895754e2503d994a765535fd1813f8834674 upstream.
+
+Integration testing with a BIOS that generates injected health event
+notifications fails to communicate those events to userspace. The nfit
+driver neglects to link the ACPI DIMM device with the necessary driver
+data so acpi_nvdimm_notify() fails this lookup:
+
+ nfit_mem = dev_get_drvdata(dev);
+ if (nfit_mem && nfit_mem->flags_attr)
+ sysfs_notify_dirent(nfit_mem->flags_attr);
+
+Add the necessary linkage when installing the notification handler and
+clean it up when the nfit driver instance is torn down.
+
+Cc: Toshi Kani <toshi.kani@hpe.com>
+Cc: Vishal Verma <vishal.l.verma@intel.com>
+Fixes: ba9c8dd3c222 ("acpi, nfit: add dimm device notification support")
+Reported-by: Daniel Osawa <daniel.k.osawa@intel.com>
+Tested-by: Daniel Osawa <daniel.k.osawa@intel.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/acpi/nfit/core.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/drivers/acpi/nfit/core.c
++++ b/drivers/acpi/nfit/core.c
+@@ -1457,6 +1457,11 @@ static int acpi_nfit_add_dimm(struct acp
+ dev_name(&adev_dimm->dev));
+ return -ENXIO;
+ }
++ /*
++ * Record nfit_mem for the notification path to track back to
++ * the nfit sysfs attributes for this dimm device object.
++ */
++ dev_set_drvdata(&adev_dimm->dev, nfit_mem);
+
+ /*
+ * Until standardization materializes we need to consider 4
+@@ -1516,9 +1521,11 @@ static void shutdown_dimm_notify(void *d
+ sysfs_put(nfit_mem->flags_attr);
+ nfit_mem->flags_attr = NULL;
+ }
+- if (adev_dimm)
++ if (adev_dimm) {
+ acpi_remove_notify_handler(adev_dimm->handle,
+ ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify);
++ dev_set_drvdata(&adev_dimm->dev, NULL);
++ }
+ }
+ mutex_unlock(&acpi_desc->init_mutex);
+ }
--- /dev/null
+From 2b4584d00a6bc02b63ab3c7213060d41a74bdff1 Mon Sep 17 00:00:00 2001
+From: Guneshwor Singh <guneshwor.o.singh@intel.com>
+Date: Thu, 7 Dec 2017 18:06:20 +0530
+Subject: ALSA: hda - Add vendor id for Cannonlake HDMI codec
+
+From: Guneshwor Singh <guneshwor.o.singh@intel.com>
+
+commit 2b4584d00a6bc02b63ab3c7213060d41a74bdff1 upstream.
+
+Cannonlake HDMI codec has the same nid as Geminilake. This adds the
+codec entry for it.
+
+Signed-off-by: Guneshwor Singh <guneshwor.o.singh@intel.com>
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ sound/pci/hda/patch_hdmi.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/sound/pci/hda/patch_hdmi.c
++++ b/sound/pci/hda/patch_hdmi.c
+@@ -55,10 +55,11 @@ MODULE_PARM_DESC(static_hdmi_pcm, "Don't
+ #define is_kabylake(codec) ((codec)->core.vendor_id == 0x8086280b)
+ #define is_geminilake(codec) (((codec)->core.vendor_id == 0x8086280d) || \
+ ((codec)->core.vendor_id == 0x80862800))
++#define is_cannonlake(codec) ((codec)->core.vendor_id == 0x8086280c)
+ #define is_haswell_plus(codec) (is_haswell(codec) || is_broadwell(codec) \
+ || is_skylake(codec) || is_broxton(codec) \
+- || is_kabylake(codec)) || is_geminilake(codec)
+-
++ || is_kabylake(codec)) || is_geminilake(codec) \
++ || is_cannonlake(codec)
+ #define is_valleyview(codec) ((codec)->core.vendor_id == 0x80862882)
+ #define is_cherryview(codec) ((codec)->core.vendor_id == 0x80862883)
+ #define is_valleyview_plus(codec) (is_valleyview(codec) || is_cherryview(codec))
+@@ -3841,6 +3842,7 @@ HDA_CODEC_ENTRY(0x80862808, "Broadwell H
+ HDA_CODEC_ENTRY(0x80862809, "Skylake HDMI", patch_i915_hsw_hdmi),
+ HDA_CODEC_ENTRY(0x8086280a, "Broxton HDMI", patch_i915_hsw_hdmi),
+ HDA_CODEC_ENTRY(0x8086280b, "Kabylake HDMI", patch_i915_hsw_hdmi),
++HDA_CODEC_ENTRY(0x8086280c, "Cannonlake HDMI", patch_i915_glk_hdmi),
+ HDA_CODEC_ENTRY(0x8086280d, "Geminilake HDMI", patch_i915_glk_hdmi),
+ HDA_CODEC_ENTRY(0x80862800, "Geminilake HDMI", patch_i915_glk_hdmi),
+ HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI", patch_generic_hdmi),
--- /dev/null
+From 9226665159f0367ad08bc7d5dd194aeadb90316f Mon Sep 17 00:00:00 2001
+From: Kailang Yang <kailang@realtek.com>
+Date: Thu, 14 Dec 2017 15:28:58 +0800
+Subject: ALSA: hda/realtek - Fix Dell AIO LineOut issue
+
+From: Kailang Yang <kailang@realtek.com>
+
+commit 9226665159f0367ad08bc7d5dd194aeadb90316f upstream.
+
+Dell AIO had LineOut jack.
+Add LineOut verb into this patch.
+
+[ Additional notes:
+ the ALC274 codec seems to require fixed pin / DAC connections for
+ HP / line-out pins for enabling EQ for speakers; i.e. the HP / LO
+ pins expect to be connected with NID 0x03 while keeping the speaker
+ with NID 0x02. However, by adding a new line-out pin, the
+ auto-parser assigns the NID 0x02 for HP/LO pins as primary outputs.
+ As an easy workaround, we provide the preferred_pairs[] to map
+ forcibly for these pins. -- tiwai ]
+
+Fixes: 75ee94b20b46 ("ALSA: hda - fix headset mic problem for Dell machines with alc274")
+Signed-off-by: Kailang Yang <kailang@realtek.com>
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ sound/pci/hda/patch_realtek.c | 35 ++++++++++++++++++++++++++++++++++-
+ 1 file changed, 34 insertions(+), 1 deletion(-)
+
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -5162,6 +5162,22 @@ static void alc233_alc662_fixup_lenovo_d
+ }
+ }
+
++/* Forcibly assign NID 0x03 to HP/LO while NID 0x02 to SPK for EQ */
++static void alc274_fixup_bind_dacs(struct hda_codec *codec,
++ const struct hda_fixup *fix, int action)
++{
++ struct alc_spec *spec = codec->spec;
++ static hda_nid_t preferred_pairs[] = {
++ 0x21, 0x03, 0x1b, 0x03, 0x16, 0x02,
++ 0
++ };
++
++ if (action != HDA_FIXUP_ACT_PRE_PROBE)
++ return;
++
++ spec->gen.preferred_dacs = preferred_pairs;
++}
++
+ /* for hda_fixup_thinkpad_acpi() */
+ #include "thinkpad_helper.c"
+
+@@ -5279,6 +5295,8 @@ enum {
+ ALC233_FIXUP_LENOVO_MULTI_CODECS,
+ ALC294_FIXUP_LENOVO_MIC_LOCATION,
+ ALC700_FIXUP_INTEL_REFERENCE,
++ ALC274_FIXUP_DELL_BIND_DACS,
++ ALC274_FIXUP_DELL_AIO_LINEOUT_VERB,
+ };
+
+ static const struct hda_fixup alc269_fixups[] = {
+@@ -6089,6 +6107,21 @@ static const struct hda_fixup alc269_fix
+ {}
+ }
+ },
++ [ALC274_FIXUP_DELL_BIND_DACS] = {
++ .type = HDA_FIXUP_FUNC,
++ .v.func = alc274_fixup_bind_dacs,
++ .chained = true,
++ .chain_id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE
++ },
++ [ALC274_FIXUP_DELL_AIO_LINEOUT_VERB] = {
++ .type = HDA_FIXUP_PINS,
++ .v.pins = (const struct hda_pintbl[]) {
++ { 0x1b, 0x0401102f },
++ { }
++ },
++ .chained = true,
++ .chain_id = ALC274_FIXUP_DELL_BIND_DACS
++ },
+ };
+
+ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+@@ -6550,7 +6583,7 @@ static const struct snd_hda_pin_quirk al
+ {0x14, 0x90170110},
+ {0x1b, 0x90a70130},
+ {0x21, 0x03211020}),
+- SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
++ SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB,
+ {0x12, 0xb7a60130},
+ {0x13, 0xb8a61140},
+ {0x16, 0x90170110},
--- /dev/null
+From c1cfd9025cc394fd137a01159d74335c5ac978ce Mon Sep 17 00:00:00 2001
+From: Takashi Iwai <tiwai@suse.de>
+Date: Thu, 14 Dec 2017 16:44:12 +0100
+Subject: ALSA: rawmidi: Avoid racy info ioctl via ctl device
+
+From: Takashi Iwai <tiwai@suse.de>
+
+commit c1cfd9025cc394fd137a01159d74335c5ac978ce upstream.
+
+The rawmidi also allows obtaining the information via ioctl of the ctl
+API. It means that user can issue an ioctl to the rawmidi device even
+when it's being removed as long as the control device is present.
+Although the code has some protection via the global register_mutex,
+its range is limited to the search of the corresponding rawmidi
+object, and the mutex is already unlocked at accessing the rawmidi
+object. This may lead to a use-after-free.
+
+For avoiding it, this patch widens the application of register_mutex
+to the whole snd_rawmidi_info_select() function. We have another
+mutex per rawmidi object, but this operation isn't very hot path, so
+it shouldn't matter from the performance POV.
+
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ sound/core/rawmidi.c | 15 ++++++++++++---
+ 1 file changed, 12 insertions(+), 3 deletions(-)
+
+--- a/sound/core/rawmidi.c
++++ b/sound/core/rawmidi.c
+@@ -579,15 +579,14 @@ static int snd_rawmidi_info_user(struct
+ return 0;
+ }
+
+-int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info)
++static int __snd_rawmidi_info_select(struct snd_card *card,
++ struct snd_rawmidi_info *info)
+ {
+ struct snd_rawmidi *rmidi;
+ struct snd_rawmidi_str *pstr;
+ struct snd_rawmidi_substream *substream;
+
+- mutex_lock(®ister_mutex);
+ rmidi = snd_rawmidi_search(card, info->device);
+- mutex_unlock(®ister_mutex);
+ if (!rmidi)
+ return -ENXIO;
+ if (info->stream < 0 || info->stream > 1)
+@@ -603,6 +602,16 @@ int snd_rawmidi_info_select(struct snd_c
+ }
+ return -ENXIO;
+ }
++
++int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info)
++{
++ int ret;
++
++ mutex_lock(®ister_mutex);
++ ret = __snd_rawmidi_info_select(card, info);
++ mutex_unlock(®ister_mutex);
++ return ret;
++}
+ EXPORT_SYMBOL(snd_rawmidi_info_select);
+
+ static int snd_rawmidi_info_select_user(struct snd_card *card,
--- /dev/null
+From 866f7ed7d67936dcdbcddc111c8af878c918fe7c Mon Sep 17 00:00:00 2001
+From: Jussi Laako <jussi@sonarnerd.net>
+Date: Thu, 7 Dec 2017 12:58:33 +0200
+Subject: ALSA: usb-audio: Add native DSD support for Esoteric D-05X
+
+From: Jussi Laako <jussi@sonarnerd.net>
+
+commit 866f7ed7d67936dcdbcddc111c8af878c918fe7c upstream.
+
+Adds VID:PID of Esoteric D-05X to the TEAC device id's.
+Renames the is_teac_50X_dac() function to is_teac_dsd_dac() to cover
+broader device family from the same corporation sharing the same USB
+audio implementation.
+
+Signed-off-by: Jussi Laako <jussi@sonarnerd.net>
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ sound/usb/quirks.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/sound/usb/quirks.c
++++ b/sound/usb/quirks.c
+@@ -1172,10 +1172,11 @@ static bool is_marantz_denon_dac(unsigne
+ /* TEAC UD-501/UD-503/NT-503 USB DACs need a vendor cmd to switch
+ * between PCM/DOP and native DSD mode
+ */
+-static bool is_teac_50X_dac(unsigned int id)
++static bool is_teac_dsd_dac(unsigned int id)
+ {
+ switch (id) {
+ case USB_ID(0x0644, 0x8043): /* TEAC UD-501/UD-503/NT-503 */
++ case USB_ID(0x0644, 0x8044): /* Esoteric D-05X */
+ return true;
+ }
+ return false;
+@@ -1208,7 +1209,7 @@ int snd_usb_select_mode_quirk(struct snd
+ break;
+ }
+ mdelay(20);
+- } else if (is_teac_50X_dac(subs->stream->chip->usb_id)) {
++ } else if (is_teac_dsd_dac(subs->stream->chip->usb_id)) {
+ /* Vendor mode switch cmd is required. */
+ switch (fmt->altsetting) {
+ case 3: /* DSD mode (DSD_U32) requested */
+@@ -1398,7 +1399,7 @@ u64 snd_usb_interface_dsd_format_quirks(
+ }
+
+ /* TEAC devices with USB DAC functionality */
+- if (is_teac_50X_dac(chip->usb_id)) {
++ if (is_teac_dsd_dac(chip->usb_id)) {
+ if (fp->altsetting == 3)
+ return SNDRV_PCM_FMTBIT_DSD_U32_BE;
+ }
--- /dev/null
+From 5a15f289ee87eaf33f13f08a4909ec99d837ec5f Mon Sep 17 00:00:00 2001
+From: Takashi Iwai <tiwai@suse.de>
+Date: Mon, 18 Dec 2017 23:36:57 +0100
+Subject: ALSA: usb-audio: Fix the missing ctl name suffix at parsing SU
+
+From: Takashi Iwai <tiwai@suse.de>
+
+commit 5a15f289ee87eaf33f13f08a4909ec99d837ec5f upstream.
+
+The commit 89b89d121ffc ("ALSA: usb-audio: Add check return value for
+usb_string()") added the check of the return value from
+snd_usb_copy_string_desc(), which is correct per se, but it introduced
+a regression. In the original code, either the "Clock Source",
+"Playback Source" or "Capture Source" suffix is added after the
+terminal string, while the commit changed it to add the suffix only
+when get_term_name() is failing. It ended up with an incorrect ctl
+name like "PCM" instead of "PCM Capture Source".
+
+Also, even the original code has a similar bug: when the ctl name is
+generated from snd_usb_copy_string_desc() for the given iSelector, it
+also doesn't put the suffix.
+
+This patch addresses these issues: the suffix is added always when no
+static mapping is found. Also the patch tries to put more comments
+and cleans up the if/else block for better readability in order to
+avoid the same pitfall again.
+
+Fixes: 89b89d121ffc ("ALSA: usb-audio: Add check return value for usb_string()")
+Reported-and-tested-by: Mauro Santos <registo.mailling@gmail.com>
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ sound/usb/mixer.c | 27 ++++++++++++++++-----------
+ 1 file changed, 16 insertions(+), 11 deletions(-)
+
+--- a/sound/usb/mixer.c
++++ b/sound/usb/mixer.c
+@@ -2173,20 +2173,25 @@ static int parse_audio_selector_unit(str
+ kctl->private_value = (unsigned long)namelist;
+ kctl->private_free = usb_mixer_selector_elem_free;
+
+- nameid = uac_selector_unit_iSelector(desc);
++ /* check the static mapping table at first */
+ len = check_mapped_name(map, kctl->id.name, sizeof(kctl->id.name));
+- if (len)
+- ;
+- else if (nameid)
+- len = snd_usb_copy_string_desc(state, nameid, kctl->id.name,
+- sizeof(kctl->id.name));
+- else
+- len = get_term_name(state, &state->oterm,
+- kctl->id.name, sizeof(kctl->id.name), 0);
+-
+ if (!len) {
+- strlcpy(kctl->id.name, "USB", sizeof(kctl->id.name));
++ /* no mapping ? */
++ /* if iSelector is given, use it */
++ nameid = uac_selector_unit_iSelector(desc);
++ if (nameid)
++ len = snd_usb_copy_string_desc(state, nameid,
++ kctl->id.name,
++ sizeof(kctl->id.name));
++ /* ... or pick up the terminal name at next */
++ if (!len)
++ len = get_term_name(state, &state->oterm,
++ kctl->id.name, sizeof(kctl->id.name), 0);
++ /* ... or use the fixed string "USB" as the last resort */
++ if (!len)
++ strlcpy(kctl->id.name, "USB", sizeof(kctl->id.name));
+
++ /* and add the proper suffix */
+ if (desc->bDescriptorSubtype == UAC2_CLOCK_SELECTOR)
+ append_ctl_name(kctl, " Clock Source");
+ else if ((state->oterm.type & 0xff00) == 0x0100)
--- /dev/null
+From c10e83f598d08046dd1ebc8360d4bb12d802d51b Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 14 Dec 2017 12:27:29 +0100
+Subject: arch, mm: Allow arch_dup_mmap() to fail
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit c10e83f598d08046dd1ebc8360d4bb12d802d51b upstream.
+
+In order to sanitize the LDT initialization on x86 arch_dup_mmap() must be
+allowed to fail. Fix up all instances.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Andy Lutomirsky <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: dan.j.williams@intel.com
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: kirill.shutemov@linux.intel.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/include/asm/mmu_context.h | 5 +++--
+ arch/um/include/asm/mmu_context.h | 3 ++-
+ arch/unicore32/include/asm/mmu_context.h | 5 +++--
+ arch/x86/include/asm/mmu_context.h | 4 ++--
+ include/asm-generic/mm_hooks.h | 5 +++--
+ kernel/fork.c | 3 +--
+ 6 files changed, 14 insertions(+), 11 deletions(-)
+
+--- a/arch/powerpc/include/asm/mmu_context.h
++++ b/arch/powerpc/include/asm/mmu_context.h
+@@ -114,9 +114,10 @@ static inline void enter_lazy_tlb(struct
+ #endif
+ }
+
+-static inline void arch_dup_mmap(struct mm_struct *oldmm,
+- struct mm_struct *mm)
++static inline int arch_dup_mmap(struct mm_struct *oldmm,
++ struct mm_struct *mm)
+ {
++ return 0;
+ }
+
+ static inline void arch_exit_mmap(struct mm_struct *mm)
+--- a/arch/um/include/asm/mmu_context.h
++++ b/arch/um/include/asm/mmu_context.h
+@@ -15,9 +15,10 @@ extern void uml_setup_stubs(struct mm_st
+ /*
+ * Needed since we do not use the asm-generic/mm_hooks.h:
+ */
+-static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
++static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
+ {
+ uml_setup_stubs(mm);
++ return 0;
+ }
+ extern void arch_exit_mmap(struct mm_struct *mm);
+ static inline void arch_unmap(struct mm_struct *mm,
+--- a/arch/unicore32/include/asm/mmu_context.h
++++ b/arch/unicore32/include/asm/mmu_context.h
+@@ -81,9 +81,10 @@ do { \
+ } \
+ } while (0)
+
+-static inline void arch_dup_mmap(struct mm_struct *oldmm,
+- struct mm_struct *mm)
++static inline int arch_dup_mmap(struct mm_struct *oldmm,
++ struct mm_struct *mm)
+ {
++ return 0;
+ }
+
+ static inline void arch_unmap(struct mm_struct *mm,
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -176,10 +176,10 @@ do { \
+ } while (0)
+ #endif
+
+-static inline void arch_dup_mmap(struct mm_struct *oldmm,
+- struct mm_struct *mm)
++static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
+ {
+ paravirt_arch_dup_mmap(oldmm, mm);
++ return 0;
+ }
+
+ static inline void arch_exit_mmap(struct mm_struct *mm)
+--- a/include/asm-generic/mm_hooks.h
++++ b/include/asm-generic/mm_hooks.h
+@@ -7,9 +7,10 @@
+ #ifndef _ASM_GENERIC_MM_HOOKS_H
+ #define _ASM_GENERIC_MM_HOOKS_H
+
+-static inline void arch_dup_mmap(struct mm_struct *oldmm,
+- struct mm_struct *mm)
++static inline int arch_dup_mmap(struct mm_struct *oldmm,
++ struct mm_struct *mm)
+ {
++ return 0;
+ }
+
+ static inline void arch_exit_mmap(struct mm_struct *mm)
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -721,8 +721,7 @@ static __latent_entropy int dup_mmap(str
+ goto out;
+ }
+ /* a new mm has just been created */
+- arch_dup_mmap(oldmm, mm);
+- retval = 0;
++ retval = arch_dup_mmap(oldmm, mm);
+ out:
+ up_write(&mm->mmap_sem);
+ flush_tlb_mm(oldmm);
--- /dev/null
+From bfe766cf65fb65e68c4764f76158718560bdcee5 Mon Sep 17 00:00:00 2001
+From: Julien Thierry <julien.thierry@arm.com>
+Date: Wed, 6 Dec 2017 17:09:49 +0000
+Subject: arm64: kvm: Prevent restoring stale PMSCR_EL1 for vcpu
+
+From: Julien Thierry <julien.thierry@arm.com>
+
+commit bfe766cf65fb65e68c4764f76158718560bdcee5 upstream.
+
+When VHE is not present, KVM needs to save and restore PMSCR_EL1 when
+possible. If SPE is used by the host, value of PMSCR_EL1 cannot be saved
+for the guest.
+If the host starts using SPE between two save+restore on the same vcpu,
+restore will write the value of PMSCR_EL1 read during the first save.
+
+Make sure __debug_save_spe_nvhe clears the value of the saved PMSCR_EL1
+when the guest cannot use SPE.
+
+Signed-off-by: Julien Thierry <julien.thierry@arm.com>
+Cc: Christoffer Dall <christoffer.dall@linaro.org>
+Cc: Marc Zyngier <marc.zyngier@arm.com>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Reviewed-by: Will Deacon <will.deacon@arm.com>
+Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
+Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/kvm/hyp/debug-sr.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/arm64/kvm/hyp/debug-sr.c
++++ b/arch/arm64/kvm/hyp/debug-sr.c
+@@ -84,6 +84,9 @@ static void __hyp_text __debug_save_spe_
+ {
+ u64 reg;
+
++ /* Clear pmscr in case of early return */
++ *pmscr_el1 = 0;
++
+ /* SPE present on this CPU? */
+ if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1),
+ ID_AA64DFR0_PMSVER_SHIFT))
--- /dev/null
+From 111be883981748acc9a56e855c8336404a8e787c Mon Sep 17 00:00:00 2001
+From: Shaohua Li <shli@fb.com>
+Date: Wed, 20 Dec 2017 11:10:17 -0700
+Subject: block-throttle: avoid double charge
+
+From: Shaohua Li <shli@fb.com>
+
+commit 111be883981748acc9a56e855c8336404a8e787c upstream.
+
+If a bio is throttled and split after throttling, the bio could be
+resubmitted and enter the throttling again. This will cause part of the
+bio to be charged multiple times. If the cgroup has an IO limit, the
+double charge will significantly harm the performance. The bio split
+becomes quite common after arbitrary bio size change.
+
+To fix this, we always set the BIO_THROTTLED flag if a bio is throttled.
+If the bio is cloned/split, we copy the flag to new bio too to avoid a
+double charge. However, a cloned bio could be directed to a new disk, and
+keeping the flag would be a problem. The observation is that we always set
+a new disk for the bio in this case, so we can clear the flag in bio_set_dev().
+
+This issue has existed for a long time; arbitrary bio size change just makes
+it worse, so this should go into stable at least since v4.2.
+
+V1-> V2: Not add extra field in bio based on discussion with Tejun
+
+Cc: Vivek Goyal <vgoyal@redhat.com>
+Acked-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Shaohua Li <shli@fb.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ block/bio.c | 2 ++
+ block/blk-throttle.c | 8 +-------
+ include/linux/bio.h | 2 ++
+ include/linux/blk_types.h | 9 ++++-----
+ 4 files changed, 9 insertions(+), 12 deletions(-)
+
+--- a/block/bio.c
++++ b/block/bio.c
+@@ -599,6 +599,8 @@ void __bio_clone_fast(struct bio *bio, s
+ bio->bi_disk = bio_src->bi_disk;
+ bio->bi_partno = bio_src->bi_partno;
+ bio_set_flag(bio, BIO_CLONED);
++ if (bio_flagged(bio_src, BIO_THROTTLED))
++ bio_set_flag(bio, BIO_THROTTLED);
+ bio->bi_opf = bio_src->bi_opf;
+ bio->bi_write_hint = bio_src->bi_write_hint;
+ bio->bi_iter = bio_src->bi_iter;
+--- a/block/blk-throttle.c
++++ b/block/blk-throttle.c
+@@ -2223,13 +2223,7 @@ again:
+ out_unlock:
+ spin_unlock_irq(q->queue_lock);
+ out:
+- /*
+- * As multiple blk-throtls may stack in the same issue path, we
+- * don't want bios to leave with the flag set. Clear the flag if
+- * being issued.
+- */
+- if (!throttled)
+- bio_clear_flag(bio, BIO_THROTTLED);
++ bio_set_flag(bio, BIO_THROTTLED);
+
+ #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+ if (throttled || !td->track_bio_latency)
+--- a/include/linux/bio.h
++++ b/include/linux/bio.h
+@@ -504,6 +504,8 @@ extern unsigned int bvec_nr_vecs(unsigne
+
+ #define bio_set_dev(bio, bdev) \
+ do { \
++ if ((bio)->bi_disk != (bdev)->bd_disk) \
++ bio_clear_flag(bio, BIO_THROTTLED);\
+ (bio)->bi_disk = (bdev)->bd_disk; \
+ (bio)->bi_partno = (bdev)->bd_partno; \
+ } while (0)
+--- a/include/linux/blk_types.h
++++ b/include/linux/blk_types.h
+@@ -50,8 +50,6 @@ struct blk_issue_stat {
+ struct bio {
+ struct bio *bi_next; /* request queue link */
+ struct gendisk *bi_disk;
+- u8 bi_partno;
+- blk_status_t bi_status;
+ unsigned int bi_opf; /* bottom bits req flags,
+ * top bits REQ_OP. Use
+ * accessors.
+@@ -59,8 +57,8 @@ struct bio {
+ unsigned short bi_flags; /* status, etc and bvec pool number */
+ unsigned short bi_ioprio;
+ unsigned short bi_write_hint;
+-
+- struct bvec_iter bi_iter;
++ blk_status_t bi_status;
++ u8 bi_partno;
+
+ /* Number of segments in this BIO after
+ * physical address coalescing is performed.
+@@ -74,8 +72,9 @@ struct bio {
+ unsigned int bi_seg_front_size;
+ unsigned int bi_seg_back_size;
+
+- atomic_t __bi_remaining;
++ struct bvec_iter bi_iter;
+
++ atomic_t __bi_remaining;
+ bio_end_io_t *bi_end_io;
+
+ void *bi_private;
--- /dev/null
+From 4ccafe032005e9b96acbef2e389a4de5b1254add Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 20 Dec 2017 13:13:58 -0700
+Subject: block: unalign call_single_data in struct request
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit 4ccafe032005e9b96acbef2e389a4de5b1254add upstream.
+
+A previous change blindly added massive alignment to the
+call_single_data structure in struct request. This ballooned it in size
+from 296 to 320 bytes on my setup, for no valid reason at all.
+
+Use the unaligned struct __call_single_data variant instead.
+
+Fixes: 966a967116e69 ("smp: Avoid using two cache lines for struct call_single_data")
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/blkdev.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/linux/blkdev.h
++++ b/include/linux/blkdev.h
+@@ -135,7 +135,7 @@ typedef __u32 __bitwise req_flags_t;
+ struct request {
+ struct list_head queuelist;
+ union {
+- call_single_data_t csd;
++ struct __call_single_data csd;
+ u64 fifo_time;
+ };
+
--- /dev/null
+From 61d2f2a05765a5f57149efbd93e3e81a83cbc2c1 Mon Sep 17 00:00:00 2001
+From: Chen-Yu Tsai <wens@csie.org>
+Date: Mon, 18 Dec 2017 11:57:51 +0800
+Subject: clk: sunxi: sun9i-mmc: Implement reset callback for reset controls
+
+From: Chen-Yu Tsai <wens@csie.org>
+
+commit 61d2f2a05765a5f57149efbd93e3e81a83cbc2c1 upstream.
+
+Our MMC host driver now issues a reset, instead of just deasserting
+the reset control, since commit c34eda69ad4c ("mmc: sunxi: Reset the
+device at probe time"). The sun9i-mmc clock driver does not support
+this, and will fail, which results in MMC not probing.
+
+This patch implements the reset callback by asserting the reset control,
+then deasserting it after a small delay.
+
+Fixes: 7a6fca879f59 ("clk: sunxi: Add driver for A80 MMC config clocks/resets")
+Signed-off-by: Chen-Yu Tsai <wens@csie.org>
+Acked-by: Philipp Zabel <p.zabel@pengutronix.de>
+Acked-by: Maxime Ripard <maxime.ripard@free-electrons.com>
+Signed-off-by: Michael Turquette <mturquette@baylibre.com>
+Link: lkml.kernel.org/r/20171218035751.20661-1-wens@csie.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/clk/sunxi/clk-sun9i-mmc.c | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+--- a/drivers/clk/sunxi/clk-sun9i-mmc.c
++++ b/drivers/clk/sunxi/clk-sun9i-mmc.c
+@@ -16,6 +16,7 @@
+
+ #include <linux/clk.h>
+ #include <linux/clk-provider.h>
++#include <linux/delay.h>
+ #include <linux/init.h>
+ #include <linux/of.h>
+ #include <linux/of_device.h>
+@@ -83,9 +84,20 @@ static int sun9i_mmc_reset_deassert(stru
+ return 0;
+ }
+
++static int sun9i_mmc_reset_reset(struct reset_controller_dev *rcdev,
++ unsigned long id)
++{
++ sun9i_mmc_reset_assert(rcdev, id);
++ udelay(10);
++ sun9i_mmc_reset_deassert(rcdev, id);
++
++ return 0;
++}
++
+ static const struct reset_control_ops sun9i_mmc_reset_ops = {
+ .assert = sun9i_mmc_reset_assert,
+ .deassert = sun9i_mmc_reset_deassert,
++ .reset = sun9i_mmc_reset_reset,
+ };
+
+ static int sun9i_a80_mmc_config_clk_probe(struct platform_device *pdev)
--- /dev/null
+From d53c5135792319e095bb126bc43b2ee98586f7fe Mon Sep 17 00:00:00 2001
+From: Stephan Mueller <smueller@chronox.de>
+Date: Fri, 8 Dec 2017 11:50:37 +0100
+Subject: crypto: af_alg - fix race accessing cipher request
+
+From: Stephan Mueller <smueller@chronox.de>
+
+commit d53c5135792319e095bb126bc43b2ee98586f7fe upstream.
+
+When invoking an asynchronous cipher operation, the invocation of the
+callback may be performed before the subsequent operations in the
+initial code path are invoked. The callback deletes the cipher request
+data structure which implies that after the invocation of the
+asynchronous cipher operation, this data structure must not be accessed
+any more.
+
+The setting of the return code size with the request data structure must
+therefore be moved before the invocation of the asynchronous cipher
+operation.
+
+Fixes: e870456d8e7c ("crypto: algif_skcipher - overhaul memory management")
+Fixes: d887c52d6ae4 ("crypto: algif_aead - overhaul memory management")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Stephan Mueller <smueller@chronox.de>
+Acked-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ crypto/algif_aead.c | 10 +++++-----
+ crypto/algif_skcipher.c | 10 +++++-----
+ 2 files changed, 10 insertions(+), 10 deletions(-)
+
+--- a/crypto/algif_aead.c
++++ b/crypto/algif_aead.c
+@@ -291,6 +291,10 @@ static int _aead_recvmsg(struct socket *
+ /* AIO operation */
+ sock_hold(sk);
+ areq->iocb = msg->msg_iocb;
++
++ /* Remember output size that will be generated. */
++ areq->outlen = outlen;
++
+ aead_request_set_callback(&areq->cra_u.aead_req,
+ CRYPTO_TFM_REQ_MAY_BACKLOG,
+ af_alg_async_cb, areq);
+@@ -298,12 +302,8 @@ static int _aead_recvmsg(struct socket *
+ crypto_aead_decrypt(&areq->cra_u.aead_req);
+
+ /* AIO operation in progress */
+- if (err == -EINPROGRESS || err == -EBUSY) {
+- /* Remember output size that will be generated. */
+- areq->outlen = outlen;
+-
++ if (err == -EINPROGRESS || err == -EBUSY)
+ return -EIOCBQUEUED;
+- }
+
+ sock_put(sk);
+ } else {
+--- a/crypto/algif_skcipher.c
++++ b/crypto/algif_skcipher.c
+@@ -125,6 +125,10 @@ static int _skcipher_recvmsg(struct sock
+ /* AIO operation */
+ sock_hold(sk);
+ areq->iocb = msg->msg_iocb;
++
++ /* Remember output size that will be generated. */
++ areq->outlen = len;
++
+ skcipher_request_set_callback(&areq->cra_u.skcipher_req,
+ CRYPTO_TFM_REQ_MAY_SLEEP,
+ af_alg_async_cb, areq);
+@@ -133,12 +137,8 @@ static int _skcipher_recvmsg(struct sock
+ crypto_skcipher_decrypt(&areq->cra_u.skcipher_req);
+
+ /* AIO operation in progress */
+- if (err == -EINPROGRESS || err == -EBUSY) {
+- /* Remember output size that will be generated. */
+- areq->outlen = len;
+-
++ if (err == -EINPROGRESS || err == -EBUSY)
+ return -EIOCBQUEUED;
+- }
+
+ sock_put(sk);
+ } else {
--- /dev/null
+From 11edb555966ed2c66c533d17c604f9d7e580a829 Mon Sep 17 00:00:00 2001
+From: Stephan Mueller <smueller@chronox.de>
+Date: Wed, 29 Nov 2017 12:02:23 +0100
+Subject: crypto: af_alg - wait for data at beginning of recvmsg
+
+From: Stephan Mueller <smueller@chronox.de>
+
+commit 11edb555966ed2c66c533d17c604f9d7e580a829 upstream.
+
+The wait for data is a non-atomic operation that can sleep and therefore
+potentially release the socket lock. The release of the socket lock
+allows another thread to modify the context data structure. The waiting
+operation for new data therefore must be called at the beginning of
+recvmsg. This prevents a race condition where checks of the members of
+the context data structure are performed by recvmsg while there is a
+potential for modification of these values.
+
+Fixes: e870456d8e7c ("crypto: algif_skcipher - overhaul memory management")
+Fixes: d887c52d6ae4 ("crypto: algif_aead - overhaul memory management")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Stephan Mueller <smueller@chronox.de>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ crypto/af_alg.c | 6 ------
+ crypto/algif_aead.c | 6 ++++++
+ crypto/algif_skcipher.c | 6 ++++++
+ 3 files changed, 12 insertions(+), 6 deletions(-)
+
+--- a/crypto/af_alg.c
++++ b/crypto/af_alg.c
+@@ -1165,12 +1165,6 @@ int af_alg_get_rsgl(struct sock *sk, str
+ if (!af_alg_readable(sk))
+ break;
+
+- if (!ctx->used) {
+- err = af_alg_wait_for_data(sk, flags);
+- if (err)
+- return err;
+- }
+-
+ seglen = min_t(size_t, (maxsize - len),
+ msg_data_left(msg));
+
+--- a/crypto/algif_aead.c
++++ b/crypto/algif_aead.c
+@@ -111,6 +111,12 @@ static int _aead_recvmsg(struct socket *
+ size_t usedpages = 0; /* [in] RX bufs to be used from user */
+ size_t processed = 0; /* [in] TX bufs to be consumed */
+
++ if (!ctx->used) {
++ err = af_alg_wait_for_data(sk, flags);
++ if (err)
++ return err;
++ }
++
+ /*
+ * Data length provided by caller via sendmsg/sendpage that has not
+ * yet been processed.
+--- a/crypto/algif_skcipher.c
++++ b/crypto/algif_skcipher.c
+@@ -72,6 +72,12 @@ static int _skcipher_recvmsg(struct sock
+ int err = 0;
+ size_t len = 0;
+
++ if (!ctx->used) {
++ err = af_alg_wait_for_data(sk, flags);
++ if (err)
++ return err;
++ }
++
+ /* Allocate cipher request for current operation. */
+ areq = af_alg_alloc_areq(sk, sizeof(struct af_alg_async_req) +
+ crypto_skcipher_reqsize(tfm));
--- /dev/null
+From 9abffc6f2efe46c3564c04312e52e07622d40e51 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Thu, 30 Nov 2017 13:39:27 +0100
+Subject: crypto: mcryptd - protect the per-CPU queue with a lock
+
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+
+commit 9abffc6f2efe46c3564c04312e52e07622d40e51 upstream.
+
+mcryptd_enqueue_request() grabs the per-CPU queue struct and protects
+access to it with disabled preemption. Then it schedules a worker on the
+same CPU. The worker in mcryptd_queue_worker() guards access to the same
+per-CPU variable with disabled preemption.
+
+If we take CPU-hotplug into account then it is possible that between
+queue_work_on() and the actual invocation of the worker the CPU goes
+down and the worker will be scheduled on _another_ CPU. And here the
+preempt_disable() protection does not work anymore. The easiest thing is
+to add a spin_lock() to guard access to the list.
+
+Another detail: mcryptd_queue_worker() is not processing more than
+MCRYPTD_BATCH invocations in a row. If there are still items left, then
+it will invoke queue_work() to proceed with more later. *I* would
+suggest to simply drop that check because it does not use a system
+workqueue and the workqueue is already marked as "CPU_INTENSIVE". And if
+preemption is required then the scheduler should do it.
+However if queue_work() is used then the work item is marked as CPU
+unbound. That means it will try to run on the local CPU but it may run
+on another CPU as well. Especially with CONFIG_DEBUG_WQ_FORCE_RR_CPU=y.
+Again, the preempt_disable() won't work here but the lock which was
+introduced will help.
+In order to keep work-item on the local CPU (and avoid RR) I changed it
+to queue_work_on().
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ crypto/mcryptd.c | 23 ++++++++++-------------
+ include/crypto/mcryptd.h | 1 +
+ 2 files changed, 11 insertions(+), 13 deletions(-)
+
+--- a/crypto/mcryptd.c
++++ b/crypto/mcryptd.c
+@@ -81,6 +81,7 @@ static int mcryptd_init_queue(struct mcr
+ pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue);
+ crypto_init_queue(&cpu_queue->queue, max_cpu_qlen);
+ INIT_WORK(&cpu_queue->work, mcryptd_queue_worker);
++ spin_lock_init(&cpu_queue->q_lock);
+ }
+ return 0;
+ }
+@@ -104,15 +105,16 @@ static int mcryptd_enqueue_request(struc
+ int cpu, err;
+ struct mcryptd_cpu_queue *cpu_queue;
+
+- cpu = get_cpu();
+- cpu_queue = this_cpu_ptr(queue->cpu_queue);
+- rctx->tag.cpu = cpu;
++ cpu_queue = raw_cpu_ptr(queue->cpu_queue);
++ spin_lock(&cpu_queue->q_lock);
++ cpu = smp_processor_id();
++ rctx->tag.cpu = smp_processor_id();
+
+ err = crypto_enqueue_request(&cpu_queue->queue, request);
+ pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n",
+ cpu, cpu_queue, request);
++ spin_unlock(&cpu_queue->q_lock);
+ queue_work_on(cpu, kcrypto_wq, &cpu_queue->work);
+- put_cpu();
+
+ return err;
+ }
+@@ -161,16 +163,11 @@ static void mcryptd_queue_worker(struct
+ cpu_queue = container_of(work, struct mcryptd_cpu_queue, work);
+ i = 0;
+ while (i < MCRYPTD_BATCH || single_task_running()) {
+- /*
+- * preempt_disable/enable is used to prevent
+- * being preempted by mcryptd_enqueue_request()
+- */
+- local_bh_disable();
+- preempt_disable();
++
++ spin_lock_bh(&cpu_queue->q_lock);
+ backlog = crypto_get_backlog(&cpu_queue->queue);
+ req = crypto_dequeue_request(&cpu_queue->queue);
+- preempt_enable();
+- local_bh_enable();
++ spin_unlock_bh(&cpu_queue->q_lock);
+
+ if (!req) {
+ mcryptd_opportunistic_flush();
+@@ -185,7 +182,7 @@ static void mcryptd_queue_worker(struct
+ ++i;
+ }
+ if (cpu_queue->queue.qlen)
+- queue_work(kcrypto_wq, &cpu_queue->work);
++ queue_work_on(smp_processor_id(), kcrypto_wq, &cpu_queue->work);
+ }
+
+ void mcryptd_flusher(struct work_struct *__work)
+--- a/include/crypto/mcryptd.h
++++ b/include/crypto/mcryptd.h
+@@ -27,6 +27,7 @@ static inline struct mcryptd_ahash *__mc
+
+ struct mcryptd_cpu_queue {
+ struct crypto_queue queue;
++ spinlock_t q_lock;
+ struct work_struct work;
+ };
+
--- /dev/null
+From 2b4f27c36bcd46e820ddb9a8e6fe6a63fa4250b8 Mon Sep 17 00:00:00 2001
+From: Eric Biggers <ebiggers@google.com>
+Date: Wed, 29 Nov 2017 01:18:57 -0800
+Subject: crypto: skcipher - set walk.iv for zero-length inputs
+
+From: Eric Biggers <ebiggers@google.com>
+
+commit 2b4f27c36bcd46e820ddb9a8e6fe6a63fa4250b8 upstream.
+
+All the ChaCha20 algorithms as well as the ARM bit-sliced AES-XTS
+algorithms call skcipher_walk_virt(), then access the IV (walk.iv)
+before checking whether any bytes need to be processed (walk.nbytes).
+
+But if the input is empty, then skcipher_walk_virt() doesn't set the IV,
+and the algorithms crash trying to use the uninitialized IV pointer.
+
+Fix it by setting the IV earlier in skcipher_walk_virt(). Also fix it
+for the AEAD walk functions.
+
+This isn't a perfect solution because we can't actually align the IV to
+->cra_alignmask unless there are bytes to process, for one because the
+temporary buffer for the aligned IV is freed by skcipher_walk_done(),
+which is only called when there are bytes to process. Thus, algorithms
+that require aligned IVs will still need to avoid accessing the IV when
+walk.nbytes == 0. Still, many algorithms/architectures are fine with
+IVs having any alignment, and even for those that aren't, a misaligned
+pointer bug is much less severe than an uninitialized pointer bug.
+
+This change also matches the behavior of the older blkcipher_walk API.
+
+Fixes: 0cabf2af6f5a ("crypto: skcipher - Fix crash on zero-length input")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Eric Biggers <ebiggers@google.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ crypto/skcipher.c | 10 ++++------
+ 1 file changed, 4 insertions(+), 6 deletions(-)
+
+--- a/crypto/skcipher.c
++++ b/crypto/skcipher.c
+@@ -449,6 +449,8 @@ static int skcipher_walk_skcipher(struct
+
+ walk->total = req->cryptlen;
+ walk->nbytes = 0;
++ walk->iv = req->iv;
++ walk->oiv = req->iv;
+
+ if (unlikely(!walk->total))
+ return 0;
+@@ -456,9 +458,6 @@ static int skcipher_walk_skcipher(struct
+ scatterwalk_start(&walk->in, req->src);
+ scatterwalk_start(&walk->out, req->dst);
+
+- walk->iv = req->iv;
+- walk->oiv = req->iv;
+-
+ walk->flags &= ~SKCIPHER_WALK_SLEEP;
+ walk->flags |= req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+ SKCIPHER_WALK_SLEEP : 0;
+@@ -510,6 +509,8 @@ static int skcipher_walk_aead_common(str
+ int err;
+
+ walk->nbytes = 0;
++ walk->iv = req->iv;
++ walk->oiv = req->iv;
+
+ if (unlikely(!walk->total))
+ return 0;
+@@ -525,9 +526,6 @@ static int skcipher_walk_aead_common(str
+ scatterwalk_done(&walk->in, 0, walk->total);
+ scatterwalk_done(&walk->out, 0, walk->total);
+
+- walk->iv = req->iv;
+- walk->oiv = req->iv;
+-
+ if (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP)
+ walk->flags |= SKCIPHER_WALK_SLEEP;
+ else
--- /dev/null
+From 2797c4a11f373b2545c2398ccb02e362ee66a142 Mon Sep 17 00:00:00 2001
+From: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon, 4 Dec 2017 13:25:13 +0000
+Subject: drm/i915: Flush pending GTT writes before unbinding
+
+From: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 2797c4a11f373b2545c2398ccb02e362ee66a142 upstream.
+
+From the shrinker paths, we want to relinquish the GPU and GGTT access to
+the object, releasing the backing storage back to the system for
+swapout. As a part of that process we would unpin the pages, marking
+them for access by the CPU (for the swapout/swapin). However, if that
+process was interrupted after unbinding the vma, we missed a flush of the
+inflight GGTT writes before we made that GTT space available again for
+reuse, with the prospect that we would redirect them to another page.
+
+The bug dates back to the introduction of multiple GGTT vma, but the
+code itself dates to commit 02bef8f98d26 ("drm/i915: Unbind closed vma
+for i915_gem_object_unbind()").
+
+Fixes: 02bef8f98d26 ("drm/i915: Unbind closed vma for i915_gem_object_unbind()")
+Fixes: c5ad54cf7dd8 ("drm/i915: Use partial view in mmap fault handler")
+Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
+Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20171204132513.7303-1-chris@chris-wilson.co.uk
+(cherry picked from commit 5888fc9eac3c2ff96e76aeeb865fdb46ab2d711e)
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/i915/i915_gem.c | 9 +--------
+ 1 file changed, 1 insertion(+), 8 deletions(-)
+
+--- a/drivers/gpu/drm/i915/i915_gem.c
++++ b/drivers/gpu/drm/i915/i915_gem.c
+@@ -325,17 +325,10 @@ int i915_gem_object_unbind(struct drm_i9
+ * must wait for all rendering to complete to the object (as unbinding
+ * must anyway), and retire the requests.
+ */
+- ret = i915_gem_object_wait(obj,
+- I915_WAIT_INTERRUPTIBLE |
+- I915_WAIT_LOCKED |
+- I915_WAIT_ALL,
+- MAX_SCHEDULE_TIMEOUT,
+- NULL);
++ ret = i915_gem_object_set_to_cpu_domain(obj, false);
+ if (ret)
+ return ret;
+
+- i915_gem_retire_requests(to_i915(obj->base.dev));
+-
+ while ((vma = list_first_entry_or_null(&obj->vma_list,
+ struct i915_vma,
+ obj_link))) {
--- /dev/null
+From 92411f6d7f1afcc95e54295d40e96a75385212ec Mon Sep 17 00:00:00 2001
+From: Maxime Ripard <maxime.ripard@free-electrons.com>
+Date: Thu, 7 Dec 2017 16:58:50 +0100
+Subject: drm/sun4i: Fix error path handling
+
+From: Maxime Ripard <maxime.ripard@free-electrons.com>
+
+commit 92411f6d7f1afcc95e54295d40e96a75385212ec upstream.
+
+The commit 4c7f16d14a33 ("drm/sun4i: Fix TCON clock and regmap
+initialization sequence") moved a bunch of logic around, but forgot to
+update the gotos after the introduction of the err_free_dotclock label.
+
+It means that if we fail later than the one introduced in that commit,
+we'll just go to the old label, which doesn't free the clock we created. This
+will result in a breakage as soon as someone tries to do something with
+that clock, since its resources will have been long reclaimed.
+
+Fixes: 4c7f16d14a33 ("drm/sun4i: Fix TCON clock and regmap initialization sequence")
+Reviewed-by: Chen-Yu Tsai <wens@csie.org>
+Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/f83c1cebc731f0b4251f5ddd7b38c718cd79bb0b.1512662253.git-series.maxime.ripard@free-electrons.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/sun4i/sun4i_tcon.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/sun4i/sun4i_tcon.c
++++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c
+@@ -567,12 +567,12 @@ static int sun4i_tcon_bind(struct device
+ if (IS_ERR(tcon->crtc)) {
+ dev_err(dev, "Couldn't create our CRTC\n");
+ ret = PTR_ERR(tcon->crtc);
+- goto err_free_clocks;
++ goto err_free_dotclock;
+ }
+
+ ret = sun4i_rgb_init(drm, tcon);
+ if (ret < 0)
+- goto err_free_clocks;
++ goto err_free_dotclock;
+
+ list_add_tail(&tcon->list, &drv->tcon_list);
+
--- /dev/null
+From 613e396bc0d4c7604fba23256644e78454c68cf6 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 17 Dec 2017 10:56:29 +0100
+Subject: init: Invoke init_espfix_bsp() from mm_init()
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 613e396bc0d4c7604fba23256644e78454c68cf6 upstream.
+
+init_espfix_bsp() needs to be invoked before the page table isolation
+initialization. Move it into mm_init() which is the place where pti_init()
+will be added.
+
+While at it get rid of the #ifdeffery and provide proper stub functions.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/espfix.h | 7 ++++---
+ arch/x86/kernel/smpboot.c | 6 +-----
+ include/asm-generic/pgtable.h | 5 +++++
+ init/main.c | 6 ++----
+ 4 files changed, 12 insertions(+), 12 deletions(-)
+
+--- a/arch/x86/include/asm/espfix.h
++++ b/arch/x86/include/asm/espfix.h
+@@ -2,7 +2,7 @@
+ #ifndef _ASM_X86_ESPFIX_H
+ #define _ASM_X86_ESPFIX_H
+
+-#ifdef CONFIG_X86_64
++#ifdef CONFIG_X86_ESPFIX64
+
+ #include <asm/percpu.h>
+
+@@ -11,7 +11,8 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned lon
+
+ extern void init_espfix_bsp(void);
+ extern void init_espfix_ap(int cpu);
+-
+-#endif /* CONFIG_X86_64 */
++#else
++static inline void init_espfix_ap(int cpu) { }
++#endif
+
+ #endif /* _ASM_X86_ESPFIX_H */
+--- a/arch/x86/kernel/smpboot.c
++++ b/arch/x86/kernel/smpboot.c
+@@ -990,12 +990,8 @@ static int do_boot_cpu(int apicid, int c
+ initial_code = (unsigned long)start_secondary;
+ initial_stack = idle->thread.sp;
+
+- /*
+- * Enable the espfix hack for this CPU
+- */
+-#ifdef CONFIG_X86_ESPFIX64
++ /* Enable the espfix hack for this CPU */
+ init_espfix_ap(cpu);
+-#endif
+
+ /* So we see what's up */
+ announce_cpu(cpu, apicid);
+--- a/include/asm-generic/pgtable.h
++++ b/include/asm-generic/pgtable.h
+@@ -1025,6 +1025,11 @@ static inline int pmd_clear_huge(pmd_t *
+ struct file;
+ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
+ unsigned long size, pgprot_t *vma_prot);
++
++#ifndef CONFIG_X86_ESPFIX64
++static inline void init_espfix_bsp(void) { }
++#endif
++
+ #endif /* !__ASSEMBLY__ */
+
+ #ifndef io_remap_pfn_range
+--- a/init/main.c
++++ b/init/main.c
+@@ -504,6 +504,8 @@ static void __init mm_init(void)
+ pgtable_init();
+ vmalloc_init();
+ ioremap_huge_init();
++ /* Should be run before the first non-init thread is created */
++ init_espfix_bsp();
+ }
+
+ asmlinkage __visible void __init start_kernel(void)
+@@ -674,10 +676,6 @@ asmlinkage __visible void __init start_k
+ if (efi_enabled(EFI_RUNTIME_SERVICES))
+ efi_enter_virtual_mode();
+ #endif
+-#ifdef CONFIG_X86_ESPFIX64
+- /* Should be run before the first non-init thread is created */
+- init_espfix_bsp();
+-#endif
+ thread_stack_cache_init();
+ cred_init();
+ fork_init();
--- /dev/null
+From 7839c672e58bf62da8f2f0197fefb442c02ba1dd Mon Sep 17 00:00:00 2001
+From: Marc Zyngier <marc.zyngier@arm.com>
+Date: Thu, 7 Dec 2017 11:45:45 +0000
+Subject: KVM: arm/arm64: Fix HYP unmapping going off limits
+
+From: Marc Zyngier <marc.zyngier@arm.com>
+
+commit 7839c672e58bf62da8f2f0197fefb442c02ba1dd upstream.
+
+When we unmap the HYP memory, we try to be clever and unmap one
+PGD at a time. If we start with a non-PGD aligned address and try
+to unmap a whole PGD, things go horribly wrong in unmap_hyp_range
+(addr and end can never match, and it all goes really badly as we
+keep incrementing pgd and parse random memory as page tables...).
+
+The obvious fix is to let unmap_hyp_range do what it does best,
+which is to iterate over a range.
+
+The size of the linear mapping, which begins at PAGE_OFFSET, can be
+easily calculated by subtracting PAGE_OFFSET from high_memory, because
+high_memory is defined as the linear map address of the last byte of
+DRAM, plus one.
+
+The size of the vmalloc region is given trivially by VMALLOC_END -
+VMALLOC_START.
+
+Reported-by: Andre Przywara <andre.przywara@arm.com>
+Tested-by: Andre Przywara <andre.przywara@arm.com>
+Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
+Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
+Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ virt/kvm/arm/mmu.c | 10 ++++------
+ 1 file changed, 4 insertions(+), 6 deletions(-)
+
+--- a/virt/kvm/arm/mmu.c
++++ b/virt/kvm/arm/mmu.c
+@@ -509,8 +509,6 @@ static void unmap_hyp_range(pgd_t *pgdp,
+ */
+ void free_hyp_pgds(void)
+ {
+- unsigned long addr;
+-
+ mutex_lock(&kvm_hyp_pgd_mutex);
+
+ if (boot_hyp_pgd) {
+@@ -521,10 +519,10 @@ void free_hyp_pgds(void)
+
+ if (hyp_pgd) {
+ unmap_hyp_range(hyp_pgd, hyp_idmap_start, PAGE_SIZE);
+- for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
+- unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE);
+- for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
+- unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE);
++ unmap_hyp_range(hyp_pgd, kern_hyp_va(PAGE_OFFSET),
++ (uintptr_t)high_memory - PAGE_OFFSET);
++ unmap_hyp_range(hyp_pgd, kern_hyp_va(VMALLOC_START),
++ VMALLOC_END - VMALLOC_START);
+
+ free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
+ hyp_pgd = NULL;
--- /dev/null
+From ed52870f4676489124d8697fd00e6ae6c504e586 Mon Sep 17 00:00:00 2001
+From: Wanpeng Li <wanpeng.li@hotmail.com>
+Date: Mon, 4 Dec 2017 22:21:30 -0800
+Subject: KVM: MMU: Fix infinite loop when there is no available mmu page
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Wanpeng Li <wanpeng.li@hotmail.com>
+
+commit ed52870f4676489124d8697fd00e6ae6c504e586 upstream.
+
+The below test case can cause infinite loop in kvm when ept=0.
+
+ #include <unistd.h>
+ #include <sys/syscall.h>
+ #include <string.h>
+ #include <stdint.h>
+ #include <linux/kvm.h>
+ #include <fcntl.h>
+ #include <sys/ioctl.h>
+
+ long r[5];
+ int main()
+ {
+ r[2] = open("/dev/kvm", O_RDONLY);
+ r[3] = ioctl(r[2], KVM_CREATE_VM, 0);
+ r[4] = ioctl(r[3], KVM_CREATE_VCPU, 7);
+ ioctl(r[4], KVM_RUN, 0);
+ }
+
+It doesn't set up the memory regions; mmu_alloc_shadow/direct_roots() in
+kvm return 1 when kvm fails to allocate the root page table, which can result
+in the infinite loop below:
+
+ vcpu_run() {
+ for (;;) {
+ r = vcpu_enter_guest()::kvm_mmu_reload() returns 1
+ if (r <= 0)
+ break;
+ if (need_resched())
+ cond_resched();
+ }
+ }
+
+This patch fixes it by returning -ENOSPC when there is no available kvm mmu
+page for root page table.
+
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Radim Krčmář <rkrcmar@redhat.com>
+Fixes: 26eeb53cf0f (KVM: MMU: Bail out immediately if there is no available mmu page)
+Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/mmu.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -3382,7 +3382,7 @@ static int mmu_alloc_direct_roots(struct
+ spin_lock(&vcpu->kvm->mmu_lock);
+ if(make_mmu_pages_available(vcpu) < 0) {
+ spin_unlock(&vcpu->kvm->mmu_lock);
+- return 1;
++ return -ENOSPC;
+ }
+ sp = kvm_mmu_get_page(vcpu, 0, 0,
+ vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL);
+@@ -3397,7 +3397,7 @@ static int mmu_alloc_direct_roots(struct
+ spin_lock(&vcpu->kvm->mmu_lock);
+ if (make_mmu_pages_available(vcpu) < 0) {
+ spin_unlock(&vcpu->kvm->mmu_lock);
+- return 1;
++ return -ENOSPC;
+ }
+ sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
+ i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL);
+@@ -3437,7 +3437,7 @@ static int mmu_alloc_shadow_roots(struct
+ spin_lock(&vcpu->kvm->mmu_lock);
+ if (make_mmu_pages_available(vcpu) < 0) {
+ spin_unlock(&vcpu->kvm->mmu_lock);
+- return 1;
++ return -ENOSPC;
+ }
+ sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
+ vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL);
+@@ -3474,7 +3474,7 @@ static int mmu_alloc_shadow_roots(struct
+ spin_lock(&vcpu->kvm->mmu_lock);
+ if (make_mmu_pages_available(vcpu) < 0) {
+ spin_unlock(&vcpu->kvm->mmu_lock);
+- return 1;
++ return -ENOSPC;
+ }
+ sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL,
+ 0, ACC_ALL);
--- /dev/null
+From dc1c4165d189350cb51bdd3057deb6ecd164beda Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@kaod.org>
+Date: Tue, 12 Dec 2017 12:02:04 +0000
+Subject: KVM: PPC: Book3S: fix XIVE migration of pending interrupts
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Cédric Le Goater <clg@kaod.org>
+
+commit dc1c4165d189350cb51bdd3057deb6ecd164beda upstream.
+
+When restoring a pending interrupt, we are setting the Q bit to force
+a retrigger in xive_finish_unmask(). But we also need to force an EOI
+in this case to reach the same initial state : P=1, Q=0.
+
+This can be done by not setting 'old_p' for pending interrupts which
+will inform xive_finish_unmask() that an EOI needs to be sent.
+
+Fixes: 5af50993850a ("KVM: PPC: Book3S HV: Native usage of the XIVE interrupt controller")
+Suggested-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Signed-off-by: Cédric Le Goater <clg@kaod.org>
+Reviewed-by: Laurent Vivier <lvivier@redhat.com>
+Tested-by: Laurent Vivier <lvivier@redhat.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/kvm/book3s_xive.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/powerpc/kvm/book3s_xive.c
++++ b/arch/powerpc/kvm/book3s_xive.c
+@@ -1558,7 +1558,7 @@ static int xive_set_source(struct kvmppc
+
+ /*
+ * Restore P and Q. If the interrupt was pending, we
+- * force both P and Q, which will trigger a resend.
++ * force Q and !P, which will trigger a resend.
+ *
+ * That means that a guest that had both an interrupt
+ * pending (queued) and Q set will restore with only
+@@ -1566,7 +1566,7 @@ static int xive_set_source(struct kvmppc
+ * is perfectly fine as coalescing interrupts that haven't
+ * been presented yet is always allowed.
+ */
+- if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING)
++ if (val & KVM_XICS_PRESENTED && !(val & KVM_XICS_PENDING))
+ state->old_p = true;
+ if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING)
+ state->old_q = true;
--- /dev/null
+From 7333b5aca412d6ad02667b5a513485838a91b136 Mon Sep 17 00:00:00 2001
+From: Laurent Vivier <lvivier@redhat.com>
+Date: Tue, 12 Dec 2017 18:23:56 +0100
+Subject: KVM: PPC: Book3S HV: Fix pending_pri value in kvmppc_xive_get_icp()
+
+From: Laurent Vivier <lvivier@redhat.com>
+
+commit 7333b5aca412d6ad02667b5a513485838a91b136 upstream.
+
+When we migrate a VM from a POWER8 host (XICS) to a POWER9 host
+(XICS-on-XIVE), we have an error:
+
+qemu-kvm: Unable to restore KVM interrupt controller state \
+ (0xff000000) for CPU 0: Invalid argument
+
+This is because kvmppc_xics_set_icp() checks the new state
+is internally consistent, and especially:
+
+...
+ 1129 if (xisr == 0) {
+ 1130 if (pending_pri != 0xff)
+ 1131 return -EINVAL;
+...
+
+On the other side, kvmppc_xive_get_icp() sets
+neither the pending_pri value nor the xisr value (set to 0)
+(and kvmppc_xive_set_icp() ignores the pending_pri value).
+
+As xisr is 0, pending_pri must be set to 0xff.
+
+Fixes: 5af50993850a ("KVM: PPC: Book3S HV: Native usage of the XIVE interrupt controller")
+Signed-off-by: Laurent Vivier <lvivier@redhat.com>
+Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/kvm/book3s_xive.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/powerpc/kvm/book3s_xive.c
++++ b/arch/powerpc/kvm/book3s_xive.c
+@@ -725,7 +725,8 @@ u64 kvmppc_xive_get_icp(struct kvm_vcpu
+
+ /* Return the per-cpu state for state saving/migration */
+ return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT |
+- (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT;
++ (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT |
++ (u64)0xff << KVM_REG_PPC_ICP_PPRI_SHIFT;
+ }
+
+ int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
--- /dev/null
+From d73235d17ba63b53dc0e1051dbc10a1f1be91b71 Mon Sep 17 00:00:00 2001
+From: Wanpeng Li <wanpeng.li@hotmail.com>
+Date: Thu, 7 Dec 2017 00:30:08 -0800
+Subject: KVM: X86: Fix load RFLAGS w/o the fixed bit
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Wanpeng Li <wanpeng.li@hotmail.com>
+
+commit d73235d17ba63b53dc0e1051dbc10a1f1be91b71 upstream.
+
+ *** Guest State ***
+ CR0: actual=0x0000000000000030, shadow=0x0000000060000010, gh_mask=fffffffffffffff7
+ CR4: actual=0x0000000000002050, shadow=0x0000000000000000, gh_mask=ffffffffffffe871
+ CR3 = 0x00000000fffbc000
+ RSP = 0x0000000000000000 RIP = 0x0000000000000000
+ RFLAGS=0x00000000 DR7 = 0x0000000000000400
+ ^^^^^^^^^^
+
+The failed vmentry is triggered by the following testcase when ept=Y:
+
+ #include <unistd.h>
+ #include <sys/syscall.h>
+ #include <string.h>
+ #include <stdint.h>
+ #include <linux/kvm.h>
+ #include <fcntl.h>
+ #include <sys/ioctl.h>
+
+ long r[5];
+ int main()
+ {
+ r[2] = open("/dev/kvm", O_RDONLY);
+ r[3] = ioctl(r[2], KVM_CREATE_VM, 0);
+ r[4] = ioctl(r[3], KVM_CREATE_VCPU, 7);
+ struct kvm_regs regs = {
+ .rflags = 0,
+ };
+ ioctl(r[4], KVM_SET_REGS, &regs);
+ ioctl(r[4], KVM_RUN, 0);
+ }
+
+X86 RFLAGS bit 1 is fixed to 1, but userspace can simply clear bit 1
+of RFLAGS with the KVM_SET_REGS ioctl, which results in a vmentry failure.
+This patch fixes it by ORing in X86_EFLAGS_FIXED during the ioctl.
+
+Suggested-by: Jim Mattson <jmattson@google.com>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Quan Xu <quan.xu0@gmail.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Radim Krčmář <rkrcmar@redhat.com>
+Cc: Jim Mattson <jmattson@google.com>
+Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/x86.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -7359,7 +7359,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct
+ #endif
+
+ kvm_rip_write(vcpu, regs->rip);
+- kvm_set_rflags(vcpu, regs->rflags);
++ kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
+
+ vcpu->arch.exception.pending = false;
+
--- /dev/null
+From fae1a3e775cca8c3a9e0eb34443b310871a15a92 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Thu, 21 Dec 2017 00:49:14 +0100
+Subject: kvm: x86: fix RSM when PCID is non-zero
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit fae1a3e775cca8c3a9e0eb34443b310871a15a92 upstream.
+
+rsm_load_state_64() and rsm_enter_protected_mode() load CR3, then
+CR4 & ~PCIDE, then CR0, then CR4.
+
+However, setting CR4.PCIDE fails if CR3[11:0] != 0. It's probably easier
+in the long run to replace rsm_enter_protected_mode() with an emulator
+callback that sets all the special registers (like KVM_SET_SREGS would
+do). For now, set the PCID field of CR3 only after CR4.PCIDE is 1.
+
+Reported-by: Laszlo Ersek <lersek@redhat.com>
+Tested-by: Laszlo Ersek <lersek@redhat.com>
+Fixes: 660a5d517aaab9187f93854425c4c63f4a09195c
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/emulate.c | 32 +++++++++++++++++++++++++-------
+ 1 file changed, 25 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -2404,9 +2404,21 @@ static int rsm_load_seg_64(struct x86_em
+ }
+
+ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
+- u64 cr0, u64 cr4)
++ u64 cr0, u64 cr3, u64 cr4)
+ {
+ int bad;
++ u64 pcid;
++
++ /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */
++ pcid = 0;
++ if (cr4 & X86_CR4_PCIDE) {
++ pcid = cr3 & 0xfff;
++ cr3 &= ~0xfff;
++ }
++
++ bad = ctxt->ops->set_cr(ctxt, 3, cr3);
++ if (bad)
++ return X86EMUL_UNHANDLEABLE;
+
+ /*
+ * First enable PAE, long mode needs it before CR0.PG = 1 is set.
+@@ -2425,6 +2437,12 @@ static int rsm_enter_protected_mode(stru
+ bad = ctxt->ops->set_cr(ctxt, 4, cr4);
+ if (bad)
+ return X86EMUL_UNHANDLEABLE;
++ if (pcid) {
++ bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
++ if (bad)
++ return X86EMUL_UNHANDLEABLE;
++ }
++
+ }
+
+ return X86EMUL_CONTINUE;
+@@ -2435,11 +2453,11 @@ static int rsm_load_state_32(struct x86_
+ struct desc_struct desc;
+ struct desc_ptr dt;
+ u16 selector;
+- u32 val, cr0, cr4;
++ u32 val, cr0, cr3, cr4;
+ int i;
+
+ cr0 = GET_SMSTATE(u32, smbase, 0x7ffc);
+- ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8));
++ cr3 = GET_SMSTATE(u32, smbase, 0x7ff8);
+ ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
+ ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0);
+
+@@ -2481,14 +2499,14 @@ static int rsm_load_state_32(struct x86_
+
+ ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
+
+- return rsm_enter_protected_mode(ctxt, cr0, cr4);
++ return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
+ }
+
+ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
+ {
+ struct desc_struct desc;
+ struct desc_ptr dt;
+- u64 val, cr0, cr4;
++ u64 val, cr0, cr3, cr4;
+ u32 base3;
+ u16 selector;
+ int i, r;
+@@ -2505,7 +2523,7 @@ static int rsm_load_state_64(struct x86_
+ ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
+
+ cr0 = GET_SMSTATE(u64, smbase, 0x7f58);
+- ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u64, smbase, 0x7f50));
++ cr3 = GET_SMSTATE(u64, smbase, 0x7f50);
+ cr4 = GET_SMSTATE(u64, smbase, 0x7f48);
+ ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
+ val = GET_SMSTATE(u64, smbase, 0x7ed0);
+@@ -2533,7 +2551,7 @@ static int rsm_load_state_64(struct x86_
+ dt.address = GET_SMSTATE(u64, smbase, 0x7e68);
+ ctxt->ops->set_gdt(ctxt, &dt);
+
+- r = rsm_enter_protected_mode(ctxt, cr0, cr4);
++ r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
+ if (r != X86EMUL_CONTINUE)
+ return r;
+
--- /dev/null
+From 24e3a7fb60a9187e5df90e5fa655ffc94b9c4f77 Mon Sep 17 00:00:00 2001
+From: Vishal Verma <vishal.l.verma@intel.com>
+Date: Mon, 18 Dec 2017 09:28:39 -0700
+Subject: libnvdimm, btt: Fix an incompatibility in the log layout
+
+From: Vishal Verma <vishal.l.verma@intel.com>
+
+commit 24e3a7fb60a9187e5df90e5fa655ffc94b9c4f77 upstream.
+
+Due to a spec misinterpretation, the Linux implementation of the BTT log
+area had a different padding scheme from other implementations, such as
+UEFI and NVML.
+
+This fixes the padding scheme, and defaults to it for new BTT layouts.
+We attempt to detect the padding scheme in use when probing for an
+existing BTT. If we detect the older/incompatible scheme, we continue
+using it.
+
+Reported-by: Juston Li <juston.li@intel.com>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Fixes: 5212e11fde4d ("nd_btt: atomic sector updates")
+Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/nvdimm/btt.c | 201 ++++++++++++++++++++++++++++++++++++++++++---------
+ drivers/nvdimm/btt.h | 45 +++++++++++
+ 2 files changed, 211 insertions(+), 35 deletions(-)
+
+--- a/drivers/nvdimm/btt.c
++++ b/drivers/nvdimm/btt.c
+@@ -210,12 +210,12 @@ static int btt_map_read(struct arena_inf
+ return ret;
+ }
+
+-static int btt_log_read_pair(struct arena_info *arena, u32 lane,
+- struct log_entry *ent)
++static int btt_log_group_read(struct arena_info *arena, u32 lane,
++ struct log_group *log)
+ {
+ return arena_read_bytes(arena,
+- arena->logoff + (2 * lane * LOG_ENT_SIZE), ent,
+- 2 * LOG_ENT_SIZE, 0);
++ arena->logoff + (lane * LOG_GRP_SIZE), log,
++ LOG_GRP_SIZE, 0);
+ }
+
+ static struct dentry *debugfs_root;
+@@ -255,6 +255,8 @@ static void arena_debugfs_init(struct ar
+ debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff);
+ debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off);
+ debugfs_create_x32("flags", S_IRUGO, d, &a->flags);
++ debugfs_create_u32("log_index_0", S_IRUGO, d, &a->log_index[0]);
++ debugfs_create_u32("log_index_1", S_IRUGO, d, &a->log_index[1]);
+ }
+
+ static void btt_debugfs_init(struct btt *btt)
+@@ -273,6 +275,11 @@ static void btt_debugfs_init(struct btt
+ }
+ }
+
++static u32 log_seq(struct log_group *log, int log_idx)
++{
++ return le32_to_cpu(log->ent[log_idx].seq);
++}
++
+ /*
+ * This function accepts two log entries, and uses the
+ * sequence number to find the 'older' entry.
+@@ -282,8 +289,10 @@ static void btt_debugfs_init(struct btt
+ *
+ * TODO The logic feels a bit kludge-y. make it better..
+ */
+-static int btt_log_get_old(struct log_entry *ent)
++static int btt_log_get_old(struct arena_info *a, struct log_group *log)
+ {
++ int idx0 = a->log_index[0];
++ int idx1 = a->log_index[1];
+ int old;
+
+ /*
+@@ -291,23 +300,23 @@ static int btt_log_get_old(struct log_en
+ * the next time, the following logic works out to put this
+ * (next) entry into [1]
+ */
+- if (ent[0].seq == 0) {
+- ent[0].seq = cpu_to_le32(1);
++ if (log_seq(log, idx0) == 0) {
++ log->ent[idx0].seq = cpu_to_le32(1);
+ return 0;
+ }
+
+- if (ent[0].seq == ent[1].seq)
++ if (log_seq(log, idx0) == log_seq(log, idx1))
+ return -EINVAL;
+- if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5)
++ if (log_seq(log, idx0) + log_seq(log, idx1) > 5)
+ return -EINVAL;
+
+- if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) {
+- if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1)
++ if (log_seq(log, idx0) < log_seq(log, idx1)) {
++ if ((log_seq(log, idx1) - log_seq(log, idx0)) == 1)
+ old = 0;
+ else
+ old = 1;
+ } else {
+- if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1)
++ if ((log_seq(log, idx0) - log_seq(log, idx1)) == 1)
+ old = 1;
+ else
+ old = 0;
+@@ -327,17 +336,18 @@ static int btt_log_read(struct arena_inf
+ {
+ int ret;
+ int old_ent, ret_ent;
+- struct log_entry log[2];
++ struct log_group log;
+
+- ret = btt_log_read_pair(arena, lane, log);
++ ret = btt_log_group_read(arena, lane, &log);
+ if (ret)
+ return -EIO;
+
+- old_ent = btt_log_get_old(log);
++ old_ent = btt_log_get_old(arena, &log);
+ if (old_ent < 0 || old_ent > 1) {
+ dev_err(to_dev(arena),
+ "log corruption (%d): lane %d seq [%d, %d]\n",
+- old_ent, lane, log[0].seq, log[1].seq);
++ old_ent, lane, log.ent[arena->log_index[0]].seq,
++ log.ent[arena->log_index[1]].seq);
+ /* TODO set error state? */
+ return -EIO;
+ }
+@@ -345,7 +355,7 @@ static int btt_log_read(struct arena_inf
+ ret_ent = (old_flag ? old_ent : (1 - old_ent));
+
+ if (ent != NULL)
+- memcpy(ent, &log[ret_ent], LOG_ENT_SIZE);
++ memcpy(ent, &log.ent[arena->log_index[ret_ent]], LOG_ENT_SIZE);
+
+ return ret_ent;
+ }
+@@ -359,17 +369,13 @@ static int __btt_log_write(struct arena_
+ u32 sub, struct log_entry *ent, unsigned long flags)
+ {
+ int ret;
+- /*
+- * Ignore the padding in log_entry for calculating log_half.
+- * The entry is 'committed' when we write the sequence number,
+- * and we want to ensure that that is the last thing written.
+- * We don't bother writing the padding as that would be extra
+- * media wear and write amplification
+- */
+- unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2;
+- u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE);
++ u32 group_slot = arena->log_index[sub];
++ unsigned int log_half = LOG_ENT_SIZE / 2;
+ void *src = ent;
++ u64 ns_off;
+
++ ns_off = arena->logoff + (lane * LOG_GRP_SIZE) +
++ (group_slot * LOG_ENT_SIZE);
+ /* split the 16B write into atomic, durable halves */
+ ret = arena_write_bytes(arena, ns_off, src, log_half, flags);
+ if (ret)
+@@ -452,7 +458,7 @@ static int btt_log_init(struct arena_inf
+ {
+ size_t logsize = arena->info2off - arena->logoff;
+ size_t chunk_size = SZ_4K, offset = 0;
+- struct log_entry log;
++ struct log_entry ent;
+ void *zerobuf;
+ int ret;
+ u32 i;
+@@ -484,11 +490,11 @@ static int btt_log_init(struct arena_inf
+ }
+
+ for (i = 0; i < arena->nfree; i++) {
+- log.lba = cpu_to_le32(i);
+- log.old_map = cpu_to_le32(arena->external_nlba + i);
+- log.new_map = cpu_to_le32(arena->external_nlba + i);
+- log.seq = cpu_to_le32(LOG_SEQ_INIT);
+- ret = __btt_log_write(arena, i, 0, &log, 0);
++ ent.lba = cpu_to_le32(i);
++ ent.old_map = cpu_to_le32(arena->external_nlba + i);
++ ent.new_map = cpu_to_le32(arena->external_nlba + i);
++ ent.seq = cpu_to_le32(LOG_SEQ_INIT);
++ ret = __btt_log_write(arena, i, 0, &ent, 0);
+ if (ret)
+ goto free;
+ }
+@@ -593,6 +599,123 @@ static int btt_freelist_init(struct aren
+ return 0;
+ }
+
++static bool ent_is_padding(struct log_entry *ent)
++{
++ return (ent->lba == 0) && (ent->old_map == 0) && (ent->new_map == 0)
++ && (ent->seq == 0);
++}
++
++/*
++ * Detecting valid log indices: We read a log group (see the comments in btt.h
++ * for a description of a 'log_group' and its 'slots'), and iterate over its
++ * four slots. We expect that a padding slot will be all-zeroes, and use this
++ * to detect a padding slot vs. an actual entry.
++ *
++ * If a log_group is in the initial state, i.e. hasn't been used since the
++ * creation of this BTT layout, it will have three of the four slots with
++ * zeroes. We skip over these log_groups for the detection of log_index. If
++ * all log_groups are in the initial state (i.e. the BTT has never been
++ * written to), it is safe to assume the 'new format' of log entries in slots
++ * (0, 1).
++ */
++static int log_set_indices(struct arena_info *arena)
++{
++ bool idx_set = false, initial_state = true;
++ int ret, log_index[2] = {-1, -1};
++ u32 i, j, next_idx = 0;
++ struct log_group log;
++ u32 pad_count = 0;
++
++ for (i = 0; i < arena->nfree; i++) {
++ ret = btt_log_group_read(arena, i, &log);
++ if (ret < 0)
++ return ret;
++
++ for (j = 0; j < 4; j++) {
++ if (!idx_set) {
++ if (ent_is_padding(&log.ent[j])) {
++ pad_count++;
++ continue;
++ } else {
++ /* Skip if index has been recorded */
++ if ((next_idx == 1) &&
++ (j == log_index[0]))
++ continue;
++ /* valid entry, record index */
++ log_index[next_idx] = j;
++ next_idx++;
++ }
++ if (next_idx == 2) {
++ /* two valid entries found */
++ idx_set = true;
++ } else if (next_idx > 2) {
++ /* too many valid indices */
++ return -ENXIO;
++ }
++ } else {
++ /*
++ * once the indices have been set, just verify
++ * that all subsequent log groups are either in
++ * their initial state or follow the same
++ * indices.
++ */
++ if (j == log_index[0]) {
++ /* entry must be 'valid' */
++ if (ent_is_padding(&log.ent[j]))
++ return -ENXIO;
++ } else if (j == log_index[1]) {
++ ;
++ /*
++ * log_index[1] can be padding if the
++ * lane never got used and it is still
++ * in the initial state (three 'padding'
++ * entries)
++ */
++ } else {
++ /* entry must be invalid (padding) */
++ if (!ent_is_padding(&log.ent[j]))
++ return -ENXIO;
++ }
++ }
++ }
++ /*
++ * If any of the log_groups have more than one valid,
++ * non-padding entry, then the we are no longer in the
++ * initial_state
++ */
++ if (pad_count < 3)
++ initial_state = false;
++ pad_count = 0;
++ }
++
++ if (!initial_state && !idx_set)
++ return -ENXIO;
++
++ /*
++ * If all the entries in the log were in the initial state,
++ * assume new padding scheme
++ */
++ if (initial_state)
++ log_index[1] = 1;
++
++ /*
++ * Only allow the known permutations of log/padding indices,
++ * i.e. (0, 1), and (0, 2)
++ */
++ if ((log_index[0] == 0) && ((log_index[1] == 1) || (log_index[1] == 2)))
++ ; /* known index possibilities */
++ else {
++ dev_err(to_dev(arena), "Found an unknown padding scheme\n");
++ return -ENXIO;
++ }
++
++ arena->log_index[0] = log_index[0];
++ arena->log_index[1] = log_index[1];
++ dev_dbg(to_dev(arena), "log_index_0 = %d\n", log_index[0]);
++ dev_dbg(to_dev(arena), "log_index_1 = %d\n", log_index[1]);
++ return 0;
++}
++
+ static int btt_rtt_init(struct arena_info *arena)
+ {
+ arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL);
+@@ -649,8 +772,7 @@ static struct arena_info *alloc_arena(st
+ available -= 2 * BTT_PG_SIZE;
+
+ /* The log takes a fixed amount of space based on nfree */
+- logsize = roundup(2 * arena->nfree * sizeof(struct log_entry),
+- BTT_PG_SIZE);
++ logsize = roundup(arena->nfree * LOG_GRP_SIZE, BTT_PG_SIZE);
+ available -= logsize;
+
+ /* Calculate optimal split between map and data area */
+@@ -667,6 +789,10 @@ static struct arena_info *alloc_arena(st
+ arena->mapoff = arena->dataoff + datasize;
+ arena->logoff = arena->mapoff + mapsize;
+ arena->info2off = arena->logoff + logsize;
++
++ /* Default log indices are (0,1) */
++ arena->log_index[0] = 0;
++ arena->log_index[1] = 1;
+ return arena;
+ }
+
+@@ -757,6 +883,13 @@ static int discover_arenas(struct btt *b
+ arena->external_lba_start = cur_nlba;
+ parse_arena_meta(arena, super, cur_off);
+
++ ret = log_set_indices(arena);
++ if (ret) {
++ dev_err(to_dev(arena),
++ "Unable to deduce log/padding indices\n");
++ goto out;
++ }
++
+ mutex_init(&arena->err_lock);
+ ret = btt_freelist_init(arena);
+ if (ret)
+--- a/drivers/nvdimm/btt.h
++++ b/drivers/nvdimm/btt.h
+@@ -27,6 +27,7 @@
+ #define MAP_ERR_MASK (1 << MAP_ERR_SHIFT)
+ #define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT)))
+ #define MAP_ENT_NORMAL 0xC0000000
++#define LOG_GRP_SIZE sizeof(struct log_group)
+ #define LOG_ENT_SIZE sizeof(struct log_entry)
+ #define ARENA_MIN_SIZE (1UL << 24) /* 16 MB */
+ #define ARENA_MAX_SIZE (1ULL << 39) /* 512 GB */
+@@ -50,12 +51,52 @@ enum btt_init_state {
+ INIT_READY
+ };
+
++/*
++ * A log group represents one log 'lane', and consists of four log entries.
++ * Two of the four entries are valid entries, and the remaining two are
++ * padding. Due to an old bug in the padding location, we need to perform a
++ * test to determine the padding scheme being used, and use that scheme
++ * thereafter.
++ *
++ * In kernels prior to 4.15, 'log group' would have actual log entries at
++ * indices (0, 2) and padding at indices (1, 3), where as the correct/updated
++ * format has log entries at indices (0, 1) and padding at indices (2, 3).
++ *
++ * Old (pre 4.15) format:
++ * +-----------------+-----------------+
++ * | ent[0] | ent[1] |
++ * | 16B | 16B |
++ * | lba/old/new/seq | pad |
++ * +-----------------------------------+
++ * | ent[2] | ent[3] |
++ * | 16B | 16B |
++ * | lba/old/new/seq | pad |
++ * +-----------------+-----------------+
++ *
++ * New format:
++ * +-----------------+-----------------+
++ * | ent[0] | ent[1] |
++ * | 16B | 16B |
++ * | lba/old/new/seq | lba/old/new/seq |
++ * +-----------------------------------+
++ * | ent[2] | ent[3] |
++ * | 16B | 16B |
++ * | pad | pad |
++ * +-----------------+-----------------+
++ *
++ * We detect during start-up which format is in use, and set
++ * arena->log_index[(0, 1)] with the detected format.
++ */
++
+ struct log_entry {
+ __le32 lba;
+ __le32 old_map;
+ __le32 new_map;
+ __le32 seq;
+- __le64 padding[2];
++};
++
++struct log_group {
++ struct log_entry ent[4];
+ };
+
+ struct btt_sb {
+@@ -125,6 +166,7 @@ struct aligned_lock {
+ * @list: List head for list of arenas
+ * @debugfs_dir: Debugfs dentry
+ * @flags: Arena flags - may signify error states.
++ * @log_index: Indices of the valid log entries in a log_group
+ *
+ * arena_info is a per-arena handle. Once an arena is narrowed down for an
+ * IO, this struct is passed around for the duration of the IO.
+@@ -157,6 +199,7 @@ struct arena_info {
+ /* Arena flags */
+ u32 flags;
+ struct mutex err_lock;
++ int log_index[2];
+ };
+
+ /**
--- /dev/null
+From 41fce90f26333c4fa82e8e43b9ace86c4e8a0120 Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Mon, 4 Dec 2017 14:07:43 -0800
+Subject: libnvdimm, dax: fix 1GB-aligned namespaces vs physical misalignment
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit 41fce90f26333c4fa82e8e43b9ace86c4e8a0120 upstream.
+
+The following namespace configuration attempt:
+
+ # ndctl create-namespace -e namespace0.0 -m devdax -a 1G -f
+ libndctl: ndctl_dax_enable: dax0.1: failed to enable
+ Error: namespace0.0: failed to enable
+
+ failed to reconfigure namespace: No such device or address
+
+...fails when the backing memory range is not physically aligned to 1G:
+
+ # cat /proc/iomem | grep Persistent
+ 210000000-30fffffff : Persistent Memory (legacy)
+
+In the above example the 4G persistent memory range starts and ends on a
+256MB boundary.
+
+We handle this case correctly when needing to handle cases that violate
+section alignment (128MB) collisions against "System RAM", and we simply
+need to extend that padding/truncation for the 1GB alignment use case.
+
+Fixes: 315c562536c4 ("libnvdimm, pfn: add 'align' attribute...")
+Reported-and-tested-by: Jane Chu <jane.chu@oracle.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/nvdimm/pfn_devs.c | 15 ++++++++++++---
+ 1 file changed, 12 insertions(+), 3 deletions(-)
+
+--- a/drivers/nvdimm/pfn_devs.c
++++ b/drivers/nvdimm/pfn_devs.c
+@@ -582,6 +582,12 @@ static struct vmem_altmap *__nvdimm_setu
+ return altmap;
+ }
+
++static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys)
++{
++ return min_t(u64, PHYS_SECTION_ALIGN_DOWN(phys),
++ ALIGN_DOWN(phys, nd_pfn->align));
++}
++
+ static int nd_pfn_init(struct nd_pfn *nd_pfn)
+ {
+ u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0;
+@@ -637,13 +643,16 @@ static int nd_pfn_init(struct nd_pfn *nd
+ start = nsio->res.start;
+ size = PHYS_SECTION_ALIGN_UP(start + size) - start;
+ if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
+- IORES_DESC_NONE) == REGION_MIXED) {
++ IORES_DESC_NONE) == REGION_MIXED
++ || !IS_ALIGNED(start + resource_size(&nsio->res),
++ nd_pfn->align)) {
+ size = resource_size(&nsio->res);
+- end_trunc = start + size - PHYS_SECTION_ALIGN_DOWN(start + size);
++ end_trunc = start + size - phys_pmem_align_down(nd_pfn,
++ start + size);
+ }
+
+ if (start_pad + end_trunc)
+- dev_info(&nd_pfn->dev, "%s section collision, truncate %d bytes\n",
++ dev_info(&nd_pfn->dev, "%s alignment collision, truncate %d bytes\n",
+ dev_name(&ndns->dev), start_pad + end_trunc);
+
+ /*
--- /dev/null
+From 19deaa217bc04e83b59b5e8c8229eb0e53ad9efc Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Tue, 19 Dec 2017 15:07:10 -0800
+Subject: libnvdimm, pfn: fix start_pad handling for aligned namespaces
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit 19deaa217bc04e83b59b5e8c8229eb0e53ad9efc upstream.
+
+The alignment checks at pfn driver startup fail to properly account for
+the 'start_pad' in the case where the namespace is misaligned relative
+to its internal alignment. This is typically triggered in a 1G aligned
+namespace, but could theoretically trigger with small namespace
+alignments. When this triggers, the kernel reports messages of the form:
+
+ dax2.1: bad offset: 0x3c000000 dax disabled align: 0x40000000
+
+Fixes: 1ee6667cd8d1 ("libnvdimm, pfn, dax: fix initialization vs autodetect...")
+Reported-by: Jane Chu <jane.chu@oracle.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/nvdimm/pfn_devs.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/nvdimm/pfn_devs.c
++++ b/drivers/nvdimm/pfn_devs.c
+@@ -364,9 +364,9 @@ struct device *nd_pfn_create(struct nd_r
+ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
+ {
+ u64 checksum, offset;
+- unsigned long align;
+ enum nd_pfn_mode mode;
+ struct nd_namespace_io *nsio;
++ unsigned long align, start_pad;
+ struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
+ struct nd_namespace_common *ndns = nd_pfn->ndns;
+ const u8 *parent_uuid = nd_dev_to_uuid(&ndns->dev);
+@@ -410,6 +410,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pf
+
+ align = le32_to_cpu(pfn_sb->align);
+ offset = le64_to_cpu(pfn_sb->dataoff);
++ start_pad = le32_to_cpu(pfn_sb->start_pad);
+ if (align == 0)
+ align = 1UL << ilog2(offset);
+ mode = le32_to_cpu(pfn_sb->mode);
+@@ -468,7 +469,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pf
+ return -EBUSY;
+ }
+
+- if ((align && !IS_ALIGNED(offset, align))
++ if ((align && !IS_ALIGNED(nsio->res.start + offset + start_pad, align))
+ || !IS_ALIGNED(offset, PAGE_SIZE)) {
+ dev_err(&nd_pfn->dev,
+ "bad offset: %#llx dax disabled align: %#lx\n",
--- /dev/null
+From 15d8374874ded0bec37ef27f8301a6d54032c0e5 Mon Sep 17 00:00:00 2001
+From: Jon Hunter <jonathanh@nvidia.com>
+Date: Tue, 14 Nov 2017 14:43:27 +0000
+Subject: mfd: cros ec: spi: Don't send first message too soon
+
+From: Jon Hunter <jonathanh@nvidia.com>
+
+commit 15d8374874ded0bec37ef27f8301a6d54032c0e5 upstream.
+
+On the Tegra124 Nyan-Big chromebook the very first SPI message sent to
+the EC is failing.
+
+The Tegra SPI driver configures the SPI chip-selects to be active-high
+by default (and always has for many years). The EC SPI requires an
+active-low chip-select and so the Tegra chip-select is reconfigured to
+be active-low when the EC SPI driver calls spi_setup(). The problem is
+that if the first SPI message to the EC is sent too soon after
+reconfiguring the SPI chip-select, it fails.
+
+The EC SPI driver prevents back-to-back SPI messages being sent too
+soon by keeping track of the time the last transfer was sent via the
+variable 'last_transfer_ns'. To prevent the very first transfer being
+sent too soon, initialise the 'last_transfer_ns' variable after calling
+spi_setup() and before sending the first SPI message.
+
+Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
+Reviewed-by: Brian Norris <briannorris@chromium.org>
+Reviewed-by: Douglas Anderson <dianders@chromium.org>
+Acked-by: Benson Leung <bleung@chromium.org>
+Signed-off-by: Lee Jones <lee.jones@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/mfd/cros_ec_spi.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/mfd/cros_ec_spi.c
++++ b/drivers/mfd/cros_ec_spi.c
+@@ -667,6 +667,7 @@ static int cros_ec_spi_probe(struct spi_
+ sizeof(struct ec_response_get_protocol_info);
+ ec_dev->dout_size = sizeof(struct ec_host_request);
+
++ ec_spi->last_transfer_ns = ktime_get_ns();
+
+ err = cros_ec_register(ec_dev);
+ if (err) {
--- /dev/null
+From 0a423772de2f3d7b00899987884f62f63ae00dcb Mon Sep 17 00:00:00 2001
+From: Johan Hovold <johan@kernel.org>
+Date: Sat, 11 Nov 2017 16:38:43 +0100
+Subject: mfd: twl4030-audio: Fix sibling-node lookup
+
+From: Johan Hovold <johan@kernel.org>
+
+commit 0a423772de2f3d7b00899987884f62f63ae00dcb upstream.
+
+A helper purported to look up a child node based on its name was using
+the wrong of-helper and ended up prematurely freeing the parent of-node
+while leaking any matching node.
+
+To make things worse, any matching node would not even necessarily be a
+child node as the whole device tree was searched depth-first starting at
+the parent.
+
+Fixes: 019a7e6b7b31 ("mfd: twl4030-audio: Add DT support")
+Signed-off-by: Johan Hovold <johan@kernel.org>
+Acked-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
+Signed-off-by: Lee Jones <lee.jones@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/mfd/twl4030-audio.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/drivers/mfd/twl4030-audio.c
++++ b/drivers/mfd/twl4030-audio.c
+@@ -159,13 +159,18 @@ unsigned int twl4030_audio_get_mclk(void
+ EXPORT_SYMBOL_GPL(twl4030_audio_get_mclk);
+
+ static bool twl4030_audio_has_codec(struct twl4030_audio_data *pdata,
+- struct device_node *node)
++ struct device_node *parent)
+ {
++ struct device_node *node;
++
+ if (pdata && pdata->codec)
+ return true;
+
+- if (of_find_node_by_name(node, "codec"))
++ node = of_get_child_by_name(parent, "codec");
++ if (node) {
++ of_node_put(node);
+ return true;
++ }
+
+ return false;
+ }
--- /dev/null
+From 85e9b13cbb130a3209f21bd7933933399c389ffe Mon Sep 17 00:00:00 2001
+From: Johan Hovold <johan@kernel.org>
+Date: Sat, 11 Nov 2017 16:38:44 +0100
+Subject: mfd: twl6040: Fix child-node lookup
+
+From: Johan Hovold <johan@kernel.org>
+
+commit 85e9b13cbb130a3209f21bd7933933399c389ffe upstream.
+
+Fix child-node lookup during probe, which ended up searching the whole
+device tree depth-first starting at the parent rather than just matching
+on its children.
+
+To make things worse, the parent node was prematurely freed, while the
+child node was leaked.
+
+Note that the CONFIG_OF compile guard can be removed as
+of_get_child_by_name() provides a !CONFIG_OF implementation which always
+fails.
+
+Fixes: 37e13cecaa14 ("mfd: Add support for Device Tree to twl6040")
+Fixes: ca2cad6ae38e ("mfd: Fix twl6040 build failure")
+Signed-off-by: Johan Hovold <johan@kernel.org>
+Acked-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
+Signed-off-by: Lee Jones <lee.jones@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/mfd/twl6040.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+--- a/drivers/mfd/twl6040.c
++++ b/drivers/mfd/twl6040.c
+@@ -97,12 +97,16 @@ static struct reg_sequence twl6040_patch
+ };
+
+
+-static bool twl6040_has_vibra(struct device_node *node)
++static bool twl6040_has_vibra(struct device_node *parent)
+ {
+-#ifdef CONFIG_OF
+- if (of_find_node_by_name(node, "vibra"))
++ struct device_node *node;
++
++ node = of_get_child_by_name(parent, "vibra");
++ if (node) {
++ of_node_put(node);
+ return true;
+-#endif
++ }
++
+ return false;
+ }
+
--- /dev/null
+From 4423c18e466afdfb02a36ee8b9f901d144b3c607 Mon Sep 17 00:00:00 2001
+From: Yelena Krivosheev <yelena@marvell.com>
+Date: Tue, 19 Dec 2017 17:59:45 +0100
+Subject: net: mvneta: clear interface link status on port disable
+
+From: Yelena Krivosheev <yelena@marvell.com>
+
+commit 4423c18e466afdfb02a36ee8b9f901d144b3c607 upstream.
+
+When the port connects to the PHY in polling mode (with a poll interval of
+1 sec), the port and PHY link status must be kept synchronized in order
+not to lose a link change event.
+
+[gregory.clement@free-electrons.com: add fixes tag]
+Fixes: c5aff18204da ("net: mvneta: driver for Marvell Armada 370/XP network unit")
+Signed-off-by: Yelena Krivosheev <yelena@marvell.com>
+Tested-by: Dmitri Epshtein <dima@marvell.com>
+Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/ethernet/marvell/mvneta.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -1214,6 +1214,10 @@ static void mvneta_port_disable(struct m
+ val &= ~MVNETA_GMAC0_PORT_ENABLE;
+ mvreg_write(pp, MVNETA_GMAC_CTRL_0, val);
+
++ pp->link = 0;
++ pp->duplex = -1;
++ pp->speed = 0;
++
+ udelay(200);
+ }
+
--- /dev/null
+From 2eecb2e04abb62ef8ea7b43e1a46bdb5b99d1bf8 Mon Sep 17 00:00:00 2001
+From: Yelena Krivosheev <yelena@marvell.com>
+Date: Tue, 19 Dec 2017 17:59:47 +0100
+Subject: net: mvneta: eliminate wrong call to handle rx descriptor error
+
+From: Yelena Krivosheev <yelena@marvell.com>
+
+commit 2eecb2e04abb62ef8ea7b43e1a46bdb5b99d1bf8 upstream.
+
+There are a few reasons why a received packet may be dropped in the
+mvneta_rx_swbm() function. mvneta_rx_error() should be called only if
+error bit [16] is set in the rx descriptor.
+
+[gregory.clement@free-electrons.com: add fixes tag]
+Fixes: dc35a10f68d3 ("net: mvneta: bm: add support for hardware buffer management")
+Signed-off-by: Yelena Krivosheev <yelena@marvell.com>
+Tested-by: Dmitri Epshtein <dima@marvell.com>
+Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/ethernet/marvell/mvneta.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -1962,9 +1962,9 @@ static int mvneta_rx_swbm(struct mvneta_
+
+ if (!mvneta_rxq_desc_is_first_last(rx_status) ||
+ (rx_status & MVNETA_RXD_ERR_SUMMARY)) {
++ mvneta_rx_error(pp, rx_desc);
+ err_drop_frame:
+ dev->stats.rx_errors++;
+- mvneta_rx_error(pp, rx_desc);
+ /* leave the descriptor untouched */
+ continue;
+ }
--- /dev/null
+From ca5902a6547f662419689ca28b3c29a772446caa Mon Sep 17 00:00:00 2001
+From: Yelena Krivosheev <yelena@marvell.com>
+Date: Tue, 19 Dec 2017 17:59:46 +0100
+Subject: net: mvneta: use proper rxq_number in loop on rx queues
+
+From: Yelena Krivosheev <yelena@marvell.com>
+
+commit ca5902a6547f662419689ca28b3c29a772446caa upstream.
+
+When adding the RX queue association with each CPU, a typo was made in
+the mvneta_cleanup_rxqs() function. This patch fixes it.
+
+[gregory.clement@free-electrons.com: add commit log and fixes tag]
+Fixes: 2dcf75e2793c ("net: mvneta: Associate RX queues with each CPU")
+Signed-off-by: Yelena Krivosheev <yelena@marvell.com>
+Tested-by: Dmitri Epshtein <dima@marvell.com>
+Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/ethernet/marvell/mvneta.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -3015,7 +3015,7 @@ static void mvneta_cleanup_rxqs(struct m
+ {
+ int queue;
+
+- for (queue = 0; queue < txq_number; queue++)
++ for (queue = 0; queue < rxq_number; queue++)
+ mvneta_rxq_deinit(pp, &pp->rxqs[queue]);
+ }
+
--- /dev/null
+From 0ed9d3de5f8f97e6efd5ca0e3377cab5f0451ead Mon Sep 17 00:00:00 2001
+From: Helge Deller <deller@gmx.de>
+Date: Tue, 12 Dec 2017 21:25:41 +0100
+Subject: parisc: Align os_hpmc_size on word boundary
+
+From: Helge Deller <deller@gmx.de>
+
+commit 0ed9d3de5f8f97e6efd5ca0e3377cab5f0451ead upstream.
+
+The os_hpmc_size variable sometimes wasn't aligned on a word boundary and thus
+triggered the unaligned fault handler at startup.
+Fix it by aligning it properly.
+
+Signed-off-by: Helge Deller <deller@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/parisc/kernel/hpmc.S | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/parisc/kernel/hpmc.S
++++ b/arch/parisc/kernel/hpmc.S
+@@ -305,6 +305,7 @@ ENDPROC_CFI(os_hpmc)
+
+
+ __INITRODATA
++ .align 4
+ .export os_hpmc_size
+ os_hpmc_size:
+ .word .os_hpmc_end-.os_hpmc
--- /dev/null
+From 203c110b39a89b48156c7450504e454fedb7f7f6 Mon Sep 17 00:00:00 2001
+From: Helge Deller <deller@gmx.de>
+Date: Tue, 12 Dec 2017 21:32:16 +0100
+Subject: parisc: Fix indenting in puts()
+
+From: Helge Deller <deller@gmx.de>
+
+commit 203c110b39a89b48156c7450504e454fedb7f7f6 upstream.
+
+Static analysis tools complain that we intended to have curly braces
+around this indent block. In this case this assumption is wrong, so fix
+the indenting.
+
+Fixes: 2f3c7b8137ef ("parisc: Add core code for self-extracting kernel")
+Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: Helge Deller <deller@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/parisc/boot/compressed/misc.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/parisc/boot/compressed/misc.c
++++ b/arch/parisc/boot/compressed/misc.c
+@@ -123,8 +123,8 @@ int puts(const char *s)
+ while ((nuline = strchr(s, '\n')) != NULL) {
+ if (nuline != s)
+ pdc_iodc_print(s, nuline - s);
+- pdc_iodc_print("\r\n", 2);
+- s = nuline + 1;
++ pdc_iodc_print("\r\n", 2);
++ s = nuline + 1;
+ }
+ if (*s != '\0')
+ pdc_iodc_print(s, strlen(s));
--- /dev/null
+From bcf3f1752a622f1372d3252d0fea8855d89812e7 Mon Sep 17 00:00:00 2001
+From: Helge Deller <deller@gmx.de>
+Date: Tue, 12 Dec 2017 21:52:26 +0100
+Subject: parisc: Hide Diva-built-in serial aux and graphics card
+
+From: Helge Deller <deller@gmx.de>
+
+commit bcf3f1752a622f1372d3252d0fea8855d89812e7 upstream.
+
+The Diva GSP card has a built-in serial AUX port and an ATI graphics card
+which simply don't work and which both lack external connectors. The User
+Guides even mention that those devices shouldn't be used.
+So, prevent Linux drivers from trying to enable those devices.
+
+Signed-off-by: Helge Deller <deller@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/parisc/lba_pci.c | 33 +++++++++++++++++++++++++++++++++
+ 1 file changed, 33 insertions(+)
+
+--- a/drivers/parisc/lba_pci.c
++++ b/drivers/parisc/lba_pci.c
+@@ -1692,3 +1692,36 @@ void lba_set_iregs(struct parisc_device
+ iounmap(base_addr);
+ }
+
++
++/*
++ * The design of the Diva management card in rp34x0 machines (rp3410, rp3440)
++ * seems rushed, so that many built-in components simply don't work.
++ * The following quirks disable the serial AUX port and the built-in ATI RV100
++ * Radeon 7000 graphics card which both don't have any external connectors and
++ * thus are useless, and even worse, e.g. the AUX port occupies ttyS0 and as
++ * such makes those machines the only PARISC machines on which we can't use
++ * ttyS0 as boot console.
++ */
++static void quirk_diva_ati_card(struct pci_dev *dev)
++{
++ if (dev->subsystem_vendor != PCI_VENDOR_ID_HP ||
++ dev->subsystem_device != 0x1292)
++ return;
++
++ dev_info(&dev->dev, "Hiding Diva built-in ATI card");
++ dev->device = 0;
++}
++DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_RADEON_QY,
++ quirk_diva_ati_card);
++
++static void quirk_diva_aux_disable(struct pci_dev *dev)
++{
++ if (dev->subsystem_vendor != PCI_VENDOR_ID_HP ||
++ dev->subsystem_device != 0x1291)
++ return;
++
++ dev_info(&dev->dev, "Hiding Diva built-in AUX serial device");
++ dev->device = 0;
++}
++DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_DIVA_AUX,
++ quirk_diva_aux_disable);
--- /dev/null
+From 5839ee7389e893a31e4e3c9cf17b50d14103c902 Mon Sep 17 00:00:00 2001
+From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
+Date: Fri, 15 Dec 2017 03:07:18 +0100
+Subject: PCI / PM: Force devices to D0 in pci_pm_thaw_noirq()
+
+From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+commit 5839ee7389e893a31e4e3c9cf17b50d14103c902 upstream.
+
+It is incorrect to call pci_restore_state() for devices in low-power
+states (D1-D3), as that involves the restoration of MSI setup which
+requires MMIO to be operational and that is only the case in D0.
+
+However, pci_pm_thaw_noirq() may do that if the driver's "freeze"
+callbacks put the device into a low-power state, so fix it by making
+it force devices into D0 via pci_set_power_state() instead of trying
+to "update" their power state which is pointless.
+
+Fixes: e60514bd4485 (PCI/PM: Restore the status of PCI devices across hibernation)
+Reported-by: Thomas Gleixner <tglx@linutronix.de>
+Reported-by: Maarten Lankhorst <dev@mblankhorst.nl>
+Tested-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Maarten Lankhorst <dev@mblankhorst.nl>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Acked-by: Bjorn Helgaas <bhelgaas@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/pci/pci-driver.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/drivers/pci/pci-driver.c
++++ b/drivers/pci/pci-driver.c
+@@ -968,7 +968,12 @@ static int pci_pm_thaw_noirq(struct devi
+ if (pci_has_legacy_pm_support(pci_dev))
+ return pci_legacy_resume_early(dev);
+
+- pci_update_current_state(pci_dev, PCI_D0);
++ /*
++ * pci_restore_state() requires the device to be in D0 (because of MSI
++ * restoration among other things), so force it into D0 in case the
++ * driver's "freeze" callbacks put it into a low-power state directly.
++ */
++ pci_set_power_state(pci_dev, PCI_D0);
+ pci_restore_state(pci_dev);
+
+ if (drv && drv->pm && drv->pm->thaw_noirq)
--- /dev/null
+From d2b3c353595a855794f8b9df5b5bdbe8deb0c413 Mon Sep 17 00:00:00 2001
+From: Mika Westerberg <mika.westerberg@linux.intel.com>
+Date: Mon, 4 Dec 2017 12:11:02 +0300
+Subject: pinctrl: cherryview: Mask all interrupts on Intel_Strago based systems
+
+From: Mika Westerberg <mika.westerberg@linux.intel.com>
+
+commit d2b3c353595a855794f8b9df5b5bdbe8deb0c413 upstream.
+
+Guenter Roeck reported an interrupt storm on a prototype system which is
+based on Cyan Chromebook. The root cause turned out to be an incorrectly
+configured pin that triggers spurious interrupts. This will be fixed in
+coreboot but currently we need to prevent the interrupt storm from
+happening by masking all interrupts (but not GPEs) on those systems.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=197953
+Fixes: bcb48cca23ec ("pinctrl: cherryview: Do not mask all interrupts in probe")
+Reported-and-tested-by: Guenter Roeck <linux@roeck-us.net>
+Reported-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
+Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
+Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/pinctrl/intel/pinctrl-cherryview.c | 16 ++++++++++++++++
+ 1 file changed, 16 insertions(+)
+
+--- a/drivers/pinctrl/intel/pinctrl-cherryview.c
++++ b/drivers/pinctrl/intel/pinctrl-cherryview.c
+@@ -1620,6 +1620,22 @@ static int chv_gpio_probe(struct chv_pin
+ clear_bit(i, chip->irq_valid_mask);
+ }
+
++ /*
++ * The same set of machines in chv_no_valid_mask[] have incorrectly
++ * configured GPIOs that generate spurious interrupts so we use
++ * this same list to apply another quirk for them.
++ *
++ * See also https://bugzilla.kernel.org/show_bug.cgi?id=197953.
++ */
++ if (!need_valid_mask) {
++ /*
++ * Mask all interrupts the community is able to generate
++ * but leave the ones that can only generate GPEs unmasked.
++ */
++ chv_writel(GENMASK(31, pctrl->community->nirqs),
++ pctrl->regs + CHV_INTMASK);
++ }
++
+ /* Clear all interrupts */
+ chv_writel(0xffff, pctrl->regs + CHV_INTSTAT);
+
--- /dev/null
+From f41d84dddc66b164ac16acf3f584c276146f1c48 Mon Sep 17 00:00:00 2001
+From: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
+Date: Tue, 12 Dec 2017 17:59:15 +0530
+Subject: powerpc/perf: Dereference BHRB entries safely
+
+From: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
+
+commit f41d84dddc66b164ac16acf3f584c276146f1c48 upstream.
+
+It's theoretically possible that branch instructions recorded in
+BHRB (Branch History Rolling Buffer) entries have already been
+unmapped before they are processed by the kernel. Hence, trying to
+dereference such memory location will result in a crash. eg:
+
+ Unable to handle kernel paging request for data at address 0xd000000019c41764
+ Faulting instruction address: 0xc000000000084a14
+ NIP [c000000000084a14] branch_target+0x4/0x70
+ LR [c0000000000eb828] record_and_restart+0x568/0x5c0
+ Call Trace:
+ [c0000000000eb3b4] record_and_restart+0xf4/0x5c0 (unreliable)
+ [c0000000000ec378] perf_event_interrupt+0x298/0x460
+ [c000000000027964] performance_monitor_exception+0x54/0x70
+ [c000000000009ba4] performance_monitor_common+0x114/0x120
+
+Fix it by dereferencing the addresses safely.
+
+Fixes: 691231846ceb ("powerpc/perf: Fix setting of "to" addresses for BHRB")
+Suggested-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
+Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+[mpe: Use probe_kernel_read() which is clearer, tweak change log]
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/perf/core-book3s.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/arch/powerpc/perf/core-book3s.c
++++ b/arch/powerpc/perf/core-book3s.c
+@@ -410,8 +410,12 @@ static __u64 power_pmu_bhrb_to(u64 addr)
+ int ret;
+ __u64 target;
+
+- if (is_kernel_addr(addr))
+- return branch_target((unsigned int *)addr);
++ if (is_kernel_addr(addr)) {
++ if (probe_kernel_read(&instr, (void *)addr, sizeof(instr)))
++ return 0;
++
++ return branch_target(&instr);
++ }
+
+ /* Userspace: need copy instruction here then translate it */
+ pagefault_disable();
--- /dev/null
+From john.einar@gmail.com Wed Dec 27 16:15:28 2017
+From: John Einar Reitan <john.einar@gmail.com>
+Date: Sun, 24 Dec 2017 00:03:44 +0100
+Subject: Revert "ipmi_si: fix memory leak on new_smi"
+To: stable@vger.kernel.org
+Cc: John Einar Reitan <john.einar@gmail.com>
+Message-ID: <20171223230344.2759-1-john.einar@gmail.com>
+
+From: John Einar Reitan <john.einar@gmail.com>
+
+This reverts commit c97e41076a298dbc4e910c33048e553658388eed, which
+incorrectly was taken from upstream c0a32fe13cd323ca9420500b16fd69589c9ba91e.
+
+The referenced memory leak doesn't exist on the 4.14 stable branch as
+the new logic of doing the kzalloc hasn't moved to this function.
+By adding this kfree we actually end up doing double kfree as all callers of
+add_smi do a kfree on error.
+
+Sample with SLAB_FREELIST_HARDENED=y:
+
+ipmi_si: Adding ACPI-specified kcs state machine
+IPMI System Interface driver.
+ipmi_si: probing via SPMI
+ipmi_si: SPMI: io 0xca2 regsize 1 spacing 1 irq 0
+(NULL device *): SPMI-specified kcs state machine: duplicate
+------------[ cut here ]------------
+kernel BUG at mm/slub.c:295!
+invalid opcode: 0000 [#1] SMP
+Modules linked in:
+CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.14.8-gentoo-r1 #5
+Hardware name: Supermicro X9SCL/X9SCM/X9SCL/X9SCM, BIOS 2.2 02/20/2015
+task: ffff88080c208000 task.stack: ffffc90000020000
+RIP: 0010:kfree+0xf5/0x157
+RSP: 0000:ffffc90000023e58 EFLAGS: 00010246
+RAX: ffff88080b2e6200 RBX: ffff88080b2e6200 RCX: ffff88080b2e6200
+RDX: 000000000000008e RSI: ffff88082fc1cd60 RDI: ffff88080c003080
+RBP: ffffc90000002808 R08: 000000000001cd60 R09: ffffffff814da10e
+R10: ffffea00202cb980 R11: 000000000000005c R12: ffffffff814da10e
+R13: 00000000ffffffed R14: ffffffff82317bd0 R15: 0000000000000003
+FS: 0000000000000000(0000) GS:ffff88082fc00000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000000000000000 CR3: 0000000002e09001 CR4: 00000000001606f0
+Call Trace:
+ init_ipmi_si+0x493/0x5c7
+ ? cleanup_ipmi_si+0x84/0x84
+ ? set_debug_rodata+0xc/0xc
+ ? kthread+0x4c/0x11c
+ do_one_initcall+0x94/0x13d
+ ? set_debug_rodata+0xc/0xc
+ kernel_init_freeable+0x112/0x18e
+ ? rest_init+0xa0/0xa0
+ kernel_init+0x5/0xe1
+ ret_from_fork+0x22/0x30
+Code: 24 18 49 8b 7a 30 48 8b 37 65 48 8b 56 08 65 48 03 35 3a 29 e2 7e 4c 3b 56 10 75 39 48 8b 0e 48 63 47 20 48 01 d8 48 39 cb 75 02 <0f> 0b 49 89 c0 4c 33
+ 87 40 01 00 00 4c 31 c1 48 89 08 48 8d 4a
+---[ end trace 4ac2e2c100842676 ]---
+
+Signed-off-by: John Einar Reitan <john.einar@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/char/ipmi/ipmi_si_intf.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/drivers/char/ipmi/ipmi_si_intf.c
++++ b/drivers/char/ipmi/ipmi_si_intf.c
+@@ -3469,7 +3469,6 @@ static int add_smi(struct smi_info *new_
+ ipmi_addr_src_to_str(new_smi->addr_source),
+ si_to_str[new_smi->si_type]);
+ rv = -EBUSY;
+- kfree(new_smi);
+ goto out_err;
+ }
+ }
--- /dev/null
+From 9352aeada4d8d8753fc0e414fbfe8fdfcb68a12c Mon Sep 17 00:00:00 2001
+From: John David Anglin <dave.anglin@bell.net>
+Date: Mon, 13 Nov 2017 19:35:33 -0500
+Subject: Revert "parisc: Re-enable interrupts early"
+
+From: John David Anglin <dave.anglin@bell.net>
+
+commit 9352aeada4d8d8753fc0e414fbfe8fdfcb68a12c upstream.
+
+This reverts commit 5c38602d83e584047906b41b162ababd4db4106d.
+
+Interrupts can't be enabled early because the register saves are done on
+the thread stack prior to switching to the IRQ stack. This caused stack
+overflows and the thread stack needed increasing to 32k. Even then,
+stack overflows still occasionally occurred.
+
+Background:
+Even with a 32 kB thread stack, I have seen instances where the thread
+stack overflowed on the mx3210 buildd. Detection of stack overflow only
+occurs when we have an external interrupt. When an external interrupt
+occurs, we switch to the thread stack if we are not already on a kernel
+stack. Then, registers and specials are saved to the kernel stack.
+
+The bug occurs in intr_return where interrupts are reenabled prior to
+returning from the interrupt. This was done in case we need to schedule
+or deliver signals. However, it introduces the possibility that
+multiple external interrupts may occur on the thread stack and cause a
+stack overflow. These might not be detected and cause the kernel to
+misbehave in random ways.
+
+This patch changes the code back to only reenable interrupts when we are
+going to schedule or deliver signals. As a result, we generally return
+from an interrupt before reenabling interrupts. This minimizes the
+growth of the thread stack.
+
+Fixes: 5c38602d83e5 ("parisc: Re-enable interrupts early")
+Signed-off-by: John David Anglin <dave.anglin@bell.net>
+Signed-off-by: Helge Deller <deller@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/parisc/kernel/entry.S | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+--- a/arch/parisc/kernel/entry.S
++++ b/arch/parisc/kernel/entry.S
+@@ -878,9 +878,6 @@ ENTRY_CFI(syscall_exit_rfi)
+ STREG %r19,PT_SR7(%r16)
+
+ intr_return:
+- /* NOTE: Need to enable interrupts incase we schedule. */
+- ssm PSW_SM_I, %r0
+-
+ /* check for reschedule */
+ mfctl %cr30,%r1
+ LDREG TI_FLAGS(%r1),%r19 /* sched.h: TIF_NEED_RESCHED */
+@@ -907,6 +904,11 @@ intr_check_sig:
+ LDREG PT_IASQ1(%r16), %r20
+ cmpib,COND(=),n 0,%r20,intr_restore /* backward */
+
++ /* NOTE: We need to enable interrupts if we have to deliver
++ * signals. We used to do this earlier but it caused kernel
++ * stack overflows. */
++ ssm PSW_SM_I, %r0
++
+ copy %r0, %r25 /* long in_syscall = 0 */
+ #ifdef CONFIG_64BIT
+ ldo -16(%r30),%r29 /* Reference param save area */
+@@ -958,6 +960,10 @@ intr_do_resched:
+ cmpib,COND(=) 0, %r20, intr_do_preempt
+ nop
+
++ /* NOTE: We need to enable interrupts if we schedule. We used
++ * to do this earlier but it caused kernel stack overflows. */
++ ssm PSW_SM_I, %r0
++
+ #ifdef CONFIG_64BIT
+ ldo -16(%r30),%r29 /* Reference param save area */
+ #endif
objtool-fix-64-bit-build-on-32-bit-host.patch
x86-decoder-fix-and-update-the-opcodes-map.patch
x86-insn-eval-add-utility-functions-to-get-segment-selector.patch
+x86-kconfig-limit-nr_cpus-on-32-bit-to-a-sane-amount.patch
+x86-mm-dump_pagetables-check-page_present-for-real.patch
+x86-mm-dump_pagetables-make-the-address-hints-correct-and-readable.patch
+x86-vsyscall-64-explicitly-set-_page_user-in-the-pagetable-hierarchy.patch
+x86-vsyscall-64-warn-and-fail-vsyscall-emulation-in-native-mode.patch
+arch-mm-allow-arch_dup_mmap-to-fail.patch
+x86-ldt-rework-locking.patch
+x86-ldt-prevent-ldt-inheritance-on-exec.patch
+x86-mm-64-improve-the-memory-map-documentation.patch
+x86-doc-remove-obvious-weirdnesses-from-the-x86-mm-layout-documentation.patch
+x86-entry-rename-sysenter_stack-to-cpu_entry_area_entry_stack.patch
+x86-uv-use-the-right-tlb-flush-api.patch
+x86-microcode-dont-abuse-the-tlb-flush-interface.patch
+x86-mm-use-__flush_tlb_one-for-kernel-memory.patch
+x86-mm-remove-superfluous-barriers.patch
+x86-mm-add-comments-to-clarify-which-tlb-flush-functions-are-supposed-to-flush-what.patch
+x86-mm-move-the-cr3-construction-functions-to-tlbflush.h.patch
+x86-mm-remove-hard-coded-asid-limit-checks.patch
+x86-mm-put-mmu-to-hardware-asid-translation-in-one-place.patch
+x86-mm-create-asm-invpcid.h.patch
+x86-cpu_entry_area-move-it-to-a-separate-unit.patch
+x86-cpu_entry_area-move-it-out-of-the-fixmap.patch
+init-invoke-init_espfix_bsp-from-mm_init.patch
+x86-cpu_entry_area-prevent-wraparound-in-setup_cpu_entry_area_ptes-on-32bit.patch
+acpi-apei-erst-fix-missing-error-handling-in-erst_reader.patch
+acpi-nfit-fix-health-event-notification.patch
+crypto-skcipher-set-walk.iv-for-zero-length-inputs.patch
+crypto-mcryptd-protect-the-per-cpu-queue-with-a-lock.patch
+crypto-af_alg-wait-for-data-at-beginning-of-recvmsg.patch
+crypto-af_alg-fix-race-accessing-cipher-request.patch
+mfd-cros-ec-spi-don-t-send-first-message-too-soon.patch
+mfd-twl4030-audio-fix-sibling-node-lookup.patch
+mfd-twl6040-fix-child-node-lookup.patch
+alsa-rawmidi-avoid-racy-info-ioctl-via-ctl-device.patch
+alsa-hda-realtek-fix-dell-aio-lineout-issue.patch
+alsa-hda-add-vendor-id-for-cannonlake-hdmi-codec.patch
+alsa-usb-audio-add-native-dsd-support-for-esoteric-d-05x.patch
+alsa-usb-audio-fix-the-missing-ctl-name-suffix-at-parsing-su.patch
+pci-pm-force-devices-to-d0-in-pci_pm_thaw_noirq.patch
+block-unalign-call_single_data-in-struct-request.patch
+block-throttle-avoid-double-charge.patch
+parisc-align-os_hpmc_size-on-word-boundary.patch
+parisc-fix-indenting-in-puts.patch
+parisc-hide-diva-built-in-serial-aux-and-graphics-card.patch
+revert-parisc-re-enable-interrupts-early.patch
+spi-xilinx-detect-stall-with-unknown-commands.patch
+spi-a3700-fix-clk-prescaling-for-coefficient-over-15.patch
+pinctrl-cherryview-mask-all-interrupts-on-intel_strago-based-systems.patch
+arm64-kvm-prevent-restoring-stale-pmscr_el1-for-vcpu.patch
+kvm-arm-arm64-fix-hyp-unmapping-going-off-limits.patch
+kvm-ppc-book3s-fix-xive-migration-of-pending-interrupts.patch
+kvm-ppc-book3s-hv-fix-pending_pri-value-in-kvmppc_xive_get_icp.patch
+kvm-mmu-fix-infinite-loop-when-there-is-no-available-mmu-page.patch
+kvm-x86-fix-load-rflags-w-o-the-fixed-bit.patch
+kvm-x86-fix-rsm-when-pcid-is-non-zero.patch
+clk-sunxi-sun9i-mmc-implement-reset-callback-for-reset-controls.patch
+powerpc-perf-dereference-bhrb-entries-safely.patch
+drm-i915-flush-pending-gtt-writes-before-unbinding.patch
+drm-sun4i-fix-error-path-handling.patch
+libnvdimm-dax-fix-1gb-aligned-namespaces-vs-physical-misalignment.patch
+libnvdimm-btt-fix-an-incompatibility-in-the-log-layout.patch
+libnvdimm-pfn-fix-start_pad-handling-for-aligned-namespaces.patch
+net-mvneta-clear-interface-link-status-on-port-disable.patch
+net-mvneta-use-proper-rxq_number-in-loop-on-rx-queues.patch
+net-mvneta-eliminate-wrong-call-to-handle-rx-descriptor-error.patch
+revert-ipmi_si-fix-memory-leak-on-new_smi.patch
--- /dev/null
+From 251c201bf4f8b5bf4f1ccb4f8920eed2e1f57580 Mon Sep 17 00:00:00 2001
+From: Maxime Chevallier <maxime.chevallier@smile.fr>
+Date: Mon, 27 Nov 2017 15:16:32 +0100
+Subject: spi: a3700: Fix clk prescaling for coefficient over 15
+
+From: Maxime Chevallier <maxime.chevallier@smile.fr>
+
+commit 251c201bf4f8b5bf4f1ccb4f8920eed2e1f57580 upstream.
+
+The Armada 3700 SPI controller has 2 ranges of prescaler coefficients.
+One ranging from 0 to 15 by steps of 1, and one ranging from 0 to 30 by
+steps of 2.
+
+This commit fixes the prescaler coefficients that are over 15 so that it
+uses the correct range of values. The prescaling coefficient is rounded
+to the upper value if it is odd.
+
+This was tested on Espressobin with spidev and a logic analyser.
+
+Signed-off-by: Maxime Chevallier <maxime.chevallier@smile.fr>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/spi/spi-armada-3700.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/drivers/spi/spi-armada-3700.c
++++ b/drivers/spi/spi-armada-3700.c
+@@ -79,6 +79,7 @@
+ #define A3700_SPI_BYTE_LEN BIT(5)
+ #define A3700_SPI_CLK_PRESCALE BIT(0)
+ #define A3700_SPI_CLK_PRESCALE_MASK (0x1f)
++#define A3700_SPI_CLK_EVEN_OFFS (0x10)
+
+ #define A3700_SPI_WFIFO_THRS_BIT 28
+ #define A3700_SPI_RFIFO_THRS_BIT 24
+@@ -220,6 +221,13 @@ static void a3700_spi_clock_set(struct a
+
+ prescale = DIV_ROUND_UP(clk_get_rate(a3700_spi->clk), speed_hz);
+
++ /* For prescaler values over 15, we can only set it by steps of 2.
++ * Starting from A3700_SPI_CLK_EVEN_OFFS, we set values from 0 up to
++ * 30. We only use this range from 16 to 30.
++ */
++ if (prescale > 15)
++ prescale = A3700_SPI_CLK_EVEN_OFFS + DIV_ROUND_UP(prescale, 2);
++
+ val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG);
+ val = val & ~A3700_SPI_CLK_PRESCALE_MASK;
+
--- /dev/null
+From 5a1314fa697fc65cefaba64cd4699bfc3e6882a6 Mon Sep 17 00:00:00 2001
+From: Ricardo Ribalda <ricardo.ribalda@gmail.com>
+Date: Tue, 21 Nov 2017 10:09:02 +0100
+Subject: spi: xilinx: Detect stall with Unknown commands
+
+From: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
+
+commit 5a1314fa697fc65cefaba64cd4699bfc3e6882a6 upstream.
+
+When the core is configured in C_SPI_MODE > 0, it integrates a
+lookup table that automatically configures the core in dual or quad mode
+based on the command (first byte on the tx fifo).
+
+Unfortunately, that list mode_?_memoy_*.mif does not contain all the
+supported commands by the flash.
+
+Since 4.14 spi-nor automatically tries to probe the flash using SFDP
+(command 0x5a), and that command is not part of the list_mode table.
+
+With the right combination of C_SPI_MODE and C_SPI_MEMORY this leads
+into a stall that can only be recovered with a soft reset.
+
+This patch detects this kind of stall and returns -EIO to the caller on
+those commands. spi-nor can handle this error properly:
+
+m25p80 spi0.0: Detected stall. Check C_SPI_MODE and C_SPI_MEMORY. 0x21 0x2404
+m25p80 spi0.0: SPI transfer failed: -5
+spi_master spi0: failed to transfer one message from queue
+m25p80 spi0.0: s25sl064p (8192 Kbytes)
+
+Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/spi/spi-xilinx.c | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+--- a/drivers/spi/spi-xilinx.c
++++ b/drivers/spi/spi-xilinx.c
+@@ -271,6 +271,7 @@ static int xilinx_spi_txrx_bufs(struct s
+ while (remaining_words) {
+ int n_words, tx_words, rx_words;
+ u32 sr;
++ int stalled;
+
+ n_words = min(remaining_words, xspi->buffer_size);
+
+@@ -299,7 +300,17 @@ static int xilinx_spi_txrx_bufs(struct s
+
+ /* Read out all the data from the Rx FIFO */
+ rx_words = n_words;
++ stalled = 10;
+ while (rx_words) {
++ if (rx_words == n_words && !(stalled--) &&
++ !(sr & XSPI_SR_TX_EMPTY_MASK) &&
++ (sr & XSPI_SR_RX_EMPTY_MASK)) {
++ dev_err(&spi->dev,
++ "Detected stall. Check C_SPI_MODE and C_SPI_MEMORY\n");
++ xspi_init_hw(xspi);
++ return -EIO;
++ }
++
+ if ((sr & XSPI_SR_TX_EMPTY_MASK) && (rx_words > 1)) {
+ xilinx_spi_rx(xspi);
+ rx_words--;
--- /dev/null
+From 92a0f81d89571e3e8759366e050ee05cc545ef99 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 20 Dec 2017 18:51:31 +0100
+Subject: x86/cpu_entry_area: Move it out of the fixmap
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 92a0f81d89571e3e8759366e050ee05cc545ef99 upstream.
+
+Put the cpu_entry_area into a separate P4D entry. The fixmap gets too big
+and 0-day already hit a case where the fixmap PTEs were cleared by
+cleanup_highmap().
+
+Aside of that the fixmap API is a pain as it's all backwards.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ Documentation/x86/x86_64/mm.txt | 2
+ arch/x86/include/asm/cpu_entry_area.h | 18 ++++++++
+ arch/x86/include/asm/desc.h | 1
+ arch/x86/include/asm/fixmap.h | 32 ---------------
+ arch/x86/include/asm/pgtable_32_types.h | 15 +++++--
+ arch/x86/include/asm/pgtable_64_types.h | 47 +++++++++++++---------
+ arch/x86/kernel/dumpstack.c | 1
+ arch/x86/kernel/traps.c | 5 +-
+ arch/x86/mm/cpu_entry_area.c | 66 ++++++++++++++++++++++++--------
+ arch/x86/mm/dump_pagetables.c | 6 ++
+ arch/x86/mm/init_32.c | 6 ++
+ arch/x86/mm/kasan_init_64.c | 29 +++++++-------
+ arch/x86/mm/pgtable_32.c | 1
+ arch/x86/xen/mmu_pv.c | 2
+ 14 files changed, 143 insertions(+), 88 deletions(-)
+
+--- a/Documentation/x86/x86_64/mm.txt
++++ b/Documentation/x86/x86_64/mm.txt
+@@ -12,6 +12,7 @@ ffffea0000000000 - ffffeaffffffffff (=40
+ ... unused hole ...
+ ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
+ ... unused hole ...
++fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
+ ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
+ ... unused hole ...
+ ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
+@@ -35,6 +36,7 @@ ffd4000000000000 - ffd5ffffffffffff (=49
+ ... unused hole ...
+ ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
+ ... unused hole ...
++fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
+ ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
+ ... unused hole ...
+ ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
+--- a/arch/x86/include/asm/cpu_entry_area.h
++++ b/arch/x86/include/asm/cpu_entry_area.h
+@@ -43,10 +43,26 @@ struct cpu_entry_area {
+ };
+
+ #define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area))
+-#define CPU_ENTRY_AREA_PAGES (CPU_ENTRY_AREA_SIZE / PAGE_SIZE)
++#define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS)
+
+ DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
+
+ extern void setup_cpu_entry_areas(void);
++extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
++
++#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE
++#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
++
++#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT)
++
++#define CPU_ENTRY_AREA_MAP_SIZE \
++ (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE)
++
++extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
++
++static inline struct entry_stack *cpu_entry_stack(int cpu)
++{
++ return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
++}
+
+ #endif
+--- a/arch/x86/include/asm/desc.h
++++ b/arch/x86/include/asm/desc.h
+@@ -7,6 +7,7 @@
+ #include <asm/mmu.h>
+ #include <asm/fixmap.h>
+ #include <asm/irq_vectors.h>
++#include <asm/cpu_entry_area.h>
+
+ #include <linux/smp.h>
+ #include <linux/percpu.h>
+--- a/arch/x86/include/asm/fixmap.h
++++ b/arch/x86/include/asm/fixmap.h
+@@ -25,7 +25,6 @@
+ #else
+ #include <uapi/asm/vsyscall.h>
+ #endif
+-#include <asm/cpu_entry_area.h>
+
+ /*
+ * We can't declare FIXADDR_TOP as variable for x86_64 because vsyscall
+@@ -84,7 +83,6 @@ enum fixed_addresses {
+ FIX_IO_APIC_BASE_0,
+ FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
+ #endif
+- FIX_RO_IDT, /* Virtual mapping for read-only IDT */
+ #ifdef CONFIG_X86_32
+ FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
+ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
+@@ -100,9 +98,6 @@ enum fixed_addresses {
+ #ifdef CONFIG_X86_INTEL_MID
+ FIX_LNW_VRTC,
+ #endif
+- /* Fixmap entries to remap the GDTs, one per processor. */
+- FIX_CPU_ENTRY_AREA_TOP,
+- FIX_CPU_ENTRY_AREA_BOTTOM = FIX_CPU_ENTRY_AREA_TOP + (CPU_ENTRY_AREA_PAGES * NR_CPUS) - 1,
+
+ #ifdef CONFIG_ACPI_APEI_GHES
+ /* Used for GHES mapping from assorted contexts */
+@@ -143,7 +138,7 @@ enum fixed_addresses {
+ extern void reserve_top_address(unsigned long reserve);
+
+ #define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
+-#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
++#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
+
+ extern int fixmaps_set;
+
+@@ -191,30 +186,5 @@ void __init *early_memremap_decrypted_wp
+ void __early_set_fixmap(enum fixed_addresses idx,
+ phys_addr_t phys, pgprot_t flags);
+
+-static inline unsigned int __get_cpu_entry_area_page_index(int cpu, int page)
+-{
+- BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
+-
+- return FIX_CPU_ENTRY_AREA_BOTTOM - cpu*CPU_ENTRY_AREA_PAGES - page;
+-}
+-
+-#define __get_cpu_entry_area_offset_index(cpu, offset) ({ \
+- BUILD_BUG_ON(offset % PAGE_SIZE != 0); \
+- __get_cpu_entry_area_page_index(cpu, offset / PAGE_SIZE); \
+- })
+-
+-#define get_cpu_entry_area_index(cpu, field) \
+- __get_cpu_entry_area_offset_index((cpu), offsetof(struct cpu_entry_area, field))
+-
+-static inline struct cpu_entry_area *get_cpu_entry_area(int cpu)
+-{
+- return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0));
+-}
+-
+-static inline struct entry_stack *cpu_entry_stack(int cpu)
+-{
+- return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
+-}
+-
+ #endif /* !__ASSEMBLY__ */
+ #endif /* _ASM_X86_FIXMAP_H */
+--- a/arch/x86/include/asm/pgtable_32_types.h
++++ b/arch/x86/include/asm/pgtable_32_types.h
+@@ -38,13 +38,22 @@ extern bool __vmalloc_start_set; /* set
+ #define LAST_PKMAP 1024
+ #endif
+
+-#define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE * (LAST_PKMAP + 1)) \
+- & PMD_MASK)
++/*
++ * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c
++ * to avoid include recursion hell
++ */
++#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40)
++
++#define CPU_ENTRY_AREA_BASE \
++ ((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK)
++
++#define PKMAP_BASE \
++ ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
+
+ #ifdef CONFIG_HIGHMEM
+ # define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE)
+ #else
+-# define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE)
++# define VMALLOC_END (CPU_ENTRY_AREA_BASE - 2 * PAGE_SIZE)
+ #endif
+
+ #define MODULES_VADDR VMALLOC_START
+--- a/arch/x86/include/asm/pgtable_64_types.h
++++ b/arch/x86/include/asm/pgtable_64_types.h
+@@ -76,32 +76,41 @@ typedef struct { pteval_t pte; } pte_t;
+ #define PGDIR_MASK (~(PGDIR_SIZE - 1))
+
+ /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
+-#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
++#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
++
+ #ifdef CONFIG_X86_5LEVEL
+-#define VMALLOC_SIZE_TB _AC(16384, UL)
+-#define __VMALLOC_BASE _AC(0xff92000000000000, UL)
+-#define __VMEMMAP_BASE _AC(0xffd4000000000000, UL)
++# define VMALLOC_SIZE_TB _AC(16384, UL)
++# define __VMALLOC_BASE _AC(0xff92000000000000, UL)
++# define __VMEMMAP_BASE _AC(0xffd4000000000000, UL)
+ #else
+-#define VMALLOC_SIZE_TB _AC(32, UL)
+-#define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
+-#define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
++# define VMALLOC_SIZE_TB _AC(32, UL)
++# define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
++# define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
+ #endif
++
+ #ifdef CONFIG_RANDOMIZE_MEMORY
+-#define VMALLOC_START vmalloc_base
+-#define VMEMMAP_START vmemmap_base
++# define VMALLOC_START vmalloc_base
++# define VMEMMAP_START vmemmap_base
+ #else
+-#define VMALLOC_START __VMALLOC_BASE
+-#define VMEMMAP_START __VMEMMAP_BASE
++# define VMALLOC_START __VMALLOC_BASE
++# define VMEMMAP_START __VMEMMAP_BASE
+ #endif /* CONFIG_RANDOMIZE_MEMORY */
+-#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
+-#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
++
++#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
++
++#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
+ /* The module sections ends with the start of the fixmap */
+-#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1)
+-#define MODULES_LEN (MODULES_END - MODULES_VADDR)
+-#define ESPFIX_PGD_ENTRY _AC(-2, UL)
+-#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT)
+-#define EFI_VA_START ( -4 * (_AC(1, UL) << 30))
+-#define EFI_VA_END (-68 * (_AC(1, UL) << 30))
++#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1)
++#define MODULES_LEN (MODULES_END - MODULES_VADDR)
++
++#define ESPFIX_PGD_ENTRY _AC(-2, UL)
++#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT)
++
++#define CPU_ENTRY_AREA_PGD _AC(-3, UL)
++#define CPU_ENTRY_AREA_BASE (CPU_ENTRY_AREA_PGD << P4D_SHIFT)
++
++#define EFI_VA_START ( -4 * (_AC(1, UL) << 30))
++#define EFI_VA_END (-68 * (_AC(1, UL) << 30))
+
+ #define EARLY_DYNAMIC_PAGE_TABLES 64
+
+--- a/arch/x86/kernel/dumpstack.c
++++ b/arch/x86/kernel/dumpstack.c
+@@ -18,6 +18,7 @@
+ #include <linux/nmi.h>
+ #include <linux/sysfs.h>
+
++#include <asm/cpu_entry_area.h>
+ #include <asm/stacktrace.h>
+ #include <asm/unwind.h>
+
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -951,8 +951,9 @@ void __init trap_init(void)
+ * "sidt" instruction will not leak the location of the kernel, and
+ * to defend the IDT against arbitrary memory write vulnerabilities.
+ * It will be reloaded in cpu_init() */
+- __set_fixmap(FIX_RO_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO);
+- idt_descr.address = fix_to_virt(FIX_RO_IDT);
++ cea_set_pte(CPU_ENTRY_AREA_RO_IDT_VADDR, __pa_symbol(idt_table),
++ PAGE_KERNEL_RO);
++ idt_descr.address = CPU_ENTRY_AREA_RO_IDT;
+
+ /*
+ * Should be a barrier for any external CPU state:
+--- a/arch/x86/mm/cpu_entry_area.c
++++ b/arch/x86/mm/cpu_entry_area.c
+@@ -15,11 +15,27 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char,
+ [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+ #endif
+
++struct cpu_entry_area *get_cpu_entry_area(int cpu)
++{
++ unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
++ BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
++
++ return (struct cpu_entry_area *) va;
++}
++EXPORT_SYMBOL(get_cpu_entry_area);
++
++void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
++{
++ unsigned long va = (unsigned long) cea_vaddr;
++
++ set_pte_vaddr(va, pfn_pte(pa >> PAGE_SHIFT, flags));
++}
++
+ static void __init
+-set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
++cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
+ {
+- for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
+- __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
++ for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE)
++ cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
+ }
+
+ /* Setup the fixmap mappings only once per-processor */
+@@ -47,10 +63,12 @@ static void __init setup_cpu_entry_area(
+ pgprot_t tss_prot = PAGE_KERNEL;
+ #endif
+
+- __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
+- set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page),
+- per_cpu_ptr(&entry_stack_storage, cpu), 1,
+- PAGE_KERNEL);
++ cea_set_pte(&get_cpu_entry_area(cpu)->gdt, get_cpu_gdt_paddr(cpu),
++ gdt_prot);
++
++ cea_map_percpu_pages(&get_cpu_entry_area(cpu)->entry_stack_page,
++ per_cpu_ptr(&entry_stack_storage, cpu), 1,
++ PAGE_KERNEL);
+
+ /*
+ * The Intel SDM says (Volume 3, 7.2.1):
+@@ -72,10 +90,9 @@ static void __init setup_cpu_entry_area(
+ BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
+ offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
+ BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
+- set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
+- &per_cpu(cpu_tss_rw, cpu),
+- sizeof(struct tss_struct) / PAGE_SIZE,
+- tss_prot);
++ cea_map_percpu_pages(&get_cpu_entry_area(cpu)->tss,
++ &per_cpu(cpu_tss_rw, cpu),
++ sizeof(struct tss_struct) / PAGE_SIZE, tss_prot);
+
+ #ifdef CONFIG_X86_32
+ per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
+@@ -85,20 +102,37 @@ static void __init setup_cpu_entry_area(
+ BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
+ BUILD_BUG_ON(sizeof(exception_stacks) !=
+ sizeof(((struct cpu_entry_area *)0)->exception_stacks));
+- set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
+- &per_cpu(exception_stacks, cpu),
+- sizeof(exception_stacks) / PAGE_SIZE,
+- PAGE_KERNEL);
++ cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks,
++ &per_cpu(exception_stacks, cpu),
++ sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL);
+
+- __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
++ cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline,
+ __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
+ #endif
+ }
+
++static __init void setup_cpu_entry_area_ptes(void)
++{
++#ifdef CONFIG_X86_32
++ unsigned long start, end;
++
++ BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE);
++ BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);
++
++ start = CPU_ENTRY_AREA_BASE;
++ end = start + CPU_ENTRY_AREA_MAP_SIZE;
++
++ for (; start < end; start += PMD_SIZE)
++ populate_extra_pte(start);
++#endif
++}
++
+ void __init setup_cpu_entry_areas(void)
+ {
+ unsigned int cpu;
+
++ setup_cpu_entry_area_ptes();
++
+ for_each_possible_cpu(cpu)
+ setup_cpu_entry_area(cpu);
+ }
+--- a/arch/x86/mm/dump_pagetables.c
++++ b/arch/x86/mm/dump_pagetables.c
+@@ -58,6 +58,7 @@ enum address_markers_idx {
+ KASAN_SHADOW_START_NR,
+ KASAN_SHADOW_END_NR,
+ #endif
++ CPU_ENTRY_AREA_NR,
+ #ifdef CONFIG_X86_ESPFIX64
+ ESPFIX_START_NR,
+ #endif
+@@ -81,6 +82,7 @@ static struct addr_marker address_marker
+ [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" },
+ [KASAN_SHADOW_END_NR] = { KASAN_SHADOW_END, "KASAN shadow end" },
+ #endif
++ [CPU_ENTRY_AREA_NR] = { CPU_ENTRY_AREA_BASE,"CPU entry Area" },
+ #ifdef CONFIG_X86_ESPFIX64
+ [ESPFIX_START_NR] = { ESPFIX_BASE_ADDR, "ESPfix Area", 16 },
+ #endif
+@@ -104,6 +106,7 @@ enum address_markers_idx {
+ #ifdef CONFIG_HIGHMEM
+ PKMAP_BASE_NR,
+ #endif
++ CPU_ENTRY_AREA_NR,
+ FIXADDR_START_NR,
+ END_OF_SPACE_NR,
+ };
+@@ -116,6 +119,7 @@ static struct addr_marker address_marker
+ #ifdef CONFIG_HIGHMEM
+ [PKMAP_BASE_NR] = { 0UL, "Persistent kmap() Area" },
+ #endif
++ [CPU_ENTRY_AREA_NR] = { 0UL, "CPU entry area" },
+ [FIXADDR_START_NR] = { 0UL, "Fixmap area" },
+ [END_OF_SPACE_NR] = { -1, NULL }
+ };
+@@ -541,8 +545,8 @@ static int __init pt_dump_init(void)
+ address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE;
+ # endif
+ address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
++ address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE;
+ #endif
+-
+ return 0;
+ }
+ __initcall(pt_dump_init);
+--- a/arch/x86/mm/init_32.c
++++ b/arch/x86/mm/init_32.c
+@@ -50,6 +50,7 @@
+ #include <asm/setup.h>
+ #include <asm/set_memory.h>
+ #include <asm/page_types.h>
++#include <asm/cpu_entry_area.h>
+ #include <asm/init.h>
+
+ #include "mm_internal.h"
+@@ -766,6 +767,7 @@ void __init mem_init(void)
+ mem_init_print_info(NULL);
+ printk(KERN_INFO "virtual kernel memory layout:\n"
+ " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
++ " cpu_entry : 0x%08lx - 0x%08lx (%4ld kB)\n"
+ #ifdef CONFIG_HIGHMEM
+ " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
+ #endif
+@@ -777,6 +779,10 @@ void __init mem_init(void)
+ FIXADDR_START, FIXADDR_TOP,
+ (FIXADDR_TOP - FIXADDR_START) >> 10,
+
++ CPU_ENTRY_AREA_BASE,
++ CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE,
++ CPU_ENTRY_AREA_MAP_SIZE >> 10,
++
+ #ifdef CONFIG_HIGHMEM
+ PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
+ (LAST_PKMAP*PAGE_SIZE) >> 10,
+--- a/arch/x86/mm/kasan_init_64.c
++++ b/arch/x86/mm/kasan_init_64.c
+@@ -15,6 +15,7 @@
+ #include <asm/tlbflush.h>
+ #include <asm/sections.h>
+ #include <asm/pgtable.h>
++#include <asm/cpu_entry_area.h>
+
+ extern struct range pfn_mapped[E820_MAX_ENTRIES];
+
+@@ -322,31 +323,33 @@ void __init kasan_init(void)
+ map_range(&pfn_mapped[i]);
+ }
+
+- kasan_populate_zero_shadow(
+- kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
+- kasan_mem_to_shadow((void *)__START_KERNEL_map));
+-
+- kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
+- (unsigned long)kasan_mem_to_shadow(_end),
+- early_pfn_to_nid(__pa(_stext)));
+-
+- shadow_cpu_entry_begin = (void *)__fix_to_virt(FIX_CPU_ENTRY_AREA_BOTTOM);
++ shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE;
+ shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin);
+ shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin,
+ PAGE_SIZE);
+
+- shadow_cpu_entry_end = (void *)(__fix_to_virt(FIX_CPU_ENTRY_AREA_TOP) + PAGE_SIZE);
++ shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE +
++ CPU_ENTRY_AREA_MAP_SIZE);
+ shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end);
+ shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end,
+ PAGE_SIZE);
+
+- kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
+- shadow_cpu_entry_begin);
++ kasan_populate_zero_shadow(
++ kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
++ shadow_cpu_entry_begin);
+
+ kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
+ (unsigned long)shadow_cpu_entry_end, 0);
+
+- kasan_populate_zero_shadow(shadow_cpu_entry_end, (void *)KASAN_SHADOW_END);
++ kasan_populate_zero_shadow(shadow_cpu_entry_end,
++ kasan_mem_to_shadow((void *)__START_KERNEL_map));
++
++ kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
++ (unsigned long)kasan_mem_to_shadow(_end),
++ early_pfn_to_nid(__pa(_stext)));
++
++ kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
++ (void *)KASAN_SHADOW_END);
+
+ load_cr3(init_top_pgt);
+ __flush_tlb_all();
+--- a/arch/x86/mm/pgtable_32.c
++++ b/arch/x86/mm/pgtable_32.c
+@@ -10,6 +10,7 @@
+ #include <linux/pagemap.h>
+ #include <linux/spinlock.h>
+
++#include <asm/cpu_entry_area.h>
+ #include <asm/pgtable.h>
+ #include <asm/pgalloc.h>
+ #include <asm/fixmap.h>
+--- a/arch/x86/xen/mmu_pv.c
++++ b/arch/x86/xen/mmu_pv.c
+@@ -2261,7 +2261,6 @@ static void xen_set_fixmap(unsigned idx,
+
+ switch (idx) {
+ case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
+- case FIX_RO_IDT:
+ #ifdef CONFIG_X86_32
+ case FIX_WP_TEST:
+ # ifdef CONFIG_HIGHMEM
+@@ -2272,7 +2271,6 @@ static void xen_set_fixmap(unsigned idx,
+ #endif
+ case FIX_TEXT_POKE0:
+ case FIX_TEXT_POKE1:
+- case FIX_CPU_ENTRY_AREA_TOP ... FIX_CPU_ENTRY_AREA_BOTTOM:
+ /* All local page mappings */
+ pte = pfn_pte(phys, prot);
+ break;
--- /dev/null
+From ed1bbc40a0d10e0c5c74fe7bdc6298295cf40255 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 20 Dec 2017 18:28:54 +0100
+Subject: x86/cpu_entry_area: Move it to a separate unit
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit ed1bbc40a0d10e0c5c74fe7bdc6298295cf40255 upstream.
+
+Separate the cpu_entry_area code out of cpu/common.c and the fixmap.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/cpu_entry_area.h | 52 +++++++++++++++++
+ arch/x86/include/asm/fixmap.h | 41 -------------
+ arch/x86/kernel/cpu/common.c | 94 ------------------------------
+ arch/x86/kernel/traps.c | 1
+ arch/x86/mm/Makefile | 2
+ arch/x86/mm/cpu_entry_area.c | 104 ++++++++++++++++++++++++++++++++++
+ 6 files changed, 159 insertions(+), 135 deletions(-)
+
+--- /dev/null
++++ b/arch/x86/include/asm/cpu_entry_area.h
+@@ -0,0 +1,52 @@
++// SPDX-License-Identifier: GPL-2.0
++
++#ifndef _ASM_X86_CPU_ENTRY_AREA_H
++#define _ASM_X86_CPU_ENTRY_AREA_H
++
++#include <linux/percpu-defs.h>
++#include <asm/processor.h>
++
++/*
++ * cpu_entry_area is a percpu region that contains things needed by the CPU
++ * and early entry/exit code. Real types aren't used for all fields here
++ * to avoid circular header dependencies.
++ *
++ * Every field is a virtual alias of some other allocated backing store.
++ * There is no direct allocation of a struct cpu_entry_area.
++ */
++struct cpu_entry_area {
++ char gdt[PAGE_SIZE];
++
++ /*
++ * The GDT is just below entry_stack and thus serves (on x86_64) as
++ * a a read-only guard page.
++ */
++ struct entry_stack_page entry_stack_page;
++
++ /*
++ * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because
++ * we need task switches to work, and task switches write to the TSS.
++ */
++ struct tss_struct tss;
++
++ char entry_trampoline[PAGE_SIZE];
++
++#ifdef CONFIG_X86_64
++ /*
++ * Exception stacks used for IST entries.
++ *
++ * In the future, this should have a separate slot for each stack
++ * with guard pages between them.
++ */
++ char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
++#endif
++};
++
++#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area))
++#define CPU_ENTRY_AREA_PAGES (CPU_ENTRY_AREA_SIZE / PAGE_SIZE)
++
++DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
++
++extern void setup_cpu_entry_areas(void);
++
++#endif
+--- a/arch/x86/include/asm/fixmap.h
++++ b/arch/x86/include/asm/fixmap.h
+@@ -25,6 +25,7 @@
+ #else
+ #include <uapi/asm/vsyscall.h>
+ #endif
++#include <asm/cpu_entry_area.h>
+
+ /*
+ * We can't declare FIXADDR_TOP as variable for x86_64 because vsyscall
+@@ -45,46 +46,6 @@ extern unsigned long __FIXADDR_TOP;
+ #endif
+
+ /*
+- * cpu_entry_area is a percpu region in the fixmap that contains things
+- * needed by the CPU and early entry/exit code. Real types aren't used
+- * for all fields here to avoid circular header dependencies.
+- *
+- * Every field is a virtual alias of some other allocated backing store.
+- * There is no direct allocation of a struct cpu_entry_area.
+- */
+-struct cpu_entry_area {
+- char gdt[PAGE_SIZE];
+-
+- /*
+- * The GDT is just below entry_stack and thus serves (on x86_64) as
+- * a a read-only guard page.
+- */
+- struct entry_stack_page entry_stack_page;
+-
+- /*
+- * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because
+- * we need task switches to work, and task switches write to the TSS.
+- */
+- struct tss_struct tss;
+-
+- char entry_trampoline[PAGE_SIZE];
+-
+-#ifdef CONFIG_X86_64
+- /*
+- * Exception stacks used for IST entries.
+- *
+- * In the future, this should have a separate slot for each stack
+- * with guard pages between them.
+- */
+- char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
+-#endif
+-};
+-
+-#define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE)
+-
+-extern void setup_cpu_entry_areas(void);
+-
+-/*
+ * Here we define all the compile-time 'special' virtual
+ * addresses. The point is to have a constant address at
+ * compile time, but to set the physical address only
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -482,102 +482,8 @@ static const unsigned int exception_stac
+ [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
+ [DEBUG_STACK - 1] = DEBUG_STKSZ
+ };
+-
+-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+- [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+-#endif
+-
+-static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page,
+- entry_stack_storage);
+-
+-static void __init
+-set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
+-{
+- for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
+- __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
+-}
+-
+-/* Setup the fixmap mappings only once per-processor */
+-static void __init setup_cpu_entry_area(int cpu)
+-{
+-#ifdef CONFIG_X86_64
+- extern char _entry_trampoline[];
+-
+- /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
+- pgprot_t gdt_prot = PAGE_KERNEL_RO;
+- pgprot_t tss_prot = PAGE_KERNEL_RO;
+-#else
+- /*
+- * On native 32-bit systems, the GDT cannot be read-only because
+- * our double fault handler uses a task gate, and entering through
+- * a task gate needs to change an available TSS to busy. If the
+- * GDT is read-only, that will triple fault. The TSS cannot be
+- * read-only because the CPU writes to it on task switches.
+- *
+- * On Xen PV, the GDT must be read-only because the hypervisor
+- * requires it.
+- */
+- pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
+- PAGE_KERNEL_RO : PAGE_KERNEL;
+- pgprot_t tss_prot = PAGE_KERNEL;
+ #endif
+
+- __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
+- set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page),
+- per_cpu_ptr(&entry_stack_storage, cpu), 1,
+- PAGE_KERNEL);
+-
+- /*
+- * The Intel SDM says (Volume 3, 7.2.1):
+- *
+- * Avoid placing a page boundary in the part of the TSS that the
+- * processor reads during a task switch (the first 104 bytes). The
+- * processor may not correctly perform address translations if a
+- * boundary occurs in this area. During a task switch, the processor
+- * reads and writes into the first 104 bytes of each TSS (using
+- * contiguous physical addresses beginning with the physical address
+- * of the first byte of the TSS). So, after TSS access begins, if
+- * part of the 104 bytes is not physically contiguous, the processor
+- * will access incorrect information without generating a page-fault
+- * exception.
+- *
+- * There are also a lot of errata involving the TSS spanning a page
+- * boundary. Assert that we're not doing that.
+- */
+- BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
+- offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
+- BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
+- set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
+- &per_cpu(cpu_tss_rw, cpu),
+- sizeof(struct tss_struct) / PAGE_SIZE,
+- tss_prot);
+-
+-#ifdef CONFIG_X86_32
+- per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
+-#endif
+-
+-#ifdef CONFIG_X86_64
+- BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
+- BUILD_BUG_ON(sizeof(exception_stacks) !=
+- sizeof(((struct cpu_entry_area *)0)->exception_stacks));
+- set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
+- &per_cpu(exception_stacks, cpu),
+- sizeof(exception_stacks) / PAGE_SIZE,
+- PAGE_KERNEL);
+-
+- __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
+- __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
+-#endif
+-}
+-
+-void __init setup_cpu_entry_areas(void)
+-{
+- unsigned int cpu;
+-
+- for_each_possible_cpu(cpu)
+- setup_cpu_entry_area(cpu);
+-}
+-
+ /* Load the original GDT from the per-cpu structure */
+ void load_direct_gdt(int cpu)
+ {
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -52,6 +52,7 @@
+ #include <asm/traps.h>
+ #include <asm/desc.h>
+ #include <asm/fpu/internal.h>
++#include <asm/cpu_entry_area.h>
+ #include <asm/mce.h>
+ #include <asm/fixmap.h>
+ #include <asm/mach_traps.h>
+--- a/arch/x86/mm/Makefile
++++ b/arch/x86/mm/Makefile
+@@ -10,7 +10,7 @@ CFLAGS_REMOVE_mem_encrypt.o = -pg
+ endif
+
+ obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
+- pat.o pgtable.o physaddr.o setup_nx.o tlb.o
++ pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o
+
+ # Make sure __phys_addr has no stackprotector
+ nostackp := $(call cc-option, -fno-stack-protector)
+--- /dev/null
++++ b/arch/x86/mm/cpu_entry_area.c
+@@ -0,0 +1,104 @@
++// SPDX-License-Identifier: GPL-2.0
++
++#include <linux/spinlock.h>
++#include <linux/percpu.h>
++
++#include <asm/cpu_entry_area.h>
++#include <asm/pgtable.h>
++#include <asm/fixmap.h>
++#include <asm/desc.h>
++
++static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
++
++#ifdef CONFIG_X86_64
++static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
++ [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
++#endif
++
++static void __init
++set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
++{
++ for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
++ __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
++}
++
++/* Setup the fixmap mappings only once per-processor */
++static void __init setup_cpu_entry_area(int cpu)
++{
++#ifdef CONFIG_X86_64
++ extern char _entry_trampoline[];
++
++ /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
++ pgprot_t gdt_prot = PAGE_KERNEL_RO;
++ pgprot_t tss_prot = PAGE_KERNEL_RO;
++#else
++ /*
++ * On native 32-bit systems, the GDT cannot be read-only because
++ * our double fault handler uses a task gate, and entering through
++ * a task gate needs to change an available TSS to busy. If the
++ * GDT is read-only, that will triple fault. The TSS cannot be
++ * read-only because the CPU writes to it on task switches.
++ *
++ * On Xen PV, the GDT must be read-only because the hypervisor
++ * requires it.
++ */
++ pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
++ PAGE_KERNEL_RO : PAGE_KERNEL;
++ pgprot_t tss_prot = PAGE_KERNEL;
++#endif
++
++ __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
++ set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page),
++ per_cpu_ptr(&entry_stack_storage, cpu), 1,
++ PAGE_KERNEL);
++
++ /*
++ * The Intel SDM says (Volume 3, 7.2.1):
++ *
++ * Avoid placing a page boundary in the part of the TSS that the
++ * processor reads during a task switch (the first 104 bytes). The
++ * processor may not correctly perform address translations if a
++ * boundary occurs in this area. During a task switch, the processor
++ * reads and writes into the first 104 bytes of each TSS (using
++ * contiguous physical addresses beginning with the physical address
++ * of the first byte of the TSS). So, after TSS access begins, if
++ * part of the 104 bytes is not physically contiguous, the processor
++ * will access incorrect information without generating a page-fault
++ * exception.
++ *
++ * There are also a lot of errata involving the TSS spanning a page
++ * boundary. Assert that we're not doing that.
++ */
++ BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
++ offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
++ BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
++ set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
++ &per_cpu(cpu_tss_rw, cpu),
++ sizeof(struct tss_struct) / PAGE_SIZE,
++ tss_prot);
++
++#ifdef CONFIG_X86_32
++ per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
++#endif
++
++#ifdef CONFIG_X86_64
++ BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
++ BUILD_BUG_ON(sizeof(exception_stacks) !=
++ sizeof(((struct cpu_entry_area *)0)->exception_stacks));
++ set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
++ &per_cpu(exception_stacks, cpu),
++ sizeof(exception_stacks) / PAGE_SIZE,
++ PAGE_KERNEL);
++
++ __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
++ __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
++#endif
++}
++
++void __init setup_cpu_entry_areas(void)
++{
++ unsigned int cpu;
++
++ for_each_possible_cpu(cpu)
++ setup_cpu_entry_area(cpu);
++}
--- /dev/null
+From f6c4fd506cb626e4346aa81688f255e593a7c5a0 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 23 Dec 2017 19:45:11 +0100
+Subject: x86/cpu_entry_area: Prevent wraparound in setup_cpu_entry_area_ptes() on 32bit
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit f6c4fd506cb626e4346aa81688f255e593a7c5a0 upstream.
+
+The loop which populates the CPU entry area PMDs can wrap around on 32bit
+machines when the number of CPUs is small.
+
+It worked wonderfully for NR_CPUS=64 for whatever reason and the moron who
+wrote that code did not bother to test it with !SMP.
+
+Check for the wraparound to fix it.
+
+Fixes: 92a0f81d8957 ("x86/cpu_entry_area: Move it out of the fixmap")
+Reported-by: kernel test robot <fengguang.wu@intel.com>
+Signed-off-by: Thomas "Feels stupid" Gleixner <tglx@linutronix.de>
+Tested-by: Borislav Petkov <bp@alien8.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/mm/cpu_entry_area.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/mm/cpu_entry_area.c
++++ b/arch/x86/mm/cpu_entry_area.c
+@@ -122,7 +122,8 @@ static __init void setup_cpu_entry_area_
+ start = CPU_ENTRY_AREA_BASE;
+ end = start + CPU_ENTRY_AREA_MAP_SIZE;
+
+- for (; start < end; start += PMD_SIZE)
++ /* Careful here: start + PMD_SIZE might wrap around */
++ for (; start < end && start >= CPU_ENTRY_AREA_BASE; start += PMD_SIZE)
+ populate_extra_pte(start);
+ #endif
+ }
--- /dev/null
+From e8ffe96e5933d417195268478479933d56213a3f Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 5 Dec 2017 13:34:54 +0100
+Subject: x86/doc: Remove obvious weirdnesses from the x86 MM layout documentation
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit e8ffe96e5933d417195268478479933d56213a3f upstream.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ Documentation/x86/x86_64/mm.txt | 12 +++---------
+ 1 file changed, 3 insertions(+), 9 deletions(-)
+
+--- a/Documentation/x86/x86_64/mm.txt
++++ b/Documentation/x86/x86_64/mm.txt
+@@ -1,6 +1,4 @@
+
+-<previous description obsolete, deleted>
+-
+ Virtual memory map with 4 level page tables:
+
+ 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm
+@@ -49,8 +47,9 @@ ffffffffffe00000 - ffffffffffffffff (=2
+
+ Architecture defines a 64-bit virtual address. Implementations can support
+ less. Currently supported are 48- and 57-bit virtual addresses. Bits 63
+-through to the most-significant implemented bit are set to either all ones
+-or all zero. This causes hole between user space and kernel addresses.
++through to the most-significant implemented bit are sign extended.
++This causes hole between user space and kernel addresses if you interpret them
++as unsigned.
+
+ The direct mapping covers all memory in the system up to the highest
+ memory address (this means in some cases it can also include PCI memory
+@@ -60,9 +59,6 @@ vmalloc space is lazily synchronized int
+ the processes using the page fault handler, with init_top_pgt as
+ reference.
+
+-Current X86-64 implementations support up to 46 bits of address space (64 TB),
+-which is our current limit. This expands into MBZ space in the page tables.
+-
+ We map EFI runtime services in the 'efi_pgd' PGD in a 64Gb large virtual
+ memory window (this size is arbitrary, it can be raised later if needed).
+ The mappings are not part of any other kernel PGD and are only available
+@@ -74,5 +70,3 @@ following fixmap section.
+ Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
+ physical memory, vmalloc/ioremap space and virtual memory map are randomized.
+ Their order is preserved but their base will be offset early at boot time.
+-
+--Andi Kleen, Jul 2004
--- /dev/null
+From 4fe2d8b11a370af286287a2661de9d4e6c9a145a Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Mon, 4 Dec 2017 17:25:07 -0800
+Subject: x86/entry: Rename SYSENTER_stack to CPU_ENTRY_AREA_entry_stack
+
+From: Dave Hansen <dave.hansen@linux.intel.com>
+
+commit 4fe2d8b11a370af286287a2661de9d4e6c9a145a upstream.
+
+If the kernel oopses while on the trampoline stack, it will print
+"<SYSENTER>" even if SYSENTER is not involved. That is rather confusing.
+
+The "SYSENTER" stack is used for a lot more than SYSENTER now. Give it a
+better string to display in stack dumps, and rename the kernel code to
+match.
+
+Also move the 32-bit code over to the new naming even though it still uses
+the entry stack only for SYSENTER.
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bp@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/entry_32.S | 12 ++++++------
+ arch/x86/entry/entry_64.S | 4 ++--
+ arch/x86/include/asm/fixmap.h | 8 ++++----
+ arch/x86/include/asm/processor.h | 6 +++---
+ arch/x86/include/asm/stacktrace.h | 4 ++--
+ arch/x86/kernel/asm-offsets.c | 4 ++--
+ arch/x86/kernel/asm-offsets_32.c | 2 +-
+ arch/x86/kernel/cpu/common.c | 14 +++++++-------
+ arch/x86/kernel/dumpstack.c | 10 +++++-----
+ arch/x86/kernel/dumpstack_32.c | 6 +++---
+ arch/x86/kernel/dumpstack_64.c | 12 +++++++++---
+ 11 files changed, 44 insertions(+), 38 deletions(-)
+
+--- a/arch/x86/entry/entry_32.S
++++ b/arch/x86/entry/entry_32.S
+@@ -942,9 +942,9 @@ ENTRY(debug)
+
+ /* Are we currently on the SYSENTER stack? */
+ movl PER_CPU_VAR(cpu_entry_area), %ecx
+- addl $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
+- subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */
+- cmpl $SIZEOF_SYSENTER_stack, %ecx
++ addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
++ subl %eax, %ecx /* ecx = (end of entry_stack) - esp */
++ cmpl $SIZEOF_entry_stack, %ecx
+ jb .Ldebug_from_sysenter_stack
+
+ TRACE_IRQS_OFF
+@@ -986,9 +986,9 @@ ENTRY(nmi)
+
+ /* Are we currently on the SYSENTER stack? */
+ movl PER_CPU_VAR(cpu_entry_area), %ecx
+- addl $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
+- subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */
+- cmpl $SIZEOF_SYSENTER_stack, %ecx
++ addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
++ subl %eax, %ecx /* ecx = (end of entry_stack) - esp */
++ cmpl $SIZEOF_entry_stack, %ecx
+ jb .Lnmi_from_sysenter_stack
+
+ /* Not on SYSENTER stack. */
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -154,8 +154,8 @@ END(native_usergs_sysret64)
+ _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
+
+ /* The top word of the SYSENTER stack is hot and is usable as scratch space. */
+-#define RSP_SCRATCH CPU_ENTRY_AREA_SYSENTER_stack + \
+- SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA
++#define RSP_SCRATCH CPU_ENTRY_AREA_entry_stack + \
++ SIZEOF_entry_stack - 8 + CPU_ENTRY_AREA
+
+ ENTRY(entry_SYSCALL_64_trampoline)
+ UNWIND_HINT_EMPTY
+--- a/arch/x86/include/asm/fixmap.h
++++ b/arch/x86/include/asm/fixmap.h
+@@ -56,10 +56,10 @@ struct cpu_entry_area {
+ char gdt[PAGE_SIZE];
+
+ /*
+- * The GDT is just below SYSENTER_stack and thus serves (on x86_64) as
++ * The GDT is just below entry_stack and thus serves (on x86_64) as
+ * a a read-only guard page.
+ */
+- struct SYSENTER_stack_page SYSENTER_stack_page;
++ struct entry_stack_page entry_stack_page;
+
+ /*
+ * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because
+@@ -250,9 +250,9 @@ static inline struct cpu_entry_area *get
+ return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0));
+ }
+
+-static inline struct SYSENTER_stack *cpu_SYSENTER_stack(int cpu)
++static inline struct entry_stack *cpu_entry_stack(int cpu)
+ {
+- return &get_cpu_entry_area(cpu)->SYSENTER_stack_page.stack;
++ return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
+ }
+
+ #endif /* !__ASSEMBLY__ */
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -336,12 +336,12 @@ struct x86_hw_tss {
+ #define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
+ #define INVALID_IO_BITMAP_OFFSET 0x8000
+
+-struct SYSENTER_stack {
++struct entry_stack {
+ unsigned long words[64];
+ };
+
+-struct SYSENTER_stack_page {
+- struct SYSENTER_stack stack;
++struct entry_stack_page {
++ struct entry_stack stack;
+ } __aligned(PAGE_SIZE);
+
+ struct tss_struct {
+--- a/arch/x86/include/asm/stacktrace.h
++++ b/arch/x86/include/asm/stacktrace.h
+@@ -16,7 +16,7 @@ enum stack_type {
+ STACK_TYPE_TASK,
+ STACK_TYPE_IRQ,
+ STACK_TYPE_SOFTIRQ,
+- STACK_TYPE_SYSENTER,
++ STACK_TYPE_ENTRY,
+ STACK_TYPE_EXCEPTION,
+ STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1,
+ };
+@@ -29,7 +29,7 @@ struct stack_info {
+ bool in_task_stack(unsigned long *stack, struct task_struct *task,
+ struct stack_info *info);
+
+-bool in_sysenter_stack(unsigned long *stack, struct stack_info *info);
++bool in_entry_stack(unsigned long *stack, struct stack_info *info);
+
+ int get_stack_info(unsigned long *stack, struct task_struct *task,
+ struct stack_info *info, unsigned long *visit_mask);
+--- a/arch/x86/kernel/asm-offsets.c
++++ b/arch/x86/kernel/asm-offsets.c
+@@ -97,6 +97,6 @@ void common(void) {
+ /* Layout info for cpu_entry_area */
+ OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
+ OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
+- OFFSET(CPU_ENTRY_AREA_SYSENTER_stack, cpu_entry_area, SYSENTER_stack_page);
+- DEFINE(SIZEOF_SYSENTER_stack, sizeof(struct SYSENTER_stack));
++ OFFSET(CPU_ENTRY_AREA_entry_stack, cpu_entry_area, entry_stack_page);
++ DEFINE(SIZEOF_entry_stack, sizeof(struct entry_stack));
+ }
+--- a/arch/x86/kernel/asm-offsets_32.c
++++ b/arch/x86/kernel/asm-offsets_32.c
+@@ -48,7 +48,7 @@ void foo(void)
+
+ /* Offset from the sysenter stack to tss.sp0 */
+ DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) -
+- offsetofend(struct cpu_entry_area, SYSENTER_stack_page.stack));
++ offsetofend(struct cpu_entry_area, entry_stack_page.stack));
+
+ #ifdef CONFIG_CC_STACKPROTECTOR
+ BLANK();
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -487,8 +487,8 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char,
+ [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+ #endif
+
+-static DEFINE_PER_CPU_PAGE_ALIGNED(struct SYSENTER_stack_page,
+- SYSENTER_stack_storage);
++static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page,
++ entry_stack_storage);
+
+ static void __init
+ set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
+@@ -523,8 +523,8 @@ static void __init setup_cpu_entry_area(
+ #endif
+
+ __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
+- set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, SYSENTER_stack_page),
+- per_cpu_ptr(&SYSENTER_stack_storage, cpu), 1,
++ set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page),
++ per_cpu_ptr(&entry_stack_storage, cpu), 1,
+ PAGE_KERNEL);
+
+ /*
+@@ -1323,7 +1323,7 @@ void enable_sep_cpu(void)
+
+ tss->x86_tss.ss1 = __KERNEL_CS;
+ wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
+- wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1), 0);
++ wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1), 0);
+ wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
+
+ put_cpu();
+@@ -1440,7 +1440,7 @@ void syscall_init(void)
+ * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
+ */
+ wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
+- wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
++ wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1));
+ wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
+ #else
+ wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
+@@ -1655,7 +1655,7 @@ void cpu_init(void)
+ */
+ set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
+ load_TR_desc();
+- load_sp0((unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
++ load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
+
+ load_mm_ldt(&init_mm);
+
+--- a/arch/x86/kernel/dumpstack.c
++++ b/arch/x86/kernel/dumpstack.c
+@@ -43,9 +43,9 @@ bool in_task_stack(unsigned long *stack,
+ return true;
+ }
+
+-bool in_sysenter_stack(unsigned long *stack, struct stack_info *info)
++bool in_entry_stack(unsigned long *stack, struct stack_info *info)
+ {
+- struct SYSENTER_stack *ss = cpu_SYSENTER_stack(smp_processor_id());
++ struct entry_stack *ss = cpu_entry_stack(smp_processor_id());
+
+ void *begin = ss;
+ void *end = ss + 1;
+@@ -53,7 +53,7 @@ bool in_sysenter_stack(unsigned long *st
+ if ((void *)stack < begin || (void *)stack >= end)
+ return false;
+
+- info->type = STACK_TYPE_SYSENTER;
++ info->type = STACK_TYPE_ENTRY;
+ info->begin = begin;
+ info->end = end;
+ info->next_sp = NULL;
+@@ -111,13 +111,13 @@ void show_trace_log_lvl(struct task_stru
+ * - task stack
+ * - interrupt stack
+ * - HW exception stacks (double fault, nmi, debug, mce)
+- * - SYSENTER stack
++ * - entry stack
+ *
+ * x86-32 can have up to four stacks:
+ * - task stack
+ * - softirq stack
+ * - hardirq stack
+- * - SYSENTER stack
++ * - entry stack
+ */
+ for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
+ const char *stack_name;
+--- a/arch/x86/kernel/dumpstack_32.c
++++ b/arch/x86/kernel/dumpstack_32.c
+@@ -26,8 +26,8 @@ const char *stack_type_name(enum stack_t
+ if (type == STACK_TYPE_SOFTIRQ)
+ return "SOFTIRQ";
+
+- if (type == STACK_TYPE_SYSENTER)
+- return "SYSENTER";
++ if (type == STACK_TYPE_ENTRY)
++ return "ENTRY_TRAMPOLINE";
+
+ return NULL;
+ }
+@@ -96,7 +96,7 @@ int get_stack_info(unsigned long *stack,
+ if (task != current)
+ goto unknown;
+
+- if (in_sysenter_stack(stack, info))
++ if (in_entry_stack(stack, info))
+ goto recursion_check;
+
+ if (in_hardirq_stack(stack, info))
+--- a/arch/x86/kernel/dumpstack_64.c
++++ b/arch/x86/kernel/dumpstack_64.c
+@@ -37,8 +37,14 @@ const char *stack_type_name(enum stack_t
+ if (type == STACK_TYPE_IRQ)
+ return "IRQ";
+
+- if (type == STACK_TYPE_SYSENTER)
+- return "SYSENTER";
++ if (type == STACK_TYPE_ENTRY) {
++ /*
++ * On 64-bit, we have a generic entry stack that we
++ * use for all the kernel entry points, including
++ * SYSENTER.
++ */
++ return "ENTRY_TRAMPOLINE";
++ }
+
+ if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST)
+ return exception_stack_names[type - STACK_TYPE_EXCEPTION];
+@@ -118,7 +124,7 @@ int get_stack_info(unsigned long *stack,
+ if (in_irq_stack(stack, info))
+ goto recursion_check;
+
+- if (in_sysenter_stack(stack, info))
++ if (in_entry_stack(stack, info))
+ goto recursion_check;
+
+ goto unknown;
--- /dev/null
+From 7bbcbd3d1cdcbacd0f9f8dc4c98d550972f1ca30 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 20 Dec 2017 18:02:34 +0100
+Subject: x86/Kconfig: Limit NR_CPUS on 32-bit to a sane amount
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 7bbcbd3d1cdcbacd0f9f8dc4c98d550972f1ca30 upstream.
+
+The recent cpu_entry_area changes fail to compile on 32-bit when BIGSMP=y
+and NR_CPUS=512, because the fixmap area becomes too big.
+
+Limit the number of CPUs with BIGSMP to 64, which is already way too big for
+32-bit, but it's at least a working limitation.
+
+We performed a quick survey of 32-bit-only machines that might be affected
+by this change negatively, but found none.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/Kconfig | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -925,7 +925,8 @@ config MAXSMP
+ config NR_CPUS
+ int "Maximum number of CPUs" if SMP && !MAXSMP
+ range 2 8 if SMP && X86_32 && !X86_BIGSMP
+- range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK
++ range 2 64 if SMP && X86_32 && X86_BIGSMP
++ range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64
+ range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
+ default "1" if !SMP
+ default "8192" if MAXSMP
--- /dev/null
+From a4828f81037f491b2cc986595e3a969a6eeb2fb5 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 14 Dec 2017 12:27:31 +0100
+Subject: x86/ldt: Prevent LDT inheritance on exec
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit a4828f81037f491b2cc986595e3a969a6eeb2fb5 upstream.
+
+The LDT is inherited across fork() or exec(), but that makes no sense
+at all because exec() is supposed to start the process clean.
+
+The reason why this happens is that init_new_context_ldt() is called from
+init_new_context() which obviously needs to be called for both fork() and
+exec().
+
+It would be surprising if anything relies on that behaviour, so it seems to
+be safe to remove that misfeature.
+
+Split the context initialization into two parts. Clear the LDT pointer and
+initialize the mutex from the general context init and move the LDT
+duplication to arch_dup_mmap() which is only called on fork().
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Peter Zijlstra <peterz@infradead.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Andy Lutomirsky <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: dan.j.williams@intel.com
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: kirill.shutemov@linux.intel.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/mmu_context.h | 21 ++++++++++++++-------
+ arch/x86/kernel/ldt.c | 18 +++++-------------
+ tools/testing/selftests/x86/ldt_gdt.c | 9 +++------
+ 3 files changed, 22 insertions(+), 26 deletions(-)
+
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -57,11 +57,17 @@ struct ldt_struct {
+ /*
+ * Used for LDT copy/destruction.
+ */
+-int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm);
++static inline void init_new_context_ldt(struct mm_struct *mm)
++{
++ mm->context.ldt = NULL;
++ init_rwsem(&mm->context.ldt_usr_sem);
++}
++int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm);
+ void destroy_context_ldt(struct mm_struct *mm);
+ #else /* CONFIG_MODIFY_LDT_SYSCALL */
+-static inline int init_new_context_ldt(struct task_struct *tsk,
+- struct mm_struct *mm)
++static inline void init_new_context_ldt(struct mm_struct *mm) { }
++static inline int ldt_dup_context(struct mm_struct *oldmm,
++ struct mm_struct *mm)
+ {
+ return 0;
+ }
+@@ -137,15 +143,16 @@ static inline int init_new_context(struc
+ mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
+ atomic64_set(&mm->context.tlb_gen, 0);
+
+- #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
++#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+ if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
+ /* pkey 0 is the default and always allocated */
+ mm->context.pkey_allocation_map = 0x1;
+ /* -1 means unallocated or invalid */
+ mm->context.execute_only_pkey = -1;
+ }
+- #endif
+- return init_new_context_ldt(tsk, mm);
++#endif
++ init_new_context_ldt(mm);
++ return 0;
+ }
+ static inline void destroy_context(struct mm_struct *mm)
+ {
+@@ -181,7 +188,7 @@ do { \
+ static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
+ {
+ paravirt_arch_dup_mmap(oldmm, mm);
+- return 0;
++ return ldt_dup_context(oldmm, mm);
+ }
+
+ static inline void arch_exit_mmap(struct mm_struct *mm)
+--- a/arch/x86/kernel/ldt.c
++++ b/arch/x86/kernel/ldt.c
+@@ -131,28 +131,20 @@ static void free_ldt_struct(struct ldt_s
+ }
+
+ /*
+- * we do not have to muck with descriptors here, that is
+- * done in switch_mm() as needed.
++ * Called on fork from arch_dup_mmap(). Just copy the current LDT state,
++ * the new task is not running, so nothing can be installed.
+ */
+-int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm)
++int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm)
+ {
+ struct ldt_struct *new_ldt;
+- struct mm_struct *old_mm;
+ int retval = 0;
+
+- init_rwsem(&mm->context.ldt_usr_sem);
+-
+- old_mm = current->mm;
+- if (!old_mm) {
+- mm->context.ldt = NULL;
++ if (!old_mm)
+ return 0;
+- }
+
+ mutex_lock(&old_mm->context.lock);
+- if (!old_mm->context.ldt) {
+- mm->context.ldt = NULL;
++ if (!old_mm->context.ldt)
+ goto out_unlock;
+- }
+
+ new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries);
+ if (!new_ldt) {
+--- a/tools/testing/selftests/x86/ldt_gdt.c
++++ b/tools/testing/selftests/x86/ldt_gdt.c
+@@ -627,13 +627,10 @@ static void do_multicpu_tests(void)
+ static int finish_exec_test(void)
+ {
+ /*
+- * In a sensible world, this would be check_invalid_segment(0, 1);
+- * For better or for worse, though, the LDT is inherited across exec.
+- * We can probably change this safely, but for now we test it.
++ * Older kernel versions did inherit the LDT on exec() which is
++ * wrong because exec() starts from a clean state.
+ */
+- check_valid_segment(0, 1,
+- AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB,
+- 42, true);
++ check_invalid_segment(0, 1);
+
+ return nerrs ? 1 : 0;
+ }
--- /dev/null
+From c2b3496bb30bd159e9de42e5c952e1f1f33c9a77 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 14 Dec 2017 12:27:30 +0100
+Subject: x86/ldt: Rework locking
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit c2b3496bb30bd159e9de42e5c952e1f1f33c9a77 upstream.
+
+The LDT is duplicated on fork() and on exec(), which is wrong as exec()
+should start from a clean state, i.e. without LDT. To fix this the LDT
+duplication code will be moved into arch_dup_mmap() which is only called
+for fork().
+
+This introduces a locking problem. arch_dup_mmap() holds mmap_sem of the
+parent process, but the LDT duplication code needs to acquire
+mm->context.lock to access the LDT data safely, which is the reverse lock
+order of write_ldt() where mmap_sem nests into context.lock.
+
+Solve this by introducing a new rw semaphore which serializes the
+read/write_ldt() syscall operations and use context.lock to protect the
+actual installment of the LDT descriptor.
+
+So context.lock stabilizes mm->context.ldt and can nest inside of the new
+semaphore or mmap_sem.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Andy Lutomirsky <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: dan.j.williams@intel.com
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: kirill.shutemov@linux.intel.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/mmu.h | 4 +++-
+ arch/x86/include/asm/mmu_context.h | 2 ++
+ arch/x86/kernel/ldt.c | 33 +++++++++++++++++++++------------
+ 3 files changed, 26 insertions(+), 13 deletions(-)
+
+--- a/arch/x86/include/asm/mmu.h
++++ b/arch/x86/include/asm/mmu.h
+@@ -3,6 +3,7 @@
+ #define _ASM_X86_MMU_H
+
+ #include <linux/spinlock.h>
++#include <linux/rwsem.h>
+ #include <linux/mutex.h>
+ #include <linux/atomic.h>
+
+@@ -27,7 +28,8 @@ typedef struct {
+ atomic64_t tlb_gen;
+
+ #ifdef CONFIG_MODIFY_LDT_SYSCALL
+- struct ldt_struct *ldt;
++ struct rw_semaphore ldt_usr_sem;
++ struct ldt_struct *ldt;
+ #endif
+
+ #ifdef CONFIG_X86_64
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -132,6 +132,8 @@ void enter_lazy_tlb(struct mm_struct *mm
+ static inline int init_new_context(struct task_struct *tsk,
+ struct mm_struct *mm)
+ {
++ mutex_init(&mm->context.lock);
++
+ mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
+ atomic64_set(&mm->context.tlb_gen, 0);
+
+--- a/arch/x86/kernel/ldt.c
++++ b/arch/x86/kernel/ldt.c
+@@ -5,6 +5,11 @@
+ * Copyright (C) 2002 Andi Kleen
+ *
+ * This handles calls from both 32bit and 64bit mode.
++ *
++ * Lock order:
++ * contex.ldt_usr_sem
++ * mmap_sem
++ * context.lock
+ */
+
+ #include <linux/errno.h>
+@@ -42,7 +47,7 @@ static void refresh_ldt_segments(void)
+ #endif
+ }
+
+-/* context.lock is held for us, so we don't need any locking. */
++/* context.lock is held by the task which issued the smp function call */
+ static void flush_ldt(void *__mm)
+ {
+ struct mm_struct *mm = __mm;
+@@ -99,15 +104,17 @@ static void finalize_ldt_struct(struct l
+ paravirt_alloc_ldt(ldt->entries, ldt->nr_entries);
+ }
+
+-/* context.lock is held */
+-static void install_ldt(struct mm_struct *current_mm,
+- struct ldt_struct *ldt)
++static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt)
+ {
++ mutex_lock(&mm->context.lock);
++
+ /* Synchronizes with READ_ONCE in load_mm_ldt. */
+- smp_store_release(¤t_mm->context.ldt, ldt);
++ smp_store_release(&mm->context.ldt, ldt);
+
+- /* Activate the LDT for all CPUs using current_mm. */
+- on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true);
++ /* Activate the LDT for all CPUs using currents mm. */
++ on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true);
++
++ mutex_unlock(&mm->context.lock);
+ }
+
+ static void free_ldt_struct(struct ldt_struct *ldt)
+@@ -133,7 +140,8 @@ int init_new_context_ldt(struct task_str
+ struct mm_struct *old_mm;
+ int retval = 0;
+
+- mutex_init(&mm->context.lock);
++ init_rwsem(&mm->context.ldt_usr_sem);
++
+ old_mm = current->mm;
+ if (!old_mm) {
+ mm->context.ldt = NULL;
+@@ -180,7 +188,7 @@ static int read_ldt(void __user *ptr, un
+ unsigned long entries_size;
+ int retval;
+
+- mutex_lock(&mm->context.lock);
++ down_read(&mm->context.ldt_usr_sem);
+
+ if (!mm->context.ldt) {
+ retval = 0;
+@@ -209,7 +217,7 @@ static int read_ldt(void __user *ptr, un
+ retval = bytecount;
+
+ out_unlock:
+- mutex_unlock(&mm->context.lock);
++ up_read(&mm->context.ldt_usr_sem);
+ return retval;
+ }
+
+@@ -269,7 +277,8 @@ static int write_ldt(void __user *ptr, u
+ ldt.avl = 0;
+ }
+
+- mutex_lock(&mm->context.lock);
++ if (down_write_killable(&mm->context.ldt_usr_sem))
++ return -EINTR;
+
+ old_ldt = mm->context.ldt;
+ old_nr_entries = old_ldt ? old_ldt->nr_entries : 0;
+@@ -291,7 +300,7 @@ static int write_ldt(void __user *ptr, u
+ error = 0;
+
+ out_unlock:
+- mutex_unlock(&mm->context.lock);
++ up_write(&mm->context.ldt_usr_sem);
+ out:
+ return error;
+ }
--- /dev/null
+From 23cb7d46f371844c004784ad9552a57446f73e5a Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 5 Dec 2017 13:34:51 +0100
+Subject: x86/microcode: Dont abuse the TLB-flush interface
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 23cb7d46f371844c004784ad9552a57446f73e5a upstream.
+
+Commit:
+
+ ec400ddeff20 ("x86/microcode_intel_early.c: Early update ucode on Intel's CPU")
+
+... grubbed into tlbflush internals without coherent explanation.
+
+Since it says it's a precaution and the SDM doesn't mention anything like
+this, take it out back.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: fenghua.yu@intel.com
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/tlbflush.h | 19 ++++++-------------
+ arch/x86/kernel/cpu/microcode/intel.c | 13 -------------
+ 2 files changed, 6 insertions(+), 26 deletions(-)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -246,20 +246,9 @@ static inline void __native_flush_tlb(vo
+ preempt_enable();
+ }
+
+-static inline void __native_flush_tlb_global_irq_disabled(void)
+-{
+- unsigned long cr4;
+-
+- cr4 = this_cpu_read(cpu_tlbstate.cr4);
+- /* clear PGE */
+- native_write_cr4(cr4 & ~X86_CR4_PGE);
+- /* write old PGE again and flush TLBs */
+- native_write_cr4(cr4);
+-}
+-
+ static inline void __native_flush_tlb_global(void)
+ {
+- unsigned long flags;
++ unsigned long cr4, flags;
+
+ if (static_cpu_has(X86_FEATURE_INVPCID)) {
+ /*
+@@ -277,7 +266,11 @@ static inline void __native_flush_tlb_gl
+ */
+ raw_local_irq_save(flags);
+
+- __native_flush_tlb_global_irq_disabled();
++ cr4 = this_cpu_read(cpu_tlbstate.cr4);
++ /* toggle PGE */
++ native_write_cr4(cr4 ^ X86_CR4_PGE);
++ /* write old PGE again and flush TLBs */
++ native_write_cr4(cr4);
+
+ raw_local_irq_restore(flags);
+ }
+--- a/arch/x86/kernel/cpu/microcode/intel.c
++++ b/arch/x86/kernel/cpu/microcode/intel.c
+@@ -565,15 +565,6 @@ static void print_ucode(struct ucode_cpu
+ }
+ #else
+
+-/*
+- * Flush global tlb. We only do this in x86_64 where paging has been enabled
+- * already and PGE should be enabled as well.
+- */
+-static inline void flush_tlb_early(void)
+-{
+- __native_flush_tlb_global_irq_disabled();
+-}
+-
+ static inline void print_ucode(struct ucode_cpu_info *uci)
+ {
+ struct microcode_intel *mc;
+@@ -602,10 +593,6 @@ static int apply_microcode_early(struct
+ if (rev != mc->hdr.rev)
+ return -1;
+
+-#ifdef CONFIG_X86_64
+- /* Flush global tlb. This is precaution. */
+- flush_tlb_early();
+-#endif
+ uci->cpu_sig.rev = rev;
+
+ if (early)
--- /dev/null
+From 5a7ccf4754fb3660569a6de52ba7f7fc3dfaf280 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Tue, 12 Dec 2017 07:56:43 -0800
+Subject: x86/mm/64: Improve the memory map documentation
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 5a7ccf4754fb3660569a6de52ba7f7fc3dfaf280 upstream.
+
+The old docs had the vsyscall range wrong and were missing the fixmap.
+Fix both.
+
+There used to be 8 MB reserved for future vsyscalls, but that's long gone.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Kirill A. Shutemov <kirill@shutemov.name>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ Documentation/x86/x86_64/mm.txt | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/Documentation/x86/x86_64/mm.txt
++++ b/Documentation/x86/x86_64/mm.txt
+@@ -19,8 +19,9 @@ ffffff0000000000 - ffffff7fffffffff (=39
+ ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
+ ... unused hole ...
+ ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0
+-ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space (variable)
+-ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
++ffffffffa0000000 - [fixmap start] (~1526 MB) module mapping space (variable)
++[fixmap start] - ffffffffff5fffff kernel-internal fixmap range
++ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
+ ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
+
+ Virtual memory map with 5 level page tables:
+@@ -41,8 +42,9 @@ ffffff0000000000 - ffffff7fffffffff (=39
+ ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
+ ... unused hole ...
+ ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0
+-ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space
+-ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
++ffffffffa0000000 - [fixmap start] (~1526 MB) module mapping space
++[fixmap start] - ffffffffff5fffff kernel-internal fixmap range
++ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
+ ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
+
+ Architecture defines a 64-bit virtual address. Implementations can support
--- /dev/null
+From 3f67af51e56f291d7417d77c4f67cd774633c5e1 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 5 Dec 2017 13:34:52 +0100
+Subject: x86/mm: Add comments to clarify which TLB-flush functions are supposed to flush what
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 3f67af51e56f291d7417d77c4f67cd774633c5e1 upstream.
+
+Per popular request..
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/tlbflush.h | 23 +++++++++++++++++++++--
+ 1 file changed, 21 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -228,6 +228,9 @@ static inline void cr4_set_bits_and_upda
+
+ extern void initialize_tlbstate_and_flush(void);
+
++/*
++ * flush the entire current user mapping
++ */
+ static inline void __native_flush_tlb(void)
+ {
+ /*
+@@ -240,6 +243,9 @@ static inline void __native_flush_tlb(vo
+ preempt_enable();
+ }
+
++/*
++ * flush everything
++ */
+ static inline void __native_flush_tlb_global(void)
+ {
+ unsigned long cr4, flags;
+@@ -269,17 +275,27 @@ static inline void __native_flush_tlb_gl
+ raw_local_irq_restore(flags);
+ }
+
++/*
++ * flush one page in the user mapping
++ */
+ static inline void __native_flush_tlb_single(unsigned long addr)
+ {
+ asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
+ }
+
++/*
++ * flush everything
++ */
+ static inline void __flush_tlb_all(void)
+ {
+- if (boot_cpu_has(X86_FEATURE_PGE))
++ if (boot_cpu_has(X86_FEATURE_PGE)) {
+ __flush_tlb_global();
+- else
++ } else {
++ /*
++ * !PGE -> !PCID (setup_pcid()), thus every flush is total.
++ */
+ __flush_tlb();
++ }
+
+ /*
+ * Note: if we somehow had PCID but not PGE, then this wouldn't work --
+@@ -290,6 +306,9 @@ static inline void __flush_tlb_all(void)
+ */
+ }
+
++/*
++ * flush one page in the kernel mapping
++ */
+ static inline void __flush_tlb_one(unsigned long addr)
+ {
+ count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
--- /dev/null
+From 1a3b0caeb77edeac5ce5fa05e6a61c474c9a9745 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 5 Dec 2017 13:34:47 +0100
+Subject: x86/mm: Create asm/invpcid.h
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 1a3b0caeb77edeac5ce5fa05e6a61c474c9a9745 upstream.
+
+Unclutter tlbflush.h a little.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/invpcid.h | 53 ++++++++++++++++++++++++++++++++++++++++
+ arch/x86/include/asm/tlbflush.h | 49 ------------------------------------
+ 2 files changed, 54 insertions(+), 48 deletions(-)
+
+--- /dev/null
++++ b/arch/x86/include/asm/invpcid.h
+@@ -0,0 +1,53 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef _ASM_X86_INVPCID
++#define _ASM_X86_INVPCID
++
++static inline void __invpcid(unsigned long pcid, unsigned long addr,
++ unsigned long type)
++{
++ struct { u64 d[2]; } desc = { { pcid, addr } };
++
++ /*
++ * The memory clobber is because the whole point is to invalidate
++ * stale TLB entries and, especially if we're flushing global
++ * mappings, we don't want the compiler to reorder any subsequent
++ * memory accesses before the TLB flush.
++ *
++ * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
++ * invpcid (%rcx), %rax in long mode.
++ */
++ asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
++ : : "m" (desc), "a" (type), "c" (&desc) : "memory");
++}
++
++#define INVPCID_TYPE_INDIV_ADDR 0
++#define INVPCID_TYPE_SINGLE_CTXT 1
++#define INVPCID_TYPE_ALL_INCL_GLOBAL 2
++#define INVPCID_TYPE_ALL_NON_GLOBAL 3
++
++/* Flush all mappings for a given pcid and addr, not including globals. */
++static inline void invpcid_flush_one(unsigned long pcid,
++ unsigned long addr)
++{
++ __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
++}
++
++/* Flush all mappings for a given PCID, not including globals. */
++static inline void invpcid_flush_single_context(unsigned long pcid)
++{
++ __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
++}
++
++/* Flush all mappings, including globals, for all PCIDs. */
++static inline void invpcid_flush_all(void)
++{
++ __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
++}
++
++/* Flush all mappings for all PCIDs except globals. */
++static inline void invpcid_flush_all_nonglobals(void)
++{
++ __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
++}
++
++#endif /* _ASM_X86_INVPCID */
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -9,54 +9,7 @@
+ #include <asm/cpufeature.h>
+ #include <asm/special_insns.h>
+ #include <asm/smp.h>
+-
+-static inline void __invpcid(unsigned long pcid, unsigned long addr,
+- unsigned long type)
+-{
+- struct { u64 d[2]; } desc = { { pcid, addr } };
+-
+- /*
+- * The memory clobber is because the whole point is to invalidate
+- * stale TLB entries and, especially if we're flushing global
+- * mappings, we don't want the compiler to reorder any subsequent
+- * memory accesses before the TLB flush.
+- *
+- * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
+- * invpcid (%rcx), %rax in long mode.
+- */
+- asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
+- : : "m" (desc), "a" (type), "c" (&desc) : "memory");
+-}
+-
+-#define INVPCID_TYPE_INDIV_ADDR 0
+-#define INVPCID_TYPE_SINGLE_CTXT 1
+-#define INVPCID_TYPE_ALL_INCL_GLOBAL 2
+-#define INVPCID_TYPE_ALL_NON_GLOBAL 3
+-
+-/* Flush all mappings for a given pcid and addr, not including globals. */
+-static inline void invpcid_flush_one(unsigned long pcid,
+- unsigned long addr)
+-{
+- __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
+-}
+-
+-/* Flush all mappings for a given PCID, not including globals. */
+-static inline void invpcid_flush_single_context(unsigned long pcid)
+-{
+- __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
+-}
+-
+-/* Flush all mappings, including globals, for all PCIDs. */
+-static inline void invpcid_flush_all(void)
+-{
+- __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
+-}
+-
+-/* Flush all mappings for all PCIDs except globals. */
+-static inline void invpcid_flush_all_nonglobals(void)
+-{
+- __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
+-}
++#include <asm/invpcid.h>
+
+ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+ {
--- /dev/null
+From c05344947b37f7cda726e802457370bc6eac4d26 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 16 Dec 2017 01:14:39 +0100
+Subject: x86/mm/dump_pagetables: Check PAGE_PRESENT for real
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit c05344947b37f7cda726e802457370bc6eac4d26 upstream.
+
+The check for a present page in printk_prot():
+
+ if (!pgprot_val(prot)) {
+ /* Not present */
+
+is bogus. If a PTE is set to PAGE_NONE then the pgprot_val is not zero and
+the entry is decoded in bogus ways, e.g. as RX GLB. That is confusing when
+analyzing mapping correctness. Check for the present bit to make an
+informed decision.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/mm/dump_pagetables.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/mm/dump_pagetables.c
++++ b/arch/x86/mm/dump_pagetables.c
+@@ -140,7 +140,7 @@ static void printk_prot(struct seq_file
+ static const char * const level_name[] =
+ { "cr3", "pgd", "p4d", "pud", "pmd", "pte" };
+
+- if (!pgprot_val(prot)) {
++ if (!(pr & _PAGE_PRESENT)) {
+ /* Not present */
+ pt_dump_cont_printf(m, dmsg, " ");
+ } else {
--- /dev/null
+From 146122e24bdf208015d629babba673e28d090709 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 20 Dec 2017 18:07:42 +0100
+Subject: x86/mm/dump_pagetables: Make the address hints correct and readable
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 146122e24bdf208015d629babba673e28d090709 upstream.
+
+The address hints are a trainwreck. The array entry numbers have to be kept
+magically in sync with the actual hints, which is doomed as some of the
+array members are initialized at runtime via the entry numbers.
+
+Designated initializers have been around before this code was
+implemented....
+
+Use the entry numbers to populate the address hints array and add the
+missing bits and pieces. Split 32 and 64 bit for readability sake.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/mm/dump_pagetables.c | 90 ++++++++++++++++++++++++------------------
+ 1 file changed, 53 insertions(+), 37 deletions(-)
+
+--- a/arch/x86/mm/dump_pagetables.c
++++ b/arch/x86/mm/dump_pagetables.c
+@@ -44,10 +44,12 @@ struct addr_marker {
+ unsigned long max_lines;
+ };
+
+-/* indices for address_markers; keep sync'd w/ address_markers below */
++/* Address space markers hints */
++
++#ifdef CONFIG_X86_64
++
+ enum address_markers_idx {
+ USER_SPACE_NR = 0,
+-#ifdef CONFIG_X86_64
+ KERNEL_SPACE_NR,
+ LOW_KERNEL_NR,
+ VMALLOC_START_NR,
+@@ -56,56 +58,70 @@ enum address_markers_idx {
+ KASAN_SHADOW_START_NR,
+ KASAN_SHADOW_END_NR,
+ #endif
+-# ifdef CONFIG_X86_ESPFIX64
++#ifdef CONFIG_X86_ESPFIX64
+ ESPFIX_START_NR,
+-# endif
++#endif
++#ifdef CONFIG_EFI
++ EFI_END_NR,
++#endif
+ HIGH_KERNEL_NR,
+ MODULES_VADDR_NR,
+ MODULES_END_NR,
+-#else
++ FIXADDR_START_NR,
++ END_OF_SPACE_NR,
++};
++
++static struct addr_marker address_markers[] = {
++ [USER_SPACE_NR] = { 0, "User Space" },
++ [KERNEL_SPACE_NR] = { (1UL << 63), "Kernel Space" },
++ [LOW_KERNEL_NR] = { 0UL, "Low Kernel Mapping" },
++ [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" },
++ [VMEMMAP_START_NR] = { 0UL, "Vmemmap" },
++#ifdef CONFIG_KASAN
++ [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" },
++ [KASAN_SHADOW_END_NR] = { KASAN_SHADOW_END, "KASAN shadow end" },
++#endif
++#ifdef CONFIG_X86_ESPFIX64
++ [ESPFIX_START_NR] = { ESPFIX_BASE_ADDR, "ESPfix Area", 16 },
++#endif
++#ifdef CONFIG_EFI
++ [EFI_END_NR] = { EFI_VA_END, "EFI Runtime Services" },
++#endif
++ [HIGH_KERNEL_NR] = { __START_KERNEL_map, "High Kernel Mapping" },
++ [MODULES_VADDR_NR] = { MODULES_VADDR, "Modules" },
++ [MODULES_END_NR] = { MODULES_END, "End Modules" },
++ [FIXADDR_START_NR] = { FIXADDR_START, "Fixmap Area" },
++ [END_OF_SPACE_NR] = { -1, NULL }
++};
++
++#else /* CONFIG_X86_64 */
++
++enum address_markers_idx {
++ USER_SPACE_NR = 0,
+ KERNEL_SPACE_NR,
+ VMALLOC_START_NR,
+ VMALLOC_END_NR,
+-# ifdef CONFIG_HIGHMEM
++#ifdef CONFIG_HIGHMEM
+ PKMAP_BASE_NR,
+-# endif
+- FIXADDR_START_NR,
+ #endif
++ FIXADDR_START_NR,
++ END_OF_SPACE_NR,
+ };
+
+-/* Address space markers hints */
+ static struct addr_marker address_markers[] = {
+- { 0, "User Space" },
+-#ifdef CONFIG_X86_64
+- { 0x8000000000000000UL, "Kernel Space" },
+- { 0/* PAGE_OFFSET */, "Low Kernel Mapping" },
+- { 0/* VMALLOC_START */, "vmalloc() Area" },
+- { 0/* VMEMMAP_START */, "Vmemmap" },
+-#ifdef CONFIG_KASAN
+- { KASAN_SHADOW_START, "KASAN shadow" },
+- { KASAN_SHADOW_END, "KASAN shadow end" },
+-#endif
+-# ifdef CONFIG_X86_ESPFIX64
+- { ESPFIX_BASE_ADDR, "ESPfix Area", 16 },
+-# endif
+-# ifdef CONFIG_EFI
+- { EFI_VA_END, "EFI Runtime Services" },
+-# endif
+- { __START_KERNEL_map, "High Kernel Mapping" },
+- { MODULES_VADDR, "Modules" },
+- { MODULES_END, "End Modules" },
+-#else
+- { PAGE_OFFSET, "Kernel Mapping" },
+- { 0/* VMALLOC_START */, "vmalloc() Area" },
+- { 0/*VMALLOC_END*/, "vmalloc() End" },
+-# ifdef CONFIG_HIGHMEM
+- { 0/*PKMAP_BASE*/, "Persistent kmap() Area" },
+-# endif
+- { 0/*FIXADDR_START*/, "Fixmap Area" },
++ [USER_SPACE_NR] = { 0, "User Space" },
++ [KERNEL_SPACE_NR] = { PAGE_OFFSET, "Kernel Mapping" },
++ [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" },
++ [VMALLOC_END_NR] = { 0UL, "vmalloc() End" },
++#ifdef CONFIG_HIGHMEM
++ [PKMAP_BASE_NR] = { 0UL, "Persistent kmap() Area" },
+ #endif
+- { -1, NULL } /* End of list */
++ [FIXADDR_START_NR] = { 0UL, "Fixmap area" },
++ [END_OF_SPACE_NR] = { -1, NULL }
+ };
+
++#endif /* !CONFIG_X86_64 */
++
+ /* Multipliers for offsets within the PTEs */
+ #define PTE_LEVEL_MULT (PAGE_SIZE)
+ #define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
--- /dev/null
+From 50fb83a62cf472dc53ba23bd3f7bd6c1b2b3b53e Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Mon, 4 Dec 2017 15:07:54 +0100
+Subject: x86/mm: Move the CR3 construction functions to tlbflush.h
+
+From: Dave Hansen <dave.hansen@linux.intel.com>
+
+commit 50fb83a62cf472dc53ba23bd3f7bd6c1b2b3b53e upstream.
+
+For flushing the TLB, the ASID which has been programmed into the hardware
+must be known. That differs from what is in 'cpu_tlbstate'.
+
+Add functions to transform the 'cpu_tlbstate' values into the ones
+programmed into the hardware (CR3).
+
+It's not easy to include mmu_context.h into tlbflush.h, so just move the
+CR3 building over to tlbflush.h.
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/mmu_context.h | 29 +----------------------------
+ arch/x86/include/asm/tlbflush.h | 26 ++++++++++++++++++++++++++
+ arch/x86/mm/tlb.c | 8 ++++----
+ 3 files changed, 31 insertions(+), 32 deletions(-)
+
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -291,33 +291,6 @@ static inline bool arch_vma_access_permi
+ }
+
+ /*
+- * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID
+- * bits. This serves two purposes. It prevents a nasty situation in
+- * which PCID-unaware code saves CR3, loads some other value (with PCID
+- * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if
+- * the saved ASID was nonzero. It also means that any bugs involving
+- * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger
+- * deterministically.
+- */
+-
+-static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
+-{
+- if (static_cpu_has(X86_FEATURE_PCID)) {
+- VM_WARN_ON_ONCE(asid > 4094);
+- return __sme_pa(mm->pgd) | (asid + 1);
+- } else {
+- VM_WARN_ON_ONCE(asid != 0);
+- return __sme_pa(mm->pgd);
+- }
+-}
+-
+-static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
+-{
+- VM_WARN_ON_ONCE(asid > 4094);
+- return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH;
+-}
+-
+-/*
+ * This can be used from process context to figure out what the value of
+ * CR3 is without needing to do a (slow) __read_cr3().
+ *
+@@ -326,7 +299,7 @@ static inline unsigned long build_cr3_no
+ */
+ static inline unsigned long __get_current_cr3_fast(void)
+ {
+- unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm),
++ unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
+ this_cpu_read(cpu_tlbstate.loaded_mm_asid));
+
+ /* For now, be very restrictive about when this can be called. */
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -69,6 +69,32 @@ static inline u64 inc_mm_tlb_gen(struct
+ return atomic64_inc_return(&mm->context.tlb_gen);
+ }
+
++/*
++ * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID bits.
++ * This serves two purposes. It prevents a nasty situation in which
++ * PCID-unaware code saves CR3, loads some other value (with PCID == 0),
++ * and then restores CR3, thus corrupting the TLB for ASID 0 if the saved
++ * ASID was nonzero. It also means that any bugs involving loading a
++ * PCID-enabled CR3 with CR4.PCIDE off will trigger deterministically.
++ */
++struct pgd_t;
++static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
++{
++ if (static_cpu_has(X86_FEATURE_PCID)) {
++ VM_WARN_ON_ONCE(asid > 4094);
++ return __sme_pa(pgd) | (asid + 1);
++ } else {
++ VM_WARN_ON_ONCE(asid != 0);
++ return __sme_pa(pgd);
++ }
++}
++
++static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
++{
++ VM_WARN_ON_ONCE(asid > 4094);
++ return __sme_pa(pgd) | (asid + 1) | CR3_NOFLUSH;
++}
++
+ #ifdef CONFIG_PARAVIRT
+ #include <asm/paravirt.h>
+ #else
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -128,7 +128,7 @@ void switch_mm_irqs_off(struct mm_struct
+ * isn't free.
+ */
+ #ifdef CONFIG_DEBUG_VM
+- if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
++ if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
+ /*
+ * If we were to BUG here, we'd be very likely to kill
+ * the system so hard that we don't see the call trace.
+@@ -195,7 +195,7 @@ void switch_mm_irqs_off(struct mm_struct
+ if (need_flush) {
+ this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
+ this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
+- write_cr3(build_cr3(next, new_asid));
++ write_cr3(build_cr3(next->pgd, new_asid));
+
+ /*
+ * NB: This gets called via leave_mm() in the idle path
+@@ -208,7 +208,7 @@ void switch_mm_irqs_off(struct mm_struct
+ trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+ } else {
+ /* The new ASID is already up to date. */
+- write_cr3(build_cr3_noflush(next, new_asid));
++ write_cr3(build_cr3_noflush(next->pgd, new_asid));
+
+ /* See above wrt _rcuidle. */
+ trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
+@@ -288,7 +288,7 @@ void initialize_tlbstate_and_flush(void)
+ !(cr4_read_shadow() & X86_CR4_PCIDE));
+
+ /* Force ASID 0 and force a TLB flush. */
+- write_cr3(build_cr3(mm, 0));
++ write_cr3(build_cr3(mm->pgd, 0));
+
+ /* Reinitialize tlbstate. */
+ this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
--- /dev/null
+From dd95f1a4b5ca904c78e6a097091eb21436478abb Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Mon, 4 Dec 2017 15:07:56 +0100
+Subject: x86/mm: Put MMU to hardware ASID translation in one place
+
+From: Dave Hansen <dave.hansen@linux.intel.com>
+
+commit dd95f1a4b5ca904c78e6a097091eb21436478abb upstream.
+
+There are effectively two ASID types:
+
+ 1. The one stored in the mmu_context that goes from 0..5
+ 2. The one programmed into the hardware that goes from 1..6
+
+This consolidates the locations that convert between the two (by doing
+a +1) into a single place, which gives us a nice place to comment.
+PAGE_TABLE_ISOLATION will also need to, given an ASID, know which hardware
+ASID to flush for the userspace mapping.
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/tlbflush.h | 29 ++++++++++++++++++-----------
+ 1 file changed, 18 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -85,20 +85,26 @@ static inline u64 inc_mm_tlb_gen(struct
+ */
+ #define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_ASID_BITS) - 2)
+
+-/*
+- * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID bits.
+- * This serves two purposes. It prevents a nasty situation in which
+- * PCID-unaware code saves CR3, loads some other value (with PCID == 0),
+- * and then restores CR3, thus corrupting the TLB for ASID 0 if the saved
+- * ASID was nonzero. It also means that any bugs involving loading a
+- * PCID-enabled CR3 with CR4.PCIDE off will trigger deterministically.
+- */
++static inline u16 kern_pcid(u16 asid)
++{
++ VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
++ /*
++ * If PCID is on, ASID-aware code paths put the ASID+1 into the
++ * PCID bits. This serves two purposes. It prevents a nasty
++ * situation in which PCID-unaware code saves CR3, loads some other
++ * value (with PCID == 0), and then restores CR3, thus corrupting
++ * the TLB for ASID 0 if the saved ASID was nonzero. It also means
++ * that any bugs involving loading a PCID-enabled CR3 with
++ * CR4.PCIDE off will trigger deterministically.
++ */
++ return asid + 1;
++}
++
+ struct pgd_t;
+ static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
+ {
+ if (static_cpu_has(X86_FEATURE_PCID)) {
+- VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+- return __sme_pa(pgd) | (asid + 1);
++ return __sme_pa(pgd) | kern_pcid(asid);
+ } else {
+ VM_WARN_ON_ONCE(asid != 0);
+ return __sme_pa(pgd);
+@@ -108,7 +114,8 @@ static inline unsigned long build_cr3(pg
+ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
+ {
+ VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+- return __sme_pa(pgd) | (asid + 1) | CR3_NOFLUSH;
++ VM_WARN_ON_ONCE(!this_cpu_has(X86_FEATURE_PCID));
++ return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
+ }
+
+ #ifdef CONFIG_PARAVIRT
--- /dev/null
+From cb0a9144a744e55207e24dcef812f05cd15a499a Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Mon, 4 Dec 2017 15:07:55 +0100
+Subject: x86/mm: Remove hard-coded ASID limit checks
+
+From: Dave Hansen <dave.hansen@linux.intel.com>
+
+commit cb0a9144a744e55207e24dcef812f05cd15a499a upstream.
+
+First, it's nice to remove the magic numbers.
+
+Second, PAGE_TABLE_ISOLATION is going to consume half of the available ASID
+space. The space is currently unused, but add a comment to spell out this
+new restriction.
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/tlbflush.h | 20 ++++++++++++++++++--
+ 1 file changed, 18 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -69,6 +69,22 @@ static inline u64 inc_mm_tlb_gen(struct
+ return atomic64_inc_return(&mm->context.tlb_gen);
+ }
+
++/* There are 12 bits of space for ASIDS in CR3 */
++#define CR3_HW_ASID_BITS 12
++/*
++ * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for
++ * user/kernel switches
++ */
++#define PTI_CONSUMED_ASID_BITS 0
++
++#define CR3_AVAIL_ASID_BITS (CR3_HW_ASID_BITS - PTI_CONSUMED_ASID_BITS)
++/*
++ * ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid. -1 below to account
++ * for them being zero-based. Another -1 is because ASID 0 is reserved for
++ * use by non-PCID-aware users.
++ */
++#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_ASID_BITS) - 2)
++
+ /*
+ * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID bits.
+ * This serves two purposes. It prevents a nasty situation in which
+@@ -81,7 +97,7 @@ struct pgd_t;
+ static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
+ {
+ if (static_cpu_has(X86_FEATURE_PCID)) {
+- VM_WARN_ON_ONCE(asid > 4094);
++ VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+ return __sme_pa(pgd) | (asid + 1);
+ } else {
+ VM_WARN_ON_ONCE(asid != 0);
+@@ -91,7 +107,7 @@ static inline unsigned long build_cr3(pg
+
+ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
+ {
+- VM_WARN_ON_ONCE(asid > 4094);
++ VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+ return __sme_pa(pgd) | (asid + 1) | CR3_NOFLUSH;
+ }
+
--- /dev/null
+From b5fc6d943808b570bdfbec80f40c6b3855f1c48b Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 5 Dec 2017 13:34:46 +0100
+Subject: x86/mm: Remove superfluous barriers
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit b5fc6d943808b570bdfbec80f40c6b3855f1c48b upstream.
+
+atomic64_inc_return() already implies smp_mb() before and after.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/tlbflush.h | 8 +-------
+ 1 file changed, 1 insertion(+), 7 deletions(-)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -60,19 +60,13 @@ static inline void invpcid_flush_all_non
+
+ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+ {
+- u64 new_tlb_gen;
+-
+ /*
+ * Bump the generation count. This also serves as a full barrier
+ * that synchronizes with switch_mm(): callers are required to order
+ * their read of mm_cpumask after their writes to the paging
+ * structures.
+ */
+- smp_mb__before_atomic();
+- new_tlb_gen = atomic64_inc_return(&mm->context.tlb_gen);
+- smp_mb__after_atomic();
+-
+- return new_tlb_gen;
++ return atomic64_inc_return(&mm->context.tlb_gen);
+ }
+
+ #ifdef CONFIG_PARAVIRT
--- /dev/null
+From a501686b2923ce6f2ff2b1d0d50682c6411baf72 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 5 Dec 2017 13:34:49 +0100
+Subject: x86/mm: Use __flush_tlb_one() for kernel memory
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit a501686b2923ce6f2ff2b1d0d50682c6411baf72 upstream.
+
+__flush_tlb_single() is for user mappings, __flush_tlb_one() for
+kernel mappings.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/mm/tlb.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -551,7 +551,7 @@ static void do_kernel_range_flush(void *
+
+ /* flush range by one by one 'invlpg' */
+ for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
+- __flush_tlb_single(addr);
++ __flush_tlb_one(addr);
+ }
+
+ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
--- /dev/null
+From 3e46e0f5ee3643a1239be9046c7ba6c66ca2b329 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 5 Dec 2017 13:34:50 +0100
+Subject: x86/uv: Use the right TLB-flush API
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 3e46e0f5ee3643a1239be9046c7ba6c66ca2b329 upstream.
+
+Since uv_flush_tlb_others() implements flush_tlb_others() which is
+about flushing user mappings, we should use __flush_tlb_single(),
+which too is about flushing user mappings.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Andrew Banman <abanman@hpe.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mike Travis <mike.travis@hpe.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/platform/uv/tlb_uv.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/platform/uv/tlb_uv.c
++++ b/arch/x86/platform/uv/tlb_uv.c
+@@ -299,7 +299,7 @@ static void bau_process_message(struct m
+ local_flush_tlb();
+ stat->d_alltlb++;
+ } else {
+- __flush_tlb_one(msg->address);
++ __flush_tlb_single(msg->address);
+ stat->d_onetlb++;
+ }
+ stat->d_requestee++;
--- /dev/null
+From 49275fef986abfb8b476e4708aaecc07e7d3e087 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sun, 10 Dec 2017 22:47:19 -0800
+Subject: x86/vsyscall/64: Explicitly set _PAGE_USER in the pagetable hierarchy
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 49275fef986abfb8b476e4708aaecc07e7d3e087 upstream.
+
+The kernel is very erratic as to which pagetables have _PAGE_USER set. The
+vsyscall page gets lucky: it seems that all of the relevant pagetables are
+among the apparently arbitrary ones that set _PAGE_USER. Rather than
+relying on chance, just explicitly set _PAGE_USER.
+
+This will let us clean up pagetable setup to stop setting _PAGE_USER. The
+added code can also be reused by pagetable isolation to manage the
+_PAGE_USER bit in the usermode tables.
+
+[ tglx: Folded paravirt fix from Juergen Gross ]
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/vsyscall/vsyscall_64.c | 34 +++++++++++++++++++++++++++++++++-
+ 1 file changed, 33 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/entry/vsyscall/vsyscall_64.c
++++ b/arch/x86/entry/vsyscall/vsyscall_64.c
+@@ -37,6 +37,7 @@
+ #include <asm/unistd.h>
+ #include <asm/fixmap.h>
+ #include <asm/traps.h>
++#include <asm/paravirt.h>
+
+ #define CREATE_TRACE_POINTS
+ #include "vsyscall_trace.h"
+@@ -329,16 +330,47 @@ int in_gate_area_no_mm(unsigned long add
+ return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR;
+ }
+
++/*
++ * The VSYSCALL page is the only user-accessible page in the kernel address
++ * range. Normally, the kernel page tables can have _PAGE_USER clear, but
++ * the tables covering VSYSCALL_ADDR need _PAGE_USER set if vsyscalls
++ * are enabled.
++ *
++ * Some day we may create a "minimal" vsyscall mode in which we emulate
++ * vsyscalls but leave the page not present. If so, we skip calling
++ * this.
++ */
++static void __init set_vsyscall_pgtable_user_bits(void)
++{
++ pgd_t *pgd;
++ p4d_t *p4d;
++ pud_t *pud;
++ pmd_t *pmd;
++
++ pgd = pgd_offset_k(VSYSCALL_ADDR);
++ set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
++ p4d = p4d_offset(pgd, VSYSCALL_ADDR);
++#if CONFIG_PGTABLE_LEVELS >= 5
++ p4d->p4d |= _PAGE_USER;
++#endif
++ pud = pud_offset(p4d, VSYSCALL_ADDR);
++ set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER));
++ pmd = pmd_offset(pud, VSYSCALL_ADDR);
++ set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_USER));
++}
++
+ void __init map_vsyscall(void)
+ {
+ extern char __vsyscall_page;
+ unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
+
+- if (vsyscall_mode != NONE)
++ if (vsyscall_mode != NONE) {
+ __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
+ vsyscall_mode == NATIVE
+ ? PAGE_KERNEL_VSYSCALL
+ : PAGE_KERNEL_VVAR);
++ set_vsyscall_pgtable_user_bits();
++ }
+
+ BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
+ (unsigned long)VSYSCALL_ADDR);
--- /dev/null
+From 4831b779403a836158917d59a7ca880483c67378 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sun, 10 Dec 2017 22:47:20 -0800
+Subject: x86/vsyscall/64: Warn and fail vsyscall emulation in NATIVE mode
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 4831b779403a836158917d59a7ca880483c67378 upstream.
+
+If something goes wrong with pagetable setup, vsyscall=native will
+accidentally fall back to emulation. Make it warn and fail so that we
+notice.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/vsyscall/vsyscall_64.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/x86/entry/vsyscall/vsyscall_64.c
++++ b/arch/x86/entry/vsyscall/vsyscall_64.c
+@@ -139,6 +139,10 @@ bool emulate_vsyscall(struct pt_regs *re
+
+ WARN_ON_ONCE(address != regs->ip);
+
++ /* This should be unreachable in NATIVE mode. */
++ if (WARN_ON(vsyscall_mode == NATIVE))
++ return false;
++
+ if (vsyscall_mode == NONE) {
+ warn_bad_vsyscall(KERN_INFO, regs,
+ "vsyscall attempted with vsyscall=none");