From: Greg Kroah-Hartman Date: Mon, 19 Mar 2018 10:02:39 +0000 (+0100) Subject: 4.14-stable patches X-Git-Tag: v4.15.12~26 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=de12fd02ae148faeb6862b953bc7684c54d062c1;p=thirdparty%2Fkernel%2Fstable-queue.git 4.14-stable patches added patches: alsa-hda-revert-power_save-option-default-value.patch alsa-pcm-fix-uaf-in-snd_pcm_oss_get_formats.patch alsa-seq-clear-client-entry-before-deleting-else-at-closing.patch alsa-seq-fix-possible-uaf-in-snd_seq_check_queue.patch drm-amdgpu-dce-don-t-turn-off-dp-sink-when-disconnected.patch drm-amdgpu-fix-prime-teardown-order.patch drm-nouveau-bl-fix-oops-on-driver-unbind.patch drm-radeon-fix-prime-teardown-order.patch fs-aio-add-explicit-rcu-grace-period-when-freeing-kioctx.patch fs-aio-use-rcu-accessors-for-kioctx_table-table.patch fs-teach-path_connected-to-handle-nfs-filesystems-with-multiple-roots.patch kvm-arm-arm64-reduce-verbosity-of-kvm-init-log.patch kvm-arm-arm64-vgic-don-t-populate-multiple-lrs-with-the-same-vintid.patch kvm-arm-arm64-vgic-v3-tighten-synchronization-for-guests-using-v2-on-v3.patch kvm-x86-fix-device-passthrough-when-sme-is-active.patch lock_parent-needs-to-recheck-if-dentry-got-__dentry_kill-ed-under-it.patch parisc-handle-case-where-flush_cache_range-is-called-with-no-context.patch rdmavt-fix-synchronization-around-percpu_ref.patch selftests-x86-add-tests-for-the-str-and-sldt-instructions.patch selftests-x86-add-tests-for-user-mode-instruction-prevention.patch selftests-x86-entry_from_vm86-add-test-cases-for-popf.patch selftests-x86-entry_from_vm86-exit-with-1-if-we-fail.patch series x86-cpufeatures-add-intel-pconfig-cpufeature.patch x86-cpufeatures-add-intel-total-memory-encryption-cpufeature.patch x86-mm-fix-vmalloc_fault-to-use-pxd_large.patch x86-speculation-objtool-annotate-indirect-calls-jumps-for-objtool-on-32-bit-kernels.patch x86-speculation-remove-skylake-c2-from-speculation-control-microcode-blacklist.patch 
x86-vm86-32-fix-popf-emulation.patch --- diff --git a/queue-4.14/alsa-hda-revert-power_save-option-default-value.patch b/queue-4.14/alsa-hda-revert-power_save-option-default-value.patch new file mode 100644 index 00000000000..b738949ad53 --- /dev/null +++ b/queue-4.14/alsa-hda-revert-power_save-option-default-value.patch @@ -0,0 +1,66 @@ +From 40088dc4e1ead7df31728c73f5b51d71da18831d Mon Sep 17 00:00:00 2001 +From: Takashi Iwai +Date: Mon, 12 Mar 2018 13:55:48 +0100 +Subject: ALSA: hda - Revert power_save option default value + +From: Takashi Iwai + +commit 40088dc4e1ead7df31728c73f5b51d71da18831d upstream. + +With the commit 1ba8f9d30817 ("ALSA: hda: Add a power_save +blacklist"), we changed the default value of power_save option to -1 +for processing the power-save blacklist. +Unfortunately, this seems to break user-space applications that +actually read the power_save parameter value via sysfs and judge / +adjust the power-saving status. They see the value -1 as if the +power-save is turned off, although the actual value is taken from +CONFIG_SND_HDA_POWER_SAVE_DEFAULT and it can be a positive. + +So, overall, passing -1 there was no good idea. Let's partially +revert it -- at least for power_save option default value is restored +again to CONFIG_SND_HDA_POWER_SAVE_DEFAULT. Meanwhile, in this patch, +we keep the blacklist behavior and make it adjustable via the new +option, pm_blacklist. 
+ +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=199073 +Fixes: 1ba8f9d30817 ("ALSA: hda: Add a power_save blacklist") +Acked-by: Hans de Goede +Cc: +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman + +--- + sound/pci/hda/hda_intel.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +--- a/sound/pci/hda/hda_intel.c ++++ b/sound/pci/hda/hda_intel.c +@@ -181,11 +181,15 @@ static const struct kernel_param_ops par + }; + #define param_check_xint param_check_int + +-static int power_save = -1; ++static int power_save = CONFIG_SND_HDA_POWER_SAVE_DEFAULT; + module_param(power_save, xint, 0644); + MODULE_PARM_DESC(power_save, "Automatic power-saving timeout " + "(in second, 0 = disable)."); + ++static bool pm_blacklist = true; ++module_param(pm_blacklist, bool, 0644); ++MODULE_PARM_DESC(pm_blacklist, "Enable power-management blacklist"); ++ + /* reset the HD-audio controller in power save mode. + * this may give more power-saving, but will take longer time to + * wake up. +@@ -2300,10 +2304,9 @@ static int azx_probe_continue(struct azx + + val = power_save; + #ifdef CONFIG_PM +- if (val == -1) { ++ if (pm_blacklist) { + const struct snd_pci_quirk *q; + +- val = CONFIG_SND_HDA_POWER_SAVE_DEFAULT; + q = snd_pci_quirk_lookup(chip->pci, power_save_blacklist); + if (q && val) { + dev_info(chip->card->dev, "device %04x:%04x is on the power_save blacklist, forcing power_save to 0\n", diff --git a/queue-4.14/alsa-pcm-fix-uaf-in-snd_pcm_oss_get_formats.patch b/queue-4.14/alsa-pcm-fix-uaf-in-snd_pcm_oss_get_formats.patch new file mode 100644 index 00000000000..b691e9373f6 --- /dev/null +++ b/queue-4.14/alsa-pcm-fix-uaf-in-snd_pcm_oss_get_formats.patch @@ -0,0 +1,51 @@ +From 01c0b4265cc16bc1f43f475c5944c55c10d5768f Mon Sep 17 00:00:00 2001 +From: Takashi Iwai +Date: Sat, 10 Mar 2018 23:04:23 +0100 +Subject: ALSA: pcm: Fix UAF in snd_pcm_oss_get_formats() + +From: Takashi Iwai + +commit 01c0b4265cc16bc1f43f475c5944c55c10d5768f upstream. 
+ +snd_pcm_oss_get_formats() has an obvious use-after-free around +snd_mask_test() calls, as spotted by syzbot. The passed format_mask +argument is a pointer to the hw_params object that is freed before the +loop. What a surprise that it has been present since the original +code of decades ago... + +Reported-by: syzbot+4090700a4f13fccaf648@syzkaller.appspotmail.com +Cc: +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman + +--- + sound/core/oss/pcm_oss.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/sound/core/oss/pcm_oss.c ++++ b/sound/core/oss/pcm_oss.c +@@ -1762,10 +1762,9 @@ static int snd_pcm_oss_get_formats(struc + return -ENOMEM; + _snd_pcm_hw_params_any(params); + err = snd_pcm_hw_refine(substream, params); +- format_mask = hw_param_mask_c(params, SNDRV_PCM_HW_PARAM_FORMAT); +- kfree(params); + if (err < 0) +- return err; ++ goto error; ++ format_mask = hw_param_mask_c(params, SNDRV_PCM_HW_PARAM_FORMAT); + for (fmt = 0; fmt < 32; ++fmt) { + if (snd_mask_test(format_mask, fmt)) { + int f = snd_pcm_oss_format_to(fmt); +@@ -1773,7 +1772,10 @@ static int snd_pcm_oss_get_formats(struc + formats |= f; + } + } +- return formats; ++ ++ error: ++ kfree(params); ++ return err < 0 ? err : formats; + } + + static int snd_pcm_oss_set_format(struct snd_pcm_oss_file *pcm_oss_file, int format) diff --git a/queue-4.14/alsa-seq-clear-client-entry-before-deleting-else-at-closing.patch b/queue-4.14/alsa-seq-clear-client-entry-before-deleting-else-at-closing.patch new file mode 100644 index 00000000000..746b592a582 --- /dev/null +++ b/queue-4.14/alsa-seq-clear-client-entry-before-deleting-else-at-closing.patch @@ -0,0 +1,50 @@ +From a2ff19f7b70118ced291a28d5313469914de451b Mon Sep 17 00:00:00 2001 +From: Takashi Iwai +Date: Fri, 9 Mar 2018 22:23:31 +0100 +Subject: ALSA: seq: Clear client entry before deleting else at closing + +From: Takashi Iwai + +commit a2ff19f7b70118ced291a28d5313469914de451b upstream. 
+ +When releasing a client, we need to clear the clienttab[] entry at +first, then call snd_seq_queue_client_leave(). Otherwise, the +in-flight cell in the queue might be picked up by the timer interrupt +via snd_seq_check_queue() before calling snd_seq_queue_client_leave(), +and it's delivered to another queue while the client is clearing +queues. This may eventually result in an uncleared cell remaining in +a queue, and the later snd_seq_pool_delete() may need to wait for a +long time until the event gets really processed. + +By moving the clienttab[] clearance at the beginning of release, any +event delivery of a cell belonging to this client will fail at a later +point, since snd_seq_client_ptr() returns NULL. Thus the cell that +was picked up by the timer interrupt will be returned immediately +without further delivery, and the long stall of snd_seq_delete_pool() +can be avoided, too. + +Cc: +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman + +--- + sound/core/seq/seq_clientmgr.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/sound/core/seq/seq_clientmgr.c ++++ b/sound/core/seq/seq_clientmgr.c +@@ -255,12 +255,12 @@ static int seq_free_client1(struct snd_s + + if (!client) + return 0; +- snd_seq_delete_all_ports(client); +- snd_seq_queue_client_leave(client->number); + spin_lock_irqsave(&clients_lock, flags); + clienttablock[client->number] = 1; + clienttab[client->number] = NULL; + spin_unlock_irqrestore(&clients_lock, flags); ++ snd_seq_delete_all_ports(client); ++ snd_seq_queue_client_leave(client->number); + snd_use_lock_sync(&client->use_lock); + snd_seq_queue_client_termination(client->number); + if (client->pool) diff --git a/queue-4.14/alsa-seq-fix-possible-uaf-in-snd_seq_check_queue.patch b/queue-4.14/alsa-seq-fix-possible-uaf-in-snd_seq_check_queue.patch new file mode 100644 index 00000000000..61fcb3a61c3 --- /dev/null +++ b/queue-4.14/alsa-seq-fix-possible-uaf-in-snd_seq_check_queue.patch @@ -0,0 +1,170 @@ +From 
d0f833065221cbfcbadf19fd4102bcfa9330006a Mon Sep 17 00:00:00 2001 +From: Takashi Iwai +Date: Fri, 9 Mar 2018 21:58:28 +0100 +Subject: ALSA: seq: Fix possible UAF in snd_seq_check_queue() + +From: Takashi Iwai + +commit d0f833065221cbfcbadf19fd4102bcfa9330006a upstream. + +Although we've covered the races between concurrent write() and +ioctl() in the previous patch series, there is still a possible UAF in +the following scenario: + +A: user client closed B: timer irq + -> snd_seq_release() -> snd_seq_timer_interrupt() + -> snd_seq_free_client() -> snd_seq_check_queue() + -> cell = snd_seq_prioq_cell_peek() + -> snd_seq_prioq_leave() + .... removing all cells + -> snd_seq_pool_done() + .... vfree() + -> snd_seq_compare_tick_time(cell) + ... Oops + +So the problem is that a cell is peeked and accessed without any +protection until it's retrieved from the queue again via +snd_seq_prioq_cell_out(). + +This patch tries to address it, also cleans up the code by a slight +refactoring. snd_seq_prioq_cell_out() now receives an extra pointer +argument. When it's non-NULL, the function checks the event timestamp +with the given pointer. The caller needs to pass the right reference +either to snd_seq_tick or snd_seq_realtime depending on the event +timestamp type. + +A good news is that the above change allows us to remove the +snd_seq_prioq_cell_peek(), too, thus the patch actually reduces the +code size. 
+ +Reviewed-by: Nicolai Stange +Cc: +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman + +--- + sound/core/seq/seq_prioq.c | 28 ++++++++++++++-------------- + sound/core/seq/seq_prioq.h | 6 ++---- + sound/core/seq/seq_queue.c | 28 +++++++++------------------- + 3 files changed, 25 insertions(+), 37 deletions(-) + +--- a/sound/core/seq/seq_prioq.c ++++ b/sound/core/seq/seq_prioq.c +@@ -87,7 +87,7 @@ void snd_seq_prioq_delete(struct snd_seq + if (f->cells > 0) { + /* drain prioQ */ + while (f->cells > 0) +- snd_seq_cell_free(snd_seq_prioq_cell_out(f)); ++ snd_seq_cell_free(snd_seq_prioq_cell_out(f, NULL)); + } + + kfree(f); +@@ -214,8 +214,18 @@ int snd_seq_prioq_cell_in(struct snd_seq + return 0; + } + ++/* return 1 if the current time >= event timestamp */ ++static int event_is_ready(struct snd_seq_event *ev, void *current_time) ++{ ++ if ((ev->flags & SNDRV_SEQ_TIME_STAMP_MASK) == SNDRV_SEQ_TIME_STAMP_TICK) ++ return snd_seq_compare_tick_time(current_time, &ev->time.tick); ++ else ++ return snd_seq_compare_real_time(current_time, &ev->time.time); ++} ++ + /* dequeue cell from prioq */ +-struct snd_seq_event_cell *snd_seq_prioq_cell_out(struct snd_seq_prioq *f) ++struct snd_seq_event_cell *snd_seq_prioq_cell_out(struct snd_seq_prioq *f, ++ void *current_time) + { + struct snd_seq_event_cell *cell; + unsigned long flags; +@@ -227,6 +237,8 @@ struct snd_seq_event_cell *snd_seq_prioq + spin_lock_irqsave(&f->lock, flags); + + cell = f->head; ++ if (cell && current_time && !event_is_ready(&cell->event, current_time)) ++ cell = NULL; + if (cell) { + f->head = cell->next; + +@@ -252,18 +264,6 @@ int snd_seq_prioq_avail(struct snd_seq_p + return f->cells; + } + +- +-/* peek at cell at the head of the prioq */ +-struct snd_seq_event_cell *snd_seq_prioq_cell_peek(struct snd_seq_prioq * f) +-{ +- if (f == NULL) { +- pr_debug("ALSA: seq: snd_seq_prioq_cell_in() called with NULL prioq\n"); +- return NULL; +- } +- return f->head; +-} +- +- + static inline int 
prioq_match(struct snd_seq_event_cell *cell, + int client, int timestamp) + { +--- a/sound/core/seq/seq_prioq.h ++++ b/sound/core/seq/seq_prioq.h +@@ -44,14 +44,12 @@ void snd_seq_prioq_delete(struct snd_seq + int snd_seq_prioq_cell_in(struct snd_seq_prioq *f, struct snd_seq_event_cell *cell); + + /* dequeue cell from prioq */ +-struct snd_seq_event_cell *snd_seq_prioq_cell_out(struct snd_seq_prioq *f); ++struct snd_seq_event_cell *snd_seq_prioq_cell_out(struct snd_seq_prioq *f, ++ void *current_time); + + /* return number of events available in prioq */ + int snd_seq_prioq_avail(struct snd_seq_prioq *f); + +-/* peek at cell at the head of the prioq */ +-struct snd_seq_event_cell *snd_seq_prioq_cell_peek(struct snd_seq_prioq *f); +- + /* client left queue */ + void snd_seq_prioq_leave(struct snd_seq_prioq *f, int client, int timestamp); + +--- a/sound/core/seq/seq_queue.c ++++ b/sound/core/seq/seq_queue.c +@@ -277,30 +277,20 @@ void snd_seq_check_queue(struct snd_seq_ + + __again: + /* Process tick queue... */ +- while ((cell = snd_seq_prioq_cell_peek(q->tickq)) != NULL) { +- if (snd_seq_compare_tick_time(&q->timer->tick.cur_tick, +- &cell->event.time.tick)) { +- cell = snd_seq_prioq_cell_out(q->tickq); +- if (cell) +- snd_seq_dispatch_event(cell, atomic, hop); +- } else { +- /* event remains in the queue */ ++ for (;;) { ++ cell = snd_seq_prioq_cell_out(q->tickq, ++ &q->timer->tick.cur_tick); ++ if (!cell) + break; +- } ++ snd_seq_dispatch_event(cell, atomic, hop); + } + +- + /* Process time queue... 
*/ +- while ((cell = snd_seq_prioq_cell_peek(q->timeq)) != NULL) { +- if (snd_seq_compare_real_time(&q->timer->cur_time, +- &cell->event.time.time)) { +- cell = snd_seq_prioq_cell_out(q->timeq); +- if (cell) +- snd_seq_dispatch_event(cell, atomic, hop); +- } else { +- /* event remains in the queue */ ++ for (;;) { ++ cell = snd_seq_prioq_cell_out(q->timeq, &q->timer->cur_time); ++ if (!cell) + break; +- } ++ snd_seq_dispatch_event(cell, atomic, hop); + } + + /* free lock */ diff --git a/queue-4.14/drm-amdgpu-dce-don-t-turn-off-dp-sink-when-disconnected.patch b/queue-4.14/drm-amdgpu-dce-don-t-turn-off-dp-sink-when-disconnected.patch new file mode 100644 index 00000000000..92f75645d5f --- /dev/null +++ b/queue-4.14/drm-amdgpu-dce-don-t-turn-off-dp-sink-when-disconnected.patch @@ -0,0 +1,68 @@ +From 7d617264eb22b18d979eac6e85877a141253034e Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Michel=20D=C3=A4nzer?= +Date: Fri, 9 Mar 2018 18:26:18 +0100 +Subject: drm/amdgpu/dce: Don't turn off DP sink when disconnected +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Michel Dänzer + +commit 7d617264eb22b18d979eac6e85877a141253034e upstream. + +Turning off the sink in this case causes various issues, because +userspace expects it to stay on until it turns it off explicitly. + +Instead, turn the sink off and back on when a display is connected +again. This dance seems necessary for link training to work correctly. 
+ +Bugzilla: https://bugs.freedesktop.org/105308 +Cc: stable@vger.kernel.org +Reviewed-by: Alex Deucher +Signed-off-by: Michel Dänzer +Signed-off-by: Alex Deucher +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 29 +++++++++---------------- + 1 file changed, 11 insertions(+), 18 deletions(-) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +@@ -69,25 +69,18 @@ void amdgpu_connector_hotplug(struct drm + /* don't do anything if sink is not display port, i.e., + * passive dp->(dvi|hdmi) adaptor + */ +- if (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) { +- int saved_dpms = connector->dpms; +- /* Only turn off the display if it's physically disconnected */ +- if (!amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) { +- drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); +- } else if (amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) { +- /* Don't try to start link training before we +- * have the dpcd */ +- if (amdgpu_atombios_dp_get_dpcd(amdgpu_connector)) +- return; ++ if (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT && ++ amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd) && ++ amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) { ++ /* Don't start link training before we have the DPCD */ ++ if (amdgpu_atombios_dp_get_dpcd(amdgpu_connector)) ++ return; + +- /* set it to OFF so that drm_helper_connector_dpms() +- * won't return immediately since the current state +- * is ON at this point. 
+- */ +- connector->dpms = DRM_MODE_DPMS_OFF; +- drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); +- } +- connector->dpms = saved_dpms; ++ /* Turn the connector off and back on immediately, which ++ * will trigger link training ++ */ ++ drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); ++ drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); + } + } + } diff --git a/queue-4.14/drm-amdgpu-fix-prime-teardown-order.patch b/queue-4.14/drm-amdgpu-fix-prime-teardown-order.patch new file mode 100644 index 00000000000..151ed629047 --- /dev/null +++ b/queue-4.14/drm-amdgpu-fix-prime-teardown-order.patch @@ -0,0 +1,48 @@ +From 342038d92403b3efa1138a8599666b9f026279d6 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= +Date: Fri, 9 Mar 2018 14:42:54 +0100 +Subject: drm/amdgpu: fix prime teardown order +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Christian König + +commit 342038d92403b3efa1138a8599666b9f026279d6 upstream. + +We unmapped imported DMA-bufs when the GEM handle was dropped, not when the +hardware was done with the buffer. 
+ +Signed-off-by: Christian König +Reviewed-by: Michel Dänzer +CC: stable@vger.kernel.org +Signed-off-by: Alex Deucher +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 2 -- + drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 ++ + 2 files changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +@@ -36,8 +36,6 @@ void amdgpu_gem_object_free(struct drm_g + struct amdgpu_bo *robj = gem_to_amdgpu_bo(gobj); + + if (robj) { +- if (robj->gem_base.import_attach) +- drm_prime_gem_destroy(&robj->gem_base, robj->tbo.sg); + amdgpu_mn_unregister(robj); + amdgpu_bo_unref(&robj); + } +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +@@ -46,6 +46,8 @@ static void amdgpu_ttm_bo_destroy(struct + + amdgpu_bo_kunmap(bo); + ++ if (bo->gem_base.import_attach) ++ drm_prime_gem_destroy(&bo->gem_base, bo->tbo.sg); + drm_gem_object_release(&bo->gem_base); + amdgpu_bo_unref(&bo->parent); + if (!list_empty(&bo->shadow_list)) { diff --git a/queue-4.14/drm-nouveau-bl-fix-oops-on-driver-unbind.patch b/queue-4.14/drm-nouveau-bl-fix-oops-on-driver-unbind.patch new file mode 100644 index 00000000000..1715df3ffbb --- /dev/null +++ b/queue-4.14/drm-nouveau-bl-fix-oops-on-driver-unbind.patch @@ -0,0 +1,58 @@ +From 76f2e2bc627f7d08360ac731b6277d744d4eb599 Mon Sep 17 00:00:00 2001 +From: Lukas Wunner +Date: Sat, 17 Feb 2018 13:40:23 +0100 +Subject: drm/nouveau/bl: Fix oops on driver unbind + +From: Lukas Wunner + +commit 76f2e2bc627f7d08360ac731b6277d744d4eb599 upstream. + +Unbinding nouveau on a dual GPU MacBook Pro oopses because we iterate +over the bl_connectors list in nouveau_backlight_exit() but skipped +initializing it in nouveau_backlight_init(). 
Stacktrace for posterity: + + BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 + IP: nouveau_backlight_exit+0x2b/0x70 [nouveau] + nouveau_display_destroy+0x29/0x80 [nouveau] + nouveau_drm_unload+0x65/0xe0 [nouveau] + drm_dev_unregister+0x3c/0xe0 [drm] + drm_put_dev+0x2e/0x60 [drm] + nouveau_drm_device_remove+0x47/0x70 [nouveau] + pci_device_remove+0x36/0xb0 + device_release_driver_internal+0x157/0x220 + driver_detach+0x39/0x70 + bus_remove_driver+0x51/0xd0 + pci_unregister_driver+0x2a/0xa0 + nouveau_drm_exit+0x15/0xfb0 [nouveau] + SyS_delete_module+0x18c/0x290 + system_call_fast_compare_end+0xc/0x6f + +Fixes: b53ac1ee12a3 ("drm/nouveau/bl: Do not register interface if Apple GMUX detected") +Cc: stable@vger.kernel.org # v4.10+ +Cc: Pierre Moreau +Signed-off-by: Lukas Wunner +Signed-off-by: Ben Skeggs +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/nouveau/nouveau_backlight.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/nouveau/nouveau_backlight.c ++++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c +@@ -268,13 +268,13 @@ nouveau_backlight_init(struct drm_device + struct nvif_device *device = &drm->client.device; + struct drm_connector *connector; + ++ INIT_LIST_HEAD(&drm->bl_connectors); ++ + if (apple_gmux_present()) { + NV_INFO(drm, "Apple GMUX detected: not registering Nouveau backlight interface\n"); + return 0; + } + +- INIT_LIST_HEAD(&drm->bl_connectors); +- + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + if (connector->connector_type != DRM_MODE_CONNECTOR_LVDS && + connector->connector_type != DRM_MODE_CONNECTOR_eDP) diff --git a/queue-4.14/drm-radeon-fix-prime-teardown-order.patch b/queue-4.14/drm-radeon-fix-prime-teardown-order.patch new file mode 100644 index 00000000000..bd375a12094 --- /dev/null +++ b/queue-4.14/drm-radeon-fix-prime-teardown-order.patch @@ -0,0 +1,48 @@ +From 0f4f715bc6bed3bf14c5cd7d5fe88d443e756b14 Mon Sep 17 00:00:00 2001 +From: 
=?UTF-8?q?Christian=20K=C3=B6nig?= +Date: Fri, 9 Mar 2018 14:44:32 +0100 +Subject: drm/radeon: fix prime teardown order +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Christian König + +commit 0f4f715bc6bed3bf14c5cd7d5fe88d443e756b14 upstream. + +We unmapped imported DMA-bufs when the GEM handle was dropped, not when the +hardware was done with the buffer. + +Signed-off-by: Christian König +Reviewed-by: Michel Dänzer +CC: stable@vger.kernel.org +Signed-off-by: Alex Deucher +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/radeon/radeon_gem.c | 2 -- + drivers/gpu/drm/radeon/radeon_object.c | 2 ++ + 2 files changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/radeon/radeon_gem.c ++++ b/drivers/gpu/drm/radeon/radeon_gem.c +@@ -34,8 +34,6 @@ void radeon_gem_object_free(struct drm_g + struct radeon_bo *robj = gem_to_radeon_bo(gobj); + + if (robj) { +- if (robj->gem_base.import_attach) +- drm_prime_gem_destroy(&robj->gem_base, robj->tbo.sg); + radeon_mn_unregister(robj); + radeon_bo_unref(&robj); + } +--- a/drivers/gpu/drm/radeon/radeon_object.c ++++ b/drivers/gpu/drm/radeon/radeon_object.c +@@ -82,6 +82,8 @@ static void radeon_ttm_bo_destroy(struct + mutex_unlock(&bo->rdev->gem.mutex); + radeon_bo_clear_surface_reg(bo); + WARN_ON_ONCE(!list_empty(&bo->va)); ++ if (bo->gem_base.import_attach) ++ drm_prime_gem_destroy(&bo->gem_base, bo->tbo.sg); + drm_gem_object_release(&bo->gem_base); + kfree(bo); + } diff --git a/queue-4.14/fs-aio-add-explicit-rcu-grace-period-when-freeing-kioctx.patch b/queue-4.14/fs-aio-add-explicit-rcu-grace-period-when-freeing-kioctx.patch new file mode 100644 index 00000000000..13ff377122e --- /dev/null +++ b/queue-4.14/fs-aio-add-explicit-rcu-grace-period-when-freeing-kioctx.patch @@ -0,0 +1,92 @@ +From a6d7cff472eea87d96899a20fa718d2bab7109f3 Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Wed, 14 Mar 2018 12:10:17 -0700 +Subject: fs/aio: Add explicit RCU grace period 
when freeing kioctx + +From: Tejun Heo + +commit a6d7cff472eea87d96899a20fa718d2bab7109f3 upstream. + +While fixing refcounting, e34ecee2ae79 ("aio: Fix a trinity splat") +incorrectly removed explicit RCU grace period before freeing kioctx. +The intention seems to be depending on the internal RCU grace periods +of percpu_ref; however, percpu_ref uses a different flavor of RCU, +sched-RCU. This can lead to kioctx being freed while RCU read +protected dereferences are still in progress. + +Fix it by updating free_ioctx() to go through call_rcu() explicitly. + +v2: Comment added to explain double bouncing. + +Signed-off-by: Tejun Heo +Reported-by: Jann Horn +Fixes: e34ecee2ae79 ("aio: Fix a trinity splat") +Cc: Kent Overstreet +Cc: Linus Torvalds +Cc: stable@vger.kernel.org # v3.13+ +Signed-off-by: Greg Kroah-Hartman + +--- + fs/aio.c | 23 +++++++++++++++++++---- + 1 file changed, 19 insertions(+), 4 deletions(-) + +--- a/fs/aio.c ++++ b/fs/aio.c +@@ -115,7 +115,8 @@ struct kioctx { + struct page **ring_pages; + long nr_pages; + +- struct work_struct free_work; ++ struct rcu_head free_rcu; ++ struct work_struct free_work; /* see free_ioctx() */ + + /* + * signals when all in-flight requests are done +@@ -588,6 +589,12 @@ static int kiocb_cancel(struct aio_kiocb + return cancel(&kiocb->common); + } + ++/* ++ * free_ioctx() should be RCU delayed to synchronize against the RCU ++ * protected lookup_ioctx() and also needs process context to call ++ * aio_free_ring(), so the double bouncing through kioctx->free_rcu and ++ * ->free_work. 
++ */ + static void free_ioctx(struct work_struct *work) + { + struct kioctx *ctx = container_of(work, struct kioctx, free_work); +@@ -601,6 +608,14 @@ static void free_ioctx(struct work_struc + kmem_cache_free(kioctx_cachep, ctx); + } + ++static void free_ioctx_rcufn(struct rcu_head *head) ++{ ++ struct kioctx *ctx = container_of(head, struct kioctx, free_rcu); ++ ++ INIT_WORK(&ctx->free_work, free_ioctx); ++ schedule_work(&ctx->free_work); ++} ++ + static void free_ioctx_reqs(struct percpu_ref *ref) + { + struct kioctx *ctx = container_of(ref, struct kioctx, reqs); +@@ -609,8 +624,8 @@ static void free_ioctx_reqs(struct percp + if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count)) + complete(&ctx->rq_wait->comp); + +- INIT_WORK(&ctx->free_work, free_ioctx); +- schedule_work(&ctx->free_work); ++ /* Synchronize against RCU protected table->table[] dereferences */ ++ call_rcu(&ctx->free_rcu, free_ioctx_rcufn); + } + + /* +@@ -838,7 +853,7 @@ static int kill_ioctx(struct mm_struct * + table->table[ctx->id] = NULL; + spin_unlock(&mm->ioctx_lock); + +- /* percpu_ref_kill() will do the necessary call_rcu() */ ++ /* free_ioctx_reqs() will do the necessary RCU synchronization */ + wake_up_all(&ctx->wait); + + /* diff --git a/queue-4.14/fs-aio-use-rcu-accessors-for-kioctx_table-table.patch b/queue-4.14/fs-aio-use-rcu-accessors-for-kioctx_table-table.patch new file mode 100644 index 00000000000..0c51cb7d387 --- /dev/null +++ b/queue-4.14/fs-aio-use-rcu-accessors-for-kioctx_table-table.patch @@ -0,0 +1,96 @@ +From d0264c01e7587001a8c4608a5d1818dba9a4c11a Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Wed, 14 Mar 2018 12:10:17 -0700 +Subject: fs/aio: Use RCU accessors for kioctx_table->table[] + +From: Tejun Heo + +commit d0264c01e7587001a8c4608a5d1818dba9a4c11a upstream. 
+ +While converting ioctx index from a list to a table, db446a08c23d +("aio: convert the ioctx list to table lookup v3") missed tagging +kioctx_table->table[] as an array of RCU pointers and using the +appropriate RCU accessors. This introduces a small window in the +lookup path where init and access may race. + +Mark kioctx_table->table[] with __rcu and use the appropriate RCU +accessors when using the field. + +Signed-off-by: Tejun Heo +Reported-by: Jann Horn +Fixes: db446a08c23d ("aio: convert the ioctx list to table lookup v3") +Cc: Benjamin LaHaise +Cc: Linus Torvalds +Cc: stable@vger.kernel.org # v3.12+ +Signed-off-by: Greg Kroah-Hartman + +--- + fs/aio.c | 21 +++++++++++---------- + 1 file changed, 11 insertions(+), 10 deletions(-) + +--- a/fs/aio.c ++++ b/fs/aio.c +@@ -68,9 +68,9 @@ struct aio_ring { + #define AIO_RING_PAGES 8 + + struct kioctx_table { +- struct rcu_head rcu; +- unsigned nr; +- struct kioctx *table[]; ++ struct rcu_head rcu; ++ unsigned nr; ++ struct kioctx __rcu *table[]; + }; + + struct kioctx_cpu { +@@ -330,7 +330,7 @@ static int aio_ring_mremap(struct vm_are + for (i = 0; i < table->nr; i++) { + struct kioctx *ctx; + +- ctx = table->table[i]; ++ ctx = rcu_dereference(table->table[i]); + if (ctx && ctx->aio_ring_file == file) { + if (!atomic_read(&ctx->dead)) { + ctx->user_id = ctx->mmap_base = vma->vm_start; +@@ -666,9 +666,9 @@ static int ioctx_add_table(struct kioctx + while (1) { + if (table) + for (i = 0; i < table->nr; i++) +- if (!table->table[i]) { ++ if (!rcu_access_pointer(table->table[i])) { + ctx->id = i; +- table->table[i] = ctx; ++ rcu_assign_pointer(table->table[i], ctx); + spin_unlock(&mm->ioctx_lock); + + /* While kioctx setup is in progress, +@@ -849,8 +849,8 @@ static int kill_ioctx(struct mm_struct * + } + + table = rcu_dereference_raw(mm->ioctx_table); +- WARN_ON(ctx != table->table[ctx->id]); +- table->table[ctx->id] = NULL; ++ WARN_ON(ctx != rcu_access_pointer(table->table[ctx->id])); ++ 
RCU_INIT_POINTER(table->table[ctx->id], NULL); + spin_unlock(&mm->ioctx_lock); + + /* free_ioctx_reqs() will do the necessary RCU synchronization */ +@@ -895,7 +895,8 @@ void exit_aio(struct mm_struct *mm) + + skipped = 0; + for (i = 0; i < table->nr; ++i) { +- struct kioctx *ctx = table->table[i]; ++ struct kioctx *ctx = ++ rcu_dereference_protected(table->table[i], true); + + if (!ctx) { + skipped++; +@@ -1084,7 +1085,7 @@ static struct kioctx *lookup_ioctx(unsig + if (!table || id >= table->nr) + goto out; + +- ctx = table->table[id]; ++ ctx = rcu_dereference(table->table[id]); + if (ctx && ctx->user_id == ctx_id) { + percpu_ref_get(&ctx->users); + ret = ctx; diff --git a/queue-4.14/fs-teach-path_connected-to-handle-nfs-filesystems-with-multiple-roots.patch b/queue-4.14/fs-teach-path_connected-to-handle-nfs-filesystems-with-multiple-roots.patch new file mode 100644 index 00000000000..0af37cae3d7 --- /dev/null +++ b/queue-4.14/fs-teach-path_connected-to-handle-nfs-filesystems-with-multiple-roots.patch @@ -0,0 +1,104 @@ +From 95dd77580ccd66a0da96e6d4696945b8cea39431 Mon Sep 17 00:00:00 2001 +From: "Eric W. Biederman" +Date: Wed, 14 Mar 2018 18:20:29 -0500 +Subject: fs: Teach path_connected to handle nfs filesystems with multiple roots. + +From: Eric W. Biederman + +commit 95dd77580ccd66a0da96e6d4696945b8cea39431 upstream. + +On nfsv2 and nfsv3 the nfs server can export subsets of the same +filesystem and report the same filesystem identifier, so that the nfs +client can know they are the same filesystem. The subsets can be from +disjoint directory trees. The nfsv2 and nfsv3 filesystems provide no +way to find the common root of all directory trees exported from the +server with the same filesystem identifier. + +The practical result is that in struct super s_root for nfs s_root is +not necessarily the root of the filesystem. The nfs mount code sets +s_root to the root of the first subset of the nfs filesystem that the +kernel mounts. 
+ +This affects the dcache invalidation code in generic_shutdown_super +currently called shrunk_dcache_for_umount and that code for years +has gone through an additional list of dentries that might be dentry +trees that need to be freed to accommodate nfs. + +When I wrote path_connected I did not realize nfs was so special, and +its heuristic for avoiding calling is_subdir can fail. + +The practical case where this fails is when there is a move of a +directory from the subtree exposed by one nfs mount to the subtree +exposed by another nfs mount. This move can happen either locally or +remotely. With the remote case requiring that the move directory be cached +before the move and that after the move someone walks the path +to where the move directory now exists and in so doing causes the +already cached directory to be moved in the dcache through the magic +of d_splice_alias. + +If someone whose working directory is in the move directory or a +subdirectory and now starts calling .. from the initial mount of nfs +(where s_root == mnt_root), then path_connected as a heuristic will +not bother with the is_subdir check. As s_root really is not the root +of the nfs filesystem this heuristic is wrong, and the path may +actually not be connected and path_connected can fail. + +The is_subdir function might be cheap enough that we can call it +unconditionally. Verifying that will take some benchmarking and +the result may not be the same on all kernels this fix needs +to be backported to. So I am avoiding that for now. + +Filesystems with snapshots such as nilfs and btrfs do something +similar. But as the directory tree of the snapshots are disjoint +from one another and from the main directory tree rename won't move +things between them and this problem will not occur. + +Cc: stable@vger.kernel.org +Reported-by: Al Viro +Fixes: 397d425dc26d ("vfs: Test for and handle paths that are unreachable from their mnt_root") +Signed-off-by: "Eric W. 
Biederman" +Signed-off-by: Al Viro +Signed-off-by: Greg Kroah-Hartman + +--- + fs/namei.c | 5 +++-- + fs/nfs/super.c | 2 ++ + include/linux/fs.h | 1 + + 3 files changed, 6 insertions(+), 2 deletions(-) + +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -578,9 +578,10 @@ static int __nd_alloc_stack(struct namei + static bool path_connected(const struct path *path) + { + struct vfsmount *mnt = path->mnt; ++ struct super_block *sb = mnt->mnt_sb; + +- /* Only bind mounts can have disconnected paths */ +- if (mnt->mnt_root == mnt->mnt_sb->s_root) ++ /* Bind mounts and multi-root filesystems can have disconnected paths */ ++ if (!(sb->s_iflags & SB_I_MULTIROOT) && (mnt->mnt_root == sb->s_root)) + return true; + + return is_subdir(path->dentry, mnt->mnt_root); +--- a/fs/nfs/super.c ++++ b/fs/nfs/super.c +@@ -2623,6 +2623,8 @@ struct dentry *nfs_fs_mount_common(struc + /* initial superblock/root creation */ + mount_info->fill_super(s, mount_info); + nfs_get_cache_cookie(s, mount_info->parsed, mount_info->cloned); ++ if (!(server->flags & NFS_MOUNT_UNSHARED)) ++ s->s_iflags |= SB_I_MULTIROOT; + } + + mntroot = nfs_get_root(s, mount_info->mntfh, dev_name); +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -1312,6 +1312,7 @@ extern int send_sigurg(struct fown_struc + #define SB_I_CGROUPWB 0x00000001 /* cgroup-aware writeback enabled */ + #define SB_I_NOEXEC 0x00000002 /* Ignore executables on this fs */ + #define SB_I_NODEV 0x00000004 /* Ignore devices on this fs */ ++#define SB_I_MULTIROOT 0x00000008 /* Multiple roots to the dentry tree */ + + /* sb->s_iflags to limit user namespace mounts */ + #define SB_I_USERNS_VISIBLE 0x00000010 /* fstype already mounted */ diff --git a/queue-4.14/kvm-arm-arm64-reduce-verbosity-of-kvm-init-log.patch b/queue-4.14/kvm-arm-arm64-reduce-verbosity-of-kvm-init-log.patch new file mode 100644 index 00000000000..452e0635b4c --- /dev/null +++ b/queue-4.14/kvm-arm-arm64-reduce-verbosity-of-kvm-init-log.patch @@ -0,0 +1,79 @@ +From 
76600428c3677659e3c3633bb4f2ea302220a275 Mon Sep 17 00:00:00 2001 +From: Ard Biesheuvel +Date: Fri, 2 Mar 2018 08:16:30 +0000 +Subject: KVM: arm/arm64: Reduce verbosity of KVM init log + +From: Ard Biesheuvel + +commit 76600428c3677659e3c3633bb4f2ea302220a275 upstream. + +On my GICv3 system, the following is printed to the kernel log at boot: + + kvm [1]: 8-bit VMID + kvm [1]: IDMAP page: d20e35000 + kvm [1]: HYP VA range: 800000000000:ffffffffffff + kvm [1]: vgic-v2@2c020000 + kvm [1]: GIC system register CPU interface enabled + kvm [1]: vgic interrupt IRQ1 + kvm [1]: virtual timer IRQ4 + kvm [1]: Hyp mode initialized successfully + +The KVM IDMAP is a mapping of a statically allocated kernel structure, +and so printing its physical address leaks the physical placement of +the kernel when physical KASLR in effect. So change the kvm_info() to +kvm_debug() to remove it from the log output. + +While at it, trim the output a bit more: IRQ numbers can be found in +/proc/interrupts, and the HYP VA and vgic-v2 lines are not highly +informational either. 
+ +Cc: +Acked-by: Will Deacon +Acked-by: Christoffer Dall +Signed-off-by: Ard Biesheuvel +Signed-off-by: Marc Zyngier +Signed-off-by: Greg Kroah-Hartman + +--- + virt/kvm/arm/arch_timer.c | 2 +- + virt/kvm/arm/mmu.c | 6 +++--- + virt/kvm/arm/vgic/vgic-v2.c | 2 +- + 3 files changed, 5 insertions(+), 5 deletions(-) + +--- a/virt/kvm/arm/arch_timer.c ++++ b/virt/kvm/arm/arch_timer.c +@@ -602,7 +602,7 @@ int kvm_timer_hyp_init(void) + return err; + } + +- kvm_info("virtual timer IRQ%d\n", host_vtimer_irq); ++ kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq); + + cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING, + "kvm/arm/timer:starting", kvm_timer_starting_cpu, +--- a/virt/kvm/arm/mmu.c ++++ b/virt/kvm/arm/mmu.c +@@ -1760,9 +1760,9 @@ int kvm_mmu_init(void) + */ + BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK); + +- kvm_info("IDMAP page: %lx\n", hyp_idmap_start); +- kvm_info("HYP VA range: %lx:%lx\n", +- kern_hyp_va(PAGE_OFFSET), kern_hyp_va(~0UL)); ++ kvm_debug("IDMAP page: %lx\n", hyp_idmap_start); ++ kvm_debug("HYP VA range: %lx:%lx\n", ++ kern_hyp_va(PAGE_OFFSET), kern_hyp_va(~0UL)); + + if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) && + hyp_idmap_start < kern_hyp_va(~0UL) && +--- a/virt/kvm/arm/vgic/vgic-v2.c ++++ b/virt/kvm/arm/vgic/vgic-v2.c +@@ -380,7 +380,7 @@ int vgic_v2_probe(const struct gic_kvm_i + kvm_vgic_global_state.type = VGIC_V2; + kvm_vgic_global_state.max_gic_vcpus = VGIC_V2_MAX_CPUS; + +- kvm_info("vgic-v2@%llx\n", info->vctrl.start); ++ kvm_debug("vgic-v2@%llx\n", info->vctrl.start); + + return 0; + out: diff --git a/queue-4.14/kvm-arm-arm64-vgic-don-t-populate-multiple-lrs-with-the-same-vintid.patch b/queue-4.14/kvm-arm-arm64-vgic-don-t-populate-multiple-lrs-with-the-same-vintid.patch new file mode 100644 index 00000000000..4b00ab3910d --- /dev/null +++ b/queue-4.14/kvm-arm-arm64-vgic-don-t-populate-multiple-lrs-with-the-same-vintid.patch @@ -0,0 +1,256 @@ +From 16ca6a607d84bef0129698d8d808f501afd08d43 Mon Sep 17 00:00:00 
2001 +From: Marc Zyngier +Date: Tue, 6 Mar 2018 21:48:01 +0000 +Subject: KVM: arm/arm64: vgic: Don't populate multiple LRs with the same vintid + +From: Marc Zyngier + +commit 16ca6a607d84bef0129698d8d808f501afd08d43 upstream. + +The vgic code is trying to be clever when injecting GICv2 SGIs, +and will happily populate LRs with the same interrupt number if +they come from multiple vcpus (after all, they are distinct +interrupt sources). + +Unfortunately, this is against the letter of the architecture, +and the GICv2 architecture spec says "Each valid interrupt stored +in the List registers must have a unique VirtualID for that +virtual CPU interface.". GICv3 has similar (although slightly +ambiguous) restrictions. + +This results in guests locking up when using GICv2-on-GICv3, for +example. The obvious fix is to stop trying so hard, and inject +a single vcpu per SGI per guest entry. After all, pending SGIs +with multiple source vcpus are pretty rare, and are mostly seen +in scenarios where the physical CPUs are severely overcommitted. + +But as we now only inject a single instance of a multi-source SGI per +vcpu entry, we may delay those interrupts for longer than strictly +necessary, and run the risk of injecting lower priority interrupts +in the meantime. + +In order to address this, we adopt a three stage strategy: +- If we encounter a multi-source SGI in the AP list while computing + its depth, we force the list to be sorted +- When populating the LRs, we prevent the injection of any interrupt + of lower priority than that of the first multi-source SGI we've + injected. +- Finally, the injection of a multi-source SGI triggers the request + of a maintenance interrupt when there will be no pending interrupt + in the LRs (HCR_NPIE). + +At the point where the last pending interrupt in the LRs switches +from Pending to Active, the maintenance interrupt will be delivered, +allowing us to add the remaining SGIs using the same process. 
+ +Cc: stable@vger.kernel.org +Fixes: 0919e84c0fc1 ("KVM: arm/arm64: vgic-new: Add IRQ sync/flush framework") +Acked-by: Christoffer Dall +Signed-off-by: Marc Zyngier +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/irqchip/arm-gic-v3.h | 1 + include/linux/irqchip/arm-gic.h | 1 + virt/kvm/arm/vgic/vgic-v2.c | 9 ++++- + virt/kvm/arm/vgic/vgic-v3.c | 9 ++++- + virt/kvm/arm/vgic/vgic.c | 61 ++++++++++++++++++++++++++++--------- + virt/kvm/arm/vgic/vgic.h | 2 + + 6 files changed, 67 insertions(+), 16 deletions(-) + +--- a/include/linux/irqchip/arm-gic-v3.h ++++ b/include/linux/irqchip/arm-gic-v3.h +@@ -501,6 +501,7 @@ + + #define ICH_HCR_EN (1 << 0) + #define ICH_HCR_UIE (1 << 1) ++#define ICH_HCR_NPIE (1 << 3) + #define ICH_HCR_TC (1 << 10) + #define ICH_HCR_TALL0 (1 << 11) + #define ICH_HCR_TALL1 (1 << 12) +--- a/include/linux/irqchip/arm-gic.h ++++ b/include/linux/irqchip/arm-gic.h +@@ -84,6 +84,7 @@ + + #define GICH_HCR_EN (1 << 0) + #define GICH_HCR_UIE (1 << 1) ++#define GICH_HCR_NPIE (1 << 3) + + #define GICH_LR_VIRTUALID (0x3ff << 0) + #define GICH_LR_PHYSID_CPUID_SHIFT (10) +--- a/virt/kvm/arm/vgic/vgic-v2.c ++++ b/virt/kvm/arm/vgic/vgic-v2.c +@@ -37,6 +37,13 @@ void vgic_v2_init_lrs(void) + vgic_v2_write_lr(i, 0); + } + ++void vgic_v2_set_npie(struct kvm_vcpu *vcpu) ++{ ++ struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2; ++ ++ cpuif->vgic_hcr |= GICH_HCR_NPIE; ++} ++ + void vgic_v2_set_underflow(struct kvm_vcpu *vcpu) + { + struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2; +@@ -63,7 +70,7 @@ void vgic_v2_fold_lr_state(struct kvm_vc + struct vgic_v2_cpu_if *cpuif = &vgic_cpu->vgic_v2; + int lr; + +- cpuif->vgic_hcr &= ~GICH_HCR_UIE; ++ cpuif->vgic_hcr &= ~(GICH_HCR_UIE | GICH_HCR_NPIE); + + for (lr = 0; lr < vgic_cpu->used_lrs; lr++) { + u32 val = cpuif->vgic_lr[lr]; +--- a/virt/kvm/arm/vgic/vgic-v3.c ++++ b/virt/kvm/arm/vgic/vgic-v3.c +@@ -25,6 +25,13 @@ static bool group0_trap; + static bool group1_trap; + static bool 
common_trap; + ++void vgic_v3_set_npie(struct kvm_vcpu *vcpu) ++{ ++ struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; ++ ++ cpuif->vgic_hcr |= ICH_HCR_NPIE; ++} ++ + void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) + { + struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; +@@ -45,7 +52,7 @@ void vgic_v3_fold_lr_state(struct kvm_vc + u32 model = vcpu->kvm->arch.vgic.vgic_model; + int lr; + +- cpuif->vgic_hcr &= ~ICH_HCR_UIE; ++ cpuif->vgic_hcr &= ~(ICH_HCR_UIE | ICH_HCR_NPIE); + + for (lr = 0; lr < vgic_cpu->used_lrs; lr++) { + u64 val = cpuif->vgic_lr[lr]; +--- a/virt/kvm/arm/vgic/vgic.c ++++ b/virt/kvm/arm/vgic/vgic.c +@@ -610,22 +610,37 @@ static inline void vgic_set_underflow(st + vgic_v3_set_underflow(vcpu); + } + ++static inline void vgic_set_npie(struct kvm_vcpu *vcpu) ++{ ++ if (kvm_vgic_global_state.type == VGIC_V2) ++ vgic_v2_set_npie(vcpu); ++ else ++ vgic_v3_set_npie(vcpu); ++} ++ + /* Requires the ap_list_lock to be held. */ +-static int compute_ap_list_depth(struct kvm_vcpu *vcpu) ++static int compute_ap_list_depth(struct kvm_vcpu *vcpu, ++ bool *multi_sgi) + { + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_irq *irq; + int count = 0; + ++ *multi_sgi = false; ++ + DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock)); + + list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { + spin_lock(&irq->irq_lock); + /* GICv2 SGIs can count for more than one... 
*/ +- if (vgic_irq_is_sgi(irq->intid) && irq->source) +- count += hweight8(irq->source); +- else ++ if (vgic_irq_is_sgi(irq->intid) && irq->source) { ++ int w = hweight8(irq->source); ++ ++ count += w; ++ *multi_sgi |= (w > 1); ++ } else { + count++; ++ } + spin_unlock(&irq->irq_lock); + } + return count; +@@ -636,28 +651,43 @@ static void vgic_flush_lr_state(struct k + { + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_irq *irq; +- int count = 0; ++ int count; ++ bool npie = false; ++ bool multi_sgi; ++ u8 prio = 0xff; + + DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock)); + +- if (compute_ap_list_depth(vcpu) > kvm_vgic_global_state.nr_lr) ++ count = compute_ap_list_depth(vcpu, &multi_sgi); ++ if (count > kvm_vgic_global_state.nr_lr || multi_sgi) + vgic_sort_ap_list(vcpu); + ++ count = 0; ++ + list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { + spin_lock(&irq->irq_lock); + +- if (unlikely(vgic_target_oracle(irq) != vcpu)) +- goto next; +- + /* +- * If we get an SGI with multiple sources, try to get +- * them in all at once. ++ * If we have multi-SGIs in the pipeline, we need to ++ * guarantee that they are all seen before any IRQ of ++ * lower priority. In that case, we need to filter out ++ * these interrupts by exiting early. This is easy as ++ * the AP list has been sorted already. 
+ */ +- do { ++ if (multi_sgi && irq->priority > prio) { ++ spin_unlock(&irq->irq_lock); ++ break; ++ } ++ ++ if (likely(vgic_target_oracle(irq) == vcpu)) { + vgic_populate_lr(vcpu, irq, count++); +- } while (irq->source && count < kvm_vgic_global_state.nr_lr); + +-next: ++ if (irq->source) { ++ npie = true; ++ prio = irq->priority; ++ } ++ } ++ + spin_unlock(&irq->irq_lock); + + if (count == kvm_vgic_global_state.nr_lr) { +@@ -668,6 +698,9 @@ next: + } + } + ++ if (npie) ++ vgic_set_npie(vcpu); ++ + vcpu->arch.vgic_cpu.used_lrs = count; + + /* Nuke remaining LRs */ +--- a/virt/kvm/arm/vgic/vgic.h ++++ b/virt/kvm/arm/vgic/vgic.h +@@ -150,6 +150,7 @@ void vgic_v2_fold_lr_state(struct kvm_vc + void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); + void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr); + void vgic_v2_set_underflow(struct kvm_vcpu *vcpu); ++void vgic_v2_set_npie(struct kvm_vcpu *vcpu); + int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr); + int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, + int offset, u32 *val); +@@ -179,6 +180,7 @@ void vgic_v3_fold_lr_state(struct kvm_vc + void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); + void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr); + void vgic_v3_set_underflow(struct kvm_vcpu *vcpu); ++void vgic_v3_set_npie(struct kvm_vcpu *vcpu); + void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); + void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); + void vgic_v3_enable(struct kvm_vcpu *vcpu); diff --git a/queue-4.14/kvm-arm-arm64-vgic-v3-tighten-synchronization-for-guests-using-v2-on-v3.patch b/queue-4.14/kvm-arm-arm64-vgic-v3-tighten-synchronization-for-guests-using-v2-on-v3.patch new file mode 100644 index 00000000000..6785675ecdf --- /dev/null +++ b/queue-4.14/kvm-arm-arm64-vgic-v3-tighten-synchronization-for-guests-using-v2-on-v3.patch @@ -0,0 +1,46 @@ +From 
27e91ad1e746e341ca2312f29bccb9736be7b476 Mon Sep 17 00:00:00 2001 +From: Marc Zyngier +Date: Tue, 6 Mar 2018 21:44:37 +0000 +Subject: kvm: arm/arm64: vgic-v3: Tighten synchronization for guests using v2 on v3 + +From: Marc Zyngier + +commit 27e91ad1e746e341ca2312f29bccb9736be7b476 upstream. + +On guest exit, and when using GICv2 on GICv3, we use a dsb(st) to +force synchronization between the memory-mapped guest view and +the system-register view that the hypervisor uses. + +This is incorrect, as the spec calls out the need for "a DSB whose +required access type is both loads and stores with any Shareability +attribute", while we're only synchronizing stores. + +We also lack an isb after the dsb to ensure that the latter has +actually been executed before we start reading stuff from the sysregs. + +The fix is pretty easy: turn dsb(st) into dsb(sy), and slap an isb() +just after. + +Cc: stable@vger.kernel.org +Fixes: f68d2b1b73cc ("arm64: KVM: Implement vgic-v3 save/restore") +Acked-by: Christoffer Dall +Reviewed-by: Andre Przywara +Signed-off-by: Marc Zyngier +Signed-off-by: Greg Kroah-Hartman + +--- + virt/kvm/arm/hyp/vgic-v3-sr.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/virt/kvm/arm/hyp/vgic-v3-sr.c ++++ b/virt/kvm/arm/hyp/vgic-v3-sr.c +@@ -215,7 +215,8 @@ void __hyp_text __vgic_v3_save_state(str + * are now visible to the system register interface. 
+ */ + if (!cpu_if->vgic_sre) { +- dsb(st); ++ dsb(sy); ++ isb(); + cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); + } + diff --git a/queue-4.14/kvm-x86-fix-device-passthrough-when-sme-is-active.patch b/queue-4.14/kvm-x86-fix-device-passthrough-when-sme-is-active.patch new file mode 100644 index 00000000000..b201a76838a --- /dev/null +++ b/queue-4.14/kvm-x86-fix-device-passthrough-when-sme-is-active.patch @@ -0,0 +1,37 @@ +From daaf216c06fba4ee4dc3f62715667da929d68774 Mon Sep 17 00:00:00 2001 +From: Tom Lendacky +Date: Thu, 8 Mar 2018 17:17:31 -0600 +Subject: KVM: x86: Fix device passthrough when SME is active + +From: Tom Lendacky + +commit daaf216c06fba4ee4dc3f62715667da929d68774 upstream. + +When using device passthrough with SME active, the MMIO range that is +mapped for the device should not be mapped encrypted. Add a check in +set_spte() to insure that a page is not mapped encrypted if that page +is a device MMIO page as indicated by kvm_is_mmio_pfn(). + +Cc: # 4.14.x- +Signed-off-by: Tom Lendacky +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/mmu.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -2758,8 +2758,10 @@ static int set_spte(struct kvm_vcpu *vcp + else + pte_access &= ~ACC_WRITE_MASK; + ++ if (!kvm_is_mmio_pfn(pfn)) ++ spte |= shadow_me_mask; ++ + spte |= (u64)pfn << PAGE_SHIFT; +- spte |= shadow_me_mask; + + if (pte_access & ACC_WRITE_MASK) { + diff --git a/queue-4.14/lock_parent-needs-to-recheck-if-dentry-got-__dentry_kill-ed-under-it.patch b/queue-4.14/lock_parent-needs-to-recheck-if-dentry-got-__dentry_kill-ed-under-it.patch new file mode 100644 index 00000000000..1f76ba6454a --- /dev/null +++ b/queue-4.14/lock_parent-needs-to-recheck-if-dentry-got-__dentry_kill-ed-under-it.patch @@ -0,0 +1,49 @@ +From 3b821409632ab778d46e807516b457dfa72736ed Mon Sep 17 00:00:00 2001 +From: Al Viro +Date: Fri, 23 Feb 2018 20:47:17 -0500 +Subject: 
lock_parent() needs to recheck if dentry got __dentry_kill'ed under it + +From: Al Viro + +commit 3b821409632ab778d46e807516b457dfa72736ed upstream. + +In case when dentry passed to lock_parent() is protected from freeing only +by the fact that it's on a shrink list and trylock of parent fails, we +could get hit by __dentry_kill() (and subsequent dentry_kill(parent)) +between unlocking dentry and locking presumed parent. We need to recheck +that dentry is alive once we lock both it and parent *and* postpone +rcu_read_unlock() until after that point. Otherwise we could return +a pointer to struct dentry that already is rcu-scheduled for freeing, with +->d_lock held on it; caller's subsequent attempt to unlock it can end +up with memory corruption. + +Cc: stable@vger.kernel.org # 3.12+, counting backports +Signed-off-by: Al Viro +Signed-off-by: Greg Kroah-Hartman + +--- + fs/dcache.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -644,11 +644,16 @@ again: + spin_unlock(&parent->d_lock); + goto again; + } +- rcu_read_unlock(); +- if (parent != dentry) ++ if (parent != dentry) { + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); +- else ++ if (unlikely(dentry->d_lockref.count < 0)) { ++ spin_unlock(&parent->d_lock); ++ parent = NULL; ++ } ++ } else { + parent = NULL; ++ } ++ rcu_read_unlock(); + return parent; + } + diff --git a/queue-4.14/parisc-handle-case-where-flush_cache_range-is-called-with-no-context.patch b/queue-4.14/parisc-handle-case-where-flush_cache_range-is-called-with-no-context.patch new file mode 100644 index 00000000000..d28bf44fdc5 --- /dev/null +++ b/queue-4.14/parisc-handle-case-where-flush_cache_range-is-called-with-no-context.patch @@ -0,0 +1,137 @@ +From 9ef0f88fe5466c2ca1d2975549ba6be502c464c1 Mon Sep 17 00:00:00 2001 +From: John David Anglin +Date: Wed, 7 Mar 2018 08:18:05 -0500 +Subject: parisc: Handle case where flush_cache_range is called with no context +MIME-Version: 
1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: John David Anglin + +commit 9ef0f88fe5466c2ca1d2975549ba6be502c464c1 upstream. + +Just when I had decided that flush_cache_range() was always called with +a valid context, Helge reported two cases where the +"BUG_ON(!vma->vm_mm->context);" was hit on the phantom buildd: + + kernel BUG at /mnt/sdb6/linux/linux-4.15.4/arch/parisc/kernel/cache.c:587! + CPU: 1 PID: 3254 Comm: kworker/1:2 Tainted: G D 4.15.0-1-parisc64-smp #1 Debian 4.15.4-1+b1 + Workqueue: events free_ioctx +  IAOQ[0]: flush_cache_range+0x164/0x168 +  IAOQ[1]: flush_cache_page+0x0/0x1c8 +  RP(r2): unmap_page_range+0xae8/0xb88 + Backtrace: +  [<00000000404a6980>] unmap_page_range+0xae8/0xb88 +  [<00000000404a6ae0>] unmap_single_vma+0xc0/0x188 +  [<00000000404a6cdc>] zap_page_range_single+0x134/0x1f8 +  [<00000000404a702c>] unmap_mapping_range+0x1cc/0x208 +  [<0000000040461518>] truncate_pagecache+0x98/0x108 +  [<0000000040461624>] truncate_setsize+0x9c/0xb8 +  [<00000000405d7f30>] put_aio_ring_file+0x80/0x100 +  [<00000000405d803c>] aio_free_ring+0x8c/0x290 +  [<00000000405d82c0>] free_ioctx+0x80/0x180 +  [<0000000040284e6c>] process_one_work+0x21c/0x668 +  [<00000000402854c4>] worker_thread+0x20c/0x778 +  [<0000000040291d44>] kthread+0x2d4/0x2e0 +  [<0000000040204020>] end_fault_vector+0x20/0xc0 + +This indicates that we need to handle the no context case in +flush_cache_range() as we do in flush_cache_mm(). + +In thinking about this, I realized that we don't need to flush the TLB +when there is no context. So, I added context checks to the large flush +cases in flush_cache_mm() and flush_cache_range(). The large flush case +occurs frequently in flush_cache_mm() and the change should improve fork +performance. + +The v2 version of this change removes the BUG_ON from flush_cache_page() +by skipping the TLB flush when there is no context.  
I also added code +to flush the TLB in flush_cache_mm() and flush_cache_range() when we +have a context that's not current.  Now all three routines handle TLB +flushes in a similar manner. + +Signed-off-by: John David Anglin +Cc: stable@vger.kernel.org # 4.9+ +Signed-off-by: Helge Deller +Signed-off-by: Greg Kroah-Hartman + +--- + arch/parisc/kernel/cache.c | 41 ++++++++++++++++++++++++++++++++--------- + 1 file changed, 32 insertions(+), 9 deletions(-) + +--- a/arch/parisc/kernel/cache.c ++++ b/arch/parisc/kernel/cache.c +@@ -543,7 +543,8 @@ void flush_cache_mm(struct mm_struct *mm + rp3440, etc. So, avoid it if the mm isn't too big. */ + if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && + mm_total_size(mm) >= parisc_cache_flush_threshold) { +- flush_tlb_all(); ++ if (mm->context) ++ flush_tlb_all(); + flush_cache_all(); + return; + } +@@ -571,6 +572,8 @@ void flush_cache_mm(struct mm_struct *mm + pfn = pte_pfn(*ptep); + if (!pfn_valid(pfn)) + continue; ++ if (unlikely(mm->context)) ++ flush_tlb_page(vma, addr); + __flush_cache_page(vma, addr, PFN_PHYS(pfn)); + } + } +@@ -579,26 +582,46 @@ void flush_cache_mm(struct mm_struct *mm + void flush_cache_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) + { ++ pgd_t *pgd; ++ unsigned long addr; ++ + if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && + end - start >= parisc_cache_flush_threshold) { +- flush_tlb_range(vma, start, end); ++ if (vma->vm_mm->context) ++ flush_tlb_range(vma, start, end); + flush_cache_all(); + return; + } + +- flush_user_dcache_range_asm(start, end); +- if (vma->vm_flags & VM_EXEC) +- flush_user_icache_range_asm(start, end); +- flush_tlb_range(vma, start, end); ++ if (vma->vm_mm->context == mfsp(3)) { ++ flush_user_dcache_range_asm(start, end); ++ if (vma->vm_flags & VM_EXEC) ++ flush_user_icache_range_asm(start, end); ++ flush_tlb_range(vma, start, end); ++ return; ++ } ++ ++ pgd = vma->vm_mm->pgd; ++ for (addr = vma->vm_start; addr < vma->vm_end; addr 
+= PAGE_SIZE) { ++ unsigned long pfn; ++ pte_t *ptep = get_ptep(pgd, addr); ++ if (!ptep) ++ continue; ++ pfn = pte_pfn(*ptep); ++ if (pfn_valid(pfn)) { ++ if (unlikely(vma->vm_mm->context)) ++ flush_tlb_page(vma, addr); ++ __flush_cache_page(vma, addr, PFN_PHYS(pfn)); ++ } ++ } + } + + void + flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn) + { +- BUG_ON(!vma->vm_mm->context); +- + if (pfn_valid(pfn)) { +- flush_tlb_page(vma, vmaddr); ++ if (likely(vma->vm_mm->context)) ++ flush_tlb_page(vma, vmaddr); + __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn)); + } + } diff --git a/queue-4.14/rdmavt-fix-synchronization-around-percpu_ref.patch b/queue-4.14/rdmavt-fix-synchronization-around-percpu_ref.patch new file mode 100644 index 00000000000..8cf47ca8d44 --- /dev/null +++ b/queue-4.14/rdmavt-fix-synchronization-around-percpu_ref.patch @@ -0,0 +1,61 @@ +From 74b44bbe80b4c62113ac1501482ea1ee40eb9d67 Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Wed, 14 Mar 2018 12:10:18 -0700 +Subject: RDMAVT: Fix synchronization around percpu_ref + +From: Tejun Heo + +commit 74b44bbe80b4c62113ac1501482ea1ee40eb9d67 upstream. + +rvt_mregion uses percpu_ref for reference counting and RCU to protect +accesses from lkey_table. When a rvt_mregion needs to be freed, it +first gets unregistered from lkey_table and then rvt_check_refs() is +called to wait for in-flight usages before the rvt_mregion is freed. + +rvt_check_refs() seems to have a couple issues. + +* It has a fast exit path which tests percpu_ref_is_zero(). However, + a percpu_ref reading zero doesn't mean that the object can be + released. In fact, the ->release() callback might not even have + started executing yet. Proceeding with freeing can lead to + use-after-free. + +* lkey_table is RCU protected but there is no RCU grace period in the + free path. percpu_ref uses RCU internally but it's sched-RCU whose + grace periods are different from regular RCU. 
Also, it generally + isn't a good idea to depend on internal behaviors like this. + +To address the above issues, this patch removes the fast exit and adds +an explicit synchronize_rcu(). + +Signed-off-by: Tejun Heo +Acked-by: Dennis Dalessandro +Cc: Mike Marciniszyn +Cc: linux-rdma@vger.kernel.org +Cc: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/infiniband/sw/rdmavt/mr.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/drivers/infiniband/sw/rdmavt/mr.c ++++ b/drivers/infiniband/sw/rdmavt/mr.c +@@ -489,11 +489,13 @@ static int rvt_check_refs(struct rvt_mre + unsigned long timeout; + struct rvt_dev_info *rdi = ib_to_rvt(mr->pd->device); + +- if (percpu_ref_is_zero(&mr->refcount)) +- return 0; +- /* avoid dma mr */ +- if (mr->lkey) ++ if (mr->lkey) { ++ /* avoid dma mr */ + rvt_dereg_clean_qps(mr); ++ /* @mr was indexed on rcu protected @lkey_table */ ++ synchronize_rcu(); ++ } ++ + timeout = wait_for_completion_timeout(&mr->comp, 5 * HZ); + if (!timeout) { + rvt_pr_err(rdi, diff --git a/queue-4.14/selftests-x86-add-tests-for-the-str-and-sldt-instructions.patch b/queue-4.14/selftests-x86-add-tests-for-the-str-and-sldt-instructions.patch new file mode 100644 index 00000000000..56f6389c45c --- /dev/null +++ b/queue-4.14/selftests-x86-add-tests-for-the-str-and-sldt-instructions.patch @@ -0,0 +1,100 @@ +From a9e017d5619eb371460c8e516f4684def62bef3a Mon Sep 17 00:00:00 2001 +From: Ricardo Neri +Date: Sun, 5 Nov 2017 18:27:57 -0800 +Subject: selftests/x86: Add tests for the STR and SLDT instructions + +From: Ricardo Neri + +commit a9e017d5619eb371460c8e516f4684def62bef3a upstream. + +The STR and SLDT instructions are not valid when running on virtual-8086 +mode and generate an invalid operand exception. These two instructions are +protected by the Intel User-Mode Instruction Prevention (UMIP) security +feature. 
In protected mode, if UMIP is enabled, these instructions generate +a general protection fault if called from CPL > 0. Linux traps the general +protection fault and emulates the instructions sgdt, sidt and smsw; but not +str and sldt. + +These tests are added to verify that the emulation code does not emulate +these two instructions but the expected invalid operand exception is +seen. + +Tests fallback to exit with INT3 in case emulation does happen. + +Signed-off-by: Ricardo Neri +Reviewed-by: Thomas Gleixner +Cc: Andrew Morton +Cc: Andy Lutomirski +Cc: Borislav Petkov +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Chen Yucong +Cc: Chris Metcalf +Cc: Dave Hansen +Cc: Denys Vlasenko +Cc: Fenghua Yu +Cc: H. Peter Anvin +Cc: Huang Rui +Cc: Jiri Slaby +Cc: Jonathan Corbet +Cc: Josh Poimboeuf +Cc: Linus Torvalds +Cc: Masami Hiramatsu +Cc: Michael S. Tsirkin +Cc: Paolo Bonzini +Cc: Paul Gortmaker +Cc: Peter Zijlstra +Cc: Ravi V. Shankar +Cc: Shuah Khan +Cc: Tony Luck +Cc: Vlastimil Babka +Cc: ricardo.neri@intel.com +Link: http://lkml.kernel.org/r/1509935277-22138-13-git-send-email-ricardo.neri-calderon@linux.intel.com +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + tools/testing/selftests/x86/entry_from_vm86.c | 18 +++++++++++++++++- + 1 file changed, 17 insertions(+), 1 deletion(-) + +--- a/tools/testing/selftests/x86/entry_from_vm86.c ++++ b/tools/testing/selftests/x86/entry_from_vm86.c +@@ -111,6 +111,11 @@ asm ( + "smsw %ax\n\t" + "mov %ax, (2080)\n\t" + "int3\n\t" ++ "vmcode_umip_str:\n\t" ++ "str %eax\n\t" ++ "vmcode_umip_sldt:\n\t" ++ "sldt %eax\n\t" ++ "int3\n\t" + ".size vmcode, . 
- vmcode\n\t" + "end_vmcode:\n\t" + ".code32\n\t" +@@ -119,7 +124,8 @@ asm ( + + extern unsigned char vmcode[], end_vmcode[]; + extern unsigned char vmcode_bound[], vmcode_sysenter[], vmcode_syscall[], +- vmcode_sti[], vmcode_int3[], vmcode_int80[], vmcode_umip[]; ++ vmcode_sti[], vmcode_int3[], vmcode_int80[], vmcode_umip[], ++ vmcode_umip_str[], vmcode_umip_sldt[]; + + /* Returns false if the test was skipped. */ + static bool do_test(struct vm86plus_struct *v86, unsigned long eip, +@@ -226,6 +232,16 @@ void do_umip_tests(struct vm86plus_struc + printf("[FAIL]\tAll the results of SIDT should be the same.\n"); + else + printf("[PASS]\tAll the results from SIDT are identical.\n"); ++ ++ sethandler(SIGILL, sighandler, 0); ++ do_test(vm86, vmcode_umip_str - vmcode, VM86_SIGNAL, 0, ++ "STR instruction"); ++ clearhandler(SIGILL); ++ ++ sethandler(SIGILL, sighandler, 0); ++ do_test(vm86, vmcode_umip_sldt - vmcode, VM86_SIGNAL, 0, ++ "SLDT instruction"); ++ clearhandler(SIGILL); + } + + int main(void) diff --git a/queue-4.14/selftests-x86-add-tests-for-user-mode-instruction-prevention.patch b/queue-4.14/selftests-x86-add-tests-for-user-mode-instruction-prevention.patch new file mode 100644 index 00000000000..96719a4bd89 --- /dev/null +++ b/queue-4.14/selftests-x86-add-tests-for-user-mode-instruction-prevention.patch @@ -0,0 +1,171 @@ +From 9390afebe1d3f5a0be18b1afdd0ce09d67cebf9e Mon Sep 17 00:00:00 2001 +From: Ricardo Neri +Date: Sun, 5 Nov 2017 18:27:56 -0800 +Subject: selftests/x86: Add tests for User-Mode Instruction Prevention + +From: Ricardo Neri + +commit 9390afebe1d3f5a0be18b1afdd0ce09d67cebf9e upstream. + +Certain user space programs that run on virtual-8086 mode may utilize +instructions protected by the User-Mode Instruction Prevention (UMIP) +security feature present in new Intel processors: SGDT, SIDT and SMSW. In +such a case, a general protection fault is issued if UMIP is enabled. 
When +such a fault happens, the kernel traps it and emulates the results of +these instructions with dummy values. The purpose of this new +test is to verify whether the impacted instructions can be executed +without causing such #GP. If no #GP exceptions occur, we expect to exit +virtual-8086 mode from INT3. + +The instructions protected by UMIP are executed in representative use +cases: + + a) displacement-only memory addressing + b) register-indirect memory addressing + c) results stored directly in operands + +Unfortunately, it is not possible to check the results against a set of +expected values because no emulation will occur in systems that do not +have the UMIP feature. Instead, results are printed for verification. A +simple verification is done to ensure that results of all tests are +identical. + +Signed-off-by: Ricardo Neri +Reviewed-by: Thomas Gleixner +Cc: Andrew Morton +Cc: Andy Lutomirski +Cc: Borislav Petkov +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Chen Yucong +Cc: Chris Metcalf +Cc: Dave Hansen +Cc: Denys Vlasenko +Cc: Fenghua Yu +Cc: H. Peter Anvin +Cc: Huang Rui +Cc: Jiri Slaby +Cc: Jonathan Corbet +Cc: Josh Poimboeuf +Cc: Linus Torvalds +Cc: Masami Hiramatsu +Cc: Michael S. Tsirkin +Cc: Paolo Bonzini +Cc: Paul Gortmaker +Cc: Peter Zijlstra +Cc: Ravi V. 
Shankar +Cc: Shuah Khan +Cc: Tony Luck +Cc: Vlastimil Babka +Cc: ricardo.neri@intel.com +Link: http://lkml.kernel.org/r/1509935277-22138-12-git-send-email-ricardo.neri-calderon@linux.intel.com +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + tools/testing/selftests/x86/entry_from_vm86.c | 73 +++++++++++++++++++++++++- + 1 file changed, 72 insertions(+), 1 deletion(-) + +--- a/tools/testing/selftests/x86/entry_from_vm86.c ++++ b/tools/testing/selftests/x86/entry_from_vm86.c +@@ -95,6 +95,22 @@ asm ( + "int3\n\t" + "vmcode_int80:\n\t" + "int $0x80\n\t" ++ "vmcode_umip:\n\t" ++ /* addressing via displacements */ ++ "smsw (2052)\n\t" ++ "sidt (2054)\n\t" ++ "sgdt (2060)\n\t" ++ /* addressing via registers */ ++ "mov $2066, %bx\n\t" ++ "smsw (%bx)\n\t" ++ "mov $2068, %bx\n\t" ++ "sidt (%bx)\n\t" ++ "mov $2074, %bx\n\t" ++ "sgdt (%bx)\n\t" ++ /* register operands, only for smsw */ ++ "smsw %ax\n\t" ++ "mov %ax, (2080)\n\t" ++ "int3\n\t" + ".size vmcode, . - vmcode\n\t" + "end_vmcode:\n\t" + ".code32\n\t" +@@ -103,7 +119,7 @@ asm ( + + extern unsigned char vmcode[], end_vmcode[]; + extern unsigned char vmcode_bound[], vmcode_sysenter[], vmcode_syscall[], +- vmcode_sti[], vmcode_int3[], vmcode_int80[]; ++ vmcode_sti[], vmcode_int3[], vmcode_int80[], vmcode_umip[]; + + /* Returns false if the test was skipped. 
*/ + static bool do_test(struct vm86plus_struct *v86, unsigned long eip, +@@ -160,6 +176,58 @@ static bool do_test(struct vm86plus_stru + return true; + } + ++void do_umip_tests(struct vm86plus_struct *vm86, unsigned char *test_mem) ++{ ++ struct table_desc { ++ unsigned short limit; ++ unsigned long base; ++ } __attribute__((packed)); ++ ++ /* Initialize variables with arbitrary values */ ++ struct table_desc gdt1 = { .base = 0x3c3c3c3c, .limit = 0x9999 }; ++ struct table_desc gdt2 = { .base = 0x1a1a1a1a, .limit = 0xaeae }; ++ struct table_desc idt1 = { .base = 0x7b7b7b7b, .limit = 0xf1f1 }; ++ struct table_desc idt2 = { .base = 0x89898989, .limit = 0x1313 }; ++ unsigned short msw1 = 0x1414, msw2 = 0x2525, msw3 = 3737; ++ ++ /* UMIP -- exit with INT3 unless kernel emulation did not trap #GP */ ++ do_test(vm86, vmcode_umip - vmcode, VM86_TRAP, 3, "UMIP tests"); ++ ++ /* Results from displacement-only addressing */ ++ msw1 = *(unsigned short *)(test_mem + 2052); ++ memcpy(&idt1, test_mem + 2054, sizeof(idt1)); ++ memcpy(&gdt1, test_mem + 2060, sizeof(gdt1)); ++ ++ /* Results from register-indirect addressing */ ++ msw2 = *(unsigned short *)(test_mem + 2066); ++ memcpy(&idt2, test_mem + 2068, sizeof(idt2)); ++ memcpy(&gdt2, test_mem + 2074, sizeof(gdt2)); ++ ++ /* Results when using register operands */ ++ msw3 = *(unsigned short *)(test_mem + 2080); ++ ++ printf("[INFO]\tResult from SMSW:[0x%04x]\n", msw1); ++ printf("[INFO]\tResult from SIDT: limit[0x%04x]base[0x%08lx]\n", ++ idt1.limit, idt1.base); ++ printf("[INFO]\tResult from SGDT: limit[0x%04x]base[0x%08lx]\n", ++ gdt1.limit, gdt1.base); ++ ++ if (msw1 != msw2 || msw1 != msw3) ++ printf("[FAIL]\tAll the results of SMSW should be the same.\n"); ++ else ++ printf("[PASS]\tAll the results from SMSW are identical.\n"); ++ ++ if (memcmp(&gdt1, &gdt2, sizeof(gdt1))) ++ printf("[FAIL]\tAll the results of SGDT should be the same.\n"); ++ else ++ printf("[PASS]\tAll the results from SGDT are identical.\n"); ++ ++ if 
(memcmp(&idt1, &idt2, sizeof(idt1))) ++ printf("[FAIL]\tAll the results of SIDT should be the same.\n"); ++ else ++ printf("[PASS]\tAll the results from SIDT are identical.\n"); ++} ++ + int main(void) + { + struct vm86plus_struct v86; +@@ -218,6 +286,9 @@ int main(void) + v86.regs.eax = (unsigned int)-1; + do_test(&v86, vmcode_int80 - vmcode, VM86_INTx, 0x80, "int80"); + ++ /* UMIP -- should exit with INTx 0x80 unless UMIP was not disabled */ ++ do_umip_tests(&v86, addr); ++ + /* Execute a null pointer */ + v86.regs.cs = 0; + v86.regs.ss = 0; diff --git a/queue-4.14/selftests-x86-entry_from_vm86-add-test-cases-for-popf.patch b/queue-4.14/selftests-x86-entry_from_vm86-add-test-cases-for-popf.patch new file mode 100644 index 00000000000..c99594537de --- /dev/null +++ b/queue-4.14/selftests-x86-entry_from_vm86-add-test-cases-for-popf.patch @@ -0,0 +1,108 @@ +From 78393fdde2a456cafa414b171c90f26a3df98b20 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Tue, 13 Mar 2018 22:03:11 -0700 +Subject: selftests/x86/entry_from_vm86: Add test cases for POPF + +From: Andy Lutomirski + +commit 78393fdde2a456cafa414b171c90f26a3df98b20 upstream. + +POPF is currently broken -- add tests to catch the error. This +results in: + + [RUN] POPF with VIP set and IF clear from vm86 mode + [INFO] Exited vm86 mode due to STI + [FAIL] Incorrect return reason (started at eip = 0xd, ended at eip = 0xf) + +because POPF currently fails to check IF before reporting a pending +interrupt. + +This patch also makes the FAIL message a bit more informative. + +Reported-by: Bart Oldeman +Signed-off-by: Andy Lutomirski +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Denys Vlasenko +Cc: H. 
Peter Anvin +Cc: Josh Poimboeuf +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Stas Sergeev +Cc: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: http://lkml.kernel.org/r/a16270b5cfe7832d6d00c479d0f871066cbdb52b.1521003603.git.luto@kernel.org +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + tools/testing/selftests/x86/entry_from_vm86.c | 30 +++++++++++++++++++++++--- + 1 file changed, 27 insertions(+), 3 deletions(-) + +--- a/tools/testing/selftests/x86/entry_from_vm86.c ++++ b/tools/testing/selftests/x86/entry_from_vm86.c +@@ -95,6 +95,10 @@ asm ( + "int3\n\t" + "vmcode_int80:\n\t" + "int $0x80\n\t" ++ "vmcode_popf_hlt:\n\t" ++ "push %ax\n\t" ++ "popf\n\t" ++ "hlt\n\t" + "vmcode_umip:\n\t" + /* addressing via displacements */ + "smsw (2052)\n\t" +@@ -124,8 +128,8 @@ asm ( + + extern unsigned char vmcode[], end_vmcode[]; + extern unsigned char vmcode_bound[], vmcode_sysenter[], vmcode_syscall[], +- vmcode_sti[], vmcode_int3[], vmcode_int80[], vmcode_umip[], +- vmcode_umip_str[], vmcode_umip_sldt[]; ++ vmcode_sti[], vmcode_int3[], vmcode_int80[], vmcode_popf_hlt[], ++ vmcode_umip[], vmcode_umip_str[], vmcode_umip_sldt[]; + + /* Returns false if the test was skipped. */ + static bool do_test(struct vm86plus_struct *v86, unsigned long eip, +@@ -175,7 +179,7 @@ static bool do_test(struct vm86plus_stru + (VM86_TYPE(ret) == rettype && VM86_ARG(ret) == retarg)) { + printf("[OK]\tReturned correctly\n"); + } else { +- printf("[FAIL]\tIncorrect return reason\n"); ++ printf("[FAIL]\tIncorrect return reason (started at eip = 0x%lx, ended at eip = 0x%lx)\n", eip, v86->regs.eip); + nerrs++; + } + +@@ -264,6 +268,9 @@ int main(void) + v86.regs.ds = load_addr / 16; + v86.regs.es = load_addr / 16; + ++ /* Use the end of the page as our stack. */ ++ v86.regs.esp = 4096; ++ + assert((v86.regs.cs & 3) == 0); /* Looks like RPL = 0 */ + + /* #BR -- should deliver SIG??? 
*/ +@@ -295,6 +302,23 @@ int main(void) + v86.regs.eflags &= ~X86_EFLAGS_IF; + do_test(&v86, vmcode_sti - vmcode, VM86_STI, 0, "STI with VIP set"); + ++ /* POPF with VIP set but IF clear: should not trap */ ++ v86.regs.eflags = X86_EFLAGS_VIP; ++ v86.regs.eax = 0; ++ do_test(&v86, vmcode_popf_hlt - vmcode, VM86_UNKNOWN, 0, "POPF with VIP set and IF clear"); ++ ++ /* POPF with VIP set and IF set: should trap */ ++ v86.regs.eflags = X86_EFLAGS_VIP; ++ v86.regs.eax = X86_EFLAGS_IF; ++ do_test(&v86, vmcode_popf_hlt - vmcode, VM86_STI, 0, "POPF with VIP and IF set"); ++ ++ /* POPF with VIP clear and IF set: should not trap */ ++ v86.regs.eflags = 0; ++ v86.regs.eax = X86_EFLAGS_IF; ++ do_test(&v86, vmcode_popf_hlt - vmcode, VM86_UNKNOWN, 0, "POPF with VIP clear and IF set"); ++ ++ v86.regs.eflags = 0; ++ + /* INT3 -- should cause #BP */ + do_test(&v86, vmcode_int3 - vmcode, VM86_TRAP, 3, "INT3"); + diff --git a/queue-4.14/selftests-x86-entry_from_vm86-exit-with-1-if-we-fail.patch b/queue-4.14/selftests-x86-entry_from_vm86-exit-with-1-if-we-fail.patch new file mode 100644 index 00000000000..a66691bb9ce --- /dev/null +++ b/queue-4.14/selftests-x86-entry_from_vm86-exit-with-1-if-we-fail.patch @@ -0,0 +1,43 @@ +From 327d53d005ca47b10eae940616ed11c569f75a9b Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Tue, 13 Mar 2018 22:03:10 -0700 +Subject: selftests/x86/entry_from_vm86: Exit with 1 if we fail + +From: Andy Lutomirski + +commit 327d53d005ca47b10eae940616ed11c569f75a9b upstream. + +Fix a logic error that caused the test to exit with 0 even if test +cases failed. + +Signed-off-by: Andy Lutomirski +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Denys Vlasenko +Cc: H. 
Peter Anvin +Cc: Josh Poimboeuf +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Stas Sergeev +Cc: Thomas Gleixner +Cc: bartoldeman@gmail.com +Cc: stable@vger.kernel.org +Link: http://lkml.kernel.org/r/b1cc37144038958a469c8f70a5f47a6a5638636a.1521003603.git.luto@kernel.org +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + tools/testing/selftests/x86/entry_from_vm86.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/testing/selftests/x86/entry_from_vm86.c ++++ b/tools/testing/selftests/x86/entry_from_vm86.c +@@ -231,7 +231,7 @@ int main(void) + clearhandler(SIGSEGV); + + /* Make sure nothing explodes if we fork. */ +- if (fork() > 0) ++ if (fork() == 0) + return 0; + + return (nerrs == 0 ? 0 : 1); diff --git a/queue-4.14/series b/queue-4.14/series new file mode 100644 index 00000000000..67c037cbb19 --- /dev/null +++ b/queue-4.14/series @@ -0,0 +1,28 @@ +x86-cpufeatures-add-intel-total-memory-encryption-cpufeature.patch +x86-cpufeatures-add-intel-pconfig-cpufeature.patch +selftests-x86-entry_from_vm86-exit-with-1-if-we-fail.patch +selftests-x86-add-tests-for-user-mode-instruction-prevention.patch +selftests-x86-add-tests-for-the-str-and-sldt-instructions.patch +selftests-x86-entry_from_vm86-add-test-cases-for-popf.patch +x86-vm86-32-fix-popf-emulation.patch +x86-speculation-objtool-annotate-indirect-calls-jumps-for-objtool-on-32-bit-kernels.patch +x86-speculation-remove-skylake-c2-from-speculation-control-microcode-blacklist.patch +kvm-x86-fix-device-passthrough-when-sme-is-active.patch +x86-mm-fix-vmalloc_fault-to-use-pxd_large.patch +parisc-handle-case-where-flush_cache_range-is-called-with-no-context.patch +alsa-pcm-fix-uaf-in-snd_pcm_oss_get_formats.patch +alsa-hda-revert-power_save-option-default-value.patch +alsa-seq-fix-possible-uaf-in-snd_seq_check_queue.patch +alsa-seq-clear-client-entry-before-deleting-else-at-closing.patch +drm-nouveau-bl-fix-oops-on-driver-unbind.patch +drm-amdgpu-fix-prime-teardown-order.patch 
+drm-radeon-fix-prime-teardown-order.patch +drm-amdgpu-dce-don-t-turn-off-dp-sink-when-disconnected.patch +fs-teach-path_connected-to-handle-nfs-filesystems-with-multiple-roots.patch +kvm-arm-arm64-reduce-verbosity-of-kvm-init-log.patch +kvm-arm-arm64-vgic-v3-tighten-synchronization-for-guests-using-v2-on-v3.patch +kvm-arm-arm64-vgic-don-t-populate-multiple-lrs-with-the-same-vintid.patch +lock_parent-needs-to-recheck-if-dentry-got-__dentry_kill-ed-under-it.patch +fs-aio-add-explicit-rcu-grace-period-when-freeing-kioctx.patch +fs-aio-use-rcu-accessors-for-kioctx_table-table.patch +rdmavt-fix-synchronization-around-percpu_ref.patch diff --git a/queue-4.14/x86-cpufeatures-add-intel-pconfig-cpufeature.patch b/queue-4.14/x86-cpufeatures-add-intel-pconfig-cpufeature.patch new file mode 100644 index 00000000000..3eb8813d01c --- /dev/null +++ b/queue-4.14/x86-cpufeatures-add-intel-pconfig-cpufeature.patch @@ -0,0 +1,37 @@ +From 7958b2246fadf54b7ff820a2a5a2c5ca1554716f Mon Sep 17 00:00:00 2001 +From: "Kirill A. Shutemov" +Date: Mon, 5 Mar 2018 19:25:51 +0300 +Subject: x86/cpufeatures: Add Intel PCONFIG cpufeature + +From: Kirill A. Shutemov + +commit 7958b2246fadf54b7ff820a2a5a2c5ca1554716f upstream. + +CPUID.0x7.0x0:EDX[18] indicates whether the Intel CPU supports the PCONFIG instruction. + +Signed-off-by: Kirill A. 
Shutemov +Cc: Dave Hansen +Cc: Kai Huang +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: Tom Lendacky +Cc: linux-mm@kvack.org +Link: http://lkml.kernel.org/r/20180305162610.37510-4-kirill.shutemov@linux.intel.com +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/cpufeatures.h | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -327,6 +327,7 @@ + /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ + #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ + #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ ++#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ + #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ + #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ + #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ diff --git a/queue-4.14/x86-cpufeatures-add-intel-total-memory-encryption-cpufeature.patch b/queue-4.14/x86-cpufeatures-add-intel-total-memory-encryption-cpufeature.patch new file mode 100644 index 00000000000..d3d38ba850f --- /dev/null +++ b/queue-4.14/x86-cpufeatures-add-intel-total-memory-encryption-cpufeature.patch @@ -0,0 +1,38 @@ +From 1da961d72ab0cfbe8b7c26cba731dc2bb6b9494b Mon Sep 17 00:00:00 2001 +From: "Kirill A. Shutemov" +Date: Mon, 5 Mar 2018 19:25:49 +0300 +Subject: x86/cpufeatures: Add Intel Total Memory Encryption cpufeature + +From: Kirill A. Shutemov + +commit 1da961d72ab0cfbe8b7c26cba731dc2bb6b9494b upstream. + +CPUID.0x7.0x0:ECX[13] indicates whether CPU supports Intel Total Memory +Encryption. + +Signed-off-by: Kirill A. 
Shutemov +Cc: Dave Hansen +Cc: Kai Huang +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: Tom Lendacky +Cc: linux-mm@kvack.org +Link: http://lkml.kernel.org/r/20180305162610.37510-2-kirill.shutemov@linux.intel.com +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/cpufeatures.h | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -314,6 +314,7 @@ + #define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */ + #define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */ + #define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */ ++#define X86_FEATURE_TME (16*32+13) /* Intel Total Memory Encryption */ + #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ + #define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ + #define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ diff --git a/queue-4.14/x86-mm-fix-vmalloc_fault-to-use-pxd_large.patch b/queue-4.14/x86-mm-fix-vmalloc_fault-to-use-pxd_large.patch new file mode 100644 index 00000000000..19f786d64ab --- /dev/null +++ b/queue-4.14/x86-mm-fix-vmalloc_fault-to-use-pxd_large.patch @@ -0,0 +1,59 @@ +From 18a955219bf7d9008ce480d4451b6b8bf4483a22 Mon Sep 17 00:00:00 2001 +From: Toshi Kani +Date: Tue, 13 Mar 2018 11:03:46 -0600 +Subject: x86/mm: Fix vmalloc_fault to use pXd_large + +From: Toshi Kani + +commit 18a955219bf7d9008ce480d4451b6b8bf4483a22 upstream. + +Gratian Crisan reported that vmalloc_fault() crashes when CONFIG_HUGETLBFS +is not set since the function inadvertently uses pXd_huge(), which always +returns 0 in this case. ioremap() does not depend on CONFIG_HUGETLBFS. + +Fix vmalloc_fault() to call pXd_large() instead. 
+ +Fixes: f4eafd8bcd52 ("x86/mm: Fix vmalloc_fault() to handle large pages properly") +Reported-by: Gratian Crisan +Signed-off-by: Toshi Kani +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Cc: linux-mm@kvack.org +Cc: Borislav Petkov +Cc: Andy Lutomirski +Link: https://lkml.kernel.org/r/20180313170347.3829-2-toshi.kani@hpe.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/mm/fault.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/arch/x86/mm/fault.c ++++ b/arch/x86/mm/fault.c +@@ -330,7 +330,7 @@ static noinline int vmalloc_fault(unsign + if (!pmd_k) + return -1; + +- if (pmd_huge(*pmd_k)) ++ if (pmd_large(*pmd_k)) + return 0; + + pte_k = pte_offset_kernel(pmd_k, address); +@@ -479,7 +479,7 @@ static noinline int vmalloc_fault(unsign + if (pud_none(*pud) || pud_pfn(*pud) != pud_pfn(*pud_ref)) + BUG(); + +- if (pud_huge(*pud)) ++ if (pud_large(*pud)) + return 0; + + pmd = pmd_offset(pud, address); +@@ -490,7 +490,7 @@ static noinline int vmalloc_fault(unsign + if (pmd_none(*pmd) || pmd_pfn(*pmd) != pmd_pfn(*pmd_ref)) + BUG(); + +- if (pmd_huge(*pmd)) ++ if (pmd_large(*pmd)) + return 0; + + pte_ref = pte_offset_kernel(pmd_ref, address); diff --git a/queue-4.14/x86-speculation-objtool-annotate-indirect-calls-jumps-for-objtool-on-32-bit-kernels.patch b/queue-4.14/x86-speculation-objtool-annotate-indirect-calls-jumps-for-objtool-on-32-bit-kernels.patch new file mode 100644 index 00000000000..5449e5421ab --- /dev/null +++ b/queue-4.14/x86-speculation-objtool-annotate-indirect-calls-jumps-for-objtool-on-32-bit-kernels.patch @@ -0,0 +1,54 @@ +From a14bff131108faf50cc0cf864589fd71ee216c96 Mon Sep 17 00:00:00 2001 +From: Andy Whitcroft +Date: Wed, 14 Mar 2018 11:24:27 +0000 +Subject: x86/speculation, objtool: Annotate indirect calls/jumps for objtool on 32-bit kernels + +From: Andy Whitcroft + +commit a14bff131108faf50cc0cf864589fd71ee216c96 upstream. 
+ +In the following commit: + + 9e0e3c5130e9 ("x86/speculation, objtool: Annotate indirect calls/jumps for objtool") + +... we added annotations for CALL_NOSPEC/JMP_NOSPEC on 64-bit x86 kernels, +but we did not annotate the 32-bit path. + +Annotate it similarly. + +Signed-off-by: Andy Whitcroft +Acked-by: Peter Zijlstra (Intel) +Cc: Andy Lutomirski +Cc: Arjan van de Ven +Cc: Borislav Petkov +Cc: Dan Williams +Cc: Dave Hansen +Cc: David Woodhouse +Cc: David Woodhouse +Cc: Greg Kroah-Hartman +Cc: Josh Poimboeuf +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/20180314112427.22351-1-apw@canonical.com +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/nospec-branch.h | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -183,7 +183,10 @@ + * otherwise we'll run out of registers. We don't care about CET + * here, anyway. + */ +-# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n", \ ++# define CALL_NOSPEC \ ++ ALTERNATIVE( \ ++ ANNOTATE_RETPOLINE_SAFE \ ++ "call *%[thunk_target]\n", \ + " jmp 904f;\n" \ + " .align 16\n" \ + "901: call 903f;\n" \ diff --git a/queue-4.14/x86-speculation-remove-skylake-c2-from-speculation-control-microcode-blacklist.patch b/queue-4.14/x86-speculation-remove-skylake-c2-from-speculation-control-microcode-blacklist.patch new file mode 100644 index 00000000000..6350ac1e509 --- /dev/null +++ b/queue-4.14/x86-speculation-remove-skylake-c2-from-speculation-control-microcode-blacklist.patch @@ -0,0 +1,45 @@ +From e3b3121fa8da94cb20f9e0c64ab7981ae47fd085 Mon Sep 17 00:00:00 2001 +From: Alexander Sergeyev +Date: Tue, 13 Mar 2018 22:38:56 +0300 +Subject: x86/speculation: Remove Skylake C2 from Speculation Control microcode blacklist + +From: Alexander Sergeyev + +commit e3b3121fa8da94cb20f9e0c64ab7981ae47fd085 upstream. 
+ +In accordance with Intel's microcode revision guidance from March 6 MCU +rev 0xc2 is cleared on both Skylake H/S and Skylake Xeon E3 processors +that share CPUID 506E3. + +Signed-off-by: Alexander Sergeyev +Signed-off-by: Thomas Gleixner +Cc: Jia Zhang +Cc: Greg Kroah-Hartman +Cc: Kyle Huey +Cc: David Woodhouse +Link: https://lkml.kernel.org/r/20180313193856.GA8580@localhost.localdomain +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/cpu/intel.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/arch/x86/kernel/cpu/intel.c ++++ b/arch/x86/kernel/cpu/intel.c +@@ -105,7 +105,7 @@ static void probe_xeon_phi_r3mwait(struc + /* + * Early microcode releases for the Spectre v2 mitigation were broken. + * Information taken from; +- * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf ++ * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/03/microcode-update-guidance.pdf + * - https://kb.vmware.com/s/article/52345 + * - Microcode revisions observed in the wild + * - Release note from 20180108 microcode release +@@ -123,7 +123,6 @@ static const struct sku_microcode spectr + { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x80 }, + { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, + { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, +- { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, + { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, + { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, + { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, diff --git a/queue-4.14/x86-vm86-32-fix-popf-emulation.patch b/queue-4.14/x86-vm86-32-fix-popf-emulation.patch new file mode 100644 index 00000000000..2eb11bc4371 --- /dev/null +++ b/queue-4.14/x86-vm86-32-fix-popf-emulation.patch @@ -0,0 +1,44 @@ +From b5069782453459f6ec1fdeb495d9901a4545fcb5 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Tue, 13 Mar 2018 22:03:12 -0700 +Subject: x86/vm86/32: Fix POPF emulation + +From: Andy Lutomirski + +commit b5069782453459f6ec1fdeb495d9901a4545fcb5 upstream. 
+ +POPF would trap if VIP was set regardless of whether IF was set. Fix it. + +Suggested-by: Stas Sergeev +Reported-by: Bart Oldeman +Signed-off-by: Andy Lutomirski +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Denys Vlasenko +Cc: H. Peter Anvin +Cc: Josh Poimboeuf +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: stable@vger.kernel.org +Fixes: 5ed92a8ab71f ("x86/vm86: Use the normal pt_regs area for vm86") +Link: http://lkml.kernel.org/r/ce95f40556e7b2178b6bc06ee9557827ff94bd28.1521003603.git.luto@kernel.org +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/vm86_32.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/arch/x86/kernel/vm86_32.c ++++ b/arch/x86/kernel/vm86_32.c +@@ -727,7 +727,8 @@ void handle_vm86_fault(struct kernel_vm8 + return; + + check_vip: +- if (VEFLAGS & X86_EFLAGS_VIP) { ++ if ((VEFLAGS & (X86_EFLAGS_VIP | X86_EFLAGS_VIF)) == ++ (X86_EFLAGS_VIP | X86_EFLAGS_VIF)) { + save_v86_state(regs, VM86_STI); + return; + }