From: Greg Kroah-Hartman Date: Mon, 26 Jun 2017 06:15:25 +0000 (+0200) Subject: 4.11-stable patches X-Git-Tag: v3.18.59~33 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d44df69b6f5f15a470c38d7e446d7a0cbc83ad22;p=thirdparty%2Fkernel%2Fstable-queue.git 4.11-stable patches added patches: alsa-firewire-lib-fix-stall-of-process-context-at-packet-error.patch alsa-hda-add-coffelake-pci-id.patch alsa-hda-apply-quirks-to-broxton-t-too.patch alsa-pcm-don-t-treat-null-chmap-as-a-fatal-error.patch autofs-sanity-check-status-reported-with-autofs_dev_ioctl_fail.patch cifs-fix-some-return-values-in-case-of-error-in-crypt_message.patch cifs-improve-readdir-verbosity.patch clk-sunxi-ng-a31-correct-lcd1-ch1-clock-register-offset.patch clk-sunxi-ng-sun5i-fix-ahb_bist_clk-definition.patch clk-sunxi-ng-v3s-fix-usb-otg-device-reset-bit.patch cxgb4-notify-up-to-route-ctrlq-compl-to-rdma-rspq.patch fs-dax.c-fix-inefficiency-in-dax_writeback_mapping_range.patch fs-exec.c-account-for-argv-envp-pointers.patch hid-add-quirk-for-dell-pixart-oem-mouse.patch kvm-mips-fix-maybe-uninitialized-build-failure.patch kvm-ppc-book3s-hv-context-switch-ebb-registers-properly.patch kvm-ppc-book3s-hv-cope-with-host-using-large-decrementer-mode.patch kvm-ppc-book3s-hv-ignore-timebase-offset-on-power9-dd1.patch kvm-ppc-book3s-hv-preserve-userspace-htm-state-properly.patch kvm-ppc-book3s-hv-restore-critical-sprs-to-host-values-on-guest-exit.patch kvm-ppc-book3s-hv-save-restore-host-values-of-debug-registers.patch kvm-s390-gaccess-fix-real-space-designation-asce-handling-for-gmap-shadows.patch kvm-x86-fix-singlestepping-over-syscall.patch lib-cmdline.c-fix-get_options-overflow-while-parsing-ranges.patch perf-probe-fix-probe-definition-for-inlined-functions.patch perf-x86-intel-add-1g-dtlb-load-store-miss-support-for-skl.patch powerpc-perf-fix-oops-when-kthread-execs-user-process.patch random-silence-compiler-warnings-and-fix-race.patch signal-only-reschedule-timers-on-signals-timers-have-sent.patch 
xen-blkback-don-t-leak-stack-data-via-response-ring.patch xen-blkback-fix-disconnect-while-i-os-in-flight.patch --- diff --git a/queue-4.11/alsa-firewire-lib-fix-stall-of-process-context-at-packet-error.patch b/queue-4.11/alsa-firewire-lib-fix-stall-of-process-context-at-packet-error.patch new file mode 100644 index 00000000000..a1b13ff725b --- /dev/null +++ b/queue-4.11/alsa-firewire-lib-fix-stall-of-process-context-at-packet-error.patch @@ -0,0 +1,148 @@ +From 4a9bfafc64f44ef83de4e00ca1b57352af6cd8c2 Mon Sep 17 00:00:00 2001 +From: Takashi Sakamoto +Date: Sun, 11 Jun 2017 16:08:21 +0900 +Subject: ALSA: firewire-lib: Fix stall of process context at packet error + +From: Takashi Sakamoto + +commit 4a9bfafc64f44ef83de4e00ca1b57352af6cd8c2 upstream. + +At Linux v3.5, packet processing can be done in process context of ALSA +PCM application as well as software IRQ context for OHCI 1394. Below is +an example of the callgraph (some calls are omitted). + +ioctl(2) with e.g. HWSYNC +(sound/core/pcm_native.c) +->snd_pcm_common_ioctl1() + ->snd_pcm_hwsync() + ->snd_pcm_stream_lock_irq + (sound/core/pcm_lib.c) + ->snd_pcm_update_hw_ptr() + ->snd_pcm_update_hw_ptr0() + ->struct snd_pcm_ops.pointer() + (sound/firewire/*) + = Each handler on drivers in ALSA firewire stack + (sound/firewire/amdtp-stream.c) + ->amdtp_stream_pcm_pointer() + (drivers/firewire/core-iso.c) + ->fw_iso_context_flush_completions() + ->struct fw_card_driver.flush_iso_completion() + (drivers/firewire/ohci.c) + = flush_iso_completions() + ->struct fw_iso_context.callback.sc + (sound/firewire/amdtp-stream.c) + = in_stream_callback() or out_stream_callback() + ->... + ->snd_pcm_stream_unlock_irq + +When packet queueing error occurs or detecting invalid packets in +'in_stream_callback()' or 'out_stream_callback()', 'snd_pcm_stop_xrun()' +is called on local CPU with disabled IRQ.
+ +(sound/firewire/amdtp-stream.c) +in_stream_callback() or out_stream_callback() +->amdtp_stream_pcm_abort() + ->snd_pcm_stop_xrun() + ->snd_pcm_stream_lock_irqsave() + ->snd_pcm_stop() + ->snd_pcm_stream_unlock_irqrestore() + +The process is stalled on the CPU due to attempt to acquire recursive lock. + +[ 562.630853] INFO: rcu_sched detected stalls on CPUs/tasks: +[ 562.630861] 2-...: (1 GPs behind) idle=37d/140000000000000/0 softirq=38323/38323 fqs=7140 +[ 562.630862] (detected by 3, t=15002 jiffies, g=21036, c=21035, q=5933) +[ 562.630866] Task dump for CPU 2: +[ 562.630867] alsa-source-OXF R running task 0 6619 1 0x00000008 +[ 562.630870] Call Trace: +[ 562.630876] ? vt_console_print+0x79/0x3e0 +[ 562.630880] ? msg_print_text+0x9d/0x100 +[ 562.630883] ? up+0x32/0x50 +[ 562.630885] ? irq_work_queue+0x8d/0xa0 +[ 562.630886] ? console_unlock+0x2b6/0x4b0 +[ 562.630888] ? vprintk_emit+0x312/0x4a0 +[ 562.630892] ? dev_vprintk_emit+0xbf/0x230 +[ 562.630895] ? do_sys_poll+0x37a/0x550 +[ 562.630897] ? dev_printk_emit+0x4e/0x70 +[ 562.630900] ? __dev_printk+0x3c/0x80 +[ 562.630903] ? _raw_spin_lock+0x20/0x30 +[ 562.630909] ? snd_pcm_stream_lock+0x31/0x50 [snd_pcm] +[ 562.630914] ? _snd_pcm_stream_lock_irqsave+0x2e/0x40 [snd_pcm] +[ 562.630918] ? snd_pcm_stop_xrun+0x16/0x70 [snd_pcm] +[ 562.630922] ? in_stream_callback+0x3e6/0x450 [snd_firewire_lib] +[ 562.630925] ? handle_ir_packet_per_buffer+0x8e/0x1a0 [firewire_ohci] +[ 562.630928] ? ohci_flush_iso_completions+0xa3/0x130 [firewire_ohci] +[ 562.630932] ? fw_iso_context_flush_completions+0x15/0x20 [firewire_core] +[ 562.630935] ? amdtp_stream_pcm_pointer+0x2d/0x40 [snd_firewire_lib] +[ 562.630938] ? pcm_capture_pointer+0x19/0x20 [snd_oxfw] +[ 562.630943] ? snd_pcm_update_hw_ptr0+0x47/0x3d0 [snd_pcm] +[ 562.630945] ? poll_select_copy_remaining+0x150/0x150 +[ 562.630947] ? poll_select_copy_remaining+0x150/0x150 +[ 562.630952] ? snd_pcm_update_hw_ptr+0x10/0x20 [snd_pcm] +[ 562.630956] ? 
snd_pcm_hwsync+0x45/0xb0 [snd_pcm] +[ 562.630960] ? snd_pcm_common_ioctl1+0x1ff/0xc90 [snd_pcm] +[ 562.630962] ? futex_wake+0x90/0x170 +[ 562.630966] ? snd_pcm_capture_ioctl1+0x136/0x260 [snd_pcm] +[ 562.630970] ? snd_pcm_capture_ioctl+0x27/0x40 [snd_pcm] +[ 562.630972] ? do_vfs_ioctl+0xa3/0x610 +[ 562.630974] ? vfs_read+0x11b/0x130 +[ 562.630976] ? SyS_ioctl+0x79/0x90 +[ 562.630978] ? entry_SYSCALL_64_fastpath+0x1e/0xad + +This commit fixes the above bug. This assumes two cases: +1. Any error is detected in software IRQ context of OHCI 1394 context. +In this case, PCM substream should be aborted in packet handler. On the +other hand, it should not be done in any process context. To distinguish +these two contexts, use 'in_interrupt()' macro. +2. Any error is detected in process context of ALSA PCM application. +In this case, PCM substream should not be aborted in packet handler +because PCM substream lock is acquired. The task to abort PCM substream +should be done in ALSA PCM core. For this purpose, SNDRV_PCM_POS_XRUN is +returned at 'struct snd_pcm_ops.pointer()'. + +Suggested-by: Clemens Ladisch +Fixes: e9148dddc3c7("ALSA: firewire-lib: flush completed packets when reading PCM position") +Signed-off-by: Takashi Sakamoto +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman + +--- + sound/firewire/amdtp-stream.c | 8 ++++++-- + sound/firewire/amdtp-stream.h | 2 +- + 2 files changed, 7 insertions(+), 3 deletions(-) + +--- a/sound/firewire/amdtp-stream.c ++++ b/sound/firewire/amdtp-stream.c +@@ -606,7 +606,9 @@ static void out_stream_callback(struct f + cycle = increment_cycle_count(cycle, 1); + if (handle_out_packet(s, cycle, i) < 0) { + s->packet_index = -1; +- amdtp_stream_pcm_abort(s); ++ if (in_interrupt()) ++ amdtp_stream_pcm_abort(s); ++ WRITE_ONCE(s->pcm_buffer_pointer, SNDRV_PCM_POS_XRUN); + return; + } + } +@@ -658,7 +660,9 @@ static void in_stream_callback(struct fw + /* Queueing error or detecting invalid payload.
*/ + if (i < packets) { + s->packet_index = -1; +- amdtp_stream_pcm_abort(s); ++ if (in_interrupt()) ++ amdtp_stream_pcm_abort(s); ++ WRITE_ONCE(s->pcm_buffer_pointer, SNDRV_PCM_POS_XRUN); + return; + } + +--- a/sound/firewire/amdtp-stream.h ++++ b/sound/firewire/amdtp-stream.h +@@ -124,7 +124,7 @@ struct amdtp_stream { + /* For a PCM substream processing. */ + struct snd_pcm_substream *pcm; + struct tasklet_struct period_tasklet; +- unsigned int pcm_buffer_pointer; ++ snd_pcm_uframes_t pcm_buffer_pointer; + unsigned int pcm_period_pointer; + + /* To wait for first packet. */ diff --git a/queue-4.11/alsa-hda-add-coffelake-pci-id.patch b/queue-4.11/alsa-hda-add-coffelake-pci-id.patch new file mode 100644 index 00000000000..d929439f51e --- /dev/null +++ b/queue-4.11/alsa-hda-add-coffelake-pci-id.patch @@ -0,0 +1,45 @@ +From e79b0006c45c9b0b22f3ea54ff6e256b34c1f208 Mon Sep 17 00:00:00 2001 +From: Megha Dey +Date: Wed, 14 Jun 2017 09:51:56 +0530 +Subject: ALSA: hda - Add Coffelake PCI ID + +From: Megha Dey + +commit e79b0006c45c9b0b22f3ea54ff6e256b34c1f208 upstream. + +Coffelake is another Intel part, so need to add PCI ID for it. + +Signed-off-by: Megha Dey +Signed-off-by: Subhransu S. 
Prusty +Acked-by: Vinod Koul +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman + +--- + sound/pci/hda/hda_intel.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/sound/pci/hda/hda_intel.c ++++ b/sound/pci/hda/hda_intel.c +@@ -370,9 +370,10 @@ enum { + #define IS_KBL_H(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa2f0) + #define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98) + #define IS_GLK(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x3198) ++#define IS_CFL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa348) + #define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci)) || \ + IS_KBL(pci) || IS_KBL_LP(pci) || IS_KBL_H(pci) || \ +- IS_GLK(pci) ++ IS_GLK(pci) || IS_CFL(pci) + + static char *driver_short_names[] = { + [AZX_DRIVER_ICH] = "HDA Intel", +@@ -2251,6 +2252,9 @@ static const struct pci_device_id azx_id + /* Kabylake-H */ + { PCI_DEVICE(0x8086, 0xa2f0), + .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE }, ++ /* Coffelake */ ++ { PCI_DEVICE(0x8086, 0xa348), ++ .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE}, + /* Broxton-P(Apollolake) */ + { PCI_DEVICE(0x8086, 0x5a98), + .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BROXTON }, diff --git a/queue-4.11/alsa-hda-apply-quirks-to-broxton-t-too.patch b/queue-4.11/alsa-hda-apply-quirks-to-broxton-t-too.patch new file mode 100644 index 00000000000..92c71ce8073 --- /dev/null +++ b/queue-4.11/alsa-hda-apply-quirks-to-broxton-t-too.patch @@ -0,0 +1,38 @@ +From c7ecb9068e6772c43941ce609f08bc53f36e1dce Mon Sep 17 00:00:00 2001 +From: Takashi Iwai +Date: Wed, 14 Jun 2017 07:37:14 +0200 +Subject: ALSA: hda - Apply quirks to Broxton-T, too + +From: Takashi Iwai + +commit c7ecb9068e6772c43941ce609f08bc53f36e1dce upstream. + +Broxton-T was a forgotten child and we didn't apply the quirks for +Skylake+ properly. Meanwhile, a quirk for reducing the DMA latency +seems specific to the early Broxton model, so we leave as is. 
+ +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman + +--- + sound/pci/hda/hda_intel.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +--- a/sound/pci/hda/hda_intel.c ++++ b/sound/pci/hda/hda_intel.c +@@ -369,11 +369,12 @@ enum { + #define IS_KBL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d71) + #define IS_KBL_H(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa2f0) + #define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98) ++#define IS_BXT_T(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x1a98) + #define IS_GLK(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x3198) + #define IS_CFL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa348) +-#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci)) || \ +- IS_KBL(pci) || IS_KBL_LP(pci) || IS_KBL_H(pci) || \ +- IS_GLK(pci) || IS_CFL(pci) ++#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci) || \ ++ IS_BXT_T(pci) || IS_KBL(pci) || IS_KBL_LP(pci) || \ ++ IS_KBL_H(pci) || IS_GLK(pci) || IS_CFL(pci)) + + static char *driver_short_names[] = { + [AZX_DRIVER_ICH] = "HDA Intel", diff --git a/queue-4.11/alsa-pcm-don-t-treat-null-chmap-as-a-fatal-error.patch b/queue-4.11/alsa-pcm-don-t-treat-null-chmap-as-a-fatal-error.patch new file mode 100644 index 00000000000..43c8ebc3611 --- /dev/null +++ b/queue-4.11/alsa-pcm-don-t-treat-null-chmap-as-a-fatal-error.patch @@ -0,0 +1,47 @@ +From 2deaeaf102d692cb6f764123b1df7aa118a8e97c Mon Sep 17 00:00:00 2001 +From: Takashi Iwai +Date: Wed, 14 Jun 2017 16:20:32 +0200 +Subject: ALSA: pcm: Don't treat NULL chmap as a fatal error + +From: Takashi Iwai + +commit 2deaeaf102d692cb6f764123b1df7aa118a8e97c upstream. + +The standard PCM chmap helper callbacks treat the NULL info->chmap as +a fatal error and spews the kernel warning with stack trace when +CONFIG_SND_DEBUG is on. This was OK, originally it was supposed to be +always static and non-NULL. 
But, as the recent addition of Intel LPE +audio driver shows, the chmap content may vary dynamically, and it can +be even NULL when disconnected. The user still sees the kernel +warning unnecessarily. + +For clearing such a confusion, this patch simply removes the +snd_BUG_ON() in each place, just returns an error without warning. + +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman + +--- + sound/core/pcm_lib.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/sound/core/pcm_lib.c ++++ b/sound/core/pcm_lib.c +@@ -2492,7 +2492,7 @@ static int pcm_chmap_ctl_get(struct snd_ + struct snd_pcm_substream *substream; + const struct snd_pcm_chmap_elem *map; + +- if (snd_BUG_ON(!info->chmap)) ++ if (!info->chmap) + return -EINVAL; + substream = snd_pcm_chmap_substream(info, idx); + if (!substream) +@@ -2524,7 +2524,7 @@ static int pcm_chmap_ctl_tlv(struct snd_ + unsigned int __user *dst; + int c, count = 0; + +- if (snd_BUG_ON(!info->chmap)) ++ if (!info->chmap) + return -EINVAL; + if (size < 8) + return -ENOMEM; diff --git a/queue-4.11/autofs-sanity-check-status-reported-with-autofs_dev_ioctl_fail.patch b/queue-4.11/autofs-sanity-check-status-reported-with-autofs_dev_ioctl_fail.patch new file mode 100644 index 00000000000..f17697500b0 --- /dev/null +++ b/queue-4.11/autofs-sanity-check-status-reported-with-autofs_dev_ioctl_fail.patch @@ -0,0 +1,43 @@ +From 9fa4eb8e490a28de40964b1b0e583d8db4c7e57c Mon Sep 17 00:00:00 2001 +From: NeilBrown +Date: Fri, 23 Jun 2017 15:08:43 -0700 +Subject: autofs: sanity check status reported with AUTOFS_DEV_IOCTL_FAIL + +From: NeilBrown + +commit 9fa4eb8e490a28de40964b1b0e583d8db4c7e57c upstream. + +If a positive status is passed with the AUTOFS_DEV_IOCTL_FAIL ioctl, +autofs4_d_automount() will return + + ERR_PTR(status) + +with that status to follow_automount(), which will then dereference an +invalid pointer. + +So treat a positive status the same as zero, and map to ENOENT. 
+ +See comment in systemd src/core/automount.c::automount_send_ready(). + +Link: http://lkml.kernel.org/r/871sqwczx5.fsf@notabene.neil.brown.name +Signed-off-by: NeilBrown +Cc: Ian Kent +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/autofs4/dev-ioctl.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/autofs4/dev-ioctl.c ++++ b/fs/autofs4/dev-ioctl.c +@@ -344,7 +344,7 @@ static int autofs_dev_ioctl_fail(struct + int status; + + token = (autofs_wqt_t) param->fail.token; +- status = param->fail.status ? param->fail.status : -ENOENT; ++ status = param->fail.status < 0 ? param->fail.status : -ENOENT; + return autofs4_wait_release(sbi, token, status); + } + diff --git a/queue-4.11/cifs-fix-some-return-values-in-case-of-error-in-crypt_message.patch b/queue-4.11/cifs-fix-some-return-values-in-case-of-error-in-crypt_message.patch new file mode 100644 index 00000000000..d29eec3e39a --- /dev/null +++ b/queue-4.11/cifs-fix-some-return-values-in-case-of-error-in-crypt_message.patch @@ -0,0 +1,45 @@ +From 517a6e43c4872c89794af5b377fa085e47345952 Mon Sep 17 00:00:00 2001 +From: Christophe Jaillet +Date: Sun, 11 Jun 2017 09:12:47 +0200 +Subject: CIFS: Fix some return values in case of error in 'crypt_message' + +From: Christophe Jaillet + +commit 517a6e43c4872c89794af5b377fa085e47345952 upstream. + +'rc' is known to be 0 at this point. So if 'init_sg' or 'kzalloc' fails, we +should return -ENOMEM instead. + +Also remove a useless 'rc' in a debug message as it is meaningless here. 
+ +Fixes: 026e93dc0a3ee ("CIFS: Encrypt SMB3 requests before sending") +Signed-off-by: Christophe JAILLET +Reviewed-by: Pavel Shilovsky +Reviewed-by: Aurelien Aptel +Signed-off-by: Steve French +Signed-off-by: Greg Kroah-Hartman + +--- + fs/cifs/smb2ops.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/fs/cifs/smb2ops.c ++++ b/fs/cifs/smb2ops.c +@@ -1809,7 +1809,8 @@ crypt_message(struct TCP_Server_Info *se + + sg = init_sg(rqst, sign); + if (!sg) { +- cifs_dbg(VFS, "%s: Failed to init sg %d", __func__, rc); ++ cifs_dbg(VFS, "%s: Failed to init sg", __func__); ++ rc = -ENOMEM; + goto free_req; + } + +@@ -1817,6 +1818,7 @@ crypt_message(struct TCP_Server_Info *se + iv = kzalloc(iv_len, GFP_KERNEL); + if (!iv) { + cifs_dbg(VFS, "%s: Failed to alloc IV", __func__); ++ rc = -ENOMEM; + goto free_sg; + } + iv[0] = 3; diff --git a/queue-4.11/cifs-improve-readdir-verbosity.patch b/queue-4.11/cifs-improve-readdir-verbosity.patch new file mode 100644 index 00000000000..8978da507f0 --- /dev/null +++ b/queue-4.11/cifs-improve-readdir-verbosity.patch @@ -0,0 +1,60 @@ +From dcd87838c06f05ab7650b249ebf0d5b57ae63e1e Mon Sep 17 00:00:00 2001 +From: Pavel Shilovsky +Date: Tue, 6 Jun 2017 16:58:58 -0700 +Subject: CIFS: Improve readdir verbosity + +From: Pavel Shilovsky + +commit dcd87838c06f05ab7650b249ebf0d5b57ae63e1e upstream. + +Downgrade the loglevel for SMB2 to prevent filling the log +with messages if e.g. readdir was interrupted. Also make SMB2 +and SMB1 codepaths do the same logging during readdir. 
+ +Signed-off-by: Pavel Shilovsky +Signed-off-by: Steve French +Signed-off-by: Greg Kroah-Hartman + +--- + fs/cifs/smb1ops.c | 9 +++++++-- + fs/cifs/smb2ops.c | 4 ++-- + 2 files changed, 9 insertions(+), 4 deletions(-) + +--- a/fs/cifs/smb1ops.c ++++ b/fs/cifs/smb1ops.c +@@ -849,8 +849,13 @@ cifs_query_dir_first(const unsigned int + struct cifs_fid *fid, __u16 search_flags, + struct cifs_search_info *srch_inf) + { +- return CIFSFindFirst(xid, tcon, path, cifs_sb, +- &fid->netfid, search_flags, srch_inf, true); ++ int rc; ++ ++ rc = CIFSFindFirst(xid, tcon, path, cifs_sb, ++ &fid->netfid, search_flags, srch_inf, true); ++ if (rc) ++ cifs_dbg(FYI, "find first failed=%d\n", rc); ++ return rc; + } + + static int +--- a/fs/cifs/smb2ops.c ++++ b/fs/cifs/smb2ops.c +@@ -982,7 +982,7 @@ smb2_query_dir_first(const unsigned int + rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL); + kfree(utf16_path); + if (rc) { +- cifs_dbg(VFS, "open dir failed\n"); ++ cifs_dbg(FYI, "open dir failed rc=%d\n", rc); + return rc; + } + +@@ -992,7 +992,7 @@ smb2_query_dir_first(const unsigned int + rc = SMB2_query_directory(xid, tcon, fid->persistent_fid, + fid->volatile_fid, 0, srch_inf); + if (rc) { +- cifs_dbg(VFS, "query directory failed\n"); ++ cifs_dbg(FYI, "query directory failed rc=%d\n", rc); + SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); + } + return rc; diff --git a/queue-4.11/clk-sunxi-ng-a31-correct-lcd1-ch1-clock-register-offset.patch b/queue-4.11/clk-sunxi-ng-a31-correct-lcd1-ch1-clock-register-offset.patch new file mode 100644 index 00000000000..95b173bc491 --- /dev/null +++ b/queue-4.11/clk-sunxi-ng-a31-correct-lcd1-ch1-clock-register-offset.patch @@ -0,0 +1,35 @@ +From 38b8f823864707eb1cf331d2247608c419ed388c Mon Sep 17 00:00:00 2001 +From: Chen-Yu Tsai +Date: Wed, 3 May 2017 11:13:46 +0800 +Subject: clk: sunxi-ng: a31: Correct lcd1-ch1 clock register offset + +From: Chen-Yu Tsai + +commit 38b8f823864707eb1cf331d2247608c419ed388c upstream. 
+ +The register offset for the lcd1-ch1 clock was incorrectly pointing to +the lcd0-ch1 clock. This resulted in the lcd0-ch1 clock being disabled +when the clk core disables unused clocks. This then stops the simplefb +HDMI output path. + +Reported-by: Bob Ham +Fixes: c6e6c96d8fa6 ("clk: sunxi-ng: Add A31/A31s clocks") +Signed-off-by: Chen-Yu Tsai +Signed-off-by: Maxime Ripard +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/clk/sunxi-ng/ccu-sun6i-a31.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c ++++ b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c +@@ -556,7 +556,7 @@ static SUNXI_CCU_M_WITH_MUX_GATE(lcd0_ch + 0x12c, 0, 4, 24, 3, BIT(31), + CLK_SET_RATE_PARENT); + static SUNXI_CCU_M_WITH_MUX_GATE(lcd1_ch1_clk, "lcd1-ch1", lcd_ch1_parents, +- 0x12c, 0, 4, 24, 3, BIT(31), ++ 0x130, 0, 4, 24, 3, BIT(31), + CLK_SET_RATE_PARENT); + + static const char * const csi_sclk_parents[] = { "pll-video0", "pll-video1", diff --git a/queue-4.11/clk-sunxi-ng-sun5i-fix-ahb_bist_clk-definition.patch b/queue-4.11/clk-sunxi-ng-sun5i-fix-ahb_bist_clk-definition.patch new file mode 100644 index 00000000000..aca8eeb8701 --- /dev/null +++ b/queue-4.11/clk-sunxi-ng-sun5i-fix-ahb_bist_clk-definition.patch @@ -0,0 +1,42 @@ +From 370d9192719e6c174167888cf9240df2542e3b4b Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Wed, 24 May 2017 18:34:29 +0200 +Subject: clk: sunxi-ng: sun5i: Fix ahb_bist_clk definition + +From: Boris Brezillon + +commit 370d9192719e6c174167888cf9240df2542e3b4b upstream. + +AHB BIST gate is actually controlled with bit 7. + +This bug was detected while trying to use the NAND controller which is +using the DMA engine to transfer data to the NAND. +Since the ahb_bist_clk gate bit conflicts with the ahb_dma_clk gate bit, +the core was disabling the DMA engine clock as part of its 'disable +unused clks' procedure, which was causing all DMA transfers to fail after +this point. 
+ +Fixes: 5e73761786d6 ("clk: sunxi-ng: Add sun5i CCU driver") +Reported-by: Angus Ainslie +Signed-off-by: Boris Brezillon +Tested-by: Angus Ainslie +Reviewed-by: Chen-Yu Tsai +Signed-off-by: Michael Turquette +Link: lkml.kernel.org/r/1495643669-28221-1-git-send-email-boris.brezillon@free-electrons.com +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/clk/sunxi-ng/ccu-sun5i.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/clk/sunxi-ng/ccu-sun5i.c ++++ b/drivers/clk/sunxi-ng/ccu-sun5i.c +@@ -243,7 +243,7 @@ static SUNXI_CCU_GATE(ahb_ss_clk, "ahb-s + static SUNXI_CCU_GATE(ahb_dma_clk, "ahb-dma", "ahb", + 0x060, BIT(6), 0); + static SUNXI_CCU_GATE(ahb_bist_clk, "ahb-bist", "ahb", +- 0x060, BIT(6), 0); ++ 0x060, BIT(7), 0); + static SUNXI_CCU_GATE(ahb_mmc0_clk, "ahb-mmc0", "ahb", + 0x060, BIT(8), 0); + static SUNXI_CCU_GATE(ahb_mmc1_clk, "ahb-mmc1", "ahb", diff --git a/queue-4.11/clk-sunxi-ng-v3s-fix-usb-otg-device-reset-bit.patch b/queue-4.11/clk-sunxi-ng-v3s-fix-usb-otg-device-reset-bit.patch new file mode 100644 index 00000000000..e42c477b94b --- /dev/null +++ b/queue-4.11/clk-sunxi-ng-v3s-fix-usb-otg-device-reset-bit.patch @@ -0,0 +1,31 @@ +From 7ffc781ec46ef1e9aedb482f5f04425bd8bb2753 Mon Sep 17 00:00:00 2001 +From: Yong Deng +Date: Fri, 5 May 2017 18:31:57 +0800 +Subject: clk: sunxi-ng: v3s: Fix usb otg device reset bit + +From: Yong Deng + +commit 7ffc781ec46ef1e9aedb482f5f04425bd8bb2753 upstream. + +V3S's usb otg device reset bit should be 24, not 23. 
+ +Signed-off-by: Yong Deng +Reviewed-By: Icenowy Zheng +Signed-off-by: Maxime Ripard +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/clk/sunxi-ng/ccu-sun8i-v3s.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c ++++ b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c +@@ -537,7 +537,7 @@ static struct ccu_reset_map sun8i_v3s_cc + [RST_BUS_EMAC] = { 0x2c0, BIT(17) }, + [RST_BUS_HSTIMER] = { 0x2c0, BIT(19) }, + [RST_BUS_SPI0] = { 0x2c0, BIT(20) }, +- [RST_BUS_OTG] = { 0x2c0, BIT(23) }, ++ [RST_BUS_OTG] = { 0x2c0, BIT(24) }, + [RST_BUS_EHCI0] = { 0x2c0, BIT(26) }, + [RST_BUS_OHCI0] = { 0x2c0, BIT(29) }, + diff --git a/queue-4.11/cxgb4-notify-up-to-route-ctrlq-compl-to-rdma-rspq.patch b/queue-4.11/cxgb4-notify-up-to-route-ctrlq-compl-to-rdma-rspq.patch new file mode 100644 index 00000000000..26c2ea55f4e --- /dev/null +++ b/queue-4.11/cxgb4-notify-up-to-route-ctrlq-compl-to-rdma-rspq.patch @@ -0,0 +1,95 @@ +From dec6b33163d24e2c19ba521c89fffbaab53ae986 Mon Sep 17 00:00:00 2001 +From: Raju Rangoju +Date: Mon, 19 Jun 2017 19:46:00 +0530 +Subject: cxgb4: notify uP to route ctrlq compl to rdma rspq + +From: Raju Rangoju + +commit dec6b33163d24e2c19ba521c89fffbaab53ae986 upstream. + +During the module initialisation there is a possible race +(basically race between uld and lld) where neither the uld +nor lld notifies the uP about where to route the ctrl queue +completions. LLD skips notifying uP as the rdma queues were +not created by then (will leave it to ULD to notify the uP). +As the ULD comes up, it also skips notifying the uP as the +flag FULL_INIT_DONE is not set yet (ULD assumes that the +interface is not up yet). + +Consequently, this race between uld and lld leaves uP +unnotified about where to send the ctrl queue completions +to, leading to iwarp RI_RES WR failure. 
+ +Here is the race: + +CPU 0 CPU1 + +- allocates nic rx queues +- t4_sge_alloc_ctrl_txq() +(if rdma rsp queues exists, +tell uP to route ctrl queue +compl to rdma rspq) + - acquires the mutex_lock + - allocates rdma response queues + - if FULL_INIT_DONE set, + tell uP to route ctrl queue compl + to rdma rspq + - relinquishes mutex_lock +- acquires the mutex_lock +- enable_rx() +- set FULL_INIT_DONE +- relinquishes mutex_lock + +This patch fixes the above issue. + +Fixes: e7519f9926f1('cxgb4: avoid enabling napi twice to the same queue') +Signed-off-by: Raju Rangoju +Acked-by: Steve Wise +Signed-off-by: Ganesh Goudar +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c ++++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +@@ -2192,9 +2192,10 @@ static int cxgb_up(struct adapter *adap) + { + int err; + ++ mutex_lock(&uld_mutex); + err = setup_sge_queues(adap); + if (err) +- goto out; ++ goto rel_lock; + err = setup_rss(adap); + if (err) + goto freeq; +@@ -2218,7 +2219,6 @@ static int cxgb_up(struct adapter *adap) + goto irq_err; + } + +- mutex_lock(&uld_mutex); + enable_rx(adap); + t4_sge_start(adap); + t4_intr_enable(adap); +@@ -2231,13 +2231,15 @@ static int cxgb_up(struct adapter *adap) + #endif + /* Initialize hash mac addr list*/ + INIT_LIST_HEAD(&adap->mac_hlist); +- out: + return err; ++ + irq_err: + dev_err(adap->pdev_dev, "request_irq failed, err %d\n", err); + freeq: + t4_free_sge_resources(adap); +- goto out; ++ rel_lock: ++ mutex_unlock(&uld_mutex); ++ return err; + } + + static void cxgb_down(struct adapter *adapter) diff --git a/queue-4.11/fs-dax.c-fix-inefficiency-in-dax_writeback_mapping_range.patch b/queue-4.11/fs-dax.c-fix-inefficiency-in-dax_writeback_mapping_range.patch new file mode 100644 index 00000000000..49a59a0c969 --- /dev/null +++
b/queue-4.11/fs-dax.c-fix-inefficiency-in-dax_writeback_mapping_range.patch @@ -0,0 +1,37 @@ +From 1eb643d02b21412e603b42cdd96010a2ac31c05f Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Fri, 23 Jun 2017 15:08:46 -0700 +Subject: fs/dax.c: fix inefficiency in dax_writeback_mapping_range() + +From: Jan Kara + +commit 1eb643d02b21412e603b42cdd96010a2ac31c05f upstream. + +dax_writeback_mapping_range() fails to update iteration index when +searching radix tree for entries needing cache flushing. Thus each +pagevec worth of entries is searched starting from the start which is +inefficient and prone to livelocks. Update index properly. + +Link: http://lkml.kernel.org/r/20170619124531.21491-1-jack@suse.cz +Fixes: 9973c98ecfda3 ("dax: add support for fsync/sync") +Signed-off-by: Jan Kara +Reviewed-by: Ross Zwisler +Cc: Dan Williams +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/dax.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/dax.c ++++ b/fs/dax.c +@@ -875,6 +875,7 @@ int dax_writeback_mapping_range(struct a + if (ret < 0) + return ret; + } ++ start_index = indices[pvec.nr - 1] + 1; + } + return 0; + } diff --git a/queue-4.11/fs-exec.c-account-for-argv-envp-pointers.patch b/queue-4.11/fs-exec.c-account-for-argv-envp-pointers.patch new file mode 100644 index 00000000000..2a367d32d55 --- /dev/null +++ b/queue-4.11/fs-exec.c-account-for-argv-envp-pointers.patch @@ -0,0 +1,89 @@ +From 98da7d08850fb8bdeb395d6368ed15753304aa0c Mon Sep 17 00:00:00 2001 +From: Kees Cook +Date: Fri, 23 Jun 2017 15:08:57 -0700 +Subject: fs/exec.c: account for argv/envp pointers + +From: Kees Cook + +commit 98da7d08850fb8bdeb395d6368ed15753304aa0c upstream. + +When limiting the argv/envp strings during exec to 1/4 of the stack limit, +the storage of the pointers to the strings was not included. 
This means +that an exec with huge numbers of tiny strings could eat 1/4 of the stack +limit in strings and then additional space would be later used by the +pointers to the strings. + +For example, on 32-bit with a 8MB stack rlimit, an exec with 1677721 +single-byte strings would consume less than 2MB of stack, the max (8MB / +4) amount allowed, but the pointers to the strings would consume the +remaining additional stack space (1677721 * 4 == 6710884). + +The result (1677721 + 6710884 == 8388605) would exhaust stack space +entirely. Controlling this stack exhaustion could result in +pathological behavior in setuid binaries (CVE-2017-1000365). + +[akpm@linux-foundation.org: additional commenting from Kees] +Fixes: b6a2fea39318 ("mm: variable length argument support") +Link: http://lkml.kernel.org/r/20170622001720.GA32173@beast +Signed-off-by: Kees Cook +Acked-by: Rik van Riel +Acked-by: Michal Hocko +Cc: Alexander Viro +Cc: Qualys Security Advisory +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/exec.c | 28 ++++++++++++++++++++++++---- + 1 file changed, 24 insertions(+), 4 deletions(-) + +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -220,8 +220,26 @@ static struct page *get_arg_page(struct + + if (write) { + unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start; ++ unsigned long ptr_size; + struct rlimit *rlim; + ++ /* ++ * Since the stack will hold pointers to the strings, we ++ * must account for them as well. ++ * ++ * The size calculation is the entire vma while each arg page is ++ * built, so each time we get here it's calculating how far it ++ * is currently (rather than each call being just the newly ++ * added size from the arg page). As a result, we need to ++ * always add the entire size of the pointers, so that on the ++ * last call to get_arg_page() we'll actually have the entire ++ * correct size. 
++ */ ++ ptr_size = (bprm->argc + bprm->envc) * sizeof(void *); ++ if (ptr_size > ULONG_MAX - size) ++ goto fail; ++ size += ptr_size; ++ + acct_arg_size(bprm, size / PAGE_SIZE); + + /* +@@ -239,13 +257,15 @@ static struct page *get_arg_page(struct + * to work from. + */ + rlim = current->signal->rlim; +- if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) { +- put_page(page); +- return NULL; +- } ++ if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) ++ goto fail; + } + + return page; ++ ++fail: ++ put_page(page); ++ return NULL; + } + + static void put_arg_page(struct page *page) diff --git a/queue-4.11/hid-add-quirk-for-dell-pixart-oem-mouse.patch b/queue-4.11/hid-add-quirk-for-dell-pixart-oem-mouse.patch new file mode 100644 index 00000000000..47163feb9f3 --- /dev/null +++ b/queue-4.11/hid-add-quirk-for-dell-pixart-oem-mouse.patch @@ -0,0 +1,43 @@ +From 3db28271f0feae129262d30e41384a7c4c767987 Mon Sep 17 00:00:00 2001 +From: Sebastian Parschauer +Date: Tue, 6 Jun 2017 13:53:13 +0200 +Subject: HID: Add quirk for Dell PIXART OEM mouse + +From: Sebastian Parschauer + +commit 3db28271f0feae129262d30e41384a7c4c767987 upstream. + +This mouse is also known under other IDs. It needs the quirk +ALWAYS_POLL or will disconnect in runlevel 1 or 3. 
+ +Signed-off-by: Sebastian Parschauer +Signed-off-by: Jiri Kosina +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/hid/hid-ids.h | 3 +++ + drivers/hid/usbhid/hid-quirks.c | 1 + + 2 files changed, 4 insertions(+) + +--- a/drivers/hid/hid-ids.h ++++ b/drivers/hid/hid-ids.h +@@ -315,6 +315,9 @@ + #define USB_VENDOR_ID_DELCOM 0x0fc5 + #define USB_DEVICE_ID_DELCOM_VISUAL_IND 0xb080 + ++#define USB_VENDOR_ID_DELL 0x413c ++#define USB_DEVICE_ID_DELL_PIXART_USB_OPTICAL_MOUSE 0x301a ++ + #define USB_VENDOR_ID_DELORME 0x1163 + #define USB_DEVICE_ID_DELORME_EARTHMATE 0x0100 + #define USB_DEVICE_ID_DELORME_EM_LT20 0x0200 +--- a/drivers/hid/usbhid/hid-quirks.c ++++ b/drivers/hid/usbhid/hid-quirks.c +@@ -84,6 +84,7 @@ static const struct hid_blacklist { + { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, + { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, + { USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51, HID_QUIRK_NOGET }, ++ { USB_VENDOR_ID_DELL, USB_DEVICE_ID_DELL_PIXART_USB_OPTICAL_MOUSE, HID_QUIRK_ALWAYS_POLL }, + { USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET }, + { USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_WIIU, HID_QUIRK_MULTI_INPUT }, + { USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_PS3, HID_QUIRK_MULTI_INPUT }, diff --git a/queue-4.11/kvm-mips-fix-maybe-uninitialized-build-failure.patch b/queue-4.11/kvm-mips-fix-maybe-uninitialized-build-failure.patch new file mode 100644 index 00000000000..8f045ced1c8 --- /dev/null +++ b/queue-4.11/kvm-mips-fix-maybe-uninitialized-build-failure.patch @@ -0,0 +1,66 @@ +From e27a9eca5d4a392b96ce5d5238c8d637bcb0a52c Mon Sep 17 00:00:00 2001 +From: James Cowgill +Date: Tue, 20 Jun 2017 10:57:51 +0100 +Subject: KVM: MIPS: Fix maybe-uninitialized build failure +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit 
+ +From: James Cowgill + +commit e27a9eca5d4a392b96ce5d5238c8d637bcb0a52c upstream. + +This commit fixes a "maybe-uninitialized" build failure in +arch/mips/kvm/tlb.c when KVM, DYNAMIC_DEBUG and JUMP_LABEL are all +enabled. The failure is: + +In file included from ./include/linux/printk.h:329:0, + from ./include/linux/kernel.h:13, + from ./include/asm-generic/bug.h:15, + from ./arch/mips/include/asm/bug.h:41, + from ./include/linux/bug.h:4, + from ./include/linux/thread_info.h:11, + from ./include/asm-generic/current.h:4, + from ./arch/mips/include/generated/asm/current.h:1, + from ./include/linux/sched.h:11, + from arch/mips/kvm/tlb.c:13: +arch/mips/kvm/tlb.c: In function ‘kvm_mips_host_tlb_inv’: +./include/linux/dynamic_debug.h:126:3: error: ‘idx_kernel’ may be used uninitialized in this function [-Werror=maybe-uninitialized] + __dynamic_pr_debug(&descriptor, pr_fmt(fmt), \ + ^~~~~~~~~~~~~~~~~~ +arch/mips/kvm/tlb.c:169:16: note: ‘idx_kernel’ was declared here + int idx_user, idx_kernel; + ^~~~~~~~~~ + +There is a similar error relating to "idx_user". Both errors were +observed with GCC 6. + +As far as I can tell, it is impossible for either idx_user or idx_kernel +to be uninitialized when they are later read in the calls to kvm_debug, +but to satisfy the compiler, add zero initializers to both variables. + +Signed-off-by: James Cowgill +Fixes: 57e3869cfaae ("KVM: MIPS/TLB: Generalise host TLB invalidate to kernel ASID") +Acked-by: James Hogan +Signed-off-by: Radim Krčmář +Signed-off-by: Greg Kroah-Hartman + +--- + arch/mips/kvm/tlb.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/arch/mips/kvm/tlb.c ++++ b/arch/mips/kvm/tlb.c +@@ -147,7 +147,11 @@ static int _kvm_mips_host_tlb_inv(unsign + int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va, + bool user, bool kernel) + { +- int idx_user, idx_kernel; ++ /* ++ * Initialize idx_user and idx_kernel to workaround bogus ++ * maybe-initialized warning when using GCC 6. 
++ */ ++ int idx_user = 0, idx_kernel = 0; + unsigned long flags, old_entryhi; + + local_irq_save(flags); diff --git a/queue-4.11/kvm-ppc-book3s-hv-context-switch-ebb-registers-properly.patch b/queue-4.11/kvm-ppc-book3s-hv-context-switch-ebb-registers-properly.patch new file mode 100644 index 00000000000..7d241997c8f --- /dev/null +++ b/queue-4.11/kvm-ppc-book3s-hv-context-switch-ebb-registers-properly.patch @@ -0,0 +1,78 @@ +From ca8efa1df1d15a1795a2da57f9f6aada6ed6b946 Mon Sep 17 00:00:00 2001 +From: Paul Mackerras +Date: Tue, 6 Jun 2017 16:47:22 +1000 +Subject: KVM: PPC: Book3S HV: Context-switch EBB registers properly + +From: Paul Mackerras + +commit ca8efa1df1d15a1795a2da57f9f6aada6ed6b946 upstream. + +This adds code to save the values of three SPRs (special-purpose +registers) used by userspace to control event-based branches (EBBs), +which are essentially interrupts that get delivered directly to +userspace. These registers are loaded up with guest values when +entering the guest, and their values are saved when exiting the +guest, but we were not saving the host values and restoring them +before going back to userspace. + +On POWER8 this would only affect userspace programs which explicitly +request the use of EBBs and also use the KVM_RUN ioctl, since the +only source of EBBs on POWER8 is the PMU, and there is an explicit +enable bit in the PMU registers (and those PMU registers do get +properly context-switched between host and guest). On POWER9 there +is provision for externally-generated EBBs, and these are not subject +to the control in the PMU registers. + +Since these registers only affect userspace, we can save them when +we first come in from userspace and restore them before returning to +userspace, rather than saving/restoring the host values on every +guest entry/exit. Similarly, we don't need to worry about their +values on offline secondary threads since they execute in the context +of the idle task, which never executes in userspace. 
+ +Fixes: b005255e12a3 ("KVM: PPC: Book3S HV: Context-switch new POWER8 SPRs", 2014-01-08) +Signed-off-by: Paul Mackerras +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/kvm/book3s_hv.c | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +--- a/arch/powerpc/kvm/book3s_hv.c ++++ b/arch/powerpc/kvm/book3s_hv.c +@@ -2910,6 +2910,7 @@ static int kvmppc_vcpu_run_hv(struct kvm + { + int r; + int srcu_idx; ++ unsigned long ebb_regs[3] = {}; /* shut up GCC */ + + if (!vcpu->arch.sane) { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; +@@ -2958,6 +2959,13 @@ static int kvmppc_vcpu_run_hv(struct kvm + + flush_all_to_thread(current); + ++ /* Save userspace EBB register values */ ++ if (cpu_has_feature(CPU_FTR_ARCH_207S)) { ++ ebb_regs[0] = mfspr(SPRN_EBBHR); ++ ebb_regs[1] = mfspr(SPRN_EBBRR); ++ ebb_regs[2] = mfspr(SPRN_BESCR); ++ } ++ + vcpu->arch.wqp = &vcpu->arch.vcore->wq; + vcpu->arch.pgdir = current->mm->pgd; + vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; +@@ -2980,6 +2988,13 @@ static int kvmppc_vcpu_run_hv(struct kvm + r = kvmppc_xics_rm_complete(vcpu, 0); + } while (is_kvmppc_resume_guest(r)); + ++ /* Restore userspace EBB register values */ ++ if (cpu_has_feature(CPU_FTR_ARCH_207S)) { ++ mtspr(SPRN_EBBHR, ebb_regs[0]); ++ mtspr(SPRN_EBBRR, ebb_regs[1]); ++ mtspr(SPRN_BESCR, ebb_regs[2]); ++ } ++ + out: + vcpu->arch.state = KVMPPC_VCPU_NOTREADY; + atomic_dec(&vcpu->kvm->arch.vcpus_running); diff --git a/queue-4.11/kvm-ppc-book3s-hv-cope-with-host-using-large-decrementer-mode.patch b/queue-4.11/kvm-ppc-book3s-hv-cope-with-host-using-large-decrementer-mode.patch new file mode 100644 index 00000000000..92a564607c4 --- /dev/null +++ b/queue-4.11/kvm-ppc-book3s-hv-cope-with-host-using-large-decrementer-mode.patch @@ -0,0 +1,135 @@ +From 2f2724630f7a8d582470f03ee56b96746767d270 Mon Sep 17 00:00:00 2001 +From: Paul Mackerras +Date: Mon, 22 May 2017 16:25:14 +1000 +Subject: KVM: PPC: Book3S HV: Cope with host using large decrementer mode + +From: Paul Mackerras 
+ +commit 2f2724630f7a8d582470f03ee56b96746767d270 upstream. + +POWER9 introduces a new mode for the decrementer register, called +large decrementer mode, in which the decrementer counter is 56 bits +wide rather than 32, and reads are sign-extended rather than +zero-extended. For the decrementer, this new mode is optional and +controlled by a bit in the LPCR. The hypervisor decrementer (HDEC) +is 56 bits wide on POWER9 and has no mode control. + +Since KVM code reads and writes the decrementer and hypervisor +decrementer registers in a few places, it needs to be aware of the +need to treat the decrementer value as a 64-bit quantity, and only do +a 32-bit sign extension when large decrementer mode is not in effect. +Similarly, the HDEC should always be treated as a 64-bit quantity on +POWER9. We define a new EXTEND_HDEC macro to encapsulate the feature +test for POWER9 and the sign extension. + +To enable the sign extension to be removed in large decrementer mode, +we test the LPCR_LD bit in the host LPCR image stored in the struct +kvm for the guest. If it is set then large decrementer mode is enabled +and the sign extension should be skipped. + +This is partly based on an earlier patch by Oliver O'Halloran. + +Signed-off-by: Paul Mackerras +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/kvm/book3s_hv_interrupts.S | 12 +++++++++++- + arch/powerpc/kvm/book3s_hv_rmhandlers.S | 23 +++++++++++++++++------ + 2 files changed, 28 insertions(+), 7 deletions(-) + +--- a/arch/powerpc/kvm/book3s_hv_interrupts.S ++++ b/arch/powerpc/kvm/book3s_hv_interrupts.S +@@ -121,10 +121,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + * Put whatever is in the decrementer into the + * hypervisor decrementer. + */ ++BEGIN_FTR_SECTION ++ ld r5, HSTATE_KVM_VCORE(r13) ++ ld r6, VCORE_KVM(r5) ++ ld r9, KVM_HOST_LPCR(r6) ++ andis.
r9, r9, LPCR_LD@h ++END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + mfspr r8,SPRN_DEC + mftb r7 +- mtspr SPRN_HDEC,r8 ++BEGIN_FTR_SECTION ++ /* On POWER9, don't sign-extend if host LPCR[LD] bit is set */ ++ bne 32f ++END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + extsw r8,r8 ++32: mtspr SPRN_HDEC,r8 + add r8,r8,r7 + std r8,HSTATE_DECEXP(r13) + +--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S ++++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S +@@ -31,6 +31,12 @@ + #include + #include + ++/* Sign-extend HDEC if not on POWER9 */ ++#define EXTEND_HDEC(reg) \ ++BEGIN_FTR_SECTION; \ ++ extsw reg, reg; \ ++END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) ++ + #define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM) + + /* Values in HSTATE_NAPPING(r13) */ +@@ -213,6 +219,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + kvmppc_primary_no_guest: + /* We handle this much like a ceded vcpu */ + /* put the HDEC into the DEC, since HDEC interrupts don't wake us */ ++ /* HDEC may be larger than DEC for arch >= v3.00, but since the */ ++ /* HDEC value came from DEC in the first place, it will fit */ + mfspr r3, SPRN_HDEC + mtspr SPRN_DEC, r3 + /* +@@ -294,8 +302,9 @@ kvm_novcpu_wakeup: + + /* See if our timeslice has expired (HDEC is negative) */ + mfspr r0, SPRN_HDEC ++ EXTEND_HDEC(r0) + li r12, BOOK3S_INTERRUPT_HV_DECREMENTER +- cmpwi r0, 0 ++ cmpdi r0, 0 + blt kvm_novcpu_exit + + /* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */ +@@ -389,8 +398,8 @@ kvm_secondary_got_guest: + lbz r4, HSTATE_PTID(r13) + cmpwi r4, 0 + bne 63f +- lis r6, 0x7fff +- ori r6, r6, 0xffff ++ LOAD_REG_ADDR(r6, decrementer_max) ++ ld r6, 0(r6) + mtspr SPRN_HDEC, r6 + /* and set per-LPAR registers, if doing dynamic micro-threading */ + ld r6, HSTATE_SPLIT_MODE(r13) +@@ -967,7 +976,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_3 + + /* Check if HDEC expires soon */ + mfspr r3, SPRN_HDEC +- cmpwi r3, 512 /* 1 microsecond */ ++ EXTEND_HDEC(r3) ++ cmpdi r3, 512 /* 1 microsecond */ + blt hdec_soon + + 
deliver_guest_interrupt: +@@ -2308,12 +2318,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) + mfspr r3, SPRN_DEC + mfspr r4, SPRN_HDEC + mftb r5 +- cmpw r3, r4 ++ extsw r3, r3 ++ EXTEND_HDEC(r4) ++ cmpd r3, r4 + ble 67f + mtspr SPRN_DEC, r4 + 67: + /* save expiry time of guest decrementer */ +- extsw r3, r3 + add r3, r3, r5 + ld r4, HSTATE_KVM_VCPU(r13) + ld r5, HSTATE_KVM_VCORE(r13) diff --git a/queue-4.11/kvm-ppc-book3s-hv-ignore-timebase-offset-on-power9-dd1.patch b/queue-4.11/kvm-ppc-book3s-hv-ignore-timebase-offset-on-power9-dd1.patch new file mode 100644 index 00000000000..d022e0a7c13 --- /dev/null +++ b/queue-4.11/kvm-ppc-book3s-hv-ignore-timebase-offset-on-power9-dd1.patch @@ -0,0 +1,43 @@ +From 3d3efb68c19e539f0535c93a5258c1299270215f Mon Sep 17 00:00:00 2001 +From: Paul Mackerras +Date: Tue, 6 Jun 2017 14:35:30 +1000 +Subject: KVM: PPC: Book3S HV: Ignore timebase offset on POWER9 DD1 + +From: Paul Mackerras + +commit 3d3efb68c19e539f0535c93a5258c1299270215f upstream. + +POWER9 DD1 has an erratum where writing to the TBU40 register, which +is used to apply an offset to the timebase, can cause the timebase to +lose counts. This results in the timebase on some CPUs getting out of +sync with other CPUs, which then results in misbehaviour of the +timekeeping code. + +To work around the problem, we make KVM ignore the timebase offset for +all guests on POWER9 DD1 machines. This means that live migration +cannot be supported on POWER9 DD1 machines. + +Signed-off-by: Paul Mackerras +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/kvm/book3s_hv.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/arch/powerpc/kvm/book3s_hv.c ++++ b/arch/powerpc/kvm/book3s_hv.c +@@ -1481,6 +1481,14 @@ static int kvmppc_set_one_reg_hv(struct + r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len); + break; + case KVM_REG_PPC_TB_OFFSET: ++ /* ++ * POWER9 DD1 has an erratum where writing TBU40 causes ++ * the timebase to lose ticks. 
So we don't let the ++ * timebase offset be changed on P9 DD1. (It is ++ * initialized to zero.) ++ */ ++ if (cpu_has_feature(CPU_FTR_POWER9_DD1)) ++ break; + /* round up to multiple of 2^24 */ + vcpu->arch.vcore->tb_offset = + ALIGN(set_reg_val(id, *val), 1UL << 24); diff --git a/queue-4.11/kvm-ppc-book3s-hv-preserve-userspace-htm-state-properly.patch b/queue-4.11/kvm-ppc-book3s-hv-preserve-userspace-htm-state-properly.patch new file mode 100644 index 00000000000..f145921c13c --- /dev/null +++ b/queue-4.11/kvm-ppc-book3s-hv-preserve-userspace-htm-state-properly.patch @@ -0,0 +1,63 @@ +From 46a704f8409f79fd66567ad3f8a7304830a84293 Mon Sep 17 00:00:00 2001 +From: Paul Mackerras +Date: Thu, 15 Jun 2017 16:10:27 +1000 +Subject: KVM: PPC: Book3S HV: Preserve userspace HTM state properly + +From: Paul Mackerras + +commit 46a704f8409f79fd66567ad3f8a7304830a84293 upstream. + +If userspace attempts to call the KVM_RUN ioctl when it has hardware +transactional memory (HTM) enabled, the values that it has put in the +HTM-related SPRs TFHAR, TFIAR and TEXASR will get overwritten by +guest values. To fix this, we detect this condition and save those +SPR values in the thread struct, and disable HTM for the task. If +userspace goes to access those SPRs or the HTM facility in future, +a TM-unavailable interrupt will occur and the handler will reload +those SPRs and re-enable HTM. + +If userspace has started a transaction and suspended it, we would +currently lose the transactional state in the guest entry path and +would almost certainly get a "TM Bad Thing" interrupt, which would +cause the host to crash. To avoid this, we detect this case and +return from the KVM_RUN ioctl with an EINVAL error, with the KVM +exit reason set to KVM_EXIT_FAIL_ENTRY. 
+ +Fixes: b005255e12a3 ("KVM: PPC: Book3S HV: Context-switch new POWER8 SPRs", 2014-01-08) +Signed-off-by: Paul Mackerras +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/kvm/book3s_hv.c | 21 +++++++++++++++++++++ + 1 file changed, 21 insertions(+) + +--- a/arch/powerpc/kvm/book3s_hv.c ++++ b/arch/powerpc/kvm/book3s_hv.c +@@ -2908,6 +2908,27 @@ static int kvmppc_vcpu_run_hv(struct kvm + return -EINVAL; + } + ++ /* ++ * Don't allow entry with a suspended transaction, because ++ * the guest entry/exit code will lose it. ++ * If the guest has TM enabled, save away their TM-related SPRs ++ * (they will get restored by the TM unavailable interrupt). ++ */ ++#ifdef CONFIG_PPC_TRANSACTIONAL_MEM ++ if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs && ++ (current->thread.regs->msr & MSR_TM)) { ++ if (MSR_TM_ACTIVE(current->thread.regs->msr)) { ++ run->exit_reason = KVM_EXIT_FAIL_ENTRY; ++ run->fail_entry.hardware_entry_failure_reason = 0; ++ return -EINVAL; ++ } ++ current->thread.tm_tfhar = mfspr(SPRN_TFHAR); ++ current->thread.tm_tfiar = mfspr(SPRN_TFIAR); ++ current->thread.tm_texasr = mfspr(SPRN_TEXASR); ++ current->thread.regs->msr &= ~MSR_TM; ++ } ++#endif ++ + kvmppc_core_prepare_to_enter(vcpu); + + /* No need to go into the guest when all we'll do is come back out */ diff --git a/queue-4.11/kvm-ppc-book3s-hv-restore-critical-sprs-to-host-values-on-guest-exit.patch b/queue-4.11/kvm-ppc-book3s-hv-restore-critical-sprs-to-host-values-on-guest-exit.patch new file mode 100644 index 00000000000..17f2a20e7c0 --- /dev/null +++ b/queue-4.11/kvm-ppc-book3s-hv-restore-critical-sprs-to-host-values-on-guest-exit.patch @@ -0,0 +1,145 @@ +From 4c3bb4ccd074e1a0552078c0bf94c662367a1658 Mon Sep 17 00:00:00 2001 +From: Paul Mackerras +Date: Thu, 15 Jun 2017 15:43:17 +1000 +Subject: KVM: PPC: Book3S HV: Restore critical SPRs to host values on guest exit + +From: Paul Mackerras + +commit 4c3bb4ccd074e1a0552078c0bf94c662367a1658 upstream. 
+ +This restores several special-purpose registers (SPRs) to sane values +on guest exit that were missed before. + +TAR and VRSAVE are readable and writable by userspace, and we need to +save and restore them to prevent the guest from potentially affecting +userspace execution (not that TAR or VRSAVE are used by any known +program that uses the KVM_RUN ioctl). We save/restore these +in kvmppc_vcpu_run_hv() rather than on every guest entry/exit. + +FSCR affects userspace execution in that it can prohibit access to +certain facilities by userspace. We restore it to the normal value +for the task on exit from the KVM_RUN ioctl. + +IAMR is normally 0, and is restored to 0 on guest exit. However, +with a radix host on POWER9, it is set to a value that prevents the +kernel from executing user-accessible memory. On POWER9, we save +IAMR on guest entry and restore it on guest exit to the saved value +rather than 0. On POWER8 we continue to set it to 0 on guest exit. + +PSPB is normally 0. We restore it to 0 on guest exit to prevent +userspace taking advantage of the guest having set it non-zero +(which would allow userspace to set its SMT priority to high). + +UAMOR is normally 0. We restore it to 0 on guest exit to prevent +the AMR from being used as a covert channel between userspace +processes, since the AMR is not context-switched at present.
+ +Fixes: b005255e12a3 ("KVM: PPC: Book3S HV: Context-switch new POWER8 SPRs", 2014-01-08) +Signed-off-by: Paul Mackerras +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/kvm/book3s_hv.c | 11 +++++++++-- + arch/powerpc/kvm/book3s_hv_rmhandlers.S | 9 ++++++++- + 2 files changed, 17 insertions(+), 3 deletions(-) + +--- a/arch/powerpc/kvm/book3s_hv.c ++++ b/arch/powerpc/kvm/book3s_hv.c +@@ -2911,6 +2911,8 @@ static int kvmppc_vcpu_run_hv(struct kvm + int r; + int srcu_idx; + unsigned long ebb_regs[3] = {}; /* shut up GCC */ ++ unsigned long user_tar = 0; ++ unsigned int user_vrsave; + + if (!vcpu->arch.sane) { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; +@@ -2959,12 +2961,14 @@ static int kvmppc_vcpu_run_hv(struct kvm + + flush_all_to_thread(current); + +- /* Save userspace EBB register values */ ++ /* Save userspace EBB and other register values */ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + ebb_regs[0] = mfspr(SPRN_EBBHR); + ebb_regs[1] = mfspr(SPRN_EBBRR); + ebb_regs[2] = mfspr(SPRN_BESCR); ++ user_tar = mfspr(SPRN_TAR); + } ++ user_vrsave = mfspr(SPRN_VRSAVE); + + vcpu->arch.wqp = &vcpu->arch.vcore->wq; + vcpu->arch.pgdir = current->mm->pgd; +@@ -2988,12 +2992,15 @@ static int kvmppc_vcpu_run_hv(struct kvm + r = kvmppc_xics_rm_complete(vcpu, 0); + } while (is_kvmppc_resume_guest(r)); + +- /* Restore userspace EBB register values */ ++ /* Restore userspace EBB and other register values */ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + mtspr(SPRN_EBBHR, ebb_regs[0]); + mtspr(SPRN_EBBRR, ebb_regs[1]); + mtspr(SPRN_BESCR, ebb_regs[2]); ++ mtspr(SPRN_TAR, user_tar); ++ mtspr(SPRN_FSCR, current->thread.fscr); + } ++ mtspr(SPRN_VRSAVE, user_vrsave); + + out: + vcpu->arch.state = KVMPPC_VCPU_NOTREADY; +--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S ++++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S +@@ -557,6 +557,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + #define STACK_SLOT_TID (112-16) + #define STACK_SLOT_PSSCR (112-24) + #define STACK_SLOT_PID (112-32) 
++#define STACK_SLOT_IAMR (112-40) + + .global kvmppc_hv_entry + kvmppc_hv_entry: +@@ -757,9 +758,11 @@ BEGIN_FTR_SECTION + mfspr r5, SPRN_TIDR + mfspr r6, SPRN_PSSCR + mfspr r7, SPRN_PID ++ mfspr r8, SPRN_IAMR + std r5, STACK_SLOT_TID(r1) + std r6, STACK_SLOT_PSSCR(r1) + std r7, STACK_SLOT_PID(r1) ++ std r8, STACK_SLOT_IAMR(r1) + END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + + BEGIN_FTR_SECTION +@@ -1461,11 +1464,12 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_3 + * set by the guest could disrupt the host. + */ + li r0, 0 +- mtspr SPRN_IAMR, r0 + mtspr SPRN_CIABR, r0 + mtspr SPRN_DAWRX, r0 ++ mtspr SPRN_PSPB, r0 + mtspr SPRN_WORT, r0 + BEGIN_FTR_SECTION ++ mtspr SPRN_IAMR, r0 + mtspr SPRN_TCSCR, r0 + /* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */ + li r0, 1 +@@ -1481,6 +1485,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) + std r6,VCPU_UAMOR(r9) + li r6,0 + mtspr SPRN_AMR,r6 ++ mtspr SPRN_UAMOR, r6 + + /* Switch DSCR back to host value */ + mfspr r8, SPRN_DSCR +@@ -1629,9 +1634,11 @@ BEGIN_FTR_SECTION + ld r5, STACK_SLOT_TID(r1) + ld r6, STACK_SLOT_PSSCR(r1) + ld r7, STACK_SLOT_PID(r1) ++ ld r8, STACK_SLOT_IAMR(r1) + mtspr SPRN_TIDR, r5 + mtspr SPRN_PSSCR, r6 + mtspr SPRN_PID, r7 ++ mtspr SPRN_IAMR, r8 + END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + BEGIN_FTR_SECTION + PPC_INVALIDATE_ERAT diff --git a/queue-4.11/kvm-ppc-book3s-hv-save-restore-host-values-of-debug-registers.patch b/queue-4.11/kvm-ppc-book3s-hv-save-restore-host-values-of-debug-registers.patch new file mode 100644 index 00000000000..d9967d2b9e9 --- /dev/null +++ b/queue-4.11/kvm-ppc-book3s-hv-save-restore-host-values-of-debug-registers.patch @@ -0,0 +1,132 @@ +From 7ceaa6dcd8c6f59588428cec37f3c8093dd1011f Mon Sep 17 00:00:00 2001 +From: Paul Mackerras +Date: Fri, 16 Jun 2017 11:53:19 +1000 +Subject: KVM: PPC: Book3S HV: Save/restore host values of debug registers + +From: Paul Mackerras + +commit 7ceaa6dcd8c6f59588428cec37f3c8093dd1011f upstream. 
+ +At present, HV KVM on POWER8 and POWER9 machines loses any instruction +or data breakpoint set in the host whenever a guest is run. +Instruction breakpoints are currently only used by xmon, but ptrace +and the perf_event subsystem can set data breakpoints as well as xmon. + +To fix this, we save the host values of the debug registers (CIABR, +DAWR and DAWRX) before entering the guest and restore them on exit. +To provide space to save them in the stack frame, we expand the stack +frame allocated by kvmppc_hv_entry() from 112 to 144 bytes. + +Fixes: b005255e12a3 ("KVM: PPC: Book3S HV: Context-switch new POWER8 SPRs", 2014-01-08) +Signed-off-by: Paul Mackerras +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/kvm/book3s_hv_rmhandlers.S | 45 ++++++++++++++++++++++---------- + 1 file changed, 32 insertions(+), 13 deletions(-) + +--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S ++++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S +@@ -43,6 +43,17 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) + #define NAPPING_CEDE 1 + #define NAPPING_NOVCPU 2 + ++/* Stack frame offsets for kvmppc_hv_entry */ ++#define SFS 144 ++#define STACK_SLOT_TRAP (SFS-4) ++#define STACK_SLOT_TID (SFS-16) ++#define STACK_SLOT_PSSCR (SFS-24) ++#define STACK_SLOT_PID (SFS-32) ++#define STACK_SLOT_IAMR (SFS-40) ++#define STACK_SLOT_CIABR (SFS-48) ++#define STACK_SLOT_DAWR (SFS-56) ++#define STACK_SLOT_DAWRX (SFS-64) ++ + /* + * Call kvmppc_hv_entry in real mode. + * Must be called with interrupts hard-disabled. 
+@@ -327,10 +338,10 @@ kvm_novcpu_exit: + bl kvmhv_accumulate_time + #endif + 13: mr r3, r12 +- stw r12, 112-4(r1) ++ stw r12, STACK_SLOT_TRAP(r1) + bl kvmhv_commence_exit + nop +- lwz r12, 112-4(r1) ++ lwz r12, STACK_SLOT_TRAP(r1) + b kvmhv_switch_to_host + + /* +@@ -553,12 +564,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + * * + *****************************************************************************/ + +-/* Stack frame offsets */ +-#define STACK_SLOT_TID (112-16) +-#define STACK_SLOT_PSSCR (112-24) +-#define STACK_SLOT_PID (112-32) +-#define STACK_SLOT_IAMR (112-40) +- + .global kvmppc_hv_entry + kvmppc_hv_entry: + +@@ -574,7 +579,7 @@ kvmppc_hv_entry: + */ + mflr r0 + std r0, PPC_LR_STKOFF(r1) +- stdu r1, -112(r1) ++ stdu r1, -SFS(r1) + + /* Save R1 in the PACA */ + std r1, HSTATE_HOST_R1(r13) +@@ -764,6 +769,14 @@ BEGIN_FTR_SECTION + std r7, STACK_SLOT_PID(r1) + std r8, STACK_SLOT_IAMR(r1) + END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) ++BEGIN_FTR_SECTION ++ mfspr r5, SPRN_CIABR ++ mfspr r6, SPRN_DAWR ++ mfspr r7, SPRN_DAWRX ++ std r5, STACK_SLOT_CIABR(r1) ++ std r6, STACK_SLOT_DAWR(r1) ++ std r7, STACK_SLOT_DAWRX(r1) ++END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + + BEGIN_FTR_SECTION + /* Set partition DABR */ +@@ -1464,8 +1477,6 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_3 + * set by the guest could disrupt the host. 
+ */ + li r0, 0 +- mtspr SPRN_CIABR, r0 +- mtspr SPRN_DAWRX, r0 + mtspr SPRN_PSPB, r0 + mtspr SPRN_WORT, r0 + BEGIN_FTR_SECTION +@@ -1631,6 +1642,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + + /* Restore host values of some registers */ + BEGIN_FTR_SECTION ++ ld r5, STACK_SLOT_CIABR(r1) ++ ld r6, STACK_SLOT_DAWR(r1) ++ ld r7, STACK_SLOT_DAWRX(r1) ++ mtspr SPRN_CIABR, r5 ++ mtspr SPRN_DAWR, r6 ++ mtspr SPRN_DAWRX, r7 ++END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ++BEGIN_FTR_SECTION + ld r5, STACK_SLOT_TID(r1) + ld r6, STACK_SLOT_PSSCR(r1) + ld r7, STACK_SLOT_PID(r1) +@@ -1782,8 +1801,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_R + li r0, KVM_GUEST_MODE_NONE + stb r0, HSTATE_IN_GUEST(r13) + +- ld r0, 112+PPC_LR_STKOFF(r1) +- addi r1, r1, 112 ++ ld r0, SFS+PPC_LR_STKOFF(r1) ++ addi r1, r1, SFS + mtlr r0 + blr + diff --git a/queue-4.11/kvm-s390-gaccess-fix-real-space-designation-asce-handling-for-gmap-shadows.patch b/queue-4.11/kvm-s390-gaccess-fix-real-space-designation-asce-handling-for-gmap-shadows.patch new file mode 100644 index 00000000000..4c8a1b11389 --- /dev/null +++ b/queue-4.11/kvm-s390-gaccess-fix-real-space-designation-asce-handling-for-gmap-shadows.patch @@ -0,0 +1,93 @@ +From addb63c18a0d52a9ce2611d039f981f7b6148d2b Mon Sep 17 00:00:00 2001 +From: Heiko Carstens +Date: Mon, 19 Jun 2017 08:02:28 +0200 +Subject: KVM: s390: gaccess: fix real-space designation asce handling for gmap shadows + +From: Heiko Carstens + +commit addb63c18a0d52a9ce2611d039f981f7b6148d2b upstream. + +For real-space designation asces the asce origin part is only a token. +The asce token origin must not be used to generate an effective +address for storage references. This however is erroneously done +within kvm_s390_shadow_tables(). + +Furthermore within the same function the wrong parts of virtual +addresses are used to generate a corresponding real address +(e.g. the region second index is used as region first index). 
+ +Both of the above can result in incorrect address translations. Only +for real space designations with a token origin of zero and addresses +below one megabyte the translation was correct. + +Furthermore replace a "!asce.r" statement with a "!*fake" statement to +make it more obvious that a specific condition has nothing to do with +the architecture, but with the fake handling of real space designations. + +Fixes: 3218f7094b6b ("s390/mm: support real-space for gmap shadows") +Cc: David Hildenbrand +Signed-off-by: Heiko Carstens +Reviewed-by: Martin Schwidefsky +Signed-off-by: Christian Borntraeger +Signed-off-by: Greg Kroah-Hartman + +--- + arch/s390/kvm/gaccess.c | 15 ++++++--------- + 1 file changed, 6 insertions(+), 9 deletions(-) + +--- a/arch/s390/kvm/gaccess.c ++++ b/arch/s390/kvm/gaccess.c +@@ -977,11 +977,12 @@ static int kvm_s390_shadow_tables(struct + ptr = asce.origin * 4096; + if (asce.r) { + *fake = 1; ++ ptr = 0; + asce.dt = ASCE_TYPE_REGION1; + } + switch (asce.dt) { + case ASCE_TYPE_REGION1: +- if (vaddr.rfx01 > asce.tl && !asce.r) ++ if (vaddr.rfx01 > asce.tl && !*fake) + return PGM_REGION_FIRST_TRANS; + break; + case ASCE_TYPE_REGION2: +@@ -1009,8 +1010,7 @@ static int kvm_s390_shadow_tables(struct + union region1_table_entry rfte; + + if (*fake) { +- /* offset in 16EB guest memory block */ +- ptr = ptr + ((unsigned long) vaddr.rsx << 53UL); ++ ptr += (unsigned long) vaddr.rfx << 53; + rfte.val = ptr; + goto shadow_r2t; + } +@@ -1036,8 +1036,7 @@ shadow_r2t: + union region2_table_entry rste; + + if (*fake) { +- /* offset in 8PB guest memory block */ +- ptr = ptr + ((unsigned long) vaddr.rtx << 42UL); ++ ptr += (unsigned long) vaddr.rsx << 42; + rste.val = ptr; + goto shadow_r3t; + } +@@ -1064,8 +1063,7 @@ shadow_r3t: + union region3_table_entry rtte; + + if (*fake) { +- /* offset in 4TB guest memory block */ +- ptr = ptr + ((unsigned long) vaddr.sx << 31UL); ++ ptr += (unsigned long) vaddr.rtx << 31; + rtte.val = ptr; + goto shadow_sgt; + } +@@ 
-1101,8 +1099,7 @@ shadow_sgt: + union segment_table_entry ste; + + if (*fake) { +- /* offset in 2G guest memory block */ +- ptr = ptr + ((unsigned long) vaddr.sx << 20UL); ++ ptr += (unsigned long) vaddr.sx << 20; + ste.val = ptr; + goto shadow_pgt; + } diff --git a/queue-4.11/kvm-x86-fix-singlestepping-over-syscall.patch b/queue-4.11/kvm-x86-fix-singlestepping-over-syscall.patch new file mode 100644 index 00000000000..5f6ac1e3430 --- /dev/null +++ b/queue-4.11/kvm-x86-fix-singlestepping-over-syscall.patch @@ -0,0 +1,151 @@ +From c8401dda2f0a00cd25c0af6a95ed50e478d25de4 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Wed, 7 Jun 2017 15:13:14 +0200 +Subject: KVM: x86: fix singlestepping over syscall +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Paolo Bonzini + +commit c8401dda2f0a00cd25c0af6a95ed50e478d25de4 upstream. + +TF is handled a bit differently for syscall and sysret, compared +to the other instructions: TF is checked after the instruction completes, +so that the OS can disable #DB at a syscall by adding TF to FMASK. +When the sysret is executed the #DB is taken "as if" the syscall insn +just completed. + +KVM emulates syscall so that it can trap 32-bit syscall on Intel processors. +Fix the behavior, otherwise you could get #DB on a user stack which is not +nice. This does not affect Linux guests, as they use an IST or task gate +for #DB. + +This fixes CVE-2017-7518. 
+ +Reported-by: Andy Lutomirski +Signed-off-by: Paolo Bonzini +Signed-off-by: Radim Krčmář +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/kvm_emulate.h | 1 + arch/x86/kvm/emulate.c | 1 + arch/x86/kvm/x86.c | 62 +++++++++++++++++++------------------ + 3 files changed, 34 insertions(+), 30 deletions(-) + +--- a/arch/x86/include/asm/kvm_emulate.h ++++ b/arch/x86/include/asm/kvm_emulate.h +@@ -294,6 +294,7 @@ struct x86_emulate_ctxt { + + bool perm_ok; /* do not check permissions if true */ + bool ud; /* inject an #UD if host doesn't support insn */ ++ bool tf; /* TF value before instruction (after for syscall/sysret) */ + + bool have_exception; + struct x86_exception exception; +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -2742,6 +2742,7 @@ static int em_syscall(struct x86_emulate + ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF); + } + ++ ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0; + return X86EMUL_CONTINUE; + } + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -5330,6 +5330,8 @@ static void init_emulate_ctxt(struct kvm + kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); + + ctxt->eflags = kvm_get_rflags(vcpu); ++ ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0; ++ + ctxt->eip = kvm_rip_read(vcpu); + ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : + (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 : +@@ -5546,36 +5548,25 @@ static int kvm_vcpu_check_hw_bp(unsigned + return dr6; + } + +-static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r) ++static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r) + { + struct kvm_run *kvm_run = vcpu->run; + +- /* +- * rflags is the old, "raw" value of the flags. The new value has +- * not been saved yet. +- * +- * This is correct even for TF set by the guest, because "the +- * processor will not generate this exception after the instruction +- * that sets the TF flag". 
+- */ +- if (unlikely(rflags & X86_EFLAGS_TF)) { +- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { +- kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | +- DR6_RTM; +- kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip; +- kvm_run->debug.arch.exception = DB_VECTOR; +- kvm_run->exit_reason = KVM_EXIT_DEBUG; +- *r = EMULATE_USER_EXIT; +- } else { +- /* +- * "Certain debug exceptions may clear bit 0-3. The +- * remaining contents of the DR6 register are never +- * cleared by the processor". +- */ +- vcpu->arch.dr6 &= ~15; +- vcpu->arch.dr6 |= DR6_BS | DR6_RTM; +- kvm_queue_exception(vcpu, DB_VECTOR); +- } ++ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { ++ kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM; ++ kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip; ++ kvm_run->debug.arch.exception = DB_VECTOR; ++ kvm_run->exit_reason = KVM_EXIT_DEBUG; ++ *r = EMULATE_USER_EXIT; ++ } else { ++ /* ++ * "Certain debug exceptions may clear bit 0-3. The ++ * remaining contents of the DR6 register are never ++ * cleared by the processor". ++ */ ++ vcpu->arch.dr6 &= ~15; ++ vcpu->arch.dr6 |= DR6_BS | DR6_RTM; ++ kvm_queue_exception(vcpu, DB_VECTOR); + } + } + +@@ -5585,7 +5576,17 @@ int kvm_skip_emulated_instruction(struct + int r = EMULATE_DONE; + + kvm_x86_ops->skip_emulated_instruction(vcpu); +- kvm_vcpu_check_singlestep(vcpu, rflags, &r); ++ ++ /* ++ * rflags is the old, "raw" value of the flags. The new value has ++ * not been saved yet. ++ * ++ * This is correct even for TF set by the guest, because "the ++ * processor will not generate this exception after the instruction ++ * that sets the TF flag". 
++ */ ++ if (unlikely(rflags & X86_EFLAGS_TF)) ++ kvm_vcpu_do_singlestep(vcpu, &r); + return r == EMULATE_DONE; + } + EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction); +@@ -5746,8 +5747,9 @@ restart: + if (vcpu->arch.hflags != ctxt->emul_flags) + kvm_set_hflags(vcpu, ctxt->emul_flags); + kvm_rip_write(vcpu, ctxt->eip); +- if (r == EMULATE_DONE) +- kvm_vcpu_check_singlestep(vcpu, rflags, &r); ++ if (r == EMULATE_DONE && ++ (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP))) ++ kvm_vcpu_do_singlestep(vcpu, &r); + if (!ctxt->have_exception || + exception_type(ctxt->exception.vector) == EXCPT_TRAP) + __kvm_set_rflags(vcpu, ctxt->eflags); diff --git a/queue-4.11/lib-cmdline.c-fix-get_options-overflow-while-parsing-ranges.patch b/queue-4.11/lib-cmdline.c-fix-get_options-overflow-while-parsing-ranges.patch new file mode 100644 index 00000000000..c6c8678ca24 --- /dev/null +++ b/queue-4.11/lib-cmdline.c-fix-get_options-overflow-while-parsing-ranges.patch @@ -0,0 +1,53 @@ +From a91e0f680bcd9e10c253ae8b62462a38bd48f09f Mon Sep 17 00:00:00 2001 +From: Ilya Matveychikov +Date: Fri, 23 Jun 2017 15:08:49 -0700 +Subject: lib/cmdline.c: fix get_options() overflow while parsing ranges + +From: Ilya Matveychikov + +commit a91e0f680bcd9e10c253ae8b62462a38bd48f09f upstream. + +When using get_options() it's possible to specify a range of numbers, +like 1-100500. The problem is that it doesn't track array size while +calling internally to get_range() which iterates over the range and +fills the memory with numbers. + +Link: http://lkml.kernel.org/r/2613C75C-B04D-4BFF-82A6-12F97BA0F620@gmail.com +Signed-off-by: Ilya V. Matveychikov +Cc: Jonathan Corbet +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + lib/cmdline.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/lib/cmdline.c ++++ b/lib/cmdline.c +@@ -22,14 +22,14 @@ + * the values[M, M+1, ..., N] into the ints array in get_options. 
+ */ + +-static int get_range(char **str, int *pint) ++static int get_range(char **str, int *pint, int n) + { + int x, inc_counter, upper_range; + + (*str)++; + upper_range = simple_strtol((*str), NULL, 0); + inc_counter = upper_range - *pint; +- for (x = *pint; x < upper_range; x++) ++ for (x = *pint; n && x < upper_range; x++, n--) + *pint++ = x; + return inc_counter; + } +@@ -96,7 +96,7 @@ char *get_options(const char *str, int n + break; + if (res == 3) { + int range_nums; +- range_nums = get_range((char **)&str, ints + i); ++ range_nums = get_range((char **)&str, ints + i, nints - i); + if (range_nums < 0) + break; + /* diff --git a/queue-4.11/perf-probe-fix-probe-definition-for-inlined-functions.patch b/queue-4.11/perf-probe-fix-probe-definition-for-inlined-functions.patch new file mode 100644 index 00000000000..3eaf3f1f5b3 --- /dev/null +++ b/queue-4.11/perf-probe-fix-probe-definition-for-inlined-functions.patch @@ -0,0 +1,165 @@ +From 7598f8bc1383ffd77686cb4e92e749bef3c75937 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= +Date: Wed, 21 Jun 2017 18:41:34 +0200 +Subject: perf probe: Fix probe definition for inlined functions +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Björn Töpel + +commit 7598f8bc1383ffd77686cb4e92e749bef3c75937 upstream. + +In commit 613f050d68a8 ("perf probe: Fix to probe on gcc generated +functions in modules"), the offset from symbol is, incorrectly, added +to the trace point address. This leads to incorrect probe trace points +for inlined functions and when using relative line number on symbols. 
+ +Prior to this patch: + $ perf probe -m nf_nat -D in_range + p:probe/in_range nf_nat:in_range.isra.9+0 + $ perf probe -m i40e -D i40e_clean_rx_irq + p:probe/i40e_clean_rx_irq i40e:i40e_napi_poll+2212 + $ perf probe -m i40e -D i40e_clean_rx_irq:16 + p:probe/i40e_clean_rx_irq i40e:i40e_lan_xmit_frame+626 + +After: + $ perf probe -m nf_nat -D in_range + p:probe/in_range nf_nat:in_range.isra.9+0 + $ perf probe -m i40e -D i40e_clean_rx_irq + p:probe/i40e_clean_rx_irq i40e:i40e_napi_poll+1106 + $ perf probe -m i40e -D i40e_clean_rx_irq:16 + p:probe/i40e_clean_rx_irq i40e:i40e_napi_poll+2665 + +Committer testing: + +Using 'pfunct', a tool found in the 'dwarves' package [1], one can ask what are +the functions that while not being explicitly marked as inline, were inlined +by the compiler: + + # pfunct --cc_inlined /lib/modules/4.12.0-rc4+/kernel/drivers/net/ethernet/intel/e1000e/e1000e.ko | head + __ew32 + e1000_regdump + e1000e_dump_ps_pages + e1000_desc_unused + e1000e_systim_to_hwtstamp + e1000e_rx_hwtstamp + e1000e_update_rdt_wa + e1000e_update_tdt_wa + e1000_put_txbuf + e1000_consume_page + +Then ask 'perf probe' to produce the kprobe_tracer probe definitions for two of +them: + + # perf probe -m e1000e -D e1000e_rx_hwtstamp + p:probe/e1000e_rx_hwtstamp e1000e:e1000_receive_skb+74 + + # perf probe -m e1000e -D e1000_consume_page + p:probe/e1000_consume_page e1000e:e1000_clean_jumbo_rx_irq+876 + p:probe/e1000_consume_page_1 e1000e:e1000_clean_jumbo_rx_irq+1506 + p:probe/e1000_consume_page_2 e1000e:e1000_clean_rx_irq_ps+1074 + +Now lets concentrate on the 'e1000_consume_page' one, that was inlined twice in +e1000_clean_jumbo_rx_irq(), lets see what readelf says about the DWARF tags for +that function: + + $ readelf -wi /lib/modules/4.12.0-rc4+/kernel/drivers/net/ethernet/intel/e1000e/e1000e.ko + + <1><13e27b>: Abbrev Number: 121 (DW_TAG_subprogram) + <13e27c> DW_AT_name : (indirect string, offset: 0xa8945): e1000_clean_jumbo_rx_irq + <13e287> DW_AT_low_pc : 0x17a30 + 
<3><13e6ef>: Abbrev Number: 119 (DW_TAG_inlined_subroutine) + <13e6f0> DW_AT_abstract_origin: <0x13ed2c> + <13e6f4> DW_AT_low_pc : 0x17be6 + + <1><13ed2c>: Abbrev Number: 142 (DW_TAG_subprogram) + <13ed2e> DW_AT_name : (indirect string, offset: 0xa54c3): e1000_consume_page + +So, the first time in e1000_clean_jumbo_rx_irq() where e1000_consume_page() is +inlined is at PC 0x17be6, which subtracted from e1000_clean_jumbo_rx_irq()'s +address, gives us the offset we should use in the probe definition: + + 0x17be6 - 0x17a30 = 438 + +but above we have 876, which is twice as much. + +Lets see the second inline expansion of e1000_consume_page() in +e1000_clean_jumbo_rx_irq(): + + <3><13e86e>: Abbrev Number: 119 (DW_TAG_inlined_subroutine) + <13e86f> DW_AT_abstract_origin: <0x13ed2c> + <13e873> DW_AT_low_pc : 0x17d21 + + 0x17d21 - 0x17a30 = 753 + +So we were adding it at twice the offset from the containing function as we +should. + +And then after this patch: + + # perf probe -m e1000e -D e1000e_rx_hwtstamp + p:probe/e1000e_rx_hwtstamp e1000e:e1000_receive_skb+37 + + # perf probe -m e1000e -D e1000_consume_page + p:probe/e1000_consume_page e1000e:e1000_clean_jumbo_rx_irq+438 + p:probe/e1000_consume_page_1 e1000e:e1000_clean_jumbo_rx_irq+753 + p:probe/e1000_consume_page_2 e1000e:e1000_clean_jumbo_rx_irq+1353 + # + +Which matches the two first expansions and shows that because we were +doubling the offset it would spill over the next function: + + readelf -sw /lib/modules/4.12.0-rc4+/kernel/drivers/net/ethernet/intel/e1000e/e1000e.ko + 673: 0000000000017a30 1626 FUNC LOCAL DEFAULT 2 e1000_clean_jumbo_rx_irq + 674: 0000000000018090 2013 FUNC LOCAL DEFAULT 2 e1000_clean_rx_irq_ps + +This is the 3rd inline expansion of e1000_consume_page() in +e1000_clean_jumbo_rx_irq(): + + <3><13ec77>: Abbrev Number: 119 (DW_TAG_inlined_subroutine) + <13ec78> DW_AT_abstract_origin: <0x13ed2c> + <13ec7c> DW_AT_low_pc : 0x17f79 + + 0x17f79 - 0x17a30 = 1353 + + So: + + 0x17a30 + 2 * 1353 = 
0x184c2 + + And: + + 0x184c2 - 0x18090 = 1074 + +Which explains the bogus third expansion for e1000_consume_page() to end up at: + + p:probe/e1000_consume_page_2 e1000e:e1000_clean_rx_irq_ps+1074 + +All fixed now :-) + +[1] https://git.kernel.org/pub/scm/devel/pahole/pahole.git/ + +Signed-off-by: Björn Töpel +Tested-by: Arnaldo Carvalho de Melo +Acked-by: Magnus Karlsson +Acked-by: Masami Hiramatsu +Fixes: 613f050d68a8 ("perf probe: Fix to probe on gcc generated functions in modules") +Link: http://lkml.kernel.org/r/20170621164134.5701-1-bjorn.topel@gmail.com +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Greg Kroah-Hartman + +--- + tools/perf/util/probe-event.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/perf/util/probe-event.c ++++ b/tools/perf/util/probe-event.c +@@ -615,7 +615,7 @@ static int post_process_probe_trace_poin + struct map *map, unsigned long offs) + { + struct symbol *sym; +- u64 addr = tp->address + tp->offset - offs; ++ u64 addr = tp->address - offs; + + sym = map__find_symbol(map, addr); + if (!sym) diff --git a/queue-4.11/perf-x86-intel-add-1g-dtlb-load-store-miss-support-for-skl.patch b/queue-4.11/perf-x86-intel-add-1g-dtlb-load-store-miss-support-for-skl.patch new file mode 100644 index 00000000000..bafdb4fcd38 --- /dev/null +++ b/queue-4.11/perf-x86-intel-add-1g-dtlb-load-store-miss-support-for-skl.patch @@ -0,0 +1,50 @@ +From fb3a5055cd7098f8d1dd0cd38d7172211113255f Mon Sep 17 00:00:00 2001 +From: Kan Liang +Date: Mon, 19 Jun 2017 07:26:09 -0700 +Subject: perf/x86/intel: Add 1G DTLB load/store miss support for SKL + +From: Kan Liang + +commit fb3a5055cd7098f8d1dd0cd38d7172211113255f upstream. + +Current DTLB load/store miss events (0x608/0x649) only counts 4K,2M and +4M page size. +Need to extend the events to support any page size (4K/2M/4M/1G). 
+ +The complete DTLB load/store miss events are: + + DTLB_LOAD_MISSES.WALK_COMPLETED 0xe08 + DTLB_STORE_MISSES.WALK_COMPLETED 0xe49 + +Signed-off-by: Kan Liang +Cc: Alexander Shishkin +Cc: Arnaldo Carvalho de Melo +Cc: Jiri Olsa +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: eranian@google.com +Link: http://lkml.kernel.org/r/20170619142609.11058-1-kan.liang@intel.com +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/events/intel/core.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/x86/events/intel/core.c ++++ b/arch/x86/events/intel/core.c +@@ -431,11 +431,11 @@ static __initconst const u64 skl_hw_cach + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_INST_RETIRED.ALL_LOADS */ +- [ C(RESULT_MISS) ] = 0x608, /* DTLB_LOAD_MISSES.WALK_COMPLETED */ ++ [ C(RESULT_MISS) ] = 0xe08, /* DTLB_LOAD_MISSES.WALK_COMPLETED */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_INST_RETIRED.ALL_STORES */ +- [ C(RESULT_MISS) ] = 0x649, /* DTLB_STORE_MISSES.WALK_COMPLETED */ ++ [ C(RESULT_MISS) ] = 0xe49, /* DTLB_STORE_MISSES.WALK_COMPLETED */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, diff --git a/queue-4.11/powerpc-perf-fix-oops-when-kthread-execs-user-process.patch b/queue-4.11/powerpc-perf-fix-oops-when-kthread-execs-user-process.patch new file mode 100644 index 00000000000..7afebd99741 --- /dev/null +++ b/queue-4.11/powerpc-perf-fix-oops-when-kthread-execs-user-process.patch @@ -0,0 +1,63 @@ +From bf05fc25f268cd62f147f368fe65ad3e5b04fe9f Mon Sep 17 00:00:00 2001 +From: Ravi Bangoria +Date: Thu, 15 Jun 2017 19:16:48 +0530 +Subject: powerpc/perf: Fix oops when kthread execs user process + +From: Ravi Bangoria + +commit bf05fc25f268cd62f147f368fe65ad3e5b04fe9f upstream. + +When a kthread calls call_usermodehelper() the steps are: + 1. allocate current->mm + 2. load_elf_binary() + 3. 
populate current->thread.regs + +While doing this, interrupts are not disabled. If there is a perf +interrupt in the middle of this process (i.e. step 1 has completed +but not yet reached to step 3) and if perf tries to read userspace +regs, kernel oops with following log: + + Unable to handle kernel paging request for data at address 0x00000000 + Faulting instruction address: 0xc0000000000da0fc + ... + Call Trace: + perf_output_sample_regs+0x6c/0xd0 + perf_output_sample+0x4e4/0x830 + perf_event_output_forward+0x64/0x90 + __perf_event_overflow+0x8c/0x1e0 + record_and_restart+0x220/0x5c0 + perf_event_interrupt+0x2d8/0x4d0 + performance_monitor_exception+0x54/0x70 + performance_monitor_common+0x158/0x160 + --- interrupt: f01 at avtab_search_node+0x150/0x1a0 + LR = avtab_search_node+0x100/0x1a0 + ... + load_elf_binary+0x6e8/0x15a0 + search_binary_handler+0xe8/0x290 + do_execveat_common.isra.14+0x5f4/0x840 + call_usermodehelper_exec_async+0x170/0x210 + ret_from_kernel_thread+0x5c/0x7c + +Fix it by setting abi to PERF_SAMPLE_REGS_ABI_NONE when userspace +pt_regs are not set. + +Fixes: ed4a4ef85cf5 ("powerpc/perf: Add support for sampling interrupt register state") +Signed-off-by: Ravi Bangoria +Acked-by: Naveen N. Rao +Signed-off-by: Michael Ellerman +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/perf/perf_regs.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/arch/powerpc/perf/perf_regs.c ++++ b/arch/powerpc/perf/perf_regs.c +@@ -101,5 +101,6 @@ void perf_get_regs_user(struct perf_regs + struct pt_regs *regs_user_copy) + { + regs_user->regs = task_pt_regs(current); +- regs_user->abi = perf_reg_abi(current); ++ regs_user->abi = (regs_user->regs) ? 
perf_reg_abi(current) : ++ PERF_SAMPLE_REGS_ABI_NONE; + } diff --git a/queue-4.11/random-silence-compiler-warnings-and-fix-race.patch b/queue-4.11/random-silence-compiler-warnings-and-fix-race.patch new file mode 100644 index 00000000000..8f700dcd24f --- /dev/null +++ b/queue-4.11/random-silence-compiler-warnings-and-fix-race.patch @@ -0,0 +1,87 @@ +From 4a072c71f49b0a0e495ea13423bdb850da73c58c Mon Sep 17 00:00:00 2001 +From: "Jason A. Donenfeld" +Date: Thu, 15 Jun 2017 00:45:26 +0200 +Subject: random: silence compiler warnings and fix race + +From: Jason A. Donenfeld + +commit 4a072c71f49b0a0e495ea13423bdb850da73c58c upstream. + +Odd versions of gcc for the sh4 architecture will actually warn about +flags being used while uninitialized, so we set them to zero. Non crazy +gccs will optimize that out again, so it doesn't make a difference. + +Next, over aggressive gccs could inline the expression that defines +use_lock, which could then introduce a race resulting in a lock +imbalance. By using READ_ONCE, we prevent that fate. Finally, we make +that assignment const, so that gcc can still optimize a nice amount. + +Finally, we fix a potential deadlock between primary_crng.lock and +batched_entropy_reset_lock, where they could be called in opposite +order. Moving the call to invalidate_batched_entropy to outside the lock +rectifies this issue. + +Fixes: b169c13de473a85b3c859bb36216a4cb5f00a54a +Signed-off-by: Jason A. 
Donenfeld +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/char/random.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +--- a/drivers/char/random.c ++++ b/drivers/char/random.c +@@ -803,13 +803,13 @@ static int crng_fast_load(const char *cp + p[crng_init_cnt % CHACHA20_KEY_SIZE] ^= *cp; + cp++; crng_init_cnt++; len--; + } ++ spin_unlock_irqrestore(&primary_crng.lock, flags); + if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) { + invalidate_batched_entropy(); + crng_init = 1; + wake_up_interruptible(&crng_init_wait); + pr_notice("random: fast init done\n"); + } +- spin_unlock_irqrestore(&primary_crng.lock, flags); + return 1; + } + +@@ -841,6 +841,7 @@ static void crng_reseed(struct crng_stat + } + memzero_explicit(&buf, sizeof(buf)); + crng->init_time = jiffies; ++ spin_unlock_irqrestore(&primary_crng.lock, flags); + if (crng == &primary_crng && crng_init < 2) { + invalidate_batched_entropy(); + crng_init = 2; +@@ -848,7 +849,6 @@ static void crng_reseed(struct crng_stat + wake_up_interruptible(&crng_init_wait); + pr_notice("random: crng init done\n"); + } +- spin_unlock_irqrestore(&primary_crng.lock, flags); + } + + static inline void crng_wait_ready(void) +@@ -2037,8 +2037,8 @@ static DEFINE_PER_CPU(struct batched_ent + u64 get_random_u64(void) + { + u64 ret; +- bool use_lock = crng_init < 2; +- unsigned long flags; ++ bool use_lock = READ_ONCE(crng_init) < 2; ++ unsigned long flags = 0; + struct batched_entropy *batch; + + #if BITS_PER_LONG == 64 +@@ -2069,8 +2069,8 @@ static DEFINE_PER_CPU(struct batched_ent + u32 get_random_u32(void) + { + u32 ret; +- bool use_lock = crng_init < 2; +- unsigned long flags; ++ bool use_lock = READ_ONCE(crng_init) < 2; ++ unsigned long flags = 0; + struct batched_entropy *batch; + + if (arch_get_random_int(&ret)) diff --git a/queue-4.11/signal-only-reschedule-timers-on-signals-timers-have-sent.patch b/queue-4.11/signal-only-reschedule-timers-on-signals-timers-have-sent.patch new 
file mode 100644 index 00000000000..8e30af6b592 --- /dev/null +++ b/queue-4.11/signal-only-reschedule-timers-on-signals-timers-have-sent.patch @@ -0,0 +1,152 @@ +From 57db7e4a2d92c2d3dfbca4ef8057849b2682436b Mon Sep 17 00:00:00 2001 +From: "Eric W. Biederman" +Date: Tue, 13 Jun 2017 04:31:16 -0500 +Subject: signal: Only reschedule timers on signals timers have sent + +From: Eric W. Biederman + +commit 57db7e4a2d92c2d3dfbca4ef8057849b2682436b upstream. + +Thomas Gleixner wrote: +> The CRIU support added a 'feature' which allows a user space task to send +> arbitrary (kernel) signals to itself. The changelog says: +> +> The kernel prevents sending of siginfo with positive si_code, because +> these codes are reserved for kernel. I think we can allow a task to +> send such a siginfo to itself. This operation should not be dangerous. +> +> Quite contrary to that claim, it turns out that it is outright dangerous +> for signals with info->si_code == SI_TIMER. The following code sequence in +> a user space task allows to crash the kernel: +> +> id = timer_create(CLOCK_XXX, ..... signo = SIGX); +> timer_set(id, ....); +> info->si_signo = SIGX; +> info->si_code = SI_TIMER: +> info->_sifields._timer._tid = id; +> info->_sifields._timer._sys_private = 2; +> rt_[tg]sigqueueinfo(..., SIGX, info); +> sigemptyset(&sigset); +> sigaddset(&sigset, SIGX); +> rt_sigtimedwait(sigset, info); +> +> For timers based on CLOCK_PROCESS_CPUTIME_ID, CLOCK_THREAD_CPUTIME_ID this +> results in a kernel crash because sigwait() dequeues the signal and the +> dequeue code observes: +> +> info->si_code == SI_TIMER && info->_sifields._timer._sys_private != 0 +> +> which triggers the following callchain: +> +> do_schedule_next_timer() -> posix_cpu_timer_schedule() -> arm_timer() +> +> arm_timer() executes a list_add() on the timer, which is already armed via +> the timer_set() syscall. That's a double list add which corrupts the posix +> cpu timer list. 
As a consequence the kernel crashes on the next operation +> touching the posix cpu timer list. +> +> Posix clocks which are internally implemented based on hrtimers are not +> affected by this because hrtimer_start() can handle already armed timers +> nicely, but it's a reliable way to trigger the WARN_ON() in +> hrtimer_forward(), which complains about calling that function on an +> already armed timer. + +This problem has existed since the posix timer code was merged into +2.5.63. A few releases earlier in 2.5.60 ptrace gained the ability to +inject not just a signal (which linux has supported since 1.0) but the +full siginfo of a signal. + +The core problem is that the code will reschedule in response to +signals getting dequeued not just for signals the timers sent but +for other signals that happen to have a si_code of SI_TIMER. + +Avoid this confusion by testing to see if the queued signal was +preallocated as all timer signals are preallocated, and so far +only the timer code preallocates signals. + +Move the check for if a timer needs to be rescheduled up into +collect_signal where the preallocation check must be performed, +and pass the result back to dequeue_signal where the code reschedules +timers. This makes it clear why the code cares about preallocated +timers. + +Reported-by: Thomas Gleixner +History Tree: https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git +Reference: 66dd34ad31e5 ("signal: allow to send any siginfo to itself") +Reference: 1669ce53e2ff ("Add PTRACE_GETSIGINFO and PTRACE_SETSIGINFO") +Fixes: db8b50ba75f2 ("[PATCH] POSIX clocks & timers") +Signed-off-by: "Eric W. 
Biederman" +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/signal.c | 20 ++++++++++++++------ + 1 file changed, 14 insertions(+), 6 deletions(-) + +--- a/kernel/signal.c ++++ b/kernel/signal.c +@@ -510,7 +510,8 @@ int unhandled_signal(struct task_struct + return !tsk->ptrace; + } + +-static void collect_signal(int sig, struct sigpending *list, siginfo_t *info) ++static void collect_signal(int sig, struct sigpending *list, siginfo_t *info, ++ bool *resched_timer) + { + struct sigqueue *q, *first = NULL; + +@@ -532,6 +533,12 @@ static void collect_signal(int sig, stru + still_pending: + list_del_init(&first->list); + copy_siginfo(info, &first->info); ++ ++ *resched_timer = ++ (first->flags & SIGQUEUE_PREALLOC) && ++ (info->si_code == SI_TIMER) && ++ (info->si_sys_private); ++ + __sigqueue_free(first); + } else { + /* +@@ -548,12 +555,12 @@ still_pending: + } + + static int __dequeue_signal(struct sigpending *pending, sigset_t *mask, +- siginfo_t *info) ++ siginfo_t *info, bool *resched_timer) + { + int sig = next_signal(pending, mask); + + if (sig) +- collect_signal(sig, pending, info); ++ collect_signal(sig, pending, info, resched_timer); + return sig; + } + +@@ -565,15 +572,16 @@ static int __dequeue_signal(struct sigpe + */ + int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) + { ++ bool resched_timer = false; + int signr; + + /* We only dequeue private signals from ourselves, we don't let + * signalfd steal them + */ +- signr = __dequeue_signal(&tsk->pending, mask, info); ++ signr = __dequeue_signal(&tsk->pending, mask, info, &resched_timer); + if (!signr) { + signr = __dequeue_signal(&tsk->signal->shared_pending, +- mask, info); ++ mask, info, &resched_timer); + #ifdef CONFIG_POSIX_TIMERS + /* + * itimer signal ? 
+@@ -621,7 +629,7 @@ int dequeue_signal(struct task_struct *t + current->jobctl |= JOBCTL_STOP_DEQUEUED; + } + #ifdef CONFIG_POSIX_TIMERS +- if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) { ++ if (resched_timer) { + /* + * Release the siglock to ensure proper locking order + * of timer locks outside of siglocks. Note, we leave diff --git a/queue-4.11/xen-blkback-don-t-leak-stack-data-via-response-ring.patch b/queue-4.11/xen-blkback-don-t-leak-stack-data-via-response-ring.patch new file mode 100644 index 00000000000..8a5ac1b609d --- /dev/null +++ b/queue-4.11/xen-blkback-don-t-leak-stack-data-via-response-ring.patch @@ -0,0 +1,126 @@ +From 089bc0143f489bd3a4578bdff5f4ca68fb26f341 Mon Sep 17 00:00:00 2001 +From: Jan Beulich +Date: Tue, 13 Jun 2017 16:28:27 -0400 +Subject: xen-blkback: don't leak stack data via response ring + +From: Jan Beulich + +commit 089bc0143f489bd3a4578bdff5f4ca68fb26f341 upstream. + +Rather than constructing a local structure instance on the stack, fill +the fields directly on the shared ring, just like other backends do. +Build on the fact that all response structure flavors are actually +identical (the old code did make this assumption too). + +This is XSA-216. 
+ +Signed-off-by: Jan Beulich +Reviewed-by: Konrad Rzeszutek Wilk +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/block/xen-blkback/blkback.c | 23 ++++++++++++----------- + drivers/block/xen-blkback/common.h | 25 +++++-------------------- + 2 files changed, 17 insertions(+), 31 deletions(-) + +--- a/drivers/block/xen-blkback/blkback.c ++++ b/drivers/block/xen-blkback/blkback.c +@@ -1436,34 +1436,35 @@ static int dispatch_rw_block_io(struct x + static void make_response(struct xen_blkif_ring *ring, u64 id, + unsigned short op, int st) + { +- struct blkif_response resp; ++ struct blkif_response *resp; + unsigned long flags; + union blkif_back_rings *blk_rings; + int notify; + +- resp.id = id; +- resp.operation = op; +- resp.status = st; +- + spin_lock_irqsave(&ring->blk_ring_lock, flags); + blk_rings = &ring->blk_rings; + /* Place on the response ring for the relevant domain. */ + switch (ring->blkif->blk_protocol) { + case BLKIF_PROTOCOL_NATIVE: +- memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt), +- &resp, sizeof(resp)); ++ resp = RING_GET_RESPONSE(&blk_rings->native, ++ blk_rings->native.rsp_prod_pvt); + break; + case BLKIF_PROTOCOL_X86_32: +- memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt), +- &resp, sizeof(resp)); ++ resp = RING_GET_RESPONSE(&blk_rings->x86_32, ++ blk_rings->x86_32.rsp_prod_pvt); + break; + case BLKIF_PROTOCOL_X86_64: +- memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt), +- &resp, sizeof(resp)); ++ resp = RING_GET_RESPONSE(&blk_rings->x86_64, ++ blk_rings->x86_64.rsp_prod_pvt); + break; + default: + BUG(); + } ++ ++ resp->id = id; ++ resp->operation = op; ++ resp->status = st; ++ + blk_rings->common.rsp_prod_pvt++; + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify); + spin_unlock_irqrestore(&ring->blk_ring_lock, flags); +--- a/drivers/block/xen-blkback/common.h ++++ b/drivers/block/xen-blkback/common.h +@@ 
-75,9 +75,8 @@ extern unsigned int xenblk_max_queues; + struct blkif_common_request { + char dummy; + }; +-struct blkif_common_response { +- char dummy; +-}; ++ ++/* i386 protocol version */ + + struct blkif_x86_32_request_rw { + uint8_t nr_segments; /* number of segments */ +@@ -129,14 +128,6 @@ struct blkif_x86_32_request { + } u; + } __attribute__((__packed__)); + +-/* i386 protocol version */ +-#pragma pack(push, 4) +-struct blkif_x86_32_response { +- uint64_t id; /* copied from request */ +- uint8_t operation; /* copied from request */ +- int16_t status; /* BLKIF_RSP_??? */ +-}; +-#pragma pack(pop) + /* x86_64 protocol version */ + + struct blkif_x86_64_request_rw { +@@ -193,18 +184,12 @@ struct blkif_x86_64_request { + } u; + } __attribute__((__packed__)); + +-struct blkif_x86_64_response { +- uint64_t __attribute__((__aligned__(8))) id; +- uint8_t operation; /* copied from request */ +- int16_t status; /* BLKIF_RSP_??? */ +-}; +- + DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, +- struct blkif_common_response); ++ struct blkif_response); + DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, +- struct blkif_x86_32_response); ++ struct blkif_response __packed); + DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request, +- struct blkif_x86_64_response); ++ struct blkif_response); + + union blkif_back_rings { + struct blkif_back_ring native; diff --git a/queue-4.11/xen-blkback-fix-disconnect-while-i-os-in-flight.patch b/queue-4.11/xen-blkback-fix-disconnect-while-i-os-in-flight.patch new file mode 100644 index 00000000000..97cf45b8422 --- /dev/null +++ b/queue-4.11/xen-blkback-fix-disconnect-while-i-os-in-flight.patch @@ -0,0 +1,82 @@ +From 46464411307746e6297a034a9983a22c9dfc5a0c Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Thu, 18 May 2017 17:28:47 +0200 +Subject: xen/blkback: fix disconnect while I/Os in flight +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Juergen Gross 
+ +commit 46464411307746e6297a034a9983a22c9dfc5a0c upstream. + +Today disconnecting xen-blkback is broken in case there are still +I/Os in flight: xen_blkif_disconnect() will bail out early without +releasing all resources in the hope it will be called again when +the last request has terminated. This, however, won't happen as +xen_blkif_free() won't be called on termination of the last running +request: xen_blkif_put() won't decrement the blkif refcnt to 0 as +xen_blkif_disconnect() didn't finish before thus some xen_blkif_put() +calls in xen_blkif_disconnect() didn't happen. + +To solve this deadlock xen_blkif_disconnect() and +xen_blkif_alloc_rings() shouldn't use xen_blkif_put() and +xen_blkif_get() but use some other way to do their accounting of +resources. + +This at once fixes another error in xen_blkif_disconnect(): when it +returned early with -EBUSY for another ring than 0 it would call +xen_blkif_put() again for already handled rings on a subsequent call. +This will lead to inconsistencies in the refcnt handling. + +Signed-off-by: Juergen Gross +Tested-by: Steven Haigh +Acked-by: Roger Pau Monné +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/block/xen-blkback/common.h | 1 + + drivers/block/xen-blkback/xenbus.c | 7 +++++-- + 2 files changed, 6 insertions(+), 2 deletions(-) + +--- a/drivers/block/xen-blkback/common.h ++++ b/drivers/block/xen-blkback/common.h +@@ -281,6 +281,7 @@ struct xen_blkif_ring { + + wait_queue_head_t wq; + atomic_t inflight; ++ bool active; + /* One thread per blkif ring. 
*/ + struct task_struct *xenblkd; + unsigned int waiting_reqs; +--- a/drivers/block/xen-blkback/xenbus.c ++++ b/drivers/block/xen-blkback/xenbus.c +@@ -159,7 +159,7 @@ static int xen_blkif_alloc_rings(struct + init_waitqueue_head(&ring->shutdown_wq); + ring->blkif = blkif; + ring->st_print = jiffies; +- xen_blkif_get(blkif); ++ ring->active = true; + } + + return 0; +@@ -249,6 +249,9 @@ static int xen_blkif_disconnect(struct x + struct xen_blkif_ring *ring = &blkif->rings[r]; + unsigned int i = 0; + ++ if (!ring->active) ++ continue; ++ + if (ring->xenblkd) { + kthread_stop(ring->xenblkd); + wake_up(&ring->shutdown_wq); +@@ -296,7 +299,7 @@ static int xen_blkif_disconnect(struct x + BUG_ON(ring->free_pages_num != 0); + BUG_ON(ring->persistent_gnt_c != 0); + WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages)); +- xen_blkif_put(blkif); ++ ring->active = false; + } + blkif->nr_ring_pages = 0; + /*