From: Greg Kroah-Hartman Date: Mon, 12 Jun 2017 12:49:16 +0000 (+0200) Subject: 4.4-stable patches X-Git-Tag: v3.18.57~12 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=a88132282d55c16404a4e22a40d5cd228eaf11b8;p=thirdparty%2Fkernel%2Fstable-queue.git 4.4-stable patches added patches: alsa-timer-fix-missing-queue-indices-reset-at-sndrv_timer_ioctl_select.patch alsa-timer-fix-race-between-read-and-ioctl.patch asoc-fix-use-after-free-at-card-unregistration.patch drivers-char-mem-fix-wraparound-check-to-allow-mappings-up-to-the-end.patch drm-nouveau-tmr-fully-separate-alarm-execution-pending-lists.patch drm-vmwgfx-handle-vmalloc-failure-in-vmw_local_fifo_reserve.patch drm-vmwgfx-limit-the-number-of-mip-levels-in-vmw_gb_surface_define_ioctl.patch drm-vmwgfx-make-sure-backup_handle-is-always-valid.patch kvm-arm-arm64-handle-possible-null-stage2-pud-when-ageing-pages.patch mm-consider-memblock-reservations-for-deferred-memory-initialization-sizing.patch net-better-skb-sender_cpu-and-skb-napi_id-cohabitation.patch nfs-ensure-we-revalidate-attributes-before-using-execute_ok.patch nfsv4-don-t-perform-cached-access-checks-before-we-ve-opened-the-file.patch perf-core-drop-kernel-samples-even-though-u-is-specified.patch powerpc-eeh-avoid-use-after-free-in-eeh_handle_special_event.patch powerpc-hotplug-mem-fix-missing-endian-conversion-of-aa_index.patch powerpc-numa-fix-percpu-allocations-to-be-numa-aware.patch scsi-qla2xxx-don-t-disable-a-not-previously-enabled-pci-device.patch serial-sh-sci-fix-panic-when-serial-console-and-dma-are-enabled.patch tty-drop-krefs-for-interrupted-tty-lock.patch --- diff --git a/queue-4.4/alsa-timer-fix-missing-queue-indices-reset-at-sndrv_timer_ioctl_select.patch b/queue-4.4/alsa-timer-fix-missing-queue-indices-reset-at-sndrv_timer_ioctl_select.patch new file mode 100644 index 00000000000..63219e10d00 --- /dev/null +++ b/queue-4.4/alsa-timer-fix-missing-queue-indices-reset-at-sndrv_timer_ioctl_select.patch @@ -0,0 +1,55 @@ +From ba3021b2c79b2fa9114f92790a99deb27a65b728 Mon Sep 17 00:00:00 2001 +From: Takashi Iwai +Date: Fri, 2 Jun 2017 17:26:56 +0200 +Subject: ALSA: timer: Fix missing queue indices reset at SNDRV_TIMER_IOCTL_SELECT + +From: Takashi Iwai + +commit ba3021b2c79b2fa9114f92790a99deb27a65b728 upstream. + +snd_timer_user_tselect() reallocates the queue buffer dynamically, but +it forgot to reset its indices. Since the read may happen +concurrently with ioctl and snd_timer_user_tselect() allocates the +buffer via kmalloc(), this may lead to the leak of uninitialized +kernel-space data, as spotted via KMSAN: + + BUG: KMSAN: use of unitialized memory in snd_timer_user_read+0x6c4/0xa10 + CPU: 0 PID: 1037 Comm: probe Not tainted 4.11.0-rc5+ #2739 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 + Call Trace: + __dump_stack lib/dump_stack.c:16 + dump_stack+0x143/0x1b0 lib/dump_stack.c:52 + kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:1007 + kmsan_check_memory+0xc2/0x140 mm/kmsan/kmsan.c:1086 + copy_to_user ./arch/x86/include/asm/uaccess.h:725 + snd_timer_user_read+0x6c4/0xa10 sound/core/timer.c:2004 + do_loop_readv_writev fs/read_write.c:716 + __do_readv_writev+0x94c/0x1380 fs/read_write.c:864 + do_readv_writev fs/read_write.c:894 + vfs_readv fs/read_write.c:908 + do_readv+0x52a/0x5d0 fs/read_write.c:934 + SYSC_readv+0xb6/0xd0 fs/read_write.c:1021 + SyS_readv+0x87/0xb0 fs/read_write.c:1018 + +This patch adds the missing reset of queue indices. 
Together with the
+previous fix for the ioctl/read race, we cover the whole problem.
+
+Reported-by: Alexander Potapenko
+Tested-by: Alexander Potapenko
+Signed-off-by: Takashi Iwai
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ sound/core/timer.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/sound/core/timer.c
++++ b/sound/core/timer.c
+@@ -1621,6 +1621,7 @@ static int snd_timer_user_tselect(struct
+ 	if (err < 0)
+ 		goto __err;
+ 
++	tu->qhead = tu->qtail = tu->qused = 0;
+ 	kfree(tu->queue);
+ 	tu->queue = NULL;
+ 	kfree(tu->tqueue);
diff --git a/queue-4.4/alsa-timer-fix-race-between-read-and-ioctl.patch b/queue-4.4/alsa-timer-fix-race-between-read-and-ioctl.patch
new file mode 100644
index 00000000000..1e4ece4a79a
--- /dev/null
+++ b/queue-4.4/alsa-timer-fix-race-between-read-and-ioctl.patch
@@ -0,0 +1,73 @@
+From d11662f4f798b50d8c8743f433842c3e40fe3378 Mon Sep 17 00:00:00 2001
+From: Takashi Iwai
+Date: Fri, 2 Jun 2017 15:03:38 +0200
+Subject: ALSA: timer: Fix race between read and ioctl
+
+From: Takashi Iwai
+
+commit d11662f4f798b50d8c8743f433842c3e40fe3378 upstream.
+
+A read from the ALSA timer device, i.e. the function
+snd_timer_user_tread(), may access uninitialized struct
+snd_timer_user fields when the read is performed concurrently with
+an ioctl like snd_timer_user_tselect(). We have already fixed the
+races among ioctls via a mutex, but we seem to have forgotten the
+race between read and ioctl.
+
+This patch simply applies (more exactly, extends the already applied
+range of) tu->ioctl_lock in snd_timer_user_tread() to close the
+race window.
+
+Reported-by: Alexander Potapenko
+Tested-by: Alexander Potapenko
+Signed-off-by: Takashi Iwai
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ sound/core/timer.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/sound/core/timer.c
++++ b/sound/core/timer.c
+@@ -1958,6 +1958,7 @@ static ssize_t snd_timer_user_read(struc
+ 
+ 	tu = file->private_data;
+ 	unit = tu->tread ? sizeof(struct snd_timer_tread) : sizeof(struct snd_timer_read);
++	mutex_lock(&tu->ioctl_lock);
+ 	spin_lock_irq(&tu->qlock);
+ 	while ((long)count - result >= unit) {
+ 		while (!tu->qused) {
+@@ -1973,7 +1974,9 @@ static ssize_t snd_timer_user_read(struc
+ 			add_wait_queue(&tu->qchange_sleep, &wait);
+ 
+ 			spin_unlock_irq(&tu->qlock);
++			mutex_unlock(&tu->ioctl_lock);
+ 			schedule();
++			mutex_lock(&tu->ioctl_lock);
+ 			spin_lock_irq(&tu->qlock);
+ 
+ 			remove_wait_queue(&tu->qchange_sleep, &wait);
+@@ -1993,7 +1996,6 @@ static ssize_t snd_timer_user_read(struc
+ 		tu->qused--;
+ 		spin_unlock_irq(&tu->qlock);
+ 
+-		mutex_lock(&tu->ioctl_lock);
+ 		if (tu->tread) {
+ 			if (copy_to_user(buffer, &tu->tqueue[qhead],
+ 					 sizeof(struct snd_timer_tread)))
+@@ -2003,7 +2005,6 @@ static ssize_t snd_timer_user_read(struc
+ 					 sizeof(struct snd_timer_read)))
+ 				err = -EFAULT;
+ 		}
+-		mutex_unlock(&tu->ioctl_lock);
+ 
+ 		spin_lock_irq(&tu->qlock);
+ 		if (err < 0)
+@@ -2013,6 +2014,7 @@ static ssize_t snd_timer_user_read(struc
+ 	}
+  _error:
+ 	spin_unlock_irq(&tu->qlock);
++	mutex_unlock(&tu->ioctl_lock);
+ 	return result > 0 ?
result : err;
+ }
+ 
diff --git a/queue-4.4/asoc-fix-use-after-free-at-card-unregistration.patch b/queue-4.4/asoc-fix-use-after-free-at-card-unregistration.patch
new file mode 100644
index 00000000000..4fe73856098
--- /dev/null
+++ b/queue-4.4/asoc-fix-use-after-free-at-card-unregistration.patch
@@ -0,0 +1,51 @@
+From 4efda5f2130da033aeedc5b3205569893b910de2 Mon Sep 17 00:00:00 2001
+From: Takashi Iwai
+Date: Wed, 24 May 2017 10:19:45 +0200
+Subject: ASoC: Fix use-after-free at card unregistration
+
+From: Takashi Iwai
+
+commit 4efda5f2130da033aeedc5b3205569893b910de2 upstream.
+
+soc_cleanup_card_resources() calls snd_card_free() at the end of its
+procedure. This turned out to lead to a use-after-free:
+PCM runtimes have already been removed via soc_remove_pcm_runtimes(),
+while they are dereferenced later in soc_pcm_free() called via
+snd_card_free().
+
+The fix is simple: just move the snd_card_free() call to the beginning
+of the whole procedure. This also gives another benefit: it
+guarantees that all operations have been shut down before actually
+releasing the resources, which was racy until now.
+
+Reported-and-tested-by: Robert Jarzmik
+Signed-off-by: Takashi Iwai
+Signed-off-by: Mark Brown
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ sound/soc/soc-core.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/sound/soc/soc-core.c
++++ b/sound/soc/soc-core.c
+@@ -1775,6 +1775,9 @@ static int soc_cleanup_card_resources(st
+ 	for (i = 0; i < card->num_aux_devs; i++)
+ 		soc_remove_aux_dev(card, i);
+ 
++	/* free the ALSA card at first; this syncs with pending operations */
++	snd_card_free(card->snd_card);
++
+ 	/* remove and free each DAI */
+ 	soc_remove_dai_links(card);
+ 
+@@ -1786,9 +1789,7 @@ static int soc_cleanup_card_resources(st
+ 
+ 	snd_soc_dapm_free(&card->dapm);
+ 
+-	snd_card_free(card->snd_card);
+ 	return 0;
+-
+ }
+ 
+ /* removes a socdev */
diff --git a/queue-4.4/drivers-char-mem-fix-wraparound-check-to-allow-mappings-up-to-the-end.patch b/queue-4.4/drivers-char-mem-fix-wraparound-check-to-allow-mappings-up-to-the-end.patch
new file mode 100644
index 00000000000..05c71952b3b
--- /dev/null
+++ b/queue-4.4/drivers-char-mem-fix-wraparound-check-to-allow-mappings-up-to-the-end.patch
@@ -0,0 +1,36 @@
+From 32829da54d9368103a2f03269a5120aa9ee4d5da Mon Sep 17 00:00:00 2001
+From: Julius Werner
+Date: Fri, 2 Jun 2017 15:36:39 -0700
+Subject: drivers: char: mem: Fix wraparound check to allow mappings up to the end
+
+From: Julius Werner
+
+commit 32829da54d9368103a2f03269a5120aa9ee4d5da upstream.
+
+A recent fix to /dev/mem prevents mappings from wrapping around the end
+of physical address space. However, the check was written in a way that
+also prevents a mapping reaching just up to the end of physical address
+space, which may be a valid use case (especially on 32-bit systems).
+This patch fixes it by checking the last mapped address (instead of the
+first address behind that) for overflow.
+
+Fixes: b299cde245 ("drivers: char: mem: Check for address space wraparound with mmap()")
+Reported-by: Nico Huber
+Signed-off-by: Julius Werner
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/char/mem.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/char/mem.c
++++ b/drivers/char/mem.c
+@@ -346,7 +346,7 @@ static int mmap_mem(struct file *file, s
+ 	phys_addr_t offset = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
+ 
+ 	/* It's illegal to wrap around the end of the physical address space.
*/
+-	if (offset + (phys_addr_t)size < offset)
++	if (offset + (phys_addr_t)size - 1 < offset)
+ 		return -EINVAL;
+ 
+ 	if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))
diff --git a/queue-4.4/drm-nouveau-tmr-fully-separate-alarm-execution-pending-lists.patch b/queue-4.4/drm-nouveau-tmr-fully-separate-alarm-execution-pending-lists.patch
new file mode 100644
index 00000000000..59ad28e773d
--- /dev/null
+++ b/queue-4.4/drm-nouveau-tmr-fully-separate-alarm-execution-pending-lists.patch
@@ -0,0 +1,57 @@
+From b4e382ca7586a63b6c1e5221ce0863ff867c2df6 Mon Sep 17 00:00:00 2001
+From: Ben Skeggs
+Date: Mon, 5 Jun 2017 17:23:32 +1000
+Subject: drm/nouveau/tmr: fully separate alarm execution/pending lists
+
+From: Ben Skeggs
+
+commit b4e382ca7586a63b6c1e5221ce0863ff867c2df6 upstream.
+
+Reusing the list_head for both is a bad idea. Callback execution is done
+with the lock dropped so that alarms can be rescheduled from the callback,
+which means that with some unfortunate timing, lists can get corrupted.
+
+The execution list should not require its own locking, as the single
+function that uses it can only be called from a single context.
+
+Signed-off-by: Ben Skeggs
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h | 1 +
+ drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c | 7 ++++---
+ 2 files changed, 5 insertions(+), 3 deletions(-)
+
+--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h
++++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h
+@@ -4,6 +4,7 @@
+ 
+ struct nvkm_alarm {
+ 	struct list_head head;
++	struct list_head exec;
+ 	u64 timestamp;
+ 	void (*func)(struct nvkm_alarm *);
+ };
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c
+@@ -50,7 +50,8 @@ nvkm_timer_alarm_trigger(struct nvkm_tim
+ 		/* Move to completed list.  We'll drop the lock before
+ 		 * executing the callback so it can reschedule itself.
+ 		 */
+-		list_move_tail(&alarm->head, &exec);
++		list_del_init(&alarm->head);
++		list_add(&alarm->exec, &exec);
+ 	}
+ 
+ 	/* Shut down interrupt if no more pending alarms. */
+@@ -59,8 +60,8 @@ nvkm_timer_alarm_trigger(struct nvkm_tim
+ 	spin_unlock_irqrestore(&tmr->lock, flags);
+ 
+ 	/* Execute completed callbacks. */
+-	list_for_each_entry_safe(alarm, atemp, &exec, head) {
+-		list_del_init(&alarm->head);
++	list_for_each_entry_safe(alarm, atemp, &exec, exec) {
++		list_del(&alarm->exec);
+ 		alarm->func(alarm);
+ 	}
+ }
diff --git a/queue-4.4/drm-vmwgfx-handle-vmalloc-failure-in-vmw_local_fifo_reserve.patch b/queue-4.4/drm-vmwgfx-handle-vmalloc-failure-in-vmw_local_fifo_reserve.patch
new file mode 100644
index 00000000000..7f85a501474
--- /dev/null
+++ b/queue-4.4/drm-vmwgfx-handle-vmalloc-failure-in-vmw_local_fifo_reserve.patch
@@ -0,0 +1,31 @@
+From f0c62e9878024300319ba2438adc7b06c6b9c448 Mon Sep 17 00:00:00 2001
+From: Dan Carpenter
+Date: Thu, 27 Apr 2017 12:12:08 +0300
+Subject: drm/vmwgfx: Handle vmalloc() failure in vmw_local_fifo_reserve()
+
+From: Dan Carpenter
+
+commit f0c62e9878024300319ba2438adc7b06c6b9c448 upstream.
+
+If vmalloc() fails then we need to do a bit of cleanup before returning.
+ +Fixes: fb1d9738ca05 ("drm/vmwgfx: Add DRM driver for VMware Virtual GPU") +Signed-off-by: Dan Carpenter +Reviewed-by: Sinclair Yeh +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c +@@ -368,6 +368,8 @@ static void *vmw_local_fifo_reserve(stru + return fifo_state->static_buffer; + else { + fifo_state->dynamic_buffer = vmalloc(bytes); ++ if (!fifo_state->dynamic_buffer) ++ goto out_err; + return fifo_state->dynamic_buffer; + } + } diff --git a/queue-4.4/drm-vmwgfx-limit-the-number-of-mip-levels-in-vmw_gb_surface_define_ioctl.patch b/queue-4.4/drm-vmwgfx-limit-the-number-of-mip-levels-in-vmw_gb_surface_define_ioctl.patch new file mode 100644 index 00000000000..fe31afedcee --- /dev/null +++ b/queue-4.4/drm-vmwgfx-limit-the-number-of-mip-levels-in-vmw_gb_surface_define_ioctl.patch @@ -0,0 +1,36 @@ +From ee9c4e681ec4f58e42a83cb0c22a0289ade1aacf Mon Sep 17 00:00:00 2001 +From: Vladis Dronov +Date: Fri, 2 Jun 2017 07:42:09 +0200 +Subject: drm/vmwgfx: limit the number of mip levels in vmw_gb_surface_define_ioctl() + +From: Vladis Dronov + +commit ee9c4e681ec4f58e42a83cb0c22a0289ade1aacf upstream. + +The 'req->mip_levels' parameter in vmw_gb_surface_define_ioctl() is +a user-controlled 'uint32_t' value which is used as a loop count limit. +This can lead to a kernel lockup and DoS. Add check for 'req->mip_levels'. + +References: +https://bugzilla.redhat.com/show_bug.cgi?id=1437431 + +Signed-off-by: Vladis Dronov +Reviewed-by: Sinclair Yeh +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +@@ -1293,6 +1293,9 @@ int vmw_gb_surface_define_ioctl(struct d + if (req->multisample_count != 0) + return -EINVAL; + ++ if (req->mip_levels > DRM_VMW_MAX_MIP_LEVELS) ++ return -EINVAL; ++ + if (unlikely(vmw_user_surface_size == 0)) + vmw_user_surface_size = ttm_round_pot(sizeof(*user_srf)) + + 128; diff --git a/queue-4.4/drm-vmwgfx-make-sure-backup_handle-is-always-valid.patch b/queue-4.4/drm-vmwgfx-make-sure-backup_handle-is-always-valid.patch new file mode 100644 index 00000000000..f871daf97a9 --- /dev/null +++ b/queue-4.4/drm-vmwgfx-make-sure-backup_handle-is-always-valid.patch @@ -0,0 +1,59 @@ +From 07678eca2cf9c9a18584e546c2b2a0d0c9a3150c Mon Sep 17 00:00:00 2001 +From: Sinclair Yeh +Date: Fri, 2 Jun 2017 07:50:57 +0200 +Subject: drm/vmwgfx: Make sure backup_handle is always valid + +From: Sinclair Yeh + +commit 07678eca2cf9c9a18584e546c2b2a0d0c9a3150c upstream. + +When vmw_gb_surface_define_ioctl() is called with an existing buffer, +we end up returning an uninitialized variable in the backup_handle. + +The fix is to first initialize backup_handle to 0 just to be sure, and +second, when a user-provided buffer is found, we will use the +req->buffer_handle as the backup_handle. 
+ +Reported-by: Murray McAllister +Signed-off-by: Sinclair Yeh +Reviewed-by: Deepak Rawat +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 18 +++++++++++------- + 1 file changed, 11 insertions(+), 7 deletions(-) + +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +@@ -1288,7 +1288,7 @@ int vmw_gb_surface_define_ioctl(struct d + struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; + int ret; + uint32_t size; +- uint32_t backup_handle; ++ uint32_t backup_handle = 0; + + if (req->multisample_count != 0) + return -EINVAL; +@@ -1331,12 +1331,16 @@ int vmw_gb_surface_define_ioctl(struct d + ret = vmw_user_dmabuf_lookup(tfile, req->buffer_handle, + &res->backup, + &user_srf->backup_base); +- if (ret == 0 && res->backup->base.num_pages * PAGE_SIZE < +- res->backup_size) { +- DRM_ERROR("Surface backup buffer is too small.\n"); +- vmw_dmabuf_unreference(&res->backup); +- ret = -EINVAL; +- goto out_unlock; ++ if (ret == 0) { ++ if (res->backup->base.num_pages * PAGE_SIZE < ++ res->backup_size) { ++ DRM_ERROR("Surface backup buffer is too small.\n"); ++ vmw_dmabuf_unreference(&res->backup); ++ ret = -EINVAL; ++ goto out_unlock; ++ } else { ++ backup_handle = req->buffer_handle; ++ } + } + } else if (req->drm_surface_flags & drm_vmw_surface_flag_create_buffer) + ret = vmw_user_dmabuf_alloc(dev_priv, tfile, diff --git a/queue-4.4/kvm-arm-arm64-handle-possible-null-stage2-pud-when-ageing-pages.patch b/queue-4.4/kvm-arm-arm64-handle-possible-null-stage2-pud-when-ageing-pages.patch new file mode 100644 index 00000000000..081a76e87e6 --- /dev/null +++ b/queue-4.4/kvm-arm-arm64-handle-possible-null-stage2-pud-when-ageing-pages.patch @@ -0,0 +1,85 @@ +From d6dbdd3c8558cad3b6d74cc357b408622d122331 Mon Sep 17 00:00:00 2001 +From: Marc Zyngier +Date: Mon, 5 Jun 2017 19:17:18 +0100 +Subject: KVM: arm/arm64: Handle possible NULL stage2 pud when ageing pages + +From: Marc Zyngier + +commit d6dbdd3c8558cad3b6d74cc357b408622d122331 upstream. + +Under memory pressure, we start ageing pages, which amounts to parsing +the page tables. Since we don't want to allocate any extra level, +we pass NULL for our private allocation cache. Which means that +stage2_get_pud() is allowed to fail. 
This results in the following +splat: + +[ 1520.409577] Unable to handle kernel NULL pointer dereference at virtual address 00000008 +[ 1520.417741] pgd = ffff810f52fef000 +[ 1520.421201] [00000008] *pgd=0000010f636c5003, *pud=0000010f56f48003, *pmd=0000000000000000 +[ 1520.429546] Internal error: Oops: 96000006 [#1] PREEMPT SMP +[ 1520.435156] Modules linked in: +[ 1520.438246] CPU: 15 PID: 53550 Comm: qemu-system-aar Tainted: G W 4.12.0-rc4-00027-g1885c397eaec #7205 +[ 1520.448705] Hardware name: FOXCONN R2-1221R-A4/C2U4N_MB, BIOS G31FB12A 10/26/2016 +[ 1520.463726] task: ffff800ac5fb4e00 task.stack: ffff800ce04e0000 +[ 1520.469666] PC is at stage2_get_pmd+0x34/0x110 +[ 1520.474119] LR is at kvm_age_hva_handler+0x44/0xf0 +[ 1520.478917] pc : [] lr : [] pstate: 40000145 +[ 1520.486325] sp : ffff800ce04e33d0 +[ 1520.489644] x29: ffff800ce04e33d0 x28: 0000000ffff40064 +[ 1520.494967] x27: 0000ffff27e00000 x26: 0000000000000000 +[ 1520.500289] x25: ffff81051ba65008 x24: 0000ffff40065000 +[ 1520.505618] x23: 0000ffff40064000 x22: 0000000000000000 +[ 1520.510947] x21: ffff810f52b20000 x20: 0000000000000000 +[ 1520.516274] x19: 0000000058264000 x18: 0000000000000000 +[ 1520.521603] x17: 0000ffffa6fe7438 x16: ffff000008278b70 +[ 1520.526940] x15: 000028ccd8000000 x14: 0000000000000008 +[ 1520.532264] x13: ffff7e0018298000 x12: 0000000000000002 +[ 1520.537582] x11: ffff000009241b93 x10: 0000000000000940 +[ 1520.542908] x9 : ffff0000092ef800 x8 : 0000000000000200 +[ 1520.548229] x7 : ffff800ce04e36a8 x6 : 0000000000000000 +[ 1520.553552] x5 : 0000000000000001 x4 : 0000000000000000 +[ 1520.558873] x3 : 0000000000000000 x2 : 0000000000000008 +[ 1520.571696] x1 : ffff000008fd5000 x0 : ffff0000080b149c +[ 1520.577039] Process qemu-system-aar (pid: 53550, stack limit = 0xffff800ce04e0000) +[...] +[ 1521.510735] [] stage2_get_pmd+0x34/0x110 +[ 1521.516221] [] kvm_age_hva_handler+0x44/0xf0 +[ 1521.522054] [] handle_hva_to_gpa+0xb8/0xe8 +[ 1521.527716] [] kvm_age_hva+0x44/0xf0 +[ 1521.532854] [] kvm_mmu_notifier_clear_flush_young+0x70/0xc0 +[ 1521.539992] [] __mmu_notifier_clear_flush_young+0x88/0xd0 +[ 1521.546958] [] page_referenced_one+0xf0/0x188 +[ 1521.552881] [] rmap_walk_anon+0xec/0x250 +[ 1521.558370] [] rmap_walk+0x78/0xa0 +[ 1521.563337] [] page_referenced+0x164/0x180 +[ 1521.569002] [] shrink_active_list+0x178/0x3b8 +[ 1521.574922] [] shrink_node_memcg+0x328/0x600 +[ 1521.580758] [] shrink_node+0xc4/0x328 +[ 1521.585986] [] do_try_to_free_pages+0xc0/0x340 +[ 1521.592000] [] try_to_free_pages+0xcc/0x240 +[...] + +The trivial fix is to handle this NULL pud value early, rather than +dereferencing it blindly. 
+
+Signed-off-by: Marc Zyngier
+Reviewed-by: Christoffer Dall
+Signed-off-by: Christoffer Dall
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/arm/kvm/mmu.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/arm/kvm/mmu.c
++++ b/arch/arm/kvm/mmu.c
+@@ -869,6 +869,9 @@ static pmd_t *stage2_get_pmd(struct kvm
+ 	pmd_t *pmd;
+ 
+ 	pud = stage2_get_pud(kvm, cache, addr);
++	if (!pud)
++		return NULL;
++
+ 	if (pud_none(*pud)) {
+ 		if (!cache)
+ 			return NULL;
diff --git a/queue-4.4/mm-consider-memblock-reservations-for-deferred-memory-initialization-sizing.patch b/queue-4.4/mm-consider-memblock-reservations-for-deferred-memory-initialization-sizing.patch
new file mode 100644
index 00000000000..f05ad5c0046
--- /dev/null
+++ b/queue-4.4/mm-consider-memblock-reservations-for-deferred-memory-initialization-sizing.patch
@@ -0,0 +1,209 @@
+From 864b9a393dcb5aed09b8fd31b9bbda0fdda99374 Mon Sep 17 00:00:00 2001
+From: Michal Hocko
+Date: Fri, 2 Jun 2017 14:46:49 -0700
+Subject: mm: consider memblock reservations for deferred memory initialization sizing
+
+From: Michal Hocko
+
+commit 864b9a393dcb5aed09b8fd31b9bbda0fdda99374 upstream.
+
+We have seen an early OOM killer invocation on ppc64 systems with
+crashkernel=4096M:
+
+	kthreadd invoked oom-killer: gfp_mask=0x16040c0(GFP_KERNEL|__GFP_COMP|__GFP_NOTRACK), nodemask=7, order=0, oom_score_adj=0
+	kthreadd cpuset=/ mems_allowed=7
+	CPU: 0 PID: 2 Comm: kthreadd Not tainted 4.4.68-1.gd7fe927-default #1
+	Call Trace:
+	  dump_stack+0xb0/0xf0 (unreliable)
+	  dump_header+0xb0/0x258
+	  out_of_memory+0x5f0/0x640
+	  __alloc_pages_nodemask+0xa8c/0xc80
+	  kmem_getpages+0x84/0x1a0
+	  fallback_alloc+0x2a4/0x320
+	  kmem_cache_alloc_node+0xc0/0x2e0
+	  copy_process.isra.25+0x260/0x1b30
+	  _do_fork+0x94/0x470
+	  kernel_thread+0x48/0x60
+	  kthreadd+0x264/0x330
+	  ret_from_kernel_thread+0x5c/0xa4
+
+	Mem-Info:
+	active_anon:0 inactive_anon:0 isolated_anon:0
+	 active_file:0 inactive_file:0 isolated_file:0
+	 unevictable:0 dirty:0 writeback:0 unstable:0
+	 slab_reclaimable:5 slab_unreclaimable:73
+	 mapped:0 shmem:0 pagetables:0 bounce:0
+	 free:0 free_pcp:0 free_cma:0
+	Node 7 DMA free:0kB min:0kB low:0kB high:0kB active_anon:0kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB isolated(anon):0kB isolated(file):0kB present:52428800kB managed:110016kB mlocked:0kB dirty:0kB writeback:0kB mapped:0kB shmem:0kB slab_reclaimable:320kB slab_unreclaimable:4672kB kernel_stack:1152kB pagetables:0kB unstable:0kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB writeback_tmp:0kB pages_scanned:0 all_unreclaimable? yes
+	lowmem_reserve[]: 0 0 0 0
+	Node 7 DMA: 0*64kB 0*128kB 0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB 0*8192kB 0*16384kB = 0kB
+	0 total pagecache pages
+	0 pages in swap cache
+	Swap cache stats: add 0, delete 0, find 0/0
+	Free swap  = 0kB
+	Total swap = 0kB
+	819200 pages RAM
+	0 pages HighMem/MovableOnly
+	817481 pages reserved
+	0 pages cma reserved
+	0 pages hwpoisoned
+
+the reason is that the managed memory is too low (only 110MB) while the
+rest of the 50GB is still waiting for the deferred initialization to
+be done. update_defer_init estimates the initial memory to initialize
+to 2GB at least but it doesn't consider any memory allocated in that
+range. In this particular case we've had
+
+	Reserving 4096MB of memory at 128MB for crashkernel (System RAM: 51200MB)
+
+so the low 2GB is mostly depleted.
+
+Fix this by considering memblock allocations in the initial static
+initialization estimation.
Move the max_initialise to
+reset_deferred_meminit and implement a simple memblock_reserved_memory
+helper which iterates all reserved blocks and sums the size of all that
+start below the given address. The cumulative size is then added on top
+of the initial estimation. This is still not ideal because
+reset_deferred_meminit doesn't consider holes and so reservation might
+be above the initial estimation, which we ignore, but let's make the
+logic simpler until we really need to handle more complicated cases.
+
+Fixes: 3a80a7fa7989 ("mm: meminit: initialise a subset of struct pages if CONFIG_DEFERRED_STRUCT_PAGE_INIT is set")
+Link: http://lkml.kernel.org/r/20170531104010.GI27783@dhcp22.suse.cz
+Signed-off-by: Michal Hocko
+Acked-by: Mel Gorman
+Tested-by: Srikar Dronamraju
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+
+---
+ include/linux/memblock.h | 8 ++++++++
+ include/linux/mmzone.h | 1 +
+ mm/memblock.c | 24 ++++++++++++++++++++++++
+ mm/page_alloc.c | 25 ++++++++++++++++++++++---
+ 4 files changed, 55 insertions(+), 3 deletions(-)
+
+--- a/include/linux/memblock.h
++++ b/include/linux/memblock.h
+@@ -408,11 +408,19 @@ static inline void early_memtest(phys_ad
+ }
+ #endif
+ 
++extern unsigned long memblock_reserved_memory_within(phys_addr_t start_addr,
++		phys_addr_t end_addr);
+ #else
+ static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align)
+ {
+ 	return 0;
+ }
++
++static inline unsigned long memblock_reserved_memory_within(phys_addr_t start_addr,
++		phys_addr_t end_addr)
++{
++	return 0;
++}
+ 
+ #endif /* CONFIG_HAVE_MEMBLOCK */
+ 
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -688,6 +688,7 @@ typedef struct pglist_data {
+ 	 * is the first PFN that needs to be initialised.
+ 	 */
+ 	unsigned long first_deferred_pfn;
++	unsigned long static_init_size;
+ #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
+ } pg_data_t;
+ 
+--- a/mm/memblock.c
++++ b/mm/memblock.c
+@@ -1634,6 +1634,30 @@ static void __init_memblock memblock_dum
+ 	}
+ }
+ 
++extern unsigned long __init_memblock
++memblock_reserved_memory_within(phys_addr_t start_addr, phys_addr_t end_addr)
++{
++	struct memblock_type *type = &memblock.reserved;
++	unsigned long size = 0;
++	int idx;
++
++	for (idx = 0; idx < type->cnt; idx++) {
++		struct memblock_region *rgn = &type->regions[idx];
++		phys_addr_t start, end;
++
++		if (rgn->base + rgn->size < start_addr)
++			continue;
++		if (rgn->base > end_addr)
++			continue;
++
++		start = rgn->base;
++		end = start + rgn->size;
++		size += end - start;
++	}
++
++	return size;
++}
++
+ void __init_memblock __memblock_dump_all(void)
+ {
+ 	pr_info("MEMBLOCK configuration:\n");
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -269,6 +269,26 @@ int page_group_by_mobility_disabled __re
+ #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+ static inline void reset_deferred_meminit(pg_data_t *pgdat)
+ {
++	unsigned long max_initialise;
++	unsigned long reserved_lowmem;
++
++	/*
++	 * Initialise at least 2G of a node but also take into account that
++	 * two large system hashes that can take up 1GB for 0.25TB/node.
++	 */
++	max_initialise = max(2UL << (30 - PAGE_SHIFT),
++		(pgdat->node_spanned_pages >> 8));
++
++	/*
++	 * Compensate the all the memblock reservations (e.g. crash kernel)
++	 * from the initial estimation to make sure we will initialize enough
++	 * memory to boot.
++ */ ++ reserved_lowmem = memblock_reserved_memory_within(pgdat->node_start_pfn, ++ pgdat->node_start_pfn + max_initialise); ++ max_initialise += reserved_lowmem; ++ ++ pgdat->static_init_size = min(max_initialise, pgdat->node_spanned_pages); + pgdat->first_deferred_pfn = ULONG_MAX; + } + +@@ -302,10 +322,9 @@ static inline bool update_defer_init(pg_ + /* Always populate low zones for address-contrained allocations */ + if (zone_end < pgdat_end_pfn(pgdat)) + return true; +- + /* Initialise at least 2G of the highest zone */ + (*nr_initialised)++; +- if (*nr_initialised > (2UL << (30 - PAGE_SHIFT)) && ++ if ((*nr_initialised > pgdat->static_init_size) && + (pfn & (PAGES_PER_SECTION - 1)) == 0) { + pgdat->first_deferred_pfn = pfn; + return false; +@@ -5343,7 +5362,6 @@ void __paginginit free_area_init_node(in + /* pg_data_t should be reset to zero when it's allocated */ + WARN_ON(pgdat->nr_zones || pgdat->classzone_idx); + +- reset_deferred_meminit(pgdat); + pgdat->node_id = nid; + pgdat->node_start_pfn = node_start_pfn; + #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP +@@ -5362,6 +5380,7 @@ void __paginginit free_area_init_node(in + (unsigned long)pgdat->node_mem_map); + #endif + ++ reset_deferred_meminit(pgdat); + free_area_init_core(pgdat); + } + diff --git a/queue-4.4/net-better-skb-sender_cpu-and-skb-napi_id-cohabitation.patch b/queue-4.4/net-better-skb-sender_cpu-and-skb-napi_id-cohabitation.patch new file mode 100644 index 00000000000..e13e8deb559 --- /dev/null +++ b/queue-4.4/net-better-skb-sender_cpu-and-skb-napi_id-cohabitation.patch @@ -0,0 +1,108 @@ +From 52bd2d62ce6758d811edcbd2256eb9ea7f6a56cb Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Wed, 18 Nov 2015 06:30:50 -0800 +Subject: net: better skb->sender_cpu and skb->napi_id cohabitation + +From: Eric Dumazet + +commit 52bd2d62ce6758d811edcbd2256eb9ea7f6a56cb upstream. + +skb->sender_cpu and skb->napi_id share a common storage, +and we had various bugs about this. + +We had to call skb_sender_cpu_clear() in some places to +not leave a prior skb->napi_id and fool netdev_pick_tx() + +As suggested by Alexei, we could split the space so that +these errors can not happen. + +0 value being reserved as the common (not initialized) value, +let's reserve [1 .. NR_CPUS] range for valid sender_cpu, +and [NR_CPUS+1 .. ~0U] for valid napi_id. + +This will allow proper busy polling support over tunnels. + +Signed-off-by: Eric Dumazet +Suggested-by: Alexei Starovoitov +Acked-by: Alexei Starovoitov +Signed-off-by: David S. 
Miller +Cc: Paul Menzel +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/skbuff.h | 3 --- + net/core/dev.c | 33 ++++++++++++++++----------------- + 2 files changed, 16 insertions(+), 20 deletions(-) + +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -1084,9 +1084,6 @@ static inline void skb_copy_hash(struct + + static inline void skb_sender_cpu_clear(struct sk_buff *skb) + { +-#ifdef CONFIG_XPS +- skb->sender_cpu = 0; +-#endif + } + + #ifdef NET_SKBUFF_DATA_USES_OFFSET +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -182,7 +182,7 @@ EXPORT_SYMBOL(dev_base_lock); + /* protects napi_hash addition/deletion and napi_gen_id */ + static DEFINE_SPINLOCK(napi_hash_lock); + +-static unsigned int napi_gen_id; ++static unsigned int napi_gen_id = NR_CPUS; + static DEFINE_HASHTABLE(napi_hash, 8); + + static seqcount_t devnet_rename_seq; +@@ -3049,7 +3049,9 @@ struct netdev_queue *netdev_pick_tx(stru + int queue_index = 0; + + #ifdef CONFIG_XPS +- if (skb->sender_cpu == 0) ++ u32 sender_cpu = skb->sender_cpu - 1; ++ ++ if (sender_cpu >= (u32)NR_CPUS) + skb->sender_cpu = raw_smp_processor_id() + 1; + #endif + +@@ -4726,25 +4728,22 @@ EXPORT_SYMBOL_GPL(napi_by_id); + + void napi_hash_add(struct napi_struct *napi) + { +- if (!test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) { ++ if (test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) ++ return; + +- spin_lock(&napi_hash_lock); ++ spin_lock(&napi_hash_lock); + +- /* 0 is not a valid id, we also skip an id that is taken +- * we expect both events to be extremely rare +- */ +- napi->napi_id = 0; +- while (!napi->napi_id) { +- napi->napi_id = ++napi_gen_id; +- if (napi_by_id(napi->napi_id)) +- napi->napi_id = 0; +- } ++ /* 0..NR_CPUS+1 range is reserved for sender_cpu use */ ++ do { ++ if (unlikely(++napi_gen_id < NR_CPUS + 1)) ++ napi_gen_id = NR_CPUS + 1; ++ } while (napi_by_id(napi_gen_id)); ++ napi->napi_id = napi_gen_id; + +- hlist_add_head_rcu(&napi->napi_hash_node, +- &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]); ++ hlist_add_head_rcu(&napi->napi_hash_node, ++ &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]); + +- spin_unlock(&napi_hash_lock); +- } ++ spin_unlock(&napi_hash_lock); + } + EXPORT_SYMBOL_GPL(napi_hash_add); + diff --git a/queue-4.4/nfs-ensure-we-revalidate-attributes-before-using-execute_ok.patch b/queue-4.4/nfs-ensure-we-revalidate-attributes-before-using-execute_ok.patch new file mode 100644 index 00000000000..02a036ea8e0 --- /dev/null +++ b/queue-4.4/nfs-ensure-we-revalidate-attributes-before-using-execute_ok.patch @@ -0,0 +1,61 @@ +From 5c5fc09a1157a11dbe84e6421c3e0b37d05238cb Mon Sep 17 00:00:00 2001 +From: Trond Myklebust +Date: Mon, 28 Dec 2015 19:30:05 -0500 +Subject: NFS: Ensure we revalidate attributes before using execute_ok() + +From: Trond Myklebust + +commit 5c5fc09a1157a11dbe84e6421c3e0b37d05238cb upstream. + +Donald Buczek reports that NFS clients can also report incorrect +results for access() due to lack of revalidation of attributes +before calling execute_ok(). +Looking closely, it seems chdir() is afflicted with the same problem. + +Fix is to ensure we call nfs_revalidate_inode_rcu() or +nfs_revalidate_inode() as appropriate before deciding to trust +execute_ok(). 
+ +Reported-by: Donald Buczek +Link: http://lkml.kernel.org/r/1451331530-3748-1-git-send-email-buczek@molgen.mpg.de +Signed-off-by: Trond Myklebust +Signed-off-by: Paul Menzel +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfs/dir.c | 18 ++++++++++++++++-- + 1 file changed, 16 insertions(+), 2 deletions(-) + +--- a/fs/nfs/dir.c ++++ b/fs/nfs/dir.c +@@ -2421,6 +2421,20 @@ int nfs_may_open(struct inode *inode, st + } + EXPORT_SYMBOL_GPL(nfs_may_open); + ++static int nfs_execute_ok(struct inode *inode, int mask) ++{ ++ struct nfs_server *server = NFS_SERVER(inode); ++ int ret; ++ ++ if (mask & MAY_NOT_BLOCK) ++ ret = nfs_revalidate_inode_rcu(server, inode); ++ else ++ ret = nfs_revalidate_inode(server, inode); ++ if (ret == 0 && !execute_ok(inode)) ++ ret = -EACCES; ++ return ret; ++} ++ + int nfs_permission(struct inode *inode, int mask) + { + struct rpc_cred *cred; +@@ -2470,8 +2484,8 @@ force_lookup: + res = PTR_ERR(cred); + } + out: +- if (!res && (mask & MAY_EXEC) && !execute_ok(inode)) +- res = -EACCES; ++ if (!res && (mask & MAY_EXEC)) ++ res = nfs_execute_ok(inode, mask); + + dfprintk(VFS, "NFS: permission(%s/%lu), mask=0x%x, res=%d\n", + inode->i_sb->s_id, inode->i_ino, mask, res); diff --git a/queue-4.4/nfsv4-don-t-perform-cached-access-checks-before-we-ve-opened-the-file.patch b/queue-4.4/nfsv4-don-t-perform-cached-access-checks-before-we-ve-opened-the-file.patch new file mode 100644 index 00000000000..2e4a4d768ba --- /dev/null +++ b/queue-4.4/nfsv4-don-t-perform-cached-access-checks-before-we-ve-opened-the-file.patch @@ -0,0 +1,62 @@ +From 762674f86d0328d5dc923c966e209e1ee59663f2 Mon Sep 17 00:00:00 2001 +From: Trond Myklebust +Date: Sat, 26 Dec 2015 21:54:58 -0500 +Subject: NFSv4: Don't perform cached access checks before we've OPENed the file + +From: Trond Myklebust + +commit 762674f86d0328d5dc923c966e209e1ee59663f2 upstream. + +Donald Buczek reports that a nfs4 client incorrectly denies +execute access based on outdated file mode (missing 'x' bit). +After the mode on the server is 'fixed' (chmod +x) further execution +attempts continue to fail, because the nfs ACCESS call updates +the access parameter but not the mode parameter or the mode in +the inode. + +The root cause is ultimately that the VFS is calling may_open() +before the NFS client has a chance to OPEN the file and hence revalidate +the access and attribute caches. + +Al Viro suggests: +>>> Make nfs_permission() relax the checks when it sees MAY_OPEN, if you know +>>> that things will be caught by server anyway? +>> +>> That can work as long as we're guaranteed that everything that calls +>> inode_permission() with MAY_OPEN on a regular file will also follow up +>> with a vfs_open() or dentry_open() on success. Is this always the +>> case? +> +> 1) in do_tmpfile(), followed by do_dentry_open() (not reachable by NFS since +> it doesn't have ->tmpfile() instance anyway) +> +> 2) in atomic_open(), after the call of ->atomic_open() has succeeded. +> +> 3) in do_last(), followed on success by vfs_open() +> +> That's all. All calls of inode_permission() that get MAY_OPEN come from +> may_open(), and there's no other callers of that puppy. 
+ +Reported-by: Donald Buczek +Link: https://bugzilla.kernel.org/show_bug.cgi?id=109771 +Link: http://lkml.kernel.org/r/1451046656-26319-1-git-send-email-buczek@molgen.mpg.de +Cc: Al Viro +Signed-off-by: Trond Myklebust +Signed-off-by: Paul Menzel +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfs/dir.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/fs/nfs/dir.c ++++ b/fs/nfs/dir.c +@@ -2452,6 +2452,9 @@ int nfs_permission(struct inode *inode, + case S_IFLNK: + goto out; + case S_IFREG: ++ if ((mask & MAY_OPEN) && ++ nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN)) ++ return 0; + break; + case S_IFDIR: + /* diff --git a/queue-4.4/perf-core-drop-kernel-samples-even-though-u-is-specified.patch b/queue-4.4/perf-core-drop-kernel-samples-even-though-u-is-specified.patch new file mode 100644 index 00000000000..f32642ec6f3 --- /dev/null +++ b/queue-4.4/perf-core-drop-kernel-samples-even-though-u-is-specified.patch @@ -0,0 +1,127 @@ +From cc1582c231ea041fbc68861dfaf957eaf902b829 Mon Sep 17 00:00:00 2001 +From: Jin Yao +Date: Thu, 25 May 2017 18:09:07 +0800 +Subject: perf/core: Drop kernel samples even though :u is specified + +From: Jin Yao + +commit cc1582c231ea041fbc68861dfaf957eaf902b829 upstream. + +When doing sampling, for example: + + perf record -e cycles:u ... + +On workloads that do a lot of kernel entry/exits we see kernel +samples, even though :u is specified. This is due to skid existing. + +This might be a security issue because it can leak kernel addresses even +though kernel sampling support is disabled. + +The patch drops the kernel samples if exclude_kernel is specified. + +For example, test on Haswell desktop: + + perf record -e cycles:u + perf report --stdio + +Before patch applied: + + 99.77% mgen mgen [.] buf_read + 0.20% mgen mgen [.] rand_buf_init + 0.01% mgen [kernel.vmlinux] [k] apic_timer_interrupt + 0.00% mgen mgen [.] last_free_elem + 0.00% mgen libc-2.23.so [.] __random_r + 0.00% mgen libc-2.23.so [.] _int_malloc + 0.00% mgen mgen [.] rand_array_init + 0.00% mgen [kernel.vmlinux] [k] page_fault + 0.00% mgen libc-2.23.so [.] __random + 0.00% mgen libc-2.23.so [.] __strcasestr + 0.00% mgen ld-2.23.so [.] strcmp + 0.00% mgen ld-2.23.so [.] _dl_start + 0.00% mgen libc-2.23.so [.] sched_setaffinity@@GLIBC_2.3.4 + 0.00% mgen ld-2.23.so [.] _start + +We can see kernel symbols apic_timer_interrupt and page_fault. + +After patch applied: + + 99.79% mgen mgen [.] buf_read + 0.19% mgen mgen [.] rand_buf_init + 0.00% mgen libc-2.23.so [.] __random_r + 0.00% mgen mgen [.] rand_array_init + 0.00% mgen mgen [.] last_free_elem + 0.00% mgen libc-2.23.so [.] vfprintf + 0.00% mgen libc-2.23.so [.] rand + 0.00% mgen libc-2.23.so [.] __random + 0.00% mgen libc-2.23.so [.] _int_malloc + 0.00% mgen libc-2.23.so [.] _IO_doallocbuf + 0.00% mgen ld-2.23.so [.] do_lookup_x + 0.00% mgen ld-2.23.so [.] open_verify.constprop.7 + 0.00% mgen ld-2.23.so [.] _dl_important_hwcaps + 0.00% mgen libc-2.23.so [.] sched_setaffinity@@GLIBC_2.3.4 + 0.00% mgen ld-2.23.so [.] _start + +There are only userspace symbols. 
+ +Signed-off-by: Jin Yao +Signed-off-by: Peter Zijlstra (Intel) +Cc: Alexander Shishkin +Cc: Arnaldo Carvalho de Melo +Cc: Jiri Olsa +Cc: Linus Torvalds +Cc: Namhyung Kim +Cc: Peter Zijlstra +Cc: Stephane Eranian +Cc: Thomas Gleixner +Cc: Vince Weaver +Cc: acme@kernel.org +Cc: jolsa@kernel.org +Cc: kan.liang@intel.com +Cc: mark.rutland@arm.com +Cc: will.deacon@arm.com +Cc: yao.jin@intel.com +Link: http://lkml.kernel.org/r/1495706947-3744-1-git-send-email-yao.jin@linux.intel.com +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/events/core.c | 21 +++++++++++++++++++++ + 1 file changed, 21 insertions(+) + +--- a/kernel/events/core.c ++++ b/kernel/events/core.c +@@ -6410,6 +6410,21 @@ static void perf_log_itrace_start(struct + perf_output_end(&handle); + } + ++static bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs) ++{ ++ /* ++ * Due to interrupt latency (AKA "skid"), we may enter the ++ * kernel before taking an overflow, even if the PMU is only ++ * counting user events. ++ * To avoid leaking information to userspace, we must always ++ * reject kernel samples when exclude_kernel is set. ++ */ ++ if (event->attr.exclude_kernel && !user_mode(regs)) ++ return false; ++ ++ return true; ++} ++ + /* + * Generic event overflow handling, sampling. + */ +@@ -6457,6 +6472,12 @@ static int __perf_event_overflow(struct + } + + /* ++ * For security, drop the skid kernel samples if necessary. ++ */ ++ if (!sample_is_allowed(event, regs)) ++ return ret; ++ ++ /* + * XXX event_limit might not quite work as expected on inherited + * events + */ diff --git a/queue-4.4/powerpc-eeh-avoid-use-after-free-in-eeh_handle_special_event.patch b/queue-4.4/powerpc-eeh-avoid-use-after-free-in-eeh_handle_special_event.patch new file mode 100644 index 00000000000..b6c0da412ec --- /dev/null +++ b/queue-4.4/powerpc-eeh-avoid-use-after-free-in-eeh_handle_special_event.patch @@ -0,0 +1,91 @@ +From daeba2956f32f91f3493788ff6ee02fb1b2f02fa Mon Sep 17 00:00:00 2001 +From: Russell Currey +Date: Wed, 19 Apr 2017 17:39:26 +1000 +Subject: powerpc/eeh: Avoid use after free in eeh_handle_special_event() + +From: Russell Currey + +commit daeba2956f32f91f3493788ff6ee02fb1b2f02fa upstream. + +eeh_handle_special_event() is called when an EEH event is detected but +can't be narrowed down to a specific PE. This function looks through +every PE to find one in an erroneous state, then calls the regular event +handler eeh_handle_normal_event() once it knows which PE has an error. + +However, if eeh_handle_normal_event() found that the PE cannot possibly +be recovered, it will free it, rendering the passed PE stale. +This leads to a use after free in eeh_handle_special_event() as it attempts to +clear the "recovering" state on the PE after eeh_handle_normal_event() returns. + +Thus, make sure the PE is valid when attempting to clear state in +eeh_handle_special_event(). 
+ +Fixes: 8a6b1bc70dbb ("powerpc/eeh: EEH core to handle special event") +Reported-by: Alexey Kardashevskiy +Signed-off-by: Russell Currey +Reviewed-by: Gavin Shan +Signed-off-by: Michael Ellerman +Signed-off-by: Greg Kroah-Hartman + + +--- + arch/powerpc/kernel/eeh_driver.c | 19 +++++++++++++++---- + 1 file changed, 15 insertions(+), 4 deletions(-) + +--- a/arch/powerpc/kernel/eeh_driver.c ++++ b/arch/powerpc/kernel/eeh_driver.c +@@ -655,7 +655,7 @@ static int eeh_reset_device(struct eeh_p + */ + #define MAX_WAIT_FOR_RECOVERY 300 + +-static void eeh_handle_normal_event(struct eeh_pe *pe) ++static bool eeh_handle_normal_event(struct eeh_pe *pe) + { + struct pci_bus *frozen_bus; + int rc = 0; +@@ -665,7 +665,7 @@ static void eeh_handle_normal_event(stru + if (!frozen_bus) { + pr_err("%s: Cannot find PCI bus for PHB#%d-PE#%x\n", + __func__, pe->phb->global_number, pe->addr); +- return; ++ return false; + } + + eeh_pe_update_time_stamp(pe); +@@ -790,7 +790,7 @@ static void eeh_handle_normal_event(stru + pr_info("EEH: Notify device driver to resume\n"); + eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); + +- return; ++ return false; + + excess_failures: + /* +@@ -831,7 +831,11 @@ perm_error: + pci_lock_rescan_remove(); + pcibios_remove_pci_devices(frozen_bus); + pci_unlock_rescan_remove(); ++ ++ /* The passed PE should no longer be used */ ++ return true; + } ++ return false; + } + + static void eeh_handle_special_event(void) +@@ -897,7 +901,14 @@ static void eeh_handle_special_event(voi + */ + if (rc == EEH_NEXT_ERR_FROZEN_PE || + rc == EEH_NEXT_ERR_FENCED_PHB) { +- eeh_handle_normal_event(pe); ++ /* ++ * eeh_handle_normal_event() can make the PE stale if it ++ * determines that the PE cannot possibly be recovered. ++ * Don't modify the PE state if that's the case. ++ */ ++ if (eeh_handle_normal_event(pe)) ++ continue; ++ + eeh_pe_state_clear(pe, EEH_PE_RECOVERING); + } else { + pci_lock_rescan_remove(); diff --git a/queue-4.4/powerpc-hotplug-mem-fix-missing-endian-conversion-of-aa_index.patch b/queue-4.4/powerpc-hotplug-mem-fix-missing-endian-conversion-of-aa_index.patch new file mode 100644 index 00000000000..00ba05f6317 --- /dev/null +++ b/queue-4.4/powerpc-hotplug-mem-fix-missing-endian-conversion-of-aa_index.patch @@ -0,0 +1,42 @@ +From dc421b200f91930c9c6a9586810ff8c232cf10fc Mon Sep 17 00:00:00 2001 +From: Michael Bringmann +Date: Mon, 22 May 2017 15:44:37 -0500 +Subject: powerpc/hotplug-mem: Fix missing endian conversion of aa_index + +From: Michael Bringmann + +commit dc421b200f91930c9c6a9586810ff8c232cf10fc upstream. + +When adding or removing memory, the aa_index (affinity value) for the +memblock must also be converted to match the endianness of the rest +of the 'ibm,dynamic-memory' property. Otherwise, subsequent retrieval +of the attribute will likely lead to non-existent nodes, followed by +using the default node in the code inappropriately. 
+ +Fixes: 5f97b2a0d176 ("powerpc/pseries: Implement memory hotplug add in the kernel") +Signed-off-by: Michael Bringmann +Signed-off-by: Michael Ellerman +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/platforms/pseries/hotplug-memory.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/arch/powerpc/platforms/pseries/hotplug-memory.c ++++ b/arch/powerpc/platforms/pseries/hotplug-memory.c +@@ -110,6 +110,7 @@ static struct property *dlpar_clone_drco + for (i = 0; i < num_lmbs; i++) { + lmbs[i].base_addr = be64_to_cpu(lmbs[i].base_addr); + lmbs[i].drc_index = be32_to_cpu(lmbs[i].drc_index); ++ lmbs[i].aa_index = be32_to_cpu(lmbs[i].aa_index); + lmbs[i].flags = be32_to_cpu(lmbs[i].flags); + } + +@@ -553,6 +554,7 @@ static void dlpar_update_drconf_property + for (i = 0; i < num_lmbs; i++) { + lmbs[i].base_addr = cpu_to_be64(lmbs[i].base_addr); + lmbs[i].drc_index = cpu_to_be32(lmbs[i].drc_index); ++ lmbs[i].aa_index = cpu_to_be32(lmbs[i].aa_index); + lmbs[i].flags = cpu_to_be32(lmbs[i].flags); + } + diff --git a/queue-4.4/powerpc-numa-fix-percpu-allocations-to-be-numa-aware.patch b/queue-4.4/powerpc-numa-fix-percpu-allocations-to-be-numa-aware.patch new file mode 100644 index 00000000000..6dd216760ab --- /dev/null +++ b/queue-4.4/powerpc-numa-fix-percpu-allocations-to-be-numa-aware.patch @@ -0,0 +1,105 @@ +From ba4a648f12f4cd0a8003dd229b6ca8a53348ee4b Mon Sep 17 00:00:00 2001 +From: Michael Ellerman +Date: Tue, 6 Jun 2017 20:23:57 +1000 +Subject: powerpc/numa: Fix percpu allocations to be NUMA aware + +From: Michael Ellerman + +commit ba4a648f12f4cd0a8003dd229b6ca8a53348ee4b upstream. + +In commit 8c272261194d ("powerpc/numa: Enable USE_PERCPU_NUMA_NODE_ID"), we +switched to the generic implementation of cpu_to_node(), which uses a percpu +variable to hold the NUMA node for each CPU. + +Unfortunately we neglected to notice that we use cpu_to_node() in the allocation +of our percpu areas, leading to a chicken and egg problem. In practice what +happens is when we are setting up the percpu areas, cpu_to_node() reports that +all CPUs are on node 0, so we allocate all percpu areas on node 0. + +This is visible in the dmesg output, as all pcpu allocs being in group 0: + + pcpu-alloc: [0] 00 01 02 03 [0] 04 05 06 07 + pcpu-alloc: [0] 08 09 10 11 [0] 12 13 14 15 + pcpu-alloc: [0] 16 17 18 19 [0] 20 21 22 23 + pcpu-alloc: [0] 24 25 26 27 [0] 28 29 30 31 + pcpu-alloc: [0] 32 33 34 35 [0] 36 37 38 39 + pcpu-alloc: [0] 40 41 42 43 [0] 44 45 46 47 + +To fix it we need an early_cpu_to_node() which can run prior to percpu being +setup. We already have the numa_cpu_lookup_table we can use, so just plumb it +in. 
With the patch dmesg output shows two groups, 0 and 1: + + pcpu-alloc: [0] 00 01 02 03 [0] 04 05 06 07 + pcpu-alloc: [0] 08 09 10 11 [0] 12 13 14 15 + pcpu-alloc: [0] 16 17 18 19 [0] 20 21 22 23 + pcpu-alloc: [1] 24 25 26 27 [1] 28 29 30 31 + pcpu-alloc: [1] 32 33 34 35 [1] 36 37 38 39 + pcpu-alloc: [1] 40 41 42 43 [1] 44 45 46 47 + +We can also check the data_offset in the paca of various CPUs, with the fix we +see: + + CPU 0: data_offset = 0x0ffe8b0000 + CPU 24: data_offset = 0x1ffe5b0000 + +And we can see from dmesg that CPU 24 has an allocation on node 1: + + node 0: [mem 0x0000000000000000-0x0000000fffffffff] + node 1: [mem 0x0000001000000000-0x0000001fffffffff] + +Fixes: 8c272261194d ("powerpc/numa: Enable USE_PERCPU_NUMA_NODE_ID") +Signed-off-by: Michael Ellerman +Reviewed-by: Nicholas Piggin +Signed-off-by: Michael Ellerman +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/include/asm/topology.h | 14 ++++++++++++++ + arch/powerpc/kernel/setup_64.c | 4 ++-- + 2 files changed, 16 insertions(+), 2 deletions(-) + +--- a/arch/powerpc/include/asm/topology.h ++++ b/arch/powerpc/include/asm/topology.h +@@ -44,8 +44,22 @@ extern void __init dump_numa_cpu_topolog + extern int sysfs_add_device_to_node(struct device *dev, int nid); + extern void sysfs_remove_device_from_node(struct device *dev, int nid); + ++static inline int early_cpu_to_node(int cpu) ++{ ++ int nid; ++ ++ nid = numa_cpu_lookup_table[cpu]; ++ ++ /* ++ * Fall back to node 0 if nid is unset (it should be, except bugs). ++ * This allows callers to safely do NODE_DATA(early_cpu_to_node(cpu)). ++ */ ++ return (nid < 0) ? 0 : nid; ++} + #else + ++static inline int early_cpu_to_node(int cpu) { return 0; } ++ + static inline void dump_numa_cpu_topology(void) {} + + static inline int sysfs_add_device_to_node(struct device *dev, int nid) +--- a/arch/powerpc/kernel/setup_64.c ++++ b/arch/powerpc/kernel/setup_64.c +@@ -751,7 +751,7 @@ void __init setup_arch(char **cmdline_p) + + static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) + { +- return __alloc_bootmem_node(NODE_DATA(cpu_to_node(cpu)), size, align, ++ return __alloc_bootmem_node(NODE_DATA(early_cpu_to_node(cpu)), size, align, + __pa(MAX_DMA_ADDRESS)); + } + +@@ -762,7 +762,7 @@ static void __init pcpu_fc_free(void *pt + + static int pcpu_cpu_distance(unsigned int from, unsigned int to) + { +- if (cpu_to_node(from) == cpu_to_node(to)) ++ if (early_cpu_to_node(from) == early_cpu_to_node(to)) + return LOCAL_DISTANCE; + else + return REMOTE_DISTANCE; diff --git a/queue-4.4/scsi-qla2xxx-don-t-disable-a-not-previously-enabled-pci-device.patch b/queue-4.4/scsi-qla2xxx-don-t-disable-a-not-previously-enabled-pci-device.patch new file mode 100644 index 00000000000..38c40c0d685 --- /dev/null +++ b/queue-4.4/scsi-qla2xxx-don-t-disable-a-not-previously-enabled-pci-device.patch @@ -0,0 +1,65 @@ +From ddff7ed45edce4a4c92949d3c61cd25d229c4a14 Mon Sep 17 00:00:00 2001 +From: Johannes Thumshirn +Date: Tue, 23 May 2017 16:50:47 +0200 +Subject: scsi: qla2xxx: don't disable a not previously enabled PCI device + +From: Johannes Thumshirn + +commit ddff7ed45edce4a4c92949d3c61cd25d229c4a14 upstream. + +When pci_enable_device() or pci_enable_device_mem() fail in +qla2x00_probe_one() we bail out but do a call to +pci_disable_device(). This causes the dev_WARN_ON() in +pci_disable_device() to trigger, as the device wasn't enabled +previously. + +So instead of taking the 'probe_out' error path we can directly return +*iff* one of the pci_enable_device() calls fails. 
+
+Additionally, rename the 'probe_out' goto label to the more descriptive
+'disable_device'.
+
+Signed-off-by: Johannes Thumshirn
+Fixes: e315cd28b9ef ("[SCSI] qla2xxx: Code changes for qla data structure refactoring")
+Reviewed-by: Bart Van Assche
+Reviewed-by: Giridhar Malavali
+Signed-off-by: Martin K. Petersen
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/scsi/qla2xxx/qla_os.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/scsi/qla2xxx/qla_os.c
++++ b/drivers/scsi/qla2xxx/qla_os.c
+@@ -2311,10 +2311,10 @@ qla2x00_probe_one(struct pci_dev *pdev,
+ 
+ 	if (mem_only) {
+ 		if (pci_enable_device_mem(pdev))
+-			goto probe_out;
++			return ret;
+ 	} else {
+ 		if (pci_enable_device(pdev))
+-			goto probe_out;
++			return ret;
+ 	}
+ 
+ 	/* This may fail but that's ok */
+@@ -2324,7 +2324,7 @@ qla2x00_probe_one(struct pci_dev *pdev,
+ 	if (!ha) {
+ 		ql_log_pci(ql_log_fatal, pdev, 0x0009,
+ 		    "Unable to allocate memory for ha.\n");
+-		goto probe_out;
++		goto disable_device;
+ 	}
+ 	ql_dbg_pci(ql_dbg_init, pdev, 0x000a,
+ 	    "Memory allocated for ha=%p.\n", ha);
+@@ -2923,7 +2923,7 @@ iospace_config_failed:
+ 		kfree(ha);
+ 		ha = NULL;
+ 
+-probe_out:
++disable_device:
+ 	pci_disable_device(pdev);
+ 	return ret;
+ }
diff --git a/queue-4.4/serial-sh-sci-fix-panic-when-serial-console-and-dma-are-enabled.patch b/queue-4.4/serial-sh-sci-fix-panic-when-serial-console-and-dma-are-enabled.patch
new file mode 100644
index 00000000000..d618f19f9f4
--- /dev/null
+++ b/queue-4.4/serial-sh-sci-fix-panic-when-serial-console-and-dma-are-enabled.patch
@@ -0,0 +1,61 @@
+From 3c9101766b502a0163d1d437fada5801cf616be2 Mon Sep 17 00:00:00 2001
+From: Takatoshi Akiyama
+Date: Mon, 27 Feb 2017 15:56:31 +0900
+Subject: serial: sh-sci: Fix panic when serial console and DMA are enabled
+
+From: Takatoshi Akiyama
+
+commit 3c9101766b502a0163d1d437fada5801cf616be2 upstream.
+
+This patch fixes a kernel panic that happens when DMA is enabled and
+the enter key is pressed on the serial console while the kernel is
+booting.
+
+* An interrupt may occur after sci_request_irq().
+* The DMA transfer area is initialized by setup_timer() in
+  sci_request_dma() and used in the interrupt.
+
+If an interrupt occurs between sci_request_irq() and setup_timer() in
+sci_request_dma(), the DMA transfer area has not been initialized yet.
+So, this patch changes the order of sci_request_irq() and
+sci_request_dma().
+ +Fixes: 73a19e4c0301 ("serial: sh-sci: Add DMA support.") +Signed-off-by: Takatoshi Akiyama +[Shimoda changes the commit log] +Signed-off-by: Yoshihiro Shimoda +Cc: Jiri Slaby +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/tty/serial/sh-sci.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/drivers/tty/serial/sh-sci.c ++++ b/drivers/tty/serial/sh-sci.c +@@ -1800,11 +1800,13 @@ static int sci_startup(struct uart_port + + dev_dbg(port->dev, "%s(%d)\n", __func__, port->line); + ++ sci_request_dma(port); ++ + ret = sci_request_irq(s); +- if (unlikely(ret < 0)) ++ if (unlikely(ret < 0)) { ++ sci_free_dma(port); + return ret; +- +- sci_request_dma(port); ++ } + + spin_lock_irqsave(&port->lock, flags); + sci_start_tx(port); +@@ -1834,8 +1836,8 @@ static void sci_shutdown(struct uart_por + } + #endif + +- sci_free_dma(port); + sci_free_irq(s); ++ sci_free_dma(port); + } + + static unsigned int sci_scbrr_calc(struct sci_port *s, unsigned int bps, diff --git a/queue-4.4/series b/queue-4.4/series index 5d55c001143..acdf8a43a93 100644 --- a/queue-4.4/series +++ b/queue-4.4/series @@ -60,3 +60,23 @@ ufs_getfrag_block-we-only-grab-truncate_mutex-on-block-creation-path.patch cxl-fix-error-path-on-bad-ioctl.patch btrfs-use-correct-types-for-page-indices-in-btrfs_page_exists_in_range.patch btrfs-fix-memory-leak-in-update_space_info-failure-path.patch +kvm-arm-arm64-handle-possible-null-stage2-pud-when-ageing-pages.patch +scsi-qla2xxx-don-t-disable-a-not-previously-enabled-pci-device.patch +powerpc-eeh-avoid-use-after-free-in-eeh_handle_special_event.patch +powerpc-numa-fix-percpu-allocations-to-be-numa-aware.patch +powerpc-hotplug-mem-fix-missing-endian-conversion-of-aa_index.patch +perf-core-drop-kernel-samples-even-though-u-is-specified.patch +drm-vmwgfx-handle-vmalloc-failure-in-vmw_local_fifo_reserve.patch +drm-vmwgfx-limit-the-number-of-mip-levels-in-vmw_gb_surface_define_ioctl.patch +drm-vmwgfx-make-sure-backup_handle-is-always-valid.patch +drm-nouveau-tmr-fully-separate-alarm-execution-pending-lists.patch +alsa-timer-fix-race-between-read-and-ioctl.patch +alsa-timer-fix-missing-queue-indices-reset-at-sndrv_timer_ioctl_select.patch +asoc-fix-use-after-free-at-card-unregistration.patch +drivers-char-mem-fix-wraparound-check-to-allow-mappings-up-to-the-end.patch +tty-drop-krefs-for-interrupted-tty-lock.patch +serial-sh-sci-fix-panic-when-serial-console-and-dma-are-enabled.patch +net-better-skb-sender_cpu-and-skb-napi_id-cohabitation.patch +mm-consider-memblock-reservations-for-deferred-memory-initialization-sizing.patch +nfs-ensure-we-revalidate-attributes-before-using-execute_ok.patch +nfsv4-don-t-perform-cached-access-checks-before-we-ve-opened-the-file.patch diff --git a/queue-4.4/tty-drop-krefs-for-interrupted-tty-lock.patch b/queue-4.4/tty-drop-krefs-for-interrupted-tty-lock.patch new file mode 100644 index 00000000000..ce66a5e812c --- /dev/null +++ b/queue-4.4/tty-drop-krefs-for-interrupted-tty-lock.patch @@ -0,0 +1,61 @@ +From e9036d0662360cd4c79578565ce422ed5872f301 Mon Sep 17 00:00:00 2001 +From: Peter Hurley +Date: Fri, 5 Feb 2016 10:49:36 -0800 +Subject: tty: Drop krefs for interrupted tty lock + +From: Peter Hurley + +commit e9036d0662360cd4c79578565ce422ed5872f301 upstream. + +When the tty lock is interrupted on attempted re-open, 2 tty krefs +are still held. Drop extra kref before returning failure from +tty_lock_interruptible(), and drop lookup kref before returning +failure from tty_open(). 
+ +Fixes: 0bfd464d3fdd ("tty: Wait interruptibly for tty lock on reopen") +Reported-by: Dmitry Vyukov +Signed-off-by: Peter Hurley +Cc: Jiri Slaby +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/tty/tty_io.c | 3 +-- + drivers/tty/tty_mutex.c | 7 ++++++- + 2 files changed, 7 insertions(+), 3 deletions(-) + +--- a/drivers/tty/tty_io.c ++++ b/drivers/tty/tty_io.c +@@ -2070,13 +2070,12 @@ retry_open: + if (tty) { + mutex_unlock(&tty_mutex); + retval = tty_lock_interruptible(tty); ++ tty_kref_put(tty); /* drop kref from tty_driver_lookup_tty() */ + if (retval) { + if (retval == -EINTR) + retval = -ERESTARTSYS; + goto err_unref; + } +- /* safe to drop the kref from tty_driver_lookup_tty() */ +- tty_kref_put(tty); + retval = tty_reopen(tty); + if (retval < 0) { + tty_unlock(tty); +--- a/drivers/tty/tty_mutex.c ++++ b/drivers/tty/tty_mutex.c +@@ -24,10 +24,15 @@ EXPORT_SYMBOL(tty_lock); + + int tty_lock_interruptible(struct tty_struct *tty) + { ++ int ret; ++ + if (WARN(tty->magic != TTY_MAGIC, "L Bad %p\n", tty)) + return -EIO; + tty_kref_get(tty); +- return mutex_lock_interruptible(&tty->legacy_mutex); ++ ret = mutex_lock_interruptible(&tty->legacy_mutex); ++ if (ret) ++ tty_kref_put(tty); ++ return ret; + } + + void __lockfunc tty_unlock(struct tty_struct *tty)