From: Greg Kroah-Hartman Date: Fri, 9 Sep 2016 14:36:52 +0000 (+0200) Subject: 4.4-stable patches X-Git-Tag: v3.14.79~8 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=e7ad46801f8522313a3e48ae121a02f9acea7b14;p=thirdparty%2Fkernel%2Fstable-queue.git 4.4-stable patches added patches: block-fix-race-triggered-by-blk_set_queue_dying.patch block-make-sure-a-big-bio-is-split-into-at-most-256-bvecs.patch cgroup-reduce-read-locked-section-of-cgroup_threadgroup_rwsem-during-fork.patch nvme-call-pci_disable_device-on-the-error-path.patch powerpc-tm-avoid-slb-faults-in-treclaim-trecheckpoint-when-ri-0.patch rds-fix-an-infoleak-in-rds_inc_info_copy.patch s390-sclp_ctl-fix-potential-information-leak-with-dev-sclp.patch --- diff --git a/queue-4.4/block-fix-race-triggered-by-blk_set_queue_dying.patch b/queue-4.4/block-fix-race-triggered-by-blk_set_queue_dying.patch new file mode 100644 index 00000000000..aca38ca49ae --- /dev/null +++ b/queue-4.4/block-fix-race-triggered-by-blk_set_queue_dying.patch @@ -0,0 +1,36 @@ +From 1b856086813be9371929b6cc62045f9fd470f5a0 Mon Sep 17 00:00:00 2001 +From: Bart Van Assche +Date: Tue, 16 Aug 2016 16:48:36 -0700 +Subject: block: Fix race triggered by blk_set_queue_dying() + +From: Bart Van Assche + +commit 1b856086813be9371929b6cc62045f9fd470f5a0 upstream. + +blk_set_queue_dying() can be called while another thread is +submitting I/O or changing queue flags, e.g. through dm_stop_queue(). +Hence protect the QUEUE_FLAG_DYING flag change with locking. 
+ +Signed-off-by: Bart Van Assche +Cc: Christoph Hellwig +Cc: Mike Snitzer +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + block/blk-core.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/block/blk-core.c ++++ b/block/blk-core.c +@@ -515,7 +515,9 @@ EXPORT_SYMBOL_GPL(blk_queue_bypass_end); + + void blk_set_queue_dying(struct request_queue *q) + { +- queue_flag_set_unlocked(QUEUE_FLAG_DYING, q); ++ spin_lock_irq(q->queue_lock); ++ queue_flag_set(QUEUE_FLAG_DYING, q); ++ spin_unlock_irq(q->queue_lock); + + if (q->mq_ops) + blk_mq_wake_waiters(q); diff --git a/queue-4.4/block-make-sure-a-big-bio-is-split-into-at-most-256-bvecs.patch b/queue-4.4/block-make-sure-a-big-bio-is-split-into-at-most-256-bvecs.patch new file mode 100644 index 00000000000..710ba51e389 --- /dev/null +++ b/queue-4.4/block-make-sure-a-big-bio-is-split-into-at-most-256-bvecs.patch @@ -0,0 +1,84 @@ +From 4d70dca4eadf2f95abe389116ac02b8439c2d16c Mon Sep 17 00:00:00 2001 +From: Ming Lei +Date: Tue, 23 Aug 2016 21:49:45 +0800 +Subject: block: make sure a big bio is split into at most 256 bvecs + +From: Ming Lei + +commit 4d70dca4eadf2f95abe389116ac02b8439c2d16c upstream. + +After arbitrary bio size was introduced, the incoming bio may +be very big. We have to split the bio into small bios so that +each holds at most BIO_MAX_PAGES bvecs for safety reason, such +as bio_clone(). + +This patch fixes the following kernel crash: + +> [ 172.660142] BUG: unable to handle kernel NULL pointer dereference at 0000000000000028 +> [ 172.660229] IP: [] bio_trim+0xf/0x2a +> [ 172.660289] PGD 7faf3e067 PUD 7f9279067 PMD 0 +> [ 172.660399] Oops: 0000 [#1] SMP +> [...] +> [ 172.664780] Call Trace: +> [ 172.664813] [] ? raid1_make_request+0x2e8/0xad7 [raid1] +> [ 172.664846] [] ? blk_queue_split+0x377/0x3d4 +> [ 172.664880] [] ? md_make_request+0xf6/0x1e9 [md_mod] +> [ 172.664912] [] ? generic_make_request+0xb5/0x155 +> [ 172.664947] [] ? 
prio_io+0x85/0x95 [bcache] +> [ 172.664981] [] ? register_cache_set+0x355/0x8d0 [bcache] +> [ 172.665016] [] ? register_bcache+0x1006/0x1174 [bcache] + +The issue can be reproduced by the following steps: + - create one raid1 over two virtio-blk + - build bcache device over the above raid1 and another cache device + and bucket size is set as 2Mbytes + - set cache mode as writeback + - run random write over ext4 on the bcache device + +Fixes: 54efd50(block: make generic_make_request handle arbitrarily sized bios) +Reported-by: Sebastian Roesner +Reported-by: Eric Wheeler +Cc: Shaohua Li +Acked-by: Kent Overstreet +Signed-off-by: Ming Lei +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + block/blk-merge.c | 22 ++++++++++++++++++++++ + 1 file changed, 22 insertions(+) + +--- a/block/blk-merge.c ++++ b/block/blk-merge.c +@@ -92,9 +92,31 @@ static struct bio *blk_bio_segment_split + bool do_split = true; + struct bio *new = NULL; + const unsigned max_sectors = get_max_io_size(q, bio); ++ unsigned bvecs = 0; + + bio_for_each_segment(bv, bio, iter) { + /* ++ * With arbitrary bio size, the incoming bio may be very ++ * big. We have to split the bio into small bios so that ++ * each holds at most BIO_MAX_PAGES bvecs because ++ * bio_clone() can fail to allocate big bvecs. ++ * ++ * It should have been better to apply the limit per ++ * request queue in which bio_clone() is involved, ++ * instead of globally. The biggest blocker is the ++ * bio_clone() in bio bounce. ++ * ++ * If bio is splitted by this reason, we should have ++ * allowed to continue bios merging, but don't do ++ * that now for making the change simple. ++ * ++ * TODO: deal with bio bounce's bio_clone() gracefully ++ * and convert the global limit into per-queue limit. ++ */ ++ if (bvecs++ >= BIO_MAX_PAGES) ++ goto split; ++ ++ /* + * If the queue doesn't support SG gaps and adding this + * offset would create a gap, disallow it. 
+ */ diff --git a/queue-4.4/cgroup-reduce-read-locked-section-of-cgroup_threadgroup_rwsem-during-fork.patch b/queue-4.4/cgroup-reduce-read-locked-section-of-cgroup_threadgroup_rwsem-during-fork.patch new file mode 100644 index 00000000000..85f6584c53e --- /dev/null +++ b/queue-4.4/cgroup-reduce-read-locked-section-of-cgroup_threadgroup_rwsem-during-fork.patch @@ -0,0 +1,111 @@ +From 568ac888215c7fb2fabe8ea739b00ec3c1f5d440 Mon Sep 17 00:00:00 2001 +From: Balbir Singh +Date: Wed, 10 Aug 2016 15:43:06 -0400 +Subject: cgroup: reduce read locked section of cgroup_threadgroup_rwsem during fork + +From: Balbir Singh + +commit 568ac888215c7fb2fabe8ea739b00ec3c1f5d440 upstream. + +cgroup_threadgroup_rwsem is acquired in read mode during process exit +and fork. It is also grabbed in write mode during +__cgroups_proc_write(). I've recently run into a scenario with lots +of memory pressure and OOM and I am beginning to see + +systemd + + __switch_to+0x1f8/0x350 + __schedule+0x30c/0x990 + schedule+0x48/0xc0 + percpu_down_write+0x114/0x170 + __cgroup_procs_write.isra.12+0xb8/0x3c0 + cgroup_file_write+0x74/0x1a0 + kernfs_fop_write+0x188/0x200 + __vfs_write+0x6c/0xe0 + vfs_write+0xc0/0x230 + SyS_write+0x6c/0x110 + system_call+0x38/0xb4 + +This thread is waiting on the reader of cgroup_threadgroup_rwsem to +exit. The reader itself is under memory pressure and has gone into +reclaim after fork. There are times the reader also ends up waiting on +oom_lock as well. 
+ + __switch_to+0x1f8/0x350 + __schedule+0x30c/0x990 + schedule+0x48/0xc0 + jbd2_log_wait_commit+0xd4/0x180 + ext4_evict_inode+0x88/0x5c0 + evict+0xf8/0x2a0 + dispose_list+0x50/0x80 + prune_icache_sb+0x6c/0x90 + super_cache_scan+0x190/0x210 + shrink_slab.part.15+0x22c/0x4c0 + shrink_zone+0x288/0x3c0 + do_try_to_free_pages+0x1dc/0x590 + try_to_free_pages+0xdc/0x260 + __alloc_pages_nodemask+0x72c/0xc90 + alloc_pages_current+0xb4/0x1a0 + page_table_alloc+0xc0/0x170 + __pte_alloc+0x58/0x1f0 + copy_page_range+0x4ec/0x950 + copy_process.isra.5+0x15a0/0x1870 + _do_fork+0xa8/0x4b0 + ppc_clone+0x8/0xc + +In the meanwhile, all processes exiting/forking are blocked almost +stalling the system. + +This patch moves the threadgroup_change_begin from before +cgroup_fork() to just before cgroup_canfork(). There is no need to +worry about threadgroup changes till the task is actually added to the +threadgroup. This avoids having to call reclaim with +cgroup_threadgroup_rwsem held. + +tj: Subject and description edits. + +Signed-off-by: Balbir Singh +Acked-by: Zefan Li +Cc: Oleg Nesterov +Cc: Andrew Morton +Signed-off-by: Tejun Heo +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/fork.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -1369,7 +1369,6 @@ static struct task_struct *copy_process( + p->real_start_time = ktime_get_boot_ns(); + p->io_context = NULL; + p->audit_context = NULL; +- threadgroup_change_begin(current); + cgroup_fork(p); + #ifdef CONFIG_NUMA + p->mempolicy = mpol_dup(p->mempolicy); +@@ -1521,6 +1520,7 @@ static struct task_struct *copy_process( + INIT_LIST_HEAD(&p->thread_group); + p->task_works = NULL; + ++ threadgroup_change_begin(current); + /* + * Ensure that the cgroup subsystem policies allow the new process to be + * forked. 
It should be noted the the new process's css_set can be changed +@@ -1621,6 +1621,7 @@ static struct task_struct *copy_process( + bad_fork_cancel_cgroup: + cgroup_cancel_fork(p, cgrp_ss_priv); + bad_fork_free_pid: ++ threadgroup_change_end(current); + if (pid != &init_struct_pid) + free_pid(pid); + bad_fork_cleanup_io: +@@ -1651,7 +1652,6 @@ bad_fork_cleanup_policy: + mpol_put(p->mempolicy); + bad_fork_cleanup_threadgroup_lock: + #endif +- threadgroup_change_end(current); + delayacct_tsk_free(p); + bad_fork_cleanup_count: + atomic_dec(&p->cred->user->processes); diff --git a/queue-4.4/nvme-call-pci_disable_device-on-the-error-path.patch b/queue-4.4/nvme-call-pci_disable_device-on-the-error-path.patch new file mode 100644 index 00000000000..9ed48f4eae3 --- /dev/null +++ b/queue-4.4/nvme-call-pci_disable_device-on-the-error-path.patch @@ -0,0 +1,35 @@ +From krisman@linux.vnet.ibm.com Fri Sep 9 16:16:39 2016 +From: Gabriel Krisman Bertazi +Date: Thu, 08 Sep 2016 18:10:23 -0300 +Subject: nvme: Call pci_disable_device on the error path. +To: Jiri Slaby +Cc: gregkh@linuxfoundation.org, stable@vger.kernel.org, stewart@linux.vnet.ibm.com, mniyer@us.ibm.com, keith.busch@intel.com +Message-ID: <87h99qf680.fsf@linux.vnet.ibm.com> + +From: Gabriel Krisman Bertazi + + +Commit 5706aca74fe4 ("NVMe: Don't unmap controller registers on reset"), +which backported b00a726a9fd8 to the 4.4.y kernel introduced a +regression in which it didn't call pci_disable_device in the error path +of nvme_pci_enable. 
+ +Reported-by: Jiri Slaby +Embarassed-developer: Gabriel Krisman Bertazi +Signed-off-by: Gabriel Krisman Bertazi +Signed-off-by: Greg Kroah-Hartman +--- + drivers/nvme/host/pci.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/nvme/host/pci.c ++++ b/drivers/nvme/host/pci.c +@@ -2725,7 +2725,7 @@ static int nvme_pci_enable(struct nvme_d + return 0; + + disable: +- pci_release_regions(pdev); ++ pci_disable_device(pdev); + + return result; + } diff --git a/queue-4.4/powerpc-tm-avoid-slb-faults-in-treclaim-trecheckpoint-when-ri-0.patch b/queue-4.4/powerpc-tm-avoid-slb-faults-in-treclaim-trecheckpoint-when-ri-0.patch new file mode 100644 index 00000000000..ae601cad55c --- /dev/null +++ b/queue-4.4/powerpc-tm-avoid-slb-faults-in-treclaim-trecheckpoint-when-ri-0.patch @@ -0,0 +1,189 @@ +From 190ce8693c23eae09ba5f303a83bf2fbeb6478b1 Mon Sep 17 00:00:00 2001 +From: Michael Neuling +Date: Tue, 28 Jun 2016 13:01:04 +1000 +Subject: powerpc/tm: Avoid SLB faults in treclaim/trecheckpoint when RI=0 + +From: Michael Neuling + +commit 190ce8693c23eae09ba5f303a83bf2fbeb6478b1 upstream. + +Currently we have 2 segments that are bolted for the kernel linear +mapping (ie 0xc000... addresses). This is 0 to 1TB and also the kernel +stacks. Anything accessed outside of these regions may need to be +faulted in. (In practice machines with TM always have 1T segments) + +If a machine has < 2TB of memory we never fault on the kernel linear +mapping as these two segments cover all physical memory. If a machine +has > 2TB of memory, there may be structures outside of these two +segments that need to be faulted in. This faulting can occur when +running as a guest as the hypervisor may remove any SLB that's not +bolted. + +When we treclaim and trecheckpoint we have a window where we need to +run with the userspace GPRs. This means that we no longer have a valid +stack pointer in r1. 
For this window we therefore clear MSR RI to +indicate that any exceptions taken at this point won't be able to be +handled. This means that we can't take segment misses in this RI=0 +window. + +In this RI=0 region, we currently access the thread_struct for the +process being context switched to or from. This thread_struct access +may cause a segment fault since it's not guaranteed to be covered by +the two bolted segment entries described above. + +We've seen this with a crash when running as a guest with > 2TB of +memory on PowerVM: + + Unrecoverable exception 4100 at c00000000004f138 + Oops: Unrecoverable exception, sig: 6 [#1] + SMP NR_CPUS=2048 NUMA pSeries + CPU: 1280 PID: 7755 Comm: kworker/1280:1 Tainted: G X 4.4.13-46-default #1 + task: c000189001df4210 ti: c000189001d5c000 task.ti: c000189001d5c000 + NIP: c00000000004f138 LR: 0000000010003a24 CTR: 0000000010001b20 + REGS: c000189001d5f730 TRAP: 4100 Tainted: G X (4.4.13-46-default) + MSR: 8000000100001031 CR: 24000048 XER: 00000000 + CFAR: c00000000004ed18 SOFTE: 0 + GPR00: ffffffffc58d7b60 c000189001d5f9b0 00000000100d7d00 000000003a738288 + GPR04: 0000000000002781 0000000000000006 0000000000000000 c0000d1f4d889620 + GPR08: 000000000000c350 00000000000008ab 00000000000008ab 00000000100d7af0 + GPR12: 00000000100d7ae8 00003ffe787e67a0 0000000000000000 0000000000000211 + GPR16: 0000000010001b20 0000000000000000 0000000000800000 00003ffe787df110 + GPR20: 0000000000000001 00000000100d1e10 0000000000000000 00003ffe787df050 + GPR24: 0000000000000003 0000000000010000 0000000000000000 00003fffe79e2e30 + GPR28: 00003fffe79e2e68 00000000003d0f00 00003ffe787e67a0 00003ffe787de680 + NIP [c00000000004f138] restore_gprs+0xd0/0x16c + LR [0000000010003a24] 0x10003a24 + Call Trace: + [c000189001d5f9b0] [c000189001d5f9f0] 0xc000189001d5f9f0 (unreliable) + [c000189001d5fb90] [c00000000001583c] tm_recheckpoint+0x6c/0xa0 + [c000189001d5fbd0] [c000000000015c40] __switch_to+0x2c0/0x350 + [c000189001d5fc30] [c0000000007e647c] 
__schedule+0x32c/0x9c0 + [c000189001d5fcb0] [c0000000007e6b58] schedule+0x48/0xc0 + [c000189001d5fce0] [c0000000000deabc] worker_thread+0x22c/0x5b0 + [c000189001d5fd80] [c0000000000e7000] kthread+0x110/0x130 + [c000189001d5fe30] [c000000000009538] ret_from_kernel_thread+0x5c/0xa4 + Instruction dump: + 7cb103a6 7cc0e3a6 7ca222a6 78a58402 38c00800 7cc62838 08860000 7cc000a6 + 38a00006 78c60022 7cc62838 0b060000 7ccff120 e8270078 e8a70098 + ---[ end trace 602126d0a1dedd54 ]--- + +This fixes this by copying the required data from the thread_struct to +the stack before we clear MSR RI. Then once we clear RI, we only access +the stack, guaranteeing there's no segment miss. + +We also tighten the region over which we set RI=0 on the treclaim() +path. This may have a slight performance impact since we're adding an +mtmsr instruction. + +Fixes: 090b9284d725 ("powerpc/tm: Clear MSR RI in non-recoverable TM code") +Signed-off-by: Michael Neuling +Reviewed-by: Cyril Bur +Signed-off-by: Michael Ellerman +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/kernel/tm.S | 61 +++++++++++++++++++++++++++++++++-------------- + 1 file changed, 44 insertions(+), 17 deletions(-) + +--- a/arch/powerpc/kernel/tm.S ++++ b/arch/powerpc/kernel/tm.S +@@ -110,17 +110,11 @@ _GLOBAL(tm_reclaim) + std r3, STK_PARAM(R3)(r1) + SAVE_NVGPRS(r1) + +- /* We need to setup MSR for VSX register save instructions. Here we +- * also clear the MSR RI since when we do the treclaim, we won't have a +- * valid kernel pointer for a while. We clear RI here as it avoids +- * adding another mtmsr closer to the treclaim. This makes the region +- * maked as non-recoverable wider than it needs to be but it saves on +- * inserting another mtmsrd later. +- */ ++ /* We need to setup MSR for VSX register save instructions. 
*/ + mfmsr r14 + mr r15, r14 + ori r15, r15, MSR_FP +- li r16, MSR_RI ++ li r16, 0 + ori r16, r16, MSR_EE /* IRQs hard off */ + andc r15, r15, r16 + oris r15, r15, MSR_VEC@h +@@ -176,7 +170,17 @@ dont_backup_fp: + 1: tdeqi r6, 0 + EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0 + +- /* The moment we treclaim, ALL of our GPRs will switch ++ /* Clear MSR RI since we are about to change r1, EE is already off. */ ++ li r4, 0 ++ mtmsrd r4, 1 ++ ++ /* ++ * BE CAREFUL HERE: ++ * At this point we can't take an SLB miss since we have MSR_RI ++ * off. Load only to/from the stack/paca which are in SLB bolted regions ++ * until we turn MSR RI back on. ++ * ++ * The moment we treclaim, ALL of our GPRs will switch + * to user register state. (FPRs, CCR etc. also!) + * Use an sprg and a tm_scratch in the PACA to shuffle. + */ +@@ -197,6 +201,11 @@ dont_backup_fp: + + /* Store the PPR in r11 and reset to decent value */ + std r11, GPR11(r1) /* Temporary stash */ ++ ++ /* Reset MSR RI so we can take SLB faults again */ ++ li r11, MSR_RI ++ mtmsrd r11, 1 ++ + mfspr r11, SPRN_PPR + HMT_MEDIUM + +@@ -397,11 +406,6 @@ restore_gprs: + ld r5, THREAD_TM_DSCR(r3) + ld r6, THREAD_TM_PPR(r3) + +- /* Clear the MSR RI since we are about to change R1. EE is already off +- */ +- li r4, 0 +- mtmsrd r4, 1 +- + REST_GPR(0, r7) /* GPR0 */ + REST_2GPRS(2, r7) /* GPR2-3 */ + REST_GPR(4, r7) /* GPR4 */ +@@ -439,10 +443,33 @@ restore_gprs: + ld r6, _CCR(r7) + mtcr r6 + +- REST_GPR(1, r7) /* GPR1 */ +- REST_GPR(5, r7) /* GPR5-7 */ + REST_GPR(6, r7) +- ld r7, GPR7(r7) ++ ++ /* ++ * Store r1 and r5 on the stack so that we can access them ++ * after we clear MSR RI. ++ */ ++ ++ REST_GPR(5, r7) ++ std r5, -8(r1) ++ ld r5, GPR1(r7) ++ std r5, -16(r1) ++ ++ REST_GPR(7, r7) ++ ++ /* Clear MSR RI since we are about to change r1. EE is already off */ ++ li r5, 0 ++ mtmsrd r5, 1 ++ ++ /* ++ * BE CAREFUL HERE: ++ * At this point we can't take an SLB miss since we have MSR_RI ++ * off. 
Load only to/from the stack/paca which are in SLB bolted regions ++ * until we turn MSR RI back on. ++ */ ++ ++ ld r5, -8(r1) ++ ld r1, -16(r1) + + /* Commit register state as checkpointed state: */ + TRECHKPT diff --git a/queue-4.4/rds-fix-an-infoleak-in-rds_inc_info_copy.patch b/queue-4.4/rds-fix-an-infoleak-in-rds_inc_info_copy.patch new file mode 100644 index 00000000000..0654d726619 --- /dev/null +++ b/queue-4.4/rds-fix-an-infoleak-in-rds_inc_info_copy.patch @@ -0,0 +1,33 @@ +From 4116def2337991b39919f3b448326e21c40e0dbb Mon Sep 17 00:00:00 2001 +From: Kangjie Lu +Date: Thu, 2 Jun 2016 04:11:20 -0400 +Subject: rds: fix an infoleak in rds_inc_info_copy + +From: Kangjie Lu + +commit 4116def2337991b39919f3b448326e21c40e0dbb upstream. + +The last field "flags" of object "minfo" is not initialized. +Copying this object out may leak kernel stack data. +Assign 0 to it to avoid leak. + +Signed-off-by: Kangjie Lu +Acked-by: Santosh Shilimkar +Signed-off-by: David S. Miller +Signed-off-by: Juerg Haefliger +Signed-off-by: Greg Kroah-Hartman + +--- + net/rds/recv.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/rds/recv.c ++++ b/net/rds/recv.c +@@ -545,5 +545,7 @@ void rds_inc_info_copy(struct rds_incomi + minfo.fport = inc->i_hdr.h_dport; + } + ++ minfo.flags = 0; ++ + rds_info_copy(iter, &minfo, sizeof(minfo)); + } diff --git a/queue-4.4/s390-sclp_ctl-fix-potential-information-leak-with-dev-sclp.patch b/queue-4.4/s390-sclp_ctl-fix-potential-information-leak-with-dev-sclp.patch new file mode 100644 index 00000000000..8ceb66b4baa --- /dev/null +++ b/queue-4.4/s390-sclp_ctl-fix-potential-information-leak-with-dev-sclp.patch @@ -0,0 +1,57 @@ +From 532c34b5fbf1687df63b3fcd5b2846312ac943c6 Mon Sep 17 00:00:00 2001 +From: Martin Schwidefsky +Date: Mon, 25 Apr 2016 17:54:28 +0200 +Subject: s390/sclp_ctl: fix potential information leak with /dev/sclp + +From: Martin Schwidefsky + +commit 532c34b5fbf1687df63b3fcd5b2846312ac943c6 upstream. 
+ +The sclp_ctl_ioctl_sccb function uses two copy_from_user calls to +retrieve the sclp request from user space. The first copy_from_user +fetches the length of the request which is stored in the first two +bytes of the request. The second copy_from_user gets the complete +sclp request, but this copies the length field a second time. +A malicious user may have changed the length in the meantime. + +Reported-by: Pengfei Wang +Reviewed-by: Michael Holzheu +Signed-off-by: Martin Schwidefsky +Signed-off-by: Juerg Haefliger +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/s390/char/sclp_ctl.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +--- a/drivers/s390/char/sclp_ctl.c ++++ b/drivers/s390/char/sclp_ctl.c +@@ -56,6 +56,7 @@ static int sclp_ctl_ioctl_sccb(void __us + { + struct sclp_ctl_sccb ctl_sccb; + struct sccb_header *sccb; ++ unsigned long copied; + int rc; + + if (copy_from_user(&ctl_sccb, user_area, sizeof(ctl_sccb))) +@@ -65,14 +66,15 @@ static int sclp_ctl_ioctl_sccb(void __us + sccb = (void *) get_zeroed_page(GFP_KERNEL | GFP_DMA); + if (!sccb) + return -ENOMEM; +- if (copy_from_user(sccb, u64_to_uptr(ctl_sccb.sccb), sizeof(*sccb))) { ++ copied = PAGE_SIZE - ++ copy_from_user(sccb, u64_to_uptr(ctl_sccb.sccb), PAGE_SIZE); ++ if (offsetof(struct sccb_header, length) + ++ sizeof(sccb->length) > copied || sccb->length > copied) { + rc = -EFAULT; + goto out_free; + } +- if (sccb->length > PAGE_SIZE || sccb->length < 8) +- return -EINVAL; +- if (copy_from_user(sccb, u64_to_uptr(ctl_sccb.sccb), sccb->length)) { +- rc = -EFAULT; ++ if (sccb->length < 8) { ++ rc = -EINVAL; + goto out_free; + } + rc = sclp_sync_request(ctl_sccb.cmdw, sccb); diff --git a/queue-4.4/series b/queue-4.4/series index 56a5db0b154..47caf014516 100644 --- a/queue-4.4/series +++ b/queue-4.4/series @@ -152,3 +152,10 @@ ext4-fix-xattr-shifting-when-expanding-inodes-part-2.patch ext4-properly-align-shifted-xattrs-when-expanding-inodes.patch 
ext4-avoid-deadlock-when-expanding-inode-size.patch ext4-avoid-modifying-checksum-fields-directly-during-checksum-verification.patch +block-fix-race-triggered-by-blk_set_queue_dying.patch +block-make-sure-a-big-bio-is-split-into-at-most-256-bvecs.patch +cgroup-reduce-read-locked-section-of-cgroup_threadgroup_rwsem-during-fork.patch +nvme-call-pci_disable_device-on-the-error-path.patch +powerpc-tm-avoid-slb-faults-in-treclaim-trecheckpoint-when-ri-0.patch +rds-fix-an-infoleak-in-rds_inc_info_copy.patch +s390-sclp_ctl-fix-potential-information-leak-with-dev-sclp.patch