From 691fbb6673d808261ba275afa512728985d9c5c3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 27 Aug 2017 14:53:41 +0200 Subject: [PATCH] 4.12-stable patches added patches: acpi-device-property-fix-node-lookup-in-acpi_graph_get_child_prop_value.patch arm64-fpsimd-prevent-registers-leaking-across-exec.patch cifs-fix-df-output-for-users-with-quota-limits.patch cifs-return-enametoolong-for-overlong-names-in-cifs_open-cifs_lookup.patch drm-atomic-handle-edeadlk-with-out-fences-correctly.patch drm-atomic-if-the-atomic-check-fails-return-its-value-first.patch drm-fix-framebuffer-leak.patch drm-i915-gvt-fix-the-kernel-null-pointer-error.patch drm-i915-vbt-ignore-extraneous-child-devices-for-a-port.patch drm-release-driver-tracking-before-making-the-object-available-again.patch drm-sun4i-implement-drm_driver-lastclose-to-restore-fbdev-console.patch fork-fix-incorrect-fput-of-exe_file-causing-use-after-free.patch ftrace-check-for-null-ret_stack-on-profile-function-graph-entry-function.patch i2c-designware-fix-system-suspend.patch kbuild-linker-script-do-not-match-c-names-unless-ld_dead_code_data_elimination-is-configured.patch mm-madvise.c-fix-freeing-of-locked-page-with-madv_free.patch mm-memblock.c-reversed-logic-in-memblock_discard.patch net-sunrpc-svcsock-fix-null-pointer-exception.patch netfilter-expect-fix-crash-when-putting-uninited-expectation.patch netfilter-nat-fix-src-map-lookup.patch netfilter-nfnetlink-improve-input-length-sanitization-in-nfnetlink_rcv.patch nfsd-limit-end-of-page-list-when-decoding-nfsv4-write.patch perf-core-fix-group-cpu-task-validation.patch rdma-uverbs-initialize-cq_context-appropriately.patch revert-drm-amdgpu-fix-vblank_time-when-displays-are-off.patch ring-buffer-have-ring_buffer_alloc_read_page-return-error-on-offline-cpu.patch timers-fix-excessive-granularity-of-new-timers-after-a-nohz-idle.patch tracing-call-clear_boot_tracer-at-lateinit_sync.patch 
tracing-fix-freeing-of-filter-in-create_filter-when-set_str-is-false.patch tracing-fix-kmemleak-in-tracing_map_array_free.patch tracing-missing-error-code-in-tracer_alloc_buffers.patch virtio_pci-fix-cpu-affinity-support.patch x86-mm-fix-use-after-free-of-ldt_struct.patch --- ...p-in-acpi_graph_get_child_prop_value.patch | 44 ++++ ...revent-registers-leaking-across-exec.patch | 49 +++++ ...f-output-for-users-with-quota-limits.patch | 57 +++++ ...rlong-names-in-cifs_open-cifs_lookup.patch | 88 ++++++++ ...le-edeadlk-with-out-fences-correctly.patch | 60 +++++ ...c-check-fails-return-its-value-first.patch | 102 +++++++++ queue-4.12/drm-fix-framebuffer-leak.patch | 30 +++ ...vt-fix-the-kernel-null-pointer-error.patch | 33 +++ ...-extraneous-child-devices-for-a-port.patch | 101 +++++++++ ...re-making-the-object-available-again.patch | 56 +++++ ...r-lastclose-to-restore-fbdev-console.patch | 47 ++++ ...t-of-exe_file-causing-use-after-free.patch | 105 +++++++++ ...rofile-function-graph-entry-function.patch | 46 ++++ .../i2c-designware-fix-system-suspend.patch | 92 ++++++++ ..._code_data_elimination-is-configured.patch | 106 +++++++++ ...reeing-of-locked-page-with-madv_free.patch | 128 +++++++++++ ...c-reversed-logic-in-memblock_discard.patch | 38 ++++ ...c-svcsock-fix-null-pointer-exception.patch | 186 ++++++++++++++++ ...sh-when-putting-uninited-expectation.patch | 36 +++ .../netfilter-nat-fix-src-map-lookup.patch | 68 ++++++ ...length-sanitization-in-nfnetlink_rcv.patch | 52 +++++ ...-page-list-when-decoding-nfsv4-write.patch | 48 ++++ ...f-core-fix-group-cpu-task-validation.patch | 182 ++++++++++++++++ ...-initialize-cq_context-appropriately.patch | 35 +++ ...ix-vblank_time-when-displays-are-off.patch | 32 +++ ...ead_page-return-error-on-offline-cpu.patch | 155 +++++++++++++ queue-4.12/series | 33 +++ ...rity-of-new-timers-after-a-nohz-idle.patch | 206 ++++++++++++++++++ ...l-clear_boot_tracer-at-lateinit_sync.patch | 39 ++++ 
...-create_filter-when-set_str-is-false.patch | 67 ++++++ ...x-kmemleak-in-tracing_map_array_free.patch | 88 ++++++++ ...g-error-code-in-tracer_alloc_buffers.patch | 35 +++ .../virtio_pci-fix-cpu-affinity-support.patch | 53 +++++ ...-mm-fix-use-after-free-of-ldt_struct.patch | 173 +++++++++++++++ 34 files changed, 2670 insertions(+) create mode 100644 queue-4.12/acpi-device-property-fix-node-lookup-in-acpi_graph_get_child_prop_value.patch create mode 100644 queue-4.12/arm64-fpsimd-prevent-registers-leaking-across-exec.patch create mode 100644 queue-4.12/cifs-fix-df-output-for-users-with-quota-limits.patch create mode 100644 queue-4.12/cifs-return-enametoolong-for-overlong-names-in-cifs_open-cifs_lookup.patch create mode 100644 queue-4.12/drm-atomic-handle-edeadlk-with-out-fences-correctly.patch create mode 100644 queue-4.12/drm-atomic-if-the-atomic-check-fails-return-its-value-first.patch create mode 100644 queue-4.12/drm-fix-framebuffer-leak.patch create mode 100644 queue-4.12/drm-i915-gvt-fix-the-kernel-null-pointer-error.patch create mode 100644 queue-4.12/drm-i915-vbt-ignore-extraneous-child-devices-for-a-port.patch create mode 100644 queue-4.12/drm-release-driver-tracking-before-making-the-object-available-again.patch create mode 100644 queue-4.12/drm-sun4i-implement-drm_driver-lastclose-to-restore-fbdev-console.patch create mode 100644 queue-4.12/fork-fix-incorrect-fput-of-exe_file-causing-use-after-free.patch create mode 100644 queue-4.12/ftrace-check-for-null-ret_stack-on-profile-function-graph-entry-function.patch create mode 100644 queue-4.12/i2c-designware-fix-system-suspend.patch create mode 100644 queue-4.12/kbuild-linker-script-do-not-match-c-names-unless-ld_dead_code_data_elimination-is-configured.patch create mode 100644 queue-4.12/mm-madvise.c-fix-freeing-of-locked-page-with-madv_free.patch create mode 100644 queue-4.12/mm-memblock.c-reversed-logic-in-memblock_discard.patch create mode 100644 
queue-4.12/net-sunrpc-svcsock-fix-null-pointer-exception.patch create mode 100644 queue-4.12/netfilter-expect-fix-crash-when-putting-uninited-expectation.patch create mode 100644 queue-4.12/netfilter-nat-fix-src-map-lookup.patch create mode 100644 queue-4.12/netfilter-nfnetlink-improve-input-length-sanitization-in-nfnetlink_rcv.patch create mode 100644 queue-4.12/nfsd-limit-end-of-page-list-when-decoding-nfsv4-write.patch create mode 100644 queue-4.12/perf-core-fix-group-cpu-task-validation.patch create mode 100644 queue-4.12/rdma-uverbs-initialize-cq_context-appropriately.patch create mode 100644 queue-4.12/revert-drm-amdgpu-fix-vblank_time-when-displays-are-off.patch create mode 100644 queue-4.12/ring-buffer-have-ring_buffer_alloc_read_page-return-error-on-offline-cpu.patch create mode 100644 queue-4.12/timers-fix-excessive-granularity-of-new-timers-after-a-nohz-idle.patch create mode 100644 queue-4.12/tracing-call-clear_boot_tracer-at-lateinit_sync.patch create mode 100644 queue-4.12/tracing-fix-freeing-of-filter-in-create_filter-when-set_str-is-false.patch create mode 100644 queue-4.12/tracing-fix-kmemleak-in-tracing_map_array_free.patch create mode 100644 queue-4.12/tracing-missing-error-code-in-tracer_alloc_buffers.patch create mode 100644 queue-4.12/virtio_pci-fix-cpu-affinity-support.patch create mode 100644 queue-4.12/x86-mm-fix-use-after-free-of-ldt_struct.patch diff --git a/queue-4.12/acpi-device-property-fix-node-lookup-in-acpi_graph_get_child_prop_value.patch b/queue-4.12/acpi-device-property-fix-node-lookup-in-acpi_graph_get_child_prop_value.patch new file mode 100644 index 00000000000..2fb39a04fd2 --- /dev/null +++ b/queue-4.12/acpi-device-property-fix-node-lookup-in-acpi_graph_get_child_prop_value.patch @@ -0,0 +1,44 @@ +From b5212f57da145e53df790a7e211d94daac768bf8 Mon Sep 17 00:00:00 2001 +From: Sakari Ailus +Date: Tue, 22 Aug 2017 23:39:58 +0300 +Subject: ACPI: device property: Fix node lookup in acpi_graph_get_child_prop_value() + +From: Sakari 
Ailus + +commit b5212f57da145e53df790a7e211d94daac768bf8 upstream. + +acpi_graph_get_child_prop_value() is intended to find a child node with a +certain property value pair. The check + + if (!fwnode_property_read_u32(fwnode, prop_name, &nr)) + continue; + +is faulty: fwnode_property_read_u32() returns zero on success, not on +failure, leading to comparing values only if the searched property was not +found. + +Moreover, the check is made against the parent device node instead of +the child one as it should be. + +Fixes: 79389a83bc38 (ACPI / property: Add support for remote endpoints) +Reported-by: Hyungwoo Yang +Signed-off-by: Sakari Ailus +[ rjw: Changelog ] +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/acpi/property.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/acpi/property.c ++++ b/drivers/acpi/property.c +@@ -1046,7 +1046,7 @@ static struct fwnode_handle *acpi_graph_ + fwnode_for_each_child_node(fwnode, child) { + u32 nr; + +- if (!fwnode_property_read_u32(fwnode, prop_name, &nr)) ++ if (fwnode_property_read_u32(child, prop_name, &nr)) + continue; + + if (val == nr) diff --git a/queue-4.12/arm64-fpsimd-prevent-registers-leaking-across-exec.patch b/queue-4.12/arm64-fpsimd-prevent-registers-leaking-across-exec.patch new file mode 100644 index 00000000000..90cebf1ea7a --- /dev/null +++ b/queue-4.12/arm64-fpsimd-prevent-registers-leaking-across-exec.patch @@ -0,0 +1,49 @@ +From 096622104e14d8a1db4860bd557717067a0515d2 Mon Sep 17 00:00:00 2001 +From: Dave Martin +Date: Fri, 18 Aug 2017 16:57:01 +0100 +Subject: arm64: fpsimd: Prevent registers leaking across exec + +From: Dave Martin + +commit 096622104e14d8a1db4860bd557717067a0515d2 upstream. + +There are some tricky dependencies between the different stages of +flushing the FPSIMD register state during exec, and these can race +with context switch in ways that can cause the old task's regs to +leak across. 
In particular, a context switch during the memset() can +cause some of the task's old FPSIMD registers to reappear. + +Disabling preemption for this small window would be no big deal for +performance: preemption is already disabled for similar scenarios +like updating the FPSIMD registers in sigreturn. + +So, instead of rearranging things in ways that might swap existing +subtle bugs for new ones, this patch just disables preemption +around the FPSIMD state flushing so that races of this type can't +occur here. This brings fpsimd_flush_thread() into line with other +code paths. + +Fixes: 674c242c9323 ("arm64: flush FP/SIMD state correctly after execve()") +Reviewed-by: Ard Biesheuvel +Signed-off-by: Dave Martin +Signed-off-by: Will Deacon +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm64/kernel/fpsimd.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/arch/arm64/kernel/fpsimd.c ++++ b/arch/arm64/kernel/fpsimd.c +@@ -161,9 +161,11 @@ void fpsimd_flush_thread(void) + { + if (!system_supports_fpsimd()) + return; ++ preempt_disable(); + memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); + fpsimd_flush_task_state(current); + set_thread_flag(TIF_FOREIGN_FPSTATE); ++ preempt_enable(); + } + + /* diff --git a/queue-4.12/cifs-fix-df-output-for-users-with-quota-limits.patch b/queue-4.12/cifs-fix-df-output-for-users-with-quota-limits.patch new file mode 100644 index 00000000000..7bcc1767c13 --- /dev/null +++ b/queue-4.12/cifs-fix-df-output-for-users-with-quota-limits.patch @@ -0,0 +1,57 @@ +From 42bec214d8bd432be6d32a1acb0a9079ecd4d142 Mon Sep 17 00:00:00 2001 +From: Sachin Prabhu +Date: Thu, 3 Aug 2017 13:09:03 +0530 +Subject: cifs: Fix df output for users with quota limits + +From: Sachin Prabhu + +commit 42bec214d8bd432be6d32a1acb0a9079ecd4d142 upstream. + +The df for a SMB2 share triggers a GetInfo call for +FS_FULL_SIZE_INFORMATION. The values returned are used to populate +struct statfs. 
+ +The problem is that none of the information returned by the call +contains the total blocks available on the filesystem. Instead we use +the blocks available to the user ie. quota limitation when filling out +statfs.f_blocks. The information returned does contain Actual free units +on the filesystem and is used to populate statfs.f_bfree. For users with +quota enabled, it can lead to situations where the total free space +reported is more than the total blocks on the system ending up with df +reports like the following + + # df -h /mnt/a +Filesystem Size Used Avail Use% Mounted on +//192.168.22.10/a 2.5G -2.3G 2.5G - /mnt/a + +To fix this problem, we instead populate both statfs.f_bfree with the +same value as statfs.f_bavail ie. CallerAvailableAllocationUnits. This +is similar to what is done already in the code for cifs and df now +reports the quota information for the user used to mount the share. + + # df --si /mnt/a +Filesystem Size Used Avail Use% Mounted on +//192.168.22.10/a 2.7G 101M 2.6G 4% /mnt/a + +Signed-off-by: Sachin Prabhu +Signed-off-by: Pierguido Lambri +Signed-off-by: Steve French +Signed-off-by: Greg Kroah-Hartman + +--- + fs/cifs/smb2pdu.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/fs/cifs/smb2pdu.c ++++ b/fs/cifs/smb2pdu.c +@@ -3195,8 +3195,8 @@ copy_fs_info_to_kstatfs(struct smb2_fs_f + kst->f_bsize = le32_to_cpu(pfs_inf->BytesPerSector) * + le32_to_cpu(pfs_inf->SectorsPerAllocationUnit); + kst->f_blocks = le64_to_cpu(pfs_inf->TotalAllocationUnits); +- kst->f_bfree = le64_to_cpu(pfs_inf->ActualAvailableAllocationUnits); +- kst->f_bavail = le64_to_cpu(pfs_inf->CallerAvailableAllocationUnits); ++ kst->f_bfree = kst->f_bavail = ++ le64_to_cpu(pfs_inf->CallerAvailableAllocationUnits); + return; + } + diff --git a/queue-4.12/cifs-return-enametoolong-for-overlong-names-in-cifs_open-cifs_lookup.patch b/queue-4.12/cifs-return-enametoolong-for-overlong-names-in-cifs_open-cifs_lookup.patch new file mode 100644 index 
00000000000..94a3b1e0c6a --- /dev/null +++ b/queue-4.12/cifs-return-enametoolong-for-overlong-names-in-cifs_open-cifs_lookup.patch @@ -0,0 +1,88 @@ +From d3edede29f74d335f81d95a4588f5f136a9f7dcf Mon Sep 17 00:00:00 2001 +From: Ronnie Sahlberg +Date: Wed, 23 Aug 2017 14:48:14 +1000 +Subject: cifs: return ENAMETOOLONG for overlong names in cifs_open()/cifs_lookup() + +From: Ronnie Sahlberg + +commit d3edede29f74d335f81d95a4588f5f136a9f7dcf upstream. + +Add checking for the path component length and verify it is <= the maximum +that the server advertizes via FileFsAttributeInformation. + +With this patch cifs.ko will now return ENAMETOOLONG instead of ENOENT +when users to access an overlong path. + +To test this, try to cd into a (non-existing) directory on a CIFS share +that has a too long name: +cd /mnt/aaaaaaaaaaaaaaa... + +and it now should show a good error message from the shell: +bash: cd: /mnt/aaaaaaaaaaaaaaaa...aaaaaa: File name too long + +rh bz 1153996 + +Signed-off-by: Ronnie Sahlberg +Signed-off-by: Steve French +Signed-off-by: Greg Kroah-Hartman + +--- + fs/cifs/dir.c | 18 ++++++++++++------ + 1 file changed, 12 insertions(+), 6 deletions(-) + +--- a/fs/cifs/dir.c ++++ b/fs/cifs/dir.c +@@ -194,15 +194,20 @@ cifs_bp_rename_retry: + } + + /* ++ * Don't allow path components longer than the server max. + * Don't allow the separator character in a path component. + * The VFS will not allow "/", but "\" is allowed by posix. 
+ */ + static int +-check_name(struct dentry *direntry) ++check_name(struct dentry *direntry, struct cifs_tcon *tcon) + { + struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb); + int i; + ++ if (unlikely(direntry->d_name.len > ++ tcon->fsAttrInfo.MaxPathNameComponentLength)) ++ return -ENAMETOOLONG; ++ + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) { + for (i = 0; i < direntry->d_name.len; i++) { + if (direntry->d_name.name[i] == '\\') { +@@ -500,10 +505,6 @@ cifs_atomic_open(struct inode *inode, st + return finish_no_open(file, res); + } + +- rc = check_name(direntry); +- if (rc) +- return rc; +- + xid = get_xid(); + + cifs_dbg(FYI, "parent inode = 0x%p name is: %pd and dentry = 0x%p\n", +@@ -516,6 +517,11 @@ cifs_atomic_open(struct inode *inode, st + } + + tcon = tlink_tcon(tlink); ++ ++ rc = check_name(direntry, tcon); ++ if (rc) ++ goto out_free_xid; ++ + server = tcon->ses->server; + + if (server->ops->new_lease_key) +@@ -776,7 +782,7 @@ cifs_lookup(struct inode *parent_dir_ino + } + pTcon = tlink_tcon(tlink); + +- rc = check_name(direntry); ++ rc = check_name(direntry, pTcon); + if (rc) + goto lookup_out; + diff --git a/queue-4.12/drm-atomic-handle-edeadlk-with-out-fences-correctly.patch b/queue-4.12/drm-atomic-handle-edeadlk-with-out-fences-correctly.patch new file mode 100644 index 00000000000..00edf6448eb --- /dev/null +++ b/queue-4.12/drm-atomic-handle-edeadlk-with-out-fences-correctly.patch @@ -0,0 +1,60 @@ +From 7f5d6dac548b983702dd7aac1d463bd88dff50a8 Mon Sep 17 00:00:00 2001 +From: Maarten Lankhorst +Date: Mon, 14 Aug 2017 12:07:21 +0200 +Subject: drm/atomic: Handle -EDEADLK with out-fences correctly + +From: Maarten Lankhorst + +commit 7f5d6dac548b983702dd7aac1d463bd88dff50a8 upstream. + +complete_crtc_signaling is freeing fence_state, but when retrying +num_fences and fence_state are not zero'd. 
This caused duplicate +fd's in the fence_state array, followed by a BUG_ON in fs/file.c +because we reallocate freed memory, and installing over an existing +fd, or potential other fun. + +Zero fence_state and num_fences correctly in the retry loop, which +allows kms_atomic_transition to pass. + +Fixes: beaf5af48034 ("drm/fence: add out-fences support") +Cc: Gustavo Padovan +Cc: Brian Starkey (v10) +Cc: Sean Paul +Cc: Daniel Vetter +Cc: Jani Nikula +Cc: David Airlie +Signed-off-by: Maarten Lankhorst +Testcase: kms_atomic_transitions.plane-all-modeset-transition-fencing +(with CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y) +Link: https://patchwork.freedesktop.org/patch/msgid/20170814100721.13340-1-maarten.lankhorst@linux.intel.com +Reviewed-by: Daniel Vetter #intel-gfx on irc +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/drm_atomic.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/drm_atomic.c ++++ b/drivers/gpu/drm/drm_atomic.c +@@ -2093,10 +2093,10 @@ int drm_mode_atomic_ioctl(struct drm_dev + struct drm_atomic_state *state; + struct drm_modeset_acquire_ctx ctx; + struct drm_plane *plane; +- struct drm_out_fence_state *fence_state = NULL; ++ struct drm_out_fence_state *fence_state; + unsigned plane_mask; + int ret = 0; +- unsigned int i, j, num_fences = 0; ++ unsigned int i, j, num_fences; + + /* disallow for drivers not supporting atomic: */ + if (!drm_core_check_feature(dev, DRIVER_ATOMIC)) +@@ -2137,6 +2137,8 @@ retry: + plane_mask = 0; + copied_objs = 0; + copied_props = 0; ++ fence_state = NULL; ++ num_fences = 0; + + for (i = 0; i < arg->count_objs; i++) { + uint32_t obj_id, count_props; diff --git a/queue-4.12/drm-atomic-if-the-atomic-check-fails-return-its-value-first.patch b/queue-4.12/drm-atomic-if-the-atomic-check-fails-return-its-value-first.patch new file mode 100644 index 00000000000..c3613d519f1 --- /dev/null +++ b/queue-4.12/drm-atomic-if-the-atomic-check-fails-return-its-value-first.patch @@ -0,0 +1,102 @@ 
+From a0ffc51e20e90e0c1c2491de2b4b03f48b6caaba Mon Sep 17 00:00:00 2001 +From: Maarten Lankhorst +Date: Tue, 15 Aug 2017 11:57:06 +0200 +Subject: drm/atomic: If the atomic check fails, return its value first + +From: Maarten Lankhorst + +commit a0ffc51e20e90e0c1c2491de2b4b03f48b6caaba upstream. + +The last part of drm_atomic_check_only is testing whether we need to +fail with -EINVAL when modeset is not allowed, but forgets to return +the value when atomic_check() fails first. + +This results in -EDEADLK being replaced by -EINVAL, and the sanity +check in drm_modeset_drop_locks kicks in: + +[ 308.531734] ------------[ cut here ]------------ +[ 308.531791] WARNING: CPU: 0 PID: 1886 at drivers/gpu/drm/drm_modeset_lock.c:217 drm_modeset_drop_locks+0x33/0xc0 [drm] +[ 308.531828] Modules linked in: +[ 308.532050] CPU: 0 PID: 1886 Comm: kms_atomic Tainted: G U W 4.13.0-rc5-patser+ #5225 +[ 308.532082] Hardware name: NUC5i7RYB, BIOS RYBDWi35.86A.0246.2015.0309.1355 03/09/2015 +[ 308.532124] task: ffff8800cd9dae00 task.stack: ffff8800ca3b8000 +[ 308.532168] RIP: 0010:drm_modeset_drop_locks+0x33/0xc0 [drm] +[ 308.532189] RSP: 0018:ffff8800ca3bf980 EFLAGS: 00010282 +[ 308.532211] RAX: dffffc0000000000 RBX: ffff8800ca3bfaf8 RCX: 0000000013a171e6 +[ 308.532235] RDX: 1ffff10019477f69 RSI: ffffffffa8ba4fa0 RDI: ffff8800ca3bfb48 +[ 308.532258] RBP: ffff8800ca3bf998 R08: 0000000000000000 R09: 0000000000000003 +[ 308.532281] R10: 0000000079dbe066 R11: 00000000f760b34b R12: 0000000000000001 +[ 308.532304] R13: dffffc0000000000 R14: 00000000ffffffea R15: ffff880096889680 +[ 308.532328] FS: 00007ff00959cec0(0000) GS:ffff8800d4e00000(0000) knlGS:0000000000000000 +[ 308.532359] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 308.532380] CR2: 0000000000000008 CR3: 00000000ca2e3000 CR4: 00000000003406f0 +[ 308.532402] Call Trace: +[ 308.532440] drm_mode_atomic_ioctl+0x19fa/0x1c00 [drm] +[ 308.532488] ? drm_atomic_set_property+0x1220/0x1220 [drm] +[ 308.532565] ? 
avc_has_extended_perms+0xc39/0xff0 +[ 308.532593] ? lock_downgrade+0x610/0x610 +[ 308.532640] ? drm_atomic_set_property+0x1220/0x1220 [drm] +[ 308.532680] drm_ioctl_kernel+0x154/0x1a0 [drm] +[ 308.532755] drm_ioctl+0x624/0x8f0 [drm] +[ 308.532858] ? drm_atomic_set_property+0x1220/0x1220 [drm] +[ 308.532976] ? drm_getunique+0x210/0x210 [drm] +[ 308.533061] do_vfs_ioctl+0xd92/0xe40 +[ 308.533121] ? ioctl_preallocate+0x1b0/0x1b0 +[ 308.533160] ? selinux_capable+0x20/0x20 +[ 308.533191] ? do_fcntl+0x1b1/0xbf0 +[ 308.533219] ? kasan_slab_free+0xa2/0xb0 +[ 308.533249] ? f_getown+0x4b/0xa0 +[ 308.533278] ? putname+0xcf/0xe0 +[ 308.533309] ? security_file_ioctl+0x57/0x90 +[ 308.533342] SyS_ioctl+0x4e/0x80 +[ 308.533374] entry_SYSCALL_64_fastpath+0x18/0xad +[ 308.533405] RIP: 0033:0x7ff00779e4d7 +[ 308.533431] RSP: 002b:00007fff66a043d8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 +[ 308.533481] RAX: ffffffffffffffda RBX: 000000e7c7ca5910 RCX: 00007ff00779e4d7 +[ 308.533560] RDX: 00007fff66a04430 RSI: 00000000c03864bc RDI: 0000000000000003 +[ 308.533608] RBP: 00007ff007a5fb00 R08: 000000e7c7ca4620 R09: 000000e7c7ca5e60 +[ 308.533647] R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000070 +[ 308.533685] R13: 0000000000000000 R14: 0000000000000000 R15: 000000e7c7ca5930 +[ 308.533770] Code: ff df 55 48 89 e5 41 55 41 54 53 48 89 fb 48 83 c7 +50 48 89 fa 48 c1 ea 03 80 3c 02 00 74 05 e8 94 d4 16 e7 48 83 7b 50 00 +74 02 <0f> ff 4c 8d 6b 58 48 b8 00 00 00 00 00 fc ff df 4c 89 ea 48 c1 +[ 308.534086] ---[ end trace 77f11e53b1df44ad ]--- + +Solve this by adding the missing return. + +This is also a bugfix because we could end up rejecting updates with +-EINVAL because of a early -EDEADLK, while if atomic_check ran to +completion it might have downgraded the modeset to a fastset. 
+ +Signed-off-by: Maarten Lankhorst +Testcase: kms_atomic +Link: https://patchwork.freedesktop.org/patch/msgid/20170815095706.23624-1-maarten.lankhorst@linux.intel.com +Fixes: d34f20d6e2f2 ("drm: Atomic modeset ioctl") +Reviewed-by: Daniel Vetter +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/drm_atomic.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/drm_atomic.c ++++ b/drivers/gpu/drm/drm_atomic.c +@@ -1581,6 +1581,9 @@ int drm_atomic_check_only(struct drm_ato + if (config->funcs->atomic_check) + ret = config->funcs->atomic_check(state->dev, state); + ++ if (ret) ++ return ret; ++ + if (!state->allow_modeset) { + for_each_new_crtc_in_state(state, crtc, crtc_state, i) { + if (drm_atomic_crtc_needs_modeset(crtc_state)) { +@@ -1591,7 +1594,7 @@ int drm_atomic_check_only(struct drm_ato + } + } + +- return ret; ++ return 0; + } + EXPORT_SYMBOL(drm_atomic_check_only); + diff --git a/queue-4.12/drm-fix-framebuffer-leak.patch b/queue-4.12/drm-fix-framebuffer-leak.patch new file mode 100644 index 00000000000..c3fd9449235 --- /dev/null +++ b/queue-4.12/drm-fix-framebuffer-leak.patch @@ -0,0 +1,30 @@ +From 491ab4700d1b64f5cf2f9055e01613a923df5fab Mon Sep 17 00:00:00 2001 +From: Nikhil Mahale +Date: Wed, 9 Aug 2017 09:23:01 +0530 +Subject: drm: Fix framebuffer leak + +From: Nikhil Mahale + +commit 491ab4700d1b64f5cf2f9055e01613a923df5fab upstream. + +Do not leak framebuffer if client provided crtc id found invalid. 
+ +Signed-off-by: Nikhil Mahale +Signed-off-by: Daniel Vetter +Link: https://patchwork.freedesktop.org/patch/msgid/1502250781-5779-1-git-send-email-nmahale@nvidia.com +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/drm_plane.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/gpu/drm/drm_plane.c ++++ b/drivers/gpu/drm/drm_plane.c +@@ -601,6 +601,7 @@ int drm_mode_setplane(struct drm_device + + crtc = drm_crtc_find(dev, plane_req->crtc_id); + if (!crtc) { ++ drm_framebuffer_put(fb); + DRM_DEBUG_KMS("Unknown crtc ID %d\n", + plane_req->crtc_id); + return -ENOENT; diff --git a/queue-4.12/drm-i915-gvt-fix-the-kernel-null-pointer-error.patch b/queue-4.12/drm-i915-gvt-fix-the-kernel-null-pointer-error.patch new file mode 100644 index 00000000000..d97ee27f05a --- /dev/null +++ b/queue-4.12/drm-i915-gvt-fix-the-kernel-null-pointer-error.patch @@ -0,0 +1,33 @@ +From ffeaf9aaf97b4bdaf114d6df52f800d71918768c Mon Sep 17 00:00:00 2001 +From: fred gao +Date: Wed, 16 Aug 2017 15:48:03 +0800 +Subject: drm/i915/gvt: Fix the kernel null pointer error + +From: fred gao + +commit ffeaf9aaf97b4bdaf114d6df52f800d71918768c upstream. + +once error happens in shadow_indirect_ctx function, the variable +wa_ctx->indirect_ctx.obj is not initialized but accessed, so the +kernel null point panic occurs. 
+ +Fixes: 894cf7d15634 ("drm/i915/gvt: i915_gem_object_create() returns an error pointer") +Signed-off-by: fred gao +Signed-off-by: Zhenyu Wang +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/i915/gvt/cmd_parser.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c ++++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c +@@ -2754,7 +2754,7 @@ static int shadow_indirect_ctx(struct in + unmap_src: + i915_gem_object_unpin_map(obj); + put_obj: +- i915_gem_object_put(wa_ctx->indirect_ctx.obj); ++ i915_gem_object_put(obj); + return ret; + } + diff --git a/queue-4.12/drm-i915-vbt-ignore-extraneous-child-devices-for-a-port.patch b/queue-4.12/drm-i915-vbt-ignore-extraneous-child-devices-for-a-port.patch new file mode 100644 index 00000000000..bef3a39c30d --- /dev/null +++ b/queue-4.12/drm-i915-vbt-ignore-extraneous-child-devices-for-a-port.patch @@ -0,0 +1,101 @@ +From 7c648bde211baeda7a029bd6be4957e8be48d8c9 Mon Sep 17 00:00:00 2001 +From: Jani Nikula +Date: Fri, 11 Aug 2017 14:39:07 +0300 +Subject: drm/i915/vbt: ignore extraneous child devices for a port +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jani Nikula + +commit 7c648bde211baeda7a029bd6be4957e8be48d8c9 upstream. + +Ever since we've parsed VBT child devices, starting from 6acab15a7b0d +("drm/i915: use the HDMI DDI buffer translations from VBT"), we've +ignored the child device information if more than one child device +references the same port. The rationale for this seems lost in time. + +Since commit 311a20949f04 ("drm/i915: don't init DP or HDMI when not +supported by DDI port") we started using this information more to skip +HDMI/DP init if the port wasn't there per VBT child devices. However, at +the same time it added port defaults without further explanation. 
+ +Thus, if the child device info was skipped due to multiple child devices +referencing the same port, the device info would be retrieved from the +somewhat arbitrary defaults. + +Finally, when commit bb1d132935c2 ("drm/i915/vbt: split out defaults +that are set when there is no VBT") stopped initializing the defaults +whenever VBT is present, thus trusting the VBT more, we stopped +initializing ports which were referenced by more than one child device. + +Apparently at least Asus UX305UA, UX305U, and UX306U laptops have VBT +child device blocks which cause this behaviour. Arguably they were +shipped with a broken VBT. + +Relax the rules for multiple references to the same port, and use the +first child device info to reference a port. Retain the logic to debug +log about this, though. + +Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101745 +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=196233 +Fixes: bb1d132935c2 ("drm/i915/vbt: split out defaults that are set when there is no VBT") +Tested-by: Oliver Weißbarth +Reported-by: Oliver Weißbarth +Reported-by: Didier G +Reported-by: Giles Anderson +Cc: Manasi Navare +Cc: Ville Syrjälä +Cc: Paulo Zanoni +Reviewed-by: Ville Syrjälä +Signed-off-by: Jani Nikula +Link: https://patchwork.freedesktop.org/patch/msgid/20170811113907.6716-1-jani.nikula@intel.com +Signed-off-by: Jani Nikula +(cherry picked from commit b5273d72750555a673040070bfb23c454a7cd3ef) +Signed-off-by: Jani Nikula +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/i915/intel_bios.c | 15 +++++++++------ + 1 file changed, 9 insertions(+), 6 deletions(-) + +--- a/drivers/gpu/drm/i915/intel_bios.c ++++ b/drivers/gpu/drm/i915/intel_bios.c +@@ -1120,8 +1120,8 @@ static void parse_ddi_port(struct drm_i9 + bool is_dvi, is_hdmi, is_dp, is_edp, is_crt; + uint8_t aux_channel, ddc_pin; + /* Each DDI port can have more than one value on the "DVO Port" field, +- * so look for all the possible values for each port and abort if more +- * than one 
is found. */ ++ * so look for all the possible values for each port. ++ */ + int dvo_ports[][3] = { + {DVO_PORT_HDMIA, DVO_PORT_DPA, -1}, + {DVO_PORT_HDMIB, DVO_PORT_DPB, -1}, +@@ -1130,7 +1130,10 @@ static void parse_ddi_port(struct drm_i9 + {DVO_PORT_CRT, DVO_PORT_HDMIE, DVO_PORT_DPE}, + }; + +- /* Find the child device to use, abort if more than one found. */ ++ /* ++ * Find the first child device to reference the port, report if more ++ * than one found. ++ */ + for (i = 0; i < dev_priv->vbt.child_dev_num; i++) { + it = dev_priv->vbt.child_dev + i; + +@@ -1140,11 +1143,11 @@ static void parse_ddi_port(struct drm_i9 + + if (it->common.dvo_port == dvo_ports[port][j]) { + if (child) { +- DRM_DEBUG_KMS("More than one child device for port %c in VBT.\n", ++ DRM_DEBUG_KMS("More than one child device for port %c in VBT, using the first.\n", + port_name(port)); +- return; ++ } else { ++ child = it; + } +- child = it; + } + } + } diff --git a/queue-4.12/drm-release-driver-tracking-before-making-the-object-available-again.patch b/queue-4.12/drm-release-driver-tracking-before-making-the-object-available-again.patch new file mode 100644 index 00000000000..f4737ce8265 --- /dev/null +++ b/queue-4.12/drm-release-driver-tracking-before-making-the-object-available-again.patch @@ -0,0 +1,56 @@ +From fe4600a548f2763dec91b3b27a1245c370ceee2a Mon Sep 17 00:00:00 2001 +From: Chris Wilson +Date: Sat, 19 Aug 2017 13:05:58 +0100 +Subject: drm: Release driver tracking before making the object available again +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Chris Wilson + +commit fe4600a548f2763dec91b3b27a1245c370ceee2a upstream. + +This is the same bug as we fixed in commit f6cd7daecff5 ("drm: Release +driver references to handle before making it available again"), but now +the exposure is via the PRIME lookup tables. 
If we remove the +object/handle from the PRIME lut, then a new request for the same +object/fd will generate a new handle, thus for a short window that +object is known to userspace by two different handles. Fix this by +releasing the driver tracking before PRIME. + +Fixes: 0ff926c7d4f0 ("drm/prime: add exported buffers to current fprivs +imported buffer list (v2)") +Signed-off-by: Chris Wilson +Cc: David Airlie +Cc: Daniel Vetter +Cc: Rob Clark +Cc: Ville Syrjälä +Cc: Thierry Reding +Reviewed-by: Daniel Vetter +Signed-off-by: Joonas Lahtinen +Link: https://patchwork.freedesktop.org/patch/msgid/20170819120558.6465-1-chris@chris-wilson.co.uk +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/drm_gem.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/drivers/gpu/drm/drm_gem.c ++++ b/drivers/gpu/drm/drm_gem.c +@@ -255,13 +255,13 @@ drm_gem_object_release_handle(int id, vo + struct drm_gem_object *obj = ptr; + struct drm_device *dev = obj->dev; + ++ if (dev->driver->gem_close_object) ++ dev->driver->gem_close_object(obj, file_priv); ++ + if (drm_core_check_feature(dev, DRIVER_PRIME)) + drm_gem_remove_prime_handles(obj, file_priv); + drm_vma_node_revoke(&obj->vma_node, file_priv); + +- if (dev->driver->gem_close_object) +- dev->driver->gem_close_object(obj, file_priv); +- + drm_gem_object_handle_put_unlocked(obj); + + return 0; diff --git a/queue-4.12/drm-sun4i-implement-drm_driver-lastclose-to-restore-fbdev-console.patch b/queue-4.12/drm-sun4i-implement-drm_driver-lastclose-to-restore-fbdev-console.patch new file mode 100644 index 00000000000..eae35e2b178 --- /dev/null +++ b/queue-4.12/drm-sun4i-implement-drm_driver-lastclose-to-restore-fbdev-console.patch @@ -0,0 +1,47 @@ +From 2a596fc9d974bb040eda9ab70bf8756fcaaa6afe Mon Sep 17 00:00:00 2001 +From: Jonathan Liu +Date: Mon, 10 Jul 2017 16:55:04 +1000 +Subject: drm/sun4i: Implement drm_driver lastclose to restore fbdev console + +From: Jonathan Liu + +commit 
2a596fc9d974bb040eda9ab70bf8756fcaaa6afe upstream. + +The drm_driver lastclose callback is called when the last userspace +DRM client has closed. Call drm_fbdev_cma_restore_mode to restore +the fbdev console otherwise the fbdev console will stop working. + +Fixes: 9026e0d122ac ("drm: Add Allwinner A10 Display Engine support") +Tested-by: Olliver Schinagl +Reviewed-by: Chen-Yu Tsai +Signed-off-by: Jonathan Liu +Signed-off-by: Maxime Ripard +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/sun4i/sun4i_drv.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/drivers/gpu/drm/sun4i/sun4i_drv.c ++++ b/drivers/gpu/drm/sun4i/sun4i_drv.c +@@ -25,12 +25,20 @@ + #include "sun4i_framebuffer.h" + #include "sun4i_tcon.h" + ++static void sun4i_drv_lastclose(struct drm_device *dev) ++{ ++ struct sun4i_drv *drv = dev->dev_private; ++ ++ drm_fbdev_cma_restore_mode(drv->fbdev); ++} ++ + DEFINE_DRM_GEM_CMA_FOPS(sun4i_drv_fops); + + static struct drm_driver sun4i_drv_driver = { + .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_PRIME | DRIVER_ATOMIC, + + /* Generic Operations */ ++ .lastclose = sun4i_drv_lastclose, + .fops = &sun4i_drv_fops, + .name = "sun4i-drm", + .desc = "Allwinner sun4i Display Engine", diff --git a/queue-4.12/fork-fix-incorrect-fput-of-exe_file-causing-use-after-free.patch b/queue-4.12/fork-fix-incorrect-fput-of-exe_file-causing-use-after-free.patch new file mode 100644 index 00000000000..f926014157b --- /dev/null +++ b/queue-4.12/fork-fix-incorrect-fput-of-exe_file-causing-use-after-free.patch @@ -0,0 +1,105 @@ +From 2b7e8665b4ff51c034c55df3cff76518d1a9ee3a Mon Sep 17 00:00:00 2001 +From: Eric Biggers +Date: Fri, 25 Aug 2017 15:55:43 -0700 +Subject: fork: fix incorrect fput of ->exe_file causing use-after-free + +From: Eric Biggers + +commit 2b7e8665b4ff51c034c55df3cff76518d1a9ee3a upstream. 
+ +Commit 7c051267931a ("mm, fork: make dup_mmap wait for mmap_sem for +write killable") made it possible to kill a forking task while it is +waiting to acquire its ->mmap_sem for write, in dup_mmap(). + +However, it was overlooked that this introduced a new error path before +a reference is taken on the mm_struct's ->exe_file. Since the +->exe_file of the new mm_struct was already set to the old ->exe_file by +the memcpy() in dup_mm(), it was possible for the mmput() in the error +path of dup_mm() to drop a reference to ->exe_file which was never +taken. + +This caused the struct file to later be freed prematurely. + +Fix it by updating mm_init() to NULL out the ->exe_file, in the same +place it clears other things like the list of mmaps. + +This bug was found by syzkaller. It can be reproduced using the +following C program: + + #define _GNU_SOURCE + #include <pthread.h> + #include <stdlib.h> + #include <sys/mman.h> + #include <sys/syscall.h> + #include <sys/wait.h> + #include <unistd.h> + + static void *mmap_thread(void *_arg) + { + for (;;) { + mmap(NULL, 0x1000000, PROT_READ, + MAP_POPULATE|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + } + } + + static void *fork_thread(void *_arg) + { + usleep(rand() % 10000); + fork(); + } + + int main(void) + { + fork(); + fork(); + fork(); + for (;;) { + if (fork() == 0) { + pthread_t t; + + pthread_create(&t, NULL, mmap_thread, NULL); + pthread_create(&t, NULL, fork_thread, NULL); + usleep(rand() % 10000); + syscall(__NR_exit_group, 0); + } + wait(NULL); + } + } + +No special kernel config options are needed. It usually causes a NULL +pointer dereference in __remove_shared_vm_struct() during exit, or in +dup_mmap() (which is usually inlined into copy_process()) during fork. +Both are due to a vm_area_struct's ->vm_file being used after it's +already been freed.
+ +Google Bug Id: 64772007 + +Link: http://lkml.kernel.org/r/20170823211408.31198-1-ebiggers3@gmail.com +Fixes: 7c051267931a ("mm, fork: make dup_mmap wait for mmap_sem for write killable") +Signed-off-by: Eric Biggers +Tested-by: Mark Rutland +Acked-by: Michal Hocko +Cc: Dmitry Vyukov +Cc: Ingo Molnar +Cc: Konstantin Khlebnikov +Cc: Oleg Nesterov +Cc: Peter Zijlstra +Cc: Vlastimil Babka +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/fork.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -802,6 +802,7 @@ static struct mm_struct *mm_init(struct + mm_init_cpumask(mm); + mm_init_aio(mm); + mm_init_owner(mm, p); ++ RCU_INIT_POINTER(mm->exe_file, NULL); + mmu_notifier_mm_init(mm); + clear_tlb_flush_pending(mm); + #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS diff --git a/queue-4.12/ftrace-check-for-null-ret_stack-on-profile-function-graph-entry-function.patch b/queue-4.12/ftrace-check-for-null-ret_stack-on-profile-function-graph-entry-function.patch new file mode 100644 index 00000000000..657ea3535c6 --- /dev/null +++ b/queue-4.12/ftrace-check-for-null-ret_stack-on-profile-function-graph-entry-function.patch @@ -0,0 +1,46 @@ +From a8f0f9e49956a74718874b800251455680085600 Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (VMware)" +Date: Thu, 17 Aug 2017 16:37:25 -0400 +Subject: ftrace: Check for null ret_stack on profile function graph entry function + +From: Steven Rostedt (VMware) + +commit a8f0f9e49956a74718874b800251455680085600 upstream. + +There's a small race between function graph shutting down and the calling of the +registered function graph entry callback. The callback must not reference +the task's ret_stack without first checking that it is not NULL. Note, when +a ret_stack is allocated for a task, it stays allocated until the task exits.
+The problem here, is that function_graph is shutdown, and a new task was +created, which doesn't have its ret_stack allocated. But since some of the +functions are still being traced, the callbacks can still be called. + +The normal function_graph code handles this, but starting with commit +8861dd303c ("ftrace: Access ret_stack->subtime only in the function +profiler") the profiler code references the ret_stack on function entry, but +doesn't check if it is NULL first. + +Link: https://bugzilla.kernel.org/show_bug.cgi?id=196611 + +Fixes: 8861dd303c ("ftrace: Access ret_stack->subtime only in the function profiler") +Reported-by: lilydjwg@gmail.com +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/trace/ftrace.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/kernel/trace/ftrace.c ++++ b/kernel/trace/ftrace.c +@@ -878,6 +878,10 @@ static int profile_graph_entry(struct ft + + function_profile_call(trace->func, 0, NULL, NULL); + ++ /* If function graph is shutting down, ret_stack can be NULL */ ++ if (!current->ret_stack) ++ return 0; ++ + if (index >= 0 && index < FTRACE_RETFUNC_DEPTH) + current->ret_stack[index].subtime = 0; + diff --git a/queue-4.12/i2c-designware-fix-system-suspend.patch b/queue-4.12/i2c-designware-fix-system-suspend.patch new file mode 100644 index 00000000000..d8767276e94 --- /dev/null +++ b/queue-4.12/i2c-designware-fix-system-suspend.patch @@ -0,0 +1,92 @@ +From a23318feeff662c8d25d21623daebdd2e55ec221 Mon Sep 17 00:00:00 2001 +From: Ulf Hansson +Date: Wed, 9 Aug 2017 15:28:22 +0200 +Subject: i2c: designware: Fix system suspend + +From: Ulf Hansson + +commit a23318feeff662c8d25d21623daebdd2e55ec221 upstream. + +The commit 8503ff166504 ("i2c: designware: Avoid unnecessary resuming +during system suspend"), may suggest to the PM core to try out the so +called direct_complete path for system sleep. 
In this path, the PM core +treats a runtime suspended device as if it's already in a proper low power +state for system sleep, which makes it skip calling the system sleep +callbacks for the device, except for the ->prepare() and the ->complete() +callbacks. + +However, the PM core may unset the direct_complete flag for a parent +device, in case its child devices are being system suspended before it. In this +scenario, the PM core invokes the system sleep callbacks, no matter if the +device is runtime suspended or not. + +Particularly in cases of an existing i2c slave device, the above path is +triggered, which breaks the assumption that the i2c device is always +runtime resumed whenever the dw_i2c_plat_suspend() is being called. + +More precisely, dw_i2c_plat_suspend() calls clk_core_disable() and +clk_core_unprepare(), for an already disabled/unprepared clock, leading to +a splat in the log about clock calls being wrongly balanced and breaking +system sleep. + +To still allow the direct_complete path in cases when it's possible, but +also to keep the fix simple, let's runtime resume the i2c device in the +->suspend() callback, before continuing to put the device into low power +state. + +Note, in cases when the i2c device is attached to the ACPI PM domain, this +problem doesn't occur, because ACPI's ->suspend() callback, assigned to +acpi_subsys_suspend(), already calls pm_runtime_resume() for the device. + +It should also be noted that this change does not fix commit 8503ff166504 +("i2c: designware: Avoid unnecessary resuming during system suspend"), +because for the non-ACPI case, the system sleep support was already broken +prior to that point. + +Signed-off-by: Ulf Hansson +Acked-by: Rafael J.
Wysocki +Tested-by: John Stultz +Tested-by: Jarkko Nikula +Acked-by: Jarkko Nikula +Reviewed-by: Mika Westerberg +Signed-off-by: Wolfram Sang +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/i2c/busses/i2c-designware-platdrv.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +--- a/drivers/i2c/busses/i2c-designware-platdrv.c ++++ b/drivers/i2c/busses/i2c-designware-platdrv.c +@@ -392,7 +392,7 @@ static void dw_i2c_plat_complete(struct + #endif + + #ifdef CONFIG_PM +-static int dw_i2c_plat_suspend(struct device *dev) ++static int dw_i2c_plat_runtime_suspend(struct device *dev) + { + struct platform_device *pdev = to_platform_device(dev); + struct dw_i2c_dev *i_dev = platform_get_drvdata(pdev); +@@ -414,11 +414,21 @@ static int dw_i2c_plat_resume(struct dev + return 0; + } + ++#ifdef CONFIG_PM_SLEEP ++static int dw_i2c_plat_suspend(struct device *dev) ++{ ++ pm_runtime_resume(dev); ++ return dw_i2c_plat_runtime_suspend(dev); ++} ++#endif ++ + static const struct dev_pm_ops dw_i2c_dev_pm_ops = { + .prepare = dw_i2c_plat_prepare, + .complete = dw_i2c_plat_complete, + SET_SYSTEM_SLEEP_PM_OPS(dw_i2c_plat_suspend, dw_i2c_plat_resume) +- SET_RUNTIME_PM_OPS(dw_i2c_plat_suspend, dw_i2c_plat_resume, NULL) ++ SET_RUNTIME_PM_OPS(dw_i2c_plat_runtime_suspend, ++ dw_i2c_plat_resume, ++ NULL) + }; + + #define DW_I2C_DEV_PMOPS (&dw_i2c_dev_pm_ops) diff --git a/queue-4.12/kbuild-linker-script-do-not-match-c-names-unless-ld_dead_code_data_elimination-is-configured.patch b/queue-4.12/kbuild-linker-script-do-not-match-c-names-unless-ld_dead_code_data_elimination-is-configured.patch new file mode 100644 index 00000000000..a20b1d1f9a4 --- /dev/null +++ b/queue-4.12/kbuild-linker-script-do-not-match-c-names-unless-ld_dead_code_data_elimination-is-configured.patch @@ -0,0 +1,106 @@ +From cb87481ee89dbd6609e227afbf64900fb4e5c930 Mon Sep 17 00:00:00 2001 +From: Nicholas Piggin +Date: Wed, 26 Jul 2017 22:46:27 +1000 +Subject: kbuild: linker script do not match C 
names unless LD_DEAD_CODE_DATA_ELIMINATION is configured + +From: Nicholas Piggin + +commit cb87481ee89dbd6609e227afbf64900fb4e5c930 upstream. + +The .data and .bss sections were modified in the generic linker script to +pull in sections named .data., which are generated by gcc with +-ffunction-sections and -fdata-sections options. + +The problem with this pattern is it can also match section names that Linux +defines explicitly, e.g., .data.unlikely. This can cause Linux sections to +get moved into the wrong place. + +The way to avoid this is to use ".." separators for explicit section names +(the dot character is valid in a section name but not a C identifier). +However currently there are sections which don't follow this rule, so for +now just disable the wild card by default. + +Example: http://marc.info/?l=linux-arm-kernel&m=150106824024221&w=2 + +Fixes: b67067f1176df ("kbuild: allow archs to select link dead code/data elimination") +Signed-off-by: Nicholas Piggin +Signed-off-by: Masahiro Yamada +Signed-off-by: Greg Kroah-Hartman + +--- + include/asm-generic/vmlinux.lds.h | 38 ++++++++++++++++++++++++++------------ + 1 file changed, 26 insertions(+), 12 deletions(-) + +--- a/include/asm-generic/vmlinux.lds.h ++++ b/include/asm-generic/vmlinux.lds.h +@@ -60,6 +60,22 @@ + #define ALIGN_FUNCTION() . = ALIGN(8) + + /* ++ * LD_DEAD_CODE_DATA_ELIMINATION option enables -fdata-sections, which ++ * generates .data.identifier sections, which need to be pulled in with ++ * .data. We don't want to pull in .data..other sections, which Linux ++ * has defined. Same for text and bss. 
++ */ ++#ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION ++#define TEXT_MAIN .text .text.[0-9a-zA-Z_]* ++#define DATA_MAIN .data .data.[0-9a-zA-Z_]* ++#define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* ++#else ++#define TEXT_MAIN .text ++#define DATA_MAIN .data ++#define BSS_MAIN .bss ++#endif ++ ++/* + * Align to a 32 byte boundary equal to the + * alignment gcc 4.5 uses for a struct + */ +@@ -199,12 +215,9 @@ + + /* + * .data section +- * LD_DEAD_CODE_DATA_ELIMINATION option enables -fdata-sections generates +- * .data.identifier which needs to be pulled in with .data, but don't want to +- * pull in .data..stuff which has its own requirements. Same for bss. + */ + #define DATA_DATA \ +- *(.data .data.[0-9a-zA-Z_]*) \ ++ *(DATA_MAIN) \ + *(.ref.data) \ + *(.data..shared_aligned) /* percpu related */ \ + MEM_KEEP(init.data) \ +@@ -435,16 +448,17 @@ + VMLINUX_SYMBOL(__security_initcall_end) = .; \ + } + +-/* .text section. Map to function alignment to avoid address changes ++/* ++ * .text section. Map to function alignment to avoid address changes + * during second ld run in second ld pass when generating System.map +- * LD_DEAD_CODE_DATA_ELIMINATION option enables -ffunction-sections generates +- * .text.identifier which needs to be pulled in with .text , but some +- * architectures define .text.foo which is not intended to be pulled in here. +- * Those enabling LD_DEAD_CODE_DATA_ELIMINATION must ensure they don't have +- * conflicting section names, and must pull in .text.[0-9a-zA-Z_]* */ ++ * ++ * TEXT_MAIN here will match .text.fixup and .text.unlikely if dead ++ * code elimination is enabled, so these sections should be converted ++ * to use ".." first. 
++ */ + #define TEXT_TEXT \ + ALIGN_FUNCTION(); \ +- *(.text.hot .text .text.fixup .text.unlikely) \ ++ *(.text.hot TEXT_MAIN .text.fixup .text.unlikely) \ + *(.ref.text) \ + MEM_KEEP(init.text) \ + MEM_KEEP(exit.text) \ +@@ -613,7 +627,7 @@ + BSS_FIRST_SECTIONS \ + *(.bss..page_aligned) \ + *(.dynbss) \ +- *(.bss .bss.[0-9a-zA-Z_]*) \ ++ *(BSS_MAIN) \ + *(COMMON) \ + } + diff --git a/queue-4.12/mm-madvise.c-fix-freeing-of-locked-page-with-madv_free.patch b/queue-4.12/mm-madvise.c-fix-freeing-of-locked-page-with-madv_free.patch new file mode 100644 index 00000000000..7ed4a1212ab --- /dev/null +++ b/queue-4.12/mm-madvise.c-fix-freeing-of-locked-page-with-madv_free.patch @@ -0,0 +1,128 @@ +From 263630e8d176d87308481ebdcd78ef9426739c6b Mon Sep 17 00:00:00 2001 +From: Eric Biggers +Date: Fri, 25 Aug 2017 15:55:39 -0700 +Subject: mm/madvise.c: fix freeing of locked page with MADV_FREE + +From: Eric Biggers + +commit 263630e8d176d87308481ebdcd78ef9426739c6b upstream. + +If madvise(..., MADV_FREE) split a transparent hugepage, it called +put_page() before unlock_page(). + +This was wrong because put_page() can free the page, e.g. if a +concurrent madvise(..., MADV_DONTNEED) has removed it from the memory +mapping. put_page() then rightfully complained about freeing a locked +page. + +Fix this by moving the unlock_page() before put_page(). 
+ +This bug was found by syzkaller, which encountered the following splat: + + BUG: Bad page state in process syzkaller412798 pfn:1bd800 + page:ffffea0006f60000 count:0 mapcount:0 mapping: (null) index:0x20a00 + flags: 0x200000000040019(locked|uptodate|dirty|swapbacked) + raw: 0200000000040019 0000000000000000 0000000000020a00 00000000ffffffff + raw: ffffea0006f60020 ffffea0006f60020 0000000000000000 0000000000000000 + page dumped because: PAGE_FLAGS_CHECK_AT_FREE flag(s) set + bad because of flags: 0x1(locked) + Modules linked in: + CPU: 1 PID: 3037 Comm: syzkaller412798 Not tainted 4.13.0-rc5+ #35 + Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 + Call Trace: + __dump_stack lib/dump_stack.c:16 [inline] + dump_stack+0x194/0x257 lib/dump_stack.c:52 + bad_page+0x230/0x2b0 mm/page_alloc.c:565 + free_pages_check_bad+0x1f0/0x2e0 mm/page_alloc.c:943 + free_pages_check mm/page_alloc.c:952 [inline] + free_pages_prepare mm/page_alloc.c:1043 [inline] + free_pcp_prepare mm/page_alloc.c:1068 [inline] + free_hot_cold_page+0x8cf/0x12b0 mm/page_alloc.c:2584 + __put_single_page mm/swap.c:79 [inline] + __put_page+0xfb/0x160 mm/swap.c:113 + put_page include/linux/mm.h:814 [inline] + madvise_free_pte_range+0x137a/0x1ec0 mm/madvise.c:371 + walk_pmd_range mm/pagewalk.c:50 [inline] + walk_pud_range mm/pagewalk.c:108 [inline] + walk_p4d_range mm/pagewalk.c:134 [inline] + walk_pgd_range mm/pagewalk.c:160 [inline] + __walk_page_range+0xc3a/0x1450 mm/pagewalk.c:249 + walk_page_range+0x200/0x470 mm/pagewalk.c:326 + madvise_free_page_range.isra.9+0x17d/0x230 mm/madvise.c:444 + madvise_free_single_vma+0x353/0x580 mm/madvise.c:471 + madvise_dontneed_free mm/madvise.c:555 [inline] + madvise_vma mm/madvise.c:664 [inline] + SYSC_madvise mm/madvise.c:832 [inline] + SyS_madvise+0x7d3/0x13c0 mm/madvise.c:760 + entry_SYSCALL_64_fastpath+0x1f/0xbe + +Here is a C reproducer: + + #define _GNU_SOURCE + #include + #include + #include + + #define MADV_FREE 8 + 
#define PAGE_SIZE 4096 + + static void *mapping; + static const size_t mapping_size = 0x1000000; + + static void *madvise_thrproc(void *arg) + { + madvise(mapping, mapping_size, (long)arg); + } + + int main(void) + { + pthread_t t[2]; + + for (;;) { + mapping = mmap(NULL, mapping_size, PROT_WRITE, + MAP_POPULATE|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + + munmap(mapping + mapping_size / 2, PAGE_SIZE); + + pthread_create(&t[0], 0, madvise_thrproc, (void*)MADV_DONTNEED); + pthread_create(&t[1], 0, madvise_thrproc, (void*)MADV_FREE); + pthread_join(t[0], NULL); + pthread_join(t[1], NULL); + munmap(mapping, mapping_size); + } + } + +Note: to see the splat, CONFIG_TRANSPARENT_HUGEPAGE=y and +CONFIG_DEBUG_VM=y are needed. + +Google Bug Id: 64696096 + +Link: http://lkml.kernel.org/r/20170823205235.132061-1-ebiggers3@gmail.com +Fixes: 854e9ed09ded ("mm: support madvise(MADV_FREE)") +Signed-off-by: Eric Biggers +Acked-by: David Rientjes +Acked-by: Minchan Kim +Acked-by: Michal Hocko +Cc: Dmitry Vyukov +Cc: Hugh Dickins +Cc: Andrea Arcangeli +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/madvise.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/madvise.c ++++ b/mm/madvise.c +@@ -368,8 +368,8 @@ static int madvise_free_pte_range(pmd_t + pte_offset_map_lock(mm, pmd, addr, &ptl); + goto out; + } +- put_page(page); + unlock_page(page); ++ put_page(page); + pte = pte_offset_map_lock(mm, pmd, addr, &ptl); + pte--; + addr -= PAGE_SIZE; diff --git a/queue-4.12/mm-memblock.c-reversed-logic-in-memblock_discard.patch b/queue-4.12/mm-memblock.c-reversed-logic-in-memblock_discard.patch new file mode 100644 index 00000000000..b172929c4e5 --- /dev/null +++ b/queue-4.12/mm-memblock.c-reversed-logic-in-memblock_discard.patch @@ -0,0 +1,38 @@ +From 91b540f98872a206ea1c49e4aa6ea8eed0886644 Mon Sep 17 00:00:00 2001 +From: Pavel Tatashin +Date: Fri, 25 Aug 2017 15:55:46 -0700 +Subject: mm/memblock.c: reversed logic in 
memblock_discard() + +From: Pavel Tatashin + +commit 91b540f98872a206ea1c49e4aa6ea8eed0886644 upstream. + +In the recently introduced memblock_discard() there is a reversed logic bug: +the static array is freed instead of the dynamically allocated one. + +Link: http://lkml.kernel.org/r/1503511441-95478-2-git-send-email-pasha.tatashin@oracle.com +Fixes: 3010f876500f ("mm: discard memblock data later") +Signed-off-by: Pavel Tatashin +Reported-by: Woody Suwalski +Tested-by: Woody Suwalski +Acked-by: Michal Hocko +Cc: Vlastimil Babka +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/memblock.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/memblock.c ++++ b/mm/memblock.c +@@ -302,7 +302,7 @@ void __init memblock_discard(void) + __memblock_free_late(addr, size); + } + +- if (memblock.memory.regions == memblock_memory_init_regions) { ++ if (memblock.memory.regions != memblock_memory_init_regions) { + addr = __pa(memblock.memory.regions); + size = PAGE_ALIGN(sizeof(struct memblock_region) * + memblock.memory.max); diff --git a/queue-4.12/net-sunrpc-svcsock-fix-null-pointer-exception.patch b/queue-4.12/net-sunrpc-svcsock-fix-null-pointer-exception.patch new file mode 100644 index 00000000000..79a80804795 --- /dev/null +++ b/queue-4.12/net-sunrpc-svcsock-fix-null-pointer-exception.patch @@ -0,0 +1,186 @@ +From eebe53e87f97975ee58a21693e44797608bf679c Mon Sep 17 00:00:00 2001 +From: Vadim Lomovtsev +Date: Mon, 21 Aug 2017 07:23:07 -0400 +Subject: net: sunrpc: svcsock: fix NULL-pointer exception + +From: Vadim Lomovtsev + +commit eebe53e87f97975ee58a21693e44797608bf679c upstream. + +While running nfs/connectathon tests, a kernel NULL-pointer exception +has been observed due to races in svcsock.c. + +The race appears when the kernel accepts a connection via kernel_accept +(which creates a new socket) and starts queuing ingress packets +to the new socket.
This happens in ksoftirq context which could run +concurrently on a different core while new socket setup is not done yet. + +The fix is to re-order socket user data init sequence and add +write/read barrier calls to be sure that we got proper values +for callback pointers before actually calling them. + +Test results: nfs/connectathon reports '0' failed tests for about 200+ iterations. + +Crash log: +---<-snip->--- +[ 6708.638984] Unable to handle kernel NULL pointer dereference at virtual address 00000000 +[ 6708.647093] pgd = ffff0000094e0000 +[ 6708.650497] [00000000] *pgd=0000010ffff90003, *pud=0000010ffff90003, *pmd=0000010ffff80003, *pte=0000000000000000 +[ 6708.660761] Internal error: Oops: 86000005 [#1] SMP +[ 6708.665630] Modules linked in: nfsv3 nfnetlink_queue nfnetlink_log nfnetlink rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache overlay xt_CONNSECMARK xt_SECMARK xt_conntrack iptable_security ip_tables ah4 xfrm4_mode_transport sctp tun binfmt_misc ext4 jbd2 mbcache loop tcp_diag udp_diag inet_diag rpcrdma ib_isert iscsi_target_mod ib_iser rdma_cm iw_cm libiscsi scsi_transport_iscsi ib_srpt target_core_mod ib_srp scsi_transport_srp ib_ipoib ib_ucm ib_uverbs ib_umad ib_cm ib_core nls_koi8_u nls_cp932 ts_kmp nf_conntrack_ipv4 nf_defrag_ipv4 nf_conntrack vfat fat ghash_ce sha2_ce sha1_ce cavium_rng_vf i2c_thunderx sg thunderx_edac i2c_smbus edac_core cavium_rng nfsd auth_rpcgss nfs_acl lockd grace sunrpc xfs libcrc32c nicvf nicpf ast i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops +[ 6708.736446] ttm drm i2c_core thunder_bgx thunder_xcv mdio_thunder mdio_cavium dm_mirror dm_region_hash dm_log dm_mod [last unloaded: stap_3c300909c5b3f46dcacd49aab3334af_87021] +[ 6708.752275] CPU: 84 PID: 0 Comm: swapper/84 Tainted: G W OE 4.11.0-4.el7.aarch64 #1 +[ 6708.760787] Hardware name: www.cavium.com CRB-2S/CRB-2S, BIOS 0.3 Mar 13 2017 +[ 6708.767910] task: ffff810006842e80 task.stack: ffff81000689c000 +[ 6708.773822] PC is at 0x0 +[ 
6708.776739] LR is at svc_data_ready+0x38/0x88 [sunrpc] +[ 6708.781866] pc : [<0000000000000000>] lr : [] pstate: 60000145 +[ 6708.789248] sp : ffff810ffbad3900 +[ 6708.792551] x29: ffff810ffbad3900 x28: ffff000008c73d58 +[ 6708.797853] x27: 0000000000000000 x26: ffff81000bbe1e00 +[ 6708.803156] x25: 0000000000000020 x24: ffff800f7410bf28 +[ 6708.808458] x23: ffff000008c63000 x22: ffff000008c63000 +[ 6708.813760] x21: ffff800f7410bf28 x20: ffff81000bbe1e00 +[ 6708.819063] x19: ffff810012412400 x18: 00000000d82a9df2 +[ 6708.824365] x17: 0000000000000000 x16: 0000000000000000 +[ 6708.829667] x15: 0000000000000000 x14: 0000000000000001 +[ 6708.834969] x13: 0000000000000000 x12: 722e736f622e676e +[ 6708.840271] x11: 00000000f814dd99 x10: 0000000000000000 +[ 6708.845573] x9 : 7374687225000000 x8 : 0000000000000000 +[ 6708.850875] x7 : 0000000000000000 x6 : 0000000000000000 +[ 6708.856177] x5 : 0000000000000028 x4 : 0000000000000000 +[ 6708.861479] x3 : 0000000000000000 x2 : 00000000e5000000 +[ 6708.866781] x1 : 0000000000000000 x0 : ffff81000bbe1e00 +[ 6708.872084] +[ 6708.873565] Process swapper/84 (pid: 0, stack limit = 0xffff81000689c000) +[ 6708.880341] Stack: (0xffff810ffbad3900 to 0xffff8100068a0000) +[ 6708.886075] Call trace: +[ 6708.888513] Exception stack(0xffff810ffbad3710 to 0xffff810ffbad3840) +[ 6708.894942] 3700: ffff810012412400 0001000000000000 +[ 6708.902759] 3720: ffff810ffbad3900 0000000000000000 0000000060000145 ffff800f79300000 +[ 6708.910577] 3740: ffff000009274d00 00000000000003ea 0000000000000015 ffff000008c63000 +[ 6708.918395] 3760: ffff810ffbad3830 ffff800f79300000 000000000000004d 0000000000000000 +[ 6708.926212] 3780: ffff810ffbad3890 ffff0000080f88dc ffff800f79300000 000000000000004d +[ 6708.934030] 37a0: ffff800f7930093c ffff000008c63000 0000000000000000 0000000000000140 +[ 6708.941848] 37c0: ffff000008c2c000 0000000000040b00 ffff81000bbe1e00 0000000000000000 +[ 6708.949665] 37e0: 00000000e5000000 0000000000000000 0000000000000000 
0000000000000028 +[ 6708.957483] 3800: 0000000000000000 0000000000000000 0000000000000000 7374687225000000 +[ 6708.965300] 3820: 0000000000000000 00000000f814dd99 722e736f622e676e 0000000000000000 +[ 6708.973117] [< (null)>] (null) +[ 6708.977824] [] tcp_data_queue+0x754/0xc5c +[ 6708.983386] [] tcp_rcv_established+0x1a0/0x67c +[ 6708.989384] [] tcp_v4_do_rcv+0x15c/0x22c +[ 6708.994858] [] tcp_v4_rcv+0xaf0/0xb58 +[ 6709.000077] [] ip_local_deliver_finish+0x10c/0x254 +[ 6709.006419] [] ip_local_deliver+0xf0/0xfc +[ 6709.011980] [] ip_rcv_finish+0x208/0x3a4 +[ 6709.017454] [] ip_rcv+0x2dc/0x3c8 +[ 6709.022328] [] __netif_receive_skb_core+0x2f8/0xa0c +[ 6709.028758] [] __netif_receive_skb+0x38/0x84 +[ 6709.034580] [] netif_receive_skb_internal+0x68/0xdc +[ 6709.041010] [] napi_gro_receive+0xcc/0x1a8 +[ 6709.046690] [] nicvf_cq_intr_handler+0x59c/0x730 [nicvf] +[ 6709.053559] [] nicvf_poll+0x38/0xb8 [nicvf] +[ 6709.059295] [] net_rx_action+0x2f8/0x464 +[ 6709.064771] [] __do_softirq+0x11c/0x308 +[ 6709.070164] [] irq_exit+0x12c/0x174 +[ 6709.075206] [] __handle_domain_irq+0x78/0xc4 +[ 6709.081027] [] gic_handle_irq+0x94/0x190 +[ 6709.086501] Exception stack(0xffff81000689fdf0 to 0xffff81000689ff20) +[ 6709.092929] fde0: 0000810ff2ec0000 ffff000008c10000 +[ 6709.100747] fe00: ffff000008c70ef4 0000000000000001 0000000000000000 ffff810ffbad9b18 +[ 6709.108565] fe20: ffff810ffbad9c70 ffff8100169d3800 ffff810006843ab0 ffff81000689fe80 +[ 6709.116382] fe40: 0000000000000bd0 0000ffffdf979cd0 183f5913da192500 0000ffff8a254ce4 +[ 6709.124200] fe60: 0000ffff8a254b78 0000aaab10339808 0000000000000000 0000ffff8a0c2a50 +[ 6709.132018] fe80: 0000ffffdf979b10 ffff000008d6d450 ffff000008c10000 ffff000008d6d000 +[ 6709.139836] fea0: 0000000000000054 ffff000008cd3dbc 0000000000000000 0000000000000000 +[ 6709.147653] fec0: 0000000000000000 0000000000000000 0000000000000000 ffff81000689ff20 +[ 6709.155471] fee0: ffff000008085240 ffff81000689ff20 ffff000008085244 0000000060000145 +[ 
6709.163289] ff00: ffff81000689ff10 ffff00000813f1e4 ffffffffffffffff ffff00000813f238 +[ 6709.171107] [] el1_irq+0xb4/0x140 +[ 6709.175976] [] arch_cpu_idle+0x44/0x11c +[ 6709.181368] [] default_idle_call+0x20/0x30 +[ 6709.187020] [] do_idle+0x158/0x1e4 +[ 6709.191973] [] cpu_startup_entry+0x2c/0x30 +[ 6709.197624] [] secondary_start_kernel+0x13c/0x160 +[ 6709.203878] [<0000000001bc71c4>] 0x1bc71c4 +[ 6709.207967] Code: bad PC value +[ 6709.211061] SMP: stopping secondary CPUs +[ 6709.218830] Starting crashdump kernel... +[ 6709.222749] Bye! +---<-snip>--- + +Signed-off-by: Vadim Lomovtsev +Reviewed-by: Jeff Layton +Signed-off-by: J. Bruce Fields +Signed-off-by: Greg Kroah-Hartman + +--- + net/sunrpc/svcsock.c | 22 ++++++++++++++++++++-- + 1 file changed, 20 insertions(+), 2 deletions(-) + +--- a/net/sunrpc/svcsock.c ++++ b/net/sunrpc/svcsock.c +@@ -421,6 +421,9 @@ static void svc_data_ready(struct sock * + dprintk("svc: socket %p(inet %p), busy=%d\n", + svsk, sk, + test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); ++ ++ /* Refer to svc_setup_socket() for details. */ ++ rmb(); + svsk->sk_odata(sk); + if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags)) + svc_xprt_enqueue(&svsk->sk_xprt); +@@ -437,6 +440,9 @@ static void svc_write_space(struct sock + if (svsk) { + dprintk("svc: socket %p(inet %p), write_space busy=%d\n", + svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); ++ ++ /* Refer to svc_setup_socket() for details. */ ++ rmb(); + svsk->sk_owspace(sk); + svc_xprt_enqueue(&svsk->sk_xprt); + } +@@ -760,8 +766,12 @@ static void svc_tcp_listen_data_ready(st + dprintk("svc: socket %p TCP (listen) state change %d\n", + sk, sk->sk_state); + +- if (svsk) ++ if (svsk) { ++ /* Refer to svc_setup_socket() for details. 
*/ ++ rmb(); + svsk->sk_odata(sk); ++ } ++ + /* + * This callback may called twice when a new connection + * is established as a child socket inherits everything +@@ -794,6 +804,8 @@ static void svc_tcp_state_change(struct + if (!svsk) + printk("svc: socket %p: no user data\n", sk); + else { ++ /* Refer to svc_setup_socket() for details. */ ++ rmb(); + svsk->sk_ostate(sk); + if (sk->sk_state != TCP_ESTABLISHED) { + set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); +@@ -1381,12 +1393,18 @@ static struct svc_sock *svc_setup_socket + return ERR_PTR(err); + } + +- inet->sk_user_data = svsk; + svsk->sk_sock = sock; + svsk->sk_sk = inet; + svsk->sk_ostate = inet->sk_state_change; + svsk->sk_odata = inet->sk_data_ready; + svsk->sk_owspace = inet->sk_write_space; ++ /* ++ * This barrier is necessary in order to prevent race condition ++ * with svc_data_ready(), svc_listen_data_ready() and others ++ * when calling callbacks above. ++ */ ++ wmb(); ++ inet->sk_user_data = svsk; + + /* Initialize the socket */ + if (sock->type == SOCK_DGRAM) diff --git a/queue-4.12/netfilter-expect-fix-crash-when-putting-uninited-expectation.patch b/queue-4.12/netfilter-expect-fix-crash-when-putting-uninited-expectation.patch new file mode 100644 index 00000000000..ab467716e38 --- /dev/null +++ b/queue-4.12/netfilter-expect-fix-crash-when-putting-uninited-expectation.patch @@ -0,0 +1,36 @@ +From 36ac344e16e04e3e55e8fed7446095a6458c64e6 Mon Sep 17 00:00:00 2001 +From: Florian Westphal +Date: Mon, 10 Jul 2017 13:53:53 +0200 +Subject: netfilter: expect: fix crash when putting uninited expectation + +From: Florian Westphal + +commit 36ac344e16e04e3e55e8fed7446095a6458c64e6 upstream. + +We crash in __nf_ct_expect_check, it calls nf_ct_remove_expect on the +uninitialised expectation instead of existing one, so del_timer chokes +on random memory address. 
+ +Fixes: ec0e3f01114ad32711243 ("netfilter: nf_ct_expect: Add nf_ct_remove_expect()") +Reported-by: Sergey Kvachonok +Tested-by: Sergey Kvachonok +Cc: Gao Feng +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman + +--- + net/netfilter/nf_conntrack_expect.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/netfilter/nf_conntrack_expect.c ++++ b/net/netfilter/nf_conntrack_expect.c +@@ -422,7 +422,7 @@ static inline int __nf_ct_expect_check(s + h = nf_ct_expect_dst_hash(net, &expect->tuple); + hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) { + if (expect_matches(i, expect)) { +- if (nf_ct_remove_expect(expect)) ++ if (nf_ct_remove_expect(i)) + break; + } else if (expect_clash(i, expect)) { + ret = -EBUSY; diff --git a/queue-4.12/netfilter-nat-fix-src-map-lookup.patch b/queue-4.12/netfilter-nat-fix-src-map-lookup.patch new file mode 100644 index 00000000000..d184c3aaf1e --- /dev/null +++ b/queue-4.12/netfilter-nat-fix-src-map-lookup.patch @@ -0,0 +1,68 @@ +From 97772bcd56efa21d9d8976db6f205574ea602f51 Mon Sep 17 00:00:00 2001 +From: Florian Westphal +Date: Fri, 7 Jul 2017 13:07:17 +0200 +Subject: netfilter: nat: fix src map lookup + +From: Florian Westphal + +commit 97772bcd56efa21d9d8976db6f205574ea602f51 upstream. + +When doing the initial conversion to rhashtable I replaced the bucket +walk with a single rhashtable_lookup_fast(). + +When moving to rhlist I failed to properly walk the list of identical +tuples, but that is what is needed for this to work correctly. +The table contains the original tuples, so the reply tuples are all +distinct. + +We currently decide that mapping is (not) in range only based on the +first entry, but in case it's not we need to try the reply tuple of the +next entry until we either find an in-range mapping or we have checked +all the entries. + +This bug makes nat core attempt collision resolution while it might be +able to use the mapping as-is.
+ +Fixes: 870190a9ec90 ("netfilter: nat: convert nat bysrc hash to rhashtable") +Reported-by: Jaco Kroon +Tested-by: Jaco Kroon +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman + +--- + net/netfilter/nf_nat_core.c | 17 +++++++++-------- + 1 file changed, 9 insertions(+), 8 deletions(-) + +--- a/net/netfilter/nf_nat_core.c ++++ b/net/netfilter/nf_nat_core.c +@@ -222,20 +222,21 @@ find_appropriate_src(struct net *net, + .tuple = tuple, + .zone = zone + }; +- struct rhlist_head *hl; ++ struct rhlist_head *hl, *h; + + hl = rhltable_lookup(&nf_nat_bysource_table, &key, + nf_nat_bysource_params); +- if (!hl) +- return 0; + +- ct = container_of(hl, typeof(*ct), nat_bysource); ++ rhl_for_each_entry_rcu(ct, h, hl, nat_bysource) { ++ nf_ct_invert_tuplepr(result, ++ &ct->tuplehash[IP_CT_DIR_REPLY].tuple); ++ result->dst = tuple->dst; + +- nf_ct_invert_tuplepr(result, +- &ct->tuplehash[IP_CT_DIR_REPLY].tuple); +- result->dst = tuple->dst; ++ if (in_range(l3proto, l4proto, result, range)) ++ return 1; ++ } + +- return in_range(l3proto, l4proto, result, range); ++ return 0; + } + + /* For [FUTURE] fragmentation handling, we want the least-used diff --git a/queue-4.12/netfilter-nfnetlink-improve-input-length-sanitization-in-nfnetlink_rcv.patch b/queue-4.12/netfilter-nfnetlink-improve-input-length-sanitization-in-nfnetlink_rcv.patch new file mode 100644 index 00000000000..881a50b60ab --- /dev/null +++ b/queue-4.12/netfilter-nfnetlink-improve-input-length-sanitization-in-nfnetlink_rcv.patch @@ -0,0 +1,52 @@ +From f55ce7b024090a51382ccab2730b96e2f7b4e9cf Mon Sep 17 00:00:00 2001 +From: Mateusz Jurczyk +Date: Wed, 7 Jun 2017 15:50:38 +0200 +Subject: netfilter: nfnetlink: Improve input length sanitization in nfnetlink_rcv + +From: Mateusz Jurczyk + +commit f55ce7b024090a51382ccab2730b96e2f7b4e9cf upstream. 
+ +Verify that the length of the socket buffer is sufficient to cover the +nlmsghdr structure before accessing the nlh->nlmsg_len field for further +input sanitization. If the client only supplies 1-3 bytes of data in +sk_buff, then nlh->nlmsg_len remains partially uninitialized and +contains leftover memory from the corresponding kernel allocation. +Operating on such data may result in indeterminate evaluation of the +nlmsg_len < NLMSG_HDRLEN expression. + +The bug was discovered by a runtime instrumentation designed to detect +use of uninitialized memory in the kernel. The patch prevents this and +other similar tools (e.g. KMSAN) from flagging this behavior in the future. + +Signed-off-by: Mateusz Jurczyk +Signed-off-by: Pablo Neira Ayuso +Cc: Florian Westphal +Signed-off-by: Greg Kroah-Hartman + +--- + net/netfilter/nfnetlink.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/net/netfilter/nfnetlink.c ++++ b/net/netfilter/nfnetlink.c +@@ -463,8 +463,7 @@ static void nfnetlink_rcv_skb_batch(stru + if (msglen > skb->len) + msglen = skb->len; + +- if (nlh->nlmsg_len < NLMSG_HDRLEN || +- skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg)) ++ if (skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg)) + return; + + err = nla_parse(cda, NFNL_BATCH_MAX, attr, attrlen, nfnl_batch_policy, +@@ -491,7 +490,8 @@ static void nfnetlink_rcv(struct sk_buff + { + struct nlmsghdr *nlh = nlmsg_hdr(skb); + +- if (nlh->nlmsg_len < NLMSG_HDRLEN || ++ if (skb->len < NLMSG_HDRLEN || ++ nlh->nlmsg_len < NLMSG_HDRLEN || + skb->len < nlh->nlmsg_len) + return; + diff --git a/queue-4.12/nfsd-limit-end-of-page-list-when-decoding-nfsv4-write.patch b/queue-4.12/nfsd-limit-end-of-page-list-when-decoding-nfsv4-write.patch new file mode 100644 index 00000000000..118e2562a32 --- /dev/null +++ b/queue-4.12/nfsd-limit-end-of-page-list-when-decoding-nfsv4-write.patch @@ -0,0 +1,48 @@ +From fc788f64f1f3eb31e87d4f53bcf1ab76590d5838 Mon Sep 17 00:00:00 2001 +From: Chuck Lever +Date: Fri, 
18 Aug 2017 11:12:19 -0400 +Subject: nfsd: Limit end of page list when decoding NFSv4 WRITE + +From: Chuck Lever + +commit fc788f64f1f3eb31e87d4f53bcf1ab76590d5838 upstream. + +When processing an NFSv4 WRITE operation, argp->end should never +point past the end of the data in the final page of the page list. +Otherwise, nfsd4_decode_compound can walk into uninitialized memory. + +More critical, nfsd4_decode_write is failing to increment argp->pagelen +when it increments argp->pagelist. This can cause later xdr decoders +to assume more data is available than really is, which can cause server +crashes on malformed requests. + +Signed-off-by: Chuck Lever +Signed-off-by: J. Bruce Fields +Signed-off-by: Greg Kroah-Hartman + +--- + fs/nfsd/nfs4xdr.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +--- a/fs/nfsd/nfs4xdr.c ++++ b/fs/nfsd/nfs4xdr.c +@@ -144,7 +144,7 @@ static void next_decode_page(struct nfsd + argp->p = page_address(argp->pagelist[0]); + argp->pagelist++; + if (argp->pagelen < PAGE_SIZE) { +- argp->end = argp->p + (argp->pagelen>>2); ++ argp->end = argp->p + XDR_QUADLEN(argp->pagelen); + argp->pagelen = 0; + } else { + argp->end = argp->p + (PAGE_SIZE>>2); +@@ -1279,9 +1279,7 @@ nfsd4_decode_write(struct nfsd4_compound + argp->pagelen -= pages * PAGE_SIZE; + len -= pages * PAGE_SIZE; + +- argp->p = (__be32 *)page_address(argp->pagelist[0]); +- argp->pagelist++; +- argp->end = argp->p + XDR_QUADLEN(PAGE_SIZE); ++ next_decode_page(argp); + } + argp->p += XDR_QUADLEN(len); + diff --git a/queue-4.12/perf-core-fix-group-cpu-task-validation.patch b/queue-4.12/perf-core-fix-group-cpu-task-validation.patch new file mode 100644 index 00000000000..bcca0153888 --- /dev/null +++ b/queue-4.12/perf-core-fix-group-cpu-task-validation.patch @@ -0,0 +1,182 @@ +From 64aee2a965cf2954a038b5522f11d2cd2f0f8f3e Mon Sep 17 00:00:00 2001 +From: Mark Rutland +Date: Thu, 22 Jun 2017 15:41:38 +0100 +Subject: perf/core: Fix group {cpu,task} validation + +From: Mark 
Rutland + +commit 64aee2a965cf2954a038b5522f11d2cd2f0f8f3e upstream. + +Regardless of which events form a group, it does not make sense for the +events to target different tasks and/or CPUs, as this leaves the group +inconsistent and impossible to schedule. The core perf code assumes that +these are consistent across (successfully initialised) groups. + +Core perf code only verifies this when moving SW events into a HW +context. Thus, we can violate this requirement for pure SW groups and +pure HW groups, unless the relevant PMU driver happens to perform this +verification itself. These mismatched groups subsequently wreak havoc +elsewhere. + +For example, we handle watchpoints as SW events, and reserve watchpoint +HW on a per-CPU basis at pmu::event_init() time to ensure that any event +that is initialised is guaranteed to have a slot at pmu::add() time. +However, the core code only checks the group leader's cpu filter (via +event_filter_match()), and can thus install follower events onto CPUs +violating their (mismatched) CPU filters, potentially installing them +into a CPU without sufficient reserved slots. + +This can be triggered with the below test case, resulting in warnings +from arch backends. + + #define _GNU_SOURCE + #include + #include + #include + #include + #include + #include + #include + + static int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu, + int group_fd, unsigned long flags) + { + return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); + } + + char watched_char; + + struct perf_event_attr wp_attr = { + .type = PERF_TYPE_BREAKPOINT, + .bp_type = HW_BREAKPOINT_RW, + .bp_addr = (unsigned long)&watched_char, + .bp_len = 1, + .size = sizeof(wp_attr), + }; + + int main(int argc, char *argv[]) + { + int leader, ret; + cpu_set_t cpus; + + /* + * Force use of CPU0 to ensure our CPU0-bound events get scheduled.
+ */ + CPU_ZERO(&cpus); + CPU_SET(0, &cpus); + ret = sched_setaffinity(0, sizeof(cpus), &cpus); + if (ret) { + printf("Unable to set cpu affinity\n"); + return 1; + } + + /* open leader event, bound to this task, CPU0 only */ + leader = perf_event_open(&wp_attr, 0, 0, -1, 0); + if (leader < 0) { + printf("Couldn't open leader: %d\n", leader); + return 1; + } + + /* + * Open a follower event that is bound to the same task, but a + * different CPU. This means that the group should never be possible to + * schedule. + */ + ret = perf_event_open(&wp_attr, 0, 1, leader, 0); + if (ret < 0) { + printf("Couldn't open mismatched follower: %d\n", ret); + return 1; + } else { + printf("Opened leader/follower with mismastched CPUs\n"); + } + + /* + * Open as many independent events as we can, all bound to the same + * task, CPU0 only. + */ + do { + ret = perf_event_open(&wp_attr, 0, 0, -1, 0); + } while (ret >= 0); + + /* + * Force enable/disble all events to trigger the erronoeous + * installation of the follower event. + */ + printf("Opened all events. Toggling..\n"); + for (;;) { + prctl(PR_TASK_PERF_EVENTS_DISABLE, 0, 0, 0, 0); + prctl(PR_TASK_PERF_EVENTS_ENABLE, 0, 0, 0, 0); + } + + return 0; + } + +Fix this by validating this requirement regardless of whether we're +moving events. 
+ +Signed-off-by: Mark Rutland +Signed-off-by: Peter Zijlstra (Intel) +Cc: Alexander Shishkin +Cc: Arnaldo Carvalho de Melo +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: Zhou Chengming +Link: http://lkml.kernel.org/r/1498142498-15758-1-git-send-email-mark.rutland@arm.com +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/events/core.c | 39 +++++++++++++++++++-------------------- + 1 file changed, 19 insertions(+), 20 deletions(-) + +--- a/kernel/events/core.c ++++ b/kernel/events/core.c +@@ -9996,28 +9996,27 @@ SYSCALL_DEFINE5(perf_event_open, + goto err_context; + + /* +- * Do not allow to attach to a group in a different +- * task or CPU context: ++ * Make sure we're both events for the same CPU; ++ * grouping events for different CPUs is broken; since ++ * you can never concurrently schedule them anyhow. + */ +- if (move_group) { +- /* +- * Make sure we're both on the same task, or both +- * per-cpu events. +- */ +- if (group_leader->ctx->task != ctx->task) +- goto err_context; ++ if (group_leader->cpu != event->cpu) ++ goto err_context; ++ ++ /* ++ * Make sure we're both on the same task, or both ++ * per-CPU events. ++ */ ++ if (group_leader->ctx->task != ctx->task) ++ goto err_context; + +- /* +- * Make sure we're both events for the same CPU; +- * grouping events for different CPUs is broken; since +- * you can never concurrently schedule them anyhow. +- */ +- if (group_leader->cpu != event->cpu) +- goto err_context; +- } else { +- if (group_leader->ctx != ctx) +- goto err_context; +- } ++ /* ++ * Do not allow to attach to a group in a different task ++ * or CPU context. If we're moving SW events, we'll fix ++ * this up later, so allow that. 
++ */ ++ if (!move_group && group_leader->ctx != ctx) ++ goto err_context; + + /* + * Only a group leader can be exclusive or pinned diff --git a/queue-4.12/rdma-uverbs-initialize-cq_context-appropriately.patch b/queue-4.12/rdma-uverbs-initialize-cq_context-appropriately.patch new file mode 100644 index 00000000000..6bcf3cc7111 --- /dev/null +++ b/queue-4.12/rdma-uverbs-initialize-cq_context-appropriately.patch @@ -0,0 +1,35 @@ +From 65159c051c45f269cf40a14f9404248f2d524920 Mon Sep 17 00:00:00 2001 +From: Bharat Potnuri +Date: Tue, 1 Aug 2017 10:58:35 +0530 +Subject: RDMA/uverbs: Initialize cq_context appropriately + +From: Bharat Potnuri + +commit 65159c051c45f269cf40a14f9404248f2d524920 upstream. + +Initializing cq_context with ev_queue in create_cq(), leads to NULL pointer +dereference in ib_uverbs_comp_handler(), if application does not use completion +channel. This patch fixes the cq_context initialization. + +Fixes: 1e7710f3f65 ("IB/core: Change completion channel to use the reworked") +Signed-off-by: Potnuri Bharat Teja +Reviewed-by: Matan Barak +Signed-off-by: Doug Ledford +(cherry picked from commit 699a2d5b1b880b4e4e1c7d55fa25659322cf5b51) +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/infiniband/core/uverbs_cmd.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/infiniband/core/uverbs_cmd.c ++++ b/drivers/infiniband/core/uverbs_cmd.c +@@ -1015,7 +1015,7 @@ static struct ib_ucq_object *create_cq(s + cq->uobject = &obj->uobject; + cq->comp_handler = ib_uverbs_comp_handler; + cq->event_handler = ib_uverbs_cq_event_handler; +- cq->cq_context = &ev_file->ev_queue; ++ cq->cq_context = ev_file ?
&ev_file->ev_queue : NULL; + atomic_set(&cq->usecnt, 0); + + obj->uobject.object = cq; diff --git a/queue-4.12/revert-drm-amdgpu-fix-vblank_time-when-displays-are-off.patch b/queue-4.12/revert-drm-amdgpu-fix-vblank_time-when-displays-are-off.patch new file mode 100644 index 00000000000..8b2bede3271 --- /dev/null +++ b/queue-4.12/revert-drm-amdgpu-fix-vblank_time-when-displays-are-off.patch @@ -0,0 +1,32 @@ +From alexdeucher@gmail.com Sun Aug 27 14:19:36 2017 +From: Alex Deucher +Date: Thu, 17 Aug 2017 16:36:51 -0400 +Subject: Revert "drm/amdgpu: fix vblank_time when displays are off" +To: gregkh@linuxfoundation.org, stable@vger.kernel.org +Cc: Alex Deucher +Message-ID: <1503002211-2813-1-git-send-email-alexander.deucher@amd.com> + +From: Alex Deucher + +This reverts commit 2dc1889ebf8501b0edf125e89a30e1cf3744a2a7. + +Fixes a suspend and resume regression. + +bug: https://bugzilla.kernel.org/show_bug.cgi?id=196615 +Signed-off-by: Alex Deucher +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +@@ -839,8 +839,6 @@ static int amdgpu_cgs_get_active_display + + mode_info = info->mode_info; + if (mode_info) { +- /* if the displays are off, vblank time is max */ +- mode_info->vblank_time_us = 0xffffffff; + /* always set the reference clock */ + mode_info->ref_clock = adev->clock.spll.reference_freq; + } diff --git a/queue-4.12/ring-buffer-have-ring_buffer_alloc_read_page-return-error-on-offline-cpu.patch b/queue-4.12/ring-buffer-have-ring_buffer_alloc_read_page-return-error-on-offline-cpu.patch new file mode 100644 index 00000000000..ed837bd84b9 --- /dev/null +++ b/queue-4.12/ring-buffer-have-ring_buffer_alloc_read_page-return-error-on-offline-cpu.patch @@ -0,0 +1,155 @@ +From a7e52ad7ed82e21273eccff93d1477a7b313aabb Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (VMware)" +Date: Wed, 2 Aug 2017 14:20:54 
-0400 +Subject: ring-buffer: Have ring_buffer_alloc_read_page() return error on offline CPU + +From: Steven Rostedt (VMware) + +commit a7e52ad7ed82e21273eccff93d1477a7b313aabb upstream. + +Chunyu Hu reported: + "per_cpu trace directories and files are created for all possible cpus, + but only the cpus which have ever been on-lined have their own per cpu + ring buffer (allocated by cpuhp threads). While trace_buffers_open, the + open handler for trace file 'trace_pipe_raw' is always trying to access + field of ring_buffer_per_cpu, and would panic with the NULL pointer. + + Align the behavior of trace_pipe_raw with trace_pipe, that returns -ENODEV + when opening it if that cpu does not have trace ring buffer. + + Reproduce: + cat /sys/kernel/debug/tracing/per_cpu/cpu31/trace_pipe_raw + (cpu31 is never on-lined, this is a 16 cores x86_64 box) + + Tested with: + 1) boot with maxcpus=14, read trace_pipe_raw of cpu15. + Got -ENODEV. + 2) online cpu15, read trace_pipe_raw of cpu15. + Get the raw trace data. + + Call trace: + [ 5760.950995] RIP: 0010:ring_buffer_alloc_read_page+0x32/0xe0 + [ 5760.961678] tracing_buffers_read+0x1f6/0x230 + [ 5760.962695] __vfs_read+0x37/0x160 + [ 5760.963498] ? __vfs_read+0x5/0x160 + [ 5760.964339] ? security_file_permission+0x9d/0xc0 + [ 5760.965451] ? __vfs_read+0x5/0x160 + [ 5760.966280] vfs_read+0x8c/0x130 + [ 5760.967070] SyS_read+0x55/0xc0 + [ 5760.967779] do_syscall_64+0x67/0x150 + [ 5760.968687] entry_SYSCALL64_slow_path+0x25/0x25" + +This was introduced by the addition of the feature to reuse reader pages +instead of re-allocating them. The problem is that the allocation of a +reader page (which is per cpu) does not check if the cpu is online and set +up for the ring buffer.
+ +Link: http://lkml.kernel.org/r/1500880866-1177-1-git-send-email-chuhu@redhat.com + +Fixes: 73a757e63114 ("ring-buffer: Return reader page back into existing ring buffer") +Reported-by: Chunyu Hu +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/trace/ring_buffer.c | 14 +++++++++----- + kernel/trace/ring_buffer_benchmark.c | 2 +- + kernel/trace/trace.c | 16 +++++++++++----- + 3 files changed, 21 insertions(+), 11 deletions(-) + +--- a/kernel/trace/ring_buffer.c ++++ b/kernel/trace/ring_buffer.c +@@ -4386,15 +4386,19 @@ EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); + * the page that was allocated, with the read page of the buffer. + * + * Returns: +- * The page allocated, or NULL on error. ++ * The page allocated, or ERR_PTR + */ + void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu) + { +- struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; ++ struct ring_buffer_per_cpu *cpu_buffer; + struct buffer_data_page *bpage = NULL; + unsigned long flags; + struct page *page; + ++ if (!cpumask_test_cpu(cpu, buffer->cpumask)) ++ return ERR_PTR(-ENODEV); ++ ++ cpu_buffer = buffer->buffers[cpu]; + local_irq_save(flags); + arch_spin_lock(&cpu_buffer->lock); + +@@ -4412,7 +4416,7 @@ void *ring_buffer_alloc_read_page(struct + page = alloc_pages_node(cpu_to_node(cpu), + GFP_KERNEL | __GFP_NORETRY, 0); + if (!page) +- return NULL; ++ return ERR_PTR(-ENOMEM); + + bpage = page_address(page); + +@@ -4467,8 +4471,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_free_read_ + * + * for example: + * rpage = ring_buffer_alloc_read_page(buffer, cpu); +- * if (!rpage) +- * return error; ++ * if (IS_ERR(rpage)) ++ * return PTR_ERR(rpage); + * ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0); + * if (ret >= 0) + * process_page(rpage, ret); +--- a/kernel/trace/ring_buffer_benchmark.c ++++ b/kernel/trace/ring_buffer_benchmark.c +@@ -113,7 +113,7 @@ static enum event_status read_page(int c + int i; + + bpage = 
ring_buffer_alloc_read_page(buffer, cpu); +- if (!bpage) ++ if (IS_ERR(bpage)) + return EVENT_DROPPED; + + ret = ring_buffer_read_page(buffer, &bpage, PAGE_SIZE, cpu, 1); +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -6403,7 +6403,7 @@ tracing_buffers_read(struct file *filp, + { + struct ftrace_buffer_info *info = filp->private_data; + struct trace_iterator *iter = &info->iter; +- ssize_t ret; ++ ssize_t ret = 0; + ssize_t size; + + if (!count) +@@ -6417,10 +6417,15 @@ tracing_buffers_read(struct file *filp, + if (!info->spare) { + info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer, + iter->cpu_file); +- info->spare_cpu = iter->cpu_file; ++ if (IS_ERR(info->spare)) { ++ ret = PTR_ERR(info->spare); ++ info->spare = NULL; ++ } else { ++ info->spare_cpu = iter->cpu_file; ++ } + } + if (!info->spare) +- return -ENOMEM; ++ return ret; + + /* Do we have previous read data to read? */ + if (info->read < PAGE_SIZE) +@@ -6595,8 +6600,9 @@ tracing_buffers_splice_read(struct file + ref->ref = 1; + ref->buffer = iter->trace_buffer->buffer; + ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file); +- if (!ref->page) { +- ret = -ENOMEM; ++ if (IS_ERR(ref->page)) { ++ ret = PTR_ERR(ref->page); ++ ref->page = NULL; + kfree(ref); + break; + } diff --git a/queue-4.12/series b/queue-4.12/series index 4debf7f5947..656a40f7914 100644 --- a/queue-4.12/series +++ b/queue-4.12/series @@ -45,3 +45,36 @@ arcv2-pae40-set-msb-even-if-config_arc_has_pae40-but-pae-exists-in-soc.patch pm-hibernate-touch-nmi-watchdog-when-creating-snapshot.patch mm-shmem-fix-handling-sys-kernel-mm-transparent_hugepage-shmem_enabled.patch dax-fix-deadlock-due-to-misaligned-pmd-faults.patch +i2c-designware-fix-system-suspend.patch +mm-madvise.c-fix-freeing-of-locked-page-with-madv_free.patch +fork-fix-incorrect-fput-of-exe_file-causing-use-after-free.patch +mm-memblock.c-reversed-logic-in-memblock_discard.patch +arm64-fpsimd-prevent-registers-leaking-across-exec.patch 
+drm-fix-framebuffer-leak.patch +drm-release-driver-tracking-before-making-the-object-available-again.patch +drm-sun4i-implement-drm_driver-lastclose-to-restore-fbdev-console.patch +drm-atomic-handle-edeadlk-with-out-fences-correctly.patch +drm-atomic-if-the-atomic-check-fails-return-its-value-first.patch +drm-i915-vbt-ignore-extraneous-child-devices-for-a-port.patch +drm-i915-gvt-fix-the-kernel-null-pointer-error.patch +revert-drm-amdgpu-fix-vblank_time-when-displays-are-off.patch +acpi-device-property-fix-node-lookup-in-acpi_graph_get_child_prop_value.patch +tracing-call-clear_boot_tracer-at-lateinit_sync.patch +tracing-missing-error-code-in-tracer_alloc_buffers.patch +tracing-fix-kmemleak-in-tracing_map_array_free.patch +tracing-fix-freeing-of-filter-in-create_filter-when-set_str-is-false.patch +rdma-uverbs-initialize-cq_context-appropriately.patch +kbuild-linker-script-do-not-match-c-names-unless-ld_dead_code_data_elimination-is-configured.patch +cifs-fix-df-output-for-users-with-quota-limits.patch +cifs-return-enametoolong-for-overlong-names-in-cifs_open-cifs_lookup.patch +nfsd-limit-end-of-page-list-when-decoding-nfsv4-write.patch +ring-buffer-have-ring_buffer_alloc_read_page-return-error-on-offline-cpu.patch +virtio_pci-fix-cpu-affinity-support.patch +ftrace-check-for-null-ret_stack-on-profile-function-graph-entry-function.patch +perf-core-fix-group-cpu-task-validation.patch +timers-fix-excessive-granularity-of-new-timers-after-a-nohz-idle.patch +x86-mm-fix-use-after-free-of-ldt_struct.patch +net-sunrpc-svcsock-fix-null-pointer-exception.patch +netfilter-expect-fix-crash-when-putting-uninited-expectation.patch +netfilter-nat-fix-src-map-lookup.patch +netfilter-nfnetlink-improve-input-length-sanitization-in-nfnetlink_rcv.patch diff --git a/queue-4.12/timers-fix-excessive-granularity-of-new-timers-after-a-nohz-idle.patch b/queue-4.12/timers-fix-excessive-granularity-of-new-timers-after-a-nohz-idle.patch new file mode 100644 index 00000000000..25ba0610935 --- 
/dev/null +++ b/queue-4.12/timers-fix-excessive-granularity-of-new-timers-after-a-nohz-idle.patch @@ -0,0 +1,206 @@ +From 2fe59f507a65dbd734b990a11ebc7488f6f87a24 Mon Sep 17 00:00:00 2001 +From: Nicholas Piggin +Date: Tue, 22 Aug 2017 18:43:48 +1000 +Subject: timers: Fix excessive granularity of new timers after a nohz idle + +From: Nicholas Piggin + +commit 2fe59f507a65dbd734b990a11ebc7488f6f87a24 upstream. + +When a timer base is idle, it is forwarded when a new timer is added +to ensure that granularity does not become excessive. When not idle, +the timer tick is expected to increment the base. + +However there are several problems: + +- If an existing timer is modified, the base is forwarded only after + the index is calculated. + +- The base is not forwarded by add_timer_on. + +- There is a window after a timer is restarted from a nohz idle, after + it is marked not-idle and before the timer tick on this CPU, where a + timer may be added but the ancient base does not get forwarded. + +These result in excessive granularity (a 1 jiffy timeout can blow out +to 100s of jiffies), which cause the rcu lockup detector to trigger, +among other things. + +Fix this by keeping track of whether the timer base has been idle +since it was last run or forwarded, and if so then forward it before +adding a new timer. + +There is still a case where mod_timer optimises the case of a pending +timer mod with the same expiry time, where the timer can see excessive +granularity relative to the new, shorter interval. A comment is added, +but it's not changed because it is an important fastpath for +networking. + +This has been tested and found to fix the RCU softlockup messages. + +Testing was also done with tracing to measure requested versus +achieved wakeup latencies for all non-deferrable timers in an idle +system (with no lockup watchdogs running). 
Wakeup latency relative to +absolute latency is calculated (note this suffers from round-up skew +at low absolute times) and analysed: + + max avg std +upstream 506.0 1.20 4.68 +patched 2.0 1.08 0.15 + +The bug was noticed due to the lockup detector Kconfig changes +dropping it out of people's .configs and resulting in larger base +clk skew. When the lockup detectors are enabled, no CPU can go idle for +longer than 4 seconds, which limits the granularity errors. +Sub-optimal timer behaviour is observable on a smaller scale in that +case: + + max avg std +upstream 9.0 1.05 0.19 +patched 2.0 1.04 0.11 + +Fixes: a683f390b93f ("timers: Forward the wheel clock whenever possible") +Signed-off-by: Nicholas Piggin +Signed-off-by: Thomas Gleixner +Tested-by: Jonathan Cameron +Tested-by: David Miller +Cc: dzickus@redhat.com +Cc: sfr@canb.auug.org.au +Cc: mpe@ellerman.id.au +Cc: Stephen Boyd +Cc: linuxarm@huawei.com +Cc: abdhalee@linux.vnet.ibm.com +Cc: John Stultz +Cc: akpm@linux-foundation.org +Cc: paulmck@linux.vnet.ibm.com +Cc: torvalds@linux-foundation.org +Link: http://lkml.kernel.org/r/20170822084348.21436-1-npiggin@gmail.com +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/time/timer.c | 50 +++++++++++++++++++++++++++++++++++++++++--------- + 1 file changed, 41 insertions(+), 9 deletions(-) + +--- a/kernel/time/timer.c ++++ b/kernel/time/timer.c +@@ -203,6 +203,7 @@ struct timer_base { + bool migration_enabled; + bool nohz_active; + bool is_idle; ++ bool must_forward_clk; + DECLARE_BITMAP(pending_map, WHEEL_SIZE); + struct hlist_head vectors[WHEEL_SIZE]; + } ____cacheline_aligned; +@@ -856,13 +857,19 @@ get_target_base(struct timer_base *base, + + static inline void forward_timer_base(struct timer_base *base) + { +- unsigned long jnow = READ_ONCE(jiffies); ++ unsigned long jnow; + + /* +- * We only forward the base when it's idle and we have a delta between +- * base clock and jiffies.
++ * We only forward the base when we are idle or have just come out of ++ * idle (must_forward_clk logic), and have a delta between base clock ++ * and jiffies. In the common case, run_timers will take care of it. + */ +- if (!base->is_idle || (long) (jnow - base->clk) < 2) ++ if (likely(!base->must_forward_clk)) ++ return; ++ ++ jnow = READ_ONCE(jiffies); ++ base->must_forward_clk = base->is_idle; ++ if ((long)(jnow - base->clk) < 2) + return; + + /* +@@ -938,6 +945,11 @@ __mod_timer(struct timer_list *timer, un + * same array bucket then just return: + */ + if (timer_pending(timer)) { ++ /* ++ * The downside of this optimization is that it can result in ++ * larger granularity than you would get from adding a new ++ * timer with this expiry. ++ */ + if (timer->expires == expires) + return 1; + +@@ -948,6 +960,7 @@ __mod_timer(struct timer_list *timer, un + * dequeue/enqueue dance. + */ + base = lock_timer_base(timer, &flags); ++ forward_timer_base(base); + + clk = base->clk; + idx = calc_wheel_index(expires, clk); +@@ -964,6 +977,7 @@ __mod_timer(struct timer_list *timer, un + } + } else { + base = lock_timer_base(timer, &flags); ++ forward_timer_base(base); + } + + ret = detach_if_pending(timer, base, false); +@@ -991,12 +1005,10 @@ __mod_timer(struct timer_list *timer, un + spin_lock(&base->lock); + WRITE_ONCE(timer->flags, + (timer->flags & ~TIMER_BASEMASK) | base->cpu); ++ forward_timer_base(base); + } + } + +- /* Try to forward a stale timer base clock */ +- forward_timer_base(base); +- + timer->expires = expires; + /* + * If 'idx' was calculated above and the base time did not advance +@@ -1112,6 +1124,7 @@ void add_timer_on(struct timer_list *tim + WRITE_ONCE(timer->flags, + (timer->flags & ~TIMER_BASEMASK) | cpu); + } ++ forward_timer_base(base); + + debug_activate(timer, timer->expires); + internal_add_timer(base, timer); +@@ -1497,10 +1510,16 @@ u64 get_next_timer_interrupt(unsigned lo + if (!is_max_delta) + expires = basem + (u64)(nextevt - basej) * 
TICK_NSEC; + /* +- * If we expect to sleep more than a tick, mark the base idle: ++ * If we expect to sleep more than a tick, mark the base idle. ++ * Also the tick is stopped so any added timer must forward ++ * the base clk itself to keep granularity small. This idle ++ * logic is only maintained for the BASE_STD base, deferrable ++ * timers may still see large granularity skew (by design). + */ +- if ((expires - basem) > TICK_NSEC) ++ if ((expires - basem) > TICK_NSEC) { ++ base->must_forward_clk = true; + base->is_idle = true; ++ } + } + spin_unlock(&base->lock); + +@@ -1611,6 +1630,19 @@ static __latent_entropy void run_timer_s + { + struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); + ++ /* ++ * must_forward_clk must be cleared before running timers so that any ++ * timer functions that call mod_timer will not try to forward the ++ * base. idle trcking / clock forwarding logic is only used with ++ * BASE_STD timers. ++ * ++ * The deferrable base does not do idle tracking at all, so we do ++ * not forward it. This can result in very large variations in ++ * granularity for deferrable timers, but they can be deferred for ++ * long periods due to idle. ++ */ ++ base->must_forward_clk = false; ++ + __run_timers(base); + if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active) + __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); diff --git a/queue-4.12/tracing-call-clear_boot_tracer-at-lateinit_sync.patch b/queue-4.12/tracing-call-clear_boot_tracer-at-lateinit_sync.patch new file mode 100644 index 00000000000..0e5ed357ed7 --- /dev/null +++ b/queue-4.12/tracing-call-clear_boot_tracer-at-lateinit_sync.patch @@ -0,0 +1,39 @@ +From 4bb0f0e73c8c30917d169c4a0f1ac083690c545b Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (VMware)" +Date: Tue, 1 Aug 2017 12:01:52 -0400 +Subject: tracing: Call clear_boot_tracer() at lateinit_sync + +From: Steven Rostedt (VMware) + +commit 4bb0f0e73c8c30917d169c4a0f1ac083690c545b upstream. 
+ +The clear_boot_tracer function is used to reset the default_bootup_tracer +string to prevent it from being accessed after boot, as it originally points +to init data. But since clear_boot_tracer() is called via the +init_lateinit() call, it races with the initcall for registering the hwlat +tracer. If someone adds "ftrace=hwlat" to the kernel command line, depending +on how the linker sets up the text, the saved command line may be cleared, +and the hwlat tracer never is initialized. + +Simply have the clear_boot_tracer() be called by initcall_lateinit_sync() as +that's for tasks to be called after lateinit. + +Link: https://bugzilla.kernel.org/show_bug.cgi?id=196551 + +Fixes: e7c15cd8a ("tracing: Added hardware latency tracer") +Reported-by: Zamir SUN +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/trace/trace.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -8224,4 +8224,4 @@ __init static int clear_boot_tracer(void + } + + fs_initcall(tracer_init_tracefs); +-late_initcall(clear_boot_tracer); ++late_initcall_sync(clear_boot_tracer); diff --git a/queue-4.12/tracing-fix-freeing-of-filter-in-create_filter-when-set_str-is-false.patch b/queue-4.12/tracing-fix-freeing-of-filter-in-create_filter-when-set_str-is-false.patch new file mode 100644 index 00000000000..2ef90fab51f --- /dev/null +++ b/queue-4.12/tracing-fix-freeing-of-filter-in-create_filter-when-set_str-is-false.patch @@ -0,0 +1,67 @@ +From 8b0db1a5bdfcee0dbfa89607672598ae203c9045 Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (VMware)" +Date: Wed, 23 Aug 2017 12:46:27 -0400 +Subject: tracing: Fix freeing of filter in create_filter() when set_str is false + +From: Steven Rostedt (VMware) + +commit 8b0db1a5bdfcee0dbfa89607672598ae203c9045 upstream. 
+ +Performing the following task with kmemleak enabled: + + # cd /sys/kernel/tracing/events/irq/irq_handler_entry/ + # echo 'enable_event:kmem:kmalloc:3 if irq >' > trigger + # echo 'enable_event:kmem:kmalloc:3 if irq > 31' > trigger + # echo scan > /sys/kernel/debug/kmemleak + # cat /sys/kernel/debug/kmemleak +unreferenced object 0xffff8800b9290308 (size 32): + comm "bash", pid 1114, jiffies 4294848451 (age 141.139s) + hex dump (first 32 bytes): + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ + backtrace: + [] kmemleak_alloc+0x4a/0xa0 + [] kmem_cache_alloc_trace+0x158/0x290 + [] create_filter_start.constprop.28+0x99/0x940 + [] create_filter+0xa9/0x160 + [] create_event_filter+0xc/0x10 + [] set_trigger_filter+0xe5/0x210 + [] event_enable_trigger_func+0x324/0x490 + [] event_trigger_write+0x1a2/0x260 + [] __vfs_write+0xd7/0x380 + [] vfs_write+0x101/0x260 + [] SyS_write+0xab/0x130 + [] entry_SYSCALL_64_fastpath+0x1f/0xbe + [] 0xffffffffffffffff + +The function create_filter() is passed a 'filterp' pointer that gets +allocated, and if "set_str" is true, it is up to the caller to free it, even +on error. The problem is that the pointer is not freed by create_filter() +when set_str is false. This is a bug, and it is not up to the caller to free +the filter on error if it doesn't care about the string. 
+ +Link: http://lkml.kernel.org/r/1502705898-27571-2-git-send-email-chuhu@redhat.com + +Fixes: 38b78eb85 ("tracing: Factorize filter creation") +Reported-by: Chunyu Hu +Tested-by: Chunyu Hu +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/trace/trace_events_filter.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/kernel/trace/trace_events_filter.c ++++ b/kernel/trace/trace_events_filter.c +@@ -1959,6 +1959,10 @@ static int create_filter(struct trace_ev + if (err && set_str) + append_filter_err(ps, filter); + } ++ if (err && !set_str) { ++ free_event_filter(filter); ++ filter = NULL; ++ } + create_filter_finish(ps); + + *filterp = filter; diff --git a/queue-4.12/tracing-fix-kmemleak-in-tracing_map_array_free.patch b/queue-4.12/tracing-fix-kmemleak-in-tracing_map_array_free.patch new file mode 100644 index 00000000000..6aee43e8efb --- /dev/null +++ b/queue-4.12/tracing-fix-kmemleak-in-tracing_map_array_free.patch @@ -0,0 +1,88 @@ +From 475bb3c69ab05df2a6ecef6acc2393703d134180 Mon Sep 17 00:00:00 2001 +From: Chunyu Hu +Date: Mon, 14 Aug 2017 18:18:17 +0800 +Subject: tracing: Fix kmemleak in tracing_map_array_free() + +From: Chunyu Hu + +commit 475bb3c69ab05df2a6ecef6acc2393703d134180 upstream. + +kmemleak reported the below leak when I was clearing the hist +trigger. With this patch, the kmemleak report is gone. + +unreferenced object 0xffff94322b63d760 (size 32): + comm "bash", pid 1522, jiffies 4403687962 (age 2442.311s) + hex dump (first 32 bytes): + 00 01 00 00 04 00 00 00 08 00 00 00 ff 00 00 00 ................ + 10 00 00 00 00 00 00 00 80 a8 7a f2 31 94 ff ff ..........z.1...
+ backtrace: + [] kmemleak_alloc+0x4a/0xa0 + [] kmem_cache_alloc_trace+0xca/0x1d0 + [] tracing_map_array_alloc+0x26/0x140 + [] kretprobe_trampoline+0x0/0x50 + [] create_hist_data+0x535/0x750 + [] event_hist_trigger_func+0x1f7/0x420 + [] event_trigger_write+0xfd/0x1a0 + [] __vfs_write+0x37/0x170 + [] vfs_write+0xb2/0x1b0 + [] SyS_write+0x55/0xc0 + [] do_syscall_64+0x67/0x150 + [] return_from_SYSCALL_64+0x0/0x6a + [] 0xffffffffffffffff +unreferenced object 0xffff9431f27aa880 (size 128): + comm "bash", pid 1522, jiffies 4403687962 (age 2442.311s) + hex dump (first 32 bytes): + 00 00 8c 2a 32 94 ff ff 00 f0 8b 2a 32 94 ff ff ...*2......*2... + 00 e0 8b 2a 32 94 ff ff 00 d0 8b 2a 32 94 ff ff ...*2......*2... + backtrace: + [] kmemleak_alloc+0x4a/0xa0 + [] __kmalloc+0xe8/0x220 + [] tracing_map_array_alloc+0xb1/0x140 + [] kretprobe_trampoline+0x0/0x50 + [] create_hist_data+0x535/0x750 + [] event_hist_trigger_func+0x1f7/0x420 + [] event_trigger_write+0xfd/0x1a0 + [] __vfs_write+0x37/0x170 + [] vfs_write+0xb2/0x1b0 + [] SyS_write+0x55/0xc0 + [] do_syscall_64+0x67/0x150 + [] return_from_SYSCALL_64+0x0/0x6a + [] 0xffffffffffffffff + +Link: http://lkml.kernel.org/r/1502705898-27571-1-git-send-email-chuhu@redhat.com + +Fixes: 08d43a5fa063 ("tracing: Add lock-free tracing_map") +Signed-off-by: Chunyu Hu +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/trace/tracing_map.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +--- a/kernel/trace/tracing_map.c ++++ b/kernel/trace/tracing_map.c +@@ -221,16 +221,19 @@ void tracing_map_array_free(struct traci + if (!a) + return; + +- if (!a->pages) { +- kfree(a); +- return; +- } ++ if (!a->pages) ++ goto free; + + for (i = 0; i < a->n_pages; i++) { + if (!a->pages[i]) + break; + free_page((unsigned long)a->pages[i]); + } ++ ++ kfree(a->pages); ++ ++ free: ++ kfree(a); + } + + struct tracing_map_array *tracing_map_array_alloc(unsigned int n_elts, diff --git 
a/queue-4.12/tracing-missing-error-code-in-tracer_alloc_buffers.patch b/queue-4.12/tracing-missing-error-code-in-tracer_alloc_buffers.patch new file mode 100644 index 00000000000..1f826ec06ed --- /dev/null +++ b/queue-4.12/tracing-missing-error-code-in-tracer_alloc_buffers.patch @@ -0,0 +1,35 @@ +From 147d88e0b5eb90191bc5c12ca0a3c410b75a13d2 Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Tue, 1 Aug 2017 14:02:01 +0300 +Subject: tracing: Missing error code in tracer_alloc_buffers() + +From: Dan Carpenter + +commit 147d88e0b5eb90191bc5c12ca0a3c410b75a13d2 upstream. + +If ring_buffer_alloc() or one of the next couple function calls fail +then we should return -ENOMEM but the current code returns success. + +Link: http://lkml.kernel.org/r/20170801110201.ajdkct7vwzixahvx@mwanda + +Cc: Sebastian Andrzej Siewior +Cc: Ingo Molnar +Fixes: b32614c03413 ('tracing/rb: Convert to hotplug state machine') +Signed-off-by: Dan Carpenter +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/trace/trace.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -8110,6 +8110,7 @@ __init static int tracer_alloc_buffers(v + if (ret < 0) + goto out_free_cpumask; + /* Used for event triggers */ ++ ret = -ENOMEM; + temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE); + if (!temp_buffer) + goto out_rm_hp_state; diff --git a/queue-4.12/virtio_pci-fix-cpu-affinity-support.patch b/queue-4.12/virtio_pci-fix-cpu-affinity-support.patch new file mode 100644 index 00000000000..d5d1989c4bb --- /dev/null +++ b/queue-4.12/virtio_pci-fix-cpu-affinity-support.patch @@ -0,0 +1,53 @@ +From ba74b6f7fcc07355d087af6939712eed4a454821 Mon Sep 17 00:00:00 2001 +From: Christoph Hellwig +Date: Thu, 24 Aug 2017 18:07:02 +0200 +Subject: virtio_pci: fix cpu affinity support + +From: Christoph Hellwig + +commit ba74b6f7fcc07355d087af6939712eed4a454821 upstream. 
+ +Commit 0b0f9dc5 ("Revert "virtio_pci: use shared interrupts for +virtqueues"") removed the adjustment of the pre_vectors for the virtio +MSI-X vector allocation which was added in commit fb5e31d9 ("virtio: +allow drivers to request IRQ affinity when creating VQs"). This will +lead to an incorrect assignment of MSI-X vectors, and potential +deadlocks when offlining cpus. + +Signed-off-by: Christoph Hellwig +Fixes: 0b0f9dc5 ("Revert "virtio_pci: use shared interrupts for virtqueues"") +Reported-by: YASUAKI ISHIMATSU +Signed-off-by: Michael S. Tsirkin +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/virtio/virtio_pci_common.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +--- a/drivers/virtio/virtio_pci_common.c ++++ b/drivers/virtio/virtio_pci_common.c +@@ -107,6 +107,7 @@ static int vp_request_msix_vectors(struc + { + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + const char *name = dev_name(&vp_dev->vdev.dev); ++ unsigned flags = PCI_IRQ_MSIX; + unsigned i, v; + int err = -ENOMEM; + +@@ -126,10 +127,13 @@ static int vp_request_msix_vectors(struc + GFP_KERNEL)) + goto error; + ++ if (desc) { ++ flags |= PCI_IRQ_AFFINITY; ++ desc->pre_vectors++; /* virtio config vector */ ++ } ++ + err = pci_alloc_irq_vectors_affinity(vp_dev->pci_dev, nvectors, +- nvectors, PCI_IRQ_MSIX | +- (desc ? PCI_IRQ_AFFINITY : 0), +- desc); ++ nvectors, flags, desc); + if (err < 0) + goto error; + vp_dev->msix_enabled = 1; diff --git a/queue-4.12/x86-mm-fix-use-after-free-of-ldt_struct.patch b/queue-4.12/x86-mm-fix-use-after-free-of-ldt_struct.patch new file mode 100644 index 00000000000..692fe9ef9a8 --- /dev/null +++ b/queue-4.12/x86-mm-fix-use-after-free-of-ldt_struct.patch @@ -0,0 +1,173 @@ +From ccd5b3235180eef3cfec337df1c8554ab151b5cc Mon Sep 17 00:00:00 2001 +From: Eric Biggers +Date: Thu, 24 Aug 2017 10:50:29 -0700 +Subject: x86/mm: Fix use-after-free of ldt_struct + +From: Eric Biggers + +commit ccd5b3235180eef3cfec337df1c8554ab151b5cc upstream.
+ +The following commit: + + 39a0526fb3f7 ("x86/mm: Factor out LDT init from context init") + +renamed init_new_context() to init_new_context_ldt() and added a new +init_new_context() which calls init_new_context_ldt(). However, the +error code of init_new_context_ldt() was ignored. Consequently, if a +memory allocation in alloc_ldt_struct() failed during a fork(), the +->context.ldt of the new task remained the same as that of the old task +(due to the memcpy() in dup_mm()). ldt_struct's are not intended to be +shared, so a use-after-free occurred after one task exited. + +Fix the bug by making init_new_context() pass through the error code of +init_new_context_ldt(). + +This bug was found by syzkaller, which encountered the following splat: + + BUG: KASAN: use-after-free in free_ldt_struct.part.2+0x10a/0x150 arch/x86/kernel/ldt.c:116 + Read of size 4 at addr ffff88006d2cb7c8 by task kworker/u9:0/3710 + + CPU: 1 PID: 3710 Comm: kworker/u9:0 Not tainted 4.13.0-rc4-next-20170811 #2 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 + Call Trace: + __dump_stack lib/dump_stack.c:16 [inline] + dump_stack+0x194/0x257 lib/dump_stack.c:52 + print_address_description+0x73/0x250 mm/kasan/report.c:252 + kasan_report_error mm/kasan/report.c:351 [inline] + kasan_report+0x24e/0x340 mm/kasan/report.c:409 + __asan_report_load4_noabort+0x14/0x20 mm/kasan/report.c:429 + free_ldt_struct.part.2+0x10a/0x150 arch/x86/kernel/ldt.c:116 + free_ldt_struct arch/x86/kernel/ldt.c:173 [inline] + destroy_context_ldt+0x60/0x80 arch/x86/kernel/ldt.c:171 + destroy_context arch/x86/include/asm/mmu_context.h:157 [inline] + __mmdrop+0xe9/0x530 kernel/fork.c:889 + mmdrop include/linux/sched/mm.h:42 [inline] + exec_mmap fs/exec.c:1061 [inline] + flush_old_exec+0x173c/0x1ff0 fs/exec.c:1291 + load_elf_binary+0x81f/0x4ba0 fs/binfmt_elf.c:855 + search_binary_handler+0x142/0x6b0 fs/exec.c:1652 + exec_binprm fs/exec.c:1694 [inline] + do_execveat_common.isra.33+0x1746/0x22e0 
fs/exec.c:1816 + do_execve+0x31/0x40 fs/exec.c:1860 + call_usermodehelper_exec_async+0x457/0x8f0 kernel/umh.c:100 + ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:431 + + Allocated by task 3700: + save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 + save_stack+0x43/0xd0 mm/kasan/kasan.c:447 + set_track mm/kasan/kasan.c:459 [inline] + kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551 + kmem_cache_alloc_trace+0x136/0x750 mm/slab.c:3627 + kmalloc include/linux/slab.h:493 [inline] + alloc_ldt_struct+0x52/0x140 arch/x86/kernel/ldt.c:67 + write_ldt+0x7b7/0xab0 arch/x86/kernel/ldt.c:277 + sys_modify_ldt+0x1ef/0x240 arch/x86/kernel/ldt.c:307 + entry_SYSCALL_64_fastpath+0x1f/0xbe + + Freed by task 3700: + save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 + save_stack+0x43/0xd0 mm/kasan/kasan.c:447 + set_track mm/kasan/kasan.c:459 [inline] + kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524 + __cache_free mm/slab.c:3503 [inline] + kfree+0xca/0x250 mm/slab.c:3820 + free_ldt_struct.part.2+0xdd/0x150 arch/x86/kernel/ldt.c:121 + free_ldt_struct arch/x86/kernel/ldt.c:173 [inline] + destroy_context_ldt+0x60/0x80 arch/x86/kernel/ldt.c:171 + destroy_context arch/x86/include/asm/mmu_context.h:157 [inline] + __mmdrop+0xe9/0x530 kernel/fork.c:889 + mmdrop include/linux/sched/mm.h:42 [inline] + __mmput kernel/fork.c:916 [inline] + mmput+0x541/0x6e0 kernel/fork.c:927 + copy_process.part.36+0x22e1/0x4af0 kernel/fork.c:1931 + copy_process kernel/fork.c:1546 [inline] + _do_fork+0x1ef/0xfb0 kernel/fork.c:2025 + SYSC_clone kernel/fork.c:2135 [inline] + SyS_clone+0x37/0x50 kernel/fork.c:2129 + do_syscall_64+0x26c/0x8c0 arch/x86/entry/common.c:287 + return_from_SYSCALL_64+0x0/0x7a + +Here is a C reproducer: + + #include + #include + #include + #include + #include + #include + #include + + static void *fork_thread(void *_arg) + { + fork(); + } + + int main(void) + { + struct user_desc desc = { .entry_number = 8191 }; + + syscall(__NR_modify_ldt, 1, &desc, sizeof(desc)); + + for (;;) 
{ + if (fork() == 0) { + pthread_t t; + + srand(getpid()); + pthread_create(&t, NULL, fork_thread, NULL); + usleep(rand() % 10000); + syscall(__NR_exit_group, 0); + } + wait(NULL); + } + } + +Note: the reproducer takes advantage of the fact that alloc_ldt_struct() +may use vmalloc() to allocate a large ->entries array, and after +commit: + + 5d17a73a2ebe ("vmalloc: back off when the current task is killed") + +it is possible for userspace to fail a task's vmalloc() by +sending a fatal signal, e.g. via exit_group(). It would be more +difficult to reproduce this bug on kernels without that commit. + +This bug only affected kernels with CONFIG_MODIFY_LDT_SYSCALL=y. + +Signed-off-by: Eric Biggers +Acked-by: Dave Hansen +Cc: Andrew Morton +Cc: Andy Lutomirski +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Christoph Hellwig +Cc: Denys Vlasenko +Cc: Dmitry Vyukov +Cc: Linus Torvalds +Cc: Michal Hocko +Cc: Peter Zijlstra +Cc: Rik van Riel +Cc: Tetsuo Handa +Cc: Thomas Gleixner +Cc: linux-mm@kvack.org +Fixes: 39a0526fb3f7 ("x86/mm: Factor out LDT init from context init") +Link: http://lkml.kernel.org/r/20170824175029.76040-1-ebiggers3@gmail.com +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/mmu_context.h | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/arch/x86/include/asm/mmu_context.h ++++ b/arch/x86/include/asm/mmu_context.h +@@ -116,9 +116,7 @@ static inline int init_new_context(struc + mm->context.execute_only_pkey = -1; + } + #endif +- init_new_context_ldt(tsk, mm); +- +- return 0; ++ return init_new_context_ldt(tsk, mm); + } + static inline void destroy_context(struct mm_struct *mm) + { -- 2.47.3