From 42367fa99e7ceb6d1d599b8d62cad00210384cb9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 27 Aug 2017 14:55:49 +0200 Subject: [PATCH] 4.9-stable patches added patches: cifs-fix-df-output-for-users-with-quota-limits.patch cifs-return-enametoolong-for-overlong-names-in-cifs_open-cifs_lookup.patch drm-atomic-if-the-atomic-check-fails-return-its-value-first.patch drm-rcar-du-fix-crash-in-encoder-failure-error-path.patch drm-rcar-du-fix-display-timing-controller-parameter.patch drm-rcar-du-fix-h-v-sync-signal-polarity-configuration.patch drm-release-driver-tracking-before-making-the-object-available-again.patch fork-fix-incorrect-fput-of-exe_file-causing-use-after-free.patch ftrace-check-for-null-ret_stack-on-profile-function-graph-entry-function.patch i2c-designware-fix-system-suspend.patch kbuild-linker-script-do-not-match-c-names-unless-ld_dead_code_data_elimination-is-configured.patch mm-madvise.c-fix-freeing-of-locked-page-with-madv_free.patch mm-memblock.c-reversed-logic-in-memblock_discard.patch net-sunrpc-svcsock-fix-null-pointer-exception.patch netfilter-nat-fix-src-map-lookup.patch nfsd-limit-end-of-page-list-when-decoding-nfsv4-write.patch perf-core-fix-group-cpu-task-validation.patch perf-probe-fix-funcs-to-show-correct-symbols-for-offline-module.patch perf-x86-intel-rapl-make-package-handling-more-robust.patch revert-leds-handle-suspend-resume-in-heartbeat-trigger.patch timers-fix-excessive-granularity-of-new-timers-after-a-nohz-idle.patch tracing-call-clear_boot_tracer-at-lateinit_sync.patch tracing-fix-freeing-of-filter-in-create_filter-when-set_str-is-false.patch tracing-fix-kmemleak-in-tracing_map_array_free.patch x86-mm-fix-use-after-free-of-ldt_struct.patch --- ...f-output-for-users-with-quota-limits.patch | 57 +++++ ...rlong-names-in-cifs_open-cifs_lookup.patch | 88 ++++++++ ...c-check-fails-return-its-value-first.patch | 102 +++++++++ ...-crash-in-encoder-failure-error-path.patch | 47 ++++ 
...-display-timing-controller-parameter.patch | 35 +++ ...v-sync-signal-polarity-configuration.patch | 36 +++ ...re-making-the-object-available-again.patch | 56 +++++ ...t-of-exe_file-causing-use-after-free.patch | 105 +++++++++ ...rofile-function-graph-entry-function.patch | 46 ++++ .../i2c-designware-fix-system-suspend.patch | 92 ++++++++ ..._code_data_elimination-is-configured.patch | 106 +++++++++ ...reeing-of-locked-page-with-madv_free.patch | 128 +++++++++++ ...c-reversed-logic-in-memblock_discard.patch | 38 ++++ ...c-svcsock-fix-null-pointer-exception.patch | 186 ++++++++++++++++ .../netfilter-nat-fix-src-map-lookup.patch | 68 ++++++ ...-page-list-when-decoding-nfsv4-write.patch | 48 ++++ ...f-core-fix-group-cpu-task-validation.patch | 182 ++++++++++++++++ ...w-correct-symbols-for-offline-module.patch | 107 +++++++++ ...pl-make-package-handling-more-robust.patch | 178 +++++++++++++++ ...-suspend-resume-in-heartbeat-trigger.patch | 99 +++++++++ queue-4.9/series | 25 +++ ...rity-of-new-timers-after-a-nohz-idle.patch | 206 ++++++++++++++++++ ...l-clear_boot_tracer-at-lateinit_sync.patch | 39 ++++ ...-create_filter-when-set_str-is-false.patch | 67 ++++++ ...x-kmemleak-in-tracing_map_array_free.patch | 88 ++++++++ ...-mm-fix-use-after-free-of-ldt_struct.patch | 173 +++++++++++++++ 26 files changed, 2402 insertions(+) create mode 100644 queue-4.9/cifs-fix-df-output-for-users-with-quota-limits.patch create mode 100644 queue-4.9/cifs-return-enametoolong-for-overlong-names-in-cifs_open-cifs_lookup.patch create mode 100644 queue-4.9/drm-atomic-if-the-atomic-check-fails-return-its-value-first.patch create mode 100644 queue-4.9/drm-rcar-du-fix-crash-in-encoder-failure-error-path.patch create mode 100644 queue-4.9/drm-rcar-du-fix-display-timing-controller-parameter.patch create mode 100644 queue-4.9/drm-rcar-du-fix-h-v-sync-signal-polarity-configuration.patch create mode 100644 queue-4.9/drm-release-driver-tracking-before-making-the-object-available-again.patch create 
mode 100644 queue-4.9/fork-fix-incorrect-fput-of-exe_file-causing-use-after-free.patch create mode 100644 queue-4.9/ftrace-check-for-null-ret_stack-on-profile-function-graph-entry-function.patch create mode 100644 queue-4.9/i2c-designware-fix-system-suspend.patch create mode 100644 queue-4.9/kbuild-linker-script-do-not-match-c-names-unless-ld_dead_code_data_elimination-is-configured.patch create mode 100644 queue-4.9/mm-madvise.c-fix-freeing-of-locked-page-with-madv_free.patch create mode 100644 queue-4.9/mm-memblock.c-reversed-logic-in-memblock_discard.patch create mode 100644 queue-4.9/net-sunrpc-svcsock-fix-null-pointer-exception.patch create mode 100644 queue-4.9/netfilter-nat-fix-src-map-lookup.patch create mode 100644 queue-4.9/nfsd-limit-end-of-page-list-when-decoding-nfsv4-write.patch create mode 100644 queue-4.9/perf-core-fix-group-cpu-task-validation.patch create mode 100644 queue-4.9/perf-probe-fix-funcs-to-show-correct-symbols-for-offline-module.patch create mode 100644 queue-4.9/perf-x86-intel-rapl-make-package-handling-more-robust.patch create mode 100644 queue-4.9/revert-leds-handle-suspend-resume-in-heartbeat-trigger.patch create mode 100644 queue-4.9/timers-fix-excessive-granularity-of-new-timers-after-a-nohz-idle.patch create mode 100644 queue-4.9/tracing-call-clear_boot_tracer-at-lateinit_sync.patch create mode 100644 queue-4.9/tracing-fix-freeing-of-filter-in-create_filter-when-set_str-is-false.patch create mode 100644 queue-4.9/tracing-fix-kmemleak-in-tracing_map_array_free.patch create mode 100644 queue-4.9/x86-mm-fix-use-after-free-of-ldt_struct.patch diff --git a/queue-4.9/cifs-fix-df-output-for-users-with-quota-limits.patch b/queue-4.9/cifs-fix-df-output-for-users-with-quota-limits.patch new file mode 100644 index 00000000000..717b220d8a1 --- /dev/null +++ b/queue-4.9/cifs-fix-df-output-for-users-with-quota-limits.patch @@ -0,0 +1,57 @@ +From 42bec214d8bd432be6d32a1acb0a9079ecd4d142 Mon Sep 17 00:00:00 2001 +From: Sachin Prabhu +Date: Thu, 
3 Aug 2017 13:09:03 +0530 +Subject: cifs: Fix df output for users with quota limits + +From: Sachin Prabhu + +commit 42bec214d8bd432be6d32a1acb0a9079ecd4d142 upstream. + +The df for a SMB2 share triggers a GetInfo call for +FS_FULL_SIZE_INFORMATION. The values returned are used to populate +struct statfs. + +The problem is that none of the information returned by the call +contains the total blocks available on the filesystem. Instead we use +the blocks available to the user ie. quota limitation when filling out +statfs.f_blocks. The information returned does contain Actual free units +on the filesystem and is used to populate statfs.f_bfree. For users with +quota enabled, it can lead to situations where the total free space +reported is more than the total blocks on the system ending up with df +reports like the following + + # df -h /mnt/a +Filesystem Size Used Avail Use% Mounted on +//192.168.22.10/a 2.5G -2.3G 2.5G - /mnt/a + +To fix this problem, we instead populate both statfs.f_bfree with the +same value as statfs.f_bavail ie. CallerAvailableAllocationUnits. This +is similar to what is done already in the code for cifs and df now +reports the quota information for the user used to mount the share. 
+ + # df --si /mnt/a +Filesystem Size Used Avail Use% Mounted on +//192.168.22.10/a 2.7G 101M 2.6G 4% /mnt/a + +Signed-off-by: Sachin Prabhu +Signed-off-by: Pierguido Lambri +Signed-off-by: Steve French +Signed-off-by: Greg Kroah-Hartman + +--- + fs/cifs/smb2pdu.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/fs/cifs/smb2pdu.c ++++ b/fs/cifs/smb2pdu.c +@@ -2930,8 +2930,8 @@ copy_fs_info_to_kstatfs(struct smb2_fs_f + kst->f_bsize = le32_to_cpu(pfs_inf->BytesPerSector) * + le32_to_cpu(pfs_inf->SectorsPerAllocationUnit); + kst->f_blocks = le64_to_cpu(pfs_inf->TotalAllocationUnits); +- kst->f_bfree = le64_to_cpu(pfs_inf->ActualAvailableAllocationUnits); +- kst->f_bavail = le64_to_cpu(pfs_inf->CallerAvailableAllocationUnits); ++ kst->f_bfree = kst->f_bavail = ++ le64_to_cpu(pfs_inf->CallerAvailableAllocationUnits); + return; + } + diff --git a/queue-4.9/cifs-return-enametoolong-for-overlong-names-in-cifs_open-cifs_lookup.patch b/queue-4.9/cifs-return-enametoolong-for-overlong-names-in-cifs_open-cifs_lookup.patch new file mode 100644 index 00000000000..238730895a0 --- /dev/null +++ b/queue-4.9/cifs-return-enametoolong-for-overlong-names-in-cifs_open-cifs_lookup.patch @@ -0,0 +1,88 @@ +From d3edede29f74d335f81d95a4588f5f136a9f7dcf Mon Sep 17 00:00:00 2001 +From: Ronnie Sahlberg +Date: Wed, 23 Aug 2017 14:48:14 +1000 +Subject: cifs: return ENAMETOOLONG for overlong names in cifs_open()/cifs_lookup() + +From: Ronnie Sahlberg + +commit d3edede29f74d335f81d95a4588f5f136a9f7dcf upstream. + +Add checking for the path component length and verify it is <= the maximum +that the server advertizes via FileFsAttributeInformation. + +With this patch cifs.ko will now return ENAMETOOLONG instead of ENOENT +when users try to access an overlong path. + +To test this, try to cd into a (non-existing) directory on a CIFS share +that has a too long name: +cd /mnt/aaaaaaaaaaaaaaa... 
+ +and it now should show a good error message from the shell: +bash: cd: /mnt/aaaaaaaaaaaaaaaa...aaaaaa: File name too long + +rh bz 1153996 + +Signed-off-by: Ronnie Sahlberg +Signed-off-by: Steve French +Signed-off-by: Greg Kroah-Hartman + +--- + fs/cifs/dir.c | 18 ++++++++++++------ + 1 file changed, 12 insertions(+), 6 deletions(-) + +--- a/fs/cifs/dir.c ++++ b/fs/cifs/dir.c +@@ -183,15 +183,20 @@ cifs_bp_rename_retry: + } + + /* ++ * Don't allow path components longer than the server max. + * Don't allow the separator character in a path component. + * The VFS will not allow "/", but "\" is allowed by posix. + */ + static int +-check_name(struct dentry *direntry) ++check_name(struct dentry *direntry, struct cifs_tcon *tcon) + { + struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb); + int i; + ++ if (unlikely(direntry->d_name.len > ++ tcon->fsAttrInfo.MaxPathNameComponentLength)) ++ return -ENAMETOOLONG; ++ + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) { + for (i = 0; i < direntry->d_name.len; i++) { + if (direntry->d_name.name[i] == '\\') { +@@ -489,10 +494,6 @@ cifs_atomic_open(struct inode *inode, st + return finish_no_open(file, res); + } + +- rc = check_name(direntry); +- if (rc) +- return rc; +- + xid = get_xid(); + + cifs_dbg(FYI, "parent inode = 0x%p name is: %pd and dentry = 0x%p\n", +@@ -505,6 +506,11 @@ cifs_atomic_open(struct inode *inode, st + } + + tcon = tlink_tcon(tlink); ++ ++ rc = check_name(direntry, tcon); ++ if (rc) ++ goto out_free_xid; ++ + server = tcon->ses->server; + + if (server->ops->new_lease_key) +@@ -765,7 +771,7 @@ cifs_lookup(struct inode *parent_dir_ino + } + pTcon = tlink_tcon(tlink); + +- rc = check_name(direntry); ++ rc = check_name(direntry, pTcon); + if (rc) + goto lookup_out; + diff --git a/queue-4.9/drm-atomic-if-the-atomic-check-fails-return-its-value-first.patch b/queue-4.9/drm-atomic-if-the-atomic-check-fails-return-its-value-first.patch new file mode 100644 index 00000000000..264921a53e9 --- /dev/null 
+++ b/queue-4.9/drm-atomic-if-the-atomic-check-fails-return-its-value-first.patch @@ -0,0 +1,102 @@ +From a0ffc51e20e90e0c1c2491de2b4b03f48b6caaba Mon Sep 17 00:00:00 2001 +From: Maarten Lankhorst +Date: Tue, 15 Aug 2017 11:57:06 +0200 +Subject: drm/atomic: If the atomic check fails, return its value first + +From: Maarten Lankhorst + +commit a0ffc51e20e90e0c1c2491de2b4b03f48b6caaba upstream. + +The last part of drm_atomic_check_only is testing whether we need to +fail with -EINVAL when modeset is not allowed, but forgets to return +the value when atomic_check() fails first. + +This results in -EDEADLK being replaced by -EINVAL, and the sanity +check in drm_modeset_drop_locks kicks in: + +[ 308.531734] ------------[ cut here ]------------ +[ 308.531791] WARNING: CPU: 0 PID: 1886 at drivers/gpu/drm/drm_modeset_lock.c:217 drm_modeset_drop_locks+0x33/0xc0 [drm] +[ 308.531828] Modules linked in: +[ 308.532050] CPU: 0 PID: 1886 Comm: kms_atomic Tainted: G U W 4.13.0-rc5-patser+ #5225 +[ 308.532082] Hardware name: NUC5i7RYB, BIOS RYBDWi35.86A.0246.2015.0309.1355 03/09/2015 +[ 308.532124] task: ffff8800cd9dae00 task.stack: ffff8800ca3b8000 +[ 308.532168] RIP: 0010:drm_modeset_drop_locks+0x33/0xc0 [drm] +[ 308.532189] RSP: 0018:ffff8800ca3bf980 EFLAGS: 00010282 +[ 308.532211] RAX: dffffc0000000000 RBX: ffff8800ca3bfaf8 RCX: 0000000013a171e6 +[ 308.532235] RDX: 1ffff10019477f69 RSI: ffffffffa8ba4fa0 RDI: ffff8800ca3bfb48 +[ 308.532258] RBP: ffff8800ca3bf998 R08: 0000000000000000 R09: 0000000000000003 +[ 308.532281] R10: 0000000079dbe066 R11: 00000000f760b34b R12: 0000000000000001 +[ 308.532304] R13: dffffc0000000000 R14: 00000000ffffffea R15: ffff880096889680 +[ 308.532328] FS: 00007ff00959cec0(0000) GS:ffff8800d4e00000(0000) knlGS:0000000000000000 +[ 308.532359] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 308.532380] CR2: 0000000000000008 CR3: 00000000ca2e3000 CR4: 00000000003406f0 +[ 308.532402] Call Trace: +[ 308.532440] drm_mode_atomic_ioctl+0x19fa/0x1c00 [drm] 
+[ 308.532488] ? drm_atomic_set_property+0x1220/0x1220 [drm] +[ 308.532565] ? avc_has_extended_perms+0xc39/0xff0 +[ 308.532593] ? lock_downgrade+0x610/0x610 +[ 308.532640] ? drm_atomic_set_property+0x1220/0x1220 [drm] +[ 308.532680] drm_ioctl_kernel+0x154/0x1a0 [drm] +[ 308.532755] drm_ioctl+0x624/0x8f0 [drm] +[ 308.532858] ? drm_atomic_set_property+0x1220/0x1220 [drm] +[ 308.532976] ? drm_getunique+0x210/0x210 [drm] +[ 308.533061] do_vfs_ioctl+0xd92/0xe40 +[ 308.533121] ? ioctl_preallocate+0x1b0/0x1b0 +[ 308.533160] ? selinux_capable+0x20/0x20 +[ 308.533191] ? do_fcntl+0x1b1/0xbf0 +[ 308.533219] ? kasan_slab_free+0xa2/0xb0 +[ 308.533249] ? f_getown+0x4b/0xa0 +[ 308.533278] ? putname+0xcf/0xe0 +[ 308.533309] ? security_file_ioctl+0x57/0x90 +[ 308.533342] SyS_ioctl+0x4e/0x80 +[ 308.533374] entry_SYSCALL_64_fastpath+0x18/0xad +[ 308.533405] RIP: 0033:0x7ff00779e4d7 +[ 308.533431] RSP: 002b:00007fff66a043d8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 +[ 308.533481] RAX: ffffffffffffffda RBX: 000000e7c7ca5910 RCX: 00007ff00779e4d7 +[ 308.533560] RDX: 00007fff66a04430 RSI: 00000000c03864bc RDI: 0000000000000003 +[ 308.533608] RBP: 00007ff007a5fb00 R08: 000000e7c7ca4620 R09: 000000e7c7ca5e60 +[ 308.533647] R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000070 +[ 308.533685] R13: 0000000000000000 R14: 0000000000000000 R15: 000000e7c7ca5930 +[ 308.533770] Code: ff df 55 48 89 e5 41 55 41 54 53 48 89 fb 48 83 c7 +50 48 89 fa 48 c1 ea 03 80 3c 02 00 74 05 e8 94 d4 16 e7 48 83 7b 50 00 +74 02 <0f> ff 4c 8d 6b 58 48 b8 00 00 00 00 00 fc ff df 4c 89 ea 48 c1 +[ 308.534086] ---[ end trace 77f11e53b1df44ad ]--- + +Solve this by adding the missing return. + +This is also a bugfix because we could end up rejecting updates with +-EINVAL because of a early -EDEADLK, while if atomic_check ran to +completion it might have downgraded the modeset to a fastset. 
+ +Signed-off-by: Maarten Lankhorst +Testcase: kms_atomic +Link: https://patchwork.freedesktop.org/patch/msgid/20170815095706.23624-1-maarten.lankhorst@linux.intel.com +Fixes: d34f20d6e2f2 ("drm: Atomic modeset ioctl") +Reviewed-by: Daniel Vetter +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/drm_atomic.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/drm_atomic.c ++++ b/drivers/gpu/drm/drm_atomic.c +@@ -1386,6 +1386,9 @@ int drm_atomic_check_only(struct drm_ato + if (config->funcs->atomic_check) + ret = config->funcs->atomic_check(state->dev, state); + ++ if (ret) ++ return ret; ++ + if (!state->allow_modeset) { + for_each_crtc_in_state(state, crtc, crtc_state, i) { + if (drm_atomic_crtc_needs_modeset(crtc_state)) { +@@ -1396,7 +1399,7 @@ int drm_atomic_check_only(struct drm_ato + } + } + +- return ret; ++ return 0; + } + EXPORT_SYMBOL(drm_atomic_check_only); + diff --git a/queue-4.9/drm-rcar-du-fix-crash-in-encoder-failure-error-path.patch b/queue-4.9/drm-rcar-du-fix-crash-in-encoder-failure-error-path.patch new file mode 100644 index 00000000000..a54a47549c8 --- /dev/null +++ b/queue-4.9/drm-rcar-du-fix-crash-in-encoder-failure-error-path.patch @@ -0,0 +1,47 @@ +From 05ee29e94acf0d4b3998c3f93374952de8f90176 Mon Sep 17 00:00:00 2001 +From: Laurent Pinchart +Date: Mon, 3 Oct 2016 20:03:22 +0300 +Subject: drm: rcar-du: Fix crash in encoder failure error path + +From: Laurent Pinchart + +commit 05ee29e94acf0d4b3998c3f93374952de8f90176 upstream. + +When an encoder fails to initialize the driver prints an error message +to the kernel log. The message contains the name of the encoder's DT +node, which is NULL for internal encoders. Use the of_node_full_name() +macro to avoid dereferencing a NULL pointer, print the output number to +add more context to the error, and make sure we still own a reference to +the encoder's DT node by delaying the of_node_put() call. 
+ +Signed-off-by: Laurent Pinchart +Reviewed-by: Gustavo Padovan +Signed-off-by: Thong Ho +Signed-off-by: Nhan Nguyen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/rcar-du/rcar_du_kms.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/drivers/gpu/drm/rcar-du/rcar_du_kms.c ++++ b/drivers/gpu/drm/rcar-du/rcar_du_kms.c +@@ -453,13 +453,13 @@ static int rcar_du_encoders_init_one(str + } + + ret = rcar_du_encoder_init(rcdu, enc_type, output, encoder, connector); +- of_node_put(encoder); +- of_node_put(connector); +- + if (ret && ret != -EPROBE_DEFER) + dev_warn(rcdu->dev, +- "failed to initialize encoder %s (%d), skipping\n", +- encoder->full_name, ret); ++ "failed to initialize encoder %s on output %u (%d), skipping\n", ++ of_node_full_name(encoder), output, ret); ++ ++ of_node_put(encoder); ++ of_node_put(connector); + + return ret; + } diff --git a/queue-4.9/drm-rcar-du-fix-display-timing-controller-parameter.patch b/queue-4.9/drm-rcar-du-fix-display-timing-controller-parameter.patch new file mode 100644 index 00000000000..5048aa13fba --- /dev/null +++ b/queue-4.9/drm-rcar-du-fix-display-timing-controller-parameter.patch @@ -0,0 +1,35 @@ +From 9cdced8a39c04cf798ddb2a27cb5952f7d39f633 Mon Sep 17 00:00:00 2001 +From: Koji Matsuoka +Date: Mon, 18 Apr 2016 16:31:30 +0900 +Subject: drm: rcar-du: Fix display timing controller parameter + +From: Koji Matsuoka + +commit 9cdced8a39c04cf798ddb2a27cb5952f7d39f633 upstream. + +There is a bug in the setting of the DES (Display Enable Signal) +register. This current setting occurs 1 dot left shift. The DES +register should be set minus one value about the specifying value +with H/W specification. This patch corrects it. 
+ +Signed-off-by: Koji Matsuoka +Signed-off-by: Laurent Pinchart +Signed-off-by: Thong Ho +Signed-off-by: Nhan Nguyen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/rcar-du/rcar_du_crtc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c ++++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c +@@ -172,7 +172,7 @@ static void rcar_du_crtc_set_display_tim + mode->crtc_vsync_start - 1); + rcar_du_crtc_write(rcrtc, VCR, mode->crtc_vtotal - 1); + +- rcar_du_crtc_write(rcrtc, DESR, mode->htotal - mode->hsync_start); ++ rcar_du_crtc_write(rcrtc, DESR, mode->htotal - mode->hsync_start - 1); + rcar_du_crtc_write(rcrtc, DEWR, mode->hdisplay); + } + diff --git a/queue-4.9/drm-rcar-du-fix-h-v-sync-signal-polarity-configuration.patch b/queue-4.9/drm-rcar-du-fix-h-v-sync-signal-polarity-configuration.patch new file mode 100644 index 00000000000..cb84fe7d9c3 --- /dev/null +++ b/queue-4.9/drm-rcar-du-fix-h-v-sync-signal-polarity-configuration.patch @@ -0,0 +1,36 @@ +From fd1adef3bff0663c5ac31b45bc4a05fafd43d19b Mon Sep 17 00:00:00 2001 +From: Koji Matsuoka +Date: Mon, 16 May 2016 11:28:15 +0900 +Subject: drm: rcar-du: Fix H/V sync signal polarity configuration + +From: Koji Matsuoka + +commit fd1adef3bff0663c5ac31b45bc4a05fafd43d19b upstream. + +The VSL and HSL bits in the DSMR register set the corresponding +horizontal and vertical sync signal polarity to active high. The code +got it the wrong way around, fix it. + +Signed-off-by: Koji Matsuoka +Signed-off-by: Laurent Pinchart +Signed-off-by: Thong Ho +Signed-off-by: Nhan Nguyen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/rcar-du/rcar_du_crtc.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c ++++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c +@@ -149,8 +149,8 @@ static void rcar_du_crtc_set_display_tim + rcar_du_group_write(rcrtc->group, rcrtc->index % 2 ? 
OTAR2 : OTAR, 0); + + /* Signal polarities */ +- value = ((mode->flags & DRM_MODE_FLAG_PVSYNC) ? 0 : DSMR_VSL) +- | ((mode->flags & DRM_MODE_FLAG_PHSYNC) ? 0 : DSMR_HSL) ++ value = ((mode->flags & DRM_MODE_FLAG_PVSYNC) ? DSMR_VSL : 0) ++ | ((mode->flags & DRM_MODE_FLAG_PHSYNC) ? DSMR_HSL : 0) + | DSMR_DIPM_DISP | DSMR_CSPM; + rcar_du_crtc_write(rcrtc, DSMR, value); + diff --git a/queue-4.9/drm-release-driver-tracking-before-making-the-object-available-again.patch b/queue-4.9/drm-release-driver-tracking-before-making-the-object-available-again.patch new file mode 100644 index 00000000000..97ed42171f8 --- /dev/null +++ b/queue-4.9/drm-release-driver-tracking-before-making-the-object-available-again.patch @@ -0,0 +1,56 @@ +From fe4600a548f2763dec91b3b27a1245c370ceee2a Mon Sep 17 00:00:00 2001 +From: Chris Wilson +Date: Sat, 19 Aug 2017 13:05:58 +0100 +Subject: drm: Release driver tracking before making the object available again +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Chris Wilson + +commit fe4600a548f2763dec91b3b27a1245c370ceee2a upstream. + +This is the same bug as we fixed in commit f6cd7daecff5 ("drm: Release +driver references to handle before making it available again"), but now +the exposure is via the PRIME lookup tables. If we remove the +object/handle from the PRIME lut, then a new request for the same +object/fd will generate a new handle, thus for a short window that +object is known to userspace by two different handles. Fix this by +releasing the driver tracking before PRIME. 
+ +Fixes: 0ff926c7d4f0 ("drm/prime: add exported buffers to current fprivs +imported buffer list (v2)") +Signed-off-by: Chris Wilson +Cc: David Airlie +Cc: Daniel Vetter +Cc: Rob Clark +Cc: Ville Syrjälä +Cc: Thierry Reding +Reviewed-by: Daniel Vetter +Signed-off-by: Joonas Lahtinen +Link: https://patchwork.freedesktop.org/patch/msgid/20170819120558.6465-1-chris@chris-wilson.co.uk +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/drm_gem.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/drivers/gpu/drm/drm_gem.c ++++ b/drivers/gpu/drm/drm_gem.c +@@ -255,13 +255,13 @@ drm_gem_object_release_handle(int id, vo + struct drm_gem_object *obj = ptr; + struct drm_device *dev = obj->dev; + ++ if (dev->driver->gem_close_object) ++ dev->driver->gem_close_object(obj, file_priv); ++ + if (drm_core_check_feature(dev, DRIVER_PRIME)) + drm_gem_remove_prime_handles(obj, file_priv); + drm_vma_node_revoke(&obj->vma_node, file_priv); + +- if (dev->driver->gem_close_object) +- dev->driver->gem_close_object(obj, file_priv); +- + drm_gem_object_handle_unreference_unlocked(obj); + + return 0; diff --git a/queue-4.9/fork-fix-incorrect-fput-of-exe_file-causing-use-after-free.patch b/queue-4.9/fork-fix-incorrect-fput-of-exe_file-causing-use-after-free.patch new file mode 100644 index 00000000000..d855feb42b8 --- /dev/null +++ b/queue-4.9/fork-fix-incorrect-fput-of-exe_file-causing-use-after-free.patch @@ -0,0 +1,105 @@ +From 2b7e8665b4ff51c034c55df3cff76518d1a9ee3a Mon Sep 17 00:00:00 2001 +From: Eric Biggers +Date: Fri, 25 Aug 2017 15:55:43 -0700 +Subject: fork: fix incorrect fput of ->exe_file causing use-after-free + +From: Eric Biggers + +commit 2b7e8665b4ff51c034c55df3cff76518d1a9ee3a upstream. + +Commit 7c051267931a ("mm, fork: make dup_mmap wait for mmap_sem for +write killable") made it possible to kill a forking task while it is +waiting to acquire its ->mmap_sem for write, in dup_mmap(). 
+ +However, it was overlooked that this introduced a new error path before +a reference is taken on the mm_struct's ->exe_file. Since the +->exe_file of the new mm_struct was already set to the old ->exe_file by +the memcpy() in dup_mm(), it was possible for the mmput() in the error +path of dup_mm() to drop a reference to ->exe_file which was never +taken. + +This caused the struct file to later be freed prematurely. + +Fix it by updating mm_init() to NULL out the ->exe_file, in the same +place it clears other things like the list of mmaps. + +This bug was found by syzkaller. It can be reproduced using the +following C program: + + #define _GNU_SOURCE + #include <pthread.h> + #include <stdlib.h> + #include <sys/mman.h> + #include <sys/syscall.h> + #include <sys/wait.h> + #include <unistd.h> + + static void *mmap_thread(void *_arg) + { + for (;;) { + mmap(NULL, 0x1000000, PROT_READ, + MAP_POPULATE|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + } + } + + static void *fork_thread(void *_arg) + { + usleep(rand() % 10000); + fork(); + } + + int main(void) + { + fork(); + fork(); + fork(); + for (;;) { + if (fork() == 0) { + pthread_t t; + + pthread_create(&t, NULL, mmap_thread, NULL); + pthread_create(&t, NULL, fork_thread, NULL); + usleep(rand() % 10000); + syscall(__NR_exit_group, 0); + } + wait(NULL); + } + } + +No special kernel config options are needed. It usually causes a NULL +pointer dereference in __remove_shared_vm_struct() during exit, or in +dup_mmap() (which is usually inlined into copy_process()) during fork. +Both are due to a vm_area_struct's ->vm_file being used after it's +already been freed. 
+ +Google Bug Id: 64772007 + +Link: http://lkml.kernel.org/r/20170823211408.31198-1-ebiggers3@gmail.com +Fixes: 7c051267931a ("mm, fork: make dup_mmap wait for mmap_sem for write killable") +Signed-off-by: Eric Biggers +Tested-by: Mark Rutland +Acked-by: Michal Hocko +Cc: Dmitry Vyukov +Cc: Ingo Molnar +Cc: Konstantin Khlebnikov +Cc: Oleg Nesterov +Cc: Peter Zijlstra +Cc: Vlastimil Babka +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/fork.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -766,6 +766,7 @@ static struct mm_struct *mm_init(struct + mm_init_cpumask(mm); + mm_init_aio(mm); + mm_init_owner(mm, p); ++ RCU_INIT_POINTER(mm->exe_file, NULL); + mmu_notifier_mm_init(mm); + clear_tlb_flush_pending(mm); + #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS diff --git a/queue-4.9/ftrace-check-for-null-ret_stack-on-profile-function-graph-entry-function.patch b/queue-4.9/ftrace-check-for-null-ret_stack-on-profile-function-graph-entry-function.patch new file mode 100644 index 00000000000..a1932b0ea7c --- /dev/null +++ b/queue-4.9/ftrace-check-for-null-ret_stack-on-profile-function-graph-entry-function.patch @@ -0,0 +1,46 @@ +From a8f0f9e49956a74718874b800251455680085600 Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (VMware)" +Date: Thu, 17 Aug 2017 16:37:25 -0400 +Subject: ftrace: Check for null ret_stack on profile function graph entry function + +From: Steven Rostedt (VMware) + +commit a8f0f9e49956a74718874b800251455680085600 upstream. + +There's a small race when function graph shutsdown and the calling of the +registered function graph entry callback. The callback must not reference +the task's ret_stack without first checking that it is not NULL. Note, when +a ret_stack is allocated for a task, it stays allocated until the task exits. 
+The problem here, is that function_graph is shutdown, and a new task was +created, which doesn't have its ret_stack allocated. But since some of the +functions are still being traced, the callbacks can still be called. + +The normal function_graph code handles this, but starting with commit +8861dd303c ("ftrace: Access ret_stack->subtime only in the function +profiler") the profiler code references the ret_stack on function entry, but +doesn't check if it is NULL first. + +Link: https://bugzilla.kernel.org/show_bug.cgi?id=196611 + +Fixes: 8861dd303c ("ftrace: Access ret_stack->subtime only in the function profiler") +Reported-by: lilydjwg@gmail.com +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/trace/ftrace.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/kernel/trace/ftrace.c ++++ b/kernel/trace/ftrace.c +@@ -876,6 +876,10 @@ static int profile_graph_entry(struct ft + + function_profile_call(trace->func, 0, NULL, NULL); + ++ /* If function graph is shutting down, ret_stack can be NULL */ ++ if (!current->ret_stack) ++ return 0; ++ + if (index >= 0 && index < FTRACE_RETFUNC_DEPTH) + current->ret_stack[index].subtime = 0; + diff --git a/queue-4.9/i2c-designware-fix-system-suspend.patch b/queue-4.9/i2c-designware-fix-system-suspend.patch new file mode 100644 index 00000000000..8c446ff8fe0 --- /dev/null +++ b/queue-4.9/i2c-designware-fix-system-suspend.patch @@ -0,0 +1,92 @@ +From a23318feeff662c8d25d21623daebdd2e55ec221 Mon Sep 17 00:00:00 2001 +From: Ulf Hansson +Date: Wed, 9 Aug 2017 15:28:22 +0200 +Subject: i2c: designware: Fix system suspend + +From: Ulf Hansson + +commit a23318feeff662c8d25d21623daebdd2e55ec221 upstream. + +The commit 8503ff166504 ("i2c: designware: Avoid unnecessary resuming +during system suspend"), may suggest to the PM core to try out the so +called direct_complete path for system sleep. 
In this path, the PM core +treats a runtime suspended device as it's already in a proper low power +state for system sleep, which makes it skip calling the system sleep +callbacks for the device, except for the ->prepare() and the ->complete() +callbacks. + +However, the PM core may unset the direct_complete flag for a parent +device, in case its child device are being system suspended before. In this +scenario, the PM core invokes the system sleep callbacks, no matter if the +device is runtime suspended or not. + +Particularly in cases of an existing i2c slave device, the above path is +triggered, which breaks the assumption that the i2c device is always +runtime resumed whenever the dw_i2c_plat_suspend() is being called. + +More precisely, dw_i2c_plat_suspend() calls clk_core_disable() and +clk_core_unprepare(), for an already disabled/unprepared clock, leading to +a splat in the log about clocks calls being wrongly balanced and breaking +system sleep. + +To still allow the direct_complete path in cases when it's possible, but +also to keep the fix simple, let's runtime resume the i2c device in the +->suspend() callback, before continuing to put the device into low power +state. + +Note, in cases when the i2c device is attached to the ACPI PM domain, this +problem doesn't occur, because ACPI's ->suspend() callback, assigned to +acpi_subsys_suspend(), already calls pm_runtime_resume() for the device. + +It should also be noted that this change does not fix commit 8503ff166504 +("i2c: designware: Avoid unnecessary resuming during system suspend"). +Because for the non-ACPI case, the system sleep support was already broken +prior that point. + +Signed-off-by: Ulf Hansson +Acked-by: Rafael J. 
Wysocki +Tested-by: John Stultz +Tested-by: Jarkko Nikula +Acked-by: Jarkko Nikula +Reviewed-by: Mika Westerberg +Signed-off-by: Wolfram Sang +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/i2c/busses/i2c-designware-platdrv.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +--- a/drivers/i2c/busses/i2c-designware-platdrv.c ++++ b/drivers/i2c/busses/i2c-designware-platdrv.c +@@ -319,7 +319,7 @@ static void dw_i2c_plat_complete(struct + #endif + + #ifdef CONFIG_PM +-static int dw_i2c_plat_suspend(struct device *dev) ++static int dw_i2c_plat_runtime_suspend(struct device *dev) + { + struct platform_device *pdev = to_platform_device(dev); + struct dw_i2c_dev *i_dev = platform_get_drvdata(pdev); +@@ -343,11 +343,21 @@ static int dw_i2c_plat_resume(struct dev + return 0; + } + ++#ifdef CONFIG_PM_SLEEP ++static int dw_i2c_plat_suspend(struct device *dev) ++{ ++ pm_runtime_resume(dev); ++ return dw_i2c_plat_runtime_suspend(dev); ++} ++#endif ++ + static const struct dev_pm_ops dw_i2c_dev_pm_ops = { + .prepare = dw_i2c_plat_prepare, + .complete = dw_i2c_plat_complete, + SET_SYSTEM_SLEEP_PM_OPS(dw_i2c_plat_suspend, dw_i2c_plat_resume) +- SET_RUNTIME_PM_OPS(dw_i2c_plat_suspend, dw_i2c_plat_resume, NULL) ++ SET_RUNTIME_PM_OPS(dw_i2c_plat_runtime_suspend, ++ dw_i2c_plat_resume, ++ NULL) + }; + + #define DW_I2C_DEV_PMOPS (&dw_i2c_dev_pm_ops) diff --git a/queue-4.9/kbuild-linker-script-do-not-match-c-names-unless-ld_dead_code_data_elimination-is-configured.patch b/queue-4.9/kbuild-linker-script-do-not-match-c-names-unless-ld_dead_code_data_elimination-is-configured.patch new file mode 100644 index 00000000000..e77e69911d7 --- /dev/null +++ b/queue-4.9/kbuild-linker-script-do-not-match-c-names-unless-ld_dead_code_data_elimination-is-configured.patch @@ -0,0 +1,106 @@ +From cb87481ee89dbd6609e227afbf64900fb4e5c930 Mon Sep 17 00:00:00 2001 +From: Nicholas Piggin +Date: Wed, 26 Jul 2017 22:46:27 +1000 +Subject: kbuild: linker script do not match C names 
unless LD_DEAD_CODE_DATA_ELIMINATION is configured + +From: Nicholas Piggin + +commit cb87481ee89dbd6609e227afbf64900fb4e5c930 upstream. + +The .data and .bss sections were modified in the generic linker script to +pull in sections named .data., which are generated by gcc with +-ffunction-sections and -fdata-sections options. + +The problem with this pattern is it can also match section names that Linux +defines explicitly, e.g., .data.unlikely. This can cause Linux sections to +get moved into the wrong place. + +The way to avoid this is to use ".." separators for explicit section names +(the dot character is valid in a section name but not a C identifier). +However currently there are sections which don't follow this rule, so for +now just disable the wild card by default. + +Example: http://marc.info/?l=linux-arm-kernel&m=150106824024221&w=2 + +Fixes: b67067f1176df ("kbuild: allow archs to select link dead code/data elimination") +Signed-off-by: Nicholas Piggin +Signed-off-by: Masahiro Yamada +Signed-off-by: Greg Kroah-Hartman + +--- + include/asm-generic/vmlinux.lds.h | 38 ++++++++++++++++++++++++++------------ + 1 file changed, 26 insertions(+), 12 deletions(-) + +--- a/include/asm-generic/vmlinux.lds.h ++++ b/include/asm-generic/vmlinux.lds.h +@@ -60,6 +60,22 @@ + #define ALIGN_FUNCTION() . = ALIGN(8) + + /* ++ * LD_DEAD_CODE_DATA_ELIMINATION option enables -fdata-sections, which ++ * generates .data.identifier sections, which need to be pulled in with ++ * .data. We don't want to pull in .data..other sections, which Linux ++ * has defined. Same for text and bss. 
++ */ ++#ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION ++#define TEXT_MAIN .text .text.[0-9a-zA-Z_]* ++#define DATA_MAIN .data .data.[0-9a-zA-Z_]* ++#define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* ++#else ++#define TEXT_MAIN .text ++#define DATA_MAIN .data ++#define BSS_MAIN .bss ++#endif ++ ++/* + * Align to a 32 byte boundary equal to the + * alignment gcc 4.5 uses for a struct + */ +@@ -198,12 +214,9 @@ + + /* + * .data section +- * LD_DEAD_CODE_DATA_ELIMINATION option enables -fdata-sections generates +- * .data.identifier which needs to be pulled in with .data, but don't want to +- * pull in .data..stuff which has its own requirements. Same for bss. + */ + #define DATA_DATA \ +- *(.data .data.[0-9a-zA-Z_]*) \ ++ *(DATA_MAIN) \ + *(.ref.data) \ + *(.data..shared_aligned) /* percpu related */ \ + MEM_KEEP(init.data) \ +@@ -436,16 +449,17 @@ + VMLINUX_SYMBOL(__security_initcall_end) = .; \ + } + +-/* .text section. Map to function alignment to avoid address changes ++/* ++ * .text section. Map to function alignment to avoid address changes + * during second ld run in second ld pass when generating System.map +- * LD_DEAD_CODE_DATA_ELIMINATION option enables -ffunction-sections generates +- * .text.identifier which needs to be pulled in with .text , but some +- * architectures define .text.foo which is not intended to be pulled in here. +- * Those enabling LD_DEAD_CODE_DATA_ELIMINATION must ensure they don't have +- * conflicting section names, and must pull in .text.[0-9a-zA-Z_]* */ ++ * ++ * TEXT_MAIN here will match .text.fixup and .text.unlikely if dead ++ * code elimination is enabled, so these sections should be converted ++ * to use ".." first. 
++ */ + #define TEXT_TEXT \ + ALIGN_FUNCTION(); \ +- *(.text.hot .text .text.fixup .text.unlikely) \ ++ *(.text.hot TEXT_MAIN .text.fixup .text.unlikely) \ + *(.ref.text) \ + MEM_KEEP(init.text) \ + MEM_KEEP(exit.text) \ +@@ -613,7 +627,7 @@ + BSS_FIRST_SECTIONS \ + *(.bss..page_aligned) \ + *(.dynbss) \ +- *(.bss .bss.[0-9a-zA-Z_]*) \ ++ *(BSS_MAIN) \ + *(COMMON) \ + } + diff --git a/queue-4.9/mm-madvise.c-fix-freeing-of-locked-page-with-madv_free.patch b/queue-4.9/mm-madvise.c-fix-freeing-of-locked-page-with-madv_free.patch new file mode 100644 index 00000000000..31a7a4e2385 --- /dev/null +++ b/queue-4.9/mm-madvise.c-fix-freeing-of-locked-page-with-madv_free.patch @@ -0,0 +1,128 @@ +From 263630e8d176d87308481ebdcd78ef9426739c6b Mon Sep 17 00:00:00 2001 +From: Eric Biggers +Date: Fri, 25 Aug 2017 15:55:39 -0700 +Subject: mm/madvise.c: fix freeing of locked page with MADV_FREE + +From: Eric Biggers + +commit 263630e8d176d87308481ebdcd78ef9426739c6b upstream. + +If madvise(..., MADV_FREE) split a transparent hugepage, it called +put_page() before unlock_page(). + +This was wrong because put_page() can free the page, e.g. if a +concurrent madvise(..., MADV_DONTNEED) has removed it from the memory +mapping. put_page() then rightfully complained about freeing a locked +page. + +Fix this by moving the unlock_page() before put_page(). 
+ +This bug was found by syzkaller, which encountered the following splat: + + BUG: Bad page state in process syzkaller412798 pfn:1bd800 + page:ffffea0006f60000 count:0 mapcount:0 mapping: (null) index:0x20a00 + flags: 0x200000000040019(locked|uptodate|dirty|swapbacked) + raw: 0200000000040019 0000000000000000 0000000000020a00 00000000ffffffff + raw: ffffea0006f60020 ffffea0006f60020 0000000000000000 0000000000000000 + page dumped because: PAGE_FLAGS_CHECK_AT_FREE flag(s) set + bad because of flags: 0x1(locked) + Modules linked in: + CPU: 1 PID: 3037 Comm: syzkaller412798 Not tainted 4.13.0-rc5+ #35 + Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 + Call Trace: + __dump_stack lib/dump_stack.c:16 [inline] + dump_stack+0x194/0x257 lib/dump_stack.c:52 + bad_page+0x230/0x2b0 mm/page_alloc.c:565 + free_pages_check_bad+0x1f0/0x2e0 mm/page_alloc.c:943 + free_pages_check mm/page_alloc.c:952 [inline] + free_pages_prepare mm/page_alloc.c:1043 [inline] + free_pcp_prepare mm/page_alloc.c:1068 [inline] + free_hot_cold_page+0x8cf/0x12b0 mm/page_alloc.c:2584 + __put_single_page mm/swap.c:79 [inline] + __put_page+0xfb/0x160 mm/swap.c:113 + put_page include/linux/mm.h:814 [inline] + madvise_free_pte_range+0x137a/0x1ec0 mm/madvise.c:371 + walk_pmd_range mm/pagewalk.c:50 [inline] + walk_pud_range mm/pagewalk.c:108 [inline] + walk_p4d_range mm/pagewalk.c:134 [inline] + walk_pgd_range mm/pagewalk.c:160 [inline] + __walk_page_range+0xc3a/0x1450 mm/pagewalk.c:249 + walk_page_range+0x200/0x470 mm/pagewalk.c:326 + madvise_free_page_range.isra.9+0x17d/0x230 mm/madvise.c:444 + madvise_free_single_vma+0x353/0x580 mm/madvise.c:471 + madvise_dontneed_free mm/madvise.c:555 [inline] + madvise_vma mm/madvise.c:664 [inline] + SYSC_madvise mm/madvise.c:832 [inline] + SyS_madvise+0x7d3/0x13c0 mm/madvise.c:760 + entry_SYSCALL_64_fastpath+0x1f/0xbe + +Here is a C reproducer: + + #define _GNU_SOURCE + #include + #include + #include + + #define MADV_FREE 8 + 
#define PAGE_SIZE 4096 + + static void *mapping; + static const size_t mapping_size = 0x1000000; + + static void *madvise_thrproc(void *arg) + { + madvise(mapping, mapping_size, (long)arg); + } + + int main(void) + { + pthread_t t[2]; + + for (;;) { + mapping = mmap(NULL, mapping_size, PROT_WRITE, + MAP_POPULATE|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + + munmap(mapping + mapping_size / 2, PAGE_SIZE); + + pthread_create(&t[0], 0, madvise_thrproc, (void*)MADV_DONTNEED); + pthread_create(&t[1], 0, madvise_thrproc, (void*)MADV_FREE); + pthread_join(t[0], NULL); + pthread_join(t[1], NULL); + munmap(mapping, mapping_size); + } + } + +Note: to see the splat, CONFIG_TRANSPARENT_HUGEPAGE=y and +CONFIG_DEBUG_VM=y are needed. + +Google Bug Id: 64696096 + +Link: http://lkml.kernel.org/r/20170823205235.132061-1-ebiggers3@gmail.com +Fixes: 854e9ed09ded ("mm: support madvise(MADV_FREE)") +Signed-off-by: Eric Biggers +Acked-by: David Rientjes +Acked-by: Minchan Kim +Acked-by: Michal Hocko +Cc: Dmitry Vyukov +Cc: Hugh Dickins +Cc: Andrea Arcangeli +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/madvise.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/madvise.c ++++ b/mm/madvise.c +@@ -331,8 +331,8 @@ static int madvise_free_pte_range(pmd_t + pte_offset_map_lock(mm, pmd, addr, &ptl); + goto out; + } +- put_page(page); + unlock_page(page); ++ put_page(page); + pte = pte_offset_map_lock(mm, pmd, addr, &ptl); + pte--; + addr -= PAGE_SIZE; diff --git a/queue-4.9/mm-memblock.c-reversed-logic-in-memblock_discard.patch b/queue-4.9/mm-memblock.c-reversed-logic-in-memblock_discard.patch new file mode 100644 index 00000000000..c380df728da --- /dev/null +++ b/queue-4.9/mm-memblock.c-reversed-logic-in-memblock_discard.patch @@ -0,0 +1,38 @@ +From 91b540f98872a206ea1c49e4aa6ea8eed0886644 Mon Sep 17 00:00:00 2001 +From: Pavel Tatashin +Date: Fri, 25 Aug 2017 15:55:46 -0700 +Subject: mm/memblock.c: reversed logic in 
memblock_discard() + +From: Pavel Tatashin + +commit 91b540f98872a206ea1c49e4aa6ea8eed0886644 upstream. + +In recently introduced memblock_discard() there is a reversed logic bug. +Memory is freed of static array instead of dynamically allocated one. + +Link: http://lkml.kernel.org/r/1503511441-95478-2-git-send-email-pasha.tatashin@oracle.com +Fixes: 3010f876500f ("mm: discard memblock data later") +Signed-off-by: Pavel Tatashin +Reported-by: Woody Suwalski +Tested-by: Woody Suwalski +Acked-by: Michal Hocko +Cc: Vlastimil Babka +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/memblock.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/memblock.c ++++ b/mm/memblock.c +@@ -311,7 +311,7 @@ void __init memblock_discard(void) + __memblock_free_late(addr, size); + } + +- if (memblock.memory.regions == memblock_memory_init_regions) { ++ if (memblock.memory.regions != memblock_memory_init_regions) { + addr = __pa(memblock.memory.regions); + size = PAGE_ALIGN(sizeof(struct memblock_region) * + memblock.memory.max); diff --git a/queue-4.9/net-sunrpc-svcsock-fix-null-pointer-exception.patch b/queue-4.9/net-sunrpc-svcsock-fix-null-pointer-exception.patch new file mode 100644 index 00000000000..498da348a44 --- /dev/null +++ b/queue-4.9/net-sunrpc-svcsock-fix-null-pointer-exception.patch @@ -0,0 +1,186 @@ +From eebe53e87f97975ee58a21693e44797608bf679c Mon Sep 17 00:00:00 2001 +From: Vadim Lomovtsev +Date: Mon, 21 Aug 2017 07:23:07 -0400 +Subject: net: sunrpc: svcsock: fix NULL-pointer exception + +From: Vadim Lomovtsev + +commit eebe53e87f97975ee58a21693e44797608bf679c upstream. + +While running nfs/connectathon tests kernel NULL-pointer exception +has been observed due to races in svcsock.c. + +Race is appear when kernel accepts connection by kernel_accept +(which creates new socket) and start queuing ingress packets +to new socket. 
This happens in ksoftirq context which could run +concurrently on a different core while new socket setup is not done yet. + +The fix is to re-order socket user data init sequence and add +write/read barrier calls to be sure that we got proper values +for callback pointers before actually calling them. + +Test results: nfs/connectathon reports '0' failed tests for about 200+ iterations. + +Crash log: +---<-snip->--- +[ 6708.638984] Unable to handle kernel NULL pointer dereference at virtual address 00000000 +[ 6708.647093] pgd = ffff0000094e0000 +[ 6708.650497] [00000000] *pgd=0000010ffff90003, *pud=0000010ffff90003, *pmd=0000010ffff80003, *pte=0000000000000000 +[ 6708.660761] Internal error: Oops: 86000005 [#1] SMP +[ 6708.665630] Modules linked in: nfsv3 nfnetlink_queue nfnetlink_log nfnetlink rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache overlay xt_CONNSECMARK xt_SECMARK xt_conntrack iptable_security ip_tables ah4 xfrm4_mode_transport sctp tun binfmt_misc ext4 jbd2 mbcache loop tcp_diag udp_diag inet_diag rpcrdma ib_isert iscsi_target_mod ib_iser rdma_cm iw_cm libiscsi scsi_transport_iscsi ib_srpt target_core_mod ib_srp scsi_transport_srp ib_ipoib ib_ucm ib_uverbs ib_umad ib_cm ib_core nls_koi8_u nls_cp932 ts_kmp nf_conntrack_ipv4 nf_defrag_ipv4 nf_conntrack vfat fat ghash_ce sha2_ce sha1_ce cavium_rng_vf i2c_thunderx sg thunderx_edac i2c_smbus edac_core cavium_rng nfsd auth_rpcgss nfs_acl lockd grace sunrpc xfs libcrc32c nicvf nicpf ast i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops +[ 6708.736446] ttm drm i2c_core thunder_bgx thunder_xcv mdio_thunder mdio_cavium dm_mirror dm_region_hash dm_log dm_mod [last unloaded: stap_3c300909c5b3f46dcacd49aab3334af_87021] +[ 6708.752275] CPU: 84 PID: 0 Comm: swapper/84 Tainted: G W OE 4.11.0-4.el7.aarch64 #1 +[ 6708.760787] Hardware name: www.cavium.com CRB-2S/CRB-2S, BIOS 0.3 Mar 13 2017 +[ 6708.767910] task: ffff810006842e80 task.stack: ffff81000689c000 +[ 6708.773822] PC is at 0x0 +[ 
6708.776739] LR is at svc_data_ready+0x38/0x88 [sunrpc] +[ 6708.781866] pc : [<0000000000000000>] lr : [] pstate: 60000145 +[ 6708.789248] sp : ffff810ffbad3900 +[ 6708.792551] x29: ffff810ffbad3900 x28: ffff000008c73d58 +[ 6708.797853] x27: 0000000000000000 x26: ffff81000bbe1e00 +[ 6708.803156] x25: 0000000000000020 x24: ffff800f7410bf28 +[ 6708.808458] x23: ffff000008c63000 x22: ffff000008c63000 +[ 6708.813760] x21: ffff800f7410bf28 x20: ffff81000bbe1e00 +[ 6708.819063] x19: ffff810012412400 x18: 00000000d82a9df2 +[ 6708.824365] x17: 0000000000000000 x16: 0000000000000000 +[ 6708.829667] x15: 0000000000000000 x14: 0000000000000001 +[ 6708.834969] x13: 0000000000000000 x12: 722e736f622e676e +[ 6708.840271] x11: 00000000f814dd99 x10: 0000000000000000 +[ 6708.845573] x9 : 7374687225000000 x8 : 0000000000000000 +[ 6708.850875] x7 : 0000000000000000 x6 : 0000000000000000 +[ 6708.856177] x5 : 0000000000000028 x4 : 0000000000000000 +[ 6708.861479] x3 : 0000000000000000 x2 : 00000000e5000000 +[ 6708.866781] x1 : 0000000000000000 x0 : ffff81000bbe1e00 +[ 6708.872084] +[ 6708.873565] Process swapper/84 (pid: 0, stack limit = 0xffff81000689c000) +[ 6708.880341] Stack: (0xffff810ffbad3900 to 0xffff8100068a0000) +[ 6708.886075] Call trace: +[ 6708.888513] Exception stack(0xffff810ffbad3710 to 0xffff810ffbad3840) +[ 6708.894942] 3700: ffff810012412400 0001000000000000 +[ 6708.902759] 3720: ffff810ffbad3900 0000000000000000 0000000060000145 ffff800f79300000 +[ 6708.910577] 3740: ffff000009274d00 00000000000003ea 0000000000000015 ffff000008c63000 +[ 6708.918395] 3760: ffff810ffbad3830 ffff800f79300000 000000000000004d 0000000000000000 +[ 6708.926212] 3780: ffff810ffbad3890 ffff0000080f88dc ffff800f79300000 000000000000004d +[ 6708.934030] 37a0: ffff800f7930093c ffff000008c63000 0000000000000000 0000000000000140 +[ 6708.941848] 37c0: ffff000008c2c000 0000000000040b00 ffff81000bbe1e00 0000000000000000 +[ 6708.949665] 37e0: 00000000e5000000 0000000000000000 0000000000000000 
0000000000000028 +[ 6708.957483] 3800: 0000000000000000 0000000000000000 0000000000000000 7374687225000000 +[ 6708.965300] 3820: 0000000000000000 00000000f814dd99 722e736f622e676e 0000000000000000 +[ 6708.973117] [< (null)>] (null) +[ 6708.977824] [] tcp_data_queue+0x754/0xc5c +[ 6708.983386] [] tcp_rcv_established+0x1a0/0x67c +[ 6708.989384] [] tcp_v4_do_rcv+0x15c/0x22c +[ 6708.994858] [] tcp_v4_rcv+0xaf0/0xb58 +[ 6709.000077] [] ip_local_deliver_finish+0x10c/0x254 +[ 6709.006419] [] ip_local_deliver+0xf0/0xfc +[ 6709.011980] [] ip_rcv_finish+0x208/0x3a4 +[ 6709.017454] [] ip_rcv+0x2dc/0x3c8 +[ 6709.022328] [] __netif_receive_skb_core+0x2f8/0xa0c +[ 6709.028758] [] __netif_receive_skb+0x38/0x84 +[ 6709.034580] [] netif_receive_skb_internal+0x68/0xdc +[ 6709.041010] [] napi_gro_receive+0xcc/0x1a8 +[ 6709.046690] [] nicvf_cq_intr_handler+0x59c/0x730 [nicvf] +[ 6709.053559] [] nicvf_poll+0x38/0xb8 [nicvf] +[ 6709.059295] [] net_rx_action+0x2f8/0x464 +[ 6709.064771] [] __do_softirq+0x11c/0x308 +[ 6709.070164] [] irq_exit+0x12c/0x174 +[ 6709.075206] [] __handle_domain_irq+0x78/0xc4 +[ 6709.081027] [] gic_handle_irq+0x94/0x190 +[ 6709.086501] Exception stack(0xffff81000689fdf0 to 0xffff81000689ff20) +[ 6709.092929] fde0: 0000810ff2ec0000 ffff000008c10000 +[ 6709.100747] fe00: ffff000008c70ef4 0000000000000001 0000000000000000 ffff810ffbad9b18 +[ 6709.108565] fe20: ffff810ffbad9c70 ffff8100169d3800 ffff810006843ab0 ffff81000689fe80 +[ 6709.116382] fe40: 0000000000000bd0 0000ffffdf979cd0 183f5913da192500 0000ffff8a254ce4 +[ 6709.124200] fe60: 0000ffff8a254b78 0000aaab10339808 0000000000000000 0000ffff8a0c2a50 +[ 6709.132018] fe80: 0000ffffdf979b10 ffff000008d6d450 ffff000008c10000 ffff000008d6d000 +[ 6709.139836] fea0: 0000000000000054 ffff000008cd3dbc 0000000000000000 0000000000000000 +[ 6709.147653] fec0: 0000000000000000 0000000000000000 0000000000000000 ffff81000689ff20 +[ 6709.155471] fee0: ffff000008085240 ffff81000689ff20 ffff000008085244 0000000060000145 +[ 
6709.163289] ff00: ffff81000689ff10 ffff00000813f1e4 ffffffffffffffff ffff00000813f238 +[ 6709.171107] [] el1_irq+0xb4/0x140 +[ 6709.175976] [] arch_cpu_idle+0x44/0x11c +[ 6709.181368] [] default_idle_call+0x20/0x30 +[ 6709.187020] [] do_idle+0x158/0x1e4 +[ 6709.191973] [] cpu_startup_entry+0x2c/0x30 +[ 6709.197624] [] secondary_start_kernel+0x13c/0x160 +[ 6709.203878] [<0000000001bc71c4>] 0x1bc71c4 +[ 6709.207967] Code: bad PC value +[ 6709.211061] SMP: stopping secondary CPUs +[ 6709.218830] Starting crashdump kernel... +[ 6709.222749] Bye! +---<-snip>--- + +Signed-off-by: Vadim Lomovtsev +Reviewed-by: Jeff Layton +Signed-off-by: J. Bruce Fields +Signed-off-by: Greg Kroah-Hartman + +--- + net/sunrpc/svcsock.c | 22 ++++++++++++++++++++-- + 1 file changed, 20 insertions(+), 2 deletions(-) + +--- a/net/sunrpc/svcsock.c ++++ b/net/sunrpc/svcsock.c +@@ -408,6 +408,9 @@ static void svc_data_ready(struct sock * + dprintk("svc: socket %p(inet %p), busy=%d\n", + svsk, sk, + test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); ++ ++ /* Refer to svc_setup_socket() for details. */ ++ rmb(); + svsk->sk_odata(sk); + if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags)) + svc_xprt_enqueue(&svsk->sk_xprt); +@@ -424,6 +427,9 @@ static void svc_write_space(struct sock + if (svsk) { + dprintk("svc: socket %p(inet %p), write_space busy=%d\n", + svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); ++ ++ /* Refer to svc_setup_socket() for details. */ ++ rmb(); + svsk->sk_owspace(sk); + svc_xprt_enqueue(&svsk->sk_xprt); + } +@@ -748,8 +754,12 @@ static void svc_tcp_listen_data_ready(st + dprintk("svc: socket %p TCP (listen) state change %d\n", + sk, sk->sk_state); + +- if (svsk) ++ if (svsk) { ++ /* Refer to svc_setup_socket() for details. 
*/ ++ rmb(); + svsk->sk_odata(sk); ++ } ++ + /* + * This callback may called twice when a new connection + * is established as a child socket inherits everything +@@ -782,6 +792,8 @@ static void svc_tcp_state_change(struct + if (!svsk) + printk("svc: socket %p: no user data\n", sk); + else { ++ /* Refer to svc_setup_socket() for details. */ ++ rmb(); + svsk->sk_ostate(sk); + if (sk->sk_state != TCP_ESTABLISHED) { + set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); +@@ -1368,12 +1380,18 @@ static struct svc_sock *svc_setup_socket + return ERR_PTR(err); + } + +- inet->sk_user_data = svsk; + svsk->sk_sock = sock; + svsk->sk_sk = inet; + svsk->sk_ostate = inet->sk_state_change; + svsk->sk_odata = inet->sk_data_ready; + svsk->sk_owspace = inet->sk_write_space; ++ /* ++ * This barrier is necessary in order to prevent race condition ++ * with svc_data_ready(), svc_listen_data_ready() and others ++ * when calling callbacks above. ++ */ ++ wmb(); ++ inet->sk_user_data = svsk; + + /* Initialize the socket */ + if (sock->type == SOCK_DGRAM) diff --git a/queue-4.9/netfilter-nat-fix-src-map-lookup.patch b/queue-4.9/netfilter-nat-fix-src-map-lookup.patch new file mode 100644 index 00000000000..7e1db791747 --- /dev/null +++ b/queue-4.9/netfilter-nat-fix-src-map-lookup.patch @@ -0,0 +1,68 @@ +From 97772bcd56efa21d9d8976db6f205574ea602f51 Mon Sep 17 00:00:00 2001 +From: Florian Westphal +Date: Fri, 7 Jul 2017 13:07:17 +0200 +Subject: netfilter: nat: fix src map lookup + +From: Florian Westphal + +commit 97772bcd56efa21d9d8976db6f205574ea602f51 upstream. + +When doing initial conversion to rhashtable I replaced the bucket +walk with a single rhashtable_lookup_fast(). + +When moving to rhlist I failed to properly walk the list of identical +tuples, but that is what is needed for this to work correctly. +The table contains the original tuples, so the reply tuples are all +distinct. 
+ +We currently decide that mapping is (not) in range only based on the +first entry, but in case its not we need to try the reply tuple of the +next entry until we either find an in-range mapping or we checked +all the entries. + +This bug makes nat core attempt collision resolution while it might be +able to use the mapping as-is. + +Fixes: 870190a9ec90 ("netfilter: nat: convert nat bysrc hash to rhashtable") +Reported-by: Jaco Kroon +Tested-by: Jaco Kroon +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman + +--- + net/netfilter/nf_nat_core.c | 17 +++++++++-------- + 1 file changed, 9 insertions(+), 8 deletions(-) + +--- a/net/netfilter/nf_nat_core.c ++++ b/net/netfilter/nf_nat_core.c +@@ -225,20 +225,21 @@ find_appropriate_src(struct net *net, + .tuple = tuple, + .zone = zone + }; +- struct rhlist_head *hl; ++ struct rhlist_head *hl, *h; + + hl = rhltable_lookup(&nf_nat_bysource_table, &key, + nf_nat_bysource_params); +- if (!hl) +- return 0; + +- ct = container_of(hl, typeof(*ct), nat_bysource); ++ rhl_for_each_entry_rcu(ct, h, hl, nat_bysource) { ++ nf_ct_invert_tuplepr(result, ++ &ct->tuplehash[IP_CT_DIR_REPLY].tuple); ++ result->dst = tuple->dst; + +- nf_ct_invert_tuplepr(result, +- &ct->tuplehash[IP_CT_DIR_REPLY].tuple); +- result->dst = tuple->dst; ++ if (in_range(l3proto, l4proto, result, range)) ++ return 1; ++ } + +- return in_range(l3proto, l4proto, result, range); ++ return 0; + } + + /* For [FUTURE] fragmentation handling, we want the least-used diff --git a/queue-4.9/nfsd-limit-end-of-page-list-when-decoding-nfsv4-write.patch b/queue-4.9/nfsd-limit-end-of-page-list-when-decoding-nfsv4-write.patch new file mode 100644 index 00000000000..d7ecca3e559 --- /dev/null +++ b/queue-4.9/nfsd-limit-end-of-page-list-when-decoding-nfsv4-write.patch @@ -0,0 +1,48 @@ +From fc788f64f1f3eb31e87d4f53bcf1ab76590d5838 Mon Sep 17 00:00:00 2001 +From: Chuck Lever +Date: Fri, 18 Aug 2017 11:12:19 -0400 +Subject: nfsd: 
Limit end of page list when decoding NFSv4 WRITE + +From: Chuck Lever + +commit fc788f64f1f3eb31e87d4f53bcf1ab76590d5838 upstream. + +When processing an NFSv4 WRITE operation, argp->end should never +point past the end of the data in the final page of the page list. +Otherwise, nfsd4_decode_compound can walk into uninitialized memory. + +More critical, nfsd4_decode_write is failing to increment argp->pagelen +when it increments argp->pagelist. This can cause later xdr decoders +to assume more data is available than really is, which can cause server +crashes on malformed requests. + +Signed-off-by: Chuck Lever +Signed-off-by: J. Bruce Fields +Signed-off-by: Greg Kroah-Hartman + +--- + fs/nfsd/nfs4xdr.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +--- a/fs/nfsd/nfs4xdr.c ++++ b/fs/nfsd/nfs4xdr.c +@@ -129,7 +129,7 @@ static void next_decode_page(struct nfsd + argp->p = page_address(argp->pagelist[0]); + argp->pagelist++; + if (argp->pagelen < PAGE_SIZE) { +- argp->end = argp->p + (argp->pagelen>>2); ++ argp->end = argp->p + XDR_QUADLEN(argp->pagelen); + argp->pagelen = 0; + } else { + argp->end = argp->p + (PAGE_SIZE>>2); +@@ -1246,9 +1246,7 @@ nfsd4_decode_write(struct nfsd4_compound + argp->pagelen -= pages * PAGE_SIZE; + len -= pages * PAGE_SIZE; + +- argp->p = (__be32 *)page_address(argp->pagelist[0]); +- argp->pagelist++; +- argp->end = argp->p + XDR_QUADLEN(PAGE_SIZE); ++ next_decode_page(argp); + } + argp->p += XDR_QUADLEN(len); + diff --git a/queue-4.9/perf-core-fix-group-cpu-task-validation.patch b/queue-4.9/perf-core-fix-group-cpu-task-validation.patch new file mode 100644 index 00000000000..50214968a04 --- /dev/null +++ b/queue-4.9/perf-core-fix-group-cpu-task-validation.patch @@ -0,0 +1,182 @@ +From 64aee2a965cf2954a038b5522f11d2cd2f0f8f3e Mon Sep 17 00:00:00 2001 +From: Mark Rutland +Date: Thu, 22 Jun 2017 15:41:38 +0100 +Subject: perf/core: Fix group {cpu,task} validation + +From: Mark Rutland + +commit 
64aee2a965cf2954a038b5522f11d2cd2f0f8f3e upstream. + +Regardless of which events form a group, it does not make sense for the +events to target different tasks and/or CPUs, as this leaves the group +inconsistent and impossible to schedule. The core perf code assumes that +these are consistent across (successfully initialised) groups. + +Core perf code only verifies this when moving SW events into a HW +context. Thus, we can violate this requirement for pure SW groups and +pure HW groups, unless the relevant PMU driver happens to perform this +verification itself. These mismatched groups subsequently wreak havoc +elsewhere. + +For example, we handle watchpoints as SW events, and reserve watchpoint +HW on a per-CPU basis at pmu::event_init() time to ensure that any event +that is initialised is guaranteed to have a slot at pmu::add() time. +However, the core code only checks the group leader's cpu filter (via +event_filter_match()), and can thus install follower events onto CPUs +violating their (mismatched) CPU filters, potentially installing them +into a CPU without sufficient reserved slots. + +This can be triggered with the below test case, resulting in warnings +from arch backends. + + #define _GNU_SOURCE + #include + #include + #include + #include + #include + #include + #include + + static int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu, + int group_fd, unsigned long flags) + { + return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); + } + + char watched_char; + + struct perf_event_attr wp_attr = { + .type = PERF_TYPE_BREAKPOINT, + .bp_type = HW_BREAKPOINT_RW, + .bp_addr = (unsigned long)&watched_char, + .bp_len = 1, + .size = sizeof(wp_attr), + }; + + int main(int argc, char *argv[]) + { + int leader, ret; + cpu_set_t cpus; + + /* + * Force use of CPU0 to ensure our CPU0-bound events get scheduled.
+ */ + CPU_ZERO(&cpus); + CPU_SET(0, &cpus); + ret = sched_setaffinity(0, sizeof(cpus), &cpus); + if (ret) { + printf("Unable to set cpu affinity\n"); + return 1; + } + + /* open leader event, bound to this task, CPU0 only */ + leader = perf_event_open(&wp_attr, 0, 0, -1, 0); + if (leader < 0) { + printf("Couldn't open leader: %d\n", leader); + return 1; + } + + /* + * Open a follower event that is bound to the same task, but a + * different CPU. This means that the group should never be possible to + * schedule. + */ + ret = perf_event_open(&wp_attr, 0, 1, leader, 0); + if (ret < 0) { + printf("Couldn't open mismatched follower: %d\n", ret); + return 1; + } else { + printf("Opened leader/follower with mismastched CPUs\n"); + } + + /* + * Open as many independent events as we can, all bound to the same + * task, CPU0 only. + */ + do { + ret = perf_event_open(&wp_attr, 0, 0, -1, 0); + } while (ret >= 0); + + /* + * Force enable/disable all events to trigger the erroneous + * installation of the follower event. + */ + printf("Opened all events. Toggling..\n"); + for (;;) { + prctl(PR_TASK_PERF_EVENTS_DISABLE, 0, 0, 0, 0); + prctl(PR_TASK_PERF_EVENTS_ENABLE, 0, 0, 0, 0); + } + + return 0; + } + +Fix this by validating this requirement regardless of whether we're +moving events.
+ +Signed-off-by: Mark Rutland +Signed-off-by: Peter Zijlstra (Intel) +Cc: Alexander Shishkin +Cc: Arnaldo Carvalho de Melo +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: Zhou Chengming +Link: http://lkml.kernel.org/r/1498142498-15758-1-git-send-email-mark.rutland@arm.com +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/events/core.c | 39 +++++++++++++++++++-------------------- + 1 file changed, 19 insertions(+), 20 deletions(-) + +--- a/kernel/events/core.c ++++ b/kernel/events/core.c +@@ -9786,28 +9786,27 @@ SYSCALL_DEFINE5(perf_event_open, + goto err_context; + + /* +- * Do not allow to attach to a group in a different +- * task or CPU context: ++ * Make sure we're both events for the same CPU; ++ * grouping events for different CPUs is broken; since ++ * you can never concurrently schedule them anyhow. + */ +- if (move_group) { +- /* +- * Make sure we're both on the same task, or both +- * per-cpu events. +- */ +- if (group_leader->ctx->task != ctx->task) +- goto err_context; ++ if (group_leader->cpu != event->cpu) ++ goto err_context; ++ ++ /* ++ * Make sure we're both on the same task, or both ++ * per-CPU events. ++ */ ++ if (group_leader->ctx->task != ctx->task) ++ goto err_context; + +- /* +- * Make sure we're both events for the same CPU; +- * grouping events for different CPUs is broken; since +- * you can never concurrently schedule them anyhow. +- */ +- if (group_leader->cpu != event->cpu) +- goto err_context; +- } else { +- if (group_leader->ctx != ctx) +- goto err_context; +- } ++ /* ++ * Do not allow to attach to a group in a different task ++ * or CPU context. If we're moving SW events, we'll fix ++ * this up later, so allow that. 
++ */ ++ if (!move_group && group_leader->ctx != ctx) ++ goto err_context; + + /* + * Only a group leader can be exclusive or pinned diff --git a/queue-4.9/perf-probe-fix-funcs-to-show-correct-symbols-for-offline-module.patch b/queue-4.9/perf-probe-fix-funcs-to-show-correct-symbols-for-offline-module.patch new file mode 100644 index 00000000000..1d49ba41f13 --- /dev/null +++ b/queue-4.9/perf-probe-fix-funcs-to-show-correct-symbols-for-offline-module.patch @@ -0,0 +1,107 @@ +From eebc509b20881b92d62e317b2c073e57c5f200f0 Mon Sep 17 00:00:00 2001 +From: Masami Hiramatsu +Date: Wed, 4 Jan 2017 12:29:05 +0900 +Subject: perf probe: Fix --funcs to show correct symbols for offline module + +From: Masami Hiramatsu + +commit eebc509b20881b92d62e317b2c073e57c5f200f0 upstream. + +Fix --funcs (-F) option to show correct symbols for offline module. +Since previous perf-probe uses machine__findnew_module_map() for offline +module, even if user passes a module file (with full path) which is for +other architecture, perf-probe always tries to load symbol map for +current kernel module. + +This fix uses dso__new_map() to load the map from given binary as same +as a map for user applications. 
+ +Signed-off-by: Masami Hiramatsu +Cc: Jiri Olsa +Cc: Namhyung Kim +Cc: Peter Zijlstra +Link: http://lkml.kernel.org/r/148350053478.19001.15435255244512631545.stgit@devbox +Signed-off-by: Arnaldo Carvalho de Melo +Cc: Krister Johansen +Signed-off-by: Greg Kroah-Hartman + +--- + tools/perf/util/probe-event.c | 25 ++++++------------------- + 1 file changed, 6 insertions(+), 19 deletions(-) + +--- a/tools/perf/util/probe-event.c ++++ b/tools/perf/util/probe-event.c +@@ -163,7 +163,7 @@ static struct map *kernel_get_module_map + + /* A file path -- this is an offline module */ + if (module && strchr(module, '/')) +- return machine__findnew_module_map(host_machine, 0, module); ++ return dso__new_map(module); + + if (!module) + module = "kernel"; +@@ -173,6 +173,7 @@ static struct map *kernel_get_module_map + if (strncmp(pos->dso->short_name + 1, module, + pos->dso->short_name_len - 2) == 0 && + module[pos->dso->short_name_len - 2] == '\0') { ++ map__get(pos); + return pos; + } + } +@@ -188,15 +189,6 @@ struct map *get_target_map(const char *t + return kernel_get_module_map(target); + } + +-static void put_target_map(struct map *map, bool user) +-{ +- if (map && user) { +- /* Only the user map needs to be released */ +- map__put(map); +- } +-} +- +- + static int convert_exec_to_group(const char *exec, char **result) + { + char *ptr1, *ptr2, *exec_copy; +@@ -412,7 +404,7 @@ static int find_alternative_probe_point( + } + + out: +- put_target_map(map, uprobes); ++ map__put(map); + return ret; + + } +@@ -2944,7 +2936,7 @@ static int find_probe_trace_events_from_ + } + + out: +- put_target_map(map, pev->uprobes); ++ map__put(map); + free(syms); + return ret; + +@@ -3437,10 +3429,7 @@ int show_available_funcs(const char *tar + return ret; + + /* Get a symbol map */ +- if (user) +- map = dso__new_map(target); +- else +- map = kernel_get_module_map(target); ++ map = get_target_map(target, user); + if (!map) { + pr_err("Failed to get a map for %s\n", (target) ? 
: "kernel"); + return -EINVAL; +@@ -3472,9 +3461,7 @@ int show_available_funcs(const char *tar + } + + end: +- if (user) { +- map__put(map); +- } ++ map__put(map); + exit_probe_symbol_maps(); + + return ret; diff --git a/queue-4.9/perf-x86-intel-rapl-make-package-handling-more-robust.patch b/queue-4.9/perf-x86-intel-rapl-make-package-handling-more-robust.patch new file mode 100644 index 00000000000..720cf3da589 --- /dev/null +++ b/queue-4.9/perf-x86-intel-rapl-make-package-handling-more-robust.patch @@ -0,0 +1,178 @@ +From dd86e373e09fb16b83e8adf5c48c421a4ca76468 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Tue, 31 Jan 2017 23:58:38 +0100 +Subject: perf/x86/intel/rapl: Make package handling more robust + +From: Thomas Gleixner + +commit dd86e373e09fb16b83e8adf5c48c421a4ca76468 upstream. + +The package management code in RAPL relies on package mapping being +available before a CPU is started. This changed with: + + 9d85eb9119f4 ("x86/smpboot: Make logical package management more robust") + +because the ACPI/BIOS information turned out to be unreliable, but that +left RAPL in broken state. This was not noticed because on a regular boot +all CPUs are online before RAPL is initialized. + +A possible fix would be to reintroduce the mess which allocates a package +data structure in CPU prepare and when it turns out to already exist in +starting throw it away later in the CPU online callback. But that's a +horrible hack and not required at all because RAPL becomes functional for +perf only in the CPU online callback. That's correct because user space is +not yet informed about the CPU being onlined, so nothing caan rely on RAPL +being available on that particular CPU. + +Move the allocation to the CPU online callback and simplify the hotplug +handling. At this point the package mapping is established and correct. + +This also adds a missing check for available package data in the +event_init() function. 
+ +Reported-by: Yasuaki Ishimatsu +Signed-off-by: Thomas Gleixner +Cc: Alexander Shishkin +Cc: Arnaldo Carvalho de Melo +Cc: Jiri Olsa +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Sebastian Siewior +Cc: Stephane Eranian +Cc: Vince Weaver +Fixes: 9d85eb9119f4 ("x86/smpboot: Make logical package management more robust") +Link: http://lkml.kernel.org/r/20170131230141.212593966@linutronix.de +Signed-off-by: Ingo Molnar +[ jwang: backport to 4.9 fix Null pointer deref during hotplug cpu.] +Signed-off-by: Jack Wang +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/events/intel/rapl.c | 58 ++++++++++++++++++------------------------- + include/linux/cpuhotplug.h | 1 + 2 files changed, 25 insertions(+), 34 deletions(-) + +--- a/arch/x86/events/intel/rapl.c ++++ b/arch/x86/events/intel/rapl.c +@@ -161,7 +161,13 @@ static u64 rapl_timer_ms; + + static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu) + { +- return rapl_pmus->pmus[topology_logical_package_id(cpu)]; ++ unsigned int pkgid = topology_logical_package_id(cpu); ++ ++ /* ++ * The unsigned check also catches the '-1' return value for non ++ * existent mappings in the topology map. ++ */ ++ return pkgid < rapl_pmus->maxpkg ? 
rapl_pmus->pmus[pkgid] : NULL; + } + + static inline u64 rapl_read_counter(struct perf_event *event) +@@ -402,6 +408,8 @@ static int rapl_pmu_event_init(struct pe + + /* must be done before validate_group */ + pmu = cpu_to_rapl_pmu(event->cpu); ++ if (!pmu) ++ return -EINVAL; + event->cpu = pmu->cpu; + event->pmu_private = pmu; + event->hw.event_base = msr; +@@ -585,6 +593,19 @@ static int rapl_cpu_online(unsigned int + struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); + int target; + ++ if (!pmu) { ++ pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); ++ if (!pmu) ++ return -ENOMEM; ++ ++ raw_spin_lock_init(&pmu->lock); ++ INIT_LIST_HEAD(&pmu->active_list); ++ pmu->pmu = &rapl_pmus->pmu; ++ pmu->timer_interval = ms_to_ktime(rapl_timer_ms); ++ rapl_hrtimer_init(pmu); ++ ++ rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu; ++ } + /* + * Check if there is an online cpu in the package which collects rapl + * events already. +@@ -598,27 +619,6 @@ static int rapl_cpu_online(unsigned int + return 0; + } + +-static int rapl_cpu_prepare(unsigned int cpu) +-{ +- struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); +- +- if (pmu) +- return 0; +- +- pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); +- if (!pmu) +- return -ENOMEM; +- +- raw_spin_lock_init(&pmu->lock); +- INIT_LIST_HEAD(&pmu->active_list); +- pmu->pmu = &rapl_pmus->pmu; +- pmu->timer_interval = ms_to_ktime(rapl_timer_ms); +- pmu->cpu = -1; +- rapl_hrtimer_init(pmu); +- rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu; +- return 0; +-} +- + static int rapl_check_hw_unit(bool apply_quirk) + { + u64 msr_rapl_power_unit_bits; +@@ -804,28 +804,21 @@ static int __init rapl_pmu_init(void) + * Install callbacks. Core will call them for each online cpu. 
+ */ + +- ret = cpuhp_setup_state(CPUHP_PERF_X86_RAPL_PREP, "PERF_X86_RAPL_PREP", +- rapl_cpu_prepare, NULL); +- if (ret) +- goto out; +- + ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE, + "AP_PERF_X86_RAPL_ONLINE", + rapl_cpu_online, rapl_cpu_offline); + if (ret) +- goto out1; ++ goto out; + + ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1); + if (ret) +- goto out2; ++ goto out1; + + rapl_advertise(); + return 0; + +-out2: +- cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE); + out1: +- cpuhp_remove_state(CPUHP_PERF_X86_RAPL_PREP); ++ cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE); + out: + pr_warn("Initialization failed (%d), disabled\n", ret); + cleanup_rapl_pmus(); +@@ -836,7 +829,6 @@ module_init(rapl_pmu_init); + static void __exit intel_rapl_exit(void) + { + cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE); +- cpuhp_remove_state_nocalls(CPUHP_PERF_X86_RAPL_PREP); + perf_pmu_unregister(&rapl_pmus->pmu); + cleanup_rapl_pmus(); + } +--- a/include/linux/cpuhotplug.h ++++ b/include/linux/cpuhotplug.h +@@ -10,7 +10,6 @@ enum cpuhp_state { + CPUHP_PERF_X86_PREPARE, + CPUHP_PERF_X86_UNCORE_PREP, + CPUHP_PERF_X86_AMD_UNCORE_PREP, +- CPUHP_PERF_X86_RAPL_PREP, + CPUHP_PERF_BFIN, + CPUHP_PERF_POWER, + CPUHP_PERF_SUPERH, diff --git a/queue-4.9/revert-leds-handle-suspend-resume-in-heartbeat-trigger.patch b/queue-4.9/revert-leds-handle-suspend-resume-in-heartbeat-trigger.patch new file mode 100644 index 00000000000..f6ce3e33430 --- /dev/null +++ b/queue-4.9/revert-leds-handle-suspend-resume-in-heartbeat-trigger.patch @@ -0,0 +1,99 @@ +From 436c4c45b5b9562b59cedbb51b7343ab4a6dd8cc Mon Sep 17 00:00:00 2001 +From: Zhang Bo +Date: Tue, 13 Jun 2017 10:39:20 +0800 +Subject: Revert "leds: handle suspend/resume in heartbeat trigger" + +From: Zhang Bo + +commit 436c4c45b5b9562b59cedbb51b7343ab4a6dd8cc upstream. + +This reverts commit 5ab92a7cb82c66bf30685583a38a18538e3807db. + +System cannot enter suspend mode because of heartbeat led trigger. 
+In autosleep_wq, try_to_suspend function will try to enter suspend +mode in specific period. it will get wakeup_count then call pm_notifier +chain callback function and freeze processes. +Heartbeat_pm_notifier is called and it call led_trigger_unregister to +change the trigger of led device to none. It will send uevent message +and the wakeup source count changed. As wakeup_count changed, suspend +will abort. + +Fixes: 5ab92a7cb82c ("leds: handle suspend/resume in heartbeat trigger") +Signed-off-by: Zhang Bo +Acked-by: Pavel Machek +Reviewed-by: Linus Walleij +Signed-off-by: Jacek Anaszewski +Cc: Geert Uytterhoeven +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/leds/trigger/ledtrig-heartbeat.c | 31 ------------------------------- + 1 file changed, 31 deletions(-) + +--- a/drivers/leds/trigger/ledtrig-heartbeat.c ++++ b/drivers/leds/trigger/ledtrig-heartbeat.c +@@ -19,7 +19,6 @@ + #include + #include + #include +-#include + #include "../leds.h" + + static int panic_heartbeats; +@@ -155,30 +154,6 @@ static struct led_trigger heartbeat_led_ + .deactivate = heartbeat_trig_deactivate, + }; + +-static int heartbeat_pm_notifier(struct notifier_block *nb, +- unsigned long pm_event, void *unused) +-{ +- int rc; +- +- switch (pm_event) { +- case PM_SUSPEND_PREPARE: +- case PM_HIBERNATION_PREPARE: +- case PM_RESTORE_PREPARE: +- led_trigger_unregister(&heartbeat_led_trigger); +- break; +- case PM_POST_SUSPEND: +- case PM_POST_HIBERNATION: +- case PM_POST_RESTORE: +- rc = led_trigger_register(&heartbeat_led_trigger); +- if (rc) +- pr_err("could not re-register heartbeat trigger\n"); +- break; +- default: +- break; +- } +- return NOTIFY_DONE; +-} +- + static int heartbeat_reboot_notifier(struct notifier_block *nb, + unsigned long code, void *unused) + { +@@ -193,10 +168,6 @@ static int heartbeat_panic_notifier(stru + return NOTIFY_DONE; + } + +-static struct notifier_block heartbeat_pm_nb = { +- .notifier_call = heartbeat_pm_notifier, +-}; +- + static struct notifier_block 
heartbeat_reboot_nb = { + .notifier_call = heartbeat_reboot_notifier, + }; +@@ -213,14 +184,12 @@ static int __init heartbeat_trig_init(vo + atomic_notifier_chain_register(&panic_notifier_list, + &heartbeat_panic_nb); + register_reboot_notifier(&heartbeat_reboot_nb); +- register_pm_notifier(&heartbeat_pm_nb); + } + return rc; + } + + static void __exit heartbeat_trig_exit(void) + { +- unregister_pm_notifier(&heartbeat_pm_nb); + unregister_reboot_notifier(&heartbeat_reboot_nb); + atomic_notifier_chain_unregister(&panic_notifier_list, + &heartbeat_panic_nb); diff --git a/queue-4.9/series b/queue-4.9/series index bde31e8d311..49ae4f287b4 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -36,3 +36,28 @@ alsa-hda-add-stereo-mic-quirk-for-lenovo-g50-70-17aa-3978.patch alsa-firewire-fix-null-pointer-dereference-when-releasing-uninitialized-data-of-iso-resource.patch arcv2-pae40-explicitly-set-msb-counterpart-of-slc-region-ops-addresses.patch mm-shmem-fix-handling-sys-kernel-mm-transparent_hugepage-shmem_enabled.patch +i2c-designware-fix-system-suspend.patch +mm-madvise.c-fix-freeing-of-locked-page-with-madv_free.patch +fork-fix-incorrect-fput-of-exe_file-causing-use-after-free.patch +mm-memblock.c-reversed-logic-in-memblock_discard.patch +drm-release-driver-tracking-before-making-the-object-available-again.patch +drm-atomic-if-the-atomic-check-fails-return-its-value-first.patch +drm-rcar-du-fix-crash-in-encoder-failure-error-path.patch +drm-rcar-du-fix-display-timing-controller-parameter.patch +drm-rcar-du-fix-h-v-sync-signal-polarity-configuration.patch +tracing-call-clear_boot_tracer-at-lateinit_sync.patch +tracing-fix-kmemleak-in-tracing_map_array_free.patch +tracing-fix-freeing-of-filter-in-create_filter-when-set_str-is-false.patch +kbuild-linker-script-do-not-match-c-names-unless-ld_dead_code_data_elimination-is-configured.patch +cifs-fix-df-output-for-users-with-quota-limits.patch +cifs-return-enametoolong-for-overlong-names-in-cifs_open-cifs_lookup.patch 
+nfsd-limit-end-of-page-list-when-decoding-nfsv4-write.patch +ftrace-check-for-null-ret_stack-on-profile-function-graph-entry-function.patch +perf-core-fix-group-cpu-task-validation.patch +perf-probe-fix-funcs-to-show-correct-symbols-for-offline-module.patch +perf-x86-intel-rapl-make-package-handling-more-robust.patch +timers-fix-excessive-granularity-of-new-timers-after-a-nohz-idle.patch +x86-mm-fix-use-after-free-of-ldt_struct.patch +net-sunrpc-svcsock-fix-null-pointer-exception.patch +revert-leds-handle-suspend-resume-in-heartbeat-trigger.patch +netfilter-nat-fix-src-map-lookup.patch diff --git a/queue-4.9/timers-fix-excessive-granularity-of-new-timers-after-a-nohz-idle.patch b/queue-4.9/timers-fix-excessive-granularity-of-new-timers-after-a-nohz-idle.patch new file mode 100644 index 00000000000..2b38b9d45da --- /dev/null +++ b/queue-4.9/timers-fix-excessive-granularity-of-new-timers-after-a-nohz-idle.patch @@ -0,0 +1,206 @@ +From 2fe59f507a65dbd734b990a11ebc7488f6f87a24 Mon Sep 17 00:00:00 2001 +From: Nicholas Piggin +Date: Tue, 22 Aug 2017 18:43:48 +1000 +Subject: timers: Fix excessive granularity of new timers after a nohz idle + +From: Nicholas Piggin + +commit 2fe59f507a65dbd734b990a11ebc7488f6f87a24 upstream. + +When a timer base is idle, it is forwarded when a new timer is added +to ensure that granularity does not become excessive. When not idle, +the timer tick is expected to increment the base. + +However there are several problems: + +- If an existing timer is modified, the base is forwarded only after + the index is calculated. + +- The base is not forwarded by add_timer_on. + +- There is a window after a timer is restarted from a nohz idle, after + it is marked not-idle and before the timer tick on this CPU, where a + timer may be added but the ancient base does not get forwarded. + +These result in excessive granularity (a 1 jiffy timeout can blow out +to 100s of jiffies), which cause the rcu lockup detector to trigger, +among other things. 
+ +Fix this by keeping track of whether the timer base has been idle +since it was last run or forwarded, and if so then forward it before +adding a new timer. + +There is still a case where mod_timer optimises the case of a pending +timer mod with the same expiry time, where the timer can see excessive +granularity relative to the new, shorter interval. A comment is added, +but it's not changed because it is an important fastpath for +networking. + +This has been tested and found to fix the RCU softlockup messages. + +Testing was also done with tracing to measure requested versus +achieved wakeup latencies for all non-deferrable timers in an idle +system (with no lockup watchdogs running). Wakeup latency relative to +absolute latency is calculated (note this suffers from round-up skew +at low absolute times) and analysed: + + max avg std +upstream 506.0 1.20 4.68 +patched 2.0 1.08 0.15 + +The bug was noticed due to the lockup detector Kconfig changes +dropping it out of people's .configs and resulting in larger base +clk skew. When the lockup detectors are enabled, no CPU can go idle for +longer than 4 seconds, which limits the granularity errors. 
+Sub-optimal timer behaviour is observable on a smaller scale in that +case: + + max avg std +upstream 9.0 1.05 0.19 +patched 2.0 1.04 0.11 + +Fixes: a683f390b93f ("timers: Forward the wheel clock whenever possible") +Signed-off-by: Nicholas Piggin +Signed-off-by: Thomas Gleixner +Tested-by: Jonathan Cameron +Tested-by: David Miller +Cc: dzickus@redhat.com +Cc: sfr@canb.auug.org.au +Cc: mpe@ellerman.id.au +Cc: Stephen Boyd +Cc: linuxarm@huawei.com +Cc: abdhalee@linux.vnet.ibm.com +Cc: John Stultz +Cc: akpm@linux-foundation.org +Cc: paulmck@linux.vnet.ibm.com +Cc: torvalds@linux-foundation.org +Link: http://lkml.kernel.org/r/20170822084348.21436-1-npiggin@gmail.com +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/time/timer.c | 50 +++++++++++++++++++++++++++++++++++++++++--------- + 1 file changed, 41 insertions(+), 9 deletions(-) + +--- a/kernel/time/timer.c ++++ b/kernel/time/timer.c +@@ -201,6 +201,7 @@ struct timer_base { + bool migration_enabled; + bool nohz_active; + bool is_idle; ++ bool must_forward_clk; + DECLARE_BITMAP(pending_map, WHEEL_SIZE); + struct hlist_head vectors[WHEEL_SIZE]; + } ____cacheline_aligned; +@@ -891,13 +892,19 @@ get_target_base(struct timer_base *base, + + static inline void forward_timer_base(struct timer_base *base) + { +- unsigned long jnow = READ_ONCE(jiffies); ++ unsigned long jnow; + + /* +- * We only forward the base when it's idle and we have a delta between +- * base clock and jiffies. ++ * We only forward the base when we are idle or have just come out of ++ * idle (must_forward_clk logic), and have a delta between base clock ++ * and jiffies. In the common case, run_timers will take care of it. 
+ */ +- if (!base->is_idle || (long) (jnow - base->clk) < 2) ++ if (likely(!base->must_forward_clk)) ++ return; ++ ++ jnow = READ_ONCE(jiffies); ++ base->must_forward_clk = base->is_idle; ++ if ((long)(jnow - base->clk) < 2) + return; + + /* +@@ -973,6 +980,11 @@ __mod_timer(struct timer_list *timer, un + * same array bucket then just return: + */ + if (timer_pending(timer)) { ++ /* ++ * The downside of this optimization is that it can result in ++ * larger granularity than you would get from adding a new ++ * timer with this expiry. ++ */ + if (timer->expires == expires) + return 1; + +@@ -983,6 +995,7 @@ __mod_timer(struct timer_list *timer, un + * dequeue/enqueue dance. + */ + base = lock_timer_base(timer, &flags); ++ forward_timer_base(base); + + clk = base->clk; + idx = calc_wheel_index(expires, clk); +@@ -999,6 +1012,7 @@ __mod_timer(struct timer_list *timer, un + } + } else { + base = lock_timer_base(timer, &flags); ++ forward_timer_base(base); + } + + timer_stats_timer_set_start_info(timer); +@@ -1028,12 +1042,10 @@ __mod_timer(struct timer_list *timer, un + spin_lock(&base->lock); + WRITE_ONCE(timer->flags, + (timer->flags & ~TIMER_BASEMASK) | base->cpu); ++ forward_timer_base(base); + } + } + +- /* Try to forward a stale timer base clock */ +- forward_timer_base(base); +- + timer->expires = expires; + /* + * If 'idx' was calculated above and the base time did not advance +@@ -1150,6 +1162,7 @@ void add_timer_on(struct timer_list *tim + WRITE_ONCE(timer->flags, + (timer->flags & ~TIMER_BASEMASK) | cpu); + } ++ forward_timer_base(base); + + debug_activate(timer, timer->expires); + internal_add_timer(base, timer); +@@ -1538,10 +1551,16 @@ u64 get_next_timer_interrupt(unsigned lo + if (!is_max_delta) + expires = basem + (u64)(nextevt - basej) * TICK_NSEC; + /* +- * If we expect to sleep more than a tick, mark the base idle: ++ * If we expect to sleep more than a tick, mark the base idle. 
++ * Also the tick is stopped so any added timer must forward ++ * the base clk itself to keep granularity small. This idle ++ * logic is only maintained for the BASE_STD base, deferrable ++ * timers may still see large granularity skew (by design). + */ +- if ((expires - basem) > TICK_NSEC) ++ if ((expires - basem) > TICK_NSEC) { ++ base->must_forward_clk = true; + base->is_idle = true; ++ } + } + spin_unlock(&base->lock); + +@@ -1651,6 +1670,19 @@ static __latent_entropy void run_timer_s + { + struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); + ++ /* ++ * must_forward_clk must be cleared before running timers so that any ++ * timer functions that call mod_timer will not try to forward the ++ * base. idle trcking / clock forwarding logic is only used with ++ * BASE_STD timers. ++ * ++ * The deferrable base does not do idle tracking at all, so we do ++ * not forward it. This can result in very large variations in ++ * granularity for deferrable timers, but they can be deferred for ++ * long periods due to idle. ++ */ ++ base->must_forward_clk = false; ++ + __run_timers(base); + if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active) + __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); diff --git a/queue-4.9/tracing-call-clear_boot_tracer-at-lateinit_sync.patch b/queue-4.9/tracing-call-clear_boot_tracer-at-lateinit_sync.patch new file mode 100644 index 00000000000..b6da25ee99b --- /dev/null +++ b/queue-4.9/tracing-call-clear_boot_tracer-at-lateinit_sync.patch @@ -0,0 +1,39 @@ +From 4bb0f0e73c8c30917d169c4a0f1ac083690c545b Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (VMware)" +Date: Tue, 1 Aug 2017 12:01:52 -0400 +Subject: tracing: Call clear_boot_tracer() at lateinit_sync + +From: Steven Rostedt (VMware) + +commit 4bb0f0e73c8c30917d169c4a0f1ac083690c545b upstream. + +The clear_boot_tracer function is used to reset the default_bootup_tracer +string to prevent it from being accessed after boot, as it originally points +to init data. 
But since clear_boot_tracer() is called via the +init_lateinit() call, it races with the initcall for registering the hwlat +tracer. If someone adds "ftrace=hwlat" to the kernel command line, depending +on how the linker sets up the text, the saved command line may be cleared, +and the hwlat tracer never is initialized. + +Simply have the clear_boot_tracer() be called by initcall_lateinit_sync() as +that's for tasks to be called after lateinit. + +Link: https://bugzilla.kernel.org/show_bug.cgi?id=196551 + +Fixes: e7c15cd8a ("tracing: Added hardware latency tracer") +Reported-by: Zamir SUN +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/trace/trace.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -7767,4 +7767,4 @@ __init static int clear_boot_tracer(void + } + + fs_initcall(tracer_init_tracefs); +-late_initcall(clear_boot_tracer); ++late_initcall_sync(clear_boot_tracer); diff --git a/queue-4.9/tracing-fix-freeing-of-filter-in-create_filter-when-set_str-is-false.patch b/queue-4.9/tracing-fix-freeing-of-filter-in-create_filter-when-set_str-is-false.patch new file mode 100644 index 00000000000..b317018ef29 --- /dev/null +++ b/queue-4.9/tracing-fix-freeing-of-filter-in-create_filter-when-set_str-is-false.patch @@ -0,0 +1,67 @@ +From 8b0db1a5bdfcee0dbfa89607672598ae203c9045 Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (VMware)" +Date: Wed, 23 Aug 2017 12:46:27 -0400 +Subject: tracing: Fix freeing of filter in create_filter() when set_str is false + +From: Steven Rostedt (VMware) + +commit 8b0db1a5bdfcee0dbfa89607672598ae203c9045 upstream. 
+ +Performing the following task with kmemleak enabled: + + # cd /sys/kernel/tracing/events/irq/irq_handler_entry/ + # echo 'enable_event:kmem:kmalloc:3 if irq >' > trigger + # echo 'enable_event:kmem:kmalloc:3 if irq > 31' > trigger + # echo scan > /sys/kernel/debug/kmemleak + # cat /sys/kernel/debug/kmemleak +unreferenced object 0xffff8800b9290308 (size 32): + comm "bash", pid 1114, jiffies 4294848451 (age 141.139s) + hex dump (first 32 bytes): + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ + backtrace: + [] kmemleak_alloc+0x4a/0xa0 + [] kmem_cache_alloc_trace+0x158/0x290 + [] create_filter_start.constprop.28+0x99/0x940 + [] create_filter+0xa9/0x160 + [] create_event_filter+0xc/0x10 + [] set_trigger_filter+0xe5/0x210 + [] event_enable_trigger_func+0x324/0x490 + [] event_trigger_write+0x1a2/0x260 + [] __vfs_write+0xd7/0x380 + [] vfs_write+0x101/0x260 + [] SyS_write+0xab/0x130 + [] entry_SYSCALL_64_fastpath+0x1f/0xbe + [] 0xffffffffffffffff + +The function create_filter() is passed a 'filterp' pointer that gets +allocated, and if "set_str" is true, it is up to the caller to free it, even +on error. The problem is that the pointer is not freed by create_filter() +when set_str is false. This is a bug, and it is not up to the caller to free +the filter on error if it doesn't care about the string. 
+ +Link: http://lkml.kernel.org/r/1502705898-27571-2-git-send-email-chuhu@redhat.com + +Fixes: 38b78eb85 ("tracing: Factorize filter creation") +Reported-by: Chunyu Hu +Tested-by: Chunyu Hu +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/trace/trace_events_filter.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/kernel/trace/trace_events_filter.c ++++ b/kernel/trace/trace_events_filter.c +@@ -1926,6 +1926,10 @@ static int create_filter(struct trace_ev + if (err && set_str) + append_filter_err(ps, filter); + } ++ if (err && !set_str) { ++ free_event_filter(filter); ++ filter = NULL; ++ } + create_filter_finish(ps); + + *filterp = filter; diff --git a/queue-4.9/tracing-fix-kmemleak-in-tracing_map_array_free.patch b/queue-4.9/tracing-fix-kmemleak-in-tracing_map_array_free.patch new file mode 100644 index 00000000000..6aee43e8efb --- /dev/null +++ b/queue-4.9/tracing-fix-kmemleak-in-tracing_map_array_free.patch @@ -0,0 +1,88 @@ +From 475bb3c69ab05df2a6ecef6acc2393703d134180 Mon Sep 17 00:00:00 2001 +From: Chunyu Hu +Date: Mon, 14 Aug 2017 18:18:17 +0800 +Subject: tracing: Fix kmemleak in tracing_map_array_free() + +From: Chunyu Hu + +commit 475bb3c69ab05df2a6ecef6acc2393703d134180 upstream. + +kmemleak reported the below leak when I was doing clear of the hist +trigger. With this patch, the kmemleak is gone. + +unreferenced object 0xffff94322b63d760 (size 32): + comm "bash", pid 1522, jiffies 4403687962 (age 2442.311s) + hex dump (first 32 bytes): + 00 01 00 00 04 00 00 00 08 00 00 00 ff 00 00 00 ................ + 10 00 00 00 00 00 00 00 80 a8 7a f2 31 94 ff ff ..........z.1... 
+ backtrace: + [] kmemleak_alloc+0x4a/0xa0 + [] kmem_cache_alloc_trace+0xca/0x1d0 + [] tracing_map_array_alloc+0x26/0x140 + [] kretprobe_trampoline+0x0/0x50 + [] create_hist_data+0x535/0x750 + [] event_hist_trigger_func+0x1f7/0x420 + [] event_trigger_write+0xfd/0x1a0 + [] __vfs_write+0x37/0x170 + [] vfs_write+0xb2/0x1b0 + [] SyS_write+0x55/0xc0 + [] do_syscall_64+0x67/0x150 + [] return_from_SYSCALL_64+0x0/0x6a + [] 0xffffffffffffffff +unreferenced object 0xffff9431f27aa880 (size 128): + comm "bash", pid 1522, jiffies 4403687962 (age 2442.311s) + hex dump (first 32 bytes): + 00 00 8c 2a 32 94 ff ff 00 f0 8b 2a 32 94 ff ff ...*2......*2... + 00 e0 8b 2a 32 94 ff ff 00 d0 8b 2a 32 94 ff ff ...*2......*2... + backtrace: + [] kmemleak_alloc+0x4a/0xa0 + [] __kmalloc+0xe8/0x220 + [] tracing_map_array_alloc+0xb1/0x140 + [] kretprobe_trampoline+0x0/0x50 + [] create_hist_data+0x535/0x750 + [] event_hist_trigger_func+0x1f7/0x420 + [] event_trigger_write+0xfd/0x1a0 + [] __vfs_write+0x37/0x170 + [] vfs_write+0xb2/0x1b0 + [] SyS_write+0x55/0xc0 + [] do_syscall_64+0x67/0x150 + [] return_from_SYSCALL_64+0x0/0x6a + [] 0xffffffffffffffff + +Link: http://lkml.kernel.org/r/1502705898-27571-1-git-send-email-chuhu@redhat.com + +Fixes: 08d43a5fa063 ("tracing: Add lock-free tracing_map") +Signed-off-by: Chunyu Hu +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/trace/tracing_map.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +--- a/kernel/trace/tracing_map.c ++++ b/kernel/trace/tracing_map.c +@@ -221,16 +221,19 @@ void tracing_map_array_free(struct traci + if (!a) + return; + +- if (!a->pages) { +- kfree(a); +- return; +- } ++ if (!a->pages) ++ goto free; + + for (i = 0; i < a->n_pages; i++) { + if (!a->pages[i]) + break; + free_page((unsigned long)a->pages[i]); + } ++ ++ kfree(a->pages); ++ ++ free: ++ kfree(a); + } + + struct tracing_map_array *tracing_map_array_alloc(unsigned int n_elts, diff --git 
a/queue-4.9/x86-mm-fix-use-after-free-of-ldt_struct.patch b/queue-4.9/x86-mm-fix-use-after-free-of-ldt_struct.patch new file mode 100644 index 00000000000..692fe9ef9a8 --- /dev/null +++ b/queue-4.9/x86-mm-fix-use-after-free-of-ldt_struct.patch @@ -0,0 +1,173 @@ +From ccd5b3235180eef3cfec337df1c8554ab151b5cc Mon Sep 17 00:00:00 2001 +From: Eric Biggers +Date: Thu, 24 Aug 2017 10:50:29 -0700 +Subject: x86/mm: Fix use-after-free of ldt_struct + +From: Eric Biggers + +commit ccd5b3235180eef3cfec337df1c8554ab151b5cc upstream. + +The following commit: + + 39a0526fb3f7 ("x86/mm: Factor out LDT init from context init") + +renamed init_new_context() to init_new_context_ldt() and added a new +init_new_context() which calls init_new_context_ldt(). However, the +error code of init_new_context_ldt() was ignored. Consequently, if a +memory allocation in alloc_ldt_struct() failed during a fork(), the +->context.ldt of the new task remained the same as that of the old task +(due to the memcpy() in dup_mm()). ldt_struct's are not intended to be +shared, so a use-after-free occurred after one task exited. + +Fix the bug by making init_new_context() pass through the error code of +init_new_context_ldt(). 
+ +This bug was found by syzkaller, which encountered the following splat: + + BUG: KASAN: use-after-free in free_ldt_struct.part.2+0x10a/0x150 arch/x86/kernel/ldt.c:116 + Read of size 4 at addr ffff88006d2cb7c8 by task kworker/u9:0/3710 + + CPU: 1 PID: 3710 Comm: kworker/u9:0 Not tainted 4.13.0-rc4-next-20170811 #2 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 + Call Trace: + __dump_stack lib/dump_stack.c:16 [inline] + dump_stack+0x194/0x257 lib/dump_stack.c:52 + print_address_description+0x73/0x250 mm/kasan/report.c:252 + kasan_report_error mm/kasan/report.c:351 [inline] + kasan_report+0x24e/0x340 mm/kasan/report.c:409 + __asan_report_load4_noabort+0x14/0x20 mm/kasan/report.c:429 + free_ldt_struct.part.2+0x10a/0x150 arch/x86/kernel/ldt.c:116 + free_ldt_struct arch/x86/kernel/ldt.c:173 [inline] + destroy_context_ldt+0x60/0x80 arch/x86/kernel/ldt.c:171 + destroy_context arch/x86/include/asm/mmu_context.h:157 [inline] + __mmdrop+0xe9/0x530 kernel/fork.c:889 + mmdrop include/linux/sched/mm.h:42 [inline] + exec_mmap fs/exec.c:1061 [inline] + flush_old_exec+0x173c/0x1ff0 fs/exec.c:1291 + load_elf_binary+0x81f/0x4ba0 fs/binfmt_elf.c:855 + search_binary_handler+0x142/0x6b0 fs/exec.c:1652 + exec_binprm fs/exec.c:1694 [inline] + do_execveat_common.isra.33+0x1746/0x22e0 fs/exec.c:1816 + do_execve+0x31/0x40 fs/exec.c:1860 + call_usermodehelper_exec_async+0x457/0x8f0 kernel/umh.c:100 + ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:431 + + Allocated by task 3700: + save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 + save_stack+0x43/0xd0 mm/kasan/kasan.c:447 + set_track mm/kasan/kasan.c:459 [inline] + kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551 + kmem_cache_alloc_trace+0x136/0x750 mm/slab.c:3627 + kmalloc include/linux/slab.h:493 [inline] + alloc_ldt_struct+0x52/0x140 arch/x86/kernel/ldt.c:67 + write_ldt+0x7b7/0xab0 arch/x86/kernel/ldt.c:277 + sys_modify_ldt+0x1ef/0x240 arch/x86/kernel/ldt.c:307 + 
entry_SYSCALL_64_fastpath+0x1f/0xbe + + Freed by task 3700: + save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 + save_stack+0x43/0xd0 mm/kasan/kasan.c:447 + set_track mm/kasan/kasan.c:459 [inline] + kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524 + __cache_free mm/slab.c:3503 [inline] + kfree+0xca/0x250 mm/slab.c:3820 + free_ldt_struct.part.2+0xdd/0x150 arch/x86/kernel/ldt.c:121 + free_ldt_struct arch/x86/kernel/ldt.c:173 [inline] + destroy_context_ldt+0x60/0x80 arch/x86/kernel/ldt.c:171 + destroy_context arch/x86/include/asm/mmu_context.h:157 [inline] + __mmdrop+0xe9/0x530 kernel/fork.c:889 + mmdrop include/linux/sched/mm.h:42 [inline] + __mmput kernel/fork.c:916 [inline] + mmput+0x541/0x6e0 kernel/fork.c:927 + copy_process.part.36+0x22e1/0x4af0 kernel/fork.c:1931 + copy_process kernel/fork.c:1546 [inline] + _do_fork+0x1ef/0xfb0 kernel/fork.c:2025 + SYSC_clone kernel/fork.c:2135 [inline] + SyS_clone+0x37/0x50 kernel/fork.c:2129 + do_syscall_64+0x26c/0x8c0 arch/x86/entry/common.c:287 + return_from_SYSCALL_64+0x0/0x7a + +Here is a C reproducer: + + #include + #include + #include + #include + #include + #include + #include + + static void *fork_thread(void *_arg) + { + fork(); + } + + int main(void) + { + struct user_desc desc = { .entry_number = 8191 }; + + syscall(__NR_modify_ldt, 1, &desc, sizeof(desc)); + + for (;;) { + if (fork() == 0) { + pthread_t t; + + srand(getpid()); + pthread_create(&t, NULL, fork_thread, NULL); + usleep(rand() % 10000); + syscall(__NR_exit_group, 0); + } + wait(NULL); + } + } + +Note: the reproducer takes advantage of the fact that alloc_ldt_struct() +may use vmalloc() to allocate a large ->entries array, and after +commit: + + 5d17a73a2ebe ("vmalloc: back off when the current task is killed") + +it is possible for userspace to fail a task's vmalloc() by +sending a fatal signal, e.g. via exit_group(). It would be more +difficult to reproduce this bug on kernels without that commit. 
+ +This bug only affected kernels with CONFIG_MODIFY_LDT_SYSCALL=y. + +Signed-off-by: Eric Biggers +Acked-by: Dave Hansen +Cc: Andrew Morton +Cc: Andy Lutomirski +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Christoph Hellwig +Cc: Denys Vlasenko +Cc: Dmitry Vyukov +Cc: Linus Torvalds +Cc: Michal Hocko +Cc: Peter Zijlstra +Cc: Rik van Riel +Cc: Tetsuo Handa +Cc: Thomas Gleixner +Cc: linux-mm@kvack.org +Fixes: 39a0526fb3f7 ("x86/mm: Factor out LDT init from context init") +Link: http://lkml.kernel.org/r/20170824175029.76040-1-ebiggers3@gmail.com +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/mmu_context.h | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/arch/x86/include/asm/mmu_context.h ++++ b/arch/x86/include/asm/mmu_context.h +@@ -116,9 +116,7 @@ static inline int init_new_context(struc + mm->context.execute_only_pkey = -1; + } + #endif +- init_new_context_ldt(tsk, mm); +- +- return 0; ++ return init_new_context_ldt(tsk, mm); + } + static inline void destroy_context(struct mm_struct *mm) + { -- 2.47.3