]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.9-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 27 Aug 2017 12:55:49 +0000 (14:55 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 27 Aug 2017 12:55:49 +0000 (14:55 +0200)
added patches:
cifs-fix-df-output-for-users-with-quota-limits.patch
cifs-return-enametoolong-for-overlong-names-in-cifs_open-cifs_lookup.patch
drm-atomic-if-the-atomic-check-fails-return-its-value-first.patch
drm-rcar-du-fix-crash-in-encoder-failure-error-path.patch
drm-rcar-du-fix-display-timing-controller-parameter.patch
drm-rcar-du-fix-h-v-sync-signal-polarity-configuration.patch
drm-release-driver-tracking-before-making-the-object-available-again.patch
fork-fix-incorrect-fput-of-exe_file-causing-use-after-free.patch
ftrace-check-for-null-ret_stack-on-profile-function-graph-entry-function.patch
i2c-designware-fix-system-suspend.patch
kbuild-linker-script-do-not-match-c-names-unless-ld_dead_code_data_elimination-is-configured.patch
mm-madvise.c-fix-freeing-of-locked-page-with-madv_free.patch
mm-memblock.c-reversed-logic-in-memblock_discard.patch
net-sunrpc-svcsock-fix-null-pointer-exception.patch
netfilter-nat-fix-src-map-lookup.patch
nfsd-limit-end-of-page-list-when-decoding-nfsv4-write.patch
perf-core-fix-group-cpu-task-validation.patch
perf-probe-fix-funcs-to-show-correct-symbols-for-offline-module.patch
perf-x86-intel-rapl-make-package-handling-more-robust.patch
revert-leds-handle-suspend-resume-in-heartbeat-trigger.patch
timers-fix-excessive-granularity-of-new-timers-after-a-nohz-idle.patch
tracing-call-clear_boot_tracer-at-lateinit_sync.patch
tracing-fix-freeing-of-filter-in-create_filter-when-set_str-is-false.patch
tracing-fix-kmemleak-in-tracing_map_array_free.patch
x86-mm-fix-use-after-free-of-ldt_struct.patch

26 files changed:
queue-4.9/cifs-fix-df-output-for-users-with-quota-limits.patch [new file with mode: 0644]
queue-4.9/cifs-return-enametoolong-for-overlong-names-in-cifs_open-cifs_lookup.patch [new file with mode: 0644]
queue-4.9/drm-atomic-if-the-atomic-check-fails-return-its-value-first.patch [new file with mode: 0644]
queue-4.9/drm-rcar-du-fix-crash-in-encoder-failure-error-path.patch [new file with mode: 0644]
queue-4.9/drm-rcar-du-fix-display-timing-controller-parameter.patch [new file with mode: 0644]
queue-4.9/drm-rcar-du-fix-h-v-sync-signal-polarity-configuration.patch [new file with mode: 0644]
queue-4.9/drm-release-driver-tracking-before-making-the-object-available-again.patch [new file with mode: 0644]
queue-4.9/fork-fix-incorrect-fput-of-exe_file-causing-use-after-free.patch [new file with mode: 0644]
queue-4.9/ftrace-check-for-null-ret_stack-on-profile-function-graph-entry-function.patch [new file with mode: 0644]
queue-4.9/i2c-designware-fix-system-suspend.patch [new file with mode: 0644]
queue-4.9/kbuild-linker-script-do-not-match-c-names-unless-ld_dead_code_data_elimination-is-configured.patch [new file with mode: 0644]
queue-4.9/mm-madvise.c-fix-freeing-of-locked-page-with-madv_free.patch [new file with mode: 0644]
queue-4.9/mm-memblock.c-reversed-logic-in-memblock_discard.patch [new file with mode: 0644]
queue-4.9/net-sunrpc-svcsock-fix-null-pointer-exception.patch [new file with mode: 0644]
queue-4.9/netfilter-nat-fix-src-map-lookup.patch [new file with mode: 0644]
queue-4.9/nfsd-limit-end-of-page-list-when-decoding-nfsv4-write.patch [new file with mode: 0644]
queue-4.9/perf-core-fix-group-cpu-task-validation.patch [new file with mode: 0644]
queue-4.9/perf-probe-fix-funcs-to-show-correct-symbols-for-offline-module.patch [new file with mode: 0644]
queue-4.9/perf-x86-intel-rapl-make-package-handling-more-robust.patch [new file with mode: 0644]
queue-4.9/revert-leds-handle-suspend-resume-in-heartbeat-trigger.patch [new file with mode: 0644]
queue-4.9/series
queue-4.9/timers-fix-excessive-granularity-of-new-timers-after-a-nohz-idle.patch [new file with mode: 0644]
queue-4.9/tracing-call-clear_boot_tracer-at-lateinit_sync.patch [new file with mode: 0644]
queue-4.9/tracing-fix-freeing-of-filter-in-create_filter-when-set_str-is-false.patch [new file with mode: 0644]
queue-4.9/tracing-fix-kmemleak-in-tracing_map_array_free.patch [new file with mode: 0644]
queue-4.9/x86-mm-fix-use-after-free-of-ldt_struct.patch [new file with mode: 0644]

diff --git a/queue-4.9/cifs-fix-df-output-for-users-with-quota-limits.patch b/queue-4.9/cifs-fix-df-output-for-users-with-quota-limits.patch
new file mode 100644 (file)
index 0000000..717b220
--- /dev/null
@@ -0,0 +1,57 @@
+From 42bec214d8bd432be6d32a1acb0a9079ecd4d142 Mon Sep 17 00:00:00 2001
+From: Sachin Prabhu <sprabhu@redhat.com>
+Date: Thu, 3 Aug 2017 13:09:03 +0530
+Subject: cifs: Fix df output for users with quota limits
+
+From: Sachin Prabhu <sprabhu@redhat.com>
+
+commit 42bec214d8bd432be6d32a1acb0a9079ecd4d142 upstream.
+
+The df for a SMB2 share triggers a GetInfo call for
+FS_FULL_SIZE_INFORMATION. The values returned are used to populate
+struct statfs.
+
+The problem is that none of the information returned by the call
+contains the total blocks available on the filesystem. Instead we use
+the blocks available to the user ie. quota limitation when filling out
+statfs.f_blocks. The information returned does contain Actual free units
+on the filesystem and is used to populate statfs.f_bfree. For users with
+quota enabled, it can lead to situations where the total free space
+reported is more than the total blocks on the system ending up with df
+reports like the following
+
+ # df -h /mnt/a
+Filesystem         Size  Used Avail Use% Mounted on
+//192.168.22.10/a  2.5G -2.3G  2.5G    - /mnt/a
+
+To fix this problem, we instead populate statfs.f_bfree with the
+same value as statfs.f_bavail, i.e. CallerAvailableAllocationUnits. This
+is similar to what is done already in the code for cifs and df now
+reports the quota information for the user used to mount the share.
+
+ # df --si /mnt/a
+Filesystem         Size  Used Avail Use% Mounted on
+//192.168.22.10/a  2.7G  101M  2.6G   4% /mnt/a
+
+Signed-off-by: Sachin Prabhu <sprabhu@redhat.com>
+Signed-off-by: Pierguido Lambri <plambri@redhat.com>
+Signed-off-by: Steve French <smfrench@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/smb2pdu.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/cifs/smb2pdu.c
++++ b/fs/cifs/smb2pdu.c
+@@ -2930,8 +2930,8 @@ copy_fs_info_to_kstatfs(struct smb2_fs_f
+       kst->f_bsize = le32_to_cpu(pfs_inf->BytesPerSector) *
+                         le32_to_cpu(pfs_inf->SectorsPerAllocationUnit);
+       kst->f_blocks = le64_to_cpu(pfs_inf->TotalAllocationUnits);
+-      kst->f_bfree  = le64_to_cpu(pfs_inf->ActualAvailableAllocationUnits);
+-      kst->f_bavail = le64_to_cpu(pfs_inf->CallerAvailableAllocationUnits);
++      kst->f_bfree  = kst->f_bavail =
++                      le64_to_cpu(pfs_inf->CallerAvailableAllocationUnits);
+       return;
+ }
diff --git a/queue-4.9/cifs-return-enametoolong-for-overlong-names-in-cifs_open-cifs_lookup.patch b/queue-4.9/cifs-return-enametoolong-for-overlong-names-in-cifs_open-cifs_lookup.patch
new file mode 100644 (file)
index 0000000..2387308
--- /dev/null
@@ -0,0 +1,88 @@
+From d3edede29f74d335f81d95a4588f5f136a9f7dcf Mon Sep 17 00:00:00 2001
+From: Ronnie Sahlberg <lsahlber@redhat.com>
+Date: Wed, 23 Aug 2017 14:48:14 +1000
+Subject: cifs: return ENAMETOOLONG for overlong names in cifs_open()/cifs_lookup()
+
+From: Ronnie Sahlberg <lsahlber@redhat.com>
+
+commit d3edede29f74d335f81d95a4588f5f136a9f7dcf upstream.
+
+Add checking for the path component length and verify it is <= the maximum
+that the server advertises via FileFsAttributeInformation.
+
+With this patch cifs.ko will now return ENAMETOOLONG instead of ENOENT
+when users try to access an overlong path.
+
+To test this, try to cd into a (non-existing) directory on a CIFS share
+that has a too long name:
+cd /mnt/aaaaaaaaaaaaaaa...
+
+and it now should show a good error message from the shell:
+bash: cd: /mnt/aaaaaaaaaaaaaaaa...aaaaaa: File name too long
+
+rh bz 1153996
+
+Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
+Signed-off-by: Steve French <smfrench@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/dir.c |   18 ++++++++++++------
+ 1 file changed, 12 insertions(+), 6 deletions(-)
+
+--- a/fs/cifs/dir.c
++++ b/fs/cifs/dir.c
+@@ -183,15 +183,20 @@ cifs_bp_rename_retry:
+ }
+ /*
++ * Don't allow path components longer than the server max.
+  * Don't allow the separator character in a path component.
+  * The VFS will not allow "/", but "\" is allowed by posix.
+  */
+ static int
+-check_name(struct dentry *direntry)
++check_name(struct dentry *direntry, struct cifs_tcon *tcon)
+ {
+       struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb);
+       int i;
++      if (unlikely(direntry->d_name.len >
++                   tcon->fsAttrInfo.MaxPathNameComponentLength))
++              return -ENAMETOOLONG;
++
+       if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) {
+               for (i = 0; i < direntry->d_name.len; i++) {
+                       if (direntry->d_name.name[i] == '\\') {
+@@ -489,10 +494,6 @@ cifs_atomic_open(struct inode *inode, st
+               return finish_no_open(file, res);
+       }
+-      rc = check_name(direntry);
+-      if (rc)
+-              return rc;
+-
+       xid = get_xid();
+       cifs_dbg(FYI, "parent inode = 0x%p name is: %pd and dentry = 0x%p\n",
+@@ -505,6 +506,11 @@ cifs_atomic_open(struct inode *inode, st
+       }
+       tcon = tlink_tcon(tlink);
++
++      rc = check_name(direntry, tcon);
++      if (rc)
++              goto out_free_xid;
++
+       server = tcon->ses->server;
+       if (server->ops->new_lease_key)
+@@ -765,7 +771,7 @@ cifs_lookup(struct inode *parent_dir_ino
+       }
+       pTcon = tlink_tcon(tlink);
+-      rc = check_name(direntry);
++      rc = check_name(direntry, pTcon);
+       if (rc)
+               goto lookup_out;
diff --git a/queue-4.9/drm-atomic-if-the-atomic-check-fails-return-its-value-first.patch b/queue-4.9/drm-atomic-if-the-atomic-check-fails-return-its-value-first.patch
new file mode 100644 (file)
index 0000000..264921a
--- /dev/null
@@ -0,0 +1,102 @@
+From a0ffc51e20e90e0c1c2491de2b4b03f48b6caaba Mon Sep 17 00:00:00 2001
+From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
+Date: Tue, 15 Aug 2017 11:57:06 +0200
+Subject: drm/atomic: If the atomic check fails, return its value first
+
+From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
+
+commit a0ffc51e20e90e0c1c2491de2b4b03f48b6caaba upstream.
+
+The last part of drm_atomic_check_only is testing whether we need to
+fail with -EINVAL when modeset is not allowed, but forgets to return
+the value when atomic_check() fails first.
+
+This results in -EDEADLK being replaced by -EINVAL, and the sanity
+check in drm_modeset_drop_locks kicks in:
+
+[  308.531734] ------------[ cut here ]------------
+[  308.531791] WARNING: CPU: 0 PID: 1886 at drivers/gpu/drm/drm_modeset_lock.c:217 drm_modeset_drop_locks+0x33/0xc0 [drm]
+[  308.531828] Modules linked in:
+[  308.532050] CPU: 0 PID: 1886 Comm: kms_atomic Tainted: G     U  W 4.13.0-rc5-patser+ #5225
+[  308.532082] Hardware name: NUC5i7RYB, BIOS RYBDWi35.86A.0246.2015.0309.1355 03/09/2015
+[  308.532124] task: ffff8800cd9dae00 task.stack: ffff8800ca3b8000
+[  308.532168] RIP: 0010:drm_modeset_drop_locks+0x33/0xc0 [drm]
+[  308.532189] RSP: 0018:ffff8800ca3bf980 EFLAGS: 00010282
+[  308.532211] RAX: dffffc0000000000 RBX: ffff8800ca3bfaf8 RCX: 0000000013a171e6
+[  308.532235] RDX: 1ffff10019477f69 RSI: ffffffffa8ba4fa0 RDI: ffff8800ca3bfb48
+[  308.532258] RBP: ffff8800ca3bf998 R08: 0000000000000000 R09: 0000000000000003
+[  308.532281] R10: 0000000079dbe066 R11: 00000000f760b34b R12: 0000000000000001
+[  308.532304] R13: dffffc0000000000 R14: 00000000ffffffea R15: ffff880096889680
+[  308.532328] FS:  00007ff00959cec0(0000) GS:ffff8800d4e00000(0000) knlGS:0000000000000000
+[  308.532359] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  308.532380] CR2: 0000000000000008 CR3: 00000000ca2e3000 CR4: 00000000003406f0
+[  308.532402] Call Trace:
+[  308.532440]  drm_mode_atomic_ioctl+0x19fa/0x1c00 [drm]
+[  308.532488]  ? drm_atomic_set_property+0x1220/0x1220 [drm]
+[  308.532565]  ? avc_has_extended_perms+0xc39/0xff0
+[  308.532593]  ? lock_downgrade+0x610/0x610
+[  308.532640]  ? drm_atomic_set_property+0x1220/0x1220 [drm]
+[  308.532680]  drm_ioctl_kernel+0x154/0x1a0 [drm]
+[  308.532755]  drm_ioctl+0x624/0x8f0 [drm]
+[  308.532858]  ? drm_atomic_set_property+0x1220/0x1220 [drm]
+[  308.532976]  ? drm_getunique+0x210/0x210 [drm]
+[  308.533061]  do_vfs_ioctl+0xd92/0xe40
+[  308.533121]  ? ioctl_preallocate+0x1b0/0x1b0
+[  308.533160]  ? selinux_capable+0x20/0x20
+[  308.533191]  ? do_fcntl+0x1b1/0xbf0
+[  308.533219]  ? kasan_slab_free+0xa2/0xb0
+[  308.533249]  ? f_getown+0x4b/0xa0
+[  308.533278]  ? putname+0xcf/0xe0
+[  308.533309]  ? security_file_ioctl+0x57/0x90
+[  308.533342]  SyS_ioctl+0x4e/0x80
+[  308.533374]  entry_SYSCALL_64_fastpath+0x18/0xad
+[  308.533405] RIP: 0033:0x7ff00779e4d7
+[  308.533431] RSP: 002b:00007fff66a043d8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
+[  308.533481] RAX: ffffffffffffffda RBX: 000000e7c7ca5910 RCX: 00007ff00779e4d7
+[  308.533560] RDX: 00007fff66a04430 RSI: 00000000c03864bc RDI: 0000000000000003
+[  308.533608] RBP: 00007ff007a5fb00 R08: 000000e7c7ca4620 R09: 000000e7c7ca5e60
+[  308.533647] R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000070
+[  308.533685] R13: 0000000000000000 R14: 0000000000000000 R15: 000000e7c7ca5930
+[  308.533770] Code: ff df 55 48 89 e5 41 55 41 54 53 48 89 fb 48 83 c7
+50 48 89 fa 48 c1 ea 03 80 3c 02 00 74 05 e8 94 d4 16 e7 48 83 7b 50 00
+74 02 <0f> ff 4c 8d 6b 58 48 b8 00 00 00 00 00 fc ff df 4c 89 ea 48 c1
+[  308.534086] ---[ end trace 77f11e53b1df44ad ]---
+
+Solve this by adding the missing return.
+
+This is also a bugfix because we could end up rejecting updates with
+-EINVAL because of an early -EDEADLK, while if atomic_check ran to
+completion it might have downgraded the modeset to a fastset.
+
+Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
+Testcase: kms_atomic
+Link: https://patchwork.freedesktop.org/patch/msgid/20170815095706.23624-1-maarten.lankhorst@linux.intel.com
+Fixes: d34f20d6e2f2 ("drm: Atomic modeset ioctl")
+Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/drm_atomic.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/drm_atomic.c
++++ b/drivers/gpu/drm/drm_atomic.c
+@@ -1386,6 +1386,9 @@ int drm_atomic_check_only(struct drm_ato
+       if (config->funcs->atomic_check)
+               ret = config->funcs->atomic_check(state->dev, state);
++      if (ret)
++              return ret;
++
+       if (!state->allow_modeset) {
+               for_each_crtc_in_state(state, crtc, crtc_state, i) {
+                       if (drm_atomic_crtc_needs_modeset(crtc_state)) {
+@@ -1396,7 +1399,7 @@ int drm_atomic_check_only(struct drm_ato
+               }
+       }
+-      return ret;
++      return 0;
+ }
+ EXPORT_SYMBOL(drm_atomic_check_only);
diff --git a/queue-4.9/drm-rcar-du-fix-crash-in-encoder-failure-error-path.patch b/queue-4.9/drm-rcar-du-fix-crash-in-encoder-failure-error-path.patch
new file mode 100644 (file)
index 0000000..a54a475
--- /dev/null
@@ -0,0 +1,47 @@
+From 05ee29e94acf0d4b3998c3f93374952de8f90176 Mon Sep 17 00:00:00 2001
+From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
+Date: Mon, 3 Oct 2016 20:03:22 +0300
+Subject: drm: rcar-du: Fix crash in encoder failure error path
+
+From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
+
+commit 05ee29e94acf0d4b3998c3f93374952de8f90176 upstream.
+
+When an encoder fails to initialize the driver prints an error message
+to the kernel log. The message contains the name of the encoder's DT
+node, which is NULL for internal encoders. Use the of_node_full_name()
+macro to avoid dereferencing a NULL pointer, print the output number to
+add more context to the error, and make sure we still own a reference to
+the encoder's DT node by delaying the of_node_put() call.
+
+Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
+Reviewed-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
+Signed-off-by: Thong Ho <thong.ho.px@rvc.renesas.com>
+Signed-off-by: Nhan Nguyen <nhan.nguyen.yb@renesas.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/rcar-du/rcar_du_kms.c |   10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/drivers/gpu/drm/rcar-du/rcar_du_kms.c
++++ b/drivers/gpu/drm/rcar-du/rcar_du_kms.c
+@@ -453,13 +453,13 @@ static int rcar_du_encoders_init_one(str
+       }
+       ret = rcar_du_encoder_init(rcdu, enc_type, output, encoder, connector);
+-      of_node_put(encoder);
+-      of_node_put(connector);
+-
+       if (ret && ret != -EPROBE_DEFER)
+               dev_warn(rcdu->dev,
+-                       "failed to initialize encoder %s (%d), skipping\n",
+-                       encoder->full_name, ret);
++                       "failed to initialize encoder %s on output %u (%d), skipping\n",
++                       of_node_full_name(encoder), output, ret);
++
++      of_node_put(encoder);
++      of_node_put(connector);
+       return ret;
+ }
diff --git a/queue-4.9/drm-rcar-du-fix-display-timing-controller-parameter.patch b/queue-4.9/drm-rcar-du-fix-display-timing-controller-parameter.patch
new file mode 100644 (file)
index 0000000..5048aa1
--- /dev/null
@@ -0,0 +1,35 @@
+From 9cdced8a39c04cf798ddb2a27cb5952f7d39f633 Mon Sep 17 00:00:00 2001
+From: Koji Matsuoka <koji.matsuoka.xm@renesas.com>
+Date: Mon, 18 Apr 2016 16:31:30 +0900
+Subject: drm: rcar-du: Fix display timing controller parameter
+
+From: Koji Matsuoka <koji.matsuoka.xm@renesas.com>
+
+commit 9cdced8a39c04cf798ddb2a27cb5952f7d39f633 upstream.
+
+There is a bug in the setting of the DES (Display Enable Signal)
+register. The current setting causes a 1-dot left shift. According to
+the H/W specification, the DES register should be set to the specified
+value minus one. This patch corrects it.
+
+Signed-off-by: Koji Matsuoka <koji.matsuoka.xm@renesas.com>
+Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
+Signed-off-by: Thong Ho <thong.ho.px@rvc.renesas.com>
+Signed-off-by: Nhan Nguyen <nhan.nguyen.yb@renesas.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/rcar-du/rcar_du_crtc.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
++++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
+@@ -172,7 +172,7 @@ static void rcar_du_crtc_set_display_tim
+                                       mode->crtc_vsync_start - 1);
+       rcar_du_crtc_write(rcrtc, VCR,  mode->crtc_vtotal - 1);
+-      rcar_du_crtc_write(rcrtc, DESR,  mode->htotal - mode->hsync_start);
++      rcar_du_crtc_write(rcrtc, DESR,  mode->htotal - mode->hsync_start - 1);
+       rcar_du_crtc_write(rcrtc, DEWR,  mode->hdisplay);
+ }
diff --git a/queue-4.9/drm-rcar-du-fix-h-v-sync-signal-polarity-configuration.patch b/queue-4.9/drm-rcar-du-fix-h-v-sync-signal-polarity-configuration.patch
new file mode 100644 (file)
index 0000000..cb84fe7
--- /dev/null
@@ -0,0 +1,36 @@
+From fd1adef3bff0663c5ac31b45bc4a05fafd43d19b Mon Sep 17 00:00:00 2001
+From: Koji Matsuoka <koji.matsuoka.xm@renesas.com>
+Date: Mon, 16 May 2016 11:28:15 +0900
+Subject: drm: rcar-du: Fix H/V sync signal polarity configuration
+
+From: Koji Matsuoka <koji.matsuoka.xm@renesas.com>
+
+commit fd1adef3bff0663c5ac31b45bc4a05fafd43d19b upstream.
+
+The VSL and HSL bits in the DSMR register set the corresponding
+horizontal and vertical sync signal polarity to active high. The code
+got it the wrong way around, fix it.
+
+Signed-off-by: Koji Matsuoka <koji.matsuoka.xm@renesas.com>
+Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
+Signed-off-by: Thong Ho <thong.ho.px@rvc.renesas.com>
+Signed-off-by: Nhan Nguyen <nhan.nguyen.yb@renesas.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/rcar-du/rcar_du_crtc.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
++++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
+@@ -149,8 +149,8 @@ static void rcar_du_crtc_set_display_tim
+       rcar_du_group_write(rcrtc->group, rcrtc->index % 2 ? OTAR2 : OTAR, 0);
+       /* Signal polarities */
+-      value = ((mode->flags & DRM_MODE_FLAG_PVSYNC) ? 0 : DSMR_VSL)
+-            | ((mode->flags & DRM_MODE_FLAG_PHSYNC) ? 0 : DSMR_HSL)
++      value = ((mode->flags & DRM_MODE_FLAG_PVSYNC) ? DSMR_VSL : 0)
++            | ((mode->flags & DRM_MODE_FLAG_PHSYNC) ? DSMR_HSL : 0)
+             | DSMR_DIPM_DISP | DSMR_CSPM;
+       rcar_du_crtc_write(rcrtc, DSMR, value);
diff --git a/queue-4.9/drm-release-driver-tracking-before-making-the-object-available-again.patch b/queue-4.9/drm-release-driver-tracking-before-making-the-object-available-again.patch
new file mode 100644 (file)
index 0000000..97ed421
--- /dev/null
@@ -0,0 +1,56 @@
+From fe4600a548f2763dec91b3b27a1245c370ceee2a Mon Sep 17 00:00:00 2001
+From: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat, 19 Aug 2017 13:05:58 +0100
+Subject: drm: Release driver tracking before making the object available again
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit fe4600a548f2763dec91b3b27a1245c370ceee2a upstream.
+
+This is the same bug as we fixed in commit f6cd7daecff5 ("drm: Release
+driver references to handle before making it available again"), but now
+the exposure is via the PRIME lookup tables. If we remove the
+object/handle from the PRIME lut, then a new request for the same
+object/fd will generate a new handle, thus for a short window that
+object is known to userspace by two different handles. Fix this by
+releasing the driver tracking before PRIME.
+
+Fixes: 0ff926c7d4f0 ("drm/prime: add exported buffers to current fprivs
+imported buffer list (v2)")
+Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+Cc: David Airlie <airlied@linux.ie>
+Cc: Daniel Vetter <daniel.vetter@intel.com>
+Cc: Rob Clark <robdclark@gmail.com>
+Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
+Cc: Thierry Reding <treding@nvidia.com>
+Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
+Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20170819120558.6465-1-chris@chris-wilson.co.uk
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/drm_gem.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/gpu/drm/drm_gem.c
++++ b/drivers/gpu/drm/drm_gem.c
+@@ -255,13 +255,13 @@ drm_gem_object_release_handle(int id, vo
+       struct drm_gem_object *obj = ptr;
+       struct drm_device *dev = obj->dev;
++      if (dev->driver->gem_close_object)
++              dev->driver->gem_close_object(obj, file_priv);
++
+       if (drm_core_check_feature(dev, DRIVER_PRIME))
+               drm_gem_remove_prime_handles(obj, file_priv);
+       drm_vma_node_revoke(&obj->vma_node, file_priv);
+-      if (dev->driver->gem_close_object)
+-              dev->driver->gem_close_object(obj, file_priv);
+-
+       drm_gem_object_handle_unreference_unlocked(obj);
+       return 0;
diff --git a/queue-4.9/fork-fix-incorrect-fput-of-exe_file-causing-use-after-free.patch b/queue-4.9/fork-fix-incorrect-fput-of-exe_file-causing-use-after-free.patch
new file mode 100644 (file)
index 0000000..d855feb
--- /dev/null
@@ -0,0 +1,105 @@
+From 2b7e8665b4ff51c034c55df3cff76518d1a9ee3a Mon Sep 17 00:00:00 2001
+From: Eric Biggers <ebiggers@google.com>
+Date: Fri, 25 Aug 2017 15:55:43 -0700
+Subject: fork: fix incorrect fput of ->exe_file causing use-after-free
+
+From: Eric Biggers <ebiggers@google.com>
+
+commit 2b7e8665b4ff51c034c55df3cff76518d1a9ee3a upstream.
+
+Commit 7c051267931a ("mm, fork: make dup_mmap wait for mmap_sem for
+write killable") made it possible to kill a forking task while it is
+waiting to acquire its ->mmap_sem for write, in dup_mmap().
+
+However, it was overlooked that this introduced a new error path before
+a reference is taken on the mm_struct's ->exe_file.  Since the
+->exe_file of the new mm_struct was already set to the old ->exe_file by
+the memcpy() in dup_mm(), it was possible for the mmput() in the error
+path of dup_mm() to drop a reference to ->exe_file which was never
+taken.
+
+This caused the struct file to later be freed prematurely.
+
+Fix it by updating mm_init() to NULL out the ->exe_file, in the same
+place it clears other things like the list of mmaps.
+
+This bug was found by syzkaller.  It can be reproduced using the
+following C program:
+
+    #define _GNU_SOURCE
+    #include <pthread.h>
+    #include <stdlib.h>
+    #include <sys/mman.h>
+    #include <sys/syscall.h>
+    #include <sys/wait.h>
+    #include <unistd.h>
+
+    static void *mmap_thread(void *_arg)
+    {
+        for (;;) {
+            mmap(NULL, 0x1000000, PROT_READ,
+                 MAP_POPULATE|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+        }
+    }
+
+    static void *fork_thread(void *_arg)
+    {
+        usleep(rand() % 10000);
+        fork();
+    }
+
+    int main(void)
+    {
+        fork();
+        fork();
+        fork();
+        for (;;) {
+            if (fork() == 0) {
+                pthread_t t;
+
+                pthread_create(&t, NULL, mmap_thread, NULL);
+                pthread_create(&t, NULL, fork_thread, NULL);
+                usleep(rand() % 10000);
+                syscall(__NR_exit_group, 0);
+            }
+            wait(NULL);
+        }
+    }
+
+No special kernel config options are needed.  It usually causes a NULL
+pointer dereference in __remove_shared_vm_struct() during exit, or in
+dup_mmap() (which is usually inlined into copy_process()) during fork.
+Both are due to a vm_area_struct's ->vm_file being used after it's
+already been freed.
+
+Google Bug Id: 64772007
+
+Link: http://lkml.kernel.org/r/20170823211408.31198-1-ebiggers3@gmail.com
+Fixes: 7c051267931a ("mm, fork: make dup_mmap wait for mmap_sem for write killable")
+Signed-off-by: Eric Biggers <ebiggers@google.com>
+Tested-by: Mark Rutland <mark.rutland@arm.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Konstantin Khlebnikov <koct9i@gmail.com>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/fork.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -766,6 +766,7 @@ static struct mm_struct *mm_init(struct
+       mm_init_cpumask(mm);
+       mm_init_aio(mm);
+       mm_init_owner(mm, p);
++      RCU_INIT_POINTER(mm->exe_file, NULL);
+       mmu_notifier_mm_init(mm);
+       clear_tlb_flush_pending(mm);
+ #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
diff --git a/queue-4.9/ftrace-check-for-null-ret_stack-on-profile-function-graph-entry-function.patch b/queue-4.9/ftrace-check-for-null-ret_stack-on-profile-function-graph-entry-function.patch
new file mode 100644 (file)
index 0000000..a1932b0
--- /dev/null
@@ -0,0 +1,46 @@
+From a8f0f9e49956a74718874b800251455680085600 Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
+Date: Thu, 17 Aug 2017 16:37:25 -0400
+Subject: ftrace: Check for null ret_stack on profile function graph entry function
+
+From: Steven Rostedt (VMware) <rostedt@goodmis.org>
+
+commit a8f0f9e49956a74718874b800251455680085600 upstream.
+
+There's a small race between function graph shutdown and the calling of the
+registered function graph entry callback. The callback must not reference
+the task's ret_stack without first checking that it is not NULL. Note, when
+a ret_stack is allocated for a task, it stays allocated until the task exits.
+The problem here, is that function_graph is shutdown, and a new task was
+created, which doesn't have its ret_stack allocated. But since some of the
+functions are still being traced, the callbacks can still be called.
+
+The normal function_graph code handles this, but starting with commit
+8861dd303c ("ftrace: Access ret_stack->subtime only in the function
+profiler") the profiler code references the ret_stack on function entry, but
+doesn't check if it is NULL first.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=196611
+
+Fixes: 8861dd303c ("ftrace: Access ret_stack->subtime only in the function profiler")
+Reported-by: lilydjwg@gmail.com
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/trace/ftrace.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/kernel/trace/ftrace.c
++++ b/kernel/trace/ftrace.c
+@@ -876,6 +876,10 @@ static int profile_graph_entry(struct ft
+       function_profile_call(trace->func, 0, NULL, NULL);
++      /* If function graph is shutting down, ret_stack can be NULL */
++      if (!current->ret_stack)
++              return 0;
++
+       if (index >= 0 && index < FTRACE_RETFUNC_DEPTH)
+               current->ret_stack[index].subtime = 0;
diff --git a/queue-4.9/i2c-designware-fix-system-suspend.patch b/queue-4.9/i2c-designware-fix-system-suspend.patch
new file mode 100644 (file)
index 0000000..8c446ff
--- /dev/null
@@ -0,0 +1,92 @@
+From a23318feeff662c8d25d21623daebdd2e55ec221 Mon Sep 17 00:00:00 2001
+From: Ulf Hansson <ulf.hansson@linaro.org>
+Date: Wed, 9 Aug 2017 15:28:22 +0200
+Subject: i2c: designware: Fix system suspend
+
+From: Ulf Hansson <ulf.hansson@linaro.org>
+
+commit a23318feeff662c8d25d21623daebdd2e55ec221 upstream.
+
+The commit 8503ff166504 ("i2c: designware: Avoid unnecessary resuming
+during system suspend"), may suggest to the PM core to try out the so
+called direct_complete path for system sleep. In this path, the PM core
+treats a runtime suspended device as if it's already in a proper low power
+state for system sleep, which makes it skip calling the system sleep
+callbacks for the device, except for the ->prepare() and the ->complete()
+callbacks.
+
+However, the PM core may unset the direct_complete flag for a parent
+device, in case its child devices are being system suspended before. In this
+scenario, the PM core invokes the system sleep callbacks, no matter if the
+device is runtime suspended or not.
+
+Particularly in cases of an existing i2c slave device, the above path is
+triggered, which breaks the assumption that the i2c device is always
+runtime resumed whenever the dw_i2c_plat_suspend() is being called.
+
+More precisely, dw_i2c_plat_suspend() calls clk_core_disable() and
+clk_core_unprepare(), for an already disabled/unprepared clock, leading to
+a splat in the log about clock calls being wrongly balanced and breaking
+system sleep.
+
+To still allow the direct_complete path in cases when it's possible, but
+also to keep the fix simple, let's runtime resume the i2c device in the
+->suspend() callback, before continuing to put the device into low power
+state.
+
+Note, in cases when the i2c device is attached to the ACPI PM domain, this
+problem doesn't occur, because ACPI's ->suspend() callback, assigned to
+acpi_subsys_suspend(), already calls pm_runtime_resume() for the device.
+
+It should also be noted that this change does not fix commit 8503ff166504
+("i2c: designware: Avoid unnecessary resuming during system suspend").
+Because for the non-ACPI case, the system sleep support was already broken
+prior to that point.
+
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Tested-by: John Stultz <john.stultz@linaro.org>
+Tested-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
+Acked-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
+Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
+Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/i2c/busses/i2c-designware-platdrv.c |   14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+--- a/drivers/i2c/busses/i2c-designware-platdrv.c
++++ b/drivers/i2c/busses/i2c-designware-platdrv.c
+@@ -319,7 +319,7 @@ static void dw_i2c_plat_complete(struct
+ #endif
+ #ifdef CONFIG_PM
+-static int dw_i2c_plat_suspend(struct device *dev)
++static int dw_i2c_plat_runtime_suspend(struct device *dev)
+ {
+       struct platform_device *pdev = to_platform_device(dev);
+       struct dw_i2c_dev *i_dev = platform_get_drvdata(pdev);
+@@ -343,11 +343,21 @@ static int dw_i2c_plat_resume(struct dev
+       return 0;
+ }
++#ifdef CONFIG_PM_SLEEP
++static int dw_i2c_plat_suspend(struct device *dev)
++{
++      pm_runtime_resume(dev);
++      return dw_i2c_plat_runtime_suspend(dev);
++}
++#endif
++
+ static const struct dev_pm_ops dw_i2c_dev_pm_ops = {
+       .prepare = dw_i2c_plat_prepare,
+       .complete = dw_i2c_plat_complete,
+       SET_SYSTEM_SLEEP_PM_OPS(dw_i2c_plat_suspend, dw_i2c_plat_resume)
+-      SET_RUNTIME_PM_OPS(dw_i2c_plat_suspend, dw_i2c_plat_resume, NULL)
++      SET_RUNTIME_PM_OPS(dw_i2c_plat_runtime_suspend,
++                         dw_i2c_plat_resume,
++                         NULL)
+ };
+ #define DW_I2C_DEV_PMOPS (&dw_i2c_dev_pm_ops)
diff --git a/queue-4.9/kbuild-linker-script-do-not-match-c-names-unless-ld_dead_code_data_elimination-is-configured.patch b/queue-4.9/kbuild-linker-script-do-not-match-c-names-unless-ld_dead_code_data_elimination-is-configured.patch
new file mode 100644 (file)
index 0000000..e77e699
--- /dev/null
@@ -0,0 +1,106 @@
+From cb87481ee89dbd6609e227afbf64900fb4e5c930 Mon Sep 17 00:00:00 2001
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Wed, 26 Jul 2017 22:46:27 +1000
+Subject: kbuild: linker script do not match C names unless LD_DEAD_CODE_DATA_ELIMINATION is configured
+
+From: Nicholas Piggin <npiggin@gmail.com>
+
+commit cb87481ee89dbd6609e227afbf64900fb4e5c930 upstream.
+
+The .data and .bss sections were modified in the generic linker script to
+pull in sections named .data.<C identifier>, which are generated by gcc with
+-ffunction-sections and -fdata-sections options.
+
+The problem with this pattern is it can also match section names that Linux
+defines explicitly, e.g., .data.unlikely. This can cause Linux sections to
+get moved into the wrong place.
+
+The way to avoid this is to use ".." separators for explicit section names
+(the dot character is valid in a section name but not a C identifier).
+However currently there are sections which don't follow this rule, so for
+now just disable the wild card by default.
+
+Example: http://marc.info/?l=linux-arm-kernel&m=150106824024221&w=2
+
+Fixes: b67067f1176df ("kbuild: allow archs to select link dead code/data elimination")
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/asm-generic/vmlinux.lds.h |   38 ++++++++++++++++++++++++++------------
+ 1 file changed, 26 insertions(+), 12 deletions(-)
+
+--- a/include/asm-generic/vmlinux.lds.h
++++ b/include/asm-generic/vmlinux.lds.h
+@@ -60,6 +60,22 @@
+ #define ALIGN_FUNCTION()  . = ALIGN(8)
+ /*
++ * LD_DEAD_CODE_DATA_ELIMINATION option enables -fdata-sections, which
++ * generates .data.identifier sections, which need to be pulled in with
++ * .data. We don't want to pull in .data..other sections, which Linux
++ * has defined. Same for text and bss.
++ */
++#ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
++#define TEXT_MAIN .text .text.[0-9a-zA-Z_]*
++#define DATA_MAIN .data .data.[0-9a-zA-Z_]*
++#define BSS_MAIN .bss .bss.[0-9a-zA-Z_]*
++#else
++#define TEXT_MAIN .text
++#define DATA_MAIN .data
++#define BSS_MAIN .bss
++#endif
++
++/*
+  * Align to a 32 byte boundary equal to the
+  * alignment gcc 4.5 uses for a struct
+  */
+@@ -198,12 +214,9 @@
+ /*
+  * .data section
+- * LD_DEAD_CODE_DATA_ELIMINATION option enables -fdata-sections generates
+- * .data.identifier which needs to be pulled in with .data, but don't want to
+- * pull in .data..stuff which has its own requirements. Same for bss.
+  */
+ #define DATA_DATA                                                     \
+-      *(.data .data.[0-9a-zA-Z_]*)                                    \
++      *(DATA_MAIN)                                                    \
+       *(.ref.data)                                                    \
+       *(.data..shared_aligned) /* percpu related */                   \
+       MEM_KEEP(init.data)                                             \
+@@ -436,16 +449,17 @@
+               VMLINUX_SYMBOL(__security_initcall_end) = .;            \
+       }
+-/* .text section. Map to function alignment to avoid address changes
++/*
++ * .text section. Map to function alignment to avoid address changes
+  * during second ld run in second ld pass when generating System.map
+- * LD_DEAD_CODE_DATA_ELIMINATION option enables -ffunction-sections generates
+- * .text.identifier which needs to be pulled in with .text , but some
+- * architectures define .text.foo which is not intended to be pulled in here.
+- * Those enabling LD_DEAD_CODE_DATA_ELIMINATION must ensure they don't have
+- * conflicting section names, and must pull in .text.[0-9a-zA-Z_]* */
++ *
++ * TEXT_MAIN here will match .text.fixup and .text.unlikely if dead
++ * code elimination is enabled, so these sections should be converted
++ * to use ".." first.
++ */
+ #define TEXT_TEXT                                                     \
+               ALIGN_FUNCTION();                                       \
+-              *(.text.hot .text .text.fixup .text.unlikely)           \
++              *(.text.hot TEXT_MAIN .text.fixup .text.unlikely)       \
+               *(.ref.text)                                            \
+       MEM_KEEP(init.text)                                             \
+       MEM_KEEP(exit.text)                                             \
+@@ -613,7 +627,7 @@
+               BSS_FIRST_SECTIONS                                      \
+               *(.bss..page_aligned)                                   \
+               *(.dynbss)                                              \
+-              *(.bss .bss.[0-9a-zA-Z_]*)                              \
++              *(BSS_MAIN)                                             \
+               *(COMMON)                                               \
+       }
diff --git a/queue-4.9/mm-madvise.c-fix-freeing-of-locked-page-with-madv_free.patch b/queue-4.9/mm-madvise.c-fix-freeing-of-locked-page-with-madv_free.patch
new file mode 100644 (file)
index 0000000..31a7a4e
--- /dev/null
@@ -0,0 +1,128 @@
+From 263630e8d176d87308481ebdcd78ef9426739c6b Mon Sep 17 00:00:00 2001
+From: Eric Biggers <ebiggers@google.com>
+Date: Fri, 25 Aug 2017 15:55:39 -0700
+Subject: mm/madvise.c: fix freeing of locked page with MADV_FREE
+
+From: Eric Biggers <ebiggers@google.com>
+
+commit 263630e8d176d87308481ebdcd78ef9426739c6b upstream.
+
+If madvise(..., MADV_FREE) split a transparent hugepage, it called
+put_page() before unlock_page().
+
+This was wrong because put_page() can free the page, e.g. if a
+concurrent madvise(..., MADV_DONTNEED) has removed it from the memory
+mapping. put_page() then rightfully complained about freeing a locked
+page.
+
+Fix this by moving the unlock_page() before put_page().
+
+This bug was found by syzkaller, which encountered the following splat:
+
+    BUG: Bad page state in process syzkaller412798  pfn:1bd800
+    page:ffffea0006f60000 count:0 mapcount:0 mapping:          (null) index:0x20a00
+    flags: 0x200000000040019(locked|uptodate|dirty|swapbacked)
+    raw: 0200000000040019 0000000000000000 0000000000020a00 00000000ffffffff
+    raw: ffffea0006f60020 ffffea0006f60020 0000000000000000 0000000000000000
+    page dumped because: PAGE_FLAGS_CHECK_AT_FREE flag(s) set
+    bad because of flags: 0x1(locked)
+    Modules linked in:
+    CPU: 1 PID: 3037 Comm: syzkaller412798 Not tainted 4.13.0-rc5+ #35
+    Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+    Call Trace:
+     __dump_stack lib/dump_stack.c:16 [inline]
+     dump_stack+0x194/0x257 lib/dump_stack.c:52
+     bad_page+0x230/0x2b0 mm/page_alloc.c:565
+     free_pages_check_bad+0x1f0/0x2e0 mm/page_alloc.c:943
+     free_pages_check mm/page_alloc.c:952 [inline]
+     free_pages_prepare mm/page_alloc.c:1043 [inline]
+     free_pcp_prepare mm/page_alloc.c:1068 [inline]
+     free_hot_cold_page+0x8cf/0x12b0 mm/page_alloc.c:2584
+     __put_single_page mm/swap.c:79 [inline]
+     __put_page+0xfb/0x160 mm/swap.c:113
+     put_page include/linux/mm.h:814 [inline]
+     madvise_free_pte_range+0x137a/0x1ec0 mm/madvise.c:371
+     walk_pmd_range mm/pagewalk.c:50 [inline]
+     walk_pud_range mm/pagewalk.c:108 [inline]
+     walk_p4d_range mm/pagewalk.c:134 [inline]
+     walk_pgd_range mm/pagewalk.c:160 [inline]
+     __walk_page_range+0xc3a/0x1450 mm/pagewalk.c:249
+     walk_page_range+0x200/0x470 mm/pagewalk.c:326
+     madvise_free_page_range.isra.9+0x17d/0x230 mm/madvise.c:444
+     madvise_free_single_vma+0x353/0x580 mm/madvise.c:471
+     madvise_dontneed_free mm/madvise.c:555 [inline]
+     madvise_vma mm/madvise.c:664 [inline]
+     SYSC_madvise mm/madvise.c:832 [inline]
+     SyS_madvise+0x7d3/0x13c0 mm/madvise.c:760
+     entry_SYSCALL_64_fastpath+0x1f/0xbe
+
+Here is a C reproducer:
+
+    #define _GNU_SOURCE
+    #include <pthread.h>
+    #include <sys/mman.h>
+    #include <unistd.h>
+
+    #define MADV_FREE  8
+    #define PAGE_SIZE  4096
+
+    static void *mapping;
+    static const size_t mapping_size = 0x1000000;
+
+    static void *madvise_thrproc(void *arg)
+    {
+        madvise(mapping, mapping_size, (long)arg);
+    }
+
+    int main(void)
+    {
+        pthread_t t[2];
+
+        for (;;) {
+            mapping = mmap(NULL, mapping_size, PROT_WRITE,
+                           MAP_POPULATE|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+
+            munmap(mapping + mapping_size / 2, PAGE_SIZE);
+
+            pthread_create(&t[0], 0, madvise_thrproc, (void*)MADV_DONTNEED);
+            pthread_create(&t[1], 0, madvise_thrproc, (void*)MADV_FREE);
+            pthread_join(t[0], NULL);
+            pthread_join(t[1], NULL);
+            munmap(mapping, mapping_size);
+        }
+    }
+
+Note: to see the splat, CONFIG_TRANSPARENT_HUGEPAGE=y and
+CONFIG_DEBUG_VM=y are needed.
+
+Google Bug Id: 64696096
+
+Link: http://lkml.kernel.org/r/20170823205235.132061-1-ebiggers3@gmail.com
+Fixes: 854e9ed09ded ("mm: support madvise(MADV_FREE)")
+Signed-off-by: Eric Biggers <ebiggers@google.com>
+Acked-by: David Rientjes <rientjes@google.com>
+Acked-by: Minchan Kim <minchan@kernel.org>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/madvise.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/madvise.c
++++ b/mm/madvise.c
+@@ -331,8 +331,8 @@ static int madvise_free_pte_range(pmd_t
+                               pte_offset_map_lock(mm, pmd, addr, &ptl);
+                               goto out;
+                       }
+-                      put_page(page);
+                       unlock_page(page);
++                      put_page(page);
+                       pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+                       pte--;
+                       addr -= PAGE_SIZE;
diff --git a/queue-4.9/mm-memblock.c-reversed-logic-in-memblock_discard.patch b/queue-4.9/mm-memblock.c-reversed-logic-in-memblock_discard.patch
new file mode 100644 (file)
index 0000000..c380df7
--- /dev/null
@@ -0,0 +1,38 @@
+From 91b540f98872a206ea1c49e4aa6ea8eed0886644 Mon Sep 17 00:00:00 2001
+From: Pavel Tatashin <pasha.tatashin@oracle.com>
+Date: Fri, 25 Aug 2017 15:55:46 -0700
+Subject: mm/memblock.c: reversed logic in memblock_discard()
+
+From: Pavel Tatashin <pasha.tatashin@oracle.com>
+
+commit 91b540f98872a206ea1c49e4aa6ea8eed0886644 upstream.
+
+In the recently introduced memblock_discard() there is a reversed logic bug.
+Memory of the static array is freed instead of the dynamically allocated one.
+
+Link: http://lkml.kernel.org/r/1503511441-95478-2-git-send-email-pasha.tatashin@oracle.com
+Fixes: 3010f876500f ("mm: discard memblock data later")
+Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
+Reported-by: Woody Suwalski <terraluna977@gmail.com>
+Tested-by: Woody Suwalski <terraluna977@gmail.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memblock.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/memblock.c
++++ b/mm/memblock.c
+@@ -311,7 +311,7 @@ void __init memblock_discard(void)
+               __memblock_free_late(addr, size);
+       }
+-      if (memblock.memory.regions == memblock_memory_init_regions) {
++      if (memblock.memory.regions != memblock_memory_init_regions) {
+               addr = __pa(memblock.memory.regions);
+               size = PAGE_ALIGN(sizeof(struct memblock_region) *
+                                 memblock.memory.max);
diff --git a/queue-4.9/net-sunrpc-svcsock-fix-null-pointer-exception.patch b/queue-4.9/net-sunrpc-svcsock-fix-null-pointer-exception.patch
new file mode 100644 (file)
index 0000000..498da34
--- /dev/null
@@ -0,0 +1,186 @@
+From eebe53e87f97975ee58a21693e44797608bf679c Mon Sep 17 00:00:00 2001
+From: Vadim Lomovtsev <vlomovts@redhat.com>
+Date: Mon, 21 Aug 2017 07:23:07 -0400
+Subject: net: sunrpc: svcsock: fix NULL-pointer exception
+
+From: Vadim Lomovtsev <vlomovts@redhat.com>
+
+commit eebe53e87f97975ee58a21693e44797608bf679c upstream.
+
+While running nfs/connectathon tests, a kernel NULL-pointer exception
+has been observed due to races in svcsock.c.
+
+The race appears when the kernel accepts a connection via kernel_accept
+(which creates a new socket) and starts queuing ingress packets
+to the new socket. This happens in ksoftirq context which could run
+concurrently on a different core while the new socket setup is not done yet.
+
+The fix is to re-order socket user data init sequence and add
+write/read barrier calls to be sure that we got proper values
+for callback pointers before actually calling them.
+
+Test results: nfs/connectathon reports '0' failed tests for about 200+ iterations.
+
+Crash log:
+---<-snip->---
+[ 6708.638984] Unable to handle kernel NULL pointer dereference at virtual address 00000000
+[ 6708.647093] pgd = ffff0000094e0000
+[ 6708.650497] [00000000] *pgd=0000010ffff90003, *pud=0000010ffff90003, *pmd=0000010ffff80003, *pte=0000000000000000
+[ 6708.660761] Internal error: Oops: 86000005 [#1] SMP
+[ 6708.665630] Modules linked in: nfsv3 nfnetlink_queue nfnetlink_log nfnetlink rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache overlay xt_CONNSECMARK xt_SECMARK xt_conntrack iptable_security ip_tables ah4 xfrm4_mode_transport sctp tun binfmt_misc ext4 jbd2 mbcache loop tcp_diag udp_diag inet_diag rpcrdma ib_isert iscsi_target_mod ib_iser rdma_cm iw_cm libiscsi scsi_transport_iscsi ib_srpt target_core_mod ib_srp scsi_transport_srp ib_ipoib ib_ucm ib_uverbs ib_umad ib_cm ib_core nls_koi8_u nls_cp932 ts_kmp nf_conntrack_ipv4 nf_defrag_ipv4 nf_conntrack vfat fat ghash_ce sha2_ce sha1_ce cavium_rng_vf i2c_thunderx sg thunderx_edac i2c_smbus edac_core cavium_rng nfsd auth_rpcgss nfs_acl lockd grace sunrpc xfs libcrc32c nicvf nicpf ast i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops
+[ 6708.736446]  ttm drm i2c_core thunder_bgx thunder_xcv mdio_thunder mdio_cavium dm_mirror dm_region_hash dm_log dm_mod [last unloaded: stap_3c300909c5b3f46dcacd49aab3334af_87021]
+[ 6708.752275] CPU: 84 PID: 0 Comm: swapper/84 Tainted: G        W  OE   4.11.0-4.el7.aarch64 #1
+[ 6708.760787] Hardware name: www.cavium.com CRB-2S/CRB-2S, BIOS 0.3 Mar 13 2017
+[ 6708.767910] task: ffff810006842e80 task.stack: ffff81000689c000
+[ 6708.773822] PC is at 0x0
+[ 6708.776739] LR is at svc_data_ready+0x38/0x88 [sunrpc]
+[ 6708.781866] pc : [<0000000000000000>] lr : [<ffff0000029d7378>] pstate: 60000145
+[ 6708.789248] sp : ffff810ffbad3900
+[ 6708.792551] x29: ffff810ffbad3900 x28: ffff000008c73d58
+[ 6708.797853] x27: 0000000000000000 x26: ffff81000bbe1e00
+[ 6708.803156] x25: 0000000000000020 x24: ffff800f7410bf28
+[ 6708.808458] x23: ffff000008c63000 x22: ffff000008c63000
+[ 6708.813760] x21: ffff800f7410bf28 x20: ffff81000bbe1e00
+[ 6708.819063] x19: ffff810012412400 x18: 00000000d82a9df2
+[ 6708.824365] x17: 0000000000000000 x16: 0000000000000000
+[ 6708.829667] x15: 0000000000000000 x14: 0000000000000001
+[ 6708.834969] x13: 0000000000000000 x12: 722e736f622e676e
+[ 6708.840271] x11: 00000000f814dd99 x10: 0000000000000000
+[ 6708.845573] x9 : 7374687225000000 x8 : 0000000000000000
+[ 6708.850875] x7 : 0000000000000000 x6 : 0000000000000000
+[ 6708.856177] x5 : 0000000000000028 x4 : 0000000000000000
+[ 6708.861479] x3 : 0000000000000000 x2 : 00000000e5000000
+[ 6708.866781] x1 : 0000000000000000 x0 : ffff81000bbe1e00
+[ 6708.872084]
+[ 6708.873565] Process swapper/84 (pid: 0, stack limit = 0xffff81000689c000)
+[ 6708.880341] Stack: (0xffff810ffbad3900 to 0xffff8100068a0000)
+[ 6708.886075] Call trace:
+[ 6708.888513] Exception stack(0xffff810ffbad3710 to 0xffff810ffbad3840)
+[ 6708.894942] 3700:                                   ffff810012412400 0001000000000000
+[ 6708.902759] 3720: ffff810ffbad3900 0000000000000000 0000000060000145 ffff800f79300000
+[ 6708.910577] 3740: ffff000009274d00 00000000000003ea 0000000000000015 ffff000008c63000
+[ 6708.918395] 3760: ffff810ffbad3830 ffff800f79300000 000000000000004d 0000000000000000
+[ 6708.926212] 3780: ffff810ffbad3890 ffff0000080f88dc ffff800f79300000 000000000000004d
+[ 6708.934030] 37a0: ffff800f7930093c ffff000008c63000 0000000000000000 0000000000000140
+[ 6708.941848] 37c0: ffff000008c2c000 0000000000040b00 ffff81000bbe1e00 0000000000000000
+[ 6708.949665] 37e0: 00000000e5000000 0000000000000000 0000000000000000 0000000000000028
+[ 6708.957483] 3800: 0000000000000000 0000000000000000 0000000000000000 7374687225000000
+[ 6708.965300] 3820: 0000000000000000 00000000f814dd99 722e736f622e676e 0000000000000000
+[ 6708.973117] [<          (null)>]           (null)
+[ 6708.977824] [<ffff0000086f9fa4>] tcp_data_queue+0x754/0xc5c
+[ 6708.983386] [<ffff0000086fa64c>] tcp_rcv_established+0x1a0/0x67c
+[ 6708.989384] [<ffff000008704120>] tcp_v4_do_rcv+0x15c/0x22c
+[ 6708.994858] [<ffff000008707418>] tcp_v4_rcv+0xaf0/0xb58
+[ 6709.000077] [<ffff0000086df784>] ip_local_deliver_finish+0x10c/0x254
+[ 6709.006419] [<ffff0000086dfea4>] ip_local_deliver+0xf0/0xfc
+[ 6709.011980] [<ffff0000086dfad4>] ip_rcv_finish+0x208/0x3a4
+[ 6709.017454] [<ffff0000086e018c>] ip_rcv+0x2dc/0x3c8
+[ 6709.022328] [<ffff000008692fc8>] __netif_receive_skb_core+0x2f8/0xa0c
+[ 6709.028758] [<ffff000008696068>] __netif_receive_skb+0x38/0x84
+[ 6709.034580] [<ffff00000869611c>] netif_receive_skb_internal+0x68/0xdc
+[ 6709.041010] [<ffff000008696bc0>] napi_gro_receive+0xcc/0x1a8
+[ 6709.046690] [<ffff0000014b0fc4>] nicvf_cq_intr_handler+0x59c/0x730 [nicvf]
+[ 6709.053559] [<ffff0000014b1380>] nicvf_poll+0x38/0xb8 [nicvf]
+[ 6709.059295] [<ffff000008697a6c>] net_rx_action+0x2f8/0x464
+[ 6709.064771] [<ffff000008081824>] __do_softirq+0x11c/0x308
+[ 6709.070164] [<ffff0000080d14e4>] irq_exit+0x12c/0x174
+[ 6709.075206] [<ffff00000813101c>] __handle_domain_irq+0x78/0xc4
+[ 6709.081027] [<ffff000008081608>] gic_handle_irq+0x94/0x190
+[ 6709.086501] Exception stack(0xffff81000689fdf0 to 0xffff81000689ff20)
+[ 6709.092929] fde0:                                   0000810ff2ec0000 ffff000008c10000
+[ 6709.100747] fe00: ffff000008c70ef4 0000000000000001 0000000000000000 ffff810ffbad9b18
+[ 6709.108565] fe20: ffff810ffbad9c70 ffff8100169d3800 ffff810006843ab0 ffff81000689fe80
+[ 6709.116382] fe40: 0000000000000bd0 0000ffffdf979cd0 183f5913da192500 0000ffff8a254ce4
+[ 6709.124200] fe60: 0000ffff8a254b78 0000aaab10339808 0000000000000000 0000ffff8a0c2a50
+[ 6709.132018] fe80: 0000ffffdf979b10 ffff000008d6d450 ffff000008c10000 ffff000008d6d000
+[ 6709.139836] fea0: 0000000000000054 ffff000008cd3dbc 0000000000000000 0000000000000000
+[ 6709.147653] fec0: 0000000000000000 0000000000000000 0000000000000000 ffff81000689ff20
+[ 6709.155471] fee0: ffff000008085240 ffff81000689ff20 ffff000008085244 0000000060000145
+[ 6709.163289] ff00: ffff81000689ff10 ffff00000813f1e4 ffffffffffffffff ffff00000813f238
+[ 6709.171107] [<ffff000008082eb4>] el1_irq+0xb4/0x140
+[ 6709.175976] [<ffff000008085244>] arch_cpu_idle+0x44/0x11c
+[ 6709.181368] [<ffff0000087bf3b8>] default_idle_call+0x20/0x30
+[ 6709.187020] [<ffff000008116d50>] do_idle+0x158/0x1e4
+[ 6709.191973] [<ffff000008116ff4>] cpu_startup_entry+0x2c/0x30
+[ 6709.197624] [<ffff00000808e7cc>] secondary_start_kernel+0x13c/0x160
+[ 6709.203878] [<0000000001bc71c4>] 0x1bc71c4
+[ 6709.207967] Code: bad PC value
+[ 6709.211061] SMP: stopping secondary CPUs
+[ 6709.218830] Starting crashdump kernel...
+[ 6709.222749] Bye!
+---<-snip>---
+
+Signed-off-by: Vadim Lomovtsev <vlomovts@redhat.com>
+Reviewed-by: Jeff Layton <jlayton@redhat.com>
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/sunrpc/svcsock.c |   22 ++++++++++++++++++++--
+ 1 file changed, 20 insertions(+), 2 deletions(-)
+
+--- a/net/sunrpc/svcsock.c
++++ b/net/sunrpc/svcsock.c
+@@ -408,6 +408,9 @@ static void svc_data_ready(struct sock *
+               dprintk("svc: socket %p(inet %p), busy=%d\n",
+                       svsk, sk,
+                       test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
++
++              /* Refer to svc_setup_socket() for details. */
++              rmb();
+               svsk->sk_odata(sk);
+               if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags))
+                       svc_xprt_enqueue(&svsk->sk_xprt);
+@@ -424,6 +427,9 @@ static void svc_write_space(struct sock
+       if (svsk) {
+               dprintk("svc: socket %p(inet %p), write_space busy=%d\n",
+                       svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
++
++              /* Refer to svc_setup_socket() for details. */
++              rmb();
+               svsk->sk_owspace(sk);
+               svc_xprt_enqueue(&svsk->sk_xprt);
+       }
+@@ -748,8 +754,12 @@ static void svc_tcp_listen_data_ready(st
+       dprintk("svc: socket %p TCP (listen) state change %d\n",
+               sk, sk->sk_state);
+-      if (svsk)
++      if (svsk) {
++              /* Refer to svc_setup_socket() for details. */
++              rmb();
+               svsk->sk_odata(sk);
++      }
++
+       /*
+        * This callback may called twice when a new connection
+        * is established as a child socket inherits everything
+@@ -782,6 +792,8 @@ static void svc_tcp_state_change(struct
+       if (!svsk)
+               printk("svc: socket %p: no user data\n", sk);
+       else {
++              /* Refer to svc_setup_socket() for details. */
++              rmb();
+               svsk->sk_ostate(sk);
+               if (sk->sk_state != TCP_ESTABLISHED) {
+                       set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
+@@ -1368,12 +1380,18 @@ static struct svc_sock *svc_setup_socket
+               return ERR_PTR(err);
+       }
+-      inet->sk_user_data = svsk;
+       svsk->sk_sock = sock;
+       svsk->sk_sk = inet;
+       svsk->sk_ostate = inet->sk_state_change;
+       svsk->sk_odata = inet->sk_data_ready;
+       svsk->sk_owspace = inet->sk_write_space;
++      /*
++       * This barrier is necessary in order to prevent race condition
++       * with svc_data_ready(), svc_listen_data_ready() and others
++       * when calling callbacks above.
++       */
++      wmb();
++      inet->sk_user_data = svsk;
+       /* Initialize the socket */
+       if (sock->type == SOCK_DGRAM)
diff --git a/queue-4.9/netfilter-nat-fix-src-map-lookup.patch b/queue-4.9/netfilter-nat-fix-src-map-lookup.patch
new file mode 100644 (file)
index 0000000..7e1db79
--- /dev/null
@@ -0,0 +1,68 @@
+From 97772bcd56efa21d9d8976db6f205574ea602f51 Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Fri, 7 Jul 2017 13:07:17 +0200
+Subject: netfilter: nat: fix src map lookup
+
+From: Florian Westphal <fw@strlen.de>
+
+commit 97772bcd56efa21d9d8976db6f205574ea602f51 upstream.
+
+When doing initial conversion to rhashtable I replaced the bucket
+walk with a single rhashtable_lookup_fast().
+
+When moving to rhlist I failed to properly walk the list of identical
+tuples, but that is what is needed for this to work correctly.
+The table contains the original tuples, so the reply tuples are all
+distinct.
+
+We currently decide that mapping is (not) in range only based on the
+first entry, but in case it's not we need to try the reply tuple of the
+next entry until we either find an in-range mapping or we checked
+all the entries.
+
+This bug makes nat core attempt collision resolution while it might be
+able to use the mapping as-is.
+
+Fixes: 870190a9ec90 ("netfilter: nat: convert nat bysrc hash to rhashtable")
+Reported-by: Jaco Kroon <jaco@uls.co.za>
+Tested-by: Jaco Kroon <jaco@uls.co.za>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/netfilter/nf_nat_core.c |   17 +++++++++--------
+ 1 file changed, 9 insertions(+), 8 deletions(-)
+
+--- a/net/netfilter/nf_nat_core.c
++++ b/net/netfilter/nf_nat_core.c
+@@ -225,20 +225,21 @@ find_appropriate_src(struct net *net,
+               .tuple = tuple,
+               .zone = zone
+       };
+-      struct rhlist_head *hl;
++      struct rhlist_head *hl, *h;
+       hl = rhltable_lookup(&nf_nat_bysource_table, &key,
+                            nf_nat_bysource_params);
+-      if (!hl)
+-              return 0;
+-      ct = container_of(hl, typeof(*ct), nat_bysource);
++      rhl_for_each_entry_rcu(ct, h, hl, nat_bysource) {
++              nf_ct_invert_tuplepr(result,
++                                   &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
++              result->dst = tuple->dst;
+-      nf_ct_invert_tuplepr(result,
+-                           &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+-      result->dst = tuple->dst;
++              if (in_range(l3proto, l4proto, result, range))
++                      return 1;
++      }
+-      return in_range(l3proto, l4proto, result, range);
++      return 0;
+ }
+ /* For [FUTURE] fragmentation handling, we want the least-used
diff --git a/queue-4.9/nfsd-limit-end-of-page-list-when-decoding-nfsv4-write.patch b/queue-4.9/nfsd-limit-end-of-page-list-when-decoding-nfsv4-write.patch
new file mode 100644 (file)
index 0000000..d7ecca3
--- /dev/null
@@ -0,0 +1,48 @@
+From fc788f64f1f3eb31e87d4f53bcf1ab76590d5838 Mon Sep 17 00:00:00 2001
+From: Chuck Lever <chuck.lever@oracle.com>
+Date: Fri, 18 Aug 2017 11:12:19 -0400
+Subject: nfsd: Limit end of page list when decoding NFSv4 WRITE
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+commit fc788f64f1f3eb31e87d4f53bcf1ab76590d5838 upstream.
+
+When processing an NFSv4 WRITE operation, argp->end should never
+point past the end of the data in the final page of the page list.
+Otherwise, nfsd4_decode_compound can walk into uninitialized memory.
+
+More critically, nfsd4_decode_write is failing to increment argp->pagelen
+when it increments argp->pagelist.  This can cause later xdr decoders
+to assume more data is available than really is, which can cause server
+crashes on malformed requests.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfsd/nfs4xdr.c |    6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -129,7 +129,7 @@ static void next_decode_page(struct nfsd
+       argp->p = page_address(argp->pagelist[0]);
+       argp->pagelist++;
+       if (argp->pagelen < PAGE_SIZE) {
+-              argp->end = argp->p + (argp->pagelen>>2);
++              argp->end = argp->p + XDR_QUADLEN(argp->pagelen);
+               argp->pagelen = 0;
+       } else {
+               argp->end = argp->p + (PAGE_SIZE>>2);
+@@ -1246,9 +1246,7 @@ nfsd4_decode_write(struct nfsd4_compound
+               argp->pagelen -= pages * PAGE_SIZE;
+               len -= pages * PAGE_SIZE;
+-              argp->p = (__be32 *)page_address(argp->pagelist[0]);
+-              argp->pagelist++;
+-              argp->end = argp->p + XDR_QUADLEN(PAGE_SIZE);
++              next_decode_page(argp);
+       }
+       argp->p += XDR_QUADLEN(len);
diff --git a/queue-4.9/perf-core-fix-group-cpu-task-validation.patch b/queue-4.9/perf-core-fix-group-cpu-task-validation.patch
new file mode 100644 (file)
index 0000000..5021496
--- /dev/null
@@ -0,0 +1,182 @@
+From 64aee2a965cf2954a038b5522f11d2cd2f0f8f3e Mon Sep 17 00:00:00 2001
+From: Mark Rutland <mark.rutland@arm.com>
+Date: Thu, 22 Jun 2017 15:41:38 +0100
+Subject: perf/core: Fix group {cpu,task} validation
+
+From: Mark Rutland <mark.rutland@arm.com>
+
+commit 64aee2a965cf2954a038b5522f11d2cd2f0f8f3e upstream.
+
+Regardless of which events form a group, it does not make sense for the
+events to target different tasks and/or CPUs, as this leaves the group
+inconsistent and impossible to schedule. The core perf code assumes that
+these are consistent across (successfully initialised) groups.
+
+Core perf code only verifies this when moving SW events into a HW
+context. Thus, we can violate this requirement for pure SW groups and
+pure HW groups, unless the relevant PMU driver happens to perform this
+verification itself. These mismatched groups subsequently wreak havoc
+elsewhere.
+
+For example, we handle watchpoints as SW events, and reserve watchpoint
+HW on a per-CPU basis at pmu::event_init() time to ensure that any event
+that is initialised is guaranteed to have a slot at pmu::add() time.
+However, the core code only checks the group leader's cpu filter (via
+event_filter_match()), and can thus install follower events onto CPUs
+violating their (mismatched) CPU filters, potentially installing them
+into a CPU without sufficient reserved slots.
+
+This can be triggered with the below test case, resulting in warnings
+from arch backends.
+
+  #define _GNU_SOURCE
+  #include <linux/hw_breakpoint.h>
+  #include <linux/perf_event.h>
+  #include <sched.h>
+  #include <stdio.h>
+  #include <sys/prctl.h>
+  #include <sys/syscall.h>
+  #include <unistd.h>
+
+  static int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu,
+                          int group_fd, unsigned long flags)
+  {
+       return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+  }
+
+  char watched_char;
+
+  struct perf_event_attr wp_attr = {
+       .type = PERF_TYPE_BREAKPOINT,
+       .bp_type = HW_BREAKPOINT_RW,
+       .bp_addr = (unsigned long)&watched_char,
+       .bp_len = 1,
+       .size = sizeof(wp_attr),
+  };
+
+  int main(int argc, char *argv[])
+  {
+       int leader, ret;
+       cpu_set_t cpus;
+
+       /*
+        * Force use of CPU0 to ensure our CPU0-bound events get scheduled.
+        */
+       CPU_ZERO(&cpus);
+       CPU_SET(0, &cpus);
+       ret = sched_setaffinity(0, sizeof(cpus), &cpus);
+       if (ret) {
+               printf("Unable to set cpu affinity\n");
+               return 1;
+       }
+
+       /* open leader event, bound to this task, CPU0 only */
+       leader = perf_event_open(&wp_attr, 0, 0, -1, 0);
+       if (leader < 0) {
+               printf("Couldn't open leader: %d\n", leader);
+               return 1;
+       }
+
+       /*
+        * Open a follower event that is bound to the same task, but a
+        * different CPU. This means that the group should never be possible to
+        * schedule.
+        */
+       ret = perf_event_open(&wp_attr, 0, 1, leader, 0);
+       if (ret < 0) {
+               printf("Couldn't open mismatched follower: %d\n", ret);
+               return 1;
+       } else {
+               printf("Opened leader/follower with mismastched CPUs\n");
+       }
+
+       /*
+        * Open as many independent events as we can, all bound to the same
+        * task, CPU0 only.
+        */
+       do {
+               ret = perf_event_open(&wp_attr, 0, 0, -1, 0);
+       } while (ret >= 0);
+
+       /*
+        * Force enable/disable all events to trigger the erroneous
+        * installation of the follower event.
+        */
+       printf("Opened all events. Toggling..\n");
+       for (;;) {
+               prctl(PR_TASK_PERF_EVENTS_DISABLE, 0, 0, 0, 0);
+               prctl(PR_TASK_PERF_EVENTS_ENABLE, 0, 0, 0, 0);
+       }
+
+       return 0;
+  }
+
+Fix this by validating this requirement regardless of whether we're
+moving events.
+
+Signed-off-by: Mark Rutland <mark.rutland@arm.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Zhou Chengming <zhouchengming1@huawei.com>
+Link: http://lkml.kernel.org/r/1498142498-15758-1-git-send-email-mark.rutland@arm.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/events/core.c |   39 +++++++++++++++++++--------------------
+ 1 file changed, 19 insertions(+), 20 deletions(-)
+
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -9786,28 +9786,27 @@ SYSCALL_DEFINE5(perf_event_open,
+                       goto err_context;
+               /*
+-               * Do not allow to attach to a group in a different
+-               * task or CPU context:
++               * Make sure we're both events for the same CPU;
++               * grouping events for different CPUs is broken; since
++               * you can never concurrently schedule them anyhow.
+                */
+-              if (move_group) {
+-                      /*
+-                       * Make sure we're both on the same task, or both
+-                       * per-cpu events.
+-                       */
+-                      if (group_leader->ctx->task != ctx->task)
+-                              goto err_context;
++              if (group_leader->cpu != event->cpu)
++                      goto err_context;
++
++              /*
++               * Make sure we're both on the same task, or both
++               * per-CPU events.
++               */
++              if (group_leader->ctx->task != ctx->task)
++                      goto err_context;
+-                      /*
+-                       * Make sure we're both events for the same CPU;
+-                       * grouping events for different CPUs is broken; since
+-                       * you can never concurrently schedule them anyhow.
+-                       */
+-                      if (group_leader->cpu != event->cpu)
+-                              goto err_context;
+-              } else {
+-                      if (group_leader->ctx != ctx)
+-                              goto err_context;
+-              }
++              /*
++               * Do not allow to attach to a group in a different task
++               * or CPU context. If we're moving SW events, we'll fix
++               * this up later, so allow that.
++               */
++              if (!move_group && group_leader->ctx != ctx)
++                      goto err_context;
+               /*
+                * Only a group leader can be exclusive or pinned
diff --git a/queue-4.9/perf-probe-fix-funcs-to-show-correct-symbols-for-offline-module.patch b/queue-4.9/perf-probe-fix-funcs-to-show-correct-symbols-for-offline-module.patch
new file mode 100644 (file)
index 0000000..1d49ba4
--- /dev/null
@@ -0,0 +1,107 @@
+From eebc509b20881b92d62e317b2c073e57c5f200f0 Mon Sep 17 00:00:00 2001
+From: Masami Hiramatsu <mhiramat@kernel.org>
+Date: Wed, 4 Jan 2017 12:29:05 +0900
+Subject: perf probe: Fix --funcs to show correct symbols for offline module
+
+From: Masami Hiramatsu <mhiramat@kernel.org>
+
+commit eebc509b20881b92d62e317b2c073e57c5f200f0 upstream.
+
+Fix --funcs (-F) option to show correct symbols for offline module.
+Since previous perf-probe uses machine__findnew_module_map() for offline
+module, even if user passes a module file (with full path) which is for
+other architecture, perf-probe always tries to load symbol map for
+current kernel module.
+
+This fix uses dso__new_map() to load the map from given binary as same
+as a map for user applications.
+
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Jiri Olsa <jolsa@redhat.com>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Link: http://lkml.kernel.org/r/148350053478.19001.15435255244512631545.stgit@devbox
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Cc: Krister Johansen <kjlx@templeofstupid.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/perf/util/probe-event.c |   25 ++++++-------------------
+ 1 file changed, 6 insertions(+), 19 deletions(-)
+
+--- a/tools/perf/util/probe-event.c
++++ b/tools/perf/util/probe-event.c
+@@ -163,7 +163,7 @@ static struct map *kernel_get_module_map
+       /* A file path -- this is an offline module */
+       if (module && strchr(module, '/'))
+-              return machine__findnew_module_map(host_machine, 0, module);
++              return dso__new_map(module);
+       if (!module)
+               module = "kernel";
+@@ -173,6 +173,7 @@ static struct map *kernel_get_module_map
+               if (strncmp(pos->dso->short_name + 1, module,
+                           pos->dso->short_name_len - 2) == 0 &&
+                   module[pos->dso->short_name_len - 2] == '\0') {
++                      map__get(pos);
+                       return pos;
+               }
+       }
+@@ -188,15 +189,6 @@ struct map *get_target_map(const char *t
+               return kernel_get_module_map(target);
+ }
+-static void put_target_map(struct map *map, bool user)
+-{
+-      if (map && user) {
+-              /* Only the user map needs to be released */
+-              map__put(map);
+-      }
+-}
+-
+-
+ static int convert_exec_to_group(const char *exec, char **result)
+ {
+       char *ptr1, *ptr2, *exec_copy;
+@@ -412,7 +404,7 @@ static int find_alternative_probe_point(
+       }
+ out:
+-      put_target_map(map, uprobes);
++      map__put(map);
+       return ret;
+ }
+@@ -2944,7 +2936,7 @@ static int find_probe_trace_events_from_
+       }
+ out:
+-      put_target_map(map, pev->uprobes);
++      map__put(map);
+       free(syms);
+       return ret;
+@@ -3437,10 +3429,7 @@ int show_available_funcs(const char *tar
+               return ret;
+       /* Get a symbol map */
+-      if (user)
+-              map = dso__new_map(target);
+-      else
+-              map = kernel_get_module_map(target);
++      map = get_target_map(target, user);
+       if (!map) {
+               pr_err("Failed to get a map for %s\n", (target) ? : "kernel");
+               return -EINVAL;
+@@ -3472,9 +3461,7 @@ int show_available_funcs(const char *tar
+         }
+ end:
+-      if (user) {
+-              map__put(map);
+-      }
++      map__put(map);
+       exit_probe_symbol_maps();
+       return ret;
diff --git a/queue-4.9/perf-x86-intel-rapl-make-package-handling-more-robust.patch b/queue-4.9/perf-x86-intel-rapl-make-package-handling-more-robust.patch
new file mode 100644 (file)
index 0000000..720cf3d
--- /dev/null
@@ -0,0 +1,178 @@
+From dd86e373e09fb16b83e8adf5c48c421a4ca76468 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 31 Jan 2017 23:58:38 +0100
+Subject: perf/x86/intel/rapl: Make package handling more robust
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit dd86e373e09fb16b83e8adf5c48c421a4ca76468 upstream.
+
+The package management code in RAPL relies on package mapping being
+available before a CPU is started. This changed with:
+
+  9d85eb9119f4 ("x86/smpboot: Make logical package management more robust")
+
+because the ACPI/BIOS information turned out to be unreliable, but that
+left RAPL in broken state. This was not noticed because on a regular boot
+all CPUs are online before RAPL is initialized.
+
+A possible fix would be to reintroduce the mess which allocates a package
+data structure in CPU prepare and when it turns out to already exist in
+starting throw it away later in the CPU online callback. But that's a
+horrible hack and not required at all because RAPL becomes functional for
+perf only in the CPU online callback. That's correct because user space is
+not yet informed about the CPU being onlined, so nothing can rely on RAPL
+being available on that particular CPU.
+
+Move the allocation to the CPU online callback and simplify the hotplug
+handling. At this point the package mapping is established and correct.
+
+This also adds a missing check for available package data in the
+event_init() function.
+
+Reported-by: Yasuaki Ishimatsu <yasu.isimatu@gmail.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
+Cc: Jiri Olsa <jolsa@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Sebastian Siewior <bigeasy@linutronix.de>
+Cc: Stephane Eranian <eranian@google.com>
+Cc: Vince Weaver <vincent.weaver@maine.edu>
+Fixes: 9d85eb9119f4 ("x86/smpboot: Make logical package management more robust")
+Link: http://lkml.kernel.org/r/20170131230141.212593966@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+[ jwang: backport to 4.9 fix Null pointer deref during hotplug cpu.]
+Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/events/intel/rapl.c |   58 ++++++++++++++++++-------------------------
+ include/linux/cpuhotplug.h   |    1 
+ 2 files changed, 25 insertions(+), 34 deletions(-)
+
+--- a/arch/x86/events/intel/rapl.c
++++ b/arch/x86/events/intel/rapl.c
+@@ -161,7 +161,13 @@ static u64 rapl_timer_ms;
+ static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
+ {
+-      return rapl_pmus->pmus[topology_logical_package_id(cpu)];
++      unsigned int pkgid = topology_logical_package_id(cpu);
++
++      /*
++       * The unsigned check also catches the '-1' return value for non
++       * existent mappings in the topology map.
++       */
++      return pkgid < rapl_pmus->maxpkg ? rapl_pmus->pmus[pkgid] : NULL;
+ }
+ static inline u64 rapl_read_counter(struct perf_event *event)
+@@ -402,6 +408,8 @@ static int rapl_pmu_event_init(struct pe
+       /* must be done before validate_group */
+       pmu = cpu_to_rapl_pmu(event->cpu);
++      if (!pmu)
++              return -EINVAL;
+       event->cpu = pmu->cpu;
+       event->pmu_private = pmu;
+       event->hw.event_base = msr;
+@@ -585,6 +593,19 @@ static int rapl_cpu_online(unsigned int
+       struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
+       int target;
++      if (!pmu) {
++              pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
++              if (!pmu)
++                      return -ENOMEM;
++
++              raw_spin_lock_init(&pmu->lock);
++              INIT_LIST_HEAD(&pmu->active_list);
++              pmu->pmu = &rapl_pmus->pmu;
++              pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
++              rapl_hrtimer_init(pmu);
++
++              rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu;
++      }
+       /*
+        * Check if there is an online cpu in the package which collects rapl
+        * events already.
+@@ -598,27 +619,6 @@ static int rapl_cpu_online(unsigned int
+       return 0;
+ }
+-static int rapl_cpu_prepare(unsigned int cpu)
+-{
+-      struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
+-
+-      if (pmu)
+-              return 0;
+-
+-      pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
+-      if (!pmu)
+-              return -ENOMEM;
+-
+-      raw_spin_lock_init(&pmu->lock);
+-      INIT_LIST_HEAD(&pmu->active_list);
+-      pmu->pmu = &rapl_pmus->pmu;
+-      pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
+-      pmu->cpu = -1;
+-      rapl_hrtimer_init(pmu);
+-      rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu;
+-      return 0;
+-}
+-
+ static int rapl_check_hw_unit(bool apply_quirk)
+ {
+       u64 msr_rapl_power_unit_bits;
+@@ -804,28 +804,21 @@ static int __init rapl_pmu_init(void)
+        * Install callbacks. Core will call them for each online cpu.
+        */
+-      ret = cpuhp_setup_state(CPUHP_PERF_X86_RAPL_PREP, "PERF_X86_RAPL_PREP",
+-                              rapl_cpu_prepare, NULL);
+-      if (ret)
+-              goto out;
+-
+       ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE,
+                               "AP_PERF_X86_RAPL_ONLINE",
+                               rapl_cpu_online, rapl_cpu_offline);
+       if (ret)
+-              goto out1;
++              goto out;
+       ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1);
+       if (ret)
+-              goto out2;
++              goto out1;
+       rapl_advertise();
+       return 0;
+-out2:
+-      cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE);
+ out1:
+-      cpuhp_remove_state(CPUHP_PERF_X86_RAPL_PREP);
++      cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE);
+ out:
+       pr_warn("Initialization failed (%d), disabled\n", ret);
+       cleanup_rapl_pmus();
+@@ -836,7 +829,6 @@ module_init(rapl_pmu_init);
+ static void __exit intel_rapl_exit(void)
+ {
+       cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE);
+-      cpuhp_remove_state_nocalls(CPUHP_PERF_X86_RAPL_PREP);
+       perf_pmu_unregister(&rapl_pmus->pmu);
+       cleanup_rapl_pmus();
+ }
+--- a/include/linux/cpuhotplug.h
++++ b/include/linux/cpuhotplug.h
+@@ -10,7 +10,6 @@ enum cpuhp_state {
+       CPUHP_PERF_X86_PREPARE,
+       CPUHP_PERF_X86_UNCORE_PREP,
+       CPUHP_PERF_X86_AMD_UNCORE_PREP,
+-      CPUHP_PERF_X86_RAPL_PREP,
+       CPUHP_PERF_BFIN,
+       CPUHP_PERF_POWER,
+       CPUHP_PERF_SUPERH,
diff --git a/queue-4.9/revert-leds-handle-suspend-resume-in-heartbeat-trigger.patch b/queue-4.9/revert-leds-handle-suspend-resume-in-heartbeat-trigger.patch
new file mode 100644 (file)
index 0000000..f6ce3e3
--- /dev/null
@@ -0,0 +1,99 @@
+From 436c4c45b5b9562b59cedbb51b7343ab4a6dd8cc Mon Sep 17 00:00:00 2001
+From: Zhang Bo <bo.zhang@nxp.com>
+Date: Tue, 13 Jun 2017 10:39:20 +0800
+Subject: Revert "leds: handle suspend/resume in heartbeat trigger"
+
+From: Zhang Bo <bo.zhang@nxp.com>
+
+commit 436c4c45b5b9562b59cedbb51b7343ab4a6dd8cc upstream.
+
+This reverts commit 5ab92a7cb82c66bf30685583a38a18538e3807db.
+
+System cannot enter suspend mode because of heartbeat led trigger.
+In autosleep_wq, try_to_suspend function will try to enter suspend
+mode in specific period. It will get wakeup_count then call pm_notifier
+chain callback function and freeze processes.
+Heartbeat_pm_notifier is called and it calls led_trigger_unregister to
+change the trigger of led device to none. It will send uevent message
+and the wakeup source count changed. As wakeup_count changed, suspend
+will abort.
+
+Fixes: 5ab92a7cb82c ("leds: handle suspend/resume in heartbeat trigger")
+Signed-off-by: Zhang Bo <bo.zhang@nxp.com>
+Acked-by: Pavel Machek <pavel@ucw.cz>
+Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
+Cc: Geert Uytterhoeven <geert@linux-m68k.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/leds/trigger/ledtrig-heartbeat.c |   31 -------------------------------
+ 1 file changed, 31 deletions(-)
+
+--- a/drivers/leds/trigger/ledtrig-heartbeat.c
++++ b/drivers/leds/trigger/ledtrig-heartbeat.c
+@@ -19,7 +19,6 @@
+ #include <linux/sched.h>
+ #include <linux/leds.h>
+ #include <linux/reboot.h>
+-#include <linux/suspend.h>
+ #include "../leds.h"
+ static int panic_heartbeats;
+@@ -155,30 +154,6 @@ static struct led_trigger heartbeat_led_
+       .deactivate = heartbeat_trig_deactivate,
+ };
+-static int heartbeat_pm_notifier(struct notifier_block *nb,
+-                               unsigned long pm_event, void *unused)
+-{
+-      int rc;
+-
+-      switch (pm_event) {
+-      case PM_SUSPEND_PREPARE:
+-      case PM_HIBERNATION_PREPARE:
+-      case PM_RESTORE_PREPARE:
+-              led_trigger_unregister(&heartbeat_led_trigger);
+-              break;
+-      case PM_POST_SUSPEND:
+-      case PM_POST_HIBERNATION:
+-      case PM_POST_RESTORE:
+-              rc = led_trigger_register(&heartbeat_led_trigger);
+-              if (rc)
+-                      pr_err("could not re-register heartbeat trigger\n");
+-              break;
+-      default:
+-              break;
+-      }
+-      return NOTIFY_DONE;
+-}
+-
+ static int heartbeat_reboot_notifier(struct notifier_block *nb,
+                                    unsigned long code, void *unused)
+ {
+@@ -193,10 +168,6 @@ static int heartbeat_panic_notifier(stru
+       return NOTIFY_DONE;
+ }
+-static struct notifier_block heartbeat_pm_nb = {
+-      .notifier_call = heartbeat_pm_notifier,
+-};
+-
+ static struct notifier_block heartbeat_reboot_nb = {
+       .notifier_call = heartbeat_reboot_notifier,
+ };
+@@ -213,14 +184,12 @@ static int __init heartbeat_trig_init(vo
+               atomic_notifier_chain_register(&panic_notifier_list,
+                                              &heartbeat_panic_nb);
+               register_reboot_notifier(&heartbeat_reboot_nb);
+-              register_pm_notifier(&heartbeat_pm_nb);
+       }
+       return rc;
+ }
+ static void __exit heartbeat_trig_exit(void)
+ {
+-      unregister_pm_notifier(&heartbeat_pm_nb);
+       unregister_reboot_notifier(&heartbeat_reboot_nb);
+       atomic_notifier_chain_unregister(&panic_notifier_list,
+                                        &heartbeat_panic_nb);
index bde31e8d311668b374006a8c17a92108b654ed8f..49ae4f287b46c3a5971d5ed125ed3cc6e7125d12 100644 (file)
@@ -36,3 +36,28 @@ alsa-hda-add-stereo-mic-quirk-for-lenovo-g50-70-17aa-3978.patch
 alsa-firewire-fix-null-pointer-dereference-when-releasing-uninitialized-data-of-iso-resource.patch
 arcv2-pae40-explicitly-set-msb-counterpart-of-slc-region-ops-addresses.patch
 mm-shmem-fix-handling-sys-kernel-mm-transparent_hugepage-shmem_enabled.patch
+i2c-designware-fix-system-suspend.patch
+mm-madvise.c-fix-freeing-of-locked-page-with-madv_free.patch
+fork-fix-incorrect-fput-of-exe_file-causing-use-after-free.patch
+mm-memblock.c-reversed-logic-in-memblock_discard.patch
+drm-release-driver-tracking-before-making-the-object-available-again.patch
+drm-atomic-if-the-atomic-check-fails-return-its-value-first.patch
+drm-rcar-du-fix-crash-in-encoder-failure-error-path.patch
+drm-rcar-du-fix-display-timing-controller-parameter.patch
+drm-rcar-du-fix-h-v-sync-signal-polarity-configuration.patch
+tracing-call-clear_boot_tracer-at-lateinit_sync.patch
+tracing-fix-kmemleak-in-tracing_map_array_free.patch
+tracing-fix-freeing-of-filter-in-create_filter-when-set_str-is-false.patch
+kbuild-linker-script-do-not-match-c-names-unless-ld_dead_code_data_elimination-is-configured.patch
+cifs-fix-df-output-for-users-with-quota-limits.patch
+cifs-return-enametoolong-for-overlong-names-in-cifs_open-cifs_lookup.patch
+nfsd-limit-end-of-page-list-when-decoding-nfsv4-write.patch
+ftrace-check-for-null-ret_stack-on-profile-function-graph-entry-function.patch
+perf-core-fix-group-cpu-task-validation.patch
+perf-probe-fix-funcs-to-show-correct-symbols-for-offline-module.patch
+perf-x86-intel-rapl-make-package-handling-more-robust.patch
+timers-fix-excessive-granularity-of-new-timers-after-a-nohz-idle.patch
+x86-mm-fix-use-after-free-of-ldt_struct.patch
+net-sunrpc-svcsock-fix-null-pointer-exception.patch
+revert-leds-handle-suspend-resume-in-heartbeat-trigger.patch
+netfilter-nat-fix-src-map-lookup.patch
diff --git a/queue-4.9/timers-fix-excessive-granularity-of-new-timers-after-a-nohz-idle.patch b/queue-4.9/timers-fix-excessive-granularity-of-new-timers-after-a-nohz-idle.patch
new file mode 100644 (file)
index 0000000..2b38b9d
--- /dev/null
@@ -0,0 +1,206 @@
+From 2fe59f507a65dbd734b990a11ebc7488f6f87a24 Mon Sep 17 00:00:00 2001
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Tue, 22 Aug 2017 18:43:48 +1000
+Subject: timers: Fix excessive granularity of new timers after a nohz idle
+
+From: Nicholas Piggin <npiggin@gmail.com>
+
+commit 2fe59f507a65dbd734b990a11ebc7488f6f87a24 upstream.
+
+When a timer base is idle, it is forwarded when a new timer is added
+to ensure that granularity does not become excessive. When not idle,
+the timer tick is expected to increment the base.
+
+However there are several problems:
+
+- If an existing timer is modified, the base is forwarded only after
+  the index is calculated.
+
+- The base is not forwarded by add_timer_on.
+
+- There is a window after a timer is restarted from a nohz idle, after
+  it is marked not-idle and before the timer tick on this CPU, where a
+  timer may be added but the ancient base does not get forwarded.
+
+These result in excessive granularity (a 1 jiffy timeout can blow out
+to 100s of jiffies), which cause the rcu lockup detector to trigger,
+among other things.
+
+Fix this by keeping track of whether the timer base has been idle
+since it was last run or forwarded, and if so then forward it before
+adding a new timer.
+
+There is still a case where mod_timer optimises the case of a pending
+timer mod with the same expiry time, where the timer can see excessive
+granularity relative to the new, shorter interval. A comment is added,
+but it's not changed because it is an important fastpath for
+networking.
+
+This has been tested and found to fix the RCU softlockup messages.
+
+Testing was also done with tracing to measure requested versus
+achieved wakeup latencies for all non-deferrable timers in an idle
+system (with no lockup watchdogs running). Wakeup latency relative to
+absolute latency is calculated (note this suffers from round-up skew
+at low absolute times) and analysed:
+
+             max     avg      std
+upstream   506.0    1.20     4.68
+patched      2.0    1.08     0.15
+
+The bug was noticed due to the lockup detector Kconfig changes
+dropping it out of people's .configs and resulting in larger base
+clk skew. When the lockup detectors are enabled, no CPU can go idle for
+longer than 4 seconds, which limits the granularity errors.
+Sub-optimal timer behaviour is observable on a smaller scale in that
+case:
+
+            max     avg      std
+upstream     9.0    1.05     0.19
+patched      2.0    1.04     0.11
+
+Fixes: a683f390b93f ("timers: Forward the wheel clock whenever possible")
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+Tested-by: David Miller <davem@davemloft.net>
+Cc: dzickus@redhat.com
+Cc: sfr@canb.auug.org.au
+Cc: mpe@ellerman.id.au
+Cc: Stephen Boyd <sboyd@codeaurora.org>
+Cc: linuxarm@huawei.com
+Cc: abdhalee@linux.vnet.ibm.com
+Cc: John Stultz <john.stultz@linaro.org>
+Cc: akpm@linux-foundation.org
+Cc: paulmck@linux.vnet.ibm.com
+Cc: torvalds@linux-foundation.org
+Link: http://lkml.kernel.org/r/20170822084348.21436-1-npiggin@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/time/timer.c |   50 +++++++++++++++++++++++++++++++++++++++++---------
+ 1 file changed, 41 insertions(+), 9 deletions(-)
+
+--- a/kernel/time/timer.c
++++ b/kernel/time/timer.c
+@@ -201,6 +201,7 @@ struct timer_base {
+       bool                    migration_enabled;
+       bool                    nohz_active;
+       bool                    is_idle;
++      bool                    must_forward_clk;
+       DECLARE_BITMAP(pending_map, WHEEL_SIZE);
+       struct hlist_head       vectors[WHEEL_SIZE];
+ } ____cacheline_aligned;
+@@ -891,13 +892,19 @@ get_target_base(struct timer_base *base,
+ static inline void forward_timer_base(struct timer_base *base)
+ {
+-      unsigned long jnow = READ_ONCE(jiffies);
++      unsigned long jnow;
+       /*
+-       * We only forward the base when it's idle and we have a delta between
+-       * base clock and jiffies.
++       * We only forward the base when we are idle or have just come out of
++       * idle (must_forward_clk logic), and have a delta between base clock
++       * and jiffies. In the common case, run_timers will take care of it.
+        */
+-      if (!base->is_idle || (long) (jnow - base->clk) < 2)
++      if (likely(!base->must_forward_clk))
++              return;
++
++      jnow = READ_ONCE(jiffies);
++      base->must_forward_clk = base->is_idle;
++      if ((long)(jnow - base->clk) < 2)
+               return;
+       /*
+@@ -973,6 +980,11 @@ __mod_timer(struct timer_list *timer, un
+        * same array bucket then just return:
+        */
+       if (timer_pending(timer)) {
++              /*
++               * The downside of this optimization is that it can result in
++               * larger granularity than you would get from adding a new
++               * timer with this expiry.
++               */
+               if (timer->expires == expires)
+                       return 1;
+@@ -983,6 +995,7 @@ __mod_timer(struct timer_list *timer, un
+                * dequeue/enqueue dance.
+                */
+               base = lock_timer_base(timer, &flags);
++              forward_timer_base(base);
+               clk = base->clk;
+               idx = calc_wheel_index(expires, clk);
+@@ -999,6 +1012,7 @@ __mod_timer(struct timer_list *timer, un
+               }
+       } else {
+               base = lock_timer_base(timer, &flags);
++              forward_timer_base(base);
+       }
+       timer_stats_timer_set_start_info(timer);
+@@ -1028,12 +1042,10 @@ __mod_timer(struct timer_list *timer, un
+                       spin_lock(&base->lock);
+                       WRITE_ONCE(timer->flags,
+                                  (timer->flags & ~TIMER_BASEMASK) | base->cpu);
++                      forward_timer_base(base);
+               }
+       }
+-      /* Try to forward a stale timer base clock */
+-      forward_timer_base(base);
+-
+       timer->expires = expires;
+       /*
+        * If 'idx' was calculated above and the base time did not advance
+@@ -1150,6 +1162,7 @@ void add_timer_on(struct timer_list *tim
+               WRITE_ONCE(timer->flags,
+                          (timer->flags & ~TIMER_BASEMASK) | cpu);
+       }
++      forward_timer_base(base);
+       debug_activate(timer, timer->expires);
+       internal_add_timer(base, timer);
+@@ -1538,10 +1551,16 @@ u64 get_next_timer_interrupt(unsigned lo
+               if (!is_max_delta)
+                       expires = basem + (u64)(nextevt - basej) * TICK_NSEC;
+               /*
+-               * If we expect to sleep more than a tick, mark the base idle:
++               * If we expect to sleep more than a tick, mark the base idle.
++               * Also the tick is stopped so any added timer must forward
++               * the base clk itself to keep granularity small. This idle
++               * logic is only maintained for the BASE_STD base, deferrable
++               * timers may still see large granularity skew (by design).
+                */
+-              if ((expires - basem) > TICK_NSEC)
++              if ((expires - basem) > TICK_NSEC) {
++                      base->must_forward_clk = true;
+                       base->is_idle = true;
++              }
+       }
+       spin_unlock(&base->lock);
+@@ -1651,6 +1670,19 @@ static __latent_entropy void run_timer_s
+ {
+       struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
++      /*
++       * must_forward_clk must be cleared before running timers so that any
++       * timer functions that call mod_timer will not try to forward the
++       * base. idle trcking / clock forwarding logic is only used with
++       * BASE_STD timers.
++       *
++       * The deferrable base does not do idle tracking at all, so we do
++       * not forward it. This can result in very large variations in
++       * granularity for deferrable timers, but they can be deferred for
++       * long periods due to idle.
++       */
++      base->must_forward_clk = false;
++
+       __run_timers(base);
+       if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active)
+               __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
diff --git a/queue-4.9/tracing-call-clear_boot_tracer-at-lateinit_sync.patch b/queue-4.9/tracing-call-clear_boot_tracer-at-lateinit_sync.patch
new file mode 100644 (file)
index 0000000..b6da25e
--- /dev/null
@@ -0,0 +1,39 @@
+From 4bb0f0e73c8c30917d169c4a0f1ac083690c545b Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
+Date: Tue, 1 Aug 2017 12:01:52 -0400
+Subject: tracing: Call clear_boot_tracer() at lateinit_sync
+
+From: Steven Rostedt (VMware) <rostedt@goodmis.org>
+
+commit 4bb0f0e73c8c30917d169c4a0f1ac083690c545b upstream.
+
+The clear_boot_tracer function is used to reset the default_bootup_tracer
+string to prevent it from being accessed after boot, as it originally points
+to init data. But since clear_boot_tracer() is called via the
+late_initcall() call, it races with the initcall for registering the hwlat
+tracer. If someone adds "ftrace=hwlat" to the kernel command line, depending
+on how the linker sets up the text, the saved command line may be cleared,
+and the hwlat tracer never is initialized.
+
+Simply have clear_boot_tracer() be called via late_initcall_sync(), as
+that's for tasks to be called after the late initcalls have run.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=196551
+
+Fixes: e7c15cd8a ("tracing: Added hardware latency tracer")
+Reported-by: Zamir SUN <sztsian@gmail.com>
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/trace/trace.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -7767,4 +7767,4 @@ __init static int clear_boot_tracer(void
+ }
+ fs_initcall(tracer_init_tracefs);
+-late_initcall(clear_boot_tracer);
++late_initcall_sync(clear_boot_tracer);
diff --git a/queue-4.9/tracing-fix-freeing-of-filter-in-create_filter-when-set_str-is-false.patch b/queue-4.9/tracing-fix-freeing-of-filter-in-create_filter-when-set_str-is-false.patch
new file mode 100644 (file)
index 0000000..b317018
--- /dev/null
@@ -0,0 +1,67 @@
+From 8b0db1a5bdfcee0dbfa89607672598ae203c9045 Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
+Date: Wed, 23 Aug 2017 12:46:27 -0400
+Subject: tracing: Fix freeing of filter in create_filter() when set_str is false
+
+From: Steven Rostedt (VMware) <rostedt@goodmis.org>
+
+commit 8b0db1a5bdfcee0dbfa89607672598ae203c9045 upstream.
+
+Performing the following task with kmemleak enabled:
+
+ # cd /sys/kernel/tracing/events/irq/irq_handler_entry/
+ # echo 'enable_event:kmem:kmalloc:3 if irq >' > trigger
+ # echo 'enable_event:kmem:kmalloc:3 if irq > 31' > trigger
+ # echo scan > /sys/kernel/debug/kmemleak
+ # cat /sys/kernel/debug/kmemleak
+unreferenced object 0xffff8800b9290308 (size 32):
+  comm "bash", pid 1114, jiffies 4294848451 (age 141.139s)
+  hex dump (first 32 bytes):
+    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+  backtrace:
+    [<ffffffff81cef5aa>] kmemleak_alloc+0x4a/0xa0
+    [<ffffffff81357938>] kmem_cache_alloc_trace+0x158/0x290
+    [<ffffffff81261c09>] create_filter_start.constprop.28+0x99/0x940
+    [<ffffffff812639c9>] create_filter+0xa9/0x160
+    [<ffffffff81263bdc>] create_event_filter+0xc/0x10
+    [<ffffffff812655e5>] set_trigger_filter+0xe5/0x210
+    [<ffffffff812660c4>] event_enable_trigger_func+0x324/0x490
+    [<ffffffff812652e2>] event_trigger_write+0x1a2/0x260
+    [<ffffffff8138cf87>] __vfs_write+0xd7/0x380
+    [<ffffffff8138f421>] vfs_write+0x101/0x260
+    [<ffffffff8139187b>] SyS_write+0xab/0x130
+    [<ffffffff81cfd501>] entry_SYSCALL_64_fastpath+0x1f/0xbe
+    [<ffffffffffffffff>] 0xffffffffffffffff
+
+The function create_filter() is passed a 'filterp' pointer that gets
+allocated, and if "set_str" is true, it is up to the caller to free it, even
+on error. The problem is that the pointer is not freed by create_filter()
+when set_str is false. This is a bug, and it is not up to the caller to free
+the filter on error if it doesn't care about the string.
+
+Link: http://lkml.kernel.org/r/1502705898-27571-2-git-send-email-chuhu@redhat.com
+
+Fixes: 38b78eb85 ("tracing: Factorize filter creation")
+Reported-by: Chunyu Hu <chuhu@redhat.com>
+Tested-by: Chunyu Hu <chuhu@redhat.com>
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/trace/trace_events_filter.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/kernel/trace/trace_events_filter.c
++++ b/kernel/trace/trace_events_filter.c
+@@ -1926,6 +1926,10 @@ static int create_filter(struct trace_ev
+               if (err && set_str)
+                       append_filter_err(ps, filter);
+       }
++      if (err && !set_str) {
++              free_event_filter(filter);
++              filter = NULL;
++      }
+       create_filter_finish(ps);
+       *filterp = filter;
diff --git a/queue-4.9/tracing-fix-kmemleak-in-tracing_map_array_free.patch b/queue-4.9/tracing-fix-kmemleak-in-tracing_map_array_free.patch
new file mode 100644 (file)
index 0000000..6aee43e
--- /dev/null
@@ -0,0 +1,88 @@
+From 475bb3c69ab05df2a6ecef6acc2393703d134180 Mon Sep 17 00:00:00 2001
+From: Chunyu Hu <chuhu@redhat.com>
+Date: Mon, 14 Aug 2017 18:18:17 +0800
+Subject: tracing: Fix kmemleak in tracing_map_array_free()
+
+From: Chunyu Hu <chuhu@redhat.com>
+
+commit 475bb3c69ab05df2a6ecef6acc2393703d134180 upstream.
+
+kmemleak reported the below leak when I was doing clear of the hist
+trigger. With this patch, the kmemleak is gone.
+
+unreferenced object 0xffff94322b63d760 (size 32):
+  comm "bash", pid 1522, jiffies 4403687962 (age 2442.311s)
+  hex dump (first 32 bytes):
+    00 01 00 00 04 00 00 00 08 00 00 00 ff 00 00 00  ................
+    10 00 00 00 00 00 00 00 80 a8 7a f2 31 94 ff ff  ..........z.1...
+  backtrace:
+    [<ffffffff9e96c27a>] kmemleak_alloc+0x4a/0xa0
+    [<ffffffff9e424cba>] kmem_cache_alloc_trace+0xca/0x1d0
+    [<ffffffff9e377736>] tracing_map_array_alloc+0x26/0x140
+    [<ffffffff9e261be0>] kretprobe_trampoline+0x0/0x50
+    [<ffffffff9e38b935>] create_hist_data+0x535/0x750
+    [<ffffffff9e38bd47>] event_hist_trigger_func+0x1f7/0x420
+    [<ffffffff9e38893d>] event_trigger_write+0xfd/0x1a0
+    [<ffffffff9e44dfc7>] __vfs_write+0x37/0x170
+    [<ffffffff9e44f552>] vfs_write+0xb2/0x1b0
+    [<ffffffff9e450b85>] SyS_write+0x55/0xc0
+    [<ffffffff9e203857>] do_syscall_64+0x67/0x150
+    [<ffffffff9e977ce7>] return_from_SYSCALL_64+0x0/0x6a
+    [<ffffffffffffffff>] 0xffffffffffffffff
+unreferenced object 0xffff9431f27aa880 (size 128):
+  comm "bash", pid 1522, jiffies 4403687962 (age 2442.311s)
+  hex dump (first 32 bytes):
+    00 00 8c 2a 32 94 ff ff 00 f0 8b 2a 32 94 ff ff  ...*2......*2...
+    00 e0 8b 2a 32 94 ff ff 00 d0 8b 2a 32 94 ff ff  ...*2......*2...
+  backtrace:
+    [<ffffffff9e96c27a>] kmemleak_alloc+0x4a/0xa0
+    [<ffffffff9e425348>] __kmalloc+0xe8/0x220
+    [<ffffffff9e3777c1>] tracing_map_array_alloc+0xb1/0x140
+    [<ffffffff9e261be0>] kretprobe_trampoline+0x0/0x50
+    [<ffffffff9e38b935>] create_hist_data+0x535/0x750
+    [<ffffffff9e38bd47>] event_hist_trigger_func+0x1f7/0x420
+    [<ffffffff9e38893d>] event_trigger_write+0xfd/0x1a0
+    [<ffffffff9e44dfc7>] __vfs_write+0x37/0x170
+    [<ffffffff9e44f552>] vfs_write+0xb2/0x1b0
+    [<ffffffff9e450b85>] SyS_write+0x55/0xc0
+    [<ffffffff9e203857>] do_syscall_64+0x67/0x150
+    [<ffffffff9e977ce7>] return_from_SYSCALL_64+0x0/0x6a
+    [<ffffffffffffffff>] 0xffffffffffffffff
+
+Link: http://lkml.kernel.org/r/1502705898-27571-1-git-send-email-chuhu@redhat.com
+
+Fixes: 08d43a5fa063 ("tracing: Add lock-free tracing_map")
+Signed-off-by: Chunyu Hu <chuhu@redhat.com>
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/trace/tracing_map.c |   11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/kernel/trace/tracing_map.c
++++ b/kernel/trace/tracing_map.c
+@@ -221,16 +221,19 @@ void tracing_map_array_free(struct traci
+       if (!a)
+               return;
+-      if (!a->pages) {
+-              kfree(a);
+-              return;
+-      }
++      if (!a->pages)
++              goto free;
+       for (i = 0; i < a->n_pages; i++) {
+               if (!a->pages[i])
+                       break;
+               free_page((unsigned long)a->pages[i]);
+       }
++
++      kfree(a->pages);
++
++ free:
++      kfree(a);
+ }
+ struct tracing_map_array *tracing_map_array_alloc(unsigned int n_elts,
diff --git a/queue-4.9/x86-mm-fix-use-after-free-of-ldt_struct.patch b/queue-4.9/x86-mm-fix-use-after-free-of-ldt_struct.patch
new file mode 100644 (file)
index 0000000..692fe9e
--- /dev/null
@@ -0,0 +1,173 @@
+From ccd5b3235180eef3cfec337df1c8554ab151b5cc Mon Sep 17 00:00:00 2001
+From: Eric Biggers <ebiggers@google.com>
+Date: Thu, 24 Aug 2017 10:50:29 -0700
+Subject: x86/mm: Fix use-after-free of ldt_struct
+
+From: Eric Biggers <ebiggers@google.com>
+
+commit ccd5b3235180eef3cfec337df1c8554ab151b5cc upstream.
+
+The following commit:
+
+  39a0526fb3f7 ("x86/mm: Factor out LDT init from context init")
+
+renamed init_new_context() to init_new_context_ldt() and added a new
+init_new_context() which calls init_new_context_ldt().  However, the
+error code of init_new_context_ldt() was ignored.  Consequently, if a
+memory allocation in alloc_ldt_struct() failed during a fork(), the
+->context.ldt of the new task remained the same as that of the old task
+(due to the memcpy() in dup_mm()).  ldt_struct's are not intended to be
+shared, so a use-after-free occurred after one task exited.
+
+Fix the bug by making init_new_context() pass through the error code of
+init_new_context_ldt().
+
+This bug was found by syzkaller, which encountered the following splat:
+
+    BUG: KASAN: use-after-free in free_ldt_struct.part.2+0x10a/0x150 arch/x86/kernel/ldt.c:116
+    Read of size 4 at addr ffff88006d2cb7c8 by task kworker/u9:0/3710
+
+    CPU: 1 PID: 3710 Comm: kworker/u9:0 Not tainted 4.13.0-rc4-next-20170811 #2
+    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
+    Call Trace:
+     __dump_stack lib/dump_stack.c:16 [inline]
+     dump_stack+0x194/0x257 lib/dump_stack.c:52
+     print_address_description+0x73/0x250 mm/kasan/report.c:252
+     kasan_report_error mm/kasan/report.c:351 [inline]
+     kasan_report+0x24e/0x340 mm/kasan/report.c:409
+     __asan_report_load4_noabort+0x14/0x20 mm/kasan/report.c:429
+     free_ldt_struct.part.2+0x10a/0x150 arch/x86/kernel/ldt.c:116
+     free_ldt_struct arch/x86/kernel/ldt.c:173 [inline]
+     destroy_context_ldt+0x60/0x80 arch/x86/kernel/ldt.c:171
+     destroy_context arch/x86/include/asm/mmu_context.h:157 [inline]
+     __mmdrop+0xe9/0x530 kernel/fork.c:889
+     mmdrop include/linux/sched/mm.h:42 [inline]
+     exec_mmap fs/exec.c:1061 [inline]
+     flush_old_exec+0x173c/0x1ff0 fs/exec.c:1291
+     load_elf_binary+0x81f/0x4ba0 fs/binfmt_elf.c:855
+     search_binary_handler+0x142/0x6b0 fs/exec.c:1652
+     exec_binprm fs/exec.c:1694 [inline]
+     do_execveat_common.isra.33+0x1746/0x22e0 fs/exec.c:1816
+     do_execve+0x31/0x40 fs/exec.c:1860
+     call_usermodehelper_exec_async+0x457/0x8f0 kernel/umh.c:100
+     ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:431
+
+    Allocated by task 3700:
+     save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
+     save_stack+0x43/0xd0 mm/kasan/kasan.c:447
+     set_track mm/kasan/kasan.c:459 [inline]
+     kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551
+     kmem_cache_alloc_trace+0x136/0x750 mm/slab.c:3627
+     kmalloc include/linux/slab.h:493 [inline]
+     alloc_ldt_struct+0x52/0x140 arch/x86/kernel/ldt.c:67
+     write_ldt+0x7b7/0xab0 arch/x86/kernel/ldt.c:277
+     sys_modify_ldt+0x1ef/0x240 arch/x86/kernel/ldt.c:307
+     entry_SYSCALL_64_fastpath+0x1f/0xbe
+
+    Freed by task 3700:
+     save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
+     save_stack+0x43/0xd0 mm/kasan/kasan.c:447
+     set_track mm/kasan/kasan.c:459 [inline]
+     kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524
+     __cache_free mm/slab.c:3503 [inline]
+     kfree+0xca/0x250 mm/slab.c:3820
+     free_ldt_struct.part.2+0xdd/0x150 arch/x86/kernel/ldt.c:121
+     free_ldt_struct arch/x86/kernel/ldt.c:173 [inline]
+     destroy_context_ldt+0x60/0x80 arch/x86/kernel/ldt.c:171
+     destroy_context arch/x86/include/asm/mmu_context.h:157 [inline]
+     __mmdrop+0xe9/0x530 kernel/fork.c:889
+     mmdrop include/linux/sched/mm.h:42 [inline]
+     __mmput kernel/fork.c:916 [inline]
+     mmput+0x541/0x6e0 kernel/fork.c:927
+     copy_process.part.36+0x22e1/0x4af0 kernel/fork.c:1931
+     copy_process kernel/fork.c:1546 [inline]
+     _do_fork+0x1ef/0xfb0 kernel/fork.c:2025
+     SYSC_clone kernel/fork.c:2135 [inline]
+     SyS_clone+0x37/0x50 kernel/fork.c:2129
+     do_syscall_64+0x26c/0x8c0 arch/x86/entry/common.c:287
+     return_from_SYSCALL_64+0x0/0x7a
+
+Here is a C reproducer:
+
+    #include <asm/ldt.h>
+    #include <pthread.h>
+    #include <signal.h>
+    #include <stdlib.h>
+    #include <sys/syscall.h>
+    #include <sys/wait.h>
+    #include <unistd.h>
+
+    static void *fork_thread(void *_arg)
+    {
+        fork();
+    }
+
+    int main(void)
+    {
+        struct user_desc desc = { .entry_number = 8191 };
+
+        syscall(__NR_modify_ldt, 1, &desc, sizeof(desc));
+
+        for (;;) {
+            if (fork() == 0) {
+                pthread_t t;
+
+                srand(getpid());
+                pthread_create(&t, NULL, fork_thread, NULL);
+                usleep(rand() % 10000);
+                syscall(__NR_exit_group, 0);
+            }
+            wait(NULL);
+        }
+    }
+
+Note: the reproducer takes advantage of the fact that alloc_ldt_struct()
+may use vmalloc() to allocate a large ->entries array, and after
+commit:
+
+  5d17a73a2ebe ("vmalloc: back off when the current task is killed")
+
+it is possible for userspace to fail a task's vmalloc() by
+sending a fatal signal, e.g. via exit_group().  It would be more
+difficult to reproduce this bug on kernels without that commit.
+
+This bug only affected kernels with CONFIG_MODIFY_LDT_SYSCALL=y.
+
+Signed-off-by: Eric Biggers <ebiggers@google.com>
+Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-mm@kvack.org
+Fixes: 39a0526fb3f7 ("x86/mm: Factor out LDT init from context init")
+Link: http://lkml.kernel.org/r/20170824175029.76040-1-ebiggers3@gmail.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/mmu_context.h |    4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -116,9 +116,7 @@ static inline int init_new_context(struc
+               mm->context.execute_only_pkey = -1;
+       }
+       #endif
+-      init_new_context_ldt(tsk, mm);
+-
+-      return 0;
++      return init_new_context_ldt(tsk, mm);
+ }
+ static inline void destroy_context(struct mm_struct *mm)
+ {