--- /dev/null
+From eeca958dce0a9231d1969f86196653eb50fcc9b3 Mon Sep 17 00:00:00 2001
+From: Luis Henriques <lhenriques@suse.com>
+Date: Fri, 28 Apr 2017 11:14:04 +0100
+Subject: ceph: fix memory leak in __ceph_setxattr()
+
+From: Luis Henriques <lhenriques@suse.com>
+
+commit eeca958dce0a9231d1969f86196653eb50fcc9b3 upstream.
+
+The ceph_inode_xattr needs to be released when removing an xattr. This
+is easily reproducible by running the 'generic/020' test from xfstests,
+or simply by doing:
+
+ attr -s attr0 -V 0 /mnt/test && attr -r attr0 /mnt/test
+
+While there, also fix the error path.
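+
+In sketch form (simplified from __set_xattr(), not the verbatim kernel
+code): the caller preallocates the tree node, so every early return in
+__set_xattr() must free that node as well, not just the name and value
+buffers:
+
+    /* *newxattr was kmem_cache_alloc()'d by __ceph_setxattr() */
+    if (err) {
+        kfree(name);
+        kfree(val);
+        kfree(*newxattr);   /* was leaked before this patch */
+        return err;
+    }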
+
+Here's the kmemleak splat:
+
+unreferenced object 0xffff88001f86fbc0 (size 64):
+ comm "attr", pid 244, jiffies 4294904246 (age 98.464s)
+ hex dump (first 32 bytes):
+ 40 fa 86 1f 00 88 ff ff 80 32 38 1f 00 88 ff ff @........28.....
+ 00 01 00 00 00 00 ad de 00 02 00 00 00 00 ad de ................
+ backtrace:
+ [<ffffffff81560199>] kmemleak_alloc+0x49/0xa0
+ [<ffffffff810f3e5b>] kmem_cache_alloc+0x9b/0xf0
+ [<ffffffff812b157e>] __ceph_setxattr+0x17e/0x820
+ [<ffffffff812b1c57>] ceph_set_xattr_handler+0x37/0x40
+ [<ffffffff8111fb4b>] __vfs_removexattr+0x4b/0x60
+ [<ffffffff8111fd37>] vfs_removexattr+0x77/0xd0
+ [<ffffffff8111fdd1>] removexattr+0x41/0x60
+ [<ffffffff8111fe65>] path_removexattr+0x75/0xa0
+ [<ffffffff81120aeb>] SyS_lremovexattr+0xb/0x10
+ [<ffffffff81564b20>] entry_SYSCALL_64_fastpath+0x13/0x94
+ [<ffffffffffffffff>] 0xffffffffffffffff
+
+Signed-off-by: Luis Henriques <lhenriques@suse.com>
+Reviewed-by: "Yan, Zheng" <zyan@redhat.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ceph/xattr.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/ceph/xattr.c
++++ b/fs/ceph/xattr.c
+@@ -392,6 +392,7 @@ static int __set_xattr(struct ceph_inode
+
+ if (update_xattr) {
+ int err = 0;
++
+ if (xattr && (flags & XATTR_CREATE))
+ err = -EEXIST;
+ else if (!xattr && (flags & XATTR_REPLACE))
+@@ -399,12 +400,14 @@ static int __set_xattr(struct ceph_inode
+ if (err) {
+ kfree(name);
+ kfree(val);
++ kfree(*newxattr);
+ return err;
+ }
+ if (update_xattr < 0) {
+ if (xattr)
+ __remove_xattr(ci, xattr);
+ kfree(name);
++ kfree(*newxattr);
+ return 0;
+ }
+ }
--- /dev/null
+From 85435d7a15294f9f7ef23469e6aaf7c5dfcc54f0 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Bj=C3=B6rn=20Jacke?= <bj@sernet.de>
+Date: Fri, 5 May 2017 04:36:16 +0200
+Subject: CIFS: add missing SFM mapping for doublequote
+
+From: Björn Jacke <bj@sernet.de>
+
+commit 85435d7a15294f9f7ef23469e6aaf7c5dfcc54f0 upstream.
+
+SFM maps the doublequote character to 0xF020.
+
+Without this patch, creating files with a doublequote in the name fails
+against Windows/Mac servers.
+
+Signed-off-by: Bjoern Jacke <bjacke@samba.org>
+Signed-off-by: Steve French <smfrench@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/cifs_unicode.c | 6 ++++++
+ fs/cifs/cifs_unicode.h | 1 +
+ 2 files changed, 7 insertions(+)
+
+--- a/fs/cifs/cifs_unicode.c
++++ b/fs/cifs/cifs_unicode.c
+@@ -83,6 +83,9 @@ convert_sfm_char(const __u16 src_char, c
+ case SFM_COLON:
+ *target = ':';
+ break;
++ case SFM_DOUBLEQUOTE:
++ *target = '"';
++ break;
+ case SFM_ASTERISK:
+ *target = '*';
+ break;
+@@ -418,6 +421,9 @@ static __le16 convert_to_sfm_char(char s
+ case ':':
+ dest_char = cpu_to_le16(SFM_COLON);
+ break;
++ case '"':
++ dest_char = cpu_to_le16(SFM_DOUBLEQUOTE);
++ break;
+ case '*':
+ dest_char = cpu_to_le16(SFM_ASTERISK);
+ break;
+--- a/fs/cifs/cifs_unicode.h
++++ b/fs/cifs/cifs_unicode.h
+@@ -57,6 +57,7 @@
+ * not conflict (although almost does) with the mapping above.
+ */
+
++#define SFM_DOUBLEQUOTE ((__u16) 0xF020)
+ #define SFM_ASTERISK ((__u16) 0xF021)
+ #define SFM_QUESTION ((__u16) 0xF025)
+ #define SFM_COLON ((__u16) 0xF022)
--- /dev/null
+From 6026685de33b0db5b2b6b0e9b41b3a1a3261033c Mon Sep 17 00:00:00 2001
+From: David Disseldorp <ddiss@suse.de>
+Date: Wed, 3 May 2017 17:39:08 +0200
+Subject: cifs: fix CIFS_ENUMERATE_SNAPSHOTS oops
+
+From: David Disseldorp <ddiss@suse.de>
+
+commit 6026685de33b0db5b2b6b0e9b41b3a1a3261033c upstream.
+
+As with 618763958b22, an open directory may have a NULL private_data
+pointer prior to readdir. The CIFS_ENUMERATE_SNAPSHOTS handler must
+check for this before dereferencing it.
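+
+A minimal userspace trigger, as a sketch (assumes the ioctl and struct
+definitions from fs/cifs/cifs_ioctl.h; error handling omitted):
+
+    int fd = open("/mnt/cifs/dir", O_RDONLY | O_DIRECTORY);
+    struct smb_snapshot_array snaps = {
+        .snapshot_array_size = sizeof(snaps),
+    };
+
+    /* no readdir yet, so private_data (cifsFileInfo) is still NULL */
+    ioctl(fd, CIFS_ENUMERATE_SNAPSHOTS, &snaps);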
+
+Fixes: 834170c85978 ("Enable previous version support")
+Signed-off-by: David Disseldorp <ddiss@suse.de>
+Signed-off-by: Steve French <smfrench@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/ioctl.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/cifs/ioctl.c
++++ b/fs/cifs/ioctl.c
+@@ -213,6 +213,8 @@ long cifs_ioctl(struct file *filep, unsi
+ rc = smb_mnt_get_fsinfo(xid, tcon, (void __user *)arg);
+ break;
+ case CIFS_ENUMERATE_SNAPSHOTS:
++ if (pSMBFile == NULL)
++ break;
+ if (arg == 0) {
+ rc = -EINVAL;
+ goto cifs_ioc_exit;
--- /dev/null
+From d8a6e505d6bba2250852fbc1c1c86fe68aaf9af3 Mon Sep 17 00:00:00 2001
+From: David Disseldorp <ddiss@suse.de>
+Date: Thu, 4 May 2017 00:41:13 +0200
+Subject: cifs: fix CIFS_IOC_GET_MNT_INFO oops
+
+From: David Disseldorp <ddiss@suse.de>
+
+commit d8a6e505d6bba2250852fbc1c1c86fe68aaf9af3 upstream.
+
+An open directory may have a NULL private_data pointer prior to readdir.
+
+Fixes: 0de1f4c6f6c0 ("Add way to query server fs info for smb3")
+Signed-off-by: David Disseldorp <ddiss@suse.de>
+Signed-off-by: Steve French <smfrench@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/ioctl.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/cifs/ioctl.c
++++ b/fs/cifs/ioctl.c
+@@ -209,6 +209,8 @@ long cifs_ioctl(struct file *filep, unsi
+ rc = -EOPNOTSUPP;
+ break;
+ case CIFS_IOC_GET_MNT_INFO:
++ if (pSMBFile == NULL)
++ break;
+ tcon = tlink_tcon(pSMBFile->tlink);
+ rc = smb_mnt_get_fsinfo(xid, tcon, (void __user *)arg);
+ break;
--- /dev/null
+From 0e5c795592930d51fd30d53a2e7b73cba022a29b Mon Sep 17 00:00:00 2001
+From: David Disseldorp <ddiss@suse.de>
+Date: Wed, 3 May 2017 17:39:09 +0200
+Subject: cifs: fix leak in FSCTL_ENUM_SNAPS response handling
+
+From: David Disseldorp <ddiss@suse.de>
+
+commit 0e5c795592930d51fd30d53a2e7b73cba022a29b upstream.
+
+The server may respond with success and an output buffer shorter than
+sizeof(struct smb_snapshot_array). Do not leak the output buffer in
+this case.
+
+Fixes: 834170c85978 ("Enable previous version support")
+Signed-off-by: David Disseldorp <ddiss@suse.de>
+Signed-off-by: Steve French <smfrench@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/smb2ops.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/cifs/smb2ops.c
++++ b/fs/cifs/smb2ops.c
+@@ -942,6 +942,7 @@ smb3_enum_snapshots(const unsigned int x
+ }
+ if (snapshot_in.snapshot_array_size < sizeof(struct smb_snapshot_array)) {
+ rc = -ERANGE;
++ kfree(retbuf);
+ return rc;
+ }
+
--- /dev/null
+From b704e70b7cf48f9b67c07d585168e102dfa30bb4 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Bj=C3=B6rn=20Jacke?= <bj@sernet.de>
+Date: Wed, 3 May 2017 23:47:44 +0200
+Subject: CIFS: fix mapping of SFM_SPACE and SFM_PERIOD
+
+From: Björn Jacke <bj@sernet.de>
+
+commit b704e70b7cf48f9b67c07d585168e102dfa30bb4 upstream.
+
+- trailing space maps to 0xF028
+- trailing period maps to 0xF029
+
+This fix corrects the mapping of file names which have a trailing character
+that would otherwise be illegal (period or space) but is allowed by POSIX.
+
+Signed-off-by: Bjoern Jacke <bjacke@samba.org>
+Signed-off-by: Steve French <smfrench@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/cifs_unicode.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/cifs/cifs_unicode.h
++++ b/fs/cifs/cifs_unicode.h
+@@ -64,8 +64,8 @@
+ #define SFM_LESSTHAN ((__u16) 0xF023)
+ #define SFM_PIPE ((__u16) 0xF027)
+ #define SFM_SLASH ((__u16) 0xF026)
+-#define SFM_PERIOD ((__u16) 0xF028)
+-#define SFM_SPACE ((__u16) 0xF029)
++#define SFM_SPACE ((__u16) 0xF028)
++#define SFM_PERIOD ((__u16) 0xF029)
+
+ /*
+ * Mapping mechanism to use when one of the seven reserved characters is
--- /dev/null
+From 3998e6b87d4258a70df358296d6f1c7234012bfe Mon Sep 17 00:00:00 2001
+From: Rabin Vincent <rabinv@axis.com>
+Date: Wed, 3 May 2017 17:54:01 +0200
+Subject: CIFS: fix oplock break deadlocks
+
+From: Rabin Vincent <rabinv@axis.com>
+
+commit 3998e6b87d4258a70df358296d6f1c7234012bfe upstream.
+
+When the final cifsFileInfo_put() is called from cifsiod and an oplock
+break work is queued, lockdep complains loudly:
+
+ =============================================
+ [ INFO: possible recursive locking detected ]
+ 4.11.0+ #21 Not tainted
+ ---------------------------------------------
+ kworker/0:2/78 is trying to acquire lock:
+ ("cifsiod"){++++.+}, at: flush_work+0x215/0x350
+
+ but task is already holding lock:
+ ("cifsiod"){++++.+}, at: process_one_work+0x255/0x8e0
+
+ other info that might help us debug this:
+ Possible unsafe locking scenario:
+
+ CPU0
+ ----
+ lock("cifsiod");
+ lock("cifsiod");
+
+ *** DEADLOCK ***
+
+ May be due to missing lock nesting notation
+
+ 2 locks held by kworker/0:2/78:
+ #0: ("cifsiod"){++++.+}, at: process_one_work+0x255/0x8e0
+ #1: ((&wdata->work)){+.+...}, at: process_one_work+0x255/0x8e0
+
+ stack backtrace:
+ CPU: 0 PID: 78 Comm: kworker/0:2 Not tainted 4.11.0+ #21
+ Workqueue: cifsiod cifs_writev_complete
+ Call Trace:
+ dump_stack+0x85/0xc2
+ __lock_acquire+0x17dd/0x2260
+ ? match_held_lock+0x20/0x2b0
+ ? trace_hardirqs_off_caller+0x86/0x130
+ ? mark_lock+0xa6/0x920
+ lock_acquire+0xcc/0x260
+ ? lock_acquire+0xcc/0x260
+ ? flush_work+0x215/0x350
+ flush_work+0x236/0x350
+ ? flush_work+0x215/0x350
+ ? destroy_worker+0x170/0x170
+ __cancel_work_timer+0x17d/0x210
+ ? ___preempt_schedule+0x16/0x18
+ cancel_work_sync+0x10/0x20
+ cifsFileInfo_put+0x338/0x7f0
+ cifs_writedata_release+0x2a/0x40
+ ? cifs_writedata_release+0x2a/0x40
+ cifs_writev_complete+0x29d/0x850
+ ? preempt_count_sub+0x18/0xd0
+ process_one_work+0x304/0x8e0
+ worker_thread+0x9b/0x6a0
+ kthread+0x1b2/0x200
+ ? process_one_work+0x8e0/0x8e0
+ ? kthread_create_on_node+0x40/0x40
+ ret_from_fork+0x31/0x40
+
+This is a real warning. Since the oplock break work is queued on the
+same workqueue, this can deadlock if there is only one worker thread
+active for the workqueue (which will be the case during memory
+pressure, when the rescuer thread is handling it).
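+
+The deadlock shape, reduced to a sketch (illustrative work item names,
+not the actual cifs code): a work item running on the workqueue ends up
+flushing another work item queued on that same workqueue.
+
+    /* both items are queued on the same workqueue */
+    static void write_complete(struct work_struct *w)
+    {
+        /* final cifsFileInfo_put() happens here ... */
+        cancel_work_sync(&cfile->oplock_break);
+        /* ... which flushes a work item queued on this same workqueue;
+         * with a single (rescuer) worker, that worker is us: deadlock */
+    }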
+
+Furthermore, there is at least one other kind of hang possible due to
+the oplock break handling if there is only one worker. (This can be
+reproduced without introducing memory pressure by passing 1 for the
+max_active parameter of cifsiod.) cifs_oplock_break() can wait
+indefinitely in filemap_fdatawait() while the cifs_writev_complete()
+work is blocked:
+
+ sysrq: SysRq : Show Blocked State
+ task PC stack pid father
+ kworker/0:1 D 0 16 2 0x00000000
+ Workqueue: cifsiod cifs_oplock_break
+ Call Trace:
+ __schedule+0x562/0xf40
+ ? mark_held_locks+0x4a/0xb0
+ schedule+0x57/0xe0
+ io_schedule+0x21/0x50
+ wait_on_page_bit+0x143/0x190
+ ? add_to_page_cache_lru+0x150/0x150
+ __filemap_fdatawait_range+0x134/0x190
+ ? do_writepages+0x51/0x70
+ filemap_fdatawait_range+0x14/0x30
+ filemap_fdatawait+0x3b/0x40
+ cifs_oplock_break+0x651/0x710
+ ? preempt_count_sub+0x18/0xd0
+ process_one_work+0x304/0x8e0
+ worker_thread+0x9b/0x6a0
+ kthread+0x1b2/0x200
+ ? process_one_work+0x8e0/0x8e0
+ ? kthread_create_on_node+0x40/0x40
+ ret_from_fork+0x31/0x40
+ dd D 0 683 171 0x00000000
+ Call Trace:
+ __schedule+0x562/0xf40
+ ? mark_held_locks+0x29/0xb0
+ schedule+0x57/0xe0
+ io_schedule+0x21/0x50
+ wait_on_page_bit+0x143/0x190
+ ? add_to_page_cache_lru+0x150/0x150
+ __filemap_fdatawait_range+0x134/0x190
+ ? do_writepages+0x51/0x70
+ filemap_fdatawait_range+0x14/0x30
+ filemap_fdatawait+0x3b/0x40
+ filemap_write_and_wait+0x4e/0x70
+ cifs_flush+0x6a/0xb0
+ filp_close+0x52/0xa0
+ __close_fd+0xdc/0x150
+ SyS_close+0x33/0x60
+ entry_SYSCALL_64_fastpath+0x1f/0xbe
+
+ Showing all locks held in the system:
+ 2 locks held by kworker/0:1/16:
+ #0: ("cifsiod"){.+.+.+}, at: process_one_work+0x255/0x8e0
+ #1: ((&cfile->oplock_break)){+.+.+.}, at: process_one_work+0x255/0x8e0
+
+ Showing busy workqueues and worker pools:
+ workqueue cifsiod: flags=0xc
+ pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=1/1
+ in-flight: 16:cifs_oplock_break
+ delayed: cifs_writev_complete, cifs_echo_request
+ pool 0: cpus=0 node=0 flags=0x0 nice=0 hung=0s workers=3 idle: 750 3
+
+Fix these problems by creating a new workqueue (with a rescuer) for
+the oplock break work.
+
+Signed-off-by: Rabin Vincent <rabinv@axis.com>
+Signed-off-by: Steve French <smfrench@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/cifsfs.c | 15 +++++++++++++--
+ fs/cifs/cifsglob.h | 1 +
+ fs/cifs/misc.c | 2 +-
+ fs/cifs/smb2misc.c | 5 +++--
+ 4 files changed, 18 insertions(+), 5 deletions(-)
+
+--- a/fs/cifs/cifsfs.c
++++ b/fs/cifs/cifsfs.c
+@@ -87,6 +87,7 @@ extern mempool_t *cifs_req_poolp;
+ extern mempool_t *cifs_mid_poolp;
+
+ struct workqueue_struct *cifsiod_wq;
++struct workqueue_struct *cifsoplockd_wq;
+ __u32 cifs_lock_secret;
+
+ /*
+@@ -1369,9 +1370,16 @@ init_cifs(void)
+ goto out_clean_proc;
+ }
+
++ cifsoplockd_wq = alloc_workqueue("cifsoplockd",
++ WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
++ if (!cifsoplockd_wq) {
++ rc = -ENOMEM;
++ goto out_destroy_cifsiod_wq;
++ }
++
+ rc = cifs_fscache_register();
+ if (rc)
+- goto out_destroy_wq;
++ goto out_destroy_cifsoplockd_wq;
+
+ rc = cifs_init_inodecache();
+ if (rc)
+@@ -1419,7 +1427,9 @@ out_destroy_inodecache:
+ cifs_destroy_inodecache();
+ out_unreg_fscache:
+ cifs_fscache_unregister();
+-out_destroy_wq:
++out_destroy_cifsoplockd_wq:
++ destroy_workqueue(cifsoplockd_wq);
++out_destroy_cifsiod_wq:
+ destroy_workqueue(cifsiod_wq);
+ out_clean_proc:
+ cifs_proc_clean();
+@@ -1442,6 +1452,7 @@ exit_cifs(void)
+ cifs_destroy_mids();
+ cifs_destroy_inodecache();
+ cifs_fscache_unregister();
++ destroy_workqueue(cifsoplockd_wq);
+ destroy_workqueue(cifsiod_wq);
+ cifs_proc_clean();
+ }
+--- a/fs/cifs/cifsglob.h
++++ b/fs/cifs/cifsglob.h
+@@ -1683,6 +1683,7 @@ void cifs_oplock_break(struct work_struc
+
+ extern const struct slow_work_ops cifs_oplock_break_ops;
+ extern struct workqueue_struct *cifsiod_wq;
++extern struct workqueue_struct *cifsoplockd_wq;
+ extern __u32 cifs_lock_secret;
+
+ extern mempool_t *cifs_mid_poolp;
+--- a/fs/cifs/misc.c
++++ b/fs/cifs/misc.c
+@@ -492,7 +492,7 @@ is_valid_oplock_break(char *buffer, stru
+ CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
+ &pCifsInode->flags);
+
+- queue_work(cifsiod_wq,
++ queue_work(cifsoplockd_wq,
+ &netfile->oplock_break);
+ netfile->oplock_break_cancelled = false;
+
+--- a/fs/cifs/smb2misc.c
++++ b/fs/cifs/smb2misc.c
+@@ -499,7 +499,7 @@ smb2_tcon_has_lease(struct cifs_tcon *tc
+ else
+ cfile->oplock_break_cancelled = true;
+
+- queue_work(cifsiod_wq, &cfile->oplock_break);
++ queue_work(cifsoplockd_wq, &cfile->oplock_break);
+ kfree(lw);
+ return true;
+ }
+@@ -643,7 +643,8 @@ smb2_is_valid_oplock_break(char *buffer,
+ CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
+ &cinode->flags);
+ spin_unlock(&cfile->file_info_lock);
+- queue_work(cifsiod_wq, &cfile->oplock_break);
++ queue_work(cifsoplockd_wq,
++ &cfile->oplock_break);
+
+ spin_unlock(&tcon->open_file_lock);
+ spin_unlock(&cifs_tcp_ses_lock);
--- /dev/null
+From 7d0c234fd2e1c9ca3fa032696c0c58b1b74a9e0b Mon Sep 17 00:00:00 2001
+From: Sachin Prabhu <sprabhu@redhat.com>
+Date: Wed, 26 Apr 2017 17:10:17 +0100
+Subject: Do not return number of bytes written for ioctl CIFS_IOC_COPYCHUNK_FILE
+
+From: Sachin Prabhu <sprabhu@redhat.com>
+
+commit 7d0c234fd2e1c9ca3fa032696c0c58b1b74a9e0b upstream.
+
+commit 620d8745b35d ("Introduce cifs_copy_file_range()") changes the
+behaviour of the cifs ioctl call CIFS_IOC_COPYCHUNK_FILE. In case of
+successful writes, it now returns the number of bytes written. This
+return value is treated as an error by the xfstest cifs/001. Depending
+on the errno set at that time, this may or may not result in the test
+failing.
+
+The patch fixes this by setting the return value to 0 in case of
+successful writes.
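+
+Callers follow the usual ioctl(2) convention of treating any nonzero
+return as a failure, roughly (sketch, not the actual xfstest code):
+
+    if (ioctl(dst_fd, CIFS_IOC_COPYCHUNK_FILE, src_fd))
+        err(1, "copychunk");  /* fired on *success* before this patch */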
+
+Fixes: commit 620d8745b35d ("Introduce cifs_copy_file_range()")
+Reported-by: Eryu Guan <eguan@redhat.com>
+Signed-off-by: Sachin Prabhu <sprabhu@redhat.com>
+Acked-by: Pavel Shilovsky <pshilov@microsoft.com>
+Signed-off-by: Steve French <smfrench@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/ioctl.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/cifs/ioctl.c
++++ b/fs/cifs/ioctl.c
+@@ -74,7 +74,8 @@ static long cifs_ioctl_copychunk(unsigne
+
+ rc = cifs_file_copychunk_range(xid, src_file.file, 0, dst_file, 0,
+ src_inode->i_size, 0);
+-
++ if (rc > 0)
++ rc = 0;
+ out_fput:
+ fdput(src_file);
+ out_drop_write:
--- /dev/null
+From 7b4cc9787fe35b3ee2dfb1c35e22eafc32e00c33 Mon Sep 17 00:00:00 2001
+From: Eric Biggers <ebiggers@google.com>
+Date: Sun, 30 Apr 2017 00:10:50 -0400
+Subject: ext4: evict inline data when writing to memory map
+
+From: Eric Biggers <ebiggers@google.com>
+
+commit 7b4cc9787fe35b3ee2dfb1c35e22eafc32e00c33 upstream.
+
+Currently the case of writing via mmap to a file with inline data is not
+handled. This may be a rare case, since it requires a writable memory
+map of a very small file, but it is trivial to trigger on an
+inline_data filesystem, and it causes the
+'BUG_ON(ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA));' in
+ext4_writepages() to be hit:
+
+ mkfs.ext4 -O inline_data /dev/vdb
+ mount /dev/vdb /mnt
+ xfs_io -f /mnt/file \
+ -c 'pwrite 0 1' \
+ -c 'mmap -w 0 1m' \
+ -c 'mwrite 0 1' \
+ -c 'fsync'
+
+ kernel BUG at fs/ext4/inode.c:2723!
+ invalid opcode: 0000 [#1] SMP
+ CPU: 1 PID: 2532 Comm: xfs_io Not tainted 4.11.0-rc1-xfstests-00301-g071d9acf3d1f #633
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-20170228_101828-anatol 04/01/2014
+ task: ffff88003d3a8040 task.stack: ffffc90000300000
+ RIP: 0010:ext4_writepages+0xc89/0xf8a
+ RSP: 0018:ffffc90000303ca0 EFLAGS: 00010283
+ RAX: 0000028410000000 RBX: ffff8800383fa3b0 RCX: ffffffff812afcdc
+ RDX: 00000a9d00000246 RSI: ffffffff81e660e0 RDI: 0000000000000246
+ RBP: ffffc90000303dc0 R08: 0000000000000002 R09: 869618e8f99b4fa5
+ R10: 00000000852287a2 R11: 00000000a03b49f4 R12: ffff88003808e698
+ R13: 0000000000000000 R14: 7fffffffffffffff R15: 7fffffffffffffff
+ FS: 00007fd3e53094c0(0000) GS:ffff88003e400000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 00007fd3e4c51000 CR3: 000000003d554000 CR4: 00000000003406e0
+ Call Trace:
+ ? _raw_spin_unlock+0x27/0x2a
+ ? kvm_clock_read+0x1e/0x20
+ do_writepages+0x23/0x2c
+ ? do_writepages+0x23/0x2c
+ __filemap_fdatawrite_range+0x80/0x87
+ filemap_write_and_wait_range+0x67/0x8c
+ ext4_sync_file+0x20e/0x472
+ vfs_fsync_range+0x8e/0x9f
+ ? syscall_trace_enter+0x25b/0x2d0
+ vfs_fsync+0x1c/0x1e
+ do_fsync+0x31/0x4a
+ SyS_fsync+0x10/0x14
+ do_syscall_64+0x69/0x131
+ entry_SYSCALL64_slow_path+0x25/0x25
+
+We could try to be smart and keep the inline data in this case, or at
+least support delayed allocation when allocating the block, but these
+solutions would be more complicated and don't seem worthwhile given how
+rare this case seems to be. So just fix the bug by calling
+ext4_convert_inline_data() when we're asked to make a page writable, so
+that any inline data gets evicted, with the block allocated immediately.
+
+Reported-by: Nick Alcock <nick.alcock@oracle.com>
+Reviewed-by: Andreas Dilger <adilger@dilger.ca>
+Signed-off-by: Eric Biggers <ebiggers@google.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/inode.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -5874,6 +5874,11 @@ int ext4_page_mkwrite(struct vm_fault *v
+ file_update_time(vma->vm_file);
+
+ down_read(&EXT4_I(inode)->i_mmap_sem);
++
++ ret = ext4_convert_inline_data(inode);
++ if (ret)
++ goto out_ret;
++
+ /* Delalloc case is easy... */
+ if (test_opt(inode->i_sb, DELALLOC) &&
+ !ext4_should_journal_data(inode) &&
--- /dev/null
+From cd8c42968ee651b69e00f8661caff32b0086e82d Mon Sep 17 00:00:00 2001
+From: Sachin Prabhu <sprabhu@redhat.com>
+Date: Wed, 26 Apr 2017 14:05:46 +0100
+Subject: Fix match_prepath()
+
+From: Sachin Prabhu <sprabhu@redhat.com>
+
+commit cd8c42968ee651b69e00f8661caff32b0086e82d upstream.
+
+The incorrect return value for shares not using a prefix path means
+that we will never match superblocks for these shares.
+
+Fixes: commit c1d8b24d1819 ("Compare prepaths when comparing superblocks")
+Signed-off-by: Sachin Prabhu <sprabhu@redhat.com>
+Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
+Signed-off-by: Steve French <smfrench@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/connect.c | 14 ++++++--------
+ 1 file changed, 6 insertions(+), 8 deletions(-)
+
+--- a/fs/cifs/connect.c
++++ b/fs/cifs/connect.c
+@@ -2912,16 +2912,14 @@ match_prepath(struct super_block *sb, st
+ {
+ struct cifs_sb_info *old = CIFS_SB(sb);
+ struct cifs_sb_info *new = mnt_data->cifs_sb;
++ bool old_set = old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH;
++ bool new_set = new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH;
+
+- if (old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) {
+- if (!(new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH))
+- return 0;
+- /* The prepath should be null terminated strings */
+- if (strcmp(new->prepath, old->prepath))
+- return 0;
+-
++ if (old_set && new_set && !strcmp(new->prepath, old->prepath))
++ return 1;
++ else if (!old_set && !new_set)
+ return 1;
+- }
++
+ return 0;
+ }
+
--- /dev/null
+From a5f6a6a9c72eac38a7fadd1a038532bc8516337c Mon Sep 17 00:00:00 2001
+From: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Date: Wed, 3 May 2017 14:56:02 -0700
+Subject: fs/block_dev: always invalidate cleancache in invalidate_bdev()
+
+From: Andrey Ryabinin <aryabinin@virtuozzo.com>
+
+commit a5f6a6a9c72eac38a7fadd1a038532bc8516337c upstream.
+
+invalidate_bdev() calls cleancache_invalidate_inode() iff ->nrpages != 0,
+which doesn't make any sense.
+
+Make sure that invalidate_bdev() always calls cleancache_invalidate_inode()
+regardless of mapping->nrpages value.
+
+Fixes: c515e1fd361c ("mm/fs: add hooks to support cleancache")
+Link: http://lkml.kernel.org/r/20170424164135.22350-3-aryabinin@virtuozzo.com
+Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Acked-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Cc: Alexander Viro <viro@zeniv.linux.org.uk>
+Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Alexey Kuznetsov <kuznet@virtuozzo.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Nikolay Borisov <n.borisov.lkml@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/block_dev.c | 11 +++++------
+ 1 file changed, 5 insertions(+), 6 deletions(-)
+
+--- a/fs/block_dev.c
++++ b/fs/block_dev.c
+@@ -103,12 +103,11 @@ void invalidate_bdev(struct block_device
+ {
+ struct address_space *mapping = bdev->bd_inode->i_mapping;
+
+- if (mapping->nrpages == 0)
+- return;
+-
+- invalidate_bh_lrus();
+- lru_add_drain_all(); /* make sure all lru add caches are flushed */
+- invalidate_mapping_pages(mapping, 0, -1);
++ if (mapping->nrpages) {
++ invalidate_bh_lrus();
++ lru_add_drain_all(); /* make sure all lru add caches are flushed */
++ invalidate_mapping_pages(mapping, 0, -1);
++ }
+ /* 99% of the time, we don't need to flush the cleancache on the bdev.
+ * But, for the strange corners, lets be cautious
+ */
--- /dev/null
+From 55635ba76ef91f26b418702ace5e6287eb727f6a Mon Sep 17 00:00:00 2001
+From: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Date: Wed, 3 May 2017 14:55:59 -0700
+Subject: fs: fix data invalidation in the cleancache during direct IO
+
+From: Andrey Ryabinin <aryabinin@virtuozzo.com>
+
+commit 55635ba76ef91f26b418702ace5e6287eb727f6a upstream.
+
+Patch series "Properly invalidate data in the cleancache", v2.
+
+We've noticed that after a direct IO write, a buffered read sometimes
+gets stale data coming from the cleancache. The reason for this is
+that some direct write hooks call invalidate_inode_pages2[_range]()
+conditionally, iff mapping->nrpages is not zero, so we may not
+invalidate data in the cleancache.
+
+Another odd thing is that we check only for ->nrpages and don't check
+for ->nrexceptional, but invalidate_inode_pages2[_range] invalidates
+exceptional entries as well. So we invalidate exceptional entries only
+if ->nrpages != 0? This doesn't feel right.
+
+ - Patch 1 fixes direct IO writes by removing ->nrpages check.
+ - Patch 2 fixes similar case in invalidate_bdev().
+ Note: I only fixed conditional cleancache_invalidate_inode() here.
+ Do we also need to add an ->nrexceptional check into invalidate_bdev()?
+
+ - Patches 3-4: some optimizations.
+
+This patch (of 4):
+
+Some direct IO write fs hooks call invalidate_inode_pages2[_range]()
+conditionally, iff mapping->nrpages is not zero. This can't be right,
+because invalidate_inode_pages2[_range]() also invalidates data in the
+cleancache via a cleancache_invalidate_inode() call. So if the page
+cache is empty but there is some data in the cleancache, a buffered
+read after a direct IO write would get stale data from the cleancache.
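+
+Concretely (sketch): (1) a buffered write puts data in the page cache
+and, after reclaim, a copy in the cleancache; (2) a direct IO write
+finds mapping->nrpages == 0, so the old code skips invalidation and the
+cleancache copy survives with the pre-DIO contents; (3) a buffered read
+misses the page cache and cleancache_get_page() hands back the stale
+data. The fix drops the ->nrpages guard:
+
+    ret = invalidate_inode_pages2_range(mapping,
+            pos >> PAGE_SHIFT, end);    /* now unconditional */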
+
+Also it doesn't feel right to check only for ->nrpages because
+invalidate_inode_pages2[_range] invalidates exceptional entries as well.
+
+Fix this by calling invalidate_inode_pages2[_range]() regardless of
+nrpages state.
+
+Note: nfs, cifs and 9p don't need a similar fix because they never call
+cleancache_get_page() (neither directly nor via mpage_readpage[s]()),
+so they are not affected by this bug.
+
+Fixes: c515e1fd361c ("mm/fs: add hooks to support cleancache")
+Link: http://lkml.kernel.org/r/20170424164135.22350-2-aryabinin@virtuozzo.com
+Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Acked-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Cc: Alexander Viro <viro@zeniv.linux.org.uk>
+Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Alexey Kuznetsov <kuznet@virtuozzo.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Nikolay Borisov <n.borisov.lkml@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/iomap.c | 20 +++++++++-----------
+ mm/filemap.c | 26 +++++++++++---------------
+ 2 files changed, 20 insertions(+), 26 deletions(-)
+
+--- a/fs/iomap.c
++++ b/fs/iomap.c
+@@ -887,16 +887,14 @@ iomap_dio_rw(struct kiocb *iocb, struct
+ flags |= IOMAP_WRITE;
+ }
+
+- if (mapping->nrpages) {
+- ret = filemap_write_and_wait_range(mapping, start, end);
+- if (ret)
+- goto out_free_dio;
+-
+- ret = invalidate_inode_pages2_range(mapping,
+- start >> PAGE_SHIFT, end >> PAGE_SHIFT);
+- WARN_ON_ONCE(ret);
+- ret = 0;
+- }
++ ret = filemap_write_and_wait_range(mapping, start, end);
++ if (ret)
++ goto out_free_dio;
++
++ ret = invalidate_inode_pages2_range(mapping,
++ start >> PAGE_SHIFT, end >> PAGE_SHIFT);
++ WARN_ON_ONCE(ret);
++ ret = 0;
+
+ inode_dio_begin(inode);
+
+@@ -951,7 +949,7 @@ iomap_dio_rw(struct kiocb *iocb, struct
+ * one is a pretty crazy thing to do, so we don't support it 100%. If
+ * this invalidation fails, tough, the write still worked...
+ */
+- if (iov_iter_rw(iter) == WRITE && mapping->nrpages) {
++ if (iov_iter_rw(iter) == WRITE) {
+ int err = invalidate_inode_pages2_range(mapping,
+ start >> PAGE_SHIFT, end >> PAGE_SHIFT);
+ WARN_ON_ONCE(err);
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -2719,18 +2719,16 @@ generic_file_direct_write(struct kiocb *
+ * about to write. We do this *before* the write so that we can return
+ * without clobbering -EIOCBQUEUED from ->direct_IO().
+ */
+- if (mapping->nrpages) {
+- written = invalidate_inode_pages2_range(mapping,
++ written = invalidate_inode_pages2_range(mapping,
+ pos >> PAGE_SHIFT, end);
+- /*
+- * If a page can not be invalidated, return 0 to fall back
+- * to buffered write.
+- */
+- if (written) {
+- if (written == -EBUSY)
+- return 0;
+- goto out;
+- }
++ /*
++ * If a page can not be invalidated, return 0 to fall back
++ * to buffered write.
++ */
++ if (written) {
++ if (written == -EBUSY)
++ return 0;
++ goto out;
+ }
+
+ data = *from;
+@@ -2744,10 +2742,8 @@ generic_file_direct_write(struct kiocb *
+ * so we don't support it 100%. If this invalidation
+ * fails, tough, the write still worked...
+ */
+- if (mapping->nrpages) {
+- invalidate_inode_pages2_range(mapping,
+- pos >> PAGE_SHIFT, end);
+- }
++ invalidate_inode_pages2_range(mapping,
++ pos >> PAGE_SHIFT, end);
+
+ if (written > 0) {
+ pos += written;
--- /dev/null
+From 81be3dee96346fbe08c31be5ef74f03f6b63cf68 Mon Sep 17 00:00:00 2001
+From: Michal Hocko <mhocko@suse.com>
+Date: Mon, 8 May 2017 15:57:24 -0700
+Subject: fs/xattr.c: zero out memory copied to userspace in getxattr
+
+From: Michal Hocko <mhocko@suse.com>
+
+commit 81be3dee96346fbe08c31be5ef74f03f6b63cf68 upstream.
+
+getxattr uses vmalloc to allocate memory if kzalloc fails. This buffer
+is filled by vfs_getxattr and then copied to userspace. vmalloc,
+however, doesn't zero out the memory, so if the specific implementation
+of the xattr handler is sloppy we can theoretically expose kernel
+memory. There is no real sign this is actually the case, but let's make
+sure it cannot happen and use vzalloc instead.
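+
+The exposure path, as a sketch (simplified from the getxattr() syscall;
+how much of the buffer a handler initializes is up to the filesystem):
+
+    error = vfs_getxattr(d, kname, kvalue, size);
+    /* a sloppy handler may return a length while having written fewer
+     * bytes; with plain vmalloc() the tail is uninitialized kernel
+     * memory, and the copy below would leak it to userspace */
+    if (error > 0 && copy_to_user(value, kvalue, error))
+        error = -EFAULT;
+
+Later kernels have kvzalloc() for exactly this kzalloc-then-vzalloc
+fallback.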
+
+Fixes: 779302e67835 ("fs/xattr.c:getxattr(): improve handling of allocation failures")
+Link: http://lkml.kernel.org/r/20170306103327.2766-1-mhocko@kernel.org
+Acked-by: Kees Cook <keescook@chromium.org>
+Reported-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Michal Hocko <mhocko@suse.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xattr.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/xattr.c
++++ b/fs/xattr.c
+@@ -530,7 +530,7 @@ getxattr(struct dentry *d, const char __
+ size = XATTR_SIZE_MAX;
+ kvalue = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
+ if (!kvalue) {
+- kvalue = vmalloc(size);
++ kvalue = vzalloc(size);
+ if (!kvalue)
+ return -ENOMEM;
+ }
--- /dev/null
+From 4be3a4fa51f432ef045546d16f25c68a1ab525b9 Mon Sep 17 00:00:00 2001
+From: Parav Pandit <parav@mellanox.com>
+Date: Sun, 19 Mar 2017 10:55:55 +0200
+Subject: IB/core: Fix kernel crash during fail to initialize device
+
+From: Parav Pandit <parav@mellanox.com>
+
+commit 4be3a4fa51f432ef045546d16f25c68a1ab525b9 upstream.
+
+This patch fixes the kernel crash that occurs when ib_dealloc_device()
+is called because the provider driver failed with an error after
+ib_alloc_device() and before it could register using
+ib_register_device().
+
+This crash, seen in the lab as below, can occur with any IB device
+that fails its device initialization before invoking
+ib_register_device().
+
+This patch avoids touching the cache and port immutable structures if
+the device is not yet initialized.
+It also releases the related memory when cache or port immutable data
+structure initialization fails during ib_register_device().
+
+[81416.561946] BUG: unable to handle kernel NULL pointer dereference at (null)
+[81416.570340] IP: ib_cache_release_one+0x29/0x80 [ib_core]
+[81416.576222] PGD 78da66067
+[81416.576223] PUD 7f2d7c067
+[81416.579484] PMD 0
+[81416.582720]
+[81416.587242] Oops: 0000 [#1] SMP
+[81416.722395] task: ffff8807887515c0 task.stack: ffffc900062c0000
+[81416.729148] RIP: 0010:ib_cache_release_one+0x29/0x80 [ib_core]
+[81416.735793] RSP: 0018:ffffc900062c3a90 EFLAGS: 00010202
+[81416.741823] RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000
+[81416.749785] RDX: 0000000000000000 RSI: 0000000000000282 RDI: ffff880859fec000
+[81416.757757] RBP: ffffc900062c3aa0 R08: ffff8808536e5ac0 R09: ffff880859fec5b0
+[81416.765708] R10: 00000000536e5c01 R11: ffff8808536e5ac0 R12: ffff880859fec000
+[81416.773672] R13: 0000000000000000 R14: ffff8808536e5ac0 R15: ffff88084ebc0060
+[81416.781621] FS: 00007fd879fab740(0000) GS:ffff88085fac0000(0000) knlGS:0000000000000000
+[81416.790522] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[81416.797094] CR2: 0000000000000000 CR3: 00000007eb215000 CR4: 00000000003406e0
+[81416.805051] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[81416.812997] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+[81416.820950] Call Trace:
+[81416.824226] ib_device_release+0x1e/0x40 [ib_core]
+[81416.829858] device_release+0x32/0xa0
+[81416.834370] kobject_cleanup+0x63/0x170
+[81416.839058] kobject_put+0x25/0x50
+[81416.843319] ib_dealloc_device+0x25/0x40 [ib_core]
+[81416.848986] mlx5_ib_add+0x163/0x1990 [mlx5_ib]
+[81416.854414] mlx5_add_device+0x5a/0x160 [mlx5_core]
+[81416.860191] mlx5_register_interface+0x8d/0xc0 [mlx5_core]
+[81416.866587] ? 0xffffffffa09e9000
+[81416.870816] mlx5_ib_init+0x15/0x17 [mlx5_ib]
+[81416.876094] do_one_initcall+0x51/0x1b0
+[81416.880861] ? __vunmap+0x85/0xd0
+[81416.885113] ? kmem_cache_alloc_trace+0x14b/0x1b0
+[81416.890768] ? vfree+0x2e/0x70
+[81416.894762] do_init_module+0x60/0x1fa
+[81416.899441] load_module+0x15f6/0x1af0
+[81416.904114] ? __symbol_put+0x60/0x60
+[81416.908709] ? ima_post_read_file+0x3d/0x80
+[81416.913828] ? security_kernel_post_read_file+0x6b/0x80
+[81416.920006] SYSC_finit_module+0xa6/0xf0
+[81416.924888] SyS_finit_module+0xe/0x10
+[81416.929568] entry_SYSCALL_64_fastpath+0x1a/0xa9
+[81416.935089] RIP: 0033:0x7fd879494949
+[81416.939543] RSP: 002b:00007ffdbc1b4e58 EFLAGS: 00000202 ORIG_RAX: 0000000000000139
+[81416.947982] RAX: ffffffffffffffda RBX: 0000000001b66f00 RCX: 00007fd879494949
+[81416.955965] RDX: 0000000000000000 RSI: 000000000041a13c RDI: 0000000000000003
+[81416.963926] RBP: 0000000000000003 R08: 0000000000000000 R09: 0000000001b652a0
+[81416.971861] R10: 0000000000000003 R11: 0000000000000202 R12: 00007ffdbc1b3e70
+[81416.979763] R13: 00007ffdbc1b3e50 R14: 0000000000000005 R15: 0000000000000000
+[81417.008005] RIP: ib_cache_release_one+0x29/0x80 [ib_core] RSP: ffffc900062c3a90
+[81417.016045] CR2: 0000000000000000
+
+Fixes: 55aeed0654 ("IB/core: Make ib_alloc_device init the kobject")
+Fixes: 7738613e7c ("IB/core: Add per port immutable struct to ib_device")
+Reviewed-by: Daniel Jurgens <danielj@mellanox.com>
+Signed-off-by: Parav Pandit <parav@mellanox.com>
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/core/device.c | 33 ++++++++++++++++++++++-----------
+ 1 file changed, 22 insertions(+), 11 deletions(-)
+
+--- a/drivers/infiniband/core/device.c
++++ b/drivers/infiniband/core/device.c
+@@ -172,8 +172,16 @@ static void ib_device_release(struct dev
+ {
+ struct ib_device *dev = container_of(device, struct ib_device, dev);
+
+- ib_cache_release_one(dev);
+- kfree(dev->port_immutable);
++ WARN_ON(dev->reg_state == IB_DEV_REGISTERED);
++ if (dev->reg_state == IB_DEV_UNREGISTERED) {
++ /*
++ * In IB_DEV_UNINITIALIZED state, cache or port table
++ * is not even created. Free cache and port table only when
++ * device reaches UNREGISTERED state.
++ */
++ ib_cache_release_one(dev);
++ kfree(dev->port_immutable);
++ }
+ kfree(dev);
+ }
+
+@@ -380,32 +388,27 @@ int ib_register_device(struct ib_device
+ ret = ib_cache_setup_one(device);
+ if (ret) {
+ pr_warn("Couldn't set up InfiniBand P_Key/GID cache\n");
+- goto out;
++ goto port_cleanup;
+ }
+
+ ret = ib_device_register_rdmacg(device);
+ if (ret) {
+ pr_warn("Couldn't register device with rdma cgroup\n");
+- ib_cache_cleanup_one(device);
+- goto out;
++ goto cache_cleanup;
+ }
+
+ memset(&device->attrs, 0, sizeof(device->attrs));
+ ret = device->query_device(device, &device->attrs, &uhw);
+ if (ret) {
+ pr_warn("Couldn't query the device attributes\n");
+- ib_device_unregister_rdmacg(device);
+- ib_cache_cleanup_one(device);
+- goto out;
++ goto cache_cleanup;
+ }
+
+ ret = ib_device_register_sysfs(device, port_callback);
+ if (ret) {
+ pr_warn("Couldn't register device %s with driver model\n",
+ device->name);
+- ib_device_unregister_rdmacg(device);
+- ib_cache_cleanup_one(device);
+- goto out;
++ goto cache_cleanup;
+ }
+
+ device->reg_state = IB_DEV_REGISTERED;
+@@ -417,6 +420,14 @@ int ib_register_device(struct ib_device
+ down_write(&lists_rwsem);
+ list_add_tail(&device->core_list, &device_list);
+ up_write(&lists_rwsem);
++ mutex_unlock(&device_mutex);
++ return 0;
++
++cache_cleanup:
++ ib_cache_cleanup_one(device);
++ ib_cache_release_one(device);
++port_cleanup:
++ kfree(device->port_immutable);
+ out:
+ mutex_unlock(&device_mutex);
+ return ret;
--- /dev/null
+From b312be3d87e4c80872cbea869e569175c5eb0f9a Mon Sep 17 00:00:00 2001
+From: Jack Morgenstein <jackm@dev.mellanox.co.il>
+Date: Sun, 19 Mar 2017 10:55:57 +0200
+Subject: IB/core: Fix sysfs registration error flow
+
+From: Jack Morgenstein <jackm@dev.mellanox.co.il>
+
+commit b312be3d87e4c80872cbea869e569175c5eb0f9a upstream.
+
+The kernel commit cited below restructured ib device management
+so that the device kobject is initialized in ib_alloc_device.
+
+As part of the restructuring, the kobject is now initialized in
+procedure ib_alloc_device, and is later added to the device hierarchy
+in the ib_register_device call stack, in procedure
+ib_device_register_sysfs (which calls device_add).
+
+However, in the ib_device_register_sysfs error flow, if an error
+occurs following the call to device_add, the cleanup procedure
+device_unregister is called. This call results in the device object
+being deleted -- which results in various use-after-free crashes.
+
+The correct cleanup call is device_del -- which undoes device_add
+without deleting the device object.
+
+The device object will then (correctly) be deleted in the
+ib_register_device caller's error cleanup flow, when the caller invokes
+ib_dealloc_device.
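+
+For reference, device_unregister() is essentially (drivers/base/core.c):
+
+    void device_unregister(struct device *dev)
+    {
+        device_del(dev);
+        put_device(dev);  /* extra reference drop -> premature free */
+    }
+
+so calling it in this error path dropped a reference that the
+ib_register_device() caller still owned.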
+
+Fixes: 55aeed06544f6 ("IB/core: Make ib_alloc_device init the kobject")
+Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/core/sysfs.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/infiniband/core/sysfs.c
++++ b/drivers/infiniband/core/sysfs.c
+@@ -1301,7 +1301,7 @@ err_put:
+ free_port_list_attributes(device);
+
+ err_unregister:
+- device_unregister(class_dev);
++ device_del(class_dev);
+
+ err:
+ return ret;
--- /dev/null
+From 8561eae60ff9417a50fa1fb2b83ae950dc5c1e21 Mon Sep 17 00:00:00 2001
+From: "Michael J. Ruhl" <michael.j.ruhl@intel.com>
+Date: Sun, 9 Apr 2017 10:15:51 -0700
+Subject: IB/core: For multicast functions, verify that LIDs are multicast LIDs
+
+From: Michael J. Ruhl <michael.j.ruhl@intel.com>
+
+commit 8561eae60ff9417a50fa1fb2b83ae950dc5c1e21 upstream.
+
+The Infiniband spec defines "A multicast address is defined by a
+MGID and a MLID" (section 10.5). Currently the MLID value is not
+validated.
+
+Add check to verify that the MLID value is in the correct address
+range.
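+
+Per the spec, multicast LIDs occupy 0xC000..0xFFFE, with 0xFFFF being
+the permissive LID, so the added check amounts to (sketch; the
+constants are stored in big-endian form):
+
+    /* IB_MULTICAST_LID_BASE == cpu_to_be16(0xC000),
+     * IB_LID_PERMISSIVE    == cpu_to_be16(0xFFFF)  */
+    if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
+        lid == be16_to_cpu(IB_LID_PERMISSIVE))
+        return -EINVAL;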
+
+Fixes: 0c33aeedb2cf ("[IB] Add checks to multicast attach and detach")
+Reviewed-by: Ira Weiny <ira.weiny@intel.com>
+Reviewed-by: Dasaratharaman Chandramouli <dasaratharaman.chandramouli@intel.com>
+Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
+Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
+Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/core/verbs.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/drivers/infiniband/core/verbs.c
++++ b/drivers/infiniband/core/verbs.c
+@@ -1519,7 +1519,9 @@ int ib_attach_mcast(struct ib_qp *qp, un
+
+ if (!qp->device->attach_mcast)
+ return -ENOSYS;
+- if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
++ if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD ||
++ lid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
++ lid == be16_to_cpu(IB_LID_PERMISSIVE))
+ return -EINVAL;
+
+ ret = qp->device->attach_mcast(qp, gid, lid);
+@@ -1535,7 +1537,9 @@ int ib_detach_mcast(struct ib_qp *qp, un
+
+ if (!qp->device->detach_mcast)
+ return -ENOSYS;
+- if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
++ if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD ||
++ lid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
++ lid == be16_to_cpu(IB_LID_PERMISSIVE))
+ return -EINVAL;
+
+ ret = qp->device->detach_mcast(qp, gid, lid);
--- /dev/null
+From b6eac931b9bb2bce4db7032c35b41e5e34ec22a5 Mon Sep 17 00:00:00 2001
+From: Mike Marciniszyn <mike.marciniszyn@intel.com>
+Date: Sun, 9 Apr 2017 10:16:35 -0700
+Subject: IB/hfi1: Prevent kernel QP post send hard lockups
+
+From: Mike Marciniszyn <mike.marciniszyn@intel.com>
+
+commit b6eac931b9bb2bce4db7032c35b41e5e34ec22a5 upstream.
+
+The driver progress routines can call cond_resched() when
+a timeslice is exhausted and irqs are enabled.
+
+If the ULP had been holding a spin lock without disabling irqs and
+the post send directly called the progress routine, the cond_resched()
+could yield allowing another thread from the same ULP to deadlock
+on that same lock.
+
+Correct by replacing the current hfi1_do_send() calldown with a unique
+one for post send and adding an argument to hfi1_do_send() to indicate
+that the send engine is running in a thread. If the routine is not
+running in a thread, avoid calling cond_resched().
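+
+The hazard, as a sketch (illustrative ULP code, not a specific caller);
+on a !CONFIG_PREEMPT kernel cond_resched() may schedule even though a
+spinlock is held:
+
+    spin_lock(&ulp_lock);           /* irqs left enabled          */
+    ib_post_send(qp, wr, &bad_wr);  /* calls directly into the    */
+                                    /* driver progress routine,   */
+                                    /* which hits cond_resched()  */
+    /* another ULP thread scheduled onto this CPU now spins on
+     * ulp_lock and never yields: hard lockup */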
+
+Fixes: Commit 831464ce4b74 ("IB/hfi1: Don't call cond_resched in atomic mode when sending packets")
+Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
+Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
+Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/hw/hfi1/ruc.c | 26 ++++++++++++++++----------
+ drivers/infiniband/hw/hfi1/verbs.c | 4 ++--
+ drivers/infiniband/hw/hfi1/verbs.h | 6 ++++--
+ 3 files changed, 22 insertions(+), 14 deletions(-)
+
+--- a/drivers/infiniband/hw/hfi1/ruc.c
++++ b/drivers/infiniband/hw/hfi1/ruc.c
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright(c) 2015, 2016 Intel Corporation.
++ * Copyright(c) 2015 - 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+@@ -784,23 +784,29 @@ void hfi1_make_ruc_header(struct rvt_qp
+ /* when sending, force a reschedule every one of these periods */
+ #define SEND_RESCHED_TIMEOUT (5 * HZ) /* 5s in jiffies */
+
++void hfi1_do_send_from_rvt(struct rvt_qp *qp)
++{
++ hfi1_do_send(qp, false);
++}
++
+ void _hfi1_do_send(struct work_struct *work)
+ {
+ struct iowait *wait = container_of(work, struct iowait, iowork);
+ struct rvt_qp *qp = iowait_to_qp(wait);
+
+- hfi1_do_send(qp);
++ hfi1_do_send(qp, true);
+ }
+
+ /**
+ * hfi1_do_send - perform a send on a QP
+ * @work: contains a pointer to the QP
++ * @in_thread: true if in a workqueue thread
+ *
+ * Process entries in the send work queue until credit or queue is
+ * exhausted. Only allow one CPU to send a packet per QP.
+ * Otherwise, two threads could send packets out of order.
+ */
+-void hfi1_do_send(struct rvt_qp *qp)
++void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
+ {
+ struct hfi1_pkt_state ps;
+ struct hfi1_qp_priv *priv = qp->priv;
+@@ -868,8 +874,10 @@ void hfi1_do_send(struct rvt_qp *qp)
+ qp->s_hdrwords = 0;
+ /* allow other tasks to run */
+ if (unlikely(time_after(jiffies, timeout))) {
+- if (workqueue_congested(cpu,
+- ps.ppd->hfi1_wq)) {
++ if (!in_thread ||
++ workqueue_congested(
++ cpu,
++ ps.ppd->hfi1_wq)) {
+ spin_lock_irqsave(
+ &qp->s_lock,
+ ps.flags);
+@@ -882,11 +890,9 @@ void hfi1_do_send(struct rvt_qp *qp)
+ *ps.ppd->dd->send_schedule);
+ return;
+ }
+- if (!irqs_disabled()) {
+- cond_resched();
+- this_cpu_inc(
+- *ps.ppd->dd->send_schedule);
+- }
++ cond_resched();
++ this_cpu_inc(
++ *ps.ppd->dd->send_schedule);
+ timeout = jiffies + (timeout_int) / 8;
+ }
+ spin_lock_irqsave(&qp->s_lock, ps.flags);
+--- a/drivers/infiniband/hw/hfi1/verbs.c
++++ b/drivers/infiniband/hw/hfi1/verbs.c
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright(c) 2015, 2016 Intel Corporation.
++ * Copyright(c) 2015 - 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+@@ -1751,7 +1751,7 @@ int hfi1_register_ib_device(struct hfi1_
+ dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free;
+ dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps;
+ dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset;
+- dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send;
++ dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send_from_rvt;
+ dd->verbs_dev.rdi.driver_f.schedule_send = hfi1_schedule_send;
+ dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _hfi1_schedule_send;
+ dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = get_pmtu_from_attr;
+--- a/drivers/infiniband/hw/hfi1/verbs.h
++++ b/drivers/infiniband/hw/hfi1/verbs.h
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright(c) 2015, 2016 Intel Corporation.
++ * Copyright(c) 2015 - 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+@@ -350,7 +350,9 @@ void hfi1_make_ruc_header(struct rvt_qp
+
+ void _hfi1_do_send(struct work_struct *work);
+
+-void hfi1_do_send(struct rvt_qp *qp);
++void hfi1_do_send_from_rvt(struct rvt_qp *qp);
++
++void hfi1_do_send(struct rvt_qp *qp, bool in_thread);
+
+ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
+ enum ib_wc_status status);
--- /dev/null
+From 771a52584096c45e4565e8aabb596eece9d73d61 Mon Sep 17 00:00:00 2001
+From: Shamir Rabinovitch <shamir.rabinovitch@oracle.com>
+Date: Wed, 29 Mar 2017 06:21:59 -0400
+Subject: IB/IPoIB: ibX: failed to create mcg debug file
+
+From: Shamir Rabinovitch <shamir.rabinovitch@oracle.com>
+
+commit 771a52584096c45e4565e8aabb596eece9d73d61 upstream.
+
+When udev renames a netdev device, the ipoib debugfs entries do not
+get renamed. As a result, if a subsequent probe of an ipoib device
+reuses the name, creating a debugfs entry for the new device fails.
+
+Also, move ipoib_create_debug_files and ipoib_delete_debug_files into
+the ipoib event handling in order to avoid any race condition between
+them.
+
+Fixes: 1732b0ef3b3a ("[IPoIB] add path record information in debugfs")
+Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
+Signed-off-by: Shamir Rabinovitch <shamir.rabinovitch@oracle.com>
+Reviewed-by: Mark Bloch <markb@mellanox.com>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/ulp/ipoib/ipoib_fs.c | 3 ++
+ drivers/infiniband/ulp/ipoib/ipoib_main.c | 44 ++++++++++++++++++++++++++----
+ drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 3 --
+ 3 files changed, 42 insertions(+), 8 deletions(-)
+
+--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
++++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+@@ -281,8 +281,11 @@ void ipoib_delete_debug_files(struct net
+ {
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+
++ WARN_ONCE(!priv->mcg_dentry, "null mcg debug file\n");
++ WARN_ONCE(!priv->path_dentry, "null path debug file\n");
+ debugfs_remove(priv->mcg_dentry);
+ debugfs_remove(priv->path_dentry);
++ priv->mcg_dentry = priv->path_dentry = NULL;
+ }
+
+ int ipoib_register_debugfs(void)
+--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
++++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
+@@ -108,6 +108,33 @@ static struct ib_client ipoib_client = {
+ .get_net_dev_by_params = ipoib_get_net_dev_by_params,
+ };
+
++#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
++static int ipoib_netdev_event(struct notifier_block *this,
++ unsigned long event, void *ptr)
++{
++ struct netdev_notifier_info *ni = ptr;
++ struct net_device *dev = ni->dev;
++
++ if (dev->netdev_ops->ndo_open != ipoib_open)
++ return NOTIFY_DONE;
++
++ switch (event) {
++ case NETDEV_REGISTER:
++ ipoib_create_debug_files(dev);
++ break;
++ case NETDEV_CHANGENAME:
++ ipoib_delete_debug_files(dev);
++ ipoib_create_debug_files(dev);
++ break;
++ case NETDEV_UNREGISTER:
++ ipoib_delete_debug_files(dev);
++ break;
++ }
++
++ return NOTIFY_DONE;
++}
++#endif
++
+ int ipoib_open(struct net_device *dev)
+ {
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+@@ -1674,8 +1701,6 @@ void ipoib_dev_cleanup(struct net_device
+
+ ASSERT_RTNL();
+
+- ipoib_delete_debug_files(dev);
+-
+ /* Delete any child interfaces first */
+ list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
+ /* Stop GC on child */
+@@ -2090,8 +2115,6 @@ static struct net_device *ipoib_add_port
+ goto register_failed;
+ }
+
+- ipoib_create_debug_files(priv->dev);
+-
+ if (ipoib_cm_add_mode_attr(priv->dev))
+ goto sysfs_failed;
+ if (ipoib_add_pkey_attr(priv->dev))
+@@ -2106,7 +2129,6 @@ static struct net_device *ipoib_add_port
+ return priv->dev;
+
+ sysfs_failed:
+- ipoib_delete_debug_files(priv->dev);
+ unregister_netdev(priv->dev);
+
+ register_failed:
+@@ -2191,6 +2213,12 @@ static void ipoib_remove_one(struct ib_d
+ kfree(dev_list);
+ }
+
++#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
++static struct notifier_block ipoib_netdev_notifier = {
++ .notifier_call = ipoib_netdev_event,
++};
++#endif
++
+ static int __init ipoib_init_module(void)
+ {
+ int ret;
+@@ -2243,6 +2271,9 @@ static int __init ipoib_init_module(void
+ if (ret)
+ goto err_client;
+
++#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
++ register_netdevice_notifier(&ipoib_netdev_notifier);
++#endif
+ return 0;
+
+ err_client:
+@@ -2260,6 +2291,9 @@ err_fs:
+
+ static void __exit ipoib_cleanup_module(void)
+ {
++#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
++ unregister_netdevice_notifier(&ipoib_netdev_notifier);
++#endif
+ ipoib_netlink_fini();
+ ib_unregister_client(&ipoib_client);
+ ib_sa_unregister_client(&ipoib_sa_client);
+--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
++++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+@@ -86,8 +86,6 @@ int __ipoib_vlan_add(struct ipoib_dev_pr
+ goto register_failed;
+ }
+
+- ipoib_create_debug_files(priv->dev);
+-
+ /* RTNL childs don't need proprietary sysfs entries */
+ if (type == IPOIB_LEGACY_CHILD) {
+ if (ipoib_cm_add_mode_attr(priv->dev))
+@@ -108,7 +106,6 @@ int __ipoib_vlan_add(struct ipoib_dev_pr
+
+ sysfs_failed:
+ result = -ENOMEM;
+- ipoib_delete_debug_files(priv->dev);
+ unregister_netdevice(priv->dev);
+
+ register_failed:
--- /dev/null
+From 99e68909d5aba1861897fe7afc3306c3c81b6de0 Mon Sep 17 00:00:00 2001
+From: Jack Morgenstein <jackm@dev.mellanox.co.il>
+Date: Tue, 21 Mar 2017 12:57:05 +0200
+Subject: IB/mlx4: Fix ib device initialization error flow
+
+From: Jack Morgenstein <jackm@dev.mellanox.co.il>
+
+commit 99e68909d5aba1861897fe7afc3306c3c81b6de0 upstream.
+
+In mlx4_ib_add, procedure mlx4_ib_alloc_eqs is called to allocate EQs.
+
+However, in the mlx4_ib_add error flow, procedure mlx4_ib_free_eqs is not
+called to free the allocated EQs.
+
+Fixes: e605b743f33d ("IB/mlx4: Increase the number of vectors (EQs) available for ULPs")
+Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/hw/mlx4/main.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/infiniband/hw/mlx4/main.c
++++ b/drivers/infiniband/hw/mlx4/main.c
+@@ -2941,6 +2941,7 @@ err_counter:
+ mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[i]);
+
+ err_map:
++ mlx4_ib_free_eqs(dev, ibdev);
+ iounmap(ibdev->uar_map);
+
+ err_uar:
--- /dev/null
+From fb7a91746af18b2ebf596778b38a709cdbc488d3 Mon Sep 17 00:00:00 2001
+From: Jack Morgenstein <jackm@dev.mellanox.co.il>
+Date: Tue, 21 Mar 2017 12:57:06 +0200
+Subject: IB/mlx4: Reduce SRIOV multicast cleanup warning message to debug level
+
+From: Jack Morgenstein <jackm@dev.mellanox.co.il>
+
+commit fb7a91746af18b2ebf596778b38a709cdbc488d3 upstream.
+
+A warning message during SRIOV multicast cleanup should have actually been
+a debug level message. The condition generating the warning does no harm
+and can fill the message log.
+
+In some cases, during testing, some tests were so intense as to swamp the
+message log with these warning messages, causing a stall in the console
+message log output task. This stall caused an NMI to be sent to all CPUs
+(so that they all dumped their stacks into the message log).
+Aside from the message flood causing an NMI, the tests all passed.
+
+Once the message flood which caused the NMI is removed (by reducing the
+warning message to debug level), the NMI no longer occurs.
+
+Sample message log (console log) output illustrating the flood and
+resultant NMI (snippets with comments and modified with ... instead
+of hex digits, to satisfy checkpatch.pl):
+
+ <mlx4_ib> _mlx4_ib_mcg_port_cleanup: ... WARNING: group refcount 1!!!...
+ *** About 4000 almost identical lines in less than one second ***
+ <mlx4_ib> _mlx4_ib_mcg_port_cleanup: ... WARNING: group refcount 1!!!...
+ INFO: rcu_sched detected stalls on CPUs/tasks: { 17} (...)
+ *** { 17} above indicates that CPU 17 was the one that stalled ***
+ sending NMI to all CPUs:
+ ...
+ NMI backtrace for cpu 17
+ CPU: 17 PID: 45909 Comm: kworker/17:2
+ Hardware name: HP ProLiant DL360p Gen8, BIOS P71 09/08/2013
+ Workqueue: events fb_flashcursor
+ task: ffff880478...... ti: ffff88064e...... task.ti: ffff88064e......
+ RIP: 0010:[ffffffff81......] [ffffffff81......] io_serial_in+0x15/0x20
+ RSP: 0018:ffff88064e257cb0 EFLAGS: 00000002
+ RAX: 0000000000...... RBX: ffffffff81...... RCX: 0000000000......
+ RDX: 0000000000...... RSI: 0000000000...... RDI: ffffffff81......
+ RBP: ffff88064e...... R08: ffffffff81...... R09: 0000000000......
+ R10: 0000000000...... R11: ffff88064e...... R12: 0000000000......
+ R13: 0000000000...... R14: ffffffff81...... R15: 0000000000......
+ FS: 0000000000......(0000) GS:ffff8804af......(0000) knlGS:000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080......
+ CR2: 00007f2a2f...... CR3: 0000000001...... CR4: 0000000000......
+ DR0: 0000000000...... DR1: 0000000000...... DR2: 0000000000......
+ DR3: 0000000000...... DR6: 00000000ff...... DR7: 0000000000......
+ Stack:
+ ffff88064e...... ffffffff81...... ffffffff81...... 0000000000......
+ ffffffff81...... ffff88064e...... ffffffff81...... ffffffff81......
+ ffffffff81...... ffff88064e...... ffffffff81...... 0000000000......
+ Call Trace:
+[<ffffffff813d099b>] wait_for_xmitr+0x3b/0xa0
+[<ffffffff813d0b5c>] serial8250_console_putchar+0x1c/0x30
+[<ffffffff813d0b40>] ? serial8250_console_write+0x140/0x140
+[<ffffffff813cb5fa>] uart_console_write+0x3a/0x80
+[<ffffffff813d0aae>] serial8250_console_write+0xae/0x140
+[<ffffffff8107c4d1>] call_console_drivers.constprop.15+0x91/0xf0
+[<ffffffff8107d6cf>] console_unlock+0x3bf/0x400
+[<ffffffff813503cd>] fb_flashcursor+0x5d/0x140
+[<ffffffff81355c30>] ? bit_clear+0x120/0x120
+[<ffffffff8109d5fb>] process_one_work+0x17b/0x470
+[<ffffffff8109e3cb>] worker_thread+0x11b/0x400
+[<ffffffff8109e2b0>] ? rescuer_thread+0x400/0x400
+[<ffffffff810a5aef>] kthread+0xcf/0xe0
+[<ffffffff810a5a20>] ? kthread_create_on_node+0x140/0x140
+[<ffffffff81645858>] ret_from_fork+0x58/0x90
+[<ffffffff810a5a20>] ? kthread_create_on_node+0x140/0x140
+Code: 48 89 e5 d3 e6 48 63 f6 48 03 77 10 8b 06 5d c3 66 0f 1f 44 00 00 66 66 66 6
+
+As indicated in the stack trace above, the console output task got swamped.
+
+Fixes: b9c5d6a64358 ("IB/mlx4: Add multicast group (MCG) paravirtualization for SR-IOV")
+Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/hw/mlx4/mcg.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/infiniband/hw/mlx4/mcg.c
++++ b/drivers/infiniband/hw/mlx4/mcg.c
+@@ -1102,7 +1102,8 @@ static void _mlx4_ib_mcg_port_cleanup(st
+ while ((p = rb_first(&ctx->mcg_table)) != NULL) {
+ group = rb_entry(p, struct mcast_group, node);
+ if (atomic_read(&group->refcount))
+- mcg_warn_group(group, "group refcount %d!!! (pointer %p)\n", atomic_read(&group->refcount), group);
++ mcg_debug_group(group, "group refcount %d!!! (pointer %p)\n",
++ atomic_read(&group->refcount), group);
+
+ force_clean_group(group);
+ }
--- /dev/null
+From a6a5993243550b09f620941dea741b7421fdf79c Mon Sep 17 00:00:00 2001
+From: Ding Tianhong <dingtianhong@huawei.com>
+Date: Sat, 29 Apr 2017 10:38:48 +0800
+Subject: iov_iter: don't revert iov buffer if csum error
+
+From: Ding Tianhong <dingtianhong@huawei.com>
+
+commit a6a5993243550b09f620941dea741b7421fdf79c upstream.
+
+Commit 327868212381 ("make skb_copy_datagram_msg() et.al. preserve
+->msg_iter on error") reverts the iov buffer if the copy to the
+iterator fails, but no datagram has been copied yet when the
+skb_checksum_complete() check fails, so there is no need to revert any
+data at that point.
+
+v2: Sabrina noticed that returning -EFAULT on a checksum error is not
+    correct here, as it would confuse the caller about the return
+    value, so fix it.
+
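+In sketch form, the two failure paths now behave differently (an
+annotated restatement of the hunks below, not new code):
+
+    /* checksum fails before anything was copied: the iterator is
+     * untouched, so just report the error without reverting */
+    if (__skb_checksum_complete(skb))
+        return -EINVAL;
+
+    /* checksum fails after skb_copy_and_csum_datagram() has filled
+     * the iterator: undo exactly the bytes that were copied */
+    if (csum_fold(csum)) {
+        iov_iter_revert(&msg->msg_iter, chunk);
+        return -EINVAL;
+    }
+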
+Fixes: 327868212381 ("make skb_copy_datagram_msg() et.al. preserve->msg_iter on error")
+Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
+Acked-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/core/datagram.c | 13 +++++++------
+ 1 file changed, 7 insertions(+), 6 deletions(-)
+
+--- a/net/core/datagram.c
++++ b/net/core/datagram.c
+@@ -760,7 +760,7 @@ int skb_copy_and_csum_datagram_msg(struc
+
+ if (msg_data_left(msg) < chunk) {
+ if (__skb_checksum_complete(skb))
+- goto csum_error;
++ return -EINVAL;
+ if (skb_copy_datagram_msg(skb, hlen, msg, chunk))
+ goto fault;
+ } else {
+@@ -768,15 +768,16 @@ int skb_copy_and_csum_datagram_msg(struc
+ if (skb_copy_and_csum_datagram(skb, hlen, &msg->msg_iter,
+ chunk, &csum))
+ goto fault;
+- if (csum_fold(csum))
+- goto csum_error;
++
++ if (csum_fold(csum)) {
++ iov_iter_revert(&msg->msg_iter, chunk);
++ return -EINVAL;
++ }
++
+ if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
+ netdev_rx_csum_fault(skb->dev);
+ }
+ return 0;
+-csum_error:
+- iov_iter_revert(&msg->msg_iter, chunk);
+- return -EINVAL;
+ fault:
+ return -EFAULT;
+ }
--- /dev/null
+From 5052b069acf73866d00077d8bc49983c3ee903e5 Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Sat, 29 Apr 2017 21:07:30 -0400
+Subject: jbd2: fix dbench4 performance regression for 'nobarrier' mounts
+
+From: Jan Kara <jack@suse.cz>
+
+commit 5052b069acf73866d00077d8bc49983c3ee903e5 upstream.
+
+Commit b685d3d65ac7 ("block: treat REQ_FUA and REQ_PREFLUSH as
+synchronous") removed the REQ_SYNC flag from the WRITE_FUA
+implementation. Since JBD2 strips the REQ_FUA and REQ_FLUSH flags from
+submitted IO when the filesystem is mounted with the nobarrier mount
+option, journal superblock writes ended up being async writes after
+this patch, and that caused a heavy performance regression for the
+dbench4 benchmark with a high number of processes. In my test setup
+with an HP RAID array with non-volatile write cache and 32 GB of RAM,
+dbench4 runs with 8 processes regressed by ~25%.
+
+Fix the problem by making sure journal superblock writes are always
+treated as synchronous since they generally block progress of the
+journalling machinery and thus the whole filesystem.
+
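+For context, a simplified sketch of the interaction (the flag stripping
+is paraphrased from fs/jbd2/journal.c; in current code the flush flag
+is spelled REQ_PREFLUSH):
+
+    static int jbd2_write_superblock(journal_t *journal, int write_flags)
+    {
+        struct buffer_head *bh = journal->j_sb_buffer;
+        ...
+        /* 'nobarrier' strips the flags that used to imply REQ_SYNC */
+        if (!(journal->j_flags & JBD2_BARRIER))
+            write_flags &= ~(REQ_FUA | REQ_PREFLUSH);
+        ...
+        /* the fix: force synchronous treatment unconditionally */
+        ret = submit_bh(REQ_OP_WRITE, write_flags | REQ_SYNC, bh);
+        ...
+    }
+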
+Fixes: b685d3d65ac7 ("block: treat REQ_FUA and REQ_PREFLUSH as synchronous")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/jbd2/journal.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/jbd2/journal.c
++++ b/fs/jbd2/journal.c
+@@ -1348,7 +1348,7 @@ static int jbd2_write_superblock(journal
+ jbd2_superblock_csum_set(journal, sb);
+ get_bh(bh);
+ bh->b_end_io = end_buffer_write_sync;
+- ret = submit_bh(REQ_OP_WRITE, write_flags, bh);
++ ret = submit_bh(REQ_OP_WRITE, write_flags | REQ_SYNC, bh);
+ wait_on_buffer(bh);
+ if (buffer_write_io_error(bh)) {
+ clear_buffer_write_io_error(bh);
--- /dev/null
+From 0c9d5b127f695818c2c5a3868c1f28ca2969e905 Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.com>
+Date: Thu, 6 Apr 2017 12:06:37 +1000
+Subject: md/raid1: avoid reusing a resync bio after error handling.
+
+From: NeilBrown <neilb@suse.com>
+
+commit 0c9d5b127f695818c2c5a3868c1f28ca2969e905 upstream.
+
+fix_sync_read_error() modifies a bio on a newly faulty
+device by setting bi_end_io to end_sync_write.
+This ensures that put_buf() will still call rdev_dec_pending()
+as required, but makes sure that subsequent code in
+fix_sync_read_error() doesn't try to read from the device.
+
+Unfortunately this interacts badly with sync_request_write()
+which assumes that any bio with bi_end_io set to non-NULL
+other than end_sync_read is safe to write to.
+
+As the device is now faulty it doesn't make sense to write.
+As the bio was recently used for a read, it is "dirty"
+and not suitable for immediate submission.
+In particular, ->bi_next might be non-NULL, which will cause
+generic_make_request() to complain.
+
+Break this interaction by refusing to write to devices
+which are marked as Faulty.
+
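+The guard itself is a single check (restated from the hunk below, with
+the reasoning as comments):
+
+    /* A device that has just been marked Faulty had its resync bio
+     * repurposed by fix_sync_read_error(); the bio is "dirty" (e.g.
+     * ->bi_next may be non-NULL, which generic_make_request() would
+     * complain about), so never submit a write to it. */
+    if (test_bit(Faulty, &conf->mirrors[i].rdev->flags))
+        continue;
+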
+Reported-and-tested-by: Michael Wang <yun.wang@profitbricks.com>
+Fixes: 2e52d449bcec ("md/raid1: add failfast handling for reads.")
+Signed-off-by: NeilBrown <neilb@suse.com>
+Signed-off-by: Shaohua Li <shli@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/raid1.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/md/raid1.c
++++ b/drivers/md/raid1.c
+@@ -2222,6 +2222,8 @@ static void sync_request_write(struct md
+ (i == r1_bio->read_disk ||
+ !test_bit(MD_RECOVERY_SYNC, &mddev->recovery))))
+ continue;
++ if (test_bit(Faulty, &conf->mirrors[i].rdev->flags))
++ continue;
+
+ bio_set_op_attrs(wbio, REQ_OP_WRITE, 0);
+ if (test_bit(FailFast, &conf->mirrors[i].rdev->flags))
--- /dev/null
+From 62be1511b1db8066220b18b7d4da2e6b9fdc69fb Mon Sep 17 00:00:00 2001
+From: Vlastimil Babka <vbabka@suse.cz>
+Date: Mon, 8 May 2017 15:59:46 -0700
+Subject: mm: prevent potential recursive reclaim due to clearing PF_MEMALLOC
+
+From: Vlastimil Babka <vbabka@suse.cz>
+
+commit 62be1511b1db8066220b18b7d4da2e6b9fdc69fb upstream.
+
+Patch series "more robust PF_MEMALLOC handling"
+
+This series aims to unify the setting and clearing of PF_MEMALLOC, which
+prevents recursive reclaim. There are some places that clear the flag
+unconditionally from current->flags, which may result in clearing a
+pre-existing flag. This already resulted in a bug report that Patch 1
+fixes (without the new helpers, to make backporting easier). Patch 2
+introduces the new helpers, modelled after existing memalloc_noio_* and
+memalloc_nofs_* helpers, and converts mm core to use them. Patches 3
+and 4 convert non-mm code.
+
+This patch (of 4):
+
+__alloc_pages_direct_compact() sets PF_MEMALLOC to prevent deadlock
+during page migration by lock_page() (see the comment in
+__unmap_and_move()). Then it unconditionally clears the flag, which can
+clear a pre-existing PF_MEMALLOC flag and result in recursive reclaim.
+This was not a problem until commit a8161d1ed609 ("mm, page_alloc:
+restructure direct compaction handling in slowpath"), because direct
+compaction was called only after direct reclaim, which was skipped when
+the PF_MEMALLOC flag was set.
+
+Even now it's only a theoretical issue, as the new callsite of
+__alloc_pages_direct_compact() is reached only for costly orders and
+when gfp_pfmemalloc_allowed() is true, which means either
+__GFP_NOMEMALLOC is in gfp_flags or in_interrupt() is true. There is no
+such known context, but let's play it safe and make
+__alloc_pages_direct_compact() robust for cases where PF_MEMALLOC is
+already set.
+
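+The underlying idiom is "save, set, restore" rather than "set, clear".
+Patch 2 of the series wraps it in helpers modelled after the
+memalloc_noio_* ones; roughly (a sketch of the intended shape, the
+exact upstream form may differ):
+
+    static inline unsigned int memalloc_noreclaim_save(void)
+    {
+        unsigned int flags = current->flags & PF_MEMALLOC;
+
+        current->flags |= PF_MEMALLOC;
+        return flags;          /* remember whether it was already set */
+    }
+
+    static inline void memalloc_noreclaim_restore(unsigned int flags)
+    {
+        /* clear the bit only if it was clear on entry */
+        current->flags = (current->flags & ~PF_MEMALLOC) | flags;
+    }
+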
+Fixes: a8161d1ed609 ("mm, page_alloc: restructure direct compaction handling in slowpath")
+Link: http://lkml.kernel.org/r/20170405074700.29871-2-vbabka@suse.cz
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Reported-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Boris Brezillon <boris.brezillon@free-electrons.com>
+Cc: Chris Leech <cleech@redhat.com>
+Cc: "David S. Miller" <davem@davemloft.net>
+Cc: Eric Dumazet <edumazet@google.com>
+Cc: Josef Bacik <jbacik@fb.com>
+Cc: Lee Duncan <lduncan@suse.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Richard Weinberger <richard@nod.at>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/page_alloc.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3245,6 +3245,7 @@ __alloc_pages_direct_compact(gfp_t gfp_m
+ enum compact_priority prio, enum compact_result *compact_result)
+ {
+ struct page *page;
++ unsigned int noreclaim_flag = current->flags & PF_MEMALLOC;
+
+ if (!order)
+ return NULL;
+@@ -3252,7 +3253,7 @@ __alloc_pages_direct_compact(gfp_t gfp_m
+ current->flags |= PF_MEMALLOC;
+ *compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
+ prio);
+- current->flags &= ~PF_MEMALLOC;
++ current->flags = (current->flags & ~PF_MEMALLOC) | noreclaim_flag;
+
+ if (*compact_result <= COMPACT_INACTIVE)
+ return NULL;
--- /dev/null
+From 2a2e48854d704214dac7546e87ae0e4daa0e61a0 Mon Sep 17 00:00:00 2001
+From: Johannes Weiner <hannes@cmpxchg.org>
+Date: Wed, 3 May 2017 14:55:03 -0700
+Subject: mm: vmscan: fix IO/refault regression in cache workingset transition
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+commit 2a2e48854d704214dac7546e87ae0e4daa0e61a0 upstream.
+
+Since commit 59dc76b0d4df ("mm: vmscan: reduce size of inactive file
+list") we noticed bigger IO spikes during changes in cache access
+patterns.
+
+The patch in question shrunk the inactive list size to leave more room
+for the current workingset in the presence of streaming IO. However,
+workingset transitions that previously happened on the inactive list are
+now pushed out of memory and incur more refaults to complete.
+
+This patch disables active list protection when refaults are being
+observed. This accelerates workingset transitions, and allows more of
+the new set to establish itself from memory, without eating into the
+ability to protect the established workingset during stable periods.
+
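+Concretely, the baseline protection ratio that gets bypassed is
+computed as follows (a worked restatement of the hunk below):
+
+    gb = (inactive + active) >> (30 - PAGE_SHIFT);
+    if (gb)
+        inactive_ratio = int_sqrt(10 * gb); /* e.g. 16 GB -> int_sqrt(160) = 12 */
+    else
+        inactive_ratio = 1;
+
+    /* When refaults are observed on the file LRU, the patch forces
+     * inactive_ratio = 0, so "inactive * inactive_ratio < active" is
+     * always true and the active list becomes eligible for shrinking. */
+    return inactive * inactive_ratio < active;
+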
+The workloads that were measurably affected for us were hit pretty bad
+by it, with refault/majfault rates doubling and tripling during cache
+transitions, and the machines sustaining half-hour periods of 100% IO
+utilization, where they'd previously have sub-minute peaks at 60-90%.
+
+Stateful services that handle user data tend to be more conservative
+with kernel upgrades. As a result we hit most page cache issues with
+some delay, as was the case here.
+
+The severity seemed to warrant a stable tag.
+
+Fixes: 59dc76b0d4df ("mm: vmscan: reduce size of inactive file list")
+Link: http://lkml.kernel.org/r/20170404220052.27593-1-hannes@cmpxchg.org
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/memcontrol.h | 64 +++++++++++++++++++++++++++++-
+ include/linux/mmzone.h | 2
+ mm/memcontrol.c | 24 +++--------
+ mm/vmscan.c | 94 +++++++++++++++++++++++++++++++++++----------
+ mm/workingset.c | 7 ++-
+ 5 files changed, 150 insertions(+), 41 deletions(-)
+
+--- a/include/linux/memcontrol.h
++++ b/include/linux/memcontrol.h
+@@ -56,6 +56,9 @@ enum mem_cgroup_stat_index {
+ MEMCG_SLAB_RECLAIMABLE,
+ MEMCG_SLAB_UNRECLAIMABLE,
+ MEMCG_SOCK,
++ MEMCG_WORKINGSET_REFAULT,
++ MEMCG_WORKINGSET_ACTIVATE,
++ MEMCG_WORKINGSET_NODERECLAIM,
+ MEMCG_NR_STAT,
+ };
+
+@@ -494,6 +497,40 @@ extern int do_swap_account;
+ void lock_page_memcg(struct page *page);
+ void unlock_page_memcg(struct page *page);
+
++static inline unsigned long mem_cgroup_read_stat(struct mem_cgroup *memcg,
++ enum mem_cgroup_stat_index idx)
++{
++ long val = 0;
++ int cpu;
++
++ for_each_possible_cpu(cpu)
++ val += per_cpu(memcg->stat->count[idx], cpu);
++
++ if (val < 0)
++ val = 0;
++
++ return val;
++}
++
++static inline void mem_cgroup_update_stat(struct mem_cgroup *memcg,
++ enum mem_cgroup_stat_index idx, int val)
++{
++ if (!mem_cgroup_disabled())
++ this_cpu_add(memcg->stat->count[idx], val);
++}
++
++static inline void mem_cgroup_inc_stat(struct mem_cgroup *memcg,
++ enum mem_cgroup_stat_index idx)
++{
++ mem_cgroup_update_stat(memcg, idx, 1);
++}
++
++static inline void mem_cgroup_dec_stat(struct mem_cgroup *memcg,
++ enum mem_cgroup_stat_index idx)
++{
++ mem_cgroup_update_stat(memcg, idx, -1);
++}
++
+ /**
+ * mem_cgroup_update_page_stat - update page state statistics
+ * @page: the page
+@@ -508,14 +545,14 @@ void unlock_page_memcg(struct page *page
+ * if (TestClearPageState(page))
+ * mem_cgroup_update_page_stat(page, state, -1);
+ * unlock_page(page) or unlock_page_memcg(page)
++ *
++ * Kernel pages are an exception to this, since they'll never move.
+ */
+ static inline void mem_cgroup_update_page_stat(struct page *page,
+ enum mem_cgroup_stat_index idx, int val)
+ {
+- VM_BUG_ON(!(rcu_read_lock_held() || PageLocked(page)));
+-
+ if (page->mem_cgroup)
+- this_cpu_add(page->mem_cgroup->stat->count[idx], val);
++ mem_cgroup_update_stat(page->mem_cgroup, idx, val);
+ }
+
+ static inline void mem_cgroup_inc_page_stat(struct page *page,
+@@ -740,6 +777,27 @@ static inline bool mem_cgroup_oom_synchr
+ return false;
+ }
+
++static inline unsigned long mem_cgroup_read_stat(struct mem_cgroup *memcg,
++ enum mem_cgroup_stat_index idx)
++{
++ return 0;
++}
++
++static inline void mem_cgroup_update_stat(struct mem_cgroup *memcg,
++ enum mem_cgroup_stat_index idx, int val)
++{
++}
++
++static inline void mem_cgroup_inc_stat(struct mem_cgroup *memcg,
++ enum mem_cgroup_stat_index idx)
++{
++}
++
++static inline void mem_cgroup_dec_stat(struct mem_cgroup *memcg,
++ enum mem_cgroup_stat_index idx)
++{
++}
++
+ static inline void mem_cgroup_update_page_stat(struct page *page,
+ enum mem_cgroup_stat_index idx,
+ int nr)
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -226,6 +226,8 @@ struct lruvec {
+ struct zone_reclaim_stat reclaim_stat;
+ /* Evictions & activations on the inactive file list */
+ atomic_long_t inactive_age;
++ /* Refaults at the time of last reclaim cycle */
++ unsigned long refaults;
+ #ifdef CONFIG_MEMCG
+ struct pglist_data *pgdat;
+ #endif
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -568,23 +568,6 @@ mem_cgroup_largest_soft_limit_node(struc
+ * common workload, threshold and synchronization as vmstat[] should be
+ * implemented.
+ */
+-static unsigned long
+-mem_cgroup_read_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx)
+-{
+- long val = 0;
+- int cpu;
+-
+- /* Per-cpu values can be negative, use a signed accumulator */
+- for_each_possible_cpu(cpu)
+- val += per_cpu(memcg->stat->count[idx], cpu);
+- /*
+- * Summing races with updates, so val may be negative. Avoid exposing
+- * transient negative values.
+- */
+- if (val < 0)
+- val = 0;
+- return val;
+-}
+
+ static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
+ enum mem_cgroup_events_index idx)
+@@ -5237,6 +5220,13 @@ static int memory_stat_show(struct seq_f
+ seq_printf(m, "pgmajfault %lu\n",
+ events[MEM_CGROUP_EVENTS_PGMAJFAULT]);
+
++ seq_printf(m, "workingset_refault %lu\n",
++ stat[MEMCG_WORKINGSET_REFAULT]);
++ seq_printf(m, "workingset_activate %lu\n",
++ stat[MEMCG_WORKINGSET_ACTIVATE]);
++ seq_printf(m, "workingset_nodereclaim %lu\n",
++ stat[MEMCG_WORKINGSET_NODERECLAIM]);
++
+ return 0;
+ }
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -2033,6 +2033,8 @@ static void shrink_active_list(unsigned
+ * Both inactive lists should also be large enough that each inactive
+ * page has a chance to be referenced again before it is reclaimed.
+ *
++ * If that fails and refaulting is observed, the inactive list grows.
++ *
+ * The inactive_ratio is the target ratio of ACTIVE to INACTIVE pages
+ * on this LRU, maintained by the pageout code. A zone->inactive_ratio
+ * of 3 means 3:1 or 25% of the pages are kept on the inactive list.
+@@ -2049,12 +2051,15 @@ static void shrink_active_list(unsigned
+ * 10TB 320 32GB
+ */
+ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
+- struct scan_control *sc, bool trace)
++ struct mem_cgroup *memcg,
++ struct scan_control *sc, bool actual_reclaim)
+ {
+- unsigned long inactive_ratio;
+- unsigned long inactive, active;
+- enum lru_list inactive_lru = file * LRU_FILE;
+ enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE;
++ struct pglist_data *pgdat = lruvec_pgdat(lruvec);
++ enum lru_list inactive_lru = file * LRU_FILE;
++ unsigned long inactive, active;
++ unsigned long inactive_ratio;
++ unsigned long refaults;
+ unsigned long gb;
+
+ /*
+@@ -2067,27 +2072,43 @@ static bool inactive_list_is_low(struct
+ inactive = lruvec_lru_size(lruvec, inactive_lru, sc->reclaim_idx);
+ active = lruvec_lru_size(lruvec, active_lru, sc->reclaim_idx);
+
+- gb = (inactive + active) >> (30 - PAGE_SHIFT);
+- if (gb)
+- inactive_ratio = int_sqrt(10 * gb);
++ if (memcg)
++ refaults = mem_cgroup_read_stat(memcg,
++ MEMCG_WORKINGSET_ACTIVATE);
+ else
+- inactive_ratio = 1;
++ refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE);
+
+- if (trace)
+- trace_mm_vmscan_inactive_list_is_low(lruvec_pgdat(lruvec)->node_id,
+- sc->reclaim_idx,
+- lruvec_lru_size(lruvec, inactive_lru, MAX_NR_ZONES), inactive,
+- lruvec_lru_size(lruvec, active_lru, MAX_NR_ZONES), active,
+- inactive_ratio, file);
++ /*
++ * When refaults are being observed, it means a new workingset
++ * is being established. Disable active list protection to get
++ * rid of the stale workingset quickly.
++ */
++ if (file && actual_reclaim && lruvec->refaults != refaults) {
++ inactive_ratio = 0;
++ } else {
++ gb = (inactive + active) >> (30 - PAGE_SHIFT);
++ if (gb)
++ inactive_ratio = int_sqrt(10 * gb);
++ else
++ inactive_ratio = 1;
++ }
++
++ if (actual_reclaim)
++ trace_mm_vmscan_inactive_list_is_low(pgdat->node_id, sc->reclaim_idx,
++ lruvec_lru_size(lruvec, inactive_lru, MAX_NR_ZONES), inactive,
++ lruvec_lru_size(lruvec, active_lru, MAX_NR_ZONES), active,
++ inactive_ratio, file);
+
+ return inactive * inactive_ratio < active;
+ }
+
+ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
+- struct lruvec *lruvec, struct scan_control *sc)
++ struct lruvec *lruvec, struct mem_cgroup *memcg,
++ struct scan_control *sc)
+ {
+ if (is_active_lru(lru)) {
+- if (inactive_list_is_low(lruvec, is_file_lru(lru), sc, true))
++ if (inactive_list_is_low(lruvec, is_file_lru(lru),
++ memcg, sc, true))
+ shrink_active_list(nr_to_scan, lruvec, sc, lru);
+ return 0;
+ }
+@@ -2218,7 +2239,7 @@ static void get_scan_count(struct lruvec
+ * lruvec even if it has plenty of old anonymous pages unless the
+ * system is under heavy pressure.
+ */
+- if (!inactive_list_is_low(lruvec, true, sc, false) &&
++ if (!inactive_list_is_low(lruvec, true, memcg, sc, false) &&
+ lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) {
+ scan_balance = SCAN_FILE;
+ goto out;
+@@ -2376,7 +2397,7 @@ static void shrink_node_memcg(struct pgl
+ nr[lru] -= nr_to_scan;
+
+ nr_reclaimed += shrink_list(lru, nr_to_scan,
+- lruvec, sc);
++ lruvec, memcg, sc);
+ }
+ }
+
+@@ -2443,7 +2464,7 @@ static void shrink_node_memcg(struct pgl
+ * Even if we did not try to evict anon pages at all, we want to
+ * rebalance the anon lru active/inactive ratio.
+ */
+- if (inactive_list_is_low(lruvec, false, sc, true))
++ if (inactive_list_is_low(lruvec, false, memcg, sc, true))
+ shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
+ sc, LRU_ACTIVE_ANON);
+ }
+@@ -2752,6 +2773,26 @@ static void shrink_zones(struct zonelist
+ sc->gfp_mask = orig_mask;
+ }
+
++static void snapshot_refaults(struct mem_cgroup *root_memcg, pg_data_t *pgdat)
++{
++ struct mem_cgroup *memcg;
++
++ memcg = mem_cgroup_iter(root_memcg, NULL, NULL);
++ do {
++ unsigned long refaults;
++ struct lruvec *lruvec;
++
++ if (memcg)
++ refaults = mem_cgroup_read_stat(memcg,
++ MEMCG_WORKINGSET_ACTIVATE);
++ else
++ refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE);
++
++ lruvec = mem_cgroup_lruvec(pgdat, memcg);
++ lruvec->refaults = refaults;
++ } while ((memcg = mem_cgroup_iter(root_memcg, memcg, NULL)));
++}
++
+ /*
+ * This is the main entry point to direct page reclaim.
+ *
+@@ -2772,6 +2813,9 @@ static unsigned long do_try_to_free_page
+ struct scan_control *sc)
+ {
+ int initial_priority = sc->priority;
++ pg_data_t *last_pgdat;
++ struct zoneref *z;
++ struct zone *zone;
+ retry:
+ delayacct_freepages_start();
+
+@@ -2798,6 +2842,15 @@ retry:
+ sc->may_writepage = 1;
+ } while (--sc->priority >= 0);
+
++ last_pgdat = NULL;
++ for_each_zone_zonelist_nodemask(zone, z, zonelist, sc->reclaim_idx,
++ sc->nodemask) {
++ if (zone->zone_pgdat == last_pgdat)
++ continue;
++ last_pgdat = zone->zone_pgdat;
++ snapshot_refaults(sc->target_mem_cgroup, zone->zone_pgdat);
++ }
++
+ delayacct_freepages_end();
+
+ if (sc->nr_reclaimed)
+@@ -3076,7 +3129,7 @@ static void age_active_anon(struct pglis
+ do {
+ struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);
+
+- if (inactive_list_is_low(lruvec, false, sc, true))
++ if (inactive_list_is_low(lruvec, false, memcg, sc, true))
+ shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
+ sc, LRU_ACTIVE_ANON);
+
+@@ -3311,6 +3364,7 @@ static int balance_pgdat(pg_data_t *pgda
+ } while (sc.priority >= 1);
+
+ out:
++ snapshot_refaults(NULL, pgdat);
+ /*
+ * Return the order kswapd stopped reclaiming at as
+ * prepare_kswapd_sleep() takes it into account. If another caller
+--- a/mm/workingset.c
++++ b/mm/workingset.c
+@@ -269,7 +269,6 @@ bool workingset_refault(void *shadow)
+ lruvec = mem_cgroup_lruvec(pgdat, memcg);
+ refault = atomic_long_read(&lruvec->inactive_age);
+ active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE, MAX_NR_ZONES);
+- rcu_read_unlock();
+
+ /*
+ * The unsigned subtraction here gives an accurate distance
+@@ -290,11 +289,15 @@ bool workingset_refault(void *shadow)
+ refault_distance = (refault - eviction) & EVICTION_MASK;
+
+ inc_node_state(pgdat, WORKINGSET_REFAULT);
++ mem_cgroup_inc_stat(memcg, MEMCG_WORKINGSET_REFAULT);
+
+ if (refault_distance <= active_file) {
+ inc_node_state(pgdat, WORKINGSET_ACTIVATE);
++ mem_cgroup_inc_stat(memcg, MEMCG_WORKINGSET_ACTIVATE);
++ rcu_read_unlock();
+ return true;
+ }
++ rcu_read_unlock();
+ return false;
+ }
+
+@@ -472,6 +475,8 @@ static enum lru_status shadow_lru_isolat
+ if (WARN_ON_ONCE(node->exceptional))
+ goto out_invalid;
+ inc_node_state(page_pgdat(virt_to_page(node)), WORKINGSET_NODERECLAIM);
++ mem_cgroup_inc_page_stat(virt_to_page(node),
++ MEMCG_WORKINGSET_NODERECLAIM);
+ __radix_tree_delete_node(&mapping->page_tree, node,
+ workingset_update_node, mapping);
+
--- /dev/null
+From e675c5ec51fe2554719a7b6bcdbef0a770f2c19b Mon Sep 17 00:00:00 2001
+From: Martin Brandenburg <martin@omnibond.com>
+Date: Tue, 25 Apr 2017 15:37:57 -0400
+Subject: orangefs: clean up oversize xattr validation
+
+From: Martin Brandenburg <martin@omnibond.com>
+
+commit e675c5ec51fe2554719a7b6bcdbef0a770f2c19b upstream.
+
+Also, don't check flags, as they have already been validated by the VFS.
+
+Fix an off-by-one error in the max size checking.
+
+Stop logging just because userspace wants to write attributes which do
+not fit.
+
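+The off-by-one in question: a name or value of exactly the maximum
+length was rejected by the old ">=" comparison (before/after sketch,
+comments added):
+
+    /* before: size >= ORANGEFS_MAX_XATTR_VALUELEN
+     *         rejects size == ORANGEFS_MAX_XATTR_VALUELEN (off by one)
+     * after:  size >  ORANGEFS_MAX_XATTR_VALUELEN
+     *         accepts a value of exactly the maximum length
+     */
+    if (size > ORANGEFS_MAX_XATTR_VALUELEN)
+        return -EINVAL;
+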
+This and the previous commit fix xfstests generic/020.
+
+Signed-off-by: Martin Brandenburg <martin@omnibond.com>
+Signed-off-by: Mike Marshall <hubcap@omnibond.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/orangefs/xattr.c | 24 +++++++-----------------
+ 1 file changed, 7 insertions(+), 17 deletions(-)
+
+--- a/fs/orangefs/xattr.c
++++ b/fs/orangefs/xattr.c
+@@ -76,11 +76,8 @@ ssize_t orangefs_inode_getxattr(struct i
+ if (S_ISLNK(inode->i_mode))
+ return -EOPNOTSUPP;
+
+- if (strlen(name) >= ORANGEFS_MAX_XATTR_NAMELEN) {
+- gossip_err("Invalid key length (%d)\n",
+- (int)strlen(name));
++ if (strlen(name) > ORANGEFS_MAX_XATTR_NAMELEN)
+ return -EINVAL;
+- }
+
+ fsuid = from_kuid(&init_user_ns, current_fsuid());
+ fsgid = from_kgid(&init_user_ns, current_fsgid());
+@@ -172,6 +169,9 @@ static int orangefs_inode_removexattr(st
+ struct orangefs_kernel_op_s *new_op = NULL;
+ int ret = -ENOMEM;
+
++ if (strlen(name) > ORANGEFS_MAX_XATTR_NAMELEN)
++ return -EINVAL;
++
+ down_write(&orangefs_inode->xattr_sem);
+ new_op = op_alloc(ORANGEFS_VFS_OP_REMOVEXATTR);
+ if (!new_op)
+@@ -231,23 +231,13 @@ int orangefs_inode_setxattr(struct inode
+ "%s: name %s, buffer_size %zd\n",
+ __func__, name, size);
+
+- if (size >= ORANGEFS_MAX_XATTR_VALUELEN ||
+- flags < 0) {
+- gossip_err("orangefs_inode_setxattr: bogus values of size(%d), flags(%d)\n",
+- (int)size,
+- flags);
++ if (size > ORANGEFS_MAX_XATTR_VALUELEN)
++ return -EINVAL;
++ if (strlen(name) > ORANGEFS_MAX_XATTR_NAMELEN)
+ return -EINVAL;
+- }
+
+ internal_flag = convert_to_internal_xattr_flags(flags);
+
+- if (strlen(name) >= ORANGEFS_MAX_XATTR_NAMELEN) {
+- gossip_err
+- ("orangefs_inode_setxattr: bogus key size (%d)\n",
+- (int)(strlen(name)));
+- return -EINVAL;
+- }
+-
+ /* This is equivalent to a removexattr */
+ if (size == 0 && value == NULL) {
+ gossip_debug(GOSSIP_XATTR_DEBUG,
--- /dev/null
+From 53950ef541675df48c219a8d665111a0e68dfc2f Mon Sep 17 00:00:00 2001
+From: Martin Brandenburg <martin@omnibond.com>
+Date: Tue, 25 Apr 2017 15:38:04 -0400
+Subject: orangefs: do not check possibly stale size on truncate
+
+From: Martin Brandenburg <martin@omnibond.com>
+
+commit 53950ef541675df48c219a8d665111a0e68dfc2f upstream.
+
+Let the server figure this out because our size might be out of date or
+not present.
+
+The bug was that
+
+ xfs_io -f -t -c "pread -v 0 100" /mnt/foo
+ echo "Test" > /mnt/foo
+ xfs_io -f -t -c "pread -v 0 100" /mnt/foo
+
+fails because the second truncate did not happen if nothing had
+requested the size after the write in echo. Thus i_size was zero (not
+present), and orangefs_setattr thought i_size was zero and that there
+was nothing to do.
+
+Signed-off-by: Martin Brandenburg <martin@omnibond.com>
+Signed-off-by: Mike Marshall <hubcap@omnibond.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/orangefs/inode.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/fs/orangefs/inode.c
++++ b/fs/orangefs/inode.c
+@@ -218,8 +218,7 @@ int orangefs_setattr(struct dentry *dent
+ if (ret)
+ goto out;
+
+- if ((iattr->ia_valid & ATTR_SIZE) &&
+- iattr->ia_size != i_size_read(inode)) {
++ if (iattr->ia_valid & ATTR_SIZE) {
+ ret = orangefs_setattr_size(inode, iattr);
+ if (ret)
+ goto out;
--- /dev/null
+From 17930b252cd6f31163c259eaa99dd8aa630fb9ba Mon Sep 17 00:00:00 2001
+From: Martin Brandenburg <martin@omnibond.com>
+Date: Tue, 25 Apr 2017 15:37:58 -0400
+Subject: orangefs: do not set getattr_time on orangefs_lookup
+
+From: Martin Brandenburg <martin@omnibond.com>
+
+commit 17930b252cd6f31163c259eaa99dd8aa630fb9ba upstream.
+
+Since orangefs_lookup calls orangefs_iget which calls
+orangefs_inode_getattr, getattr_time will get set.
+
+Signed-off-by: Martin Brandenburg <martin@omnibond.com>
+Signed-off-by: Mike Marshall <hubcap@omnibond.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/orangefs/namei.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/fs/orangefs/namei.c
++++ b/fs/orangefs/namei.c
+@@ -193,8 +193,6 @@ static struct dentry *orangefs_lookup(st
+ goto out;
+ }
+
+- ORANGEFS_I(inode)->getattr_time = jiffies - 1;
+-
+ gossip_debug(GOSSIP_NAME_DEBUG,
+ "%s:%s:%d "
+ "Found good inode [%lu] with count [%d]\n",
--- /dev/null
+From a956af337b9ff25822d9ce1a59c6ed0c09fc14b9 Mon Sep 17 00:00:00 2001
+From: Martin Brandenburg <martin@omnibond.com>
+Date: Tue, 25 Apr 2017 15:37:56 -0400
+Subject: orangefs: fix bounds check for listxattr
+
+From: Martin Brandenburg <martin@omnibond.com>
+
+commit a956af337b9ff25822d9ce1a59c6ed0c09fc14b9 upstream.
+
+Signed-off-by: Martin Brandenburg <martin@omnibond.com>
+Signed-off-by: Mike Marshall <hubcap@omnibond.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/orangefs/xattr.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/orangefs/xattr.c
++++ b/fs/orangefs/xattr.c
+@@ -358,7 +358,7 @@ try_again:
+
+ returned_count = new_op->downcall.resp.listxattr.returned_count;
+ if (returned_count < 0 ||
+- returned_count >= ORANGEFS_MAX_XATTR_LISTLEN) {
++ returned_count > ORANGEFS_MAX_XATTR_LISTLEN) {
+ gossip_err("%s: impossible value for returned_count:%d:\n",
+ __func__,
+ returned_count);
--- /dev/null
+From 4a99f3c83dc493c8ea84693d78cd792839c8aa64 Mon Sep 17 00:00:00 2001
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Mon, 24 Apr 2017 22:26:40 +0300
+Subject: ovl: do not set overlay.opaque on non-dir create
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+commit 4a99f3c83dc493c8ea84693d78cd792839c8aa64 upstream.
+
+The optimization for opaque dir create was wrongly being applied
+also to non-dir create.
+
+Fixes: 97c684cc9110 ("ovl: create directories inside merged parent opaque")
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/overlayfs/dir.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/overlayfs/dir.c
++++ b/fs/overlayfs/dir.c
+@@ -210,7 +210,7 @@ static int ovl_create_upper(struct dentr
+ if (err)
+ goto out_dput;
+
+- if (ovl_type_merge(dentry->d_parent)) {
++ if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry)) {
+ /* Setting opaque here is just an optimization, allow to fail */
+ ovl_set_opaque(dentry, newdentry);
+ }
--- /dev/null
+From 07a77929ba672d93642a56dc2255dd21e6e2290b Mon Sep 17 00:00:00 2001
+From: "Jason A. Donenfeld" <Jason@zx2c4.com>
+Date: Fri, 7 Apr 2017 02:33:30 +0200
+Subject: padata: free correct variable
+
+From: Jason A. Donenfeld <Jason@zx2c4.com>
+
+commit 07a77929ba672d93642a56dc2255dd21e6e2290b upstream.
+
+The author meant to free the variable that was just allocated, instead
+of the one that failed to be allocated, but made a simple typo. This
+patch rectifies that.
+
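+The general error-path idiom (restated from the hunk below, with the
+reasoning as a comment): on allocation failure, unwind only what has
+already succeeded.
+
+    if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) {
+        /* cbcpu was never allocated, so there is nothing to free
+         * there; release the earlier, successful pcpu allocation */
+        free_cpumask_var(pd->cpumask.pcpu);
+        return -ENOMEM;
+    }
+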
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/padata.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/padata.c
++++ b/kernel/padata.c
+@@ -354,7 +354,7 @@ static int padata_setup_cpumasks(struct
+
+ cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_online_mask);
+ if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) {
+- free_cpumask_var(pd->cpumask.cbcpu);
++ free_cpumask_var(pd->cpumask.pcpu);
+ return -ENOMEM;
+ }
+
--- /dev/null
+From e77852b32d6d4430c68c38aaf73efe5650fa25af Mon Sep 17 00:00:00 2001
+From: Christian Borntraeger <borntraeger@de.ibm.com>
+Date: Thu, 6 Apr 2017 09:51:51 +0200
+Subject: perf annotate s390: Fix perf annotate error -95 (4.10 regression)
+
+From: Christian Borntraeger <borntraeger@de.ibm.com>
+
+commit e77852b32d6d4430c68c38aaf73efe5650fa25af upstream.
+
+Since 4.10, perf annotate exits on s390 with an "unknown error -95".
+Turns out that commit 786c1b51844d ("perf annotate: Start supporting
+cross arch annotation") added a hard requirement for architecture
+support when objdump is used, but only provided x86 and arm support.
+Meanwhile power was added, so let's add s390 as well.
+
+While at it, make sure to implement the branch and jump types.
+
+Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Cc: Andreas Krebbel <krebbel@linux.vnet.ibm.com>
+Cc: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: linux-s390 <linux-s390@vger.kernel.org>
+Fixes: 786c1b51844 "perf annotate: Start supporting cross arch annotation"
+Link: http://lkml.kernel.org/r/1491465112-45819-2-git-send-email-borntraeger@de.ibm.com
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/perf/util/annotate.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/tools/perf/util/annotate.c
++++ b/tools/perf/util/annotate.c
+@@ -136,6 +136,12 @@ static struct arch architectures[] = {
+ .comment_char = '#',
+ },
+ },
++ {
++ .name = "s390",
++ .objdump = {
++ .comment_char = '#',
++ },
++ },
+ };
+
+ static void ins__delete(struct ins_operands *ops)
--- /dev/null
+From d9f8dfa9baf9b6ae1f2f84f887176558ecde5268 Mon Sep 17 00:00:00 2001
+From: Christian Borntraeger <borntraeger@de.ibm.com>
+Date: Thu, 6 Apr 2017 09:51:52 +0200
+Subject: perf annotate s390: Implement jump types for perf annotate
+
+From: Christian Borntraeger <borntraeger@de.ibm.com>
+
+commit d9f8dfa9baf9b6ae1f2f84f887176558ecde5268 upstream.
+
+Implement simple detection for all kind of jumps and branches.
+
+Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Cc: Andreas Krebbel <krebbel@linux.vnet.ibm.com>
+Cc: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: linux-s390 <linux-s390@vger.kernel.org>
+Link: http://lkml.kernel.org/r/1491465112-45819-3-git-send-email-borntraeger@de.ibm.com
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/perf/arch/s390/annotate/instructions.c | 30 +++++++++++++++++++++++++++
+ tools/perf/util/annotate.c | 2 +
+ 2 files changed, 32 insertions(+)
+
+--- /dev/null
++++ b/tools/perf/arch/s390/annotate/instructions.c
+@@ -0,0 +1,30 @@
++static struct ins_ops *s390__associate_ins_ops(struct arch *arch, const char *name)
++{
++ struct ins_ops *ops = NULL;
++
++ /* catch all kind of jumps */
++ if (strchr(name, 'j') ||
++ !strncmp(name, "bct", 3) ||
++ !strncmp(name, "br", 2))
++ ops = &jump_ops;
++ /* override call/returns */
++ if (!strcmp(name, "bras") ||
++ !strcmp(name, "brasl") ||
++ !strcmp(name, "basr"))
++ ops = &call_ops;
++ if (!strcmp(name, "br"))
++ ops = &ret_ops;
++
++ arch__associate_ins_ops(arch, name, ops);
++ return ops;
++}
++
++static int s390__annotate_init(struct arch *arch)
++{
++ if (!arch->initialized) {
++ arch->initialized = true;
++ arch->associate_instruction_ops = s390__associate_ins_ops;
++ }
++
++ return 0;
++}
+--- a/tools/perf/util/annotate.c
++++ b/tools/perf/util/annotate.c
+@@ -108,6 +108,7 @@ static int arch__associate_ins_ops(struc
+ #include "arch/arm64/annotate/instructions.c"
+ #include "arch/x86/annotate/instructions.c"
+ #include "arch/powerpc/annotate/instructions.c"
++#include "arch/s390/annotate/instructions.c"
+
+ static struct arch architectures[] = {
+ {
+@@ -132,6 +133,7 @@ static struct arch architectures[] = {
+ },
+ {
+ .name = "s390",
++ .init = s390__annotate_init,
+ .objdump = {
+ .comment_char = '#',
+ },
--- /dev/null
+From c3a0bbc7ad7598dec5a204868bdf8a2b1b51df14 Mon Sep 17 00:00:00 2001
+From: Adrian Hunter <adrian.hunter@intel.com>
+Date: Fri, 24 Mar 2017 14:15:52 +0200
+Subject: perf auxtrace: Fix no_size logic in addr_filter__resolve_kernel_syms()
+
+From: Adrian Hunter <adrian.hunter@intel.com>
+
+commit c3a0bbc7ad7598dec5a204868bdf8a2b1b51df14 upstream.
+
+Address filtering with kernel symbols incorrectly resulted in the error
+"Cannot determine size of symbol" because the no_size logic was the wrong
+way around.
+
+Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
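+A quick truth table shows the inversion (illustrative; 0x40 is an
+arbitrary example value):
+
+    /* size == 0x40, i.e. the symbol size IS known:
+     *   no_size = !!size;  ->  1, wrongly flags "size unknown"
+     *   no_size = !size;   ->  0, correctly flags "size known"
+     */
+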
+Tested-by: Andi Kleen <ak@linux.intel.com>
+Link: http://lkml.kernel.org/r/1490357752-27942-1-git-send-email-adrian.hunter@intel.com
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/perf/util/auxtrace.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/tools/perf/util/auxtrace.c
++++ b/tools/perf/util/auxtrace.c
+@@ -1826,7 +1826,7 @@ static int addr_filter__resolve_kernel_s
+ filt->addr = start;
+ if (filt->range && !filt->size && !filt->sym_to) {
+ filt->size = size;
+- no_size = !!size;
++ no_size = !size;
+ }
+ }
+
+@@ -1840,7 +1840,7 @@ static int addr_filter__resolve_kernel_s
+ if (err)
+ return err;
+ filt->size = start + size - filt->addr;
+- no_size = !!size;
++ no_size = !size;
+ }
+
+ /* The very last symbol in kallsyms does not imply a particular size */
dm-rq-check-blk_mq_register_dev-return-value-in-dm_mq_init_request_queue.patch
dm-thin-fix-a-memory-leak-when-passing-discard-bio-down.patch
vfio-type1-remove-locked-page-accounting-workqueue.patch
+iov_iter-don-t-revert-iov-buffer-if-csum-error.patch
+ib-core-fix-sysfs-registration-error-flow.patch
+ib-core-fix-kernel-crash-during-fail-to-initialize-device.patch
+ib-core-for-multicast-functions-verify-that-lids-are-multicast-lids.patch
+ib-ipoib-ibx-failed-to-create-mcg-debug-file.patch
+ib-mlx4-fix-ib-device-initialization-error-flow.patch
+ib-mlx4-reduce-sriov-multicast-cleanup-warning-message-to-debug-level.patch
+ib-hfi1-prevent-kernel-qp-post-send-hard-lockups.patch
+perf-auxtrace-fix-no_size-logic-in-addr_filter__resolve_kernel_syms.patch
+perf-annotate-s390-fix-perf-annotate-error-95-4.10-regression.patch
+perf-annotate-s390-implement-jump-types-for-perf-annotate.patch
+jbd2-fix-dbench4-performance-regression-for-nobarrier-mounts.patch
+ext4-evict-inline-data-when-writing-to-memory-map.patch
+orangefs-fix-bounds-check-for-listxattr.patch
+orangefs-clean-up-oversize-xattr-validation.patch
+orangefs-do-not-set-getattr_time-on-orangefs_lookup.patch
+orangefs-do-not-check-possibly-stale-size-on-truncate.patch
+fs-xattr.c-zero-out-memory-copied-to-userspace-in-getxattr.patch
+ceph-fix-memory-leak-in-__ceph_setxattr.patch
+fs-fix-data-invalidation-in-the-cleancache-during-direct-io.patch
+fs-block_dev-always-invalidate-cleancache-in-invalidate_bdev.patch
+mm-vmscan-fix-io-refault-regression-in-cache-workingset-transition.patch
+mm-prevent-potential-recursive-reclaim-due-to-clearing-pf_memalloc.patch
+fix-match_prepath.patch
+do-not-return-number-of-bytes-written-for-ioctl-cifs_ioc_copychunk_file.patch
+set-unicode-flag-on-cifs-echo-request-to-avoid-mac-error.patch
+smb3-work-around-mount-failure-when-using-smb3-dialect-to-macs.patch
+cifs-fix-mapping-of-sfm_space-and-sfm_period.patch
+cifs-fix-leak-in-fsctl_enum_snaps-response-handling.patch
+cifs-fix-cifs_enumerate_snapshots-oops.patch
+cifs-fix-oplock-break-deadlocks.patch
+cifs-fix-cifs_ioc_get_mnt_info-oops.patch
+cifs-add-misssing-sfm-mapping-for-doublequote.patch
+ovl-do-not-set-overlay.opaque-on-non-dir-create.patch
+padata-free-correct-variable.patch
+md-raid1-avoid-reusing-a-resync-bio-after-error-handling.patch
--- /dev/null
+From 26c9cb668c7fbf9830516b75d8bee70b699ed449 Mon Sep 17 00:00:00 2001
+From: Steve French <smfrench@gmail.com>
+Date: Tue, 2 May 2017 13:35:20 -0500
+Subject: Set unicode flag on cifs echo request to avoid Mac error
+
+From: Steve French <smfrench@gmail.com>
+
+commit 26c9cb668c7fbf9830516b75d8bee70b699ed449 upstream.
+
+Mac requires the unicode flag to be set for cifs, even for the smb
+echo request (which doesn't have strings).
+
+Without this, Mac rejects the periodic echo requests (when mounting
+with cifs) that we use to check if the server is down.
+
+Signed-off-by: Steve French <smfrench@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/cifssmb.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/cifs/cifssmb.c
++++ b/fs/cifs/cifssmb.c
+@@ -718,6 +718,9 @@ CIFSSMBEcho(struct TCP_Server_Info *serv
+ if (rc)
+ return rc;
+
++ if (server->capabilities & CAP_UNICODE)
++ smb->hdr.Flags2 |= SMBFLG2_UNICODE;
++
+ /* set up echo request */
+ smb->hdr.Tid = 0xffff;
+ smb->hdr.WordCount = 1;
--- /dev/null
+From 7db0a6efdc3e990cdfd4b24820d010e9eb7890ad Mon Sep 17 00:00:00 2001
+From: Steve French <smfrench@gmail.com>
+Date: Wed, 3 May 2017 21:12:20 -0500
+Subject: SMB3: Work around mount failure when using SMB3 dialect to Macs
+
+From: Steve French <smfrench@gmail.com>
+
+commit 7db0a6efdc3e990cdfd4b24820d010e9eb7890ad upstream.
+
+Macs send the maximum buffer size in response on ioctl to validate
+negotiate security information, which causes us to fail the mount
+as the response buffer is larger than the expected response.
+
+Changed ioctl response processing to allow for padding of the validate
+negotiate ioctl response and to limit the maximum response size to the
+maximum buffer size.
+
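+The relaxed check, restated from the hunk below with the reasoning as a
+comment:
+
+    if (rsplen != sizeof(struct validate_negotiate_info_rsp)) {
+        cifs_dbg(VFS, "invalid protocol negotiate response size: %d\n",
+                 rsplen);
+        /* Macs pad the response up to their maximum buffer size, so a
+         * size mismatch alone is no longer fatal; only give up if the
+         * response cannot fit in our buffer at all */
+        if (rsplen > CIFSMaxBufSize)
+            return -EIO;
+    }
+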
+Signed-off-by: Steve French <steve.french@primarydata.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/smb2pdu.c | 14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+--- a/fs/cifs/smb2pdu.c
++++ b/fs/cifs/smb2pdu.c
+@@ -632,8 +632,12 @@ int smb3_validate_negotiate(const unsign
+ }
+
+ if (rsplen != sizeof(struct validate_negotiate_info_rsp)) {
+- cifs_dbg(VFS, "invalid size of protocol negotiate response\n");
+- return -EIO;
++ cifs_dbg(VFS, "invalid protocol negotiate response size: %d\n",
++ rsplen);
++
++ /* relax check since Mac returns max bufsize allowed on ioctl */
++ if (rsplen > CIFSMaxBufSize)
++ return -EIO;
+ }
+
+ /* check validate negotiate info response matches what we got earlier */
+@@ -1853,8 +1857,12 @@ SMB2_ioctl(const unsigned int xid, struc
+ * than one credit. Windows typically sets this smaller, but for some
+ * ioctls it may be useful to allow server to send more. No point
+ * limiting what the server can send as long as fits in one credit
++ * Unfortunately - we can not handle more than CIFS_MAX_MSG_SIZE
++ * (by default, note that it can be overridden to make max larger)
++ * in responses (except for read responses which can be bigger.
++ * We may want to bump this limit up
+ */
+- req->MaxOutputResponse = cpu_to_le32(0xFF00); /* < 64K uses 1 credit */
++ req->MaxOutputResponse = cpu_to_le32(CIFSMaxBufSize);
+
+ if (is_fsctl)
+ req->Flags = cpu_to_le32(SMB2_0_IOCTL_IS_FSCTL);