--- /dev/null
+From eeca958dce0a9231d1969f86196653eb50fcc9b3 Mon Sep 17 00:00:00 2001
+From: Luis Henriques <lhenriques@suse.com>
+Date: Fri, 28 Apr 2017 11:14:04 +0100
+Subject: ceph: fix memory leak in __ceph_setxattr()
+
+From: Luis Henriques <lhenriques@suse.com>
+
+commit eeca958dce0a9231d1969f86196653eb50fcc9b3 upstream.
+
+The ceph_inode_xattr needs to be released when removing an xattr. This
+is easily reproducible by running the 'generic/020' test from xfstests,
+or simply by doing:
+
+ attr -s attr0 -V 0 /mnt/test && attr -r attr0 /mnt/test
+
+While there, also fix the error path.
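+
+In sketch form (simplified from __set_xattr(), not the verbatim kernel
+code): the caller preallocates the tree node, so every early return in
+__set_xattr() must free that node as well, not just the name and value
+buffers:
+
+    /* *newxattr was kmem_cache_alloc()'d by __ceph_setxattr() */
+    if (err) {
+        kfree(name);
+        kfree(val);
+        kfree(*newxattr);   /* was leaked before this patch */
+        return err;
+    }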
+
+Here's the kmemleak splat:
+
+unreferenced object 0xffff88001f86fbc0 (size 64):
+ comm "attr", pid 244, jiffies 4294904246 (age 98.464s)
+ hex dump (first 32 bytes):
+ 40 fa 86 1f 00 88 ff ff 80 32 38 1f 00 88 ff ff @........28.....
+ 00 01 00 00 00 00 ad de 00 02 00 00 00 00 ad de ................
+ backtrace:
+ [<ffffffff81560199>] kmemleak_alloc+0x49/0xa0
+ [<ffffffff810f3e5b>] kmem_cache_alloc+0x9b/0xf0
+ [<ffffffff812b157e>] __ceph_setxattr+0x17e/0x820
+ [<ffffffff812b1c57>] ceph_set_xattr_handler+0x37/0x40
+ [<ffffffff8111fb4b>] __vfs_removexattr+0x4b/0x60
+ [<ffffffff8111fd37>] vfs_removexattr+0x77/0xd0
+ [<ffffffff8111fdd1>] removexattr+0x41/0x60
+ [<ffffffff8111fe65>] path_removexattr+0x75/0xa0
+ [<ffffffff81120aeb>] SyS_lremovexattr+0xb/0x10
+ [<ffffffff81564b20>] entry_SYSCALL_64_fastpath+0x13/0x94
+ [<ffffffffffffffff>] 0xffffffffffffffff
+
+Signed-off-by: Luis Henriques <lhenriques@suse.com>
+Reviewed-by: "Yan, Zheng" <zyan@redhat.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ceph/xattr.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/ceph/xattr.c
++++ b/fs/ceph/xattr.c
+@@ -392,6 +392,7 @@ static int __set_xattr(struct ceph_inode
+
+ if (update_xattr) {
+ int err = 0;
++
+ if (xattr && (flags & XATTR_CREATE))
+ err = -EEXIST;
+ else if (!xattr && (flags & XATTR_REPLACE))
+@@ -399,12 +400,14 @@ static int __set_xattr(struct ceph_inode
+ if (err) {
+ kfree(name);
+ kfree(val);
++ kfree(*newxattr);
+ return err;
+ }
+ if (update_xattr < 0) {
+ if (xattr)
+ __remove_xattr(ci, xattr);
+ kfree(name);
++ kfree(*newxattr);
+ return 0;
+ }
+ }
--- /dev/null
+From 85435d7a15294f9f7ef23469e6aaf7c5dfcc54f0 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Bj=C3=B6rn=20Jacke?= <bj@sernet.de>
+Date: Fri, 5 May 2017 04:36:16 +0200
+Subject: CIFS: add missing SFM mapping for doublequote
+
+From: Björn Jacke <bj@sernet.de>
+
+commit 85435d7a15294f9f7ef23469e6aaf7c5dfcc54f0 upstream.
+
+SFM maps the doublequote character to 0xF020.
+
+Without this patch, creating files with a doublequote in the name fails
+against Windows/Mac servers.
+
+Signed-off-by: Bjoern Jacke <bjacke@samba.org>
+Signed-off-by: Steve French <smfrench@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/cifs_unicode.c | 6 ++++++
+ fs/cifs/cifs_unicode.h | 1 +
+ 2 files changed, 7 insertions(+)
+
+--- a/fs/cifs/cifs_unicode.c
++++ b/fs/cifs/cifs_unicode.c
+@@ -83,6 +83,9 @@ convert_sfm_char(const __u16 src_char, c
+ case SFM_COLON:
+ *target = ':';
+ break;
++ case SFM_DOUBLEQUOTE:
++ *target = '"';
++ break;
+ case SFM_ASTERISK:
+ *target = '*';
+ break;
+@@ -418,6 +421,9 @@ static __le16 convert_to_sfm_char(char s
+ case ':':
+ dest_char = cpu_to_le16(SFM_COLON);
+ break;
++ case '"':
++ dest_char = cpu_to_le16(SFM_DOUBLEQUOTE);
++ break;
+ case '*':
+ dest_char = cpu_to_le16(SFM_ASTERISK);
+ break;
+--- a/fs/cifs/cifs_unicode.h
++++ b/fs/cifs/cifs_unicode.h
+@@ -57,6 +57,7 @@
+ * not conflict (although almost does) with the mapping above.
+ */
+
++#define SFM_DOUBLEQUOTE ((__u16) 0xF020)
+ #define SFM_ASTERISK ((__u16) 0xF021)
+ #define SFM_QUESTION ((__u16) 0xF025)
+ #define SFM_COLON ((__u16) 0xF022)
--- /dev/null
+From 6026685de33b0db5b2b6b0e9b41b3a1a3261033c Mon Sep 17 00:00:00 2001
+From: David Disseldorp <ddiss@suse.de>
+Date: Wed, 3 May 2017 17:39:08 +0200
+Subject: cifs: fix CIFS_ENUMERATE_SNAPSHOTS oops
+
+From: David Disseldorp <ddiss@suse.de>
+
+commit 6026685de33b0db5b2b6b0e9b41b3a1a3261033c upstream.
+
+As with 618763958b22, an open directory may have a NULL private_data
+pointer prior to readdir. The CIFS_ENUMERATE_SNAPSHOTS handler must
+check for this before dereferencing it.
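+
+A minimal userspace trigger, as a sketch (assumes the ioctl and struct
+definitions from fs/cifs/cifs_ioctl.h; error handling omitted):
+
+    int fd = open("/mnt/cifs/dir", O_RDONLY | O_DIRECTORY);
+    struct smb_snapshot_array snaps = {
+        .snapshot_array_size = sizeof(snaps),
+    };
+
+    /* no readdir yet, so private_data (cifsFileInfo) is still NULL */
+    ioctl(fd, CIFS_ENUMERATE_SNAPSHOTS, &snaps);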
+
+Fixes: 834170c85978 ("Enable previous version support")
+Signed-off-by: David Disseldorp <ddiss@suse.de>
+Signed-off-by: Steve French <smfrench@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/ioctl.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/cifs/ioctl.c
++++ b/fs/cifs/ioctl.c
+@@ -213,6 +213,8 @@ long cifs_ioctl(struct file *filep, unsi
+ rc = smb_mnt_get_fsinfo(xid, tcon, (void __user *)arg);
+ break;
+ case CIFS_ENUMERATE_SNAPSHOTS:
++ if (pSMBFile == NULL)
++ break;
+ if (arg == 0) {
+ rc = -EINVAL;
+ goto cifs_ioc_exit;
--- /dev/null
+From d8a6e505d6bba2250852fbc1c1c86fe68aaf9af3 Mon Sep 17 00:00:00 2001
+From: David Disseldorp <ddiss@suse.de>
+Date: Thu, 4 May 2017 00:41:13 +0200
+Subject: cifs: fix CIFS_IOC_GET_MNT_INFO oops
+
+From: David Disseldorp <ddiss@suse.de>
+
+commit d8a6e505d6bba2250852fbc1c1c86fe68aaf9af3 upstream.
+
+An open directory may have a NULL private_data pointer prior to readdir.
+
+Fixes: 0de1f4c6f6c0 ("Add way to query server fs info for smb3")
+Signed-off-by: David Disseldorp <ddiss@suse.de>
+Signed-off-by: Steve French <smfrench@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/ioctl.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/cifs/ioctl.c
++++ b/fs/cifs/ioctl.c
+@@ -209,6 +209,8 @@ long cifs_ioctl(struct file *filep, unsi
+ rc = -EOPNOTSUPP;
+ break;
+ case CIFS_IOC_GET_MNT_INFO:
++ if (pSMBFile == NULL)
++ break;
+ tcon = tlink_tcon(pSMBFile->tlink);
+ rc = smb_mnt_get_fsinfo(xid, tcon, (void __user *)arg);
+ break;
--- /dev/null
+From 0e5c795592930d51fd30d53a2e7b73cba022a29b Mon Sep 17 00:00:00 2001
+From: David Disseldorp <ddiss@suse.de>
+Date: Wed, 3 May 2017 17:39:09 +0200
+Subject: cifs: fix leak in FSCTL_ENUM_SNAPS response handling
+
+From: David Disseldorp <ddiss@suse.de>
+
+commit 0e5c795592930d51fd30d53a2e7b73cba022a29b upstream.
+
+The server may respond with success and an output buffer shorter than
+sizeof(struct smb_snapshot_array). Do not leak the output buffer in
+this case.
+
+Fixes: 834170c85978 ("Enable previous version support")
+Signed-off-by: David Disseldorp <ddiss@suse.de>
+Signed-off-by: Steve French <smfrench@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/smb2ops.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/cifs/smb2ops.c
++++ b/fs/cifs/smb2ops.c
+@@ -942,6 +942,7 @@ smb3_enum_snapshots(const unsigned int x
+ }
+ if (snapshot_in.snapshot_array_size < sizeof(struct smb_snapshot_array)) {
+ rc = -ERANGE;
++ kfree(retbuf);
+ return rc;
+ }
+
--- /dev/null
+From b704e70b7cf48f9b67c07d585168e102dfa30bb4 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Bj=C3=B6rn=20Jacke?= <bj@sernet.de>
+Date: Wed, 3 May 2017 23:47:44 +0200
+Subject: CIFS: fix mapping of SFM_SPACE and SFM_PERIOD
+
+From: Björn Jacke <bj@sernet.de>
+
+commit b704e70b7cf48f9b67c07d585168e102dfa30bb4 upstream.
+
+- trailing space maps to 0xF028
+- trailing period maps to 0xF029
+
+This fix corrects the mapping of file names which have a trailing character
+that would otherwise be illegal (period or space) but is allowed by POSIX.
+
+Signed-off-by: Bjoern Jacke <bjacke@samba.org>
+Signed-off-by: Steve French <smfrench@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/cifs_unicode.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/cifs/cifs_unicode.h
++++ b/fs/cifs/cifs_unicode.h
+@@ -64,8 +64,8 @@
+ #define SFM_LESSTHAN ((__u16) 0xF023)
+ #define SFM_PIPE ((__u16) 0xF027)
+ #define SFM_SLASH ((__u16) 0xF026)
+-#define SFM_PERIOD ((__u16) 0xF028)
+-#define SFM_SPACE ((__u16) 0xF029)
++#define SFM_SPACE ((__u16) 0xF028)
++#define SFM_PERIOD ((__u16) 0xF029)
+
+ /*
+ * Mapping mechanism to use when one of the seven reserved characters is
--- /dev/null
+From 3998e6b87d4258a70df358296d6f1c7234012bfe Mon Sep 17 00:00:00 2001
+From: Rabin Vincent <rabinv@axis.com>
+Date: Wed, 3 May 2017 17:54:01 +0200
+Subject: CIFS: fix oplock break deadlocks
+
+From: Rabin Vincent <rabinv@axis.com>
+
+commit 3998e6b87d4258a70df358296d6f1c7234012bfe upstream.
+
+When the final cifsFileInfo_put() is called from cifsiod and an oplock
+break work is queued, lockdep complains loudly:
+
+ =============================================
+ [ INFO: possible recursive locking detected ]
+ 4.11.0+ #21 Not tainted
+ ---------------------------------------------
+ kworker/0:2/78 is trying to acquire lock:
+ ("cifsiod"){++++.+}, at: flush_work+0x215/0x350
+
+ but task is already holding lock:
+ ("cifsiod"){++++.+}, at: process_one_work+0x255/0x8e0
+
+ other info that might help us debug this:
+ Possible unsafe locking scenario:
+
+ CPU0
+ ----
+ lock("cifsiod");
+ lock("cifsiod");
+
+ *** DEADLOCK ***
+
+ May be due to missing lock nesting notation
+
+ 2 locks held by kworker/0:2/78:
+ #0: ("cifsiod"){++++.+}, at: process_one_work+0x255/0x8e0
+ #1: ((&wdata->work)){+.+...}, at: process_one_work+0x255/0x8e0
+
+ stack backtrace:
+ CPU: 0 PID: 78 Comm: kworker/0:2 Not tainted 4.11.0+ #21
+ Workqueue: cifsiod cifs_writev_complete
+ Call Trace:
+ dump_stack+0x85/0xc2
+ __lock_acquire+0x17dd/0x2260
+ ? match_held_lock+0x20/0x2b0
+ ? trace_hardirqs_off_caller+0x86/0x130
+ ? mark_lock+0xa6/0x920
+ lock_acquire+0xcc/0x260
+ ? lock_acquire+0xcc/0x260
+ ? flush_work+0x215/0x350
+ flush_work+0x236/0x350
+ ? flush_work+0x215/0x350
+ ? destroy_worker+0x170/0x170
+ __cancel_work_timer+0x17d/0x210
+ ? ___preempt_schedule+0x16/0x18
+ cancel_work_sync+0x10/0x20
+ cifsFileInfo_put+0x338/0x7f0
+ cifs_writedata_release+0x2a/0x40
+ ? cifs_writedata_release+0x2a/0x40
+ cifs_writev_complete+0x29d/0x850
+ ? preempt_count_sub+0x18/0xd0
+ process_one_work+0x304/0x8e0
+ worker_thread+0x9b/0x6a0
+ kthread+0x1b2/0x200
+ ? process_one_work+0x8e0/0x8e0
+ ? kthread_create_on_node+0x40/0x40
+ ret_from_fork+0x31/0x40
+
+This is a real warning. Since the oplock break work is queued on the
+same workqueue, this can deadlock if there is only one worker thread
+active for the workqueue (which will be the case during memory
+pressure, when the rescuer thread is handling it).
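+
+The deadlock shape, reduced to a sketch (illustrative work item names,
+not the actual cifs code): a work item running on the workqueue ends up
+flushing another work item queued on that same workqueue.
+
+    /* both items are queued on the same workqueue */
+    static void write_complete(struct work_struct *w)
+    {
+        /* final cifsFileInfo_put() happens here ... */
+        cancel_work_sync(&cfile->oplock_break);
+        /* ... which flushes a work item queued on this same workqueue;
+         * with a single (rescuer) worker, that worker is us: deadlock */
+    }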
+
+Furthermore, there is at least one other kind of hang possible due to
+the oplock break handling if there is only one worker. (This can be
+reproduced without introducing memory pressure by passing 1 for the
+max_active parameter of cifsiod.) cifs_oplock_break() can wait
+indefinitely in filemap_fdatawait() while the cifs_writev_complete()
+work is blocked:
+
+ sysrq: SysRq : Show Blocked State
+ task PC stack pid father
+ kworker/0:1 D 0 16 2 0x00000000
+ Workqueue: cifsiod cifs_oplock_break
+ Call Trace:
+ __schedule+0x562/0xf40
+ ? mark_held_locks+0x4a/0xb0
+ schedule+0x57/0xe0
+ io_schedule+0x21/0x50
+ wait_on_page_bit+0x143/0x190
+ ? add_to_page_cache_lru+0x150/0x150
+ __filemap_fdatawait_range+0x134/0x190
+ ? do_writepages+0x51/0x70
+ filemap_fdatawait_range+0x14/0x30
+ filemap_fdatawait+0x3b/0x40
+ cifs_oplock_break+0x651/0x710
+ ? preempt_count_sub+0x18/0xd0
+ process_one_work+0x304/0x8e0
+ worker_thread+0x9b/0x6a0
+ kthread+0x1b2/0x200
+ ? process_one_work+0x8e0/0x8e0
+ ? kthread_create_on_node+0x40/0x40
+ ret_from_fork+0x31/0x40
+ dd D 0 683 171 0x00000000
+ Call Trace:
+ __schedule+0x562/0xf40
+ ? mark_held_locks+0x29/0xb0
+ schedule+0x57/0xe0
+ io_schedule+0x21/0x50
+ wait_on_page_bit+0x143/0x190
+ ? add_to_page_cache_lru+0x150/0x150
+ __filemap_fdatawait_range+0x134/0x190
+ ? do_writepages+0x51/0x70
+ filemap_fdatawait_range+0x14/0x30
+ filemap_fdatawait+0x3b/0x40
+ filemap_write_and_wait+0x4e/0x70
+ cifs_flush+0x6a/0xb0
+ filp_close+0x52/0xa0
+ __close_fd+0xdc/0x150
+ SyS_close+0x33/0x60
+ entry_SYSCALL_64_fastpath+0x1f/0xbe
+
+ Showing all locks held in the system:
+ 2 locks held by kworker/0:1/16:
+ #0: ("cifsiod"){.+.+.+}, at: process_one_work+0x255/0x8e0
+ #1: ((&cfile->oplock_break)){+.+.+.}, at: process_one_work+0x255/0x8e0
+
+ Showing busy workqueues and worker pools:
+ workqueue cifsiod: flags=0xc
+ pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=1/1
+ in-flight: 16:cifs_oplock_break
+ delayed: cifs_writev_complete, cifs_echo_request
+ pool 0: cpus=0 node=0 flags=0x0 nice=0 hung=0s workers=3 idle: 750 3
+
+Fix these problems by creating a new workqueue (with a rescuer) for
+the oplock break work.
+
+Signed-off-by: Rabin Vincent <rabinv@axis.com>
+Signed-off-by: Steve French <smfrench@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/cifsfs.c | 15 +++++++++++++--
+ fs/cifs/cifsglob.h | 1 +
+ fs/cifs/misc.c | 2 +-
+ fs/cifs/smb2misc.c | 5 +++--
+ 4 files changed, 18 insertions(+), 5 deletions(-)
+
+--- a/fs/cifs/cifsfs.c
++++ b/fs/cifs/cifsfs.c
+@@ -87,6 +87,7 @@ extern mempool_t *cifs_req_poolp;
+ extern mempool_t *cifs_mid_poolp;
+
+ struct workqueue_struct *cifsiod_wq;
++struct workqueue_struct *cifsoplockd_wq;
+ __u32 cifs_lock_secret;
+
+ /*
+@@ -1369,9 +1370,16 @@ init_cifs(void)
+ goto out_clean_proc;
+ }
+
++ cifsoplockd_wq = alloc_workqueue("cifsoplockd",
++ WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
++ if (!cifsoplockd_wq) {
++ rc = -ENOMEM;
++ goto out_destroy_cifsiod_wq;
++ }
++
+ rc = cifs_fscache_register();
+ if (rc)
+- goto out_destroy_wq;
++ goto out_destroy_cifsoplockd_wq;
+
+ rc = cifs_init_inodecache();
+ if (rc)
+@@ -1419,7 +1427,9 @@ out_destroy_inodecache:
+ cifs_destroy_inodecache();
+ out_unreg_fscache:
+ cifs_fscache_unregister();
+-out_destroy_wq:
++out_destroy_cifsoplockd_wq:
++ destroy_workqueue(cifsoplockd_wq);
++out_destroy_cifsiod_wq:
+ destroy_workqueue(cifsiod_wq);
+ out_clean_proc:
+ cifs_proc_clean();
+@@ -1442,6 +1452,7 @@ exit_cifs(void)
+ cifs_destroy_mids();
+ cifs_destroy_inodecache();
+ cifs_fscache_unregister();
++ destroy_workqueue(cifsoplockd_wq);
+ destroy_workqueue(cifsiod_wq);
+ cifs_proc_clean();
+ }
+--- a/fs/cifs/cifsglob.h
++++ b/fs/cifs/cifsglob.h
+@@ -1683,6 +1683,7 @@ void cifs_oplock_break(struct work_struc
+
+ extern const struct slow_work_ops cifs_oplock_break_ops;
+ extern struct workqueue_struct *cifsiod_wq;
++extern struct workqueue_struct *cifsoplockd_wq;
+ extern __u32 cifs_lock_secret;
+
+ extern mempool_t *cifs_mid_poolp;
+--- a/fs/cifs/misc.c
++++ b/fs/cifs/misc.c
+@@ -492,7 +492,7 @@ is_valid_oplock_break(char *buffer, stru
+ CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
+ &pCifsInode->flags);
+
+- queue_work(cifsiod_wq,
++ queue_work(cifsoplockd_wq,
+ &netfile->oplock_break);
+ netfile->oplock_break_cancelled = false;
+
+--- a/fs/cifs/smb2misc.c
++++ b/fs/cifs/smb2misc.c
+@@ -499,7 +499,7 @@ smb2_tcon_has_lease(struct cifs_tcon *tc
+ else
+ cfile->oplock_break_cancelled = true;
+
+- queue_work(cifsiod_wq, &cfile->oplock_break);
++ queue_work(cifsoplockd_wq, &cfile->oplock_break);
+ kfree(lw);
+ return true;
+ }
+@@ -643,7 +643,8 @@ smb2_is_valid_oplock_break(char *buffer,
+ CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
+ &cinode->flags);
+ spin_unlock(&cfile->file_info_lock);
+- queue_work(cifsiod_wq, &cfile->oplock_break);
++ queue_work(cifsoplockd_wq,
++ &cfile->oplock_break);
+
+ spin_unlock(&tcon->open_file_lock);
+ spin_unlock(&cifs_tcp_ses_lock);
--- /dev/null
+From 7d0c234fd2e1c9ca3fa032696c0c58b1b74a9e0b Mon Sep 17 00:00:00 2001
+From: Sachin Prabhu <sprabhu@redhat.com>
+Date: Wed, 26 Apr 2017 17:10:17 +0100
+Subject: Do not return number of bytes written for ioctl CIFS_IOC_COPYCHUNK_FILE
+
+From: Sachin Prabhu <sprabhu@redhat.com>
+
+commit 7d0c234fd2e1c9ca3fa032696c0c58b1b74a9e0b upstream.
+
+commit 620d8745b35d ("Introduce cifs_copy_file_range()") changes the
+behaviour of the cifs ioctl call CIFS_IOC_COPYCHUNK_FILE. In case of
+successful writes, it now returns the number of bytes written. This
+return value is treated as an error by the xfstest cifs/001. Depending
+on the errno set at that time, this may or may not result in the test
+failing.
+
+The patch fixes this by setting the return value to 0 in case of
+successful writes.
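+
+Callers follow the usual ioctl(2) convention of treating any nonzero
+return as a failure, roughly (sketch, not the actual xfstest code):
+
+    if (ioctl(dst_fd, CIFS_IOC_COPYCHUNK_FILE, src_fd))
+        err(1, "copychunk");  /* fired on *success* before this patch */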
+
+Fixes: commit 620d8745b35d ("Introduce cifs_copy_file_range()")
+Reported-by: Eryu Guan <eguan@redhat.com>
+Signed-off-by: Sachin Prabhu <sprabhu@redhat.com>
+Acked-by: Pavel Shilovsky <pshilov@microsoft.com>
+Signed-off-by: Steve French <smfrench@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/ioctl.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/cifs/ioctl.c
++++ b/fs/cifs/ioctl.c
+@@ -74,7 +74,8 @@ static long cifs_ioctl_copychunk(unsigne
+
+ rc = cifs_file_copychunk_range(xid, src_file.file, 0, dst_file, 0,
+ src_inode->i_size, 0);
+-
++ if (rc > 0)
++ rc = 0;
+ out_fput:
+ fdput(src_file);
+ out_drop_write:
--- /dev/null
+From 7b4cc9787fe35b3ee2dfb1c35e22eafc32e00c33 Mon Sep 17 00:00:00 2001
+From: Eric Biggers <ebiggers@google.com>
+Date: Sun, 30 Apr 2017 00:10:50 -0400
+Subject: ext4: evict inline data when writing to memory map
+
+From: Eric Biggers <ebiggers@google.com>
+
+commit 7b4cc9787fe35b3ee2dfb1c35e22eafc32e00c33 upstream.
+
+Currently the case of writing via mmap to a file with inline data is not
+handled. This may be a rare case, since it requires a writable memory
+map of a very small file, but it is trivial to trigger on an
+inline_data filesystem, and it causes the
+'BUG_ON(ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA));' in
+ext4_writepages() to be hit:
+
+ mkfs.ext4 -O inline_data /dev/vdb
+ mount /dev/vdb /mnt
+ xfs_io -f /mnt/file \
+ -c 'pwrite 0 1' \
+ -c 'mmap -w 0 1m' \
+ -c 'mwrite 0 1' \
+ -c 'fsync'
+
+ kernel BUG at fs/ext4/inode.c:2723!
+ invalid opcode: 0000 [#1] SMP
+ CPU: 1 PID: 2532 Comm: xfs_io Not tainted 4.11.0-rc1-xfstests-00301-g071d9acf3d1f #633
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-20170228_101828-anatol 04/01/2014
+ task: ffff88003d3a8040 task.stack: ffffc90000300000
+ RIP: 0010:ext4_writepages+0xc89/0xf8a
+ RSP: 0018:ffffc90000303ca0 EFLAGS: 00010283
+ RAX: 0000028410000000 RBX: ffff8800383fa3b0 RCX: ffffffff812afcdc
+ RDX: 00000a9d00000246 RSI: ffffffff81e660e0 RDI: 0000000000000246
+ RBP: ffffc90000303dc0 R08: 0000000000000002 R09: 869618e8f99b4fa5
+ R10: 00000000852287a2 R11: 00000000a03b49f4 R12: ffff88003808e698
+ R13: 0000000000000000 R14: 7fffffffffffffff R15: 7fffffffffffffff
+ FS: 00007fd3e53094c0(0000) GS:ffff88003e400000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 00007fd3e4c51000 CR3: 000000003d554000 CR4: 00000000003406e0
+ Call Trace:
+ ? _raw_spin_unlock+0x27/0x2a
+ ? kvm_clock_read+0x1e/0x20
+ do_writepages+0x23/0x2c
+ ? do_writepages+0x23/0x2c
+ __filemap_fdatawrite_range+0x80/0x87
+ filemap_write_and_wait_range+0x67/0x8c
+ ext4_sync_file+0x20e/0x472
+ vfs_fsync_range+0x8e/0x9f
+ ? syscall_trace_enter+0x25b/0x2d0
+ vfs_fsync+0x1c/0x1e
+ do_fsync+0x31/0x4a
+ SyS_fsync+0x10/0x14
+ do_syscall_64+0x69/0x131
+ entry_SYSCALL64_slow_path+0x25/0x25
+
+We could try to be smart and keep the inline data in this case, or at
+least support delayed allocation when allocating the block, but these
+solutions would be more complicated and don't seem worthwhile given how
+rare this case seems to be. So just fix the bug by calling
+ext4_convert_inline_data() when we're asked to make a page writable, so
+that any inline data gets evicted, with the block allocated immediately.
+
+Reported-by: Nick Alcock <nick.alcock@oracle.com>
+Reviewed-by: Andreas Dilger <adilger@dilger.ca>
+Signed-off-by: Eric Biggers <ebiggers@google.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/inode.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -5874,6 +5874,11 @@ int ext4_page_mkwrite(struct vm_fault *v
+ file_update_time(vma->vm_file);
+
+ down_read(&EXT4_I(inode)->i_mmap_sem);
++
++ ret = ext4_convert_inline_data(inode);
++ if (ret)
++ goto out_ret;
++
+ /* Delalloc case is easy... */
+ if (test_opt(inode->i_sb, DELALLOC) &&
+ !ext4_should_journal_data(inode) &&
--- /dev/null
+From cd8c42968ee651b69e00f8661caff32b0086e82d Mon Sep 17 00:00:00 2001
+From: Sachin Prabhu <sprabhu@redhat.com>
+Date: Wed, 26 Apr 2017 14:05:46 +0100
+Subject: Fix match_prepath()
+
+From: Sachin Prabhu <sprabhu@redhat.com>
+
+commit cd8c42968ee651b69e00f8661caff32b0086e82d upstream.
+
+The incorrect return value for shares not using a prefix path means
+that we will never match superblocks for these shares.
+
+Fixes: commit c1d8b24d1819 ("Compare prepaths when comparing superblocks")
+Signed-off-by: Sachin Prabhu <sprabhu@redhat.com>
+Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
+Signed-off-by: Steve French <smfrench@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/connect.c | 14 ++++++--------
+ 1 file changed, 6 insertions(+), 8 deletions(-)
+
+--- a/fs/cifs/connect.c
++++ b/fs/cifs/connect.c
+@@ -2912,16 +2912,14 @@ match_prepath(struct super_block *sb, st
+ {
+ struct cifs_sb_info *old = CIFS_SB(sb);
+ struct cifs_sb_info *new = mnt_data->cifs_sb;
++ bool old_set = old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH;
++ bool new_set = new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH;
+
+- if (old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) {
+- if (!(new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH))
+- return 0;
+- /* The prepath should be null terminated strings */
+- if (strcmp(new->prepath, old->prepath))
+- return 0;
+-
++ if (old_set && new_set && !strcmp(new->prepath, old->prepath))
++ return 1;
++ else if (!old_set && !new_set)
+ return 1;
+- }
++
+ return 0;
+ }
+
--- /dev/null
+From a5f6a6a9c72eac38a7fadd1a038532bc8516337c Mon Sep 17 00:00:00 2001
+From: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Date: Wed, 3 May 2017 14:56:02 -0700
+Subject: fs/block_dev: always invalidate cleancache in invalidate_bdev()
+
+From: Andrey Ryabinin <aryabinin@virtuozzo.com>
+
+commit a5f6a6a9c72eac38a7fadd1a038532bc8516337c upstream.
+
+invalidate_bdev() calls cleancache_invalidate_inode() iff ->nrpages != 0,
+which doesn't make any sense.
+
+Make sure that invalidate_bdev() always calls cleancache_invalidate_inode()
+regardless of mapping->nrpages value.
+
+Fixes: c515e1fd361c ("mm/fs: add hooks to support cleancache")
+Link: http://lkml.kernel.org/r/20170424164135.22350-3-aryabinin@virtuozzo.com
+Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Acked-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Cc: Alexander Viro <viro@zeniv.linux.org.uk>
+Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Alexey Kuznetsov <kuznet@virtuozzo.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Nikolay Borisov <n.borisov.lkml@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/block_dev.c | 11 +++++------
+ 1 file changed, 5 insertions(+), 6 deletions(-)
+
+--- a/fs/block_dev.c
++++ b/fs/block_dev.c
+@@ -103,12 +103,11 @@ void invalidate_bdev(struct block_device
+ {
+ struct address_space *mapping = bdev->bd_inode->i_mapping;
+
+- if (mapping->nrpages == 0)
+- return;
+-
+- invalidate_bh_lrus();
+- lru_add_drain_all(); /* make sure all lru add caches are flushed */
+- invalidate_mapping_pages(mapping, 0, -1);
++ if (mapping->nrpages) {
++ invalidate_bh_lrus();
++ lru_add_drain_all(); /* make sure all lru add caches are flushed */
++ invalidate_mapping_pages(mapping, 0, -1);
++ }
+ /* 99% of the time, we don't need to flush the cleancache on the bdev.
+ * But, for the strange corners, lets be cautious
+ */
--- /dev/null
+From 55635ba76ef91f26b418702ace5e6287eb727f6a Mon Sep 17 00:00:00 2001
+From: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Date: Wed, 3 May 2017 14:55:59 -0700
+Subject: fs: fix data invalidation in the cleancache during direct IO
+
+From: Andrey Ryabinin <aryabinin@virtuozzo.com>
+
+commit 55635ba76ef91f26b418702ace5e6287eb727f6a upstream.
+
+Patch series "Properly invalidate data in the cleancache", v2.
+
+We've noticed that after a direct IO write, a buffered read sometimes
+gets stale data coming from the cleancache. The reason for this is
+that some direct write hooks call invalidate_inode_pages2[_range]()
+conditionally, iff mapping->nrpages is not zero, so we may not
+invalidate data in the cleancache.
+
+Another odd thing is that we check only for ->nrpages and don't check
+for ->nrexceptional, but invalidate_inode_pages2[_range] invalidates
+exceptional entries as well. So we invalidate exceptional entries only
+if ->nrpages != 0? This doesn't feel right.
+
+ - Patch 1 fixes direct IO writes by removing ->nrpages check.
+ - Patch 2 fixes similar case in invalidate_bdev().
+ Note: I only fixed conditional cleancache_invalidate_inode() here.
+ Do we also need to add an ->nrexceptional check into invalidate_bdev()?
+
+ - Patches 3-4: some optimizations.
+
+This patch (of 4):
+
+Some direct IO write fs hooks call invalidate_inode_pages2[_range]()
+conditionally, iff mapping->nrpages is not zero. This can't be right,
+because invalidate_inode_pages2[_range]() also invalidates data in the
+cleancache via a cleancache_invalidate_inode() call. So if the page
+cache is empty but there is some data in the cleancache, a buffered
+read after a direct IO write would get stale data from the cleancache.
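+
+Concretely (sketch): (1) a buffered write puts data in the page cache
+and, after reclaim, a copy in the cleancache; (2) a direct IO write
+finds mapping->nrpages == 0, so the old code skips invalidation and the
+cleancache copy survives with the pre-DIO contents; (3) a buffered read
+misses the page cache and cleancache_get_page() hands back the stale
+data. The fix drops the ->nrpages guard:
+
+    ret = invalidate_inode_pages2_range(mapping,
+            pos >> PAGE_SHIFT, end);    /* now unconditional */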
+
+Also it doesn't feel right to check only for ->nrpages because
+invalidate_inode_pages2[_range] invalidates exceptional entries as well.
+
+Fix this by calling invalidate_inode_pages2[_range]() regardless of
+nrpages state.
+
+Note: nfs, cifs and 9p don't need a similar fix because they never call
+cleancache_get_page() (neither directly nor via mpage_readpage[s]()),
+so they are not affected by this bug.
+
+Fixes: c515e1fd361c ("mm/fs: add hooks to support cleancache")
+Link: http://lkml.kernel.org/r/20170424164135.22350-2-aryabinin@virtuozzo.com
+Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Acked-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Cc: Alexander Viro <viro@zeniv.linux.org.uk>
+Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Alexey Kuznetsov <kuznet@virtuozzo.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Nikolay Borisov <n.borisov.lkml@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/iomap.c | 20 +++++++++-----------
+ mm/filemap.c | 26 +++++++++++---------------
+ 2 files changed, 20 insertions(+), 26 deletions(-)
+
+--- a/fs/iomap.c
++++ b/fs/iomap.c
+@@ -887,16 +887,14 @@ iomap_dio_rw(struct kiocb *iocb, struct
+ flags |= IOMAP_WRITE;
+ }
+
+- if (mapping->nrpages) {
+- ret = filemap_write_and_wait_range(mapping, start, end);
+- if (ret)
+- goto out_free_dio;
+-
+- ret = invalidate_inode_pages2_range(mapping,
+- start >> PAGE_SHIFT, end >> PAGE_SHIFT);
+- WARN_ON_ONCE(ret);
+- ret = 0;
+- }
++ ret = filemap_write_and_wait_range(mapping, start, end);
++ if (ret)
++ goto out_free_dio;
++
++ ret = invalidate_inode_pages2_range(mapping,
++ start >> PAGE_SHIFT, end >> PAGE_SHIFT);
++ WARN_ON_ONCE(ret);
++ ret = 0;
+
+ inode_dio_begin(inode);
+
+@@ -951,7 +949,7 @@ iomap_dio_rw(struct kiocb *iocb, struct
+ * one is a pretty crazy thing to do, so we don't support it 100%. If
+ * this invalidation fails, tough, the write still worked...
+ */
+- if (iov_iter_rw(iter) == WRITE && mapping->nrpages) {
++ if (iov_iter_rw(iter) == WRITE) {
+ int err = invalidate_inode_pages2_range(mapping,
+ start >> PAGE_SHIFT, end >> PAGE_SHIFT);
+ WARN_ON_ONCE(err);
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -2719,18 +2719,16 @@ generic_file_direct_write(struct kiocb *
+ * about to write. We do this *before* the write so that we can return
+ * without clobbering -EIOCBQUEUED from ->direct_IO().
+ */
+- if (mapping->nrpages) {
+- written = invalidate_inode_pages2_range(mapping,
++ written = invalidate_inode_pages2_range(mapping,
+ pos >> PAGE_SHIFT, end);
+- /*
+- * If a page can not be invalidated, return 0 to fall back
+- * to buffered write.
+- */
+- if (written) {
+- if (written == -EBUSY)
+- return 0;
+- goto out;
+- }
++ /*
++ * If a page can not be invalidated, return 0 to fall back
++ * to buffered write.
++ */
++ if (written) {
++ if (written == -EBUSY)
++ return 0;
++ goto out;
+ }
+
+ data = *from;
+@@ -2744,10 +2742,8 @@ generic_file_direct_write(struct kiocb *
+ * so we don't support it 100%. If this invalidation
+ * fails, tough, the write still worked...
+ */
+- if (mapping->nrpages) {
+- invalidate_inode_pages2_range(mapping,
+- pos >> PAGE_SHIFT, end);
+- }
++ invalidate_inode_pages2_range(mapping,
++ pos >> PAGE_SHIFT, end);
+
+ if (written > 0) {
+ pos += written;
--- /dev/null
+From 81be3dee96346fbe08c31be5ef74f03f6b63cf68 Mon Sep 17 00:00:00 2001
+From: Michal Hocko <mhocko@suse.com>
+Date: Mon, 8 May 2017 15:57:24 -0700
+Subject: fs/xattr.c: zero out memory copied to userspace in getxattr
+
+From: Michal Hocko <mhocko@suse.com>
+
+commit 81be3dee96346fbe08c31be5ef74f03f6b63cf68 upstream.
+
+getxattr uses vmalloc to allocate memory if kzalloc fails. This buffer
+is filled by vfs_getxattr and then copied to userspace. vmalloc,
+however, doesn't zero out the memory, so if the specific implementation
+of the xattr handler is sloppy we can theoretically expose kernel
+memory. There is no real sign this is actually the case, but let's make
+sure it cannot happen and use vzalloc instead.
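+
+The exposure path, as a sketch (simplified from the getxattr() syscall;
+how much of the buffer a handler initializes is up to the filesystem):
+
+    error = vfs_getxattr(d, kname, kvalue, size);
+    /* a sloppy handler may return a length while having written fewer
+     * bytes; with plain vmalloc() the tail is uninitialized kernel
+     * memory, and the copy below would leak it to userspace */
+    if (error > 0 && copy_to_user(value, kvalue, error))
+        error = -EFAULT;
+
+Later kernels have kvzalloc() for exactly this kzalloc-then-vzalloc
+fallback.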
+
+Fixes: 779302e67835 ("fs/xattr.c:getxattr(): improve handling of allocation failures")
+Link: http://lkml.kernel.org/r/20170306103327.2766-1-mhocko@kernel.org
+Acked-by: Kees Cook <keescook@chromium.org>
+Reported-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Michal Hocko <mhocko@suse.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xattr.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/xattr.c
++++ b/fs/xattr.c
+@@ -530,7 +530,7 @@ getxattr(struct dentry *d, const char __
+ size = XATTR_SIZE_MAX;
+ kvalue = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
+ if (!kvalue) {
+- kvalue = vmalloc(size);
++ kvalue = vzalloc(size);
+ if (!kvalue)
+ return -ENOMEM;
+ }
--- /dev/null
+From 4be3a4fa51f432ef045546d16f25c68a1ab525b9 Mon Sep 17 00:00:00 2001
+From: Parav Pandit <parav@mellanox.com>
+Date: Sun, 19 Mar 2017 10:55:55 +0200
+Subject: IB/core: Fix kernel crash during fail to initialize device
+
+From: Parav Pandit <parav@mellanox.com>
+
+commit 4be3a4fa51f432ef045546d16f25c68a1ab525b9 upstream.
+
+This patch fixes the kernel crash that occurs when ib_dealloc_device()
+is called because the provider driver failed with an error after
+ib_alloc_device() and before it could register using
+ib_register_device().
+
+This crash, seen in the lab as below, can occur with any IB device
+that fails its device initialization before invoking
+ib_register_device().
+
+This patch avoids touching the cache and port immutable structures if
+the device is not yet initialized.
+It also releases the related memory when cache or port immutable data
+structure initialization fails during ib_register_device().
+
+[81416.561946] BUG: unable to handle kernel NULL pointer dereference at (null)
+[81416.570340] IP: ib_cache_release_one+0x29/0x80 [ib_core]
+[81416.576222] PGD 78da66067
+[81416.576223] PUD 7f2d7c067
+[81416.579484] PMD 0
+[81416.582720]
+[81416.587242] Oops: 0000 [#1] SMP
+[81416.722395] task: ffff8807887515c0 task.stack: ffffc900062c0000
+[81416.729148] RIP: 0010:ib_cache_release_one+0x29/0x80 [ib_core]
+[81416.735793] RSP: 0018:ffffc900062c3a90 EFLAGS: 00010202
+[81416.741823] RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000
+[81416.749785] RDX: 0000000000000000 RSI: 0000000000000282 RDI: ffff880859fec000
+[81416.757757] RBP: ffffc900062c3aa0 R08: ffff8808536e5ac0 R09: ffff880859fec5b0
+[81416.765708] R10: 00000000536e5c01 R11: ffff8808536e5ac0 R12: ffff880859fec000
+[81416.773672] R13: 0000000000000000 R14: ffff8808536e5ac0 R15: ffff88084ebc0060
+[81416.781621] FS: 00007fd879fab740(0000) GS:ffff88085fac0000(0000) knlGS:0000000000000000
+[81416.790522] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[81416.797094] CR2: 0000000000000000 CR3: 00000007eb215000 CR4: 00000000003406e0
+[81416.805051] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[81416.812997] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+[81416.820950] Call Trace:
+[81416.824226] ib_device_release+0x1e/0x40 [ib_core]
+[81416.829858] device_release+0x32/0xa0
+[81416.834370] kobject_cleanup+0x63/0x170
+[81416.839058] kobject_put+0x25/0x50
+[81416.843319] ib_dealloc_device+0x25/0x40 [ib_core]
+[81416.848986] mlx5_ib_add+0x163/0x1990 [mlx5_ib]
+[81416.854414] mlx5_add_device+0x5a/0x160 [mlx5_core]
+[81416.860191] mlx5_register_interface+0x8d/0xc0 [mlx5_core]
+[81416.866587] ? 0xffffffffa09e9000
+[81416.870816] mlx5_ib_init+0x15/0x17 [mlx5_ib]
+[81416.876094] do_one_initcall+0x51/0x1b0
+[81416.880861] ? __vunmap+0x85/0xd0
+[81416.885113] ? kmem_cache_alloc_trace+0x14b/0x1b0
+[81416.890768] ? vfree+0x2e/0x70
+[81416.894762] do_init_module+0x60/0x1fa
+[81416.899441] load_module+0x15f6/0x1af0
+[81416.904114] ? __symbol_put+0x60/0x60
+[81416.908709] ? ima_post_read_file+0x3d/0x80
+[81416.913828] ? security_kernel_post_read_file+0x6b/0x80
+[81416.920006] SYSC_finit_module+0xa6/0xf0
+[81416.924888] SyS_finit_module+0xe/0x10
+[81416.929568] entry_SYSCALL_64_fastpath+0x1a/0xa9
+[81416.935089] RIP: 0033:0x7fd879494949
+[81416.939543] RSP: 002b:00007ffdbc1b4e58 EFLAGS: 00000202 ORIG_RAX: 0000000000000139
+[81416.947982] RAX: ffffffffffffffda RBX: 0000000001b66f00 RCX: 00007fd879494949
+[81416.955965] RDX: 0000000000000000 RSI: 000000000041a13c RDI: 0000000000000003
+[81416.963926] RBP: 0000000000000003 R08: 0000000000000000 R09: 0000000001b652a0
+[81416.971861] R10: 0000000000000003 R11: 0000000000000202 R12: 00007ffdbc1b3e70
+[81416.979763] R13: 00007ffdbc1b3e50 R14: 0000000000000005 R15: 0000000000000000
+[81417.008005] RIP: ib_cache_release_one+0x29/0x80 [ib_core] RSP: ffffc900062c3a90
+[81417.016045] CR2: 0000000000000000
+
+Fixes: 55aeed0654 ("IB/core: Make ib_alloc_device init the kobject")
+Fixes: 7738613e7c ("IB/core: Add per port immutable struct to ib_device")
+Reviewed-by: Daniel Jurgens <danielj@mellanox.com>
+Signed-off-by: Parav Pandit <parav@mellanox.com>
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/core/device.c | 33 ++++++++++++++++++++++-----------
+ 1 file changed, 22 insertions(+), 11 deletions(-)
+
+--- a/drivers/infiniband/core/device.c
++++ b/drivers/infiniband/core/device.c
+@@ -172,8 +172,16 @@ static void ib_device_release(struct dev
+ {
+ struct ib_device *dev = container_of(device, struct ib_device, dev);
+
+- ib_cache_release_one(dev);
+- kfree(dev->port_immutable);
++ WARN_ON(dev->reg_state == IB_DEV_REGISTERED);
++ if (dev->reg_state == IB_DEV_UNREGISTERED) {
++ /*
++ * In IB_DEV_UNINITIALIZED state, cache or port table
++ * is not even created. Free cache and port table only when
++ * device reaches UNREGISTERED state.
++ */
++ ib_cache_release_one(dev);
++ kfree(dev->port_immutable);
++ }
+ kfree(dev);
+ }
+
+@@ -380,32 +388,27 @@ int ib_register_device(struct ib_device
+ ret = ib_cache_setup_one(device);
+ if (ret) {
+ pr_warn("Couldn't set up InfiniBand P_Key/GID cache\n");
+- goto out;
++ goto port_cleanup;
+ }
+
+ ret = ib_device_register_rdmacg(device);
+ if (ret) {
+ pr_warn("Couldn't register device with rdma cgroup\n");
+- ib_cache_cleanup_one(device);
+- goto out;
++ goto cache_cleanup;
+ }
+
+ memset(&device->attrs, 0, sizeof(device->attrs));
+ ret = device->query_device(device, &device->attrs, &uhw);
+ if (ret) {
+ pr_warn("Couldn't query the device attributes\n");
+- ib_device_unregister_rdmacg(device);
+- ib_cache_cleanup_one(device);
+- goto out;
++ goto cache_cleanup;
+ }
+
+ ret = ib_device_register_sysfs(device, port_callback);
+ if (ret) {
+ pr_warn("Couldn't register device %s with driver model\n",
+ device->name);
+- ib_device_unregister_rdmacg(device);
+- ib_cache_cleanup_one(device);
+- goto out;
++ goto cache_cleanup;
+ }
+
+ device->reg_state = IB_DEV_REGISTERED;
+@@ -417,6 +420,14 @@ int ib_register_device(struct ib_device
+ down_write(&lists_rwsem);
+ list_add_tail(&device->core_list, &device_list);
+ up_write(&lists_rwsem);
++ mutex_unlock(&device_mutex);
++ return 0;
++
++cache_cleanup:
++ ib_cache_cleanup_one(device);
++ ib_cache_release_one(device);
++port_cleanup:
++ kfree(device->port_immutable);
+ out:
+ mutex_unlock(&device_mutex);
+ return ret;
--- /dev/null
+From b312be3d87e4c80872cbea869e569175c5eb0f9a Mon Sep 17 00:00:00 2001
+From: Jack Morgenstein <jackm@dev.mellanox.co.il>
+Date: Sun, 19 Mar 2017 10:55:57 +0200
+Subject: IB/core: Fix sysfs registration error flow
+
+From: Jack Morgenstein <jackm@dev.mellanox.co.il>
+
+commit b312be3d87e4c80872cbea869e569175c5eb0f9a upstream.
+
+The kernel commit cited below restructured ib device management
+so that the device kobject is initialized in ib_alloc_device.
+
+As part of the restructuring, the kobject is now initialized in
+procedure ib_alloc_device, and is later added to the device hierarchy
+in the ib_register_device call stack, in procedure
+ib_device_register_sysfs (which calls device_add).
+
+However, in the ib_device_register_sysfs error flow, if an error
+occurs following the call to device_add, the cleanup procedure
+device_unregister is called. This call results in the device object
+being deleted -- which results in various use-after-free crashes.
+
+The correct cleanup call is device_del -- which undoes device_add
+without deleting the device object.
+
+The device object will then (correctly) be deleted in the
+ib_register_device caller's error cleanup flow, when the caller invokes
+ib_dealloc_device.
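+
+For reference, device_unregister() is essentially (drivers/base/core.c):
+
+    void device_unregister(struct device *dev)
+    {
+        device_del(dev);
+        put_device(dev);  /* extra reference drop -> premature free */
+    }
+
+so calling it in this error path dropped a reference that the
+ib_register_device() caller still owned.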
+
+Fixes: 55aeed06544f6 ("IB/core: Make ib_alloc_device init the kobject")
+Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/core/sysfs.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/infiniband/core/sysfs.c
++++ b/drivers/infiniband/core/sysfs.c
+@@ -1301,7 +1301,7 @@ err_put:
+ free_port_list_attributes(device);
+
+ err_unregister:
+- device_unregister(class_dev);
++ device_del(class_dev);
+
+ err:
+ return ret;
--- /dev/null
+From 8561eae60ff9417a50fa1fb2b83ae950dc5c1e21 Mon Sep 17 00:00:00 2001
+From: "Michael J. Ruhl" <michael.j.ruhl@intel.com>
+Date: Sun, 9 Apr 2017 10:15:51 -0700
+Subject: IB/core: For multicast functions, verify that LIDs are multicast LIDs
+
+From: Michael J. Ruhl <michael.j.ruhl@intel.com>
+
+commit 8561eae60ff9417a50fa1fb2b83ae950dc5c1e21 upstream.
+
+The Infiniband spec defines "A multicast address is defined by a
+MGID and a MLID" (section 10.5). Currently the MLID value is not
+validated.
+
+Add check to verify that the MLID value is in the correct address
+range.
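+
+Per the spec, multicast LIDs occupy 0xC000..0xFFFE, with 0xFFFF being
+the permissive LID, so the added check amounts to (sketch; the
+constants are stored in big-endian form):
+
+    /* IB_MULTICAST_LID_BASE == cpu_to_be16(0xC000),
+     * IB_LID_PERMISSIVE    == cpu_to_be16(0xFFFF)  */
+    if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
+        lid == be16_to_cpu(IB_LID_PERMISSIVE))
+        return -EINVAL;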
+
+Fixes: 0c33aeedb2cf ("[IB] Add checks to multicast attach and detach")
+Reviewed-by: Ira Weiny <ira.weiny@intel.com>
+Reviewed-by: Dasaratharaman Chandramouli <dasaratharaman.chandramouli@intel.com>
+Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
+Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
+Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/core/verbs.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/drivers/infiniband/core/verbs.c
++++ b/drivers/infiniband/core/verbs.c
+@@ -1519,7 +1519,9 @@ int ib_attach_mcast(struct ib_qp *qp, un
+
+ if (!qp->device->attach_mcast)
+ return -ENOSYS;
+- if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
++ if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD ||
++ lid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
++ lid == be16_to_cpu(IB_LID_PERMISSIVE))
+ return -EINVAL;
+
+ ret = qp->device->attach_mcast(qp, gid, lid);
+@@ -1535,7 +1537,9 @@ int ib_detach_mcast(struct ib_qp *qp, un
+
+ if (!qp->device->detach_mcast)
+ return -ENOSYS;
+- if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
++ if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD ||
++ lid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
++ lid == be16_to_cpu(IB_LID_PERMISSIVE))
+ return -EINVAL;
+
+ ret = qp->device->detach_mcast(qp, gid, lid);
--- /dev/null
+From b6eac931b9bb2bce4db7032c35b41e5e34ec22a5 Mon Sep 17 00:00:00 2001
+From: Mike Marciniszyn <mike.marciniszyn@intel.com>
+Date: Sun, 9 Apr 2017 10:16:35 -0700
+Subject: IB/hfi1: Prevent kernel QP post send hard lockups
+
+From: Mike Marciniszyn <mike.marciniszyn@intel.com>
+
+commit b6eac931b9bb2bce4db7032c35b41e5e34ec22a5 upstream.
+
+The driver progress routines can call cond_resched() when
+a timeslice is exhausted and irqs are enabled.
+
+If the ULP had been holding a spin lock without disabling irqs and
+the post send directly called the progress routine, the cond_resched()
+could yield allowing another thread from the same ULP to deadlock
+on that same lock.
+
+Correct by replacing the current hfi1_do_send() calldown with a unique
+one for post send and adding an argument to hfi1_do_send() to indicate
+that the send engine is running in a thread. If the routine is not
+running in a thread, avoid calling cond_resched().
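+
+The hazard, as a sketch (illustrative ULP code, not a specific caller);
+on a !CONFIG_PREEMPT kernel cond_resched() may schedule even though a
+spinlock is held:
+
+    spin_lock(&ulp_lock);           /* irqs left enabled          */
+    ib_post_send(qp, wr, &bad_wr);  /* calls directly into the    */
+                                    /* driver progress routine,   */
+                                    /* which hits cond_resched()  */
+    /* another ULP thread scheduled onto this CPU now spins on
+     * ulp_lock and never yields: hard lockup */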
+
+Fixes: Commit 831464ce4b74 ("IB/hfi1: Don't call cond_resched in atomic mode when sending packets")
+Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
+Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
+Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/hw/hfi1/ruc.c | 26 ++++++++++++++++----------
+ drivers/infiniband/hw/hfi1/verbs.c | 4 ++--
+ drivers/infiniband/hw/hfi1/verbs.h | 6 ++++--
+ 3 files changed, 22 insertions(+), 14 deletions(-)
+
+--- a/drivers/infiniband/hw/hfi1/ruc.c
++++ b/drivers/infiniband/hw/hfi1/ruc.c
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright(c) 2015, 2016 Intel Corporation.
++ * Copyright(c) 2015 - 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+@@ -784,23 +784,29 @@ void hfi1_make_ruc_header(struct rvt_qp
+ /* when sending, force a reschedule every one of these periods */
+ #define SEND_RESCHED_TIMEOUT (5 * HZ) /* 5s in jiffies */
+
++void hfi1_do_send_from_rvt(struct rvt_qp *qp)
++{
++ hfi1_do_send(qp, false);
++}
++
+ void _hfi1_do_send(struct work_struct *work)
+ {
+ struct iowait *wait = container_of(work, struct iowait, iowork);
+ struct rvt_qp *qp = iowait_to_qp(wait);
+
+- hfi1_do_send(qp);
++ hfi1_do_send(qp, true);
+ }
+
+ /**
+ * hfi1_do_send - perform a send on a QP
+ * @work: contains a pointer to the QP
++ * @in_thread: true if in a workqueue thread
+ *
+ * Process entries in the send work queue until credit or queue is
+ * exhausted. Only allow one CPU to send a packet per QP.
+ * Otherwise, two threads could send packets out of order.
+ */
+-void hfi1_do_send(struct rvt_qp *qp)
++void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
+ {
+ struct hfi1_pkt_state ps;
+ struct hfi1_qp_priv *priv = qp->priv;
+@@ -868,8 +874,10 @@ void hfi1_do_send(struct rvt_qp *qp)
+ qp->s_hdrwords = 0;
+ /* allow other tasks to run */
+ if (unlikely(time_after(jiffies, timeout))) {
+- if (workqueue_congested(cpu,
+- ps.ppd->hfi1_wq)) {
++ if (!in_thread ||
++ workqueue_congested(
++ cpu,
++ ps.ppd->hfi1_wq)) {
+ spin_lock_irqsave(
+ &qp->s_lock,
+ ps.flags);
+@@ -882,11 +890,9 @@ void hfi1_do_send(struct rvt_qp *qp)
+ *ps.ppd->dd->send_schedule);
+ return;
+ }
+- if (!irqs_disabled()) {
+- cond_resched();
+- this_cpu_inc(
+- *ps.ppd->dd->send_schedule);
+- }
++ cond_resched();
++ this_cpu_inc(
++ *ps.ppd->dd->send_schedule);
+ timeout = jiffies + (timeout_int) / 8;
+ }
+ spin_lock_irqsave(&qp->s_lock, ps.flags);
+--- a/drivers/infiniband/hw/hfi1/verbs.c
++++ b/drivers/infiniband/hw/hfi1/verbs.c
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright(c) 2015, 2016 Intel Corporation.
++ * Copyright(c) 2015 - 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+@@ -1751,7 +1751,7 @@ int hfi1_register_ib_device(struct hfi1_
+ dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free;
+ dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps;
+ dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset;
+- dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send;
++ dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send_from_rvt;
+ dd->verbs_dev.rdi.driver_f.schedule_send = hfi1_schedule_send;
+ dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _hfi1_schedule_send;
+ dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = get_pmtu_from_attr;
+--- a/drivers/infiniband/hw/hfi1/verbs.h
++++ b/drivers/infiniband/hw/hfi1/verbs.h
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright(c) 2015, 2016 Intel Corporation.
++ * Copyright(c) 2015 - 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+@@ -350,7 +350,9 @@ void hfi1_make_ruc_header(struct rvt_qp
+
+ void _hfi1_do_send(struct work_struct *work);
+
+-void hfi1_do_send(struct rvt_qp *qp);
++void hfi1_do_send_from_rvt(struct rvt_qp *qp);
++
++void hfi1_do_send(struct rvt_qp *qp, bool in_thread);
+
+ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
+ enum ib_wc_status status);
--- /dev/null
+From 771a52584096c45e4565e8aabb596eece9d73d61 Mon Sep 17 00:00:00 2001
+From: Shamir Rabinovitch <shamir.rabinovitch@oracle.com>
+Date: Wed, 29 Mar 2017 06:21:59 -0400
+Subject: IB/IPoIB: ibX: failed to create mcg debug file
+
+From: Shamir Rabinovitch <shamir.rabinovitch@oracle.com>
+
+commit 771a52584096c45e4565e8aabb596eece9d73d61 upstream.
+
+When udev renames a netdev device, the ipoib debugfs entries do not
+get renamed. As a result, if a subsequent probe of an ipoib device
+reuses the name, creating a debugfs entry for the new device fails.
+
+Also, move ipoib_create_debug_files and ipoib_delete_debug_files into
+the ipoib event handling in order to avoid any race condition between
+them.
+
+Fixes: 1732b0ef3b3a ("[IPoIB] add path record information in debugfs")
+Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
+Signed-off-by: Shamir Rabinovitch <shamir.rabinovitch@oracle.com>
+Reviewed-by: Mark Bloch <markb@mellanox.com>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/ulp/ipoib/ipoib_fs.c | 3 ++
+ drivers/infiniband/ulp/ipoib/ipoib_main.c | 44 ++++++++++++++++++++++++++----
+ drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 3 --
+ 3 files changed, 42 insertions(+), 8 deletions(-)
+
+--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
++++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+@@ -281,8 +281,11 @@ void ipoib_delete_debug_files(struct net
+ {
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+
++ WARN_ONCE(!priv->mcg_dentry, "null mcg debug file\n");
++ WARN_ONCE(!priv->path_dentry, "null path debug file\n");
+ debugfs_remove(priv->mcg_dentry);
+ debugfs_remove(priv->path_dentry);
++ priv->mcg_dentry = priv->path_dentry = NULL;
+ }
+
+ int ipoib_register_debugfs(void)
+--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
++++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
+@@ -108,6 +108,33 @@ static struct ib_client ipoib_client = {
+ .get_net_dev_by_params = ipoib_get_net_dev_by_params,
+ };
+
++#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
++static int ipoib_netdev_event(struct notifier_block *this,
++ unsigned long event, void *ptr)
++{
++ struct netdev_notifier_info *ni = ptr;
++ struct net_device *dev = ni->dev;
++
++ if (dev->netdev_ops->ndo_open != ipoib_open)
++ return NOTIFY_DONE;
++
++ switch (event) {
++ case NETDEV_REGISTER:
++ ipoib_create_debug_files(dev);
++ break;
++ case NETDEV_CHANGENAME:
++ ipoib_delete_debug_files(dev);
++ ipoib_create_debug_files(dev);
++ break;
++ case NETDEV_UNREGISTER:
++ ipoib_delete_debug_files(dev);
++ break;
++ }
++
++ return NOTIFY_DONE;
++}
++#endif
++
+ int ipoib_open(struct net_device *dev)
+ {
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+@@ -1674,8 +1701,6 @@ void ipoib_dev_cleanup(struct net_device
+
+ ASSERT_RTNL();
+
+- ipoib_delete_debug_files(dev);
+-
+ /* Delete any child interfaces first */
+ list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
+ /* Stop GC on child */
+@@ -2090,8 +2115,6 @@ static struct net_device *ipoib_add_port
+ goto register_failed;
+ }
+
+- ipoib_create_debug_files(priv->dev);
+-
+ if (ipoib_cm_add_mode_attr(priv->dev))
+ goto sysfs_failed;
+ if (ipoib_add_pkey_attr(priv->dev))
+@@ -2106,7 +2129,6 @@ static struct net_device *ipoib_add_port
+ return priv->dev;
+
+ sysfs_failed:
+- ipoib_delete_debug_files(priv->dev);
+ unregister_netdev(priv->dev);
+
+ register_failed:
+@@ -2191,6 +2213,12 @@ static void ipoib_remove_one(struct ib_d
+ kfree(dev_list);
+ }
+
++#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
++static struct notifier_block ipoib_netdev_notifier = {
++ .notifier_call = ipoib_netdev_event,
++};
++#endif
++
+ static int __init ipoib_init_module(void)
+ {
+ int ret;
+@@ -2243,6 +2271,9 @@ static int __init ipoib_init_module(void
+ if (ret)
+ goto err_client;
+
++#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
++ register_netdevice_notifier(&ipoib_netdev_notifier);
++#endif
+ return 0;
+
+ err_client:
+@@ -2260,6 +2291,9 @@ err_fs:
+
+ static void __exit ipoib_cleanup_module(void)
+ {
++#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
++ unregister_netdevice_notifier(&ipoib_netdev_notifier);
++#endif
+ ipoib_netlink_fini();
+ ib_unregister_client(&ipoib_client);
+ ib_sa_unregister_client(&ipoib_sa_client);
+--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
++++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+@@ -86,8 +86,6 @@ int __ipoib_vlan_add(struct ipoib_dev_pr
+ goto register_failed;
+ }
+
+- ipoib_create_debug_files(priv->dev);
+-
+ /* RTNL childs don't need proprietary sysfs entries */
+ if (type == IPOIB_LEGACY_CHILD) {
+ if (ipoib_cm_add_mode_attr(priv->dev))
+@@ -108,7 +106,6 @@ int __ipoib_vlan_add(struct ipoib_dev_pr
+
+ sysfs_failed:
+ result = -ENOMEM;
+- ipoib_delete_debug_files(priv->dev);
+ unregister_netdevice(priv->dev);
+
+ register_failed:
--- /dev/null
+From 99e68909d5aba1861897fe7afc3306c3c81b6de0 Mon Sep 17 00:00:00 2001
+From: Jack Morgenstein <jackm@dev.mellanox.co.il>
+Date: Tue, 21 Mar 2017 12:57:05 +0200
+Subject: IB/mlx4: Fix ib device initialization error flow
+
+From: Jack Morgenstein <jackm@dev.mellanox.co.il>
+
+commit 99e68909d5aba1861897fe7afc3306c3c81b6de0 upstream.
+
+In mlx4_ib_add, procedure mlx4_ib_alloc_eqs is called to allocate EQs.
+
+However, in the mlx4_ib_add error flow, procedure mlx4_ib_free_eqs is not
+called to free the allocated EQs.
+
+Fixes: e605b743f33d ("IB/mlx4: Increase the number of vectors (EQs) available for ULPs")
+Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/hw/mlx4/main.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/infiniband/hw/mlx4/main.c
++++ b/drivers/infiniband/hw/mlx4/main.c
+@@ -2941,6 +2941,7 @@ err_counter:
+ mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[i]);
+
+ err_map:
++ mlx4_ib_free_eqs(dev, ibdev);
+ iounmap(ibdev->uar_map);
+
+ err_uar:
--- /dev/null
+From fb7a91746af18b2ebf596778b38a709cdbc488d3 Mon Sep 17 00:00:00 2001
+From: Jack Morgenstein <jackm@dev.mellanox.co.il>
+Date: Tue, 21 Mar 2017 12:57:06 +0200
+Subject: IB/mlx4: Reduce SRIOV multicast cleanup warning message to debug level
+
+From: Jack Morgenstein <jackm@dev.mellanox.co.il>
+
+commit fb7a91746af18b2ebf596778b38a709cdbc488d3 upstream.
+
+A warning message during SRIOV multicast cleanup should have actually been
+a debug level message. The condition generating the warning does no harm
+and can fill the message log.
+
+In some cases, during testing, some tests were so intense as to swamp the
+message log with these warning messages, causing a stall in the console
+message log output task. This stall caused an NMI to be sent to all CPUs
+(so that they all dumped their stacks into the message log).
+Aside from the message flood causing an NMI, the tests all passed.
+
+Once the message flood which caused the NMI is removed (by reducing the
+warning message to debug level), the NMI no longer occurs.
+
+Sample message log (console log) output illustrating the flood and
+resultant NMI (snippets with comments and modified with ... instead
+of hex digits, to satisfy checkpatch.pl):
+
+ <mlx4_ib> _mlx4_ib_mcg_port_cleanup: ... WARNING: group refcount 1!!!...
+ *** About 4000 almost identical lines in less than one second ***
+ <mlx4_ib> _mlx4_ib_mcg_port_cleanup: ... WARNING: group refcount 1!!!...
+ INFO: rcu_sched detected stalls on CPUs/tasks: { 17} (...)
+ *** { 17} above indicates that CPU 17 was the one that stalled ***
+ sending NMI to all CPUs:
+ ...
+ NMI backtrace for cpu 17
+ CPU: 17 PID: 45909 Comm: kworker/17:2
+ Hardware name: HP ProLiant DL360p Gen8, BIOS P71 09/08/2013
+ Workqueue: events fb_flashcursor
+ task: ffff880478...... ti: ffff88064e...... task.ti: ffff88064e......
+ RIP: 0010:[ffffffff81......] [ffffffff81......] io_serial_in+0x15/0x20
+ RSP: 0018:ffff88064e257cb0 EFLAGS: 00000002
+ RAX: 0000000000...... RBX: ffffffff81...... RCX: 0000000000......
+ RDX: 0000000000...... RSI: 0000000000...... RDI: ffffffff81......
+ RBP: ffff88064e...... R08: ffffffff81...... R09: 0000000000......
+ R10: 0000000000...... R11: ffff88064e...... R12: 0000000000......
+ R13: 0000000000...... R14: ffffffff81...... R15: 0000000000......
+ FS: 0000000000......(0000) GS:ffff8804af......(0000) knlGS:000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080......
+ CR2: 00007f2a2f...... CR3: 0000000001...... CR4: 0000000000......
+ DR0: 0000000000...... DR1: 0000000000...... DR2: 0000000000......
+ DR3: 0000000000...... DR6: 00000000ff...... DR7: 0000000000......
+ Stack:
+ ffff88064e...... ffffffff81...... ffffffff81...... 0000000000......
+ ffffffff81...... ffff88064e...... ffffffff81...... ffffffff81......
+ ffffffff81...... ffff88064e...... ffffffff81...... 0000000000......
+ Call Trace:
+[<ffffffff813d099b>] wait_for_xmitr+0x3b/0xa0
+[<ffffffff813d0b5c>] serial8250_console_putchar+0x1c/0x30
+[<ffffffff813d0b40>] ? serial8250_console_write+0x140/0x140
+[<ffffffff813cb5fa>] uart_console_write+0x3a/0x80
+[<ffffffff813d0aae>] serial8250_console_write+0xae/0x140
+[<ffffffff8107c4d1>] call_console_drivers.constprop.15+0x91/0xf0
+[<ffffffff8107d6cf>] console_unlock+0x3bf/0x400
+[<ffffffff813503cd>] fb_flashcursor+0x5d/0x140
+[<ffffffff81355c30>] ? bit_clear+0x120/0x120
+[<ffffffff8109d5fb>] process_one_work+0x17b/0x470
+[<ffffffff8109e3cb>] worker_thread+0x11b/0x400
+[<ffffffff8109e2b0>] ? rescuer_thread+0x400/0x400
+[<ffffffff810a5aef>] kthread+0xcf/0xe0
+[<ffffffff810a5a20>] ? kthread_create_on_node+0x140/0x140
+[<ffffffff81645858>] ret_from_fork+0x58/0x90
+[<ffffffff810a5a20>] ? kthread_create_on_node+0x140/0x140
+Code: 48 89 e5 d3 e6 48 63 f6 48 03 77 10 8b 06 5d c3 66 0f 1f 44 00 00 66 66 66 6
+
+As indicated in the stack trace above, the console output task got swamped.
+
+Fixes: b9c5d6a64358 ("IB/mlx4: Add multicast group (MCG) paravirtualization for SR-IOV")
+Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/hw/mlx4/mcg.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/infiniband/hw/mlx4/mcg.c
++++ b/drivers/infiniband/hw/mlx4/mcg.c
+@@ -1102,7 +1102,8 @@ static void _mlx4_ib_mcg_port_cleanup(st
+ while ((p = rb_first(&ctx->mcg_table)) != NULL) {
+ group = rb_entry(p, struct mcast_group, node);
+ if (atomic_read(&group->refcount))
+- mcg_warn_group(group, "group refcount %d!!! (pointer %p)\n", atomic_read(&group->refcount), group);
++ mcg_debug_group(group, "group refcount %d!!! (pointer %p)\n",
++ atomic_read(&group->refcount), group);
+
+ force_clean_group(group);
+ }
--- /dev/null
+From a6a5993243550b09f620941dea741b7421fdf79c Mon Sep 17 00:00:00 2001
+From: Ding Tianhong <dingtianhong@huawei.com>
+Date: Sat, 29 Apr 2017 10:38:48 +0800
+Subject: iov_iter: don't revert iov buffer if csum error
+
+From: Ding Tianhong <dingtianhong@huawei.com>
+
+commit a6a5993243550b09f620941dea741b7421fdf79c upstream.
+
+Commit 327868212381 ("make skb_copy_datagram_msg() et.al. preserve
+->msg_iter on error") reverts the iov buffer if the copy to the
+iterator fails, but no datagram has been copied yet when the
+skb_checksum_complete() check fails, so there is no need to revert any
+data at that point.
+
+v2: Sabrina noticed that returning -EFAULT on a checksum error is not
+    correct here, as it would confuse the caller about the return
+    value, so fix it.
+
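+In sketch form, the two failure paths now behave differently (an
+annotated restatement of the hunks below, not new code):
+
+    /* checksum fails before anything was copied: the iterator is
+     * untouched, so just report the error without reverting */
+    if (__skb_checksum_complete(skb))
+        return -EINVAL;
+
+    /* checksum fails after skb_copy_and_csum_datagram() has filled
+     * the iterator: undo exactly the bytes that were copied */
+    if (csum_fold(csum)) {
+        iov_iter_revert(&msg->msg_iter, chunk);
+        return -EINVAL;
+    }
+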
+Fixes: 327868212381 ("make skb_copy_datagram_msg() et.al. preserve->msg_iter on error")
+Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
+Acked-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/core/datagram.c | 13 +++++++------
+ 1 file changed, 7 insertions(+), 6 deletions(-)
+
+--- a/net/core/datagram.c
++++ b/net/core/datagram.c
+@@ -760,7 +760,7 @@ int skb_copy_and_csum_datagram_msg(struc
+
+ if (msg_data_left(msg) < chunk) {
+ if (__skb_checksum_complete(skb))
+- goto csum_error;
++ return -EINVAL;
+ if (skb_copy_datagram_msg(skb, hlen, msg, chunk))
+ goto fault;
+ } else {
+@@ -768,15 +768,16 @@ int skb_copy_and_csum_datagram_msg(struc
+ if (skb_copy_and_csum_datagram(skb, hlen, &msg->msg_iter,
+ chunk, &csum))
+ goto fault;
+- if (csum_fold(csum))
+- goto csum_error;
++
++ if (csum_fold(csum)) {
++ iov_iter_revert(&msg->msg_iter, chunk);
++ return -EINVAL;
++ }
++
+ if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
+ netdev_rx_csum_fault(skb->dev);
+ }
+ return 0;
+-csum_error:
+- iov_iter_revert(&msg->msg_iter, chunk);
+- return -EINVAL;
+ fault:
+ return -EFAULT;
+ }
--- /dev/null
+From 5052b069acf73866d00077d8bc49983c3ee903e5 Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Sat, 29 Apr 2017 21:07:30 -0400
+Subject: jbd2: fix dbench4 performance regression for 'nobarrier' mounts
+
+From: Jan Kara <jack@suse.cz>
+
+commit 5052b069acf73866d00077d8bc49983c3ee903e5 upstream.
+
+Commit b685d3d65ac7 ("block: treat REQ_FUA and REQ_PREFLUSH as
+synchronous") removed the REQ_SYNC flag from the WRITE_FUA
+implementation. Since JBD2 strips the REQ_FUA and REQ_FLUSH flags from
+submitted IO when the filesystem is mounted with the nobarrier mount
+option, journal superblock writes ended up being async writes after
+this patch, and that caused a heavy performance regression for the
+dbench4 benchmark with a high number of processes. In my test setup
+with an HP RAID array with non-volatile write cache and 32 GB of RAM,
+dbench4 runs with 8 processes regressed by ~25%.
+
+Fix the problem by making sure journal superblock writes are always
+treated as synchronous since they generally block progress of the
+journalling machinery and thus the whole filesystem.
+
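+For context, a simplified sketch of the interaction (the flag stripping
+is paraphrased from fs/jbd2/journal.c; in current code the flush flag
+is spelled REQ_PREFLUSH):
+
+    static int jbd2_write_superblock(journal_t *journal, int write_flags)
+    {
+        struct buffer_head *bh = journal->j_sb_buffer;
+        ...
+        /* 'nobarrier' strips the flags that used to imply REQ_SYNC */
+        if (!(journal->j_flags & JBD2_BARRIER))
+            write_flags &= ~(REQ_FUA | REQ_PREFLUSH);
+        ...
+        /* the fix: force synchronous treatment unconditionally */
+        ret = submit_bh(REQ_OP_WRITE, write_flags | REQ_SYNC, bh);
+        ...
+    }
+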
+Fixes: b685d3d65ac7 ("block: treat REQ_FUA and REQ_PREFLUSH as synchronous")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/jbd2/journal.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/jbd2/journal.c
++++ b/fs/jbd2/journal.c
+@@ -1348,7 +1348,7 @@ static int jbd2_write_superblock(journal
+ jbd2_superblock_csum_set(journal, sb);
+ get_bh(bh);
+ bh->b_end_io = end_buffer_write_sync;
+- ret = submit_bh(REQ_OP_WRITE, write_flags, bh);
++ ret = submit_bh(REQ_OP_WRITE, write_flags | REQ_SYNC, bh);
+ wait_on_buffer(bh);
+ if (buffer_write_io_error(bh)) {
+ clear_buffer_write_io_error(bh);
--- /dev/null
+From 0c9d5b127f695818c2c5a3868c1f28ca2969e905 Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.com>
+Date: Thu, 6 Apr 2017 12:06:37 +1000
+Subject: md/raid1: avoid reusing a resync bio after error handling.
+
+From: NeilBrown <neilb@suse.com>
+
+commit 0c9d5b127f695818c2c5a3868c1f28ca2969e905 upstream.
+
+fix_sync_read_error() modifies a bio on a newly faulty
+device by setting bi_end_io to end_sync_write.
+This ensures that put_buf() will still call rdev_dec_pending()
+as required, but makes sure that subsequent code in
+fix_sync_read_error() doesn't try to read from the device.
+
+Unfortunately this interacts badly with sync_request_write()
+which assumes that any bio with bi_end_io set to non-NULL
+other than end_sync_read is safe to write to.
+
+As the device is now faulty it doesn't make sense to write.
+As the bio was recently used for a read, it is "dirty"
+and not suitable for immediate submission.
+In particular, ->bi_next might be non-NULL, which will cause
+generic_make_request() to complain.
+
+Break this interaction by refusing to write to devices
+which are marked as Faulty.
+
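+The guard itself is a single check (restated from the hunk below, with
+the reasoning as comments):
+
+    /* A device that has just been marked Faulty had its resync bio
+     * repurposed by fix_sync_read_error(); the bio is "dirty" (e.g.
+     * ->bi_next may be non-NULL, which generic_make_request() would
+     * complain about), so never submit a write to it. */
+    if (test_bit(Faulty, &conf->mirrors[i].rdev->flags))
+        continue;
+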
+Reported-and-tested-by: Michael Wang <yun.wang@profitbricks.com>
+Fixes: 2e52d449bcec ("md/raid1: add failfast handling for reads.")
+Signed-off-by: NeilBrown <neilb@suse.com>
+Signed-off-by: Shaohua Li <shli@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/raid1.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/md/raid1.c
++++ b/drivers/md/raid1.c
+@@ -2222,6 +2222,8 @@ static void sync_request_write(struct md
+ (i == r1_bio->read_disk ||
+ !test_bit(MD_RECOVERY_SYNC, &mddev->recovery))))
+ continue;
++ if (test_bit(Faulty, &conf->mirrors[i].rdev->flags))
++ continue;
+
+ bio_set_op_attrs(wbio, REQ_OP_WRITE, 0);
+ if (test_bit(FailFast, &conf->mirrors[i].rdev->flags))
--- /dev/null
+From 62be1511b1db8066220b18b7d4da2e6b9fdc69fb Mon Sep 17 00:00:00 2001
+From: Vlastimil Babka <vbabka@suse.cz>
+Date: Mon, 8 May 2017 15:59:46 -0700
+Subject: mm: prevent potential recursive reclaim due to clearing PF_MEMALLOC
+
+From: Vlastimil Babka <vbabka@suse.cz>
+
+commit 62be1511b1db8066220b18b7d4da2e6b9fdc69fb upstream.
+
+Patch series "more robust PF_MEMALLOC handling"
+
+This series aims to unify the setting and clearing of PF_MEMALLOC, which
+prevents recursive reclaim. There are some places that clear the flag
+unconditionally from current->flags, which may result in clearing a
+pre-existing flag. This already resulted in a bug report that Patch 1
+fixes (without the new helpers, to make backporting easier). Patch 2
+introduces the new helpers, modelled after existing memalloc_noio_* and
+memalloc_nofs_* helpers, and converts mm core to use them. Patches 3
+and 4 convert non-mm code.
+
+This patch (of 4):
+
+__alloc_pages_direct_compact() sets PF_MEMALLOC to prevent deadlock
+during page migration by lock_page() (see the comment in
+__unmap_and_move()). Then it unconditionally clears the flag, which can
+clear a pre-existing PF_MEMALLOC flag and result in recursive reclaim.
+This was not a problem until commit a8161d1ed609 ("mm, page_alloc:
+restructure direct compaction handling in slowpath"), because direct
+compaction was called only after direct reclaim, which was skipped when
+the PF_MEMALLOC flag was set.
+
+Even now it's only a theoretical issue, as the new callsite of
+__alloc_pages_direct_compact() is reached only for costly orders and
+when gfp_pfmemalloc_allowed() is true, which means either
+__GFP_NOMEMALLOC is in gfp_flags or in_interrupt() is true. There is no
+such known context, but let's play it safe and make
+__alloc_pages_direct_compact() robust for cases where PF_MEMALLOC is
+already set.
+
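+The underlying idiom is "save, set, restore" rather than "set, clear".
+Patch 2 of the series wraps it in helpers modelled after the
+memalloc_noio_* ones; roughly (a sketch of the intended shape, the
+exact upstream form may differ):
+
+    static inline unsigned int memalloc_noreclaim_save(void)
+    {
+        unsigned int flags = current->flags & PF_MEMALLOC;
+
+        current->flags |= PF_MEMALLOC;
+        return flags;          /* remember whether it was already set */
+    }
+
+    static inline void memalloc_noreclaim_restore(unsigned int flags)
+    {
+        /* clear the bit only if it was clear on entry */
+        current->flags = (current->flags & ~PF_MEMALLOC) | flags;
+    }
+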
+Fixes: a8161d1ed609 ("mm, page_alloc: restructure direct compaction handling in slowpath")
+Link: http://lkml.kernel.org/r/20170405074700.29871-2-vbabka@suse.cz
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Reported-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Boris Brezillon <boris.brezillon@free-electrons.com>
+Cc: Chris Leech <cleech@redhat.com>
+Cc: "David S. Miller" <davem@davemloft.net>
+Cc: Eric Dumazet <edumazet@google.com>
+Cc: Josef Bacik <jbacik@fb.com>
+Cc: Lee Duncan <lduncan@suse.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Richard Weinberger <richard@nod.at>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/page_alloc.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3245,6 +3245,7 @@ __alloc_pages_direct_compact(gfp_t gfp_m
+ enum compact_priority prio, enum compact_result *compact_result)
+ {
+ struct page *page;
++ unsigned int noreclaim_flag = current->flags & PF_MEMALLOC;
+
+ if (!order)
+ return NULL;
+@@ -3252,7 +3253,7 @@ __alloc_pages_direct_compact(gfp_t gfp_m
+ current->flags |= PF_MEMALLOC;
+ *compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
+ prio);
+- current->flags &= ~PF_MEMALLOC;
++ current->flags = (current->flags & ~PF_MEMALLOC) | noreclaim_flag;
+
+ if (*compact_result <= COMPACT_INACTIVE)
+ return NULL;
--- /dev/null
+From 2a2e48854d704214dac7546e87ae0e4daa0e61a0 Mon Sep 17 00:00:00 2001
+From: Johannes Weiner <hannes@cmpxchg.org>
+Date: Wed, 3 May 2017 14:55:03 -0700
+Subject: mm: vmscan: fix IO/refault regression in cache workingset transition
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+commit 2a2e48854d704214dac7546e87ae0e4daa0e61a0 upstream.
+
+Since commit 59dc76b0d4df ("mm: vmscan: reduce size of inactive file
+list") we noticed bigger IO spikes during changes in cache access
+patterns.
+
+The patch in question shrunk the inactive list size to leave more room
+for the current workingset in the presence of streaming IO. However,
+workingset transitions that previously happened on the inactive list are
+now pushed out of memory and incur more refaults to complete.
+
+This patch disables active list protection when refaults are being
+observed. This accelerates workingset transitions, and allows more of
+the new set to establish itself from memory, without eating into the
+ability to protect the established workingset during stable periods.
+
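+Concretely, the baseline protection ratio that gets bypassed is
+computed as follows (a worked restatement of the hunk below):
+
+    gb = (inactive + active) >> (30 - PAGE_SHIFT);
+    if (gb)
+        inactive_ratio = int_sqrt(10 * gb); /* e.g. 16 GB -> int_sqrt(160) = 12 */
+    else
+        inactive_ratio = 1;
+
+    /* When refaults are observed on the file LRU, the patch forces
+     * inactive_ratio = 0, so "inactive * inactive_ratio < active" is
+     * always true and the active list becomes eligible for shrinking. */
+    return inactive * inactive_ratio < active;
+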
+The workloads that were measurably affected for us were hit pretty bad
+by it, with refault/majfault rates doubling and tripling during cache
+transitions, and the machines sustaining half-hour periods of 100% IO
+utilization, where they'd previously have sub-minute peaks at 60-90%.
+
+Stateful services that handle user data tend to be more conservative
+with kernel upgrades. As a result we hit most page cache issues with
+some delay, as was the case here.
+
+The severity seemed to warrant a stable tag.
+
+Fixes: 59dc76b0d4df ("mm: vmscan: reduce size of inactive file list")
+Link: http://lkml.kernel.org/r/20170404220052.27593-1-hannes@cmpxchg.org
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/memcontrol.h | 64 +++++++++++++++++++++++++++++-
+ include/linux/mmzone.h | 2
+ mm/memcontrol.c | 24 +++--------
+ mm/vmscan.c | 94 +++++++++++++++++++++++++++++++++++----------
+ mm/workingset.c | 7 ++-
+ 5 files changed, 150 insertions(+), 41 deletions(-)
+
+--- a/include/linux/memcontrol.h
++++ b/include/linux/memcontrol.h
+@@ -56,6 +56,9 @@ enum mem_cgroup_stat_index {
+ MEMCG_SLAB_RECLAIMABLE,
+ MEMCG_SLAB_UNRECLAIMABLE,
+ MEMCG_SOCK,
++ MEMCG_WORKINGSET_REFAULT,
++ MEMCG_WORKINGSET_ACTIVATE,
++ MEMCG_WORKINGSET_NODERECLAIM,
+ MEMCG_NR_STAT,
+ };
+
+@@ -494,6 +497,40 @@ extern int do_swap_account;
+ void lock_page_memcg(struct page *page);
+ void unlock_page_memcg(struct page *page);
+
++static inline unsigned long mem_cgroup_read_stat(struct mem_cgroup *memcg,
++ enum mem_cgroup_stat_index idx)
++{
++ long val = 0;
++ int cpu;
++
++ for_each_possible_cpu(cpu)
++ val += per_cpu(memcg->stat->count[idx], cpu);
++
++ if (val < 0)
++ val = 0;
++
++ return val;
++}
++
++static inline void mem_cgroup_update_stat(struct mem_cgroup *memcg,
++ enum mem_cgroup_stat_index idx, int val)
++{
++ if (!mem_cgroup_disabled())
++ this_cpu_add(memcg->stat->count[idx], val);
++}
++
++static inline void mem_cgroup_inc_stat(struct mem_cgroup *memcg,
++ enum mem_cgroup_stat_index idx)
++{
++ mem_cgroup_update_stat(memcg, idx, 1);
++}
++
++static inline void mem_cgroup_dec_stat(struct mem_cgroup *memcg,
++ enum mem_cgroup_stat_index idx)
++{
++ mem_cgroup_update_stat(memcg, idx, -1);
++}
++
+ /**
+ * mem_cgroup_update_page_stat - update page state statistics
+ * @page: the page
+@@ -508,14 +545,14 @@ void unlock_page_memcg(struct page *page
+ * if (TestClearPageState(page))
+ * mem_cgroup_update_page_stat(page, state, -1);
+ * unlock_page(page) or unlock_page_memcg(page)
++ *
++ * Kernel pages are an exception to this, since they'll never move.
+ */
+ static inline void mem_cgroup_update_page_stat(struct page *page,
+ enum mem_cgroup_stat_index idx, int val)
+ {
+- VM_BUG_ON(!(rcu_read_lock_held() || PageLocked(page)));
+-
+ if (page->mem_cgroup)
+- this_cpu_add(page->mem_cgroup->stat->count[idx], val);
++ mem_cgroup_update_stat(page->mem_cgroup, idx, val);
+ }
+
+ static inline void mem_cgroup_inc_page_stat(struct page *page,
+@@ -740,6 +777,27 @@ static inline bool mem_cgroup_oom_synchr
+ return false;
+ }
+
++static inline unsigned long mem_cgroup_read_stat(struct mem_cgroup *memcg,
++ enum mem_cgroup_stat_index idx)
++{
++ return 0;
++}
++
++static inline void mem_cgroup_update_stat(struct mem_cgroup *memcg,
++ enum mem_cgroup_stat_index idx, int val)
++{
++}
++
++static inline void mem_cgroup_inc_stat(struct mem_cgroup *memcg,
++ enum mem_cgroup_stat_index idx)
++{
++}
++
++static inline void mem_cgroup_dec_stat(struct mem_cgroup *memcg,
++ enum mem_cgroup_stat_index idx)
++{
++}
++
+ static inline void mem_cgroup_update_page_stat(struct page *page,
+ enum mem_cgroup_stat_index idx,
+ int nr)
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -226,6 +226,8 @@ struct lruvec {
+ struct zone_reclaim_stat reclaim_stat;
+ /* Evictions & activations on the inactive file list */
+ atomic_long_t inactive_age;
++ /* Refaults at the time of last reclaim cycle */
++ unsigned long refaults;
+ #ifdef CONFIG_MEMCG
+ struct pglist_data *pgdat;
+ #endif
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -568,23 +568,6 @@ mem_cgroup_largest_soft_limit_node(struc
+ * common workload, threshold and synchronization as vmstat[] should be
+ * implemented.
+ */
+-static unsigned long
+-mem_cgroup_read_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx)
+-{
+- long val = 0;
+- int cpu;
+-
+- /* Per-cpu values can be negative, use a signed accumulator */
+- for_each_possible_cpu(cpu)
+- val += per_cpu(memcg->stat->count[idx], cpu);
+- /*
+- * Summing races with updates, so val may be negative. Avoid exposing
+- * transient negative values.
+- */
+- if (val < 0)
+- val = 0;
+- return val;
+-}
+
+ static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
+ enum mem_cgroup_events_index idx)
+@@ -5237,6 +5220,13 @@ static int memory_stat_show(struct seq_f
+ seq_printf(m, "pgmajfault %lu\n",
+ events[MEM_CGROUP_EVENTS_PGMAJFAULT]);
+
++ seq_printf(m, "workingset_refault %lu\n",
++ stat[MEMCG_WORKINGSET_REFAULT]);
++ seq_printf(m, "workingset_activate %lu\n",
++ stat[MEMCG_WORKINGSET_ACTIVATE]);
++ seq_printf(m, "workingset_nodereclaim %lu\n",
++ stat[MEMCG_WORKINGSET_NODERECLAIM]);
++
+ return 0;
+ }
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -2033,6 +2033,8 @@ static void shrink_active_list(unsigned
+ * Both inactive lists should also be large enough that each inactive
+ * page has a chance to be referenced again before it is reclaimed.
+ *
++ * If that fails and refaulting is observed, the inactive list grows.
++ *
+ * The inactive_ratio is the target ratio of ACTIVE to INACTIVE pages
+ * on this LRU, maintained by the pageout code. A zone->inactive_ratio
+ * of 3 means 3:1 or 25% of the pages are kept on the inactive list.
+@@ -2049,12 +2051,15 @@ static void shrink_active_list(unsigned
+ * 10TB 320 32GB
+ */
+ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
+- struct scan_control *sc, bool trace)
++ struct mem_cgroup *memcg,
++ struct scan_control *sc, bool actual_reclaim)
+ {
+- unsigned long inactive_ratio;
+- unsigned long inactive, active;
+- enum lru_list inactive_lru = file * LRU_FILE;
+ enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE;
++ struct pglist_data *pgdat = lruvec_pgdat(lruvec);
++ enum lru_list inactive_lru = file * LRU_FILE;
++ unsigned long inactive, active;
++ unsigned long inactive_ratio;
++ unsigned long refaults;
+ unsigned long gb;
+
+ /*
+@@ -2067,27 +2072,43 @@ static bool inactive_list_is_low(struct
+ inactive = lruvec_lru_size(lruvec, inactive_lru, sc->reclaim_idx);
+ active = lruvec_lru_size(lruvec, active_lru, sc->reclaim_idx);
+
+- gb = (inactive + active) >> (30 - PAGE_SHIFT);
+- if (gb)
+- inactive_ratio = int_sqrt(10 * gb);
++ if (memcg)
++ refaults = mem_cgroup_read_stat(memcg,
++ MEMCG_WORKINGSET_ACTIVATE);
+ else
+- inactive_ratio = 1;
++ refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE);
+
+- if (trace)
+- trace_mm_vmscan_inactive_list_is_low(lruvec_pgdat(lruvec)->node_id,
+- sc->reclaim_idx,
+- lruvec_lru_size(lruvec, inactive_lru, MAX_NR_ZONES), inactive,
+- lruvec_lru_size(lruvec, active_lru, MAX_NR_ZONES), active,
+- inactive_ratio, file);
++ /*
++ * When refaults are being observed, it means a new workingset
++ * is being established. Disable active list protection to get
++ * rid of the stale workingset quickly.
++ */
++ if (file && actual_reclaim && lruvec->refaults != refaults) {
++ inactive_ratio = 0;
++ } else {
++ gb = (inactive + active) >> (30 - PAGE_SHIFT);
++ if (gb)
++ inactive_ratio = int_sqrt(10 * gb);
++ else
++ inactive_ratio = 1;
++ }
++
++ if (actual_reclaim)
++ trace_mm_vmscan_inactive_list_is_low(pgdat->node_id, sc->reclaim_idx,
++ lruvec_lru_size(lruvec, inactive_lru, MAX_NR_ZONES), inactive,
++ lruvec_lru_size(lruvec, active_lru, MAX_NR_ZONES), active,
++ inactive_ratio, file);
+
+ return inactive * inactive_ratio < active;
+ }
+
+ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
+- struct lruvec *lruvec, struct scan_control *sc)
++ struct lruvec *lruvec, struct mem_cgroup *memcg,
++ struct scan_control *sc)
+ {
+ if (is_active_lru(lru)) {
+- if (inactive_list_is_low(lruvec, is_file_lru(lru), sc, true))
++ if (inactive_list_is_low(lruvec, is_file_lru(lru),
++ memcg, sc, true))
+ shrink_active_list(nr_to_scan, lruvec, sc, lru);
+ return 0;
+ }
+@@ -2218,7 +2239,7 @@ static void get_scan_count(struct lruvec
+ * lruvec even if it has plenty of old anonymous pages unless the
+ * system is under heavy pressure.
+ */
+- if (!inactive_list_is_low(lruvec, true, sc, false) &&
++ if (!inactive_list_is_low(lruvec, true, memcg, sc, false) &&
+ lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) {
+ scan_balance = SCAN_FILE;
+ goto out;
+@@ -2376,7 +2397,7 @@ static void shrink_node_memcg(struct pgl
+ nr[lru] -= nr_to_scan;
+
+ nr_reclaimed += shrink_list(lru, nr_to_scan,
+- lruvec, sc);
++ lruvec, memcg, sc);
+ }
+ }
+
+@@ -2443,7 +2464,7 @@ static void shrink_node_memcg(struct pgl
+ * Even if we did not try to evict anon pages at all, we want to
+ * rebalance the anon lru active/inactive ratio.
+ */
+- if (inactive_list_is_low(lruvec, false, sc, true))
++ if (inactive_list_is_low(lruvec, false, memcg, sc, true))
+ shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
+ sc, LRU_ACTIVE_ANON);
+ }
+@@ -2752,6 +2773,26 @@ static void shrink_zones(struct zonelist
+ sc->gfp_mask = orig_mask;
+ }
+
++static void snapshot_refaults(struct mem_cgroup *root_memcg, pg_data_t *pgdat)
++{
++ struct mem_cgroup *memcg;
++
++ memcg = mem_cgroup_iter(root_memcg, NULL, NULL);
++ do {
++ unsigned long refaults;
++ struct lruvec *lruvec;
++
++ if (memcg)
++ refaults = mem_cgroup_read_stat(memcg,
++ MEMCG_WORKINGSET_ACTIVATE);
++ else
++ refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE);
++
++ lruvec = mem_cgroup_lruvec(pgdat, memcg);
++ lruvec->refaults = refaults;
++ } while ((memcg = mem_cgroup_iter(root_memcg, memcg, NULL)));
++}
++
+ /*
+ * This is the main entry point to direct page reclaim.
+ *
+@@ -2772,6 +2813,9 @@ static unsigned long do_try_to_free_page
+ struct scan_control *sc)
+ {
+ int initial_priority = sc->priority;
++ pg_data_t *last_pgdat;
++ struct zoneref *z;
++ struct zone *zone;
+ retry:
+ delayacct_freepages_start();
+
+@@ -2798,6 +2842,15 @@ retry:
+ sc->may_writepage = 1;
+ } while (--sc->priority >= 0);
+
++ last_pgdat = NULL;
++ for_each_zone_zonelist_nodemask(zone, z, zonelist, sc->reclaim_idx,
++ sc->nodemask) {
++ if (zone->zone_pgdat == last_pgdat)
++ continue;
++ last_pgdat = zone->zone_pgdat;
++ snapshot_refaults(sc->target_mem_cgroup, zone->zone_pgdat);
++ }
++
+ delayacct_freepages_end();
+
+ if (sc->nr_reclaimed)
+@@ -3076,7 +3129,7 @@ static void age_active_anon(struct pglis
+ do {
+ struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);
+
+- if (inactive_list_is_low(lruvec, false, sc, true))
++ if (inactive_list_is_low(lruvec, false, memcg, sc, true))
+ shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
+ sc, LRU_ACTIVE_ANON);
+
+@@ -3311,6 +3364,7 @@ static int balance_pgdat(pg_data_t *pgda
+ } while (sc.priority >= 1);
+
+ out:
++ snapshot_refaults(NULL, pgdat);
+ /*
+ * Return the order kswapd stopped reclaiming at as
+ * prepare_kswapd_sleep() takes it into account. If another caller
+--- a/mm/workingset.c
++++ b/mm/workingset.c
+@@ -269,7 +269,6 @@ bool workingset_refault(void *shadow)
+ lruvec = mem_cgroup_lruvec(pgdat, memcg);
+ refault = atomic_long_read(&lruvec->inactive_age);
+ active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE, MAX_NR_ZONES);
+- rcu_read_unlock();
+
+ /*
+ * The unsigned subtraction here gives an accurate distance
+@@ -290,11 +289,15 @@ bool workingset_refault(void *shadow)
+ refault_distance = (refault - eviction) & EVICTION_MASK;
+
+ inc_node_state(pgdat, WORKINGSET_REFAULT);
++ mem_cgroup_inc_stat(memcg, MEMCG_WORKINGSET_REFAULT);
+
+ if (refault_distance <= active_file) {
+ inc_node_state(pgdat, WORKINGSET_ACTIVATE);
++ mem_cgroup_inc_stat(memcg, MEMCG_WORKINGSET_ACTIVATE);
++ rcu_read_unlock();
+ return true;
+ }
++ rcu_read_unlock();
+ return false;
+ }
+
+@@ -472,6 +475,8 @@ static enum lru_status shadow_lru_isolat
+ if (WARN_ON_ONCE(node->exceptional))
+ goto out_invalid;
+ inc_node_state(page_pgdat(virt_to_page(node)), WORKINGSET_NODERECLAIM);
++ mem_cgroup_inc_page_stat(virt_to_page(node),
++ MEMCG_WORKINGSET_NODERECLAIM);
+ __radix_tree_delete_node(&mapping->page_tree, node,
+ workingset_update_node, mapping);
+
--- /dev/null
+From e675c5ec51fe2554719a7b6bcdbef0a770f2c19b Mon Sep 17 00:00:00 2001
+From: Martin Brandenburg <martin@omnibond.com>
+Date: Tue, 25 Apr 2017 15:37:57 -0400
+Subject: orangefs: clean up oversize xattr validation
+
+From: Martin Brandenburg <martin@omnibond.com>
+
+commit e675c5ec51fe2554719a7b6bcdbef0a770f2c19b upstream.
+
+Also, don't check flags, as they have already been validated by the VFS.
+
+Fix an off-by-one error in the max size checking.
+
+Stop logging just because userspace wants to write attributes which do
+not fit.
+
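+The off-by-one in question: a name or value of exactly the maximum
+length was rejected by the old ">=" comparison (before/after sketch,
+comments added):
+
+    /* before: size >= ORANGEFS_MAX_XATTR_VALUELEN
+     *         rejects size == ORANGEFS_MAX_XATTR_VALUELEN (off by one)
+     * after:  size >  ORANGEFS_MAX_XATTR_VALUELEN
+     *         accepts a value of exactly the maximum length
+     */
+    if (size > ORANGEFS_MAX_XATTR_VALUELEN)
+        return -EINVAL;
+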
+This and the previous commit fix xfstests generic/020.
+
+Signed-off-by: Martin Brandenburg <martin@omnibond.com>
+Signed-off-by: Mike Marshall <hubcap@omnibond.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/orangefs/xattr.c | 24 +++++++-----------------
+ 1 file changed, 7 insertions(+), 17 deletions(-)
+
+--- a/fs/orangefs/xattr.c
++++ b/fs/orangefs/xattr.c
+@@ -76,11 +76,8 @@ ssize_t orangefs_inode_getxattr(struct i
+ if (S_ISLNK(inode->i_mode))
+ return -EOPNOTSUPP;
+
+- if (strlen(name) >= ORANGEFS_MAX_XATTR_NAMELEN) {
+- gossip_err("Invalid key length (%d)\n",
+- (int)strlen(name));
++ if (strlen(name) > ORANGEFS_MAX_XATTR_NAMELEN)
+ return -EINVAL;
+- }
+
+ fsuid = from_kuid(&init_user_ns, current_fsuid());
+ fsgid = from_kgid(&init_user_ns, current_fsgid());
+@@ -172,6 +169,9 @@ static int orangefs_inode_removexattr(st
+ struct orangefs_kernel_op_s *new_op = NULL;
+ int ret = -ENOMEM;
+
++ if (strlen(name) > ORANGEFS_MAX_XATTR_NAMELEN)
++ return -EINVAL;
++
+ down_write(&orangefs_inode->xattr_sem);
+ new_op = op_alloc(ORANGEFS_VFS_OP_REMOVEXATTR);
+ if (!new_op)
+@@ -231,23 +231,13 @@ int orangefs_inode_setxattr(struct inode
+ "%s: name %s, buffer_size %zd\n",
+ __func__, name, size);
+
+- if (size >= ORANGEFS_MAX_XATTR_VALUELEN ||
+- flags < 0) {
+- gossip_err("orangefs_inode_setxattr: bogus values of size(%d), flags(%d)\n",
+- (int)size,
+- flags);
++ if (size > ORANGEFS_MAX_XATTR_VALUELEN)
++ return -EINVAL;
++ if (strlen(name) > ORANGEFS_MAX_XATTR_NAMELEN)
+ return -EINVAL;
+- }
+
+ internal_flag = convert_to_internal_xattr_flags(flags);
+
+- if (strlen(name) >= ORANGEFS_MAX_XATTR_NAMELEN) {
+- gossip_err
+- ("orangefs_inode_setxattr: bogus key size (%d)\n",
+- (int)(strlen(name)));
+- return -EINVAL;
+- }
+-
+ /* This is equivalent to a removexattr */
+ if (size == 0 && value == NULL) {
+ gossip_debug(GOSSIP_XATTR_DEBUG,
--- /dev/null
+From 53950ef541675df48c219a8d665111a0e68dfc2f Mon Sep 17 00:00:00 2001
+From: Martin Brandenburg <martin@omnibond.com>
+Date: Tue, 25 Apr 2017 15:38:04 -0400
+Subject: orangefs: do not check possibly stale size on truncate
+
+From: Martin Brandenburg <martin@omnibond.com>
+
+commit 53950ef541675df48c219a8d665111a0e68dfc2f upstream.
+
+Let the server figure this out because our size might be out of date or
+not present.
+
+The bug was that
+
+ xfs_io -f -t -c "pread -v 0 100" /mnt/foo
+ echo "Test" > /mnt/foo
+ xfs_io -f -t -c "pread -v 0 100" /mnt/foo
+
+fails because the second truncate did not happen if nothing had
+requested the size after the write in echo. Thus i_size was zero (not
+present), and orangefs_setattr thought i_size was zero and that there
+was nothing to do.
+
+Signed-off-by: Martin Brandenburg <martin@omnibond.com>
+Signed-off-by: Mike Marshall <hubcap@omnibond.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/orangefs/inode.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/fs/orangefs/inode.c
++++ b/fs/orangefs/inode.c
+@@ -218,8 +218,7 @@ int orangefs_setattr(struct dentry *dent
+ if (ret)
+ goto out;
+
+- if ((iattr->ia_valid & ATTR_SIZE) &&
+- iattr->ia_size != i_size_read(inode)) {
++ if (iattr->ia_valid & ATTR_SIZE) {
+ ret = orangefs_setattr_size(inode, iattr);
+ if (ret)
+ goto out;
--- /dev/null
+From 17930b252cd6f31163c259eaa99dd8aa630fb9ba Mon Sep 17 00:00:00 2001
+From: Martin Brandenburg <martin@omnibond.com>
+Date: Tue, 25 Apr 2017 15:37:58 -0400
+Subject: orangefs: do not set getattr_time on orangefs_lookup
+
+From: Martin Brandenburg <martin@omnibond.com>
+
+commit 17930b252cd6f31163c259eaa99dd8aa630fb9ba upstream.
+
+Since orangefs_lookup calls orangefs_iget which calls
+orangefs_inode_getattr, getattr_time will get set.
+
+Signed-off-by: Martin Brandenburg <martin@omnibond.com>
+Signed-off-by: Mike Marshall <hubcap@omnibond.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/orangefs/namei.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/fs/orangefs/namei.c
++++ b/fs/orangefs/namei.c
+@@ -193,8 +193,6 @@ static struct dentry *orangefs_lookup(st
+ goto out;
+ }
+
+- ORANGEFS_I(inode)->getattr_time = jiffies - 1;
+-
+ gossip_debug(GOSSIP_NAME_DEBUG,
+ "%s:%s:%d "
+ "Found good inode [%lu] with count [%d]\n",
--- /dev/null
+From a956af337b9ff25822d9ce1a59c6ed0c09fc14b9 Mon Sep 17 00:00:00 2001
+From: Martin Brandenburg <martin@omnibond.com>
+Date: Tue, 25 Apr 2017 15:37:56 -0400
+Subject: orangefs: fix bounds check for listxattr
+
+From: Martin Brandenburg <martin@omnibond.com>
+
+commit a956af337b9ff25822d9ce1a59c6ed0c09fc14b9 upstream.
+
+Signed-off-by: Martin Brandenburg <martin@omnibond.com>
+Signed-off-by: Mike Marshall <hubcap@omnibond.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/orangefs/xattr.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/orangefs/xattr.c
++++ b/fs/orangefs/xattr.c
+@@ -358,7 +358,7 @@ try_again:
+
+ returned_count = new_op->downcall.resp.listxattr.returned_count;
+ if (returned_count < 0 ||
+- returned_count >= ORANGEFS_MAX_XATTR_LISTLEN) {
++ returned_count > ORANGEFS_MAX_XATTR_LISTLEN) {
+ gossip_err("%s: impossible value for returned_count:%d:\n",
+ __func__,
+ returned_count);
--- /dev/null
+From 4a99f3c83dc493c8ea84693d78cd792839c8aa64 Mon Sep 17 00:00:00 2001
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Mon, 24 Apr 2017 22:26:40 +0300
+Subject: ovl: do not set overlay.opaque on non-dir create
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+commit 4a99f3c83dc493c8ea84693d78cd792839c8aa64 upstream.
+
+The optimization for opaque dir create was wrongly being applied
+also to non-dir create.
+
+Fixes: 97c684cc9110 ("ovl: create directories inside merged parent opaque")
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/overlayfs/dir.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/overlayfs/dir.c
++++ b/fs/overlayfs/dir.c
+@@ -210,7 +210,7 @@ static int ovl_create_upper(struct dentr
+ if (err)
+ goto out_dput;
+
+- if (ovl_type_merge(dentry->d_parent)) {
++ if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry)) {
+ /* Setting opaque here is just an optimization, allow to fail */
+ ovl_set_opaque(dentry, newdentry);
+ }
--- /dev/null
+From 07a77929ba672d93642a56dc2255dd21e6e2290b Mon Sep 17 00:00:00 2001
+From: "Jason A. Donenfeld" <Jason@zx2c4.com>
+Date: Fri, 7 Apr 2017 02:33:30 +0200
+Subject: padata: free correct variable
+
+From: Jason A. Donenfeld <Jason@zx2c4.com>
+
+commit 07a77929ba672d93642a56dc2255dd21e6e2290b upstream.
+
+The author meant to free the variable that was just allocated, instead
+of the one that failed to be allocated, but made a simple typo. This
+patch rectifies that.
+
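+The general error-path idiom (restated from the hunk below, with the
+reasoning as a comment): on allocation failure, unwind only what has
+already succeeded.
+
+    if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) {
+        /* cbcpu was never allocated, so there is nothing to free
+         * there; release the earlier, successful pcpu allocation */
+        free_cpumask_var(pd->cpumask.pcpu);
+        return -ENOMEM;
+    }
+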
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/padata.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/padata.c
++++ b/kernel/padata.c
+@@ -354,7 +354,7 @@ static int padata_setup_cpumasks(struct
+
+ cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_online_mask);
+ if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) {
+- free_cpumask_var(pd->cpumask.cbcpu);
++ free_cpumask_var(pd->cpumask.pcpu);
+ return -ENOMEM;
+ }
+
--- /dev/null
+From e77852b32d6d4430c68c38aaf73efe5650fa25af Mon Sep 17 00:00:00 2001
+From: Christian Borntraeger <borntraeger@de.ibm.com>
+Date: Thu, 6 Apr 2017 09:51:51 +0200
+Subject: perf annotate s390: Fix perf annotate error -95 (4.10 regression)
+
+From: Christian Borntraeger <borntraeger@de.ibm.com>
+
+commit e77852b32d6d4430c68c38aaf73efe5650fa25af upstream.
+
+Since 4.10, perf annotate exits on s390 with an "unknown error -95".
+Turns out that commit 786c1b51844d ("perf annotate: Start supporting
+cross arch annotation") added a hard requirement for architecture
+support when objdump is used, but only provided x86 and arm support.
+Meanwhile power was added, so let's add s390 as well.
+
+While at it, make sure to implement the branch and jump types.
+
+Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Cc: Andreas Krebbel <krebbel@linux.vnet.ibm.com>
+Cc: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: linux-s390 <linux-s390@vger.kernel.org>
+Fixes: 786c1b51844 "perf annotate: Start supporting cross arch annotation"
+Link: http://lkml.kernel.org/r/1491465112-45819-2-git-send-email-borntraeger@de.ibm.com
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/perf/util/annotate.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/tools/perf/util/annotate.c
++++ b/tools/perf/util/annotate.c
+@@ -136,6 +136,12 @@ static struct arch architectures[] = {
+ .comment_char = '#',
+ },
+ },
++ {
++ .name = "s390",
++ .objdump = {
++ .comment_char = '#',
++ },
++ },
+ };
+
+ static void ins__delete(struct ins_operands *ops)
--- /dev/null
+From d9f8dfa9baf9b6ae1f2f84f887176558ecde5268 Mon Sep 17 00:00:00 2001
+From: Christian Borntraeger <borntraeger@de.ibm.com>
+Date: Thu, 6 Apr 2017 09:51:52 +0200
+Subject: perf annotate s390: Implement jump types for perf annotate
+
+From: Christian Borntraeger <borntraeger@de.ibm.com>
+
+commit d9f8dfa9baf9b6ae1f2f84f887176558ecde5268 upstream.
+
+Implement simple detection for all kind of jumps and branches.
+
+Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Cc: Andreas Krebbel <krebbel@linux.vnet.ibm.com>
+Cc: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: linux-s390 <linux-s390@vger.kernel.org>
+Link: http://lkml.kernel.org/r/1491465112-45819-3-git-send-email-borntraeger@de.ibm.com
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/perf/arch/s390/annotate/instructions.c | 30 +++++++++++++++++++++++++++
+ tools/perf/util/annotate.c | 2 +
+ 2 files changed, 32 insertions(+)
+
+--- /dev/null
++++ b/tools/perf/arch/s390/annotate/instructions.c
+@@ -0,0 +1,30 @@
++static struct ins_ops *s390__associate_ins_ops(struct arch *arch, const char *name)
++{
++ struct ins_ops *ops = NULL;
++
++ /* catch all kind of jumps */
++ if (strchr(name, 'j') ||
++ !strncmp(name, "bct", 3) ||
++ !strncmp(name, "br", 2))
++ ops = &jump_ops;
++ /* override call/returns */
++ if (!strcmp(name, "bras") ||
++ !strcmp(name, "brasl") ||
++ !strcmp(name, "basr"))
++ ops = &call_ops;
++ if (!strcmp(name, "br"))
++ ops = &ret_ops;
++
++ arch__associate_ins_ops(arch, name, ops);
++ return ops;
++}
++
++static int s390__annotate_init(struct arch *arch)
++{
++ if (!arch->initialized) {
++ arch->initialized = true;
++ arch->associate_instruction_ops = s390__associate_ins_ops;
++ }
++
++ return 0;
++}
+--- a/tools/perf/util/annotate.c
++++ b/tools/perf/util/annotate.c
+@@ -108,6 +108,7 @@ static int arch__associate_ins_ops(struc
+ #include "arch/arm64/annotate/instructions.c"
+ #include "arch/x86/annotate/instructions.c"
+ #include "arch/powerpc/annotate/instructions.c"
++#include "arch/s390/annotate/instructions.c"
+
+ static struct arch architectures[] = {
+ {
+@@ -132,6 +133,7 @@ static struct arch architectures[] = {
+ },
+ {
+ .name = "s390",
++ .init = s390__annotate_init,
+ .objdump = {
+ .comment_char = '#',
+ },
--- /dev/null
+From c3a0bbc7ad7598dec5a204868bdf8a2b1b51df14 Mon Sep 17 00:00:00 2001
+From: Adrian Hunter <adrian.hunter@intel.com>
+Date: Fri, 24 Mar 2017 14:15:52 +0200
+Subject: perf auxtrace: Fix no_size logic in addr_filter__resolve_kernel_syms()
+
+From: Adrian Hunter <adrian.hunter@intel.com>
+
+commit c3a0bbc7ad7598dec5a204868bdf8a2b1b51df14 upstream.
+
+Address filtering with kernel symbols incorrectly resulted in the error
+"Cannot determine size of symbol" because the no_size logic was the wrong
+way around.
+
+Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
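+A quick truth table shows the inversion (illustrative; 0x40 is an
+arbitrary example value):
+
+    /* size == 0x40, i.e. the symbol size IS known:
+     *   no_size = !!size;  ->  1, wrongly flags "size unknown"
+     *   no_size = !size;   ->  0, correctly flags "size known"
+     */
+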
+Tested-by: Andi Kleen <ak@linux.intel.com>
+Link: http://lkml.kernel.org/r/1490357752-27942-1-git-send-email-adrian.hunter@intel.com
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/perf/util/auxtrace.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/tools/perf/util/auxtrace.c
++++ b/tools/perf/util/auxtrace.c
+@@ -1826,7 +1826,7 @@ static int addr_filter__resolve_kernel_s
+ filt->addr = start;
+ if (filt->range && !filt->size && !filt->sym_to) {
+ filt->size = size;
+- no_size = !!size;
++ no_size = !size;
+ }
+ }
+
+@@ -1840,7 +1840,7 @@ static int addr_filter__resolve_kernel_s
+ if (err)
+ return err;
+ filt->size = start + size - filt->addr;
+- no_size = !!size;
++ no_size = !size;
+ }
+
+ /* The very last symbol in kallsyms does not imply a particular size */
dm-rq-check-blk_mq_register_dev-return-value-in-dm_mq_init_request_queue.patch
dm-thin-fix-a-memory-leak-when-passing-discard-bio-down.patch
vfio-type1-remove-locked-page-accounting-workqueue.patch
+iov_iter-don-t-revert-iov-buffer-if-csum-error.patch
+ib-core-fix-sysfs-registration-error-flow.patch
+ib-core-fix-kernel-crash-during-fail-to-initialize-device.patch
+ib-core-for-multicast-functions-verify-that-lids-are-multicast-lids.patch
+ib-ipoib-ibx-failed-to-create-mcg-debug-file.patch
+ib-mlx4-fix-ib-device-initialization-error-flow.patch
+ib-mlx4-reduce-sriov-multicast-cleanup-warning-message-to-debug-level.patch
+ib-hfi1-prevent-kernel-qp-post-send-hard-lockups.patch
+perf-auxtrace-fix-no_size-logic-in-addr_filter__resolve_kernel_syms.patch
+perf-annotate-s390-fix-perf-annotate-error-95-4.10-regression.patch
+perf-annotate-s390-implement-jump-types-for-perf-annotate.patch
+jbd2-fix-dbench4-performance-regression-for-nobarrier-mounts.patch
+ext4-evict-inline-data-when-writing-to-memory-map.patch
+orangefs-fix-bounds-check-for-listxattr.patch
+orangefs-clean-up-oversize-xattr-validation.patch
+orangefs-do-not-set-getattr_time-on-orangefs_lookup.patch
+orangefs-do-not-check-possibly-stale-size-on-truncate.patch
+fs-xattr.c-zero-out-memory-copied-to-userspace-in-getxattr.patch
+ceph-fix-memory-leak-in-__ceph_setxattr.patch
+fs-fix-data-invalidation-in-the-cleancache-during-direct-io.patch
+fs-block_dev-always-invalidate-cleancache-in-invalidate_bdev.patch
+mm-vmscan-fix-io-refault-regression-in-cache-workingset-transition.patch
+mm-prevent-potential-recursive-reclaim-due-to-clearing-pf_memalloc.patch
+fix-match_prepath.patch
+do-not-return-number-of-bytes-written-for-ioctl-cifs_ioc_copychunk_file.patch
+set-unicode-flag-on-cifs-echo-request-to-avoid-mac-error.patch
+smb3-work-around-mount-failure-when-using-smb3-dialect-to-macs.patch
+cifs-fix-mapping-of-sfm_space-and-sfm_period.patch
+cifs-fix-leak-in-fsctl_enum_snaps-response-handling.patch
+cifs-fix-cifs_enumerate_snapshots-oops.patch
+cifs-fix-oplock-break-deadlocks.patch
+cifs-fix-cifs_ioc_get_mnt_info-oops.patch
+cifs-add-misssing-sfm-mapping-for-doublequote.patch
+ovl-do-not-set-overlay.opaque-on-non-dir-create.patch
+padata-free-correct-variable.patch
+md-raid1-avoid-reusing-a-resync-bio-after-error-handling.patch
--- /dev/null
+From 26c9cb668c7fbf9830516b75d8bee70b699ed449 Mon Sep 17 00:00:00 2001
+From: Steve French <smfrench@gmail.com>
+Date: Tue, 2 May 2017 13:35:20 -0500
+Subject: Set unicode flag on cifs echo request to avoid Mac error
+
+From: Steve French <smfrench@gmail.com>
+
+commit 26c9cb668c7fbf9830516b75d8bee70b699ed449 upstream.
+
+Mac requires the unicode flag to be set for cifs, even for the smb
+echo request (which doesn't have strings).
+
+Without this, Mac rejects the periodic echo requests (when mounting
+with cifs) that we use to check if the server is down.
+
+Signed-off-by: Steve French <smfrench@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/cifssmb.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/cifs/cifssmb.c
++++ b/fs/cifs/cifssmb.c
+@@ -718,6 +718,9 @@ CIFSSMBEcho(struct TCP_Server_Info *serv
+ if (rc)
+ return rc;
+
++ if (server->capabilities & CAP_UNICODE)
++ smb->hdr.Flags2 |= SMBFLG2_UNICODE;
++
+ /* set up echo request */
+ smb->hdr.Tid = 0xffff;
+ smb->hdr.WordCount = 1;
--- /dev/null
+From 7db0a6efdc3e990cdfd4b24820d010e9eb7890ad Mon Sep 17 00:00:00 2001
+From: Steve French <smfrench@gmail.com>
+Date: Wed, 3 May 2017 21:12:20 -0500
+Subject: SMB3: Work around mount failure when using SMB3 dialect to Macs
+
+From: Steve French <smfrench@gmail.com>
+
+commit 7db0a6efdc3e990cdfd4b24820d010e9eb7890ad upstream.
+
+Macs send the maximum buffer size in response on ioctl to validate
+negotiate security information, which causes us to fail the mount
+as the response buffer is larger than the expected response.
+
+Changed ioctl response processing to allow for padding of the validate
+negotiate ioctl response and to limit the maximum response size to the
+maximum buffer size.
+
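+The relaxed check, restated from the hunk below with the reasoning as a
+comment:
+
+    if (rsplen != sizeof(struct validate_negotiate_info_rsp)) {
+        cifs_dbg(VFS, "invalid protocol negotiate response size: %d\n",
+                 rsplen);
+        /* Macs pad the response up to their maximum buffer size, so a
+         * size mismatch alone is no longer fatal; only give up if the
+         * response cannot fit in our buffer at all */
+        if (rsplen > CIFSMaxBufSize)
+            return -EIO;
+    }
+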
+Signed-off-by: Steve French <steve.french@primarydata.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/smb2pdu.c | 14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+--- a/fs/cifs/smb2pdu.c
++++ b/fs/cifs/smb2pdu.c
+@@ -632,8 +632,12 @@ int smb3_validate_negotiate(const unsign
+ }
+
+ if (rsplen != sizeof(struct validate_negotiate_info_rsp)) {
+- cifs_dbg(VFS, "invalid size of protocol negotiate response\n");
+- return -EIO;
++ cifs_dbg(VFS, "invalid protocol negotiate response size: %d\n",
++ rsplen);
++
++ /* relax check since Mac returns max bufsize allowed on ioctl */
++ if (rsplen > CIFSMaxBufSize)
++ return -EIO;
+ }
+
+ /* check validate negotiate info response matches what we got earlier */
+@@ -1853,8 +1857,12 @@ SMB2_ioctl(const unsigned int xid, struc
+ * than one credit. Windows typically sets this smaller, but for some
+ * ioctls it may be useful to allow server to send more. No point
+ * limiting what the server can send as long as fits in one credit
++ * Unfortunately - we can not handle more than CIFS_MAX_MSG_SIZE
++ * (by default, note that it can be overridden to make max larger)
++ * in responses (except for read responses which can be bigger.
++ * We may want to bump this limit up
+ */
+- req->MaxOutputResponse = cpu_to_le32(0xFF00); /* < 64K uses 1 credit */
++ req->MaxOutputResponse = cpu_to_le32(CIFSMaxBufSize);
+
+ if (is_fsctl)
+ req->Flags = cpu_to_le32(SMB2_0_IOCTL_IS_FSCTL);