From e55a062096b4aea9c3f1d0b84fdd06facb444e58 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sat, 23 Feb 2019 12:11:30 +0100
Subject: [PATCH] 4.9-stable patches

added patches:
	ceph-avoid-repeatedly-adding-inode-to-mdsc-snap_flush_list.patch
	keys-allow-reaching-the-keys-quotas-exactly.patch
	libceph-handle-an-empty-authorize-reply.patch
	mac80211-free-mpath-object-when-rhashtable-insertion-fails.patch
	numa-change-get_mempolicy-to-use-nr_node_ids-instead-of-max_numnodes.patch
	proc-oom-do-not-report-alien-mms-when-setting-oom_score_adj.patch
---
 ...adding-inode-to-mdsc-snap_flush_list.patch | 33 +++++++++
 ...low-reaching-the-keys-quotas-exactly.patch | 43 +++++++++++
 ...ceph-handle-an-empty-authorize-reply.patch | 74 +++++++++++++++++++
 ...ject-when-rhashtable-insertion-fails.patch | 57 ++++++++++++++
 ...-nr_node_ids-instead-of-max_numnodes.patch | 71 ++++++++++++++++++
 ...alien-mms-when-setting-oom_score_adj.patch | 56 ++++++++++++++
 queue-4.9/series                              |  6 ++
 7 files changed, 340 insertions(+)
 create mode 100644 queue-4.9/ceph-avoid-repeatedly-adding-inode-to-mdsc-snap_flush_list.patch
 create mode 100644 queue-4.9/keys-allow-reaching-the-keys-quotas-exactly.patch
 create mode 100644 queue-4.9/libceph-handle-an-empty-authorize-reply.patch
 create mode 100644 queue-4.9/mac80211-free-mpath-object-when-rhashtable-insertion-fails.patch
 create mode 100644 queue-4.9/numa-change-get_mempolicy-to-use-nr_node_ids-instead-of-max_numnodes.patch
 create mode 100644 queue-4.9/proc-oom-do-not-report-alien-mms-when-setting-oom_score_adj.patch

diff --git a/queue-4.9/ceph-avoid-repeatedly-adding-inode-to-mdsc-snap_flush_list.patch b/queue-4.9/ceph-avoid-repeatedly-adding-inode-to-mdsc-snap_flush_list.patch
new file mode 100644
index 00000000000..c0ccf1c5100
--- /dev/null
+++ b/queue-4.9/ceph-avoid-repeatedly-adding-inode-to-mdsc-snap_flush_list.patch
@@ -0,0 +1,33 @@
+From 04242ff3ac0abbaa4362f97781dac268e6c3541a Mon Sep 17 00:00:00 2001
+From: "Yan, Zheng" <zyan@redhat.com>
+Date: Mon, 11 Feb 2019 15:18:52 +0800
+Subject: ceph: avoid repeatedly adding inode to mdsc->snap_flush_list
+
+From: Yan, Zheng <zyan@redhat.com>
+
+commit 04242ff3ac0abbaa4362f97781dac268e6c3541a upstream.
+
+Otherwise, mdsc->snap_flush_list may get corrupted.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
+Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ceph/snap.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/ceph/snap.c
++++ b/fs/ceph/snap.c
+@@ -609,7 +609,8 @@ int __ceph_finish_cap_snap(struct ceph_i
+ 	     capsnap->size);
+ 
+ 	spin_lock(&mdsc->snap_flush_lock);
+-	list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list);
++	if (list_empty(&ci->i_snap_flush_item))
++		list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list);
+ 	spin_unlock(&mdsc->snap_flush_lock);
+ 	return 1;  /* caller may want to ceph_flush_snaps */
+ }
diff --git a/queue-4.9/keys-allow-reaching-the-keys-quotas-exactly.patch b/queue-4.9/keys-allow-reaching-the-keys-quotas-exactly.patch
new file mode 100644
index 00000000000..f5b1707ec51
--- /dev/null
+++ b/queue-4.9/keys-allow-reaching-the-keys-quotas-exactly.patch
@@ -0,0 +1,43 @@
+From a08bf91ce28ed3ae7b6fef35d843fef8dc8c2cd9 Mon Sep 17 00:00:00 2001
+From: Eric Biggers <ebiggers@google.com>
+Date: Thu, 14 Feb 2019 16:20:01 +0000
+Subject: KEYS: allow reaching the keys quotas exactly
+
+From: Eric Biggers <ebiggers@google.com>
+
+commit a08bf91ce28ed3ae7b6fef35d843fef8dc8c2cd9 upstream.
+
+If the sysctl 'kernel.keys.maxkeys' is set to some number n, then
+actually users can only add up to 'n - 1' keys.  Likewise for
+'kernel.keys.maxbytes' and the root_* versions of these sysctls.  But
+these sysctls are apparently supposed to be *maximums*, as per their
+names and all documentation I could find -- the keyrings(7) man page,
+Documentation/security/keys/core.rst, and all the mentions of EDQUOT
+meaning that the key quota was *exceeded* (as opposed to reached).
+
+Thus, fix the code to allow reaching the quotas exactly.
+
+Fixes: 0b77f5bfb45c ("keys: make the keyring quotas controllable through /proc/sys")
+Cc: stable@vger.kernel.org
+Signed-off-by: Eric Biggers <ebiggers@google.com>
+Signed-off-by: David Howells <dhowells@redhat.com>
+Signed-off-by: James Morris <james.morris@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ security/keys/key.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/security/keys/key.c
++++ b/security/keys/key.c
+@@ -264,8 +264,8 @@ struct key *key_alloc(struct key_type *t
+ 
+ 		spin_lock(&user->lock);
+ 		if (!(flags & KEY_ALLOC_QUOTA_OVERRUN)) {
+-			if (user->qnkeys + 1 >= maxkeys ||
+-			    user->qnbytes + quotalen >= maxbytes ||
++			if (user->qnkeys + 1 > maxkeys ||
++			    user->qnbytes + quotalen > maxbytes ||
+ 			    user->qnbytes + quotalen < user->qnbytes)
+ 				goto no_quota;
+ 		}
diff --git a/queue-4.9/libceph-handle-an-empty-authorize-reply.patch b/queue-4.9/libceph-handle-an-empty-authorize-reply.patch
new file mode 100644
index 00000000000..a0598affdaf
--- /dev/null
+++ b/queue-4.9/libceph-handle-an-empty-authorize-reply.patch
@@ -0,0 +1,74 @@
+From 0fd3fd0a9bb0b02b6435bb7070e9f7b82a23f068 Mon Sep 17 00:00:00 2001
+From: Ilya Dryomov <idryomov@gmail.com>
+Date: Tue, 5 Feb 2019 20:30:27 +0100
+Subject: libceph: handle an empty authorize reply
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit 0fd3fd0a9bb0b02b6435bb7070e9f7b82a23f068 upstream.
+
+The authorize reply can be empty, for example when the ticket used to
+build the authorizer is too old and TAG_BADAUTHORIZER is returned from
+the service.  Calling ->verify_authorizer_reply() results in an attempt
+to decrypt and validate (somewhat) random data in au->buf (most likely
+the signature block from calc_signature()), which fails and ends up in
+con_fault_finish() with !con->auth_retry.  The ticket isn't invalidated
+and the connection is retried again and again until a new ticket is
+obtained from the monitor:
+
+  libceph: osd2 192.168.122.1:6809 bad authorize reply
+  libceph: osd2 192.168.122.1:6809 bad authorize reply
+  libceph: osd2 192.168.122.1:6809 bad authorize reply
+  libceph: osd2 192.168.122.1:6809 bad authorize reply
+
+Let TAG_BADAUTHORIZER handler kick in and increment con->auth_retry.
+
+Cc: stable@vger.kernel.org
+Fixes: 5c056fdc5b47 ("libceph: verify authorize reply on connect")
+Link: https://tracker.ceph.com/issues/20164
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Sage Weil <sage@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/ceph/messenger.c |   15 +++++++++------
+ 1 file changed, 9 insertions(+), 6 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -2042,6 +2042,8 @@ static int process_connect(struct ceph_c
+ 	dout("process_connect on %p tag %d\n", con, (int)con->in_tag);
+ 
+ 	if (con->auth) {
++		int len = le32_to_cpu(con->in_reply.authorizer_len);
++
+ 		/*
+ 		 * Any connection that defines ->get_authorizer()
+ 		 * should also define ->add_authorizer_challenge() and
+@@ -2051,8 +2053,7 @@ static int process_connect(struct ceph_c
+ 		 */
+ 		if (con->in_reply.tag == CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER) {
+ 			ret = con->ops->add_authorizer_challenge(
+-				    con, con->auth->authorizer_reply_buf,
+-				    le32_to_cpu(con->in_reply.authorizer_len));
++				    con, con->auth->authorizer_reply_buf, len);
+ 			if (ret < 0)
+ 				return ret;
+ 
+@@ -2062,10 +2063,12 @@ static int process_connect(struct ceph_c
+ 			return 0;
+ 		}
+ 
+-		ret = con->ops->verify_authorizer_reply(con);
+-		if (ret < 0) {
+-			con->error_msg = "bad authorize reply";
+-			return ret;
++		if (len) {
++			ret = con->ops->verify_authorizer_reply(con);
++			if (ret < 0) {
++				con->error_msg = "bad authorize reply";
++				return ret;
++			}
+ 		}
+ 	}
+ 
diff --git a/queue-4.9/mac80211-free-mpath-object-when-rhashtable-insertion-fails.patch b/queue-4.9/mac80211-free-mpath-object-when-rhashtable-insertion-fails.patch
new file mode 100644
index 00000000000..d6fc83b933e
--- /dev/null
+++ b/queue-4.9/mac80211-free-mpath-object-when-rhashtable-insertion-fails.patch
@@ -0,0 +1,57 @@
+From 4ff3a9d14c6c06eaa4e5976c61599ea2bd9e81b2 Mon Sep 17 00:00:00 2001
+From: Herbert Xu <herbert@gondor.apana.org.au>
+Date: Thu, 14 Feb 2019 22:03:25 +0800
+Subject: mac80211: Free mpath object when rhashtable insertion fails
+
+From: Herbert Xu <herbert@gondor.apana.org.au>
+
+commit 4ff3a9d14c6c06eaa4e5976c61599ea2bd9e81b2 upstream.
+
+When rhashtable insertion fails the mesh table code doesn't free
+the now-orphan mesh path object.  This patch fixes that.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/mac80211/mesh_pathtbl.c |   17 +++++++++--------
+ 1 file changed, 9 insertions(+), 8 deletions(-)
+
+--- a/net/mac80211/mesh_pathtbl.c
++++ b/net/mac80211/mesh_pathtbl.c
+@@ -449,17 +449,15 @@ struct mesh_path *mesh_path_add(struct i
+ 
+ 	} while (unlikely(ret == -EEXIST && !mpath));
+ 
+-	if (ret && ret != -EEXIST)
+-		return ERR_PTR(ret);
+-
+-	/* At this point either new_mpath was added, or we found a
+-	 * matching entry already in the table; in the latter case
+-	 * free the unnecessary new entry.
+-	 */
+-	if (ret == -EEXIST) {
++	if (ret) {
+ 		kfree(new_mpath);
++
++		if (ret != -EEXIST)
++			return ERR_PTR(ret);
++
+ 		new_mpath = mpath;
+ 	}
++
+ 	sdata->u.mesh.mesh_paths_generation++;
+ 	return new_mpath;
+ }
+@@ -489,6 +487,9 @@ int mpp_path_add(struct ieee80211_sub_if
+ 					    &new_mpath->rhash,
+ 					    mesh_rht_params);
+ 
++	if (ret)
++		kfree(new_mpath);
++
+ 	sdata->u.mesh.mpp_paths_generation++;
+ 	return ret;
+ }
diff --git a/queue-4.9/numa-change-get_mempolicy-to-use-nr_node_ids-instead-of-max_numnodes.patch b/queue-4.9/numa-change-get_mempolicy-to-use-nr_node_ids-instead-of-max_numnodes.patch
new file mode 100644
index 00000000000..0a826b79fe6
--- /dev/null
+++ b/queue-4.9/numa-change-get_mempolicy-to-use-nr_node_ids-instead-of-max_numnodes.patch
@@ -0,0 +1,71 @@
+From 050c17f239fd53adb55aa768d4f41bc76c0fe045 Mon Sep 17 00:00:00 2001
+From: Ralph Campbell <rcampbell@nvidia.com>
+Date: Wed, 20 Feb 2019 22:18:58 -0800
+Subject: numa: change get_mempolicy() to use nr_node_ids instead of MAX_NUMNODES
+
+From: Ralph Campbell <rcampbell@nvidia.com>
+
+commit 050c17f239fd53adb55aa768d4f41bc76c0fe045 upstream.
+
+The system call, get_mempolicy() [1], passes an unsigned long *nodemask
+pointer and an unsigned long maxnode argument which specifies the length
+of the user's nodemask array in bits (which is rounded up).  The manual
+page says that if the maxnode value is too small, get_mempolicy will
+return EINVAL but there is no system call to return this minimum value.
+To determine this value, some programs search /proc/<pid>/status for a
+line starting with "Mems_allowed:" and use the number of digits in the
+mask to determine the minimum value.  A recent change to the way this line
+is formatted [2] causes these programs to compute a value less than
+MAX_NUMNODES so get_mempolicy() returns EINVAL.
+
+Change get_mempolicy(), the older compat version of get_mempolicy(), and
+the copy_nodes_to_user() function to use nr_node_ids instead of
+MAX_NUMNODES, thus preserving the de facto method of computing the minimum
+size for the nodemask array and the maxnode argument.
+
+[1] http://man7.org/linux/man-pages/man2/get_mempolicy.2.html
+[2] https://lore.kernel.org/lkml/1545405631-6808-1-git-send-email-longman@redhat.com
+
+Link: http://lkml.kernel.org/r/20190211180245.22295-1-rcampbell@nvidia.com
+Fixes: 4fb8e5b89bcbbbb ("include/linux/nodemask.h: use nr_node_ids (not MAX_NUMNODES) in __nodemask_pr_numnodes()")
+Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>
+Suggested-by: Alexander Duyck <alexander.duyck@gmail.com>
+Cc: Waiman Long <longman@redhat.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/mempolicy.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -1327,7 +1327,7 @@ static int copy_nodes_to_user(unsigned l
+ 			      nodemask_t *nodes)
+ {
+ 	unsigned long copy = ALIGN(maxnode-1, 64) / 8;
+-	const int nbytes = BITS_TO_LONGS(MAX_NUMNODES) * sizeof(long);
++	unsigned int nbytes = BITS_TO_LONGS(nr_node_ids) * sizeof(long);
+ 
+ 	if (copy > nbytes) {
+ 		if (copy > PAGE_SIZE)
+@@ -1488,7 +1488,7 @@ SYSCALL_DEFINE5(get_mempolicy, int __use
+ 	int uninitialized_var(pval);
+ 	nodemask_t nodes;
+ 
+-	if (nmask != NULL && maxnode < MAX_NUMNODES)
++	if (nmask != NULL && maxnode < nr_node_ids)
+ 		return -EINVAL;
+ 
+ 	err = do_get_mempolicy(&pval, &nodes, addr, flags);
+@@ -1517,7 +1517,7 @@ COMPAT_SYSCALL_DEFINE5(get_mempolicy, in
+ 	unsigned long nr_bits, alloc_size;
+ 	DECLARE_BITMAP(bm, MAX_NUMNODES);
+ 
+-	nr_bits = min_t(unsigned long, maxnode-1, MAX_NUMNODES);
++	nr_bits = min_t(unsigned long, maxnode-1, nr_node_ids);
+ 	alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8;
+ 
+ 	if (nmask)
diff --git a/queue-4.9/proc-oom-do-not-report-alien-mms-when-setting-oom_score_adj.patch b/queue-4.9/proc-oom-do-not-report-alien-mms-when-setting-oom_score_adj.patch
new file mode 100644
index 00000000000..212f43b01b5
--- /dev/null
+++ b/queue-4.9/proc-oom-do-not-report-alien-mms-when-setting-oom_score_adj.patch
@@ -0,0 +1,56 @@
+From b2b469939e93458753cfbf8282ad52636495965e Mon Sep 17 00:00:00 2001
+From: Michal Hocko <mhocko@suse.com>
+Date: Wed, 20 Feb 2019 22:19:42 -0800
+Subject: proc, oom: do not report alien mms when setting oom_score_adj
+
+From: Michal Hocko <mhocko@suse.com>
+
+commit b2b469939e93458753cfbf8282ad52636495965e upstream.
+
+Tetsuo has reported that creating thousands of processes sharing MM
+without SIGHAND (aka alien threads) and setting
+/proc/<pid>/oom_score_adj will swamp the kernel log and take ages [1]
+to finish.  This is especially worrisome because all that printing is
+done under the RCU lock, which can potentially trigger the RCU stall or
+softlockup detector.
+
+The primary reason for the printk was to catch potential users who might
+depend on the behavior prior to 44a70adec910 ("mm, oom_adj: make sure
+processes sharing mm have same view of oom_score_adj") but after more
+than 2 years without a single report I guess it is safe to simply remove
+the printk altogether.
+
+The next step should be moving oom_score_adj over to the mm struct and
+removing all the task crawling, as suggested by [2].
+
+[1] http://lkml.kernel.org/r/97fce864-6f75-bca5-14bc-12c9f890e740@i-love.sakura.ne.jp
+[2] http://lkml.kernel.org/r/20190117155159.GA4087@dhcp22.suse.cz
+
+Link: http://lkml.kernel.org/r/20190212102129.26288-1-mhocko@kernel.org
+Signed-off-by: Michal Hocko <mhocko@suse.com>
+Reported-by: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Yong-Taek Lee <ytk.lee@samsung.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/proc/base.c |    4 ----
+ 1 file changed, 4 deletions(-)
+
+--- a/fs/proc/base.c
++++ b/fs/proc/base.c
+@@ -1134,10 +1134,6 @@ static int __set_oom_adj(struct file *fi
+ 
+ 			task_lock(p);
+ 			if (!p->vfork_done && process_shares_mm(p, mm)) {
+-				pr_info("updating oom_score_adj for %d (%s) from %d to %d because it shares mm with %d (%s). Report if this is unexpected.\n",
+-						task_pid_nr(p), p->comm,
+-						p->signal->oom_score_adj, oom_adj,
+-						task_pid_nr(task), task->comm);
+ 				p->signal->oom_score_adj = oom_adj;
+ 				if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
+ 					p->signal->oom_score_adj_min = (short)oom_adj;
diff --git a/queue-4.9/series b/queue-4.9/series
index e69de29bb2d..11acaed6424 100644
--- a/queue-4.9/series
+++ b/queue-4.9/series
@@ -0,0 +1,6 @@
+mac80211-free-mpath-object-when-rhashtable-insertion-fails.patch
+libceph-handle-an-empty-authorize-reply.patch
+ceph-avoid-repeatedly-adding-inode-to-mdsc-snap_flush_list.patch
+numa-change-get_mempolicy-to-use-nr_node_ids-instead-of-max_numnodes.patch
+proc-oom-do-not-report-alien-mms-when-setting-oom_score_adj.patch
+keys-allow-reaching-the-keys-quotas-exactly.patch
-- 
2.39.5