From e55a062096b4aea9c3f1d0b84fdd06facb444e58 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 23 Feb 2019 12:11:30 +0100 Subject: [PATCH] 4.9-stable patches added patches: ceph-avoid-repeatedly-adding-inode-to-mdsc-snap_flush_list.patch keys-allow-reaching-the-keys-quotas-exactly.patch libceph-handle-an-empty-authorize-reply.patch mac80211-free-mpath-object-when-rhashtable-insertion-fails.patch numa-change-get_mempolicy-to-use-nr_node_ids-instead-of-max_numnodes.patch proc-oom-do-not-report-alien-mms-when-setting-oom_score_adj.patch --- ...adding-inode-to-mdsc-snap_flush_list.patch | 33 +++++++++ ...low-reaching-the-keys-quotas-exactly.patch | 43 +++++++++++ ...ceph-handle-an-empty-authorize-reply.patch | 74 +++++++++++++++++++ ...ject-when-rhashtable-insertion-fails.patch | 57 ++++++++++++++ ...-nr_node_ids-instead-of-max_numnodes.patch | 71 ++++++++++++++++++ ...alien-mms-when-setting-oom_score_adj.patch | 56 ++++++++++++++ queue-4.9/series | 6 ++ 7 files changed, 340 insertions(+) create mode 100644 queue-4.9/ceph-avoid-repeatedly-adding-inode-to-mdsc-snap_flush_list.patch create mode 100644 queue-4.9/keys-allow-reaching-the-keys-quotas-exactly.patch create mode 100644 queue-4.9/libceph-handle-an-empty-authorize-reply.patch create mode 100644 queue-4.9/mac80211-free-mpath-object-when-rhashtable-insertion-fails.patch create mode 100644 queue-4.9/numa-change-get_mempolicy-to-use-nr_node_ids-instead-of-max_numnodes.patch create mode 100644 queue-4.9/proc-oom-do-not-report-alien-mms-when-setting-oom_score_adj.patch diff --git a/queue-4.9/ceph-avoid-repeatedly-adding-inode-to-mdsc-snap_flush_list.patch b/queue-4.9/ceph-avoid-repeatedly-adding-inode-to-mdsc-snap_flush_list.patch new file mode 100644 index 00000000000..c0ccf1c5100 --- /dev/null +++ b/queue-4.9/ceph-avoid-repeatedly-adding-inode-to-mdsc-snap_flush_list.patch @@ -0,0 +1,33 @@ +From 04242ff3ac0abbaa4362f97781dac268e6c3541a Mon Sep 17 00:00:00 2001 +From: "Yan, Zheng" +Date: Mon, 11 Feb 2019 
15:18:52 +0800 +Subject: ceph: avoid repeatedly adding inode to mdsc->snap_flush_list + +From: Yan, Zheng + +commit 04242ff3ac0abbaa4362f97781dac268e6c3541a upstream. + +Otherwise, mdsc->snap_flush_list may get corrupted. + +Cc: stable@vger.kernel.org +Signed-off-by: "Yan, Zheng" +Reviewed-by: Ilya Dryomov +Signed-off-by: Ilya Dryomov +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ceph/snap.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/fs/ceph/snap.c ++++ b/fs/ceph/snap.c +@@ -609,7 +609,8 @@ int __ceph_finish_cap_snap(struct ceph_i + capsnap->size); + + spin_lock(&mdsc->snap_flush_lock); +- list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list); ++ if (list_empty(&ci->i_snap_flush_item)) ++ list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list); + spin_unlock(&mdsc->snap_flush_lock); + return 1; /* caller may want to ceph_flush_snaps */ + } diff --git a/queue-4.9/keys-allow-reaching-the-keys-quotas-exactly.patch b/queue-4.9/keys-allow-reaching-the-keys-quotas-exactly.patch new file mode 100644 index 00000000000..f5b1707ec51 --- /dev/null +++ b/queue-4.9/keys-allow-reaching-the-keys-quotas-exactly.patch @@ -0,0 +1,43 @@ +From a08bf91ce28ed3ae7b6fef35d843fef8dc8c2cd9 Mon Sep 17 00:00:00 2001 +From: Eric Biggers +Date: Thu, 14 Feb 2019 16:20:01 +0000 +Subject: KEYS: allow reaching the keys quotas exactly + +From: Eric Biggers + +commit a08bf91ce28ed3ae7b6fef35d843fef8dc8c2cd9 upstream. + +If the sysctl 'kernel.keys.maxkeys' is set to some number n, then +actually users can only add up to 'n - 1' keys. Likewise for +'kernel.keys.maxbytes' and the root_* versions of these sysctls. But +these sysctls are apparently supposed to be *maximums*, as per their +names and all documentation I could find -- the keyrings(7) man page, +Documentation/security/keys/core.rst, and all the mentions of EDQUOT +meaning that the key quota was *exceeded* (as opposed to reached). + +Thus, fix the code to allow reaching the quotas exactly. 
+ +Fixes: 0b77f5bfb45c ("keys: make the keyring quotas controllable through /proc/sys") +Cc: stable@vger.kernel.org +Signed-off-by: Eric Biggers +Signed-off-by: David Howells +Signed-off-by: James Morris +Signed-off-by: Greg Kroah-Hartman + +--- + security/keys/key.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/security/keys/key.c ++++ b/security/keys/key.c +@@ -264,8 +264,8 @@ struct key *key_alloc(struct key_type *t + + spin_lock(&user->lock); + if (!(flags & KEY_ALLOC_QUOTA_OVERRUN)) { +- if (user->qnkeys + 1 >= maxkeys || +- user->qnbytes + quotalen >= maxbytes || ++ if (user->qnkeys + 1 > maxkeys || ++ user->qnbytes + quotalen > maxbytes || + user->qnbytes + quotalen < user->qnbytes) + goto no_quota; + } diff --git a/queue-4.9/libceph-handle-an-empty-authorize-reply.patch b/queue-4.9/libceph-handle-an-empty-authorize-reply.patch new file mode 100644 index 00000000000..a0598affdaf --- /dev/null +++ b/queue-4.9/libceph-handle-an-empty-authorize-reply.patch @@ -0,0 +1,74 @@ +From 0fd3fd0a9bb0b02b6435bb7070e9f7b82a23f068 Mon Sep 17 00:00:00 2001 +From: Ilya Dryomov +Date: Tue, 5 Feb 2019 20:30:27 +0100 +Subject: libceph: handle an empty authorize reply + +From: Ilya Dryomov + +commit 0fd3fd0a9bb0b02b6435bb7070e9f7b82a23f068 upstream. + +The authorize reply can be empty, for example when the ticket used to +build the authorizer is too old and TAG_BADAUTHORIZER is returned from +the service. Calling ->verify_authorizer_reply() results in an attempt +to decrypt and validate (somewhat) random data in au->buf (most likely +the signature block from calc_signature()), which fails and ends up in +con_fault_finish() with !con->auth_retry. 
The ticket isn't invalidated +and the connection is retried again and again until a new ticket is +obtained from the monitor: + + libceph: osd2 192.168.122.1:6809 bad authorize reply + libceph: osd2 192.168.122.1:6809 bad authorize reply + libceph: osd2 192.168.122.1:6809 bad authorize reply + libceph: osd2 192.168.122.1:6809 bad authorize reply + +Let TAG_BADAUTHORIZER handler kick in and increment con->auth_retry. + +Cc: stable@vger.kernel.org +Fixes: 5c056fdc5b47 ("libceph: verify authorize reply on connect") +Link: https://tracker.ceph.com/issues/20164 +Signed-off-by: Ilya Dryomov +Reviewed-by: Sage Weil +Signed-off-by: Greg Kroah-Hartman + +--- + net/ceph/messenger.c | 15 +++++++++------ + 1 file changed, 9 insertions(+), 6 deletions(-) + +--- a/net/ceph/messenger.c ++++ b/net/ceph/messenger.c +@@ -2042,6 +2042,8 @@ static int process_connect(struct ceph_c + dout("process_connect on %p tag %d\n", con, (int)con->in_tag); + + if (con->auth) { ++ int len = le32_to_cpu(con->in_reply.authorizer_len); ++ + /* + * Any connection that defines ->get_authorizer() + * should also define ->add_authorizer_challenge() and +@@ -2051,8 +2053,7 @@ static int process_connect(struct ceph_c + */ + if (con->in_reply.tag == CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER) { + ret = con->ops->add_authorizer_challenge( +- con, con->auth->authorizer_reply_buf, +- le32_to_cpu(con->in_reply.authorizer_len)); ++ con, con->auth->authorizer_reply_buf, len); + if (ret < 0) + return ret; + +@@ -2062,10 +2063,12 @@ static int process_connect(struct ceph_c + return 0; + } + +- ret = con->ops->verify_authorizer_reply(con); +- if (ret < 0) { +- con->error_msg = "bad authorize reply"; +- return ret; ++ if (len) { ++ ret = con->ops->verify_authorizer_reply(con); ++ if (ret < 0) { ++ con->error_msg = "bad authorize reply"; ++ return ret; ++ } + } + } + diff --git a/queue-4.9/mac80211-free-mpath-object-when-rhashtable-insertion-fails.patch 
b/queue-4.9/mac80211-free-mpath-object-when-rhashtable-insertion-fails.patch new file mode 100644 index 00000000000..d6fc83b933e --- /dev/null +++ b/queue-4.9/mac80211-free-mpath-object-when-rhashtable-insertion-fails.patch @@ -0,0 +1,57 @@ +From 4ff3a9d14c6c06eaa4e5976c61599ea2bd9e81b2 Mon Sep 17 00:00:00 2001 +From: Herbert Xu +Date: Thu, 14 Feb 2019 22:03:25 +0800 +Subject: mac80211: Free mpath object when rhashtable insertion fails + +From: Herbert Xu + +commit 4ff3a9d14c6c06eaa4e5976c61599ea2bd9e81b2 upstream. + +When rhashtable insertion fails the mesh table code doesn't free +the now-orphan mesh path object. This patch fixes that. + +Cc: stable@vger.kernel.org +Signed-off-by: Herbert Xu +Signed-off-by: Johannes Berg +Signed-off-by: Greg Kroah-Hartman + +--- + net/mac80211/mesh_pathtbl.c | 17 +++++++++-------- + 1 file changed, 9 insertions(+), 8 deletions(-) + +--- a/net/mac80211/mesh_pathtbl.c ++++ b/net/mac80211/mesh_pathtbl.c +@@ -449,17 +449,15 @@ struct mesh_path *mesh_path_add(struct i + + } while (unlikely(ret == -EEXIST && !mpath)); + +- if (ret && ret != -EEXIST) +- return ERR_PTR(ret); +- +- /* At this point either new_mpath was added, or we found a +- * matching entry already in the table; in the latter case +- * free the unnecessary new entry. 
+- */ +- if (ret == -EEXIST) { ++ if (ret) { + kfree(new_mpath); ++ ++ if (ret != -EEXIST) ++ return ERR_PTR(ret); ++ + new_mpath = mpath; + } ++ + sdata->u.mesh.mesh_paths_generation++; + return new_mpath; + } +@@ -489,6 +487,9 @@ int mpp_path_add(struct ieee80211_sub_if + &new_mpath->rhash, + mesh_rht_params); + ++ if (ret) ++ kfree(new_mpath); ++ + sdata->u.mesh.mpp_paths_generation++; + return ret; + } diff --git a/queue-4.9/numa-change-get_mempolicy-to-use-nr_node_ids-instead-of-max_numnodes.patch b/queue-4.9/numa-change-get_mempolicy-to-use-nr_node_ids-instead-of-max_numnodes.patch new file mode 100644 index 00000000000..0a826b79fe6 --- /dev/null +++ b/queue-4.9/numa-change-get_mempolicy-to-use-nr_node_ids-instead-of-max_numnodes.patch @@ -0,0 +1,71 @@ +From 050c17f239fd53adb55aa768d4f41bc76c0fe045 Mon Sep 17 00:00:00 2001 +From: Ralph Campbell +Date: Wed, 20 Feb 2019 22:18:58 -0800 +Subject: numa: change get_mempolicy() to use nr_node_ids instead of MAX_NUMNODES + +From: Ralph Campbell + +commit 050c17f239fd53adb55aa768d4f41bc76c0fe045 upstream. + +The system call, get_mempolicy() [1], passes an unsigned long *nodemask +pointer and an unsigned long maxnode argument which specifies the length +of the user's nodemask array in bits (which is rounded up). The manual +page says that if the maxnode value is too small, get_mempolicy will +return EINVAL but there is no system call to return this minimum value. +To determine this value, some programs search /proc/<pid>/status for a +line starting with "Mems_allowed:" and use the number of digits in the +mask to determine the minimum value. A recent change to the way this line +is formatted [2] causes these programs to compute a value less than +MAX_NUMNODES so get_mempolicy() returns EINVAL. 
+ +Change get_mempolicy(), the older compat version of get_mempolicy(), and +the copy_nodes_to_user() function to use nr_node_ids instead of +MAX_NUMNODES, thus preserving the de facto method of computing the minimum +size for the nodemask array and the maxnode argument. + +[1] http://man7.org/linux/man-pages/man2/get_mempolicy.2.html +[2] https://lore.kernel.org/lkml/1545405631-6808-1-git-send-email-longman@redhat.com + +Link: http://lkml.kernel.org/r/20190211180245.22295-1-rcampbell@nvidia.com +Fixes: 4fb8e5b89bcbbbb ("include/linux/nodemask.h: use nr_node_ids (not MAX_NUMNODES) in __nodemask_pr_numnodes()") +Signed-off-by: Ralph Campbell +Suggested-by: Alexander Duyck +Cc: Waiman Long +Cc: <stable@vger.kernel.org> +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/mempolicy.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/mm/mempolicy.c ++++ b/mm/mempolicy.c +@@ -1327,7 +1327,7 @@ static int copy_nodes_to_user(unsigned l + nodemask_t *nodes) + { + unsigned long copy = ALIGN(maxnode-1, 64) / 8; +- const int nbytes = BITS_TO_LONGS(MAX_NUMNODES) * sizeof(long); ++ unsigned int nbytes = BITS_TO_LONGS(nr_node_ids) * sizeof(long); + + if (copy > nbytes) { + if (copy > PAGE_SIZE) +@@ -1488,7 +1488,7 @@ SYSCALL_DEFINE5(get_mempolicy, int __use + int uninitialized_var(pval); + nodemask_t nodes; + +- if (nmask != NULL && maxnode < MAX_NUMNODES) ++ if (nmask != NULL && maxnode < nr_node_ids) + return -EINVAL; + + err = do_get_mempolicy(&pval, &nodes, addr, flags); +@@ -1517,7 +1517,7 @@ COMPAT_SYSCALL_DEFINE5(get_mempolicy, in + unsigned long nr_bits, alloc_size; + DECLARE_BITMAP(bm, MAX_NUMNODES); + +- nr_bits = min_t(unsigned long, maxnode-1, MAX_NUMNODES); ++ nr_bits = min_t(unsigned long, maxnode-1, nr_node_ids); + alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8; + + if (nmask) diff --git a/queue-4.9/proc-oom-do-not-report-alien-mms-when-setting-oom_score_adj.patch 
b/queue-4.9/proc-oom-do-not-report-alien-mms-when-setting-oom_score_adj.patch new file mode 100644 index 00000000000..212f43b01b5 --- /dev/null +++ b/queue-4.9/proc-oom-do-not-report-alien-mms-when-setting-oom_score_adj.patch @@ -0,0 +1,56 @@ +From b2b469939e93458753cfbf8282ad52636495965e Mon Sep 17 00:00:00 2001 +From: Michal Hocko +Date: Wed, 20 Feb 2019 22:19:42 -0800 +Subject: proc, oom: do not report alien mms when setting oom_score_adj + +From: Michal Hocko + +commit b2b469939e93458753cfbf8282ad52636495965e upstream. + +Tetsuo has reported that creating thousands of processes sharing MM +without SIGHAND (aka alien threads) and setting +/proc/<pid>/oom_score_adj will swamp the kernel log and take ages [1] +to finish. This is especially worrisome because all that printing is done +under RCU lock and this can potentially trigger RCU stall or softlockup +detector. + +The primary reason for the printk was to catch potential users who might +depend on the behavior prior to 44a70adec910 ("mm, oom_adj: make sure +processes sharing mm have same view of oom_score_adj") but after more +than 2 years without a single report I guess it is safe to simply remove +the printk altogether. 
+ +The next step should be moving oom_score_adj over to the mm struct and +removing all the tasks crawling as suggested by [2]. + +[1] http://lkml.kernel.org/r/97fce864-6f75-bca5-14bc-12c9f890e740@i-love.sakura.ne.jp +[2] http://lkml.kernel.org/r/20190117155159.GA4087@dhcp22.suse.cz + +Link: http://lkml.kernel.org/r/20190212102129.26288-1-mhocko@kernel.org +Signed-off-by: Michal Hocko +Reported-by: Tetsuo Handa +Acked-by: Johannes Weiner +Cc: David Rientjes +Cc: Yong-Taek Lee +Cc: <stable@vger.kernel.org> +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/proc/base.c | 4 ---- + 1 file changed, 4 deletions(-) + +--- a/fs/proc/base.c ++++ b/fs/proc/base.c +@@ -1134,10 +1134,6 @@ static int __set_oom_adj(struct file *fi + + task_lock(p); + if (!p->vfork_done && process_shares_mm(p, mm)) { +- pr_info("updating oom_score_adj for %d (%s) from %d to %d because it shares mm with %d (%s). Report if this is unexpected.\n", +- task_pid_nr(p), p->comm, +- p->signal->oom_score_adj, oom_adj, +- task_pid_nr(task), task->comm); + p->signal->oom_score_adj = oom_adj; + if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE)) + p->signal->oom_score_adj_min = (short)oom_adj; diff --git a/queue-4.9/series b/queue-4.9/series index e69de29bb2d..11acaed6424 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -0,0 +1,6 @@ +mac80211-free-mpath-object-when-rhashtable-insertion-fails.patch +libceph-handle-an-empty-authorize-reply.patch +ceph-avoid-repeatedly-adding-inode-to-mdsc-snap_flush_list.patch +numa-change-get_mempolicy-to-use-nr_node_ids-instead-of-max_numnodes.patch +proc-oom-do-not-report-alien-mms-when-setting-oom_score_adj.patch +keys-allow-reaching-the-keys-quotas-exactly.patch -- 2.39.5