From 126fdf42fe683cb3250df5f92b6ec39e5340ec6b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 29 Jul 2019 20:02:45 +0200 Subject: [PATCH] 4.14-stable patches added patches: access-avoid-the-rcu-grace-period-for-the-temporary-subjective-credentials.patch --- ...the-temporary-subjective-credentials.patch | 174 ++++++++++++++++++ queue-4.14/series | 1 + 2 files changed, 175 insertions(+) create mode 100644 queue-4.14/access-avoid-the-rcu-grace-period-for-the-temporary-subjective-credentials.patch diff --git a/queue-4.14/access-avoid-the-rcu-grace-period-for-the-temporary-subjective-credentials.patch b/queue-4.14/access-avoid-the-rcu-grace-period-for-the-temporary-subjective-credentials.patch new file mode 100644 index 00000000000..8681caaae53 --- /dev/null +++ b/queue-4.14/access-avoid-the-rcu-grace-period-for-the-temporary-subjective-credentials.patch @@ -0,0 +1,174 @@ +From d7852fbd0f0423937fa287a598bfde188bb68c22 Mon Sep 17 00:00:00 2001 +From: Linus Torvalds +Date: Thu, 11 Jul 2019 09:54:40 -0700 +Subject: access: avoid the RCU grace period for the temporary subjective credentials + +From: Linus Torvalds + +commit d7852fbd0f0423937fa287a598bfde188bb68c22 upstream. + +It turns out that 'access()' (and 'faccessat()') can cause a lot of RCU +work because it installs a temporary credential that gets allocated and +freed for each system call. + +The allocation and freeing overhead is mostly benign, but because +credentials can be accessed under the RCU read lock, the freeing +involves a RCU grace period. + +Which is not a huge deal normally, but if you have a lot of access() +calls, this causes a fair amount of secondary damage: instead of having a +nice alloc/free pattern that hits in hot per-CPU slab caches, you have +all those delayed free's, and on big machines with hundreds of cores, +the RCU overhead can end up being enormous. + +But it turns out that all of this is entirely unnecessary. 
Exactly +because access() only installs the credential as the thread-local +subjective credential, the temporary cred pointer doesn't actually need +to be RCU free'd at all. Once we're done using it, we can just free it +synchronously and avoid all the RCU overhead. + +So add a 'non_rcu' flag to 'struct cred', which can be set by users that +know they only use it in non-RCU context (there are other potential +users for this). We can make it a union with the rcu freeing list head +that we need for the RCU case, so this doesn't need any extra storage. + +Note that this also makes 'get_current_cred()' clear the new non_rcu +flag, in case we have filesystems that take a long-term reference to the +cred and then expect the RCU delayed freeing afterwards. It's not +entirely clear that this is required, but it makes for clear semantics: +the subjective cred remains non-RCU as long as you only access it +synchronously using the thread-local accessors, but you _can_ use it as +a generic cred if you want to. + +It is possible that we should just remove the whole RCU markings for +->cred entirely. Only ->real_cred is really supposed to be accessed +through RCU, and the long-term cred copies that nfs uses might want to +explicitly re-enable RCU freeing if required, rather than have +get_current_cred() do it implicitly. + +But this is a "minimal semantic changes" change for the immediate +problem. + +Acked-by: Peter Zijlstra (Intel) +Acked-by: Eric Dumazet +Acked-by: Paul E. 
McKenney +Cc: Oleg Nesterov +Cc: Jan Glauber +Cc: Jiri Kosina +Cc: Jayachandran Chandrasekharan Nair +Cc: Greg KH +Cc: Kees Cook +Cc: David Howells +Cc: Miklos Szeredi +Cc: Al Viro +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/open.c | 19 +++++++++++++++++++ + include/linux/cred.h | 7 ++++++- + kernel/cred.c | 21 +++++++++++++++++++-- + 3 files changed, 44 insertions(+), 3 deletions(-) + +--- a/fs/open.c ++++ b/fs/open.c +@@ -379,6 +379,25 @@ SYSCALL_DEFINE3(faccessat, int, dfd, con + override_cred->cap_permitted; + } + ++ /* ++ * The new set of credentials can *only* be used in ++ * task-synchronous circumstances, and does not need ++ * RCU freeing, unless somebody then takes a separate ++ * reference to it. ++ * ++ * NOTE! This is _only_ true because this credential ++ * is used purely for override_creds() that installs ++ * it as the subjective cred. Other threads will be ++ * accessing ->real_cred, not the subjective cred. ++ * ++ * If somebody _does_ make a copy of this (using the ++ * 'get_current_cred()' function), that will clear the ++ * non_rcu field, because now that other user may be ++ * expecting RCU freeing. But normal thread-synchronous ++ * cred accesses will keep things non-RCY. ++ */ ++ override_cred->non_rcu = 1; ++ + old_cred = override_creds(override_cred); + retry: + res = user_path_at(dfd, filename, lookup_flags, &path); +--- a/include/linux/cred.h ++++ b/include/linux/cred.h +@@ -145,7 +145,11 @@ struct cred { + struct user_struct *user; /* real user ID subscription */ + struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */ + struct group_info *group_info; /* supplementary groups for euid/fsgid */ +- struct rcu_head rcu; /* RCU deletion hook */ ++ /* RCU deletion */ ++ union { ++ int non_rcu; /* Can we skip RCU deletion? 
*/ ++ struct rcu_head rcu; /* RCU deletion hook */ ++ }; + } __randomize_layout; + + extern void __put_cred(struct cred *); +@@ -243,6 +247,7 @@ static inline const struct cred *get_cre + { + struct cred *nonconst_cred = (struct cred *) cred; + validate_creds(cred); ++ nonconst_cred->non_rcu = 0; + return get_new_cred(nonconst_cred); + } + +--- a/kernel/cred.c ++++ b/kernel/cred.c +@@ -147,7 +147,10 @@ void __put_cred(struct cred *cred) + BUG_ON(cred == current->cred); + BUG_ON(cred == current->real_cred); + +- call_rcu(&cred->rcu, put_cred_rcu); ++ if (cred->non_rcu) ++ put_cred_rcu(&cred->rcu); ++ else ++ call_rcu(&cred->rcu, put_cred_rcu); + } + EXPORT_SYMBOL(__put_cred); + +@@ -258,6 +261,7 @@ struct cred *prepare_creds(void) + old = task->cred; + memcpy(new, old, sizeof(struct cred)); + ++ new->non_rcu = 0; + atomic_set(&new->usage, 1); + set_cred_subscribers(new, 0); + get_group_info(new->group_info); +@@ -537,7 +541,19 @@ const struct cred *override_creds(const + + validate_creds(old); + validate_creds(new); +- get_cred(new); ++ ++ /* ++ * NOTE! This uses 'get_new_cred()' rather than 'get_cred()'. ++ * ++ * That means that we do not clear the 'non_rcu' flag, since ++ * we are only installing the cred into the thread-synchronous ++ * '->cred' pointer, not the '->real_cred' pointer that is ++ * visible to other threads under RCU. ++ * ++ * Also note that we did validate_creds() manually, not depending ++ * on the validation in 'get_cred()'. 
++ */ ++ get_new_cred((struct cred *)new); + alter_cred_subscribers(new, 1); + rcu_assign_pointer(current->cred, new); + alter_cred_subscribers(old, -1); +@@ -620,6 +636,7 @@ struct cred *prepare_kernel_cred(struct + validate_creds(old); + + *new = *old; ++ new->non_rcu = 0; + atomic_set(&new->usage, 1); + set_cred_subscribers(new, 0); + get_uid(new->user); diff --git a/queue-4.14/series b/queue-4.14/series index 39d88f46a8c..9a57be70fb8 100644 --- a/queue-4.14/series +++ b/queue-4.14/series @@ -290,3 +290,4 @@ alsa-line6-fix-wrong-altsetting-for-line6_podhd500_1.patch alsa-hda-add-a-conexant-codec-entry-to-let-mute-led-work.patch powerpc-xive-fix-loop-exit-condition-in-xive_find_target_in_mask.patch powerpc-tm-fix-oops-on-sigreturn-on-systems-without-tm.patch +access-avoid-the-rcu-grace-period-for-the-temporary-subjective-credentials.patch -- 2.47.3