--- /dev/null
+From d7852fbd0f0423937fa287a598bfde188bb68c22 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Thu, 11 Jul 2019 09:54:40 -0700
+Subject: access: avoid the RCU grace period for the temporary subjective credentials
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit d7852fbd0f0423937fa287a598bfde188bb68c22 upstream.
+
+It turns out that 'access()' (and 'faccessat()') can cause a lot of RCU
+work because it installs a temporary credential that gets allocated and
+freed for each system call.
+
+The allocation and freeing overhead is mostly benign, but because
+credentials can be accessed under the RCU read lock, the freeing
+involves a RCU grace period.
+
+Which is not a huge deal normally, but if you have a lot of access()
+calls, this causes a fair amount of seconday damage: instead of having a
+nice alloc/free patterns that hits in hot per-CPU slab caches, you have
+all those delayed free's, and on big machines with hundreds of cores,
+the RCU overhead can end up being enormous.
+
+But it turns out that all of this is entirely unnecessary. Exactly
+because access() only installs the credential as the thread-local
+subjective credential, the temporary cred pointer doesn't actually need
+to be RCU free'd at all. Once we're done using it, we can just free it
+synchronously and avoid all the RCU overhead.
+
+So add a 'non_rcu' flag to 'struct cred', which can be set by users that
+know they only use it in non-RCU context (there are other potential
+users for this). We can make it a union with the rcu freeing list head
+that we need for the RCU case, so this doesn't need any extra storage.
+
+Note that this also makes 'get_current_cred()' clear the new non_rcu
+flag, in case we have filesystems that take a long-term reference to the
+cred and then expect the RCU delayed freeing afterwards. It's not
+entirely clear that this is required, but it makes for clear semantics:
+the subjective cred remains non-RCU as long as you only access it
+synchronously using the thread-local accessors, but you _can_ use it as
+a generic cred if you want to.
+
+It is possible that we should just remove the whole RCU markings for
+->cred entirely. Only ->real_cred is really supposed to be accessed
+through RCU, and the long-term cred copies that nfs uses might want to
+explicitly re-enable RCU freeing if required, rather than have
+get_current_cred() do it implicitly.
+
+But this is a "minimal semantic changes" change for the immediate
+problem.
+
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Paul E. McKenney <paulmck@linux.ibm.com>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: Jan Glauber <jglauber@marvell.com>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Jayachandran Chandrasekharan Nair <jnair@marvell.com>
+Cc: Greg KH <greg@kroah.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: David Howells <dhowells@redhat.com>
+Cc: Miklos Szeredi <miklos@szeredi.hu>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/open.c | 19 +++++++++++++++++++
+ include/linux/cred.h | 7 ++++++-
+ kernel/cred.c | 21 +++++++++++++++++++--
+ 3 files changed, 44 insertions(+), 3 deletions(-)
+
+--- a/fs/open.c
++++ b/fs/open.c
+@@ -379,6 +379,25 @@ SYSCALL_DEFINE3(faccessat, int, dfd, con
+ override_cred->cap_permitted;
+ }
+
++ /*
++ * The new set of credentials can *only* be used in
++ * task-synchronous circumstances, and does not need
++ * RCU freeing, unless somebody then takes a separate
++ * reference to it.
++ *
++ * NOTE! This is _only_ true because this credential
++ * is used purely for override_creds() that installs
++ * it as the subjective cred. Other threads will be
++ * accessing ->real_cred, not the subjective cred.
++ *
++ * If somebody _does_ make a copy of this (using the
++ * 'get_current_cred()' function), that will clear the
++ * non_rcu field, because now that other user may be
++ * expecting RCU freeing. But normal thread-synchronous
++ * cred accesses will keep things non-RCY.
++ */
++ override_cred->non_rcu = 1;
++
+ old_cred = override_creds(override_cred);
+ retry:
+ res = user_path_at(dfd, filename, lookup_flags, &path);
+--- a/include/linux/cred.h
++++ b/include/linux/cred.h
+@@ -145,7 +145,11 @@ struct cred {
+ struct user_struct *user; /* real user ID subscription */
+ struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */
+ struct group_info *group_info; /* supplementary groups for euid/fsgid */
+- struct rcu_head rcu; /* RCU deletion hook */
++ /* RCU deletion */
++ union {
++ int non_rcu; /* Can we skip RCU deletion? */
++ struct rcu_head rcu; /* RCU deletion hook */
++ };
+ } __randomize_layout;
+
+ extern void __put_cred(struct cred *);
+@@ -243,6 +247,7 @@ static inline const struct cred *get_cre
+ {
+ struct cred *nonconst_cred = (struct cred *) cred;
+ validate_creds(cred);
++ nonconst_cred->non_rcu = 0;
+ return get_new_cred(nonconst_cred);
+ }
+
+--- a/kernel/cred.c
++++ b/kernel/cred.c
+@@ -147,7 +147,10 @@ void __put_cred(struct cred *cred)
+ BUG_ON(cred == current->cred);
+ BUG_ON(cred == current->real_cred);
+
+- call_rcu(&cred->rcu, put_cred_rcu);
++ if (cred->non_rcu)
++ put_cred_rcu(&cred->rcu);
++ else
++ call_rcu(&cred->rcu, put_cred_rcu);
+ }
+ EXPORT_SYMBOL(__put_cred);
+
+@@ -258,6 +261,7 @@ struct cred *prepare_creds(void)
+ old = task->cred;
+ memcpy(new, old, sizeof(struct cred));
+
++ new->non_rcu = 0;
+ atomic_set(&new->usage, 1);
+ set_cred_subscribers(new, 0);
+ get_group_info(new->group_info);
+@@ -537,7 +541,19 @@ const struct cred *override_creds(const
+
+ validate_creds(old);
+ validate_creds(new);
+- get_cred(new);
++
++ /*
++ * NOTE! This uses 'get_new_cred()' rather than 'get_cred()'.
++ *
++ * That means that we do not clear the 'non_rcu' flag, since
++ * we are only installing the cred into the thread-synchronous
++ * '->cred' pointer, not the '->real_cred' pointer that is
++ * visible to other threads under RCU.
++ *
++ * Also note that we did validate_creds() manually, not depending
++ * on the validation in 'get_cred()'.
++ */
++ get_new_cred((struct cred *)new);
+ alter_cred_subscribers(new, 1);
+ rcu_assign_pointer(current->cred, new);
+ alter_cred_subscribers(old, -1);
+@@ -620,6 +636,7 @@ struct cred *prepare_kernel_cred(struct
+ validate_creds(old);
+
+ *new = *old;
++ new->non_rcu = 0;
+ atomic_set(&new->usage, 1);
+ set_cred_subscribers(new, 0);
+ get_uid(new->user);