]>
Commit | Line | Data |
---|---|---|
7f85ba28 GKH |
1 | From 5467a68cbf6884c9a9d91e2a89140afb1839c835 Mon Sep 17 00:00:00 2001 |
2 | From: Al Viro <viro@zeniv.linux.org.uk> | |
3 | Date: Fri, 15 Mar 2019 22:23:19 -0400 | |
4 | Subject: dcache: sort the freeing-without-RCU-delay mess for good. | |
5 | ||
6 | From: Al Viro <viro@zeniv.linux.org.uk> | |
7 | ||
8 | commit 5467a68cbf6884c9a9d91e2a89140afb1839c835 upstream. | |
9 | ||
10 | For lockless accesses to dentries we don't have pinned we rely | |
11 | (among other things) upon having an RCU delay between dropping | |
12 | the last reference and actually freeing the memory. | |
13 | ||
14 | On the other hand, for things like pipes and sockets we neither | |
15 | do that kind of lockless access, nor want to deal with the | |
16 | overhead of an RCU delay every time a socket gets closed. | |
17 | ||
18 | So delay was made optional - setting DCACHE_RCUACCESS in ->d_flags | |
19 | made sure it would happen. We tried to avoid setting it unless | |
20 | we knew we need it. Unfortunately, that had led to recurring | |
21 | class of bugs, in which we missed the need to set it. | |
22 | ||
23 | We only really need it for dentries that are created by | |
24 | d_alloc_pseudo(), so let's not bother with trying to be smart - | |
25 | just make having an RCU delay the default. The ones that do | |
26 | *not* get it set the replacement flag (DCACHE_NORCU) and we'd | |
27 | better use that sparingly. d_alloc_pseudo() is the only | |
28 | such user right now. | |
29 | ||
30 | FWIW, the race that finally prompted that switch had been | |
31 | between __lock_parent() of immediate subdirectory of what's | |
32 | currently the root of a disconnected tree (e.g. from | |
33 | open-by-handle in progress) racing with d_splice_alias() | |
34 | elsewhere picking another alias for the same inode, either | |
35 | on outright corrupted fs image, or (in case of open-by-handle | |
36 | on NFS) that subdirectory having been just moved on server. | |
37 | It's not easy to hit, so the sky is not falling, but that's | |
38 | not the first race on similar missed cases and the logics | |
39 | for settinf DCACHE_RCUACCESS has gotten ridiculously | |
40 | convoluted. | |
41 | ||
42 | Cc: stable@vger.kernel.org | |
43 | Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> | |
44 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
45 | ||
46 | --- | |
47 | Documentation/filesystems/porting | 5 +++++ | |
48 | fs/dcache.c | 24 +++++++++++++----------- | |
49 | fs/nsfs.c | 3 +-- | |
50 | include/linux/dcache.h | 2 +- | |
51 | 4 files changed, 20 insertions(+), 14 deletions(-) | |
52 | ||
53 | --- a/Documentation/filesystems/porting | |
54 | +++ b/Documentation/filesystems/porting | |
55 | @@ -622,3 +622,8 @@ in your dentry operations instead. | |
56 | alloc_file_clone(file, flags, ops) does not affect any caller's references. | |
57 | On success you get a new struct file sharing the mount/dentry with the | |
58 | original, on failure - ERR_PTR(). | |
59 | +-- | |
60 | +[mandatory] | |
61 | + DCACHE_RCUACCESS is gone; having an RCU delay on dentry freeing is the | |
62 | + default. DCACHE_NORCU opts out, and only d_alloc_pseudo() has any | |
63 | + business doing so. | |
64 | --- a/fs/dcache.c | |
65 | +++ b/fs/dcache.c | |
66 | @@ -344,7 +344,7 @@ static void dentry_free(struct dentry *d | |
67 | } | |
68 | } | |
69 | /* if dentry was never visible to RCU, immediate free is OK */ | |
70 | - if (!(dentry->d_flags & DCACHE_RCUACCESS)) | |
71 | + if (dentry->d_flags & DCACHE_NORCU) | |
72 | __d_free(&dentry->d_u.d_rcu); | |
73 | else | |
74 | call_rcu(&dentry->d_u.d_rcu, __d_free); | |
75 | @@ -1694,7 +1694,6 @@ struct dentry *d_alloc(struct dentry * p | |
76 | struct dentry *dentry = __d_alloc(parent->d_sb, name); | |
77 | if (!dentry) | |
78 | return NULL; | |
79 | - dentry->d_flags |= DCACHE_RCUACCESS; | |
80 | spin_lock(&parent->d_lock); | |
81 | /* | |
82 | * don't need child lock because it is not subject | |
83 | @@ -1719,7 +1718,7 @@ struct dentry *d_alloc_cursor(struct den | |
84 | { | |
85 | struct dentry *dentry = d_alloc_anon(parent->d_sb); | |
86 | if (dentry) { | |
87 | - dentry->d_flags |= DCACHE_RCUACCESS | DCACHE_DENTRY_CURSOR; | |
88 | + dentry->d_flags |= DCACHE_DENTRY_CURSOR; | |
89 | dentry->d_parent = dget(parent); | |
90 | } | |
91 | return dentry; | |
92 | @@ -1732,10 +1731,17 @@ struct dentry *d_alloc_cursor(struct den | |
93 | * | |
94 | * For a filesystem that just pins its dentries in memory and never | |
95 | * performs lookups at all, return an unhashed IS_ROOT dentry. | |
96 | + * This is used for pipes, sockets et.al. - the stuff that should | |
97 | + * never be anyone's children or parents. Unlike all other | |
98 | + * dentries, these will not have RCU delay between dropping the | |
99 | + * last reference and freeing them. | |
100 | */ | |
101 | struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name) | |
102 | { | |
103 | - return __d_alloc(sb, name); | |
104 | + struct dentry *dentry = __d_alloc(sb, name); | |
105 | + if (likely(dentry)) | |
106 | + dentry->d_flags |= DCACHE_NORCU; | |
107 | + return dentry; | |
108 | } | |
109 | EXPORT_SYMBOL(d_alloc_pseudo); | |
110 | ||
111 | @@ -1899,12 +1905,10 @@ struct dentry *d_make_root(struct inode | |
112 | ||
113 | if (root_inode) { | |
114 | res = d_alloc_anon(root_inode->i_sb); | |
115 | - if (res) { | |
116 | - res->d_flags |= DCACHE_RCUACCESS; | |
117 | + if (res) | |
118 | d_instantiate(res, root_inode); | |
119 | - } else { | |
120 | + else | |
121 | iput(root_inode); | |
122 | - } | |
123 | } | |
124 | return res; | |
125 | } | |
126 | @@ -2769,9 +2773,7 @@ static void __d_move(struct dentry *dent | |
127 | copy_name(dentry, target); | |
128 | target->d_hash.pprev = NULL; | |
129 | dentry->d_parent->d_lockref.count++; | |
130 | - if (dentry == old_parent) | |
131 | - dentry->d_flags |= DCACHE_RCUACCESS; | |
132 | - else | |
133 | + if (dentry != old_parent) /* wasn't IS_ROOT */ | |
134 | WARN_ON(!--old_parent->d_lockref.count); | |
135 | } else { | |
136 | target->d_parent = old_parent; | |
137 | --- a/fs/nsfs.c | |
138 | +++ b/fs/nsfs.c | |
139 | @@ -85,13 +85,12 @@ slow: | |
140 | inode->i_fop = &ns_file_operations; | |
141 | inode->i_private = ns; | |
142 | ||
143 | - dentry = d_alloc_pseudo(mnt->mnt_sb, &empty_name); | |
144 | + dentry = d_alloc_anon(mnt->mnt_sb); | |
145 | if (!dentry) { | |
146 | iput(inode); | |
147 | return ERR_PTR(-ENOMEM); | |
148 | } | |
149 | d_instantiate(dentry, inode); | |
150 | - dentry->d_flags |= DCACHE_RCUACCESS; | |
151 | dentry->d_fsdata = (void *)ns->ops; | |
152 | d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry); | |
153 | if (d) { | |
154 | --- a/include/linux/dcache.h | |
155 | +++ b/include/linux/dcache.h | |
156 | @@ -175,7 +175,6 @@ struct dentry_operations { | |
157 | * typically using d_splice_alias. */ | |
158 | ||
159 | #define DCACHE_REFERENCED 0x00000040 /* Recently used, don't discard. */ | |
160 | -#define DCACHE_RCUACCESS 0x00000080 /* Entry has ever been RCU-visible */ | |
161 | ||
162 | #define DCACHE_CANT_MOUNT 0x00000100 | |
163 | #define DCACHE_GENOCIDE 0x00000200 | |
164 | @@ -216,6 +215,7 @@ struct dentry_operations { | |
165 | ||
166 | #define DCACHE_PAR_LOOKUP 0x10000000 /* being looked up (with parent locked shared) */ | |
167 | #define DCACHE_DENTRY_CURSOR 0x20000000 | |
168 | +#define DCACHE_NORCU 0x40000000 /* No RCU delay for freeing */ | |
169 | ||
170 | extern seqlock_t rename_lock; | |
171 |