]> git.ipfire.org Git - thirdparty/kernel/stable.git/blame - fs/nfs/dir.c
treewide: Add SPDX license identifier for missed files
[thirdparty/kernel/stable.git] / fs / nfs / dir.c
CommitLineData
457c8996 1// SPDX-License-Identifier: GPL-2.0-only
1da177e4
LT
2/*
3 * linux/fs/nfs/dir.c
4 *
5 * Copyright (C) 1992 Rick Sladkey
6 *
7 * nfs directory handling functions
8 *
9 * 10 Apr 1996 Added silly rename for unlink --okir
10 * 28 Sep 1996 Improved directory cache --okir
11 * 23 Aug 1997 Claus Heine claus@momo.math.rwth-aachen.de
12 * Re-implemented silly rename for unlink, newly implemented
13 * silly rename for nfs_rename() following the suggestions
14 * of Olaf Kirch (okir) found in this file.
15 * Following Linus comments on my original hack, this version
16 * depends only on the dcache stuff and doesn't touch the inode
17 * layer (iput() and friends).
18 * 6 Jun 1999 Cache readdir lookups in the page cache. -DaveM
19 */
20
ddda8e0a 21#include <linux/module.h>
1da177e4
LT
22#include <linux/time.h>
23#include <linux/errno.h>
24#include <linux/stat.h>
25#include <linux/fcntl.h>
26#include <linux/string.h>
27#include <linux/kernel.h>
28#include <linux/slab.h>
29#include <linux/mm.h>
30#include <linux/sunrpc/clnt.h>
31#include <linux/nfs_fs.h>
32#include <linux/nfs_mount.h>
33#include <linux/pagemap.h>
873101b3 34#include <linux/pagevec.h>
1da177e4 35#include <linux/namei.h>
54ceac45 36#include <linux/mount.h>
a0b8cab3 37#include <linux/swap.h>
e8edc6e0 38#include <linux/sched.h>
04e4bd1c 39#include <linux/kmemleak.h>
64c2ce8b 40#include <linux/xattr.h>
1da177e4
LT
41
42#include "delegation.h"
91d5b470 43#include "iostat.h"
4c30d56e 44#include "internal.h"
cd9a1c0e 45#include "fscache.h"
1da177e4 46
f4ce1299
TM
47#include "nfstrace.h"
48
1da177e4
LT
49/* #define NFS_DEBUG_VERBOSE 1 */
50
51static int nfs_opendir(struct inode *, struct file *);
480c2006 52static int nfs_closedir(struct inode *, struct file *);
23db8620 53static int nfs_readdir(struct file *, struct dir_context *);
02c24a82 54static int nfs_fsync_dir(struct file *, loff_t, loff_t, int);
f0dd2136 55static loff_t nfs_llseek_dir(struct file *, loff_t, int);
11de3b11 56static void nfs_readdir_clear_array(struct page*);
1da177e4 57
4b6f5d20 58const struct file_operations nfs_dir_operations = {
f0dd2136 59 .llseek = nfs_llseek_dir,
1da177e4 60 .read = generic_read_dir,
b044f645 61 .iterate = nfs_readdir,
1da177e4 62 .open = nfs_opendir,
480c2006 63 .release = nfs_closedir,
1da177e4
LT
64 .fsync = nfs_fsync_dir,
65};
66
11de3b11
TM
67const struct address_space_operations nfs_dir_aops = {
68 .freepage = nfs_readdir_clear_array,
d1bacf9e
BS
69};
70
684f39b4 71static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir, const struct cred *cred)
480c2006 72{
311324ad 73 struct nfs_inode *nfsi = NFS_I(dir);
480c2006
BS
74 struct nfs_open_dir_context *ctx;
75 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
76 if (ctx != NULL) {
8ef2ce3e 77 ctx->duped = 0;
311324ad 78 ctx->attr_gencount = nfsi->attr_gencount;
480c2006 79 ctx->dir_cookie = 0;
8ef2ce3e 80 ctx->dup_cookie = 0;
684f39b4 81 ctx->cred = get_cred(cred);
311324ad
TM
82 spin_lock(&dir->i_lock);
83 list_add(&ctx->list, &nfsi->open_files);
84 spin_unlock(&dir->i_lock);
0c030806
TM
85 return ctx;
86 }
87 return ERR_PTR(-ENOMEM);
480c2006
BS
88}
89
311324ad 90static void put_nfs_open_dir_context(struct inode *dir, struct nfs_open_dir_context *ctx)
480c2006 91{
311324ad
TM
92 spin_lock(&dir->i_lock);
93 list_del(&ctx->list);
94 spin_unlock(&dir->i_lock);
684f39b4 95 put_cred(ctx->cred);
480c2006
BS
96 kfree(ctx);
97}
98
1da177e4
LT
99/*
100 * Open file
101 */
102static int
103nfs_opendir(struct inode *inode, struct file *filp)
104{
480c2006
BS
105 int res = 0;
106 struct nfs_open_dir_context *ctx;
1da177e4 107
6de1472f 108 dfprintk(FILE, "NFS: open dir(%pD2)\n", filp);
cc0dd2d1
CL
109
110 nfs_inc_stats(inode, NFSIOS_VFSOPEN);
1e7cb3dc 111
684f39b4 112 ctx = alloc_nfs_open_dir_context(inode, current_cred());
480c2006
BS
113 if (IS_ERR(ctx)) {
114 res = PTR_ERR(ctx);
115 goto out;
116 }
117 filp->private_data = ctx;
480c2006 118out:
1da177e4
LT
119 return res;
120}
121
480c2006
BS
122static int
123nfs_closedir(struct inode *inode, struct file *filp)
124{
a455589f 125 put_nfs_open_dir_context(file_inode(filp), filp->private_data);
480c2006
BS
126 return 0;
127}
128
d1bacf9e
BS
129struct nfs_cache_array_entry {
130 u64 cookie;
131 u64 ino;
132 struct qstr string;
0b26a0bf 133 unsigned char d_type;
d1bacf9e
BS
134};
135
136struct nfs_cache_array {
88b8e133 137 int size;
d1bacf9e
BS
138 int eof_index;
139 u64 last_cookie;
140 struct nfs_cache_array_entry array[0];
141};
142
be4c2d47 143struct readdirvec {
144 unsigned long nr;
145 unsigned long index;
146 struct page *pages[NFS_MAX_READDIR_RAPAGES];
147};
148
a7a3b1e9 149typedef int (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, bool);
1da177e4
LT
150typedef struct {
151 struct file *file;
152 struct page *page;
23db8620 153 struct dir_context *ctx;
1da177e4 154 unsigned long page_index;
be4c2d47 155 struct readdirvec pvec;
f0dd2136 156 u64 *dir_cookie;
0aded708 157 u64 last_cookie;
f0dd2136 158 loff_t current_index;
1da177e4 159 decode_dirent_t decode;
d1bacf9e 160
1f4eab7e 161 unsigned long timestamp;
4704f0e2 162 unsigned long gencount;
d1bacf9e 163 unsigned int cache_entry_index;
a7a3b1e9
BC
164 bool plus;
165 bool eof;
1da177e4
LT
166} nfs_readdir_descriptor_t;
167
d1bacf9e
BS
168/*
169 * we are freeing strings created by nfs_add_to_readdir_array()
170 */
171static
11de3b11 172void nfs_readdir_clear_array(struct page *page)
d1bacf9e 173{
11de3b11 174 struct nfs_cache_array *array;
d1bacf9e 175 int i;
8cd51a0c 176
2b86ce2d 177 array = kmap_atomic(page);
b044f645
BC
178 for (i = 0; i < array->size; i++)
179 kfree(array->array[i].string.name);
2b86ce2d 180 kunmap_atomic(array);
d1bacf9e
BS
181}
182
183/*
184 * the caller is responsible for freeing qstr.name
185 * when called by nfs_readdir_add_to_array, the strings will be freed in
186 * nfs_clear_readdir_array()
187 */
188static
4a201d6e 189int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int len)
d1bacf9e
BS
190{
191 string->len = len;
192 string->name = kmemdup(name, len, GFP_KERNEL);
4a201d6e
TM
193 if (string->name == NULL)
194 return -ENOMEM;
04e4bd1c
CM
195 /*
196 * Avoid a kmemleak false positive. The pointer to the name is stored
197 * in a page cache page which kmemleak does not scan.
198 */
199 kmemleak_not_leak(string->name);
8387ff25 200 string->hash = full_name_hash(NULL, name, len);
4a201d6e 201 return 0;
d1bacf9e
BS
202}
203
204static
205int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
206{
0795bf83 207 struct nfs_cache_array *array = kmap(page);
4a201d6e
TM
208 struct nfs_cache_array_entry *cache_entry;
209 int ret;
210
3020093f
TM
211 cache_entry = &array->array[array->size];
212
213 /* Check that this entry lies within the page bounds */
8cd51a0c 214 ret = -ENOSPC;
3020093f 215 if ((char *)&cache_entry[1] - (char *)page_address(page) > PAGE_SIZE)
4a201d6e 216 goto out;
d1bacf9e 217
4a201d6e
TM
218 cache_entry->cookie = entry->prev_cookie;
219 cache_entry->ino = entry->ino;
0b26a0bf 220 cache_entry->d_type = entry->d_type;
4a201d6e
TM
221 ret = nfs_readdir_make_qstr(&cache_entry->string, entry->name, entry->len);
222 if (ret)
223 goto out;
d1bacf9e 224 array->last_cookie = entry->cookie;
8cd51a0c 225 array->size++;
47c716cb 226 if (entry->eof != 0)
d1bacf9e 227 array->eof_index = array->size;
4a201d6e 228out:
0795bf83 229 kunmap(page);
4a201d6e 230 return ret;
d1bacf9e
BS
231}
232
233static
234int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
235{
23db8620 236 loff_t diff = desc->ctx->pos - desc->current_index;
d1bacf9e
BS
237 unsigned int index;
238
239 if (diff < 0)
240 goto out_eof;
241 if (diff >= array->size) {
8cd51a0c 242 if (array->eof_index >= 0)
d1bacf9e 243 goto out_eof;
d1bacf9e
BS
244 return -EAGAIN;
245 }
246
247 index = (unsigned int)diff;
248 *desc->dir_cookie = array->array[index].cookie;
249 desc->cache_entry_index = index;
d1bacf9e
BS
250 return 0;
251out_eof:
6089dd0d 252 desc->eof = true;
d1bacf9e
BS
253 return -EBADCOOKIE;
254}
255
4db72b40
JL
256static bool
257nfs_readdir_inode_mapping_valid(struct nfs_inode *nfsi)
258{
259 if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))
260 return false;
261 smp_rmb();
262 return !test_bit(NFS_INO_INVALIDATING, &nfsi->flags);
263}
264
d1bacf9e
BS
265static
266int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
267{
268 int i;
8ef2ce3e 269 loff_t new_pos;
d1bacf9e
BS
270 int status = -EAGAIN;
271
272 for (i = 0; i < array->size; i++) {
d1bacf9e 273 if (array->array[i].cookie == *desc->dir_cookie) {
496ad9aa 274 struct nfs_inode *nfsi = NFS_I(file_inode(desc->file));
0c030806
TM
275 struct nfs_open_dir_context *ctx = desc->file->private_data;
276
8ef2ce3e 277 new_pos = desc->current_index + i;
4db72b40
JL
278 if (ctx->attr_gencount != nfsi->attr_gencount ||
279 !nfs_readdir_inode_mapping_valid(nfsi)) {
0c030806
TM
280 ctx->duped = 0;
281 ctx->attr_gencount = nfsi->attr_gencount;
23db8620 282 } else if (new_pos < desc->ctx->pos) {
0c030806
TM
283 if (ctx->duped > 0
284 && ctx->dup_cookie == *desc->dir_cookie) {
285 if (printk_ratelimit()) {
6de1472f 286 pr_notice("NFS: directory %pD2 contains a readdir loop."
0c030806 287 "Please contact your server vendor. "
9581a4ae
JL
288 "The file: %.*s has duplicate cookie %llu\n",
289 desc->file, array->array[i].string.len,
290 array->array[i].string.name, *desc->dir_cookie);
0c030806
TM
291 }
292 status = -ELOOP;
293 goto out;
294 }
8ef2ce3e 295 ctx->dup_cookie = *desc->dir_cookie;
0c030806 296 ctx->duped = -1;
8ef2ce3e 297 }
23db8620 298 desc->ctx->pos = new_pos;
d1bacf9e 299 desc->cache_entry_index = i;
47c716cb 300 return 0;
d1bacf9e
BS
301 }
302 }
47c716cb 303 if (array->eof_index >= 0) {
8cd51a0c 304 status = -EBADCOOKIE;
18fb5fe4 305 if (*desc->dir_cookie == array->last_cookie)
6089dd0d 306 desc->eof = true;
8cd51a0c 307 }
0c030806 308out:
d1bacf9e
BS
309 return status;
310}
311
312static
313int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
314{
315 struct nfs_cache_array *array;
47c716cb 316 int status;
d1bacf9e 317
0795bf83 318 array = kmap(desc->page);
d1bacf9e
BS
319
320 if (*desc->dir_cookie == 0)
321 status = nfs_readdir_search_for_pos(array, desc);
322 else
323 status = nfs_readdir_search_for_cookie(array, desc);
324
47c716cb 325 if (status == -EAGAIN) {
0aded708 326 desc->last_cookie = array->last_cookie;
e47c085a 327 desc->current_index += array->size;
47c716cb
TM
328 desc->page_index++;
329 }
0795bf83 330 kunmap(desc->page);
d1bacf9e
BS
331 return status;
332}
333
334/* Fill a page with xdr information before transferring to the cache page */
335static
56e4ebf8 336int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc,
d1bacf9e 337 struct nfs_entry *entry, struct file *file, struct inode *inode)
1da177e4 338{
480c2006 339 struct nfs_open_dir_context *ctx = file->private_data;
684f39b4 340 const struct cred *cred = ctx->cred;
4704f0e2 341 unsigned long timestamp, gencount;
1da177e4
LT
342 int error;
343
1da177e4
LT
344 again:
345 timestamp = jiffies;
4704f0e2 346 gencount = nfs_inc_attr_generation_counter();
be62a1a8 347 error = NFS_PROTO(inode)->readdir(file_dentry(file), cred, entry->cookie, pages,
1da177e4
LT
348 NFS_SERVER(inode)->dtsize, desc->plus);
349 if (error < 0) {
350 /* We requested READDIRPLUS, but the server doesn't grok it */
351 if (error == -ENOTSUPP && desc->plus) {
352 NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS;
3a10c30a 353 clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
a7a3b1e9 354 desc->plus = false;
1da177e4
LT
355 goto again;
356 }
357 goto error;
358 }
1f4eab7e 359 desc->timestamp = timestamp;
4704f0e2 360 desc->gencount = gencount;
d1bacf9e
BS
361error:
362 return error;
1da177e4
LT
363}
364
573c4e1e
CL
365static int xdr_decode(nfs_readdir_descriptor_t *desc,
366 struct nfs_entry *entry, struct xdr_stream *xdr)
1da177e4 367{
573c4e1e 368 int error;
1da177e4 369
573c4e1e
CL
370 error = desc->decode(xdr, entry, desc->plus);
371 if (error)
372 return error;
d1bacf9e
BS
373 entry->fattr->time_start = desc->timestamp;
374 entry->fattr->gencount = desc->gencount;
375 return 0;
1da177e4
LT
376}
377
fa923369
TM
378/* Match file and dirent using either filehandle or fileid
379 * Note: caller is responsible for checking the fsid
380 */
d39ab9de
BS
381static
382int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
383{
d8fdb47f 384 struct inode *inode;
fa923369
TM
385 struct nfs_inode *nfsi;
386
2b0143b5
DH
387 if (d_really_is_negative(dentry))
388 return 0;
fa923369 389
d8fdb47f
TM
390 inode = d_inode(dentry);
391 if (is_bad_inode(inode) || NFS_STALE(inode))
392 return 0;
393
394 nfsi = NFS_I(inode);
7dc72d5f
TM
395 if (entry->fattr->fileid != nfsi->fileid)
396 return 0;
397 if (entry->fh->size && nfs_compare_fh(entry->fh, &nfsi->fh) != 0)
398 return 0;
399 return 1;
d39ab9de
BS
400}
401
d69ee9b8 402static
23db8620 403bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx)
d69ee9b8
TM
404{
405 if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS))
406 return false;
407 if (test_and_clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags))
408 return true;
23db8620 409 if (ctx->pos == 0)
d69ee9b8
TM
410 return true;
411 return false;
412}
413
414/*
63519fbc
TM
415 * This function is called by the lookup and getattr code to request the
416 * use of readdirplus to accelerate any future lookups in the same
d69ee9b8
TM
417 * directory.
418 */
d69ee9b8
TM
419void nfs_advise_use_readdirplus(struct inode *dir)
420{
63519fbc
TM
421 struct nfs_inode *nfsi = NFS_I(dir);
422
423 if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
424 !list_empty(&nfsi->open_files))
425 set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
d69ee9b8
TM
426}
427
311324ad
TM
428/*
429 * This function is mainly for use by nfs_getattr().
430 *
431 * If this is an 'ls -l', we want to force use of readdirplus.
432 * Do this by checking if there is an active file descriptor
433 * and calling nfs_advise_use_readdirplus, then forcing a
434 * cache flush.
435 */
436void nfs_force_use_readdirplus(struct inode *dir)
437{
63519fbc
TM
438 struct nfs_inode *nfsi = NFS_I(dir);
439
440 if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
441 !list_empty(&nfsi->open_files)) {
442 set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
79f687a3 443 invalidate_mapping_pages(dir->i_mapping, 0, -1);
311324ad
TM
444 }
445}
446
d39ab9de
BS
447static
448void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
449{
26fe5750 450 struct qstr filename = QSTR_INIT(entry->name, entry->len);
9ac3d3e8 451 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
4a201d6e
TM
452 struct dentry *dentry;
453 struct dentry *alias;
2b0143b5 454 struct inode *dir = d_inode(parent);
d39ab9de 455 struct inode *inode;
aa9c2669 456 int status;
d39ab9de 457
fa923369
TM
458 if (!(entry->fattr->valid & NFS_ATTR_FATTR_FILEID))
459 return;
6c441c25
TM
460 if (!(entry->fattr->valid & NFS_ATTR_FATTR_FSID))
461 return;
78d04af4
TM
462 if (filename.len == 0)
463 return;
464 /* Validate that the name doesn't contain any illegal '\0' */
465 if (strnlen(filename.name, filename.len) != filename.len)
466 return;
467 /* ...or '/' */
468 if (strnchr(filename.name, filename.len, '/'))
469 return;
4a201d6e
TM
470 if (filename.name[0] == '.') {
471 if (filename.len == 1)
472 return;
473 if (filename.len == 2 && filename.name[1] == '.')
474 return;
475 }
8387ff25 476 filename.hash = full_name_hash(parent, filename.name, filename.len);
d39ab9de 477
4a201d6e 478 dentry = d_lookup(parent, &filename);
9ac3d3e8
AV
479again:
480 if (!dentry) {
481 dentry = d_alloc_parallel(parent, &filename, &wq);
482 if (IS_ERR(dentry))
483 return;
484 }
485 if (!d_in_lookup(dentry)) {
6c441c25
TM
486 /* Is there a mountpoint here? If so, just exit */
487 if (!nfs_fsid_equal(&NFS_SB(dentry->d_sb)->fsid,
488 &entry->fattr->fsid))
489 goto out;
d39ab9de 490 if (nfs_same_file(dentry, entry)) {
7dc72d5f
TM
491 if (!entry->fh->size)
492 goto out;
cda57a1e 493 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
2b0143b5 494 status = nfs_refresh_inode(d_inode(dentry), entry->fattr);
aa9c2669 495 if (!status)
2b0143b5 496 nfs_setsecurity(d_inode(dentry), entry->fattr, entry->label);
d39ab9de
BS
497 goto out;
498 } else {
5542aa2f 499 d_invalidate(dentry);
d39ab9de 500 dput(dentry);
9ac3d3e8
AV
501 dentry = NULL;
502 goto again;
d39ab9de
BS
503 }
504 }
7dc72d5f
TM
505 if (!entry->fh->size) {
506 d_lookup_done(dentry);
507 goto out;
508 }
d39ab9de 509
1775fd3e 510 inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr, entry->label);
41d28bca 511 alias = d_splice_alias(inode, dentry);
9ac3d3e8
AV
512 d_lookup_done(dentry);
513 if (alias) {
514 if (IS_ERR(alias))
515 goto out;
516 dput(dentry);
517 dentry = alias;
518 }
519 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
d39ab9de
BS
520out:
521 dput(dentry);
d39ab9de
BS
522}
523
d1bacf9e
BS
524/* Perform conversion from xdr to cache array */
525static
8cd51a0c 526int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry,
6650239a 527 struct page **xdr_pages, struct page *page, unsigned int buflen)
1da177e4 528{
babddc72 529 struct xdr_stream stream;
f7da7a12 530 struct xdr_buf buf;
6650239a 531 struct page *scratch;
99424380 532 struct nfs_cache_array *array;
5c346854
TM
533 unsigned int count = 0;
534 int status;
be4c2d47 535 int max_rapages = NFS_MAX_READDIR_RAPAGES;
536
537 desc->pvec.index = desc->page_index;
538 desc->pvec.nr = 0;
babddc72 539
6650239a
TM
540 scratch = alloc_page(GFP_KERNEL);
541 if (scratch == NULL)
542 return -ENOMEM;
babddc72 543
ce85cfbe
BC
544 if (buflen == 0)
545 goto out_nopages;
546
f7da7a12 547 xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen);
6650239a 548 xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
99424380
BS
549
550 do {
551 status = xdr_decode(desc, entry, &stream);
8cd51a0c
TM
552 if (status != 0) {
553 if (status == -EAGAIN)
554 status = 0;
99424380 555 break;
8cd51a0c 556 }
99424380 557
5c346854
TM
558 count++;
559
a7a3b1e9 560 if (desc->plus)
be62a1a8 561 nfs_prime_dcache(file_dentry(desc->file), entry);
8cd51a0c 562
be4c2d47 563 status = nfs_readdir_add_to_array(entry, desc->pvec.pages[desc->pvec.nr]);
564 if (status == -ENOSPC) {
565 desc->pvec.nr++;
566 if (desc->pvec.nr == max_rapages)
567 break;
568 status = nfs_readdir_add_to_array(entry, desc->pvec.pages[desc->pvec.nr]);
569 }
8cd51a0c
TM
570 if (status != 0)
571 break;
99424380
BS
572 } while (!entry->eof);
573
be4c2d47 574 /*
575 * page and desc->pvec.pages[0] are valid, don't need to check
576 * whether or not to be NULL.
577 */
578 copy_highpage(page, desc->pvec.pages[0]);
579
ce85cfbe 580out_nopages:
47c716cb 581 if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) {
be4c2d47 582 array = kmap_atomic(desc->pvec.pages[desc->pvec.nr]);
0795bf83
FF
583 array->eof_index = array->size;
584 status = 0;
be4c2d47 585 kunmap_atomic(array);
1da177e4 586 }
6650239a
TM
587
588 put_page(scratch);
be4c2d47 589
590 /*
591 * desc->pvec.nr > 0 means at least one page was completely filled,
592 * we should return -ENOSPC. Otherwise function
593 * nfs_readdir_xdr_to_array will enter infinite loop.
594 */
595 if (desc->pvec.nr > 0)
596 return -ENOSPC;
8cd51a0c 597 return status;
56e4ebf8
BS
598}
599
/* Drop one reference on each of the first @npages entries of @pages. */
static
void nfs_readdir_free_pages(struct page **pages, unsigned int npages)
{
	unsigned int idx = 0;

	while (idx < npages) {
		put_page(pages[idx]);
		idx++;
	}
}
607
56e4ebf8 608/*
bf211ca1 609 * nfs_readdir_alloc_pages() will allocate pages that must be freed with a call
610 * to nfs_readdir_free_pages()
56e4ebf8
BS
611 */
612static
c7e9668e 613int nfs_readdir_alloc_pages(struct page **pages, unsigned int npages)
56e4ebf8 614{
56e4ebf8
BS
615 unsigned int i;
616
617 for (i = 0; i < npages; i++) {
618 struct page *page = alloc_page(GFP_KERNEL);
619 if (page == NULL)
620 goto out_freepages;
621 pages[i] = page;
622 }
6650239a 623 return 0;
56e4ebf8 624
56e4ebf8 625out_freepages:
c7e9668e 626 nfs_readdir_free_pages(pages, i);
6650239a 627 return -ENOMEM;
1da177e4
LT
628}
629
be4c2d47 630/*
631 * nfs_readdir_rapages_init initialize rapages by nfs_cache_array structure.
632 */
633static
634void nfs_readdir_rapages_init(nfs_readdir_descriptor_t *desc)
635{
636 struct nfs_cache_array *array;
637 int max_rapages = NFS_MAX_READDIR_RAPAGES;
638 int index;
639
640 for (index = 0; index < max_rapages; index++) {
641 array = kmap_atomic(desc->pvec.pages[index]);
642 memset(array, 0, sizeof(struct nfs_cache_array));
643 array->eof_index = -1;
644 kunmap_atomic(array);
645 }
646}
647
d1bacf9e
BS
648static
649int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, struct inode *inode)
00a92642 650{
56e4ebf8 651 struct page *pages[NFS_MAX_READDIR_PAGES];
d1bacf9e
BS
652 struct nfs_entry entry;
653 struct file *file = desc->file;
654 struct nfs_cache_array *array;
8cd51a0c 655 int status = -ENOMEM;
56e4ebf8 656 unsigned int array_size = ARRAY_SIZE(pages);
d1bacf9e 657
be4c2d47 658 /*
659 * This means we hit readdir rdpages miss, the preallocated rdpages
660 * are useless, the preallocate rdpages should be reinitialized.
661 */
662 nfs_readdir_rapages_init(desc);
663
d1bacf9e 664 entry.prev_cookie = 0;
0aded708 665 entry.cookie = desc->last_cookie;
d1bacf9e
BS
666 entry.eof = 0;
667 entry.fh = nfs_alloc_fhandle();
668 entry.fattr = nfs_alloc_fattr();
573c4e1e 669 entry.server = NFS_SERVER(inode);
d1bacf9e
BS
670 if (entry.fh == NULL || entry.fattr == NULL)
671 goto out;
00a92642 672
14c43f76
DQ
673 entry.label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT);
674 if (IS_ERR(entry.label)) {
675 status = PTR_ERR(entry.label);
676 goto out;
677 }
678
0795bf83 679 array = kmap(page);
d1bacf9e
BS
680 memset(array, 0, sizeof(struct nfs_cache_array));
681 array->eof_index = -1;
00a92642 682
c7e9668e 683 status = nfs_readdir_alloc_pages(pages, array_size);
6650239a 684 if (status < 0)
d1bacf9e
BS
685 goto out_release_array;
686 do {
ac396128 687 unsigned int pglen;
56e4ebf8 688 status = nfs_readdir_xdr_filler(pages, desc, &entry, file, inode);
babddc72 689
d1bacf9e 690 if (status < 0)
00a92642 691 break;
ac396128 692 pglen = status;
6650239a 693 status = nfs_readdir_page_filler(desc, &entry, pages, page, pglen);
8cd51a0c
TM
694 if (status < 0) {
695 if (status == -ENOSPC)
696 status = 0;
697 break;
698 }
699 } while (array->eof_index < 0);
d1bacf9e 700
c7e9668e 701 nfs_readdir_free_pages(pages, array_size);
d1bacf9e 702out_release_array:
0795bf83 703 kunmap(page);
14c43f76 704 nfs4_label_free(entry.label);
d1bacf9e
BS
705out:
706 nfs_free_fattr(entry.fattr);
707 nfs_free_fhandle(entry.fh);
00a92642
OG
708 return status;
709}
710
711/*
d1bacf9e
BS
712 * Now we cache directories properly, by converting xdr information
713 * to an array that can be used for lookups later. This results in
714 * fewer cache pages, since we can store more information on each page.
715 * We only need to convert from xdr once so future lookups are much simpler
1da177e4 716 */
d1bacf9e 717static
a46126cc 718int nfs_readdir_filler(void *data, struct page* page)
1da177e4 719{
a46126cc 720 nfs_readdir_descriptor_t *desc = data;
496ad9aa 721 struct inode *inode = file_inode(desc->file);
8cd51a0c 722 int ret;
1da177e4 723
be4c2d47 724 /*
725 * If desc->page_index in range desc->pvec.index and
726 * desc->pvec.index + desc->pvec.nr, we get readdir cache hit.
727 */
728 if (desc->page_index >= desc->pvec.index &&
729 desc->page_index < (desc->pvec.index + desc->pvec.nr)) {
730 /*
731 * page and desc->pvec.pages[x] are valid, don't need to check
732 * whether or not to be NULL.
733 */
734 copy_highpage(page, desc->pvec.pages[desc->page_index - desc->pvec.index]);
735 ret = 0;
736 } else {
737 ret = nfs_readdir_xdr_to_array(desc, page, inode);
738 if (ret < 0)
739 goto error;
740 }
741
d1bacf9e 742 SetPageUptodate(page);
1da177e4 743
d1bacf9e
BS
744 if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) {
745 /* Should never happen */
746 nfs_zap_mapping(inode, inode->i_mapping);
1da177e4 747 }
d1bacf9e
BS
748 unlock_page(page);
749 return 0;
750 error:
751 unlock_page(page);
8cd51a0c 752 return ret;
d1bacf9e 753}
1da177e4 754
d1bacf9e
BS
755static
756void cache_page_release(nfs_readdir_descriptor_t *desc)
757{
b044f645
BC
758 if (!desc->page->mapping)
759 nfs_readdir_clear_array(desc->page);
09cbfeaf 760 put_page(desc->page);
d1bacf9e
BS
761 desc->page = NULL;
762}
763
764static
765struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
766{
a46126cc
CH
767 return read_cache_page(desc->file->f_mapping, desc->page_index,
768 nfs_readdir_filler, desc);
1da177e4
LT
769}
770
771/*
d1bacf9e 772 * Returns 0 if desc->dir_cookie was found on page desc->page_index
1da177e4 773 */
d1bacf9e
BS
774static
775int find_cache_page(nfs_readdir_descriptor_t *desc)
776{
777 int res;
778
779 desc->page = get_cache_page(desc);
780 if (IS_ERR(desc->page))
781 return PTR_ERR(desc->page);
782
783 res = nfs_readdir_search_array(desc);
47c716cb
TM
784 if (res != 0)
785 cache_page_release(desc);
d1bacf9e
BS
786 return res;
787}
788
789/* Search for desc->dir_cookie from the beginning of the page cache */
1da177e4
LT
790static inline
791int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
792{
8cd51a0c 793 int res;
d1bacf9e 794
0aded708 795 if (desc->page_index == 0) {
8cd51a0c 796 desc->current_index = 0;
0aded708
TM
797 desc->last_cookie = 0;
798 }
47c716cb 799 do {
d1bacf9e 800 res = find_cache_page(desc);
47c716cb 801 } while (res == -EAGAIN);
1da177e4
LT
802 return res;
803}
804
1da177e4
LT
805/*
806 * Once we've found the start of the dirent within a page: fill 'er up...
807 */
808static
23db8620 809int nfs_do_filldir(nfs_readdir_descriptor_t *desc)
1da177e4
LT
810{
811 struct file *file = desc->file;
d1bacf9e
BS
812 int i = 0;
813 int res = 0;
814 struct nfs_cache_array *array = NULL;
8ef2ce3e
BS
815 struct nfs_open_dir_context *ctx = file->private_data;
816
0795bf83 817 array = kmap(desc->page);
d1bacf9e 818 for (i = desc->cache_entry_index; i < array->size; i++) {
ece0b423 819 struct nfs_cache_array_entry *ent;
1da177e4 820
ece0b423 821 ent = &array->array[i];
23db8620
AV
822 if (!dir_emit(desc->ctx, ent->string.name, ent->string.len,
823 nfs_compat_user_ino64(ent->ino), ent->d_type)) {
6089dd0d 824 desc->eof = true;
1da177e4 825 break;
ece0b423 826 }
23db8620 827 desc->ctx->pos++;
d1bacf9e
BS
828 if (i < (array->size-1))
829 *desc->dir_cookie = array->array[i+1].cookie;
830 else
831 *desc->dir_cookie = array->last_cookie;
0c030806
TM
832 if (ctx->duped != 0)
833 ctx->duped = 1;
1da177e4 834 }
47c716cb 835 if (array->eof_index >= 0)
6089dd0d 836 desc->eof = true;
d1bacf9e 837
0795bf83 838 kunmap(desc->page);
d1bacf9e 839 cache_page_release(desc);
1e7cb3dc
CL
840 dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
841 (unsigned long long)*desc->dir_cookie, res);
1da177e4
LT
842 return res;
843}
844
845/*
846 * If we cannot find a cookie in our cache, we suspect that this is
847 * because it points to a deleted file, so we ask the server to return
848 * whatever it thinks is the next entry. We then feed this to filldir.
849 * If all goes well, we should then be able to find our way round the
850 * cache on the next call to readdir_search_pagecache();
851 *
852 * NOTE: we cannot add the anonymous page to the pagecache because
853 * the data it contains might not be page aligned. Besides,
854 * we should already have a complete representation of the
855 * directory in the page cache by the time we get here.
856 */
857static inline
23db8620 858int uncached_readdir(nfs_readdir_descriptor_t *desc)
1da177e4 859{
1da177e4
LT
860 struct page *page = NULL;
861 int status;
496ad9aa 862 struct inode *inode = file_inode(desc->file);
0c030806 863 struct nfs_open_dir_context *ctx = desc->file->private_data;
1da177e4 864
1e7cb3dc
CL
865 dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
866 (unsigned long long)*desc->dir_cookie);
1da177e4
LT
867
868 page = alloc_page(GFP_HIGHUSER);
869 if (!page) {
870 status = -ENOMEM;
871 goto out;
872 }
d1bacf9e 873
7a8e1dc3 874 desc->page_index = 0;
0aded708 875 desc->last_cookie = *desc->dir_cookie;
7a8e1dc3 876 desc->page = page;
0c030806 877 ctx->duped = 0;
7a8e1dc3 878
85f8607e
TM
879 status = nfs_readdir_xdr_to_array(desc, page, inode);
880 if (status < 0)
1da177e4
LT
881 goto out_release;
882
23db8620 883 status = nfs_do_filldir(desc);
1da177e4 884
1da177e4 885 out:
1e7cb3dc 886 dfprintk(DIRCACHE, "NFS: %s: returns %d\n",
3110ff80 887 __func__, status);
1da177e4
LT
888 return status;
889 out_release:
d1bacf9e 890 cache_page_release(desc);
1da177e4
LT
891 goto out;
892}
893
00a92642
OG
894/* The file offset position represents the dirent entry number. A
895 last cookie cache takes care of the common case of reading the
896 whole directory.
1da177e4 897 */
23db8620 898static int nfs_readdir(struct file *file, struct dir_context *ctx)
1da177e4 899{
be62a1a8 900 struct dentry *dentry = file_dentry(file);
2b0143b5 901 struct inode *inode = d_inode(dentry);
1da177e4
LT
902 nfs_readdir_descriptor_t my_desc,
903 *desc = &my_desc;
23db8620 904 struct nfs_open_dir_context *dir_ctx = file->private_data;
07b5ce8e 905 int res = 0;
be4c2d47 906 int max_rapages = NFS_MAX_READDIR_RAPAGES;
1da177e4 907
6de1472f
AV
908 dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n",
909 file, (long long)ctx->pos);
91d5b470
CL
910 nfs_inc_stats(inode, NFSIOS_VFSGETDENTS);
911
1da177e4 912 /*
23db8620 913 * ctx->pos points to the dirent entry number.
f0dd2136 914 * *desc->dir_cookie has the cookie for the next entry. We have
00a92642
OG
915 * to either find the entry with the appropriate number or
916 * revalidate the cookie.
1da177e4
LT
917 */
918 memset(desc, 0, sizeof(*desc));
919
23db8620
AV
920 desc->file = file;
921 desc->ctx = ctx;
480c2006 922 desc->dir_cookie = &dir_ctx->dir_cookie;
1da177e4 923 desc->decode = NFS_PROTO(inode)->decode_dirent;
a7a3b1e9 924 desc->plus = nfs_use_readdirplus(inode, ctx);
1da177e4 925
be4c2d47 926 res = nfs_readdir_alloc_pages(desc->pvec.pages, max_rapages);
927 if (res < 0)
928 return -ENOMEM;
929
930 nfs_readdir_rapages_init(desc);
931
79f687a3 932 if (ctx->pos == 0 || nfs_attribute_cache_expired(inode))
07b5ce8e 933 res = nfs_revalidate_mapping(inode, file->f_mapping);
fccca7fc
TM
934 if (res < 0)
935 goto out;
936
47c716cb 937 do {
1da177e4 938 res = readdir_search_pagecache(desc);
00a92642 939
1da177e4 940 if (res == -EBADCOOKIE) {
ece0b423 941 res = 0;
1da177e4 942 /* This means either end of directory */
6089dd0d 943 if (*desc->dir_cookie && !desc->eof) {
1da177e4 944 /* Or that the server has 'lost' a cookie */
23db8620 945 res = uncached_readdir(desc);
ece0b423 946 if (res == 0)
1da177e4
LT
947 continue;
948 }
1da177e4
LT
949 break;
950 }
951 if (res == -ETOOSMALL && desc->plus) {
3a10c30a 952 clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
1da177e4 953 nfs_zap_caches(inode);
baf57a09 954 desc->page_index = 0;
a7a3b1e9
BC
955 desc->plus = false;
956 desc->eof = false;
1da177e4
LT
957 continue;
958 }
959 if (res < 0)
960 break;
961
23db8620 962 res = nfs_do_filldir(desc);
ece0b423 963 if (res < 0)
1da177e4 964 break;
47c716cb 965 } while (!desc->eof);
fccca7fc 966out:
be4c2d47 967 nfs_readdir_free_pages(desc->pvec.pages, max_rapages);
1e7cb3dc
CL
968 if (res > 0)
969 res = 0;
6de1472f 970 dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res);
1e7cb3dc 971 return res;
1da177e4
LT
972}
973
965c8e59 974static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
f0dd2136 975{
b044f645 976 struct inode *inode = file_inode(filp);
480c2006 977 struct nfs_open_dir_context *dir_ctx = filp->private_data;
b84e06c5 978
6de1472f
AV
979 dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n",
980 filp, offset, whence);
b84e06c5 981
965c8e59 982 switch (whence) {
b2b1ff3d
TM
983 default:
984 return -EINVAL;
985 case SEEK_SET:
986 if (offset < 0)
987 return -EINVAL;
988 inode_lock(inode);
989 break;
990 case SEEK_CUR:
991 if (offset == 0)
992 return filp->f_pos;
993 inode_lock(inode);
994 offset += filp->f_pos;
995 if (offset < 0) {
996 inode_unlock(inode);
997 return -EINVAL;
998 }
f0dd2136
TM
999 }
1000 if (offset != filp->f_pos) {
1001 filp->f_pos = offset;
480c2006 1002 dir_ctx->dir_cookie = 0;
8ef2ce3e 1003 dir_ctx->duped = 0;
f0dd2136 1004 }
b044f645 1005 inode_unlock(inode);
f0dd2136
TM
1006 return offset;
1007}
1008
1da177e4
LT
1009/*
1010 * All directory operations under NFS are synchronous, so fsync()
1011 * is a dummy operation.
1012 */
02c24a82
JB
1013static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end,
1014 int datasync)
1da177e4 1015{
6de1472f 1016 struct inode *inode = file_inode(filp);
7ea80859 1017
6de1472f 1018 dfprintk(FILE, "NFS: fsync dir(%pD2) datasync %d\n", filp, datasync);
1e7cb3dc 1019
5955102c 1020 inode_lock(inode);
6de1472f 1021 nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
5955102c 1022 inode_unlock(inode);
1da177e4
LT
1023 return 0;
1024}
1025
bfc69a45
TM
1026/**
1027 * nfs_force_lookup_revalidate - Mark the directory as having changed
302fad7b 1028 * @dir: pointer to directory inode
bfc69a45
TM
1029 *
1030 * This forces the revalidation code in nfs_lookup_revalidate() to do a
1031 * full lookup on all child dentries of 'dir' whenever a change occurs
1032 * on the server that might have invalidated our dcache.
1033 *
1034 * The caller should be holding dir->i_lock
1035 */
1036void nfs_force_lookup_revalidate(struct inode *dir)
1037{
011935a0 1038 NFS_I(dir)->cache_change_attribute++;
bfc69a45 1039}
89d77c8f 1040EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate);
bfc69a45 1041
1da177e4
LT
1042/*
1043 * A check for whether or not the parent directory has changed.
1044 * In the case it has, we assume that the dentries are untrustworthy
1045 * and may need to be looked up again.
912a108d 1046 * If rcu_walk prevents us from performing a full check, return 0.
1da177e4 1047 */
912a108d
N
1048static int nfs_check_verifier(struct inode *dir, struct dentry *dentry,
1049 int rcu_walk)
1da177e4
LT
1050{
1051 if (IS_ROOT(dentry))
1052 return 1;
4eec952e
TM
1053 if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE)
1054 return 0;
f2c77f4e
TM
1055 if (!nfs_verify_change_attribute(dir, dentry->d_time))
1056 return 0;
1057 /* Revalidate nfsi->cache_change_attribute before we declare a match */
1cd9cb05
TM
1058 if (nfs_mapping_need_revalidate_inode(dir)) {
1059 if (rcu_walk)
1060 return 0;
1061 if (__nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0)
1062 return 0;
1063 }
f2c77f4e
TM
1064 if (!nfs_verify_change_attribute(dir, dentry->d_time))
1065 return 0;
1066 return 1;
1da177e4
LT
1067}
1068
a12802ca
TM
1069/*
1070 * Use intent information to check whether or not we're going to do
1071 * an O_EXCL create using this path component.
1072 */
fa3c56bb 1073static int nfs_is_exclusive_create(struct inode *dir, unsigned int flags)
a12802ca
TM
1074{
1075 if (NFS_PROTO(dir)->version == 2)
1076 return 0;
fa3c56bb 1077 return flags & LOOKUP_EXCL;
a12802ca
TM
1078}
1079
1d6757fb
TM
1080/*
1081 * Inode and filehandle revalidation for lookups.
1082 *
1083 * We force revalidation in the cases where the VFS sets LOOKUP_REVAL,
1084 * or if the intent information indicates that we're about to open this
1085 * particular file and the "nocto" mount flag is not set.
1086 *
1087 */
65a0c149 1088static
fa3c56bb 1089int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags)
1da177e4
LT
1090{
1091 struct nfs_server *server = NFS_SERVER(inode);
65a0c149 1092 int ret;
1da177e4 1093
36d43a43 1094 if (IS_AUTOMOUNT(inode))
4e99a1ff 1095 return 0;
47921921
TM
1096
1097 if (flags & LOOKUP_OPEN) {
1098 switch (inode->i_mode & S_IFMT) {
1099 case S_IFREG:
1100 /* A NFSv4 OPEN will revalidate later */
1101 if (server->caps & NFS_CAP_ATOMIC_OPEN)
1102 goto out;
1103 /* Fallthrough */
1104 case S_IFDIR:
1105 if (server->flags & NFS_MOUNT_NOCTO)
1106 break;
1107 /* NFS close-to-open cache consistency validation */
1108 goto out_force;
1109 }
1110 }
1111
facc3530 1112 /* VFS wants an on-the-wire revalidation */
fa3c56bb 1113 if (flags & LOOKUP_REVAL)
facc3530 1114 goto out_force;
65a0c149 1115out:
a61246c9 1116 return (inode->i_nlink == 0) ? -ESTALE : 0;
1da177e4 1117out_force:
1fa1e384
N
1118 if (flags & LOOKUP_RCU)
1119 return -ECHILD;
65a0c149
TM
1120 ret = __nfs_revalidate_inode(server, inode);
1121 if (ret != 0)
1122 return ret;
1123 goto out;
1da177e4
LT
1124}
1125
1126/*
1127 * We judge how long we want to trust negative
1128 * dentries by looking at the parent inode mtime.
1129 *
1130 * If parent mtime has changed, we revalidate, else we wait for a
1131 * period corresponding to the parent's attribute cache timeout value.
912a108d
N
1132 *
1133 * If LOOKUP_RCU prevents us from performing a full check, return 1
1134 * suggesting a reval is needed.
9f6d44d4
TM
1135 *
1136 * Note that when creating a new file, or looking up a rename target,
1137 * then it shouldn't be necessary to revalidate a negative dentry.
1da177e4
LT
1138 */
1139static inline
1140int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
fa3c56bb 1141 unsigned int flags)
1da177e4 1142{
9f6d44d4 1143 if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
1da177e4 1144 return 0;
4eec952e
TM
1145 if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG)
1146 return 1;
912a108d 1147 return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU);
1da177e4
LT
1148}
1149
5ceb9d7f
TM
1150static int
1151nfs_lookup_revalidate_done(struct inode *dir, struct dentry *dentry,
1152 struct inode *inode, int error)
1153{
1154 switch (error) {
1155 case 1:
1156 dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n",
1157 __func__, dentry);
1158 return 1;
1159 case 0:
1160 nfs_mark_for_revalidate(dir);
1161 if (inode && S_ISDIR(inode->i_mode)) {
1162 /* Purge readdir caches. */
1163 nfs_zap_caches(inode);
1164 /*
1165 * We can't d_drop the root of a disconnected tree:
1166 * its d_hash is on the s_anon list and d_drop() would hide
1167 * it from shrink_dcache_for_unmount(), leading to busy
1168 * inodes on unmount and further oopses.
1169 */
1170 if (IS_ROOT(dentry))
1171 return 1;
1172 }
1173 dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n",
1174 __func__, dentry);
1175 return 0;
1176 }
1177 dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n",
1178 __func__, dentry, error);
1179 return error;
1180}
1181
1182static int
1183nfs_lookup_revalidate_negative(struct inode *dir, struct dentry *dentry,
1184 unsigned int flags)
1185{
1186 int ret = 1;
1187 if (nfs_neg_need_reval(dir, dentry, flags)) {
1188 if (flags & LOOKUP_RCU)
1189 return -ECHILD;
1190 ret = 0;
1191 }
1192 return nfs_lookup_revalidate_done(dir, dentry, NULL, ret);
1193}
1194
/*
 * Revalidation path used by callers that found a delegation on @inode:
 * refresh the dentry verifier from the directory's current change
 * attribute and report the dentry as valid.
 */
static int
nfs_lookup_revalidate_delegated(struct inode *dir, struct dentry *dentry,
				struct inode *inode)
{
	const int valid = 1;

	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));

	return nfs_lookup_revalidate_done(dir, dentry, inode, valid);
}
1202
1203static int
1204nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry,
1205 struct inode *inode)
1206{
1207 struct nfs_fh *fhandle;
1208 struct nfs_fattr *fattr;
1209 struct nfs4_label *label;
1210 int ret;
1211
1212 ret = -ENOMEM;
1213 fhandle = nfs_alloc_fhandle();
1214 fattr = nfs_alloc_fattr();
1215 label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL);
1216 if (fhandle == NULL || fattr == NULL || IS_ERR(label))
1217 goto out;
1218
1219 ret = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
1220 if (ret < 0) {
1221 if (ret == -ESTALE || ret == -ENOENT)
1222 ret = 0;
1223 goto out;
1224 }
1225 ret = 0;
1226 if (nfs_compare_fh(NFS_FH(inode), fhandle))
1227 goto out;
1228 if (nfs_refresh_inode(inode, fattr) < 0)
1229 goto out;
1230
1231 nfs_setsecurity(inode, fattr, label);
1232 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1233
1234 /* set a readdirplus hint that we had a cache miss */
1235 nfs_force_use_readdirplus(dir);
1236 ret = 1;
1237out:
1238 nfs_free_fattr(fattr);
1239 nfs_free_fhandle(fhandle);
1240 nfs4_label_free(label);
1241 return nfs_lookup_revalidate_done(dir, dentry, inode, ret);
1242}
1243
1da177e4
LT
1244/*
1245 * This is called every time the dcache has a lookup hit,
1246 * and we should check whether we can really trust that
1247 * lookup.
1248 *
1249 * NOTE! The hit can be a negative hit too, don't assume
1250 * we have an inode!
1251 *
1252 * If the parent directory is seen to have changed, we throw out the
1253 * cached dentry and do a new lookup.
1254 */
5ceb9d7f
TM
1255static int
1256nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
1257 unsigned int flags)
1da177e4 1258{
1da177e4 1259 struct inode *inode;
1da177e4 1260 int error;
1da177e4 1261
91d5b470 1262 nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
2b0143b5 1263 inode = d_inode(dentry);
1da177e4 1264
5ceb9d7f
TM
1265 if (!inode)
1266 return nfs_lookup_revalidate_negative(dir, dentry, flags);
1da177e4
LT
1267
1268 if (is_bad_inode(inode)) {
6de1472f
AV
1269 dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
1270 __func__, dentry);
1da177e4
LT
1271 goto out_bad;
1272 }
1273
011e2a7f 1274 if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
5ceb9d7f 1275 return nfs_lookup_revalidate_delegated(dir, dentry, inode);
15860ab1 1276
1da177e4 1277 /* Force a full look up iff the parent directory has changed */
73dd684a 1278 if (!(flags & (LOOKUP_EXCL | LOOKUP_REVAL)) &&
912a108d 1279 nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) {
cc89684c
N
1280 error = nfs_lookup_verify_inode(inode, flags);
1281 if (error) {
cc89684c 1282 if (error == -ESTALE)
5ceb9d7f
TM
1283 nfs_zap_caches(dir);
1284 goto out_bad;
1fa1e384 1285 }
63519fbc 1286 nfs_advise_use_readdirplus(dir);
1da177e4
LT
1287 goto out_valid;
1288 }
1289
912a108d
N
1290 if (flags & LOOKUP_RCU)
1291 return -ECHILD;
1292
1da177e4
LT
1293 if (NFS_STALE(inode))
1294 goto out_bad;
1295
6e0d0be7 1296 trace_nfs_lookup_revalidate_enter(dir, dentry, flags);
5ceb9d7f 1297 error = nfs_lookup_revalidate_dentry(dir, dentry, inode);
6e0d0be7 1298 trace_nfs_lookup_revalidate_exit(dir, dentry, flags, error);
5ceb9d7f
TM
1299 return error;
1300out_valid:
1301 return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
1302out_bad:
1303 if (flags & LOOKUP_RCU)
1304 return -ECHILD;
1305 return nfs_lookup_revalidate_done(dir, dentry, inode, 0);
1306}
14c43f76 1307
5ceb9d7f 1308static int
c7944ebb
TM
1309__nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
1310 int (*reval)(struct inode *, struct dentry *, unsigned int))
5ceb9d7f
TM
1311{
1312 struct dentry *parent;
1313 struct inode *dir;
1314 int ret;
63519fbc 1315
d51ac1a8 1316 if (flags & LOOKUP_RCU) {
5ceb9d7f
TM
1317 parent = READ_ONCE(dentry->d_parent);
1318 dir = d_inode_rcu(parent);
1319 if (!dir)
1320 return -ECHILD;
c7944ebb 1321 ret = reval(dir, dentry, flags);
6aa7de05 1322 if (parent != READ_ONCE(dentry->d_parent))
d51ac1a8 1323 return -ECHILD;
5ceb9d7f
TM
1324 } else {
1325 parent = dget_parent(dentry);
c7944ebb 1326 ret = reval(d_inode(parent), dentry, flags);
d51ac1a8 1327 dput(parent);
1da177e4 1328 }
5ceb9d7f 1329 return ret;
1da177e4
LT
1330}
1331
c7944ebb
TM
1332static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1333{
1334 return __nfs_lookup_revalidate(dentry, flags, nfs_do_lookup_revalidate);
1335}
1336
ecf3d1f1 1337/*
2b0143b5 1338 * A weaker form of d_revalidate for revalidating just the d_inode(dentry)
ecf3d1f1
JL
1339 * when we don't really care about the dentry name. This is called when a
1340 * pathwalk ends on a dentry that was not found via a normal lookup in the
1341 * parent dir (e.g.: ".", "..", procfs symlinks or mountpoint traversals).
1342 *
1343 * In this situation, we just want to verify that the inode itself is OK
1344 * since the dentry might have changed on the server.
1345 */
1346static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags)
1347{
2b0143b5 1348 struct inode *inode = d_inode(dentry);
9cdd1d3f 1349 int error = 0;
ecf3d1f1
JL
1350
1351 /*
1352 * I believe we can only get a negative dentry here in the case of a
1353 * procfs-style symlink. Just assume it's correct for now, but we may
1354 * eventually need to do something more here.
1355 */
1356 if (!inode) {
6de1472f
AV
1357 dfprintk(LOOKUPCACHE, "%s: %pd2 has negative inode\n",
1358 __func__, dentry);
ecf3d1f1
JL
1359 return 1;
1360 }
1361
1362 if (is_bad_inode(inode)) {
6de1472f
AV
1363 dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
1364 __func__, dentry);
ecf3d1f1
JL
1365 return 0;
1366 }
1367
b688741c 1368 error = nfs_lookup_verify_inode(inode, flags);
ecf3d1f1
JL
1369 dfprintk(LOOKUPCACHE, "NFS: %s: inode %lu is %s\n",
1370 __func__, inode->i_ino, error ? "invalid" : "valid");
1371 return !error;
1372}
1373
1da177e4
LT
1374/*
1375 * This is called from dput() when d_count is going to 0.
1376 */
fe15ce44 1377static int nfs_dentry_delete(const struct dentry *dentry)
1da177e4 1378{
6de1472f
AV
1379 dfprintk(VFS, "NFS: dentry_delete(%pd2, %x)\n",
1380 dentry, dentry->d_flags);
1da177e4 1381
77f11192 1382 /* Unhash any dentry with a stale inode */
2b0143b5 1383 if (d_really_is_positive(dentry) && NFS_STALE(d_inode(dentry)))
77f11192
TM
1384 return 1;
1385
1da177e4
LT
1386 if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
1387 /* Unhash it, so that ->d_iput() would be called */
1388 return 1;
1389 }
1751e8a6 1390 if (!(dentry->d_sb->s_flags & SB_ACTIVE)) {
1da177e4
LT
1391 /* Unhash it, so that ancestors of killed async unlink
1392 * files will be cleaned up during umount */
1393 return 1;
1394 }
1395 return 0;
1396
1397}
1398
1f018458 1399/* Ensure that we revalidate inode->i_nlink */
1b83d707
TM
1400static void nfs_drop_nlink(struct inode *inode)
1401{
1402 spin_lock(&inode->i_lock);
1f018458 1403 /* drop the inode if we're reasonably sure this is the last link */
59a707b0
TM
1404 if (inode->i_nlink > 0)
1405 drop_nlink(inode);
1406 NFS_I(inode)->attr_gencount = nfs_inc_attr_generation_counter();
16e14375
TM
1407 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_CHANGE
1408 | NFS_INO_INVALID_CTIME
59a707b0
TM
1409 | NFS_INO_INVALID_OTHER
1410 | NFS_INO_REVAL_FORCED;
1b83d707
TM
1411 spin_unlock(&inode->i_lock);
1412}
1413
1da177e4
LT
1414/*
1415 * Called when the dentry loses inode.
1416 * We use it to clean up silly-renamed files.
1417 */
1418static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode)
1419{
83672d39
NB
1420 if (S_ISDIR(inode->i_mode))
1421 /* drop any readdir cache as it could easily be old */
1422 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA;
1423
1da177e4 1424 if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
e4eff1a6 1425 nfs_complete_unlink(dentry, inode);
1f018458 1426 nfs_drop_nlink(inode);
1da177e4 1427 }
1da177e4
LT
1428 iput(inode);
1429}
1430
b1942c5f
AV
1431static void nfs_d_release(struct dentry *dentry)
1432{
1433 /* free cached devname value, if it survived that far */
1434 if (unlikely(dentry->d_fsdata)) {
1435 if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
1436 WARN_ON(1);
1437 else
1438 kfree(dentry->d_fsdata);
1439 }
1440}
1441
f786aa90 1442const struct dentry_operations nfs_dentry_operations = {
1da177e4 1443 .d_revalidate = nfs_lookup_revalidate,
ecf3d1f1 1444 .d_weak_revalidate = nfs_weak_revalidate,
1da177e4
LT
1445 .d_delete = nfs_dentry_delete,
1446 .d_iput = nfs_dentry_iput,
36d43a43 1447 .d_automount = nfs_d_automount,
b1942c5f 1448 .d_release = nfs_d_release,
1da177e4 1449};
ddda8e0a 1450EXPORT_SYMBOL_GPL(nfs_dentry_operations);
1da177e4 1451
597d9289 1452struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
1da177e4
LT
1453{
1454 struct dentry *res;
1455 struct inode *inode = NULL;
e1fb4d05
TM
1456 struct nfs_fh *fhandle = NULL;
1457 struct nfs_fattr *fattr = NULL;
1775fd3e 1458 struct nfs4_label *label = NULL;
1da177e4 1459 int error;
1da177e4 1460
6de1472f 1461 dfprintk(VFS, "NFS: lookup(%pd2)\n", dentry);
91d5b470 1462 nfs_inc_stats(dir, NFSIOS_VFSLOOKUP);
1da177e4 1463
130f9ab7
AV
1464 if (unlikely(dentry->d_name.len > NFS_SERVER(dir)->namelen))
1465 return ERR_PTR(-ENAMETOOLONG);
1da177e4 1466
fd684071
TM
1467 /*
1468 * If we're doing an exclusive create, optimize away the lookup
1469 * but don't hash the dentry.
1470 */
9f6d44d4 1471 if (nfs_is_exclusive_create(dir, flags) || flags & LOOKUP_RENAME_TARGET)
130f9ab7 1472 return NULL;
1da177e4 1473
e1fb4d05
TM
1474 res = ERR_PTR(-ENOMEM);
1475 fhandle = nfs_alloc_fhandle();
1476 fattr = nfs_alloc_fattr();
1477 if (fhandle == NULL || fattr == NULL)
1478 goto out;
1479
14c43f76
DQ
1480 label = nfs4_label_alloc(NFS_SERVER(dir), GFP_NOWAIT);
1481 if (IS_ERR(label))
1482 goto out;
1483
6e0d0be7 1484 trace_nfs_lookup_enter(dir, dentry, flags);
1775fd3e 1485 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
1da177e4
LT
1486 if (error == -ENOENT)
1487 goto no_entry;
1488 if (error < 0) {
1489 res = ERR_PTR(error);
bf130914 1490 goto out_label;
1da177e4 1491 }
1775fd3e 1492 inode = nfs_fhget(dentry->d_sb, fhandle, fattr, label);
bf0c84f1 1493 res = ERR_CAST(inode);
03f28e3a 1494 if (IS_ERR(res))
bf130914 1495 goto out_label;
54ceac45 1496
63519fbc
TM
1497 /* Notify readdir to use READDIRPLUS */
1498 nfs_force_use_readdirplus(dir);
d69ee9b8 1499
1da177e4 1500no_entry:
41d28bca 1501 res = d_splice_alias(inode, dentry);
9eaef27b
TM
1502 if (res != NULL) {
1503 if (IS_ERR(res))
bf130914 1504 goto out_label;
1da177e4 1505 dentry = res;
9eaef27b 1506 }
1da177e4 1507 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
bf130914 1508out_label:
6e0d0be7 1509 trace_nfs_lookup_exit(dir, dentry, flags, error);
14c43f76 1510 nfs4_label_free(label);
1da177e4 1511out:
e1fb4d05
TM
1512 nfs_free_fattr(fattr);
1513 nfs_free_fhandle(fhandle);
1da177e4
LT
1514 return res;
1515}
ddda8e0a 1516EXPORT_SYMBOL_GPL(nfs_lookup);
1da177e4 1517
89d77c8f 1518#if IS_ENABLED(CONFIG_NFS_V4)
0b728e19 1519static int nfs4_lookup_revalidate(struct dentry *, unsigned int);
1da177e4 1520
f786aa90 1521const struct dentry_operations nfs4_dentry_operations = {
0ef97dcf 1522 .d_revalidate = nfs4_lookup_revalidate,
b688741c 1523 .d_weak_revalidate = nfs_weak_revalidate,
1da177e4
LT
1524 .d_delete = nfs_dentry_delete,
1525 .d_iput = nfs_dentry_iput,
36d43a43 1526 .d_automount = nfs_d_automount,
b1942c5f 1527 .d_release = nfs_d_release,
1da177e4 1528};
89d77c8f 1529EXPORT_SYMBOL_GPL(nfs4_dentry_operations);
1da177e4 1530
8a5e929d
AV
1531static fmode_t flags_to_mode(int flags)
1532{
1533 fmode_t res = (__force fmode_t)flags & FMODE_EXEC;
1534 if ((flags & O_ACCMODE) != O_WRONLY)
1535 res |= FMODE_READ;
1536 if ((flags & O_ACCMODE) != O_RDONLY)
1537 res |= FMODE_WRITE;
1538 return res;
1539}
1540
532d4def 1541static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags, struct file *filp)
cd9a1c0e 1542{
532d4def 1543 return alloc_nfs_open_context(dentry, flags_to_mode(open_flags), filp);
cd9a1c0e
TM
1544}
1545
/*
 * Open callback handed to finish_open(): hooks the file into fscache via
 * nfs_fscache_open_file() and always reports success.
 */
static int do_open(struct inode *inode, struct file *filp)
{
	nfs_fscache_open_file(inode, filp);

	return 0;
}
1551
d9585277
AV
1552static int nfs_finish_open(struct nfs_open_context *ctx,
1553 struct dentry *dentry,
b452a458 1554 struct file *file, unsigned open_flags)
cd9a1c0e 1555{
0dd2b474
MS
1556 int err;
1557
be12af3e 1558 err = finish_open(file, dentry, do_open);
30d90494 1559 if (err)
d9585277 1560 goto out;
eaa2b82c
N
1561 if (S_ISREG(file->f_path.dentry->d_inode->i_mode))
1562 nfs_file_set_open_context(file, ctx);
1563 else
1564 err = -ESTALE;
cd9a1c0e 1565out:
d9585277 1566 return err;
cd9a1c0e
TM
1567}
1568
73a79706
BS
1569int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
1570 struct file *file, unsigned open_flags,
44907d79 1571 umode_t mode)
1da177e4 1572{
c94c0953 1573 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
cd9a1c0e 1574 struct nfs_open_context *ctx;
0dd2b474
MS
1575 struct dentry *res;
1576 struct iattr attr = { .ia_valid = ATTR_OPEN };
f46e0bd3 1577 struct inode *inode;
1472b83e 1578 unsigned int lookup_flags = 0;
c94c0953 1579 bool switched = false;
73a09dd9 1580 int created = 0;
898f635c 1581 int err;
1da177e4 1582
0dd2b474 1583 /* Expect a negative dentry */
2b0143b5 1584 BUG_ON(d_inode(dentry));
0dd2b474 1585
1e8968c5 1586 dfprintk(VFS, "NFS: atomic_open(%s/%lu), %pd\n",
6de1472f 1587 dir->i_sb->s_id, dir->i_ino, dentry);
1e7cb3dc 1588
9597c13b
JL
1589 err = nfs_check_flags(open_flags);
1590 if (err)
1591 return err;
1592
0dd2b474
MS
1593 /* NFS only supports OPEN on regular files */
1594 if ((open_flags & O_DIRECTORY)) {
00699ad8 1595 if (!d_in_lookup(dentry)) {
0dd2b474
MS
1596 /*
1597 * Hashed negative dentry with O_DIRECTORY: dentry was
1598 * revalidated and is fine, no need to perform lookup
1599 * again
1600 */
d9585277 1601 return -ENOENT;
0dd2b474 1602 }
1472b83e 1603 lookup_flags = LOOKUP_OPEN|LOOKUP_DIRECTORY;
1da177e4 1604 goto no_open;
02a913a7 1605 }
1da177e4 1606
0dd2b474 1607 if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
d9585277 1608 return -ENAMETOOLONG;
cd9a1c0e 1609
0dd2b474 1610 if (open_flags & O_CREAT) {
dff25ddb
AG
1611 struct nfs_server *server = NFS_SERVER(dir);
1612
1613 if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK))
1614 mode &= ~current_umask();
1615
536e43d1 1616 attr.ia_valid |= ATTR_MODE;
dff25ddb 1617 attr.ia_mode = mode;
0dd2b474 1618 }
536e43d1
TM
1619 if (open_flags & O_TRUNC) {
1620 attr.ia_valid |= ATTR_SIZE;
1621 attr.ia_size = 0;
cd9a1c0e
TM
1622 }
1623
c94c0953
AV
1624 if (!(open_flags & O_CREAT) && !d_in_lookup(dentry)) {
1625 d_drop(dentry);
1626 switched = true;
1627 dentry = d_alloc_parallel(dentry->d_parent,
1628 &dentry->d_name, &wq);
1629 if (IS_ERR(dentry))
1630 return PTR_ERR(dentry);
1631 if (unlikely(!d_in_lookup(dentry)))
1632 return finish_no_open(file, dentry);
1633 }
1634
532d4def 1635 ctx = create_nfs_open_context(dentry, open_flags, file);
0dd2b474
MS
1636 err = PTR_ERR(ctx);
1637 if (IS_ERR(ctx))
d9585277 1638 goto out;
0dd2b474 1639
6e0d0be7 1640 trace_nfs_atomic_open_enter(dir, ctx, open_flags);
73a09dd9
AV
1641 inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, &created);
1642 if (created)
1643 file->f_mode |= FMODE_CREATED;
f46e0bd3 1644 if (IS_ERR(inode)) {
0dd2b474 1645 err = PTR_ERR(inode);
6e0d0be7 1646 trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
2d9db750 1647 put_nfs_open_context(ctx);
d20cb71d 1648 d_drop(dentry);
0dd2b474
MS
1649 switch (err) {
1650 case -ENOENT:
774d9513 1651 d_splice_alias(NULL, dentry);
809fd143 1652 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
0dd2b474
MS
1653 break;
1654 case -EISDIR:
1655 case -ENOTDIR:
1656 goto no_open;
1657 case -ELOOP:
1658 if (!(open_flags & O_NOFOLLOW))
6f926b5b 1659 goto no_open;
0dd2b474 1660 break;
1da177e4 1661 /* case -EINVAL: */
0dd2b474
MS
1662 default:
1663 break;
1da177e4 1664 }
d9585277 1665 goto out;
cd9a1c0e 1666 }
0dd2b474 1667
b452a458 1668 err = nfs_finish_open(ctx, ctx->dentry, file, open_flags);
6e0d0be7 1669 trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
2d9db750 1670 put_nfs_open_context(ctx);
d9585277 1671out:
c94c0953
AV
1672 if (unlikely(switched)) {
1673 d_lookup_done(dentry);
1674 dput(dentry);
1675 }
d9585277 1676 return err;
0dd2b474 1677
1da177e4 1678no_open:
1472b83e 1679 res = nfs_lookup(dir, dentry, lookup_flags);
c94c0953
AV
1680 if (switched) {
1681 d_lookup_done(dentry);
1682 if (!res)
1683 res = dentry;
1684 else
1685 dput(dentry);
1686 }
0dd2b474 1687 if (IS_ERR(res))
c94c0953 1688 return PTR_ERR(res);
e45198a6 1689 return finish_no_open(file, res);
1da177e4 1690}
89d77c8f 1691EXPORT_SYMBOL_GPL(nfs_atomic_open);
1da177e4 1692
c7944ebb
TM
1693static int
1694nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
1695 unsigned int flags)
1da177e4 1696{
657e94b6 1697 struct inode *inode;
1da177e4 1698
fa3c56bb 1699 if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY))
c7944ebb 1700 goto full_reval;
eda72afb 1701 if (d_mountpoint(dentry))
c7944ebb 1702 goto full_reval;
2b484297 1703
2b0143b5 1704 inode = d_inode(dentry);
2b484297 1705
1da177e4
LT
1706 /* We can't create new files in nfs_open_revalidate(), so we
1707 * optimize away revalidation of negative dentries.
1708 */
c7944ebb
TM
1709 if (inode == NULL)
1710 goto full_reval;
1711
1712 if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
1713 return nfs_lookup_revalidate_delegated(dir, dentry, inode);
216d5d06 1714
1da177e4
LT
1715 /* NFS only supports OPEN on regular files */
1716 if (!S_ISREG(inode->i_mode))
c7944ebb
TM
1717 goto full_reval;
1718
1da177e4 1719 /* We cannot do exclusive creation on a positive dentry */
c7944ebb
TM
1720 if (flags & (LOOKUP_EXCL | LOOKUP_REVAL))
1721 goto reval_dentry;
1722
1723 /* Check if the directory changed */
1724 if (!nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU))
1725 goto reval_dentry;
1da177e4 1726
0ef97dcf 1727 /* Let f_op->open() actually open (and revalidate) the file */
c7944ebb
TM
1728 return 1;
1729reval_dentry:
1730 if (flags & LOOKUP_RCU)
1731 return -ECHILD;
42f72cf3 1732 return nfs_lookup_revalidate_dentry(dir, dentry, inode);
536e43d1 1733
c7944ebb
TM
1734full_reval:
1735 return nfs_do_lookup_revalidate(dir, dentry, flags);
1736}
535918f1 1737
c7944ebb
TM
1738static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1739{
1740 return __nfs_lookup_revalidate(dentry, flags,
1741 nfs4_do_lookup_revalidate);
c0204fd2
TM
1742}
1743
1da177e4
LT
1744#endif /* IS_ENABLED(CONFIG_NFS_V4) */
1745
1da177e4
LT
1746/*
1747 * Code common to create, mkdir, and mknod.
1748 */
1749int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
1775fd3e
DQ
1750 struct nfs_fattr *fattr,
1751 struct nfs4_label *label)
1da177e4 1752{
fab728e1 1753 struct dentry *parent = dget_parent(dentry);
2b0143b5 1754 struct inode *dir = d_inode(parent);
1da177e4 1755 struct inode *inode;
b0c6108e 1756 struct dentry *d;
1da177e4
LT
1757 int error = -EACCES;
1758
fab728e1
TM
1759 d_drop(dentry);
1760
1da177e4 1761 /* We may have been initialized further down */
2b0143b5 1762 if (d_really_is_positive(dentry))
fab728e1 1763 goto out;
1da177e4 1764 if (fhandle->size == 0) {
1775fd3e 1765 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, NULL);
1da177e4 1766 if (error)
fab728e1 1767 goto out_error;
1da177e4 1768 }
5724ab37 1769 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1da177e4
LT
1770 if (!(fattr->valid & NFS_ATTR_FATTR)) {
1771 struct nfs_server *server = NFS_SB(dentry->d_sb);
a841b54d
TM
1772 error = server->nfs_client->rpc_ops->getattr(server, fhandle,
1773 fattr, NULL, NULL);
1da177e4 1774 if (error < 0)
fab728e1 1775 goto out_error;
1da177e4 1776 }
1775fd3e 1777 inode = nfs_fhget(dentry->d_sb, fhandle, fattr, label);
b0c6108e
AV
1778 d = d_splice_alias(inode, dentry);
1779 if (IS_ERR(d)) {
1780 error = PTR_ERR(d);
fab728e1 1781 goto out_error;
b0c6108e
AV
1782 }
1783 dput(d);
fab728e1
TM
1784out:
1785 dput(parent);
1da177e4 1786 return 0;
fab728e1
TM
1787out_error:
1788 nfs_mark_for_revalidate(dir);
1789 dput(parent);
1790 return error;
1da177e4 1791}
ddda8e0a 1792EXPORT_SYMBOL_GPL(nfs_instantiate);
1da177e4
LT
1793
1794/*
1795 * Following a failed create operation, we drop the dentry rather
1796 * than retain a negative dentry. This avoids a problem in the event
1797 * that the operation succeeded on the server, but an error in the
1798 * reply path made it appear to have failed.
1799 */
597d9289 1800int nfs_create(struct inode *dir, struct dentry *dentry,
ebfc3b49 1801 umode_t mode, bool excl)
1da177e4
LT
1802{
1803 struct iattr attr;
ebfc3b49 1804 int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT;
1da177e4 1805 int error;
1da177e4 1806
1e8968c5 1807 dfprintk(VFS, "NFS: create(%s/%lu), %pd\n",
6de1472f 1808 dir->i_sb->s_id, dir->i_ino, dentry);
1da177e4
LT
1809
1810 attr.ia_mode = mode;
1811 attr.ia_valid = ATTR_MODE;
1812
8b0ad3d4 1813 trace_nfs_create_enter(dir, dentry, open_flags);
8867fe58 1814 error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags);
8b0ad3d4 1815 trace_nfs_create_exit(dir, dentry, open_flags, error);
1da177e4
LT
1816 if (error != 0)
1817 goto out_err;
1da177e4
LT
1818 return 0;
1819out_err:
1da177e4
LT
1820 d_drop(dentry);
1821 return error;
1822}
ddda8e0a 1823EXPORT_SYMBOL_GPL(nfs_create);
1da177e4
LT
1824
1825/*
1826 * See comments for nfs_proc_create regarding failed operations.
1827 */
597d9289 1828int
1a67aafb 1829nfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
1da177e4
LT
1830{
1831 struct iattr attr;
1832 int status;
1833
1e8968c5 1834 dfprintk(VFS, "NFS: mknod(%s/%lu), %pd\n",
6de1472f 1835 dir->i_sb->s_id, dir->i_ino, dentry);
1da177e4 1836
1da177e4
LT
1837 attr.ia_mode = mode;
1838 attr.ia_valid = ATTR_MODE;
1839
1ca42382 1840 trace_nfs_mknod_enter(dir, dentry);
1da177e4 1841 status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev);
1ca42382 1842 trace_nfs_mknod_exit(dir, dentry, status);
1da177e4
LT
1843 if (status != 0)
1844 goto out_err;
1da177e4
LT
1845 return 0;
1846out_err:
1da177e4
LT
1847 d_drop(dentry);
1848 return status;
1849}
ddda8e0a 1850EXPORT_SYMBOL_GPL(nfs_mknod);
1da177e4
LT
1851
1852/*
1853 * See comments for nfs_proc_create regarding failed operations.
1854 */
597d9289 1855int nfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
1da177e4
LT
1856{
1857 struct iattr attr;
1858 int error;
1859
1e8968c5 1860 dfprintk(VFS, "NFS: mkdir(%s/%lu), %pd\n",
6de1472f 1861 dir->i_sb->s_id, dir->i_ino, dentry);
1da177e4
LT
1862
1863 attr.ia_valid = ATTR_MODE;
1864 attr.ia_mode = mode | S_IFDIR;
1865
1ca42382 1866 trace_nfs_mkdir_enter(dir, dentry);
1da177e4 1867 error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr);
1ca42382 1868 trace_nfs_mkdir_exit(dir, dentry, error);
1da177e4
LT
1869 if (error != 0)
1870 goto out_err;
1da177e4
LT
1871 return 0;
1872out_err:
1873 d_drop(dentry);
1da177e4
LT
1874 return error;
1875}
ddda8e0a 1876EXPORT_SYMBOL_GPL(nfs_mkdir);
1da177e4 1877
/*
 * The server reported -ENOENT for this name: if simple_positive() holds
 * for the dentry, delete it.
 */
static void nfs_dentry_handle_enoent(struct dentry *dentry)
{
	if (!simple_positive(dentry))
		return;

	d_delete(dentry);
}
1883
597d9289 1884int nfs_rmdir(struct inode *dir, struct dentry *dentry)
1da177e4
LT
1885{
1886 int error;
1887
1e8968c5 1888 dfprintk(VFS, "NFS: rmdir(%s/%lu), %pd\n",
6de1472f 1889 dir->i_sb->s_id, dir->i_ino, dentry);
1da177e4 1890
1ca42382 1891 trace_nfs_rmdir_enter(dir, dentry);
2b0143b5 1892 if (d_really_is_positive(dentry)) {
884be175 1893 down_write(&NFS_I(d_inode(dentry))->rmdir_sem);
ba6c0592
TM
1894 error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
1895 /* Ensure the VFS deletes this inode */
1896 switch (error) {
1897 case 0:
2b0143b5 1898 clear_nlink(d_inode(dentry));
ba6c0592
TM
1899 break;
1900 case -ENOENT:
1901 nfs_dentry_handle_enoent(dentry);
1902 }
884be175 1903 up_write(&NFS_I(d_inode(dentry))->rmdir_sem);
ba6c0592
TM
1904 } else
1905 error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
1ca42382 1906 trace_nfs_rmdir_exit(dir, dentry, error);
1da177e4
LT
1907
1908 return error;
1909}
ddda8e0a 1910EXPORT_SYMBOL_GPL(nfs_rmdir);
1da177e4 1911
1da177e4
LT
1912/*
1913 * Remove a file after making sure there are no pending writes,
1914 * and after checking that the file has only one user.
1915 *
1916 * We invalidate the attribute cache and free the inode prior to the operation
1917 * to avoid possible races if the server reuses the inode.
1918 */
1919static int nfs_safe_remove(struct dentry *dentry)
1920{
2b0143b5
DH
1921 struct inode *dir = d_inode(dentry->d_parent);
1922 struct inode *inode = d_inode(dentry);
1da177e4
LT
1923 int error = -EBUSY;
1924
6de1472f 1925 dfprintk(VFS, "NFS: safe_remove(%pd2)\n", dentry);
1da177e4
LT
1926
1927 /* If the dentry was sillyrenamed, we simply call d_delete() */
1928 if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
1929 error = 0;
1930 goto out;
1931 }
1932
1ca42382 1933 trace_nfs_remove_enter(dir, dentry);
1da177e4 1934 if (inode != NULL) {
912678db 1935 error = NFS_PROTO(dir)->remove(dir, dentry);
1da177e4 1936 if (error == 0)
1b83d707 1937 nfs_drop_nlink(inode);
1da177e4 1938 } else
912678db 1939 error = NFS_PROTO(dir)->remove(dir, dentry);
d45b9d8b
TM
1940 if (error == -ENOENT)
1941 nfs_dentry_handle_enoent(dentry);
1ca42382 1942 trace_nfs_remove_exit(dir, dentry, error);
1da177e4
LT
1943out:
1944 return error;
1945}
1946
1947/* We do silly rename. In case sillyrename() returns -EBUSY, the inode
1948 * belongs to an active ".nfs..." file and we return -EBUSY.
1949 *
1950 * If sillyrename() returns 0, we do nothing, otherwise we unlink.
1951 */
597d9289 1952int nfs_unlink(struct inode *dir, struct dentry *dentry)
1da177e4
LT
1953{
1954 int error;
1955 int need_rehash = 0;
1956
1e8968c5 1957 dfprintk(VFS, "NFS: unlink(%s/%lu, %pd)\n", dir->i_sb->s_id,
6de1472f 1958 dir->i_ino, dentry);
1da177e4 1959
1ca42382 1960 trace_nfs_unlink_enter(dir, dentry);
1da177e4 1961 spin_lock(&dentry->d_lock);
84d08fa8 1962 if (d_count(dentry) > 1) {
1da177e4 1963 spin_unlock(&dentry->d_lock);
ccfeb506 1964 /* Start asynchronous writeout of the inode */
2b0143b5 1965 write_inode_now(d_inode(dentry), 0);
1da177e4 1966 error = nfs_sillyrename(dir, dentry);
1ca42382 1967 goto out;
1da177e4
LT
1968 }
1969 if (!d_unhashed(dentry)) {
1970 __d_drop(dentry);
1971 need_rehash = 1;
1972 }
1973 spin_unlock(&dentry->d_lock);
1da177e4 1974 error = nfs_safe_remove(dentry);
d45b9d8b 1975 if (!error || error == -ENOENT) {
1da177e4
LT
1976 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1977 } else if (need_rehash)
1978 d_rehash(dentry);
1ca42382
TM
1979out:
1980 trace_nfs_unlink_exit(dir, dentry, error);
1da177e4
LT
1981 return error;
1982}
ddda8e0a 1983EXPORT_SYMBOL_GPL(nfs_unlink);
1da177e4 1984
873101b3
CL
1985/*
1986 * To create a symbolic link, most file systems instantiate a new inode,
1987 * add a page to it containing the path, then write it out to the disk
1988 * using prepare_write/commit_write.
1989 *
1990 * Unfortunately the NFS client can't create the in-core inode first
1991 * because it needs a file handle to create an in-core inode (see
1992 * fs/nfs/inode.c:nfs_fhget). We only have a file handle *after* the
1993 * symlink request has completed on the server.
1994 *
1995 * So instead we allocate a raw page, copy the symname into it, then do
1996 * the SYMLINK request with the page as the buffer. If it succeeds, we
1997 * now have a new file handle and can instantiate an in-core NFS inode
1998 * and move the raw page into its mapping.
1999 */
597d9289 2000int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
1da177e4 2001{
873101b3
CL
2002 struct page *page;
2003 char *kaddr;
1da177e4 2004 struct iattr attr;
873101b3 2005 unsigned int pathlen = strlen(symname);
1da177e4
LT
2006 int error;
2007
1e8968c5 2008 dfprintk(VFS, "NFS: symlink(%s/%lu, %pd, %s)\n", dir->i_sb->s_id,
6de1472f 2009 dir->i_ino, dentry, symname);
1da177e4 2010
873101b3
CL
2011 if (pathlen > PAGE_SIZE)
2012 return -ENAMETOOLONG;
1da177e4 2013
873101b3
CL
2014 attr.ia_mode = S_IFLNK | S_IRWXUGO;
2015 attr.ia_valid = ATTR_MODE;
1da177e4 2016
e8ecde25 2017 page = alloc_page(GFP_USER);
76566991 2018 if (!page)
873101b3 2019 return -ENOMEM;
873101b3 2020
e8ecde25 2021 kaddr = page_address(page);
873101b3
CL
2022 memcpy(kaddr, symname, pathlen);
2023 if (pathlen < PAGE_SIZE)
2024 memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen);
873101b3 2025
1ca42382 2026 trace_nfs_symlink_enter(dir, dentry);
94a6d753 2027 error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
1ca42382 2028 trace_nfs_symlink_exit(dir, dentry, error);
873101b3 2029 if (error != 0) {
1e8968c5 2030 dfprintk(VFS, "NFS: symlink(%s/%lu, %pd, %s) error %d\n",
873101b3 2031 dir->i_sb->s_id, dir->i_ino,
6de1472f 2032 dentry, symname, error);
1da177e4 2033 d_drop(dentry);
873101b3 2034 __free_page(page);
873101b3
CL
2035 return error;
2036 }
2037
2038 /*
2039 * No big deal if we can't add this page to the page cache here.
2040 * READLINK will get the missing page from the server if needed.
2041 */
2b0143b5 2042 if (!add_to_page_cache_lru(page, d_inode(dentry)->i_mapping, 0,
873101b3 2043 GFP_KERNEL)) {
873101b3
CL
2044 SetPageUptodate(page);
2045 unlock_page(page);
a0b54add
RA
2046 /*
2047 * add_to_page_cache_lru() grabs an extra page refcount.
2048 * Drop it here to avoid leaking this page later.
2049 */
09cbfeaf 2050 put_page(page);
873101b3
CL
2051 } else
2052 __free_page(page);
2053
873101b3 2054 return 0;
1da177e4 2055}
ddda8e0a 2056EXPORT_SYMBOL_GPL(nfs_symlink);
1da177e4 2057
597d9289 2058int
1da177e4
LT
2059nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
2060{
2b0143b5 2061 struct inode *inode = d_inode(old_dentry);
1da177e4
LT
2062 int error;
2063
6de1472f
AV
2064 dfprintk(VFS, "NFS: link(%pd2 -> %pd2)\n",
2065 old_dentry, dentry);
1da177e4 2066
1fd1085b 2067 trace_nfs_link_enter(inode, dir, dentry);
9697d234 2068 d_drop(dentry);
1da177e4 2069 error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
cf809556 2070 if (error == 0) {
7de9c6ee 2071 ihold(inode);
9697d234 2072 d_add(dentry, inode);
cf809556 2073 }
1fd1085b 2074 trace_nfs_link_exit(inode, dir, dentry, error);
1da177e4
LT
2075 return error;
2076}
ddda8e0a 2077EXPORT_SYMBOL_GPL(nfs_link);
1da177e4
LT
2078
2079/*
2080 * RENAME
2081 * FIXME: Some nfsds, like the Linux user space nfsd, may generate a
2082 * different file handle for the same inode after a rename (e.g. when
2083 * moving to a different directory). A fail-safe method to do so would
2084 * be to look up old_dir/old_name, create a link to new_dir/new_name and
2085 * rename the old file using the sillyrename stuff. This way, the original
2086 * file in old_dir will go away when the last process iput()s the inode.
2087 *
2088 * FIXED.
2089 *
2090 * It actually works quite well. One needs to have the possibility for
2091 * at least one ".nfs..." file in each directory the file ever gets
2092 * moved or linked to which happens automagically with the new
2093 * implementation that only depends on the dcache stuff instead of
2094 * using the inode layer
2095 *
2096 * Unfortunately, things are a little more complicated than indicated
2097 * above. For a cross-directory move, we want to make sure we can get
2098 * rid of the old inode after the operation. This means there must be
2099 * no pending writes (if it's a file), and the use count must be 1.
2100 * If these conditions are met, we can drop the dentries before doing
2101 * the rename.
2102 */
597d9289 2103int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1cd66c93
MS
2104 struct inode *new_dir, struct dentry *new_dentry,
2105 unsigned int flags)
1da177e4 2106{
2b0143b5
DH
2107 struct inode *old_inode = d_inode(old_dentry);
2108 struct inode *new_inode = d_inode(new_dentry);
d9f29500 2109 struct dentry *dentry = NULL, *rehash = NULL;
80a491fd 2110 struct rpc_task *task;
1da177e4
LT
2111 int error = -EBUSY;
2112
1cd66c93
MS
2113 if (flags)
2114 return -EINVAL;
2115
6de1472f
AV
2116 dfprintk(VFS, "NFS: rename(%pd2 -> %pd2, ct=%d)\n",
2117 old_dentry, new_dentry,
84d08fa8 2118 d_count(new_dentry));
1da177e4 2119
70ded201 2120 trace_nfs_rename_enter(old_dir, old_dentry, new_dir, new_dentry);
1da177e4 2121 /*
28f79a1a
MS
2122 * For non-directories, check whether the target is busy and if so,
2123 * make a copy of the dentry and then do a silly-rename. If the
2124 * silly-rename succeeds, the copied dentry is hashed and becomes
2125 * the new target.
1da177e4 2126 */
27226104
MS
2127 if (new_inode && !S_ISDIR(new_inode->i_mode)) {
2128 /*
2129 * To prevent any new references to the target during the
2130 * rename, we unhash the dentry in advance.
2131 */
d9f29500 2132 if (!d_unhashed(new_dentry)) {
27226104 2133 d_drop(new_dentry);
d9f29500
BC
2134 rehash = new_dentry;
2135 }
1da177e4 2136
84d08fa8 2137 if (d_count(new_dentry) > 2) {
27226104
MS
2138 int err;
2139
2140 /* copy the target dentry's name */
2141 dentry = d_alloc(new_dentry->d_parent,
2142 &new_dentry->d_name);
2143 if (!dentry)
2144 goto out;
2145
2146 /* silly-rename the existing target ... */
2147 err = nfs_sillyrename(new_dir, new_dentry);
24e93025 2148 if (err)
27226104 2149 goto out;
24e93025
MS
2150
2151 new_dentry = dentry;
d9f29500 2152 rehash = NULL;
24e93025 2153 new_inode = NULL;
27226104 2154 }
b1e4adf4 2155 }
1da177e4 2156
d9f29500 2157 task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL);
80a491fd
JL
2158 if (IS_ERR(task)) {
2159 error = PTR_ERR(task);
2160 goto out;
2161 }
2162
2163 error = rpc_wait_for_completion_task(task);
818a8dbe
BC
2164 if (error != 0) {
2165 ((struct nfs_renamedata *)task->tk_calldata)->cancelled = 1;
2166 /* Paired with the atomic_dec_and_test() barrier in rpc_do_put_task() */
2167 smp_wmb();
2168 } else
80a491fd
JL
2169 error = task->tk_status;
2170 rpc_put_task(task);
59a707b0
TM
2171 /* Ensure the inode attributes are revalidated */
2172 if (error == 0) {
2173 spin_lock(&old_inode->i_lock);
2174 NFS_I(old_inode)->attr_gencount = nfs_inc_attr_generation_counter();
2175 NFS_I(old_inode)->cache_validity |= NFS_INO_INVALID_CHANGE
2176 | NFS_INO_INVALID_CTIME
2177 | NFS_INO_REVAL_FORCED;
2178 spin_unlock(&old_inode->i_lock);
2179 }
1da177e4 2180out:
d9f29500
BC
2181 if (rehash)
2182 d_rehash(rehash);
70ded201
TM
2183 trace_nfs_rename_exit(old_dir, old_dentry,
2184 new_dir, new_dentry, error);
d9f29500
BC
2185 if (!error) {
2186 if (new_inode != NULL)
2187 nfs_drop_nlink(new_inode);
2188 /*
2189 * The d_move() should be here instead of in an async RPC completion
2190 * handler because we need the proper locks to move the dentry. If
2191 * we're interrupted by a signal, the async RPC completion handler
2192 * should mark the directories for revalidation.
2193 */
2194 d_move(old_dentry, new_dentry);
d803224c 2195 nfs_set_verifier(old_dentry,
d9f29500
BC
2196 nfs_save_change_attribute(new_dir));
2197 } else if (error == -ENOENT)
2198 nfs_dentry_handle_enoent(old_dentry);
2199
1da177e4
LT
2200 /* new dentry created? */
2201 if (dentry)
2202 dput(dentry);
1da177e4
LT
2203 return error;
2204}
ddda8e0a 2205EXPORT_SYMBOL_GPL(nfs_rename);
1da177e4 2206
cfcea3e8
TM
2207static DEFINE_SPINLOCK(nfs_access_lru_lock);
2208static LIST_HEAD(nfs_access_lru_list);
2209static atomic_long_t nfs_access_nr_entries;
2210
3a505845
TM
2211static unsigned long nfs_access_max_cachesize = ULONG_MAX;
2212module_param(nfs_access_max_cachesize, ulong, 0644);
2213MODULE_PARM_DESC(nfs_access_max_cachesize, "NFS access maximum total cache length");
2214
1c3c07e9
TM
2215static void nfs_access_free_entry(struct nfs_access_entry *entry)
2216{
b68572e0 2217 put_cred(entry->cred);
f682a398 2218 kfree_rcu(entry, rcu_head);
4e857c58 2219 smp_mb__before_atomic();
cfcea3e8 2220 atomic_long_dec(&nfs_access_nr_entries);
4e857c58 2221 smp_mb__after_atomic();
1c3c07e9
TM
2222}
2223
1a81bb8a
TM
2224static void nfs_access_free_list(struct list_head *head)
2225{
2226 struct nfs_access_entry *cache;
2227
2228 while (!list_empty(head)) {
2229 cache = list_entry(head->next, struct nfs_access_entry, lru);
2230 list_del(&cache->lru);
2231 nfs_access_free_entry(cache);
2232 }
2233}
2234
3a505845
TM
2235static unsigned long
2236nfs_do_access_cache_scan(unsigned int nr_to_scan)
979df72e
TM
2237{
2238 LIST_HEAD(head);
aa510da5 2239 struct nfs_inode *nfsi, *next;
979df72e 2240 struct nfs_access_entry *cache;
1ab6c499 2241 long freed = 0;
979df72e 2242
a50f7951 2243 spin_lock(&nfs_access_lru_lock);
aa510da5 2244 list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) {
979df72e
TM
2245 struct inode *inode;
2246
2247 if (nr_to_scan-- == 0)
2248 break;
9c7e7e23 2249 inode = &nfsi->vfs_inode;
979df72e
TM
2250 spin_lock(&inode->i_lock);
2251 if (list_empty(&nfsi->access_cache_entry_lru))
2252 goto remove_lru_entry;
2253 cache = list_entry(nfsi->access_cache_entry_lru.next,
2254 struct nfs_access_entry, lru);
2255 list_move(&cache->lru, &head);
2256 rb_erase(&cache->rb_node, &nfsi->access_cache);
1ab6c499 2257 freed++;
979df72e
TM
2258 if (!list_empty(&nfsi->access_cache_entry_lru))
2259 list_move_tail(&nfsi->access_cache_inode_lru,
2260 &nfs_access_lru_list);
2261 else {
2262remove_lru_entry:
2263 list_del_init(&nfsi->access_cache_inode_lru);
4e857c58 2264 smp_mb__before_atomic();
979df72e 2265 clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
4e857c58 2266 smp_mb__after_atomic();
979df72e 2267 }
59844a9b 2268 spin_unlock(&inode->i_lock);
979df72e
TM
2269 }
2270 spin_unlock(&nfs_access_lru_lock);
1a81bb8a 2271 nfs_access_free_list(&head);
1ab6c499
DC
2272 return freed;
2273}
2274
3a505845
TM
2275unsigned long
2276nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
2277{
2278 int nr_to_scan = sc->nr_to_scan;
2279 gfp_t gfp_mask = sc->gfp_mask;
2280
2281 if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
2282 return SHRINK_STOP;
2283 return nfs_do_access_cache_scan(nr_to_scan);
2284}
2285
2286
1ab6c499
DC
2287unsigned long
2288nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc)
2289{
55f841ce 2290 return vfs_pressure_ratio(atomic_long_read(&nfs_access_nr_entries));
979df72e
TM
2291}
2292
3a505845
TM
2293static void
2294nfs_access_cache_enforce_limit(void)
2295{
2296 long nr_entries = atomic_long_read(&nfs_access_nr_entries);
2297 unsigned long diff;
2298 unsigned int nr_to_scan;
2299
2300 if (nr_entries < 0 || nr_entries <= nfs_access_max_cachesize)
2301 return;
2302 nr_to_scan = 100;
2303 diff = nr_entries - nfs_access_max_cachesize;
2304 if (diff < nr_to_scan)
2305 nr_to_scan = diff;
2306 nfs_do_access_cache_scan(nr_to_scan);
2307}
2308
1a81bb8a 2309static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head)
1da177e4 2310{
1c3c07e9 2311 struct rb_root *root_node = &nfsi->access_cache;
1a81bb8a 2312 struct rb_node *n;
1c3c07e9
TM
2313 struct nfs_access_entry *entry;
2314
2315 /* Unhook entries from the cache */
2316 while ((n = rb_first(root_node)) != NULL) {
2317 entry = rb_entry(n, struct nfs_access_entry, rb_node);
2318 rb_erase(n, root_node);
1a81bb8a 2319 list_move(&entry->lru, head);
1c3c07e9
TM
2320 }
2321 nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
1da177e4
LT
2322}
2323
1c3c07e9 2324void nfs_access_zap_cache(struct inode *inode)
1da177e4 2325{
1a81bb8a
TM
2326 LIST_HEAD(head);
2327
2328 if (test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags) == 0)
2329 return;
cfcea3e8 2330 /* Remove from global LRU init */
1a81bb8a
TM
2331 spin_lock(&nfs_access_lru_lock);
2332 if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags))
cfcea3e8 2333 list_del_init(&NFS_I(inode)->access_cache_inode_lru);
cfcea3e8 2334
1c3c07e9 2335 spin_lock(&inode->i_lock);
1a81bb8a
TM
2336 __nfs_access_zap_cache(NFS_I(inode), &head);
2337 spin_unlock(&inode->i_lock);
2338 spin_unlock(&nfs_access_lru_lock);
2339 nfs_access_free_list(&head);
1c3c07e9 2340}
1c606fb7 2341EXPORT_SYMBOL_GPL(nfs_access_zap_cache);
1da177e4 2342
b68572e0 2343static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, const struct cred *cred)
1c3c07e9
TM
2344{
2345 struct rb_node *n = NFS_I(inode)->access_cache.rb_node;
1c3c07e9
TM
2346
2347 while (n != NULL) {
b68572e0
N
2348 struct nfs_access_entry *entry =
2349 rb_entry(n, struct nfs_access_entry, rb_node);
2350 int cmp = cred_fscmp(cred, entry->cred);
1c3c07e9 2351
b68572e0 2352 if (cmp < 0)
1c3c07e9 2353 n = n->rb_left;
b68572e0 2354 else if (cmp > 0)
1c3c07e9
TM
2355 n = n->rb_right;
2356 else
2357 return entry;
1da177e4 2358 }
1c3c07e9
TM
2359 return NULL;
2360}
2361
b68572e0 2362static int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, bool may_block)
1c3c07e9
TM
2363{
2364 struct nfs_inode *nfsi = NFS_I(inode);
2365 struct nfs_access_entry *cache;
57b69181
TM
2366 bool retry = true;
2367 int err;
1c3c07e9 2368
dc59250c 2369 spin_lock(&inode->i_lock);
57b69181
TM
2370 for(;;) {
2371 if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
2372 goto out_zap;
2373 cache = nfs_access_search_rbtree(inode, cred);
2374 err = -ENOENT;
2375 if (cache == NULL)
2376 goto out;
2377 /* Found an entry, is our attribute cache valid? */
21c3ba7e 2378 if (!nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS))
57b69181
TM
2379 break;
2380 err = -ECHILD;
2381 if (!may_block)
2382 goto out;
2383 if (!retry)
2384 goto out_zap;
2385 spin_unlock(&inode->i_lock);
2386 err = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
2387 if (err)
2388 return err;
2389 spin_lock(&inode->i_lock);
2390 retry = false;
2391 }
1c3c07e9
TM
2392 res->cred = cache->cred;
2393 res->mask = cache->mask;
cfcea3e8 2394 list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru);
1c3c07e9
TM
2395 err = 0;
2396out:
2397 spin_unlock(&inode->i_lock);
2398 return err;
1c3c07e9 2399out_zap:
1a81bb8a
TM
2400 spin_unlock(&inode->i_lock);
2401 nfs_access_zap_cache(inode);
1c3c07e9
TM
2402 return -ENOENT;
2403}
2404
b68572e0 2405static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res)
f682a398
N
2406{
2407 /* Only check the most recently returned cache entry,
2408 * but do it without locking.
2409 */
2410 struct nfs_inode *nfsi = NFS_I(inode);
2411 struct nfs_access_entry *cache;
2412 int err = -ECHILD;
2413 struct list_head *lh;
2414
2415 rcu_read_lock();
2416 if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
2417 goto out;
2418 lh = rcu_dereference(nfsi->access_cache_entry_lru.prev);
2419 cache = list_entry(lh, struct nfs_access_entry, lru);
2420 if (lh == &nfsi->access_cache_entry_lru ||
2421 cred != cache->cred)
2422 cache = NULL;
2423 if (cache == NULL)
2424 goto out;
21c3ba7e 2425 if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS))
f682a398 2426 goto out;
f682a398
N
2427 res->cred = cache->cred;
2428 res->mask = cache->mask;
21c3ba7e 2429 err = 0;
f682a398
N
2430out:
2431 rcu_read_unlock();
2432 return err;
2433}
2434
1c3c07e9
TM
2435static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set)
2436{
cfcea3e8
TM
2437 struct nfs_inode *nfsi = NFS_I(inode);
2438 struct rb_root *root_node = &nfsi->access_cache;
1c3c07e9
TM
2439 struct rb_node **p = &root_node->rb_node;
2440 struct rb_node *parent = NULL;
2441 struct nfs_access_entry *entry;
b68572e0 2442 int cmp;
1c3c07e9
TM
2443
2444 spin_lock(&inode->i_lock);
2445 while (*p != NULL) {
2446 parent = *p;
2447 entry = rb_entry(parent, struct nfs_access_entry, rb_node);
b68572e0 2448 cmp = cred_fscmp(set->cred, entry->cred);
1c3c07e9 2449
b68572e0 2450 if (cmp < 0)
1c3c07e9 2451 p = &parent->rb_left;
b68572e0 2452 else if (cmp > 0)
1c3c07e9
TM
2453 p = &parent->rb_right;
2454 else
2455 goto found;
2456 }
2457 rb_link_node(&set->rb_node, parent, p);
2458 rb_insert_color(&set->rb_node, root_node);
cfcea3e8 2459 list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
dc59250c 2460 spin_unlock(&inode->i_lock);
1c3c07e9
TM
2461 return;
2462found:
2463 rb_replace_node(parent, &set->rb_node, root_node);
cfcea3e8
TM
2464 list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
2465 list_del(&entry->lru);
1c3c07e9
TM
2466 spin_unlock(&inode->i_lock);
2467 nfs_access_free_entry(entry);
2468}
2469
6168f62c 2470void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
1c3c07e9
TM
2471{
2472 struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL);
2473 if (cache == NULL)
2474 return;
2475 RB_CLEAR_NODE(&cache->rb_node);
b68572e0 2476 cache->cred = get_cred(set->cred);
1da177e4 2477 cache->mask = set->mask;
1c3c07e9 2478
f682a398
N
2479 /* The above field assignments must be visible
2480 * before this item appears on the lru. We cannot easily
2481 * use rcu_assign_pointer, so just force the memory barrier.
2482 */
2483 smp_wmb();
1c3c07e9 2484 nfs_access_add_rbtree(inode, cache);
cfcea3e8
TM
2485
2486 /* Update accounting */
4e857c58 2487 smp_mb__before_atomic();
cfcea3e8 2488 atomic_long_inc(&nfs_access_nr_entries);
4e857c58 2489 smp_mb__after_atomic();
cfcea3e8
TM
2490
2491 /* Add inode to global LRU list */
1a81bb8a 2492 if (!test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) {
cfcea3e8 2493 spin_lock(&nfs_access_lru_lock);
1a81bb8a
TM
2494 if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags))
2495 list_add_tail(&NFS_I(inode)->access_cache_inode_lru,
2496 &nfs_access_lru_list);
cfcea3e8
TM
2497 spin_unlock(&nfs_access_lru_lock);
2498 }
3a505845 2499 nfs_access_cache_enforce_limit();
1da177e4 2500}
6168f62c
WAA
2501EXPORT_SYMBOL_GPL(nfs_access_add_cache);
2502
3c181827
AS
2503#define NFS_MAY_READ (NFS_ACCESS_READ)
2504#define NFS_MAY_WRITE (NFS_ACCESS_MODIFY | \
2505 NFS_ACCESS_EXTEND | \
2506 NFS_ACCESS_DELETE)
2507#define NFS_FILE_MAY_WRITE (NFS_ACCESS_MODIFY | \
2508 NFS_ACCESS_EXTEND)
ecbb903c 2509#define NFS_DIR_MAY_WRITE NFS_MAY_WRITE
3c181827
AS
2510#define NFS_MAY_LOOKUP (NFS_ACCESS_LOOKUP)
2511#define NFS_MAY_EXECUTE (NFS_ACCESS_EXECUTE)
15d4b73a 2512static int
ecbb903c 2513nfs_access_calc_mask(u32 access_result, umode_t umode)
15d4b73a
TM
2514{
2515 int mask = 0;
2516
2517 if (access_result & NFS_MAY_READ)
2518 mask |= MAY_READ;
ecbb903c
TM
2519 if (S_ISDIR(umode)) {
2520 if ((access_result & NFS_DIR_MAY_WRITE) == NFS_DIR_MAY_WRITE)
2521 mask |= MAY_WRITE;
2522 if ((access_result & NFS_MAY_LOOKUP) == NFS_MAY_LOOKUP)
2523 mask |= MAY_EXEC;
2524 } else if (S_ISREG(umode)) {
2525 if ((access_result & NFS_FILE_MAY_WRITE) == NFS_FILE_MAY_WRITE)
2526 mask |= MAY_WRITE;
2527 if ((access_result & NFS_MAY_EXECUTE) == NFS_MAY_EXECUTE)
2528 mask |= MAY_EXEC;
2529 } else if (access_result & NFS_MAY_WRITE)
2530 mask |= MAY_WRITE;
15d4b73a
TM
2531 return mask;
2532}
2533
6168f62c
WAA
2534void nfs_access_set_mask(struct nfs_access_entry *entry, u32 access_result)
2535{
bd8b2441 2536 entry->mask = access_result;
6168f62c
WAA
2537}
2538EXPORT_SYMBOL_GPL(nfs_access_set_mask);
1da177e4 2539
b68572e0 2540static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask)
1da177e4
LT
2541{
2542 struct nfs_access_entry cache;
57b69181 2543 bool may_block = (mask & MAY_NOT_BLOCK) == 0;
bd8b2441 2544 int cache_mask;
1da177e4
LT
2545 int status;
2546
f4ce1299
TM
2547 trace_nfs_access_enter(inode);
2548
f682a398
N
2549 status = nfs_access_get_cached_rcu(inode, cred, &cache);
2550 if (status != 0)
57b69181 2551 status = nfs_access_get_cached(inode, cred, &cache, may_block);
1da177e4 2552 if (status == 0)
f4ce1299 2553 goto out_cached;
1da177e4 2554
f3324a2a 2555 status = -ECHILD;
57b69181 2556 if (!may_block)
f3324a2a
N
2557 goto out;
2558
1750d929
AS
2559 /*
2560 * Determine which access bits we want to ask for...
2561 */
2562 cache.mask = NFS_ACCESS_READ | NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND;
2563 if (S_ISDIR(inode->i_mode))
2564 cache.mask |= NFS_ACCESS_DELETE | NFS_ACCESS_LOOKUP;
2565 else
2566 cache.mask |= NFS_ACCESS_EXECUTE;
1da177e4 2567 cache.cred = cred;
1da177e4 2568 status = NFS_PROTO(inode)->access(inode, &cache);
a71ee337
SJ
2569 if (status != 0) {
2570 if (status == -ESTALE) {
2571 nfs_zap_caches(inode);
2572 if (!S_ISDIR(inode->i_mode))
2573 set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
2574 }
f4ce1299 2575 goto out;
a71ee337 2576 }
1da177e4 2577 nfs_access_add_cache(inode, &cache);
f4ce1299 2578out_cached:
ecbb903c 2579 cache_mask = nfs_access_calc_mask(cache.mask, inode->i_mode);
bd8b2441 2580 if ((mask & ~cache_mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0)
f4ce1299 2581 status = -EACCES;
1da177e4 2582out:
f4ce1299
TM
2583 trace_nfs_access_exit(inode, status);
2584 return status;
1da177e4
LT
2585}
2586
af22f94a
TM
2587static int nfs_open_permission_mask(int openflags)
2588{
2589 int mask = 0;
2590
f8d9a897
WAA
2591 if (openflags & __FMODE_EXEC) {
2592 /* ONLY check exec rights */
2593 mask = MAY_EXEC;
2594 } else {
2595 if ((openflags & O_ACCMODE) != O_WRONLY)
2596 mask |= MAY_READ;
2597 if ((openflags & O_ACCMODE) != O_RDONLY)
2598 mask |= MAY_WRITE;
2599 }
2600
af22f94a
TM
2601 return mask;
2602}
2603
/*
 * Check whether @cred may open @inode with @openflags: the flags are
 * converted to a MAY_* mask and checked through nfs_do_access(), whose
 * result is returned.
 */
int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags)
{
	int mask = nfs_open_permission_mask(openflags);

	return nfs_do_access(inode, cred, mask);
}
EXPORT_SYMBOL_GPL(nfs_may_open);
af22f94a 2609
5c5fc09a
TM
2610static int nfs_execute_ok(struct inode *inode, int mask)
2611{
2612 struct nfs_server *server = NFS_SERVER(inode);
21c3ba7e 2613 int ret = 0;
5c5fc09a 2614
3825827e
TM
2615 if (S_ISDIR(inode->i_mode))
2616 return 0;
cf834027 2617 if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_OTHER)) {
21c3ba7e
TM
2618 if (mask & MAY_NOT_BLOCK)
2619 return -ECHILD;
2620 ret = __nfs_revalidate_inode(server, inode);
2621 }
5c5fc09a
TM
2622 if (ret == 0 && !execute_ok(inode))
2623 ret = -EACCES;
2624 return ret;
2625}
2626
10556cb2 2627int nfs_permission(struct inode *inode, int mask)
1da177e4 2628{
b68572e0 2629 const struct cred *cred = current_cred();
1da177e4
LT
2630 int res = 0;
2631
91d5b470
CL
2632 nfs_inc_stats(inode, NFSIOS_VFSACCESS);
2633
e6305c43 2634 if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
1da177e4
LT
2635 goto out;
2636 /* Is this sys_access() ? */
9cfcac81 2637 if (mask & (MAY_ACCESS | MAY_CHDIR))
1da177e4
LT
2638 goto force_lookup;
2639
2640 switch (inode->i_mode & S_IFMT) {
2641 case S_IFLNK:
2642 goto out;
2643 case S_IFREG:
762674f8
TM
2644 if ((mask & MAY_OPEN) &&
2645 nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN))
2646 return 0;
1da177e4
LT
2647 break;
2648 case S_IFDIR:
2649 /*
2650 * Optimize away all write operations, since the server
2651 * will check permissions when we perform the op.
2652 */
2653 if ((mask & MAY_WRITE) && !(mask & MAY_READ))
2654 goto out;
2655 }
2656
2657force_lookup:
1da177e4
LT
2658 if (!NFS_PROTO(inode)->access)
2659 goto out_notsup;
2660
f3324a2a
N
2661 /* Always try fast lookups first */
2662 rcu_read_lock();
b68572e0 2663 res = nfs_do_access(inode, cred, mask|MAY_NOT_BLOCK);
f3324a2a
N
2664 rcu_read_unlock();
2665 if (res == -ECHILD && !(mask & MAY_NOT_BLOCK)) {
2666 /* Fast lookup failed, try the slow way */
b68572e0 2667 res = nfs_do_access(inode, cred, mask);
f3324a2a 2668 }
1da177e4 2669out:
5c5fc09a
TM
2670 if (!res && (mask & MAY_EXEC))
2671 res = nfs_execute_ok(inode, mask);
f696a365 2672
1e8968c5 2673 dfprintk(VFS, "NFS: permission(%s/%lu), mask=0x%x, res=%d\n",
1e7cb3dc 2674 inode->i_sb->s_id, inode->i_ino, mask, res);
1da177e4
LT
2675 return res;
2676out_notsup:
d51ac1a8
N
2677 if (mask & MAY_NOT_BLOCK)
2678 return -ECHILD;
2679
1da177e4
LT
2680 res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
2681 if (res == 0)
2830ba7f 2682 res = generic_permission(inode, mask);
1e7cb3dc 2683 goto out;
1da177e4 2684}
ddda8e0a 2685EXPORT_SYMBOL_GPL(nfs_permission);
1da177e4
LT
2686
2687/*
2688 * Local variables:
2689 * version-control: t
2690 * kept-new-versions: 5
2691 * End:
2692 */