]> git.ipfire.org Git - people/ms/linux.git/blame - fs/ceph/export.c
Merge tag 'trace-v6.0-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt...
[people/ms/linux.git] / fs / ceph / export.c
CommitLineData
b2441318 1// SPDX-License-Identifier: GPL-2.0
3d14c5d2 2#include <linux/ceph/ceph_debug.h>
a8e63b7d
SW
3
4#include <linux/exportfs.h>
5a0e3ad6 5#include <linux/slab.h>
a8e63b7d
SW
6#include <asm/unaligned.h>
7
8#include "super.h"
3d14c5d2 9#include "mds_client.h"
a8e63b7d 10
a8e63b7d
SW
11/*
12 * Basic fh
13 */
14struct ceph_nfs_fh {
15 u64 ino;
16} __attribute__ ((packed));
17
18/*
4f32b42d 19 * Larger fh that includes parent ino.
a8e63b7d
SW
20 */
21struct ceph_nfs_confh {
22 u64 ino, parent_ino;
a8e63b7d
SW
23} __attribute__ ((packed));
24
570df4e9
YZ
25/*
26 * fh for snapped inode
27 */
28struct ceph_nfs_snapfh {
29 u64 ino;
30 u64 snapid;
31 u64 parent_ino;
32 u32 hash;
33} __attribute__ ((packed));
34
35static int ceph_encode_snapfh(struct inode *inode, u32 *rawfh, int *max_len,
36 struct inode *parent_inode)
37{
536cc331 38 static const int snap_handle_length =
570df4e9
YZ
39 sizeof(struct ceph_nfs_snapfh) >> 2;
40 struct ceph_nfs_snapfh *sfh = (void *)rawfh;
41 u64 snapid = ceph_snap(inode);
42 int ret;
43 bool no_parent = true;
44
45 if (*max_len < snap_handle_length) {
46 *max_len = snap_handle_length;
47 ret = FILEID_INVALID;
48 goto out;
49 }
50
51 ret = -EINVAL;
52 if (snapid != CEPH_SNAPDIR) {
53 struct inode *dir;
54 struct dentry *dentry = d_find_alias(inode);
55 if (!dentry)
56 goto out;
57
58 rcu_read_lock();
59 dir = d_inode_rcu(dentry->d_parent);
60 if (ceph_snap(dir) != CEPH_SNAPDIR) {
61 sfh->parent_ino = ceph_ino(dir);
62 sfh->hash = ceph_dentry_hash(dir, dentry);
63 no_parent = false;
64 }
65 rcu_read_unlock();
66 dput(dentry);
67 }
68
69 if (no_parent) {
70 if (!S_ISDIR(inode->i_mode))
71 goto out;
72 sfh->parent_ino = sfh->ino;
73 sfh->hash = 0;
74 }
75 sfh->ino = ceph_ino(inode);
76 sfh->snapid = snapid;
77
78 *max_len = snap_handle_length;
79 ret = FILEID_BTRFS_WITH_PARENT;
80out:
81 dout("encode_snapfh %llx.%llx ret=%d\n", ceph_vinop(inode), ret);
82 return ret;
83}
84
c862868b
SW
85static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
86 struct inode *parent_inode)
a8e63b7d 87{
536cc331 88 static const int handle_length =
570df4e9 89 sizeof(struct ceph_nfs_fh) >> 2;
536cc331 90 static const int connected_handle_length =
570df4e9 91 sizeof(struct ceph_nfs_confh) >> 2;
92923dcb 92 int type;
a8e63b7d 93
a8e63b7d 94 if (ceph_snap(inode) != CEPH_NOSNAP)
570df4e9 95 return ceph_encode_snapfh(inode, rawfh, max_len, parent_inode);
a8e63b7d 96
4f32b42d
YZ
97 if (parent_inode && (*max_len < connected_handle_length)) {
98 *max_len = connected_handle_length;
99 return FILEID_INVALID;
100 } else if (*max_len < handle_length) {
101 *max_len = handle_length;
102 return FILEID_INVALID;
103 }
f6af75da 104
4f32b42d 105 if (parent_inode) {
570df4e9 106 struct ceph_nfs_confh *cfh = (void *)rawfh;
4f32b42d
YZ
107 dout("encode_fh %llx with parent %llx\n",
108 ceph_ino(inode), ceph_ino(parent_inode));
c862868b 109 cfh->ino = ceph_ino(inode);
4f32b42d 110 cfh->parent_ino = ceph_ino(parent_inode);
92923dcb 111 *max_len = connected_handle_length;
4f32b42d 112 type = FILEID_INO32_GEN_PARENT;
a8e63b7d 113 } else {
570df4e9 114 struct ceph_nfs_fh *fh = (void *)rawfh;
4f32b42d
YZ
115 dout("encode_fh %llx\n", ceph_ino(inode));
116 fh->ino = ceph_ino(inode);
bba0cd0e 117 *max_len = handle_length;
4f32b42d 118 type = FILEID_INO32_GEN;
a8e63b7d
SW
119 }
120 return type;
121}
122
570df4e9 123static struct inode *__lookup_inode(struct super_block *sb, u64 ino)
a8e63b7d 124{
3c454cf2 125 struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
a8e63b7d 126 struct inode *inode;
a8e63b7d
SW
127 struct ceph_vino vino;
128 int err;
129
4f32b42d 130 vino.ino = ino;
a8e63b7d 131 vino.snap = CEPH_NOSNAP;
d4f6b31d
JL
132
133 if (ceph_vino_is_reserved(vino))
134 return ERR_PTR(-ESTALE);
135
a8e63b7d 136 inode = ceph_find_inode(sb, vino);
3c454cf2
SW
137 if (!inode) {
138 struct ceph_mds_request *req;
315f2408 139 int mask;
3c454cf2
SW
140
141 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
142 USE_ANY_MDS);
143 if (IS_ERR(req))
144 return ERR_CAST(req);
145
315f2408
YZ
146 mask = CEPH_STAT_CAP_INODE;
147 if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
148 mask |= CEPH_CAP_XATTR_SHARED;
570df4e9 149 req->r_args.lookupino.mask = cpu_to_le32(mask);
315f2408 150
3c454cf2
SW
151 req->r_ino1 = vino;
152 req->r_num_caps = 1;
153 err = ceph_mdsc_do_request(mdsc, NULL, req);
45e3d3ee
SW
154 inode = req->r_target_inode;
155 if (inode)
70b666c3 156 ihold(inode);
3c454cf2 157 ceph_mdsc_put_request(req);
3c454cf2 158 if (!inode)
3886274a 159 return err < 0 ? ERR_PTR(err) : ERR_PTR(-ESTALE);
5d6451b1
JL
160 } else {
161 if (ceph_inode_is_shutdown(inode)) {
162 iput(inode);
163 return ERR_PTR(-ESTALE);
164 }
3c454cf2 165 }
570df4e9
YZ
166 return inode;
167}
a8e63b7d 168
570df4e9
YZ
169struct inode *ceph_lookup_inode(struct super_block *sb, u64 ino)
170{
171 struct inode *inode = __lookup_inode(sb, ino);
172 if (IS_ERR(inode))
173 return inode;
174 if (inode->i_nlink == 0) {
175 iput(inode);
176 return ERR_PTR(-ESTALE);
177 }
3886274a
LH
178 return inode;
179}
180
181static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
182{
570df4e9 183 struct inode *inode = __lookup_inode(sb, ino);
878dabb6
LH
184 int err;
185
3886274a
LH
186 if (IS_ERR(inode))
187 return ERR_CAST(inode);
878dabb6
LH
188 /* We need LINK caps to reliably check i_nlink */
189 err = ceph_do_getattr(inode, CEPH_CAP_LINK_SHARED, false);
1775c7dd
JL
190 if (err) {
191 iput(inode);
878dabb6 192 return ERR_PTR(err);
1775c7dd 193 }
878dabb6
LH
194 /* -ESTALE if inode as been unlinked and no file is open */
195 if ((inode->i_nlink == 0) && (atomic_read(&inode->i_count) == 1)) {
570df4e9
YZ
196 iput(inode);
197 return ERR_PTR(-ESTALE);
198 }
ad5cb123 199 return d_obtain_alias(inode);
a8e63b7d
SW
200}
201
570df4e9
YZ
202static struct dentry *__snapfh_to_dentry(struct super_block *sb,
203 struct ceph_nfs_snapfh *sfh,
204 bool want_parent)
205{
206 struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
207 struct ceph_mds_request *req;
208 struct inode *inode;
209 struct ceph_vino vino;
210 int mask;
211 int err;
212 bool unlinked = false;
213
214 if (want_parent) {
215 vino.ino = sfh->parent_ino;
216 if (sfh->snapid == CEPH_SNAPDIR)
217 vino.snap = CEPH_NOSNAP;
218 else if (sfh->ino == sfh->parent_ino)
219 vino.snap = CEPH_SNAPDIR;
220 else
221 vino.snap = sfh->snapid;
222 } else {
223 vino.ino = sfh->ino;
224 vino.snap = sfh->snapid;
225 }
d4f6b31d
JL
226
227 if (ceph_vino_is_reserved(vino))
228 return ERR_PTR(-ESTALE);
229
570df4e9 230 inode = ceph_find_inode(sb, vino);
5d6451b1
JL
231 if (inode) {
232 if (ceph_inode_is_shutdown(inode)) {
233 iput(inode);
234 return ERR_PTR(-ESTALE);
235 }
570df4e9 236 return d_obtain_alias(inode);
5d6451b1 237 }
570df4e9
YZ
238
239 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
240 USE_ANY_MDS);
241 if (IS_ERR(req))
242 return ERR_CAST(req);
243
244 mask = CEPH_STAT_CAP_INODE;
245 if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
246 mask |= CEPH_CAP_XATTR_SHARED;
247 req->r_args.lookupino.mask = cpu_to_le32(mask);
248 if (vino.snap < CEPH_NOSNAP) {
249 req->r_args.lookupino.snapid = cpu_to_le64(vino.snap);
250 if (!want_parent && sfh->ino != sfh->parent_ino) {
251 req->r_args.lookupino.parent =
252 cpu_to_le64(sfh->parent_ino);
253 req->r_args.lookupino.hash =
254 cpu_to_le32(sfh->hash);
255 }
256 }
257
258 req->r_ino1 = vino;
259 req->r_num_caps = 1;
260 err = ceph_mdsc_do_request(mdsc, NULL, req);
261 inode = req->r_target_inode;
262 if (inode) {
263 if (vino.snap == CEPH_SNAPDIR) {
264 if (inode->i_nlink == 0)
265 unlinked = true;
266 inode = ceph_get_snapdir(inode);
267 } else if (ceph_snap(inode) == vino.snap) {
268 ihold(inode);
269 } else {
270 /* mds does not support lookup snapped inode */
3e10a15f 271 inode = ERR_PTR(-EOPNOTSUPP);
570df4e9 272 }
3e10a15f
JL
273 } else {
274 inode = ERR_PTR(-ESTALE);
570df4e9
YZ
275 }
276 ceph_mdsc_put_request(req);
277
278 if (want_parent) {
279 dout("snapfh_to_parent %llx.%llx\n err=%d\n",
280 vino.ino, vino.snap, err);
281 } else {
282 dout("snapfh_to_dentry %llx.%llx parent %llx hash %x err=%d",
283 vino.ino, vino.snap, sfh->parent_ino, sfh->hash, err);
284 }
3e10a15f
JL
285 if (IS_ERR(inode))
286 return ERR_CAST(inode);
570df4e9
YZ
287 /* see comments in ceph_get_parent() */
288 return unlinked ? d_obtain_root(inode) : d_obtain_alias(inode);
289}
290
a8e63b7d 291/*
4f32b42d 292 * convert regular fh to dentry
a8e63b7d 293 */
4f32b42d
YZ
294static struct dentry *ceph_fh_to_dentry(struct super_block *sb,
295 struct fid *fid,
296 int fh_len, int fh_type)
a8e63b7d 297{
4f32b42d 298 struct ceph_nfs_fh *fh = (void *)fid->raw;
a8e63b7d 299
570df4e9
YZ
300 if (fh_type == FILEID_BTRFS_WITH_PARENT) {
301 struct ceph_nfs_snapfh *sfh = (void *)fid->raw;
302 return __snapfh_to_dentry(sb, sfh, false);
303 }
304
4f32b42d
YZ
305 if (fh_type != FILEID_INO32_GEN &&
306 fh_type != FILEID_INO32_GEN_PARENT)
307 return NULL;
308 if (fh_len < sizeof(*fh) / 4)
309 return NULL;
a8e63b7d 310
4f32b42d
YZ
311 dout("fh_to_dentry %llx\n", fh->ino);
312 return __fh_to_dentry(sb, fh->ino);
a8e63b7d
SW
313}
314
9017c2ec
YZ
315static struct dentry *__get_parent(struct super_block *sb,
316 struct dentry *child, u64 ino)
317{
318 struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
319 struct ceph_mds_request *req;
320 struct inode *inode;
315f2408 321 int mask;
9017c2ec
YZ
322 int err;
323
324 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPPARENT,
325 USE_ANY_MDS);
326 if (IS_ERR(req))
327 return ERR_CAST(req);
328
329 if (child) {
2b0143b5
DH
330 req->r_inode = d_inode(child);
331 ihold(d_inode(child));
9017c2ec
YZ
332 } else {
333 req->r_ino1 = (struct ceph_vino) {
334 .ino = ino,
335 .snap = CEPH_NOSNAP,
336 };
337 }
315f2408
YZ
338
339 mask = CEPH_STAT_CAP_INODE;
340 if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
341 mask |= CEPH_CAP_XATTR_SHARED;
342 req->r_args.getattr.mask = cpu_to_le32(mask);
343
9017c2ec
YZ
344 req->r_num_caps = 1;
345 err = ceph_mdsc_do_request(mdsc, NULL, req);
c6d50296
QH
346 if (err) {
347 ceph_mdsc_put_request(req);
348 return ERR_PTR(err);
349 }
350
9017c2ec
YZ
351 inode = req->r_target_inode;
352 if (inode)
353 ihold(inode);
354 ceph_mdsc_put_request(req);
355 if (!inode)
356 return ERR_PTR(-ENOENT);
357
ad5cb123 358 return d_obtain_alias(inode);
9017c2ec
YZ
359}
360
e84be11c 361static struct dentry *ceph_get_parent(struct dentry *child)
9017c2ec 362{
570df4e9
YZ
363 struct inode *inode = d_inode(child);
364 struct dentry *dn;
365
366 if (ceph_snap(inode) != CEPH_NOSNAP) {
367 struct inode* dir;
368 bool unlinked = false;
369 /* do not support non-directory */
370 if (!d_is_dir(child)) {
371 dn = ERR_PTR(-EINVAL);
372 goto out;
373 }
374 dir = __lookup_inode(inode->i_sb, ceph_ino(inode));
375 if (IS_ERR(dir)) {
376 dn = ERR_CAST(dir);
377 goto out;
378 }
379 /* There can be multiple paths to access snapped inode.
380 * For simplicity, treat snapdir of head inode as parent */
381 if (ceph_snap(inode) != CEPH_SNAPDIR) {
382 struct inode *snapdir = ceph_get_snapdir(dir);
383 if (dir->i_nlink == 0)
384 unlinked = true;
385 iput(dir);
386 if (IS_ERR(snapdir)) {
387 dn = ERR_CAST(snapdir);
388 goto out;
389 }
390 dir = snapdir;
391 }
392 /* If directory has already been deleted, futher get_parent
393 * will fail. Do not mark snapdir dentry as disconnected,
394 * this prevent exportfs from doing futher get_parent. */
395 if (unlinked)
396 dn = d_obtain_root(dir);
397 else
398 dn = d_obtain_alias(dir);
399 } else {
400 dn = __get_parent(child->d_sb, child, 0);
401 }
402out:
403 dout("get_parent %p ino %llx.%llx err=%ld\n",
03af439a 404 child, ceph_vinop(inode), (long)PTR_ERR_OR_ZERO(dn));
570df4e9 405 return dn;
9017c2ec
YZ
406}
407
a8e63b7d 408/*
8996f4f2 409 * convert regular fh to parent
a8e63b7d
SW
410 */
411static struct dentry *ceph_fh_to_parent(struct super_block *sb,
8996f4f2 412 struct fid *fid,
a8e63b7d
SW
413 int fh_len, int fh_type)
414{
415 struct ceph_nfs_confh *cfh = (void *)fid->raw;
a8e63b7d 416 struct dentry *dentry;
a8e63b7d 417
570df4e9
YZ
418 if (fh_type == FILEID_BTRFS_WITH_PARENT) {
419 struct ceph_nfs_snapfh *sfh = (void *)fid->raw;
420 return __snapfh_to_dentry(sb, sfh, true);
421 }
422
8996f4f2
YZ
423 if (fh_type != FILEID_INO32_GEN_PARENT)
424 return NULL;
35c2a7f4 425 if (fh_len < sizeof(*cfh) / 4)
8996f4f2 426 return NULL;
a8e63b7d 427
8996f4f2
YZ
428 dout("fh_to_parent %llx\n", cfh->parent_ino);
429 dentry = __get_parent(sb, NULL, cfh->ino);
b42b90d1 430 if (unlikely(dentry == ERR_PTR(-ENOENT)))
8996f4f2 431 dentry = __fh_to_dentry(sb, cfh->parent_ino);
a8e63b7d
SW
432 return dentry;
433}
434
570df4e9
YZ
435static int __get_snap_name(struct dentry *parent, char *name,
436 struct dentry *child)
437{
438 struct inode *inode = d_inode(child);
439 struct inode *dir = d_inode(parent);
440 struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
441 struct ceph_mds_request *req = NULL;
442 char *last_name = NULL;
443 unsigned next_offset = 2;
444 int err = -EINVAL;
445
446 if (ceph_ino(inode) != ceph_ino(dir))
447 goto out;
448 if (ceph_snap(inode) == CEPH_SNAPDIR) {
449 if (ceph_snap(dir) == CEPH_NOSNAP) {
450 strcpy(name, fsc->mount_options->snapdir_name);
451 err = 0;
452 }
453 goto out;
454 }
455 if (ceph_snap(dir) != CEPH_SNAPDIR)
456 goto out;
457
458 while (1) {
459 struct ceph_mds_reply_info_parsed *rinfo;
460 struct ceph_mds_reply_dir_entry *rde;
461 int i;
462
463 req = ceph_mdsc_create_request(fsc->mdsc, CEPH_MDS_OP_LSSNAP,
464 USE_AUTH_MDS);
465 if (IS_ERR(req)) {
466 err = PTR_ERR(req);
467 req = NULL;
468 goto out;
469 }
470 err = ceph_alloc_readdir_reply_buffer(req, inode);
471 if (err)
472 goto out;
473
474 req->r_direct_mode = USE_AUTH_MDS;
475 req->r_readdir_offset = next_offset;
476 req->r_args.readdir.flags =
477 cpu_to_le16(CEPH_READDIR_REPLY_BITFLAGS);
478 if (last_name) {
479 req->r_path2 = last_name;
480 last_name = NULL;
481 }
482
483 req->r_inode = dir;
484 ihold(dir);
485 req->r_dentry = dget(parent);
486
487 inode_lock(dir);
488 err = ceph_mdsc_do_request(fsc->mdsc, NULL, req);
489 inode_unlock(dir);
490
491 if (err < 0)
492 goto out;
493
0ed26f36
ID
494 rinfo = &req->r_reply_info;
495 for (i = 0; i < rinfo->dir_nr; i++) {
496 rde = rinfo->dir_entries + i;
497 BUG_ON(!rde->inode.in);
498 if (ceph_snap(inode) ==
499 le64_to_cpu(rde->inode.in->snapid)) {
500 memcpy(name, rde->name, rde->name_len);
501 name[rde->name_len] = '\0';
502 err = 0;
503 goto out;
504 }
505 }
506
507 if (rinfo->dir_end)
508 break;
509
510 BUG_ON(rinfo->dir_nr <= 0);
511 rde = rinfo->dir_entries + (rinfo->dir_nr - 1);
512 next_offset += rinfo->dir_nr;
513 last_name = kstrndup(rde->name, rde->name_len, GFP_KERNEL);
514 if (!last_name) {
515 err = -ENOMEM;
516 goto out;
517 }
518
519 ceph_mdsc_put_request(req);
520 req = NULL;
570df4e9
YZ
521 }
522 err = -ENOENT;
523out:
524 if (req)
525 ceph_mdsc_put_request(req);
526 kfree(last_name);
527 dout("get_snap_name %p ino %llx.%llx err=%d\n",
528 child, ceph_vinop(inode), err);
529 return err;
530}
531
19913b4e
YZ
532static int ceph_get_name(struct dentry *parent, char *name,
533 struct dentry *child)
534{
535 struct ceph_mds_client *mdsc;
536 struct ceph_mds_request *req;
570df4e9 537 struct inode *inode = d_inode(child);
19913b4e
YZ
538 int err;
539
570df4e9
YZ
540 if (ceph_snap(inode) != CEPH_NOSNAP)
541 return __get_snap_name(parent, name, child);
542
543 mdsc = ceph_inode_to_client(inode)->mdsc;
19913b4e
YZ
544 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME,
545 USE_ANY_MDS);
546 if (IS_ERR(req))
547 return PTR_ERR(req);
548
5955102c 549 inode_lock(d_inode(parent));
19913b4e 550
570df4e9
YZ
551 req->r_inode = inode;
552 ihold(inode);
2b0143b5 553 req->r_ino2 = ceph_vino(d_inode(parent));
3dd69aab 554 req->r_parent = d_inode(parent);
4c183472 555 ihold(req->r_parent);
3dd69aab 556 set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
19913b4e
YZ
557 req->r_num_caps = 2;
558 err = ceph_mdsc_do_request(mdsc, NULL, req);
559
5955102c 560 inode_unlock(d_inode(parent));
19913b4e
YZ
561
562 if (!err) {
563 struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
564 memcpy(name, rinfo->dname, rinfo->dname_len);
565 name[rinfo->dname_len] = 0;
566 dout("get_name %p ino %llx.%llx name %s\n",
570df4e9 567 child, ceph_vinop(inode), name);
19913b4e
YZ
568 } else {
569 dout("get_name %p ino %llx.%llx err %d\n",
570df4e9 570 child, ceph_vinop(inode), err);
19913b4e
YZ
571 }
572
573 ceph_mdsc_put_request(req);
574 return err;
575}
576
a8e63b7d
SW
577const struct export_operations ceph_export_ops = {
578 .encode_fh = ceph_encode_fh,
579 .fh_to_dentry = ceph_fh_to_dentry,
580 .fh_to_parent = ceph_fh_to_parent,
9017c2ec 581 .get_parent = ceph_get_parent,
19913b4e 582 .get_name = ceph_get_name,
a8e63b7d 583};