1 // SPDX-License-Identifier: GPL-2.0
3 * The base64 encode/decode code was copied from fscrypt:
4 * Copyright (C) 2015, Google, Inc.
5 * Copyright (C) 2015, Motorola Mobility
6 * Written by Uday Savagaonkar, 2014.
7 * Modified by Jaegeuk Kim, 2015.
9 #include <linux/ceph/ceph_debug.h>
10 #include <linux/xattr.h>
11 #include <linux/fscrypt.h>
12 #include <linux/ceph/striper.h>
15 #include "mds_client.h"
19 * The base64url encoding used by fscrypt includes the '_' character, which may
20 * cause problems in snapshot names (which can not start with '_'). Thus, we
21 * used the base64 encoding defined for IMAP mailbox names (RFC 3501) instead,
22 * which replaces '-' and '_' by '+' and ','.
/*
 * 64-symbol encoding alphabet (plus the terminating NUL, hence 65 bytes).
 * The last two symbols are '+' and ','.
 */
static const char base64_table[65] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+,";
27 int ceph_base64_encode(const u8
*src
, int srclen
, char *dst
)
34 for (i
= 0; i
< srclen
; i
++) {
35 ac
= (ac
<< 8) | src
[i
];
39 *cp
++ = base64_table
[(ac
>> bits
) & 0x3f];
43 *cp
++ = base64_table
[(ac
<< (6 - bits
)) & 0x3f];
47 int ceph_base64_decode(const char *src
, int srclen
, u8
*dst
)
54 for (i
= 0; i
< srclen
; i
++) {
55 const char *p
= strchr(base64_table
, src
[i
]);
57 if (p
== NULL
|| src
[i
] == 0)
59 ac
= (ac
<< 6) | (p
- base64_table
);
63 *bp
++ = (u8
)(ac
>> bits
);
66 if (ac
& ((1 << bits
) - 1))
71 static int ceph_crypt_get_context(struct inode
*inode
, void *ctx
, size_t len
)
73 struct ceph_inode_info
*ci
= ceph_inode(inode
);
74 struct ceph_fscrypt_auth
*cfa
= (struct ceph_fscrypt_auth
*)ci
->fscrypt_auth
;
77 /* Non existent or too short? */
78 if (!cfa
|| (ci
->fscrypt_auth_len
< (offsetof(struct ceph_fscrypt_auth
, cfa_blob
) + 1)))
81 /* Some format we don't recognize? */
82 if (le32_to_cpu(cfa
->cfa_version
) != CEPH_FSCRYPT_AUTH_VERSION
)
85 ctxlen
= le32_to_cpu(cfa
->cfa_blob_len
);
89 memcpy(ctx
, cfa
->cfa_blob
, ctxlen
);
93 static int ceph_crypt_set_context(struct inode
*inode
, const void *ctx
,
94 size_t len
, void *fs_data
)
97 struct iattr attr
= { };
98 struct ceph_iattr cia
= { };
99 struct ceph_fscrypt_auth
*cfa
;
101 WARN_ON_ONCE(fs_data
);
103 if (len
> FSCRYPT_SET_CONTEXT_MAX_SIZE
)
106 cfa
= kzalloc(sizeof(*cfa
), GFP_KERNEL
);
110 cfa
->cfa_version
= cpu_to_le32(CEPH_FSCRYPT_AUTH_VERSION
);
111 cfa
->cfa_blob_len
= cpu_to_le32(len
);
112 memcpy(cfa
->cfa_blob
, ctx
, len
);
114 cia
.fscrypt_auth
= cfa
;
116 ret
= __ceph_setattr(inode
, &attr
, &cia
);
118 inode_set_flags(inode
, S_ENCRYPTED
, S_ENCRYPTED
);
119 kfree(cia
.fscrypt_auth
);
123 static bool ceph_crypt_empty_dir(struct inode
*inode
)
125 struct ceph_inode_info
*ci
= ceph_inode(inode
);
127 return ci
->i_rsubdirs
+ ci
->i_rfiles
== 1;
130 static const union fscrypt_policy
*ceph_get_dummy_policy(struct super_block
*sb
)
132 return ceph_sb_to_client(sb
)->fsc_dummy_enc_policy
.policy
;
135 static struct fscrypt_operations ceph_fscrypt_ops
= {
136 .get_context
= ceph_crypt_get_context
,
137 .set_context
= ceph_crypt_set_context
,
138 .get_dummy_policy
= ceph_get_dummy_policy
,
139 .empty_dir
= ceph_crypt_empty_dir
,
142 void ceph_fscrypt_set_ops(struct super_block
*sb
)
144 fscrypt_set_ops(sb
, &ceph_fscrypt_ops
);
147 void ceph_fscrypt_free_dummy_policy(struct ceph_fs_client
*fsc
)
149 fscrypt_free_dummy_policy(&fsc
->fsc_dummy_enc_policy
);
152 int ceph_fscrypt_prepare_context(struct inode
*dir
, struct inode
*inode
,
153 struct ceph_acl_sec_ctx
*as
)
156 bool encrypted
= false;
157 struct ceph_inode_info
*ci
= ceph_inode(inode
);
159 ret
= fscrypt_prepare_new_inode(dir
, inode
, &encrypted
);
165 as
->fscrypt_auth
= kzalloc(sizeof(*as
->fscrypt_auth
), GFP_KERNEL
);
166 if (!as
->fscrypt_auth
)
169 ctxsize
= fscrypt_context_for_new_inode(as
->fscrypt_auth
->cfa_blob
,
174 as
->fscrypt_auth
->cfa_version
= cpu_to_le32(CEPH_FSCRYPT_AUTH_VERSION
);
175 as
->fscrypt_auth
->cfa_blob_len
= cpu_to_le32(ctxsize
);
177 WARN_ON_ONCE(ci
->fscrypt_auth
);
178 kfree(ci
->fscrypt_auth
);
179 ci
->fscrypt_auth_len
= ceph_fscrypt_auth_len(as
->fscrypt_auth
);
180 ci
->fscrypt_auth
= kmemdup(as
->fscrypt_auth
, ci
->fscrypt_auth_len
,
182 if (!ci
->fscrypt_auth
)
185 inode
->i_flags
|= S_ENCRYPTED
;
190 void ceph_fscrypt_as_ctx_to_req(struct ceph_mds_request
*req
,
191 struct ceph_acl_sec_ctx
*as
)
193 swap(req
->r_fscrypt_auth
, as
->fscrypt_auth
);
197 * User-created snapshots can't start with '_'. Snapshots that start with this
198 * character are special (hint: they aren't real snapshots) and use the
201 * _<SNAPSHOT-NAME>_<INODE-NUMBER>
204 * - <SNAPSHOT-NAME> - the real snapshot name that may need to be decrypted,
205 * - <INODE-NUMBER> - the inode number (in decimal) for the actual snapshot
207 * This function parses these snapshot names and returns the inode
208 * <INODE-NUMBER>. 'name_len' will also be set with the <SNAPSHOT-NAME>
211 static struct inode
*parse_longname(const struct inode
*parent
,
212 const char *name
, int *name_len
)
214 struct inode
*dir
= NULL
;
215 struct ceph_vino vino
= { .snap
= CEPH_NOSNAP
};
218 int orig_len
= *name_len
;
221 /* Skip initial '_' */
223 name_end
= strrchr(name
, '_');
225 dout("Failed to parse long snapshot name: %s\n", name
);
226 return ERR_PTR(-EIO
);
228 *name_len
= (name_end
- name
);
229 if (*name_len
<= 0) {
230 pr_err("Failed to parse long snapshot name\n");
231 return ERR_PTR(-EIO
);
234 /* Get the inode number */
235 inode_number
= kmemdup_nul(name_end
+ 1,
236 orig_len
- *name_len
- 2,
239 return ERR_PTR(-ENOMEM
);
240 ret
= kstrtou64(inode_number
, 10, &vino
.ino
);
242 dout("Failed to parse inode number: %s\n", name
);
247 /* And finally the inode */
248 dir
= ceph_find_inode(parent
->i_sb
, vino
);
250 /* This can happen if we're not mounting cephfs on the root */
251 dir
= ceph_get_inode(parent
->i_sb
, vino
, NULL
);
253 dir
= ERR_PTR(-ENOENT
);
256 dout("Can't find inode %s (%s)\n", inode_number
, name
);
263 int ceph_encode_encrypted_dname(struct inode
*parent
, struct qstr
*d_name
,
266 struct inode
*dir
= parent
;
274 iname
.name
= d_name
->name
;
275 name_len
= d_name
->len
;
277 /* Handle the special case of snapshot names that start with '_' */
278 if ((ceph_snap(dir
) == CEPH_SNAPDIR
) && (name_len
> 0) &&
279 (iname
.name
[0] == '_')) {
280 dir
= parse_longname(parent
, iname
.name
, &name_len
);
283 iname
.name
++; /* skip initial '_' */
285 iname
.len
= name_len
;
287 if (!fscrypt_has_encryption_key(dir
)) {
288 memcpy(buf
, d_name
->name
, d_name
->len
);
294 * Convert cleartext d_name to ciphertext. If result is longer than
295 * CEPH_NOHASH_NAME_MAX, sha256 the remaining bytes
297 * See: fscrypt_setup_filename
299 if (!fscrypt_fname_encrypted_size(dir
, iname
.len
, NAME_MAX
, &len
)) {
300 elen
= -ENAMETOOLONG
;
304 /* Allocate a buffer appropriate to hold the result */
305 cryptbuf
= kmalloc(len
> CEPH_NOHASH_NAME_MAX
? NAME_MAX
: len
,
312 ret
= fscrypt_fname_encrypt(dir
, &iname
, cryptbuf
, len
);
318 /* hash the end if the name is long enough */
319 if (len
> CEPH_NOHASH_NAME_MAX
) {
320 u8 hash
[SHA256_DIGEST_SIZE
];
321 u8
*extra
= cryptbuf
+ CEPH_NOHASH_NAME_MAX
;
324 * hash the extra bytes and overwrite crypttext beyond that
327 sha256(extra
, len
- CEPH_NOHASH_NAME_MAX
, hash
);
328 memcpy(extra
, hash
, SHA256_DIGEST_SIZE
);
329 len
= CEPH_NOHASH_NAME_MAX
+ SHA256_DIGEST_SIZE
;
332 /* base64 encode the encrypted name */
333 elen
= ceph_base64_encode(cryptbuf
, len
, buf
);
334 dout("base64-encoded ciphertext name = %.*s\n", elen
, buf
);
336 /* To understand the 240 limit, see CEPH_NOHASH_NAME_MAX comments */
338 if ((elen
> 0) && (dir
!= parent
)) {
339 char tmp_buf
[NAME_MAX
];
341 elen
= snprintf(tmp_buf
, sizeof(tmp_buf
), "_%.*s_%ld",
342 elen
, buf
, dir
->i_ino
);
343 memcpy(buf
, tmp_buf
, elen
);
349 if ((dir
->i_state
& I_NEW
))
350 discard_new_inode(dir
);
357 int ceph_encode_encrypted_fname(struct inode
*parent
, struct dentry
*dentry
,
360 WARN_ON_ONCE(!fscrypt_has_encryption_key(parent
));
362 return ceph_encode_encrypted_dname(parent
, &dentry
->d_name
, buf
);
366 * ceph_fname_to_usr - convert a filename for userland presentation
367 * @fname: ceph_fname to be converted
368 * @tname: temporary name buffer to use for conversion (may be NULL)
369 * @oname: where converted name should be placed
370 * @is_nokey: set to true if key wasn't available during conversion (may be NULL)
372 * Given a filename (usually from the MDS), format it for presentation to
373 * userland. If the parent directory (@fname->dir) is not encrypted, just pass it back as-is.
375 * Otherwise, base64 decode the string, and then ask fscrypt to format it
376 * for userland presentation.
378 * Returns 0 on success or negative error code on error.
380 int ceph_fname_to_usr(const struct ceph_fname
*fname
, struct fscrypt_str
*tname
,
381 struct fscrypt_str
*oname
, bool *is_nokey
)
383 struct inode
*dir
= fname
->dir
;
384 struct fscrypt_str _tname
= FSTR_INIT(NULL
, 0);
385 struct fscrypt_str iname
;
386 char *name
= fname
->name
;
387 int name_len
= fname
->name_len
;
390 /* Sanity check that the resulting name will fit in the buffer */
391 if (fname
->name_len
> NAME_MAX
|| fname
->ctext_len
> NAME_MAX
)
394 /* Handle the special case of snapshot names that start with '_' */
395 if ((ceph_snap(dir
) == CEPH_SNAPDIR
) && (name_len
> 0) &&
397 dir
= parse_longname(dir
, name
, &name_len
);
400 name
++; /* skip initial '_' */
403 if (!IS_ENCRYPTED(dir
)) {
404 oname
->name
= fname
->name
;
405 oname
->len
= fname
->name_len
;
410 ret
= ceph_fscrypt_prepare_readdir(dir
);
415 * Use the raw dentry name as sent by the MDS instead of
416 * generating a nokey name via fscrypt.
418 if (!fscrypt_has_encryption_key(dir
)) {
420 oname
->name
= fname
->name
;
422 memcpy(oname
->name
, fname
->name
, fname
->name_len
);
423 oname
->len
= fname
->name_len
;
430 if (fname
->ctext_len
== 0) {
434 ret
= fscrypt_fname_alloc_buffer(NAME_MAX
, &_tname
);
440 declen
= ceph_base64_decode(name
, name_len
, tname
->name
);
445 iname
.name
= tname
->name
;
448 iname
.name
= fname
->ctext
;
449 iname
.len
= fname
->ctext_len
;
452 ret
= fscrypt_fname_disk_to_usr(dir
, 0, 0, &iname
, oname
);
453 if (!ret
&& (dir
!= fname
->dir
)) {
454 char tmp_buf
[CEPH_BASE64_CHARS(NAME_MAX
)];
456 name_len
= snprintf(tmp_buf
, sizeof(tmp_buf
), "_%.*s_%ld",
457 oname
->len
, oname
->name
, dir
->i_ino
);
458 memcpy(oname
->name
, tmp_buf
, name_len
);
459 oname
->len
= name_len
;
463 fscrypt_fname_free_buffer(&_tname
);
465 if ((dir
!= fname
->dir
) && !IS_ERR(dir
)) {
466 if ((dir
->i_state
& I_NEW
))
467 discard_new_inode(dir
);
475 * ceph_fscrypt_prepare_readdir - simple __fscrypt_prepare_readdir() wrapper
476 * @dir: directory inode for readdir prep
478 * Simple wrapper around __fscrypt_prepare_readdir() that will mark directory as
479 * non-complete if this call results in having the directory unlocked.
482 * 1 - if directory was locked and key is now loaded (i.e. dir is unlocked)
483 * 0 - if directory is still locked
484 * < 0 - if __fscrypt_prepare_readdir() fails
486 int ceph_fscrypt_prepare_readdir(struct inode
*dir
)
488 bool had_key
= fscrypt_has_encryption_key(dir
);
491 if (!IS_ENCRYPTED(dir
))
494 err
= __fscrypt_prepare_readdir(dir
);
497 if (!had_key
&& fscrypt_has_encryption_key(dir
)) {
498 /* directory just got unlocked, mark it as not complete */
499 ceph_dir_clear_complete(dir
);
505 int ceph_fscrypt_decrypt_block_inplace(const struct inode
*inode
,
506 struct page
*page
, unsigned int len
,
507 unsigned int offs
, u64 lblk_num
)
509 dout("%s: len %u offs %u blk %llu\n", __func__
, len
, offs
, lblk_num
);
510 return fscrypt_decrypt_block_inplace(inode
, page
, len
, offs
, lblk_num
);
513 int ceph_fscrypt_encrypt_block_inplace(const struct inode
*inode
,
514 struct page
*page
, unsigned int len
,
515 unsigned int offs
, u64 lblk_num
,
518 dout("%s: len %u offs %u blk %llu\n", __func__
, len
, offs
, lblk_num
);
519 return fscrypt_encrypt_block_inplace(inode
, page
, len
, offs
, lblk_num
,
524 * ceph_fscrypt_decrypt_pages - decrypt an array of pages
525 * @inode: pointer to inode associated with these pages
526 * @page: pointer to page array
527 * @off: offset into the file that the read data starts
528 * @len: max length to decrypt
530 * Decrypt an array of fscrypt'ed pages and return the amount of
531 * data decrypted. Any data in the page prior to the start of the
532 * first complete block in the read is ignored. Any incomplete
533 * crypto blocks at the end of the array are ignored (and should
534 * probably be zeroed by the caller).
536 * Returns the length of the decrypted data or a negative errno.
538 int ceph_fscrypt_decrypt_pages(struct inode
*inode
, struct page
**page
,
542 u64 baseblk
= off
>> CEPH_FSCRYPT_BLOCK_SHIFT
;
546 * We can't deal with partial blocks on an encrypted file, so mask off
549 num_blocks
= ceph_fscrypt_blocks(off
, len
& CEPH_FSCRYPT_BLOCK_MASK
);
551 /* Decrypt each block */
552 for (i
= 0; i
< num_blocks
; ++i
) {
553 int blkoff
= i
<< CEPH_FSCRYPT_BLOCK_SHIFT
;
554 int pgidx
= blkoff
>> PAGE_SHIFT
;
555 unsigned int pgoffs
= offset_in_page(blkoff
);
558 fret
= ceph_fscrypt_decrypt_block_inplace(inode
, page
[pgidx
],
559 CEPH_FSCRYPT_BLOCK_SIZE
, pgoffs
,
566 ret
+= CEPH_FSCRYPT_BLOCK_SIZE
;
572 * ceph_fscrypt_decrypt_extents: decrypt received extents in given buffer
573 * @inode: inode associated with pages being decrypted
574 * @page: pointer to page array
575 * @off: offset into the file that the data in page[0] starts
576 * @map: pointer to extent array
577 * @ext_cnt: length of extent array
579 * Given an extent map and a page array, decrypt the received data in-place,
580 * skipping holes. Returns the offset into buffer of end of last decrypted
583 int ceph_fscrypt_decrypt_extents(struct inode
*inode
, struct page
**page
,
584 u64 off
, struct ceph_sparse_extent
*map
,
588 struct ceph_inode_info
*ci
= ceph_inode(inode
);
592 /* Nothing to do for empty array */
594 dout("%s: empty array, ret 0\n", __func__
);
598 ceph_calc_file_object_mapping(&ci
->i_layout
, off
, map
[0].len
,
599 &objno
, &objoff
, &xlen
);
601 for (i
= 0; i
< ext_cnt
; ++i
) {
602 struct ceph_sparse_extent
*ext
= &map
[i
];
603 int pgsoff
= ext
->off
- objoff
;
604 int pgidx
= pgsoff
>> PAGE_SHIFT
;
607 if ((ext
->off
| ext
->len
) & ~CEPH_FSCRYPT_BLOCK_MASK
) {
608 pr_warn("%s: bad encrypted sparse extent idx %d off %llx len %llx\n",
609 __func__
, i
, ext
->off
, ext
->len
);
612 fret
= ceph_fscrypt_decrypt_pages(inode
, &page
[pgidx
],
613 off
+ pgsoff
, ext
->len
);
614 dout("%s: [%d] 0x%llx~0x%llx fret %d\n", __func__
, i
,
615 ext
->off
, ext
->len
, fret
);
623 dout("%s: ret %d\n", __func__
, ret
);
628 * ceph_fscrypt_encrypt_pages - encrypt an array of pages
629 * @inode: pointer to inode associated with these pages
630 * @page: pointer to page array
631 * @off: offset into the file that the data starts
632 * @len: max length to encrypt
633 * @gfp: gfp flags to use for allocation
635 * Encrypt an array of cleartext pages and return the amount of
636 * data encrypted. Any data in the page prior to the start of the
637 * first complete block in the read is ignored. Any incomplete
638 * crypto blocks at the end of the array are ignored.
640 * Returns the length of the encrypted data or a negative errno.
642 int ceph_fscrypt_encrypt_pages(struct inode
*inode
, struct page
**page
, u64 off
,
646 u64 baseblk
= off
>> CEPH_FSCRYPT_BLOCK_SHIFT
;
650 * We can't deal with partial blocks on an encrypted file, so mask off
653 num_blocks
= ceph_fscrypt_blocks(off
, len
& CEPH_FSCRYPT_BLOCK_MASK
);
655 /* Encrypt each block */
656 for (i
= 0; i
< num_blocks
; ++i
) {
657 int blkoff
= i
<< CEPH_FSCRYPT_BLOCK_SHIFT
;
658 int pgidx
= blkoff
>> PAGE_SHIFT
;
659 unsigned int pgoffs
= offset_in_page(blkoff
);
662 fret
= ceph_fscrypt_encrypt_block_inplace(inode
, page
[pgidx
],
663 CEPH_FSCRYPT_BLOCK_SIZE
, pgoffs
,
670 ret
+= CEPH_FSCRYPT_BLOCK_SIZE
;