From: Greg Kroah-Hartman Date: Thu, 6 Dec 2018 14:28:25 +0000 (+0100) Subject: 4.9-stable patches X-Git-Tag: v4.19.8~5 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=f800b258d21a4a7a5d2f370891e853af067fea98;p=thirdparty%2Fkernel%2Fstable-queue.git 4.9-stable patches added patches: bpf-prevent-memory-disambiguation-attack.patch bpf-verifier-add-spi-variable-to-check_stack_write.patch bpf-verifier-pass-instruction-index-to-check_mem_access-and-check_xadd.patch btrfs-add-checker-for-extent_csum.patch btrfs-add-sanity-check-for-extent_data-when-reading-out-leaf.patch btrfs-check-if-item-pointer-overlaps-with-the-item-itself.patch btrfs-check-that-each-block-group-has-corresponding-chunk-at-mount-time.patch btrfs-move-leaf-and-node-validation-checker-to-tree-checker.c.patch btrfs-refactor-check_leaf-function-for-later-expansion.patch btrfs-struct-funcs-constify-readers.patch btrfs-tree-check-reduce-stack-consumption-in-check_dir_item.patch btrfs-tree-checker-add-checker-for-dir-item.patch btrfs-tree-checker-check-level-for-leaves-and-nodes.patch btrfs-tree-checker-detect-invalid-and-empty-essential-trees.patch btrfs-tree-checker-enhance-btrfs_check_node-output.patch btrfs-tree-checker-fix-false-panic-for-sanity-test.patch btrfs-tree-checker-fix-misleading-group-system-information.patch btrfs-tree-checker-use-zu-format-string-for-size_t.patch btrfs-tree-checker-verify-block_group_item.patch btrfs-validate-type-when-reading-a-chunk.patch btrfs-verify-that-every-chunk-has-corresponding-block-group-at-mount-time.patch f2fs-add-sanity_check_inode-function.patch f2fs-check-blkaddr-more-accuratly-before-issue-a-bio.patch f2fs-clean-up-with-is_valid_blkaddr.patch f2fs-detect-wrong-layout.patch f2fs-enhance-sanity_check_raw_super-to-avoid-potential-overflow.patch f2fs-fix-a-panic-caused-by-null-flush_cmd_control.patch f2fs-fix-missing-up_read.patch f2fs-fix-race-condition-in-between-free-nid-allocator-initializer.patch 
f2fs-fix-to-do-sanity-check-with-block-address-in-main-area-v2.patch f2fs-fix-to-do-sanity-check-with-block-address-in-main-area.patch f2fs-fix-to-do-sanity-check-with-cp_pack_start_sum.patch f2fs-fix-to-do-sanity-check-with-node-footer-and-iblocks.patch f2fs-fix-to-do-sanity-check-with-secs_per_zone.patch f2fs-fix-to-do-sanity-check-with-user_block_count.patch f2fs-free-meta-pages-if-sanity-check-for-ckpt-is-failed.patch f2fs-introduce-and-spread-verify_blkaddr.patch f2fs-return-error-during-fill_super.patch f2fs-sanity-check-on-sit-entry.patch hugetlbfs-check-for-pgoff-value-overflow.patch hugetlbfs-fix-offset-overflow-in-hugetlbfs-mmap.patch libceph-add-authorizer-challenge.patch libceph-check-authorizer-reply-challenge-length-before-reading.patch libceph-drop-len-argument-of-verify_authorizer_reply.patch libceph-factor-out-__ceph_x_decrypt.patch libceph-factor-out-__prepare_write_connect.patch libceph-factor-out-encrypt_authorizer.patch libceph-implement-cephx_v2-calculation-mode.patch libceph-no-need-to-drop-con-mutex-for-get_authorizer.patch libceph-store-ceph_auth_handshake-pointer-in-ceph_connection.patch libceph-weaken-sizeof-check-in-ceph_x_verify_authorizer_reply.patch mm-hugetlb.c-don-t-call-region_abort-if-region_chg-fails.patch wil6210-missing-length-check-in-wmi_set_ie.patch xfs-don-t-fail-when-converting-shortform-attr-to-long-form-during-attr_replace.patch --- diff --git a/queue-4.9/bpf-prevent-memory-disambiguation-attack.patch b/queue-4.9/bpf-prevent-memory-disambiguation-attack.patch new file mode 100644 index 00000000000..ed9c69e4c4a --- /dev/null +++ b/queue-4.9/bpf-prevent-memory-disambiguation-attack.patch @@ -0,0 +1,148 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Alexei Starovoitov +Date: Tue, 15 May 2018 09:27:05 -0700 +Subject: bpf: Prevent memory disambiguation attack + +From: Alexei Starovoitov + +commit af86ca4e3088fe5eacf2f7e58c01fa68ca067672 upstream. 
+ +Detect code patterns where malicious 'speculative store bypass' can be used +and sanitize such patterns. + + 39: (bf) r3 = r10 + 40: (07) r3 += -216 + 41: (79) r8 = *(u64 *)(r7 +0) // slow read + 42: (7a) *(u64 *)(r10 -72) = 0 // verifier inserts this instruction + 43: (7b) *(u64 *)(r8 +0) = r3 // this store becomes slow due to r8 + 44: (79) r1 = *(u64 *)(r6 +0) // cpu speculatively executes this load + 45: (71) r2 = *(u8 *)(r1 +0) // speculatively arbitrary 'load byte' + // is now sanitized + +Above code after x86 JIT becomes: + e5: mov %rbp,%rdx + e8: add $0xffffffffffffff28,%rdx + ef: mov 0x0(%r13),%r14 + f3: movq $0x0,-0x48(%rbp) + fb: mov %rdx,0x0(%r14) + ff: mov 0x0(%rbx),%rdi +103: movzbq 0x0(%rdi),%rsi + +Signed-off-by: Alexei Starovoitov +Signed-off-by: Thomas Gleixner +[bwh: Backported to 4.9: + - Add bpf_verifier_env parameter to check_stack_write() + - Look up stack slot_types with state->stack_slot_type[] rather than + state->stack[].slot_type[] + - Drop bpf_verifier_env argument to verbose() + - Adjust context] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/bpf_verifier.h | 1 + kernel/bpf/verifier.c | 62 ++++++++++++++++++++++++++++++++++++++++--- + 2 files changed, 59 insertions(+), 4 deletions(-) + +--- a/include/linux/bpf_verifier.h ++++ b/include/linux/bpf_verifier.h +@@ -71,6 +71,7 @@ struct bpf_insn_aux_data { + enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ + struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */ + }; ++ int sanitize_stack_off; /* stack slot to be cleared */ + bool seen; /* this insn was processed by the verifier */ + }; + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -540,8 +540,9 @@ static bool is_spillable_regtype(enum bp + /* check_stack_read/write functions track spill/fill of registers, + * stack boundary and alignment are checked in check_mem_access() + */ +-static int check_stack_write(struct bpf_verifier_state *state, int off, 
+- int size, int value_regno) ++static int check_stack_write(struct bpf_verifier_env *env, ++ struct bpf_verifier_state *state, int off, ++ int size, int value_regno, int insn_idx) + { + int i, spi = (MAX_BPF_STACK + off) / BPF_REG_SIZE; + /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, +@@ -560,8 +561,32 @@ static int check_stack_write(struct bpf_ + /* save register state */ + state->spilled_regs[spi] = state->regs[value_regno]; + +- for (i = 0; i < BPF_REG_SIZE; i++) ++ for (i = 0; i < BPF_REG_SIZE; i++) { ++ if (state->stack_slot_type[MAX_BPF_STACK + off + i] == STACK_MISC && ++ !env->allow_ptr_leaks) { ++ int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off; ++ int soff = (-spi - 1) * BPF_REG_SIZE; ++ ++ /* detected reuse of integer stack slot with a pointer ++ * which means either llvm is reusing stack slot or ++ * an attacker is trying to exploit CVE-2018-3639 ++ * (speculative store bypass) ++ * Have to sanitize that slot with preemptive ++ * store of zero. 
++ */ ++ if (*poff && *poff != soff) { ++ /* disallow programs where single insn stores ++ * into two different stack slots, since verifier ++ * cannot sanitize them ++ */ ++ verbose("insn %d cannot access two stack slots fp%d and fp%d", ++ insn_idx, *poff, soff); ++ return -EINVAL; ++ } ++ *poff = soff; ++ } + state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_SPILL; ++ } + } else { + /* regular write of data into stack */ + state->spilled_regs[spi] = (struct bpf_reg_state) {}; +@@ -841,7 +866,8 @@ static int check_mem_access(struct bpf_v + verbose("attempt to corrupt spilled pointer on stack\n"); + return -EACCES; + } +- err = check_stack_write(state, off, size, value_regno); ++ err = check_stack_write(env, state, off, size, ++ value_regno, insn_idx); + } else { + err = check_stack_read(state, off, size, value_regno); + } +@@ -3367,6 +3393,34 @@ static int convert_ctx_accesses(struct b + else + continue; + ++ if (type == BPF_WRITE && ++ env->insn_aux_data[i + delta].sanitize_stack_off) { ++ struct bpf_insn patch[] = { ++ /* Sanitize suspicious stack slot with zero. 
++ * There are no memory dependencies for this store, ++ * since it's only using frame pointer and immediate ++ * constant of zero ++ */ ++ BPF_ST_MEM(BPF_DW, BPF_REG_FP, ++ env->insn_aux_data[i + delta].sanitize_stack_off, ++ 0), ++ /* the original STX instruction will immediately ++ * overwrite the same stack slot with appropriate value ++ */ ++ *insn, ++ }; ++ ++ cnt = ARRAY_SIZE(patch); ++ new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt); ++ if (!new_prog) ++ return -ENOMEM; ++ ++ delta += cnt - 1; ++ env->prog = new_prog; ++ insn = new_prog->insnsi + i + delta; ++ continue; ++ } ++ + if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX) + continue; + diff --git a/queue-4.9/bpf-verifier-add-spi-variable-to-check_stack_write.patch b/queue-4.9/bpf-verifier-add-spi-variable-to-check_stack_write.patch new file mode 100644 index 00000000000..d6b488d504b --- /dev/null +++ b/queue-4.9/bpf-verifier-add-spi-variable-to-check_stack_write.patch @@ -0,0 +1,47 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Ben Hutchings +Date: Wed, 5 Dec 2018 22:45:15 +0000 +Subject: bpf/verifier: Add spi variable to check_stack_write() + +From: Ben Hutchings + +Extracted from commit dc503a8ad984 "bpf/verifier: track liveness for +pruning". 
+ +Cc: Daniel Borkmann +Cc: Alexei Starovoitov +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/verifier.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -543,7 +543,7 @@ static bool is_spillable_regtype(enum bp + static int check_stack_write(struct bpf_verifier_state *state, int off, + int size, int value_regno) + { +- int i; ++ int i, spi = (MAX_BPF_STACK + off) / BPF_REG_SIZE; + /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, + * so it's aligned access and [off, off + size) are within stack limits + */ +@@ -558,15 +558,13 @@ static int check_stack_write(struct bpf_ + } + + /* save register state */ +- state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] = +- state->regs[value_regno]; ++ state->spilled_regs[spi] = state->regs[value_regno]; + + for (i = 0; i < BPF_REG_SIZE; i++) + state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_SPILL; + } else { + /* regular write of data into stack */ +- state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] = +- (struct bpf_reg_state) {}; ++ state->spilled_regs[spi] = (struct bpf_reg_state) {}; + + for (i = 0; i < size; i++) + state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC; diff --git a/queue-4.9/bpf-verifier-pass-instruction-index-to-check_mem_access-and-check_xadd.patch b/queue-4.9/bpf-verifier-pass-instruction-index-to-check_mem_access-and-check_xadd.patch new file mode 100644 index 00000000000..1c0fa89bd31 --- /dev/null +++ b/queue-4.9/bpf-verifier-pass-instruction-index-to-check_mem_access-and-check_xadd.patch @@ -0,0 +1,99 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Ben Hutchings +Date: Wed, 5 Dec 2018 22:41:36 +0000 +Subject: bpf/verifier: Pass instruction index to check_mem_access() and check_xadd() + +From: Ben Hutchings + +Extracted from commit 31fd85816dbe "bpf: permits narrower load from +bpf program context fields". 
+ +Cc: Daniel Borkmann +Cc: Alexei Starovoitov +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/verifier.c | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -745,7 +745,7 @@ static int check_ptr_alignment(struct bp + * if t==write && value_regno==-1, some unknown value is stored into memory + * if t==read && value_regno==-1, don't care what we read from memory + */ +-static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off, ++static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno, int off, + int bpf_size, enum bpf_access_type t, + int value_regno) + { +@@ -875,7 +875,7 @@ static int check_mem_access(struct bpf_v + return err; + } + +-static int check_xadd(struct bpf_verifier_env *env, struct bpf_insn *insn) ++static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn) + { + struct bpf_reg_state *regs = env->cur_state.regs; + int err; +@@ -908,13 +908,13 @@ static int check_xadd(struct bpf_verifie + } + + /* check whether atomic_add can read the memory */ +- err = check_mem_access(env, insn->dst_reg, insn->off, ++ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, + BPF_SIZE(insn->code), BPF_READ, -1); + if (err) + return err; + + /* check whether atomic_add can write into the same memory */ +- return check_mem_access(env, insn->dst_reg, insn->off, ++ return check_mem_access(env, insn_idx, insn->dst_reg, insn->off, + BPF_SIZE(insn->code), BPF_WRITE, -1); + } + +@@ -1270,7 +1270,7 @@ static int check_call(struct bpf_verifie + * is inferred from register state. 
+ */ + for (i = 0; i < meta.access_size; i++) { +- err = check_mem_access(env, meta.regno, i, BPF_B, BPF_WRITE, -1); ++ err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B, BPF_WRITE, -1); + if (err) + return err; + } +@@ -2936,7 +2936,7 @@ static int do_check(struct bpf_verifier_ + /* check that memory (src_reg + off) is readable, + * the state of dst_reg will be updated by this func + */ +- err = check_mem_access(env, insn->src_reg, insn->off, ++ err = check_mem_access(env, insn_idx, insn->src_reg, insn->off, + BPF_SIZE(insn->code), BPF_READ, + insn->dst_reg); + if (err) +@@ -2976,7 +2976,7 @@ static int do_check(struct bpf_verifier_ + enum bpf_reg_type *prev_dst_type, dst_reg_type; + + if (BPF_MODE(insn->code) == BPF_XADD) { +- err = check_xadd(env, insn); ++ err = check_xadd(env, insn_idx, insn); + if (err) + return err; + insn_idx++; +@@ -2995,7 +2995,7 @@ static int do_check(struct bpf_verifier_ + dst_reg_type = regs[insn->dst_reg].type; + + /* check that memory (dst_reg + off) is writeable */ +- err = check_mem_access(env, insn->dst_reg, insn->off, ++ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, + BPF_SIZE(insn->code), BPF_WRITE, + insn->src_reg); + if (err) +@@ -3030,7 +3030,7 @@ static int do_check(struct bpf_verifier_ + } + + /* check that memory (dst_reg + off) is writeable */ +- err = check_mem_access(env, insn->dst_reg, insn->off, ++ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, + BPF_SIZE(insn->code), BPF_WRITE, + -1); + if (err) diff --git a/queue-4.9/btrfs-add-checker-for-extent_csum.patch b/queue-4.9/btrfs-add-checker-for-extent_csum.patch new file mode 100644 index 00000000000..b92520e80d3 --- /dev/null +++ b/queue-4.9/btrfs-add-checker-for-extent_csum.patch @@ -0,0 +1,72 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Qu Wenruo +Date: Wed, 23 Aug 2017 16:57:59 +0900 +Subject: btrfs: Add checker for EXTENT_CSUM + +From: Qu Wenruo + +commit 4b865cab96fe2a30ed512cf667b354bd291b3b0a upstream. 
+ +EXTENT_CSUM checker is a relatively easy one, only needs to check: + +1) Objectid + Fixed to BTRFS_EXTENT_CSUM_OBJECTID + +2) Key offset alignment + Must be aligned to sectorsize + +3) Item size alignment + Must be aligned to csum size + +Signed-off-by: Qu Wenruo +Reviewed-by: Nikolay Borisov +Reviewed-by: David Sterba +Signed-off-by: David Sterba +[bwh: Backported to 4.9: Use root->sectorsize instead of + root->fs_info->sectorsize] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/disk-io.c | 24 ++++++++++++++++++++++++ + 1 file changed, 24 insertions(+) + +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -621,6 +621,27 @@ static int check_extent_data_item(struct + return 0; + } + ++static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf, ++ struct btrfs_key *key, int slot) ++{ ++ u32 sectorsize = root->sectorsize; ++ u32 csumsize = btrfs_super_csum_size(root->fs_info->super_copy); ++ ++ if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) { ++ CORRUPT("invalid objectid for csum item", leaf, root, slot); ++ return -EUCLEAN; ++ } ++ if (!IS_ALIGNED(key->offset, sectorsize)) { ++ CORRUPT("unaligned key offset for csum item", leaf, root, slot); ++ return -EUCLEAN; ++ } ++ if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) { ++ CORRUPT("unaligned csum item size", leaf, root, slot); ++ return -EUCLEAN; ++ } ++ return 0; ++} ++ + /* + * Common point to switch the item-specific validation. 
+ */ +@@ -634,6 +655,9 @@ static int check_leaf_item(struct btrfs_ + case BTRFS_EXTENT_DATA_KEY: + ret = check_extent_data_item(root, leaf, key, slot); + break; ++ case BTRFS_EXTENT_CSUM_KEY: ++ ret = check_csum_item(root, leaf, key, slot); ++ break; + } + return ret; + } diff --git a/queue-4.9/btrfs-add-sanity-check-for-extent_data-when-reading-out-leaf.patch b/queue-4.9/btrfs-add-sanity-check-for-extent_data-when-reading-out-leaf.patch new file mode 100644 index 00000000000..bbd799b04a9 --- /dev/null +++ b/queue-4.9/btrfs-add-sanity-check-for-extent_data-when-reading-out-leaf.patch @@ -0,0 +1,184 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Qu Wenruo +Date: Wed, 23 Aug 2017 16:57:58 +0900 +Subject: btrfs: Add sanity check for EXTENT_DATA when reading out leaf + +From: Qu Wenruo + +commit 40c3c40947324d9f40bf47830c92c59a9bbadf4a upstream. + +Add extra checks for item with EXTENT_DATA type. This checks the +following thing: + +0) Key offset + All key offsets must be aligned to sectorsize. + Inline extent must have 0 for key offset. + +1) Item size + Uncompressed inline file extent size must match item size. + (Compressed inline file extent has no information about its on-disk size.) + Regular/preallocated file extent size must be a fixed value. + +2) Every member of regular file extent item + Including alignment for bytenr and offset, possible value for + compression/encryption/type. + +3) Type/compression/encode must be one of the valid values. + +This should be the most comprehensive and strict check in the context +of btrfs_item for EXTENT_DATA. 
+ +Signed-off-by: Qu Wenruo +Reviewed-by: Nikolay Borisov +Reviewed-by: David Sterba +[ switch to BTRFS_FILE_EXTENT_TYPES, similar to what + BTRFS_COMPRESS_TYPES does ] +Signed-off-by: David Sterba +[bwh: Backported to 4.9: Use root->sectorsize instead of + root->fs_info->sectorsize] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/disk-io.c | 103 ++++++++++++++++++++++++++++++++++++++++ + include/uapi/linux/btrfs_tree.h | 1 + 2 files changed, 104 insertions(+) + +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -544,6 +544,100 @@ static int check_tree_block_fsid(struct + btrfs_header_level(eb) == 0 ? "leaf" : "node",\ + reason, btrfs_header_bytenr(eb), root->objectid, slot) + ++static int check_extent_data_item(struct btrfs_root *root, ++ struct extent_buffer *leaf, ++ struct btrfs_key *key, int slot) ++{ ++ struct btrfs_file_extent_item *fi; ++ u32 sectorsize = root->sectorsize; ++ u32 item_size = btrfs_item_size_nr(leaf, slot); ++ ++ if (!IS_ALIGNED(key->offset, sectorsize)) { ++ CORRUPT("unaligned key offset for file extent", ++ leaf, root, slot); ++ return -EUCLEAN; ++ } ++ ++ fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); ++ ++ if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) { ++ CORRUPT("invalid file extent type", leaf, root, slot); ++ return -EUCLEAN; ++ } ++ ++ /* ++ * Support for new compression/encrption must introduce incompat flag, ++ * and must be caught in open_ctree(). 
++ */ ++ if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) { ++ CORRUPT("invalid file extent compression", leaf, root, slot); ++ return -EUCLEAN; ++ } ++ if (btrfs_file_extent_encryption(leaf, fi)) { ++ CORRUPT("invalid file extent encryption", leaf, root, slot); ++ return -EUCLEAN; ++ } ++ if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) { ++ /* Inline extent must have 0 as key offset */ ++ if (key->offset) { ++ CORRUPT("inline extent has non-zero key offset", ++ leaf, root, slot); ++ return -EUCLEAN; ++ } ++ ++ /* Compressed inline extent has no on-disk size, skip it */ ++ if (btrfs_file_extent_compression(leaf, fi) != ++ BTRFS_COMPRESS_NONE) ++ return 0; ++ ++ /* Uncompressed inline extent size must match item size */ ++ if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START + ++ btrfs_file_extent_ram_bytes(leaf, fi)) { ++ CORRUPT("plaintext inline extent has invalid size", ++ leaf, root, slot); ++ return -EUCLEAN; ++ } ++ return 0; ++ } ++ ++ /* Regular or preallocated extent has fixed item size */ ++ if (item_size != sizeof(*fi)) { ++ CORRUPT( ++ "regluar or preallocated extent data item size is invalid", ++ leaf, root, slot); ++ return -EUCLEAN; ++ } ++ if (!IS_ALIGNED(btrfs_file_extent_ram_bytes(leaf, fi), sectorsize) || ++ !IS_ALIGNED(btrfs_file_extent_disk_bytenr(leaf, fi), sectorsize) || ++ !IS_ALIGNED(btrfs_file_extent_disk_num_bytes(leaf, fi), sectorsize) || ++ !IS_ALIGNED(btrfs_file_extent_offset(leaf, fi), sectorsize) || ++ !IS_ALIGNED(btrfs_file_extent_num_bytes(leaf, fi), sectorsize)) { ++ CORRUPT( ++ "regular or preallocated extent data item has unaligned value", ++ leaf, root, slot); ++ return -EUCLEAN; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Common point to switch the item-specific validation. 
++ */ ++static int check_leaf_item(struct btrfs_root *root, ++ struct extent_buffer *leaf, ++ struct btrfs_key *key, int slot) ++{ ++ int ret = 0; ++ ++ switch (key->type) { ++ case BTRFS_EXTENT_DATA_KEY: ++ ret = check_extent_data_item(root, leaf, key, slot); ++ break; ++ } ++ return ret; ++} ++ + static noinline int check_leaf(struct btrfs_root *root, + struct extent_buffer *leaf) + { +@@ -599,9 +693,13 @@ static noinline int check_leaf(struct bt + * 1) key order + * 2) item offset and size + * No overlap, no hole, all inside the leaf. ++ * 3) item content ++ * If possible, do comprehensive sanity check. ++ * NOTE: All checks must only rely on the item data itself. + */ + for (slot = 0; slot < nritems; slot++) { + u32 item_end_expected; ++ int ret; + + btrfs_item_key_to_cpu(leaf, &key, slot); + +@@ -644,6 +742,11 @@ static noinline int check_leaf(struct bt + return -EUCLEAN; + } + ++ /* Check if the item size and content meet other criteria */ ++ ret = check_leaf_item(root, leaf, &key, slot); ++ if (ret < 0) ++ return ret; ++ + prev_key.objectid = key.objectid; + prev_key.type = key.type; + prev_key.offset = key.offset; +--- a/include/uapi/linux/btrfs_tree.h ++++ b/include/uapi/linux/btrfs_tree.h +@@ -730,6 +730,7 @@ struct btrfs_balance_item { + #define BTRFS_FILE_EXTENT_INLINE 0 + #define BTRFS_FILE_EXTENT_REG 1 + #define BTRFS_FILE_EXTENT_PREALLOC 2 ++#define BTRFS_FILE_EXTENT_TYPES 2 + + struct btrfs_file_extent_item { + /* diff --git a/queue-4.9/btrfs-check-if-item-pointer-overlaps-with-the-item-itself.patch b/queue-4.9/btrfs-check-if-item-pointer-overlaps-with-the-item-itself.patch new file mode 100644 index 00000000000..f2cd1ab8f2c --- /dev/null +++ b/queue-4.9/btrfs-check-if-item-pointer-overlaps-with-the-item-itself.patch @@ -0,0 +1,41 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Qu Wenruo +Date: Wed, 23 Aug 2017 16:57:57 +0900 +Subject: btrfs: Check if item pointer overlaps with the item itself + +From: Qu Wenruo + +commit 
7f43d4affb2a254d421ab20b0cf65ac2569909fb upstream. + +Function check_leaf() checks if any item pointer points outside of the +leaf, but it doesn't check if the pointer overlaps with the item itself. + +Normally only the last item may be the victim, but adding such check is +never a bad idea anyway. + +Signed-off-by: Qu Wenruo +Reviewed-by: Nikolay Borisov +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/disk-io.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -637,6 +637,13 @@ static noinline int check_leaf(struct bt + return -EUCLEAN; + } + ++ /* Also check if the item pointer overlaps with btrfs item. */ ++ if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) > ++ btrfs_item_ptr_offset(leaf, slot)) { ++ CORRUPT("slot overlap with its data", leaf, root, slot); ++ return -EUCLEAN; ++ } ++ + prev_key.objectid = key.objectid; + prev_key.type = key.type; + prev_key.offset = key.offset; diff --git a/queue-4.9/btrfs-check-that-each-block-group-has-corresponding-chunk-at-mount-time.patch b/queue-4.9/btrfs-check-that-each-block-group-has-corresponding-chunk-at-mount-time.patch new file mode 100644 index 00000000000..2993f55dce6 --- /dev/null +++ b/queue-4.9/btrfs-check-that-each-block-group-has-corresponding-chunk-at-mount-time.patch @@ -0,0 +1,83 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Qu Wenruo +Date: Wed, 1 Aug 2018 10:37:16 +0800 +Subject: btrfs: Check that each block group has corresponding chunk at mount time + +From: Qu Wenruo + +commit 514c7dca85a0bf40be984dab0b477403a6db901f upstream. + +A crafted btrfs image with incorrect chunk<->block group mapping will +trigger a lot of unexpected things as the mapping is essential. + +Although the problem can be caught by block group item checker +added in "btrfs: tree-checker: Verify block_group_item", it's still not +sufficient. 
A sufficiently valid block group item can pass the check +added by the mentioned patch but could fail to match the existing chunk. + +This patch will add extra block group -> chunk mapping check, to ensure +we have a completely matching (start, len, flags) chunk for each block +group at mount time. + +Here we reuse the original helper find_first_block_group(), which is +already doing the basic bg -> chunk checks, adding further checks of the +start/len and type flags. + +Link: https://bugzilla.kernel.org/show_bug.cgi?id=199837 +Reported-by: Xu Wen +Signed-off-by: Qu Wenruo +Reviewed-by: Su Yue +Reviewed-by: David Sterba +Signed-off-by: David Sterba +[bwh: Backported to 4.9: Use root->fs_info instead of fs_info] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/extent-tree.c | 28 +++++++++++++++++++++++++++- + 1 file changed, 27 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/extent-tree.c ++++ b/fs/btrfs/extent-tree.c +@@ -9896,6 +9896,8 @@ static int find_first_block_group(struct + int ret = 0; + struct btrfs_key found_key; + struct extent_buffer *leaf; ++ struct btrfs_block_group_item bg; ++ u64 flags; + int slot; + + ret = btrfs_search_slot(NULL, root, key, path, 0, 0); +@@ -9930,8 +9932,32 @@ static int find_first_block_group(struct + "logical %llu len %llu found bg but no related chunk", + found_key.objectid, found_key.offset); + ret = -ENOENT; ++ } else if (em->start != found_key.objectid || ++ em->len != found_key.offset) { ++ btrfs_err(root->fs_info, ++ "block group %llu len %llu mismatch with chunk %llu len %llu", ++ found_key.objectid, found_key.offset, ++ em->start, em->len); ++ ret = -EUCLEAN; + } else { +- ret = 0; ++ read_extent_buffer(leaf, &bg, ++ btrfs_item_ptr_offset(leaf, slot), ++ sizeof(bg)); ++ flags = btrfs_block_group_flags(&bg) & ++ BTRFS_BLOCK_GROUP_TYPE_MASK; ++ ++ if (flags != (em->map_lookup->type & ++ BTRFS_BLOCK_GROUP_TYPE_MASK)) { ++ btrfs_err(root->fs_info, ++"block group %llu len %llu type flags 0x%llx 
mismatch with chunk type flags 0x%llx", ++ found_key.objectid, ++ found_key.offset, flags, ++ (BTRFS_BLOCK_GROUP_TYPE_MASK & ++ em->map_lookup->type)); ++ ret = -EUCLEAN; ++ } else { ++ ret = 0; ++ } + } + free_extent_map(em); + goto out; diff --git a/queue-4.9/btrfs-move-leaf-and-node-validation-checker-to-tree-checker.c.patch b/queue-4.9/btrfs-move-leaf-and-node-validation-checker-to-tree-checker.c.patch new file mode 100644 index 00000000000..e2b99be4387 --- /dev/null +++ b/queue-4.9/btrfs-move-leaf-and-node-validation-checker-to-tree-checker.c.patch @@ -0,0 +1,697 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Qu Wenruo +Date: Mon, 9 Oct 2017 01:51:02 +0000 +Subject: btrfs: Move leaf and node validation checker to tree-checker.c + +From: Qu Wenruo + +commit 557ea5dd003d371536f6b4e8f7c8209a2b6fd4e3 upstream. + +It's no doubt the comprehensive tree block checker will become larger, +so moving them into their own files is quite reasonable. + +Signed-off-by: Qu Wenruo +[ wording adjustments ] +Signed-off-by: David Sterba +[bwh: Backported to 4.9: The moved code is slightly different] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/Makefile | 2 + fs/btrfs/disk-io.c | 284 -------------------------------------------- + fs/btrfs/tree-checker.c | 309 ++++++++++++++++++++++++++++++++++++++++++++++++ + fs/btrfs/tree-checker.h | 26 ++++ + 4 files changed, 340 insertions(+), 281 deletions(-) + create mode 100644 fs/btrfs/tree-checker.c + create mode 100644 fs/btrfs/tree-checker.h + +--- a/fs/btrfs/Makefile ++++ b/fs/btrfs/Makefile +@@ -9,7 +9,7 @@ btrfs-y += super.o ctree.o extent-tree.o + export.o tree-log.o free-space-cache.o zlib.o lzo.o \ + compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ + reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \ +- uuid-tree.o props.o hash.o free-space-tree.o ++ uuid-tree.o props.o hash.o free-space-tree.o tree-checker.o + + btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o + 
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -50,6 +50,7 @@ + #include "sysfs.h" + #include "qgroup.h" + #include "compression.h" ++#include "tree-checker.h" + + #ifdef CONFIG_X86 + #include +@@ -538,283 +539,6 @@ static int check_tree_block_fsid(struct + return ret; + } + +-#define CORRUPT(reason, eb, root, slot) \ +- btrfs_crit(root->fs_info, "corrupt %s, %s: block=%llu," \ +- " root=%llu, slot=%d", \ +- btrfs_header_level(eb) == 0 ? "leaf" : "node",\ +- reason, btrfs_header_bytenr(eb), root->objectid, slot) +- +-static int check_extent_data_item(struct btrfs_root *root, +- struct extent_buffer *leaf, +- struct btrfs_key *key, int slot) +-{ +- struct btrfs_file_extent_item *fi; +- u32 sectorsize = root->sectorsize; +- u32 item_size = btrfs_item_size_nr(leaf, slot); +- +- if (!IS_ALIGNED(key->offset, sectorsize)) { +- CORRUPT("unaligned key offset for file extent", +- leaf, root, slot); +- return -EUCLEAN; +- } +- +- fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); +- +- if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) { +- CORRUPT("invalid file extent type", leaf, root, slot); +- return -EUCLEAN; +- } +- +- /* +- * Support for new compression/encrption must introduce incompat flag, +- * and must be caught in open_ctree(). 
+- */ +- if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) { +- CORRUPT("invalid file extent compression", leaf, root, slot); +- return -EUCLEAN; +- } +- if (btrfs_file_extent_encryption(leaf, fi)) { +- CORRUPT("invalid file extent encryption", leaf, root, slot); +- return -EUCLEAN; +- } +- if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) { +- /* Inline extent must have 0 as key offset */ +- if (key->offset) { +- CORRUPT("inline extent has non-zero key offset", +- leaf, root, slot); +- return -EUCLEAN; +- } +- +- /* Compressed inline extent has no on-disk size, skip it */ +- if (btrfs_file_extent_compression(leaf, fi) != +- BTRFS_COMPRESS_NONE) +- return 0; +- +- /* Uncompressed inline extent size must match item size */ +- if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START + +- btrfs_file_extent_ram_bytes(leaf, fi)) { +- CORRUPT("plaintext inline extent has invalid size", +- leaf, root, slot); +- return -EUCLEAN; +- } +- return 0; +- } +- +- /* Regular or preallocated extent has fixed item size */ +- if (item_size != sizeof(*fi)) { +- CORRUPT( +- "regluar or preallocated extent data item size is invalid", +- leaf, root, slot); +- return -EUCLEAN; +- } +- if (!IS_ALIGNED(btrfs_file_extent_ram_bytes(leaf, fi), sectorsize) || +- !IS_ALIGNED(btrfs_file_extent_disk_bytenr(leaf, fi), sectorsize) || +- !IS_ALIGNED(btrfs_file_extent_disk_num_bytes(leaf, fi), sectorsize) || +- !IS_ALIGNED(btrfs_file_extent_offset(leaf, fi), sectorsize) || +- !IS_ALIGNED(btrfs_file_extent_num_bytes(leaf, fi), sectorsize)) { +- CORRUPT( +- "regular or preallocated extent data item has unaligned value", +- leaf, root, slot); +- return -EUCLEAN; +- } +- +- return 0; +-} +- +-static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf, +- struct btrfs_key *key, int slot) +-{ +- u32 sectorsize = root->sectorsize; +- u32 csumsize = btrfs_super_csum_size(root->fs_info->super_copy); +- +- if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) { +- 
CORRUPT("invalid objectid for csum item", leaf, root, slot); +- return -EUCLEAN; +- } +- if (!IS_ALIGNED(key->offset, sectorsize)) { +- CORRUPT("unaligned key offset for csum item", leaf, root, slot); +- return -EUCLEAN; +- } +- if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) { +- CORRUPT("unaligned csum item size", leaf, root, slot); +- return -EUCLEAN; +- } +- return 0; +-} +- +-/* +- * Common point to switch the item-specific validation. +- */ +-static int check_leaf_item(struct btrfs_root *root, +- struct extent_buffer *leaf, +- struct btrfs_key *key, int slot) +-{ +- int ret = 0; +- +- switch (key->type) { +- case BTRFS_EXTENT_DATA_KEY: +- ret = check_extent_data_item(root, leaf, key, slot); +- break; +- case BTRFS_EXTENT_CSUM_KEY: +- ret = check_csum_item(root, leaf, key, slot); +- break; +- } +- return ret; +-} +- +-static noinline int check_leaf(struct btrfs_root *root, +- struct extent_buffer *leaf) +-{ +- /* No valid key type is 0, so all key should be larger than this key */ +- struct btrfs_key prev_key = {0, 0, 0}; +- struct btrfs_key key; +- u32 nritems = btrfs_header_nritems(leaf); +- int slot; +- +- /* +- * Extent buffers from a relocation tree have a owner field that +- * corresponds to the subvolume tree they are based on. So just from an +- * extent buffer alone we can not find out what is the id of the +- * corresponding subvolume tree, so we can not figure out if the extent +- * buffer corresponds to the root of the relocation tree or not. So skip +- * this check for relocation trees. +- */ +- if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) { +- struct btrfs_root *check_root; +- +- key.objectid = btrfs_header_owner(leaf); +- key.type = BTRFS_ROOT_ITEM_KEY; +- key.offset = (u64)-1; +- +- check_root = btrfs_get_fs_root(root->fs_info, &key, false); +- /* +- * The only reason we also check NULL here is that during +- * open_ctree() some roots has not yet been set up. 
+- */ +- if (!IS_ERR_OR_NULL(check_root)) { +- struct extent_buffer *eb; +- +- eb = btrfs_root_node(check_root); +- /* if leaf is the root, then it's fine */ +- if (leaf != eb) { +- CORRUPT("non-root leaf's nritems is 0", +- leaf, check_root, 0); +- free_extent_buffer(eb); +- return -EUCLEAN; +- } +- free_extent_buffer(eb); +- } +- return 0; +- } +- +- if (nritems == 0) +- return 0; +- +- /* +- * Check the following things to make sure this is a good leaf, and +- * leaf users won't need to bother with similar sanity checks: +- * +- * 1) key order +- * 2) item offset and size +- * No overlap, no hole, all inside the leaf. +- * 3) item content +- * If possible, do comprehensive sanity check. +- * NOTE: All checks must only rely on the item data itself. +- */ +- for (slot = 0; slot < nritems; slot++) { +- u32 item_end_expected; +- int ret; +- +- btrfs_item_key_to_cpu(leaf, &key, slot); +- +- /* Make sure the keys are in the right order */ +- if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) { +- CORRUPT("bad key order", leaf, root, slot); +- return -EUCLEAN; +- } +- +- /* +- * Make sure the offset and ends are right, remember that the +- * item data starts at the end of the leaf and grows towards the +- * front. +- */ +- if (slot == 0) +- item_end_expected = BTRFS_LEAF_DATA_SIZE(root); +- else +- item_end_expected = btrfs_item_offset_nr(leaf, +- slot - 1); +- if (btrfs_item_end_nr(leaf, slot) != item_end_expected) { +- CORRUPT("slot offset bad", leaf, root, slot); +- return -EUCLEAN; +- } +- +- /* +- * Check to make sure that we don't point outside of the leaf, +- * just in case all the items are consistent to each other, but +- * all point outside of the leaf. +- */ +- if (btrfs_item_end_nr(leaf, slot) > +- BTRFS_LEAF_DATA_SIZE(root)) { +- CORRUPT("slot end outside of leaf", leaf, root, slot); +- return -EUCLEAN; +- } +- +- /* Also check if the item pointer overlaps with btrfs item. 
*/ +- if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) > +- btrfs_item_ptr_offset(leaf, slot)) { +- CORRUPT("slot overlap with its data", leaf, root, slot); +- return -EUCLEAN; +- } +- +- /* Check if the item size and content meet other criteria */ +- ret = check_leaf_item(root, leaf, &key, slot); +- if (ret < 0) +- return ret; +- +- prev_key.objectid = key.objectid; +- prev_key.type = key.type; +- prev_key.offset = key.offset; +- } +- +- return 0; +-} +- +-static int check_node(struct btrfs_root *root, struct extent_buffer *node) +-{ +- unsigned long nr = btrfs_header_nritems(node); +- struct btrfs_key key, next_key; +- int slot; +- u64 bytenr; +- int ret = 0; +- +- if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root)) { +- btrfs_crit(root->fs_info, +- "corrupt node: block %llu root %llu nritems %lu", +- node->start, root->objectid, nr); +- return -EIO; +- } +- +- for (slot = 0; slot < nr - 1; slot++) { +- bytenr = btrfs_node_blockptr(node, slot); +- btrfs_node_key_to_cpu(node, &key, slot); +- btrfs_node_key_to_cpu(node, &next_key, slot + 1); +- +- if (!bytenr) { +- CORRUPT("invalid item slot", node, root, slot); +- ret = -EIO; +- goto out; +- } +- +- if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) { +- CORRUPT("bad key order", node, root, slot); +- ret = -EIO; +- goto out; +- } +- } +-out: +- return ret; +-} +- + static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, + u64 phy_offset, struct page *page, + u64 start, u64 end, int mirror) +@@ -880,12 +604,12 @@ static int btree_readpage_end_io_hook(st + * that we don't try and read the other copies of this block, just + * return -EIO. 
+ */ +- if (found_level == 0 && check_leaf(root, eb)) { ++ if (found_level == 0 && btrfs_check_leaf(root, eb)) { + set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); + ret = -EIO; + } + +- if (found_level > 0 && check_node(root, eb)) ++ if (found_level > 0 && btrfs_check_node(root, eb)) + ret = -EIO; + + if (!ret) +@@ -4216,7 +3940,7 @@ void btrfs_mark_buffer_dirty(struct exte + buf->len, + root->fs_info->dirty_metadata_batch); + #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY +- if (btrfs_header_level(buf) == 0 && check_leaf(root, buf)) { ++ if (btrfs_header_level(buf) == 0 && btrfs_check_leaf(root, buf)) { + btrfs_print_leaf(root, buf); + ASSERT(0); + } +--- /dev/null ++++ b/fs/btrfs/tree-checker.c +@@ -0,0 +1,309 @@ ++/* ++ * Copyright (C) Qu Wenruo 2017. All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public ++ * License v2 as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public ++ * License along with this program. ++ */ ++ ++/* ++ * The module is used to catch unexpected/corrupted tree block data. ++ * Such behavior can be caused either by a fuzzed image or bugs. ++ * ++ * The objective is to do leaf/node validation checks when tree block is read ++ * from disk, and check *every* possible member, so other code won't ++ * need to check them again. ++ * ++ * Due to the potential for unwanted damage, every checker needs to be ++ * carefully reviewed so that it does not prevent mounting of valid images.
++ */ ++ ++#include "ctree.h" ++#include "tree-checker.h" ++#include "disk-io.h" ++#include "compression.h" ++ ++#define CORRUPT(reason, eb, root, slot) \ ++ btrfs_crit(root->fs_info, \ ++ "corrupt %s, %s: block=%llu, root=%llu, slot=%d", \ ++ btrfs_header_level(eb) == 0 ? "leaf" : "node", \ ++ reason, btrfs_header_bytenr(eb), root->objectid, slot) ++ ++static int check_extent_data_item(struct btrfs_root *root, ++ struct extent_buffer *leaf, ++ struct btrfs_key *key, int slot) ++{ ++ struct btrfs_file_extent_item *fi; ++ u32 sectorsize = root->sectorsize; ++ u32 item_size = btrfs_item_size_nr(leaf, slot); ++ ++ if (!IS_ALIGNED(key->offset, sectorsize)) { ++ CORRUPT("unaligned key offset for file extent", ++ leaf, root, slot); ++ return -EUCLEAN; ++ } ++ ++ fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); ++ ++ if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) { ++ CORRUPT("invalid file extent type", leaf, root, slot); ++ return -EUCLEAN; ++ } ++ ++ /* ++ * Support for new compression/encrption must introduce incompat flag, ++ * and must be caught in open_ctree(). 
++ */ ++ if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) { ++ CORRUPT("invalid file extent compression", leaf, root, slot); ++ return -EUCLEAN; ++ } ++ if (btrfs_file_extent_encryption(leaf, fi)) { ++ CORRUPT("invalid file extent encryption", leaf, root, slot); ++ return -EUCLEAN; ++ } ++ if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) { ++ /* Inline extent must have 0 as key offset */ ++ if (key->offset) { ++ CORRUPT("inline extent has non-zero key offset", ++ leaf, root, slot); ++ return -EUCLEAN; ++ } ++ ++ /* Compressed inline extent has no on-disk size, skip it */ ++ if (btrfs_file_extent_compression(leaf, fi) != ++ BTRFS_COMPRESS_NONE) ++ return 0; ++ ++ /* Uncompressed inline extent size must match item size */ ++ if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START + ++ btrfs_file_extent_ram_bytes(leaf, fi)) { ++ CORRUPT("plaintext inline extent has invalid size", ++ leaf, root, slot); ++ return -EUCLEAN; ++ } ++ return 0; ++ } ++ ++ /* Regular or preallocated extent has fixed item size */ ++ if (item_size != sizeof(*fi)) { ++ CORRUPT( ++ "regluar or preallocated extent data item size is invalid", ++ leaf, root, slot); ++ return -EUCLEAN; ++ } ++ if (!IS_ALIGNED(btrfs_file_extent_ram_bytes(leaf, fi), sectorsize) || ++ !IS_ALIGNED(btrfs_file_extent_disk_bytenr(leaf, fi), sectorsize) || ++ !IS_ALIGNED(btrfs_file_extent_disk_num_bytes(leaf, fi), sectorsize) || ++ !IS_ALIGNED(btrfs_file_extent_offset(leaf, fi), sectorsize) || ++ !IS_ALIGNED(btrfs_file_extent_num_bytes(leaf, fi), sectorsize)) { ++ CORRUPT( ++ "regular or preallocated extent data item has unaligned value", ++ leaf, root, slot); ++ return -EUCLEAN; ++ } ++ ++ return 0; ++} ++ ++static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf, ++ struct btrfs_key *key, int slot) ++{ ++ u32 sectorsize = root->sectorsize; ++ u32 csumsize = btrfs_super_csum_size(root->fs_info->super_copy); ++ ++ if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) { ++ 
CORRUPT("invalid objectid for csum item", leaf, root, slot); ++ return -EUCLEAN; ++ } ++ if (!IS_ALIGNED(key->offset, sectorsize)) { ++ CORRUPT("unaligned key offset for csum item", leaf, root, slot); ++ return -EUCLEAN; ++ } ++ if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) { ++ CORRUPT("unaligned csum item size", leaf, root, slot); ++ return -EUCLEAN; ++ } ++ return 0; ++} ++ ++/* ++ * Common point to switch the item-specific validation. ++ */ ++static int check_leaf_item(struct btrfs_root *root, ++ struct extent_buffer *leaf, ++ struct btrfs_key *key, int slot) ++{ ++ int ret = 0; ++ ++ switch (key->type) { ++ case BTRFS_EXTENT_DATA_KEY: ++ ret = check_extent_data_item(root, leaf, key, slot); ++ break; ++ case BTRFS_EXTENT_CSUM_KEY: ++ ret = check_csum_item(root, leaf, key, slot); ++ break; ++ } ++ return ret; ++} ++ ++int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf) ++{ ++ struct btrfs_fs_info *fs_info = root->fs_info; ++ /* No valid key type is 0, so all key should be larger than this key */ ++ struct btrfs_key prev_key = {0, 0, 0}; ++ struct btrfs_key key; ++ u32 nritems = btrfs_header_nritems(leaf); ++ int slot; ++ ++ /* ++ * Extent buffers from a relocation tree have a owner field that ++ * corresponds to the subvolume tree they are based on. So just from an ++ * extent buffer alone we can not find out what is the id of the ++ * corresponding subvolume tree, so we can not figure out if the extent ++ * buffer corresponds to the root of the relocation tree or not. So ++ * skip this check for relocation trees. ++ */ ++ if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) { ++ struct btrfs_root *check_root; ++ ++ key.objectid = btrfs_header_owner(leaf); ++ key.type = BTRFS_ROOT_ITEM_KEY; ++ key.offset = (u64)-1; ++ ++ check_root = btrfs_get_fs_root(fs_info, &key, false); ++ /* ++ * The only reason we also check NULL here is that during ++ * open_ctree() some roots has not yet been set up. 
++ */ ++ if (!IS_ERR_OR_NULL(check_root)) { ++ struct extent_buffer *eb; ++ ++ eb = btrfs_root_node(check_root); ++ /* if leaf is the root, then it's fine */ ++ if (leaf != eb) { ++ CORRUPT("non-root leaf's nritems is 0", ++ leaf, check_root, 0); ++ free_extent_buffer(eb); ++ return -EUCLEAN; ++ } ++ free_extent_buffer(eb); ++ } ++ return 0; ++ } ++ ++ if (nritems == 0) ++ return 0; ++ ++ /* ++ * Check the following things to make sure this is a good leaf, and ++ * leaf users won't need to bother with similar sanity checks: ++ * ++ * 1) key ordering ++ * 2) item offset and size ++ * No overlap, no hole, all inside the leaf. ++ * 3) item content ++ * If possible, do comprehensive sanity check. ++ * NOTE: All checks must only rely on the item data itself. ++ */ ++ for (slot = 0; slot < nritems; slot++) { ++ u32 item_end_expected; ++ int ret; ++ ++ btrfs_item_key_to_cpu(leaf, &key, slot); ++ ++ /* Make sure the keys are in the right order */ ++ if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) { ++ CORRUPT("bad key order", leaf, root, slot); ++ return -EUCLEAN; ++ } ++ ++ /* ++ * Make sure the offset and ends are right, remember that the ++ * item data starts at the end of the leaf and grows towards the ++ * front. ++ */ ++ if (slot == 0) ++ item_end_expected = BTRFS_LEAF_DATA_SIZE(root); ++ else ++ item_end_expected = btrfs_item_offset_nr(leaf, ++ slot - 1); ++ if (btrfs_item_end_nr(leaf, slot) != item_end_expected) { ++ CORRUPT("slot offset bad", leaf, root, slot); ++ return -EUCLEAN; ++ } ++ ++ /* ++ * Check to make sure that we don't point outside of the leaf, ++ * just in case all the items are consistent to each other, but ++ * all point outside of the leaf. ++ */ ++ if (btrfs_item_end_nr(leaf, slot) > ++ BTRFS_LEAF_DATA_SIZE(root)) { ++ CORRUPT("slot end outside of leaf", leaf, root, slot); ++ return -EUCLEAN; ++ } ++ ++ /* Also check if the item pointer overlaps with btrfs item. 
*/ ++ if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) > ++ btrfs_item_ptr_offset(leaf, slot)) { ++ CORRUPT("slot overlap with its data", leaf, root, slot); ++ return -EUCLEAN; ++ } ++ ++ /* Check if the item size and content meet other criteria */ ++ ret = check_leaf_item(root, leaf, &key, slot); ++ if (ret < 0) ++ return ret; ++ ++ prev_key.objectid = key.objectid; ++ prev_key.type = key.type; ++ prev_key.offset = key.offset; ++ } ++ ++ return 0; ++} ++ ++int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node) ++{ ++ unsigned long nr = btrfs_header_nritems(node); ++ struct btrfs_key key, next_key; ++ int slot; ++ u64 bytenr; ++ int ret = 0; ++ ++ if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root)) { ++ btrfs_crit(root->fs_info, ++ "corrupt node: block %llu root %llu nritems %lu", ++ node->start, root->objectid, nr); ++ return -EIO; ++ } ++ ++ for (slot = 0; slot < nr - 1; slot++) { ++ bytenr = btrfs_node_blockptr(node, slot); ++ btrfs_node_key_to_cpu(node, &key, slot); ++ btrfs_node_key_to_cpu(node, &next_key, slot + 1); ++ ++ if (!bytenr) { ++ CORRUPT("invalid item slot", node, root, slot); ++ ret = -EIO; ++ goto out; ++ } ++ ++ if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) { ++ CORRUPT("bad key order", node, root, slot); ++ ret = -EIO; ++ goto out; ++ } ++ } ++out: ++ return ret; ++} +--- /dev/null ++++ b/fs/btrfs/tree-checker.h +@@ -0,0 +1,26 @@ ++/* ++ * Copyright (C) Qu Wenruo 2017. All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public ++ * License v2 as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public ++ * License along with this program. ++ */ ++ ++#ifndef __BTRFS_TREE_CHECKER__ ++#define __BTRFS_TREE_CHECKER__ ++ ++#include "ctree.h" ++#include "extent_io.h" ++ ++int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf); ++int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node); ++ ++#endif diff --git a/queue-4.9/btrfs-refactor-check_leaf-function-for-later-expansion.patch b/queue-4.9/btrfs-refactor-check_leaf-function-for-later-expansion.patch new file mode 100644 index 00000000000..7e41ed11a18 --- /dev/null +++ b/queue-4.9/btrfs-refactor-check_leaf-function-for-later-expansion.patch @@ -0,0 +1,137 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Qu Wenruo +Date: Wed, 23 Aug 2017 16:57:56 +0900 +Subject: btrfs: Refactor check_leaf function for later expansion + +From: Qu Wenruo + +commit c3267bbaa9cae09b62960eafe33ad19196803285 upstream. + +Current check_leaf() function does a good job checking key order and +item offset/size. + +However, it only checks from slot 0 to the last-but-one slot; this is +good but makes later expansion hard. + +So this refactoring iterates from slot 0 to the last slot. +For key comparison, it uses a key with all 0 as initial key, so all +valid keys should be larger than that. + +And for item size/offset checks, it compares current item end with +previous item offset. +For slot 0, use leaf end as a special case. + +This makes later item/key offset checks and item size checks easier to +implement. + +Also, make check_leaf() return -EUCLEAN rather than -EIO to indicate +an error.
+ +Signed-off-by: Qu Wenruo +Reviewed-by: Nikolay Borisov +Reviewed-by: David Sterba +Signed-off-by: David Sterba +[bwh: Backported to 4.9: + - BTRFS_LEAF_DATA_SIZE() takes a root rather than an fs_info + - Adjust context] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/disk-io.c | 50 +++++++++++++++++++++++++++----------------------- + 1 file changed, 27 insertions(+), 23 deletions(-) + +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -547,8 +547,9 @@ static int check_tree_block_fsid(struct + static noinline int check_leaf(struct btrfs_root *root, + struct extent_buffer *leaf) + { ++ /* No valid key type is 0, so all key should be larger than this key */ ++ struct btrfs_key prev_key = {0, 0, 0}; + struct btrfs_key key; +- struct btrfs_key leaf_key; + u32 nritems = btrfs_header_nritems(leaf); + int slot; + +@@ -581,7 +582,7 @@ static noinline int check_leaf(struct bt + CORRUPT("non-root leaf's nritems is 0", + leaf, check_root, 0); + free_extent_buffer(eb); +- return -EIO; ++ return -EUCLEAN; + } + free_extent_buffer(eb); + } +@@ -591,28 +592,23 @@ static noinline int check_leaf(struct bt + if (nritems == 0) + return 0; + +- /* Check the 0 item */ +- if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) != +- BTRFS_LEAF_DATA_SIZE(root)) { +- CORRUPT("invalid item offset size pair", leaf, root, 0); +- return -EIO; +- } +- + /* +- * Check to make sure each items keys are in the correct order and their +- * offsets make sense. We only have to loop through nritems-1 because +- * we check the current slot against the next slot, which verifies the +- * next slot's offset+size makes sense and that the current's slot +- * offset is correct. ++ * Check the following things to make sure this is a good leaf, and ++ * leaf users won't need to bother with similar sanity checks: ++ * ++ * 1) key order ++ * 2) item offset and size ++ * No overlap, no hole, all inside the leaf. 
+ */ +- for (slot = 0; slot < nritems - 1; slot++) { +- btrfs_item_key_to_cpu(leaf, &leaf_key, slot); +- btrfs_item_key_to_cpu(leaf, &key, slot + 1); ++ for (slot = 0; slot < nritems; slot++) { ++ u32 item_end_expected; ++ ++ btrfs_item_key_to_cpu(leaf, &key, slot); + + /* Make sure the keys are in the right order */ +- if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) { ++ if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) { + CORRUPT("bad key order", leaf, root, slot); +- return -EIO; ++ return -EUCLEAN; + } + + /* +@@ -620,10 +616,14 @@ static noinline int check_leaf(struct bt + * item data starts at the end of the leaf and grows towards the + * front. + */ +- if (btrfs_item_offset_nr(leaf, slot) != +- btrfs_item_end_nr(leaf, slot + 1)) { ++ if (slot == 0) ++ item_end_expected = BTRFS_LEAF_DATA_SIZE(root); ++ else ++ item_end_expected = btrfs_item_offset_nr(leaf, ++ slot - 1); ++ if (btrfs_item_end_nr(leaf, slot) != item_end_expected) { + CORRUPT("slot offset bad", leaf, root, slot); +- return -EIO; ++ return -EUCLEAN; + } + + /* +@@ -634,8 +634,12 @@ static noinline int check_leaf(struct bt + if (btrfs_item_end_nr(leaf, slot) > + BTRFS_LEAF_DATA_SIZE(root)) { + CORRUPT("slot end outside of leaf", leaf, root, slot); +- return -EIO; ++ return -EUCLEAN; + } ++ ++ prev_key.objectid = key.objectid; ++ prev_key.type = key.type; ++ prev_key.offset = key.offset; + } + + return 0; diff --git a/queue-4.9/btrfs-struct-funcs-constify-readers.patch b/queue-4.9/btrfs-struct-funcs-constify-readers.patch new file mode 100644 index 00000000000..d5c719be623 --- /dev/null +++ b/queue-4.9/btrfs-struct-funcs-constify-readers.patch @@ -0,0 +1,532 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Jeff Mahoney +Date: Wed, 28 Jun 2017 21:56:53 -0600 +Subject: btrfs: struct-funcs, constify readers + +From: Jeff Mahoney + +commit 1cbb1f454e5321e47fc1e6b233066c7ccc979d15 upstream. 
+ +We have reader helpers for most of the on-disk structures that use +an extent_buffer and pointer as offset into the buffer that are +read-only. We should mark them as const and, in turn, allow consumers +of these interfaces to mark the buffers const as well. + +No impact on code, but serves as documentation that a buffer is intended +not to be modified. + +Signed-off-by: Jeff Mahoney +Signed-off-by: David Sterba +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ctree.h | 128 ++++++++++++++++++++++++------------------------ + fs/btrfs/extent_io.c | 24 ++++----- + fs/btrfs/extent_io.h | 19 +++---- + fs/btrfs/struct-funcs.c | 9 +-- + 4 files changed, 91 insertions(+), 89 deletions(-) + +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -1415,7 +1415,7 @@ do { + #define BTRFS_INODE_ROOT_ITEM_INIT (1 << 31) + + struct btrfs_map_token { +- struct extent_buffer *eb; ++ const struct extent_buffer *eb; + char *kaddr; + unsigned long offset; + }; +@@ -1449,18 +1449,19 @@ static inline void btrfs_init_map_token + sizeof(((type *)0)->member))) + + #define DECLARE_BTRFS_SETGET_BITS(bits) \ +-u##bits btrfs_get_token_##bits(struct extent_buffer *eb, void *ptr, \ +- unsigned long off, \ +- struct btrfs_map_token *token); \ +-void btrfs_set_token_##bits(struct extent_buffer *eb, void *ptr, \ ++u##bits btrfs_get_token_##bits(const struct extent_buffer *eb, \ ++ const void *ptr, unsigned long off, \ ++ struct btrfs_map_token *token); \ ++void btrfs_set_token_##bits(struct extent_buffer *eb, const void *ptr, \ + unsigned long off, u##bits val, \ + struct btrfs_map_token *token); \ +-static inline u##bits btrfs_get_##bits(struct extent_buffer *eb, void *ptr, \ ++static inline u##bits btrfs_get_##bits(const struct extent_buffer *eb, \ ++ const void *ptr, \ + unsigned long off) \ + { \ + return btrfs_get_token_##bits(eb, ptr, off, NULL); \ + } \ +-static inline void btrfs_set_##bits(struct extent_buffer *eb, void *ptr, \ ++static inline void 
btrfs_set_##bits(struct extent_buffer *eb, void *ptr,\ + unsigned long off, u##bits val) \ + { \ + btrfs_set_token_##bits(eb, ptr, off, val, NULL); \ +@@ -1472,7 +1473,8 @@ DECLARE_BTRFS_SETGET_BITS(32) + DECLARE_BTRFS_SETGET_BITS(64) + + #define BTRFS_SETGET_FUNCS(name, type, member, bits) \ +-static inline u##bits btrfs_##name(struct extent_buffer *eb, type *s) \ ++static inline u##bits btrfs_##name(const struct extent_buffer *eb, \ ++ const type *s) \ + { \ + BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \ + return btrfs_get_##bits(eb, s, offsetof(type, member)); \ +@@ -1483,7 +1485,8 @@ static inline void btrfs_set_##name(stru + BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \ + btrfs_set_##bits(eb, s, offsetof(type, member), val); \ + } \ +-static inline u##bits btrfs_token_##name(struct extent_buffer *eb, type *s, \ ++static inline u##bits btrfs_token_##name(const struct extent_buffer *eb,\ ++ const type *s, \ + struct btrfs_map_token *token) \ + { \ + BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \ +@@ -1498,9 +1501,9 @@ static inline void btrfs_set_token_##nam + } + + #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ +-static inline u##bits btrfs_##name(struct extent_buffer *eb) \ ++static inline u##bits btrfs_##name(const struct extent_buffer *eb) \ + { \ +- type *p = page_address(eb->pages[0]); \ ++ const type *p = page_address(eb->pages[0]); \ + u##bits res = le##bits##_to_cpu(p->member); \ + return res; \ + } \ +@@ -1512,7 +1515,7 @@ static inline void btrfs_set_##name(stru + } + + #define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \ +-static inline u##bits btrfs_##name(type *s) \ ++static inline u##bits btrfs_##name(const type *s) \ + { \ + return le##bits##_to_cpu(s->member); \ + } \ +@@ -1818,7 +1821,7 @@ static inline unsigned long btrfs_node_k + sizeof(struct btrfs_key_ptr) * nr; + } + +-void btrfs_node_key(struct extent_buffer *eb, ++void btrfs_node_key(const struct extent_buffer 
*eb, + struct btrfs_disk_key *disk_key, int nr); + + static inline void btrfs_set_node_key(struct extent_buffer *eb, +@@ -1847,28 +1850,28 @@ static inline struct btrfs_item *btrfs_i + return (struct btrfs_item *)btrfs_item_nr_offset(nr); + } + +-static inline u32 btrfs_item_end(struct extent_buffer *eb, ++static inline u32 btrfs_item_end(const struct extent_buffer *eb, + struct btrfs_item *item) + { + return btrfs_item_offset(eb, item) + btrfs_item_size(eb, item); + } + +-static inline u32 btrfs_item_end_nr(struct extent_buffer *eb, int nr) ++static inline u32 btrfs_item_end_nr(const struct extent_buffer *eb, int nr) + { + return btrfs_item_end(eb, btrfs_item_nr(nr)); + } + +-static inline u32 btrfs_item_offset_nr(struct extent_buffer *eb, int nr) ++static inline u32 btrfs_item_offset_nr(const struct extent_buffer *eb, int nr) + { + return btrfs_item_offset(eb, btrfs_item_nr(nr)); + } + +-static inline u32 btrfs_item_size_nr(struct extent_buffer *eb, int nr) ++static inline u32 btrfs_item_size_nr(const struct extent_buffer *eb, int nr) + { + return btrfs_item_size(eb, btrfs_item_nr(nr)); + } + +-static inline void btrfs_item_key(struct extent_buffer *eb, ++static inline void btrfs_item_key(const struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr) + { + struct btrfs_item *item = btrfs_item_nr(nr); +@@ -1904,8 +1907,8 @@ BTRFS_SETGET_STACK_FUNCS(stack_dir_name_ + BTRFS_SETGET_STACK_FUNCS(stack_dir_transid, struct btrfs_dir_item, + transid, 64); + +-static inline void btrfs_dir_item_key(struct extent_buffer *eb, +- struct btrfs_dir_item *item, ++static inline void btrfs_dir_item_key(const struct extent_buffer *eb, ++ const struct btrfs_dir_item *item, + struct btrfs_disk_key *key) + { + read_eb_member(eb, item, struct btrfs_dir_item, location, key); +@@ -1913,7 +1916,7 @@ static inline void btrfs_dir_item_key(st + + static inline void btrfs_set_dir_item_key(struct extent_buffer *eb, + struct btrfs_dir_item *item, +- struct btrfs_disk_key *key) ++ 
const struct btrfs_disk_key *key) + { + write_eb_member(eb, item, struct btrfs_dir_item, location, key); + } +@@ -1925,8 +1928,8 @@ BTRFS_SETGET_FUNCS(free_space_bitmaps, s + BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header, + generation, 64); + +-static inline void btrfs_free_space_key(struct extent_buffer *eb, +- struct btrfs_free_space_header *h, ++static inline void btrfs_free_space_key(const struct extent_buffer *eb, ++ const struct btrfs_free_space_header *h, + struct btrfs_disk_key *key) + { + read_eb_member(eb, h, struct btrfs_free_space_header, location, key); +@@ -1934,7 +1937,7 @@ static inline void btrfs_free_space_key( + + static inline void btrfs_set_free_space_key(struct extent_buffer *eb, + struct btrfs_free_space_header *h, +- struct btrfs_disk_key *key) ++ const struct btrfs_disk_key *key) + { + write_eb_member(eb, h, struct btrfs_free_space_header, location, key); + } +@@ -1961,25 +1964,25 @@ static inline void btrfs_cpu_key_to_disk + disk->objectid = cpu_to_le64(cpu->objectid); + } + +-static inline void btrfs_node_key_to_cpu(struct extent_buffer *eb, +- struct btrfs_key *key, int nr) ++static inline void btrfs_node_key_to_cpu(const struct extent_buffer *eb, ++ struct btrfs_key *key, int nr) + { + struct btrfs_disk_key disk_key; + btrfs_node_key(eb, &disk_key, nr); + btrfs_disk_key_to_cpu(key, &disk_key); + } + +-static inline void btrfs_item_key_to_cpu(struct extent_buffer *eb, +- struct btrfs_key *key, int nr) ++static inline void btrfs_item_key_to_cpu(const struct extent_buffer *eb, ++ struct btrfs_key *key, int nr) + { + struct btrfs_disk_key disk_key; + btrfs_item_key(eb, &disk_key, nr); + btrfs_disk_key_to_cpu(key, &disk_key); + } + +-static inline void btrfs_dir_item_key_to_cpu(struct extent_buffer *eb, +- struct btrfs_dir_item *item, +- struct btrfs_key *key) ++static inline void btrfs_dir_item_key_to_cpu(const struct extent_buffer *eb, ++ const struct btrfs_dir_item *item, ++ struct btrfs_key *key) + { + struct 
btrfs_disk_key disk_key; + btrfs_dir_item_key(eb, item, &disk_key); +@@ -2012,7 +2015,7 @@ BTRFS_SETGET_STACK_FUNCS(stack_header_nr + nritems, 32); + BTRFS_SETGET_STACK_FUNCS(stack_header_bytenr, struct btrfs_header, bytenr, 64); + +-static inline int btrfs_header_flag(struct extent_buffer *eb, u64 flag) ++static inline int btrfs_header_flag(const struct extent_buffer *eb, u64 flag) + { + return (btrfs_header_flags(eb) & flag) == flag; + } +@@ -2031,7 +2034,7 @@ static inline int btrfs_clear_header_fla + return (flags & flag) == flag; + } + +-static inline int btrfs_header_backref_rev(struct extent_buffer *eb) ++static inline int btrfs_header_backref_rev(const struct extent_buffer *eb) + { + u64 flags = btrfs_header_flags(eb); + return flags >> BTRFS_BACKREF_REV_SHIFT; +@@ -2051,12 +2054,12 @@ static inline unsigned long btrfs_header + return offsetof(struct btrfs_header, fsid); + } + +-static inline unsigned long btrfs_header_chunk_tree_uuid(struct extent_buffer *eb) ++static inline unsigned long btrfs_header_chunk_tree_uuid(const struct extent_buffer *eb) + { + return offsetof(struct btrfs_header, chunk_tree_uuid); + } + +-static inline int btrfs_is_leaf(struct extent_buffer *eb) ++static inline int btrfs_is_leaf(const struct extent_buffer *eb) + { + return btrfs_header_level(eb) == 0; + } +@@ -2090,12 +2093,12 @@ BTRFS_SETGET_STACK_FUNCS(root_stransid, + BTRFS_SETGET_STACK_FUNCS(root_rtransid, struct btrfs_root_item, + rtransid, 64); + +-static inline bool btrfs_root_readonly(struct btrfs_root *root) ++static inline bool btrfs_root_readonly(const struct btrfs_root *root) + { + return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_RDONLY)) != 0; + } + +-static inline bool btrfs_root_dead(struct btrfs_root *root) ++static inline bool btrfs_root_dead(const struct btrfs_root *root) + { + return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_DEAD)) != 0; + } +@@ -2152,51 +2155,51 @@ BTRFS_SETGET_STACK_FUNCS(backup_num_devi + /* struct 
btrfs_balance_item */ + BTRFS_SETGET_FUNCS(balance_flags, struct btrfs_balance_item, flags, 64); + +-static inline void btrfs_balance_data(struct extent_buffer *eb, +- struct btrfs_balance_item *bi, ++static inline void btrfs_balance_data(const struct extent_buffer *eb, ++ const struct btrfs_balance_item *bi, + struct btrfs_disk_balance_args *ba) + { + read_eb_member(eb, bi, struct btrfs_balance_item, data, ba); + } + + static inline void btrfs_set_balance_data(struct extent_buffer *eb, +- struct btrfs_balance_item *bi, +- struct btrfs_disk_balance_args *ba) ++ struct btrfs_balance_item *bi, ++ const struct btrfs_disk_balance_args *ba) + { + write_eb_member(eb, bi, struct btrfs_balance_item, data, ba); + } + +-static inline void btrfs_balance_meta(struct extent_buffer *eb, +- struct btrfs_balance_item *bi, ++static inline void btrfs_balance_meta(const struct extent_buffer *eb, ++ const struct btrfs_balance_item *bi, + struct btrfs_disk_balance_args *ba) + { + read_eb_member(eb, bi, struct btrfs_balance_item, meta, ba); + } + + static inline void btrfs_set_balance_meta(struct extent_buffer *eb, +- struct btrfs_balance_item *bi, +- struct btrfs_disk_balance_args *ba) ++ struct btrfs_balance_item *bi, ++ const struct btrfs_disk_balance_args *ba) + { + write_eb_member(eb, bi, struct btrfs_balance_item, meta, ba); + } + +-static inline void btrfs_balance_sys(struct extent_buffer *eb, +- struct btrfs_balance_item *bi, ++static inline void btrfs_balance_sys(const struct extent_buffer *eb, ++ const struct btrfs_balance_item *bi, + struct btrfs_disk_balance_args *ba) + { + read_eb_member(eb, bi, struct btrfs_balance_item, sys, ba); + } + + static inline void btrfs_set_balance_sys(struct extent_buffer *eb, +- struct btrfs_balance_item *bi, +- struct btrfs_disk_balance_args *ba) ++ struct btrfs_balance_item *bi, ++ const struct btrfs_disk_balance_args *ba) + { + write_eb_member(eb, bi, struct btrfs_balance_item, sys, ba); + } + + static inline void + 
btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu, +- struct btrfs_disk_balance_args *disk) ++ const struct btrfs_disk_balance_args *disk) + { + memset(cpu, 0, sizeof(*cpu)); + +@@ -2216,7 +2219,7 @@ btrfs_disk_balance_args_to_cpu(struct bt + + static inline void + btrfs_cpu_balance_args_to_disk(struct btrfs_disk_balance_args *disk, +- struct btrfs_balance_args *cpu) ++ const struct btrfs_balance_args *cpu) + { + memset(disk, 0, sizeof(*disk)); + +@@ -2284,7 +2287,7 @@ BTRFS_SETGET_STACK_FUNCS(super_magic, st + BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block, + uuid_tree_generation, 64); + +-static inline int btrfs_super_csum_size(struct btrfs_super_block *s) ++static inline int btrfs_super_csum_size(const struct btrfs_super_block *s) + { + u16 t = btrfs_super_csum_type(s); + /* +@@ -2303,8 +2306,8 @@ static inline unsigned long btrfs_leaf_d + * this returns the address of the start of the last item, + * which is the stop of the leaf data stack + */ +-static inline unsigned int leaf_data_end(struct btrfs_root *root, +- struct extent_buffer *leaf) ++static inline unsigned int leaf_data_end(const struct btrfs_root *root, ++ const struct extent_buffer *leaf) + { + u32 nr = btrfs_header_nritems(leaf); + +@@ -2329,7 +2332,7 @@ BTRFS_SETGET_STACK_FUNCS(stack_file_exte + struct btrfs_file_extent_item, compression, 8); + + static inline unsigned long +-btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e) ++btrfs_file_extent_inline_start(const struct btrfs_file_extent_item *e) + { + return (unsigned long)e + BTRFS_FILE_EXTENT_INLINE_DATA_START; + } +@@ -2363,8 +2366,9 @@ BTRFS_SETGET_FUNCS(file_extent_other_enc + * size of any extent headers. 
If a file is compressed on disk, this is + * the compressed size + */ +-static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb, +- struct btrfs_item *e) ++static inline u32 btrfs_file_extent_inline_item_len( ++ const struct extent_buffer *eb, ++ struct btrfs_item *e) + { + return btrfs_item_size(eb, e) - BTRFS_FILE_EXTENT_INLINE_DATA_START; + } +@@ -2372,9 +2376,9 @@ static inline u32 btrfs_file_extent_inli + /* this returns the number of file bytes represented by the inline item. + * If an item is compressed, this is the uncompressed size + */ +-static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb, +- int slot, +- struct btrfs_file_extent_item *fi) ++static inline u32 btrfs_file_extent_inline_len(const struct extent_buffer *eb, ++ int slot, ++ const struct btrfs_file_extent_item *fi) + { + struct btrfs_map_token token; + +@@ -2396,8 +2400,8 @@ static inline u32 btrfs_file_extent_inli + + + /* btrfs_dev_stats_item */ +-static inline u64 btrfs_dev_stats_value(struct extent_buffer *eb, +- struct btrfs_dev_stats_item *ptr, ++static inline u64 btrfs_dev_stats_value(const struct extent_buffer *eb, ++ const struct btrfs_dev_stats_item *ptr, + int index) + { + u64 val; +--- a/fs/btrfs/extent_io.c ++++ b/fs/btrfs/extent_io.c +@@ -5442,9 +5442,8 @@ unlock_exit: + return ret; + } + +-void read_extent_buffer(struct extent_buffer *eb, void *dstv, +- unsigned long start, +- unsigned long len) ++void read_extent_buffer(const struct extent_buffer *eb, void *dstv, ++ unsigned long start, unsigned long len) + { + size_t cur; + size_t offset; +@@ -5473,9 +5472,9 @@ void read_extent_buffer(struct extent_bu + } + } + +-int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv, +- unsigned long start, +- unsigned long len) ++int read_extent_buffer_to_user(const struct extent_buffer *eb, ++ void __user *dstv, ++ unsigned long start, unsigned long len) + { + size_t cur; + size_t offset; +@@ -5515,10 +5514,10 @@ int 
read_extent_buffer_to_user(struct ex + * return 1 if the item spans two pages. + * return -EINVAL otherwise. + */ +-int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, +- unsigned long min_len, char **map, +- unsigned long *map_start, +- unsigned long *map_len) ++int map_private_extent_buffer(const struct extent_buffer *eb, ++ unsigned long start, unsigned long min_len, ++ char **map, unsigned long *map_start, ++ unsigned long *map_len) + { + size_t offset = start & (PAGE_SIZE - 1); + char *kaddr; +@@ -5552,9 +5551,8 @@ int map_private_extent_buffer(struct ext + return 0; + } + +-int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, +- unsigned long start, +- unsigned long len) ++int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv, ++ unsigned long start, unsigned long len) + { + size_t cur; + size_t offset; +--- a/fs/btrfs/extent_io.h ++++ b/fs/btrfs/extent_io.h +@@ -396,14 +396,13 @@ static inline void extent_buffer_get(str + atomic_inc(&eb->refs); + } + +-int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, +- unsigned long start, +- unsigned long len); +-void read_extent_buffer(struct extent_buffer *eb, void *dst, ++int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv, ++ unsigned long start, unsigned long len); ++void read_extent_buffer(const struct extent_buffer *eb, void *dst, + unsigned long start, + unsigned long len); +-int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dst, +- unsigned long start, ++int read_extent_buffer_to_user(const struct extent_buffer *eb, ++ void __user *dst, unsigned long start, + unsigned long len); + void write_extent_buffer(struct extent_buffer *eb, const void *src, + unsigned long start, unsigned long len); +@@ -428,10 +427,10 @@ void set_extent_buffer_uptodate(struct e + void clear_extent_buffer_uptodate(struct extent_buffer *eb); + int extent_buffer_uptodate(struct extent_buffer *eb); + int 
extent_buffer_under_io(struct extent_buffer *eb); +-int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, +- unsigned long min_len, char **map, +- unsigned long *map_start, +- unsigned long *map_len); ++int map_private_extent_buffer(const struct extent_buffer *eb, ++ unsigned long offset, unsigned long min_len, ++ char **map, unsigned long *map_start, ++ unsigned long *map_len); + void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end); + void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end); + void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, +--- a/fs/btrfs/struct-funcs.c ++++ b/fs/btrfs/struct-funcs.c +@@ -50,8 +50,8 @@ static inline void put_unaligned_le8(u8 + */ + + #define DEFINE_BTRFS_SETGET_BITS(bits) \ +-u##bits btrfs_get_token_##bits(struct extent_buffer *eb, void *ptr, \ +- unsigned long off, \ ++u##bits btrfs_get_token_##bits(const struct extent_buffer *eb, \ ++ const void *ptr, unsigned long off, \ + struct btrfs_map_token *token) \ + { \ + unsigned long part_offset = (unsigned long)ptr; \ +@@ -90,7 +90,8 @@ u##bits btrfs_get_token_##bits(struct ex + return res; \ + } \ + void btrfs_set_token_##bits(struct extent_buffer *eb, \ +- void *ptr, unsigned long off, u##bits val, \ ++ const void *ptr, unsigned long off, \ ++ u##bits val, \ + struct btrfs_map_token *token) \ + { \ + unsigned long part_offset = (unsigned long)ptr; \ +@@ -133,7 +134,7 @@ DEFINE_BTRFS_SETGET_BITS(16) + DEFINE_BTRFS_SETGET_BITS(32) + DEFINE_BTRFS_SETGET_BITS(64) + +-void btrfs_node_key(struct extent_buffer *eb, ++void btrfs_node_key(const struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr) + { + unsigned long ptr = btrfs_node_key_ptr_offset(nr); diff --git a/queue-4.9/btrfs-tree-check-reduce-stack-consumption-in-check_dir_item.patch b/queue-4.9/btrfs-tree-check-reduce-stack-consumption-in-check_dir_item.patch new file mode 100644 index 00000000000..12557372681 --- /dev/null 
+++ b/queue-4.9/btrfs-tree-check-reduce-stack-consumption-in-check_dir_item.patch @@ -0,0 +1,49 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: David Sterba +Date: Wed, 10 Jan 2018 15:13:07 +0100 +Subject: btrfs: tree-check: reduce stack consumption in check_dir_item + +From: David Sterba + +commit e2683fc9d219430f5b78889b50cde7f40efeba7b upstream. + +I've noticed that the updated item checker stack consumption increased +dramatically in 542f5385e20cf97447 ("btrfs: tree-checker: Add checker +for dir item") + +tree-checker.c:check_leaf +552 (176 -> 728) + +The array is 255 bytes long, dynamic allocation would slow down the +sanity checks so it's more reasonable to keep it on-stack. Moving the +variable to the scope of use reduces the stack usage again + +tree-checker.c:check_leaf -264 (728 -> 464) + +Reviewed-by: Josef Bacik +Reviewed-by: Qu Wenruo +Signed-off-by: David Sterba +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/tree-checker.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/tree-checker.c ++++ b/fs/btrfs/tree-checker.c +@@ -212,7 +212,6 @@ static int check_dir_item(struct btrfs_r + + di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); + while (cur < item_size) { +- char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)]; + u32 name_len; + u32 data_len; + u32 max_name_len; +@@ -295,6 +294,8 @@ static int check_dir_item(struct btrfs_r + */ + if (key->type == BTRFS_DIR_ITEM_KEY || + key->type == BTRFS_XATTR_ITEM_KEY) { ++ char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)]; ++ + read_extent_buffer(leaf, namebuf, + (unsigned long)(di + 1), name_len); + name_hash = btrfs_name_hash(namebuf, name_len); diff --git a/queue-4.9/btrfs-tree-checker-add-checker-for-dir-item.patch b/queue-4.9/btrfs-tree-checker-add-checker-for-dir-item.patch new file mode 100644 index 00000000000..d91ab595f0c --- /dev/null +++ b/queue-4.9/btrfs-tree-checker-add-checker-for-dir-item.patch @@ -0,0 +1,208 @@ +From foo@baz Thu 
Dec 6 15:27:31 CET 2018 +From: Qu Wenruo +Date: Wed, 8 Nov 2017 08:54:25 +0800 +Subject: btrfs: tree-checker: Add checker for dir item + +From: Qu Wenruo + +commit ad7b0368f33cffe67fecd302028915926e50ef7e upstream. + +Add checker for dir item, for key types DIR_ITEM, DIR_INDEX and +XATTR_ITEM. + +This checker does comprehensive checks for: + +1) dir_item header and its data size + Against item boundary and maximum name/xattr length. + This part is mostly the same as old verify_dir_item(). + +2) dir_type + Against maximum file types, and against key type. + Since XATTR key should only have FT_XATTR dir item, and normal dir + item type should not have XATTR key. + + The check between key->type and dir_type is newly introduced by this + patch. + +3) name hash + For XATTR and DIR_ITEM key, key->offset is name hash (crc32c). + Check the hash of the name against the key to ensure it's correct. + + The name hash check is only found in btrfs-progs before this patch. + +Signed-off-by: Qu Wenruo +Reviewed-by: Nikolay Borisov +Reviewed-by: Su Yue +Reviewed-by: David Sterba +Signed-off-by: David Sterba +[bwh: Backported to 4.9: BTRFS_MAX_XATTR_SIZE() takes a root not an fs_info] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/tree-checker.c | 141 ++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 141 insertions(+) + +--- a/fs/btrfs/tree-checker.c ++++ b/fs/btrfs/tree-checker.c +@@ -30,6 +30,7 @@ + #include "tree-checker.h" + #include "disk-io.h" + #include "compression.h" ++#include "hash.h" + + #define CORRUPT(reason, eb, root, slot) \ + btrfs_crit(root->fs_info, \ +@@ -176,6 +177,141 @@ static int check_csum_item(struct btrfs_ + } + + /* ++ * Customized reported for dir_item, only important new info is key->objectid, ++ * which represents inode number ++ */ ++__printf(4, 5) ++static void dir_item_err(const struct btrfs_root *root, ++ const struct extent_buffer *eb, int slot, ++ const char *fmt, ...) 
++{ ++ struct btrfs_key key; ++ struct va_format vaf; ++ va_list args; ++ ++ btrfs_item_key_to_cpu(eb, &key, slot); ++ va_start(args, fmt); ++ ++ vaf.fmt = fmt; ++ vaf.va = &args; ++ ++ btrfs_crit(root->fs_info, ++ "corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV", ++ btrfs_header_level(eb) == 0 ? "leaf" : "node", root->objectid, ++ btrfs_header_bytenr(eb), slot, key.objectid, &vaf); ++ va_end(args); ++} ++ ++static int check_dir_item(struct btrfs_root *root, ++ struct extent_buffer *leaf, ++ struct btrfs_key *key, int slot) ++{ ++ struct btrfs_dir_item *di; ++ u32 item_size = btrfs_item_size_nr(leaf, slot); ++ u32 cur = 0; ++ ++ di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); ++ while (cur < item_size) { ++ char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)]; ++ u32 name_len; ++ u32 data_len; ++ u32 max_name_len; ++ u32 total_size; ++ u32 name_hash; ++ u8 dir_type; ++ ++ /* header itself should not cross item boundary */ ++ if (cur + sizeof(*di) > item_size) { ++ dir_item_err(root, leaf, slot, ++ "dir item header crosses item boundary, have %lu boundary %u", ++ cur + sizeof(*di), item_size); ++ return -EUCLEAN; ++ } ++ ++ /* dir type check */ ++ dir_type = btrfs_dir_type(leaf, di); ++ if (dir_type >= BTRFS_FT_MAX) { ++ dir_item_err(root, leaf, slot, ++ "invalid dir item type, have %u expect [0, %u)", ++ dir_type, BTRFS_FT_MAX); ++ return -EUCLEAN; ++ } ++ ++ if (key->type == BTRFS_XATTR_ITEM_KEY && ++ dir_type != BTRFS_FT_XATTR) { ++ dir_item_err(root, leaf, slot, ++ "invalid dir item type for XATTR key, have %u expect %u", ++ dir_type, BTRFS_FT_XATTR); ++ return -EUCLEAN; ++ } ++ if (dir_type == BTRFS_FT_XATTR && ++ key->type != BTRFS_XATTR_ITEM_KEY) { ++ dir_item_err(root, leaf, slot, ++ "xattr dir type found for non-XATTR key"); ++ return -EUCLEAN; ++ } ++ if (dir_type == BTRFS_FT_XATTR) ++ max_name_len = XATTR_NAME_MAX; ++ else ++ max_name_len = BTRFS_NAME_LEN; ++ ++ /* Name/data length check */ ++ name_len = btrfs_dir_name_len(leaf, di); ++ 
data_len = btrfs_dir_data_len(leaf, di); ++ if (name_len > max_name_len) { ++ dir_item_err(root, leaf, slot, ++ "dir item name len too long, have %u max %u", ++ name_len, max_name_len); ++ return -EUCLEAN; ++ } ++ if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(root)) { ++ dir_item_err(root, leaf, slot, ++ "dir item name and data len too long, have %u max %u", ++ name_len + data_len, ++ BTRFS_MAX_XATTR_SIZE(root)); ++ return -EUCLEAN; ++ } ++ ++ if (data_len && dir_type != BTRFS_FT_XATTR) { ++ dir_item_err(root, leaf, slot, ++ "dir item with invalid data len, have %u expect 0", ++ data_len); ++ return -EUCLEAN; ++ } ++ ++ total_size = sizeof(*di) + name_len + data_len; ++ ++ /* header and name/data should not cross item boundary */ ++ if (cur + total_size > item_size) { ++ dir_item_err(root, leaf, slot, ++ "dir item data crosses item boundary, have %u boundary %u", ++ cur + total_size, item_size); ++ return -EUCLEAN; ++ } ++ ++ /* ++ * Special check for XATTR/DIR_ITEM, as key->offset is name ++ * hash, should match its name ++ */ ++ if (key->type == BTRFS_DIR_ITEM_KEY || ++ key->type == BTRFS_XATTR_ITEM_KEY) { ++ read_extent_buffer(leaf, namebuf, ++ (unsigned long)(di + 1), name_len); ++ name_hash = btrfs_name_hash(namebuf, name_len); ++ if (key->offset != name_hash) { ++ dir_item_err(root, leaf, slot, ++ "name hash mismatch with key, have 0x%016x expect 0x%016llx", ++ name_hash, key->offset); ++ return -EUCLEAN; ++ } ++ } ++ cur += total_size; ++ di = (struct btrfs_dir_item *)((void *)di + total_size); ++ } ++ return 0; ++} ++ ++/* + * Common point to switch the item-specific validation. 
+ */ + static int check_leaf_item(struct btrfs_root *root, +@@ -191,6 +327,11 @@ static int check_leaf_item(struct btrfs_ + case BTRFS_EXTENT_CSUM_KEY: + ret = check_csum_item(root, leaf, key, slot); + break; ++ case BTRFS_DIR_ITEM_KEY: ++ case BTRFS_DIR_INDEX_KEY: ++ case BTRFS_XATTR_ITEM_KEY: ++ ret = check_dir_item(root, leaf, key, slot); ++ break; + } + return ret; + } diff --git a/queue-4.9/btrfs-tree-checker-check-level-for-leaves-and-nodes.patch b/queue-4.9/btrfs-tree-checker-check-level-for-leaves-and-nodes.patch new file mode 100644 index 00000000000..8afce3323ad --- /dev/null +++ b/queue-4.9/btrfs-tree-checker-check-level-for-leaves-and-nodes.patch @@ -0,0 +1,63 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Qu Wenruo +Date: Fri, 28 Sep 2018 07:59:34 +0800 +Subject: btrfs: tree-checker: Check level for leaves and nodes + +From: Qu Wenruo + +commit f556faa46eb4e96d0d0772e74ecf66781e132f72 upstream. + +Although we have tree level check at tree read runtime, it's completely +based on its parent level. +We still need to do accurate level check to avoid invalid tree blocks +sneak into kernel space. + +The check itself is simple, for leaf its level should always be 0. +For nodes its level should be in range [1, BTRFS_MAX_LEVEL - 1]. 
+ +Signed-off-by: Qu Wenruo +Reviewed-by: Su Yue +Reviewed-by: David Sterba +Signed-off-by: David Sterba +[bwh: Backported to 4.9: + - Pass root instead of fs_info to generic_err() + - Adjust context] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/tree-checker.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +--- a/fs/btrfs/tree-checker.c ++++ b/fs/btrfs/tree-checker.c +@@ -447,6 +447,13 @@ static int check_leaf(struct btrfs_root + u32 nritems = btrfs_header_nritems(leaf); + int slot; + ++ if (btrfs_header_level(leaf) != 0) { ++ generic_err(root, leaf, 0, ++ "invalid level for leaf, have %d expect 0", ++ btrfs_header_level(leaf)); ++ return -EUCLEAN; ++ } ++ + /* + * Extent buffers from a relocation tree have a owner field that + * corresponds to the subvolume tree they are based on. So just from an +@@ -589,9 +596,16 @@ int btrfs_check_node(struct btrfs_root * + unsigned long nr = btrfs_header_nritems(node); + struct btrfs_key key, next_key; + int slot; ++ int level = btrfs_header_level(node); + u64 bytenr; + int ret = 0; + ++ if (level <= 0 || level >= BTRFS_MAX_LEVEL) { ++ generic_err(root, node, 0, ++ "invalid level for node, have %d expect [1, %d]", ++ level, BTRFS_MAX_LEVEL - 1); ++ return -EUCLEAN; ++ } + if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root)) { + btrfs_crit(root->fs_info, + "corrupt node: root=%llu block=%llu, nritems too %s, have %lu expect range [1,%u]", diff --git a/queue-4.9/btrfs-tree-checker-detect-invalid-and-empty-essential-trees.patch b/queue-4.9/btrfs-tree-checker-detect-invalid-and-empty-essential-trees.patch new file mode 100644 index 00000000000..e8cd3aad180 --- /dev/null +++ b/queue-4.9/btrfs-tree-checker-detect-invalid-and-empty-essential-trees.patch @@ -0,0 +1,71 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Qu Wenruo +Date: Tue, 3 Jul 2018 17:10:06 +0800 +Subject: btrfs: tree-checker: Detect invalid and empty essential trees + +From: Qu Wenruo + +commit 
ba480dd4db9f1798541eb2d1c423fc95feee8d36 upstream. + +A crafted image has empty root tree block, which will later cause NULL +pointer dereference. + +The following trees should never be empty: +1) Tree root + Must contain at least root items for extent tree, device tree and fs + tree + +2) Chunk tree + Or we can't even bootstrap as it contains the mapping. + +3) Fs tree + At least inode item for top level inode (.). + +4) Device tree + Dev extents for chunks + +5) Extent tree + Must have corresponding extent for each chunk. + +If any of them is empty, we are sure the fs is corrupted and no need to +mount it. + +Link: https://bugzilla.kernel.org/show_bug.cgi?id=199847 +Reported-by: Xu Wen +Signed-off-by: Qu Wenruo +Tested-by: Gu Jinxiang +Reviewed-by: David Sterba +Signed-off-by: David Sterba +[bwh: Backported to 4.9: Pass root instead of fs_info to generic_err()] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/tree-checker.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/tree-checker.c ++++ b/fs/btrfs/tree-checker.c +@@ -456,9 +456,22 @@ static int check_leaf(struct btrfs_root + * skip this check for relocation trees. 
+ */ + if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) { ++ u64 owner = btrfs_header_owner(leaf); + struct btrfs_root *check_root; + +- key.objectid = btrfs_header_owner(leaf); ++ /* These trees must never be empty */ ++ if (owner == BTRFS_ROOT_TREE_OBJECTID || ++ owner == BTRFS_CHUNK_TREE_OBJECTID || ++ owner == BTRFS_EXTENT_TREE_OBJECTID || ++ owner == BTRFS_DEV_TREE_OBJECTID || ++ owner == BTRFS_FS_TREE_OBJECTID || ++ owner == BTRFS_DATA_RELOC_TREE_OBJECTID) { ++ generic_err(root, leaf, 0, ++ "invalid root, root %llu must never be empty", ++ owner); ++ return -EUCLEAN; ++ } ++ key.objectid = owner; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + diff --git a/queue-4.9/btrfs-tree-checker-enhance-btrfs_check_node-output.patch b/queue-4.9/btrfs-tree-checker-enhance-btrfs_check_node-output.patch new file mode 100644 index 00000000000..8a6c3db98bd --- /dev/null +++ b/queue-4.9/btrfs-tree-checker-enhance-btrfs_check_node-output.patch @@ -0,0 +1,130 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Qu Wenruo +Date: Mon, 9 Oct 2017 01:51:03 +0000 +Subject: btrfs: tree-checker: Enhance btrfs_check_node output + +From: Qu Wenruo + +commit bba4f29896c986c4cec17bc0f19f2ce644fceae1 upstream. + +Use inline function to replace macro since we don't need +stringification. +(Macro still exists until all callers get updated) + +And add more info about the error, and replace EIO with EUCLEAN. + +For nr_items error, report if it's too large or too small, and output +the valid value range. + +For node block pointer, added a new alignment checker. + +For key order, also output the next key to make the problem more +obvious. 
+ +Signed-off-by: Qu Wenruo +[ wording adjustments, unindented long strings ] +Signed-off-by: David Sterba +[bwh: Backported to 4.9: + - Use root->sectorsize instead of root->fs_info->sectorsize + - BTRFS_NODEPTRS_PER_BLOCK() takes a root instead of an fs_info] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/tree-checker.c | 68 +++++++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 61 insertions(+), 7 deletions(-) + +--- a/fs/btrfs/tree-checker.c ++++ b/fs/btrfs/tree-checker.c +@@ -37,6 +37,46 @@ + btrfs_header_level(eb) == 0 ? "leaf" : "node", \ + reason, btrfs_header_bytenr(eb), root->objectid, slot) + ++/* ++ * Error message should follow the following format: ++ * corrupt <type>: <identifier>, <reason>[, <bad_value>] ++ * ++ * @type: leaf or node ++ * @identifier: the necessary info to locate the leaf/node. ++ * It's recommened to decode key.objecitd/offset if it's ++ * meaningful. ++ * @reason: describe the error ++ * @bad_value: optional, it's recommened to output bad value and its ++ * expected value (range). ++ * ++ * Since comma is used to separate the components, only space is allowed ++ * inside each component. ++ */ ++ ++/* ++ * Append generic "corrupt leaf/node root=%llu block=%llu slot=%d: " to @fmt. ++ * Allows callers to customize the output. ++ */ ++__printf(4, 5) ++static void generic_err(const struct btrfs_root *root, ++ const struct extent_buffer *eb, int slot, ++ const char *fmt, ...) ++{ ++ struct va_format vaf; ++ va_list args; ++ ++ va_start(args, fmt); ++ ++ vaf.fmt = fmt; ++ vaf.va = &args; ++ ++ btrfs_crit(root->fs_info, ++ "corrupt %s: root=%llu block=%llu slot=%d, %pV", ++ btrfs_header_level(eb) == 0 ? 
"leaf" : "node", ++ root->objectid, btrfs_header_bytenr(eb), slot, &vaf); ++ va_end(args); ++} ++ + static int check_extent_data_item(struct btrfs_root *root, + struct extent_buffer *leaf, + struct btrfs_key *key, int slot) +@@ -282,9 +322,11 @@ int btrfs_check_node(struct btrfs_root * + + if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root)) { + btrfs_crit(root->fs_info, +- "corrupt node: block %llu root %llu nritems %lu", +- node->start, root->objectid, nr); +- return -EIO; ++"corrupt node: root=%llu block=%llu, nritems too %s, have %lu expect range [1,%u]", ++ root->objectid, node->start, ++ nr == 0 ? "small" : "large", nr, ++ BTRFS_NODEPTRS_PER_BLOCK(root)); ++ return -EUCLEAN; + } + + for (slot = 0; slot < nr - 1; slot++) { +@@ -293,14 +335,26 @@ int btrfs_check_node(struct btrfs_root * + btrfs_node_key_to_cpu(node, &next_key, slot + 1); + + if (!bytenr) { +- CORRUPT("invalid item slot", node, root, slot); +- ret = -EIO; ++ generic_err(root, node, slot, ++ "invalid NULL node pointer"); ++ ret = -EUCLEAN; ++ goto out; ++ } ++ if (!IS_ALIGNED(bytenr, root->sectorsize)) { ++ generic_err(root, node, slot, ++ "unaligned pointer, have %llu should be aligned to %u", ++ bytenr, root->sectorsize); ++ ret = -EUCLEAN; + goto out; + } + + if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) { +- CORRUPT("bad key order", node, root, slot); +- ret = -EIO; ++ generic_err(root, node, slot, ++ "bad key order, current (%llu %u %llu) next (%llu %u %llu)", ++ key.objectid, key.type, key.offset, ++ next_key.objectid, next_key.type, ++ next_key.offset); ++ ret = -EUCLEAN; + goto out; + } + } diff --git a/queue-4.9/btrfs-tree-checker-fix-false-panic-for-sanity-test.patch b/queue-4.9/btrfs-tree-checker-fix-false-panic-for-sanity-test.patch new file mode 100644 index 00000000000..c5c0f585ef1 --- /dev/null +++ b/queue-4.9/btrfs-tree-checker-fix-false-panic-for-sanity-test.patch @@ -0,0 +1,162 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Qu Wenruo +Date: Wed, 8 Nov 2017 08:54:24 +0800 
+Subject: btrfs: tree-checker: Fix false panic for sanity test + +From: Qu Wenruo + +commit 69fc6cbbac542c349b3d350d10f6e394c253c81d upstream. + +[BUG] +If we run btrfs with CONFIG_BTRFS_FS_RUN_SANITY_TESTS=y, it will +instantly cause kernel panic like: + +------ +... +assertion failed: 0, file: fs/btrfs/disk-io.c, line: 3853 +... +Call Trace: + btrfs_mark_buffer_dirty+0x187/0x1f0 [btrfs] + setup_items_for_insert+0x385/0x650 [btrfs] + __btrfs_drop_extents+0x129a/0x1870 [btrfs] +... +----- + +[Cause] +Btrfs will call btrfs_check_leaf() in btrfs_mark_buffer_dirty() to check +if the leaf is valid with CONFIG_BTRFS_FS_RUN_SANITY_TESTS=y. + +However quite some btrfs_mark_buffer_dirty() callers(*) don't really +initialize its item data but only initialize its item pointers, leaving +item data uninitialized. + +This makes tree-checker catch uninitialized data as error, causing +such panic. + +*: These callers include but not limited to +setup_items_for_insert() +btrfs_split_item() +btrfs_expand_item() + +[Fix] +Add a new parameter @check_item_data to btrfs_check_leaf(). +With @check_item_data set to false, item data check will be skipped and +fallback to old btrfs_check_leaf() behavior. + +So we can still get early warning if we screw up item pointers, and +avoid false panic. + +Cc: Filipe Manana +Reported-by: Lakshmipathi.G +Signed-off-by: Qu Wenruo +Reviewed-by: Liu Bo +Reviewed-by: David Sterba +Signed-off-by: David Sterba +[bwh: Backported to 4.9: adjust context] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/disk-io.c | 10 ++++++++-- + fs/btrfs/tree-checker.c | 27 ++++++++++++++++++++++----- + fs/btrfs/tree-checker.h | 14 +++++++++++++- + 3 files changed, 43 insertions(+), 8 deletions(-) + +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -604,7 +604,7 @@ static int btree_readpage_end_io_hook(st + * that we don't try and read the other copies of this block, just + * return -EIO. 
+ */ +- if (found_level == 0 && btrfs_check_leaf(root, eb)) { ++ if (found_level == 0 && btrfs_check_leaf_full(root, eb)) { + set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); + ret = -EIO; + } +@@ -3940,7 +3940,13 @@ void btrfs_mark_buffer_dirty(struct exte + buf->len, + root->fs_info->dirty_metadata_batch); + #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY +- if (btrfs_header_level(buf) == 0 && btrfs_check_leaf(root, buf)) { ++ /* ++ * Since btrfs_mark_buffer_dirty() can be called with item pointer set ++ * but item data not updated. ++ * So here we should only check item pointers, not item data. ++ */ ++ if (btrfs_header_level(buf) == 0 && ++ btrfs_check_leaf_relaxed(root, buf)) { + btrfs_print_leaf(root, buf); + ASSERT(0); + } +--- a/fs/btrfs/tree-checker.c ++++ b/fs/btrfs/tree-checker.c +@@ -195,7 +195,8 @@ static int check_leaf_item(struct btrfs_ + return ret; + } + +-int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf) ++static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf, ++ bool check_item_data) + { + struct btrfs_fs_info *fs_info = root->fs_info; + /* No valid key type is 0, so all key should be larger than this key */ +@@ -299,10 +300,15 @@ int btrfs_check_leaf(struct btrfs_root * + return -EUCLEAN; + } + +- /* Check if the item size and content meet other criteria */ +- ret = check_leaf_item(root, leaf, &key, slot); +- if (ret < 0) +- return ret; ++ if (check_item_data) { ++ /* ++ * Check if the item size and content meet other ++ * criteria ++ */ ++ ret = check_leaf_item(root, leaf, &key, slot); ++ if (ret < 0) ++ return ret; ++ } + + prev_key.objectid = key.objectid; + prev_key.type = key.type; +@@ -312,6 +318,17 @@ int btrfs_check_leaf(struct btrfs_root * + return 0; + } + ++int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf) ++{ ++ return check_leaf(root, leaf, true); ++} ++ ++int btrfs_check_leaf_relaxed(struct btrfs_root *root, ++ struct extent_buffer *leaf) ++{ ++ return check_leaf(root, 
leaf, false); ++} ++ + int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node) + { + unsigned long nr = btrfs_header_nritems(node); +--- a/fs/btrfs/tree-checker.h ++++ b/fs/btrfs/tree-checker.h +@@ -20,7 +20,19 @@ + #include "ctree.h" + #include "extent_io.h" + +-int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf); ++/* ++ * Comprehensive leaf checker. ++ * Will check not only the item pointers, but also every possible member ++ * in item data. ++ */ ++int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf); ++ ++/* ++ * Less strict leaf checker. ++ * Will only check item pointers, not reading item data. ++ */ ++int btrfs_check_leaf_relaxed(struct btrfs_root *root, ++ struct extent_buffer *leaf); + int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node); + + #endif diff --git a/queue-4.9/btrfs-tree-checker-fix-misleading-group-system-information.patch b/queue-4.9/btrfs-tree-checker-fix-misleading-group-system-information.patch new file mode 100644 index 00000000000..4e2ce6e112f --- /dev/null +++ b/queue-4.9/btrfs-tree-checker-fix-misleading-group-system-information.patch @@ -0,0 +1,37 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Shaokun Zhang +Date: Mon, 5 Nov 2018 18:49:09 +0800 +Subject: btrfs: tree-checker: Fix misleading group system information + +From: Shaokun Zhang + +commit 761333f2f50ccc887aa9957ae829300262c0d15b upstream. + +block_group_err shows the group system as a decimal value with a '0x' +prefix, which is somewhat misleading. + +Fix it to print hexadecimal, as was intended. 
+ +Fixes: fce466eab7ac6 ("btrfs: tree-checker: Verify block_group_item") +Reviewed-by: Nikolay Borisov +Reviewed-by: Qu Wenruo +Signed-off-by: Shaokun Zhang +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/tree-checker.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/btrfs/tree-checker.c ++++ b/fs/btrfs/tree-checker.c +@@ -399,7 +399,7 @@ static int check_block_group_item(struct + type != (BTRFS_BLOCK_GROUP_METADATA | + BTRFS_BLOCK_GROUP_DATA)) { + block_group_err(fs_info, leaf, slot, +-"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llu or 0x%llx", ++"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx", + type, hweight64(type), + BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA, + BTRFS_BLOCK_GROUP_SYSTEM, diff --git a/queue-4.9/btrfs-tree-checker-use-zu-format-string-for-size_t.patch b/queue-4.9/btrfs-tree-checker-use-zu-format-string-for-size_t.patch new file mode 100644 index 00000000000..d9692db0865 --- /dev/null +++ b/queue-4.9/btrfs-tree-checker-use-zu-format-string-for-size_t.patch @@ -0,0 +1,37 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Arnd Bergmann +Date: Wed, 6 Dec 2017 15:18:14 +0100 +Subject: btrfs: tree-checker: use %zu format string for size_t + +From: Arnd Bergmann + +commit 7cfad65297bfe0aa2996cd72d21c898aa84436d9 upstream. 
+ +The return value of sizeof() is of type size_t, so we must print it +using the %z format modifier rather than %l to avoid this warning +on some architectures: + +fs/btrfs/tree-checker.c: In function 'check_dir_item': +fs/btrfs/tree-checker.c:273:50: error: format '%lu' expects argument of type 'long unsigned int', but argument 5 has type 'u32' {aka 'unsigned int'} [-Werror=format=] + +Fixes: 005887f2e3e0 ("btrfs: tree-checker: Add checker for dir item") +Signed-off-by: Arnd Bergmann +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/tree-checker.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/btrfs/tree-checker.c ++++ b/fs/btrfs/tree-checker.c +@@ -223,7 +223,7 @@ static int check_dir_item(struct btrfs_r + /* header itself should not cross item boundary */ + if (cur + sizeof(*di) > item_size) { + dir_item_err(root, leaf, slot, +- "dir item header crosses item boundary, have %lu boundary %u", ++ "dir item header crosses item boundary, have %zu boundary %u", + cur + sizeof(*di), item_size); + return -EUCLEAN; + } diff --git a/queue-4.9/btrfs-tree-checker-verify-block_group_item.patch b/queue-4.9/btrfs-tree-checker-verify-block_group_item.patch new file mode 100644 index 00000000000..43bea251b65 --- /dev/null +++ b/queue-4.9/btrfs-tree-checker-verify-block_group_item.patch @@ -0,0 +1,191 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Qu Wenruo +Date: Tue, 3 Jul 2018 17:10:05 +0800 +Subject: btrfs: tree-checker: Verify block_group_item + +From: Qu Wenruo + +commit fce466eab7ac6baa9d2dcd88abcf945be3d4a089 upstream. + +A crafted image with invalid block group items could make free space cache +code to cause panic. + +We could detect such invalid block group item by checking: +1) Item size + Known fixed value. +2) Block group size (key.offset) + We have an upper limit on block group item (10G) +3) Chunk objectid + Known fixed value. 
+4) Type + Only 4 valid type values, DATA, METADATA, SYSTEM and DATA|METADATA. + No more than 1 bit set for profile type. +5) Used space + No more than the block group size. + +This should allow btrfs to detect and refuse to mount the crafted image. + +Link: https://bugzilla.kernel.org/show_bug.cgi?id=199849 +Reported-by: Xu Wen +Signed-off-by: Qu Wenruo +Reviewed-by: Gu Jinxiang +Reviewed-by: Nikolay Borisov +Tested-by: Gu Jinxiang +Reviewed-by: David Sterba +Signed-off-by: David Sterba +[bwh: Backported to 4.9: + - In check_leaf_item(), pass root->fs_info to check_block_group_item() + - Adjust context] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/tree-checker.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++++ + fs/btrfs/volumes.c | 2 + fs/btrfs/volumes.h | 2 + 3 files changed, 103 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/tree-checker.c ++++ b/fs/btrfs/tree-checker.c +@@ -31,6 +31,7 @@ + #include "disk-io.h" + #include "compression.h" + #include "hash.h" ++#include "volumes.h" + + #define CORRUPT(reason, eb, root, slot) \ + btrfs_crit(root->fs_info, \ +@@ -312,6 +313,102 @@ static int check_dir_item(struct btrfs_r + return 0; + } + ++__printf(4, 5) ++__cold ++static void block_group_err(const struct btrfs_fs_info *fs_info, ++ const struct extent_buffer *eb, int slot, ++ const char *fmt, ...) ++{ ++ struct btrfs_key key; ++ struct va_format vaf; ++ va_list args; ++ ++ btrfs_item_key_to_cpu(eb, &key, slot); ++ va_start(args, fmt); ++ ++ vaf.fmt = fmt; ++ vaf.va = &args; ++ ++ btrfs_crit(fs_info, ++ "corrupt %s: root=%llu block=%llu slot=%d bg_start=%llu bg_len=%llu, %pV", ++ btrfs_header_level(eb) == 0 ? 
"leaf" : "node", ++ btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, ++ key.objectid, key.offset, &vaf); ++ va_end(args); ++} ++ ++static int check_block_group_item(struct btrfs_fs_info *fs_info, ++ struct extent_buffer *leaf, ++ struct btrfs_key *key, int slot) ++{ ++ struct btrfs_block_group_item bgi; ++ u32 item_size = btrfs_item_size_nr(leaf, slot); ++ u64 flags; ++ u64 type; ++ ++ /* ++ * Here we don't really care about alignment since extent allocator can ++ * handle it. We care more about the size, as if one block group is ++ * larger than maximum size, it's must be some obvious corruption. ++ */ ++ if (key->offset > BTRFS_MAX_DATA_CHUNK_SIZE || key->offset == 0) { ++ block_group_err(fs_info, leaf, slot, ++ "invalid block group size, have %llu expect (0, %llu]", ++ key->offset, BTRFS_MAX_DATA_CHUNK_SIZE); ++ return -EUCLEAN; ++ } ++ ++ if (item_size != sizeof(bgi)) { ++ block_group_err(fs_info, leaf, slot, ++ "invalid item size, have %u expect %zu", ++ item_size, sizeof(bgi)); ++ return -EUCLEAN; ++ } ++ ++ read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot), ++ sizeof(bgi)); ++ if (btrfs_block_group_chunk_objectid(&bgi) != ++ BTRFS_FIRST_CHUNK_TREE_OBJECTID) { ++ block_group_err(fs_info, leaf, slot, ++ "invalid block group chunk objectid, have %llu expect %llu", ++ btrfs_block_group_chunk_objectid(&bgi), ++ BTRFS_FIRST_CHUNK_TREE_OBJECTID); ++ return -EUCLEAN; ++ } ++ ++ if (btrfs_block_group_used(&bgi) > key->offset) { ++ block_group_err(fs_info, leaf, slot, ++ "invalid block group used, have %llu expect [0, %llu)", ++ btrfs_block_group_used(&bgi), key->offset); ++ return -EUCLEAN; ++ } ++ ++ flags = btrfs_block_group_flags(&bgi); ++ if (hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1) { ++ block_group_err(fs_info, leaf, slot, ++"invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set", ++ flags & BTRFS_BLOCK_GROUP_PROFILE_MASK, ++ hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK)); ++ return -EUCLEAN; ++ } 
++ ++ type = flags & BTRFS_BLOCK_GROUP_TYPE_MASK; ++ if (type != BTRFS_BLOCK_GROUP_DATA && ++ type != BTRFS_BLOCK_GROUP_METADATA && ++ type != BTRFS_BLOCK_GROUP_SYSTEM && ++ type != (BTRFS_BLOCK_GROUP_METADATA | ++ BTRFS_BLOCK_GROUP_DATA)) { ++ block_group_err(fs_info, leaf, slot, ++"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llu or 0x%llx", ++ type, hweight64(type), ++ BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA, ++ BTRFS_BLOCK_GROUP_SYSTEM, ++ BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA); ++ return -EUCLEAN; ++ } ++ return 0; ++} ++ + /* + * Common point to switch the item-specific validation. + */ +@@ -333,6 +430,9 @@ static int check_leaf_item(struct btrfs_ + case BTRFS_XATTR_ITEM_KEY: + ret = check_dir_item(root, leaf, key, slot); + break; ++ case BTRFS_BLOCK_GROUP_ITEM_KEY: ++ ret = check_block_group_item(root->fs_info, leaf, key, slot); ++ break; + } + return ret; + } +--- a/fs/btrfs/volumes.c ++++ b/fs/btrfs/volumes.c +@@ -4656,7 +4656,7 @@ static int __btrfs_alloc_chunk(struct bt + + if (type & BTRFS_BLOCK_GROUP_DATA) { + max_stripe_size = SZ_1G; +- max_chunk_size = 10 * max_stripe_size; ++ max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE; + if (!devs_max) + devs_max = BTRFS_MAX_DEVS(info->chunk_root); + } else if (type & BTRFS_BLOCK_GROUP_METADATA) { +--- a/fs/btrfs/volumes.h ++++ b/fs/btrfs/volumes.h +@@ -24,6 +24,8 @@ + #include + #include "async-thread.h" + ++#define BTRFS_MAX_DATA_CHUNK_SIZE (10ULL * SZ_1G) ++ + extern struct mutex uuid_mutex; + + #define BTRFS_STRIPE_LEN SZ_64K diff --git a/queue-4.9/btrfs-validate-type-when-reading-a-chunk.patch b/queue-4.9/btrfs-validate-type-when-reading-a-chunk.patch new file mode 100644 index 00000000000..7b770856c96 --- /dev/null +++ b/queue-4.9/btrfs-validate-type-when-reading-a-chunk.patch @@ -0,0 +1,71 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Gu Jinxiang +Date: Wed, 4 Jul 2018 18:16:39 +0800 +Subject: btrfs: validate type when reading a chunk + +From: Gu 
Jinxiang + +commit 315409b0098fb2651d86553f0436b70502b29bb2 upstream. + +Reported in https://bugzilla.kernel.org/show_bug.cgi?id=199839, with an +image that has an invalid chunk type but does not return an error. + +Add chunk type check in btrfs_check_chunk_valid, to detect the wrong +type combinations. + +Link: https://bugzilla.kernel.org/show_bug.cgi?id=199839 +Reported-by: Xu Wen +Reviewed-by: Qu Wenruo +Signed-off-by: Gu Jinxiang +Signed-off-by: David Sterba +[bwh: Backported to 4.9: Use root->fs_info instead of fs_info] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/volumes.c | 28 ++++++++++++++++++++++++++++ + 1 file changed, 28 insertions(+) + +--- a/fs/btrfs/volumes.c ++++ b/fs/btrfs/volumes.c +@@ -6370,6 +6370,8 @@ static int btrfs_check_chunk_valid(struc + u16 num_stripes; + u16 sub_stripes; + u64 type; ++ u64 features; ++ bool mixed = false; + + length = btrfs_chunk_length(leaf, chunk); + stripe_len = btrfs_chunk_stripe_len(leaf, chunk); +@@ -6410,6 +6412,32 @@ static int btrfs_check_chunk_valid(struc + btrfs_chunk_type(leaf, chunk)); + return -EIO; + } ++ ++ if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) { ++ btrfs_err(root->fs_info, "missing chunk type flag: 0x%llx", type); ++ return -EIO; ++ } ++ ++ if ((type & BTRFS_BLOCK_GROUP_SYSTEM) && ++ (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) { ++ btrfs_err(root->fs_info, ++ "system chunk with data or metadata type: 0x%llx", type); ++ return -EIO; ++ } ++ ++ features = btrfs_super_incompat_flags(root->fs_info->super_copy); ++ if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) ++ mixed = true; ++ ++ if (!mixed) { ++ if ((type & BTRFS_BLOCK_GROUP_METADATA) && ++ (type & BTRFS_BLOCK_GROUP_DATA)) { ++ btrfs_err(root->fs_info, ++ "mixed chunk type in non-mixed mode: 0x%llx", type); ++ return -EIO; ++ } ++ } ++ + if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) || + (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) || + (type & 
BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) || diff --git a/queue-4.9/btrfs-verify-that-every-chunk-has-corresponding-block-group-at-mount-time.patch b/queue-4.9/btrfs-verify-that-every-chunk-has-corresponding-block-group-at-mount-time.patch new file mode 100644 index 00000000000..33c6837e9ed --- /dev/null +++ b/queue-4.9/btrfs-verify-that-every-chunk-has-corresponding-block-group-at-mount-time.patch @@ -0,0 +1,106 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Qu Wenruo +Date: Wed, 1 Aug 2018 10:37:17 +0800 +Subject: btrfs: Verify that every chunk has corresponding block group at mount time + +From: Qu Wenruo + +commit 7ef49515fa6727cb4b6f2f5b0ffbc5fc20a9f8c6 upstream. + +If a crafted image has missing block group items, it could cause +unexpected behavior and breaks the assumption of 1:1 chunk<->block group +mapping. + +Although we have the block group -> chunk mapping check, we still need +chunk -> block group mapping check. + +This patch will do extra check to ensure each chunk has its +corresponding block group. 
+ +Link: https://bugzilla.kernel.org/show_bug.cgi?id=199847 +Reported-by: Xu Wen +Signed-off-by: Qu Wenruo +Reviewed-by: Gu Jinxiang +Reviewed-by: David Sterba +Signed-off-by: David Sterba +[bwh: Backported to 4.9: adjust context] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/extent-tree.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 57 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/extent-tree.c ++++ b/fs/btrfs/extent-tree.c +@@ -10159,6 +10159,62 @@ btrfs_create_block_group_cache(struct bt + return cache; + } + ++ ++/* ++ * Iterate all chunks and verify that each of them has the corresponding block ++ * group ++ */ ++static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info) ++{ ++ struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; ++ struct extent_map *em; ++ struct btrfs_block_group_cache *bg; ++ u64 start = 0; ++ int ret = 0; ++ ++ while (1) { ++ read_lock(&map_tree->map_tree.lock); ++ /* ++ * lookup_extent_mapping will return the first extent map ++ * intersecting the range, so setting @len to 1 is enough to ++ * get the first chunk. 
++ */ ++ em = lookup_extent_mapping(&map_tree->map_tree, start, 1); ++ read_unlock(&map_tree->map_tree.lock); ++ if (!em) ++ break; ++ ++ bg = btrfs_lookup_block_group(fs_info, em->start); ++ if (!bg) { ++ btrfs_err(fs_info, ++ "chunk start=%llu len=%llu doesn't have corresponding block group", ++ em->start, em->len); ++ ret = -EUCLEAN; ++ free_extent_map(em); ++ break; ++ } ++ if (bg->key.objectid != em->start || ++ bg->key.offset != em->len || ++ (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) != ++ (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) { ++ btrfs_err(fs_info, ++"chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx", ++ em->start, em->len, ++ em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK, ++ bg->key.objectid, bg->key.offset, ++ bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK); ++ ret = -EUCLEAN; ++ free_extent_map(em); ++ btrfs_put_block_group(bg); ++ break; ++ } ++ start = em->start + em->len; ++ free_extent_map(em); ++ btrfs_put_block_group(bg); ++ } ++ return ret; ++} ++ + int btrfs_read_block_groups(struct btrfs_root *root) + { + struct btrfs_path *path; +@@ -10343,7 +10399,7 @@ int btrfs_read_block_groups(struct btrfs + } + + init_global_block_rsv(info); +- ret = 0; ++ ret = check_chunk_block_group_mappings(info); + error: + btrfs_free_path(path); + return ret; diff --git a/queue-4.9/f2fs-add-sanity_check_inode-function.patch b/queue-4.9/f2fs-add-sanity_check_inode-function.patch new file mode 100644 index 00000000000..2aa240784c5 --- /dev/null +++ b/queue-4.9/f2fs-add-sanity_check_inode-function.patch @@ -0,0 +1,48 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Ben Hutchings +Date: Thu, 29 Nov 2018 19:17:34 +0000 +Subject: f2fs: Add sanity_check_inode() function + +From: Ben Hutchings + +This was done as part of commits 5d64600d4f33 "f2fs: avoid bug_on on +corrupted inode" and 76d56d4ab4f2 "f2fs: fix to do sanity check with +extra_attr feature" upstream, but the specific checks they added are +not 
applicable to 4.9. + +Cc: Jaegeuk Kim +Cc: Chao Yu +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/inode.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +--- a/fs/f2fs/inode.c ++++ b/fs/f2fs/inode.c +@@ -104,6 +104,13 @@ static void __recover_inline_status(stru + return; + } + ++static bool sanity_check_inode(struct inode *inode) ++{ ++ struct f2fs_sb_info *sbi = F2FS_I_SB(inode); ++ ++ return true; ++} ++ + static int do_read_inode(struct inode *inode) + { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); +@@ -153,6 +160,11 @@ static int do_read_inode(struct inode *i + + get_inline_info(inode, ri); + ++ if (!sanity_check_inode(inode)) { ++ f2fs_put_page(node_page, 1); ++ return -EINVAL; ++ } ++ + /* check data exist */ + if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode)) + __recover_inline_status(inode, node_page); diff --git a/queue-4.9/f2fs-check-blkaddr-more-accuratly-before-issue-a-bio.patch b/queue-4.9/f2fs-check-blkaddr-more-accuratly-before-issue-a-bio.patch new file mode 100644 index 00000000000..a275ff7802f --- /dev/null +++ b/queue-4.9/f2fs-check-blkaddr-more-accuratly-before-issue-a-bio.patch @@ -0,0 +1,120 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Yunlei He +Date: Thu, 8 Mar 2018 16:29:13 +0800 +Subject: f2fs: check blkaddr more accuratly before issue a bio + +From: Yunlei He + +commit 0833721ec3658a4e9d5e58b6fa82cf9edc431e59 upstream. + +This patch check blkaddr more accuratly before issue a +write or read bio. 
+ +Signed-off-by: Yunlei He +Reviewed-by: Chao Yu +Signed-off-by: Jaegeuk Kim +[bwh: Backported to 4.9: adjust context] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/checkpoint.c | 2 ++ + fs/f2fs/data.c | 5 +++-- + fs/f2fs/f2fs.h | 1 + + fs/f2fs/segment.h | 25 +++++++++++++++++++------ + 4 files changed, 25 insertions(+), 8 deletions(-) + +--- a/fs/f2fs/checkpoint.c ++++ b/fs/f2fs/checkpoint.c +@@ -69,6 +69,7 @@ static struct page *__get_meta_page(stru + .old_blkaddr = index, + .new_blkaddr = index, + .encrypted_page = NULL, ++ .is_meta = is_meta, + }; + + if (unlikely(!is_meta)) +@@ -162,6 +163,7 @@ int ra_meta_pages(struct f2fs_sb_info *s + .op = REQ_OP_READ, + .op_flags = sync ? (READ_SYNC | REQ_META | REQ_PRIO) : REQ_RAHEAD, + .encrypted_page = NULL, ++ .is_meta = (type != META_POR), + }; + struct blk_plug plug; + +--- a/fs/f2fs/data.c ++++ b/fs/f2fs/data.c +@@ -240,6 +240,7 @@ int f2fs_submit_page_bio(struct f2fs_io_ + struct page *page = fio->encrypted_page ? + fio->encrypted_page : fio->page; + ++ verify_block_addr(fio, fio->new_blkaddr); + trace_f2fs_submit_page_bio(page, fio); + f2fs_trace_ios(fio, 0); + +@@ -267,8 +268,8 @@ void f2fs_submit_page_mbio(struct f2fs_i + io = is_read ? 
&sbi->read_io : &sbi->write_io[btype]; + + if (fio->old_blkaddr != NEW_ADDR) +- verify_block_addr(sbi, fio->old_blkaddr); +- verify_block_addr(sbi, fio->new_blkaddr); ++ verify_block_addr(fio, fio->old_blkaddr); ++ verify_block_addr(fio, fio->new_blkaddr); + + down_write(&io->io_rwsem); + +--- a/fs/f2fs/f2fs.h ++++ b/fs/f2fs/f2fs.h +@@ -694,6 +694,7 @@ struct f2fs_io_info { + block_t old_blkaddr; /* old block address before Cow */ + struct page *page; /* page to be written */ + struct page *encrypted_page; /* encrypted page */ ++ bool is_meta; /* indicate borrow meta inode mapping or not */ + }; + + #define is_read_io(rw) (rw == READ) +--- a/fs/f2fs/segment.h ++++ b/fs/f2fs/segment.h +@@ -49,13 +49,19 @@ + (secno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ + sbi->segs_per_sec)) \ + +-#define MAIN_BLKADDR(sbi) (SM_I(sbi)->main_blkaddr) +-#define SEG0_BLKADDR(sbi) (SM_I(sbi)->seg0_blkaddr) ++#define MAIN_BLKADDR(sbi) \ ++ (SM_I(sbi) ? SM_I(sbi)->main_blkaddr : \ ++ le32_to_cpu(F2FS_RAW_SUPER(sbi)->main_blkaddr)) ++#define SEG0_BLKADDR(sbi) \ ++ (SM_I(sbi) ? SM_I(sbi)->seg0_blkaddr : \ ++ le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment0_blkaddr)) + + #define MAIN_SEGS(sbi) (SM_I(sbi)->main_segments) + #define MAIN_SECS(sbi) (sbi->total_sections) + +-#define TOTAL_SEGS(sbi) (SM_I(sbi)->segment_count) ++#define TOTAL_SEGS(sbi) \ ++ (SM_I(sbi) ? 
SM_I(sbi)->segment_count : \ ++ le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count)) + #define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << sbi->log_blocks_per_seg) + + #define MAX_BLKADDR(sbi) (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi)) +@@ -591,10 +597,17 @@ static inline void check_seg_range(struc + f2fs_bug_on(sbi, segno > TOTAL_SEGS(sbi) - 1); + } + +-static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) ++static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr) + { +- BUG_ON(blk_addr < SEG0_BLKADDR(sbi) +- || blk_addr >= MAX_BLKADDR(sbi)); ++ struct f2fs_sb_info *sbi = fio->sbi; ++ ++ if (PAGE_TYPE_OF_BIO(fio->type) == META && ++ (!is_read_io(fio->op) || fio->is_meta)) ++ BUG_ON(blk_addr < SEG0_BLKADDR(sbi) || ++ blk_addr >= MAIN_BLKADDR(sbi)); ++ else ++ BUG_ON(blk_addr < MAIN_BLKADDR(sbi) || ++ blk_addr >= MAX_BLKADDR(sbi)); + } + + /* diff --git a/queue-4.9/f2fs-clean-up-with-is_valid_blkaddr.patch b/queue-4.9/f2fs-clean-up-with-is_valid_blkaddr.patch new file mode 100644 index 00000000000..fc031732a31 --- /dev/null +++ b/queue-4.9/f2fs-clean-up-with-is_valid_blkaddr.patch @@ -0,0 +1,208 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Chao Yu +Date: Wed, 23 May 2018 22:25:08 +0800 +Subject: f2fs: clean up with is_valid_blkaddr() + +From: Chao Yu + +commit 7b525dd01365c6764018e374d391c92466be1b7a upstream. + +- rename is_valid_blkaddr() to is_valid_meta_blkaddr() for readability. +- introduce is_valid_blkaddr() for cleanup. + +No logic change in this patch. 
+ +Signed-off-by: Chao Yu +Signed-off-by: Jaegeuk Kim +[bwh: Backported to 4.9: adjust context] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/checkpoint.c | 4 ++-- + fs/f2fs/data.c | 6 +++--- + fs/f2fs/f2fs.h | 9 ++++++++- + fs/f2fs/file.c | 2 +- + fs/f2fs/inode.c | 2 +- + fs/f2fs/node.c | 5 ++--- + fs/f2fs/recovery.c | 6 +++--- + fs/f2fs/segment.c | 4 ++-- + fs/f2fs/segment.h | 2 +- + 9 files changed, 23 insertions(+), 17 deletions(-) + +--- a/fs/f2fs/checkpoint.c ++++ b/fs/f2fs/checkpoint.c +@@ -118,7 +118,7 @@ struct page *get_tmp_page(struct f2fs_sb + return __get_meta_page(sbi, index, false); + } + +-bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) ++bool is_valid_meta_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) + { + switch (type) { + case META_NAT: +@@ -173,7 +173,7 @@ int ra_meta_pages(struct f2fs_sb_info *s + blk_start_plug(&plug); + for (; nrpages-- > 0; blkno++) { + +- if (!is_valid_blkaddr(sbi, blkno, type)) ++ if (!is_valid_meta_blkaddr(sbi, blkno, type)) + goto out; + + switch (type) { +--- a/fs/f2fs/data.c ++++ b/fs/f2fs/data.c +@@ -267,7 +267,7 @@ void f2fs_submit_page_mbio(struct f2fs_i + + io = is_read ? &sbi->read_io : &sbi->write_io[btype]; + +- if (fio->old_blkaddr != NEW_ADDR) ++ if (is_valid_blkaddr(fio->old_blkaddr)) + verify_block_addr(fio, fio->old_blkaddr); + verify_block_addr(fio, fio->new_blkaddr); + +@@ -723,7 +723,7 @@ next_dnode: + next_block: + blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); + +- if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) { ++ if (!is_valid_blkaddr(blkaddr)) { + if (create) { + if (unlikely(f2fs_cp_error(sbi))) { + err = -EIO; +@@ -1217,7 +1217,7 @@ retry_encrypt: + * If current allocation needs SSR, + * it had better in-place writes for updated data. 
+ */ +- if (unlikely(fio->old_blkaddr != NEW_ADDR && ++ if (unlikely(is_valid_blkaddr(fio->old_blkaddr) && + !is_cold_data(page) && + !IS_ATOMIC_WRITTEN_PAGE(page) && + need_inplace_update(inode))) { +--- a/fs/f2fs/f2fs.h ++++ b/fs/f2fs/f2fs.h +@@ -1930,6 +1930,13 @@ static inline void *f2fs_kvzalloc(size_t + (pgofs - ADDRS_PER_INODE(inode) + ADDRS_PER_BLOCK) / \ + ADDRS_PER_BLOCK * ADDRS_PER_BLOCK + ADDRS_PER_INODE(inode)) + ++static inline bool is_valid_blkaddr(block_t blkaddr) ++{ ++ if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) ++ return false; ++ return true; ++} ++ + /* + * file.c + */ +@@ -2115,7 +2122,7 @@ void f2fs_stop_checkpoint(struct f2fs_sb + struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); + struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); + struct page *get_tmp_page(struct f2fs_sb_info *, pgoff_t); +-bool is_valid_blkaddr(struct f2fs_sb_info *, block_t, int); ++bool is_valid_meta_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type); + int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int, bool); + void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t); + long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); +--- a/fs/f2fs/file.c ++++ b/fs/f2fs/file.c +@@ -316,7 +316,7 @@ static bool __found_offset(block_t blkad + switch (whence) { + case SEEK_DATA: + if ((blkaddr == NEW_ADDR && dirty == pgofs) || +- (blkaddr != NEW_ADDR && blkaddr != NULL_ADDR)) ++ is_valid_blkaddr(blkaddr)) + return true; + break; + case SEEK_HOLE: +--- a/fs/f2fs/inode.c ++++ b/fs/f2fs/inode.c +@@ -63,7 +63,7 @@ static bool __written_first_block(struct + { + block_t addr = le32_to_cpu(ri->i_addr[0]); + +- if (addr != NEW_ADDR && addr != NULL_ADDR) ++ if (is_valid_blkaddr(addr)) + return true; + return false; + } +--- a/fs/f2fs/node.c ++++ b/fs/f2fs/node.c +@@ -304,8 +304,7 @@ static void set_node_addr(struct f2fs_sb + new_blkaddr == NULL_ADDR); + f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR && + new_blkaddr == NEW_ADDR); +- 
f2fs_bug_on(sbi, nat_get_blkaddr(e) != NEW_ADDR && +- nat_get_blkaddr(e) != NULL_ADDR && ++ f2fs_bug_on(sbi, is_valid_blkaddr(nat_get_blkaddr(e)) && + new_blkaddr == NEW_ADDR); + + /* increment version no as node is removed */ +@@ -320,7 +319,7 @@ static void set_node_addr(struct f2fs_sb + + /* change address */ + nat_set_blkaddr(e, new_blkaddr); +- if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR) ++ if (!is_valid_blkaddr(new_blkaddr)) + set_nat_flag(e, IS_CHECKPOINTED, false); + __set_nat_cache_dirty(nm_i, e); + +--- a/fs/f2fs/recovery.c ++++ b/fs/f2fs/recovery.c +@@ -236,7 +236,7 @@ static int find_fsync_dnodes(struct f2fs + while (1) { + struct fsync_inode_entry *entry; + +- if (!is_valid_blkaddr(sbi, blkaddr, META_POR)) ++ if (!is_valid_meta_blkaddr(sbi, blkaddr, META_POR)) + return 0; + + page = get_tmp_page(sbi, blkaddr); +@@ -468,7 +468,7 @@ retry_dn: + } + + /* dest is valid block, try to recover from src to dest */ +- if (is_valid_blkaddr(sbi, dest, META_POR)) { ++ if (is_valid_meta_blkaddr(sbi, dest, META_POR)) { + + if (src == NULL_ADDR) { + err = reserve_new_block(&dn); +@@ -527,7 +527,7 @@ static int recover_data(struct f2fs_sb_i + while (1) { + struct fsync_inode_entry *entry; + +- if (!is_valid_blkaddr(sbi, blkaddr, META_POR)) ++ if (!is_valid_meta_blkaddr(sbi, blkaddr, META_POR)) + break; + + ra_meta_pages_cond(sbi, blkaddr); +--- a/fs/f2fs/segment.c ++++ b/fs/f2fs/segment.c +@@ -944,7 +944,7 @@ bool is_checkpointed_data(struct f2fs_sb + struct seg_entry *se; + bool is_cp = false; + +- if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) ++ if (!is_valid_blkaddr(blkaddr)) + return true; + + mutex_lock(&sit_i->sentry_lock); +@@ -1668,7 +1668,7 @@ void f2fs_wait_on_encrypted_page_writeba + { + struct page *cpage; + +- if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) ++ if (!is_valid_blkaddr(blkaddr)) + return; + + cpage = find_lock_page(META_MAPPING(sbi), blkaddr); +--- a/fs/f2fs/segment.h ++++ b/fs/f2fs/segment.h +@@ -81,7 +81,7 @@ + 
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (sbi->blocks_per_seg - 1)) + + #define GET_SEGNO(sbi, blk_addr) \ +- (((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \ ++ ((!is_valid_blkaddr(blk_addr)) ? \ + NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ + GET_SEGNO_FROM_SEG0(sbi, blk_addr))) + #define GET_SECNO(sbi, segno) \ diff --git a/queue-4.9/f2fs-detect-wrong-layout.patch b/queue-4.9/f2fs-detect-wrong-layout.patch new file mode 100644 index 00000000000..55669f3113b --- /dev/null +++ b/queue-4.9/f2fs-detect-wrong-layout.patch @@ -0,0 +1,64 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Jaegeuk Kim +Date: Mon, 5 Dec 2016 13:56:04 -0800 +Subject: f2fs: detect wrong layout + +From: Jaegeuk Kim + +commit 2040fce83fe17763b07c97c1f691da2bb85e4135 upstream. + +Previous mkfs.f2fs allows small partition inappropriately, so f2fs should detect +that as well. + +Refer this in f2fs-tools. + +mkfs.f2fs: detect small partition by overprovision ratio and # of segments + +Reported-and-Tested-by: Eric Biggers +Signed-off-by: Jaegeuk Kim +[bwh: Backported to 4.9: adjust context] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/segment.h | 2 ++ + fs/f2fs/super.c | 11 +++++++++++ + 2 files changed, 13 insertions(+) + +--- a/fs/f2fs/segment.h ++++ b/fs/f2fs/segment.h +@@ -18,6 +18,8 @@ + #define DEF_RECLAIM_PREFREE_SEGMENTS 5 /* 5% over total segments */ + #define DEF_MAX_RECLAIM_PREFREE_SEGMENTS 4096 /* 8GB in maximum */ + ++#define F2FS_MIN_SEGMENTS 9 /* SB + 2 (CP + SIT + NAT) + SSA + MAIN */ ++ + /* L: Logical segment # in volume, R: Relative segment # in main area */ + #define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno) + #define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno) +--- a/fs/f2fs/super.c ++++ b/fs/f2fs/super.c +@@ -1424,6 +1424,7 @@ int sanity_check_ckpt(struct f2fs_sb_inf + unsigned int total, fsmeta; + struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 
++ unsigned int ovp_segments, reserved_segments; + unsigned int main_segs, blocks_per_seg; + unsigned int sit_segs, nat_segs; + unsigned int sit_bitmap_size, nat_bitmap_size; +@@ -1442,6 +1443,16 @@ int sanity_check_ckpt(struct f2fs_sb_inf + if (unlikely(fsmeta >= total)) + return 1; + ++ ovp_segments = le32_to_cpu(ckpt->overprov_segment_count); ++ reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count); ++ ++ if (unlikely(fsmeta < F2FS_MIN_SEGMENTS || ++ ovp_segments == 0 || reserved_segments == 0)) { ++ f2fs_msg(sbi->sb, KERN_ERR, ++ "Wrong layout: check mkfs.f2fs version"); ++ return 1; ++ } ++ + main_segs = le32_to_cpu(raw_super->segment_count_main); + blocks_per_seg = sbi->blocks_per_seg; + diff --git a/queue-4.9/f2fs-enhance-sanity_check_raw_super-to-avoid-potential-overflow.patch b/queue-4.9/f2fs-enhance-sanity_check_raw_super-to-avoid-potential-overflow.patch new file mode 100644 index 00000000000..2e5024f19ec --- /dev/null +++ b/queue-4.9/f2fs-enhance-sanity_check_raw_super-to-avoid-potential-overflow.patch @@ -0,0 +1,179 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Jaegeuk Kim +Date: Fri, 27 Apr 2018 19:03:22 -0700 +Subject: f2fs: enhance sanity_check_raw_super() to avoid potential overflow + +From: Jaegeuk Kim + +commit 0cfe75c5b011994651a4ca6d74f20aa997bfc69a upstream. + +In order to avoid the below overflow issue, we should have checked the +boundaries in superblock before reaching out to allocation. As Linus suggested, +the right place should be sanity_check_raw_super(). + +Dr Silvio Cesare of InfoSect reported: + +There are integer overflows with using the cp_payload superblock field in the +f2fs filesystem potentially leading to memory corruption. + +include/linux/f2fs_fs.h + +struct f2fs_super_block { +... + __le32 cp_payload; + +fs/f2fs/f2fs.h + +typedef u32 block_t; /* + * should not change u32, since it is the on-disk block + * address format, __le32. + */ +... 
+ +static inline block_t __cp_payload(struct f2fs_sb_info *sbi) +{ + return le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); +} + +fs/f2fs/checkpoint.c + + block_t start_blk, orphan_blocks, i, j; +... + start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi); + orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi); + ++++ integer overflows + +... + unsigned int cp_blks = 1 + __cp_payload(sbi); +... + sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL); + ++++ integer overflow leading to incorrect heap allocation. + + int cp_payload_blks = __cp_payload(sbi); +... + ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + + orphan_blocks); + ++++ sign bug and integer overflow + +... + for (i = 1; i < 1 + cp_payload_blks; i++) + ++++ integer overflow + +... + + sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS - + NR_CURSEG_TYPE - __cp_payload(sbi)) * + F2FS_ORPHANS_PER_BLOCK; + ++++ integer overflow + +Reported-by: Greg KH +Reported-by: Silvio Cesare +Suggested-by: Linus Torvalds +Reviewed-by: Chao Yu +Signed-off-by: Jaegeuk Kim +[bwh: Backported to 4.9: No hot file extension support] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/super.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++------ + 1 file changed, 64 insertions(+), 7 deletions(-) + +--- a/fs/f2fs/super.c ++++ b/fs/f2fs/super.c +@@ -1337,6 +1337,8 @@ static inline bool sanity_check_area_bou + static int sanity_check_raw_super(struct f2fs_sb_info *sbi, + struct buffer_head *bh) + { ++ block_t segment_count, segs_per_sec, secs_per_zone; ++ block_t total_sections, blocks_per_seg; + struct f2fs_super_block *raw_super = (struct f2fs_super_block *) + (bh->b_data + F2FS_SUPER_OFFSET); + struct super_block *sb = sbi->sb; +@@ -1393,6 +1395,68 @@ static int sanity_check_raw_super(struct + return 1; + } + ++ segment_count = le32_to_cpu(raw_super->segment_count); ++ segs_per_sec = le32_to_cpu(raw_super->segs_per_sec); ++ secs_per_zone = 
le32_to_cpu(raw_super->secs_per_zone); ++ total_sections = le32_to_cpu(raw_super->section_count); ++ ++ /* blocks_per_seg should be 512, given the above check */ ++ blocks_per_seg = 1 << le32_to_cpu(raw_super->log_blocks_per_seg); ++ ++ if (segment_count > F2FS_MAX_SEGMENT || ++ segment_count < F2FS_MIN_SEGMENTS) { ++ f2fs_msg(sb, KERN_INFO, ++ "Invalid segment count (%u)", ++ segment_count); ++ return 1; ++ } ++ ++ if (total_sections > segment_count || ++ total_sections < F2FS_MIN_SEGMENTS || ++ segs_per_sec > segment_count || !segs_per_sec) { ++ f2fs_msg(sb, KERN_INFO, ++ "Invalid segment/section count (%u, %u x %u)", ++ segment_count, total_sections, segs_per_sec); ++ return 1; ++ } ++ ++ if ((segment_count / segs_per_sec) < total_sections) { ++ f2fs_msg(sb, KERN_INFO, ++ "Small segment_count (%u < %u * %u)", ++ segment_count, segs_per_sec, total_sections); ++ return 1; ++ } ++ ++ if (segment_count > (le32_to_cpu(raw_super->block_count) >> 9)) { ++ f2fs_msg(sb, KERN_INFO, ++ "Wrong segment_count / block_count (%u > %u)", ++ segment_count, le32_to_cpu(raw_super->block_count)); ++ return 1; ++ } ++ ++ if (secs_per_zone > total_sections) { ++ f2fs_msg(sb, KERN_INFO, ++ "Wrong secs_per_zone (%u > %u)", ++ secs_per_zone, total_sections); ++ return 1; ++ } ++ if (le32_to_cpu(raw_super->extension_count) > F2FS_MAX_EXTENSION) { ++ f2fs_msg(sb, KERN_INFO, ++ "Corrupted extension count (%u > %u)", ++ le32_to_cpu(raw_super->extension_count), ++ F2FS_MAX_EXTENSION); ++ return 1; ++ } ++ ++ if (le32_to_cpu(raw_super->cp_payload) > ++ (blocks_per_seg - F2FS_CP_PACKS)) { ++ f2fs_msg(sb, KERN_INFO, ++ "Insane cp_payload (%u > %u)", ++ le32_to_cpu(raw_super->cp_payload), ++ blocks_per_seg - F2FS_CP_PACKS); ++ return 1; ++ } ++ + /* check reserved ino info */ + if (le32_to_cpu(raw_super->node_ino) != 1 || + le32_to_cpu(raw_super->meta_ino) != 2 || +@@ -1405,13 +1469,6 @@ static int sanity_check_raw_super(struct + return 1; + } + +- if (le32_to_cpu(raw_super->segment_count) > 
F2FS_MAX_SEGMENT) { +- f2fs_msg(sb, KERN_INFO, +- "Invalid segment count (%u)", +- le32_to_cpu(raw_super->segment_count)); +- return 1; +- } +- + /* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */ + if (sanity_check_area_boundary(sbi, bh)) + return 1; diff --git a/queue-4.9/f2fs-fix-a-panic-caused-by-null-flush_cmd_control.patch b/queue-4.9/f2fs-fix-a-panic-caused-by-null-flush_cmd_control.patch new file mode 100644 index 00000000000..fdf85390e1c --- /dev/null +++ b/queue-4.9/f2fs-fix-a-panic-caused-by-null-flush_cmd_control.patch @@ -0,0 +1,48 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Yunlei He +Date: Thu, 1 Jun 2017 16:43:51 +0800 +Subject: f2fs: fix a panic caused by NULL flush_cmd_control + +From: Yunlei He + +commit d4fdf8ba0e5808ba9ad6b44337783bd9935e0982 upstream. + +Mount fs with option noflush_merge, boot failed for illegal address +fcc in function f2fs_issue_flush: + + if (!test_opt(sbi, FLUSH_MERGE)) { + ret = submit_flush_wait(sbi); + atomic_inc(&fcc->issued_flush); -> Here, fcc illegal + return ret; + } + +Signed-off-by: Yunlei He +Signed-off-by: Jaegeuk Kim +[bwh: Backported to 4.9: adjust context] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/segment.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/fs/f2fs/segment.c ++++ b/fs/f2fs/segment.c +@@ -493,6 +493,9 @@ int create_flush_cmd_control(struct f2fs + init_waitqueue_head(&fcc->flush_wait_queue); + init_llist_head(&fcc->issue_list); + SM_I(sbi)->cmd_control_info = fcc; ++ if (!test_opt(sbi, FLUSH_MERGE)) ++ return err; ++ + fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, + "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); + if (IS_ERR(fcc->f2fs_issue_flush)) { +@@ -2539,7 +2542,7 @@ int build_segment_manager(struct f2fs_sb + + INIT_LIST_HEAD(&sm_info->sit_entry_set); + +- if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) { ++ if (!f2fs_readonly(sbi->sb)) { + err = create_flush_cmd_control(sbi); + if (err) + return err; 
diff --git a/queue-4.9/f2fs-fix-missing-up_read.patch b/queue-4.9/f2fs-fix-missing-up_read.patch new file mode 100644 index 00000000000..d6f3f702898 --- /dev/null +++ b/queue-4.9/f2fs-fix-missing-up_read.patch @@ -0,0 +1,34 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Jaegeuk Kim +Date: Thu, 27 Sep 2018 22:15:31 -0700 +Subject: f2fs: fix missing up_read + +From: Jaegeuk Kim + +commit 89d13c38501df730cbb2e02c4499da1b5187119d upstream. + +This patch fixes missing up_read call. + +Fixes: c9b60788fc76 ("f2fs: fix to do sanity check with block address in main area") +Reviewed-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/node.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/fs/f2fs/node.c ++++ b/fs/f2fs/node.c +@@ -1606,8 +1606,10 @@ static int f2fs_write_node_page(struct p + } + + if (__is_valid_data_blkaddr(ni.blk_addr) && +- !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC)) ++ !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC)) { ++ up_read(&sbi->node_write); + goto redirty_out; ++ } + + set_page_writeback(page); + fio.old_blkaddr = ni.blk_addr; diff --git a/queue-4.9/f2fs-fix-race-condition-in-between-free-nid-allocator-initializer.patch b/queue-4.9/f2fs-fix-race-condition-in-between-free-nid-allocator-initializer.patch new file mode 100644 index 00000000000..1a5233d309f --- /dev/null +++ b/queue-4.9/f2fs-fix-race-condition-in-between-free-nid-allocator-initializer.patch @@ -0,0 +1,137 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Chao Yu +Date: Wed, 22 Mar 2017 14:45:05 +0800 +Subject: f2fs: fix race condition in between free nid allocator/initializer + +From: Chao Yu + +commit 30a61ddf8117c26ac5b295e1233eaa9629a94ca3 upstream. + +In below concurrent case, allocated nid can be loaded into free nid cache +and be allocated again. 
+ +Thread A Thread B +- f2fs_create + - f2fs_new_inode + - alloc_nid + - __insert_nid_to_list(ALLOC_NID_LIST) + - f2fs_balance_fs_bg + - build_free_nids + - __build_free_nids + - scan_nat_page + - add_free_nid + - __lookup_nat_cache + - f2fs_add_link + - init_inode_metadata + - new_inode_page + - new_node_page + - set_node_addr + - alloc_nid_done + - __remove_nid_from_list(ALLOC_NID_LIST) + - __insert_nid_to_list(FREE_NID_LIST) + +This patch makes the nat cache lookup and the free nid list operation atomic +to avoid this race condition. + +Signed-off-by: Jaegeuk Kim +Signed-off-by: Chao Yu +Signed-off-by: Jaegeuk Kim +[bwh: Backported to 4.9: + - add_free_nid() returns 0 in case of any error (except low memory) + - Tree/list addition has not been moved into __insert_nid_to_list()] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/node.c | 62 +++++++++++++++++++++++++++++++++++++++------------------ + 1 file changed, 43 insertions(+), 19 deletions(-) + +--- a/fs/f2fs/node.c ++++ b/fs/f2fs/node.c +@@ -1704,8 +1704,9 @@ static void __del_from_free_nid_list(str + static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) + { + struct f2fs_nm_info *nm_i = NM_I(sbi); +- struct free_nid *i; ++ struct free_nid *i, *e; + struct nat_entry *ne; ++ int err = -EINVAL; + + if (!available_free_memory(sbi, FREE_NIDS)) + return -1; +@@ -1714,35 +1715,58 @@ static int add_free_nid(struct f2fs_sb_i + if (unlikely(nid == 0)) + return 0; + +- if (build) { +- /* do not add allocated nids */ +- ne = __lookup_nat_cache(nm_i, nid); +- if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || +- nat_get_blkaddr(ne) != NULL_ADDR)) +- return 0; +- } +- + i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS); + i->nid = nid; + i->state = NID_NEW; + +- if (radix_tree_preload(GFP_NOFS)) { +- kmem_cache_free(free_nid_slab, i); +- return 0; +- } ++ if (radix_tree_preload(GFP_NOFS)) ++ goto err; + + spin_lock(&nm_i->free_nid_list_lock); +- if 
(radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) { +- spin_unlock(&nm_i->free_nid_list_lock); +- radix_tree_preload_end(); +- kmem_cache_free(free_nid_slab, i); +- return 0; ++ ++ if (build) { ++ /* ++ * Thread A Thread B ++ * - f2fs_create ++ * - f2fs_new_inode ++ * - alloc_nid ++ * - __insert_nid_to_list(ALLOC_NID_LIST) ++ * - f2fs_balance_fs_bg ++ * - build_free_nids ++ * - __build_free_nids ++ * - scan_nat_page ++ * - add_free_nid ++ * - __lookup_nat_cache ++ * - f2fs_add_link ++ * - init_inode_metadata ++ * - new_inode_page ++ * - new_node_page ++ * - set_node_addr ++ * - alloc_nid_done ++ * - __remove_nid_from_list(ALLOC_NID_LIST) ++ * - __insert_nid_to_list(FREE_NID_LIST) ++ */ ++ ne = __lookup_nat_cache(nm_i, nid); ++ if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || ++ nat_get_blkaddr(ne) != NULL_ADDR)) ++ goto err_out; ++ ++ e = __lookup_free_nid_list(nm_i, nid); ++ if (e) ++ goto err_out; + } ++ if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) ++ goto err_out; ++ err = 0; + list_add_tail(&i->list, &nm_i->free_nid_list); + nm_i->fcnt++; ++err_out: + spin_unlock(&nm_i->free_nid_list_lock); + radix_tree_preload_end(); +- return 1; ++err: ++ if (err) ++ kmem_cache_free(free_nid_slab, i); ++ return !err; + } + + static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid) diff --git a/queue-4.9/f2fs-fix-to-do-sanity-check-with-block-address-in-main-area-v2.patch b/queue-4.9/f2fs-fix-to-do-sanity-check-with-block-address-in-main-area-v2.patch new file mode 100644 index 00000000000..abac309929e --- /dev/null +++ b/queue-4.9/f2fs-fix-to-do-sanity-check-with-block-address-in-main-area-v2.patch @@ -0,0 +1,365 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Chao Yu +Date: Tue, 10 Jul 2018 23:01:45 +0800 +Subject: f2fs: fix to do sanity check with block address in main area v2 + +From: Chao Yu + +commit 91291e9998d208370eb8156c760691b873bd7522 upstream. 
+ +This patch adds f2fs_is_valid_blkaddr() to the functions below to sanity +check the block address and avoid a potential panic: +- f2fs_grab_read_bio() +- __written_first_block() + +https://bugzilla.kernel.org/show_bug.cgi?id=200465 + +- Reproduce + +- POC (poc.c) + #define _GNU_SOURCE + #include + #include + #include + #include + #include + + #include + #include + #include + #include + #include + #include + #include + #include + + #include + #include + + static void activity(char *mpoint) { + + char *xattr; + int err; + + err = asprintf(&xattr, "%s/foo/bar/xattr", mpoint); + + char buf2[113]; + memset(buf2, 0, sizeof(buf2)); + listxattr(xattr, buf2, sizeof(buf2)); + + } + + int main(int argc, char *argv[]) { + activity(argv[1]); + return 0; + } + +- kernel message +[ 844.718738] F2FS-fs (loop0): Mounted with checkpoint version = 2 +[ 846.430929] F2FS-fs (loop0): access invalid blkaddr:1024 +[ 846.431058] WARNING: CPU: 1 PID: 1249 at fs/f2fs/checkpoint.c:154 f2fs_is_valid_blkaddr+0x10f/0x160 +[ 846.431059] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer snd input_leds joydev soundcore serio_raw i2c_piix4 mac_hid ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi autofs4 raid10 raid456 libcrc32c async_raid6_recov async_memcpy async_pq async_xor xor async_tx raid6_pq raid1 raid0 multipath linear qxl ttm crct10dif_pclmul crc32_pclmul drm_kms_helper ghash_clmulni_intel syscopyarea sysfillrect sysimgblt fb_sys_fops pcbc drm 8139too aesni_intel 8139cp floppy psmouse mii aes_x86_64 crypto_simd pata_acpi cryptd glue_helper +[ 846.431310] CPU: 1 PID: 1249 Comm: a.out Not tainted 4.18.0-rc3+ #1 +[ 846.431312] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 +[ 846.431315] RIP: 0010:f2fs_is_valid_blkaddr+0x10f/0x160 +[ 846.431316] Code: 00 eb ed 31 c0 83 fa 05 75 ae 48 83 ec 08 48 8b 3f 89 f1 48 c7 c2 fc 0b 0f 8b 48 c7 c6 8b 
d7 09 8b 88 44 24 07 e8 61 8b ff ff <0f> 0b 0f b6 44 24 07 48 83 c4 08 eb 81 4c 8b 47 10 8b 8f 38 04 00 +[ 846.431347] RSP: 0018:ffff961c414a7bc0 EFLAGS: 00010282 +[ 846.431349] RAX: 0000000000000000 RBX: ffffc5f787b8ea80 RCX: 0000000000000000 +[ 846.431350] RDX: 0000000000000000 RSI: ffff89dfffd165d8 RDI: ffff89dfffd165d8 +[ 846.431351] RBP: ffff961c414a7c20 R08: 0000000000000001 R09: 0000000000000248 +[ 846.431353] R10: 0000000000000000 R11: 0000000000000248 R12: 0000000000000007 +[ 846.431369] R13: ffff89dff5492800 R14: ffff89dfae3aa000 R15: ffff89dff4ff88d0 +[ 846.431372] FS: 00007f882e2fb700(0000) GS:ffff89dfffd00000(0000) knlGS:0000000000000000 +[ 846.431373] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 846.431374] CR2: 0000000001a88008 CR3: 00000001eb572000 CR4: 00000000000006e0 +[ 846.431384] Call Trace: +[ 846.431426] f2fs_iget+0x6f4/0xe70 +[ 846.431430] ? f2fs_find_entry+0x71/0x90 +[ 846.431432] f2fs_lookup+0x1aa/0x390 +[ 846.431452] __lookup_slow+0x97/0x150 +[ 846.431459] lookup_slow+0x35/0x50 +[ 846.431462] walk_component+0x1c6/0x470 +[ 846.431479] ? memcg_kmem_charge_memcg+0x70/0x90 +[ 846.431488] ? page_add_file_rmap+0x13/0x200 +[ 846.431491] path_lookupat+0x76/0x230 +[ 846.431501] ? __alloc_pages_nodemask+0xfc/0x280 +[ 846.431504] filename_lookup+0xb8/0x1a0 +[ 846.431534] ? _cond_resched+0x16/0x40 +[ 846.431541] ? kmem_cache_alloc+0x160/0x1d0 +[ 846.431549] ? 
path_listxattr+0x41/0xa0 +[ 846.431551] path_listxattr+0x41/0xa0 +[ 846.431570] do_syscall_64+0x55/0x100 +[ 846.431583] entry_SYSCALL_64_after_hwframe+0x44/0xa9 +[ 846.431607] RIP: 0033:0x7f882de1c0d7 +[ 846.431607] Code: f0 ff ff 73 01 c3 48 8b 0d be dd 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 b8 c2 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 91 dd 2b 00 f7 d8 64 89 01 48 +[ 846.431639] RSP: 002b:00007ffe8e66c238 EFLAGS: 00000202 ORIG_RAX: 00000000000000c2 +[ 846.431641] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f882de1c0d7 +[ 846.431642] RDX: 0000000000000071 RSI: 00007ffe8e66c280 RDI: 0000000001a880c0 +[ 846.431643] RBP: 00007ffe8e66c300 R08: 0000000001a88010 R09: 0000000000000000 +[ 846.431645] R10: 00000000000001ab R11: 0000000000000202 R12: 0000000000400550 +[ 846.431646] R13: 00007ffe8e66c400 R14: 0000000000000000 R15: 0000000000000000 +[ 846.431648] ---[ end trace abca54df39d14f5c ]--- +[ 846.431651] F2FS-fs (loop0): invalid blkaddr: 1024, type: 5, run fsck to fix. 
+[ 846.431762] WARNING: CPU: 1 PID: 1249 at fs/f2fs/f2fs.h:2697 f2fs_iget+0xd17/0xe70 +[ 846.431763] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer snd input_leds joydev soundcore serio_raw i2c_piix4 mac_hid ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi autofs4 raid10 raid456 libcrc32c async_raid6_recov async_memcpy async_pq async_xor xor async_tx raid6_pq raid1 raid0 multipath linear qxl ttm crct10dif_pclmul crc32_pclmul drm_kms_helper ghash_clmulni_intel syscopyarea sysfillrect sysimgblt fb_sys_fops pcbc drm 8139too aesni_intel 8139cp floppy psmouse mii aes_x86_64 crypto_simd pata_acpi cryptd glue_helper +[ 846.431797] CPU: 1 PID: 1249 Comm: a.out Tainted: G W 4.18.0-rc3+ #1 +[ 846.431798] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 +[ 846.431800] RIP: 0010:f2fs_iget+0xd17/0xe70 +[ 846.431801] Code: ff ff 48 63 d8 e9 e1 f6 ff ff 48 8b 45 c8 41 b8 05 00 00 00 48 c7 c2 d8 e8 0e 8b 48 c7 c6 1d b0 0a 8b 48 8b 38 e8 f9 b4 00 00 <0f> 0b 48 8b 45 c8 f0 80 48 48 04 e9 d8 f9 ff ff 0f 0b 48 8b 43 18 +[ 846.431832] RSP: 0018:ffff961c414a7bd0 EFLAGS: 00010282 +[ 846.431834] RAX: 0000000000000000 RBX: ffffc5f787b8ea80 RCX: 0000000000000006 +[ 846.431835] RDX: 0000000000000000 RSI: 0000000000000096 RDI: ffff89dfffd165d0 +[ 846.431836] RBP: ffff961c414a7c20 R08: 0000000000000000 R09: 0000000000000273 +[ 846.431837] R10: 0000000000000000 R11: ffff89dfad50ca60 R12: 0000000000000007 +[ 846.431838] R13: ffff89dff5492800 R14: ffff89dfae3aa000 R15: ffff89dff4ff88d0 +[ 846.431840] FS: 00007f882e2fb700(0000) GS:ffff89dfffd00000(0000) knlGS:0000000000000000 +[ 846.431841] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 846.431842] CR2: 0000000001a88008 CR3: 00000001eb572000 CR4: 00000000000006e0 +[ 846.431846] Call Trace: +[ 846.431850] ? 
f2fs_find_entry+0x71/0x90 +[ 846.431853] f2fs_lookup+0x1aa/0x390 +[ 846.431856] __lookup_slow+0x97/0x150 +[ 846.431858] lookup_slow+0x35/0x50 +[ 846.431874] walk_component+0x1c6/0x470 +[ 846.431878] ? memcg_kmem_charge_memcg+0x70/0x90 +[ 846.431880] ? page_add_file_rmap+0x13/0x200 +[ 846.431882] path_lookupat+0x76/0x230 +[ 846.431884] ? __alloc_pages_nodemask+0xfc/0x280 +[ 846.431886] filename_lookup+0xb8/0x1a0 +[ 846.431890] ? _cond_resched+0x16/0x40 +[ 846.431891] ? kmem_cache_alloc+0x160/0x1d0 +[ 846.431894] ? path_listxattr+0x41/0xa0 +[ 846.431896] path_listxattr+0x41/0xa0 +[ 846.431898] do_syscall_64+0x55/0x100 +[ 846.431901] entry_SYSCALL_64_after_hwframe+0x44/0xa9 +[ 846.431902] RIP: 0033:0x7f882de1c0d7 +[ 846.431903] Code: f0 ff ff 73 01 c3 48 8b 0d be dd 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 b8 c2 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 91 dd 2b 00 f7 d8 64 89 01 48 +[ 846.431934] RSP: 002b:00007ffe8e66c238 EFLAGS: 00000202 ORIG_RAX: 00000000000000c2 +[ 846.431936] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f882de1c0d7 +[ 846.431937] RDX: 0000000000000071 RSI: 00007ffe8e66c280 RDI: 0000000001a880c0 +[ 846.431939] RBP: 00007ffe8e66c300 R08: 0000000001a88010 R09: 0000000000000000 +[ 846.431940] R10: 00000000000001ab R11: 0000000000000202 R12: 0000000000400550 +[ 846.431941] R13: 00007ffe8e66c400 R14: 0000000000000000 R15: 0000000000000000 +[ 846.431943] ---[ end trace abca54df39d14f5d ]--- +[ 846.432033] F2FS-fs (loop0): access invalid blkaddr:1024 +[ 846.432051] WARNING: CPU: 1 PID: 1249 at fs/f2fs/checkpoint.c:154 f2fs_is_valid_blkaddr+0x10f/0x160 +[ 846.432051] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer snd input_leds joydev soundcore serio_raw i2c_piix4 mac_hid ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi autofs4 raid10 raid456 libcrc32c async_raid6_recov async_memcpy async_pq 
async_xor xor async_tx raid6_pq raid1 raid0 multipath linear qxl ttm crct10dif_pclmul crc32_pclmul drm_kms_helper ghash_clmulni_intel syscopyarea sysfillrect sysimgblt fb_sys_fops pcbc drm 8139too aesni_intel 8139cp floppy psmouse mii aes_x86_64 crypto_simd pata_acpi cryptd glue_helper +[ 846.432085] CPU: 1 PID: 1249 Comm: a.out Tainted: G W 4.18.0-rc3+ #1 +[ 846.432086] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 +[ 846.432089] RIP: 0010:f2fs_is_valid_blkaddr+0x10f/0x160 +[ 846.432089] Code: 00 eb ed 31 c0 83 fa 05 75 ae 48 83 ec 08 48 8b 3f 89 f1 48 c7 c2 fc 0b 0f 8b 48 c7 c6 8b d7 09 8b 88 44 24 07 e8 61 8b ff ff <0f> 0b 0f b6 44 24 07 48 83 c4 08 eb 81 4c 8b 47 10 8b 8f 38 04 00 +[ 846.432120] RSP: 0018:ffff961c414a7900 EFLAGS: 00010286 +[ 846.432122] RAX: 0000000000000000 RBX: 0000000000000400 RCX: 0000000000000006 +[ 846.432123] RDX: 0000000000000000 RSI: 0000000000000096 RDI: ffff89dfffd165d0 +[ 846.432124] RBP: ffff89dff5492800 R08: 0000000000000001 R09: 000000000000029d +[ 846.432125] R10: ffff961c414a7820 R11: 000000000000029d R12: 0000000000000400 +[ 846.432126] R13: 0000000000000000 R14: ffff89dff4ff88d0 R15: 0000000000000000 +[ 846.432128] FS: 00007f882e2fb700(0000) GS:ffff89dfffd00000(0000) knlGS:0000000000000000 +[ 846.432130] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 846.432131] CR2: 0000000001a88008 CR3: 00000001eb572000 CR4: 00000000000006e0 +[ 846.432135] Call Trace: +[ 846.432151] f2fs_wait_on_block_writeback+0x20/0x110 +[ 846.432158] f2fs_grab_read_bio+0xbc/0xe0 +[ 846.432161] f2fs_submit_page_read+0x21/0x280 +[ 846.432163] f2fs_get_read_data_page+0xb7/0x3c0 +[ 846.432165] f2fs_get_lock_data_page+0x29/0x1e0 +[ 846.432167] f2fs_get_new_data_page+0x148/0x550 +[ 846.432170] f2fs_add_regular_entry+0x1d2/0x550 +[ 846.432178] ? 
__switch_to+0x12f/0x460 +[ 846.432181] f2fs_add_dentry+0x6a/0xd0 +[ 846.432184] f2fs_do_add_link+0xe9/0x140 +[ 846.432186] __recover_dot_dentries+0x260/0x280 +[ 846.432189] f2fs_lookup+0x343/0x390 +[ 846.432193] __lookup_slow+0x97/0x150 +[ 846.432195] lookup_slow+0x35/0x50 +[ 846.432208] walk_component+0x1c6/0x470 +[ 846.432212] ? memcg_kmem_charge_memcg+0x70/0x90 +[ 846.432215] ? page_add_file_rmap+0x13/0x200 +[ 846.432217] path_lookupat+0x76/0x230 +[ 846.432219] ? __alloc_pages_nodemask+0xfc/0x280 +[ 846.432221] filename_lookup+0xb8/0x1a0 +[ 846.432224] ? _cond_resched+0x16/0x40 +[ 846.432226] ? kmem_cache_alloc+0x160/0x1d0 +[ 846.432228] ? path_listxattr+0x41/0xa0 +[ 846.432230] path_listxattr+0x41/0xa0 +[ 846.432233] do_syscall_64+0x55/0x100 +[ 846.432235] entry_SYSCALL_64_after_hwframe+0x44/0xa9 +[ 846.432237] RIP: 0033:0x7f882de1c0d7 +[ 846.432237] Code: f0 ff ff 73 01 c3 48 8b 0d be dd 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 b8 c2 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 91 dd 2b 00 f7 d8 64 89 01 48 +[ 846.432269] RSP: 002b:00007ffe8e66c238 EFLAGS: 00000202 ORIG_RAX: 00000000000000c2 +[ 846.432271] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f882de1c0d7 +[ 846.432272] RDX: 0000000000000071 RSI: 00007ffe8e66c280 RDI: 0000000001a880c0 +[ 846.432273] RBP: 00007ffe8e66c300 R08: 0000000001a88010 R09: 0000000000000000 +[ 846.432274] R10: 00000000000001ab R11: 0000000000000202 R12: 0000000000400550 +[ 846.432275] R13: 00007ffe8e66c400 R14: 0000000000000000 R15: 0000000000000000 +[ 846.432277] ---[ end trace abca54df39d14f5e ]--- +[ 846.432279] F2FS-fs (loop0): invalid blkaddr: 1024, type: 5, run fsck to fix. 
+[ 846.432376] WARNING: CPU: 1 PID: 1249 at fs/f2fs/f2fs.h:2697 f2fs_wait_on_block_writeback+0xb1/0x110 +[ 846.432376] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer snd input_leds joydev soundcore serio_raw i2c_piix4 mac_hid ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi autofs4 raid10 raid456 libcrc32c async_raid6_recov async_memcpy async_pq async_xor xor async_tx raid6_pq raid1 raid0 multipath linear qxl ttm crct10dif_pclmul crc32_pclmul drm_kms_helper ghash_clmulni_intel syscopyarea sysfillrect sysimgblt fb_sys_fops pcbc drm 8139too aesni_intel 8139cp floppy psmouse mii aes_x86_64 crypto_simd pata_acpi cryptd glue_helper +[ 846.432410] CPU: 1 PID: 1249 Comm: a.out Tainted: G W 4.18.0-rc3+ #1 +[ 846.432411] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 +[ 846.432413] RIP: 0010:f2fs_wait_on_block_writeback+0xb1/0x110 +[ 846.432414] Code: 66 90 f0 ff 4b 34 74 59 5b 5d c3 48 8b 7d 00 41 b8 05 00 00 00 89 d9 48 c7 c2 d8 e8 0e 8b 48 c7 c6 1d b0 0a 8b e8 df bc fd ff <0f> 0b f0 80 4d 48 04 e9 67 ff ff ff 48 8b 03 48 c1 e8 37 83 e0 07 +[ 846.432445] RSP: 0018:ffff961c414a7910 EFLAGS: 00010286 +[ 846.432447] RAX: 0000000000000000 RBX: 0000000000000400 RCX: 0000000000000006 +[ 846.432448] RDX: 0000000000000000 RSI: 0000000000000092 RDI: ffff89dfffd165d0 +[ 846.432449] RBP: ffff89dff5492800 R08: 0000000000000000 R09: 00000000000002d1 +[ 846.432450] R10: ffff961c414a7820 R11: ffff89dfad50cf80 R12: 0000000000000400 +[ 846.432451] R13: 0000000000000000 R14: ffff89dff4ff88d0 R15: 0000000000000000 +[ 846.432453] FS: 00007f882e2fb700(0000) GS:ffff89dfffd00000(0000) knlGS:0000000000000000 +[ 846.432454] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 846.432455] CR2: 0000000001a88008 CR3: 00000001eb572000 CR4: 00000000000006e0 +[ 846.432459] Call Trace: +[ 846.432463] f2fs_grab_read_bio+0xbc/0xe0 +[ 846.432464] 
f2fs_submit_page_read+0x21/0x280 +[ 846.432466] f2fs_get_read_data_page+0xb7/0x3c0 +[ 846.432468] f2fs_get_lock_data_page+0x29/0x1e0 +[ 846.432470] f2fs_get_new_data_page+0x148/0x550 +[ 846.432473] f2fs_add_regular_entry+0x1d2/0x550 +[ 846.432475] ? __switch_to+0x12f/0x460 +[ 846.432477] f2fs_add_dentry+0x6a/0xd0 +[ 846.432480] f2fs_do_add_link+0xe9/0x140 +[ 846.432483] __recover_dot_dentries+0x260/0x280 +[ 846.432485] f2fs_lookup+0x343/0x390 +[ 846.432488] __lookup_slow+0x97/0x150 +[ 846.432490] lookup_slow+0x35/0x50 +[ 846.432505] walk_component+0x1c6/0x470 +[ 846.432509] ? memcg_kmem_charge_memcg+0x70/0x90 +[ 846.432511] ? page_add_file_rmap+0x13/0x200 +[ 846.432513] path_lookupat+0x76/0x230 +[ 846.432515] ? __alloc_pages_nodemask+0xfc/0x280 +[ 846.432517] filename_lookup+0xb8/0x1a0 +[ 846.432520] ? _cond_resched+0x16/0x40 +[ 846.432522] ? kmem_cache_alloc+0x160/0x1d0 +[ 846.432525] ? path_listxattr+0x41/0xa0 +[ 846.432526] path_listxattr+0x41/0xa0 +[ 846.432529] do_syscall_64+0x55/0x100 +[ 846.432531] entry_SYSCALL_64_after_hwframe+0x44/0xa9 +[ 846.432533] RIP: 0033:0x7f882de1c0d7 +[ 846.432533] Code: f0 ff ff 73 01 c3 48 8b 0d be dd 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 b8 c2 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 91 dd 2b 00 f7 d8 64 89 01 48 +[ 846.432565] RSP: 002b:00007ffe8e66c238 EFLAGS: 00000202 ORIG_RAX: 00000000000000c2 +[ 846.432567] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f882de1c0d7 +[ 846.432568] RDX: 0000000000000071 RSI: 00007ffe8e66c280 RDI: 0000000001a880c0 +[ 846.432569] RBP: 00007ffe8e66c300 R08: 0000000001a88010 R09: 0000000000000000 +[ 846.432570] R10: 00000000000001ab R11: 0000000000000202 R12: 0000000000400550 +[ 846.432571] R13: 00007ffe8e66c400 R14: 0000000000000000 R15: 0000000000000000 +[ 846.432573] ---[ end trace abca54df39d14f5f ]--- +[ 846.434280] BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 +[ 846.434424] PGD 80000001ebd3a067 P4D 
80000001ebd3a067 PUD 1eb1ae067 PMD 0 +[ 846.434551] Oops: 0000 [#1] SMP PTI +[ 846.434697] CPU: 0 PID: 44 Comm: kworker/u5:0 Tainted: G W 4.18.0-rc3+ #1 +[ 846.434805] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 +[ 846.435000] Workqueue: fscrypt_read_queue decrypt_work +[ 846.435174] RIP: 0010:fscrypt_do_page_crypto+0x6e/0x2d0 +[ 846.435351] Code: 00 65 48 8b 04 25 28 00 00 00 48 89 84 24 88 00 00 00 31 c0 e8 43 c2 e0 ff 49 8b 86 48 02 00 00 85 ed c7 44 24 70 00 00 00 00 <48> 8b 58 08 0f 84 14 02 00 00 48 8b 78 10 48 8b 0c 24 48 c7 84 24 +[ 846.435696] RSP: 0018:ffff961c40f9bd60 EFLAGS: 00010206 +[ 846.435870] RAX: 0000000000000000 RBX: ffffc5f787719b80 RCX: ffffc5f787719b80 +[ 846.436051] RDX: ffffffff8b9f4b88 RSI: ffffffff8b0ae622 RDI: ffff961c40f9bdb8 +[ 846.436261] RBP: 0000000000001000 R08: ffffc5f787719b80 R09: 0000000000001000 +[ 846.436433] R10: 0000000000000018 R11: fefefefefefefeff R12: ffffc5f787719b80 +[ 846.436562] R13: ffffc5f787719b80 R14: ffff89dff4ff88d0 R15: 0ffff89dfaddee60 +[ 846.436658] FS: 0000000000000000(0000) GS:ffff89dfffc00000(0000) knlGS:0000000000000000 +[ 846.436758] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 846.436898] CR2: 0000000000000008 CR3: 00000001eddd0000 CR4: 00000000000006f0 +[ 846.437001] Call Trace: +[ 846.437181] ? check_preempt_wakeup+0xf2/0x230 +[ 846.437276] ? check_preempt_curr+0x7c/0x90 +[ 846.437370] fscrypt_decrypt_page+0x48/0x4d +[ 846.437466] __fscrypt_decrypt_bio+0x5b/0x90 +[ 846.437542] decrypt_work+0x12/0x20 +[ 846.437651] process_one_work+0x15e/0x3d0 +[ 846.437740] worker_thread+0x4c/0x440 +[ 846.437848] kthread+0xf8/0x130 +[ 846.437938] ? rescuer_thread+0x350/0x350 +[ 846.438022] ? 
kthread_associate_blkcg+0x90/0x90 +[ 846.438117] ret_from_fork+0x35/0x40 +[ 846.438201] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer snd input_leds joydev soundcore serio_raw i2c_piix4 mac_hid ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi autofs4 raid10 raid456 libcrc32c async_raid6_recov async_memcpy async_pq async_xor xor async_tx raid6_pq raid1 raid0 multipath linear qxl ttm crct10dif_pclmul crc32_pclmul drm_kms_helper ghash_clmulni_intel syscopyarea sysfillrect sysimgblt fb_sys_fops pcbc drm 8139too aesni_intel 8139cp floppy psmouse mii aes_x86_64 crypto_simd pata_acpi cryptd glue_helper +[ 846.438653] CR2: 0000000000000008 +[ 846.438713] ---[ end trace abca54df39d14f60 ]--- +[ 846.438796] RIP: 0010:fscrypt_do_page_crypto+0x6e/0x2d0 +[ 846.438844] Code: 00 65 48 8b 04 25 28 00 00 00 48 89 84 24 88 00 00 00 31 c0 e8 43 c2 e0 ff 49 8b 86 48 02 00 00 85 ed c7 44 24 70 00 00 00 00 <48> 8b 58 08 0f 84 14 02 00 00 48 8b 78 10 48 8b 0c 24 48 c7 84 24 +[ 846.439084] RSP: 0018:ffff961c40f9bd60 EFLAGS: 00010206 +[ 846.439176] RAX: 0000000000000000 RBX: ffffc5f787719b80 RCX: ffffc5f787719b80 +[ 846.440927] RDX: ffffffff8b9f4b88 RSI: ffffffff8b0ae622 RDI: ffff961c40f9bdb8 +[ 846.442083] RBP: 0000000000001000 R08: ffffc5f787719b80 R09: 0000000000001000 +[ 846.443284] R10: 0000000000000018 R11: fefefefefefefeff R12: ffffc5f787719b80 +[ 846.444448] R13: ffffc5f787719b80 R14: ffff89dff4ff88d0 R15: 0ffff89dfaddee60 +[ 846.445558] FS: 0000000000000000(0000) GS:ffff89dfffc00000(0000) knlGS:0000000000000000 +[ 846.446687] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 846.447796] CR2: 0000000000000008 CR3: 00000001eddd0000 CR4: 00000000000006f0 + +- Location +https://elixir.bootlin.com/linux/v4.18-rc4/source/fs/crypto/crypto.c#L149 + struct crypto_skcipher *tfm = ci->ci_ctfm; +Here ci can be NULL + +Note that this issue maybe require CONFIG_F2FS_FS_ENCRYPTION=y 
to reproduce. + +Reported-by Wen Xu +Signed-off-by: Chao Yu +Signed-off-by: Jaegeuk Kim +[bwh: Backported to 4.9: adjust context] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/data.c | 3 +++ + fs/f2fs/inode.c | 18 +++++++++++++----- + 2 files changed, 16 insertions(+), 5 deletions(-) + +--- a/fs/f2fs/data.c ++++ b/fs/f2fs/data.c +@@ -995,6 +995,9 @@ static struct bio *f2fs_grab_bio(struct + struct block_device *bdev = sbi->sb->s_bdev; + struct bio *bio; + ++ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) ++ return ERR_PTR(-EFAULT); ++ + if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { + ctx = fscrypt_get_ctx(inode, GFP_NOFS); + if (IS_ERR(ctx)) +--- a/fs/f2fs/inode.c ++++ b/fs/f2fs/inode.c +@@ -59,14 +59,16 @@ static void __get_inode_rdev(struct inod + } + } + +-static bool __written_first_block(struct f2fs_sb_info *sbi, ++static int __written_first_block(struct f2fs_sb_info *sbi, + struct f2fs_inode *ri) + { + block_t addr = le32_to_cpu(ri->i_addr[0]); + +- if (is_valid_data_blkaddr(sbi, addr)) +- return true; +- return false; ++ if (!__is_valid_data_blkaddr(addr)) ++ return 1; ++ if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC)) ++ return -EFAULT; ++ return 0; + } + + static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) +@@ -154,6 +156,7 @@ static int do_read_inode(struct inode *i + struct f2fs_inode_info *fi = F2FS_I(inode); + struct page *node_page; + struct f2fs_inode *ri; ++ int err; + + /* Check if ino is within scope */ + if (check_nid_range(sbi, inode->i_ino)) { +@@ -209,7 +212,12 @@ static int do_read_inode(struct inode *i + /* get rdev by using inline_info */ + __get_inode_rdev(inode, ri); + +- if (__written_first_block(sbi, ri)) ++ err = __written_first_block(sbi, ri); ++ if (err < 0) { ++ f2fs_put_page(node_page, 1); ++ return err; ++ } ++ if (!err) + set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); + + if (!need_inode_block_update(sbi, inode->i_ino)) diff --git 
a/queue-4.9/f2fs-fix-to-do-sanity-check-with-block-address-in-main-area.patch b/queue-4.9/f2fs-fix-to-do-sanity-check-with-block-address-in-main-area.patch new file mode 100644 index 00000000000..7c56989d0e0 --- /dev/null +++ b/queue-4.9/f2fs-fix-to-do-sanity-check-with-block-address-in-main-area.patch @@ -0,0 +1,489 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Chao Yu +Date: Wed, 1 Aug 2018 19:13:44 +0800 +Subject: f2fs: fix to do sanity check with block address in main area + +From: Chao Yu + +commit c9b60788fc760d136211853f10ce73dc152d1f4a upstream. + +This patch add to do sanity check with below field: +- cp_pack_total_block_count +- blkaddr of data/node +- extent info + +- Overview +BUG() in verify_block_addr() when writing to a corrupted f2fs image + +- Reproduce (4.18 upstream kernel) + +- POC (poc.c) + +static void activity(char *mpoint) { + + char *foo_bar_baz; + int err; + + static int buf[8192]; + memset(buf, 0, sizeof(buf)); + + err = asprintf(&foo_bar_baz, "%s/foo/bar/baz", mpoint); + + int fd = open(foo_bar_baz, O_RDWR | O_TRUNC, 0777); + if (fd >= 0) { + write(fd, (char *)buf, sizeof(buf)); + fdatasync(fd); + close(fd); + } +} + +int main(int argc, char *argv[]) { + activity(argv[1]); + return 0; +} + +- Kernel message +[ 689.349473] F2FS-fs (loop0): Mounted with checkpoint version = 3 +[ 699.728662] WARNING: CPU: 0 PID: 1309 at fs/f2fs/segment.c:2860 f2fs_inplace_write_data+0x232/0x240 +[ 699.728670] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy +[ 699.729056] CPU: 0 
PID: 1309 Comm: a.out Not tainted 4.18.0-rc1+ #4 +[ 699.729064] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 +[ 699.729074] RIP: 0010:f2fs_inplace_write_data+0x232/0x240 +[ 699.729076] Code: ff e9 cf fe ff ff 49 8d 7d 10 e8 39 45 ad ff 4d 8b 7d 10 be 04 00 00 00 49 8d 7f 48 e8 07 49 ad ff 45 8b 7f 48 e9 fb fe ff ff <0f> 0b f0 41 80 4d 48 04 e9 65 fe ff ff 90 66 66 66 66 90 55 48 8d +[ 699.729130] RSP: 0018:ffff8801f43af568 EFLAGS: 00010202 +[ 699.729139] RAX: 000000000000003f RBX: ffff8801f43af7b8 RCX: ffffffffb88c9113 +[ 699.729142] RDX: 0000000000000003 RSI: dffffc0000000000 RDI: ffff8802024e5540 +[ 699.729144] RBP: ffff8801f43af590 R08: 0000000000000009 R09: ffffffffffffffe8 +[ 699.729147] R10: 0000000000000001 R11: ffffed0039b0596a R12: ffff8802024e5540 +[ 699.729149] R13: ffff8801f0335500 R14: ffff8801e3e7a700 R15: ffff8801e1ee4450 +[ 699.729154] FS: 00007f9bf97f5700(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000 +[ 699.729156] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 699.729159] CR2: 00007f9bf925d170 CR3: 00000001f0c34000 CR4: 00000000000006f0 +[ 699.729171] Call Trace: +[ 699.729192] f2fs_do_write_data_page+0x2e2/0xe00 +[ 699.729203] ? f2fs_should_update_outplace+0xd0/0xd0 +[ 699.729238] ? memcg_drain_all_list_lrus+0x280/0x280 +[ 699.729269] ? __radix_tree_replace+0xa3/0x120 +[ 699.729276] __write_data_page+0x5c7/0xe30 +[ 699.729291] ? kasan_check_read+0x11/0x20 +[ 699.729310] ? page_mapped+0x8a/0x110 +[ 699.729321] ? page_mkclean+0xe9/0x160 +[ 699.729327] ? f2fs_do_write_data_page+0xe00/0xe00 +[ 699.729331] ? invalid_page_referenced_vma+0x130/0x130 +[ 699.729345] ? clear_page_dirty_for_io+0x332/0x450 +[ 699.729351] f2fs_write_cache_pages+0x4ca/0x860 +[ 699.729358] ? __write_data_page+0xe30/0xe30 +[ 699.729374] ? percpu_counter_add_batch+0x22/0xa0 +[ 699.729380] ? kasan_check_write+0x14/0x20 +[ 699.729391] ? _raw_spin_lock+0x17/0x40 +[ 699.729403] ? 
f2fs_mark_inode_dirty_sync.part.18+0x16/0x30 +[ 699.729413] ? iov_iter_advance+0x113/0x640 +[ 699.729418] ? f2fs_write_end+0x133/0x2e0 +[ 699.729423] ? balance_dirty_pages_ratelimited+0x239/0x640 +[ 699.729428] f2fs_write_data_pages+0x329/0x520 +[ 699.729433] ? generic_perform_write+0x250/0x320 +[ 699.729438] ? f2fs_write_cache_pages+0x860/0x860 +[ 699.729454] ? current_time+0x110/0x110 +[ 699.729459] ? f2fs_preallocate_blocks+0x1ef/0x370 +[ 699.729464] do_writepages+0x37/0xb0 +[ 699.729468] ? f2fs_write_cache_pages+0x860/0x860 +[ 699.729472] ? do_writepages+0x37/0xb0 +[ 699.729478] __filemap_fdatawrite_range+0x19a/0x1f0 +[ 699.729483] ? delete_from_page_cache_batch+0x4e0/0x4e0 +[ 699.729496] ? __vfs_write+0x2b2/0x410 +[ 699.729501] file_write_and_wait_range+0x66/0xb0 +[ 699.729506] f2fs_do_sync_file+0x1f9/0xd90 +[ 699.729511] ? truncate_partial_data_page+0x290/0x290 +[ 699.729521] ? __sb_end_write+0x30/0x50 +[ 699.729526] ? vfs_write+0x20f/0x260 +[ 699.729530] f2fs_sync_file+0x9a/0xb0 +[ 699.729534] ? f2fs_do_sync_file+0xd90/0xd90 +[ 699.729548] vfs_fsync_range+0x68/0x100 +[ 699.729554] ? 
__fget_light+0xc9/0xe0 +[ 699.729558] do_fsync+0x3d/0x70 +[ 699.729562] __x64_sys_fdatasync+0x24/0x30 +[ 699.729585] do_syscall_64+0x78/0x170 +[ 699.729595] entry_SYSCALL_64_after_hwframe+0x44/0xa9 +[ 699.729613] RIP: 0033:0x7f9bf930d800 +[ 699.729615] Code: 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 83 3d 49 bf 2c 00 00 75 10 b8 4b 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 be 78 01 00 48 89 04 24 +[ 699.729668] RSP: 002b:00007ffee3606c68 EFLAGS: 00000246 ORIG_RAX: 000000000000004b +[ 699.729673] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f9bf930d800 +[ 699.729675] RDX: 0000000000008000 RSI: 00000000006010a0 RDI: 0000000000000003 +[ 699.729678] RBP: 00007ffee3606ca0 R08: 0000000001503010 R09: 0000000000000000 +[ 699.729680] R10: 00000000000002e8 R11: 0000000000000246 R12: 0000000000400610 +[ 699.729683] R13: 00007ffee3606da0 R14: 0000000000000000 R15: 0000000000000000 +[ 699.729687] ---[ end trace 4ce02f25ff7d3df5 ]--- +[ 699.729782] ------------[ cut here ]------------ +[ 699.729785] kernel BUG at fs/f2fs/segment.h:654! 
+[ 699.731055] invalid opcode: 0000 [#1] SMP KASAN PTI +[ 699.732104] CPU: 0 PID: 1309 Comm: a.out Tainted: G W 4.18.0-rc1+ #4 +[ 699.733684] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 +[ 699.735611] RIP: 0010:f2fs_submit_page_bio+0x29b/0x730 +[ 699.736649] Code: 54 49 8d bd 18 04 00 00 e8 b2 59 af ff 41 8b 8d 18 04 00 00 8b 45 b8 41 d3 e6 44 01 f0 4c 8d 73 14 41 39 c7 0f 82 37 fe ff ff <0f> 0b 65 8b 05 2c 04 77 47 89 c0 48 0f a3 05 52 c1 d5 01 0f 92 c0 +[ 699.740524] RSP: 0018:ffff8801f43af508 EFLAGS: 00010283 +[ 699.741573] RAX: 0000000000000000 RBX: ffff8801f43af7b8 RCX: ffffffffb88a7cef +[ 699.743006] RDX: 0000000000000007 RSI: dffffc0000000000 RDI: ffff8801e3e7a64c +[ 699.744426] RBP: ffff8801f43af558 R08: ffffed003e066b55 R09: ffffed003e066b55 +[ 699.745833] R10: 0000000000000001 R11: ffffed003e066b54 R12: ffffea0007876940 +[ 699.747256] R13: ffff8801f0335500 R14: ffff8801e3e7a600 R15: 0000000000000001 +[ 699.748683] FS: 00007f9bf97f5700(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000 +[ 699.750293] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 699.751462] CR2: 00007f9bf925d170 CR3: 00000001f0c34000 CR4: 00000000000006f0 +[ 699.752874] Call Trace: +[ 699.753386] ? f2fs_inplace_write_data+0x93/0x240 +[ 699.754341] f2fs_inplace_write_data+0xd2/0x240 +[ 699.755271] f2fs_do_write_data_page+0x2e2/0xe00 +[ 699.756214] ? f2fs_should_update_outplace+0xd0/0xd0 +[ 699.757215] ? memcg_drain_all_list_lrus+0x280/0x280 +[ 699.758209] ? __radix_tree_replace+0xa3/0x120 +[ 699.759164] __write_data_page+0x5c7/0xe30 +[ 699.760002] ? kasan_check_read+0x11/0x20 +[ 699.760823] ? page_mapped+0x8a/0x110 +[ 699.761573] ? page_mkclean+0xe9/0x160 +[ 699.762345] ? f2fs_do_write_data_page+0xe00/0xe00 +[ 699.763332] ? invalid_page_referenced_vma+0x130/0x130 +[ 699.764374] ? clear_page_dirty_for_io+0x332/0x450 +[ 699.765347] f2fs_write_cache_pages+0x4ca/0x860 +[ 699.766276] ? __write_data_page+0xe30/0xe30 +[ 699.767161] ? 
percpu_counter_add_batch+0x22/0xa0 +[ 699.768112] ? kasan_check_write+0x14/0x20 +[ 699.768951] ? _raw_spin_lock+0x17/0x40 +[ 699.769739] ? f2fs_mark_inode_dirty_sync.part.18+0x16/0x30 +[ 699.770885] ? iov_iter_advance+0x113/0x640 +[ 699.771743] ? f2fs_write_end+0x133/0x2e0 +[ 699.772569] ? balance_dirty_pages_ratelimited+0x239/0x640 +[ 699.773680] f2fs_write_data_pages+0x329/0x520 +[ 699.774603] ? generic_perform_write+0x250/0x320 +[ 699.775544] ? f2fs_write_cache_pages+0x860/0x860 +[ 699.776510] ? current_time+0x110/0x110 +[ 699.777299] ? f2fs_preallocate_blocks+0x1ef/0x370 +[ 699.778279] do_writepages+0x37/0xb0 +[ 699.779026] ? f2fs_write_cache_pages+0x860/0x860 +[ 699.779978] ? do_writepages+0x37/0xb0 +[ 699.780755] __filemap_fdatawrite_range+0x19a/0x1f0 +[ 699.781746] ? delete_from_page_cache_batch+0x4e0/0x4e0 +[ 699.782820] ? __vfs_write+0x2b2/0x410 +[ 699.783597] file_write_and_wait_range+0x66/0xb0 +[ 699.784540] f2fs_do_sync_file+0x1f9/0xd90 +[ 699.785381] ? truncate_partial_data_page+0x290/0x290 +[ 699.786415] ? __sb_end_write+0x30/0x50 +[ 699.787204] ? vfs_write+0x20f/0x260 +[ 699.787941] f2fs_sync_file+0x9a/0xb0 +[ 699.788694] ? f2fs_do_sync_file+0xd90/0xd90 +[ 699.789572] vfs_fsync_range+0x68/0x100 +[ 699.790360] ? 
__fget_light+0xc9/0xe0 +[ 699.791128] do_fsync+0x3d/0x70 +[ 699.791779] __x64_sys_fdatasync+0x24/0x30 +[ 699.792614] do_syscall_64+0x78/0x170 +[ 699.793371] entry_SYSCALL_64_after_hwframe+0x44/0xa9 +[ 699.794406] RIP: 0033:0x7f9bf930d800 +[ 699.795134] Code: 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 83 3d 49 bf 2c 00 00 75 10 b8 4b 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 be 78 01 00 48 89 04 24 +[ 699.798960] RSP: 002b:00007ffee3606c68 EFLAGS: 00000246 ORIG_RAX: 000000000000004b +[ 699.800483] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f9bf930d800 +[ 699.801923] RDX: 0000000000008000 RSI: 00000000006010a0 RDI: 0000000000000003 +[ 699.803373] RBP: 00007ffee3606ca0 R08: 0000000001503010 R09: 0000000000000000 +[ 699.804798] R10: 00000000000002e8 R11: 0000000000000246 R12: 0000000000400610 +[ 699.806233] R13: 00007ffee3606da0 R14: 0000000000000000 R15: 0000000000000000 +[ 699.807667] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy +[ 699.817079] ---[ end trace 4ce02f25ff7d3df6 ]--- +[ 699.818068] RIP: 0010:f2fs_submit_page_bio+0x29b/0x730 +[ 699.819114] Code: 54 49 8d bd 18 04 00 00 e8 b2 59 af ff 41 8b 8d 18 04 00 00 8b 45 b8 41 d3 e6 44 01 f0 4c 8d 73 14 41 39 c7 0f 82 37 fe ff ff <0f> 0b 65 8b 05 2c 04 77 47 89 c0 48 0f a3 05 52 c1 d5 01 0f 92 c0 +[ 699.822919] RSP: 0018:ffff8801f43af508 EFLAGS: 00010283 +[ 699.823977] RAX: 0000000000000000 RBX: ffff8801f43af7b8 RCX: ffffffffb88a7cef +[ 699.825436] RDX: 0000000000000007 RSI: 
dffffc0000000000 RDI: ffff8801e3e7a64c +[ 699.826881] RBP: ffff8801f43af558 R08: ffffed003e066b55 R09: ffffed003e066b55 +[ 699.828292] R10: 0000000000000001 R11: ffffed003e066b54 R12: ffffea0007876940 +[ 699.829750] R13: ffff8801f0335500 R14: ffff8801e3e7a600 R15: 0000000000000001 +[ 699.831192] FS: 00007f9bf97f5700(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000 +[ 699.832793] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 699.833981] CR2: 00007f9bf925d170 CR3: 00000001f0c34000 CR4: 00000000000006f0 +[ 699.835556] ================================================================== +[ 699.837029] BUG: KASAN: stack-out-of-bounds in update_stack_state+0x38c/0x3e0 +[ 699.838462] Read of size 8 at addr ffff8801f43af970 by task a.out/1309 + +[ 699.840086] CPU: 0 PID: 1309 Comm: a.out Tainted: G D W 4.18.0-rc1+ #4 +[ 699.841603] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 +[ 699.843475] Call Trace: +[ 699.843982] dump_stack+0x7b/0xb5 +[ 699.844661] print_address_description+0x70/0x290 +[ 699.845607] kasan_report+0x291/0x390 +[ 699.846351] ? update_stack_state+0x38c/0x3e0 +[ 699.853831] __asan_load8+0x54/0x90 +[ 699.854569] update_stack_state+0x38c/0x3e0 +[ 699.855428] ? __read_once_size_nocheck.constprop.7+0x20/0x20 +[ 699.856601] ? __save_stack_trace+0x5e/0x100 +[ 699.857476] unwind_next_frame.part.5+0x18e/0x490 +[ 699.858448] ? unwind_dump+0x290/0x290 +[ 699.859217] ? clear_page_dirty_for_io+0x332/0x450 +[ 699.860185] __unwind_start+0x106/0x190 +[ 699.860974] __save_stack_trace+0x5e/0x100 +[ 699.861808] ? __save_stack_trace+0x5e/0x100 +[ 699.862691] ? unlink_anon_vmas+0xba/0x2c0 +[ 699.863525] save_stack_trace+0x1f/0x30 +[ 699.864312] save_stack+0x46/0xd0 +[ 699.864993] ? __alloc_pages_slowpath+0x1420/0x1420 +[ 699.865990] ? flush_tlb_mm_range+0x15e/0x220 +[ 699.866889] ? kasan_check_write+0x14/0x20 +[ 699.867724] ? __dec_node_state+0x92/0xb0 +[ 699.868543] ? lock_page_memcg+0x85/0xf0 +[ 699.869350] ? 
unlock_page_memcg+0x16/0x80 +[ 699.870185] ? page_remove_rmap+0x198/0x520 +[ 699.871048] ? mark_page_accessed+0x133/0x200 +[ 699.871930] ? _cond_resched+0x1a/0x50 +[ 699.872700] ? unmap_page_range+0xcd4/0xe50 +[ 699.873551] ? rb_next+0x58/0x80 +[ 699.874217] ? rb_next+0x58/0x80 +[ 699.874895] __kasan_slab_free+0x13c/0x1a0 +[ 699.875734] ? unlink_anon_vmas+0xba/0x2c0 +[ 699.876563] kasan_slab_free+0xe/0x10 +[ 699.877315] kmem_cache_free+0x89/0x1e0 +[ 699.878095] unlink_anon_vmas+0xba/0x2c0 +[ 699.878913] free_pgtables+0x101/0x1b0 +[ 699.879677] exit_mmap+0x146/0x2a0 +[ 699.880378] ? __ia32_sys_munmap+0x50/0x50 +[ 699.881214] ? kasan_check_read+0x11/0x20 +[ 699.882052] ? mm_update_next_owner+0x322/0x380 +[ 699.882985] mmput+0x8b/0x1d0 +[ 699.883602] do_exit+0x43a/0x1390 +[ 699.884288] ? mm_update_next_owner+0x380/0x380 +[ 699.885212] ? f2fs_sync_file+0x9a/0xb0 +[ 699.885995] ? f2fs_do_sync_file+0xd90/0xd90 +[ 699.886877] ? vfs_fsync_range+0x68/0x100 +[ 699.887694] ? __fget_light+0xc9/0xe0 +[ 699.888442] ? do_fsync+0x3d/0x70 +[ 699.889118] ? __x64_sys_fdatasync+0x24/0x30 +[ 699.889996] rewind_stack_do_exit+0x17/0x20 +[ 699.890860] RIP: 0033:0x7f9bf930d800 +[ 699.891585] Code: Bad RIP value. 
+[ 699.892268] RSP: 002b:00007ffee3606c68 EFLAGS: 00000246 ORIG_RAX: 000000000000004b +[ 699.893781] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f9bf930d800 +[ 699.895220] RDX: 0000000000008000 RSI: 00000000006010a0 RDI: 0000000000000003 +[ 699.896643] RBP: 00007ffee3606ca0 R08: 0000000001503010 R09: 0000000000000000 +[ 699.898069] R10: 00000000000002e8 R11: 0000000000000246 R12: 0000000000400610 +[ 699.899505] R13: 00007ffee3606da0 R14: 0000000000000000 R15: 0000000000000000 + +[ 699.901241] The buggy address belongs to the page: +[ 699.902215] page:ffffea0007d0ebc0 count:0 mapcount:0 mapping:0000000000000000 index:0x0 +[ 699.903811] flags: 0x2ffff0000000000() +[ 699.904585] raw: 02ffff0000000000 0000000000000000 ffffffff07d00101 0000000000000000 +[ 699.906125] raw: 0000000000000000 0000000000240000 00000000ffffffff 0000000000000000 +[ 699.907673] page dumped because: kasan: bad access detected + +[ 699.909108] Memory state around the buggy address: +[ 699.910077] ffff8801f43af800: 00 f1 f1 f1 f1 00 f4 f4 f4 f3 f3 f3 f3 00 00 00 +[ 699.911528] ffff8801f43af880: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +[ 699.912953] >ffff8801f43af900: 00 00 00 00 00 00 00 00 f1 01 f4 f4 f4 f2 f2 f2 +[ 699.914392] ^ +[ 699.915758] ffff8801f43af980: f2 00 f4 f4 00 00 00 00 f2 00 00 00 00 00 00 00 +[ 699.917193] ffff8801f43afa00: 00 00 00 00 00 00 00 00 00 f3 f3 f3 00 00 00 00 +[ 699.918634] ================================================================== + +- Location +https://elixir.bootlin.com/linux/v4.18-rc1/source/fs/f2fs/segment.h#L644 + +Reported-by Wen Xu +Signed-off-by: Chao Yu +Signed-off-by: Jaegeuk Kim +[bwh: Backported to 4.9: + - Error label is different in validate_checkpoint() due to the earlier + backport of "f2fs: fix invalid memory access" + - Adjust context] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/checkpoint.c | 22 +++++++++++++++++++--- + fs/f2fs/data.c | 21 ++++++++++++++++++++- + fs/f2fs/f2fs.h | 3 +++ 
+ fs/f2fs/file.c | 12 ++++++++++++ + fs/f2fs/inode.c | 16 ++++++++++++++++ + fs/f2fs/node.c | 4 ++++ + fs/f2fs/segment.h | 3 +-- + 7 files changed, 75 insertions(+), 6 deletions(-) + +--- a/fs/f2fs/checkpoint.c ++++ b/fs/f2fs/checkpoint.c +@@ -86,8 +86,10 @@ repeat: + fio.page = page; + + if (f2fs_submit_page_bio(&fio)) { +- f2fs_put_page(page, 1); +- goto repeat; ++ memset(page_address(page), 0, PAGE_SIZE); ++ f2fs_stop_checkpoint(sbi, false); ++ f2fs_bug_on(sbi, 1); ++ return page; + } + + lock_page(page); +@@ -141,8 +143,14 @@ bool f2fs_is_valid_blkaddr(struct f2fs_s + case META_POR: + case DATA_GENERIC: + if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || +- blkaddr < MAIN_BLKADDR(sbi))) ++ blkaddr < MAIN_BLKADDR(sbi))) { ++ if (type == DATA_GENERIC) { ++ f2fs_msg(sbi->sb, KERN_WARNING, ++ "access invalid blkaddr:%u", blkaddr); ++ WARN_ON(1); ++ } + return false; ++ } + break; + case META_GENERIC: + if (unlikely(blkaddr < SEG0_BLKADDR(sbi) || +@@ -715,6 +723,14 @@ static struct page *validate_checkpoint( + &cp_page_1, version); + if (err) + return NULL; ++ ++ if (le32_to_cpu(cp_block->cp_pack_total_block_count) > ++ sbi->blocks_per_seg) { ++ f2fs_msg(sbi->sb, KERN_WARNING, ++ "invalid cp_pack_total_block_count:%u", ++ le32_to_cpu(cp_block->cp_pack_total_block_count)); ++ goto invalid_cp; ++ } + pre_version = *version; + + cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1; +--- a/fs/f2fs/data.c ++++ b/fs/f2fs/data.c +@@ -240,7 +240,10 @@ int f2fs_submit_page_bio(struct f2fs_io_ + struct page *page = fio->encrypted_page ? + fio->encrypted_page : fio->page; + +- verify_block_addr(fio, fio->new_blkaddr); ++ if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, ++ __is_meta_io(fio) ? 
META_GENERIC : DATA_GENERIC)) ++ return -EFAULT; ++ + trace_f2fs_submit_page_bio(page, fio); + f2fs_trace_ios(fio, 0); + +@@ -723,6 +726,12 @@ next_dnode: + next_block: + blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); + ++ if (__is_valid_data_blkaddr(blkaddr) && ++ !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) { ++ err = -EFAULT; ++ goto sync_out; ++ } ++ + if (!is_valid_data_blkaddr(sbi, blkaddr)) { + if (create) { + if (unlikely(f2fs_cp_error(sbi))) { +@@ -1085,6 +1094,10 @@ got_it: + SetPageUptodate(page); + goto confused; + } ++ ++ if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr, ++ DATA_GENERIC)) ++ goto set_error_page; + } else { + zero_user_segment(page, 0, PAGE_SIZE); + if (!PageUptodate(page)) +@@ -1213,6 +1226,12 @@ retry_encrypt: + + set_page_writeback(page); + ++ if (__is_valid_data_blkaddr(fio->old_blkaddr) && ++ !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, ++ DATA_GENERIC)) { ++ err = -EFAULT; ++ goto out_writepage; ++ } + /* + * If current allocation needs SSR, + * it had better in-place writes for updated data. 
+--- a/fs/f2fs/f2fs.h ++++ b/fs/f2fs/f2fs.h +@@ -1932,6 +1932,9 @@ static inline void *f2fs_kvzalloc(size_t + (pgofs - ADDRS_PER_INODE(inode) + ADDRS_PER_BLOCK) / \ + ADDRS_PER_BLOCK * ADDRS_PER_BLOCK + ADDRS_PER_INODE(inode)) + ++#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO(fio->type) == META && \ ++ (!is_read_io(fio->op) || fio->is_meta)) ++ + bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, + block_t blkaddr, int type); + void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...); +--- a/fs/f2fs/file.c ++++ b/fs/f2fs/file.c +@@ -378,6 +378,13 @@ static loff_t f2fs_seek_block(struct fil + block_t blkaddr; + blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); + ++ if (__is_valid_data_blkaddr(blkaddr) && ++ !f2fs_is_valid_blkaddr(F2FS_I_SB(inode), ++ blkaddr, DATA_GENERIC)) { ++ f2fs_put_dnode(&dn); ++ goto fail; ++ } ++ + if (__found_offset(F2FS_I_SB(inode), blkaddr, dirty, + pgofs, whence)) { + f2fs_put_dnode(&dn); +@@ -482,6 +489,11 @@ int truncate_data_blocks_range(struct dn + + dn->data_blkaddr = NULL_ADDR; + set_data_blkaddr(dn); ++ ++ if (__is_valid_data_blkaddr(blkaddr) && ++ !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) ++ continue; ++ + invalidate_blocks(sbi, blkaddr); + if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page)) + clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN); +--- a/fs/f2fs/inode.c ++++ b/fs/f2fs/inode.c +@@ -129,6 +129,22 @@ static bool sanity_check_inode(struct in + return false; + } + ++ if (F2FS_I(inode)->extent_tree) { ++ struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest; ++ ++ if (ei->len && ++ (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC) || ++ !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1, ++ DATA_GENERIC))) { ++ set_sbi_flag(sbi, SBI_NEED_FSCK); ++ f2fs_msg(sbi->sb, KERN_WARNING, ++ "%s: inode (ino=%lx) extent info [%u, %u, %u] " ++ "is incorrect, run fsck to fix", ++ __func__, inode->i_ino, ++ ei->blk, ei->fofs, ei->len); ++ return false; ++ } ++ } + return true; + } + +--- 
a/fs/f2fs/node.c ++++ b/fs/f2fs/node.c +@@ -1605,6 +1605,10 @@ static int f2fs_write_node_page(struct p + return 0; + } + ++ if (__is_valid_data_blkaddr(ni.blk_addr) && ++ !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC)) ++ goto redirty_out; ++ + set_page_writeback(page); + fio.old_blkaddr = ni.blk_addr; + write_node_page(nid, &fio); +--- a/fs/f2fs/segment.h ++++ b/fs/f2fs/segment.h +@@ -601,8 +601,7 @@ static inline void verify_block_addr(str + { + struct f2fs_sb_info *sbi = fio->sbi; + +- if (PAGE_TYPE_OF_BIO(fio->type) == META && +- (!is_read_io(fio->op) || fio->is_meta)) ++ if (__is_meta_io(fio)) + verify_blkaddr(sbi, blk_addr, META_GENERIC); + else + verify_blkaddr(sbi, blk_addr, DATA_GENERIC); diff --git a/queue-4.9/f2fs-fix-to-do-sanity-check-with-cp_pack_start_sum.patch b/queue-4.9/f2fs-fix-to-do-sanity-check-with-cp_pack_start_sum.patch new file mode 100644 index 00000000000..f6b339a0eec --- /dev/null +++ b/queue-4.9/f2fs-fix-to-do-sanity-check-with-cp_pack_start_sum.patch @@ -0,0 +1,349 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Ben Hutchings +Date: Thu, 6 Dec 2018 13:47:03 +0000 +Subject: f2fs: fix to do sanity check with cp_pack_start_sum + +From: Ben Hutchings + +commit e494c2f995d6181d6e29c4927d68e0f295ecf75b upstream. + +After fuzzing, cp_pack_start_sum could be corrupted, so current log's +summary info should be wrong due to loading incorrect summary block. +Then, if segment's type in current log is exceeded NR_CURSEG_TYPE, it +can lead accessing invalid dirty_i->dirty_segmap bitmap finally. + +Add sanity check for cp_pack_start_sum to fix this issue. 
+ +https://bugzilla.kernel.org/show_bug.cgi?id=200419 + +- Reproduce + +- Kernel message (f2fs-dev w/ KASAN) +[ 3117.578432] F2FS-fs (loop0): Invalid log blocks per segment (8) + +[ 3117.578445] F2FS-fs (loop0): Can't find valid F2FS filesystem in 2th superblock +[ 3117.581364] F2FS-fs (loop0): invalid crc_offset: 30716 +[ 3117.583564] WARNING: CPU: 1 PID: 1225 at fs/f2fs/checkpoint.c:90 __get_meta_page+0x448/0x4b0 +[ 3117.583570] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer joydev input_leds serio_raw snd soundcore mac_hid i2c_piix4 ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi btrfs zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear 8139too qxl ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel psmouse aes_x86_64 8139cp crypto_simd cryptd mii glue_helper pata_acpi floppy +[ 3117.584014] CPU: 1 PID: 1225 Comm: mount Not tainted 4.17.0+ #1 +[ 3117.584017] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 +[ 3117.584022] RIP: 0010:__get_meta_page+0x448/0x4b0 +[ 3117.584023] Code: 00 49 8d bc 24 84 00 00 00 e8 74 54 da ff 41 83 8c 24 84 00 00 00 08 4c 89 f6 4c 89 ef e8 c0 d9 95 00 48 89 ef e8 18 e3 00 00 <0f> 0b f0 80 4d 48 04 e9 0f fe ff ff 0f 0b 48 89 c7 48 89 04 24 e8 +[ 3117.584072] RSP: 0018:ffff88018eb678c0 EFLAGS: 00010286 +[ 3117.584082] RAX: ffff88018f0a6a78 RBX: ffffea0007a46600 RCX: ffffffff9314d1b2 +[ 3117.584085] RDX: ffffffff00000001 RSI: 0000000000000000 RDI: ffff88018f0a6a98 +[ 3117.584087] RBP: ffff88018ebe9980 R08: 0000000000000002 R09: 0000000000000001 +[ 3117.584090] R10: 0000000000000001 R11: ffffed00326e4450 R12: ffff880193722200 +[ 3117.584092] R13: ffff88018ebe9afc R14: 0000000000000206 R15: 
ffff88018eb67900 +[ 3117.584096] FS: 00007f5694636840(0000) GS:ffff8801f3b00000(0000) knlGS:0000000000000000 +[ 3117.584098] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 3117.584101] CR2: 00000000016f21b8 CR3: 0000000191c22000 CR4: 00000000000006e0 +[ 3117.584112] Call Trace: +[ 3117.584121] ? f2fs_set_meta_page_dirty+0x150/0x150 +[ 3117.584127] ? f2fs_build_segment_manager+0xbf9/0x3190 +[ 3117.584133] ? f2fs_npages_for_summary_flush+0x75/0x120 +[ 3117.584145] f2fs_build_segment_manager+0xda8/0x3190 +[ 3117.584151] ? f2fs_get_valid_checkpoint+0x298/0xa00 +[ 3117.584156] ? f2fs_flush_sit_entries+0x10e0/0x10e0 +[ 3117.584184] ? map_id_range_down+0x17c/0x1b0 +[ 3117.584188] ? __put_user_ns+0x30/0x30 +[ 3117.584206] ? find_next_bit+0x53/0x90 +[ 3117.584237] ? cpumask_next+0x16/0x20 +[ 3117.584249] f2fs_fill_super+0x1948/0x2b40 +[ 3117.584258] ? f2fs_commit_super+0x1a0/0x1a0 +[ 3117.584279] ? sget_userns+0x65e/0x690 +[ 3117.584296] ? set_blocksize+0x88/0x130 +[ 3117.584302] ? f2fs_commit_super+0x1a0/0x1a0 +[ 3117.584305] mount_bdev+0x1c0/0x200 +[ 3117.584310] mount_fs+0x5c/0x190 +[ 3117.584320] vfs_kern_mount+0x64/0x190 +[ 3117.584330] do_mount+0x2e4/0x1450 +[ 3117.584343] ? lockref_put_return+0x130/0x130 +[ 3117.584347] ? copy_mount_string+0x20/0x20 +[ 3117.584357] ? kasan_unpoison_shadow+0x31/0x40 +[ 3117.584362] ? kasan_kmalloc+0xa6/0xd0 +[ 3117.584373] ? memcg_kmem_put_cache+0x16/0x90 +[ 3117.584377] ? __kmalloc_track_caller+0x196/0x210 +[ 3117.584383] ? _copy_from_user+0x61/0x90 +[ 3117.584396] ? 
memdup_user+0x3e/0x60 +[ 3117.584401] ksys_mount+0x7e/0xd0 +[ 3117.584405] __x64_sys_mount+0x62/0x70 +[ 3117.584427] do_syscall_64+0x73/0x160 +[ 3117.584440] entry_SYSCALL_64_after_hwframe+0x44/0xa9 +[ 3117.584455] RIP: 0033:0x7f5693f14b9a +[ 3117.584456] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48 +[ 3117.584505] RSP: 002b:00007fff27346488 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5 +[ 3117.584510] RAX: ffffffffffffffda RBX: 00000000016e2030 RCX: 00007f5693f14b9a +[ 3117.584512] RDX: 00000000016e2210 RSI: 00000000016e3f30 RDI: 00000000016ee040 +[ 3117.584514] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013 +[ 3117.584516] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 00000000016ee040 +[ 3117.584519] R13: 00000000016e2210 R14: 0000000000000000 R15: 0000000000000003 +[ 3117.584523] ---[ end trace a8e0d899985faf31 ]--- +[ 3117.685663] F2FS-fs (loop0): f2fs_check_nid_range: out-of-range nid=2, run fsck to fix. +[ 3117.685673] F2FS-fs (loop0): recover_data: ino = 2 (i_size: recover) recovered = 1, err = 0 +[ 3117.685707] ================================================================== +[ 3117.685955] BUG: KASAN: slab-out-of-bounds in __remove_dirty_segment+0xdd/0x1e0 +[ 3117.686175] Read of size 8 at addr ffff88018f0a63d0 by task mount/1225 + +[ 3117.686477] CPU: 0 PID: 1225 Comm: mount Tainted: G W 4.17.0+ #1 +[ 3117.686481] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 +[ 3117.686483] Call Trace: +[ 3117.686494] dump_stack+0x71/0xab +[ 3117.686512] print_address_description+0x6b/0x290 +[ 3117.686517] kasan_report+0x28e/0x390 +[ 3117.686522] ? 
__remove_dirty_segment+0xdd/0x1e0 +[ 3117.686527] __remove_dirty_segment+0xdd/0x1e0 +[ 3117.686532] locate_dirty_segment+0x189/0x190 +[ 3117.686538] f2fs_allocate_new_segments+0xa9/0xe0 +[ 3117.686543] recover_data+0x703/0x2c20 +[ 3117.686547] ? f2fs_recover_fsync_data+0x48f/0xd50 +[ 3117.686553] ? ksys_mount+0x7e/0xd0 +[ 3117.686564] ? policy_nodemask+0x1a/0x90 +[ 3117.686567] ? policy_node+0x56/0x70 +[ 3117.686571] ? add_fsync_inode+0xf0/0xf0 +[ 3117.686592] ? blk_finish_plug+0x44/0x60 +[ 3117.686597] ? f2fs_ra_meta_pages+0x38b/0x5e0 +[ 3117.686602] ? find_inode_fast+0xac/0xc0 +[ 3117.686606] ? f2fs_is_valid_blkaddr+0x320/0x320 +[ 3117.686618] ? __radix_tree_lookup+0x150/0x150 +[ 3117.686633] ? dqget+0x670/0x670 +[ 3117.686648] ? pagecache_get_page+0x29/0x410 +[ 3117.686656] ? kmem_cache_alloc+0x176/0x1e0 +[ 3117.686660] ? f2fs_is_valid_blkaddr+0x11d/0x320 +[ 3117.686664] f2fs_recover_fsync_data+0xc23/0xd50 +[ 3117.686670] ? f2fs_space_for_roll_forward+0x60/0x60 +[ 3117.686674] ? rb_insert_color+0x323/0x3d0 +[ 3117.686678] ? f2fs_recover_orphan_inodes+0xa5/0x700 +[ 3117.686683] ? proc_register+0x153/0x1d0 +[ 3117.686686] ? f2fs_remove_orphan_inode+0x10/0x10 +[ 3117.686695] ? f2fs_attr_store+0x50/0x50 +[ 3117.686700] ? proc_create_single_data+0x52/0x60 +[ 3117.686707] f2fs_fill_super+0x1d06/0x2b40 +[ 3117.686728] ? f2fs_commit_super+0x1a0/0x1a0 +[ 3117.686735] ? sget_userns+0x65e/0x690 +[ 3117.686740] ? set_blocksize+0x88/0x130 +[ 3117.686745] ? f2fs_commit_super+0x1a0/0x1a0 +[ 3117.686748] mount_bdev+0x1c0/0x200 +[ 3117.686753] mount_fs+0x5c/0x190 +[ 3117.686758] vfs_kern_mount+0x64/0x190 +[ 3117.686762] do_mount+0x2e4/0x1450 +[ 3117.686769] ? lockref_put_return+0x130/0x130 +[ 3117.686773] ? copy_mount_string+0x20/0x20 +[ 3117.686777] ? kasan_unpoison_shadow+0x31/0x40 +[ 3117.686780] ? kasan_kmalloc+0xa6/0xd0 +[ 3117.686786] ? memcg_kmem_put_cache+0x16/0x90 +[ 3117.686790] ? __kmalloc_track_caller+0x196/0x210 +[ 3117.686795] ? 
_copy_from_user+0x61/0x90 +[ 3117.686801] ? memdup_user+0x3e/0x60 +[ 3117.686804] ksys_mount+0x7e/0xd0 +[ 3117.686809] __x64_sys_mount+0x62/0x70 +[ 3117.686816] do_syscall_64+0x73/0x160 +[ 3117.686824] entry_SYSCALL_64_after_hwframe+0x44/0xa9 +[ 3117.686829] RIP: 0033:0x7f5693f14b9a +[ 3117.686830] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48 +[ 3117.686887] RSP: 002b:00007fff27346488 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5 +[ 3117.686892] RAX: ffffffffffffffda RBX: 00000000016e2030 RCX: 00007f5693f14b9a +[ 3117.686894] RDX: 00000000016e2210 RSI: 00000000016e3f30 RDI: 00000000016ee040 +[ 3117.686896] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013 +[ 3117.686899] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 00000000016ee040 +[ 3117.686901] R13: 00000000016e2210 R14: 0000000000000000 R15: 0000000000000003 + +[ 3117.687005] Allocated by task 1225: +[ 3117.687152] kasan_kmalloc+0xa6/0xd0 +[ 3117.687157] kmem_cache_alloc_trace+0xfd/0x200 +[ 3117.687161] f2fs_build_segment_manager+0x2d09/0x3190 +[ 3117.687165] f2fs_fill_super+0x1948/0x2b40 +[ 3117.687168] mount_bdev+0x1c0/0x200 +[ 3117.687171] mount_fs+0x5c/0x190 +[ 3117.687174] vfs_kern_mount+0x64/0x190 +[ 3117.687177] do_mount+0x2e4/0x1450 +[ 3117.687180] ksys_mount+0x7e/0xd0 +[ 3117.687182] __x64_sys_mount+0x62/0x70 +[ 3117.687186] do_syscall_64+0x73/0x160 +[ 3117.687190] entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +[ 3117.687285] Freed by task 19: +[ 3117.687412] __kasan_slab_free+0x137/0x190 +[ 3117.687416] kfree+0x8b/0x1b0 +[ 3117.687460] ttm_bo_man_put_node+0x61/0x80 [ttm] +[ 3117.687476] ttm_bo_cleanup_refs+0x15f/0x250 [ttm] +[ 3117.687492] ttm_bo_delayed_delete+0x2f0/0x300 [ttm] +[ 3117.687507] ttm_bo_delayed_workqueue+0x17/0x50 [ttm] +[ 3117.687528] process_one_work+0x2f9/0x740 +[ 3117.687531] worker_thread+0x78/0x6b0 +[ 3117.687541] 
kthread+0x177/0x1c0 +[ 3117.687545] ret_from_fork+0x35/0x40 + +[ 3117.687638] The buggy address belongs to the object at ffff88018f0a6300 + which belongs to the cache kmalloc-192 of size 192 +[ 3117.688014] The buggy address is located 16 bytes to the right of + 192-byte region [ffff88018f0a6300, ffff88018f0a63c0) +[ 3117.688382] The buggy address belongs to the page: +[ 3117.688554] page:ffffea00063c2980 count:1 mapcount:0 mapping:ffff8801f3403180 index:0x0 +[ 3117.688788] flags: 0x17fff8000000100(slab) +[ 3117.688944] raw: 017fff8000000100 ffffea00063c2840 0000000e0000000e ffff8801f3403180 +[ 3117.689166] raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000 +[ 3117.689386] page dumped because: kasan: bad access detected + +[ 3117.689653] Memory state around the buggy address: +[ 3117.689816] ffff88018f0a6280: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc +[ 3117.690027] ffff88018f0a6300: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +[ 3117.690239] >ffff88018f0a6380: 00 00 fc fc fc fc fc fc fc fc fc fc fc fc fc fc +[ 3117.690448] ^ +[ 3117.690644] ffff88018f0a6400: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +[ 3117.690868] ffff88018f0a6480: 00 00 fc fc fc fc fc fc fc fc fc fc fc fc fc fc +[ 3117.691077] ================================================================== +[ 3117.691290] Disabling lock debugging due to kernel taint +[ 3117.693893] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 +[ 3117.694120] PGD 80000001f01bc067 P4D 80000001f01bc067 PUD 1d9638067 PMD 0 +[ 3117.694338] Oops: 0002 [#1] SMP KASAN PTI +[ 3117.694490] CPU: 1 PID: 1225 Comm: mount Tainted: G B W 4.17.0+ #1 +[ 3117.694703] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 +[ 3117.695073] RIP: 0010:__remove_dirty_segment+0xe2/0x1e0 +[ 3117.695246] Code: c4 48 89 c7 e8 cf bb d7 ff 45 0f b6 24 24 41 83 e4 3f 44 88 64 24 07 41 83 e4 3f 4a 8d 7c e3 08 e8 b3 bc d7 ff 4a 8b 4c e3 08 4c 0f b3 29 0f 
82 94 00 00 00 48 8d bd 20 04 00 00 e8 97 bb d7 +[ 3117.695793] RSP: 0018:ffff88018eb67638 EFLAGS: 00010292 +[ 3117.695969] RAX: 0000000000000000 RBX: ffff88018f0a6300 RCX: 0000000000000000 +[ 3117.696182] RDX: 0000000000000000 RSI: 0000000000000297 RDI: 0000000000000297 +[ 3117.696391] RBP: ffff88018ebe9980 R08: ffffed003e743ebb R09: ffffed003e743ebb +[ 3117.696604] R10: 0000000000000001 R11: ffffed003e743eba R12: 0000000000000019 +[ 3117.696813] R13: 0000000000000014 R14: 0000000000000320 R15: ffff88018ebe99e0 +[ 3117.697032] FS: 00007f5694636840(0000) GS:ffff8801f3b00000(0000) knlGS:0000000000000000 +[ 3117.697280] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 3117.702357] CR2: 00007fe89bb1a000 CR3: 0000000191c22000 CR4: 00000000000006e0 +[ 3117.707235] Call Trace: +[ 3117.712077] locate_dirty_segment+0x189/0x190 +[ 3117.716891] f2fs_allocate_new_segments+0xa9/0xe0 +[ 3117.721617] recover_data+0x703/0x2c20 +[ 3117.726316] ? f2fs_recover_fsync_data+0x48f/0xd50 +[ 3117.730957] ? ksys_mount+0x7e/0xd0 +[ 3117.735573] ? policy_nodemask+0x1a/0x90 +[ 3117.740198] ? policy_node+0x56/0x70 +[ 3117.744829] ? add_fsync_inode+0xf0/0xf0 +[ 3117.749487] ? blk_finish_plug+0x44/0x60 +[ 3117.754152] ? f2fs_ra_meta_pages+0x38b/0x5e0 +[ 3117.758831] ? find_inode_fast+0xac/0xc0 +[ 3117.763448] ? f2fs_is_valid_blkaddr+0x320/0x320 +[ 3117.768046] ? __radix_tree_lookup+0x150/0x150 +[ 3117.772603] ? dqget+0x670/0x670 +[ 3117.777159] ? pagecache_get_page+0x29/0x410 +[ 3117.781648] ? kmem_cache_alloc+0x176/0x1e0 +[ 3117.786067] ? f2fs_is_valid_blkaddr+0x11d/0x320 +[ 3117.790476] f2fs_recover_fsync_data+0xc23/0xd50 +[ 3117.794790] ? f2fs_space_for_roll_forward+0x60/0x60 +[ 3117.799086] ? rb_insert_color+0x323/0x3d0 +[ 3117.803304] ? f2fs_recover_orphan_inodes+0xa5/0x700 +[ 3117.807563] ? proc_register+0x153/0x1d0 +[ 3117.811766] ? f2fs_remove_orphan_inode+0x10/0x10 +[ 3117.815947] ? f2fs_attr_store+0x50/0x50 +[ 3117.820087] ? 
proc_create_single_data+0x52/0x60 +[ 3117.824262] f2fs_fill_super+0x1d06/0x2b40 +[ 3117.828367] ? f2fs_commit_super+0x1a0/0x1a0 +[ 3117.832432] ? sget_userns+0x65e/0x690 +[ 3117.836500] ? set_blocksize+0x88/0x130 +[ 3117.840501] ? f2fs_commit_super+0x1a0/0x1a0 +[ 3117.844420] mount_bdev+0x1c0/0x200 +[ 3117.848275] mount_fs+0x5c/0x190 +[ 3117.852053] vfs_kern_mount+0x64/0x190 +[ 3117.855810] do_mount+0x2e4/0x1450 +[ 3117.859441] ? lockref_put_return+0x130/0x130 +[ 3117.862996] ? copy_mount_string+0x20/0x20 +[ 3117.866417] ? kasan_unpoison_shadow+0x31/0x40 +[ 3117.869719] ? kasan_kmalloc+0xa6/0xd0 +[ 3117.872948] ? memcg_kmem_put_cache+0x16/0x90 +[ 3117.876121] ? __kmalloc_track_caller+0x196/0x210 +[ 3117.879333] ? _copy_from_user+0x61/0x90 +[ 3117.882467] ? memdup_user+0x3e/0x60 +[ 3117.885604] ksys_mount+0x7e/0xd0 +[ 3117.888700] __x64_sys_mount+0x62/0x70 +[ 3117.891742] do_syscall_64+0x73/0x160 +[ 3117.894692] entry_SYSCALL_64_after_hwframe+0x44/0xa9 +[ 3117.897669] RIP: 0033:0x7f5693f14b9a +[ 3117.900563] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48 +[ 3117.906922] RSP: 002b:00007fff27346488 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5 +[ 3117.910159] RAX: ffffffffffffffda RBX: 00000000016e2030 RCX: 00007f5693f14b9a +[ 3117.913469] RDX: 00000000016e2210 RSI: 00000000016e3f30 RDI: 00000000016ee040 +[ 3117.916764] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013 +[ 3117.920071] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 00000000016ee040 +[ 3117.923393] R13: 00000000016e2210 R14: 0000000000000000 R15: 0000000000000003 +[ 3117.926680] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer joydev input_leds serio_raw snd soundcore mac_hid i2c_piix4 ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi 
scsi_transport_iscsi btrfs zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear 8139too qxl ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel psmouse aes_x86_64 8139cp crypto_simd cryptd mii glue_helper pata_acpi floppy +[ 3117.949979] CR2: 0000000000000000 +[ 3117.954283] ---[ end trace a8e0d899985faf32 ]--- +[ 3117.958575] RIP: 0010:__remove_dirty_segment+0xe2/0x1e0 +[ 3117.962810] Code: c4 48 89 c7 e8 cf bb d7 ff 45 0f b6 24 24 41 83 e4 3f 44 88 64 24 07 41 83 e4 3f 4a 8d 7c e3 08 e8 b3 bc d7 ff 4a 8b 4c e3 08 4c 0f b3 29 0f 82 94 00 00 00 48 8d bd 20 04 00 00 e8 97 bb d7 +[ 3117.971789] RSP: 0018:ffff88018eb67638 EFLAGS: 00010292 +[ 3117.976333] RAX: 0000000000000000 RBX: ffff88018f0a6300 RCX: 0000000000000000 +[ 3117.980926] RDX: 0000000000000000 RSI: 0000000000000297 RDI: 0000000000000297 +[ 3117.985497] RBP: ffff88018ebe9980 R08: ffffed003e743ebb R09: ffffed003e743ebb +[ 3117.990098] R10: 0000000000000001 R11: ffffed003e743eba R12: 0000000000000019 +[ 3117.994761] R13: 0000000000000014 R14: 0000000000000320 R15: ffff88018ebe99e0 +[ 3117.999392] FS: 00007f5694636840(0000) GS:ffff8801f3b00000(0000) knlGS:0000000000000000 +[ 3118.004096] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 3118.008816] CR2: 00007fe89bb1a000 CR3: 0000000191c22000 CR4: 00000000000006e0 + +- Location +https://elixir.bootlin.com/linux/v4.18-rc3/source/fs/f2fs/segment.c#L775 + if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) + dirty_i->nr_dirty[t]--; +Here dirty_i->dirty_segmap[t] can be NULL which leads to crash in test_and_clear_bit() + +Reported-by Wen Xu +Signed-off-by: Chao Yu +Signed-off-by: Jaegeuk Kim +[bwh: Backported to 4.9: The function is called sanity_check_ckpt()] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/checkpoint.c | 8 ++++---- + 
fs/f2fs/super.c | 12 ++++++++++++ + 2 files changed, 16 insertions(+), 4 deletions(-) + +--- a/fs/f2fs/checkpoint.c ++++ b/fs/f2fs/checkpoint.c +@@ -794,15 +794,15 @@ int get_valid_checkpoint(struct f2fs_sb_ + cp_block = (struct f2fs_checkpoint *)page_address(cur_page); + memcpy(sbi->ckpt, cp_block, blk_size); + +- /* Sanity checking of checkpoint */ +- if (sanity_check_ckpt(sbi)) +- goto free_fail_no_cp; +- + if (cur_page == cp1) + sbi->cur_cp_pack = 1; + else + sbi->cur_cp_pack = 2; + ++ /* Sanity checking of checkpoint */ ++ if (sanity_check_ckpt(sbi)) ++ goto free_fail_no_cp; ++ + if (cp_blks <= 1) + goto done; + +--- a/fs/f2fs/super.c ++++ b/fs/f2fs/super.c +@@ -1487,6 +1487,7 @@ int sanity_check_ckpt(struct f2fs_sb_inf + unsigned int sit_bitmap_size, nat_bitmap_size; + unsigned int log_blocks_per_seg; + unsigned int segment_count_main; ++ unsigned int cp_pack_start_sum, cp_payload; + block_t user_block_count; + int i; + +@@ -1547,6 +1548,17 @@ int sanity_check_ckpt(struct f2fs_sb_inf + return 1; + } + ++ cp_pack_start_sum = __start_sum_addr(sbi); ++ cp_payload = __cp_payload(sbi); ++ if (cp_pack_start_sum < cp_payload + 1 || ++ cp_pack_start_sum > blocks_per_seg - 1 - ++ NR_CURSEG_TYPE) { ++ f2fs_msg(sbi->sb, KERN_ERR, ++ "Wrong cp_pack_start_sum: %u", ++ cp_pack_start_sum); ++ return 1; ++ } ++ + if (unlikely(f2fs_cp_error(sbi))) { + f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck"); + return 1; diff --git a/queue-4.9/f2fs-fix-to-do-sanity-check-with-node-footer-and-iblocks.patch b/queue-4.9/f2fs-fix-to-do-sanity-check-with-node-footer-and-iblocks.patch new file mode 100644 index 00000000000..d1851219bfe --- /dev/null +++ b/queue-4.9/f2fs-fix-to-do-sanity-check-with-node-footer-and-iblocks.patch @@ -0,0 +1,240 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Chao Yu +Date: Fri, 29 Jun 2018 13:55:22 +0800 +Subject: f2fs: fix to do sanity check with node footer and iblocks + +From: Chao Yu + +commit e34438c903b653daca2b2a7de95aed46226f8ed3 
upstream. + +This patch adds to do sanity check with below fields of inode to +avoid reported panic. +- node footer +- iblocks + +https://bugzilla.kernel.org/show_bug.cgi?id=200223 + +- Overview +BUG() triggered in f2fs_truncate_inode_blocks() when un-mounting a mounted f2fs image after writing to it + +- Reproduce + +- POC (poc.c) + +static void activity(char *mpoint) { + + char *foo_bar_baz; + int err; + + static int buf[8192]; + memset(buf, 0, sizeof(buf)); + + err = asprintf(&foo_bar_baz, "%s/foo/bar/baz", mpoint); + + // open / write / read + int fd = open(foo_bar_baz, O_RDWR | O_TRUNC, 0777); + if (fd >= 0) { + write(fd, (char *)buf, 517); + write(fd, (char *)buf, sizeof(buf)); + close(fd); + } + +} + +int main(int argc, char *argv[]) { + activity(argv[1]); + return 0; +} + +- Kernel meesage +[ 552.479723] F2FS-fs (loop0): Mounted with checkpoint version = 2 +[ 556.451891] ------------[ cut here ]------------ +[ 556.451899] kernel BUG at fs/f2fs/node.c:987! +[ 556.452920] invalid opcode: 0000 [#1] SMP KASAN PTI +[ 556.453936] CPU: 1 PID: 1310 Comm: umount Not tainted 4.18.0-rc1+ #4 +[ 556.455213] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 +[ 556.457140] RIP: 0010:f2fs_truncate_inode_blocks+0x4a7/0x6f0 +[ 556.458280] Code: e8 ae ea ff ff 41 89 c7 c1 e8 1f 84 c0 74 0a 41 83 ff fe 0f 85 35 ff ff ff 81 85 b0 fe ff ff fb 03 00 00 e9 f7 fd ff ff 0f 0b <0f> 0b e8 62 b7 9a 00 48 8b bd a0 fe ff ff e8 56 54 ae ff 48 8b b5 +[ 556.462015] RSP: 0018:ffff8801f292f808 EFLAGS: 00010286 +[ 556.463068] RAX: ffffed003e73242d RBX: ffff8801f292f958 RCX: ffffffffb88b81bc +[ 556.464479] RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffff8801f3992164 +[ 556.465901] RBP: ffff8801f292f980 R08: ffffed003e73242d R09: ffffed003e73242d +[ 556.467311] R10: 0000000000000001 R11: ffffed003e73242c R12: 00000000fffffc64 +[ 556.468706] R13: ffff8801f3992000 R14: 0000000000000058 R15: 00000000ffff8801 +[ 556.470117] FS: 00007f8029297840(0000) 
GS:ffff8801f6f00000(0000) knlGS:0000000000000000 +[ 556.471702] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 556.472838] CR2: 000055f5f57305d8 CR3: 00000001f18b0000 CR4: 00000000000006e0 +[ 556.474265] Call Trace: +[ 556.474782] ? f2fs_alloc_nid_failed+0xf0/0xf0 +[ 556.475686] ? truncate_nodes+0x980/0x980 +[ 556.476516] ? pagecache_get_page+0x21f/0x2f0 +[ 556.477412] ? __asan_loadN+0xf/0x20 +[ 556.478153] ? __get_node_page+0x331/0x5b0 +[ 556.478992] ? reweight_entity+0x1e6/0x3b0 +[ 556.479826] f2fs_truncate_blocks+0x55e/0x740 +[ 556.480709] ? f2fs_truncate_data_blocks+0x20/0x20 +[ 556.481689] ? __radix_tree_lookup+0x34/0x160 +[ 556.482630] ? radix_tree_lookup+0xd/0x10 +[ 556.483445] f2fs_truncate+0xd4/0x1a0 +[ 556.484206] f2fs_evict_inode+0x5ce/0x630 +[ 556.485032] evict+0x16f/0x290 +[ 556.485664] iput+0x280/0x300 +[ 556.486300] dentry_unlink_inode+0x165/0x1e0 +[ 556.487169] __dentry_kill+0x16a/0x260 +[ 556.487936] dentry_kill+0x70/0x250 +[ 556.488651] shrink_dentry_list+0x125/0x260 +[ 556.489504] shrink_dcache_parent+0xc1/0x110 +[ 556.490379] ? shrink_dcache_sb+0x200/0x200 +[ 556.491231] ? 
bit_wait_timeout+0xc0/0xc0 +[ 556.492047] do_one_tree+0x12/0x40 +[ 556.492743] shrink_dcache_for_umount+0x3f/0xa0 +[ 556.493656] generic_shutdown_super+0x43/0x1c0 +[ 556.494561] kill_block_super+0x52/0x80 +[ 556.495341] kill_f2fs_super+0x62/0x70 +[ 556.496105] deactivate_locked_super+0x6f/0xa0 +[ 556.497004] deactivate_super+0x5e/0x80 +[ 556.497785] cleanup_mnt+0x61/0xa0 +[ 556.498492] __cleanup_mnt+0x12/0x20 +[ 556.499218] task_work_run+0xc8/0xf0 +[ 556.499949] exit_to_usermode_loop+0x125/0x130 +[ 556.500846] do_syscall_64+0x138/0x170 +[ 556.501609] entry_SYSCALL_64_after_hwframe+0x44/0xa9 +[ 556.502659] RIP: 0033:0x7f8028b77487 +[ 556.503384] Code: 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 31 f6 e9 09 00 00 00 66 0f 1f 84 00 00 00 00 00 b8 a6 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d e1 c9 2b 00 f7 d8 64 89 01 48 +[ 556.507137] RSP: 002b:00007fff9f2e3598 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 +[ 556.508637] RAX: 0000000000000000 RBX: 0000000000ebd030 RCX: 00007f8028b77487 +[ 556.510069] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000ec41e0 +[ 556.511481] RBP: 0000000000ec41e0 R08: 0000000000000000 R09: 0000000000000014 +[ 556.512892] R10: 00000000000006b2 R11: 0000000000000246 R12: 00007f802908083c +[ 556.514320] R13: 0000000000000000 R14: 0000000000ebd210 R15: 00007fff9f2e3820 +[ 556.515745] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy +[ 556.529276] ---[ end trace 4ce02f25ff7d3df5 ]--- +[ 556.530340] RIP: 
0010:f2fs_truncate_inode_blocks+0x4a7/0x6f0 +[ 556.531513] Code: e8 ae ea ff ff 41 89 c7 c1 e8 1f 84 c0 74 0a 41 83 ff fe 0f 85 35 ff ff ff 81 85 b0 fe ff ff fb 03 00 00 e9 f7 fd ff ff 0f 0b <0f> 0b e8 62 b7 9a 00 48 8b bd a0 fe ff ff e8 56 54 ae ff 48 8b b5 +[ 556.535330] RSP: 0018:ffff8801f292f808 EFLAGS: 00010286 +[ 556.536395] RAX: ffffed003e73242d RBX: ffff8801f292f958 RCX: ffffffffb88b81bc +[ 556.537824] RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffff8801f3992164 +[ 556.539290] RBP: ffff8801f292f980 R08: ffffed003e73242d R09: ffffed003e73242d +[ 556.540709] R10: 0000000000000001 R11: ffffed003e73242c R12: 00000000fffffc64 +[ 556.542131] R13: ffff8801f3992000 R14: 0000000000000058 R15: 00000000ffff8801 +[ 556.543579] FS: 00007f8029297840(0000) GS:ffff8801f6f00000(0000) knlGS:0000000000000000 +[ 556.545180] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 556.546338] CR2: 000055f5f57305d8 CR3: 00000001f18b0000 CR4: 00000000000006e0 +[ 556.547809] ================================================================== +[ 556.549248] BUG: KASAN: stack-out-of-bounds in arch_tlb_gather_mmu+0x52/0x170 +[ 556.550672] Write of size 8 at addr ffff8801f292fd10 by task umount/1310 + +[ 556.552338] CPU: 1 PID: 1310 Comm: umount Tainted: G D 4.18.0-rc1+ #4 +[ 556.553886] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 +[ 556.555756] Call Trace: +[ 556.556264] dump_stack+0x7b/0xb5 +[ 556.556944] print_address_description+0x70/0x290 +[ 556.557903] kasan_report+0x291/0x390 +[ 556.558649] ? arch_tlb_gather_mmu+0x52/0x170 +[ 556.559537] __asan_store8+0x57/0x90 +[ 556.560268] arch_tlb_gather_mmu+0x52/0x170 +[ 556.561110] tlb_gather_mmu+0x12/0x40 +[ 556.561862] exit_mmap+0x123/0x2a0 +[ 556.562555] ? __ia32_sys_munmap+0x50/0x50 +[ 556.563384] ? exit_aio+0x98/0x230 +[ 556.564079] ? __x32_compat_sys_io_submit+0x260/0x260 +[ 556.565099] ? taskstats_exit+0x1f4/0x640 +[ 556.565925] ? kasan_check_read+0x11/0x20 +[ 556.566739] ? 
mm_update_next_owner+0x322/0x380 +[ 556.567652] mmput+0x8b/0x1d0 +[ 556.568260] do_exit+0x43a/0x1390 +[ 556.568937] ? mm_update_next_owner+0x380/0x380 +[ 556.569855] ? deactivate_super+0x5e/0x80 +[ 556.570668] ? cleanup_mnt+0x61/0xa0 +[ 556.571395] ? __cleanup_mnt+0x12/0x20 +[ 556.572156] ? task_work_run+0xc8/0xf0 +[ 556.572917] ? exit_to_usermode_loop+0x125/0x130 +[ 556.573861] rewind_stack_do_exit+0x17/0x20 +[ 556.574707] RIP: 0033:0x7f8028b77487 +[ 556.575428] Code: Bad RIP value. +[ 556.576106] RSP: 002b:00007fff9f2e3598 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 +[ 556.577599] RAX: 0000000000000000 RBX: 0000000000ebd030 RCX: 00007f8028b77487 +[ 556.579020] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000ec41e0 +[ 556.580422] RBP: 0000000000ec41e0 R08: 0000000000000000 R09: 0000000000000014 +[ 556.581833] R10: 00000000000006b2 R11: 0000000000000246 R12: 00007f802908083c +[ 556.583252] R13: 0000000000000000 R14: 0000000000ebd210 R15: 00007fff9f2e3820 + +[ 556.584983] The buggy address belongs to the page: +[ 556.585961] page:ffffea0007ca4bc0 count:0 mapcount:0 mapping:0000000000000000 index:0x0 +[ 556.587540] flags: 0x2ffff0000000000() +[ 556.588296] raw: 02ffff0000000000 0000000000000000 dead000000000200 0000000000000000 +[ 556.589822] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 +[ 556.591359] page dumped because: kasan: bad access detected + +[ 556.592786] Memory state around the buggy address: +[ 556.593753] ffff8801f292fc00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +[ 556.595191] ffff8801f292fc80: 00 00 00 00 00 00 00 00 f1 f1 f1 f1 00 00 00 00 +[ 556.596613] >ffff8801f292fd00: 00 00 f3 00 00 00 00 f3 f3 00 00 00 00 f4 f4 f4 +[ 556.598044] ^ +[ 556.598797] ffff8801f292fd80: f3 f3 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00 +[ 556.600225] ffff8801f292fe00: 00 00 00 00 00 00 00 00 f1 f1 f1 f1 00 f4 f4 f4 +[ 556.601647] ================================================================== + +- Location 
+https://elixir.bootlin.com/linux/v4.18-rc1/source/fs/f2fs/node.c#L987 + case NODE_DIND_BLOCK: + err = truncate_nodes(&dn, nofs, offset[1], 3); + cont = 0; + break; + + default: + BUG(); <--- + } + +Reported-by Wen Xu +Signed-off-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/inode.c | 25 +++++++++++++++++++++++-- + 1 file changed, 23 insertions(+), 2 deletions(-) + +--- a/fs/f2fs/inode.c ++++ b/fs/f2fs/inode.c +@@ -104,9 +104,30 @@ static void __recover_inline_status(stru + return; + } + +-static bool sanity_check_inode(struct inode *inode) ++static bool sanity_check_inode(struct inode *inode, struct page *node_page) + { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); ++ unsigned long long iblocks; ++ ++ iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks); ++ if (!iblocks) { ++ set_sbi_flag(sbi, SBI_NEED_FSCK); ++ f2fs_msg(sbi->sb, KERN_WARNING, ++ "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, " ++ "run fsck to fix.", ++ __func__, inode->i_ino, iblocks); ++ return false; ++ } ++ ++ if (ino_of_node(node_page) != nid_of_node(node_page)) { ++ set_sbi_flag(sbi, SBI_NEED_FSCK); ++ f2fs_msg(sbi->sb, KERN_WARNING, ++ "%s: corrupted inode footer i_ino=%lx, ino,nid: " ++ "[%u, %u] run fsck to fix.", ++ __func__, inode->i_ino, ++ ino_of_node(node_page), nid_of_node(node_page)); ++ return false; ++ } + + return true; + } +@@ -160,7 +181,7 @@ static int do_read_inode(struct inode *i + + get_inline_info(inode, ri); + +- if (!sanity_check_inode(inode)) { ++ if (!sanity_check_inode(inode, node_page)) { + f2fs_put_page(node_page, 1); + return -EINVAL; + } diff --git a/queue-4.9/f2fs-fix-to-do-sanity-check-with-secs_per_zone.patch b/queue-4.9/f2fs-fix-to-do-sanity-check-with-secs_per_zone.patch new file mode 100644 index 00000000000..266e048f489 --- /dev/null +++ b/queue-4.9/f2fs-fix-to-do-sanity-check-with-secs_per_zone.patch @@ -0,0 +1,98 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Chao Yu 
+Date: Sat, 23 Jun 2018 00:12:36 +0800 +Subject: f2fs: fix to do sanity check with secs_per_zone + +From: Chao Yu + +commit 42bf546c1fe3f3654bdf914e977acbc2b80a5be5 upstream. + +As Wen Xu reported in below link: + +https://bugzilla.kernel.org/show_bug.cgi?id=200183 + +- Overview +Divide zero in reset_curseg() when mounting a crafted f2fs image + +- Reproduce + +- Kernel message +[ 588.281510] divide error: 0000 [#1] SMP KASAN PTI +[ 588.282701] CPU: 0 PID: 1293 Comm: mount Not tainted 4.18.0-rc1+ #4 +[ 588.284000] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 +[ 588.286178] RIP: 0010:reset_curseg+0x94/0x1a0 +[ 588.298166] RSP: 0018:ffff8801e88d7940 EFLAGS: 00010246 +[ 588.299360] RAX: 0000000000000014 RBX: ffff8801e1d46d00 RCX: ffffffffb88bf60b +[ 588.300809] RDX: 0000000000000000 RSI: dffffc0000000000 RDI: ffff8801e1d46d64 +[ 588.305272] R13: 0000000000000000 R14: 0000000000000014 R15: 0000000000000000 +[ 588.306822] FS: 00007fad85008840(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000 +[ 588.308456] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 588.309623] CR2: 0000000001705078 CR3: 00000001f30f8000 CR4: 00000000000006f0 +[ 588.311085] Call Trace: +[ 588.311637] f2fs_build_segment_manager+0x103f/0x3410 +[ 588.316136] ? f2fs_commit_super+0x1b0/0x1b0 +[ 588.317031] ? set_blocksize+0x90/0x140 +[ 588.319473] f2fs_mount+0x15/0x20 +[ 588.320166] mount_fs+0x60/0x1a0 +[ 588.320847] ? alloc_vfsmnt+0x309/0x360 +[ 588.321647] vfs_kern_mount+0x6b/0x1a0 +[ 588.322432] do_mount+0x34a/0x18c0 +[ 588.323175] ? strndup_user+0x46/0x70 +[ 588.323937] ? copy_mount_string+0x20/0x20 +[ 588.324793] ? memcg_kmem_put_cache+0x1b/0xa0 +[ 588.325702] ? kasan_check_write+0x14/0x20 +[ 588.326562] ? _copy_from_user+0x6a/0x90 +[ 588.327375] ? 
memdup_user+0x42/0x60 +[ 588.328118] ksys_mount+0x83/0xd0 +[ 588.328808] __x64_sys_mount+0x67/0x80 +[ 588.329607] do_syscall_64+0x78/0x170 +[ 588.330400] entry_SYSCALL_64_after_hwframe+0x44/0xa9 +[ 588.331461] RIP: 0033:0x7fad848e8b9a +[ 588.336022] RSP: 002b:00007ffd7c5b6be8 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5 +[ 588.337547] RAX: ffffffffffffffda RBX: 00000000016f8030 RCX: 00007fad848e8b9a +[ 588.338999] RDX: 00000000016f8210 RSI: 00000000016f9f30 RDI: 0000000001700ec0 +[ 588.340442] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013 +[ 588.341887] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 0000000001700ec0 +[ 588.343341] R13: 00000000016f8210 R14: 0000000000000000 R15: 0000000000000003 +[ 588.354891] ---[ end trace 4ce02f25ff7d3df5 ]--- +[ 588.355862] RIP: 0010:reset_curseg+0x94/0x1a0 +[ 588.360742] RSP: 0018:ffff8801e88d7940 EFLAGS: 00010246 +[ 588.361812] RAX: 0000000000000014 RBX: ffff8801e1d46d00 RCX: ffffffffb88bf60b +[ 588.363485] RDX: 0000000000000000 RSI: dffffc0000000000 RDI: ffff8801e1d46d64 +[ 588.365213] RBP: ffff8801e88d7968 R08: ffffed003c32266f R09: ffffed003c32266f +[ 588.366661] R10: 0000000000000001 R11: ffffed003c32266e R12: ffff8801f0337700 +[ 588.368110] R13: 0000000000000000 R14: 0000000000000014 R15: 0000000000000000 +[ 588.370057] FS: 00007fad85008840(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000 +[ 588.372099] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 588.373291] CR2: 0000000001705078 CR3: 00000001f30f8000 CR4: 00000000000006f0 + +- Location +https://elixir.bootlin.com/linux/latest/source/fs/f2fs/segment.c#L2147 + curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno); + +If secs_per_zone is corrupted due to fuzzing test, it will cause divide +zero operation when using GET_ZONE_FROM_SEG macro, so we should do more +sanity check with secs_per_zone during mount to avoid this issue. 
+ +Signed-off-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/super.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/fs/f2fs/super.c ++++ b/fs/f2fs/super.c +@@ -1434,9 +1434,9 @@ static int sanity_check_raw_super(struct + return 1; + } + +- if (secs_per_zone > total_sections) { ++ if (secs_per_zone > total_sections || !secs_per_zone) { + f2fs_msg(sb, KERN_INFO, +- "Wrong secs_per_zone (%u > %u)", ++ "Wrong secs_per_zone / total_sections (%u, %u)", + secs_per_zone, total_sections); + return 1; + } diff --git a/queue-4.9/f2fs-fix-to-do-sanity-check-with-user_block_count.patch b/queue-4.9/f2fs-fix-to-do-sanity-check-with-user_block_count.patch new file mode 100644 index 00000000000..eb4e75083f2 --- /dev/null +++ b/queue-4.9/f2fs-fix-to-do-sanity-check-with-user_block_count.patch @@ -0,0 +1,148 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Chao Yu +Date: Wed, 27 Jun 2018 18:05:54 +0800 +Subject: f2fs: fix to do sanity check with user_block_count + +From: Chao Yu + +commit 9dc956b2c8523aed39d1e6508438be9fea28c8fc upstream. + +This patch fixes to do sanity check with user_block_count. 
+ +- Overview +Divide zero in utilization when mount() a corrupted f2fs image + +- Reproduce (4.18 upstream kernel) + +- Kernel message +[ 564.099503] F2FS-fs (loop0): invalid crc value +[ 564.101991] divide error: 0000 [#1] SMP KASAN PTI +[ 564.103103] CPU: 1 PID: 1298 Comm: f2fs_discard-7: Not tainted 4.18.0-rc1+ #4 +[ 564.104584] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 +[ 564.106624] RIP: 0010:issue_discard_thread+0x248/0x5c0 +[ 564.107692] Code: ff ff 48 8b bd e8 fe ff ff 41 8b 9d 4c 04 00 00 e8 cd b8 ad ff 41 8b 85 50 04 00 00 31 d2 48 8d 04 80 48 8d 04 80 48 c1 e0 02 <48> f7 f3 83 f8 50 7e 16 41 c7 86 7c ff ff ff 01 00 00 00 41 c7 86 +[ 564.111686] RSP: 0018:ffff8801f3117dc0 EFLAGS: 00010206 +[ 564.112775] RAX: 0000000000000384 RBX: 0000000000000000 RCX: ffffffffb88c1e03 +[ 564.114250] RDX: 0000000000000000 RSI: dffffc0000000000 RDI: ffff8801e3aa4850 +[ 564.115706] RBP: ffff8801f3117f00 R08: 1ffffffff751a1d0 R09: fffffbfff751a1d0 +[ 564.117177] R10: 0000000000000001 R11: fffffbfff751a1d0 R12: 00000000fffffffc +[ 564.118634] R13: ffff8801e3aa4400 R14: ffff8801f3117ed8 R15: ffff8801e2050000 +[ 564.120094] FS: 0000000000000000(0000) GS:ffff8801f6f00000(0000) knlGS:0000000000000000 +[ 564.121748] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 564.122923] CR2: 000000000202b078 CR3: 00000001f11ac000 CR4: 00000000000006e0 +[ 564.124383] Call Trace: +[ 564.124924] ? __issue_discard_cmd+0x480/0x480 +[ 564.125882] ? __sched_text_start+0x8/0x8 +[ 564.126756] ? __kthread_parkme+0xcb/0x100 +[ 564.127620] ? kthread_blkcg+0x70/0x70 +[ 564.128412] kthread+0x180/0x1d0 +[ 564.129105] ? __issue_discard_cmd+0x480/0x480 +[ 564.130029] ? 
kthread_associate_blkcg+0x150/0x150 +[ 564.131033] ret_from_fork+0x35/0x40 +[ 564.131794] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy +[ 564.141798] ---[ end trace 4ce02f25ff7d3df5 ]--- +[ 564.142773] RIP: 0010:issue_discard_thread+0x248/0x5c0 +[ 564.143885] Code: ff ff 48 8b bd e8 fe ff ff 41 8b 9d 4c 04 00 00 e8 cd b8 ad ff 41 8b 85 50 04 00 00 31 d2 48 8d 04 80 48 8d 04 80 48 c1 e0 02 <48> f7 f3 83 f8 50 7e 16 41 c7 86 7c ff ff ff 01 00 00 00 41 c7 86 +[ 564.147776] RSP: 0018:ffff8801f3117dc0 EFLAGS: 00010206 +[ 564.148856] RAX: 0000000000000384 RBX: 0000000000000000 RCX: ffffffffb88c1e03 +[ 564.150424] RDX: 0000000000000000 RSI: dffffc0000000000 RDI: ffff8801e3aa4850 +[ 564.151906] RBP: ffff8801f3117f00 R08: 1ffffffff751a1d0 R09: fffffbfff751a1d0 +[ 564.153463] R10: 0000000000000001 R11: fffffbfff751a1d0 R12: 00000000fffffffc +[ 564.154915] R13: ffff8801e3aa4400 R14: ffff8801f3117ed8 R15: ffff8801e2050000 +[ 564.156405] FS: 0000000000000000(0000) GS:ffff8801f6f00000(0000) knlGS:0000000000000000 +[ 564.158070] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 564.159279] CR2: 000000000202b078 CR3: 00000001f11ac000 CR4: 00000000000006e0 +[ 564.161043] ================================================================== +[ 564.162587] BUG: KASAN: stack-out-of-bounds in from_kuid_munged+0x1d/0x50 +[ 564.163994] Read of size 4 at addr ffff8801f3117c84 by task f2fs_discard-7:/1298 + +[ 564.165852] CPU: 1 PID: 1298 Comm: f2fs_discard-7: Tainted: G D 4.18.0-rc1+ #4 +[ 564.167593] Hardware name: 
QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 +[ 564.169522] Call Trace: +[ 564.170057] dump_stack+0x7b/0xb5 +[ 564.170778] print_address_description+0x70/0x290 +[ 564.171765] kasan_report+0x291/0x390 +[ 564.172540] ? from_kuid_munged+0x1d/0x50 +[ 564.173408] __asan_load4+0x78/0x80 +[ 564.174148] from_kuid_munged+0x1d/0x50 +[ 564.174962] do_notify_parent+0x1f5/0x4f0 +[ 564.175808] ? send_sigqueue+0x390/0x390 +[ 564.176639] ? css_set_move_task+0x152/0x340 +[ 564.184197] do_exit+0x1290/0x1390 +[ 564.184950] ? __issue_discard_cmd+0x480/0x480 +[ 564.185884] ? mm_update_next_owner+0x380/0x380 +[ 564.186829] ? __sched_text_start+0x8/0x8 +[ 564.187672] ? __kthread_parkme+0xcb/0x100 +[ 564.188528] ? kthread_blkcg+0x70/0x70 +[ 564.189333] ? kthread+0x180/0x1d0 +[ 564.190052] ? __issue_discard_cmd+0x480/0x480 +[ 564.190983] rewind_stack_do_exit+0x17/0x20 + +[ 564.192190] The buggy address belongs to the page: +[ 564.193213] page:ffffea0007cc45c0 count:0 mapcount:0 mapping:0000000000000000 index:0x0 +[ 564.194856] flags: 0x2ffff0000000000() +[ 564.195644] raw: 02ffff0000000000 0000000000000000 dead000000000200 0000000000000000 +[ 564.197247] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 +[ 564.198826] page dumped because: kasan: bad access detected + +[ 564.200299] Memory state around the buggy address: +[ 564.201306] ffff8801f3117b80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +[ 564.202779] ffff8801f3117c00: 00 00 00 00 00 00 00 00 00 00 00 f3 f3 f3 f3 f3 +[ 564.204252] >ffff8801f3117c80: f3 f3 f3 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1 +[ 564.205742] ^ +[ 564.206424] ffff8801f3117d00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +[ 564.207908] ffff8801f3117d80: f3 f3 f3 f3 f3 f3 f3 f3 00 00 00 00 00 00 00 00 +[ 564.209389] ================================================================== +[ 564.231795] F2FS-fs (loop0): Mounted with checkpoint version = 2 + +- Location 
+https://elixir.bootlin.com/linux/v4.18-rc1/source/fs/f2fs/segment.h#L586 + return div_u64((u64)valid_user_blocks(sbi) * 100, + sbi->user_block_count); +Missing checks on sbi->user_block_count. + +Reported-by: Wen Xu +Signed-off-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/super.c | 13 ++++++++++++- + 1 file changed, 12 insertions(+), 1 deletion(-) + +--- a/fs/f2fs/super.c ++++ b/fs/f2fs/super.c +@@ -1486,6 +1486,8 @@ int sanity_check_ckpt(struct f2fs_sb_inf + unsigned int sit_segs, nat_segs; + unsigned int sit_bitmap_size, nat_bitmap_size; + unsigned int log_blocks_per_seg; ++ unsigned int segment_count_main; ++ block_t user_block_count; + int i; + + total = le32_to_cpu(raw_super->segment_count); +@@ -1510,6 +1512,16 @@ int sanity_check_ckpt(struct f2fs_sb_inf + return 1; + } + ++ user_block_count = le64_to_cpu(ckpt->user_block_count); ++ segment_count_main = le32_to_cpu(raw_super->segment_count_main); ++ log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg); ++ if (!user_block_count || user_block_count >= ++ segment_count_main << log_blocks_per_seg) { ++ f2fs_msg(sbi->sb, KERN_ERR, ++ "Wrong user_block_count: %u", user_block_count); ++ return 1; ++ } ++ + main_segs = le32_to_cpu(raw_super->segment_count_main); + blocks_per_seg = sbi->blocks_per_seg; + +@@ -1526,7 +1538,6 @@ int sanity_check_ckpt(struct f2fs_sb_inf + + sit_bitmap_size = le32_to_cpu(ckpt->sit_ver_bitmap_bytesize); + nat_bitmap_size = le32_to_cpu(ckpt->nat_ver_bitmap_bytesize); +- log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg); + + if (sit_bitmap_size != ((sit_segs / 2) << log_blocks_per_seg) / 8 || + nat_bitmap_size != ((nat_segs / 2) << log_blocks_per_seg) / 8) { diff --git a/queue-4.9/f2fs-free-meta-pages-if-sanity-check-for-ckpt-is-failed.patch b/queue-4.9/f2fs-free-meta-pages-if-sanity-check-for-ckpt-is-failed.patch new file mode 100644 index 00000000000..c6c97febb09 --- /dev/null +++ 
b/queue-4.9/f2fs-free-meta-pages-if-sanity-check-for-ckpt-is-failed.patch @@ -0,0 +1,40 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Jaegeuk Kim +Date: Mon, 5 Dec 2016 17:25:32 -0800 +Subject: f2fs: free meta pages if sanity check for ckpt is failed + +From: Jaegeuk Kim + +commit a2125ff7dd1ed3a2a53cdc1f8f9c9cec9cfaa7ab upstream. + +This fixes missing freeing meta pages in the error case. + +Tested-by: Eric Biggers +Signed-off-by: Jaegeuk Kim +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/checkpoint.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/fs/f2fs/checkpoint.c ++++ b/fs/f2fs/checkpoint.c +@@ -796,7 +796,7 @@ int get_valid_checkpoint(struct f2fs_sb_ + + /* Sanity checking of checkpoint */ + if (sanity_check_ckpt(sbi)) +- goto fail_no_cp; ++ goto free_fail_no_cp; + + if (cur_page == cp1) + sbi->cur_cp_pack = 1; +@@ -824,6 +824,9 @@ done: + f2fs_put_page(cp2, 1); + return 0; + ++free_fail_no_cp: ++ f2fs_put_page(cp1, 1); ++ f2fs_put_page(cp2, 1); + fail_no_cp: + kfree(sbi->ckpt); + return -EINVAL; diff --git a/queue-4.9/f2fs-introduce-and-spread-verify_blkaddr.patch b/queue-4.9/f2fs-introduce-and-spread-verify_blkaddr.patch new file mode 100644 index 00000000000..8f3aa17c2ad --- /dev/null +++ b/queue-4.9/f2fs-introduce-and-spread-verify_blkaddr.patch @@ -0,0 +1,319 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Chao Yu +Date: Tue, 5 Jun 2018 17:44:11 +0800 +Subject: f2fs: introduce and spread verify_blkaddr + +From: Chao Yu + +commit e1da7872f6eda977bd812346bf588c35e4495a1e upstream. + +This patch introduces verify_blkaddr to check meta/data block address +with valid range to detect bug earlier. + +In addition, once we encounter an invalid blkaddr, notice user to run +fsck to fix, and let the kernel panic. 
+ +Signed-off-by: Chao Yu +Signed-off-by: Jaegeuk Kim +[bwh: Backported to 4.9: + - I skipped an earlier renaming of is_valid_meta_blkaddr() to + f2fs_is_valid_meta_blkaddr() + - Adjust context] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/checkpoint.c | 11 +++++++++-- + fs/f2fs/data.c | 6 +++--- + fs/f2fs/f2fs.h | 32 +++++++++++++++++++++++++++++--- + fs/f2fs/file.c | 9 +++++---- + fs/f2fs/inode.c | 7 ++++--- + fs/f2fs/node.c | 4 ++-- + fs/f2fs/recovery.c | 6 +++--- + fs/f2fs/segment.c | 4 ++-- + fs/f2fs/segment.h | 8 +++----- + 9 files changed, 60 insertions(+), 27 deletions(-) + +--- a/fs/f2fs/checkpoint.c ++++ b/fs/f2fs/checkpoint.c +@@ -118,7 +118,8 @@ struct page *get_tmp_page(struct f2fs_sb + return __get_meta_page(sbi, index, false); + } + +-bool is_valid_meta_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) ++bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, ++ block_t blkaddr, int type) + { + switch (type) { + case META_NAT: +@@ -138,10 +139,16 @@ bool is_valid_meta_blkaddr(struct f2fs_s + return false; + break; + case META_POR: ++ case DATA_GENERIC: + if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || + blkaddr < MAIN_BLKADDR(sbi))) + return false; + break; ++ case META_GENERIC: ++ if (unlikely(blkaddr < SEG0_BLKADDR(sbi) || ++ blkaddr >= MAIN_BLKADDR(sbi))) ++ return false; ++ break; + default: + BUG(); + } +@@ -173,7 +180,7 @@ int ra_meta_pages(struct f2fs_sb_info *s + blk_start_plug(&plug); + for (; nrpages-- > 0; blkno++) { + +- if (!is_valid_meta_blkaddr(sbi, blkno, type)) ++ if (!f2fs_is_valid_blkaddr(sbi, blkno, type)) + goto out; + + switch (type) { +--- a/fs/f2fs/data.c ++++ b/fs/f2fs/data.c +@@ -267,7 +267,7 @@ void f2fs_submit_page_mbio(struct f2fs_i + + io = is_read ? 
&sbi->read_io : &sbi->write_io[btype]; + +- if (is_valid_blkaddr(fio->old_blkaddr)) ++ if (__is_valid_data_blkaddr(fio->old_blkaddr)) + verify_block_addr(fio, fio->old_blkaddr); + verify_block_addr(fio, fio->new_blkaddr); + +@@ -723,7 +723,7 @@ next_dnode: + next_block: + blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); + +- if (!is_valid_blkaddr(blkaddr)) { ++ if (!is_valid_data_blkaddr(sbi, blkaddr)) { + if (create) { + if (unlikely(f2fs_cp_error(sbi))) { + err = -EIO; +@@ -1217,7 +1217,7 @@ retry_encrypt: + * If current allocation needs SSR, + * it had better in-place writes for updated data. + */ +- if (unlikely(is_valid_blkaddr(fio->old_blkaddr) && ++ if (unlikely(is_valid_data_blkaddr(fio->sbi, fio->old_blkaddr) && + !is_cold_data(page) && + !IS_ATOMIC_WRITTEN_PAGE(page) && + need_inplace_update(inode))) { +--- a/fs/f2fs/f2fs.h ++++ b/fs/f2fs/f2fs.h +@@ -145,7 +145,7 @@ struct cp_control { + }; + + /* +- * For CP/NAT/SIT/SSA readahead ++ * indicate meta/data type + */ + enum { + META_CP, +@@ -153,6 +153,8 @@ enum { + META_SIT, + META_SSA, + META_POR, ++ DATA_GENERIC, ++ META_GENERIC, + }; + + /* for the list of ino */ +@@ -1930,13 +1932,36 @@ static inline void *f2fs_kvzalloc(size_t + (pgofs - ADDRS_PER_INODE(inode) + ADDRS_PER_BLOCK) / \ + ADDRS_PER_BLOCK * ADDRS_PER_BLOCK + ADDRS_PER_INODE(inode)) + +-static inline bool is_valid_blkaddr(block_t blkaddr) ++bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, ++ block_t blkaddr, int type); ++void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...); ++static inline void verify_blkaddr(struct f2fs_sb_info *sbi, ++ block_t blkaddr, int type) ++{ ++ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) { ++ f2fs_msg(sbi->sb, KERN_ERR, ++ "invalid blkaddr: %u, type: %d, run fsck to fix.", ++ blkaddr, type); ++ f2fs_bug_on(sbi, 1); ++ } ++} ++ ++static inline bool __is_valid_data_blkaddr(block_t blkaddr) + { + if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) + return false; + return true; + } 
+ ++static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi, ++ block_t blkaddr) ++{ ++ if (!__is_valid_data_blkaddr(blkaddr)) ++ return false; ++ verify_blkaddr(sbi, blkaddr, DATA_GENERIC); ++ return true; ++} ++ + /* + * file.c + */ +@@ -2122,7 +2147,8 @@ void f2fs_stop_checkpoint(struct f2fs_sb + struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); + struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); + struct page *get_tmp_page(struct f2fs_sb_info *, pgoff_t); +-bool is_valid_meta_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type); ++bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, ++ block_t blkaddr, int type); + int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int, bool); + void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t); + long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); +--- a/fs/f2fs/file.c ++++ b/fs/f2fs/file.c +@@ -310,13 +310,13 @@ static pgoff_t __get_first_dirty_index(s + return pgofs; + } + +-static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs, +- int whence) ++static bool __found_offset(struct f2fs_sb_info *sbi, block_t blkaddr, ++ pgoff_t dirty, pgoff_t pgofs, int whence) + { + switch (whence) { + case SEEK_DATA: + if ((blkaddr == NEW_ADDR && dirty == pgofs) || +- is_valid_blkaddr(blkaddr)) ++ is_valid_data_blkaddr(sbi, blkaddr)) + return true; + break; + case SEEK_HOLE: +@@ -378,7 +378,8 @@ static loff_t f2fs_seek_block(struct fil + block_t blkaddr; + blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); + +- if (__found_offset(blkaddr, dirty, pgofs, whence)) { ++ if (__found_offset(F2FS_I_SB(inode), blkaddr, dirty, ++ pgofs, whence)) { + f2fs_put_dnode(&dn); + goto found; + } +--- a/fs/f2fs/inode.c ++++ b/fs/f2fs/inode.c +@@ -59,11 +59,12 @@ static void __get_inode_rdev(struct inod + } + } + +-static bool __written_first_block(struct f2fs_inode *ri) ++static bool __written_first_block(struct f2fs_sb_info *sbi, ++ struct f2fs_inode *ri) + { + block_t 
addr = le32_to_cpu(ri->i_addr[0]); + +- if (is_valid_blkaddr(addr)) ++ if (is_valid_data_blkaddr(sbi, addr)) + return true; + return false; + } +@@ -159,7 +160,7 @@ static int do_read_inode(struct inode *i + /* get rdev by using inline_info */ + __get_inode_rdev(inode, ri); + +- if (__written_first_block(ri)) ++ if (__written_first_block(sbi, ri)) + set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); + + if (!need_inode_block_update(sbi, inode->i_ino)) +--- a/fs/f2fs/node.c ++++ b/fs/f2fs/node.c +@@ -304,7 +304,7 @@ static void set_node_addr(struct f2fs_sb + new_blkaddr == NULL_ADDR); + f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR && + new_blkaddr == NEW_ADDR); +- f2fs_bug_on(sbi, is_valid_blkaddr(nat_get_blkaddr(e)) && ++ f2fs_bug_on(sbi, is_valid_data_blkaddr(sbi, nat_get_blkaddr(e)) && + new_blkaddr == NEW_ADDR); + + /* increment version no as node is removed */ +@@ -319,7 +319,7 @@ static void set_node_addr(struct f2fs_sb + + /* change address */ + nat_set_blkaddr(e, new_blkaddr); +- if (!is_valid_blkaddr(new_blkaddr)) ++ if (!is_valid_data_blkaddr(sbi, new_blkaddr)) + set_nat_flag(e, IS_CHECKPOINTED, false); + __set_nat_cache_dirty(nm_i, e); + +--- a/fs/f2fs/recovery.c ++++ b/fs/f2fs/recovery.c +@@ -236,7 +236,7 @@ static int find_fsync_dnodes(struct f2fs + while (1) { + struct fsync_inode_entry *entry; + +- if (!is_valid_meta_blkaddr(sbi, blkaddr, META_POR)) ++ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR)) + return 0; + + page = get_tmp_page(sbi, blkaddr); +@@ -468,7 +468,7 @@ retry_dn: + } + + /* dest is valid block, try to recover from src to dest */ +- if (is_valid_meta_blkaddr(sbi, dest, META_POR)) { ++ if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) { + + if (src == NULL_ADDR) { + err = reserve_new_block(&dn); +@@ -527,7 +527,7 @@ static int recover_data(struct f2fs_sb_i + while (1) { + struct fsync_inode_entry *entry; + +- if (!is_valid_meta_blkaddr(sbi, blkaddr, META_POR)) ++ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR)) + break; + + 
ra_meta_pages_cond(sbi, blkaddr); +--- a/fs/f2fs/segment.c ++++ b/fs/f2fs/segment.c +@@ -944,7 +944,7 @@ bool is_checkpointed_data(struct f2fs_sb + struct seg_entry *se; + bool is_cp = false; + +- if (!is_valid_blkaddr(blkaddr)) ++ if (!is_valid_data_blkaddr(sbi, blkaddr)) + return true; + + mutex_lock(&sit_i->sentry_lock); +@@ -1668,7 +1668,7 @@ void f2fs_wait_on_encrypted_page_writeba + { + struct page *cpage; + +- if (!is_valid_blkaddr(blkaddr)) ++ if (!is_valid_data_blkaddr(sbi, blkaddr)) + return; + + cpage = find_lock_page(META_MAPPING(sbi), blkaddr); +--- a/fs/f2fs/segment.h ++++ b/fs/f2fs/segment.h +@@ -81,7 +81,7 @@ + (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (sbi->blocks_per_seg - 1)) + + #define GET_SEGNO(sbi, blk_addr) \ +- ((!is_valid_blkaddr(blk_addr)) ? \ ++ ((!is_valid_data_blkaddr(sbi, blk_addr)) ? \ + NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ + GET_SEGNO_FROM_SEG0(sbi, blk_addr))) + #define GET_SECNO(sbi, segno) \ +@@ -603,11 +603,9 @@ static inline void verify_block_addr(str + + if (PAGE_TYPE_OF_BIO(fio->type) == META && + (!is_read_io(fio->op) || fio->is_meta)) +- BUG_ON(blk_addr < SEG0_BLKADDR(sbi) || +- blk_addr >= MAIN_BLKADDR(sbi)); ++ verify_blkaddr(sbi, blk_addr, META_GENERIC); + else +- BUG_ON(blk_addr < MAIN_BLKADDR(sbi) || +- blk_addr >= MAX_BLKADDR(sbi)); ++ verify_blkaddr(sbi, blk_addr, DATA_GENERIC); + } + + /* diff --git a/queue-4.9/f2fs-return-error-during-fill_super.patch b/queue-4.9/f2fs-return-error-during-fill_super.patch new file mode 100644 index 00000000000..f5d90f5276c --- /dev/null +++ b/queue-4.9/f2fs-return-error-during-fill_super.patch @@ -0,0 +1,123 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Jaegeuk Kim +Date: Tue, 19 Dec 2017 19:16:34 -0800 +Subject: f2fs: return error during fill_super + +From: Jaegeuk Kim + +commit c39a1b348c4fe172729eff77c533dabc3c7cdaa7 upstream. + +Let's avoid BUG_ON during fill_super, when on-disk was totall corrupted. 
+ +Reviewed-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Signed-off-by: Sudip Mukherjee +Signed-off-by: Greg Kroah-Hartman +[bwh: Backported to 4.9: adjust context] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/segment.c | 16 ++++++++++++---- + fs/f2fs/segment.h | 22 ++++++++++++++++++---- + 2 files changed, 30 insertions(+), 8 deletions(-) + +--- a/fs/f2fs/segment.c ++++ b/fs/f2fs/segment.c +@@ -2322,7 +2322,7 @@ static int build_curseg(struct f2fs_sb_i + return restore_curseg_summaries(sbi); + } + +-static void build_sit_entries(struct f2fs_sb_info *sbi) ++static int build_sit_entries(struct f2fs_sb_info *sbi) + { + struct sit_info *sit_i = SIT_I(sbi); + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); +@@ -2333,6 +2333,7 @@ static void build_sit_entries(struct f2f + unsigned int i, start, end; + unsigned int readed, start_blk = 0; + int nrpages = MAX_BIO_BLOCKS(sbi) * 8; ++ int err = 0; + + do { + readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT, true); +@@ -2350,7 +2351,9 @@ static void build_sit_entries(struct f2f + sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; + f2fs_put_page(page, 1); + +- check_block_count(sbi, start, &sit); ++ err = check_block_count(sbi, start, &sit); ++ if (err) ++ return err; + seg_info_from_raw_sit(se, &sit); + + /* build discard map only one time */ +@@ -2378,7 +2381,9 @@ static void build_sit_entries(struct f2f + + old_valid_blocks = se->valid_blocks; + +- check_block_count(sbi, start, &sit); ++ err = check_block_count(sbi, start, &sit); ++ if (err) ++ break; + seg_info_from_raw_sit(se, &sit); + + if (f2fs_discard_en(sbi)) { +@@ -2393,6 +2398,7 @@ static void build_sit_entries(struct f2f + se->valid_blocks - old_valid_blocks; + } + up_read(&curseg->journal_rwsem); ++ return err; + } + + static void init_free_segmap(struct f2fs_sb_info *sbi) +@@ -2559,7 +2565,9 @@ int build_segment_manager(struct f2fs_sb + return err; + + /* reinit free segmap based on SIT */ +- 
build_sit_entries(sbi); ++ err = build_sit_entries(sbi); ++ if (err) ++ return err; + + init_free_segmap(sbi); + err = build_dirty_segmap(sbi); +--- a/fs/f2fs/segment.h ++++ b/fs/f2fs/segment.h +@@ -600,7 +600,7 @@ static inline void verify_block_addr(str + /* + * Summary block is always treated as an invalid block + */ +-static inline void check_block_count(struct f2fs_sb_info *sbi, ++static inline int check_block_count(struct f2fs_sb_info *sbi, + int segno, struct f2fs_sit_entry *raw_sit) + { + #ifdef CONFIG_F2FS_CHECK_FS +@@ -622,11 +622,25 @@ static inline void check_block_count(str + cur_pos = next_pos; + is_valid = !is_valid; + } while (cur_pos < sbi->blocks_per_seg); +- BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks); ++ ++ if (unlikely(GET_SIT_VBLOCKS(raw_sit) != valid_blocks)) { ++ f2fs_msg(sbi->sb, KERN_ERR, ++ "Mismatch valid blocks %d vs. %d", ++ GET_SIT_VBLOCKS(raw_sit), valid_blocks); ++ set_sbi_flag(sbi, SBI_NEED_FSCK); ++ return -EINVAL; ++ } + #endif + /* check segment usage, and check boundary of a given segment number */ +- f2fs_bug_on(sbi, GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg +- || segno > TOTAL_SEGS(sbi) - 1); ++ if (unlikely(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg ++ || segno > TOTAL_SEGS(sbi) - 1)) { ++ f2fs_msg(sbi->sb, KERN_ERR, ++ "Wrong valid blocks %d or segno %u", ++ GET_SIT_VBLOCKS(raw_sit), segno); ++ set_sbi_flag(sbi, SBI_NEED_FSCK); ++ return -EINVAL; ++ } ++ return 0; + } + + static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi, diff --git a/queue-4.9/f2fs-sanity-check-on-sit-entry.patch b/queue-4.9/f2fs-sanity-check-on-sit-entry.patch new file mode 100644 index 00000000000..9821eaf4029 --- /dev/null +++ b/queue-4.9/f2fs-sanity-check-on-sit-entry.patch @@ -0,0 +1,103 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Jaegeuk Kim +Date: Tue, 24 Apr 2018 15:44:16 -0600 +Subject: f2fs: sanity check on sit entry + +From: Jaegeuk Kim + +commit b2ca374f33bd33fd822eb871876e4888cf79dc97 upstream. 
+ +syzbot hit the following crash on upstream commit +87ef12027b9b1dd0e0b12cf311fbcb19f9d92539 (Wed Apr 18 19:48:17 2018 +0000) +Merge tag 'ceph-for-4.17-rc2' of git://github.com/ceph/ceph-client +syzbot dashboard link: https://syzkaller.appspot.com/bug?extid=83699adeb2d13579c31e + +C reproducer: https://syzkaller.appspot.com/x/repro.c?id=5805208181407744 +syzkaller reproducer: https://syzkaller.appspot.com/x/repro.syz?id=6005073343676416 +Raw console output: https://syzkaller.appspot.com/x/log.txt?id=6555047731134464 +Kernel config: https://syzkaller.appspot.com/x/.config?id=1808800213120130118 +compiler: gcc (GCC) 8.0.1 20180413 (experimental) + +IMPORTANT: if you fix the bug, please add the following tag to the commit: +Reported-by: syzbot+83699adeb2d13579c31e@syzkaller.appspotmail.com +It will help syzbot understand when the bug is fixed. See footer for details. +If you forward the report, please keep this part and the footer. + +F2FS-fs (loop0): Magic Mismatch, valid(0xf2f52010) - read(0x0) +F2FS-fs (loop0): Can't find valid F2FS filesystem in 1th superblock +F2FS-fs (loop0): invalid crc value +BUG: unable to handle kernel paging request at ffffed006b2a50c0 +PGD 21ffee067 P4D 21ffee067 PUD 21fbeb067 PMD 0 +Oops: 0000 [#1] SMP KASAN +Dumping ftrace buffer: + (ftrace buffer empty) +Modules linked in: +CPU: 0 PID: 4514 Comm: syzkaller989480 Not tainted 4.17.0-rc1+ #8 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +RIP: 0010:build_sit_entries fs/f2fs/segment.c:3653 [inline] +RIP: 0010:build_segment_manager+0x7ef7/0xbf70 fs/f2fs/segment.c:3852 +RSP: 0018:ffff8801b102e5b0 EFLAGS: 00010a06 +RAX: 1ffff1006b2a50c0 RBX: 0000000000000004 RCX: 0000000000000001 +RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff8801ac74243e +RBP: ffff8801b102f410 R08: ffff8801acbd46c0 R09: fffffbfff14d9af8 +R10: fffffbfff14d9af8 R11: ffff8801acbd46c0 R12: ffff8801ac742a80 +R13: ffff8801d9519100 R14: dffffc0000000000 R15: ffff880359528600 +FS: 
0000000001e04880(0000) GS:ffff8801dae00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: ffffed006b2a50c0 CR3: 00000001ac6ac000 CR4: 00000000001406f0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + f2fs_fill_super+0x4095/0x7bf0 fs/f2fs/super.c:2803 + mount_bdev+0x30c/0x3e0 fs/super.c:1165 + f2fs_mount+0x34/0x40 fs/f2fs/super.c:3020 + mount_fs+0xae/0x328 fs/super.c:1268 + vfs_kern_mount.part.34+0xd4/0x4d0 fs/namespace.c:1037 + vfs_kern_mount fs/namespace.c:1027 [inline] + do_new_mount fs/namespace.c:2517 [inline] + do_mount+0x564/0x3070 fs/namespace.c:2847 + ksys_mount+0x12d/0x140 fs/namespace.c:3063 + __do_sys_mount fs/namespace.c:3077 [inline] + __se_sys_mount fs/namespace.c:3074 [inline] + __x64_sys_mount+0xbe/0x150 fs/namespace.c:3074 + do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287 + entry_SYSCALL_64_after_hwframe+0x49/0xbe +RIP: 0033:0x443d6a +RSP: 002b:00007ffd312813c8 EFLAGS: 00000297 ORIG_RAX: 00000000000000a5 +RAX: ffffffffffffffda RBX: 0000000020000c00 RCX: 0000000000443d6a +RDX: 0000000020000000 RSI: 0000000020000100 RDI: 00007ffd312813d0 +RBP: 0000000000000003 R08: 0000000020016a00 R09: 000000000000000a +R10: 0000000000000000 R11: 0000000000000297 R12: 0000000000000004 +R13: 0000000000402c60 R14: 0000000000000000 R15: 0000000000000000 +RIP: build_sit_entries fs/f2fs/segment.c:3653 [inline] RSP: ffff8801b102e5b0 +RIP: build_segment_manager+0x7ef7/0xbf70 fs/f2fs/segment.c:3852 RSP: ffff8801b102e5b0 +CR2: ffffed006b2a50c0 +---[ end trace a2034989e196ff17 ]--- + +Reported-and-tested-by: syzbot+83699adeb2d13579c31e@syzkaller.appspotmail.com +Reviewed-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/segment.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +--- a/fs/f2fs/segment.c ++++ b/fs/f2fs/segment.c +@@ -2376,6 +2376,15 @@ static int 
build_sit_entries(struct f2fs + unsigned int old_valid_blocks; + + start = le32_to_cpu(segno_in_journal(journal, i)); ++ if (start >= MAIN_SEGS(sbi)) { ++ f2fs_msg(sbi->sb, KERN_ERR, ++ "Wrong journal entry on segno %u", ++ start); ++ set_sbi_flag(sbi, SBI_NEED_FSCK); ++ err = -EINVAL; ++ break; ++ } ++ + se = &sit_i->sentries[start]; + sit = sit_in_journal(journal, i); + diff --git a/queue-4.9/hugetlbfs-check-for-pgoff-value-overflow.patch b/queue-4.9/hugetlbfs-check-for-pgoff-value-overflow.patch new file mode 100644 index 00000000000..c7e31c41a57 --- /dev/null +++ b/queue-4.9/hugetlbfs-check-for-pgoff-value-overflow.patch @@ -0,0 +1,111 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Mike Kravetz +Date: Thu, 22 Mar 2018 16:17:13 -0700 +Subject: hugetlbfs: check for pgoff value overflow + +From: Mike Kravetz + +commit 63489f8e821144000e0bdca7e65a8d1cc23a7ee7 upstream. + +A vma with vm_pgoff large enough to overflow a loff_t type when +converted to a byte offset can be passed via the remap_file_pages system +call. The hugetlbfs mmap routine uses the byte offset to calculate +reservations and file size. + +A sequence such as: + + mmap(0x20a00000, 0x600000, 0, 0x66033, -1, 0); + remap_file_pages(0x20a00000, 0x600000, 0, 0x20000000000000, 0); + +will result in the following when task exits/file closed, + + kernel BUG at mm/hugetlb.c:749! + Call Trace: + hugetlbfs_evict_inode+0x2f/0x40 + evict+0xcb/0x190 + __dentry_kill+0xcb/0x150 + __fput+0x164/0x1e0 + task_work_run+0x84/0xa0 + exit_to_usermode_loop+0x7d/0x80 + do_syscall_64+0x18b/0x190 + entry_SYSCALL_64_after_hwframe+0x3d/0xa2 + +The overflowed pgoff value causes hugetlbfs to try to set up a mapping +with a negative range (end < start) that leaves invalid state which +causes the BUG. + +The previous overflow fix to this code was incomplete and did not take +the remap_file_pages system call into account. 
+ +[mike.kravetz@oracle.com: v3] + Link: http://lkml.kernel.org/r/20180309002726.7248-1-mike.kravetz@oracle.com +[akpm@linux-foundation.org: include mmdebug.h] +[akpm@linux-foundation.org: fix -ve left shift count on sh] +Link: http://lkml.kernel.org/r/20180308210502.15952-1-mike.kravetz@oracle.com +Fixes: 045c7a3f53d9 ("hugetlbfs: fix offset overflow in hugetlbfs mmap") +Signed-off-by: Mike Kravetz +Reported-by: Nic Losby +Acked-by: Michal Hocko +Cc: "Kirill A . Shutemov" +Cc: Yisheng Xie +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/hugetlbfs/inode.c | 17 ++++++++++++++--- + mm/hugetlb.c | 6 ++++++ + 2 files changed, 20 insertions(+), 3 deletions(-) + +--- a/fs/hugetlbfs/inode.c ++++ b/fs/hugetlbfs/inode.c +@@ -118,6 +118,16 @@ static void huge_pagevec_release(struct + pagevec_reinit(pvec); + } + ++/* ++ * Mask used when checking the page offset value passed in via system ++ * calls. This value will be converted to a loff_t which is signed. ++ * Therefore, we want to check the upper PAGE_SHIFT + 1 bits of the ++ * value. The extra bit (- 1 in the shift value) is to take the sign ++ * bit into account. ++ */ ++#define PGOFF_LOFFT_MAX \ ++ (((1UL << (PAGE_SHIFT + 1)) - 1) << (BITS_PER_LONG - (PAGE_SHIFT + 1))) ++ + static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) + { + struct inode *inode = file_inode(file); +@@ -137,12 +147,13 @@ static int hugetlbfs_file_mmap(struct fi + vma->vm_ops = &hugetlb_vm_ops; + + /* +- * Offset passed to mmap (before page shift) could have been +- * negative when represented as a (l)off_t. ++ * page based offset in vm_pgoff could be sufficiently large to ++ * overflow a (l)off_t when converted to byte offset. 
+ */ +- if (((loff_t)vma->vm_pgoff << PAGE_SHIFT) < 0) ++ if (vma->vm_pgoff & PGOFF_LOFFT_MAX) + return -EINVAL; + ++ /* must be huge page aligned */ + if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT)) + return -EINVAL; + +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -4170,6 +4170,12 @@ int hugetlb_reserve_pages(struct inode * + struct resv_map *resv_map; + long gbl_reserve; + ++ /* This should never happen */ ++ if (from > to) { ++ VM_WARN(1, "%s called with a negative range\n", __func__); ++ return -EINVAL; ++ } ++ + /* + * Only apply hugepage reservation if asked. At fault time, an + * attempt will be made for VM_NORESERVE to allocate a page diff --git a/queue-4.9/hugetlbfs-fix-offset-overflow-in-hugetlbfs-mmap.patch b/queue-4.9/hugetlbfs-fix-offset-overflow-in-hugetlbfs-mmap.patch new file mode 100644 index 00000000000..71ed96e2691 --- /dev/null +++ b/queue-4.9/hugetlbfs-fix-offset-overflow-in-hugetlbfs-mmap.patch @@ -0,0 +1,100 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Mike Kravetz +Date: Thu, 13 Apr 2017 14:56:32 -0700 +Subject: hugetlbfs: fix offset overflow in hugetlbfs mmap + +From: Mike Kravetz + +commit 045c7a3f53d9403b62d396b6d051c4be5044cdb4 upstream. + +If mmap() maps a file, it can be passed an offset into the file at which +the mapping is to start. Offset could be a negative value when +represented as a loff_t. The offset plus length will be used to update +the file size (i_size) which is also a loff_t. + +Validate the value of offset and offset + length to make sure they do +not overflow and appear as negative. + +Found by syzcaller with commit ff8c0c53c475 ("mm/hugetlb.c: don't call +region_abort if region_chg fails") applied. Prior to this commit, the +overflow would still occur but we would luckily return ENOMEM. + +To reproduce: + + mmap(0, 0x2000, 0, 0x40021, 0xffffffffffffffffULL, 0x8000000000000000ULL); + +Resulted in, + + kernel BUG at mm/hugetlb.c:742! 
+ Call Trace: + hugetlbfs_evict_inode+0x80/0xa0 + evict+0x24a/0x620 + iput+0x48f/0x8c0 + dentry_unlink_inode+0x31f/0x4d0 + __dentry_kill+0x292/0x5e0 + dput+0x730/0x830 + __fput+0x438/0x720 + ____fput+0x1a/0x20 + task_work_run+0xfe/0x180 + exit_to_usermode_loop+0x133/0x150 + syscall_return_slowpath+0x184/0x1c0 + entry_SYSCALL_64_fastpath+0xab/0xad + +Fixes: ff8c0c53c475 ("mm/hugetlb.c: don't call region_abort if region_chg fails") +Link: http://lkml.kernel.org/r/1491951118-30678-1-git-send-email-mike.kravetz@oracle.com +Reported-by: Vegard Nossum +Signed-off-by: Mike Kravetz +Acked-by: Hillf Danton +Cc: Dmitry Vyukov +Cc: Michal Hocko +Cc: "Kirill A . Shutemov" +Cc: Andrey Ryabinin +Cc: Naoya Horiguchi +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/hugetlbfs/inode.c | 15 ++++++++++++--- + 1 file changed, 12 insertions(+), 3 deletions(-) + +--- a/fs/hugetlbfs/inode.c ++++ b/fs/hugetlbfs/inode.c +@@ -136,17 +136,26 @@ static int hugetlbfs_file_mmap(struct fi + vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND; + vma->vm_ops = &hugetlb_vm_ops; + ++ /* ++ * Offset passed to mmap (before page shift) could have been ++ * negative when represented as a (l)off_t. 
++ */ ++ if (((loff_t)vma->vm_pgoff << PAGE_SHIFT) < 0) ++ return -EINVAL; ++ + if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT)) + return -EINVAL; + + vma_len = (loff_t)(vma->vm_end - vma->vm_start); ++ len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); ++ /* check for overflow */ ++ if (len < vma_len) ++ return -EINVAL; + + inode_lock(inode); + file_accessed(file); + + ret = -ENOMEM; +- len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); +- + if (hugetlb_reserve_pages(inode, + vma->vm_pgoff >> huge_page_order(h), + len >> huge_page_shift(h), vma, +@@ -155,7 +164,7 @@ static int hugetlbfs_file_mmap(struct fi + + ret = 0; + if (vma->vm_flags & VM_WRITE && inode->i_size < len) +- inode->i_size = len; ++ i_size_write(inode, len); + out: + inode_unlock(inode); + diff --git a/queue-4.9/libceph-add-authorizer-challenge.patch b/queue-4.9/libceph-add-authorizer-challenge.patch new file mode 100644 index 00000000000..51c6577ce85 --- /dev/null +++ b/queue-4.9/libceph-add-authorizer-challenge.patch @@ -0,0 +1,330 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Ilya Dryomov +Date: Fri, 27 Jul 2018 19:18:34 +0200 +Subject: libceph: add authorizer challenge + +From: Ilya Dryomov + +commit 6daca13d2e72bedaaacfc08f873114c9307d5aea upstream. + +When a client authenticates with a service, an authorizer is sent with +a nonce to the service (ceph_x_authorize_[ab]) and the service responds +with a mutation of that nonce (ceph_x_authorize_reply). This lets the +client verify the service is who it says it is but it doesn't protect +against a replay: someone can trivially capture the exchange and reuse +the same authorizer to authenticate themselves. + +Allow the service to reject an initial authorizer with a random +challenge (ceph_x_authorize_challenge). The client then has to respond +with an updated authorizer proving they are able to decrypt the +service's challenge and that the new authorizer was produced for this +specific connection instance. 
+ +The accepting side requires this challenge and response unconditionally +if the client side advertises they have CEPHX_V2 feature bit. + +This addresses CVE-2018-1128. + +Link: http://tracker.ceph.com/issues/24836 +Signed-off-by: Ilya Dryomov +Reviewed-by: Sage Weil +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/ceph/mds_client.c | 11 ++++++ + include/linux/ceph/auth.h | 8 ++++ + include/linux/ceph/messenger.h | 3 + + include/linux/ceph/msgr.h | 2 - + net/ceph/auth.c | 16 +++++++++ + net/ceph/auth_x.c | 72 ++++++++++++++++++++++++++++++++++++++--- + net/ceph/auth_x_protocol.h | 7 +++ + net/ceph/messenger.c | 17 +++++++++ + net/ceph/osd_client.c | 11 ++++++ + 9 files changed, 140 insertions(+), 7 deletions(-) + +--- a/fs/ceph/mds_client.c ++++ b/fs/ceph/mds_client.c +@@ -3983,6 +3983,16 @@ static struct ceph_auth_handshake *get_a + return auth; + } + ++static int add_authorizer_challenge(struct ceph_connection *con, ++ void *challenge_buf, int challenge_buf_len) ++{ ++ struct ceph_mds_session *s = con->private; ++ struct ceph_mds_client *mdsc = s->s_mdsc; ++ struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; ++ ++ return ceph_auth_add_authorizer_challenge(ac, s->s_auth.authorizer, ++ challenge_buf, challenge_buf_len); ++} + + static int verify_authorizer_reply(struct ceph_connection *con) + { +@@ -4046,6 +4056,7 @@ static const struct ceph_connection_oper + .put = con_put, + .dispatch = dispatch, + .get_authorizer = get_authorizer, ++ .add_authorizer_challenge = add_authorizer_challenge, + .verify_authorizer_reply = verify_authorizer_reply, + .invalidate_authorizer = invalidate_authorizer, + .peer_reset = peer_reset, +--- a/include/linux/ceph/auth.h ++++ b/include/linux/ceph/auth.h +@@ -63,6 +63,10 @@ struct ceph_auth_client_ops { + /* ensure that an existing authorizer is up to date */ + int (*update_authorizer)(struct ceph_auth_client *ac, int peer_type, + struct ceph_auth_handshake *auth); ++ int 
(*add_authorizer_challenge)(struct ceph_auth_client *ac, ++ struct ceph_authorizer *a, ++ void *challenge_buf, ++ int challenge_buf_len); + int (*verify_authorizer_reply)(struct ceph_auth_client *ac, + struct ceph_authorizer *a); + void (*invalidate_authorizer)(struct ceph_auth_client *ac, +@@ -117,6 +121,10 @@ void ceph_auth_destroy_authorizer(struct + extern int ceph_auth_update_authorizer(struct ceph_auth_client *ac, + int peer_type, + struct ceph_auth_handshake *a); ++int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac, ++ struct ceph_authorizer *a, ++ void *challenge_buf, ++ int challenge_buf_len); + extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac, + struct ceph_authorizer *a); + extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac, +--- a/include/linux/ceph/messenger.h ++++ b/include/linux/ceph/messenger.h +@@ -30,6 +30,9 @@ struct ceph_connection_operations { + struct ceph_auth_handshake *(*get_authorizer) ( + struct ceph_connection *con, + int *proto, int force_new); ++ int (*add_authorizer_challenge)(struct ceph_connection *con, ++ void *challenge_buf, ++ int challenge_buf_len); + int (*verify_authorizer_reply) (struct ceph_connection *con); + int (*invalidate_authorizer)(struct ceph_connection *con); + +--- a/include/linux/ceph/msgr.h ++++ b/include/linux/ceph/msgr.h +@@ -90,7 +90,7 @@ struct ceph_entity_inst { + #define CEPH_MSGR_TAG_SEQ 13 /* 64-bit int follows with seen seq number */ + #define CEPH_MSGR_TAG_KEEPALIVE2 14 /* keepalive2 byte + ceph_timespec */ + #define CEPH_MSGR_TAG_KEEPALIVE2_ACK 15 /* keepalive2 reply */ +- ++#define CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER 16 /* cephx v2 doing server challenge */ + + /* + * connection negotiation +--- a/net/ceph/auth.c ++++ b/net/ceph/auth.c +@@ -314,6 +314,22 @@ int ceph_auth_update_authorizer(struct c + } + EXPORT_SYMBOL(ceph_auth_update_authorizer); + ++int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac, ++ struct ceph_authorizer 
*a, ++ void *challenge_buf, ++ int challenge_buf_len) ++{ ++ int ret = 0; ++ ++ mutex_lock(&ac->mutex); ++ if (ac->ops && ac->ops->add_authorizer_challenge) ++ ret = ac->ops->add_authorizer_challenge(ac, a, challenge_buf, ++ challenge_buf_len); ++ mutex_unlock(&ac->mutex); ++ return ret; ++} ++EXPORT_SYMBOL(ceph_auth_add_authorizer_challenge); ++ + int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac, + struct ceph_authorizer *a) + { +--- a/net/ceph/auth_x.c ++++ b/net/ceph/auth_x.c +@@ -291,7 +291,8 @@ bad: + * authorizer. The first part (ceph_x_authorize_a) should already be + * encoded. + */ +-static int encrypt_authorizer(struct ceph_x_authorizer *au) ++static int encrypt_authorizer(struct ceph_x_authorizer *au, ++ u64 *server_challenge) + { + struct ceph_x_authorize_a *msg_a; + struct ceph_x_authorize_b *msg_b; +@@ -304,16 +305,28 @@ static int encrypt_authorizer(struct cep + end = au->buf->vec.iov_base + au->buf->vec.iov_len; + + msg_b = p + ceph_x_encrypt_offset(); +- msg_b->struct_v = 1; ++ msg_b->struct_v = 2; + msg_b->nonce = cpu_to_le64(au->nonce); ++ if (server_challenge) { ++ msg_b->have_challenge = 1; ++ msg_b->server_challenge_plus_one = ++ cpu_to_le64(*server_challenge + 1); ++ } else { ++ msg_b->have_challenge = 0; ++ msg_b->server_challenge_plus_one = 0; ++ } + + ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b)); + if (ret < 0) + return ret; + + p += ret; +- WARN_ON(p > end); +- au->buf->vec.iov_len = p - au->buf->vec.iov_base; ++ if (server_challenge) { ++ WARN_ON(p != end); ++ } else { ++ WARN_ON(p > end); ++ au->buf->vec.iov_len = p - au->buf->vec.iov_base; ++ } + + return 0; + } +@@ -378,7 +391,7 @@ static int ceph_x_build_authorizer(struc + le64_to_cpu(msg_a->ticket_blob.secret_id)); + + get_random_bytes(&au->nonce, sizeof(au->nonce)); +- ret = encrypt_authorizer(au); ++ ret = encrypt_authorizer(au, NULL); + if (ret) { + pr_err("failed to encrypt authorizer: %d", ret); + goto out_au; +@@ -660,6 +673,54 @@ static 
int ceph_x_update_authorizer( + return 0; + } + ++static int decrypt_authorize_challenge(struct ceph_x_authorizer *au, ++ void *challenge_buf, ++ int challenge_buf_len, ++ u64 *server_challenge) ++{ ++ struct ceph_x_authorize_challenge *ch = ++ challenge_buf + sizeof(struct ceph_x_encrypt_header); ++ int ret; ++ ++ /* no leading len */ ++ ret = __ceph_x_decrypt(&au->session_key, challenge_buf, ++ challenge_buf_len); ++ if (ret < 0) ++ return ret; ++ if (ret < sizeof(*ch)) { ++ pr_err("bad size %d for ceph_x_authorize_challenge\n", ret); ++ return -EINVAL; ++ } ++ ++ *server_challenge = le64_to_cpu(ch->server_challenge); ++ return 0; ++} ++ ++static int ceph_x_add_authorizer_challenge(struct ceph_auth_client *ac, ++ struct ceph_authorizer *a, ++ void *challenge_buf, ++ int challenge_buf_len) ++{ ++ struct ceph_x_authorizer *au = (void *)a; ++ u64 server_challenge; ++ int ret; ++ ++ ret = decrypt_authorize_challenge(au, challenge_buf, challenge_buf_len, ++ &server_challenge); ++ if (ret) { ++ pr_err("failed to decrypt authorize challenge: %d", ret); ++ return ret; ++ } ++ ++ ret = encrypt_authorizer(au, &server_challenge); ++ if (ret) { ++ pr_err("failed to encrypt authorizer w/ challenge: %d", ret); ++ return ret; ++ } ++ ++ return 0; ++} ++ + static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, + struct ceph_authorizer *a) + { +@@ -812,6 +873,7 @@ static const struct ceph_auth_client_ops + .handle_reply = ceph_x_handle_reply, + .create_authorizer = ceph_x_create_authorizer, + .update_authorizer = ceph_x_update_authorizer, ++ .add_authorizer_challenge = ceph_x_add_authorizer_challenge, + .verify_authorizer_reply = ceph_x_verify_authorizer_reply, + .invalidate_authorizer = ceph_x_invalidate_authorizer, + .reset = ceph_x_reset, +--- a/net/ceph/auth_x_protocol.h ++++ b/net/ceph/auth_x_protocol.h +@@ -69,6 +69,13 @@ struct ceph_x_authorize_a { + struct ceph_x_authorize_b { + __u8 struct_v; + __le64 nonce; ++ __u8 have_challenge; ++ __le64 
server_challenge_plus_one; ++} __attribute__ ((packed)); ++ ++struct ceph_x_authorize_challenge { ++ __u8 struct_v; ++ __le64 server_challenge; + } __attribute__ ((packed)); + + struct ceph_x_authorize_reply { +--- a/net/ceph/messenger.c ++++ b/net/ceph/messenger.c +@@ -2037,9 +2037,24 @@ static int process_connect(struct ceph_c + if (con->auth) { + /* + * Any connection that defines ->get_authorizer() +- * should also define ->verify_authorizer_reply(). ++ * should also define ->add_authorizer_challenge() and ++ * ->verify_authorizer_reply(). ++ * + * See get_connect_authorizer(). + */ ++ if (con->in_reply.tag == CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER) { ++ ret = con->ops->add_authorizer_challenge( ++ con, con->auth->authorizer_reply_buf, ++ le32_to_cpu(con->in_reply.authorizer_len)); ++ if (ret < 0) ++ return ret; ++ ++ con_out_kvec_reset(con); ++ __prepare_write_connect(con); ++ prepare_read_connect(con); ++ return 0; ++ } ++ + ret = con->ops->verify_authorizer_reply(con); + if (ret < 0) { + con->error_msg = "bad authorize reply"; +--- a/net/ceph/osd_client.c ++++ b/net/ceph/osd_client.c +@@ -4478,6 +4478,16 @@ static struct ceph_auth_handshake *get_a + return auth; + } + ++static int add_authorizer_challenge(struct ceph_connection *con, ++ void *challenge_buf, int challenge_buf_len) ++{ ++ struct ceph_osd *o = con->private; ++ struct ceph_osd_client *osdc = o->o_osdc; ++ struct ceph_auth_client *ac = osdc->client->monc.auth; ++ ++ return ceph_auth_add_authorizer_challenge(ac, o->o_auth.authorizer, ++ challenge_buf, challenge_buf_len); ++} + + static int verify_authorizer_reply(struct ceph_connection *con) + { +@@ -4519,6 +4529,7 @@ static const struct ceph_connection_oper + .put = put_osd_con, + .dispatch = dispatch, + .get_authorizer = get_authorizer, ++ .add_authorizer_challenge = add_authorizer_challenge, + .verify_authorizer_reply = verify_authorizer_reply, + .invalidate_authorizer = invalidate_authorizer, + .alloc_msg = alloc_msg, diff --git 
a/queue-4.9/libceph-check-authorizer-reply-challenge-length-before-reading.patch b/queue-4.9/libceph-check-authorizer-reply-challenge-length-before-reading.patch new file mode 100644 index 00000000000..6dd491537ee --- /dev/null +++ b/queue-4.9/libceph-check-authorizer-reply-challenge-length-before-reading.patch @@ -0,0 +1,36 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Ilya Dryomov +Date: Fri, 27 Jul 2018 19:40:30 +0200 +Subject: libceph: check authorizer reply/challenge length before reading + +From: Ilya Dryomov + +commit 130f52f2b203aa0aec179341916ffb2e905f3afd upstream. + +Avoid scribbling over memory if the received reply/challenge is larger +than the buffer supplied with the authorizer. + +Signed-off-by: Ilya Dryomov +Reviewed-by: Sage Weil +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + net/ceph/messenger.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/net/ceph/messenger.c ++++ b/net/ceph/messenger.c +@@ -1738,6 +1738,13 @@ static int read_partial_connect(struct c + + if (con->auth) { + size = le32_to_cpu(con->in_reply.authorizer_len); ++ if (size > con->auth->authorizer_reply_buf_len) { ++ pr_err("authorizer reply too big: %d > %zu\n", size, ++ con->auth->authorizer_reply_buf_len); ++ ret = -EINVAL; ++ goto out; ++ } ++ + end += size; + ret = read_partial(con, end, size, + con->auth->authorizer_reply_buf); diff --git a/queue-4.9/libceph-drop-len-argument-of-verify_authorizer_reply.patch b/queue-4.9/libceph-drop-len-argument-of-verify_authorizer_reply.patch new file mode 100644 index 00000000000..fdbce8411fd --- /dev/null +++ b/queue-4.9/libceph-drop-len-argument-of-verify_authorizer_reply.patch @@ -0,0 +1,135 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Ilya Dryomov +Date: Fri, 2 Dec 2016 16:35:09 +0100 +Subject: libceph: drop len argument of *verify_authorizer_reply() + +From: Ilya Dryomov + +commit 0dde584882ade13dc9708d611fbf69b0ae8a9e48 upstream. 
+ +The length of the reply is protocol-dependent - for cephx it's +ceph_x_authorize_reply. Nothing sensible can be passed from the +messenger layer anyway. + +Signed-off-by: Ilya Dryomov +Reviewed-by: Sage Weil +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/ceph/mds_client.c | 4 ++-- + include/linux/ceph/auth.h | 5 ++--- + include/linux/ceph/messenger.h | 2 +- + net/ceph/auth.c | 4 ++-- + net/ceph/auth_x.c | 2 +- + net/ceph/messenger.c | 2 +- + net/ceph/osd_client.c | 4 ++-- + 7 files changed, 11 insertions(+), 12 deletions(-) + +--- a/fs/ceph/mds_client.c ++++ b/fs/ceph/mds_client.c +@@ -3984,13 +3984,13 @@ static struct ceph_auth_handshake *get_a + } + + +-static int verify_authorizer_reply(struct ceph_connection *con, int len) ++static int verify_authorizer_reply(struct ceph_connection *con) + { + struct ceph_mds_session *s = con->private; + struct ceph_mds_client *mdsc = s->s_mdsc; + struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; + +- return ceph_auth_verify_authorizer_reply(ac, s->s_auth.authorizer, len); ++ return ceph_auth_verify_authorizer_reply(ac, s->s_auth.authorizer); + } + + static int invalidate_authorizer(struct ceph_connection *con) +--- a/include/linux/ceph/auth.h ++++ b/include/linux/ceph/auth.h +@@ -64,7 +64,7 @@ struct ceph_auth_client_ops { + int (*update_authorizer)(struct ceph_auth_client *ac, int peer_type, + struct ceph_auth_handshake *auth); + int (*verify_authorizer_reply)(struct ceph_auth_client *ac, +- struct ceph_authorizer *a, size_t len); ++ struct ceph_authorizer *a); + void (*invalidate_authorizer)(struct ceph_auth_client *ac, + int peer_type); + +@@ -118,8 +118,7 @@ extern int ceph_auth_update_authorizer(s + int peer_type, + struct ceph_auth_handshake *a); + extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac, +- struct ceph_authorizer *a, +- size_t len); ++ struct ceph_authorizer *a); + extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac, + int 
peer_type); + +--- a/include/linux/ceph/messenger.h ++++ b/include/linux/ceph/messenger.h +@@ -30,7 +30,7 @@ struct ceph_connection_operations { + struct ceph_auth_handshake *(*get_authorizer) ( + struct ceph_connection *con, + int *proto, int force_new); +- int (*verify_authorizer_reply) (struct ceph_connection *con, int len); ++ int (*verify_authorizer_reply) (struct ceph_connection *con); + int (*invalidate_authorizer)(struct ceph_connection *con); + + /* there was some error on the socket (disconnect, whatever) */ +--- a/net/ceph/auth.c ++++ b/net/ceph/auth.c +@@ -315,13 +315,13 @@ int ceph_auth_update_authorizer(struct c + EXPORT_SYMBOL(ceph_auth_update_authorizer); + + int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac, +- struct ceph_authorizer *a, size_t len) ++ struct ceph_authorizer *a) + { + int ret = 0; + + mutex_lock(&ac->mutex); + if (ac->ops && ac->ops->verify_authorizer_reply) +- ret = ac->ops->verify_authorizer_reply(ac, a, len); ++ ret = ac->ops->verify_authorizer_reply(ac, a); + mutex_unlock(&ac->mutex); + return ret; + } +--- a/net/ceph/auth_x.c ++++ b/net/ceph/auth_x.c +@@ -623,7 +623,7 @@ static int ceph_x_update_authorizer( + } + + static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, +- struct ceph_authorizer *a, size_t len) ++ struct ceph_authorizer *a) + { + struct ceph_x_authorizer *au = (void *)a; + void *p = au->enc_buf; +--- a/net/ceph/messenger.c ++++ b/net/ceph/messenger.c +@@ -2045,7 +2045,7 @@ static int process_connect(struct ceph_c + * should also define ->verify_authorizer_reply(). + * See get_connect_authorizer(). 
+ */ +- ret = con->ops->verify_authorizer_reply(con, 0); ++ ret = con->ops->verify_authorizer_reply(con); + if (ret < 0) { + con->error_msg = "bad authorize reply"; + return ret; +--- a/net/ceph/osd_client.c ++++ b/net/ceph/osd_client.c +@@ -4479,13 +4479,13 @@ static struct ceph_auth_handshake *get_a + } + + +-static int verify_authorizer_reply(struct ceph_connection *con, int len) ++static int verify_authorizer_reply(struct ceph_connection *con) + { + struct ceph_osd *o = con->private; + struct ceph_osd_client *osdc = o->o_osdc; + struct ceph_auth_client *ac = osdc->client->monc.auth; + +- return ceph_auth_verify_authorizer_reply(ac, o->o_auth.authorizer, len); ++ return ceph_auth_verify_authorizer_reply(ac, o->o_auth.authorizer); + } + + static int invalidate_authorizer(struct ceph_connection *con) diff --git a/queue-4.9/libceph-factor-out-__ceph_x_decrypt.patch b/queue-4.9/libceph-factor-out-__ceph_x_decrypt.patch new file mode 100644 index 00000000000..8c7634e1413 --- /dev/null +++ b/queue-4.9/libceph-factor-out-__ceph_x_decrypt.patch @@ -0,0 +1,75 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Ilya Dryomov +Date: Thu, 26 Jul 2018 18:05:43 +0200 +Subject: libceph: factor out __ceph_x_decrypt() + +From: Ilya Dryomov + +commit c571fe24d243bfe7017f0e67fe800b3cc2a1d1f7 upstream. + +Will be used for decrypting the server challenge which is only preceded +by ceph_x_encrypt_header. + +Drop struct_v check to allow for extending ceph_x_encrypt_header in the +future. 
+ +Signed-off-by: Ilya Dryomov +Reviewed-by: Sage Weil +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + net/ceph/auth_x.c | 33 ++++++++++++++++++++++++--------- + 1 file changed, 24 insertions(+), 9 deletions(-) + +--- a/net/ceph/auth_x.c ++++ b/net/ceph/auth_x.c +@@ -69,25 +69,40 @@ static int ceph_x_encrypt(struct ceph_cr + return sizeof(u32) + ciphertext_len; + } + ++static int __ceph_x_decrypt(struct ceph_crypto_key *secret, void *p, ++ int ciphertext_len) ++{ ++ struct ceph_x_encrypt_header *hdr = p; ++ int plaintext_len; ++ int ret; ++ ++ ret = ceph_crypt(secret, false, p, ciphertext_len, ciphertext_len, ++ &plaintext_len); ++ if (ret) ++ return ret; ++ ++ if (le64_to_cpu(hdr->magic) != CEPHX_ENC_MAGIC) { ++ pr_err("%s bad magic\n", __func__); ++ return -EINVAL; ++ } ++ ++ return plaintext_len - sizeof(*hdr); ++} ++ + static int ceph_x_decrypt(struct ceph_crypto_key *secret, void **p, void *end) + { +- struct ceph_x_encrypt_header *hdr = *p + sizeof(u32); +- int ciphertext_len, plaintext_len; ++ int ciphertext_len; + int ret; + + ceph_decode_32_safe(p, end, ciphertext_len, e_inval); + ceph_decode_need(p, end, ciphertext_len, e_inval); + +- ret = ceph_crypt(secret, false, *p, end - *p, ciphertext_len, +- &plaintext_len); +- if (ret) ++ ret = __ceph_x_decrypt(secret, *p, ciphertext_len); ++ if (ret < 0) + return ret; + +- if (hdr->struct_v != 1 || le64_to_cpu(hdr->magic) != CEPHX_ENC_MAGIC) +- return -EPERM; +- + *p += ciphertext_len; +- return plaintext_len - sizeof(struct ceph_x_encrypt_header); ++ return ret; + + e_inval: + return -EINVAL; diff --git a/queue-4.9/libceph-factor-out-__prepare_write_connect.patch b/queue-4.9/libceph-factor-out-__prepare_write_connect.patch new file mode 100644 index 00000000000..afd80b08f65 --- /dev/null +++ b/queue-4.9/libceph-factor-out-__prepare_write_connect.patch @@ -0,0 +1,57 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Ilya Dryomov +Date: Thu, 26 Jul 2018 17:43:47 +0200 +Subject: libceph: 
factor out __prepare_write_connect() + +From: Ilya Dryomov + +commit c0f56b483aa09c99bfe97409a43ad786f33b8a5a upstream. + +Will be used for sending ceph_msg_connect with an updated authorizer, +after the server challenges the initial authorizer. + +Signed-off-by: Ilya Dryomov +Reviewed-by: Sage Weil +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + net/ceph/messenger.c | 21 ++++++++++++--------- + 1 file changed, 12 insertions(+), 9 deletions(-) + +--- a/net/ceph/messenger.c ++++ b/net/ceph/messenger.c +@@ -1429,6 +1429,17 @@ static void prepare_write_banner(struct + con_flag_set(con, CON_FLAG_WRITE_PENDING); + } + ++static void __prepare_write_connect(struct ceph_connection *con) ++{ ++ con_out_kvec_add(con, sizeof(con->out_connect), &con->out_connect); ++ if (con->auth) ++ con_out_kvec_add(con, con->auth->authorizer_buf_len, ++ con->auth->authorizer_buf); ++ ++ con->out_more = 0; ++ con_flag_set(con, CON_FLAG_WRITE_PENDING); ++} ++ + static int prepare_write_connect(struct ceph_connection *con) + { + unsigned int global_seq = get_global_seq(con->msgr, 0); +@@ -1464,15 +1475,7 @@ static int prepare_write_connect(struct + if (ret) + return ret; + +- con_out_kvec_add(con, sizeof (con->out_connect), +- &con->out_connect); +- if (con->auth) +- con_out_kvec_add(con, con->auth->authorizer_buf_len, +- con->auth->authorizer_buf); +- +- con->out_more = 0; +- con_flag_set(con, CON_FLAG_WRITE_PENDING); +- ++ __prepare_write_connect(con); + return 0; + } + diff --git a/queue-4.9/libceph-factor-out-encrypt_authorizer.patch b/queue-4.9/libceph-factor-out-encrypt_authorizer.patch new file mode 100644 index 00000000000..4bf62a7d7a8 --- /dev/null +++ b/queue-4.9/libceph-factor-out-encrypt_authorizer.patch @@ -0,0 +1,94 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Ilya Dryomov +Date: Fri, 27 Jul 2018 16:37:54 +0200 +Subject: libceph: factor out encrypt_authorizer() + +From: Ilya Dryomov + +commit 149cac4a50b0b4081b38b2f38de6ef71c27eaa85 upstream. 
+ +Will be used for encrypting both the initial and updated authorizers. + +Signed-off-by: Ilya Dryomov +Reviewed-by: Sage Weil +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + net/ceph/auth_x.c | 49 ++++++++++++++++++++++++++++++++++++------------- + 1 file changed, 36 insertions(+), 13 deletions(-) + +--- a/net/ceph/auth_x.c ++++ b/net/ceph/auth_x.c +@@ -286,6 +286,38 @@ bad: + return -EINVAL; + } + ++/* ++ * Encode and encrypt the second part (ceph_x_authorize_b) of the ++ * authorizer. The first part (ceph_x_authorize_a) should already be ++ * encoded. ++ */ ++static int encrypt_authorizer(struct ceph_x_authorizer *au) ++{ ++ struct ceph_x_authorize_a *msg_a; ++ struct ceph_x_authorize_b *msg_b; ++ void *p, *end; ++ int ret; ++ ++ msg_a = au->buf->vec.iov_base; ++ WARN_ON(msg_a->ticket_blob.secret_id != cpu_to_le64(au->secret_id)); ++ p = (void *)(msg_a + 1) + le32_to_cpu(msg_a->ticket_blob.blob_len); ++ end = au->buf->vec.iov_base + au->buf->vec.iov_len; ++ ++ msg_b = p + ceph_x_encrypt_offset(); ++ msg_b->struct_v = 1; ++ msg_b->nonce = cpu_to_le64(au->nonce); ++ ++ ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b)); ++ if (ret < 0) ++ return ret; ++ ++ p += ret; ++ WARN_ON(p > end); ++ au->buf->vec.iov_len = p - au->buf->vec.iov_base; ++ ++ return 0; ++} ++ + static void ceph_x_authorizer_cleanup(struct ceph_x_authorizer *au) + { + ceph_crypto_key_destroy(&au->session_key); +@@ -302,7 +334,6 @@ static int ceph_x_build_authorizer(struc + int maxlen; + struct ceph_x_authorize_a *msg_a; + struct ceph_x_authorize_b *msg_b; +- void *p, *end; + int ret; + int ticket_blob_len = + (th->ticket_blob ? 
th->ticket_blob->vec.iov_len : 0); +@@ -346,21 +377,13 @@ static int ceph_x_build_authorizer(struc + dout(" th %p secret_id %lld %lld\n", th, th->secret_id, + le64_to_cpu(msg_a->ticket_blob.secret_id)); + +- p = msg_a + 1; +- p += ticket_blob_len; +- end = au->buf->vec.iov_base + au->buf->vec.iov_len; +- +- msg_b = p + ceph_x_encrypt_offset(); +- msg_b->struct_v = 1; + get_random_bytes(&au->nonce, sizeof(au->nonce)); +- msg_b->nonce = cpu_to_le64(au->nonce); +- ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b)); +- if (ret < 0) ++ ret = encrypt_authorizer(au); ++ if (ret) { ++ pr_err("failed to encrypt authorizer: %d", ret); + goto out_au; ++ } + +- p += ret; +- WARN_ON(p > end); +- au->buf->vec.iov_len = p - au->buf->vec.iov_base; + dout(" built authorizer nonce %llx len %d\n", au->nonce, + (int)au->buf->vec.iov_len); + return 0; diff --git a/queue-4.9/libceph-implement-cephx_v2-calculation-mode.patch b/queue-4.9/libceph-implement-cephx_v2-calculation-mode.patch new file mode 100644 index 00000000000..e0f4f1146e8 --- /dev/null +++ b/queue-4.9/libceph-implement-cephx_v2-calculation-mode.patch @@ -0,0 +1,142 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Ilya Dryomov +Date: Fri, 27 Jul 2018 19:25:32 +0200 +Subject: libceph: implement CEPHX_V2 calculation mode + +From: Ilya Dryomov + +commit cc255c76c70f7a87d97939621eae04b600d9f4a1 upstream. + +Derive the signature from the entire buffer (both AES cipher blocks) +instead of using just the first half of the first block, leaving out +data_crc entirely. + +This addresses CVE-2018-1129. 
+ +Link: http://tracker.ceph.com/issues/24837 +Signed-off-by: Ilya Dryomov +Reviewed-by: Sage Weil +[bwh: Backported to 4.9: + - Define and test the feature bit in the old way + - Don't change any other feature bits in ceph_features.h] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/ceph/ceph_features.h | 4 + + net/ceph/auth_x.c | 77 +++++++++++++++++++++++++++---------- + 2 files changed, 61 insertions(+), 20 deletions(-) + +--- a/include/linux/ceph/ceph_features.h ++++ b/include/linux/ceph/ceph_features.h +@@ -76,6 +76,7 @@ + // duplicated since it was introduced at the same time as CEPH_FEATURE_CRUSH_TUNABLES5 + #define CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING (1ULL<<58) /* New, v7 encoding */ + #define CEPH_FEATURE_FS_FILE_LAYOUT_V2 (1ULL<<58) /* file_layout_t */ ++#define CEPH_FEATURE_CEPHX_V2 (1ULL<<61) // *do not share this bit* + + /* + * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature +@@ -124,7 +125,8 @@ static inline u64 ceph_sanitize_features + CEPH_FEATURE_MSGR_KEEPALIVE2 | \ + CEPH_FEATURE_CRUSH_V4 | \ + CEPH_FEATURE_CRUSH_TUNABLES5 | \ +- CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING) ++ CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING | \ ++ CEPH_FEATURE_CEPHX_V2) + + #define CEPH_FEATURES_REQUIRED_DEFAULT \ + (CEPH_FEATURE_NOSRCADDR | \ +--- a/net/ceph/auth_x.c ++++ b/net/ceph/auth_x.c +@@ -8,6 +8,7 @@ + + #include + #include ++#include + #include + #include + +@@ -799,26 +800,64 @@ static int calc_signature(struct ceph_x_ + __le64 *psig) + { + void *enc_buf = au->enc_buf; +- struct { +- __le32 len; +- __le32 header_crc; +- __le32 front_crc; +- __le32 middle_crc; +- __le32 data_crc; +- } __packed *sigblock = enc_buf + ceph_x_encrypt_offset(); +- int ret; +- +- sigblock->len = cpu_to_le32(4*sizeof(u32)); +- sigblock->header_crc = msg->hdr.crc; +- sigblock->front_crc = msg->footer.front_crc; +- sigblock->middle_crc = msg->footer.middle_crc; +- sigblock->data_crc = msg->footer.data_crc; +- ret = 
ceph_x_encrypt(&au->session_key, enc_buf, CEPHX_AU_ENC_BUF_LEN, +- sizeof(*sigblock)); +- if (ret < 0) +- return ret; ++ int ret; ++ ++ if (msg->con->peer_features & CEPH_FEATURE_CEPHX_V2) { ++ struct { ++ __le32 len; ++ __le32 header_crc; ++ __le32 front_crc; ++ __le32 middle_crc; ++ __le32 data_crc; ++ } __packed *sigblock = enc_buf + ceph_x_encrypt_offset(); ++ ++ sigblock->len = cpu_to_le32(4*sizeof(u32)); ++ sigblock->header_crc = msg->hdr.crc; ++ sigblock->front_crc = msg->footer.front_crc; ++ sigblock->middle_crc = msg->footer.middle_crc; ++ sigblock->data_crc = msg->footer.data_crc; ++ ++ ret = ceph_x_encrypt(&au->session_key, enc_buf, ++ CEPHX_AU_ENC_BUF_LEN, sizeof(*sigblock)); ++ if (ret < 0) ++ return ret; ++ ++ *psig = *(__le64 *)(enc_buf + sizeof(u32)); ++ } else { ++ struct { ++ __le32 header_crc; ++ __le32 front_crc; ++ __le32 front_len; ++ __le32 middle_crc; ++ __le32 middle_len; ++ __le32 data_crc; ++ __le32 data_len; ++ __le32 seq_lower_word; ++ } __packed *sigblock = enc_buf; ++ struct { ++ __le64 a, b, c, d; ++ } __packed *penc = enc_buf; ++ int ciphertext_len; ++ ++ sigblock->header_crc = msg->hdr.crc; ++ sigblock->front_crc = msg->footer.front_crc; ++ sigblock->front_len = msg->hdr.front_len; ++ sigblock->middle_crc = msg->footer.middle_crc; ++ sigblock->middle_len = msg->hdr.middle_len; ++ sigblock->data_crc = msg->footer.data_crc; ++ sigblock->data_len = msg->hdr.data_len; ++ sigblock->seq_lower_word = *(__le32 *)&msg->hdr.seq; ++ ++ /* no leading len, no ceph_x_encrypt_header */ ++ ret = ceph_crypt(&au->session_key, true, enc_buf, ++ CEPHX_AU_ENC_BUF_LEN, sizeof(*sigblock), ++ &ciphertext_len); ++ if (ret) ++ return ret; ++ ++ *psig = penc->a ^ penc->b ^ penc->c ^ penc->d; ++ } + +- *psig = *(__le64 *)(enc_buf + sizeof(u32)); + return 0; + } + diff --git a/queue-4.9/libceph-no-need-to-drop-con-mutex-for-get_authorizer.patch b/queue-4.9/libceph-no-need-to-drop-con-mutex-for-get_authorizer.patch new file mode 100644 index 
00000000000..edd4111eaf1 --- /dev/null +++ b/queue-4.9/libceph-no-need-to-drop-con-mutex-for-get_authorizer.patch @@ -0,0 +1,39 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Ilya Dryomov +Date: Fri, 2 Dec 2016 16:35:09 +0100 +Subject: libceph: no need to drop con->mutex for ->get_authorizer() + +From: Ilya Dryomov + +commit b3bbd3f2ab19c8ca319003b4b51ce4c4ca74da06 upstream. + +->get_authorizer(), ->verify_authorizer_reply(), ->sign_message() and +->check_message_signature() shouldn't be doing anything with or on the +connection (like closing it or sending messages). + +Signed-off-by: Ilya Dryomov +Reviewed-by: Sage Weil +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + net/ceph/messenger.c | 6 ------ + 1 file changed, 6 deletions(-) + +--- a/net/ceph/messenger.c ++++ b/net/ceph/messenger.c +@@ -1405,15 +1405,9 @@ static struct ceph_auth_handshake *get_c + return NULL; + } + +- /* Can't hold the mutex while getting authorizer */ +- mutex_unlock(&con->mutex); + auth = con->ops->get_authorizer(con, auth_proto, con->auth_retry); +- mutex_lock(&con->mutex); +- + if (IS_ERR(auth)) + return auth; +- if (con->state != CON_STATE_NEGOTIATING) +- return ERR_PTR(-EAGAIN); + + con->auth_reply_buf = auth->authorizer_reply_buf; + con->auth_reply_buf_len = auth->authorizer_reply_buf_len; diff --git a/queue-4.9/libceph-store-ceph_auth_handshake-pointer-in-ceph_connection.patch b/queue-4.9/libceph-store-ceph_auth_handshake-pointer-in-ceph_connection.patch new file mode 100644 index 00000000000..157be773a76 --- /dev/null +++ b/queue-4.9/libceph-store-ceph_auth_handshake-pointer-in-ceph_connection.patch @@ -0,0 +1,147 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Ilya Dryomov +Date: Thu, 26 Jul 2018 15:17:46 +0200 +Subject: libceph: store ceph_auth_handshake pointer in ceph_connection + +From: Ilya Dryomov + +commit 262614c4294d33b1f19e0d18c0091d9c329b544a upstream. 
+ +We already copy authorizer_reply_buf and authorizer_reply_buf_len into +ceph_connection. Factoring out __prepare_write_connect() requires two +more: authorizer_buf and authorizer_buf_len. Store the pointer to the +handshake in con->auth rather than piling on. + +Signed-off-by: Ilya Dryomov +Reviewed-by: Sage Weil +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/ceph/messenger.h | 3 -- + net/ceph/messenger.c | 54 +++++++++++++++++++---------------------- + 2 files changed, 27 insertions(+), 30 deletions(-) + +--- a/include/linux/ceph/messenger.h ++++ b/include/linux/ceph/messenger.h +@@ -200,9 +200,8 @@ struct ceph_connection { + attempt for this connection, client */ + u32 peer_global_seq; /* peer's global seq for this connection */ + ++ struct ceph_auth_handshake *auth; + int auth_retry; /* true if we need a newer authorizer */ +- void *auth_reply_buf; /* where to put the authorizer reply */ +- int auth_reply_buf_len; + + struct mutex mutex; + +--- a/net/ceph/messenger.c ++++ b/net/ceph/messenger.c +@@ -1394,24 +1394,26 @@ static void prepare_write_keepalive(stru + * Connection negotiation. 
+ */ + +-static struct ceph_auth_handshake *get_connect_authorizer(struct ceph_connection *con, +- int *auth_proto) ++static int get_connect_authorizer(struct ceph_connection *con) + { + struct ceph_auth_handshake *auth; ++ int auth_proto; + + if (!con->ops->get_authorizer) { ++ con->auth = NULL; + con->out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN; + con->out_connect.authorizer_len = 0; +- return NULL; ++ return 0; + } + +- auth = con->ops->get_authorizer(con, auth_proto, con->auth_retry); ++ auth = con->ops->get_authorizer(con, &auth_proto, con->auth_retry); + if (IS_ERR(auth)) +- return auth; ++ return PTR_ERR(auth); + +- con->auth_reply_buf = auth->authorizer_reply_buf; +- con->auth_reply_buf_len = auth->authorizer_reply_buf_len; +- return auth; ++ con->auth = auth; ++ con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto); ++ con->out_connect.authorizer_len = cpu_to_le32(auth->authorizer_buf_len); ++ return 0; + } + + /* +@@ -1431,8 +1433,7 @@ static int prepare_write_connect(struct + { + unsigned int global_seq = get_global_seq(con->msgr, 0); + int proto; +- int auth_proto; +- struct ceph_auth_handshake *auth; ++ int ret; + + switch (con->peer_name.type) { + case CEPH_ENTITY_TYPE_MON: +@@ -1459,20 +1460,15 @@ static int prepare_write_connect(struct + con->out_connect.protocol_version = cpu_to_le32(proto); + con->out_connect.flags = 0; + +- auth_proto = CEPH_AUTH_UNKNOWN; +- auth = get_connect_authorizer(con, &auth_proto); +- if (IS_ERR(auth)) +- return PTR_ERR(auth); +- +- con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto); +- con->out_connect.authorizer_len = auth ? 
+- cpu_to_le32(auth->authorizer_buf_len) : 0; ++ ret = get_connect_authorizer(con); ++ if (ret) ++ return ret; + + con_out_kvec_add(con, sizeof (con->out_connect), + &con->out_connect); +- if (auth && auth->authorizer_buf_len) +- con_out_kvec_add(con, auth->authorizer_buf_len, +- auth->authorizer_buf); ++ if (con->auth) ++ con_out_kvec_add(con, con->auth->authorizer_buf_len, ++ con->auth->authorizer_buf); + + con->out_more = 0; + con_flag_set(con, CON_FLAG_WRITE_PENDING); +@@ -1737,11 +1733,14 @@ static int read_partial_connect(struct c + if (ret <= 0) + goto out; + +- size = le32_to_cpu(con->in_reply.authorizer_len); +- end += size; +- ret = read_partial(con, end, size, con->auth_reply_buf); +- if (ret <= 0) +- goto out; ++ if (con->auth) { ++ size = le32_to_cpu(con->in_reply.authorizer_len); ++ end += size; ++ ret = read_partial(con, end, size, ++ con->auth->authorizer_reply_buf); ++ if (ret <= 0) ++ goto out; ++ } + + dout("read_partial_connect %p tag %d, con_seq = %u, g_seq = %u\n", + con, (int)con->in_reply.tag, +@@ -1749,7 +1748,6 @@ static int read_partial_connect(struct c + le32_to_cpu(con->in_reply.global_seq)); + out: + return ret; +- + } + + /* +@@ -2033,7 +2031,7 @@ static int process_connect(struct ceph_c + + dout("process_connect on %p tag %d\n", con, (int)con->in_tag); + +- if (con->auth_reply_buf) { ++ if (con->auth) { + /* + * Any connection that defines ->get_authorizer() + * should also define ->verify_authorizer_reply(). 
diff --git a/queue-4.9/libceph-weaken-sizeof-check-in-ceph_x_verify_authorizer_reply.patch b/queue-4.9/libceph-weaken-sizeof-check-in-ceph_x_verify_authorizer_reply.patch new file mode 100644 index 00000000000..5116a04ed30 --- /dev/null +++ b/queue-4.9/libceph-weaken-sizeof-check-in-ceph_x_verify_authorizer_reply.patch @@ -0,0 +1,34 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Ilya Dryomov +Date: Fri, 27 Jul 2018 19:45:36 +0200 +Subject: libceph: weaken sizeof check in ceph_x_verify_authorizer_reply() + +From: Ilya Dryomov + +commit f1d10e04637924f2b00a0fecdd2ca4565f5cfc3f upstream. + +Allow for extending ceph_x_authorize_reply in the future. + +Signed-off-by: Ilya Dryomov +Reviewed-by: Sage Weil +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + net/ceph/auth_x.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/net/ceph/auth_x.c ++++ b/net/ceph/auth_x.c +@@ -733,8 +733,10 @@ static int ceph_x_verify_authorizer_repl + ret = ceph_x_decrypt(&au->session_key, &p, p + CEPHX_AU_ENC_BUF_LEN); + if (ret < 0) + return ret; +- if (ret != sizeof(*reply)) +- return -EPERM; ++ if (ret < sizeof(*reply)) { ++ pr_err("bad size %d for ceph_x_authorize_reply\n", ret); ++ return -EINVAL; ++ } + + if (au->nonce + 1 != le64_to_cpu(reply->nonce_plus_one)) + ret = -EPERM; diff --git a/queue-4.9/mm-hugetlb.c-don-t-call-region_abort-if-region_chg-fails.patch b/queue-4.9/mm-hugetlb.c-don-t-call-region_abort-if-region_chg-fails.patch new file mode 100644 index 00000000000..094b7fa8fc1 --- /dev/null +++ b/queue-4.9/mm-hugetlb.c-don-t-call-region_abort-if-region_chg-fails.patch @@ -0,0 +1,62 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Mike Kravetz +Date: Fri, 31 Mar 2017 15:12:07 -0700 +Subject: mm/hugetlb.c: don't call region_abort if region_chg fails + +From: Mike Kravetz + +commit ff8c0c53c47530ffea82c22a0a6df6332b56c957 upstream. + +Changes to hugetlbfs reservation maps is a two step process. 
The first +step is a call to region_chg to determine what needs to be changed, and +prepare that change. This should be followed by a call to +region_add to commit the change, or region_abort to abort the change. + +The error path in hugetlb_reserve_pages called region_abort after a +failed call to region_chg. As a result, the adds_in_progress counter in +the reservation map is off by 1. This is caught by a VM_BUG_ON in +resv_map_release when the reservation map is freed. + +syzkaller fuzzer (when using an injected kmalloc failure) found this +bug, that resulted in the following: + + kernel BUG at mm/hugetlb.c:742! + Call Trace: + hugetlbfs_evict_inode+0x7b/0xa0 fs/hugetlbfs/inode.c:493 + evict+0x481/0x920 fs/inode.c:553 + iput_final fs/inode.c:1515 [inline] + iput+0x62b/0xa20 fs/inode.c:1542 + hugetlb_file_setup+0x593/0x9f0 fs/hugetlbfs/inode.c:1306 + newseg+0x422/0xd30 ipc/shm.c:575 + ipcget_new ipc/util.c:285 [inline] + ipcget+0x21e/0x580 ipc/util.c:639 + SYSC_shmget ipc/shm.c:673 [inline] + SyS_shmget+0x158/0x230 ipc/shm.c:657 + entry_SYSCALL_64_fastpath+0x1f/0xc2 + RIP: resv_map_release+0x265/0x330 mm/hugetlb.c:742 + +Link: http://lkml.kernel.org/r/1490821682-23228-1-git-send-email-mike.kravetz@oracle.com +Signed-off-by: Mike Kravetz +Reported-by: Dmitry Vyukov +Acked-by: Hillf Danton +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + mm/hugetlb.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -4259,7 +4259,9 @@ int hugetlb_reserve_pages(struct inode * + return 0; + out_err: + if (!vma || vma->vm_flags & VM_MAYSHARE) +- region_abort(resv_map, from, to); ++ /* Don't call region_abort if region_chg failed */ ++ if (chg >= 0) ++ region_abort(resv_map, from, to); + if (vma && is_vma_resv_set(vma, HPAGE_RESV_OWNER)) + kref_put(&resv_map->refs, resv_map_release); + return ret; diff --git a/queue-4.9/series
b/queue-4.9/series index 62e12da7bef..a9a7dedd47e 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -45,3 +45,57 @@ mm-cleancache-fix-corruption-on-missed-inode-invalidation.patch usb-gadget-dummy-fix-nonsensical-comparisons.patch net-qed-use-correct-strncpy-size.patch tipc-use-destination-length-for-copy-string.patch +libceph-drop-len-argument-of-verify_authorizer_reply.patch +libceph-no-need-to-drop-con-mutex-for-get_authorizer.patch +libceph-store-ceph_auth_handshake-pointer-in-ceph_connection.patch +libceph-factor-out-__prepare_write_connect.patch +libceph-factor-out-__ceph_x_decrypt.patch +libceph-factor-out-encrypt_authorizer.patch +libceph-add-authorizer-challenge.patch +libceph-implement-cephx_v2-calculation-mode.patch +libceph-weaken-sizeof-check-in-ceph_x_verify_authorizer_reply.patch +libceph-check-authorizer-reply-challenge-length-before-reading.patch +bpf-verifier-add-spi-variable-to-check_stack_write.patch +bpf-verifier-pass-instruction-index-to-check_mem_access-and-check_xadd.patch +bpf-prevent-memory-disambiguation-attack.patch +wil6210-missing-length-check-in-wmi_set_ie.patch +mm-hugetlb.c-don-t-call-region_abort-if-region_chg-fails.patch +hugetlbfs-fix-offset-overflow-in-hugetlbfs-mmap.patch +hugetlbfs-check-for-pgoff-value-overflow.patch +btrfs-validate-type-when-reading-a-chunk.patch +btrfs-verify-that-every-chunk-has-corresponding-block-group-at-mount-time.patch +btrfs-refactor-check_leaf-function-for-later-expansion.patch +btrfs-check-if-item-pointer-overlaps-with-the-item-itself.patch +btrfs-add-sanity-check-for-extent_data-when-reading-out-leaf.patch +btrfs-add-checker-for-extent_csum.patch +btrfs-move-leaf-and-node-validation-checker-to-tree-checker.c.patch +btrfs-struct-funcs-constify-readers.patch +btrfs-tree-checker-enhance-btrfs_check_node-output.patch +btrfs-tree-checker-fix-false-panic-for-sanity-test.patch +btrfs-tree-checker-add-checker-for-dir-item.patch +btrfs-tree-checker-use-zu-format-string-for-size_t.patch 
+btrfs-tree-check-reduce-stack-consumption-in-check_dir_item.patch +btrfs-tree-checker-verify-block_group_item.patch +btrfs-tree-checker-detect-invalid-and-empty-essential-trees.patch +btrfs-check-that-each-block-group-has-corresponding-chunk-at-mount-time.patch +btrfs-tree-checker-check-level-for-leaves-and-nodes.patch +btrfs-tree-checker-fix-misleading-group-system-information.patch +f2fs-fix-a-panic-caused-by-null-flush_cmd_control.patch +f2fs-fix-race-condition-in-between-free-nid-allocator-initializer.patch +f2fs-detect-wrong-layout.patch +f2fs-return-error-during-fill_super.patch +f2fs-check-blkaddr-more-accuratly-before-issue-a-bio.patch +f2fs-sanity-check-on-sit-entry.patch +f2fs-enhance-sanity_check_raw_super-to-avoid-potential-overflow.patch +f2fs-clean-up-with-is_valid_blkaddr.patch +f2fs-introduce-and-spread-verify_blkaddr.patch +f2fs-fix-to-do-sanity-check-with-secs_per_zone.patch +f2fs-fix-to-do-sanity-check-with-user_block_count.patch +f2fs-add-sanity_check_inode-function.patch +f2fs-fix-to-do-sanity-check-with-node-footer-and-iblocks.patch +f2fs-fix-to-do-sanity-check-with-block-address-in-main-area.patch +f2fs-fix-missing-up_read.patch +f2fs-fix-to-do-sanity-check-with-block-address-in-main-area-v2.patch +f2fs-free-meta-pages-if-sanity-check-for-ckpt-is-failed.patch +f2fs-fix-to-do-sanity-check-with-cp_pack_start_sum.patch +xfs-don-t-fail-when-converting-shortform-attr-to-long-form-during-attr_replace.patch diff --git a/queue-4.9/wil6210-missing-length-check-in-wmi_set_ie.patch b/queue-4.9/wil6210-missing-length-check-in-wmi_set_ie.patch new file mode 100644 index 00000000000..113260457ac --- /dev/null +++ b/queue-4.9/wil6210-missing-length-check-in-wmi_set_ie.patch @@ -0,0 +1,39 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: Lior David +Date: Tue, 14 Nov 2017 15:25:39 +0200 +Subject: wil6210: missing length check in wmi_set_ie + +From: Lior David + +commit b5a8ffcae4103a9d823ea3aa3a761f65779fbe2a upstream. 
+ +Add a length check in wmi_set_ie to detect unsigned integer +overflow. + +Signed-off-by: Lior David +Signed-off-by: Maya Erez +Signed-off-by: Kalle Valo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/wireless/ath/wil6210/wmi.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/drivers/net/wireless/ath/wil6210/wmi.c ++++ b/drivers/net/wireless/ath/wil6210/wmi.c +@@ -1302,8 +1302,14 @@ int wmi_set_ie(struct wil6210_priv *wil, + }; + int rc; + u16 len = sizeof(struct wmi_set_appie_cmd) + ie_len; +- struct wmi_set_appie_cmd *cmd = kzalloc(len, GFP_KERNEL); ++ struct wmi_set_appie_cmd *cmd; + ++ if (len < ie_len) { ++ rc = -EINVAL; ++ goto out; ++ } ++ ++ cmd = kzalloc(len, GFP_KERNEL); + if (!cmd) { + rc = -ENOMEM; + goto out; diff --git a/queue-4.9/xfs-don-t-fail-when-converting-shortform-attr-to-long-form-during-attr_replace.patch b/queue-4.9/xfs-don-t-fail-when-converting-shortform-attr-to-long-form-during-attr_replace.patch new file mode 100644 index 00000000000..e56de000b08 --- /dev/null +++ b/queue-4.9/xfs-don-t-fail-when-converting-shortform-attr-to-long-form-during-attr_replace.patch @@ -0,0 +1,47 @@ +From foo@baz Thu Dec 6 15:27:31 CET 2018 +From: "Darrick J. Wong" +Date: Tue, 17 Apr 2018 19:10:15 -0700 +Subject: xfs: don't fail when converting shortform attr to long form during ATTR_REPLACE + +From: "Darrick J. Wong" + +commit 7b38460dc8e4eafba06c78f8e37099d3b34d473c upstream. + +Kanda Motohiro reported that expanding a tiny xattr into a large xattr +fails on XFS because we remove the tiny xattr from a shortform fork and +then try to re-add it after converting the fork to extents format having +not removed the ATTR_REPLACE flag. This fails because the attr is no +longer present, causing a fs shutdown. + +This is derived from the patch in his bug report, but we really +shouldn't ignore a nonzero retval from the remove call. 
+ +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=199119 +Reported-by: kanda.motohiro@gmail.com +Reviewed-by: Dave Chinner +Reviewed-by: Christoph Hellwig +Signed-off-by: Darrick J. Wong +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_attr.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +--- a/fs/xfs/libxfs/xfs_attr.c ++++ b/fs/xfs/libxfs/xfs_attr.c +@@ -487,7 +487,14 @@ xfs_attr_shortform_addname(xfs_da_args_t + if (args->flags & ATTR_CREATE) + return retval; + retval = xfs_attr_shortform_remove(args); +- ASSERT(retval == 0); ++ if (retval) ++ return retval; ++ /* ++ * Since we have removed the old attr, clear ATTR_REPLACE so ++ * that the leaf format add routine won't trip over the attr ++ * not being around. ++ */ ++ args->flags &= ~ATTR_REPLACE; + } + + if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||