4.9-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Thu, 6 Dec 2018 14:28:25 +0000 (15:28 +0100)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Thu, 6 Dec 2018 14:28:25 +0000 (15:28 +0100)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 6 Dec 2018 14:28:25 +0000 (15:28 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 6 Dec 2018 14:28:25 +0000 (15:28 +0100)
diff --git a/queue-4.9/bpf-prevent-memory-disambiguation-attack.patch b/queue-4.9/bpf-prevent-memory-disambiguation-attack.patch

new file mode 100644 (file)

index 0000000..ed9c69e
--- /dev/null
+++ b/queue-4.9/bpf-prevent-memory-disambiguation-attack.patch
@@ -0,0 +1,148 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Alexei Starovoitov <ast@kernel.org>
+Date: Tue, 15 May 2018 09:27:05 -0700
+Subject: bpf: Prevent memory disambiguation attack
+
+From: Alexei Starovoitov <ast@kernel.org>
+
+commit af86ca4e3088fe5eacf2f7e58c01fa68ca067672 upstream.
+
+Detect code patterns where malicious 'speculative store bypass' can be used
+and sanitize such patterns.
+
+ 39: (bf) r3 = r10
+ 40: (07) r3 += -216
+ 41: (79) r8 = *(u64 *)(r7 +0)   // slow read
+ 42: (7a) *(u64 *)(r10 -72) = 0  // verifier inserts this instruction
+ 43: (7b) *(u64 *)(r8 +0) = r3   // this store becomes slow due to r8
+ 44: (79) r1 = *(u64 *)(r6 +0)   // cpu speculatively executes this load
+ 45: (71) r2 = *(u8 *)(r1 +0)    // speculatively arbitrary 'load byte'
+                                 // is now sanitized
+
+Above code after x86 JIT becomes:
+ e5: mov    %rbp,%rdx
+ e8: add    $0xffffffffffffff28,%rdx
+ ef: mov    0x0(%r13),%r14
+ f3: movq   $0x0,-0x48(%rbp)
+ fb: mov    %rdx,0x0(%r14)
+ ff: mov    0x0(%rbx),%rdi
+103: movzbq 0x0(%rdi),%rsi
+
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+[bwh: Backported to 4.9:
+ - Add bpf_verifier_env parameter to check_stack_write()
+ - Look up stack slot_types with state->stack_slot_type[] rather than
+   state->stack[].slot_type[]
+ - Drop bpf_verifier_env argument to verbose()
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/bpf_verifier.h |    1 
+ kernel/bpf/verifier.c        |   62 ++++++++++++++++++++++++++++++++++++++++---
+ 2 files changed, 59 insertions(+), 4 deletions(-)
+
+--- a/include/linux/bpf_verifier.h
++++ b/include/linux/bpf_verifier.h
+@@ -71,6 +71,7 @@ struct bpf_insn_aux_data {
+               enum bpf_reg_type ptr_type;     /* pointer type for load/store insns */
+               struct bpf_map *map_ptr;        /* pointer for call insn into lookup_elem */
+       };
++      int sanitize_stack_off; /* stack slot to be cleared */
+       bool seen; /* this insn was processed by the verifier */
+ };
+ 
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -540,8 +540,9 @@ static bool is_spillable_regtype(enum bp
+ /* check_stack_read/write functions track spill/fill of registers,
+  * stack boundary and alignment are checked in check_mem_access()
+  */
+-static int check_stack_write(struct bpf_verifier_state *state, int off,
+-                           int size, int value_regno)
++static int check_stack_write(struct bpf_verifier_env *env,
++                           struct bpf_verifier_state *state, int off,
++                           int size, int value_regno, int insn_idx)
+ {
+       int i, spi = (MAX_BPF_STACK + off) / BPF_REG_SIZE;
+       /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
+@@ -560,8 +561,32 @@ static int check_stack_write(struct bpf_
+               /* save register state */
+               state->spilled_regs[spi] = state->regs[value_regno];
+ 
+-              for (i = 0; i < BPF_REG_SIZE; i++)
++              for (i = 0; i < BPF_REG_SIZE; i++) {
++                      if (state->stack_slot_type[MAX_BPF_STACK + off + i] == STACK_MISC &&
++                          !env->allow_ptr_leaks) {
++                              int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
++                              int soff = (-spi - 1) * BPF_REG_SIZE;
++
++                              /* detected reuse of integer stack slot with a pointer
++                               * which means either llvm is reusing stack slot or
++                               * an attacker is trying to exploit CVE-2018-3639
++                               * (speculative store bypass)
++                               * Have to sanitize that slot with preemptive
++                               * store of zero.
++                               */
++                              if (*poff && *poff != soff) {
++                                      /* disallow programs where single insn stores
++                                       * into two different stack slots, since verifier
++                                       * cannot sanitize them
++                                       */
++                                      verbose("insn %d cannot access two stack slots fp%d and fp%d",
++                                              insn_idx, *poff, soff);
++                                      return -EINVAL;
++                              }
++                              *poff = soff;
++                      }
+                       state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_SPILL;
++              }
+       } else {
+               /* regular write of data into stack */
+               state->spilled_regs[spi] = (struct bpf_reg_state) {};
+@@ -841,7 +866,8 @@ static int check_mem_access(struct bpf_v
+                               verbose("attempt to corrupt spilled pointer on stack\n");
+                               return -EACCES;
+                       }
+-                      err = check_stack_write(state, off, size, value_regno);
++                      err = check_stack_write(env, state, off, size,
++                                              value_regno, insn_idx);
+               } else {
+                       err = check_stack_read(state, off, size, value_regno);
+               }
+@@ -3367,6 +3393,34 @@ static int convert_ctx_accesses(struct b
+               else
+                       continue;
+ 
++              if (type == BPF_WRITE &&
++                  env->insn_aux_data[i + delta].sanitize_stack_off) {
++                      struct bpf_insn patch[] = {
++                              /* Sanitize suspicious stack slot with zero.
++                               * There are no memory dependencies for this store,
++                               * since it's only using frame pointer and immediate
++                               * constant of zero
++                               */
++                              BPF_ST_MEM(BPF_DW, BPF_REG_FP,
++                                         env->insn_aux_data[i + delta].sanitize_stack_off,
++                                         0),
++                              /* the original STX instruction will immediately
++                               * overwrite the same stack slot with appropriate value
++                               */
++                              *insn,
++                      };
++
++                      cnt = ARRAY_SIZE(patch);
++                      new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
++                      if (!new_prog)
++                              return -ENOMEM;
++
++                      delta    += cnt - 1;
++                      env->prog = new_prog;
++                      insn      = new_prog->insnsi + i + delta;
++                      continue;
++              }
++
+               if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
+                       continue;
+ 
diff --git a/queue-4.9/bpf-verifier-add-spi-variable-to-check_stack_write.patch b/queue-4.9/bpf-verifier-add-spi-variable-to-check_stack_write.patch

new file mode 100644 (file)

index 0000000..d6b488d
--- /dev/null
+++ b/queue-4.9/bpf-verifier-add-spi-variable-to-check_stack_write.patch
@@ -0,0 +1,47 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Date: Wed, 5 Dec 2018 22:45:15 +0000
+Subject: bpf/verifier: Add spi variable to check_stack_write()
+
+From: Ben Hutchings <ben.hutchings@codethink.co.uk>
+
+Extracted from commit dc503a8ad984 "bpf/verifier: track liveness for
+pruning".
+
+Cc: Daniel Borkmann <daniel@iogearbox.net>
+Cc: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/verifier.c |    8 +++-----
+ 1 file changed, 3 insertions(+), 5 deletions(-)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -543,7 +543,7 @@ static bool is_spillable_regtype(enum bp
+ static int check_stack_write(struct bpf_verifier_state *state, int off,
+                            int size, int value_regno)
+ {
+-      int i;
++      int i, spi = (MAX_BPF_STACK + off) / BPF_REG_SIZE;
+       /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
+        * so it's aligned access and [off, off + size) are within stack limits
+        */
+@@ -558,15 +558,13 @@ static int check_stack_write(struct bpf_
+               }
+ 
+               /* save register state */
+-              state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] =
+-                      state->regs[value_regno];
++              state->spilled_regs[spi] = state->regs[value_regno];
+ 
+               for (i = 0; i < BPF_REG_SIZE; i++)
+                       state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_SPILL;
+       } else {
+               /* regular write of data into stack */
+-              state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] =
+-                      (struct bpf_reg_state) {};
++              state->spilled_regs[spi] = (struct bpf_reg_state) {};
+ 
+               for (i = 0; i < size; i++)
+                       state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC;
diff --git a/queue-4.9/bpf-verifier-pass-instruction-index-to-check_mem_access-and-check_xadd.patch b/queue-4.9/bpf-verifier-pass-instruction-index-to-check_mem_access-and-check_xadd.patch

new file mode 100644 (file)

index 0000000..1c0fa89
--- /dev/null
+++ b/queue-4.9/bpf-verifier-pass-instruction-index-to-check_mem_access-and-check_xadd.patch
@@ -0,0 +1,99 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Date: Wed, 5 Dec 2018 22:41:36 +0000
+Subject: bpf/verifier: Pass instruction index to check_mem_access() and check_xadd()
+
+From: Ben Hutchings <ben.hutchings@codethink.co.uk>
+
+Extracted from commit 31fd85816dbe "bpf: permits narrower load from
+bpf program context fields".
+
+Cc: Daniel Borkmann <daniel@iogearbox.net>
+Cc: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/verifier.c |   18 +++++++++---------
+ 1 file changed, 9 insertions(+), 9 deletions(-)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -745,7 +745,7 @@ static int check_ptr_alignment(struct bp
+  * if t==write && value_regno==-1, some unknown value is stored into memory
+  * if t==read && value_regno==-1, don't care what we read from memory
+  */
+-static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
++static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno, int off,
+                           int bpf_size, enum bpf_access_type t,
+                           int value_regno)
+ {
+@@ -875,7 +875,7 @@ static int check_mem_access(struct bpf_v
+       return err;
+ }
+ 
+-static int check_xadd(struct bpf_verifier_env *env, struct bpf_insn *insn)
++static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
+ {
+       struct bpf_reg_state *regs = env->cur_state.regs;
+       int err;
+@@ -908,13 +908,13 @@ static int check_xadd(struct bpf_verifie
+       }
+ 
+       /* check whether atomic_add can read the memory */
+-      err = check_mem_access(env, insn->dst_reg, insn->off,
++      err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
+                              BPF_SIZE(insn->code), BPF_READ, -1);
+       if (err)
+               return err;
+ 
+       /* check whether atomic_add can write into the same memory */
+-      return check_mem_access(env, insn->dst_reg, insn->off,
++      return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
+                               BPF_SIZE(insn->code), BPF_WRITE, -1);
+ }
+ 
+@@ -1270,7 +1270,7 @@ static int check_call(struct bpf_verifie
+        * is inferred from register state.
+        */
+       for (i = 0; i < meta.access_size; i++) {
+-              err = check_mem_access(env, meta.regno, i, BPF_B, BPF_WRITE, -1);
++              err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B, BPF_WRITE, -1);
+               if (err)
+                       return err;
+       }
+@@ -2936,7 +2936,7 @@ static int do_check(struct bpf_verifier_
+                       /* check that memory (src_reg + off) is readable,
+                        * the state of dst_reg will be updated by this func
+                        */
+-                      err = check_mem_access(env, insn->src_reg, insn->off,
++                      err = check_mem_access(env, insn_idx, insn->src_reg, insn->off,
+                                              BPF_SIZE(insn->code), BPF_READ,
+                                              insn->dst_reg);
+                       if (err)
+@@ -2976,7 +2976,7 @@ static int do_check(struct bpf_verifier_
+                       enum bpf_reg_type *prev_dst_type, dst_reg_type;
+ 
+                       if (BPF_MODE(insn->code) == BPF_XADD) {
+-                              err = check_xadd(env, insn);
++                              err = check_xadd(env, insn_idx, insn);
+                               if (err)
+                                       return err;
+                               insn_idx++;
+@@ -2995,7 +2995,7 @@ static int do_check(struct bpf_verifier_
+                       dst_reg_type = regs[insn->dst_reg].type;
+ 
+                       /* check that memory (dst_reg + off) is writeable */
+-                      err = check_mem_access(env, insn->dst_reg, insn->off,
++                      err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
+                                              BPF_SIZE(insn->code), BPF_WRITE,
+                                              insn->src_reg);
+                       if (err)
+@@ -3030,7 +3030,7 @@ static int do_check(struct bpf_verifier_
+                       }
+ 
+                       /* check that memory (dst_reg + off) is writeable */
+-                      err = check_mem_access(env, insn->dst_reg, insn->off,
++                      err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
+                                              BPF_SIZE(insn->code), BPF_WRITE,
+                                              -1);
+                       if (err)
diff --git a/queue-4.9/btrfs-add-checker-for-extent_csum.patch b/queue-4.9/btrfs-add-checker-for-extent_csum.patch

new file mode 100644 (file)

index 0000000..b92520e
--- /dev/null
+++ b/queue-4.9/btrfs-add-checker-for-extent_csum.patch
@@ -0,0 +1,72 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Qu Wenruo <quwenruo.btrfs@gmx.com>
+Date: Wed, 23 Aug 2017 16:57:59 +0900
+Subject: btrfs: Add checker for EXTENT_CSUM
+
+From: Qu Wenruo <quwenruo.btrfs@gmx.com>
+
+commit 4b865cab96fe2a30ed512cf667b354bd291b3b0a upstream.
+
+EXTENT_CSUM checker is a relatively easy one, only needs to check:
+
+1) Objectid
+   Fixed to BTRFS_EXTENT_CSUM_OBJECTID
+
+2) Key offset alignment
+   Must be aligned to sectorsize
+
+3) Item size alignment
+   Must be aligned to csum size
+
+Signed-off-by: Qu Wenruo <quwenruo.btrfs@gmx.com>
+Reviewed-by: Nikolay Borisov <nborisov@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+[bwh: Backported to 4.9: Use root->sectorsize instead of
+ root->fs_info->sectorsize]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/disk-io.c |   24 ++++++++++++++++++++++++
+ 1 file changed, 24 insertions(+)
+
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -621,6 +621,27 @@ static int check_extent_data_item(struct
+       return 0;
+ }
+ 
++static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf,
++                         struct btrfs_key *key, int slot)
++{
++      u32 sectorsize = root->sectorsize;
++      u32 csumsize = btrfs_super_csum_size(root->fs_info->super_copy);
++
++      if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) {
++              CORRUPT("invalid objectid for csum item", leaf, root, slot);
++              return -EUCLEAN;
++      }
++      if (!IS_ALIGNED(key->offset, sectorsize)) {
++              CORRUPT("unaligned key offset for csum item", leaf, root, slot);
++              return -EUCLEAN;
++      }
++      if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) {
++              CORRUPT("unaligned csum item size", leaf, root, slot);
++              return -EUCLEAN;
++      }
++      return 0;
++}
++
+ /*
+  * Common point to switch the item-specific validation.
+  */
+@@ -634,6 +655,9 @@ static int check_leaf_item(struct btrfs_
+       case BTRFS_EXTENT_DATA_KEY:
+               ret = check_extent_data_item(root, leaf, key, slot);
+               break;
++      case BTRFS_EXTENT_CSUM_KEY:
++              ret = check_csum_item(root, leaf, key, slot);
++              break;
+       }
+       return ret;
+ }
diff --git a/queue-4.9/btrfs-add-sanity-check-for-extent_data-when-reading-out-leaf.patch b/queue-4.9/btrfs-add-sanity-check-for-extent_data-when-reading-out-leaf.patch

new file mode 100644 (file)

index 0000000..bbd799b
--- /dev/null
+++ b/queue-4.9/btrfs-add-sanity-check-for-extent_data-when-reading-out-leaf.patch
@@ -0,0 +1,184 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Qu Wenruo <quwenruo.btrfs@gmx.com>
+Date: Wed, 23 Aug 2017 16:57:58 +0900
+Subject: btrfs: Add sanity check for EXTENT_DATA when reading out leaf
+
+From: Qu Wenruo <quwenruo.btrfs@gmx.com>
+
+commit 40c3c40947324d9f40bf47830c92c59a9bbadf4a upstream.
+
+Add extra checks for items with EXTENT_DATA type.  This checks the
+following things:
+
+0) Key offset
+   All key offsets must be aligned to sectorsize.
+   Inline extent must have 0 for key offset.
+
+1) Item size
+   Uncompressed inline file extent size must match item size.
+   (Compressed inline file extent has no information about its on-disk size.)
+   Regular/preallocated file extent size must be a fixed value.
+
+2) Every member of regular file extent item
+   Including alignment for bytenr and offset, possible value for
+   compression/encryption/type.
+
+3) Type/compression/encode must be one of the valid values.
+
+This should be the most comprehensive and strict check in the context
+of btrfs_item for EXTENT_DATA.
+
+Signed-off-by: Qu Wenruo <quwenruo.btrfs@gmx.com>
+Reviewed-by: Nikolay Borisov <nborisov@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+[ switch to BTRFS_FILE_EXTENT_TYPES, similar to what
+  BTRFS_COMPRESS_TYPES does ]
+Signed-off-by: David Sterba <dsterba@suse.com>
+[bwh: Backported to 4.9: Use root->sectorsize instead of
+ root->fs_info->sectorsize]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/disk-io.c              |  103 ++++++++++++++++++++++++++++++++++++++++
+ include/uapi/linux/btrfs_tree.h |    1 
+ 2 files changed, 104 insertions(+)
+
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -544,6 +544,100 @@ static int check_tree_block_fsid(struct
+                  btrfs_header_level(eb) == 0 ? "leaf" : "node",\
+                  reason, btrfs_header_bytenr(eb), root->objectid, slot)
+ 
++static int check_extent_data_item(struct btrfs_root *root,
++                                struct extent_buffer *leaf,
++                                struct btrfs_key *key, int slot)
++{
++      struct btrfs_file_extent_item *fi;
++      u32 sectorsize = root->sectorsize;
++      u32 item_size = btrfs_item_size_nr(leaf, slot);
++
++      if (!IS_ALIGNED(key->offset, sectorsize)) {
++              CORRUPT("unaligned key offset for file extent",
++                      leaf, root, slot);
++              return -EUCLEAN;
++      }
++
++      fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
++
++      if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) {
++              CORRUPT("invalid file extent type", leaf, root, slot);
++              return -EUCLEAN;
++      }
++
++      /*
++       * Support for new compression/encrption must introduce incompat flag,
++       * and must be caught in open_ctree().
++       */
++      if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) {
++              CORRUPT("invalid file extent compression", leaf, root, slot);
++              return -EUCLEAN;
++      }
++      if (btrfs_file_extent_encryption(leaf, fi)) {
++              CORRUPT("invalid file extent encryption", leaf, root, slot);
++              return -EUCLEAN;
++      }
++      if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
++              /* Inline extent must have 0 as key offset */
++              if (key->offset) {
++                      CORRUPT("inline extent has non-zero key offset",
++                              leaf, root, slot);
++                      return -EUCLEAN;
++              }
++
++              /* Compressed inline extent has no on-disk size, skip it */
++              if (btrfs_file_extent_compression(leaf, fi) !=
++                  BTRFS_COMPRESS_NONE)
++                      return 0;
++
++              /* Uncompressed inline extent size must match item size */
++              if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START +
++                  btrfs_file_extent_ram_bytes(leaf, fi)) {
++                      CORRUPT("plaintext inline extent has invalid size",
++                              leaf, root, slot);
++                      return -EUCLEAN;
++              }
++              return 0;
++      }
++
++      /* Regular or preallocated extent has fixed item size */
++      if (item_size != sizeof(*fi)) {
++              CORRUPT(
++              "regluar or preallocated extent data item size is invalid",
++                      leaf, root, slot);
++              return -EUCLEAN;
++      }
++      if (!IS_ALIGNED(btrfs_file_extent_ram_bytes(leaf, fi), sectorsize) ||
++          !IS_ALIGNED(btrfs_file_extent_disk_bytenr(leaf, fi), sectorsize) ||
++          !IS_ALIGNED(btrfs_file_extent_disk_num_bytes(leaf, fi), sectorsize) ||
++          !IS_ALIGNED(btrfs_file_extent_offset(leaf, fi), sectorsize) ||
++          !IS_ALIGNED(btrfs_file_extent_num_bytes(leaf, fi), sectorsize)) {
++              CORRUPT(
++              "regular or preallocated extent data item has unaligned value",
++                      leaf, root, slot);
++              return -EUCLEAN;
++      }
++
++      return 0;
++}
++
++/*
++ * Common point to switch the item-specific validation.
++ */
++static int check_leaf_item(struct btrfs_root *root,
++                         struct extent_buffer *leaf,
++                         struct btrfs_key *key, int slot)
++{
++      int ret = 0;
++
++      switch (key->type) {
++      case BTRFS_EXTENT_DATA_KEY:
++              ret = check_extent_data_item(root, leaf, key, slot);
++              break;
++      }
++      return ret;
++}
++
+ static noinline int check_leaf(struct btrfs_root *root,
+                              struct extent_buffer *leaf)
+ {
+@@ -599,9 +693,13 @@ static noinline int check_leaf(struct bt
+        * 1) key order
+        * 2) item offset and size
+        *    No overlap, no hole, all inside the leaf.
++       * 3) item content
++       *    If possible, do comprehensive sanity check.
++       *    NOTE: All checks must only rely on the item data itself.
+        */
+       for (slot = 0; slot < nritems; slot++) {
+               u32 item_end_expected;
++              int ret;
+ 
+               btrfs_item_key_to_cpu(leaf, &key, slot);
+ 
+@@ -644,6 +742,11 @@ static noinline int check_leaf(struct bt
+                       return -EUCLEAN;
+               }
+ 
++              /* Check if the item size and content meet other criteria */
++              ret = check_leaf_item(root, leaf, &key, slot);
++              if (ret < 0)
++                      return ret;
++
+               prev_key.objectid = key.objectid;
+               prev_key.type = key.type;
+               prev_key.offset = key.offset;
+--- a/include/uapi/linux/btrfs_tree.h
++++ b/include/uapi/linux/btrfs_tree.h
+@@ -730,6 +730,7 @@ struct btrfs_balance_item {
+ #define BTRFS_FILE_EXTENT_INLINE 0
+ #define BTRFS_FILE_EXTENT_REG 1
+ #define BTRFS_FILE_EXTENT_PREALLOC 2
++#define BTRFS_FILE_EXTENT_TYPES       2
+ 
+ struct btrfs_file_extent_item {
+       /*
diff --git a/queue-4.9/btrfs-check-if-item-pointer-overlaps-with-the-item-itself.patch b/queue-4.9/btrfs-check-if-item-pointer-overlaps-with-the-item-itself.patch

new file mode 100644 (file)

index 0000000..f2cd1ab
--- /dev/null
+++ b/queue-4.9/btrfs-check-if-item-pointer-overlaps-with-the-item-itself.patch
@@ -0,0 +1,41 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Qu Wenruo <quwenruo.btrfs@gmx.com>
+Date: Wed, 23 Aug 2017 16:57:57 +0900
+Subject: btrfs: Check if item pointer overlaps with the item itself
+
+From: Qu Wenruo <quwenruo.btrfs@gmx.com>
+
+commit 7f43d4affb2a254d421ab20b0cf65ac2569909fb upstream.
+
+Function check_leaf() checks if any item pointer points outside of the
+leaf, but it doesn't check if the pointer overlaps with the item itself.
+
+Normally only the last item may be the victim, but adding such a check is
+never a bad idea anyway.
+
+Signed-off-by: Qu Wenruo <quwenruo.btrfs@gmx.com>
+Reviewed-by: Nikolay Borisov <nborisov@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/disk-io.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -637,6 +637,13 @@ static noinline int check_leaf(struct bt
+                       return -EUCLEAN;
+               }
+ 
++              /* Also check if the item pointer overlaps with btrfs item. */
++              if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) >
++                  btrfs_item_ptr_offset(leaf, slot)) {
++                      CORRUPT("slot overlap with its data", leaf, root, slot);
++                      return -EUCLEAN;
++              }
++
+               prev_key.objectid = key.objectid;
+               prev_key.type = key.type;
+               prev_key.offset = key.offset;
diff --git a/queue-4.9/btrfs-check-that-each-block-group-has-corresponding-chunk-at-mount-time.patch b/queue-4.9/btrfs-check-that-each-block-group-has-corresponding-chunk-at-mount-time.patch

new file mode 100644 (file)

index 0000000..2993f55
--- /dev/null
+++ b/queue-4.9/btrfs-check-that-each-block-group-has-corresponding-chunk-at-mount-time.patch
@@ -0,0 +1,83 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Qu Wenruo <wqu@suse.com>
+Date: Wed, 1 Aug 2018 10:37:16 +0800
+Subject: btrfs: Check that each block group has corresponding chunk at mount time
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 514c7dca85a0bf40be984dab0b477403a6db901f upstream.
+
+A crafted btrfs image with incorrect chunk<->block group mapping will
+trigger a lot of unexpected things as the mapping is essential.
+
+Although the problem can be caught by block group item checker
+added in "btrfs: tree-checker: Verify block_group_item", it's still not
+sufficient.  A sufficiently valid block group item can pass the check
+added by the mentioned patch but could fail to match the existing chunk.
+
+This patch will add extra block group -> chunk mapping check, to ensure
+we have a completely matching (start, len, flags) chunk for each block
+group at mount time.
+
+Here we reuse the original helper find_first_block_group(), which is
+already doing the basic bg -> chunk checks, adding further checks of the
+start/len and type flags.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=199837
+Reported-by: Xu Wen <wen.xu@gatech.edu>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: Su Yue <suy.fnst@cn.fujitsu.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+[bwh: Backported to 4.9: Use root->fs_info instead of fs_info]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent-tree.c |   28 +++++++++++++++++++++++++++-
+ 1 file changed, 27 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -9896,6 +9896,8 @@ static int find_first_block_group(struct
+       int ret = 0;
+       struct btrfs_key found_key;
+       struct extent_buffer *leaf;
++      struct btrfs_block_group_item bg;
++      u64 flags;
+       int slot;
+ 
+       ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
+@@ -9930,8 +9932,32 @@ static int find_first_block_group(struct
+                       "logical %llu len %llu found bg but no related chunk",
+                                         found_key.objectid, found_key.offset);
+                               ret = -ENOENT;
++                      } else if (em->start != found_key.objectid ||
++                                 em->len != found_key.offset) {
++                              btrfs_err(root->fs_info,
++              "block group %llu len %llu mismatch with chunk %llu len %llu",
++                                        found_key.objectid, found_key.offset,
++                                        em->start, em->len);
++                              ret = -EUCLEAN;
+                       } else {
+-                              ret = 0;
++                              read_extent_buffer(leaf, &bg,
++                                      btrfs_item_ptr_offset(leaf, slot),
++                                      sizeof(bg));
++                              flags = btrfs_block_group_flags(&bg) &
++                                      BTRFS_BLOCK_GROUP_TYPE_MASK;
++
++                              if (flags != (em->map_lookup->type &
++                                            BTRFS_BLOCK_GROUP_TYPE_MASK)) {
++                                      btrfs_err(root->fs_info,
++"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx",
++                                              found_key.objectid,
++                                              found_key.offset, flags,
++                                              (BTRFS_BLOCK_GROUP_TYPE_MASK &
++                                               em->map_lookup->type));
++                                      ret = -EUCLEAN;
++                              } else {
++                                      ret = 0;
++                              }
+                       }
+                       free_extent_map(em);
+                       goto out;
diff --git a/queue-4.9/btrfs-move-leaf-and-node-validation-checker-to-tree-checker.c.patch b/queue-4.9/btrfs-move-leaf-and-node-validation-checker-to-tree-checker.c.patch

new file mode 100644 (file)

index 0000000..e2b99be
--- /dev/null
+++ b/queue-4.9/btrfs-move-leaf-and-node-validation-checker-to-tree-checker.c.patch
@@ -0,0 +1,697 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Qu Wenruo <quwenruo.btrfs@gmx.com>
+Date: Mon, 9 Oct 2017 01:51:02 +0000
+Subject: btrfs: Move leaf and node validation checker to tree-checker.c
+
+From: Qu Wenruo <quwenruo.btrfs@gmx.com>
+
+commit 557ea5dd003d371536f6b4e8f7c8209a2b6fd4e3 upstream.
+
+There is no doubt the comprehensive tree block checker will become larger,
+so moving the checkers into their own files is quite reasonable.
+
+Signed-off-by: Qu Wenruo <quwenruo.btrfs@gmx.com>
+[ wording adjustments ]
+Signed-off-by: David Sterba <dsterba@suse.com>
+[bwh: Backported to 4.9: The moved code is slightly different]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/Makefile       |    2 
+ fs/btrfs/disk-io.c      |  284 --------------------------------------------
+ fs/btrfs/tree-checker.c |  309 ++++++++++++++++++++++++++++++++++++++++++++++++
+ fs/btrfs/tree-checker.h |   26 ++++
+ 4 files changed, 340 insertions(+), 281 deletions(-)
+ create mode 100644 fs/btrfs/tree-checker.c
+ create mode 100644 fs/btrfs/tree-checker.h
+
+--- a/fs/btrfs/Makefile
++++ b/fs/btrfs/Makefile
+@@ -9,7 +9,7 @@ btrfs-y += super.o ctree.o extent-tree.o
+          export.o tree-log.o free-space-cache.o zlib.o lzo.o \
+          compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
+          reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
+-         uuid-tree.o props.o hash.o free-space-tree.o
++         uuid-tree.o props.o hash.o free-space-tree.o tree-checker.o
+ 
+ btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
+ btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -50,6 +50,7 @@
+ #include "sysfs.h"
+ #include "qgroup.h"
+ #include "compression.h"
++#include "tree-checker.h"
+ 
+ #ifdef CONFIG_X86
+ #include <asm/cpufeature.h>
+@@ -538,283 +539,6 @@ static int check_tree_block_fsid(struct
+       return ret;
+ }
+ 
+-#define CORRUPT(reason, eb, root, slot)                               \
+-      btrfs_crit(root->fs_info, "corrupt %s, %s: block=%llu," \
+-                 " root=%llu, slot=%d",                       \
+-                 btrfs_header_level(eb) == 0 ? "leaf" : "node",\
+-                 reason, btrfs_header_bytenr(eb), root->objectid, slot)
+-
+-static int check_extent_data_item(struct btrfs_root *root,
+-                                struct extent_buffer *leaf,
+-                                struct btrfs_key *key, int slot)
+-{
+-      struct btrfs_file_extent_item *fi;
+-      u32 sectorsize = root->sectorsize;
+-      u32 item_size = btrfs_item_size_nr(leaf, slot);
+-
+-      if (!IS_ALIGNED(key->offset, sectorsize)) {
+-              CORRUPT("unaligned key offset for file extent",
+-                      leaf, root, slot);
+-              return -EUCLEAN;
+-      }
+-
+-      fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
+-
+-      if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) {
+-              CORRUPT("invalid file extent type", leaf, root, slot);
+-              return -EUCLEAN;
+-      }
+-
+-      /*
+-       * Support for new compression/encrption must introduce incompat flag,
+-       * and must be caught in open_ctree().
+-       */
+-      if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) {
+-              CORRUPT("invalid file extent compression", leaf, root, slot);
+-              return -EUCLEAN;
+-      }
+-      if (btrfs_file_extent_encryption(leaf, fi)) {
+-              CORRUPT("invalid file extent encryption", leaf, root, slot);
+-              return -EUCLEAN;
+-      }
+-      if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
+-              /* Inline extent must have 0 as key offset */
+-              if (key->offset) {
+-                      CORRUPT("inline extent has non-zero key offset",
+-                              leaf, root, slot);
+-                      return -EUCLEAN;
+-              }
+-
+-              /* Compressed inline extent has no on-disk size, skip it */
+-              if (btrfs_file_extent_compression(leaf, fi) !=
+-                  BTRFS_COMPRESS_NONE)
+-                      return 0;
+-
+-              /* Uncompressed inline extent size must match item size */
+-              if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START +
+-                  btrfs_file_extent_ram_bytes(leaf, fi)) {
+-                      CORRUPT("plaintext inline extent has invalid size",
+-                              leaf, root, slot);
+-                      return -EUCLEAN;
+-              }
+-              return 0;
+-      }
+-
+-      /* Regular or preallocated extent has fixed item size */
+-      if (item_size != sizeof(*fi)) {
+-              CORRUPT(
+-              "regluar or preallocated extent data item size is invalid",
+-                      leaf, root, slot);
+-              return -EUCLEAN;
+-      }
+-      if (!IS_ALIGNED(btrfs_file_extent_ram_bytes(leaf, fi), sectorsize) ||
+-          !IS_ALIGNED(btrfs_file_extent_disk_bytenr(leaf, fi), sectorsize) ||
+-          !IS_ALIGNED(btrfs_file_extent_disk_num_bytes(leaf, fi), sectorsize) ||
+-          !IS_ALIGNED(btrfs_file_extent_offset(leaf, fi), sectorsize) ||
+-          !IS_ALIGNED(btrfs_file_extent_num_bytes(leaf, fi), sectorsize)) {
+-              CORRUPT(
+-              "regular or preallocated extent data item has unaligned value",
+-                      leaf, root, slot);
+-              return -EUCLEAN;
+-      }
+-
+-      return 0;
+-}
+-
+-static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf,
+-                         struct btrfs_key *key, int slot)
+-{
+-      u32 sectorsize = root->sectorsize;
+-      u32 csumsize = btrfs_super_csum_size(root->fs_info->super_copy);
+-
+-      if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) {
+-              CORRUPT("invalid objectid for csum item", leaf, root, slot);
+-              return -EUCLEAN;
+-      }
+-      if (!IS_ALIGNED(key->offset, sectorsize)) {
+-              CORRUPT("unaligned key offset for csum item", leaf, root, slot);
+-              return -EUCLEAN;
+-      }
+-      if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) {
+-              CORRUPT("unaligned csum item size", leaf, root, slot);
+-              return -EUCLEAN;
+-      }
+-      return 0;
+-}
+-
+-/*
+- * Common point to switch the item-specific validation.
+- */
+-static int check_leaf_item(struct btrfs_root *root,
+-                         struct extent_buffer *leaf,
+-                         struct btrfs_key *key, int slot)
+-{
+-      int ret = 0;
+-
+-      switch (key->type) {
+-      case BTRFS_EXTENT_DATA_KEY:
+-              ret = check_extent_data_item(root, leaf, key, slot);
+-              break;
+-      case BTRFS_EXTENT_CSUM_KEY:
+-              ret = check_csum_item(root, leaf, key, slot);
+-              break;
+-      }
+-      return ret;
+-}
+-
+-static noinline int check_leaf(struct btrfs_root *root,
+-                             struct extent_buffer *leaf)
+-{
+-      /* No valid key type is 0, so all key should be larger than this key */
+-      struct btrfs_key prev_key = {0, 0, 0};
+-      struct btrfs_key key;
+-      u32 nritems = btrfs_header_nritems(leaf);
+-      int slot;
+-
+-      /*
+-       * Extent buffers from a relocation tree have a owner field that
+-       * corresponds to the subvolume tree they are based on. So just from an
+-       * extent buffer alone we can not find out what is the id of the
+-       * corresponding subvolume tree, so we can not figure out if the extent
+-       * buffer corresponds to the root of the relocation tree or not. So skip
+-       * this check for relocation trees.
+-       */
+-      if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
+-              struct btrfs_root *check_root;
+-
+-              key.objectid = btrfs_header_owner(leaf);
+-              key.type = BTRFS_ROOT_ITEM_KEY;
+-              key.offset = (u64)-1;
+-
+-              check_root = btrfs_get_fs_root(root->fs_info, &key, false);
+-              /*
+-               * The only reason we also check NULL here is that during
+-               * open_ctree() some roots has not yet been set up.
+-               */
+-              if (!IS_ERR_OR_NULL(check_root)) {
+-                      struct extent_buffer *eb;
+-
+-                      eb = btrfs_root_node(check_root);
+-                      /* if leaf is the root, then it's fine */
+-                      if (leaf != eb) {
+-                              CORRUPT("non-root leaf's nritems is 0",
+-                                      leaf, check_root, 0);
+-                              free_extent_buffer(eb);
+-                              return -EUCLEAN;
+-                      }
+-                      free_extent_buffer(eb);
+-              }
+-              return 0;
+-      }
+-
+-      if (nritems == 0)
+-              return 0;
+-
+-      /*
+-       * Check the following things to make sure this is a good leaf, and
+-       * leaf users won't need to bother with similar sanity checks:
+-       *
+-       * 1) key order
+-       * 2) item offset and size
+-       *    No overlap, no hole, all inside the leaf.
+-       * 3) item content
+-       *    If possible, do comprehensive sanity check.
+-       *    NOTE: All checks must only rely on the item data itself.
+-       */
+-      for (slot = 0; slot < nritems; slot++) {
+-              u32 item_end_expected;
+-              int ret;
+-
+-              btrfs_item_key_to_cpu(leaf, &key, slot);
+-
+-              /* Make sure the keys are in the right order */
+-              if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) {
+-                      CORRUPT("bad key order", leaf, root, slot);
+-                      return -EUCLEAN;
+-              }
+-
+-              /*
+-               * Make sure the offset and ends are right, remember that the
+-               * item data starts at the end of the leaf and grows towards the
+-               * front.
+-               */
+-              if (slot == 0)
+-                      item_end_expected = BTRFS_LEAF_DATA_SIZE(root);
+-              else
+-                      item_end_expected = btrfs_item_offset_nr(leaf,
+-                                                               slot - 1);
+-              if (btrfs_item_end_nr(leaf, slot) != item_end_expected) {
+-                      CORRUPT("slot offset bad", leaf, root, slot);
+-                      return -EUCLEAN;
+-              }
+-
+-              /*
+-               * Check to make sure that we don't point outside of the leaf,
+-               * just in case all the items are consistent to each other, but
+-               * all point outside of the leaf.
+-               */
+-              if (btrfs_item_end_nr(leaf, slot) >
+-                  BTRFS_LEAF_DATA_SIZE(root)) {
+-                      CORRUPT("slot end outside of leaf", leaf, root, slot);
+-                      return -EUCLEAN;
+-              }
+-
+-              /* Also check if the item pointer overlaps with btrfs item. */
+-              if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) >
+-                  btrfs_item_ptr_offset(leaf, slot)) {
+-                      CORRUPT("slot overlap with its data", leaf, root, slot);
+-                      return -EUCLEAN;
+-              }
+-
+-              /* Check if the item size and content meet other criteria */
+-              ret = check_leaf_item(root, leaf, &key, slot);
+-              if (ret < 0)
+-                      return ret;
+-
+-              prev_key.objectid = key.objectid;
+-              prev_key.type = key.type;
+-              prev_key.offset = key.offset;
+-      }
+-
+-      return 0;
+-}
+-
+-static int check_node(struct btrfs_root *root, struct extent_buffer *node)
+-{
+-      unsigned long nr = btrfs_header_nritems(node);
+-      struct btrfs_key key, next_key;
+-      int slot;
+-      u64 bytenr;
+-      int ret = 0;
+-
+-      if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root)) {
+-              btrfs_crit(root->fs_info,
+-                         "corrupt node: block %llu root %llu nritems %lu",
+-                         node->start, root->objectid, nr);
+-              return -EIO;
+-      }
+-
+-      for (slot = 0; slot < nr - 1; slot++) {
+-              bytenr = btrfs_node_blockptr(node, slot);
+-              btrfs_node_key_to_cpu(node, &key, slot);
+-              btrfs_node_key_to_cpu(node, &next_key, slot + 1);
+-
+-              if (!bytenr) {
+-                      CORRUPT("invalid item slot", node, root, slot);
+-                      ret = -EIO;
+-                      goto out;
+-              }
+-
+-              if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) {
+-                      CORRUPT("bad key order", node, root, slot);
+-                      ret = -EIO;
+-                      goto out;
+-              }
+-      }
+-out:
+-      return ret;
+-}
+-
+ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
+                                     u64 phy_offset, struct page *page,
+                                     u64 start, u64 end, int mirror)
+@@ -880,12 +604,12 @@ static int btree_readpage_end_io_hook(st
+        * that we don't try and read the other copies of this block, just
+        * return -EIO.
+        */
+-      if (found_level == 0 && check_leaf(root, eb)) {
++      if (found_level == 0 && btrfs_check_leaf(root, eb)) {
+               set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
+               ret = -EIO;
+       }
+ 
+-      if (found_level > 0 && check_node(root, eb))
++      if (found_level > 0 && btrfs_check_node(root, eb))
+               ret = -EIO;
+ 
+       if (!ret)
+@@ -4216,7 +3940,7 @@ void btrfs_mark_buffer_dirty(struct exte
+                                    buf->len,
+                                    root->fs_info->dirty_metadata_batch);
+ #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
+-      if (btrfs_header_level(buf) == 0 && check_leaf(root, buf)) {
++      if (btrfs_header_level(buf) == 0 && btrfs_check_leaf(root, buf)) {
+               btrfs_print_leaf(root, buf);
+               ASSERT(0);
+       }
+--- /dev/null
++++ b/fs/btrfs/tree-checker.c
+@@ -0,0 +1,309 @@
++/*
++ * Copyright (C) Qu Wenruo 2017.  All rights reserved.
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public
++ * License v2 as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public
++ * License along with this program.
++ */
++
++/*
++ * The module is used to catch unexpected/corrupted tree block data.
++ * Such behavior can be caused either by a fuzzed image or bugs.
++ *
++ * The objective is to do leaf/node validation checks when tree block is read
++ * from disk, and check *every* possible member, so other code won't
++ * need to checking them again.
++ *
++ * Due to the potential and unwanted damage, every checker needs to be
++ * carefully reviewed otherwise so it does not prevent mount of valid images.
++ */
++
++#include "ctree.h"
++#include "tree-checker.h"
++#include "disk-io.h"
++#include "compression.h"
++
++#define CORRUPT(reason, eb, root, slot)                                       \
++      btrfs_crit(root->fs_info,                                       \
++                 "corrupt %s, %s: block=%llu, root=%llu, slot=%d",    \
++                 btrfs_header_level(eb) == 0 ? "leaf" : "node",       \
++                 reason, btrfs_header_bytenr(eb), root->objectid, slot)
++
++static int check_extent_data_item(struct btrfs_root *root,
++                                struct extent_buffer *leaf,
++                                struct btrfs_key *key, int slot)
++{
++      struct btrfs_file_extent_item *fi;
++      u32 sectorsize = root->sectorsize;
++      u32 item_size = btrfs_item_size_nr(leaf, slot);
++
++      if (!IS_ALIGNED(key->offset, sectorsize)) {
++              CORRUPT("unaligned key offset for file extent",
++                      leaf, root, slot);
++              return -EUCLEAN;
++      }
++
++      fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
++
++      if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) {
++              CORRUPT("invalid file extent type", leaf, root, slot);
++              return -EUCLEAN;
++      }
++
++      /*
++       * Support for new compression/encrption must introduce incompat flag,
++       * and must be caught in open_ctree().
++       */
++      if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) {
++              CORRUPT("invalid file extent compression", leaf, root, slot);
++              return -EUCLEAN;
++      }
++      if (btrfs_file_extent_encryption(leaf, fi)) {
++              CORRUPT("invalid file extent encryption", leaf, root, slot);
++              return -EUCLEAN;
++      }
++      if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
++              /* Inline extent must have 0 as key offset */
++              if (key->offset) {
++                      CORRUPT("inline extent has non-zero key offset",
++                              leaf, root, slot);
++                      return -EUCLEAN;
++              }
++
++              /* Compressed inline extent has no on-disk size, skip it */
++              if (btrfs_file_extent_compression(leaf, fi) !=
++                  BTRFS_COMPRESS_NONE)
++                      return 0;
++
++              /* Uncompressed inline extent size must match item size */
++              if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START +
++                  btrfs_file_extent_ram_bytes(leaf, fi)) {
++                      CORRUPT("plaintext inline extent has invalid size",
++                              leaf, root, slot);
++                      return -EUCLEAN;
++              }
++              return 0;
++      }
++
++      /* Regular or preallocated extent has fixed item size */
++      if (item_size != sizeof(*fi)) {
++              CORRUPT(
++              "regluar or preallocated extent data item size is invalid",
++                      leaf, root, slot);
++              return -EUCLEAN;
++      }
++      if (!IS_ALIGNED(btrfs_file_extent_ram_bytes(leaf, fi), sectorsize) ||
++          !IS_ALIGNED(btrfs_file_extent_disk_bytenr(leaf, fi), sectorsize) ||
++          !IS_ALIGNED(btrfs_file_extent_disk_num_bytes(leaf, fi), sectorsize) ||
++          !IS_ALIGNED(btrfs_file_extent_offset(leaf, fi), sectorsize) ||
++          !IS_ALIGNED(btrfs_file_extent_num_bytes(leaf, fi), sectorsize)) {
++              CORRUPT(
++              "regular or preallocated extent data item has unaligned value",
++                      leaf, root, slot);
++              return -EUCLEAN;
++      }
++
++      return 0;
++}
++
++static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf,
++                         struct btrfs_key *key, int slot)
++{
++      u32 sectorsize = root->sectorsize;
++      u32 csumsize = btrfs_super_csum_size(root->fs_info->super_copy);
++
++      if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) {
++              CORRUPT("invalid objectid for csum item", leaf, root, slot);
++              return -EUCLEAN;
++      }
++      if (!IS_ALIGNED(key->offset, sectorsize)) {
++              CORRUPT("unaligned key offset for csum item", leaf, root, slot);
++              return -EUCLEAN;
++      }
++      if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) {
++              CORRUPT("unaligned csum item size", leaf, root, slot);
++              return -EUCLEAN;
++      }
++      return 0;
++}
++
++/*
++ * Common point to switch the item-specific validation.
++ */
++static int check_leaf_item(struct btrfs_root *root,
++                         struct extent_buffer *leaf,
++                         struct btrfs_key *key, int slot)
++{
++      int ret = 0;
++
++      switch (key->type) {
++      case BTRFS_EXTENT_DATA_KEY:
++              ret = check_extent_data_item(root, leaf, key, slot);
++              break;
++      case BTRFS_EXTENT_CSUM_KEY:
++              ret = check_csum_item(root, leaf, key, slot);
++              break;
++      }
++      return ret;
++}
++
++int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf)
++{
++      struct btrfs_fs_info *fs_info = root->fs_info;
++      /* No valid key type is 0, so all key should be larger than this key */
++      struct btrfs_key prev_key = {0, 0, 0};
++      struct btrfs_key key;
++      u32 nritems = btrfs_header_nritems(leaf);
++      int slot;
++
++      /*
++       * Extent buffers from a relocation tree have a owner field that
++       * corresponds to the subvolume tree they are based on. So just from an
++       * extent buffer alone we can not find out what is the id of the
++       * corresponding subvolume tree, so we can not figure out if the extent
++       * buffer corresponds to the root of the relocation tree or not. So
++       * skip this check for relocation trees.
++       */
++      if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
++              struct btrfs_root *check_root;
++
++              key.objectid = btrfs_header_owner(leaf);
++              key.type = BTRFS_ROOT_ITEM_KEY;
++              key.offset = (u64)-1;
++
++              check_root = btrfs_get_fs_root(fs_info, &key, false);
++              /*
++               * The only reason we also check NULL here is that during
++               * open_ctree() some roots has not yet been set up.
++               */
++              if (!IS_ERR_OR_NULL(check_root)) {
++                      struct extent_buffer *eb;
++
++                      eb = btrfs_root_node(check_root);
++                      /* if leaf is the root, then it's fine */
++                      if (leaf != eb) {
++                              CORRUPT("non-root leaf's nritems is 0",
++                                      leaf, check_root, 0);
++                              free_extent_buffer(eb);
++                              return -EUCLEAN;
++                      }
++                      free_extent_buffer(eb);
++              }
++              return 0;
++      }
++
++      if (nritems == 0)
++              return 0;
++
++      /*
++       * Check the following things to make sure this is a good leaf, and
++       * leaf users won't need to bother with similar sanity checks:
++       *
++       * 1) key ordering
++       * 2) item offset and size
++       *    No overlap, no hole, all inside the leaf.
++       * 3) item content
++       *    If possible, do comprehensive sanity check.
++       *    NOTE: All checks must only rely on the item data itself.
++       */
++      for (slot = 0; slot < nritems; slot++) {
++              u32 item_end_expected;
++              int ret;
++
++              btrfs_item_key_to_cpu(leaf, &key, slot);
++
++              /* Make sure the keys are in the right order */
++              if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) {
++                      CORRUPT("bad key order", leaf, root, slot);
++                      return -EUCLEAN;
++              }
++
++              /*
++               * Make sure the offset and ends are right, remember that the
++               * item data starts at the end of the leaf and grows towards the
++               * front.
++               */
++              if (slot == 0)
++                      item_end_expected = BTRFS_LEAF_DATA_SIZE(root);
++              else
++                      item_end_expected = btrfs_item_offset_nr(leaf,
++                                                               slot - 1);
++              if (btrfs_item_end_nr(leaf, slot) != item_end_expected) {
++                      CORRUPT("slot offset bad", leaf, root, slot);
++                      return -EUCLEAN;
++              }
++
++              /*
++               * Check to make sure that we don't point outside of the leaf,
++               * just in case all the items are consistent to each other, but
++               * all point outside of the leaf.
++               */
++              if (btrfs_item_end_nr(leaf, slot) >
++                  BTRFS_LEAF_DATA_SIZE(root)) {
++                      CORRUPT("slot end outside of leaf", leaf, root, slot);
++                      return -EUCLEAN;
++              }
++
++              /* Also check if the item pointer overlaps with btrfs item. */
++              if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) >
++                  btrfs_item_ptr_offset(leaf, slot)) {
++                      CORRUPT("slot overlap with its data", leaf, root, slot);
++                      return -EUCLEAN;
++              }
++
++              /* Check if the item size and content meet other criteria */
++              ret = check_leaf_item(root, leaf, &key, slot);
++              if (ret < 0)
++                      return ret;
++
++              prev_key.objectid = key.objectid;
++              prev_key.type = key.type;
++              prev_key.offset = key.offset;
++      }
++
++      return 0;
++}
++
++int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node)
++{
++      unsigned long nr = btrfs_header_nritems(node);
++      struct btrfs_key key, next_key;
++      int slot;
++      u64 bytenr;
++      int ret = 0;
++
++      if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root)) {
++              btrfs_crit(root->fs_info,
++                         "corrupt node: block %llu root %llu nritems %lu",
++                         node->start, root->objectid, nr);
++              return -EIO;
++      }
++
++      for (slot = 0; slot < nr - 1; slot++) {
++              bytenr = btrfs_node_blockptr(node, slot);
++              btrfs_node_key_to_cpu(node, &key, slot);
++              btrfs_node_key_to_cpu(node, &next_key, slot + 1);
++
++              if (!bytenr) {
++                      CORRUPT("invalid item slot", node, root, slot);
++                      ret = -EIO;
++                      goto out;
++              }
++
++              if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) {
++                      CORRUPT("bad key order", node, root, slot);
++                      ret = -EIO;
++                      goto out;
++              }
++      }
++out:
++      return ret;
++}
+--- /dev/null
++++ b/fs/btrfs/tree-checker.h
+@@ -0,0 +1,26 @@
++/*
++ * Copyright (C) Qu Wenruo 2017.  All rights reserved.
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public
++ * License v2 as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public
++ * License along with this program.
++ */
++
++#ifndef __BTRFS_TREE_CHECKER__
++#define __BTRFS_TREE_CHECKER__
++
++#include "ctree.h"
++#include "extent_io.h"
++
++int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf);
++int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node);
++
++#endif
diff --git a/queue-4.9/btrfs-refactor-check_leaf-function-for-later-expansion.patch b/queue-4.9/btrfs-refactor-check_leaf-function-for-later-expansion.patch

new file mode 100644 (file)

index 0000000..7e41ed1
--- /dev/null
+++ b/queue-4.9/btrfs-refactor-check_leaf-function-for-later-expansion.patch
@@ -0,0 +1,137 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Qu Wenruo <quwenruo.btrfs@gmx.com>
+Date: Wed, 23 Aug 2017 16:57:56 +0900
+Subject: btrfs: Refactor check_leaf function for later expansion
+
+From: Qu Wenruo <quwenruo.btrfs@gmx.com>
+
+commit c3267bbaa9cae09b62960eafe33ad19196803285 upstream.
+
+Current check_leaf() function does a good job checking key order and
+item offset/size.
+
+However, it only checks from slot 0 to the second-to-last slot. This
+works, but makes later expansion hard.
+
+So this refactoring iterates from slot 0 to the last slot.
+For key comparison, it uses a key with all 0 as initial key, so all
+valid keys should be larger than that.
+
+And for item size/offset checks, it compares current item end with
+previous item offset.
+For slot 0, use leaf end as a special case.
+
+This makes later item/key offset checks and item size checks easier to
+implement.
+
+Also, make check_leaf() return -EUCLEAN rather than -EIO to indicate an
+error.
+
+Signed-off-by: Qu Wenruo <quwenruo.btrfs@gmx.com>
+Reviewed-by: Nikolay Borisov <nborisov@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+[bwh: Backported to 4.9:
+ - BTRFS_LEAF_DATA_SIZE() takes a root rather than an fs_info
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/disk-io.c |   50 +++++++++++++++++++++++++++-----------------------
+ 1 file changed, 27 insertions(+), 23 deletions(-)
+
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -547,8 +547,9 @@ static int check_tree_block_fsid(struct
+ static noinline int check_leaf(struct btrfs_root *root,
+                              struct extent_buffer *leaf)
+ {
++      /* No valid key type is 0, so all key should be larger than this key */
++      struct btrfs_key prev_key = {0, 0, 0};
+       struct btrfs_key key;
+-      struct btrfs_key leaf_key;
+       u32 nritems = btrfs_header_nritems(leaf);
+       int slot;
+ 
+@@ -581,7 +582,7 @@ static noinline int check_leaf(struct bt
+                               CORRUPT("non-root leaf's nritems is 0",
+                                       leaf, check_root, 0);
+                               free_extent_buffer(eb);
+-                              return -EIO;
++                              return -EUCLEAN;
+                       }
+                       free_extent_buffer(eb);
+               }
+@@ -591,28 +592,23 @@ static noinline int check_leaf(struct bt
+       if (nritems == 0)
+               return 0;
+ 
+-      /* Check the 0 item */
+-      if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
+-          BTRFS_LEAF_DATA_SIZE(root)) {
+-              CORRUPT("invalid item offset size pair", leaf, root, 0);
+-              return -EIO;
+-      }
+-
+       /*
+-       * Check to make sure each items keys are in the correct order and their
+-       * offsets make sense.  We only have to loop through nritems-1 because
+-       * we check the current slot against the next slot, which verifies the
+-       * next slot's offset+size makes sense and that the current's slot
+-       * offset is correct.
++       * Check the following things to make sure this is a good leaf, and
++       * leaf users won't need to bother with similar sanity checks:
++       *
++       * 1) key order
++       * 2) item offset and size
++       *    No overlap, no hole, all inside the leaf.
+        */
+-      for (slot = 0; slot < nritems - 1; slot++) {
+-              btrfs_item_key_to_cpu(leaf, &leaf_key, slot);
+-              btrfs_item_key_to_cpu(leaf, &key, slot + 1);
++      for (slot = 0; slot < nritems; slot++) {
++              u32 item_end_expected;
++
++              btrfs_item_key_to_cpu(leaf, &key, slot);
+ 
+               /* Make sure the keys are in the right order */
+-              if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) {
++              if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) {
+                       CORRUPT("bad key order", leaf, root, slot);
+-                      return -EIO;
++                      return -EUCLEAN;
+               }
+ 
+               /*
+@@ -620,10 +616,14 @@ static noinline int check_leaf(struct bt
+                * item data starts at the end of the leaf and grows towards the
+                * front.
+                */
+-              if (btrfs_item_offset_nr(leaf, slot) !=
+-                      btrfs_item_end_nr(leaf, slot + 1)) {
++              if (slot == 0)
++                      item_end_expected = BTRFS_LEAF_DATA_SIZE(root);
++              else
++                      item_end_expected = btrfs_item_offset_nr(leaf,
++                                                               slot - 1);
++              if (btrfs_item_end_nr(leaf, slot) != item_end_expected) {
+                       CORRUPT("slot offset bad", leaf, root, slot);
+-                      return -EIO;
++                      return -EUCLEAN;
+               }
+ 
+               /*
+@@ -634,8 +634,12 @@ static noinline int check_leaf(struct bt
+               if (btrfs_item_end_nr(leaf, slot) >
+                   BTRFS_LEAF_DATA_SIZE(root)) {
+                       CORRUPT("slot end outside of leaf", leaf, root, slot);
+-                      return -EIO;
++                      return -EUCLEAN;
+               }
++
++              prev_key.objectid = key.objectid;
++              prev_key.type = key.type;
++              prev_key.offset = key.offset;
+       }
+ 
+       return 0;
diff --git a/queue-4.9/btrfs-struct-funcs-constify-readers.patch b/queue-4.9/btrfs-struct-funcs-constify-readers.patch

new file mode 100644 (file)

index 0000000..d5c719b
--- /dev/null
+++ b/queue-4.9/btrfs-struct-funcs-constify-readers.patch
@@ -0,0 +1,532 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Jeff Mahoney <jeffm@suse.com>
+Date: Wed, 28 Jun 2017 21:56:53 -0600
+Subject: btrfs: struct-funcs, constify readers
+
+From: Jeff Mahoney <jeffm@suse.com>
+
+commit 1cbb1f454e5321e47fc1e6b233066c7ccc979d15 upstream.
+
+We have read-only reader helpers for most of the on-disk structures;
+they take an extent_buffer and a pointer used as an offset into it.
+We should mark these arguments const and, in turn, allow consumers
+of these interfaces to mark the buffers const as well.
+
+No impact on code, but serves as documentation that a buffer is intended
+not to be modified.
+
+Signed-off-by: Jeff Mahoney <jeffm@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ctree.h        |  128 ++++++++++++++++++++++++------------------------
+ fs/btrfs/extent_io.c    |   24 ++++-----
+ fs/btrfs/extent_io.h    |   19 +++----
+ fs/btrfs/struct-funcs.c |    9 +--
+ 4 files changed, 91 insertions(+), 89 deletions(-)
+
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -1415,7 +1415,7 @@ do {
+ #define BTRFS_INODE_ROOT_ITEM_INIT    (1 << 31)
+ 
+ struct btrfs_map_token {
+-      struct extent_buffer *eb;
++      const struct extent_buffer *eb;
+       char *kaddr;
+       unsigned long offset;
+ };
+@@ -1449,18 +1449,19 @@ static inline void btrfs_init_map_token
+                          sizeof(((type *)0)->member)))
+ 
+ #define DECLARE_BTRFS_SETGET_BITS(bits)                                       \
+-u##bits btrfs_get_token_##bits(struct extent_buffer *eb, void *ptr,   \
+-                             unsigned long off,                       \
+-                              struct btrfs_map_token *token);         \
+-void btrfs_set_token_##bits(struct extent_buffer *eb, void *ptr,      \
++u##bits btrfs_get_token_##bits(const struct extent_buffer *eb,                \
++                             const void *ptr, unsigned long off,      \
++                             struct btrfs_map_token *token);          \
++void btrfs_set_token_##bits(struct extent_buffer *eb, const void *ptr,        \
+                           unsigned long off, u##bits val,             \
+                           struct btrfs_map_token *token);             \
+-static inline u##bits btrfs_get_##bits(struct extent_buffer *eb, void *ptr, \
++static inline u##bits btrfs_get_##bits(const struct extent_buffer *eb,        \
++                                     const void *ptr,                 \
+                                      unsigned long off)               \
+ {                                                                     \
+       return btrfs_get_token_##bits(eb, ptr, off, NULL);              \
+ }                                                                     \
+-static inline void btrfs_set_##bits(struct extent_buffer *eb, void *ptr, \
++static inline void btrfs_set_##bits(struct extent_buffer *eb, void *ptr,\
+                                   unsigned long off, u##bits val)     \
+ {                                                                     \
+        btrfs_set_token_##bits(eb, ptr, off, val, NULL);                       \
+@@ -1472,7 +1473,8 @@ DECLARE_BTRFS_SETGET_BITS(32)
+ DECLARE_BTRFS_SETGET_BITS(64)
+ 
+ #define BTRFS_SETGET_FUNCS(name, type, member, bits)                  \
+-static inline u##bits btrfs_##name(struct extent_buffer *eb, type *s) \
++static inline u##bits btrfs_##name(const struct extent_buffer *eb,    \
++                                 const type *s)                       \
+ {                                                                     \
+       BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member);   \
+       return btrfs_get_##bits(eb, s, offsetof(type, member));         \
+@@ -1483,7 +1485,8 @@ static inline void btrfs_set_##name(stru
+       BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member);   \
+       btrfs_set_##bits(eb, s, offsetof(type, member), val);           \
+ }                                                                     \
+-static inline u##bits btrfs_token_##name(struct extent_buffer *eb, type *s, \
++static inline u##bits btrfs_token_##name(const struct extent_buffer *eb,\
++                                       const type *s,                 \
+                                        struct btrfs_map_token *token) \
+ {                                                                     \
+       BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member);   \
+@@ -1498,9 +1501,9 @@ static inline void btrfs_set_token_##nam
+ }
+ 
+ #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits)           \
+-static inline u##bits btrfs_##name(struct extent_buffer *eb)          \
++static inline u##bits btrfs_##name(const struct extent_buffer *eb)    \
+ {                                                                     \
+-      type *p = page_address(eb->pages[0]);                           \
++      const type *p = page_address(eb->pages[0]);                     \
+       u##bits res = le##bits##_to_cpu(p->member);                     \
+       return res;                                                     \
+ }                                                                     \
+@@ -1512,7 +1515,7 @@ static inline void btrfs_set_##name(stru
+ }
+ 
+ #define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits)            \
+-static inline u##bits btrfs_##name(type *s)                           \
++static inline u##bits btrfs_##name(const type *s)                     \
+ {                                                                     \
+       return le##bits##_to_cpu(s->member);                            \
+ }                                                                     \
+@@ -1818,7 +1821,7 @@ static inline unsigned long btrfs_node_k
+               sizeof(struct btrfs_key_ptr) * nr;
+ }
+ 
+-void btrfs_node_key(struct extent_buffer *eb,
++void btrfs_node_key(const struct extent_buffer *eb,
+                   struct btrfs_disk_key *disk_key, int nr);
+ 
+ static inline void btrfs_set_node_key(struct extent_buffer *eb,
+@@ -1847,28 +1850,28 @@ static inline struct btrfs_item *btrfs_i
+       return (struct btrfs_item *)btrfs_item_nr_offset(nr);
+ }
+ 
+-static inline u32 btrfs_item_end(struct extent_buffer *eb,
++static inline u32 btrfs_item_end(const struct extent_buffer *eb,
+                                struct btrfs_item *item)
+ {
+       return btrfs_item_offset(eb, item) + btrfs_item_size(eb, item);
+ }
+ 
+-static inline u32 btrfs_item_end_nr(struct extent_buffer *eb, int nr)
++static inline u32 btrfs_item_end_nr(const struct extent_buffer *eb, int nr)
+ {
+       return btrfs_item_end(eb, btrfs_item_nr(nr));
+ }
+ 
+-static inline u32 btrfs_item_offset_nr(struct extent_buffer *eb, int nr)
++static inline u32 btrfs_item_offset_nr(const struct extent_buffer *eb, int nr)
+ {
+       return btrfs_item_offset(eb, btrfs_item_nr(nr));
+ }
+ 
+-static inline u32 btrfs_item_size_nr(struct extent_buffer *eb, int nr)
++static inline u32 btrfs_item_size_nr(const struct extent_buffer *eb, int nr)
+ {
+       return btrfs_item_size(eb, btrfs_item_nr(nr));
+ }
+ 
+-static inline void btrfs_item_key(struct extent_buffer *eb,
++static inline void btrfs_item_key(const struct extent_buffer *eb,
+                          struct btrfs_disk_key *disk_key, int nr)
+ {
+       struct btrfs_item *item = btrfs_item_nr(nr);
+@@ -1904,8 +1907,8 @@ BTRFS_SETGET_STACK_FUNCS(stack_dir_name_
+ BTRFS_SETGET_STACK_FUNCS(stack_dir_transid, struct btrfs_dir_item,
+                        transid, 64);
+ 
+-static inline void btrfs_dir_item_key(struct extent_buffer *eb,
+-                                    struct btrfs_dir_item *item,
++static inline void btrfs_dir_item_key(const struct extent_buffer *eb,
++                                    const struct btrfs_dir_item *item,
+                                     struct btrfs_disk_key *key)
+ {
+       read_eb_member(eb, item, struct btrfs_dir_item, location, key);
+@@ -1913,7 +1916,7 @@ static inline void btrfs_dir_item_key(st
+ 
+ static inline void btrfs_set_dir_item_key(struct extent_buffer *eb,
+                                         struct btrfs_dir_item *item,
+-                                        struct btrfs_disk_key *key)
++                                        const struct btrfs_disk_key *key)
+ {
+       write_eb_member(eb, item, struct btrfs_dir_item, location, key);
+ }
+@@ -1925,8 +1928,8 @@ BTRFS_SETGET_FUNCS(free_space_bitmaps, s
+ BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header,
+                  generation, 64);
+ 
+-static inline void btrfs_free_space_key(struct extent_buffer *eb,
+-                                      struct btrfs_free_space_header *h,
++static inline void btrfs_free_space_key(const struct extent_buffer *eb,
++                                      const struct btrfs_free_space_header *h,
+                                       struct btrfs_disk_key *key)
+ {
+       read_eb_member(eb, h, struct btrfs_free_space_header, location, key);
+@@ -1934,7 +1937,7 @@ static inline void btrfs_free_space_key(
+ 
+ static inline void btrfs_set_free_space_key(struct extent_buffer *eb,
+                                           struct btrfs_free_space_header *h,
+-                                          struct btrfs_disk_key *key)
++                                          const struct btrfs_disk_key *key)
+ {
+       write_eb_member(eb, h, struct btrfs_free_space_header, location, key);
+ }
+@@ -1961,25 +1964,25 @@ static inline void btrfs_cpu_key_to_disk
+       disk->objectid = cpu_to_le64(cpu->objectid);
+ }
+ 
+-static inline void btrfs_node_key_to_cpu(struct extent_buffer *eb,
+-                                struct btrfs_key *key, int nr)
++static inline void btrfs_node_key_to_cpu(const struct extent_buffer *eb,
++                                       struct btrfs_key *key, int nr)
+ {
+       struct btrfs_disk_key disk_key;
+       btrfs_node_key(eb, &disk_key, nr);
+       btrfs_disk_key_to_cpu(key, &disk_key);
+ }
+ 
+-static inline void btrfs_item_key_to_cpu(struct extent_buffer *eb,
+-                                struct btrfs_key *key, int nr)
++static inline void btrfs_item_key_to_cpu(const struct extent_buffer *eb,
++                                       struct btrfs_key *key, int nr)
+ {
+       struct btrfs_disk_key disk_key;
+       btrfs_item_key(eb, &disk_key, nr);
+       btrfs_disk_key_to_cpu(key, &disk_key);
+ }
+ 
+-static inline void btrfs_dir_item_key_to_cpu(struct extent_buffer *eb,
+-                                    struct btrfs_dir_item *item,
+-                                    struct btrfs_key *key)
++static inline void btrfs_dir_item_key_to_cpu(const struct extent_buffer *eb,
++                                           const struct btrfs_dir_item *item,
++                                           struct btrfs_key *key)
+ {
+       struct btrfs_disk_key disk_key;
+       btrfs_dir_item_key(eb, item, &disk_key);
+@@ -2012,7 +2015,7 @@ BTRFS_SETGET_STACK_FUNCS(stack_header_nr
+                        nritems, 32);
+ BTRFS_SETGET_STACK_FUNCS(stack_header_bytenr, struct btrfs_header, bytenr, 64);
+ 
+-static inline int btrfs_header_flag(struct extent_buffer *eb, u64 flag)
++static inline int btrfs_header_flag(const struct extent_buffer *eb, u64 flag)
+ {
+       return (btrfs_header_flags(eb) & flag) == flag;
+ }
+@@ -2031,7 +2034,7 @@ static inline int btrfs_clear_header_fla
+       return (flags & flag) == flag;
+ }
+ 
+-static inline int btrfs_header_backref_rev(struct extent_buffer *eb)
++static inline int btrfs_header_backref_rev(const struct extent_buffer *eb)
+ {
+       u64 flags = btrfs_header_flags(eb);
+       return flags >> BTRFS_BACKREF_REV_SHIFT;
+@@ -2051,12 +2054,12 @@ static inline unsigned long btrfs_header
+       return offsetof(struct btrfs_header, fsid);
+ }
+ 
+-static inline unsigned long btrfs_header_chunk_tree_uuid(struct extent_buffer *eb)
++static inline unsigned long btrfs_header_chunk_tree_uuid(const struct extent_buffer *eb)
+ {
+       return offsetof(struct btrfs_header, chunk_tree_uuid);
+ }
+ 
+-static inline int btrfs_is_leaf(struct extent_buffer *eb)
++static inline int btrfs_is_leaf(const struct extent_buffer *eb)
+ {
+       return btrfs_header_level(eb) == 0;
+ }
+@@ -2090,12 +2093,12 @@ BTRFS_SETGET_STACK_FUNCS(root_stransid,
+ BTRFS_SETGET_STACK_FUNCS(root_rtransid, struct btrfs_root_item,
+                        rtransid, 64);
+ 
+-static inline bool btrfs_root_readonly(struct btrfs_root *root)
++static inline bool btrfs_root_readonly(const struct btrfs_root *root)
+ {
+       return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_RDONLY)) != 0;
+ }
+ 
+-static inline bool btrfs_root_dead(struct btrfs_root *root)
++static inline bool btrfs_root_dead(const struct btrfs_root *root)
+ {
+       return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_DEAD)) != 0;
+ }
+@@ -2152,51 +2155,51 @@ BTRFS_SETGET_STACK_FUNCS(backup_num_devi
+ /* struct btrfs_balance_item */
+ BTRFS_SETGET_FUNCS(balance_flags, struct btrfs_balance_item, flags, 64);
+ 
+-static inline void btrfs_balance_data(struct extent_buffer *eb,
+-                                    struct btrfs_balance_item *bi,
++static inline void btrfs_balance_data(const struct extent_buffer *eb,
++                                    const struct btrfs_balance_item *bi,
+                                     struct btrfs_disk_balance_args *ba)
+ {
+       read_eb_member(eb, bi, struct btrfs_balance_item, data, ba);
+ }
+ 
+ static inline void btrfs_set_balance_data(struct extent_buffer *eb,
+-                                        struct btrfs_balance_item *bi,
+-                                        struct btrfs_disk_balance_args *ba)
++                                struct btrfs_balance_item *bi,
++                                const struct btrfs_disk_balance_args *ba)
+ {
+       write_eb_member(eb, bi, struct btrfs_balance_item, data, ba);
+ }
+ 
+-static inline void btrfs_balance_meta(struct extent_buffer *eb,
+-                                    struct btrfs_balance_item *bi,
++static inline void btrfs_balance_meta(const struct extent_buffer *eb,
++                                    const struct btrfs_balance_item *bi,
+                                     struct btrfs_disk_balance_args *ba)
+ {
+       read_eb_member(eb, bi, struct btrfs_balance_item, meta, ba);
+ }
+ 
+ static inline void btrfs_set_balance_meta(struct extent_buffer *eb,
+-                                        struct btrfs_balance_item *bi,
+-                                        struct btrfs_disk_balance_args *ba)
++                                struct btrfs_balance_item *bi,
++                                const struct btrfs_disk_balance_args *ba)
+ {
+       write_eb_member(eb, bi, struct btrfs_balance_item, meta, ba);
+ }
+ 
+-static inline void btrfs_balance_sys(struct extent_buffer *eb,
+-                                   struct btrfs_balance_item *bi,
++static inline void btrfs_balance_sys(const struct extent_buffer *eb,
++                                   const struct btrfs_balance_item *bi,
+                                    struct btrfs_disk_balance_args *ba)
+ {
+       read_eb_member(eb, bi, struct btrfs_balance_item, sys, ba);
+ }
+ 
+ static inline void btrfs_set_balance_sys(struct extent_buffer *eb,
+-                                       struct btrfs_balance_item *bi,
+-                                       struct btrfs_disk_balance_args *ba)
++                               struct btrfs_balance_item *bi,
++                               const struct btrfs_disk_balance_args *ba)
+ {
+       write_eb_member(eb, bi, struct btrfs_balance_item, sys, ba);
+ }
+ 
+ static inline void
+ btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu,
+-                             struct btrfs_disk_balance_args *disk)
++                             const struct btrfs_disk_balance_args *disk)
+ {
+       memset(cpu, 0, sizeof(*cpu));
+ 
+@@ -2216,7 +2219,7 @@ btrfs_disk_balance_args_to_cpu(struct bt
+ 
+ static inline void
+ btrfs_cpu_balance_args_to_disk(struct btrfs_disk_balance_args *disk,
+-                             struct btrfs_balance_args *cpu)
++                             const struct btrfs_balance_args *cpu)
+ {
+       memset(disk, 0, sizeof(*disk));
+ 
+@@ -2284,7 +2287,7 @@ BTRFS_SETGET_STACK_FUNCS(super_magic, st
+ BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block,
+                        uuid_tree_generation, 64);
+ 
+-static inline int btrfs_super_csum_size(struct btrfs_super_block *s)
++static inline int btrfs_super_csum_size(const struct btrfs_super_block *s)
+ {
+       u16 t = btrfs_super_csum_type(s);
+       /*
+@@ -2303,8 +2306,8 @@ static inline unsigned long btrfs_leaf_d
+  * this returns the address of the start of the last item,
+  * which is the stop of the leaf data stack
+  */
+-static inline unsigned int leaf_data_end(struct btrfs_root *root,
+-                                       struct extent_buffer *leaf)
++static inline unsigned int leaf_data_end(const struct btrfs_root *root,
++                                       const struct extent_buffer *leaf)
+ {
+       u32 nr = btrfs_header_nritems(leaf);
+ 
+@@ -2329,7 +2332,7 @@ BTRFS_SETGET_STACK_FUNCS(stack_file_exte
+                        struct btrfs_file_extent_item, compression, 8);
+ 
+ static inline unsigned long
+-btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e)
++btrfs_file_extent_inline_start(const struct btrfs_file_extent_item *e)
+ {
+       return (unsigned long)e + BTRFS_FILE_EXTENT_INLINE_DATA_START;
+ }
+@@ -2363,8 +2366,9 @@ BTRFS_SETGET_FUNCS(file_extent_other_enc
+  * size of any extent headers.  If a file is compressed on disk, this is
+  * the compressed size
+  */
+-static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb,
+-                                                  struct btrfs_item *e)
++static inline u32 btrfs_file_extent_inline_item_len(
++                                              const struct extent_buffer *eb,
++                                              struct btrfs_item *e)
+ {
+       return btrfs_item_size(eb, e) - BTRFS_FILE_EXTENT_INLINE_DATA_START;
+ }
+@@ -2372,9 +2376,9 @@ static inline u32 btrfs_file_extent_inli
+ /* this returns the number of file bytes represented by the inline item.
+  * If an item is compressed, this is the uncompressed size
+  */
+-static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb,
+-                                             int slot,
+-                                             struct btrfs_file_extent_item *fi)
++static inline u32 btrfs_file_extent_inline_len(const struct extent_buffer *eb,
++                                      int slot,
++                                      const struct btrfs_file_extent_item *fi)
+ {
+       struct btrfs_map_token token;
+ 
+@@ -2396,8 +2400,8 @@ static inline u32 btrfs_file_extent_inli
+ 
+ 
+ /* btrfs_dev_stats_item */
+-static inline u64 btrfs_dev_stats_value(struct extent_buffer *eb,
+-                                      struct btrfs_dev_stats_item *ptr,
++static inline u64 btrfs_dev_stats_value(const struct extent_buffer *eb,
++                                      const struct btrfs_dev_stats_item *ptr,
+                                       int index)
+ {
+       u64 val;
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -5442,9 +5442,8 @@ unlock_exit:
+       return ret;
+ }
+ 
+-void read_extent_buffer(struct extent_buffer *eb, void *dstv,
+-                      unsigned long start,
+-                      unsigned long len)
++void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
++                      unsigned long start, unsigned long len)
+ {
+       size_t cur;
+       size_t offset;
+@@ -5473,9 +5472,9 @@ void read_extent_buffer(struct extent_bu
+       }
+ }
+ 
+-int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv,
+-                      unsigned long start,
+-                      unsigned long len)
++int read_extent_buffer_to_user(const struct extent_buffer *eb,
++                             void __user *dstv,
++                             unsigned long start, unsigned long len)
+ {
+       size_t cur;
+       size_t offset;
+@@ -5515,10 +5514,10 @@ int read_extent_buffer_to_user(struct ex
+  * return 1 if the item spans two pages.
+  * return -EINVAL otherwise.
+  */
+-int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
+-                             unsigned long min_len, char **map,
+-                             unsigned long *map_start,
+-                             unsigned long *map_len)
++int map_private_extent_buffer(const struct extent_buffer *eb,
++                            unsigned long start, unsigned long min_len,
++                            char **map, unsigned long *map_start,
++                            unsigned long *map_len)
+ {
+       size_t offset = start & (PAGE_SIZE - 1);
+       char *kaddr;
+@@ -5552,9 +5551,8 @@ int map_private_extent_buffer(struct ext
+       return 0;
+ }
+ 
+-int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
+-                        unsigned long start,
+-                        unsigned long len)
++int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
++                       unsigned long start, unsigned long len)
+ {
+       size_t cur;
+       size_t offset;
+--- a/fs/btrfs/extent_io.h
++++ b/fs/btrfs/extent_io.h
+@@ -396,14 +396,13 @@ static inline void extent_buffer_get(str
+       atomic_inc(&eb->refs);
+ }
+ 
+-int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
+-                        unsigned long start,
+-                        unsigned long len);
+-void read_extent_buffer(struct extent_buffer *eb, void *dst,
++int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
++                       unsigned long start, unsigned long len);
++void read_extent_buffer(const struct extent_buffer *eb, void *dst,
+                       unsigned long start,
+                       unsigned long len);
+-int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dst,
+-                             unsigned long start,
++int read_extent_buffer_to_user(const struct extent_buffer *eb,
++                             void __user *dst, unsigned long start,
+                              unsigned long len);
+ void write_extent_buffer(struct extent_buffer *eb, const void *src,
+                        unsigned long start, unsigned long len);
+@@ -428,10 +427,10 @@ void set_extent_buffer_uptodate(struct e
+ void clear_extent_buffer_uptodate(struct extent_buffer *eb);
+ int extent_buffer_uptodate(struct extent_buffer *eb);
+ int extent_buffer_under_io(struct extent_buffer *eb);
+-int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
+-                    unsigned long min_len, char **map,
+-                    unsigned long *map_start,
+-                    unsigned long *map_len);
++int map_private_extent_buffer(const struct extent_buffer *eb,
++                            unsigned long offset, unsigned long min_len,
++                            char **map, unsigned long *map_start,
++                            unsigned long *map_len);
+ void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
+ void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
+ void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
+--- a/fs/btrfs/struct-funcs.c
++++ b/fs/btrfs/struct-funcs.c
+@@ -50,8 +50,8 @@ static inline void put_unaligned_le8(u8
+  */
+ 
+ #define DEFINE_BTRFS_SETGET_BITS(bits)                                        \
+-u##bits btrfs_get_token_##bits(struct extent_buffer *eb, void *ptr,   \
+-                             unsigned long off,                       \
++u##bits btrfs_get_token_##bits(const struct extent_buffer *eb,                \
++                             const void *ptr, unsigned long off,      \
+                              struct btrfs_map_token *token)           \
+ {                                                                     \
+       unsigned long part_offset = (unsigned long)ptr;                 \
+@@ -90,7 +90,8 @@ u##bits btrfs_get_token_##bits(struct ex
+       return res;                                                     \
+ }                                                                     \
+ void btrfs_set_token_##bits(struct extent_buffer *eb,                 \
+-                          void *ptr, unsigned long off, u##bits val,  \
++                          const void *ptr, unsigned long off,         \
++                          u##bits val,                                \
+                           struct btrfs_map_token *token)              \
+ {                                                                     \
+       unsigned long part_offset = (unsigned long)ptr;                 \
+@@ -133,7 +134,7 @@ DEFINE_BTRFS_SETGET_BITS(16)
+ DEFINE_BTRFS_SETGET_BITS(32)
+ DEFINE_BTRFS_SETGET_BITS(64)
+ 
+-void btrfs_node_key(struct extent_buffer *eb,
++void btrfs_node_key(const struct extent_buffer *eb,
+                   struct btrfs_disk_key *disk_key, int nr)
+ {
+       unsigned long ptr = btrfs_node_key_ptr_offset(nr);
diff --git a/queue-4.9/btrfs-tree-check-reduce-stack-consumption-in-check_dir_item.patch b/queue-4.9/btrfs-tree-check-reduce-stack-consumption-in-check_dir_item.patch

new file mode 100644 (file)

index 0000000..1255737
--- /dev/null
+++ b/queue-4.9/btrfs-tree-check-reduce-stack-consumption-in-check_dir_item.patch
@@ -0,0 +1,49 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: David Sterba <dsterba@suse.com>
+Date: Wed, 10 Jan 2018 15:13:07 +0100
+Subject: btrfs: tree-check: reduce stack consumption in check_dir_item
+
+From: David Sterba <dsterba@suse.com>
+
+commit e2683fc9d219430f5b78889b50cde7f40efeba7b upstream.
+
+I've noticed that the stack consumption of the updated item checker
+increased dramatically in 542f5385e20cf97447 ("btrfs: tree-checker: Add
+checker for dir item"):
+
+tree-checker.c:check_leaf                    +552 (176 -> 728)
+
+The array is 255 bytes long; dynamic allocation would slow down the
+sanity checks, so it's more reasonable to keep it on-stack. Moving the
+variable to the scope of use reduces the stack usage again:
+
+tree-checker.c:check_leaf                    -264 (728 -> 464)
+
+Reviewed-by: Josef Bacik <jbacik@fb.com>
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/tree-checker.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/tree-checker.c
++++ b/fs/btrfs/tree-checker.c
+@@ -212,7 +212,6 @@ static int check_dir_item(struct btrfs_r
+ 
+       di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
+       while (cur < item_size) {
+-              char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)];
+               u32 name_len;
+               u32 data_len;
+               u32 max_name_len;
+@@ -295,6 +294,8 @@ static int check_dir_item(struct btrfs_r
+                */
+               if (key->type == BTRFS_DIR_ITEM_KEY ||
+                   key->type == BTRFS_XATTR_ITEM_KEY) {
++                      char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)];
++
+                       read_extent_buffer(leaf, namebuf,
+                                       (unsigned long)(di + 1), name_len);
+                       name_hash = btrfs_name_hash(namebuf, name_len);
diff --git a/queue-4.9/btrfs-tree-checker-add-checker-for-dir-item.patch b/queue-4.9/btrfs-tree-checker-add-checker-for-dir-item.patch

new file mode 100644 (file)

index 0000000..d91ab59
--- /dev/null
+++ b/queue-4.9/btrfs-tree-checker-add-checker-for-dir-item.patch
@@ -0,0 +1,208 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Qu Wenruo <wqu@suse.com>
+Date: Wed, 8 Nov 2017 08:54:25 +0800
+Subject: btrfs: tree-checker: Add checker for dir item
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit ad7b0368f33cffe67fecd302028915926e50ef7e upstream.
+
+Add checker for dir item, for key types DIR_ITEM, DIR_INDEX and
+XATTR_ITEM.
+
+This checker does comprehensive checks for:
+
+1) dir_item header and its data size
+   Against item boundary and maximum name/xattr length.
+   This part is mostly the same as old verify_dir_item().
+
+2) dir_type
+   Against maximum file types, and against key type.
+   Since XATTR key should only have FT_XATTR dir item, and normal dir
+   item type should not have XATTR key.
+
+   The check between key->type and dir_type is newly introduced by this
+   patch.
+
+3) name hash
+   For XATTR and DIR_ITEM key, key->offset is name hash (crc32c).
+   Check the hash of the name against the key to ensure it's correct.
+
+   The name hash check is only found in btrfs-progs before this patch.
+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: Nikolay Borisov <nborisov@suse.com>
+Reviewed-by: Su Yue <suy.fnst@cn.fujitsu.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+[bwh: Backported to 4.9: BTRFS_MAX_XATTR_SIZE() takes a root not an fs_info]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/tree-checker.c |  141 ++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 141 insertions(+)
+
+--- a/fs/btrfs/tree-checker.c
++++ b/fs/btrfs/tree-checker.c
+@@ -30,6 +30,7 @@
+ #include "tree-checker.h"
+ #include "disk-io.h"
+ #include "compression.h"
++#include "hash.h"
+ 
+ #define CORRUPT(reason, eb, root, slot)                                       \
+       btrfs_crit(root->fs_info,                                       \
+@@ -176,6 +177,141 @@ static int check_csum_item(struct btrfs_
+ }
+ 
+ /*
++ * Customized reported for dir_item, only important new info is key->objectid,
++ * which represents inode number
++ */
++__printf(4, 5)
++static void dir_item_err(const struct btrfs_root *root,
++                       const struct extent_buffer *eb, int slot,
++                       const char *fmt, ...)
++{
++      struct btrfs_key key;
++      struct va_format vaf;
++      va_list args;
++
++      btrfs_item_key_to_cpu(eb, &key, slot);
++      va_start(args, fmt);
++
++      vaf.fmt = fmt;
++      vaf.va = &args;
++
++      btrfs_crit(root->fs_info,
++      "corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV",
++              btrfs_header_level(eb) == 0 ? "leaf" : "node", root->objectid,
++              btrfs_header_bytenr(eb), slot, key.objectid, &vaf);
++      va_end(args);
++}
++
++static int check_dir_item(struct btrfs_root *root,
++                        struct extent_buffer *leaf,
++                        struct btrfs_key *key, int slot)
++{
++      struct btrfs_dir_item *di;
++      u32 item_size = btrfs_item_size_nr(leaf, slot);
++      u32 cur = 0;
++
++      di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
++      while (cur < item_size) {
++              char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)];
++              u32 name_len;
++              u32 data_len;
++              u32 max_name_len;
++              u32 total_size;
++              u32 name_hash;
++              u8 dir_type;
++
++              /* header itself should not cross item boundary */
++              if (cur + sizeof(*di) > item_size) {
++                      dir_item_err(root, leaf, slot,
++              "dir item header crosses item boundary, have %lu boundary %u",
++                              cur + sizeof(*di), item_size);
++                      return -EUCLEAN;
++              }
++
++              /* dir type check */
++              dir_type = btrfs_dir_type(leaf, di);
++              if (dir_type >= BTRFS_FT_MAX) {
++                      dir_item_err(root, leaf, slot,
++                      "invalid dir item type, have %u expect [0, %u)",
++                              dir_type, BTRFS_FT_MAX);
++                      return -EUCLEAN;
++              }
++
++              if (key->type == BTRFS_XATTR_ITEM_KEY &&
++                  dir_type != BTRFS_FT_XATTR) {
++                      dir_item_err(root, leaf, slot,
++              "invalid dir item type for XATTR key, have %u expect %u",
++                              dir_type, BTRFS_FT_XATTR);
++                      return -EUCLEAN;
++              }
++              if (dir_type == BTRFS_FT_XATTR &&
++                  key->type != BTRFS_XATTR_ITEM_KEY) {
++                      dir_item_err(root, leaf, slot,
++                      "xattr dir type found for non-XATTR key");
++                      return -EUCLEAN;
++              }
++              if (dir_type == BTRFS_FT_XATTR)
++                      max_name_len = XATTR_NAME_MAX;
++              else
++                      max_name_len = BTRFS_NAME_LEN;
++
++              /* Name/data length check */
++              name_len = btrfs_dir_name_len(leaf, di);
++              data_len = btrfs_dir_data_len(leaf, di);
++              if (name_len > max_name_len) {
++                      dir_item_err(root, leaf, slot,
++                      "dir item name len too long, have %u max %u",
++                              name_len, max_name_len);
++                      return -EUCLEAN;
++              }
++              if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(root)) {
++                      dir_item_err(root, leaf, slot,
++                      "dir item name and data len too long, have %u max %u",
++                              name_len + data_len,
++                              BTRFS_MAX_XATTR_SIZE(root));
++                      return -EUCLEAN;
++              }
++
++              if (data_len && dir_type != BTRFS_FT_XATTR) {
++                      dir_item_err(root, leaf, slot,
++                      "dir item with invalid data len, have %u expect 0",
++                              data_len);
++                      return -EUCLEAN;
++              }
++
++              total_size = sizeof(*di) + name_len + data_len;
++
++              /* header and name/data should not cross item boundary */
++              if (cur + total_size > item_size) {
++                      dir_item_err(root, leaf, slot,
++              "dir item data crosses item boundary, have %u boundary %u",
++                              cur + total_size, item_size);
++                      return -EUCLEAN;
++              }
++
++              /*
++               * Special check for XATTR/DIR_ITEM, as key->offset is name
++               * hash, should match its name
++               */
++              if (key->type == BTRFS_DIR_ITEM_KEY ||
++                  key->type == BTRFS_XATTR_ITEM_KEY) {
++                      read_extent_buffer(leaf, namebuf,
++                                      (unsigned long)(di + 1), name_len);
++                      name_hash = btrfs_name_hash(namebuf, name_len);
++                      if (key->offset != name_hash) {
++                              dir_item_err(root, leaf, slot,
++              "name hash mismatch with key, have 0x%016x expect 0x%016llx",
++                                      name_hash, key->offset);
++                              return -EUCLEAN;
++                      }
++              }
++              cur += total_size;
++              di = (struct btrfs_dir_item *)((void *)di + total_size);
++      }
++      return 0;
++}
++
++/*
+  * Common point to switch the item-specific validation.
+  */
+ static int check_leaf_item(struct btrfs_root *root,
+@@ -191,6 +327,11 @@ static int check_leaf_item(struct btrfs_
+       case BTRFS_EXTENT_CSUM_KEY:
+               ret = check_csum_item(root, leaf, key, slot);
+               break;
++      case BTRFS_DIR_ITEM_KEY:
++      case BTRFS_DIR_INDEX_KEY:
++      case BTRFS_XATTR_ITEM_KEY:
++              ret = check_dir_item(root, leaf, key, slot);
++              break;
+       }
+       return ret;
+ }
diff --git a/queue-4.9/btrfs-tree-checker-check-level-for-leaves-and-nodes.patch b/queue-4.9/btrfs-tree-checker-check-level-for-leaves-and-nodes.patch

new file mode 100644 (file)

index 0000000..8afce33
--- /dev/null
+++ b/queue-4.9/btrfs-tree-checker-check-level-for-leaves-and-nodes.patch
@@ -0,0 +1,63 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Qu Wenruo <wqu@suse.com>
+Date: Fri, 28 Sep 2018 07:59:34 +0800
+Subject: btrfs: tree-checker: Check level for leaves and nodes
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit f556faa46eb4e96d0d0772e74ecf66781e132f72 upstream.
+
+Although we have tree level check at tree read runtime, it's completely
+based on its parent level.
+We still need to do an accurate level check to prevent invalid tree
+blocks from sneaking into kernel space.
+
+The check itself is simple, for leaf its level should always be 0.
+For nodes its level should be in range [1, BTRFS_MAX_LEVEL - 1].
+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: Su Yue <suy.fnst@cn.fujitsu.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+[bwh: Backported to 4.9:
+ - Pass root instead of fs_info to generic_err()
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/tree-checker.c |   14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+--- a/fs/btrfs/tree-checker.c
++++ b/fs/btrfs/tree-checker.c
+@@ -447,6 +447,13 @@ static int check_leaf(struct btrfs_root
+       u32 nritems = btrfs_header_nritems(leaf);
+       int slot;
+ 
++      if (btrfs_header_level(leaf) != 0) {
++              generic_err(root, leaf, 0,
++                      "invalid level for leaf, have %d expect 0",
++                      btrfs_header_level(leaf));
++              return -EUCLEAN;
++      }
++
+       /*
+        * Extent buffers from a relocation tree have a owner field that
+        * corresponds to the subvolume tree they are based on. So just from an
+@@ -589,9 +596,16 @@ int btrfs_check_node(struct btrfs_root *
+       unsigned long nr = btrfs_header_nritems(node);
+       struct btrfs_key key, next_key;
+       int slot;
++      int level = btrfs_header_level(node);
+       u64 bytenr;
+       int ret = 0;
+ 
++      if (level <= 0 || level >= BTRFS_MAX_LEVEL) {
++              generic_err(root, node, 0,
++                      "invalid level for node, have %d expect [1, %d]",
++                      level, BTRFS_MAX_LEVEL - 1);
++              return -EUCLEAN;
++      }
+       if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root)) {
+               btrfs_crit(root->fs_info,
+ "corrupt node: root=%llu block=%llu, nritems too %s, have %lu expect range [1,%u]",
diff --git a/queue-4.9/btrfs-tree-checker-detect-invalid-and-empty-essential-trees.patch b/queue-4.9/btrfs-tree-checker-detect-invalid-and-empty-essential-trees.patch

new file mode 100644 (file)

index 0000000..e8cd3aa
--- /dev/null
+++ b/queue-4.9/btrfs-tree-checker-detect-invalid-and-empty-essential-trees.patch
@@ -0,0 +1,71 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Qu Wenruo <wqu@suse.com>
+Date: Tue, 3 Jul 2018 17:10:06 +0800
+Subject: btrfs: tree-checker: Detect invalid and empty essential trees
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit ba480dd4db9f1798541eb2d1c423fc95feee8d36 upstream.
+
+A crafted image can have an empty root tree block, which will later cause
+a NULL pointer dereference.
+
+The following trees should never be empty:
+1) Tree root
+   Must contain at least root items for extent tree, device tree and fs
+   tree
+
+2) Chunk tree
+   Or we can't even bootstrap as it contains the mapping.
+
+3) Fs tree
+   At least inode item for top level inode (.).
+
+4) Device tree
+   Dev extents for chunks
+
+5) Extent tree
+   Must have corresponding extent for each chunk.
+
+If any of them is empty, we are sure the fs is corrupted and there is no
+need to mount it.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=199847
+Reported-by: Xu Wen <wen.xu@gatech.edu>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Tested-by: Gu Jinxiang <gujx@cn.fujitsu.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+[bwh: Backported to 4.9: Pass root instead of fs_info to generic_err()]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/tree-checker.c |   15 ++++++++++++++-
+ 1 file changed, 14 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/tree-checker.c
++++ b/fs/btrfs/tree-checker.c
+@@ -456,9 +456,22 @@ static int check_leaf(struct btrfs_root
+        * skip this check for relocation trees.
+        */
+       if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
++              u64 owner = btrfs_header_owner(leaf);
+               struct btrfs_root *check_root;
+ 
+-              key.objectid = btrfs_header_owner(leaf);
++              /* These trees must never be empty */
++              if (owner == BTRFS_ROOT_TREE_OBJECTID ||
++                  owner == BTRFS_CHUNK_TREE_OBJECTID ||
++                  owner == BTRFS_EXTENT_TREE_OBJECTID ||
++                  owner == BTRFS_DEV_TREE_OBJECTID ||
++                  owner == BTRFS_FS_TREE_OBJECTID ||
++                  owner == BTRFS_DATA_RELOC_TREE_OBJECTID) {
++                      generic_err(root, leaf, 0,
++                      "invalid root, root %llu must never be empty",
++                                  owner);
++                      return -EUCLEAN;
++              }
++              key.objectid = owner;
+               key.type = BTRFS_ROOT_ITEM_KEY;
+               key.offset = (u64)-1;
+ 
diff --git a/queue-4.9/btrfs-tree-checker-enhance-btrfs_check_node-output.patch b/queue-4.9/btrfs-tree-checker-enhance-btrfs_check_node-output.patch

new file mode 100644 (file)

index 0000000..8a6c3db
--- /dev/null
+++ b/queue-4.9/btrfs-tree-checker-enhance-btrfs_check_node-output.patch
@@ -0,0 +1,130 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Qu Wenruo <quwenruo.btrfs@gmx.com>
+Date: Mon, 9 Oct 2017 01:51:03 +0000
+Subject: btrfs: tree-checker: Enhance btrfs_check_node output
+
+From: Qu Wenruo <quwenruo.btrfs@gmx.com>
+
+commit bba4f29896c986c4cec17bc0f19f2ce644fceae1 upstream.
+
+Use inline function to replace macro since we don't need
+stringification.
+(Macro still exists until all callers get updated)
+
+And add more info about the error, and replace EIO with EUCLEAN.
+
+For nr_items error, report if it's too large or too small, and output
+the valid value range.
+
+For node block pointer, added a new alignment checker.
+
+For key order, also output the next key to make the problem more
+obvious.
+
+Signed-off-by: Qu Wenruo <quwenruo.btrfs@gmx.com>
+[ wording adjustments, unindented long strings ]
+Signed-off-by: David Sterba <dsterba@suse.com>
+[bwh: Backported to 4.9:
+ - Use root->sectorsize instead of root->fs_info->sectorsize
+ - BTRFS_NODEPTRS_PER_BLOCK() takes a root instead of an fs_info]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/tree-checker.c |   68 +++++++++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 61 insertions(+), 7 deletions(-)
+
+--- a/fs/btrfs/tree-checker.c
++++ b/fs/btrfs/tree-checker.c
+@@ -37,6 +37,46 @@
+                  btrfs_header_level(eb) == 0 ? "leaf" : "node",       \
+                  reason, btrfs_header_bytenr(eb), root->objectid, slot)
+ 
++/*
++ * Error message should follow the following format:
++ * corrupt <type>: <identifier>, <reason>[, <bad_value>]
++ *
++ * @type:     leaf or node
++ * @identifier:       the necessary info to locate the leaf/node.
++ *            It's recommened to decode key.objecitd/offset if it's
++ *            meaningful.
++ * @reason:   describe the error
++ * @bad_value:        optional, it's recommened to output bad value and its
++ *            expected value (range).
++ *
++ * Since comma is used to separate the components, only space is allowed
++ * inside each component.
++ */
++
++/*
++ * Append generic "corrupt leaf/node root=%llu block=%llu slot=%d: " to @fmt.
++ * Allows callers to customize the output.
++ */
++__printf(4, 5)
++static void generic_err(const struct btrfs_root *root,
++                      const struct extent_buffer *eb, int slot,
++                      const char *fmt, ...)
++{
++      struct va_format vaf;
++      va_list args;
++
++      va_start(args, fmt);
++
++      vaf.fmt = fmt;
++      vaf.va = &args;
++
++      btrfs_crit(root->fs_info,
++              "corrupt %s: root=%llu block=%llu slot=%d, %pV",
++              btrfs_header_level(eb) == 0 ? "leaf" : "node",
++              root->objectid, btrfs_header_bytenr(eb), slot, &vaf);
++      va_end(args);
++}
++
+ static int check_extent_data_item(struct btrfs_root *root,
+                                 struct extent_buffer *leaf,
+                                 struct btrfs_key *key, int slot)
+@@ -282,9 +322,11 @@ int btrfs_check_node(struct btrfs_root *
+ 
+       if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root)) {
+               btrfs_crit(root->fs_info,
+-                         "corrupt node: block %llu root %llu nritems %lu",
+-                         node->start, root->objectid, nr);
+-              return -EIO;
++"corrupt node: root=%llu block=%llu, nritems too %s, have %lu expect range [1,%u]",
++                         root->objectid, node->start,
++                         nr == 0 ? "small" : "large", nr,
++                         BTRFS_NODEPTRS_PER_BLOCK(root));
++              return -EUCLEAN;
+       }
+ 
+       for (slot = 0; slot < nr - 1; slot++) {
+@@ -293,14 +335,26 @@ int btrfs_check_node(struct btrfs_root *
+               btrfs_node_key_to_cpu(node, &next_key, slot + 1);
+ 
+               if (!bytenr) {
+-                      CORRUPT("invalid item slot", node, root, slot);
+-                      ret = -EIO;
++                      generic_err(root, node, slot,
++                              "invalid NULL node pointer");
++                      ret = -EUCLEAN;
++                      goto out;
++              }
++              if (!IS_ALIGNED(bytenr, root->sectorsize)) {
++                      generic_err(root, node, slot,
++                      "unaligned pointer, have %llu should be aligned to %u",
++                              bytenr, root->sectorsize);
++                      ret = -EUCLEAN;
+                       goto out;
+               }
+ 
+               if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) {
+-                      CORRUPT("bad key order", node, root, slot);
+-                      ret = -EIO;
++                      generic_err(root, node, slot,
++      "bad key order, current (%llu %u %llu) next (%llu %u %llu)",
++                              key.objectid, key.type, key.offset,
++                              next_key.objectid, next_key.type,
++                              next_key.offset);
++                      ret = -EUCLEAN;
+                       goto out;
+               }
+       }
diff --git a/queue-4.9/btrfs-tree-checker-fix-false-panic-for-sanity-test.patch b/queue-4.9/btrfs-tree-checker-fix-false-panic-for-sanity-test.patch

new file mode 100644 (file)

index 0000000..c5c0f58
--- /dev/null
+++ b/queue-4.9/btrfs-tree-checker-fix-false-panic-for-sanity-test.patch
@@ -0,0 +1,162 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Qu Wenruo <wqu@suse.com>
+Date: Wed, 8 Nov 2017 08:54:24 +0800
+Subject: btrfs: tree-checker: Fix false panic for sanity test
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 69fc6cbbac542c349b3d350d10f6e394c253c81d upstream.
+
+[BUG]
+If we run btrfs with CONFIG_BTRFS_FS_RUN_SANITY_TESTS=y, it will
+instantly cause kernel panic like:
+
+------
+...
+assertion failed: 0, file: fs/btrfs/disk-io.c, line: 3853
+...
+Call Trace:
+ btrfs_mark_buffer_dirty+0x187/0x1f0 [btrfs]
+ setup_items_for_insert+0x385/0x650 [btrfs]
+ __btrfs_drop_extents+0x129a/0x1870 [btrfs]
+...
+-----
+
+[Cause]
+Btrfs will call btrfs_check_leaf() in btrfs_mark_buffer_dirty() to check
+if the leaf is valid with CONFIG_BTRFS_FS_RUN_SANITY_TESTS=y.
+
+However, quite a few btrfs_mark_buffer_dirty() callers(*) don't really
+initialize their item data but only initialize the item pointers, leaving
+item data uninitialized.
+
+This makes tree-checker catch uninitialized data as error, causing
+such panic.
+
+*: These callers include but are not limited to
+setup_items_for_insert()
+btrfs_split_item()
+btrfs_expand_item()
+
+[Fix]
+Add a new parameter @check_item_data to btrfs_check_leaf().
+With @check_item_data set to false, the item data check will be skipped,
+falling back to the old btrfs_check_leaf() behavior.
+
+So we can still get early warning if we screw up item pointers, and
+avoid false panic.
+
+Cc: Filipe Manana <fdmanana@gmail.com>
+Reported-by: Lakshmipathi.G <lakshmipathi.g@gmail.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+[bwh: Backported to 4.9: adjust context]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/disk-io.c      |   10 ++++++++--
+ fs/btrfs/tree-checker.c |   27 ++++++++++++++++++++++-----
+ fs/btrfs/tree-checker.h |   14 +++++++++++++-
+ 3 files changed, 43 insertions(+), 8 deletions(-)
+
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -604,7 +604,7 @@ static int btree_readpage_end_io_hook(st
+        * that we don't try and read the other copies of this block, just
+        * return -EIO.
+        */
+-      if (found_level == 0 && btrfs_check_leaf(root, eb)) {
++      if (found_level == 0 && btrfs_check_leaf_full(root, eb)) {
+               set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
+               ret = -EIO;
+       }
+@@ -3940,7 +3940,13 @@ void btrfs_mark_buffer_dirty(struct exte
+                                    buf->len,
+                                    root->fs_info->dirty_metadata_batch);
+ #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
+-      if (btrfs_header_level(buf) == 0 && btrfs_check_leaf(root, buf)) {
++      /*
++       * Since btrfs_mark_buffer_dirty() can be called with item pointer set
++       * but item data not updated.
++       * So here we should only check item pointers, not item data.
++       */
++      if (btrfs_header_level(buf) == 0 &&
++          btrfs_check_leaf_relaxed(root, buf)) {
+               btrfs_print_leaf(root, buf);
+               ASSERT(0);
+       }
+--- a/fs/btrfs/tree-checker.c
++++ b/fs/btrfs/tree-checker.c
+@@ -195,7 +195,8 @@ static int check_leaf_item(struct btrfs_
+       return ret;
+ }
+ 
+-int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf)
++static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf,
++                    bool check_item_data)
+ {
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       /* No valid key type is 0, so all key should be larger than this key */
+@@ -299,10 +300,15 @@ int btrfs_check_leaf(struct btrfs_root *
+                       return -EUCLEAN;
+               }
+ 
+-              /* Check if the item size and content meet other criteria */
+-              ret = check_leaf_item(root, leaf, &key, slot);
+-              if (ret < 0)
+-                      return ret;
++              if (check_item_data) {
++                      /*
++                       * Check if the item size and content meet other
++                       * criteria
++                       */
++                      ret = check_leaf_item(root, leaf, &key, slot);
++                      if (ret < 0)
++                              return ret;
++              }
+ 
+               prev_key.objectid = key.objectid;
+               prev_key.type = key.type;
+@@ -312,6 +318,17 @@ int btrfs_check_leaf(struct btrfs_root *
+       return 0;
+ }
+ 
++int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf)
++{
++      return check_leaf(root, leaf, true);
++}
++
++int btrfs_check_leaf_relaxed(struct btrfs_root *root,
++                           struct extent_buffer *leaf)
++{
++      return check_leaf(root, leaf, false);
++}
++
+ int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node)
+ {
+       unsigned long nr = btrfs_header_nritems(node);
+--- a/fs/btrfs/tree-checker.h
++++ b/fs/btrfs/tree-checker.h
+@@ -20,7 +20,19 @@
+ #include "ctree.h"
+ #include "extent_io.h"
+ 
+-int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf);
++/*
++ * Comprehensive leaf checker.
++ * Will check not only the item pointers, but also every possible member
++ * in item data.
++ */
++int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf);
++
++/*
++ * Less strict leaf checker.
++ * Will only check item pointers, not reading item data.
++ */
++int btrfs_check_leaf_relaxed(struct btrfs_root *root,
++                           struct extent_buffer *leaf);
+ int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node);
+ 
+ #endif
diff --git a/queue-4.9/btrfs-tree-checker-fix-misleading-group-system-information.patch b/queue-4.9/btrfs-tree-checker-fix-misleading-group-system-information.patch

new file mode 100644 (file)

index 0000000..4e2ce6e
--- /dev/null
+++ b/queue-4.9/btrfs-tree-checker-fix-misleading-group-system-information.patch
@@ -0,0 +1,37 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Shaokun Zhang <zhangshaokun@hisilicon.com>
+Date: Mon, 5 Nov 2018 18:49:09 +0800
+Subject: btrfs: tree-checker: Fix misleading group system information
+
+From: Shaokun Zhang <zhangshaokun@hisilicon.com>
+
+commit 761333f2f50ccc887aa9957ae829300262c0d15b upstream.
+
+block_group_err shows the group system as a decimal value with a '0x'
+prefix, which is somewhat misleading.
+
+Fix it to print hexadecimal, as was intended.
+
+Fixes: fce466eab7ac6 ("btrfs: tree-checker: Verify block_group_item")
+Reviewed-by: Nikolay Borisov <nborisov@suse.com>
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/tree-checker.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/tree-checker.c
++++ b/fs/btrfs/tree-checker.c
+@@ -399,7 +399,7 @@ static int check_block_group_item(struct
+           type != (BTRFS_BLOCK_GROUP_METADATA |
+                          BTRFS_BLOCK_GROUP_DATA)) {
+               block_group_err(fs_info, leaf, slot,
+-"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llu or 0x%llx",
++"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx",
+                       type, hweight64(type),
+                       BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
+                       BTRFS_BLOCK_GROUP_SYSTEM,
diff --git a/queue-4.9/btrfs-tree-checker-use-zu-format-string-for-size_t.patch b/queue-4.9/btrfs-tree-checker-use-zu-format-string-for-size_t.patch

new file mode 100644 (file)

index 0000000..d9692db
--- /dev/null
+++ b/queue-4.9/btrfs-tree-checker-use-zu-format-string-for-size_t.patch
@@ -0,0 +1,37 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Arnd Bergmann <arnd@arndb.de>
+Date: Wed, 6 Dec 2017 15:18:14 +0100
+Subject: btrfs: tree-checker: use %zu format string for size_t
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+commit 7cfad65297bfe0aa2996cd72d21c898aa84436d9 upstream.
+
+The return value of sizeof() is of type size_t, so we must print it
+using the %z format modifier rather than %l to avoid this warning
+on some architectures:
+
+fs/btrfs/tree-checker.c: In function 'check_dir_item':
+fs/btrfs/tree-checker.c:273:50: error: format '%lu' expects argument of type 'long unsigned int', but argument 5 has type 'u32' {aka 'unsigned int'} [-Werror=format=]
+
+Fixes: 005887f2e3e0 ("btrfs: tree-checker: Add checker for dir item")
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/tree-checker.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/tree-checker.c
++++ b/fs/btrfs/tree-checker.c
+@@ -223,7 +223,7 @@ static int check_dir_item(struct btrfs_r
+               /* header itself should not cross item boundary */
+               if (cur + sizeof(*di) > item_size) {
+                       dir_item_err(root, leaf, slot,
+-              "dir item header crosses item boundary, have %lu boundary %u",
++              "dir item header crosses item boundary, have %zu boundary %u",
+                               cur + sizeof(*di), item_size);
+                       return -EUCLEAN;
+               }
diff --git a/queue-4.9/btrfs-tree-checker-verify-block_group_item.patch b/queue-4.9/btrfs-tree-checker-verify-block_group_item.patch

new file mode 100644 (file)

index 0000000..43bea25
--- /dev/null
+++ b/queue-4.9/btrfs-tree-checker-verify-block_group_item.patch
@@ -0,0 +1,191 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Qu Wenruo <wqu@suse.com>
+Date: Tue, 3 Jul 2018 17:10:05 +0800
+Subject: btrfs: tree-checker: Verify block_group_item
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit fce466eab7ac6baa9d2dcd88abcf945be3d4a089 upstream.
+
+A crafted image with invalid block group items could cause the free space
+cache code to panic.
+
+We could detect such invalid block group item by checking:
+1) Item size
+   Known fixed value.
+2) Block group size (key.offset)
+   We have an upper limit on block group item (10G)
+3) Chunk objectid
+   Known fixed value.
+4) Type
+   Only 4 valid type values, DATA, METADATA, SYSTEM and DATA|METADATA.
+   No more than 1 bit set for profile type.
+5) Used space
+   No more than the block group size.
+
+This should allow btrfs to detect and refuse to mount the crafted image.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=199849
+Reported-by: Xu Wen <wen.xu@gatech.edu>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: Gu Jinxiang <gujx@cn.fujitsu.com>
+Reviewed-by: Nikolay Borisov <nborisov@suse.com>
+Tested-by: Gu Jinxiang <gujx@cn.fujitsu.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+[bwh: Backported to 4.9:
+ - In check_leaf_item(), pass root->fs_info to check_block_group_item()
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/tree-checker.c |  100 ++++++++++++++++++++++++++++++++++++++++++++++++
+ fs/btrfs/volumes.c      |    2 
+ fs/btrfs/volumes.h      |    2 
+ 3 files changed, 103 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/tree-checker.c
++++ b/fs/btrfs/tree-checker.c
+@@ -31,6 +31,7 @@
+ #include "disk-io.h"
+ #include "compression.h"
+ #include "hash.h"
++#include "volumes.h"
+ 
+ #define CORRUPT(reason, eb, root, slot)                                       \
+       btrfs_crit(root->fs_info,                                       \
+@@ -312,6 +313,102 @@ static int check_dir_item(struct btrfs_r
+       return 0;
+ }
+ 
++__printf(4, 5)
++__cold
++static void block_group_err(const struct btrfs_fs_info *fs_info,
++                          const struct extent_buffer *eb, int slot,
++                          const char *fmt, ...)
++{
++      struct btrfs_key key;
++      struct va_format vaf;
++      va_list args;
++
++      btrfs_item_key_to_cpu(eb, &key, slot);
++      va_start(args, fmt);
++
++      vaf.fmt = fmt;
++      vaf.va = &args;
++
++      btrfs_crit(fs_info,
++      "corrupt %s: root=%llu block=%llu slot=%d bg_start=%llu bg_len=%llu, %pV",
++              btrfs_header_level(eb) == 0 ? "leaf" : "node",
++              btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
++              key.objectid, key.offset, &vaf);
++      va_end(args);
++}
++
++static int check_block_group_item(struct btrfs_fs_info *fs_info,
++                                struct extent_buffer *leaf,
++                                struct btrfs_key *key, int slot)
++{
++      struct btrfs_block_group_item bgi;
++      u32 item_size = btrfs_item_size_nr(leaf, slot);
++      u64 flags;
++      u64 type;
++
++      /*
++       * Here we don't really care about alignment since extent allocator can
++       * handle it.  We care more about the size, as if one block group is
++       * larger than maximum size, it must be some obvious corruption.
++       */
++      if (key->offset > BTRFS_MAX_DATA_CHUNK_SIZE || key->offset == 0) {
++              block_group_err(fs_info, leaf, slot,
++                      "invalid block group size, have %llu expect (0, %llu]",
++                              key->offset, BTRFS_MAX_DATA_CHUNK_SIZE);
++              return -EUCLEAN;
++      }
++
++      if (item_size != sizeof(bgi)) {
++              block_group_err(fs_info, leaf, slot,
++                      "invalid item size, have %u expect %zu",
++                              item_size, sizeof(bgi));
++              return -EUCLEAN;
++      }
++
++      read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
++                         sizeof(bgi));
++      if (btrfs_block_group_chunk_objectid(&bgi) !=
++          BTRFS_FIRST_CHUNK_TREE_OBJECTID) {
++              block_group_err(fs_info, leaf, slot,
++              "invalid block group chunk objectid, have %llu expect %llu",
++                              btrfs_block_group_chunk_objectid(&bgi),
++                              BTRFS_FIRST_CHUNK_TREE_OBJECTID);
++              return -EUCLEAN;
++      }
++
++      if (btrfs_block_group_used(&bgi) > key->offset) {
++              block_group_err(fs_info, leaf, slot,
++                      "invalid block group used, have %llu expect [0, %llu]",
++                              btrfs_block_group_used(&bgi), key->offset);
++              return -EUCLEAN;
++      }
++
++      flags = btrfs_block_group_flags(&bgi);
++      if (hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1) {
++              block_group_err(fs_info, leaf, slot,
++"invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set",
++                      flags & BTRFS_BLOCK_GROUP_PROFILE_MASK,
++                      hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK));
++              return -EUCLEAN;
++      }
++
++      type = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
++      if (type != BTRFS_BLOCK_GROUP_DATA &&
++          type != BTRFS_BLOCK_GROUP_METADATA &&
++          type != BTRFS_BLOCK_GROUP_SYSTEM &&
++          type != (BTRFS_BLOCK_GROUP_METADATA |
++                         BTRFS_BLOCK_GROUP_DATA)) {
++              block_group_err(fs_info, leaf, slot,
++"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx",
++                      type, hweight64(type),
++                      BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
++                      BTRFS_BLOCK_GROUP_SYSTEM,
++                      BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA);
++              return -EUCLEAN;
++      }
++      return 0;
++}
++
+ /*
+  * Common point to switch the item-specific validation.
+  */
+@@ -333,6 +430,9 @@ static int check_leaf_item(struct btrfs_
+       case BTRFS_XATTR_ITEM_KEY:
+               ret = check_dir_item(root, leaf, key, slot);
+               break;
++      case BTRFS_BLOCK_GROUP_ITEM_KEY:
++              ret = check_block_group_item(root->fs_info, leaf, key, slot);
++              break;
+       }
+       return ret;
+ }
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -4656,7 +4656,7 @@ static int __btrfs_alloc_chunk(struct bt
+ 
+       if (type & BTRFS_BLOCK_GROUP_DATA) {
+               max_stripe_size = SZ_1G;
+-              max_chunk_size = 10 * max_stripe_size;
++              max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE;
+               if (!devs_max)
+                       devs_max = BTRFS_MAX_DEVS(info->chunk_root);
+       } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
+--- a/fs/btrfs/volumes.h
++++ b/fs/btrfs/volumes.h
+@@ -24,6 +24,8 @@
+ #include <linux/btrfs.h>
+ #include "async-thread.h"
+ 
++#define BTRFS_MAX_DATA_CHUNK_SIZE     (10ULL * SZ_1G)
++
+ extern struct mutex uuid_mutex;
+ 
+ #define BTRFS_STRIPE_LEN      SZ_64K
diff --git a/queue-4.9/btrfs-validate-type-when-reading-a-chunk.patch b/queue-4.9/btrfs-validate-type-when-reading-a-chunk.patch

new file mode 100644 (file)

index 0000000..7b77085
--- /dev/null
+++ b/queue-4.9/btrfs-validate-type-when-reading-a-chunk.patch
@@ -0,0 +1,71 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Gu Jinxiang <gujx@cn.fujitsu.com>
+Date: Wed, 4 Jul 2018 18:16:39 +0800
+Subject: btrfs: validate type when reading a chunk
+
+From: Gu Jinxiang <gujx@cn.fujitsu.com>
+
+commit 315409b0098fb2651d86553f0436b70502b29bb2 upstream.
+
+Reported in https://bugzilla.kernel.org/show_bug.cgi?id=199839, with an
+image that has an invalid chunk type but does not return an error.
+
+Add chunk type check in btrfs_check_chunk_valid, to detect the wrong
+type combinations.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=199839
+Reported-by: Xu Wen <wen.xu@gatech.edu>
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Gu Jinxiang <gujx@cn.fujitsu.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+[bwh: Backported to 4.9: Use root->fs_info instead of fs_info]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/volumes.c |   28 ++++++++++++++++++++++++++++
+ 1 file changed, 28 insertions(+)
+
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -6370,6 +6370,8 @@ static int btrfs_check_chunk_valid(struc
+       u16 num_stripes;
+       u16 sub_stripes;
+       u64 type;
++      u64 features;
++      bool mixed = false;
+ 
+       length = btrfs_chunk_length(leaf, chunk);
+       stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
+@@ -6410,6 +6412,32 @@ static int btrfs_check_chunk_valid(struc
+                         btrfs_chunk_type(leaf, chunk));
+               return -EIO;
+       }
++
++      if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) {
++              btrfs_err(root->fs_info, "missing chunk type flag: 0x%llx", type);
++              return -EIO;
++      }
++
++      if ((type & BTRFS_BLOCK_GROUP_SYSTEM) &&
++          (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) {
++              btrfs_err(root->fs_info,
++                      "system chunk with data or metadata type: 0x%llx", type);
++              return -EIO;
++      }
++
++      features = btrfs_super_incompat_flags(root->fs_info->super_copy);
++      if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
++              mixed = true;
++
++      if (!mixed) {
++              if ((type & BTRFS_BLOCK_GROUP_METADATA) &&
++                  (type & BTRFS_BLOCK_GROUP_DATA)) {
++                      btrfs_err(root->fs_info,
++                      "mixed chunk type in non-mixed mode: 0x%llx", type);
++                      return -EIO;
++              }
++      }
++
+       if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
+           (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) ||
+           (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
diff --git a/queue-4.9/btrfs-verify-that-every-chunk-has-corresponding-block-group-at-mount-time.patch b/queue-4.9/btrfs-verify-that-every-chunk-has-corresponding-block-group-at-mount-time.patch

new file mode 100644 (file)

index 0000000..33c6837
--- /dev/null
+++ b/queue-4.9/btrfs-verify-that-every-chunk-has-corresponding-block-group-at-mount-time.patch
@@ -0,0 +1,106 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Qu Wenruo <wqu@suse.com>
+Date: Wed, 1 Aug 2018 10:37:17 +0800
+Subject: btrfs: Verify that every chunk has corresponding block group at mount time
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 7ef49515fa6727cb4b6f2f5b0ffbc5fc20a9f8c6 upstream.
+
+If a crafted image has missing block group items, it could cause
+unexpected behavior and breaks the assumption of 1:1 chunk<->block group
+mapping.
+
+Although we have the block group -> chunk mapping check, we still need
+chunk -> block group mapping check.
+
+This patch will do extra check to ensure each chunk has its
+corresponding block group.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=199847
+Reported-by: Xu Wen <wen.xu@gatech.edu>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: Gu Jinxiang <gujx@cn.fujitsu.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+[bwh: Backported to 4.9: adjust context]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent-tree.c |   58 ++++++++++++++++++++++++++++++++++++++++++++++++-
+ 1 file changed, 57 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -10159,6 +10159,62 @@ btrfs_create_block_group_cache(struct bt
+       return cache;
+ }
+ 
++
++/*
++ * Iterate all chunks and verify that each of them has the corresponding block
++ * group
++ */
++static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
++{
++      struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
++      struct extent_map *em;
++      struct btrfs_block_group_cache *bg;
++      u64 start = 0;
++      int ret = 0;
++
++      while (1) {
++              read_lock(&map_tree->map_tree.lock);
++              /*
++               * lookup_extent_mapping will return the first extent map
++               * intersecting the range, so setting @len to 1 is enough to
++               * get the first chunk.
++               */
++              em = lookup_extent_mapping(&map_tree->map_tree, start, 1);
++              read_unlock(&map_tree->map_tree.lock);
++              if (!em)
++                      break;
++
++              bg = btrfs_lookup_block_group(fs_info, em->start);
++              if (!bg) {
++                      btrfs_err(fs_info,
++      "chunk start=%llu len=%llu doesn't have corresponding block group",
++                                   em->start, em->len);
++                      ret = -EUCLEAN;
++                      free_extent_map(em);
++                      break;
++              }
++              if (bg->key.objectid != em->start ||
++                  bg->key.offset != em->len ||
++                  (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) !=
++                  (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
++                      btrfs_err(fs_info,
++"chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx",
++                              em->start, em->len,
++                              em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK,
++                              bg->key.objectid, bg->key.offset,
++                              bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
++                      ret = -EUCLEAN;
++                      free_extent_map(em);
++                      btrfs_put_block_group(bg);
++                      break;
++              }
++              start = em->start + em->len;
++              free_extent_map(em);
++              btrfs_put_block_group(bg);
++      }
++      return ret;
++}
++
+ int btrfs_read_block_groups(struct btrfs_root *root)
+ {
+       struct btrfs_path *path;
+@@ -10343,7 +10399,7 @@ int btrfs_read_block_groups(struct btrfs
+       }
+ 
+       init_global_block_rsv(info);
+-      ret = 0;
++      ret = check_chunk_block_group_mappings(info);
+ error:
+       btrfs_free_path(path);
+       return ret;
diff --git a/queue-4.9/f2fs-add-sanity_check_inode-function.patch b/queue-4.9/f2fs-add-sanity_check_inode-function.patch

new file mode 100644 (file)

index 0000000..2aa2407
--- /dev/null
+++ b/queue-4.9/f2fs-add-sanity_check_inode-function.patch
@@ -0,0 +1,48 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Date: Thu, 29 Nov 2018 19:17:34 +0000
+Subject: f2fs: Add sanity_check_inode() function
+
+From: Ben Hutchings <ben.hutchings@codethink.co.uk>
+
+This was done as part of commits 5d64600d4f33 "f2fs: avoid bug_on on
+corrupted inode" and 76d56d4ab4f2 "f2fs: fix to do sanity check with
+extra_attr feature" upstream, but the specific checks they added are
+not applicable to 4.9.
+
+Cc: Jaegeuk Kim <jaegeuk@kernel.org>
+Cc: Chao Yu <yuchao0@huawei.com>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/inode.c |   12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+--- a/fs/f2fs/inode.c
++++ b/fs/f2fs/inode.c
+@@ -104,6 +104,13 @@ static void __recover_inline_status(stru
+       return;
+ }
+ 
++static bool sanity_check_inode(struct inode *inode)
++{
++      struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
++
++      return true;
++}
++
+ static int do_read_inode(struct inode *inode)
+ {
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+@@ -153,6 +160,11 @@ static int do_read_inode(struct inode *i
+ 
+       get_inline_info(inode, ri);
+ 
++      if (!sanity_check_inode(inode)) {
++              f2fs_put_page(node_page, 1);
++              return -EINVAL;
++      }
++
+       /* check data exist */
+       if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode))
+               __recover_inline_status(inode, node_page);
diff --git a/queue-4.9/f2fs-check-blkaddr-more-accuratly-before-issue-a-bio.patch b/queue-4.9/f2fs-check-blkaddr-more-accuratly-before-issue-a-bio.patch

new file mode 100644 (file)

index 0000000..a275ff7
--- /dev/null
+++ b/queue-4.9/f2fs-check-blkaddr-more-accuratly-before-issue-a-bio.patch
@@ -0,0 +1,120 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Yunlei He <heyunlei@huawei.com>
+Date: Thu, 8 Mar 2018 16:29:13 +0800
+Subject: f2fs: check blkaddr more accuratly before issue a bio
+
+From: Yunlei He <heyunlei@huawei.com>
+
+commit 0833721ec3658a4e9d5e58b6fa82cf9edc431e59 upstream.
+
+This patch checks blkaddr more accurately before issuing a
+write or read bio.
+
+Signed-off-by: Yunlei He <heyunlei@huawei.com>
+Reviewed-by: Chao Yu <yuchao0@huawei.com>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+[bwh: Backported to 4.9: adjust context]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/checkpoint.c |    2 ++
+ fs/f2fs/data.c       |    5 +++--
+ fs/f2fs/f2fs.h       |    1 +
+ fs/f2fs/segment.h    |   25 +++++++++++++++++++------
+ 4 files changed, 25 insertions(+), 8 deletions(-)
+
+--- a/fs/f2fs/checkpoint.c
++++ b/fs/f2fs/checkpoint.c
+@@ -69,6 +69,7 @@ static struct page *__get_meta_page(stru
+               .old_blkaddr = index,
+               .new_blkaddr = index,
+               .encrypted_page = NULL,
++              .is_meta = is_meta,
+       };
+ 
+       if (unlikely(!is_meta))
+@@ -162,6 +163,7 @@ int ra_meta_pages(struct f2fs_sb_info *s
+               .op = REQ_OP_READ,
+               .op_flags = sync ? (READ_SYNC | REQ_META | REQ_PRIO) : REQ_RAHEAD,
+               .encrypted_page = NULL,
++              .is_meta = (type != META_POR),
+       };
+       struct blk_plug plug;
+ 
+--- a/fs/f2fs/data.c
++++ b/fs/f2fs/data.c
+@@ -240,6 +240,7 @@ int f2fs_submit_page_bio(struct f2fs_io_
+       struct page *page = fio->encrypted_page ?
+                       fio->encrypted_page : fio->page;
+ 
++      verify_block_addr(fio, fio->new_blkaddr);
+       trace_f2fs_submit_page_bio(page, fio);
+       f2fs_trace_ios(fio, 0);
+ 
+@@ -267,8 +268,8 @@ void f2fs_submit_page_mbio(struct f2fs_i
+       io = is_read ? &sbi->read_io : &sbi->write_io[btype];
+ 
+       if (fio->old_blkaddr != NEW_ADDR)
+-              verify_block_addr(sbi, fio->old_blkaddr);
+-      verify_block_addr(sbi, fio->new_blkaddr);
++              verify_block_addr(fio, fio->old_blkaddr);
++      verify_block_addr(fio, fio->new_blkaddr);
+ 
+       down_write(&io->io_rwsem);
+ 
+--- a/fs/f2fs/f2fs.h
++++ b/fs/f2fs/f2fs.h
+@@ -694,6 +694,7 @@ struct f2fs_io_info {
+       block_t old_blkaddr;    /* old block address before Cow */
+       struct page *page;      /* page to be written */
+       struct page *encrypted_page;    /* encrypted page */
++      bool is_meta;           /* indicate borrow meta inode mapping or not */
+ };
+ 
+ #define is_read_io(rw) (rw == READ)
+--- a/fs/f2fs/segment.h
++++ b/fs/f2fs/segment.h
+@@ -49,13 +49,19 @@
+        (secno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno /             \
+         sbi->segs_per_sec))   \
+ 
+-#define MAIN_BLKADDR(sbi)     (SM_I(sbi)->main_blkaddr)
+-#define SEG0_BLKADDR(sbi)     (SM_I(sbi)->seg0_blkaddr)
++#define MAIN_BLKADDR(sbi)                                             \
++      (SM_I(sbi) ? SM_I(sbi)->main_blkaddr :                          \
++              le32_to_cpu(F2FS_RAW_SUPER(sbi)->main_blkaddr))
++#define SEG0_BLKADDR(sbi)                                             \
++      (SM_I(sbi) ? SM_I(sbi)->seg0_blkaddr :                          \
++              le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment0_blkaddr))
+ 
+ #define MAIN_SEGS(sbi)        (SM_I(sbi)->main_segments)
+ #define MAIN_SECS(sbi)        (sbi->total_sections)
+ 
+-#define TOTAL_SEGS(sbi)       (SM_I(sbi)->segment_count)
++#define TOTAL_SEGS(sbi)                                                       \
++      (SM_I(sbi) ? SM_I(sbi)->segment_count :                                 \
++              le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count))
+ #define TOTAL_BLKS(sbi)       (TOTAL_SEGS(sbi) << sbi->log_blocks_per_seg)
+ 
+ #define MAX_BLKADDR(sbi)      (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi))
+@@ -591,10 +597,17 @@ static inline void check_seg_range(struc
+       f2fs_bug_on(sbi, segno > TOTAL_SEGS(sbi) - 1);
+ }
+ 
+-static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
++static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr)
+ {
+-      BUG_ON(blk_addr < SEG0_BLKADDR(sbi)
+-                      || blk_addr >= MAX_BLKADDR(sbi));
++      struct f2fs_sb_info *sbi = fio->sbi;
++
++      if (PAGE_TYPE_OF_BIO(fio->type) == META &&
++                              (!is_read_io(fio->op) || fio->is_meta))
++              BUG_ON(blk_addr < SEG0_BLKADDR(sbi) ||
++                              blk_addr >= MAIN_BLKADDR(sbi));
++      else
++              BUG_ON(blk_addr < MAIN_BLKADDR(sbi) ||
++                              blk_addr >= MAX_BLKADDR(sbi));
+ }
+ 
+ /*
diff --git a/queue-4.9/f2fs-clean-up-with-is_valid_blkaddr.patch b/queue-4.9/f2fs-clean-up-with-is_valid_blkaddr.patch

new file mode 100644 (file)

index 0000000..fc03173
--- /dev/null
+++ b/queue-4.9/f2fs-clean-up-with-is_valid_blkaddr.patch
@@ -0,0 +1,208 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Chao Yu <yuchao0@huawei.com>
+Date: Wed, 23 May 2018 22:25:08 +0800
+Subject: f2fs: clean up with is_valid_blkaddr()
+
+From: Chao Yu <yuchao0@huawei.com>
+
+commit 7b525dd01365c6764018e374d391c92466be1b7a upstream.
+
+- rename is_valid_blkaddr() to is_valid_meta_blkaddr() for readability.
+- introduce is_valid_blkaddr() for cleanup.
+
+No logic change in this patch.
+
+Signed-off-by: Chao Yu <yuchao0@huawei.com>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+[bwh: Backported to 4.9: adjust context]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/checkpoint.c |    4 ++--
+ fs/f2fs/data.c       |    6 +++---
+ fs/f2fs/f2fs.h       |    9 ++++++++-
+ fs/f2fs/file.c       |    2 +-
+ fs/f2fs/inode.c      |    2 +-
+ fs/f2fs/node.c       |    5 ++---
+ fs/f2fs/recovery.c   |    6 +++---
+ fs/f2fs/segment.c    |    4 ++--
+ fs/f2fs/segment.h    |    2 +-
+ 9 files changed, 23 insertions(+), 17 deletions(-)
+
+--- a/fs/f2fs/checkpoint.c
++++ b/fs/f2fs/checkpoint.c
+@@ -118,7 +118,7 @@ struct page *get_tmp_page(struct f2fs_sb
+       return __get_meta_page(sbi, index, false);
+ }
+ 
+-bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
++bool is_valid_meta_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
+ {
+       switch (type) {
+       case META_NAT:
+@@ -173,7 +173,7 @@ int ra_meta_pages(struct f2fs_sb_info *s
+       blk_start_plug(&plug);
+       for (; nrpages-- > 0; blkno++) {
+ 
+-              if (!is_valid_blkaddr(sbi, blkno, type))
++              if (!is_valid_meta_blkaddr(sbi, blkno, type))
+                       goto out;
+ 
+               switch (type) {
+--- a/fs/f2fs/data.c
++++ b/fs/f2fs/data.c
+@@ -267,7 +267,7 @@ void f2fs_submit_page_mbio(struct f2fs_i
+ 
+       io = is_read ? &sbi->read_io : &sbi->write_io[btype];
+ 
+-      if (fio->old_blkaddr != NEW_ADDR)
++      if (is_valid_blkaddr(fio->old_blkaddr))
+               verify_block_addr(fio, fio->old_blkaddr);
+       verify_block_addr(fio, fio->new_blkaddr);
+ 
+@@ -723,7 +723,7 @@ next_dnode:
+ next_block:
+       blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
+ 
+-      if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) {
++      if (!is_valid_blkaddr(blkaddr)) {
+               if (create) {
+                       if (unlikely(f2fs_cp_error(sbi))) {
+                               err = -EIO;
+@@ -1217,7 +1217,7 @@ retry_encrypt:
+        * If current allocation needs SSR,
+        * it had better in-place writes for updated data.
+        */
+-      if (unlikely(fio->old_blkaddr != NEW_ADDR &&
++      if (unlikely(is_valid_blkaddr(fio->old_blkaddr) &&
+                       !is_cold_data(page) &&
+                       !IS_ATOMIC_WRITTEN_PAGE(page) &&
+                       need_inplace_update(inode))) {
+--- a/fs/f2fs/f2fs.h
++++ b/fs/f2fs/f2fs.h
+@@ -1930,6 +1930,13 @@ static inline void *f2fs_kvzalloc(size_t
+       (pgofs - ADDRS_PER_INODE(inode) + ADDRS_PER_BLOCK) /    \
+       ADDRS_PER_BLOCK * ADDRS_PER_BLOCK + ADDRS_PER_INODE(inode))
+ 
++static inline bool is_valid_blkaddr(block_t blkaddr)
++{
++      if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
++              return false;
++      return true;
++}
++
+ /*
+  * file.c
+  */
+@@ -2115,7 +2122,7 @@ void f2fs_stop_checkpoint(struct f2fs_sb
+ struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
+ struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
+ struct page *get_tmp_page(struct f2fs_sb_info *, pgoff_t);
+-bool is_valid_blkaddr(struct f2fs_sb_info *, block_t, int);
++bool is_valid_meta_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type);
+ int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int, bool);
+ void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t);
+ long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
+--- a/fs/f2fs/file.c
++++ b/fs/f2fs/file.c
+@@ -316,7 +316,7 @@ static bool __found_offset(block_t blkad
+       switch (whence) {
+       case SEEK_DATA:
+               if ((blkaddr == NEW_ADDR && dirty == pgofs) ||
+-                      (blkaddr != NEW_ADDR && blkaddr != NULL_ADDR))
++                      is_valid_blkaddr(blkaddr))
+                       return true;
+               break;
+       case SEEK_HOLE:
+--- a/fs/f2fs/inode.c
++++ b/fs/f2fs/inode.c
+@@ -63,7 +63,7 @@ static bool __written_first_block(struct
+ {
+       block_t addr = le32_to_cpu(ri->i_addr[0]);
+ 
+-      if (addr != NEW_ADDR && addr != NULL_ADDR)
++      if (is_valid_blkaddr(addr))
+               return true;
+       return false;
+ }
+--- a/fs/f2fs/node.c
++++ b/fs/f2fs/node.c
+@@ -304,8 +304,7 @@ static void set_node_addr(struct f2fs_sb
+                       new_blkaddr == NULL_ADDR);
+       f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR &&
+                       new_blkaddr == NEW_ADDR);
+-      f2fs_bug_on(sbi, nat_get_blkaddr(e) != NEW_ADDR &&
+-                      nat_get_blkaddr(e) != NULL_ADDR &&
++      f2fs_bug_on(sbi, is_valid_blkaddr(nat_get_blkaddr(e)) &&
+                       new_blkaddr == NEW_ADDR);
+ 
+       /* increment version no as node is removed */
+@@ -320,7 +319,7 @@ static void set_node_addr(struct f2fs_sb
+ 
+       /* change address */
+       nat_set_blkaddr(e, new_blkaddr);
+-      if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR)
++      if (!is_valid_blkaddr(new_blkaddr))
+               set_nat_flag(e, IS_CHECKPOINTED, false);
+       __set_nat_cache_dirty(nm_i, e);
+ 
+--- a/fs/f2fs/recovery.c
++++ b/fs/f2fs/recovery.c
+@@ -236,7 +236,7 @@ static int find_fsync_dnodes(struct f2fs
+       while (1) {
+               struct fsync_inode_entry *entry;
+ 
+-              if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
++              if (!is_valid_meta_blkaddr(sbi, blkaddr, META_POR))
+                       return 0;
+ 
+               page = get_tmp_page(sbi, blkaddr);
+@@ -468,7 +468,7 @@ retry_dn:
+               }
+ 
+               /* dest is valid block, try to recover from src to dest */
+-              if (is_valid_blkaddr(sbi, dest, META_POR)) {
++              if (is_valid_meta_blkaddr(sbi, dest, META_POR)) {
+ 
+                       if (src == NULL_ADDR) {
+                               err = reserve_new_block(&dn);
+@@ -527,7 +527,7 @@ static int recover_data(struct f2fs_sb_i
+       while (1) {
+               struct fsync_inode_entry *entry;
+ 
+-              if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
++              if (!is_valid_meta_blkaddr(sbi, blkaddr, META_POR))
+                       break;
+ 
+               ra_meta_pages_cond(sbi, blkaddr);
+--- a/fs/f2fs/segment.c
++++ b/fs/f2fs/segment.c
+@@ -944,7 +944,7 @@ bool is_checkpointed_data(struct f2fs_sb
+       struct seg_entry *se;
+       bool is_cp = false;
+ 
+-      if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
++      if (!is_valid_blkaddr(blkaddr))
+               return true;
+ 
+       mutex_lock(&sit_i->sentry_lock);
+@@ -1668,7 +1668,7 @@ void f2fs_wait_on_encrypted_page_writeba
+ {
+       struct page *cpage;
+ 
+-      if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
++      if (!is_valid_blkaddr(blkaddr))
+               return;
+ 
+       cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
+--- a/fs/f2fs/segment.h
++++ b/fs/f2fs/segment.h
+@@ -81,7 +81,7 @@
+       (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (sbi->blocks_per_seg - 1))
+ 
+ #define GET_SEGNO(sbi, blk_addr)                                      \
+-      (((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ?          \
++      ((!is_valid_blkaddr(blk_addr)) ?                        \
+       NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi),                 \
+               GET_SEGNO_FROM_SEG0(sbi, blk_addr)))
+ #define GET_SECNO(sbi, segno)                                 \
diff --git a/queue-4.9/f2fs-detect-wrong-layout.patch b/queue-4.9/f2fs-detect-wrong-layout.patch

new file mode 100644 (file)

index 0000000..55669f3
--- /dev/null
+++ b/queue-4.9/f2fs-detect-wrong-layout.patch
@@ -0,0 +1,64 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Jaegeuk Kim <jaegeuk@kernel.org>
+Date: Mon, 5 Dec 2016 13:56:04 -0800
+Subject: f2fs: detect wrong layout
+
+From: Jaegeuk Kim <jaegeuk@kernel.org>
+
+commit 2040fce83fe17763b07c97c1f691da2bb85e4135 upstream.
+
+Previous mkfs.f2fs allows small partition inappropriately, so f2fs should detect
+that as well.
+
+Refer this in f2fs-tools.
+
+mkfs.f2fs: detect small partition by overprovision ratio and # of segments
+
+Reported-and-Tested-by: Eric Biggers <ebiggers@google.com>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+[bwh: Backported to 4.9: adjust context]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/segment.h |    2 ++
+ fs/f2fs/super.c   |   11 +++++++++++
+ 2 files changed, 13 insertions(+)
+
+--- a/fs/f2fs/segment.h
++++ b/fs/f2fs/segment.h
+@@ -18,6 +18,8 @@
+ #define DEF_RECLAIM_PREFREE_SEGMENTS  5       /* 5% over total segments */
+ #define DEF_MAX_RECLAIM_PREFREE_SEGMENTS      4096    /* 8GB in maximum */
+ 
++#define F2FS_MIN_SEGMENTS     9 /* SB + 2 (CP + SIT + NAT) + SSA + MAIN */
++
+ /* L: Logical segment # in volume, R: Relative segment # in main area */
+ #define GET_L2R_SEGNO(free_i, segno)  (segno - free_i->start_segno)
+ #define GET_R2L_SEGNO(free_i, segno)  (segno + free_i->start_segno)
+--- a/fs/f2fs/super.c
++++ b/fs/f2fs/super.c
+@@ -1424,6 +1424,7 @@ int sanity_check_ckpt(struct f2fs_sb_inf
+       unsigned int total, fsmeta;
+       struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
+       struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
++      unsigned int ovp_segments, reserved_segments;
+       unsigned int main_segs, blocks_per_seg;
+       unsigned int sit_segs, nat_segs;
+       unsigned int sit_bitmap_size, nat_bitmap_size;
+@@ -1442,6 +1443,16 @@ int sanity_check_ckpt(struct f2fs_sb_inf
+       if (unlikely(fsmeta >= total))
+               return 1;
+ 
++      ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
++      reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
++
++      if (unlikely(fsmeta < F2FS_MIN_SEGMENTS ||
++                      ovp_segments == 0 || reserved_segments == 0)) {
++              f2fs_msg(sbi->sb, KERN_ERR,
++                      "Wrong layout: check mkfs.f2fs version");
++              return 1;
++      }
++
+       main_segs = le32_to_cpu(raw_super->segment_count_main);
+       blocks_per_seg = sbi->blocks_per_seg;
+ 
diff --git a/queue-4.9/f2fs-enhance-sanity_check_raw_super-to-avoid-potential-overflow.patch b/queue-4.9/f2fs-enhance-sanity_check_raw_super-to-avoid-potential-overflow.patch

new file mode 100644 (file)

index 0000000..2e5024f
--- /dev/null
+++ b/queue-4.9/f2fs-enhance-sanity_check_raw_super-to-avoid-potential-overflow.patch
@@ -0,0 +1,179 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Jaegeuk Kim <jaegeuk@kernel.org>
+Date: Fri, 27 Apr 2018 19:03:22 -0700
+Subject: f2fs: enhance sanity_check_raw_super() to avoid potential overflow
+
+From: Jaegeuk Kim <jaegeuk@kernel.org>
+
+commit 0cfe75c5b011994651a4ca6d74f20aa997bfc69a upstream.
+
+In order to avoid the below overflow issue, we should have checked the
+boundaries in superblock before reaching out to allocation. As Linus suggested,
+the right place should be sanity_check_raw_super().
+
+Dr Silvio Cesare of InfoSect reported:
+
+There are integer overflows with using the cp_payload superblock field in the
+f2fs filesystem potentially leading to memory corruption.
+
+include/linux/f2fs_fs.h
+
+struct f2fs_super_block {
+...
+        __le32 cp_payload;
+
+fs/f2fs/f2fs.h
+
+typedef u32 block_t;    /*
+                         * should not change u32, since it is the on-disk block
+                         * address format, __le32.
+                         */
+...
+
+static inline block_t __cp_payload(struct f2fs_sb_info *sbi)
+{
+        return le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
+}
+
+fs/f2fs/checkpoint.c
+
+        block_t start_blk, orphan_blocks, i, j;
+...
+        start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
+        orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi);
+
++++ integer overflows
+
+...
+        unsigned int cp_blks = 1 + __cp_payload(sbi);
+...
+        sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL);
+
++++ integer overflow leading to incorrect heap allocation.
+
+        int cp_payload_blks = __cp_payload(sbi);
+...
+        ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
+                        orphan_blocks);
+
++++ sign bug and integer overflow
+
+...
+        for (i = 1; i < 1 + cp_payload_blks; i++)
+
++++ integer overflow
+
+...
+
+      sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
+                        NR_CURSEG_TYPE - __cp_payload(sbi)) *
+                                F2FS_ORPHANS_PER_BLOCK;
+
++++ integer overflow
+
+Reported-by: Greg KH <greg@kroah.com>
+Reported-by: Silvio Cesare <silvio.cesare@gmail.com>
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Reviewed-by: Chao Yu <yuchao0@huawei.com>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+[bwh: Backported to 4.9: No hot file extension support]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/super.c |   71 ++++++++++++++++++++++++++++++++++++++++++++++++++------
+ 1 file changed, 64 insertions(+), 7 deletions(-)
+
+--- a/fs/f2fs/super.c
++++ b/fs/f2fs/super.c
+@@ -1337,6 +1337,8 @@ static inline bool sanity_check_area_bou
+ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
+                               struct buffer_head *bh)
+ {
++      block_t segment_count, segs_per_sec, secs_per_zone;
++      block_t total_sections, blocks_per_seg;
+       struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
+                                       (bh->b_data + F2FS_SUPER_OFFSET);
+       struct super_block *sb = sbi->sb;
+@@ -1393,6 +1395,68 @@ static int sanity_check_raw_super(struct
+               return 1;
+       }
+ 
++      segment_count = le32_to_cpu(raw_super->segment_count);
++      segs_per_sec = le32_to_cpu(raw_super->segs_per_sec);
++      secs_per_zone = le32_to_cpu(raw_super->secs_per_zone);
++      total_sections = le32_to_cpu(raw_super->section_count);
++
++      /* blocks_per_seg should be 512, given the above check */
++      blocks_per_seg = 1 << le32_to_cpu(raw_super->log_blocks_per_seg);
++
++      if (segment_count > F2FS_MAX_SEGMENT ||
++                              segment_count < F2FS_MIN_SEGMENTS) {
++              f2fs_msg(sb, KERN_INFO,
++                      "Invalid segment count (%u)",
++                      segment_count);
++              return 1;
++      }
++
++      if (total_sections > segment_count ||
++                      total_sections < F2FS_MIN_SEGMENTS ||
++                      segs_per_sec > segment_count || !segs_per_sec) {
++              f2fs_msg(sb, KERN_INFO,
++                      "Invalid segment/section count (%u, %u x %u)",
++                      segment_count, total_sections, segs_per_sec);
++              return 1;
++      }
++
++      if ((segment_count / segs_per_sec) < total_sections) {
++              f2fs_msg(sb, KERN_INFO,
++                      "Small segment_count (%u < %u * %u)",
++                      segment_count, segs_per_sec, total_sections);
++              return 1;
++      }
++
++      if (segment_count > (le32_to_cpu(raw_super->block_count) >> 9)) {
++              f2fs_msg(sb, KERN_INFO,
++                      "Wrong segment_count / block_count (%u > %u)",
++                      segment_count, le32_to_cpu(raw_super->block_count));
++              return 1;
++      }
++
++      if (secs_per_zone > total_sections) {
++              f2fs_msg(sb, KERN_INFO,
++                      "Wrong secs_per_zone (%u > %u)",
++                      secs_per_zone, total_sections);
++              return 1;
++      }
++      if (le32_to_cpu(raw_super->extension_count) > F2FS_MAX_EXTENSION) {
++              f2fs_msg(sb, KERN_INFO,
++                      "Corrupted extension count (%u > %u)",
++                      le32_to_cpu(raw_super->extension_count),
++                      F2FS_MAX_EXTENSION);
++              return 1;
++      }
++
++      if (le32_to_cpu(raw_super->cp_payload) >
++                              (blocks_per_seg - F2FS_CP_PACKS)) {
++              f2fs_msg(sb, KERN_INFO,
++                      "Insane cp_payload (%u > %u)",
++                      le32_to_cpu(raw_super->cp_payload),
++                      blocks_per_seg - F2FS_CP_PACKS);
++              return 1;
++      }
++
+       /* check reserved ino info */
+       if (le32_to_cpu(raw_super->node_ino) != 1 ||
+               le32_to_cpu(raw_super->meta_ino) != 2 ||
+@@ -1405,13 +1469,6 @@ static int sanity_check_raw_super(struct
+               return 1;
+       }
+ 
+-      if (le32_to_cpu(raw_super->segment_count) > F2FS_MAX_SEGMENT) {
+-              f2fs_msg(sb, KERN_INFO,
+-                      "Invalid segment count (%u)",
+-                      le32_to_cpu(raw_super->segment_count));
+-              return 1;
+-      }
+-
+       /* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */
+       if (sanity_check_area_boundary(sbi, bh))
+               return 1;
diff --git a/queue-4.9/f2fs-fix-a-panic-caused-by-null-flush_cmd_control.patch b/queue-4.9/f2fs-fix-a-panic-caused-by-null-flush_cmd_control.patch

new file mode 100644 (file)

index 0000000..fdf8539
--- /dev/null
+++ b/queue-4.9/f2fs-fix-a-panic-caused-by-null-flush_cmd_control.patch
@@ -0,0 +1,48 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Yunlei He <heyunlei@huawei.com>
+Date: Thu, 1 Jun 2017 16:43:51 +0800
+Subject: f2fs: fix a panic caused by NULL flush_cmd_control
+
+From: Yunlei He <heyunlei@huawei.com>
+
+commit d4fdf8ba0e5808ba9ad6b44337783bd9935e0982 upstream.
+
+When the fs is mounted with option noflush_merge, boot fails because fcc is
+an illegal address in function f2fs_issue_flush:
+
+        if (!test_opt(sbi, FLUSH_MERGE)) {
+                ret = submit_flush_wait(sbi);
+                atomic_inc(&fcc->issued_flush);   ->  Here, fcc illegal
+                return ret;
+        }
+
+Signed-off-by: Yunlei He <heyunlei@huawei.com>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+[bwh: Backported to 4.9: adjust context]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/segment.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/fs/f2fs/segment.c
++++ b/fs/f2fs/segment.c
+@@ -493,6 +493,9 @@ int create_flush_cmd_control(struct f2fs
+       init_waitqueue_head(&fcc->flush_wait_queue);
+       init_llist_head(&fcc->issue_list);
+       SM_I(sbi)->cmd_control_info = fcc;
++      if (!test_opt(sbi, FLUSH_MERGE))
++              return err;
++
+       fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
+                               "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
+       if (IS_ERR(fcc->f2fs_issue_flush)) {
+@@ -2539,7 +2542,7 @@ int build_segment_manager(struct f2fs_sb
+ 
+       INIT_LIST_HEAD(&sm_info->sit_entry_set);
+ 
+-      if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
++      if (!f2fs_readonly(sbi->sb)) {
+               err = create_flush_cmd_control(sbi);
+               if (err)
+                       return err;
diff --git a/queue-4.9/f2fs-fix-missing-up_read.patch b/queue-4.9/f2fs-fix-missing-up_read.patch

new file mode 100644 (file)

index 0000000..d6f3f70
--- /dev/null
+++ b/queue-4.9/f2fs-fix-missing-up_read.patch
@@ -0,0 +1,34 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Jaegeuk Kim <jaegeuk@kernel.org>
+Date: Thu, 27 Sep 2018 22:15:31 -0700
+Subject: f2fs: fix missing up_read
+
+From: Jaegeuk Kim <jaegeuk@kernel.org>
+
+commit 89d13c38501df730cbb2e02c4499da1b5187119d upstream.
+
+This patch fixes missing up_read call.
+
+Fixes: c9b60788fc76 ("f2fs: fix to do sanity check with block address in main area")
+Reviewed-by: Chao Yu <yuchao0@huawei.com>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/node.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/fs/f2fs/node.c
++++ b/fs/f2fs/node.c
+@@ -1606,8 +1606,10 @@ static int f2fs_write_node_page(struct p
+       }
+ 
+       if (__is_valid_data_blkaddr(ni.blk_addr) &&
+-              !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC))
++              !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC)) {
++              up_read(&sbi->node_write);
+               goto redirty_out;
++      }
+ 
+       set_page_writeback(page);
+       fio.old_blkaddr = ni.blk_addr;
diff --git a/queue-4.9/f2fs-fix-race-condition-in-between-free-nid-allocator-initializer.patch b/queue-4.9/f2fs-fix-race-condition-in-between-free-nid-allocator-initializer.patch

new file mode 100644 (file)

index 0000000..1a5233d
--- /dev/null
+++ b/queue-4.9/f2fs-fix-race-condition-in-between-free-nid-allocator-initializer.patch
@@ -0,0 +1,137 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Chao Yu <yuchao0@huawei.com>
+Date: Wed, 22 Mar 2017 14:45:05 +0800
+Subject: f2fs: fix race condition in between free nid allocator/initializer
+
+From: Chao Yu <yuchao0@huawei.com>
+
+commit 30a61ddf8117c26ac5b295e1233eaa9629a94ca3 upstream.
+
+In the concurrent case below, an allocated nid can be loaded into the free
+nid cache and be allocated again.
+
+Thread A                               Thread B
+- f2fs_create
+ - f2fs_new_inode
+  - alloc_nid
+   - __insert_nid_to_list(ALLOC_NID_LIST)
+                                       - f2fs_balance_fs_bg
+                                        - build_free_nids
+                                         - __build_free_nids
+                                          - scan_nat_page
+                                           - add_free_nid
+                                            - __lookup_nat_cache
+ - f2fs_add_link
+  - init_inode_metadata
+   - new_inode_page
+    - new_node_page
+     - set_node_addr
+ - alloc_nid_done
+  - __remove_nid_from_list(ALLOC_NID_LIST)
+                                            - __insert_nid_to_list(FREE_NID_LIST)
+
+This patch makes the nat cache lookup and the free nid list operation atomic
+to avoid this race condition.
+
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Chao Yu <yuchao0@huawei.com>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+[bwh: Backported to 4.9:
+ - add_free_nid() returns 0 in case of any error (except low memory)
+ - Tree/list addition has not been moved into __insert_nid_to_list()]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/node.c |   62 +++++++++++++++++++++++++++++++++++++++------------------
+ 1 file changed, 43 insertions(+), 19 deletions(-)
+
+--- a/fs/f2fs/node.c
++++ b/fs/f2fs/node.c
+@@ -1704,8 +1704,9 @@ static void __del_from_free_nid_list(str
+ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
+ {
+       struct f2fs_nm_info *nm_i = NM_I(sbi);
+-      struct free_nid *i;
++      struct free_nid *i, *e;
+       struct nat_entry *ne;
++      int err = -EINVAL;
+ 
+       if (!available_free_memory(sbi, FREE_NIDS))
+               return -1;
+@@ -1714,35 +1715,58 @@ static int add_free_nid(struct f2fs_sb_i
+       if (unlikely(nid == 0))
+               return 0;
+ 
+-      if (build) {
+-              /* do not add allocated nids */
+-              ne = __lookup_nat_cache(nm_i, nid);
+-              if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
+-                              nat_get_blkaddr(ne) != NULL_ADDR))
+-                      return 0;
+-      }
+-
+       i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
+       i->nid = nid;
+       i->state = NID_NEW;
+ 
+-      if (radix_tree_preload(GFP_NOFS)) {
+-              kmem_cache_free(free_nid_slab, i);
+-              return 0;
+-      }
++      if (radix_tree_preload(GFP_NOFS))
++              goto err;
+ 
+       spin_lock(&nm_i->free_nid_list_lock);
+-      if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) {
+-              spin_unlock(&nm_i->free_nid_list_lock);
+-              radix_tree_preload_end();
+-              kmem_cache_free(free_nid_slab, i);
+-              return 0;
++
++      if (build) {
++              /*
++               *   Thread A             Thread B
++               *  - f2fs_create
++               *   - f2fs_new_inode
++               *    - alloc_nid
++               *     - __insert_nid_to_list(ALLOC_NID_LIST)
++               *                     - f2fs_balance_fs_bg
++               *                      - build_free_nids
++               *                       - __build_free_nids
++               *                        - scan_nat_page
++               *                         - add_free_nid
++               *                          - __lookup_nat_cache
++               *  - f2fs_add_link
++               *   - init_inode_metadata
++               *    - new_inode_page
++               *     - new_node_page
++               *      - set_node_addr
++               *  - alloc_nid_done
++               *   - __remove_nid_from_list(ALLOC_NID_LIST)
++               *                         - __insert_nid_to_list(FREE_NID_LIST)
++               */
++              ne = __lookup_nat_cache(nm_i, nid);
++              if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
++                              nat_get_blkaddr(ne) != NULL_ADDR))
++                      goto err_out;
++
++              e = __lookup_free_nid_list(nm_i, nid);
++              if (e)
++                      goto err_out;
+       }
++      if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i))
++              goto err_out;
++      err = 0;
+       list_add_tail(&i->list, &nm_i->free_nid_list);
+       nm_i->fcnt++;
++err_out:
+       spin_unlock(&nm_i->free_nid_list_lock);
+       radix_tree_preload_end();
+-      return 1;
++err:
++      if (err)
++              kmem_cache_free(free_nid_slab, i);
++      return !err;
+ }
+ 
+ static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
diff --git a/queue-4.9/f2fs-fix-to-do-sanity-check-with-block-address-in-main-area-v2.patch b/queue-4.9/f2fs-fix-to-do-sanity-check-with-block-address-in-main-area-v2.patch

new file mode 100644 (file)

index 0000000..abac309
--- /dev/null
+++ b/queue-4.9/f2fs-fix-to-do-sanity-check-with-block-address-in-main-area-v2.patch
@@ -0,0 +1,365 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Chao Yu <yuchao0@huawei.com>
+Date: Tue, 10 Jul 2018 23:01:45 +0800
+Subject: f2fs: fix to do sanity check with block address in main area v2
+
+From: Chao Yu <yuchao0@huawei.com>
+
+commit 91291e9998d208370eb8156c760691b873bd7522 upstream.
+
+This patch adds f2fs_is_valid_blkaddr() to the functions below to sanity
+check the block address and avoid a potential panic:
+- f2fs_grab_read_bio()
+- __written_first_block()
+
+https://bugzilla.kernel.org/show_bug.cgi?id=200465
+
+- Reproduce
+
+- POC (poc.c)
+    #define _GNU_SOURCE
+    #include <sys/types.h>
+    #include <sys/mount.h>
+    #include <sys/mman.h>
+    #include <sys/stat.h>
+    #include <sys/xattr.h>
+
+    #include <dirent.h>
+    #include <errno.h>
+    #include <error.h>
+    #include <fcntl.h>
+    #include <stdio.h>
+    #include <stdlib.h>
+    #include <string.h>
+    #include <unistd.h>
+
+    #include <linux/falloc.h>
+    #include <linux/loop.h>
+
+    static void activity(char *mpoint) {
+
+      char *xattr;
+      int err;
+
+      err = asprintf(&xattr, "%s/foo/bar/xattr", mpoint);
+
+      char buf2[113];
+      memset(buf2, 0, sizeof(buf2));
+      listxattr(xattr, buf2, sizeof(buf2));
+
+    }
+
+    int main(int argc, char *argv[]) {
+      activity(argv[1]);
+      return 0;
+    }
+
+- kernel message
+[  844.718738] F2FS-fs (loop0): Mounted with checkpoint version = 2
+[  846.430929] F2FS-fs (loop0): access invalid blkaddr:1024
+[  846.431058] WARNING: CPU: 1 PID: 1249 at fs/f2fs/checkpoint.c:154 f2fs_is_valid_blkaddr+0x10f/0x160
+[  846.431059] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer snd input_leds joydev soundcore serio_raw i2c_piix4 mac_hid ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi autofs4 raid10 raid456 libcrc32c async_raid6_recov async_memcpy async_pq async_xor xor async_tx raid6_pq raid1 raid0 multipath linear qxl ttm crct10dif_pclmul crc32_pclmul drm_kms_helper ghash_clmulni_intel syscopyarea sysfillrect sysimgblt fb_sys_fops pcbc drm 8139too aesni_intel 8139cp floppy psmouse mii aes_x86_64 crypto_simd pata_acpi cryptd glue_helper
+[  846.431310] CPU: 1 PID: 1249 Comm: a.out Not tainted 4.18.0-rc3+ #1
+[  846.431312] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
+[  846.431315] RIP: 0010:f2fs_is_valid_blkaddr+0x10f/0x160
+[  846.431316] Code: 00 eb ed 31 c0 83 fa 05 75 ae 48 83 ec 08 48 8b 3f 89 f1 48 c7 c2 fc 0b 0f 8b 48 c7 c6 8b d7 09 8b 88 44 24 07 e8 61 8b ff ff <0f> 0b 0f b6 44 24 07 48 83 c4 08 eb 81 4c 8b 47 10 8b 8f 38 04 00
+[  846.431347] RSP: 0018:ffff961c414a7bc0 EFLAGS: 00010282
+[  846.431349] RAX: 0000000000000000 RBX: ffffc5f787b8ea80 RCX: 0000000000000000
+[  846.431350] RDX: 0000000000000000 RSI: ffff89dfffd165d8 RDI: ffff89dfffd165d8
+[  846.431351] RBP: ffff961c414a7c20 R08: 0000000000000001 R09: 0000000000000248
+[  846.431353] R10: 0000000000000000 R11: 0000000000000248 R12: 0000000000000007
+[  846.431369] R13: ffff89dff5492800 R14: ffff89dfae3aa000 R15: ffff89dff4ff88d0
+[  846.431372] FS:  00007f882e2fb700(0000) GS:ffff89dfffd00000(0000) knlGS:0000000000000000
+[  846.431373] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  846.431374] CR2: 0000000001a88008 CR3: 00000001eb572000 CR4: 00000000000006e0
+[  846.431384] Call Trace:
+[  846.431426]  f2fs_iget+0x6f4/0xe70
+[  846.431430]  ? f2fs_find_entry+0x71/0x90
+[  846.431432]  f2fs_lookup+0x1aa/0x390
+[  846.431452]  __lookup_slow+0x97/0x150
+[  846.431459]  lookup_slow+0x35/0x50
+[  846.431462]  walk_component+0x1c6/0x470
+[  846.431479]  ? memcg_kmem_charge_memcg+0x70/0x90
+[  846.431488]  ? page_add_file_rmap+0x13/0x200
+[  846.431491]  path_lookupat+0x76/0x230
+[  846.431501]  ? __alloc_pages_nodemask+0xfc/0x280
+[  846.431504]  filename_lookup+0xb8/0x1a0
+[  846.431534]  ? _cond_resched+0x16/0x40
+[  846.431541]  ? kmem_cache_alloc+0x160/0x1d0
+[  846.431549]  ? path_listxattr+0x41/0xa0
+[  846.431551]  path_listxattr+0x41/0xa0
+[  846.431570]  do_syscall_64+0x55/0x100
+[  846.431583]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
+[  846.431607] RIP: 0033:0x7f882de1c0d7
+[  846.431607] Code: f0 ff ff 73 01 c3 48 8b 0d be dd 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 b8 c2 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 91 dd 2b 00 f7 d8 64 89 01 48
+[  846.431639] RSP: 002b:00007ffe8e66c238 EFLAGS: 00000202 ORIG_RAX: 00000000000000c2
+[  846.431641] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f882de1c0d7
+[  846.431642] RDX: 0000000000000071 RSI: 00007ffe8e66c280 RDI: 0000000001a880c0
+[  846.431643] RBP: 00007ffe8e66c300 R08: 0000000001a88010 R09: 0000000000000000
+[  846.431645] R10: 00000000000001ab R11: 0000000000000202 R12: 0000000000400550
+[  846.431646] R13: 00007ffe8e66c400 R14: 0000000000000000 R15: 0000000000000000
+[  846.431648] ---[ end trace abca54df39d14f5c ]---
+[  846.431651] F2FS-fs (loop0): invalid blkaddr: 1024, type: 5, run fsck to fix.
+[  846.431762] WARNING: CPU: 1 PID: 1249 at fs/f2fs/f2fs.h:2697 f2fs_iget+0xd17/0xe70
+[  846.431763] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer snd input_leds joydev soundcore serio_raw i2c_piix4 mac_hid ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi autofs4 raid10 raid456 libcrc32c async_raid6_recov async_memcpy async_pq async_xor xor async_tx raid6_pq raid1 raid0 multipath linear qxl ttm crct10dif_pclmul crc32_pclmul drm_kms_helper ghash_clmulni_intel syscopyarea sysfillrect sysimgblt fb_sys_fops pcbc drm 8139too aesni_intel 8139cp floppy psmouse mii aes_x86_64 crypto_simd pata_acpi cryptd glue_helper
+[  846.431797] CPU: 1 PID: 1249 Comm: a.out Tainted: G        W         4.18.0-rc3+ #1
+[  846.431798] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
+[  846.431800] RIP: 0010:f2fs_iget+0xd17/0xe70
+[  846.431801] Code: ff ff 48 63 d8 e9 e1 f6 ff ff 48 8b 45 c8 41 b8 05 00 00 00 48 c7 c2 d8 e8 0e 8b 48 c7 c6 1d b0 0a 8b 48 8b 38 e8 f9 b4 00 00 <0f> 0b 48 8b 45 c8 f0 80 48 48 04 e9 d8 f9 ff ff 0f 0b 48 8b 43 18
+[  846.431832] RSP: 0018:ffff961c414a7bd0 EFLAGS: 00010282
+[  846.431834] RAX: 0000000000000000 RBX: ffffc5f787b8ea80 RCX: 0000000000000006
+[  846.431835] RDX: 0000000000000000 RSI: 0000000000000096 RDI: ffff89dfffd165d0
+[  846.431836] RBP: ffff961c414a7c20 R08: 0000000000000000 R09: 0000000000000273
+[  846.431837] R10: 0000000000000000 R11: ffff89dfad50ca60 R12: 0000000000000007
+[  846.431838] R13: ffff89dff5492800 R14: ffff89dfae3aa000 R15: ffff89dff4ff88d0
+[  846.431840] FS:  00007f882e2fb700(0000) GS:ffff89dfffd00000(0000) knlGS:0000000000000000
+[  846.431841] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  846.431842] CR2: 0000000001a88008 CR3: 00000001eb572000 CR4: 00000000000006e0
+[  846.431846] Call Trace:
+[  846.431850]  ? f2fs_find_entry+0x71/0x90
+[  846.431853]  f2fs_lookup+0x1aa/0x390
+[  846.431856]  __lookup_slow+0x97/0x150
+[  846.431858]  lookup_slow+0x35/0x50
+[  846.431874]  walk_component+0x1c6/0x470
+[  846.431878]  ? memcg_kmem_charge_memcg+0x70/0x90
+[  846.431880]  ? page_add_file_rmap+0x13/0x200
+[  846.431882]  path_lookupat+0x76/0x230
+[  846.431884]  ? __alloc_pages_nodemask+0xfc/0x280
+[  846.431886]  filename_lookup+0xb8/0x1a0
+[  846.431890]  ? _cond_resched+0x16/0x40
+[  846.431891]  ? kmem_cache_alloc+0x160/0x1d0
+[  846.431894]  ? path_listxattr+0x41/0xa0
+[  846.431896]  path_listxattr+0x41/0xa0
+[  846.431898]  do_syscall_64+0x55/0x100
+[  846.431901]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
+[  846.431902] RIP: 0033:0x7f882de1c0d7
+[  846.431903] Code: f0 ff ff 73 01 c3 48 8b 0d be dd 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 b8 c2 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 91 dd 2b 00 f7 d8 64 89 01 48
+[  846.431934] RSP: 002b:00007ffe8e66c238 EFLAGS: 00000202 ORIG_RAX: 00000000000000c2
+[  846.431936] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f882de1c0d7
+[  846.431937] RDX: 0000000000000071 RSI: 00007ffe8e66c280 RDI: 0000000001a880c0
+[  846.431939] RBP: 00007ffe8e66c300 R08: 0000000001a88010 R09: 0000000000000000
+[  846.431940] R10: 00000000000001ab R11: 0000000000000202 R12: 0000000000400550
+[  846.431941] R13: 00007ffe8e66c400 R14: 0000000000000000 R15: 0000000000000000
+[  846.431943] ---[ end trace abca54df39d14f5d ]---
+[  846.432033] F2FS-fs (loop0): access invalid blkaddr:1024
+[  846.432051] WARNING: CPU: 1 PID: 1249 at fs/f2fs/checkpoint.c:154 f2fs_is_valid_blkaddr+0x10f/0x160
+[  846.432051] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer snd input_leds joydev soundcore serio_raw i2c_piix4 mac_hid ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi autofs4 raid10 raid456 libcrc32c async_raid6_recov async_memcpy async_pq async_xor xor async_tx raid6_pq raid1 raid0 multipath linear qxl ttm crct10dif_pclmul crc32_pclmul drm_kms_helper ghash_clmulni_intel syscopyarea sysfillrect sysimgblt fb_sys_fops pcbc drm 8139too aesni_intel 8139cp floppy psmouse mii aes_x86_64 crypto_simd pata_acpi cryptd glue_helper
+[  846.432085] CPU: 1 PID: 1249 Comm: a.out Tainted: G        W         4.18.0-rc3+ #1
+[  846.432086] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
+[  846.432089] RIP: 0010:f2fs_is_valid_blkaddr+0x10f/0x160
+[  846.432089] Code: 00 eb ed 31 c0 83 fa 05 75 ae 48 83 ec 08 48 8b 3f 89 f1 48 c7 c2 fc 0b 0f 8b 48 c7 c6 8b d7 09 8b 88 44 24 07 e8 61 8b ff ff <0f> 0b 0f b6 44 24 07 48 83 c4 08 eb 81 4c 8b 47 10 8b 8f 38 04 00
+[  846.432120] RSP: 0018:ffff961c414a7900 EFLAGS: 00010286
+[  846.432122] RAX: 0000000000000000 RBX: 0000000000000400 RCX: 0000000000000006
+[  846.432123] RDX: 0000000000000000 RSI: 0000000000000096 RDI: ffff89dfffd165d0
+[  846.432124] RBP: ffff89dff5492800 R08: 0000000000000001 R09: 000000000000029d
+[  846.432125] R10: ffff961c414a7820 R11: 000000000000029d R12: 0000000000000400
+[  846.432126] R13: 0000000000000000 R14: ffff89dff4ff88d0 R15: 0000000000000000
+[  846.432128] FS:  00007f882e2fb700(0000) GS:ffff89dfffd00000(0000) knlGS:0000000000000000
+[  846.432130] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  846.432131] CR2: 0000000001a88008 CR3: 00000001eb572000 CR4: 00000000000006e0
+[  846.432135] Call Trace:
+[  846.432151]  f2fs_wait_on_block_writeback+0x20/0x110
+[  846.432158]  f2fs_grab_read_bio+0xbc/0xe0
+[  846.432161]  f2fs_submit_page_read+0x21/0x280
+[  846.432163]  f2fs_get_read_data_page+0xb7/0x3c0
+[  846.432165]  f2fs_get_lock_data_page+0x29/0x1e0
+[  846.432167]  f2fs_get_new_data_page+0x148/0x550
+[  846.432170]  f2fs_add_regular_entry+0x1d2/0x550
+[  846.432178]  ? __switch_to+0x12f/0x460
+[  846.432181]  f2fs_add_dentry+0x6a/0xd0
+[  846.432184]  f2fs_do_add_link+0xe9/0x140
+[  846.432186]  __recover_dot_dentries+0x260/0x280
+[  846.432189]  f2fs_lookup+0x343/0x390
+[  846.432193]  __lookup_slow+0x97/0x150
+[  846.432195]  lookup_slow+0x35/0x50
+[  846.432208]  walk_component+0x1c6/0x470
+[  846.432212]  ? memcg_kmem_charge_memcg+0x70/0x90
+[  846.432215]  ? page_add_file_rmap+0x13/0x200
+[  846.432217]  path_lookupat+0x76/0x230
+[  846.432219]  ? __alloc_pages_nodemask+0xfc/0x280
+[  846.432221]  filename_lookup+0xb8/0x1a0
+[  846.432224]  ? _cond_resched+0x16/0x40
+[  846.432226]  ? kmem_cache_alloc+0x160/0x1d0
+[  846.432228]  ? path_listxattr+0x41/0xa0
+[  846.432230]  path_listxattr+0x41/0xa0
+[  846.432233]  do_syscall_64+0x55/0x100
+[  846.432235]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
+[  846.432237] RIP: 0033:0x7f882de1c0d7
+[  846.432237] Code: f0 ff ff 73 01 c3 48 8b 0d be dd 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 b8 c2 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 91 dd 2b 00 f7 d8 64 89 01 48
+[  846.432269] RSP: 002b:00007ffe8e66c238 EFLAGS: 00000202 ORIG_RAX: 00000000000000c2
+[  846.432271] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f882de1c0d7
+[  846.432272] RDX: 0000000000000071 RSI: 00007ffe8e66c280 RDI: 0000000001a880c0
+[  846.432273] RBP: 00007ffe8e66c300 R08: 0000000001a88010 R09: 0000000000000000
+[  846.432274] R10: 00000000000001ab R11: 0000000000000202 R12: 0000000000400550
+[  846.432275] R13: 00007ffe8e66c400 R14: 0000000000000000 R15: 0000000000000000
+[  846.432277] ---[ end trace abca54df39d14f5e ]---
+[  846.432279] F2FS-fs (loop0): invalid blkaddr: 1024, type: 5, run fsck to fix.
+[  846.432376] WARNING: CPU: 1 PID: 1249 at fs/f2fs/f2fs.h:2697 f2fs_wait_on_block_writeback+0xb1/0x110
+[  846.432376] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer snd input_leds joydev soundcore serio_raw i2c_piix4 mac_hid ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi autofs4 raid10 raid456 libcrc32c async_raid6_recov async_memcpy async_pq async_xor xor async_tx raid6_pq raid1 raid0 multipath linear qxl ttm crct10dif_pclmul crc32_pclmul drm_kms_helper ghash_clmulni_intel syscopyarea sysfillrect sysimgblt fb_sys_fops pcbc drm 8139too aesni_intel 8139cp floppy psmouse mii aes_x86_64 crypto_simd pata_acpi cryptd glue_helper
+[  846.432410] CPU: 1 PID: 1249 Comm: a.out Tainted: G        W         4.18.0-rc3+ #1
+[  846.432411] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
+[  846.432413] RIP: 0010:f2fs_wait_on_block_writeback+0xb1/0x110
+[  846.432414] Code: 66 90 f0 ff 4b 34 74 59 5b 5d c3 48 8b 7d 00 41 b8 05 00 00 00 89 d9 48 c7 c2 d8 e8 0e 8b 48 c7 c6 1d b0 0a 8b e8 df bc fd ff <0f> 0b f0 80 4d 48 04 e9 67 ff ff ff 48 8b 03 48 c1 e8 37 83 e0 07
+[  846.432445] RSP: 0018:ffff961c414a7910 EFLAGS: 00010286
+[  846.432447] RAX: 0000000000000000 RBX: 0000000000000400 RCX: 0000000000000006
+[  846.432448] RDX: 0000000000000000 RSI: 0000000000000092 RDI: ffff89dfffd165d0
+[  846.432449] RBP: ffff89dff5492800 R08: 0000000000000000 R09: 00000000000002d1
+[  846.432450] R10: ffff961c414a7820 R11: ffff89dfad50cf80 R12: 0000000000000400
+[  846.432451] R13: 0000000000000000 R14: ffff89dff4ff88d0 R15: 0000000000000000
+[  846.432453] FS:  00007f882e2fb700(0000) GS:ffff89dfffd00000(0000) knlGS:0000000000000000
+[  846.432454] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  846.432455] CR2: 0000000001a88008 CR3: 00000001eb572000 CR4: 00000000000006e0
+[  846.432459] Call Trace:
+[  846.432463]  f2fs_grab_read_bio+0xbc/0xe0
+[  846.432464]  f2fs_submit_page_read+0x21/0x280
+[  846.432466]  f2fs_get_read_data_page+0xb7/0x3c0
+[  846.432468]  f2fs_get_lock_data_page+0x29/0x1e0
+[  846.432470]  f2fs_get_new_data_page+0x148/0x550
+[  846.432473]  f2fs_add_regular_entry+0x1d2/0x550
+[  846.432475]  ? __switch_to+0x12f/0x460
+[  846.432477]  f2fs_add_dentry+0x6a/0xd0
+[  846.432480]  f2fs_do_add_link+0xe9/0x140
+[  846.432483]  __recover_dot_dentries+0x260/0x280
+[  846.432485]  f2fs_lookup+0x343/0x390
+[  846.432488]  __lookup_slow+0x97/0x150
+[  846.432490]  lookup_slow+0x35/0x50
+[  846.432505]  walk_component+0x1c6/0x470
+[  846.432509]  ? memcg_kmem_charge_memcg+0x70/0x90
+[  846.432511]  ? page_add_file_rmap+0x13/0x200
+[  846.432513]  path_lookupat+0x76/0x230
+[  846.432515]  ? __alloc_pages_nodemask+0xfc/0x280
+[  846.432517]  filename_lookup+0xb8/0x1a0
+[  846.432520]  ? _cond_resched+0x16/0x40
+[  846.432522]  ? kmem_cache_alloc+0x160/0x1d0
+[  846.432525]  ? path_listxattr+0x41/0xa0
+[  846.432526]  path_listxattr+0x41/0xa0
+[  846.432529]  do_syscall_64+0x55/0x100
+[  846.432531]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
+[  846.432533] RIP: 0033:0x7f882de1c0d7
+[  846.432533] Code: f0 ff ff 73 01 c3 48 8b 0d be dd 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 b8 c2 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 91 dd 2b 00 f7 d8 64 89 01 48
+[  846.432565] RSP: 002b:00007ffe8e66c238 EFLAGS: 00000202 ORIG_RAX: 00000000000000c2
+[  846.432567] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f882de1c0d7
+[  846.432568] RDX: 0000000000000071 RSI: 00007ffe8e66c280 RDI: 0000000001a880c0
+[  846.432569] RBP: 00007ffe8e66c300 R08: 0000000001a88010 R09: 0000000000000000
+[  846.432570] R10: 00000000000001ab R11: 0000000000000202 R12: 0000000000400550
+[  846.432571] R13: 00007ffe8e66c400 R14: 0000000000000000 R15: 0000000000000000
+[  846.432573] ---[ end trace abca54df39d14f5f ]---
+[  846.434280] BUG: unable to handle kernel NULL pointer dereference at 0000000000000008
+[  846.434424] PGD 80000001ebd3a067 P4D 80000001ebd3a067 PUD 1eb1ae067 PMD 0
+[  846.434551] Oops: 0000 [#1] SMP PTI
+[  846.434697] CPU: 0 PID: 44 Comm: kworker/u5:0 Tainted: G        W         4.18.0-rc3+ #1
+[  846.434805] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
+[  846.435000] Workqueue: fscrypt_read_queue decrypt_work
+[  846.435174] RIP: 0010:fscrypt_do_page_crypto+0x6e/0x2d0
+[  846.435351] Code: 00 65 48 8b 04 25 28 00 00 00 48 89 84 24 88 00 00 00 31 c0 e8 43 c2 e0 ff 49 8b 86 48 02 00 00 85 ed c7 44 24 70 00 00 00 00 <48> 8b 58 08 0f 84 14 02 00 00 48 8b 78 10 48 8b 0c 24 48 c7 84 24
+[  846.435696] RSP: 0018:ffff961c40f9bd60 EFLAGS: 00010206
+[  846.435870] RAX: 0000000000000000 RBX: ffffc5f787719b80 RCX: ffffc5f787719b80
+[  846.436051] RDX: ffffffff8b9f4b88 RSI: ffffffff8b0ae622 RDI: ffff961c40f9bdb8
+[  846.436261] RBP: 0000000000001000 R08: ffffc5f787719b80 R09: 0000000000001000
+[  846.436433] R10: 0000000000000018 R11: fefefefefefefeff R12: ffffc5f787719b80
+[  846.436562] R13: ffffc5f787719b80 R14: ffff89dff4ff88d0 R15: 0ffff89dfaddee60
+[  846.436658] FS:  0000000000000000(0000) GS:ffff89dfffc00000(0000) knlGS:0000000000000000
+[  846.436758] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  846.436898] CR2: 0000000000000008 CR3: 00000001eddd0000 CR4: 00000000000006f0
+[  846.437001] Call Trace:
+[  846.437181]  ? check_preempt_wakeup+0xf2/0x230
+[  846.437276]  ? check_preempt_curr+0x7c/0x90
+[  846.437370]  fscrypt_decrypt_page+0x48/0x4d
+[  846.437466]  __fscrypt_decrypt_bio+0x5b/0x90
+[  846.437542]  decrypt_work+0x12/0x20
+[  846.437651]  process_one_work+0x15e/0x3d0
+[  846.437740]  worker_thread+0x4c/0x440
+[  846.437848]  kthread+0xf8/0x130
+[  846.437938]  ? rescuer_thread+0x350/0x350
+[  846.438022]  ? kthread_associate_blkcg+0x90/0x90
+[  846.438117]  ret_from_fork+0x35/0x40
+[  846.438201] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer snd input_leds joydev soundcore serio_raw i2c_piix4 mac_hid ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi autofs4 raid10 raid456 libcrc32c async_raid6_recov async_memcpy async_pq async_xor xor async_tx raid6_pq raid1 raid0 multipath linear qxl ttm crct10dif_pclmul crc32_pclmul drm_kms_helper ghash_clmulni_intel syscopyarea sysfillrect sysimgblt fb_sys_fops pcbc drm 8139too aesni_intel 8139cp floppy psmouse mii aes_x86_64 crypto_simd pata_acpi cryptd glue_helper
+[  846.438653] CR2: 0000000000000008
+[  846.438713] ---[ end trace abca54df39d14f60 ]---
+[  846.438796] RIP: 0010:fscrypt_do_page_crypto+0x6e/0x2d0
+[  846.438844] Code: 00 65 48 8b 04 25 28 00 00 00 48 89 84 24 88 00 00 00 31 c0 e8 43 c2 e0 ff 49 8b 86 48 02 00 00 85 ed c7 44 24 70 00 00 00 00 <48> 8b 58 08 0f 84 14 02 00 00 48 8b 78 10 48 8b 0c 24 48 c7 84 24
+[  846.439084] RSP: 0018:ffff961c40f9bd60 EFLAGS: 00010206
+[  846.439176] RAX: 0000000000000000 RBX: ffffc5f787719b80 RCX: ffffc5f787719b80
+[  846.440927] RDX: ffffffff8b9f4b88 RSI: ffffffff8b0ae622 RDI: ffff961c40f9bdb8
+[  846.442083] RBP: 0000000000001000 R08: ffffc5f787719b80 R09: 0000000000001000
+[  846.443284] R10: 0000000000000018 R11: fefefefefefefeff R12: ffffc5f787719b80
+[  846.444448] R13: ffffc5f787719b80 R14: ffff89dff4ff88d0 R15: 0ffff89dfaddee60
+[  846.445558] FS:  0000000000000000(0000) GS:ffff89dfffc00000(0000) knlGS:0000000000000000
+[  846.446687] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  846.447796] CR2: 0000000000000008 CR3: 00000001eddd0000 CR4: 00000000000006f0
+
+- Location
+https://elixir.bootlin.com/linux/v4.18-rc4/source/fs/crypto/crypto.c#L149
+       struct crypto_skcipher *tfm = ci->ci_ctfm;
+Here ci can be NULL
+
+Note that this issue may require CONFIG_F2FS_FS_ENCRYPTION=y to reproduce.
+
+Reported-by: Wen Xu <wen.xu@gatech.edu>
+Signed-off-by: Chao Yu <yuchao0@huawei.com>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+[bwh: Backported to 4.9: adjust context]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/data.c  |    3 +++
+ fs/f2fs/inode.c |   18 +++++++++++++-----
+ 2 files changed, 16 insertions(+), 5 deletions(-)
+
+--- a/fs/f2fs/data.c
++++ b/fs/f2fs/data.c
+@@ -995,6 +995,9 @@ static struct bio *f2fs_grab_bio(struct
+       struct block_device *bdev = sbi->sb->s_bdev;
+       struct bio *bio;
+ 
++      if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
++              return ERR_PTR(-EFAULT);
++
+       if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
+               ctx = fscrypt_get_ctx(inode, GFP_NOFS);
+               if (IS_ERR(ctx))
+--- a/fs/f2fs/inode.c
++++ b/fs/f2fs/inode.c
+@@ -59,14 +59,16 @@ static void __get_inode_rdev(struct inod
+       }
+ }
+ 
+-static bool __written_first_block(struct f2fs_sb_info *sbi,
++static int __written_first_block(struct f2fs_sb_info *sbi,
+                                       struct f2fs_inode *ri)
+ {
+       block_t addr = le32_to_cpu(ri->i_addr[0]);
+ 
+-      if (is_valid_data_blkaddr(sbi, addr))
+-              return true;
+-      return false;
++      if (!__is_valid_data_blkaddr(addr))
++              return 1;
++      if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC))
++              return -EFAULT;
++      return 0;
+ }
+ 
+ static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
+@@ -154,6 +156,7 @@ static int do_read_inode(struct inode *i
+       struct f2fs_inode_info *fi = F2FS_I(inode);
+       struct page *node_page;
+       struct f2fs_inode *ri;
++      int err;
+ 
+       /* Check if ino is within scope */
+       if (check_nid_range(sbi, inode->i_ino)) {
+@@ -209,7 +212,12 @@ static int do_read_inode(struct inode *i
+       /* get rdev by using inline_info */
+       __get_inode_rdev(inode, ri);
+ 
+-      if (__written_first_block(sbi, ri))
++      err = __written_first_block(sbi, ri);
++      if (err < 0) {
++              f2fs_put_page(node_page, 1);
++              return err;
++      }
++      if (!err)
+               set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
+ 
+       if (!need_inode_block_update(sbi, inode->i_ino))
diff --git a/queue-4.9/f2fs-fix-to-do-sanity-check-with-block-address-in-main-area.patch b/queue-4.9/f2fs-fix-to-do-sanity-check-with-block-address-in-main-area.patch

new file mode 100644 (file)

index 0000000..7c56989
--- /dev/null
+++ b/queue-4.9/f2fs-fix-to-do-sanity-check-with-block-address-in-main-area.patch
@@ -0,0 +1,489 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Chao Yu <yuchao0@huawei.com>
+Date: Wed, 1 Aug 2018 19:13:44 +0800
+Subject: f2fs: fix to do sanity check with block address in main area
+
+From: Chao Yu <yuchao0@huawei.com>
+
+commit c9b60788fc760d136211853f10ce73dc152d1f4a upstream.
+
+This patch adds sanity checks for the fields below:
+- cp_pack_total_block_count
+- blkaddr of data/node
+- extent info
+
+- Overview
+BUG() in verify_block_addr() when writing to a corrupted f2fs image
+
+- Reproduce (4.18 upstream kernel)
+
+- POC (poc.c)
+
+static void activity(char *mpoint) {
+
+  char *foo_bar_baz;
+  int err;
+
+  static int buf[8192];
+  memset(buf, 0, sizeof(buf));
+
+  err = asprintf(&foo_bar_baz, "%s/foo/bar/baz", mpoint);
+
+  int fd = open(foo_bar_baz, O_RDWR | O_TRUNC, 0777);
+  if (fd >= 0) {
+    write(fd, (char *)buf, sizeof(buf));
+    fdatasync(fd);
+    close(fd);
+  }
+}
+
+int main(int argc, char *argv[]) {
+  activity(argv[1]);
+  return 0;
+}
+
+- Kernel message
+[  689.349473] F2FS-fs (loop0): Mounted with checkpoint version = 3
+[  699.728662] WARNING: CPU: 0 PID: 1309 at fs/f2fs/segment.c:2860 f2fs_inplace_write_data+0x232/0x240
+[  699.728670] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy
+[  699.729056] CPU: 0 PID: 1309 Comm: a.out Not tainted 4.18.0-rc1+ #4
+[  699.729064] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
+[  699.729074] RIP: 0010:f2fs_inplace_write_data+0x232/0x240
+[  699.729076] Code: ff e9 cf fe ff ff 49 8d 7d 10 e8 39 45 ad ff 4d 8b 7d 10 be 04 00 00 00 49 8d 7f 48 e8 07 49 ad ff 45 8b 7f 48 e9 fb fe ff ff <0f> 0b f0 41 80 4d 48 04 e9 65 fe ff ff 90 66 66 66 66 90 55 48 8d
+[  699.729130] RSP: 0018:ffff8801f43af568 EFLAGS: 00010202
+[  699.729139] RAX: 000000000000003f RBX: ffff8801f43af7b8 RCX: ffffffffb88c9113
+[  699.729142] RDX: 0000000000000003 RSI: dffffc0000000000 RDI: ffff8802024e5540
+[  699.729144] RBP: ffff8801f43af590 R08: 0000000000000009 R09: ffffffffffffffe8
+[  699.729147] R10: 0000000000000001 R11: ffffed0039b0596a R12: ffff8802024e5540
+[  699.729149] R13: ffff8801f0335500 R14: ffff8801e3e7a700 R15: ffff8801e1ee4450
+[  699.729154] FS:  00007f9bf97f5700(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000
+[  699.729156] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  699.729159] CR2: 00007f9bf925d170 CR3: 00000001f0c34000 CR4: 00000000000006f0
+[  699.729171] Call Trace:
+[  699.729192]  f2fs_do_write_data_page+0x2e2/0xe00
+[  699.729203]  ? f2fs_should_update_outplace+0xd0/0xd0
+[  699.729238]  ? memcg_drain_all_list_lrus+0x280/0x280
+[  699.729269]  ? __radix_tree_replace+0xa3/0x120
+[  699.729276]  __write_data_page+0x5c7/0xe30
+[  699.729291]  ? kasan_check_read+0x11/0x20
+[  699.729310]  ? page_mapped+0x8a/0x110
+[  699.729321]  ? page_mkclean+0xe9/0x160
+[  699.729327]  ? f2fs_do_write_data_page+0xe00/0xe00
+[  699.729331]  ? invalid_page_referenced_vma+0x130/0x130
+[  699.729345]  ? clear_page_dirty_for_io+0x332/0x450
+[  699.729351]  f2fs_write_cache_pages+0x4ca/0x860
+[  699.729358]  ? __write_data_page+0xe30/0xe30
+[  699.729374]  ? percpu_counter_add_batch+0x22/0xa0
+[  699.729380]  ? kasan_check_write+0x14/0x20
+[  699.729391]  ? _raw_spin_lock+0x17/0x40
+[  699.729403]  ? f2fs_mark_inode_dirty_sync.part.18+0x16/0x30
+[  699.729413]  ? iov_iter_advance+0x113/0x640
+[  699.729418]  ? f2fs_write_end+0x133/0x2e0
+[  699.729423]  ? balance_dirty_pages_ratelimited+0x239/0x640
+[  699.729428]  f2fs_write_data_pages+0x329/0x520
+[  699.729433]  ? generic_perform_write+0x250/0x320
+[  699.729438]  ? f2fs_write_cache_pages+0x860/0x860
+[  699.729454]  ? current_time+0x110/0x110
+[  699.729459]  ? f2fs_preallocate_blocks+0x1ef/0x370
+[  699.729464]  do_writepages+0x37/0xb0
+[  699.729468]  ? f2fs_write_cache_pages+0x860/0x860
+[  699.729472]  ? do_writepages+0x37/0xb0
+[  699.729478]  __filemap_fdatawrite_range+0x19a/0x1f0
+[  699.729483]  ? delete_from_page_cache_batch+0x4e0/0x4e0
+[  699.729496]  ? __vfs_write+0x2b2/0x410
+[  699.729501]  file_write_and_wait_range+0x66/0xb0
+[  699.729506]  f2fs_do_sync_file+0x1f9/0xd90
+[  699.729511]  ? truncate_partial_data_page+0x290/0x290
+[  699.729521]  ? __sb_end_write+0x30/0x50
+[  699.729526]  ? vfs_write+0x20f/0x260
+[  699.729530]  f2fs_sync_file+0x9a/0xb0
+[  699.729534]  ? f2fs_do_sync_file+0xd90/0xd90
+[  699.729548]  vfs_fsync_range+0x68/0x100
+[  699.729554]  ? __fget_light+0xc9/0xe0
+[  699.729558]  do_fsync+0x3d/0x70
+[  699.729562]  __x64_sys_fdatasync+0x24/0x30
+[  699.729585]  do_syscall_64+0x78/0x170
+[  699.729595]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
+[  699.729613] RIP: 0033:0x7f9bf930d800
+[  699.729615] Code: 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 83 3d 49 bf 2c 00 00 75 10 b8 4b 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 be 78 01 00 48 89 04 24
+[  699.729668] RSP: 002b:00007ffee3606c68 EFLAGS: 00000246 ORIG_RAX: 000000000000004b
+[  699.729673] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f9bf930d800
+[  699.729675] RDX: 0000000000008000 RSI: 00000000006010a0 RDI: 0000000000000003
+[  699.729678] RBP: 00007ffee3606ca0 R08: 0000000001503010 R09: 0000000000000000
+[  699.729680] R10: 00000000000002e8 R11: 0000000000000246 R12: 0000000000400610
+[  699.729683] R13: 00007ffee3606da0 R14: 0000000000000000 R15: 0000000000000000
+[  699.729687] ---[ end trace 4ce02f25ff7d3df5 ]---
+[  699.729782] ------------[ cut here ]------------
+[  699.729785] kernel BUG at fs/f2fs/segment.h:654!
+[  699.731055] invalid opcode: 0000 [#1] SMP KASAN PTI
+[  699.732104] CPU: 0 PID: 1309 Comm: a.out Tainted: G        W         4.18.0-rc1+ #4
+[  699.733684] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
+[  699.735611] RIP: 0010:f2fs_submit_page_bio+0x29b/0x730
+[  699.736649] Code: 54 49 8d bd 18 04 00 00 e8 b2 59 af ff 41 8b 8d 18 04 00 00 8b 45 b8 41 d3 e6 44 01 f0 4c 8d 73 14 41 39 c7 0f 82 37 fe ff ff <0f> 0b 65 8b 05 2c 04 77 47 89 c0 48 0f a3 05 52 c1 d5 01 0f 92 c0
+[  699.740524] RSP: 0018:ffff8801f43af508 EFLAGS: 00010283
+[  699.741573] RAX: 0000000000000000 RBX: ffff8801f43af7b8 RCX: ffffffffb88a7cef
+[  699.743006] RDX: 0000000000000007 RSI: dffffc0000000000 RDI: ffff8801e3e7a64c
+[  699.744426] RBP: ffff8801f43af558 R08: ffffed003e066b55 R09: ffffed003e066b55
+[  699.745833] R10: 0000000000000001 R11: ffffed003e066b54 R12: ffffea0007876940
+[  699.747256] R13: ffff8801f0335500 R14: ffff8801e3e7a600 R15: 0000000000000001
+[  699.748683] FS:  00007f9bf97f5700(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000
+[  699.750293] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  699.751462] CR2: 00007f9bf925d170 CR3: 00000001f0c34000 CR4: 00000000000006f0
+[  699.752874] Call Trace:
+[  699.753386]  ? f2fs_inplace_write_data+0x93/0x240
+[  699.754341]  f2fs_inplace_write_data+0xd2/0x240
+[  699.755271]  f2fs_do_write_data_page+0x2e2/0xe00
+[  699.756214]  ? f2fs_should_update_outplace+0xd0/0xd0
+[  699.757215]  ? memcg_drain_all_list_lrus+0x280/0x280
+[  699.758209]  ? __radix_tree_replace+0xa3/0x120
+[  699.759164]  __write_data_page+0x5c7/0xe30
+[  699.760002]  ? kasan_check_read+0x11/0x20
+[  699.760823]  ? page_mapped+0x8a/0x110
+[  699.761573]  ? page_mkclean+0xe9/0x160
+[  699.762345]  ? f2fs_do_write_data_page+0xe00/0xe00
+[  699.763332]  ? invalid_page_referenced_vma+0x130/0x130
+[  699.764374]  ? clear_page_dirty_for_io+0x332/0x450
+[  699.765347]  f2fs_write_cache_pages+0x4ca/0x860
+[  699.766276]  ? __write_data_page+0xe30/0xe30
+[  699.767161]  ? percpu_counter_add_batch+0x22/0xa0
+[  699.768112]  ? kasan_check_write+0x14/0x20
+[  699.768951]  ? _raw_spin_lock+0x17/0x40
+[  699.769739]  ? f2fs_mark_inode_dirty_sync.part.18+0x16/0x30
+[  699.770885]  ? iov_iter_advance+0x113/0x640
+[  699.771743]  ? f2fs_write_end+0x133/0x2e0
+[  699.772569]  ? balance_dirty_pages_ratelimited+0x239/0x640
+[  699.773680]  f2fs_write_data_pages+0x329/0x520
+[  699.774603]  ? generic_perform_write+0x250/0x320
+[  699.775544]  ? f2fs_write_cache_pages+0x860/0x860
+[  699.776510]  ? current_time+0x110/0x110
+[  699.777299]  ? f2fs_preallocate_blocks+0x1ef/0x370
+[  699.778279]  do_writepages+0x37/0xb0
+[  699.779026]  ? f2fs_write_cache_pages+0x860/0x860
+[  699.779978]  ? do_writepages+0x37/0xb0
+[  699.780755]  __filemap_fdatawrite_range+0x19a/0x1f0
+[  699.781746]  ? delete_from_page_cache_batch+0x4e0/0x4e0
+[  699.782820]  ? __vfs_write+0x2b2/0x410
+[  699.783597]  file_write_and_wait_range+0x66/0xb0
+[  699.784540]  f2fs_do_sync_file+0x1f9/0xd90
+[  699.785381]  ? truncate_partial_data_page+0x290/0x290
+[  699.786415]  ? __sb_end_write+0x30/0x50
+[  699.787204]  ? vfs_write+0x20f/0x260
+[  699.787941]  f2fs_sync_file+0x9a/0xb0
+[  699.788694]  ? f2fs_do_sync_file+0xd90/0xd90
+[  699.789572]  vfs_fsync_range+0x68/0x100
+[  699.790360]  ? __fget_light+0xc9/0xe0
+[  699.791128]  do_fsync+0x3d/0x70
+[  699.791779]  __x64_sys_fdatasync+0x24/0x30
+[  699.792614]  do_syscall_64+0x78/0x170
+[  699.793371]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
+[  699.794406] RIP: 0033:0x7f9bf930d800
+[  699.795134] Code: 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 83 3d 49 bf 2c 00 00 75 10 b8 4b 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 be 78 01 00 48 89 04 24
+[  699.798960] RSP: 002b:00007ffee3606c68 EFLAGS: 00000246 ORIG_RAX: 000000000000004b
+[  699.800483] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f9bf930d800
+[  699.801923] RDX: 0000000000008000 RSI: 00000000006010a0 RDI: 0000000000000003
+[  699.803373] RBP: 00007ffee3606ca0 R08: 0000000001503010 R09: 0000000000000000
+[  699.804798] R10: 00000000000002e8 R11: 0000000000000246 R12: 0000000000400610
+[  699.806233] R13: 00007ffee3606da0 R14: 0000000000000000 R15: 0000000000000000
+[  699.807667] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy
+[  699.817079] ---[ end trace 4ce02f25ff7d3df6 ]---
+[  699.818068] RIP: 0010:f2fs_submit_page_bio+0x29b/0x730
+[  699.819114] Code: 54 49 8d bd 18 04 00 00 e8 b2 59 af ff 41 8b 8d 18 04 00 00 8b 45 b8 41 d3 e6 44 01 f0 4c 8d 73 14 41 39 c7 0f 82 37 fe ff ff <0f> 0b 65 8b 05 2c 04 77 47 89 c0 48 0f a3 05 52 c1 d5 01 0f 92 c0
+[  699.822919] RSP: 0018:ffff8801f43af508 EFLAGS: 00010283
+[  699.823977] RAX: 0000000000000000 RBX: ffff8801f43af7b8 RCX: ffffffffb88a7cef
+[  699.825436] RDX: 0000000000000007 RSI: dffffc0000000000 RDI: ffff8801e3e7a64c
+[  699.826881] RBP: ffff8801f43af558 R08: ffffed003e066b55 R09: ffffed003e066b55
+[  699.828292] R10: 0000000000000001 R11: ffffed003e066b54 R12: ffffea0007876940
+[  699.829750] R13: ffff8801f0335500 R14: ffff8801e3e7a600 R15: 0000000000000001
+[  699.831192] FS:  00007f9bf97f5700(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000
+[  699.832793] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  699.833981] CR2: 00007f9bf925d170 CR3: 00000001f0c34000 CR4: 00000000000006f0
+[  699.835556] ==================================================================
+[  699.837029] BUG: KASAN: stack-out-of-bounds in update_stack_state+0x38c/0x3e0
+[  699.838462] Read of size 8 at addr ffff8801f43af970 by task a.out/1309
+
+[  699.840086] CPU: 0 PID: 1309 Comm: a.out Tainted: G      D W         4.18.0-rc1+ #4
+[  699.841603] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
+[  699.843475] Call Trace:
+[  699.843982]  dump_stack+0x7b/0xb5
+[  699.844661]  print_address_description+0x70/0x290
+[  699.845607]  kasan_report+0x291/0x390
+[  699.846351]  ? update_stack_state+0x38c/0x3e0
+[  699.853831]  __asan_load8+0x54/0x90
+[  699.854569]  update_stack_state+0x38c/0x3e0
+[  699.855428]  ? __read_once_size_nocheck.constprop.7+0x20/0x20
+[  699.856601]  ? __save_stack_trace+0x5e/0x100
+[  699.857476]  unwind_next_frame.part.5+0x18e/0x490
+[  699.858448]  ? unwind_dump+0x290/0x290
+[  699.859217]  ? clear_page_dirty_for_io+0x332/0x450
+[  699.860185]  __unwind_start+0x106/0x190
+[  699.860974]  __save_stack_trace+0x5e/0x100
+[  699.861808]  ? __save_stack_trace+0x5e/0x100
+[  699.862691]  ? unlink_anon_vmas+0xba/0x2c0
+[  699.863525]  save_stack_trace+0x1f/0x30
+[  699.864312]  save_stack+0x46/0xd0
+[  699.864993]  ? __alloc_pages_slowpath+0x1420/0x1420
+[  699.865990]  ? flush_tlb_mm_range+0x15e/0x220
+[  699.866889]  ? kasan_check_write+0x14/0x20
+[  699.867724]  ? __dec_node_state+0x92/0xb0
+[  699.868543]  ? lock_page_memcg+0x85/0xf0
+[  699.869350]  ? unlock_page_memcg+0x16/0x80
+[  699.870185]  ? page_remove_rmap+0x198/0x520
+[  699.871048]  ? mark_page_accessed+0x133/0x200
+[  699.871930]  ? _cond_resched+0x1a/0x50
+[  699.872700]  ? unmap_page_range+0xcd4/0xe50
+[  699.873551]  ? rb_next+0x58/0x80
+[  699.874217]  ? rb_next+0x58/0x80
+[  699.874895]  __kasan_slab_free+0x13c/0x1a0
+[  699.875734]  ? unlink_anon_vmas+0xba/0x2c0
+[  699.876563]  kasan_slab_free+0xe/0x10
+[  699.877315]  kmem_cache_free+0x89/0x1e0
+[  699.878095]  unlink_anon_vmas+0xba/0x2c0
+[  699.878913]  free_pgtables+0x101/0x1b0
+[  699.879677]  exit_mmap+0x146/0x2a0
+[  699.880378]  ? __ia32_sys_munmap+0x50/0x50
+[  699.881214]  ? kasan_check_read+0x11/0x20
+[  699.882052]  ? mm_update_next_owner+0x322/0x380
+[  699.882985]  mmput+0x8b/0x1d0
+[  699.883602]  do_exit+0x43a/0x1390
+[  699.884288]  ? mm_update_next_owner+0x380/0x380
+[  699.885212]  ? f2fs_sync_file+0x9a/0xb0
+[  699.885995]  ? f2fs_do_sync_file+0xd90/0xd90
+[  699.886877]  ? vfs_fsync_range+0x68/0x100
+[  699.887694]  ? __fget_light+0xc9/0xe0
+[  699.888442]  ? do_fsync+0x3d/0x70
+[  699.889118]  ? __x64_sys_fdatasync+0x24/0x30
+[  699.889996]  rewind_stack_do_exit+0x17/0x20
+[  699.890860] RIP: 0033:0x7f9bf930d800
+[  699.891585] Code: Bad RIP value.
+[  699.892268] RSP: 002b:00007ffee3606c68 EFLAGS: 00000246 ORIG_RAX: 000000000000004b
+[  699.893781] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f9bf930d800
+[  699.895220] RDX: 0000000000008000 RSI: 00000000006010a0 RDI: 0000000000000003
+[  699.896643] RBP: 00007ffee3606ca0 R08: 0000000001503010 R09: 0000000000000000
+[  699.898069] R10: 00000000000002e8 R11: 0000000000000246 R12: 0000000000400610
+[  699.899505] R13: 00007ffee3606da0 R14: 0000000000000000 R15: 0000000000000000
+
+[  699.901241] The buggy address belongs to the page:
+[  699.902215] page:ffffea0007d0ebc0 count:0 mapcount:0 mapping:0000000000000000 index:0x0
+[  699.903811] flags: 0x2ffff0000000000()
+[  699.904585] raw: 02ffff0000000000 0000000000000000 ffffffff07d00101 0000000000000000
+[  699.906125] raw: 0000000000000000 0000000000240000 00000000ffffffff 0000000000000000
+[  699.907673] page dumped because: kasan: bad access detected
+
+[  699.909108] Memory state around the buggy address:
+[  699.910077]  ffff8801f43af800: 00 f1 f1 f1 f1 00 f4 f4 f4 f3 f3 f3 f3 00 00 00
+[  699.911528]  ffff8801f43af880: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+[  699.912953] >ffff8801f43af900: 00 00 00 00 00 00 00 00 f1 01 f4 f4 f4 f2 f2 f2
+[  699.914392]                                                              ^
+[  699.915758]  ffff8801f43af980: f2 00 f4 f4 00 00 00 00 f2 00 00 00 00 00 00 00
+[  699.917193]  ffff8801f43afa00: 00 00 00 00 00 00 00 00 00 f3 f3 f3 00 00 00 00
+[  699.918634] ==================================================================
+
+- Location
+https://elixir.bootlin.com/linux/v4.18-rc1/source/fs/f2fs/segment.h#L644
+
+Reported-by: Wen Xu <wen.xu@gatech.edu>
+Signed-off-by: Chao Yu <yuchao0@huawei.com>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+[bwh: Backported to 4.9:
+ - Error label is different in validate_checkpoint() due to the earlier
+   backport of "f2fs: fix invalid memory access"
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/checkpoint.c |   22 +++++++++++++++++++---
+ fs/f2fs/data.c       |   21 ++++++++++++++++++++-
+ fs/f2fs/f2fs.h       |    3 +++
+ fs/f2fs/file.c       |   12 ++++++++++++
+ fs/f2fs/inode.c      |   16 ++++++++++++++++
+ fs/f2fs/node.c       |    4 ++++
+ fs/f2fs/segment.h    |    3 +--
+ 7 files changed, 75 insertions(+), 6 deletions(-)
+
+--- a/fs/f2fs/checkpoint.c
++++ b/fs/f2fs/checkpoint.c
+@@ -86,8 +86,10 @@ repeat:
+       fio.page = page;
+ 
+       if (f2fs_submit_page_bio(&fio)) {
+-              f2fs_put_page(page, 1);
+-              goto repeat;
++              memset(page_address(page), 0, PAGE_SIZE);
++              f2fs_stop_checkpoint(sbi, false);
++              f2fs_bug_on(sbi, 1);
++              return page;
+       }
+ 
+       lock_page(page);
+@@ -141,8 +143,14 @@ bool f2fs_is_valid_blkaddr(struct f2fs_s
+       case META_POR:
+       case DATA_GENERIC:
+               if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
+-                      blkaddr < MAIN_BLKADDR(sbi)))
++                      blkaddr < MAIN_BLKADDR(sbi))) {
++                      if (type == DATA_GENERIC) {
++                              f2fs_msg(sbi->sb, KERN_WARNING,
++                                      "access invalid blkaddr:%u", blkaddr);
++                              WARN_ON(1);
++                      }
+                       return false;
++              }
+               break;
+       case META_GENERIC:
+               if (unlikely(blkaddr < SEG0_BLKADDR(sbi) ||
+@@ -715,6 +723,14 @@ static struct page *validate_checkpoint(
+                                       &cp_page_1, version);
+       if (err)
+               return NULL;
++
++      if (le32_to_cpu(cp_block->cp_pack_total_block_count) >
++                                      sbi->blocks_per_seg) {
++              f2fs_msg(sbi->sb, KERN_WARNING,
++                      "invalid cp_pack_total_block_count:%u",
++                      le32_to_cpu(cp_block->cp_pack_total_block_count));
++              goto invalid_cp;
++      }
+       pre_version = *version;
+ 
+       cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
+--- a/fs/f2fs/data.c
++++ b/fs/f2fs/data.c
+@@ -240,7 +240,10 @@ int f2fs_submit_page_bio(struct f2fs_io_
+       struct page *page = fio->encrypted_page ?
+                       fio->encrypted_page : fio->page;
+ 
+-      verify_block_addr(fio, fio->new_blkaddr);
++      if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
++                      __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
++              return -EFAULT;
++
+       trace_f2fs_submit_page_bio(page, fio);
+       f2fs_trace_ios(fio, 0);
+ 
+@@ -723,6 +726,12 @@ next_dnode:
+ next_block:
+       blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
+ 
++      if (__is_valid_data_blkaddr(blkaddr) &&
++              !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
++              err = -EFAULT;
++              goto sync_out;
++      }
++
+       if (!is_valid_data_blkaddr(sbi, blkaddr)) {
+               if (create) {
+                       if (unlikely(f2fs_cp_error(sbi))) {
+@@ -1085,6 +1094,10 @@ got_it:
+                               SetPageUptodate(page);
+                               goto confused;
+                       }
++
++                      if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
++                                                              DATA_GENERIC))
++                              goto set_error_page;
+               } else {
+                       zero_user_segment(page, 0, PAGE_SIZE);
+                       if (!PageUptodate(page))
+@@ -1213,6 +1226,12 @@ retry_encrypt:
+ 
+       set_page_writeback(page);
+ 
++      if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
++              !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
++                                                      DATA_GENERIC)) {
++              err = -EFAULT;
++              goto out_writepage;
++      }
+       /*
+        * If current allocation needs SSR,
+        * it had better in-place writes for updated data.
+--- a/fs/f2fs/f2fs.h
++++ b/fs/f2fs/f2fs.h
+@@ -1932,6 +1932,9 @@ static inline void *f2fs_kvzalloc(size_t
+       (pgofs - ADDRS_PER_INODE(inode) + ADDRS_PER_BLOCK) /    \
+       ADDRS_PER_BLOCK * ADDRS_PER_BLOCK + ADDRS_PER_INODE(inode))
+ 
++#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO(fio->type) == META &&     \
++                              (!is_read_io(fio->op) || fio->is_meta))
++
+ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+                                       block_t blkaddr, int type);
+ void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...);
+--- a/fs/f2fs/file.c
++++ b/fs/f2fs/file.c
+@@ -378,6 +378,13 @@ static loff_t f2fs_seek_block(struct fil
+                       block_t blkaddr;
+                       blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
+ 
++                      if (__is_valid_data_blkaddr(blkaddr) &&
++                              !f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
++                                              blkaddr, DATA_GENERIC)) {
++                              f2fs_put_dnode(&dn);
++                              goto fail;
++                      }
++
+                       if (__found_offset(F2FS_I_SB(inode), blkaddr, dirty,
+                                                       pgofs, whence)) {
+                               f2fs_put_dnode(&dn);
+@@ -482,6 +489,11 @@ int truncate_data_blocks_range(struct dn
+ 
+               dn->data_blkaddr = NULL_ADDR;
+               set_data_blkaddr(dn);
++
++              if (__is_valid_data_blkaddr(blkaddr) &&
++                      !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
++                      continue;
++
+               invalidate_blocks(sbi, blkaddr);
+               if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page))
+                       clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN);
+--- a/fs/f2fs/inode.c
++++ b/fs/f2fs/inode.c
+@@ -129,6 +129,22 @@ static bool sanity_check_inode(struct in
+               return false;
+       }
+ 
++      if (F2FS_I(inode)->extent_tree) {
++              struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest;
++
++              if (ei->len &&
++                      (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC) ||
++                      !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
++                                                      DATA_GENERIC))) {
++                      set_sbi_flag(sbi, SBI_NEED_FSCK);
++                      f2fs_msg(sbi->sb, KERN_WARNING,
++                              "%s: inode (ino=%lx) extent info [%u, %u, %u] "
++                              "is incorrect, run fsck to fix",
++                              __func__, inode->i_ino,
++                              ei->blk, ei->fofs, ei->len);
++                      return false;
++              }
++      }
+       return true;
+ }
+ 
+--- a/fs/f2fs/node.c
++++ b/fs/f2fs/node.c
+@@ -1605,6 +1605,10 @@ static int f2fs_write_node_page(struct p
+               return 0;
+       }
+ 
++      if (__is_valid_data_blkaddr(ni.blk_addr) &&
++              !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC))
++              goto redirty_out;
++
+       set_page_writeback(page);
+       fio.old_blkaddr = ni.blk_addr;
+       write_node_page(nid, &fio);
+--- a/fs/f2fs/segment.h
++++ b/fs/f2fs/segment.h
+@@ -601,8 +601,7 @@ static inline void verify_block_addr(str
+ {
+       struct f2fs_sb_info *sbi = fio->sbi;
+ 
+-      if (PAGE_TYPE_OF_BIO(fio->type) == META &&
+-                              (!is_read_io(fio->op) || fio->is_meta))
++      if (__is_meta_io(fio))
+               verify_blkaddr(sbi, blk_addr, META_GENERIC);
+       else
+               verify_blkaddr(sbi, blk_addr, DATA_GENERIC);
diff --git a/queue-4.9/f2fs-fix-to-do-sanity-check-with-cp_pack_start_sum.patch b/queue-4.9/f2fs-fix-to-do-sanity-check-with-cp_pack_start_sum.patch

new file mode 100644 (file)

index 0000000..f6b339a
--- /dev/null
+++ b/queue-4.9/f2fs-fix-to-do-sanity-check-with-cp_pack_start_sum.patch
@@ -0,0 +1,349 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Date: Thu, 6 Dec 2018 13:47:03 +0000
+Subject: f2fs: fix to do sanity check with cp_pack_start_sum
+
+From: Ben Hutchings <ben.hutchings@codethink.co.uk>
+
+commit e494c2f995d6181d6e29c4927d68e0f295ecf75b upstream.
+
+After fuzzing, cp_pack_start_sum could be corrupted, so the current log's
+summary info will be wrong because an incorrect summary block is loaded.
+Then, if a segment's type in the current log exceeds NR_CURSEG_TYPE, it
+can lead to accessing an invalid dirty_i->dirty_segmap bitmap.
+
+Add sanity check for cp_pack_start_sum to fix this issue.
+
+https://bugzilla.kernel.org/show_bug.cgi?id=200419
+
+- Reproduce
+
+- Kernel message (f2fs-dev w/ KASAN)
+[ 3117.578432] F2FS-fs (loop0): Invalid log blocks per segment (8)
+
+[ 3117.578445] F2FS-fs (loop0): Can't find valid F2FS filesystem in 2th superblock
+[ 3117.581364] F2FS-fs (loop0): invalid crc_offset: 30716
+[ 3117.583564] WARNING: CPU: 1 PID: 1225 at fs/f2fs/checkpoint.c:90 __get_meta_page+0x448/0x4b0
+[ 3117.583570] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer joydev input_leds serio_raw snd soundcore mac_hid i2c_piix4 ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi btrfs zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear 8139too qxl ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel psmouse aes_x86_64 8139cp crypto_simd cryptd mii glue_helper pata_acpi floppy
+[ 3117.584014] CPU: 1 PID: 1225 Comm: mount Not tainted 4.17.0+ #1
+[ 3117.584017] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
+[ 3117.584022] RIP: 0010:__get_meta_page+0x448/0x4b0
+[ 3117.584023] Code: 00 49 8d bc 24 84 00 00 00 e8 74 54 da ff 41 83 8c 24 84 00 00 00 08 4c 89 f6 4c 89 ef e8 c0 d9 95 00 48 89 ef e8 18 e3 00 00 <0f> 0b f0 80 4d 48 04 e9 0f fe ff ff 0f 0b 48 89 c7 48 89 04 24 e8
+[ 3117.584072] RSP: 0018:ffff88018eb678c0 EFLAGS: 00010286
+[ 3117.584082] RAX: ffff88018f0a6a78 RBX: ffffea0007a46600 RCX: ffffffff9314d1b2
+[ 3117.584085] RDX: ffffffff00000001 RSI: 0000000000000000 RDI: ffff88018f0a6a98
+[ 3117.584087] RBP: ffff88018ebe9980 R08: 0000000000000002 R09: 0000000000000001
+[ 3117.584090] R10: 0000000000000001 R11: ffffed00326e4450 R12: ffff880193722200
+[ 3117.584092] R13: ffff88018ebe9afc R14: 0000000000000206 R15: ffff88018eb67900
+[ 3117.584096] FS:  00007f5694636840(0000) GS:ffff8801f3b00000(0000) knlGS:0000000000000000
+[ 3117.584098] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 3117.584101] CR2: 00000000016f21b8 CR3: 0000000191c22000 CR4: 00000000000006e0
+[ 3117.584112] Call Trace:
+[ 3117.584121]  ? f2fs_set_meta_page_dirty+0x150/0x150
+[ 3117.584127]  ? f2fs_build_segment_manager+0xbf9/0x3190
+[ 3117.584133]  ? f2fs_npages_for_summary_flush+0x75/0x120
+[ 3117.584145]  f2fs_build_segment_manager+0xda8/0x3190
+[ 3117.584151]  ? f2fs_get_valid_checkpoint+0x298/0xa00
+[ 3117.584156]  ? f2fs_flush_sit_entries+0x10e0/0x10e0
+[ 3117.584184]  ? map_id_range_down+0x17c/0x1b0
+[ 3117.584188]  ? __put_user_ns+0x30/0x30
+[ 3117.584206]  ? find_next_bit+0x53/0x90
+[ 3117.584237]  ? cpumask_next+0x16/0x20
+[ 3117.584249]  f2fs_fill_super+0x1948/0x2b40
+[ 3117.584258]  ? f2fs_commit_super+0x1a0/0x1a0
+[ 3117.584279]  ? sget_userns+0x65e/0x690
+[ 3117.584296]  ? set_blocksize+0x88/0x130
+[ 3117.584302]  ? f2fs_commit_super+0x1a0/0x1a0
+[ 3117.584305]  mount_bdev+0x1c0/0x200
+[ 3117.584310]  mount_fs+0x5c/0x190
+[ 3117.584320]  vfs_kern_mount+0x64/0x190
+[ 3117.584330]  do_mount+0x2e4/0x1450
+[ 3117.584343]  ? lockref_put_return+0x130/0x130
+[ 3117.584347]  ? copy_mount_string+0x20/0x20
+[ 3117.584357]  ? kasan_unpoison_shadow+0x31/0x40
+[ 3117.584362]  ? kasan_kmalloc+0xa6/0xd0
+[ 3117.584373]  ? memcg_kmem_put_cache+0x16/0x90
+[ 3117.584377]  ? __kmalloc_track_caller+0x196/0x210
+[ 3117.584383]  ? _copy_from_user+0x61/0x90
+[ 3117.584396]  ? memdup_user+0x3e/0x60
+[ 3117.584401]  ksys_mount+0x7e/0xd0
+[ 3117.584405]  __x64_sys_mount+0x62/0x70
+[ 3117.584427]  do_syscall_64+0x73/0x160
+[ 3117.584440]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
+[ 3117.584455] RIP: 0033:0x7f5693f14b9a
+[ 3117.584456] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48
+[ 3117.584505] RSP: 002b:00007fff27346488 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5
+[ 3117.584510] RAX: ffffffffffffffda RBX: 00000000016e2030 RCX: 00007f5693f14b9a
+[ 3117.584512] RDX: 00000000016e2210 RSI: 00000000016e3f30 RDI: 00000000016ee040
+[ 3117.584514] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013
+[ 3117.584516] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 00000000016ee040
+[ 3117.584519] R13: 00000000016e2210 R14: 0000000000000000 R15: 0000000000000003
+[ 3117.584523] ---[ end trace a8e0d899985faf31 ]---
+[ 3117.685663] F2FS-fs (loop0): f2fs_check_nid_range: out-of-range nid=2, run fsck to fix.
+[ 3117.685673] F2FS-fs (loop0): recover_data: ino = 2 (i_size: recover) recovered = 1, err = 0
+[ 3117.685707] ==================================================================
+[ 3117.685955] BUG: KASAN: slab-out-of-bounds in __remove_dirty_segment+0xdd/0x1e0
+[ 3117.686175] Read of size 8 at addr ffff88018f0a63d0 by task mount/1225
+
+[ 3117.686477] CPU: 0 PID: 1225 Comm: mount Tainted: G        W         4.17.0+ #1
+[ 3117.686481] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
+[ 3117.686483] Call Trace:
+[ 3117.686494]  dump_stack+0x71/0xab
+[ 3117.686512]  print_address_description+0x6b/0x290
+[ 3117.686517]  kasan_report+0x28e/0x390
+[ 3117.686522]  ? __remove_dirty_segment+0xdd/0x1e0
+[ 3117.686527]  __remove_dirty_segment+0xdd/0x1e0
+[ 3117.686532]  locate_dirty_segment+0x189/0x190
+[ 3117.686538]  f2fs_allocate_new_segments+0xa9/0xe0
+[ 3117.686543]  recover_data+0x703/0x2c20
+[ 3117.686547]  ? f2fs_recover_fsync_data+0x48f/0xd50
+[ 3117.686553]  ? ksys_mount+0x7e/0xd0
+[ 3117.686564]  ? policy_nodemask+0x1a/0x90
+[ 3117.686567]  ? policy_node+0x56/0x70
+[ 3117.686571]  ? add_fsync_inode+0xf0/0xf0
+[ 3117.686592]  ? blk_finish_plug+0x44/0x60
+[ 3117.686597]  ? f2fs_ra_meta_pages+0x38b/0x5e0
+[ 3117.686602]  ? find_inode_fast+0xac/0xc0
+[ 3117.686606]  ? f2fs_is_valid_blkaddr+0x320/0x320
+[ 3117.686618]  ? __radix_tree_lookup+0x150/0x150
+[ 3117.686633]  ? dqget+0x670/0x670
+[ 3117.686648]  ? pagecache_get_page+0x29/0x410
+[ 3117.686656]  ? kmem_cache_alloc+0x176/0x1e0
+[ 3117.686660]  ? f2fs_is_valid_blkaddr+0x11d/0x320
+[ 3117.686664]  f2fs_recover_fsync_data+0xc23/0xd50
+[ 3117.686670]  ? f2fs_space_for_roll_forward+0x60/0x60
+[ 3117.686674]  ? rb_insert_color+0x323/0x3d0
+[ 3117.686678]  ? f2fs_recover_orphan_inodes+0xa5/0x700
+[ 3117.686683]  ? proc_register+0x153/0x1d0
+[ 3117.686686]  ? f2fs_remove_orphan_inode+0x10/0x10
+[ 3117.686695]  ? f2fs_attr_store+0x50/0x50
+[ 3117.686700]  ? proc_create_single_data+0x52/0x60
+[ 3117.686707]  f2fs_fill_super+0x1d06/0x2b40
+[ 3117.686728]  ? f2fs_commit_super+0x1a0/0x1a0
+[ 3117.686735]  ? sget_userns+0x65e/0x690
+[ 3117.686740]  ? set_blocksize+0x88/0x130
+[ 3117.686745]  ? f2fs_commit_super+0x1a0/0x1a0
+[ 3117.686748]  mount_bdev+0x1c0/0x200
+[ 3117.686753]  mount_fs+0x5c/0x190
+[ 3117.686758]  vfs_kern_mount+0x64/0x190
+[ 3117.686762]  do_mount+0x2e4/0x1450
+[ 3117.686769]  ? lockref_put_return+0x130/0x130
+[ 3117.686773]  ? copy_mount_string+0x20/0x20
+[ 3117.686777]  ? kasan_unpoison_shadow+0x31/0x40
+[ 3117.686780]  ? kasan_kmalloc+0xa6/0xd0
+[ 3117.686786]  ? memcg_kmem_put_cache+0x16/0x90
+[ 3117.686790]  ? __kmalloc_track_caller+0x196/0x210
+[ 3117.686795]  ? _copy_from_user+0x61/0x90
+[ 3117.686801]  ? memdup_user+0x3e/0x60
+[ 3117.686804]  ksys_mount+0x7e/0xd0
+[ 3117.686809]  __x64_sys_mount+0x62/0x70
+[ 3117.686816]  do_syscall_64+0x73/0x160
+[ 3117.686824]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
+[ 3117.686829] RIP: 0033:0x7f5693f14b9a
+[ 3117.686830] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48
+[ 3117.686887] RSP: 002b:00007fff27346488 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5
+[ 3117.686892] RAX: ffffffffffffffda RBX: 00000000016e2030 RCX: 00007f5693f14b9a
+[ 3117.686894] RDX: 00000000016e2210 RSI: 00000000016e3f30 RDI: 00000000016ee040
+[ 3117.686896] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013
+[ 3117.686899] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 00000000016ee040
+[ 3117.686901] R13: 00000000016e2210 R14: 0000000000000000 R15: 0000000000000003
+
+[ 3117.687005] Allocated by task 1225:
+[ 3117.687152]  kasan_kmalloc+0xa6/0xd0
+[ 3117.687157]  kmem_cache_alloc_trace+0xfd/0x200
+[ 3117.687161]  f2fs_build_segment_manager+0x2d09/0x3190
+[ 3117.687165]  f2fs_fill_super+0x1948/0x2b40
+[ 3117.687168]  mount_bdev+0x1c0/0x200
+[ 3117.687171]  mount_fs+0x5c/0x190
+[ 3117.687174]  vfs_kern_mount+0x64/0x190
+[ 3117.687177]  do_mount+0x2e4/0x1450
+[ 3117.687180]  ksys_mount+0x7e/0xd0
+[ 3117.687182]  __x64_sys_mount+0x62/0x70
+[ 3117.687186]  do_syscall_64+0x73/0x160
+[ 3117.687190]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+[ 3117.687285] Freed by task 19:
+[ 3117.687412]  __kasan_slab_free+0x137/0x190
+[ 3117.687416]  kfree+0x8b/0x1b0
+[ 3117.687460]  ttm_bo_man_put_node+0x61/0x80 [ttm]
+[ 3117.687476]  ttm_bo_cleanup_refs+0x15f/0x250 [ttm]
+[ 3117.687492]  ttm_bo_delayed_delete+0x2f0/0x300 [ttm]
+[ 3117.687507]  ttm_bo_delayed_workqueue+0x17/0x50 [ttm]
+[ 3117.687528]  process_one_work+0x2f9/0x740
+[ 3117.687531]  worker_thread+0x78/0x6b0
+[ 3117.687541]  kthread+0x177/0x1c0
+[ 3117.687545]  ret_from_fork+0x35/0x40
+
+[ 3117.687638] The buggy address belongs to the object at ffff88018f0a6300
+                which belongs to the cache kmalloc-192 of size 192
+[ 3117.688014] The buggy address is located 16 bytes to the right of
+                192-byte region [ffff88018f0a6300, ffff88018f0a63c0)
+[ 3117.688382] The buggy address belongs to the page:
+[ 3117.688554] page:ffffea00063c2980 count:1 mapcount:0 mapping:ffff8801f3403180 index:0x0
+[ 3117.688788] flags: 0x17fff8000000100(slab)
+[ 3117.688944] raw: 017fff8000000100 ffffea00063c2840 0000000e0000000e ffff8801f3403180
+[ 3117.689166] raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000
+[ 3117.689386] page dumped because: kasan: bad access detected
+
+[ 3117.689653] Memory state around the buggy address:
+[ 3117.689816]  ffff88018f0a6280: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
+[ 3117.690027]  ffff88018f0a6300: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+[ 3117.690239] >ffff88018f0a6380: 00 00 fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+[ 3117.690448]                                                  ^
+[ 3117.690644]  ffff88018f0a6400: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+[ 3117.690868]  ffff88018f0a6480: 00 00 fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+[ 3117.691077] ==================================================================
+[ 3117.691290] Disabling lock debugging due to kernel taint
+[ 3117.693893] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
+[ 3117.694120] PGD 80000001f01bc067 P4D 80000001f01bc067 PUD 1d9638067 PMD 0
+[ 3117.694338] Oops: 0002 [#1] SMP KASAN PTI
+[ 3117.694490] CPU: 1 PID: 1225 Comm: mount Tainted: G    B   W         4.17.0+ #1
+[ 3117.694703] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
+[ 3117.695073] RIP: 0010:__remove_dirty_segment+0xe2/0x1e0
+[ 3117.695246] Code: c4 48 89 c7 e8 cf bb d7 ff 45 0f b6 24 24 41 83 e4 3f 44 88 64 24 07 41 83 e4 3f 4a 8d 7c e3 08 e8 b3 bc d7 ff 4a 8b 4c e3 08 <f0> 4c 0f b3 29 0f 82 94 00 00 00 48 8d bd 20 04 00 00 e8 97 bb d7
+[ 3117.695793] RSP: 0018:ffff88018eb67638 EFLAGS: 00010292
+[ 3117.695969] RAX: 0000000000000000 RBX: ffff88018f0a6300 RCX: 0000000000000000
+[ 3117.696182] RDX: 0000000000000000 RSI: 0000000000000297 RDI: 0000000000000297
+[ 3117.696391] RBP: ffff88018ebe9980 R08: ffffed003e743ebb R09: ffffed003e743ebb
+[ 3117.696604] R10: 0000000000000001 R11: ffffed003e743eba R12: 0000000000000019
+[ 3117.696813] R13: 0000000000000014 R14: 0000000000000320 R15: ffff88018ebe99e0
+[ 3117.697032] FS:  00007f5694636840(0000) GS:ffff8801f3b00000(0000) knlGS:0000000000000000
+[ 3117.697280] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 3117.702357] CR2: 00007fe89bb1a000 CR3: 0000000191c22000 CR4: 00000000000006e0
+[ 3117.707235] Call Trace:
+[ 3117.712077]  locate_dirty_segment+0x189/0x190
+[ 3117.716891]  f2fs_allocate_new_segments+0xa9/0xe0
+[ 3117.721617]  recover_data+0x703/0x2c20
+[ 3117.726316]  ? f2fs_recover_fsync_data+0x48f/0xd50
+[ 3117.730957]  ? ksys_mount+0x7e/0xd0
+[ 3117.735573]  ? policy_nodemask+0x1a/0x90
+[ 3117.740198]  ? policy_node+0x56/0x70
+[ 3117.744829]  ? add_fsync_inode+0xf0/0xf0
+[ 3117.749487]  ? blk_finish_plug+0x44/0x60
+[ 3117.754152]  ? f2fs_ra_meta_pages+0x38b/0x5e0
+[ 3117.758831]  ? find_inode_fast+0xac/0xc0
+[ 3117.763448]  ? f2fs_is_valid_blkaddr+0x320/0x320
+[ 3117.768046]  ? __radix_tree_lookup+0x150/0x150
+[ 3117.772603]  ? dqget+0x670/0x670
+[ 3117.777159]  ? pagecache_get_page+0x29/0x410
+[ 3117.781648]  ? kmem_cache_alloc+0x176/0x1e0
+[ 3117.786067]  ? f2fs_is_valid_blkaddr+0x11d/0x320
+[ 3117.790476]  f2fs_recover_fsync_data+0xc23/0xd50
+[ 3117.794790]  ? f2fs_space_for_roll_forward+0x60/0x60
+[ 3117.799086]  ? rb_insert_color+0x323/0x3d0
+[ 3117.803304]  ? f2fs_recover_orphan_inodes+0xa5/0x700
+[ 3117.807563]  ? proc_register+0x153/0x1d0
+[ 3117.811766]  ? f2fs_remove_orphan_inode+0x10/0x10
+[ 3117.815947]  ? f2fs_attr_store+0x50/0x50
+[ 3117.820087]  ? proc_create_single_data+0x52/0x60
+[ 3117.824262]  f2fs_fill_super+0x1d06/0x2b40
+[ 3117.828367]  ? f2fs_commit_super+0x1a0/0x1a0
+[ 3117.832432]  ? sget_userns+0x65e/0x690
+[ 3117.836500]  ? set_blocksize+0x88/0x130
+[ 3117.840501]  ? f2fs_commit_super+0x1a0/0x1a0
+[ 3117.844420]  mount_bdev+0x1c0/0x200
+[ 3117.848275]  mount_fs+0x5c/0x190
+[ 3117.852053]  vfs_kern_mount+0x64/0x190
+[ 3117.855810]  do_mount+0x2e4/0x1450
+[ 3117.859441]  ? lockref_put_return+0x130/0x130
+[ 3117.862996]  ? copy_mount_string+0x20/0x20
+[ 3117.866417]  ? kasan_unpoison_shadow+0x31/0x40
+[ 3117.869719]  ? kasan_kmalloc+0xa6/0xd0
+[ 3117.872948]  ? memcg_kmem_put_cache+0x16/0x90
+[ 3117.876121]  ? __kmalloc_track_caller+0x196/0x210
+[ 3117.879333]  ? _copy_from_user+0x61/0x90
+[ 3117.882467]  ? memdup_user+0x3e/0x60
+[ 3117.885604]  ksys_mount+0x7e/0xd0
+[ 3117.888700]  __x64_sys_mount+0x62/0x70
+[ 3117.891742]  do_syscall_64+0x73/0x160
+[ 3117.894692]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
+[ 3117.897669] RIP: 0033:0x7f5693f14b9a
+[ 3117.900563] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48
+[ 3117.906922] RSP: 002b:00007fff27346488 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5
+[ 3117.910159] RAX: ffffffffffffffda RBX: 00000000016e2030 RCX: 00007f5693f14b9a
+[ 3117.913469] RDX: 00000000016e2210 RSI: 00000000016e3f30 RDI: 00000000016ee040
+[ 3117.916764] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013
+[ 3117.920071] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 00000000016ee040
+[ 3117.923393] R13: 00000000016e2210 R14: 0000000000000000 R15: 0000000000000003
+[ 3117.926680] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer joydev input_leds serio_raw snd soundcore mac_hid i2c_piix4 ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi btrfs zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear 8139too qxl ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel psmouse aes_x86_64 8139cp crypto_simd cryptd mii glue_helper pata_acpi floppy
+[ 3117.949979] CR2: 0000000000000000
+[ 3117.954283] ---[ end trace a8e0d899985faf32 ]---
+[ 3117.958575] RIP: 0010:__remove_dirty_segment+0xe2/0x1e0
+[ 3117.962810] Code: c4 48 89 c7 e8 cf bb d7 ff 45 0f b6 24 24 41 83 e4 3f 44 88 64 24 07 41 83 e4 3f 4a 8d 7c e3 08 e8 b3 bc d7 ff 4a 8b 4c e3 08 <f0> 4c 0f b3 29 0f 82 94 00 00 00 48 8d bd 20 04 00 00 e8 97 bb d7
+[ 3117.971789] RSP: 0018:ffff88018eb67638 EFLAGS: 00010292
+[ 3117.976333] RAX: 0000000000000000 RBX: ffff88018f0a6300 RCX: 0000000000000000
+[ 3117.980926] RDX: 0000000000000000 RSI: 0000000000000297 RDI: 0000000000000297
+[ 3117.985497] RBP: ffff88018ebe9980 R08: ffffed003e743ebb R09: ffffed003e743ebb
+[ 3117.990098] R10: 0000000000000001 R11: ffffed003e743eba R12: 0000000000000019
+[ 3117.994761] R13: 0000000000000014 R14: 0000000000000320 R15: ffff88018ebe99e0
+[ 3117.999392] FS:  00007f5694636840(0000) GS:ffff8801f3b00000(0000) knlGS:0000000000000000
+[ 3118.004096] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 3118.008816] CR2: 00007fe89bb1a000 CR3: 0000000191c22000 CR4: 00000000000006e0
+
+- Location
+https://elixir.bootlin.com/linux/v4.18-rc3/source/fs/f2fs/segment.c#L775
+               if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
+                       dirty_i->nr_dirty[t]--;
+Here dirty_i->dirty_segmap[t] can be NULL which leads to crash in test_and_clear_bit()
+
+Reported-by: Wen Xu <wen.xu@gatech.edu>
+Signed-off-by: Chao Yu <yuchao0@huawei.com>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+[bwh: Backported to 4.9: The function is called sanity_check_ckpt()]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/checkpoint.c |    8 ++++----
+ fs/f2fs/super.c      |   12 ++++++++++++
+ 2 files changed, 16 insertions(+), 4 deletions(-)
+
+--- a/fs/f2fs/checkpoint.c
++++ b/fs/f2fs/checkpoint.c
+@@ -794,15 +794,15 @@ int get_valid_checkpoint(struct f2fs_sb_
+       cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
+       memcpy(sbi->ckpt, cp_block, blk_size);
+ 
+-      /* Sanity checking of checkpoint */
+-      if (sanity_check_ckpt(sbi))
+-              goto free_fail_no_cp;
+-
+       if (cur_page == cp1)
+               sbi->cur_cp_pack = 1;
+       else
+               sbi->cur_cp_pack = 2;
+ 
++      /* Sanity checking of checkpoint */
++      if (sanity_check_ckpt(sbi))
++              goto free_fail_no_cp;
++
+       if (cp_blks <= 1)
+               goto done;
+ 
+--- a/fs/f2fs/super.c
++++ b/fs/f2fs/super.c
+@@ -1487,6 +1487,7 @@ int sanity_check_ckpt(struct f2fs_sb_inf
+       unsigned int sit_bitmap_size, nat_bitmap_size;
+       unsigned int log_blocks_per_seg;
+       unsigned int segment_count_main;
++      unsigned int cp_pack_start_sum, cp_payload;
+       block_t user_block_count;
+       int i;
+ 
+@@ -1547,6 +1548,17 @@ int sanity_check_ckpt(struct f2fs_sb_inf
+               return 1;
+       }
+ 
++      cp_pack_start_sum = __start_sum_addr(sbi);
++      cp_payload = __cp_payload(sbi);
++      if (cp_pack_start_sum < cp_payload + 1 ||
++              cp_pack_start_sum > blocks_per_seg - 1 -
++                      NR_CURSEG_TYPE) {
++              f2fs_msg(sbi->sb, KERN_ERR,
++                      "Wrong cp_pack_start_sum: %u",
++                      cp_pack_start_sum);
++              return 1;
++      }
++
+       if (unlikely(f2fs_cp_error(sbi))) {
+               f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
+               return 1;
diff --git a/queue-4.9/f2fs-fix-to-do-sanity-check-with-node-footer-and-iblocks.patch b/queue-4.9/f2fs-fix-to-do-sanity-check-with-node-footer-and-iblocks.patch

new file mode 100644 (file)

index 0000000..d185121
--- /dev/null
+++ b/queue-4.9/f2fs-fix-to-do-sanity-check-with-node-footer-and-iblocks.patch
@@ -0,0 +1,240 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Chao Yu <yuchao0@huawei.com>
+Date: Fri, 29 Jun 2018 13:55:22 +0800
+Subject: f2fs: fix to do sanity check with node footer and iblocks
+
+From: Chao Yu <yuchao0@huawei.com>
+
+commit e34438c903b653daca2b2a7de95aed46226f8ed3 upstream.
+
+This patch adds sanity checks on the following inode fields to
+avoid the reported panic.
+- node footer
+- iblocks
+
+https://bugzilla.kernel.org/show_bug.cgi?id=200223
+
+- Overview
+BUG() triggered in f2fs_truncate_inode_blocks() when un-mounting a mounted f2fs image after writing to it
+
+- Reproduce
+
+- POC (poc.c)
+
+static void activity(char *mpoint) {
+
+  char *foo_bar_baz;
+  int err;
+
+  static int buf[8192];
+  memset(buf, 0, sizeof(buf));
+
+  err = asprintf(&foo_bar_baz, "%s/foo/bar/baz", mpoint);
+
+  // open / write / read
+  int fd = open(foo_bar_baz, O_RDWR | O_TRUNC, 0777);
+  if (fd >= 0) {
+    write(fd, (char *)buf, 517);
+    write(fd, (char *)buf, sizeof(buf));
+    close(fd);
+  }
+
+}
+
+int main(int argc, char *argv[]) {
+  activity(argv[1]);
+  return 0;
+}
+
+- Kernel message
+[  552.479723] F2FS-fs (loop0): Mounted with checkpoint version = 2
+[  556.451891] ------------[ cut here ]------------
+[  556.451899] kernel BUG at fs/f2fs/node.c:987!
+[  556.452920] invalid opcode: 0000 [#1] SMP KASAN PTI
+[  556.453936] CPU: 1 PID: 1310 Comm: umount Not tainted 4.18.0-rc1+ #4
+[  556.455213] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
+[  556.457140] RIP: 0010:f2fs_truncate_inode_blocks+0x4a7/0x6f0
+[  556.458280] Code: e8 ae ea ff ff 41 89 c7 c1 e8 1f 84 c0 74 0a 41 83 ff fe 0f 85 35 ff ff ff 81 85 b0 fe ff ff fb 03 00 00 e9 f7 fd ff ff 0f 0b <0f> 0b e8 62 b7 9a 00 48 8b bd a0 fe ff ff e8 56 54 ae ff 48 8b b5
+[  556.462015] RSP: 0018:ffff8801f292f808 EFLAGS: 00010286
+[  556.463068] RAX: ffffed003e73242d RBX: ffff8801f292f958 RCX: ffffffffb88b81bc
+[  556.464479] RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffff8801f3992164
+[  556.465901] RBP: ffff8801f292f980 R08: ffffed003e73242d R09: ffffed003e73242d
+[  556.467311] R10: 0000000000000001 R11: ffffed003e73242c R12: 00000000fffffc64
+[  556.468706] R13: ffff8801f3992000 R14: 0000000000000058 R15: 00000000ffff8801
+[  556.470117] FS:  00007f8029297840(0000) GS:ffff8801f6f00000(0000) knlGS:0000000000000000
+[  556.471702] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  556.472838] CR2: 000055f5f57305d8 CR3: 00000001f18b0000 CR4: 00000000000006e0
+[  556.474265] Call Trace:
+[  556.474782]  ? f2fs_alloc_nid_failed+0xf0/0xf0
+[  556.475686]  ? truncate_nodes+0x980/0x980
+[  556.476516]  ? pagecache_get_page+0x21f/0x2f0
+[  556.477412]  ? __asan_loadN+0xf/0x20
+[  556.478153]  ? __get_node_page+0x331/0x5b0
+[  556.478992]  ? reweight_entity+0x1e6/0x3b0
+[  556.479826]  f2fs_truncate_blocks+0x55e/0x740
+[  556.480709]  ? f2fs_truncate_data_blocks+0x20/0x20
+[  556.481689]  ? __radix_tree_lookup+0x34/0x160
+[  556.482630]  ? radix_tree_lookup+0xd/0x10
+[  556.483445]  f2fs_truncate+0xd4/0x1a0
+[  556.484206]  f2fs_evict_inode+0x5ce/0x630
+[  556.485032]  evict+0x16f/0x290
+[  556.485664]  iput+0x280/0x300
+[  556.486300]  dentry_unlink_inode+0x165/0x1e0
+[  556.487169]  __dentry_kill+0x16a/0x260
+[  556.487936]  dentry_kill+0x70/0x250
+[  556.488651]  shrink_dentry_list+0x125/0x260
+[  556.489504]  shrink_dcache_parent+0xc1/0x110
+[  556.490379]  ? shrink_dcache_sb+0x200/0x200
+[  556.491231]  ? bit_wait_timeout+0xc0/0xc0
+[  556.492047]  do_one_tree+0x12/0x40
+[  556.492743]  shrink_dcache_for_umount+0x3f/0xa0
+[  556.493656]  generic_shutdown_super+0x43/0x1c0
+[  556.494561]  kill_block_super+0x52/0x80
+[  556.495341]  kill_f2fs_super+0x62/0x70
+[  556.496105]  deactivate_locked_super+0x6f/0xa0
+[  556.497004]  deactivate_super+0x5e/0x80
+[  556.497785]  cleanup_mnt+0x61/0xa0
+[  556.498492]  __cleanup_mnt+0x12/0x20
+[  556.499218]  task_work_run+0xc8/0xf0
+[  556.499949]  exit_to_usermode_loop+0x125/0x130
+[  556.500846]  do_syscall_64+0x138/0x170
+[  556.501609]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
+[  556.502659] RIP: 0033:0x7f8028b77487
+[  556.503384] Code: 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 31 f6 e9 09 00 00 00 66 0f 1f 84 00 00 00 00 00 b8 a6 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d e1 c9 2b 00 f7 d8 64 89 01 48
+[  556.507137] RSP: 002b:00007fff9f2e3598 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
+[  556.508637] RAX: 0000000000000000 RBX: 0000000000ebd030 RCX: 00007f8028b77487
+[  556.510069] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000ec41e0
+[  556.511481] RBP: 0000000000ec41e0 R08: 0000000000000000 R09: 0000000000000014
+[  556.512892] R10: 00000000000006b2 R11: 0000000000000246 R12: 00007f802908083c
+[  556.514320] R13: 0000000000000000 R14: 0000000000ebd210 R15: 00007fff9f2e3820
+[  556.515745] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy
+[  556.529276] ---[ end trace 4ce02f25ff7d3df5 ]---
+[  556.530340] RIP: 0010:f2fs_truncate_inode_blocks+0x4a7/0x6f0
+[  556.531513] Code: e8 ae ea ff ff 41 89 c7 c1 e8 1f 84 c0 74 0a 41 83 ff fe 0f 85 35 ff ff ff 81 85 b0 fe ff ff fb 03 00 00 e9 f7 fd ff ff 0f 0b <0f> 0b e8 62 b7 9a 00 48 8b bd a0 fe ff ff e8 56 54 ae ff 48 8b b5
+[  556.535330] RSP: 0018:ffff8801f292f808 EFLAGS: 00010286
+[  556.536395] RAX: ffffed003e73242d RBX: ffff8801f292f958 RCX: ffffffffb88b81bc
+[  556.537824] RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffff8801f3992164
+[  556.539290] RBP: ffff8801f292f980 R08: ffffed003e73242d R09: ffffed003e73242d
+[  556.540709] R10: 0000000000000001 R11: ffffed003e73242c R12: 00000000fffffc64
+[  556.542131] R13: ffff8801f3992000 R14: 0000000000000058 R15: 00000000ffff8801
+[  556.543579] FS:  00007f8029297840(0000) GS:ffff8801f6f00000(0000) knlGS:0000000000000000
+[  556.545180] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  556.546338] CR2: 000055f5f57305d8 CR3: 00000001f18b0000 CR4: 00000000000006e0
+[  556.547809] ==================================================================
+[  556.549248] BUG: KASAN: stack-out-of-bounds in arch_tlb_gather_mmu+0x52/0x170
+[  556.550672] Write of size 8 at addr ffff8801f292fd10 by task umount/1310
+
+[  556.552338] CPU: 1 PID: 1310 Comm: umount Tainted: G      D           4.18.0-rc1+ #4
+[  556.553886] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
+[  556.555756] Call Trace:
+[  556.556264]  dump_stack+0x7b/0xb5
+[  556.556944]  print_address_description+0x70/0x290
+[  556.557903]  kasan_report+0x291/0x390
+[  556.558649]  ? arch_tlb_gather_mmu+0x52/0x170
+[  556.559537]  __asan_store8+0x57/0x90
+[  556.560268]  arch_tlb_gather_mmu+0x52/0x170
+[  556.561110]  tlb_gather_mmu+0x12/0x40
+[  556.561862]  exit_mmap+0x123/0x2a0
+[  556.562555]  ? __ia32_sys_munmap+0x50/0x50
+[  556.563384]  ? exit_aio+0x98/0x230
+[  556.564079]  ? __x32_compat_sys_io_submit+0x260/0x260
+[  556.565099]  ? taskstats_exit+0x1f4/0x640
+[  556.565925]  ? kasan_check_read+0x11/0x20
+[  556.566739]  ? mm_update_next_owner+0x322/0x380
+[  556.567652]  mmput+0x8b/0x1d0
+[  556.568260]  do_exit+0x43a/0x1390
+[  556.568937]  ? mm_update_next_owner+0x380/0x380
+[  556.569855]  ? deactivate_super+0x5e/0x80
+[  556.570668]  ? cleanup_mnt+0x61/0xa0
+[  556.571395]  ? __cleanup_mnt+0x12/0x20
+[  556.572156]  ? task_work_run+0xc8/0xf0
+[  556.572917]  ? exit_to_usermode_loop+0x125/0x130
+[  556.573861]  rewind_stack_do_exit+0x17/0x20
+[  556.574707] RIP: 0033:0x7f8028b77487
+[  556.575428] Code: Bad RIP value.
+[  556.576106] RSP: 002b:00007fff9f2e3598 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
+[  556.577599] RAX: 0000000000000000 RBX: 0000000000ebd030 RCX: 00007f8028b77487
+[  556.579020] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000ec41e0
+[  556.580422] RBP: 0000000000ec41e0 R08: 0000000000000000 R09: 0000000000000014
+[  556.581833] R10: 00000000000006b2 R11: 0000000000000246 R12: 00007f802908083c
+[  556.583252] R13: 0000000000000000 R14: 0000000000ebd210 R15: 00007fff9f2e3820
+
+[  556.584983] The buggy address belongs to the page:
+[  556.585961] page:ffffea0007ca4bc0 count:0 mapcount:0 mapping:0000000000000000 index:0x0
+[  556.587540] flags: 0x2ffff0000000000()
+[  556.588296] raw: 02ffff0000000000 0000000000000000 dead000000000200 0000000000000000
+[  556.589822] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000
+[  556.591359] page dumped because: kasan: bad access detected
+
+[  556.592786] Memory state around the buggy address:
+[  556.593753]  ffff8801f292fc00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+[  556.595191]  ffff8801f292fc80: 00 00 00 00 00 00 00 00 f1 f1 f1 f1 00 00 00 00
+[  556.596613] >ffff8801f292fd00: 00 00 f3 00 00 00 00 f3 f3 00 00 00 00 f4 f4 f4
+[  556.598044]                          ^
+[  556.598797]  ffff8801f292fd80: f3 f3 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00
+[  556.600225]  ffff8801f292fe00: 00 00 00 00 00 00 00 00 f1 f1 f1 f1 00 f4 f4 f4
+[  556.601647] ==================================================================
+
+- Location
+https://elixir.bootlin.com/linux/v4.18-rc1/source/fs/f2fs/node.c#L987
+               case NODE_DIND_BLOCK:
+                       err = truncate_nodes(&dn, nofs, offset[1], 3);
+                       cont = 0;
+                       break;
+
+               default:
+                       BUG(); <---
+               }
+
+Reported-by: Wen Xu <wen.xu@gatech.edu>
+Signed-off-by: Chao Yu <yuchao0@huawei.com>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/inode.c |   25 +++++++++++++++++++++++--
+ 1 file changed, 23 insertions(+), 2 deletions(-)
+
+--- a/fs/f2fs/inode.c
++++ b/fs/f2fs/inode.c
+@@ -104,9 +104,30 @@ static void __recover_inline_status(stru
+       return;
+ }
+ 
+-static bool sanity_check_inode(struct inode *inode)
++static bool sanity_check_inode(struct inode *inode, struct page *node_page)
+ {
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
++      unsigned long long iblocks;
++
++      iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks);
++      if (!iblocks) {
++              set_sbi_flag(sbi, SBI_NEED_FSCK);
++              f2fs_msg(sbi->sb, KERN_WARNING,
++                      "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, "
++                      "run fsck to fix.",
++                      __func__, inode->i_ino, iblocks);
++              return false;
++      }
++
++      if (ino_of_node(node_page) != nid_of_node(node_page)) {
++              set_sbi_flag(sbi, SBI_NEED_FSCK);
++              f2fs_msg(sbi->sb, KERN_WARNING,
++                      "%s: corrupted inode footer i_ino=%lx, ino,nid: "
++                      "[%u, %u] run fsck to fix.",
++                      __func__, inode->i_ino,
++                      ino_of_node(node_page), nid_of_node(node_page));
++              return false;
++      }
+ 
+       return true;
+ }
+@@ -160,7 +181,7 @@ static int do_read_inode(struct inode *i
+ 
+       get_inline_info(inode, ri);
+ 
+-      if (!sanity_check_inode(inode)) {
++      if (!sanity_check_inode(inode, node_page)) {
+               f2fs_put_page(node_page, 1);
+               return -EINVAL;
+       }
diff --git a/queue-4.9/f2fs-fix-to-do-sanity-check-with-secs_per_zone.patch b/queue-4.9/f2fs-fix-to-do-sanity-check-with-secs_per_zone.patch

new file mode 100644 (file)

index 0000000..266e048
--- /dev/null
+++ b/queue-4.9/f2fs-fix-to-do-sanity-check-with-secs_per_zone.patch
@@ -0,0 +1,98 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Chao Yu <yuchao0@huawei.com>
+Date: Sat, 23 Jun 2018 00:12:36 +0800
+Subject: f2fs: fix to do sanity check with secs_per_zone
+
+From: Chao Yu <yuchao0@huawei.com>
+
+commit 42bf546c1fe3f3654bdf914e977acbc2b80a5be5 upstream.
+
+As Wen Xu reported in below link:
+
+https://bugzilla.kernel.org/show_bug.cgi?id=200183
+
+- Overview
+Divide by zero in reset_curseg() when mounting a crafted f2fs image
+
+- Reproduce
+
+- Kernel message
+[  588.281510] divide error: 0000 [#1] SMP KASAN PTI
+[  588.282701] CPU: 0 PID: 1293 Comm: mount Not tainted 4.18.0-rc1+ #4
+[  588.284000] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
+[  588.286178] RIP: 0010:reset_curseg+0x94/0x1a0
+[  588.298166] RSP: 0018:ffff8801e88d7940 EFLAGS: 00010246
+[  588.299360] RAX: 0000000000000014 RBX: ffff8801e1d46d00 RCX: ffffffffb88bf60b
+[  588.300809] RDX: 0000000000000000 RSI: dffffc0000000000 RDI: ffff8801e1d46d64
+[  588.305272] R13: 0000000000000000 R14: 0000000000000014 R15: 0000000000000000
+[  588.306822] FS:  00007fad85008840(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000
+[  588.308456] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  588.309623] CR2: 0000000001705078 CR3: 00000001f30f8000 CR4: 00000000000006f0
+[  588.311085] Call Trace:
+[  588.311637]  f2fs_build_segment_manager+0x103f/0x3410
+[  588.316136]  ? f2fs_commit_super+0x1b0/0x1b0
+[  588.317031]  ? set_blocksize+0x90/0x140
+[  588.319473]  f2fs_mount+0x15/0x20
+[  588.320166]  mount_fs+0x60/0x1a0
+[  588.320847]  ? alloc_vfsmnt+0x309/0x360
+[  588.321647]  vfs_kern_mount+0x6b/0x1a0
+[  588.322432]  do_mount+0x34a/0x18c0
+[  588.323175]  ? strndup_user+0x46/0x70
+[  588.323937]  ? copy_mount_string+0x20/0x20
+[  588.324793]  ? memcg_kmem_put_cache+0x1b/0xa0
+[  588.325702]  ? kasan_check_write+0x14/0x20
+[  588.326562]  ? _copy_from_user+0x6a/0x90
+[  588.327375]  ? memdup_user+0x42/0x60
+[  588.328118]  ksys_mount+0x83/0xd0
+[  588.328808]  __x64_sys_mount+0x67/0x80
+[  588.329607]  do_syscall_64+0x78/0x170
+[  588.330400]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
+[  588.331461] RIP: 0033:0x7fad848e8b9a
+[  588.336022] RSP: 002b:00007ffd7c5b6be8 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5
+[  588.337547] RAX: ffffffffffffffda RBX: 00000000016f8030 RCX: 00007fad848e8b9a
+[  588.338999] RDX: 00000000016f8210 RSI: 00000000016f9f30 RDI: 0000000001700ec0
+[  588.340442] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013
+[  588.341887] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 0000000001700ec0
+[  588.343341] R13: 00000000016f8210 R14: 0000000000000000 R15: 0000000000000003
+[  588.354891] ---[ end trace 4ce02f25ff7d3df5 ]---
+[  588.355862] RIP: 0010:reset_curseg+0x94/0x1a0
+[  588.360742] RSP: 0018:ffff8801e88d7940 EFLAGS: 00010246
+[  588.361812] RAX: 0000000000000014 RBX: ffff8801e1d46d00 RCX: ffffffffb88bf60b
+[  588.363485] RDX: 0000000000000000 RSI: dffffc0000000000 RDI: ffff8801e1d46d64
+[  588.365213] RBP: ffff8801e88d7968 R08: ffffed003c32266f R09: ffffed003c32266f
+[  588.366661] R10: 0000000000000001 R11: ffffed003c32266e R12: ffff8801f0337700
+[  588.368110] R13: 0000000000000000 R14: 0000000000000014 R15: 0000000000000000
+[  588.370057] FS:  00007fad85008840(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000
+[  588.372099] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  588.373291] CR2: 0000000001705078 CR3: 00000001f30f8000 CR4: 00000000000006f0
+
+- Location
+https://elixir.bootlin.com/linux/latest/source/fs/f2fs/segment.c#L2147
+        curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
+
+If secs_per_zone is corrupted (e.g. by a fuzzing test), it will cause a
+divide-by-zero when using the GET_ZONE_FROM_SEG macro, so we should do more
+sanity checking on secs_per_zone during mount to avoid this issue.
+
+Signed-off-by: Chao Yu <yuchao0@huawei.com>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/super.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/f2fs/super.c
++++ b/fs/f2fs/super.c
+@@ -1434,9 +1434,9 @@ static int sanity_check_raw_super(struct
+               return 1;
+       }
+ 
+-      if (secs_per_zone > total_sections) {
++      if (secs_per_zone > total_sections || !secs_per_zone) {
+               f2fs_msg(sb, KERN_INFO,
+-                      "Wrong secs_per_zone (%u > %u)",
++                      "Wrong secs_per_zone / total_sections (%u, %u)",
+                       secs_per_zone, total_sections);
+               return 1;
+       }
diff --git a/queue-4.9/f2fs-fix-to-do-sanity-check-with-user_block_count.patch b/queue-4.9/f2fs-fix-to-do-sanity-check-with-user_block_count.patch

new file mode 100644 (file)

index 0000000..eb4e750
--- /dev/null
+++ b/queue-4.9/f2fs-fix-to-do-sanity-check-with-user_block_count.patch
@@ -0,0 +1,148 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Chao Yu <yuchao0@huawei.com>
+Date: Wed, 27 Jun 2018 18:05:54 +0800
+Subject: f2fs: fix to do sanity check with user_block_count
+
+From: Chao Yu <yuchao0@huawei.com>
+
+commit 9dc956b2c8523aed39d1e6508438be9fea28c8fc upstream.
+
+This patch adds the missing sanity check on user_block_count.
+
+- Overview
+Divide by zero in utilization when mounting a corrupted f2fs image
+
+- Reproduce (4.18 upstream kernel)
+
+- Kernel message
+[  564.099503] F2FS-fs (loop0): invalid crc value
+[  564.101991] divide error: 0000 [#1] SMP KASAN PTI
+[  564.103103] CPU: 1 PID: 1298 Comm: f2fs_discard-7: Not tainted 4.18.0-rc1+ #4
+[  564.104584] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
+[  564.106624] RIP: 0010:issue_discard_thread+0x248/0x5c0
+[  564.107692] Code: ff ff 48 8b bd e8 fe ff ff 41 8b 9d 4c 04 00 00 e8 cd b8 ad ff 41 8b 85 50 04 00 00 31 d2 48 8d 04 80 48 8d 04 80 48 c1 e0 02 <48> f7 f3 83 f8 50 7e 16 41 c7 86 7c ff ff ff 01 00 00 00 41 c7 86
+[  564.111686] RSP: 0018:ffff8801f3117dc0 EFLAGS: 00010206
+[  564.112775] RAX: 0000000000000384 RBX: 0000000000000000 RCX: ffffffffb88c1e03
+[  564.114250] RDX: 0000000000000000 RSI: dffffc0000000000 RDI: ffff8801e3aa4850
+[  564.115706] RBP: ffff8801f3117f00 R08: 1ffffffff751a1d0 R09: fffffbfff751a1d0
+[  564.117177] R10: 0000000000000001 R11: fffffbfff751a1d0 R12: 00000000fffffffc
+[  564.118634] R13: ffff8801e3aa4400 R14: ffff8801f3117ed8 R15: ffff8801e2050000
+[  564.120094] FS:  0000000000000000(0000) GS:ffff8801f6f00000(0000) knlGS:0000000000000000
+[  564.121748] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  564.122923] CR2: 000000000202b078 CR3: 00000001f11ac000 CR4: 00000000000006e0
+[  564.124383] Call Trace:
+[  564.124924]  ? __issue_discard_cmd+0x480/0x480
+[  564.125882]  ? __sched_text_start+0x8/0x8
+[  564.126756]  ? __kthread_parkme+0xcb/0x100
+[  564.127620]  ? kthread_blkcg+0x70/0x70
+[  564.128412]  kthread+0x180/0x1d0
+[  564.129105]  ? __issue_discard_cmd+0x480/0x480
+[  564.130029]  ? kthread_associate_blkcg+0x150/0x150
+[  564.131033]  ret_from_fork+0x35/0x40
+[  564.131794] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy
+[  564.141798] ---[ end trace 4ce02f25ff7d3df5 ]---
+[  564.142773] RIP: 0010:issue_discard_thread+0x248/0x5c0
+[  564.143885] Code: ff ff 48 8b bd e8 fe ff ff 41 8b 9d 4c 04 00 00 e8 cd b8 ad ff 41 8b 85 50 04 00 00 31 d2 48 8d 04 80 48 8d 04 80 48 c1 e0 02 <48> f7 f3 83 f8 50 7e 16 41 c7 86 7c ff ff ff 01 00 00 00 41 c7 86
+[  564.147776] RSP: 0018:ffff8801f3117dc0 EFLAGS: 00010206
+[  564.148856] RAX: 0000000000000384 RBX: 0000000000000000 RCX: ffffffffb88c1e03
+[  564.150424] RDX: 0000000000000000 RSI: dffffc0000000000 RDI: ffff8801e3aa4850
+[  564.151906] RBP: ffff8801f3117f00 R08: 1ffffffff751a1d0 R09: fffffbfff751a1d0
+[  564.153463] R10: 0000000000000001 R11: fffffbfff751a1d0 R12: 00000000fffffffc
+[  564.154915] R13: ffff8801e3aa4400 R14: ffff8801f3117ed8 R15: ffff8801e2050000
+[  564.156405] FS:  0000000000000000(0000) GS:ffff8801f6f00000(0000) knlGS:0000000000000000
+[  564.158070] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  564.159279] CR2: 000000000202b078 CR3: 00000001f11ac000 CR4: 00000000000006e0
+[  564.161043] ==================================================================
+[  564.162587] BUG: KASAN: stack-out-of-bounds in from_kuid_munged+0x1d/0x50
+[  564.163994] Read of size 4 at addr ffff8801f3117c84 by task f2fs_discard-7:/1298
+
+[  564.165852] CPU: 1 PID: 1298 Comm: f2fs_discard-7: Tainted: G      D           4.18.0-rc1+ #4
+[  564.167593] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
+[  564.169522] Call Trace:
+[  564.170057]  dump_stack+0x7b/0xb5
+[  564.170778]  print_address_description+0x70/0x290
+[  564.171765]  kasan_report+0x291/0x390
+[  564.172540]  ? from_kuid_munged+0x1d/0x50
+[  564.173408]  __asan_load4+0x78/0x80
+[  564.174148]  from_kuid_munged+0x1d/0x50
+[  564.174962]  do_notify_parent+0x1f5/0x4f0
+[  564.175808]  ? send_sigqueue+0x390/0x390
+[  564.176639]  ? css_set_move_task+0x152/0x340
+[  564.184197]  do_exit+0x1290/0x1390
+[  564.184950]  ? __issue_discard_cmd+0x480/0x480
+[  564.185884]  ? mm_update_next_owner+0x380/0x380
+[  564.186829]  ? __sched_text_start+0x8/0x8
+[  564.187672]  ? __kthread_parkme+0xcb/0x100
+[  564.188528]  ? kthread_blkcg+0x70/0x70
+[  564.189333]  ? kthread+0x180/0x1d0
+[  564.190052]  ? __issue_discard_cmd+0x480/0x480
+[  564.190983]  rewind_stack_do_exit+0x17/0x20
+
+[  564.192190] The buggy address belongs to the page:
+[  564.193213] page:ffffea0007cc45c0 count:0 mapcount:0 mapping:0000000000000000 index:0x0
+[  564.194856] flags: 0x2ffff0000000000()
+[  564.195644] raw: 02ffff0000000000 0000000000000000 dead000000000200 0000000000000000
+[  564.197247] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000
+[  564.198826] page dumped because: kasan: bad access detected
+
+[  564.200299] Memory state around the buggy address:
+[  564.201306]  ffff8801f3117b80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+[  564.202779]  ffff8801f3117c00: 00 00 00 00 00 00 00 00 00 00 00 f3 f3 f3 f3 f3
+[  564.204252] >ffff8801f3117c80: f3 f3 f3 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1
+[  564.205742]                    ^
+[  564.206424]  ffff8801f3117d00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+[  564.207908]  ffff8801f3117d80: f3 f3 f3 f3 f3 f3 f3 f3 00 00 00 00 00 00 00 00
+[  564.209389] ==================================================================
+[  564.231795] F2FS-fs (loop0): Mounted with checkpoint version = 2
+
+- Location
+https://elixir.bootlin.com/linux/v4.18-rc1/source/fs/f2fs/segment.h#L586
+       return div_u64((u64)valid_user_blocks(sbi) * 100,
+                                       sbi->user_block_count);
+Missing checks on sbi->user_block_count.
+
+Reported-by: Wen Xu <wen.xu@gatech.edu>
+Signed-off-by: Chao Yu <yuchao0@huawei.com>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/super.c |   13 ++++++++++++-
+ 1 file changed, 12 insertions(+), 1 deletion(-)
+
+--- a/fs/f2fs/super.c
++++ b/fs/f2fs/super.c
+@@ -1486,6 +1486,8 @@ int sanity_check_ckpt(struct f2fs_sb_inf
+       unsigned int sit_segs, nat_segs;
+       unsigned int sit_bitmap_size, nat_bitmap_size;
+       unsigned int log_blocks_per_seg;
++      unsigned int segment_count_main;
++      block_t user_block_count;
+       int i;
+ 
+       total = le32_to_cpu(raw_super->segment_count);
+@@ -1510,6 +1512,16 @@ int sanity_check_ckpt(struct f2fs_sb_inf
+               return 1;
+       }
+ 
++      user_block_count = le64_to_cpu(ckpt->user_block_count);
++      segment_count_main = le32_to_cpu(raw_super->segment_count_main);
++      log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
++      if (!user_block_count || user_block_count >=
++                      segment_count_main << log_blocks_per_seg) {
++              f2fs_msg(sbi->sb, KERN_ERR,
++                      "Wrong user_block_count: %u", user_block_count);
++              return 1;
++      }
++
+       main_segs = le32_to_cpu(raw_super->segment_count_main);
+       blocks_per_seg = sbi->blocks_per_seg;
+ 
+@@ -1526,7 +1538,6 @@ int sanity_check_ckpt(struct f2fs_sb_inf
+ 
+       sit_bitmap_size = le32_to_cpu(ckpt->sit_ver_bitmap_bytesize);
+       nat_bitmap_size = le32_to_cpu(ckpt->nat_ver_bitmap_bytesize);
+-      log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
+ 
+       if (sit_bitmap_size != ((sit_segs / 2) << log_blocks_per_seg) / 8 ||
+               nat_bitmap_size != ((nat_segs / 2) << log_blocks_per_seg) / 8) {
diff --git a/queue-4.9/f2fs-free-meta-pages-if-sanity-check-for-ckpt-is-failed.patch b/queue-4.9/f2fs-free-meta-pages-if-sanity-check-for-ckpt-is-failed.patch

new file mode 100644 (file)

index 0000000..c6c97fe
--- /dev/null
+++ b/queue-4.9/f2fs-free-meta-pages-if-sanity-check-for-ckpt-is-failed.patch
@@ -0,0 +1,40 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Jaegeuk Kim <jaegeuk@kernel.org>
+Date: Mon, 5 Dec 2016 17:25:32 -0800
+Subject: f2fs: free meta pages if sanity check for ckpt is failed
+
+From: Jaegeuk Kim <jaegeuk@kernel.org>
+
+commit a2125ff7dd1ed3a2a53cdc1f8f9c9cec9cfaa7ab upstream.
+
+This fixes the missing freeing of meta pages in the error case.
+
+Tested-by: Eric Biggers <ebiggers@google.com>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/checkpoint.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/fs/f2fs/checkpoint.c
++++ b/fs/f2fs/checkpoint.c
+@@ -796,7 +796,7 @@ int get_valid_checkpoint(struct f2fs_sb_
+ 
+       /* Sanity checking of checkpoint */
+       if (sanity_check_ckpt(sbi))
+-              goto fail_no_cp;
++              goto free_fail_no_cp;
+ 
+       if (cur_page == cp1)
+               sbi->cur_cp_pack = 1;
+@@ -824,6 +824,9 @@ done:
+       f2fs_put_page(cp2, 1);
+       return 0;
+ 
++free_fail_no_cp:
++      f2fs_put_page(cp1, 1);
++      f2fs_put_page(cp2, 1);
+ fail_no_cp:
+       kfree(sbi->ckpt);
+       return -EINVAL;
diff --git a/queue-4.9/f2fs-introduce-and-spread-verify_blkaddr.patch b/queue-4.9/f2fs-introduce-and-spread-verify_blkaddr.patch

new file mode 100644 (file)

index 0000000..8f3aa17
--- /dev/null
+++ b/queue-4.9/f2fs-introduce-and-spread-verify_blkaddr.patch
@@ -0,0 +1,319 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Chao Yu <yuchao0@huawei.com>
+Date: Tue, 5 Jun 2018 17:44:11 +0800
+Subject: f2fs: introduce and spread verify_blkaddr
+
+From: Chao Yu <yuchao0@huawei.com>
+
+commit e1da7872f6eda977bd812346bf588c35e4495a1e upstream.
+
+This patch introduces verify_blkaddr to check meta/data block address
+with valid range to detect bug earlier.
+
+In addition, once we encounter an invalid blkaddr, notify the user to run
+fsck to fix it, and let the kernel panic.
+
+Signed-off-by: Chao Yu <yuchao0@huawei.com>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+[bwh: Backported to 4.9:
+ - I skipped an earlier renaming of is_valid_meta_blkaddr() to
+   f2fs_is_valid_meta_blkaddr()
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/checkpoint.c |   11 +++++++++--
+ fs/f2fs/data.c       |    6 +++---
+ fs/f2fs/f2fs.h       |   32 +++++++++++++++++++++++++++++---
+ fs/f2fs/file.c       |    9 +++++----
+ fs/f2fs/inode.c      |    7 ++++---
+ fs/f2fs/node.c       |    4 ++--
+ fs/f2fs/recovery.c   |    6 +++---
+ fs/f2fs/segment.c    |    4 ++--
+ fs/f2fs/segment.h    |    8 +++-----
+ 9 files changed, 60 insertions(+), 27 deletions(-)
+
+--- a/fs/f2fs/checkpoint.c
++++ b/fs/f2fs/checkpoint.c
+@@ -118,7 +118,8 @@ struct page *get_tmp_page(struct f2fs_sb
+       return __get_meta_page(sbi, index, false);
+ }
+ 
+-bool is_valid_meta_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
++bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
++                                      block_t blkaddr, int type)
+ {
+       switch (type) {
+       case META_NAT:
+@@ -138,10 +139,16 @@ bool is_valid_meta_blkaddr(struct f2fs_s
+                       return false;
+               break;
+       case META_POR:
++      case DATA_GENERIC:
+               if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
+                       blkaddr < MAIN_BLKADDR(sbi)))
+                       return false;
+               break;
++      case META_GENERIC:
++              if (unlikely(blkaddr < SEG0_BLKADDR(sbi) ||
++                      blkaddr >= MAIN_BLKADDR(sbi)))
++                      return false;
++              break;
+       default:
+               BUG();
+       }
+@@ -173,7 +180,7 @@ int ra_meta_pages(struct f2fs_sb_info *s
+       blk_start_plug(&plug);
+       for (; nrpages-- > 0; blkno++) {
+ 
+-              if (!is_valid_meta_blkaddr(sbi, blkno, type))
++              if (!f2fs_is_valid_blkaddr(sbi, blkno, type))
+                       goto out;
+ 
+               switch (type) {
+--- a/fs/f2fs/data.c
++++ b/fs/f2fs/data.c
+@@ -267,7 +267,7 @@ void f2fs_submit_page_mbio(struct f2fs_i
+ 
+       io = is_read ? &sbi->read_io : &sbi->write_io[btype];
+ 
+-      if (is_valid_blkaddr(fio->old_blkaddr))
++      if (__is_valid_data_blkaddr(fio->old_blkaddr))
+               verify_block_addr(fio, fio->old_blkaddr);
+       verify_block_addr(fio, fio->new_blkaddr);
+ 
+@@ -723,7 +723,7 @@ next_dnode:
+ next_block:
+       blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
+ 
+-      if (!is_valid_blkaddr(blkaddr)) {
++      if (!is_valid_data_blkaddr(sbi, blkaddr)) {
+               if (create) {
+                       if (unlikely(f2fs_cp_error(sbi))) {
+                               err = -EIO;
+@@ -1217,7 +1217,7 @@ retry_encrypt:
+        * If current allocation needs SSR,
+        * it had better in-place writes for updated data.
+        */
+-      if (unlikely(is_valid_blkaddr(fio->old_blkaddr) &&
++      if (unlikely(is_valid_data_blkaddr(fio->sbi, fio->old_blkaddr) &&
+                       !is_cold_data(page) &&
+                       !IS_ATOMIC_WRITTEN_PAGE(page) &&
+                       need_inplace_update(inode))) {
+--- a/fs/f2fs/f2fs.h
++++ b/fs/f2fs/f2fs.h
+@@ -145,7 +145,7 @@ struct cp_control {
+ };
+ 
+ /*
+- * For CP/NAT/SIT/SSA readahead
++ * indicate meta/data type
+  */
+ enum {
+       META_CP,
+@@ -153,6 +153,8 @@ enum {
+       META_SIT,
+       META_SSA,
+       META_POR,
++      DATA_GENERIC,
++      META_GENERIC,
+ };
+ 
+ /* for the list of ino */
+@@ -1930,13 +1932,36 @@ static inline void *f2fs_kvzalloc(size_t
+       (pgofs - ADDRS_PER_INODE(inode) + ADDRS_PER_BLOCK) /    \
+       ADDRS_PER_BLOCK * ADDRS_PER_BLOCK + ADDRS_PER_INODE(inode))
+ 
+-static inline bool is_valid_blkaddr(block_t blkaddr)
++bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
++                                      block_t blkaddr, int type);
++void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...);
++static inline void verify_blkaddr(struct f2fs_sb_info *sbi,
++                                      block_t blkaddr, int type)
++{
++      if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) {
++              f2fs_msg(sbi->sb, KERN_ERR,
++                      "invalid blkaddr: %u, type: %d, run fsck to fix.",
++                      blkaddr, type);
++              f2fs_bug_on(sbi, 1);
++      }
++}
++
++static inline bool __is_valid_data_blkaddr(block_t blkaddr)
+ {
+       if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
+               return false;
+       return true;
+ }
+ 
++static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi,
++                                              block_t blkaddr)
++{
++      if (!__is_valid_data_blkaddr(blkaddr))
++              return false;
++      verify_blkaddr(sbi, blkaddr, DATA_GENERIC);
++      return true;
++}
++
+ /*
+  * file.c
+  */
+@@ -2122,7 +2147,8 @@ void f2fs_stop_checkpoint(struct f2fs_sb
+ struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
+ struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
+ struct page *get_tmp_page(struct f2fs_sb_info *, pgoff_t);
+-bool is_valid_meta_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type);
++bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
++                                      block_t blkaddr, int type);
+ int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int, bool);
+ void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t);
+ long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
+--- a/fs/f2fs/file.c
++++ b/fs/f2fs/file.c
+@@ -310,13 +310,13 @@ static pgoff_t __get_first_dirty_index(s
+       return pgofs;
+ }
+ 
+-static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs,
+-                                                      int whence)
++static bool __found_offset(struct f2fs_sb_info *sbi, block_t blkaddr,
++                              pgoff_t dirty, pgoff_t pgofs, int whence)
+ {
+       switch (whence) {
+       case SEEK_DATA:
+               if ((blkaddr == NEW_ADDR && dirty == pgofs) ||
+-                      is_valid_blkaddr(blkaddr))
++                      is_valid_data_blkaddr(sbi, blkaddr))
+                       return true;
+               break;
+       case SEEK_HOLE:
+@@ -378,7 +378,8 @@ static loff_t f2fs_seek_block(struct fil
+                       block_t blkaddr;
+                       blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
+ 
+-                      if (__found_offset(blkaddr, dirty, pgofs, whence)) {
++                      if (__found_offset(F2FS_I_SB(inode), blkaddr, dirty,
++                                                      pgofs, whence)) {
+                               f2fs_put_dnode(&dn);
+                               goto found;
+                       }
+--- a/fs/f2fs/inode.c
++++ b/fs/f2fs/inode.c
+@@ -59,11 +59,12 @@ static void __get_inode_rdev(struct inod
+       }
+ }
+ 
+-static bool __written_first_block(struct f2fs_inode *ri)
++static bool __written_first_block(struct f2fs_sb_info *sbi,
++                                      struct f2fs_inode *ri)
+ {
+       block_t addr = le32_to_cpu(ri->i_addr[0]);
+ 
+-      if (is_valid_blkaddr(addr))
++      if (is_valid_data_blkaddr(sbi, addr))
+               return true;
+       return false;
+ }
+@@ -159,7 +160,7 @@ static int do_read_inode(struct inode *i
+       /* get rdev by using inline_info */
+       __get_inode_rdev(inode, ri);
+ 
+-      if (__written_first_block(ri))
++      if (__written_first_block(sbi, ri))
+               set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
+ 
+       if (!need_inode_block_update(sbi, inode->i_ino))
+--- a/fs/f2fs/node.c
++++ b/fs/f2fs/node.c
+@@ -304,7 +304,7 @@ static void set_node_addr(struct f2fs_sb
+                       new_blkaddr == NULL_ADDR);
+       f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR &&
+                       new_blkaddr == NEW_ADDR);
+-      f2fs_bug_on(sbi, is_valid_blkaddr(nat_get_blkaddr(e)) &&
++      f2fs_bug_on(sbi, is_valid_data_blkaddr(sbi, nat_get_blkaddr(e)) &&
+                       new_blkaddr == NEW_ADDR);
+ 
+       /* increment version no as node is removed */
+@@ -319,7 +319,7 @@ static void set_node_addr(struct f2fs_sb
+ 
+       /* change address */
+       nat_set_blkaddr(e, new_blkaddr);
+-      if (!is_valid_blkaddr(new_blkaddr))
++      if (!is_valid_data_blkaddr(sbi, new_blkaddr))
+               set_nat_flag(e, IS_CHECKPOINTED, false);
+       __set_nat_cache_dirty(nm_i, e);
+ 
+--- a/fs/f2fs/recovery.c
++++ b/fs/f2fs/recovery.c
+@@ -236,7 +236,7 @@ static int find_fsync_dnodes(struct f2fs
+       while (1) {
+               struct fsync_inode_entry *entry;
+ 
+-              if (!is_valid_meta_blkaddr(sbi, blkaddr, META_POR))
++              if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
+                       return 0;
+ 
+               page = get_tmp_page(sbi, blkaddr);
+@@ -468,7 +468,7 @@ retry_dn:
+               }
+ 
+               /* dest is valid block, try to recover from src to dest */
+-              if (is_valid_meta_blkaddr(sbi, dest, META_POR)) {
++              if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
+ 
+                       if (src == NULL_ADDR) {
+                               err = reserve_new_block(&dn);
+@@ -527,7 +527,7 @@ static int recover_data(struct f2fs_sb_i
+       while (1) {
+               struct fsync_inode_entry *entry;
+ 
+-              if (!is_valid_meta_blkaddr(sbi, blkaddr, META_POR))
++              if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
+                       break;
+ 
+               ra_meta_pages_cond(sbi, blkaddr);
+--- a/fs/f2fs/segment.c
++++ b/fs/f2fs/segment.c
+@@ -944,7 +944,7 @@ bool is_checkpointed_data(struct f2fs_sb
+       struct seg_entry *se;
+       bool is_cp = false;
+ 
+-      if (!is_valid_blkaddr(blkaddr))
++      if (!is_valid_data_blkaddr(sbi, blkaddr))
+               return true;
+ 
+       mutex_lock(&sit_i->sentry_lock);
+@@ -1668,7 +1668,7 @@ void f2fs_wait_on_encrypted_page_writeba
+ {
+       struct page *cpage;
+ 
+-      if (!is_valid_blkaddr(blkaddr))
++      if (!is_valid_data_blkaddr(sbi, blkaddr))
+               return;
+ 
+       cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
+--- a/fs/f2fs/segment.h
++++ b/fs/f2fs/segment.h
+@@ -81,7 +81,7 @@
+       (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (sbi->blocks_per_seg - 1))
+ 
+ #define GET_SEGNO(sbi, blk_addr)                                      \
+-      ((!is_valid_blkaddr(blk_addr)) ?                        \
++      ((!is_valid_data_blkaddr(sbi, blk_addr)) ?                      \
+       NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi),                 \
+               GET_SEGNO_FROM_SEG0(sbi, blk_addr)))
+ #define GET_SECNO(sbi, segno)                                 \
+@@ -603,11 +603,9 @@ static inline void verify_block_addr(str
+ 
+       if (PAGE_TYPE_OF_BIO(fio->type) == META &&
+                               (!is_read_io(fio->op) || fio->is_meta))
+-              BUG_ON(blk_addr < SEG0_BLKADDR(sbi) ||
+-                              blk_addr >= MAIN_BLKADDR(sbi));
++              verify_blkaddr(sbi, blk_addr, META_GENERIC);
+       else
+-              BUG_ON(blk_addr < MAIN_BLKADDR(sbi) ||
+-                              blk_addr >= MAX_BLKADDR(sbi));
++              verify_blkaddr(sbi, blk_addr, DATA_GENERIC);
+ }
+ 
+ /*
diff --git a/queue-4.9/f2fs-return-error-during-fill_super.patch b/queue-4.9/f2fs-return-error-during-fill_super.patch

new file mode 100644 (file)

index 0000000..f5d90f5
--- /dev/null
+++ b/queue-4.9/f2fs-return-error-during-fill_super.patch
@@ -0,0 +1,123 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Jaegeuk Kim <jaegeuk@kernel.org>
+Date: Tue, 19 Dec 2017 19:16:34 -0800
+Subject: f2fs: return error during fill_super
+
+From: Jaegeuk Kim <jaegeuk@kernel.org>
+
+commit c39a1b348c4fe172729eff77c533dabc3c7cdaa7 upstream.
+
+Let's avoid BUG_ON during fill_super when on-disk data is totally corrupted.
+
+Reviewed-by: Chao Yu <yuchao0@huawei.com>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+[bwh: Backported to 4.9: adjust context]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/segment.c |   16 ++++++++++++----
+ fs/f2fs/segment.h |   22 ++++++++++++++++++----
+ 2 files changed, 30 insertions(+), 8 deletions(-)
+
+--- a/fs/f2fs/segment.c
++++ b/fs/f2fs/segment.c
+@@ -2322,7 +2322,7 @@ static int build_curseg(struct f2fs_sb_i
+       return restore_curseg_summaries(sbi);
+ }
+ 
+-static void build_sit_entries(struct f2fs_sb_info *sbi)
++static int build_sit_entries(struct f2fs_sb_info *sbi)
+ {
+       struct sit_info *sit_i = SIT_I(sbi);
+       struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
+@@ -2333,6 +2333,7 @@ static void build_sit_entries(struct f2f
+       unsigned int i, start, end;
+       unsigned int readed, start_blk = 0;
+       int nrpages = MAX_BIO_BLOCKS(sbi) * 8;
++      int err = 0;
+ 
+       do {
+               readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT, true);
+@@ -2350,7 +2351,9 @@ static void build_sit_entries(struct f2f
+                       sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
+                       f2fs_put_page(page, 1);
+ 
+-                      check_block_count(sbi, start, &sit);
++                      err = check_block_count(sbi, start, &sit);
++                      if (err)
++                              return err;
+                       seg_info_from_raw_sit(se, &sit);
+ 
+                       /* build discard map only one time */
+@@ -2378,7 +2381,9 @@ static void build_sit_entries(struct f2f
+ 
+               old_valid_blocks = se->valid_blocks;
+ 
+-              check_block_count(sbi, start, &sit);
++              err = check_block_count(sbi, start, &sit);
++              if (err)
++                      break;
+               seg_info_from_raw_sit(se, &sit);
+ 
+               if (f2fs_discard_en(sbi)) {
+@@ -2393,6 +2398,7 @@ static void build_sit_entries(struct f2f
+                               se->valid_blocks - old_valid_blocks;
+       }
+       up_read(&curseg->journal_rwsem);
++      return err;
+ }
+ 
+ static void init_free_segmap(struct f2fs_sb_info *sbi)
+@@ -2559,7 +2565,9 @@ int build_segment_manager(struct f2fs_sb
+               return err;
+ 
+       /* reinit free segmap based on SIT */
+-      build_sit_entries(sbi);
++      err = build_sit_entries(sbi);
++      if (err)
++              return err;
+ 
+       init_free_segmap(sbi);
+       err = build_dirty_segmap(sbi);
+--- a/fs/f2fs/segment.h
++++ b/fs/f2fs/segment.h
+@@ -600,7 +600,7 @@ static inline void verify_block_addr(str
+ /*
+  * Summary block is always treated as an invalid block
+  */
+-static inline void check_block_count(struct f2fs_sb_info *sbi,
++static inline int check_block_count(struct f2fs_sb_info *sbi,
+               int segno, struct f2fs_sit_entry *raw_sit)
+ {
+ #ifdef CONFIG_F2FS_CHECK_FS
+@@ -622,11 +622,25 @@ static inline void check_block_count(str
+               cur_pos = next_pos;
+               is_valid = !is_valid;
+       } while (cur_pos < sbi->blocks_per_seg);
+-      BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks);
++
++      if (unlikely(GET_SIT_VBLOCKS(raw_sit) != valid_blocks)) {
++              f2fs_msg(sbi->sb, KERN_ERR,
++                              "Mismatch valid blocks %d vs. %d",
++                                      GET_SIT_VBLOCKS(raw_sit), valid_blocks);
++              set_sbi_flag(sbi, SBI_NEED_FSCK);
++              return -EINVAL;
++      }
+ #endif
+       /* check segment usage, and check boundary of a given segment number */
+-      f2fs_bug_on(sbi, GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg
+-                                      || segno > TOTAL_SEGS(sbi) - 1);
++      if (unlikely(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg
++                                      || segno > TOTAL_SEGS(sbi) - 1)) {
++              f2fs_msg(sbi->sb, KERN_ERR,
++                              "Wrong valid blocks %d or segno %u",
++                                      GET_SIT_VBLOCKS(raw_sit), segno);
++              set_sbi_flag(sbi, SBI_NEED_FSCK);
++              return -EINVAL;
++      }
++      return 0;
+ }
+ 
+ static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi,
diff --git a/queue-4.9/f2fs-sanity-check-on-sit-entry.patch b/queue-4.9/f2fs-sanity-check-on-sit-entry.patch

new file mode 100644 (file)

index 0000000..9821eaf
--- /dev/null
+++ b/queue-4.9/f2fs-sanity-check-on-sit-entry.patch
@@ -0,0 +1,103 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Jaegeuk Kim <jaegeuk@kernel.org>
+Date: Tue, 24 Apr 2018 15:44:16 -0600
+Subject: f2fs: sanity check on sit entry
+
+From: Jaegeuk Kim <jaegeuk@kernel.org>
+
+commit b2ca374f33bd33fd822eb871876e4888cf79dc97 upstream.
+
+syzbot hit the following crash on upstream commit
+87ef12027b9b1dd0e0b12cf311fbcb19f9d92539 (Wed Apr 18 19:48:17 2018 +0000)
+Merge tag 'ceph-for-4.17-rc2' of git://github.com/ceph/ceph-client
+syzbot dashboard link: https://syzkaller.appspot.com/bug?extid=83699adeb2d13579c31e
+
+C reproducer: https://syzkaller.appspot.com/x/repro.c?id=5805208181407744
+syzkaller reproducer: https://syzkaller.appspot.com/x/repro.syz?id=6005073343676416
+Raw console output: https://syzkaller.appspot.com/x/log.txt?id=6555047731134464
+Kernel config: https://syzkaller.appspot.com/x/.config?id=1808800213120130118
+compiler: gcc (GCC) 8.0.1 20180413 (experimental)
+
+IMPORTANT: if you fix the bug, please add the following tag to the commit:
+Reported-by: syzbot+83699adeb2d13579c31e@syzkaller.appspotmail.com
+It will help syzbot understand when the bug is fixed. See footer for details.
+If you forward the report, please keep this part and the footer.
+
+F2FS-fs (loop0): Magic Mismatch, valid(0xf2f52010) - read(0x0)
+F2FS-fs (loop0): Can't find valid F2FS filesystem in 1th superblock
+F2FS-fs (loop0): invalid crc value
+BUG: unable to handle kernel paging request at ffffed006b2a50c0
+PGD 21ffee067 P4D 21ffee067 PUD 21fbeb067 PMD 0
+Oops: 0000 [#1] SMP KASAN
+Dumping ftrace buffer:
+   (ftrace buffer empty)
+Modules linked in:
+CPU: 0 PID: 4514 Comm: syzkaller989480 Not tainted 4.17.0-rc1+ #8
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+RIP: 0010:build_sit_entries fs/f2fs/segment.c:3653 [inline]
+RIP: 0010:build_segment_manager+0x7ef7/0xbf70 fs/f2fs/segment.c:3852
+RSP: 0018:ffff8801b102e5b0 EFLAGS: 00010a06
+RAX: 1ffff1006b2a50c0 RBX: 0000000000000004 RCX: 0000000000000001
+RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff8801ac74243e
+RBP: ffff8801b102f410 R08: ffff8801acbd46c0 R09: fffffbfff14d9af8
+R10: fffffbfff14d9af8 R11: ffff8801acbd46c0 R12: ffff8801ac742a80
+R13: ffff8801d9519100 R14: dffffc0000000000 R15: ffff880359528600
+FS:  0000000001e04880(0000) GS:ffff8801dae00000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: ffffed006b2a50c0 CR3: 00000001ac6ac000 CR4: 00000000001406f0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ f2fs_fill_super+0x4095/0x7bf0 fs/f2fs/super.c:2803
+ mount_bdev+0x30c/0x3e0 fs/super.c:1165
+ f2fs_mount+0x34/0x40 fs/f2fs/super.c:3020
+ mount_fs+0xae/0x328 fs/super.c:1268
+ vfs_kern_mount.part.34+0xd4/0x4d0 fs/namespace.c:1037
+ vfs_kern_mount fs/namespace.c:1027 [inline]
+ do_new_mount fs/namespace.c:2517 [inline]
+ do_mount+0x564/0x3070 fs/namespace.c:2847
+ ksys_mount+0x12d/0x140 fs/namespace.c:3063
+ __do_sys_mount fs/namespace.c:3077 [inline]
+ __se_sys_mount fs/namespace.c:3074 [inline]
+ __x64_sys_mount+0xbe/0x150 fs/namespace.c:3074
+ do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287
+ entry_SYSCALL_64_after_hwframe+0x49/0xbe
+RIP: 0033:0x443d6a
+RSP: 002b:00007ffd312813c8 EFLAGS: 00000297 ORIG_RAX: 00000000000000a5
+RAX: ffffffffffffffda RBX: 0000000020000c00 RCX: 0000000000443d6a
+RDX: 0000000020000000 RSI: 0000000020000100 RDI: 00007ffd312813d0
+RBP: 0000000000000003 R08: 0000000020016a00 R09: 000000000000000a
+R10: 0000000000000000 R11: 0000000000000297 R12: 0000000000000004
+R13: 0000000000402c60 R14: 0000000000000000 R15: 0000000000000000
+RIP: build_sit_entries fs/f2fs/segment.c:3653 [inline] RSP: ffff8801b102e5b0
+RIP: build_segment_manager+0x7ef7/0xbf70 fs/f2fs/segment.c:3852 RSP: ffff8801b102e5b0
+CR2: ffffed006b2a50c0
+---[ end trace a2034989e196ff17 ]---
+
+Reported-and-tested-by: syzbot+83699adeb2d13579c31e@syzkaller.appspotmail.com
+Reviewed-by: Chao Yu <yuchao0@huawei.com>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/segment.c |    9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/fs/f2fs/segment.c
++++ b/fs/f2fs/segment.c
+@@ -2376,6 +2376,15 @@ static int build_sit_entries(struct f2fs
+               unsigned int old_valid_blocks;
+ 
+               start = le32_to_cpu(segno_in_journal(journal, i));
++              if (start >= MAIN_SEGS(sbi)) {
++                      f2fs_msg(sbi->sb, KERN_ERR,
++                                      "Wrong journal entry on segno %u",
++                                      start);
++                      set_sbi_flag(sbi, SBI_NEED_FSCK);
++                      err = -EINVAL;
++                      break;
++              }
++
+               se = &sit_i->sentries[start];
+               sit = sit_in_journal(journal, i);
+ 
diff --git a/queue-4.9/hugetlbfs-check-for-pgoff-value-overflow.patch b/queue-4.9/hugetlbfs-check-for-pgoff-value-overflow.patch

new file mode 100644 (file)

index 0000000..c7e31c4
--- /dev/null
+++ b/queue-4.9/hugetlbfs-check-for-pgoff-value-overflow.patch
@@ -0,0 +1,111 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Mike Kravetz <mike.kravetz@oracle.com>
+Date: Thu, 22 Mar 2018 16:17:13 -0700
+Subject: hugetlbfs: check for pgoff value overflow
+
+From: Mike Kravetz <mike.kravetz@oracle.com>
+
+commit 63489f8e821144000e0bdca7e65a8d1cc23a7ee7 upstream.
+
+A vma with vm_pgoff large enough to overflow a loff_t type when
+converted to a byte offset can be passed via the remap_file_pages system
+call.  The hugetlbfs mmap routine uses the byte offset to calculate
+reservations and file size.
+
+A sequence such as:
+
+  mmap(0x20a00000, 0x600000, 0, 0x66033, -1, 0);
+  remap_file_pages(0x20a00000, 0x600000, 0, 0x20000000000000, 0);
+
+will result in the following when task exits/file closed,
+
+  kernel BUG at mm/hugetlb.c:749!
+  Call Trace:
+    hugetlbfs_evict_inode+0x2f/0x40
+    evict+0xcb/0x190
+    __dentry_kill+0xcb/0x150
+    __fput+0x164/0x1e0
+    task_work_run+0x84/0xa0
+    exit_to_usermode_loop+0x7d/0x80
+    do_syscall_64+0x18b/0x190
+    entry_SYSCALL_64_after_hwframe+0x3d/0xa2
+
+The overflowed pgoff value causes hugetlbfs to try to set up a mapping
+with a negative range (end < start) that leaves invalid state which
+causes the BUG.
+
+The previous overflow fix to this code was incomplete and did not take
+the remap_file_pages system call into account.
+
+[mike.kravetz@oracle.com: v3]
+  Link: http://lkml.kernel.org/r/20180309002726.7248-1-mike.kravetz@oracle.com
+[akpm@linux-foundation.org: include mmdebug.h]
+[akpm@linux-foundation.org: fix -ve left shift count on sh]
+Link: http://lkml.kernel.org/r/20180308210502.15952-1-mike.kravetz@oracle.com
+Fixes: 045c7a3f53d9 ("hugetlbfs: fix offset overflow in hugetlbfs mmap")
+Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
+Reported-by: Nic Losby <blurbdust@gmail.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
+Cc: Yisheng Xie <xieyisheng1@huawei.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/hugetlbfs/inode.c |   17 ++++++++++++++---
+ mm/hugetlb.c         |    6 ++++++
+ 2 files changed, 20 insertions(+), 3 deletions(-)
+
+--- a/fs/hugetlbfs/inode.c
++++ b/fs/hugetlbfs/inode.c
+@@ -118,6 +118,16 @@ static void huge_pagevec_release(struct
+       pagevec_reinit(pvec);
+ }
+ 
++/*
++ * Mask used when checking the page offset value passed in via system
++ * calls.  This value will be converted to a loff_t which is signed.
++ * Therefore, we want to check the upper PAGE_SHIFT + 1 bits of the
++ * value.  The extra bit (- 1 in the shift value) is to take the sign
++ * bit into account.
++ */
++#define PGOFF_LOFFT_MAX \
++      (((1UL << (PAGE_SHIFT + 1)) - 1) <<  (BITS_PER_LONG - (PAGE_SHIFT + 1)))
++
+ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
+ {
+       struct inode *inode = file_inode(file);
+@@ -137,12 +147,13 @@ static int hugetlbfs_file_mmap(struct fi
+       vma->vm_ops = &hugetlb_vm_ops;
+ 
+       /*
+-       * Offset passed to mmap (before page shift) could have been
+-       * negative when represented as a (l)off_t.
++       * page based offset in vm_pgoff could be sufficiently large to
++       * overflow a (l)off_t when converted to byte offset.
+        */
+-      if (((loff_t)vma->vm_pgoff << PAGE_SHIFT) < 0)
++      if (vma->vm_pgoff & PGOFF_LOFFT_MAX)
+               return -EINVAL;
+ 
++      /* must be huge page aligned */
+       if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
+               return -EINVAL;
+ 
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -4170,6 +4170,12 @@ int hugetlb_reserve_pages(struct inode *
+       struct resv_map *resv_map;
+       long gbl_reserve;
+ 
++      /* This should never happen */
++      if (from > to) {
++              VM_WARN(1, "%s called with a negative range\n", __func__);
++              return -EINVAL;
++      }
++
+       /*
+        * Only apply hugepage reservation if asked. At fault time, an
+        * attempt will be made for VM_NORESERVE to allocate a page
diff --git a/queue-4.9/hugetlbfs-fix-offset-overflow-in-hugetlbfs-mmap.patch b/queue-4.9/hugetlbfs-fix-offset-overflow-in-hugetlbfs-mmap.patch

new file mode 100644 (file)

index 0000000..71ed96e
--- /dev/null
+++ b/queue-4.9/hugetlbfs-fix-offset-overflow-in-hugetlbfs-mmap.patch
@@ -0,0 +1,100 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Mike Kravetz <mike.kravetz@oracle.com>
+Date: Thu, 13 Apr 2017 14:56:32 -0700
+Subject: hugetlbfs: fix offset overflow in hugetlbfs mmap
+
+From: Mike Kravetz <mike.kravetz@oracle.com>
+
+commit 045c7a3f53d9403b62d396b6d051c4be5044cdb4 upstream.
+
+If mmap() maps a file, it can be passed an offset into the file at which
+the mapping is to start.  Offset could be a negative value when
+represented as a loff_t.  The offset plus length will be used to update
+the file size (i_size) which is also a loff_t.
+
+Validate the value of offset and offset + length to make sure they do
+not overflow and appear as negative.
+
+Found by syzkaller with commit ff8c0c53c475 ("mm/hugetlb.c: don't call
+region_abort if region_chg fails") applied.  Prior to this commit, the
+overflow would still occur but we would luckily return ENOMEM.
+
+To reproduce:
+
+   mmap(0, 0x2000, 0, 0x40021, 0xffffffffffffffffULL, 0x8000000000000000ULL);
+
+Resulted in,
+
+  kernel BUG at mm/hugetlb.c:742!
+  Call Trace:
+   hugetlbfs_evict_inode+0x80/0xa0
+   evict+0x24a/0x620
+   iput+0x48f/0x8c0
+   dentry_unlink_inode+0x31f/0x4d0
+   __dentry_kill+0x292/0x5e0
+   dput+0x730/0x830
+   __fput+0x438/0x720
+   ____fput+0x1a/0x20
+   task_work_run+0xfe/0x180
+   exit_to_usermode_loop+0x133/0x150
+   syscall_return_slowpath+0x184/0x1c0
+   entry_SYSCALL_64_fastpath+0xab/0xad
+
+Fixes: ff8c0c53c475 ("mm/hugetlb.c: don't call region_abort if region_chg fails")
+Link: http://lkml.kernel.org/r/1491951118-30678-1-git-send-email-mike.kravetz@oracle.com
+Reported-by: Vegard Nossum <vegard.nossum@oracle.com>
+Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
+Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
+Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/hugetlbfs/inode.c |   15 ++++++++++++---
+ 1 file changed, 12 insertions(+), 3 deletions(-)
+
+--- a/fs/hugetlbfs/inode.c
++++ b/fs/hugetlbfs/inode.c
+@@ -136,17 +136,26 @@ static int hugetlbfs_file_mmap(struct fi
+       vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
+       vma->vm_ops = &hugetlb_vm_ops;
+ 
++      /*
++       * Offset passed to mmap (before page shift) could have been
++       * negative when represented as a (l)off_t.
++       */
++      if (((loff_t)vma->vm_pgoff << PAGE_SHIFT) < 0)
++              return -EINVAL;
++
+       if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
+               return -EINVAL;
+ 
+       vma_len = (loff_t)(vma->vm_end - vma->vm_start);
++      len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
++      /* check for overflow */
++      if (len < vma_len)
++              return -EINVAL;
+ 
+       inode_lock(inode);
+       file_accessed(file);
+ 
+       ret = -ENOMEM;
+-      len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
+-
+       if (hugetlb_reserve_pages(inode,
+                               vma->vm_pgoff >> huge_page_order(h),
+                               len >> huge_page_shift(h), vma,
+@@ -155,7 +164,7 @@ static int hugetlbfs_file_mmap(struct fi
+ 
+       ret = 0;
+       if (vma->vm_flags & VM_WRITE && inode->i_size < len)
+-              inode->i_size = len;
++              i_size_write(inode, len);
+ out:
+       inode_unlock(inode);
+ 
diff --git a/queue-4.9/libceph-add-authorizer-challenge.patch b/queue-4.9/libceph-add-authorizer-challenge.patch

new file mode 100644 (file)

index 0000000..51c6577
--- /dev/null
+++ b/queue-4.9/libceph-add-authorizer-challenge.patch
@@ -0,0 +1,330 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Ilya Dryomov <idryomov@gmail.com>
+Date: Fri, 27 Jul 2018 19:18:34 +0200
+Subject: libceph: add authorizer challenge
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit 6daca13d2e72bedaaacfc08f873114c9307d5aea upstream.
+
+When a client authenticates with a service, an authorizer is sent with
+a nonce to the service (ceph_x_authorize_[ab]) and the service responds
+with a mutation of that nonce (ceph_x_authorize_reply).  This lets the
+client verify the service is who it says it is but it doesn't protect
+against a replay: someone can trivially capture the exchange and reuse
+the same authorizer to authenticate themselves.
+
+Allow the service to reject an initial authorizer with a random
+challenge (ceph_x_authorize_challenge).  The client then has to respond
+with an updated authorizer proving they are able to decrypt the
+service's challenge and that the new authorizer was produced for this
+specific connection instance.
+
+The accepting side requires this challenge and response unconditionally
+if the client side advertises that it has the CEPHX_V2 feature bit.
+
+This addresses CVE-2018-1128.
+
+Link: http://tracker.ceph.com/issues/24836
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Sage Weil <sage@redhat.com>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ceph/mds_client.c           |   11 ++++++
+ include/linux/ceph/auth.h      |    8 ++++
+ include/linux/ceph/messenger.h |    3 +
+ include/linux/ceph/msgr.h      |    2 -
+ net/ceph/auth.c                |   16 +++++++++
+ net/ceph/auth_x.c              |   72 ++++++++++++++++++++++++++++++++++++++---
+ net/ceph/auth_x_protocol.h     |    7 +++
+ net/ceph/messenger.c           |   17 +++++++++
+ net/ceph/osd_client.c          |   11 ++++++
+ 9 files changed, 140 insertions(+), 7 deletions(-)
+
+--- a/fs/ceph/mds_client.c
++++ b/fs/ceph/mds_client.c
+@@ -3983,6 +3983,16 @@ static struct ceph_auth_handshake *get_a
+       return auth;
+ }
+ 
++static int add_authorizer_challenge(struct ceph_connection *con,
++                                  void *challenge_buf, int challenge_buf_len)
++{
++      struct ceph_mds_session *s = con->private;
++      struct ceph_mds_client *mdsc = s->s_mdsc;
++      struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
++
++      return ceph_auth_add_authorizer_challenge(ac, s->s_auth.authorizer,
++                                          challenge_buf, challenge_buf_len);
++}
+ 
+ static int verify_authorizer_reply(struct ceph_connection *con)
+ {
+@@ -4046,6 +4056,7 @@ static const struct ceph_connection_oper
+       .put = con_put,
+       .dispatch = dispatch,
+       .get_authorizer = get_authorizer,
++      .add_authorizer_challenge = add_authorizer_challenge,
+       .verify_authorizer_reply = verify_authorizer_reply,
+       .invalidate_authorizer = invalidate_authorizer,
+       .peer_reset = peer_reset,
+--- a/include/linux/ceph/auth.h
++++ b/include/linux/ceph/auth.h
+@@ -63,6 +63,10 @@ struct ceph_auth_client_ops {
+       /* ensure that an existing authorizer is up to date */
+       int (*update_authorizer)(struct ceph_auth_client *ac, int peer_type,
+                                struct ceph_auth_handshake *auth);
++      int (*add_authorizer_challenge)(struct ceph_auth_client *ac,
++                                      struct ceph_authorizer *a,
++                                      void *challenge_buf,
++                                      int challenge_buf_len);
+       int (*verify_authorizer_reply)(struct ceph_auth_client *ac,
+                                      struct ceph_authorizer *a);
+       void (*invalidate_authorizer)(struct ceph_auth_client *ac,
+@@ -117,6 +121,10 @@ void ceph_auth_destroy_authorizer(struct
+ extern int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
+                                      int peer_type,
+                                      struct ceph_auth_handshake *a);
++int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac,
++                                     struct ceph_authorizer *a,
++                                     void *challenge_buf,
++                                     int challenge_buf_len);
+ extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
+                                            struct ceph_authorizer *a);
+ extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac,
+--- a/include/linux/ceph/messenger.h
++++ b/include/linux/ceph/messenger.h
+@@ -30,6 +30,9 @@ struct ceph_connection_operations {
+       struct ceph_auth_handshake *(*get_authorizer) (
+                               struct ceph_connection *con,
+                              int *proto, int force_new);
++      int (*add_authorizer_challenge)(struct ceph_connection *con,
++                                      void *challenge_buf,
++                                      int challenge_buf_len);
+       int (*verify_authorizer_reply) (struct ceph_connection *con);
+       int (*invalidate_authorizer)(struct ceph_connection *con);
+ 
+--- a/include/linux/ceph/msgr.h
++++ b/include/linux/ceph/msgr.h
+@@ -90,7 +90,7 @@ struct ceph_entity_inst {
+ #define CEPH_MSGR_TAG_SEQ           13 /* 64-bit int follows with seen seq number */
+ #define CEPH_MSGR_TAG_KEEPALIVE2    14 /* keepalive2 byte + ceph_timespec */
+ #define CEPH_MSGR_TAG_KEEPALIVE2_ACK 15 /* keepalive2 reply */
+-
++#define CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER 16  /* cephx v2 doing server challenge */
+ 
+ /*
+  * connection negotiation
+--- a/net/ceph/auth.c
++++ b/net/ceph/auth.c
+@@ -314,6 +314,22 @@ int ceph_auth_update_authorizer(struct c
+ }
+ EXPORT_SYMBOL(ceph_auth_update_authorizer);
+ 
++int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac,
++                                     struct ceph_authorizer *a,
++                                     void *challenge_buf,
++                                     int challenge_buf_len)
++{
++      int ret = 0;
++
++      mutex_lock(&ac->mutex);
++      if (ac->ops && ac->ops->add_authorizer_challenge)
++              ret = ac->ops->add_authorizer_challenge(ac, a, challenge_buf,
++                                                      challenge_buf_len);
++      mutex_unlock(&ac->mutex);
++      return ret;
++}
++EXPORT_SYMBOL(ceph_auth_add_authorizer_challenge);
++
+ int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
+                                     struct ceph_authorizer *a)
+ {
+--- a/net/ceph/auth_x.c
++++ b/net/ceph/auth_x.c
+@@ -291,7 +291,8 @@ bad:
+  * authorizer.  The first part (ceph_x_authorize_a) should already be
+  * encoded.
+  */
+-static int encrypt_authorizer(struct ceph_x_authorizer *au)
++static int encrypt_authorizer(struct ceph_x_authorizer *au,
++                            u64 *server_challenge)
+ {
+       struct ceph_x_authorize_a *msg_a;
+       struct ceph_x_authorize_b *msg_b;
+@@ -304,16 +305,28 @@ static int encrypt_authorizer(struct cep
+       end = au->buf->vec.iov_base + au->buf->vec.iov_len;
+ 
+       msg_b = p + ceph_x_encrypt_offset();
+-      msg_b->struct_v = 1;
++      msg_b->struct_v = 2;
+       msg_b->nonce = cpu_to_le64(au->nonce);
++      if (server_challenge) {
++              msg_b->have_challenge = 1;
++              msg_b->server_challenge_plus_one =
++                  cpu_to_le64(*server_challenge + 1);
++      } else {
++              msg_b->have_challenge = 0;
++              msg_b->server_challenge_plus_one = 0;
++      }
+ 
+       ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b));
+       if (ret < 0)
+               return ret;
+ 
+       p += ret;
+-      WARN_ON(p > end);
+-      au->buf->vec.iov_len = p - au->buf->vec.iov_base;
++      if (server_challenge) {
++              WARN_ON(p != end);
++      } else {
++              WARN_ON(p > end);
++              au->buf->vec.iov_len = p - au->buf->vec.iov_base;
++      }
+ 
+       return 0;
+ }
+@@ -378,7 +391,7 @@ static int ceph_x_build_authorizer(struc
+            le64_to_cpu(msg_a->ticket_blob.secret_id));
+ 
+       get_random_bytes(&au->nonce, sizeof(au->nonce));
+-      ret = encrypt_authorizer(au);
++      ret = encrypt_authorizer(au, NULL);
+       if (ret) {
+               pr_err("failed to encrypt authorizer: %d", ret);
+               goto out_au;
+@@ -660,6 +673,54 @@ static int ceph_x_update_authorizer(
+       return 0;
+ }
+ 
++static int decrypt_authorize_challenge(struct ceph_x_authorizer *au,
++                                     void *challenge_buf,
++                                     int challenge_buf_len,
++                                     u64 *server_challenge)
++{
++      struct ceph_x_authorize_challenge *ch =
++          challenge_buf + sizeof(struct ceph_x_encrypt_header);
++      int ret;
++
++      /* no leading len */
++      ret = __ceph_x_decrypt(&au->session_key, challenge_buf,
++                             challenge_buf_len);
++      if (ret < 0)
++              return ret;
++      if (ret < sizeof(*ch)) {
++              pr_err("bad size %d for ceph_x_authorize_challenge\n", ret);
++              return -EINVAL;
++      }
++
++      *server_challenge = le64_to_cpu(ch->server_challenge);
++      return 0;
++}
++
++static int ceph_x_add_authorizer_challenge(struct ceph_auth_client *ac,
++                                         struct ceph_authorizer *a,
++                                         void *challenge_buf,
++                                         int challenge_buf_len)
++{
++      struct ceph_x_authorizer *au = (void *)a;
++      u64 server_challenge;
++      int ret;
++
++      ret = decrypt_authorize_challenge(au, challenge_buf, challenge_buf_len,
++                                        &server_challenge);
++      if (ret) {
++              pr_err("failed to decrypt authorize challenge: %d", ret);
++              return ret;
++      }
++
++      ret = encrypt_authorizer(au, &server_challenge);
++      if (ret) {
++              pr_err("failed to encrypt authorizer w/ challenge: %d", ret);
++              return ret;
++      }
++
++      return 0;
++}
++
+ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
+                                         struct ceph_authorizer *a)
+ {
+@@ -812,6 +873,7 @@ static const struct ceph_auth_client_ops
+       .handle_reply = ceph_x_handle_reply,
+       .create_authorizer = ceph_x_create_authorizer,
+       .update_authorizer = ceph_x_update_authorizer,
++      .add_authorizer_challenge = ceph_x_add_authorizer_challenge,
+       .verify_authorizer_reply = ceph_x_verify_authorizer_reply,
+       .invalidate_authorizer = ceph_x_invalidate_authorizer,
+       .reset =  ceph_x_reset,
+--- a/net/ceph/auth_x_protocol.h
++++ b/net/ceph/auth_x_protocol.h
+@@ -69,6 +69,13 @@ struct ceph_x_authorize_a {
+ struct ceph_x_authorize_b {
+       __u8 struct_v;
+       __le64 nonce;
++      __u8 have_challenge;
++      __le64 server_challenge_plus_one;
++} __attribute__ ((packed));
++
++struct ceph_x_authorize_challenge {
++      __u8 struct_v;
++      __le64 server_challenge;
+ } __attribute__ ((packed));
+ 
+ struct ceph_x_authorize_reply {
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -2037,9 +2037,24 @@ static int process_connect(struct ceph_c
+       if (con->auth) {
+               /*
+                * Any connection that defines ->get_authorizer()
+-               * should also define ->verify_authorizer_reply().
++               * should also define ->add_authorizer_challenge() and
++               * ->verify_authorizer_reply().
++               *
+                * See get_connect_authorizer().
+                */
++              if (con->in_reply.tag == CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER) {
++                      ret = con->ops->add_authorizer_challenge(
++                                  con, con->auth->authorizer_reply_buf,
++                                  le32_to_cpu(con->in_reply.authorizer_len));
++                      if (ret < 0)
++                              return ret;
++
++                      con_out_kvec_reset(con);
++                      __prepare_write_connect(con);
++                      prepare_read_connect(con);
++                      return 0;
++              }
++
+               ret = con->ops->verify_authorizer_reply(con);
+               if (ret < 0) {
+                       con->error_msg = "bad authorize reply";
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -4478,6 +4478,16 @@ static struct ceph_auth_handshake *get_a
+       return auth;
+ }
+ 
++static int add_authorizer_challenge(struct ceph_connection *con,
++                                  void *challenge_buf, int challenge_buf_len)
++{
++      struct ceph_osd *o = con->private;
++      struct ceph_osd_client *osdc = o->o_osdc;
++      struct ceph_auth_client *ac = osdc->client->monc.auth;
++
++      return ceph_auth_add_authorizer_challenge(ac, o->o_auth.authorizer,
++                                          challenge_buf, challenge_buf_len);
++}
+ 
+ static int verify_authorizer_reply(struct ceph_connection *con)
+ {
+@@ -4519,6 +4529,7 @@ static const struct ceph_connection_oper
+       .put = put_osd_con,
+       .dispatch = dispatch,
+       .get_authorizer = get_authorizer,
++      .add_authorizer_challenge = add_authorizer_challenge,
+       .verify_authorizer_reply = verify_authorizer_reply,
+       .invalidate_authorizer = invalidate_authorizer,
+       .alloc_msg = alloc_msg,
diff --git a/queue-4.9/libceph-check-authorizer-reply-challenge-length-before-reading.patch b/queue-4.9/libceph-check-authorizer-reply-challenge-length-before-reading.patch

new file mode 100644 (file)

index 0000000..6dd4915
--- /dev/null
+++ b/queue-4.9/libceph-check-authorizer-reply-challenge-length-before-reading.patch
@@ -0,0 +1,36 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Ilya Dryomov <idryomov@gmail.com>
+Date: Fri, 27 Jul 2018 19:40:30 +0200
+Subject: libceph: check authorizer reply/challenge length before reading
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit 130f52f2b203aa0aec179341916ffb2e905f3afd upstream.
+
+Avoid scribbling over memory if the received reply/challenge is larger
+than the buffer supplied with the authorizer.
+
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Sage Weil <sage@redhat.com>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -1738,6 +1738,13 @@ static int read_partial_connect(struct c
+ 
+       if (con->auth) {
+               size = le32_to_cpu(con->in_reply.authorizer_len);
++              if (size > con->auth->authorizer_reply_buf_len) {
++                      pr_err("authorizer reply too big: %d > %zu\n", size,
++                             con->auth->authorizer_reply_buf_len);
++                      ret = -EINVAL;
++                      goto out;
++              }
++
+               end += size;
+               ret = read_partial(con, end, size,
+                                  con->auth->authorizer_reply_buf);
diff --git a/queue-4.9/libceph-drop-len-argument-of-verify_authorizer_reply.patch b/queue-4.9/libceph-drop-len-argument-of-verify_authorizer_reply.patch

new file mode 100644 (file)

index 0000000..fdbce84
--- /dev/null
+++ b/queue-4.9/libceph-drop-len-argument-of-verify_authorizer_reply.patch
@@ -0,0 +1,135 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Ilya Dryomov <idryomov@gmail.com>
+Date: Fri, 2 Dec 2016 16:35:09 +0100
+Subject: libceph: drop len argument of *verify_authorizer_reply()
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit 0dde584882ade13dc9708d611fbf69b0ae8a9e48 upstream.
+
+The length of the reply is protocol-dependent - for cephx it's
+ceph_x_authorize_reply.  Nothing sensible can be passed from the
+messenger layer anyway.
+
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Sage Weil <sage@redhat.com>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ceph/mds_client.c           |    4 ++--
+ include/linux/ceph/auth.h      |    5 ++---
+ include/linux/ceph/messenger.h |    2 +-
+ net/ceph/auth.c                |    4 ++--
+ net/ceph/auth_x.c              |    2 +-
+ net/ceph/messenger.c           |    2 +-
+ net/ceph/osd_client.c          |    4 ++--
+ 7 files changed, 11 insertions(+), 12 deletions(-)
+
+--- a/fs/ceph/mds_client.c
++++ b/fs/ceph/mds_client.c
+@@ -3984,13 +3984,13 @@ static struct ceph_auth_handshake *get_a
+ }
+ 
+ 
+-static int verify_authorizer_reply(struct ceph_connection *con, int len)
++static int verify_authorizer_reply(struct ceph_connection *con)
+ {
+       struct ceph_mds_session *s = con->private;
+       struct ceph_mds_client *mdsc = s->s_mdsc;
+       struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
+ 
+-      return ceph_auth_verify_authorizer_reply(ac, s->s_auth.authorizer, len);
++      return ceph_auth_verify_authorizer_reply(ac, s->s_auth.authorizer);
+ }
+ 
+ static int invalidate_authorizer(struct ceph_connection *con)
+--- a/include/linux/ceph/auth.h
++++ b/include/linux/ceph/auth.h
+@@ -64,7 +64,7 @@ struct ceph_auth_client_ops {
+       int (*update_authorizer)(struct ceph_auth_client *ac, int peer_type,
+                                struct ceph_auth_handshake *auth);
+       int (*verify_authorizer_reply)(struct ceph_auth_client *ac,
+-                                     struct ceph_authorizer *a, size_t len);
++                                     struct ceph_authorizer *a);
+       void (*invalidate_authorizer)(struct ceph_auth_client *ac,
+                                     int peer_type);
+ 
+@@ -118,8 +118,7 @@ extern int ceph_auth_update_authorizer(s
+                                      int peer_type,
+                                      struct ceph_auth_handshake *a);
+ extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
+-                                           struct ceph_authorizer *a,
+-                                           size_t len);
++                                           struct ceph_authorizer *a);
+ extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac,
+                                           int peer_type);
+ 
+--- a/include/linux/ceph/messenger.h
++++ b/include/linux/ceph/messenger.h
+@@ -30,7 +30,7 @@ struct ceph_connection_operations {
+       struct ceph_auth_handshake *(*get_authorizer) (
+                               struct ceph_connection *con,
+                              int *proto, int force_new);
+-      int (*verify_authorizer_reply) (struct ceph_connection *con, int len);
++      int (*verify_authorizer_reply) (struct ceph_connection *con);
+       int (*invalidate_authorizer)(struct ceph_connection *con);
+ 
+       /* there was some error on the socket (disconnect, whatever) */
+--- a/net/ceph/auth.c
++++ b/net/ceph/auth.c
+@@ -315,13 +315,13 @@ int ceph_auth_update_authorizer(struct c
+ EXPORT_SYMBOL(ceph_auth_update_authorizer);
+ 
+ int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
+-                                    struct ceph_authorizer *a, size_t len)
++                                    struct ceph_authorizer *a)
+ {
+       int ret = 0;
+ 
+       mutex_lock(&ac->mutex);
+       if (ac->ops && ac->ops->verify_authorizer_reply)
+-              ret = ac->ops->verify_authorizer_reply(ac, a, len);
++              ret = ac->ops->verify_authorizer_reply(ac, a);
+       mutex_unlock(&ac->mutex);
+       return ret;
+ }
+--- a/net/ceph/auth_x.c
++++ b/net/ceph/auth_x.c
+@@ -623,7 +623,7 @@ static int ceph_x_update_authorizer(
+ }
+ 
+ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
+-                                        struct ceph_authorizer *a, size_t len)
++                                        struct ceph_authorizer *a)
+ {
+       struct ceph_x_authorizer *au = (void *)a;
+       void *p = au->enc_buf;
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -2045,7 +2045,7 @@ static int process_connect(struct ceph_c
+                * should also define ->verify_authorizer_reply().
+                * See get_connect_authorizer().
+                */
+-              ret = con->ops->verify_authorizer_reply(con, 0);
++              ret = con->ops->verify_authorizer_reply(con);
+               if (ret < 0) {
+                       con->error_msg = "bad authorize reply";
+                       return ret;
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -4479,13 +4479,13 @@ static struct ceph_auth_handshake *get_a
+ }
+ 
+ 
+-static int verify_authorizer_reply(struct ceph_connection *con, int len)
++static int verify_authorizer_reply(struct ceph_connection *con)
+ {
+       struct ceph_osd *o = con->private;
+       struct ceph_osd_client *osdc = o->o_osdc;
+       struct ceph_auth_client *ac = osdc->client->monc.auth;
+ 
+-      return ceph_auth_verify_authorizer_reply(ac, o->o_auth.authorizer, len);
++      return ceph_auth_verify_authorizer_reply(ac, o->o_auth.authorizer);
+ }
+ 
+ static int invalidate_authorizer(struct ceph_connection *con)
diff --git a/queue-4.9/libceph-factor-out-__ceph_x_decrypt.patch b/queue-4.9/libceph-factor-out-__ceph_x_decrypt.patch

new file mode 100644 (file)

index 0000000..8c7634e
--- /dev/null
+++ b/queue-4.9/libceph-factor-out-__ceph_x_decrypt.patch
@@ -0,0 +1,75 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Ilya Dryomov <idryomov@gmail.com>
+Date: Thu, 26 Jul 2018 18:05:43 +0200
+Subject: libceph: factor out __ceph_x_decrypt()
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit c571fe24d243bfe7017f0e67fe800b3cc2a1d1f7 upstream.
+
+Will be used for decrypting the server challenge which is only preceded
+by ceph_x_encrypt_header.
+
+Drop struct_v check to allow for extending ceph_x_encrypt_header in the
+future.
+
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Sage Weil <sage@redhat.com>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/auth_x.c |   33 ++++++++++++++++++++++++---------
+ 1 file changed, 24 insertions(+), 9 deletions(-)
+
+--- a/net/ceph/auth_x.c
++++ b/net/ceph/auth_x.c
+@@ -69,25 +69,40 @@ static int ceph_x_encrypt(struct ceph_cr
+       return sizeof(u32) + ciphertext_len;
+ }
+ 
++static int __ceph_x_decrypt(struct ceph_crypto_key *secret, void *p,
++                          int ciphertext_len)
++{
++      struct ceph_x_encrypt_header *hdr = p;
++      int plaintext_len;
++      int ret;
++
++      ret = ceph_crypt(secret, false, p, ciphertext_len, ciphertext_len,
++                       &plaintext_len);
++      if (ret)
++              return ret;
++
++      if (le64_to_cpu(hdr->magic) != CEPHX_ENC_MAGIC) {
++              pr_err("%s bad magic\n", __func__);
++              return -EINVAL;
++      }
++
++      return plaintext_len - sizeof(*hdr);
++}
++
+ static int ceph_x_decrypt(struct ceph_crypto_key *secret, void **p, void *end)
+ {
+-      struct ceph_x_encrypt_header *hdr = *p + sizeof(u32);
+-      int ciphertext_len, plaintext_len;
++      int ciphertext_len;
+       int ret;
+ 
+       ceph_decode_32_safe(p, end, ciphertext_len, e_inval);
+       ceph_decode_need(p, end, ciphertext_len, e_inval);
+ 
+-      ret = ceph_crypt(secret, false, *p, end - *p, ciphertext_len,
+-                       &plaintext_len);
+-      if (ret)
++      ret = __ceph_x_decrypt(secret, *p, ciphertext_len);
++      if (ret < 0)
+               return ret;
+ 
+-      if (hdr->struct_v != 1 || le64_to_cpu(hdr->magic) != CEPHX_ENC_MAGIC)
+-              return -EPERM;
+-
+       *p += ciphertext_len;
+-      return plaintext_len - sizeof(struct ceph_x_encrypt_header);
++      return ret;
+ 
+ e_inval:
+       return -EINVAL;
diff --git a/queue-4.9/libceph-factor-out-__prepare_write_connect.patch b/queue-4.9/libceph-factor-out-__prepare_write_connect.patch

new file mode 100644 (file)

index 0000000..afd80b0
--- /dev/null
+++ b/queue-4.9/libceph-factor-out-__prepare_write_connect.patch
@@ -0,0 +1,57 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Ilya Dryomov <idryomov@gmail.com>
+Date: Thu, 26 Jul 2018 17:43:47 +0200
+Subject: libceph: factor out __prepare_write_connect()
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit c0f56b483aa09c99bfe97409a43ad786f33b8a5a upstream.
+
+Will be used for sending ceph_msg_connect with an updated authorizer,
+after the server challenges the initial authorizer.
+
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Sage Weil <sage@redhat.com>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |   21 ++++++++++++---------
+ 1 file changed, 12 insertions(+), 9 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -1429,6 +1429,17 @@ static void prepare_write_banner(struct
+       con_flag_set(con, CON_FLAG_WRITE_PENDING);
+ }
+ 
++static void __prepare_write_connect(struct ceph_connection *con)
++{
++      con_out_kvec_add(con, sizeof(con->out_connect), &con->out_connect);
++      if (con->auth)
++              con_out_kvec_add(con, con->auth->authorizer_buf_len,
++                               con->auth->authorizer_buf);
++
++      con->out_more = 0;
++      con_flag_set(con, CON_FLAG_WRITE_PENDING);
++}
++
+ static int prepare_write_connect(struct ceph_connection *con)
+ {
+       unsigned int global_seq = get_global_seq(con->msgr, 0);
+@@ -1464,15 +1475,7 @@ static int prepare_write_connect(struct
+       if (ret)
+               return ret;
+ 
+-      con_out_kvec_add(con, sizeof (con->out_connect),
+-                                      &con->out_connect);
+-      if (con->auth)
+-              con_out_kvec_add(con, con->auth->authorizer_buf_len,
+-                               con->auth->authorizer_buf);
+-
+-      con->out_more = 0;
+-      con_flag_set(con, CON_FLAG_WRITE_PENDING);
+-
++      __prepare_write_connect(con);
+       return 0;
+ }
+ 
diff --git a/queue-4.9/libceph-factor-out-encrypt_authorizer.patch b/queue-4.9/libceph-factor-out-encrypt_authorizer.patch

new file mode 100644 (file)

index 0000000..4bf62a7
--- /dev/null
+++ b/queue-4.9/libceph-factor-out-encrypt_authorizer.patch
@@ -0,0 +1,94 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Ilya Dryomov <idryomov@gmail.com>
+Date: Fri, 27 Jul 2018 16:37:54 +0200
+Subject: libceph: factor out encrypt_authorizer()
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit 149cac4a50b0b4081b38b2f38de6ef71c27eaa85 upstream.
+
+Will be used for encrypting both the initial and updated authorizers.
+
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Sage Weil <sage@redhat.com>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/auth_x.c |   49 ++++++++++++++++++++++++++++++++++++-------------
+ 1 file changed, 36 insertions(+), 13 deletions(-)
+
+--- a/net/ceph/auth_x.c
++++ b/net/ceph/auth_x.c
+@@ -286,6 +286,38 @@ bad:
+       return -EINVAL;
+ }
+ 
++/*
++ * Encode and encrypt the second part (ceph_x_authorize_b) of the
++ * authorizer.  The first part (ceph_x_authorize_a) should already be
++ * encoded.
++ */
++static int encrypt_authorizer(struct ceph_x_authorizer *au)
++{
++      struct ceph_x_authorize_a *msg_a;
++      struct ceph_x_authorize_b *msg_b;
++      void *p, *end;
++      int ret;
++
++      msg_a = au->buf->vec.iov_base;
++      WARN_ON(msg_a->ticket_blob.secret_id != cpu_to_le64(au->secret_id));
++      p = (void *)(msg_a + 1) + le32_to_cpu(msg_a->ticket_blob.blob_len);
++      end = au->buf->vec.iov_base + au->buf->vec.iov_len;
++
++      msg_b = p + ceph_x_encrypt_offset();
++      msg_b->struct_v = 1;
++      msg_b->nonce = cpu_to_le64(au->nonce);
++
++      ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b));
++      if (ret < 0)
++              return ret;
++
++      p += ret;
++      WARN_ON(p > end);
++      au->buf->vec.iov_len = p - au->buf->vec.iov_base;
++
++      return 0;
++}
++
+ static void ceph_x_authorizer_cleanup(struct ceph_x_authorizer *au)
+ {
+       ceph_crypto_key_destroy(&au->session_key);
+@@ -302,7 +334,6 @@ static int ceph_x_build_authorizer(struc
+       int maxlen;
+       struct ceph_x_authorize_a *msg_a;
+       struct ceph_x_authorize_b *msg_b;
+-      void *p, *end;
+       int ret;
+       int ticket_blob_len =
+               (th->ticket_blob ? th->ticket_blob->vec.iov_len : 0);
+@@ -346,21 +377,13 @@ static int ceph_x_build_authorizer(struc
+       dout(" th %p secret_id %lld %lld\n", th, th->secret_id,
+            le64_to_cpu(msg_a->ticket_blob.secret_id));
+ 
+-      p = msg_a + 1;
+-      p += ticket_blob_len;
+-      end = au->buf->vec.iov_base + au->buf->vec.iov_len;
+-
+-      msg_b = p + ceph_x_encrypt_offset();
+-      msg_b->struct_v = 1;
+       get_random_bytes(&au->nonce, sizeof(au->nonce));
+-      msg_b->nonce = cpu_to_le64(au->nonce);
+-      ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b));
+-      if (ret < 0)
++      ret = encrypt_authorizer(au);
++      if (ret) {
++              pr_err("failed to encrypt authorizer: %d", ret);
+               goto out_au;
++      }
+ 
+-      p += ret;
+-      WARN_ON(p > end);
+-      au->buf->vec.iov_len = p - au->buf->vec.iov_base;
+       dout(" built authorizer nonce %llx len %d\n", au->nonce,
+            (int)au->buf->vec.iov_len);
+       return 0;
diff --git a/queue-4.9/libceph-implement-cephx_v2-calculation-mode.patch b/queue-4.9/libceph-implement-cephx_v2-calculation-mode.patch

new file mode 100644 (file)

index 0000000..e0f4f11
--- /dev/null
+++ b/queue-4.9/libceph-implement-cephx_v2-calculation-mode.patch
@@ -0,0 +1,142 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Ilya Dryomov <idryomov@gmail.com>
+Date: Fri, 27 Jul 2018 19:25:32 +0200
+Subject: libceph: implement CEPHX_V2 calculation mode
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit cc255c76c70f7a87d97939621eae04b600d9f4a1 upstream.
+
+Derive the signature from the entire buffer (both AES cipher blocks)
+instead of using just the first half of the first block, leaving out
+data_crc entirely.
+
+This addresses CVE-2018-1129.
+
+Link: http://tracker.ceph.com/issues/24837
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Sage Weil <sage@redhat.com>
+[bwh: Backported to 4.9:
+ - Define and test the feature bit in the old way
+ - Don't change any other feature bits in ceph_features.h]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/ceph/ceph_features.h |    4 +
+ net/ceph/auth_x.c                  |   77 +++++++++++++++++++++++++++----------
+ 2 files changed, 61 insertions(+), 20 deletions(-)
+
+--- a/include/linux/ceph/ceph_features.h
++++ b/include/linux/ceph/ceph_features.h
+@@ -76,6 +76,7 @@
+ // duplicated since it was introduced at the same time as CEPH_FEATURE_CRUSH_TUNABLES5
+ #define CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING   (1ULL<<58) /* New, v7 encoding */
+ #define CEPH_FEATURE_FS_FILE_LAYOUT_V2       (1ULL<<58) /* file_layout_t */
++#define CEPH_FEATURE_CEPHX_V2 (1ULL<<61) // *do not share this bit*
+ 
+ /*
+  * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature
+@@ -124,7 +125,8 @@ static inline u64 ceph_sanitize_features
+        CEPH_FEATURE_MSGR_KEEPALIVE2 |         \
+        CEPH_FEATURE_CRUSH_V4 |                \
+        CEPH_FEATURE_CRUSH_TUNABLES5 |         \
+-       CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING)
++       CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING | \
++       CEPH_FEATURE_CEPHX_V2)
+ 
+ #define CEPH_FEATURES_REQUIRED_DEFAULT   \
+       (CEPH_FEATURE_NOSRCADDR |        \
+--- a/net/ceph/auth_x.c
++++ b/net/ceph/auth_x.c
+@@ -8,6 +8,7 @@
+ 
+ #include <linux/ceph/decode.h>
+ #include <linux/ceph/auth.h>
++#include <linux/ceph/ceph_features.h>
+ #include <linux/ceph/libceph.h>
+ #include <linux/ceph/messenger.h>
+ 
+@@ -799,26 +800,64 @@ static int calc_signature(struct ceph_x_
+                         __le64 *psig)
+ {
+       void *enc_buf = au->enc_buf;
+-      struct {
+-              __le32 len;
+-              __le32 header_crc;
+-              __le32 front_crc;
+-              __le32 middle_crc;
+-              __le32 data_crc;
+-      } __packed *sigblock = enc_buf + ceph_x_encrypt_offset();
+-      int ret;
+-
+-      sigblock->len = cpu_to_le32(4*sizeof(u32));
+-      sigblock->header_crc = msg->hdr.crc;
+-      sigblock->front_crc = msg->footer.front_crc;
+-      sigblock->middle_crc = msg->footer.middle_crc;
+-      sigblock->data_crc =  msg->footer.data_crc;
+-      ret = ceph_x_encrypt(&au->session_key, enc_buf, CEPHX_AU_ENC_BUF_LEN,
+-                           sizeof(*sigblock));
+-      if (ret < 0)
+-              return ret;
++      int ret;
++
++      if (!(msg->con->peer_features & CEPH_FEATURE_CEPHX_V2)) {
++              struct {
++                      __le32 len;
++                      __le32 header_crc;
++                      __le32 front_crc;
++                      __le32 middle_crc;
++                      __le32 data_crc;
++              } __packed *sigblock = enc_buf + ceph_x_encrypt_offset();
++
++              sigblock->len = cpu_to_le32(4*sizeof(u32));
++              sigblock->header_crc = msg->hdr.crc;
++              sigblock->front_crc = msg->footer.front_crc;
++              sigblock->middle_crc = msg->footer.middle_crc;
++              sigblock->data_crc =  msg->footer.data_crc;
++
++              ret = ceph_x_encrypt(&au->session_key, enc_buf,
++                                   CEPHX_AU_ENC_BUF_LEN, sizeof(*sigblock));
++              if (ret < 0)
++                      return ret;
++
++              *psig = *(__le64 *)(enc_buf + sizeof(u32));
++      } else {
++              struct {
++                      __le32 header_crc;
++                      __le32 front_crc;
++                      __le32 front_len;
++                      __le32 middle_crc;
++                      __le32 middle_len;
++                      __le32 data_crc;
++                      __le32 data_len;
++                      __le32 seq_lower_word;
++              } __packed *sigblock = enc_buf;
++              struct {
++                      __le64 a, b, c, d;
++              } __packed *penc = enc_buf;
++              int ciphertext_len;
++
++              sigblock->header_crc = msg->hdr.crc;
++              sigblock->front_crc = msg->footer.front_crc;
++              sigblock->front_len = msg->hdr.front_len;
++              sigblock->middle_crc = msg->footer.middle_crc;
++              sigblock->middle_len = msg->hdr.middle_len;
++              sigblock->data_crc =  msg->footer.data_crc;
++              sigblock->data_len = msg->hdr.data_len;
++              sigblock->seq_lower_word = *(__le32 *)&msg->hdr.seq;
++
++              /* no leading len, no ceph_x_encrypt_header */
++              ret = ceph_crypt(&au->session_key, true, enc_buf,
++                               CEPHX_AU_ENC_BUF_LEN, sizeof(*sigblock),
++                               &ciphertext_len);
++              if (ret)
++                      return ret;
++
++              *psig = penc->a ^ penc->b ^ penc->c ^ penc->d;
++      }
+ 
+-      *psig = *(__le64 *)(enc_buf + sizeof(u32));
+       return 0;
+ }
+ 
diff --git a/queue-4.9/libceph-no-need-to-drop-con-mutex-for-get_authorizer.patch b/queue-4.9/libceph-no-need-to-drop-con-mutex-for-get_authorizer.patch

new file mode 100644 (file)

index 0000000..edd4111
--- /dev/null
+++ b/queue-4.9/libceph-no-need-to-drop-con-mutex-for-get_authorizer.patch
@@ -0,0 +1,39 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Ilya Dryomov <idryomov@gmail.com>
+Date: Fri, 2 Dec 2016 16:35:09 +0100
+Subject: libceph: no need to drop con->mutex for ->get_authorizer()
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit b3bbd3f2ab19c8ca319003b4b51ce4c4ca74da06 upstream.
+
+->get_authorizer(), ->verify_authorizer_reply(), ->sign_message() and
+->check_message_signature() shouldn't be doing anything with or on the
+connection (like closing it or sending messages).
+
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Sage Weil <sage@redhat.com>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/messenger.c |    6 ------
+ 1 file changed, 6 deletions(-)
+
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -1405,15 +1405,9 @@ static struct ceph_auth_handshake *get_c
+               return NULL;
+       }
+ 
+-      /* Can't hold the mutex while getting authorizer */
+-      mutex_unlock(&con->mutex);
+       auth = con->ops->get_authorizer(con, auth_proto, con->auth_retry);
+-      mutex_lock(&con->mutex);
+-
+       if (IS_ERR(auth))
+               return auth;
+-      if (con->state != CON_STATE_NEGOTIATING)
+-              return ERR_PTR(-EAGAIN);
+ 
+       con->auth_reply_buf = auth->authorizer_reply_buf;
+       con->auth_reply_buf_len = auth->authorizer_reply_buf_len;
diff --git a/queue-4.9/libceph-store-ceph_auth_handshake-pointer-in-ceph_connection.patch b/queue-4.9/libceph-store-ceph_auth_handshake-pointer-in-ceph_connection.patch

new file mode 100644 (file)

index 0000000..157be77
--- /dev/null
+++ b/queue-4.9/libceph-store-ceph_auth_handshake-pointer-in-ceph_connection.patch
@@ -0,0 +1,147 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Ilya Dryomov <idryomov@gmail.com>
+Date: Thu, 26 Jul 2018 15:17:46 +0200
+Subject: libceph: store ceph_auth_handshake pointer in ceph_connection
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit 262614c4294d33b1f19e0d18c0091d9c329b544a upstream.
+
+We already copy authorizer_reply_buf and authorizer_reply_buf_len into
+ceph_connection.  Factoring out __prepare_write_connect() requires two
+more: authorizer_buf and authorizer_buf_len.  Store the pointer to the
+handshake in con->auth rather than piling on.
+
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Sage Weil <sage@redhat.com>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/ceph/messenger.h |    3 --
+ net/ceph/messenger.c           |   54 +++++++++++++++++++----------------------
+ 2 files changed, 27 insertions(+), 30 deletions(-)
+
+--- a/include/linux/ceph/messenger.h
++++ b/include/linux/ceph/messenger.h
+@@ -200,9 +200,8 @@ struct ceph_connection {
+                                attempt for this connection, client */
+       u32 peer_global_seq;  /* peer's global seq for this connection */
+ 
++      struct ceph_auth_handshake *auth;
+       int auth_retry;       /* true if we need a newer authorizer */
+-      void *auth_reply_buf;   /* where to put the authorizer reply */
+-      int auth_reply_buf_len;
+ 
+       struct mutex mutex;
+ 
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -1394,24 +1394,26 @@ static void prepare_write_keepalive(stru
+  * Connection negotiation.
+  */
+ 
+-static struct ceph_auth_handshake *get_connect_authorizer(struct ceph_connection *con,
+-                                              int *auth_proto)
++static int get_connect_authorizer(struct ceph_connection *con)
+ {
+       struct ceph_auth_handshake *auth;
++      int auth_proto;
+ 
+       if (!con->ops->get_authorizer) {
++              con->auth = NULL;
+               con->out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN;
+               con->out_connect.authorizer_len = 0;
+-              return NULL;
++              return 0;
+       }
+ 
+-      auth = con->ops->get_authorizer(con, auth_proto, con->auth_retry);
++      auth = con->ops->get_authorizer(con, &auth_proto, con->auth_retry);
+       if (IS_ERR(auth))
+-              return auth;
++              return PTR_ERR(auth);
+ 
+-      con->auth_reply_buf = auth->authorizer_reply_buf;
+-      con->auth_reply_buf_len = auth->authorizer_reply_buf_len;
+-      return auth;
++      con->auth = auth;
++      con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto);
++      con->out_connect.authorizer_len = cpu_to_le32(auth->authorizer_buf_len);
++      return 0;
+ }
+ 
+ /*
+@@ -1431,8 +1433,7 @@ static int prepare_write_connect(struct
+ {
+       unsigned int global_seq = get_global_seq(con->msgr, 0);
+       int proto;
+-      int auth_proto;
+-      struct ceph_auth_handshake *auth;
++      int ret;
+ 
+       switch (con->peer_name.type) {
+       case CEPH_ENTITY_TYPE_MON:
+@@ -1459,20 +1460,15 @@ static int prepare_write_connect(struct
+       con->out_connect.protocol_version = cpu_to_le32(proto);
+       con->out_connect.flags = 0;
+ 
+-      auth_proto = CEPH_AUTH_UNKNOWN;
+-      auth = get_connect_authorizer(con, &auth_proto);
+-      if (IS_ERR(auth))
+-              return PTR_ERR(auth);
+-
+-      con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto);
+-      con->out_connect.authorizer_len = auth ?
+-              cpu_to_le32(auth->authorizer_buf_len) : 0;
++      ret = get_connect_authorizer(con);
++      if (ret)
++              return ret;
+ 
+       con_out_kvec_add(con, sizeof (con->out_connect),
+                                       &con->out_connect);
+-      if (auth && auth->authorizer_buf_len)
+-              con_out_kvec_add(con, auth->authorizer_buf_len,
+-                                      auth->authorizer_buf);
++      if (con->auth)
++              con_out_kvec_add(con, con->auth->authorizer_buf_len,
++                               con->auth->authorizer_buf);
+ 
+       con->out_more = 0;
+       con_flag_set(con, CON_FLAG_WRITE_PENDING);
+@@ -1737,11 +1733,14 @@ static int read_partial_connect(struct c
+       if (ret <= 0)
+               goto out;
+ 
+-      size = le32_to_cpu(con->in_reply.authorizer_len);
+-      end += size;
+-      ret = read_partial(con, end, size, con->auth_reply_buf);
+-      if (ret <= 0)
+-              goto out;
++      if (con->auth) {
++              size = le32_to_cpu(con->in_reply.authorizer_len);
++              end += size;
++              ret = read_partial(con, end, size,
++                                 con->auth->authorizer_reply_buf);
++              if (ret <= 0)
++                      goto out;
++      }
+ 
+       dout("read_partial_connect %p tag %d, con_seq = %u, g_seq = %u\n",
+            con, (int)con->in_reply.tag,
+@@ -1749,7 +1748,6 @@ static int read_partial_connect(struct c
+            le32_to_cpu(con->in_reply.global_seq));
+ out:
+       return ret;
+-
+ }
+ 
+ /*
+@@ -2033,7 +2031,7 @@ static int process_connect(struct ceph_c
+ 
+       dout("process_connect on %p tag %d\n", con, (int)con->in_tag);
+ 
+-      if (con->auth_reply_buf) {
++      if (con->auth) {
+               /*
+                * Any connection that defines ->get_authorizer()
+                * should also define ->verify_authorizer_reply().
diff --git a/queue-4.9/libceph-weaken-sizeof-check-in-ceph_x_verify_authorizer_reply.patch b/queue-4.9/libceph-weaken-sizeof-check-in-ceph_x_verify_authorizer_reply.patch

new file mode 100644 (file)

index 0000000..5116a04
--- /dev/null
+++ b/queue-4.9/libceph-weaken-sizeof-check-in-ceph_x_verify_authorizer_reply.patch
@@ -0,0 +1,34 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Ilya Dryomov <idryomov@gmail.com>
+Date: Fri, 27 Jul 2018 19:45:36 +0200
+Subject: libceph: weaken sizeof check in ceph_x_verify_authorizer_reply()
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit f1d10e04637924f2b00a0fecdd2ca4565f5cfc3f upstream.
+
+Allow for extending ceph_x_authorize_reply in the future.
+
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Sage Weil <sage@redhat.com>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/auth_x.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/ceph/auth_x.c
++++ b/net/ceph/auth_x.c
+@@ -733,8 +733,10 @@ static int ceph_x_verify_authorizer_repl
+       ret = ceph_x_decrypt(&au->session_key, &p, p + CEPHX_AU_ENC_BUF_LEN);
+       if (ret < 0)
+               return ret;
+-      if (ret != sizeof(*reply))
+-              return -EPERM;
++      if (ret < sizeof(*reply)) {
++              pr_err("bad size %d for ceph_x_authorize_reply\n", ret);
++              return -EINVAL;
++      }
+ 
+       if (au->nonce + 1 != le64_to_cpu(reply->nonce_plus_one))
+               ret = -EPERM;
diff --git a/queue-4.9/mm-hugetlb.c-don-t-call-region_abort-if-region_chg-fails.patch b/queue-4.9/mm-hugetlb.c-don-t-call-region_abort-if-region_chg-fails.patch

new file mode 100644 (file)

index 0000000..094b7fa
--- /dev/null
+++ b/queue-4.9/mm-hugetlb.c-don-t-call-region_abort-if-region_chg-fails.patch
@@ -0,0 +1,62 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Mike Kravetz <mike.kravetz@oracle.com>
+Date: Fri, 31 Mar 2017 15:12:07 -0700
+Subject: mm/hugetlb.c: don't call region_abort if region_chg fails
+
+From: Mike Kravetz <mike.kravetz@oracle.com>
+
+commit ff8c0c53c47530ffea82c22a0a6df6332b56c957 upstream.
+
+Changes to hugetlbfs reservation maps is a two step process.  The first
+step is a call to region_chg to determine what needs to be changed, and
+prepare that change.  This should be followed by a call to
+region_add to commit the change, or region_abort to abort the change.
+
+The error path in hugetlb_reserve_pages called region_abort after a
+failed call to region_chg.  As a result, the adds_in_progress counter in
+the reservation map is off by 1.  This is caught by a VM_BUG_ON in
+resv_map_release when the reservation map is freed.
+
+syzkaller fuzzer (when using an injected kmalloc failure) found this
+bug, that resulted in the following:
+
+ kernel BUG at mm/hugetlb.c:742!
+ Call Trace:
+  hugetlbfs_evict_inode+0x7b/0xa0 fs/hugetlbfs/inode.c:493
+  evict+0x481/0x920 fs/inode.c:553
+  iput_final fs/inode.c:1515 [inline]
+  iput+0x62b/0xa20 fs/inode.c:1542
+  hugetlb_file_setup+0x593/0x9f0 fs/hugetlbfs/inode.c:1306
+  newseg+0x422/0xd30 ipc/shm.c:575
+  ipcget_new ipc/util.c:285 [inline]
+  ipcget+0x21e/0x580 ipc/util.c:639
+  SYSC_shmget ipc/shm.c:673 [inline]
+  SyS_shmget+0x158/0x230 ipc/shm.c:657
+  entry_SYSCALL_64_fastpath+0x1f/0xc2
+ RIP: resv_map_release+0x265/0x330 mm/hugetlb.c:742
+
+Link: http://lkml.kernel.org/r/1490821682-23228-1-git-send-email-mike.kravetz@oracle.com
+Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/hugetlb.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -4259,7 +4259,9 @@ int hugetlb_reserve_pages(struct inode *
+       return 0;
+ out_err:
+       if (!vma || vma->vm_flags & VM_MAYSHARE)
+-              region_abort(resv_map, from, to);
++              /* Don't call region_abort if region_chg failed */
++              if (chg >= 0)
++                      region_abort(resv_map, from, to);
+       if (vma && is_vma_resv_set(vma, HPAGE_RESV_OWNER))
+               kref_put(&resv_map->refs, resv_map_release);
+       return ret;
diff --git a/queue-4.9/series b/queue-4.9/series

index 62e12da7bef357e698459e5278eeb3ff7fb19722..a9a7dedd47ef2165c04ec3874ee085b1356eb03a 100644 (file)
--- a/queue-4.9/series
+++ b/queue-4.9/series
@@ -45,3 +45,57 @@ mm-cleancache-fix-corruption-on-missed-inode-invalidation.patch
  usb-gadget-dummy-fix-nonsensical-comparisons.patch
  net-qed-use-correct-strncpy-size.patch
  tipc-use-destination-length-for-copy-string.patch
+libceph-drop-len-argument-of-verify_authorizer_reply.patch
+libceph-no-need-to-drop-con-mutex-for-get_authorizer.patch
+libceph-store-ceph_auth_handshake-pointer-in-ceph_connection.patch
+libceph-factor-out-__prepare_write_connect.patch
+libceph-factor-out-__ceph_x_decrypt.patch
+libceph-factor-out-encrypt_authorizer.patch
+libceph-add-authorizer-challenge.patch
+libceph-implement-cephx_v2-calculation-mode.patch
+libceph-weaken-sizeof-check-in-ceph_x_verify_authorizer_reply.patch
+libceph-check-authorizer-reply-challenge-length-before-reading.patch
+bpf-verifier-add-spi-variable-to-check_stack_write.patch
+bpf-verifier-pass-instruction-index-to-check_mem_access-and-check_xadd.patch
+bpf-prevent-memory-disambiguation-attack.patch
+wil6210-missing-length-check-in-wmi_set_ie.patch
+mm-hugetlb.c-don-t-call-region_abort-if-region_chg-fails.patch
+hugetlbfs-fix-offset-overflow-in-hugetlbfs-mmap.patch
+hugetlbfs-check-for-pgoff-value-overflow.patch
+btrfs-validate-type-when-reading-a-chunk.patch
+btrfs-verify-that-every-chunk-has-corresponding-block-group-at-mount-time.patch
+btrfs-refactor-check_leaf-function-for-later-expansion.patch
+btrfs-check-if-item-pointer-overlaps-with-the-item-itself.patch
+btrfs-add-sanity-check-for-extent_data-when-reading-out-leaf.patch
+btrfs-add-checker-for-extent_csum.patch
+btrfs-move-leaf-and-node-validation-checker-to-tree-checker.c.patch
+btrfs-struct-funcs-constify-readers.patch
+btrfs-tree-checker-enhance-btrfs_check_node-output.patch
+btrfs-tree-checker-fix-false-panic-for-sanity-test.patch
+btrfs-tree-checker-add-checker-for-dir-item.patch
+btrfs-tree-checker-use-zu-format-string-for-size_t.patch
+btrfs-tree-check-reduce-stack-consumption-in-check_dir_item.patch
+btrfs-tree-checker-verify-block_group_item.patch
+btrfs-tree-checker-detect-invalid-and-empty-essential-trees.patch
+btrfs-check-that-each-block-group-has-corresponding-chunk-at-mount-time.patch
+btrfs-tree-checker-check-level-for-leaves-and-nodes.patch
+btrfs-tree-checker-fix-misleading-group-system-information.patch
+f2fs-fix-a-panic-caused-by-null-flush_cmd_control.patch
+f2fs-fix-race-condition-in-between-free-nid-allocator-initializer.patch
+f2fs-detect-wrong-layout.patch
+f2fs-return-error-during-fill_super.patch
+f2fs-check-blkaddr-more-accuratly-before-issue-a-bio.patch
+f2fs-sanity-check-on-sit-entry.patch
+f2fs-enhance-sanity_check_raw_super-to-avoid-potential-overflow.patch
+f2fs-clean-up-with-is_valid_blkaddr.patch
+f2fs-introduce-and-spread-verify_blkaddr.patch
+f2fs-fix-to-do-sanity-check-with-secs_per_zone.patch
+f2fs-fix-to-do-sanity-check-with-user_block_count.patch
+f2fs-add-sanity_check_inode-function.patch
+f2fs-fix-to-do-sanity-check-with-node-footer-and-iblocks.patch
+f2fs-fix-to-do-sanity-check-with-block-address-in-main-area.patch
+f2fs-fix-missing-up_read.patch
+f2fs-fix-to-do-sanity-check-with-block-address-in-main-area-v2.patch
+f2fs-free-meta-pages-if-sanity-check-for-ckpt-is-failed.patch
+f2fs-fix-to-do-sanity-check-with-cp_pack_start_sum.patch
+xfs-don-t-fail-when-converting-shortform-attr-to-long-form-during-attr_replace.patch
diff --git a/queue-4.9/wil6210-missing-length-check-in-wmi_set_ie.patch b/queue-4.9/wil6210-missing-length-check-in-wmi_set_ie.patch

new file mode 100644 (file)

index 0000000..1132604
--- /dev/null
+++ b/queue-4.9/wil6210-missing-length-check-in-wmi_set_ie.patch
@@ -0,0 +1,39 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: Lior David <qca_liord@qca.qualcomm.com>
+Date: Tue, 14 Nov 2017 15:25:39 +0200
+Subject: wil6210: missing length check in wmi_set_ie
+
+From: Lior David <qca_liord@qca.qualcomm.com>
+
+commit b5a8ffcae4103a9d823ea3aa3a761f65779fbe2a upstream.
+
+Add a length check in wmi_set_ie to detect unsigned integer
+overflow.
+
+Signed-off-by: Lior David <qca_liord@qca.qualcomm.com>
+Signed-off-by: Maya Erez <qca_merez@qca.qualcomm.com>
+Signed-off-by: Kalle Valo <kvalo@qca.qualcomm.com>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/wireless/ath/wil6210/wmi.c |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/wireless/ath/wil6210/wmi.c
++++ b/drivers/net/wireless/ath/wil6210/wmi.c
+@@ -1302,8 +1302,14 @@ int wmi_set_ie(struct wil6210_priv *wil,
+       };
+       int rc;
+       u16 len = sizeof(struct wmi_set_appie_cmd) + ie_len;
+-      struct wmi_set_appie_cmd *cmd = kzalloc(len, GFP_KERNEL);
++      struct wmi_set_appie_cmd *cmd;
+ 
++      if (len < ie_len) {
++              rc = -EINVAL;
++              goto out;
++      }
++
++      cmd = kzalloc(len, GFP_KERNEL);
+       if (!cmd) {
+               rc = -ENOMEM;
+               goto out;
diff --git a/queue-4.9/xfs-don-t-fail-when-converting-shortform-attr-to-long-form-during-attr_replace.patch b/queue-4.9/xfs-don-t-fail-when-converting-shortform-attr-to-long-form-during-attr_replace.patch

new file mode 100644 (file)

index 0000000..e56de00
--- /dev/null
+++ b/queue-4.9/xfs-don-t-fail-when-converting-shortform-attr-to-long-form-during-attr_replace.patch
@@ -0,0 +1,47 @@
+From foo@baz Thu Dec  6 15:27:31 CET 2018
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+Date: Tue, 17 Apr 2018 19:10:15 -0700
+Subject: xfs: don't fail when converting shortform attr to long form during ATTR_REPLACE
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit 7b38460dc8e4eafba06c78f8e37099d3b34d473c upstream.
+
+Kanda Motohiro reported that expanding a tiny xattr into a large xattr
+fails on XFS because we remove the tiny xattr from a shortform fork and
+then try to re-add it after converting the fork to extents format without
+having cleared the ATTR_REPLACE flag.  The re-add fails because the attr
+is no longer present, causing a fs shutdown.
+
+This is derived from the patch in his bug report, but we really
+shouldn't ignore a nonzero retval from the remove call.
+
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=199119
+Reported-by: kanda.motohiro@gmail.com
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_attr.c |    9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/libxfs/xfs_attr.c
++++ b/fs/xfs/libxfs/xfs_attr.c
+@@ -487,7 +487,14 @@ xfs_attr_shortform_addname(xfs_da_args_t
+               if (args->flags & ATTR_CREATE)
+                       return retval;
+               retval = xfs_attr_shortform_remove(args);
+-              ASSERT(retval == 0);
++              if (retval)
++                      return retval;
++              /*
++               * Since we have removed the old attr, clear ATTR_REPLACE so
++               * that the leaf format add routine won't trip over the attr
++               * not being around.
++               */
++              args->flags &= ~ATTR_REPLACE;
+       }
+ 
+       if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Thu, 6 Dec 2018 14:28:25 +0000 (15:28 +0100)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Thu, 6 Dec 2018 14:28:25 +0000 (15:28 +0100)
queue-4.9/bpf-prevent-memory-disambiguation-attack.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/bpf-verifier-add-spi-variable-to-check_stack_write.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/bpf-verifier-pass-instruction-index-to-check_mem_access-and-check_xadd.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/btrfs-add-checker-for-extent_csum.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/btrfs-add-sanity-check-for-extent_data-when-reading-out-leaf.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/btrfs-check-if-item-pointer-overlaps-with-the-item-itself.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/btrfs-check-that-each-block-group-has-corresponding-chunk-at-mount-time.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/btrfs-move-leaf-and-node-validation-checker-to-tree-checker.c.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/btrfs-refactor-check_leaf-function-for-later-expansion.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/btrfs-struct-funcs-constify-readers.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/btrfs-tree-check-reduce-stack-consumption-in-check_dir_item.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/btrfs-tree-checker-add-checker-for-dir-item.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/btrfs-tree-checker-check-level-for-leaves-and-nodes.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/btrfs-tree-checker-detect-invalid-and-empty-essential-trees.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/btrfs-tree-checker-enhance-btrfs_check_node-output.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/btrfs-tree-checker-fix-false-panic-for-sanity-test.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/btrfs-tree-checker-fix-misleading-group-system-information.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/btrfs-tree-checker-use-zu-format-string-for-size_t.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/btrfs-tree-checker-verify-block_group_item.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/btrfs-validate-type-when-reading-a-chunk.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/btrfs-verify-that-every-chunk-has-corresponding-block-group-at-mount-time.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/f2fs-add-sanity_check_inode-function.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/f2fs-check-blkaddr-more-accuratly-before-issue-a-bio.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/f2fs-clean-up-with-is_valid_blkaddr.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/f2fs-detect-wrong-layout.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/f2fs-enhance-sanity_check_raw_super-to-avoid-potential-overflow.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/f2fs-fix-a-panic-caused-by-null-flush_cmd_control.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/f2fs-fix-missing-up_read.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/f2fs-fix-race-condition-in-between-free-nid-allocator-initializer.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/f2fs-fix-to-do-sanity-check-with-block-address-in-main-area-v2.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/f2fs-fix-to-do-sanity-check-with-block-address-in-main-area.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/f2fs-fix-to-do-sanity-check-with-cp_pack_start_sum.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/f2fs-fix-to-do-sanity-check-with-node-footer-and-iblocks.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/f2fs-fix-to-do-sanity-check-with-secs_per_zone.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/f2fs-fix-to-do-sanity-check-with-user_block_count.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/f2fs-free-meta-pages-if-sanity-check-for-ckpt-is-failed.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/f2fs-introduce-and-spread-verify_blkaddr.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/f2fs-return-error-during-fill_super.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/f2fs-sanity-check-on-sit-entry.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/hugetlbfs-check-for-pgoff-value-overflow.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/hugetlbfs-fix-offset-overflow-in-hugetlbfs-mmap.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/libceph-add-authorizer-challenge.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/libceph-check-authorizer-reply-challenge-length-before-reading.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/libceph-drop-len-argument-of-verify_authorizer_reply.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/libceph-factor-out-__ceph_x_decrypt.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/libceph-factor-out-__prepare_write_connect.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/libceph-factor-out-encrypt_authorizer.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/libceph-implement-cephx_v2-calculation-mode.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/libceph-no-need-to-drop-con-mutex-for-get_authorizer.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/libceph-store-ceph_auth_handshake-pointer-in-ceph_connection.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/libceph-weaken-sizeof-check-in-ceph_x_verify_authorizer_reply.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/mm-hugetlb.c-don-t-call-region_abort-if-region_chg-fails.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/series		patch \| blob \| blame \| history
queue-4.9/wil6210-missing-length-check-in-wmi_set_ie.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/xfs-don-t-fail-when-converting-shortform-attr-to-long-form-during-attr_replace.patch	[new file with mode: 0644]	patch \| blob