--- /dev/null
+From 429120f3df2dba2bf3a4a19f4212a53ecefc7102 Mon Sep 17 00:00:00 2001
+From: Ming Lei <ming.lei@redhat.com>
+Date: Sun, 29 Dec 2019 10:32:30 +0800
+Subject: block: fix splitting segments on boundary masks
+
+From: Ming Lei <ming.lei@redhat.com>
+
+commit 429120f3df2dba2bf3a4a19f4212a53ecefc7102 upstream.
+
+We ran into a problem with an mpt3sas-based controller, where we would
+see random (and hard to reproduce) file corruption. The issue seemed
+specific to this controller, but wasn't specific to the file system.
+After a lot of debugging, we found out that it's caused by segments
+spanning a 4G memory boundary. This shouldn't happen, as the default
+setting for segment boundary masks is 4G.
+
+Turns out there are two issues in get_max_segment_size():
+
+1) The default segment boundary mask is bypassed
+
+2) The segment start address isn't taken into account when checking
+ segment boundary limit
+
+Fix these two issues by removing the bypass of the segment boundary
+check even if the mask is set to the default value, and taking into
+account the actual start address of the request when checking if a
+segment needs splitting.
+
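+As an illustration of the fixed calculation, here is a minimal userspace
+sketch (not part of the patch; the physical address and the segment size
+limit are made-up values for illustration):
+
+	#include <stdio.h>
+
+	int main(void)
+	{
+		unsigned long long mask = 0xffffffffULL;  /* default 4G - 1 boundary mask */
+		unsigned long long phys = 0x1fffff000ULL; /* segment starts 4K below a 4G line */
+		unsigned long long max_seg = 65536;       /* queue's max segment size */
+		unsigned long long left, len;
+
+		/* bytes left before the next 4G boundary, from the real start address */
+		left = mask - (mask & phys) + 1;
+		len = left < max_seg ? left : max_seg;
+
+		printf("segment capped at %llu bytes\n", len); /* 4096 */
+		return 0;
+	}
+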
+Cc: stable@vger.kernel.org # v5.1+
+Reviewed-by: Chris Mason <clm@fb.com>
+Tested-by: Chris Mason <clm@fb.com>
+Fixes: dcebd755926b ("block: use bio_for_each_bvec() to compute multi-page bvec count")
+Signed-off-by: Ming Lei <ming.lei@redhat.com>
+Dropped const on the page pointer, ppc page_to_phys() doesn't mark the
+page as const...
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ block/blk-merge.c | 18 +++++++++---------
+ 1 file changed, 9 insertions(+), 9 deletions(-)
+
+--- a/block/blk-merge.c
++++ b/block/blk-merge.c
+@@ -157,16 +157,14 @@ static inline unsigned get_max_io_size(s
+ return sectors & (lbs - 1);
+ }
+
+-static unsigned get_max_segment_size(const struct request_queue *q,
+- unsigned offset)
++static inline unsigned get_max_segment_size(const struct request_queue *q,
++ struct page *start_page,
++ unsigned long offset)
+ {
+ unsigned long mask = queue_segment_boundary(q);
+
+- /* default segment boundary mask means no boundary limit */
+- if (mask == BLK_SEG_BOUNDARY_MASK)
+- return queue_max_segment_size(q);
+-
+- return min_t(unsigned long, mask - (mask & offset) + 1,
++ offset = mask & (page_to_phys(start_page) + offset);
++ return min_t(unsigned long, mask - offset + 1,
+ queue_max_segment_size(q));
+ }
+
+@@ -201,7 +199,8 @@ static bool bvec_split_segs(const struct
+ unsigned seg_size = 0;
+
+ while (len && *nsegs < max_segs) {
+- seg_size = get_max_segment_size(q, bv->bv_offset + total_len);
++ seg_size = get_max_segment_size(q, bv->bv_page,
++ bv->bv_offset + total_len);
+ seg_size = min(seg_size, len);
+
+ (*nsegs)++;
+@@ -404,7 +403,8 @@ static unsigned blk_bvec_map_sg(struct r
+
+ while (nbytes > 0) {
+ unsigned offset = bvec->bv_offset + total;
+- unsigned len = min(get_max_segment_size(q, offset), nbytes);
++ unsigned len = min(get_max_segment_size(q, bvec->bv_page,
++ offset), nbytes);
+ struct page *page = bvec->bv_page;
+
+ /*
--- /dev/null
+From f54c7898ed1c3c9331376c0337a5049c38f66497 Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Sun, 22 Dec 2019 23:37:40 +0100
+Subject: bpf: Fix precision tracking for unbounded scalars
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+commit f54c7898ed1c3c9331376c0337a5049c38f66497 upstream.
+
+Anatoly has been fuzzing with the kBdysch harness and reported a hang in one
+of the outcomes. Upon closer analysis, it turns out that precise scalar
+value tracking is missing a few precision markings for unknown scalars:
+
+ 0: R1=ctx(id=0,off=0,imm=0) R10=fp0
+ 0: (b7) r0 = 0
+ 1: R0_w=invP0 R1=ctx(id=0,off=0,imm=0) R10=fp0
+ 1: (35) if r0 >= 0xf72e goto pc+0
+ --> only follow fallthrough
+ 2: R0_w=invP0 R1=ctx(id=0,off=0,imm=0) R10=fp0
+ 2: (35) if r0 >= 0x80fe0000 goto pc+0
+ --> only follow fallthrough
+ 3: R0_w=invP0 R1=ctx(id=0,off=0,imm=0) R10=fp0
+ 3: (14) w0 -= -536870912
+ 4: R0_w=invP536870912 R1=ctx(id=0,off=0,imm=0) R10=fp0
+ 4: (0f) r1 += r0
+ 5: R0_w=invP536870912 R1_w=inv(id=0) R10=fp0
+ 5: (55) if r1 != 0x104c1500 goto pc+0
+ --> push other branch for later analysis
+ R0_w=invP536870912 R1_w=inv273421568 R10=fp0
+ 6: R0_w=invP536870912 R1_w=inv273421568 R10=fp0
+ 6: (b7) r0 = 0
+ 7: R0=invP0 R1=inv273421568 R10=fp0
+ 7: (76) if w1 s>= 0xffffff00 goto pc+3
+ --> only follow goto
+ 11: R0=invP0 R1=inv273421568 R10=fp0
+ 11: (95) exit
+ 6: R0_w=invP536870912 R1_w=inv(id=0) R10=fp0
+ 6: (b7) r0 = 0
+ propagating r0
+ 7: safe
+ processed 11 insns [...]
+
+In the analysis of the second path coming after the successful exit above,
+the path is being pruned at line 7. Pruning analysis found that both r0
+registers are precise P0 and both R1 registers are non-precise scalars;
+given that the prior path with R1 as a non-precise scalar succeeded, this
+one is therefore considered safe as well.
+
+However, the problem is that given the condition at insn 7 in the first
+run, we only followed the goto and didn't push the other branch for later
+analysis; we've never walked the few insns in there and therefore dead-code
+sanitation rewrites it as goto pc-1, causing the hang depending on the skb
+address hitting these conditions. The issue is that R1 should have been
+marked as precise as well, such that pruning enforces the range check and
+concludes that the new R1 is not in the range of the old R1. In insn 4, we
+mark R1 (skb) as an unknown scalar via __mark_reg_unbounded() but not
+mark_reg_unbounded() and therefore regs->precise remains false.
+
+Back in b5dc0163d8fd ("bpf: precise scalar_value tracking"), this was not
+the case since the marking done out of __mark_reg_unbounded() had this
+covered as well. Once both are set as precise at insn 4 as they should
+have been, then given that R1 was 0x104c1500 in the prior fall-through
+path and is now completely unknown, the check at insn 7 concludes that we
+need to continue walking.
+Analysis after the fix:
+
+ 0: R1=ctx(id=0,off=0,imm=0) R10=fp0
+ 0: (b7) r0 = 0
+ 1: R0_w=invP0 R1=ctx(id=0,off=0,imm=0) R10=fp0
+ 1: (35) if r0 >= 0xf72e goto pc+0
+ 2: R0_w=invP0 R1=ctx(id=0,off=0,imm=0) R10=fp0
+ 2: (35) if r0 >= 0x80fe0000 goto pc+0
+ 3: R0_w=invP0 R1=ctx(id=0,off=0,imm=0) R10=fp0
+ 3: (14) w0 -= -536870912
+ 4: R0_w=invP536870912 R1=ctx(id=0,off=0,imm=0) R10=fp0
+ 4: (0f) r1 += r0
+ 5: R0_w=invP536870912 R1_w=invP(id=0) R10=fp0
+ 5: (55) if r1 != 0x104c1500 goto pc+0
+ R0_w=invP536870912 R1_w=invP273421568 R10=fp0
+ 6: R0_w=invP536870912 R1_w=invP273421568 R10=fp0
+ 6: (b7) r0 = 0
+ 7: R0=invP0 R1=invP273421568 R10=fp0
+ 7: (76) if w1 s>= 0xffffff00 goto pc+3
+ 11: R0=invP0 R1=invP273421568 R10=fp0
+ 11: (95) exit
+ 6: R0_w=invP536870912 R1_w=invP(id=0) R10=fp0
+ 6: (b7) r0 = 0
+ 7: R0_w=invP0 R1_w=invP(id=0) R10=fp0
+ 7: (76) if w1 s>= 0xffffff00 goto pc+3
+ R0_w=invP0 R1_w=invP(id=0) R10=fp0
+ 8: R0_w=invP0 R1_w=invP(id=0) R10=fp0
+ 8: (a5) if r0 < 0x2007002a goto pc+0
+ 9: R0_w=invP0 R1_w=invP(id=0) R10=fp0
+ 9: (57) r0 &= -16316416
+ 10: R0_w=invP0 R1_w=invP(id=0) R10=fp0
+ 10: (a6) if w0 < 0x1201 goto pc+0
+ 11: R0_w=invP0 R1_w=invP(id=0) R10=fp0
+ 11: (95) exit
+ 11: R0=invP0 R1=invP(id=0) R10=fp0
+ 11: (95) exit
+ processed 16 insns [...]
+
+Fixes: 6754172c208d ("bpf: fix precision tracking in presence of bpf2bpf calls")
+Reported-by: Anatoly Trosinenko <anatoly.trosinenko@gmail.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Link: https://lore.kernel.org/bpf/20191222223740.25297-1-daniel@iogearbox.net
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/bpf/verifier.c | 43 ++++++++++++++++++++++---------------------
+ 1 file changed, 22 insertions(+), 21 deletions(-)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -852,7 +852,8 @@ static const int caller_saved[CALLER_SAV
+ BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
+ };
+
+-static void __mark_reg_not_init(struct bpf_reg_state *reg);
++static void __mark_reg_not_init(const struct bpf_verifier_env *env,
++ struct bpf_reg_state *reg);
+
+ /* Mark the unknown part of a register (variable offset or scalar value) as
+ * known to have the value @imm.
+@@ -890,7 +891,7 @@ static void mark_reg_known_zero(struct b
+ verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
+ /* Something bad happened, let's kill all regs */
+ for (regno = 0; regno < MAX_BPF_REG; regno++)
+- __mark_reg_not_init(regs + regno);
++ __mark_reg_not_init(env, regs + regno);
+ return;
+ }
+ __mark_reg_known_zero(regs + regno);
+@@ -999,7 +1000,8 @@ static void __mark_reg_unbounded(struct
+ }
+
+ /* Mark a register as having a completely unknown (scalar) value. */
+-static void __mark_reg_unknown(struct bpf_reg_state *reg)
++static void __mark_reg_unknown(const struct bpf_verifier_env *env,
++ struct bpf_reg_state *reg)
+ {
+ /*
+ * Clear type, id, off, and union(map_ptr, range) and
+@@ -1009,6 +1011,8 @@ static void __mark_reg_unknown(struct bp
+ reg->type = SCALAR_VALUE;
+ reg->var_off = tnum_unknown;
+ reg->frameno = 0;
++ reg->precise = env->subprog_cnt > 1 || !env->allow_ptr_leaks ?
++ true : false;
+ __mark_reg_unbounded(reg);
+ }
+
+@@ -1019,19 +1023,16 @@ static void mark_reg_unknown(struct bpf_
+ verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
+ /* Something bad happened, let's kill all regs except FP */
+ for (regno = 0; regno < BPF_REG_FP; regno++)
+- __mark_reg_not_init(regs + regno);
++ __mark_reg_not_init(env, regs + regno);
+ return;
+ }
+- regs += regno;
+- __mark_reg_unknown(regs);
+- /* constant backtracking is enabled for root without bpf2bpf calls */
+- regs->precise = env->subprog_cnt > 1 || !env->allow_ptr_leaks ?
+- true : false;
++ __mark_reg_unknown(env, regs + regno);
+ }
+
+-static void __mark_reg_not_init(struct bpf_reg_state *reg)
++static void __mark_reg_not_init(const struct bpf_verifier_env *env,
++ struct bpf_reg_state *reg)
+ {
+- __mark_reg_unknown(reg);
++ __mark_reg_unknown(env, reg);
+ reg->type = NOT_INIT;
+ }
+
+@@ -1042,10 +1043,10 @@ static void mark_reg_not_init(struct bpf
+ verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
+ /* Something bad happened, let's kill all regs except FP */
+ for (regno = 0; regno < BPF_REG_FP; regno++)
+- __mark_reg_not_init(regs + regno);
++ __mark_reg_not_init(env, regs + regno);
+ return;
+ }
+- __mark_reg_not_init(regs + regno);
++ __mark_reg_not_init(env, regs + regno);
+ }
+
+ #define DEF_NOT_SUBREG (0)
+@@ -3066,7 +3067,7 @@ static int check_stack_boundary(struct b
+ }
+ if (state->stack[spi].slot_type[0] == STACK_SPILL &&
+ state->stack[spi].spilled_ptr.type == SCALAR_VALUE) {
+- __mark_reg_unknown(&state->stack[spi].spilled_ptr);
++ __mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
+ for (j = 0; j < BPF_REG_SIZE; j++)
+ state->stack[spi].slot_type[j] = STACK_MISC;
+ goto mark;
+@@ -3706,7 +3707,7 @@ static void __clear_all_pkt_pointers(str
+ if (!reg)
+ continue;
+ if (reg_is_pkt_pointer_any(reg))
+- __mark_reg_unknown(reg);
++ __mark_reg_unknown(env, reg);
+ }
+ }
+
+@@ -3734,7 +3735,7 @@ static void release_reg_references(struc
+ if (!reg)
+ continue;
+ if (reg->ref_obj_id == ref_obj_id)
+- __mark_reg_unknown(reg);
++ __mark_reg_unknown(env, reg);
+ }
+ }
+
+@@ -4357,7 +4358,7 @@ static int adjust_ptr_min_max_vals(struc
+ /* Taint dst register if offset had invalid bounds derived from
+ * e.g. dead branches.
+ */
+- __mark_reg_unknown(dst_reg);
++ __mark_reg_unknown(env, dst_reg);
+ return 0;
+ }
+
+@@ -4609,13 +4610,13 @@ static int adjust_scalar_min_max_vals(st
+ /* Taint dst register if offset had invalid bounds derived from
+ * e.g. dead branches.
+ */
+- __mark_reg_unknown(dst_reg);
++ __mark_reg_unknown(env, dst_reg);
+ return 0;
+ }
+
+ if (!src_known &&
+ opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
+- __mark_reg_unknown(dst_reg);
++ __mark_reg_unknown(env, dst_reg);
+ return 0;
+ }
+
+@@ -6746,7 +6747,7 @@ static void clean_func_state(struct bpf_
+ /* since the register is unused, clear its state
+ * to make further comparison simpler
+ */
+- __mark_reg_not_init(&st->regs[i]);
++ __mark_reg_not_init(env, &st->regs[i]);
+ }
+
+ for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
+@@ -6754,7 +6755,7 @@ static void clean_func_state(struct bpf_
+ /* liveness must not touch this stack slot anymore */
+ st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
+ if (!(live & REG_LIVE_READ)) {
+- __mark_reg_not_init(&st->stack[i].spilled_ptr);
++ __mark_reg_not_init(env, &st->stack[i].spilled_ptr);
+ for (j = 0; j < BPF_REG_SIZE; j++)
+ st->stack[i].slot_type[j] = STACK_INVALID;
+ }
--- /dev/null
+From de7999afedff02c6631feab3ea726a0e8f8c3d40 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Wed, 11 Dec 2019 09:01:40 +0000
+Subject: Btrfs: fix infinite loop during nocow writeback due to race
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit de7999afedff02c6631feab3ea726a0e8f8c3d40 upstream.
+
+When starting writeback for a range that covers part of a preallocated
+extent, due to a race with writeback for another range that also covers
+another part of the same preallocated extent, we can end up in an infinite
+loop.
+
+Consider the following example where for inode 280 we have two dirty
+ranges:
+
+ range A, from 294912 to 303103, 8192 bytes
+ range B, from 348160 to 438271, 90112 bytes
+
+and we have the following file extent item layout for our inode:
+
+ leaf 38895616 gen 24544 total ptrs 29 free space 13820 owner 5
+ (...)
+ item 27 key (280 108 200704) itemoff 14598 itemsize 53
+ extent data disk bytenr 0 nr 0 type 1 (regular)
+ extent data offset 0 nr 94208 ram 94208
+ item 28 key (280 108 294912) itemoff 14545 itemsize 53
+ extent data disk bytenr 10433052672 nr 81920 type 2 (prealloc)
+ extent data offset 0 nr 81920 ram 81920
+
+Then the following happens:
+
+1) Writeback starts for range B (from 348160 to 438271), execution of
+ run_delalloc_nocow() starts;
+
+2) The first iteration of run_delalloc_nocow()'s while loop leaves us at
+ the extent item at slot 28, pointing to the prealloc extent item
+ covering the range from 294912 to 376831. This extent covers part of
+ our range;
+
+3) An ordered extent is created against that extent, covering the file
+ range from 348160 to 376831 (28672 bytes);
+
+4) We adjust 'cur_offset' to 376832 and move on to the next iteration of
+ the while loop;
+
+5) The call to btrfs_lookup_file_extent() leaves us at the same leaf,
+ pointing to slot 29, 1 slot after the last item (the extent item
+ we processed in the previous iteration);
+
+6) Because we are a slot beyond the last item, we call btrfs_next_leaf(),
+   which releases the search path before doing another search for the
+ last key of the leaf (280 108 294912);
+
+7) Right after btrfs_next_leaf() released the path, and before it did
+ another search for the last key of the leaf, writeback for the range
+ A (from 294912 to 303103) completes (it was previously started at
+ some point);
+
+8) Upon completion of the ordered extent for range A, the prealloc extent
+ we previously found got split into two extent items, one covering the
+ range from 294912 to 303103 (8192 bytes), with a type of regular extent
+ (and no longer prealloc) and another covering the range from 303104 to
+ 376831 (73728 bytes), with a type of prealloc and an offset of 8192
+ bytes. So our leaf now has the following layout:
+
+ leaf 38895616 gen 24544 total ptrs 31 free space 13664 owner 5
+ (...)
+ item 27 key (280 108 200704) itemoff 14598 itemsize 53
+ extent data disk bytenr 0 nr 0 type 1
+ extent data offset 0 nr 8192 ram 94208
+ item 28 key (280 108 208896) itemoff 14545 itemsize 53
+ extent data disk bytenr 10433142784 nr 86016 type 1
+ extent data offset 0 nr 86016 ram 86016
+ item 29 key (280 108 294912) itemoff 14492 itemsize 53
+ extent data disk bytenr 10433052672 nr 81920 type 1
+ extent data offset 0 nr 8192 ram 81920
+ item 30 key (280 108 303104) itemoff 14439 itemsize 53
+ extent data disk bytenr 10433052672 nr 81920 type 2
+ extent data offset 8192 nr 73728 ram 81920
+
+9) After btrfs_next_leaf() returns, we have our path pointing to that same
+ leaf and at slot 30, since it has a key we didn't have before and it's
+    the first key greater than the key that was previously the last key of
+ the leaf (key (280 108 294912));
+
+10) The extent item at slot 30 covers the range from 303104 to 376831
+ which is in our target range, so we process it, despite having already
+ created an ordered extent against this extent for the file range from
+ 348160 to 376831. This is because we skip to the next extent item only
+    if its end is less than or equal to the start of our delalloc range,
+    and not less than or equal to the current offset ('cur_offset');
+
+11) As a result we compute 'num_bytes' as:
+
+ num_bytes = min(end + 1, extent_end) - cur_offset;
+ = min(438271 + 1, 376832) - 376832 = 0
+
+12) We then call create_io_em() for a 0 bytes range starting at offset
+ 376832;
+
+13) Then create_io_em() enters an infinite loop because its calls to
+ btrfs_drop_extent_cache() do nothing due to the 0 length range
+ passed to it. So no existing extent maps that cover the offset
+ 376832 get removed, and therefore calls to add_extent_mapping()
+ return -EEXIST, resulting in an infinite loop. This loop from
+ create_io_em() is the following:
+
+ do {
+ btrfs_drop_extent_cache(BTRFS_I(inode), em->start,
+ em->start + em->len - 1, 0);
+ write_lock(&em_tree->lock);
+ ret = add_extent_mapping(em_tree, em, 1);
+ write_unlock(&em_tree->lock);
+ /*
+ * The caller has taken lock_extent(), who could race with us
+ * to add em?
+ */
+ } while (ret == -EEXIST);
+
+Also, each call to btrfs_drop_extent_cache() triggers a warning because
+the start offset passed to it (376832) is greater than the end offset
+passed to it (376832 - 1 = 376831), due to the 0 length:
+
+ [258532.052621] ------------[ cut here ]------------
+ [258532.052643] WARNING: CPU: 0 PID: 9987 at fs/btrfs/file.c:602 btrfs_drop_extent_cache+0x3f4/0x590 [btrfs]
+ (...)
+ [258532.052672] CPU: 0 PID: 9987 Comm: fsx Tainted: G W 5.4.0-rc7-btrfs-next-64 #1
+ [258532.052673] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.org 04/01/2014
+ [258532.052691] RIP: 0010:btrfs_drop_extent_cache+0x3f4/0x590 [btrfs]
+ (...)
+ [258532.052695] RSP: 0018:ffffb4be0153f860 EFLAGS: 00010287
+ [258532.052700] RAX: ffff975b445ee360 RBX: ffff975b44eb3e08 RCX: 0000000000000000
+ [258532.052700] RDX: 0000000000038fff RSI: 0000000000039000 RDI: ffff975b445ee308
+ [258532.052700] RBP: 0000000000038fff R08: 0000000000000000 R09: 0000000000000001
+ [258532.052701] R10: ffff975b513c5c10 R11: 00000000e3c0cfa9 R12: 0000000000039000
+ [258532.052703] R13: ffff975b445ee360 R14: 00000000ffffffef R15: ffff975b445ee308
+ [258532.052705] FS: 00007f86a821de80(0000) GS:ffff975b76a00000(0000) knlGS:0000000000000000
+ [258532.052707] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ [258532.052708] CR2: 00007fdacf0f3ab4 CR3: 00000001f9d26002 CR4: 00000000003606f0
+ [258532.052712] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ [258532.052717] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ [258532.052717] Call Trace:
+ [258532.052718] ? preempt_schedule_common+0x32/0x70
+ [258532.052722] ? ___preempt_schedule+0x16/0x20
+ [258532.052741] create_io_em+0xff/0x180 [btrfs]
+ [258532.052767] run_delalloc_nocow+0x942/0xb10 [btrfs]
+ [258532.052791] btrfs_run_delalloc_range+0x30b/0x520 [btrfs]
+ [258532.052812] ? find_lock_delalloc_range+0x221/0x250 [btrfs]
+ [258532.052834] writepage_delalloc+0xe4/0x140 [btrfs]
+ [258532.052855] __extent_writepage+0x110/0x4e0 [btrfs]
+ [258532.052876] extent_write_cache_pages+0x21c/0x480 [btrfs]
+ [258532.052906] extent_writepages+0x52/0xb0 [btrfs]
+ [258532.052911] do_writepages+0x23/0x80
+ [258532.052915] __filemap_fdatawrite_range+0xd2/0x110
+ [258532.052938] btrfs_fdatawrite_range+0x1b/0x50 [btrfs]
+ [258532.052954] start_ordered_ops+0x57/0xa0 [btrfs]
+ [258532.052973] ? btrfs_sync_file+0x225/0x490 [btrfs]
+ [258532.052988] btrfs_sync_file+0x225/0x490 [btrfs]
+ [258532.052997] __x64_sys_msync+0x199/0x200
+ [258532.053004] do_syscall_64+0x5c/0x250
+ [258532.053007] entry_SYSCALL_64_after_hwframe+0x49/0xbe
+ [258532.053010] RIP: 0033:0x7f86a7dfd760
+ (...)
+ [258532.053014] RSP: 002b:00007ffd99af0368 EFLAGS: 00000246 ORIG_RAX: 000000000000001a
+ [258532.053016] RAX: ffffffffffffffda RBX: 0000000000000ec9 RCX: 00007f86a7dfd760
+ [258532.053017] RDX: 0000000000000004 RSI: 000000000000836c RDI: 00007f86a8221000
+ [258532.053019] RBP: 0000000000021ec9 R08: 0000000000000003 R09: 00007f86a812037c
+ [258532.053020] R10: 0000000000000001 R11: 0000000000000246 R12: 00000000000074a3
+ [258532.053021] R13: 00007f86a8221000 R14: 000000000000836c R15: 0000000000000001
+ [258532.053032] irq event stamp: 1653450494
+ [258532.053035] hardirqs last enabled at (1653450493): [<ffffffff9dec69f9>] _raw_spin_unlock_irq+0x29/0x50
+ [258532.053037] hardirqs last disabled at (1653450494): [<ffffffff9d4048ea>] trace_hardirqs_off_thunk+0x1a/0x20
+ [258532.053039] softirqs last enabled at (1653449852): [<ffffffff9e200466>] __do_softirq+0x466/0x6bd
+ [258532.053042] softirqs last disabled at (1653449845): [<ffffffff9d4c8a0c>] irq_exit+0xec/0x120
+ [258532.053043] ---[ end trace 8476fce13d9ce20a ]---
+
+Which results in flooding dmesg/syslog since btrfs_drop_extent_cache()
+uses WARN_ON() and not WARN_ON_ONCE().
+
+So fix this issue by changing run_delalloc_nocow()'s loop to move to the
+next extent item when the current extent item ends at an offset less than
+or equal to the current offset instead of the start offset.
+
+Fixes: 80ff385665b7fc ("Btrfs: update nodatacow code v2")
+CC: stable@vger.kernel.org # 4.4+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/inode.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -1439,10 +1439,10 @@ next_slot:
+ disk_num_bytes =
+ btrfs_file_extent_disk_num_bytes(leaf, fi);
+ /*
+- * If extent we got ends before our range starts, skip
+- * to next extent
++ * If the extent we got ends before our current offset,
++ * skip to the next extent.
+ */
+- if (extent_end <= start) {
++ if (extent_end <= cur_offset) {
+ path->slots[0]++;
+ goto next_slot;
+ }
--- /dev/null
+From 21d37340912d74b1222d43c11aa9dd0687162573 Mon Sep 17 00:00:00 2001
+From: Arnd Bergmann <arnd@arndb.de>
+Date: Fri, 29 Nov 2019 11:28:22 +0100
+Subject: compat_ioctl: block: handle BLKGETZONESZ/BLKGETNRZONES
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+commit 21d37340912d74b1222d43c11aa9dd0687162573 upstream.
+
+These were added to blkdev_ioctl() in v4.20 but not to
+compat_blkdev_ioctl(), so add them now.
+
+Cc: <stable@vger.kernel.org> # v4.20+
+Fixes: 72cd87576d1d ("block: Introduce BLKGETZONESZ ioctl")
+Fixes: 65e4e3eee83d ("block: Introduce BLKGETNRZONES ioctl")
+Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ block/compat_ioctl.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/block/compat_ioctl.c
++++ b/block/compat_ioctl.c
+@@ -357,6 +357,8 @@ long compat_blkdev_ioctl(struct file *fi
+ case BLKRRPART:
+ case BLKREPORTZONE:
+ case BLKRESETZONE:
++ case BLKGETZONESZ:
++ case BLKGETNRZONES:
+ return blkdev_ioctl(bdev, mode, cmd,
+ (unsigned long)compat_ptr(arg));
+ case BLKBSZSET_32:
--- /dev/null
+From 673bdf8ce0a387ef585c13b69a2676096c6edfe9 Mon Sep 17 00:00:00 2001
+From: Arnd Bergmann <arnd@arndb.de>
+Date: Fri, 29 Nov 2019 11:28:22 +0100
+Subject: compat_ioctl: block: handle BLKREPORTZONE/BLKRESETZONE
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+commit 673bdf8ce0a387ef585c13b69a2676096c6edfe9 upstream.
+
+These were added to blkdev_ioctl() but not to compat_blkdev_ioctl(),
+so add them now.
+
+Cc: <stable@vger.kernel.org> # v4.10+
+Fixes: 3ed05a987e0f ("blk-zoned: implement ioctls")
+Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ block/compat_ioctl.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/block/compat_ioctl.c
++++ b/block/compat_ioctl.c
+@@ -355,6 +355,8 @@ long compat_blkdev_ioctl(struct file *fi
+ * but we call blkdev_ioctl, which gets the lock for us
+ */
+ case BLKRRPART:
++ case BLKREPORTZONE:
++ case BLKRESETZONE:
+ return blkdev_ioctl(bdev, mode, cmd,
+ (unsigned long)compat_ptr(arg));
+ case BLKBSZSET_32:
--- /dev/null
+From b2c0fcd28772f99236d261509bcd242135677965 Mon Sep 17 00:00:00 2001
+From: Arnd Bergmann <arnd@arndb.de>
+Date: Fri, 29 Nov 2019 11:28:22 +0100
+Subject: compat_ioctl: block: handle Persistent Reservations
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+commit b2c0fcd28772f99236d261509bcd242135677965 upstream.
+
+These were added to blkdev_ioctl() in linux-5.5 but not to
+compat_blkdev_ioctl(), so add them now.
+
+Cc: <stable@vger.kernel.org> # v4.4+
+Fixes: bbd3e064362e ("block: add an API for Persistent Reservations")
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+Fold in followup patch from Arnd with missing pr.h header include.
+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+
+---
+ block/compat_ioctl.c | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/block/compat_ioctl.c
++++ b/block/compat_ioctl.c
+@@ -6,6 +6,7 @@
+ #include <linux/compat.h>
+ #include <linux/elevator.h>
+ #include <linux/hdreg.h>
++#include <linux/pr.h>
+ #include <linux/slab.h>
+ #include <linux/syscalls.h>
+ #include <linux/types.h>
+@@ -401,6 +402,14 @@ long compat_blkdev_ioctl(struct file *fi
+ case BLKTRACETEARDOWN: /* compatible */
+ ret = blk_trace_ioctl(bdev, cmd, compat_ptr(arg));
+ return ret;
++ case IOC_PR_REGISTER:
++ case IOC_PR_RESERVE:
++ case IOC_PR_RELEASE:
++ case IOC_PR_PREEMPT:
++ case IOC_PR_PREEMPT_ABORT:
++ case IOC_PR_CLEAR:
++ return blkdev_ioctl(bdev, mode, cmd,
++ (unsigned long)compat_ptr(arg));
+ default:
+ if (disk->fops->compat_ioctl)
+ ret = disk->fops->compat_ioctl(bdev, mode, cmd, arg);
--- /dev/null
+From a40c94be2336f3002563c9ae16572143ae3422e2 Mon Sep 17 00:00:00 2001
+From: Paul Cercueil <paul@crapouillou.net>
+Date: Tue, 10 Dec 2019 17:55:45 +0100
+Subject: dmaengine: dma-jz4780: Also break descriptor chains on JZ4725B
+
+From: Paul Cercueil <paul@crapouillou.net>
+
+commit a40c94be2336f3002563c9ae16572143ae3422e2 upstream.
+
+It turns out that the JZ4725B displays the same buggy behaviour as the
+JZ4740 that was described in commit f4c255f1a747 ("dmaengine: dma-jz4780:
+Break descriptor chains on JZ4740").
+
+Work around it by using the same workaround previously used for the
+JZ4740.
+
+Fixes: f4c255f1a747 ("dmaengine: dma-jz4780: Break descriptor chains on JZ4740")
+
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Paul Cercueil <paul@crapouillou.net>
+Link: https://lore.kernel.org/r/20191210165545.59690-1-paul@crapouillou.net
+Signed-off-by: Vinod Koul <vkoul@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/dma/dma-jz4780.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/dma/dma-jz4780.c
++++ b/drivers/dma/dma-jz4780.c
+@@ -1004,7 +1004,8 @@ static const struct jz4780_dma_soc_data
+ static const struct jz4780_dma_soc_data jz4725b_dma_soc_data = {
+ .nb_channels = 6,
+ .transfer_ord_max = 5,
+- .flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC,
++ .flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC |
++ JZ_SOC_DATA_BREAK_LINKS,
+ };
+
+ static const struct jz4780_dma_soc_data jz4770_dma_soc_data = {
--- /dev/null
+From 53a256a9b925b47c7e67fc1f16ca41561a7b877c Mon Sep 17 00:00:00 2001
+From: Lukas Wunner <lukas@wunner.de>
+Date: Thu, 5 Dec 2019 12:54:49 +0100
+Subject: dmaengine: Fix access to uninitialized dma_slave_caps
+
+From: Lukas Wunner <lukas@wunner.de>
+
+commit 53a256a9b925b47c7e67fc1f16ca41561a7b877c upstream.
+
+dmaengine_desc_set_reuse() allocates a struct dma_slave_caps on the
+stack, populates it using dma_get_slave_caps() and then accesses one
+of its members.
+
+However, dma_get_slave_caps() may fail and this isn't accounted for,
+leading to a legitimate warning from gcc-4.9 (but not newer versions):
+
+ In file included from drivers/spi/spi-bcm2835.c:19:0:
+ drivers/spi/spi-bcm2835.c: In function 'dmaengine_desc_set_reuse':
+>> include/linux/dmaengine.h:1370:10: warning: 'caps.descriptor_reuse' is used uninitialized in this function [-Wuninitialized]
+ if (caps.descriptor_reuse) {
+
+Fix it, thereby also silencing the gcc-4.9 warning.
+
+The issue has been present for 4 years but surfaces only now that
+the first caller of dmaengine_desc_set_reuse() has been added in
+spi-bcm2835.c. Another user of reusable DMA descriptors has existed
+for a while in pxa_camera.c, but it sets the DMA_CTRL_REUSE flag
+directly instead of calling dmaengine_desc_set_reuse(). Nevertheless,
+tag this commit for stable in case there are out-of-tree users.
+
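+As a usage sketch (hypothetical driver fragment, not from this patch;
+chan, buf and len are assumed to be set up elsewhere) showing why the
+return value matters:
+
+	struct dma_async_tx_descriptor *tx;
+
+	tx = dmaengine_prep_slave_single(chan, buf, len, DMA_MEM_TO_DEV, 0);
+	if (!tx)
+		return -ENOMEM;
+	/* with this fix, the call also fails if dma_get_slave_caps() fails,
+	 * not only if the channel doesn't support descriptor reuse */
+	if (dmaengine_desc_set_reuse(tx))
+		pr_warn("falling back to one-shot descriptors\n");
+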
+Fixes: 272420214d26 ("dmaengine: Add DMA_CTRL_REUSE")
+Reported-by: kbuild test robot <lkp@intel.com>
+Signed-off-by: Lukas Wunner <lukas@wunner.de>
+Cc: stable@vger.kernel.org # v4.3+
+Link: https://lore.kernel.org/r/ca92998ccc054b4f2bfd60ef3adbab2913171eac.1575546234.git.lukas@wunner.de
+Signed-off-by: Vinod Koul <vkoul@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/dmaengine.h | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/include/linux/dmaengine.h
++++ b/include/linux/dmaengine.h
+@@ -1364,8 +1364,11 @@ static inline int dma_get_slave_caps(str
+ static inline int dmaengine_desc_set_reuse(struct dma_async_tx_descriptor *tx)
+ {
+ struct dma_slave_caps caps;
++ int ret;
+
+- dma_get_slave_caps(tx->chan, &caps);
++ ret = dma_get_slave_caps(tx->chan, &caps);
++ if (ret)
++ return ret;
+
+ if (caps.descriptor_reuse) {
+ tx->flags |= DMA_CTRL_REUSE;
--- /dev/null
+From a5b0dc5a46c221725c43bd9b01570239a4cd78b1 Mon Sep 17 00:00:00 2001
+From: Arnd Bergmann <arnd@arndb.de>
+Date: Wed, 11 Dec 2019 14:39:28 +0100
+Subject: gcc-plugins: make it possible to disable CONFIG_GCC_PLUGINS again
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+commit a5b0dc5a46c221725c43bd9b01570239a4cd78b1 upstream.
+
+I noticed that randconfig builds with gcc no longer produce a lot of
+ccache hits, unlike with clang, and traced this back to plugins
+now being enabled unconditionally if they are supported.
+
+I am now working around this by adding
+
+ export CCACHE_COMPILERCHECK=/usr/bin/size -A %compiler%
+
+to my top-level Makefile. This changes the heuristic that ccache uses
+to determine whether the plugins are the same after a 'make clean'.
+
+However, it also seems that being able to just turn off the plugins is
+generally useful: at least for build testing they add noticeable overhead
+but do not find a lot of additional bugs, and turning them off may be
+easier for ccache users than my workaround.
+
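+With this change applied, the plugins can be turned off for such builds,
+for example (one possible way, using the in-tree config script):
+
+	./scripts/config -d GCC_PLUGINS
+	make olddefconfig
+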
+Fixes: 9f671e58159a ("security: Create "kernel hardening" config area")
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Acked-by: Ard Biesheuvel <ardb@kernel.org>
+Reviewed-by: Masahiro Yamada <masahiroy@kernel.org>
+Link: https://lore.kernel.org/r/20191211133951.401933-1-arnd@arndb.de
+Cc: stable@vger.kernel.org
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ scripts/gcc-plugins/Kconfig | 9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+--- a/scripts/gcc-plugins/Kconfig
++++ b/scripts/gcc-plugins/Kconfig
+@@ -14,8 +14,8 @@ config HAVE_GCC_PLUGINS
+ An arch should select this symbol if it supports building with
+ GCC plugins.
+
+-config GCC_PLUGINS
+- bool
++menuconfig GCC_PLUGINS
++ bool "GCC plugins"
+ depends on HAVE_GCC_PLUGINS
+ depends on PLUGIN_HOSTCC != ""
+ default y
+@@ -25,8 +25,7 @@ config GCC_PLUGINS
+
+ See Documentation/core-api/gcc-plugins.rst for details.
+
+-menu "GCC plugins"
+- depends on GCC_PLUGINS
++if GCC_PLUGINS
+
+ config GCC_PLUGIN_CYC_COMPLEXITY
+ bool "Compute the cyclomatic complexity of a function" if EXPERT
+@@ -113,4 +112,4 @@ config GCC_PLUGIN_ARM_SSP_PER_TASK
+ bool
+ depends on GCC_PLUGINS && ARM
+
+-endmenu
++endif
--- /dev/null
+From 98ca480a8f22fdbd768e3dad07024c8d4856576c Mon Sep 17 00:00:00 2001
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Sun, 22 Dec 2019 20:45:28 +0200
+Subject: locks: print unsigned ino in /proc/locks
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+commit 98ca480a8f22fdbd768e3dad07024c8d4856576c upstream.
+
+An ino is unsigned, so display it as such in /proc/locks.
+
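+A tiny userspace illustration (with a made-up inode number) of why the
+conversion specifier matters for large values:
+
+	#include <stdio.h>
+
+	int main(void)
+	{
+		unsigned long ino = 18446744073709551516UL; /* 2^64 - 100 */
+
+		printf("%ld\n", (long)ino); /* what "%ld" shows: -100 */
+		printf("%lu\n", ino);       /* correct: 18446744073709551516 */
+		return 0;
+	}
+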
+Cc: stable@vger.kernel.org
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/locks.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/locks.c
++++ b/fs/locks.c
+@@ -2853,7 +2853,7 @@ static void lock_get_status(struct seq_f
+ }
+ if (inode) {
+ /* userspace relies on this representation of dev_t */
+- seq_printf(f, "%d %02x:%02x:%ld ", fl_pid,
++ seq_printf(f, "%d %02x:%02x:%lu ", fl_pid,
+ MAJOR(inode->i_sb->s_dev),
+ MINOR(inode->i_sb->s_dev), inode->i_ino);
+ } else {
--- /dev/null
+From 84029fd04c201a4c7e0b07ba262664900f47c6f5 Mon Sep 17 00:00:00 2001
+From: Shakeel Butt <shakeelb@google.com>
+Date: Sat, 4 Jan 2020 12:59:43 -0800
+Subject: memcg: account security cred as well to kmemcg
+
+From: Shakeel Butt <shakeelb@google.com>
+
+commit 84029fd04c201a4c7e0b07ba262664900f47c6f5 upstream.
+
+The cred_jar kmem_cache is already memcg accounted in the current kernel
+but cred->security is not. Account cred->security to kmemcg.
+
+Recently we saw high root slab usage on our production systems and, on
+further inspection, found a buggy application leaking processes. Though
+that buggy application was contained within its memcg, we observed a
+couple of GiB of additional system memory overhead during that period.
+This overhead can adversely impact the isolation on the system.
+
+One source of high overhead we found was cred->security objects, which
+have a lifetime of at least the life of the process which allocated
+them.
+
+Link: http://lkml.kernel.org/r/20191205223721.40034-1-shakeelb@google.com
+Signed-off-by: Shakeel Butt <shakeelb@google.com>
+Acked-by: Chris Down <chris@chrisdown.name>
+Reviewed-by: Roman Gushchin <guro@fb.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/cred.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/kernel/cred.c
++++ b/kernel/cred.c
+@@ -223,7 +223,7 @@ struct cred *cred_alloc_blank(void)
+ new->magic = CRED_MAGIC;
+ #endif
+
+- if (security_cred_alloc_blank(new, GFP_KERNEL) < 0)
++ if (security_cred_alloc_blank(new, GFP_KERNEL_ACCOUNT) < 0)
+ goto error;
+
+ return new;
+@@ -282,7 +282,7 @@ struct cred *prepare_creds(void)
+ new->security = NULL;
+ #endif
+
+- if (security_prepare_creds(new, old, GFP_KERNEL) < 0)
++ if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0)
+ goto error;
+ validate_creds(new);
+ return new;
+@@ -715,7 +715,7 @@ struct cred *prepare_kernel_cred(struct
+ #ifdef CONFIG_SECURITY
+ new->security = NULL;
+ #endif
+- if (security_prepare_creds(new, old, GFP_KERNEL) < 0)
++ if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0)
+ goto error;
+
+ put_cred(old);
--- /dev/null
+From feee6b2989165631b17ac6d4ccdbf6759254e85a Mon Sep 17 00:00:00 2001
+From: David Hildenbrand <david@redhat.com>
+Date: Sat, 4 Jan 2020 12:59:33 -0800
+Subject: mm/memory_hotplug: shrink zones when offlining memory
+
+From: David Hildenbrand <david@redhat.com>
+
+commit feee6b2989165631b17ac6d4ccdbf6759254e85a upstream.
+
+We currently try to shrink a single zone when removing memory. We use
+the zone of the first page of the memory we are removing. If that
+memmap was never initialized (e.g., memory was never onlined), we will
+read garbage and can trigger kernel BUGs (due to a stale pointer):
+
+ BUG: unable to handle page fault for address: 000000000000353d
+ #PF: supervisor write access in kernel mode
+ #PF: error_code(0x0002) - not-present page
+ PGD 0 P4D 0
+ Oops: 0002 [#1] SMP PTI
+ CPU: 1 PID: 7 Comm: kworker/u8:0 Not tainted 5.3.0-rc5-next-20190820+ #317
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.4
+ Workqueue: kacpi_hotplug acpi_hotplug_work_fn
+ RIP: 0010:clear_zone_contiguous+0x5/0x10
+ Code: 48 89 c6 48 89 c3 e8 2a fe ff ff 48 85 c0 75 cf 5b 5d c3 c6 85 fd 05 00 00 01 5b 5d c3 0f 1f 840
+ RSP: 0018:ffffad2400043c98 EFLAGS: 00010246
+ RAX: 0000000000000000 RBX: 0000000200000000 RCX: 0000000000000000
+ RDX: 0000000000200000 RSI: 0000000000140000 RDI: 0000000000002f40
+ RBP: 0000000140000000 R08: 0000000000000000 R09: 0000000000000001
+ R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000140000
+ R13: 0000000000140000 R14: 0000000000002f40 R15: ffff9e3e7aff3680
+ FS: 0000000000000000(0000) GS:ffff9e3e7bb00000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 000000000000353d CR3: 0000000058610000 CR4: 00000000000006e0
+ DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ Call Trace:
+ __remove_pages+0x4b/0x640
+ arch_remove_memory+0x63/0x8d
+ try_remove_memory+0xdb/0x130
+ __remove_memory+0xa/0x11
+ acpi_memory_device_remove+0x70/0x100
+ acpi_bus_trim+0x55/0x90
+ acpi_device_hotplug+0x227/0x3a0
+ acpi_hotplug_work_fn+0x1a/0x30
+ process_one_work+0x221/0x550
+ worker_thread+0x50/0x3b0
+ kthread+0x105/0x140
+ ret_from_fork+0x3a/0x50
+ Modules linked in:
+ CR2: 000000000000353d
+
+Instead, shrink the zones when offlining memory or when onlining failed.
+Introduce and use remove_pfn_range_from_zone() for that. We now
+properly shrink the zones, even if we have DIMMs where
+
+ - Some memory blocks fall into no zone (never onlined)
+
+ - Some memory blocks fall into multiple zones (offlined+re-onlined)
+
+ - Multiple memory blocks that fall into different zones
+
+Drop the zone parameter (with a potential dubious value) from
+__remove_pages() and __remove_section().
+
+Link: http://lkml.kernel.org/r/20191006085646.5768-6-david@redhat.com
+Fixes: f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online") [visible after d0dc12e86b319]
+Signed-off-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Oscar Salvador <osalvador@suse.de>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
+Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
+Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Logan Gunthorpe <logang@deltatee.com>
+Cc: <stable@vger.kernel.org> [5.0+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/mm/mmu.c | 4 +---
+ arch/ia64/mm/init.c | 4 +---
+ arch/powerpc/mm/mem.c | 3 +--
+ arch/s390/mm/init.c | 4 +---
+ arch/sh/mm/init.c | 4 +---
+ arch/x86/mm/init_32.c | 4 +---
+ arch/x86/mm/init_64.c | 4 +---
+ include/linux/memory_hotplug.h | 7 +++++--
+ mm/memory_hotplug.c | 31 ++++++++++++++++---------------
+ mm/memremap.c | 2 +-
+ 10 files changed, 29 insertions(+), 38 deletions(-)
+
+--- a/arch/arm64/mm/mmu.c
++++ b/arch/arm64/mm/mmu.c
+@@ -1069,7 +1069,6 @@ void arch_remove_memory(int nid, u64 sta
+ {
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+- struct zone *zone;
+
+ /*
+ * FIXME: Cleanup page tables (also in arch_add_memory() in case
+@@ -1078,7 +1077,6 @@ void arch_remove_memory(int nid, u64 sta
+ * unplug. ARCH_ENABLE_MEMORY_HOTREMOVE must not be
+ * unlocked yet.
+ */
+- zone = page_zone(pfn_to_page(start_pfn));
+- __remove_pages(zone, start_pfn, nr_pages, altmap);
++ __remove_pages(start_pfn, nr_pages, altmap);
+ }
+ #endif
+--- a/arch/ia64/mm/init.c
++++ b/arch/ia64/mm/init.c
+@@ -689,9 +689,7 @@ void arch_remove_memory(int nid, u64 sta
+ {
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+- struct zone *zone;
+
+- zone = page_zone(pfn_to_page(start_pfn));
+- __remove_pages(zone, start_pfn, nr_pages, altmap);
++ __remove_pages(start_pfn, nr_pages, altmap);
+ }
+ #endif
+--- a/arch/powerpc/mm/mem.c
++++ b/arch/powerpc/mm/mem.c
+@@ -130,10 +130,9 @@ void __ref arch_remove_memory(int nid, u
+ {
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+- struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap);
+ int ret;
+
+- __remove_pages(page_zone(page), start_pfn, nr_pages, altmap);
++ __remove_pages(start_pfn, nr_pages, altmap);
+
+ /* Remove htab bolted mappings for this section of memory */
+ start = (unsigned long)__va(start);
+--- a/arch/s390/mm/init.c
++++ b/arch/s390/mm/init.c
+@@ -291,10 +291,8 @@ void arch_remove_memory(int nid, u64 sta
+ {
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+- struct zone *zone;
+
+- zone = page_zone(pfn_to_page(start_pfn));
+- __remove_pages(zone, start_pfn, nr_pages, altmap);
++ __remove_pages(start_pfn, nr_pages, altmap);
+ vmem_remove_mapping(start, size);
+ }
+ #endif /* CONFIG_MEMORY_HOTPLUG */
+--- a/arch/sh/mm/init.c
++++ b/arch/sh/mm/init.c
+@@ -434,9 +434,7 @@ void arch_remove_memory(int nid, u64 sta
+ {
+ unsigned long start_pfn = PFN_DOWN(start);
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+- struct zone *zone;
+
+- zone = page_zone(pfn_to_page(start_pfn));
+- __remove_pages(zone, start_pfn, nr_pages, altmap);
++ __remove_pages(start_pfn, nr_pages, altmap);
+ }
+ #endif /* CONFIG_MEMORY_HOTPLUG */
+--- a/arch/x86/mm/init_32.c
++++ b/arch/x86/mm/init_32.c
+@@ -865,10 +865,8 @@ void arch_remove_memory(int nid, u64 sta
+ {
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+- struct zone *zone;
+
+- zone = page_zone(pfn_to_page(start_pfn));
+- __remove_pages(zone, start_pfn, nr_pages, altmap);
++ __remove_pages(start_pfn, nr_pages, altmap);
+ }
+ #endif
+
+--- a/arch/x86/mm/init_64.c
++++ b/arch/x86/mm/init_64.c
+@@ -1212,10 +1212,8 @@ void __ref arch_remove_memory(int nid, u
+ {
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+- struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap);
+- struct zone *zone = page_zone(page);
+
+- __remove_pages(zone, start_pfn, nr_pages, altmap);
++ __remove_pages(start_pfn, nr_pages, altmap);
+ kernel_physical_mapping_remove(start, start + size);
+ }
+ #endif /* CONFIG_MEMORY_HOTPLUG */
+--- a/include/linux/memory_hotplug.h
++++ b/include/linux/memory_hotplug.h
+@@ -125,8 +125,8 @@ static inline bool movable_node_is_enabl
+
+ extern void arch_remove_memory(int nid, u64 start, u64 size,
+ struct vmem_altmap *altmap);
+-extern void __remove_pages(struct zone *zone, unsigned long start_pfn,
+- unsigned long nr_pages, struct vmem_altmap *altmap);
++extern void __remove_pages(unsigned long start_pfn, unsigned long nr_pages,
++ struct vmem_altmap *altmap);
+
+ /* reasonably generic interface to expand the physical pages */
+ extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
+@@ -345,6 +345,9 @@ extern int add_memory(int nid, u64 start
+ extern int add_memory_resource(int nid, struct resource *resource);
+ extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
+ unsigned long nr_pages, struct vmem_altmap *altmap);
++extern void remove_pfn_range_from_zone(struct zone *zone,
++ unsigned long start_pfn,
++ unsigned long nr_pages);
+ extern bool is_memblock_offlined(struct memory_block *mem);
+ extern int sparse_add_section(int nid, unsigned long pfn,
+ unsigned long nr_pages, struct vmem_altmap *altmap);
+--- a/mm/memory_hotplug.c
++++ b/mm/memory_hotplug.c
+@@ -465,8 +465,9 @@ static void update_pgdat_span(struct pgl
+ pgdat->node_spanned_pages = node_end_pfn - node_start_pfn;
+ }
+
+-static void __remove_zone(struct zone *zone, unsigned long start_pfn,
+- unsigned long nr_pages)
++void __ref remove_pfn_range_from_zone(struct zone *zone,
++ unsigned long start_pfn,
++ unsigned long nr_pages)
+ {
+ struct pglist_data *pgdat = zone->zone_pgdat;
+ unsigned long flags;
+@@ -481,28 +482,30 @@ static void __remove_zone(struct zone *z
+ return;
+ #endif
+
++ clear_zone_contiguous(zone);
++
+ pgdat_resize_lock(zone->zone_pgdat, &flags);
+ shrink_zone_span(zone, start_pfn, start_pfn + nr_pages);
+ update_pgdat_span(pgdat);
+ pgdat_resize_unlock(zone->zone_pgdat, &flags);
++
++ set_zone_contiguous(zone);
+ }
+
+-static void __remove_section(struct zone *zone, unsigned long pfn,
+- unsigned long nr_pages, unsigned long map_offset,
+- struct vmem_altmap *altmap)
++static void __remove_section(unsigned long pfn, unsigned long nr_pages,
++ unsigned long map_offset,
++ struct vmem_altmap *altmap)
+ {
+ struct mem_section *ms = __nr_to_section(pfn_to_section_nr(pfn));
+
+ if (WARN_ON_ONCE(!valid_section(ms)))
+ return;
+
+- __remove_zone(zone, pfn, nr_pages);
+ sparse_remove_section(ms, pfn, nr_pages, map_offset, altmap);
+ }
+
+ /**
+- * __remove_pages() - remove sections of pages from a zone
+- * @zone: zone from which pages need to be removed
++ * __remove_pages() - remove sections of pages
+ * @pfn: starting pageframe (must be aligned to start of a section)
+ * @nr_pages: number of pages to remove (must be multiple of section size)
+ * @altmap: alternative device page map or %NULL if default memmap is used
+@@ -512,16 +515,14 @@ static void __remove_section(struct zone
+ * sure that pages are marked reserved and zones are adjust properly by
+ * calling offline_pages().
+ */
+-void __remove_pages(struct zone *zone, unsigned long pfn,
+- unsigned long nr_pages, struct vmem_altmap *altmap)
++void __remove_pages(unsigned long pfn, unsigned long nr_pages,
++ struct vmem_altmap *altmap)
+ {
+ unsigned long map_offset = 0;
+ unsigned long nr, start_sec, end_sec;
+
+ map_offset = vmem_altmap_offset(altmap);
+
+- clear_zone_contiguous(zone);
+-
+ if (check_pfn_span(pfn, nr_pages, "remove"))
+ return;
+
+@@ -533,13 +534,11 @@ void __remove_pages(struct zone *zone, u
+ cond_resched();
+ pfns = min(nr_pages, PAGES_PER_SECTION
+ - (pfn & ~PAGE_SECTION_MASK));
+- __remove_section(zone, pfn, pfns, map_offset, altmap);
++ __remove_section(pfn, pfns, map_offset, altmap);
+ pfn += pfns;
+ nr_pages -= pfns;
+ map_offset = 0;
+ }
+-
+- set_zone_contiguous(zone);
+ }
+
+ int set_online_page_callback(online_page_callback_t callback)
+@@ -867,6 +866,7 @@ failed_addition:
+ (unsigned long long) pfn << PAGE_SHIFT,
+ (((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1);
+ memory_notify(MEM_CANCEL_ONLINE, &arg);
++ remove_pfn_range_from_zone(zone, pfn, nr_pages);
+ mem_hotplug_done();
+ return ret;
+ }
+@@ -1602,6 +1602,7 @@ static int __ref __offline_pages(unsigne
+ writeback_set_ratelimit();
+
+ memory_notify(MEM_OFFLINE, &arg);
++ remove_pfn_range_from_zone(zone, start_pfn, nr_pages);
+ mem_hotplug_done();
+ return 0;
+
+--- a/mm/memremap.c
++++ b/mm/memremap.c
+@@ -120,7 +120,7 @@ void memunmap_pages(struct dev_pagemap *
+
+ mem_hotplug_begin();
+ if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
+- __remove_pages(page_zone(first_page), PHYS_PFN(res->start),
++ __remove_pages(PHYS_PFN(res->start),
+ PHYS_PFN(resource_size(res)), NULL);
+ } else {
+ arch_remove_memory(nid, res->start, resource_size(res),
--- /dev/null
+From e0153fc2c7606f101392b682e720a7a456d6c766 Mon Sep 17 00:00:00 2001
+From: Yang Shi <yang.shi@linux.alibaba.com>
+Date: Sat, 4 Jan 2020 12:59:46 -0800
+Subject: mm: move_pages: return valid node id in status if the page is already on the target node
+
+From: Yang Shi <yang.shi@linux.alibaba.com>
+
+commit e0153fc2c7606f101392b682e720a7a456d6c766 upstream.
+
+Felix Abecassis reports that move_pages() returns random status values if
+the pages are already on the target node, as shown by the test program
+below:
+
+ int main(void)
+ {
+ const long node_id = 1;
+ const long page_size = sysconf(_SC_PAGESIZE);
+ const int64_t num_pages = 8;
+
+ unsigned long nodemask = 1 << node_id;
+ long ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask));
+ if (ret < 0)
+ return (EXIT_FAILURE);
+
+ void **pages = malloc(sizeof(void*) * num_pages);
+ for (int i = 0; i < num_pages; ++i) {
+ pages[i] = mmap(NULL, page_size, PROT_WRITE | PROT_READ,
+ MAP_PRIVATE | MAP_POPULATE | MAP_ANONYMOUS,
+ -1, 0);
+ if (pages[i] == MAP_FAILED)
+ return (EXIT_FAILURE);
+ }
+
+ ret = set_mempolicy(MPOL_DEFAULT, NULL, 0);
+ if (ret < 0)
+ return (EXIT_FAILURE);
+
+ int *nodes = malloc(sizeof(int) * num_pages);
+ int *status = malloc(sizeof(int) * num_pages);
+ for (int i = 0; i < num_pages; ++i) {
+ nodes[i] = node_id;
+ status[i] = 0xd0; /* simulate garbage values */
+ }
+
+ ret = move_pages(0, num_pages, pages, nodes, status, MPOL_MF_MOVE);
+ printf("move_pages: %ld\n", ret);
+ for (int i = 0; i < num_pages; ++i)
+ printf("status[%d] = %d\n", i, status[i]);
+ }
+
+Then running the program would return nonsense status values:
+
+ $ ./move_pages_bug
+ move_pages: 0
+ status[0] = 208
+ status[1] = 208
+ status[2] = 208
+ status[3] = 208
+ status[4] = 208
+ status[5] = 208
+ status[6] = 208
+ status[7] = 208
+
+This is because the status is not set if the page is already on the
+target node, but move_pages() should return a valid status as long as it
+succeeds. The valid status may be an errno or a node id.
+
+We can't simply initialize the status array to zero since the pages may
+not be on node 0. Fix it by updating the status with the node id which
+the page is already on.
+
+Link: http://lkml.kernel.org/r/1575584353-125392-1-git-send-email-yang.shi@linux.alibaba.com
+Fixes: a49bd4d71637 ("mm, numa: rework do_pages_move")
+Signed-off-by: Yang Shi <yang.shi@linux.alibaba.com>
+Reported-by: Felix Abecassis <fabecassis@nvidia.com>
+Tested-by: Felix Abecassis <fabecassis@nvidia.com>
+Suggested-by: Michal Hocko <mhocko@suse.com>
+Reviewed-by: John Hubbard <jhubbard@nvidia.com>
+Acked-by: Christoph Lameter <cl@linux.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: <stable@vger.kernel.org> [4.17+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/migrate.c | 23 +++++++++++++++++------
+ 1 file changed, 17 insertions(+), 6 deletions(-)
+
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -1516,9 +1516,11 @@ static int do_move_pages_to_node(struct
+ /*
+ * Resolves the given address to a struct page, isolates it from the LRU and
+ * puts it to the given pagelist.
+- * Returns -errno if the page cannot be found/isolated or 0 when it has been
+- * queued or the page doesn't need to be migrated because it is already on
+- * the target node
++ * Returns:
++ * errno - if the page cannot be found/isolated
++ * 0 - when it doesn't have to be migrated because it is already on the
++ * target node
++ * 1 - when it has been queued
+ */
+ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
+ int node, struct list_head *pagelist, bool migrate_all)
+@@ -1557,7 +1559,7 @@ static int add_page_for_migration(struct
+ if (PageHuge(page)) {
+ if (PageHead(page)) {
+ isolate_huge_page(page, pagelist);
+- err = 0;
++ err = 1;
+ }
+ } else {
+ struct page *head;
+@@ -1567,7 +1569,7 @@ static int add_page_for_migration(struct
+ if (err)
+ goto out_putpage;
+
+- err = 0;
++ err = 1;
+ list_add_tail(&head->lru, pagelist);
+ mod_node_page_state(page_pgdat(head),
+ NR_ISOLATED_ANON + page_is_file_cache(head),
+@@ -1644,8 +1646,17 @@ static int do_pages_move(struct mm_struc
+ */
+ err = add_page_for_migration(mm, addr, current_node,
+ &pagelist, flags & MPOL_MF_MOVE_ALL);
+- if (!err)
++
++ if (!err) {
++ /* The page is already on the target node */
++ err = store_status(status, i, current_node, 1);
++ if (err)
++ goto out_flush;
+ continue;
++ } else if (err > 0) {
++ /* The page is successfully queued for migration */
++ continue;
++ }
+
+ err = store_status(status, i, err, 1);
+ if (err)
--- /dev/null
+From 941f762bcb276259a78e7931674668874ccbda59 Mon Sep 17 00:00:00 2001
+From: Ilya Dryomov <idryomov@gmail.com>
+Date: Sat, 4 Jan 2020 13:00:09 -0800
+Subject: mm/oom: fix pgtables units mismatch in Killed process message
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit 941f762bcb276259a78e7931674668874ccbda59 upstream.
+
+pr_err() expects kB, but mm_pgtables_bytes() returns the number of bytes.
+As everything else is printed in kB, I chose to fix the value rather than
+the string.
+
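+(With the values from the fixed log below, 1294336 bytes >> 10 = 1264 kB,
+which is what the corrected "pgtables:1264kB" field shows.)
+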
+Before:
+
+[ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name
+...
+[ 1878] 1000 1878 217253 151144 1269760 0 0 python
+...
+Out of memory: Killed process 1878 (python) total-vm:869012kB, anon-rss:604572kB, file-rss:4kB, shmem-rss:0kB, UID:1000 pgtables:1269760kB oom_score_adj:0
+
+After:
+
+[ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name
+...
+[ 1436] 1000 1436 217253 151890 1294336 0 0 python
+...
+Out of memory: Killed process 1436 (python) total-vm:869012kB, anon-rss:607516kB, file-rss:44kB, shmem-rss:0kB, UID:1000 pgtables:1264kB oom_score_adj:0
+
+Link: http://lkml.kernel.org/r/20191211202830.1600-1-idryomov@gmail.com
+Fixes: 70cb6d267790 ("mm/oom: add oom_score_adj and pgtables to Killed process message")
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
+Acked-by: David Rientjes <rientjes@google.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Edward Chron <echron@arista.com>
+Cc: David Rientjes <rientjes@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/oom_kill.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/oom_kill.c
++++ b/mm/oom_kill.c
+@@ -890,7 +890,7 @@ static void __oom_kill_process(struct ta
+ K(get_mm_counter(mm, MM_FILEPAGES)),
+ K(get_mm_counter(mm, MM_SHMEMPAGES)),
+ from_kuid(&init_user_ns, task_uid(victim)),
+- mm_pgtables_bytes(mm), victim->signal->oom_score_adj);
++ mm_pgtables_bytes(mm) >> 10, victim->signal->oom_score_adj);
+ task_unlock(victim);
+
+ /*
--- /dev/null
+From ac8f05da5174c560de122c499ce5dfb5d0dfbee5 Mon Sep 17 00:00:00 2001
+From: Chanho Min <chanho.min@lge.com>
+Date: Sat, 4 Jan 2020 12:59:36 -0800
+Subject: mm/zsmalloc.c: fix the migrated zspage statistics.
+
+From: Chanho Min <chanho.min@lge.com>
+
+commit ac8f05da5174c560de122c499ce5dfb5d0dfbee5 upstream.
+
+When a zspage is migrated to another zone, the zone page state should be
+updated as well; otherwise the NR_ZSPAGES counter for each zone shows
+wrong counts, visible e.g. in /proc/zoneinfo in practice.
+
+Link: http://lkml.kernel.org/r/1575434841-48009-1-git-send-email-chanho.min@lge.com
+Fixes: 91537fee0013 ("mm: add NR_ZSMALLOC to vmstat")
+Signed-off-by: Chanho Min <chanho.min@lge.com>
+Signed-off-by: Jinsuk Choi <jjinsuk.choi@lge.com>
+Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Acked-by: Minchan Kim <minchan@kernel.org>
+Cc: <stable@vger.kernel.org> [4.9+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/zsmalloc.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/mm/zsmalloc.c
++++ b/mm/zsmalloc.c
+@@ -2069,6 +2069,11 @@ static int zs_page_migrate(struct addres
+ zs_pool_dec_isolated(pool);
+ }
+
++ if (page_zone(newpage) != page_zone(page)) {
++ dec_zone_page_state(page, NR_ZSPAGES);
++ inc_zone_page_state(newpage, NR_ZSPAGES);
++ }
++
+ reset_page(page);
+ put_page(page);
+ page = newpage;
--- /dev/null
+From b73eba2a867e10b9b4477738677341f3307c07bb Mon Sep 17 00:00:00 2001
+From: Gang He <GHe@suse.com>
+Date: Sat, 4 Jan 2020 13:00:22 -0800
+Subject: ocfs2: fix the crash due to call ocfs2_get_dlm_debug once less
+
+From: Gang He <GHe@suse.com>
+
+commit b73eba2a867e10b9b4477738677341f3307c07bb upstream.
+
+Because ocfs2_get_dlm_debug() is called once less than needed here, the
+ocfs2 file system will trigger a system crash, usually after the ocfs2
+file system is unmounted.
+
+The crash is caused by generic memory corruption, so the backtraces are
+not always the same. For example,
+
+ ocfs2: Unmounting device (253,16) on (node 172167785)
+ general protection fault: 0000 [#1] SMP PTI
+ CPU: 3 PID: 14107 Comm: fence_legacy Kdump:
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
+ RIP: 0010:__kmalloc+0xa5/0x2a0
+ Code: 00 00 4d 8b 07 65 4d 8b
+ RSP: 0018:ffffaa1fc094bbe8 EFLAGS: 00010286
+ RAX: 0000000000000000 RBX: d310a8800d7a3faf RCX: 0000000000000000
+ RDX: 0000000000000000 RSI: 0000000000000dc0 RDI: ffff96e68fc036c0
+ RBP: d310a8800d7a3faf R08: ffff96e6ffdb10a0 R09: 00000000752e7079
+ R10: 000000000001c513 R11: 0000000004091041 R12: 0000000000000dc0
+ R13: 0000000000000039 R14: ffff96e68fc036c0 R15: ffff96e68fc036c0
+ FS: 00007f699dfba540(0000) GS:ffff96e6ffd80000(0000) knlGS:00000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 000055f3a9d9b768 CR3: 000000002cd1c000 CR4: 00000000000006e0
+ Call Trace:
+ ext4_htree_store_dirent+0x35/0x100 [ext4]
+ htree_dirblock_to_tree+0xea/0x290 [ext4]
+ ext4_htree_fill_tree+0x1c1/0x2d0 [ext4]
+ ext4_readdir+0x67c/0x9d0 [ext4]
+ iterate_dir+0x8d/0x1a0
+ __x64_sys_getdents+0xab/0x130
+ do_syscall_64+0x60/0x1f0
+ entry_SYSCALL_64_after_hwframe+0x49/0xbe
+ RIP: 0033:0x7f699d33a9fb
+
+This regression was introduced by commit e581595ea29c ("ocfs: no need
+to check return value of debugfs_create functions").
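+
+The imbalance can be modelled with a plain reference count (a
+simplified stand-in for the kref inside the dlm debug context; the
+function names here are made up for this sketch):
+
+  #include <stdlib.h>
+
+  struct dlm_debug { int ref; };
+
+  static void get_debug(struct dlm_debug *d) { d->ref++; }
+
+  static void put_debug(struct dlm_debug *d)
+  {
+      if (--d->ref == 0)
+          free(d);    /* freeing while a user remains corrupts memory */
+  }
+
+  int main(void)
+  {
+      struct dlm_debug *d = calloc(1, sizeof(*d));
+
+      if (!d)
+          return 1;
+      d->ref = 1;     /* reference taken at allocation */
+      get_debug(d);   /* the second user takes its own reference;
+                       * this is the call the patch adds */
+      put_debug(d);   /* first user done */
+      put_debug(d);   /* last user done: freed exactly once */
+      return 0;
+  }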
+
+Link: http://lkml.kernel.org/r/20191225061501.13587-1-ghe@suse.com
+Fixes: e581595ea29c ("ocfs: no need to check return value of debugfs_create functions")
+Signed-off-by: Gang He <ghe@suse.com>
+Acked-by: Joseph Qi <joseph.qi@linux.alibaba.com>
+Cc: Mark Fasheh <mark@fasheh.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Junxiao Bi <junxiao.bi@oracle.com>
+Cc: Changwei Ge <gechangwei@live.cn>
+Cc: Gang He <ghe@suse.com>
+Cc: Jun Piao <piaojun@huawei.com>
+Cc: <stable@vger.kernel.org> [5.3+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/dlmglue.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/ocfs2/dlmglue.c
++++ b/fs/ocfs2/dlmglue.c
+@@ -3282,6 +3282,7 @@ static void ocfs2_dlm_init_debug(struct
+
+ debugfs_create_u32("locking_filter", 0600, osb->osb_debug_root,
+ &dlm_debug->d_filter_secs);
++ ocfs2_get_dlm_debug(dlm_debug);
+ }
+
+ static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb)
--- /dev/null
+From 8df955a32a73315055e0cd187cbb1cea5820394b Mon Sep 17 00:00:00 2001
+From: Kees Cook <keescook@chromium.org>
+Date: Mon, 30 Dec 2019 11:48:10 -0800
+Subject: pstore/ram: Fix error-path memory leak in persistent_ram_new() callers
+
+From: Kees Cook <keescook@chromium.org>
+
+commit 8df955a32a73315055e0cd187cbb1cea5820394b upstream.
+
+Callers that allocate a label for persistent_ram_new() must clean up
+the allocation themselves if the call fails.
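+
+The ownership rule being enforced, as a hedged userspace sketch (the
+stand-in assumes, as the fix does, that the callee does not take
+ownership of the label when it fails):
+
+  #include <stdio.h>
+  #include <stdlib.h>
+  #include <string.h>
+
+  /* Simplified stand-in for persistent_ram_new(): returns NULL on
+   * failure and, in that case, leaves the label owned by the caller. */
+  static void *prz_new(char *label)
+  {
+      (void)label;
+      return NULL;    /* simulate the failing mem region request */
+  }
+
+  int main(void)
+  {
+      char *label = strdup("ramoops:dump");   /* like kasprintf() */
+      void *prz = prz_new(label);
+
+      if (!prz) {
+          fprintf(stderr, "failed to request mem region\n");
+          free(label);    /* the added cleanup: don't leak the label */
+          return 1;
+      }
+      return 0;
+  }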
+
+Suggested-by: Navid Emamdoost <navid.emamdoost@gmail.com>
+Fixes: 1227daa43bce ("pstore/ram: Clarify resource reservation labels")
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/lkml/20191211191353.14385-1-navid.emamdoost@gmail.com
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/pstore/ram.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/pstore/ram.c
++++ b/fs/pstore/ram.c
+@@ -588,6 +588,7 @@ static int ramoops_init_przs(const char
+ dev_err(dev, "failed to request %s mem region (0x%zx@0x%llx): %d\n",
+ name, record_size,
+ (unsigned long long)*paddr, err);
++ kfree(label);
+
+ while (i > 0) {
+ i--;
+@@ -633,6 +634,7 @@ static int ramoops_init_prz(const char *
+
+ dev_err(dev, "failed to request %s mem region (0x%zx@0x%llx): %d\n",
+ name, sz, (unsigned long long)*paddr, err);
++ kfree(label);
+ return err;
+ }
+
--- /dev/null
+From 9e5f1c19800b808a37fb9815a26d382132c26c3d Mon Sep 17 00:00:00 2001
+From: Aleksandr Yashkin <a.yashkin@inango-systems.com>
+Date: Mon, 23 Dec 2019 18:38:16 +0500
+Subject: pstore/ram: Write new dumps to start of recycled zones
+
+From: Aleksandr Yashkin <a.yashkin@inango-systems.com>
+
+commit 9e5f1c19800b808a37fb9815a26d382132c26c3d upstream.
+
+The ram_core.c routines treat przs as circular buffers. When writing a
+new crash dump, the old buffer needs to be cleared so that the new dump
+doesn't end up in the wrong place (i.e. appended after the old one).
+
+The solution to this problem is to reset the circular buffer state before
+writing a new Oops dump.
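+
+A simplified model of the failure mode (field names loosely follow
+persistent_ram_zone; wrap-around handling is omitted):
+
+  #include <stdio.h>
+  #include <string.h>
+
+  struct prz {
+      char buf[64];
+      size_t start;   /* next write offset in the circular buffer */
+      size_t size;    /* bytes currently stored */
+  };
+
+  static void prz_zap(struct prz *p)  /* like persistent_ram_zap() */
+  {
+      p->start = 0;
+      p->size = 0;
+  }
+
+  static void prz_write(struct prz *p, const char *s)
+  {
+      size_t len = strlen(s);
+
+      memcpy(p->buf + p->start, s, len);
+      p->start += len;
+      p->size += len;
+  }
+
+  int main(void)
+  {
+      struct prz p = {0};
+
+      prz_write(&p, "OLD-DUMP");
+      prz_zap(&p);    /* the fix: reset before reusing the zone;
+                       * skip this and the output below becomes
+                       * OLD-DUMPNEW-DUMP */
+      prz_write(&p, "NEW-DUMP");
+      printf("%.*s\n", (int)p.size, p.buf);   /* prints NEW-DUMP */
+      return 0;
+  }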
+
+Signed-off-by: Aleksandr Yashkin <a.yashkin@inango-systems.com>
+Signed-off-by: Nikolay Merinov <n.merinov@inango-systems.com>
+Signed-off-by: Ariel Gilman <a.gilman@inango-systems.com>
+Link: https://lore.kernel.org/r/20191223133816.28155-1-n.merinov@inango-systems.com
+Fixes: 896fc1f0c4c6 ("pstore/ram: Switch to persistent_ram routines")
+Cc: stable@vger.kernel.org
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/pstore/ram.c | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+--- a/fs/pstore/ram.c
++++ b/fs/pstore/ram.c
+@@ -407,6 +407,17 @@ static int notrace ramoops_pstore_write(
+
+ prz = cxt->dprzs[cxt->dump_write_cnt];
+
++ /*
++ * Since this is a new crash dump, we need to reset the buffer in
++ * case it still has an old dump present. Without this, the new dump
++ * will get appended, which would seriously confuse anything trying
++ * to check dump file contents. Specifically, ramoops_read_kmsg_hdr()
++ * expects to find a dump header in the beginning of buffer data, so
++ * we must reset the buffer values, in order to ensure that the
++ * header will be written to the beginning of the buffer.
++ */
++ persistent_ram_zap(prz);
++
+ /* Build header and append record contents. */
+ hlen = ramoops_write_kmsg_hdr(prz, record);
+ if (!hlen)
--- /dev/null
+From 771b894f2f3dfedc2ba5561731fffa0e39b1bbb6 Mon Sep 17 00:00:00 2001
+From: Sargun Dhillon <sargun@sargun.me>
+Date: Mon, 30 Dec 2019 12:35:03 -0800
+Subject: samples/seccomp: Zero out members based on seccomp_notif_sizes
+
+From: Sargun Dhillon <sargun@sargun.me>
+
+commit 771b894f2f3dfedc2ba5561731fffa0e39b1bbb6 upstream.
+
+The sizes with which seccomp_notif and seccomp_notif_resp are allocated
+are based on the SECCOMP_GET_NOTIF_SIZES operation. This allows these
+data structures to be extended gracefully. If userspace zeroes out a
+structure based on its own version and that version lags behind the
+kernel's, it will end up sending trailing garbage. On the other hand,
+if its version is ahead of the kernel's, it will write extra zero space
+and potentially cause corruption.
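+
+The resulting pattern, reduced to a standalone sketch (only the size
+query and the zeroing are shown; no filter or listener is set up):
+
+  #include <linux/seccomp.h>
+  #include <stdio.h>
+  #include <stdlib.h>
+  #include <string.h>
+  #include <sys/syscall.h>
+  #include <unistd.h>
+
+  int main(void)
+  {
+      struct seccomp_notif_sizes sizes;
+      struct seccomp_notif *req;
+
+      /* ask the kernel how large its notification structs are */
+      if (syscall(__NR_seccomp, SECCOMP_GET_NOTIF_SIZES, 0, &sizes) < 0) {
+          perror("seccomp(SECCOMP_GET_NOTIF_SIZES)");
+          return 1;
+      }
+
+      req = malloc(sizes.seccomp_notif);
+      if (!req)
+          return 1;
+      /* zero the kernel-reported size, not sizeof(*req): the running
+       * kernel's struct may be larger than the build-time headers' */
+      memset(req, 0, sizes.seccomp_notif);
+
+      printf("kernel seccomp_notif size: %u\n",
+             (unsigned)sizes.seccomp_notif);
+      free(req);
+      return 0;
+  }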
+
+Signed-off-by: Sargun Dhillon <sargun@sargun.me>
+Suggested-by: Tycho Andersen <tycho@tycho.ws>
+Link: https://lore.kernel.org/r/20191230203503.4925-1-sargun@sargun.me
+Fixes: fec7b6690541 ("samples: add an example of seccomp user trap")
+Cc: stable@vger.kernel.org
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ samples/seccomp/user-trap.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/samples/seccomp/user-trap.c
++++ b/samples/seccomp/user-trap.c
+@@ -298,14 +298,14 @@ int main(void)
+ req = malloc(sizes.seccomp_notif);
+ if (!req)
+ goto out_close;
+- memset(req, 0, sizeof(*req));
+
+ resp = malloc(sizes.seccomp_notif_resp);
+ if (!resp)
+ goto out_req;
+- memset(resp, 0, sizeof(*resp));
++ memset(resp, 0, sizes.seccomp_notif_resp);
+
+ while (1) {
++ memset(req, 0, sizes.seccomp_notif);
+ if (ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, req)) {
+ perror("ioctl recv");
+ goto out_resp;
--- /dev/null
+From 2882d53c9c6f3b8311d225062522f03772cf0179 Mon Sep 17 00:00:00 2001
+From: Sargun Dhillon <sargun@sargun.me>
+Date: Sat, 28 Dec 2019 22:24:50 -0800
+Subject: seccomp: Check that seccomp_notif is zeroed out by the user
+
+From: Sargun Dhillon <sargun@sargun.me>
+
+commit 2882d53c9c6f3b8311d225062522f03772cf0179 upstream.
+
+This patch is a small change in enforcement of the uapi for the
+SECCOMP_IOCTL_NOTIF_RECV ioctl. Specifically, the data structure which
+is passed (seccomp_notif) must now be zeroed out. Previously, any of
+its members could be set to nonsense values and we would ignore them.
+
+This ensures all fields are set to their zero value.
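+
+check_zeroed_user()'s contract, modelled in userspace (user-copy
+faults are ignored in this sketch; the kernel helper returns a
+negative errno for those):
+
+  #include <stdio.h>
+  #include <string.h>
+
+  /* <0 on error, 0 if any byte is nonzero, >0 if all bytes are zero */
+  static long check_zeroed(const unsigned char *buf, size_t size)
+  {
+      size_t i;
+
+      for (i = 0; i < size; i++)
+          if (buf[i])
+              return 0;
+      return 1;
+  }
+
+  int main(void)
+  {
+      unsigned char notif[80];
+
+      memset(notif, 0, sizeof(notif));
+      notif[4] = 0xff;    /* a stale/garbage member */
+
+      if (check_zeroed(notif, sizeof(notif)) <= 0) {
+          fprintf(stderr, "would fail with -EINVAL\n");
+          return 1;
+      }
+      return 0;
+  }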
+
+Signed-off-by: Sargun Dhillon <sargun@sargun.me>
+Reviewed-by: Christian Brauner <christian.brauner@ubuntu.com>
+Reviewed-by: Aleksa Sarai <cyphar@cyphar.com>
+Acked-by: Tycho Andersen <tycho@tycho.ws>
+Link: https://lore.kernel.org/r/20191229062451.9467-2-sargun@sargun.me
+Fixes: 6a21cc50f0c7 ("seccomp: add a return code to trap to userspace")
+Cc: stable@vger.kernel.org
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/seccomp.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/kernel/seccomp.c
++++ b/kernel/seccomp.c
+@@ -1015,6 +1015,13 @@ static long seccomp_notify_recv(struct s
+ struct seccomp_notif unotif;
+ ssize_t ret;
+
++ /* Verify that we're not given garbage to keep struct extensible. */
++ ret = check_zeroed_user(buf, sizeof(unotif));
++ if (ret < 0)
++ return ret;
++ if (!ret)
++ return -EINVAL;
++
+ memset(&unotif, 0, sizeof(unotif));
+
+ ret = down_interruptible(&filter->notif->request);
--- /dev/null
+From e4ab5ccc357b978999328fadae164e098c26fa40 Mon Sep 17 00:00:00 2001
+From: Sargun Dhillon <sargun@sargun.me>
+Date: Mon, 30 Dec 2019 12:38:11 -0800
+Subject: selftests/seccomp: Catch garbage on SECCOMP_IOCTL_NOTIF_RECV
+
+From: Sargun Dhillon <sargun@sargun.me>
+
+commit e4ab5ccc357b978999328fadae164e098c26fa40 upstream.
+
+This adds logic to the user_notification_basic test that sets a member
+of struct seccomp_notif to an invalid (nonzero) value, ensuring that
+the kernel returns EINVAL whenever any struct seccomp_notif member is
+set to an invalid value.
+
+Signed-off-by: Sargun Dhillon <sargun@sargun.me>
+Suggested-by: Christian Brauner <christian.brauner@ubuntu.com>
+Link: https://lore.kernel.org/r/20191230203811.4996-1-sargun@sargun.me
+Fixes: 6a21cc50f0c7 ("seccomp: add a return code to trap to userspace")
+Cc: stable@vger.kernel.org
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/testing/selftests/seccomp/seccomp_bpf.c | 13 ++++++++++++-
+ 1 file changed, 12 insertions(+), 1 deletion(-)
+
+--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
++++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
+@@ -3147,7 +3147,18 @@ TEST(user_notification_basic)
+ EXPECT_GT(poll(&pollfd, 1, -1), 0);
+ EXPECT_EQ(pollfd.revents, POLLIN);
+
+- EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
++ /* Test that we can't pass garbage to the kernel. */
++ memset(&req, 0, sizeof(req));
++ req.pid = -1;
++ errno = 0;
++ ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req);
++ EXPECT_EQ(-1, ret);
++ EXPECT_EQ(EINVAL, errno);
++
++ if (ret) {
++ req.pid = 0;
++ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
++ }
+
+ pollfd.fd = listener;
+ pollfd.events = POLLIN | POLLOUT;
--- /dev/null
+From 88c13f8bd71472fbab5338b01d99122908c77e53 Mon Sep 17 00:00:00 2001
+From: Sargun Dhillon <sargun@sargun.me>
+Date: Sat, 28 Dec 2019 22:24:49 -0800
+Subject: selftests/seccomp: Zero out seccomp_notif
+
+From: Sargun Dhillon <sargun@sargun.me>
+
+commit 88c13f8bd71472fbab5338b01d99122908c77e53 upstream.
+
+The seccomp_notif structure should be zeroed out prior to calling the
+SECCOMP_IOCTL_NOTIF_RECV ioctl. Previously, the kernel did not check
+whether these structures were zeroed out, so the tests worked anyway.
+
+This patch zeroes out the seccomp_notif data structure prior to calling
+the ioctl.
+
+Signed-off-by: Sargun Dhillon <sargun@sargun.me>
+Reviewed-by: Tycho Andersen <tycho@tycho.ws>
+Reviewed-by: Christian Brauner <christian.brauner@ubuntu.com>
+Link: https://lore.kernel.org/r/20191229062451.9467-1-sargun@sargun.me
+Fixes: 6a21cc50f0c7 ("seccomp: add a return code to trap to userspace")
+Cc: stable@vger.kernel.org
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/testing/selftests/seccomp/seccomp_bpf.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
++++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
+@@ -3267,6 +3267,7 @@ TEST(user_notification_signal)
+
+ close(sk_pair[1]);
+
++ memset(&req, 0, sizeof(req));
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+
+ EXPECT_EQ(kill(pid, SIGUSR1), 0);
+@@ -3285,6 +3286,7 @@ TEST(user_notification_signal)
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
+ EXPECT_EQ(errno, ENOENT);
+
++ memset(&req, 0, sizeof(req));
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+
+ resp.id = req.id;
media-cec-cec-2.0-only-bcast-messages-were-ignored.patch
media-cec-avoid-decrementing-transmit_queue_sz-if-it-is-0.patch
media-cec-check-transmit_in_progress-not-transmitting.patch
+mm-memory_hotplug-shrink-zones-when-offlining-memory.patch
+mm-zsmalloc.c-fix-the-migrated-zspage-statistics.patch
+memcg-account-security-cred-as-well-to-kmemcg.patch
+mm-move_pages-return-valid-node-id-in-status-if-the-page-is-already-on-the-target-node.patch
+mm-oom-fix-pgtables-units-mismatch-in-killed-process-message.patch
+ocfs2-fix-the-crash-due-to-call-ocfs2_get_dlm_debug-once-less.patch
+pstore-ram-write-new-dumps-to-start-of-recycled-zones.patch
+pstore-ram-fix-error-path-memory-leak-in-persistent_ram_new-callers.patch
+gcc-plugins-make-it-possible-to-disable-config_gcc_plugins-again.patch
+locks-print-unsigned-ino-in-proc-locks.patch
+selftests-seccomp-zero-out-seccomp_notif.patch
+seccomp-check-that-seccomp_notif-is-zeroed-out-by-the-user.patch
+samples-seccomp-zero-out-members-based-on-seccomp_notif_sizes.patch
+selftests-seccomp-catch-garbage-on-seccomp_ioctl_notif_recv.patch
+dmaengine-fix-access-to-uninitialized-dma_slave_caps.patch
+dmaengine-dma-jz4780-also-break-descriptor-chains-on-jz4725b.patch
+btrfs-fix-infinite-loop-during-nocow-writeback-due-to-race.patch
+block-fix-splitting-segments-on-boundary-masks.patch
+compat_ioctl-block-handle-persistent-reservations.patch
+compat_ioctl-block-handle-blkreportzone-blkresetzone.patch
+compat_ioctl-block-handle-blkgetzonesz-blkgetnrzones.patch
+bpf-fix-precision-tracking-for-unbounded-scalars.patch