--- /dev/null
+From 429120f3df2dba2bf3a4a19f4212a53ecefc7102 Mon Sep 17 00:00:00 2001
+From: Ming Lei <ming.lei@redhat.com>
+Date: Sun, 29 Dec 2019 10:32:30 +0800
+Subject: block: fix splitting segments on boundary masks
+
+From: Ming Lei <ming.lei@redhat.com>
+
+commit 429120f3df2dba2bf3a4a19f4212a53ecefc7102 upstream.
+
+We ran into a problem with an mpt3sas-based controller, where we would
+see random (and hard to reproduce) file corruption. The issue seemed
+specific to this controller, but wasn't specific to the file system.
+After a lot of debugging, we found out that it's caused by segments
+spanning a 4G memory boundary. This shouldn't happen, as the default
+setting for segment boundary masks is 4G.
+
+Turns out there are two issues in get_max_segment_size():
+
+1) The default segment boundary mask is bypassed
+
+2) The segment start address isn't taken into account when checking
+ segment boundary limit
+
+Fix these two issues by removing the bypass of the segment boundary
+check even if the mask is set to the default value, and taking into
+account the actual start address of the request when checking if a
+segment needs splitting.
+
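+As an illustration of the fixed calculation, here is a minimal userspace
+sketch (not part of the patch; the physical address and the segment size
+limit are made-up values for illustration):
+
+	#include <stdio.h>
+
+	int main(void)
+	{
+		unsigned long long mask = 0xffffffffULL;  /* default 4G - 1 boundary mask */
+		unsigned long long phys = 0x1fffff000ULL; /* segment starts 4K below a 4G line */
+		unsigned long long max_seg = 65536;       /* queue's max segment size */
+		unsigned long long left, len;
+
+		/* bytes left before the next 4G boundary, from the real start address */
+		left = mask - (mask & phys) + 1;
+		len = left < max_seg ? left : max_seg;
+
+		printf("segment capped at %llu bytes\n", len); /* 4096 */
+		return 0;
+	}
+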
+Cc: stable@vger.kernel.org # v5.1+
+Reviewed-by: Chris Mason <clm@fb.com>
+Tested-by: Chris Mason <clm@fb.com>
+Fixes: dcebd755926b ("block: use bio_for_each_bvec() to compute multi-page bvec count")
+Signed-off-by: Ming Lei <ming.lei@redhat.com>
+Dropped const on the page pointer, ppc page_to_phys() doesn't mark the
+page as const...
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ block/blk-merge.c | 18 +++++++++---------
+ 1 file changed, 9 insertions(+), 9 deletions(-)
+
+--- a/block/blk-merge.c
++++ b/block/blk-merge.c
+@@ -157,16 +157,14 @@ static inline unsigned get_max_io_size(s
+ return sectors & (lbs - 1);
+ }
+
+-static unsigned get_max_segment_size(const struct request_queue *q,
+- unsigned offset)
++static inline unsigned get_max_segment_size(const struct request_queue *q,
++ struct page *start_page,
++ unsigned long offset)
+ {
+ unsigned long mask = queue_segment_boundary(q);
+
+- /* default segment boundary mask means no boundary limit */
+- if (mask == BLK_SEG_BOUNDARY_MASK)
+- return queue_max_segment_size(q);
+-
+- return min_t(unsigned long, mask - (mask & offset) + 1,
++ offset = mask & (page_to_phys(start_page) + offset);
++ return min_t(unsigned long, mask - offset + 1,
+ queue_max_segment_size(q));
+ }
+
+@@ -201,7 +199,8 @@ static bool bvec_split_segs(const struct
+ unsigned seg_size = 0;
+
+ while (len && *nsegs < max_segs) {
+- seg_size = get_max_segment_size(q, bv->bv_offset + total_len);
++ seg_size = get_max_segment_size(q, bv->bv_page,
++ bv->bv_offset + total_len);
+ seg_size = min(seg_size, len);
+
+ (*nsegs)++;
+@@ -404,7 +403,8 @@ static unsigned blk_bvec_map_sg(struct r
+
+ while (nbytes > 0) {
+ unsigned offset = bvec->bv_offset + total;
+- unsigned len = min(get_max_segment_size(q, offset), nbytes);
++ unsigned len = min(get_max_segment_size(q, bvec->bv_page,
++ offset), nbytes);
+ struct page *page = bvec->bv_page;
+
+ /*
--- /dev/null
+From f54c7898ed1c3c9331376c0337a5049c38f66497 Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Sun, 22 Dec 2019 23:37:40 +0100
+Subject: bpf: Fix precision tracking for unbounded scalars
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+commit f54c7898ed1c3c9331376c0337a5049c38f66497 upstream.
+
+Anatoly has been fuzzing with the kBdysch harness and reported a hang in one
+of the outcomes. Upon closer analysis, it turns out that precise scalar
+value tracking is missing a few precision markings for unknown scalars:
+
+ 0: R1=ctx(id=0,off=0,imm=0) R10=fp0
+ 0: (b7) r0 = 0
+ 1: R0_w=invP0 R1=ctx(id=0,off=0,imm=0) R10=fp0
+ 1: (35) if r0 >= 0xf72e goto pc+0
+ --> only follow fallthrough
+ 2: R0_w=invP0 R1=ctx(id=0,off=0,imm=0) R10=fp0
+ 2: (35) if r0 >= 0x80fe0000 goto pc+0
+ --> only follow fallthrough
+ 3: R0_w=invP0 R1=ctx(id=0,off=0,imm=0) R10=fp0
+ 3: (14) w0 -= -536870912
+ 4: R0_w=invP536870912 R1=ctx(id=0,off=0,imm=0) R10=fp0
+ 4: (0f) r1 += r0
+ 5: R0_w=invP536870912 R1_w=inv(id=0) R10=fp0
+ 5: (55) if r1 != 0x104c1500 goto pc+0
+ --> push other branch for later analysis
+ R0_w=invP536870912 R1_w=inv273421568 R10=fp0
+ 6: R0_w=invP536870912 R1_w=inv273421568 R10=fp0
+ 6: (b7) r0 = 0
+ 7: R0=invP0 R1=inv273421568 R10=fp0
+ 7: (76) if w1 s>= 0xffffff00 goto pc+3
+ --> only follow goto
+ 11: R0=invP0 R1=inv273421568 R10=fp0
+ 11: (95) exit
+ 6: R0_w=invP536870912 R1_w=inv(id=0) R10=fp0
+ 6: (b7) r0 = 0
+ propagating r0
+ 7: safe
+ processed 11 insns [...]
+
+In the analysis of the second path coming after the successful exit above,
+the path is being pruned at line 7. Pruning analysis found that both r0
+registers are precise P0 and both R1 registers are non-precise scalars;
+given that the prior path with R1 as a non-precise scalar succeeded, this
+one is therefore considered safe as well.
+
+However, the problem is that given the condition at insn 7 in the first
+run, we only followed the goto and didn't push the other branch for later
+analysis; we've never walked the few insns in there and therefore dead-code
+sanitation rewrites it as goto pc-1, causing the hang depending on the skb
+address hitting these conditions. The issue is that R1 should have been
+marked as precise as well, such that pruning enforces the range check and
+concludes that the new R1 is not in the range of the old R1. In insn 4, we
+mark R1 (skb) as an unknown scalar via __mark_reg_unbounded() but not
+mark_reg_unbounded() and therefore regs->precise remains false.
+
+Back in b5dc0163d8fd ("bpf: precise scalar_value tracking"), this was not
+the case since the marking done out of __mark_reg_unbounded() had this
+covered as well. Once both are set as precise at insn 4 as they should
+have been, then given that R1 was 0x104c1500 in the prior fall-through
+path and is now completely unknown, the check at insn 7 concludes that we
+need to continue walking.
+Analysis after the fix:
+
+ 0: R1=ctx(id=0,off=0,imm=0) R10=fp0
+ 0: (b7) r0 = 0
+ 1: R0_w=invP0 R1=ctx(id=0,off=0,imm=0) R10=fp0
+ 1: (35) if r0 >= 0xf72e goto pc+0
+ 2: R0_w=invP0 R1=ctx(id=0,off=0,imm=0) R10=fp0
+ 2: (35) if r0 >= 0x80fe0000 goto pc+0
+ 3: R0_w=invP0 R1=ctx(id=0,off=0,imm=0) R10=fp0
+ 3: (14) w0 -= -536870912
+ 4: R0_w=invP536870912 R1=ctx(id=0,off=0,imm=0) R10=fp0
+ 4: (0f) r1 += r0
+ 5: R0_w=invP536870912 R1_w=invP(id=0) R10=fp0
+ 5: (55) if r1 != 0x104c1500 goto pc+0
+ R0_w=invP536870912 R1_w=invP273421568 R10=fp0
+ 6: R0_w=invP536870912 R1_w=invP273421568 R10=fp0
+ 6: (b7) r0 = 0
+ 7: R0=invP0 R1=invP273421568 R10=fp0
+ 7: (76) if w1 s>= 0xffffff00 goto pc+3
+ 11: R0=invP0 R1=invP273421568 R10=fp0
+ 11: (95) exit
+ 6: R0_w=invP536870912 R1_w=invP(id=0) R10=fp0
+ 6: (b7) r0 = 0
+ 7: R0_w=invP0 R1_w=invP(id=0) R10=fp0
+ 7: (76) if w1 s>= 0xffffff00 goto pc+3
+ R0_w=invP0 R1_w=invP(id=0) R10=fp0
+ 8: R0_w=invP0 R1_w=invP(id=0) R10=fp0
+ 8: (a5) if r0 < 0x2007002a goto pc+0
+ 9: R0_w=invP0 R1_w=invP(id=0) R10=fp0
+ 9: (57) r0 &= -16316416
+ 10: R0_w=invP0 R1_w=invP(id=0) R10=fp0
+ 10: (a6) if w0 < 0x1201 goto pc+0
+ 11: R0_w=invP0 R1_w=invP(id=0) R10=fp0
+ 11: (95) exit
+ 11: R0=invP0 R1=invP(id=0) R10=fp0
+ 11: (95) exit
+ processed 16 insns [...]
+
+Fixes: 6754172c208d ("bpf: fix precision tracking in presence of bpf2bpf calls")
+Reported-by: Anatoly Trosinenko <anatoly.trosinenko@gmail.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Link: https://lore.kernel.org/bpf/20191222223740.25297-1-daniel@iogearbox.net
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/bpf/verifier.c | 43 ++++++++++++++++++++++---------------------
+ 1 file changed, 22 insertions(+), 21 deletions(-)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -852,7 +852,8 @@ static const int caller_saved[CALLER_SAV
+ BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
+ };
+
+-static void __mark_reg_not_init(struct bpf_reg_state *reg);
++static void __mark_reg_not_init(const struct bpf_verifier_env *env,
++ struct bpf_reg_state *reg);
+
+ /* Mark the unknown part of a register (variable offset or scalar value) as
+ * known to have the value @imm.
+@@ -890,7 +891,7 @@ static void mark_reg_known_zero(struct b
+ verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
+ /* Something bad happened, let's kill all regs */
+ for (regno = 0; regno < MAX_BPF_REG; regno++)
+- __mark_reg_not_init(regs + regno);
++ __mark_reg_not_init(env, regs + regno);
+ return;
+ }
+ __mark_reg_known_zero(regs + regno);
+@@ -999,7 +1000,8 @@ static void __mark_reg_unbounded(struct
+ }
+
+ /* Mark a register as having a completely unknown (scalar) value. */
+-static void __mark_reg_unknown(struct bpf_reg_state *reg)
++static void __mark_reg_unknown(const struct bpf_verifier_env *env,
++ struct bpf_reg_state *reg)
+ {
+ /*
+ * Clear type, id, off, and union(map_ptr, range) and
+@@ -1009,6 +1011,8 @@ static void __mark_reg_unknown(struct bp
+ reg->type = SCALAR_VALUE;
+ reg->var_off = tnum_unknown;
+ reg->frameno = 0;
++ reg->precise = env->subprog_cnt > 1 || !env->allow_ptr_leaks ?
++ true : false;
+ __mark_reg_unbounded(reg);
+ }
+
+@@ -1019,19 +1023,16 @@ static void mark_reg_unknown(struct bpf_
+ verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
+ /* Something bad happened, let's kill all regs except FP */
+ for (regno = 0; regno < BPF_REG_FP; regno++)
+- __mark_reg_not_init(regs + regno);
++ __mark_reg_not_init(env, regs + regno);
+ return;
+ }
+- regs += regno;
+- __mark_reg_unknown(regs);
+- /* constant backtracking is enabled for root without bpf2bpf calls */
+- regs->precise = env->subprog_cnt > 1 || !env->allow_ptr_leaks ?
+- true : false;
++ __mark_reg_unknown(env, regs + regno);
+ }
+
+-static void __mark_reg_not_init(struct bpf_reg_state *reg)
++static void __mark_reg_not_init(const struct bpf_verifier_env *env,
++ struct bpf_reg_state *reg)
+ {
+- __mark_reg_unknown(reg);
++ __mark_reg_unknown(env, reg);
+ reg->type = NOT_INIT;
+ }
+
+@@ -1042,10 +1043,10 @@ static void mark_reg_not_init(struct bpf
+ verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
+ /* Something bad happened, let's kill all regs except FP */
+ for (regno = 0; regno < BPF_REG_FP; regno++)
+- __mark_reg_not_init(regs + regno);
++ __mark_reg_not_init(env, regs + regno);
+ return;
+ }
+- __mark_reg_not_init(regs + regno);
++ __mark_reg_not_init(env, regs + regno);
+ }
+
+ #define DEF_NOT_SUBREG (0)
+@@ -3066,7 +3067,7 @@ static int check_stack_boundary(struct b
+ }
+ if (state->stack[spi].slot_type[0] == STACK_SPILL &&
+ state->stack[spi].spilled_ptr.type == SCALAR_VALUE) {
+- __mark_reg_unknown(&state->stack[spi].spilled_ptr);
++ __mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
+ for (j = 0; j < BPF_REG_SIZE; j++)
+ state->stack[spi].slot_type[j] = STACK_MISC;
+ goto mark;
+@@ -3706,7 +3707,7 @@ static void __clear_all_pkt_pointers(str
+ if (!reg)
+ continue;
+ if (reg_is_pkt_pointer_any(reg))
+- __mark_reg_unknown(reg);
++ __mark_reg_unknown(env, reg);
+ }
+ }
+
+@@ -3734,7 +3735,7 @@ static void release_reg_references(struc
+ if (!reg)
+ continue;
+ if (reg->ref_obj_id == ref_obj_id)
+- __mark_reg_unknown(reg);
++ __mark_reg_unknown(env, reg);
+ }
+ }
+
+@@ -4357,7 +4358,7 @@ static int adjust_ptr_min_max_vals(struc
+ /* Taint dst register if offset had invalid bounds derived from
+ * e.g. dead branches.
+ */
+- __mark_reg_unknown(dst_reg);
++ __mark_reg_unknown(env, dst_reg);
+ return 0;
+ }
+
+@@ -4609,13 +4610,13 @@ static int adjust_scalar_min_max_vals(st
+ /* Taint dst register if offset had invalid bounds derived from
+ * e.g. dead branches.
+ */
+- __mark_reg_unknown(dst_reg);
++ __mark_reg_unknown(env, dst_reg);
+ return 0;
+ }
+
+ if (!src_known &&
+ opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
+- __mark_reg_unknown(dst_reg);
++ __mark_reg_unknown(env, dst_reg);
+ return 0;
+ }
+
+@@ -6746,7 +6747,7 @@ static void clean_func_state(struct bpf_
+ /* since the register is unused, clear its state
+ * to make further comparison simpler
+ */
+- __mark_reg_not_init(&st->regs[i]);
++ __mark_reg_not_init(env, &st->regs[i]);
+ }
+
+ for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
+@@ -6754,7 +6755,7 @@ static void clean_func_state(struct bpf_
+ /* liveness must not touch this stack slot anymore */
+ st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
+ if (!(live & REG_LIVE_READ)) {
+- __mark_reg_not_init(&st->stack[i].spilled_ptr);
++ __mark_reg_not_init(env, &st->stack[i].spilled_ptr);
+ for (j = 0; j < BPF_REG_SIZE; j++)
+ st->stack[i].slot_type[j] = STACK_INVALID;
+ }
--- /dev/null
+From de7999afedff02c6631feab3ea726a0e8f8c3d40 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Wed, 11 Dec 2019 09:01:40 +0000
+Subject: Btrfs: fix infinite loop during nocow writeback due to race
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit de7999afedff02c6631feab3ea726a0e8f8c3d40 upstream.
+
+When starting writeback for a range that covers part of a preallocated
+extent, due to a race with writeback for another range that also covers
+another part of the same preallocated extent, we can end up in an infinite
+loop.
+
+Consider the following example where for inode 280 we have two dirty
+ranges:
+
+ range A, from 294912 to 303103, 8192 bytes
+ range B, from 348160 to 438271, 90112 bytes
+
+and we have the following file extent item layout for our inode:
+
+ leaf 38895616 gen 24544 total ptrs 29 free space 13820 owner 5
+ (...)
+ item 27 key (280 108 200704) itemoff 14598 itemsize 53
+ extent data disk bytenr 0 nr 0 type 1 (regular)
+ extent data offset 0 nr 94208 ram 94208
+ item 28 key (280 108 294912) itemoff 14545 itemsize 53
+ extent data disk bytenr 10433052672 nr 81920 type 2 (prealloc)
+ extent data offset 0 nr 81920 ram 81920
+
+Then the following happens:
+
+1) Writeback starts for range B (from 348160 to 438271), execution of
+ run_delalloc_nocow() starts;
+
+2) The first iteration of run_delalloc_nocow()'s while loop leaves us at
+ the extent item at slot 28, pointing to the prealloc extent item
+ covering the range from 294912 to 376831. This extent covers part of
+ our range;
+
+3) An ordered extent is created against that extent, covering the file
+ range from 348160 to 376831 (28672 bytes);
+
+4) We adjust 'cur_offset' to 376832 and move on to the next iteration of
+ the while loop;
+
+5) The call to btrfs_lookup_file_extent() leaves us at the same leaf,
+ pointing to slot 29, 1 slot after the last item (the extent item
+ we processed in the previous iteration);
+
+6) Because we are a slot beyond the last item, we call btrfs_next_leaf(),
+   which releases the search path before doing another search for the
+ last key of the leaf (280 108 294912);
+
+7) Right after btrfs_next_leaf() released the path, and before it did
+ another search for the last key of the leaf, writeback for the range
+ A (from 294912 to 303103) completes (it was previously started at
+ some point);
+
+8) Upon completion of the ordered extent for range A, the prealloc extent
+ we previously found got split into two extent items, one covering the
+ range from 294912 to 303103 (8192 bytes), with a type of regular extent
+ (and no longer prealloc) and another covering the range from 303104 to
+ 376831 (73728 bytes), with a type of prealloc and an offset of 8192
+ bytes. So our leaf now has the following layout:
+
+ leaf 38895616 gen 24544 total ptrs 31 free space 13664 owner 5
+ (...)
+ item 27 key (280 108 200704) itemoff 14598 itemsize 53
+ extent data disk bytenr 0 nr 0 type 1
+ extent data offset 0 nr 8192 ram 94208
+ item 28 key (280 108 208896) itemoff 14545 itemsize 53
+ extent data disk bytenr 10433142784 nr 86016 type 1
+ extent data offset 0 nr 86016 ram 86016
+ item 29 key (280 108 294912) itemoff 14492 itemsize 53
+ extent data disk bytenr 10433052672 nr 81920 type 1
+ extent data offset 0 nr 8192 ram 81920
+ item 30 key (280 108 303104) itemoff 14439 itemsize 53
+ extent data disk bytenr 10433052672 nr 81920 type 2
+ extent data offset 8192 nr 73728 ram 81920
+
+9) After btrfs_next_leaf() returns, we have our path pointing to that same
+ leaf and at slot 30, since it has a key we didn't have before and it's
+    the first key greater than the key that was previously the last key of
+ the leaf (key (280 108 294912));
+
+10) The extent item at slot 30 covers the range from 303104 to 376831
+ which is in our target range, so we process it, despite having already
+ created an ordered extent against this extent for the file range from
+ 348160 to 376831. This is because we skip to the next extent item only
+    if its end is less than or equal to the start of our delalloc range,
+    and not less than or equal to the current offset ('cur_offset');
+
+11) As a result we compute 'num_bytes' as:
+
+ num_bytes = min(end + 1, extent_end) - cur_offset;
+ = min(438271 + 1, 376832) - 376832 = 0
+
+12) We then call create_io_em() for a 0 bytes range starting at offset
+ 376832;
+
+13) Then create_io_em() enters an infinite loop because its calls to
+ btrfs_drop_extent_cache() do nothing due to the 0 length range
+ passed to it. So no existing extent maps that cover the offset
+ 376832 get removed, and therefore calls to add_extent_mapping()
+ return -EEXIST, resulting in an infinite loop. This loop from
+ create_io_em() is the following:
+
+ do {
+ btrfs_drop_extent_cache(BTRFS_I(inode), em->start,
+ em->start + em->len - 1, 0);
+ write_lock(&em_tree->lock);
+ ret = add_extent_mapping(em_tree, em, 1);
+ write_unlock(&em_tree->lock);
+ /*
+ * The caller has taken lock_extent(), who could race with us
+ * to add em?
+ */
+ } while (ret == -EEXIST);
+
+Also, each call to btrfs_drop_extent_cache() triggers a warning because
+the start offset passed to it (376832) is greater than the end offset
+passed to it (376832 - 1 = 376831), due to the 0 length:
+
+ [258532.052621] ------------[ cut here ]------------
+ [258532.052643] WARNING: CPU: 0 PID: 9987 at fs/btrfs/file.c:602 btrfs_drop_extent_cache+0x3f4/0x590 [btrfs]
+ (...)
+ [258532.052672] CPU: 0 PID: 9987 Comm: fsx Tainted: G W 5.4.0-rc7-btrfs-next-64 #1
+ [258532.052673] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.org 04/01/2014
+ [258532.052691] RIP: 0010:btrfs_drop_extent_cache+0x3f4/0x590 [btrfs]
+ (...)
+ [258532.052695] RSP: 0018:ffffb4be0153f860 EFLAGS: 00010287
+ [258532.052700] RAX: ffff975b445ee360 RBX: ffff975b44eb3e08 RCX: 0000000000000000
+ [258532.052700] RDX: 0000000000038fff RSI: 0000000000039000 RDI: ffff975b445ee308
+ [258532.052700] RBP: 0000000000038fff R08: 0000000000000000 R09: 0000000000000001
+ [258532.052701] R10: ffff975b513c5c10 R11: 00000000e3c0cfa9 R12: 0000000000039000
+ [258532.052703] R13: ffff975b445ee360 R14: 00000000ffffffef R15: ffff975b445ee308
+ [258532.052705] FS: 00007f86a821de80(0000) GS:ffff975b76a00000(0000) knlGS:0000000000000000
+ [258532.052707] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ [258532.052708] CR2: 00007fdacf0f3ab4 CR3: 00000001f9d26002 CR4: 00000000003606f0
+ [258532.052712] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ [258532.052717] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ [258532.052717] Call Trace:
+ [258532.052718] ? preempt_schedule_common+0x32/0x70
+ [258532.052722] ? ___preempt_schedule+0x16/0x20
+ [258532.052741] create_io_em+0xff/0x180 [btrfs]
+ [258532.052767] run_delalloc_nocow+0x942/0xb10 [btrfs]
+ [258532.052791] btrfs_run_delalloc_range+0x30b/0x520 [btrfs]
+ [258532.052812] ? find_lock_delalloc_range+0x221/0x250 [btrfs]
+ [258532.052834] writepage_delalloc+0xe4/0x140 [btrfs]
+ [258532.052855] __extent_writepage+0x110/0x4e0 [btrfs]
+ [258532.052876] extent_write_cache_pages+0x21c/0x480 [btrfs]
+ [258532.052906] extent_writepages+0x52/0xb0 [btrfs]
+ [258532.052911] do_writepages+0x23/0x80
+ [258532.052915] __filemap_fdatawrite_range+0xd2/0x110
+ [258532.052938] btrfs_fdatawrite_range+0x1b/0x50 [btrfs]
+ [258532.052954] start_ordered_ops+0x57/0xa0 [btrfs]
+ [258532.052973] ? btrfs_sync_file+0x225/0x490 [btrfs]
+ [258532.052988] btrfs_sync_file+0x225/0x490 [btrfs]
+ [258532.052997] __x64_sys_msync+0x199/0x200
+ [258532.053004] do_syscall_64+0x5c/0x250
+ [258532.053007] entry_SYSCALL_64_after_hwframe+0x49/0xbe
+ [258532.053010] RIP: 0033:0x7f86a7dfd760
+ (...)
+ [258532.053014] RSP: 002b:00007ffd99af0368 EFLAGS: 00000246 ORIG_RAX: 000000000000001a
+ [258532.053016] RAX: ffffffffffffffda RBX: 0000000000000ec9 RCX: 00007f86a7dfd760
+ [258532.053017] RDX: 0000000000000004 RSI: 000000000000836c RDI: 00007f86a8221000
+ [258532.053019] RBP: 0000000000021ec9 R08: 0000000000000003 R09: 00007f86a812037c
+ [258532.053020] R10: 0000000000000001 R11: 0000000000000246 R12: 00000000000074a3
+ [258532.053021] R13: 00007f86a8221000 R14: 000000000000836c R15: 0000000000000001
+ [258532.053032] irq event stamp: 1653450494
+ [258532.053035] hardirqs last enabled at (1653450493): [<ffffffff9dec69f9>] _raw_spin_unlock_irq+0x29/0x50
+ [258532.053037] hardirqs last disabled at (1653450494): [<ffffffff9d4048ea>] trace_hardirqs_off_thunk+0x1a/0x20
+ [258532.053039] softirqs last enabled at (1653449852): [<ffffffff9e200466>] __do_softirq+0x466/0x6bd
+ [258532.053042] softirqs last disabled at (1653449845): [<ffffffff9d4c8a0c>] irq_exit+0xec/0x120
+ [258532.053043] ---[ end trace 8476fce13d9ce20a ]---
+
+Which results in flooding dmesg/syslog since btrfs_drop_extent_cache()
+uses WARN_ON() and not WARN_ON_ONCE().
+
+So fix this issue by changing run_delalloc_nocow()'s loop to move to the
+next extent item when the current extent item ends at an offset less than
+or equal to the current offset instead of the start offset.
+
+Fixes: 80ff385665b7fc ("Btrfs: update nodatacow code v2")
+CC: stable@vger.kernel.org # 4.4+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/inode.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -1439,10 +1439,10 @@ next_slot:
+ disk_num_bytes =
+ btrfs_file_extent_disk_num_bytes(leaf, fi);
+ /*
+- * If extent we got ends before our range starts, skip
+- * to next extent
++ * If the extent we got ends before our current offset,
++ * skip to the next extent.
+ */
+- if (extent_end <= start) {
++ if (extent_end <= cur_offset) {
+ path->slots[0]++;
+ goto next_slot;
+ }
--- /dev/null
+From 21d37340912d74b1222d43c11aa9dd0687162573 Mon Sep 17 00:00:00 2001
+From: Arnd Bergmann <arnd@arndb.de>
+Date: Fri, 29 Nov 2019 11:28:22 +0100
+Subject: compat_ioctl: block: handle BLKGETZONESZ/BLKGETNRZONES
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+commit 21d37340912d74b1222d43c11aa9dd0687162573 upstream.
+
+These were added to blkdev_ioctl() in v4.20 but not to
+compat_blkdev_ioctl(), so add them now.
+
+Cc: <stable@vger.kernel.org> # v4.20+
+Fixes: 72cd87576d1d ("block: Introduce BLKGETZONESZ ioctl")
+Fixes: 65e4e3eee83d ("block: Introduce BLKGETNRZONES ioctl")
+Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ block/compat_ioctl.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/block/compat_ioctl.c
++++ b/block/compat_ioctl.c
+@@ -357,6 +357,8 @@ long compat_blkdev_ioctl(struct file *fi
+ case BLKRRPART:
+ case BLKREPORTZONE:
+ case BLKRESETZONE:
++ case BLKGETZONESZ:
++ case BLKGETNRZONES:
+ return blkdev_ioctl(bdev, mode, cmd,
+ (unsigned long)compat_ptr(arg));
+ case BLKBSZSET_32:
--- /dev/null
+From 673bdf8ce0a387ef585c13b69a2676096c6edfe9 Mon Sep 17 00:00:00 2001
+From: Arnd Bergmann <arnd@arndb.de>
+Date: Fri, 29 Nov 2019 11:28:22 +0100
+Subject: compat_ioctl: block: handle BLKREPORTZONE/BLKRESETZONE
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+commit 673bdf8ce0a387ef585c13b69a2676096c6edfe9 upstream.
+
+These were added to blkdev_ioctl() but not to compat_blkdev_ioctl(),
+so add them now.
+
+Cc: <stable@vger.kernel.org> # v4.10+
+Fixes: 3ed05a987e0f ("blk-zoned: implement ioctls")
+Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ block/compat_ioctl.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/block/compat_ioctl.c
++++ b/block/compat_ioctl.c
+@@ -355,6 +355,8 @@ long compat_blkdev_ioctl(struct file *fi
+ * but we call blkdev_ioctl, which gets the lock for us
+ */
+ case BLKRRPART:
++ case BLKREPORTZONE:
++ case BLKRESETZONE:
+ return blkdev_ioctl(bdev, mode, cmd,
+ (unsigned long)compat_ptr(arg));
+ case BLKBSZSET_32:
--- /dev/null
+From b2c0fcd28772f99236d261509bcd242135677965 Mon Sep 17 00:00:00 2001
+From: Arnd Bergmann <arnd@arndb.de>
+Date: Fri, 29 Nov 2019 11:28:22 +0100
+Subject: compat_ioctl: block: handle Persistent Reservations
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+commit b2c0fcd28772f99236d261509bcd242135677965 upstream.
+
+These were added to blkdev_ioctl() in linux-5.5 but not to
+compat_blkdev_ioctl(), so add them now.
+
+Cc: <stable@vger.kernel.org> # v4.4+
+Fixes: bbd3e064362e ("block: add an API for Persistent Reservations")
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+Fold in followup patch from Arnd with missing pr.h header include.
+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+
+---
+ block/compat_ioctl.c | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/block/compat_ioctl.c
++++ b/block/compat_ioctl.c
+@@ -6,6 +6,7 @@
+ #include <linux/compat.h>
+ #include <linux/elevator.h>
+ #include <linux/hdreg.h>
++#include <linux/pr.h>
+ #include <linux/slab.h>
+ #include <linux/syscalls.h>
+ #include <linux/types.h>
+@@ -401,6 +402,14 @@ long compat_blkdev_ioctl(struct file *fi
+ case BLKTRACETEARDOWN: /* compatible */
+ ret = blk_trace_ioctl(bdev, cmd, compat_ptr(arg));
+ return ret;
++ case IOC_PR_REGISTER:
++ case IOC_PR_RESERVE:
++ case IOC_PR_RELEASE:
++ case IOC_PR_PREEMPT:
++ case IOC_PR_PREEMPT_ABORT:
++ case IOC_PR_CLEAR:
++ return blkdev_ioctl(bdev, mode, cmd,
++ (unsigned long)compat_ptr(arg));
+ default:
+ if (disk->fops->compat_ioctl)
+ ret = disk->fops->compat_ioctl(bdev, mode, cmd, arg);
--- /dev/null
+From a40c94be2336f3002563c9ae16572143ae3422e2 Mon Sep 17 00:00:00 2001
+From: Paul Cercueil <paul@crapouillou.net>
+Date: Tue, 10 Dec 2019 17:55:45 +0100
+Subject: dmaengine: dma-jz4780: Also break descriptor chains on JZ4725B
+
+From: Paul Cercueil <paul@crapouillou.net>
+
+commit a40c94be2336f3002563c9ae16572143ae3422e2 upstream.
+
+It turns out that the JZ4725B displays the same buggy behaviour as the
+JZ4740 that was described in commit f4c255f1a747 ("dmaengine: dma-jz4780:
+Break descriptor chains on JZ4740").
+
+Work around it by using the same workaround previously used for the
+JZ4740.
+
+Fixes: f4c255f1a747 ("dmaengine: dma-jz4780: Break descriptor chains on JZ4740")
+
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Paul Cercueil <paul@crapouillou.net>
+Link: https://lore.kernel.org/r/20191210165545.59690-1-paul@crapouillou.net
+Signed-off-by: Vinod Koul <vkoul@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/dma/dma-jz4780.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/dma/dma-jz4780.c
++++ b/drivers/dma/dma-jz4780.c
+@@ -1004,7 +1004,8 @@ static const struct jz4780_dma_soc_data
+ static const struct jz4780_dma_soc_data jz4725b_dma_soc_data = {
+ .nb_channels = 6,
+ .transfer_ord_max = 5,
+- .flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC,
++ .flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC |
++ JZ_SOC_DATA_BREAK_LINKS,
+ };
+
+ static const struct jz4780_dma_soc_data jz4770_dma_soc_data = {
--- /dev/null
+From 53a256a9b925b47c7e67fc1f16ca41561a7b877c Mon Sep 17 00:00:00 2001
+From: Lukas Wunner <lukas@wunner.de>
+Date: Thu, 5 Dec 2019 12:54:49 +0100
+Subject: dmaengine: Fix access to uninitialized dma_slave_caps
+
+From: Lukas Wunner <lukas@wunner.de>
+
+commit 53a256a9b925b47c7e67fc1f16ca41561a7b877c upstream.
+
+dmaengine_desc_set_reuse() allocates a struct dma_slave_caps on the
+stack, populates it using dma_get_slave_caps() and then accesses one
+of its members.
+
+However, dma_get_slave_caps() may fail and this isn't accounted for,
+leading to a legitimate warning from gcc-4.9 (but not newer versions):
+
+ In file included from drivers/spi/spi-bcm2835.c:19:0:
+ drivers/spi/spi-bcm2835.c: In function 'dmaengine_desc_set_reuse':
+>> include/linux/dmaengine.h:1370:10: warning: 'caps.descriptor_reuse' is used uninitialized in this function [-Wuninitialized]
+ if (caps.descriptor_reuse) {
+
+Fix it, thereby also silencing the gcc-4.9 warning.
+
+The issue has been present for 4 years but surfaces only now that
+the first caller of dmaengine_desc_set_reuse() has been added in
+spi-bcm2835.c. Another user of reusable DMA descriptors has existed
+for a while in pxa_camera.c, but it sets the DMA_CTRL_REUSE flag
+directly instead of calling dmaengine_desc_set_reuse(). Nevertheless,
+tag this commit for stable in case there are out-of-tree users.
+
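+As a usage sketch (hypothetical driver fragment, not from this patch;
+chan, buf and len are assumed to be set up elsewhere) showing why the
+return value matters:
+
+	struct dma_async_tx_descriptor *tx;
+
+	tx = dmaengine_prep_slave_single(chan, buf, len, DMA_MEM_TO_DEV, 0);
+	if (!tx)
+		return -ENOMEM;
+	/* with this fix, the call also fails if dma_get_slave_caps() fails,
+	 * not only if the channel doesn't support descriptor reuse */
+	if (dmaengine_desc_set_reuse(tx))
+		pr_warn("falling back to one-shot descriptors\n");
+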
+Fixes: 272420214d26 ("dmaengine: Add DMA_CTRL_REUSE")
+Reported-by: kbuild test robot <lkp@intel.com>
+Signed-off-by: Lukas Wunner <lukas@wunner.de>
+Cc: stable@vger.kernel.org # v4.3+
+Link: https://lore.kernel.org/r/ca92998ccc054b4f2bfd60ef3adbab2913171eac.1575546234.git.lukas@wunner.de
+Signed-off-by: Vinod Koul <vkoul@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/dmaengine.h | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/include/linux/dmaengine.h
++++ b/include/linux/dmaengine.h
+@@ -1364,8 +1364,11 @@ static inline int dma_get_slave_caps(str
+ static inline int dmaengine_desc_set_reuse(struct dma_async_tx_descriptor *tx)
+ {
+ struct dma_slave_caps caps;
++ int ret;
+
+- dma_get_slave_caps(tx->chan, &caps);
++ ret = dma_get_slave_caps(tx->chan, &caps);
++ if (ret)
++ return ret;
+
+ if (caps.descriptor_reuse) {
+ tx->flags |= DMA_CTRL_REUSE;
--- /dev/null
+From a5b0dc5a46c221725c43bd9b01570239a4cd78b1 Mon Sep 17 00:00:00 2001
+From: Arnd Bergmann <arnd@arndb.de>
+Date: Wed, 11 Dec 2019 14:39:28 +0100
+Subject: gcc-plugins: make it possible to disable CONFIG_GCC_PLUGINS again
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+commit a5b0dc5a46c221725c43bd9b01570239a4cd78b1 upstream.
+
+I noticed that randconfig builds with gcc no longer produce a lot of
+ccache hits, unlike with clang, and traced this back to plugins
+now being enabled unconditionally if they are supported.
+
+I am now working around this by adding
+
+ export CCACHE_COMPILERCHECK=/usr/bin/size -A %compiler%
+
+to my top-level Makefile. This changes the heuristic that ccache uses
+to determine whether the plugins are the same after a 'make clean'.
+
+However, it also seems that being able to just turn off the plugins is
+generally useful: at least for build testing they add noticeable overhead
+but do not find a lot of additional bugs, and turning them off may be
+easier for ccache users than my workaround.
+
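+With this change applied, the plugins can be turned off for such builds,
+for example (one possible way, using the in-tree config script):
+
+	./scripts/config -d GCC_PLUGINS
+	make olddefconfig
+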
+Fixes: 9f671e58159a ("security: Create "kernel hardening" config area")
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Acked-by: Ard Biesheuvel <ardb@kernel.org>
+Reviewed-by: Masahiro Yamada <masahiroy@kernel.org>
+Link: https://lore.kernel.org/r/20191211133951.401933-1-arnd@arndb.de
+Cc: stable@vger.kernel.org
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ scripts/gcc-plugins/Kconfig | 9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+--- a/scripts/gcc-plugins/Kconfig
++++ b/scripts/gcc-plugins/Kconfig
+@@ -14,8 +14,8 @@ config HAVE_GCC_PLUGINS
+ An arch should select this symbol if it supports building with
+ GCC plugins.
+
+-config GCC_PLUGINS
+- bool
++menuconfig GCC_PLUGINS
++ bool "GCC plugins"
+ depends on HAVE_GCC_PLUGINS
+ depends on PLUGIN_HOSTCC != ""
+ default y
+@@ -25,8 +25,7 @@ config GCC_PLUGINS
+
+ See Documentation/core-api/gcc-plugins.rst for details.
+
+-menu "GCC plugins"
+- depends on GCC_PLUGINS
++if GCC_PLUGINS
+
+ config GCC_PLUGIN_CYC_COMPLEXITY
+ bool "Compute the cyclomatic complexity of a function" if EXPERT
+@@ -113,4 +112,4 @@ config GCC_PLUGIN_ARM_SSP_PER_TASK
+ bool
+ depends on GCC_PLUGINS && ARM
+
+-endmenu
++endif
--- /dev/null
+From 98ca480a8f22fdbd768e3dad07024c8d4856576c Mon Sep 17 00:00:00 2001
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Sun, 22 Dec 2019 20:45:28 +0200
+Subject: locks: print unsigned ino in /proc/locks
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+commit 98ca480a8f22fdbd768e3dad07024c8d4856576c upstream.
+
+An ino is unsigned, so display it as such in /proc/locks.
+
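+A tiny userspace illustration (with a made-up inode number) of why the
+conversion specifier matters for large values:
+
+	#include <stdio.h>
+
+	int main(void)
+	{
+		unsigned long ino = 18446744073709551516UL; /* 2^64 - 100 */
+
+		printf("%ld\n", (long)ino); /* what "%ld" shows: -100 */
+		printf("%lu\n", ino);       /* correct: 18446744073709551516 */
+		return 0;
+	}
+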
+Cc: stable@vger.kernel.org
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/locks.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/locks.c
++++ b/fs/locks.c
+@@ -2853,7 +2853,7 @@ static void lock_get_status(struct seq_f
+ }
+ if (inode) {
+ /* userspace relies on this representation of dev_t */
+- seq_printf(f, "%d %02x:%02x:%ld ", fl_pid,
++ seq_printf(f, "%d %02x:%02x:%lu ", fl_pid,
+ MAJOR(inode->i_sb->s_dev),
+ MINOR(inode->i_sb->s_dev), inode->i_ino);
+ } else {
--- /dev/null
+From 84029fd04c201a4c7e0b07ba262664900f47c6f5 Mon Sep 17 00:00:00 2001
+From: Shakeel Butt <shakeelb@google.com>
+Date: Sat, 4 Jan 2020 12:59:43 -0800
+Subject: memcg: account security cred as well to kmemcg
+
+From: Shakeel Butt <shakeelb@google.com>
+
+commit 84029fd04c201a4c7e0b07ba262664900f47c6f5 upstream.
+
+The cred_jar kmem_cache is already memcg accounted in the current kernel
+but cred->security is not. Account cred->security to kmemcg.
+
+Recently we saw high root slab usage on our production systems and, on
+further inspection, found a buggy application leaking processes. Though
+that buggy application was contained within its memcg, we observed a
+couple of GiB of additional system memory overhead during that period.
+This overhead can adversely impact the isolation on the system.
+
+One source of high overhead we found was cred->security objects, which
+have a lifetime of at least the life of the process which allocated
+them.
+
+Link: http://lkml.kernel.org/r/20191205223721.40034-1-shakeelb@google.com
+Signed-off-by: Shakeel Butt <shakeelb@google.com>
+Acked-by: Chris Down <chris@chrisdown.name>
+Reviewed-by: Roman Gushchin <guro@fb.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/cred.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/kernel/cred.c
++++ b/kernel/cred.c
+@@ -223,7 +223,7 @@ struct cred *cred_alloc_blank(void)
+ new->magic = CRED_MAGIC;
+ #endif
+
+- if (security_cred_alloc_blank(new, GFP_KERNEL) < 0)
++ if (security_cred_alloc_blank(new, GFP_KERNEL_ACCOUNT) < 0)
+ goto error;
+
+ return new;
+@@ -282,7 +282,7 @@ struct cred *prepare_creds(void)
+ new->security = NULL;
+ #endif
+
+- if (security_prepare_creds(new, old, GFP_KERNEL) < 0)
++ if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0)
+ goto error;
+ validate_creds(new);
+ return new;
+@@ -715,7 +715,7 @@ struct cred *prepare_kernel_cred(struct
+ #ifdef CONFIG_SECURITY
+ new->security = NULL;
+ #endif
+- if (security_prepare_creds(new, old, GFP_KERNEL) < 0)
++ if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0)
+ goto error;
+
+ put_cred(old);
--- /dev/null
+From feee6b2989165631b17ac6d4ccdbf6759254e85a Mon Sep 17 00:00:00 2001
+From: David Hildenbrand <david@redhat.com>
+Date: Sat, 4 Jan 2020 12:59:33 -0800
+Subject: mm/memory_hotplug: shrink zones when offlining memory
+
+From: David Hildenbrand <david@redhat.com>
+
+commit feee6b2989165631b17ac6d4ccdbf6759254e85a upstream.
+
+We currently try to shrink a single zone when removing memory. We use
+the zone of the first page of the memory we are removing. If that
+memmap was never initialized (e.g., memory was never onlined), we will
+read garbage and can trigger kernel BUGs (due to a stale pointer):
+
+ BUG: unable to handle page fault for address: 000000000000353d
+ #PF: supervisor write access in kernel mode
+ #PF: error_code(0x0002) - not-present page
+ PGD 0 P4D 0
+ Oops: 0002 [#1] SMP PTI
+ CPU: 1 PID: 7 Comm: kworker/u8:0 Not tainted 5.3.0-rc5-next-20190820+ #317
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.4
+ Workqueue: kacpi_hotplug acpi_hotplug_work_fn
+ RIP: 0010:clear_zone_contiguous+0x5/0x10
+ Code: 48 89 c6 48 89 c3 e8 2a fe ff ff 48 85 c0 75 cf 5b 5d c3 c6 85 fd 05 00 00 01 5b 5d c3 0f 1f 840
+ RSP: 0018:ffffad2400043c98 EFLAGS: 00010246
+ RAX: 0000000000000000 RBX: 0000000200000000 RCX: 0000000000000000
+ RDX: 0000000000200000 RSI: 0000000000140000 RDI: 0000000000002f40
+ RBP: 0000000140000000 R08: 0000000000000000 R09: 0000000000000001
+ R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000140000
+ R13: 0000000000140000 R14: 0000000000002f40 R15: ffff9e3e7aff3680
+ FS: 0000000000000000(0000) GS:ffff9e3e7bb00000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 000000000000353d CR3: 0000000058610000 CR4: 00000000000006e0
+ DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ Call Trace:
+ __remove_pages+0x4b/0x640
+ arch_remove_memory+0x63/0x8d
+ try_remove_memory+0xdb/0x130
+ __remove_memory+0xa/0x11
+ acpi_memory_device_remove+0x70/0x100
+ acpi_bus_trim+0x55/0x90
+ acpi_device_hotplug+0x227/0x3a0
+ acpi_hotplug_work_fn+0x1a/0x30
+ process_one_work+0x221/0x550
+ worker_thread+0x50/0x3b0
+ kthread+0x105/0x140
+ ret_from_fork+0x3a/0x50
+ Modules linked in:
+ CR2: 000000000000353d
+
+Instead, shrink the zones when offlining memory or when onlining failed.
+Introduce and use remove_pfn_range_from_zone() for that. We now
+properly shrink the zones, even if we have DIMMs where
+
+ - Some memory blocks fall into no zone (never onlined)
+
+ - Some memory blocks fall into multiple zones (offlined+re-onlined)
+
+ - Multiple memory blocks that fall into different zones
+
+Drop the zone parameter (with a potential dubious value) from
+__remove_pages() and __remove_section().
+
+Link: http://lkml.kernel.org/r/20191006085646.5768-6-david@redhat.com
+Fixes: f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online") [visible after d0dc12e86b319]
+Signed-off-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Oscar Salvador <osalvador@suse.de>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
+Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
+Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Logan Gunthorpe <logang@deltatee.com>
+Cc: <stable@vger.kernel.org> [5.0+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/mm/mmu.c | 4 +---
+ arch/ia64/mm/init.c | 4 +---
+ arch/powerpc/mm/mem.c | 3 +--
+ arch/s390/mm/init.c | 4 +---
+ arch/sh/mm/init.c | 4 +---
+ arch/x86/mm/init_32.c | 4 +---
+ arch/x86/mm/init_64.c | 4 +---
+ include/linux/memory_hotplug.h | 7 +++++--
+ mm/memory_hotplug.c | 31 ++++++++++++++++---------------
+ mm/memremap.c | 2 +-
+ 10 files changed, 29 insertions(+), 38 deletions(-)
+
+--- a/arch/arm64/mm/mmu.c
++++ b/arch/arm64/mm/mmu.c
+@@ -1069,7 +1069,6 @@ void arch_remove_memory(int nid, u64 sta
+ {
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+- struct zone *zone;
+
+ /*
+ * FIXME: Cleanup page tables (also in arch_add_memory() in case
+@@ -1078,7 +1077,6 @@ void arch_remove_memory(int nid, u64 sta
+ * unplug. ARCH_ENABLE_MEMORY_HOTREMOVE must not be
+ * unlocked yet.
+ */
+- zone = page_zone(pfn_to_page(start_pfn));
+- __remove_pages(zone, start_pfn, nr_pages, altmap);
++ __remove_pages(start_pfn, nr_pages, altmap);
+ }
+ #endif
+--- a/arch/ia64/mm/init.c
++++ b/arch/ia64/mm/init.c
+@@ -689,9 +689,7 @@ void arch_remove_memory(int nid, u64 sta
+ {
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+- struct zone *zone;
+
+- zone = page_zone(pfn_to_page(start_pfn));
+- __remove_pages(zone, start_pfn, nr_pages, altmap);
++ __remove_pages(start_pfn, nr_pages, altmap);
+ }
+ #endif
+--- a/arch/powerpc/mm/mem.c
++++ b/arch/powerpc/mm/mem.c
+@@ -130,10 +130,9 @@ void __ref arch_remove_memory(int nid, u
+ {
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+- struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap);
+ int ret;
+
+- __remove_pages(page_zone(page), start_pfn, nr_pages, altmap);
++ __remove_pages(start_pfn, nr_pages, altmap);
+
+ /* Remove htab bolted mappings for this section of memory */
+ start = (unsigned long)__va(start);
+--- a/arch/s390/mm/init.c
++++ b/arch/s390/mm/init.c
+@@ -291,10 +291,8 @@ void arch_remove_memory(int nid, u64 sta
+ {
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+- struct zone *zone;
+
+- zone = page_zone(pfn_to_page(start_pfn));
+- __remove_pages(zone, start_pfn, nr_pages, altmap);
++ __remove_pages(start_pfn, nr_pages, altmap);
+ vmem_remove_mapping(start, size);
+ }
+ #endif /* CONFIG_MEMORY_HOTPLUG */
+--- a/arch/sh/mm/init.c
++++ b/arch/sh/mm/init.c
+@@ -434,9 +434,7 @@ void arch_remove_memory(int nid, u64 sta
+ {
+ unsigned long start_pfn = PFN_DOWN(start);
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+- struct zone *zone;
+
+- zone = page_zone(pfn_to_page(start_pfn));
+- __remove_pages(zone, start_pfn, nr_pages, altmap);
++ __remove_pages(start_pfn, nr_pages, altmap);
+ }
+ #endif /* CONFIG_MEMORY_HOTPLUG */
+--- a/arch/x86/mm/init_32.c
++++ b/arch/x86/mm/init_32.c
+@@ -865,10 +865,8 @@ void arch_remove_memory(int nid, u64 sta
+ {
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+- struct zone *zone;
+
+- zone = page_zone(pfn_to_page(start_pfn));
+- __remove_pages(zone, start_pfn, nr_pages, altmap);
++ __remove_pages(start_pfn, nr_pages, altmap);
+ }
+ #endif
+
+--- a/arch/x86/mm/init_64.c
++++ b/arch/x86/mm/init_64.c
+@@ -1212,10 +1212,8 @@ void __ref arch_remove_memory(int nid, u
+ {
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+- struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap);
+- struct zone *zone = page_zone(page);
+
+- __remove_pages(zone, start_pfn, nr_pages, altmap);
++ __remove_pages(start_pfn, nr_pages, altmap);
+ kernel_physical_mapping_remove(start, start + size);
+ }
+ #endif /* CONFIG_MEMORY_HOTPLUG */
+--- a/include/linux/memory_hotplug.h
++++ b/include/linux/memory_hotplug.h
+@@ -125,8 +125,8 @@ static inline bool movable_node_is_enabl
+
+ extern void arch_remove_memory(int nid, u64 start, u64 size,
+ struct vmem_altmap *altmap);
+-extern void __remove_pages(struct zone *zone, unsigned long start_pfn,
+- unsigned long nr_pages, struct vmem_altmap *altmap);
++extern void __remove_pages(unsigned long start_pfn, unsigned long nr_pages,
++ struct vmem_altmap *altmap);
+
+ /* reasonably generic interface to expand the physical pages */
+ extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
+@@ -345,6 +345,9 @@ extern int add_memory(int nid, u64 start
+ extern int add_memory_resource(int nid, struct resource *resource);
+ extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
+ unsigned long nr_pages, struct vmem_altmap *altmap);
++extern void remove_pfn_range_from_zone(struct zone *zone,
++ unsigned long start_pfn,
++ unsigned long nr_pages);
+ extern bool is_memblock_offlined(struct memory_block *mem);
+ extern int sparse_add_section(int nid, unsigned long pfn,
+ unsigned long nr_pages, struct vmem_altmap *altmap);
+--- a/mm/memory_hotplug.c
++++ b/mm/memory_hotplug.c
+@@ -465,8 +465,9 @@ static void update_pgdat_span(struct pgl
+ pgdat->node_spanned_pages = node_end_pfn - node_start_pfn;
+ }
+
+-static void __remove_zone(struct zone *zone, unsigned long start_pfn,
+- unsigned long nr_pages)
++void __ref remove_pfn_range_from_zone(struct zone *zone,
++ unsigned long start_pfn,
++ unsigned long nr_pages)
+ {
+ struct pglist_data *pgdat = zone->zone_pgdat;
+ unsigned long flags;
+@@ -481,28 +482,30 @@ static void __remove_zone(struct zone *z
+ return;
+ #endif
+
++ clear_zone_contiguous(zone);
++
+ pgdat_resize_lock(zone->zone_pgdat, &flags);
+ shrink_zone_span(zone, start_pfn, start_pfn + nr_pages);
+ update_pgdat_span(pgdat);
+ pgdat_resize_unlock(zone->zone_pgdat, &flags);
++
++ set_zone_contiguous(zone);
+ }
+
+-static void __remove_section(struct zone *zone, unsigned long pfn,
+- unsigned long nr_pages, unsigned long map_offset,
+- struct vmem_altmap *altmap)
++static void __remove_section(unsigned long pfn, unsigned long nr_pages,
++ unsigned long map_offset,
++ struct vmem_altmap *altmap)
+ {
+ struct mem_section *ms = __nr_to_section(pfn_to_section_nr(pfn));
+
+ if (WARN_ON_ONCE(!valid_section(ms)))
+ return;
+
+- __remove_zone(zone, pfn, nr_pages);
+ sparse_remove_section(ms, pfn, nr_pages, map_offset, altmap);
+ }
+
+ /**
+- * __remove_pages() - remove sections of pages from a zone
+- * @zone: zone from which pages need to be removed
++ * __remove_pages() - remove sections of pages
+ * @pfn: starting pageframe (must be aligned to start of a section)
+ * @nr_pages: number of pages to remove (must be multiple of section size)
+ * @altmap: alternative device page map or %NULL if default memmap is used
+@@ -512,16 +515,14 @@ static void __remove_section(struct zone
+ * sure that pages are marked reserved and zones are adjust properly by
+ * calling offline_pages().
+ */
+-void __remove_pages(struct zone *zone, unsigned long pfn,
+- unsigned long nr_pages, struct vmem_altmap *altmap)
++void __remove_pages(unsigned long pfn, unsigned long nr_pages,
++ struct vmem_altmap *altmap)
+ {
+ unsigned long map_offset = 0;
+ unsigned long nr, start_sec, end_sec;
+
+ map_offset = vmem_altmap_offset(altmap);
+
+- clear_zone_contiguous(zone);
+-
+ if (check_pfn_span(pfn, nr_pages, "remove"))
+ return;
+
+@@ -533,13 +534,11 @@ void __remove_pages(struct zone *zone, u
+ cond_resched();
+ pfns = min(nr_pages, PAGES_PER_SECTION
+ - (pfn & ~PAGE_SECTION_MASK));
+- __remove_section(zone, pfn, pfns, map_offset, altmap);
++ __remove_section(pfn, pfns, map_offset, altmap);
+ pfn += pfns;
+ nr_pages -= pfns;
+ map_offset = 0;
+ }
+-
+- set_zone_contiguous(zone);
+ }
+
+ int set_online_page_callback(online_page_callback_t callback)
+@@ -867,6 +866,7 @@ failed_addition:
+ (unsigned long long) pfn << PAGE_SHIFT,
+ (((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1);
+ memory_notify(MEM_CANCEL_ONLINE, &arg);
++ remove_pfn_range_from_zone(zone, pfn, nr_pages);
+ mem_hotplug_done();
+ return ret;
+ }
+@@ -1602,6 +1602,7 @@ static int __ref __offline_pages(unsigne
+ writeback_set_ratelimit();
+
+ memory_notify(MEM_OFFLINE, &arg);
++ remove_pfn_range_from_zone(zone, start_pfn, nr_pages);
+ mem_hotplug_done();
+ return 0;
+
+--- a/mm/memremap.c
++++ b/mm/memremap.c
+@@ -120,7 +120,7 @@ void memunmap_pages(struct dev_pagemap *
+
+ mem_hotplug_begin();
+ if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
+- __remove_pages(page_zone(first_page), PHYS_PFN(res->start),
++ __remove_pages(PHYS_PFN(res->start),
+ PHYS_PFN(resource_size(res)), NULL);
+ } else {
+ arch_remove_memory(nid, res->start, resource_size(res),
--- /dev/null
+From e0153fc2c7606f101392b682e720a7a456d6c766 Mon Sep 17 00:00:00 2001
+From: Yang Shi <yang.shi@linux.alibaba.com>
+Date: Sat, 4 Jan 2020 12:59:46 -0800
+Subject: mm: move_pages: return valid node id in status if the page is already on the target node
+
+From: Yang Shi <yang.shi@linux.alibaba.com>
+
+commit e0153fc2c7606f101392b682e720a7a456d6c766 upstream.
+
+Felix Abecassis reports that move_pages() returns random status values if
+the pages are already on the target node, as shown by the test program
+below:
+
+ int main(void)
+ {
+ const long node_id = 1;
+ const long page_size = sysconf(_SC_PAGESIZE);
+ const int64_t num_pages = 8;
+
+ unsigned long nodemask = 1 << node_id;
+ long ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask));
+ if (ret < 0)
+ return (EXIT_FAILURE);
+
+ void **pages = malloc(sizeof(void*) * num_pages);
+ for (int i = 0; i < num_pages; ++i) {
+ pages[i] = mmap(NULL, page_size, PROT_WRITE | PROT_READ,
+ MAP_PRIVATE | MAP_POPULATE | MAP_ANONYMOUS,
+ -1, 0);
+ if (pages[i] == MAP_FAILED)
+ return (EXIT_FAILURE);
+ }
+
+ ret = set_mempolicy(MPOL_DEFAULT, NULL, 0);
+ if (ret < 0)
+ return (EXIT_FAILURE);
+
+ int *nodes = malloc(sizeof(int) * num_pages);
+ int *status = malloc(sizeof(int) * num_pages);
+ for (int i = 0; i < num_pages; ++i) {
+ nodes[i] = node_id;
+ status[i] = 0xd0; /* simulate garbage values */
+ }
+
+ ret = move_pages(0, num_pages, pages, nodes, status, MPOL_MF_MOVE);
+ printf("move_pages: %ld\n", ret);
+ for (int i = 0; i < num_pages; ++i)
+ printf("status[%d] = %d\n", i, status[i]);
+ }
+
+Then running the program would return nonsense status values:
+
+ $ ./move_pages_bug
+ move_pages: 0
+ status[0] = 208
+ status[1] = 208
+ status[2] = 208
+ status[3] = 208
+ status[4] = 208
+ status[5] = 208
+ status[6] = 208
+ status[7] = 208
+
+This is because the status is not set if the page is already on the
+target node, but move_pages() should return a valid status as long as it
+succeeds. The valid status may be an errno or a node id.
+
+We can't simply initialize the status array to zero since the pages may
+not be on node 0. Fix it by updating the status with the node id which
+the page is already on.
+
+Link: http://lkml.kernel.org/r/1575584353-125392-1-git-send-email-yang.shi@linux.alibaba.com
+Fixes: a49bd4d71637 ("mm, numa: rework do_pages_move")
+Signed-off-by: Yang Shi <yang.shi@linux.alibaba.com>
+Reported-by: Felix Abecassis <fabecassis@nvidia.com>
+Tested-by: Felix Abecassis <fabecassis@nvidia.com>
+Suggested-by: Michal Hocko <mhocko@suse.com>
+Reviewed-by: John Hubbard <jhubbard@nvidia.com>
+Acked-by: Christoph Lameter <cl@linux.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: <stable@vger.kernel.org> [4.17+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/migrate.c | 23 +++++++++++++++++------
+ 1 file changed, 17 insertions(+), 6 deletions(-)
+
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -1516,9 +1516,11 @@ static int do_move_pages_to_node(struct
+ /*
+ * Resolves the given address to a struct page, isolates it from the LRU and
+ * puts it to the given pagelist.
+- * Returns -errno if the page cannot be found/isolated or 0 when it has been
+- * queued or the page doesn't need to be migrated because it is already on
+- * the target node
++ * Returns:
++ * errno - if the page cannot be found/isolated
++ * 0 - when it doesn't have to be migrated because it is already on the
++ * target node
++ * 1 - when it has been queued
+ */
+ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
+ int node, struct list_head *pagelist, bool migrate_all)
+@@ -1557,7 +1559,7 @@ static int add_page_for_migration(struct
+ if (PageHuge(page)) {
+ if (PageHead(page)) {
+ isolate_huge_page(page, pagelist);
+- err = 0;
++ err = 1;
+ }
+ } else {
+ struct page *head;
+@@ -1567,7 +1569,7 @@ static int add_page_for_migration(struct
+ if (err)
+ goto out_putpage;
+
+- err = 0;
++ err = 1;
+ list_add_tail(&head->lru, pagelist);
+ mod_node_page_state(page_pgdat(head),
+ NR_ISOLATED_ANON + page_is_file_cache(head),
+@@ -1644,8 +1646,17 @@ static int do_pages_move(struct mm_struc
+ */
+ err = add_page_for_migration(mm, addr, current_node,
+ &pagelist, flags & MPOL_MF_MOVE_ALL);
+- if (!err)
++
++ if (!err) {
++ /* The page is already on the target node */
++ err = store_status(status, i, current_node, 1);
++ if (err)
++ goto out_flush;
+ continue;
++ } else if (err > 0) {
++ /* The page is successfully queued for migration */
++ continue;
++ }
+
+ err = store_status(status, i, err, 1);
+ if (err)
--- /dev/null
+From 941f762bcb276259a78e7931674668874ccbda59 Mon Sep 17 00:00:00 2001
+From: Ilya Dryomov <idryomov@gmail.com>
+Date: Sat, 4 Jan 2020 13:00:09 -0800
+Subject: mm/oom: fix pgtables units mismatch in Killed process message
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit 941f762bcb276259a78e7931674668874ccbda59 upstream.
+
+pr_err() expects kB, but mm_pgtables_bytes() returns the number of bytes.
+As everything else is printed in kB, I chose to fix the value rather than
+the string.
+
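+(With the values from the fixed log below, 1294336 bytes >> 10 = 1264 kB,
+which is what the corrected "pgtables:1264kB" field shows.)
+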
+Before:
+
+[ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name
+...
+[ 1878] 1000 1878 217253 151144 1269760 0 0 python
+...
+Out of memory: Killed process 1878 (python) total-vm:869012kB, anon-rss:604572kB, file-rss:4kB, shmem-rss:0kB, UID:1000 pgtables:1269760kB oom_score_adj:0
+
+After:
+
+[ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name
+...
+[ 1436] 1000 1436 217253 151890 1294336 0 0 python
+...
+Out of memory: Killed process 1436 (python) total-vm:869012kB, anon-rss:607516kB, file-rss:44kB, shmem-rss:0kB, UID:1000 pgtables:1264kB oom_score_adj:0
+
+Link: http://lkml.kernel.org/r/20191211202830.1600-1-idryomov@gmail.com
+Fixes: 70cb6d267790 ("mm/oom: add oom_score_adj and pgtables to Killed process message")
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
+Acked-by: David Rientjes <rientjes@google.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Edward Chron <echron@arista.com>
+Cc: David Rientjes <rientjes@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/oom_kill.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/oom_kill.c
++++ b/mm/oom_kill.c
+@@ -890,7 +890,7 @@ static void __oom_kill_process(struct ta
+ K(get_mm_counter(mm, MM_FILEPAGES)),
+ K(get_mm_counter(mm, MM_SHMEMPAGES)),
+ from_kuid(&init_user_ns, task_uid(victim)),
+- mm_pgtables_bytes(mm), victim->signal->oom_score_adj);
++ mm_pgtables_bytes(mm) >> 10, victim->signal->oom_score_adj);
+ task_unlock(victim);
+
+ /*
--- /dev/null
+From ac8f05da5174c560de122c499ce5dfb5d0dfbee5 Mon Sep 17 00:00:00 2001
+From: Chanho Min <chanho.min@lge.com>
+Date: Sat, 4 Jan 2020 12:59:36 -0800
+Subject: mm/zsmalloc.c: fix the migrated zspage statistics.
+
+From: Chanho Min <chanho.min@lge.com>
+
+commit ac8f05da5174c560de122c499ce5dfb5d0dfbee5 upstream.
+
+When a zspage is migrated to another zone, the zone page state should be
+updated as well; otherwise the NR_ZSPAGES counter for each zone shows
+wrong counts, visible e.g. in /proc/zoneinfo in practice.
+
+Link: http://lkml.kernel.org/r/1575434841-48009-1-git-send-email-chanho.min@lge.com
+Fixes: 91537fee0013 ("mm: add NR_ZSMALLOC to vmstat")
+Signed-off-by: Chanho Min <chanho.min@lge.com>
+Signed-off-by: Jinsuk Choi <jjinsuk.choi@lge.com>
+Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Acked-by: Minchan Kim <minchan@kernel.org>
+Cc: <stable@vger.kernel.org> [4.9+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/zsmalloc.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/mm/zsmalloc.c
++++ b/mm/zsmalloc.c
+@@ -2069,6 +2069,11 @@ static int zs_page_migrate(struct addres
+ zs_pool_dec_isolated(pool);
+ }
+
++ if (page_zone(newpage) != page_zone(page)) {
++ dec_zone_page_state(page, NR_ZSPAGES);
++ inc_zone_page_state(newpage, NR_ZSPAGES);
++ }
++
+ reset_page(page);
+ put_page(page);
+ page = newpage;
--- /dev/null
+From b73eba2a867e10b9b4477738677341f3307c07bb Mon Sep 17 00:00:00 2001
+From: Gang He <GHe@suse.com>
+Date: Sat, 4 Jan 2020 13:00:22 -0800
+Subject: ocfs2: fix the crash due to call ocfs2_get_dlm_debug once less
+
+From: Gang He <GHe@suse.com>
+
+commit b73eba2a867e10b9b4477738677341f3307c07bb upstream.
+
+Because ocfs2_get_dlm_debug() is called once less than needed here, the
+ocfs2 file system will trigger a system crash, usually after the ocfs2
+file system is unmounted.
+
+The crash is caused by generic memory corruption, so the backtraces are
+not always the same. For example,
+
+ ocfs2: Unmounting device (253,16) on (node 172167785)
+ general protection fault: 0000 [#1] SMP PTI
+ CPU: 3 PID: 14107 Comm: fence_legacy Kdump:
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
+ RIP: 0010:__kmalloc+0xa5/0x2a0
+ Code: 00 00 4d 8b 07 65 4d 8b
+ RSP: 0018:ffffaa1fc094bbe8 EFLAGS: 00010286
+ RAX: 0000000000000000 RBX: d310a8800d7a3faf RCX: 0000000000000000
+ RDX: 0000000000000000 RSI: 0000000000000dc0 RDI: ffff96e68fc036c0
+ RBP: d310a8800d7a3faf R08: ffff96e6ffdb10a0 R09: 00000000752e7079
+ R10: 000000000001c513 R11: 0000000004091041 R12: 0000000000000dc0
+ R13: 0000000000000039 R14: ffff96e68fc036c0 R15: ffff96e68fc036c0
+ FS: 00007f699dfba540(0000) GS:ffff96e6ffd80000(0000) knlGS:00000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 000055f3a9d9b768 CR3: 000000002cd1c000 CR4: 00000000000006e0
+ Call Trace:
+ ext4_htree_store_dirent+0x35/0x100 [ext4]
+ htree_dirblock_to_tree+0xea/0x290 [ext4]
+ ext4_htree_fill_tree+0x1c1/0x2d0 [ext4]
+ ext4_readdir+0x67c/0x9d0 [ext4]
+ iterate_dir+0x8d/0x1a0
+ __x64_sys_getdents+0xab/0x130
+ do_syscall_64+0x60/0x1f0
+ entry_SYSCALL_64_after_hwframe+0x49/0xbe
+ RIP: 0033:0x7f699d33a9fb
+
+This regression was introduced by commit e581595ea29c ("ocfs: no need
+to check return value of debugfs_create functions").
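+
+The imbalance can be modelled with a plain reference count (a
+simplified stand-in for the kref inside the dlm debug context; the
+function names here are made up for this sketch):
+
+  #include <stdlib.h>
+
+  struct dlm_debug { int ref; };
+
+  static void get_debug(struct dlm_debug *d) { d->ref++; }
+
+  static void put_debug(struct dlm_debug *d)
+  {
+      if (--d->ref == 0)
+          free(d);    /* freeing while a user remains corrupts memory */
+  }
+
+  int main(void)
+  {
+      struct dlm_debug *d = calloc(1, sizeof(*d));
+
+      if (!d)
+          return 1;
+      d->ref = 1;     /* reference taken at allocation */
+      get_debug(d);   /* the second user takes its own reference;
+                       * this is the call the patch adds */
+      put_debug(d);   /* first user done */
+      put_debug(d);   /* last user done: freed exactly once */
+      return 0;
+  }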
+
+Link: http://lkml.kernel.org/r/20191225061501.13587-1-ghe@suse.com
+Fixes: e581595ea29c ("ocfs: no need to check return value of debugfs_create functions")
+Signed-off-by: Gang He <ghe@suse.com>
+Acked-by: Joseph Qi <joseph.qi@linux.alibaba.com>
+Cc: Mark Fasheh <mark@fasheh.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Junxiao Bi <junxiao.bi@oracle.com>
+Cc: Changwei Ge <gechangwei@live.cn>
+Cc: Gang He <ghe@suse.com>
+Cc: Jun Piao <piaojun@huawei.com>
+Cc: <stable@vger.kernel.org> [5.3+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/dlmglue.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/ocfs2/dlmglue.c
++++ b/fs/ocfs2/dlmglue.c
+@@ -3282,6 +3282,7 @@ static void ocfs2_dlm_init_debug(struct
+
+ debugfs_create_u32("locking_filter", 0600, osb->osb_debug_root,
+ &dlm_debug->d_filter_secs);
++ ocfs2_get_dlm_debug(dlm_debug);
+ }
+
+ static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb)
--- /dev/null
+From 8df955a32a73315055e0cd187cbb1cea5820394b Mon Sep 17 00:00:00 2001
+From: Kees Cook <keescook@chromium.org>
+Date: Mon, 30 Dec 2019 11:48:10 -0800
+Subject: pstore/ram: Fix error-path memory leak in persistent_ram_new() callers
+
+From: Kees Cook <keescook@chromium.org>
+
+commit 8df955a32a73315055e0cd187cbb1cea5820394b upstream.
+
+Callers that allocate a label for persistent_ram_new() must clean up
+the allocation themselves if the call fails.
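+
+The ownership rule being enforced, as a hedged userspace sketch (the
+stand-in assumes, as the fix does, that the callee does not take
+ownership of the label when it fails):
+
+  #include <stdio.h>
+  #include <stdlib.h>
+  #include <string.h>
+
+  /* Simplified stand-in for persistent_ram_new(): returns NULL on
+   * failure and, in that case, leaves the label owned by the caller. */
+  static void *prz_new(char *label)
+  {
+      (void)label;
+      return NULL;    /* simulate the failing mem region request */
+  }
+
+  int main(void)
+  {
+      char *label = strdup("ramoops:dump");   /* like kasprintf() */
+      void *prz = prz_new(label);
+
+      if (!prz) {
+          fprintf(stderr, "failed to request mem region\n");
+          free(label);    /* the added cleanup: don't leak the label */
+          return 1;
+      }
+      return 0;
+  }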
+
+Suggested-by: Navid Emamdoost <navid.emamdoost@gmail.com>
+Fixes: 1227daa43bce ("pstore/ram: Clarify resource reservation labels")
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/lkml/20191211191353.14385-1-navid.emamdoost@gmail.com
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/pstore/ram.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/pstore/ram.c
++++ b/fs/pstore/ram.c
+@@ -588,6 +588,7 @@ static int ramoops_init_przs(const char
+ dev_err(dev, "failed to request %s mem region (0x%zx@0x%llx): %d\n",
+ name, record_size,
+ (unsigned long long)*paddr, err);
++ kfree(label);
+
+ while (i > 0) {
+ i--;
+@@ -633,6 +634,7 @@ static int ramoops_init_prz(const char *
+
+ dev_err(dev, "failed to request %s mem region (0x%zx@0x%llx): %d\n",
+ name, sz, (unsigned long long)*paddr, err);
++ kfree(label);
+ return err;
+ }
+
--- /dev/null
+From 9e5f1c19800b808a37fb9815a26d382132c26c3d Mon Sep 17 00:00:00 2001
+From: Aleksandr Yashkin <a.yashkin@inango-systems.com>
+Date: Mon, 23 Dec 2019 18:38:16 +0500
+Subject: pstore/ram: Write new dumps to start of recycled zones
+
+From: Aleksandr Yashkin <a.yashkin@inango-systems.com>
+
+commit 9e5f1c19800b808a37fb9815a26d382132c26c3d upstream.
+
+The ram_core.c routines treat przs as circular buffers. When writing a
+new crash dump, the old buffer needs to be cleared so that the new dump
+doesn't end up in the wrong place (i.e. appended after the old one).
+
+The solution to this problem is to reset the circular buffer state before
+writing a new Oops dump.
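+
+A simplified model of the failure mode (field names loosely follow
+persistent_ram_zone; wrap-around handling is omitted):
+
+  #include <stdio.h>
+  #include <string.h>
+
+  struct prz {
+      char buf[64];
+      size_t start;   /* next write offset in the circular buffer */
+      size_t size;    /* bytes currently stored */
+  };
+
+  static void prz_zap(struct prz *p)  /* like persistent_ram_zap() */
+  {
+      p->start = 0;
+      p->size = 0;
+  }
+
+  static void prz_write(struct prz *p, const char *s)
+  {
+      size_t len = strlen(s);
+
+      memcpy(p->buf + p->start, s, len);
+      p->start += len;
+      p->size += len;
+  }
+
+  int main(void)
+  {
+      struct prz p = {0};
+
+      prz_write(&p, "OLD-DUMP");
+      prz_zap(&p);    /* the fix: reset before reusing the zone;
+                       * skip this and the output below becomes
+                       * OLD-DUMPNEW-DUMP */
+      prz_write(&p, "NEW-DUMP");
+      printf("%.*s\n", (int)p.size, p.buf);   /* prints NEW-DUMP */
+      return 0;
+  }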
+
+Signed-off-by: Aleksandr Yashkin <a.yashkin@inango-systems.com>
+Signed-off-by: Nikolay Merinov <n.merinov@inango-systems.com>
+Signed-off-by: Ariel Gilman <a.gilman@inango-systems.com>
+Link: https://lore.kernel.org/r/20191223133816.28155-1-n.merinov@inango-systems.com
+Fixes: 896fc1f0c4c6 ("pstore/ram: Switch to persistent_ram routines")
+Cc: stable@vger.kernel.org
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/pstore/ram.c | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+--- a/fs/pstore/ram.c
++++ b/fs/pstore/ram.c
+@@ -407,6 +407,17 @@ static int notrace ramoops_pstore_write(
+
+ prz = cxt->dprzs[cxt->dump_write_cnt];
+
++ /*
++ * Since this is a new crash dump, we need to reset the buffer in
++ * case it still has an old dump present. Without this, the new dump
++ * will get appended, which would seriously confuse anything trying
++ * to check dump file contents. Specifically, ramoops_read_kmsg_hdr()
++ * expects to find a dump header in the beginning of buffer data, so
++ * we must reset the buffer values, in order to ensure that the
++ * header will be written to the beginning of the buffer.
++ */
++ persistent_ram_zap(prz);
++
+ /* Build header and append record contents. */
+ hlen = ramoops_write_kmsg_hdr(prz, record);
+ if (!hlen)
--- /dev/null
+From 771b894f2f3dfedc2ba5561731fffa0e39b1bbb6 Mon Sep 17 00:00:00 2001
+From: Sargun Dhillon <sargun@sargun.me>
+Date: Mon, 30 Dec 2019 12:35:03 -0800
+Subject: samples/seccomp: Zero out members based on seccomp_notif_sizes
+
+From: Sargun Dhillon <sargun@sargun.me>
+
+commit 771b894f2f3dfedc2ba5561731fffa0e39b1bbb6 upstream.
+
+The sizes with which seccomp_notif and seccomp_notif_resp are allocated
+are based on the SECCOMP_GET_NOTIF_SIZES operation. This allows these
+data structures to be extended gracefully. If userspace zeroes out a
+structure based on its own version and that version lags behind the
+kernel's, it will end up sending trailing garbage. On the other hand,
+if its version is ahead of the kernel's, it will write extra zero space
+and potentially cause corruption.
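+
+The resulting pattern, reduced to a standalone sketch (only the size
+query and the zeroing are shown; no filter or listener is set up):
+
+  #include <linux/seccomp.h>
+  #include <stdio.h>
+  #include <stdlib.h>
+  #include <string.h>
+  #include <sys/syscall.h>
+  #include <unistd.h>
+
+  int main(void)
+  {
+      struct seccomp_notif_sizes sizes;
+      struct seccomp_notif *req;
+
+      /* ask the kernel how large its notification structs are */
+      if (syscall(__NR_seccomp, SECCOMP_GET_NOTIF_SIZES, 0, &sizes) < 0) {
+          perror("seccomp(SECCOMP_GET_NOTIF_SIZES)");
+          return 1;
+      }
+
+      req = malloc(sizes.seccomp_notif);
+      if (!req)
+          return 1;
+      /* zero the kernel-reported size, not sizeof(*req): the running
+       * kernel's struct may be larger than the build-time headers' */
+      memset(req, 0, sizes.seccomp_notif);
+
+      printf("kernel seccomp_notif size: %u\n",
+             (unsigned)sizes.seccomp_notif);
+      free(req);
+      return 0;
+  }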
+
+Signed-off-by: Sargun Dhillon <sargun@sargun.me>
+Suggested-by: Tycho Andersen <tycho@tycho.ws>
+Link: https://lore.kernel.org/r/20191230203503.4925-1-sargun@sargun.me
+Fixes: fec7b6690541 ("samples: add an example of seccomp user trap")
+Cc: stable@vger.kernel.org
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ samples/seccomp/user-trap.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/samples/seccomp/user-trap.c
++++ b/samples/seccomp/user-trap.c
+@@ -298,14 +298,14 @@ int main(void)
+ req = malloc(sizes.seccomp_notif);
+ if (!req)
+ goto out_close;
+- memset(req, 0, sizeof(*req));
+
+ resp = malloc(sizes.seccomp_notif_resp);
+ if (!resp)
+ goto out_req;
+- memset(resp, 0, sizeof(*resp));
++ memset(resp, 0, sizes.seccomp_notif_resp);
+
+ while (1) {
++ memset(req, 0, sizes.seccomp_notif);
+ if (ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, req)) {
+ perror("ioctl recv");
+ goto out_resp;
--- /dev/null
+From 2882d53c9c6f3b8311d225062522f03772cf0179 Mon Sep 17 00:00:00 2001
+From: Sargun Dhillon <sargun@sargun.me>
+Date: Sat, 28 Dec 2019 22:24:50 -0800
+Subject: seccomp: Check that seccomp_notif is zeroed out by the user
+
+From: Sargun Dhillon <sargun@sargun.me>
+
+commit 2882d53c9c6f3b8311d225062522f03772cf0179 upstream.
+
+This patch is a small change in enforcement of the uapi for the
+SECCOMP_IOCTL_NOTIF_RECV ioctl. Specifically, the data structure which
+is passed (seccomp_notif) must now be zeroed out. Previously, any of
+its members could be set to nonsense values and we would ignore them.
+
+This ensures all fields are set to their zero value.
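+
+check_zeroed_user()'s contract, modelled in userspace (user-copy
+faults are ignored in this sketch; the kernel helper returns a
+negative errno for those):
+
+  #include <stdio.h>
+  #include <string.h>
+
+  /* <0 on error, 0 if any byte is nonzero, >0 if all bytes are zero */
+  static long check_zeroed(const unsigned char *buf, size_t size)
+  {
+      size_t i;
+
+      for (i = 0; i < size; i++)
+          if (buf[i])
+              return 0;
+      return 1;
+  }
+
+  int main(void)
+  {
+      unsigned char notif[80];
+
+      memset(notif, 0, sizeof(notif));
+      notif[4] = 0xff;    /* a stale/garbage member */
+
+      if (check_zeroed(notif, sizeof(notif)) <= 0) {
+          fprintf(stderr, "would fail with -EINVAL\n");
+          return 1;
+      }
+      return 0;
+  }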
+
+Signed-off-by: Sargun Dhillon <sargun@sargun.me>
+Reviewed-by: Christian Brauner <christian.brauner@ubuntu.com>
+Reviewed-by: Aleksa Sarai <cyphar@cyphar.com>
+Acked-by: Tycho Andersen <tycho@tycho.ws>
+Link: https://lore.kernel.org/r/20191229062451.9467-2-sargun@sargun.me
+Fixes: 6a21cc50f0c7 ("seccomp: add a return code to trap to userspace")
+Cc: stable@vger.kernel.org
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/seccomp.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/kernel/seccomp.c
++++ b/kernel/seccomp.c
+@@ -1015,6 +1015,13 @@ static long seccomp_notify_recv(struct s
+ struct seccomp_notif unotif;
+ ssize_t ret;
+
++ /* Verify that we're not given garbage to keep struct extensible. */
++ ret = check_zeroed_user(buf, sizeof(unotif));
++ if (ret < 0)
++ return ret;
++ if (!ret)
++ return -EINVAL;
++
+ memset(&unotif, 0, sizeof(unotif));
+
+ ret = down_interruptible(&filter->notif->request);
--- /dev/null
+From e4ab5ccc357b978999328fadae164e098c26fa40 Mon Sep 17 00:00:00 2001
+From: Sargun Dhillon <sargun@sargun.me>
+Date: Mon, 30 Dec 2019 12:38:11 -0800
+Subject: selftests/seccomp: Catch garbage on SECCOMP_IOCTL_NOTIF_RECV
+
+From: Sargun Dhillon <sargun@sargun.me>
+
+commit e4ab5ccc357b978999328fadae164e098c26fa40 upstream.
+
+This adds logic to the user_notification_basic test that sets a member
+of struct seccomp_notif to an invalid (nonzero) value, ensuring that
+the kernel returns EINVAL whenever any struct seccomp_notif member is
+set to an invalid value.
+
+Signed-off-by: Sargun Dhillon <sargun@sargun.me>
+Suggested-by: Christian Brauner <christian.brauner@ubuntu.com>
+Link: https://lore.kernel.org/r/20191230203811.4996-1-sargun@sargun.me
+Fixes: 6a21cc50f0c7 ("seccomp: add a return code to trap to userspace")
+Cc: stable@vger.kernel.org
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/testing/selftests/seccomp/seccomp_bpf.c | 13 ++++++++++++-
+ 1 file changed, 12 insertions(+), 1 deletion(-)
+
+--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
++++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
+@@ -3147,7 +3147,18 @@ TEST(user_notification_basic)
+ EXPECT_GT(poll(&pollfd, 1, -1), 0);
+ EXPECT_EQ(pollfd.revents, POLLIN);
+
+- EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
++ /* Test that we can't pass garbage to the kernel. */
++ memset(&req, 0, sizeof(req));
++ req.pid = -1;
++ errno = 0;
++ ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req);
++ EXPECT_EQ(-1, ret);
++ EXPECT_EQ(EINVAL, errno);
++
++ if (ret) {
++ req.pid = 0;
++ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
++ }
+
+ pollfd.fd = listener;
+ pollfd.events = POLLIN | POLLOUT;
--- /dev/null
+From 88c13f8bd71472fbab5338b01d99122908c77e53 Mon Sep 17 00:00:00 2001
+From: Sargun Dhillon <sargun@sargun.me>
+Date: Sat, 28 Dec 2019 22:24:49 -0800
+Subject: selftests/seccomp: Zero out seccomp_notif
+
+From: Sargun Dhillon <sargun@sargun.me>
+
+commit 88c13f8bd71472fbab5338b01d99122908c77e53 upstream.
+
+The seccomp_notif structure should be zeroed out prior to calling the
+SECCOMP_IOCTL_NOTIF_RECV ioctl. Previously, the kernel did not check
+whether these structures were zeroed out, so the tests worked anyway.
+
+This patch zeroes out the seccomp_notif data structure prior to calling
+the ioctl.
+
+Signed-off-by: Sargun Dhillon <sargun@sargun.me>
+Reviewed-by: Tycho Andersen <tycho@tycho.ws>
+Reviewed-by: Christian Brauner <christian.brauner@ubuntu.com>
+Link: https://lore.kernel.org/r/20191229062451.9467-1-sargun@sargun.me
+Fixes: 6a21cc50f0c7 ("seccomp: add a return code to trap to userspace")
+Cc: stable@vger.kernel.org
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/testing/selftests/seccomp/seccomp_bpf.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
++++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
+@@ -3267,6 +3267,7 @@ TEST(user_notification_signal)
+
+ close(sk_pair[1]);
+
++ memset(&req, 0, sizeof(req));
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+
+ EXPECT_EQ(kill(pid, SIGUSR1), 0);
+@@ -3285,6 +3286,7 @@ TEST(user_notification_signal)
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
+ EXPECT_EQ(errno, ENOENT);
+
++ memset(&req, 0, sizeof(req));
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+
+ resp.id = req.id;
media-cec-cec-2.0-only-bcast-messages-were-ignored.patch
media-cec-avoid-decrementing-transmit_queue_sz-if-it-is-0.patch
media-cec-check-transmit_in_progress-not-transmitting.patch
+mm-memory_hotplug-shrink-zones-when-offlining-memory.patch
+mm-zsmalloc.c-fix-the-migrated-zspage-statistics.patch
+memcg-account-security-cred-as-well-to-kmemcg.patch
+mm-move_pages-return-valid-node-id-in-status-if-the-page-is-already-on-the-target-node.patch
+mm-oom-fix-pgtables-units-mismatch-in-killed-process-message.patch
+ocfs2-fix-the-crash-due-to-call-ocfs2_get_dlm_debug-once-less.patch
+pstore-ram-write-new-dumps-to-start-of-recycled-zones.patch
+pstore-ram-fix-error-path-memory-leak-in-persistent_ram_new-callers.patch
+gcc-plugins-make-it-possible-to-disable-config_gcc_plugins-again.patch
+locks-print-unsigned-ino-in-proc-locks.patch
+selftests-seccomp-zero-out-seccomp_notif.patch
+seccomp-check-that-seccomp_notif-is-zeroed-out-by-the-user.patch
+samples-seccomp-zero-out-members-based-on-seccomp_notif_sizes.patch
+selftests-seccomp-catch-garbage-on-seccomp_ioctl_notif_recv.patch
+dmaengine-fix-access-to-uninitialized-dma_slave_caps.patch
+dmaengine-dma-jz4780-also-break-descriptor-chains-on-jz4725b.patch
+btrfs-fix-infinite-loop-during-nocow-writeback-due-to-race.patch
+block-fix-splitting-segments-on-boundary-masks.patch
+compat_ioctl-block-handle-persistent-reservations.patch
+compat_ioctl-block-handle-blkreportzone-blkresetzone.patch
+compat_ioctl-block-handle-blkgetzonesz-blkgetnrzones.patch
+bpf-fix-precision-tracking-for-unbounded-scalars.patch