From: Greg Kroah-Hartman
Date: Mon, 6 Jan 2020 12:47:23 +0000 (+0100)
Subject: 5.4-stable patches
X-Git-Tag: v4.14.163~44
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=16dc38991eb1389dff33cb93c1037b1d8d34b7ca;p=thirdparty%2Fkernel%2Fstable-queue.git

5.4-stable patches

added patches:
    block-fix-splitting-segments-on-boundary-masks.patch
    bpf-fix-precision-tracking-for-unbounded-scalars.patch
    btrfs-fix-infinite-loop-during-nocow-writeback-due-to-race.patch
    compat_ioctl-block-handle-blkgetzonesz-blkgetnrzones.patch
    compat_ioctl-block-handle-blkreportzone-blkresetzone.patch
    compat_ioctl-block-handle-persistent-reservations.patch
    dmaengine-dma-jz4780-also-break-descriptor-chains-on-jz4725b.patch
    dmaengine-fix-access-to-uninitialized-dma_slave_caps.patch
    gcc-plugins-make-it-possible-to-disable-config_gcc_plugins-again.patch
    locks-print-unsigned-ino-in-proc-locks.patch
    memcg-account-security-cred-as-well-to-kmemcg.patch
    mm-memory_hotplug-shrink-zones-when-offlining-memory.patch
    mm-move_pages-return-valid-node-id-in-status-if-the-page-is-already-on-the-target-node.patch
    mm-oom-fix-pgtables-units-mismatch-in-killed-process-message.patch
    mm-zsmalloc.c-fix-the-migrated-zspage-statistics.patch
    ocfs2-fix-the-crash-due-to-call-ocfs2_get_dlm_debug-once-less.patch
    pstore-ram-fix-error-path-memory-leak-in-persistent_ram_new-callers.patch
    pstore-ram-write-new-dumps-to-start-of-recycled-zones.patch
    samples-seccomp-zero-out-members-based-on-seccomp_notif_sizes.patch
    seccomp-check-that-seccomp_notif-is-zeroed-out-by-the-user.patch
    selftests-seccomp-catch-garbage-on-seccomp_ioctl_notif_recv.patch
    selftests-seccomp-zero-out-seccomp_notif.patch
---

diff --git a/queue-5.4/block-fix-splitting-segments-on-boundary-masks.patch b/queue-5.4/block-fix-splitting-segments-on-boundary-masks.patch
new file mode 100644
index 00000000000..fe44b7ebc3c
--- /dev/null
+++ b/queue-5.4/block-fix-splitting-segments-on-boundary-masks.patch
@@ -0,0 +1,86 @@
+From 429120f3df2dba2bf3a4a19f4212a53ecefc7102 Mon Sep 17 00:00:00 2001
+From: Ming Lei
+Date: Sun, 29 Dec 2019 10:32:30 +0800
+Subject: block: fix splitting segments on boundary masks
+
+From: Ming Lei
+
+commit 429120f3df2dba2bf3a4a19f4212a53ecefc7102 upstream.
+
+We ran into a problem with an mpt3sas-based controller, where we would
+see random (and hard to reproduce) file corruption. The issue seemed
+specific to this controller, but wasn't specific to the file system.
+After a lot of debugging, we found out that it's caused by segments
+spanning a 4G memory boundary. This shouldn't happen, as the default
+setting for segment boundary masks is 4G.
+
+Turns out there are two issues in get_max_segment_size():
+
+1) The default segment boundary mask is bypassed
+
+2) The segment start address isn't taken into account when checking
+   the segment boundary limit
+
+Fix these two issues by removing the bypass of the segment boundary
+check even if the mask is set to the default value, and taking into
+account the actual start address of the request when checking if a
+segment needs splitting.
+
+Cc: stable@vger.kernel.org # v5.1+
+Reviewed-by: Chris Mason
+Tested-by: Chris Mason
+Fixes: dcebd755926b ("block: use bio_for_each_bvec() to compute multi-page bvec count")
+Signed-off-by: Ming Lei
+
+Dropped const on the page pointer, ppc page_to_phys() doesn't mark the
+page as const...
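+
+To illustrate the corrected math with example numbers (illustrative
+values only, not taken from the change itself): with the default 4G
+boundary mask and a segment whose data starts at physical address
+0xfffff000, the largest segment that does not cross the boundary is:
+
+	unsigned long mask = 0xffffffffUL;             /* BLK_SEG_BOUNDARY_MASK */
+	unsigned long phys = 0xfffff000UL;             /* page_to_phys(page) + offset */
+	unsigned long len  = mask - (phys & mask) + 1; /* 0x1000 bytes left below 4G */
+
+The old code returned queue_max_segment_size() whenever the mask was
+the default, which is how a segment could end up spanning the 4G
+boundary.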
+Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + block/blk-merge.c | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +--- a/block/blk-merge.c ++++ b/block/blk-merge.c +@@ -157,16 +157,14 @@ static inline unsigned get_max_io_size(s + return sectors & (lbs - 1); + } + +-static unsigned get_max_segment_size(const struct request_queue *q, +- unsigned offset) ++static inline unsigned get_max_segment_size(const struct request_queue *q, ++ struct page *start_page, ++ unsigned long offset) + { + unsigned long mask = queue_segment_boundary(q); + +- /* default segment boundary mask means no boundary limit */ +- if (mask == BLK_SEG_BOUNDARY_MASK) +- return queue_max_segment_size(q); +- +- return min_t(unsigned long, mask - (mask & offset) + 1, ++ offset = mask & (page_to_phys(start_page) + offset); ++ return min_t(unsigned long, mask - offset + 1, + queue_max_segment_size(q)); + } + +@@ -201,7 +199,8 @@ static bool bvec_split_segs(const struct + unsigned seg_size = 0; + + while (len && *nsegs < max_segs) { +- seg_size = get_max_segment_size(q, bv->bv_offset + total_len); ++ seg_size = get_max_segment_size(q, bv->bv_page, ++ bv->bv_offset + total_len); + seg_size = min(seg_size, len); + + (*nsegs)++; +@@ -404,7 +403,8 @@ static unsigned blk_bvec_map_sg(struct r + + while (nbytes > 0) { + unsigned offset = bvec->bv_offset + total; +- unsigned len = min(get_max_segment_size(q, offset), nbytes); ++ unsigned len = min(get_max_segment_size(q, bvec->bv_page, ++ offset), nbytes); + struct page *page = bvec->bv_page; + + /* diff --git a/queue-5.4/bpf-fix-precision-tracking-for-unbounded-scalars.patch b/queue-5.4/bpf-fix-precision-tracking-for-unbounded-scalars.patch new file mode 100644 index 00000000000..ea5f8569798 --- /dev/null +++ b/queue-5.4/bpf-fix-precision-tracking-for-unbounded-scalars.patch @@ -0,0 +1,259 @@ +From f54c7898ed1c3c9331376c0337a5049c38f66497 Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann +Date: Sun, 22 Dec 2019 23:37:40 +0100 +Subject: bpf: Fix precision tracking for unbounded scalars + +From: Daniel Borkmann + +commit f54c7898ed1c3c9331376c0337a5049c38f66497 upstream. + +Anatoly has been fuzzing with kBdysch harness and reported a hang in one +of the outcomes. Upon closer analysis, it turns out that precise scalar +value tracking is missing a few precision markings for unknown scalars: + + 0: R1=ctx(id=0,off=0,imm=0) R10=fp0 + 0: (b7) r0 = 0 + 1: R0_w=invP0 R1=ctx(id=0,off=0,imm=0) R10=fp0 + 1: (35) if r0 >= 0xf72e goto pc+0 + --> only follow fallthrough + 2: R0_w=invP0 R1=ctx(id=0,off=0,imm=0) R10=fp0 + 2: (35) if r0 >= 0x80fe0000 goto pc+0 + --> only follow fallthrough + 3: R0_w=invP0 R1=ctx(id=0,off=0,imm=0) R10=fp0 + 3: (14) w0 -= -536870912 + 4: R0_w=invP536870912 R1=ctx(id=0,off=0,imm=0) R10=fp0 + 4: (0f) r1 += r0 + 5: R0_w=invP536870912 R1_w=inv(id=0) R10=fp0 + 5: (55) if r1 != 0x104c1500 goto pc+0 + --> push other branch for later analysis + R0_w=invP536870912 R1_w=inv273421568 R10=fp0 + 6: R0_w=invP536870912 R1_w=inv273421568 R10=fp0 + 6: (b7) r0 = 0 + 7: R0=invP0 R1=inv273421568 R10=fp0 + 7: (76) if w1 s>= 0xffffff00 goto pc+3 + --> only follow goto + 11: R0=invP0 R1=inv273421568 R10=fp0 + 11: (95) exit + 6: R0_w=invP536870912 R1_w=inv(id=0) R10=fp0 + 6: (b7) r0 = 0 + propagating r0 + 7: safe + processed 11 insns [...] + +In the analysis of the second path coming after the successful exit above, +the path is being pruned at line 7. 
Pruning analysis found that both r0 are
+precise P0 and both R1 are non-precise scalars and, given the prior path
+with R1 as a non-precise scalar succeeded, this one is therefore safe as
+well.
+
+However, the problem is that, given the condition at insn 7 in the first
+run, we only followed the goto and didn't push the other branch for later
+analysis; we've never walked the few insns in there and therefore dead-code
+sanitization rewrites it as goto pc-1, causing the hang depending on the
+skb address hitting these conditions. The issue is that R1 should have
+been marked as precise as well, such that pruning enforces a range check
+and concludes that the new R1 is not in range of the old R1. In insn 4, we
+mark R1 (skb) as an unknown scalar via __mark_reg_unbounded() but not
+mark_reg_unbounded() and therefore regs->precise remains false.
+
+Back in b5dc0163d8fd ("bpf: precise scalar_value tracking"), this was not
+the case since marking out of __mark_reg_unbounded() had this covered as
+well. Once both are set as precise at insn 4, as they should have been, we
+conclude that given R1 was 0x104c1500 in the prior fall-through path and
+is now completely unknown, the check at insn 7 requires us to continue
+walking. Analysis after the fix:
+
+  0: R1=ctx(id=0,off=0,imm=0) R10=fp0
+  0: (b7) r0 = 0
+  1: R0_w=invP0 R1=ctx(id=0,off=0,imm=0) R10=fp0
+  1: (35) if r0 >= 0xf72e goto pc+0
+  2: R0_w=invP0 R1=ctx(id=0,off=0,imm=0) R10=fp0
+  2: (35) if r0 >= 0x80fe0000 goto pc+0
+  3: R0_w=invP0 R1=ctx(id=0,off=0,imm=0) R10=fp0
+  3: (14) w0 -= -536870912
+  4: R0_w=invP536870912 R1=ctx(id=0,off=0,imm=0) R10=fp0
+  4: (0f) r1 += r0
+  5: R0_w=invP536870912 R1_w=invP(id=0) R10=fp0
+  5: (55) if r1 != 0x104c1500 goto pc+0
+  R0_w=invP536870912 R1_w=invP273421568 R10=fp0
+  6: R0_w=invP536870912 R1_w=invP273421568 R10=fp0
+  6: (b7) r0 = 0
+  7: R0=invP0 R1=invP273421568 R10=fp0
+  7: (76) if w1 s>= 0xffffff00 goto pc+3
+  11: R0=invP0 R1=invP273421568 R10=fp0
+  11: (95) exit
+  6: R0_w=invP536870912 R1_w=invP(id=0) R10=fp0
+  6: (b7) r0 = 0
+  7: R0_w=invP0 R1_w=invP(id=0) R10=fp0
+  7: (76) if w1 s>= 0xffffff00 goto pc+3
+  R0_w=invP0 R1_w=invP(id=0) R10=fp0
+  8: R0_w=invP0 R1_w=invP(id=0) R10=fp0
+  8: (a5) if r0 < 0x2007002a goto pc+0
+  9: R0_w=invP0 R1_w=invP(id=0) R10=fp0
+  9: (57) r0 &= -16316416
+  10: R0_w=invP0 R1_w=invP(id=0) R10=fp0
+  10: (a6) if w0 < 0x1201 goto pc+0
+  11: R0_w=invP0 R1_w=invP(id=0) R10=fp0
+  11: (95) exit
+  11: R0=invP0 R1=invP(id=0) R10=fp0
+  11: (95) exit
+  processed 16 insns [...]
+
+Fixes: 6754172c208d ("bpf: fix precision tracking in presence of bpf2bpf calls")
+Reported-by: Anatoly Trosinenko
+Signed-off-by: Daniel Borkmann
+Signed-off-by: Alexei Starovoitov
+Link: https://lore.kernel.org/bpf/20191222223740.25297-1-daniel@iogearbox.net
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ kernel/bpf/verifier.c |   43 ++++++++++++++++++++++---------------------
+ 1 file changed, 22 insertions(+), 21 deletions(-)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -852,7 +852,8 @@ static const int caller_saved[CALLER_SAV
+ 	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
+ };
+
+-static void __mark_reg_not_init(struct bpf_reg_state *reg);
++static void __mark_reg_not_init(const struct bpf_verifier_env *env,
++				struct bpf_reg_state *reg);
+
+ /* Mark the unknown part of a register (variable offset or scalar value) as
+  * known to have the value @imm.
+@@ -890,7 +891,7 @@ static void mark_reg_known_zero(struct b + verbose(env, "mark_reg_known_zero(regs, %u)\n", regno); + /* Something bad happened, let's kill all regs */ + for (regno = 0; regno < MAX_BPF_REG; regno++) +- __mark_reg_not_init(regs + regno); ++ __mark_reg_not_init(env, regs + regno); + return; + } + __mark_reg_known_zero(regs + regno); +@@ -999,7 +1000,8 @@ static void __mark_reg_unbounded(struct + } + + /* Mark a register as having a completely unknown (scalar) value. */ +-static void __mark_reg_unknown(struct bpf_reg_state *reg) ++static void __mark_reg_unknown(const struct bpf_verifier_env *env, ++ struct bpf_reg_state *reg) + { + /* + * Clear type, id, off, and union(map_ptr, range) and +@@ -1009,6 +1011,8 @@ static void __mark_reg_unknown(struct bp + reg->type = SCALAR_VALUE; + reg->var_off = tnum_unknown; + reg->frameno = 0; ++ reg->precise = env->subprog_cnt > 1 || !env->allow_ptr_leaks ? ++ true : false; + __mark_reg_unbounded(reg); + } + +@@ -1019,19 +1023,16 @@ static void mark_reg_unknown(struct bpf_ + verbose(env, "mark_reg_unknown(regs, %u)\n", regno); + /* Something bad happened, let's kill all regs except FP */ + for (regno = 0; regno < BPF_REG_FP; regno++) +- __mark_reg_not_init(regs + regno); ++ __mark_reg_not_init(env, regs + regno); + return; + } +- regs += regno; +- __mark_reg_unknown(regs); +- /* constant backtracking is enabled for root without bpf2bpf calls */ +- regs->precise = env->subprog_cnt > 1 || !env->allow_ptr_leaks ? +- true : false; ++ __mark_reg_unknown(env, regs + regno); + } + +-static void __mark_reg_not_init(struct bpf_reg_state *reg) ++static void __mark_reg_not_init(const struct bpf_verifier_env *env, ++ struct bpf_reg_state *reg) + { +- __mark_reg_unknown(reg); ++ __mark_reg_unknown(env, reg); + reg->type = NOT_INIT; + } + +@@ -1042,10 +1043,10 @@ static void mark_reg_not_init(struct bpf + verbose(env, "mark_reg_not_init(regs, %u)\n", regno); + /* Something bad happened, let's kill all regs except FP */ + for (regno = 0; regno < BPF_REG_FP; regno++) +- __mark_reg_not_init(regs + regno); ++ __mark_reg_not_init(env, regs + regno); + return; + } +- __mark_reg_not_init(regs + regno); ++ __mark_reg_not_init(env, regs + regno); + } + + #define DEF_NOT_SUBREG (0) +@@ -3066,7 +3067,7 @@ static int check_stack_boundary(struct b + } + if (state->stack[spi].slot_type[0] == STACK_SPILL && + state->stack[spi].spilled_ptr.type == SCALAR_VALUE) { +- __mark_reg_unknown(&state->stack[spi].spilled_ptr); ++ __mark_reg_unknown(env, &state->stack[spi].spilled_ptr); + for (j = 0; j < BPF_REG_SIZE; j++) + state->stack[spi].slot_type[j] = STACK_MISC; + goto mark; +@@ -3706,7 +3707,7 @@ static void __clear_all_pkt_pointers(str + if (!reg) + continue; + if (reg_is_pkt_pointer_any(reg)) +- __mark_reg_unknown(reg); ++ __mark_reg_unknown(env, reg); + } + } + +@@ -3734,7 +3735,7 @@ static void release_reg_references(struc + if (!reg) + continue; + if (reg->ref_obj_id == ref_obj_id) +- __mark_reg_unknown(reg); ++ __mark_reg_unknown(env, reg); + } + } + +@@ -4357,7 +4358,7 @@ static int adjust_ptr_min_max_vals(struc + /* Taint dst register if offset had invalid bounds derived from + * e.g. dead branches. + */ +- __mark_reg_unknown(dst_reg); ++ __mark_reg_unknown(env, dst_reg); + return 0; + } + +@@ -4609,13 +4610,13 @@ static int adjust_scalar_min_max_vals(st + /* Taint dst register if offset had invalid bounds derived from + * e.g. dead branches. 
+ 	 */
+-		__mark_reg_unknown(dst_reg);
++		__mark_reg_unknown(env, dst_reg);
+ 		return 0;
+ 	}
+
+ 	if (!src_known &&
+ 	    opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
+-		__mark_reg_unknown(dst_reg);
++		__mark_reg_unknown(env, dst_reg);
+ 		return 0;
+ 	}
+
+@@ -6746,7 +6747,7 @@ static void clean_func_state(struct bpf_
+ 		/* since the register is unused, clear its state
+ 		 * to make further comparison simpler
+ 		 */
+-		__mark_reg_not_init(&st->regs[i]);
++		__mark_reg_not_init(env, &st->regs[i]);
+ 	}
+
+ 	for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
+@@ -6754,7 +6755,7 @@ static void clean_func_state(struct bpf_
+ 		/* liveness must not touch this stack slot anymore */
+ 		st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
+ 		if (!(live & REG_LIVE_READ)) {
+-			__mark_reg_not_init(&st->stack[i].spilled_ptr);
++			__mark_reg_not_init(env, &st->stack[i].spilled_ptr);
+ 			for (j = 0; j < BPF_REG_SIZE; j++)
+ 				st->stack[i].slot_type[j] = STACK_INVALID;
+ 		}
diff --git a/queue-5.4/btrfs-fix-infinite-loop-during-nocow-writeback-due-to-race.patch b/queue-5.4/btrfs-fix-infinite-loop-during-nocow-writeback-due-to-race.patch
new file mode 100644
index 00000000000..4c7d531fada
--- /dev/null
+++ b/queue-5.4/btrfs-fix-infinite-loop-during-nocow-writeback-due-to-race.patch
@@ -0,0 +1,212 @@
+From de7999afedff02c6631feab3ea726a0e8f8c3d40 Mon Sep 17 00:00:00 2001
+From: Filipe Manana
+Date: Wed, 11 Dec 2019 09:01:40 +0000
+Subject: Btrfs: fix infinite loop during nocow writeback due to race
+
+From: Filipe Manana
+
+commit de7999afedff02c6631feab3ea726a0e8f8c3d40 upstream.
+
+When starting writeback for a range that covers part of a preallocated
+extent, due to a race with writeback for another range that also covers
+another part of the same preallocated extent, we can end up in an
+infinite loop.
+
+Consider the following example where for inode 280 we have two dirty
+ranges:
+
+  range A, from 294912 to 303103, 8192 bytes
+  range B, from 348160 to 438271, 90112 bytes
+
+and we have the following file extent item layout for our inode:
+
+  leaf 38895616 gen 24544 total ptrs 29 free space 13820 owner 5
+  (...)
+  item 27 key (280 108 200704) itemoff 14598 itemsize 53
+    extent data disk bytenr 0 nr 0 type 1 (regular)
+    extent data offset 0 nr 94208 ram 94208
+  item 28 key (280 108 294912) itemoff 14545 itemsize 53
+    extent data disk bytenr 10433052672 nr 81920 type 2 (prealloc)
+    extent data offset 0 nr 81920 ram 81920
+
+Then the following happens:
+
+1) Writeback starts for range B (from 348160 to 438271), execution of
+   run_delalloc_nocow() starts;
+
+2) The first iteration of run_delalloc_nocow()'s while loop leaves us at
+   the extent item at slot 28, pointing to the prealloc extent item
+   covering the range from 294912 to 376831. This extent covers part of
+   our range;
+
+3) An ordered extent is created against that extent, covering the file
+   range from 348160 to 376831 (28672 bytes);
+
+4) We adjust 'cur_offset' to 376832 and move on to the next iteration of
+   the while loop;
+
+5) The call to btrfs_lookup_file_extent() leaves us at the same leaf,
+   pointing to slot 29, 1 slot after the last item (the extent item
+   we processed in the previous iteration);
+
+6) Because we are a slot beyond the last item, we call btrfs_next_leaf(),
+   which releases the search path before doing another search for the
+   last key of the leaf (280 108 294912);
+
+7) Right after btrfs_next_leaf() released the path, and before it did
+   another search for the last key of the leaf, writeback for the range
+   A (from 294912 to 303103) completes (it was previously started at
+   some point);
+
+8) Upon completion of the ordered extent for range A, the prealloc extent
+   we previously found got split into two extent items, one covering the
+   range from 294912 to 303103 (8192 bytes), with a type of regular extent
+   (and no longer prealloc) and another covering the range from 303104 to
+   376831 (73728 bytes), with a type of prealloc and an offset of 8192
+   bytes. So our leaf now has the following layout:
+
+  leaf 38895616 gen 24544 total ptrs 31 free space 13664 owner 5
+  (...)
+  item 27 key (280 108 200704) itemoff 14598 itemsize 53
+    extent data disk bytenr 0 nr 0 type 1
+    extent data offset 0 nr 8192 ram 94208
+  item 28 key (280 108 208896) itemoff 14545 itemsize 53
+    extent data disk bytenr 10433142784 nr 86016 type 1
+    extent data offset 0 nr 86016 ram 86016
+  item 29 key (280 108 294912) itemoff 14492 itemsize 53
+    extent data disk bytenr 10433052672 nr 81920 type 1
+    extent data offset 0 nr 8192 ram 81920
+  item 30 key (280 108 303104) itemoff 14439 itemsize 53
+    extent data disk bytenr 10433052672 nr 81920 type 2
+    extent data offset 8192 nr 73728 ram 81920
+
+9) After btrfs_next_leaf() returns, we have our path pointing to that
+   same leaf and at slot 30, since it has a key we didn't have before
+   and it's the first key greater than the key that was previously the
+   last key of the leaf (key (280 108 294912));
+
+10) The extent item at slot 30 covers the range from 303104 to 376831,
+    which is in our target range, so we process it, despite having
+    already created an ordered extent against this extent for the file
+    range from 348160 to 376831. This is because we skip to the next
+    extent item only if its end is less than or equal to the start of
+    our delalloc range, and not less than or equal to the current offset
+    ('cur_offset');
+
+11) As a result we compute 'num_bytes' as:
+
+    num_bytes = min(end + 1, extent_end) - cur_offset;
+              = min(438271 + 1, 376832) - 376832 = 0
+
+12) We then call create_io_em() for a 0 bytes range starting at offset
+    376832;
+
+13) Then create_io_em() enters an infinite loop because its calls to
+    btrfs_drop_extent_cache() do nothing due to the 0 length range
+    passed to it. So no existing extent maps that cover the offset
+    376832 get removed, and therefore calls to add_extent_mapping()
+    return -EEXIST, resulting in an infinite loop. This loop from
+    create_io_em() is the following:
+
+    do {
+        btrfs_drop_extent_cache(BTRFS_I(inode), em->start,
+                                em->start + em->len - 1, 0);
+        write_lock(&em_tree->lock);
+        ret = add_extent_mapping(em_tree, em, 1);
+        write_unlock(&em_tree->lock);
+        /*
+         * The caller has taken lock_extent(), who could race with us
+         * to add em?
+         */
+    } while (ret == -EEXIST);
+
+Also, each call to btrfs_drop_extent_cache() triggers a warning because
+the start offset passed to it (376832) is greater than the end offset
+(376832 - 1) passed to it, a consequence of the 0 length:
+
+  [258532.052621] ------------[ cut here ]------------
+  [258532.052643] WARNING: CPU: 0 PID: 9987 at fs/btrfs/file.c:602 btrfs_drop_extent_cache+0x3f4/0x590 [btrfs]
+  (...)
+  [258532.052672] CPU: 0 PID: 9987 Comm: fsx Tainted: G W 5.4.0-rc7-btrfs-next-64 #1
+  [258532.052673] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.org 04/01/2014
+  [258532.052691] RIP: 0010:btrfs_drop_extent_cache+0x3f4/0x590 [btrfs]
+  (...)
+  [258532.052695] RSP: 0018:ffffb4be0153f860 EFLAGS: 00010287
+  [258532.052700] RAX: ffff975b445ee360 RBX: ffff975b44eb3e08 RCX: 0000000000000000
+  [258532.052700] RDX: 0000000000038fff RSI: 0000000000039000 RDI: ffff975b445ee308
+  [258532.052700] RBP: 0000000000038fff R08: 0000000000000000 R09: 0000000000000001
+  [258532.052701] R10: ffff975b513c5c10 R11: 00000000e3c0cfa9 R12: 0000000000039000
+  [258532.052703] R13: ffff975b445ee360 R14: 00000000ffffffef R15: ffff975b445ee308
+  [258532.052705] FS: 00007f86a821de80(0000) GS:ffff975b76a00000(0000) knlGS:0000000000000000
+  [258532.052707] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  [258532.052708] CR2: 00007fdacf0f3ab4 CR3: 00000001f9d26002 CR4: 00000000003606f0
+  [258532.052712] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+  [258532.052717] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+  [258532.052717] Call Trace:
+  [258532.052718]  ? preempt_schedule_common+0x32/0x70
+  [258532.052722]  ? ___preempt_schedule+0x16/0x20
+  [258532.052741]  create_io_em+0xff/0x180 [btrfs]
+  [258532.052767]  run_delalloc_nocow+0x942/0xb10 [btrfs]
+  [258532.052791]  btrfs_run_delalloc_range+0x30b/0x520 [btrfs]
+  [258532.052812]  ? find_lock_delalloc_range+0x221/0x250 [btrfs]
+  [258532.052834]  writepage_delalloc+0xe4/0x140 [btrfs]
+  [258532.052855]  __extent_writepage+0x110/0x4e0 [btrfs]
+  [258532.052876]  extent_write_cache_pages+0x21c/0x480 [btrfs]
+  [258532.052906]  extent_writepages+0x52/0xb0 [btrfs]
+  [258532.052911]  do_writepages+0x23/0x80
+  [258532.052915]  __filemap_fdatawrite_range+0xd2/0x110
+  [258532.052938]  btrfs_fdatawrite_range+0x1b/0x50 [btrfs]
+  [258532.052954]  start_ordered_ops+0x57/0xa0 [btrfs]
+  [258532.052973]  ? btrfs_sync_file+0x225/0x490 [btrfs]
+  [258532.052988]  btrfs_sync_file+0x225/0x490 [btrfs]
+  [258532.052997]  __x64_sys_msync+0x199/0x200
+  [258532.053004]  do_syscall_64+0x5c/0x250
+  [258532.053007]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
+  [258532.053010] RIP: 0033:0x7f86a7dfd760
+  (...)
+  [258532.053014] RSP: 002b:00007ffd99af0368 EFLAGS: 00000246 ORIG_RAX: 000000000000001a
+  [258532.053016] RAX: ffffffffffffffda RBX: 0000000000000ec9 RCX: 00007f86a7dfd760
+  [258532.053017] RDX: 0000000000000004 RSI: 000000000000836c RDI: 00007f86a8221000
+  [258532.053019] RBP: 0000000000021ec9 R08: 0000000000000003 R09: 00007f86a812037c
+  [258532.053020] R10: 0000000000000001 R11: 0000000000000246 R12: 00000000000074a3
+  [258532.053021] R13: 00007f86a8221000 R14: 000000000000836c R15: 0000000000000001
+  [258532.053032] irq event stamp: 1653450494
+  [258532.053035] hardirqs last enabled at (1653450493): [] _raw_spin_unlock_irq+0x29/0x50
+  [258532.053037] hardirqs last disabled at (1653450494): [] trace_hardirqs_off_thunk+0x1a/0x20
+  [258532.053039] softirqs last enabled at (1653449852): [] __do_softirq+0x466/0x6bd
+  [258532.053042] softirqs last disabled at (1653449845): [] irq_exit+0xec/0x120
+  [258532.053043] ---[ end trace 8476fce13d9ce20a ]---
+
+Which results in flooding dmesg/syslog since btrfs_drop_extent_cache()
+uses WARN_ON() and not WARN_ON_ONCE().
+
+So fix this issue by changing run_delalloc_nocow()'s loop to move to the
+next extent item when the current extent item ends at an offset less than
+or equal to the current offset, instead of the start offset.
+
+Fixes: 80ff385665b7fc ("Btrfs: update nodatacow code v2")
+CC: stable@vger.kernel.org # 4.4+
+Reviewed-by: Josef Bacik
+Signed-off-by: Filipe Manana
+Signed-off-by: David Sterba
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/btrfs/inode.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -1439,10 +1439,10 @@ next_slot:
+ 			disk_num_bytes =
+ 				btrfs_file_extent_disk_num_bytes(leaf, fi);
+ 			/*
+-			 * If extent we got ends before our range starts, skip
+-			 * to next extent
++			 * If the extent we got ends before our current offset,
++			 * skip to the next extent.
+ 			 */
+-			if (extent_end <= start) {
++			if (extent_end <= cur_offset) {
+ 				path->slots[0]++;
+ 				goto next_slot;
+ 			}
diff --git a/queue-5.4/compat_ioctl-block-handle-blkgetzonesz-blkgetnrzones.patch b/queue-5.4/compat_ioctl-block-handle-blkgetzonesz-blkgetnrzones.patch
new file mode 100644
index 00000000000..da6dc1ce92a
--- /dev/null
+++ b/queue-5.4/compat_ioctl-block-handle-blkgetzonesz-blkgetnrzones.patch
@@ -0,0 +1,35 @@
+From 21d37340912d74b1222d43c11aa9dd0687162573 Mon Sep 17 00:00:00 2001
+From: Arnd Bergmann
+Date: Fri, 29 Nov 2019 11:28:22 +0100
+Subject: compat_ioctl: block: handle BLKGETZONESZ/BLKGETNRZONES
+
+From: Arnd Bergmann
+
+commit 21d37340912d74b1222d43c11aa9dd0687162573 upstream.
+
+These were added to blkdev_ioctl() in v4.20 but not to
+compat_blkdev_ioctl(), so add them now.
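+
+For reference, a minimal userspace sketch of the kind of caller this
+fixes (the device path is a placeholder; BLKGETNRZONES comes from
+linux/blkzoned.h). A 32-bit build of this program got -ENOTTY from the
+compat path before this patch, while a 64-bit build worked:
+
+	#include <fcntl.h>
+	#include <stdio.h>
+	#include <sys/ioctl.h>
+	#include <linux/blkzoned.h>
+
+	int main(void)
+	{
+		unsigned int nr_zones = 0;
+		int fd = open("/dev/sda", O_RDONLY);	/* placeholder device */
+
+		if (fd < 0 || ioctl(fd, BLKGETNRZONES, &nr_zones) < 0) {
+			perror("BLKGETNRZONES");
+			return 1;
+		}
+		printf("%u zones\n", nr_zones);
+		return 0;
+	}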
+
+Cc: stable@vger.kernel.org # v4.20+
+Fixes: 72cd87576d1d ("block: Introduce BLKGETZONESZ ioctl")
+Fixes: 65e4e3eee83d ("block: Introduce BLKGETNRZONES ioctl")
+Reviewed-by: Damien Le Moal
+Signed-off-by: Arnd Bergmann
+Signed-off-by: Jens Axboe
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ block/compat_ioctl.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/block/compat_ioctl.c
++++ b/block/compat_ioctl.c
+@@ -357,6 +357,8 @@ long compat_blkdev_ioctl(struct file *fi
+ 	case BLKRRPART:
+ 	case BLKREPORTZONE:
+ 	case BLKRESETZONE:
++	case BLKGETZONESZ:
++	case BLKGETNRZONES:
+ 		return blkdev_ioctl(bdev, mode, cmd,
+ 				(unsigned long)compat_ptr(arg));
+ 	case BLKBSZSET_32:
diff --git a/queue-5.4/compat_ioctl-block-handle-blkreportzone-blkresetzone.patch b/queue-5.4/compat_ioctl-block-handle-blkreportzone-blkresetzone.patch
new file mode 100644
index 00000000000..653712f78d9
--- /dev/null
+++ b/queue-5.4/compat_ioctl-block-handle-blkreportzone-blkresetzone.patch
@@ -0,0 +1,34 @@
+From 673bdf8ce0a387ef585c13b69a2676096c6edfe9 Mon Sep 17 00:00:00 2001
+From: Arnd Bergmann
+Date: Fri, 29 Nov 2019 11:28:22 +0100
+Subject: compat_ioctl: block: handle BLKREPORTZONE/BLKRESETZONE
+
+From: Arnd Bergmann
+
+commit 673bdf8ce0a387ef585c13b69a2676096c6edfe9 upstream.
+
+These were added to blkdev_ioctl() but not to compat_blkdev_ioctl(),
+so add them now.
+
+Cc: stable@vger.kernel.org # v4.10+
+Fixes: 3ed05a987e0f ("blk-zoned: implement ioctls")
+Reviewed-by: Damien Le Moal
+Signed-off-by: Arnd Bergmann
+Signed-off-by: Jens Axboe
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ block/compat_ioctl.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/block/compat_ioctl.c
++++ b/block/compat_ioctl.c
+@@ -355,6 +355,8 @@ long compat_blkdev_ioctl(struct file *fi
+ 	 * but we call blkdev_ioctl, which gets the lock for us
+ 	 */
+ 	case BLKRRPART:
++	case BLKREPORTZONE:
++	case BLKRESETZONE:
+ 		return blkdev_ioctl(bdev, mode, cmd,
+ 				(unsigned long)compat_ptr(arg));
+ 	case BLKBSZSET_32:
diff --git a/queue-5.4/compat_ioctl-block-handle-persistent-reservations.patch b/queue-5.4/compat_ioctl-block-handle-persistent-reservations.patch
new file mode 100644
index 00000000000..a749dbd3418
--- /dev/null
+++ b/queue-5.4/compat_ioctl-block-handle-persistent-reservations.patch
@@ -0,0 +1,50 @@
+From b2c0fcd28772f99236d261509bcd242135677965 Mon Sep 17 00:00:00 2001
+From: Arnd Bergmann
+Date: Fri, 29 Nov 2019 11:28:22 +0100
+Subject: compat_ioctl: block: handle Persistent Reservations
+
+From: Arnd Bergmann
+
+commit b2c0fcd28772f99236d261509bcd242135677965 upstream.
+
+These were added to blkdev_ioctl() in linux-5.5 but not to
+compat_blkdev_ioctl(), so add them now.
+
+Cc: stable@vger.kernel.org # v4.4+
+Fixes: bbd3e064362e ("block: add an API for Persistent Reservations")
+Signed-off-by: Arnd Bergmann
+Signed-off-by: Greg Kroah-Hartman
+
+Fold in followup patch from Arnd with missing pr.h header include.
+ +Signed-off-by: Jens Axboe + +--- + block/compat_ioctl.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +--- a/block/compat_ioctl.c ++++ b/block/compat_ioctl.c +@@ -6,6 +6,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -401,6 +402,14 @@ long compat_blkdev_ioctl(struct file *fi + case BLKTRACETEARDOWN: /* compatible */ + ret = blk_trace_ioctl(bdev, cmd, compat_ptr(arg)); + return ret; ++ case IOC_PR_REGISTER: ++ case IOC_PR_RESERVE: ++ case IOC_PR_RELEASE: ++ case IOC_PR_PREEMPT: ++ case IOC_PR_PREEMPT_ABORT: ++ case IOC_PR_CLEAR: ++ return blkdev_ioctl(bdev, mode, cmd, ++ (unsigned long)compat_ptr(arg)); + default: + if (disk->fops->compat_ioctl) + ret = disk->fops->compat_ioctl(bdev, mode, cmd, arg); diff --git a/queue-5.4/dmaengine-dma-jz4780-also-break-descriptor-chains-on-jz4725b.patch b/queue-5.4/dmaengine-dma-jz4780-also-break-descriptor-chains-on-jz4725b.patch new file mode 100644 index 00000000000..de1ebddedb7 --- /dev/null +++ b/queue-5.4/dmaengine-dma-jz4780-also-break-descriptor-chains-on-jz4725b.patch @@ -0,0 +1,41 @@ +From a40c94be2336f3002563c9ae16572143ae3422e2 Mon Sep 17 00:00:00 2001 +From: Paul Cercueil +Date: Tue, 10 Dec 2019 17:55:45 +0100 +Subject: dmaengine: dma-jz4780: Also break descriptor chains on JZ4725B + +From: Paul Cercueil + +commit a40c94be2336f3002563c9ae16572143ae3422e2 upstream. + +It turns out that the JZ4725B displays the same buggy behaviour as the +JZ4740 that was described in commit f4c255f1a747 ("dmaengine: dma-jz4780: +Break descriptor chains on JZ4740"). + +Work around it by using the same workaround previously used for the +JZ4740. + +Fixes commit f4c255f1a747 ("dmaengine: dma-jz4780: Break descriptor +chains on JZ4740") + +Cc: +Signed-off-by: Paul Cercueil +Link: https://lore.kernel.org/r/20191210165545.59690-1-paul@crapouillou.net +Signed-off-by: Vinod Koul +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/dma/dma-jz4780.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/dma/dma-jz4780.c ++++ b/drivers/dma/dma-jz4780.c +@@ -1004,7 +1004,8 @@ static const struct jz4780_dma_soc_data + static const struct jz4780_dma_soc_data jz4725b_dma_soc_data = { + .nb_channels = 6, + .transfer_ord_max = 5, +- .flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC, ++ .flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC | ++ JZ_SOC_DATA_BREAK_LINKS, + }; + + static const struct jz4780_dma_soc_data jz4770_dma_soc_data = { diff --git a/queue-5.4/dmaengine-fix-access-to-uninitialized-dma_slave_caps.patch b/queue-5.4/dmaengine-fix-access-to-uninitialized-dma_slave_caps.patch new file mode 100644 index 00000000000..f4fa54f9d0f --- /dev/null +++ b/queue-5.4/dmaengine-fix-access-to-uninitialized-dma_slave_caps.patch @@ -0,0 +1,57 @@ +From 53a256a9b925b47c7e67fc1f16ca41561a7b877c Mon Sep 17 00:00:00 2001 +From: Lukas Wunner +Date: Thu, 5 Dec 2019 12:54:49 +0100 +Subject: dmaengine: Fix access to uninitialized dma_slave_caps + +From: Lukas Wunner + +commit 53a256a9b925b47c7e67fc1f16ca41561a7b877c upstream. + +dmaengine_desc_set_reuse() allocates a struct dma_slave_caps on the +stack, populates it using dma_get_slave_caps() and then accesses one +of its members. 
+
+However, dma_get_slave_caps() may fail and this isn't accounted for,
+leading to a legitimate warning of gcc-4.9 (but not newer versions):
+
+   In file included from drivers/spi/spi-bcm2835.c:19:0:
+   drivers/spi/spi-bcm2835.c: In function 'dmaengine_desc_set_reuse':
+>> include/linux/dmaengine.h:1370:10: warning: 'caps.descriptor_reuse' is used uninitialized in this function [-Wuninitialized]
+     if (caps.descriptor_reuse) {
+
+Fix it, thereby also silencing the gcc-4.9 warning.
+
+The issue has been present for 4 years but surfaces only now that
+the first caller of dmaengine_desc_set_reuse() has been added in
+spi-bcm2835.c. Another user of reusable DMA descriptors has existed
+for a while in pxa_camera.c, but it sets the DMA_CTRL_REUSE flag
+directly instead of calling dmaengine_desc_set_reuse(). Nevertheless,
+tag this commit for stable in case there are out-of-tree users.
+
+Fixes: 272420214d26 ("dmaengine: Add DMA_CTRL_REUSE")
+Reported-by: kbuild test robot
+Signed-off-by: Lukas Wunner
+Cc: stable@vger.kernel.org # v4.3+
+Link: https://lore.kernel.org/r/ca92998ccc054b4f2bfd60ef3adbab2913171eac.1575546234.git.lukas@wunner.de
+Signed-off-by: Vinod Koul
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ include/linux/dmaengine.h |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/include/linux/dmaengine.h
++++ b/include/linux/dmaengine.h
+@@ -1364,8 +1364,11 @@ static inline int dma_get_slave_caps(str
+ static inline int dmaengine_desc_set_reuse(struct dma_async_tx_descriptor *tx)
+ {
+ 	struct dma_slave_caps caps;
++	int ret;
+
+-	dma_get_slave_caps(tx->chan, &caps);
++	ret = dma_get_slave_caps(tx->chan, &caps);
++	if (ret)
++		return ret;
+
+ 	if (caps.descriptor_reuse) {
+ 		tx->flags |= DMA_CTRL_REUSE;
diff --git a/queue-5.4/gcc-plugins-make-it-possible-to-disable-config_gcc_plugins-again.patch b/queue-5.4/gcc-plugins-make-it-possible-to-disable-config_gcc_plugins-again.patch
new file mode 100644
index 00000000000..00fb0314b0a
--- /dev/null
+++ b/queue-5.4/gcc-plugins-make-it-possible-to-disable-config_gcc_plugins-again.patch
@@ -0,0 +1,67 @@
+From a5b0dc5a46c221725c43bd9b01570239a4cd78b1 Mon Sep 17 00:00:00 2001
+From: Arnd Bergmann
+Date: Wed, 11 Dec 2019 14:39:28 +0100
+Subject: gcc-plugins: make it possible to disable CONFIG_GCC_PLUGINS again
+
+From: Arnd Bergmann
+
+commit a5b0dc5a46c221725c43bd9b01570239a4cd78b1 upstream.
+
+I noticed that randconfig builds with gcc no longer produce a lot of
+ccache hits, unlike with clang, and traced this back to plugins
+now being enabled unconditionally if they are supported.
+
+I am now working around this by adding
+
+   export CCACHE_COMPILERCHECK=/usr/bin/size -A %compiler%
+
+to my top-level Makefile. This changes the heuristic that ccache uses
+to determine whether the plugins are the same after a 'make clean'.
+
+However, it also seems that being able to just turn off the plugins is
+generally useful; at least for build testing they add noticeable
+overhead without finding a lot of additional bugs, and disabling them
+may be easier for ccache users than my workaround.
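+
+With this change, plugin support can be switched off wholesale from a
+config fragment (a usage sketch relying only on standard Kconfig
+semantics) instead of disabling each GCC_PLUGIN_* option individually:
+
+	# CONFIG_GCC_PLUGINS is not set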
+
+Fixes: 9f671e58159a ("security: Create "kernel hardening" config area")
+Signed-off-by: Arnd Bergmann
+Acked-by: Ard Biesheuvel
+Reviewed-by: Masahiro Yamada
+Link: https://lore.kernel.org/r/20191211133951.401933-1-arnd@arndb.de
+Cc: stable@vger.kernel.org
+Signed-off-by: Kees Cook
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ scripts/gcc-plugins/Kconfig |    9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+--- a/scripts/gcc-plugins/Kconfig
++++ b/scripts/gcc-plugins/Kconfig
+@@ -14,8 +14,8 @@ config HAVE_GCC_PLUGINS
+ 	  An arch should select this symbol if it supports building with
+ 	  GCC plugins.
+
+-config GCC_PLUGINS
+-	bool
++menuconfig GCC_PLUGINS
++	bool "GCC plugins"
+ 	depends on HAVE_GCC_PLUGINS
+ 	depends on PLUGIN_HOSTCC != ""
+ 	default y
+@@ -25,8 +25,7 @@ config GCC_PLUGINS
+
+ 	  See Documentation/core-api/gcc-plugins.rst for details.
+
+-menu "GCC plugins"
+-	depends on GCC_PLUGINS
++if GCC_PLUGINS
+
+ config GCC_PLUGIN_CYC_COMPLEXITY
+ 	bool "Compute the cyclomatic complexity of a function" if EXPERT
+@@ -113,4 +112,4 @@ config GCC_PLUGIN_ARM_SSP_PER_TASK
+ 	bool
+ 	depends on GCC_PLUGINS && ARM
+
+-endmenu
++endif
diff --git a/queue-5.4/locks-print-unsigned-ino-in-proc-locks.patch b/queue-5.4/locks-print-unsigned-ino-in-proc-locks.patch
new file mode 100644
index 00000000000..afc51592f44
--- /dev/null
+++ b/queue-5.4/locks-print-unsigned-ino-in-proc-locks.patch
@@ -0,0 +1,31 @@
+From 98ca480a8f22fdbd768e3dad07024c8d4856576c Mon Sep 17 00:00:00 2001
+From: Amir Goldstein
+Date: Sun, 22 Dec 2019 20:45:28 +0200
+Subject: locks: print unsigned ino in /proc/locks
+
+From: Amir Goldstein
+
+commit 98ca480a8f22fdbd768e3dad07024c8d4856576c upstream.
+
+An ino is unsigned, so display it as such in /proc/locks.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Amir Goldstein
+Signed-off-by: Jeff Layton
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/locks.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/locks.c
++++ b/fs/locks.c
+@@ -2853,7 +2853,7 @@ static void lock_get_status(struct seq_f
+ 	}
+ 	if (inode) {
+ 		/* userspace relies on this representation of dev_t */
+-		seq_printf(f, "%d %02x:%02x:%ld ", fl_pid,
++		seq_printf(f, "%d %02x:%02x:%lu ", fl_pid,
+ 			MAJOR(inode->i_sb->s_dev),
+ 			MINOR(inode->i_sb->s_dev), inode->i_ino);
+ 	} else {
diff --git a/queue-5.4/memcg-account-security-cred-as-well-to-kmemcg.patch b/queue-5.4/memcg-account-security-cred-as-well-to-kmemcg.patch
new file mode 100644
index 00000000000..00fef94bec1
--- /dev/null
+++ b/queue-5.4/memcg-account-security-cred-as-well-to-kmemcg.patch
@@ -0,0 +1,66 @@
+From 84029fd04c201a4c7e0b07ba262664900f47c6f5 Mon Sep 17 00:00:00 2001
+From: Shakeel Butt
+Date: Sat, 4 Jan 2020 12:59:43 -0800
+Subject: memcg: account security cred as well to kmemcg
+
+From: Shakeel Butt
+
+commit 84029fd04c201a4c7e0b07ba262664900f47c6f5 upstream.
+
+The cred_jar kmem_cache is already memcg accounted in the current kernel
+but cred->security is not. Account cred->security to kmemcg.
+
+Recently we saw high root slab usage on our production systems and, on
+further inspection, found a buggy application leaking processes. Though
+that buggy application was contained within its memcg, we observed much
+more system memory overhead, a couple of GiBs, during that period. This
+overhead can adversely impact the isolation on the system.
+
+One source of high overhead we found was cred->security objects, which
+have a lifetime of at least the life of the process which allocated
+them.
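+
+For background (a general kernel pattern, not something introduced by
+this patch): an allocation is charged to the allocating task's memcg by
+passing GFP_KERNEL_ACCOUNT, which is GFP_KERNEL | __GFP_ACCOUNT, e.g.:
+
+	/* charged to current's memory cgroup and reported as kmem */
+	void *buf = kmalloc(size, GFP_KERNEL_ACCOUNT);
+
+which is exactly what the security_*_creds() call sites below switch to.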
+ +Link: http://lkml.kernel.org/r/20191205223721.40034-1-shakeelb@google.com +Signed-off-by: Shakeel Butt +Acked-by: Chris Down +Reviewed-by: Roman Gushchin +Acked-by: Michal Hocko +Cc: Johannes Weiner +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/cred.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/kernel/cred.c ++++ b/kernel/cred.c +@@ -223,7 +223,7 @@ struct cred *cred_alloc_blank(void) + new->magic = CRED_MAGIC; + #endif + +- if (security_cred_alloc_blank(new, GFP_KERNEL) < 0) ++ if (security_cred_alloc_blank(new, GFP_KERNEL_ACCOUNT) < 0) + goto error; + + return new; +@@ -282,7 +282,7 @@ struct cred *prepare_creds(void) + new->security = NULL; + #endif + +- if (security_prepare_creds(new, old, GFP_KERNEL) < 0) ++ if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0) + goto error; + validate_creds(new); + return new; +@@ -715,7 +715,7 @@ struct cred *prepare_kernel_cred(struct + #ifdef CONFIG_SECURITY + new->security = NULL; + #endif +- if (security_prepare_creds(new, old, GFP_KERNEL) < 0) ++ if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0) + goto error; + + put_cred(old); diff --git a/queue-5.4/mm-memory_hotplug-shrink-zones-when-offlining-memory.patch b/queue-5.4/mm-memory_hotplug-shrink-zones-when-offlining-memory.patch new file mode 100644 index 00000000000..b3cbf7906c1 --- /dev/null +++ b/queue-5.4/mm-memory_hotplug-shrink-zones-when-offlining-memory.patch @@ -0,0 +1,329 @@ +From feee6b2989165631b17ac6d4ccdbf6759254e85a Mon Sep 17 00:00:00 2001 +From: David Hildenbrand +Date: Sat, 4 Jan 2020 12:59:33 -0800 +Subject: mm/memory_hotplug: shrink zones when offlining memory + +From: David Hildenbrand + +commit feee6b2989165631b17ac6d4ccdbf6759254e85a upstream. + +We currently try to shrink a single zone when removing memory. We use +the zone of the first page of the memory we are removing. 
If that memmap was never initialized (e.g., memory was never onlined), we
+will read garbage and can trigger kernel BUGs (due to a stale pointer):
+
+  BUG: unable to handle page fault for address: 000000000000353d
+  #PF: supervisor write access in kernel mode
+  #PF: error_code(0x0002) - not-present page
+  PGD 0 P4D 0
+  Oops: 0002 [#1] SMP PTI
+  CPU: 1 PID: 7 Comm: kworker/u8:0 Not tainted 5.3.0-rc5-next-20190820+ #317
+  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.4
+  Workqueue: kacpi_hotplug acpi_hotplug_work_fn
+  RIP: 0010:clear_zone_contiguous+0x5/0x10
+  Code: 48 89 c6 48 89 c3 e8 2a fe ff ff 48 85 c0 75 cf 5b 5d c3 c6 85 fd 05 00 00 01 5b 5d c3 0f 1f 840
+  RSP: 0018:ffffad2400043c98 EFLAGS: 00010246
+  RAX: 0000000000000000 RBX: 0000000200000000 RCX: 0000000000000000
+  RDX: 0000000000200000 RSI: 0000000000140000 RDI: 0000000000002f40
+  RBP: 0000000140000000 R08: 0000000000000000 R09: 0000000000000001
+  R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000140000
+  R13: 0000000000140000 R14: 0000000000002f40 R15: ffff9e3e7aff3680
+  FS: 0000000000000000(0000) GS:ffff9e3e7bb00000(0000) knlGS:0000000000000000
+  CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  CR2: 000000000000353d CR3: 0000000058610000 CR4: 00000000000006e0
+  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+  Call Trace:
+   __remove_pages+0x4b/0x640
+   arch_remove_memory+0x63/0x8d
+   try_remove_memory+0xdb/0x130
+   __remove_memory+0xa/0x11
+   acpi_memory_device_remove+0x70/0x100
+   acpi_bus_trim+0x55/0x90
+   acpi_device_hotplug+0x227/0x3a0
+   acpi_hotplug_work_fn+0x1a/0x30
+   process_one_work+0x221/0x550
+   worker_thread+0x50/0x3b0
+   kthread+0x105/0x140
+   ret_from_fork+0x3a/0x50
+  Modules linked in:
+  CR2: 000000000000353d
+
+Instead, shrink the zones when offlining memory or when onlining failed.
+Introduce and use remove_pfn_range_from_zone() for that. We now
+properly shrink the zones, even if we have DIMMs whereby
+
+ - Some memory blocks fall into no zone (never onlined)
+
+ - Some memory blocks fall into multiple zones (offlined+re-onlined)
+
+ - Multiple memory blocks that fall into different zones
+
+Drop the zone parameter (with a potentially dubious value) from
+__remove_pages() and __remove_section().
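+
+A simplified sketch of the resulting life cycle (illustrative only;
+argument lists abbreviated): the zone span is now grown on online and
+shrunk on offline (or on a failed online), while the memmap must still
+be valid at shrink time:
+
+	/* onlining: grow the zone to cover the pfn range */
+	move_pfn_range_to_zone(zone, start_pfn, nr_pages, altmap);
+
+	/* offlining (or failed onlining): shrink it again */
+	remove_pfn_range_from_zone(zone, start_pfn, nr_pages);
+
+__remove_pages() itself no longer touches any zone.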
+ +Link: http://lkml.kernel.org/r/20191006085646.5768-6-david@redhat.com +Fixes: f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online") [visible after d0dc12e86b319] +Signed-off-by: David Hildenbrand +Reviewed-by: Oscar Salvador +Cc: Michal Hocko +Cc: "Matthew Wilcox (Oracle)" +Cc: "Aneesh Kumar K.V" +Cc: Pavel Tatashin +Cc: Greg Kroah-Hartman +Cc: Dan Williams +Cc: Logan Gunthorpe +Cc: [5.0+] +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm64/mm/mmu.c | 4 +--- + arch/ia64/mm/init.c | 4 +--- + arch/powerpc/mm/mem.c | 3 +-- + arch/s390/mm/init.c | 4 +--- + arch/sh/mm/init.c | 4 +--- + arch/x86/mm/init_32.c | 4 +--- + arch/x86/mm/init_64.c | 4 +--- + include/linux/memory_hotplug.h | 7 +++++-- + mm/memory_hotplug.c | 31 ++++++++++++++++--------------- + mm/memremap.c | 2 +- + 10 files changed, 29 insertions(+), 38 deletions(-) + +--- a/arch/arm64/mm/mmu.c ++++ b/arch/arm64/mm/mmu.c +@@ -1069,7 +1069,6 @@ void arch_remove_memory(int nid, u64 sta + { + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; +- struct zone *zone; + + /* + * FIXME: Cleanup page tables (also in arch_add_memory() in case +@@ -1078,7 +1077,6 @@ void arch_remove_memory(int nid, u64 sta + * unplug. ARCH_ENABLE_MEMORY_HOTREMOVE must not be + * unlocked yet. + */ +- zone = page_zone(pfn_to_page(start_pfn)); +- __remove_pages(zone, start_pfn, nr_pages, altmap); ++ __remove_pages(start_pfn, nr_pages, altmap); + } + #endif +--- a/arch/ia64/mm/init.c ++++ b/arch/ia64/mm/init.c +@@ -689,9 +689,7 @@ void arch_remove_memory(int nid, u64 sta + { + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; +- struct zone *zone; + +- zone = page_zone(pfn_to_page(start_pfn)); +- __remove_pages(zone, start_pfn, nr_pages, altmap); ++ __remove_pages(start_pfn, nr_pages, altmap); + } + #endif +--- a/arch/powerpc/mm/mem.c ++++ b/arch/powerpc/mm/mem.c +@@ -130,10 +130,9 @@ void __ref arch_remove_memory(int nid, u + { + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; +- struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap); + int ret; + +- __remove_pages(page_zone(page), start_pfn, nr_pages, altmap); ++ __remove_pages(start_pfn, nr_pages, altmap); + + /* Remove htab bolted mappings for this section of memory */ + start = (unsigned long)__va(start); +--- a/arch/s390/mm/init.c ++++ b/arch/s390/mm/init.c +@@ -291,10 +291,8 @@ void arch_remove_memory(int nid, u64 sta + { + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; +- struct zone *zone; + +- zone = page_zone(pfn_to_page(start_pfn)); +- __remove_pages(zone, start_pfn, nr_pages, altmap); ++ __remove_pages(start_pfn, nr_pages, altmap); + vmem_remove_mapping(start, size); + } + #endif /* CONFIG_MEMORY_HOTPLUG */ +--- a/arch/sh/mm/init.c ++++ b/arch/sh/mm/init.c +@@ -434,9 +434,7 @@ void arch_remove_memory(int nid, u64 sta + { + unsigned long start_pfn = PFN_DOWN(start); + unsigned long nr_pages = size >> PAGE_SHIFT; +- struct zone *zone; + +- zone = page_zone(pfn_to_page(start_pfn)); +- __remove_pages(zone, start_pfn, nr_pages, altmap); ++ __remove_pages(start_pfn, nr_pages, altmap); + } + #endif /* CONFIG_MEMORY_HOTPLUG */ +--- a/arch/x86/mm/init_32.c ++++ b/arch/x86/mm/init_32.c +@@ -865,10 +865,8 @@ void arch_remove_memory(int nid, u64 sta + { + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long 
nr_pages = size >> PAGE_SHIFT; +- struct zone *zone; + +- zone = page_zone(pfn_to_page(start_pfn)); +- __remove_pages(zone, start_pfn, nr_pages, altmap); ++ __remove_pages(start_pfn, nr_pages, altmap); + } + #endif + +--- a/arch/x86/mm/init_64.c ++++ b/arch/x86/mm/init_64.c +@@ -1212,10 +1212,8 @@ void __ref arch_remove_memory(int nid, u + { + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; +- struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap); +- struct zone *zone = page_zone(page); + +- __remove_pages(zone, start_pfn, nr_pages, altmap); ++ __remove_pages(start_pfn, nr_pages, altmap); + kernel_physical_mapping_remove(start, start + size); + } + #endif /* CONFIG_MEMORY_HOTPLUG */ +--- a/include/linux/memory_hotplug.h ++++ b/include/linux/memory_hotplug.h +@@ -125,8 +125,8 @@ static inline bool movable_node_is_enabl + + extern void arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap); +-extern void __remove_pages(struct zone *zone, unsigned long start_pfn, +- unsigned long nr_pages, struct vmem_altmap *altmap); ++extern void __remove_pages(unsigned long start_pfn, unsigned long nr_pages, ++ struct vmem_altmap *altmap); + + /* reasonably generic interface to expand the physical pages */ + extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, +@@ -345,6 +345,9 @@ extern int add_memory(int nid, u64 start + extern int add_memory_resource(int nid, struct resource *resource); + extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, + unsigned long nr_pages, struct vmem_altmap *altmap); ++extern void remove_pfn_range_from_zone(struct zone *zone, ++ unsigned long start_pfn, ++ unsigned long nr_pages); + extern bool is_memblock_offlined(struct memory_block *mem); + extern int sparse_add_section(int nid, unsigned long pfn, + unsigned long nr_pages, struct vmem_altmap *altmap); +--- a/mm/memory_hotplug.c ++++ b/mm/memory_hotplug.c +@@ -465,8 +465,9 @@ static void update_pgdat_span(struct pgl + pgdat->node_spanned_pages = node_end_pfn - node_start_pfn; + } + +-static void __remove_zone(struct zone *zone, unsigned long start_pfn, +- unsigned long nr_pages) ++void __ref remove_pfn_range_from_zone(struct zone *zone, ++ unsigned long start_pfn, ++ unsigned long nr_pages) + { + struct pglist_data *pgdat = zone->zone_pgdat; + unsigned long flags; +@@ -481,28 +482,30 @@ static void __remove_zone(struct zone *z + return; + #endif + ++ clear_zone_contiguous(zone); ++ + pgdat_resize_lock(zone->zone_pgdat, &flags); + shrink_zone_span(zone, start_pfn, start_pfn + nr_pages); + update_pgdat_span(pgdat); + pgdat_resize_unlock(zone->zone_pgdat, &flags); ++ ++ set_zone_contiguous(zone); + } + +-static void __remove_section(struct zone *zone, unsigned long pfn, +- unsigned long nr_pages, unsigned long map_offset, +- struct vmem_altmap *altmap) ++static void __remove_section(unsigned long pfn, unsigned long nr_pages, ++ unsigned long map_offset, ++ struct vmem_altmap *altmap) + { + struct mem_section *ms = __nr_to_section(pfn_to_section_nr(pfn)); + + if (WARN_ON_ONCE(!valid_section(ms))) + return; + +- __remove_zone(zone, pfn, nr_pages); + sparse_remove_section(ms, pfn, nr_pages, map_offset, altmap); + } + + /** +- * __remove_pages() - remove sections of pages from a zone +- * @zone: zone from which pages need to be removed ++ * __remove_pages() - remove sections of pages + * @pfn: starting pageframe (must be aligned to start of a section) + * @nr_pages: number of pages to remove 
(must be multiple of section size) + * @altmap: alternative device page map or %NULL if default memmap is used +@@ -512,16 +515,14 @@ static void __remove_section(struct zone + * sure that pages are marked reserved and zones are adjust properly by + * calling offline_pages(). + */ +-void __remove_pages(struct zone *zone, unsigned long pfn, +- unsigned long nr_pages, struct vmem_altmap *altmap) ++void __remove_pages(unsigned long pfn, unsigned long nr_pages, ++ struct vmem_altmap *altmap) + { + unsigned long map_offset = 0; + unsigned long nr, start_sec, end_sec; + + map_offset = vmem_altmap_offset(altmap); + +- clear_zone_contiguous(zone); +- + if (check_pfn_span(pfn, nr_pages, "remove")) + return; + +@@ -533,13 +534,11 @@ void __remove_pages(struct zone *zone, u + cond_resched(); + pfns = min(nr_pages, PAGES_PER_SECTION + - (pfn & ~PAGE_SECTION_MASK)); +- __remove_section(zone, pfn, pfns, map_offset, altmap); ++ __remove_section(pfn, pfns, map_offset, altmap); + pfn += pfns; + nr_pages -= pfns; + map_offset = 0; + } +- +- set_zone_contiguous(zone); + } + + int set_online_page_callback(online_page_callback_t callback) +@@ -867,6 +866,7 @@ failed_addition: + (unsigned long long) pfn << PAGE_SHIFT, + (((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1); + memory_notify(MEM_CANCEL_ONLINE, &arg); ++ remove_pfn_range_from_zone(zone, pfn, nr_pages); + mem_hotplug_done(); + return ret; + } +@@ -1602,6 +1602,7 @@ static int __ref __offline_pages(unsigne + writeback_set_ratelimit(); + + memory_notify(MEM_OFFLINE, &arg); ++ remove_pfn_range_from_zone(zone, start_pfn, nr_pages); + mem_hotplug_done(); + return 0; + +--- a/mm/memremap.c ++++ b/mm/memremap.c +@@ -120,7 +120,7 @@ void memunmap_pages(struct dev_pagemap * + + mem_hotplug_begin(); + if (pgmap->type == MEMORY_DEVICE_PRIVATE) { +- __remove_pages(page_zone(first_page), PHYS_PFN(res->start), ++ __remove_pages(PHYS_PFN(res->start), + PHYS_PFN(resource_size(res)), NULL); + } else { + arch_remove_memory(nid, res->start, resource_size(res), diff --git a/queue-5.4/mm-move_pages-return-valid-node-id-in-status-if-the-page-is-already-on-the-target-node.patch b/queue-5.4/mm-move_pages-return-valid-node-id-in-status-if-the-page-is-already-on-the-target-node.patch new file mode 100644 index 00000000000..a981f50d14c --- /dev/null +++ b/queue-5.4/mm-move_pages-return-valid-node-id-in-status-if-the-page-is-already-on-the-target-node.patch @@ -0,0 +1,144 @@ +From e0153fc2c7606f101392b682e720a7a456d6c766 Mon Sep 17 00:00:00 2001 +From: Yang Shi +Date: Sat, 4 Jan 2020 12:59:46 -0800 +Subject: mm: move_pages: return valid node id in status if the page is already on the target node + +From: Yang Shi + +commit e0153fc2c7606f101392b682e720a7a456d6c766 upstream. 
+Felix Abecassis reports that move_pages() would return random status if
+the pages are already on the target node, demonstrated by the test
+program below:
+
+  int main(void)
+  {
+  	const long node_id = 1;
+  	const long page_size = sysconf(_SC_PAGESIZE);
+  	const int64_t num_pages = 8;
+
+  	unsigned long nodemask = 1 << node_id;
+  	long ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask));
+  	if (ret < 0)
+  		return (EXIT_FAILURE);
+
+  	void **pages = malloc(sizeof(void*) * num_pages);
+  	for (int i = 0; i < num_pages; ++i) {
+  		pages[i] = mmap(NULL, page_size, PROT_WRITE | PROT_READ,
+  				MAP_PRIVATE | MAP_POPULATE | MAP_ANONYMOUS,
+  				-1, 0);
+  		if (pages[i] == MAP_FAILED)
+  			return (EXIT_FAILURE);
+  	}
+
+  	ret = set_mempolicy(MPOL_DEFAULT, NULL, 0);
+  	if (ret < 0)
+  		return (EXIT_FAILURE);
+
+  	int *nodes = malloc(sizeof(int) * num_pages);
+  	int *status = malloc(sizeof(int) * num_pages);
+  	for (int i = 0; i < num_pages; ++i) {
+  		nodes[i] = node_id;
+  		status[i] = 0xd0;	/* simulate garbage values */
+  	}
+
+  	ret = move_pages(0, num_pages, pages, nodes, status, MPOL_MF_MOVE);
+  	printf("move_pages: %ld\n", ret);
+  	for (int i = 0; i < num_pages; ++i)
+  		printf("status[%d] = %d\n", i, status[i]);
+  }
+
+Then running the program would print nonsense status values:
+
+  $ ./move_pages_bug
+  move_pages: 0
+  status[0] = 208
+  status[1] = 208
+  status[2] = 208
+  status[3] = 208
+  status[4] = 208
+  status[5] = 208
+  status[6] = 208
+  status[7] = 208
+
+This is because the status is not set if the page is already on the
+target node, but move_pages() should return a valid status as long as
+it succeeds. The valid status may be an errno or a node id.
+
+We can't simply initialize the status array to zero since the pages may
+not be on node 0. Fix it by updating status with the node id the page
+is already on.
+
+Link: http://lkml.kernel.org/r/1575584353-125392-1-git-send-email-yang.shi@linux.alibaba.com
+Fixes: a49bd4d71637 ("mm, numa: rework do_pages_move")
+Signed-off-by: Yang Shi
+Reported-by: Felix Abecassis
+Tested-by: Felix Abecassis
+Suggested-by: Michal Hocko
+Reviewed-by: John Hubbard
+Acked-by: Christoph Lameter
+Acked-by: Michal Hocko
+Reviewed-by: Vlastimil Babka
+Cc: Mel Gorman
+Cc: stable@vger.kernel.org [4.17+]
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/migrate.c |   23 +++++++++++++++++------
+ 1 file changed, 17 insertions(+), 6 deletions(-)
+
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -1516,9 +1516,11 @@ static int do_move_pages_to_node(struct
+ /*
+  * Resolves the given address to a struct page, isolates it from the LRU and
+  * puts it to the given pagelist.
+- * Returns -errno if the page cannot be found/isolated or 0 when it has been +- * queued or the page doesn't need to be migrated because it is already on +- * the target node ++ * Returns: ++ * errno - if the page cannot be found/isolated ++ * 0 - when it doesn't have to be migrated because it is already on the ++ * target node ++ * 1 - when it has been queued + */ + static int add_page_for_migration(struct mm_struct *mm, unsigned long addr, + int node, struct list_head *pagelist, bool migrate_all) +@@ -1557,7 +1559,7 @@ static int add_page_for_migration(struct + if (PageHuge(page)) { + if (PageHead(page)) { + isolate_huge_page(page, pagelist); +- err = 0; ++ err = 1; + } + } else { + struct page *head; +@@ -1567,7 +1569,7 @@ static int add_page_for_migration(struct + if (err) + goto out_putpage; + +- err = 0; ++ err = 1; + list_add_tail(&head->lru, pagelist); + mod_node_page_state(page_pgdat(head), + NR_ISOLATED_ANON + page_is_file_cache(head), +@@ -1644,8 +1646,17 @@ static int do_pages_move(struct mm_struc + */ + err = add_page_for_migration(mm, addr, current_node, + &pagelist, flags & MPOL_MF_MOVE_ALL); +- if (!err) ++ ++ if (!err) { ++ /* The page is already on the target node */ ++ err = store_status(status, i, current_node, 1); ++ if (err) ++ goto out_flush; + continue; ++ } else if (err > 0) { ++ /* The page is successfully queued for migration */ ++ continue; ++ } + + err = store_status(status, i, err, 1); + if (err) diff --git a/queue-5.4/mm-oom-fix-pgtables-units-mismatch-in-killed-process-message.patch b/queue-5.4/mm-oom-fix-pgtables-units-mismatch-in-killed-process-message.patch new file mode 100644 index 00000000000..632193eafb4 --- /dev/null +++ b/queue-5.4/mm-oom-fix-pgtables-units-mismatch-in-killed-process-message.patch @@ -0,0 +1,57 @@ +From 941f762bcb276259a78e7931674668874ccbda59 Mon Sep 17 00:00:00 2001 +From: Ilya Dryomov +Date: Sat, 4 Jan 2020 13:00:09 -0800 +Subject: mm/oom: fix pgtables units mismatch in Killed process message + +From: Ilya Dryomov + +commit 941f762bcb276259a78e7931674668874ccbda59 upstream. + +pr_err() expects kB, but mm_pgtables_bytes() returns the number of bytes. +As everything else is printed in kB, I chose to fix the value rather than +the string. + +Before: + +[ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name +... +[ 1878] 1000 1878 217253 151144 1269760 0 0 python +... +Out of memory: Killed process 1878 (python) total-vm:869012kB, anon-rss:604572kB, file-rss:4kB, shmem-rss:0kB, UID:1000 pgtables:1269760kB oom_score_adj:0 + +After: + +[ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name +... +[ 1436] 1000 1436 217253 151890 1294336 0 0 python +... 
+Out of memory: Killed process 1436 (python) total-vm:869012kB, anon-rss:607516kB, file-rss:44kB, shmem-rss:0kB, UID:1000 pgtables:1264kB oom_score_adj:0
+
+Link: http://lkml.kernel.org/r/20191211202830.1600-1-idryomov@gmail.com
+Fixes: 70cb6d267790 ("mm/oom: add oom_score_adj and pgtables to Killed process message")
+Signed-off-by: Ilya Dryomov
+Reviewed-by: Andrew Morton
+Acked-by: David Rientjes
+Acked-by: Michal Hocko
+Cc: Edward Chron
+Cc: David Rientjes
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/oom_kill.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/oom_kill.c
++++ b/mm/oom_kill.c
+@@ -890,7 +890,7 @@ static void __oom_kill_process(struct ta
+ 		K(get_mm_counter(mm, MM_FILEPAGES)),
+ 		K(get_mm_counter(mm, MM_SHMEMPAGES)),
+ 		from_kuid(&init_user_ns, task_uid(victim)),
+-		mm_pgtables_bytes(mm), victim->signal->oom_score_adj);
++		mm_pgtables_bytes(mm) >> 10, victim->signal->oom_score_adj);
+ 	task_unlock(victim);
+ 
+ 	/*
diff --git a/queue-5.4/mm-zsmalloc.c-fix-the-migrated-zspage-statistics.patch b/queue-5.4/mm-zsmalloc.c-fix-the-migrated-zspage-statistics.patch
new file mode 100644
index 00000000000..e932f37073f
--- /dev/null
+++ b/queue-5.4/mm-zsmalloc.c-fix-the-migrated-zspage-statistics.patch
@@ -0,0 +1,42 @@
+From ac8f05da5174c560de122c499ce5dfb5d0dfbee5 Mon Sep 17 00:00:00 2001
+From: Chanho Min
+Date: Sat, 4 Jan 2020 12:59:36 -0800
+Subject: mm/zsmalloc.c: fix the migrated zspage statistics.
+
+From: Chanho Min
+
+commit ac8f05da5174c560de122c499ce5dfb5d0dfbee5 upstream.
+
+When a zspage is migrated to another zone, the zone page state should be
+updated as well; otherwise NR_ZSPAGES shows wrong counts for each zone,
+which is visible in /proc/zoneinfo in practice.
+
+Link: http://lkml.kernel.org/r/1575434841-48009-1-git-send-email-chanho.min@lge.com
+Fixes: 91537fee0013 ("mm: add NR_ZSMALLOC to vmstat")
+Signed-off-by: Chanho Min
+Signed-off-by: Jinsuk Choi
+Reviewed-by: Sergey Senozhatsky
+Acked-by: Minchan Kim
+Cc: [4.9+]
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/zsmalloc.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/mm/zsmalloc.c
++++ b/mm/zsmalloc.c
+@@ -2069,6 +2069,11 @@ static int zs_page_migrate(struct addres
+ 		zs_pool_dec_isolated(pool);
+ 	}
+ 
++	if (page_zone(newpage) != page_zone(page)) {
++		dec_zone_page_state(page, NR_ZSPAGES);
++		inc_zone_page_state(newpage, NR_ZSPAGES);
++	}
++
+ 	reset_page(page);
+ 	put_page(page);
+ 	page = newpage;
diff --git a/queue-5.4/ocfs2-fix-the-crash-due-to-call-ocfs2_get_dlm_debug-once-less.patch b/queue-5.4/ocfs2-fix-the-crash-due-to-call-ocfs2_get_dlm_debug-once-less.patch
new file mode 100644
index 00000000000..075ccb34ea5
--- /dev/null
+++ b/queue-5.4/ocfs2-fix-the-crash-due-to-call-ocfs2_get_dlm_debug-once-less.patch
@@ -0,0 +1,74 @@
+From b73eba2a867e10b9b4477738677341f3307c07bb Mon Sep 17 00:00:00 2001
+From: Gang He
+Date: Sat, 4 Jan 2020 13:00:22 -0800
+Subject: ocfs2: fix the crash due to call ocfs2_get_dlm_debug once less
+
+From: Gang He
+
+commit b73eba2a867e10b9b4477738677341f3307c07bb upstream.
+
+Because ocfs2_get_dlm_debug() is called once less than needed here, the
+ocfs2 file system will trigger a system crash, usually after the file
+system is unmounted.
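+
+As an illustration of why one missing reference grab corrupts memory
+only much later, here is a minimal user-space sketch of the same
+get/put imbalance (hypothetical obj_get()/obj_put() names; this is a
+sketch of the bug class, not the ocfs2 code):
+
+	#include <stdlib.h>
+
+	struct obj {
+		int refcount;
+	};
+
+	static struct obj *obj_get(struct obj *o)
+	{
+		o->refcount++;
+		return o;
+	}
+
+	static void obj_put(struct obj *o)
+	{
+		if (--o->refcount == 0)
+			free(o);
+	}
+
+	int main(void)
+	{
+		struct obj *o = calloc(1, sizeof(*o));
+		struct obj *second_user;
+
+		if (!o)
+			return 1;
+		o->refcount = 1;	/* creator's reference */
+		second_user = o;	/* BUG: should be obj_get(o) */
+		obj_put(o);		/* creator's put frees 'o' here */
+		obj_put(second_user);	/* use-after-free / double free */
+		return 0;
+	}
+
+The second put scribbles on freed allocator memory, so the damage
+surfaces in whatever unrelated code touches that memory next.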
+
+This system crash is caused by generic memory corruption, so the crash
+backtraces are not always the same; for example:
+
+  ocfs2: Unmounting device (253,16) on (node 172167785)
+  general protection fault: 0000 [#1] SMP PTI
+  CPU: 3 PID: 14107 Comm: fence_legacy Kdump:
+  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
+  RIP: 0010:__kmalloc+0xa5/0x2a0
+  Code: 00 00 4d 8b 07 65 4d 8b
+  RSP: 0018:ffffaa1fc094bbe8 EFLAGS: 00010286
+  RAX: 0000000000000000 RBX: d310a8800d7a3faf RCX: 0000000000000000
+  RDX: 0000000000000000 RSI: 0000000000000dc0 RDI: ffff96e68fc036c0
+  RBP: d310a8800d7a3faf R08: ffff96e6ffdb10a0 R09: 00000000752e7079
+  R10: 000000000001c513 R11: 0000000004091041 R12: 0000000000000dc0
+  R13: 0000000000000039 R14: ffff96e68fc036c0 R15: ffff96e68fc036c0
+  FS:  00007f699dfba540(0000) GS:ffff96e6ffd80000(0000) knlGS:00000
+  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  CR2: 000055f3a9d9b768 CR3: 000000002cd1c000 CR4: 00000000000006e0
+  Call Trace:
+    ext4_htree_store_dirent+0x35/0x100 [ext4]
+    htree_dirblock_to_tree+0xea/0x290 [ext4]
+    ext4_htree_fill_tree+0x1c1/0x2d0 [ext4]
+    ext4_readdir+0x67c/0x9d0 [ext4]
+    iterate_dir+0x8d/0x1a0
+    __x64_sys_getdents+0xab/0x130
+    do_syscall_64+0x60/0x1f0
+    entry_SYSCALL_64_after_hwframe+0x49/0xbe
+  RIP: 0033:0x7f699d33a9fb
+
+This regression was introduced by commit e581595ea29c ("ocfs: no
+need to check return value of debugfs_create functions").
+
+Link: http://lkml.kernel.org/r/20191225061501.13587-1-ghe@suse.com
+Fixes: e581595ea29c ("ocfs: no need to check return value of debugfs_create functions")
+Signed-off-by: Gang He
+Acked-by: Joseph Qi
+Cc: Mark Fasheh
+Cc: Joel Becker
+Cc: Junxiao Bi
+Cc: Changwei Ge
+Cc: Gang He
+Cc: Jun Piao
+Cc: [5.3+]
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/ocfs2/dlmglue.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/ocfs2/dlmglue.c
++++ b/fs/ocfs2/dlmglue.c
+@@ -3282,6 +3282,7 @@ static void ocfs2_dlm_init_debug(struct
+ 
+ 	debugfs_create_u32("locking_filter", 0600, osb->osb_debug_root,
+ 			   &dlm_debug->d_filter_secs);
++	ocfs2_get_dlm_debug(dlm_debug);
+ }
+ 
+ static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb)
diff --git a/queue-5.4/pstore-ram-fix-error-path-memory-leak-in-persistent_ram_new-callers.patch b/queue-5.4/pstore-ram-fix-error-path-memory-leak-in-persistent_ram_new-callers.patch
new file mode 100644
index 00000000000..8e38744f84a
--- /dev/null
+++ b/queue-5.4/pstore-ram-fix-error-path-memory-leak-in-persistent_ram_new-callers.patch
@@ -0,0 +1,41 @@
+From 8df955a32a73315055e0cd187cbb1cea5820394b Mon Sep 17 00:00:00 2001
+From: Kees Cook
+Date: Mon, 30 Dec 2019 11:48:10 -0800
+Subject: pstore/ram: Fix error-path memory leak in persistent_ram_new() callers
+
+From: Kees Cook
+
+commit 8df955a32a73315055e0cd187cbb1cea5820394b upstream.
+
+For callers that allocated a label for persistent_ram_new(), if the call
+fails, they must clean up the allocation.
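+
+The ownership rule being enforced, as a condensed user-space sketch (a
+hypothetical zone_new() stands in for persistent_ram_new(); this is not
+the ram.c code): the callee takes ownership of the label only on
+success, so a failing call leaves the cleanup to the caller.
+
+	#include <stdlib.h>
+	#include <string.h>
+
+	/* stand-in for persistent_ram_new(): keeps 'label' only on success */
+	static char *zone_new(char *label, int fail)
+	{
+		if (fail)
+			return NULL;	/* caller still owns 'label' */
+		return label;		/* success: the zone owns 'label' now */
+	}
+
+	int main(void)
+	{
+		char *label = strdup("ramoops:dump");
+		char *prz;
+
+		if (!label)
+			return 1;
+		prz = zone_new(label, 1);
+		if (!prz) {
+			free(label);	/* the cleanup this patch adds */
+			return 1;
+		}
+		free(prz);
+		return 0;
+	}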
+
+Suggested-by: Navid Emamdoost
+Fixes: 1227daa43bce ("pstore/ram: Clarify resource reservation labels")
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/lkml/20191211191353.14385-1-navid.emamdoost@gmail.com
+Signed-off-by: Kees Cook
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/pstore/ram.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/pstore/ram.c
++++ b/fs/pstore/ram.c
+@@ -588,6 +588,7 @@ static int ramoops_init_przs(const char
+ 			dev_err(dev, "failed to request %s mem region (0x%zx@0x%llx): %d\n",
+ 				name, record_size,
+ 				(unsigned long long)*paddr, err);
++			kfree(label);
+ 
+ 			while (i > 0) {
+ 				i--;
+@@ -633,6 +634,7 @@ static int ramoops_init_prz(const char *
+ 
+ 		dev_err(dev, "failed to request %s mem region (0x%zx@0x%llx): %d\n",
+ 			name, sz, (unsigned long long)*paddr, err);
++		kfree(label);
+ 		return err;
+ 	}
+ 
diff --git a/queue-5.4/pstore-ram-write-new-dumps-to-start-of-recycled-zones.patch b/queue-5.4/pstore-ram-write-new-dumps-to-start-of-recycled-zones.patch
new file mode 100644
index 00000000000..987fd9903ff
--- /dev/null
+++ b/queue-5.4/pstore-ram-write-new-dumps-to-start-of-recycled-zones.patch
@@ -0,0 +1,49 @@
+From 9e5f1c19800b808a37fb9815a26d382132c26c3d Mon Sep 17 00:00:00 2001
+From: Aleksandr Yashkin
+Date: Mon, 23 Dec 2019 18:38:16 +0500
+Subject: pstore/ram: Write new dumps to start of recycled zones
+
+From: Aleksandr Yashkin
+
+commit 9e5f1c19800b808a37fb9815a26d382132c26c3d upstream.
+
+The ram_core.c routines treat przs as circular buffers.  When writing a
+new crash dump, the old buffer needs to be cleared so that the new dump
+doesn't end up in the wrong place (i.e. at the end).
+
+The solution to this problem is to reset the circular buffer state before
+writing a new Oops dump.
+
+Signed-off-by: Aleksandr Yashkin
+Signed-off-by: Nikolay Merinov
+Signed-off-by: Ariel Gilman
+Link: https://lore.kernel.org/r/20191223133816.28155-1-n.merinov@inango-systems.com
+Fixes: 896fc1f0c4c6 ("pstore/ram: Switch to persistent_ram routines")
+Cc: stable@vger.kernel.org
+Signed-off-by: Kees Cook
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/pstore/ram.c |   11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+--- a/fs/pstore/ram.c
++++ b/fs/pstore/ram.c
+@@ -407,6 +407,17 @@ static int notrace ramoops_pstore_write(
+ 
+ 	prz = cxt->dprzs[cxt->dump_write_cnt];
+ 
++	/*
++	 * Since this is a new crash dump, we need to reset the buffer in
++	 * case it still has an old dump present.  Without this, the new dump
++	 * will get appended, which would seriously confuse anything trying
++	 * to check dump file contents.  Specifically, ramoops_read_kmsg_hdr()
++	 * expects to find a dump header in the beginning of buffer data, so
++	 * we must reset the buffer values, in order to ensure that the
++	 * header will be written to the beginning of the buffer.
++	 */
++	persistent_ram_zap(prz);
++
+ 	/* Build header and append record contents.
*/ + hlen = ramoops_write_kmsg_hdr(prz, record); + if (!hlen) diff --git a/queue-5.4/samples-seccomp-zero-out-members-based-on-seccomp_notif_sizes.patch b/queue-5.4/samples-seccomp-zero-out-members-based-on-seccomp_notif_sizes.patch new file mode 100644 index 00000000000..cef8eac4bdd --- /dev/null +++ b/queue-5.4/samples-seccomp-zero-out-members-based-on-seccomp_notif_sizes.patch @@ -0,0 +1,48 @@ +From 771b894f2f3dfedc2ba5561731fffa0e39b1bbb6 Mon Sep 17 00:00:00 2001 +From: Sargun Dhillon +Date: Mon, 30 Dec 2019 12:35:03 -0800 +Subject: samples/seccomp: Zero out members based on seccomp_notif_sizes + +From: Sargun Dhillon + +commit 771b894f2f3dfedc2ba5561731fffa0e39b1bbb6 upstream. + +The sizes by which seccomp_notif and seccomp_notif_resp are allocated are +based on the SECCOMP_GET_NOTIF_SIZES ioctl. This allows for graceful +extension of these datastructures. If userspace zeroes out the +datastructure based on its version, and it is lagging behind the kernel's +version, it will end up sending trailing garbage. On the other hand, +if it is ahead of the kernel version, it will write extra zero space, +and potentially cause corruption. + +Signed-off-by: Sargun Dhillon +Suggested-by: Tycho Andersen +Link: https://lore.kernel.org/r/20191230203503.4925-1-sargun@sargun.me +Fixes: fec7b6690541 ("samples: add an example of seccomp user trap") +Cc: stable@vger.kernel.org +Signed-off-by: Kees Cook +Signed-off-by: Greg Kroah-Hartman + +--- + samples/seccomp/user-trap.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/samples/seccomp/user-trap.c ++++ b/samples/seccomp/user-trap.c +@@ -298,14 +298,14 @@ int main(void) + req = malloc(sizes.seccomp_notif); + if (!req) + goto out_close; +- memset(req, 0, sizeof(*req)); + + resp = malloc(sizes.seccomp_notif_resp); + if (!resp) + goto out_req; +- memset(resp, 0, sizeof(*resp)); ++ memset(resp, 0, sizes.seccomp_notif_resp); + + while (1) { ++ memset(req, 0, sizes.seccomp_notif); + if (ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, req)) { + perror("ioctl recv"); + goto out_resp; diff --git a/queue-5.4/seccomp-check-that-seccomp_notif-is-zeroed-out-by-the-user.patch b/queue-5.4/seccomp-check-that-seccomp_notif-is-zeroed-out-by-the-user.patch new file mode 100644 index 00000000000..4a3aa6e0b07 --- /dev/null +++ b/queue-5.4/seccomp-check-that-seccomp_notif-is-zeroed-out-by-the-user.patch @@ -0,0 +1,46 @@ +From 2882d53c9c6f3b8311d225062522f03772cf0179 Mon Sep 17 00:00:00 2001 +From: Sargun Dhillon +Date: Sat, 28 Dec 2019 22:24:50 -0800 +Subject: seccomp: Check that seccomp_notif is zeroed out by the user + +From: Sargun Dhillon + +commit 2882d53c9c6f3b8311d225062522f03772cf0179 upstream. + +This patch is a small change in enforcement of the uapi for +SECCOMP_IOCTL_NOTIF_RECV ioctl. Specifically, the datastructure which +is passed (seccomp_notif) must be zeroed out. Previously any of its +members could be set to nonsense values, and we would ignore it. + +This ensures all fields are set to their zero value. 
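+
+Together with the samples/seccomp change above, the pattern user space
+is expected to follow looks roughly like this sketch (assuming a 5.0+
+linux/seccomp.h, and a 'listener' notification fd obtained via
+SECCOMP_FILTER_FLAG_NEW_LISTENER):
+
+	#include <linux/seccomp.h>
+	#include <stdlib.h>
+	#include <string.h>
+	#include <sys/ioctl.h>
+	#include <sys/syscall.h>
+	#include <unistd.h>
+
+	static int handle_notifications(int listener)
+	{
+		struct seccomp_notif_sizes sizes;
+		struct seccomp_notif *req;
+
+		/* ask the kernel how large its structures are */
+		if (syscall(SYS_seccomp, SECCOMP_GET_NOTIF_SIZES, 0, &sizes) < 0)
+			return -1;
+
+		req = malloc(sizes.seccomp_notif);
+		if (!req)
+			return -1;
+
+		for (;;) {
+			/* the kernel now rejects a non-zeroed seccomp_notif */
+			memset(req, 0, sizes.seccomp_notif);
+			if (ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, req))
+				break;
+			/* handle *req, then respond with SECCOMP_IOCTL_NOTIF_SEND */
+		}
+
+		free(req);
+		return 0;
+	}
+
+Allocating and zeroing by the kernel-reported size, rather than by
+sizeof(*req), keeps the program correct whichever side is newer.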
+ +Signed-off-by: Sargun Dhillon +Reviewed-by: Christian Brauner +Reviewed-by: Aleksa Sarai +Acked-by: Tycho Andersen +Link: https://lore.kernel.org/r/20191229062451.9467-2-sargun@sargun.me +Fixes: 6a21cc50f0c7 ("seccomp: add a return code to trap to userspace") +Cc: stable@vger.kernel.org +Signed-off-by: Kees Cook +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/seccomp.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/kernel/seccomp.c ++++ b/kernel/seccomp.c +@@ -1015,6 +1015,13 @@ static long seccomp_notify_recv(struct s + struct seccomp_notif unotif; + ssize_t ret; + ++ /* Verify that we're not given garbage to keep struct extensible. */ ++ ret = check_zeroed_user(buf, sizeof(unotif)); ++ if (ret < 0) ++ return ret; ++ if (!ret) ++ return -EINVAL; ++ + memset(&unotif, 0, sizeof(unotif)); + + ret = down_interruptible(&filter->notif->request); diff --git a/queue-5.4/selftests-seccomp-catch-garbage-on-seccomp_ioctl_notif_recv.patch b/queue-5.4/selftests-seccomp-catch-garbage-on-seccomp_ioctl_notif_recv.patch new file mode 100644 index 00000000000..51f1ff2ff73 --- /dev/null +++ b/queue-5.4/selftests-seccomp-catch-garbage-on-seccomp_ioctl_notif_recv.patch @@ -0,0 +1,48 @@ +From e4ab5ccc357b978999328fadae164e098c26fa40 Mon Sep 17 00:00:00 2001 +From: Sargun Dhillon +Date: Mon, 30 Dec 2019 12:38:11 -0800 +Subject: selftests/seccomp: Catch garbage on SECCOMP_IOCTL_NOTIF_RECV + +From: Sargun Dhillon + +commit e4ab5ccc357b978999328fadae164e098c26fa40 upstream. + +This adds logic to the user_notification_basic test to set a member +of struct seccomp_notif to an invalid value to ensure that the kernel +returns EINVAL if any of the struct seccomp_notif members are set to +invalid values. + +Signed-off-by: Sargun Dhillon +Suggested-by: Christian Brauner +Link: https://lore.kernel.org/r/20191230203811.4996-1-sargun@sargun.me +Fixes: 6a21cc50f0c7 ("seccomp: add a return code to trap to userspace") +Cc: stable@vger.kernel.org +Signed-off-by: Kees Cook +Signed-off-by: Greg Kroah-Hartman + +--- + tools/testing/selftests/seccomp/seccomp_bpf.c | 13 ++++++++++++- + 1 file changed, 12 insertions(+), 1 deletion(-) + +--- a/tools/testing/selftests/seccomp/seccomp_bpf.c ++++ b/tools/testing/selftests/seccomp/seccomp_bpf.c +@@ -3147,7 +3147,18 @@ TEST(user_notification_basic) + EXPECT_GT(poll(&pollfd, 1, -1), 0); + EXPECT_EQ(pollfd.revents, POLLIN); + +- EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); ++ /* Test that we can't pass garbage to the kernel. */ ++ memset(&req, 0, sizeof(req)); ++ req.pid = -1; ++ errno = 0; ++ ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req); ++ EXPECT_EQ(-1, ret); ++ EXPECT_EQ(EINVAL, errno); ++ ++ if (ret) { ++ req.pid = 0; ++ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); ++ } + + pollfd.fd = listener; + pollfd.events = POLLIN | POLLOUT; diff --git a/queue-5.4/selftests-seccomp-zero-out-seccomp_notif.patch b/queue-5.4/selftests-seccomp-zero-out-seccomp_notif.patch new file mode 100644 index 00000000000..a86c9590812 --- /dev/null +++ b/queue-5.4/selftests-seccomp-zero-out-seccomp_notif.patch @@ -0,0 +1,47 @@ +From 88c13f8bd71472fbab5338b01d99122908c77e53 Mon Sep 17 00:00:00 2001 +From: Sargun Dhillon +Date: Sat, 28 Dec 2019 22:24:49 -0800 +Subject: selftests/seccomp: Zero out seccomp_notif + +From: Sargun Dhillon + +commit 88c13f8bd71472fbab5338b01d99122908c77e53 upstream. + +The seccomp_notif structure should be zeroed out prior to calling the +SECCOMP_IOCTL_NOTIF_RECV ioctl. 
Previously, the kernel did not check +whether these structures were zeroed out or not, so these worked. + +This patch zeroes out the seccomp_notif data structure prior to calling +the ioctl. + +Signed-off-by: Sargun Dhillon +Reviewed-by: Tycho Andersen +Reviewed-by: Christian Brauner +Link: https://lore.kernel.org/r/20191229062451.9467-1-sargun@sargun.me +Fixes: 6a21cc50f0c7 ("seccomp: add a return code to trap to userspace") +Cc: stable@vger.kernel.org +Signed-off-by: Kees Cook +Signed-off-by: Greg Kroah-Hartman + +--- + tools/testing/selftests/seccomp/seccomp_bpf.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/tools/testing/selftests/seccomp/seccomp_bpf.c ++++ b/tools/testing/selftests/seccomp/seccomp_bpf.c +@@ -3267,6 +3267,7 @@ TEST(user_notification_signal) + + close(sk_pair[1]); + ++ memset(&req, 0, sizeof(req)); + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + + EXPECT_EQ(kill(pid, SIGUSR1), 0); +@@ -3285,6 +3286,7 @@ TEST(user_notification_signal) + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); + EXPECT_EQ(errno, ENOENT); + ++ memset(&req, 0, sizeof(req)); + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + + resp.id = req.id; diff --git a/queue-5.4/series b/queue-5.4/series index b7663dc6974..2d0d2807641 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -80,3 +80,25 @@ media-pulse8-cec-fix-lost-cec_transmit_attempt_done-call.patch media-cec-cec-2.0-only-bcast-messages-were-ignored.patch media-cec-avoid-decrementing-transmit_queue_sz-if-it-is-0.patch media-cec-check-transmit_in_progress-not-transmitting.patch +mm-memory_hotplug-shrink-zones-when-offlining-memory.patch +mm-zsmalloc.c-fix-the-migrated-zspage-statistics.patch +memcg-account-security-cred-as-well-to-kmemcg.patch +mm-move_pages-return-valid-node-id-in-status-if-the-page-is-already-on-the-target-node.patch +mm-oom-fix-pgtables-units-mismatch-in-killed-process-message.patch +ocfs2-fix-the-crash-due-to-call-ocfs2_get_dlm_debug-once-less.patch +pstore-ram-write-new-dumps-to-start-of-recycled-zones.patch +pstore-ram-fix-error-path-memory-leak-in-persistent_ram_new-callers.patch +gcc-plugins-make-it-possible-to-disable-config_gcc_plugins-again.patch +locks-print-unsigned-ino-in-proc-locks.patch +selftests-seccomp-zero-out-seccomp_notif.patch +seccomp-check-that-seccomp_notif-is-zeroed-out-by-the-user.patch +samples-seccomp-zero-out-members-based-on-seccomp_notif_sizes.patch +selftests-seccomp-catch-garbage-on-seccomp_ioctl_notif_recv.patch +dmaengine-fix-access-to-uninitialized-dma_slave_caps.patch +dmaengine-dma-jz4780-also-break-descriptor-chains-on-jz4725b.patch +btrfs-fix-infinite-loop-during-nocow-writeback-due-to-race.patch +block-fix-splitting-segments-on-boundary-masks.patch +compat_ioctl-block-handle-persistent-reservations.patch +compat_ioctl-block-handle-blkreportzone-blkresetzone.patch +compat_ioctl-block-handle-blkgetzonesz-blkgetnrzones.patch +bpf-fix-precision-tracking-for-unbounded-scalars.patch