git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob
676411ada9d7a263f79f71d89df8695bd72799d6
[thirdparty/kernel/stable-queue.git] /
1 From 3d7fed4ad8ccb691d217efbb0f934e6a4df5ef91 Mon Sep 17 00:00:00 2001
2 From: Jane Chu <jane.chu@oracle.com>
3 Date: Mon, 14 Oct 2019 14:12:29 -0700
4 Subject: mm/memory-failure: poison read receives SIGKILL instead of SIGBUS if mmaped more than once
5
6 From: Jane Chu <jane.chu@oracle.com>
7
8 commit 3d7fed4ad8ccb691d217efbb0f934e6a4df5ef91 upstream.
9
10 Mmap /dev/dax more than once, then read the poisoned location using an
11 address from one of the mappings. The other mappings, which do not have
12 the page mapped in, cause SIGKILL to be delivered to the process.
13 SIGKILL takes precedence over SIGBUS, so the user process loses the
14 opportunity to handle the UE (uncorrectable error).
15
16 Although one may add MAP_POPULATE to mmap(2) to work around the issue,
17 MAP_POPULATE makes mapping 128GB of pmem several orders of magnitude
18 slower, so it isn't always an option.
19
20 Details -
21
22 ndctl inject-error --block=10 --count=1 namespace6.0
23
24 ./read_poison -x dax6.0 -o 5120 -m 2
25 mmaped address 0x7f5bb6600000
26 mmaped address 0x7f3cf3600000
27 doing local read at address 0x7f3cf3601400
28 Killed
29
30 Console messages in instrumented kernel -
31
32 mce: Uncorrected hardware memory error in user-access at edbe201400
33 Memory failure: tk->addr = 7f5bb6601000
34 Memory failure: address edbe201: call dev_pagemap_mapping_shift
35 dev_pagemap_mapping_shift: page edbe201: no PUD
36 Memory failure: tk->size_shift == 0
37 Memory failure: Unable to find user space address edbe201 in read_poison
38 Memory failure: tk->addr = 7f3cf3601000
39 Memory failure: address edbe201: call dev_pagemap_mapping_shift
40 Memory failure: tk->size_shift = 21
41 Memory failure: 0xedbe201: forcibly killing read_poison:22434 because of failure to unmap corrupted page
42 => to deliver SIGKILL
43 Memory failure: 0xedbe201: Killing read_poison:22434 due to hardware memory corruption
44 => to deliver SIGBUS
45
46 Link: http://lkml.kernel.org/r/1565112345-28754-3-git-send-email-jane.chu@oracle.com
47 Signed-off-by: Jane Chu <jane.chu@oracle.com>
48 Suggested-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
49 Reviewed-by: Dan Williams <dan.j.williams@intel.com>
50 Acked-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
51 Cc: Michal Hocko <mhocko@kernel.org>
52 Cc: <stable@vger.kernel.org>
53 Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
54 Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
55 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
56
57 ---
58 mm/memory-failure.c | 22 +++++++++++++---------
59 1 file changed, 13 insertions(+), 9 deletions(-)
60
61 --- a/mm/memory-failure.c
62 +++ b/mm/memory-failure.c
63 @@ -202,7 +202,6 @@ struct to_kill {
64 struct task_struct *tsk;
65 unsigned long addr;
66 short size_shift;
67 - char addr_valid;
68 };
69
70 /*
71 @@ -327,22 +326,27 @@ static void add_to_kill(struct task_stru
72 }
73 }
74 tk->addr = page_address_in_vma(p, vma);
75 - tk->addr_valid = 1;
76 if (is_zone_device_page(p))
77 tk->size_shift = dev_pagemap_mapping_shift(p, vma);
78 else
79 tk->size_shift = compound_order(compound_head(p)) + PAGE_SHIFT;
80
81 /*
82 - * In theory we don't have to kill when the page was
83 - * munmaped. But it could be also a mremap. Since that's
84 - * likely very rare kill anyways just out of paranoia, but use
85 - * a SIGKILL because the error is not contained anymore.
86 + * Send SIGKILL if "tk->addr == -EFAULT". Also, as
87 + * "tk->size_shift" is always non-zero for !is_zone_device_page(),
88 + * so "tk->size_shift == 0" effectively checks no mapping on
89 + * ZONE_DEVICE. Indeed, when a devdax page is mmapped N times
90 + * to a process' address space, it's possible not all N VMAs
91 + * contain mappings for the page, but at least one VMA does.
92 + * Only deliver SIGBUS with payload derived from the VMA that
93 + * has a mapping for the page.
94 */
95 - if (tk->addr == -EFAULT || tk->size_shift == 0) {
96 + if (tk->addr == -EFAULT) {
97 pr_info("Memory failure: Unable to find user space address %lx in %s\n",
98 page_to_pfn(p), tsk->comm);
99 - tk->addr_valid = 0;
100 + } else if (tk->size_shift == 0) {
101 + kfree(tk);
102 + return;
103 }
104 get_task_struct(tsk);
105 tk->tsk = tsk;
106 @@ -369,7 +373,7 @@ static void kill_procs(struct list_head
107 * make sure the process doesn't catch the
108 * signal and then access the memory. Just kill it.
109 */
110 - if (fail || tk->addr_valid == 0) {
111 + if (fail || tk->addr == -EFAULT) {
112 pr_err("Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
113 pfn, tk->tsk->comm, tk->tsk->pid);
114 do_send_sig_info(SIGKILL, SEND_SIG_PRIV,