]>
Commit | Line | Data |
---|---|---|
d1fcd44f GKH |
1 | From e1c3743e1a20647c53b719dbf28b48f45d23f2cd Mon Sep 17 00:00:00 2001 |
2 | From: Breno Leitao <leitao@debian.org> | |
3 | Date: Wed, 21 Nov 2018 17:21:09 -0200 | |
4 | Subject: powerpc/tm: Set MSR[TS] just prior to recheckpoint | |
5 | ||
6 | From: Breno Leitao <leitao@debian.org> | |
7 | ||
8 | commit e1c3743e1a20647c53b719dbf28b48f45d23f2cd upstream. | |
9 | ||
10 | On a signal handler return, the user could set a context with MSR[TS] bits | |
11 | set, and these bits would be copied to task regs->msr. | |
12 | ||
13 | At restore_tm_sigcontexts(), after current task regs->msr[TS] bits are set, | |
14 | several __get_user() are called and then a recheckpoint is executed. | |
15 | ||
16 | This is a problem since a page fault (in kernel space) could happen when | |
17 | calling __get_user(). If it happens, the process MSR[TS] bits were | |
18 | already set, but recheckpoint was not executed, and SPRs are still invalid. | |
19 | ||
20 | The page fault can cause the current process to be de-scheduled, with | |
21 | MSR[TS] active and without tm_recheckpoint() being called. More | |
22 | importantly, without TEXASR[FS] bit set also. | |
23 | ||
24 | Since TEXASR might not have the FS bit set, when the process is | |
25 | scheduled back, it will try to reclaim, which will be aborted because | |
26 | the CPU is not in the suspended state, and then recheckpoint. This | |
27 | recheckpoint will restore thread->texasr into TEXASR SPR, which might be | |
28 | zero, hitting a BUG_ON(). | |
29 | ||
30 | kernel BUG at /build/linux-sf3Co9/linux-4.9.30/arch/powerpc/kernel/tm.S:434! | |
31 | cpu 0xb: Vector: 700 (Program Check) at [c00000041f1576d0] | |
32 | pc: c000000000054550: restore_gprs+0xb0/0x180 | |
33 | lr: 0000000000000000 | |
34 | sp: c00000041f157950 | |
35 | msr: 8000000100021033 | |
36 | current = 0xc00000041f143000 | |
37 | paca = 0xc00000000fb86300 softe: 0 irq_happened: 0x01 | |
38 | pid = 1021, comm = kworker/11:1 | |
39 | kernel BUG at /build/linux-sf3Co9/linux-4.9.30/arch/powerpc/kernel/tm.S:434! | |
40 | Linux version 4.9.0-3-powerpc64le (debian-kernel@lists.debian.org) (gcc version 6.3.0 20170516 (Debian 6.3.0-18) ) #1 SMP Debian 4.9.30-2+deb9u2 (2017-06-26) | |
41 | enter ? for help | |
42 | [c00000041f157b30] c00000000001bc3c tm_recheckpoint.part.11+0x6c/0xa0 | |
43 | [c00000041f157b70] c00000000001d184 __switch_to+0x1e4/0x4c0 | |
44 | [c00000041f157bd0] c00000000082eeb8 __schedule+0x2f8/0x990 | |
45 | [c00000041f157cb0] c00000000082f598 schedule+0x48/0xc0 | |
46 | [c00000041f157ce0] c0000000000f0d28 worker_thread+0x148/0x610 | |
47 | [c00000041f157d80] c0000000000f96b0 kthread+0x120/0x140 | |
48 | [c00000041f157e30] c00000000000c0e0 ret_from_kernel_thread+0x5c/0x7c | |
49 | ||
50 | This patch simply delays the MSR[TS] set, so, if there is any page fault in | |
51 | the __get_user() section, it does not have regs->msr[TS] set, since the TM | |
52 | structures are still invalid, thus avoiding doing TM operations for | |
53 | in-kernel exceptions and possible process reschedule. | |
54 | ||
55 | With this patch, the MSR[TS] will only be set just before recheckpointing | |
56 | and setting TEXASR[FS] = 1, thus avoiding an interrupt with TM registers in | |
57 | invalid state. | |
58 | ||
59 | Other than that, if CONFIG_PREEMPT is set, there might be a preemption just | |
60 | after setting MSR[TS] and before tm_recheckpoint(). This block must | |
61 | therefore be atomic from a preemption perspective, so it is wrapped in | |
62 | preempt_disable()/preempt_enable(). | |
63 | ||
64 | It is not possible to move tm_recheckpoint to happen earlier, because it is | |
65 | required to get the checkpointed registers from userspace, with | |
66 | __get_user(), thus, the only way to avoid this undesired behavior is | |
67 | delaying the MSR[TS] set. | |
68 | ||
69 | The 32-bit signal handler seems to be safe from this current issue, but it | |
70 | might be exposed to the preemption issue, so preemption is disabled in | |
71 | this chunk of code as well. | |
72 | ||
73 | Changes from v2: | |
74 | * Run the critical section with preempt_disable. | |
75 | ||
76 | Fixes: 87b4e5393af7 ("powerpc/tm: Fix return of active 64bit signals") | |
77 | Cc: stable@vger.kernel.org (v3.9+) | |
78 | Signed-off-by: Breno Leitao <leitao@debian.org> | |
79 | Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> | |
80 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
81 | ||
82 | --- | |
83 | arch/powerpc/kernel/signal_32.c | 20 +++++++++++++++++- | |
84 | arch/powerpc/kernel/signal_64.c | 44 +++++++++++++++++++++++++++------------- | |
85 | 2 files changed, 49 insertions(+), 15 deletions(-) | |
86 | ||
87 | --- a/arch/powerpc/kernel/signal_32.c | |
88 | +++ b/arch/powerpc/kernel/signal_32.c | |
89 | @@ -866,7 +866,23 @@ static long restore_tm_user_regs(struct | |
90 | /* If TM bits are set to the reserved value, it's an invalid context */ | |
91 | if (MSR_TM_RESV(msr_hi)) | |
92 | return 1; | |
93 | - /* Pull in the MSR TM bits from the user context */ | |
94 | + | |
95 | + /* | |
96 | + * Disabling preemption, since it is unsafe to be preempted | |
97 | + * with MSR[TS] set without recheckpointing. | |
98 | + */ | |
99 | + preempt_disable(); | |
100 | + | |
101 | + /* | |
102 | + * CAUTION: | |
103 | + * After regs->MSR[TS] being updated, make sure that get_user(), | |
104 | + * put_user() or similar functions are *not* called. These | |
105 | + * functions can generate page faults which will cause the process | |
106 | + * to be de-scheduled with MSR[TS] set but without calling | |
107 | + * tm_recheckpoint(). This can cause a bug. | |
108 | + * | |
109 | + * Pull in the MSR TM bits from the user context | |
110 | + */ | |
111 | regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr_hi & MSR_TS_MASK); | |
112 | /* Now, recheckpoint. This loads up all of the checkpointed (older) | |
113 | * registers, including FP and V[S]Rs. After recheckpointing, the | |
114 | @@ -891,6 +907,8 @@ static long restore_tm_user_regs(struct | |
115 | } | |
116 | #endif | |
117 | ||
118 | + preempt_enable(); | |
119 | + | |
120 | return 0; | |
121 | } | |
122 | #endif | |
123 | --- a/arch/powerpc/kernel/signal_64.c | |
124 | +++ b/arch/powerpc/kernel/signal_64.c | |
125 | @@ -452,20 +452,6 @@ static long restore_tm_sigcontexts(struc | |
126 | if (MSR_TM_RESV(msr)) | |
127 | return -EINVAL; | |
128 | ||
129 | - /* pull in MSR TS bits from user context */ | |
130 | - regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK); | |
131 | - | |
132 | - /* | |
133 | - * Ensure that TM is enabled in regs->msr before we leave the signal | |
134 | - * handler. It could be the case that (a) user disabled the TM bit | |
135 | - * through the manipulation of the MSR bits in uc_mcontext or (b) the | |
136 | - * TM bit was disabled because a sufficient number of context switches | |
137 | - * happened whilst in the signal handler and load_tm overflowed, | |
138 | - * disabling the TM bit. In either case we can end up with an illegal | |
139 | - * TM state leading to a TM Bad Thing when we return to userspace. | |
140 | - */ | |
141 | - regs->msr |= MSR_TM; | |
142 | - | |
143 | /* pull in MSR LE from user context */ | |
144 | regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); | |
145 | ||
146 | @@ -557,6 +543,34 @@ static long restore_tm_sigcontexts(struc | |
147 | tm_enable(); | |
148 | /* Make sure the transaction is marked as failed */ | |
149 | tsk->thread.tm_texasr |= TEXASR_FS; | |
150 | + | |
151 | + /* | |
152 | + * Disabling preemption, since it is unsafe to be preempted | |
153 | + * with MSR[TS] set without recheckpointing. | |
154 | + */ | |
155 | + preempt_disable(); | |
156 | + | |
157 | + /* pull in MSR TS bits from user context */ | |
158 | + regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK); | |
159 | + | |
160 | + /* | |
161 | + * Ensure that TM is enabled in regs->msr before we leave the signal | |
162 | + * handler. It could be the case that (a) user disabled the TM bit | |
163 | + * through the manipulation of the MSR bits in uc_mcontext or (b) the | |
164 | + * TM bit was disabled because a sufficient number of context switches | |
165 | + * happened whilst in the signal handler and load_tm overflowed, | |
166 | + * disabling the TM bit. In either case we can end up with an illegal | |
167 | + * TM state leading to a TM Bad Thing when we return to userspace. | |
168 | + * | |
169 | + * CAUTION: | |
170 | + * After regs->MSR[TS] being updated, make sure that get_user(), | |
171 | + * put_user() or similar functions are *not* called. These | |
172 | + * functions can generate page faults which will cause the process | |
173 | + * to be de-scheduled with MSR[TS] set but without calling | |
174 | + * tm_recheckpoint(). This can cause a bug. | |
175 | + */ | |
176 | + regs->msr |= MSR_TM; | |
177 | + | |
178 | /* This loads the checkpointed FP/VEC state, if used */ | |
179 | tm_recheckpoint(&tsk->thread, msr); | |
180 | ||
181 | @@ -570,6 +584,8 @@ static long restore_tm_sigcontexts(struc | |
182 | regs->msr |= MSR_VEC; | |
183 | } | |
184 | ||
185 | + preempt_enable(); | |
186 | + | |
187 | return err; | |
188 | } | |
189 | #endif |