/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_SCHED_MM_H
#define _LINUX_SCHED_MM_H

#include <linux/kernel.h>
#include <linux/atomic.h>
#include <linux/sched.h>
#include <linux/mm_types.h>
#include <linux/gfp.h>
#include <linux/sync_core.h>

/*
 * Routines for handling mm_structs
 */
extern struct mm_struct *mm_alloc(void);

/**
 * mmgrab() - Pin a &struct mm_struct.
 * @mm: The &struct mm_struct to pin.
 *
 * Make sure that @mm will not get freed even after the owning task
 * exits. This doesn't guarantee that the associated address space
 * will still exist later on and mmget_not_zero() has to be used before
 * accessing it.
 *
 * This is the preferred way to pin @mm for a longer/unbounded amount
 * of time.
 *
 * Use mmdrop() to release the reference acquired by mmgrab().
 *
 * See also <Documentation/vm/active_mm.rst> for an in-depth explanation
 * of &mm_struct.mm_count vs &mm_struct.mm_users.
 */
static inline void mmgrab(struct mm_struct *mm)
{
	atomic_inc(&mm->mm_count);
}

extern void __mmdrop(struct mm_struct *mm);

static inline void mmdrop(struct mm_struct *mm)
{
	/*
	 * The implicit full barrier implied by atomic_dec_and_test() is
	 * required by the membarrier system call before returning to
	 * user-space, after storing to rq->curr.
	 */
	if (unlikely(atomic_dec_and_test(&mm->mm_count)))
		__mmdrop(mm);
}
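
/*
 * Example (illustrative sketch only, not part of this header): a driver
 * that wants to keep a task's mm_struct around for later, possibly
 * unbounded, use would pair mmgrab() with mmdrop(). The my_ctx structure
 * and helpers below are hypothetical:
 *
 *	struct my_ctx {
 *		struct mm_struct *mm;
 *	};
 *
 *	static void my_ctx_bind(struct my_ctx *ctx, struct mm_struct *mm)
 *	{
 *		mmgrab(mm);		// pin mm_count; mm_struct stays allocated
 *		ctx->mm = mm;
 *	}
 *
 *	static void my_ctx_unbind(struct my_ctx *ctx)
 *	{
 *		mmdrop(ctx->mm);	// release the mm_count reference
 *		ctx->mm = NULL;
 *	}
 *
 * Note that mmgrab() only keeps the mm_struct itself alive; accessing the
 * address space later still requires mmget_not_zero()/mmput().
 */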

/*
 * This has to be called after a get_task_mm()/mmget_not_zero()
 * followed by taking the mmap_sem for writing before modifying the
 * vmas or anything the coredump pretends not to change from under it.
 *
 * It also has to be called when mmgrab() is used in the context of
 * the process, but then the mm_count refcount is transferred outside
 * the context of the process to run down_write() on that pinned mm.
 *
 * NOTE: find_extend_vma() called from GUP context is the only place
 * that can modify the "mm" (notably the vm_start/end) under mmap_sem
 * for reading and outside the context of the process, so it is also
 * the only case that holds the mmap_sem for reading that must call
 * this function. Generally if the mmap_sem is held for reading
 * there's no need for this check after get_task_mm()/mmget_not_zero().
 *
 * This function can be obsoleted and the check can be removed once
 * the coredump code holds the mmap_sem for writing before invoking
 * the ->core_dump methods.
 */
static inline bool mmget_still_valid(struct mm_struct *mm)
{
	return likely(!mm->core_state);
}
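
/*
 * Example (illustrative sketch, not part of this header): a caller that
 * modifies VMAs of a foreign mm checks mmget_still_valid() after taking
 * mmap_sem for writing, so it backs off instead of racing with a coredump.
 * The task/mm variables and error values are hypothetical:
 *
 *	mm = get_task_mm(task);
 *	if (!mm)
 *		return -ESRCH;
 *	down_write(&mm->mmap_sem);
 *	if (!mmget_still_valid(mm)) {
 *		// coredump in progress; do not touch the vmas
 *		up_write(&mm->mmap_sem);
 *		mmput(mm);
 *		return -EINTR;
 *	}
 *	// ... modify vmas ...
 *	up_write(&mm->mmap_sem);
 *	mmput(mm);
 */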

/**
 * mmget() - Pin the address space associated with a &struct mm_struct.
 * @mm: The address space to pin.
 *
 * Make sure that the address space of the given &struct mm_struct doesn't
 * go away. This does not protect against parts of the address space being
 * modified or freed, however.
 *
 * Never use this function to pin this address space for an
 * unbounded/indefinite amount of time.
 *
 * Use mmput() to release the reference acquired by mmget().
 *
 * See also <Documentation/vm/active_mm.rst> for an in-depth explanation
 * of &mm_struct.mm_count vs &mm_struct.mm_users.
 */
static inline void mmget(struct mm_struct *mm)
{
	atomic_inc(&mm->mm_users);
}

static inline bool mmget_not_zero(struct mm_struct *mm)
{
	return atomic_inc_not_zero(&mm->mm_users);
}

/* mmput gets rid of the mappings and all user-space */
extern void mmput(struct mm_struct *);
#ifdef CONFIG_MMU
/*
 * Same as mmput() but performs the slow path from async context. It can
 * also be called from atomic context.
 */
void mmput_async(struct mm_struct *);
#endif
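
/*
 * Example (illustrative sketch, not part of this header): code holding only
 * an mm_count reference (from mmgrab()) must take a temporary mm_users
 * reference before touching the address space:
 *
 *	if (mmget_not_zero(mm)) {
 *		// the address space is guaranteed to stay around here
 *		// ... walk or fault in pages ...
 *		mmput(mm);
 *	} else {
 *		// the owner already exited; the address space is gone
 *	}
 */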

/* Grab a reference to a task's mm, if it is not already going away */
extern struct mm_struct *get_task_mm(struct task_struct *task);
/*
 * Grab a reference to a task's mm, if it is not already going away
 * and ptrace_may_access with the mode parameter passed to it
 * succeeds.
 */
extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode);
/* Remove the current task's stale references to the old mm_struct */
extern void mm_release(struct task_struct *, struct mm_struct *);
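
/*
 * Example (illustrative sketch, not part of this header): get_task_mm()
 * returns the task's mm with an mm_users reference held, or NULL if the
 * task has no address space or is exiting; the reference must be dropped
 * with mmput():
 *
 *	struct mm_struct *mm = get_task_mm(task);
 *
 *	if (mm) {
 *		// safe to inspect the task's address space here
 *		mmput(mm);
 *	}
 */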

#ifdef CONFIG_MEMCG
extern void mm_update_next_owner(struct mm_struct *mm);
#else
static inline void mm_update_next_owner(struct mm_struct *mm)
{
}
#endif /* CONFIG_MEMCG */

#ifdef CONFIG_MMU
extern void arch_pick_mmap_layout(struct mm_struct *mm,
				  struct rlimit *rlim_stack);
extern unsigned long
arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
		       unsigned long, unsigned long);
extern unsigned long
arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
			       unsigned long len, unsigned long pgoff,
			       unsigned long flags);
#else
static inline void arch_pick_mmap_layout(struct mm_struct *mm,
					 struct rlimit *rlim_stack) {}
#endif

static inline bool in_vfork(struct task_struct *tsk)
{
	bool ret;

	/*
	 * We need RCU to access ->real_parent if CLONE_VM was used along
	 * with CLONE_PARENT.
	 *
	 * We check real_parent->mm == tsk->mm because CLONE_VFORK does not
	 * imply CLONE_VM.
	 *
	 * CLONE_VFORK can be used with CLONE_PARENT/CLONE_THREAD and thus
	 * ->real_parent is not necessarily the task doing vfork(), so in
	 * theory we can't rely on task_lock() if we want to dereference it.
	 *
	 * And in this case we can't trust the real_parent->mm == tsk->mm
	 * check, it can be a false negative. But we do not care, if init or
	 * another oom-unkillable task does this it should blame itself.
	 */
	rcu_read_lock();
	ret = tsk->vfork_done && tsk->real_parent->mm == tsk->mm;
	rcu_read_unlock();

	return ret;
}

/*
 * Applies per-task gfp context to the given allocation flags.
 * PF_MEMALLOC_NOIO implies GFP_NOIO
 * PF_MEMALLOC_NOFS implies GFP_NOFS
 * PF_MEMALLOC_NOCMA implies no allocation from CMA region.
 */
static inline gfp_t current_gfp_context(gfp_t flags)
{
	if (unlikely(current->flags &
		     (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS | PF_MEMALLOC_NOCMA))) {
		/*
		 * NOIO implies both NOIO and NOFS and it is a weaker context
		 * so always make sure it takes precedence.
		 */
		if (current->flags & PF_MEMALLOC_NOIO)
			flags &= ~(__GFP_IO | __GFP_FS);
		else if (current->flags & PF_MEMALLOC_NOFS)
			flags &= ~__GFP_FS;
#ifdef CONFIG_CMA
		if (current->flags & PF_MEMALLOC_NOCMA)
			flags &= ~__GFP_MOVABLE;
#endif
	}
	return flags;
}
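
/*
 * Example (illustrative sketch, not part of this header): an allocator
 * entry point would filter its caller's gfp mask through
 * current_gfp_context() before acting on __GFP_IO/__GFP_FS, so that
 * memalloc_no{io,fs}_save() scopes are honoured even when the caller
 * passed GFP_KERNEL. my_alloc() below is hypothetical; the page allocator
 * performs equivalent filtering internally on its slow path:
 *
 *	static struct page *my_alloc(gfp_t gfp_mask, unsigned int order)
 *	{
 *		gfp_t gfp = current_gfp_context(gfp_mask);
 *
 *		if (!(gfp & __GFP_FS)) {
 *			// skip any step that could recurse into the FS
 *		}
 *		return alloc_pages(gfp, order);
 *	}
 */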

#ifdef CONFIG_LOCKDEP
extern void __fs_reclaim_acquire(void);
extern void __fs_reclaim_release(void);
extern void fs_reclaim_acquire(gfp_t gfp_mask);
extern void fs_reclaim_release(gfp_t gfp_mask);
#else
static inline void __fs_reclaim_acquire(void) { }
static inline void __fs_reclaim_release(void) { }
static inline void fs_reclaim_acquire(gfp_t gfp_mask) { }
static inline void fs_reclaim_release(gfp_t gfp_mask) { }
#endif
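
/*
 * Example (illustrative sketch, not part of this header): fs_reclaim_acquire()
 * and fs_reclaim_release() let lockdep treat a code path as if it were
 * running inside memory reclaim, so lock-ordering bugs against reclaim can
 * be reported without waiting for reclaim to actually happen:
 *
 *	fs_reclaim_acquire(GFP_KERNEL);
 *	// locks taken here are checked as if taken from reclaim context
 *	fs_reclaim_release(GFP_KERNEL);
 *
 * Under !CONFIG_LOCKDEP the calls compile away.
 */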

/**
 * memalloc_noio_save - Marks implicit GFP_NOIO allocation scope.
 *
 * This function marks the beginning of the GFP_NOIO allocation scope.
 * All further allocations will implicitly drop the __GFP_IO flag and so
 * they are safe for the IO critical section from the allocation recursion
 * point of view. Use memalloc_noio_restore to end the scope with flags
 * returned by this function.
 *
 * This function is safe to be used from any context.
 */
static inline unsigned int memalloc_noio_save(void)
{
	unsigned int flags = current->flags & PF_MEMALLOC_NOIO;
	current->flags |= PF_MEMALLOC_NOIO;
	return flags;
}

/**
 * memalloc_noio_restore - Ends the implicit GFP_NOIO scope.
 * @flags: Flags to restore.
 *
 * Ends the implicit GFP_NOIO scope started by the memalloc_noio_save
 * function. Always make sure that the given flags are the return value
 * from the pairing memalloc_noio_save call.
 */
static inline void memalloc_noio_restore(unsigned int flags)
{
	current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags;
}
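
/*
 * Example (illustrative sketch, not part of this header): a block driver
 * path that must not recurse into the I/O layer wraps the section in a
 * save/restore pair; GFP_KERNEL allocations inside the scope are
 * implicitly demoted to GFP_NOIO via current_gfp_context():
 *
 *	unsigned int noio_flags;
 *	void *buf;
 *
 *	noio_flags = memalloc_noio_save();
 *	buf = kmalloc(size, GFP_KERNEL);	// effectively GFP_NOIO here
 *	// ... work that must not issue new I/O to reclaim memory ...
 *	memalloc_noio_restore(noio_flags);
 *
 * Restoring the saved value (rather than clearing the flag) keeps nested
 * scopes working correctly.
 */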

/**
 * memalloc_nofs_save - Marks implicit GFP_NOFS allocation scope.
 *
 * This function marks the beginning of the GFP_NOFS allocation scope.
 * All further allocations will implicitly drop the __GFP_FS flag and so
 * they are safe for the FS critical section from the allocation recursion
 * point of view. Use memalloc_nofs_restore to end the scope with flags
 * returned by this function.
 *
 * This function is safe to be used from any context.
 */
static inline unsigned int memalloc_nofs_save(void)
{
	unsigned int flags = current->flags & PF_MEMALLOC_NOFS;
	current->flags |= PF_MEMALLOC_NOFS;
	return flags;
}

/**
 * memalloc_nofs_restore - Ends the implicit GFP_NOFS scope.
 * @flags: Flags to restore.
 *
 * Ends the implicit GFP_NOFS scope started by the memalloc_nofs_save
 * function. Always make sure that the given flags are the return value
 * from the pairing memalloc_nofs_save call.
 */
static inline void memalloc_nofs_restore(unsigned int flags)
{
	current->flags = (current->flags & ~PF_MEMALLOC_NOFS) | flags;
}
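
/*
 * Example (illustrative sketch, not part of this header): a filesystem
 * holding locks that reclaim could also need (for instance during a
 * transaction) brackets the critical section so that allocations inside
 * it cannot recurse back into the filesystem:
 *
 *	unsigned int nofs_flags;
 *
 *	nofs_flags = memalloc_nofs_save();
 *	// GFP_KERNEL allocations in here behave as GFP_NOFS
 *	memalloc_nofs_restore(nofs_flags);
 */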

static inline unsigned int memalloc_noreclaim_save(void)
{
	unsigned int flags = current->flags & PF_MEMALLOC;
	current->flags |= PF_MEMALLOC;
	return flags;
}

static inline void memalloc_noreclaim_restore(unsigned int flags)
{
	current->flags = (current->flags & ~PF_MEMALLOC) | flags;
}
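
/*
 * Example (illustrative sketch, not part of this header): a PF_MEMALLOC
 * scope tells the allocator not to enter direct reclaim and to dip into
 * reserves, so it is reserved for code that is itself making memory
 * available (e.g. reclaim or swap-out paths):
 *
 *	unsigned int noreclaim_flags;
 *
 *	noreclaim_flags = memalloc_noreclaim_save();
 *	// small, short-lived allocations needed for forward progress
 *	memalloc_noreclaim_restore(noreclaim_flags);
 */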

#ifdef CONFIG_CMA
static inline unsigned int memalloc_nocma_save(void)
{
	unsigned int flags = current->flags & PF_MEMALLOC_NOCMA;

	current->flags |= PF_MEMALLOC_NOCMA;
	return flags;
}

static inline void memalloc_nocma_restore(unsigned int flags)
{
	current->flags = (current->flags & ~PF_MEMALLOC_NOCMA) | flags;
}
#else
static inline unsigned int memalloc_nocma_save(void)
{
	return 0;
}

static inline void memalloc_nocma_restore(unsigned int flags)
{
}
#endif
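
/*
 * Example (illustrative sketch, not part of this header): a path that is
 * about to pin pages for a long time can use the NOCMA scope so that any
 * movable allocations it triggers stay out of the CMA region, as implied
 * by the __GFP_MOVABLE filtering in current_gfp_context():
 *
 *	unsigned int nocma_flags;
 *
 *	nocma_flags = memalloc_nocma_save();
 *	// allocations here avoid CMA pageblocks
 *	memalloc_nocma_restore(nocma_flags);
 */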

#ifdef CONFIG_MEMCG
/**
 * memalloc_use_memcg - Starts the remote memcg charging scope.
 * @memcg: memcg to charge.
 *
 * This function marks the beginning of the remote memcg charging scope. All the
 * __GFP_ACCOUNT allocations till the end of the scope will be charged to the
 * given memcg.
 *
 * NOTE: This function is not nesting safe.
 */
static inline void memalloc_use_memcg(struct mem_cgroup *memcg)
{
	WARN_ON_ONCE(current->active_memcg);
	current->active_memcg = memcg;
}

/**
 * memalloc_unuse_memcg - Ends the remote memcg charging scope.
 *
 * This function marks the end of the remote memcg charging scope started by
 * memalloc_use_memcg().
 */
static inline void memalloc_unuse_memcg(void)
{
	current->active_memcg = NULL;
}
#else
static inline void memalloc_use_memcg(struct mem_cgroup *memcg)
{
}

static inline void memalloc_unuse_memcg(void)
{
}
#endif
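
/*
 * Example (illustrative sketch, not part of this header): kernel memory
 * allocated on behalf of another cgroup (for instance while acting on a
 * remote process) can be charged to that cgroup by wrapping __GFP_ACCOUNT
 * allocations in a use/unuse pair. The memcg and obj variables are
 * hypothetical:
 *
 *	memalloc_use_memcg(memcg);
 *	obj = kzalloc(sizeof(*obj), GFP_KERNEL | __GFP_ACCOUNT);
 *	memalloc_unuse_memcg();
 *
 * The scope is not nesting safe, so it must not be entered while another
 * remote charging scope is already active.
 */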

#ifdef CONFIG_MEMBARRIER
enum {
	MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY		= (1U << 0),
	MEMBARRIER_STATE_PRIVATE_EXPEDITED			= (1U << 1),
	MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY			= (1U << 2),
	MEMBARRIER_STATE_GLOBAL_EXPEDITED			= (1U << 3),
	MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY	= (1U << 4),
	MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE		= (1U << 5),
};

enum {
	MEMBARRIER_FLAG_SYNC_CORE	= (1U << 0),
};

#ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
#include <asm/membarrier.h>
#endif

static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
{
	if (likely(!(atomic_read(&mm->membarrier_state) &
		     MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE)))
		return;
	sync_core_before_usermode();
}

static inline void membarrier_execve(struct task_struct *t)
{
	atomic_set(&t->mm->membarrier_state, 0);
}
#else
#ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
					     struct mm_struct *next,
					     struct task_struct *tsk)
{
}
#endif
static inline void membarrier_execve(struct task_struct *t)
{
}
static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
{
}
#endif

#endif /* _LINUX_SCHED_MM_H */