]>
Commit | Line | Data |
---|---|---|
96f81b03 GKH |
1 | From 18fa84a2db0e15b02baa5d94bdb5bd509175d2f6 Mon Sep 17 00:00:00 2001 |
2 | From: Tejun Heo <tj@kernel.org> | |
3 | Date: Wed, 29 May 2019 13:46:25 -0700 | |
4 | Subject: cgroup: Use css_tryget() instead of css_tryget_online() in task_get_css() | |
5 | ||
6 | From: Tejun Heo <tj@kernel.org> | |
7 | ||
8 | commit 18fa84a2db0e15b02baa5d94bdb5bd509175d2f6 upstream. | |
9 | ||
10 | A PF_EXITING task can stay associated with an offline css. If such a | |
11 | task calls task_get_css(), css_tryget_online() keeps failing and the | |
12 | retry loop spins indefinitely. This can be triggered by BSD process | |
13 | accounting, which writes to a file with PF_EXITING set, when racing | |
14 | against memcg disable, as in the backtrace at the end. | |
15 | ||
16 | After this change, task_get_css() may return a css which was already | |
17 | offline when the function was called. None of the existing users are | |
18 | affected by this change. | |
19 | ||
20 | INFO: rcu_sched self-detected stall on CPU | |
21 | INFO: rcu_sched detected stalls on CPUs/tasks: | |
22 | ... | |
23 | NMI backtrace for cpu 0 | |
24 | ... | |
25 | Call Trace: | |
26 | <IRQ> | |
27 | dump_stack+0x46/0x68 | |
28 | nmi_cpu_backtrace.cold.2+0x13/0x57 | |
29 | nmi_trigger_cpumask_backtrace+0xba/0xca | |
30 | rcu_dump_cpu_stacks+0x9e/0xce | |
31 | rcu_check_callbacks.cold.74+0x2af/0x433 | |
32 | update_process_times+0x28/0x60 | |
33 | tick_sched_timer+0x34/0x70 | |
34 | __hrtimer_run_queues+0xee/0x250 | |
35 | hrtimer_interrupt+0xf4/0x210 | |
36 | smp_apic_timer_interrupt+0x56/0x110 | |
37 | apic_timer_interrupt+0xf/0x20 | |
38 | </IRQ> | |
39 | RIP: 0010:balance_dirty_pages_ratelimited+0x28f/0x3d0 | |
40 | ... | |
41 | btrfs_file_write_iter+0x31b/0x563 | |
42 | __vfs_write+0xfa/0x140 | |
43 | __kernel_write+0x4f/0x100 | |
44 | do_acct_process+0x495/0x580 | |
45 | acct_process+0xb9/0xdb | |
46 | do_exit+0x748/0xa00 | |
47 | do_group_exit+0x3a/0xa0 | |
48 | get_signal+0x254/0x560 | |
49 | do_signal+0x23/0x5c0 | |
50 | exit_to_usermode_loop+0x5d/0xa0 | |
51 | prepare_exit_to_usermode+0x53/0x80 | |
52 | retint_user+0x8/0x8 | |
53 | ||
54 | Signed-off-by: Tejun Heo <tj@kernel.org> | |
55 | Cc: stable@vger.kernel.org # v4.2+ | |
56 | Fixes: ec438699a9ae ("cgroup, block: implement task_get_css() and use it in bio_associate_current()") | |
57 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
58 | ||
59 | --- | |
60 | include/linux/cgroup.h | 10 ++++++++-- | |
61 | 1 file changed, 8 insertions(+), 2 deletions(-) | |
62 | ||
63 | --- a/include/linux/cgroup.h | |
64 | +++ b/include/linux/cgroup.h | |
65 | @@ -462,7 +462,7 @@ static inline struct cgroup_subsys_state | |
66 | * | |
67 | * Find the css for the (@task, @subsys_id) combination, increment a | |
68 | * reference on and return it. This function is guaranteed to return a | |
69 | - * valid css. | |
70 | + * valid css. The returned css may already have been offlined. | |
71 | */ | |
72 | static inline struct cgroup_subsys_state * | |
73 | task_get_css(struct task_struct *task, int subsys_id) | |
74 | @@ -472,7 +472,13 @@ task_get_css(struct task_struct *task, i | |
75 | rcu_read_lock(); | |
76 | while (true) { | |
77 | css = task_css(task, subsys_id); | |
78 | - if (likely(css_tryget_online(css))) | |
79 | + /* | |
80 | + * Can't use css_tryget_online() here. A task which has | |
81 | + * PF_EXITING set may stay associated with an offline css. | |
82 | + * If such task calls this function, css_tryget_online() | |
83 | + * will keep failing. | |
84 | + */ | |
85 | + if (likely(css_tryget(css))) | |
86 | break; | |
87 | cpu_relax(); | |
88 | } |