git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob - queue-5.0/bpf-fix-use-after-free-in-bpf_evict_inode.patch
5.0-stable patches
[thirdparty/kernel/stable-queue.git] / queue-5.0 / bpf-fix-use-after-free-in-bpf_evict_inode.patch
1 From b455bc192f9acbd783f0474f6563365cd5a8886c Mon Sep 17 00:00:00 2001
2 From: Daniel Borkmann <daniel@iogearbox.net>
3 Date: Mon, 25 Mar 2019 15:54:43 +0100
4 Subject: bpf: fix use after free in bpf_evict_inode
5
6 [ Upstream commit 1da6c4d9140cb7c13e87667dc4e1488d6c8fc10f ]
7
8 syzkaller was able to generate the following UAF in bpf:
9
10 BUG: KASAN: use-after-free in lookup_last fs/namei.c:2269 [inline]
11 BUG: KASAN: use-after-free in path_lookupat.isra.43+0x9f8/0xc00 fs/namei.c:2318
12 Read of size 1 at addr ffff8801c4865c47 by task syz-executor2/9423
13
14 CPU: 0 PID: 9423 Comm: syz-executor2 Not tainted 4.20.0-rc1-next-20181109+
15 #110
16 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
17 Google 01/01/2011
18 Call Trace:
19 __dump_stack lib/dump_stack.c:77 [inline]
20 dump_stack+0x244/0x39d lib/dump_stack.c:113
21 print_address_description.cold.7+0x9/0x1ff mm/kasan/report.c:256
22 kasan_report_error mm/kasan/report.c:354 [inline]
23 kasan_report.cold.8+0x242/0x309 mm/kasan/report.c:412
24 __asan_report_load1_noabort+0x14/0x20 mm/kasan/report.c:430
25 lookup_last fs/namei.c:2269 [inline]
26 path_lookupat.isra.43+0x9f8/0xc00 fs/namei.c:2318
27 filename_lookup+0x26a/0x520 fs/namei.c:2348
28 user_path_at_empty+0x40/0x50 fs/namei.c:2608
29 user_path include/linux/namei.h:62 [inline]
30 do_mount+0x180/0x1ff0 fs/namespace.c:2980
31 ksys_mount+0x12d/0x140 fs/namespace.c:3258
32 __do_sys_mount fs/namespace.c:3272 [inline]
33 __se_sys_mount fs/namespace.c:3269 [inline]
34 __x64_sys_mount+0xbe/0x150 fs/namespace.c:3269
35 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
36 entry_SYSCALL_64_after_hwframe+0x49/0xbe
37 RIP: 0033:0x457569
38 Code: fd b3 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7
39 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff
40 ff 0f 83 cb b3 fb ff c3 66 2e 0f 1f 84 00 00 00 00
41 RSP: 002b:00007fde6ed96c78 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5
42 RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 0000000000457569
43 RDX: 0000000020000040 RSI: 0000000020000000 RDI: 0000000000000000
44 RBP: 000000000072bf00 R08: 0000000020000340 R09: 0000000000000000
45 R10: 0000000000200000 R11: 0000000000000246 R12: 00007fde6ed976d4
46 R13: 00000000004c2c24 R14: 00000000004d4990 R15: 00000000ffffffff
47
48 Allocated by task 9424:
49 save_stack+0x43/0xd0 mm/kasan/kasan.c:448
50 set_track mm/kasan/kasan.c:460 [inline]
51 kasan_kmalloc+0xc7/0xe0 mm/kasan/kasan.c:553
52 __do_kmalloc mm/slab.c:3722 [inline]
53 __kmalloc_track_caller+0x157/0x760 mm/slab.c:3737
54 kstrdup+0x39/0x70 mm/util.c:49
55 bpf_symlink+0x26/0x140 kernel/bpf/inode.c:356
56 vfs_symlink+0x37a/0x5d0 fs/namei.c:4127
57 do_symlinkat+0x242/0x2d0 fs/namei.c:4154
58 __do_sys_symlink fs/namei.c:4173 [inline]
59 __se_sys_symlink fs/namei.c:4171 [inline]
60 __x64_sys_symlink+0x59/0x80 fs/namei.c:4171
61 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
62 entry_SYSCALL_64_after_hwframe+0x49/0xbe
63
64 Freed by task 9425:
65 save_stack+0x43/0xd0 mm/kasan/kasan.c:448
66 set_track mm/kasan/kasan.c:460 [inline]
67 __kasan_slab_free+0x102/0x150 mm/kasan/kasan.c:521
68 kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528
69 __cache_free mm/slab.c:3498 [inline]
70 kfree+0xcf/0x230 mm/slab.c:3817
71 bpf_evict_inode+0x11f/0x150 kernel/bpf/inode.c:565
72 evict+0x4b9/0x980 fs/inode.c:558
73 iput_final fs/inode.c:1550 [inline]
74 iput+0x674/0xa90 fs/inode.c:1576
75 do_unlinkat+0x733/0xa30 fs/namei.c:4069
76 __do_sys_unlink fs/namei.c:4110 [inline]
77 __se_sys_unlink fs/namei.c:4108 [inline]
78 __x64_sys_unlink+0x42/0x50 fs/namei.c:4108
79 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
80 entry_SYSCALL_64_after_hwframe+0x49/0xbe
81
82 In this scenario path lookup under RCU is racing with the final
83 unlink in case of symlinks. As Linus puts it in his analysis:
84
85 [...] We actually RCU-delay the inode freeing itself, but
86 when we do the final iput(), the "evict()" function is called
87 synchronously. Now, the simple fix would seem to just RCU-delay
88 the kfree() of the symlink data in bpf_evict_inode(). Maybe
89 that's the right thing to do. [...]
90
91 Al suggested piggy-backing on the ->destroy_inode() callback in
92 order to implement RCU deferral there, which can then eventually
93 kfree() the inode->i_link right before putting the inode back into
94 the inode cache. By reusing free_inode_nonrcu() from there we can
95 avoid the need for our own inode cache and just reuse the generic
96 one as we currently do.
97
98 And in fact, on top of all this, we should just get rid of
99 bpf_evict_inode() entirely. This means truncate_inode_pages_final()
100 and clear_inode() will then simply be called by the fs core via
101 evict(). Dropping the reference should really only be done when the
102 inode is unhashed and no longer reachable, so that is better
103 moved into the final ->destroy_inode() callback as well.
104
105 Fixes: 0f98621bef5d ("bpf, inode: add support for symlinks and fix mtime/ctime")
106 Reported-by: syzbot+fb731ca573367b7f6564@syzkaller.appspotmail.com
107 Reported-by: syzbot+a13e5ead792d6df37818@syzkaller.appspotmail.com
108 Reported-by: syzbot+7a8ba368b47fdefca61e@syzkaller.appspotmail.com
109 Suggested-by: Al Viro <viro@zeniv.linux.org.uk>
110 Analyzed-by: Linus Torvalds <torvalds@linux-foundation.org>
111 Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
112 Acked-by: Alexei Starovoitov <ast@kernel.org>
113 Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
114 Acked-by: Al Viro <viro@zeniv.linux.org.uk>
115 Link: https://lore.kernel.org/lkml/0000000000006946d2057bbd0eef@google.com/T/
116 Signed-off-by: Sasha Levin (Microsoft) <sashal@kernel.org>
117 ---
118 kernel/bpf/inode.c | 32 ++++++++++++++++++--------------
119 1 file changed, 18 insertions(+), 14 deletions(-)
120
121 --- a/kernel/bpf/inode.c
122 +++ b/kernel/bpf/inode.c
123 @@ -554,19 +554,6 @@ struct bpf_prog *bpf_prog_get_type_path(
124 }
125 EXPORT_SYMBOL(bpf_prog_get_type_path);
126
127 -static void bpf_evict_inode(struct inode *inode)
128 -{
129 - enum bpf_type type;
130 -
131 - truncate_inode_pages_final(&inode->i_data);
132 - clear_inode(inode);
133 -
134 - if (S_ISLNK(inode->i_mode))
135 - kfree(inode->i_link);
136 - if (!bpf_inode_type(inode, &type))
137 - bpf_any_put(inode->i_private, type);
138 -}
139 -
140 /*
141 * Display the mount options in /proc/mounts.
142 */
143 @@ -579,11 +566,28 @@ static int bpf_show_options(struct seq_f
144 return 0;
145 }
146
147 +static void bpf_destroy_inode_deferred(struct rcu_head *head)
148 +{
149 + struct inode *inode = container_of(head, struct inode, i_rcu);
150 + enum bpf_type type;
151 +
152 + if (S_ISLNK(inode->i_mode))
153 + kfree(inode->i_link);
154 + if (!bpf_inode_type(inode, &type))
155 + bpf_any_put(inode->i_private, type);
156 + free_inode_nonrcu(inode);
157 +}
158 +
159 +static void bpf_destroy_inode(struct inode *inode)
160 +{
161 + call_rcu(&inode->i_rcu, bpf_destroy_inode_deferred);
162 +}
163 +
164 static const struct super_operations bpf_super_ops = {
165 .statfs = simple_statfs,
166 .drop_inode = generic_delete_inode,
167 .show_options = bpf_show_options,
168 - .evict_inode = bpf_evict_inode,
169 + .destroy_inode = bpf_destroy_inode,
170 };
171
172 enum {