1 From 05a7f4a8dff19999ca8a83a35ff4782689de7bfc Mon Sep 17 00:00:00 2001
2 From: Leon Romanovsky <leonro@nvidia.com>
3 Date: Thu, 29 Jul 2021 20:19:24 +0300
4 Subject: devlink: Break parameter notification sequence to be before/after unload/load driver
6 From: Leon Romanovsky <leonro@nvidia.com>
8 commit 05a7f4a8dff19999ca8a83a35ff4782689de7bfc upstream.
10 The change of namespaces during devlink reload calls to driver unload
11 before it accesses devlink parameters. The commands below causes to
12 use-after-free bug when trying to get flow steering mode.
15 * devlink dev reload pci/0000:00:09.0 netns n1
17 ==================================================================
18 BUG: KASAN: use-after-free in mlx5_devlink_fs_mode_get+0x96/0xa0 [mlx5_core]
19 Read of size 4 at addr ffff888009d04308 by task devlink/275
21 CPU: 6 PID: 275 Comm: devlink Not tainted 5.12.0-rc2+ #2853
22 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
25 print_address_description.constprop.0+0x18/0x140
26 ? mlx5_devlink_fs_mode_get+0x96/0xa0 [mlx5_core]
27 ? mlx5_devlink_fs_mode_get+0x96/0xa0 [mlx5_core]
28 kasan_report.cold+0x7c/0xd8
29 ? mlx5_devlink_fs_mode_get+0x96/0xa0 [mlx5_core]
30 mlx5_devlink_fs_mode_get+0x96/0xa0 [mlx5_core]
31 devlink_nl_param_fill+0x1c8/0xe80
32 ? __free_pages_ok+0x37a/0x8a0
33 ? devlink_flash_update_timeout_notify+0xd0/0xd0
34 ? lock_acquire+0x1a9/0x6d0
35 ? fs_reclaim_acquire+0xb7/0x160
36 ? lock_is_held_type+0x98/0x110
38 ? lock_release+0x1f9/0x6c0
39 ? fs_reclaim_release+0xa1/0xf0
40 ? lock_downgrade+0x6d0/0x6d0
41 ? lock_is_held_type+0x98/0x110
42 ? lock_is_held_type+0x98/0x110
44 ? __build_skb_around+0x1f8/0x2b0
45 devlink_param_notify+0x6d/0x180
46 devlink_reload+0x1c3/0x520
47 ? devlink_remote_reload_actions_performed+0x30/0x30
48 ? mutex_trylock+0x24b/0x2d0
49 ? devlink_nl_cmd_reload+0x62b/0x1070
50 devlink_nl_cmd_reload+0x66d/0x1070
51 ? devlink_reload+0x520/0x520
52 ? devlink_get_from_attrs+0x1bc/0x260
53 ? devlink_nl_pre_doit+0x64/0x4d0
54 genl_family_rcv_msg_doit+0x1e9/0x2f0
55 ? mutex_lock_io_nested+0x1130/0x1130
56 ? genl_family_rcv_msg_attrs_parse.constprop.0+0x240/0x240
57 ? security_capable+0x51/0x90
58 genl_rcv_msg+0x27f/0x4a0
59 ? genl_get_cmd+0x3c0/0x3c0
60 ? lock_acquire+0x1a9/0x6d0
61 ? devlink_reload+0x520/0x520
62 ? lock_release+0x6c0/0x6c0
63 netlink_rcv_skb+0x11d/0x340
64 ? genl_get_cmd+0x3c0/0x3c0
65 ? netlink_ack+0x9f0/0x9f0
66 ? lock_release+0x1f9/0x6c0
68 netlink_unicast+0x433/0x700
69 ? netlink_attachskb+0x730/0x730
70 ? _copy_from_iter_full+0x178/0x650
71 ? __alloc_skb+0x113/0x2b0
72 netlink_sendmsg+0x6f1/0xbd0
73 ? netlink_unicast+0x700/0x700
74 ? lock_is_held_type+0x98/0x110
75 ? netlink_unicast+0x700/0x700
76 sock_sendmsg+0xb0/0xe0
77 __sys_sendto+0x193/0x240
78 ? __x64_sys_getpeername+0xb0/0xb0
79 ? do_sys_openat2+0x10b/0x370
80 ? __up_read+0x1a1/0x7b0
81 ? do_user_addr_fault+0x219/0xdc0
82 ? __x64_sys_openat+0x120/0x1d0
83 ? __x64_sys_open+0x1a0/0x1a0
84 __x64_sys_sendto+0xdd/0x1b0
85 ? syscall_enter_from_user_mode+0x1d/0x50
86 do_syscall_64+0x2d/0x40
87 entry_SYSCALL_64_after_hwframe+0x44/0xae
88 RIP: 0033:0x7fc69d0af14a
89 Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c
90 RSP: 002b:00007ffc1d8292f8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
91 RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 00007fc69d0af14a
92 RDX: 0000000000000038 RSI: 0000555f57c56440 RDI: 0000000000000003
93 RBP: 0000555f57c56410 R08: 00007fc69d17b200 R09: 000000000000000c
94 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
95 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
97 Allocated by task 146:
98 kasan_save_stack+0x1b/0x40
99 __kasan_kmalloc+0x99/0xc0
100 mlx5_init_fs+0xf0/0x1c50 [mlx5_core]
101 mlx5_load+0xd2/0x180 [mlx5_core]
102 mlx5_init_one+0x2f6/0x450 [mlx5_core]
103 probe_one+0x47d/0x6e0 [mlx5_core]
104 pci_device_probe+0x2a0/0x4a0
105 really_probe+0x20a/0xc90
106 driver_probe_device+0xd8/0x380
107 device_driver_attach+0x1df/0x250
108 __driver_attach+0xff/0x240
109 bus_for_each_dev+0x11e/0x1a0
110 bus_add_driver+0x309/0x570
111 driver_register+0x1ee/0x380
113 do_one_initcall+0xd5/0x410
114 do_init_module+0x1c8/0x760
115 load_module+0x6d8b/0x9650
116 __do_sys_finit_module+0x118/0x1b0
117 do_syscall_64+0x2d/0x40
118 entry_SYSCALL_64_after_hwframe+0x44/0xae
121 kasan_save_stack+0x1b/0x40
122 kasan_set_track+0x1c/0x30
123 kasan_set_free_info+0x20/0x30
124 __kasan_slab_free+0x102/0x140
125 slab_free_freelist_hook+0x74/0x1b0
127 mlx5_unload+0x16/0xb0 [mlx5_core]
128 mlx5_unload_one+0xae/0x120 [mlx5_core]
129 mlx5_devlink_reload_down+0x1bc/0x380 [mlx5_core]
130 devlink_reload+0x141/0x520
131 devlink_nl_cmd_reload+0x66d/0x1070
132 genl_family_rcv_msg_doit+0x1e9/0x2f0
133 genl_rcv_msg+0x27f/0x4a0
134 netlink_rcv_skb+0x11d/0x340
136 netlink_unicast+0x433/0x700
137 netlink_sendmsg+0x6f1/0xbd0
138 sock_sendmsg+0xb0/0xe0
139 __sys_sendto+0x193/0x240
140 __x64_sys_sendto+0xdd/0x1b0
141 do_syscall_64+0x2d/0x40
142 entry_SYSCALL_64_after_hwframe+0x44/0xae
144 The buggy address belongs to the object at ffff888009d04300
145 which belongs to the cache kmalloc-128 of size 128
146 The buggy address is located 8 bytes inside of
147 128-byte region [ffff888009d04300, ffff888009d04380)
148 The buggy address belongs to the page:
149 page:0000000086a64ecc refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff888009d04000 pfn:0x9d04
150 head:0000000086a64ecc order:1 compound_mapcount:0
151 flags: 0x4000000000010200(slab|head)
152 raw: 4000000000010200 ffffea0000203980 0000000200000002 ffff8880050428c0
153 raw: ffff888009d04000 000000008020001d 00000001ffffffff 0000000000000000
154 page dumped because: kasan: bad access detected
156 Memory state around the buggy address:
157 ffff888009d04200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
158 ffff888009d04280: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
159 >ffff888009d04300: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
161 ffff888009d04380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
162 ffff888009d04400: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
163 ==================================================================
165 The right solution to devlink reload is to notify about deletion of
166 parameters, unload driver, change net namespaces, load driver and notify
167 about addition of parameters.
169 Fixes: 070c63f20f6c ("net: devlink: allow to change namespaces during reload")
170 Reviewed-by: Parav Pandit <parav@nvidia.com>
171 Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
172 Signed-off-by: Jakub Kicinski <kuba@kernel.org>
173 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
175 net/core/devlink.c | 32 ++++++++++++++++++++------------
176 1 file changed, 20 insertions(+), 12 deletions(-)
178 --- a/net/core/devlink.c
179 +++ b/net/core/devlink.c
180 @@ -3801,10 +3801,12 @@ static void devlink_param_notify(struct
181 struct devlink_param_item *param_item,
182 enum devlink_command cmd);
184 -static void devlink_reload_netns_change(struct devlink *devlink,
185 - struct net *dest_net)
186 +static void devlink_ns_change_notify(struct devlink *devlink,
187 + struct net *dest_net, struct net *curr_net,
190 struct devlink_param_item *param_item;
191 + enum devlink_command cmd;
193 /* Userspace needs to be notified about devlink objects
194 * removed from original and entering new network namespace.
195 @@ -3812,17 +3814,18 @@ static void devlink_reload_netns_change(
196 * reload process so the notifications are generated separatelly.
199 - list_for_each_entry(param_item, &devlink->param_list, list)
200 - devlink_param_notify(devlink, 0, param_item,
201 - DEVLINK_CMD_PARAM_DEL);
202 - devlink_notify(devlink, DEVLINK_CMD_DEL);
203 + if (!dest_net || net_eq(dest_net, curr_net))
206 - __devlink_net_set(devlink, dest_net);
208 + devlink_notify(devlink, DEVLINK_CMD_NEW);
210 - devlink_notify(devlink, DEVLINK_CMD_NEW);
211 + cmd = new ? DEVLINK_CMD_PARAM_NEW : DEVLINK_CMD_PARAM_DEL;
212 list_for_each_entry(param_item, &devlink->param_list, list)
213 - devlink_param_notify(devlink, 0, param_item,
214 - DEVLINK_CMD_PARAM_NEW);
215 + devlink_param_notify(devlink, 0, param_item, cmd);
218 + devlink_notify(devlink, DEVLINK_CMD_DEL);
221 static bool devlink_reload_supported(const struct devlink_ops *ops)
222 @@ -3902,6 +3905,7 @@ static int devlink_reload(struct devlink
223 u32 *actions_performed, struct netlink_ext_ack *extack)
225 u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
226 + struct net *curr_net;
229 if (!devlink->reload_enabled)
230 @@ -3909,18 +3913,22 @@ static int devlink_reload(struct devlink
232 memcpy(remote_reload_stats, devlink->stats.remote_reload_stats,
233 sizeof(remote_reload_stats));
235 + curr_net = devlink_net(devlink);
236 + devlink_ns_change_notify(devlink, dest_net, curr_net, false);
237 err = devlink->ops->reload_down(devlink, !!dest_net, action, limit, extack);
241 - if (dest_net && !net_eq(dest_net, devlink_net(devlink)))
242 - devlink_reload_netns_change(devlink, dest_net);
243 + if (dest_net && !net_eq(dest_net, curr_net))
244 + __devlink_net_set(devlink, dest_net);
246 err = devlink->ops->reload_up(devlink, action, limit, actions_performed, extack);
247 devlink_reload_failed_set(devlink, !!err);
251 + devlink_ns_change_notify(devlink, dest_net, curr_net, true);
252 WARN_ON(!(*actions_performed & BIT(action)));
253 /* Catch driver on updating the remote action within devlink reload */
254 WARN_ON(memcmp(remote_reload_stats, devlink->stats.remote_reload_stats,