From 549b7b82dc991566c0f5d44e1ffef06c5ac04153 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 9 May 2018 13:40:34 +0200 Subject: [PATCH] 4.9-stable patches added patches: ipvs-fix-rtnl_lock-lockups-caused-by-start_sync_thread.patch --- ...-lockups-caused-by-start_sync_thread.patch | 640 ++++++++++++++++++ 1 file changed, 640 insertions(+) create mode 100644 queue-4.9/ipvs-fix-rtnl_lock-lockups-caused-by-start_sync_thread.patch diff --git a/queue-4.9/ipvs-fix-rtnl_lock-lockups-caused-by-start_sync_thread.patch b/queue-4.9/ipvs-fix-rtnl_lock-lockups-caused-by-start_sync_thread.patch new file mode 100644 index 00000000000..e2b61d2ec31 --- /dev/null +++ b/queue-4.9/ipvs-fix-rtnl_lock-lockups-caused-by-start_sync_thread.patch @@ -0,0 +1,640 @@ +From 5c64576a77894a50be80be0024bed27171b55989 Mon Sep 17 00:00:00 2001 +From: Julian Anastasov +Date: Sat, 7 Apr 2018 15:50:47 +0300 +Subject: ipvs: fix rtnl_lock lockups caused by start_sync_thread + +From: Julian Anastasov + +commit 5c64576a77894a50be80be0024bed27171b55989 upstream. + +syzkaller reports for wrong rtnl_lock usage in sync code [1] and [2] + +We have 2 problems in start_sync_thread if error path is +taken, eg. on memory allocation error or failure to configure +sockets for mcast group or addr/port binding: + +1. recursive locking: holding rtnl_lock while calling sock_release +which in turn calls again rtnl_lock in ip_mc_drop_socket to leave +the mcast group, as noticed by Florian Westphal. Additionally, +sock_release can not be called while holding sync_mutex (ABBA +deadlock). + +2. task hung: holding rtnl_lock while calling kthread_stop to +stop the running kthreads. As the kthreads do the same to leave +the mcast group (sock_release -> ip_mc_drop_socket -> rtnl_lock) +they hang. + +Fix the problems by calling rtnl_unlock early in the error path, +now sock_release is called after unlocking both mutexes. + +Problem 3 (task hung reported by syzkaller [2]) is variant of +problem 2: use _trylock to prevent one user to call rtnl_lock and +then while waiting for sync_mutex to block kthreads that execute +sock_release when they are stopped by stop_sync_thread. + +[1] +IPVS: stopping backup sync thread 4500 ... +WARNING: possible recursive locking detected +4.16.0-rc7+ #3 Not tainted +-------------------------------------------- +syzkaller688027/4497 is trying to acquire lock: + (rtnl_mutex){+.+.}, at: [<00000000bb14d7fb>] rtnl_lock+0x17/0x20 +net/core/rtnetlink.c:74 + +but task is already holding lock: +IPVS: stopping backup sync thread 4495 ... + (rtnl_mutex){+.+.}, at: [<00000000bb14d7fb>] rtnl_lock+0x17/0x20 +net/core/rtnetlink.c:74 + +other info that might help us debug this: + Possible unsafe locking scenario: + + CPU0 + ---- + lock(rtnl_mutex); + lock(rtnl_mutex); + + *** DEADLOCK *** + + May be due to missing lock nesting notation + +2 locks held by syzkaller688027/4497: + #0: (rtnl_mutex){+.+.}, at: [<00000000bb14d7fb>] rtnl_lock+0x17/0x20 +net/core/rtnetlink.c:74 + #1: (ipvs->sync_mutex){+.+.}, at: [<00000000703f78e3>] +do_ip_vs_set_ctl+0x10f8/0x1cc0 net/netfilter/ipvs/ip_vs_ctl.c:2388 + +stack backtrace: +CPU: 1 PID: 4497 Comm: syzkaller688027 Not tainted 4.16.0-rc7+ #3 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS +Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:17 [inline] + dump_stack+0x194/0x24d lib/dump_stack.c:53 + print_deadlock_bug kernel/locking/lockdep.c:1761 [inline] + check_deadlock kernel/locking/lockdep.c:1805 [inline] + validate_chain kernel/locking/lockdep.c:2401 [inline] + __lock_acquire+0xe8f/0x3e00 kernel/locking/lockdep.c:3431 + lock_acquire+0x1d5/0x580 kernel/locking/lockdep.c:3920 + __mutex_lock_common kernel/locking/mutex.c:756 [inline] + __mutex_lock+0x16f/0x1a80 kernel/locking/mutex.c:893 + mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:908 + rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74 + ip_mc_drop_socket+0x88/0x230 net/ipv4/igmp.c:2643 + inet_release+0x4e/0x1c0 net/ipv4/af_inet.c:413 + sock_release+0x8d/0x1e0 net/socket.c:595 + start_sync_thread+0x2213/0x2b70 net/netfilter/ipvs/ip_vs_sync.c:1924 + do_ip_vs_set_ctl+0x1139/0x1cc0 net/netfilter/ipvs/ip_vs_ctl.c:2389 + nf_sockopt net/netfilter/nf_sockopt.c:106 [inline] + nf_setsockopt+0x67/0xc0 net/netfilter/nf_sockopt.c:115 + ip_setsockopt+0x97/0xa0 net/ipv4/ip_sockglue.c:1261 + udp_setsockopt+0x45/0x80 net/ipv4/udp.c:2406 + sock_common_setsockopt+0x95/0xd0 net/core/sock.c:2975 + SYSC_setsockopt net/socket.c:1849 [inline] + SyS_setsockopt+0x189/0x360 net/socket.c:1828 + do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287 + entry_SYSCALL_64_after_hwframe+0x42/0xb7 +RIP: 0033:0x446a69 +RSP: 002b:00007fa1c3a64da8 EFLAGS: 00000246 ORIG_RAX: 0000000000000036 +RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000446a69 +RDX: 000000000000048b RSI: 0000000000000000 RDI: 0000000000000003 +RBP: 00000000006e29fc R08: 0000000000000018 R09: 0000000000000000 +R10: 00000000200000c0 R11: 0000000000000246 R12: 00000000006e29f8 +R13: 00676e697279656b R14: 00007fa1c3a659c0 R15: 00000000006e2b60 + +[2] +IPVS: sync thread started: state = BACKUP, mcast_ifn = syz_tun, syncid = 4, +id = 0 +IPVS: stopping backup sync thread 25415 ... +INFO: task syz-executor7:25421 blocked for more than 120 seconds. + Not tainted 4.16.0-rc6+ #284 +"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. +syz-executor7 D23688 25421 4408 0x00000004 +Call Trace: + context_switch kernel/sched/core.c:2862 [inline] + __schedule+0x8fb/0x1ec0 kernel/sched/core.c:3440 + schedule+0xf5/0x430 kernel/sched/core.c:3499 + schedule_timeout+0x1a3/0x230 kernel/time/timer.c:1777 + do_wait_for_common kernel/sched/completion.c:86 [inline] + __wait_for_common kernel/sched/completion.c:107 [inline] + wait_for_common kernel/sched/completion.c:118 [inline] + wait_for_completion+0x415/0x770 kernel/sched/completion.c:139 + kthread_stop+0x14a/0x7a0 kernel/kthread.c:530 + stop_sync_thread+0x3d9/0x740 net/netfilter/ipvs/ip_vs_sync.c:1996 + do_ip_vs_set_ctl+0x2b1/0x1cc0 net/netfilter/ipvs/ip_vs_ctl.c:2394 + nf_sockopt net/netfilter/nf_sockopt.c:106 [inline] + nf_setsockopt+0x67/0xc0 net/netfilter/nf_sockopt.c:115 + ip_setsockopt+0x97/0xa0 net/ipv4/ip_sockglue.c:1253 + sctp_setsockopt+0x2ca/0x63e0 net/sctp/socket.c:4154 + sock_common_setsockopt+0x95/0xd0 net/core/sock.c:3039 + SYSC_setsockopt net/socket.c:1850 [inline] + SyS_setsockopt+0x189/0x360 net/socket.c:1829 + do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287 + entry_SYSCALL_64_after_hwframe+0x42/0xb7 +RIP: 0033:0x454889 +RSP: 002b:00007fc927626c68 EFLAGS: 00000246 ORIG_RAX: 0000000000000036 +RAX: ffffffffffffffda RBX: 00007fc9276276d4 RCX: 0000000000454889 +RDX: 000000000000048c RSI: 0000000000000000 RDI: 0000000000000017 +RBP: 000000000072bf58 R08: 0000000000000018 R09: 0000000000000000 +R10: 0000000020000000 R11: 0000000000000246 R12: 00000000ffffffff +R13: 000000000000051c R14: 00000000006f9b40 R15: 0000000000000001 + +Showing all locks held in the system: +2 locks held by khungtaskd/868: + #0: (rcu_read_lock){....}, at: [<00000000a1a8f002>] +check_hung_uninterruptible_tasks kernel/hung_task.c:175 [inline] + #0: (rcu_read_lock){....}, at: [<00000000a1a8f002>] watchdog+0x1c5/0xd60 +kernel/hung_task.c:249 + #1: (tasklist_lock){.+.+}, at: [<0000000037c2f8f9>] +debug_show_all_locks+0xd3/0x3d0 kernel/locking/lockdep.c:4470 +1 lock held by rsyslogd/4247: + #0: (&f->f_pos_lock){+.+.}, at: [<000000000d8d6983>] +__fdget_pos+0x12b/0x190 fs/file.c:765 +2 locks held by getty/4338: + #0: (&tty->ldisc_sem){++++}, at: [<00000000bee98654>] +ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365 + #1: (&ldata->atomic_read_lock){+.+.}, at: [<00000000c1d180aa>] +n_tty_read+0x2ef/0x1a40 drivers/tty/n_tty.c:2131 +2 locks held by getty/4339: + #0: (&tty->ldisc_sem){++++}, at: [<00000000bee98654>] +ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365 + #1: (&ldata->atomic_read_lock){+.+.}, at: [<00000000c1d180aa>] +n_tty_read+0x2ef/0x1a40 drivers/tty/n_tty.c:2131 +2 locks held by getty/4340: + #0: (&tty->ldisc_sem){++++}, at: [<00000000bee98654>] +ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365 + #1: (&ldata->atomic_read_lock){+.+.}, at: [<00000000c1d180aa>] +n_tty_read+0x2ef/0x1a40 drivers/tty/n_tty.c:2131 +2 locks held by getty/4341: + #0: (&tty->ldisc_sem){++++}, at: [<00000000bee98654>] +ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365 + #1: (&ldata->atomic_read_lock){+.+.}, at: [<00000000c1d180aa>] +n_tty_read+0x2ef/0x1a40 drivers/tty/n_tty.c:2131 +2 locks held by getty/4342: + #0: (&tty->ldisc_sem){++++}, at: [<00000000bee98654>] +ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365 + #1: (&ldata->atomic_read_lock){+.+.}, at: [<00000000c1d180aa>] +n_tty_read+0x2ef/0x1a40 drivers/tty/n_tty.c:2131 +2 locks held by getty/4343: + #0: (&tty->ldisc_sem){++++}, at: [<00000000bee98654>] +ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365 + #1: (&ldata->atomic_read_lock){+.+.}, at: [<00000000c1d180aa>] +n_tty_read+0x2ef/0x1a40 drivers/tty/n_tty.c:2131 +2 locks held by getty/4344: + #0: (&tty->ldisc_sem){++++}, at: [<00000000bee98654>] +ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365 + #1: (&ldata->atomic_read_lock){+.+.}, at: [<00000000c1d180aa>] +n_tty_read+0x2ef/0x1a40 drivers/tty/n_tty.c:2131 +3 locks held by kworker/0:5/6494: + #0: ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at: +[<00000000a062b18e>] work_static include/linux/workqueue.h:198 [inline] + #0: ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at: +[<00000000a062b18e>] set_work_data kernel/workqueue.c:619 [inline] + #0: ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at: +[<00000000a062b18e>] set_work_pool_and_clear_pending kernel/workqueue.c:646 +[inline] + #0: ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at: +[<00000000a062b18e>] process_one_work+0xb12/0x1bb0 kernel/workqueue.c:2084 + #1: ((addr_chk_work).work){+.+.}, at: [<00000000278427d5>] +process_one_work+0xb89/0x1bb0 kernel/workqueue.c:2088 + #2: (rtnl_mutex){+.+.}, at: [<00000000066e35ac>] rtnl_lock+0x17/0x20 +net/core/rtnetlink.c:74 +1 lock held by syz-executor7/25421: + #0: (ipvs->sync_mutex){+.+.}, at: [<00000000d414a689>] +do_ip_vs_set_ctl+0x277/0x1cc0 net/netfilter/ipvs/ip_vs_ctl.c:2393 +2 locks held by syz-executor7/25427: + #0: (rtnl_mutex){+.+.}, at: [<00000000066e35ac>] rtnl_lock+0x17/0x20 +net/core/rtnetlink.c:74 + #1: (ipvs->sync_mutex){+.+.}, at: [<00000000e6d48489>] +do_ip_vs_set_ctl+0x10f8/0x1cc0 net/netfilter/ipvs/ip_vs_ctl.c:2388 +1 lock held by syz-executor7/25435: + #0: (rtnl_mutex){+.+.}, at: [<00000000066e35ac>] rtnl_lock+0x17/0x20 +net/core/rtnetlink.c:74 +1 lock held by ipvs-b:2:0/25415: + #0: (rtnl_mutex){+.+.}, at: [<00000000066e35ac>] rtnl_lock+0x17/0x20 +net/core/rtnetlink.c:74 + +Reported-and-tested-by: syzbot+a46d6abf9d56b1365a72@syzkaller.appspotmail.com +Reported-and-tested-by: syzbot+5fe074c01b2032ce9618@syzkaller.appspotmail.com +Fixes: e0b26cc997d5 ("ipvs: call rtnl_lock early") +Signed-off-by: Julian Anastasov +Signed-off-by: Simon Horman +Signed-off-by: Pablo Neira Ayuso +Cc: Zubin Mithra +Cc: Guenter Roeck +Signed-off-by: Greg Kroah-Hartman + +--- + net/netfilter/ipvs/ip_vs_ctl.c | 8 -- + net/netfilter/ipvs/ip_vs_sync.c | 155 ++++++++++++++++++++-------------------- + 2 files changed, 80 insertions(+), 83 deletions(-) + +--- a/net/netfilter/ipvs/ip_vs_ctl.c ++++ b/net/netfilter/ipvs/ip_vs_ctl.c +@@ -2393,11 +2393,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cm + strlcpy(cfg.mcast_ifn, dm->mcast_ifn, + sizeof(cfg.mcast_ifn)); + cfg.syncid = dm->syncid; +- rtnl_lock(); +- mutex_lock(&ipvs->sync_mutex); + ret = start_sync_thread(ipvs, &cfg, dm->state); +- mutex_unlock(&ipvs->sync_mutex); +- rtnl_unlock(); + } else { + mutex_lock(&ipvs->sync_mutex); + ret = stop_sync_thread(ipvs, dm->state); +@@ -3495,12 +3491,8 @@ static int ip_vs_genl_new_daemon(struct + if (ipvs->mixed_address_family_dests > 0) + return -EINVAL; + +- rtnl_lock(); +- mutex_lock(&ipvs->sync_mutex); + ret = start_sync_thread(ipvs, &c, + nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); +- mutex_unlock(&ipvs->sync_mutex); +- rtnl_unlock(); + return ret; + } + +--- a/net/netfilter/ipvs/ip_vs_sync.c ++++ b/net/netfilter/ipvs/ip_vs_sync.c +@@ -48,6 +48,7 @@ + #include + #include + #include ++#include + + #include /* Used for ntoh_seq and hton_seq */ + +@@ -1359,15 +1360,9 @@ static void set_mcast_pmtudisc(struct so + /* + * Specifiy default interface for outgoing multicasts + */ +-static int set_mcast_if(struct sock *sk, char *ifname) ++static int set_mcast_if(struct sock *sk, struct net_device *dev) + { +- struct net_device *dev; + struct inet_sock *inet = inet_sk(sk); +- struct net *net = sock_net(sk); +- +- dev = __dev_get_by_name(net, ifname); +- if (!dev) +- return -ENODEV; + + if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) + return -EINVAL; +@@ -1395,19 +1390,14 @@ static int set_mcast_if(struct sock *sk, + * in the in_addr structure passed in as a parameter. + */ + static int +-join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) ++join_mcast_group(struct sock *sk, struct in_addr *addr, struct net_device *dev) + { +- struct net *net = sock_net(sk); + struct ip_mreqn mreq; +- struct net_device *dev; + int ret; + + memset(&mreq, 0, sizeof(mreq)); + memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr)); + +- dev = __dev_get_by_name(net, ifname); +- if (!dev) +- return -ENODEV; + if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) + return -EINVAL; + +@@ -1422,15 +1412,10 @@ join_mcast_group(struct sock *sk, struct + + #ifdef CONFIG_IP_VS_IPV6 + static int join_mcast_group6(struct sock *sk, struct in6_addr *addr, +- char *ifname) ++ struct net_device *dev) + { +- struct net *net = sock_net(sk); +- struct net_device *dev; + int ret; + +- dev = __dev_get_by_name(net, ifname); +- if (!dev) +- return -ENODEV; + if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) + return -EINVAL; + +@@ -1442,24 +1427,18 @@ static int join_mcast_group6(struct sock + } + #endif + +-static int bind_mcastif_addr(struct socket *sock, char *ifname) ++static int bind_mcastif_addr(struct socket *sock, struct net_device *dev) + { +- struct net *net = sock_net(sock->sk); +- struct net_device *dev; + __be32 addr; + struct sockaddr_in sin; + +- dev = __dev_get_by_name(net, ifname); +- if (!dev) +- return -ENODEV; +- + addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); + if (!addr) + pr_err("You probably need to specify IP address on " + "multicast interface.\n"); + + IP_VS_DBG(7, "binding socket with (%s) %pI4\n", +- ifname, &addr); ++ dev->name, &addr); + + /* Now bind the socket with the address of multicast interface */ + sin.sin_family = AF_INET; +@@ -1492,7 +1471,8 @@ static void get_mcast_sockaddr(union ipv + /* + * Set up sending multicast socket over UDP + */ +-static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id) ++static int make_send_sock(struct netns_ipvs *ipvs, int id, ++ struct net_device *dev, struct socket **sock_ret) + { + /* multicast addr */ + union ipvs_sockaddr mcast_addr; +@@ -1504,9 +1484,10 @@ static struct socket *make_send_sock(str + IPPROTO_UDP, &sock); + if (result < 0) { + pr_err("Error during creation of socket; terminating\n"); +- return ERR_PTR(result); ++ goto error; + } +- result = set_mcast_if(sock->sk, ipvs->mcfg.mcast_ifn); ++ *sock_ret = sock; ++ result = set_mcast_if(sock->sk, dev); + if (result < 0) { + pr_err("Error setting outbound mcast interface\n"); + goto error; +@@ -1521,7 +1502,7 @@ static struct socket *make_send_sock(str + set_sock_size(sock->sk, 1, result); + + if (AF_INET == ipvs->mcfg.mcast_af) +- result = bind_mcastif_addr(sock, ipvs->mcfg.mcast_ifn); ++ result = bind_mcastif_addr(sock, dev); + else + result = 0; + if (result < 0) { +@@ -1537,19 +1518,18 @@ static struct socket *make_send_sock(str + goto error; + } + +- return sock; ++ return 0; + + error: +- sock_release(sock); +- return ERR_PTR(result); ++ return result; + } + + + /* + * Set up receiving multicast socket over UDP + */ +-static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id, +- int ifindex) ++static int make_receive_sock(struct netns_ipvs *ipvs, int id, ++ struct net_device *dev, struct socket **sock_ret) + { + /* multicast addr */ + union ipvs_sockaddr mcast_addr; +@@ -1561,8 +1541,9 @@ static struct socket *make_receive_sock( + IPPROTO_UDP, &sock); + if (result < 0) { + pr_err("Error during creation of socket; terminating\n"); +- return ERR_PTR(result); ++ goto error; + } ++ *sock_ret = sock; + /* it is equivalent to the REUSEADDR option in user-space */ + sock->sk->sk_reuse = SK_CAN_REUSE; + result = sysctl_sync_sock_size(ipvs); +@@ -1570,7 +1551,7 @@ static struct socket *make_receive_sock( + set_sock_size(sock->sk, 0, result); + + get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->bcfg, id); +- sock->sk->sk_bound_dev_if = ifindex; ++ sock->sk->sk_bound_dev_if = dev->ifindex; + result = sock->ops->bind(sock, (struct sockaddr *)&mcast_addr, salen); + if (result < 0) { + pr_err("Error binding to the multicast addr\n"); +@@ -1581,21 +1562,20 @@ static struct socket *make_receive_sock( + #ifdef CONFIG_IP_VS_IPV6 + if (ipvs->bcfg.mcast_af == AF_INET6) + result = join_mcast_group6(sock->sk, &mcast_addr.in6.sin6_addr, +- ipvs->bcfg.mcast_ifn); ++ dev); + else + #endif + result = join_mcast_group(sock->sk, &mcast_addr.in.sin_addr, +- ipvs->bcfg.mcast_ifn); ++ dev); + if (result < 0) { + pr_err("Error joining to the multicast group\n"); + goto error; + } + +- return sock; ++ return 0; + + error: +- sock_release(sock); +- return ERR_PTR(result); ++ return result; + } + + +@@ -1780,13 +1760,12 @@ static int sync_thread_backup(void *data + int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, + int state) + { +- struct ip_vs_sync_thread_data *tinfo; ++ struct ip_vs_sync_thread_data *tinfo = NULL; + struct task_struct **array = NULL, *task; +- struct socket *sock; + struct net_device *dev; + char *name; + int (*threadfn)(void *data); +- int id, count, hlen; ++ int id = 0, count, hlen; + int result = -ENOMEM; + u16 mtu, min_mtu; + +@@ -1794,6 +1773,18 @@ int start_sync_thread(struct netns_ipvs + IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n", + sizeof(struct ip_vs_sync_conn_v0)); + ++ /* Do not hold one mutex and then to block on another */ ++ for (;;) { ++ rtnl_lock(); ++ if (mutex_trylock(&ipvs->sync_mutex)) ++ break; ++ rtnl_unlock(); ++ mutex_lock(&ipvs->sync_mutex); ++ if (rtnl_trylock()) ++ break; ++ mutex_unlock(&ipvs->sync_mutex); ++ } ++ + if (!ipvs->sync_state) { + count = clamp(sysctl_sync_ports(ipvs), 1, IPVS_SYNC_PORTS_MAX); + ipvs->threads_mask = count - 1; +@@ -1812,7 +1803,8 @@ int start_sync_thread(struct netns_ipvs + dev = __dev_get_by_name(ipvs->net, c->mcast_ifn); + if (!dev) { + pr_err("Unknown mcast interface: %s\n", c->mcast_ifn); +- return -ENODEV; ++ result = -ENODEV; ++ goto out_early; + } + hlen = (AF_INET6 == c->mcast_af) ? + sizeof(struct ipv6hdr) + sizeof(struct udphdr) : +@@ -1829,26 +1821,30 @@ int start_sync_thread(struct netns_ipvs + c->sync_maxlen = mtu - hlen; + + if (state == IP_VS_STATE_MASTER) { ++ result = -EEXIST; + if (ipvs->ms) +- return -EEXIST; ++ goto out_early; + + ipvs->mcfg = *c; + name = "ipvs-m:%d:%d"; + threadfn = sync_thread_master; + } else if (state == IP_VS_STATE_BACKUP) { ++ result = -EEXIST; + if (ipvs->backup_threads) +- return -EEXIST; ++ goto out_early; + + ipvs->bcfg = *c; + name = "ipvs-b:%d:%d"; + threadfn = sync_thread_backup; + } else { +- return -EINVAL; ++ result = -EINVAL; ++ goto out_early; + } + + if (state == IP_VS_STATE_MASTER) { + struct ipvs_master_sync_state *ms; + ++ result = -ENOMEM; + ipvs->ms = kzalloc(count * sizeof(ipvs->ms[0]), GFP_KERNEL); + if (!ipvs->ms) + goto out; +@@ -1864,39 +1860,38 @@ int start_sync_thread(struct netns_ipvs + } else { + array = kzalloc(count * sizeof(struct task_struct *), + GFP_KERNEL); ++ result = -ENOMEM; + if (!array) + goto out; + } + +- tinfo = NULL; + for (id = 0; id < count; id++) { +- if (state == IP_VS_STATE_MASTER) +- sock = make_send_sock(ipvs, id); +- else +- sock = make_receive_sock(ipvs, id, dev->ifindex); +- if (IS_ERR(sock)) { +- result = PTR_ERR(sock); +- goto outtinfo; +- } ++ result = -ENOMEM; + tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL); + if (!tinfo) +- goto outsocket; ++ goto out; + tinfo->ipvs = ipvs; +- tinfo->sock = sock; ++ tinfo->sock = NULL; + if (state == IP_VS_STATE_BACKUP) { + tinfo->buf = kmalloc(ipvs->bcfg.sync_maxlen, + GFP_KERNEL); + if (!tinfo->buf) +- goto outtinfo; ++ goto out; + } else { + tinfo->buf = NULL; + } + tinfo->id = id; ++ if (state == IP_VS_STATE_MASTER) ++ result = make_send_sock(ipvs, id, dev, &tinfo->sock); ++ else ++ result = make_receive_sock(ipvs, id, dev, &tinfo->sock); ++ if (result < 0) ++ goto out; + + task = kthread_run(threadfn, tinfo, name, ipvs->gen, id); + if (IS_ERR(task)) { + result = PTR_ERR(task); +- goto outtinfo; ++ goto out; + } + tinfo = NULL; + if (state == IP_VS_STATE_MASTER) +@@ -1913,20 +1908,20 @@ int start_sync_thread(struct netns_ipvs + ipvs->sync_state |= state; + spin_unlock_bh(&ipvs->sync_buff_lock); + ++ mutex_unlock(&ipvs->sync_mutex); ++ rtnl_unlock(); ++ + /* increase the module use count */ + ip_vs_use_count_inc(); + + return 0; + +-outsocket: +- sock_release(sock); +- +-outtinfo: +- if (tinfo) { +- sock_release(tinfo->sock); +- kfree(tinfo->buf); +- kfree(tinfo); +- } ++out: ++ /* We do not need RTNL lock anymore, release it here so that ++ * sock_release below and in the kthreads can use rtnl_lock ++ * to leave the mcast group. ++ */ ++ rtnl_unlock(); + count = id; + while (count-- > 0) { + if (state == IP_VS_STATE_MASTER) +@@ -1934,13 +1929,23 @@ outtinfo: + else + kthread_stop(array[count]); + } +- kfree(array); +- +-out: + if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { + kfree(ipvs->ms); + ipvs->ms = NULL; + } ++ mutex_unlock(&ipvs->sync_mutex); ++ if (tinfo) { ++ if (tinfo->sock) ++ sock_release(tinfo->sock); ++ kfree(tinfo->buf); ++ kfree(tinfo); ++ } ++ kfree(array); ++ return result; ++ ++out_early: ++ mutex_unlock(&ipvs->sync_mutex); ++ rtnl_unlock(); + return result; + } + -- 2.47.3