git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.9-stable patches
author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 9 May 2018 11:40:34 +0000 (13:40 +0200)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 9 May 2018 11:40:34 +0000 (13:40 +0200)
added patches:
ipvs-fix-rtnl_lock-lockups-caused-by-start_sync_thread.patch

queue-4.9/ipvs-fix-rtnl_lock-lockups-caused-by-start_sync_thread.patch [new file with mode: 0644]

diff --git a/queue-4.9/ipvs-fix-rtnl_lock-lockups-caused-by-start_sync_thread.patch b/queue-4.9/ipvs-fix-rtnl_lock-lockups-caused-by-start_sync_thread.patch
new file mode 100644 (file)
index 0000000..e2b61d2
--- /dev/null
@@ -0,0 +1,640 @@
+From 5c64576a77894a50be80be0024bed27171b55989 Mon Sep 17 00:00:00 2001
+From: Julian Anastasov <ja@ssi.bg>
+Date: Sat, 7 Apr 2018 15:50:47 +0300
+Subject: ipvs: fix rtnl_lock lockups caused by start_sync_thread
+
+From: Julian Anastasov <ja@ssi.bg>
+
+commit 5c64576a77894a50be80be0024bed27171b55989 upstream.
+
+syzkaller reports wrong rtnl_lock usage in the sync code, see [1] and [2].
+
+We have 2 problems in start_sync_thread if the error path is
+taken, e.g. on memory allocation error or failure to configure
+sockets for mcast group or addr/port binding:
+
+1. recursive locking: holding rtnl_lock while calling sock_release
+which in turn calls rtnl_lock again in ip_mc_drop_socket to leave
+the mcast group, as noticed by Florian Westphal. Additionally,
+sock_release cannot be called while holding sync_mutex (ABBA
+deadlock).
+
+2. task hung: holding rtnl_lock while calling kthread_stop to
+stop the running kthreads. As the kthreads do the same to leave
+the mcast group (sock_release -> ip_mc_drop_socket -> rtnl_lock)
+they hang.
+
+Fix the problems by calling rtnl_unlock early in the error path,
+now sock_release is called after unlocking both mutexes.
+
+Problem 3 (task hung reported by syzkaller [2]) is a variant of
+problem 2: use _trylock so that a user holding rtnl_lock does not
+wait for sync_mutex and thereby block the kthreads that execute
+sock_release when they are stopped by stop_sync_thread.
+
+[1]
+IPVS: stopping backup sync thread 4500 ...
+WARNING: possible recursive locking detected
+4.16.0-rc7+ #3 Not tainted
+--------------------------------------------
+syzkaller688027/4497 is trying to acquire lock:
+  (rtnl_mutex){+.+.}, at: [<00000000bb14d7fb>] rtnl_lock+0x17/0x20
+net/core/rtnetlink.c:74
+
+but task is already holding lock:
+IPVS: stopping backup sync thread 4495 ...
+  (rtnl_mutex){+.+.}, at: [<00000000bb14d7fb>] rtnl_lock+0x17/0x20
+net/core/rtnetlink.c:74
+
+other info that might help us debug this:
+  Possible unsafe locking scenario:
+
+        CPU0
+        ----
+   lock(rtnl_mutex);
+   lock(rtnl_mutex);
+
+  *** DEADLOCK ***
+
+  May be due to missing lock nesting notation
+
+2 locks held by syzkaller688027/4497:
+  #0:  (rtnl_mutex){+.+.}, at: [<00000000bb14d7fb>] rtnl_lock+0x17/0x20
+net/core/rtnetlink.c:74
+  #1:  (ipvs->sync_mutex){+.+.}, at: [<00000000703f78e3>]
+do_ip_vs_set_ctl+0x10f8/0x1cc0 net/netfilter/ipvs/ip_vs_ctl.c:2388
+
+stack backtrace:
+CPU: 1 PID: 4497 Comm: syzkaller688027 Not tainted 4.16.0-rc7+ #3
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
+Google 01/01/2011
+Call Trace:
+  __dump_stack lib/dump_stack.c:17 [inline]
+  dump_stack+0x194/0x24d lib/dump_stack.c:53
+  print_deadlock_bug kernel/locking/lockdep.c:1761 [inline]
+  check_deadlock kernel/locking/lockdep.c:1805 [inline]
+  validate_chain kernel/locking/lockdep.c:2401 [inline]
+  __lock_acquire+0xe8f/0x3e00 kernel/locking/lockdep.c:3431
+  lock_acquire+0x1d5/0x580 kernel/locking/lockdep.c:3920
+  __mutex_lock_common kernel/locking/mutex.c:756 [inline]
+  __mutex_lock+0x16f/0x1a80 kernel/locking/mutex.c:893
+  mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:908
+  rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74
+  ip_mc_drop_socket+0x88/0x230 net/ipv4/igmp.c:2643
+  inet_release+0x4e/0x1c0 net/ipv4/af_inet.c:413
+  sock_release+0x8d/0x1e0 net/socket.c:595
+  start_sync_thread+0x2213/0x2b70 net/netfilter/ipvs/ip_vs_sync.c:1924
+  do_ip_vs_set_ctl+0x1139/0x1cc0 net/netfilter/ipvs/ip_vs_ctl.c:2389
+  nf_sockopt net/netfilter/nf_sockopt.c:106 [inline]
+  nf_setsockopt+0x67/0xc0 net/netfilter/nf_sockopt.c:115
+  ip_setsockopt+0x97/0xa0 net/ipv4/ip_sockglue.c:1261
+  udp_setsockopt+0x45/0x80 net/ipv4/udp.c:2406
+  sock_common_setsockopt+0x95/0xd0 net/core/sock.c:2975
+  SYSC_setsockopt net/socket.c:1849 [inline]
+  SyS_setsockopt+0x189/0x360 net/socket.c:1828
+  do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287
+  entry_SYSCALL_64_after_hwframe+0x42/0xb7
+RIP: 0033:0x446a69
+RSP: 002b:00007fa1c3a64da8 EFLAGS: 00000246 ORIG_RAX: 0000000000000036
+RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000446a69
+RDX: 000000000000048b RSI: 0000000000000000 RDI: 0000000000000003
+RBP: 00000000006e29fc R08: 0000000000000018 R09: 0000000000000000
+R10: 00000000200000c0 R11: 0000000000000246 R12: 00000000006e29f8
+R13: 00676e697279656b R14: 00007fa1c3a659c0 R15: 00000000006e2b60
+
+[2]
+IPVS: sync thread started: state = BACKUP, mcast_ifn = syz_tun, syncid = 4,
+id = 0
+IPVS: stopping backup sync thread 25415 ...
+INFO: task syz-executor7:25421 blocked for more than 120 seconds.
+       Not tainted 4.16.0-rc6+ #284
+"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+syz-executor7   D23688 25421   4408 0x00000004
+Call Trace:
+  context_switch kernel/sched/core.c:2862 [inline]
+  __schedule+0x8fb/0x1ec0 kernel/sched/core.c:3440
+  schedule+0xf5/0x430 kernel/sched/core.c:3499
+  schedule_timeout+0x1a3/0x230 kernel/time/timer.c:1777
+  do_wait_for_common kernel/sched/completion.c:86 [inline]
+  __wait_for_common kernel/sched/completion.c:107 [inline]
+  wait_for_common kernel/sched/completion.c:118 [inline]
+  wait_for_completion+0x415/0x770 kernel/sched/completion.c:139
+  kthread_stop+0x14a/0x7a0 kernel/kthread.c:530
+  stop_sync_thread+0x3d9/0x740 net/netfilter/ipvs/ip_vs_sync.c:1996
+  do_ip_vs_set_ctl+0x2b1/0x1cc0 net/netfilter/ipvs/ip_vs_ctl.c:2394
+  nf_sockopt net/netfilter/nf_sockopt.c:106 [inline]
+  nf_setsockopt+0x67/0xc0 net/netfilter/nf_sockopt.c:115
+  ip_setsockopt+0x97/0xa0 net/ipv4/ip_sockglue.c:1253
+  sctp_setsockopt+0x2ca/0x63e0 net/sctp/socket.c:4154
+  sock_common_setsockopt+0x95/0xd0 net/core/sock.c:3039
+  SYSC_setsockopt net/socket.c:1850 [inline]
+  SyS_setsockopt+0x189/0x360 net/socket.c:1829
+  do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287
+  entry_SYSCALL_64_after_hwframe+0x42/0xb7
+RIP: 0033:0x454889
+RSP: 002b:00007fc927626c68 EFLAGS: 00000246 ORIG_RAX: 0000000000000036
+RAX: ffffffffffffffda RBX: 00007fc9276276d4 RCX: 0000000000454889
+RDX: 000000000000048c RSI: 0000000000000000 RDI: 0000000000000017
+RBP: 000000000072bf58 R08: 0000000000000018 R09: 0000000000000000
+R10: 0000000020000000 R11: 0000000000000246 R12: 00000000ffffffff
+R13: 000000000000051c R14: 00000000006f9b40 R15: 0000000000000001
+
+Showing all locks held in the system:
+2 locks held by khungtaskd/868:
+  #0:  (rcu_read_lock){....}, at: [<00000000a1a8f002>]
+check_hung_uninterruptible_tasks kernel/hung_task.c:175 [inline]
+  #0:  (rcu_read_lock){....}, at: [<00000000a1a8f002>] watchdog+0x1c5/0xd60
+kernel/hung_task.c:249
+  #1:  (tasklist_lock){.+.+}, at: [<0000000037c2f8f9>]
+debug_show_all_locks+0xd3/0x3d0 kernel/locking/lockdep.c:4470
+1 lock held by rsyslogd/4247:
+  #0:  (&f->f_pos_lock){+.+.}, at: [<000000000d8d6983>]
+__fdget_pos+0x12b/0x190 fs/file.c:765
+2 locks held by getty/4338:
+  #0:  (&tty->ldisc_sem){++++}, at: [<00000000bee98654>]
+ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
+  #1:  (&ldata->atomic_read_lock){+.+.}, at: [<00000000c1d180aa>]
+n_tty_read+0x2ef/0x1a40 drivers/tty/n_tty.c:2131
+2 locks held by getty/4339:
+  #0:  (&tty->ldisc_sem){++++}, at: [<00000000bee98654>]
+ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
+  #1:  (&ldata->atomic_read_lock){+.+.}, at: [<00000000c1d180aa>]
+n_tty_read+0x2ef/0x1a40 drivers/tty/n_tty.c:2131
+2 locks held by getty/4340:
+  #0:  (&tty->ldisc_sem){++++}, at: [<00000000bee98654>]
+ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
+  #1:  (&ldata->atomic_read_lock){+.+.}, at: [<00000000c1d180aa>]
+n_tty_read+0x2ef/0x1a40 drivers/tty/n_tty.c:2131
+2 locks held by getty/4341:
+  #0:  (&tty->ldisc_sem){++++}, at: [<00000000bee98654>]
+ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
+  #1:  (&ldata->atomic_read_lock){+.+.}, at: [<00000000c1d180aa>]
+n_tty_read+0x2ef/0x1a40 drivers/tty/n_tty.c:2131
+2 locks held by getty/4342:
+  #0:  (&tty->ldisc_sem){++++}, at: [<00000000bee98654>]
+ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
+  #1:  (&ldata->atomic_read_lock){+.+.}, at: [<00000000c1d180aa>]
+n_tty_read+0x2ef/0x1a40 drivers/tty/n_tty.c:2131
+2 locks held by getty/4343:
+  #0:  (&tty->ldisc_sem){++++}, at: [<00000000bee98654>]
+ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
+  #1:  (&ldata->atomic_read_lock){+.+.}, at: [<00000000c1d180aa>]
+n_tty_read+0x2ef/0x1a40 drivers/tty/n_tty.c:2131
+2 locks held by getty/4344:
+  #0:  (&tty->ldisc_sem){++++}, at: [<00000000bee98654>]
+ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
+  #1:  (&ldata->atomic_read_lock){+.+.}, at: [<00000000c1d180aa>]
+n_tty_read+0x2ef/0x1a40 drivers/tty/n_tty.c:2131
+3 locks held by kworker/0:5/6494:
+  #0:  ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at:
+[<00000000a062b18e>] work_static include/linux/workqueue.h:198 [inline]
+  #0:  ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at:
+[<00000000a062b18e>] set_work_data kernel/workqueue.c:619 [inline]
+  #0:  ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at:
+[<00000000a062b18e>] set_work_pool_and_clear_pending kernel/workqueue.c:646
+[inline]
+  #0:  ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at:
+[<00000000a062b18e>] process_one_work+0xb12/0x1bb0 kernel/workqueue.c:2084
+  #1:  ((addr_chk_work).work){+.+.}, at: [<00000000278427d5>]
+process_one_work+0xb89/0x1bb0 kernel/workqueue.c:2088
+  #2:  (rtnl_mutex){+.+.}, at: [<00000000066e35ac>] rtnl_lock+0x17/0x20
+net/core/rtnetlink.c:74
+1 lock held by syz-executor7/25421:
+  #0:  (ipvs->sync_mutex){+.+.}, at: [<00000000d414a689>]
+do_ip_vs_set_ctl+0x277/0x1cc0 net/netfilter/ipvs/ip_vs_ctl.c:2393
+2 locks held by syz-executor7/25427:
+  #0:  (rtnl_mutex){+.+.}, at: [<00000000066e35ac>] rtnl_lock+0x17/0x20
+net/core/rtnetlink.c:74
+  #1:  (ipvs->sync_mutex){+.+.}, at: [<00000000e6d48489>]
+do_ip_vs_set_ctl+0x10f8/0x1cc0 net/netfilter/ipvs/ip_vs_ctl.c:2388
+1 lock held by syz-executor7/25435:
+  #0:  (rtnl_mutex){+.+.}, at: [<00000000066e35ac>] rtnl_lock+0x17/0x20
+net/core/rtnetlink.c:74
+1 lock held by ipvs-b:2:0/25415:
+  #0:  (rtnl_mutex){+.+.}, at: [<00000000066e35ac>] rtnl_lock+0x17/0x20
+net/core/rtnetlink.c:74
+
+Reported-and-tested-by: syzbot+a46d6abf9d56b1365a72@syzkaller.appspotmail.com
+Reported-and-tested-by: syzbot+5fe074c01b2032ce9618@syzkaller.appspotmail.com
+Fixes: e0b26cc997d5 ("ipvs: call rtnl_lock early")
+Signed-off-by: Julian Anastasov <ja@ssi.bg>
+Signed-off-by: Simon Horman <horms@verge.net.au>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Cc: Zubin Mithra <zsm@chromium.org>
+Cc: Guenter Roeck <groeck@chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/netfilter/ipvs/ip_vs_ctl.c  |    8 --
+ net/netfilter/ipvs/ip_vs_sync.c |  155 ++++++++++++++++++++--------------------
+ 2 files changed, 80 insertions(+), 83 deletions(-)
+
+--- a/net/netfilter/ipvs/ip_vs_ctl.c
++++ b/net/netfilter/ipvs/ip_vs_ctl.c
+@@ -2393,11 +2393,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cm
+                       strlcpy(cfg.mcast_ifn, dm->mcast_ifn,
+                               sizeof(cfg.mcast_ifn));
+                       cfg.syncid = dm->syncid;
+-                      rtnl_lock();
+-                      mutex_lock(&ipvs->sync_mutex);
+                       ret = start_sync_thread(ipvs, &cfg, dm->state);
+-                      mutex_unlock(&ipvs->sync_mutex);
+-                      rtnl_unlock();
+               } else {
+                       mutex_lock(&ipvs->sync_mutex);
+                       ret = stop_sync_thread(ipvs, dm->state);
+@@ -3495,12 +3491,8 @@ static int ip_vs_genl_new_daemon(struct
+       if (ipvs->mixed_address_family_dests > 0)
+               return -EINVAL;
+-      rtnl_lock();
+-      mutex_lock(&ipvs->sync_mutex);
+       ret = start_sync_thread(ipvs, &c,
+                               nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
+-      mutex_unlock(&ipvs->sync_mutex);
+-      rtnl_unlock();
+       return ret;
+ }
+--- a/net/netfilter/ipvs/ip_vs_sync.c
++++ b/net/netfilter/ipvs/ip_vs_sync.c
+@@ -48,6 +48,7 @@
+ #include <linux/kthread.h>
+ #include <linux/wait.h>
+ #include <linux/kernel.h>
++#include <linux/sched/signal.h>
+ #include <asm/unaligned.h>            /* Used for ntoh_seq and hton_seq */
+@@ -1359,15 +1360,9 @@ static void set_mcast_pmtudisc(struct so
+ /*
+  *      Specifiy default interface for outgoing multicasts
+  */
+-static int set_mcast_if(struct sock *sk, char *ifname)
++static int set_mcast_if(struct sock *sk, struct net_device *dev)
+ {
+-      struct net_device *dev;
+       struct inet_sock *inet = inet_sk(sk);
+-      struct net *net = sock_net(sk);
+-
+-      dev = __dev_get_by_name(net, ifname);
+-      if (!dev)
+-              return -ENODEV;
+       if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
+               return -EINVAL;
+@@ -1395,19 +1390,14 @@ static int set_mcast_if(struct sock *sk,
+  *      in the in_addr structure passed in as a parameter.
+  */
+ static int
+-join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
++join_mcast_group(struct sock *sk, struct in_addr *addr, struct net_device *dev)
+ {
+-      struct net *net = sock_net(sk);
+       struct ip_mreqn mreq;
+-      struct net_device *dev;
+       int ret;
+       memset(&mreq, 0, sizeof(mreq));
+       memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
+-      dev = __dev_get_by_name(net, ifname);
+-      if (!dev)
+-              return -ENODEV;
+       if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
+               return -EINVAL;
+@@ -1422,15 +1412,10 @@ join_mcast_group(struct sock *sk, struct
+ #ifdef CONFIG_IP_VS_IPV6
+ static int join_mcast_group6(struct sock *sk, struct in6_addr *addr,
+-                           char *ifname)
++                           struct net_device *dev)
+ {
+-      struct net *net = sock_net(sk);
+-      struct net_device *dev;
+       int ret;
+-      dev = __dev_get_by_name(net, ifname);
+-      if (!dev)
+-              return -ENODEV;
+       if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
+               return -EINVAL;
+@@ -1442,24 +1427,18 @@ static int join_mcast_group6(struct sock
+ }
+ #endif
+-static int bind_mcastif_addr(struct socket *sock, char *ifname)
++static int bind_mcastif_addr(struct socket *sock, struct net_device *dev)
+ {
+-      struct net *net = sock_net(sock->sk);
+-      struct net_device *dev;
+       __be32 addr;
+       struct sockaddr_in sin;
+-      dev = __dev_get_by_name(net, ifname);
+-      if (!dev)
+-              return -ENODEV;
+-
+       addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
+       if (!addr)
+               pr_err("You probably need to specify IP address on "
+                      "multicast interface.\n");
+       IP_VS_DBG(7, "binding socket with (%s) %pI4\n",
+-                ifname, &addr);
++                dev->name, &addr);
+       /* Now bind the socket with the address of multicast interface */
+       sin.sin_family       = AF_INET;
+@@ -1492,7 +1471,8 @@ static void get_mcast_sockaddr(union ipv
+ /*
+  *      Set up sending multicast socket over UDP
+  */
+-static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id)
++static int make_send_sock(struct netns_ipvs *ipvs, int id,
++                        struct net_device *dev, struct socket **sock_ret)
+ {
+       /* multicast addr */
+       union ipvs_sockaddr mcast_addr;
+@@ -1504,9 +1484,10 @@ static struct socket *make_send_sock(str
+                                 IPPROTO_UDP, &sock);
+       if (result < 0) {
+               pr_err("Error during creation of socket; terminating\n");
+-              return ERR_PTR(result);
++              goto error;
+       }
+-      result = set_mcast_if(sock->sk, ipvs->mcfg.mcast_ifn);
++      *sock_ret = sock;
++      result = set_mcast_if(sock->sk, dev);
+       if (result < 0) {
+               pr_err("Error setting outbound mcast interface\n");
+               goto error;
+@@ -1521,7 +1502,7 @@ static struct socket *make_send_sock(str
+               set_sock_size(sock->sk, 1, result);
+       if (AF_INET == ipvs->mcfg.mcast_af)
+-              result = bind_mcastif_addr(sock, ipvs->mcfg.mcast_ifn);
++              result = bind_mcastif_addr(sock, dev);
+       else
+               result = 0;
+       if (result < 0) {
+@@ -1537,19 +1518,18 @@ static struct socket *make_send_sock(str
+               goto error;
+       }
+-      return sock;
++      return 0;
+ error:
+-      sock_release(sock);
+-      return ERR_PTR(result);
++      return result;
+ }
+ /*
+  *      Set up receiving multicast socket over UDP
+  */
+-static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id,
+-                                      int ifindex)
++static int make_receive_sock(struct netns_ipvs *ipvs, int id,
++                           struct net_device *dev, struct socket **sock_ret)
+ {
+       /* multicast addr */
+       union ipvs_sockaddr mcast_addr;
+@@ -1561,8 +1541,9 @@ static struct socket *make_receive_sock(
+                                 IPPROTO_UDP, &sock);
+       if (result < 0) {
+               pr_err("Error during creation of socket; terminating\n");
+-              return ERR_PTR(result);
++              goto error;
+       }
++      *sock_ret = sock;
+       /* it is equivalent to the REUSEADDR option in user-space */
+       sock->sk->sk_reuse = SK_CAN_REUSE;
+       result = sysctl_sync_sock_size(ipvs);
+@@ -1570,7 +1551,7 @@ static struct socket *make_receive_sock(
+               set_sock_size(sock->sk, 0, result);
+       get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->bcfg, id);
+-      sock->sk->sk_bound_dev_if = ifindex;
++      sock->sk->sk_bound_dev_if = dev->ifindex;
+       result = sock->ops->bind(sock, (struct sockaddr *)&mcast_addr, salen);
+       if (result < 0) {
+               pr_err("Error binding to the multicast addr\n");
+@@ -1581,21 +1562,20 @@ static struct socket *make_receive_sock(
+ #ifdef CONFIG_IP_VS_IPV6
+       if (ipvs->bcfg.mcast_af == AF_INET6)
+               result = join_mcast_group6(sock->sk, &mcast_addr.in6.sin6_addr,
+-                                         ipvs->bcfg.mcast_ifn);
++                                         dev);
+       else
+ #endif
+               result = join_mcast_group(sock->sk, &mcast_addr.in.sin_addr,
+-                                        ipvs->bcfg.mcast_ifn);
++                                        dev);
+       if (result < 0) {
+               pr_err("Error joining to the multicast group\n");
+               goto error;
+       }
+-      return sock;
++      return 0;
+ error:
+-      sock_release(sock);
+-      return ERR_PTR(result);
++      return result;
+ }
+@@ -1780,13 +1760,12 @@ static int sync_thread_backup(void *data
+ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
+                     int state)
+ {
+-      struct ip_vs_sync_thread_data *tinfo;
++      struct ip_vs_sync_thread_data *tinfo = NULL;
+       struct task_struct **array = NULL, *task;
+-      struct socket *sock;
+       struct net_device *dev;
+       char *name;
+       int (*threadfn)(void *data);
+-      int id, count, hlen;
++      int id = 0, count, hlen;
+       int result = -ENOMEM;
+       u16 mtu, min_mtu;
+@@ -1794,6 +1773,18 @@ int start_sync_thread(struct netns_ipvs
+       IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
+                 sizeof(struct ip_vs_sync_conn_v0));
++      /* Do not hold one mutex and then to block on another */
++      for (;;) {
++              rtnl_lock();
++              if (mutex_trylock(&ipvs->sync_mutex))
++                      break;
++              rtnl_unlock();
++              mutex_lock(&ipvs->sync_mutex);
++              if (rtnl_trylock())
++                      break;
++              mutex_unlock(&ipvs->sync_mutex);
++      }
++
+       if (!ipvs->sync_state) {
+               count = clamp(sysctl_sync_ports(ipvs), 1, IPVS_SYNC_PORTS_MAX);
+               ipvs->threads_mask = count - 1;
+@@ -1812,7 +1803,8 @@ int start_sync_thread(struct netns_ipvs
+       dev = __dev_get_by_name(ipvs->net, c->mcast_ifn);
+       if (!dev) {
+               pr_err("Unknown mcast interface: %s\n", c->mcast_ifn);
+-              return -ENODEV;
++              result = -ENODEV;
++              goto out_early;
+       }
+       hlen = (AF_INET6 == c->mcast_af) ?
+              sizeof(struct ipv6hdr) + sizeof(struct udphdr) :
+@@ -1829,26 +1821,30 @@ int start_sync_thread(struct netns_ipvs
+               c->sync_maxlen = mtu - hlen;
+       if (state == IP_VS_STATE_MASTER) {
++              result = -EEXIST;
+               if (ipvs->ms)
+-                      return -EEXIST;
++                      goto out_early;
+               ipvs->mcfg = *c;
+               name = "ipvs-m:%d:%d";
+               threadfn = sync_thread_master;
+       } else if (state == IP_VS_STATE_BACKUP) {
++              result = -EEXIST;
+               if (ipvs->backup_threads)
+-                      return -EEXIST;
++                      goto out_early;
+               ipvs->bcfg = *c;
+               name = "ipvs-b:%d:%d";
+               threadfn = sync_thread_backup;
+       } else {
+-              return -EINVAL;
++              result = -EINVAL;
++              goto out_early;
+       }
+       if (state == IP_VS_STATE_MASTER) {
+               struct ipvs_master_sync_state *ms;
++              result = -ENOMEM;
+               ipvs->ms = kzalloc(count * sizeof(ipvs->ms[0]), GFP_KERNEL);
+               if (!ipvs->ms)
+                       goto out;
+@@ -1864,39 +1860,38 @@ int start_sync_thread(struct netns_ipvs
+       } else {
+               array = kzalloc(count * sizeof(struct task_struct *),
+                               GFP_KERNEL);
++              result = -ENOMEM;
+               if (!array)
+                       goto out;
+       }
+-      tinfo = NULL;
+       for (id = 0; id < count; id++) {
+-              if (state == IP_VS_STATE_MASTER)
+-                      sock = make_send_sock(ipvs, id);
+-              else
+-                      sock = make_receive_sock(ipvs, id, dev->ifindex);
+-              if (IS_ERR(sock)) {
+-                      result = PTR_ERR(sock);
+-                      goto outtinfo;
+-              }
++              result = -ENOMEM;
+               tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
+               if (!tinfo)
+-                      goto outsocket;
++                      goto out;
+               tinfo->ipvs = ipvs;
+-              tinfo->sock = sock;
++              tinfo->sock = NULL;
+               if (state == IP_VS_STATE_BACKUP) {
+                       tinfo->buf = kmalloc(ipvs->bcfg.sync_maxlen,
+                                            GFP_KERNEL);
+                       if (!tinfo->buf)
+-                              goto outtinfo;
++                              goto out;
+               } else {
+                       tinfo->buf = NULL;
+               }
+               tinfo->id = id;
++              if (state == IP_VS_STATE_MASTER)
++                      result = make_send_sock(ipvs, id, dev, &tinfo->sock);
++              else
++                      result = make_receive_sock(ipvs, id, dev, &tinfo->sock);
++              if (result < 0)
++                      goto out;
+               task = kthread_run(threadfn, tinfo, name, ipvs->gen, id);
+               if (IS_ERR(task)) {
+                       result = PTR_ERR(task);
+-                      goto outtinfo;
++                      goto out;
+               }
+               tinfo = NULL;
+               if (state == IP_VS_STATE_MASTER)
+@@ -1913,20 +1908,20 @@ int start_sync_thread(struct netns_ipvs
+       ipvs->sync_state |= state;
+       spin_unlock_bh(&ipvs->sync_buff_lock);
++      mutex_unlock(&ipvs->sync_mutex);
++      rtnl_unlock();
++
+       /* increase the module use count */
+       ip_vs_use_count_inc();
+       return 0;
+-outsocket:
+-      sock_release(sock);
+-
+-outtinfo:
+-      if (tinfo) {
+-              sock_release(tinfo->sock);
+-              kfree(tinfo->buf);
+-              kfree(tinfo);
+-      }
++out:
++      /* We do not need RTNL lock anymore, release it here so that
++       * sock_release below and in the kthreads can use rtnl_lock
++       * to leave the mcast group.
++       */
++      rtnl_unlock();
+       count = id;
+       while (count-- > 0) {
+               if (state == IP_VS_STATE_MASTER)
+@@ -1934,13 +1929,23 @@ outtinfo:
+               else
+                       kthread_stop(array[count]);
+       }
+-      kfree(array);
+-
+-out:
+       if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
+               kfree(ipvs->ms);
+               ipvs->ms = NULL;
+       }
++      mutex_unlock(&ipvs->sync_mutex);
++      if (tinfo) {
++              if (tinfo->sock)
++                      sock_release(tinfo->sock);
++              kfree(tinfo->buf);
++              kfree(tinfo);
++      }
++      kfree(array);
++      return result;
++
++out_early:
++      mutex_unlock(&ipvs->sync_mutex);
++      rtnl_unlock();
+       return result;
+ }