git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.9-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 23 Feb 2017 20:17:22 +0000 (21:17 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 23 Feb 2017 20:17:22 +0000 (21:17 +0100)
added patches:
dccp-fix-freeing-skb-too-early-for-ipv6_recvpktinfo.patch
ip-fix-ip_checksum-handling.patch
irda-fix-lockdep-annotations-in-hashbin_delete.patch
kcm-fix-0-length-case-for-kcm_sendmsg.patch
kcm-fix-a-null-pointer-dereference-in-kcm_sendmsg.patch
net-ethernet-ti-cpsw-fix-cpsw-assignment-in-resume.patch
net-llc-avoid-bug_on-in-skb_orphan.patch
net-mlx5e-disable-preemption-when-doing-tc-statistics-upcall.patch
net-neigh-fix-netevent-netevent_delay_probe_time_update-notification.patch
net-socket-fix-recvmmsg-not-returning-error-from-sock_error.patch
packet-do-not-call-fanout_release-from-atomic-contexts.patch
packet-fix-races-in-fanout_add.patch
ptr_ring-fix-race-conditions-when-resizing.patch
vxlan-fix-oops-in-dev_fill_metadata_dst.patch

15 files changed:
queue-4.9/dccp-fix-freeing-skb-too-early-for-ipv6_recvpktinfo.patch [new file with mode: 0644]
queue-4.9/ip-fix-ip_checksum-handling.patch [new file with mode: 0644]
queue-4.9/irda-fix-lockdep-annotations-in-hashbin_delete.patch [new file with mode: 0644]
queue-4.9/kcm-fix-0-length-case-for-kcm_sendmsg.patch [new file with mode: 0644]
queue-4.9/kcm-fix-a-null-pointer-dereference-in-kcm_sendmsg.patch [new file with mode: 0644]
queue-4.9/net-ethernet-ti-cpsw-fix-cpsw-assignment-in-resume.patch [new file with mode: 0644]
queue-4.9/net-llc-avoid-bug_on-in-skb_orphan.patch [new file with mode: 0644]
queue-4.9/net-mlx5e-disable-preemption-when-doing-tc-statistics-upcall.patch [new file with mode: 0644]
queue-4.9/net-neigh-fix-netevent-netevent_delay_probe_time_update-notification.patch [new file with mode: 0644]
queue-4.9/net-socket-fix-recvmmsg-not-returning-error-from-sock_error.patch [new file with mode: 0644]
queue-4.9/packet-do-not-call-fanout_release-from-atomic-contexts.patch [new file with mode: 0644]
queue-4.9/packet-fix-races-in-fanout_add.patch [new file with mode: 0644]
queue-4.9/ptr_ring-fix-race-conditions-when-resizing.patch [new file with mode: 0644]
queue-4.9/series
queue-4.9/vxlan-fix-oops-in-dev_fill_metadata_dst.patch [new file with mode: 0644]

diff --git a/queue-4.9/dccp-fix-freeing-skb-too-early-for-ipv6_recvpktinfo.patch b/queue-4.9/dccp-fix-freeing-skb-too-early-for-ipv6_recvpktinfo.patch
new file mode 100644 (file)
index 0000000..1845f4d
--- /dev/null
@@ -0,0 +1,48 @@
+From foo@baz Thu Feb 23 21:13:05 CET 2017
+From: Andrey Konovalov <andreyknvl@google.com>
+Date: Thu, 16 Feb 2017 17:22:46 +0100
+Subject: dccp: fix freeing skb too early for IPV6_RECVPKTINFO
+
+From: Andrey Konovalov <andreyknvl@google.com>
+
+
+[ Upstream commit 5edabca9d4cff7f1f2b68f0bac55ef99d9798ba4 ]
+
+In the current DCCP implementation an skb for a DCCP_PKT_REQUEST packet
+is forcibly freed via __kfree_skb in dccp_rcv_state_process if
+dccp_v6_conn_request successfully returns.
+
+However, if IPV6_RECVPKTINFO is set on a socket, the address of the skb
+is saved to ireq->pktopts and the ref count for skb is incremented in
+dccp_v6_conn_request, so skb is still in use. Nevertheless, it gets freed
+in dccp_rcv_state_process.
+
+Fix by calling consume_skb instead of doing goto discard and therefore
+calling __kfree_skb.
+
+Similar fixes for TCP:
+
+fb7e2399ec17f1004c0e0ccfd17439f8759ede01 [TCP]: skb is unexpectedly freed.
+0aea76d35c9651d55bbaf746e7914e5f9ae5a25d tcp: SYN packets are now
+simply consumed
+
+Signed-off-by: Andrey Konovalov <andreyknvl@google.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dccp/input.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/dccp/input.c
++++ b/net/dccp/input.c
+@@ -606,7 +606,8 @@ int dccp_rcv_state_process(struct sock *
+                       if (inet_csk(sk)->icsk_af_ops->conn_request(sk,
+                                                                   skb) < 0)
+                               return 1;
+-                      goto discard;
++                      consume_skb(skb);
++                      return 0;
+               }
+               if (dh->dccph_type == DCCP_PKT_RESET)
+                       goto discard;
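For context, a minimal sketch of the refcounting difference that makes consume_skb() safe here while __kfree_skb() was not (an editor's illustration, not part of the patch):

    /* dccp_v6_conn_request() takes an extra reference via skb_get() when it
     * stashes the skb in ireq->pktopts for IPV6_RECVPKTINFO. */
    struct sk_buff *held = skb_get(skb);  /* skb->users: 1 -> 2              */

    consume_skb(skb);                     /* users: 2 -> 1, nothing freed    */
    /* __kfree_skb(skb) would instead free the data immediately, regardless
     * of users, leaving the pointer saved in ireq->pktopts dangling. */
    consume_skb(held);                    /* users: 1 -> 0, skb really freed */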
diff --git a/queue-4.9/ip-fix-ip_checksum-handling.patch b/queue-4.9/ip-fix-ip_checksum-handling.patch
new file mode 100644 (file)
index 0000000..d9b6353
--- /dev/null
@@ -0,0 +1,49 @@
+From foo@baz Thu Feb 23 21:13:05 CET 2017
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Tue, 21 Feb 2017 09:33:18 +0100
+Subject: ip: fix IP_CHECKSUM handling
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+
+[ Upstream commit ca4ef4574f1ee5252e2cd365f8f5d5bafd048f32 ]
+
+The skbs processed by ip_cmsg_recv() are not guaranteed to
+be linear, e.g. when sending UDP packets over loopback with
+MSG_MORE.
+Using csum_partial() on [potentially] the whole skb len
+is dangerous; instead be on the safe side and use skb_checksum().
+
+Thanks to the syzkaller team for detecting the issue and providing the
+reproducer.
+
+v1 -> v2:
+ - move the variable declaration in a tighter scope
+
+Fixes: ad6f939ab193 ("ip: Add offset parameter to ip_cmsg_recv")
+Reported-by: Andrey Konovalov <andreyknvl@google.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_sockglue.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/net/ipv4/ip_sockglue.c
++++ b/net/ipv4/ip_sockglue.c
+@@ -105,10 +105,10 @@ static void ip_cmsg_recv_checksum(struct
+       if (skb->ip_summed != CHECKSUM_COMPLETE)
+               return;
+-      if (offset != 0)
+-              csum = csum_sub(csum,
+-                              csum_partial(skb_transport_header(skb) + tlen,
+-                                           offset, 0));
++      if (offset != 0) {
++              int tend_off = skb_transport_offset(skb) + tlen;
++              csum = csum_sub(csum, skb_checksum(skb, tend_off, offset, 0));
++      }
+       put_cmsg(msg, SOL_IP, IP_CHECKSUM, sizeof(__wsum), &csum);
+ }
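For background, a hypothetical user-space sketch of how the IP_CHECKSUM ancillary data filled in by ip_cmsg_recv_checksum() is consumed; the socket option value and cmsg layout follow the Linux UAPI, but treat this as an untested illustration rather than part of the patch:

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>
    #include <sys/uio.h>
    #include <sys/socket.h>
    #include <netinet/in.h>

    #ifndef IP_CHECKSUM
    #define IP_CHECKSUM 23                  /* include/uapi/linux/in.h */
    #endif

    static void recv_with_checksum(int fd)
    {
            char data[2048], cbuf[CMSG_SPACE(sizeof(uint32_t))];
            struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
            struct msghdr msg = {
                    .msg_iov = &iov, .msg_iovlen = 1,
                    .msg_control = cbuf, .msg_controllen = sizeof(cbuf),
            };
            struct cmsghdr *c;
            int on = 1;

            setsockopt(fd, SOL_IP, IP_CHECKSUM, &on, sizeof(on));
            if (recvmsg(fd, &msg, 0) < 0)
                    return;

            for (c = CMSG_FIRSTHDR(&msg); c; c = CMSG_NXTHDR(&msg, c)) {
                    if (c->cmsg_level == SOL_IP && c->cmsg_type == IP_CHECKSUM) {
                            uint32_t csum;

                            memcpy(&csum, CMSG_DATA(c), sizeof(csum));
                            printf("kernel-folded payload checksum: %#x\n", csum);
                    }
            }
    }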
diff --git a/queue-4.9/irda-fix-lockdep-annotations-in-hashbin_delete.patch b/queue-4.9/irda-fix-lockdep-annotations-in-hashbin_delete.patch
new file mode 100644 (file)
index 0000000..f494444
--- /dev/null
@@ -0,0 +1,88 @@
+From foo@baz Thu Feb 23 21:13:05 CET 2017
+From: "David S. Miller" <davem@davemloft.net>
+Date: Fri, 17 Feb 2017 16:19:39 -0500
+Subject: irda: Fix lockdep annotations in hashbin_delete().
+
+From: "David S. Miller" <davem@davemloft.net>
+
+
+[ Upstream commit 4c03b862b12f980456f9de92db6d508a4999b788 ]
+
+A nested lock depth was added to the hashbin_delete() code but it
+doesn't actually work so well and results in tons of lockdep splats.
+
+Fix the code instead to properly drop the lock around the operation
+and just keep peeking the head of the hashbin queue.
+
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Tested-by: Dmitry Vyukov <dvyukov@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/irda/irqueue.c |   34 ++++++++++++++++------------------
+ 1 file changed, 16 insertions(+), 18 deletions(-)
+
+--- a/net/irda/irqueue.c
++++ b/net/irda/irqueue.c
+@@ -383,9 +383,6 @@ EXPORT_SYMBOL(hashbin_new);
+  *    for deallocating this structure if it's complex. If not the user can
+  *    just supply kfree, which should take care of the job.
+  */
+-#ifdef CONFIG_LOCKDEP
+-static int hashbin_lock_depth = 0;
+-#endif
+ int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func)
+ {
+       irda_queue_t* queue;
+@@ -396,22 +393,27 @@ int hashbin_delete( hashbin_t* hashbin,
+       IRDA_ASSERT(hashbin->magic == HB_MAGIC, return -1;);
+       /* Synchronize */
+-      if ( hashbin->hb_type & HB_LOCK ) {
+-              spin_lock_irqsave_nested(&hashbin->hb_spinlock, flags,
+-                                       hashbin_lock_depth++);
+-      }
++      if (hashbin->hb_type & HB_LOCK)
++              spin_lock_irqsave(&hashbin->hb_spinlock, flags);
+       /*
+        *  Free the entries in the hashbin, TODO: use hashbin_clear when
+        *  it has been shown to work
+        */
+       for (i = 0; i < HASHBIN_SIZE; i ++ ) {
+-              queue = dequeue_first((irda_queue_t**) &hashbin->hb_queue[i]);
+-              while (queue ) {
+-                      if (free_func)
+-                              (*free_func)(queue);
+-                      queue = dequeue_first(
+-                              (irda_queue_t**) &hashbin->hb_queue[i]);
++              while (1) {
++                      queue = dequeue_first((irda_queue_t**) &hashbin->hb_queue[i]);
++
++                      if (!queue)
++                              break;
++
++                      if (free_func) {
++                              if (hashbin->hb_type & HB_LOCK)
++                                      spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
++                              free_func(queue);
++                              if (hashbin->hb_type & HB_LOCK)
++                                      spin_lock_irqsave(&hashbin->hb_spinlock, flags);
++                      }
+               }
+       }
+@@ -420,12 +422,8 @@ int hashbin_delete( hashbin_t* hashbin,
+       hashbin->magic = ~HB_MAGIC;
+       /* Release lock */
+-      if ( hashbin->hb_type & HB_LOCK) {
++      if (hashbin->hb_type & HB_LOCK)
+               spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
+-#ifdef CONFIG_LOCKDEP
+-              hashbin_lock_depth--;
+-#endif
+-      }
+       /*
+        *  Free the hashbin structure
diff --git a/queue-4.9/kcm-fix-0-length-case-for-kcm_sendmsg.patch b/queue-4.9/kcm-fix-0-length-case-for-kcm_sendmsg.patch
new file mode 100644 (file)
index 0000000..1ec7b66
--- /dev/null
@@ -0,0 +1,107 @@
+From foo@baz Thu Feb 23 21:13:05 CET 2017
+From: WANG Cong <xiyou.wangcong@gmail.com>
+Date: Tue, 7 Feb 2017 12:59:47 -0800
+Subject: kcm: fix 0-length case for kcm_sendmsg()
+
+From: WANG Cong <xiyou.wangcong@gmail.com>
+
+
+[ Upstream commit 98e3862ca2b1ae595a13805dcab4c3a6d7718f4d ]
+
+Dmitry reported a kernel warning:
+
+ WARNING: CPU: 3 PID: 2936 at net/kcm/kcmsock.c:627
+ kcm_write_msgs+0x12e3/0x1b90 net/kcm/kcmsock.c:627
+ CPU: 3 PID: 2936 Comm: a.out Not tainted 4.10.0-rc6+ #209
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
+ Call Trace:
+  __dump_stack lib/dump_stack.c:15 [inline]
+  dump_stack+0x2ee/0x3ef lib/dump_stack.c:51
+  panic+0x1fb/0x412 kernel/panic.c:179
+  __warn+0x1c4/0x1e0 kernel/panic.c:539
+  warn_slowpath_null+0x2c/0x40 kernel/panic.c:582
+  kcm_write_msgs+0x12e3/0x1b90 net/kcm/kcmsock.c:627
+  kcm_sendmsg+0x163a/0x2200 net/kcm/kcmsock.c:1029
+  sock_sendmsg_nosec net/socket.c:635 [inline]
+  sock_sendmsg+0xca/0x110 net/socket.c:645
+  sock_write_iter+0x326/0x600 net/socket.c:848
+  new_sync_write fs/read_write.c:499 [inline]
+  __vfs_write+0x483/0x740 fs/read_write.c:512
+  vfs_write+0x187/0x530 fs/read_write.c:560
+  SYSC_write fs/read_write.c:607 [inline]
+  SyS_write+0xfb/0x230 fs/read_write.c:599
+  entry_SYSCALL_64_fastpath+0x1f/0xc2
+
+when calling syscall(__NR_write, sock2, 0x208aaf27ul, 0x0ul) on a KCM
+seqpacket socket. It appears that kcm_sendmsg() does not handle the len==0
+case correctly, which causes an empty skb to be allocated and queued.
+Fix this by skipping the skb allocation for the len==0 case.
+
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Cc: Tom Herbert <tom@herbertland.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/kcm/kcmsock.c |   40 ++++++++++++++++++++++------------------
+ 1 file changed, 22 insertions(+), 18 deletions(-)
+
+--- a/net/kcm/kcmsock.c
++++ b/net/kcm/kcmsock.c
+@@ -929,23 +929,25 @@ static int kcm_sendmsg(struct socket *so
+                       goto out_error;
+       }
+-      /* New message, alloc head skb */
+-      head = alloc_skb(0, sk->sk_allocation);
+-      while (!head) {
+-              kcm_push(kcm);
+-              err = sk_stream_wait_memory(sk, &timeo);
+-              if (err)
+-                      goto out_error;
+-
++      if (msg_data_left(msg)) {
++              /* New message, alloc head skb */
+               head = alloc_skb(0, sk->sk_allocation);
+-      }
++              while (!head) {
++                      kcm_push(kcm);
++                      err = sk_stream_wait_memory(sk, &timeo);
++                      if (err)
++                              goto out_error;
+-      skb = head;
++                      head = alloc_skb(0, sk->sk_allocation);
++              }
+-      /* Set ip_summed to CHECKSUM_UNNECESSARY to avoid calling
+-       * csum_and_copy_from_iter from skb_do_copy_data_nocache.
+-       */
+-      skb->ip_summed = CHECKSUM_UNNECESSARY;
++              skb = head;
++
++              /* Set ip_summed to CHECKSUM_UNNECESSARY to avoid calling
++               * csum_and_copy_from_iter from skb_do_copy_data_nocache.
++               */
++              skb->ip_summed = CHECKSUM_UNNECESSARY;
++      }
+ start:
+       while (msg_data_left(msg)) {
+@@ -1018,10 +1020,12 @@ wait_for_memory:
+       if (eor) {
+               bool not_busy = skb_queue_empty(&sk->sk_write_queue);
+-              /* Message complete, queue it on send buffer */
+-              __skb_queue_tail(&sk->sk_write_queue, head);
+-              kcm->seq_skb = NULL;
+-              KCM_STATS_INCR(kcm->stats.tx_msgs);
++              if (head) {
++                      /* Message complete, queue it on send buffer */
++                      __skb_queue_tail(&sk->sk_write_queue, head);
++                      kcm->seq_skb = NULL;
++                      KCM_STATS_INCR(kcm->stats.tx_msgs);
++              }
+               if (msg->msg_flags & MSG_BATCH) {
+                       kcm->tx_wait_more = true;
diff --git a/queue-4.9/kcm-fix-a-null-pointer-dereference-in-kcm_sendmsg.patch b/queue-4.9/kcm-fix-a-null-pointer-dereference-in-kcm_sendmsg.patch
new file mode 100644 (file)
index 0000000..6429e2c
--- /dev/null
@@ -0,0 +1,40 @@
+From foo@baz Thu Feb 23 21:13:05 CET 2017
+From: WANG Cong <xiyou.wangcong@gmail.com>
+Date: Mon, 13 Feb 2017 11:13:16 -0800
+Subject: kcm: fix a null pointer dereference in kcm_sendmsg()
+
+From: WANG Cong <xiyou.wangcong@gmail.com>
+
+
+[ Upstream commit cd27b96bc13841ee7af25837a6ae86fee87273d6 ]
+
+In commit 98e3862ca2b1 ("kcm: fix 0-length case for kcm_sendmsg()")
+I tried to avoid skb allocation for the 0-length case, but missed
+a NULL pointer check in the non-EOR case.
+
+Fixes: 98e3862ca2b1 ("kcm: fix 0-length case for kcm_sendmsg()")
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Cc: Tom Herbert <tom@herbertland.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Acked-by: Tom Herbert <tom@herbertland.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/kcm/kcmsock.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/kcm/kcmsock.c
++++ b/net/kcm/kcmsock.c
+@@ -1044,8 +1044,10 @@ wait_for_memory:
+       } else {
+               /* Message not complete, save state */
+ partial_message:
+-              kcm->seq_skb = head;
+-              kcm_tx_msg(head)->last_skb = skb;
++              if (head) {
++                      kcm->seq_skb = head;
++                      kcm_tx_msg(head)->last_skb = skb;
++              }
+       }
+       KCM_STATS_ADD(kcm->stats.tx_bytes, copied);
diff --git a/queue-4.9/net-ethernet-ti-cpsw-fix-cpsw-assignment-in-resume.patch b/queue-4.9/net-ethernet-ti-cpsw-fix-cpsw-assignment-in-resume.patch
new file mode 100644 (file)
index 0000000..60a8225
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Thu Feb 23 21:13:05 CET 2017
+From: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
+Date: Tue, 14 Feb 2017 14:42:15 +0200
+Subject: net: ethernet: ti: cpsw: fix cpsw assignment in resume
+
+From: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
+
+
+[ Upstream commit a60ced990e309666915d21445e95347d12406694 ]
+
+There is a copy-paste error which hides a breakage of resume
+in the CPSW driver: netdev_priv() was replaced with ndev_to_cpsw(ndev)
+in suspend, but was left unchanged in resume.
+
+Fixes: 606f39939595a4d4540406bfc11f265b2036af6d
+(ti: cpsw: move platform data and slaves info to cpsw_common)
+
+Reported-by: Alexey Starikovskiy <AStarikovskiy@topcon.com>
+Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/ti/cpsw.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/ti/cpsw.c
++++ b/drivers/net/ethernet/ti/cpsw.c
+@@ -2925,7 +2925,7 @@ static int cpsw_resume(struct device *de
+ {
+       struct platform_device  *pdev = to_platform_device(dev);
+       struct net_device       *ndev = platform_get_drvdata(pdev);
+-      struct cpsw_common      *cpsw = netdev_priv(ndev);
++      struct cpsw_common      *cpsw = ndev_to_cpsw(ndev);
+       /* Select default pin state */
+       pinctrl_pm_select_default_state(dev);
diff --git a/queue-4.9/net-llc-avoid-bug_on-in-skb_orphan.patch b/queue-4.9/net-llc-avoid-bug_on-in-skb_orphan.patch
new file mode 100644 (file)
index 0000000..3e110c9
--- /dev/null
@@ -0,0 +1,57 @@
+From foo@baz Thu Feb 23 21:13:05 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Sun, 12 Feb 2017 14:03:52 -0800
+Subject: net/llc: avoid BUG_ON() in skb_orphan()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 8b74d439e1697110c5e5c600643e823eb1dd0762 ]
+
+It seems nobody has used LLC since linux-3.12.
+
+Fortunately fuzzers like syzkaller still know how to run this code,
+otherwise it would be no fun.
+
+Setting skb->sk without skb->destructor leads to all kinds of
+bugs, we now prefer to be very strict about it.
+
+Ideally here we would use skb_set_owner() but this helper does not exist yet,
+only CAN seems to have a private helper for that.
+
+Fixes: 376c7311bdb6 ("net: add a temporary sanity check in skb_orphan()")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Andrey Konovalov <andreyknvl@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/llc/llc_conn.c |    3 +++
+ net/llc/llc_sap.c  |    3 +++
+ 2 files changed, 6 insertions(+)
+
+--- a/net/llc/llc_conn.c
++++ b/net/llc/llc_conn.c
+@@ -821,7 +821,10 @@ void llc_conn_handler(struct llc_sap *sa
+                * another trick required to cope with how the PROCOM state
+                * machine works. -acme
+                */
++              skb_orphan(skb);
++              sock_hold(sk);
+               skb->sk = sk;
++              skb->destructor = sock_efree;
+       }
+       if (!sock_owned_by_user(sk))
+               llc_conn_rcv(sk, skb);
+--- a/net/llc/llc_sap.c
++++ b/net/llc/llc_sap.c
+@@ -290,7 +290,10 @@ static void llc_sap_rcv(struct llc_sap *
+       ev->type   = LLC_SAP_EV_TYPE_PDU;
+       ev->reason = 0;
++      skb_orphan(skb);
++      sock_hold(sk);
+       skb->sk = sk;
++      skb->destructor = sock_efree;
+       llc_sap_state_process(sap, skb);
+ }
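As a side note (an editor's sketch, not taken from the patch), the invariant the added lines establish is that an skb which holds a socket reference must also carry a destructor able to drop it, which is exactly what the skb_orphan() sanity check enforces:

    skb_orphan(skb);                /* release any previous owner first       */
    sock_hold(sk);                  /* the skb now pins the socket            */
    skb->sk = sk;
    skb->destructor = sock_efree;   /* sock_efree() does the matching
                                     * sock_put(), so a later skb_orphan() or
                                     * kfree_skb() drops the reference safely */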
diff --git a/queue-4.9/net-mlx5e-disable-preemption-when-doing-tc-statistics-upcall.patch b/queue-4.9/net-mlx5e-disable-preemption-when-doing-tc-statistics-upcall.patch
new file mode 100644 (file)
index 0000000..cfe2cd7
--- /dev/null
@@ -0,0 +1,48 @@
+From foo@baz Thu Feb 23 21:13:05 CET 2017
+From: Or Gerlitz <ogerlitz@mellanox.com>
+Date: Sun, 12 Feb 2017 11:21:31 +0200
+Subject: net/mlx5e: Disable preemption when doing TC statistics upcall
+
+From: Or Gerlitz <ogerlitz@mellanox.com>
+
+
+[ Upstream commit fed06ee89b78d3af32e235e0e89ad0d946fcb95d ]
+
+When called by HW offloading drivers, the TC action (e.g.
+net/sched/act_mirred.c) code uses this_cpu logic, e.g.
+
+ _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets)
+
+Per the kernel documentation, preemption should be disabled; add that.
+
+Before the fix, when running with CONFIG_PREEMPT set, we get a
+
+BUG: using smp_processor_id() in preemptible [00000000] code: tc/3793
+
+assertion from the TC action (mirred) stats_update callback.
+
+Fixes: aad7e08d39bd ('net/mlx5e: Hardware offloaded flower filter statistics support')
+Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -567,10 +567,14 @@ int mlx5e_stats_flower(struct mlx5e_priv
+       mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
++      preempt_disable();
++
+       tcf_exts_to_list(f->exts, &actions);
+       list_for_each_entry(a, &actions, list)
+               tcf_action_stats_update(a, bytes, packets, lastuse);
++      preempt_enable();
++
+       return 0;
+ }
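For context, a brief sketch of why the preempt_disable()/preempt_enable() pair is needed, built from the per-CPU update quoted in the changelog (an editor's illustration, not part of the patch):

    preempt_disable();              /* pin the task to the current CPU ...    */
    _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
    preempt_enable();               /* ... so this_cpu_ptr() stays meaningful */
    /* With CONFIG_PREEMPT and no such pair, the task may migrate between
     * resolving this_cpu_ptr() and updating the counters, which is what the
     * smp_processor_id() debug check above complains about.  The TC action
     * performs this access inside tcf_action_stats_update(), so the driver
     * disables preemption around the whole upcall loop. */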
diff --git a/queue-4.9/net-neigh-fix-netevent-netevent_delay_probe_time_update-notification.patch b/queue-4.9/net-neigh-fix-netevent-netevent_delay_probe_time_update-notification.patch
new file mode 100644 (file)
index 0000000..359a5ac
--- /dev/null
@@ -0,0 +1,56 @@
+From foo@baz Thu Feb 23 21:13:05 CET 2017
+From: Marcus Huewe <suse-tux@gmx.de>
+Date: Wed, 15 Feb 2017 01:00:36 +0100
+Subject: net: neigh: Fix netevent NETEVENT_DELAY_PROBE_TIME_UPDATE notification
+
+From: Marcus Huewe <suse-tux@gmx.de>
+
+
+[ Upstream commit 7627ae6030f56a9a91a5b3867b21f35d79c16e64 ]
+
+When setting a neigh related sysctl parameter, we always send a
+NETEVENT_DELAY_PROBE_TIME_UPDATE netevent. For instance, when
+executing
+
+       sysctl net.ipv6.neigh.wlp3s0.retrans_time_ms=2000
+
+a NETEVENT_DELAY_PROBE_TIME_UPDATE netevent is generated.
+
+This is caused by commit 2a4501ae18b5 ("neigh: Send a
+notification when DELAY_PROBE_TIME changes"). According to the
+commit's description, it was intended to generate such an event
+when setting the "delay_first_probe_time" sysctl parameter.
+
+In order to fix this, only generate this event when actually
+setting the "delay_first_probe_time" sysctl parameter. This fix
+should not have any unintended side-effects, because all but one of the
+registered netevent callbacks check for other netevent event
+types (the registered callbacks were obtained by grepping for
+"register_netevent_notifier"). The only callback that uses the
+NETEVENT_DELAY_PROBE_TIME_UPDATE event is
+mlxsw_sp_router_netevent_event() (in
+drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c): in case
+of this event, it only accesses the DELAY_PROBE_TIME of the
+passed neigh_parms.
+
+Fixes: 2a4501ae18b5 ("neigh: Send a notification when DELAY_PROBE_TIME changes")
+Signed-off-by: Marcus Huewe <suse-tux@gmx.de>
+Reviewed-by: Ido Schimmel <idosch@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/neighbour.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/core/neighbour.c
++++ b/net/core/neighbour.c
+@@ -2927,7 +2927,8 @@ static void neigh_proc_update(struct ctl
+               return;
+       set_bit(index, p->data_state);
+-      call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
++      if (index == NEIGH_VAR_DELAY_PROBE_TIME)
++              call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
+       if (!dev) /* NULL dev means this is default value */
+               neigh_copy_dflt_parms(net, p, index);
+ }
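For reference, a sketch of the consumer side of this netevent (an editor's illustration modelled on the mlxsw callback mentioned above; the function and variable names are hypothetical):

    #include <linux/notifier.h>
    #include <linux/printk.h>
    #include <net/netevent.h>
    #include <net/neighbour.h>

    static int sample_netevent_cb(struct notifier_block *nb,
                                  unsigned long event, void *ptr)
    {
            struct neigh_parms *p = ptr;    /* payload for this event type */

            if (event == NETEVENT_DELAY_PROBE_TIME_UPDATE)
                    pr_info("delay_probe_time is now %lu\n",
                            (unsigned long)NEIGH_VAR(p, DELAY_PROBE_TIME));
            return NOTIFY_DONE;
    }

    static struct notifier_block sample_netevent_nb = {
            .notifier_call = sample_netevent_cb,
    };
    /* registered once via register_netevent_notifier(&sample_netevent_nb) */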
diff --git a/queue-4.9/net-socket-fix-recvmmsg-not-returning-error-from-sock_error.patch b/queue-4.9/net-socket-fix-recvmmsg-not-returning-error-from-sock_error.patch
new file mode 100644 (file)
index 0000000..88ae276
--- /dev/null
@@ -0,0 +1,47 @@
+From foo@baz Thu Feb 23 21:13:05 CET 2017
+From: Maxime Jayat <maxime.jayat@mobile-devices.fr>
+Date: Tue, 21 Feb 2017 18:35:51 +0100
+Subject: net: socket: fix recvmmsg not returning error from sock_error
+
+From: Maxime Jayat <maxime.jayat@mobile-devices.fr>
+
+
+[ Upstream commit e623a9e9dec29ae811d11f83d0074ba254aba374 ]
+
+Commit 34b88a68f26a ("net: Fix use after free in the recvmmsg exit path"),
+changed the exit path of recvmmsg to always return the datagrams
+variable and modified the error paths to set the variable to the error
+code returned by recvmsg if necessary.
+
+However, in the case where sock_error returned an error, the error code
+was then ignored, and recvmmsg returned 0.
+
+Change the error path of recvmmsg to correctly return the error code
+of sock_error.
+
+The bug was triggered by using recvmmsg on a CAN interface which was
+not up. Linux 4.6 and later return 0 in this case while earlier
+releases returned -ENETDOWN.
+
+Fixes: 34b88a68f26a ("net: Fix use after free in the recvmmsg exit path")
+Signed-off-by: Maxime Jayat <maxime.jayat@mobile-devices.fr>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/socket.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/socket.c
++++ b/net/socket.c
+@@ -2197,8 +2197,10 @@ int __sys_recvmmsg(int fd, struct mmsghd
+               return err;
+       err = sock_error(sock->sk);
+-      if (err)
++      if (err) {
++              datagrams = err;
+               goto out_put;
++      }
+       entry = mmsg;
+       compat_entry = (struct compat_mmsghdr __user *)mmsg;
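A hypothetical user-space sketch of the behaviour being restored (not part of the patch): with the fix, a pending socket error such as ENETDOWN surfaces through recvmmsg()'s return value again instead of a silent 0:

    #define _GNU_SOURCE             /* for recvmmsg() */
    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>

    static int drain(int fd, struct mmsghdr *msgs, unsigned int n)
    {
            int got = recvmmsg(fd, msgs, n, MSG_DONTWAIT, NULL);

            if (got < 0) {
                    /* Before the fix this branch was unreachable for
                     * sock_error() failures: the call returned 0 even though
                     * the socket had a pending error such as ENETDOWN. */
                    fprintf(stderr, "recvmmsg: %s\n", strerror(errno));
                    return -1;
            }
            return got;             /* number of datagrams received */
    }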
diff --git a/queue-4.9/packet-do-not-call-fanout_release-from-atomic-contexts.patch b/queue-4.9/packet-do-not-call-fanout_release-from-atomic-contexts.patch
new file mode 100644 (file)
index 0000000..19a1a94
--- /dev/null
@@ -0,0 +1,186 @@
+From foo@baz Thu Feb 23 21:13:05 CET 2017
+From: Anoob Soman <anoob.soman@citrix.com>
+Date: Wed, 15 Feb 2017 20:25:39 +0000
+Subject: packet: Do not call fanout_release from atomic contexts
+
+From: Anoob Soman <anoob.soman@citrix.com>
+
+
+[ Upstream commit 2bd624b4611ffee36422782d16e1c944d1351e98 ]
+
+Commit 6664498280cf ("packet: call fanout_release, while UNREGISTERING a
+netdev"), unfortunately, introduced the following issues.
+
+1. calling mutex_lock(&fanout_mutex) (fanout_release()) from inside an
+rcu read-side critical section. rcu_read_lock() usually disables preemption,
+which prohibits calling sleeping functions.
+
+[  ] include/linux/rcupdate.h:560 Illegal context switch in RCU read-side critical section!
+[  ]
+[  ] rcu_scheduler_active = 1, debug_locks = 0
+[  ] 4 locks held by ovs-vswitchd/1969:
+[  ]  #0:  (cb_lock){++++++}, at: [<ffffffff8158a6c9>] genl_rcv+0x19/0x40
+[  ]  #1:  (ovs_mutex){+.+.+.}, at: [<ffffffffa04878ca>] ovs_vport_cmd_del+0x4a/0x100 [openvswitch]
+[  ]  #2:  (rtnl_mutex){+.+.+.}, at: [<ffffffff81564157>] rtnl_lock+0x17/0x20
+[  ]  #3:  (rcu_read_lock){......}, at: [<ffffffff81614165>] packet_notifier+0x5/0x3f0
+[  ]
+[  ] Call Trace:
+[  ]  [<ffffffff813770c1>] dump_stack+0x85/0xc4
+[  ]  [<ffffffff810c9077>] lockdep_rcu_suspicious+0x107/0x110
+[  ]  [<ffffffff810a2da7>] ___might_sleep+0x57/0x210
+[  ]  [<ffffffff810a2fd0>] __might_sleep+0x70/0x90
+[  ]  [<ffffffff8162e80c>] mutex_lock_nested+0x3c/0x3a0
+[  ]  [<ffffffff810de93f>] ? vprintk_default+0x1f/0x30
+[  ]  [<ffffffff81186e88>] ? printk+0x4d/0x4f
+[  ]  [<ffffffff816106dd>] fanout_release+0x1d/0xe0
+[  ]  [<ffffffff81614459>] packet_notifier+0x2f9/0x3f0
+
+2. calling mutex_lock(&fanout_mutex) inside spin_lock(&po->bind_lock).
+"sleeping function called from invalid context"
+
+[  ] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:620
+[  ] in_atomic(): 1, irqs_disabled(): 0, pid: 1969, name: ovs-vswitchd
+[  ] INFO: lockdep is turned off.
+[  ] Call Trace:
+[  ]  [<ffffffff813770c1>] dump_stack+0x85/0xc4
+[  ]  [<ffffffff810a2f52>] ___might_sleep+0x202/0x210
+[  ]  [<ffffffff810a2fd0>] __might_sleep+0x70/0x90
+[  ]  [<ffffffff8162e80c>] mutex_lock_nested+0x3c/0x3a0
+[  ]  [<ffffffff816106dd>] fanout_release+0x1d/0xe0
+[  ]  [<ffffffff81614459>] packet_notifier+0x2f9/0x3f0
+
+3. calling dev_remove_pack(&fanout->prot_hook), from inside
+spin_lock(&po->bind_lock) or rcu_read-side critical-section. dev_remove_pack()
+-> synchronize_net(), which might sleep.
+
+[  ] BUG: scheduling while atomic: ovs-vswitchd/1969/0x00000002
+[  ] INFO: lockdep is turned off.
+[  ] Call Trace:
+[  ]  [<ffffffff813770c1>] dump_stack+0x85/0xc4
+[  ]  [<ffffffff81186274>] __schedule_bug+0x64/0x73
+[  ]  [<ffffffff8162b8cb>] __schedule+0x6b/0xd10
+[  ]  [<ffffffff8162c5db>] schedule+0x6b/0x80
+[  ]  [<ffffffff81630b1d>] schedule_timeout+0x38d/0x410
+[  ]  [<ffffffff810ea3fd>] synchronize_sched_expedited+0x53d/0x810
+[  ]  [<ffffffff810ea6de>] synchronize_rcu_expedited+0xe/0x10
+[  ]  [<ffffffff8154eab5>] synchronize_net+0x35/0x50
+[  ]  [<ffffffff8154eae3>] dev_remove_pack+0x13/0x20
+[  ]  [<ffffffff8161077e>] fanout_release+0xbe/0xe0
+[  ]  [<ffffffff81614459>] packet_notifier+0x2f9/0x3f0
+
+4. fanout_release() races with calls from different CPU.
+
+To fix the above problems, remove the call to fanout_release() under
+rcu_read_lock(). Instead, call __dev_remove_pack(&fanout->prot_hook) and
+netdev_run_todo will be happy that &dev->ptype_specific list is empty. In order
+to achieve this, I moved dev_{add,remove}_pack() out of fanout_{add,release} to
+__fanout_{link,unlink}. So, a call to {,__}unregister_prot_hook() will make sure
+fanout->prot_hook is removed as well.
+
+Fixes: 6664498280cf ("packet: call fanout_release, while UNREGISTERING a netdev")
+Reported-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Anoob Soman <anoob.soman@citrix.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c |   31 ++++++++++++++++++++++---------
+ 1 file changed, 22 insertions(+), 9 deletions(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -1497,6 +1497,8 @@ static void __fanout_link(struct sock *s
+       f->arr[f->num_members] = sk;
+       smp_wmb();
+       f->num_members++;
++      if (f->num_members == 1)
++              dev_add_pack(&f->prot_hook);
+       spin_unlock(&f->lock);
+ }
+@@ -1513,6 +1515,8 @@ static void __fanout_unlink(struct sock
+       BUG_ON(i >= f->num_members);
+       f->arr[i] = f->arr[f->num_members - 1];
+       f->num_members--;
++      if (f->num_members == 0)
++              __dev_remove_pack(&f->prot_hook);
+       spin_unlock(&f->lock);
+ }
+@@ -1693,7 +1697,6 @@ static int fanout_add(struct sock *sk, u
+               match->prot_hook.func = packet_rcv_fanout;
+               match->prot_hook.af_packet_priv = match;
+               match->prot_hook.id_match = match_fanout_group;
+-              dev_add_pack(&match->prot_hook);
+               list_add(&match->list, &fanout_list);
+       }
+       err = -EINVAL;
+@@ -1718,7 +1721,12 @@ out:
+       return err;
+ }
+-static void fanout_release(struct sock *sk)
++/* If pkt_sk(sk)->fanout->sk_ref is zero, this function removes
++ * pkt_sk(sk)->fanout from fanout_list and returns pkt_sk(sk)->fanout.
++ * It is the responsibility of the caller to call fanout_release_data() and
++ * free the returned packet_fanout (after synchronize_net())
++ */
++static struct packet_fanout *fanout_release(struct sock *sk)
+ {
+       struct packet_sock *po = pkt_sk(sk);
+       struct packet_fanout *f;
+@@ -1728,17 +1736,17 @@ static void fanout_release(struct sock *
+       if (f) {
+               po->fanout = NULL;
+-              if (atomic_dec_and_test(&f->sk_ref)) {
++              if (atomic_dec_and_test(&f->sk_ref))
+                       list_del(&f->list);
+-                      dev_remove_pack(&f->prot_hook);
+-                      fanout_release_data(f);
+-                      kfree(f);
+-              }
++              else
++                      f = NULL;
+               if (po->rollover)
+                       kfree_rcu(po->rollover, rcu);
+       }
+       mutex_unlock(&fanout_mutex);
++
++      return f;
+ }
+ static bool packet_extra_vlan_len_allowed(const struct net_device *dev,
+@@ -2970,6 +2978,7 @@ static int packet_release(struct socket
+ {
+       struct sock *sk = sock->sk;
+       struct packet_sock *po;
++      struct packet_fanout *f;
+       struct net *net;
+       union tpacket_req_u req_u;
+@@ -3009,9 +3018,14 @@ static int packet_release(struct socket
+               packet_set_ring(sk, &req_u, 1, 1);
+       }
+-      fanout_release(sk);
++      f = fanout_release(sk);
+       synchronize_net();
++
++      if (f) {
++              fanout_release_data(f);
++              kfree(f);
++      }
+       /*
+        *      Now the socket is dead. No more input will appear.
+        */
+@@ -3963,7 +3977,6 @@ static int packet_notifier(struct notifi
+                               }
+                               if (msg == NETDEV_UNREGISTER) {
+                                       packet_cached_dev_reset(po);
+-                                      fanout_release(sk);
+                                       po->ifindex = -1;
+                                       if (po->prot_hook.dev)
+                                               dev_put(po->prot_hook.dev);
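A context note on why only __dev_remove_pack() is safe in the locked paths (an editor's addition, not from the patch): dev_remove_pack() is the same unlink plus a synchronize_net(), and synchronize_net() may sleep, roughly:

    void dev_remove_pack(struct packet_type *pt)
    {
            __dev_remove_pack(pt);  /* unlink from the ptype list, non-sleeping */
            synchronize_net();      /* waits for RCU readers - may sleep, hence
                                     * illegal under po->bind_lock or RCU */
    }

The sleeping half is therefore deferred to packet_release(), which already calls synchronize_net() after the locks are dropped.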
diff --git a/queue-4.9/packet-fix-races-in-fanout_add.patch b/queue-4.9/packet-fix-races-in-fanout_add.patch
new file mode 100644 (file)
index 0000000..bc770db
--- /dev/null
@@ -0,0 +1,126 @@
+From foo@baz Thu Feb 23 21:13:05 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 14 Feb 2017 09:03:51 -0800
+Subject: packet: fix races in fanout_add()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit d199fab63c11998a602205f7ee7ff7c05c97164b ]
+
+Multiple threads can call fanout_add() at the same time.
+
+We need to grab fanout_mutex earlier to avoid races that could
+lead to one thread freeing po->rollover that was set by another thread.
+
+Do the same in fanout_release(), for peace of mind, and to help us
+find lockdep issues earlier.
+
+Fixes: dc99f600698d ("packet: Add fanout support.")
+Fixes: 0648ab70afe6 ("packet: rollover prepare: per-socket state")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c |   53 ++++++++++++++++++++++++++-----------------------
+ 1 file changed, 29 insertions(+), 24 deletions(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -1619,6 +1619,7 @@ static void fanout_release_data(struct p
+ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
+ {
++      struct packet_rollover *rollover = NULL;
+       struct packet_sock *po = pkt_sk(sk);
+       struct packet_fanout *f, *match;
+       u8 type = type_flags & 0xff;
+@@ -1641,23 +1642,28 @@ static int fanout_add(struct sock *sk, u
+               return -EINVAL;
+       }
++      mutex_lock(&fanout_mutex);
++
++      err = -EINVAL;
+       if (!po->running)
+-              return -EINVAL;
++              goto out;
++      err = -EALREADY;
+       if (po->fanout)
+-              return -EALREADY;
++              goto out;
+       if (type == PACKET_FANOUT_ROLLOVER ||
+           (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)) {
+-              po->rollover = kzalloc(sizeof(*po->rollover), GFP_KERNEL);
+-              if (!po->rollover)
+-                      return -ENOMEM;
+-              atomic_long_set(&po->rollover->num, 0);
+-              atomic_long_set(&po->rollover->num_huge, 0);
+-              atomic_long_set(&po->rollover->num_failed, 0);
++              err = -ENOMEM;
++              rollover = kzalloc(sizeof(*rollover), GFP_KERNEL);
++              if (!rollover)
++                      goto out;
++              atomic_long_set(&rollover->num, 0);
++              atomic_long_set(&rollover->num_huge, 0);
++              atomic_long_set(&rollover->num_failed, 0);
++              po->rollover = rollover;
+       }
+-      mutex_lock(&fanout_mutex);
+       match = NULL;
+       list_for_each_entry(f, &fanout_list, list) {
+               if (f->id == id &&
+@@ -1704,11 +1710,11 @@ static int fanout_add(struct sock *sk, u
+               }
+       }
+ out:
+-      mutex_unlock(&fanout_mutex);
+-      if (err) {
+-              kfree(po->rollover);
++      if (err && rollover) {
++              kfree(rollover);
+               po->rollover = NULL;
+       }
++      mutex_unlock(&fanout_mutex);
+       return err;
+ }
+@@ -1717,23 +1723,22 @@ static void fanout_release(struct sock *
+       struct packet_sock *po = pkt_sk(sk);
+       struct packet_fanout *f;
++      mutex_lock(&fanout_mutex);
+       f = po->fanout;
+-      if (!f)
+-              return;
++      if (f) {
++              po->fanout = NULL;
+-      mutex_lock(&fanout_mutex);
+-      po->fanout = NULL;
++              if (atomic_dec_and_test(&f->sk_ref)) {
++                      list_del(&f->list);
++                      dev_remove_pack(&f->prot_hook);
++                      fanout_release_data(f);
++                      kfree(f);
++              }
+-      if (atomic_dec_and_test(&f->sk_ref)) {
+-              list_del(&f->list);
+-              dev_remove_pack(&f->prot_hook);
+-              fanout_release_data(f);
+-              kfree(f);
++              if (po->rollover)
++                      kfree_rcu(po->rollover, rcu);
+       }
+       mutex_unlock(&fanout_mutex);
+-
+-      if (po->rollover)
+-              kfree_rcu(po->rollover, rcu);
+ }
+ static bool packet_extra_vlan_len_allowed(const struct net_device *dev,
diff --git a/queue-4.9/ptr_ring-fix-race-conditions-when-resizing.patch b/queue-4.9/ptr_ring-fix-race-conditions-when-resizing.patch
new file mode 100644 (file)
index 0000000..50d4f7f
--- /dev/null
@@ -0,0 +1,135 @@
+From foo@baz Thu Feb 23 21:13:05 CET 2017
+From: "Michael S. Tsirkin" <mst@redhat.com>
+Date: Sun, 19 Feb 2017 07:17:17 +0200
+Subject: ptr_ring: fix race conditions when resizing
+
+From: "Michael S. Tsirkin" <mst@redhat.com>
+
+
+[ Upstream commit e71695307114335be1ed912f4a347396c2ed0e69 ]
+
+Resizing currently drops consumer lock.  This can cause entries to be
+reordered, which isn't good in itself.  More importantly, consumer can
+detect a false ring empty condition and block forever.
+
+Further, nesting of consumer within producer lock is problematic for
+tun, since it produces entries in a BH, which causes a lock order
+reversal:
+
+       CPU0                    CPU1
+       ----                    ----
+  consume:
+  lock(&(&r->consumer_lock)->rlock);
+                               resize:
+                               local_irq_disable();
+                               lock(&(&r->producer_lock)->rlock);
+                               lock(&(&r->consumer_lock)->rlock);
+  <Interrupt>
+  produce:
+  lock(&(&r->producer_lock)->rlock);
+
+To fix, nest producer lock within consumer lock during resize,
+and keep consumer lock during the whole swap operation.
+
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Cc: stable@vger.kernel.org
+Cc: "David S. Miller" <davem@davemloft.net>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/ptr_ring.h |   36 +++++++++++++++++++++++++++++++-----
+ 1 file changed, 31 insertions(+), 5 deletions(-)
+
+--- a/include/linux/ptr_ring.h
++++ b/include/linux/ptr_ring.h
+@@ -111,6 +111,11 @@ static inline int __ptr_ring_produce(str
+       return 0;
+ }
++/*
++ * Note: resize (below) nests producer lock within consumer lock, so if you
++ * consume in interrupt or BH context, you must disable interrupts/BH when
++ * calling this.
++ */
+ static inline int ptr_ring_produce(struct ptr_ring *r, void *ptr)
+ {
+       int ret;
+@@ -242,6 +247,11 @@ static inline void *__ptr_ring_consume(s
+       return ptr;
+ }
++/*
++ * Note: resize (below) nests producer lock within consumer lock, so if you
++ * call this in interrupt or BH context, you must disable interrupts/BH when
++ * producing.
++ */
+ static inline void *ptr_ring_consume(struct ptr_ring *r)
+ {
+       void *ptr;
+@@ -357,7 +367,7 @@ static inline void **__ptr_ring_swap_que
+       void **old;
+       void *ptr;
+-      while ((ptr = ptr_ring_consume(r)))
++      while ((ptr = __ptr_ring_consume(r)))
+               if (producer < size)
+                       queue[producer++] = ptr;
+               else if (destroy)
+@@ -372,6 +382,12 @@ static inline void **__ptr_ring_swap_que
+       return old;
+ }
++/*
++ * Note: producer lock is nested within consumer lock, so if you
++ * resize you must make sure all uses nest correctly.
++ * In particular if you consume ring in interrupt or BH context, you must
++ * disable interrupts/BH when doing so.
++ */
+ static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp,
+                                 void (*destroy)(void *))
+ {
+@@ -382,17 +398,25 @@ static inline int ptr_ring_resize(struct
+       if (!queue)
+               return -ENOMEM;
+-      spin_lock_irqsave(&(r)->producer_lock, flags);
++      spin_lock_irqsave(&(r)->consumer_lock, flags);
++      spin_lock(&(r)->producer_lock);
+       old = __ptr_ring_swap_queue(r, queue, size, gfp, destroy);
+-      spin_unlock_irqrestore(&(r)->producer_lock, flags);
++      spin_unlock(&(r)->producer_lock);
++      spin_unlock_irqrestore(&(r)->consumer_lock, flags);
+       kfree(old);
+       return 0;
+ }
++/*
++ * Note: producer lock is nested within consumer lock, so if you
++ * resize you must make sure all uses nest correctly.
++ * In particular if you consume ring in interrupt or BH context, you must
++ * disable interrupts/BH when doing so.
++ */
+ static inline int ptr_ring_resize_multiple(struct ptr_ring **rings, int nrings,
+                                          int size,
+                                          gfp_t gfp, void (*destroy)(void *))
+@@ -412,10 +436,12 @@ static inline int ptr_ring_resize_multip
+       }
+       for (i = 0; i < nrings; ++i) {
+-              spin_lock_irqsave(&(rings[i])->producer_lock, flags);
++              spin_lock_irqsave(&(rings[i])->consumer_lock, flags);
++              spin_lock(&(rings[i])->producer_lock);
+               queues[i] = __ptr_ring_swap_queue(rings[i], queues[i],
+                                                 size, gfp, destroy);
+-              spin_unlock_irqrestore(&(rings[i])->producer_lock, flags);
++              spin_unlock(&(rings[i])->producer_lock);
++              spin_unlock_irqrestore(&(rings[i])->consumer_lock, flags);
+       }
+       for (i = 0; i < nrings; ++i)
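An illustrative note (an editor's sketch, not from the patch): after this change a plain consumer, which is essentially what ptr_ring_consume() does, is serialised against a concurrent resize by the consumer lock alone, because resize now takes the consumer lock first and nests the producer lock inside it:

    static void *consume_one(struct ptr_ring *r)
    {
            void *ptr;

            spin_lock(&r->consumer_lock);   /* same lock resize takes first */
            ptr = __ptr_ring_consume(r);
            spin_unlock(&r->consumer_lock);

            return ptr;
    }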
diff --git a/queue-4.9/series b/queue-4.9/series
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..9cd107a157dc50746eea15048c44019711c5ce86 100644 (file)
--- a/queue-4.9/series
@@ -0,0 +1,14 @@
+kcm-fix-0-length-case-for-kcm_sendmsg.patch
+kcm-fix-a-null-pointer-dereference-in-kcm_sendmsg.patch
+net-mlx5e-disable-preemption-when-doing-tc-statistics-upcall.patch
+net-llc-avoid-bug_on-in-skb_orphan.patch
+net-ethernet-ti-cpsw-fix-cpsw-assignment-in-resume.patch
+packet-fix-races-in-fanout_add.patch
+packet-do-not-call-fanout_release-from-atomic-contexts.patch
+net-neigh-fix-netevent-netevent_delay_probe_time_update-notification.patch
+dccp-fix-freeing-skb-too-early-for-ipv6_recvpktinfo.patch
+vxlan-fix-oops-in-dev_fill_metadata_dst.patch
+irda-fix-lockdep-annotations-in-hashbin_delete.patch
+ptr_ring-fix-race-conditions-when-resizing.patch
+ip-fix-ip_checksum-handling.patch
+net-socket-fix-recvmmsg-not-returning-error-from-sock_error.patch
diff --git a/queue-4.9/vxlan-fix-oops-in-dev_fill_metadata_dst.patch b/queue-4.9/vxlan-fix-oops-in-dev_fill_metadata_dst.patch
new file mode 100644 (file)
index 0000000..99ce1da
--- /dev/null
@@ -0,0 +1,63 @@
+From foo@baz Thu Feb 23 21:13:05 CET 2017
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Fri, 17 Feb 2017 19:14:27 +0100
+Subject: vxlan: fix oops in dev_fill_metadata_dst
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+
+[ Upstream commit 22f0708a718daea5e79de2d29b4829de016a4ff4 ]
+
+Since the commit 0c1d70af924b ("net: use dst_cache for vxlan device")
+vxlan_fill_metadata_dst() calls vxlan_get_route() passing a NULL
+dst_cache pointer, so the latter should explicitly check for
+valid dst_cache ptr. Unfortunately the commit d71785ffc7e7 ("net: add
+dst_cache to ovs vxlan lwtunnel") removed said check.
+
+As a result it is possible to trigger a null pointer access calling
+vxlan_fill_metadata_dst(), e.g. with:
+
+ovs-vsctl add-br ovs-br0
+ovs-vsctl add-port ovs-br0 vxlan0 -- set interface vxlan0 \
+       type=vxlan options:remote_ip=192.168.1.1 \
+       options:key=1234 options:dst_port=4789 ofport_request=10
+ip address add dev ovs-br0 172.16.1.2/24
+ovs-vsctl set Bridge ovs-br0 ipfix=@i -- --id=@i create IPFIX \
+       targets=\"172.16.1.1:1234\" sampling=1
+iperf -c 172.16.1.1 -u -l 1000 -b 10M -t 1 -p 1234
+
+This commit addresses the issue passing to vxlan_get_route() the
+dst_cache already available into the lwt info processed by
+vxlan_fill_metadata_dst().
+
+Fixes: d71785ffc7e7 ("net: add dst_cache to ovs vxlan lwtunnel")
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Acked-by: Jiri Benc <jbenc@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/vxlan.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/vxlan.c
++++ b/drivers/net/vxlan.c
+@@ -2449,7 +2449,8 @@ static int vxlan_fill_metadata_dst(struc
+                       return -EINVAL;
+               rt = vxlan_get_route(vxlan, skb, 0, info->key.tos,
+                                    info->key.u.ipv4.dst,
+-                                   &info->key.u.ipv4.src, NULL, info);
++                                   &info->key.u.ipv4.src,
++                                   &info->dst_cache, info);
+               if (IS_ERR(rt))
+                       return PTR_ERR(rt);
+               ip_rt_put(rt);
+@@ -2459,7 +2460,8 @@ static int vxlan_fill_metadata_dst(struc
+               ndst = vxlan6_get_route(vxlan, skb, 0, info->key.tos,
+                                       info->key.label, &info->key.u.ipv6.dst,
+-                                      &info->key.u.ipv6.src, NULL, info);
++                                      &info->key.u.ipv6.src,
++                                      &info->dst_cache, info);
+               if (IS_ERR(ndst))
+                       return PTR_ERR(ndst);
+               dst_release(ndst);