--- /dev/null
+From foo@baz Thu Oct 22 17:25:37 PDT 2015
+From: WANG Cong <xiyou.wangcong@gmail.com>
+Date: Tue, 6 Oct 2015 17:23:47 -0700
+Subject: act_mirred: clear sender cpu before sending to tx
+
+From: WANG Cong <xiyou.wangcong@gmail.com>
+
+[ Upstream commit d40496a56430eac0d330378816954619899fe303 ]
+
+Similar to commit c29390c6dfee ("xps: must clear sender_cpu before forwarding"),
+the skb->sender_cpu needs to be cleared when moving from Rx to
+Tx, otherwise the kernel could crash.
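+
+For reference, a minimal sketch of what the clearing entails, assuming
+the 4.2-era helper in include/linux/skbuff.h (under CONFIG_XPS the
+field is simply zeroed, so the Tx path recomputes the queue mapping):
+
+	static inline void skb_sender_cpu_clear(struct sk_buff *skb)
+	{
+	#ifdef CONFIG_XPS
+		skb->sender_cpu = 0;	/* stale Rx value must not steer Tx */
+	#endif
+	}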
+
+Fixes: 2bd82484bb4c ("xps: fix xps for stacked devices")
+Cc: Eric Dumazet <edumazet@google.com>
+Cc: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: Cong Wang <cwang@twopensource.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/act_mirred.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/sched/act_mirred.c
++++ b/net/sched/act_mirred.c
+@@ -166,6 +166,7 @@ static int tcf_mirred(struct sk_buff *sk
+
+ skb2->skb_iif = skb->dev->ifindex;
+ skb2->dev = dev;
++ skb_sender_cpu_clear(skb2);
+ err = dev_queue_xmit(skb2);
+
+ out:
--- /dev/null
+From foo@baz Thu Oct 22 17:25:37 PDT 2015
+From: Aaron Conole <aconole@bytheb.org>
+Date: Sat, 26 Sep 2015 18:50:42 -0400
+Subject: af_unix: Convert the unix_sk macro to an inline function for type safety
+
+From: Aaron Conole <aconole@bytheb.org>
+
+[ Upstream commit 4613012db1d911f80897f9446a49de817b2c4c47 ]
+
+As suggested by Eric Dumazet, this change replaces the
+#define with a static inline function, so that the compiler
+can complain when the API is misused.
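+
+As a hedged illustration of the misuse the compiler can now flag
+(wrong_ptr is a hypothetical pointer of an unrelated type):
+
+	struct socket *wrong_ptr;
+
+	/* Old macro: the bare cast compiled silently for any pointer
+	 * type. New inline function: this line now draws an
+	 * incompatible-pointer-type warning, because the argument
+	 * must be a struct sock *. */
+	struct unix_sock *u = unix_sk(wrong_ptr);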
+
+Signed-off-by: Aaron Conole <aconole@bytheb.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/af_unix.h | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/include/net/af_unix.h
++++ b/include/net/af_unix.h
+@@ -64,7 +64,11 @@ struct unix_sock {
+ #define UNIX_GC_MAYBE_CYCLE 1
+ struct socket_wq peer_wq;
+ };
+-#define unix_sk(__sk) ((struct unix_sock *)__sk)
++
++static inline struct unix_sock *unix_sk(struct sock *sk)
++{
++ return (struct unix_sock *)sk;
++}
+
+ #define peer_wait peer_wq.wait
+
--- /dev/null
+From foo@baz Thu Oct 22 17:25:37 PDT 2015
+From: Aaron Conole <aconole@bytheb.org>
+Date: Sat, 26 Sep 2015 18:50:43 -0400
+Subject: af_unix: return data from multiple SKBs on recv() with MSG_PEEK flag
+
+From: Aaron Conole <aconole@bytheb.org>
+
+[ Upstream commit 9f389e35674f5b086edd70ed524ca0f287259725 ]
+
+AF_UNIX sockets now return multiple skbs from recv() when the
+MSG_PEEK flag is set.
+
+This is referenced in kernel bugzilla #12323 @
+https://bugzilla.kernel.org/show_bug.cgi?id=12323
+
+As described both in the BZ and lkml thread @
+http://lkml.org/lkml/2008/1/8/444 calling recv() with MSG_PEEK on an
+AF_UNIX socket only reads a single skb, where the desired effect is
+to return as much skb data as has been queued, until hitting the recv
+buffer size (whichever comes first).
+
+The modified MSG_PEEK path will now move to the next skb in the tree
+and jump to the again: label, rather than following the natural loop
+structure. This requires duplicating some of the loop head actions.
+
+This was tested using the socketpair Python code attached to
+the bugzilla issue.
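+
+A rough user-space sketch of the expected behavior after this change
+(fd is assumed to be a connected AF_UNIX stream socket):
+
+	char buf[8192];
+	/* May now return data spanning several queued skbs, up to the
+	 * buffer size, instead of stopping at the first skb boundary. */
+	ssize_t n = recv(fd, buf, sizeof(buf), MSG_PEEK);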
+
+Signed-off-by: Aaron Conole <aconole@bytheb.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/unix/af_unix.c | 14 +++++++++++++-
+ 1 file changed, 13 insertions(+), 1 deletion(-)
+
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -2046,8 +2046,20 @@ again:
+ if (UNIXCB(skb).fp)
+ scm.fp = scm_fp_dup(UNIXCB(skb).fp);
+
+- sk_peek_offset_fwd(sk, chunk);
++ if (skip) {
++ sk_peek_offset_fwd(sk, chunk);
++ skip -= chunk;
++ }
+
++ if (UNIXCB(skb).fp)
++ break;
++
++ last = skb;
++ unix_state_lock(sk);
++ skb = skb_peek_next(skb, &sk->sk_receive_queue);
++ if (skb)
++ goto again;
++ unix_state_unlock(sk);
+ break;
+ }
+ } while (size);
--- /dev/null
+From foo@baz Thu Oct 22 17:25:37 PDT 2015
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Fri, 2 Oct 2015 12:06:03 +0200
+Subject: bpf: fix panic in SO_GET_FILTER with native ebpf programs
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ Upstream commit 93d08b6966cf730ea669d4d98f43627597077153 ]
+
+When sockets have a native eBPF program attached through
+setsockopt(sk, SOL_SOCKET, SO_ATTACH_BPF, ...), and then try to
+dump these over getsockopt(sk, SOL_SOCKET, SO_GET_FILTER, ...),
+the following panic appears:
+
+ [49904.178642] BUG: unable to handle kernel NULL pointer dereference at (null)
+ [49904.178762] IP: [<ffffffff81610fd9>] sk_get_filter+0x39/0x90
+ [49904.182000] PGD 86fc9067 PUD 531a1067 PMD 0
+ [49904.185196] Oops: 0000 [#1] SMP
+ [...]
+ [49904.224677] Call Trace:
+ [49904.226090] [<ffffffff815e3d49>] sock_getsockopt+0x319/0x740
+ [49904.227535] [<ffffffff812f59e3>] ? sock_has_perm+0x63/0x70
+ [49904.228953] [<ffffffff815e2fc8>] ? release_sock+0x108/0x150
+ [49904.230380] [<ffffffff812f5a43>] ? selinux_socket_getsockopt+0x23/0x30
+ [49904.231788] [<ffffffff815dff36>] SyS_getsockopt+0xa6/0xc0
+ [49904.233267] [<ffffffff8171b9ae>] entry_SYSCALL_64_fastpath+0x12/0x71
+
+The underlying issue is the very same as in commit b382c0865600
+("sock, diag: fix panic in sock_diag_put_filterinfo"), that is,
+native eBPF programs don't store an original program since this
+is only needed in cBPF ones.
+
+However, sk_get_filter() wasn't updated to test for this at the
+time when eBPF could be attached. Just throw an error to the user
+to indicate that eBPF cannot be dumped over this interface.
+That way, it can also be known that a program _is_ attached (as
+opposed to just returning 0), and a different (future) method needs
+to be consulted for a dump.
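+
+A minimal sketch of the trigger sequence (setup and error handling
+omitted; prog_fd, insns and len are assumed/hypothetical names):
+
+	setsockopt(fd, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd, sizeof(prog_fd));
+	/* Pre-patch, this dereferenced the missing orig_prog and
+	 * panicked; post-patch, it fails with -EACCES instead. */
+	getsockopt(fd, SOL_SOCKET, SO_GET_FILTER, insns, &len);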
+
+Fixes: 89aa075832b0 ("net: sock: allow eBPF programs to be attached to sockets")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Alexei Starovoitov <ast@plumgrid.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/filter.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -1526,9 +1526,13 @@ int sk_get_filter(struct sock *sk, struc
+ goto out;
+
+ /* We're copying the filter that has been originally attached,
+- * so no conversion/decode needed anymore.
++ * so no conversion/decode needed anymore. eBPF programs that
++ * have no original program cannot be dumped through this.
+ */
++ ret = -EACCES;
+ fprog = filter->prog->orig_prog;
++ if (!fprog)
++ goto out;
+
+ ret = fprog->len;
+ if (!len)
--- /dev/null
+From foo@baz Thu Oct 22 17:25:37 PDT 2015
+From: Joe Perches <joe@perches.com>
+Date: Wed, 14 Oct 2015 01:09:40 -0700
+Subject: ethtool: Use kcalloc instead of kmalloc for ethtool_get_strings
+
+From: Joe Perches <joe@perches.com>
+
+[ Upstream commit 077cb37fcf6f00a45f375161200b5ee0cd4e937b ]
+
+It seems that kernel memory can leak into userspace via a
+kmalloc, ethtool_get_strings, then copy_to_user sequence.
+
+Avoid this by using kcalloc to zero fill the copied buffer.
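+
+As a side note, kcalloc() also guards against multiplication overflow,
+unlike an open-coded kmalloc(n * size, ...):
+
+	/* zeroed and overflow-checked, so no uninitialized kernel
+	 * bytes can reach the later copy_to_user() */
+	data = kcalloc(gstrings.len, ETH_GSTRING_LEN, GFP_USER);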
+
+Signed-off-by: Joe Perches <joe@perches.com>
+Acked-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/ethtool.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/core/ethtool.c
++++ b/net/core/ethtool.c
+@@ -1273,7 +1273,7 @@ static int ethtool_get_strings(struct ne
+
+ gstrings.len = ret;
+
+- data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER);
++ data = kcalloc(gstrings.len, ETH_GSTRING_LEN, GFP_USER);
+ if (!data)
+ return -ENOMEM;
+
--- /dev/null
+From foo@baz Thu Oct 22 17:25:37 PDT 2015
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 1 Oct 2015 05:39:26 -0700
+Subject: inet: fix race in reqsk_queue_unlink()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 2306c704ce280c97a60d1f45333b822b40281dea ]
+
+reqsk_timer_handler() tests if icsk_accept_queue.listen_opt
+is NULL at its beginning.
+
+By the time it calls inet_csk_reqsk_queue_drop() and
+reqsk_queue_unlink(), the listener might have been closed and
+inet_csk_listen_stop() might have called reqsk_queue_yank_acceptq(),
+which sets icsk_accept_queue.listen_opt to NULL.
+
+We therefore need to re-check listen_opt for NULL after taking
+syn_wait_lock, for proper synchronization.
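+
+A hedged reconstruction of the race being closed:
+
+	/* CPU A: reqsk_timer_handler()    CPU B: inet_csk_listen_stop()
+	 *   sees listen_opt != NULL         reqsk_queue_yank_acceptq()
+	 *   ...                               -> listen_opt = NULL
+	 *   reqsk_queue_unlink()
+	 *     walks stale syn_table -> NULL pointer dereference
+	 *
+	 * Fix: re-read queue->listen_opt under syn_wait_lock and skip
+	 * the hash-table walk when it is NULL. */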
+
+Fixes: fa76ce7328b2 ("inet: get rid of central tcp/dccp listener timer")
+Fixes: b357a364c57c ("inet: fix possible panic in reqsk_queue_unlink()")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/inet_connection_sock.c | 19 ++++++++++---------
+ 1 file changed, 10 insertions(+), 9 deletions(-)
+
+--- a/net/ipv4/inet_connection_sock.c
++++ b/net/ipv4/inet_connection_sock.c
+@@ -568,21 +568,22 @@ EXPORT_SYMBOL(inet_rtx_syn_ack);
+ static bool reqsk_queue_unlink(struct request_sock_queue *queue,
+ struct request_sock *req)
+ {
+- struct listen_sock *lopt = queue->listen_opt;
+ struct request_sock **prev;
++ struct listen_sock *lopt;
+ bool found = false;
+
+ spin_lock(&queue->syn_wait_lock);
+-
+- for (prev = &lopt->syn_table[req->rsk_hash]; *prev != NULL;
+- prev = &(*prev)->dl_next) {
+- if (*prev == req) {
+- *prev = req->dl_next;
+- found = true;
+- break;
++ lopt = queue->listen_opt;
++ if (lopt) {
++ for (prev = &lopt->syn_table[req->rsk_hash]; *prev != NULL;
++ prev = &(*prev)->dl_next) {
++ if (*prev == req) {
++ *prev = req->dl_next;
++ found = true;
++ break;
++ }
+ }
+ }
+-
+ spin_unlock(&queue->syn_wait_lock);
+ if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer))
+ reqsk_put(req);
--- /dev/null
+From foo@baz Thu Oct 22 17:25:37 PDT 2015
+From: Eric Dumazet <edumazet@google.com>
+Date: Sat, 19 Sep 2015 09:48:04 -0700
+Subject: inet: fix races in reqsk_queue_hash_req()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 29c6852602e259d2c1882f320b29d5c3fec0de04 ]
+
+Before allowing lockless LISTEN processing, we need to make
+sure to arm the SYN_RECV timer before the req socket is visible
+in hash tables.
+
+Also, req->rsk_hash should be written before we set rsk_refcnt
+to a non-zero value.
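+
+The required publication order, as a sketch:
+
+	/* 1. fully initialize req: arm rsk_timer, write rsk_hash  */
+	/* 2. smp_wmb()                                            */
+	/* 3. atomic_set(&req->rsk_refcnt, 2) -- lookups may now
+	 *    legitimately find and take a reference on req        */
+	/* 4. link req into syn_table under syn_wait_lock          */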
+
+Fixes: fa76ce7328b2 ("inet: get rid of central tcp/dccp listener timer")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Ying Cai <ycai@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/inet_connection_sock.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/net/ipv4/inet_connection_sock.c
++++ b/net/ipv4/inet_connection_sock.c
+@@ -676,20 +676,20 @@ void reqsk_queue_hash_req(struct request
+ req->num_timeout = 0;
+ req->sk = NULL;
+
++ setup_timer(&req->rsk_timer, reqsk_timer_handler, (unsigned long)req);
++ mod_timer_pinned(&req->rsk_timer, jiffies + timeout);
++ req->rsk_hash = hash;
++
+ /* before letting lookups find us, make sure all req fields
+ * are committed to memory and refcnt initialized.
+ */
+ smp_wmb();
+ atomic_set(&req->rsk_refcnt, 2);
+- setup_timer(&req->rsk_timer, reqsk_timer_handler, (unsigned long)req);
+- req->rsk_hash = hash;
+
+ spin_lock(&queue->syn_wait_lock);
+ req->dl_next = lopt->syn_table[hash];
+ lopt->syn_table[hash] = req;
+ spin_unlock(&queue->syn_wait_lock);
+-
+- mod_timer_pinned(&req->rsk_timer, jiffies + timeout);
+ }
+ EXPORT_SYMBOL(reqsk_queue_hash_req);
+
--- /dev/null
+From foo@baz Thu Oct 22 17:25:37 PDT 2015
+From: Alexander Couzens <lynxis@fe80.eu>
+Date: Mon, 28 Sep 2015 11:32:42 +0200
+Subject: l2tp: protect tunnel->del_work by ref_count
+
+From: Alexander Couzens <lynxis@fe80.eu>
+
+[ Upstream commit 06a15f51cf3618e32a73871ee6a547ef7fd902b5 ]
+
+There is a small chance that tunnel_free() is called before tunnel->del_work
+is scheduled, resulting in a NULL pointer dereference.
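+
+The refcount choreography after this patch, as a sketch:
+
+	/* l2tp_tunnel_delete():   inc refcount, then queue del_work;
+	 *                         on queueing failure, dec and return 1.
+	 * l2tp_tunnel_del_work(): tear down the tunnel socket, then dec
+	 *                         the refcount, so tunnel_free() cannot
+	 *                         run before the work has executed. */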
+
+Signed-off-by: Alexander Couzens <lynxis@fe80.eu>
+Acked-by: James Chapman <jchapman@katalix.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/l2tp/l2tp_core.c | 11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+--- a/net/l2tp/l2tp_core.c
++++ b/net/l2tp/l2tp_core.c
+@@ -1319,7 +1319,7 @@ static void l2tp_tunnel_del_work(struct
+ tunnel = container_of(work, struct l2tp_tunnel, del_work);
+ sk = l2tp_tunnel_sock_lookup(tunnel);
+ if (!sk)
+- return;
++ goto out;
+
+ sock = sk->sk_socket;
+
+@@ -1340,6 +1340,8 @@ static void l2tp_tunnel_del_work(struct
+ }
+
+ l2tp_tunnel_sock_put(sk);
++out:
++ l2tp_tunnel_dec_refcount(tunnel);
+ }
+
+ /* Create a socket for the tunnel, if one isn't set up by
+@@ -1639,8 +1641,13 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create);
+ */
+ int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
+ {
++ l2tp_tunnel_inc_refcount(tunnel);
+ l2tp_tunnel_closeall(tunnel);
+- return (false == queue_work(l2tp_wq, &tunnel->del_work));
++ if (false == queue_work(l2tp_wq, &tunnel->del_work)) {
++ l2tp_tunnel_dec_refcount(tunnel);
++ return 1;
++ }
++ return 0;
+ }
+ EXPORT_SYMBOL_GPL(l2tp_tunnel_delete);
+
--- /dev/null
+From foo@baz Thu Oct 22 17:25:37 PDT 2015
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 29 Sep 2015 18:52:25 -0700
+Subject: net: add pfmemalloc check in sk_add_backlog()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit c7c49b8fde26b74277188bdc6c9dca38db6fa35b ]
+
+Greg reported crashes hitting the following check in __sk_backlog_rcv()
+
+ BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));
+
+The pfmemalloc bit is currently checked in sk_filter().
+
+This works correctly for TCP, because sk_filter() is run in
+tcp_v[46]_rcv() before hitting the prequeue or backlog checks.
+
+For UDP and other protocols, this does not work, because sk_filter()
+is run from sock_queue_rcv_skb(), which might be called _after_ backlog
+queuing if the socket is owned by the user by the time the packet is
+processed by the softirq handler.
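+
+The two receive paths, sketched:
+
+	/* TCP: tcp_v[46]_rcv() -> sk_filter() [pfmemalloc checked]
+	 *        -> prequeue/backlog
+	 * UDP: sk_add_backlog() (socket owned by user), and only later
+	 *        sock_queue_rcv_skb() -> sk_filter() [too late]
+	 *
+	 * Hence the backlog itself must reject pfmemalloc skbs for
+	 * non-SOCK_MEMALLOC sockets. */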
+
+Fixes: b4b9e35585089 ("netvm: set PF_MEMALLOC as appropriate during SKB processing")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Greg Thelen <gthelen@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/sock.h | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -826,6 +826,14 @@ static inline __must_check int sk_add_ba
+ if (sk_rcvqueues_full(sk, limit))
+ return -ENOBUFS;
+
++ /*
++ * If the skb was allocated from pfmemalloc reserves, only
++ * allow SOCK_MEMALLOC sockets to use it as this socket is
++ * helping free memory
++ */
++ if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC))
++ return -ENOMEM;
++
+ __sk_add_backlog(sk, skb);
+ sk->sk_backlog.len += skb->truesize;
+ return 0;
--- /dev/null
+From foo@baz Thu Oct 22 17:25:37 PDT 2015
+From: Ivan Mikhaylov <ivan@ru.ibm.com>
+Date: Fri, 25 Sep 2015 11:52:27 +0400
+Subject: net/ibm/emac: bump version numbers for correct work with ethtool
+
+From: Ivan Mikhaylov <ivan@ru.ibm.com>
+
+[ Upstream commit 661dfc65f7981481ba2e31aaa702371e82336e56 ]
+
+The size of the MAC register dump used to be the size specified by the
+reg property in the device tree. Userland has no good way of finding
+out that size, and it was not specified consistently for each MAC type,
+so ethtool would end up printing junk at the end of the register dump
+if the device tree didn't match the size it assumed.
+
+Using the new version numbers indicates unambiguously that the size of
+the MAC register dump is dependent only on the MAC type.
+
+Fixes: 5369c71f7ca2 ("net/ibm/emac: fix size of emac dump memory areas")
+Signed-off-by: Ivan Mikhaylov <ivan@ru.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/ibm/emac/core.h | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/ibm/emac/core.h
++++ b/drivers/net/ethernet/ibm/emac/core.h
+@@ -460,8 +460,8 @@ struct emac_ethtool_regs_subhdr {
+ u32 index;
+ };
+
+-#define EMAC_ETHTOOL_REGS_VER 0
+-#define EMAC4_ETHTOOL_REGS_VER 1
+-#define EMAC4SYNC_ETHTOOL_REGS_VER 2
++#define EMAC_ETHTOOL_REGS_VER 3
++#define EMAC4_ETHTOOL_REGS_VER 4
++#define EMAC4SYNC_ETHTOOL_REGS_VER 5
+
+ #endif /* __IBM_NEWEMAC_CORE_H */
--- /dev/null
+From foo@baz Thu Oct 22 17:25:37 PDT 2015
+From: Andrey Vagin <avagin@openvz.org>
+Date: Fri, 2 Oct 2015 00:05:36 +0300
+Subject: net/unix: fix logic about sk_peek_offset
+
+From: Andrey Vagin <avagin@openvz.org>
+
+[ Upstream commit e9193d60d363e4dff75ff6d43a48f22be26d59c7 ]
+
+Now recv() with MSG_PEEK can return data from multiple SKBs.
+
+Unfortunately we take the peek offset into account for each skb,
+which is wrong. We need to apply the peek offset only once.
+
+In addition, the peek offset should be used only if MSG_PEEK is set.
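+
+The intended logic, as a sketch mirroring the patch below:
+
+	/* compute the skip once, and only when peeking */
+	skip = (flags & MSG_PEEK) ? sk_peek_offset(sk, flags) : 0;
+
+	/* ... walk the receive queue, consuming 'skip' first ... */
+
+	/* once the initial offset is consumed, later skbs start at 0 */
+	skip = 0;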
+
+Cc: "David S. Miller" <davem@davemloft.net> (maintainer:NETWORKING
+Cc: Eric Dumazet <edumazet@google.com> (commit_signer:1/14=7%)
+Cc: Aaron Conole <aconole@bytheb.org>
+Fixes: 9f389e35674f ("af_unix: return data from multiple SKBs on recv() with MSG_PEEK flag")
+Signed-off-by: Andrey Vagin <avagin@openvz.org>
+Tested-by: Aaron Conole <aconole@bytheb.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/unix/af_unix.c | 12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -1938,6 +1938,11 @@ static int unix_stream_recvmsg(struct so
+ goto out;
+ }
+
++ if (flags & MSG_PEEK)
++ skip = sk_peek_offset(sk, flags);
++ else
++ skip = 0;
++
+ do {
+ int chunk;
+ struct sk_buff *skb, *last;
+@@ -1984,7 +1989,6 @@ again:
+ break;
+ }
+
+- skip = sk_peek_offset(sk, flags);
+ while (skip >= unix_skb_len(skb)) {
+ skip -= unix_skb_len(skb);
+ last = skb;
+@@ -2046,14 +2050,12 @@ again:
+ if (UNIXCB(skb).fp)
+ scm.fp = scm_fp_dup(UNIXCB(skb).fp);
+
+- if (skip) {
+- sk_peek_offset_fwd(sk, chunk);
+- skip -= chunk;
+- }
++ sk_peek_offset_fwd(sk, chunk);
+
+ if (UNIXCB(skb).fp)
+ break;
+
++ skip = 0;
+ last = skb;
+ unix_state_lock(sk);
+ skb = skb_peek_next(skb, &sk->sk_receive_queue);
--- /dev/null
+From foo@baz Thu Oct 22 17:25:37 PDT 2015
+From: "Arad, Ronen" <ronen.arad@intel.com>
+Date: Thu, 15 Oct 2015 01:55:17 -0700
+Subject: netlink: Trim skb to alloc size to avoid MSG_TRUNC
+
+From: "Arad, Ronen" <ronen.arad@intel.com>
+
+[ Upstream commit db65a3aaf29ecce2e34271d52e8d2336b97bd9fe ]
+
+netlink_dump() allocates an skb based on the calculated min_dump_alloc
+or the per-socket max_recvmsg_len.
+min_alloc_size is the maximum space required for any single netdev's
+attributes, as calculated by rtnl_calcit().
+max_recvmsg_len tracks the user-provided buffer size passed to
+netlink_recvmsg(). It is capped at 16KiB.
+The intention is to avoid small allocations and to minimize the number
+of calls required to obtain dump information for all net devices.
+
+netlink_dump() packs as many small messages as fit within an skb
+that was sized for the largest single netdev's information. The actual
+space available within an skb is larger than what is requested; with
+the align-to-next-power-of-2 approach it can be up to nearly 2x the
+requested size.
+
+Allowing netlink_dump() to use all the space available within the
+allocated skb increases the buffer size a user has to provide to avoid
+truncation (i.e. the MSG_TRUNC flag being set).
+
+It was observed that with many VLANs configured on at least one netdev,
+a larger buffer of near 64KiB was necessary to avoid "Message truncated"
+error in "ip link" or "bridge [-c[ompressvlans]] vlan show" when
+min_alloc_size was only a little over 32KiB.
+
+This patch trims the skb to the allocated size in order to allow the
+user to avoid truncation with a more reasonable buffer size.
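+
+The effect, sketched (alloc_size being whichever size was requested
+from the allocator):
+
+	/* tailroom may exceed alloc_size due to power-of-two rounding;
+	 * reserving the excess makes the usable room exactly alloc_size,
+	 * so a user buffer of that size never sees MSG_TRUNC. */
+	skb_reserve(skb, skb_tailroom(skb) - alloc_size);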
+
+Signed-off-by: Ronen Arad <ronen.arad@intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netlink/af_netlink.c | 34 ++++++++++++++++++++++------------
+ 1 file changed, 22 insertions(+), 12 deletions(-)
+
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -2683,6 +2683,7 @@ static int netlink_dump(struct sock *sk)
+ struct sk_buff *skb = NULL;
+ struct nlmsghdr *nlh;
+ int len, err = -ENOBUFS;
++ int alloc_min_size;
+ int alloc_size;
+
+ mutex_lock(nlk->cb_mutex);
+@@ -2691,9 +2692,6 @@ static int netlink_dump(struct sock *sk)
+ goto errout_skb;
+ }
+
+- cb = &nlk->cb;
+- alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
+-
+ if (!netlink_rx_is_mmaped(sk) &&
+ atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
+ goto errout_skb;
+@@ -2703,23 +2701,35 @@ static int netlink_dump(struct sock *sk)
+ * to reduce number of system calls on dump operations, if user
+ * ever provided a big enough buffer.
+ */
+- if (alloc_size < nlk->max_recvmsg_len) {
+- skb = netlink_alloc_skb(sk,
+- nlk->max_recvmsg_len,
+- nlk->portid,
++ cb = &nlk->cb;
++ alloc_min_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
++
++ if (alloc_min_size < nlk->max_recvmsg_len) {
++ alloc_size = nlk->max_recvmsg_len;
++ skb = netlink_alloc_skb(sk, alloc_size, nlk->portid,
+ GFP_KERNEL |
+ __GFP_NOWARN |
+ __GFP_NORETRY);
+- /* available room should be exact amount to avoid MSG_TRUNC */
+- if (skb)
+- skb_reserve(skb, skb_tailroom(skb) -
+- nlk->max_recvmsg_len);
+ }
+- if (!skb)
++ if (!skb) {
++ alloc_size = alloc_min_size;
+ skb = netlink_alloc_skb(sk, alloc_size, nlk->portid,
+ GFP_KERNEL);
++ }
+ if (!skb)
+ goto errout_skb;
++
++ /* Trim skb to allocated size. User is expected to provide buffer as
++ * large as max(min_dump_alloc, 16KiB (max_recvmsg_len capped at
++ * netlink_recvmsg())). dump will pack as many smaller messages as
++ * could fit within the allocated skb. skb is typically allocated
++ * with larger space than required (could be as much as near 2x the
++ * requested size with align to next power of 2 approach). Allowing
++ * dump to use the excess space makes it difficult for a user to have a
++ * reasonable static buffer based on the expected largest dump of a
++ * single netdev. The outcome is MSG_TRUNC error.
++ */
++ skb_reserve(skb, skb_tailroom(skb) - alloc_size);
+ netlink_skb_set_owner_r(skb, sk);
+
+ len = cb->dump(skb, cb);
--- /dev/null
+From foo@baz Thu Oct 22 17:25:37 PDT 2015
+From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+Date: Fri, 2 Oct 2015 13:18:22 +0300
+Subject: ovs: do not allocate memory from offline numa node
+
+From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+
+[ Upstream commit 598c12d0ba6de9060f04999746eb1e015774044b ]
+
+When openvswitch tries to allocate memory from offline NUMA node 0:
+stats = kmem_cache_alloc_node(flow_stats_cache, GFP_KERNEL | __GFP_ZERO, 0)
+it triggers VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES || !node_online(nid))
+[ replaced with VM_WARN_ON(!node_online(nid)) recently ] in linux/gfp.h.
+This patch disables NUMA affinity in this case.
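+
+NUMA_NO_NODE tells the slab allocator to pick any suitable node rather
+than insisting on a specific one, so the fix reduces to (sketch):
+
+	stats = kmem_cache_alloc_node(flow_stats_cache,
+				      GFP_KERNEL | __GFP_ZERO,
+				      node_online(0) ? 0 : NUMA_NO_NODE);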
+
+Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+Acked-by: Pravin B Shelar <pshelar@nicira.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/openvswitch/flow_table.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/openvswitch/flow_table.c
++++ b/net/openvswitch/flow_table.c
+@@ -92,7 +92,8 @@ struct sw_flow *ovs_flow_alloc(void)
+
+ /* Initialize the default stat node. */
+ stats = kmem_cache_alloc_node(flow_stats_cache,
+- GFP_KERNEL | __GFP_ZERO, 0);
++ GFP_KERNEL | __GFP_ZERO,
++ node_online(0) ? 0 : NUMA_NO_NODE);
+ if (!stats)
+ goto err;
+
--- /dev/null
+From foo@baz Thu Oct 22 17:25:37 PDT 2015
+From: Guillaume Nault <g.nault@alphalink.fr>
+Date: Wed, 30 Sep 2015 11:45:33 +0200
+Subject: ppp: don't override sk->sk_state in pppoe_flush_dev()
+
+From: Guillaume Nault <g.nault@alphalink.fr>
+
+[ Upstream commit e6740165b8f7f06d8caee0fceab3fb9d790a6fed ]
+
+Since commit 2b018d57ff18 ("pppoe: drop PPPOX_ZOMBIEs in pppoe_release"),
+pppoe_release() calls dev_put(po->pppoe_dev) if sk is in the
+PPPOX_ZOMBIE state. But pppoe_flush_dev() can set sk->sk_state to
+PPPOX_ZOMBIE _and_ reset po->pppoe_dev to NULL. This leads to the
+following oops:
+
+[ 570.140800] BUG: unable to handle kernel NULL pointer dereference at 00000000000004e0
+[ 570.142931] IP: [<ffffffffa018c701>] pppoe_release+0x50/0x101 [pppoe]
+[ 570.144601] PGD 3d119067 PUD 3dbc1067 PMD 0
+[ 570.144601] Oops: 0000 [#1] SMP
+[ 570.144601] Modules linked in: l2tp_ppp l2tp_netlink l2tp_core ip6_udp_tunnel udp_tunnel pppoe pppox ppp_generic slhc loop crc32c_intel ghash_clmulni_intel jitterentropy_rng sha256_generic hmac drbg ansi_cprng aesni_intel aes_x86_64 ablk_helper cryptd lrw gf128mul glue_helper acpi_cpufreq evdev serio_raw processor button ext4 crc16 mbcache jbd2 virtio_net virtio_blk virtio_pci virtio_ring virtio
+[ 570.144601] CPU: 1 PID: 15738 Comm: ppp-apitest Not tainted 4.2.0 #1
+[ 570.144601] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Debian-1.8.2-1 04/01/2014
+[ 570.144601] task: ffff88003d30d600 ti: ffff880036b60000 task.ti: ffff880036b60000
+[ 570.144601] RIP: 0010:[<ffffffffa018c701>] [<ffffffffa018c701>] pppoe_release+0x50/0x101 [pppoe]
+[ 570.144601] RSP: 0018:ffff880036b63e08 EFLAGS: 00010202
+[ 570.144601] RAX: 0000000000000000 RBX: ffff880034340000 RCX: 0000000000000206
+[ 570.144601] RDX: 0000000000000006 RSI: ffff88003d30dd20 RDI: ffff88003d30dd20
+[ 570.144601] RBP: ffff880036b63e28 R08: 0000000000000001 R09: 0000000000000000
+[ 570.144601] R10: 00007ffee9b50420 R11: ffff880034340078 R12: ffff8800387ec780
+[ 570.144601] R13: ffff8800387ec7b0 R14: ffff88003e222aa0 R15: ffff8800387ec7b0
+[ 570.144601] FS: 00007f5672f48700(0000) GS:ffff88003fc80000(0000) knlGS:0000000000000000
+[ 570.144601] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 570.144601] CR2: 00000000000004e0 CR3: 0000000037f7e000 CR4: 00000000000406a0
+[ 570.144601] Stack:
+[ 570.144601] ffffffffa018f240 ffff8800387ec780 ffffffffa018f240 ffff8800387ec7b0
+[ 570.144601] ffff880036b63e48 ffffffff812caabe ffff880039e4e000 0000000000000008
+[ 570.144601] ffff880036b63e58 ffffffff812cabad ffff880036b63ea8 ffffffff811347f5
+[ 570.144601] Call Trace:
+[ 570.144601] [<ffffffff812caabe>] sock_release+0x1a/0x75
+[ 570.144601] [<ffffffff812cabad>] sock_close+0xd/0x11
+[ 570.144601] [<ffffffff811347f5>] __fput+0xff/0x1a5
+[ 570.144601] [<ffffffff811348cb>] ____fput+0x9/0xb
+[ 570.144601] [<ffffffff81056682>] task_work_run+0x66/0x90
+[ 570.144601] [<ffffffff8100189e>] prepare_exit_to_usermode+0x8c/0xa7
+[ 570.144601] [<ffffffff81001a26>] syscall_return_slowpath+0x16d/0x19b
+[ 570.144601] [<ffffffff813babb1>] int_ret_from_sys_call+0x25/0x9f
+[ 570.144601] Code: 48 8b 83 c8 01 00 00 a8 01 74 12 48 89 df e8 8b 27 14 e1 b8 f7 ff ff ff e9 b7 00 00 00 8a 43 12 a8 0b 74 1c 48 8b 83 a8 04 00 00 <48> 8b 80 e0 04 00 00 65 ff 08 48 c7 83 a8 04 00 00 00 00 00 00
+[ 570.144601] RIP [<ffffffffa018c701>] pppoe_release+0x50/0x101 [pppoe]
+[ 570.144601] RSP <ffff880036b63e08>
+[ 570.144601] CR2: 00000000000004e0
+[ 570.200518] ---[ end trace 46956baf17349563 ]---
+
+pppoe_flush_dev() has no reason to override sk->sk_state with
+PPPOX_ZOMBIE. pppox_unbind_sock() already sets sk->sk_state to
+PPPOX_DEAD, which is the correct state given that sk is unbound and
+po->pppoe_dev is NULL.
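+
+The state handling, sketched:
+
+	/* pppox_unbind_sock() sets sk->sk_state = PPPOX_DEAD; leaving
+	 * it there keeps pppoe_release() away from the PPPOX_ZOMBIE
+	 * branch that dereferences the now-NULL po->pppoe_dev. */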
+
+Fixes: 2b018d57ff18 ("pppoe: drop PPPOX_ZOMBIEs in pppoe_release")
+Tested-by: Oleksii Berezhniak <core@irc.lg.ua>
+Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ppp/pppoe.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/drivers/net/ppp/pppoe.c
++++ b/drivers/net/ppp/pppoe.c
+@@ -313,7 +313,6 @@ static void pppoe_flush_dev(struct net_d
+ if (po->pppoe_dev == dev &&
+ sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND | PPPOX_ZOMBIE)) {
+ pppox_unbind_sock(sk);
+- sk->sk_state = PPPOX_ZOMBIE;
+ sk->sk_state_change(sk);
+ po->pppoe_dev = NULL;
+ dev_put(dev);
--- /dev/null
+From foo@baz Thu Oct 22 17:25:37 PDT 2015
+From: Pravin B Shelar <pshelar@nicira.com>
+Date: Tue, 22 Sep 2015 12:57:53 -0700
+Subject: skbuff: Fix skb checksum flag on skb pull
+
+From: Pravin B Shelar <pshelar@nicira.com>
+
+[ Upstream commit 6ae459bdaaeebc632b16e54dcbabb490c6931d61 ]
+
+A VXLAN device can receive an skb with checksum partial. But the checksum
+offset could be in the outer header, which is pulled on receive. This
+results in a negative checksum offset for the skb. Such an skb can cause
+the assert failure in skb_checksum_help(). The following patch fixes the
+bug by setting checksum-none while pulling the outer header.
+
+Following is the kernel panic message from an old kernel hitting the bug.
+
+------------[ cut here ]------------
+kernel BUG at net/core/dev.c:1906!
+RIP: 0010:[<ffffffff81518034>] skb_checksum_help+0x144/0x150
+Call Trace:
+<IRQ>
+[<ffffffffa0164c28>] queue_userspace_packet+0x408/0x470 [openvswitch]
+[<ffffffffa016614d>] ovs_dp_upcall+0x5d/0x60 [openvswitch]
+[<ffffffffa0166236>] ovs_dp_process_packet_with_key+0xe6/0x100 [openvswitch]
+[<ffffffffa016629b>] ovs_dp_process_received_packet+0x4b/0x80 [openvswitch]
+[<ffffffffa016c51a>] ovs_vport_receive+0x2a/0x30 [openvswitch]
+[<ffffffffa0171383>] vxlan_rcv+0x53/0x60 [openvswitch]
+[<ffffffffa01734cb>] vxlan_udp_encap_recv+0x8b/0xf0 [openvswitch]
+[<ffffffff8157addc>] udp_queue_rcv_skb+0x2dc/0x3b0
+[<ffffffff8157b56f>] __udp4_lib_rcv+0x1cf/0x6c0
+[<ffffffff8157ba7a>] udp_rcv+0x1a/0x20
+[<ffffffff8154fdbd>] ip_local_deliver_finish+0xdd/0x280
+[<ffffffff81550128>] ip_local_deliver+0x88/0x90
+[<ffffffff8154fa7d>] ip_rcv_finish+0x10d/0x370
+[<ffffffff81550365>] ip_rcv+0x235/0x300
+[<ffffffff8151ba1d>] __netif_receive_skb+0x55d/0x620
+[<ffffffff8151c360>] netif_receive_skb+0x80/0x90
+[<ffffffff81459935>] virtnet_poll+0x555/0x6f0
+[<ffffffff8151cd04>] net_rx_action+0x134/0x290
+[<ffffffff810683d8>] __do_softirq+0xa8/0x210
+[<ffffffff8162fe6c>] call_softirq+0x1c/0x30
+[<ffffffff810161a5>] do_softirq+0x65/0xa0
+[<ffffffff810687be>] irq_exit+0x8e/0xb0
+[<ffffffff81630733>] do_IRQ+0x63/0xe0
+[<ffffffff81625f2e>] common_interrupt+0x6e/0x6e
+
+Reported-by: Anupam Chanda <achanda@vmware.com>
+Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
+Acked-by: Tom Herbert <tom@herbertland.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/skbuff.h | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -2588,6 +2588,9 @@ static inline void skb_postpull_rcsum(st
+ {
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0));
++ else if (skb->ip_summed == CHECKSUM_PARTIAL &&
++ skb_checksum_start_offset(skb) <= len)
++ skb->ip_summed = CHECKSUM_NONE;
+ }
+
+ unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len);
--- /dev/null
+From foo@baz Thu Oct 22 17:25:37 PDT 2015
+From: Pravin B Shelar <pshelar@nicira.com>
+Date: Mon, 28 Sep 2015 17:24:25 -0700
+Subject: skbuff: Fix skb checksum partial check.
+
+From: Pravin B Shelar <pshelar@nicira.com>
+
+[ Upstream commit 31b33dfb0a144469dd805514c9e63f4993729a48 ]
+
+The earlier patch 6ae459bda tried to detect a void checksum-partial
+skb by comparing the pull length to the checksum offset. But it does
+not work for all cases, since the checksum offset depends on
+updates to skb->data.
+
+The following patch fixes it by validating the checksum start offset
+after the skb->data pointer is updated. A negative checksum start
+offset means there is no need to checksum.
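+
+The key ordering point, as a sketch:
+
+	unsigned char *data = skb->data;
+
+	__skb_pull(skb, len);	/* advances skb->data first */
+	/* skb_checksum_start_offset() is measured from skb->data, so a
+	 * negative value now means the checksum start was pulled off. */
+	skb_postpull_rcsum(skb, data, len);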
+
+Fixes: 6ae459bda ("skbuff: Fix skb checksum flag on skb pull")
+Reported-by: Andrew Vagin <avagin@odin.com>
+Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/skbuff.h | 2 +-
+ net/core/skbuff.c | 9 +++++----
+ 2 files changed, 6 insertions(+), 5 deletions(-)
+
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -2589,7 +2589,7 @@ static inline void skb_postpull_rcsum(st
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0));
+ else if (skb->ip_summed == CHECKSUM_PARTIAL &&
+- skb_checksum_start_offset(skb) <= len)
++ skb_checksum_start_offset(skb) < 0)
+ skb->ip_summed = CHECKSUM_NONE;
+ }
+
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -2976,11 +2976,12 @@ EXPORT_SYMBOL(skb_append_datato_frags);
+ */
+ unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
+ {
++ unsigned char *data = skb->data;
++
+ BUG_ON(len > skb->len);
+- skb->len -= len;
+- BUG_ON(skb->len < skb->data_len);
+- skb_postpull_rcsum(skb, skb->data, len);
+- return skb->data += len;
++ __skb_pull(skb, len);
++ skb_postpull_rcsum(skb, data, len);
++ return skb->data;
+ }
+ EXPORT_SYMBOL_GPL(skb_pull_rcsum);
+
--- /dev/null
+From foo@baz Thu Oct 22 17:25:37 PDT 2015
+From: Jon Paul Maloy <jon.maloy@ericsson.com>
+Date: Wed, 14 Oct 2015 09:23:18 -0400
+Subject: tipc: move fragment importance field to new header position
+
+From: Jon Paul Maloy <jon.maloy@ericsson.com>
+
+[ Upstream commit dde4b5ae65de659b9ec64bafdde0430459fcb495 ]
+
+In commit e3eea1eb47a ("tipc: clean up handling of message priorities")
+we introduced a field in the packet header for keeping track of the
+priority of fragments, since this value is not present in the specified
+protocol header. Since the value is so far only used at the transmitting
+end of the link, we have not yet officially defined it as part of the
+protocol.
+
+Unfortunately, the field we use for keeping this value, bits 13-15
+in word 5, has turned out to be a poor choice; it is already used by the
+broadcast protocol for carrying the 'network id' field of the sending
+node. Since packet fragments also need to be transported across the
+broadcast protocol, the risk of conflict is obvious, and we see this
+happen when we use network identities larger than 2^13-1. This has
+escaped our testing because we have so far only been using small network
+id values.
+
+We now move this field to bits 0-2 in word 9, a field that is guaranteed
+to be unused by all involved protocols.
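+
+For orientation, msg_bits(m, w, pos, mask) reads a mask-wide field at
+bit position 'pos' of header word w, so the move is simply (sketch):
+
+	/* old: word 5, bits 13-15 -- collides with bcast network id */
+	/* new: word 9, bits 0-2   -- unused by all involved protocols */
+	msg_set_bits(m, 9, 0, 0x7, i);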
+
+Fixes: e3eea1eb47a ("tipc: clean up handling of message priorities")
+Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
+Acked-by: Ying Xue <ying.xue@windriver.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tipc/msg.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/tipc/msg.h
++++ b/net/tipc/msg.h
+@@ -353,7 +353,7 @@ static inline void msg_set_seqno(struct
+ static inline u32 msg_importance(struct tipc_msg *m)
+ {
+ if (unlikely(msg_user(m) == MSG_FRAGMENTER))
+- return msg_bits(m, 5, 13, 0x7);
++ return msg_bits(m, 9, 0, 0x7);
+ if (likely(msg_isdata(m) && !msg_errcode(m)))
+ return msg_user(m);
+ return TIPC_SYSTEM_IMPORTANCE;
+@@ -362,7 +362,7 @@ static inline u32 msg_importance(struct
+ static inline void msg_set_importance(struct tipc_msg *m, u32 i)
+ {
+ if (unlikely(msg_user(m) == MSG_FRAGMENTER))
+- msg_set_bits(m, 5, 13, 0x7, i);
++ msg_set_bits(m, 9, 0, 0x7, i);
+ else if (likely(i < TIPC_SYSTEM_IMPORTANCE))
+ msg_set_user(m, i);
+ else