git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.19-stable patches
author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 21 Apr 2015 21:16:16 +0000 (23:16 +0200)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 21 Apr 2015 21:16:16 +0000 (23:16 +0200)
added patches:
bnx2x-fix-busy_poll-vs-netpoll.patch
bonding-bonding-overriding-configuration-logic-restored.patch
bpf-fix-verifier-memory-corruption.patch
ipv6-don-t-reduce-hop-limit-for-an-interface.patch
ipv6-protect-skb-sk-accesses-from-recursive-dereference-inside-the-stack.patch
net-mlx4_core-fix-error-message-deprecation-for-connectx-2-cards.patch
net-mlx4_en-call-register_netdevice-in-the-proper-location.patch
net-tcp6-fix-double-call-of-tcp_v6_fill_cb.patch
openvswitch-return-vport-module-ref-before-destruction.patch
revert-net-reset-secmark-when-scrubbing-packet.patch
rocker-handle-non-bridge-master-change.patch
skbuff-do-not-scrub-skb-mark-within-the-same-name-space.patch
tcp-fix-frto-undo-on-cumulative-ack-of-sacked-range.patch
tcp-prevent-fetching-dst-twice-in-early-demux-code.patch
tcp-tcp_make_synack-should-clear-skb-tstamp.patch
tg3-hold-tp-lock-before-calling-tg3_halt-from-tg3_init_one.patch
tun-return-proper-error-code-from-tun_do_read.patch
udptunnels-call-handle_offloads-after-inserting-vlan-tag.patch
usbnet-fix-tx_bytes-statistic-running-backward-in-cdc_ncm.patch
usbnet-fix-tx_packets-stat-for-flag_multi_frame-drivers.patch
xen-netfront-transmit-fully-gso-sized-packets.patch

21 files changed:
queue-3.19/bnx2x-fix-busy_poll-vs-netpoll.patch [new file with mode: 0644]
queue-3.19/bonding-bonding-overriding-configuration-logic-restored.patch [new file with mode: 0644]
queue-3.19/bpf-fix-verifier-memory-corruption.patch [new file with mode: 0644]
queue-3.19/ipv6-don-t-reduce-hop-limit-for-an-interface.patch [new file with mode: 0644]
queue-3.19/ipv6-protect-skb-sk-accesses-from-recursive-dereference-inside-the-stack.patch [new file with mode: 0644]
queue-3.19/net-mlx4_core-fix-error-message-deprecation-for-connectx-2-cards.patch [new file with mode: 0644]
queue-3.19/net-mlx4_en-call-register_netdevice-in-the-proper-location.patch [new file with mode: 0644]
queue-3.19/net-tcp6-fix-double-call-of-tcp_v6_fill_cb.patch [new file with mode: 0644]
queue-3.19/openvswitch-return-vport-module-ref-before-destruction.patch [new file with mode: 0644]
queue-3.19/revert-net-reset-secmark-when-scrubbing-packet.patch [new file with mode: 0644]
queue-3.19/rocker-handle-non-bridge-master-change.patch [new file with mode: 0644]
queue-3.19/skbuff-do-not-scrub-skb-mark-within-the-same-name-space.patch [new file with mode: 0644]
queue-3.19/tcp-fix-frto-undo-on-cumulative-ack-of-sacked-range.patch [new file with mode: 0644]
queue-3.19/tcp-prevent-fetching-dst-twice-in-early-demux-code.patch [new file with mode: 0644]
queue-3.19/tcp-tcp_make_synack-should-clear-skb-tstamp.patch [new file with mode: 0644]
queue-3.19/tg3-hold-tp-lock-before-calling-tg3_halt-from-tg3_init_one.patch [new file with mode: 0644]
queue-3.19/tun-return-proper-error-code-from-tun_do_read.patch [new file with mode: 0644]
queue-3.19/udptunnels-call-handle_offloads-after-inserting-vlan-tag.patch [new file with mode: 0644]
queue-3.19/usbnet-fix-tx_bytes-statistic-running-backward-in-cdc_ncm.patch [new file with mode: 0644]
queue-3.19/usbnet-fix-tx_packets-stat-for-flag_multi_frame-drivers.patch [new file with mode: 0644]
queue-3.19/xen-netfront-transmit-fully-gso-sized-packets.patch [new file with mode: 0644]

diff --git a/queue-3.19/bnx2x-fix-busy_poll-vs-netpoll.patch b/queue-3.19/bnx2x-fix-busy_poll-vs-netpoll.patch
new file mode 100644 (file)
index 0000000..ad8d950
--- /dev/null
@@ -0,0 +1,265 @@
+From foo@baz Tue Apr 21 23:05:14 CEST 2015
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 14 Apr 2015 18:45:00 -0700
+Subject: bnx2x: Fix busy_poll vs netpoll
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 074975d0374333f656c48487aa046a21a9b9d7a1 ]
+
+Commit 9a2620c877454 ("bnx2x: prevent WARN during driver unload")
+switched the napi/busy_lock locking mechanism from spin_lock() into
+spin_lock_bh(), breaking inter-operability with netconsole, as netpoll
+disables interrupts prior to calling our napi mechanism.
+
+This switches the driver into using atomic assignments instead of the
+spinlock mechanisms previously employed.
+
+Based on initial patch from Yuval Mintz & Ariel Elior
+
+I basically added softirq starvation avoidance, and mixture
+of atomic operations, plain writes and barriers.
+
+Note this slightly reduces the overhead for this driver when no
+busy_poll sockets are in use.
+
+Fixes: 9a2620c877454 ("bnx2x: prevent WARN during driver unload")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnx2x/bnx2x.h     |  135 ++++++++----------------
+ drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c |    9 -
+ 2 files changed, 55 insertions(+), 89 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+@@ -531,20 +531,8 @@ struct bnx2x_fastpath {
+       struct napi_struct      napi;
+ #ifdef CONFIG_NET_RX_BUSY_POLL
+-      unsigned int state;
+-#define BNX2X_FP_STATE_IDLE                 0
+-#define BNX2X_FP_STATE_NAPI           (1 << 0)    /* NAPI owns this FP */
+-#define BNX2X_FP_STATE_POLL           (1 << 1)    /* poll owns this FP */
+-#define BNX2X_FP_STATE_DISABLED               (1 << 2)
+-#define BNX2X_FP_STATE_NAPI_YIELD     (1 << 3)    /* NAPI yielded this FP */
+-#define BNX2X_FP_STATE_POLL_YIELD     (1 << 4)    /* poll yielded this FP */
+-#define BNX2X_FP_OWNED        (BNX2X_FP_STATE_NAPI | BNX2X_FP_STATE_POLL)
+-#define BNX2X_FP_YIELD        (BNX2X_FP_STATE_NAPI_YIELD | BNX2X_FP_STATE_POLL_YIELD)
+-#define BNX2X_FP_LOCKED       (BNX2X_FP_OWNED | BNX2X_FP_STATE_DISABLED)
+-#define BNX2X_FP_USER_PEND (BNX2X_FP_STATE_POLL | BNX2X_FP_STATE_POLL_YIELD)
+-      /* protect state */
+-      spinlock_t lock;
+-#endif /* CONFIG_NET_RX_BUSY_POLL */
++      unsigned long           busy_poll_state;
++#endif
+       union host_hc_status_block      status_blk;
+       /* chip independent shortcuts into sb structure */
+@@ -619,104 +607,83 @@ struct bnx2x_fastpath {
+ #define bnx2x_fp_qstats(bp, fp)       (&((bp)->fp_stats[(fp)->index].eth_q_stats))
+ #ifdef CONFIG_NET_RX_BUSY_POLL
+-static inline void bnx2x_fp_init_lock(struct bnx2x_fastpath *fp)
++
++enum bnx2x_fp_state {
++      BNX2X_STATE_FP_NAPI     = BIT(0), /* NAPI handler owns the queue */
++
++      BNX2X_STATE_FP_NAPI_REQ_BIT = 1, /* NAPI would like to own the queue */
++      BNX2X_STATE_FP_NAPI_REQ = BIT(1),
++
++      BNX2X_STATE_FP_POLL_BIT = 2,
++      BNX2X_STATE_FP_POLL     = BIT(2), /* busy_poll owns the queue */
++
++      BNX2X_STATE_FP_DISABLE_BIT = 3, /* queue is dismantled */
++};
++
++static inline void bnx2x_fp_busy_poll_init(struct bnx2x_fastpath *fp)
+ {
+-      spin_lock_init(&fp->lock);
+-      fp->state = BNX2X_FP_STATE_IDLE;
++      WRITE_ONCE(fp->busy_poll_state, 0);
+ }
+ /* called from the device poll routine to get ownership of a FP */
+ static inline bool bnx2x_fp_lock_napi(struct bnx2x_fastpath *fp)
+ {
+-      bool rc = true;
++      unsigned long prev, old = READ_ONCE(fp->busy_poll_state);
+-      spin_lock_bh(&fp->lock);
+-      if (fp->state & BNX2X_FP_LOCKED) {
+-              WARN_ON(fp->state & BNX2X_FP_STATE_NAPI);
+-              fp->state |= BNX2X_FP_STATE_NAPI_YIELD;
+-              rc = false;
+-      } else {
+-              /* we don't care if someone yielded */
+-              fp->state = BNX2X_FP_STATE_NAPI;
++      while (1) {
++              switch (old) {
++              case BNX2X_STATE_FP_POLL:
++                      /* make sure bnx2x_fp_lock_poll() wont starve us */
++                      set_bit(BNX2X_STATE_FP_NAPI_REQ_BIT,
++                              &fp->busy_poll_state);
++                      /* fallthrough */
++              case BNX2X_STATE_FP_POLL | BNX2X_STATE_FP_NAPI_REQ:
++                      return false;
++              default:
++                      break;
++              }
++              prev = cmpxchg(&fp->busy_poll_state, old, BNX2X_STATE_FP_NAPI);
++              if (unlikely(prev != old)) {
++                      old = prev;
++                      continue;
++              }
++              return true;
+       }
+-      spin_unlock_bh(&fp->lock);
+-      return rc;
+ }
+-/* returns true is someone tried to get the FP while napi had it */
+-static inline bool bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp)
++static inline void bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp)
+ {
+-      bool rc = false;
+-
+-      spin_lock_bh(&fp->lock);
+-      WARN_ON(fp->state &
+-              (BNX2X_FP_STATE_POLL | BNX2X_FP_STATE_NAPI_YIELD));
+-
+-      if (fp->state & BNX2X_FP_STATE_POLL_YIELD)
+-              rc = true;
+-
+-      /* state ==> idle, unless currently disabled */
+-      fp->state &= BNX2X_FP_STATE_DISABLED;
+-      spin_unlock_bh(&fp->lock);
+-      return rc;
++      smp_wmb();
++      fp->busy_poll_state = 0;
+ }
+ /* called from bnx2x_low_latency_poll() */
+ static inline bool bnx2x_fp_lock_poll(struct bnx2x_fastpath *fp)
+ {
+-      bool rc = true;
+-
+-      spin_lock_bh(&fp->lock);
+-      if ((fp->state & BNX2X_FP_LOCKED)) {
+-              fp->state |= BNX2X_FP_STATE_POLL_YIELD;
+-              rc = false;
+-      } else {
+-              /* preserve yield marks */
+-              fp->state |= BNX2X_FP_STATE_POLL;
+-      }
+-      spin_unlock_bh(&fp->lock);
+-      return rc;
++      return cmpxchg(&fp->busy_poll_state, 0, BNX2X_STATE_FP_POLL) == 0;
+ }
+-/* returns true if someone tried to get the FP while it was locked */
+-static inline bool bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp)
++static inline void bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp)
+ {
+-      bool rc = false;
+-
+-      spin_lock_bh(&fp->lock);
+-      WARN_ON(fp->state & BNX2X_FP_STATE_NAPI);
+-
+-      if (fp->state & BNX2X_FP_STATE_POLL_YIELD)
+-              rc = true;
+-
+-      /* state ==> idle, unless currently disabled */
+-      fp->state &= BNX2X_FP_STATE_DISABLED;
+-      spin_unlock_bh(&fp->lock);
+-      return rc;
++      smp_mb__before_atomic();
++      clear_bit(BNX2X_STATE_FP_POLL_BIT, &fp->busy_poll_state);
+ }
+-/* true if a socket is polling, even if it did not get the lock */
++/* true if a socket is polling */
+ static inline bool bnx2x_fp_ll_polling(struct bnx2x_fastpath *fp)
+ {
+-      WARN_ON(!(fp->state & BNX2X_FP_OWNED));
+-      return fp->state & BNX2X_FP_USER_PEND;
++      return READ_ONCE(fp->busy_poll_state) & BNX2X_STATE_FP_POLL;
+ }
+ /* false if fp is currently owned */
+ static inline bool bnx2x_fp_ll_disable(struct bnx2x_fastpath *fp)
+ {
+-      int rc = true;
+-
+-      spin_lock_bh(&fp->lock);
+-      if (fp->state & BNX2X_FP_OWNED)
+-              rc = false;
+-      fp->state |= BNX2X_FP_STATE_DISABLED;
+-      spin_unlock_bh(&fp->lock);
++      set_bit(BNX2X_STATE_FP_DISABLE_BIT, &fp->busy_poll_state);
++      return !bnx2x_fp_ll_polling(fp);
+-      return rc;
+ }
+ #else
+-static inline void bnx2x_fp_init_lock(struct bnx2x_fastpath *fp)
++static inline void bnx2x_fp_busy_poll_init(struct bnx2x_fastpath *fp)
+ {
+ }
+@@ -725,9 +692,8 @@ static inline bool bnx2x_fp_lock_napi(st
+       return true;
+ }
+-static inline bool bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp)
++static inline void bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp)
+ {
+-      return false;
+ }
+ static inline bool bnx2x_fp_lock_poll(struct bnx2x_fastpath *fp)
+@@ -735,9 +701,8 @@ static inline bool bnx2x_fp_lock_poll(st
+       return false;
+ }
+-static inline bool bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp)
++static inline void bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp)
+ {
+-      return false;
+ }
+ static inline bool bnx2x_fp_ll_polling(struct bnx2x_fastpath *fp)
+--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+@@ -1849,7 +1849,7 @@ static void bnx2x_napi_enable_cnic(struc
+       int i;
+       for_each_rx_queue_cnic(bp, i) {
+-              bnx2x_fp_init_lock(&bp->fp[i]);
++              bnx2x_fp_busy_poll_init(&bp->fp[i]);
+               napi_enable(&bnx2x_fp(bp, i, napi));
+       }
+ }
+@@ -1859,7 +1859,7 @@ static void bnx2x_napi_enable(struct bnx
+       int i;
+       for_each_eth_queue(bp, i) {
+-              bnx2x_fp_init_lock(&bp->fp[i]);
++              bnx2x_fp_busy_poll_init(&bp->fp[i]);
+               napi_enable(&bnx2x_fp(bp, i, napi));
+       }
+ }
+@@ -3191,9 +3191,10 @@ static int bnx2x_poll(struct napi_struct
+                       }
+               }
++              bnx2x_fp_unlock_napi(fp);
++
+               /* Fall out from the NAPI loop if needed */
+-              if (!bnx2x_fp_unlock_napi(fp) &&
+-                  !(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
++              if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
+                       /* No need to update SB for FCoE L2 ring as long as
+                        * it's connected to the default SB and the SB
diff --git a/queue-3.19/bonding-bonding-overriding-configuration-logic-restored.patch b/queue-3.19/bonding-bonding-overriding-configuration-logic-restored.patch
new file mode 100644 (file)
index 0000000..57cf9aa
--- /dev/null
@@ -0,0 +1,40 @@
+From foo@baz Tue Apr 21 23:05:14 CEST 2015
+From: Anton Nayshtut <anton@swortex.com>
+Date: Sun, 29 Mar 2015 14:20:25 +0300
+Subject: bonding: Bonding Overriding Configuration logic restored.
+
+From: Anton Nayshtut <anton@swortex.com>
+
+[ Upstream commit f5e2dc5d7fe78fe4d8748d217338f4f7b6a5d7ea ]
+
+Before commit 3900f29021f0bc7fe9815aa32f1a993b7dfdd402 ("bonding: slight
+optimizztion for bond_slave_override()") the override logic was to send packets
+with non-zero queue_id through the slave with corresponding queue_id, under two
+conditions only - if the slave can transmit and it's up.
+
+The above mentioned commit changed this logic by introducing an additional
+condition - whether the bond is active (indirectly, using the slave_can_tx and
+later - bond_is_active_slave), that prevents the user from implementing more
+complex policies according to the Documentation/networking/bonding.txt.
+
+Signed-off-by: Anton Nayshtut <anton@swortex.com>
+Signed-off-by: Alexey Bogoslavsky <alexey@swortex.com>
+Signed-off-by: Andy Gospodarek <gospo@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/bonding/bond_main.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/bonding/bond_main.c
++++ b/drivers/net/bonding/bond_main.c
+@@ -3797,7 +3797,8 @@ static inline int bond_slave_override(st
+       /* Find out if any slaves have the same mapping as this skb. */
+       bond_for_each_slave_rcu(bond, slave, iter) {
+               if (slave->queue_id == skb->queue_mapping) {
+-                      if (bond_slave_can_tx(slave)) {
++                      if (bond_slave_is_up(slave) &&
++                          slave->link == BOND_LINK_UP) {
+                               bond_dev_queue_xmit(bond, skb, slave->dev);
+                               return 0;
+                       }
diff --git a/queue-3.19/bpf-fix-verifier-memory-corruption.patch b/queue-3.19/bpf-fix-verifier-memory-corruption.patch
new file mode 100644 (file)
index 0000000..c6a8591
--- /dev/null
@@ -0,0 +1,43 @@
+From foo@baz Tue Apr 21 23:05:14 CEST 2015
+From: Alexei Starovoitov <ast@plumgrid.com>
+Date: Tue, 14 Apr 2015 15:57:13 -0700
+Subject: bpf: fix verifier memory corruption
+
+From: Alexei Starovoitov <ast@plumgrid.com>
+
+[ Upstream commit c3de6317d748e23b9e46ba36e10483728d00d144 ]
+
+Due to missing bounds check the DAG pass of the BPF verifier can corrupt
+the memory which can cause random crashes during program loading:
+
+[8.449451] BUG: unable to handle kernel paging request at ffffffffffffffff
+[8.451293] IP: [<ffffffff811de33d>] kmem_cache_alloc_trace+0x8d/0x2f0
+[8.452329] Oops: 0000 [#1] SMP
+[8.452329] Call Trace:
+[8.452329]  [<ffffffff8116cc82>] bpf_check+0x852/0x2000
+[8.452329]  [<ffffffff8116b7e4>] bpf_prog_load+0x1e4/0x310
+[8.452329]  [<ffffffff811b190f>] ? might_fault+0x5f/0xb0
+[8.452329]  [<ffffffff8116c206>] SyS_bpf+0x806/0xa30
+
+Fixes: f1bca824dabb ("bpf: add search pruning optimization to verifier")
+Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
+Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/verifier.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -1380,7 +1380,8 @@ peek_stack:
+                       /* tell verifier to check for equivalent states
+                        * after every call and jump
+                        */
+-                      env->explored_states[t + 1] = STATE_LIST_MARK;
++                      if (t + 1 < insn_cnt)
++                              env->explored_states[t + 1] = STATE_LIST_MARK;
+               } else {
+                       /* conditional jump with two edges */
+                       ret = push_insn(t, t + 1, FALLTHROUGH, env);
diff --git a/queue-3.19/ipv6-don-t-reduce-hop-limit-for-an-interface.patch b/queue-3.19/ipv6-don-t-reduce-hop-limit-for-an-interface.patch
new file mode 100644 (file)
index 0000000..9f55ce4
--- /dev/null
@@ -0,0 +1,48 @@
+From foo@baz Tue Apr 21 23:05:14 CEST 2015
+From: "D.S. Ljungmark" <ljungmark@modio.se>
+Date: Wed, 25 Mar 2015 09:28:15 +0100
+Subject: ipv6: Don't reduce hop limit for an interface
+
+From: "D.S. Ljungmark" <ljungmark@modio.se>
+
+[ Upstream commit 6fd99094de2b83d1d4c8457f2c83483b2828e75a ]
+
+A local route may have a lower hop_limit set than global routes do.
+
+RFC 3756, Section 4.2.7, "Parameter Spoofing"
+
+>   1.  The attacker includes a Current Hop Limit of one or another small
+>       number which the attacker knows will cause legitimate packets to
+>       be dropped before they reach their destination.
+
+>   As an example, one possible approach to mitigate this threat is to
+>   ignore very small hop limits.  The nodes could implement a
+>   configurable minimum hop limit, and ignore attempts to set it below
+>   said limit.
+
+Signed-off-by: D.S. Ljungmark <ljungmark@modio.se>
+Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ndisc.c |    9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/net/ipv6/ndisc.c
++++ b/net/ipv6/ndisc.c
+@@ -1216,7 +1216,14 @@ static void ndisc_router_discovery(struc
+       if (rt)
+               rt6_set_expires(rt, jiffies + (HZ * lifetime));
+       if (ra_msg->icmph.icmp6_hop_limit) {
+-              in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
++              /* Only set hop_limit on the interface if it is higher than
++               * the current hop_limit.
++               */
++              if (in6_dev->cnf.hop_limit < ra_msg->icmph.icmp6_hop_limit) {
++                      in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
++              } else {
++                      ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than current\n");
++              }
+               if (rt)
+                       dst_metric_set(&rt->dst, RTAX_HOPLIMIT,
+                                      ra_msg->icmph.icmp6_hop_limit);
diff --git a/queue-3.19/ipv6-protect-skb-sk-accesses-from-recursive-dereference-inside-the-stack.patch b/queue-3.19/ipv6-protect-skb-sk-accesses-from-recursive-dereference-inside-the-stack.patch
new file mode 100644 (file)
index 0000000..0d1095f
--- /dev/null
@@ -0,0 +1,160 @@
+From foo@baz Tue Apr 21 23:05:14 CEST 2015
+From: "hannes@stressinduktion.org" <hannes@stressinduktion.org>
+Date: Wed, 1 Apr 2015 17:07:44 +0200
+Subject: ipv6: protect skb->sk accesses from recursive dereference inside the stack
+
+From: "hannes@stressinduktion.org" <hannes@stressinduktion.org>
+
+[ Upstream commit f60e5990d9c1424af9dbca60a23ba2a1c7c1ce90 ]
+
+We should not consult skb->sk for output decisions in xmit recursion
+levels > 0 in the stack. Otherwise local socket settings could influence
+the result of e.g. tunnel encapsulation process.
+
+ipv6 does not conform with this in three places:
+
+1) ip6_fragment: we do consult ipv6_npinfo for frag_size
+
+2) sk_mc_loop in ipv6 uses skb->sk and checks if we should
+   loop the packet back to the local socket
+
+3) ip6_skb_dst_mtu could query the settings from the user socket and
+   force a wrong MTU
+
+Furthermore:
+In sk_mc_loop we could potentially land in WARN_ON(1) if we use a
+PF_PACKET socket on top of an IPv6-backed vxlan device.
+
+Reuse xmit_recursion as we are currently only interested in protecting
+tunnel devices.
+
+Cc: Jiri Pirko <jiri@resnulli.us>
+Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/netdevice.h |    6 ++++++
+ include/net/ip.h          |   16 ----------------
+ include/net/ip6_route.h   |    3 ++-
+ include/net/sock.h        |    2 ++
+ net/core/dev.c            |    4 +++-
+ net/core/sock.c           |   19 +++++++++++++++++++
+ net/ipv6/ip6_output.c     |    3 ++-
+ 7 files changed, 34 insertions(+), 19 deletions(-)
+
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -2159,6 +2159,12 @@ void netdev_freemem(struct net_device *d
+ void synchronize_net(void);
+ int init_dummy_netdev(struct net_device *dev);
++DECLARE_PER_CPU(int, xmit_recursion);
++static inline int dev_recursion_level(void)
++{
++      return this_cpu_read(xmit_recursion);
++}
++
+ struct net_device *dev_get_by_index(struct net *net, int ifindex);
+ struct net_device *__dev_get_by_index(struct net *net, int ifindex);
+ struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
+--- a/include/net/ip.h
++++ b/include/net/ip.h
+@@ -453,22 +453,6 @@ static __inline__ void inet_reset_saddr(
+ #endif
+-static inline int sk_mc_loop(struct sock *sk)
+-{
+-      if (!sk)
+-              return 1;
+-      switch (sk->sk_family) {
+-      case AF_INET:
+-              return inet_sk(sk)->mc_loop;
+-#if IS_ENABLED(CONFIG_IPV6)
+-      case AF_INET6:
+-              return inet6_sk(sk)->mc_loop;
+-#endif
+-      }
+-      WARN_ON(1);
+-      return 1;
+-}
+-
+ bool ip_call_ra_chain(struct sk_buff *skb);
+ /*
+--- a/include/net/ip6_route.h
++++ b/include/net/ip6_route.h
+@@ -174,7 +174,8 @@ int ip6_fragment(struct sk_buff *skb, in
+ static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
+ {
+-      struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
++      struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
++                              inet6_sk(skb->sk) : NULL;
+       return (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) ?
+              skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -1812,6 +1812,8 @@ struct dst_entry *__sk_dst_check(struct
+ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie);
++bool sk_mc_loop(struct sock *sk);
++
+ static inline bool sk_can_gso(const struct sock *sk)
+ {
+       return net_gso_ok(sk->sk_route_caps, sk->sk_gso_type);
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -2821,7 +2821,9 @@ static void skb_update_prio(struct sk_bu
+ #define skb_update_prio(skb)
+ #endif
+-static DEFINE_PER_CPU(int, xmit_recursion);
++DEFINE_PER_CPU(int, xmit_recursion);
++EXPORT_SYMBOL(xmit_recursion);
++
+ #define RECURSION_LIMIT 10
+ /**
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -651,6 +651,25 @@ static inline void sock_valbool_flag(str
+               sock_reset_flag(sk, bit);
+ }
++bool sk_mc_loop(struct sock *sk)
++{
++      if (dev_recursion_level())
++              return false;
++      if (!sk)
++              return true;
++      switch (sk->sk_family) {
++      case AF_INET:
++              return inet_sk(sk)->mc_loop;
++#if IS_ENABLED(CONFIG_IPV6)
++      case AF_INET6:
++              return inet6_sk(sk)->mc_loop;
++#endif
++      }
++      WARN_ON(1);
++      return true;
++}
++EXPORT_SYMBOL(sk_mc_loop);
++
+ /*
+  *    This is meant for all protocols to use and covers goings on
+  *    at the socket level. Everything here is generic.
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -541,7 +541,8 @@ int ip6_fragment(struct sk_buff *skb, in
+ {
+       struct sk_buff *frag;
+       struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
+-      struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
++      struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
++                              inet6_sk(skb->sk) : NULL;
+       struct ipv6hdr *tmp_hdr;
+       struct frag_hdr *fh;
+       unsigned int mtu, hlen, left, len;
diff --git a/queue-3.19/net-mlx4_core-fix-error-message-deprecation-for-connectx-2-cards.patch b/queue-3.19/net-mlx4_core-fix-error-message-deprecation-for-connectx-2-cards.patch
new file mode 100644 (file)
index 0000000..9bb479a
--- /dev/null
@@ -0,0 +1,34 @@
+From foo@baz Tue Apr 21 23:05:14 CEST 2015
+From: Jack Morgenstein <jackm@dev.mellanox.co.il>
+Date: Sun, 5 Apr 2015 17:50:48 +0300
+Subject: net/mlx4_core: Fix error message deprecation for ConnectX-2 cards
+
+From: Jack Morgenstein <jackm@dev.mellanox.co.il>
+
+[ Upstream commit fde913e25496761a4e2a4c81230c913aba6289a2 ]
+
+Commit 1daa4303b4ca ("net/mlx4_core: Deprecate error message at
+ConnectX-2 cards startup to debug") did the deprecation only for port 1
+of the card. Need to deprecate for port 2 as well.
+
+Fixes: 1daa4303b4ca ("net/mlx4_core: Deprecate error message at ConnectX-2 cards startup to debug")
+Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
+Signed-off-by: Amir Vadai <amirv@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/cmd.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
++++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
+@@ -585,7 +585,8 @@ static int mlx4_cmd_wait(struct mlx4_dev
+                * on the host, we deprecate the error message for this
+                * specific command/input_mod/opcode_mod/fw-status to be debug.
+                */
+-              if (op == MLX4_CMD_SET_PORT && in_modifier == 1 &&
++              if (op == MLX4_CMD_SET_PORT &&
++                  (in_modifier == 1 || in_modifier == 2) &&
+                   op_modifier == 0 && context->fw_status == CMD_STAT_BAD_SIZE)
+                       mlx4_dbg(dev, "command 0x%x failed: fw status = 0x%x\n",
+                                op, context->fw_status);
diff --git a/queue-3.19/net-mlx4_en-call-register_netdevice-in-the-proper-location.patch b/queue-3.19/net-mlx4_en-call-register_netdevice-in-the-proper-location.patch
new file mode 100644 (file)
index 0000000..2625ae6
--- /dev/null
@@ -0,0 +1,60 @@
+From foo@baz Tue Apr 21 23:05:14 CEST 2015
+From: Ido Shamay <idos@mellanox.com>
+Date: Tue, 24 Mar 2015 15:18:38 +0200
+Subject: net/mlx4_en: Call register_netdevice in the proper location
+
+From: Ido Shamay <idos@mellanox.com>
+
+[ Upstream commit e5eda89d97ec256ba14e7e861387cc0468259c18 ]
+
+Netdevice registration should be performed at the end of the driver
+initialization flow. If we don't do that, after calling register_netdevice,
+device callbacks may be issued by higher layers of the stack before
+final configuration of the device is done.
+
+For example (VXLAN configuration race), mlx4_SET_PORT_VXLAN was issued
+after the register_netdev command. System network scripts may configure
+the interface (UP) right after the registration, which also attaches a
+unicast VXLAN steering rule, before mlx4_SET_PORT_VXLAN was called,
+causing the firmware to fail the rule attachment.
+
+Fixes: 837052d0ccc5 ("net/mlx4_en: Add netdev support for TCP/IP offloads of vxlan tunneling")
+Signed-off-by: Ido Shamay <idos@mellanox.com>
+Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/en_netdev.c |   15 ++++++++-------
+ 1 file changed, 8 insertions(+), 7 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
++++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+@@ -2627,13 +2627,6 @@ int mlx4_en_init_netdev(struct mlx4_en_d
+       netif_carrier_off(dev);
+       mlx4_en_set_default_moderation(priv);
+-      err = register_netdev(dev);
+-      if (err) {
+-              en_err(priv, "Netdev registration failed for port %d\n", port);
+-              goto out;
+-      }
+-      priv->registered = 1;
+-
+       en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num);
+       en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num);
+@@ -2673,6 +2666,14 @@ int mlx4_en_init_netdev(struct mlx4_en_d
+               queue_delayed_work(mdev->workqueue, &priv->service_task,
+                                  SERVICE_TASK_DELAY);
++      err = register_netdev(dev);
++      if (err) {
++              en_err(priv, "Netdev registration failed for port %d\n", port);
++              goto out;
++      }
++
++      priv->registered = 1;
++
+       return 0;
+ out:
diff --git a/queue-3.19/net-tcp6-fix-double-call-of-tcp_v6_fill_cb.patch b/queue-3.19/net-tcp6-fix-double-call-of-tcp_v6_fill_cb.patch
new file mode 100644 (file)
index 0000000..03f7d25
--- /dev/null
@@ -0,0 +1,64 @@
+From foo@baz Tue Apr 21 23:05:14 CEST 2015
+From: Alexey Kodanev <alexey.kodanev@oracle.com>
+Date: Fri, 27 Mar 2015 12:24:22 +0300
+Subject: net: tcp6: fix double call of tcp_v6_fill_cb()
+
+From: Alexey Kodanev <alexey.kodanev@oracle.com>
+
+[ Upstream commit 4ad19de8774e2a7b075b3e8ea48db85adcf33fa6 ]
+
+tcp_v6_fill_cb() will be called twice if socket's state changes from
+TCP_TIME_WAIT to TCP_LISTEN. That can result in control buffer data
+corruption because in the second tcp_v6_fill_cb() call it's not copying
+IP6CB(skb) anymore, but 'seq', 'end_seq', etc., so we can get weird and
+unpredictable results. Performance loss of up to 1200% has been observed
+in LTP/vxlan03 test.
+
+This can be fixed by copying inet6_skb_parm to the beginning of 'cb'
+only if xfrm6_policy_check() and tcp_v6_fill_cb() are going to be
+called again.
+
+Fixes: 2dc49d1680b53 ("tcp6: don't move IP6CB before xfrm6_policy_check()")
+
+Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/tcp_ipv6.c |   11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -1409,6 +1409,15 @@ static void tcp_v6_fill_cb(struct sk_buf
+       TCP_SKB_CB(skb)->sacked = 0;
+ }
++static void tcp_v6_restore_cb(struct sk_buff *skb)
++{
++      /* We need to move header back to the beginning if xfrm6_policy_check()
++       * and tcp_v6_fill_cb() are going to be called again.
++       */
++      memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
++              sizeof(struct inet6_skb_parm));
++}
++
+ static int tcp_v6_rcv(struct sk_buff *skb)
+ {
+       const struct tcphdr *th;
+@@ -1541,6 +1550,7 @@ do_time_wait:
+                       inet_twsk_deschedule(tw, &tcp_death_row);
+                       inet_twsk_put(tw);
+                       sk = sk2;
++                      tcp_v6_restore_cb(skb);
+                       goto process;
+               }
+               /* Fall through to ACK */
+@@ -1549,6 +1559,7 @@ do_time_wait:
+               tcp_v6_timewait_ack(sk, skb);
+               break;
+       case TCP_TW_RST:
++              tcp_v6_restore_cb(skb);
+               goto no_tcp_socket;
+       case TCP_TW_SUCCESS:
+               ;
diff --git a/queue-3.19/openvswitch-return-vport-module-ref-before-destruction.patch b/queue-3.19/openvswitch-return-vport-module-ref-before-destruction.patch
new file mode 100644 (file)
index 0000000..cb60585
--- /dev/null
@@ -0,0 +1,42 @@
+From foo@baz Tue Apr 21 23:05:14 CEST 2015
+From: Thomas Graf <tgraf@suug.ch>
+Date: Mon, 30 Mar 2015 13:57:41 +0200
+Subject: openvswitch: Return vport module ref before destruction
+
+From: Thomas Graf <tgraf@suug.ch>
+
+[ Upstream commit fa2d8ff4e3522b4e05f590575d3eb8087f3a8cdc ]
+
+Return module reference before invoking the respective vport
+->destroy() function. This is needed as ovs_vport_del() is not
+invoked inside an RCU read side critical section so the kfree
+can occur immediately before returning to ovs_vport_del().
+
+Returning the module reference before ->destroy() is safe because
+the module unregistration is blocked on ovs_lock which we hold
+while destroying the datapath.
+
+Fixes: 62b9c8d0372d ("ovs: Turn vports with dependencies into separate modules")
+Reported-by: Pravin Shelar <pshelar@nicira.com>
+Signed-off-by: Thomas Graf <tgraf@suug.ch>
+Acked-by: Pravin B Shelar <pshelar@nicira.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/openvswitch/vport.c |    4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/net/openvswitch/vport.c
++++ b/net/openvswitch/vport.c
+@@ -274,10 +274,8 @@ void ovs_vport_del(struct vport *vport)
+       ASSERT_OVSL();
+       hlist_del_rcu(&vport->hash_node);
+-
+-      vport->ops->destroy(vport);
+-
+       module_put(vport->ops->owner);
++      vport->ops->destroy(vport);
+ }
+ /**
diff --git a/queue-3.19/revert-net-reset-secmark-when-scrubbing-packet.patch b/queue-3.19/revert-net-reset-secmark-when-scrubbing-packet.patch
new file mode 100644 (file)
index 0000000..8953204
--- /dev/null
@@ -0,0 +1,31 @@
+From foo@baz Tue Apr 21 23:05:14 CEST 2015
+From: Herbert Xu <herbert@gondor.apana.org.au>
+Date: Thu, 16 Apr 2015 16:12:53 +0800
+Subject: Revert "net: Reset secmark when scrubbing packet"
+
+From: Herbert Xu <herbert@gondor.apana.org.au>
+
+[ Upstream commit 4c0ee414e877b899f7fc80aafb98d9425c02797f ]
+
+This patch reverts commit b8fb4e0648a2ab3734140342002f68fb0c7d1602
+because the secmark must be preserved even when a packet crosses
+namespace boundaries.  The reason is that security labels apply to
+the system as a whole and are not per-namespace.
+
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/skbuff.c |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -4149,7 +4149,6 @@ void skb_scrub_packet(struct sk_buff *sk
+       skb->ignore_df = 0;
+       skb_dst_drop(skb);
+       skb->mark = 0;
+-      skb_init_secmark(skb);
+       secpath_reset(skb);
+       nf_reset(skb);
+       nf_reset_trace(skb);
diff --git a/queue-3.19/rocker-handle-non-bridge-master-change.patch b/queue-3.19/rocker-handle-non-bridge-master-change.patch
new file mode 100644 (file)
index 0000000..a9624ba
--- /dev/null
@@ -0,0 +1,49 @@
+From foo@baz Tue Apr 21 23:05:14 CEST 2015
+From: Simon Horman <simon.horman@netronome.com>
+Date: Tue, 24 Mar 2015 09:31:40 +0900
+Subject: rocker: handle non-bridge master change
+
+From: Simon Horman <simon.horman@netronome.com>
+
+[ Upstream commit a6e95cc718c8916a13f1e1e9d33cacbc5db56c0f ]
+
+Master change notifications may occur other than when joining or
+leaving a bridge, for example when being added to or removed from
+a bond or Open vSwitch.
+
+Previously in those cases rocker_port_bridge_leave() was called
+which results in a null-pointer dereference as rocker_port->bridge_dev
+is NULL because there is no bridge device.
+
+This patch makes provision for doing nothing in such cases.
+
+Fixes: 6c7079450071f ("rocker: implement L2 bridge offloading")
+Acked-by: Jiri Pirko <jiri@resnulli.us>
+Acked-by: Scott Feldman <sfeldma@gmail.com>
+Signed-off-by: Simon Horman <simon.horman@netronome.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/rocker/rocker.c |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/rocker/rocker.c
++++ b/drivers/net/ethernet/rocker/rocker.c
+@@ -4305,10 +4305,16 @@ static int rocker_port_master_changed(st
+       struct net_device *master = netdev_master_upper_dev_get(dev);
+       int err = 0;
++      /* There are currently three cases handled here:
++       * 1. Joining a bridge
++       * 2. Leaving a previously joined bridge
++       * 3. Other, e.g. being added to or removed from a bond or openvswitch,
++       *    in which case nothing is done
++       */
+       if (master && master->rtnl_link_ops &&
+           !strcmp(master->rtnl_link_ops->kind, "bridge"))
+               err = rocker_port_bridge_join(rocker_port, master);
+-      else
++      else if (rocker_port_is_bridged(rocker_port))
+               err = rocker_port_bridge_leave(rocker_port);
+       return err;
diff --git a/queue-3.19/skbuff-do-not-scrub-skb-mark-within-the-same-name-space.patch b/queue-3.19/skbuff-do-not-scrub-skb-mark-within-the-same-name-space.patch
new file mode 100644 (file)
index 0000000..f926f23
--- /dev/null
@@ -0,0 +1,83 @@
+From foo@baz Tue Apr 21 23:05:14 CEST 2015
+From: Herbert Xu <herbert@gondor.apana.org.au>
+Date: Thu, 16 Apr 2015 09:03:27 +0800
+Subject: skbuff: Do not scrub skb mark within the same name space
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Herbert Xu <herbert@gondor.apana.org.au>
+
+[ Upstream commit 213dd74aee765d4e5f3f4b9607fef0cf97faa2af ]
+
+On Wed, Apr 15, 2015 at 05:41:26PM +0200, Nicolas Dichtel wrote:
+> Le 15/04/2015 15:57, Herbert Xu a écrit :
+> >On Wed, Apr 15, 2015 at 06:22:29PM +0800, Herbert Xu wrote:
+> [snip]
+> >Subject: skbuff: Do not scrub skb mark within the same name space
+> >
+> >The commit ea23192e8e577dfc51e0f4fc5ca113af334edff9 ("tunnels:
+> Maybe add a Fixes tag?
+> Fixes: ea23192e8e57 ("tunnels: harmonize cleanup done on skb on rx path")
+>
+> >harmonize cleanup done on skb on rx path") broke anyone trying to
+> >use netfilter marking across IPv4 tunnels.  While most of the
+> >fields that are cleared by skb_scrub_packet don't matter, the
+> >netfilter mark must be preserved.
+> >
+> >This patch rearranges skb_scurb_packet to preserve the mark field.
+> nit: s/scurb/scrub
+>
+> Else it's fine for me.
+
+Sure.
+
+PS I used the wrong email for James the first time around.  So
+let me repeat the question here.  Should secmark be preserved
+or cleared across tunnels within the same name space? In fact,
+do our security models even support name spaces?
+
+---8<---
+The commit ea23192e8e577dfc51e0f4fc5ca113af334edff9 ("tunnels:
+harmonize cleanup done on skb on rx path") broke anyone trying to
+use netfilter marking across IPv4 tunnels.  While most of the
+fields that are cleared by skb_scrub_packet don't matter, the
+netfilter mark must be preserved.
+
+This patch rearranges skb_scrub_packet to preserve the mark field.
+
+Fixes: ea23192e8e57 ("tunnels: harmonize cleanup done on skb on rx path")
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Acked-by: Thomas Graf <tgraf@suug.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/skbuff.c |    9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -4141,17 +4141,20 @@ EXPORT_SYMBOL(skb_try_coalesce);
+  */
+ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
+ {
+-      if (xnet)
+-              skb_orphan(skb);
+       skb->tstamp.tv64 = 0;
+       skb->pkt_type = PACKET_HOST;
+       skb->skb_iif = 0;
+       skb->ignore_df = 0;
+       skb_dst_drop(skb);
+-      skb->mark = 0;
+       secpath_reset(skb);
+       nf_reset(skb);
+       nf_reset_trace(skb);
++
++      if (!xnet)
++              return;
++
++      skb_orphan(skb);
++      skb->mark = 0;
+ }
+ EXPORT_SYMBOL_GPL(skb_scrub_packet);
diff --git a/queue-3.19/tcp-fix-frto-undo-on-cumulative-ack-of-sacked-range.patch b/queue-3.19/tcp-fix-frto-undo-on-cumulative-ack-of-sacked-range.patch
new file mode 100644 (file)
index 0000000..a5a1483
--- /dev/null
@@ -0,0 +1,47 @@
+From foo@baz Tue Apr 21 23:05:14 CEST 2015
+From: Neal Cardwell <ncardwell@google.com>
+Date: Wed, 1 Apr 2015 20:26:46 -0400
+Subject: tcp: fix FRTO undo on cumulative ACK of SACKed range
+
+From: Neal Cardwell <ncardwell@google.com>
+
+[ Upstream commit 666b805150efd62f05810ff0db08f44a2370c937 ]
+
+On processing cumulative ACKs, the FRTO code was not checking the
+SACKed bit, meaning that there could be a spurious FRTO undo on a
+cumulative ACK of a previously SACKed skb.
+
+The FRTO code should only consider a cumulative ACK to indicate that
+an original/unretransmitted skb is newly ACKed if the skb was not yet
+SACKed.
+
+The effect of the spurious FRTO undo would typically be to make the
+connection think that all previously-sent packets were in flight when
+they really weren't, leading to a stall and an RTO.
+
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Fixes: e33099f96d99c ("tcp: implement RFC5682 F-RTO")
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c |    7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -3104,10 +3104,11 @@ static int tcp_clean_rtx_queue(struct so
+                       if (!first_ackt.v64)
+                               first_ackt = last_ackt;
+-                      if (!(sacked & TCPCB_SACKED_ACKED))
++                      if (!(sacked & TCPCB_SACKED_ACKED)) {
+                               reord = min(pkts_acked, reord);
+-                      if (!after(scb->end_seq, tp->high_seq))
+-                              flag |= FLAG_ORIG_SACK_ACKED;
++                              if (!after(scb->end_seq, tp->high_seq))
++                                      flag |= FLAG_ORIG_SACK_ACKED;
++                      }
+               }
+               if (sacked & TCPCB_SACKED_ACKED)
diff --git a/queue-3.19/tcp-prevent-fetching-dst-twice-in-early-demux-code.patch b/queue-3.19/tcp-prevent-fetching-dst-twice-in-early-demux-code.patch
new file mode 100644 (file)
index 0000000..e60be53
--- /dev/null
@@ -0,0 +1,56 @@
+From foo@baz Tue Apr 21 23:05:14 CEST 2015
+From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= <mkubecek@suse.cz>
+Date: Mon, 23 Mar 2015 15:14:00 +0100
+Subject: tcp: prevent fetching dst twice in early demux code
+
+From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= <mkubecek@suse.cz>
+
+[ Upstream commit d0c294c53a771ae7e84506dfbd8c18c30f078735 ]
+
+On s390x, gcc 4.8 compiles this part of tcp_v6_early_demux()
+
+        struct dst_entry *dst = sk->sk_rx_dst;
+
+        if (dst)
+                dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
+
+to code reading sk->sk_rx_dst twice, once for the test and once for
+the argument of ip6_dst_check() (dst_check() is inline). This allows
+ip6_dst_check() to be called with null first argument, causing a crash.
+
+Protect sk->sk_rx_dst access by READ_ONCE() both in IPv4 and IPv6
+TCP early demux code.
+
+Fixes: 41063e9dd119 ("ipv4: Early TCP socket demux.")
+Fixes: c7109986db3c ("ipv6: Early TCP socket demux")
+Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_ipv4.c |    2 +-
+ net/ipv6/tcp_ipv6.c |    2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -1516,7 +1516,7 @@ void tcp_v4_early_demux(struct sk_buff *
+               skb->sk = sk;
+               skb->destructor = sock_edemux;
+               if (sk->sk_state != TCP_TIME_WAIT) {
+-                      struct dst_entry *dst = sk->sk_rx_dst;
++                      struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
+                       if (dst)
+                               dst = dst_check(dst, 0);
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -1583,7 +1583,7 @@ static void tcp_v6_early_demux(struct sk
+               skb->sk = sk;
+               skb->destructor = sock_edemux;
+               if (sk->sk_state != TCP_TIME_WAIT) {
+-                      struct dst_entry *dst = sk->sk_rx_dst;
++                      struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
+                       if (dst)
+                               dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
diff --git a/queue-3.19/tcp-tcp_make_synack-should-clear-skb-tstamp.patch b/queue-3.19/tcp-tcp_make_synack-should-clear-skb-tstamp.patch
new file mode 100644 (file)
index 0000000..7783562
--- /dev/null
@@ -0,0 +1,42 @@
+From foo@baz Tue Apr 21 23:05:14 CEST 2015
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 9 Apr 2015 13:31:56 -0700
+Subject: tcp: tcp_make_synack() should clear skb->tstamp
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit b50edd7812852d989f2ef09dcfc729690f54a42d ]
+
+I noticed tcpdump was giving funky timestamps for locally
+generated SYNACK messages on loopback interface.
+
+11:42:46.938990 IP 127.0.0.1.48245 > 127.0.0.2.23850: S
+945476042:945476042(0) win 43690 <mss 65495,nop,nop,sackOK,nop,wscale 7>
+
+20:28:58.502209 IP 127.0.0.2.23850 > 127.0.0.1.48245: S
+3160535375:3160535375(0) ack 945476043 win 43690 <mss
+65495,nop,nop,sackOK,nop,wscale 7>
+
+This is because we need to clear skb->tstamp before
+entering lower stack, otherwise net_timestamp_check()
+does not set skb->tstamp.
+
+Fixes: 7faee5c0d514 ("tcp: remove TCP_SKB_CB(skb)->when")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -2931,6 +2931,8 @@ struct sk_buff *tcp_make_synack(struct s
+       }
+ #endif
++      /* Do not fool tcpdump (if any), clean our debris */
++      skb->tstamp.tv64 = 0;
+       return skb;
+ }
+ EXPORT_SYMBOL(tcp_make_synack);
diff --git a/queue-3.19/tg3-hold-tp-lock-before-calling-tg3_halt-from-tg3_init_one.patch b/queue-3.19/tg3-hold-tp-lock-before-calling-tg3_halt-from-tg3_init_one.patch
new file mode 100644 (file)
index 0000000..9e45101
--- /dev/null
@@ -0,0 +1,44 @@
+From foo@baz Tue Apr 21 23:05:14 CEST 2015
+From: "Jun'ichi Nomura \\\\(NEC\\\\)" <j-nomura@ce.jp.nec.com>
+Date: Thu, 12 Feb 2015 01:26:24 +0000
+Subject: tg3: Hold tp->lock before calling tg3_halt() from tg3_init_one()
+
+From: "Jun'ichi Nomura \\\\(NEC\\\\)" <j-nomura@ce.jp.nec.com>
+
+[ Upstream commit d0af71a3573f1217b140c60b66f1a9b335fb058b ]
+
+tg3_init_one() calls tg3_halt() without holding tp->lock, although
+tg3_halt() assumes the lock is held, and this causes a deadlock.
+If lockdep is enabled, a warning like this shows up before the stall:
+
+  [ BUG: bad unlock balance detected! ]
+  3.19.0test #3 Tainted: G            E
+  -------------------------------------
+  insmod/369 is trying to release lock (&(&tp->lock)->rlock) at:
+  [<ffffffffa02d5a1d>] tg3_chip_reset+0x14d/0x780 [tg3]
+  but there are no more locks to release!
+
+tg3_init_one() doesn't call tg3_halt() under normal situation but
+during kexec kdump I hit this problem.
+
+Fixes: 932f19de ("tg3: Release tp->lock before invoking synchronize_irq()")
+Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/tg3.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/ethernet/broadcom/tg3.c
++++ b/drivers/net/ethernet/broadcom/tg3.c
+@@ -17868,8 +17868,10 @@ static int tg3_init_one(struct pci_dev *
+        */
+       if ((tr32(HOSTCC_MODE) & HOSTCC_MODE_ENABLE) ||
+           (tr32(WDMAC_MODE) & WDMAC_MODE_ENABLE)) {
++              tg3_full_lock(tp, 0);
+               tw32(MEMARB_MODE, MEMARB_MODE_ENABLE);
+               tg3_halt(tp, RESET_KIND_SHUTDOWN, 1);
++              tg3_full_unlock(tp);
+       }
+       err = tg3_test_dma(tp);
diff --git a/queue-3.19/tun-return-proper-error-code-from-tun_do_read.patch b/queue-3.19/tun-return-proper-error-code-from-tun_do_read.patch
new file mode 100644 (file)
index 0000000..d318cf9
--- /dev/null
@@ -0,0 +1,30 @@
+From foo@baz Tue Apr 21 23:05:14 CEST 2015
+From: Alex Gartrell <agartrell@fb.com>
+Date: Thu, 25 Dec 2014 23:22:49 -0800
+Subject: tun: return proper error code from tun_do_read
+
+From: Alex Gartrell <agartrell@fb.com>
+
+[ Upstream commit 957f094f221f81e457133b1f4c4d95ffa49ff731 ]
+
+Instead of -1 with EAGAIN, read on an O_NONBLOCK tun fd will return 0.  Fix
+this by properly returning the error code from __skb_recv_datagram.
+
+Signed-off-by: Alex Gartrell <agartrell@fb.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tun.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -1368,7 +1368,7 @@ static ssize_t tun_do_read(struct tun_st
+       skb = __skb_recv_datagram(tfile->socket.sk, noblock ? MSG_DONTWAIT : 0,
+                                 &peeked, &off, &err);
+       if (!skb)
+-              return 0;
++              return err;
+       ret = tun_put_user(tun, tfile, skb, to);
+       if (unlikely(ret < 0))
diff --git a/queue-3.19/udptunnels-call-handle_offloads-after-inserting-vlan-tag.patch b/queue-3.19/udptunnels-call-handle_offloads-after-inserting-vlan-tag.patch
new file mode 100644 (file)
index 0000000..7e1011d
--- /dev/null
@@ -0,0 +1,100 @@
+From foo@baz Tue Apr 21 23:05:14 CEST 2015
+From: Jesse Gross <jesse@nicira.com>
+Date: Thu, 9 Apr 2015 11:19:14 -0700
+Subject: udptunnels: Call handle_offloads after inserting vlan tag.
+
+From: Jesse Gross <jesse@nicira.com>
+
+[ Upstream commit b736a623bd099cdf5521ca9bd03559f3bc7fa31c ]
+
+handle_offloads() calls skb_reset_inner_headers() to store
+the layer pointers to the encapsulated packet. However, we
+currently push the vlan tag (if there is one) onto the packet
+afterwards. This changes the MAC header for the encapsulated
+packet but it is not reflected in skb->inner_mac_header, which
+breaks GSO and drivers which attempt to use this for encapsulation
+offloads.
+
+Fixes: 1eaa8178 ("vxlan: Add tx-vlan offload support.")
+Signed-off-by: Jesse Gross <jesse@nicira.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/vxlan.c |   20 ++++++++++----------
+ net/ipv4/geneve.c   |    8 ++++----
+ 2 files changed, 14 insertions(+), 14 deletions(-)
+
+--- a/drivers/net/vxlan.c
++++ b/drivers/net/vxlan.c
+@@ -1578,12 +1578,6 @@ static int vxlan6_xmit_skb(struct vxlan_
+       int err;
+       bool udp_sum = !udp_get_no_check6_tx(vs->sock->sk);
+-      skb = udp_tunnel_handle_offloads(skb, udp_sum);
+-      if (IS_ERR(skb)) {
+-              err = -EINVAL;
+-              goto err;
+-      }
+-
+       skb_scrub_packet(skb, xnet);
+       min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len
+@@ -1603,6 +1597,12 @@ static int vxlan6_xmit_skb(struct vxlan_
+               goto err;
+       }
++      skb = udp_tunnel_handle_offloads(skb, udp_sum);
++      if (IS_ERR(skb)) {
++              err = -EINVAL;
++              goto err;
++      }
++
+       vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
+       vxh->vx_flags = htonl(VXLAN_FLAGS);
+       vxh->vx_vni = vni;
+@@ -1628,10 +1628,6 @@ int vxlan_xmit_skb(struct vxlan_sock *vs
+       int err;
+       bool udp_sum = !vs->sock->sk->sk_no_check_tx;
+-      skb = udp_tunnel_handle_offloads(skb, udp_sum);
+-      if (IS_ERR(skb))
+-              return PTR_ERR(skb);
+-
+       min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
+                       + VXLAN_HLEN + sizeof(struct iphdr)
+                       + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
+@@ -1647,6 +1643,10 @@ int vxlan_xmit_skb(struct vxlan_sock *vs
+       if (WARN_ON(!skb))
+               return -ENOMEM;
++      skb = udp_tunnel_handle_offloads(skb, udp_sum);
++      if (IS_ERR(skb))
++              return PTR_ERR(skb);
++
+       vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
+       vxh->vx_flags = htonl(VXLAN_FLAGS);
+       vxh->vx_vni = vni;
+--- a/net/ipv4/geneve.c
++++ b/net/ipv4/geneve.c
+@@ -121,10 +121,6 @@ int geneve_xmit_skb(struct geneve_sock *
+       int min_headroom;
+       int err;
+-      skb = udp_tunnel_handle_offloads(skb, !gs->sock->sk->sk_no_check_tx);
+-      if (IS_ERR(skb))
+-              return PTR_ERR(skb);
+-
+       min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
+                       + GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr)
+                       + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
+@@ -139,6 +135,10 @@ int geneve_xmit_skb(struct geneve_sock *
+       if (unlikely(!skb))
+               return -ENOMEM;
++      skb = udp_tunnel_handle_offloads(skb, !gs->sock->sk->sk_no_check_tx);
++      if (IS_ERR(skb))
++              return PTR_ERR(skb);
++
+       gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len);
+       geneve_build_header(gnvh, tun_flags, vni, opt_len, opt);
diff --git a/queue-3.19/usbnet-fix-tx_bytes-statistic-running-backward-in-cdc_ncm.patch b/queue-3.19/usbnet-fix-tx_bytes-statistic-running-backward-in-cdc_ncm.patch
new file mode 100644 (file)
index 0000000..333e6a0
--- /dev/null
@@ -0,0 +1,126 @@
+From foo@baz Tue Apr 21 23:05:14 CEST 2015
+From: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Date: Wed, 25 Mar 2015 21:41:33 +0100
+Subject: usbnet: Fix tx_bytes statistic running backward in cdc_ncm
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Ben Hutchings <ben.hutchings@codethink.co.uk>
+
+[ Upstream commit 7a1e890e2168e33fb62d84528e996b8b4b478fea ]
+
+cdc_ncm disagrees with usbnet about how much framing overhead should
+be counted in the tx_bytes statistics, and tries to 'fix' this by
+decrementing tx_bytes on the transmit path.  But statistics must never
+be decremented except due to roll-over; this will thoroughly confuse
+user-space.  Also, tx_bytes is only incremented by usbnet in the
+completion path.
+
+Fix this by requiring drivers that set FLAG_MULTI_PACKET to set a
+tx_bytes delta along with the tx_packets count.
+
+Fixes: beeecd42c3b4 ("net: cdc_ncm/cdc_mbim: adding NCM protocol statistics")
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Bjørn Mork <bjorn@mork.no>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/asix_common.c |    2 +-
+ drivers/net/usb/cdc_ncm.c     |    7 +++----
+ drivers/net/usb/sr9800.c      |    2 +-
+ drivers/net/usb/usbnet.c      |   16 +++++++++++++---
+ include/linux/usb/usbnet.h    |    6 ++++--
+ 5 files changed, 22 insertions(+), 11 deletions(-)
+
+--- a/drivers/net/usb/asix_common.c
++++ b/drivers/net/usb/asix_common.c
+@@ -189,7 +189,7 @@ struct sk_buff *asix_tx_fixup(struct usb
+               skb_put(skb, sizeof(padbytes));
+       }
+-      usbnet_set_skb_tx_stats(skb, 1);
++      usbnet_set_skb_tx_stats(skb, 1, 0);
+       return skb;
+ }
+--- a/drivers/net/usb/cdc_ncm.c
++++ b/drivers/net/usb/cdc_ncm.c
+@@ -1177,13 +1177,12 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev
+       ctx->tx_overhead += skb_out->len - ctx->tx_curr_frame_payload;
+       ctx->tx_ntbs++;
+-      /* usbnet has already counted all the framing overhead.
++      /* usbnet will count all the framing overhead by default.
+        * Adjust the stats so that the tx_bytes counter show real
+        * payload data instead.
+        */
+-      dev->net->stats.tx_bytes -= skb_out->len - ctx->tx_curr_frame_payload;
+-
+-      usbnet_set_skb_tx_stats(skb_out, n);
++      usbnet_set_skb_tx_stats(skb_out, n,
++                              ctx->tx_curr_frame_payload - skb_out->len);
+       return skb_out;
+--- a/drivers/net/usb/sr9800.c
++++ b/drivers/net/usb/sr9800.c
+@@ -144,7 +144,7 @@ static struct sk_buff *sr_tx_fixup(struc
+               skb_put(skb, sizeof(padbytes));
+       }
+-      usbnet_set_skb_tx_stats(skb, 1);
++      usbnet_set_skb_tx_stats(skb, 1, 0);
+       return skb;
+ }
+--- a/drivers/net/usb/usbnet.c
++++ b/drivers/net/usb/usbnet.c
+@@ -1347,9 +1347,19 @@ netdev_tx_t usbnet_start_xmit (struct sk
+               } else
+                       urb->transfer_flags |= URB_ZERO_PACKET;
+       }
+-      entry->length = urb->transfer_buffer_length = length;
+-      if (!(info->flags & FLAG_MULTI_PACKET))
+-              usbnet_set_skb_tx_stats(skb, 1);
++      urb->transfer_buffer_length = length;
++
++      if (info->flags & FLAG_MULTI_PACKET) {
++              /* Driver has set number of packets and a length delta.
++               * Calculate the complete length and ensure that it's
++               * positive.
++               */
++              entry->length += length;
++              if (WARN_ON_ONCE(entry->length <= 0))
++                      entry->length = length;
++      } else {
++              usbnet_set_skb_tx_stats(skb, 1, length);
++      }
+       spin_lock_irqsave(&dev->txq.lock, flags);
+       retval = usb_autopm_get_interface_async(dev->intf);
+--- a/include/linux/usb/usbnet.h
++++ b/include/linux/usb/usbnet.h
+@@ -227,7 +227,7 @@ struct skb_data {  /* skb->cb is one of t
+       struct urb              *urb;
+       struct usbnet           *dev;
+       enum skb_state          state;
+-      size_t                  length;
++      long                    length;
+       unsigned long           packets;
+ };
+@@ -235,11 +235,13 @@ struct skb_data {        /* skb->cb is one of t
+  * tx_fixup method before returning an skb.
+  */
+ static inline void
+-usbnet_set_skb_tx_stats(struct sk_buff *skb, unsigned long packets)
++usbnet_set_skb_tx_stats(struct sk_buff *skb,
++                      unsigned long packets, long bytes_delta)
+ {
+       struct skb_data *entry = (struct skb_data *) skb->cb;
+       entry->packets = packets;
++      entry->length = bytes_delta;
+ }
+ extern int usbnet_open(struct net_device *net);
diff --git a/queue-3.19/usbnet-fix-tx_packets-stat-for-flag_multi_frame-drivers.patch b/queue-3.19/usbnet-fix-tx_packets-stat-for-flag_multi_frame-drivers.patch
new file mode 100644 (file)
index 0000000..78dca54
--- /dev/null
@@ -0,0 +1,119 @@
+From foo@baz Tue Apr 21 23:05:14 CEST 2015
+From: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Date: Thu, 26 Feb 2015 19:34:37 +0000
+Subject: usbnet: Fix tx_packets stat for FLAG_MULTI_FRAME drivers
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Ben Hutchings <ben.hutchings@codethink.co.uk>
+
+[ Upstream commit 1e9e39f4a29857a396ac7b669d109f697f66695e ]
+
+Currently the usbnet core does not update the tx_packets statistic for
+drivers with FLAG_MULTI_PACKET and there is no hook in the TX
+completion path where they could do this.
+
+cdc_ncm and dependent drivers are bumping tx_packets stat on the
+transmit path while asix and sr9800 aren't updating it at all.
+
+Add a packet count in struct skb_data so these drivers can fill it
+in, initialise it to 1 for other drivers, and add the packet count
+to the tx_packets statistic on completion.
+
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Tested-by: Bjørn Mork <bjorn@mork.no>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/asix_common.c |    2 ++
+ drivers/net/usb/cdc_ncm.c     |    3 ++-
+ drivers/net/usb/sr9800.c      |    1 +
+ drivers/net/usb/usbnet.c      |    5 +++--
+ include/linux/usb/usbnet.h    |   12 ++++++++++++
+ 5 files changed, 20 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/usb/asix_common.c
++++ b/drivers/net/usb/asix_common.c
+@@ -188,6 +188,8 @@ struct sk_buff *asix_tx_fixup(struct usb
+               memcpy(skb_tail_pointer(skb), &padbytes, sizeof(padbytes));
+               skb_put(skb, sizeof(padbytes));
+       }
++
++      usbnet_set_skb_tx_stats(skb, 1);
+       return skb;
+ }
+--- a/drivers/net/usb/cdc_ncm.c
++++ b/drivers/net/usb/cdc_ncm.c
+@@ -1172,7 +1172,6 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev
+       /* return skb */
+       ctx->tx_curr_skb = NULL;
+-      dev->net->stats.tx_packets += ctx->tx_curr_frame_num;
+       /* keep private stats: framing overhead and number of NTBs */
+       ctx->tx_overhead += skb_out->len - ctx->tx_curr_frame_payload;
+@@ -1184,6 +1183,8 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev
+        */
+       dev->net->stats.tx_bytes -= skb_out->len - ctx->tx_curr_frame_payload;
++      usbnet_set_skb_tx_stats(skb_out, n);
++
+       return skb_out;
+ exit_no_skb:
+--- a/drivers/net/usb/sr9800.c
++++ b/drivers/net/usb/sr9800.c
+@@ -144,6 +144,7 @@ static struct sk_buff *sr_tx_fixup(struc
+               skb_put(skb, sizeof(padbytes));
+       }
++      usbnet_set_skb_tx_stats(skb, 1);
+       return skb;
+ }
+--- a/drivers/net/usb/usbnet.c
++++ b/drivers/net/usb/usbnet.c
+@@ -1189,8 +1189,7 @@ static void tx_complete (struct urb *urb
+       struct usbnet           *dev = entry->dev;
+       if (urb->status == 0) {
+-              if (!(dev->driver_info->flags & FLAG_MULTI_PACKET))
+-                      dev->net->stats.tx_packets++;
++              dev->net->stats.tx_packets += entry->packets;
+               dev->net->stats.tx_bytes += entry->length;
+       } else {
+               dev->net->stats.tx_errors++;
+@@ -1349,6 +1348,8 @@ netdev_tx_t usbnet_start_xmit (struct sk
+                       urb->transfer_flags |= URB_ZERO_PACKET;
+       }
+       entry->length = urb->transfer_buffer_length = length;
++      if (!(info->flags & FLAG_MULTI_PACKET))
++              usbnet_set_skb_tx_stats(skb, 1);
+       spin_lock_irqsave(&dev->txq.lock, flags);
+       retval = usb_autopm_get_interface_async(dev->intf);
+--- a/include/linux/usb/usbnet.h
++++ b/include/linux/usb/usbnet.h
+@@ -228,8 +228,20 @@ struct skb_data { /* skb->cb is one of t
+       struct usbnet           *dev;
+       enum skb_state          state;
+       size_t                  length;
++      unsigned long           packets;
+ };
++/* Drivers that set FLAG_MULTI_PACKET must call this in their
++ * tx_fixup method before returning an skb.
++ */
++static inline void
++usbnet_set_skb_tx_stats(struct sk_buff *skb, unsigned long packets)
++{
++      struct skb_data *entry = (struct skb_data *) skb->cb;
++
++      entry->packets = packets;
++}
++
+ extern int usbnet_open(struct net_device *net);
+ extern int usbnet_stop(struct net_device *net);
+ extern netdev_tx_t usbnet_start_xmit(struct sk_buff *skb,
diff --git a/queue-3.19/xen-netfront-transmit-fully-gso-sized-packets.patch b/queue-3.19/xen-netfront-transmit-fully-gso-sized-packets.patch
new file mode 100644 (file)
index 0000000..722481c
--- /dev/null
@@ -0,0 +1,65 @@
+From foo@baz Tue Apr 21 23:05:14 CEST 2015
+From: Jonathan Davies <jonathan.davies@citrix.com>
+Date: Tue, 31 Mar 2015 11:05:15 +0100
+Subject: xen-netfront: transmit fully GSO-sized packets
+
+From: Jonathan Davies <jonathan.davies@citrix.com>
+
+[ Upstream commit 0c36820e2ab7d943ab1188230fdf2149826d33c0 ]
+
+xen-netfront limits transmitted skbs to be at most 44 segments in size. However,
+GSO permits up to 65536 bytes, which means a maximum of 45 segments of 1448
+bytes each. This slight reduction in the size of packets means a slight loss in
+efficiency.
+
+Since c/s 9ecd1a75d, xen-netfront sets gso_max_size to
+    XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER,
+where XEN_NETIF_MAX_TX_SIZE is 65535 bytes.
+
+The calculation used by tcp_tso_autosize (and also tcp_xmit_size_goal since c/s
+6c09fa09d) in determining when to split an skb into two is
+    sk->sk_gso_max_size - 1 - MAX_TCP_HEADER.
+
+So the maximum permitted size of an skb is calculated to be
+    (XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER) - 1 - MAX_TCP_HEADER.
+
+Intuitively, this looks like the wrong formula -- we don't need two TCP headers.
+Instead, there is no need to deviate from the default gso_max_size of 65536 as
+this already accommodates the size of the header.
+
+Currently, the largest skb transmitted by netfront is 63712 bytes (44 segments
+of 1448 bytes each), as observed via tcpdump. This patch makes netfront send
+skbs of up to 65160 bytes (45 segments of 1448 bytes each).
+
+Similarly, MAX_TCP_HEADER need not be subtracted from the maximum allowable MTU,
+as the MTU covers the size of the whole packet, including the header.
+
+Fixes: 9ecd1a75d977 ("xen-netfront: reduce gso_max_size to account for max TCP header")
+Signed-off-by: Jonathan Davies <jonathan.davies@citrix.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/xen-netfront.c |    5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+--- a/drivers/net/xen-netfront.c
++++ b/drivers/net/xen-netfront.c
+@@ -1062,8 +1062,7 @@ err:
+ static int xennet_change_mtu(struct net_device *dev, int mtu)
+ {
+-      int max = xennet_can_sg(dev) ?
+-              XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER : ETH_DATA_LEN;
++      int max = xennet_can_sg(dev) ? XEN_NETIF_MAX_TX_SIZE : ETH_DATA_LEN;
+       if (mtu > max)
+               return -EINVAL;
+@@ -1333,8 +1332,6 @@ static struct net_device *xennet_create_
+       netdev->ethtool_ops = &xennet_ethtool_ops;
+       SET_NETDEV_DEV(netdev, &dev->dev);
+-      netif_set_gso_max_size(netdev, XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER);
+-
+       np->netdev = netdev;
+       netif_carrier_off(netdev);