From: Greg Kroah-Hartman Date: Tue, 21 Apr 2015 21:16:16 +0000 (+0200) Subject: 3.19-stable patches X-Git-Tag: v4.0.1~27 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=55aec2a561316d35d2aa2d29fcee0cfefc0b8b86;p=thirdparty%2Fkernel%2Fstable-queue.git 3.19-stable patches added patches: bnx2x-fix-busy_poll-vs-netpoll.patch bonding-bonding-overriding-configuration-logic-restored.patch bpf-fix-verifier-memory-corruption.patch ipv6-don-t-reduce-hop-limit-for-an-interface.patch ipv6-protect-skb-sk-accesses-from-recursive-dereference-inside-the-stack.patch net-mlx4_core-fix-error-message-deprecation-for-connectx-2-cards.patch net-mlx4_en-call-register_netdevice-in-the-proper-location.patch net-tcp6-fix-double-call-of-tcp_v6_fill_cb.patch openvswitch-return-vport-module-ref-before-destruction.patch revert-net-reset-secmark-when-scrubbing-packet.patch rocker-handle-non-bridge-master-change.patch skbuff-do-not-scrub-skb-mark-within-the-same-name-space.patch tcp-fix-frto-undo-on-cumulative-ack-of-sacked-range.patch tcp-prevent-fetching-dst-twice-in-early-demux-code.patch tcp-tcp_make_synack-should-clear-skb-tstamp.patch tg3-hold-tp-lock-before-calling-tg3_halt-from-tg3_init_one.patch tun-return-proper-error-code-from-tun_do_read.patch udptunnels-call-handle_offloads-after-inserting-vlan-tag.patch usbnet-fix-tx_bytes-statistic-running-backward-in-cdc_ncm.patch usbnet-fix-tx_packets-stat-for-flag_multi_frame-drivers.patch xen-netfront-transmit-fully-gso-sized-packets.patch --- diff --git a/queue-3.19/bnx2x-fix-busy_poll-vs-netpoll.patch b/queue-3.19/bnx2x-fix-busy_poll-vs-netpoll.patch new file mode 100644 index 00000000000..ad8d950eaf2 --- /dev/null +++ b/queue-3.19/bnx2x-fix-busy_poll-vs-netpoll.patch @@ -0,0 +1,265 @@ +From foo@baz Tue Apr 21 23:05:14 CEST 2015 +From: Eric Dumazet +Date: Tue, 14 Apr 2015 18:45:00 -0700 +Subject: bnx2x: Fix busy_poll vs netpoll + +From: Eric Dumazet + +[ Upstream commit 074975d0374333f656c48487aa046a21a9b9d7a1 ] + +Commit 9a2620c877454 ("bnx2x: prevent WARN during driver unload") +switched the napi/busy_lock locking mechanism from spin_lock() into +spin_lock_bh(), breaking inter-operability with netconsole, as netpoll +disables interrupts prior to calling our napi mechanism. + +This switches the driver into using atomic assignments instead of the +spinlock mechanisms previously employed. + +Based on initial patch from Yuval Mintz & Ariel Elior + +I basically added softirq starvation avoidance, and mixture +of atomic operations, plain writes and barriers. + +Note this slightly reduces the overhead for this driver when no +busy_poll sockets are in use. + +Fixes: 9a2620c877454 ("bnx2x: prevent WARN during driver unload") +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 135 ++++++++---------------- + drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 9 - + 2 files changed, 55 insertions(+), 89 deletions(-) + +--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h ++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +@@ -531,20 +531,8 @@ struct bnx2x_fastpath { + struct napi_struct napi; + + #ifdef CONFIG_NET_RX_BUSY_POLL +- unsigned int state; +-#define BNX2X_FP_STATE_IDLE 0 +-#define BNX2X_FP_STATE_NAPI (1 << 0) /* NAPI owns this FP */ +-#define BNX2X_FP_STATE_POLL (1 << 1) /* poll owns this FP */ +-#define BNX2X_FP_STATE_DISABLED (1 << 2) +-#define BNX2X_FP_STATE_NAPI_YIELD (1 << 3) /* NAPI yielded this FP */ +-#define BNX2X_FP_STATE_POLL_YIELD (1 << 4) /* poll yielded this FP */ +-#define BNX2X_FP_OWNED (BNX2X_FP_STATE_NAPI | BNX2X_FP_STATE_POLL) +-#define BNX2X_FP_YIELD (BNX2X_FP_STATE_NAPI_YIELD | BNX2X_FP_STATE_POLL_YIELD) +-#define BNX2X_FP_LOCKED (BNX2X_FP_OWNED | BNX2X_FP_STATE_DISABLED) +-#define BNX2X_FP_USER_PEND (BNX2X_FP_STATE_POLL | BNX2X_FP_STATE_POLL_YIELD) +- /* protect state */ +- spinlock_t lock; +-#endif /* CONFIG_NET_RX_BUSY_POLL */ ++ unsigned long busy_poll_state; ++#endif + + union host_hc_status_block status_blk; + /* chip independent shortcuts into sb structure */ +@@ -619,104 +607,83 @@ struct bnx2x_fastpath { + #define bnx2x_fp_qstats(bp, fp) (&((bp)->fp_stats[(fp)->index].eth_q_stats)) + + #ifdef CONFIG_NET_RX_BUSY_POLL +-static inline void bnx2x_fp_init_lock(struct bnx2x_fastpath *fp) ++ ++enum bnx2x_fp_state { ++ BNX2X_STATE_FP_NAPI = BIT(0), /* NAPI handler owns the queue */ ++ ++ BNX2X_STATE_FP_NAPI_REQ_BIT = 1, /* NAPI would like to own the queue */ ++ BNX2X_STATE_FP_NAPI_REQ = BIT(1), ++ ++ BNX2X_STATE_FP_POLL_BIT = 2, ++ BNX2X_STATE_FP_POLL = BIT(2), /* busy_poll owns the queue */ ++ ++ BNX2X_STATE_FP_DISABLE_BIT = 3, /* queue is dismantled */ ++}; ++ ++static inline void bnx2x_fp_busy_poll_init(struct bnx2x_fastpath *fp) + { +- spin_lock_init(&fp->lock); +- fp->state = BNX2X_FP_STATE_IDLE; ++ WRITE_ONCE(fp->busy_poll_state, 0); + } + + /* called from the device poll routine to get ownership of a FP */ + static inline bool bnx2x_fp_lock_napi(struct bnx2x_fastpath *fp) + { +- bool rc = true; ++ unsigned long prev, old = READ_ONCE(fp->busy_poll_state); + +- spin_lock_bh(&fp->lock); +- if (fp->state & BNX2X_FP_LOCKED) { +- WARN_ON(fp->state & BNX2X_FP_STATE_NAPI); +- fp->state |= BNX2X_FP_STATE_NAPI_YIELD; +- rc = false; +- } else { +- /* we don't care if someone yielded */ +- fp->state = BNX2X_FP_STATE_NAPI; ++ while (1) { ++ switch (old) { ++ case BNX2X_STATE_FP_POLL: ++ /* make sure bnx2x_fp_lock_poll() wont starve us */ ++ set_bit(BNX2X_STATE_FP_NAPI_REQ_BIT, ++ &fp->busy_poll_state); ++ /* fallthrough */ ++ case BNX2X_STATE_FP_POLL | BNX2X_STATE_FP_NAPI_REQ: ++ return false; ++ default: ++ break; ++ } ++ prev = cmpxchg(&fp->busy_poll_state, old, BNX2X_STATE_FP_NAPI); ++ if (unlikely(prev != old)) { ++ old = prev; ++ continue; ++ } ++ return true; + } +- spin_unlock_bh(&fp->lock); +- return rc; + } + +-/* returns true is someone tried to get the FP while napi had it */ +-static inline bool bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp) ++static inline void bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp) + { +- bool rc = false; +- +- spin_lock_bh(&fp->lock); +- WARN_ON(fp->state & +- (BNX2X_FP_STATE_POLL | BNX2X_FP_STATE_NAPI_YIELD)); +- +- if (fp->state & BNX2X_FP_STATE_POLL_YIELD) +- rc = true; +- +- /* state ==> idle, unless 
currently disabled */ +- fp->state &= BNX2X_FP_STATE_DISABLED; +- spin_unlock_bh(&fp->lock); +- return rc; ++ smp_wmb(); ++ fp->busy_poll_state = 0; + } + + /* called from bnx2x_low_latency_poll() */ + static inline bool bnx2x_fp_lock_poll(struct bnx2x_fastpath *fp) + { +- bool rc = true; +- +- spin_lock_bh(&fp->lock); +- if ((fp->state & BNX2X_FP_LOCKED)) { +- fp->state |= BNX2X_FP_STATE_POLL_YIELD; +- rc = false; +- } else { +- /* preserve yield marks */ +- fp->state |= BNX2X_FP_STATE_POLL; +- } +- spin_unlock_bh(&fp->lock); +- return rc; ++ return cmpxchg(&fp->busy_poll_state, 0, BNX2X_STATE_FP_POLL) == 0; + } + +-/* returns true if someone tried to get the FP while it was locked */ +-static inline bool bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp) ++static inline void bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp) + { +- bool rc = false; +- +- spin_lock_bh(&fp->lock); +- WARN_ON(fp->state & BNX2X_FP_STATE_NAPI); +- +- if (fp->state & BNX2X_FP_STATE_POLL_YIELD) +- rc = true; +- +- /* state ==> idle, unless currently disabled */ +- fp->state &= BNX2X_FP_STATE_DISABLED; +- spin_unlock_bh(&fp->lock); +- return rc; ++ smp_mb__before_atomic(); ++ clear_bit(BNX2X_STATE_FP_POLL_BIT, &fp->busy_poll_state); + } + +-/* true if a socket is polling, even if it did not get the lock */ ++/* true if a socket is polling */ + static inline bool bnx2x_fp_ll_polling(struct bnx2x_fastpath *fp) + { +- WARN_ON(!(fp->state & BNX2X_FP_OWNED)); +- return fp->state & BNX2X_FP_USER_PEND; ++ return READ_ONCE(fp->busy_poll_state) & BNX2X_STATE_FP_POLL; + } + + /* false if fp is currently owned */ + static inline bool bnx2x_fp_ll_disable(struct bnx2x_fastpath *fp) + { +- int rc = true; +- +- spin_lock_bh(&fp->lock); +- if (fp->state & BNX2X_FP_OWNED) +- rc = false; +- fp->state |= BNX2X_FP_STATE_DISABLED; +- spin_unlock_bh(&fp->lock); ++ set_bit(BNX2X_STATE_FP_DISABLE_BIT, &fp->busy_poll_state); ++ return !bnx2x_fp_ll_polling(fp); + +- return rc; + } + #else +-static inline void bnx2x_fp_init_lock(struct bnx2x_fastpath *fp) ++static inline void bnx2x_fp_busy_poll_init(struct bnx2x_fastpath *fp) + { + } + +@@ -725,9 +692,8 @@ static inline bool bnx2x_fp_lock_napi(st + return true; + } + +-static inline bool bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp) ++static inline void bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp) + { +- return false; + } + + static inline bool bnx2x_fp_lock_poll(struct bnx2x_fastpath *fp) +@@ -735,9 +701,8 @@ static inline bool bnx2x_fp_lock_poll(st + return false; + } + +-static inline bool bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp) ++static inline void bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp) + { +- return false; + } + + static inline bool bnx2x_fp_ll_polling(struct bnx2x_fastpath *fp) +--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c ++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +@@ -1849,7 +1849,7 @@ static void bnx2x_napi_enable_cnic(struc + int i; + + for_each_rx_queue_cnic(bp, i) { +- bnx2x_fp_init_lock(&bp->fp[i]); ++ bnx2x_fp_busy_poll_init(&bp->fp[i]); + napi_enable(&bnx2x_fp(bp, i, napi)); + } + } +@@ -1859,7 +1859,7 @@ static void bnx2x_napi_enable(struct bnx + int i; + + for_each_eth_queue(bp, i) { +- bnx2x_fp_init_lock(&bp->fp[i]); ++ bnx2x_fp_busy_poll_init(&bp->fp[i]); + napi_enable(&bnx2x_fp(bp, i, napi)); + } + } +@@ -3191,9 +3191,10 @@ static int bnx2x_poll(struct napi_struct + } + } + ++ bnx2x_fp_unlock_napi(fp); ++ + /* Fall out from the NAPI loop if needed */ +- if (!bnx2x_fp_unlock_napi(fp) && +- !(bnx2x_has_rx_work(fp) || 
bnx2x_has_tx_work(fp))) { ++ if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) { + + /* No need to update SB for FCoE L2 ring as long as + * it's connected to the default SB and the SB diff --git a/queue-3.19/bonding-bonding-overriding-configuration-logic-restored.patch b/queue-3.19/bonding-bonding-overriding-configuration-logic-restored.patch new file mode 100644 index 00000000000..57cf9aa7d9a --- /dev/null +++ b/queue-3.19/bonding-bonding-overriding-configuration-logic-restored.patch @@ -0,0 +1,40 @@ +From foo@baz Tue Apr 21 23:05:14 CEST 2015 +From: Anton Nayshtut +Date: Sun, 29 Mar 2015 14:20:25 +0300 +Subject: bonding: Bonding Overriding Configuration logic restored. + +From: Anton Nayshtut + +[ Upstream commit f5e2dc5d7fe78fe4d8748d217338f4f7b6a5d7ea ] + +Before commit 3900f29021f0bc7fe9815aa32f1a993b7dfdd402 ("bonding: slight +optimizztion for bond_slave_override()") the override logic was to send packets +with non-zero queue_id through the slave with corresponding queue_id, under two +conditions only - if the slave can transmit and it's up. + +The above mentioned commit changed this logic by introducing an additional +condition - whether the bond is active (indirectly, using the slave_can_tx and +later - bond_is_active_slave), that prevents the user from implementing more +complex policies according to the Documentation/networking/bonding.txt. + +Signed-off-by: Anton Nayshtut +Signed-off-by: Alexey Bogoslavsky +Signed-off-by: Andy Gospodarek +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_main.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -3797,7 +3797,8 @@ static inline int bond_slave_override(st + /* Find out if any slaves have the same mapping as this skb. */ + bond_for_each_slave_rcu(bond, slave, iter) { + if (slave->queue_id == skb->queue_mapping) { +- if (bond_slave_can_tx(slave)) { ++ if (bond_slave_is_up(slave) && ++ slave->link == BOND_LINK_UP) { + bond_dev_queue_xmit(bond, skb, slave->dev); + return 0; + } diff --git a/queue-3.19/bpf-fix-verifier-memory-corruption.patch b/queue-3.19/bpf-fix-verifier-memory-corruption.patch new file mode 100644 index 00000000000..c6a859113e4 --- /dev/null +++ b/queue-3.19/bpf-fix-verifier-memory-corruption.patch @@ -0,0 +1,43 @@ +From foo@baz Tue Apr 21 23:05:14 CEST 2015 +From: Alexei Starovoitov +Date: Tue, 14 Apr 2015 15:57:13 -0700 +Subject: bpf: fix verifier memory corruption + +From: Alexei Starovoitov + +[ Upstream commit c3de6317d748e23b9e46ba36e10483728d00d144 ] + +Due to missing bounds check the DAG pass of the BPF verifier can corrupt +the memory which can cause random crashes during program loading: + +[8.449451] BUG: unable to handle kernel paging request at ffffffffffffffff +[8.451293] IP: [] kmem_cache_alloc_trace+0x8d/0x2f0 +[8.452329] Oops: 0000 [#1] SMP +[8.452329] Call Trace: +[8.452329] [] bpf_check+0x852/0x2000 +[8.452329] [] bpf_prog_load+0x1e4/0x310 +[8.452329] [] ? might_fault+0x5f/0xb0 +[8.452329] [] SyS_bpf+0x806/0xa30 + +Fixes: f1bca824dabb ("bpf: add search pruning optimization to verifier") +Signed-off-by: Alexei Starovoitov +Acked-by: Hannes Frederic Sowa +Acked-by: Daniel Borkmann +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/verifier.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -1380,7 +1380,8 @@ peek_stack: + /* tell verifier to check for equivalent states + * after every call and jump + */ +- env->explored_states[t + 1] = STATE_LIST_MARK; ++ if (t + 1 < insn_cnt) ++ env->explored_states[t + 1] = STATE_LIST_MARK; + } else { + /* conditional jump with two edges */ + ret = push_insn(t, t + 1, FALLTHROUGH, env); diff --git a/queue-3.19/ipv6-don-t-reduce-hop-limit-for-an-interface.patch b/queue-3.19/ipv6-don-t-reduce-hop-limit-for-an-interface.patch new file mode 100644 index 00000000000..9f55ce4a2e1 --- /dev/null +++ b/queue-3.19/ipv6-don-t-reduce-hop-limit-for-an-interface.patch @@ -0,0 +1,48 @@ +From foo@baz Tue Apr 21 23:05:14 CEST 2015 +From: "D.S. Ljungmark" +Date: Wed, 25 Mar 2015 09:28:15 +0100 +Subject: ipv6: Don't reduce hop limit for an interface + +From: "D.S. Ljungmark" + +[ Upstream commit 6fd99094de2b83d1d4c8457f2c83483b2828e75a ] + +A local route may have a lower hop_limit set than global routes do. + +RFC 3756, Section 4.2.7, "Parameter Spoofing" + +> 1. The attacker includes a Current Hop Limit of one or another small +> number which the attacker knows will cause legitimate packets to +> be dropped before they reach their destination. + +> As an example, one possible approach to mitigate this threat is to +> ignore very small hop limits. The nodes could implement a +> configurable minimum hop limit, and ignore attempts to set it below +> said limit. + +Signed-off-by: D.S. Ljungmark +Acked-by: Hannes Frederic Sowa +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ndisc.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +--- a/net/ipv6/ndisc.c ++++ b/net/ipv6/ndisc.c +@@ -1216,7 +1216,14 @@ static void ndisc_router_discovery(struc + if (rt) + rt6_set_expires(rt, jiffies + (HZ * lifetime)); + if (ra_msg->icmph.icmp6_hop_limit) { +- in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; ++ /* Only set hop_limit on the interface if it is higher than ++ * the current hop_limit. ++ */ ++ if (in6_dev->cnf.hop_limit < ra_msg->icmph.icmp6_hop_limit) { ++ in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; ++ } else { ++ ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than current\n"); ++ } + if (rt) + dst_metric_set(&rt->dst, RTAX_HOPLIMIT, + ra_msg->icmph.icmp6_hop_limit); diff --git a/queue-3.19/ipv6-protect-skb-sk-accesses-from-recursive-dereference-inside-the-stack.patch b/queue-3.19/ipv6-protect-skb-sk-accesses-from-recursive-dereference-inside-the-stack.patch new file mode 100644 index 00000000000..0d1095f959e --- /dev/null +++ b/queue-3.19/ipv6-protect-skb-sk-accesses-from-recursive-dereference-inside-the-stack.patch @@ -0,0 +1,160 @@ +From foo@baz Tue Apr 21 23:05:14 CEST 2015 +From: "hannes@stressinduktion.org" +Date: Wed, 1 Apr 2015 17:07:44 +0200 +Subject: ipv6: protect skb->sk accesses from recursive dereference inside the stack + +From: "hannes@stressinduktion.org" + +[ Upstream commit f60e5990d9c1424af9dbca60a23ba2a1c7c1ce90 ] + +We should not consult skb->sk for output decisions in xmit recursion +levels > 0 in the stack. Otherwise local socket settings could influence +the result of e.g. tunnel encapsulation process. 
+ +ipv6 does not conform with this in three places: + +1) ip6_fragment: we do consult ipv6_npinfo for frag_size + +2) sk_mc_loop in ipv6 uses skb->sk and checks if we should + loop the packet back to the local socket + +3) ip6_skb_dst_mtu could query the settings from the user socket and + force a wrong MTU + +Furthermore: +In sk_mc_loop we could potentially land in WARN_ON(1) if we use a +PF_PACKET socket ontop of an IPv6-backed vxlan device. + +Reuse xmit_recursion as we are currently only interested in protecting +tunnel devices. + +Cc: Jiri Pirko +Signed-off-by: Hannes Frederic Sowa +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/netdevice.h | 6 ++++++ + include/net/ip.h | 16 ---------------- + include/net/ip6_route.h | 3 ++- + include/net/sock.h | 2 ++ + net/core/dev.c | 4 +++- + net/core/sock.c | 19 +++++++++++++++++++ + net/ipv6/ip6_output.c | 3 ++- + 7 files changed, 34 insertions(+), 19 deletions(-) + +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -2159,6 +2159,12 @@ void netdev_freemem(struct net_device *d + void synchronize_net(void); + int init_dummy_netdev(struct net_device *dev); + ++DECLARE_PER_CPU(int, xmit_recursion); ++static inline int dev_recursion_level(void) ++{ ++ return this_cpu_read(xmit_recursion); ++} ++ + struct net_device *dev_get_by_index(struct net *net, int ifindex); + struct net_device *__dev_get_by_index(struct net *net, int ifindex); + struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); +--- a/include/net/ip.h ++++ b/include/net/ip.h +@@ -453,22 +453,6 @@ static __inline__ void inet_reset_saddr( + + #endif + +-static inline int sk_mc_loop(struct sock *sk) +-{ +- if (!sk) +- return 1; +- switch (sk->sk_family) { +- case AF_INET: +- return inet_sk(sk)->mc_loop; +-#if IS_ENABLED(CONFIG_IPV6) +- case AF_INET6: +- return inet6_sk(sk)->mc_loop; +-#endif +- } +- WARN_ON(1); +- return 1; +-} +- + bool ip_call_ra_chain(struct sk_buff *skb); + + /* +--- a/include/net/ip6_route.h ++++ b/include/net/ip6_route.h +@@ -174,7 +174,8 @@ int ip6_fragment(struct sk_buff *skb, in + + static inline int ip6_skb_dst_mtu(struct sk_buff *skb) + { +- struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; ++ struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? ++ inet6_sk(skb->sk) : NULL; + + return (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) ? 
+ skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -1812,6 +1812,8 @@ struct dst_entry *__sk_dst_check(struct + + struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie); + ++bool sk_mc_loop(struct sock *sk); ++ + static inline bool sk_can_gso(const struct sock *sk) + { + return net_gso_ok(sk->sk_route_caps, sk->sk_gso_type); +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -2821,7 +2821,9 @@ static void skb_update_prio(struct sk_bu + #define skb_update_prio(skb) + #endif + +-static DEFINE_PER_CPU(int, xmit_recursion); ++DEFINE_PER_CPU(int, xmit_recursion); ++EXPORT_SYMBOL(xmit_recursion); ++ + #define RECURSION_LIMIT 10 + + /** +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -651,6 +651,25 @@ static inline void sock_valbool_flag(str + sock_reset_flag(sk, bit); + } + ++bool sk_mc_loop(struct sock *sk) ++{ ++ if (dev_recursion_level()) ++ return false; ++ if (!sk) ++ return true; ++ switch (sk->sk_family) { ++ case AF_INET: ++ return inet_sk(sk)->mc_loop; ++#if IS_ENABLED(CONFIG_IPV6) ++ case AF_INET6: ++ return inet6_sk(sk)->mc_loop; ++#endif ++ } ++ WARN_ON(1); ++ return true; ++} ++EXPORT_SYMBOL(sk_mc_loop); ++ + /* + * This is meant for all protocols to use and covers goings on + * at the socket level. Everything here is generic. +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -541,7 +541,8 @@ int ip6_fragment(struct sk_buff *skb, in + { + struct sk_buff *frag; + struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); +- struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; ++ struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? ++ inet6_sk(skb->sk) : NULL; + struct ipv6hdr *tmp_hdr; + struct frag_hdr *fh; + unsigned int mtu, hlen, left, len; diff --git a/queue-3.19/net-mlx4_core-fix-error-message-deprecation-for-connectx-2-cards.patch b/queue-3.19/net-mlx4_core-fix-error-message-deprecation-for-connectx-2-cards.patch new file mode 100644 index 00000000000..9bb479a8e33 --- /dev/null +++ b/queue-3.19/net-mlx4_core-fix-error-message-deprecation-for-connectx-2-cards.patch @@ -0,0 +1,34 @@ +From foo@baz Tue Apr 21 23:05:14 CEST 2015 +From: Jack Morgenstein +Date: Sun, 5 Apr 2015 17:50:48 +0300 +Subject: net/mlx4_core: Fix error message deprecation for ConnectX-2 cards + +From: Jack Morgenstein + +[ Upstream commit fde913e25496761a4e2a4c81230c913aba6289a2 ] + +Commit 1daa4303b4ca ("net/mlx4_core: Deprecate error message at +ConnectX-2 cards startup to debug") did the deprecation only for port 1 +of the card. Need to deprecate for port 2 as well. + +Fixes: 1daa4303b4ca ("net/mlx4_core: Deprecate error message at ConnectX-2 cards startup to debug") +Signed-off-by: Jack Morgenstein +Signed-off-by: Amir Vadai +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx4/cmd.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c ++++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c +@@ -585,7 +585,8 @@ static int mlx4_cmd_wait(struct mlx4_dev + * on the host, we deprecate the error message for this + * specific command/input_mod/opcode_mod/fw-status to be debug. 
+ */ +- if (op == MLX4_CMD_SET_PORT && in_modifier == 1 && ++ if (op == MLX4_CMD_SET_PORT && ++ (in_modifier == 1 || in_modifier == 2) && + op_modifier == 0 && context->fw_status == CMD_STAT_BAD_SIZE) + mlx4_dbg(dev, "command 0x%x failed: fw status = 0x%x\n", + op, context->fw_status); diff --git a/queue-3.19/net-mlx4_en-call-register_netdevice-in-the-proper-location.patch b/queue-3.19/net-mlx4_en-call-register_netdevice-in-the-proper-location.patch new file mode 100644 index 00000000000..2625ae62f34 --- /dev/null +++ b/queue-3.19/net-mlx4_en-call-register_netdevice-in-the-proper-location.patch @@ -0,0 +1,60 @@ +From foo@baz Tue Apr 21 23:05:14 CEST 2015 +From: Ido Shamay +Date: Tue, 24 Mar 2015 15:18:38 +0200 +Subject: net/mlx4_en: Call register_netdevice in the proper location + +From: Ido Shamay + +[ Upstream commit e5eda89d97ec256ba14e7e861387cc0468259c18 ] + +Netdevice registration should be performed a the end of the driver +initialization flow. If we don't do that, after calling register_netdevice, +device callbacks may be issued by higher layers of the stack before +final configuration of the device is done. + +For example (VXLAN configuration race), mlx4_SET_PORT_VXLAN was issued +after the register_netdev command. System network scripts may configure +the interface (UP) right after the registration, which also attach +unicast VXLAN steering rule, before mlx4_SET_PORT_VXLAN was called, +causing the firmware to fail the rule attachment. + +Fixes: 837052d0ccc5 ("net/mlx4_en: Add netdev support for TCP/IP offloads of vxlan tunneling") +Signed-off-by: Ido Shamay +Signed-off-by: Or Gerlitz +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c ++++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +@@ -2627,13 +2627,6 @@ int mlx4_en_init_netdev(struct mlx4_en_d + netif_carrier_off(dev); + mlx4_en_set_default_moderation(priv); + +- err = register_netdev(dev); +- if (err) { +- en_err(priv, "Netdev registration failed for port %d\n", port); +- goto out; +- } +- priv->registered = 1; +- + en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num); + en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num); + +@@ -2673,6 +2666,14 @@ int mlx4_en_init_netdev(struct mlx4_en_d + queue_delayed_work(mdev->workqueue, &priv->service_task, + SERVICE_TASK_DELAY); + ++ err = register_netdev(dev); ++ if (err) { ++ en_err(priv, "Netdev registration failed for port %d\n", port); ++ goto out; ++ } ++ ++ priv->registered = 1; ++ + return 0; + + out: diff --git a/queue-3.19/net-tcp6-fix-double-call-of-tcp_v6_fill_cb.patch b/queue-3.19/net-tcp6-fix-double-call-of-tcp_v6_fill_cb.patch new file mode 100644 index 00000000000..03f7d25ddbd --- /dev/null +++ b/queue-3.19/net-tcp6-fix-double-call-of-tcp_v6_fill_cb.patch @@ -0,0 +1,64 @@ +From foo@baz Tue Apr 21 23:05:14 CEST 2015 +From: Alexey Kodanev +Date: Fri, 27 Mar 2015 12:24:22 +0300 +Subject: net: tcp6: fix double call of tcp_v6_fill_cb() + +From: Alexey Kodanev + +[ Upstream commit 4ad19de8774e2a7b075b3e8ea48db85adcf33fa6 ] + +tcp_v6_fill_cb() will be called twice if socket's state changes from +TCP_TIME_WAIT to TCP_LISTEN. That can result in control buffer data +corruption because in the second tcp_v6_fill_cb() call it's not copying +IP6CB(skb) anymore, but 'seq', 'end_seq', etc., so we can get weird and +unpredictable results. 
Performance loss of up to 1200% has been observed +in LTP/vxlan03 test. + +This can be fixed by copying inet6_skb_parm to the beginning of 'cb' +only if xfrm6_policy_check() and tcp_v6_fill_cb() are going to be +called again. + +Fixes: 2dc49d1680b53 ("tcp6: don't move IP6CB before xfrm6_policy_check()") + +Signed-off-by: Alexey Kodanev +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/tcp_ipv6.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +--- a/net/ipv6/tcp_ipv6.c ++++ b/net/ipv6/tcp_ipv6.c +@@ -1409,6 +1409,15 @@ static void tcp_v6_fill_cb(struct sk_buf + TCP_SKB_CB(skb)->sacked = 0; + } + ++static void tcp_v6_restore_cb(struct sk_buff *skb) ++{ ++ /* We need to move header back to the beginning if xfrm6_policy_check() ++ * and tcp_v6_fill_cb() are going to be called again. ++ */ ++ memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, ++ sizeof(struct inet6_skb_parm)); ++} ++ + static int tcp_v6_rcv(struct sk_buff *skb) + { + const struct tcphdr *th; +@@ -1541,6 +1550,7 @@ do_time_wait: + inet_twsk_deschedule(tw, &tcp_death_row); + inet_twsk_put(tw); + sk = sk2; ++ tcp_v6_restore_cb(skb); + goto process; + } + /* Fall through to ACK */ +@@ -1549,6 +1559,7 @@ do_time_wait: + tcp_v6_timewait_ack(sk, skb); + break; + case TCP_TW_RST: ++ tcp_v6_restore_cb(skb); + goto no_tcp_socket; + case TCP_TW_SUCCESS: + ; diff --git a/queue-3.19/openvswitch-return-vport-module-ref-before-destruction.patch b/queue-3.19/openvswitch-return-vport-module-ref-before-destruction.patch new file mode 100644 index 00000000000..cb60585494e --- /dev/null +++ b/queue-3.19/openvswitch-return-vport-module-ref-before-destruction.patch @@ -0,0 +1,42 @@ +From foo@baz Tue Apr 21 23:05:14 CEST 2015 +From: Thomas Graf +Date: Mon, 30 Mar 2015 13:57:41 +0200 +Subject: openvswitch: Return vport module ref before destruction + +From: Thomas Graf + +[ Upstream commit fa2d8ff4e3522b4e05f590575d3eb8087f3a8cdc ] + +Return module reference before invoking the respective vport +->destroy() function. This is needed as ovs_vport_del() is not +invoked inside an RCU read side critical section so the kfree +can occur immediately before returning to ovs_vport_del(). + +Returning the module reference before ->destroy() is safe because +the module unregistration is blocked on ovs_lock which we hold +while destroying the datapath. + +Fixes: 62b9c8d0372d ("ovs: Turn vports with dependencies into separate modules") +Reported-by: Pravin Shelar +Signed-off-by: Thomas Graf +Acked-by: Pravin B Shelar +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/openvswitch/vport.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/net/openvswitch/vport.c ++++ b/net/openvswitch/vport.c +@@ -274,10 +274,8 @@ void ovs_vport_del(struct vport *vport) + ASSERT_OVSL(); + + hlist_del_rcu(&vport->hash_node); +- +- vport->ops->destroy(vport); +- + module_put(vport->ops->owner); ++ vport->ops->destroy(vport); + } + + /** diff --git a/queue-3.19/revert-net-reset-secmark-when-scrubbing-packet.patch b/queue-3.19/revert-net-reset-secmark-when-scrubbing-packet.patch new file mode 100644 index 00000000000..8953204607a --- /dev/null +++ b/queue-3.19/revert-net-reset-secmark-when-scrubbing-packet.patch @@ -0,0 +1,31 @@ +From foo@baz Tue Apr 21 23:05:14 CEST 2015 +From: Herbert Xu +Date: Thu, 16 Apr 2015 16:12:53 +0800 +Subject: Revert "net: Reset secmark when scrubbing packet" + +From: Herbert Xu + +[ Upstream commit 4c0ee414e877b899f7fc80aafb98d9425c02797f ] + +This patch reverts commit b8fb4e0648a2ab3734140342002f68fb0c7d1602 +because the secmark must be preserved even when a packet crosses +namespace boundaries. The reason is that security labels apply to +the system as a whole and is not per-namespace. + +Signed-off-by: Herbert Xu +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/skbuff.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -4149,7 +4149,6 @@ void skb_scrub_packet(struct sk_buff *sk + skb->ignore_df = 0; + skb_dst_drop(skb); + skb->mark = 0; +- skb_init_secmark(skb); + secpath_reset(skb); + nf_reset(skb); + nf_reset_trace(skb); diff --git a/queue-3.19/rocker-handle-non-bridge-master-change.patch b/queue-3.19/rocker-handle-non-bridge-master-change.patch new file mode 100644 index 00000000000..a9624ba9d87 --- /dev/null +++ b/queue-3.19/rocker-handle-non-bridge-master-change.patch @@ -0,0 +1,49 @@ +From foo@baz Tue Apr 21 23:05:14 CEST 2015 +From: Simon Horman +Date: Tue, 24 Mar 2015 09:31:40 +0900 +Subject: rocker: handle non-bridge master change + +From: Simon Horman + +[ Upstream commit a6e95cc718c8916a13f1e1e9d33cacbc5db56c0f ] + +Master change notifications may occur other than when joining or +leaving a bridge, for example when being added to or removed from +a bond or Open vSwitch. + +Previously in those cases rocker_port_bridge_leave() was called +which results in a null-pointer dereference as rocker_port->bridge_dev +is NULL because there is no bridge device. + +This patch makes provision for doing nothing in such cases. + +Fixes: 6c7079450071f ("rocker: implement L2 bridge offloading") +Acked-by: Jiri Pirko +Acked-by: Scott Feldman +Signed-off-by: Simon Horman +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/rocker/rocker.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/rocker/rocker.c ++++ b/drivers/net/ethernet/rocker/rocker.c +@@ -4305,10 +4305,16 @@ static int rocker_port_master_changed(st + struct net_device *master = netdev_master_upper_dev_get(dev); + int err = 0; + ++ /* There are currently three cases handled here: ++ * 1. Joining a bridge ++ * 2. Leaving a previously joined bridge ++ * 3. Other, e.g. 
being added to or removed from a bond or openvswitch, ++ * in which case nothing is done ++ */ + if (master && master->rtnl_link_ops && + !strcmp(master->rtnl_link_ops->kind, "bridge")) + err = rocker_port_bridge_join(rocker_port, master); +- else ++ else if (rocker_port_is_bridged(rocker_port)) + err = rocker_port_bridge_leave(rocker_port); + + return err; diff --git a/queue-3.19/skbuff-do-not-scrub-skb-mark-within-the-same-name-space.patch b/queue-3.19/skbuff-do-not-scrub-skb-mark-within-the-same-name-space.patch new file mode 100644 index 00000000000..f926f236547 --- /dev/null +++ b/queue-3.19/skbuff-do-not-scrub-skb-mark-within-the-same-name-space.patch @@ -0,0 +1,83 @@ +From foo@baz Tue Apr 21 23:05:14 CEST 2015 +From: Herbert Xu +Date: Thu, 16 Apr 2015 09:03:27 +0800 +Subject: skbuff: Do not scrub skb mark within the same name space +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Herbert Xu + +[ Upstream commit 213dd74aee765d4e5f3f4b9607fef0cf97faa2af ] + +On Wed, Apr 15, 2015 at 05:41:26PM +0200, Nicolas Dichtel wrote: +> Le 15/04/2015 15:57, Herbert Xu a écrit : +> >On Wed, Apr 15, 2015 at 06:22:29PM +0800, Herbert Xu wrote: +> [snip] +> >Subject: skbuff: Do not scrub skb mark within the same name space +> > +> >The commit ea23192e8e577dfc51e0f4fc5ca113af334edff9 ("tunnels: +> Maybe add a Fixes tag? +> Fixes: ea23192e8e57 ("tunnels: harmonize cleanup done on skb on rx path") +> +> >harmonize cleanup done on skb on rx path") broke anyone trying to +> >use netfilter marking across IPv4 tunnels. While most of the +> >fields that are cleared by skb_scrub_packet don't matter, the +> >netfilter mark must be preserved. +> > +> >This patch rearranges skb_scurb_packet to preserve the mark field. +> nit: s/scurb/scrub +> +> Else it's fine for me. + +Sure. + +PS I used the wrong email for James the first time around. So +let me repeat the question here. Should secmark be preserved +or cleared across tunnels within the same name space? In fact, +do our security models even support name spaces? + +---8<--- +The commit ea23192e8e577dfc51e0f4fc5ca113af334edff9 ("tunnels: +harmonize cleanup done on skb on rx path") broke anyone trying to +use netfilter marking across IPv4 tunnels. While most of the +fields that are cleared by skb_scrub_packet don't matter, the +netfilter mark must be preserved. + +This patch rearranges skb_scrub_packet to preserve the mark field. + +Fixes: ea23192e8e57 ("tunnels: harmonize cleanup done on skb on rx path") +Signed-off-by: Herbert Xu +Acked-by: Thomas Graf +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/skbuff.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -4141,17 +4141,20 @@ EXPORT_SYMBOL(skb_try_coalesce); + */ + void skb_scrub_packet(struct sk_buff *skb, bool xnet) + { +- if (xnet) +- skb_orphan(skb); + skb->tstamp.tv64 = 0; + skb->pkt_type = PACKET_HOST; + skb->skb_iif = 0; + skb->ignore_df = 0; + skb_dst_drop(skb); +- skb->mark = 0; + secpath_reset(skb); + nf_reset(skb); + nf_reset_trace(skb); ++ ++ if (!xnet) ++ return; ++ ++ skb_orphan(skb); ++ skb->mark = 0; + } + EXPORT_SYMBOL_GPL(skb_scrub_packet); + diff --git a/queue-3.19/tcp-fix-frto-undo-on-cumulative-ack-of-sacked-range.patch b/queue-3.19/tcp-fix-frto-undo-on-cumulative-ack-of-sacked-range.patch new file mode 100644 index 00000000000..a5a14831d0e --- /dev/null +++ b/queue-3.19/tcp-fix-frto-undo-on-cumulative-ack-of-sacked-range.patch @@ -0,0 +1,47 @@ +From foo@baz Tue Apr 21 23:05:14 CEST 2015 +From: Neal Cardwell +Date: Wed, 1 Apr 2015 20:26:46 -0400 +Subject: tcp: fix FRTO undo on cumulative ACK of SACKed range + +From: Neal Cardwell + +[ Upstream commit 666b805150efd62f05810ff0db08f44a2370c937 ] + +On processing cumulative ACKs, the FRTO code was not checking the +SACKed bit, meaning that there could be a spurious FRTO undo on a +cumulative ACK of a previously SACKed skb. + +The FRTO code should only consider a cumulative ACK to indicate that +an original/unretransmitted skb is newly ACKed if the skb was not yet +SACKed. + +The effect of the spurious FRTO undo would typically be to make the +connection think that all previously-sent packets were in flight when +they really weren't, leading to a stall and an RTO. + +Signed-off-by: Neal Cardwell +Signed-off-by: Yuchung Cheng +Fixes: e33099f96d99c ("tcp: implement RFC5682 F-RTO") +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -3104,10 +3104,11 @@ static int tcp_clean_rtx_queue(struct so + if (!first_ackt.v64) + first_ackt = last_ackt; + +- if (!(sacked & TCPCB_SACKED_ACKED)) ++ if (!(sacked & TCPCB_SACKED_ACKED)) { + reord = min(pkts_acked, reord); +- if (!after(scb->end_seq, tp->high_seq)) +- flag |= FLAG_ORIG_SACK_ACKED; ++ if (!after(scb->end_seq, tp->high_seq)) ++ flag |= FLAG_ORIG_SACK_ACKED; ++ } + } + + if (sacked & TCPCB_SACKED_ACKED) diff --git a/queue-3.19/tcp-prevent-fetching-dst-twice-in-early-demux-code.patch b/queue-3.19/tcp-prevent-fetching-dst-twice-in-early-demux-code.patch new file mode 100644 index 00000000000..e60be531cb3 --- /dev/null +++ b/queue-3.19/tcp-prevent-fetching-dst-twice-in-early-demux-code.patch @@ -0,0 +1,56 @@ +From foo@baz Tue Apr 21 23:05:14 CEST 2015 +From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= +Date: Mon, 23 Mar 2015 15:14:00 +0100 +Subject: tcp: prevent fetching dst twice in early demux code + +From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= + +[ Upstream commit d0c294c53a771ae7e84506dfbd8c18c30f078735 ] + +On s390x, gcc 4.8 compiles this part of tcp_v6_early_demux() + + struct dst_entry *dst = sk->sk_rx_dst; + + if (dst) + dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie); + +to code reading sk->sk_rx_dst twice, once for the test and once for +the argument of ip6_dst_check() (dst_check() is inline). This allows +ip6_dst_check() to be called with null first argument, causing a crash. 
+ +Protect sk->sk_rx_dst access by READ_ONCE() both in IPv4 and IPv6 +TCP early demux code. + +Fixes: 41063e9dd119 ("ipv4: Early TCP socket demux.") +Fixes: c7109986db3c ("ipv6: Early TCP socket demux") +Signed-off-by: Michal Kubecek +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_ipv4.c | 2 +- + net/ipv6/tcp_ipv6.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -1516,7 +1516,7 @@ void tcp_v4_early_demux(struct sk_buff * + skb->sk = sk; + skb->destructor = sock_edemux; + if (sk->sk_state != TCP_TIME_WAIT) { +- struct dst_entry *dst = sk->sk_rx_dst; ++ struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst); + + if (dst) + dst = dst_check(dst, 0); +--- a/net/ipv6/tcp_ipv6.c ++++ b/net/ipv6/tcp_ipv6.c +@@ -1583,7 +1583,7 @@ static void tcp_v6_early_demux(struct sk + skb->sk = sk; + skb->destructor = sock_edemux; + if (sk->sk_state != TCP_TIME_WAIT) { +- struct dst_entry *dst = sk->sk_rx_dst; ++ struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst); + + if (dst) + dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie); diff --git a/queue-3.19/tcp-tcp_make_synack-should-clear-skb-tstamp.patch b/queue-3.19/tcp-tcp_make_synack-should-clear-skb-tstamp.patch new file mode 100644 index 00000000000..77835620d1b --- /dev/null +++ b/queue-3.19/tcp-tcp_make_synack-should-clear-skb-tstamp.patch @@ -0,0 +1,42 @@ +From foo@baz Tue Apr 21 23:05:14 CEST 2015 +From: Eric Dumazet +Date: Thu, 9 Apr 2015 13:31:56 -0700 +Subject: tcp: tcp_make_synack() should clear skb->tstamp + +From: Eric Dumazet + +[ Upstream commit b50edd7812852d989f2ef09dcfc729690f54a42d ] + +I noticed tcpdump was giving funky timestamps for locally +generated SYNACK messages on loopback interface. + +11:42:46.938990 IP 127.0.0.1.48245 > 127.0.0.2.23850: S +945476042:945476042(0) win 43690 + +20:28:58.502209 IP 127.0.0.2.23850 > 127.0.0.1.48245: S +3160535375:3160535375(0) ack 945476043 win 43690 + +This is because we need to clear skb->tstamp before +entering lower stack, otherwise net_timestamp_check() +does not set skb->tstamp. + +Fixes: 7faee5c0d514 ("tcp: remove TCP_SKB_CB(skb)->when") +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_output.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -2931,6 +2931,8 @@ struct sk_buff *tcp_make_synack(struct s + } + #endif + ++ /* Do not fool tcpdump (if any), clean our debris */ ++ skb->tstamp.tv64 = 0; + return skb; + } + EXPORT_SYMBOL(tcp_make_synack); diff --git a/queue-3.19/tg3-hold-tp-lock-before-calling-tg3_halt-from-tg3_init_one.patch b/queue-3.19/tg3-hold-tp-lock-before-calling-tg3_halt-from-tg3_init_one.patch new file mode 100644 index 00000000000..9e45101f9ea --- /dev/null +++ b/queue-3.19/tg3-hold-tp-lock-before-calling-tg3_halt-from-tg3_init_one.patch @@ -0,0 +1,44 @@ +From foo@baz Tue Apr 21 23:05:14 CEST 2015 +From: "Jun'ichi Nomura \\\\(NEC\\\\)" +Date: Thu, 12 Feb 2015 01:26:24 +0000 +Subject: tg3: Hold tp->lock before calling tg3_halt() from tg3_init_one() + +From: "Jun'ichi Nomura \\\\(NEC\\\\)" + +[ Upstream commit d0af71a3573f1217b140c60b66f1a9b335fb058b ] + +tg3_init_one() calls tg3_halt() without tp->lock despite its assumption +and causes deadlock. +If lockdep is enabled, a warning like this shows up before the stall: + + [ BUG: bad unlock balance detected! 
] + 3.19.0test #3 Tainted: G E + ------------------------------------- + insmod/369 is trying to release lock (&(&tp->lock)->rlock) at: + [] tg3_chip_reset+0x14d/0x780 [tg3] + but there are no more locks to release! + +tg3_init_one() doesn't call tg3_halt() under normal situation but +during kexec kdump I hit this problem. + +Fixes: 932f19de ("tg3: Release tp->lock before invoking synchronize_irq()") +Signed-off-by: Jun'ichi Nomura +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/tg3.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/net/ethernet/broadcom/tg3.c ++++ b/drivers/net/ethernet/broadcom/tg3.c +@@ -17868,8 +17868,10 @@ static int tg3_init_one(struct pci_dev * + */ + if ((tr32(HOSTCC_MODE) & HOSTCC_MODE_ENABLE) || + (tr32(WDMAC_MODE) & WDMAC_MODE_ENABLE)) { ++ tg3_full_lock(tp, 0); + tw32(MEMARB_MODE, MEMARB_MODE_ENABLE); + tg3_halt(tp, RESET_KIND_SHUTDOWN, 1); ++ tg3_full_unlock(tp); + } + + err = tg3_test_dma(tp); diff --git a/queue-3.19/tun-return-proper-error-code-from-tun_do_read.patch b/queue-3.19/tun-return-proper-error-code-from-tun_do_read.patch new file mode 100644 index 00000000000..d318cf9205f --- /dev/null +++ b/queue-3.19/tun-return-proper-error-code-from-tun_do_read.patch @@ -0,0 +1,30 @@ +From foo@baz Tue Apr 21 23:05:14 CEST 2015 +From: Alex Gartrell +Date: Thu, 25 Dec 2014 23:22:49 -0800 +Subject: tun: return proper error code from tun_do_read + +From: Alex Gartrell + +[ Upstream commit 957f094f221f81e457133b1f4c4d95ffa49ff731 ] + +Instead of -1 with EAGAIN, read on a O_NONBLOCK tun fd will return 0. This +fixes this by properly returning the error code from __skb_recv_datagram. + +Signed-off-by: Alex Gartrell +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/tun.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -1368,7 +1368,7 @@ static ssize_t tun_do_read(struct tun_st + skb = __skb_recv_datagram(tfile->socket.sk, noblock ? MSG_DONTWAIT : 0, + &peeked, &off, &err); + if (!skb) +- return 0; ++ return err; + + ret = tun_put_user(tun, tfile, skb, to); + if (unlikely(ret < 0)) diff --git a/queue-3.19/udptunnels-call-handle_offloads-after-inserting-vlan-tag.patch b/queue-3.19/udptunnels-call-handle_offloads-after-inserting-vlan-tag.patch new file mode 100644 index 00000000000..7e1011dee03 --- /dev/null +++ b/queue-3.19/udptunnels-call-handle_offloads-after-inserting-vlan-tag.patch @@ -0,0 +1,100 @@ +From foo@baz Tue Apr 21 23:05:14 CEST 2015 +From: Jesse Gross +Date: Thu, 9 Apr 2015 11:19:14 -0700 +Subject: udptunnels: Call handle_offloads after inserting vlan tag. + +From: Jesse Gross + +[ Upstream commit b736a623bd099cdf5521ca9bd03559f3bc7fa31c ] + +handle_offloads() calls skb_reset_inner_headers() to store +the layer pointers to the encapsulated packet. However, we +currently push the vlag tag (if there is one) onto the packet +afterwards. This changes the MAC header for the encapsulated +packet but it is not reflected in skb->inner_mac_header, which +breaks GSO and drivers which attempt to use this for encapsulation +offloads. + +Fixes: 1eaa8178 ("vxlan: Add tx-vlan offload support.") +Signed-off-by: Jesse Gross +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vxlan.c | 20 ++++++++++---------- + net/ipv4/geneve.c | 8 ++++---- + 2 files changed, 14 insertions(+), 14 deletions(-) + +--- a/drivers/net/vxlan.c ++++ b/drivers/net/vxlan.c +@@ -1578,12 +1578,6 @@ static int vxlan6_xmit_skb(struct vxlan_ + int err; + bool udp_sum = !udp_get_no_check6_tx(vs->sock->sk); + +- skb = udp_tunnel_handle_offloads(skb, udp_sum); +- if (IS_ERR(skb)) { +- err = -EINVAL; +- goto err; +- } +- + skb_scrub_packet(skb, xnet); + + min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len +@@ -1603,6 +1597,12 @@ static int vxlan6_xmit_skb(struct vxlan_ + goto err; + } + ++ skb = udp_tunnel_handle_offloads(skb, udp_sum); ++ if (IS_ERR(skb)) { ++ err = -EINVAL; ++ goto err; ++ } ++ + vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); + vxh->vx_flags = htonl(VXLAN_FLAGS); + vxh->vx_vni = vni; +@@ -1628,10 +1628,6 @@ int vxlan_xmit_skb(struct vxlan_sock *vs + int err; + bool udp_sum = !vs->sock->sk->sk_no_check_tx; + +- skb = udp_tunnel_handle_offloads(skb, udp_sum); +- if (IS_ERR(skb)) +- return PTR_ERR(skb); +- + min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + + VXLAN_HLEN + sizeof(struct iphdr) + + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); +@@ -1647,6 +1643,10 @@ int vxlan_xmit_skb(struct vxlan_sock *vs + if (WARN_ON(!skb)) + return -ENOMEM; + ++ skb = udp_tunnel_handle_offloads(skb, udp_sum); ++ if (IS_ERR(skb)) ++ return PTR_ERR(skb); ++ + vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); + vxh->vx_flags = htonl(VXLAN_FLAGS); + vxh->vx_vni = vni; +--- a/net/ipv4/geneve.c ++++ b/net/ipv4/geneve.c +@@ -121,10 +121,6 @@ int geneve_xmit_skb(struct geneve_sock * + int min_headroom; + int err; + +- skb = udp_tunnel_handle_offloads(skb, !gs->sock->sk->sk_no_check_tx); +- if (IS_ERR(skb)) +- return PTR_ERR(skb); +- + min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + + GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr) + + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); +@@ -139,6 +135,10 @@ int geneve_xmit_skb(struct geneve_sock * + if (unlikely(!skb)) + return -ENOMEM; + ++ skb = udp_tunnel_handle_offloads(skb, !gs->sock->sk->sk_no_check_tx); ++ if (IS_ERR(skb)) ++ return PTR_ERR(skb); ++ + gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len); + geneve_build_header(gnvh, tun_flags, vni, opt_len, opt); + diff --git a/queue-3.19/usbnet-fix-tx_bytes-statistic-running-backward-in-cdc_ncm.patch b/queue-3.19/usbnet-fix-tx_bytes-statistic-running-backward-in-cdc_ncm.patch new file mode 100644 index 00000000000..333e6a067c7 --- /dev/null +++ b/queue-3.19/usbnet-fix-tx_bytes-statistic-running-backward-in-cdc_ncm.patch @@ -0,0 +1,126 @@ +From foo@baz Tue Apr 21 23:05:14 CEST 2015 +From: Ben Hutchings +Date: Wed, 25 Mar 2015 21:41:33 +0100 +Subject: usbnet: Fix tx_bytes statistic running backward in cdc_ncm +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Ben Hutchings + +[ Upstream commit 7a1e890e2168e33fb62d84528e996b8b4b478fea ] + +cdc_ncm disagrees with usbnet about how much framing overhead should +be counted in the tx_bytes statistics, and tries 'fix' this by +decrementing tx_bytes on the transmit path. But statistics must never +be decremented except due to roll-over; this will thoroughly confuse +user-space. Also, tx_bytes is only incremented by usbnet in the +completion path. + +Fix this by requiring drivers that set FLAG_MULTI_FRAME to set a +tx_bytes delta along with the tx_packets count. 
+ +Fixes: beeecd42c3b4 ("net: cdc_ncm/cdc_mbim: adding NCM protocol statistics") +Signed-off-by: Ben Hutchings +Signed-off-by: Bjørn Mork +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/asix_common.c | 2 +- + drivers/net/usb/cdc_ncm.c | 7 +++---- + drivers/net/usb/sr9800.c | 2 +- + drivers/net/usb/usbnet.c | 16 +++++++++++++--- + include/linux/usb/usbnet.h | 6 ++++-- + 5 files changed, 22 insertions(+), 11 deletions(-) + +--- a/drivers/net/usb/asix_common.c ++++ b/drivers/net/usb/asix_common.c +@@ -189,7 +189,7 @@ struct sk_buff *asix_tx_fixup(struct usb + skb_put(skb, sizeof(padbytes)); + } + +- usbnet_set_skb_tx_stats(skb, 1); ++ usbnet_set_skb_tx_stats(skb, 1, 0); + return skb; + } + +--- a/drivers/net/usb/cdc_ncm.c ++++ b/drivers/net/usb/cdc_ncm.c +@@ -1177,13 +1177,12 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev + ctx->tx_overhead += skb_out->len - ctx->tx_curr_frame_payload; + ctx->tx_ntbs++; + +- /* usbnet has already counted all the framing overhead. ++ /* usbnet will count all the framing overhead by default. + * Adjust the stats so that the tx_bytes counter show real + * payload data instead. + */ +- dev->net->stats.tx_bytes -= skb_out->len - ctx->tx_curr_frame_payload; +- +- usbnet_set_skb_tx_stats(skb_out, n); ++ usbnet_set_skb_tx_stats(skb_out, n, ++ ctx->tx_curr_frame_payload - skb_out->len); + + return skb_out; + +--- a/drivers/net/usb/sr9800.c ++++ b/drivers/net/usb/sr9800.c +@@ -144,7 +144,7 @@ static struct sk_buff *sr_tx_fixup(struc + skb_put(skb, sizeof(padbytes)); + } + +- usbnet_set_skb_tx_stats(skb, 1); ++ usbnet_set_skb_tx_stats(skb, 1, 0); + return skb; + } + +--- a/drivers/net/usb/usbnet.c ++++ b/drivers/net/usb/usbnet.c +@@ -1347,9 +1347,19 @@ netdev_tx_t usbnet_start_xmit (struct sk + } else + urb->transfer_flags |= URB_ZERO_PACKET; + } +- entry->length = urb->transfer_buffer_length = length; +- if (!(info->flags & FLAG_MULTI_PACKET)) +- usbnet_set_skb_tx_stats(skb, 1); ++ urb->transfer_buffer_length = length; ++ ++ if (info->flags & FLAG_MULTI_PACKET) { ++ /* Driver has set number of packets and a length delta. ++ * Calculate the complete length and ensure that it's ++ * positive. ++ */ ++ entry->length += length; ++ if (WARN_ON_ONCE(entry->length <= 0)) ++ entry->length = length; ++ } else { ++ usbnet_set_skb_tx_stats(skb, 1, length); ++ } + + spin_lock_irqsave(&dev->txq.lock, flags); + retval = usb_autopm_get_interface_async(dev->intf); +--- a/include/linux/usb/usbnet.h ++++ b/include/linux/usb/usbnet.h +@@ -227,7 +227,7 @@ struct skb_data { /* skb->cb is one of t + struct urb *urb; + struct usbnet *dev; + enum skb_state state; +- size_t length; ++ long length; + unsigned long packets; + }; + +@@ -235,11 +235,13 @@ struct skb_data { /* skb->cb is one of t + * tx_fixup method before returning an skb. 
+ */ + static inline void +-usbnet_set_skb_tx_stats(struct sk_buff *skb, unsigned long packets) ++usbnet_set_skb_tx_stats(struct sk_buff *skb, ++ unsigned long packets, long bytes_delta) + { + struct skb_data *entry = (struct skb_data *) skb->cb; + + entry->packets = packets; ++ entry->length = bytes_delta; + } + + extern int usbnet_open(struct net_device *net); diff --git a/queue-3.19/usbnet-fix-tx_packets-stat-for-flag_multi_frame-drivers.patch b/queue-3.19/usbnet-fix-tx_packets-stat-for-flag_multi_frame-drivers.patch new file mode 100644 index 00000000000..78dca54d651 --- /dev/null +++ b/queue-3.19/usbnet-fix-tx_packets-stat-for-flag_multi_frame-drivers.patch @@ -0,0 +1,119 @@ +From foo@baz Tue Apr 21 23:05:14 CEST 2015 +From: Ben Hutchings +Date: Thu, 26 Feb 2015 19:34:37 +0000 +Subject: usbnet: Fix tx_packets stat for FLAG_MULTI_FRAME drivers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Ben Hutchings + +[ Upstream commit 1e9e39f4a29857a396ac7b669d109f697f66695e ] + +Currently the usbnet core does not update the tx_packets statistic for +drivers with FLAG_MULTI_PACKET and there is no hook in the TX +completion path where they could do this. + +cdc_ncm and dependent drivers are bumping tx_packets stat on the +transmit path while asix and sr9800 aren't updating it at all. + +Add a packet count in struct skb_data so these drivers can fill it +in, initialise it to 1 for other drivers, and add the packet count +to the tx_packets statistic on completion. + +Signed-off-by: Ben Hutchings +Tested-by: Bjørn Mork +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/asix_common.c | 2 ++ + drivers/net/usb/cdc_ncm.c | 3 ++- + drivers/net/usb/sr9800.c | 1 + + drivers/net/usb/usbnet.c | 5 +++-- + include/linux/usb/usbnet.h | 12 ++++++++++++ + 5 files changed, 20 insertions(+), 3 deletions(-) + +--- a/drivers/net/usb/asix_common.c ++++ b/drivers/net/usb/asix_common.c +@@ -188,6 +188,8 @@ struct sk_buff *asix_tx_fixup(struct usb + memcpy(skb_tail_pointer(skb), &padbytes, sizeof(padbytes)); + skb_put(skb, sizeof(padbytes)); + } ++ ++ usbnet_set_skb_tx_stats(skb, 1); + return skb; + } + +--- a/drivers/net/usb/cdc_ncm.c ++++ b/drivers/net/usb/cdc_ncm.c +@@ -1172,7 +1172,6 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev + + /* return skb */ + ctx->tx_curr_skb = NULL; +- dev->net->stats.tx_packets += ctx->tx_curr_frame_num; + + /* keep private stats: framing overhead and number of NTBs */ + ctx->tx_overhead += skb_out->len - ctx->tx_curr_frame_payload; +@@ -1184,6 +1183,8 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev + */ + dev->net->stats.tx_bytes -= skb_out->len - ctx->tx_curr_frame_payload; + ++ usbnet_set_skb_tx_stats(skb_out, n); ++ + return skb_out; + + exit_no_skb: +--- a/drivers/net/usb/sr9800.c ++++ b/drivers/net/usb/sr9800.c +@@ -144,6 +144,7 @@ static struct sk_buff *sr_tx_fixup(struc + skb_put(skb, sizeof(padbytes)); + } + ++ usbnet_set_skb_tx_stats(skb, 1); + return skb; + } + +--- a/drivers/net/usb/usbnet.c ++++ b/drivers/net/usb/usbnet.c +@@ -1189,8 +1189,7 @@ static void tx_complete (struct urb *urb + struct usbnet *dev = entry->dev; + + if (urb->status == 0) { +- if (!(dev->driver_info->flags & FLAG_MULTI_PACKET)) +- dev->net->stats.tx_packets++; ++ dev->net->stats.tx_packets += entry->packets; + dev->net->stats.tx_bytes += entry->length; + } else { + dev->net->stats.tx_errors++; +@@ -1349,6 +1348,8 @@ netdev_tx_t usbnet_start_xmit (struct sk + urb->transfer_flags |= URB_ZERO_PACKET; + } + entry->length = 
urb->transfer_buffer_length = length; ++ if (!(info->flags & FLAG_MULTI_PACKET)) ++ usbnet_set_skb_tx_stats(skb, 1); + + spin_lock_irqsave(&dev->txq.lock, flags); + retval = usb_autopm_get_interface_async(dev->intf); +--- a/include/linux/usb/usbnet.h ++++ b/include/linux/usb/usbnet.h +@@ -228,8 +228,20 @@ struct skb_data { /* skb->cb is one of t + struct usbnet *dev; + enum skb_state state; + size_t length; ++ unsigned long packets; + }; + ++/* Drivers that set FLAG_MULTI_PACKET must call this in their ++ * tx_fixup method before returning an skb. ++ */ ++static inline void ++usbnet_set_skb_tx_stats(struct sk_buff *skb, unsigned long packets) ++{ ++ struct skb_data *entry = (struct skb_data *) skb->cb; ++ ++ entry->packets = packets; ++} ++ + extern int usbnet_open(struct net_device *net); + extern int usbnet_stop(struct net_device *net); + extern netdev_tx_t usbnet_start_xmit(struct sk_buff *skb, diff --git a/queue-3.19/xen-netfront-transmit-fully-gso-sized-packets.patch b/queue-3.19/xen-netfront-transmit-fully-gso-sized-packets.patch new file mode 100644 index 00000000000..722481c3bbc --- /dev/null +++ b/queue-3.19/xen-netfront-transmit-fully-gso-sized-packets.patch @@ -0,0 +1,65 @@ +From foo@baz Tue Apr 21 23:05:14 CEST 2015 +From: Jonathan Davies +Date: Tue, 31 Mar 2015 11:05:15 +0100 +Subject: xen-netfront: transmit fully GSO-sized packets + +From: Jonathan Davies + +[ Upstream commit 0c36820e2ab7d943ab1188230fdf2149826d33c0 ] + +xen-netfront limits transmitted skbs to be at most 44 segments in size. However, +GSO permits up to 65536 bytes, which means a maximum of 45 segments of 1448 +bytes each. This slight reduction in the size of packets means a slight loss in +efficiency. + +Since c/s 9ecd1a75d, xen-netfront sets gso_max_size to + XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER, +where XEN_NETIF_MAX_TX_SIZE is 65535 bytes. + +The calculation used by tcp_tso_autosize (and also tcp_xmit_size_goal since c/s +6c09fa09d) in determining when to split an skb into two is + sk->sk_gso_max_size - 1 - MAX_TCP_HEADER. + +So the maximum permitted size of an skb is calculated to be + (XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER) - 1 - MAX_TCP_HEADER. + +Intuitively, this looks like the wrong formula -- we don't need two TCP headers. +Instead, there is no need to deviate from the default gso_max_size of 65536 as +this already accommodates the size of the header. + +Currently, the largest skb transmitted by netfront is 63712 bytes (44 segments +of 1448 bytes each), as observed via tcpdump. This patch makes netfront send +skbs of up to 65160 bytes (45 segments of 1448 bytes each). + +Similarly, the maximum allowable mtu does not need to subtract MAX_TCP_HEADER as +it relates to the size of the whole packet, including the header. + +Fixes: 9ecd1a75d977 ("xen-netfront: reduce gso_max_size to account for max TCP header") +Signed-off-by: Jonathan Davies +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/xen-netfront.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +--- a/drivers/net/xen-netfront.c ++++ b/drivers/net/xen-netfront.c +@@ -1062,8 +1062,7 @@ err: + + static int xennet_change_mtu(struct net_device *dev, int mtu) + { +- int max = xennet_can_sg(dev) ? +- XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER : ETH_DATA_LEN; ++ int max = xennet_can_sg(dev) ? 
XEN_NETIF_MAX_TX_SIZE : ETH_DATA_LEN; + + if (mtu > max) + return -EINVAL; +@@ -1333,8 +1332,6 @@ static struct net_device *xennet_create_ + netdev->ethtool_ops = &xennet_ethtool_ops; + SET_NETDEV_DEV(netdev, &dev->dev); + +- netif_set_gso_max_size(netdev, XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER); +- + np->netdev = netdev; + + netif_carrier_off(netdev);
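
---

A few illustrative sketches of the patterns used in the patches above. The bnx2x change replaces a spinlock with a lock-free state word so queue ownership can be taken even from netpoll's hard-IRQ context; its core is a cmpxchg loop plus a "NAPI request" bit that keeps busy_poll from starving the NAPI handler. Below is a minimal userspace sketch of that pattern using C11 atomics. All names and the demo in main() are illustrative only; the driver itself operates on fp->busy_poll_state with kernel primitives (cmpxchg, set_bit, clear_bit).

/* Minimal userspace sketch of the cmpxchg ownership pattern from the
 * bnx2x patch above. Every name here is illustrative.
 */
#include <stdatomic.h>
#include <stdbool.h>

#define STATE_NAPI	(1UL << 0)	/* NAPI handler owns the queue */
#define STATE_NAPI_REQ	(1UL << 1)	/* NAPI wants the queue next */
#define STATE_POLL	(1UL << 2)	/* busy_poll owns the queue */

static _Atomic unsigned long state;	/* 0 == idle */

/* NAPI side: claim the queue unless busy_poll currently owns it. */
static bool lock_napi(void)
{
	unsigned long old = atomic_load(&state);

	for (;;) {
		if (old & STATE_POLL) {
			/* Record our interest so poll cannot starve us. */
			atomic_fetch_or(&state, STATE_NAPI_REQ);
			return false;
		}
		/* Queue looks free: take it in a single atomic step. */
		if (atomic_compare_exchange_weak(&state, &old, STATE_NAPI))
			return true;
		/* CAS failure reloaded 'old'; evaluate the new value. */
	}
}

static void unlock_napi(void)
{
	atomic_store(&state, 0);
}

/* busy_poll side: succeeds only when the queue is fully idle, so a
 * pending STATE_NAPI_REQ blocks it and NAPI runs next.
 */
static bool lock_poll(void)
{
	unsigned long idle = 0;

	return atomic_compare_exchange_strong(&state, &idle, STATE_POLL);
}

static void unlock_poll(void)
{
	atomic_fetch_and(&state, ~STATE_POLL);
}

int main(void)
{
	bool got;

	lock_poll();		/* poll owns the queue */
	got = lock_napi();	/* fails, leaves STATE_NAPI_REQ behind */
	unlock_poll();
	/* The request bit now blocks poll until NAPI has had its turn. */
	if (!got && !lock_poll() && lock_napi())
		unlock_napi();
	return 0;
}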
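
The ipv6 recursion patch gates all skb->sk-derived decisions on a per-CPU depth counter: the transmit path increments xmit_recursion around nested transmits, so any code seeing a level greater than zero knows it is handling an encapsulated packet and must ignore the local socket's settings. A self-contained sketch of that mechanism follows, with a thread-local counter standing in for the per-CPU one and invented names throughout.

/* Sketch of the recursion-gated socket access from the ipv6 patch
 * above. A thread-local counter stands in for the kernel's per-CPU
 * xmit_recursion; every name here is invented for illustration.
 */
#include <stdbool.h>
#include <stdio.h>

static __thread int xmit_recursion;

static int dev_recursion_level(void)
{
	return xmit_recursion;
}

struct sock {
	bool mc_loop;
};

/* Socket options only apply to the outermost transmit. At level > 0 we
 * are inside tunnel encapsulation and must not consult them.
 */
static bool sk_mc_loop(const struct sock *sk)
{
	if (dev_recursion_level())
		return false;
	if (!sk)
		return true;
	return sk->mc_loop;
}

static void xmit(const struct sock *sk, int encap_layers)
{
	printf("level %d: mc_loop=%d\n",
	       dev_recursion_level(), sk_mc_loop(sk));

	if (encap_layers > 0) {
		/* A tunnel device re-enters the stack with the inner
		 * packet still carrying the original skb->sk.
		 */
		xmit_recursion++;
		xmit(sk, encap_layers - 1);
		xmit_recursion--;
	}
}

int main(void)
{
	struct sock sk = { .mc_loop = true };

	xmit(&sk, 2);	/* level 0 honours mc_loop; levels 1-2 do not */
	return 0;
}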
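
The two early-demux hunks are an instance of a general double-fetch hazard: testing a shared pointer and then using it as two separate loads lets the compiler re-read the field, so a concurrent writer can hand NULL to the consumer between the test and the use. The sketch below shows the buggy shape and the single-snapshot fix; READ_ONCE is written out as the volatile cast the kernel macro reduces to for scalar types, and the surrounding types are stand-ins.

/* Sketch of the double-fetch hazard fixed by the early-demux patch
 * above. Types and dst_check() are stand-ins; READ_ONCE is written out
 * as the volatile cast the kernel macro reduces to for scalars.
 */
#include <stdio.h>

#define READ_ONCE(x) (*(const volatile __typeof__(x) *)&(x))

struct dst_entry {
	unsigned int cookie;
};

struct sock {
	struct dst_entry *sk_rx_dst;	/* may be cleared concurrently */
};

static struct dst_entry *dst_check(struct dst_entry *dst, unsigned int cookie)
{
	return dst->cookie == cookie ? dst : NULL;	/* derefs dst: NULL here crashes */
}

static void early_demux(struct sock *sk, unsigned int cookie)
{
	/* Buggy shape: "if (sk->sk_rx_dst) dst_check(sk->sk_rx_dst, ...)".
	 * That is two loads of the same field, so the pointer can become
	 * NULL between the test and the call. Snapshot it exactly once:
	 */
	struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);

	if (dst)
		dst = dst_check(dst, cookie);

	printf("dst %s\n", dst ? "valid" : "dropped");
}

int main(void)
{
	struct dst_entry d = { .cookie = 42 };
	struct sock sk = { .sk_rx_dst = &d };

	early_demux(&sk, 42);
	return 0;
}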
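
Finally, the xen-netfront arithmetic can be checked directly. The TSO autosizing caps an skb at sk_gso_max_size - 1 - MAX_TCP_HEADER, so a gso_max_size that already had MAX_TCP_HEADER subtracted deducts the header twice. The figures below reproduce the 44-versus-45-segment numbers from the commit message; MAX_TCP_HEADER is config-dependent, and the 224 used here is only an assumed representative value.

/* Worked check of the xen-netfront GSO sizing above. MAX_TCP_HEADER is
 * config-dependent; 224 is an assumed value for illustration only.
 */
#include <stdio.h>

#define XEN_NETIF_MAX_TX_SIZE	65535
#define MAX_TCP_HEADER		224	/* assumption, varies by config */
#define MSS			1448	/* segment size from the commit message */

int main(void)
{
	/* Before: gso_max_size already had MAX_TCP_HEADER subtracted,
	 * and the TSO autosizing subtracts it a second time.
	 */
	int old_budget = (XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER)
			 - 1 - MAX_TCP_HEADER;
	/* After: the default gso_max_size of 65536 is left in place. */
	int new_budget = 65536 - 1 - MAX_TCP_HEADER;

	printf("old: %d segments, %d bytes\n",
	       old_budget / MSS, old_budget / MSS * MSS);	/* 44, 63712 */
	printf("new: %d segments, %d bytes\n",
	       new_budget / MSS, new_budget / MSS * MSS);	/* 45, 65160 */
	return 0;
}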