From: Greg Kroah-Hartman Date: Mon, 13 Oct 2014 01:33:10 +0000 (+0200) Subject: 3.16-stable patches X-Git-Tag: v3.17.1~7 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=9c4d75f04ca85dc551536cb055a04825ff284697;p=thirdparty%2Fkernel%2Fstable-queue.git 3.16-stable patches added patches: bnx2x-revert-undi-flushing-mechanism.patch bonding-fix-div-by-zero-while-enslaving-and-transmitting.patch bridge-check-if-vlan-filtering-is-enabled-only-once.patch bridge-fix-br_should_learn-to-check-vlan_enabled.patch gro-fix-aggregation-for-skb-using-frag_list.patch hyperv-fix-a-bug-in-netvsc_send.patch hyperv-fix-a-bug-in-netvsc_start_xmit.patch ip6_gre-fix-flowi6_proto-value-in-xmit-path.patch ip_tunnel-don-t-allow-to-add-the-same-tunnel-multiple-times.patch ipv6-fix-rtnl-locking-in-setsockopt-for-anycast-and-multicast.patch ipv6-remove-rt6i_genid.patch ipv6-restore-the-behavior-of-ipv6_sock_ac_drop.patch l2tp-fix-race-while-getting-pmtu-on-ppp-pseudo-wire.patch macvlan-allow-to-enqueue-broadcast-pkt-on-virtual-device.patch macvtap-fix-race-between-device-delete-and-open.patch myri10ge-check-for-dma-mapping-errors.patch net-allow-macvlans-to-move-to-net-namespace.patch net-always-untag-vlan-tagged-traffic-on-input.patch net-filter-fix-possible-use-after-free.patch net-fix-checksum-features-handling-in-netif_skb_features.patch net-ipv6-fib-don-t-sleep-inside-atomic-lock.patch net-mlx4-correctly-configure-single-ported-vfs-from-the-host.patch net-mlx4_core-allow-not-to-specify-probe_vf-in-sriov-ib-mode.patch net-phy-smsc-move-smsc_phy_config_init-reset-part-in-a-soft_reset-function.patch net-systemport-fix-bcm_sysport_insert_tsb.patch net_sched-copy-exts-type-in-tcf_exts_change.patch netlink-reset-network-header-before-passing-to-taps.patch openvswitch-fix-panic-with-multiple-vlan-headers.patch packet-handle-too-big-packets-for-packet_v3.patch revert-macvlan-simplify-the-structure-port.patch revert-net-macb-add-pinctrl-consumer-support.patch rtnetlink-fix-vf-info-size.patch 
sctp-handle-association-restarts-when-the-socket-is-closed.patch sit-fix-ipip6_tunnel_lookup-device-matching-criteria.patch tcp-don-t-use-timestamp-from-repaired-skb-s-to-calculate-rtt-v2.patch tcp-fix-ssthresh-and-undo-for-consecutive-short-frto-episodes.patch tcp-fix-tcp_release_cb-to-dispatch-via-address-family-for-mtu_reduced.patch team-avoid-race-condition-in-scheduling-delayed-work.patch tg3-allow-for-recieve-of-full-size-8021ad-frames.patch tg3-work-around-hw-fw-limitations-with-vlan-encapsulated-frames.patch tipc-fix-message-importance-range-check.patch vxlan-fix-incorrect-initializer-in-union-vxlan_addr.patch xfrm-generate-blackhole-routes-only-from-route-lookup-functions.patch xfrm-generate-queueing-routes-only-from-route-lookup-functions.patch --- diff --git a/queue-3.16/bnx2x-revert-undi-flushing-mechanism.patch b/queue-3.16/bnx2x-revert-undi-flushing-mechanism.patch new file mode 100644 index 00000000000..d233f7dfecd --- /dev/null +++ b/queue-3.16/bnx2x-revert-undi-flushing-mechanism.patch @@ -0,0 +1,170 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Yuval Mintz +Date: Mon, 18 Aug 2014 22:36:23 +0300 +Subject: bnx2x: Revert UNDI flushing mechanism + +From: Yuval Mintz + +[ Upstream commit 7c3afd85dc1610bb2fc049644cd1b52c7af96f98 ] + +Commit 91ebb929b6f8 ("bnx2x: Add support for Multi-Function UNDI") [which was +later supposedly fixed by de682941eef3 ("bnx2x: Fix UNDI driver unload")] +introduced a bug in which in some [yet-to-be-determined] scenarios the +alternative flushing mechanism which was to guarantee the Rx buffers are +empty before resetting them during device probe will fail. +If this happens, when device will be loaded once more a fatal attention will +occur; Since this most likely happens in boot from SAN scenarios, the machine +will fail to load. 
+ +Notice this may occur not only in the 'Multi-Function' scenario but in the +regular scenario as well, i.e., this introduced a regression in the driver's +ability to perform boot from SAN. + +The patch reverts the mechanism and applies the old scheme to multi-function +devices as well as to single-function devices. + +Signed-off-by: Yuval Mintz +Signed-off-by: Ariel Elior +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 92 ++++------------------- + 1 file changed, 17 insertions(+), 75 deletions(-) + +--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c ++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +@@ -10044,6 +10044,8 @@ static void bnx2x_prev_unload_close_mac( + } + + #define BNX2X_PREV_UNDI_PROD_ADDR(p) (BAR_TSTRORM_INTMEM + 0x1508 + ((p) << 4)) ++#define BNX2X_PREV_UNDI_PROD_ADDR_H(f) (BAR_TSTRORM_INTMEM + \ ++ 0x1848 + ((f) << 4)) + #define BNX2X_PREV_UNDI_RCQ(val) ((val) & 0xffff) + #define BNX2X_PREV_UNDI_BD(val) ((val) >> 16 & 0xffff) + #define BNX2X_PREV_UNDI_PROD(rcq, bd) ((bd) << 16 | (rcq)) +@@ -10051,8 +10053,6 @@ static void bnx2x_prev_unload_close_mac( + #define BCM_5710_UNDI_FW_MF_MAJOR (0x07) + #define BCM_5710_UNDI_FW_MF_MINOR (0x08) + #define BCM_5710_UNDI_FW_MF_VERS (0x05) +-#define BNX2X_PREV_UNDI_MF_PORT(p) (BAR_TSTRORM_INTMEM + 0x150c + ((p) << 4)) +-#define BNX2X_PREV_UNDI_MF_FUNC(f) (BAR_TSTRORM_INTMEM + 0x184c + ((f) << 4)) + + static bool bnx2x_prev_is_after_undi(struct bnx2x *bp) + { +@@ -10071,72 +10071,25 @@ static bool bnx2x_prev_is_after_undi(str + return false; + } + +-static bool bnx2x_prev_unload_undi_fw_supports_mf(struct bnx2x *bp) +-{ +- u8 major, minor, version; +- u32 fw; +- +- /* Must check that FW is loaded */ +- if (!(REG_RD(bp, MISC_REG_RESET_REG_1) & +- MISC_REGISTERS_RESET_REG_1_RST_XSEM)) { +- BNX2X_DEV_INFO("XSEM is reset - UNDI MF FW is not loaded\n"); +- return false; +- } +- +- /* Read Currently loaded FW version */ +- fw = 
REG_RD(bp, XSEM_REG_PRAM); +- major = fw & 0xff; +- minor = (fw >> 0x8) & 0xff; +- version = (fw >> 0x10) & 0xff; +- BNX2X_DEV_INFO("Loaded FW: 0x%08x: Major 0x%02x Minor 0x%02x Version 0x%02x\n", +- fw, major, minor, version); +- +- if (major > BCM_5710_UNDI_FW_MF_MAJOR) +- return true; +- +- if ((major == BCM_5710_UNDI_FW_MF_MAJOR) && +- (minor > BCM_5710_UNDI_FW_MF_MINOR)) +- return true; +- +- if ((major == BCM_5710_UNDI_FW_MF_MAJOR) && +- (minor == BCM_5710_UNDI_FW_MF_MINOR) && +- (version >= BCM_5710_UNDI_FW_MF_VERS)) +- return true; +- +- return false; +-} +- +-static void bnx2x_prev_unload_undi_mf(struct bnx2x *bp) +-{ +- int i; +- +- /* Due to legacy (FW) code, the first function on each engine has a +- * different offset macro from the rest of the functions. +- * Setting this for all 8 functions is harmless regardless of whether +- * this is actually a multi-function device. +- */ +- for (i = 0; i < 2; i++) +- REG_WR(bp, BNX2X_PREV_UNDI_MF_PORT(i), 1); +- +- for (i = 2; i < 8; i++) +- REG_WR(bp, BNX2X_PREV_UNDI_MF_FUNC(i - 2), 1); +- +- BNX2X_DEV_INFO("UNDI FW (MF) set to discard\n"); +-} +- +-static void bnx2x_prev_unload_undi_inc(struct bnx2x *bp, u8 port, u8 inc) ++static void bnx2x_prev_unload_undi_inc(struct bnx2x *bp, u8 inc) + { + u16 rcq, bd; +- u32 tmp_reg = REG_RD(bp, BNX2X_PREV_UNDI_PROD_ADDR(port)); ++ u32 addr, tmp_reg; ++ ++ if (BP_FUNC(bp) < 2) ++ addr = BNX2X_PREV_UNDI_PROD_ADDR(BP_PORT(bp)); ++ else ++ addr = BNX2X_PREV_UNDI_PROD_ADDR_H(BP_FUNC(bp) - 2); + ++ tmp_reg = REG_RD(bp, addr); + rcq = BNX2X_PREV_UNDI_RCQ(tmp_reg) + inc; + bd = BNX2X_PREV_UNDI_BD(tmp_reg) + inc; + + tmp_reg = BNX2X_PREV_UNDI_PROD(rcq, bd); +- REG_WR(bp, BNX2X_PREV_UNDI_PROD_ADDR(port), tmp_reg); ++ REG_WR(bp, addr, tmp_reg); + +- BNX2X_DEV_INFO("UNDI producer [%d] rings bd -> 0x%04x, rcq -> 0x%04x\n", +- port, bd, rcq); ++ BNX2X_DEV_INFO("UNDI producer [%d/%d][%08x] rings bd -> 0x%04x, rcq -> 0x%04x\n", ++ BP_PORT(bp), BP_FUNC(bp), addr, bd, rcq); + } + + static 
int bnx2x_prev_mcp_done(struct bnx2x *bp) +@@ -10375,7 +10328,6 @@ static int bnx2x_prev_unload_common(stru + /* Reset should be performed after BRB is emptied */ + if (reset_reg & MISC_REGISTERS_RESET_REG_1_RST_BRB1) { + u32 timer_count = 1000; +- bool need_write = true; + + /* Close the MAC Rx to prevent BRB from filling up */ + bnx2x_prev_unload_close_mac(bp, &mac_vals); +@@ -10412,20 +10364,10 @@ static int bnx2x_prev_unload_common(stru + else + timer_count--; + +- /* New UNDI FW supports MF and contains better +- * cleaning methods - might be redundant but harmless. +- */ +- if (bnx2x_prev_unload_undi_fw_supports_mf(bp)) { +- if (need_write) { +- bnx2x_prev_unload_undi_mf(bp); +- need_write = false; +- } +- } else if (prev_undi) { +- /* If UNDI resides in memory, +- * manually increment it +- */ +- bnx2x_prev_unload_undi_inc(bp, BP_PORT(bp), 1); +- } ++ /* If UNDI resides in memory, manually increment it */ ++ if (prev_undi) ++ bnx2x_prev_unload_undi_inc(bp, 1); ++ + udelay(10); + } + diff --git a/queue-3.16/bonding-fix-div-by-zero-while-enslaving-and-transmitting.patch b/queue-3.16/bonding-fix-div-by-zero-while-enslaving-and-transmitting.patch new file mode 100644 index 00000000000..8f1df6d1aae --- /dev/null +++ b/queue-3.16/bonding-fix-div-by-zero-while-enslaving-and-transmitting.patch @@ -0,0 +1,152 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Nikolay Aleksandrov +Date: Fri, 12 Sep 2014 17:38:18 +0200 +Subject: bonding: fix div by zero while enslaving and transmitting + +From: Nikolay Aleksandrov + +[ Upstream commit 9a72c2da690d78e93cff24b9f616412508678dd5 ] + +The problem is that the slave is first linked and slave_cnt is +incremented afterwards leading to a div by zero in the modes that use it +as a modulus. 
What happens is that in bond_start_xmit() +bond_has_slaves() is used to evaluate further transmission and it becomes +true after the slave is linked in, but when slave_cnt is used in the xmit +path it is still 0, so fetch it once and transmit based on that. Since +it is used only in round-robin and XOR modes, the fix is only for them. +Thanks to Eric Dumazet for pointing out the fault in my first try to fix +this. + +Call trace (took it out of net-next kernel, but it's the same with net): +[46934.330038] divide error: 0000 [#1] SMP +[46934.330041] Modules linked in: bonding(O) 9p fscache +snd_hda_codec_generic crct10dif_pclmul +[46934.330041] bond0: Enslaving eth1 as an active interface with an up +link +[46934.330051] ppdev joydev crc32_pclmul crc32c_intel 9pnet_virtio +ghash_clmulni_intel snd_hda_intel 9pnet snd_hda_controller parport_pc +serio_raw pcspkr snd_hda_codec parport virtio_balloon virtio_console +snd_hwdep snd_pcm pvpanic i2c_piix4 snd_timer i2ccore snd soundcore +virtio_blk virtio_net virtio_pci virtio_ring virtio ata_generic +pata_acpi floppy [last unloaded: bonding] +[46934.330053] CPU: 1 PID: 3382 Comm: ping Tainted: G O +3.17.0-rc4+ #27 +[46934.330053] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 +[46934.330054] task: ffff88005aebf2c0 ti: ffff88005b728000 task.ti: +ffff88005b728000 +[46934.330059] RIP: 0010:[] [] +bond_start_xmit+0x1c3/0x450 [bonding] +[46934.330060] RSP: 0018:ffff88005b72b7f8 EFLAGS: 00010246 +[46934.330060] RAX: 0000000000000679 RBX: ffff88004b077000 RCX: +000000000000002a +[46934.330061] RDX: 0000000000000000 RSI: ffff88004b3f0500 RDI: +ffff88004b077940 +[46934.330061] RBP: ffff88005b72b830 R08: 00000000000000c0 R09: +ffff88004a83e000 +[46934.330062] R10: 000000000000ffff R11: ffff88004b1f12c0 R12: +ffff88004b3f0500 +[46934.330062] R13: ffff88004b3f0500 R14: 000000000000002a R15: +ffff88004b077940 +[46934.330063] FS: 00007fbd91a4c740(0000) GS:ffff88005f080000(0000) +knlGS:0000000000000000 +[46934.330064] CS: 0010 DS: 0000 
ES: 0000 CR0: 0000000080050033 +[46934.330064] CR2: 00007f803a8bb000 CR3: 000000004b2c9000 CR4: +00000000000406e0 +[46934.330069] Stack: +[46934.330071] ffffffff811e6169 00000000e772fa05 ffff88004b077000 +ffff88004b3f0500 +[46934.330072] ffffffff81d17d18 000000000000002a 0000000000000000 +ffff88005b72b8a0 +[46934.330073] ffffffff81620108 ffffffff8161fe0e ffff88005b72b8c4 +ffff88005b302000 +[46934.330073] Call Trace: +[46934.330077] [] ? +__kmalloc_node_track_caller+0x119/0x300 +[46934.330084] [] dev_hard_start_xmit+0x188/0x410 +[46934.330086] [] ? harmonize_features+0x2e/0x90 +[46934.330088] [] __dev_queue_xmit+0x456/0x590 +[46934.330089] [] dev_queue_xmit+0x10/0x20 +[46934.330090] [] arp_xmit+0x22/0x60 +[46934.330091] [] arp_send.part.16+0x30/0x40 +[46934.330092] [] arp_solicit+0x115/0x2b0 +[46934.330094] [] ? copy_skb_header+0x17/0xa0 +[46934.330096] [] neigh_probe+0x4a/0x70 +[46934.330097] [] __neigh_event_send+0xac/0x230 +[46934.330098] [] neigh_resolve_output+0x13b/0x220 +[46934.330100] [] ? ip_forward_options+0x1c0/0x1c0 +[46934.330101] [] ip_finish_output+0x1f8/0x860 +[46934.330102] [] ip_output+0x58/0x90 +[46934.330103] [] ? __ip_local_out+0xa2/0xb0 +[46934.330104] [] ip_local_out_sk+0x30/0x40 +[46934.330105] [] ip_send_skb+0x16/0x50 +[46934.330106] [] ip_push_pending_frames+0x33/0x40 +[46934.330107] [] raw_sendmsg+0x88c/0xa30 +[46934.330110] [] ? skb_recv_datagram+0x41/0x60 +[46934.330111] [] ? raw_recvmsg+0xa9/0x1f0 +[46934.330113] [] inet_sendmsg+0x74/0xc0 +[46934.330114] [] ? inet_recvmsg+0x8b/0xb0 +[46934.330115] bond0: Adding slave eth2 +[46934.330116] [] sock_sendmsg+0x9c/0xe0 +[46934.330118] [] ? +move_addr_to_kernel.part.20+0x28/0x80 +[46934.330121] [] ? might_fault+0x47/0x50 +[46934.330122] [] ___sys_sendmsg+0x3a9/0x3c0 +[46934.330125] [] ? n_tty_write+0x3aa/0x530 +[46934.330127] [] ? __wake_up+0x44/0x50 +[46934.330129] [] ? 
fsnotify+0x238/0x310 +[46934.330130] [] __sys_sendmsg+0x51/0x90 +[46934.330131] [] SyS_sendmsg+0x12/0x20 +[46934.330134] [] system_call_fastpath+0x16/0x1b +[46934.330144] Code: 48 8b 10 4c 89 ee 4c 89 ff e8 aa bc ff ff 31 c0 e9 +1a ff ff ff 0f 1f 00 4c 89 ee 4c 89 ff e8 65 fb ff ff 31 d2 4c 89 ee 4c +89 ff b3 64 09 00 00 e8 02 bd ff ff 31 c0 e9 f2 fe ff ff 0f 1f 00 +[46934.330146] RIP [] bond_start_xmit+0x1c3/0x450 +[bonding] +[46934.330146] RSP + +CC: Eric Dumazet +CC: Andy Gospodarek +CC: Jay Vosburgh +CC: Veaceslav Falico +Fixes: 278b208375 ("bonding: initial RCU conversion") +Signed-off-by: Nikolay Aleksandrov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_main.c | 17 ++++++++++++++--- + 1 file changed, 14 insertions(+), 3 deletions(-) + +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -3667,8 +3667,14 @@ static int bond_xmit_roundrobin(struct s + else + bond_xmit_slave_id(bond, skb, 0); + } else { +- slave_id = bond_rr_gen_slave_id(bond); +- bond_xmit_slave_id(bond, skb, slave_id % bond->slave_cnt); ++ int slave_cnt = ACCESS_ONCE(bond->slave_cnt); ++ ++ if (likely(slave_cnt)) { ++ slave_id = bond_rr_gen_slave_id(bond); ++ bond_xmit_slave_id(bond, skb, slave_id % slave_cnt); ++ } else { ++ dev_kfree_skb_any(skb); ++ } + } + + return NETDEV_TX_OK; +@@ -3699,8 +3705,13 @@ static int bond_xmit_activebackup(struct + static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev) + { + struct bonding *bond = netdev_priv(bond_dev); ++ int slave_cnt = ACCESS_ONCE(bond->slave_cnt); + +- bond_xmit_slave_id(bond, skb, bond_xmit_hash(bond, skb) % bond->slave_cnt); ++ if (likely(slave_cnt)) ++ bond_xmit_slave_id(bond, skb, ++ bond_xmit_hash(bond, skb) % slave_cnt); ++ else ++ dev_kfree_skb_any(skb); + + return NETDEV_TX_OK; + } diff --git a/queue-3.16/bridge-check-if-vlan-filtering-is-enabled-only-once.patch b/queue-3.16/bridge-check-if-vlan-filtering-is-enabled-only-once.patch 
new file mode 100644 index 00000000000..00db24dec79 --- /dev/null +++ b/queue-3.16/bridge-check-if-vlan-filtering-is-enabled-only-once.patch @@ -0,0 +1,94 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Vlad Yasevich +Date: Fri, 12 Sep 2014 16:26:16 -0400 +Subject: bridge: Check if vlan filtering is enabled only once. + +From: Vlad Yasevich + +[ Upstream commit 20adfa1a81af00bf2027644507ad4fa9cd2849cf ] + +The bridge code checks if vlan filtering is enabled on both +ingress and egress. When the state flip happens, it +is possible for the bridge to currently be forwarding packets +and forwarding behavior becomes non-deterministic. Bridge +may drop packets on some interfaces, but not others. + +This patch solves this by caching the filtered state of the +packet into skb_cb on ingress. The skb_cb is guaranteed to +not be over-written between the time packet enters bridge +forwarding path and the time it leaves it. On egress, we +can then check the cached state to see if we need to +apply filtering information. + +Signed-off-by: Vladislav Yasevich +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_private.h | 3 +++ + net/bridge/br_vlan.c | 14 ++++++++++---- + 2 files changed, 13 insertions(+), 4 deletions(-) + +--- a/net/bridge/br_private.h ++++ b/net/bridge/br_private.h +@@ -309,6 +309,9 @@ struct br_input_skb_cb { + int igmp; + int mrouters_only; + #endif ++#ifdef CONFIG_BRIDGE_VLAN_FILTERING ++ bool vlan_filtered; ++#endif + }; + + #define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb) +--- a/net/bridge/br_vlan.c ++++ b/net/bridge/br_vlan.c +@@ -127,7 +127,8 @@ struct sk_buff *br_handle_vlan(struct ne + { + u16 vid; + +- if (!br->vlan_enabled) ++ /* If this packet was not filtered at input, let it pass */ ++ if (!BR_INPUT_SKB_CB(skb)->vlan_filtered) + goto out; + + /* Vlan filter table must be configured at this point. 
The +@@ -166,8 +167,10 @@ bool br_allowed_ingress(struct net_bridg + /* If VLAN filtering is disabled on the bridge, all packets are + * permitted. + */ +- if (!br->vlan_enabled) ++ if (!br->vlan_enabled) { ++ BR_INPUT_SKB_CB(skb)->vlan_filtered = false; + return true; ++ } + + /* If there are no vlan in the permitted list, all packets are + * rejected. +@@ -175,6 +178,7 @@ bool br_allowed_ingress(struct net_bridg + if (!v) + goto drop; + ++ BR_INPUT_SKB_CB(skb)->vlan_filtered = true; + proto = br->vlan_proto; + + /* If vlan tx offload is disabled on bridge device and frame was +@@ -253,7 +257,8 @@ bool br_allowed_egress(struct net_bridge + { + u16 vid; + +- if (!br->vlan_enabled) ++ /* If this packet was not filtered at input, let it pass */ ++ if (!BR_INPUT_SKB_CB(skb)->vlan_filtered) + return true; + + if (!v) +@@ -272,7 +277,8 @@ bool br_should_learn(struct net_bridge_p + struct net_bridge *br = p->br; + struct net_port_vlans *v; + +- if (!br->vlan_enabled) ++ /* If filtering was disabled at input, let it pass. */ ++ if (!BR_INPUT_SKB_CB(skb)->vlan_filtered) + return true; + + v = rcu_dereference(p->vlan_info); diff --git a/queue-3.16/bridge-fix-br_should_learn-to-check-vlan_enabled.patch b/queue-3.16/bridge-fix-br_should_learn-to-check-vlan_enabled.patch new file mode 100644 index 00000000000..ed71092eaf0 --- /dev/null +++ b/queue-3.16/bridge-fix-br_should_learn-to-check-vlan_enabled.patch @@ -0,0 +1,35 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Vlad Yasevich +Date: Mon, 15 Sep 2014 15:24:26 -0400 +Subject: bridge: Fix br_should_learn to check vlan_enabled + +From: Vlad Yasevich + +[ Upstream commit c095f248e63ada504dd90c90baae673ae10ee3fe ] + +As Toshiaki Makita pointed out, the BR_INPUT_SKB_CB will +not be initialized in br_should_learn() as that function +is called only from br_handle_local_finish(). That is +an input handler for link-local ethernet traffic so it is perfectly +correct to check br->vlan_enabled here. 
+ +Reported-by: Toshiaki Makita +Fixes: 20adfa1 bridge: Check if vlan filtering is enabled only once. +Signed-off-by: Vladislav Yasevich +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_vlan.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/bridge/br_vlan.c ++++ b/net/bridge/br_vlan.c +@@ -278,7 +278,7 @@ bool br_should_learn(struct net_bridge_p + struct net_port_vlans *v; + + /* If filtering was disabled at input, let it pass. */ +- if (!BR_INPUT_SKB_CB(skb)->vlan_filtered) ++ if (!br->vlan_enabled) + return true; + + v = rcu_dereference(p->vlan_info); diff --git a/queue-3.16/gro-fix-aggregation-for-skb-using-frag_list.patch b/queue-3.16/gro-fix-aggregation-for-skb-using-frag_list.patch new file mode 100644 index 00000000000..8202b8abec0 --- /dev/null +++ b/queue-3.16/gro-fix-aggregation-for-skb-using-frag_list.patch @@ -0,0 +1,41 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Eric Dumazet +Date: Mon, 29 Sep 2014 10:34:29 -0700 +Subject: gro: fix aggregation for skb using frag_list + +From: Eric Dumazet + +[ Upstream commit 73d3fe6d1c6d840763ceafa9afae0aaafa18c4b5 ] + +In commit 8a29111c7ca6 ("net: gro: allow to build full sized skb") +I added a regression for linear skb that traditionally force GRO +to use the frag_list fallback. + +Erez Shitrit found that at most two segments were aggregated and +the "if (skb_gro_len(p) != pinfo->gso_size)" test was failing. + +This is because pinfo at this spot still points to the last skb in the +chain, instead of the first one, where we find the correct gso_size +information. + +Signed-off-by: Eric Dumazet +Fixes: 8a29111c7ca6 ("net: gro: allow to build full sized skb") +Reported-by: Erez Shitrit +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/skbuff.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -3152,6 +3152,9 @@ int skb_gro_receive(struct sk_buff **hea + NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD; + goto done; + } ++ /* switch back to head shinfo */ ++ pinfo = skb_shinfo(p); ++ + if (pinfo->frag_list) + goto merge; + if (skb_gro_len(p) != pinfo->gso_size) diff --git a/queue-3.16/hyperv-fix-a-bug-in-netvsc_send.patch b/queue-3.16/hyperv-fix-a-bug-in-netvsc_send.patch new file mode 100644 index 00000000000..6cabadfb263 --- /dev/null +++ b/queue-3.16/hyperv-fix-a-bug-in-netvsc_send.patch @@ -0,0 +1,65 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: KY Srinivasan +Date: Sun, 5 Oct 2014 10:42:51 -0700 +Subject: hyperv: Fix a bug in netvsc_send() + +From: KY Srinivasan + +[ Upstream commit 3a67c9ccad926a168d8b7891537a452018368a5b ] + +After the packet is successfully sent, we should not touch the packet +as it may have been freed. This patch is based on the work done by +Long Li . + +David, please queue this up for stable. + +Signed-off-by: K. Y. Srinivasan +Reported-by: Sitsofe Wheeler +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/netvsc.c | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -708,6 +708,7 @@ int netvsc_send(struct hv_device *device + unsigned int section_index = NETVSC_INVALID_INDEX; + u32 msg_size = 0; + struct sk_buff *skb; ++ u16 q_idx = packet->q_idx; + + + net_device = get_outbound_net_device(device); +@@ -772,24 +773,24 @@ int netvsc_send(struct hv_device *device + + if (ret == 0) { + atomic_inc(&net_device->num_outstanding_sends); +- atomic_inc(&net_device->queue_sends[packet->q_idx]); ++ atomic_inc(&net_device->queue_sends[q_idx]); + + if (hv_ringbuf_avail_percent(&out_channel->outbound) < + RING_AVAIL_PERCENT_LOWATER) { + netif_tx_stop_queue(netdev_get_tx_queue( +- ndev, packet->q_idx)); ++ ndev, q_idx)); + + if (atomic_read(&net_device-> +- queue_sends[packet->q_idx]) < 1) ++ queue_sends[q_idx]) < 1) + netif_tx_wake_queue(netdev_get_tx_queue( +- ndev, packet->q_idx)); ++ ndev, q_idx)); + } + } else if (ret == -EAGAIN) { + netif_tx_stop_queue(netdev_get_tx_queue( +- ndev, packet->q_idx)); +- if (atomic_read(&net_device->queue_sends[packet->q_idx]) < 1) { ++ ndev, q_idx)); ++ if (atomic_read(&net_device->queue_sends[q_idx]) < 1) { + netif_tx_wake_queue(netdev_get_tx_queue( +- ndev, packet->q_idx)); ++ ndev, q_idx)); + ret = -ENOSPC; + } + } else { diff --git a/queue-3.16/hyperv-fix-a-bug-in-netvsc_start_xmit.patch b/queue-3.16/hyperv-fix-a-bug-in-netvsc_start_xmit.patch new file mode 100644 index 00000000000..5cdbb296f2b --- /dev/null +++ b/queue-3.16/hyperv-fix-a-bug-in-netvsc_start_xmit.patch @@ -0,0 +1,44 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: KY Srinivasan +Date: Sun, 28 Sep 2014 22:16:43 -0700 +Subject: hyperv: Fix a bug in netvsc_start_xmit() + +From: KY Srinivasan + +[ Upstream commit dedb845ded56ded1c62f5398a94ffa8615d4592d ] + +After the packet is successfully sent, we should not 
touch the skb +as it may have been freed. This patch is based on the work done by +Long Li . + +In this version of the patch I have fixed issues pointed out by David. +David, please queue this up for stable. + +Signed-off-by: K. Y. Srinivasan +Tested-by: Long Li +Tested-by: Sitsofe Wheeler +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/netvsc_drv.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -387,6 +387,7 @@ static int netvsc_start_xmit(struct sk_b + int hdr_offset; + u32 net_trans_info; + u32 hash; ++ u32 skb_length = skb->len; + + + /* We will atmost need two pages to describe the rndis +@@ -562,7 +563,7 @@ do_send: + + drop: + if (ret == 0) { +- net->stats.tx_bytes += skb->len; ++ net->stats.tx_bytes += skb_length; + net->stats.tx_packets++; + } else { + kfree(packet); diff --git a/queue-3.16/ip6_gre-fix-flowi6_proto-value-in-xmit-path.patch b/queue-3.16/ip6_gre-fix-flowi6_proto-value-in-xmit-path.patch new file mode 100644 index 00000000000..780628a88e7 --- /dev/null +++ b/queue-3.16/ip6_gre-fix-flowi6_proto-value-in-xmit-path.patch @@ -0,0 +1,42 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Nicolas Dichtel +Date: Thu, 2 Oct 2014 18:26:49 +0200 +Subject: ip6_gre: fix flowi6_proto value in xmit path + +From: Nicolas Dichtel + +[ Upstream commit 3be07244b7337760a3269d56b2f4a63e72218648 ] + +In xmit path, we build a flowi6 which will be used for the output route lookup. +We are sending a GRE packet, neither IPv4 nor IPv6 encapsulated packet, thus the +protocol should be IPPROTO_GRE. + +Fixes: c12b395a4664 ("gre: Support GRE over IPv6") +Reported-by: Matthieu Ternisien d'Ouville +Signed-off-by: Nicolas Dichtel +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_gre.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -778,7 +778,7 @@ static inline int ip6gre_xmit_ipv4(struc + encap_limit = t->parms.encap_limit; + + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); +- fl6.flowi6_proto = IPPROTO_IPIP; ++ fl6.flowi6_proto = IPPROTO_GRE; + + dsfield = ipv4_get_dsfield(iph); + +@@ -828,7 +828,7 @@ static inline int ip6gre_xmit_ipv6(struc + encap_limit = t->parms.encap_limit; + + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); +- fl6.flowi6_proto = IPPROTO_IPV6; ++ fl6.flowi6_proto = IPPROTO_GRE; + + dsfield = ipv6_get_dsfield(ipv6h); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) diff --git a/queue-3.16/ip_tunnel-don-t-allow-to-add-the-same-tunnel-multiple-times.patch b/queue-3.16/ip_tunnel-don-t-allow-to-add-the-same-tunnel-multiple-times.patch new file mode 100644 index 00000000000..acb4098e066 --- /dev/null +++ b/queue-3.16/ip_tunnel-don-t-allow-to-add-the-same-tunnel-multiple-times.patch @@ -0,0 +1,45 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Steffen Klassert +Date: Mon, 22 Sep 2014 09:11:08 +0200 +Subject: ip_tunnel: Don't allow to add the same tunnel multiple times. + +From: Steffen Klassert + +[ Upstream commit d61746b2e71bf612fb397b00242de5df5ba7f29a ] + +When we try to add an already existing tunnel, we don't return +an error. Instead we continue and call ip_tunnel_update(). +This means that we can change existing tunnels by adding +the same tunnel multiple times. It is even possible to change +the tunnel endpoints of the fallback device. + +We fix this by returning an error if we try to add an existing +tunnel. + +Signed-off-by: Steffen Klassert +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_tunnel.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +--- a/net/ipv4/ip_tunnel.c ++++ b/net/ipv4/ip_tunnel.c +@@ -764,9 +764,14 @@ int ip_tunnel_ioctl(struct net_device *d + + t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); + +- if (!t && (cmd == SIOCADDTUNNEL)) { +- t = ip_tunnel_create(net, itn, p); +- err = PTR_ERR_OR_ZERO(t); ++ if (cmd == SIOCADDTUNNEL) { ++ if (!t) { ++ t = ip_tunnel_create(net, itn, p); ++ err = PTR_ERR_OR_ZERO(t); ++ break; ++ } ++ ++ err = -EEXIST; + break; + } + if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { diff --git a/queue-3.16/ipv6-fix-rtnl-locking-in-setsockopt-for-anycast-and-multicast.patch b/queue-3.16/ipv6-fix-rtnl-locking-in-setsockopt-for-anycast-and-multicast.patch new file mode 100644 index 00000000000..c5743da3125 --- /dev/null +++ b/queue-3.16/ipv6-fix-rtnl-locking-in-setsockopt-for-anycast-and-multicast.patch @@ -0,0 +1,254 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Sabrina Dubroca +Date: Tue, 2 Sep 2014 10:29:29 +0200 +Subject: ipv6: fix rtnl locking in setsockopt for anycast and multicast + +From: Sabrina Dubroca + +[ Upstream commit a9ed4a2986e13011fcf4ed2d1a1647c53112f55b ] + +Calling setsockopt with IPV6_JOIN_ANYCAST or IPV6_LEAVE_ANYCAST +triggers the assertion in addrconf_join_solict()/addrconf_leave_solict() + +ipv6_sock_ac_join(), ipv6_sock_ac_drop(), ipv6_sock_ac_close() need to +take RTNL before calling ipv6_dev_ac_inc/dec. Same thing with +ipv6_sock_mc_join(), ipv6_sock_mc_drop(), ipv6_sock_mc_close() before +calling ipv6_dev_mc_inc/dec. + +This patch moves ASSERT_RTNL() up a level in the call stack. + +Signed-off-by: Cong Wang +Signed-off-by: Sabrina Dubroca +Reported-by: Tommi Rantala +Acked-by: Hannes Frederic Sowa +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/addrconf.c | 15 +++++---------- + net/ipv6/anycast.c | 12 ++++++++++++ + net/ipv6/mcast.c | 14 ++++++++++++++ + 3 files changed, 31 insertions(+), 10 deletions(-) + +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -1679,14 +1679,12 @@ void addrconf_dad_failure(struct inet6_i + addrconf_mod_dad_work(ifp, 0); + } + +-/* Join to solicited addr multicast group. */ +- ++/* Join to solicited addr multicast group. ++ * caller must hold RTNL */ + void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr) + { + struct in6_addr maddr; + +- ASSERT_RTNL(); +- + if (dev->flags&(IFF_LOOPBACK|IFF_NOARP)) + return; + +@@ -1694,12 +1692,11 @@ void addrconf_join_solict(struct net_dev + ipv6_dev_mc_inc(dev, &maddr); + } + ++/* caller must hold RTNL */ + void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr) + { + struct in6_addr maddr; + +- ASSERT_RTNL(); +- + if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP)) + return; + +@@ -1707,12 +1704,11 @@ void addrconf_leave_solict(struct inet6_ + __ipv6_dev_mc_dec(idev, &maddr); + } + ++/* caller must hold RTNL */ + static void addrconf_join_anycast(struct inet6_ifaddr *ifp) + { + struct in6_addr addr; + +- ASSERT_RTNL(); +- + if (ifp->prefix_len >= 127) /* RFC 6164 */ + return; + ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); +@@ -1721,12 +1717,11 @@ static void addrconf_join_anycast(struct + ipv6_dev_ac_inc(ifp->idev->dev, &addr); + } + ++/* caller must hold RTNL */ + static void addrconf_leave_anycast(struct inet6_ifaddr *ifp) + { + struct in6_addr addr; + +- ASSERT_RTNL(); +- + if (ifp->prefix_len >= 127) /* RFC 6164 */ + return; + ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); +--- a/net/ipv6/anycast.c ++++ b/net/ipv6/anycast.c +@@ -77,6 +77,7 @@ int ipv6_sock_ac_join(struct sock *sk, i + pac->acl_next = NULL; + pac->acl_addr = *addr; + ++ rtnl_lock(); + rcu_read_lock(); + if (ifindex == 0) { + struct rt6_info *rt; 
+@@ -137,6 +138,7 @@ int ipv6_sock_ac_join(struct sock *sk, i + + error: + rcu_read_unlock(); ++ rtnl_unlock(); + if (pac) + sock_kfree_s(sk, pac, sizeof(*pac)); + return err; +@@ -171,13 +173,17 @@ int ipv6_sock_ac_drop(struct sock *sk, i + + spin_unlock_bh(&ipv6_sk_ac_lock); + ++ rtnl_lock(); + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, pac->acl_ifindex); + if (dev) + ipv6_dev_ac_dec(dev, &pac->acl_addr); + rcu_read_unlock(); ++ rtnl_unlock(); + + sock_kfree_s(sk, pac, sizeof(*pac)); ++ if (!dev) ++ return -ENODEV; + return 0; + } + +@@ -198,6 +204,7 @@ void ipv6_sock_ac_close(struct sock *sk) + spin_unlock_bh(&ipv6_sk_ac_lock); + + prev_index = 0; ++ rtnl_lock(); + rcu_read_lock(); + while (pac) { + struct ipv6_ac_socklist *next = pac->acl_next; +@@ -212,6 +219,7 @@ void ipv6_sock_ac_close(struct sock *sk) + pac = next; + } + rcu_read_unlock(); ++ rtnl_unlock(); + } + + static void aca_put(struct ifacaddr6 *ac) +@@ -233,6 +241,8 @@ int ipv6_dev_ac_inc(struct net_device *d + struct rt6_info *rt; + int err; + ++ ASSERT_RTNL(); ++ + idev = in6_dev_get(dev); + + if (idev == NULL) +@@ -302,6 +312,8 @@ int __ipv6_dev_ac_dec(struct inet6_dev * + { + struct ifacaddr6 *aca, *prev_aca; + ++ ASSERT_RTNL(); ++ + write_lock_bh(&idev->lock); + prev_aca = NULL; + for (aca = idev->ac_list; aca; aca = aca->aca_next) { +--- a/net/ipv6/mcast.c ++++ b/net/ipv6/mcast.c +@@ -172,6 +172,7 @@ int ipv6_sock_mc_join(struct sock *sk, i + mc_lst->next = NULL; + mc_lst->addr = *addr; + ++ rtnl_lock(); + rcu_read_lock(); + if (ifindex == 0) { + struct rt6_info *rt; +@@ -185,6 +186,7 @@ int ipv6_sock_mc_join(struct sock *sk, i + + if (dev == NULL) { + rcu_read_unlock(); ++ rtnl_unlock(); + sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); + return -ENODEV; + } +@@ -202,6 +204,7 @@ int ipv6_sock_mc_join(struct sock *sk, i + + if (err) { + rcu_read_unlock(); ++ rtnl_unlock(); + sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); + return err; + } +@@ -212,6 +215,7 @@ int ipv6_sock_mc_join(struct sock 
*sk, i + spin_unlock(&ipv6_sk_mc_lock); + + rcu_read_unlock(); ++ rtnl_unlock(); + + return 0; + } +@@ -229,6 +233,7 @@ int ipv6_sock_mc_drop(struct sock *sk, i + if (!ipv6_addr_is_multicast(addr)) + return -EINVAL; + ++ rtnl_lock(); + spin_lock(&ipv6_sk_mc_lock); + for (lnk = &np->ipv6_mc_list; + (mc_lst = rcu_dereference_protected(*lnk, +@@ -252,12 +257,15 @@ int ipv6_sock_mc_drop(struct sock *sk, i + } else + (void) ip6_mc_leave_src(sk, mc_lst, NULL); + rcu_read_unlock(); ++ rtnl_unlock(); ++ + atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); + kfree_rcu(mc_lst, rcu); + return 0; + } + } + spin_unlock(&ipv6_sk_mc_lock); ++ rtnl_unlock(); + + return -EADDRNOTAVAIL; + } +@@ -302,6 +310,7 @@ void ipv6_sock_mc_close(struct sock *sk) + if (!rcu_access_pointer(np->ipv6_mc_list)) + return; + ++ rtnl_lock(); + spin_lock(&ipv6_sk_mc_lock); + while ((mc_lst = rcu_dereference_protected(np->ipv6_mc_list, + lockdep_is_held(&ipv6_sk_mc_lock))) != NULL) { +@@ -328,6 +337,7 @@ void ipv6_sock_mc_close(struct sock *sk) + spin_lock(&ipv6_sk_mc_lock); + } + spin_unlock(&ipv6_sk_mc_lock); ++ rtnl_unlock(); + } + + int ip6_mc_source(int add, int omode, struct sock *sk, +@@ -845,6 +855,8 @@ int ipv6_dev_mc_inc(struct net_device *d + struct ifmcaddr6 *mc; + struct inet6_dev *idev; + ++ ASSERT_RTNL(); ++ + /* we need to take a reference on idev */ + idev = in6_dev_get(dev); + +@@ -916,6 +928,8 @@ int __ipv6_dev_mc_dec(struct inet6_dev * + { + struct ifmcaddr6 *ma, **map; + ++ ASSERT_RTNL(); ++ + write_lock_bh(&idev->lock); + for (map = &idev->mc_list; (ma=*map) != NULL; map = &ma->next) { + if (ipv6_addr_equal(&ma->mca_addr, addr)) { diff --git a/queue-3.16/ipv6-remove-rt6i_genid.patch b/queue-3.16/ipv6-remove-rt6i_genid.patch new file mode 100644 index 00000000000..6e142134197 --- /dev/null +++ b/queue-3.16/ipv6-remove-rt6i_genid.patch @@ -0,0 +1,180 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Hannes Frederic Sowa +Date: Sun, 28 Sep 2014 00:46:06 +0200 +Subject: ipv6: remove 
rt6i_genid + +From: Hannes Frederic Sowa + +[ Upstream commit 705f1c869d577c8055736dd02501f26a2507dd5b ] + +Eric Dumazet noticed that all no-nonexthop or no-gateway routes which +are already marked DST_HOST (e.g. input routes) will always be +invalidated during sk_dst_check. Thus per-socket dst caching absolutely +had no effect and early demuxing had no effect. + +Thus this patch removes rt6i_genid: fn_sernum already gets modified during +add operations, so we only must ensure we mutate fn_sernum during ipv6 +address remove operations. This is a fairly expensive operation, +but address removal should not happen that often. Also our mtu update +functions do the same and we heard no complaints so far. xfrm policy +changes also cause a call into fib6_flush_trees. Also plug a hole in +rt6_info (no cacheline changes). + +I verified via tracing that this change has effect. + +Cc: Eric Dumazet +Cc: YOSHIFUJI Hideaki +Cc: Vlad Yasevich +Cc: Nicolas Dichtel +Cc: Martin Lau +Signed-off-by: Hannes Frederic Sowa +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/ip6_fib.h | 5 +---- + include/net/net_namespace.h | 20 +++----------------- + net/ipv6/addrconf.c | 3 ++- + net/ipv6/addrconf_core.c | 7 +++++++ + net/ipv6/ip6_fib.c | 20 ++++++++++++++++++++ + net/ipv6/route.c | 4 ---- + 6 files changed, 33 insertions(+), 26 deletions(-) + +--- a/include/net/ip6_fib.h ++++ b/include/net/ip6_fib.h +@@ -114,16 +114,13 @@ struct rt6_info { + u32 rt6i_flags; + struct rt6key rt6i_src; + struct rt6key rt6i_prefsrc; +- u32 rt6i_metric; + + struct inet6_dev *rt6i_idev; + unsigned long _rt6i_peer; + +- u32 rt6i_genid; +- ++ u32 rt6i_metric; + /* more non-fragment space at head required */ + unsigned short rt6i_nfheader_len; +- + u8 rt6i_protocol; + }; + +--- a/include/net/net_namespace.h ++++ b/include/net/net_namespace.h +@@ -352,26 +352,12 @@ static inline void rt_genid_bump_ipv4(st + atomic_inc(&net->ipv4.rt_genid); + } + +-#if IS_ENABLED(CONFIG_IPV6) +-static inline int rt_genid_ipv6(struct net *net) +-{ +- return atomic_read(&net->ipv6.rt_genid); +-} +- +-static inline void rt_genid_bump_ipv6(struct net *net) +-{ +- atomic_inc(&net->ipv6.rt_genid); +-} +-#else +-static inline int rt_genid_ipv6(struct net *net) +-{ +- return 0; +-} +- ++extern void (*__fib6_flush_trees)(struct net *net); + static inline void rt_genid_bump_ipv6(struct net *net) + { ++ if (__fib6_flush_trees) ++ __fib6_flush_trees(net); + } +-#endif + + #if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN) + static inline struct netns_ieee802154_lowpan * +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -4746,10 +4746,11 @@ static void __ipv6_ifa_notify(int event, + + if (ip6_del_rt(ifp->rt)) + dst_free(&ifp->rt->dst); ++ ++ rt_genid_bump_ipv6(net); + break; + } + atomic_inc(&net->ipv6.dev_addr_genid); +- rt_genid_bump_ipv6(net); + } + + static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) +--- a/net/ipv6/addrconf_core.c ++++ b/net/ipv6/addrconf_core.c +@@ -8,6 +8,13 @@ + #include + #include + ++/* if 
ipv6 module registers this function is used by xfrm to force all ++ * sockets to relookup their nodes - this is fairly expensive, be ++ * careful ++ */ ++void (*__fib6_flush_trees)(struct net *); ++EXPORT_SYMBOL(__fib6_flush_trees); ++ + #define IPV6_ADDR_SCOPE_TYPE(scope) ((scope) << 16) + + static inline unsigned int ipv6_addr_scope2type(unsigned int scope) +--- a/net/ipv6/ip6_fib.c ++++ b/net/ipv6/ip6_fib.c +@@ -1605,6 +1605,24 @@ static void fib6_prune_clones(struct net + fib6_clean_tree(net, fn, fib6_prune_clone, 1, NULL); + } + ++static int fib6_update_sernum(struct rt6_info *rt, void *arg) ++{ ++ __u32 sernum = *(__u32 *)arg; ++ ++ if (rt->rt6i_node && ++ rt->rt6i_node->fn_sernum != sernum) ++ rt->rt6i_node->fn_sernum = sernum; ++ ++ return 0; ++} ++ ++static void fib6_flush_trees(struct net *net) ++{ ++ __u32 new_sernum = fib6_new_sernum(); ++ ++ fib6_clean_all(net, fib6_update_sernum, &new_sernum); ++} ++ + /* + * Garbage collection + */ +@@ -1788,6 +1806,8 @@ int __init fib6_init(void) + NULL); + if (ret) + goto out_unregister_subsys; ++ ++ __fib6_flush_trees = fib6_flush_trees; + out: + return ret; + +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -314,7 +314,6 @@ static inline struct rt6_info *ip6_dst_a + + memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); + rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers); +- rt->rt6i_genid = rt_genid_ipv6(net); + INIT_LIST_HEAD(&rt->rt6i_siblings); + } + return rt; +@@ -1098,9 +1097,6 @@ static struct dst_entry *ip6_dst_check(s + * DST_OBSOLETE_FORCE_CHK which forces validation calls down + * into this function always. 
+ */ +- if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev))) +- return NULL; +- + if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie)) + return NULL; + diff --git a/queue-3.16/ipv6-restore-the-behavior-of-ipv6_sock_ac_drop.patch b/queue-3.16/ipv6-restore-the-behavior-of-ipv6_sock_ac_drop.patch new file mode 100644 index 00000000000..6a5c2f36bbf --- /dev/null +++ b/queue-3.16/ipv6-restore-the-behavior-of-ipv6_sock_ac_drop.patch @@ -0,0 +1,38 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: WANG Cong +Date: Fri, 5 Sep 2014 14:33:00 -0700 +Subject: ipv6: restore the behavior of ipv6_sock_ac_drop() + +From: WANG Cong + +[ Upstream commit de185ab46cb02df9738b0d898b0c3a89181c5526 ] + +It is possible that the interface is already gone after joining +the list of anycast on this interface as we don't hold a refcount +for the device, in this case we are safe to ignore the error. + +What's more important, for API compatibility we should not +change this behavior for applications even if it were correct. + +Fixes: commit a9ed4a2986e13011 ("ipv6: fix rtnl locking in setsockopt for anycast and multicast") +Cc: Sabrina Dubroca +Cc: David S. Miller +Signed-off-by: Cong Wang +Acked-by: Hannes Frederic Sowa +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/anycast.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/net/ipv6/anycast.c ++++ b/net/ipv6/anycast.c +@@ -182,8 +182,6 @@ int ipv6_sock_ac_drop(struct sock *sk, i + rtnl_unlock(); + + sock_kfree_s(sk, pac, sizeof(*pac)); +- if (!dev) +- return -ENODEV; + return 0; + } + diff --git a/queue-3.16/l2tp-fix-race-while-getting-pmtu-on-ppp-pseudo-wire.patch b/queue-3.16/l2tp-fix-race-while-getting-pmtu-on-ppp-pseudo-wire.patch new file mode 100644 index 00000000000..09d7ac440b1 --- /dev/null +++ b/queue-3.16/l2tp-fix-race-while-getting-pmtu-on-ppp-pseudo-wire.patch @@ -0,0 +1,76 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Guillaume Nault +Date: Wed, 3 Sep 2014 14:12:55 +0200 +Subject: l2tp: fix race while getting PMTU on PPP pseudo-wire + +From: Guillaume Nault + +[ Upstream commit eed4d839b0cdf9d84b0a9bc63de90fd5e1e886fb ] + +Use dst_entry held by sk_dst_get() to retrieve tunnel's PMTU. + +The dst_mtu(__sk_dst_get(tunnel->sock)) call was racy. 
__sk_dst_get() +could return NULL if tunnel->sock->sk_dst_cache was reset just before the +call, thus making dst_mtu() dereference a NULL pointer: + +[ 1937.661598] BUG: unable to handle kernel NULL pointer dereference at 0000000000000020 +[ 1937.664005] IP: [] pppol2tp_connect+0x33d/0x41e [l2tp_ppp] +[ 1937.664005] PGD daf0c067 PUD d9f93067 PMD 0 +[ 1937.664005] Oops: 0000 [#1] SMP +[ 1937.664005] Modules linked in: l2tp_ppp l2tp_netlink l2tp_core ip6table_filter ip6_tables iptable_filter ip_tables ebtable_nat ebtables x_tables udp_tunnel pppoe pppox ppp_generic slhc deflate ctr twofish_generic twofish_x86_64_3way xts lrw gf128mul glue_helper twofish_x86_64 twofish_common blowfish_generic blowfish_x86_64 blowfish_common des_generic cbc xcbc rmd160 sha512_generic hmac crypto_null af_key xfrm_algo 8021q garp bridge stp llc tun atmtcp clip atm ext3 mbcache jbd iTCO_wdt coretemp kvm_intel iTCO_vendor_support kvm pcspkr evdev ehci_pci lpc_ich mfd_core i5400_edac edac_core i5k_amb shpchp button processor thermal_sys xfs crc32c_generic libcrc32c dm_mod usbhid sg hid sr_mod sd_mod cdrom crc_t10dif crct10dif_common ata_generic ahci ata_piix tg3 libahci libata uhci_hcd ptp ehci_hcd pps_core usbcore scsi_mod libphy usb_common [last unloaded: l2tp_core] +[ 1937.664005] CPU: 0 PID: 10022 Comm: l2tpstress Tainted: G O 3.17.0-rc1 #1 +[ 1937.664005] Hardware name: HP ProLiant DL160 G5, BIOS O12 08/22/2008 +[ 1937.664005] task: ffff8800d8fda790 ti: ffff8800c43c4000 task.ti: ffff8800c43c4000 +[ 1937.664005] RIP: 0010:[] [] pppol2tp_connect+0x33d/0x41e [l2tp_ppp] +[ 1937.664005] RSP: 0018:ffff8800c43c7de8 EFLAGS: 00010282 +[ 1937.664005] RAX: ffff8800da8a7240 RBX: ffff8800d8c64600 RCX: 000001c325a137b5 +[ 1937.664005] RDX: 8c6318c6318c6320 RSI: 000000000000010c RDI: 0000000000000000 +[ 1937.664005] RBP: ffff8800c43c7ea8 R08: 0000000000000000 R09: 0000000000000000 +[ 1937.664005] R10: ffffffffa048e2c0 R11: ffff8800d8c64600 R12: ffff8800ca7a5000 +[ 1937.664005] R13: ffff8800c439bf40 
R14: 000000000000000c R15: 0000000000000009 +[ 1937.664005] FS: 00007fd7f610f700(0000) GS:ffff88011a600000(0000) knlGS:0000000000000000 +[ 1937.664005] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b +[ 1937.664005] CR2: 0000000000000020 CR3: 00000000d9d75000 CR4: 00000000000027e0 +[ 1937.664005] Stack: +[ 1937.664005] ffffffffa049da80 ffff8800d8fda790 000000000000005b ffff880000000009 +[ 1937.664005] ffff8800daf3f200 0000000000000003 ffff8800c43c7e48 ffffffff81109b57 +[ 1937.664005] ffffffff81109b0e ffffffff8114c566 0000000000000000 0000000000000000 +[ 1937.664005] Call Trace: +[ 1937.664005] [] ? pppol2tp_connect+0x235/0x41e [l2tp_ppp] +[ 1937.664005] [] ? might_fault+0x9e/0xa5 +[ 1937.664005] [] ? might_fault+0x55/0xa5 +[ 1937.664005] [] ? rcu_read_unlock+0x1c/0x26 +[ 1937.664005] [] SYSC_connect+0x87/0xb1 +[ 1937.664005] [] ? sysret_check+0x1b/0x56 +[ 1937.664005] [] ? trace_hardirqs_on_caller+0x145/0x1a1 +[ 1937.664005] [] ? trace_hardirqs_on_thunk+0x3a/0x3f +[ 1937.664005] [] ? spin_lock+0x9/0xb +[ 1937.664005] [] SyS_connect+0x9/0xb +[ 1937.664005] [] system_call_fastpath+0x16/0x1b +[ 1937.664005] Code: 10 2a 84 81 e8 65 76 bd e0 65 ff 0c 25 10 bb 00 00 4d 85 ed 74 37 48 8b 85 60 ff ff ff 48 8b 80 88 01 00 00 48 8b b8 10 02 00 00 <48> 8b 47 20 ff 50 20 85 c0 74 0f 83 e8 28 89 83 10 01 00 00 89 +[ 1937.664005] RIP [] pppol2tp_connect+0x33d/0x41e [l2tp_ppp] +[ 1937.664005] RSP +[ 1937.664005] CR2: 0000000000000020 +[ 1939.559375] ---[ end trace 82d44500f28f8708 ]--- + +Fixes: f34c4a35d879 ("l2tp: take PMTU from tunnel UDP socket") +Signed-off-by: Guillaume Nault +Acked-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/l2tp/l2tp_ppp.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/l2tp/l2tp_ppp.c ++++ b/net/l2tp/l2tp_ppp.c +@@ -755,7 +755,8 @@ static int pppol2tp_connect(struct socke + /* If PMTU discovery was enabled, use the MTU that was discovered */ + dst = sk_dst_get(tunnel->sock); + if (dst != NULL) { +- u32 pmtu = dst_mtu(__sk_dst_get(tunnel->sock)); ++ u32 pmtu = dst_mtu(dst); ++ + if (pmtu != 0) + session->mtu = session->mru = pmtu - + PPPOL2TP_HEADER_OVERHEAD; diff --git a/queue-3.16/macvlan-allow-to-enqueue-broadcast-pkt-on-virtual-device.patch b/queue-3.16/macvlan-allow-to-enqueue-broadcast-pkt-on-virtual-device.patch new file mode 100644 index 00000000000..d0bf250248a --- /dev/null +++ b/queue-3.16/macvlan-allow-to-enqueue-broadcast-pkt-on-virtual-device.patch @@ -0,0 +1,47 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Nicolas Dichtel +Date: Wed, 17 Sep 2014 10:08:08 +0200 +Subject: macvlan: allow to enqueue broadcast pkt on virtual device + +From: Nicolas Dichtel + +[ Upstream commit 07d92d5cc977a7fe1e683e1d4a6f723f7f2778cb ] + +Since commit 412ca1550cbe ("macvlan: Move broadcasts into a work queue"), the +driver uses tx_queue_len of the master device as the limit of packets enqueuing. +Problem is that virtual drivers have this value set to 0, thus all broadcast +packets were rejected. +Because tx_queue_len was arbitrarily chosen, I replace it with a static limit +of 1000 (also arbitrarily chosen). + +CC: Herbert Xu +Reported-by: Thibaut Collet +Suggested-by: Thibaut Collet +Tested-by: Thibaut Collet +Signed-off-by: Nicolas Dichtel +Acked-by: Herbert Xu +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/macvlan.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/macvlan.c ++++ b/drivers/net/macvlan.c +@@ -36,6 +36,7 @@ + #include + + #define MACVLAN_HASH_SIZE (1 << BITS_PER_BYTE) ++#define MACVLAN_BC_QUEUE_LEN 1000 + + struct macvlan_port { + struct net_device *dev; +@@ -248,7 +249,7 @@ static void macvlan_broadcast_enqueue(st + goto err; + + spin_lock(&port->bc_queue.lock); +- if (skb_queue_len(&port->bc_queue) < skb->dev->tx_queue_len) { ++ if (skb_queue_len(&port->bc_queue) < MACVLAN_BC_QUEUE_LEN) { + __skb_queue_tail(&port->bc_queue, nskb); + err = 0; + } diff --git a/queue-3.16/macvtap-fix-race-between-device-delete-and-open.patch b/queue-3.16/macvtap-fix-race-between-device-delete-and-open.patch new file mode 100644 index 00000000000..e7c82f52d5a --- /dev/null +++ b/queue-3.16/macvtap-fix-race-between-device-delete-and-open.patch @@ -0,0 +1,95 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Vlad Yasevich +Date: Mon, 22 Sep 2014 16:34:17 -0400 +Subject: macvtap: Fix race between device delete and open. + +From: Vlad Yasevich + +[ Upstream commit 40b8fe45d1f094e3babe7b2dc2b71557ab71401d ] + +In macvtap device delete and open calls can race and +this causes a list corruption of the vlan queue_list. + +The race itself is triggered by the idr accessors +that locate the vlan device. The device is stored +into and removed from the idr under both an rtnl and +a mutex. However, when attempting to locate the device +in idr, only a mutex is taken. As a result, one cpu +performing a delete may take an rtnl and wait for the mutex, +while another cpu doing an open() will take the idr +mutex first to fetch the device pointer and later take +an rtnl to add a queue for the device which may have +just gotten deleted. + +With this patch, we now hold the rtnl for the duration +of the macvtap_open() call thus making sure that +open will not race with delete. + +CC: Michael S. 
Tsirkin +CC: Jason Wang +Signed-off-by: Vladislav Yasevich +Acked-by: Jason Wang +Acked-by: Michael S. Tsirkin +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/macvtap.c | 18 ++++++++---------- + 1 file changed, 8 insertions(+), 10 deletions(-) + +--- a/drivers/net/macvtap.c ++++ b/drivers/net/macvtap.c +@@ -112,17 +112,15 @@ out: + return err; + } + ++/* Requires RTNL */ + static int macvtap_set_queue(struct net_device *dev, struct file *file, + struct macvtap_queue *q) + { + struct macvlan_dev *vlan = netdev_priv(dev); +- int err = -EBUSY; + +- rtnl_lock(); + if (vlan->numqueues == MAX_MACVTAP_QUEUES) +- goto out; ++ return -EBUSY; + +- err = 0; + rcu_assign_pointer(q->vlan, vlan); + rcu_assign_pointer(vlan->taps[vlan->numvtaps], q); + sock_hold(&q->sk); +@@ -136,9 +134,7 @@ static int macvtap_set_queue(struct net_ + vlan->numvtaps++; + vlan->numqueues++; + +-out: +- rtnl_unlock(); +- return err; ++ return 0; + } + + static int macvtap_disable_queue(struct macvtap_queue *q) +@@ -454,11 +450,12 @@ static void macvtap_sock_destruct(struct + static int macvtap_open(struct inode *inode, struct file *file) + { + struct net *net = current->nsproxy->net_ns; +- struct net_device *dev = dev_get_by_macvtap_minor(iminor(inode)); ++ struct net_device *dev; + struct macvtap_queue *q; +- int err; ++ int err = -ENODEV; + +- err = -ENODEV; ++ rtnl_lock(); ++ dev = dev_get_by_macvtap_minor(iminor(inode)); + if (!dev) + goto out; + +@@ -498,6 +495,7 @@ out: + if (dev) + dev_put(dev); + ++ rtnl_unlock(); + return err; + } + diff --git a/queue-3.16/myri10ge-check-for-dma-mapping-errors.patch b/queue-3.16/myri10ge-check-for-dma-mapping-errors.patch new file mode 100644 index 00000000000..bead3901d53 --- /dev/null +++ b/queue-3.16/myri10ge-check-for-dma-mapping-errors.patch @@ -0,0 +1,175 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Stanislaw Gruszka +Date: Tue, 12 Aug 2014 10:35:19 +0200 +Subject: myri10ge: check for DMA mapping errors + 
+From: Stanislaw Gruszka + +[ Upstream commit 10545937e866ccdbb7ab583031dbdcc6b14e4eb4 ] + +On IOMMU systems DMA mapping can fail, we need to check for +that possibility. + +Signed-off-by: Stanislaw Gruszka +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/myricom/myri10ge/myri10ge.c | 88 +++++++++++++++-------- + 1 file changed, 58 insertions(+), 30 deletions(-) + +--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c ++++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +@@ -872,6 +872,10 @@ static int myri10ge_dma_test(struct myri + return -ENOMEM; + dmatest_bus = pci_map_page(mgp->pdev, dmatest_page, 0, PAGE_SIZE, + DMA_BIDIRECTIONAL); ++ if (unlikely(pci_dma_mapping_error(mgp->pdev, dmatest_bus))) { ++ __free_page(dmatest_page); ++ return -ENOMEM; ++ } + + /* Run a small DMA test. + * The magic multipliers to the length tell the firmware +@@ -1293,6 +1297,7 @@ myri10ge_alloc_rx_pages(struct myri10ge_ + int bytes, int watchdog) + { + struct page *page; ++ dma_addr_t bus; + int idx; + #if MYRI10GE_ALLOC_SIZE > 4096 + int end_offset; +@@ -1317,11 +1322,21 @@ myri10ge_alloc_rx_pages(struct myri10ge_ + rx->watchdog_needed = 1; + return; + } ++ ++ bus = pci_map_page(mgp->pdev, page, 0, ++ MYRI10GE_ALLOC_SIZE, ++ PCI_DMA_FROMDEVICE); ++ if (unlikely(pci_dma_mapping_error(mgp->pdev, bus))) { ++ __free_pages(page, MYRI10GE_ALLOC_ORDER); ++ if (rx->fill_cnt - rx->cnt < 16) ++ rx->watchdog_needed = 1; ++ return; ++ } ++ + rx->page = page; + rx->page_offset = 0; +- rx->bus = pci_map_page(mgp->pdev, page, 0, +- MYRI10GE_ALLOC_SIZE, +- PCI_DMA_FROMDEVICE); ++ rx->bus = bus; ++ + } + rx->info[idx].page = rx->page; + rx->info[idx].page_offset = rx->page_offset; +@@ -2763,6 +2778,35 @@ myri10ge_submit_req(struct myri10ge_tx_b + mb(); + } + ++static void myri10ge_unmap_tx_dma(struct myri10ge_priv *mgp, ++ struct myri10ge_tx_buf *tx, int idx) ++{ ++ unsigned int len; ++ int last_idx; ++ ++ /* Free any DMA resources we've alloced 
and clear out the skb slot */ ++ last_idx = (idx + 1) & tx->mask; ++ idx = tx->req & tx->mask; ++ do { ++ len = dma_unmap_len(&tx->info[idx], len); ++ if (len) { ++ if (tx->info[idx].skb != NULL) ++ pci_unmap_single(mgp->pdev, ++ dma_unmap_addr(&tx->info[idx], ++ bus), len, ++ PCI_DMA_TODEVICE); ++ else ++ pci_unmap_page(mgp->pdev, ++ dma_unmap_addr(&tx->info[idx], ++ bus), len, ++ PCI_DMA_TODEVICE); ++ dma_unmap_len_set(&tx->info[idx], len, 0); ++ tx->info[idx].skb = NULL; ++ } ++ idx = (idx + 1) & tx->mask; ++ } while (idx != last_idx); ++} ++ + /* + * Transmit a packet. We need to split the packet so that a single + * segment does not cross myri10ge->tx_boundary, so this makes segment +@@ -2786,7 +2830,7 @@ static netdev_tx_t myri10ge_xmit(struct + u32 low; + __be32 high_swapped; + unsigned int len; +- int idx, last_idx, avail, frag_cnt, frag_idx, count, mss, max_segments; ++ int idx, avail, frag_cnt, frag_idx, count, mss, max_segments; + u16 pseudo_hdr_offset, cksum_offset, queue; + int cum_len, seglen, boundary, rdma_count; + u8 flags, odd_flag; +@@ -2883,9 +2927,12 @@ again: + + /* map the skb for DMA */ + len = skb_headlen(skb); ++ bus = pci_map_single(mgp->pdev, skb->data, len, PCI_DMA_TODEVICE); ++ if (unlikely(pci_dma_mapping_error(mgp->pdev, bus))) ++ goto drop; ++ + idx = tx->req & tx->mask; + tx->info[idx].skb = skb; +- bus = pci_map_single(mgp->pdev, skb->data, len, PCI_DMA_TODEVICE); + dma_unmap_addr_set(&tx->info[idx], bus, bus); + dma_unmap_len_set(&tx->info[idx], len, len); + +@@ -2984,12 +3031,16 @@ again: + break; + + /* map next fragment for DMA */ +- idx = (count + tx->req) & tx->mask; + frag = &skb_shinfo(skb)->frags[frag_idx]; + frag_idx++; + len = skb_frag_size(frag); + bus = skb_frag_dma_map(&mgp->pdev->dev, frag, 0, len, + DMA_TO_DEVICE); ++ if (unlikely(pci_dma_mapping_error(mgp->pdev, bus))) { ++ myri10ge_unmap_tx_dma(mgp, tx, idx); ++ goto drop; ++ } ++ idx = (count + tx->req) & tx->mask; + dma_unmap_addr_set(&tx->info[idx], bus, bus); 
+ dma_unmap_len_set(&tx->info[idx], len, len); + } +@@ -3020,31 +3071,8 @@ again: + return NETDEV_TX_OK; + + abort_linearize: +- /* Free any DMA resources we've alloced and clear out the skb +- * slot so as to not trip up assertions, and to avoid a +- * double-free if linearizing fails */ ++ myri10ge_unmap_tx_dma(mgp, tx, idx); + +- last_idx = (idx + 1) & tx->mask; +- idx = tx->req & tx->mask; +- tx->info[idx].skb = NULL; +- do { +- len = dma_unmap_len(&tx->info[idx], len); +- if (len) { +- if (tx->info[idx].skb != NULL) +- pci_unmap_single(mgp->pdev, +- dma_unmap_addr(&tx->info[idx], +- bus), len, +- PCI_DMA_TODEVICE); +- else +- pci_unmap_page(mgp->pdev, +- dma_unmap_addr(&tx->info[idx], +- bus), len, +- PCI_DMA_TODEVICE); +- dma_unmap_len_set(&tx->info[idx], len, 0); +- tx->info[idx].skb = NULL; +- } +- idx = (idx + 1) & tx->mask; +- } while (idx != last_idx); + if (skb_is_gso(skb)) { + netdev_err(mgp->dev, "TSO but wanted to linearize?!?!?\n"); + goto drop; diff --git a/queue-3.16/net-allow-macvlans-to-move-to-net-namespace.patch b/queue-3.16/net-allow-macvlans-to-move-to-net-namespace.patch new file mode 100644 index 00000000000..0f62225e538 --- /dev/null +++ b/queue-3.16/net-allow-macvlans-to-move-to-net-namespace.patch @@ -0,0 +1,46 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Francesco Ruggeri +Date: Wed, 17 Sep 2014 10:40:44 -0700 +Subject: net: allow macvlans to move to net namespace + +From: Francesco Ruggeri + +[ Upstream commit 0d0162e7a33d3710b9604e7c68c0f31f5c457428 ] + +I cannot move a macvlan interface created on top of a bonding interface +to a different namespace: + +% ip netns add dummy0 +% ip link add link bond0 mac0 type macvlan +% ip link set mac0 netns dummy0 +RTNETLINK answers: Invalid argument +% + +The problem seems to be that commit f9399814927a ("bonding: Don't allow +bond devices to change network namespaces.") sets NETIF_F_NETNS_LOCAL +on bonding interfaces, and commit 797f87f83b60 ("macvlan: fix netdev +feature propagation 
from lower device") causes macvlan interfaces +to inherit its features from the lower device. + +NETIF_F_NETNS_LOCAL should not be inherited from the lower device +by a macvlan. +Patch tested on 3.16. + +Signed-off-by: Francesco Ruggeri +Acked-by: Cong Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/macvlan.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/macvlan.c ++++ b/drivers/net/macvlan.c +@@ -800,6 +800,7 @@ static netdev_features_t macvlan_fix_fea + features, + mask); + features |= ALWAYS_ON_FEATURES; ++ features &= ~NETIF_F_NETNS_LOCAL; + + return features; + } diff --git a/queue-3.16/net-always-untag-vlan-tagged-traffic-on-input.patch b/queue-3.16/net-always-untag-vlan-tagged-traffic-on-input.patch new file mode 100644 index 00000000000..0aaeaeea2f3 --- /dev/null +++ b/queue-3.16/net-always-untag-vlan-tagged-traffic-on-input.patch @@ -0,0 +1,254 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Vlad Yasevich +Date: Fri, 8 Aug 2014 14:42:13 -0400 +Subject: net: Always untag vlan-tagged traffic on input. + +From: Vlad Yasevich + +[ Upstream commit 0d5501c1c828fb97d02af50aa9d2b1a5498b94e4 ] + +Currently the functionality to untag traffic on input resides +as part of the vlan module and is build only when VLAN support +is enabled in the kernel. When VLAN is disabled, the function +vlan_untag() turns into a stub and doesn't really untag the +packets. This seems to create an interesting interaction +between VMs supporting checksum offloading and some network drivers. + +There are some drivers that do not allow the user to change +tx-vlan-offload feature of the driver. These drivers also seem +to assume that any VLAN-tagged traffic they transmit will +have the vlan information in the vlan_tci and not in the vlan +header already in the skb. 
When transmitting skbs that already +have tagged data with partial checksum set, the checksum doesn't +appear to be updated correctly by the card thus resulting in a +failure to establish TCP connections. + +The following is a packet trace taken on the receiver where a +sender is a VM with a VLAN configured. The host the VM is running on +does not have VLAN support and the outgoing interface on the +host is tg3: +10:12:43.503055 52:54:00:ae:42:3f > 28:d2:44:7d:c2:de, ethertype 802.1Q +(0x8100), length 78: vlan 100, p 0, ethertype IPv4, (tos 0x0, ttl 64, id 27243, +offset 0, flags [DF], proto TCP (6), length 60) + 10.0.100.1.58545 > 10.0.100.10.ircu-2: Flags [S], cksum 0xdc39 (incorrect +-> 0x48d9), seq 1069378582, win 29200, options [mss 1460,sackOK,TS val +4294837885 ecr 0,nop,wscale 7], length 0 +10:12:44.505556 52:54:00:ae:42:3f > 28:d2:44:7d:c2:de, ethertype 802.1Q +(0x8100), length 78: vlan 100, p 0, ethertype IPv4, (tos 0x0, ttl 64, id 27244, +offset 0, flags [DF], proto TCP (6), length 60) + 10.0.100.1.58545 > 10.0.100.10.ircu-2: Flags [S], cksum 0xdc39 (incorrect +-> 0x44ee), seq 1069378582, win 29200, options [mss 1460,sackOK,TS val +4294838888 ecr 0,nop,wscale 7], length 0 + +This connection finally times out. + +I've only access to the TG3 hardware in this configuration thus have +only tested this with TG3 driver. There are a lot of other drivers +that do not permit user changes to vlan acceleration features, and +I don't know if they all suffer from a similar issue. + +The patch attempts to fix this another way. It moves the vlan header +stripping code out of the vlan module and always builds it into the +kernel network core. This way, even if vlan is not supported on +a virtualization host, the virtual machines running on top of such +host will still work with VLANs enabled. + +CC: Patrick McHardy +CC: Nithin Nayak Sujir +CC: Michael Chan +CC: Jiri Pirko +Signed-off-by: Vladislav Yasevich +Acked-by: Jiri Pirko +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/if_vlan.h | 6 ----- + include/linux/skbuff.h | 1 + net/8021q/vlan_core.c | 53 ------------------------------------------------ + net/bridge/br_vlan.c | 2 - + net/core/dev.c | 2 - + net/core/skbuff.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++ + 6 files changed, 56 insertions(+), 61 deletions(-) + +--- a/include/linux/if_vlan.h ++++ b/include/linux/if_vlan.h +@@ -187,7 +187,6 @@ vlan_dev_get_egress_qos_mask(struct net_ + } + + extern bool vlan_do_receive(struct sk_buff **skb); +-extern struct sk_buff *vlan_untag(struct sk_buff *skb); + + extern int vlan_vid_add(struct net_device *dev, __be16 proto, u16 vid); + extern void vlan_vid_del(struct net_device *dev, __be16 proto, u16 vid); +@@ -241,11 +240,6 @@ static inline bool vlan_do_receive(struc + return false; + } + +-static inline struct sk_buff *vlan_untag(struct sk_buff *skb) +-{ +- return skb; +-} +- + static inline int vlan_vid_add(struct net_device *dev, __be16 proto, u16 vid) + { + return 0; +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -2549,6 +2549,7 @@ int skb_shift(struct sk_buff *tgt, struc + void skb_scrub_packet(struct sk_buff *skb, bool xnet); + unsigned int skb_gso_transport_seglen(const struct sk_buff *skb); + struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); ++struct sk_buff *skb_vlan_untag(struct sk_buff *skb); + + struct skb_checksum_ops { + __wsum (*update)(const void *mem, int len, __wsum wsum); +--- a/net/8021q/vlan_core.c ++++ b/net/8021q/vlan_core.c +@@ -112,59 +112,6 @@ __be16 vlan_dev_vlan_proto(const struct + } + EXPORT_SYMBOL(vlan_dev_vlan_proto); + +-static struct sk_buff *vlan_reorder_header(struct sk_buff *skb) +-{ +- if (skb_cow(skb, skb_headroom(skb)) < 0) { +- kfree_skb(skb); +- return NULL; +- } +- +- memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN); +- skb->mac_header += VLAN_HLEN; +- return skb; +-} +- +-struct sk_buff *vlan_untag(struct 
sk_buff *skb) +-{ +- struct vlan_hdr *vhdr; +- u16 vlan_tci; +- +- if (unlikely(vlan_tx_tag_present(skb))) { +- /* vlan_tci is already set-up so leave this for another time */ +- return skb; +- } +- +- skb = skb_share_check(skb, GFP_ATOMIC); +- if (unlikely(!skb)) +- goto err_free; +- +- if (unlikely(!pskb_may_pull(skb, VLAN_HLEN))) +- goto err_free; +- +- vhdr = (struct vlan_hdr *) skb->data; +- vlan_tci = ntohs(vhdr->h_vlan_TCI); +- __vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci); +- +- skb_pull_rcsum(skb, VLAN_HLEN); +- vlan_set_encap_proto(skb, vhdr); +- +- skb = vlan_reorder_header(skb); +- if (unlikely(!skb)) +- goto err_free; +- +- skb_reset_network_header(skb); +- skb_reset_transport_header(skb); +- skb_reset_mac_len(skb); +- +- return skb; +- +-err_free: +- kfree_skb(skb); +- return NULL; +-} +-EXPORT_SYMBOL(vlan_untag); +- +- + /* + * vlan info and vid list + */ +--- a/net/bridge/br_vlan.c ++++ b/net/bridge/br_vlan.c +@@ -183,7 +183,7 @@ bool br_allowed_ingress(struct net_bridg + */ + if (unlikely(!vlan_tx_tag_present(skb) && + skb->protocol == proto)) { +- skb = vlan_untag(skb); ++ skb = skb_vlan_untag(skb); + if (unlikely(!skb)) + return false; + } +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -3588,7 +3588,7 @@ another_round: + + if (skb->protocol == cpu_to_be16(ETH_P_8021Q) || + skb->protocol == cpu_to_be16(ETH_P_8021AD)) { +- skb = vlan_untag(skb); ++ skb = skb_vlan_untag(skb); + if (unlikely(!skb)) + goto unlock; + } +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -62,6 +62,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -3959,3 +3960,55 @@ unsigned int skb_gso_transport_seglen(co + return shinfo->gso_size; + } + EXPORT_SYMBOL_GPL(skb_gso_transport_seglen); ++ ++static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) ++{ ++ if (skb_cow(skb, skb_headroom(skb)) < 0) { ++ kfree_skb(skb); ++ return NULL; ++ } ++ ++ memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN); ++ 
skb->mac_header += VLAN_HLEN; ++ return skb; ++} ++ ++struct sk_buff *skb_vlan_untag(struct sk_buff *skb) ++{ ++ struct vlan_hdr *vhdr; ++ u16 vlan_tci; ++ ++ if (unlikely(vlan_tx_tag_present(skb))) { ++ /* vlan_tci is already set-up so leave this for another time */ ++ return skb; ++ } ++ ++ skb = skb_share_check(skb, GFP_ATOMIC); ++ if (unlikely(!skb)) ++ goto err_free; ++ ++ if (unlikely(!pskb_may_pull(skb, VLAN_HLEN))) ++ goto err_free; ++ ++ vhdr = (struct vlan_hdr *)skb->data; ++ vlan_tci = ntohs(vhdr->h_vlan_TCI); ++ __vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci); ++ ++ skb_pull_rcsum(skb, VLAN_HLEN); ++ vlan_set_encap_proto(skb, vhdr); ++ ++ skb = skb_reorder_vlan_header(skb); ++ if (unlikely(!skb)) ++ goto err_free; ++ ++ skb_reset_network_header(skb); ++ skb_reset_transport_header(skb); ++ skb_reset_mac_len(skb); ++ ++ return skb; ++ ++err_free: ++ kfree_skb(skb); ++ return NULL; ++} ++EXPORT_SYMBOL(skb_vlan_untag); diff --git a/queue-3.16/net-filter-fix-possible-use-after-free.patch b/queue-3.16/net-filter-fix-possible-use-after-free.patch new file mode 100644 index 00000000000..46c83051fb8 --- /dev/null +++ b/queue-3.16/net-filter-fix-possible-use-after-free.patch @@ -0,0 +1,46 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Eric Dumazet +Date: Thu, 11 Sep 2014 20:27:37 -0700 +Subject: net: filter: fix possible use after free + +From: Eric Dumazet + +[ No applicable upstream commit, this bug has subsequently been + fixed as a side effect of other changes. 
] + +If kmemdup() fails, we free fp->orig_prog and return -ENOMEM + +sk_attach_filter() + -> sk_filter_uncharge(sk, fp) + -> sk_filter_release(fp) + -> call_rcu(&fp->rcu, sk_filter_release_rcu) + -> sk_filter_release_rcu() + -> sk_release_orig_filter() + fprog = fp->orig_prog; // not NULL, but points to freed memory + kfree(fprog->filter); // use after free, potential corruption + kfree(fprog); // double free or corruption + +Note: This was fixed in 3.17+ with commit 278571baca2a +("net: filter: simplify socket charging") + +Found by AddressSanitizer + +Signed-off-by: Eric Dumazet +Fixes: a3ea269b8bcdb ("net: filter: keep original BPF program around") +Acked-by: Alexei Starovoitov +Acked-by: Daniel Borkmann +Signed-off-by: Greg Kroah-Hartman +--- + net/core/filter.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -1318,6 +1318,7 @@ static int sk_store_orig_filter(struct s + fkprog->filter = kmemdup(fp->insns, fsize, GFP_KERNEL); + if (!fkprog->filter) { + kfree(fp->orig_prog); ++ fp->orig_prog = NULL; + return -ENOMEM; + } + diff --git a/queue-3.16/net-fix-checksum-features-handling-in-netif_skb_features.patch b/queue-3.16/net-fix-checksum-features-handling-in-netif_skb_features.patch new file mode 100644 index 00000000000..5e8d2646f6a --- /dev/null +++ b/queue-3.16/net-fix-checksum-features-handling-in-netif_skb_features.patch @@ -0,0 +1,54 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= +Date: Mon, 25 Aug 2014 15:16:22 +0200 +Subject: net: fix checksum features handling in netif_skb_features() + +From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= + +[ Upstream commit db115037bb57cdfe97078b13da762213f7980e81 ] + +This is follow-up to + + da08143b8520 ("vlan: more careful checksum features handling") + +which introduced more careful feature intersection in vlan code, +taking into account that HW_CSUM should be considered superset +of IP_CSUM/IPV6_CSUM. 
The same is needed in netif_skb_features() +in order to avoid offloading mismatch warning when vlan is +created on top of a bond consisting of slaves supporting IP/IPv6 +checksumming but not vlan Tx offloading. + +Signed-off-by: Michal Kubecek +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 16 +++++++++++----- + 1 file changed, 11 insertions(+), 5 deletions(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -2576,13 +2576,19 @@ netdev_features_t netif_skb_features(str + return harmonize_features(skb, features); + } + +- features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | +- NETIF_F_HW_VLAN_STAG_TX); ++ features = netdev_intersect_features(features, ++ skb->dev->vlan_features | ++ NETIF_F_HW_VLAN_CTAG_TX | ++ NETIF_F_HW_VLAN_STAG_TX); + + if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) +- features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | +- NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX | +- NETIF_F_HW_VLAN_STAG_TX; ++ features = netdev_intersect_features(features, ++ NETIF_F_SG | ++ NETIF_F_HIGHDMA | ++ NETIF_F_FRAGLIST | ++ NETIF_F_GEN_CSUM | ++ NETIF_F_HW_VLAN_CTAG_TX | ++ NETIF_F_HW_VLAN_STAG_TX); + + return harmonize_features(skb, features); + } diff --git a/queue-3.16/net-ipv6-fib-don-t-sleep-inside-atomic-lock.patch b/queue-3.16/net-ipv6-fib-don-t-sleep-inside-atomic-lock.patch new file mode 100644 index 00000000000..1e6ef83b68d --- /dev/null +++ b/queue-3.16/net-ipv6-fib-don-t-sleep-inside-atomic-lock.patch @@ -0,0 +1,82 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Benjamin Block +Date: Thu, 21 Aug 2014 19:37:48 +0200 +Subject: net: ipv6: fib: don't sleep inside atomic lock + +From: Benjamin Block + +[ Upstream commit 793c3b4000a1ef611ae7e5c89bd2a9c6b776cb5e ] + +The function fib6_commit_metrics() allocates a piece of memory in mode +GFP_KERNEL while holding an atomic lock from higher up in the stack, in +the function __ip6_ins_rt(). 
This produces the following BUG: + +> BUG: sleeping function called from invalid context at mm/slub.c:1250 +> in_atomic(): 1, irqs_disabled(): 0, pid: 2909, name: dhcpcd +> 2 locks held by dhcpcd/2909: +> #0: (rtnl_mutex){+.+.+.}, at: [] rtnl_lock+0x17/0x20 +> #1: (&tb->tb6_lock){++--+.}, at: [] ip6_route_add+0x65a/0x800 +> CPU: 1 PID: 2909 Comm: dhcpcd Not tainted 3.17.0-rc1 #1 +> Hardware name: ASUS All Series/Q87T, BIOS 0216 10/16/2013 +> 0000000000000008 ffff8800c8f13858 ffffffff81af135a 0000000000000000 +> ffff880212202430 ffff8800c8f13878 ffffffff810f8d3a ffff880212202c98 +> 0000000000000010 ffff8800c8f138c8 ffffffff8121ad0e 0000000000000001 +> Call Trace: +> [] dump_stack+0x4e/0x68 +> [] __might_sleep+0x10a/0x120 +> [] kmem_cache_alloc_trace+0x4e/0x190 +> [] ? fib6_commit_metrics+0x66/0x110 +> [] fib6_commit_metrics+0x66/0x110 +> [] fib6_add+0x883/0xa80 +> [] ? ip6_route_add+0x65a/0x800 +> [] ip6_route_add+0x675/0x800 +> [] ? ip6_route_add+0x6a/0x800 +> [] inet6_rtm_newroute+0x5c/0x80 +> [] rtnetlink_rcv_msg+0x211/0x260 +> [] ? rtnl_lock+0x17/0x20 +> [] ? lock_release_holdtime+0x28/0x180 +> [] ? rtnl_lock+0x17/0x20 +> [] ? __rtnl_unlock+0x20/0x20 +> [] netlink_rcv_skb+0x6e/0xd0 +> [] rtnetlink_rcv+0x25/0x40 +> [] netlink_unicast+0xd9/0x180 +> [] netlink_sendmsg+0x700/0x770 +> [] ? local_clock+0x25/0x30 +> [] sock_sendmsg+0x6c/0x90 +> [] ? might_fault+0xa3/0xb0 +> [] ? verify_iovec+0x7d/0xf0 +> [] ___sys_sendmsg+0x37e/0x3b0 +> [] ? trace_hardirqs_on_caller+0x185/0x220 +> [] ? mutex_unlock+0xe/0x10 +> [] ? netlink_insert+0xbc/0xe0 +> [] ? netlink_autobind.isra.30+0x125/0x150 +> [] ? netlink_autobind.isra.30+0x60/0x150 +> [] ? netlink_bind+0x159/0x230 +> [] ? might_fault+0x5a/0xb0 +> [] ? SYSC_bind+0x7e/0xd0 +> [] __sys_sendmsg+0x4d/0x80 +> [] SyS_sendmsg+0x12/0x20 +> [] system_call_fastpath+0x16/0x1b + +Fixing this by replacing the mode GFP_KERNEL with GFP_ATOMIC. 
+ +Signed-off-by: Benjamin Block +Acked-by: David Rientjes +Acked-by: Hannes Frederic Sowa +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_fib.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv6/ip6_fib.c ++++ b/net/ipv6/ip6_fib.c +@@ -643,7 +643,7 @@ static int fib6_commit_metrics(struct ds + if (dst->flags & DST_HOST) { + mp = dst_metrics_write_ptr(dst); + } else { +- mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); ++ mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC); + if (!mp) + return -ENOMEM; + dst_init_metrics(dst, mp, 0); diff --git a/queue-3.16/net-mlx4-correctly-configure-single-ported-vfs-from-the-host.patch b/queue-3.16/net-mlx4-correctly-configure-single-ported-vfs-from-the-host.patch new file mode 100644 index 00000000000..8c6b2ce1296 --- /dev/null +++ b/queue-3.16/net-mlx4-correctly-configure-single-ported-vfs-from-the-host.patch @@ -0,0 +1,92 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Matan Barak +Date: Wed, 10 Sep 2014 16:41:53 +0300 +Subject: net/mlx4: Correctly configure single ported VFs from the host + +From: Matan Barak + +[ Upstream commit a91c772fa0275163508e1078ff6d474d423244fb ] + +Single port VFs are seen PCI wise on both ports of the PF (we don't have +single port PFs with ConnectX). With this in mind, it's possible for +virtualization tools to try and configure a single ported VF through +the "wrong" PF port. + +To handle that, we use the PF driver mapping of single port VFs to NIC +ports and adjust the port value before calling into the low level +code that does the actual VF configuration + +Fixes: 449fc48 ('net/mlx4: Adapt code for N-Port VF') +Signed-off-by: Matan Barak +Signed-off-by: Or Gerlitz +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx4/cmd.c | 21 +++++++++++++++++++++ + 1 file changed, 21 insertions(+) + +--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c ++++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c +@@ -2380,6 +2380,22 @@ struct mlx4_slaves_pport mlx4_phys_to_sl + } + EXPORT_SYMBOL_GPL(mlx4_phys_to_slaves_pport_actv); + ++static int mlx4_slaves_closest_port(struct mlx4_dev *dev, int slave, int port) ++{ ++ struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave); ++ int min_port = find_first_bit(actv_ports.ports, dev->caps.num_ports) ++ + 1; ++ int max_port = min_port + ++ bitmap_weight(actv_ports.ports, dev->caps.num_ports); ++ ++ if (port < min_port) ++ port = min_port; ++ else if (port >= max_port) ++ port = max_port - 1; ++ ++ return port; ++} ++ + int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac) + { + struct mlx4_priv *priv = mlx4_priv(dev); +@@ -2393,6 +2409,7 @@ int mlx4_set_vf_mac(struct mlx4_dev *dev + if (slave < 0) + return -EINVAL; + ++ port = mlx4_slaves_closest_port(dev, slave, port); + s_info = &priv->mfunc.master.vf_admin[slave].vport[port]; + s_info->mac = mac; + mlx4_info(dev, "default mac on vf %d port %d to %llX will take afect only after vf restart\n", +@@ -2419,6 +2436,7 @@ int mlx4_set_vf_vlan(struct mlx4_dev *de + if (slave < 0) + return -EINVAL; + ++ port = mlx4_slaves_closest_port(dev, slave, port); + vf_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; + + if ((0 == vlan) && (0 == qos)) +@@ -2446,6 +2464,7 @@ bool mlx4_get_slave_default_vlan(struct + struct mlx4_priv *priv; + + priv = mlx4_priv(dev); ++ port = mlx4_slaves_closest_port(dev, slave, port); + vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + + if (MLX4_VGT != vp_oper->state.default_vlan) { +@@ -2473,6 +2492,7 @@ int mlx4_set_vf_spoofchk(struct mlx4_dev + if (slave < 0) + return -EINVAL; + ++ port = mlx4_slaves_closest_port(dev, slave, port); + s_info = 
&priv->mfunc.master.vf_admin[slave].vport[port]; + s_info->spoofchk = setting; + +@@ -2526,6 +2546,7 @@ int mlx4_set_vf_link_state(struct mlx4_d + if (slave < 0) + return -EINVAL; + ++ port = mlx4_slaves_closest_port(dev, slave, port); + switch (link_state) { + case IFLA_VF_LINK_STATE_AUTO: + /* get current link state */ diff --git a/queue-3.16/net-mlx4_core-allow-not-to-specify-probe_vf-in-sriov-ib-mode.patch b/queue-3.16/net-mlx4_core-allow-not-to-specify-probe_vf-in-sriov-ib-mode.patch new file mode 100644 index 00000000000..a0dfff1074f --- /dev/null +++ b/queue-3.16/net-mlx4_core-allow-not-to-specify-probe_vf-in-sriov-ib-mode.patch @@ -0,0 +1,48 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Matan Barak +Date: Tue, 23 Sep 2014 16:05:59 +0300 +Subject: net/mlx4_core: Allow not to specify probe_vf in SRIOV IB mode + +From: Matan Barak + +[ Upstream commit effa4bc4e75a265105f4ccb55857057e5ad231ed ] + +When the HCA is configured in SRIOV IB mode (that is, at least one of +the ports is IB) and the probe_vf module param isn't specified, +mlx4_init_one() failed because of the following condition: + +if (ib_ports && (num_vfs_argc > 1 || probe_vfs_argc > 1)) { + ..... +} + +The root cause for that is a mistake in the initialization of num_vfs_argc +and probe_vfs_argc. When num_vfs / probe_vf aren't given, their argument +count counterpart should be 0, fix that. + +Fixes: dd41cc3bb90e ('net/mlx4: Adapt num_vfs/probed_vf params for single port VF') +Signed-off-by: Matan Barak +Signed-off-by: Or Gerlitz +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx4/main.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/main.c ++++ b/drivers/net/ethernet/mellanox/mlx4/main.c +@@ -78,13 +78,13 @@ MODULE_PARM_DESC(msi_x, "attempt to use + #endif /* CONFIG_PCI_MSI */ + + static uint8_t num_vfs[3] = {0, 0, 0}; +-static int num_vfs_argc = 3; ++static int num_vfs_argc; + module_param_array(num_vfs, byte , &num_vfs_argc, 0444); + MODULE_PARM_DESC(num_vfs, "enable #num_vfs functions if num_vfs > 0\n" + "num_vfs=port1,port2,port1+2"); + + static uint8_t probe_vf[3] = {0, 0, 0}; +-static int probe_vfs_argc = 3; ++static int probe_vfs_argc; + module_param_array(probe_vf, byte, &probe_vfs_argc, 0444); + MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)\n" + "probe_vf=port1,port2,port1+2"); diff --git a/queue-3.16/net-phy-smsc-move-smsc_phy_config_init-reset-part-in-a-soft_reset-function.patch b/queue-3.16/net-phy-smsc-move-smsc_phy_config_init-reset-part-in-a-soft_reset-function.patch new file mode 100644 index 00000000000..68188c3e3f0 --- /dev/null +++ b/queue-3.16/net-phy-smsc-move-smsc_phy_config_init-reset-part-in-a-soft_reset-function.patch @@ -0,0 +1,103 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Gwenhael Goavec-Merou +Date: Fri, 15 Aug 2014 15:00:38 +0200 +Subject: net: phy: smsc: move smsc_phy_config_init reset part in a soft_reset function + +From: Gwenhael Goavec-Merou + +[ Upstream commit 21009686662fd21412ca35def7cb3cc8346e1c3d ] + +On the one hand, phy_device.c provides a generic reset function if the phy +driver does not provide a soft_reset pointer. This generic reset does not take +into account the state of the phy, with a potential failure if the phy is in +powerdown mode. On the other hand, smsc driver provides a function with both +correct reset behaviour and configuration. 
+ +This patch moves the reset part into a new smsc_phy_reset function and provides +the soft_reset pointer to have a correct reset behaviour by default. + +Signed-off-by: Gwenhael Goavec-Merou +Reviewed-by: Florian Fainelli +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/smsc.c | 33 +++++++++++++++++++++------------ + 1 file changed, 21 insertions(+), 12 deletions(-) + +--- a/drivers/net/phy/smsc.c ++++ b/drivers/net/phy/smsc.c +@@ -43,6 +43,22 @@ static int smsc_phy_ack_interrupt(struct + + static int smsc_phy_config_init(struct phy_device *phydev) + { ++ int rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS); ++ ++ if (rc < 0) ++ return rc; ++ ++ /* Enable energy detect mode for this SMSC Transceivers */ ++ rc = phy_write(phydev, MII_LAN83C185_CTRL_STATUS, ++ rc | MII_LAN83C185_EDPWRDOWN); ++ if (rc < 0) ++ return rc; ++ ++ return smsc_phy_ack_interrupt(phydev); ++} ++ ++static int smsc_phy_reset(struct phy_device *phydev) ++{ + int rc = phy_read(phydev, MII_LAN83C185_SPECIAL_MODES); + if (rc < 0) + return rc; +@@ -66,18 +82,7 @@ static int smsc_phy_config_init(struct p + rc = phy_read(phydev, MII_BMCR); + } while (rc & BMCR_RESET); + } +- +- rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS); +- if (rc < 0) +- return rc; +- +- /* Enable energy detect mode for this SMSC Transceivers */ +- rc = phy_write(phydev, MII_LAN83C185_CTRL_STATUS, +- rc | MII_LAN83C185_EDPWRDOWN); +- if (rc < 0) +- return rc; +- +- return smsc_phy_ack_interrupt (phydev); ++ return 0; + } + + static int lan911x_config_init(struct phy_device *phydev) +@@ -142,6 +147,7 @@ static struct phy_driver smsc_phy_driver + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .config_init = smsc_phy_config_init, ++ .soft_reset = smsc_phy_reset, + + /* IRQ related */ + .ack_interrupt = smsc_phy_ack_interrupt, +@@ -164,6 +170,7 @@ static struct phy_driver smsc_phy_driver + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + 
.config_init = smsc_phy_config_init, ++ .soft_reset = smsc_phy_reset, + + /* IRQ related */ + .ack_interrupt = smsc_phy_ack_interrupt, +@@ -186,6 +193,7 @@ static struct phy_driver smsc_phy_driver + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .config_init = smsc_phy_config_init, ++ .soft_reset = smsc_phy_reset, + + /* IRQ related */ + .ack_interrupt = smsc_phy_ack_interrupt, +@@ -230,6 +238,7 @@ static struct phy_driver smsc_phy_driver + .config_aneg = genphy_config_aneg, + .read_status = lan87xx_read_status, + .config_init = smsc_phy_config_init, ++ .soft_reset = smsc_phy_reset, + + /* IRQ related */ + .ack_interrupt = smsc_phy_ack_interrupt, diff --git a/queue-3.16/net-systemport-fix-bcm_sysport_insert_tsb.patch b/queue-3.16/net-systemport-fix-bcm_sysport_insert_tsb.patch new file mode 100644 index 00000000000..a888b18ddee --- /dev/null +++ b/queue-3.16/net-systemport-fix-bcm_sysport_insert_tsb.patch @@ -0,0 +1,71 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Florian Fainelli +Date: Thu, 2 Oct 2014 09:43:16 -0700 +Subject: net: systemport: fix bcm_sysport_insert_tsb() + +From: Florian Fainelli + +[ Upstream commit e87474a6e697857df21cff0707a2472abceca8b3 ] + +Similar to commit bc23333ba11fb7f959b7e87e121122f5a0fbbca8 ("net: +bcmgenet: fix bcmgenet_put_tx_csum()"), we need to return the skb +pointer in case we had to reallocate the SKB headroom. + +Fixes: 80105befdb4b8 ("net: systemport: add Broadcom SYSTEMPORT Ethernet MAC driver") +Signed-off-by: Florian Fainelli +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bcmsysport.c | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + +--- a/drivers/net/ethernet/broadcom/bcmsysport.c ++++ b/drivers/net/ethernet/broadcom/bcmsysport.c +@@ -757,7 +757,8 @@ static irqreturn_t bcm_sysport_tx_isr(in + return IRQ_HANDLED; + } + +-static int bcm_sysport_insert_tsb(struct sk_buff *skb, struct net_device *dev) ++static struct sk_buff *bcm_sysport_insert_tsb(struct sk_buff *skb, ++ struct net_device *dev) + { + struct sk_buff *nskb; + struct bcm_tsb *tsb; +@@ -773,7 +774,7 @@ static int bcm_sysport_insert_tsb(struct + if (!nskb) { + dev->stats.tx_errors++; + dev->stats.tx_dropped++; +- return -ENOMEM; ++ return NULL; + } + skb = nskb; + } +@@ -792,7 +793,7 @@ static int bcm_sysport_insert_tsb(struct + ip_proto = ipv6_hdr(skb)->nexthdr; + break; + default: +- return 0; ++ return skb; + } + + /* Get the checksum offset and the L4 (transport) offset */ +@@ -810,7 +811,7 @@ static int bcm_sysport_insert_tsb(struct + tsb->l4_ptr_dest_map = csum_info; + } + +- return 0; ++ return skb; + } + + static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb, +@@ -844,8 +845,8 @@ static netdev_tx_t bcm_sysport_xmit(stru + + /* Insert TSB and checksum infos */ + if (priv->tsb_en) { +- ret = bcm_sysport_insert_tsb(skb, dev); +- if (ret) { ++ skb = bcm_sysport_insert_tsb(skb, dev); ++ if (!skb) { + ret = NETDEV_TX_OK; + goto out; + } diff --git a/queue-3.16/net_sched-copy-exts-type-in-tcf_exts_change.patch b/queue-3.16/net_sched-copy-exts-type-in-tcf_exts_change.patch new file mode 100644 index 00000000000..60488e39505 --- /dev/null +++ b/queue-3.16/net_sched-copy-exts-type-in-tcf_exts_change.patch @@ -0,0 +1,35 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: WANG Cong +Date: Mon, 6 Oct 2014 17:21:54 -0700 +Subject: net_sched: copy exts->type in tcf_exts_change() + +From: WANG Cong + +[ Upstream commit 5301e3e117d88ef0967ce278912e54757f1a31a2 ] + +We need to 
copy exts->type when committing the change, otherwise +it would be always 0. This is a quick fix for -net and -stable, +for net-next tcf_exts will be removed. + +Fixes: commit 33be627159913b094bb578e83 ("net_sched: act: use standard struct list_head") +Reported-by: Jamal Hadi Salim +Cc: Jamal Hadi Salim +Cc: John Fastabend +Signed-off-by: Cong Wang +Acked-by: Jamal Hadi Salim +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/cls_api.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/sched/cls_api.c ++++ b/net/sched/cls_api.c +@@ -549,6 +549,7 @@ void tcf_exts_change(struct tcf_proto *t + tcf_tree_lock(tp); + list_splice_init(&dst->actions, &tmp); + list_splice(&src->actions, &dst->actions); ++ dst->type = src->type; + tcf_tree_unlock(tp); + tcf_action_destroy(&tmp, TCA_ACT_UNBIND); + #endif diff --git a/queue-3.16/netlink-reset-network-header-before-passing-to-taps.patch b/queue-3.16/netlink-reset-network-header-before-passing-to-taps.patch new file mode 100644 index 00000000000..37032990673 --- /dev/null +++ b/queue-3.16/netlink-reset-network-header-before-passing-to-taps.patch @@ -0,0 +1,41 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Daniel Borkmann +Date: Thu, 7 Aug 2014 22:22:47 +0200 +Subject: netlink: reset network header before passing to taps + +From: Daniel Borkmann + +[ Upstream commit 4e48ed883c72e78c5a910f8831ffe90c9b18f0ec ] + +netlink doesn't set any network header offset thus when the skb is +being passed to tap devices via dev_queue_xmit_nit(), it emits klog +false positives due to it being unset like: + + ... + [ 124.990397] protocol 0000 is buggy, dev nlmon0 + [ 124.990411] protocol 0000 is buggy, dev nlmon0 + ... + +So just reset the network header before passing to the device; for +packet sockets that just means nothing will change - mac and net +offset hold the same value just as before. + +Reported-by: Marcel Holtmann +Signed-off-by: Daniel Borkmann +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/netlink/af_netlink.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/netlink/af_netlink.c ++++ b/net/netlink/af_netlink.c +@@ -205,7 +205,7 @@ static int __netlink_deliver_tap_skb(str + nskb->protocol = htons((u16) sk->sk_protocol); + nskb->pkt_type = netlink_is_kernel(sk) ? + PACKET_KERNEL : PACKET_USER; +- ++ skb_reset_network_header(nskb); + ret = dev_queue_xmit(nskb); + if (unlikely(ret > 0)) + ret = net_xmit_errno(ret); diff --git a/queue-3.16/openvswitch-fix-panic-with-multiple-vlan-headers.patch b/queue-3.16/openvswitch-fix-panic-with-multiple-vlan-headers.patch new file mode 100644 index 00000000000..e99eff86055 --- /dev/null +++ b/queue-3.16/openvswitch-fix-panic-with-multiple-vlan-headers.patch @@ -0,0 +1,55 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Jiri Benc +Date: Thu, 21 Aug 2014 21:33:44 +0200 +Subject: openvswitch: fix panic with multiple vlan headers + +From: Jiri Benc + +[ Upstream commit 2ba5af42a7b59ef01f9081234d8855140738defd ] + +When there are multiple vlan headers present in a received frame, the first +one is put into vlan_tci and protocol is set to ETH_P_8021Q. Anything in the +skb beyond the VLAN TPID may be still non-linear, including the inner TCI +and ethertype. While ovs_flow_extract takes care of IP and IPv6 headers, it +does nothing with ETH_P_8021Q. Later, if OVS_ACTION_ATTR_POP_VLAN is +executed, __pop_vlan_tci pulls the next vlan header into vlan_tci. + +This leads to two things: + +1. Part of the resulting ethernet header is in the non-linear part of the + skb. When eth_type_trans is called later as the result of + OVS_ACTION_ATTR_OUTPUT, kernel BUGs in __skb_pull. Also, __pop_vlan_tci + is in fact accessing random data when it reads past the TPID. + +2. network_header points into the ethernet header instead of behind it. + mac_len is set to a wrong value (10), too. + +Reported-by: Yulong Pei +Signed-off-by: Jiri Benc +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/openvswitch/actions.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/net/openvswitch/actions.c ++++ b/net/openvswitch/actions.c +@@ -42,6 +42,9 @@ static int do_execute_actions(struct dat + + static int make_writable(struct sk_buff *skb, int write_len) + { ++ if (!pskb_may_pull(skb, write_len)) ++ return -ENOMEM; ++ + if (!skb_cloned(skb) || skb_clone_writable(skb, write_len)) + return 0; + +@@ -70,6 +73,8 @@ static int __pop_vlan_tci(struct sk_buff + + vlan_set_encap_proto(skb, vhdr); + skb->mac_header += VLAN_HLEN; ++ if (skb_network_offset(skb) < ETH_HLEN) ++ skb_set_network_header(skb, ETH_HLEN); + skb_reset_mac_len(skb); + + return 0; diff --git a/queue-3.16/packet-handle-too-big-packets-for-packet_v3.patch b/queue-3.16/packet-handle-too-big-packets-for-packet_v3.patch new file mode 100644 index 00000000000..3aeb8bcb17a --- /dev/null +++ b/queue-3.16/packet-handle-too-big-packets-for-packet_v3.patch @@ -0,0 +1,83 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Eric Dumazet +Date: Fri, 15 Aug 2014 09:16:04 -0700 +Subject: packet: handle too big packets for PACKET_V3 + +From: Eric Dumazet + +[ Upstream commit dc808110bb62b64a448696ecac3938902c92e1ab ] + +af_packet can currently overwrite kernel memory by out of bound +accesses, because it assumed a [new] block can always hold one frame. + +This is not generally the case, even if most existing tools do it right. + +This patch clamps too long frames as API permits, and issue a one time +error on syslog. + +[ 394.357639] tpacket_rcv: packet too big, clamped from 5042 to 3966. macoff=82 + +In this example, packet header tp_snaplen was set to 3966, +and tp_len was set to 5042 (skb->len) + +Signed-off-by: Eric Dumazet +Fixes: f6fb8f100b80 ("af-packet: TPACKET_V3 flexible buffer implementation.") +Acked-by: Daniel Borkmann +Acked-by: Neil Horman +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/packet/af_packet.c | 17 +++++++++++++++++ + net/packet/internal.h | 1 + + 2 files changed, 18 insertions(+) + +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -636,6 +636,7 @@ static void init_prb_bdqc(struct packet_ + p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov); + p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv; + ++ p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv); + prb_init_ft_ops(p1, req_u); + prb_setup_retire_blk_timer(po, tx_ring); + prb_open_block(p1, pbd); +@@ -1946,6 +1947,18 @@ static int tpacket_rcv(struct sk_buff *s + if ((int)snaplen < 0) + snaplen = 0; + } ++ } else if (unlikely(macoff + snaplen > ++ GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len)) { ++ u32 nval; ++ ++ nval = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len - macoff; ++ pr_err_once("tpacket_rcv: packet too big, clamped from %u to %u. macoff=%u\n", ++ snaplen, nval, macoff); ++ snaplen = nval; ++ if (unlikely((int)snaplen < 0)) { ++ snaplen = 0; ++ macoff = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len; ++ } + } + spin_lock(&sk->sk_receive_queue.lock); + h.raw = packet_current_rx_frame(po, skb, +@@ -3789,6 +3802,10 @@ static int packet_set_ring(struct sock * + goto out; + if (unlikely(req->tp_block_size & (PAGE_SIZE - 1))) + goto out; ++ if (po->tp_version >= TPACKET_V3 && ++ (int)(req->tp_block_size - ++ BLK_PLUS_PRIV(req_u->req3.tp_sizeof_priv)) <= 0) ++ goto out; + if (unlikely(req->tp_frame_size < po->tp_hdrlen + + po->tp_reserve)) + goto out; +--- a/net/packet/internal.h ++++ b/net/packet/internal.h +@@ -29,6 +29,7 @@ struct tpacket_kbdq_core { + char *pkblk_start; + char *pkblk_end; + int kblk_size; ++ unsigned int max_frame_len; + unsigned int knum_blocks; + uint64_t knxt_seq_num; + char *prev; diff --git a/queue-3.16/revert-macvlan-simplify-the-structure-port.patch b/queue-3.16/revert-macvlan-simplify-the-structure-port.patch new file mode 100644 index 
00000000000..a5117343bed --- /dev/null +++ b/queue-3.16/revert-macvlan-simplify-the-structure-port.patch @@ -0,0 +1,136 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: "David S. Miller" +Date: Thu, 14 Aug 2014 14:32:49 -0700 +Subject: Revert "macvlan: simplify the structure port" + +From: "David S. Miller" + +[ Upstream commit 5e3c516b512c0f8f18359413b04918f6347f67e7 ] + +This reverts commit a188a54d11629bef2169052297e61f3767ca8ce5. + +It causes crashes + +==================== +[ 80.643286] BUG: unable to handle kernel NULL pointer dereference at 0000000000000878 +[ 80.670103] IP: [] try_to_grab_pending+0x64/0x1f0 +[ 80.691289] PGD 22c102067 PUD 235bf0067 PMD 0 +[ 80.706611] Oops: 0002 [#1] SMP +[ 80.717836] Modules linked in: macvlan nfsd lockd nfs_acl exportfs auth_rpcgss sunrpc oid_registry ioatdma ixgbe(-) mdio igb dca +[ 80.757935] CPU: 37 PID: 6724 Comm: rmmod Not tainted 3.16.0-net-next-08-12-2014-FCoE+ #1 +[ 80.785688] Hardware name: Intel Corporation S2600CO/S2600CO, BIOS SE5C600.86B.02.03.0003.041920141333 04/19/2014 +[ 80.820310] task: ffff880235a9eae0 ti: ffff88022e844000 task.ti: ffff88022e844000 +[ 80.845770] RIP: 0010:[] [] try_to_grab_pending+0x64/0x1f0 +[ 80.875326] RSP: 0018:ffff88022e847b28 EFLAGS: 00010046 +[ 80.893251] RAX: 0000000000037a6a RBX: 0000000000000878 RCX: 0000000000000000 +[ 80.917187] RDX: ffff880235a9eae0 RSI: 0000000000000001 RDI: ffffffff810832db +[ 80.941125] RBP: ffff88022e847b58 R08: 0000000000000000 R09: 0000000000000000 +[ 80.965056] R10: 0000000000000001 R11: 0000000000000001 R12: ffff88022e847b70 +[ 80.988994] R13: 0000000000000000 R14: ffff88022e847be8 R15: ffffffff81ebe440 +[ 81.012929] FS: 00007fab90b07700(0000) GS:ffff88043f7a0000(0000) knlGS:0000000000000000 +[ 81.040400] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 81.059757] CR2: 0000000000000878 CR3: 0000000235a42000 CR4: 00000000001407e0 +[ 81.083689] Stack: +[ 81.090739] ffff880235a9eae0 0000000000000878 ffff88022e847b70 0000000000000000 +[ 
81.116253] ffff88022e847be8 ffffffff81ebe440 ffff88022e847b98 ffffffff810847f1 +[ 81.141766] ffff88022e847b78 0000000000000286 ffff880234200000 0000000000000000 +[ 81.167282] Call Trace: +[ 81.175768] [] __cancel_work_timer+0x31/0x170 +[ 81.195985] [] cancel_work_sync+0xb/0x10 +[ 81.214769] [] macvlan_port_destroy+0x28/0x60 [macvlan] +[ 81.237844] [] macvlan_uninit+0x40/0x50 [macvlan] +[ 81.259209] [] rollback_registered_many+0x1a2/0x2c0 +[ 81.281140] [] unregister_netdevice_many+0x1a/0xb0 +[ 81.302786] [] macvlan_device_event+0x1ef/0x240 [macvlan] +[ 81.326439] [] notifier_call_chain+0x4d/0x70 +[ 81.346366] [] raw_notifier_call_chain+0x11/0x20 +[ 81.367439] [] call_netdevice_notifiers_info+0x3b/0x70 +[ 81.390228] [] call_netdevice_notifiers+0x11/0x20 +[ 81.411587] [] rollback_registered_many+0x17d/0x2c0 +[ 81.433518] [] unregister_netdevice_queue+0x75/0x110 +[ 81.455735] [] unregister_netdev+0x1b/0x30 +[ 81.475094] [] ixgbe_remove+0x170/0x1d0 [ixgbe] +[ 81.495886] [] pci_device_remove+0x32/0x60 +[ 81.515246] [] __device_release_driver+0x64/0xd0 +[ 81.536321] [] driver_detach+0xc8/0xd0 +[ 81.554530] [] bus_remove_driver+0x4e/0xa0 +[ 81.573888] [] driver_unregister+0x2b/0x60 +[ 81.593246] [] pci_unregister_driver+0x1e/0xa0 +[ 81.613749] [] ixgbe_exit_module+0x1c/0x2e [ixgbe] +[ 81.635401] [] SyS_delete_module+0x15b/0x1e0 +[ 81.655334] [] ? sysret_check+0x22/0x5d +[ 81.673833] [] ? trace_hardirqs_on_caller+0x11d/0x1e0 +[ 81.696339] [] ? 
trace_hardirqs_on_thunk+0x3a/0x3f +[ 81.717985] [] system_call_fastpath+0x16/0x1b +[ 81.738199] Code: 00 48 83 3d 6e bb da 00 00 48 89 c2 0f 84 67 01 00 00 fa 66 0f 1f 44 00 00 49 89 14 24 e8 b5 4b 02 00 45 84 ed 0f 85 ac 00 00 00 0f ba 2b 00 72 1d 31 c0 48 8b 5d d8 4c 8b 65 e0 4c 8b 6d e8 +[ 81.807026] RIP [] try_to_grab_pending+0x64/0x1f0 +[ 81.828468] RSP +[ 81.840384] CR2: 0000000000000878 +[ 81.851731] ---[ end trace 9f6c7232e3464e11 ]--- +==================== + +This bug could be triggered by these steps: + +modprobe ixgbe ; modprobe macvlan +ip link add link p96p1 address 00:1B:21:6E:06:00 macvlan0 type macvlan +ip link add link p96p1 address 00:1B:21:6E:06:01 macvlan1 type macvlan +ip link add link p96p1 address 00:1B:21:6E:06:02 macvlan2 type macvlan +ip link add link p96p1 address 00:1B:21:6E:06:03 macvlan3 type macvlan +rmmod ixgbe + +Reported-by: "Keller, Jacob E" +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/macvlan.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +--- a/drivers/net/macvlan.c ++++ b/drivers/net/macvlan.c +@@ -45,10 +45,9 @@ struct macvlan_port { + struct sk_buff_head bc_queue; + struct work_struct bc_work; + bool passthru; ++ int count; + }; + +-#define MACVLAN_PORT_IS_EMPTY(port) list_empty(&port->vlans) +- + struct macvlan_skb_cb { + const struct macvlan_dev *src; + }; +@@ -667,7 +666,8 @@ static void macvlan_uninit(struct net_de + + free_percpu(vlan->pcpu_stats); + +- if (MACVLAN_PORT_IS_EMPTY(port)) ++ port->count -= 1; ++ if (!port->count) + macvlan_port_destroy(port->dev); + } + +@@ -1020,12 +1020,13 @@ int macvlan_common_newlink(struct net *s + vlan->flags = nla_get_u16(data[IFLA_MACVLAN_FLAGS]); + + if (vlan->mode == MACVLAN_MODE_PASSTHRU) { +- if (!MACVLAN_PORT_IS_EMPTY(port)) ++ if (port->count) + return -EINVAL; + port->passthru = true; + eth_hw_addr_inherit(dev, lowerdev); + } + ++ port->count += 1; + err = register_netdevice(dev); + if (err < 0) + goto 
destroy_port; +@@ -1043,7 +1044,8 @@ int macvlan_common_newlink(struct net *s + unregister_netdev: + unregister_netdevice(dev); + destroy_port: +- if (MACVLAN_PORT_IS_EMPTY(port)) ++ port->count -= 1; ++ if (!port->count) + macvlan_port_destroy(lowerdev); + + return err; diff --git a/queue-3.16/revert-net-macb-add-pinctrl-consumer-support.patch b/queue-3.16/revert-net-macb-add-pinctrl-consumer-support.patch new file mode 100644 index 00000000000..f2cd162b980 --- /dev/null +++ b/queue-3.16/revert-net-macb-add-pinctrl-consumer-support.patch @@ -0,0 +1,55 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Soren Brinkmann +Date: Mon, 22 Sep 2014 16:49:08 -0700 +Subject: Revert "net/macb: add pinctrl consumer support" + +From: Soren Brinkmann + +[ Upstream commit 9026968abe7ad102f4ac5c6d96d733643f75399c ] + +This reverts commit 8ef29f8aae524bd51298fb10ac6a5ce6c4c5a3d8. +The driver core already calls pinctrl_get() and claims the default +state. There is no need to replicate this in the driver. +Acked-by: Nicolas Ferre + +Acked-by: Nicolas Ferre +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/cadence/macb.c | 11 ----------- + 1 file changed, 11 deletions(-) + +--- a/drivers/net/ethernet/cadence/macb.c ++++ b/drivers/net/ethernet/cadence/macb.c +@@ -30,7 +30,6 @@ + #include + #include + #include +-#include + + #include "macb.h" + +@@ -1803,7 +1802,6 @@ static int __init macb_probe(struct plat + struct phy_device *phydev; + u32 config; + int err = -ENXIO; +- struct pinctrl *pinctrl; + const char *mac; + + regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); +@@ -1812,15 +1810,6 @@ static int __init macb_probe(struct plat + goto err_out; + } + +- pinctrl = devm_pinctrl_get_select_default(&pdev->dev); +- if (IS_ERR(pinctrl)) { +- err = PTR_ERR(pinctrl); +- if (err == -EPROBE_DEFER) +- goto err_out; +- +- dev_warn(&pdev->dev, "No pinctrl provided\n"); +- } +- + err = -ENOMEM; + dev = alloc_etherdev(sizeof(*bp)); + if (!dev) diff --git a/queue-3.16/rtnetlink-fix-vf-info-size.patch b/queue-3.16/rtnetlink-fix-vf-info-size.patch new file mode 100644 index 00000000000..f3602ba8b88 --- /dev/null +++ b/queue-3.16/rtnetlink-fix-vf-info-size.patch @@ -0,0 +1,36 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Jiri Benc +Date: Fri, 8 Aug 2014 16:44:32 +0200 +Subject: rtnetlink: fix VF info size + +From: Jiri Benc + +[ Upstream commit 945a36761fd7877660f630bbdeb4ff9ff80d1935 ] + +Commit 1d8faf48c74b8 ("net/core: Add VF link state control") added new +attribute to IFLA_VF_INFO group in rtnl_fill_ifinfo but did not adjust size +of the allocated memory in if_nlmsg_size/rtnl_vfinfo_size. As the result, we +may trigger warnings in rtnl_getlink and similar functions when many VF +links are enabled, as the information does not fit into the allocated skb. + +Fixes: 1d8faf48c74b8 ("net/core: Add VF link state control") +Reported-by: Yulong Pei +Signed-off-by: Jiri Benc +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/rtnetlink.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -799,7 +799,8 @@ static inline int rtnl_vfinfo_size(const + (nla_total_size(sizeof(struct ifla_vf_mac)) + + nla_total_size(sizeof(struct ifla_vf_vlan)) + + nla_total_size(sizeof(struct ifla_vf_spoofchk)) + +- nla_total_size(sizeof(struct ifla_vf_rate))); ++ nla_total_size(sizeof(struct ifla_vf_rate)) + ++ nla_total_size(sizeof(struct ifla_vf_link_state))); + return size; + } else + return 0; diff --git a/queue-3.16/sctp-handle-association-restarts-when-the-socket-is-closed.patch b/queue-3.16/sctp-handle-association-restarts-when-the-socket-is-closed.patch new file mode 100644 index 00000000000..a97b4420a42 --- /dev/null +++ b/queue-3.16/sctp-handle-association-restarts-when-the-socket-is-closed.patch @@ -0,0 +1,79 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Vlad Yasevich +Date: Fri, 3 Oct 2014 18:16:20 -0400 +Subject: sctp: handle association restarts when the socket is closed. + +From: Vlad Yasevich + +[ Upstream commit bdf6fa52f01b941d4a80372d56de465bdbbd1d23 ] + +Currently association restarts do not take into consideration the +state of the socket. When a restart happens, the current association +simply transitions into established state. This creates a condition +where a remote system, through the restart procedure, may create a +local association that is in no way reachable by the user. The conditions +to trigger this are as follows: + 1) Remote does not acknowledge some data causing data to remain + outstanding. + 2) Local application calls close() on the socket. Since data + is still outstanding, the association is placed in SHUTDOWN_PENDING + state. However, the socket is closed. + 3) The remote tries to create a new association, triggering a restart + on the local system. The association moves from SHUTDOWN_PENDING + to ESTABLISHED.
At this point, it is no longer reachable by + any socket on the local system. + +This patch addresses the above situation by moving the newly ESTABLISHED +association into SHUTDOWN-SENT state and bundling a SHUTDOWN after +the COOKIE-ACK chunk. This way, the restarted association immediately +enters the shutdown procedure and forces the termination of the +unreachable association. + +Reported-by: David Laight +Signed-off-by: Vlad Yasevich +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/sctp/command.h | 2 +- + net/sctp/sm_statefuns.c | 19 ++++++++++++++++--- + 2 files changed, 17 insertions(+), 4 deletions(-) + +--- a/include/net/sctp/command.h ++++ b/include/net/sctp/command.h +@@ -115,7 +115,7 @@ typedef enum { + * analysis of the state functions, but in reality just taken from + * thin air in the hopes othat we don't trigger a kernel panic. + */ +-#define SCTP_MAX_NUM_COMMANDS 14 ++#define SCTP_MAX_NUM_COMMANDS 20 + + typedef union { + __s32 i32; +--- a/net/sctp/sm_statefuns.c ++++ b/net/sctp/sm_statefuns.c +@@ -1775,9 +1775,22 @@ static sctp_disposition_t sctp_sf_do_dup + /* Update the content of current association. */ + sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc)); + sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); +- sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, +- SCTP_STATE(SCTP_STATE_ESTABLISHED)); +- sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); ++ if (sctp_state(asoc, SHUTDOWN_PENDING) && ++ (sctp_sstate(asoc->base.sk, CLOSING) || ++ sock_flag(asoc->base.sk, SOCK_DEAD))) { ++ /* if were currently in SHUTDOWN_PENDING, but the socket ++ * has been closed by user, don't transition to ESTABLISHED. ++ * Instead trigger SHUTDOWN bundled with COOKIE_ACK.
++ */ ++ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); ++ return sctp_sf_do_9_2_start_shutdown(net, ep, asoc, ++ SCTP_ST_CHUNK(0), NULL, ++ commands); ++ } else { ++ sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, ++ SCTP_STATE(SCTP_STATE_ESTABLISHED)); ++ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); ++ } + return SCTP_DISPOSITION_CONSUME; + + nomem_ev: diff --git a/queue-3.16/series b/queue-3.16/series new file mode 100644 index 00000000000..bf4b18f971a --- /dev/null +++ b/queue-3.16/series @@ -0,0 +1,44 @@ +netlink-reset-network-header-before-passing-to-taps.patch +rtnetlink-fix-vf-info-size.patch +net-always-untag-vlan-tagged-traffic-on-input.patch +myri10ge-check-for-dma-mapping-errors.patch +revert-macvlan-simplify-the-structure-port.patch +tcp-don-t-use-timestamp-from-repaired-skb-s-to-calculate-rtt-v2.patch +sit-fix-ipip6_tunnel_lookup-device-matching-criteria.patch +tcp-fix-tcp_release_cb-to-dispatch-via-address-family-for-mtu_reduced.patch +tcp-fix-ssthresh-and-undo-for-consecutive-short-frto-episodes.patch +net-phy-smsc-move-smsc_phy_config_init-reset-part-in-a-soft_reset-function.patch +tipc-fix-message-importance-range-check.patch +packet-handle-too-big-packets-for-packet_v3.patch +bnx2x-revert-undi-flushing-mechanism.patch +net-ipv6-fib-don-t-sleep-inside-atomic-lock.patch +openvswitch-fix-panic-with-multiple-vlan-headers.patch +vxlan-fix-incorrect-initializer-in-union-vxlan_addr.patch +net-fix-checksum-features-handling-in-netif_skb_features.patch +ipv6-fix-rtnl-locking-in-setsockopt-for-anycast-and-multicast.patch +l2tp-fix-race-while-getting-pmtu-on-ppp-pseudo-wire.patch +ipv6-restore-the-behavior-of-ipv6_sock_ac_drop.patch +bonding-fix-div-by-zero-while-enslaving-and-transmitting.patch +net-filter-fix-possible-use-after-free.patch +bridge-check-if-vlan-filtering-is-enabled-only-once.patch +bridge-fix-br_should_learn-to-check-vlan_enabled.patch +net-allow-macvlans-to-move-to-net-namespace.patch 
+macvlan-allow-to-enqueue-broadcast-pkt-on-virtual-device.patch +tg3-work-around-hw-fw-limitations-with-vlan-encapsulated-frames.patch +tg3-allow-for-recieve-of-full-size-8021ad-frames.patch +xfrm-generate-blackhole-routes-only-from-route-lookup-functions.patch +xfrm-generate-queueing-routes-only-from-route-lookup-functions.patch +ip_tunnel-don-t-allow-to-add-the-same-tunnel-multiple-times.patch +macvtap-fix-race-between-device-delete-and-open.patch +revert-net-macb-add-pinctrl-consumer-support.patch +net-mlx4_core-allow-not-to-specify-probe_vf-in-sriov-ib-mode.patch +net-mlx4-correctly-configure-single-ported-vfs-from-the-host.patch +gro-fix-aggregation-for-skb-using-frag_list.patch +ipv6-remove-rt6i_genid.patch +hyperv-fix-a-bug-in-netvsc_start_xmit.patch +ip6_gre-fix-flowi6_proto-value-in-xmit-path.patch +net-systemport-fix-bcm_sysport_insert_tsb.patch +team-avoid-race-condition-in-scheduling-delayed-work.patch +hyperv-fix-a-bug-in-netvsc_send.patch +sctp-handle-association-restarts-when-the-socket-is-closed.patch +net_sched-copy-exts-type-in-tcf_exts_change.patch diff --git a/queue-3.16/sit-fix-ipip6_tunnel_lookup-device-matching-criteria.patch b/queue-3.16/sit-fix-ipip6_tunnel_lookup-device-matching-criteria.patch new file mode 100644 index 00000000000..f4ec25eb7e5 --- /dev/null +++ b/queue-3.16/sit-fix-ipip6_tunnel_lookup-device-matching-criteria.patch @@ -0,0 +1,55 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Shmulik Ladkani +Date: Thu, 14 Aug 2014 15:27:20 +0300 +Subject: sit: Fix ipip6_tunnel_lookup device matching criteria + +From: Shmulik Ladkani + +[ Upstream commit bc8fc7b8f825ef17a0fb9e68c18ce94fa66ab337 ] + +As of 4fddbf5d78 ("sit: strictly restrict incoming traffic to tunnel link device"), +when looking up a tunnel, tunnel's underlying interface (t->parms.link) +is verified to match incoming traffic's ingress device. + +However the comparison was incorrectly based on skb->dev->iflink. 
+ +Instead, dev->ifindex should be used, which correctly represents the +interface from which the IP stack hands the ipip6 packets. + +This allows setting up sit tunnels bound to vlan interfaces (otherwise +incoming ipip6 traffic on the vlan interface was dropped due to +ipip6_tunnel_lookup match failure). + +Signed-off-by: Shmulik Ladkani +Acked-by: Nicolas Dichtel +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/sit.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/net/ipv6/sit.c ++++ b/net/ipv6/sit.c +@@ -101,19 +101,19 @@ static struct ip_tunnel *ipip6_tunnel_lo + for_each_ip_tunnel_rcu(t, sitn->tunnels_r_l[h0 ^ h1]) { + if (local == t->parms.iph.saddr && + remote == t->parms.iph.daddr && +- (!dev || !t->parms.link || dev->iflink == t->parms.link) && ++ (!dev || !t->parms.link || dev->ifindex == t->parms.link) && + (t->dev->flags & IFF_UP)) + return t; + } + for_each_ip_tunnel_rcu(t, sitn->tunnels_r[h0]) { + if (remote == t->parms.iph.daddr && +- (!dev || !t->parms.link || dev->iflink == t->parms.link) && ++ (!dev || !t->parms.link || dev->ifindex == t->parms.link) && + (t->dev->flags & IFF_UP)) + return t; + } + for_each_ip_tunnel_rcu(t, sitn->tunnels_l[h1]) { + if (local == t->parms.iph.saddr && +- (!dev || !t->parms.link || dev->iflink == t->parms.link) && ++ (!dev || !t->parms.link || dev->ifindex == t->parms.link) && + (t->dev->flags & IFF_UP)) + return t; + } diff --git a/queue-3.16/tcp-don-t-use-timestamp-from-repaired-skb-s-to-calculate-rtt-v2.patch b/queue-3.16/tcp-don-t-use-timestamp-from-repaired-skb-s-to-calculate-rtt-v2.patch new file mode 100644 index 00000000000..aeec7e89696 --- /dev/null +++ b/queue-3.16/tcp-don-t-use-timestamp-from-repaired-skb-s-to-calculate-rtt-v2.patch @@ -0,0 +1,116 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Andrey Vagin +Date: Wed, 13 Aug 2014 16:03:10 +0400 +Subject: tcp: don't use timestamp from repaired skb-s to calculate RTT (v2) + +From: Andrey Vagin + 
+[ Upstream commit 9d186cac7ffb1831e9f34cb4a3a8b22abb9dd9d4 ] + +We don't know right timestamp for repaired skb-s. Wrong RTT estimations +isn't good, because some congestion modules heavily depends on it. + +This patch adds the TCPCB_REPAIRED flag, which is included in +TCPCB_RETRANS. + +Thanks to Eric for the advice how to fix this issue. + +This patch fixes the warning: +[ 879.562947] WARNING: CPU: 0 PID: 2825 at net/ipv4/tcp_input.c:3078 tcp_ack+0x11f5/0x1380() +[ 879.567253] CPU: 0 PID: 2825 Comm: socket-tcpbuf-l Not tainted 3.16.0-next-20140811 #1 +[ 879.567829] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 +[ 879.568177] 0000000000000000 00000000c532680c ffff880039643d00 ffffffff817aa2d2 +[ 879.568776] 0000000000000000 ffff880039643d38 ffffffff8109afbd ffff880039d6ba80 +[ 879.569386] ffff88003a449800 000000002983d6bd 0000000000000000 000000002983d6bc +[ 879.569982] Call Trace: +[ 879.570264] [] dump_stack+0x4d/0x66 +[ 879.570599] [] warn_slowpath_common+0x7d/0xa0 +[ 879.570935] [] warn_slowpath_null+0x1a/0x20 +[ 879.571292] [] tcp_ack+0x11f5/0x1380 +[ 879.571614] [] tcp_rcv_established+0x1ed/0x710 +[ 879.571958] [] tcp_v4_do_rcv+0x10a/0x370 +[ 879.572315] [] release_sock+0x89/0x1d0 +[ 879.572642] [] do_tcp_setsockopt.isra.36+0x120/0x860 +[ 879.573000] [] ? rcu_read_lock_held+0x6e/0x80 +[ 879.573352] [] tcp_setsockopt+0x32/0x40 +[ 879.573678] [] sock_common_setsockopt+0x14/0x20 +[ 879.574031] [] SyS_setsockopt+0x80/0xf0 +[ 879.574393] [] system_call_fastpath+0x16/0x1b +[ 879.574730] ---[ end trace a17cbc38eb8c5c00 ]--- + +v2: moving setting of skb->when for repaired skb-s in tcp_write_xmit, + where it's set for other skb-s. + +Fixes: 431a91242d8d ("tcp: timestamp SYN+DATA messages") +Fixes: 740b0f1841f6 ("tcp: switch rtt estimations to usec resolution") +Cc: Eric Dumazet +Cc: Pavel Emelyanov +Cc: "David S. Miller" +Signed-off-by: Andrey Vagin +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/tcp.h | 4 +++- + net/ipv4/tcp.c | 14 +++++++------- + net/ipv4/tcp_output.c | 5 ++++- + 3 files changed, 14 insertions(+), 9 deletions(-) + +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -718,8 +718,10 @@ struct tcp_skb_cb { + #define TCPCB_SACKED_RETRANS 0x02 /* SKB retransmitted */ + #define TCPCB_LOST 0x04 /* SKB is lost */ + #define TCPCB_TAGBITS 0x07 /* All tag bits */ ++#define TCPCB_REPAIRED 0x10 /* SKB repaired (no skb_mstamp) */ + #define TCPCB_EVER_RETRANS 0x80 /* Ever retransmitted frame */ +-#define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS) ++#define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS| \ ++ TCPCB_REPAIRED) + + __u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */ + /* 1 byte hole */ +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -1175,13 +1175,6 @@ new_segment: + goto wait_for_memory; + + /* +- * All packets are restored as if they have +- * already been sent. +- */ +- if (tp->repair) +- TCP_SKB_CB(skb)->when = tcp_time_stamp; +- +- /* + * Check whether we can use HW checksum. + */ + if (sk->sk_route_caps & NETIF_F_ALL_CSUM) +@@ -1190,6 +1183,13 @@ new_segment: + skb_entail(sk, skb); + copy = size_goal; + max = size_goal; ++ ++ /* All packets are restored as if they have ++ * already been sent. skb_mstamp isn't set to ++ * avoid wrong rtt estimation. ++ */ ++ if (tp->repair) ++ TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED; + } + + /* Try to append data to the end of skb. 
*/ +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -1916,8 +1916,11 @@ static bool tcp_write_xmit(struct sock * + tso_segs = tcp_init_tso_segs(sk, skb, mss_now); + BUG_ON(!tso_segs); + +- if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) ++ if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) { ++ /* "when" is used as a start point for the retransmit timer */ ++ TCP_SKB_CB(skb)->when = tcp_time_stamp; + goto repair; /* Skip network transmission */ ++ } + + cwnd_quota = tcp_cwnd_test(tp, skb); + if (!cwnd_quota) { diff --git a/queue-3.16/tcp-fix-ssthresh-and-undo-for-consecutive-short-frto-episodes.patch b/queue-3.16/tcp-fix-ssthresh-and-undo-for-consecutive-short-frto-episodes.patch new file mode 100644 index 00000000000..ce3afd7e1ff --- /dev/null +++ b/queue-3.16/tcp-fix-ssthresh-and-undo-for-consecutive-short-frto-episodes.patch @@ -0,0 +1,81 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Neal Cardwell +Date: Thu, 14 Aug 2014 16:13:07 -0400 +Subject: tcp: fix ssthresh and undo for consecutive short FRTO episodes + +From: Neal Cardwell + +[ Upstream commit 0c9ab09223fe9922baeb22546c9a90d774a4bde6 ] + +Fix TCP FRTO logic so that it always notices when snd_una advances, +indicating that any RTO after that point will be a new and distinct +loss episode. + +Previously there was a very specific sequence that could cause FRTO to +fail to notice a new loss episode had started: + +(1) RTO timer fires, enter FRTO and retransmit packet 1 in write queue +(2) receiver ACKs packet 1 +(3) FRTO sends 2 more packets +(4) RTO timer fires again (should start a new loss episode) + +The problem was in step (3) above, where tcp_process_loss() returned +early (in the spot marked "Step 2.b"), so that it never got to the +logic to clear icsk_retransmits. Thus icsk_retransmits stayed +non-zero. 
Thus in step (4) tcp_enter_loss() would see the non-zero +icsk_retransmits, decide that this RTO is not a new episode, and +decide not to cut ssthresh and remember the current cwnd and ssthresh +for undo. + +There were two main consequences to the bug that we have +observed. First, ssthresh was not decreased in step (4). Second, when +there was a series of such FRTO (1-4) sequences that happened to be +followed by an FRTO undo, we would restore the cwnd and ssthresh from +before the entire series started (instead of the cwnd and ssthresh +from before the most recent RTO). This could result in cwnd and +ssthresh being restored to values much bigger than the proper values. + +Signed-off-by: Neal Cardwell +Signed-off-by: Yuchung Cheng +Fixes: e33099f96d99c ("tcp: implement RFC5682 F-RTO") +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -2680,7 +2680,6 @@ static void tcp_enter_recovery(struct so + */ + static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) + { +- struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + bool recovered = !before(tp->snd_una, tp->high_seq); + +@@ -2706,12 +2705,9 @@ static void tcp_process_loss(struct sock + + if (recovered) { + /* F-RTO RFC5682 sec 3.1 step 2.a and 1st part of step 3.a */ +- icsk->icsk_retransmits = 0; + tcp_try_undo_recovery(sk); + return; + } +- if (flag & FLAG_DATA_ACKED) +- icsk->icsk_retransmits = 0; + if (tcp_is_reno(tp)) { + /* A Reno DUPACK means new data in F-RTO step 2.b above are + * delivered. Lower inflight to clock out (re)tranmissions. 
+@@ -3393,8 +3389,10 @@ static int tcp_ack(struct sock *sk, cons + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) + tcp_rearm_rto(sk); + +- if (after(ack, prior_snd_una)) ++ if (after(ack, prior_snd_una)) { + flag |= FLAG_SND_UNA_ADVANCED; ++ icsk->icsk_retransmits = 0; ++ } + + prior_fackets = tp->fackets_out; + diff --git a/queue-3.16/tcp-fix-tcp_release_cb-to-dispatch-via-address-family-for-mtu_reduced.patch b/queue-3.16/tcp-fix-tcp_release_cb-to-dispatch-via-address-family-for-mtu_reduced.patch new file mode 100644 index 00000000000..7da80f2fdc8 --- /dev/null +++ b/queue-3.16/tcp-fix-tcp_release_cb-to-dispatch-via-address-family-for-mtu_reduced.patch @@ -0,0 +1,134 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Neal Cardwell +Date: Thu, 14 Aug 2014 12:40:05 -0400 +Subject: tcp: fix tcp_release_cb() to dispatch via address family for mtu_reduced() + +From: Neal Cardwell + +[ Upstream commit 4fab9071950c2021d846e18351e0f46a1cffd67b ] + +Make sure we use the correct address-family-specific function for +handling MTU reductions from within tcp_release_cb(). + +Previously AF_INET6 sockets were incorrectly always using the IPv6 +code path when sometimes they were handling IPv4 traffic and thus had +an IPv4 dst. + +Signed-off-by: Neal Cardwell +Signed-off-by: Eric Dumazet +Diagnosed-by: Willem de Bruijn +Fixes: 563d34d057862 ("tcp: dont drop MTU reduction indications") +Reviewed-by: Hannes Frederic Sowa +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/inet_connection_sock.h | 1 + + include/net/sock.h | 1 - + include/net/tcp.h | 1 + + net/ipv4/tcp_ipv4.c | 5 +++-- + net/ipv4/tcp_output.c | 2 +- + net/ipv6/tcp_ipv6.c | 3 ++- + 6 files changed, 8 insertions(+), 5 deletions(-) + +--- a/include/net/inet_connection_sock.h ++++ b/include/net/inet_connection_sock.h +@@ -62,6 +62,7 @@ struct inet_connection_sock_af_ops { + void (*addr2sockaddr)(struct sock *sk, struct sockaddr *); + int (*bind_conflict)(const struct sock *sk, + const struct inet_bind_bucket *tb, bool relax); ++ void (*mtu_reduced)(struct sock *sk); + }; + + /** inet_connection_sock - INET connection oriented sock +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -971,7 +971,6 @@ struct proto { + struct sk_buff *skb); + + void (*release_cb)(struct sock *sk); +- void (*mtu_reduced)(struct sock *sk); + + /* Keeping track of sk's, looking them up, and port selection methods. */ + void (*hash)(struct sock *sk); +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -448,6 +448,7 @@ const u8 *tcp_parse_md5sig_option(const + */ + + void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb); ++void tcp_v4_mtu_reduced(struct sock *sk); + int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb); + struct sock *tcp_create_openreq_child(struct sock *sk, + struct request_sock *req, +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -269,7 +269,7 @@ EXPORT_SYMBOL(tcp_v4_connect); + * It can be called through tcp_release_cb() if socket was owned by user + * at the time tcp_v4_err() was called to handle ICMP message. 
+ */ +-static void tcp_v4_mtu_reduced(struct sock *sk) ++void tcp_v4_mtu_reduced(struct sock *sk) + { + struct dst_entry *dst; + struct inet_sock *inet = inet_sk(sk); +@@ -300,6 +300,7 @@ static void tcp_v4_mtu_reduced(struct so + tcp_simple_retransmit(sk); + } /* else let the usual retransmit timer handle it */ + } ++EXPORT_SYMBOL(tcp_v4_mtu_reduced); + + static void do_redirect(struct sk_buff *skb, struct sock *sk) + { +@@ -1880,6 +1881,7 @@ const struct inet_connection_sock_af_ops + .compat_setsockopt = compat_ip_setsockopt, + .compat_getsockopt = compat_ip_getsockopt, + #endif ++ .mtu_reduced = tcp_v4_mtu_reduced, + }; + EXPORT_SYMBOL(ipv4_specific); + +@@ -2499,7 +2501,6 @@ struct proto tcp_prot = { + .sendpage = tcp_sendpage, + .backlog_rcv = tcp_v4_do_rcv, + .release_cb = tcp_release_cb, +- .mtu_reduced = tcp_v4_mtu_reduced, + .hash = inet_hash, + .unhash = inet_unhash, + .get_port = inet_csk_get_port, +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -800,7 +800,7 @@ void tcp_release_cb(struct sock *sk) + __sock_put(sk); + } + if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) { +- sk->sk_prot->mtu_reduced(sk); ++ inet_csk(sk)->icsk_af_ops->mtu_reduced(sk); + __sock_put(sk); + } + } +--- a/net/ipv6/tcp_ipv6.c ++++ b/net/ipv6/tcp_ipv6.c +@@ -1681,6 +1681,7 @@ static const struct inet_connection_sock + .compat_setsockopt = compat_ipv6_setsockopt, + .compat_getsockopt = compat_ipv6_getsockopt, + #endif ++ .mtu_reduced = tcp_v6_mtu_reduced, + }; + + #ifdef CONFIG_TCP_MD5SIG +@@ -1711,6 +1712,7 @@ static const struct inet_connection_sock + .compat_setsockopt = compat_ipv6_setsockopt, + .compat_getsockopt = compat_ipv6_getsockopt, + #endif ++ .mtu_reduced = tcp_v4_mtu_reduced, + }; + + #ifdef CONFIG_TCP_MD5SIG +@@ -1950,7 +1952,6 @@ struct proto tcpv6_prot = { + .sendpage = tcp_sendpage, + .backlog_rcv = tcp_v6_do_rcv, + .release_cb = tcp_release_cb, +- .mtu_reduced = tcp_v6_mtu_reduced, + .hash = tcp_v6_hash, + .unhash = inet_unhash, + .get_port = 
inet_csk_get_port, diff --git a/queue-3.16/team-avoid-race-condition-in-scheduling-delayed-work.patch b/queue-3.16/team-avoid-race-condition-in-scheduling-delayed-work.patch new file mode 100644 index 00000000000..9e2f6ea4aee --- /dev/null +++ b/queue-3.16/team-avoid-race-condition-in-scheduling-delayed-work.patch @@ -0,0 +1,68 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Joe Lawrence +Date: Fri, 3 Oct 2014 09:58:34 -0400 +Subject: team: avoid race condition in scheduling delayed work + +From: Joe Lawrence + +[ Upstream commit 47549650abd13d873fd2e5fc218db19e21031074 ] + +When team_notify_peers and team_mcast_rejoin are called, they both reset +their respective .count_pending atomic variable. Then when the actual +worker function is executed, the variable is atomically decremented. +This pattern introduces a potential race condition where the +.count_pending rolls over and the worker function keeps rescheduling +until .count_pending decrements to zero again: + +THREAD 1 THREAD 2 + +======== ======== +team_notify_peers(teamX) + atomic_set count_pending = 1 + schedule_delayed_work + team_notify_peers(teamX) + atomic_set count_pending = 1 +team_notify_peers_work + atomic_dec_and_test + count_pending = 0 + (return) + schedule_delayed_work + team_notify_peers_work + atomic_dec_and_test + count_pending = -1 + schedule_delayed_work + (repeat until count_pending = 0) + +Instead of assigning a new value to .count_pending, use atomic_add to +tack-on the additional desired worker function invocations. + +Signed-off-by: Joe Lawrence +Acked-by: Jiri Pirko +Fixes: fc423ff00df3a19554414ee ("team: add peer notification") +Fixes: 492b200efdd20b8fcfdac87 ("team: add support for sending multicast rejoins") +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/team/team.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/team/team.c ++++ b/drivers/net/team/team.c +@@ -647,7 +647,7 @@ static void team_notify_peers(struct tea + { + if (!team->notify_peers.count || !netif_running(team->dev)) + return; +- atomic_set(&team->notify_peers.count_pending, team->notify_peers.count); ++ atomic_add(team->notify_peers.count, &team->notify_peers.count_pending); + schedule_delayed_work(&team->notify_peers.dw, 0); + } + +@@ -687,7 +687,7 @@ static void team_mcast_rejoin(struct tea + { + if (!team->mcast_rejoin.count || !netif_running(team->dev)) + return; +- atomic_set(&team->mcast_rejoin.count_pending, team->mcast_rejoin.count); ++ atomic_add(team->mcast_rejoin.count, &team->mcast_rejoin.count_pending); + schedule_delayed_work(&team->mcast_rejoin.dw, 0); + } + diff --git a/queue-3.16/tg3-allow-for-recieve-of-full-size-8021ad-frames.patch b/queue-3.16/tg3-allow-for-recieve-of-full-size-8021ad-frames.patch new file mode 100644 index 00000000000..4e41db38042 --- /dev/null +++ b/queue-3.16/tg3-allow-for-recieve-of-full-size-8021ad-frames.patch @@ -0,0 +1,41 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Vlad Yasevich +Date: Tue, 30 Sep 2014 19:39:36 -0400 +Subject: tg3: Allow for recieve of full-size 8021AD frames + +From: Vlad Yasevich + +[ Upstream commit 7d3083ee36b51e425b6abd76778a2046906b0fd3 ] + +When receiving a vlan-tagged frame that still contains +a vlan header, the length of the packet will be greater +than MTU+ETH_HLEN since it will account for the extra +vlan header. TG3 checks this for the case for 802.1Q, +but not for 802.1ad. As a result, full sized 802.1ad +frames get dropped by the card. + +Add a check for 802.1ad protocol when receiving full +sized frames. + +Suggested-by: Prashant Sreedharan +CC: Prashant Sreedharan +CC: Michael Chan +Signed-off-by: Vladislav Yasevich +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/tg3.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/broadcom/tg3.c ++++ b/drivers/net/ethernet/broadcom/tg3.c +@@ -6918,7 +6918,8 @@ static int tg3_rx(struct tg3_napi *tnapi + skb->protocol = eth_type_trans(skb, tp->dev); + + if (len > (tp->dev->mtu + ETH_HLEN) && +- skb->protocol != htons(ETH_P_8021Q)) { ++ skb->protocol != htons(ETH_P_8021Q) && ++ skb->protocol != htons(ETH_P_8021AD)) { + dev_kfree_skb_any(skb); + goto drop_it_no_recycle; + } diff --git a/queue-3.16/tg3-work-around-hw-fw-limitations-with-vlan-encapsulated-frames.patch b/queue-3.16/tg3-work-around-hw-fw-limitations-with-vlan-encapsulated-frames.patch new file mode 100644 index 00000000000..9a9a64cb803 --- /dev/null +++ b/queue-3.16/tg3-work-around-hw-fw-limitations-with-vlan-encapsulated-frames.patch @@ -0,0 +1,68 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Vlad Yasevich +Date: Thu, 18 Sep 2014 10:31:17 -0400 +Subject: tg3: Work around HW/FW limitations with vlan encapsulated frames + +From: Vlad Yasevich + +[ Upstream commit 476c18850c6cbaa3f2bb661ae9710645081563b9 ] + +TG3 appears to have an issue performing TSO and checksum offloading +correctly when the frame has been vlan encapsulated (non-accelerated). +In these cases, tcp checksum is not correctly updated. + +This patch attempts to work around this issue. After the patch, +802.1ad vlans start working correctly over tg3 devices. + +CC: Prashant Sreedharan +CC: Michael Chan +Signed-off-by: Vladislav Yasevich +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/tg3.c | 20 ++++++++++++++++++-- + 1 file changed, 18 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/broadcom/tg3.c ++++ b/drivers/net/ethernet/broadcom/tg3.c +@@ -7914,8 +7914,6 @@ static netdev_tx_t tg3_start_xmit(struct + + entry = tnapi->tx_prod; + base_flags = 0; +- if (skb->ip_summed == CHECKSUM_PARTIAL) +- base_flags |= TXD_FLAG_TCPUDP_CSUM; + + mss = skb_shinfo(skb)->gso_size; + if (mss) { +@@ -7929,6 +7927,13 @@ static netdev_tx_t tg3_start_xmit(struct + + hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb) - ETH_HLEN; + ++ /* HW/FW can not correctly segment packets that have been ++ * vlan encapsulated. ++ */ ++ if (skb->protocol == htons(ETH_P_8021Q) || ++ skb->protocol == htons(ETH_P_8021AD)) ++ return tg3_tso_bug(tp, tnapi, txq, skb); ++ + if (!skb_is_gso_v6(skb)) { + if (unlikely((ETH_HLEN + hdr_len) > 80) && + tg3_flag(tp, TSO_BUG)) +@@ -7979,6 +7984,17 @@ static netdev_tx_t tg3_start_xmit(struct + base_flags |= tsflags << 12; + } + } ++ } else if (skb->ip_summed == CHECKSUM_PARTIAL) { ++ /* HW/FW can not correctly checksum packets that have been ++ * vlan encapsulated. 
++ */ ++ if (skb->protocol == htons(ETH_P_8021Q) || ++ skb->protocol == htons(ETH_P_8021AD)) { ++ if (skb_checksum_help(skb)) ++ goto drop; ++ } else { ++ base_flags |= TXD_FLAG_TCPUDP_CSUM; ++ } + } + + if (tg3_flag(tp, USE_JUMBO_BDFLAG) && diff --git a/queue-3.16/tipc-fix-message-importance-range-check.patch b/queue-3.16/tipc-fix-message-importance-range-check.patch new file mode 100644 index 00000000000..300936696fc --- /dev/null +++ b/queue-3.16/tipc-fix-message-importance-range-check.patch @@ -0,0 +1,50 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Erik Hugne +Date: Fri, 15 Aug 2014 16:44:35 +0200 +Subject: tipc: fix message importance range check + +From: Erik Hugne + +[ Upstream commit ac32c7f705692b92fe12dcbe88fe87136fdfff6f ] + +Commit 3b4f302d8578 ("tipc: eliminate +redundant locking") introduced a bug by removing the sanity check +for message importance, allowing programs to assign any value to +the msg_user field. This will mess up the packet reception logic +and may cause random link resets. + +Signed-off-by: Erik Hugne +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/tipc/port.h | 5 ++++- + net/tipc/socket.c | 2 +- + 2 files changed, 5 insertions(+), 2 deletions(-) + +--- a/net/tipc/port.h ++++ b/net/tipc/port.h +@@ -229,9 +229,12 @@ static inline int tipc_port_importance(s + return msg_importance(&port->phdr); + } + +-static inline void tipc_port_set_importance(struct tipc_port *port, int imp) ++static inline int tipc_port_set_importance(struct tipc_port *port, int imp) + { ++ if (imp > TIPC_CRITICAL_IMPORTANCE) ++ return -EINVAL; + msg_set_importance(&port->phdr, (u32)imp); ++ return 0; + } + + #endif +--- a/net/tipc/socket.c ++++ b/net/tipc/socket.c +@@ -1841,7 +1841,7 @@ static int tipc_setsockopt(struct socket + + switch (opt) { + case TIPC_IMPORTANCE: +- tipc_port_set_importance(port, value); ++ res = tipc_port_set_importance(port, value); + break; + case TIPC_SRC_DROPPABLE: + if (sock->type != SOCK_STREAM) diff --git a/queue-3.16/vxlan-fix-incorrect-initializer-in-union-vxlan_addr.patch b/queue-3.16/vxlan-fix-incorrect-initializer-in-union-vxlan_addr.patch new file mode 100644 index 00000000000..633c4e8502f --- /dev/null +++ b/queue-3.16/vxlan-fix-incorrect-initializer-in-union-vxlan_addr.patch @@ -0,0 +1,74 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Gerhard Stenzel +Date: Fri, 22 Aug 2014 21:34:16 +0200 +Subject: vxlan: fix incorrect initializer in union vxlan_addr + +From: Gerhard Stenzel + +[ Upstream commit a45e92a599e77ee6a850eabdd0141633fde03915 ] + +The first initializer in the following + + union vxlan_addr ipa = { + .sin.sin_addr.s_addr = tip, + .sa.sa_family = AF_INET, + }; + +is optimised away by the compiler, due to the second initializer, +therefore initialising .sin.sin_addr.s_addr always to 0. +This results in netlink messages indicating a L3 miss never contain the +missed IP address. This was observed with GCC 4.8 and 4.9. I do not know about previous versions. 
+The problem affects user space programs relying on an IP address being +sent as part of a netlink message indicating a L3 miss. + +Changing + .sa.sa_family = AF_INET, +to + .sin.sin_family = AF_INET, +fixes the problem. + +Signed-off-by: Gerhard Stenzel +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vxlan.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/net/vxlan.c ++++ b/drivers/net/vxlan.c +@@ -1325,7 +1325,7 @@ static int arp_reduce(struct net_device + } else if (vxlan->flags & VXLAN_F_L3MISS) { + union vxlan_addr ipa = { + .sin.sin_addr.s_addr = tip, +- .sa.sa_family = AF_INET, ++ .sin.sin_family = AF_INET, + }; + + vxlan_ip_miss(dev, &ipa); +@@ -1486,7 +1486,7 @@ static int neigh_reduce(struct net_devic + } else if (vxlan->flags & VXLAN_F_L3MISS) { + union vxlan_addr ipa = { + .sin6.sin6_addr = msg->target, +- .sa.sa_family = AF_INET6, ++ .sin6.sin6_family = AF_INET6, + }; + + vxlan_ip_miss(dev, &ipa); +@@ -1519,7 +1519,7 @@ static bool route_shortcircuit(struct ne + if (!n && (vxlan->flags & VXLAN_F_L3MISS)) { + union vxlan_addr ipa = { + .sin.sin_addr.s_addr = pip->daddr, +- .sa.sa_family = AF_INET, ++ .sin.sin_family = AF_INET, + }; + + vxlan_ip_miss(dev, &ipa); +@@ -1540,7 +1540,7 @@ static bool route_shortcircuit(struct ne + if (!n && (vxlan->flags & VXLAN_F_L3MISS)) { + union vxlan_addr ipa = { + .sin6.sin6_addr = pip6->daddr, +- .sa.sa_family = AF_INET6, ++ .sin6.sin6_family = AF_INET6, + }; + + vxlan_ip_miss(dev, &ipa); diff --git a/queue-3.16/xfrm-generate-blackhole-routes-only-from-route-lookup-functions.patch b/queue-3.16/xfrm-generate-blackhole-routes-only-from-route-lookup-functions.patch new file mode 100644 index 00000000000..8f509c78c03 --- /dev/null +++ b/queue-3.16/xfrm-generate-blackhole-routes-only-from-route-lookup-functions.patch @@ -0,0 +1,130 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Steffen Klassert +Date: Tue, 16 Sep 2014 10:08:40 +0200 +Subject: 
xfrm: Generate blackhole routes only from route lookup functions + +From: Steffen Klassert + +[ Upstream commit f92ee61982d6da15a9e49664ecd6405a15a2ee56 ] + +Currently we generate a blackhole route whenever we have +matching policies but can not resolve the states. Here we assume +that dst_output() is called to kill the blackholed packets. +Unfortunately this assumption is not true in all cases, so +it is possible that these packets leave the system unwanted. + +We fix this by generating blackhole routes only from the +route lookup functions, here we can guarantee a call to +dst_output() afterwards. + +Fixes: 2774c131b1d ("xfrm: Handle blackhole route creation via afinfo.") +Reported-by: Konstantinos Kolelis +Signed-off-by: Steffen Klassert +Signed-off-by: Greg Kroah-Hartman +--- + include/net/dst.h | 15 ++++++++++++++- + net/ipv4/route.c | 6 +++--- + net/ipv6/ip6_output.c | 4 ++-- + net/xfrm/xfrm_policy.c | 18 +++++++++++++++++- + 4 files changed, 36 insertions(+), 7 deletions(-) + +--- a/include/net/dst.h ++++ b/include/net/dst.h +@@ -490,7 +490,16 @@ static inline struct dst_entry *xfrm_loo + int flags) + { + return dst_orig; +-} ++} ++ ++static inline struct dst_entry *xfrm_lookup_route(struct net *net, ++ struct dst_entry *dst_orig, ++ const struct flowi *fl, ++ struct sock *sk, ++ int flags) ++{ ++ return dst_orig; ++} + + static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst) + { +@@ -502,6 +511,10 @@ struct dst_entry *xfrm_lookup(struct net + const struct flowi *fl, struct sock *sk, + int flags); + ++struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig, ++ const struct flowi *fl, struct sock *sk, ++ int flags); ++ + /* skb attached with this dst needs transformation if dst->xfrm is valid */ + static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst) + { +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -2267,9 +2267,9 @@ struct rtable *ip_route_output_flow(stru + return rt; + + if 
(flp4->flowi4_proto) +- rt = (struct rtable *) xfrm_lookup(net, &rt->dst, +- flowi4_to_flowi(flp4), +- sk, 0); ++ rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst, ++ flowi4_to_flowi(flp4), ++ sk, 0); + + return rt; + } +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -1008,7 +1008,7 @@ struct dst_entry *ip6_dst_lookup_flow(st + if (final_dst) + fl6->daddr = *final_dst; + +- return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); ++ return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); + } + EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); + +@@ -1040,7 +1040,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow + if (final_dst) + fl6->daddr = *final_dst; + +- return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); ++ return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); + } + EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); + +--- a/net/xfrm/xfrm_policy.c ++++ b/net/xfrm/xfrm_policy.c +@@ -2138,7 +2138,7 @@ struct dst_entry *xfrm_lookup(struct net + xfrm_pols_put(pols, drop_pols); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); + +- return make_blackhole(net, family, dst_orig); ++ return ERR_PTR(-EREMOTE); + } + + err = -EAGAIN; +@@ -2195,6 +2195,22 @@ dropdst: + } + EXPORT_SYMBOL(xfrm_lookup); + ++/* Callers of xfrm_lookup_route() must ensure a call to dst_output(). ++ * Otherwise we may send out blackholed packets. 
++ */ ++struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig, ++ const struct flowi *fl, ++ struct sock *sk, int flags) ++{ ++ struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk, flags); ++ ++ if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE) ++ return make_blackhole(net, dst_orig->ops->family, dst_orig); ++ ++ return dst; ++} ++EXPORT_SYMBOL(xfrm_lookup_route); ++ + static inline int + xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl) + { diff --git a/queue-3.16/xfrm-generate-queueing-routes-only-from-route-lookup-functions.patch b/queue-3.16/xfrm-generate-queueing-routes-only-from-route-lookup-functions.patch new file mode 100644 index 00000000000..cecadc16d29 --- /dev/null +++ b/queue-3.16/xfrm-generate-queueing-routes-only-from-route-lookup-functions.patch @@ -0,0 +1,149 @@ +From foo@baz Sun Oct 12 20:11:55 CEST 2014 +From: Steffen Klassert +Date: Tue, 16 Sep 2014 10:08:49 +0200 +Subject: xfrm: Generate queueing routes only from route lookup functions + +From: Steffen Klassert + +[ Upstream commit b8c203b2d2fc961bafd53b41d5396bbcdec55998 ] + +Currently we generate a queueing route if we have matching policies +but can not resolve the states and the sysctl xfrm_larval_drop is +disabled. Here we assume that dst_output() is called to kill the +queued packets. Unfortunately this assumption is not true in all +cases, so it is possible that these packets leave the system unwanted. + +We fix this by generating queueing routes only from the +route lookup functions, here we can guarantee a call to +dst_output() afterwards. 
+ +Fixes: a0073fe18e71 ("xfrm: Add a state resolution packet queue") +Reported-by: Konstantinos Kolelis +Signed-off-by: Steffen Klassert +Signed-off-by: Greg Kroah-Hartman +--- + include/net/dst.h | 1 + + net/xfrm/xfrm_policy.c | 32 ++++++++++++++++++++++++-------- + 2 files changed, 25 insertions(+), 8 deletions(-) + +--- a/include/net/dst.h ++++ b/include/net/dst.h +@@ -480,6 +480,7 @@ void dst_init(void); + /* Flags for xfrm_lookup flags argument. */ + enum { + XFRM_LOOKUP_ICMP = 1 << 0, ++ XFRM_LOOKUP_QUEUE = 1 << 1, + }; + + struct flowi; +--- a/net/xfrm/xfrm_policy.c ++++ b/net/xfrm/xfrm_policy.c +@@ -39,6 +39,11 @@ + #define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ)) + #define XFRM_MAX_QUEUE_LEN 100 + ++struct xfrm_flo { ++ struct dst_entry *dst_orig; ++ u8 flags; ++}; ++ + static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); + static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO] + __read_mostly; +@@ -1877,13 +1882,14 @@ static int xdst_queue_output(struct sock + } + + static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net, +- struct dst_entry *dst, ++ struct xfrm_flo *xflo, + const struct flowi *fl, + int num_xfrms, + u16 family) + { + int err; + struct net_device *dev; ++ struct dst_entry *dst; + struct dst_entry *dst1; + struct xfrm_dst *xdst; + +@@ -1891,9 +1897,12 @@ static struct xfrm_dst *xfrm_create_dumm + if (IS_ERR(xdst)) + return xdst; + +- if (net->xfrm.sysctl_larval_drop || num_xfrms <= 0) ++ if (!(xflo->flags & XFRM_LOOKUP_QUEUE) || ++ net->xfrm.sysctl_larval_drop || ++ num_xfrms <= 0) + return xdst; + ++ dst = xflo->dst_orig; + dst1 = &xdst->u.dst; + dst_hold(dst); + xdst->route = dst; +@@ -1935,7 +1944,7 @@ static struct flow_cache_object * + xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, + struct flow_cache_object *oldflo, void *ctx) + { +- struct dst_entry *dst_orig = (struct dst_entry *)ctx; ++ struct xfrm_flo *xflo = (struct xfrm_flo *)ctx; + struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; 
+ struct xfrm_dst *xdst, *new_xdst; + int num_pols = 0, num_xfrms = 0, i, err, pol_dead; +@@ -1976,7 +1985,8 @@ xfrm_bundle_lookup(struct net *net, cons + goto make_dummy_bundle; + } + +- new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig); ++ new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, ++ xflo->dst_orig); + if (IS_ERR(new_xdst)) { + err = PTR_ERR(new_xdst); + if (err != -EAGAIN) +@@ -2010,7 +2020,7 @@ make_dummy_bundle: + /* We found policies, but there's no bundles to instantiate: + * either because the policy blocks, has no transformations or + * we could not build template (no xfrm_states).*/ +- xdst = xfrm_create_dummy_bundle(net, dst_orig, fl, num_xfrms, family); ++ xdst = xfrm_create_dummy_bundle(net, xflo, fl, num_xfrms, family); + if (IS_ERR(xdst)) { + xfrm_pols_put(pols, num_pols); + return ERR_CAST(xdst); +@@ -2104,13 +2114,18 @@ struct dst_entry *xfrm_lookup(struct net + } + + if (xdst == NULL) { ++ struct xfrm_flo xflo; ++ ++ xflo.dst_orig = dst_orig; ++ xflo.flags = flags; ++ + /* To accelerate a bit... 
*/ + if ((dst_orig->flags & DST_NOXFRM) || + !net->xfrm.policy_count[XFRM_POLICY_OUT]) + goto nopol; + + flo = flow_cache_lookup(net, fl, family, dir, +- xfrm_bundle_lookup, dst_orig); ++ xfrm_bundle_lookup, &xflo); + if (flo == NULL) + goto nopol; + if (IS_ERR(flo)) { +@@ -2202,7 +2217,8 @@ struct dst_entry *xfrm_lookup_route(stru + const struct flowi *fl, + struct sock *sk, int flags) + { +- struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk, flags); ++ struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk, ++ flags | XFRM_LOOKUP_QUEUE); + + if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE) + return make_blackhole(net, dst_orig->ops->family, dst_orig); +@@ -2476,7 +2492,7 @@ int __xfrm_route_forward(struct sk_buff + + skb_dst_force(skb); + +- dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, 0); ++ dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, XFRM_LOOKUP_QUEUE); + if (IS_ERR(dst)) { + res = 0; + dst = NULL;