]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.9-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 31 May 2017 00:14:32 +0000 (09:14 +0900)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 31 May 2017 00:14:32 +0000 (09:14 +0900)
added patches:
be2net-fix-offload-features-for-q-in-q-packets.patch
bonding-fix-accounting-of-active-ports-in-3ad.patch
bpf-add-bpf_clone_redirect-to-bpf_helper_changes_pkt_data.patch
bpf-arm64-fix-faulty-emission-of-map-access-in-tail-calls.patch
bridge-netlink-check-vlan_default_pvid-range.patch
bridge-start-hello_timer-when-enabling-kernel_stp-in-br_stp_start.patch
dccp-tcp-do-not-inherit-mc_list-from-parent.patch
driver-vrf-fix-one-possible-use-after-free-issue.patch
ipv4-add-reference-counting-to-metrics.patch
ipv6-check-ip6_find_1stfragopt-return-value-properly.patch
ipv6-dccp-do-not-inherit-ipv6_mc_list-from-parent.patch
ipv6-fix-out-of-bound-writes-in-__ip6_append_data.patch
ipv6-prevent-overrun-when-parsing-v6-header-options.patch
net-fix-compile-error-in-skb_orphan_partial.patch
net-improve-handling-of-failures-on-link-and-route-dumps.patch
net-mlx5-avoid-using-pending-command-interface-slots.patch
net-mlx5e-fix-ethtool-pause-support-and-advertise-reporting.patch
net-mlx5e-use-the-correct-pause-values-for-ethtool-advertising.patch
net-packet-fix-missing-net_device-reference-release.patch
net-phy-marvell-limit-errata-to-88m1101.patch
netem-fix-skb_orphan_partial.patch
qmi_wwan-add-another-lenovo-em74xx-device-id.patch
s390-qeth-add-missing-hash-table-initializations.patch
s390-qeth-avoid-null-pointer-dereference-on-osn.patch
s390-qeth-handle-sysfs-error-during-initialization.patch
s390-qeth-unbreak-osm-and-osn-support.patch
sctp-do-not-inherit-ipv6_-mc-ac-fl-_list-from-parent.patch
sctp-fix-icmp-processing-if-skb-is-non-linear.patch
sctp-fix-src-address-selection-if-using-secondary-addresses-for-ipv6.patch
tcp-avoid-fastopen-api-to-be-used-on-af_unspec.patch
tcp-avoid-fragmenting-peculiar-skbs-in-sack.patch
tcp-eliminate-negative-reordering-in-tcp_clean_rtx_queue.patch
virtio-net-enable-tso-checksum-offloads-for-q-in-q-vlans.patch
vlan-fix-tcp-checksum-offloads-in-q-in-q-vlans.patch

34 files changed:
queue-4.9/be2net-fix-offload-features-for-q-in-q-packets.patch [new file with mode: 0644]
queue-4.9/bonding-fix-accounting-of-active-ports-in-3ad.patch [new file with mode: 0644]
queue-4.9/bpf-add-bpf_clone_redirect-to-bpf_helper_changes_pkt_data.patch [new file with mode: 0644]
queue-4.9/bpf-arm64-fix-faulty-emission-of-map-access-in-tail-calls.patch [new file with mode: 0644]
queue-4.9/bridge-netlink-check-vlan_default_pvid-range.patch [new file with mode: 0644]
queue-4.9/bridge-start-hello_timer-when-enabling-kernel_stp-in-br_stp_start.patch [new file with mode: 0644]
queue-4.9/dccp-tcp-do-not-inherit-mc_list-from-parent.patch [new file with mode: 0644]
queue-4.9/driver-vrf-fix-one-possible-use-after-free-issue.patch [new file with mode: 0644]
queue-4.9/ipv4-add-reference-counting-to-metrics.patch [new file with mode: 0644]
queue-4.9/ipv6-check-ip6_find_1stfragopt-return-value-properly.patch [new file with mode: 0644]
queue-4.9/ipv6-dccp-do-not-inherit-ipv6_mc_list-from-parent.patch [new file with mode: 0644]
queue-4.9/ipv6-fix-out-of-bound-writes-in-__ip6_append_data.patch [new file with mode: 0644]
queue-4.9/ipv6-prevent-overrun-when-parsing-v6-header-options.patch [new file with mode: 0644]
queue-4.9/net-fix-compile-error-in-skb_orphan_partial.patch [new file with mode: 0644]
queue-4.9/net-improve-handling-of-failures-on-link-and-route-dumps.patch [new file with mode: 0644]
queue-4.9/net-mlx5-avoid-using-pending-command-interface-slots.patch [new file with mode: 0644]
queue-4.9/net-mlx5e-fix-ethtool-pause-support-and-advertise-reporting.patch [new file with mode: 0644]
queue-4.9/net-mlx5e-use-the-correct-pause-values-for-ethtool-advertising.patch [new file with mode: 0644]
queue-4.9/net-packet-fix-missing-net_device-reference-release.patch [new file with mode: 0644]
queue-4.9/net-phy-marvell-limit-errata-to-88m1101.patch [new file with mode: 0644]
queue-4.9/netem-fix-skb_orphan_partial.patch [new file with mode: 0644]
queue-4.9/qmi_wwan-add-another-lenovo-em74xx-device-id.patch [new file with mode: 0644]
queue-4.9/s390-qeth-add-missing-hash-table-initializations.patch [new file with mode: 0644]
queue-4.9/s390-qeth-avoid-null-pointer-dereference-on-osn.patch [new file with mode: 0644]
queue-4.9/s390-qeth-handle-sysfs-error-during-initialization.patch [new file with mode: 0644]
queue-4.9/s390-qeth-unbreak-osm-and-osn-support.patch [new file with mode: 0644]
queue-4.9/sctp-do-not-inherit-ipv6_-mc-ac-fl-_list-from-parent.patch [new file with mode: 0644]
queue-4.9/sctp-fix-icmp-processing-if-skb-is-non-linear.patch [new file with mode: 0644]
queue-4.9/sctp-fix-src-address-selection-if-using-secondary-addresses-for-ipv6.patch [new file with mode: 0644]
queue-4.9/tcp-avoid-fastopen-api-to-be-used-on-af_unspec.patch [new file with mode: 0644]
queue-4.9/tcp-avoid-fragmenting-peculiar-skbs-in-sack.patch [new file with mode: 0644]
queue-4.9/tcp-eliminate-negative-reordering-in-tcp_clean_rtx_queue.patch [new file with mode: 0644]
queue-4.9/virtio-net-enable-tso-checksum-offloads-for-q-in-q-vlans.patch [new file with mode: 0644]
queue-4.9/vlan-fix-tcp-checksum-offloads-in-q-in-q-vlans.patch [new file with mode: 0644]

diff --git a/queue-4.9/be2net-fix-offload-features-for-q-in-q-packets.patch b/queue-4.9/be2net-fix-offload-features-for-q-in-q-packets.patch
new file mode 100644 (file)
index 0000000..253f780
--- /dev/null
@@ -0,0 +1,44 @@
+From foo@baz Wed May 31 09:13:34 JST 2017
+From: Vlad Yasevich <vyasevich@gmail.com>
+Date: Tue, 23 May 2017 13:38:42 -0400
+Subject: be2net: Fix offload features for Q-in-Q packets
+
+From: Vlad Yasevich <vyasevich@gmail.com>
+
+
+[ Upstream commit cc6e9de62a7f84c9293a2ea41bc412b55bb46e85 ]
+
+At least some of the be2net cards do not seem to be capable
+of performing checksum offload computations on Q-in-Q packets.
+In these cases, the received checksum on the remote is invalid
+and TCP syn packets are dropped.
+
+This patch adds a call to check disabled acceleration features
+on Q-in-Q tagged traffic.
+
+CC: Sathya Perla <sathya.perla@broadcom.com>
+CC: Ajit Khaparde <ajit.khaparde@broadcom.com>
+CC: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
+CC: Somnath Kotur <somnath.kotur@broadcom.com>
+Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/emulex/benet/be_main.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/emulex/benet/be_main.c
++++ b/drivers/net/ethernet/emulex/benet/be_main.c
+@@ -5144,9 +5144,11 @@ static netdev_features_t be_features_che
+       struct be_adapter *adapter = netdev_priv(dev);
+       u8 l4_hdr = 0;
+-      /* The code below restricts offload features for some tunneled packets.
++      /* The code below restricts offload features for some tunneled and
++       * Q-in-Q packets.
+        * Offload features for normal (non tunnel) packets are unchanged.
+        */
++      features = vlan_features_check(skb, features);
+       if (!skb->encapsulation ||
+           !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
+               return features;
diff --git a/queue-4.9/bonding-fix-accounting-of-active-ports-in-3ad.patch b/queue-4.9/bonding-fix-accounting-of-active-ports-in-3ad.patch
new file mode 100644 (file)
index 0000000..da68ed3
--- /dev/null
@@ -0,0 +1,70 @@
+From foo@baz Wed May 31 09:13:34 JST 2017
+From: Jarod Wilson <jarod@redhat.com>
+Date: Fri, 19 May 2017 19:43:45 -0400
+Subject: bonding: fix accounting of active ports in 3ad
+
+From: Jarod Wilson <jarod@redhat.com>
+
+
+[ Upstream commit 751da2a69b7cc82d83dc310ed7606225f2d6e014 ]
+
+As of 7bb11dc9f59d and 0622cab0341c, bond slaves in a 3ad bond are not
+removed from the aggregator when they are down, and the active slave count
+is NOT equal to number of ports in the aggregator, but rather the number
+of ports in the aggregator that are still enabled. The sysfs spew for
+bonding_show_ad_num_ports() has a comment that says "Show number of active
+802.3ad ports.", but it's currently showing total number of ports, both
+active and inactive. Remedy it by using the same logic introduced in
+0622cab0341c in __bond_3ad_get_active_agg_info(), so sysfs, procfs and
+netlink all report the number of active ports. Note that this means that
+IFLA_BOND_AD_INFO_NUM_PORTS really means NUM_ACTIVE_PORTS instead of
+NUM_PORTS, and thus perhaps should be renamed for clarity.
+
+Lightly tested on a dual i40e lacp bond, simulating link downs with an ip
+link set dev <slave2> down, was able to produce the state where I could
+see both in the same aggregator, but a number of ports count of 1.
+
+MII Status: up
+Active Aggregator Info:
+        Aggregator ID: 1
+        Number of ports: 2 <---
+Slave Interface: ens10
+MII Status: up <---
+Aggregator ID: 1
+Slave Interface: ens11
+MII Status: up
+Aggregator ID: 1
+
+MII Status: up
+Active Aggregator Info:
+        Aggregator ID: 1
+        Number of ports: 1 <---
+Slave Interface: ens10
+MII Status: down <---
+Aggregator ID: 1
+Slave Interface: ens11
+MII Status: up
+Aggregator ID: 1
+
+CC: Jay Vosburgh <j.vosburgh@gmail.com>
+CC: Veaceslav Falico <vfalico@gmail.com>
+CC: Andy Gospodarek <andy@greyhouse.net>
+CC: netdev@vger.kernel.org
+Signed-off-by: Jarod Wilson <jarod@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/bonding/bond_3ad.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/bonding/bond_3ad.c
++++ b/drivers/net/bonding/bond_3ad.c
+@@ -2573,7 +2573,7 @@ int __bond_3ad_get_active_agg_info(struc
+               return -1;
+       ad_info->aggregator_id = aggregator->aggregator_identifier;
+-      ad_info->ports = aggregator->num_of_ports;
++      ad_info->ports = __agg_active_ports(aggregator);
+       ad_info->actor_key = aggregator->actor_oper_aggregator_key;
+       ad_info->partner_key = aggregator->partner_oper_aggregator_key;
+       ether_addr_copy(ad_info->partner_system,
diff --git a/queue-4.9/bpf-add-bpf_clone_redirect-to-bpf_helper_changes_pkt_data.patch b/queue-4.9/bpf-add-bpf_clone_redirect-to-bpf_helper_changes_pkt_data.patch
new file mode 100644 (file)
index 0000000..10506a2
--- /dev/null
@@ -0,0 +1,34 @@
+From foo@baz Wed May 31 09:13:34 JST 2017
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Thu, 25 May 2017 01:05:07 +0200
+Subject: bpf: add bpf_clone_redirect to bpf_helper_changes_pkt_data
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+
+[ Upstream commit 41703a731066fde79c3e5ccf3391cf77a98aeda5 ]
+
+The bpf_clone_redirect() still needs to be listed in
+bpf_helper_changes_pkt_data() since we call into
+bpf_try_make_head_writable() from there, thus we need
+to invalidate prior pkt regs as well.
+
+Fixes: 36bbef52c7eb ("bpf: direct packet write and access for helpers for clsact progs")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/filter.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -2198,6 +2198,7 @@ bool bpf_helper_changes_skb_data(void *f
+           func == bpf_skb_change_proto ||
+           func == bpf_skb_change_tail ||
+           func == bpf_skb_pull_data ||
++          func == bpf_clone_redirect ||
+           func == bpf_l3_csum_replace ||
+           func == bpf_l4_csum_replace)
+               return true;
diff --git a/queue-4.9/bpf-arm64-fix-faulty-emission-of-map-access-in-tail-calls.patch b/queue-4.9/bpf-arm64-fix-faulty-emission-of-map-access-in-tail-calls.patch
new file mode 100644 (file)
index 0000000..b880fa0
--- /dev/null
@@ -0,0 +1,73 @@
+From foo@baz Wed May 31 09:13:34 JST 2017
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Thu, 11 May 2017 01:53:15 +0200
+Subject: bpf, arm64: fix faulty emission of map access in tail calls
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+
+[ Upstream commit d8b54110ee944de522ccd3531191f39986ec20f9 ]
+
+Shubham was recently asking on netdev why in arm64 JIT we don't multiply
+the index for accessing the tail call map by 8. That led me into testing
+out arm64 JIT wrt tail calls and it turned out I got a NULL pointer
+dereference on the tail call.
+
+The buggy access is at:
+
+  prog = array->ptrs[index];
+  if (prog == NULL)
+      goto out;
+
+  [...]
+  00000060:  d2800e0a  mov x10, #0x70 // #112
+  00000064:  f86a682a  ldr x10, [x1,x10]
+  00000068:  f862694b  ldr x11, [x10,x2]
+  0000006c:  b40000ab  cbz x11, 0x00000080
+  [...]
+
+The code triggering the crash is f862694b. x1 at the time contains the
+address of the bpf array, x10 offsetof(struct bpf_array, ptrs). Meaning,
+above we load the pointer to the program at map slot 0 into x10. x10
+can then be NULL if the slot is not occupied, which we later on try to
+access with a user given offset in x2 that is the map index.
+
+Fix this by emitting the following instead:
+
+  [...]
+  00000060:  d2800e0a  mov x10, #0x70 // #112
+  00000064:  8b0a002a  add x10, x1, x10
+  00000068:  d37df04b  lsl x11, x2, #3
+  0000006c:  f86b694b  ldr x11, [x10,x11]
+  00000070:  b40000ab  cbz x11, 0x00000084
+  [...]
+
+This basically adds the offset to ptrs to the base address of the bpf
+array we got and we later on access the map with an index * 8 offset
+relative to that. The tail call map itself is basically one large area
+with meta data at the head followed by the array of prog pointers.
+This makes tail calls working again, tested on Cavium ThunderX ARMv8.
+
+Fixes: ddb55992b04d ("arm64: bpf: implement bpf_tail_call() helper")
+Reported-by: Shubham Bansal <illusionist.neo@gmail.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/net/bpf_jit_comp.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/arch/arm64/net/bpf_jit_comp.c
++++ b/arch/arm64/net/bpf_jit_comp.c
+@@ -252,8 +252,9 @@ static int emit_bpf_tail_call(struct jit
+        */
+       off = offsetof(struct bpf_array, ptrs);
+       emit_a64_mov_i64(tmp, off, ctx);
+-      emit(A64_LDR64(tmp, r2, tmp), ctx);
+-      emit(A64_LDR64(prg, tmp, r3), ctx);
++      emit(A64_ADD(1, tmp, r2, tmp), ctx);
++      emit(A64_LSL(1, prg, r3, 3), ctx);
++      emit(A64_LDR64(prg, tmp, prg), ctx);
+       emit(A64_CBZ(1, prg, jmp_offset), ctx);
+       /* goto *(prog->bpf_func + prologue_size); */
diff --git a/queue-4.9/bridge-netlink-check-vlan_default_pvid-range.patch b/queue-4.9/bridge-netlink-check-vlan_default_pvid-range.patch
new file mode 100644 (file)
index 0000000..90f8179
--- /dev/null
@@ -0,0 +1,53 @@
+From foo@baz Wed May 31 09:13:34 JST 2017
+From: Tobias Jungel <tobias.jungel@bisdn.de>
+Date: Wed, 17 May 2017 09:29:12 +0200
+Subject: bridge: netlink: check vlan_default_pvid range
+
+From: Tobias Jungel <tobias.jungel@bisdn.de>
+
+
+[ Upstream commit a285860211bf257b0e6d522dac6006794be348af ]
+
+Currently it is allowed to set the default pvid of a bridge to a value
+above VLAN_VID_MASK (0xfff). This patch adds a check to br_validate and
+returns -EINVAL in case the pvid is out of bounds.
+
+Reproduce by calling:
+
+[root@test ~]# ip l a type bridge
+[root@test ~]# ip l a type dummy
+[root@test ~]# ip l s bridge0 type bridge vlan_filtering 1
+[root@test ~]# ip l s bridge0 type bridge vlan_default_pvid 9999
+[root@test ~]# ip l s dummy0 master bridge0
+[root@test ~]# bridge vlan
+port   vlan ids
+bridge0         9999 PVID Egress Untagged
+
+dummy0  9999 PVID Egress Untagged
+
+Fixes: 0f963b7592ef ("bridge: netlink: add support for default_pvid")
+Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Signed-off-by: Tobias Jungel <tobias.jungel@bisdn.de>
+Acked-by: Sabrina Dubroca <sd@queasysnail.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_netlink.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/net/bridge/br_netlink.c
++++ b/net/bridge/br_netlink.c
+@@ -776,6 +776,13 @@ static int br_validate(struct nlattr *tb
+                       return -EPROTONOSUPPORT;
+               }
+       }
++
++      if (data[IFLA_BR_VLAN_DEFAULT_PVID]) {
++              __u16 defpvid = nla_get_u16(data[IFLA_BR_VLAN_DEFAULT_PVID]);
++
++              if (defpvid >= VLAN_VID_MASK)
++                      return -EINVAL;
++      }
+ #endif
+       return 0;
diff --git a/queue-4.9/bridge-start-hello_timer-when-enabling-kernel_stp-in-br_stp_start.patch b/queue-4.9/bridge-start-hello_timer-when-enabling-kernel_stp-in-br_stp_start.patch
new file mode 100644 (file)
index 0000000..1476ac6
--- /dev/null
@@ -0,0 +1,58 @@
+From foo@baz Wed May 31 09:13:34 JST 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Fri, 19 May 2017 22:20:29 +0800
+Subject: bridge: start hello_timer when enabling KERNEL_STP in br_stp_start
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit 6d18c732b95c0a9d35e9f978b4438bba15412284 ]
+
+Since commit 76b91c32dd86 ("bridge: stp: when using userspace stp stop
+kernel hello and hold timers"), bridge would not start hello_timer if
+stp_enabled is not KERNEL_STP when br_dev_open.
+
+The problem is that even if users set stp_enabled to KERNEL_STP later,
+the timer will still not be started, so KERNEL_STP does not
+really work. Users have to re-ifup the bridge to work around this.
+
+This patch is to fix it by starting br->hello_timer when enabling
+KERNEL_STP in br_stp_start.
+
+As an improvement, it's also to start hello_timer again only when
+br->stp_enabled is KERNEL_STP in br_hello_timer_expired, there is
+no reason to start the timer again when it's NO_STP.
+
+Fixes: 76b91c32dd86 ("bridge: stp: when using userspace stp stop kernel hello and hold timers")
+Reported-by: Haidong Li <haili@redhat.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Reviewed-by: Ivan Vecera <cera@cera.cz>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_stp_if.c    |    1 +
+ net/bridge/br_stp_timer.c |    2 +-
+ 2 files changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/bridge/br_stp_if.c
++++ b/net/bridge/br_stp_if.c
+@@ -185,6 +185,7 @@ static void br_stp_start(struct net_brid
+               br_debug(br, "using kernel STP\n");
+               /* To start timers on any ports left in blocking */
++              mod_timer(&br->hello_timer, jiffies + br->hello_time);
+               br_port_state_selection(br);
+       }
+--- a/net/bridge/br_stp_timer.c
++++ b/net/bridge/br_stp_timer.c
+@@ -40,7 +40,7 @@ static void br_hello_timer_expired(unsig
+       if (br->dev->flags & IFF_UP) {
+               br_config_bpdu_generation(br);
+-              if (br->stp_enabled != BR_USER_STP)
++              if (br->stp_enabled == BR_KERNEL_STP)
+                       mod_timer(&br->hello_timer,
+                                 round_jiffies(jiffies + br->hello_time));
+       }
diff --git a/queue-4.9/dccp-tcp-do-not-inherit-mc_list-from-parent.patch b/queue-4.9/dccp-tcp-do-not-inherit-mc_list-from-parent.patch
new file mode 100644 (file)
index 0000000..2ee0448
--- /dev/null
@@ -0,0 +1,42 @@
+From foo@baz Wed May 31 09:13:34 JST 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 9 May 2017 06:29:19 -0700
+Subject: dccp/tcp: do not inherit mc_list from parent
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 657831ffc38e30092a2d5f03d385d710eb88b09a ]
+
+syzkaller found a way to trigger double frees from ip_mc_drop_socket()
+
+It turns out that we leave a copy of parent mc_list at accept() time,
+which is very bad.
+
+Very similar to commit 8b485ce69876 ("tcp: do not inherit
+fastopen_req from parent")
+
+Initial report from Pray3r, completed by Andrey one.
+Thanks a lot to them !
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Pray3r <pray3r.z@gmail.com>
+Reported-by: Andrey Konovalov <andreyknvl@google.com>
+Tested-by: Andrey Konovalov <andreyknvl@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/inet_connection_sock.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/ipv4/inet_connection_sock.c
++++ b/net/ipv4/inet_connection_sock.c
+@@ -665,6 +665,8 @@ struct sock *inet_csk_clone_lock(const s
+               /* listeners have SOCK_RCU_FREE, not the children */
+               sock_reset_flag(newsk, SOCK_RCU_FREE);
++              inet_sk(newsk)->mc_list = NULL;
++
+               newsk->sk_mark = inet_rsk(req)->ir_mark;
+               atomic64_set(&newsk->sk_cookie,
+                            atomic64_read(&inet_rsk(req)->ir_cookie));
diff --git a/queue-4.9/driver-vrf-fix-one-possible-use-after-free-issue.patch b/queue-4.9/driver-vrf-fix-one-possible-use-after-free-issue.patch
new file mode 100644 (file)
index 0000000..bf00df3
--- /dev/null
@@ -0,0 +1,60 @@
+From foo@baz Wed May 31 09:13:34 JST 2017
+From: Gao Feng <gfree.wind@vip.163.com>
+Date: Tue, 9 May 2017 18:27:33 +0800
+Subject: driver: vrf: Fix one possible use-after-free issue
+
+From: Gao Feng <gfree.wind@vip.163.com>
+
+
+[ Upstream commit 1a4a5bf52a4adb477adb075e5afce925824ad132 ]
+
+The current codes only deal with the case that the skb is dropped, it
+may meet one use-after-free issue when NF_HOOK returns 0 that means
+the skb is stolen by one netfilter rule or hook.
+
+When a netfilter rule or hook steals the skb and returns NF_STOLEN,
+it means the skb is taken by the rule, and other modules should not
+touch this skb ever. The skb may be queued or freed directly by the
+rule.
+
+Now use nf_hook instead of NF_HOOK to get the result of netfilter,
+and check the return value of nf_hook. Only when its value equals 1 may
+the skb go ahead; otherwise reset the skb to NULL.
+
+BTW, because vrf_rcv_finish is an empty function, there is no need to
+invoke it even when nf_hook returns 1. But we need to modify
+vrf_rcv_finish to deal with the NF_STOLEN case.
+
+There are two cases when skb is stolen.
+1. The skb is stolen and freed directly.
+   There is nothing we need to do, and vrf_rcv_finish isn't invoked.
+2. The skb is queued and reinjected again.
+   The vrf_rcv_finish would be invoked as okfn, so need to free the
+   skb in it.
+
+Signed-off-by: Gao Feng <gfree.wind@vip.163.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/vrf.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/vrf.c
++++ b/drivers/net/vrf.c
+@@ -850,6 +850,7 @@ static u32 vrf_fib_table(const struct ne
+ static int vrf_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+ {
++      kfree_skb(skb);
+       return 0;
+ }
+@@ -859,7 +860,7 @@ static struct sk_buff *vrf_rcv_nfhook(u8
+ {
+       struct net *net = dev_net(dev);
+-      if (NF_HOOK(pf, hook, net, NULL, skb, dev, NULL, vrf_rcv_finish) < 0)
++      if (nf_hook(pf, hook, net, NULL, skb, dev, NULL, vrf_rcv_finish) != 1)
+               skb = NULL;    /* kfree_skb(skb) handled by nf code */
+       return skb;
diff --git a/queue-4.9/ipv4-add-reference-counting-to-metrics.patch b/queue-4.9/ipv4-add-reference-counting-to-metrics.patch
new file mode 100644 (file)
index 0000000..818fed5
--- /dev/null
@@ -0,0 +1,254 @@
+From foo@baz Wed May 31 09:13:34 JST 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 25 May 2017 14:27:35 -0700
+Subject: ipv4: add reference counting to metrics
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 3fb07daff8e99243366a081e5129560734de4ada ]
+
+Andrey Konovalov reported crashes in ipv4_mtu()
+
+I could reproduce the issue with KASAN kernels, between
+10.246.7.151 and 10.246.7.152 :
+
+1) 20 concurrent netperf -t TCP_RR -H 10.246.7.152 -l 1000 &
+
+2) At the same time run following loop :
+while :
+do
+ ip ro add 10.246.7.152 dev eth0 src 10.246.7.151 mtu 1500
+ ip ro del 10.246.7.152 dev eth0 src 10.246.7.151 mtu 1500
+done
+
+Cong Wang attempted to add back rt->fi in commit
+82486aa6f1b9 ("ipv4: restore rt->fi for reference counting")
+but this proved to add some issues that were complex to solve.
+
+Instead, I suggested to add a refcount to the metrics themselves,
+being a standalone object (in particular, no reference to other objects)
+
+I tried to make this patch as small as possible to ease its backport,
+instead of being super clean. Note that we believe that only ipv4 dst
+need to take care of the metric refcount. But if this is wrong,
+this patch adds the basic infrastructure to extend this to other
+families.
+
+Many thanks to Julian Anastasov for reviewing this patch, and Cong Wang
+for his efforts on this problem.
+
+Fixes: 2860583fe840 ("ipv4: Kill rt->fi")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Andrey Konovalov <andreyknvl@google.com>
+Reviewed-by: Julian Anastasov <ja@ssi.bg>
+Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/dst.h        |    8 +++++++-
+ include/net/ip_fib.h     |   10 +++++-----
+ net/core/dst.c           |   23 ++++++++++++++---------
+ net/ipv4/fib_semantics.c |   17 ++++++++++-------
+ net/ipv4/route.c         |   10 +++++++++-
+ 5 files changed, 45 insertions(+), 23 deletions(-)
+
+--- a/include/net/dst.h
++++ b/include/net/dst.h
+@@ -107,10 +107,16 @@ struct dst_entry {
+       };
+ };
++struct dst_metrics {
++      u32             metrics[RTAX_MAX];
++      atomic_t        refcnt;
++};
++extern const struct dst_metrics dst_default_metrics;
++
+ u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old);
+-extern const u32 dst_default_metrics[];
+ #define DST_METRICS_READ_ONLY         0x1UL
++#define DST_METRICS_REFCOUNTED                0x2UL
+ #define DST_METRICS_FLAGS             0x3UL
+ #define __DST_METRICS_PTR(Y)  \
+       ((u32 *)((Y) & ~DST_METRICS_FLAGS))
+--- a/include/net/ip_fib.h
++++ b/include/net/ip_fib.h
+@@ -114,11 +114,11 @@ struct fib_info {
+       __be32                  fib_prefsrc;
+       u32                     fib_tb_id;
+       u32                     fib_priority;
+-      u32                     *fib_metrics;
+-#define fib_mtu fib_metrics[RTAX_MTU-1]
+-#define fib_window fib_metrics[RTAX_WINDOW-1]
+-#define fib_rtt fib_metrics[RTAX_RTT-1]
+-#define fib_advmss fib_metrics[RTAX_ADVMSS-1]
++      struct dst_metrics      *fib_metrics;
++#define fib_mtu fib_metrics->metrics[RTAX_MTU-1]
++#define fib_window fib_metrics->metrics[RTAX_WINDOW-1]
++#define fib_rtt fib_metrics->metrics[RTAX_RTT-1]
++#define fib_advmss fib_metrics->metrics[RTAX_ADVMSS-1]
+       int                     fib_nhs;
+ #ifdef CONFIG_IP_ROUTE_MULTIPATH
+       int                     fib_weight;
+--- a/net/core/dst.c
++++ b/net/core/dst.c
+@@ -151,13 +151,13 @@ int dst_discard_out(struct net *net, str
+ }
+ EXPORT_SYMBOL(dst_discard_out);
+-const u32 dst_default_metrics[RTAX_MAX + 1] = {
++const struct dst_metrics dst_default_metrics = {
+       /* This initializer is needed to force linker to place this variable
+        * into const section. Otherwise it might end into bss section.
+        * We really want to avoid false sharing on this variable, and catch
+        * any writes on it.
+        */
+-      [RTAX_MAX] = 0xdeadbeef,
++      .refcnt = ATOMIC_INIT(1),
+ };
+ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
+@@ -169,7 +169,7 @@ void dst_init(struct dst_entry *dst, str
+       if (dev)
+               dev_hold(dev);
+       dst->ops = ops;
+-      dst_init_metrics(dst, dst_default_metrics, true);
++      dst_init_metrics(dst, dst_default_metrics.metrics, true);
+       dst->expires = 0UL;
+       dst->path = dst;
+       dst->from = NULL;
+@@ -315,25 +315,30 @@ EXPORT_SYMBOL(dst_release);
+ u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old)
+ {
+-      u32 *p = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC);
++      struct dst_metrics *p = kmalloc(sizeof(*p), GFP_ATOMIC);
+       if (p) {
+-              u32 *old_p = __DST_METRICS_PTR(old);
++              struct dst_metrics *old_p = (struct dst_metrics *)__DST_METRICS_PTR(old);
+               unsigned long prev, new;
+-              memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
++              atomic_set(&p->refcnt, 1);
++              memcpy(p->metrics, old_p->metrics, sizeof(p->metrics));
+               new = (unsigned long) p;
+               prev = cmpxchg(&dst->_metrics, old, new);
+               if (prev != old) {
+                       kfree(p);
+-                      p = __DST_METRICS_PTR(prev);
++                      p = (struct dst_metrics *)__DST_METRICS_PTR(prev);
+                       if (prev & DST_METRICS_READ_ONLY)
+                               p = NULL;
++              } else if (prev & DST_METRICS_REFCOUNTED) {
++                      if (atomic_dec_and_test(&old_p->refcnt))
++                              kfree(old_p);
+               }
+       }
+-      return p;
++      BUILD_BUG_ON(offsetof(struct dst_metrics, metrics) != 0);
++      return (u32 *)p;
+ }
+ EXPORT_SYMBOL(dst_cow_metrics_generic);
+@@ -342,7 +347,7 @@ void __dst_destroy_metrics_generic(struc
+ {
+       unsigned long prev, new;
+-      new = ((unsigned long) dst_default_metrics) | DST_METRICS_READ_ONLY;
++      new = ((unsigned long) &dst_default_metrics) | DST_METRICS_READ_ONLY;
+       prev = cmpxchg(&dst->_metrics, old, new);
+       if (prev == old)
+               kfree(__DST_METRICS_PTR(old));
+--- a/net/ipv4/fib_semantics.c
++++ b/net/ipv4/fib_semantics.c
+@@ -204,6 +204,7 @@ static void rt_fibinfo_free_cpus(struct
+ static void free_fib_info_rcu(struct rcu_head *head)
+ {
+       struct fib_info *fi = container_of(head, struct fib_info, rcu);
++      struct dst_metrics *m;
+       change_nexthops(fi) {
+               if (nexthop_nh->nh_dev)
+@@ -214,8 +215,9 @@ static void free_fib_info_rcu(struct rcu
+               rt_fibinfo_free(&nexthop_nh->nh_rth_input);
+       } endfor_nexthops(fi);
+-      if (fi->fib_metrics != (u32 *) dst_default_metrics)
+-              kfree(fi->fib_metrics);
++      m = fi->fib_metrics;
++      if (m != &dst_default_metrics && atomic_dec_and_test(&m->refcnt))
++              kfree(m);
+       kfree(fi);
+ }
+@@ -982,11 +984,11 @@ fib_convert_metrics(struct fib_info *fi,
+                       val = 255;
+               if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
+                       return -EINVAL;
+-              fi->fib_metrics[type - 1] = val;
++              fi->fib_metrics->metrics[type - 1] = val;
+       }
+       if (ecn_ca)
+-              fi->fib_metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
++              fi->fib_metrics->metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
+       return 0;
+ }
+@@ -1044,11 +1046,12 @@ struct fib_info *fib_create_info(struct
+               goto failure;
+       fib_info_cnt++;
+       if (cfg->fc_mx) {
+-              fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
++              fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL);
+               if (!fi->fib_metrics)
+                       goto failure;
++              atomic_set(&fi->fib_metrics->refcnt, 1);
+       } else
+-              fi->fib_metrics = (u32 *) dst_default_metrics;
++              fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics;
+       fi->fib_net = net;
+       fi->fib_protocol = cfg->fc_protocol;
+@@ -1252,7 +1255,7 @@ int fib_dump_info(struct sk_buff *skb, u
+       if (fi->fib_priority &&
+           nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority))
+               goto nla_put_failure;
+-      if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
++      if (rtnetlink_put_metrics(skb, fi->fib_metrics->metrics) < 0)
+               goto nla_put_failure;
+       if (fi->fib_prefsrc &&
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -1364,8 +1364,12 @@ static void rt_add_uncached_list(struct
+ static void ipv4_dst_destroy(struct dst_entry *dst)
+ {
++      struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
+       struct rtable *rt = (struct rtable *) dst;
++      if (p != &dst_default_metrics && atomic_dec_and_test(&p->refcnt))
++              kfree(p);
++
+       if (!list_empty(&rt->rt_uncached)) {
+               struct uncached_list *ul = rt->rt_uncached_list;
+@@ -1417,7 +1421,11 @@ static void rt_set_nexthop(struct rtable
+                       rt->rt_gateway = nh->nh_gw;
+                       rt->rt_uses_gateway = 1;
+               }
+-              dst_init_metrics(&rt->dst, fi->fib_metrics, true);
++              dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true);
++              if (fi->fib_metrics != &dst_default_metrics) {
++                      rt->dst._metrics |= DST_METRICS_REFCOUNTED;
++                      atomic_inc(&fi->fib_metrics->refcnt);
++              }
+ #ifdef CONFIG_IP_ROUTE_CLASSID
+               rt->dst.tclassid = nh->nh_tclassid;
+ #endif
diff --git a/queue-4.9/ipv6-check-ip6_find_1stfragopt-return-value-properly.patch b/queue-4.9/ipv6-check-ip6_find_1stfragopt-return-value-properly.patch
new file mode 100644 (file)
index 0000000..c4a0639
--- /dev/null
@@ -0,0 +1,89 @@
+From foo@baz Wed May 31 09:13:34 JST 2017
+From: "David S. Miller" <davem@davemloft.net>
+Date: Wed, 17 May 2017 22:54:11 -0400
+Subject: ipv6: Check ip6_find_1stfragopt() return value properly.
+
+From: "David S. Miller" <davem@davemloft.net>
+
+
+[ Upstream commit 7dd7eb9513bd02184d45f000ab69d78cb1fa1531 ]
+
+Do not use unsigned variables to see if it returns a negative
+error or not.
+
+Fixes: 2423496af35d ("ipv6: Prevent overrun when parsing v6 header options")
+Reported-by: Julia Lawall <julia.lawall@lip6.fr>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_offload.c |    9 ++++-----
+ net/ipv6/ip6_output.c  |    7 +++----
+ net/ipv6/udp_offload.c |    8 +++++---
+ 3 files changed, 12 insertions(+), 12 deletions(-)
+
+--- a/net/ipv6/ip6_offload.c
++++ b/net/ipv6/ip6_offload.c
+@@ -63,7 +63,6 @@ static struct sk_buff *ipv6_gso_segment(
+       const struct net_offload *ops;
+       int proto;
+       struct frag_hdr *fptr;
+-      unsigned int unfrag_ip6hlen;
+       unsigned int payload_len;
+       u8 *prevhdr;
+       int offset = 0;
+@@ -116,10 +115,10 @@ static struct sk_buff *ipv6_gso_segment(
+               skb->network_header = (u8 *)ipv6h - skb->head;
+               if (udpfrag) {
+-                      unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
+-                      if (unfrag_ip6hlen < 0)
+-                              return ERR_PTR(unfrag_ip6hlen);
+-                      fptr = (struct frag_hdr *)((u8 *)ipv6h + unfrag_ip6hlen);
++                      int err = ip6_find_1stfragopt(skb, &prevhdr);
++                      if (err < 0)
++                              return ERR_PTR(err);
++                      fptr = (struct frag_hdr *)((u8 *)ipv6h + err);
+                       fptr->frag_off = htons(offset);
+                       if (skb->next)
+                               fptr->frag_off |= htons(IP6_MF);
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -586,11 +586,10 @@ int ip6_fragment(struct net *net, struct
+       int ptr, offset = 0, err = 0;
+       u8 *prevhdr, nexthdr = 0;
+-      hlen = ip6_find_1stfragopt(skb, &prevhdr);
+-      if (hlen < 0) {
+-              err = hlen;
++      err = ip6_find_1stfragopt(skb, &prevhdr);
++      if (err < 0)
+               goto fail;
+-      }
++      hlen = err;
+       nexthdr = *prevhdr;
+       mtu = ip6_skb_dst_mtu(skb);
+--- a/net/ipv6/udp_offload.c
++++ b/net/ipv6/udp_offload.c
+@@ -29,6 +29,7 @@ static struct sk_buff *udp6_ufo_fragment
+       u8 frag_hdr_sz = sizeof(struct frag_hdr);
+       __wsum csum;
+       int tnl_hlen;
++      int err;
+       mss = skb_shinfo(skb)->gso_size;
+       if (unlikely(skb->len <= mss))
+@@ -90,9 +91,10 @@ static struct sk_buff *udp6_ufo_fragment
+               /* Find the unfragmentable header and shift it left by frag_hdr_sz
+                * bytes to insert fragment header.
+                */
+-              unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
+-              if (unfrag_ip6hlen < 0)
+-                      return ERR_PTR(unfrag_ip6hlen);
++              err = ip6_find_1stfragopt(skb, &prevhdr);
++              if (err < 0)
++                      return ERR_PTR(err);
++              unfrag_ip6hlen = err;
+               nexthdr = *prevhdr;
+               *prevhdr = NEXTHDR_FRAGMENT;
+               unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) +
diff --git a/queue-4.9/ipv6-dccp-do-not-inherit-ipv6_mc_list-from-parent.patch b/queue-4.9/ipv6-dccp-do-not-inherit-ipv6_mc_list-from-parent.patch
new file mode 100644 (file)
index 0000000..dd7ba3a
--- /dev/null
@@ -0,0 +1,63 @@
+From foo@baz Wed May 31 09:13:34 JST 2017
+From: WANG Cong <xiyou.wangcong@gmail.com>
+Date: Tue, 9 May 2017 16:59:54 -0700
+Subject: ipv6/dccp: do not inherit ipv6_mc_list from parent
+
+From: WANG Cong <xiyou.wangcong@gmail.com>
+
+
+[ Upstream commit 83eaddab4378db256d00d295bda6ca997cd13a52 ]
+
+Like commit 657831ffc38e ("dccp/tcp: do not inherit mc_list from parent")
+we should clear ipv6_mc_list etc. for IPv6 sockets too.
+
+Cc: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dccp/ipv6.c     |    6 ++++++
+ net/ipv6/tcp_ipv6.c |    2 ++
+ 2 files changed, 8 insertions(+)
+
+--- a/net/dccp/ipv6.c
++++ b/net/dccp/ipv6.c
+@@ -426,6 +426,9 @@ static struct sock *dccp_v6_request_recv
+               newsk->sk_backlog_rcv = dccp_v4_do_rcv;
+               newnp->pktoptions  = NULL;
+               newnp->opt         = NULL;
++              newnp->ipv6_mc_list = NULL;
++              newnp->ipv6_ac_list = NULL;
++              newnp->ipv6_fl_list = NULL;
+               newnp->mcast_oif   = inet6_iif(skb);
+               newnp->mcast_hops  = ipv6_hdr(skb)->hop_limit;
+@@ -490,6 +493,9 @@ static struct sock *dccp_v6_request_recv
+       /* Clone RX bits */
+       newnp->rxopt.all = np->rxopt.all;
++      newnp->ipv6_mc_list = NULL;
++      newnp->ipv6_ac_list = NULL;
++      newnp->ipv6_fl_list = NULL;
+       newnp->pktoptions = NULL;
+       newnp->opt        = NULL;
+       newnp->mcast_oif  = inet6_iif(skb);
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -1046,6 +1046,7 @@ static struct sock *tcp_v6_syn_recv_sock
+               newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
+ #endif
++              newnp->ipv6_mc_list = NULL;
+               newnp->ipv6_ac_list = NULL;
+               newnp->ipv6_fl_list = NULL;
+               newnp->pktoptions  = NULL;
+@@ -1115,6 +1116,7 @@ static struct sock *tcp_v6_syn_recv_sock
+          First: no IPv4 options.
+        */
+       newinet->inet_opt = NULL;
++      newnp->ipv6_mc_list = NULL;
+       newnp->ipv6_ac_list = NULL;
+       newnp->ipv6_fl_list = NULL;
diff --git a/queue-4.9/ipv6-fix-out-of-bound-writes-in-__ip6_append_data.patch b/queue-4.9/ipv6-fix-out-of-bound-writes-in-__ip6_append_data.patch
new file mode 100644 (file)
index 0000000..81d744d
--- /dev/null
@@ -0,0 +1,67 @@
+From foo@baz Wed May 31 09:13:34 JST 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 19 May 2017 14:17:48 -0700
+Subject: ipv6: fix out of bound writes in __ip6_append_data()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 232cd35d0804cc241eb887bb8d4d9b3b9881c64a ]
+
+Andrey Konovalov and idaifish@gmail.com reported crashes caused by
+one skb shared_info being overwritten from __ip6_append_data()
+
+Andrey's program leads to the following state:
+
+copy -4200 datalen 2000 fraglen 2040
+maxfraglen 2040 alloclen 2048 transhdrlen 0 offset 0 fraggap 6200
+
+The skb_copy_and_csum_bits(skb_prev, maxfraglen, data + transhdrlen,
+fraggap, 0); is overwriting skb->head and skb_shared_info
+
+Since we apparently detect this rare condition too late, move the
+code earlier to even avoid allocating skb and risking crashes.
+
+Once again, many thanks to Andrey and syzkaller team.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Andrey Konovalov <andreyknvl@google.com>
+Tested-by: Andrey Konovalov <andreyknvl@google.com>
+Reported-by: <idaifish@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_output.c |   15 ++++++++-------
+ 1 file changed, 8 insertions(+), 7 deletions(-)
+
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -1447,6 +1447,11 @@ alloc_new_skb:
+                        */
+                       alloclen += sizeof(struct frag_hdr);
++                      copy = datalen - transhdrlen - fraggap;
++                      if (copy < 0) {
++                              err = -EINVAL;
++                              goto error;
++                      }
+                       if (transhdrlen) {
+                               skb = sock_alloc_send_skb(sk,
+                                               alloclen + hh_len,
+@@ -1496,13 +1501,9 @@ alloc_new_skb:
+                               data += fraggap;
+                               pskb_trim_unique(skb_prev, maxfraglen);
+                       }
+-                      copy = datalen - transhdrlen - fraggap;
+-
+-                      if (copy < 0) {
+-                              err = -EINVAL;
+-                              kfree_skb(skb);
+-                              goto error;
+-                      } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
++                      if (copy > 0 &&
++                          getfrag(from, data + transhdrlen, offset,
++                                  copy, fraggap, skb) < 0) {
+                               err = -EFAULT;
+                               kfree_skb(skb);
+                               goto error;
diff --git a/queue-4.9/ipv6-prevent-overrun-when-parsing-v6-header-options.patch b/queue-4.9/ipv6-prevent-overrun-when-parsing-v6-header-options.patch
new file mode 100644 (file)
index 0000000..2ca4397
--- /dev/null
@@ -0,0 +1,226 @@
+From foo@baz Wed May 31 09:13:34 JST 2017
+From: Craig Gallek <kraig@google.com>
+Date: Tue, 16 May 2017 14:36:23 -0400
+Subject: ipv6: Prevent overrun when parsing v6 header options
+
+From: Craig Gallek <kraig@google.com>
+
+
+[ Upstream commit 2423496af35d94a87156b063ea5cedffc10a70a1 ]
+
+The KASAN warning reported below was discovered with a syzkaller
+program.  The reproducer is basically:
+  int s = socket(AF_INET6, SOCK_RAW, NEXTHDR_HOP);
+  send(s, &one_byte_of_data, 1, MSG_MORE);
+  send(s, &more_than_mtu_bytes_data, 2000, 0);
+
+The socket() call sets the nexthdr field of the v6 header to
+NEXTHDR_HOP, the first send call primes the payload with a non zero
+byte of data, and the second send call triggers the fragmentation path.
+
+The fragmentation code tries to parse the header options in order
+to figure out where to insert the fragment option.  Since nexthdr points
+to an invalid option, the calculation of the size of the network header
+can be made to be much larger than the linear section of the skb and data
+is read outside of it.
+
+This fix makes ip6_find_1stfragopt() return an error if it detects
+running out-of-bounds.
+
+[   42.361487] ==================================================================
+[   42.364412] BUG: KASAN: slab-out-of-bounds in ip6_fragment+0x11c8/0x3730
+[   42.365471] Read of size 840 at addr ffff88000969e798 by task ip6_fragment-oo/3789
+[   42.366469]
+[   42.366696] CPU: 1 PID: 3789 Comm: ip6_fragment-oo Not tainted 4.11.0+ #41
+[   42.367628] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.1-1ubuntu1 04/01/2014
+[   42.368824] Call Trace:
+[   42.369183]  dump_stack+0xb3/0x10b
+[   42.369664]  print_address_description+0x73/0x290
+[   42.370325]  kasan_report+0x252/0x370
+[   42.370839]  ? ip6_fragment+0x11c8/0x3730
+[   42.371396]  check_memory_region+0x13c/0x1a0
+[   42.371978]  memcpy+0x23/0x50
+[   42.372395]  ip6_fragment+0x11c8/0x3730
+[   42.372920]  ? nf_ct_expect_unregister_notifier+0x110/0x110
+[   42.373681]  ? ip6_copy_metadata+0x7f0/0x7f0
+[   42.374263]  ? ip6_forward+0x2e30/0x2e30
+[   42.374803]  ip6_finish_output+0x584/0x990
+[   42.375350]  ip6_output+0x1b7/0x690
+[   42.375836]  ? ip6_finish_output+0x990/0x990
+[   42.376411]  ? ip6_fragment+0x3730/0x3730
+[   42.376968]  ip6_local_out+0x95/0x160
+[   42.377471]  ip6_send_skb+0xa1/0x330
+[   42.377969]  ip6_push_pending_frames+0xb3/0xe0
+[   42.378589]  rawv6_sendmsg+0x2051/0x2db0
+[   42.379129]  ? rawv6_bind+0x8b0/0x8b0
+[   42.379633]  ? _copy_from_user+0x84/0xe0
+[   42.380193]  ? debug_check_no_locks_freed+0x290/0x290
+[   42.380878]  ? ___sys_sendmsg+0x162/0x930
+[   42.381427]  ? rcu_read_lock_sched_held+0xa3/0x120
+[   42.382074]  ? sock_has_perm+0x1f6/0x290
+[   42.382614]  ? ___sys_sendmsg+0x167/0x930
+[   42.383173]  ? lock_downgrade+0x660/0x660
+[   42.383727]  inet_sendmsg+0x123/0x500
+[   42.384226]  ? inet_sendmsg+0x123/0x500
+[   42.384748]  ? inet_recvmsg+0x540/0x540
+[   42.385263]  sock_sendmsg+0xca/0x110
+[   42.385758]  SYSC_sendto+0x217/0x380
+[   42.386249]  ? SYSC_connect+0x310/0x310
+[   42.386783]  ? __might_fault+0x110/0x1d0
+[   42.387324]  ? lock_downgrade+0x660/0x660
+[   42.387880]  ? __fget_light+0xa1/0x1f0
+[   42.388403]  ? __fdget+0x18/0x20
+[   42.388851]  ? sock_common_setsockopt+0x95/0xd0
+[   42.389472]  ? SyS_setsockopt+0x17f/0x260
+[   42.390021]  ? entry_SYSCALL_64_fastpath+0x5/0xbe
+[   42.390650]  SyS_sendto+0x40/0x50
+[   42.391103]  entry_SYSCALL_64_fastpath+0x1f/0xbe
+[   42.391731] RIP: 0033:0x7fbbb711e383
+[   42.392217] RSP: 002b:00007ffff4d34f28 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
+[   42.393235] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fbbb711e383
+[   42.394195] RDX: 0000000000001000 RSI: 00007ffff4d34f60 RDI: 0000000000000003
+[   42.395145] RBP: 0000000000000046 R08: 00007ffff4d34f40 R09: 0000000000000018
+[   42.396056] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000400aad
+[   42.396598] R13: 0000000000000066 R14: 00007ffff4d34ee0 R15: 00007fbbb717af00
+[   42.397257]
+[   42.397411] Allocated by task 3789:
+[   42.397702]  save_stack_trace+0x16/0x20
+[   42.398005]  save_stack+0x46/0xd0
+[   42.398267]  kasan_kmalloc+0xad/0xe0
+[   42.398548]  kasan_slab_alloc+0x12/0x20
+[   42.398848]  __kmalloc_node_track_caller+0xcb/0x380
+[   42.399224]  __kmalloc_reserve.isra.32+0x41/0xe0
+[   42.399654]  __alloc_skb+0xf8/0x580
+[   42.400003]  sock_wmalloc+0xab/0xf0
+[   42.400346]  __ip6_append_data.isra.41+0x2472/0x33d0
+[   42.400813]  ip6_append_data+0x1a8/0x2f0
+[   42.401122]  rawv6_sendmsg+0x11ee/0x2db0
+[   42.401505]  inet_sendmsg+0x123/0x500
+[   42.401860]  sock_sendmsg+0xca/0x110
+[   42.402209]  ___sys_sendmsg+0x7cb/0x930
+[   42.402582]  __sys_sendmsg+0xd9/0x190
+[   42.402941]  SyS_sendmsg+0x2d/0x50
+[   42.403273]  entry_SYSCALL_64_fastpath+0x1f/0xbe
+[   42.403718]
+[   42.403871] Freed by task 1794:
+[   42.404146]  save_stack_trace+0x16/0x20
+[   42.404515]  save_stack+0x46/0xd0
+[   42.404827]  kasan_slab_free+0x72/0xc0
+[   42.405167]  kfree+0xe8/0x2b0
+[   42.405462]  skb_free_head+0x74/0xb0
+[   42.405806]  skb_release_data+0x30e/0x3a0
+[   42.406198]  skb_release_all+0x4a/0x60
+[   42.406563]  consume_skb+0x113/0x2e0
+[   42.406910]  skb_free_datagram+0x1a/0xe0
+[   42.407288]  netlink_recvmsg+0x60d/0xe40
+[   42.407667]  sock_recvmsg+0xd7/0x110
+[   42.408022]  ___sys_recvmsg+0x25c/0x580
+[   42.408395]  __sys_recvmsg+0xd6/0x190
+[   42.408753]  SyS_recvmsg+0x2d/0x50
+[   42.409086]  entry_SYSCALL_64_fastpath+0x1f/0xbe
+[   42.409513]
+[   42.409665] The buggy address belongs to the object at ffff88000969e780
+[   42.409665]  which belongs to the cache kmalloc-512 of size 512
+[   42.410846] The buggy address is located 24 bytes inside of
+[   42.410846]  512-byte region [ffff88000969e780, ffff88000969e980)
+[   42.411941] The buggy address belongs to the page:
+[   42.412405] page:ffffea000025a780 count:1 mapcount:0 mapping:          (null) index:0x0 compound_mapcount: 0
+[   42.413298] flags: 0x100000000008100(slab|head)
+[   42.413729] raw: 0100000000008100 0000000000000000 0000000000000000 00000001800c000c
+[   42.414387] raw: ffffea00002a9500 0000000900000007 ffff88000c401280 0000000000000000
+[   42.415074] page dumped because: kasan: bad access detected
+[   42.415604]
+[   42.415757] Memory state around the buggy address:
+[   42.416222]  ffff88000969e880: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+[   42.416904]  ffff88000969e900: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+[   42.417591] >ffff88000969e980: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+[   42.418273]                    ^
+[   42.418588]  ffff88000969ea00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[   42.419273]  ffff88000969ea80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[   42.419882] ==================================================================
+
+Reported-by: Andrey Konovalov <andreyknvl@google.com>
+Signed-off-by: Craig Gallek <kraig@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_offload.c |    2 ++
+ net/ipv6/ip6_output.c  |    4 ++++
+ net/ipv6/output_core.c |   14 ++++++++------
+ net/ipv6/udp_offload.c |    2 ++
+ 4 files changed, 16 insertions(+), 6 deletions(-)
+
+--- a/net/ipv6/ip6_offload.c
++++ b/net/ipv6/ip6_offload.c
+@@ -117,6 +117,8 @@ static struct sk_buff *ipv6_gso_segment(
+               if (udpfrag) {
+                       unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
++                      if (unfrag_ip6hlen < 0)
++                              return ERR_PTR(unfrag_ip6hlen);
+                       fptr = (struct frag_hdr *)((u8 *)ipv6h + unfrag_ip6hlen);
+                       fptr->frag_off = htons(offset);
+                       if (skb->next)
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -587,6 +587,10 @@ int ip6_fragment(struct net *net, struct
+       u8 *prevhdr, nexthdr = 0;
+       hlen = ip6_find_1stfragopt(skb, &prevhdr);
++      if (hlen < 0) {
++              err = hlen;
++              goto fail;
++      }
+       nexthdr = *prevhdr;
+       mtu = ip6_skb_dst_mtu(skb);
+--- a/net/ipv6/output_core.c
++++ b/net/ipv6/output_core.c
+@@ -79,14 +79,13 @@ EXPORT_SYMBOL(ipv6_select_ident);
+ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
+ {
+       u16 offset = sizeof(struct ipv6hdr);
+-      struct ipv6_opt_hdr *exthdr =
+-                              (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
+       unsigned int packet_len = skb_tail_pointer(skb) -
+               skb_network_header(skb);
+       int found_rhdr = 0;
+       *nexthdr = &ipv6_hdr(skb)->nexthdr;
+-      while (offset + 1 <= packet_len) {
++      while (offset <= packet_len) {
++              struct ipv6_opt_hdr *exthdr;
+               switch (**nexthdr) {
+@@ -107,13 +106,16 @@ int ip6_find_1stfragopt(struct sk_buff *
+                       return offset;
+               }
+-              offset += ipv6_optlen(exthdr);
+-              *nexthdr = &exthdr->nexthdr;
++              if (offset + sizeof(struct ipv6_opt_hdr) > packet_len)
++                      return -EINVAL;
++
+               exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
+                                                offset);
++              offset += ipv6_optlen(exthdr);
++              *nexthdr = &exthdr->nexthdr;
+       }
+-      return offset;
++      return -EINVAL;
+ }
+ EXPORT_SYMBOL(ip6_find_1stfragopt);
+--- a/net/ipv6/udp_offload.c
++++ b/net/ipv6/udp_offload.c
+@@ -91,6 +91,8 @@ static struct sk_buff *udp6_ufo_fragment
+                * bytes to insert fragment header.
+                */
+               unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
++              if (unfrag_ip6hlen < 0)
++                      return ERR_PTR(unfrag_ip6hlen);
+               nexthdr = *prevhdr;
+               *prevhdr = NEXTHDR_FRAGMENT;
+               unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) +
diff --git a/queue-4.9/net-fix-compile-error-in-skb_orphan_partial.patch b/queue-4.9/net-fix-compile-error-in-skb_orphan_partial.patch
new file mode 100644 (file)
index 0000000..8ba6842
--- /dev/null
@@ -0,0 +1,44 @@
+From foo@baz Wed May 31 09:13:34 JST 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 16 May 2017 13:27:53 -0700
+Subject: net: fix compile error in skb_orphan_partial()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 9142e9007f2d7ab58a587a1e1d921b0064a339aa ]
+
+If CONFIG_INET is not set, net/core/sock.c can not compile :
+
+net/core/sock.c: In function ‘skb_orphan_partial’:
+net/core/sock.c:1810:2: error: implicit declaration of function
+‘skb_is_tcp_pure_ack’ [-Werror=implicit-function-declaration]
+  if (skb_is_tcp_pure_ack(skb))
+  ^
+
+Fix this by always including <net/tcp.h>
+
+Fixes: f6ba8d33cfbb ("netem: fix skb_orphan_partial()")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Paul Gortmaker <paul.gortmaker@windriver.com>
+Reported-by: Randy Dunlap <rdunlap@infradead.org>
+Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sock.c |    3 ---
+ 1 file changed, 3 deletions(-)
+
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -138,10 +138,7 @@
+ #include <trace/events/sock.h>
+-#ifdef CONFIG_INET
+ #include <net/tcp.h>
+-#endif
+-
+ #include <net/busy_poll.h>
+ static DEFINE_MUTEX(proto_list_mutex);
diff --git a/queue-4.9/net-improve-handling-of-failures-on-link-and-route-dumps.patch b/queue-4.9/net-improve-handling-of-failures-on-link-and-route-dumps.patch
new file mode 100644 (file)
index 0000000..fd6310b
--- /dev/null
@@ -0,0 +1,205 @@
+From foo@baz Wed May 31 09:13:34 JST 2017
+From: David Ahern <dsahern@gmail.com>
+Date: Mon, 15 May 2017 23:19:17 -0700
+Subject: net: Improve handling of failures on link and route dumps
+
+From: David Ahern <dsahern@gmail.com>
+
+
+[ Upstream commit f6c5775ff0bfa62b072face6bf1d40f659f194b2 ]
+
+In general, rtnetlink dumps do not anticipate failure to dump a single
+object (e.g., link or route) on a single pass. As both route and link
+objects have grown via more attributes, that is no longer a given.
+
+netlink dumps can handle a failure if the dump function returns an
+error; specifically, netlink_dump adds the return code to the response
+if it is <= 0 so userspace is notified of the failure. The missing
+piece is the rtnetlink dump functions returning the error.
+
+Fix route and link dump functions to return the errors if no object is
+added to an skb (detected by skb->len == 0). IPv6 route dumps
+(rt6_dump_route) already return the error; this patch updates IPv4 and
+link dumps. Other dump functions may need to be adjusted as well.
+
+Reported-by: Jan Moskyto Matejka <mq@ucw.cz>
+Signed-off-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/rtnetlink.c    |   36 ++++++++++++++++++++++++------------
+ net/ipv4/fib_frontend.c |   15 +++++++++++----
+ net/ipv4/fib_trie.c     |   26 ++++++++++++++------------
+ 3 files changed, 49 insertions(+), 28 deletions(-)
+
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -1617,13 +1617,13 @@ static int rtnl_dump_ifinfo(struct sk_bu
+                                              cb->nlh->nlmsg_seq, 0,
+                                              flags,
+                                              ext_filter_mask);
+-                      /* If we ran out of room on the first message,
+-                       * we're in trouble
+-                       */
+-                      WARN_ON((err == -EMSGSIZE) && (skb->len == 0));
+-                      if (err < 0)
+-                              goto out;
++                      if (err < 0) {
++                              if (likely(skb->len))
++                                      goto out;
++
++                              goto out_err;
++                      }
+                       nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+ cont:
+@@ -1631,10 +1631,12 @@ cont:
+               }
+       }
+ out:
++      err = skb->len;
++out_err:
+       cb->args[1] = idx;
+       cb->args[0] = h;
+-      return skb->len;
++      return err;
+ }
+ int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len)
+@@ -3413,8 +3415,12 @@ static int rtnl_bridge_getlink(struct sk
+                               err = br_dev->netdev_ops->ndo_bridge_getlink(
+                                               skb, portid, seq, dev,
+                                               filter_mask, NLM_F_MULTI);
+-                              if (err < 0 && err != -EOPNOTSUPP)
+-                                      break;
++                              if (err < 0 && err != -EOPNOTSUPP) {
++                                      if (likely(skb->len))
++                                              break;
++
++                                      goto out_err;
++                              }
+                       }
+                       idx++;
+               }
+@@ -3425,16 +3431,22 @@ static int rtnl_bridge_getlink(struct sk
+                                                             seq, dev,
+                                                             filter_mask,
+                                                             NLM_F_MULTI);
+-                              if (err < 0 && err != -EOPNOTSUPP)
+-                                      break;
++                              if (err < 0 && err != -EOPNOTSUPP) {
++                                      if (likely(skb->len))
++                                              break;
++
++                                      goto out_err;
++                              }
+                       }
+                       idx++;
+               }
+       }
++      err = skb->len;
++out_err:
+       rcu_read_unlock();
+       cb->args[0] = idx;
+-      return skb->len;
++      return err;
+ }
+ static inline size_t bridge_nlmsg_size(void)
+--- a/net/ipv4/fib_frontend.c
++++ b/net/ipv4/fib_frontend.c
+@@ -758,7 +758,7 @@ static int inet_dump_fib(struct sk_buff
+       unsigned int e = 0, s_e;
+       struct fib_table *tb;
+       struct hlist_head *head;
+-      int dumped = 0;
++      int dumped = 0, err;
+       if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
+           ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
+@@ -778,20 +778,27 @@ static int inet_dump_fib(struct sk_buff
+                       if (dumped)
+                               memset(&cb->args[2], 0, sizeof(cb->args) -
+                                                2 * sizeof(cb->args[0]));
+-                      if (fib_table_dump(tb, skb, cb) < 0)
+-                              goto out;
++                      err = fib_table_dump(tb, skb, cb);
++                      if (err < 0) {
++                              if (likely(skb->len))
++                                      goto out;
++
++                              goto out_err;
++                      }
+                       dumped = 1;
+ next:
+                       e++;
+               }
+       }
+ out:
++      err = skb->len;
++out_err:
+       rcu_read_unlock();
+       cb->args[1] = e;
+       cb->args[0] = h;
+-      return skb->len;
++      return err;
+ }
+ /* Prepare and feed intra-kernel routing request.
+--- a/net/ipv4/fib_trie.c
++++ b/net/ipv4/fib_trie.c
+@@ -1932,6 +1932,8 @@ static int fn_trie_dump_leaf(struct key_
+       /* rcu_read_lock is hold by caller */
+       hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
++              int err;
++
+               if (i < s_i) {
+                       i++;
+                       continue;
+@@ -1942,17 +1944,14 @@ static int fn_trie_dump_leaf(struct key_
+                       continue;
+               }
+-              if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
+-                                cb->nlh->nlmsg_seq,
+-                                RTM_NEWROUTE,
+-                                tb->tb_id,
+-                                fa->fa_type,
+-                                xkey,
+-                                KEYLENGTH - fa->fa_slen,
+-                                fa->fa_tos,
+-                                fa->fa_info, NLM_F_MULTI) < 0) {
++              err = fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
++                                  cb->nlh->nlmsg_seq, RTM_NEWROUTE,
++                                  tb->tb_id, fa->fa_type,
++                                  xkey, KEYLENGTH - fa->fa_slen,
++                                  fa->fa_tos, fa->fa_info, NLM_F_MULTI);
++              if (err < 0) {
+                       cb->args[4] = i;
+-                      return -1;
++                      return err;
+               }
+               i++;
+       }
+@@ -1974,10 +1973,13 @@ int fib_table_dump(struct fib_table *tb,
+       t_key key = cb->args[3];
+       while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
+-              if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) {
++              int err;
++
++              err = fn_trie_dump_leaf(l, tb, skb, cb);
++              if (err < 0) {
+                       cb->args[3] = key;
+                       cb->args[2] = count;
+-                      return -1;
++                      return err;
+               }
+               ++count;
diff --git a/queue-4.9/net-mlx5-avoid-using-pending-command-interface-slots.patch b/queue-4.9/net-mlx5-avoid-using-pending-command-interface-slots.patch
new file mode 100644 (file)
index 0000000..25354a5
--- /dev/null
@@ -0,0 +1,178 @@
+From foo@baz Wed May 31 09:13:34 JST 2017
+From: Mohamad Haj Yahia <mohamad@mellanox.com>
+Date: Thu, 23 Feb 2017 11:19:36 +0200
+Subject: net/mlx5: Avoid using pending command interface slots
+
+From: Mohamad Haj Yahia <mohamad@mellanox.com>
+
+
+[ Upstream commit 73dd3a4839c1d27c36d4dcc92e1ff44225ecbeb7 ]
+
+Currently, when a firmware command gets stuck or takes a long time to
+complete, the driver command times out and the command slot is freed
+and can be reused for new commands. If the firmware receives a new
+command on the old, still-busy slot, its behavior is unpredictable and
+this could be harmful.
+To fix this, when the driver command times out we return failure,
+but we don't free the command slot; we wait for the firmware to
+explicitly respond to that command.
+Once all the entries are busy, we stop processing new firmware
+commands.
+
+Fixes: 9cba4ebcf374 ('net/mlx5: Fix potential deadlock in command mode change')
+Signed-off-by: Mohamad Haj Yahia <mohamad@mellanox.com>
+Cc: kernel-team@fb.com
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/cmd.c    |   41 ++++++++++++++++++++---
+ drivers/net/ethernet/mellanox/mlx5/core/eq.c     |    2 -
+ drivers/net/ethernet/mellanox/mlx5/core/health.c |    2 -
+ include/linux/mlx5/driver.h                      |    7 +++
+ 4 files changed, 44 insertions(+), 8 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+@@ -767,7 +767,7 @@ static void cb_timeout_handler(struct wo
+       mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
+                      mlx5_command_str(msg_to_opcode(ent->in)),
+                      msg_to_opcode(ent->in));
+-      mlx5_cmd_comp_handler(dev, 1UL << ent->idx);
++      mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
+ }
+ static void cmd_work_handler(struct work_struct *work)
+@@ -797,6 +797,7 @@ static void cmd_work_handler(struct work
+       }
+       cmd->ent_arr[ent->idx] = ent;
++      set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state);
+       lay = get_inst(cmd, ent->idx);
+       ent->lay = lay;
+       memset(lay, 0, sizeof(*lay));
+@@ -818,6 +819,20 @@ static void cmd_work_handler(struct work
+       if (ent->callback)
+               schedule_delayed_work(&ent->cb_timeout_work, cb_timeout);
++      /* Skip sending command to fw if internal error */
++      if (pci_channel_offline(dev->pdev) ||
++          dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
++              u8 status = 0;
++              u32 drv_synd;
++
++              ent->ret = mlx5_internal_err_ret_value(dev, msg_to_opcode(ent->in), &drv_synd, &status);
++              MLX5_SET(mbox_out, ent->out, status, status);
++              MLX5_SET(mbox_out, ent->out, syndrome, drv_synd);
++
++              mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
++              return;
++      }
++
+       /* ring doorbell after the descriptor is valid */
+       mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx);
+       wmb();
+@@ -828,7 +843,7 @@ static void cmd_work_handler(struct work
+               poll_timeout(ent);
+               /* make sure we read the descriptor after ownership is SW */
+               rmb();
+-              mlx5_cmd_comp_handler(dev, 1UL << ent->idx);
++              mlx5_cmd_comp_handler(dev, 1UL << ent->idx, (ent->ret == -ETIMEDOUT));
+       }
+ }
+@@ -872,7 +887,7 @@ static int wait_func(struct mlx5_core_de
+               wait_for_completion(&ent->done);
+       } else if (!wait_for_completion_timeout(&ent->done, timeout)) {
+               ent->ret = -ETIMEDOUT;
+-              mlx5_cmd_comp_handler(dev, 1UL << ent->idx);
++              mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
+       }
+       err = ent->ret;
+@@ -1369,7 +1384,7 @@ static void free_msg(struct mlx5_core_de
+       }
+ }
+-void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec)
++void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
+ {
+       struct mlx5_cmd *cmd = &dev->cmd;
+       struct mlx5_cmd_work_ent *ent;
+@@ -1389,6 +1404,19 @@ void mlx5_cmd_comp_handler(struct mlx5_c
+                       struct semaphore *sem;
+                       ent = cmd->ent_arr[i];
++
++                      /* if we already completed the command, ignore it */
++                      if (!test_and_clear_bit(MLX5_CMD_ENT_STATE_PENDING_COMP,
++                                              &ent->state)) {
++                              /* only real completion can free the cmd slot */
++                              if (!forced) {
++                                      mlx5_core_err(dev, "Command completion arrived after timeout (entry idx = %d).\n",
++                                                    ent->idx);
++                                      free_ent(cmd, ent->idx);
++                              }
++                              continue;
++                      }
++
+                       if (ent->callback)
+                               cancel_delayed_work(&ent->cb_timeout_work);
+                       if (ent->page_queue)
+@@ -1411,7 +1439,10 @@ void mlx5_cmd_comp_handler(struct mlx5_c
+                               mlx5_core_dbg(dev, "command completed. ret 0x%x, delivery status %s(0x%x)\n",
+                                             ent->ret, deliv_status_to_str(ent->status), ent->status);
+                       }
+-                      free_ent(cmd, ent->idx);
++
++                      /* only real completion will free the entry slot */
++                      if (!forced)
++                              free_ent(cmd, ent->idx);
+                       if (ent->callback) {
+                               ds = ent->ts2 - ent->ts1;
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+@@ -234,7 +234,7 @@ static int mlx5_eq_int(struct mlx5_core_
+                       break;
+               case MLX5_EVENT_TYPE_CMD:
+-                      mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector));
++                      mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false);
+                       break;
+               case MLX5_EVENT_TYPE_PORT_CHANGE:
+--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
+@@ -90,7 +90,7 @@ static void trigger_cmd_completions(stru
+       spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
+       mlx5_core_dbg(dev, "vector 0x%llx\n", vector);
+-      mlx5_cmd_comp_handler(dev, vector);
++      mlx5_cmd_comp_handler(dev, vector, true);
+       return;
+ no_trig:
+--- a/include/linux/mlx5/driver.h
++++ b/include/linux/mlx5/driver.h
+@@ -640,7 +640,12 @@ enum {
+ typedef void (*mlx5_cmd_cbk_t)(int status, void *context);
++enum {
++      MLX5_CMD_ENT_STATE_PENDING_COMP,
++};
++
+ struct mlx5_cmd_work_ent {
++      unsigned long           state;
+       struct mlx5_cmd_msg    *in;
+       struct mlx5_cmd_msg    *out;
+       void                   *uout;
+@@ -838,7 +843,7 @@ void mlx5_eq_pagefault(struct mlx5_core_
+ #endif
+ void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
+ struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
+-void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec);
++void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
+ void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type);
+ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
+                      int nent, u64 mask, const char *name, struct mlx5_uar *uar);
diff --git a/queue-4.9/net-mlx5e-fix-ethtool-pause-support-and-advertise-reporting.patch b/queue-4.9/net-mlx5e-fix-ethtool-pause-support-and-advertise-reporting.patch
new file mode 100644 (file)
index 0000000..578d6b2
--- /dev/null
@@ -0,0 +1,41 @@
+From foo@baz Wed May 31 09:13:34 JST 2017
+From: Gal Pressman <galp@mellanox.com>
+Date: Wed, 19 Apr 2017 14:35:15 +0300
+Subject: net/mlx5e: Fix ethtool pause support and advertise reporting
+
+From: Gal Pressman <galp@mellanox.com>
+
+
+[ Upstream commit e3c19503712d6360239b19c14cded56dd63c40d7 ]
+
+Pause bit should be set when RX pause is on, not TX pause.
+Also, setting Asym_Pause is incorrect, and should be turned off.
+
+Fixes: 665bc53969d7 ("net/mlx5e: Use new ethtool get/set link ksettings API")
+Signed-off-by: Gal Pressman <galp@mellanox.com>
+Cc: kernel-team@fb.com
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+@@ -751,7 +751,6 @@ static void get_supported(u32 eth_proto_
+       ptys2ethtool_supported_port(link_ksettings, eth_proto_cap);
+       ptys2ethtool_supported_link(supported, eth_proto_cap);
+       ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Pause);
+-      ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Asym_Pause);
+ }
+ static void get_advertising(u32 eth_proto_cap, u8 tx_pause,
+@@ -761,7 +760,7 @@ static void get_advertising(u32 eth_prot
+       unsigned long *advertising = link_ksettings->link_modes.advertising;
+       ptys2ethtool_adver_link(advertising, eth_proto_cap);
+-      if (tx_pause)
++      if (rx_pause)
+               ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Pause);
+       if (tx_pause ^ rx_pause)
+               ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Asym_Pause);
diff --git a/queue-4.9/net-mlx5e-use-the-correct-pause-values-for-ethtool-advertising.patch b/queue-4.9/net-mlx5e-use-the-correct-pause-values-for-ethtool-advertising.patch
new file mode 100644 (file)
index 0000000..4c49d11
--- /dev/null
@@ -0,0 +1,48 @@
+From foo@baz Wed May 31 09:13:34 JST 2017
+From: Gal Pressman <galp@mellanox.com>
+Date: Mon, 3 Apr 2017 15:11:22 +0300
+Subject: net/mlx5e: Use the correct pause values for ethtool advertising
+
+From: Gal Pressman <galp@mellanox.com>
+
+
+[ Upstream commit b383b544f2666d67446b951a9a97af239dafed5d ]
+
+Query the operational pause from firmware (PFCC register) instead of
+always passing zeros.
+
+Fixes: 665bc53969d7 ("net/mlx5e: Use new ethtool get/set link ksettings API")
+Signed-off-by: Gal Pressman <galp@mellanox.com>
+Cc: kernel-team@fb.com
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+@@ -806,6 +806,8 @@ static int mlx5e_get_link_ksettings(stru
+       struct mlx5e_priv *priv    = netdev_priv(netdev);
+       struct mlx5_core_dev *mdev = priv->mdev;
+       u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0};
++      u32 rx_pause = 0;
++      u32 tx_pause = 0;
+       u32 eth_proto_cap;
+       u32 eth_proto_admin;
+       u32 eth_proto_lp;
+@@ -828,11 +830,13 @@ static int mlx5e_get_link_ksettings(stru
+       an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin);
+       an_status        = MLX5_GET(ptys_reg, out, an_status);
++      mlx5_query_port_pause(mdev, &rx_pause, &tx_pause);
++
+       ethtool_link_ksettings_zero_link_mode(link_ksettings, supported);
+       ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
+       get_supported(eth_proto_cap, link_ksettings);
+-      get_advertising(eth_proto_admin, 0, 0, link_ksettings);
++      get_advertising(eth_proto_admin, tx_pause, rx_pause, link_ksettings);
+       get_speed_duplex(netdev, eth_proto_oper, link_ksettings);
+       eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap;
diff --git a/queue-4.9/net-packet-fix-missing-net_device-reference-release.patch b/queue-4.9/net-packet-fix-missing-net_device-reference-release.patch
new file mode 100644 (file)
index 0000000..b3f13d6
--- /dev/null
@@ -0,0 +1,53 @@
+From foo@baz Wed May 31 09:13:34 JST 2017
+From: Douglas Caetano dos Santos <douglascs@taghos.com.br>
+Date: Fri, 12 May 2017 15:19:15 -0300
+Subject: net/packet: fix missing net_device reference release
+
+From: Douglas Caetano dos Santos <douglascs@taghos.com.br>
+
+
+[ Upstream commit d19b183cdc1fa3d70d6abe2a4c369e748cd7ebb8 ]
+
+When using a TX ring buffer, if an error occurs processing a control
+message (e.g. invalid message), the net_device reference is not
+released.
+
+Fixes: c14ac9451c348 ("sock: enable timestamping using control messages")
+Signed-off-by: Douglas Caetano dos Santos <douglascs@taghos.com.br>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c |   14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -2652,13 +2652,6 @@ static int tpacket_snd(struct packet_soc
+               dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
+       }
+-      sockc.tsflags = po->sk.sk_tsflags;
+-      if (msg->msg_controllen) {
+-              err = sock_cmsg_send(&po->sk, msg, &sockc);
+-              if (unlikely(err))
+-                      goto out;
+-      }
+-
+       err = -ENXIO;
+       if (unlikely(dev == NULL))
+               goto out;
+@@ -2666,6 +2659,13 @@ static int tpacket_snd(struct packet_soc
+       if (unlikely(!(dev->flags & IFF_UP)))
+               goto out_put;
++      sockc.tsflags = po->sk.sk_tsflags;
++      if (msg->msg_controllen) {
++              err = sock_cmsg_send(&po->sk, msg, &sockc);
++              if (unlikely(err))
++                      goto out_put;
++      }
++
+       if (po->sk.sk_socket->type == SOCK_RAW)
+               reserve = dev->hard_header_len;
+       size_max = po->tx_ring.frame_size
diff --git a/queue-4.9/net-phy-marvell-limit-errata-to-88m1101.patch b/queue-4.9/net-phy-marvell-limit-errata-to-88m1101.patch
new file mode 100644 (file)
index 0000000..f843f21
--- /dev/null
@@ -0,0 +1,114 @@
+From foo@baz Wed May 31 09:13:34 JST 2017
+From: Andrew Lunn <andrew@lunn.ch>
+Date: Tue, 23 May 2017 17:49:13 +0200
+Subject: net: phy: marvell: Limit errata to 88m1101
+
+From: Andrew Lunn <andrew@lunn.ch>
+
+
+[ Upstream commit f2899788353c13891412b273fdff5f02d49aa40f ]
+
+The 88m1101 has an errata when configuring autoneg. However, it was
+being applied to many other Marvell PHYs as well. Limit its scope to
+just the 88m1101.
+
+Fixes: 76884679c644 ("phylib: Add support for Marvell 88e1111S and 88e1145")
+Reported-by: Daniel Walker <danielwa@cisco.com>
+Signed-off-by: Andrew Lunn <andrew@lunn.ch>
+Acked-by: Harini Katakam <harinik@xilinx.com>
+Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/marvell.c |   66 +++++++++++++++++++++++++---------------------
+ 1 file changed, 37 insertions(+), 29 deletions(-)
+
+--- a/drivers/net/phy/marvell.c
++++ b/drivers/net/phy/marvell.c
+@@ -240,34 +240,6 @@ static int marvell_config_aneg(struct ph
+ {
+       int err;
+-      /* The Marvell PHY has an errata which requires
+-       * that certain registers get written in order
+-       * to restart autonegotiation */
+-      err = phy_write(phydev, MII_BMCR, BMCR_RESET);
+-
+-      if (err < 0)
+-              return err;
+-
+-      err = phy_write(phydev, 0x1d, 0x1f);
+-      if (err < 0)
+-              return err;
+-
+-      err = phy_write(phydev, 0x1e, 0x200c);
+-      if (err < 0)
+-              return err;
+-
+-      err = phy_write(phydev, 0x1d, 0x5);
+-      if (err < 0)
+-              return err;
+-
+-      err = phy_write(phydev, 0x1e, 0);
+-      if (err < 0)
+-              return err;
+-
+-      err = phy_write(phydev, 0x1e, 0x100);
+-      if (err < 0)
+-              return err;
+-
+       err = marvell_set_polarity(phydev, phydev->mdix);
+       if (err < 0)
+               return err;
+@@ -301,6 +273,42 @@ static int marvell_config_aneg(struct ph
+       return 0;
+ }
++static int m88e1101_config_aneg(struct phy_device *phydev)
++{
++      int err;
++
++      /* This Marvell PHY has an errata which requires
++       * that certain registers get written in order
++       * to restart autonegotiation
++       */
++      err = phy_write(phydev, MII_BMCR, BMCR_RESET);
++
++      if (err < 0)
++              return err;
++
++      err = phy_write(phydev, 0x1d, 0x1f);
++      if (err < 0)
++              return err;
++
++      err = phy_write(phydev, 0x1e, 0x200c);
++      if (err < 0)
++              return err;
++
++      err = phy_write(phydev, 0x1d, 0x5);
++      if (err < 0)
++              return err;
++
++      err = phy_write(phydev, 0x1e, 0);
++      if (err < 0)
++              return err;
++
++      err = phy_write(phydev, 0x1e, 0x100);
++      if (err < 0)
++              return err;
++
++      return marvell_config_aneg(phydev);
++}
++
+ static int m88e1111_config_aneg(struct phy_device *phydev)
+ {
+       int err;
+@@ -1491,7 +1499,7 @@ static struct phy_driver marvell_drivers
+               .probe = marvell_probe,
+               .flags = PHY_HAS_INTERRUPT,
+               .config_init = &marvell_config_init,
+-              .config_aneg = &marvell_config_aneg,
++              .config_aneg = &m88e1101_config_aneg,
+               .read_status = &genphy_read_status,
+               .ack_interrupt = &marvell_ack_interrupt,
+               .config_intr = &marvell_config_intr,
diff --git a/queue-4.9/netem-fix-skb_orphan_partial.patch b/queue-4.9/netem-fix-skb_orphan_partial.patch
new file mode 100644 (file)
index 0000000..a7a51c7
--- /dev/null
@@ -0,0 +1,67 @@
+From foo@baz Wed May 31 09:13:34 JST 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 11 May 2017 15:24:41 -0700
+Subject: netem: fix skb_orphan_partial()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit f6ba8d33cfbb46df569972e64dbb5bb7e929bfd9 ]
+
+I should have known that lowering skb->truesize was dangerous :/
+
+In case packets are not leaving the host via a standard Ethernet device,
+but looped back to local sockets, bad things can happen, as reported
+by Michael Madsen ( https://bugzilla.kernel.org/show_bug.cgi?id=195713 )
+
+So instead of tweaking skb->truesize, let's change skb->destructor
+and keep a reference on the owner socket via its sk_refcnt.
+
+Fixes: f2f872f9272a ("netem: Introduce skb_orphan_partial() helper")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Michael Madsen <mkm@nabto.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sock.c |   20 ++++++++------------
+ 1 file changed, 8 insertions(+), 12 deletions(-)
+
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1687,28 +1687,24 @@ EXPORT_SYMBOL(skb_set_owner_w);
+  * delay queue. We want to allow the owner socket to send more
+  * packets, as if they were already TX completed by a typical driver.
+  * But we also want to keep skb->sk set because some packet schedulers
+- * rely on it (sch_fq for example). So we set skb->truesize to a small
+- * amount (1) and decrease sk_wmem_alloc accordingly.
++ * rely on it (sch_fq for example).
+  */
+ void skb_orphan_partial(struct sk_buff *skb)
+ {
+-      /* If this skb is a TCP pure ACK or already went here,
+-       * we have nothing to do. 2 is already a very small truesize.
+-       */
+-      if (skb->truesize <= 2)
++      if (skb_is_tcp_pure_ack(skb))
+               return;
+-      /* TCP stack sets skb->ooo_okay based on sk_wmem_alloc,
+-       * so we do not completely orphan skb, but transfert all
+-       * accounted bytes but one, to avoid unexpected reorders.
+-       */
+       if (skb->destructor == sock_wfree
+ #ifdef CONFIG_INET
+           || skb->destructor == tcp_wfree
+ #endif
+               ) {
+-              atomic_sub(skb->truesize - 1, &skb->sk->sk_wmem_alloc);
+-              skb->truesize = 1;
++              struct sock *sk = skb->sk;
++
++              if (atomic_inc_not_zero(&sk->sk_refcnt)) {
++                      atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
++                      skb->destructor = sock_efree;
++              }
+       } else {
+               skb_orphan(skb);
+       }
diff --git a/queue-4.9/qmi_wwan-add-another-lenovo-em74xx-device-id.patch b/queue-4.9/qmi_wwan-add-another-lenovo-em74xx-device-id.patch
new file mode 100644 (file)
index 0000000..cb762b0
--- /dev/null
@@ -0,0 +1,33 @@
+From foo@baz Wed May 31 09:13:34 JST 2017
+From: Bjørn Mork <bjorn@mork.no>
+Date: Wed, 17 May 2017 16:31:41 +0200
+Subject: qmi_wwan: add another Lenovo EM74xx device ID
+
+From: Bjørn Mork <bjorn@mork.no>
+
+
+[ Upstream commit 486181bcb3248e2f1977f4e69387a898234a4e1e ]
+
+In their infinite wisdom, and never ending quest for end user frustration,
+Lenovo has decided to use a new USB device ID for the wwan modules in
+their 2017 laptops.  The actual hardware is still the Sierra Wireless
+EM7455 or EM7430, depending on region.
+
+Signed-off-by: Bjørn Mork <bjorn@mork.no>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/qmi_wwan.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/usb/qmi_wwan.c
++++ b/drivers/net/usb/qmi_wwan.c
+@@ -897,6 +897,8 @@ static const struct usb_device_id produc
+       {QMI_FIXED_INTF(0x1199, 0x9071, 10)},   /* Sierra Wireless MC74xx */
+       {QMI_FIXED_INTF(0x1199, 0x9079, 8)},    /* Sierra Wireless EM74xx */
+       {QMI_FIXED_INTF(0x1199, 0x9079, 10)},   /* Sierra Wireless EM74xx */
++      {QMI_FIXED_INTF(0x1199, 0x907b, 8)},    /* Sierra Wireless EM74xx */
++      {QMI_FIXED_INTF(0x1199, 0x907b, 10)},   /* Sierra Wireless EM74xx */
+       {QMI_FIXED_INTF(0x1bbb, 0x011e, 4)},    /* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */
+       {QMI_FIXED_INTF(0x1bbb, 0x0203, 2)},    /* Alcatel L800MA */
+       {QMI_FIXED_INTF(0x2357, 0x0201, 4)},    /* TP-LINK HSUPA Modem MA180 */
diff --git a/queue-4.9/s390-qeth-add-missing-hash-table-initializations.patch b/queue-4.9/s390-qeth-add-missing-hash-table-initializations.patch
new file mode 100644 (file)
index 0000000..139f669
--- /dev/null
@@ -0,0 +1,34 @@
+From foo@baz Wed May 31 09:13:34 JST 2017
+From: Ursula Braun <ubraun@linux.vnet.ibm.com>
+Date: Wed, 10 May 2017 19:07:54 +0200
+Subject: s390/qeth: add missing hash table initializations
+
+From: Ursula Braun <ubraun@linux.vnet.ibm.com>
+
+
+[ Upstream commit ebccc7397e4a49ff64c8f44a54895de9d32fe742 ]
+
+commit 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback")
+added new hash tables, but failed to initialize them.
+
+Fixes: 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback")
+Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
+Reviewed-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/s390/net/qeth_l3_main.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/s390/net/qeth_l3_main.c
++++ b/drivers/s390/net/qeth_l3_main.c
+@@ -3162,6 +3162,8 @@ static int qeth_l3_probe_device(struct c
+       rc = qeth_l3_create_device_attributes(&gdev->dev);
+       if (rc)
+               return rc;
++      hash_init(card->ip_htable);
++      hash_init(card->ip_mc_htable);
+       card->options.layer2 = 0;
+       card->info.hwtrap = 0;
+       return 0;
diff --git a/queue-4.9/s390-qeth-avoid-null-pointer-dereference-on-osn.patch b/queue-4.9/s390-qeth-avoid-null-pointer-dereference-on-osn.patch
new file mode 100644 (file)
index 0000000..71f6d51
--- /dev/null
@@ -0,0 +1,46 @@
+From foo@baz Wed May 31 09:13:34 JST 2017
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Date: Wed, 10 May 2017 19:07:53 +0200
+Subject: s390/qeth: avoid null pointer dereference on OSN
+
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+
+
+[ Upstream commit 25e2c341e7818a394da9abc403716278ee646014 ]
+
+Access card->dev only after checking whether it's valid.
+
+Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Reviewed-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/s390/net/qeth_l2_main.c |   10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/drivers/s390/net/qeth_l2_main.c
++++ b/drivers/s390/net/qeth_l2_main.c
+@@ -1106,7 +1106,6 @@ static int qeth_l2_setup_netdev(struct q
+       case QETH_CARD_TYPE_OSN:
+               card->dev = alloc_netdev(0, "osn%d", NET_NAME_UNKNOWN,
+                                        ether_setup);
+-              card->dev->flags |= IFF_NOARP;
+               break;
+       default:
+               card->dev = alloc_etherdev(0);
+@@ -1119,9 +1118,12 @@ static int qeth_l2_setup_netdev(struct q
+       card->dev->watchdog_timeo = QETH_TX_TIMEOUT;
+       card->dev->mtu = card->info.initial_mtu;
+       card->dev->netdev_ops = &qeth_l2_netdev_ops;
+-      card->dev->ethtool_ops =
+-              (card->info.type != QETH_CARD_TYPE_OSN) ?
+-              &qeth_l2_ethtool_ops : &qeth_l2_osn_ops;
++      if (card->info.type == QETH_CARD_TYPE_OSN) {
++              card->dev->ethtool_ops = &qeth_l2_osn_ops;
++              card->dev->flags |= IFF_NOARP;
++      } else {
++              card->dev->ethtool_ops = &qeth_l2_ethtool_ops;
++      }
+       card->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+       if (card->info.type == QETH_CARD_TYPE_OSD && !card->info.guestlan) {
+               card->dev->hw_features = NETIF_F_SG;
diff --git a/queue-4.9/s390-qeth-handle-sysfs-error-during-initialization.patch b/queue-4.9/s390-qeth-handle-sysfs-error-during-initialization.patch
new file mode 100644 (file)
index 0000000..397bdd7
--- /dev/null
@@ -0,0 +1,82 @@
+From foo@baz Wed May 31 09:13:34 JST 2017
+From: Ursula Braun <ubraun@linux.vnet.ibm.com>
+Date: Wed, 10 May 2017 19:07:51 +0200
+Subject: s390/qeth: handle sysfs error during initialization
+
+From: Ursula Braun <ubraun@linux.vnet.ibm.com>
+
+
+[ Upstream commit 9111e7880ccf419548c7b0887df020b08eadb075 ]
+
+When setting up the device from within the layer discipline's
+probe routine, creating the layer-specific sysfs attributes can fail.
+Report this error back to the caller, and handle it by
+releasing the layer discipline.
+
+Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
+[jwi: updated commit msg, moved an OSN change to a subsequent patch]
+Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/s390/net/qeth_core_main.c |    4 +++-
+ drivers/s390/net/qeth_core_sys.c  |    2 ++
+ drivers/s390/net/qeth_l2_main.c   |    5 ++++-
+ drivers/s390/net/qeth_l3_main.c   |    5 ++++-
+ 4 files changed, 13 insertions(+), 3 deletions(-)
+
+--- a/drivers/s390/net/qeth_core_main.c
++++ b/drivers/s390/net/qeth_core_main.c
+@@ -5663,8 +5663,10 @@ static int qeth_core_set_online(struct c
+               if (rc)
+                       goto err;
+               rc = card->discipline->setup(card->gdev);
+-              if (rc)
++              if (rc) {
++                      qeth_core_free_discipline(card);
+                       goto err;
++              }
+       }
+       rc = card->discipline->set_online(gdev);
+ err:
+--- a/drivers/s390/net/qeth_core_sys.c
++++ b/drivers/s390/net/qeth_core_sys.c
+@@ -426,6 +426,8 @@ static ssize_t qeth_dev_layer2_store(str
+               goto out;
+       rc = card->discipline->setup(card->gdev);
++      if (rc)
++              qeth_core_free_discipline(card);
+ out:
+       mutex_unlock(&card->discipline_mutex);
+       return rc ? rc : count;
+--- a/drivers/s390/net/qeth_l2_main.c
++++ b/drivers/s390/net/qeth_l2_main.c
+@@ -1024,8 +1024,11 @@ static int qeth_l2_stop(struct net_devic
+ static int qeth_l2_probe_device(struct ccwgroup_device *gdev)
+ {
+       struct qeth_card *card = dev_get_drvdata(&gdev->dev);
++      int rc;
+-      qeth_l2_create_device_attributes(&gdev->dev);
++      rc = qeth_l2_create_device_attributes(&gdev->dev);
++      if (rc)
++              return rc;
+       INIT_LIST_HEAD(&card->vid_list);
+       hash_init(card->mac_htable);
+       card->options.layer2 = 1;
+--- a/drivers/s390/net/qeth_l3_main.c
++++ b/drivers/s390/net/qeth_l3_main.c
+@@ -3157,8 +3157,11 @@ static int qeth_l3_setup_netdev(struct q
+ static int qeth_l3_probe_device(struct ccwgroup_device *gdev)
+ {
+       struct qeth_card *card = dev_get_drvdata(&gdev->dev);
++      int rc;
+-      qeth_l3_create_device_attributes(&gdev->dev);
++      rc = qeth_l3_create_device_attributes(&gdev->dev);
++      if (rc)
++              return rc;
+       card->options.layer2 = 0;
+       card->info.hwtrap = 0;
+       return 0;
diff --git a/queue-4.9/s390-qeth-unbreak-osm-and-osn-support.patch b/queue-4.9/s390-qeth-unbreak-osm-and-osn-support.patch
new file mode 100644 (file)
index 0000000..c0f7a9d
--- /dev/null
@@ -0,0 +1,263 @@
+From foo@baz Wed May 31 09:13:34 JST 2017
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Date: Wed, 10 May 2017 19:07:52 +0200
+Subject: s390/qeth: unbreak OSM and OSN support
+
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+
+
+[ Upstream commit 2d2ebb3ed0c6acfb014f98e427298673a5d07b82 ]
+
+commit b4d72c08b358 ("qeth: bridgeport support - basic control")
+broke the support for OSM and OSN devices as follows:
+
+As OSM and OSN are L2 only, qeth_core_probe_device() does an early
+setup by loading the l2 discipline and calling qeth_l2_probe_device().
+In this context, adding the l2-specific bridgeport sysfs attributes
+via qeth_l2_create_device_attributes() hits a BUG_ON in fs/sysfs/group.c,
+since the basic sysfs infrastructure for the device hasn't been
+established yet.
+
+Note that OSN actually has its own unique sysfs attributes
+(qeth_osn_devtype), so the additional attributes shouldn't be created
+at all.
+For OSM, add a new qeth_l2_devtype that contains all the common
+and l2-specific sysfs attributes.
+When qeth_core_probe_device() does early setup for OSM or OSN, assign
+the corresponding devtype so that the ccwgroup probe code creates the
+full set of sysfs attributes.
+This allows us to skip qeth_l2_create_device_attributes() in case
+of an early setup.
+
+Any device that can't do early setup will initially have only the
+generic sysfs attributes, and when it's probed later
+qeth_l2_probe_device() adds the l2-specific attributes.
+
+If an early-setup device is removed (by calling ccwgroup_ungroup()),
+device_unregister() will - using the devtype - delete the
+l2-specific attributes before qeth_l2_remove_device() is called.
+So make sure to not remove them twice.
+
+What complicates the issue is that qeth_l2_probe_device() and
+qeth_l2_remove_device() are also called on a device when its
+layer2 attribute changes (i.e. its layer mode is switched).
+For early-setup devices this wouldn't work properly - we wouldn't
+remove the l2-specific attributes when switching to L3.
+But switching the layer mode doesn't actually make any sense;
+we already decided that the device can only operate in L2!
+So just refuse to switch the layer mode on such devices. Note that
+OSN doesn't have a layer2 attribute, so we only need to special-case
+OSM.
+
+Based on an initial patch by Ursula Braun.
+
+Fixes: b4d72c08b358 ("qeth: bridgeport support - basic control")
+Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/s390/net/qeth_core.h      |    4 ++++
+ drivers/s390/net/qeth_core_main.c |   17 +++++++++--------
+ drivers/s390/net/qeth_core_sys.c  |   22 ++++++++++++++--------
+ drivers/s390/net/qeth_l2.h        |    2 ++
+ drivers/s390/net/qeth_l2_main.c   |   17 +++++++++++++----
+ drivers/s390/net/qeth_l2_sys.c    |    8 ++++++++
+ drivers/s390/net/qeth_l3_main.c   |    1 +
+ 7 files changed, 51 insertions(+), 20 deletions(-)
+
+--- a/drivers/s390/net/qeth_core.h
++++ b/drivers/s390/net/qeth_core.h
+@@ -718,6 +718,7 @@ enum qeth_discipline_id {
+ };
+ struct qeth_discipline {
++      const struct device_type *devtype;
+       void (*start_poll)(struct ccw_device *, int, unsigned long);
+       qdio_handler_t *input_handler;
+       qdio_handler_t *output_handler;
+@@ -893,6 +894,9 @@ extern struct qeth_discipline qeth_l2_di
+ extern struct qeth_discipline qeth_l3_discipline;
+ extern const struct attribute_group *qeth_generic_attr_groups[];
+ extern const struct attribute_group *qeth_osn_attr_groups[];
++extern const struct attribute_group qeth_device_attr_group;
++extern const struct attribute_group qeth_device_blkt_group;
++extern const struct device_type qeth_generic_devtype;
+ extern struct workqueue_struct *qeth_wq;
+ int qeth_card_hw_is_reachable(struct qeth_card *);
+--- a/drivers/s390/net/qeth_core_main.c
++++ b/drivers/s390/net/qeth_core_main.c
+@@ -5462,10 +5462,12 @@ void qeth_core_free_discipline(struct qe
+       card->discipline = NULL;
+ }
+-static const struct device_type qeth_generic_devtype = {
++const struct device_type qeth_generic_devtype = {
+       .name = "qeth_generic",
+       .groups = qeth_generic_attr_groups,
+ };
++EXPORT_SYMBOL_GPL(qeth_generic_devtype);
++
+ static const struct device_type qeth_osn_devtype = {
+       .name = "qeth_osn",
+       .groups = qeth_osn_attr_groups,
+@@ -5591,23 +5593,22 @@ static int qeth_core_probe_device(struct
+               goto err_card;
+       }
+-      if (card->info.type == QETH_CARD_TYPE_OSN)
+-              gdev->dev.type = &qeth_osn_devtype;
+-      else
+-              gdev->dev.type = &qeth_generic_devtype;
+-
+       switch (card->info.type) {
+       case QETH_CARD_TYPE_OSN:
+       case QETH_CARD_TYPE_OSM:
+               rc = qeth_core_load_discipline(card, QETH_DISCIPLINE_LAYER2);
+               if (rc)
+                       goto err_card;
++
++              gdev->dev.type = (card->info.type != QETH_CARD_TYPE_OSN)
++                                      ? card->discipline->devtype
++                                      : &qeth_osn_devtype;
+               rc = card->discipline->setup(card->gdev);
+               if (rc)
+                       goto err_disc;
+-      case QETH_CARD_TYPE_OSD:
+-      case QETH_CARD_TYPE_OSX:
++              break;
+       default:
++              gdev->dev.type = &qeth_generic_devtype;
+               break;
+       }
+--- a/drivers/s390/net/qeth_core_sys.c
++++ b/drivers/s390/net/qeth_core_sys.c
+@@ -413,12 +413,16 @@ static ssize_t qeth_dev_layer2_store(str
+       if (card->options.layer2 == newdis)
+               goto out;
+-      else {
+-              card->info.mac_bits  = 0;
+-              if (card->discipline) {
+-                      card->discipline->remove(card->gdev);
+-                      qeth_core_free_discipline(card);
+-              }
++      if (card->info.type == QETH_CARD_TYPE_OSM) {
++              /* fixed layer, can't switch */
++              rc = -EOPNOTSUPP;
++              goto out;
++      }
++
++      card->info.mac_bits = 0;
++      if (card->discipline) {
++              card->discipline->remove(card->gdev);
++              qeth_core_free_discipline(card);
+       }
+       rc = qeth_core_load_discipline(card, newdis);
+@@ -705,10 +709,11 @@ static struct attribute *qeth_blkt_devic
+       &dev_attr_inter_jumbo.attr,
+       NULL,
+ };
+-static struct attribute_group qeth_device_blkt_group = {
++const struct attribute_group qeth_device_blkt_group = {
+       .name = "blkt",
+       .attrs = qeth_blkt_device_attrs,
+ };
++EXPORT_SYMBOL_GPL(qeth_device_blkt_group);
+ static struct attribute *qeth_device_attrs[] = {
+       &dev_attr_state.attr,
+@@ -728,9 +733,10 @@ static struct attribute *qeth_device_att
+       &dev_attr_switch_attrs.attr,
+       NULL,
+ };
+-static struct attribute_group qeth_device_attr_group = {
++const struct attribute_group qeth_device_attr_group = {
+       .attrs = qeth_device_attrs,
+ };
++EXPORT_SYMBOL_GPL(qeth_device_attr_group);
+ const struct attribute_group *qeth_generic_attr_groups[] = {
+       &qeth_device_attr_group,
+--- a/drivers/s390/net/qeth_l2.h
++++ b/drivers/s390/net/qeth_l2.h
+@@ -8,6 +8,8 @@
+ #include "qeth_core.h"
++extern const struct attribute_group *qeth_l2_attr_groups[];
++
+ int qeth_l2_create_device_attributes(struct device *);
+ void qeth_l2_remove_device_attributes(struct device *);
+ void qeth_l2_setup_bridgeport_attrs(struct qeth_card *card);
+--- a/drivers/s390/net/qeth_l2_main.c
++++ b/drivers/s390/net/qeth_l2_main.c
+@@ -1021,14 +1021,21 @@ static int qeth_l2_stop(struct net_devic
+       return 0;
+ }
++static const struct device_type qeth_l2_devtype = {
++      .name = "qeth_layer2",
++      .groups = qeth_l2_attr_groups,
++};
++
+ static int qeth_l2_probe_device(struct ccwgroup_device *gdev)
+ {
+       struct qeth_card *card = dev_get_drvdata(&gdev->dev);
+       int rc;
+-      rc = qeth_l2_create_device_attributes(&gdev->dev);
+-      if (rc)
+-              return rc;
++      if (gdev->dev.type == &qeth_generic_devtype) {
++              rc = qeth_l2_create_device_attributes(&gdev->dev);
++              if (rc)
++                      return rc;
++      }
+       INIT_LIST_HEAD(&card->vid_list);
+       hash_init(card->mac_htable);
+       card->options.layer2 = 1;
+@@ -1040,7 +1047,8 @@ static void qeth_l2_remove_device(struct
+ {
+       struct qeth_card *card = dev_get_drvdata(&cgdev->dev);
+-      qeth_l2_remove_device_attributes(&cgdev->dev);
++      if (cgdev->dev.type == &qeth_generic_devtype)
++              qeth_l2_remove_device_attributes(&cgdev->dev);
+       qeth_set_allowed_threads(card, 0, 1);
+       wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0);
+@@ -1437,6 +1445,7 @@ static int qeth_l2_control_event(struct
+ }
+ struct qeth_discipline qeth_l2_discipline = {
++      .devtype = &qeth_l2_devtype,
+       .start_poll = qeth_qdio_start_poll,
+       .input_handler = (qdio_handler_t *) qeth_qdio_input_handler,
+       .output_handler = (qdio_handler_t *) qeth_qdio_output_handler,
+--- a/drivers/s390/net/qeth_l2_sys.c
++++ b/drivers/s390/net/qeth_l2_sys.c
+@@ -272,3 +272,11 @@ void qeth_l2_setup_bridgeport_attrs(stru
+       } else
+               qeth_bridgeport_an_set(card, 0);
+ }
++
++const struct attribute_group *qeth_l2_attr_groups[] = {
++      &qeth_device_attr_group,
++      &qeth_device_blkt_group,
++      /* l2 specific, see l2_{create,remove}_device_attributes(): */
++      &qeth_l2_bridgeport_attr_group,
++      NULL,
++};
+--- a/drivers/s390/net/qeth_l3_main.c
++++ b/drivers/s390/net/qeth_l3_main.c
+@@ -3453,6 +3453,7 @@ static int qeth_l3_control_event(struct
+ }
+ struct qeth_discipline qeth_l3_discipline = {
++      .devtype = &qeth_generic_devtype,
+       .start_poll = qeth_qdio_start_poll,
+       .input_handler = (qdio_handler_t *) qeth_qdio_input_handler,
+       .output_handler = (qdio_handler_t *) qeth_qdio_output_handler,
diff --git a/queue-4.9/sctp-do-not-inherit-ipv6_-mc-ac-fl-_list-from-parent.patch b/queue-4.9/sctp-do-not-inherit-ipv6_-mc-ac-fl-_list-from-parent.patch
new file mode 100644 (file)
index 0000000..2734a15
--- /dev/null
@@ -0,0 +1,34 @@
+From foo@baz Wed May 31 09:13:34 JST 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 17 May 2017 07:16:40 -0700
+Subject: sctp: do not inherit ipv6_{mc|ac|fl}_list from parent
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit fdcee2cbb8438702ea1b328fb6e0ac5e9a40c7f8 ]
+
+SCTP needs fixes similar to 83eaddab4378 ("ipv6/dccp: do not inherit
+ipv6_mc_list from parent"), otherwise bad things can happen.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Andrey Konovalov <andreyknvl@google.com>
+Tested-by: Andrey Konovalov <andreyknvl@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/ipv6.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/sctp/ipv6.c
++++ b/net/sctp/ipv6.c
+@@ -678,6 +678,9 @@ static struct sock *sctp_v6_create_accep
+       newnp = inet6_sk(newsk);
+       memcpy(newnp, np, sizeof(struct ipv6_pinfo));
++      newnp->ipv6_mc_list = NULL;
++      newnp->ipv6_ac_list = NULL;
++      newnp->ipv6_fl_list = NULL;
+       rcu_read_lock();
+       opt = rcu_dereference(np->opt);
diff --git a/queue-4.9/sctp-fix-icmp-processing-if-skb-is-non-linear.patch b/queue-4.9/sctp-fix-icmp-processing-if-skb-is-non-linear.patch
new file mode 100644 (file)
index 0000000..089a3bc
--- /dev/null
@@ -0,0 +1,72 @@
+From foo@baz Wed May 31 09:13:34 JST 2017
+From: Davide Caratti <dcaratti@redhat.com>
+Date: Thu, 25 May 2017 19:14:56 +0200
+Subject: sctp: fix ICMP processing if skb is non-linear
+
+From: Davide Caratti <dcaratti@redhat.com>
+
+
+[ Upstream commit 804ec7ebe8ea003999ca8d1bfc499edc6a9e07df ]
+
+sometimes ICMP replies to INIT chunks are ignored by the client, even if
+the encapsulated SCTP headers match an open socket. This happens when the
+ICMP packet is carried by a paged skb: use skb_header_pointer() to read
+packet contents beyond the SCTP header, so that chunk header and initiate
+tag are validated correctly.
+
+v2:
+- don't use skb_header_pointer() to read the transport header, since
+  icmp_socket_deliver() already puts these 8 bytes in the linear area.
+- change commit message to make specific reference to INIT chunks.
+
+Signed-off-by: Davide Caratti <dcaratti@redhat.com>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Acked-by: Vlad Yasevich <vyasevich@gmail.com>
+Reviewed-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/input.c |   16 +++++++++-------
+ 1 file changed, 9 insertions(+), 7 deletions(-)
+
+--- a/net/sctp/input.c
++++ b/net/sctp/input.c
+@@ -473,15 +473,14 @@ struct sock *sctp_err_lookup(struct net
+                            struct sctp_association **app,
+                            struct sctp_transport **tpp)
+ {
++      struct sctp_init_chunk *chunkhdr, _chunkhdr;
+       union sctp_addr saddr;
+       union sctp_addr daddr;
+       struct sctp_af *af;
+       struct sock *sk = NULL;
+       struct sctp_association *asoc;
+       struct sctp_transport *transport = NULL;
+-      struct sctp_init_chunk *chunkhdr;
+       __u32 vtag = ntohl(sctphdr->vtag);
+-      int len = skb->len - ((void *)sctphdr - (void *)skb->data);
+       *app = NULL; *tpp = NULL;
+@@ -516,13 +515,16 @@ struct sock *sctp_err_lookup(struct net
+        * discard the packet.
+        */
+       if (vtag == 0) {
+-              chunkhdr = (void *)sctphdr + sizeof(struct sctphdr);
+-              if (len < sizeof(struct sctphdr) + sizeof(sctp_chunkhdr_t)
+-                        + sizeof(__be32) ||
++              /* chunk header + first 4 octects of init header */
++              chunkhdr = skb_header_pointer(skb, skb_transport_offset(skb) +
++                                            sizeof(struct sctphdr),
++                                            sizeof(struct sctp_chunkhdr) +
++                                            sizeof(__be32), &_chunkhdr);
++              if (!chunkhdr ||
+                   chunkhdr->chunk_hdr.type != SCTP_CID_INIT ||
+-                  ntohl(chunkhdr->init_hdr.init_tag) != asoc->c.my_vtag) {
++                  ntohl(chunkhdr->init_hdr.init_tag) != asoc->c.my_vtag)
+                       goto out;
+-              }
++
+       } else if (vtag != asoc->c.peer_vtag) {
+               goto out;
+       }
diff --git a/queue-4.9/sctp-fix-src-address-selection-if-using-secondary-addresses-for-ipv6.patch b/queue-4.9/sctp-fix-src-address-selection-if-using-secondary-addresses-for-ipv6.patch
new file mode 100644 (file)
index 0000000..e132432
--- /dev/null
@@ -0,0 +1,119 @@
+From foo@baz Wed May 31 09:13:34 JST 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Fri, 12 May 2017 14:39:52 +0800
+Subject: sctp: fix src address selection if using secondary addresses for ipv6
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit dbc2b5e9a09e9a6664679a667ff81cff6e5f2641 ]
+
+Commit 0ca50d12fe46 ("sctp: fix src address selection if using secondary
+addresses") has fixed a src address selection issue when using secondary
+addresses for ipv4.
+
+Now sctp ipv6 also has the similar issue. When using a secondary address,
+sctp_v6_get_dst tries to choose the saddr which has the most same bits
+with the daddr by sctp_v6_addr_match_len. It may make some cases not work
+as expected.
+
+hostA:
+  [1] fd21:356b:459a:cf10::11 (eth1)
+  [2] fd21:356b:459a:cf20::11 (eth2)
+
+hostB:
+  [a] fd21:356b:459a:cf30::2  (eth1)
+  [b] fd21:356b:459a:cf40::2  (eth2)
+
+route from hostA to hostB:
+  fd21:356b:459a:cf30::/64 dev eth1  metric 1024  mtu 1500
+
+The expected path should be:
+  fd21:356b:459a:cf10::11 <-> fd21:356b:459a:cf30::2
+But addr[2] matches addr[a] more bits than addr[1] does, according to
+sctp_v6_addr_match_len. It causes the path to be:
+  fd21:356b:459a:cf20::11 <-> fd21:356b:459a:cf30::2
+
+This patch fixes it in the same way as Marcelo's fix for sctp ipv4.
+As there is no ip_dev_find for ipv6, this patch uses ipv6_chk_addr to
+check whether the saddr is on a dev instead.
+
+Note that for backwards compatibility, it will still do the addr_match_len
+check here when no optimal is found.
+
+Reported-by: Patrick Talbert <ptalbert@redhat.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/ipv6.c |   46 +++++++++++++++++++++++++++++-----------------
+ 1 file changed, 29 insertions(+), 17 deletions(-)
+
+--- a/net/sctp/ipv6.c
++++ b/net/sctp/ipv6.c
+@@ -240,12 +240,10 @@ static void sctp_v6_get_dst(struct sctp_
+       struct sctp_bind_addr *bp;
+       struct ipv6_pinfo *np = inet6_sk(sk);
+       struct sctp_sockaddr_entry *laddr;
+-      union sctp_addr *baddr = NULL;
+       union sctp_addr *daddr = &t->ipaddr;
+       union sctp_addr dst_saddr;
+       struct in6_addr *final_p, final;
+       __u8 matchlen = 0;
+-      __u8 bmatchlen;
+       sctp_scope_t scope;
+       memset(fl6, 0, sizeof(struct flowi6));
+@@ -312,23 +310,37 @@ static void sctp_v6_get_dst(struct sctp_
+        */
+       rcu_read_lock();
+       list_for_each_entry_rcu(laddr, &bp->address_list, list) {
+-              if (!laddr->valid)
++              struct dst_entry *bdst;
++              __u8 bmatchlen;
++
++              if (!laddr->valid ||
++                  laddr->state != SCTP_ADDR_SRC ||
++                  laddr->a.sa.sa_family != AF_INET6 ||
++                  scope > sctp_scope(&laddr->a))
+                       continue;
+-              if ((laddr->state == SCTP_ADDR_SRC) &&
+-                  (laddr->a.sa.sa_family == AF_INET6) &&
+-                  (scope <= sctp_scope(&laddr->a))) {
+-                      bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a);
+-                      if (!baddr || (matchlen < bmatchlen)) {
+-                              baddr = &laddr->a;
+-                              matchlen = bmatchlen;
+-                      }
+-              }
+-      }
+-      if (baddr) {
+-              fl6->saddr = baddr->v6.sin6_addr;
+-              fl6->fl6_sport = baddr->v6.sin6_port;
++
++              fl6->saddr = laddr->a.v6.sin6_addr;
++              fl6->fl6_sport = laddr->a.v6.sin6_port;
+               final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
+-              dst = ip6_dst_lookup_flow(sk, fl6, final_p);
++              bdst = ip6_dst_lookup_flow(sk, fl6, final_p);
++
++              if (!IS_ERR(bdst) &&
++                  ipv6_chk_addr(dev_net(bdst->dev),
++                                &laddr->a.v6.sin6_addr, bdst->dev, 1)) {
++                      if (!IS_ERR_OR_NULL(dst))
++                              dst_release(dst);
++                      dst = bdst;
++                      break;
++              }
++
++              bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a);
++              if (matchlen > bmatchlen)
++                      continue;
++
++              if (!IS_ERR_OR_NULL(dst))
++                      dst_release(dst);
++              dst = bdst;
++              matchlen = bmatchlen;
+       }
+       rcu_read_unlock();
diff --git a/queue-4.9/tcp-avoid-fastopen-api-to-be-used-on-af_unspec.patch b/queue-4.9/tcp-avoid-fastopen-api-to-be-used-on-af_unspec.patch
new file mode 100644 (file)
index 0000000..cfc06b3
--- /dev/null
@@ -0,0 +1,88 @@
+From foo@baz Wed May 31 09:13:34 JST 2017
+From: Wei Wang <weiwan@google.com>
+Date: Wed, 24 May 2017 09:59:31 -0700
+Subject: tcp: avoid fastopen API to be used on AF_UNSPEC
+
+From: Wei Wang <weiwan@google.com>
+
+
+[ Upstream commit ba615f675281d76fd19aa03558777f81fb6b6084 ]
+
+Fastopen API should be used to perform fastopen operations on the TCP
+socket. It does not make sense to use fastopen API to perform disconnect
+by calling it with AF_UNSPEC. The fastopen data path is also prone to
+race conditions and bugs when using with AF_UNSPEC.
+
+One issue reported and analyzed by Vegard Nossum is as follows:
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+Thread A:                            Thread B:
+------------------------------------------------------------------------
+sendto()
+ - tcp_sendmsg()
+     - sk_stream_memory_free() = 0
+         - goto wait_for_sndbuf
+            - sk_stream_wait_memory()
+               - sk_wait_event() // sleep
+          |                          sendto(flags=MSG_FASTOPEN, dest_addr=AF_UNSPEC)
+         |                           - tcp_sendmsg()
+         |                              - tcp_sendmsg_fastopen()
+         |                                 - __inet_stream_connect()
+         |                                    - tcp_disconnect() //because of AF_UNSPEC
+         |                                       - tcp_transmit_skb()// send RST
+         |                                    - return 0; // no reconnect!
+         |                           - sk_stream_wait_connect()
+         |                                 - sock_error()
+         |                                    - xchg(&sk->sk_err, 0)
+         |                                    - return -ECONNRESET
+       - ... // wake up, see sk->sk_err == 0
+    - skb_entail() on TCP_CLOSE socket
+
+If the connection is reopened then we will send a brand new SYN packet
+after thread A has already queued a buffer. At this point I think the
+socket internal state (sequence numbers etc.) becomes messed up.
+
+When the new connection is closed, the FIN-ACK is rejected because the
+sequence number is outside the window. The other side tries to
+retransmit,
+but __tcp_retransmit_skb() calls tcp_trim_head() on an empty skb which
+corrupts the skb data length and hits a BUG() in copy_and_csum_bits().
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+Hence, this patch adds a check for AF_UNSPEC in the fastopen data path
+and return EOPNOTSUPP to user if such case happens.
+
+Fixes: cf60af03ca4e7 ("tcp: Fast Open client - sendmsg(MSG_FASTOPEN)")
+Reported-by: Vegard Nossum <vegard.nossum@oracle.com>
+Signed-off-by: Wei Wang <weiwan@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -1078,9 +1078,12 @@ static int tcp_sendmsg_fastopen(struct s
+                               int *copied, size_t size)
+ {
+       struct tcp_sock *tp = tcp_sk(sk);
++      struct sockaddr *uaddr = msg->msg_name;
+       int err, flags;
+-      if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE))
++      if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
++          (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) &&
++           uaddr->sa_family == AF_UNSPEC))
+               return -EOPNOTSUPP;
+       if (tp->fastopen_req)
+               return -EALREADY; /* Another Fast Open is in progress */
+@@ -1093,7 +1096,7 @@ static int tcp_sendmsg_fastopen(struct s
+       tp->fastopen_req->size = size;
+       flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
+-      err = __inet_stream_connect(sk->sk_socket, msg->msg_name,
++      err = __inet_stream_connect(sk->sk_socket, uaddr,
+                                   msg->msg_namelen, flags);
+       *copied = tp->fastopen_req->copied;
+       tcp_free_fastopen_req(tp);
diff --git a/queue-4.9/tcp-avoid-fragmenting-peculiar-skbs-in-sack.patch b/queue-4.9/tcp-avoid-fragmenting-peculiar-skbs-in-sack.patch
new file mode 100644 (file)
index 0000000..4107f63
--- /dev/null
@@ -0,0 +1,55 @@
+From foo@baz Wed May 31 09:13:34 JST 2017
+From: Yuchung Cheng <ycheng@google.com>
+Date: Wed, 10 May 2017 17:01:27 -0700
+Subject: tcp: avoid fragmenting peculiar skbs in SACK
+
+From: Yuchung Cheng <ycheng@google.com>
+
+
+[ Upstream commit b451e5d24ba6687c6f0e7319c727a709a1846c06 ]
+
+This patch fixes a bug in splitting an SKB during SACK
+processing. Specifically if an skb contains multiple
+packets and is only partially sacked in the higher sequences,
+tcp_match_sack_to_skb() splits the skb and marks the second fragment
+as SACKed.
+
+The current code further attempts rounding up the first fragment
+to MSS boundaries. But it misses a boundary condition when the
+rounded-up fragment size (pkt_len) is exactly skb size.  Splitting
+such an skb is pointless and causes a kernel warning and aborts
+the SACK processing. This patch universally checks such over-split
+before calling tcp_fragment to prevent these unnecessary warnings.
+
+Fixes: adb92db857ee ("tcp: Make SACK code to split only at mss boundaries")
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c |    9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -1177,13 +1177,14 @@ static int tcp_match_skb_to_sack(struct
+                */
+               if (pkt_len > mss) {
+                       unsigned int new_len = (pkt_len / mss) * mss;
+-                      if (!in_sack && new_len < pkt_len) {
++                      if (!in_sack && new_len < pkt_len)
+                               new_len += mss;
+-                              if (new_len >= skb->len)
+-                                      return 0;
+-                      }
+                       pkt_len = new_len;
+               }
++
++              if (pkt_len >= skb->len && !in_sack)
++                      return 0;
++
+               err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC);
+               if (err < 0)
+                       return err;
diff --git a/queue-4.9/tcp-eliminate-negative-reordering-in-tcp_clean_rtx_queue.patch b/queue-4.9/tcp-eliminate-negative-reordering-in-tcp_clean_rtx_queue.patch
new file mode 100644 (file)
index 0000000..a552d24
--- /dev/null
@@ -0,0 +1,44 @@
+From foo@baz Wed May 31 09:13:34 JST 2017
+From: Soheil Hassas Yeganeh <soheil@google.com>
+Date: Mon, 15 May 2017 17:05:47 -0400
+Subject: tcp: eliminate negative reordering in tcp_clean_rtx_queue
+
+From: Soheil Hassas Yeganeh <soheil@google.com>
+
+
+[ Upstream commit bafbb9c73241760023d8981191ddd30bb1c6dbac ]
+
+tcp_ack() can call tcp_fragment() which may decrease the
+value of tp->fackets_out when MSS changes. When prior_fackets
+is larger than tp->fackets_out, tcp_clean_rtx_queue() can
+invoke tcp_update_reordering() with negative values. This
+results in absurd tp->reordering values higher than
+sysctl_tcp_max_reordering.
+
+Note that tcp_update_reordering indeed sets tp->reordering
+to min(sysctl_tcp_max_reordering, metric), but because
+the comparison is signed, a negative metric always wins.
+
+Fixes: c7caf8d3ed7a ("[TCP]: Fix reord detection due to snd_una covered holes")
+Reported-by: Rebecca Isaacs <risaacs@google.com>
+Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -3233,7 +3233,7 @@ static int tcp_clean_rtx_queue(struct so
+                       int delta;
+                       /* Non-retransmitted hole got filled? That's reordering */
+-                      if (reord < prior_fackets)
++                      if (reord < prior_fackets && reord <= tp->fackets_out)
+                               tcp_update_reordering(sk, tp->fackets_out - reord, 0);
+                       delta = tcp_is_fack(tp) ? pkts_acked :
diff --git a/queue-4.9/virtio-net-enable-tso-checksum-offloads-for-q-in-q-vlans.patch b/queue-4.9/virtio-net-enable-tso-checksum-offloads-for-q-in-q-vlans.patch
new file mode 100644 (file)
index 0000000..143e641
--- /dev/null
@@ -0,0 +1,34 @@
+From foo@baz Wed May 31 09:13:34 JST 2017
+From: Vlad Yasevich <vyasevich@gmail.com>
+Date: Tue, 23 May 2017 13:38:43 -0400
+Subject: virtio-net: enable TSO/checksum offloads for Q-in-Q vlans
+
+From: Vlad Yasevich <vyasevich@gmail.com>
+
+
+[ Upstream commit 2836b4f224d4fd7d1a2b23c3eecaf0f0ae199a74 ]
+
+Since virtio does not provide its own ndo_features_check handler,
+TSO, and now checksum offload, are disabled for stacked vlans.
+Re-enable the support and let the host take care of it.  This
+restores/improves Guest-to-Guest performance over Q-in-Q vlans.
+
+Acked-by: Jason Wang <jasowang@redhat.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/virtio_net.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -1456,6 +1456,7 @@ static const struct net_device_ops virtn
+ #ifdef CONFIG_NET_RX_BUSY_POLL
+       .ndo_busy_poll          = virtnet_busy_poll,
+ #endif
++      .ndo_features_check     = passthru_features_check,
+ };
+ static void virtnet_config_changed_work(struct work_struct *work)
diff --git a/queue-4.9/vlan-fix-tcp-checksum-offloads-in-q-in-q-vlans.patch b/queue-4.9/vlan-fix-tcp-checksum-offloads-in-q-in-q-vlans.patch
new file mode 100644 (file)
index 0000000..432a02b
--- /dev/null
@@ -0,0 +1,71 @@
+From foo@baz Wed May 31 09:13:34 JST 2017
+From: Vlad Yasevich <vyasevich@gmail.com>
+Date: Tue, 23 May 2017 13:38:41 -0400
+Subject: vlan: Fix tcp checksum offloads in Q-in-Q vlans
+
+From: Vlad Yasevich <vyasevich@gmail.com>
+
+
+[ Upstream commit 35d2f80b07bbe03fb358afb0bdeff7437a7d67ff ]
+
+It appears that TCP checksum offloading has been broken for
+Q-in-Q vlans.  The behavior was exacerbated by the
+series
+    commit afb0bc972b52 ("Merge branch 'stacked_vlan_tso'")
+that enabled acceleration features on stacked vlans.
+
+However, even without that series, it is possible to trigger
+this issue.  It just requires a lot more specialized configuration.
+
+The root cause is the interaction between how
+netdev_intersect_features() works, the features actually set on
+the vlan devices and HW having the ability to run checksum with
+longer headers.
+
+The issue starts when netdev_intersect_features() replaces
+NETIF_F_HW_CSUM with a combination of NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM,
+if the HW advertises IP|IPV6 specific checksums.  This happens
+for tagged and multi-tagged packets.   However, HW that enables
+IP|IPV6 checksum offloading doesn't guarantee that packets with
+arbitrarily long headers can be checksummed.
+
+This patch disables IP|IPV6 checksums on the packet for multi-tagged
+packets.
+
+CC: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+CC: Michal Kubecek <mkubecek@suse.cz>
+Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
+Acked-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/if_vlan.h |   18 ++++++++++--------
+ 1 file changed, 10 insertions(+), 8 deletions(-)
+
+--- a/include/linux/if_vlan.h
++++ b/include/linux/if_vlan.h
+@@ -630,14 +630,16 @@ static inline bool skb_vlan_tagged_multi
+ static inline netdev_features_t vlan_features_check(const struct sk_buff *skb,
+                                                   netdev_features_t features)
+ {
+-      if (skb_vlan_tagged_multi(skb))
+-              features = netdev_intersect_features(features,
+-                                                   NETIF_F_SG |
+-                                                   NETIF_F_HIGHDMA |
+-                                                   NETIF_F_FRAGLIST |
+-                                                   NETIF_F_HW_CSUM |
+-                                                   NETIF_F_HW_VLAN_CTAG_TX |
+-                                                   NETIF_F_HW_VLAN_STAG_TX);
++      if (skb_vlan_tagged_multi(skb)) {
++              /* In the case of multi-tagged packets, use a direct mask
++               * instead of using netdev_interesect_features(), to make
++               * sure that only devices supporting NETIF_F_HW_CSUM will
++               * have checksum offloading support.
++               */
++              features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM |
++                          NETIF_F_FRAGLIST | NETIF_F_HW_VLAN_CTAG_TX |
++                          NETIF_F_HW_VLAN_STAG_TX;
++      }
+       return features;
+ }