From: Greg Kroah-Hartman Date: Wed, 31 May 2017 00:14:08 +0000 (+0900) Subject: 4.11-stable patches X-Git-Tag: v3.18.56~35 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d09fcebea5743e2ed84bc2416a1b7932a9117fa0;p=thirdparty%2Fkernel%2Fstable-queue.git 4.11-stable patches added patches: be2net-fix-offload-features-for-q-in-q-packets.patch bonding-fix-accounting-of-active-ports-in-3ad.patch bpf-add-bpf_clone_redirect-to-bpf_helper_changes_pkt_data.patch bpf-adjust-verifier-heuristics.patch bpf-arm64-fix-faulty-emission-of-map-access-in-tail-calls.patch bpf-fix-wrong-exposure-of-map_flags-into-fdinfo-for-lpm.patch bridge-netlink-check-vlan_default_pvid-range.patch bridge-start-hello_timer-when-enabling-kernel_stp-in-br_stp_start.patch dccp-tcp-do-not-inherit-mc_list-from-parent.patch driver-vrf-fix-one-possible-use-after-free-issue.patch geneve-fix-fill_info-when-using-collect_metadata.patch ip6_tunnel-ip6_gre-fix-setting-of-dscp-on-encapsulated-packets.patch ipv4-add-reference-counting-to-metrics.patch ipv6-check-ip6_find_1stfragopt-return-value-properly.patch ipv6-dccp-do-not-inherit-ipv6_mc_list-from-parent.patch ipv6-fix-out-of-bound-writes-in-__ip6_append_data.patch ipv6-prevent-overrun-when-parsing-v6-header-options.patch net-fix-compile-error-in-skb_orphan_partial.patch net-improve-handling-of-failures-on-link-and-route-dumps.patch net-mlx5-avoid-using-pending-command-interface-slots.patch net-mlx5e-fix-ethtool-pause-support-and-advertise-reporting.patch net-mlx5e-use-the-correct-pause-values-for-ethtool-advertising.patch net-packet-fix-missing-net_device-reference-release.patch net-phy-marvell-limit-errata-to-88m1101.patch net-smc-add-warning-about-remote-memory-exposure.patch netem-fix-skb_orphan_partial.patch qmi_wwan-add-another-lenovo-em74xx-device-id.patch s390-qeth-add-missing-hash-table-initializations.patch s390-qeth-avoid-null-pointer-dereference-on-osn.patch s390-qeth-handle-sysfs-error-during-initialization.patch s390-qeth-unbreak-osm-and-osn-support.patch sctp-do-not-inherit-ipv6_-mc-ac-fl-_list-from-parent.patch sctp-fix-icmp-processing-if-skb-is-non-linear.patch sctp-fix-src-address-selection-if-using-secondary-addresses-for-ipv6.patch smc-switch-to-usage-of-ib_pd_unsafe_global_rkey.patch tcp-avoid-fastopen-api-to-be-used-on-af_unspec.patch tcp-avoid-fragmenting-peculiar-skbs-in-sack.patch tcp-eliminate-negative-reordering-in-tcp_clean_rtx_queue.patch tipc-make-macro-tipc_wait_for_cond-smp-safe.patch virtio-net-enable-tso-checksum-offloads-for-q-in-q-vlans.patch vlan-fix-tcp-checksum-offloads-in-q-in-q-vlans.patch --- diff --git a/queue-4.11/be2net-fix-offload-features-for-q-in-q-packets.patch b/queue-4.11/be2net-fix-offload-features-for-q-in-q-packets.patch new file mode 100644 index 00000000000..d8762123a27 --- /dev/null +++ b/queue-4.11/be2net-fix-offload-features-for-q-in-q-packets.patch @@ -0,0 +1,44 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: Vlad Yasevich +Date: Tue, 23 May 2017 13:38:42 -0400 +Subject: be2net: Fix offload features for Q-in-Q packets + +From: Vlad Yasevich + + +[ Upstream commit cc6e9de62a7f84c9293a2ea41bc412b55bb46e85 ] + +At least some of the be2net cards do not seem to be capabled +of performing checksum offload computions on Q-in-Q packets. +In these case, the recevied checksum on the remote is invalid +and TCP syn packets are dropped. + +This patch adds a call to check disbled acceleration features +on Q-in-Q tagged traffic. 
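For reference, the one-line change in this patch runs the skb through vlan_features_check() ahead of the existing tunnel checks, so checksum/TSO offloads are masked for double-tagged frames the NIC cannot handle. A minimal sketch of a driver .ndo_features_check hook using that helper follows; the demo_features_check name is illustrative only and not taken from the patch.

	/* Sketch: mask offload features for Q-in-Q skbs in a driver's
	 * .ndo_features_check hook, mirroring what the be2net fix below
	 * does before its VXLAN-specific checks.
	 */
	#include <linux/if_vlan.h>
	#include <linux/netdevice.h>

	static netdev_features_t demo_features_check(struct sk_buff *skb,
						     struct net_device *dev,
						     netdev_features_t features)
	{
		/* vlan_features_check() drops checksum/TSO bits when the skb
		 * carries more than one VLAN tag; single-tagged and untagged
		 * traffic keeps its features.
		 */
		return vlan_features_check(skb, features);
	}

be_features_check() in the diff below does exactly this first, then applies its tunnel-specific restrictions.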
+ +CC: Sathya Perla +CC: Ajit Khaparde +CC: Sriharsha Basavapatna +CC: Somnath Kotur +Signed-off-by: Vladislav Yasevich +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/emulex/benet/be_main.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/emulex/benet/be_main.c ++++ b/drivers/net/ethernet/emulex/benet/be_main.c +@@ -5027,9 +5027,11 @@ static netdev_features_t be_features_che + struct be_adapter *adapter = netdev_priv(dev); + u8 l4_hdr = 0; + +- /* The code below restricts offload features for some tunneled packets. ++ /* The code below restricts offload features for some tunneled and ++ * Q-in-Q packets. + * Offload features for normal (non tunnel) packets are unchanged. + */ ++ features = vlan_features_check(skb, features); + if (!skb->encapsulation || + !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)) + return features; diff --git a/queue-4.11/bonding-fix-accounting-of-active-ports-in-3ad.patch b/queue-4.11/bonding-fix-accounting-of-active-ports-in-3ad.patch new file mode 100644 index 00000000000..e69a4e69207 --- /dev/null +++ b/queue-4.11/bonding-fix-accounting-of-active-ports-in-3ad.patch @@ -0,0 +1,70 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: Jarod Wilson +Date: Fri, 19 May 2017 19:43:45 -0400 +Subject: bonding: fix accounting of active ports in 3ad + +From: Jarod Wilson + + +[ Upstream commit 751da2a69b7cc82d83dc310ed7606225f2d6e014 ] + +As of 7bb11dc9f59d and 0622cab0341c, bond slaves in a 3ad bond are not +removed from the aggregator when they are down, and the active slave count +is NOT equal to number of ports in the aggregator, but rather the number +of ports in the aggregator that are still enabled. The sysfs spew for +bonding_show_ad_num_ports() has a comment that says "Show number of active +802.3ad ports.", but it's currently showing total number of ports, both +active and inactive. Remedy it by using the same logic introduced in +0622cab0341c in __bond_3ad_get_active_agg_info(), so sysfs, procfs and +netlink all report the number of active ports. Note that this means that +IFLA_BOND_AD_INFO_NUM_PORTS really means NUM_ACTIVE_PORTS instead of +NUM_PORTS, and thus perhaps should be renamed for clarity. + +Lightly tested on a dual i40e lacp bond, simulating link downs with an ip +link set dev down, was able to produce the state where I could +see both in the same aggregator, but a number of ports count of 1. + +MII Status: up +Active Aggregator Info: + Aggregator ID: 1 + Number of ports: 2 <--- +Slave Interface: ens10 +MII Status: up <--- +Aggregator ID: 1 +Slave Interface: ens11 +MII Status: up +Aggregator ID: 1 + +MII Status: up +Active Aggregator Info: + Aggregator ID: 1 + Number of ports: 1 <--- +Slave Interface: ens10 +MII Status: down <--- +Aggregator ID: 1 +Slave Interface: ens11 +MII Status: up +Aggregator ID: 1 + +CC: Jay Vosburgh +CC: Veaceslav Falico +CC: Andy Gospodarek +CC: netdev@vger.kernel.org +Signed-off-by: Jarod Wilson +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_3ad.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/bonding/bond_3ad.c ++++ b/drivers/net/bonding/bond_3ad.c +@@ -2573,7 +2573,7 @@ int __bond_3ad_get_active_agg_info(struc + return -1; + + ad_info->aggregator_id = aggregator->aggregator_identifier; +- ad_info->ports = aggregator->num_of_ports; ++ ad_info->ports = __agg_active_ports(aggregator); + ad_info->actor_key = aggregator->actor_oper_aggregator_key; + ad_info->partner_key = aggregator->partner_oper_aggregator_key; + ether_addr_copy(ad_info->partner_system, diff --git a/queue-4.11/bpf-add-bpf_clone_redirect-to-bpf_helper_changes_pkt_data.patch b/queue-4.11/bpf-add-bpf_clone_redirect-to-bpf_helper_changes_pkt_data.patch new file mode 100644 index 00000000000..5ba2ad50785 --- /dev/null +++ b/queue-4.11/bpf-add-bpf_clone_redirect-to-bpf_helper_changes_pkt_data.patch @@ -0,0 +1,34 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: Daniel Borkmann +Date: Thu, 25 May 2017 01:05:07 +0200 +Subject: bpf: add bpf_clone_redirect to bpf_helper_changes_pkt_data + +From: Daniel Borkmann + + +[ Upstream commit 41703a731066fde79c3e5ccf3391cf77a98aeda5 ] + +The bpf_clone_redirect() still needs to be listed in +bpf_helper_changes_pkt_data() since we call into +bpf_try_make_head_writable() from there, thus we need +to invalidate prior pkt regs as well. + +Fixes: 36bbef52c7eb ("bpf: direct packet write and access for helpers for clsact progs") +Signed-off-by: Daniel Borkmann +Acked-by: Alexei Starovoitov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/filter.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -2266,6 +2266,7 @@ bool bpf_helper_changes_pkt_data(void *f + func == bpf_skb_change_head || + func == bpf_skb_change_tail || + func == bpf_skb_pull_data || ++ func == bpf_clone_redirect || + func == bpf_l3_csum_replace || + func == bpf_l4_csum_replace || + func == bpf_xdp_adjust_head) diff --git a/queue-4.11/bpf-adjust-verifier-heuristics.patch b/queue-4.11/bpf-adjust-verifier-heuristics.patch new file mode 100644 index 00000000000..5d23050d7a0 --- /dev/null +++ b/queue-4.11/bpf-adjust-verifier-heuristics.patch @@ -0,0 +1,101 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: Daniel Borkmann +Date: Thu, 18 May 2017 03:00:06 +0200 +Subject: bpf: adjust verifier heuristics + +From: Daniel Borkmann + + +[ Upstream commit 3c2ce60bdd3d57051bf85615deec04a694473840 ] + +Current limits with regards to processing program paths do not +really reflect today's needs anymore due to programs becoming +more complex and verifier smarter, keeping track of more data +such as const ALU operations, alignment tracking, spilling of +PTR_TO_MAP_VALUE_ADJ registers, and other features allowing for +smarter matching of what LLVM generates. + +This also comes with the side-effect that we result in fewer +opportunities to prune search states and thus often need to do +more work to prove safety than in the past due to different +register states and stack layout where we mismatch. Generally, +it's quite hard to determine what caused a sudden increase in +complexity, it could be caused by something as trivial as a +single branch somewhere at the beginning of the program where +LLVM assigned a stack slot that is marked differently throughout +other branches and thus causing a mismatch, where verifier +then needs to prove safety for the whole rest of the program. 
+Subsequently, programs with even less than half the insn size +limit can get rejected. We noticed that while some programs +load fine under pre 4.11, they get rejected due to hitting +limits on more recent kernels. We saw that in the vast majority +of cases (90+%) pruning failed due to register mismatches. In +case of stack mismatches, majority of cases failed due to +different stack slot types (invalid, spill, misc) rather than +differences in spilled registers. + +This patch makes pruning more aggressive by also adding markers +that sit at conditional jumps as well. Currently, we only mark +jump targets for pruning. For example in direct packet access, +these are usually error paths where we bail out. We found that +adding these markers, it can reduce number of processed insns +by up to 30%. Another option is to ignore reg->id in probing +PTR_TO_MAP_VALUE_OR_NULL registers, which can help pruning +slightly as well by up to 7% observed complexity reduction as +stand-alone. Meaning, if a previous path with register type +PTR_TO_MAP_VALUE_OR_NULL for map X was found to be safe, then +in the current state a PTR_TO_MAP_VALUE_OR_NULL register for +the same map X must be safe as well. Last but not least the +patch also adds a scheduling point and bumps the current limit +for instructions to be processed to a more adequate value. + +Signed-off-by: Daniel Borkmann +Acked-by: Alexei Starovoitov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/verifier.c | 12 +++++++++++- + 1 file changed, 11 insertions(+), 1 deletion(-) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -140,7 +140,7 @@ struct bpf_verifier_stack_elem { + struct bpf_verifier_stack_elem *next; + }; + +-#define BPF_COMPLEXITY_LIMIT_INSNS 65536 ++#define BPF_COMPLEXITY_LIMIT_INSNS 98304 + #define BPF_COMPLEXITY_LIMIT_STACK 1024 + + struct bpf_call_arg_meta { +@@ -2546,6 +2546,7 @@ peek_stack: + env->explored_states[t + 1] = STATE_LIST_MARK; + } else { + /* conditional jump with two edges */ ++ env->explored_states[t] = STATE_LIST_MARK; + ret = push_insn(t, t + 1, FALLTHROUGH, env); + if (ret == 1) + goto peek_stack; +@@ -2704,6 +2705,12 @@ static bool states_equal(struct bpf_veri + rcur->type != NOT_INIT)) + continue; + ++ /* Don't care about the reg->id in this case. 
*/ ++ if (rold->type == PTR_TO_MAP_VALUE_OR_NULL && ++ rcur->type == PTR_TO_MAP_VALUE_OR_NULL && ++ rold->map_ptr == rcur->map_ptr) ++ continue; ++ + if (rold->type == PTR_TO_PACKET && rcur->type == PTR_TO_PACKET && + compare_ptrs_to_packet(rold, rcur)) + continue; +@@ -2838,6 +2845,9 @@ static int do_check(struct bpf_verifier_ + goto process_bpf_exit; + } + ++ if (need_resched()) ++ cond_resched(); ++ + if (log_level && do_print_state) { + verbose("\nfrom %d to %d:", prev_insn_idx, insn_idx); + print_verifier_state(&env->cur_state); diff --git a/queue-4.11/bpf-arm64-fix-faulty-emission-of-map-access-in-tail-calls.patch b/queue-4.11/bpf-arm64-fix-faulty-emission-of-map-access-in-tail-calls.patch new file mode 100644 index 00000000000..39b988b06c6 --- /dev/null +++ b/queue-4.11/bpf-arm64-fix-faulty-emission-of-map-access-in-tail-calls.patch @@ -0,0 +1,73 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: Daniel Borkmann +Date: Thu, 11 May 2017 01:53:15 +0200 +Subject: bpf, arm64: fix faulty emission of map access in tail calls + +From: Daniel Borkmann + + +[ Upstream commit d8b54110ee944de522ccd3531191f39986ec20f9 ] + +Shubham was recently asking on netdev why in arm64 JIT we don't multiply +the index for accessing the tail call map by 8. That led me into testing +out arm64 JIT wrt tail calls and it turned out I got a NULL pointer +dereference on the tail call. + +The buggy access is at: + + prog = array->ptrs[index]; + if (prog == NULL) + goto out; + + [...] + 00000060: d2800e0a mov x10, #0x70 // #112 + 00000064: f86a682a ldr x10, [x1,x10] + 00000068: f862694b ldr x11, [x10,x2] + 0000006c: b40000ab cbz x11, 0x00000080 + [...] + +The code triggering the crash is f862694b. x1 at the time contains the +address of the bpf array, x10 offsetof(struct bpf_array, ptrs). Meaning, +above we load the pointer to the program at map slot 0 into x10. x10 +can then be NULL if the slot is not occupied, which we later on try to +access with a user given offset in x2 that is the map index. + +Fix this by emitting the following instead: + + [...] + 00000060: d2800e0a mov x10, #0x70 // #112 + 00000064: 8b0a002a add x10, x1, x10 + 00000068: d37df04b lsl x11, x2, #3 + 0000006c: f86b694b ldr x11, [x10,x11] + 00000070: b40000ab cbz x11, 0x00000084 + [...] + +This basically adds the offset to ptrs to the base address of the bpf +array we got and we later on access the map with an index * 8 offset +relative to that. The tail call map itself is basically one large area +with meta data at the head followed by the array of prog pointers. +This makes tail calls working again, tested on Cavium ThunderX ARMv8. + +Fixes: ddb55992b04d ("arm64: bpf: implement bpf_tail_call() helper") +Reported-by: Shubham Bansal +Signed-off-by: Daniel Borkmann +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/net/bpf_jit_comp.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/arch/arm64/net/bpf_jit_comp.c ++++ b/arch/arm64/net/bpf_jit_comp.c +@@ -252,8 +252,9 @@ static int emit_bpf_tail_call(struct jit + */ + off = offsetof(struct bpf_array, ptrs); + emit_a64_mov_i64(tmp, off, ctx); +- emit(A64_LDR64(tmp, r2, tmp), ctx); +- emit(A64_LDR64(prg, tmp, r3), ctx); ++ emit(A64_ADD(1, tmp, r2, tmp), ctx); ++ emit(A64_LSL(1, prg, r3, 3), ctx); ++ emit(A64_LDR64(prg, tmp, prg), ctx); + emit(A64_CBZ(1, prg, jmp_offset), ctx); + + /* goto *(prog->bpf_func + prologue_size); */ diff --git a/queue-4.11/bpf-fix-wrong-exposure-of-map_flags-into-fdinfo-for-lpm.patch b/queue-4.11/bpf-fix-wrong-exposure-of-map_flags-into-fdinfo-for-lpm.patch new file mode 100644 index 00000000000..8da7e01c49c --- /dev/null +++ b/queue-4.11/bpf-fix-wrong-exposure-of-map_flags-into-fdinfo-for-lpm.patch @@ -0,0 +1,66 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: Daniel Borkmann +Date: Thu, 25 May 2017 01:05:08 +0200 +Subject: bpf: fix wrong exposure of map_flags into fdinfo for lpm + +From: Daniel Borkmann + + +[ Upstream commit a316338cb71a3260201490e615f2f6d5c0d8fb2c ] + +trie_alloc() always needs to have BPF_F_NO_PREALLOC passed in via +attr->map_flags, since it does not support preallocation yet. We +check the flag, but we never copy the flag into trie->map.map_flags, +which is later on exposed into fdinfo and used by loaders such as +iproute2. Latter uses this in bpf_map_selfcheck_pinned() to test +whether a pinned map has the same spec as the one from the BPF obj +file and if not, bails out, which is currently the case for lpm +since it exposes always 0 as flags. + +Also copy over flags in array_map_alloc() and stack_map_alloc(). +They always have to be 0 right now, but we should make sure to not +miss to copy them over at a later point in time when we add actual +flags for them to use. + +Fixes: b95a5c4db09b ("bpf: add a longest prefix match trie map implementation") +Reported-by: Jarno Rajahalme +Signed-off-by: Daniel Borkmann +Acked-by: Alexei Starovoitov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/arraymap.c | 1 + + kernel/bpf/lpm_trie.c | 1 + + kernel/bpf/stackmap.c | 1 + + 3 files changed, 3 insertions(+) + +--- a/kernel/bpf/arraymap.c ++++ b/kernel/bpf/arraymap.c +@@ -83,6 +83,7 @@ static struct bpf_map *array_map_alloc(u + array->map.key_size = attr->key_size; + array->map.value_size = attr->value_size; + array->map.max_entries = attr->max_entries; ++ array->map.map_flags = attr->map_flags; + array->elem_size = elem_size; + + if (!percpu) +--- a/kernel/bpf/lpm_trie.c ++++ b/kernel/bpf/lpm_trie.c +@@ -432,6 +432,7 @@ static struct bpf_map *trie_alloc(union + trie->map.key_size = attr->key_size; + trie->map.value_size = attr->value_size; + trie->map.max_entries = attr->max_entries; ++ trie->map.map_flags = attr->map_flags; + trie->data_size = attr->key_size - + offsetof(struct bpf_lpm_trie_key, data); + trie->max_prefixlen = trie->data_size * 8; +--- a/kernel/bpf/stackmap.c ++++ b/kernel/bpf/stackmap.c +@@ -88,6 +88,7 @@ static struct bpf_map *stack_map_alloc(u + smap->map.key_size = attr->key_size; + smap->map.value_size = value_size; + smap->map.max_entries = attr->max_entries; ++ smap->map.map_flags = attr->map_flags; + smap->n_buckets = n_buckets; + smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; + diff --git a/queue-4.11/bridge-netlink-check-vlan_default_pvid-range.patch b/queue-4.11/bridge-netlink-check-vlan_default_pvid-range.patch new file mode 100644 index 00000000000..54cfd02f24e --- /dev/null +++ b/queue-4.11/bridge-netlink-check-vlan_default_pvid-range.patch @@ -0,0 +1,53 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: Tobias Jungel +Date: Wed, 17 May 2017 09:29:12 +0200 +Subject: bridge: netlink: check vlan_default_pvid range + +From: Tobias Jungel + + +[ Upstream commit a285860211bf257b0e6d522dac6006794be348af ] + +Currently it is allowed to set the default pvid of a bridge to a value +above VLAN_VID_MASK (0xfff). This patch adds a check to br_validate and +returns -EINVAL in case the pvid is out of bounds. + +Reproduce by calling: + +[root@test ~]# ip l a type bridge +[root@test ~]# ip l a type dummy +[root@test ~]# ip l s bridge0 type bridge vlan_filtering 1 +[root@test ~]# ip l s bridge0 type bridge vlan_default_pvid 9999 +[root@test ~]# ip l s dummy0 master bridge0 +[root@test ~]# bridge vlan +port vlan ids +bridge0 9999 PVID Egress Untagged + +dummy0 9999 PVID Egress Untagged + +Fixes: 0f963b7592ef ("bridge: netlink: add support for default_pvid") +Acked-by: Nikolay Aleksandrov +Signed-off-by: Tobias Jungel +Acked-by: Sabrina Dubroca +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_netlink.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/net/bridge/br_netlink.c ++++ b/net/bridge/br_netlink.c +@@ -828,6 +828,13 @@ static int br_validate(struct nlattr *tb + return -EPROTONOSUPPORT; + } + } ++ ++ if (data[IFLA_BR_VLAN_DEFAULT_PVID]) { ++ __u16 defpvid = nla_get_u16(data[IFLA_BR_VLAN_DEFAULT_PVID]); ++ ++ if (defpvid >= VLAN_VID_MASK) ++ return -EINVAL; ++ } + #endif + + return 0; diff --git a/queue-4.11/bridge-start-hello_timer-when-enabling-kernel_stp-in-br_stp_start.patch b/queue-4.11/bridge-start-hello_timer-when-enabling-kernel_stp-in-br_stp_start.patch new file mode 100644 index 00000000000..56e883bd191 --- /dev/null +++ b/queue-4.11/bridge-start-hello_timer-when-enabling-kernel_stp-in-br_stp_start.patch @@ -0,0 +1,58 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: Xin Long +Date: Fri, 19 May 2017 22:20:29 +0800 +Subject: bridge: start hello_timer when enabling KERNEL_STP in br_stp_start + +From: Xin Long + + +[ Upstream commit 6d18c732b95c0a9d35e9f978b4438bba15412284 ] + +Since commit 76b91c32dd86 ("bridge: stp: when using userspace stp stop +kernel hello and hold timers"), bridge would not start hello_timer if +stp_enabled is not KERNEL_STP when br_dev_open. + +The problem is even if users set stp_enabled with KERNEL_STP later, +the timer will still not be started. It causes that KERNEL_STP can +not really work. Users have to re-ifup the bridge to avoid this. + +This patch is to fix it by starting br->hello_timer when enabling +KERNEL_STP in br_stp_start. + +As an improvement, it's also to start hello_timer again only when +br->stp_enabled is KERNEL_STP in br_hello_timer_expired, there is +no reason to start the timer again when it's NO_STP. + +Fixes: 76b91c32dd86 ("bridge: stp: when using userspace stp stop kernel hello and hold timers") +Reported-by: Haidong Li +Signed-off-by: Xin Long +Acked-by: Nikolay Aleksandrov +Reviewed-by: Ivan Vecera +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_stp_if.c | 1 + + net/bridge/br_stp_timer.c | 2 +- + 2 files changed, 2 insertions(+), 1 deletion(-) + +--- a/net/bridge/br_stp_if.c ++++ b/net/bridge/br_stp_if.c +@@ -179,6 +179,7 @@ static void br_stp_start(struct net_brid + br_debug(br, "using kernel STP\n"); + + /* To start timers on any ports left in blocking */ ++ mod_timer(&br->hello_timer, jiffies + br->hello_time); + br_port_state_selection(br); + } + +--- a/net/bridge/br_stp_timer.c ++++ b/net/bridge/br_stp_timer.c +@@ -40,7 +40,7 @@ static void br_hello_timer_expired(unsig + if (br->dev->flags & IFF_UP) { + br_config_bpdu_generation(br); + +- if (br->stp_enabled != BR_USER_STP) ++ if (br->stp_enabled == BR_KERNEL_STP) + mod_timer(&br->hello_timer, + round_jiffies(jiffies + br->hello_time)); + } diff --git a/queue-4.11/dccp-tcp-do-not-inherit-mc_list-from-parent.patch b/queue-4.11/dccp-tcp-do-not-inherit-mc_list-from-parent.patch new file mode 100644 index 00000000000..019de6ea596 --- /dev/null +++ b/queue-4.11/dccp-tcp-do-not-inherit-mc_list-from-parent.patch @@ -0,0 +1,42 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: Eric Dumazet +Date: Tue, 9 May 2017 06:29:19 -0700 +Subject: dccp/tcp: do not inherit mc_list from parent + +From: Eric Dumazet + + +[ Upstream commit 657831ffc38e30092a2d5f03d385d710eb88b09a ] + +syzkaller found a way to trigger double frees from ip_mc_drop_socket() + +It turns out that leave a copy of parent mc_list at accept() time, +which is very bad. 
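The bug class behind this fix: the clone path effectively duplicates the parent socket by structure copy, so a pointer member the parent owns (here mc_list) ends up shared with the child and is later freed twice. A hypothetical illustration of the pattern and its remedy (names are made up, not from the patch):

	/* Cloning by structure copy duplicates raw pointers; a member the
	 * parent owns must be reset (or deep-copied) in the clone, or both
	 * objects will eventually free the same memory.
	 */
	#include <linux/slab.h>
	#include <linux/string.h>

	struct demo_sock {
		void *mc_list;		/* owned: freed when the sock dies */
		int other_state;
	};

	static struct demo_sock *demo_clone(const struct demo_sock *parent)
	{
		struct demo_sock *child = kmemdup(parent, sizeof(*child),
						  GFP_ATOMIC);

		if (child)
			child->mc_list = NULL;	/* do not inherit parent's list */
		return child;
	}

The fix below is the socket version of the same reset: inet_csk_clone_lock() clears inet_sk(newsk)->mc_list right after the clone.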
+ +Very similar to commit 8b485ce69876 ("tcp: do not inherit +fastopen_req from parent") + +Initial report from Pray3r, completed by Andrey one. +Thanks a lot to them ! + +Signed-off-by: Eric Dumazet +Reported-by: Pray3r +Reported-by: Andrey Konovalov +Tested-by: Andrey Konovalov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/inet_connection_sock.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/ipv4/inet_connection_sock.c ++++ b/net/ipv4/inet_connection_sock.c +@@ -794,6 +794,8 @@ struct sock *inet_csk_clone_lock(const s + /* listeners have SOCK_RCU_FREE, not the children */ + sock_reset_flag(newsk, SOCK_RCU_FREE); + ++ inet_sk(newsk)->mc_list = NULL; ++ + newsk->sk_mark = inet_rsk(req)->ir_mark; + atomic64_set(&newsk->sk_cookie, + atomic64_read(&inet_rsk(req)->ir_cookie)); diff --git a/queue-4.11/driver-vrf-fix-one-possible-use-after-free-issue.patch b/queue-4.11/driver-vrf-fix-one-possible-use-after-free-issue.patch new file mode 100644 index 00000000000..20ffe7bff25 --- /dev/null +++ b/queue-4.11/driver-vrf-fix-one-possible-use-after-free-issue.patch @@ -0,0 +1,60 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: Gao Feng +Date: Tue, 9 May 2017 18:27:33 +0800 +Subject: driver: vrf: Fix one possible use-after-free issue + +From: Gao Feng + + +[ Upstream commit 1a4a5bf52a4adb477adb075e5afce925824ad132 ] + +The current codes only deal with the case that the skb is dropped, it +may meet one use-after-free issue when NF_HOOK returns 0 that means +the skb is stolen by one netfilter rule or hook. + +When one netfilter rule or hook stoles the skb and return NF_STOLEN, +it means the skb is taken by the rule, and other modules should not +touch this skb ever. Maybe the skb is queued or freed directly by the +rule. + +Now uses the nf_hook instead of NF_HOOK to get the result of netfilter, +and check the return value of nf_hook. Only when its value equals 1, it +means the skb could go ahead. Or reset the skb as NULL. + +BTW, because vrf_rcv_finish is empty function, so needn't invoke it +even though nf_hook returns 1. But we need to modify vrf_rcv_finish +to deal with the NF_STOLEN case. + +There are two cases when skb is stolen. +1. The skb is stolen and freed directly. + There is nothing we need to do, and vrf_rcv_finish isn't invoked. +2. The skb is queued and reinjected again. + The vrf_rcv_finish would be invoked as okfn, so need to free the + skb in it. + +Signed-off-by: Gao Feng +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vrf.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/vrf.c ++++ b/drivers/net/vrf.c +@@ -851,6 +851,7 @@ static u32 vrf_fib_table(const struct ne + + static int vrf_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) + { ++ kfree_skb(skb); + return 0; + } + +@@ -860,7 +861,7 @@ static struct sk_buff *vrf_rcv_nfhook(u8 + { + struct net *net = dev_net(dev); + +- if (NF_HOOK(pf, hook, net, NULL, skb, dev, NULL, vrf_rcv_finish) < 0) ++ if (nf_hook(pf, hook, net, NULL, skb, dev, NULL, vrf_rcv_finish) != 1) + skb = NULL; /* kfree_skb(skb) handled by nf code */ + + return skb; diff --git a/queue-4.11/geneve-fix-fill_info-when-using-collect_metadata.patch b/queue-4.11/geneve-fix-fill_info-when-using-collect_metadata.patch new file mode 100644 index 00000000000..46c9f1d5183 --- /dev/null +++ b/queue-4.11/geneve-fix-fill_info-when-using-collect_metadata.patch @@ -0,0 +1,59 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: Eric Garver +Date: Tue, 23 May 2017 18:37:27 -0400 +Subject: geneve: fix fill_info when using collect_metadata + +From: Eric Garver + + +[ Upstream commit 11387fe4a98f75d1f4cdb3efe3b42b19205c9df5 ] + +Since 9b4437a5b870 ("geneve: Unify LWT and netdev handling.") fill_info +does not return UDP_ZERO_CSUM6_RX when using COLLECT_METADATA. This is +because it uses ip_tunnel_info_af() with the device level info, which is +not valid for COLLECT_METADATA. + +Fix by checking for the presence of the actual sockets. + +Fixes: 9b4437a5b870 ("geneve: Unify LWT and netdev handling.") +Signed-off-by: Eric Garver +Acked-by: Pravin B Shelar +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/geneve.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/drivers/net/geneve.c ++++ b/drivers/net/geneve.c +@@ -1293,7 +1293,7 @@ static int geneve_fill_info(struct sk_bu + if (nla_put_u32(skb, IFLA_GENEVE_ID, vni)) + goto nla_put_failure; + +- if (ip_tunnel_info_af(info) == AF_INET) { ++ if (rtnl_dereference(geneve->sock4)) { + if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE, + info->key.u.ipv4.dst)) + goto nla_put_failure; +@@ -1302,8 +1302,10 @@ static int geneve_fill_info(struct sk_bu + !!(info->key.tun_flags & TUNNEL_CSUM))) + goto nla_put_failure; + ++ } ++ + #if IS_ENABLED(CONFIG_IPV6) +- } else { ++ if (rtnl_dereference(geneve->sock6)) { + if (nla_put_in6_addr(skb, IFLA_GENEVE_REMOTE6, + &info->key.u.ipv6.dst)) + goto nla_put_failure; +@@ -1315,8 +1317,8 @@ static int geneve_fill_info(struct sk_bu + if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, + !geneve->use_udp6_rx_checksums)) + goto nla_put_failure; +-#endif + } ++#endif + + if (nla_put_u8(skb, IFLA_GENEVE_TTL, info->key.ttl) || + nla_put_u8(skb, IFLA_GENEVE_TOS, info->key.tos) || diff --git a/queue-4.11/ip6_tunnel-ip6_gre-fix-setting-of-dscp-on-encapsulated-packets.patch b/queue-4.11/ip6_tunnel-ip6_gre-fix-setting-of-dscp-on-encapsulated-packets.patch new file mode 100644 index 00000000000..aeb593f7522 --- /dev/null +++ b/queue-4.11/ip6_tunnel-ip6_gre-fix-setting-of-dscp-on-encapsulated-packets.patch @@ -0,0 +1,155 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: Peter Dawson +Date: Fri, 26 May 2017 06:35:18 +1000 +Subject: ip6_tunnel, ip6_gre: fix setting of DSCP on encapsulated packets + +From: Peter Dawson + + +[ Upstream commit 0e9a709560dbcfbace8bf4019dc5298619235891 ] + +This fix addresses two problems in the way the DSCP field is formulated + on the encapsulating header of 
IPv6 tunnels. +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=195661 + +1) The IPv6 tunneling code was manipulating the DSCP field of the + encapsulating packet using the 32b flowlabel. Since the flowlabel is + only the lower 20b it was incorrect to assume that the upper 12b + containing the DSCP and ECN fields would remain intact when formulating + the encapsulating header. This fix handles the 'inherit' and + 'fixed-value' DSCP cases explicitly using the extant dsfield u8 variable. + +2) The use of INET_ECN_encapsulate(0, dsfield) in ip6_tnl_xmit was + incorrect and resulted in the DSCP value always being set to 0. + +Commit 90427ef5d2a4 ("ipv6: fix flow labels when the traffic class + is non-0") caused the regression by masking out the flowlabel + which exposed the incorrect handling of the DSCP portion of the + flowlabel in ip6_tunnel and ip6_gre. + +Fixes: 90427ef5d2a4 ("ipv6: fix flow labels when the traffic class is non-0") +Signed-off-by: Peter Dawson +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_gre.c | 13 +++++++------ + net/ipv6/ip6_tunnel.c | 21 +++++++++++++-------- + 2 files changed, 20 insertions(+), 14 deletions(-) + +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -537,11 +537,10 @@ static inline int ip6gre_xmit_ipv4(struc + + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + +- dsfield = ipv4_get_dsfield(iph); +- + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) +- fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) +- & IPV6_TCLASS_MASK; ++ dsfield = ipv4_get_dsfield(iph); ++ else ++ dsfield = ip6_tclass(t->parms.flowinfo); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) + fl6.flowi6_mark = skb->mark; + +@@ -596,9 +595,11 @@ static inline int ip6gre_xmit_ipv6(struc + + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + +- dsfield = ipv6_get_dsfield(ipv6h); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) +- fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); ++ dsfield = ipv6_get_dsfield(ipv6h); ++ else ++ dsfield = ip6_tclass(t->parms.flowinfo); ++ + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) + fl6.flowlabel |= ip6_flowlabel(ipv6h); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) +--- a/net/ipv6/ip6_tunnel.c ++++ b/net/ipv6/ip6_tunnel.c +@@ -1196,7 +1196,7 @@ route_lookup: + skb_push(skb, sizeof(struct ipv6hdr)); + skb_reset_network_header(skb); + ipv6h = ipv6_hdr(skb); +- ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), ++ ip6_flow_hdr(ipv6h, dsfield, + ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6)); + ipv6h->hop_limit = hop_limit; + ipv6h->nexthdr = proto; +@@ -1231,8 +1231,6 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, str + if (tproto != IPPROTO_IPIP && tproto != 0) + return -1; + +- dsfield = ipv4_get_dsfield(iph); +- + if (t->parms.collect_md) { + struct ip_tunnel_info *tun_info; + const struct ip_tunnel_key *key; +@@ -1246,6 +1244,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, str + fl6.flowi6_proto = IPPROTO_IPIP; + fl6.daddr = key->u.ipv6.dst; + fl6.flowlabel = key->label; ++ dsfield = ip6_tclass(key->label); + } else { + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + encap_limit = t->parms.encap_limit; +@@ -1254,8 +1253,9 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, str + fl6.flowi6_proto = IPPROTO_IPIP; + + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) +- fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) +- & IPV6_TCLASS_MASK; ++ dsfield = ipv4_get_dsfield(iph); ++ else ++ dsfield = ip6_tclass(t->parms.flowinfo); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) + 
fl6.flowi6_mark = skb->mark; + } +@@ -1265,6 +1265,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, str + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) + return -1; + ++ dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph)); ++ + skb_set_inner_ipproto(skb, IPPROTO_IPIP); + + err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, +@@ -1298,8 +1300,6 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, str + ip6_tnl_addr_conflict(t, ipv6h)) + return -1; + +- dsfield = ipv6_get_dsfield(ipv6h); +- + if (t->parms.collect_md) { + struct ip_tunnel_info *tun_info; + const struct ip_tunnel_key *key; +@@ -1313,6 +1313,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, str + fl6.flowi6_proto = IPPROTO_IPV6; + fl6.daddr = key->u.ipv6.dst; + fl6.flowlabel = key->label; ++ dsfield = ip6_tclass(key->label); + } else { + offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); + /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */ +@@ -1335,7 +1336,9 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, str + fl6.flowi6_proto = IPPROTO_IPV6; + + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) +- fl6.flowlabel |= (*(__be32 *)ipv6h & IPV6_TCLASS_MASK); ++ dsfield = ipv6_get_dsfield(ipv6h); ++ else ++ dsfield = ip6_tclass(t->parms.flowinfo); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) + fl6.flowlabel |= ip6_flowlabel(ipv6h); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) +@@ -1347,6 +1350,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, str + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) + return -1; + ++ dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h)); ++ + skb_set_inner_ipproto(skb, IPPROTO_IPV6); + + err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, diff --git a/queue-4.11/ipv4-add-reference-counting-to-metrics.patch b/queue-4.11/ipv4-add-reference-counting-to-metrics.patch new file mode 100644 index 00000000000..f5a74018822 --- /dev/null +++ b/queue-4.11/ipv4-add-reference-counting-to-metrics.patch @@ -0,0 +1,254 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: Eric Dumazet +Date: Thu, 25 May 2017 14:27:35 -0700 +Subject: ipv4: add reference counting to metrics + +From: Eric Dumazet + + +[ Upstream commit 3fb07daff8e99243366a081e5129560734de4ada ] + +Andrey Konovalov reported crashes in ipv4_mtu() + +I could reproduce the issue with KASAN kernels, between +10.246.7.151 and 10.246.7.152 : + +1) 20 concurrent netperf -t TCP_RR -H 10.246.7.152 -l 1000 & + +2) At the same time run following loop : +while : +do + ip ro add 10.246.7.152 dev eth0 src 10.246.7.151 mtu 1500 + ip ro del 10.246.7.152 dev eth0 src 10.246.7.151 mtu 1500 +done + +Cong Wang attempted to add back rt->fi in commit +82486aa6f1b9 ("ipv4: restore rt->fi for reference counting") +but this proved to add some issues that were complex to solve. + +Instead, I suggested to add a refcount to the metrics themselves, +being a standalone object (in particular, no reference to other objects) + +I tried to make this patch as small as possible to ease its backport, +instead of being super clean. Note that we believe that only ipv4 dst +need to take care of the metric refcount. But if this is wrong, +this patch adds the basic infrastructure to extend this to other +families. + +Many thanks to Julian Anastasov for reviewing this patch, and Cong Wang +for his efforts on this problem. + +Fixes: 2860583fe840 ("ipv4: Kill rt->fi") +Signed-off-by: Eric Dumazet +Reported-by: Andrey Konovalov +Reviewed-by: Julian Anastasov +Acked-by: Cong Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/dst.h | 8 +++++++- + include/net/ip_fib.h | 10 +++++----- + net/core/dst.c | 23 ++++++++++++++--------- + net/ipv4/fib_semantics.c | 17 ++++++++++------- + net/ipv4/route.c | 10 +++++++++- + 5 files changed, 45 insertions(+), 23 deletions(-) + +--- a/include/net/dst.h ++++ b/include/net/dst.h +@@ -107,10 +107,16 @@ struct dst_entry { + }; + }; + ++struct dst_metrics { ++ u32 metrics[RTAX_MAX]; ++ atomic_t refcnt; ++}; ++extern const struct dst_metrics dst_default_metrics; ++ + u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old); +-extern const u32 dst_default_metrics[]; + + #define DST_METRICS_READ_ONLY 0x1UL ++#define DST_METRICS_REFCOUNTED 0x2UL + #define DST_METRICS_FLAGS 0x3UL + #define __DST_METRICS_PTR(Y) \ + ((u32 *)((Y) & ~DST_METRICS_FLAGS)) +--- a/include/net/ip_fib.h ++++ b/include/net/ip_fib.h +@@ -114,11 +114,11 @@ struct fib_info { + __be32 fib_prefsrc; + u32 fib_tb_id; + u32 fib_priority; +- u32 *fib_metrics; +-#define fib_mtu fib_metrics[RTAX_MTU-1] +-#define fib_window fib_metrics[RTAX_WINDOW-1] +-#define fib_rtt fib_metrics[RTAX_RTT-1] +-#define fib_advmss fib_metrics[RTAX_ADVMSS-1] ++ struct dst_metrics *fib_metrics; ++#define fib_mtu fib_metrics->metrics[RTAX_MTU-1] ++#define fib_window fib_metrics->metrics[RTAX_WINDOW-1] ++#define fib_rtt fib_metrics->metrics[RTAX_RTT-1] ++#define fib_advmss fib_metrics->metrics[RTAX_ADVMSS-1] + int fib_nhs; + #ifdef CONFIG_IP_ROUTE_MULTIPATH + int fib_weight; +--- a/net/core/dst.c ++++ b/net/core/dst.c +@@ -151,13 +151,13 @@ int dst_discard_out(struct net *net, str + } + EXPORT_SYMBOL(dst_discard_out); + +-const u32 dst_default_metrics[RTAX_MAX + 1] = { ++const struct dst_metrics dst_default_metrics = { + /* This initializer is needed to force linker to place this variable + * into const section. Otherwise it might end into bss section. + * We really want to avoid false sharing on this variable, and catch + * any writes on it. 
+ */ +- [RTAX_MAX] = 0xdeadbeef, ++ .refcnt = ATOMIC_INIT(1), + }; + + void dst_init(struct dst_entry *dst, struct dst_ops *ops, +@@ -169,7 +169,7 @@ void dst_init(struct dst_entry *dst, str + if (dev) + dev_hold(dev); + dst->ops = ops; +- dst_init_metrics(dst, dst_default_metrics, true); ++ dst_init_metrics(dst, dst_default_metrics.metrics, true); + dst->expires = 0UL; + dst->path = dst; + dst->from = NULL; +@@ -314,25 +314,30 @@ EXPORT_SYMBOL(dst_release); + + u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old) + { +- u32 *p = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC); ++ struct dst_metrics *p = kmalloc(sizeof(*p), GFP_ATOMIC); + + if (p) { +- u32 *old_p = __DST_METRICS_PTR(old); ++ struct dst_metrics *old_p = (struct dst_metrics *)__DST_METRICS_PTR(old); + unsigned long prev, new; + +- memcpy(p, old_p, sizeof(u32) * RTAX_MAX); ++ atomic_set(&p->refcnt, 1); ++ memcpy(p->metrics, old_p->metrics, sizeof(p->metrics)); + + new = (unsigned long) p; + prev = cmpxchg(&dst->_metrics, old, new); + + if (prev != old) { + kfree(p); +- p = __DST_METRICS_PTR(prev); ++ p = (struct dst_metrics *)__DST_METRICS_PTR(prev); + if (prev & DST_METRICS_READ_ONLY) + p = NULL; ++ } else if (prev & DST_METRICS_REFCOUNTED) { ++ if (atomic_dec_and_test(&old_p->refcnt)) ++ kfree(old_p); + } + } +- return p; ++ BUILD_BUG_ON(offsetof(struct dst_metrics, metrics) != 0); ++ return (u32 *)p; + } + EXPORT_SYMBOL(dst_cow_metrics_generic); + +@@ -341,7 +346,7 @@ void __dst_destroy_metrics_generic(struc + { + unsigned long prev, new; + +- new = ((unsigned long) dst_default_metrics) | DST_METRICS_READ_ONLY; ++ new = ((unsigned long) &dst_default_metrics) | DST_METRICS_READ_ONLY; + prev = cmpxchg(&dst->_metrics, old, new); + if (prev == old) + kfree(__DST_METRICS_PTR(old)); +--- a/net/ipv4/fib_semantics.c ++++ b/net/ipv4/fib_semantics.c +@@ -204,6 +204,7 @@ static void rt_fibinfo_free_cpus(struct + static void free_fib_info_rcu(struct rcu_head *head) + { + struct fib_info *fi = container_of(head, struct fib_info, rcu); ++ struct dst_metrics *m; + + change_nexthops(fi) { + if (nexthop_nh->nh_dev) +@@ -214,8 +215,9 @@ static void free_fib_info_rcu(struct rcu + rt_fibinfo_free(&nexthop_nh->nh_rth_input); + } endfor_nexthops(fi); + +- if (fi->fib_metrics != (u32 *) dst_default_metrics) +- kfree(fi->fib_metrics); ++ m = fi->fib_metrics; ++ if (m != &dst_default_metrics && atomic_dec_and_test(&m->refcnt)) ++ kfree(m); + kfree(fi); + } + +@@ -975,11 +977,11 @@ fib_convert_metrics(struct fib_info *fi, + val = 255; + if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) + return -EINVAL; +- fi->fib_metrics[type - 1] = val; ++ fi->fib_metrics->metrics[type - 1] = val; + } + + if (ecn_ca) +- fi->fib_metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; ++ fi->fib_metrics->metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; + + return 0; + } +@@ -1037,11 +1039,12 @@ struct fib_info *fib_create_info(struct + goto failure; + fib_info_cnt++; + if (cfg->fc_mx) { +- fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); ++ fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL); + if (!fi->fib_metrics) + goto failure; ++ atomic_set(&fi->fib_metrics->refcnt, 1); + } else +- fi->fib_metrics = (u32 *) dst_default_metrics; ++ fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics; + + fi->fib_net = net; + fi->fib_protocol = cfg->fc_protocol; +@@ -1242,7 +1245,7 @@ int fib_dump_info(struct sk_buff *skb, u + if (fi->fib_priority && + nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority)) + goto nla_put_failure; +- 
if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) ++ if (rtnetlink_put_metrics(skb, fi->fib_metrics->metrics) < 0) + goto nla_put_failure; + + if (fi->fib_prefsrc && +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -1389,8 +1389,12 @@ static void rt_add_uncached_list(struct + + static void ipv4_dst_destroy(struct dst_entry *dst) + { ++ struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst); + struct rtable *rt = (struct rtable *) dst; + ++ if (p != &dst_default_metrics && atomic_dec_and_test(&p->refcnt)) ++ kfree(p); ++ + if (!list_empty(&rt->rt_uncached)) { + struct uncached_list *ul = rt->rt_uncached_list; + +@@ -1442,7 +1446,11 @@ static void rt_set_nexthop(struct rtable + rt->rt_gateway = nh->nh_gw; + rt->rt_uses_gateway = 1; + } +- dst_init_metrics(&rt->dst, fi->fib_metrics, true); ++ dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true); ++ if (fi->fib_metrics != &dst_default_metrics) { ++ rt->dst._metrics |= DST_METRICS_REFCOUNTED; ++ atomic_inc(&fi->fib_metrics->refcnt); ++ } + #ifdef CONFIG_IP_ROUTE_CLASSID + rt->dst.tclassid = nh->nh_tclassid; + #endif diff --git a/queue-4.11/ipv6-check-ip6_find_1stfragopt-return-value-properly.patch b/queue-4.11/ipv6-check-ip6_find_1stfragopt-return-value-properly.patch new file mode 100644 index 00000000000..32f014e6d09 --- /dev/null +++ b/queue-4.11/ipv6-check-ip6_find_1stfragopt-return-value-properly.patch @@ -0,0 +1,89 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: "David S. Miller" +Date: Wed, 17 May 2017 22:54:11 -0400 +Subject: ipv6: Check ip6_find_1stfragopt() return value properly. + +From: "David S. Miller" + + +[ Upstream commit 7dd7eb9513bd02184d45f000ab69d78cb1fa1531 ] + +Do not use unsigned variables to see if it returns a negative +error or not. + +Fixes: 2423496af35d ("ipv6: Prevent overrun when parsing v6 header options") +Reported-by: Julia Lawall +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_offload.c | 9 ++++----- + net/ipv6/ip6_output.c | 7 +++---- + net/ipv6/udp_offload.c | 8 +++++--- + 3 files changed, 12 insertions(+), 12 deletions(-) + +--- a/net/ipv6/ip6_offload.c ++++ b/net/ipv6/ip6_offload.c +@@ -63,7 +63,6 @@ static struct sk_buff *ipv6_gso_segment( + const struct net_offload *ops; + int proto; + struct frag_hdr *fptr; +- unsigned int unfrag_ip6hlen; + unsigned int payload_len; + u8 *prevhdr; + int offset = 0; +@@ -116,10 +115,10 @@ static struct sk_buff *ipv6_gso_segment( + skb->network_header = (u8 *)ipv6h - skb->head; + + if (udpfrag) { +- unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); +- if (unfrag_ip6hlen < 0) +- return ERR_PTR(unfrag_ip6hlen); +- fptr = (struct frag_hdr *)((u8 *)ipv6h + unfrag_ip6hlen); ++ int err = ip6_find_1stfragopt(skb, &prevhdr); ++ if (err < 0) ++ return ERR_PTR(err); ++ fptr = (struct frag_hdr *)((u8 *)ipv6h + err); + fptr->frag_off = htons(offset); + if (skb->next) + fptr->frag_off |= htons(IP6_MF); +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -597,11 +597,10 @@ int ip6_fragment(struct net *net, struct + int ptr, offset = 0, err = 0; + u8 *prevhdr, nexthdr = 0; + +- hlen = ip6_find_1stfragopt(skb, &prevhdr); +- if (hlen < 0) { +- err = hlen; ++ err = ip6_find_1stfragopt(skb, &prevhdr); ++ if (err < 0) + goto fail; +- } ++ hlen = err; + nexthdr = *prevhdr; + + mtu = ip6_skb_dst_mtu(skb); +--- a/net/ipv6/udp_offload.c ++++ b/net/ipv6/udp_offload.c +@@ -29,6 +29,7 @@ static struct sk_buff *udp6_ufo_fragment + u8 frag_hdr_sz = sizeof(struct frag_hdr); + __wsum csum; + int tnl_hlen; ++ int err; + + mss = skb_shinfo(skb)->gso_size; + if (unlikely(skb->len <= mss)) +@@ -90,9 +91,10 @@ static struct sk_buff *udp6_ufo_fragment + /* Find the unfragmentable header and shift it left by frag_hdr_sz + * bytes to insert fragment header. + */ +- unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); +- if (unfrag_ip6hlen < 0) +- return ERR_PTR(unfrag_ip6hlen); ++ err = ip6_find_1stfragopt(skb, &prevhdr); ++ if (err < 0) ++ return ERR_PTR(err); ++ unfrag_ip6hlen = err; + nexthdr = *prevhdr; + *prevhdr = NEXTHDR_FRAGMENT; + unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) + diff --git a/queue-4.11/ipv6-dccp-do-not-inherit-ipv6_mc_list-from-parent.patch b/queue-4.11/ipv6-dccp-do-not-inherit-ipv6_mc_list-from-parent.patch new file mode 100644 index 00000000000..579df45c526 --- /dev/null +++ b/queue-4.11/ipv6-dccp-do-not-inherit-ipv6_mc_list-from-parent.patch @@ -0,0 +1,63 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: WANG Cong +Date: Tue, 9 May 2017 16:59:54 -0700 +Subject: ipv6/dccp: do not inherit ipv6_mc_list from parent + +From: WANG Cong + + +[ Upstream commit 83eaddab4378db256d00d295bda6ca997cd13a52 ] + +Like commit 657831ffc38e ("dccp/tcp: do not inherit mc_list from parent") +we should clear ipv6_mc_list etc. for IPv6 sockets too. + +Cc: Eric Dumazet +Signed-off-by: Cong Wang +Acked-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dccp/ipv6.c | 6 ++++++ + net/ipv6/tcp_ipv6.c | 2 ++ + 2 files changed, 8 insertions(+) + +--- a/net/dccp/ipv6.c ++++ b/net/dccp/ipv6.c +@@ -426,6 +426,9 @@ static struct sock *dccp_v6_request_recv + newsk->sk_backlog_rcv = dccp_v4_do_rcv; + newnp->pktoptions = NULL; + newnp->opt = NULL; ++ newnp->ipv6_mc_list = NULL; ++ newnp->ipv6_ac_list = NULL; ++ newnp->ipv6_fl_list = NULL; + newnp->mcast_oif = inet6_iif(skb); + newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; + +@@ -490,6 +493,9 @@ static struct sock *dccp_v6_request_recv + /* Clone RX bits */ + newnp->rxopt.all = np->rxopt.all; + ++ newnp->ipv6_mc_list = NULL; ++ newnp->ipv6_ac_list = NULL; ++ newnp->ipv6_fl_list = NULL; + newnp->pktoptions = NULL; + newnp->opt = NULL; + newnp->mcast_oif = inet6_iif(skb); +--- a/net/ipv6/tcp_ipv6.c ++++ b/net/ipv6/tcp_ipv6.c +@@ -1070,6 +1070,7 @@ static struct sock *tcp_v6_syn_recv_sock + newtp->af_specific = &tcp_sock_ipv6_mapped_specific; + #endif + ++ newnp->ipv6_mc_list = NULL; + newnp->ipv6_ac_list = NULL; + newnp->ipv6_fl_list = NULL; + newnp->pktoptions = NULL; +@@ -1139,6 +1140,7 @@ static struct sock *tcp_v6_syn_recv_sock + First: no IPv4 options. + */ + newinet->inet_opt = NULL; ++ newnp->ipv6_mc_list = NULL; + newnp->ipv6_ac_list = NULL; + newnp->ipv6_fl_list = NULL; + diff --git a/queue-4.11/ipv6-fix-out-of-bound-writes-in-__ip6_append_data.patch b/queue-4.11/ipv6-fix-out-of-bound-writes-in-__ip6_append_data.patch new file mode 100644 index 00000000000..825cacdc9b7 --- /dev/null +++ b/queue-4.11/ipv6-fix-out-of-bound-writes-in-__ip6_append_data.patch @@ -0,0 +1,67 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: Eric Dumazet +Date: Fri, 19 May 2017 14:17:48 -0700 +Subject: ipv6: fix out of bound writes in __ip6_append_data() + +From: Eric Dumazet + + +[ Upstream commit 232cd35d0804cc241eb887bb8d4d9b3b9881c64a ] + +Andrey Konovalov and idaifish@gmail.com reported crashes caused by +one skb shared_info being overwritten from __ip6_append_data() + +Andrey program lead to following state : + +copy -4200 datalen 2000 fraglen 2040 +maxfraglen 2040 alloclen 2048 transhdrlen 0 offset 0 fraggap 6200 + +The skb_copy_and_csum_bits(skb_prev, maxfraglen, data + transhdrlen, +fraggap, 0); is overwriting skb->head and skb_shared_info + +Since we apparently detect this rare condition too late, move the +code earlier to even avoid allocating skb and risking crashes. + +Once again, many thanks to Andrey and syzkaller team. + +Signed-off-by: Eric Dumazet +Reported-by: Andrey Konovalov +Tested-by: Andrey Konovalov +Reported-by: +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_output.c | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -1466,6 +1466,11 @@ alloc_new_skb: + */ + alloclen += sizeof(struct frag_hdr); + ++ copy = datalen - transhdrlen - fraggap; ++ if (copy < 0) { ++ err = -EINVAL; ++ goto error; ++ } + if (transhdrlen) { + skb = sock_alloc_send_skb(sk, + alloclen + hh_len, +@@ -1515,13 +1520,9 @@ alloc_new_skb: + data += fraggap; + pskb_trim_unique(skb_prev, maxfraglen); + } +- copy = datalen - transhdrlen - fraggap; +- +- if (copy < 0) { +- err = -EINVAL; +- kfree_skb(skb); +- goto error; +- } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { ++ if (copy > 0 && ++ getfrag(from, data + transhdrlen, offset, ++ copy, fraggap, skb) < 0) { + err = -EFAULT; + kfree_skb(skb); + goto error; diff --git a/queue-4.11/ipv6-prevent-overrun-when-parsing-v6-header-options.patch b/queue-4.11/ipv6-prevent-overrun-when-parsing-v6-header-options.patch new file mode 100644 index 00000000000..a0251613545 --- /dev/null +++ b/queue-4.11/ipv6-prevent-overrun-when-parsing-v6-header-options.patch @@ -0,0 +1,226 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: Craig Gallek +Date: Tue, 16 May 2017 14:36:23 -0400 +Subject: ipv6: Prevent overrun when parsing v6 header options + +From: Craig Gallek + + +[ Upstream commit 2423496af35d94a87156b063ea5cedffc10a70a1 ] + +The KASAN warning repoted below was discovered with a syzkaller +program. The reproducer is basically: + int s = socket(AF_INET6, SOCK_RAW, NEXTHDR_HOP); + send(s, &one_byte_of_data, 1, MSG_MORE); + send(s, &more_than_mtu_bytes_data, 2000, 0); + +The socket() call sets the nexthdr field of the v6 header to +NEXTHDR_HOP, the first send call primes the payload with a non zero +byte of data, and the second send call triggers the fragmentation path. + +The fragmentation code tries to parse the header options in order +to figure out where to insert the fragment option. Since nexthdr points +to an invalid option, the calculation of the size of the network header +can made to be much larger than the linear section of the skb and data +is read outside of it. + +This fix makes ip6_find_1stfrag return an error if it detects +running out-of-bounds. + +[ 42.361487] ================================================================== +[ 42.364412] BUG: KASAN: slab-out-of-bounds in ip6_fragment+0x11c8/0x3730 +[ 42.365471] Read of size 840 at addr ffff88000969e798 by task ip6_fragment-oo/3789 +[ 42.366469] +[ 42.366696] CPU: 1 PID: 3789 Comm: ip6_fragment-oo Not tainted 4.11.0+ #41 +[ 42.367628] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.1-1ubuntu1 04/01/2014 +[ 42.368824] Call Trace: +[ 42.369183] dump_stack+0xb3/0x10b +[ 42.369664] print_address_description+0x73/0x290 +[ 42.370325] kasan_report+0x252/0x370 +[ 42.370839] ? ip6_fragment+0x11c8/0x3730 +[ 42.371396] check_memory_region+0x13c/0x1a0 +[ 42.371978] memcpy+0x23/0x50 +[ 42.372395] ip6_fragment+0x11c8/0x3730 +[ 42.372920] ? nf_ct_expect_unregister_notifier+0x110/0x110 +[ 42.373681] ? ip6_copy_metadata+0x7f0/0x7f0 +[ 42.374263] ? ip6_forward+0x2e30/0x2e30 +[ 42.374803] ip6_finish_output+0x584/0x990 +[ 42.375350] ip6_output+0x1b7/0x690 +[ 42.375836] ? ip6_finish_output+0x990/0x990 +[ 42.376411] ? 
ip6_fragment+0x3730/0x3730 +[ 42.376968] ip6_local_out+0x95/0x160 +[ 42.377471] ip6_send_skb+0xa1/0x330 +[ 42.377969] ip6_push_pending_frames+0xb3/0xe0 +[ 42.378589] rawv6_sendmsg+0x2051/0x2db0 +[ 42.379129] ? rawv6_bind+0x8b0/0x8b0 +[ 42.379633] ? _copy_from_user+0x84/0xe0 +[ 42.380193] ? debug_check_no_locks_freed+0x290/0x290 +[ 42.380878] ? ___sys_sendmsg+0x162/0x930 +[ 42.381427] ? rcu_read_lock_sched_held+0xa3/0x120 +[ 42.382074] ? sock_has_perm+0x1f6/0x290 +[ 42.382614] ? ___sys_sendmsg+0x167/0x930 +[ 42.383173] ? lock_downgrade+0x660/0x660 +[ 42.383727] inet_sendmsg+0x123/0x500 +[ 42.384226] ? inet_sendmsg+0x123/0x500 +[ 42.384748] ? inet_recvmsg+0x540/0x540 +[ 42.385263] sock_sendmsg+0xca/0x110 +[ 42.385758] SYSC_sendto+0x217/0x380 +[ 42.386249] ? SYSC_connect+0x310/0x310 +[ 42.386783] ? __might_fault+0x110/0x1d0 +[ 42.387324] ? lock_downgrade+0x660/0x660 +[ 42.387880] ? __fget_light+0xa1/0x1f0 +[ 42.388403] ? __fdget+0x18/0x20 +[ 42.388851] ? sock_common_setsockopt+0x95/0xd0 +[ 42.389472] ? SyS_setsockopt+0x17f/0x260 +[ 42.390021] ? entry_SYSCALL_64_fastpath+0x5/0xbe +[ 42.390650] SyS_sendto+0x40/0x50 +[ 42.391103] entry_SYSCALL_64_fastpath+0x1f/0xbe +[ 42.391731] RIP: 0033:0x7fbbb711e383 +[ 42.392217] RSP: 002b:00007ffff4d34f28 EFLAGS: 00000246 ORIG_RAX: 000000000000002c +[ 42.393235] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fbbb711e383 +[ 42.394195] RDX: 0000000000001000 RSI: 00007ffff4d34f60 RDI: 0000000000000003 +[ 42.395145] RBP: 0000000000000046 R08: 00007ffff4d34f40 R09: 0000000000000018 +[ 42.396056] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000400aad +[ 42.396598] R13: 0000000000000066 R14: 00007ffff4d34ee0 R15: 00007fbbb717af00 +[ 42.397257] +[ 42.397411] Allocated by task 3789: +[ 42.397702] save_stack_trace+0x16/0x20 +[ 42.398005] save_stack+0x46/0xd0 +[ 42.398267] kasan_kmalloc+0xad/0xe0 +[ 42.398548] kasan_slab_alloc+0x12/0x20 +[ 42.398848] __kmalloc_node_track_caller+0xcb/0x380 +[ 42.399224] __kmalloc_reserve.isra.32+0x41/0xe0 +[ 42.399654] __alloc_skb+0xf8/0x580 +[ 42.400003] sock_wmalloc+0xab/0xf0 +[ 42.400346] __ip6_append_data.isra.41+0x2472/0x33d0 +[ 42.400813] ip6_append_data+0x1a8/0x2f0 +[ 42.401122] rawv6_sendmsg+0x11ee/0x2db0 +[ 42.401505] inet_sendmsg+0x123/0x500 +[ 42.401860] sock_sendmsg+0xca/0x110 +[ 42.402209] ___sys_sendmsg+0x7cb/0x930 +[ 42.402582] __sys_sendmsg+0xd9/0x190 +[ 42.402941] SyS_sendmsg+0x2d/0x50 +[ 42.403273] entry_SYSCALL_64_fastpath+0x1f/0xbe +[ 42.403718] +[ 42.403871] Freed by task 1794: +[ 42.404146] save_stack_trace+0x16/0x20 +[ 42.404515] save_stack+0x46/0xd0 +[ 42.404827] kasan_slab_free+0x72/0xc0 +[ 42.405167] kfree+0xe8/0x2b0 +[ 42.405462] skb_free_head+0x74/0xb0 +[ 42.405806] skb_release_data+0x30e/0x3a0 +[ 42.406198] skb_release_all+0x4a/0x60 +[ 42.406563] consume_skb+0x113/0x2e0 +[ 42.406910] skb_free_datagram+0x1a/0xe0 +[ 42.407288] netlink_recvmsg+0x60d/0xe40 +[ 42.407667] sock_recvmsg+0xd7/0x110 +[ 42.408022] ___sys_recvmsg+0x25c/0x580 +[ 42.408395] __sys_recvmsg+0xd6/0x190 +[ 42.408753] SyS_recvmsg+0x2d/0x50 +[ 42.409086] entry_SYSCALL_64_fastpath+0x1f/0xbe +[ 42.409513] +[ 42.409665] The buggy address belongs to the object at ffff88000969e780 +[ 42.409665] which belongs to the cache kmalloc-512 of size 512 +[ 42.410846] The buggy address is located 24 bytes inside of +[ 42.410846] 512-byte region [ffff88000969e780, ffff88000969e980) +[ 42.411941] The buggy address belongs to the page: +[ 42.412405] page:ffffea000025a780 count:1 mapcount:0 mapping: (null) index:0x0 compound_mapcount: 0 +[ 
42.413298] flags: 0x100000000008100(slab|head) +[ 42.413729] raw: 0100000000008100 0000000000000000 0000000000000000 00000001800c000c +[ 42.414387] raw: ffffea00002a9500 0000000900000007 ffff88000c401280 0000000000000000 +[ 42.415074] page dumped because: kasan: bad access detected +[ 42.415604] +[ 42.415757] Memory state around the buggy address: +[ 42.416222] ffff88000969e880: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +[ 42.416904] ffff88000969e900: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +[ 42.417591] >ffff88000969e980: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc +[ 42.418273] ^ +[ 42.418588] ffff88000969ea00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +[ 42.419273] ffff88000969ea80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +[ 42.419882] ================================================================== + +Reported-by: Andrey Konovalov +Signed-off-by: Craig Gallek +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_offload.c | 2 ++ + net/ipv6/ip6_output.c | 4 ++++ + net/ipv6/output_core.c | 14 ++++++++------ + net/ipv6/udp_offload.c | 2 ++ + 4 files changed, 16 insertions(+), 6 deletions(-) + +--- a/net/ipv6/ip6_offload.c ++++ b/net/ipv6/ip6_offload.c +@@ -117,6 +117,8 @@ static struct sk_buff *ipv6_gso_segment( + + if (udpfrag) { + unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); ++ if (unfrag_ip6hlen < 0) ++ return ERR_PTR(unfrag_ip6hlen); + fptr = (struct frag_hdr *)((u8 *)ipv6h + unfrag_ip6hlen); + fptr->frag_off = htons(offset); + if (skb->next) +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -598,6 +598,10 @@ int ip6_fragment(struct net *net, struct + u8 *prevhdr, nexthdr = 0; + + hlen = ip6_find_1stfragopt(skb, &prevhdr); ++ if (hlen < 0) { ++ err = hlen; ++ goto fail; ++ } + nexthdr = *prevhdr; + + mtu = ip6_skb_dst_mtu(skb); +--- a/net/ipv6/output_core.c ++++ b/net/ipv6/output_core.c +@@ -79,14 +79,13 @@ EXPORT_SYMBOL(ipv6_select_ident); + int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) + { + u16 offset = sizeof(struct ipv6hdr); +- struct ipv6_opt_hdr *exthdr = +- (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); + unsigned int packet_len = skb_tail_pointer(skb) - + skb_network_header(skb); + int found_rhdr = 0; + *nexthdr = &ipv6_hdr(skb)->nexthdr; + +- while (offset + 1 <= packet_len) { ++ while (offset <= packet_len) { ++ struct ipv6_opt_hdr *exthdr; + + switch (**nexthdr) { + +@@ -107,13 +106,16 @@ int ip6_find_1stfragopt(struct sk_buff * + return offset; + } + +- offset += ipv6_optlen(exthdr); +- *nexthdr = &exthdr->nexthdr; ++ if (offset + sizeof(struct ipv6_opt_hdr) > packet_len) ++ return -EINVAL; ++ + exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + + offset); ++ offset += ipv6_optlen(exthdr); ++ *nexthdr = &exthdr->nexthdr; + } + +- return offset; ++ return -EINVAL; + } + EXPORT_SYMBOL(ip6_find_1stfragopt); + +--- a/net/ipv6/udp_offload.c ++++ b/net/ipv6/udp_offload.c +@@ -91,6 +91,8 @@ static struct sk_buff *udp6_ufo_fragment + * bytes to insert fragment header. 
+ */ + unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); ++ if (unfrag_ip6hlen < 0) ++ return ERR_PTR(unfrag_ip6hlen); + nexthdr = *prevhdr; + *prevhdr = NEXTHDR_FRAGMENT; + unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) + diff --git a/queue-4.11/net-fix-compile-error-in-skb_orphan_partial.patch b/queue-4.11/net-fix-compile-error-in-skb_orphan_partial.patch new file mode 100644 index 00000000000..139c2dee7e1 --- /dev/null +++ b/queue-4.11/net-fix-compile-error-in-skb_orphan_partial.patch @@ -0,0 +1,44 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: Eric Dumazet +Date: Tue, 16 May 2017 13:27:53 -0700 +Subject: net: fix compile error in skb_orphan_partial() + +From: Eric Dumazet + + +[ Upstream commit 9142e9007f2d7ab58a587a1e1d921b0064a339aa ] + +If CONFIG_INET is not set, net/core/sock.c can not compile : + +net/core/sock.c: In function ‘skb_orphan_partial’: +net/core/sock.c:1810:2: error: implicit declaration of function +‘skb_is_tcp_pure_ack’ [-Werror=implicit-function-declaration] + if (skb_is_tcp_pure_ack(skb)) + ^ + +Fix this by always including + +Fixes: f6ba8d33cfbb ("netem: fix skb_orphan_partial()") +Signed-off-by: Eric Dumazet +Reported-by: Paul Gortmaker +Reported-by: Randy Dunlap +Reported-by: Stephen Rothwell +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/sock.c | 3 --- + 1 file changed, 3 deletions(-) + +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -138,10 +138,7 @@ + + #include + +-#ifdef CONFIG_INET + #include +-#endif +- + #include + + static DEFINE_MUTEX(proto_list_mutex); diff --git a/queue-4.11/net-improve-handling-of-failures-on-link-and-route-dumps.patch b/queue-4.11/net-improve-handling-of-failures-on-link-and-route-dumps.patch new file mode 100644 index 00000000000..13e3b3548a7 --- /dev/null +++ b/queue-4.11/net-improve-handling-of-failures-on-link-and-route-dumps.patch @@ -0,0 +1,205 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: David Ahern +Date: Mon, 15 May 2017 23:19:17 -0700 +Subject: net: Improve handling of failures on link and route dumps + +From: David Ahern + + +[ Upstream commit f6c5775ff0bfa62b072face6bf1d40f659f194b2 ] + +In general, rtnetlink dumps do not anticipate failure to dump a single +object (e.g., link or route) on a single pass. As both route and link +objects have grown via more attributes, that is no longer a given. + +netlink dumps can handle a failure if the dump function returns an +error; specifically, netlink_dump adds the return code to the response +if it is <= 0 so userspace is notified of the failure. The missing +piece is the rtnetlink dump functions returning the error. + +Fix route and link dump functions to return the errors if no object is +added to an skb (detected by skb->len != 0). IPv6 route dumps +(rt6_dump_route) already return the error; this patch updates IPv4 and +link dumps. Other dump functions may need to be ajusted as well. + +Reported-by: Jan Moskyto Matejka +Signed-off-by: David Ahern +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/rtnetlink.c | 36 ++++++++++++++++++++++++------------ + net/ipv4/fib_frontend.c | 15 +++++++++++---- + net/ipv4/fib_trie.c | 26 ++++++++++++++------------ + 3 files changed, 49 insertions(+), 28 deletions(-) + +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -1620,13 +1620,13 @@ static int rtnl_dump_ifinfo(struct sk_bu + cb->nlh->nlmsg_seq, 0, + flags, + ext_filter_mask); +- /* If we ran out of room on the first message, +- * we're in trouble +- */ +- WARN_ON((err == -EMSGSIZE) && (skb->len == 0)); + +- if (err < 0) +- goto out; ++ if (err < 0) { ++ if (likely(skb->len)) ++ goto out; ++ ++ goto out_err; ++ } + + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); + cont: +@@ -1634,10 +1634,12 @@ cont: + } + } + out: ++ err = skb->len; ++out_err: + cb->args[1] = idx; + cb->args[0] = h; + +- return skb->len; ++ return err; + } + + int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len) +@@ -3427,8 +3429,12 @@ static int rtnl_bridge_getlink(struct sk + err = br_dev->netdev_ops->ndo_bridge_getlink( + skb, portid, seq, dev, + filter_mask, NLM_F_MULTI); +- if (err < 0 && err != -EOPNOTSUPP) +- break; ++ if (err < 0 && err != -EOPNOTSUPP) { ++ if (likely(skb->len)) ++ break; ++ ++ goto out_err; ++ } + } + idx++; + } +@@ -3439,16 +3445,22 @@ static int rtnl_bridge_getlink(struct sk + seq, dev, + filter_mask, + NLM_F_MULTI); +- if (err < 0 && err != -EOPNOTSUPP) +- break; ++ if (err < 0 && err != -EOPNOTSUPP) { ++ if (likely(skb->len)) ++ break; ++ ++ goto out_err; ++ } + } + idx++; + } + } ++ err = skb->len; ++out_err: + rcu_read_unlock(); + cb->args[0] = idx; + +- return skb->len; ++ return err; + } + + static inline size_t bridge_nlmsg_size(void) +--- a/net/ipv4/fib_frontend.c ++++ b/net/ipv4/fib_frontend.c +@@ -760,7 +760,7 @@ static int inet_dump_fib(struct sk_buff + unsigned int e = 0, s_e; + struct fib_table *tb; + struct hlist_head *head; +- int dumped = 0; ++ int dumped = 0, err; + + if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) && + ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED) +@@ -780,20 +780,27 @@ static int inet_dump_fib(struct sk_buff + if (dumped) + memset(&cb->args[2], 0, sizeof(cb->args) - + 2 * sizeof(cb->args[0])); +- if (fib_table_dump(tb, skb, cb) < 0) +- goto out; ++ err = fib_table_dump(tb, skb, cb); ++ if (err < 0) { ++ if (likely(skb->len)) ++ goto out; ++ ++ goto out_err; ++ } + dumped = 1; + next: + e++; + } + } + out: ++ err = skb->len; ++out_err: + rcu_read_unlock(); + + cb->args[1] = e; + cb->args[0] = h; + +- return skb->len; ++ return err; + } + + /* Prepare and feed intra-kernel routing request. 
+--- a/net/ipv4/fib_trie.c ++++ b/net/ipv4/fib_trie.c +@@ -2079,6 +2079,8 @@ static int fn_trie_dump_leaf(struct key_ + + /* rcu_read_lock is hold by caller */ + hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { ++ int err; ++ + if (i < s_i) { + i++; + continue; +@@ -2089,17 +2091,14 @@ static int fn_trie_dump_leaf(struct key_ + continue; + } + +- if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid, +- cb->nlh->nlmsg_seq, +- RTM_NEWROUTE, +- tb->tb_id, +- fa->fa_type, +- xkey, +- KEYLENGTH - fa->fa_slen, +- fa->fa_tos, +- fa->fa_info, NLM_F_MULTI) < 0) { ++ err = fib_dump_info(skb, NETLINK_CB(cb->skb).portid, ++ cb->nlh->nlmsg_seq, RTM_NEWROUTE, ++ tb->tb_id, fa->fa_type, ++ xkey, KEYLENGTH - fa->fa_slen, ++ fa->fa_tos, fa->fa_info, NLM_F_MULTI); ++ if (err < 0) { + cb->args[4] = i; +- return -1; ++ return err; + } + i++; + } +@@ -2121,10 +2120,13 @@ int fib_table_dump(struct fib_table *tb, + t_key key = cb->args[3]; + + while ((l = leaf_walk_rcu(&tp, key)) != NULL) { +- if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) { ++ int err; ++ ++ err = fn_trie_dump_leaf(l, tb, skb, cb); ++ if (err < 0) { + cb->args[3] = key; + cb->args[2] = count; +- return -1; ++ return err; + } + + ++count; diff --git a/queue-4.11/net-mlx5-avoid-using-pending-command-interface-slots.patch b/queue-4.11/net-mlx5-avoid-using-pending-command-interface-slots.patch new file mode 100644 index 00000000000..b5acde81c83 --- /dev/null +++ b/queue-4.11/net-mlx5-avoid-using-pending-command-interface-slots.patch @@ -0,0 +1,178 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: Mohamad Haj Yahia +Date: Thu, 23 Feb 2017 11:19:36 +0200 +Subject: net/mlx5: Avoid using pending command interface slots + +From: Mohamad Haj Yahia + + +[ Upstream commit 73dd3a4839c1d27c36d4dcc92e1ff44225ecbeb7 ] + +Currently when firmware command gets stuck or it takes long time to +complete, the driver command will get timeout and the command slot is +freed and can be used for new commands, and if the firmware receive new +command on the old busy slot its behavior is unexpected and this could +be harmful. +To fix this when the driver command gets timeout we return failure, +but we don't free the command slot and we wait for the firmware to +explicitly respond to that command. +Once all the entries are busy we will stop processing new firmware +commands. + +Fixes: 9cba4ebcf374 ('net/mlx5: Fix potential deadlock in command mode change') +Signed-off-by: Mohamad Haj Yahia +Cc: kernel-team@fb.com +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 41 ++++++++++++++++++++--- + drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 - + drivers/net/ethernet/mellanox/mlx5/core/health.c | 2 - + include/linux/mlx5/driver.h | 7 +++ + 4 files changed, 44 insertions(+), 8 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +@@ -770,7 +770,7 @@ static void cb_timeout_handler(struct wo + mlx5_core_warn(dev, "%s(0x%x) timeout. 
Will cause a leak of a command resource\n", + mlx5_command_str(msg_to_opcode(ent->in)), + msg_to_opcode(ent->in)); +- mlx5_cmd_comp_handler(dev, 1UL << ent->idx); ++ mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); + } + + static void cmd_work_handler(struct work_struct *work) +@@ -800,6 +800,7 @@ static void cmd_work_handler(struct work + } + + cmd->ent_arr[ent->idx] = ent; ++ set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state); + lay = get_inst(cmd, ent->idx); + ent->lay = lay; + memset(lay, 0, sizeof(*lay)); +@@ -821,6 +822,20 @@ static void cmd_work_handler(struct work + if (ent->callback) + schedule_delayed_work(&ent->cb_timeout_work, cb_timeout); + ++ /* Skip sending command to fw if internal error */ ++ if (pci_channel_offline(dev->pdev) || ++ dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { ++ u8 status = 0; ++ u32 drv_synd; ++ ++ ent->ret = mlx5_internal_err_ret_value(dev, msg_to_opcode(ent->in), &drv_synd, &status); ++ MLX5_SET(mbox_out, ent->out, status, status); ++ MLX5_SET(mbox_out, ent->out, syndrome, drv_synd); ++ ++ mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); ++ return; ++ } ++ + /* ring doorbell after the descriptor is valid */ + mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx); + wmb(); +@@ -831,7 +846,7 @@ static void cmd_work_handler(struct work + poll_timeout(ent); + /* make sure we read the descriptor after ownership is SW */ + rmb(); +- mlx5_cmd_comp_handler(dev, 1UL << ent->idx); ++ mlx5_cmd_comp_handler(dev, 1UL << ent->idx, (ent->ret == -ETIMEDOUT)); + } + } + +@@ -875,7 +890,7 @@ static int wait_func(struct mlx5_core_de + wait_for_completion(&ent->done); + } else if (!wait_for_completion_timeout(&ent->done, timeout)) { + ent->ret = -ETIMEDOUT; +- mlx5_cmd_comp_handler(dev, 1UL << ent->idx); ++ mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); + } + + err = ent->ret; +@@ -1371,7 +1386,7 @@ static void free_msg(struct mlx5_core_de + } + } + +-void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec) ++void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced) + { + struct mlx5_cmd *cmd = &dev->cmd; + struct mlx5_cmd_work_ent *ent; +@@ -1391,6 +1406,19 @@ void mlx5_cmd_comp_handler(struct mlx5_c + struct semaphore *sem; + + ent = cmd->ent_arr[i]; ++ ++ /* if we already completed the command, ignore it */ ++ if (!test_and_clear_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, ++ &ent->state)) { ++ /* only real completion can free the cmd slot */ ++ if (!forced) { ++ mlx5_core_err(dev, "Command completion arrived after timeout (entry idx = %d).\n", ++ ent->idx); ++ free_ent(cmd, ent->idx); ++ } ++ continue; ++ } ++ + if (ent->callback) + cancel_delayed_work(&ent->cb_timeout_work); + if (ent->page_queue) +@@ -1413,7 +1441,10 @@ void mlx5_cmd_comp_handler(struct mlx5_c + mlx5_core_dbg(dev, "command completed. 
ret 0x%x, delivery status %s(0x%x)\n", + ent->ret, deliv_status_to_str(ent->status), ent->status); + } +- free_ent(cmd, ent->idx); ++ ++ /* only real completion will free the entry slot */ ++ if (!forced) ++ free_ent(cmd, ent->idx); + + if (ent->callback) { + ds = ent->ts2 - ent->ts1; +--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c +@@ -422,7 +422,7 @@ static irqreturn_t mlx5_eq_int(int irq, + break; + + case MLX5_EVENT_TYPE_CMD: +- mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector)); ++ mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false); + break; + + case MLX5_EVENT_TYPE_PORT_CHANGE: +--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c +@@ -90,7 +90,7 @@ static void trigger_cmd_completions(stru + spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); + + mlx5_core_dbg(dev, "vector 0x%llx\n", vector); +- mlx5_cmd_comp_handler(dev, vector); ++ mlx5_cmd_comp_handler(dev, vector, true); + return; + + no_trig: +--- a/include/linux/mlx5/driver.h ++++ b/include/linux/mlx5/driver.h +@@ -785,7 +785,12 @@ enum { + + typedef void (*mlx5_cmd_cbk_t)(int status, void *context); + ++enum { ++ MLX5_CMD_ENT_STATE_PENDING_COMP, ++}; ++ + struct mlx5_cmd_work_ent { ++ unsigned long state; + struct mlx5_cmd_msg *in; + struct mlx5_cmd_msg *out; + void *uout; +@@ -979,7 +984,7 @@ void mlx5_cq_completion(struct mlx5_core + void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type); + void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type); + struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn); +-void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec); ++void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced); + void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type); + int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, + int nent, u64 mask, const char *name, diff --git a/queue-4.11/net-mlx5e-fix-ethtool-pause-support-and-advertise-reporting.patch b/queue-4.11/net-mlx5e-fix-ethtool-pause-support-and-advertise-reporting.patch new file mode 100644 index 00000000000..56222d92687 --- /dev/null +++ b/queue-4.11/net-mlx5e-fix-ethtool-pause-support-and-advertise-reporting.patch @@ -0,0 +1,41 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: Gal Pressman +Date: Wed, 19 Apr 2017 14:35:15 +0300 +Subject: net/mlx5e: Fix ethtool pause support and advertise reporting + +From: Gal Pressman + + +[ Upstream commit e3c19503712d6360239b19c14cded56dd63c40d7 ] + +Pause bit should set when RX pause is on, not TX pause. +Also, setting Asym_Pause is incorrect, and should be turned off. 
+ +Fixes: 665bc53969d7 ("net/mlx5e: Use new ethtool get/set link ksettings API") +Signed-off-by: Gal Pressman +Cc: kernel-team@fb.com +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +@@ -773,7 +773,6 @@ static void get_supported(u32 eth_proto_ + ptys2ethtool_supported_port(link_ksettings, eth_proto_cap); + ptys2ethtool_supported_link(supported, eth_proto_cap); + ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Pause); +- ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Asym_Pause); + } + + static void get_advertising(u32 eth_proto_cap, u8 tx_pause, +@@ -783,7 +782,7 @@ static void get_advertising(u32 eth_prot + unsigned long *advertising = link_ksettings->link_modes.advertising; + + ptys2ethtool_adver_link(advertising, eth_proto_cap); +- if (tx_pause) ++ if (rx_pause) + ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Pause); + if (tx_pause ^ rx_pause) + ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Asym_Pause); diff --git a/queue-4.11/net-mlx5e-use-the-correct-pause-values-for-ethtool-advertising.patch b/queue-4.11/net-mlx5e-use-the-correct-pause-values-for-ethtool-advertising.patch new file mode 100644 index 00000000000..a3675f243d3 --- /dev/null +++ b/queue-4.11/net-mlx5e-use-the-correct-pause-values-for-ethtool-advertising.patch @@ -0,0 +1,48 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: Gal Pressman +Date: Mon, 3 Apr 2017 15:11:22 +0300 +Subject: net/mlx5e: Use the correct pause values for ethtool advertising + +From: Gal Pressman + + +[ Upstream commit b383b544f2666d67446b951a9a97af239dafed5d ] + +Query the operational pause from firmware (PFCC register) instead of +always passing zeros. + +Fixes: 665bc53969d7 ("net/mlx5e: Use new ethtool get/set link ksettings API") +Signed-off-by: Gal Pressman +Cc: kernel-team@fb.com +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +@@ -828,6 +828,8 @@ static int mlx5e_get_link_ksettings(stru + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0}; ++ u32 rx_pause = 0; ++ u32 tx_pause = 0; + u32 eth_proto_cap; + u32 eth_proto_admin; + u32 eth_proto_lp; +@@ -850,11 +852,13 @@ static int mlx5e_get_link_ksettings(stru + an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); + an_status = MLX5_GET(ptys_reg, out, an_status); + ++ mlx5_query_port_pause(mdev, &rx_pause, &tx_pause); ++ + ethtool_link_ksettings_zero_link_mode(link_ksettings, supported); + ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising); + + get_supported(eth_proto_cap, link_ksettings); +- get_advertising(eth_proto_admin, 0, 0, link_ksettings); ++ get_advertising(eth_proto_admin, tx_pause, rx_pause, link_ksettings); + get_speed_duplex(netdev, eth_proto_oper, link_ksettings); + + eth_proto_oper = eth_proto_oper ? 
eth_proto_oper : eth_proto_cap; diff --git a/queue-4.11/net-packet-fix-missing-net_device-reference-release.patch b/queue-4.11/net-packet-fix-missing-net_device-reference-release.patch new file mode 100644 index 00000000000..83f2abd2025 --- /dev/null +++ b/queue-4.11/net-packet-fix-missing-net_device-reference-release.patch @@ -0,0 +1,53 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: Douglas Caetano dos Santos +Date: Fri, 12 May 2017 15:19:15 -0300 +Subject: net/packet: fix missing net_device reference release + +From: Douglas Caetano dos Santos + + +[ Upstream commit d19b183cdc1fa3d70d6abe2a4c369e748cd7ebb8 ] + +When using a TX ring buffer, if an error occurs processing a control +message (e.g. invalid message), the net_device reference is not +released. + +Fixes c14ac9451c348 ("sock: enable timestamping using control messages") +Signed-off-by: Douglas Caetano dos Santos +Acked-by: Soheil Hassas Yeganeh +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/packet/af_packet.c | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -2614,13 +2614,6 @@ static int tpacket_snd(struct packet_soc + dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); + } + +- sockc.tsflags = po->sk.sk_tsflags; +- if (msg->msg_controllen) { +- err = sock_cmsg_send(&po->sk, msg, &sockc); +- if (unlikely(err)) +- goto out; +- } +- + err = -ENXIO; + if (unlikely(dev == NULL)) + goto out; +@@ -2628,6 +2621,13 @@ static int tpacket_snd(struct packet_soc + if (unlikely(!(dev->flags & IFF_UP))) + goto out_put; + ++ sockc.tsflags = po->sk.sk_tsflags; ++ if (msg->msg_controllen) { ++ err = sock_cmsg_send(&po->sk, msg, &sockc); ++ if (unlikely(err)) ++ goto out_put; ++ } ++ + if (po->sk.sk_socket->type == SOCK_RAW) + reserve = dev->hard_header_len; + size_max = po->tx_ring.frame_size diff --git a/queue-4.11/net-phy-marvell-limit-errata-to-88m1101.patch b/queue-4.11/net-phy-marvell-limit-errata-to-88m1101.patch new file mode 100644 index 00000000000..54a38d5fca2 --- /dev/null +++ b/queue-4.11/net-phy-marvell-limit-errata-to-88m1101.patch @@ -0,0 +1,114 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: Andrew Lunn +Date: Tue, 23 May 2017 17:49:13 +0200 +Subject: net: phy: marvell: Limit errata to 88m1101 + +From: Andrew Lunn + + +[ Upstream commit f2899788353c13891412b273fdff5f02d49aa40f ] + +The 88m1101 has an errata when configuring autoneg. However, it was +being applied to many other Marvell PHYs as well. Limit its scope to +just the 88m1101. + +Fixes: 76884679c644 ("phylib: Add support for Marvell 88e1111S and 88e1145") +Reported-by: Daniel Walker +Signed-off-by: Andrew Lunn +Acked-by: Harini Katakam +Reviewed-by: Florian Fainelli +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/marvell.c | 66 +++++++++++++++++++++++++--------------------- + 1 file changed, 37 insertions(+), 29 deletions(-) + +--- a/drivers/net/phy/marvell.c ++++ b/drivers/net/phy/marvell.c +@@ -255,34 +255,6 @@ static int marvell_config_aneg(struct ph + { + int err; + +- /* The Marvell PHY has an errata which requires +- * that certain registers get written in order +- * to restart autonegotiation */ +- err = phy_write(phydev, MII_BMCR, BMCR_RESET); +- +- if (err < 0) +- return err; +- +- err = phy_write(phydev, 0x1d, 0x1f); +- if (err < 0) +- return err; +- +- err = phy_write(phydev, 0x1e, 0x200c); +- if (err < 0) +- return err; +- +- err = phy_write(phydev, 0x1d, 0x5); +- if (err < 0) +- return err; +- +- err = phy_write(phydev, 0x1e, 0); +- if (err < 0) +- return err; +- +- err = phy_write(phydev, 0x1e, 0x100); +- if (err < 0) +- return err; +- + err = marvell_set_polarity(phydev, phydev->mdix_ctrl); + if (err < 0) + return err; +@@ -316,6 +288,42 @@ static int marvell_config_aneg(struct ph + return 0; + } + ++static int m88e1101_config_aneg(struct phy_device *phydev) ++{ ++ int err; ++ ++ /* This Marvell PHY has an errata which requires ++ * that certain registers get written in order ++ * to restart autonegotiation ++ */ ++ err = phy_write(phydev, MII_BMCR, BMCR_RESET); ++ ++ if (err < 0) ++ return err; ++ ++ err = phy_write(phydev, 0x1d, 0x1f); ++ if (err < 0) ++ return err; ++ ++ err = phy_write(phydev, 0x1e, 0x200c); ++ if (err < 0) ++ return err; ++ ++ err = phy_write(phydev, 0x1d, 0x5); ++ if (err < 0) ++ return err; ++ ++ err = phy_write(phydev, 0x1e, 0); ++ if (err < 0) ++ return err; ++ ++ err = phy_write(phydev, 0x1e, 0x100); ++ if (err < 0) ++ return err; ++ ++ return marvell_config_aneg(phydev); ++} ++ + static int m88e1111_config_aneg(struct phy_device *phydev) + { + int err; +@@ -1892,7 +1900,7 @@ static struct phy_driver marvell_drivers + .flags = PHY_HAS_INTERRUPT, + .probe = marvell_probe, + .config_init = &marvell_config_init, +- .config_aneg = &marvell_config_aneg, ++ .config_aneg = &m88e1101_config_aneg, + .read_status = &genphy_read_status, + .ack_interrupt = &marvell_ack_interrupt, + .config_intr = &marvell_config_intr, diff --git a/queue-4.11/net-smc-add-warning-about-remote-memory-exposure.patch b/queue-4.11/net-smc-add-warning-about-remote-memory-exposure.patch new file mode 100644 index 00000000000..ac5c36e719e --- /dev/null +++ b/queue-4.11/net-smc-add-warning-about-remote-memory-exposure.patch @@ -0,0 +1,35 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: Christoph Hellwig +Date: Tue, 16 May 2017 09:51:38 +0300 +Subject: net/smc: Add warning about remote memory exposure + +From: Christoph Hellwig + + +[ Upstream commit 19a0f7e37c0761a0a1cbf550705a6063c9675223 ] + +The driver explicitly bypasses APIs to register all memory once a +connection is made, and thus allows remote access to memory. + +Signed-off-by: Christoph Hellwig +Signed-off-by: Leon Romanovsky +Acked-by: Ursula Braun +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/smc/Kconfig | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/net/smc/Kconfig ++++ b/net/smc/Kconfig +@@ -8,6 +8,10 @@ config SMC + The Linux implementation of the SMC-R solution is designed as + a separate socket family SMC. + ++ Warning: SMC will expose all memory for remote reads and writes ++ once a connection is established. Don't enable this option except ++ for tightly controlled lab environment. 
++ + Select this option if you want to run SMC socket applications + + config SMC_DIAG diff --git a/queue-4.11/netem-fix-skb_orphan_partial.patch b/queue-4.11/netem-fix-skb_orphan_partial.patch new file mode 100644 index 00000000000..2517135c899 --- /dev/null +++ b/queue-4.11/netem-fix-skb_orphan_partial.patch @@ -0,0 +1,67 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: Eric Dumazet +Date: Thu, 11 May 2017 15:24:41 -0700 +Subject: netem: fix skb_orphan_partial() + +From: Eric Dumazet + + +[ Upstream commit f6ba8d33cfbb46df569972e64dbb5bb7e929bfd9 ] + +I should have known that lowering skb->truesize was dangerous :/ + +In case packets are not leaving the host via a standard Ethernet device, +but looped back to local sockets, bad things can happen, as reported +by Michael Madsen ( https://bugzilla.kernel.org/show_bug.cgi?id=195713 ) + +So instead of tweaking skb->truesize, lets change skb->destructor +and keep a reference on the owner socket via its sk_refcnt. + +Fixes: f2f872f9272a ("netem: Introduce skb_orphan_partial() helper") +Signed-off-by: Eric Dumazet +Reported-by: Michael Madsen +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/sock.c | 20 ++++++++------------ + 1 file changed, 8 insertions(+), 12 deletions(-) + +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -1699,28 +1699,24 @@ EXPORT_SYMBOL(skb_set_owner_w); + * delay queue. We want to allow the owner socket to send more + * packets, as if they were already TX completed by a typical driver. + * But we also want to keep skb->sk set because some packet schedulers +- * rely on it (sch_fq for example). So we set skb->truesize to a small +- * amount (1) and decrease sk_wmem_alloc accordingly. ++ * rely on it (sch_fq for example). + */ + void skb_orphan_partial(struct sk_buff *skb) + { +- /* If this skb is a TCP pure ACK or already went here, +- * we have nothing to do. 2 is already a very small truesize. +- */ +- if (skb->truesize <= 2) ++ if (skb_is_tcp_pure_ack(skb)) + return; + +- /* TCP stack sets skb->ooo_okay based on sk_wmem_alloc, +- * so we do not completely orphan skb, but transfert all +- * accounted bytes but one, to avoid unexpected reorders. +- */ + if (skb->destructor == sock_wfree + #ifdef CONFIG_INET + || skb->destructor == tcp_wfree + #endif + ) { +- atomic_sub(skb->truesize - 1, &skb->sk->sk_wmem_alloc); +- skb->truesize = 1; ++ struct sock *sk = skb->sk; ++ ++ if (atomic_inc_not_zero(&sk->sk_refcnt)) { ++ atomic_sub(skb->truesize, &sk->sk_wmem_alloc); ++ skb->destructor = sock_efree; ++ } + } else { + skb_orphan(skb); + } diff --git a/queue-4.11/qmi_wwan-add-another-lenovo-em74xx-device-id.patch b/queue-4.11/qmi_wwan-add-another-lenovo-em74xx-device-id.patch new file mode 100644 index 00000000000..8ecf056fe25 --- /dev/null +++ b/queue-4.11/qmi_wwan-add-another-lenovo-em74xx-device-id.patch @@ -0,0 +1,33 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: Bjørn Mork +Date: Wed, 17 May 2017 16:31:41 +0200 +Subject: qmi_wwan: add another Lenovo EM74xx device ID + +From: Bjørn Mork + + +[ Upstream commit 486181bcb3248e2f1977f4e69387a898234a4e1e ] + +In their infinite wisdom, and never ending quest for end user frustration, +Lenovo has decided to use a new USB device ID for the wwan modules in +their 2017 laptops. The actual hardware is still the Sierra Wireless +EM7455 or EM7430, depending on region. + +Signed-off-by: Bjørn Mork +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/qmi_wwan.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/net/usb/qmi_wwan.c ++++ b/drivers/net/usb/qmi_wwan.c +@@ -902,6 +902,8 @@ static const struct usb_device_id produc + {QMI_FIXED_INTF(0x1199, 0x9071, 10)}, /* Sierra Wireless MC74xx */ + {QMI_FIXED_INTF(0x1199, 0x9079, 8)}, /* Sierra Wireless EM74xx */ + {QMI_FIXED_INTF(0x1199, 0x9079, 10)}, /* Sierra Wireless EM74xx */ ++ {QMI_FIXED_INTF(0x1199, 0x907b, 8)}, /* Sierra Wireless EM74xx */ ++ {QMI_FIXED_INTF(0x1199, 0x907b, 10)}, /* Sierra Wireless EM74xx */ + {QMI_FIXED_INTF(0x1bbb, 0x011e, 4)}, /* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */ + {QMI_FIXED_INTF(0x1bbb, 0x0203, 2)}, /* Alcatel L800MA */ + {QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */ diff --git a/queue-4.11/s390-qeth-add-missing-hash-table-initializations.patch b/queue-4.11/s390-qeth-add-missing-hash-table-initializations.patch new file mode 100644 index 00000000000..c2821845353 --- /dev/null +++ b/queue-4.11/s390-qeth-add-missing-hash-table-initializations.patch @@ -0,0 +1,34 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: Ursula Braun +Date: Wed, 10 May 2017 19:07:54 +0200 +Subject: s390/qeth: add missing hash table initializations + +From: Ursula Braun + + +[ Upstream commit ebccc7397e4a49ff64c8f44a54895de9d32fe742 ] + +commit 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback") +added new hash tables, but missed to initialize them. + +Fixes: 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback") +Signed-off-by: Ursula Braun +Reviewed-by: Julian Wiedmann +Signed-off-by: Julian Wiedmann +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/s390/net/qeth_l3_main.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/s390/net/qeth_l3_main.c ++++ b/drivers/s390/net/qeth_l3_main.c +@@ -3158,6 +3158,8 @@ static int qeth_l3_probe_device(struct c + rc = qeth_l3_create_device_attributes(&gdev->dev); + if (rc) + return rc; ++ hash_init(card->ip_htable); ++ hash_init(card->ip_mc_htable); + card->options.layer2 = 0; + card->info.hwtrap = 0; + return 0; diff --git a/queue-4.11/s390-qeth-avoid-null-pointer-dereference-on-osn.patch b/queue-4.11/s390-qeth-avoid-null-pointer-dereference-on-osn.patch new file mode 100644 index 00000000000..1d27f7328f3 --- /dev/null +++ b/queue-4.11/s390-qeth-avoid-null-pointer-dereference-on-osn.patch @@ -0,0 +1,46 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: Julian Wiedmann +Date: Wed, 10 May 2017 19:07:53 +0200 +Subject: s390/qeth: avoid null pointer dereference on OSN + +From: Julian Wiedmann + + +[ Upstream commit 25e2c341e7818a394da9abc403716278ee646014 ] + +Access card->dev only after checking whether's its valid. + +Signed-off-by: Julian Wiedmann +Reviewed-by: Ursula Braun +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/s390/net/qeth_l2_main.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/drivers/s390/net/qeth_l2_main.c ++++ b/drivers/s390/net/qeth_l2_main.c +@@ -1091,7 +1091,6 @@ static int qeth_l2_setup_netdev(struct q + case QETH_CARD_TYPE_OSN: + card->dev = alloc_netdev(0, "osn%d", NET_NAME_UNKNOWN, + ether_setup); +- card->dev->flags |= IFF_NOARP; + break; + default: + card->dev = alloc_etherdev(0); +@@ -1106,9 +1105,12 @@ static int qeth_l2_setup_netdev(struct q + card->dev->min_mtu = 64; + card->dev->max_mtu = ETH_MAX_MTU; + card->dev->netdev_ops = &qeth_l2_netdev_ops; +- card->dev->ethtool_ops = +- (card->info.type != QETH_CARD_TYPE_OSN) ? +- &qeth_l2_ethtool_ops : &qeth_l2_osn_ops; ++ if (card->info.type == QETH_CARD_TYPE_OSN) { ++ card->dev->ethtool_ops = &qeth_l2_osn_ops; ++ card->dev->flags |= IFF_NOARP; ++ } else { ++ card->dev->ethtool_ops = &qeth_l2_ethtool_ops; ++ } + card->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; + if (card->info.type == QETH_CARD_TYPE_OSD && !card->info.guestlan) { + card->dev->hw_features = NETIF_F_SG; diff --git a/queue-4.11/s390-qeth-handle-sysfs-error-during-initialization.patch b/queue-4.11/s390-qeth-handle-sysfs-error-during-initialization.patch new file mode 100644 index 00000000000..0350b554e22 --- /dev/null +++ b/queue-4.11/s390-qeth-handle-sysfs-error-during-initialization.patch @@ -0,0 +1,82 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: Ursula Braun +Date: Wed, 10 May 2017 19:07:51 +0200 +Subject: s390/qeth: handle sysfs error during initialization + +From: Ursula Braun + + +[ Upstream commit 9111e7880ccf419548c7b0887df020b08eadb075 ] + +When setting up the device from within the layer discipline's +probe routine, creating the layer-specific sysfs attributes can fail. +Report this error back to the caller, and handle it by +releasing the layer discipline. + +Signed-off-by: Ursula Braun +[jwi: updated commit msg, moved an OSN change to a subsequent patch] +Signed-off-by: Julian Wiedmann +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/s390/net/qeth_core_main.c | 4 +++- + drivers/s390/net/qeth_core_sys.c | 2 ++ + drivers/s390/net/qeth_l2_main.c | 5 ++++- + drivers/s390/net/qeth_l3_main.c | 5 ++++- + 4 files changed, 13 insertions(+), 3 deletions(-) + +--- a/drivers/s390/net/qeth_core_main.c ++++ b/drivers/s390/net/qeth_core_main.c +@@ -5661,8 +5661,10 @@ static int qeth_core_set_online(struct c + if (rc) + goto err; + rc = card->discipline->setup(card->gdev); +- if (rc) ++ if (rc) { ++ qeth_core_free_discipline(card); + goto err; ++ } + } + rc = card->discipline->set_online(gdev); + err: +--- a/drivers/s390/net/qeth_core_sys.c ++++ b/drivers/s390/net/qeth_core_sys.c +@@ -426,6 +426,8 @@ static ssize_t qeth_dev_layer2_store(str + goto out; + + rc = card->discipline->setup(card->gdev); ++ if (rc) ++ qeth_core_free_discipline(card); + out: + mutex_unlock(&card->discipline_mutex); + return rc ? 
rc : count; +--- a/drivers/s390/net/qeth_l2_main.c ++++ b/drivers/s390/net/qeth_l2_main.c +@@ -1009,8 +1009,11 @@ static int qeth_l2_stop(struct net_devic + static int qeth_l2_probe_device(struct ccwgroup_device *gdev) + { + struct qeth_card *card = dev_get_drvdata(&gdev->dev); ++ int rc; + +- qeth_l2_create_device_attributes(&gdev->dev); ++ rc = qeth_l2_create_device_attributes(&gdev->dev); ++ if (rc) ++ return rc; + INIT_LIST_HEAD(&card->vid_list); + hash_init(card->mac_htable); + card->options.layer2 = 1; +--- a/drivers/s390/net/qeth_l3_main.c ++++ b/drivers/s390/net/qeth_l3_main.c +@@ -3153,8 +3153,11 @@ static int qeth_l3_setup_netdev(struct q + static int qeth_l3_probe_device(struct ccwgroup_device *gdev) + { + struct qeth_card *card = dev_get_drvdata(&gdev->dev); ++ int rc; + +- qeth_l3_create_device_attributes(&gdev->dev); ++ rc = qeth_l3_create_device_attributes(&gdev->dev); ++ if (rc) ++ return rc; + card->options.layer2 = 0; + card->info.hwtrap = 0; + return 0; diff --git a/queue-4.11/s390-qeth-unbreak-osm-and-osn-support.patch b/queue-4.11/s390-qeth-unbreak-osm-and-osn-support.patch new file mode 100644 index 00000000000..b0d726a85db --- /dev/null +++ b/queue-4.11/s390-qeth-unbreak-osm-and-osn-support.patch @@ -0,0 +1,263 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: Julian Wiedmann +Date: Wed, 10 May 2017 19:07:52 +0200 +Subject: s390/qeth: unbreak OSM and OSN support + +From: Julian Wiedmann + + +[ Upstream commit 2d2ebb3ed0c6acfb014f98e427298673a5d07b82 ] + +commit b4d72c08b358 ("qeth: bridgeport support - basic control") +broke the support for OSM and OSN devices as follows: + +As OSM and OSN are L2 only, qeth_core_probe_device() does an early +setup by loading the l2 discipline and calling qeth_l2_probe_device(). +In this context, adding the l2-specific bridgeport sysfs attributes +via qeth_l2_create_device_attributes() hits a BUG_ON in fs/sysfs/group.c, +since the basic sysfs infrastructure for the device hasn't been +established yet. + +Note that OSN actually has its own unique sysfs attributes +(qeth_osn_devtype), so the additional attributes shouldn't be created +at all. +For OSM, add a new qeth_l2_devtype that contains all the common +and l2-specific sysfs attributes. +When qeth_core_probe_device() does early setup for OSM or OSN, assign +the corresponding devtype so that the ccwgroup probe code creates the +full set of sysfs attributes. +This allows us to skip qeth_l2_create_device_attributes() in case +of an early setup. + +Any device that can't do early setup will initially have only the +generic sysfs attributes, and when it's probed later +qeth_l2_probe_device() adds the l2-specific attributes. + +If an early-setup device is removed (by calling ccwgroup_ungroup()), +device_unregister() will - using the devtype - delete the +l2-specific attributes before qeth_l2_remove_device() is called. +So make sure to not remove them twice. + +What complicates the issue is that qeth_l2_probe_device() and +qeth_l2_remove_device() is also called on a device when its +layer2 attribute changes (ie. its layer mode is switched). +For early-setup devices this wouldn't work properly - we wouldn't +remove the l2-specific attributes when switching to L3. +But switching the layer mode doesn't actually make any sense; +we already decided that the device can only operate in L2! +So just refuse to switch the layer mode on such devices. Note that +OSN doesn't have a layer2 attribute, so we only need to special-case +OSM. + +Based on an initial patch by Ursula Braun. 
+ +Fixes: b4d72c08b358 ("qeth: bridgeport support - basic control") +Signed-off-by: Julian Wiedmann +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/s390/net/qeth_core.h | 4 ++++ + drivers/s390/net/qeth_core_main.c | 17 +++++++++-------- + drivers/s390/net/qeth_core_sys.c | 22 ++++++++++++++-------- + drivers/s390/net/qeth_l2.h | 2 ++ + drivers/s390/net/qeth_l2_main.c | 17 +++++++++++++---- + drivers/s390/net/qeth_l2_sys.c | 8 ++++++++ + drivers/s390/net/qeth_l3_main.c | 1 + + 7 files changed, 51 insertions(+), 20 deletions(-) + +--- a/drivers/s390/net/qeth_core.h ++++ b/drivers/s390/net/qeth_core.h +@@ -714,6 +714,7 @@ enum qeth_discipline_id { + }; + + struct qeth_discipline { ++ const struct device_type *devtype; + void (*start_poll)(struct ccw_device *, int, unsigned long); + qdio_handler_t *input_handler; + qdio_handler_t *output_handler; +@@ -889,6 +890,9 @@ extern struct qeth_discipline qeth_l2_di + extern struct qeth_discipline qeth_l3_discipline; + extern const struct attribute_group *qeth_generic_attr_groups[]; + extern const struct attribute_group *qeth_osn_attr_groups[]; ++extern const struct attribute_group qeth_device_attr_group; ++extern const struct attribute_group qeth_device_blkt_group; ++extern const struct device_type qeth_generic_devtype; + extern struct workqueue_struct *qeth_wq; + + int qeth_card_hw_is_reachable(struct qeth_card *); +--- a/drivers/s390/net/qeth_core_main.c ++++ b/drivers/s390/net/qeth_core_main.c +@@ -5460,10 +5460,12 @@ void qeth_core_free_discipline(struct qe + card->discipline = NULL; + } + +-static const struct device_type qeth_generic_devtype = { ++const struct device_type qeth_generic_devtype = { + .name = "qeth_generic", + .groups = qeth_generic_attr_groups, + }; ++EXPORT_SYMBOL_GPL(qeth_generic_devtype); ++ + static const struct device_type qeth_osn_devtype = { + .name = "qeth_osn", + .groups = qeth_osn_attr_groups, +@@ -5589,23 +5591,22 @@ static int qeth_core_probe_device(struct + goto err_card; + } + +- if (card->info.type == QETH_CARD_TYPE_OSN) +- gdev->dev.type = &qeth_osn_devtype; +- else +- gdev->dev.type = &qeth_generic_devtype; +- + switch (card->info.type) { + case QETH_CARD_TYPE_OSN: + case QETH_CARD_TYPE_OSM: + rc = qeth_core_load_discipline(card, QETH_DISCIPLINE_LAYER2); + if (rc) + goto err_card; ++ ++ gdev->dev.type = (card->info.type != QETH_CARD_TYPE_OSN) ++ ? 
card->discipline->devtype ++ : &qeth_osn_devtype; + rc = card->discipline->setup(card->gdev); + if (rc) + goto err_disc; +- case QETH_CARD_TYPE_OSD: +- case QETH_CARD_TYPE_OSX: ++ break; + default: ++ gdev->dev.type = &qeth_generic_devtype; + break; + } + +--- a/drivers/s390/net/qeth_core_sys.c ++++ b/drivers/s390/net/qeth_core_sys.c +@@ -413,12 +413,16 @@ static ssize_t qeth_dev_layer2_store(str + + if (card->options.layer2 == newdis) + goto out; +- else { +- card->info.mac_bits = 0; +- if (card->discipline) { +- card->discipline->remove(card->gdev); +- qeth_core_free_discipline(card); +- } ++ if (card->info.type == QETH_CARD_TYPE_OSM) { ++ /* fixed layer, can't switch */ ++ rc = -EOPNOTSUPP; ++ goto out; ++ } ++ ++ card->info.mac_bits = 0; ++ if (card->discipline) { ++ card->discipline->remove(card->gdev); ++ qeth_core_free_discipline(card); + } + + rc = qeth_core_load_discipline(card, newdis); +@@ -705,10 +709,11 @@ static struct attribute *qeth_blkt_devic + &dev_attr_inter_jumbo.attr, + NULL, + }; +-static struct attribute_group qeth_device_blkt_group = { ++const struct attribute_group qeth_device_blkt_group = { + .name = "blkt", + .attrs = qeth_blkt_device_attrs, + }; ++EXPORT_SYMBOL_GPL(qeth_device_blkt_group); + + static struct attribute *qeth_device_attrs[] = { + &dev_attr_state.attr, +@@ -728,9 +733,10 @@ static struct attribute *qeth_device_att + &dev_attr_switch_attrs.attr, + NULL, + }; +-static struct attribute_group qeth_device_attr_group = { ++const struct attribute_group qeth_device_attr_group = { + .attrs = qeth_device_attrs, + }; ++EXPORT_SYMBOL_GPL(qeth_device_attr_group); + + const struct attribute_group *qeth_generic_attr_groups[] = { + &qeth_device_attr_group, +--- a/drivers/s390/net/qeth_l2.h ++++ b/drivers/s390/net/qeth_l2.h +@@ -8,6 +8,8 @@ + + #include "qeth_core.h" + ++extern const struct attribute_group *qeth_l2_attr_groups[]; ++ + int qeth_l2_create_device_attributes(struct device *); + void qeth_l2_remove_device_attributes(struct device *); + void qeth_l2_setup_bridgeport_attrs(struct qeth_card *card); +--- a/drivers/s390/net/qeth_l2_main.c ++++ b/drivers/s390/net/qeth_l2_main.c +@@ -1006,14 +1006,21 @@ static int qeth_l2_stop(struct net_devic + return 0; + } + ++static const struct device_type qeth_l2_devtype = { ++ .name = "qeth_layer2", ++ .groups = qeth_l2_attr_groups, ++}; ++ + static int qeth_l2_probe_device(struct ccwgroup_device *gdev) + { + struct qeth_card *card = dev_get_drvdata(&gdev->dev); + int rc; + +- rc = qeth_l2_create_device_attributes(&gdev->dev); +- if (rc) +- return rc; ++ if (gdev->dev.type == &qeth_generic_devtype) { ++ rc = qeth_l2_create_device_attributes(&gdev->dev); ++ if (rc) ++ return rc; ++ } + INIT_LIST_HEAD(&card->vid_list); + hash_init(card->mac_htable); + card->options.layer2 = 1; +@@ -1025,7 +1032,8 @@ static void qeth_l2_remove_device(struct + { + struct qeth_card *card = dev_get_drvdata(&cgdev->dev); + +- qeth_l2_remove_device_attributes(&cgdev->dev); ++ if (cgdev->dev.type == &qeth_generic_devtype) ++ qeth_l2_remove_device_attributes(&cgdev->dev); + qeth_set_allowed_threads(card, 0, 1); + wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0); + +@@ -1409,6 +1417,7 @@ static int qeth_l2_control_event(struct + } + + struct qeth_discipline qeth_l2_discipline = { ++ .devtype = &qeth_l2_devtype, + .start_poll = qeth_qdio_start_poll, + .input_handler = (qdio_handler_t *) qeth_qdio_input_handler, + .output_handler = (qdio_handler_t *) qeth_qdio_output_handler, +--- a/drivers/s390/net/qeth_l2_sys.c ++++ 
b/drivers/s390/net/qeth_l2_sys.c +@@ -272,3 +272,11 @@ void qeth_l2_setup_bridgeport_attrs(stru + } else + qeth_bridgeport_an_set(card, 0); + } ++ ++const struct attribute_group *qeth_l2_attr_groups[] = { ++ &qeth_device_attr_group, ++ &qeth_device_blkt_group, ++ /* l2 specific, see l2_{create,remove}_device_attributes(): */ ++ &qeth_l2_bridgeport_attr_group, ++ NULL, ++}; +--- a/drivers/s390/net/qeth_l3_main.c ++++ b/drivers/s390/net/qeth_l3_main.c +@@ -3434,6 +3434,7 @@ static int qeth_l3_control_event(struct + } + + struct qeth_discipline qeth_l3_discipline = { ++ .devtype = &qeth_generic_devtype, + .start_poll = qeth_qdio_start_poll, + .input_handler = (qdio_handler_t *) qeth_qdio_input_handler, + .output_handler = (qdio_handler_t *) qeth_qdio_output_handler, diff --git a/queue-4.11/sctp-do-not-inherit-ipv6_-mc-ac-fl-_list-from-parent.patch b/queue-4.11/sctp-do-not-inherit-ipv6_-mc-ac-fl-_list-from-parent.patch new file mode 100644 index 00000000000..49e99ba4bb7 --- /dev/null +++ b/queue-4.11/sctp-do-not-inherit-ipv6_-mc-ac-fl-_list-from-parent.patch @@ -0,0 +1,34 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: Eric Dumazet +Date: Wed, 17 May 2017 07:16:40 -0700 +Subject: sctp: do not inherit ipv6_{mc|ac|fl}_list from parent + +From: Eric Dumazet + + +[ Upstream commit fdcee2cbb8438702ea1b328fb6e0ac5e9a40c7f8 ] + +SCTP needs fixes similar to 83eaddab4378 ("ipv6/dccp: do not inherit +ipv6_mc_list from parent"), otherwise bad things can happen. + +Signed-off-by: Eric Dumazet +Reported-by: Andrey Konovalov +Tested-by: Andrey Konovalov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/ipv6.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/sctp/ipv6.c ++++ b/net/sctp/ipv6.c +@@ -677,6 +677,9 @@ static struct sock *sctp_v6_create_accep + newnp = inet6_sk(newsk); + + memcpy(newnp, np, sizeof(struct ipv6_pinfo)); ++ newnp->ipv6_mc_list = NULL; ++ newnp->ipv6_ac_list = NULL; ++ newnp->ipv6_fl_list = NULL; + + rcu_read_lock(); + opt = rcu_dereference(np->opt); diff --git a/queue-4.11/sctp-fix-icmp-processing-if-skb-is-non-linear.patch b/queue-4.11/sctp-fix-icmp-processing-if-skb-is-non-linear.patch new file mode 100644 index 00000000000..6cfb4c1d1c8 --- /dev/null +++ b/queue-4.11/sctp-fix-icmp-processing-if-skb-is-non-linear.patch @@ -0,0 +1,72 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: Davide Caratti +Date: Thu, 25 May 2017 19:14:56 +0200 +Subject: sctp: fix ICMP processing if skb is non-linear + +From: Davide Caratti + + +[ Upstream commit 804ec7ebe8ea003999ca8d1bfc499edc6a9e07df ] + +sometimes ICMP replies to INIT chunks are ignored by the client, even if +the encapsulated SCTP headers match an open socket. This happens when the +ICMP packet is carried by a paged skb: use skb_header_pointer() to read +packet contents beyond the SCTP header, so that chunk header and initiate +tag are validated correctly. + +v2: +- don't use skb_header_pointer() to read the transport header, since + icmp_socket_deliver() already puts these 8 bytes in the linear area. +- change commit message to make specific reference to INIT chunks. + +Signed-off-by: Davide Caratti +Acked-by: Marcelo Ricardo Leitner +Acked-by: Vlad Yasevich +Reviewed-by: Xin Long +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/input.c | 16 +++++++++------- + 1 file changed, 9 insertions(+), 7 deletions(-) + +--- a/net/sctp/input.c ++++ b/net/sctp/input.c +@@ -473,15 +473,14 @@ struct sock *sctp_err_lookup(struct net + struct sctp_association **app, + struct sctp_transport **tpp) + { ++ struct sctp_init_chunk *chunkhdr, _chunkhdr; + union sctp_addr saddr; + union sctp_addr daddr; + struct sctp_af *af; + struct sock *sk = NULL; + struct sctp_association *asoc; + struct sctp_transport *transport = NULL; +- struct sctp_init_chunk *chunkhdr; + __u32 vtag = ntohl(sctphdr->vtag); +- int len = skb->len - ((void *)sctphdr - (void *)skb->data); + + *app = NULL; *tpp = NULL; + +@@ -516,13 +515,16 @@ struct sock *sctp_err_lookup(struct net + * discard the packet. + */ + if (vtag == 0) { +- chunkhdr = (void *)sctphdr + sizeof(struct sctphdr); +- if (len < sizeof(struct sctphdr) + sizeof(sctp_chunkhdr_t) +- + sizeof(__be32) || ++ /* chunk header + first 4 octects of init header */ ++ chunkhdr = skb_header_pointer(skb, skb_transport_offset(skb) + ++ sizeof(struct sctphdr), ++ sizeof(struct sctp_chunkhdr) + ++ sizeof(__be32), &_chunkhdr); ++ if (!chunkhdr || + chunkhdr->chunk_hdr.type != SCTP_CID_INIT || +- ntohl(chunkhdr->init_hdr.init_tag) != asoc->c.my_vtag) { ++ ntohl(chunkhdr->init_hdr.init_tag) != asoc->c.my_vtag) + goto out; +- } ++ + } else if (vtag != asoc->c.peer_vtag) { + goto out; + } diff --git a/queue-4.11/sctp-fix-src-address-selection-if-using-secondary-addresses-for-ipv6.patch b/queue-4.11/sctp-fix-src-address-selection-if-using-secondary-addresses-for-ipv6.patch new file mode 100644 index 00000000000..b322ec3d4ab --- /dev/null +++ b/queue-4.11/sctp-fix-src-address-selection-if-using-secondary-addresses-for-ipv6.patch @@ -0,0 +1,119 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: Xin Long +Date: Fri, 12 May 2017 14:39:52 +0800 +Subject: sctp: fix src address selection if using secondary addresses for ipv6 + +From: Xin Long + + +[ Upstream commit dbc2b5e9a09e9a6664679a667ff81cff6e5f2641 ] + +Commit 0ca50d12fe46 ("sctp: fix src address selection if using secondary +addresses") has fixed a src address selection issue when using secondary +addresses for ipv4. + +Now sctp ipv6 also has the similar issue. When using a secondary address, +sctp_v6_get_dst tries to choose the saddr which has the most same bits +with the daddr by sctp_v6_addr_match_len. It may make some cases not work +as expected. + +hostA: + [1] fd21:356b:459a:cf10::11 (eth1) + [2] fd21:356b:459a:cf20::11 (eth2) + +hostB: + [a] fd21:356b:459a:cf30::2 (eth1) + [b] fd21:356b:459a:cf40::2 (eth2) + +route from hostA to hostB: + fd21:356b:459a:cf30::/64 dev eth1 metric 1024 mtu 1500 + +The expected path should be: + fd21:356b:459a:cf10::11 <-> fd21:356b:459a:cf30::2 +But addr[2] matches addr[a] more bits than addr[1] does, according to +sctp_v6_addr_match_len. It causes the path to be: + fd21:356b:459a:cf20::11 <-> fd21:356b:459a:cf30::2 + +This patch is to fix it with the same way as Marcelo's fix for sctp ipv4. +As no ip_dev_find for ipv6, this patch is to use ipv6_chk_addr to check +if the saddr is in a dev instead. + +Note that for backwards compatibility, it will still do the addr_match_len +check here when no optimal is found. + +Reported-by: Patrick Talbert +Signed-off-by: Xin Long +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/ipv6.c | 46 +++++++++++++++++++++++++++++----------------- + 1 file changed, 29 insertions(+), 17 deletions(-) + +--- a/net/sctp/ipv6.c ++++ b/net/sctp/ipv6.c +@@ -240,12 +240,10 @@ static void sctp_v6_get_dst(struct sctp_ + struct sctp_bind_addr *bp; + struct ipv6_pinfo *np = inet6_sk(sk); + struct sctp_sockaddr_entry *laddr; +- union sctp_addr *baddr = NULL; + union sctp_addr *daddr = &t->ipaddr; + union sctp_addr dst_saddr; + struct in6_addr *final_p, final; + __u8 matchlen = 0; +- __u8 bmatchlen; + sctp_scope_t scope; + + memset(fl6, 0, sizeof(struct flowi6)); +@@ -312,23 +310,37 @@ static void sctp_v6_get_dst(struct sctp_ + */ + rcu_read_lock(); + list_for_each_entry_rcu(laddr, &bp->address_list, list) { +- if (!laddr->valid) ++ struct dst_entry *bdst; ++ __u8 bmatchlen; ++ ++ if (!laddr->valid || ++ laddr->state != SCTP_ADDR_SRC || ++ laddr->a.sa.sa_family != AF_INET6 || ++ scope > sctp_scope(&laddr->a)) + continue; +- if ((laddr->state == SCTP_ADDR_SRC) && +- (laddr->a.sa.sa_family == AF_INET6) && +- (scope <= sctp_scope(&laddr->a))) { +- bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a); +- if (!baddr || (matchlen < bmatchlen)) { +- baddr = &laddr->a; +- matchlen = bmatchlen; +- } +- } +- } +- if (baddr) { +- fl6->saddr = baddr->v6.sin6_addr; +- fl6->fl6_sport = baddr->v6.sin6_port; ++ ++ fl6->saddr = laddr->a.v6.sin6_addr; ++ fl6->fl6_sport = laddr->a.v6.sin6_port; + final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); +- dst = ip6_dst_lookup_flow(sk, fl6, final_p); ++ bdst = ip6_dst_lookup_flow(sk, fl6, final_p); ++ ++ if (!IS_ERR(bdst) && ++ ipv6_chk_addr(dev_net(bdst->dev), ++ &laddr->a.v6.sin6_addr, bdst->dev, 1)) { ++ if (!IS_ERR_OR_NULL(dst)) ++ dst_release(dst); ++ dst = bdst; ++ break; ++ } ++ ++ bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a); ++ if (matchlen > bmatchlen) ++ continue; ++ ++ if (!IS_ERR_OR_NULL(dst)) ++ dst_release(dst); ++ dst = bdst; ++ matchlen = bmatchlen; + } + rcu_read_unlock(); + diff --git a/queue-4.11/series b/queue-4.11/series new file mode 100644 index 00000000000..38a534adfae --- /dev/null +++ b/queue-4.11/series @@ -0,0 +1,41 @@ +dccp-tcp-do-not-inherit-mc_list-from-parent.patch +driver-vrf-fix-one-possible-use-after-free-issue.patch +ipv6-dccp-do-not-inherit-ipv6_mc_list-from-parent.patch +s390-qeth-handle-sysfs-error-during-initialization.patch +s390-qeth-unbreak-osm-and-osn-support.patch +s390-qeth-avoid-null-pointer-dereference-on-osn.patch +s390-qeth-add-missing-hash-table-initializations.patch +bpf-arm64-fix-faulty-emission-of-map-access-in-tail-calls.patch +netem-fix-skb_orphan_partial.patch +net-fix-compile-error-in-skb_orphan_partial.patch +tcp-avoid-fragmenting-peculiar-skbs-in-sack.patch +tipc-make-macro-tipc_wait_for_cond-smp-safe.patch +sctp-fix-src-address-selection-if-using-secondary-addresses-for-ipv6.patch +sctp-do-not-inherit-ipv6_-mc-ac-fl-_list-from-parent.patch +net-packet-fix-missing-net_device-reference-release.patch +net-mlx5e-use-the-correct-pause-values-for-ethtool-advertising.patch +net-mlx5e-fix-ethtool-pause-support-and-advertise-reporting.patch +tcp-eliminate-negative-reordering-in-tcp_clean_rtx_queue.patch +smc-switch-to-usage-of-ib_pd_unsafe_global_rkey.patch +net-smc-add-warning-about-remote-memory-exposure.patch +net-improve-handling-of-failures-on-link-and-route-dumps.patch +ipv6-prevent-overrun-when-parsing-v6-header-options.patch +ipv6-check-ip6_find_1stfragopt-return-value-properly.patch 
+bridge-netlink-check-vlan_default_pvid-range.patch +qmi_wwan-add-another-lenovo-em74xx-device-id.patch +bridge-start-hello_timer-when-enabling-kernel_stp-in-br_stp_start.patch +ipv6-fix-out-of-bound-writes-in-__ip6_append_data.patch +bonding-fix-accounting-of-active-ports-in-3ad.patch +net-mlx5-avoid-using-pending-command-interface-slots.patch +net-phy-marvell-limit-errata-to-88m1101.patch +vlan-fix-tcp-checksum-offloads-in-q-in-q-vlans.patch +be2net-fix-offload-features-for-q-in-q-packets.patch +virtio-net-enable-tso-checksum-offloads-for-q-in-q-vlans.patch +geneve-fix-fill_info-when-using-collect_metadata.patch +tcp-avoid-fastopen-api-to-be-used-on-af_unspec.patch +sctp-fix-icmp-processing-if-skb-is-non-linear.patch +ip6_tunnel-ip6_gre-fix-setting-of-dscp-on-encapsulated-packets.patch +ipv4-add-reference-counting-to-metrics.patch +bpf-add-bpf_clone_redirect-to-bpf_helper_changes_pkt_data.patch +bpf-fix-wrong-exposure-of-map_flags-into-fdinfo-for-lpm.patch +bpf-adjust-verifier-heuristics.patch diff --git a/queue-4.11/smc-switch-to-usage-of-ib_pd_unsafe_global_rkey.patch b/queue-4.11/smc-switch-to-usage-of-ib_pd_unsafe_global_rkey.patch new file mode 100644 index 00000000000..72dff2254b5 --- /dev/null +++ b/queue-4.11/smc-switch-to-usage-of-ib_pd_unsafe_global_rkey.patch @@ -0,0 +1,141 @@ +From foo@baz Wed May 31 09:13:10 JST 2017 +From: Ursula Braun +Date: Mon, 15 May 2017 17:33:37 +0200 +Subject: smc: switch to usage of IB_PD_UNSAFE_GLOBAL_RKEY + +From: Ursula Braun + + +[ Upstream commit 263eec9b2a82e8697d064709414914b5b10ac538 ] + +Currently, SMC enables remote access to physical memory when a user +has successfully configured and established an SMC-connection until ten +minutes after the last SMC connection is closed. Because this is considered +a security risk, drivers are supposed to use IB_PD_UNSAFE_GLOBAL_RKEY in +such a case. + +This patch changes the current SMC code to use IB_PD_UNSAFE_GLOBAL_RKEY. +This improves user awareness, but does not remove the security risk itself. + +Signed-off-by: Ursula Braun +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/smc/smc_clc.c | 4 ++-- + net/smc/smc_core.c | 16 +++------------- + net/smc/smc_core.h | 2 +- + net/smc/smc_ib.c | 21 ++------------------- + net/smc/smc_ib.h | 2 -- + 5 files changed, 8 insertions(+), 37 deletions(-) + +--- a/net/smc/smc_clc.c ++++ b/net/smc/smc_clc.c +@@ -204,7 +204,7 @@ int smc_clc_send_confirm(struct smc_sock + memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN); + hton24(cclc.qpn, link->roce_qp->qp_num); + cclc.rmb_rkey = +- htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); ++ htonl(conn->rmb_desc->rkey[SMC_SINGLE_LINK]); + cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */ + cclc.rmbe_alert_token = htonl(conn->alert_token_local); + cclc.qp_mtu = min(link->path_mtu, link->peer_mtu); +@@ -256,7 +256,7 @@ int smc_clc_send_accept(struct smc_sock + memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN); + hton24(aclc.qpn, link->roce_qp->qp_num); + aclc.rmb_rkey = +- htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); ++ htonl(conn->rmb_desc->rkey[SMC_SINGLE_LINK]); + aclc.conn_idx = 1; /* as long as 1 RMB = 1 RMBE */ + aclc.rmbe_alert_token = htonl(conn->alert_token_local); + aclc.qp_mtu = link->path_mtu; +--- a/net/smc/smc_core.c ++++ b/net/smc/smc_core.c +@@ -613,19 +613,8 @@ int smc_rmb_create(struct smc_sock *smc) + rmb_desc = NULL; + continue; /* if mapping failed, try smaller one */ + } +- rc = smc_ib_get_memory_region(lgr->lnk[SMC_SINGLE_LINK].roce_pd, +- IB_ACCESS_REMOTE_WRITE | +- IB_ACCESS_LOCAL_WRITE, +- &rmb_desc->mr_rx[SMC_SINGLE_LINK]); +- if (rc) { +- smc_ib_buf_unmap(lgr->lnk[SMC_SINGLE_LINK].smcibdev, +- tmp_bufsize, rmb_desc, +- DMA_FROM_DEVICE); +- kfree(rmb_desc->cpu_addr); +- kfree(rmb_desc); +- rmb_desc = NULL; +- continue; +- } ++ rmb_desc->rkey[SMC_SINGLE_LINK] = ++ lgr->lnk[SMC_SINGLE_LINK].roce_pd->unsafe_global_rkey; + rmb_desc->used = 1; + write_lock_bh(&lgr->rmbs_lock); + list_add(&rmb_desc->list, +@@ -668,6 +657,7 @@ int smc_rmb_rtoken_handling(struct smc_c + + for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { + if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) && ++ (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) && + test_bit(i, lgr->rtokens_used_mask)) { + conn->rtoken_idx = i; + return 0; +--- a/net/smc/smc_core.h ++++ b/net/smc/smc_core.h +@@ -93,7 +93,7 @@ struct smc_buf_desc { + u64 dma_addr[SMC_LINKS_PER_LGR_MAX]; + /* mapped address of buffer */ + void *cpu_addr; /* virtual address of buffer */ +- struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX]; ++ u32 rkey[SMC_LINKS_PER_LGR_MAX]; + /* for rmb only: + * rkey provided to peer + */ +--- a/net/smc/smc_ib.c ++++ b/net/smc/smc_ib.c +@@ -37,24 +37,6 @@ u8 local_systemid[SMC_SYSTEMID_LEN] = SM + * identifier + */ + +-int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags, +- struct ib_mr **mr) +-{ +- int rc; +- +- if (*mr) +- return 0; /* already done */ +- +- /* obtain unique key - +- * next invocation of get_dma_mr returns a different key! 
+- */
+- *mr = pd->device->get_dma_mr(pd, access_flags);
+- rc = PTR_ERR_OR_ZERO(*mr);
+- if (IS_ERR(*mr))
+- *mr = NULL;
+- return rc;
+-}
+-
+ static int smc_ib_modify_qp_init(struct smc_link *lnk)
+ {
+ struct ib_qp_attr qp_attr;
+@@ -213,7 +195,8 @@ int smc_ib_create_protection_domain(stru
+ {
+ int rc;
+
+- lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev, 0);
++ lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev,
++ IB_PD_UNSAFE_GLOBAL_RKEY);
+ rc = PTR_ERR_OR_ZERO(lnk->roce_pd);
+ if (IS_ERR(lnk->roce_pd))
+ lnk->roce_pd = NULL;
+--- a/net/smc/smc_ib.h
++++ b/net/smc/smc_ib.h
+@@ -60,8 +60,6 @@ void smc_ib_dealloc_protection_domain(st
+ int smc_ib_create_protection_domain(struct smc_link *lnk);
+ void smc_ib_destroy_queue_pair(struct smc_link *lnk);
+ int smc_ib_create_queue_pair(struct smc_link *lnk);
+-int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
+- struct ib_mr **mr);
+ int smc_ib_ready_link(struct smc_link *lnk);
+ int smc_ib_modify_qp_rts(struct smc_link *lnk);
+ int smc_ib_modify_qp_reset(struct smc_link *lnk);
diff --git a/queue-4.11/tcp-avoid-fastopen-api-to-be-used-on-af_unspec.patch b/queue-4.11/tcp-avoid-fastopen-api-to-be-used-on-af_unspec.patch
new file mode 100644
index 00000000000..bdcc9df3dda
--- /dev/null
+++ b/queue-4.11/tcp-avoid-fastopen-api-to-be-used-on-af_unspec.patch
@@ -0,0 +1,88 @@
+From foo@baz Wed May 31 09:13:10 JST 2017
+From: Wei Wang
+Date: Wed, 24 May 2017 09:59:31 -0700
+Subject: tcp: avoid fastopen API to be used on AF_UNSPEC
+
+From: Wei Wang
+
+
+[ Upstream commit ba615f675281d76fd19aa03558777f81fb6b6084 ]
+
+The fastopen API should be used to perform fastopen operations on the
+TCP socket. It does not make sense to use the fastopen API to perform a
+disconnect by calling it with AF_UNSPEC. The fastopen data path is also
+prone to race conditions and bugs when used with AF_UNSPEC.
+
+One issue reported and analyzed by Vegard Nossum is as follows:
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+Thread A: Thread B:
+------------------------------------------------------------------------
+sendto()
+ - tcp_sendmsg()
+ - sk_stream_memory_free() = 0
+ - goto wait_for_sndbuf
+ - sk_stream_wait_memory()
+ - sk_wait_event() // sleep
+ | sendto(flags=MSG_FASTOPEN, dest_addr=AF_UNSPEC)
+ | - tcp_sendmsg()
+ | - tcp_sendmsg_fastopen()
+ | - __inet_stream_connect()
+ | - tcp_disconnect() //because of AF_UNSPEC
+ | - tcp_transmit_skb()// send RST
+ | - return 0; // no reconnect!
+ | - sk_stream_wait_connect()
+ | - sock_error()
+ | - xchg(&sk->sk_err, 0)
+ | - return -ECONNRESET
+ - ... // wake up, see sk->sk_err == 0
+ - skb_entail() on TCP_CLOSE socket
+
+If the connection is reopened then we will send a brand new SYN packet
+after thread A has already queued a buffer. At this point I think the
+socket internal state (sequence numbers etc.) becomes messed up.
+
+When the new connection is closed, the FIN-ACK is rejected because the
+sequence number is outside the window. The other side tries to
+retransmit,
+but __tcp_retransmit_skb() calls tcp_trim_head() on an empty skb which
+corrupts the skb data length and hits a BUG() in copy_and_csum_bits().
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+Hence, this patch adds a check for AF_UNSPEC in the fastopen data path
+and returns EOPNOTSUPP to the user if such a case happens.
+
+Fixes: cf60af03ca4e7 ("tcp: Fast Open client - sendmsg(MSG_FASTOPEN)")
+Reported-by: Vegard Nossum
+Signed-off-by: Wei Wang
+Signed-off-by: Eric Dumazet
+Signed-off-by: David S. 
Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/ipv4/tcp.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -1084,9 +1084,12 @@ static int tcp_sendmsg_fastopen(struct s
+ {
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
++ struct sockaddr *uaddr = msg->msg_name;
+ int err, flags;
+
+- if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE))
++ if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
++ (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) &&
++ uaddr->sa_family == AF_UNSPEC))
+ return -EOPNOTSUPP;
+ if (tp->fastopen_req)
+ return -EALREADY; /* Another Fast Open is in progress */
+@@ -1108,7 +1111,7 @@ static int tcp_sendmsg_fastopen(struct s
+ }
+ }
+ flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
+- err = __inet_stream_connect(sk->sk_socket, msg->msg_name,
++ err = __inet_stream_connect(sk->sk_socket, uaddr,
+ msg->msg_namelen, flags, 1);
+ /* fastopen_req could already be freed in __inet_stream_connect
+ * if the connection times out or gets rst
diff --git a/queue-4.11/tcp-avoid-fragmenting-peculiar-skbs-in-sack.patch b/queue-4.11/tcp-avoid-fragmenting-peculiar-skbs-in-sack.patch
new file mode 100644
index 00000000000..2ca486014a8
--- /dev/null
+++ b/queue-4.11/tcp-avoid-fragmenting-peculiar-skbs-in-sack.patch
@@ -0,0 +1,55 @@
+From foo@baz Wed May 31 09:13:10 JST 2017
+From: Yuchung Cheng
+Date: Wed, 10 May 2017 17:01:27 -0700
+Subject: tcp: avoid fragmenting peculiar skbs in SACK
+
+From: Yuchung Cheng
+
+
+[ Upstream commit b451e5d24ba6687c6f0e7319c727a709a1846c06 ]
+
+This patch fixes a bug in splitting an SKB during SACK
+processing. Specifically, if an skb contains multiple
+packets and is only partially sacked in the higher sequences,
+tcp_match_skb_to_sack() splits the skb and marks the second fragment
+as SACKed.
+
+The current code further attempts rounding up the first fragment
+to MSS boundaries. But it misses a boundary condition when the
+rounded-up fragment size (pkt_len) is exactly the skb size. Splitting
+such an skb is pointless and causes a kernel warning and aborts
+the SACK processing. This patch universally checks for such an over-split
+before calling tcp_fragment() to prevent these unnecessary warnings.
+
+Fixes: adb92db857ee ("tcp: Make SACK code to split only at mss boundaries")
+Signed-off-by: Yuchung Cheng
+Signed-off-by: Eric Dumazet
+Signed-off-by: Soheil Hassas Yeganeh
+Acked-by: Neal Cardwell
+Signed-off-by: David S. 
Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/ipv4/tcp_input.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -1174,13 +1174,14 @@ static int tcp_match_skb_to_sack(struct
+ */
+ if (pkt_len > mss) {
+ unsigned int new_len = (pkt_len / mss) * mss;
+- if (!in_sack && new_len < pkt_len) {
++ if (!in_sack && new_len < pkt_len)
+ new_len += mss;
+- if (new_len >= skb->len)
+- return 0;
+- }
+ pkt_len = new_len;
+ }
++
++ if (pkt_len >= skb->len && !in_sack)
++ return 0;
++
+ err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC);
+ if (err < 0)
+ return err;
diff --git a/queue-4.11/tcp-eliminate-negative-reordering-in-tcp_clean_rtx_queue.patch b/queue-4.11/tcp-eliminate-negative-reordering-in-tcp_clean_rtx_queue.patch
new file mode 100644
index 00000000000..4a37562f02b
--- /dev/null
+++ b/queue-4.11/tcp-eliminate-negative-reordering-in-tcp_clean_rtx_queue.patch
@@ -0,0 +1,44 @@
+From foo@baz Wed May 31 09:13:10 JST 2017
+From: Soheil Hassas Yeganeh
+Date: Mon, 15 May 2017 17:05:47 -0400
+Subject: tcp: eliminate negative reordering in tcp_clean_rtx_queue
+
+From: Soheil Hassas Yeganeh
+
+
+[ Upstream commit bafbb9c73241760023d8981191ddd30bb1c6dbac ]
+
+tcp_ack() can call tcp_fragment(), which may reduce the
+value of tp->fackets_out when MSS changes. When prior_fackets
+is larger than tp->fackets_out, tcp_clean_rtx_queue() can
+invoke tcp_update_reordering() with negative values. This
+results in absurd tp->reordering values higher than
+sysctl_tcp_max_reordering.
+
+Note that tcp_update_reordering() indeed sets tp->reordering
+to min(sysctl_tcp_max_reordering, metric), but because
+the comparison is signed, a negative metric always wins.
+
+Fixes: c7caf8d3ed7a ("[TCP]: Fix reord detection due to snd_una covered holes")
+Reported-by: Rebecca Isaacs
+Signed-off-by: Soheil Hassas Yeganeh
+Signed-off-by: Neal Cardwell
+Signed-off-by: Yuchung Cheng
+Signed-off-by: Eric Dumazet
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/ipv4/tcp_input.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -3189,7 +3189,7 @@ static int tcp_clean_rtx_queue(struct so
+ int delta;
+
+ /* Non-retransmitted hole got filled? That's reordering */
+- if (reord < prior_fackets)
++ if (reord < prior_fackets && reord <= tp->fackets_out)
+ tcp_update_reordering(sk, tp->fackets_out - reord, 0);
+
+ delta = tcp_is_fack(tp) ? pkts_acked :
diff --git a/queue-4.11/tipc-make-macro-tipc_wait_for_cond-smp-safe.patch b/queue-4.11/tipc-make-macro-tipc_wait_for_cond-smp-safe.patch
new file mode 100644
index 00000000000..835bb8ecb96
--- /dev/null
+++ b/queue-4.11/tipc-make-macro-tipc_wait_for_cond-smp-safe.patch
@@ -0,0 +1,80 @@
+From foo@baz Wed May 31 09:13:10 JST 2017
+From: Jon Paul Maloy
+Date: Thu, 11 May 2017 20:28:15 +0200
+Subject: tipc: make macro tipc_wait_for_cond() smp safe
+
+From: Jon Paul Maloy
+
+
+[ Upstream commit 844cf763fba654436d3a4279b6a672c196cf1901 ]
+
+The macro tipc_wait_for_cond() embeds the macro sk_wait_event()
+to fulfil its task. The latter, in turn, evaluates the stated
+condition outside the socket lock context. This is problematic if
+the condition is accessing non-trivial data structures which may be
+altered by incoming interrupts, as is the case with the cong_links()
+linked list, used by the socket to keep track of the current set of
+congested links. 
We sometimes see crashes when this list is accessed
+by a condition function at the same time as a SOCK_WAKEUP interrupt
+is removing an element from the list.
+
+We fix this by expanding selected parts of sk_wait_event() into the
+outer macro, while ensuring that all evaluations of a given condition
+are performed under socket lock protection.
+
+Fixes: commit 365ad353c256 ("tipc: reduce risk of user starvation during link congestion")
+Reviewed-by: Parthasarathy Bhuvaragan
+Signed-off-by: Jon Maloy
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/tipc/socket.c | 38 +++++++++++++++++++------------------
+ 1 file changed, 19 insertions(+), 19 deletions(-)
+
+--- a/net/tipc/socket.c
++++ b/net/tipc/socket.c
+@@ -361,25 +361,25 @@ static int tipc_sk_sock_err(struct socke
+ return 0;
+ }
+
+-#define tipc_wait_for_cond(sock_, timeout_, condition_) \
+-({ \
+- int rc_ = 0; \
+- int done_ = 0; \
+- \
+- while (!(condition_) && !done_) { \
+- struct sock *sk_ = sock->sk; \
+- DEFINE_WAIT_FUNC(wait_, woken_wake_function); \
+- \
+- rc_ = tipc_sk_sock_err(sock_, timeout_); \
+- if (rc_) \
+- break; \
+- prepare_to_wait(sk_sleep(sk_), &wait_, \
+- TASK_INTERRUPTIBLE); \
+- done_ = sk_wait_event(sk_, timeout_, \
+- (condition_), &wait_); \
+- remove_wait_queue(sk_sleep(sk_), &wait_); \
+- } \
+- rc_; \
++#define tipc_wait_for_cond(sock_, timeo_, condition_) \
++({ \
++ struct sock *sk_; \
++ int rc_; \
++ \
++ while ((rc_ = !(condition_))) { \
++ DEFINE_WAIT_FUNC(wait_, woken_wake_function); \
++ sk_ = (sock_)->sk; \
++ rc_ = tipc_sk_sock_err((sock_), timeo_); \
++ if (rc_) \
++ break; \
++ prepare_to_wait(sk_sleep(sk_), &wait_, TASK_INTERRUPTIBLE); \
++ release_sock(sk_); \
++ *(timeo_) = wait_woken(&wait_, TASK_INTERRUPTIBLE, *(timeo_)); \
++ sched_annotate_sleep(); \
++ lock_sock(sk_); \
++ remove_wait_queue(sk_sleep(sk_), &wait_); \
++ } \
++ rc_; \
+ })
+
+ /**
diff --git a/queue-4.11/virtio-net-enable-tso-checksum-offloads-for-q-in-q-vlans.patch b/queue-4.11/virtio-net-enable-tso-checksum-offloads-for-q-in-q-vlans.patch
new file mode 100644
index 00000000000..2171164a646
--- /dev/null
+++ b/queue-4.11/virtio-net-enable-tso-checksum-offloads-for-q-in-q-vlans.patch
@@ -0,0 +1,34 @@
+From foo@baz Wed May 31 09:13:10 JST 2017
+From: Vlad Yasevich
+Date: Tue, 23 May 2017 13:38:43 -0400
+Subject: virtio-net: enable TSO/checksum offloads for Q-in-Q vlans
+
+From: Vlad Yasevich
+
+
+[ Upstream commit 2836b4f224d4fd7d1a2b23c3eecaf0f0ae199a74 ]
+
+Since virtio does not provide its own ndo_features_check handler,
+TSO, and now checksum offload, are disabled for stacked vlans.
+Re-enable the support and let the host take care of it. This
+restores/improves Guest-to-Guest performance over Q-in-Q vlans.
+
+Acked-by: Jason Wang
+Acked-by: Michael S. Tsirkin
+Signed-off-by: Vladislav Yasevich
+Signed-off-by: David S. 
Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/net/virtio_net.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -1894,6 +1894,7 @@ static const struct net_device_ops virtn
+ .ndo_poll_controller = virtnet_netpoll,
+ #endif
+ .ndo_xdp = virtnet_xdp,
++ .ndo_features_check = passthru_features_check,
+ };
+
+ static void virtnet_config_changed_work(struct work_struct *work)
diff --git a/queue-4.11/vlan-fix-tcp-checksum-offloads-in-q-in-q-vlans.patch b/queue-4.11/vlan-fix-tcp-checksum-offloads-in-q-in-q-vlans.patch
new file mode 100644
index 00000000000..86039ff180b
--- /dev/null
+++ b/queue-4.11/vlan-fix-tcp-checksum-offloads-in-q-in-q-vlans.patch
@@ -0,0 +1,71 @@
+From foo@baz Wed May 31 09:13:10 JST 2017
+From: Vlad Yasevich
+Date: Tue, 23 May 2017 13:38:41 -0400
+Subject: vlan: Fix tcp checksum offloads in Q-in-Q vlans
+
+From: Vlad Yasevich
+
+
+[ Upstream commit 35d2f80b07bbe03fb358afb0bdeff7437a7d67ff ]
+
+It appears that TCP checksum offloading has been broken for
+Q-in-Q vlans. The behavior was exacerbated by the
+series
+ commit afb0bc972b52 ("Merge branch 'stacked_vlan_tso'")
+that enabled acceleration features on stacked vlans.
+
+However, even without that series, it is possible to trigger
+this issue. It just requires a lot more specialized configuration.
+
+The root cause is the interaction between how
+netdev_intersect_features() works, the features actually set on
+the vlan devices, and HW having the ability to run checksums on
+longer headers.
+
+The issue starts when netdev_intersect_features() replaces
+NETIF_F_HW_CSUM with a combination of NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM,
+if the HW advertises IP|IPV6 specific checksums. This happens
+for tagged and multi-tagged packets. However, HW that enables
+IP|IPV6 checksum offloading doesn't guarantee that packets with
+arbitrarily long headers can be checksummed.
+
+This patch disables IP|IPV6 checksums on multi-tagged
+packets.
+
+CC: Toshiaki Makita
+CC: Michal Kubecek
+Signed-off-by: Vladislav Yasevich
+Acked-by: Toshiaki Makita
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/linux/if_vlan.h | 18 ++++++++++--------
+ 1 file changed, 10 insertions(+), 8 deletions(-)
+
+--- a/include/linux/if_vlan.h
++++ b/include/linux/if_vlan.h
+@@ -614,14 +614,16 @@ static inline bool skb_vlan_tagged_multi
+ static inline netdev_features_t vlan_features_check(const struct sk_buff *skb,
+ netdev_features_t features)
+ {
+- if (skb_vlan_tagged_multi(skb))
+- features = netdev_intersect_features(features,
+- NETIF_F_SG |
+- NETIF_F_HIGHDMA |
+- NETIF_F_FRAGLIST |
+- NETIF_F_HW_CSUM |
+- NETIF_F_HW_VLAN_CTAG_TX |
+- NETIF_F_HW_VLAN_STAG_TX);
++ if (skb_vlan_tagged_multi(skb)) {
++ /* In the case of multi-tagged packets, use a direct mask
++ * instead of using netdev_interesect_features(), to make
++ * sure that only devices supporting NETIF_F_HW_CSUM will
++ * have checksum offloading support. 
++ */ ++ features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | ++ NETIF_F_FRAGLIST | NETIF_F_HW_VLAN_CTAG_TX | ++ NETIF_F_HW_VLAN_STAG_TX; ++ } + + return features; + } diff --git a/queue-4.9/series b/queue-4.9/series new file mode 100644 index 00000000000..b57eea84ea4 --- /dev/null +++ b/queue-4.9/series @@ -0,0 +1,34 @@ +dccp-tcp-do-not-inherit-mc_list-from-parent.patch +driver-vrf-fix-one-possible-use-after-free-issue.patch +ipv6-dccp-do-not-inherit-ipv6_mc_list-from-parent.patch +s390-qeth-handle-sysfs-error-during-initialization.patch +s390-qeth-unbreak-osm-and-osn-support.patch +s390-qeth-avoid-null-pointer-dereference-on-osn.patch +s390-qeth-add-missing-hash-table-initializations.patch +bpf-arm64-fix-faulty-emission-of-map-access-in-tail-calls.patch +netem-fix-skb_orphan_partial.patch +net-fix-compile-error-in-skb_orphan_partial.patch +tcp-avoid-fragmenting-peculiar-skbs-in-sack.patch +sctp-fix-src-address-selection-if-using-secondary-addresses-for-ipv6.patch +sctp-do-not-inherit-ipv6_-mc-ac-fl-_list-from-parent.patch +net-packet-fix-missing-net_device-reference-release.patch +net-mlx5e-use-the-correct-pause-values-for-ethtool-advertising.patch +net-mlx5e-fix-ethtool-pause-support-and-advertise-reporting.patch +tcp-eliminate-negative-reordering-in-tcp_clean_rtx_queue.patch +net-improve-handling-of-failures-on-link-and-route-dumps.patch +ipv6-prevent-overrun-when-parsing-v6-header-options.patch +ipv6-check-ip6_find_1stfragopt-return-value-properly.patch +bridge-netlink-check-vlan_default_pvid-range.patch +qmi_wwan-add-another-lenovo-em74xx-device-id.patch +bridge-start-hello_timer-when-enabling-kernel_stp-in-br_stp_start.patch +ipv6-fix-out-of-bound-writes-in-__ip6_append_data.patch +bonding-fix-accounting-of-active-ports-in-3ad.patch +net-mlx5-avoid-using-pending-command-interface-slots.patch +net-phy-marvell-limit-errata-to-88m1101.patch +vlan-fix-tcp-checksum-offloads-in-q-in-q-vlans.patch +be2net-fix-offload-features-for-q-in-q-packets.patch +virtio-net-enable-tso-checksum-offloads-for-q-in-q-vlans.patch +tcp-avoid-fastopen-api-to-be-used-on-af_unspec.patch +sctp-fix-icmp-processing-if-skb-is-non-linear.patch +ipv4-add-reference-counting-to-metrics.patch +bpf-add-bpf_clone_redirect-to-bpf_helper_changes_pkt_data.patch