From: Greg Kroah-Hartman Date: Mon, 9 Oct 2017 07:33:30 +0000 (+0200) Subject: 4.13-stable patches X-Git-Tag: v3.18.75~46 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=014275a0b63373be36d3eeceff8a030019e0e276;p=thirdparty%2Fkernel%2Fstable-queue.git 4.13-stable patches added patches: 8139too-revisit-napi_complete_done-usage.patch bpf-do-not-disable-enable-bh-in-bpf_map_free_id.patch bpf-fix-bpf_tail_call-x64-jit.patch bpf-one-perf-event-close-won-t-free-bpf-program-attached-by-another-perf-event.patch bpf-verifier-reject-bpf_alu64-bpf_end.patch ip6_gre-ip6gre_tap-device-should-keep-dst.patch ip6_gre-skb_push-ipv6hdr-before-packing-the-header-in-ip6gre_header.patch ip6_tunnel-do-not-allow-loading-ip6_tunnel-if-ipv6-is-disabled-in-cmdline.patch ip6_tunnel-update-mtu-properly-for-arphrd_ether-tunnel-device-in-tx-path.patch ipv4-early-demux-can-return-an-error-code.patch isdn-i4l-fetch-the-ppp_write-buffer-in-one-shot.patch l2tp-fix-l2tp_eth-module-loading.patch l2tp-fix-race-condition-in-l2tp_tunnel_delete.patch mlxsw-spectrum-fix-eeprom-access-in-case-of-sfp-sfp.patch mlxsw-spectrum-prevent-mirred-related-crash-on-removal.patch net-bonding-fix-tlb_dynamic_lb-default-value.patch net-bonding-fix-transmit-load-balancing-in-balance-alb-mode-if-specified-by-sysfs.patch net-change-skb-mac_header-when-generic-xdp-calls-adjust_head.patch net-dsa-fix-network-device-registration-order.patch net-dsa-mv88e6xxx-allow-dsa-and-cpu-ports-in-multiple-vlans.patch net-dsa-mv88e6xxx-lock-mutex-when-freeing-irqs.patch net-emac-fix-napi-poll-list-corruption.patch net-ipv6-fix-regression-of-no-rtm_deladdr-sent-after-dad-failure.patch net-mlx5e-ipoib-fix-access-to-invalid-memory-address.patch net-phy-fix-mask-value-write-on-gmii2rgmii-converter-speed-register.patch net-qcom-emac-specify-the-correct-size-when-mapping-a-dma-buffer.patch net-rtnetlink-fix-info-leak-in-rtm_getstats-call.patch net-sched-cls_matchall-fix-crash-when-used-with-classful-qdisc.patch net-sched-fix-use-after-free-in-tcf_action_destroy-and-tcf_del_walker.patch net-set-sk_prot_creator-when-cloning-sockets-to-the-right-proto.patch net-stmmac-cocci-spatch-of_table.patch net_sched-always-reset-qdisc-backlog-in-qdisc_reset.patch net_sched-gen_estimator-fix-scaling-error-in-bytes-packets-samples.patch netlink-do-not-proceed-if-dump-s-start-errs.patch openvswitch-fix-an-error-handling-path-in-ovs_nla_init_match_and_action.patch packet-hold-bind-lock-when-rebinding-to-fanout-hook.patch packet-in-packet_do_bind-test-fanout-with-bind_lock-held.patch packet-only-test-po-has_vnet_hdr-once-in-packet_snd.patch sctp-potential-read-out-of-bounds-in-sctp_ulpevent_type_enabled.patch socket-bpf-fix-possible-use-after-free.patch tcp-fastopen-fix-on-syn-data-transmit-failure.patch tcp-fix-data-delivery-rate.patch tcp-update-skb-skb_mstamp-more-carefully.patch tipc-use-only-positive-error-codes-in-messages.patch tun-bail-out-from-tun_get_user-if-the-skb-is-empty.patch udp-perform-source-validation-for-mcast-early-demux.patch udpv6-fix-the-checksum-computation-when-hw-checksum-does-not-apply.patch vti-fix-use-after-free-in-vti_tunnel_xmit-vti6_tnl_xmit.patch --- diff --git a/queue-4.13/8139too-revisit-napi_complete_done-usage.patch b/queue-4.13/8139too-revisit-napi_complete_done-usage.patch new file mode 100644 index 00000000000..31db4453f35 --- /dev/null +++ b/queue-4.13/8139too-revisit-napi_complete_done-usage.patch @@ -0,0 +1,44 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Eric Dumazet +Date: Mon, 18 Sep 2017 13:03:43 -0700 +Subject: 
8139too: revisit napi_complete_done() usage + +From: Eric Dumazet + + +[ Upstream commit 129c6cda2de2a8ac44fab096152469999b727faf ] + +It seems we have to be more careful in napi_complete_done() +use. This patch is not a revert, as it seems we can +avoid bug that Ville reported by moving the napi_complete_done() +test in the spinlock section. + +Many thanks to Ville for detective work and all tests. + +Fixes: 617f01211baf ("8139too: use napi_complete_done()") +Reported-by: Ville Syrjälä +Tested-by: Ville Syrjälä + +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/realtek/8139too.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/realtek/8139too.c ++++ b/drivers/net/ethernet/realtek/8139too.c +@@ -2135,11 +2135,12 @@ static int rtl8139_poll(struct napi_stru + if (likely(RTL_R16(IntrStatus) & RxAckBits)) + work_done += rtl8139_rx(dev, tp, budget); + +- if (work_done < budget && napi_complete_done(napi, work_done)) { ++ if (work_done < budget) { + unsigned long flags; + + spin_lock_irqsave(&tp->lock, flags); +- RTL_W16_F(IntrMask, rtl8139_intr_mask); ++ if (napi_complete_done(napi, work_done)) ++ RTL_W16_F(IntrMask, rtl8139_intr_mask); + spin_unlock_irqrestore(&tp->lock, flags); + } + spin_unlock(&tp->rx_lock); diff --git a/queue-4.13/bpf-do-not-disable-enable-bh-in-bpf_map_free_id.patch b/queue-4.13/bpf-do-not-disable-enable-bh-in-bpf_map_free_id.patch new file mode 100644 index 00000000000..4c8f2719e1f --- /dev/null +++ b/queue-4.13/bpf-do-not-disable-enable-bh-in-bpf_map_free_id.patch @@ -0,0 +1,95 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Eric Dumazet +Date: Tue, 19 Sep 2017 09:15:59 -0700 +Subject: bpf: do not disable/enable BH in bpf_map_free_id() + +From: Eric Dumazet + + +[ Upstream commit 930651a75bf1ba6893a8b8475270664ebdb6cf4a ] + +syzkaller reported following splat [1] + +Since hard irq are disabled by the caller, bpf_map_free_id() +should not try to enable/disable BH. + +Another solution would be to change htab_map_delete_elem() to +defer the free_htab_elem() call after +raw_spin_unlock_irqrestore(&b->lock, flags), but this might be not +enough to cover other code paths. + +[1] +WARNING: CPU: 1 PID: 8052 at kernel/softirq.c:161 __local_bh_enable_ip ++0x1e/0x160 kernel/softirq.c:161 +Kernel panic - not syncing: panic_on_warn set ... 
+ +CPU: 1 PID: 8052 Comm: syz-executor1 Not tainted 4.13.0-next-20170915+ +#23 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS +Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:16 [inline] + dump_stack+0x194/0x257 lib/dump_stack.c:52 + panic+0x1e4/0x417 kernel/panic.c:181 + __warn+0x1c4/0x1d9 kernel/panic.c:542 + report_bug+0x211/0x2d0 lib/bug.c:183 + fixup_bug+0x40/0x90 arch/x86/kernel/traps.c:178 + do_trap_no_signal arch/x86/kernel/traps.c:212 [inline] + do_trap+0x260/0x390 arch/x86/kernel/traps.c:261 + do_error_trap+0x120/0x390 arch/x86/kernel/traps.c:298 + do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:311 + invalid_op+0x18/0x20 arch/x86/entry/entry_64.S:905 +RIP: 0010:__local_bh_enable_ip+0x1e/0x160 kernel/softirq.c:161 +RSP: 0018:ffff8801cdcd7748 EFLAGS: 00010046 +RAX: 0000000000000082 RBX: 0000000000000201 RCX: 0000000000000000 +RDX: 1ffffffff0b5933c RSI: 0000000000000201 RDI: ffffffff85ac99e0 +RBP: ffff8801cdcd7758 R08: ffffffff85b87158 R09: 1ffff10039b9aec6 +R10: ffff8801c99f24c0 R11: 0000000000000002 R12: ffffffff817b0b47 +R13: dffffc0000000000 R14: ffff8801cdcd77e8 R15: 0000000000000001 + __raw_spin_unlock_bh include/linux/spinlock_api_smp.h:176 [inline] + _raw_spin_unlock_bh+0x30/0x40 kernel/locking/spinlock.c:207 + spin_unlock_bh include/linux/spinlock.h:361 [inline] + bpf_map_free_id kernel/bpf/syscall.c:197 [inline] + __bpf_map_put+0x267/0x320 kernel/bpf/syscall.c:227 + bpf_map_put+0x1a/0x20 kernel/bpf/syscall.c:235 + bpf_map_fd_put_ptr+0x15/0x20 kernel/bpf/map_in_map.c:96 + free_htab_elem+0xc3/0x1b0 kernel/bpf/hashtab.c:658 + htab_map_delete_elem+0x74d/0x970 kernel/bpf/hashtab.c:1063 + map_delete_elem kernel/bpf/syscall.c:633 [inline] + SYSC_bpf kernel/bpf/syscall.c:1479 [inline] + SyS_bpf+0x2188/0x46a0 kernel/bpf/syscall.c:1451 + entry_SYSCALL_64_fastpath+0x1f/0xbe + +Fixes: f3f1c054c288 ("bpf: Introduce bpf_map ID") +Signed-off-by: Eric Dumazet +Cc: Martin KaFai Lau +Acked-by: Martin KaFai Lau +Acked-by: Daniel Borkmann +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/syscall.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/kernel/bpf/syscall.c ++++ b/kernel/bpf/syscall.c +@@ -144,15 +144,17 @@ static int bpf_map_alloc_id(struct bpf_m + + static void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock) + { ++ unsigned long flags; ++ + if (do_idr_lock) +- spin_lock_bh(&map_idr_lock); ++ spin_lock_irqsave(&map_idr_lock, flags); + else + __acquire(&map_idr_lock); + + idr_remove(&map_idr, map->id); + + if (do_idr_lock) +- spin_unlock_bh(&map_idr_lock); ++ spin_unlock_irqrestore(&map_idr_lock, flags); + else + __release(&map_idr_lock); + } diff --git a/queue-4.13/bpf-fix-bpf_tail_call-x64-jit.patch b/queue-4.13/bpf-fix-bpf_tail_call-x64-jit.patch new file mode 100644 index 00000000000..60d5b4dac6e --- /dev/null +++ b/queue-4.13/bpf-fix-bpf_tail_call-x64-jit.patch @@ -0,0 +1,70 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Alexei Starovoitov +Date: Tue, 3 Oct 2017 15:37:20 -0700 +Subject: bpf: fix bpf_tail_call() x64 JIT + +From: Alexei Starovoitov + + +[ Upstream commit 90caccdd8cc0215705f18b92771b449b01e2474a ] + +- bpf prog_array just like all other types of bpf array accepts 32-bit index. + Clarify that in the comment. 
+- fix x64 JIT of bpf_tail_call which was incorrectly loading 8 instead of 4 bytes +- tighten corresponding check in the interpreter to stay consistent + +The JIT bug can be triggered after introduction of BPF_F_NUMA_NODE flag +in commit 96eabe7a40aa in 4.14. Before that the map_flags would stay zero and +though JIT code is wrong it will check bounds correctly. +Hence two fixes tags. All other JITs don't have this problem. + +Signed-off-by: Alexei Starovoitov +Fixes: 96eabe7a40aa ("bpf: Allow selecting numa node during map creation") +Fixes: b52f00e6a715 ("x86: bpf_jit: implement bpf_tail_call() helper") +Acked-by: Daniel Borkmann +Acked-by: Martin KaFai Lau +Reviewed-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/net/bpf_jit_comp.c | 4 ++-- + include/uapi/linux/bpf.h | 2 +- + kernel/bpf/core.c | 2 +- + 3 files changed, 4 insertions(+), 4 deletions(-) + +--- a/arch/x86/net/bpf_jit_comp.c ++++ b/arch/x86/net/bpf_jit_comp.c +@@ -282,9 +282,9 @@ static void emit_bpf_tail_call(u8 **ppro + /* if (index >= array->map.max_entries) + * goto out; + */ +- EMIT4(0x48, 0x8B, 0x46, /* mov rax, qword ptr [rsi + 16] */ ++ EMIT2(0x89, 0xD2); /* mov edx, edx */ ++ EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ + offsetof(struct bpf_array, map.max_entries)); +- EMIT3(0x48, 0x39, 0xD0); /* cmp rax, rdx */ + #define OFFSET1 47 /* number of bytes to jump */ + EMIT2(X86_JBE, OFFSET1); /* jbe out */ + label1 = cnt; +--- a/include/uapi/linux/bpf.h ++++ b/include/uapi/linux/bpf.h +@@ -294,7 +294,7 @@ union bpf_attr { + * jump into another BPF program + * @ctx: context pointer passed to next program + * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY +- * @index: index inside array that selects specific program to run ++ * @index: 32-bit index inside array that selects specific program to run + * Return: 0 on success or negative error + * + * int bpf_clone_redirect(skb, ifindex, flags) +--- a/kernel/bpf/core.c ++++ b/kernel/bpf/core.c +@@ -1010,7 +1010,7 @@ select_insn: + struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2; + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct bpf_prog *prog; +- u64 index = BPF_R3; ++ u32 index = BPF_R3; + + if (unlikely(index >= array->map.max_entries)) + goto out; diff --git a/queue-4.13/bpf-one-perf-event-close-won-t-free-bpf-program-attached-by-another-perf-event.patch b/queue-4.13/bpf-one-perf-event-close-won-t-free-bpf-program-attached-by-another-perf-event.patch new file mode 100644 index 00000000000..494b4b11061 --- /dev/null +++ b/queue-4.13/bpf-one-perf-event-close-won-t-free-bpf-program-attached-by-another-perf-event.patch @@ -0,0 +1,64 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Yonghong Song +Date: Mon, 18 Sep 2017 16:38:36 -0700 +Subject: bpf: one perf event close won't free bpf program attached by another perf event + +From: Yonghong Song + + +[ Upstream commit ec9dd352d591f0c90402ec67a317c1ed4fb2e638 ] + +This patch fixes a bug exhibited by the following scenario: + 1. fd1 = perf_event_open with attr.config = ID1 + 2. attach bpf program prog1 to fd1 + 3. fd2 = perf_event_open with attr.config = ID1 + + 4. user program closes fd2 and prog1 is detached from the tracepoint. + 5. user program with fd1 does not work properly as tracepoint + no output any more. + +The issue happens at step 4. Multiple perf_event_open can be called +successfully, but only one bpf prog pointer in the tp_event. 
In the +current logic, any fd release for the same tp_event will free +the tp_event->prog. + +The fix is to free tp_event->prog only when the closing fd +corresponds to the one which registered the program. + +Signed-off-by: Yonghong Song +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/trace_events.h | 1 + + kernel/events/core.c | 3 ++- + 2 files changed, 3 insertions(+), 1 deletion(-) + +--- a/include/linux/trace_events.h ++++ b/include/linux/trace_events.h +@@ -277,6 +277,7 @@ struct trace_event_call { + int perf_refcount; + struct hlist_head __percpu *perf_events; + struct bpf_prog *prog; ++ struct perf_event *bpf_prog_owner; + + int (*perf_perm)(struct trace_event_call *, + struct perf_event *); +--- a/kernel/events/core.c ++++ b/kernel/events/core.c +@@ -8121,6 +8121,7 @@ static int perf_event_set_bpf_prog(struc + } + } + event->tp_event->prog = prog; ++ event->tp_event->bpf_prog_owner = event; + + return 0; + } +@@ -8135,7 +8136,7 @@ static void perf_event_free_bpf_prog(str + return; + + prog = event->tp_event->prog; +- if (prog) { ++ if (prog && event->tp_event->bpf_prog_owner == event) { + event->tp_event->prog = NULL; + bpf_prog_put(prog); + } diff --git a/queue-4.13/bpf-verifier-reject-bpf_alu64-bpf_end.patch b/queue-4.13/bpf-verifier-reject-bpf_alu64-bpf_end.patch new file mode 100644 index 00000000000..d458cc59f7b --- /dev/null +++ b/queue-4.13/bpf-verifier-reject-bpf_alu64-bpf_end.patch @@ -0,0 +1,61 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Edward Cree +Date: Fri, 15 Sep 2017 14:37:38 +0100 +Subject: bpf/verifier: reject BPF_ALU64|BPF_END + +From: Edward Cree + + +[ Upstream commit e67b8a685c7c984e834e3181ef4619cd7025a136 ] + +Neither ___bpf_prog_run nor the JITs accept it. +Also adds a new test case. + +Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)") +Signed-off-by: Edward Cree +Acked-by: Alexei Starovoitov +Acked-by: Daniel Borkmann +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/verifier.c | 3 ++- + tools/testing/selftests/bpf/test_verifier.c | 16 ++++++++++++++++ + 2 files changed, 18 insertions(+), 1 deletion(-) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -1978,7 +1978,8 @@ static int check_alu_op(struct bpf_verif + } + } else { + if (insn->src_reg != BPF_REG_0 || insn->off != 0 || +- (insn->imm != 16 && insn->imm != 32 && insn->imm != 64)) { ++ (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) || ++ BPF_CLASS(insn->code) == BPF_ALU64) { + verbose("BPF_END uses reserved fields\n"); + return -EINVAL; + } +--- a/tools/testing/selftests/bpf/test_verifier.c ++++ b/tools/testing/selftests/bpf/test_verifier.c +@@ -6009,6 +6009,22 @@ static struct bpf_test tests[] = { + .result = REJECT, + .result_unpriv = REJECT, + }, ++ { ++ "invalid 64-bit BPF_END", ++ .insns = { ++ BPF_MOV32_IMM(BPF_REG_0, 0), ++ { ++ .code = BPF_ALU64 | BPF_END | BPF_TO_LE, ++ .dst_reg = BPF_REG_0, ++ .src_reg = 0, ++ .off = 0, ++ .imm = 32, ++ }, ++ BPF_EXIT_INSN(), ++ }, ++ .errstr = "BPF_END uses reserved fields", ++ .result = REJECT, ++ }, + }; + + static int probe_filter_length(const struct bpf_insn *fp) diff --git a/queue-4.13/ip6_gre-ip6gre_tap-device-should-keep-dst.patch b/queue-4.13/ip6_gre-ip6gre_tap-device-should-keep-dst.patch new file mode 100644 index 00000000000..a8e25b46a2a --- /dev/null +++ b/queue-4.13/ip6_gre-ip6gre_tap-device-should-keep-dst.patch @@ -0,0 +1,32 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Xin Long +Date: Thu, 28 Sep 2017 13:23:50 +0800 +Subject: ip6_gre: ip6gre_tap device should keep dst + +From: Xin Long + + +[ Upstream commit 2d40557cc702ed8e5edd9bd422233f86652d932e ] + +The patch 'ip_gre: ipgre_tap device should keep dst' fixed +a issue that ipgre_tap mtu couldn't be updated in tx path. + +The same fix is needed for ip6gre_tap as well. + +Signed-off-by: Xin Long +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_gre.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -1311,6 +1311,7 @@ static void ip6gre_tap_setup(struct net_ + dev->features |= NETIF_F_NETNS_LOCAL; + dev->priv_flags &= ~IFF_TX_SKB_SHARING; + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; ++ netif_keep_dst(dev); + } + + static bool ip6gre_netlink_encap_parms(struct nlattr *data[], diff --git a/queue-4.13/ip6_gre-skb_push-ipv6hdr-before-packing-the-header-in-ip6gre_header.patch b/queue-4.13/ip6_gre-skb_push-ipv6hdr-before-packing-the-header-in-ip6gre_header.patch new file mode 100644 index 00000000000..3cd5fffa011 --- /dev/null +++ b/queue-4.13/ip6_gre-skb_push-ipv6hdr-before-packing-the-header-in-ip6gre_header.patch @@ -0,0 +1,76 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Xin Long +Date: Fri, 15 Sep 2017 12:00:07 +0800 +Subject: ip6_gre: skb_push ipv6hdr before packing the header in ip6gre_header + +From: Xin Long + + +[ Upstream commit 76cc0d3282d4b933fa144fa41fbc5318e0fdca24 ] + +Now in ip6gre_header before packing the ipv6 header, it skb_push t->hlen +which only includes encap_hlen + tun_hlen. It means greh and inner header +would be over written by ipv6 stuff and ipv6h might have no chance to set +up. 
+ +Jianlin found this issue when using remote any on ip6_gre, the packets he +captured on gre dev are truncated: + +22:50:26.210866 Out ethertype IPv6 (0x86dd), length 120: truncated-ip6 -\ +8128 bytes missing!(flowlabel 0x92f40, hlim 0, next-header Options (0) \ +payload length: 8192) ::1:2000:0 > ::1:0:86dd: HBH [trunc] ip-proto-128 \ +8184 + +It should also skb_push ipv6hdr so that ipv6h points to the right position +to set ipv6 stuff up. + +This patch is to skb_push hlen + sizeof(*ipv6h) and also fix some indents +in ip6gre_header. + +Fixes: c12b395a4664 ("gre: Support GRE over IPv6") +Reported-by: Jianlin Shi +Signed-off-by: Xin Long +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_gre.c | 21 +++++++++++---------- + 1 file changed, 11 insertions(+), 10 deletions(-) + +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -940,24 +940,25 @@ done: + } + + static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, +- unsigned short type, +- const void *daddr, const void *saddr, unsigned int len) ++ unsigned short type, const void *daddr, ++ const void *saddr, unsigned int len) + { + struct ip6_tnl *t = netdev_priv(dev); +- struct ipv6hdr *ipv6h = skb_push(skb, t->hlen); +- __be16 *p = (__be16 *)(ipv6h+1); ++ struct ipv6hdr *ipv6h; ++ __be16 *p; + +- ip6_flow_hdr(ipv6h, 0, +- ip6_make_flowlabel(dev_net(dev), skb, +- t->fl.u.ip6.flowlabel, true, +- &t->fl.u.ip6)); ++ ipv6h = skb_push(skb, t->hlen + sizeof(*ipv6h)); ++ ip6_flow_hdr(ipv6h, 0, ip6_make_flowlabel(dev_net(dev), skb, ++ t->fl.u.ip6.flowlabel, ++ true, &t->fl.u.ip6)); + ipv6h->hop_limit = t->parms.hop_limit; + ipv6h->nexthdr = NEXTHDR_GRE; + ipv6h->saddr = t->parms.laddr; + ipv6h->daddr = t->parms.raddr; + +- p[0] = t->parms.o_flags; +- p[1] = htons(type); ++ p = (__be16 *)(ipv6h + 1); ++ p[0] = t->parms.o_flags; ++ p[1] = htons(type); + + /* + * Set the source hardware address. diff --git a/queue-4.13/ip6_tunnel-do-not-allow-loading-ip6_tunnel-if-ipv6-is-disabled-in-cmdline.patch b/queue-4.13/ip6_tunnel-do-not-allow-loading-ip6_tunnel-if-ipv6-is-disabled-in-cmdline.patch new file mode 100644 index 00000000000..37c00679edb --- /dev/null +++ b/queue-4.13/ip6_tunnel-do-not-allow-loading-ip6_tunnel-if-ipv6-is-disabled-in-cmdline.patch @@ -0,0 +1,61 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Xin Long +Date: Fri, 15 Sep 2017 15:58:33 +0800 +Subject: ip6_tunnel: do not allow loading ip6_tunnel if ipv6 is disabled in cmdline + +From: Xin Long + + +[ Upstream commit 8c22dab03ad072e45060c299c70d02a4f6fc4aab ] + +If ipv6 has been disabled from cmdline since kernel started, it makes +no sense to allow users to create any ip6 tunnel. Otherwise, it could +some potential problem. 
+ +Jianlin found a kernel crash caused by this in ip6_gre when he set +ipv6.disable=1 in grub: + +[ 209.588865] Unable to handle kernel paging request for data at address 0x00000080 +[ 209.588872] Faulting instruction address: 0xc000000000a3aa6c +[ 209.588879] Oops: Kernel access of bad area, sig: 11 [#1] +[ 209.589062] NIP [c000000000a3aa6c] fib_rules_lookup+0x4c/0x260 +[ 209.589071] LR [c000000000b9ad90] fib6_rule_lookup+0x50/0xb0 +[ 209.589076] Call Trace: +[ 209.589097] fib6_rule_lookup+0x50/0xb0 +[ 209.589106] rt6_lookup+0xc4/0x110 +[ 209.589116] ip6gre_tnl_link_config+0x214/0x2f0 [ip6_gre] +[ 209.589125] ip6gre_newlink+0x138/0x3a0 [ip6_gre] +[ 209.589134] rtnl_newlink+0x798/0xb80 +[ 209.589142] rtnetlink_rcv_msg+0xec/0x390 +[ 209.589151] netlink_rcv_skb+0x138/0x150 +[ 209.589159] rtnetlink_rcv+0x48/0x70 +[ 209.589169] netlink_unicast+0x538/0x640 +[ 209.589175] netlink_sendmsg+0x40c/0x480 +[ 209.589184] ___sys_sendmsg+0x384/0x4e0 +[ 209.589194] SyS_sendmsg+0xd4/0x140 +[ 209.589201] SyS_socketcall+0x3e0/0x4f0 +[ 209.589209] system_call+0x38/0xe0 + +This patch is to return -EOPNOTSUPP in ip6_tunnel_init if ipv6 has been +disabled from cmdline. + +Reported-by: Jianlin Shi +Signed-off-by: Xin Long +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_tunnel.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/ipv6/ip6_tunnel.c ++++ b/net/ipv6/ip6_tunnel.c +@@ -2258,6 +2258,9 @@ static int __init ip6_tunnel_init(void) + { + int err; + ++ if (!ipv6_mod_enabled()) ++ return -EOPNOTSUPP; ++ + err = register_pernet_device(&ip6_tnl_net_ops); + if (err < 0) + goto out_pernet; diff --git a/queue-4.13/ip6_tunnel-update-mtu-properly-for-arphrd_ether-tunnel-device-in-tx-path.patch b/queue-4.13/ip6_tunnel-update-mtu-properly-for-arphrd_ether-tunnel-device-in-tx-path.patch new file mode 100644 index 00000000000..efcdc307574 --- /dev/null +++ b/queue-4.13/ip6_tunnel-update-mtu-properly-for-arphrd_ether-tunnel-device-in-tx-path.patch @@ -0,0 +1,49 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Xin Long +Date: Thu, 28 Sep 2017 13:24:07 +0800 +Subject: ip6_tunnel: update mtu properly for ARPHRD_ETHER tunnel device in tx path + +From: Xin Long + + +[ Upstream commit d41bb33ba33b8f8debe54ed36be6925eb496e354 ] + +Now when updating mtu in tx path, it doesn't consider ARPHRD_ETHER tunnel +device, like ip6gre_tap tunnel, for which it should also subtract ether +header to get the correct mtu. + +Signed-off-by: Xin Long +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_tunnel.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/net/ipv6/ip6_tunnel.c ++++ b/net/ipv6/ip6_tunnel.c +@@ -1043,6 +1043,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, st + struct dst_entry *dst = NULL, *ndst = NULL; + struct net_device *tdev; + int mtu; ++ unsigned int eth_hlen = t->dev->type == ARPHRD_ETHER ? 
ETH_HLEN : 0; + unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen; + unsigned int max_headroom = psh_hlen; + bool use_cache = false; +@@ -1124,7 +1125,7 @@ route_lookup: + t->parms.name); + goto tx_err_dst_release; + } +- mtu = dst_mtu(dst) - psh_hlen - t->tun_hlen; ++ mtu = dst_mtu(dst) - eth_hlen - psh_hlen - t->tun_hlen; + if (encap_limit >= 0) { + max_headroom += 8; + mtu -= 8; +@@ -1133,7 +1134,7 @@ route_lookup: + mtu = IPV6_MIN_MTU; + if (skb_dst(skb) && !t->parms.collect_md) + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); +- if (skb->len - t->tun_hlen > mtu && !skb_is_gso(skb)) { ++ if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) { + *pmtu = mtu; + err = -EMSGSIZE; + goto tx_err_dst_release; diff --git a/queue-4.13/ipv4-early-demux-can-return-an-error-code.patch b/queue-4.13/ipv4-early-demux-can-return-an-error-code.patch new file mode 100644 index 00000000000..70ddd10c644 --- /dev/null +++ b/queue-4.13/ipv4-early-demux-can-return-an-error-code.patch @@ -0,0 +1,212 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Paolo Abeni +Date: Thu, 28 Sep 2017 15:51:36 +0200 +Subject: IPv4: early demux can return an error code + +From: Paolo Abeni + + +[ Upstream commit 7487449c86c65202b3b725c4524cb48dd65e4e6f ] + +Currently no error is emitted, but this infrastructure will +used by the next patch to allow source address validation +for mcast sockets. +Since early demux can do a route lookup and an ipv4 route +lookup can return an error code this is consistent with the +current ipv4 route infrastructure. + +Signed-off-by: Paolo Abeni +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/protocol.h | 4 ++-- + include/net/tcp.h | 2 +- + include/net/udp.h | 2 +- + net/ipv4/ip_input.c | 25 +++++++++++++++---------- + net/ipv4/tcp_ipv4.c | 9 +++++---- + net/ipv4/udp.c | 11 ++++++----- + 6 files changed, 30 insertions(+), 23 deletions(-) + +--- a/include/net/protocol.h ++++ b/include/net/protocol.h +@@ -39,8 +39,8 @@ + + /* This is used to register protocols. 
*/ + struct net_protocol { +- void (*early_demux)(struct sk_buff *skb); +- void (*early_demux_handler)(struct sk_buff *skb); ++ int (*early_demux)(struct sk_buff *skb); ++ int (*early_demux_handler)(struct sk_buff *skb); + int (*handler)(struct sk_buff *skb); + void (*err_handler)(struct sk_buff *skb, u32 info); + unsigned int no_policy:1, +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -347,7 +347,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 + + void tcp_shutdown(struct sock *sk, int how); + +-void tcp_v4_early_demux(struct sk_buff *skb); ++int tcp_v4_early_demux(struct sk_buff *skb); + int tcp_v4_rcv(struct sk_buff *skb); + + int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); +--- a/include/net/udp.h ++++ b/include/net/udp.h +@@ -259,7 +259,7 @@ static inline struct sk_buff *skb_recv_u + return __skb_recv_udp(sk, flags, noblock, &peeked, &off, err); + } + +-void udp_v4_early_demux(struct sk_buff *skb); ++int udp_v4_early_demux(struct sk_buff *skb); + bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst); + int udp_get_port(struct sock *sk, unsigned short snum, + int (*saddr_cmp)(const struct sock *, +--- a/net/ipv4/ip_input.c ++++ b/net/ipv4/ip_input.c +@@ -311,9 +311,10 @@ drop: + static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) + { + const struct iphdr *iph = ip_hdr(skb); +- struct rtable *rt; ++ int (*edemux)(struct sk_buff *skb); + struct net_device *dev = skb->dev; +- void (*edemux)(struct sk_buff *skb); ++ struct rtable *rt; ++ int err; + + /* if ingress device is enslaved to an L3 master device pass the + * skb to its handler for processing +@@ -331,7 +332,9 @@ static int ip_rcv_finish(struct net *net + + ipprot = rcu_dereference(inet_protos[protocol]); + if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) { +- edemux(skb); ++ err = edemux(skb); ++ if (unlikely(err)) ++ goto drop_error; + /* must reload iph, skb->head might have changed */ + iph = ip_hdr(skb); + } +@@ -342,13 +345,10 @@ static int ip_rcv_finish(struct net *net + * how the packet travels inside Linux networking. 
+ */ + if (!skb_valid_dst(skb)) { +- int err = ip_route_input_noref(skb, iph->daddr, iph->saddr, +- iph->tos, dev); +- if (unlikely(err)) { +- if (err == -EXDEV) +- __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER); +- goto drop; +- } ++ err = ip_route_input_noref(skb, iph->daddr, iph->saddr, ++ iph->tos, dev); ++ if (unlikely(err)) ++ goto drop_error; + } + + #ifdef CONFIG_IP_ROUTE_CLASSID +@@ -399,6 +399,11 @@ static int ip_rcv_finish(struct net *net + drop: + kfree_skb(skb); + return NET_RX_DROP; ++ ++drop_error: ++ if (err == -EXDEV) ++ __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER); ++ goto drop; + } + + /* +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -1504,23 +1504,23 @@ csum_err: + } + EXPORT_SYMBOL(tcp_v4_do_rcv); + +-void tcp_v4_early_demux(struct sk_buff *skb) ++int tcp_v4_early_demux(struct sk_buff *skb) + { + const struct iphdr *iph; + const struct tcphdr *th; + struct sock *sk; + + if (skb->pkt_type != PACKET_HOST) +- return; ++ return 0; + + if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) +- return; ++ return 0; + + iph = ip_hdr(skb); + th = tcp_hdr(skb); + + if (th->doff < sizeof(struct tcphdr) / 4) +- return; ++ return 0; + + sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo, + iph->saddr, th->source, +@@ -1539,6 +1539,7 @@ void tcp_v4_early_demux(struct sk_buff * + skb_dst_set_noref(skb, dst); + } + } ++ return 0; + } + + /* Packet is added to VJ-style prequeue for processing in process +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -2217,7 +2217,7 @@ static struct sock *__udp4_lib_demux_loo + return NULL; + } + +-void udp_v4_early_demux(struct sk_buff *skb) ++int udp_v4_early_demux(struct sk_buff *skb) + { + struct net *net = dev_net(skb->dev); + const struct iphdr *iph; +@@ -2229,7 +2229,7 @@ void udp_v4_early_demux(struct sk_buff * + + /* validate the packet */ + if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr))) +- return; ++ return 0; + + iph = ip_hdr(skb); + uh = udp_hdr(skb); +@@ -2239,14 +2239,14 @@ void udp_v4_early_demux(struct sk_buff * + struct in_device *in_dev = __in_dev_get_rcu(skb->dev); + + if (!in_dev) +- return; ++ return 0; + + /* we are supposed to accept bcast packets */ + if (skb->pkt_type == PACKET_MULTICAST) { + ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr, + iph->protocol); + if (!ours) +- return; ++ return 0; + } + + sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, +@@ -2257,7 +2257,7 @@ void udp_v4_early_demux(struct sk_buff * + } + + if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt)) +- return; ++ return 0; + + skb->sk = sk; + skb->destructor = sock_efree; +@@ -2272,6 +2272,7 @@ void udp_v4_early_demux(struct sk_buff * + */ + skb_dst_set_noref(skb, dst); + } ++ return 0; + } + + int udp_rcv(struct sk_buff *skb) diff --git a/queue-4.13/isdn-i4l-fetch-the-ppp_write-buffer-in-one-shot.patch b/queue-4.13/isdn-i4l-fetch-the-ppp_write-buffer-in-one-shot.patch new file mode 100644 index 00000000000..b7cb88ab987 --- /dev/null +++ b/queue-4.13/isdn-i4l-fetch-the-ppp_write-buffer-in-one-shot.patch @@ -0,0 +1,104 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Meng Xu +Date: Tue, 19 Sep 2017 21:49:55 -0400 +Subject: isdn/i4l: fetch the ppp_write buffer in one shot + +From: Meng Xu + + +[ Upstream commit 02388bf87f72e1d47174cd8f81c34443920eb5a0 ] + +In isdn_ppp_write(), the header (i.e., protobuf) of the buffer is +fetched twice from userspace. 
The first fetch is used to peek at the +protocol of the message and reset the huptimer if necessary; while the +second fetch copies in the whole buffer. However, given that buf resides +in userspace memory, a user process can race to change its memory content +across fetches. By doing so, we can either avoid resetting the huptimer +for any type of packets (by first setting proto to PPP_LCP and later +change to the actual type) or force resetting the huptimer for LCP +packets. + +This patch changes this double-fetch behavior into two single fetches +decided by condition (lp->isdn_device < 0 || lp->isdn_channel <0). +A more detailed discussion can be found at +https://marc.info/?l=linux-kernel&m=150586376926123&w=2 + +Signed-off-by: Meng Xu +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/isdn/i4l/isdn_ppp.c | 37 +++++++++++++++++++++++++------------ + 1 file changed, 25 insertions(+), 12 deletions(-) + +--- a/drivers/isdn/i4l/isdn_ppp.c ++++ b/drivers/isdn/i4l/isdn_ppp.c +@@ -825,7 +825,6 @@ isdn_ppp_write(int min, struct file *fil + isdn_net_local *lp; + struct ippp_struct *is; + int proto; +- unsigned char protobuf[4]; + + is = file->private_data; + +@@ -839,24 +838,28 @@ isdn_ppp_write(int min, struct file *fil + if (!lp) + printk(KERN_DEBUG "isdn_ppp_write: lp == NULL\n"); + else { +- /* +- * Don't reset huptimer for +- * LCP packets. (Echo requests). +- */ +- if (copy_from_user(protobuf, buf, 4)) +- return -EFAULT; +- proto = PPP_PROTOCOL(protobuf); +- if (proto != PPP_LCP) +- lp->huptimer = 0; ++ if (lp->isdn_device < 0 || lp->isdn_channel < 0) { ++ unsigned char protobuf[4]; ++ /* ++ * Don't reset huptimer for ++ * LCP packets. (Echo requests). ++ */ ++ if (copy_from_user(protobuf, buf, 4)) ++ return -EFAULT; ++ ++ proto = PPP_PROTOCOL(protobuf); ++ if (proto != PPP_LCP) ++ lp->huptimer = 0; + +- if (lp->isdn_device < 0 || lp->isdn_channel < 0) + return 0; ++ } + + if ((dev->drv[lp->isdn_device]->flags & DRV_FLAG_RUNNING) && + lp->dialstate == 0 && + (lp->flags & ISDN_NET_CONNECTED)) { + unsigned short hl; + struct sk_buff *skb; ++ unsigned char *cpy_buf; + /* + * we need to reserve enough space in front of + * sk_buff. old call to dev_alloc_skb only reserved +@@ -869,11 +872,21 @@ isdn_ppp_write(int min, struct file *fil + return count; + } + skb_reserve(skb, hl); +- if (copy_from_user(skb_put(skb, count), buf, count)) ++ cpy_buf = skb_put(skb, count); ++ if (copy_from_user(cpy_buf, buf, count)) + { + kfree_skb(skb); + return -EFAULT; + } ++ ++ /* ++ * Don't reset huptimer for ++ * LCP packets. (Echo requests). ++ */ ++ proto = PPP_PROTOCOL(cpy_buf); ++ if (proto != PPP_LCP) ++ lp->huptimer = 0; ++ + if (is->debug & 0x40) { + printk(KERN_DEBUG "ppp xmit: len %d\n", (int) skb->len); + isdn_ppp_frame_log("xmit", skb->data, skb->len, 32, is->unit, lp->ppp_slot); diff --git a/queue-4.13/l2tp-fix-l2tp_eth-module-loading.patch b/queue-4.13/l2tp-fix-l2tp_eth-module-loading.patch new file mode 100644 index 00000000000..865eb008ebc --- /dev/null +++ b/queue-4.13/l2tp-fix-l2tp_eth-module-loading.patch @@ -0,0 +1,145 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Guillaume Nault +Date: Thu, 28 Sep 2017 15:44:38 +0200 +Subject: l2tp: fix l2tp_eth module loading + +From: Guillaume Nault + + +[ Upstream commit 9f775ead5e570e7e19015b9e4e2f3dd6e71a5935 ] + +The l2tp_eth module crashes if its netlink callbacks are run when the +pernet data aren't initialised. + +We should normally register_pernet_device() before the genl callbacks. 
+However, the pernet data only maintain a list of l2tpeth interfaces, +and this list is never used. So let's just drop pernet handling +instead. + +Fixes: d9e31d17ceba ("l2tp: Add L2TP ethernet pseudowire support") +Signed-off-by: Guillaume Nault +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/l2tp/l2tp_eth.c | 51 ++------------------------------------------------- + 1 file changed, 2 insertions(+), 49 deletions(-) + +--- a/net/l2tp/l2tp_eth.c ++++ b/net/l2tp/l2tp_eth.c +@@ -44,7 +44,6 @@ struct l2tp_eth { + struct net_device *dev; + struct sock *tunnel_sock; + struct l2tp_session *session; +- struct list_head list; + atomic_long_t tx_bytes; + atomic_long_t tx_packets; + atomic_long_t tx_dropped; +@@ -58,17 +57,6 @@ struct l2tp_eth_sess { + struct net_device *dev; + }; + +-/* per-net private data for this module */ +-static unsigned int l2tp_eth_net_id; +-struct l2tp_eth_net { +- struct list_head l2tp_eth_dev_list; +- spinlock_t l2tp_eth_lock; +-}; +- +-static inline struct l2tp_eth_net *l2tp_eth_pernet(struct net *net) +-{ +- return net_generic(net, l2tp_eth_net_id); +-} + + static int l2tp_eth_dev_init(struct net_device *dev) + { +@@ -84,12 +72,6 @@ static int l2tp_eth_dev_init(struct net_ + + static void l2tp_eth_dev_uninit(struct net_device *dev) + { +- struct l2tp_eth *priv = netdev_priv(dev); +- struct l2tp_eth_net *pn = l2tp_eth_pernet(dev_net(dev)); +- +- spin_lock(&pn->l2tp_eth_lock); +- list_del_init(&priv->list); +- spin_unlock(&pn->l2tp_eth_lock); + dev_put(dev); + } + +@@ -272,7 +254,6 @@ static int l2tp_eth_create(struct net *n + struct l2tp_eth *priv; + struct l2tp_eth_sess *spriv; + int rc; +- struct l2tp_eth_net *pn; + + tunnel = l2tp_tunnel_find(net, tunnel_id); + if (!tunnel) { +@@ -310,7 +291,6 @@ static int l2tp_eth_create(struct net *n + priv = netdev_priv(dev); + priv->dev = dev; + priv->session = session; +- INIT_LIST_HEAD(&priv->list); + + priv->tunnel_sock = tunnel->sock; + session->recv_skb = l2tp_eth_dev_recv; +@@ -331,10 +311,6 @@ static int l2tp_eth_create(struct net *n + strlcpy(session->ifname, dev->name, IFNAMSIZ); + + dev_hold(dev); +- pn = l2tp_eth_pernet(dev_net(dev)); +- spin_lock(&pn->l2tp_eth_lock); +- list_add(&priv->list, &pn->l2tp_eth_dev_list); +- spin_unlock(&pn->l2tp_eth_lock); + + return 0; + +@@ -347,22 +323,6 @@ out: + return rc; + } + +-static __net_init int l2tp_eth_init_net(struct net *net) +-{ +- struct l2tp_eth_net *pn = net_generic(net, l2tp_eth_net_id); +- +- INIT_LIST_HEAD(&pn->l2tp_eth_dev_list); +- spin_lock_init(&pn->l2tp_eth_lock); +- +- return 0; +-} +- +-static struct pernet_operations l2tp_eth_net_ops = { +- .init = l2tp_eth_init_net, +- .id = &l2tp_eth_net_id, +- .size = sizeof(struct l2tp_eth_net), +-}; +- + + static const struct l2tp_nl_cmd_ops l2tp_eth_nl_cmd_ops = { + .session_create = l2tp_eth_create, +@@ -376,25 +336,18 @@ static int __init l2tp_eth_init(void) + + err = l2tp_nl_register_ops(L2TP_PWTYPE_ETH, &l2tp_eth_nl_cmd_ops); + if (err) +- goto out; +- +- err = register_pernet_device(&l2tp_eth_net_ops); +- if (err) +- goto out_unreg; ++ goto err; + + pr_info("L2TP ethernet pseudowire support (L2TPv3)\n"); + + return 0; + +-out_unreg: +- l2tp_nl_unregister_ops(L2TP_PWTYPE_ETH); +-out: ++err: + return err; + } + + static void __exit l2tp_eth_exit(void) + { +- unregister_pernet_device(&l2tp_eth_net_ops); + l2tp_nl_unregister_ops(L2TP_PWTYPE_ETH); + } + diff --git a/queue-4.13/l2tp-fix-race-condition-in-l2tp_tunnel_delete.patch b/queue-4.13/l2tp-fix-race-condition-in-l2tp_tunnel_delete.patch 
new file mode 100644 index 00000000000..668b50aa618 --- /dev/null +++ b/queue-4.13/l2tp-fix-race-condition-in-l2tp_tunnel_delete.patch @@ -0,0 +1,85 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Sabrina Dubroca +Date: Tue, 26 Sep 2017 16:16:43 +0200 +Subject: l2tp: fix race condition in l2tp_tunnel_delete + +From: Sabrina Dubroca + + +[ Upstream commit 62b982eeb4589b2e6d7c01a90590e3a4c2b2ca19 ] + +If we try to delete the same tunnel twice, the first delete operation +does a lookup (l2tp_tunnel_get), finds the tunnel, calls +l2tp_tunnel_delete, which queues it for deletion by +l2tp_tunnel_del_work. + +The second delete operation also finds the tunnel and calls +l2tp_tunnel_delete. If the workqueue has already fired and started +running l2tp_tunnel_del_work, then l2tp_tunnel_delete will queue the +same tunnel a second time, and try to free the socket again. + +Add a dead flag to prevent firing the workqueue twice. Then we can +remove the check of queue_work's result that was meant to prevent that +race but doesn't. + +Reproducer: + + ip l2tp add tunnel tunnel_id 3000 peer_tunnel_id 4000 local 192.168.0.2 remote 192.168.0.1 encap udp udp_sport 5000 udp_dport 6000 + ip l2tp add session name l2tp1 tunnel_id 3000 session_id 1000 peer_session_id 2000 + ip link set l2tp1 up + ip l2tp del tunnel tunnel_id 3000 + ip l2tp del tunnel tunnel_id 3000 + +Fixes: f8ccac0e4493 ("l2tp: put tunnel socket release on a workqueue") +Reported-by: Jianlin Shi +Signed-off-by: Sabrina Dubroca +Acked-by: Guillaume Nault +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/l2tp/l2tp_core.c | 10 ++++------ + net/l2tp/l2tp_core.h | 5 ++++- + 2 files changed, 8 insertions(+), 7 deletions(-) + +--- a/net/l2tp/l2tp_core.c ++++ b/net/l2tp/l2tp_core.c +@@ -1665,14 +1665,12 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create); + + /* This function is used by the netlink TUNNEL_DELETE command. 
+ */ +-int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel) ++void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel) + { +- l2tp_tunnel_inc_refcount(tunnel); +- if (false == queue_work(l2tp_wq, &tunnel->del_work)) { +- l2tp_tunnel_dec_refcount(tunnel); +- return 1; ++ if (!test_and_set_bit(0, &tunnel->dead)) { ++ l2tp_tunnel_inc_refcount(tunnel); ++ queue_work(l2tp_wq, &tunnel->del_work); + } +- return 0; + } + EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); + +--- a/net/l2tp/l2tp_core.h ++++ b/net/l2tp/l2tp_core.h +@@ -160,6 +160,9 @@ struct l2tp_tunnel_cfg { + + struct l2tp_tunnel { + int magic; /* Should be L2TP_TUNNEL_MAGIC */ ++ ++ unsigned long dead; ++ + struct rcu_head rcu; + rwlock_t hlist_lock; /* protect session_hlist */ + struct hlist_head session_hlist[L2TP_HASH_SIZE]; +@@ -248,7 +251,7 @@ int l2tp_tunnel_create(struct net *net, + u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, + struct l2tp_tunnel **tunnelp); + void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel); +-int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel); ++void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel); + struct l2tp_session *l2tp_session_create(int priv_size, + struct l2tp_tunnel *tunnel, + u32 session_id, u32 peer_session_id, diff --git a/queue-4.13/mlxsw-spectrum-fix-eeprom-access-in-case-of-sfp-sfp.patch b/queue-4.13/mlxsw-spectrum-fix-eeprom-access-in-case-of-sfp-sfp.patch new file mode 100644 index 00000000000..141fc6722a6 --- /dev/null +++ b/queue-4.13/mlxsw-spectrum-fix-eeprom-access-in-case-of-sfp-sfp.patch @@ -0,0 +1,65 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Arkadi Sharshevsky +Date: Mon, 11 Sep 2017 09:42:26 +0200 +Subject: mlxsw: spectrum: Fix EEPROM access in case of SFP/SFP+ + +From: Arkadi Sharshevsky + + +[ Upstream commit 4400081b631af69abc63cea3352680e3d85e0c39 ] + +The current code does not handle correctly the access to the upper page +in case of SFP/SFP+ EEPROM. In that case the offset should be local +and the I2C address should be changed. + +Fixes: 2ea109039cd3 ("mlxsw: spectrum: Add support for access cable info via ethtool") +Reported-by: Florian Klink +Signed-off-by: Arkadi Sharshevsky +Reviewed-by: Ido Schimmel +Signed-off-by: Jiri Pirko +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 19 +++++++++++++++++-- + 1 file changed, 17 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +@@ -2519,7 +2519,9 @@ out: + return err; + } + +-#define MLXSW_SP_QSFP_I2C_ADDR 0x50 ++#define MLXSW_SP_I2C_ADDR_LOW 0x50 ++#define MLXSW_SP_I2C_ADDR_HIGH 0x51 ++#define MLXSW_SP_EEPROM_PAGE_LENGTH 256 + + static int mlxsw_sp_query_module_eeprom(struct mlxsw_sp_port *mlxsw_sp_port, + u16 offset, u16 size, void *data, +@@ -2528,12 +2530,25 @@ static int mlxsw_sp_query_module_eeprom( + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char eeprom_tmp[MLXSW_SP_REG_MCIA_EEPROM_SIZE]; + char mcia_pl[MLXSW_REG_MCIA_LEN]; ++ u16 i2c_addr; + int status; + int err; + + size = min_t(u16, size, MLXSW_SP_REG_MCIA_EEPROM_SIZE); ++ ++ if (offset < MLXSW_SP_EEPROM_PAGE_LENGTH && ++ offset + size > MLXSW_SP_EEPROM_PAGE_LENGTH) ++ /* Cross pages read, read until offset 256 in low page */ ++ size = MLXSW_SP_EEPROM_PAGE_LENGTH - offset; ++ ++ i2c_addr = MLXSW_SP_I2C_ADDR_LOW; ++ if (offset >= MLXSW_SP_EEPROM_PAGE_LENGTH) { ++ i2c_addr = MLXSW_SP_I2C_ADDR_HIGH; ++ offset -= MLXSW_SP_EEPROM_PAGE_LENGTH; ++ } ++ + mlxsw_reg_mcia_pack(mcia_pl, mlxsw_sp_port->mapping.module, +- 0, 0, offset, size, MLXSW_SP_QSFP_I2C_ADDR); ++ 0, 0, offset, size, i2c_addr); + + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mcia), mcia_pl); + if (err) diff --git a/queue-4.13/mlxsw-spectrum-prevent-mirred-related-crash-on-removal.patch b/queue-4.13/mlxsw-spectrum-prevent-mirred-related-crash-on-removal.patch new file mode 100644 index 00000000000..0ef6755feb1 --- /dev/null +++ b/queue-4.13/mlxsw-spectrum-prevent-mirred-related-crash-on-removal.patch @@ -0,0 +1,98 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Yuval Mintz +Date: Tue, 12 Sep 2017 08:50:53 +0200 +Subject: mlxsw: spectrum: Prevent mirred-related crash on removal + +From: Yuval Mintz + + +[ Upstream commit 6399ebcccffa12e65bc15eda039d37673264ebce ] + +When removing the offloading of mirred actions under +matchall classifiers, mlxsw would find the destination port +associated with the offloaded action and utilize it for undoing +the configuration. + +Depending on the order by which ports are removed, it's possible that +the destination port would get removed before the source port. +In such a scenario, when actions would be flushed for the source port +mlxsw would perform an illegal dereference as the destination port is +no longer listed. + +Since the only item necessary for undoing the configuration on the +destination side is the port-id and that in turn is already maintained +by mlxsw on the source-port, simply stop trying to access the +destination port and use the port-id directly instead. + +Fixes: 763b4b70af ("mlxsw: spectrum: Add support in matchall mirror TC offloading") +Signed-off-by: Yuval Mintz +Signed-off-by: Jiri Pirko +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 19 +++++++++---------- + 1 file changed, 9 insertions(+), 10 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +@@ -572,15 +572,14 @@ static void mlxsw_sp_span_entry_destroy( + } + + static struct mlxsw_sp_span_entry * +-mlxsw_sp_span_entry_find(struct mlxsw_sp_port *port) ++mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port) + { +- struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; + int i; + + for (i = 0; i < mlxsw_sp->span.entries_count; i++) { + struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; + +- if (curr->used && curr->local_port == port->local_port) ++ if (curr->used && curr->local_port == local_port) + return curr; + } + return NULL; +@@ -591,7 +590,8 @@ static struct mlxsw_sp_span_entry + { + struct mlxsw_sp_span_entry *span_entry; + +- span_entry = mlxsw_sp_span_entry_find(port); ++ span_entry = mlxsw_sp_span_entry_find(port->mlxsw_sp, ++ port->local_port); + if (span_entry) { + /* Already exists, just take a reference */ + span_entry->ref_count++; +@@ -780,12 +780,13 @@ err_port_bind: + } + + static void mlxsw_sp_span_mirror_remove(struct mlxsw_sp_port *from, +- struct mlxsw_sp_port *to, ++ u8 destination_port, + enum mlxsw_sp_span_type type) + { + struct mlxsw_sp_span_entry *span_entry; + +- span_entry = mlxsw_sp_span_entry_find(to); ++ span_entry = mlxsw_sp_span_entry_find(from->mlxsw_sp, ++ destination_port); + if (!span_entry) { + netdev_err(from->dev, "no span entry found\n"); + return; +@@ -1560,14 +1561,12 @@ static void + mlxsw_sp_port_del_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_port_mall_mirror_tc_entry *mirror) + { +- struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + enum mlxsw_sp_span_type span_type; +- struct mlxsw_sp_port *to_port; + +- to_port = mlxsw_sp->ports[mirror->to_local_port]; + span_type = mirror->ingress ? + MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS; +- mlxsw_sp_span_mirror_remove(mlxsw_sp_port, to_port, span_type); ++ mlxsw_sp_span_mirror_remove(mlxsw_sp_port, mirror->to_local_port, ++ span_type); + } + + static int diff --git a/queue-4.13/net-bonding-fix-tlb_dynamic_lb-default-value.patch b/queue-4.13/net-bonding-fix-tlb_dynamic_lb-default-value.patch new file mode 100644 index 00000000000..3000edb377e --- /dev/null +++ b/queue-4.13/net-bonding-fix-tlb_dynamic_lb-default-value.patch @@ -0,0 +1,65 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Nikolay Aleksandrov +Date: Tue, 12 Sep 2017 15:10:05 +0300 +Subject: net: bonding: fix tlb_dynamic_lb default value + +From: Nikolay Aleksandrov + + +[ Upstream commit f13ad104b4e886a03e75f130daf579ef9bf33dfc ] + +Commit 8b426dc54cf4 ("bonding: remove hardcoded value") changed the +default value for tlb_dynamic_lb which lead to either broken ALB mode +(since tlb_dynamic_lb can be changed only in TLB) or setting TLB mode +with tlb_dynamic_lb equal to 0. +The first issue was recently fixed by setting tlb_dynamic_lb to 1 always +when switching to ALB mode, but the default value is still wrong and +we'll enter TLB mode with tlb_dynamic_lb equal to 0 if the mode is +changed via netlink or sysfs. In order to restore the previous behaviour +and default value simply remove the mode check around the default param +initialization for tlb_dynamic_lb which will always set it to 1 as +before. 
+ +Fixes: 8b426dc54cf4 ("bonding: remove hardcoded value") +Signed-off-by: Nikolay Aleksandrov +Acked-by: Mahesh Bandewar +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_main.c | 17 +++++++---------- + 1 file changed, 7 insertions(+), 10 deletions(-) + +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -4289,7 +4289,7 @@ static int bond_check_params(struct bond + int bond_mode = BOND_MODE_ROUNDROBIN; + int xmit_hashtype = BOND_XMIT_POLICY_LAYER2; + int lacp_fast = 0; +- int tlb_dynamic_lb = 0; ++ int tlb_dynamic_lb; + + /* Convert string parameters. */ + if (mode) { +@@ -4601,16 +4601,13 @@ static int bond_check_params(struct bond + } + ad_user_port_key = valptr->value; + +- if ((bond_mode == BOND_MODE_TLB) || (bond_mode == BOND_MODE_ALB)) { +- bond_opt_initstr(&newval, "default"); +- valptr = bond_opt_parse(bond_opt_get(BOND_OPT_TLB_DYNAMIC_LB), +- &newval); +- if (!valptr) { +- pr_err("Error: No tlb_dynamic_lb default value"); +- return -EINVAL; +- } +- tlb_dynamic_lb = valptr->value; ++ bond_opt_initstr(&newval, "default"); ++ valptr = bond_opt_parse(bond_opt_get(BOND_OPT_TLB_DYNAMIC_LB), &newval); ++ if (!valptr) { ++ pr_err("Error: No tlb_dynamic_lb default value"); ++ return -EINVAL; + } ++ tlb_dynamic_lb = valptr->value; + + if (lp_interval == 0) { + pr_warn("Warning: ip_interval must be between 1 and %d, so it was reset to %d\n", diff --git a/queue-4.13/net-bonding-fix-transmit-load-balancing-in-balance-alb-mode-if-specified-by-sysfs.patch b/queue-4.13/net-bonding-fix-transmit-load-balancing-in-balance-alb-mode-if-specified-by-sysfs.patch new file mode 100644 index 00000000000..1e6a470fcaf --- /dev/null +++ b/queue-4.13/net-bonding-fix-transmit-load-balancing-in-balance-alb-mode-if-specified-by-sysfs.patch @@ -0,0 +1,66 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Kosuke Tatsukawa +Date: Wed, 6 Sep 2017 22:47:59 +0000 +Subject: net: bonding: Fix transmit load balancing in balance-alb mode if specified by sysfs + +From: Kosuke Tatsukawa + + +[ Upstream commit c6644d07eff6588b2dedf881279fb0d1c7783970 ] + +Commit cbf5ecb30560 ("net: bonding: Fix transmit load balancing in +balance-alb mode") tried to fix transmit dynamic load balancing in +balance-alb mode, which wasn't working after commit 8b426dc54cf4 +("bonding: remove hardcoded value"). + +It turned out that my previous patch only fixed the case when +balance-alb was specified as bonding module parameter, and not when +balance-alb mode was set using /sys/class/net/*/bonding/mode (the most +common usage). In the latter case, tlb_dynamic_lb was set up according +to the default mode of the bonding interface, which happens to be +balance-rr. + +This additional patch addresses this issue by setting up tlb_dynamic_lb +to 1 if "mode" is set to balance-alb through the sysfs interface. + +I didn't add code to change tlb_balance_lb back to the default value for +other modes, because "mode" is usually set up only once during +initialization, and it's not worthwhile to change the static variable +bonding_defaults in bond_main.c to a global variable just for this +purpose. + +Commit 8b426dc54cf4 also changes the value of tlb_dynamic_lb for +balance-tlb mode if it is set up using the sysfs interface. I didn't +change that behavior, because the value of tlb_balance_lb can be changed +using the sysfs interface for balance-tlb, and I didn't like changing +the default value back and forth for balance-tlb. 
+ +As for balance-alb, /sys/class/net/*/bonding/tlb_balance_lb cannot be +written to. However, I think balance-alb with tlb_dynamic_lb set to 0 +is not an intended usage, so there is little use making it writable at +this moment. + +Fixes: 8b426dc54cf4 ("bonding: remove hardcoded value") +Reported-by: Reinis Rozitis +Signed-off-by: Kosuke Tatsukawa +Cc: stable@vger.kernel.org # v4.12+ +Acked-by: Nikolay Aleksandrov +Acked-by: Mahesh Bandewar +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_options.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/net/bonding/bond_options.c ++++ b/drivers/net/bonding/bond_options.c +@@ -754,6 +754,9 @@ static int bond_option_mode_set(struct b + bond->params.miimon); + } + ++ if (newval->value == BOND_MODE_ALB) ++ bond->params.tlb_dynamic_lb = 1; ++ + /* don't cache arp_validate between modes */ + bond->params.arp_validate = BOND_ARP_VALIDATE_NONE; + bond->params.mode = newval->value; diff --git a/queue-4.13/net-change-skb-mac_header-when-generic-xdp-calls-adjust_head.patch b/queue-4.13/net-change-skb-mac_header-when-generic-xdp-calls-adjust_head.patch new file mode 100644 index 00000000000..946d5ebc915 --- /dev/null +++ b/queue-4.13/net-change-skb-mac_header-when-generic-xdp-calls-adjust_head.patch @@ -0,0 +1,35 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Edward Cree +Date: Tue, 19 Sep 2017 18:45:56 +0100 +Subject: net: change skb->mac_header when Generic XDP calls adjust_head + +From: Edward Cree + + +[ Upstream commit 92dd5452c1be873a1193561f4f691763103d22ac ] + +Since XDP's view of the packet includes the MAC header, moving the start- + of-packet with bpf_xdp_adjust_head needs to also update the offset of the + MAC header (which is relative to skb->head, not to the skb->data that was + changed). +Without this, tcpdump sees packets starting from the old MAC header rather + than the new one, at least in my tests on the loopback device. + +Fixes: b5cdae3291f7 ("net: Generic XDP") +Signed-off-by: Edward Cree +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -4408,6 +4408,7 @@ static u32 netif_receive_generic_xdp(str + __skb_pull(skb, off); + else if (off < 0) + __skb_push(skb, -off); ++ skb->mac_header += off; + + switch (act) { + case XDP_TX: diff --git a/queue-4.13/net-dsa-fix-network-device-registration-order.patch b/queue-4.13/net-dsa-fix-network-device-registration-order.patch new file mode 100644 index 00000000000..26044b2fe31 --- /dev/null +++ b/queue-4.13/net-dsa-fix-network-device-registration-order.patch @@ -0,0 +1,75 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Florian Fainelli +Date: Mon, 25 Sep 2017 15:55:53 -0700 +Subject: net: dsa: Fix network device registration order + +From: Florian Fainelli + + +[ Upstream commit e804441cfe0b60f6c430901946a69c01eac09df1 ] + +We cannot be registering the network device first, then setting its +carrier off and finally connecting it to a PHY, doing that leaves a +window during which the carrier is at best inconsistent, and at worse +the device is not usable without a down/up sequence since the network +device is visible to user space with possibly no PHY device attached. + +Re-order steps so that they make logical sense. This fixes some devices +where the port was not usable after e.g: an unbind then bind of the +driver. 
+ +Fixes: 0071f56e46da ("dsa: Register netdev before phy") +Fixes: 91da11f870f0 ("net: Distributed Switch Architecture protocol support") +Signed-off-by: Florian Fainelli +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dsa/slave.c | 28 +++++++++++++++++----------- + 1 file changed, 17 insertions(+), 11 deletions(-) + +--- a/net/dsa/slave.c ++++ b/net/dsa/slave.c +@@ -1180,26 +1180,32 @@ int dsa_slave_create(struct dsa_switch * + p->old_duplex = -1; + + ds->ports[port].netdev = slave_dev; +- ret = register_netdev(slave_dev); +- if (ret) { +- netdev_err(master, "error %d registering interface %s\n", +- ret, slave_dev->name); +- ds->ports[port].netdev = NULL; +- free_netdev(slave_dev); +- return ret; +- } + + netif_carrier_off(slave_dev); + + ret = dsa_slave_phy_setup(p, slave_dev); + if (ret) { + netdev_err(master, "error %d setting up slave phy\n", ret); +- unregister_netdev(slave_dev); +- free_netdev(slave_dev); +- return ret; ++ goto out_free; ++ } ++ ++ ret = register_netdev(slave_dev); ++ if (ret) { ++ netdev_err(master, "error %d registering interface %s\n", ++ ret, slave_dev->name); ++ goto out_phy; + } + + return 0; ++ ++out_phy: ++ phy_disconnect(p->phy); ++ if (of_phy_is_fixed_link(p->dp->dn)) ++ of_phy_deregister_fixed_link(p->dp->dn); ++out_free: ++ free_netdev(slave_dev); ++ ds->ports[port].netdev = NULL; ++ return ret; + } + + void dsa_slave_destroy(struct net_device *slave_dev) diff --git a/queue-4.13/net-dsa-mv88e6xxx-allow-dsa-and-cpu-ports-in-multiple-vlans.patch b/queue-4.13/net-dsa-mv88e6xxx-allow-dsa-and-cpu-ports-in-multiple-vlans.patch new file mode 100644 index 00000000000..a0357f8930c --- /dev/null +++ b/queue-4.13/net-dsa-mv88e6xxx-allow-dsa-and-cpu-ports-in-multiple-vlans.patch @@ -0,0 +1,36 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Andrew Lunn +Date: Mon, 25 Sep 2017 23:32:20 +0200 +Subject: net: dsa: mv88e6xxx: Allow dsa and cpu ports in multiple vlans + +From: Andrew Lunn + + +[ Upstream commit db06ae41945b14feb7f696dcafe8048cc37e8a20 ] + +Ports with the same VLAN must all be in the same bridge. However the +CPU and DSA ports need to be in multiple VLANs spread over multiple +bridges. So exclude them when performing this test. + +Fixes: b2f81d304cee ("net: dsa: add CPU and DSA ports as VLAN members") +Signed-off-by: Andrew Lunn +Reviewed-by: Vivien Didelot +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/mv88e6xxx/chip.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/drivers/net/dsa/mv88e6xxx/chip.c ++++ b/drivers/net/dsa/mv88e6xxx/chip.c +@@ -1184,6 +1184,10 @@ static int mv88e6xxx_port_check_hw_vlan( + }; + int i, err; + ++ /* DSA and CPU ports have to be members of multiple vlans */ ++ if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port)) ++ return 0; ++ + if (!vid_begin) + return -EOPNOTSUPP; + diff --git a/queue-4.13/net-dsa-mv88e6xxx-lock-mutex-when-freeing-irqs.patch b/queue-4.13/net-dsa-mv88e6xxx-lock-mutex-when-freeing-irqs.patch new file mode 100644 index 00000000000..a38dcb5ae59 --- /dev/null +++ b/queue-4.13/net-dsa-mv88e6xxx-lock-mutex-when-freeing-irqs.patch @@ -0,0 +1,35 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Vivien Didelot +Date: Tue, 26 Sep 2017 14:57:21 -0400 +Subject: net: dsa: mv88e6xxx: lock mutex when freeing IRQs + +From: Vivien Didelot + + +[ Upstream commit b32ca44a88def4bf92626d8777494c6f14638c42 ] + +mv88e6xxx_g2_irq_free locks the registers mutex, but not +mv88e6xxx_g1_irq_free, which results in a stack trace from +assert_reg_lock when unloading the mv88e6xxx module. Fix this. + +Fixes: 3460a5770ce9 ("net: dsa: mv88e6xxx: Mask g1 interrupts and free interrupt") +Signed-off-by: Vivien Didelot +Reviewed-by: Florian Fainelli +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/mv88e6xxx/chip.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/net/dsa/mv88e6xxx/chip.c ++++ b/drivers/net/dsa/mv88e6xxx/chip.c +@@ -4019,7 +4019,9 @@ static void mv88e6xxx_remove(struct mdio + if (chip->irq > 0) { + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_INT)) + mv88e6xxx_g2_irq_free(chip); ++ mutex_lock(&chip->reg_lock); + mv88e6xxx_g1_irq_free(chip); ++ mutex_unlock(&chip->reg_lock); + } + } + diff --git a/queue-4.13/net-emac-fix-napi-poll-list-corruption.patch b/queue-4.13/net-emac-fix-napi-poll-list-corruption.patch new file mode 100644 index 00000000000..d496a5fa9ba --- /dev/null +++ b/queue-4.13/net-emac-fix-napi-poll-list-corruption.patch @@ -0,0 +1,54 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Christian Lamparter +Date: Tue, 19 Sep 2017 19:35:18 +0200 +Subject: net: emac: Fix napi poll list corruption + +From: Christian Lamparter + + +[ Upstream commit f55956065ec94e3e9371463d693a1029c4cc3007 ] + +This patch is pretty much a carbon copy of +commit 3079c652141f ("caif: Fix napi poll list corruption") +with "caif" replaced by "emac". + +The commit d75b1ade567f ("net: less interrupt masking in NAPI") +breaks emac. + +It is now required that if the entire budget is consumed when poll +returns, the napi poll_list must remain empty. However, like some +other drivers emac tries to do a last-ditch check and if there is +more work it will call napi_reschedule and then immediately process +some of this new work. Should the entire budget be consumed while +processing such new work then we will violate the new caller +contract. + +This patch fixes this by not touching any work when we reschedule +in emac. + +Signed-off-by: Christian Lamparter +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/ibm/emac/mal.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/drivers/net/ethernet/ibm/emac/mal.c ++++ b/drivers/net/ethernet/ibm/emac/mal.c +@@ -402,7 +402,7 @@ static int mal_poll(struct napi_struct * + unsigned long flags; + + MAL_DBG2(mal, "poll(%d)" NL, budget); +- again: ++ + /* Process TX skbs */ + list_for_each(l, &mal->poll_list) { + struct mal_commac *mc = +@@ -451,7 +451,6 @@ static int mal_poll(struct napi_struct * + spin_lock_irqsave(&mal->lock, flags); + mal_disable_eob_irq(mal); + spin_unlock_irqrestore(&mal->lock, flags); +- goto again; + } + mc->ops->poll_tx(mc->dev); + } diff --git a/queue-4.13/net-ipv6-fix-regression-of-no-rtm_deladdr-sent-after-dad-failure.patch b/queue-4.13/net-ipv6-fix-regression-of-no-rtm_deladdr-sent-after-dad-failure.patch new file mode 100644 index 00000000000..d3186a7e893 --- /dev/null +++ b/queue-4.13/net-ipv6-fix-regression-of-no-rtm_deladdr-sent-after-dad-failure.patch @@ -0,0 +1,45 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Mike Manning +Date: Mon, 4 Sep 2017 15:52:55 +0100 +Subject: net: ipv6: fix regression of no RTM_DELADDR sent after DAD failure + +From: Mike Manning + + +[ Upstream commit 6819a14ecbe2e089e5c5bb74edecafdde2028a00 ] + +Commit f784ad3d79e5 ("ipv6: do not send RTM_DELADDR for tentative +addresses") incorrectly assumes that no RTM_NEWADDR are sent for +addresses in tentative state, as this does happen for the standard +IPv6 use-case of DAD failure, see the call to ipv6_ifa_notify() in +addconf_dad_stop(). So as a result of this change, no RTM_DELADDR is +sent after DAD failure for a link-local when strict DAD (accept_dad=2) +is configured, or on the next admin down in other cases. The absence +of this notification breaks backwards compatibility and causes problems +after DAD failure if this notification was being relied on. The +solution is to allow RTM_DELADDR to still be sent after DAD failure. + +Fixes: f784ad3d79e5 ("ipv6: do not send RTM_DELADDR for tentative addresses") +Signed-off-by: Mike Manning +Cc: Mahesh Bandewar +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/addrconf.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -4982,9 +4982,10 @@ static void inet6_ifa_notify(int event, + + /* Don't send DELADDR notification for TENTATIVE address, + * since NEWADDR notification is sent only after removing +- * TENTATIVE flag. ++ * TENTATIVE flag, if DAD has not failed. + */ +- if (ifa->flags & IFA_F_TENTATIVE && event == RTM_DELADDR) ++ if (ifa->flags & IFA_F_TENTATIVE && !(ifa->flags & IFA_F_DADFAILED) && ++ event == RTM_DELADDR) + return; + + skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC); diff --git a/queue-4.13/net-mlx5e-ipoib-fix-access-to-invalid-memory-address.patch b/queue-4.13/net-mlx5e-ipoib-fix-access-to-invalid-memory-address.patch new file mode 100644 index 00000000000..ea0a6549bea --- /dev/null +++ b/queue-4.13/net-mlx5e-ipoib-fix-access-to-invalid-memory-address.patch @@ -0,0 +1,42 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Roi Dayan +Date: Mon, 21 Aug 2017 12:04:50 +0300 +Subject: net/mlx5e: IPoIB, Fix access to invalid memory address + +From: Roi Dayan + + +[ Upstream commit 38e8a5c040d3ec99a8351c688dcdf0f549611565 ] + +When cleaning rdma netdevice we need to save the mdev pointer +because priv is released when we release netdev. 
+ +This bug was found using the kernel address sanitizer (KASAN). +use-after-free in mlx5_rdma_netdev_free+0xe3/0x100 [mlx5_core] + +Fixes: 48935bbb7ae8 ("net/mlx5e: IPoIB, Add netdevice profile skeleton") +Signed-off-by: Roi Dayan +Reviewed-by: Or Gerlitz +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +@@ -572,12 +572,13 @@ void mlx5_rdma_netdev_free(struct net_de + { + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + const struct mlx5e_profile *profile = priv->profile; ++ struct mlx5_core_dev *mdev = priv->mdev; + + mlx5e_detach_netdev(priv); + profile->cleanup(priv); + destroy_workqueue(priv->wq); + free_netdev(netdev); + +- mlx5e_destroy_mdev_resources(priv->mdev); ++ mlx5e_destroy_mdev_resources(mdev); + } + EXPORT_SYMBOL(mlx5_rdma_netdev_free); diff --git a/queue-4.13/net-phy-fix-mask-value-write-on-gmii2rgmii-converter-speed-register.patch b/queue-4.13/net-phy-fix-mask-value-write-on-gmii2rgmii-converter-speed-register.patch new file mode 100644 index 00000000000..65320ca07a3 --- /dev/null +++ b/queue-4.13/net-phy-fix-mask-value-write-on-gmii2rgmii-converter-speed-register.patch @@ -0,0 +1,39 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Fahad Kunnathadi +Date: Fri, 15 Sep 2017 12:01:58 +0530 +Subject: net: phy: Fix mask value write on gmii2rgmii converter speed register + +From: Fahad Kunnathadi + + +[ Upstream commit f2654a4781318dc7ab8d6cde66f1fa39eab980a9 ] + +To clear Speed Selection in MDIO control register(0x10), +ie, clear bits 6 and 13 to zero while keeping other bits same. +Before AND operation,The Mask value has to be perform with bitwise NOT +operation (ie, ~ operator) + +This patch clears current speed selection before writing the +new speed settings to gmii2rgmii converter + +Fixes: f411a6160bd4 ("net: phy: Add gmiitorgmii converter support") + +Signed-off-by: Fahad Kunnathadi +Reviewed-by: Andrew Lunn +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/xilinx_gmii2rgmii.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/phy/xilinx_gmii2rgmii.c ++++ b/drivers/net/phy/xilinx_gmii2rgmii.c +@@ -44,7 +44,7 @@ static int xgmiitorgmii_read_status(stru + priv->phy_drv->read_status(phydev); + + val = mdiobus_read(phydev->mdio.bus, priv->addr, XILINX_GMII2RGMII_REG); +- val &= XILINX_GMII2RGMII_SPEED_MASK; ++ val &= ~XILINX_GMII2RGMII_SPEED_MASK; + + if (phydev->speed == SPEED_1000) + val |= BMCR_SPEED1000; diff --git a/queue-4.13/net-qcom-emac-specify-the-correct-size-when-mapping-a-dma-buffer.patch b/queue-4.13/net-qcom-emac-specify-the-correct-size-when-mapping-a-dma-buffer.patch new file mode 100644 index 00000000000..29b4a832815 --- /dev/null +++ b/queue-4.13/net-qcom-emac-specify-the-correct-size-when-mapping-a-dma-buffer.patch @@ -0,0 +1,36 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Timur Tabi +Date: Fri, 22 Sep 2017 15:32:44 -0500 +Subject: net: qcom/emac: specify the correct size when mapping a DMA buffer + +From: Timur Tabi + + +[ Upstream commit a93ad944f4ff9a797abff17c73fc4b1e4a1d9141 ] + +When mapping the RX DMA buffers, the driver was accidentally specifying +zero for the buffer length. Under normal circumstances, SWIOTLB does not +need to allocate a bounce buffer, so the address is just mapped without +checking the size field. 
This is why the error was not detected earlier. + +Fixes: b9b17debc69d ("net: emac: emac gigabit ethernet controller driver") +Cc: stable@vger.kernel.org +Signed-off-by: Timur Tabi +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/qualcomm/emac/emac-mac.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c ++++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c +@@ -876,7 +876,8 @@ static void emac_mac_rx_descs_refill(str + + curr_rxbuf->dma_addr = + dma_map_single(adpt->netdev->dev.parent, skb->data, +- curr_rxbuf->length, DMA_FROM_DEVICE); ++ adpt->rxbuf_size, DMA_FROM_DEVICE); ++ + ret = dma_mapping_error(adpt->netdev->dev.parent, + curr_rxbuf->dma_addr); + if (ret) { diff --git a/queue-4.13/net-rtnetlink-fix-info-leak-in-rtm_getstats-call.patch b/queue-4.13/net-rtnetlink-fix-info-leak-in-rtm_getstats-call.patch new file mode 100644 index 00000000000..035293eb1e9 --- /dev/null +++ b/queue-4.13/net-rtnetlink-fix-info-leak-in-rtm_getstats-call.patch @@ -0,0 +1,37 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Nikolay Aleksandrov +Date: Tue, 3 Oct 2017 13:20:48 +0300 +Subject: net: rtnetlink: fix info leak in RTM_GETSTATS call + +From: Nikolay Aleksandrov + + +[ Upstream commit ce024f42c2e28b6bce4ecc1e891b42f57f753892 ] + +When RTM_GETSTATS was added the fields of its header struct were not all +initialized when returning the result thus leaking 4 bytes of information +to user-space per rtnl_fill_statsinfo call, so initialize them now. Thanks +to Alexander Potapenko for the detailed report and bisection. + +Reported-by: Alexander Potapenko +Fixes: 10c9ead9f3c6 ("rtnetlink: add new RTM_GETSTATS message to dump link stats") +Signed-off-by: Nikolay Aleksandrov +Acked-by: Roopa Prabhu +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/rtnetlink.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -3867,6 +3867,9 @@ static int rtnl_fill_statsinfo(struct sk + return -EMSGSIZE; + + ifsm = nlmsg_data(nlh); ++ ifsm->family = PF_UNSPEC; ++ ifsm->pad1 = 0; ++ ifsm->pad2 = 0; + ifsm->ifindex = dev->ifindex; + ifsm->filter_mask = filter_mask; + diff --git a/queue-4.13/net-sched-cls_matchall-fix-crash-when-used-with-classful-qdisc.patch b/queue-4.13/net-sched-cls_matchall-fix-crash-when-used-with-classful-qdisc.patch new file mode 100644 index 00000000000..e1df12bf96d --- /dev/null +++ b/queue-4.13/net-sched-cls_matchall-fix-crash-when-used-with-classful-qdisc.patch @@ -0,0 +1,53 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Davide Caratti +Date: Sat, 16 Sep 2017 14:02:21 +0200 +Subject: net/sched: cls_matchall: fix crash when used with classful qdisc + +From: Davide Caratti + + +[ Upstream commit 3ff4cbec87da48b0ec1f7b6196607b034de0c680 ] + +this script, edited from Linux Advanced Routing and Traffic Control guide + +tc q a dev en0 root handle 1: htb default a +tc c a dev en0 parent 1: classid 1:1 htb rate 6mbit burst 15k +tc c a dev en0 parent 1:1 classid 1:a htb rate 5mbit ceil 6mbit burst 15k +tc c a dev en0 parent 1:1 classid 1:b htb rate 1mbit ceil 6mbit burst 15k +tc f a dev en0 parent 1:0 prio 1 $clsname $clsargs classid 1:b +ping $address -c1 +tc -s c s dev en0 + +classifies traffic to 1:b or 1:a, depending on whether the packet matches +or not the pattern $clsargs of filter $clsname. However, when $clsname is +'matchall', a systematic crash can be observed in htb_classify(). 
HTB and +classful qdiscs don't assign initial value to struct tcf_result, but then +they expect it to contain valid values after filters have been run. Thus, +current 'matchall' ignores the TCA_MATCHALL_CLASSID attribute, configured +by user, and makes HTB (and classful qdiscs) dereference random pointers. + +By assigning head->res to *res in mall_classify(), before the actions are +invoked, we fix this crash and enable TCA_MATCHALL_CLASSID functionality, +that had no effect on 'matchall' classifier since its first introduction. + +BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1460213 +Reported-by: Jiri Benc +Fixes: b87f7936a932 ("net/sched: introduce Match-all classifier") +Signed-off-by: Davide Caratti +Acked-by: Yotam Gigi +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/cls_matchall.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/sched/cls_matchall.c ++++ b/net/sched/cls_matchall.c +@@ -32,6 +32,7 @@ static int mall_classify(struct sk_buff + if (tc_skip_sw(head->flags)) + return -1; + ++ *res = head->res; + return tcf_exts_exec(skb, &head->exts, res); + } + diff --git a/queue-4.13/net-sched-fix-use-after-free-in-tcf_action_destroy-and-tcf_del_walker.patch b/queue-4.13/net-sched-fix-use-after-free-in-tcf_action_destroy-and-tcf_del_walker.patch new file mode 100644 index 00000000000..4bf8873eda0 --- /dev/null +++ b/queue-4.13/net-sched-fix-use-after-free-in-tcf_action_destroy-and-tcf_del_walker.patch @@ -0,0 +1,60 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Jiri Pirko +Date: Wed, 13 Sep 2017 17:32:37 +0200 +Subject: net: sched: fix use-after-free in tcf_action_destroy and tcf_del_walker + +From: Jiri Pirko + + +[ Upstream commit 255cd50f207ae8ec7b22663246c833407744e634 ] + +Recent commit d7fb60b9cafb ("net_sched: get rid of tcfa_rcu") removed +freeing in call_rcu, which changed already existing hard-to-hit +race condition into 100% hit: + +[ 598.599825] BUG: unable to handle kernel NULL pointer dereference at 0000000000000030 +[ 598.607782] IP: tcf_action_destroy+0xc0/0x140 + +Or: + +[ 40.858924] BUG: unable to handle kernel NULL pointer dereference at 0000000000000030 +[ 40.862840] IP: tcf_generic_walker+0x534/0x820 + +Fix this by storing the ops and use them directly for module_put call. + +Fixes: a85a970af265 ("net_sched: move tc_action into tcf_common") +Signed-off-by: Jiri Pirko +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/act_api.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/net/sched/act_api.c ++++ b/net/sched/act_api.c +@@ -174,7 +174,7 @@ static int tcf_del_walker(struct tcf_has + hlist_for_each_entry_safe(p, n, head, tcfa_head) { + ret = __tcf_hash_release(p, false, true); + if (ret == ACT_P_DELETED) { +- module_put(p->ops->owner); ++ module_put(ops->owner); + n_i++; + } else if (ret < 0) + goto nla_put_failure; +@@ -506,13 +506,15 @@ EXPORT_SYMBOL(tcf_action_exec); + + int tcf_action_destroy(struct list_head *actions, int bind) + { ++ const struct tc_action_ops *ops; + struct tc_action *a, *tmp; + int ret = 0; + + list_for_each_entry_safe(a, tmp, actions, list) { ++ ops = a->ops; + ret = __tcf_hash_release(a, bind, true); + if (ret == ACT_P_DELETED) +- module_put(a->ops->owner); ++ module_put(ops->owner); + else if (ret < 0) + return ret; + } diff --git a/queue-4.13/net-set-sk_prot_creator-when-cloning-sockets-to-the-right-proto.patch b/queue-4.13/net-set-sk_prot_creator-when-cloning-sockets-to-the-right-proto.patch new file mode 100644 index 00000000000..abc1f09bbc0 --- /dev/null +++ b/queue-4.13/net-set-sk_prot_creator-when-cloning-sockets-to-the-right-proto.patch @@ -0,0 +1,107 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Christoph Paasch +Date: Tue, 26 Sep 2017 17:38:50 -0700 +Subject: net: Set sk_prot_creator when cloning sockets to the right proto + +From: Christoph Paasch + + +[ Upstream commit 9d538fa60bad4f7b23193c89e843797a1cf71ef3 ] + +sk->sk_prot and sk->sk_prot_creator can differ when the app uses +IPV6_ADDRFORM (transforming an IPv6-socket to an IPv4-one). +Which is why sk_prot_creator is there to make sure that sk_prot_free() +does the kmem_cache_free() on the right kmem_cache slab. + +Now, if such a socket gets transformed back to a listening socket (using +connect() with AF_UNSPEC) we will allocate an IPv4 tcp_sock through +sk_clone_lock() when a new connection comes in. But sk_prot_creator will +still point to the IPv6 kmem_cache (as everything got copied in +sk_clone_lock()). When freeing, we will thus put this +memory back into the IPv6 kmem_cache although it was allocated in the +IPv4 cache. I have seen memory corruption happening because of this. + +With slub-debugging and MEMCG_KMEM enabled this gives the warning + "cache_from_obj: Wrong slab cache. 
TCPv6 but object is from TCP" + +A C-program to trigger this: + +void main(void) +{ + int fd = socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP); + int new_fd, newest_fd, client_fd; + struct sockaddr_in6 bind_addr; + struct sockaddr_in bind_addr4, client_addr1, client_addr2; + struct sockaddr unsp; + int val; + + memset(&bind_addr, 0, sizeof(bind_addr)); + bind_addr.sin6_family = AF_INET6; + bind_addr.sin6_port = ntohs(42424); + + memset(&client_addr1, 0, sizeof(client_addr1)); + client_addr1.sin_family = AF_INET; + client_addr1.sin_port = ntohs(42424); + client_addr1.sin_addr.s_addr = inet_addr("127.0.0.1"); + + memset(&client_addr2, 0, sizeof(client_addr2)); + client_addr2.sin_family = AF_INET; + client_addr2.sin_port = ntohs(42421); + client_addr2.sin_addr.s_addr = inet_addr("127.0.0.1"); + + memset(&unsp, 0, sizeof(unsp)); + unsp.sa_family = AF_UNSPEC; + + bind(fd, (struct sockaddr *)&bind_addr, sizeof(bind_addr)); + + listen(fd, 5); + + client_fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + connect(client_fd, (struct sockaddr *)&client_addr1, sizeof(client_addr1)); + new_fd = accept(fd, NULL, NULL); + close(fd); + + val = AF_INET; + setsockopt(new_fd, SOL_IPV6, IPV6_ADDRFORM, &val, sizeof(val)); + + connect(new_fd, &unsp, sizeof(unsp)); + + memset(&bind_addr4, 0, sizeof(bind_addr4)); + bind_addr4.sin_family = AF_INET; + bind_addr4.sin_port = ntohs(42421); + bind(new_fd, (struct sockaddr *)&bind_addr4, sizeof(bind_addr4)); + + listen(new_fd, 5); + + client_fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + connect(client_fd, (struct sockaddr *)&client_addr2, sizeof(client_addr2)); + + newest_fd = accept(new_fd, NULL, NULL); + close(new_fd); + + close(client_fd); + close(new_fd); +} + +As far as I can see, this bug has been there since the beginning of the +git-days. + +Signed-off-by: Christoph Paasch +Reviewed-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/sock.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -1646,6 +1646,8 @@ struct sock *sk_clone_lock(const struct + + sock_copy(newsk, sk); + ++ newsk->sk_prot_creator = sk->sk_prot; ++ + /* SANITY */ + if (likely(newsk->sk_net_refcnt)) + get_net(sock_net(newsk)); diff --git a/queue-4.13/net-stmmac-cocci-spatch-of_table.patch b/queue-4.13/net-stmmac-cocci-spatch-of_table.patch new file mode 100644 index 00000000000..08b64629b47 --- /dev/null +++ b/queue-4.13/net-stmmac-cocci-spatch-of_table.patch @@ -0,0 +1,30 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Thomas Meyer +Date: Thu, 21 Sep 2017 08:24:27 +0200 +Subject: net: stmmac: Cocci spatch "of_table" + +From: Thomas Meyer + + +[ Upstream commit f0ef1f4f2b772c0a1c8b35a6ae3edf974cc110dd ] + +Make sure (of/i2c/platform)_device_id tables are NULL terminated. +Found by coccinelle spatch "misc/of_table.cocci" + +Signed-off-by: Thomas Meyer +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +@@ -315,6 +315,7 @@ static int stmmac_dt_phy(struct plat_stm + { .compatible = "allwinner,sun8i-h3-emac" }, + { .compatible = "allwinner,sun8i-v3s-emac" }, + { .compatible = "allwinner,sun50i-a64-emac" }, ++ {}, + }; + + /* If phy-handle property is passed from DT, use it as the PHY */ diff --git a/queue-4.13/net_sched-always-reset-qdisc-backlog-in-qdisc_reset.patch b/queue-4.13/net_sched-always-reset-qdisc-backlog-in-qdisc_reset.patch new file mode 100644 index 00000000000..180b5546389 --- /dev/null +++ b/queue-4.13/net_sched-always-reset-qdisc-backlog-in-qdisc_reset.patch @@ -0,0 +1,33 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Konstantin Khlebnikov +Date: Wed, 20 Sep 2017 15:45:36 +0300 +Subject: net_sched: always reset qdisc backlog in qdisc_reset() + +From: Konstantin Khlebnikov + + +[ Upstream commit c8e1812960eeae42e2183154927028511c4bc566 ] + +SKB stored in qdisc->gso_skb also counted into backlog. + +Some qdiscs don't reset backlog to zero in ->reset(), +for example sfq just dequeue and free all queued skb. + +Signed-off-by: Konstantin Khlebnikov +Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_generic.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/sched/sch_generic.c ++++ b/net/sched/sch_generic.c +@@ -681,6 +681,7 @@ void qdisc_reset(struct Qdisc *qdisc) + qdisc->gso_skb = NULL; + } + qdisc->q.qlen = 0; ++ qdisc->qstats.backlog = 0; + } + EXPORT_SYMBOL(qdisc_reset); + diff --git a/queue-4.13/net_sched-gen_estimator-fix-scaling-error-in-bytes-packets-samples.patch b/queue-4.13/net_sched-gen_estimator-fix-scaling-error-in-bytes-packets-samples.patch new file mode 100644 index 00000000000..986db7597d3 --- /dev/null +++ b/queue-4.13/net_sched-gen_estimator-fix-scaling-error-in-bytes-packets-samples.patch @@ -0,0 +1,63 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Eric Dumazet +Date: Wed, 13 Sep 2017 11:16:45 -0700 +Subject: net_sched: gen_estimator: fix scaling error in bytes/packets samples + +From: Eric Dumazet + + +[ Upstream commit ca558e185972d8ecd308760abf972f5d408bcff0 ] + +Denys reported wrong rate estimations with HTB classes. + +It appears the bug was added in linux-4.10, since my tests +where using intervals of one second only. + +HTB using 4 sec default rate estimators, reported rates +were 4x higher. + +We need to properly scale the bytes/packets samples before +integrating them in EWMA. + +Tested: + echo 1 >/sys/module/sch_htb/parameters/htb_rate_est + + Setup HTB with one class with a rate/cail of 5Gbit + + Generate traffic on this class + + tc -s -d cl sh dev eth0 classid 7002:11 +class htb 7002:11 parent 7002:1 prio 5 quantum 200000 rate 5Gbit ceil +5Gbit linklayer ethernet burst 80000b/1 mpu 0b cburst 80000b/1 mpu 0b +level 0 rate_handle 1 + Sent 1488215421648 bytes 982969243 pkt (dropped 0, overlimits 0 +requeues 0) + rate 5Gbit 412814pps backlog 136260b 2p requeues 0 + TCP pkts/rtx 982969327/45 bytes 1488215557414/68130 + lended: 22732826 borrowed: 0 giants: 0 + tokens: -1684 ctokens: -1684 + +Fixes: 1c0d32fde5bd ("net_sched: gen_estimator: complete rewrite of rate estimators") +Signed-off-by: Eric Dumazet +Reported-by: Denys Fedoryshchenko +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/gen_estimator.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/core/gen_estimator.c ++++ b/net/core/gen_estimator.c +@@ -83,10 +83,10 @@ static void est_timer(unsigned long arg) + u64 rate, brate; + + est_fetch_counters(est, &b); +- brate = (b.bytes - est->last_bytes) << (8 - est->ewma_log); ++ brate = (b.bytes - est->last_bytes) << (10 - est->ewma_log - est->intvl_log); + brate -= (est->avbps >> est->ewma_log); + +- rate = (u64)(b.packets - est->last_packets) << (8 - est->ewma_log); ++ rate = (u64)(b.packets - est->last_packets) << (10 - est->ewma_log - est->intvl_log); + rate -= (est->avpps >> est->ewma_log); + + write_seqcount_begin(&est->seq); diff --git a/queue-4.13/netlink-do-not-proceed-if-dump-s-start-errs.patch b/queue-4.13/netlink-do-not-proceed-if-dump-s-start-errs.patch new file mode 100644 index 00000000000..a2a45f915fb --- /dev/null +++ b/queue-4.13/netlink-do-not-proceed-if-dump-s-start-errs.patch @@ -0,0 +1,49 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: "Jason A. Donenfeld" +Date: Thu, 28 Sep 2017 00:41:44 +0200 +Subject: netlink: do not proceed if dump's start() errs + +From: "Jason A. Donenfeld" + + +[ Upstream commit fef0035c0f31322d417d1954bba5ab959bf91183 ] + +Drivers that use the start method for netlink dumping rely on dumpit not +being called if start fails. For example, ila_xlat.c allocates memory +and assigns it to cb->args[0] in its start() function. It might fail to +do that and return -ENOMEM instead. However, even when returning an +error, dumpit will be called, which, in the example above, quickly +dereferences the memory in cb->args[0], which will OOPS the kernel. This +is but one example of how this goes wrong. + +Since start() has always been a function with an int return type, it +therefore makes sense to use it properly, rather than ignoring it. This +patch thus returns early and does not call dumpit() when start() fails. + +Signed-off-by: Jason A. Donenfeld +Cc: Johannes Berg +Reviewed-by: Johannes Berg +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/netlink/af_netlink.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/net/netlink/af_netlink.c ++++ b/net/netlink/af_netlink.c +@@ -2262,10 +2262,13 @@ int __netlink_dump_start(struct sock *ss + + mutex_unlock(nlk->cb_mutex); + ++ ret = 0; + if (cb->start) +- cb->start(cb); ++ ret = cb->start(cb); ++ ++ if (!ret) ++ ret = netlink_dump(sk); + +- ret = netlink_dump(sk); + sock_put(sk); + + if (ret) diff --git a/queue-4.13/openvswitch-fix-an-error-handling-path-in-ovs_nla_init_match_and_action.patch b/queue-4.13/openvswitch-fix-an-error-handling-path-in-ovs_nla_init_match_and_action.patch new file mode 100644 index 00000000000..f738f0153d2 --- /dev/null +++ b/queue-4.13/openvswitch-fix-an-error-handling-path-in-ovs_nla_init_match_and_action.patch @@ -0,0 +1,34 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Christophe JAILLET +Date: Mon, 11 Sep 2017 21:56:20 +0200 +Subject: openvswitch: Fix an error handling path in 'ovs_nla_init_match_and_action()' + +From: Christophe JAILLET + + +[ Upstream commit 5829e62ac17a40ab08c1b905565604a4b5fa7af6 ] + +All other error handling paths in this function go through the 'error' +label. This one should do the same. + +Fixes: 9cc9a5cb176c ("datapath: Avoid using stack larger than 1024.") +Signed-off-by: Christophe JAILLET +Acked-by: Pravin B Shelar +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/openvswitch/datapath.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/openvswitch/datapath.c ++++ b/net/openvswitch/datapath.c +@@ -1126,7 +1126,8 @@ static int ovs_nla_init_match_and_action + if (!a[OVS_FLOW_ATTR_KEY]) { + OVS_NLERR(log, + "Flow key attribute not present in set flow."); +- return -EINVAL; ++ error = -EINVAL; ++ goto error; + } + + *acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key, diff --git a/queue-4.13/packet-hold-bind-lock-when-rebinding-to-fanout-hook.patch b/queue-4.13/packet-hold-bind-lock-when-rebinding-to-fanout-hook.patch new file mode 100644 index 00000000000..99cfb2822b8 --- /dev/null +++ b/queue-4.13/packet-hold-bind-lock-when-rebinding-to-fanout-hook.patch @@ -0,0 +1,74 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Willem de Bruijn +Date: Thu, 14 Sep 2017 17:14:41 -0400 +Subject: packet: hold bind lock when rebinding to fanout hook + +From: Willem de Bruijn + + +[ Upstream commit 008ba2a13f2d04c947adc536d19debb8fe66f110 ] + +Packet socket bind operations must hold the po->bind_lock. This keeps +po->running consistent with whether the socket is actually on a ptype +list to receive packets. + +fanout_add unbinds a socket and its packet_rcv/tpacket_rcv call, then +binds the fanout object to receive through packet_rcv_fanout. + +Make it hold the po->bind_lock when testing po->running and rebinding. +Else, it can race with other rebind operations, such as that in +packet_set_ring from packet_rcv to tpacket_rcv. Concurrent updates +can result in a socket being added to a fanout group twice, causing +use-after-free KASAN bug reports, among others. + +Reported independently by both trinity and syzkaller. +Verified that the syzkaller reproducer passes after this patch. + +Fixes: dc99f600698d ("packet: Add fanout support.") +Reported-by: nixioaming +Signed-off-by: Willem de Bruijn +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/packet/af_packet.c | 16 +++++++++++----- + 1 file changed, 11 insertions(+), 5 deletions(-) + +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -1686,10 +1686,6 @@ static int fanout_add(struct sock *sk, u + + mutex_lock(&fanout_mutex); + +- err = -EINVAL; +- if (!po->running) +- goto out; +- + err = -EALREADY; + if (po->fanout) + goto out; +@@ -1751,7 +1747,10 @@ static int fanout_add(struct sock *sk, u + list_add(&match->list, &fanout_list); + } + err = -EINVAL; +- if (match->type == type && ++ ++ spin_lock(&po->bind_lock); ++ if (po->running && ++ match->type == type && + match->prot_hook.type == po->prot_hook.type && + match->prot_hook.dev == po->prot_hook.dev) { + err = -ENOSPC; +@@ -1763,6 +1762,13 @@ static int fanout_add(struct sock *sk, u + err = 0; + } + } ++ spin_unlock(&po->bind_lock); ++ ++ if (err && !refcount_read(&match->sk_ref)) { ++ list_del(&match->list); ++ kfree(match); ++ } ++ + out: + if (err && rollover) { + kfree(rollover); diff --git a/queue-4.13/packet-in-packet_do_bind-test-fanout-with-bind_lock-held.patch b/queue-4.13/packet-in-packet_do_bind-test-fanout-with-bind_lock-held.patch new file mode 100644 index 00000000000..e516b0b081a --- /dev/null +++ b/queue-4.13/packet-in-packet_do_bind-test-fanout-with-bind_lock-held.patch @@ -0,0 +1,53 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Willem de Bruijn +Date: Tue, 26 Sep 2017 12:19:37 -0400 +Subject: packet: in packet_do_bind, test fanout with bind_lock held + +From: Willem de Bruijn + + +[ Upstream commit 4971613c1639d8e5f102c4e797c3bf8f83a5a69e ] + +Once a socket has po->fanout set, it remains a member of the group +until it is destroyed. The prot_hook must be constant and identical +across sockets in the group. + +If fanout_add races with packet_do_bind between the test of po->fanout +and taking the lock, the bind call may make type or dev inconsistent +with that of the fanout group. + +Hold po->bind_lock when testing po->fanout to avoid this race. + +I had to introduce artificial delay (local_bh_enable) to actually +observe the race. + +Fixes: dc99f600698d ("packet: Add fanout support.") +Signed-off-by: Willem de Bruijn +Reviewed-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/packet/af_packet.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -3071,13 +3071,15 @@ static int packet_do_bind(struct sock *s + int ret = 0; + bool unlisted = false; + +- if (po->fanout) +- return -EINVAL; +- + lock_sock(sk); + spin_lock(&po->bind_lock); + rcu_read_lock(); + ++ if (po->fanout) { ++ ret = -EINVAL; ++ goto out_unlock; ++ } ++ + if (name) { + dev = dev_get_by_name_rcu(sock_net(sk), name); + if (!dev) { diff --git a/queue-4.13/packet-only-test-po-has_vnet_hdr-once-in-packet_snd.patch b/queue-4.13/packet-only-test-po-has_vnet_hdr-once-in-packet_snd.patch new file mode 100644 index 00000000000..22c6bd393d5 --- /dev/null +++ b/queue-4.13/packet-only-test-po-has_vnet_hdr-once-in-packet_snd.patch @@ -0,0 +1,53 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Willem de Bruijn +Date: Tue, 26 Sep 2017 12:20:17 -0400 +Subject: packet: only test po->has_vnet_hdr once in packet_snd + +From: Willem de Bruijn + + +[ Upstream commit da7c9561015e93d10fe6aab73e9288e0d09d65a6 ] + +Packet socket option po->has_vnet_hdr can be updated concurrently with +other operations if no ring is attached. 
+ +Do not test the option twice in packet_snd, as the value may change in +between calls. A race on setsockopt disable may cause a packet > mtu +to be sent without having GSO options set. + +Fixes: bfd5f4a3d605 ("packet: Add GSO/csum offload support.") +Signed-off-by: Willem de Bruijn +Reviewed-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/packet/af_packet.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -2842,6 +2842,7 @@ static int packet_snd(struct socket *soc + struct virtio_net_hdr vnet_hdr = { 0 }; + int offset = 0; + struct packet_sock *po = pkt_sk(sk); ++ bool has_vnet_hdr = false; + int hlen, tlen, linear; + int extra_len = 0; + +@@ -2885,6 +2886,7 @@ static int packet_snd(struct socket *soc + err = packet_snd_vnet_parse(msg, &len, &vnet_hdr); + if (err) + goto out_unlock; ++ has_vnet_hdr = true; + } + + if (unlikely(sock_flag(sk, SOCK_NOFCS))) { +@@ -2943,7 +2945,7 @@ static int packet_snd(struct socket *soc + skb->priority = sk->sk_priority; + skb->mark = sockc.mark; + +- if (po->has_vnet_hdr) { ++ if (has_vnet_hdr) { + err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le()); + if (err) + goto out_free; diff --git a/queue-4.13/sctp-potential-read-out-of-bounds-in-sctp_ulpevent_type_enabled.patch b/queue-4.13/sctp-potential-read-out-of-bounds-in-sctp_ulpevent_type_enabled.patch new file mode 100644 index 00000000000..79acc986add --- /dev/null +++ b/queue-4.13/sctp-potential-read-out-of-bounds-in-sctp_ulpevent_type_enabled.patch @@ -0,0 +1,46 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Dan Carpenter +Date: Thu, 14 Sep 2017 02:00:54 +0300 +Subject: sctp: potential read out of bounds in sctp_ulpevent_type_enabled() + +From: Dan Carpenter + + +[ Upstream commit fa5f7b51fc3080c2b195fa87c7eca7c05e56f673 ] + +This code causes a static checker warning because Smatch doesn't trust +anything that comes from skb->data. I've reviewed this code and I do +think skb->data can be controlled by the user here. + +The sctp_event_subscribe struct has 13 __u8 fields and we want to see +if ours is non-zero. sn_type can be any value in the 0-USHRT_MAX range. +We're subtracting SCTP_SN_TYPE_BASE which is 1 << 15 so we could read +either before the start of the struct or after the end. + +This is a very old bug and it's surprising that it would go undetected +for so long but my theory is that it just doesn't have a big impact so +it would be hard to notice. + +Signed-off-by: Dan Carpenter +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/sctp/ulpevent.h | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/include/net/sctp/ulpevent.h ++++ b/include/net/sctp/ulpevent.h +@@ -153,8 +153,12 @@ __u16 sctp_ulpevent_get_notification_typ + static inline int sctp_ulpevent_type_enabled(__u16 sn_type, + struct sctp_event_subscribe *mask) + { ++ int offset = sn_type - SCTP_SN_TYPE_BASE; + char *amask = (char *) mask; +- return amask[sn_type - SCTP_SN_TYPE_BASE]; ++ ++ if (offset >= sizeof(struct sctp_event_subscribe)) ++ return 0; ++ return amask[offset]; + } + + /* Given an event subscription, is this event enabled? 
*/ diff --git a/queue-4.13/series b/queue-4.13/series index b518d50e1e8..e6fc21bc0a9 100644 --- a/queue-4.13/series +++ b/queue-4.13/series @@ -1,2 +1,50 @@ imx-media-of-avoid-uninitialized-variable-warning.patch usb-dwc3-ep0-fix-dma-starvation-by-assigning-req-trb-on-ep0.patch +mlxsw-spectrum-fix-eeprom-access-in-case-of-sfp-sfp.patch +net-bonding-fix-transmit-load-balancing-in-balance-alb-mode-if-specified-by-sysfs.patch +openvswitch-fix-an-error-handling-path-in-ovs_nla_init_match_and_action.patch +mlxsw-spectrum-prevent-mirred-related-crash-on-removal.patch +net-bonding-fix-tlb_dynamic_lb-default-value.patch +net_sched-gen_estimator-fix-scaling-error-in-bytes-packets-samples.patch +net-sched-fix-use-after-free-in-tcf_action_destroy-and-tcf_del_walker.patch +sctp-potential-read-out-of-bounds-in-sctp_ulpevent_type_enabled.patch +tcp-update-skb-skb_mstamp-more-carefully.patch +bpf-verifier-reject-bpf_alu64-bpf_end.patch +tcp-fix-data-delivery-rate.patch +udpv6-fix-the-checksum-computation-when-hw-checksum-does-not-apply.patch +ip6_gre-skb_push-ipv6hdr-before-packing-the-header-in-ip6gre_header.patch +net-phy-fix-mask-value-write-on-gmii2rgmii-converter-speed-register.patch +ip6_tunnel-do-not-allow-loading-ip6_tunnel-if-ipv6-is-disabled-in-cmdline.patch +net-sched-cls_matchall-fix-crash-when-used-with-classful-qdisc.patch +8139too-revisit-napi_complete_done-usage.patch +bpf-do-not-disable-enable-bh-in-bpf_map_free_id.patch +tcp-fastopen-fix-on-syn-data-transmit-failure.patch +net-emac-fix-napi-poll-list-corruption.patch +net-ipv6-fix-regression-of-no-rtm_deladdr-sent-after-dad-failure.patch +packet-hold-bind-lock-when-rebinding-to-fanout-hook.patch +bpf-one-perf-event-close-won-t-free-bpf-program-attached-by-another-perf-event.patch +net-change-skb-mac_header-when-generic-xdp-calls-adjust_head.patch +isdn-i4l-fetch-the-ppp_write-buffer-in-one-shot.patch +net_sched-always-reset-qdisc-backlog-in-qdisc_reset.patch +net-stmmac-cocci-spatch-of_table.patch +net-qcom-emac-specify-the-correct-size-when-mapping-a-dma-buffer.patch +vti-fix-use-after-free-in-vti_tunnel_xmit-vti6_tnl_xmit.patch +l2tp-fix-race-condition-in-l2tp_tunnel_delete.patch +tun-bail-out-from-tun_get_user-if-the-skb-is-empty.patch +net-dsa-mv88e6xxx-allow-dsa-and-cpu-ports-in-multiple-vlans.patch +net-dsa-fix-network-device-registration-order.patch +packet-in-packet_do_bind-test-fanout-with-bind_lock-held.patch +packet-only-test-po-has_vnet_hdr-once-in-packet_snd.patch +net-dsa-mv88e6xxx-lock-mutex-when-freeing-irqs.patch +net-set-sk_prot_creator-when-cloning-sockets-to-the-right-proto.patch +net-mlx5e-ipoib-fix-access-to-invalid-memory-address.patch +netlink-do-not-proceed-if-dump-s-start-errs.patch +ip6_gre-ip6gre_tap-device-should-keep-dst.patch +ip6_tunnel-update-mtu-properly-for-arphrd_ether-tunnel-device-in-tx-path.patch +ipv4-early-demux-can-return-an-error-code.patch +udp-perform-source-validation-for-mcast-early-demux.patch +tipc-use-only-positive-error-codes-in-messages.patch +l2tp-fix-l2tp_eth-module-loading.patch +socket-bpf-fix-possible-use-after-free.patch +net-rtnetlink-fix-info-leak-in-rtm_getstats-call.patch +bpf-fix-bpf_tail_call-x64-jit.patch diff --git a/queue-4.13/socket-bpf-fix-possible-use-after-free.patch b/queue-4.13/socket-bpf-fix-possible-use-after-free.patch new file mode 100644 index 00000000000..7053ba72a9a --- /dev/null +++ b/queue-4.13/socket-bpf-fix-possible-use-after-free.patch @@ -0,0 +1,71 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Eric Dumazet +Date: Mon, 2 Oct 2017 12:20:51 -0700 
+Subject: socket, bpf: fix possible use after free + +From: Eric Dumazet + + +[ Upstream commit eefca20eb20c66b06cf5ed09b49b1a7caaa27b7b ] + +Starting from linux-4.4, 3WHS no longer takes the listener lock. + +Since this time, we might hit a use-after-free in sk_filter_charge(), +if the filter we got in the memcpy() of the listener content +just happened to be replaced by a thread changing listener BPF filter. + +To fix this, we need to make sure the filter refcount is not already +zero before incrementing it again. + +Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets") +Signed-off-by: Eric Dumazet +Acked-by: Alexei Starovoitov +Acked-by: Daniel Borkmann +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/filter.c | 12 ++++++++---- + net/core/sock.c | 5 ++++- + 2 files changed, 12 insertions(+), 5 deletions(-) + +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -975,10 +975,14 @@ static bool __sk_filter_charge(struct so + + bool sk_filter_charge(struct sock *sk, struct sk_filter *fp) + { +- bool ret = __sk_filter_charge(sk, fp); +- if (ret) +- refcount_inc(&fp->refcnt); +- return ret; ++ if (!refcount_inc_not_zero(&fp->refcnt)) ++ return false; ++ ++ if (!__sk_filter_charge(sk, fp)) { ++ sk_filter_release(fp); ++ return false; ++ } ++ return true; + } + + static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -1675,13 +1675,16 @@ struct sock *sk_clone_lock(const struct + + sock_reset_flag(newsk, SOCK_DONE); + +- filter = rcu_dereference_protected(newsk->sk_filter, 1); ++ rcu_read_lock(); ++ filter = rcu_dereference(sk->sk_filter); + if (filter != NULL) + /* though it's an empty new sock, the charging may fail + * if sysctl_optmem_max was changed between creation of + * original socket and cloning + */ + is_charged = sk_filter_charge(newsk, filter); ++ RCU_INIT_POINTER(newsk->sk_filter, filter); ++ rcu_read_unlock(); + + if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) { + /* We need to make sure that we don't uncharge the new diff --git a/queue-4.13/tcp-fastopen-fix-on-syn-data-transmit-failure.patch b/queue-4.13/tcp-fastopen-fix-on-syn-data-transmit-failure.patch new file mode 100644 index 00000000000..59f566b156e --- /dev/null +++ b/queue-4.13/tcp-fastopen-fix-on-syn-data-transmit-failure.patch @@ -0,0 +1,97 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Eric Dumazet +Date: Tue, 19 Sep 2017 10:05:57 -0700 +Subject: tcp: fastopen: fix on syn-data transmit failure + +From: Eric Dumazet + + +[ Upstream commit b5b7db8d680464b1d631fd016f5e093419f0bfd9 ] + +Our recent change exposed a bug in TCP Fastopen Client that syzkaller +found right away [1] + +When we prepare skb with SYN+DATA, we attempt to transmit it, +and we update socket state as if the transmit was a success. + +In socket RTX queue we have two skbs, one with the SYN alone, +and a second one containing the DATA. + +When (malicious) ACK comes in, we now complain that second one had no +skb_mstamp. + +The proper fix is to make sure that if the transmit failed, we do not +pretend we sent the DATA skb, and make it our send_head. + +When 3WHS completes, we can now send the DATA right away, without having +to wait for a timeout. 
+ +[1] +WARNING: CPU: 0 PID: 100189 at net/ipv4/tcp_input.c:3117 tcp_clean_rtx_queue+0x2057/0x2ab0 net/ipv4/tcp_input.c:3117() + + WARN_ON_ONCE(last_ackt == 0); + +Modules linked in: +CPU: 0 PID: 100189 Comm: syz-executor1 Not tainted +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 + 0000000000000000 ffff8800b35cb1d8 ffffffff81cad00d 0000000000000000 + ffffffff828a4347 ffff88009f86c080 ffffffff8316eb20 0000000000000d7f + ffff8800b35cb220 ffffffff812c33c2 ffff8800baad2440 00000009d46575c0 +Call Trace: + [] __dump_stack + [] dump_stack+0xc1/0x124 + [] warn_slowpath_common+0xe2/0x150 + [] warn_slowpath_null+0x2e/0x40 + [] tcp_clean_rtx_queue+0x2057/0x2ab0 n + [] tcp_ack+0x151d/0x3930 + [] tcp_rcv_state_process+0x1c69/0x4fd0 + [] tcp_v4_do_rcv+0x54f/0x7c0 + [] sk_backlog_rcv + [] __release_sock+0x12b/0x3a0 + [] release_sock+0x5e/0x1c0 + [] inet_wait_for_connect + [] __inet_stream_connect+0x545/0xc50 + [] tcp_sendmsg_fastopen + [] tcp_sendmsg+0x2298/0x35a0 + [] inet_sendmsg+0xe5/0x520 + [] sock_sendmsg_nosec + [] sock_sendmsg+0xcf/0x110 + +Fixes: 8c72c65b426b ("tcp: update skb->skb_mstamp more carefully") +Fixes: 783237e8daf1 ("net-tcp: Fast Open client - sending SYN-data") +Signed-off-by: Eric Dumazet +Reported-by: Dmitry Vyukov +Cc: Neal Cardwell +Cc: Yuchung Cheng +Acked-by: Yuchung Cheng +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_output.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -3420,6 +3420,10 @@ static int tcp_send_syn_data(struct sock + goto done; + } + ++ /* data was not sent, this is our new send_head */ ++ sk->sk_send_head = syn_data; ++ tp->packets_out -= tcp_skb_pcount(syn_data); ++ + fallback: + /* Send a regular SYN with Fast Open cookie request option */ + if (fo->cookie.len > 0) +@@ -3472,6 +3476,11 @@ int tcp_connect(struct sock *sk) + */ + tp->snd_nxt = tp->write_seq; + tp->pushed_seq = tp->write_seq; ++ buff = tcp_send_head(sk); ++ if (unlikely(buff)) { ++ tp->snd_nxt = TCP_SKB_CB(buff)->seq; ++ tp->pushed_seq = TCP_SKB_CB(buff)->seq; ++ } + TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS); + + /* Timer for repeating the SYN until an answer. */ diff --git a/queue-4.13/tcp-fix-data-delivery-rate.patch b/queue-4.13/tcp-fix-data-delivery-rate.patch new file mode 100644 index 00000000000..098188c393a --- /dev/null +++ b/queue-4.13/tcp-fix-data-delivery-rate.patch @@ -0,0 +1,46 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Eric Dumazet +Date: Fri, 15 Sep 2017 16:47:42 -0700 +Subject: tcp: fix data delivery rate + +From: Eric Dumazet + + +[ Upstream commit fc22579917eb7e13433448a342f1cb1592920940 ] + +Now skb->mstamp_skb is updated later, we also need to call +tcp_rate_skb_sent() after the update is done. + +Fixes: 8c72c65b426b ("tcp: update skb->skb_mstamp more carefully") +Signed-off-by: Eric Dumazet +Acked-by: Soheil Hassas Yeganeh +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_output.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -1002,8 +1002,6 @@ static int tcp_transmit_skb(struct sock + if (clone_it) { + TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq + - tp->snd_una; +- tcp_rate_skb_sent(sk, skb); +- + oskb = skb; + if (unlikely(skb_cloned(skb))) + skb = pskb_copy(skb, gfp_mask); +@@ -1128,9 +1126,10 @@ static int tcp_transmit_skb(struct sock + tcp_enter_cwr(sk); + err = net_xmit_eval(err); + } +- if (!err && oskb) ++ if (!err && oskb) { + oskb->skb_mstamp = tp->tcp_mstamp; +- ++ tcp_rate_skb_sent(sk, oskb); ++ } + return err; + } + diff --git a/queue-4.13/tcp-update-skb-skb_mstamp-more-carefully.patch b/queue-4.13/tcp-update-skb-skb_mstamp-more-carefully.patch new file mode 100644 index 00000000000..a4bfe7db61c --- /dev/null +++ b/queue-4.13/tcp-update-skb-skb_mstamp-more-carefully.patch @@ -0,0 +1,143 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Eric Dumazet +Date: Wed, 13 Sep 2017 20:30:39 -0700 +Subject: tcp: update skb->skb_mstamp more carefully + +From: Eric Dumazet + + +[ Upstream commit 8c72c65b426b47b3c166a8fef0d8927fe5e8a28d ] + +liujian reported a problem in TCP_USER_TIMEOUT processing with a patch +in tcp_probe_timer() : + https://www.spinics.net/lists/netdev/msg454496.html + +After investigations, the root cause of the problem is that we update +skb->skb_mstamp of skbs in write queue, even if the attempt to send a +clone or copy of it failed. One reason being a routing problem. + +This patch prevents this, solving liujian issue. + +It also removes a potential RTT miscalculation, since +__tcp_retransmit_skb() is not OR-ing TCP_SKB_CB(skb)->sacked with +TCPCB_EVER_RETRANS if a failure happens, but skb->skb_mstamp has +been changed. + +A future ACK would then lead to a very small RTT sample and min_rtt +would then be lowered to this too small value. + +Tested: + +# cat user_timeout.pkt +--local_ip=192.168.102.64 + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 `ifconfig tun0 192.168.102.64/16; ip ro add 192.0.2.1 dev tun0` + + +0 < S 0:0(0) win 0 + +0 > S. 0:0(0) ack 1 + + +.1 < . 1:1(0) ack 1 win 65530 + +0 accept(3, ..., ...) = 4 + + +0 setsockopt(4, SOL_TCP, TCP_USER_TIMEOUT, [3000], 4) = 0 + +0 write(4, ..., 24) = 24 + +0 > P. 1:25(24) ack 1 win 29200 + +.1 < . 1:1(0) ack 25 win 65530 + +//change the ipaddress + +1 `ifconfig tun0 192.168.0.10/16` + + +1 write(4, ..., 24) = 24 + +1 write(4, ..., 24) = 24 + +1 write(4, ..., 24) = 24 + +1 write(4, ..., 24) = 24 + + +0 `ifconfig tun0 192.168.102.64/16` + +0 < . 1:2(1) ack 25 win 65530 + +0 `ifconfig tun0 192.168.0.10/16` + + +3 write(4, ..., 24) = -1 + +# ./packetdrill user_timeout.pkt + +Signed-off-by: Eric Dumazet +Reported-by: liujian +Acked-by: Neal Cardwell +Acked-by: Yuchung Cheng +Acked-by: Soheil Hassas Yeganeh +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_output.c | 19 ++++++++++++------- + 1 file changed, 12 insertions(+), 7 deletions(-) + +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -991,6 +991,7 @@ static int tcp_transmit_skb(struct sock + struct tcp_skb_cb *tcb; + struct tcp_out_options opts; + unsigned int tcp_options_size, tcp_header_size; ++ struct sk_buff *oskb = NULL; + struct tcp_md5sig_key *md5; + struct tcphdr *th; + int err; +@@ -998,12 +999,12 @@ static int tcp_transmit_skb(struct sock + BUG_ON(!skb || !tcp_skb_pcount(skb)); + tp = tcp_sk(sk); + +- skb->skb_mstamp = tp->tcp_mstamp; + if (clone_it) { + TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq + - tp->snd_una; + tcp_rate_skb_sent(sk, skb); + ++ oskb = skb; + if (unlikely(skb_cloned(skb))) + skb = pskb_copy(skb, gfp_mask); + else +@@ -1011,6 +1012,7 @@ static int tcp_transmit_skb(struct sock + if (unlikely(!skb)) + return -ENOBUFS; + } ++ skb->skb_mstamp = tp->tcp_mstamp; + + inet = inet_sk(sk); + tcb = TCP_SKB_CB(skb); +@@ -1122,12 +1124,14 @@ static int tcp_transmit_skb(struct sock + + err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl); + +- if (likely(err <= 0)) +- return err; +- +- tcp_enter_cwr(sk); ++ if (unlikely(err > 0)) { ++ tcp_enter_cwr(sk); ++ err = net_xmit_eval(err); ++ } ++ if (!err && oskb) ++ oskb->skb_mstamp = tp->tcp_mstamp; + +- return net_xmit_eval(err); ++ return err; + } + + /* This routine just queues the buffer for sending. +@@ -2866,10 +2870,11 @@ int __tcp_retransmit_skb(struct sock *sk + skb_headroom(skb) >= 0xFFFF)) { + struct sk_buff *nskb; + +- skb->skb_mstamp = tp->tcp_mstamp; + nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); + err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : + -ENOBUFS; ++ if (!err) ++ skb->skb_mstamp = tp->tcp_mstamp; + } else { + err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); + } diff --git a/queue-4.13/tipc-use-only-positive-error-codes-in-messages.patch b/queue-4.13/tipc-use-only-positive-error-codes-in-messages.patch new file mode 100644 index 00000000000..aeb408d452b --- /dev/null +++ b/queue-4.13/tipc-use-only-positive-error-codes-in-messages.patch @@ -0,0 +1,38 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Parthasarathy Bhuvaragan +Date: Fri, 29 Sep 2017 10:02:54 +0200 +Subject: tipc: use only positive error codes in messages + +From: Parthasarathy Bhuvaragan + + +[ Upstream commit aad06212d36cf34859428a0a279e5c14ee5c9e26 ] + +In commit e3a77561e7d32 ("tipc: split up function tipc_msg_eval()"), +we have updated the function tipc_msg_lookup_dest() to set the error +codes to negative values at destination lookup failures. Thus when +the function sets the error code to -TIPC_ERR_NO_NAME, its inserted +into the 4 bit error field of the message header as 0xf instead of +TIPC_ERR_NO_NAME (1). The value 0xf is an unknown error code. + +In this commit, we set only positive error code. + +Fixes: e3a77561e7d32 ("tipc: split up function tipc_msg_eval()") +Signed-off-by: Parthasarathy Bhuvaragan +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/tipc/msg.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/tipc/msg.c ++++ b/net/tipc/msg.c +@@ -551,7 +551,7 @@ bool tipc_msg_lookup_dest(struct net *ne + return false; + if (msg_errcode(msg)) + return false; +- *err = -TIPC_ERR_NO_NAME; ++ *err = TIPC_ERR_NO_NAME; + if (skb_linearize(skb)) + return false; + msg = buf_msg(skb); diff --git a/queue-4.13/tun-bail-out-from-tun_get_user-if-the-skb-is-empty.patch b/queue-4.13/tun-bail-out-from-tun_get_user-if-the-skb-is-empty.patch new file mode 100644 index 00000000000..3c7905fb2d9 --- /dev/null +++ b/queue-4.13/tun-bail-out-from-tun_get_user-if-the-skb-is-empty.patch @@ -0,0 +1,111 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Alexander Potapenko +Date: Thu, 28 Sep 2017 11:32:37 +0200 +Subject: tun: bail out from tun_get_user() if the skb is empty + +From: Alexander Potapenko + + +[ Upstream commit 2580c4c17aee3ad58e9751012bad278dd074ccae ] + +KMSAN (https://github.com/google/kmsan) reported accessing uninitialized +skb->data[0] in the case the skb is empty (i.e. skb->len is 0): + +================================================ +BUG: KMSAN: use of uninitialized memory in tun_get_user+0x19ba/0x3770 +CPU: 0 PID: 3051 Comm: probe Not tainted 4.13.0+ #3140 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 +Call Trace: +... + __msan_warning_32+0x66/0xb0 mm/kmsan/kmsan_instr.c:477 + tun_get_user+0x19ba/0x3770 drivers/net/tun.c:1301 + tun_chr_write_iter+0x19f/0x300 drivers/net/tun.c:1365 + call_write_iter ./include/linux/fs.h:1743 + new_sync_write fs/read_write.c:457 + __vfs_write+0x6c3/0x7f0 fs/read_write.c:470 + vfs_write+0x3e4/0x770 fs/read_write.c:518 + SYSC_write+0x12f/0x2b0 fs/read_write.c:565 + SyS_write+0x55/0x80 fs/read_write.c:557 + do_syscall_64+0x242/0x330 arch/x86/entry/common.c:284 + entry_SYSCALL64_slow_path+0x25/0x25 arch/x86/entry/entry_64.S:245 +... +origin: +... + kmsan_poison_shadow+0x6e/0xc0 mm/kmsan/kmsan.c:211 + slab_alloc_node mm/slub.c:2732 + __kmalloc_node_track_caller+0x351/0x370 mm/slub.c:4351 + __kmalloc_reserve net/core/skbuff.c:138 + __alloc_skb+0x26a/0x810 net/core/skbuff.c:231 + alloc_skb ./include/linux/skbuff.h:903 + alloc_skb_with_frags+0x1d7/0xc80 net/core/skbuff.c:4756 + sock_alloc_send_pskb+0xabf/0xfe0 net/core/sock.c:2037 + tun_alloc_skb drivers/net/tun.c:1144 + tun_get_user+0x9a8/0x3770 drivers/net/tun.c:1274 + tun_chr_write_iter+0x19f/0x300 drivers/net/tun.c:1365 + call_write_iter ./include/linux/fs.h:1743 + new_sync_write fs/read_write.c:457 + __vfs_write+0x6c3/0x7f0 fs/read_write.c:470 + vfs_write+0x3e4/0x770 fs/read_write.c:518 + SYSC_write+0x12f/0x2b0 fs/read_write.c:565 + SyS_write+0x55/0x80 fs/read_write.c:557 + do_syscall_64+0x242/0x330 arch/x86/entry/common.c:284 + return_from_SYSCALL_64+0x0/0x6a arch/x86/entry/entry_64.S:245 +================================================ + +Make sure tun_get_user() doesn't touch skb->data[0] unless there is +actual data. 
+ +C reproducer below: +========================== + // autogenerated by syzkaller (http://github.com/google/syzkaller) + + #define _GNU_SOURCE + + #include + #include + #include + #include + #include + #include + + int main() + { + int sock = socket(PF_INET, SOCK_STREAM, IPPROTO_IP); + int tun_fd = open("/dev/net/tun", O_RDWR); + struct ifreq req; + memset(&req, 0, sizeof(struct ifreq)); + strcpy((char*)&req.ifr_name, "gre0"); + req.ifr_flags = IFF_UP | IFF_MULTICAST; + ioctl(tun_fd, TUNSETIFF, &req); + ioctl(sock, SIOCSIFFLAGS, "gre0"); + write(tun_fd, "hi", 0); + return 0; + } +========================== + +Signed-off-by: Alexander Potapenko +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/tun.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -1298,11 +1298,13 @@ static ssize_t tun_get_user(struct tun_s + switch (tun->flags & TUN_TYPE_MASK) { + case IFF_TUN: + if (tun->flags & IFF_NO_PI) { +- switch (skb->data[0] & 0xf0) { +- case 0x40: ++ u8 ip_version = skb->len ? (skb->data[0] >> 4) : 0; ++ ++ switch (ip_version) { ++ case 4: + pi.proto = htons(ETH_P_IP); + break; +- case 0x60: ++ case 6: + pi.proto = htons(ETH_P_IPV6); + break; + default: diff --git a/queue-4.13/udp-perform-source-validation-for-mcast-early-demux.patch b/queue-4.13/udp-perform-source-validation-for-mcast-early-demux.patch new file mode 100644 index 00000000000..cfee690523f --- /dev/null +++ b/queue-4.13/udp-perform-source-validation-for-mcast-early-demux.patch @@ -0,0 +1,191 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Paolo Abeni +Date: Thu, 28 Sep 2017 15:51:37 +0200 +Subject: udp: perform source validation for mcast early demux + +From: Paolo Abeni + + +[ Upstream commit bc044e8db7962e727a75b591b9851ff2ac5cf846 ] + +The UDP early demux can leverate the rx dst cache even for +multicast unconnected sockets. + +In such scenario the ipv4 source address is validated only on +the first packet in the given flow. After that, when we fetch +the dst entry from the socket rx cache, we stop enforcing +the rp_filter and we even start accepting any kind of martian +addresses. + +Disabling the dst cache for unconnected multicast socket will +cause large performace regression, nearly reducing by half the +max ingress tput. + +Instead we factor out a route helper to completely validate an +skb source address for multicast packets and we call it from +the UDP early demux for mcast packets landing on unconnected +sockets, after successful fetching the related cached dst entry. + +This still gives a measurable, but limited performance +regression: + + rp_filter = 0 rp_filter = 1 +edmux disabled: 1182 Kpps 1127 Kpps +edmux before: 2238 Kpps 2238 Kpps +edmux after: 2037 Kpps 2019 Kpps + +The above figures are on top of current net tree. +Applying the net-next commit 6e617de84e87 ("net: avoid a full +fib lookup when rp_filter is disabled.") the delta with +rp_filter == 0 will decrease even more. + +Fixes: 421b3885bf6d ("udp: ipv4: Add udp early demux") +Signed-off-by: Paolo Abeni +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/route.h | 4 +++- + net/ipv4/route.c | 46 ++++++++++++++++++++++++++-------------------- + net/ipv4/udp.c | 13 ++++++++++++- + 3 files changed, 41 insertions(+), 22 deletions(-) + +--- a/include/net/route.h ++++ b/include/net/route.h +@@ -175,7 +175,9 @@ static inline struct rtable *ip_route_ou + fl4->fl4_gre_key = gre_key; + return ip_route_output_key(net, fl4); + } +- ++int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, ++ u8 tos, struct net_device *dev, ++ struct in_device *in_dev, u32 *itag); + int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src, + u8 tos, struct net_device *devin); + int ip_route_input_rcu(struct sk_buff *skb, __be32 dst, __be32 src, +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -1520,43 +1520,56 @@ struct rtable *rt_dst_alloc(struct net_d + EXPORT_SYMBOL(rt_dst_alloc); + + /* called in rcu_read_lock() section */ +-static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, +- u8 tos, struct net_device *dev, int our) ++int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, ++ u8 tos, struct net_device *dev, ++ struct in_device *in_dev, u32 *itag) + { +- struct rtable *rth; +- struct in_device *in_dev = __in_dev_get_rcu(dev); +- unsigned int flags = RTCF_MULTICAST; +- u32 itag = 0; + int err; + + /* Primary sanity checks. */ +- + if (!in_dev) + return -EINVAL; + + if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) || + skb->protocol != htons(ETH_P_IP)) +- goto e_inval; ++ return -EINVAL; + + if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev)) +- goto e_inval; ++ return -EINVAL; + + if (ipv4_is_zeronet(saddr)) { + if (!ipv4_is_local_multicast(daddr)) +- goto e_inval; ++ return -EINVAL; + } else { + err = fib_validate_source(skb, saddr, 0, tos, 0, dev, +- in_dev, &itag); ++ in_dev, itag); + if (err < 0) +- goto e_err; ++ return err; + } ++ return 0; ++} ++ ++/* called in rcu_read_lock() section */ ++static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, ++ u8 tos, struct net_device *dev, int our) ++{ ++ struct in_device *in_dev = __in_dev_get_rcu(dev); ++ unsigned int flags = RTCF_MULTICAST; ++ struct rtable *rth; ++ u32 itag = 0; ++ int err; ++ ++ err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag); ++ if (err) ++ return err; ++ + if (our) + flags |= RTCF_LOCAL; + + rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST, + IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false); + if (!rth) +- goto e_nobufs; ++ return -ENOBUFS; + + #ifdef CONFIG_IP_ROUTE_CLASSID + rth->dst.tclassid = itag; +@@ -1572,13 +1585,6 @@ static int ip_route_input_mc(struct sk_b + + skb_dst_set(skb, &rth->dst); + return 0; +- +-e_nobufs: +- return -ENOBUFS; +-e_inval: +- return -EINVAL; +-e_err: +- return err; + } + + +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -2220,6 +2220,7 @@ static struct sock *__udp4_lib_demux_loo + int udp_v4_early_demux(struct sk_buff *skb) + { + struct net *net = dev_net(skb->dev); ++ struct in_device *in_dev = NULL; + const struct iphdr *iph; + const struct udphdr *uh; + struct sock *sk = NULL; +@@ -2236,7 +2237,7 @@ int udp_v4_early_demux(struct sk_buff *s + + if (skb->pkt_type == PACKET_BROADCAST || + skb->pkt_type == PACKET_MULTICAST) { +- struct in_device *in_dev = __in_dev_get_rcu(skb->dev); ++ in_dev = __in_dev_get_rcu(skb->dev); + + if (!in_dev) + return 0; +@@ -2266,11 +2267,21 @@ int udp_v4_early_demux(struct sk_buff *s + if (dst) + dst = 
dst_check(dst, 0); + if (dst) { ++ u32 itag = 0; ++ + /* set noref for now. + * any place which wants to hold dst has to call + * dst_hold_safe() + */ + skb_dst_set_noref(skb, dst); ++ ++ /* for unconnected multicast sockets we need to validate ++ * the source on each packet ++ */ ++ if (!inet_sk(sk)->inet_daddr && in_dev) ++ return ip_mc_validate_source(skb, iph->daddr, ++ iph->saddr, iph->tos, ++ skb->dev, in_dev, &itag); + } + return 0; + } diff --git a/queue-4.13/udpv6-fix-the-checksum-computation-when-hw-checksum-does-not-apply.patch b/queue-4.13/udpv6-fix-the-checksum-computation-when-hw-checksum-does-not-apply.patch new file mode 100644 index 00000000000..f082b2756c8 --- /dev/null +++ b/queue-4.13/udpv6-fix-the-checksum-computation-when-hw-checksum-does-not-apply.patch @@ -0,0 +1,36 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Subash Abhinov Kasiviswanathan +Date: Wed, 13 Sep 2017 19:30:51 -0600 +Subject: udpv6: Fix the checksum computation when HW checksum does not apply + +From: Subash Abhinov Kasiviswanathan + + +[ Upstream commit 63ecc3d9436f8012e49dc846d6cb0a85a3433517 ] + +While trying an ESP transport mode encryption for UDPv6 packets of +datagram size 1436 with MTU 1500, checksum error was observed in +the secondary fragment. + +This error occurs due to the UDP payload checksum being missed out +when computing the full checksum for these packets in +udp6_hwcsum_outgoing(). + +Fixes: d39d938c8228 ("ipv6: Introduce udpv6_send_skb()") +Signed-off-by: Subash Abhinov Kasiviswanathan +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/udp.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ipv6/udp.c ++++ b/net/ipv6/udp.c +@@ -1011,6 +1011,7 @@ static void udp6_hwcsum_outgoing(struct + */ + offset = skb_transport_offset(skb); + skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); ++ csum = skb->csum; + + skb->ip_summed = CHECKSUM_NONE; + diff --git a/queue-4.13/vti-fix-use-after-free-in-vti_tunnel_xmit-vti6_tnl_xmit.patch b/queue-4.13/vti-fix-use-after-free-in-vti_tunnel_xmit-vti6_tnl_xmit.patch new file mode 100644 index 00000000000..a6a4249727d --- /dev/null +++ b/queue-4.13/vti-fix-use-after-free-in-vti_tunnel_xmit-vti6_tnl_xmit.patch @@ -0,0 +1,98 @@ +From foo@baz Mon Oct 9 09:32:35 CEST 2017 +From: Alexey Kodanev +Date: Tue, 26 Sep 2017 15:14:29 +0300 +Subject: vti: fix use after free in vti_tunnel_xmit/vti6_tnl_xmit + +From: Alexey Kodanev + + +[ Upstream commit 36f6ee22d2d66046e369757ec6bbe1c482957ba6 ] + +When running LTP IPsec tests, KASan might report: + +BUG: KASAN: use-after-free in vti_tunnel_xmit+0xeee/0xff0 [ip_vti] +Read of size 4 at addr ffff880dc6ad1980 by task swapper/0/0 +... +Call Trace: + + dump_stack+0x63/0x89 + print_address_description+0x7c/0x290 + kasan_report+0x28d/0x370 + ? vti_tunnel_xmit+0xeee/0xff0 [ip_vti] + __asan_report_load4_noabort+0x19/0x20 + vti_tunnel_xmit+0xeee/0xff0 [ip_vti] + ? vti_init_net+0x190/0x190 [ip_vti] + ? save_stack_trace+0x1b/0x20 + ? save_stack+0x46/0xd0 + dev_hard_start_xmit+0x147/0x510 + ? icmp_echo.part.24+0x1f0/0x210 + __dev_queue_xmit+0x1394/0x1c60 +... 
+Freed by task 0: + save_stack_trace+0x1b/0x20 + save_stack+0x46/0xd0 + kasan_slab_free+0x70/0xc0 + kmem_cache_free+0x81/0x1e0 + kfree_skbmem+0xb1/0xe0 + kfree_skb+0x75/0x170 + kfree_skb_list+0x3e/0x60 + __dev_queue_xmit+0x1298/0x1c60 + dev_queue_xmit+0x10/0x20 + neigh_resolve_output+0x3a8/0x740 + ip_finish_output2+0x5c0/0xe70 + ip_finish_output+0x4ba/0x680 + ip_output+0x1c1/0x3a0 + xfrm_output_resume+0xc65/0x13d0 + xfrm_output+0x1e4/0x380 + xfrm4_output_finish+0x5c/0x70 + +Can be fixed if we get skb->len before dst_output(). + +Fixes: b9959fd3b0fa ("vti: switch to new ip tunnel code") +Fixes: 22e1b23dafa8 ("vti6: Support inter address family tunneling.") +Signed-off-by: Alexey Kodanev +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_vti.c | 3 ++- + net/ipv6/ip6_vti.c | 3 ++- + 2 files changed, 4 insertions(+), 2 deletions(-) + +--- a/net/ipv4/ip_vti.c ++++ b/net/ipv4/ip_vti.c +@@ -168,6 +168,7 @@ static netdev_tx_t vti_xmit(struct sk_bu + struct ip_tunnel_parm *parms = &tunnel->parms; + struct dst_entry *dst = skb_dst(skb); + struct net_device *tdev; /* Device to other host */ ++ int pkt_len = skb->len; + int err; + int mtu; + +@@ -229,7 +230,7 @@ static netdev_tx_t vti_xmit(struct sk_bu + + err = dst_output(tunnel->net, skb->sk, skb); + if (net_xmit_eval(err) == 0) +- err = skb->len; ++ err = pkt_len; + iptunnel_xmit_stats(dev, err); + return NETDEV_TX_OK; + +--- a/net/ipv6/ip6_vti.c ++++ b/net/ipv6/ip6_vti.c +@@ -445,6 +445,7 @@ vti6_xmit(struct sk_buff *skb, struct ne + struct dst_entry *dst = skb_dst(skb); + struct net_device *tdev; + struct xfrm_state *x; ++ int pkt_len = skb->len; + int err = -1; + int mtu; + +@@ -502,7 +503,7 @@ vti6_xmit(struct sk_buff *skb, struct ne + struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); + + u64_stats_update_begin(&tstats->syncp); +- tstats->tx_bytes += skb->len; ++ tstats->tx_bytes += pkt_len; + tstats->tx_packets++; + u64_stats_update_end(&tstats->syncp); + } else {
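
Note on the pattern used by the vti fix above: any skb field that is still
needed after dst_output() has to be read before the call, because
dst_output() may queue, drop or free the skb, as the KASAN report shows.
The sketch below is a minimal userspace illustration of that ownership
rule, not kernel code; consume_packet() is a hypothetical stand-in for
dst_output(), and no tun/vti internals are modelled.

==========================
/* Minimal illustration only: a consumer that takes ownership of the
 * buffer and frees it, mirroring how dst_output() may free the skb.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct packet {
	size_t len;
	char *data;
};

/* Hypothetical stand-in for dst_output(): consumes and frees pkt. */
static int consume_packet(struct packet *pkt)
{
	int ret = (int)pkt->len;	/* pretend the transmit succeeded */

	free(pkt->data);
	free(pkt);
	return ret;
}

int main(void)
{
	struct packet *pkt = malloc(sizeof(*pkt));
	size_t pkt_len;

	if (!pkt)
		return 1;
	pkt->data = strdup("example payload");
	if (!pkt->data) {
		free(pkt);
		return 1;
	}
	pkt->len = strlen(pkt->data);

	/* Read the length before ownership is handed over ... */
	pkt_len = pkt->len;

	consume_packet(pkt);

	/* ... so the accounting below never touches freed memory.
	 * Using pkt->len here would be the same use-after-free that
	 * KASAN reported in vti_tunnel_xmit()/vti6_tnl_xmit().
	 */
	printf("tx_bytes += %zu\n", pkt_len);
	return 0;
}
==========================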