4.13-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Mon, 9 Oct 2017 07:33:30 +0000 (09:33 +0200)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Mon, 9 Oct 2017 07:33:30 +0000 (09:33 +0200)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 9 Oct 2017 07:33:30 +0000 (09:33 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 9 Oct 2017 07:33:30 +0000 (09:33 +0200)
diff --git a/queue-4.13/8139too-revisit-napi_complete_done-usage.patch b/queue-4.13/8139too-revisit-napi_complete_done-usage.patch

new file mode 100644 (file)

index 0000000..31db445
--- /dev/null
+++ b/queue-4.13/8139too-revisit-napi_complete_done-usage.patch
@@ -0,0 +1,44 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 18 Sep 2017 13:03:43 -0700
+Subject: 8139too: revisit napi_complete_done() usage
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 129c6cda2de2a8ac44fab096152469999b727faf ]
+
+It seems we have to be more careful in napi_complete_done()
+use. This patch is not a revert, as it seems we can
+avoid the bug that Ville reported by moving the napi_complete_done()
+test into the spinlock section.
+
+Many thanks to Ville for detective work and all tests.
+
+Fixes: 617f01211baf ("8139too: use napi_complete_done()")
+Reported-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
+Tested-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
+
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/realtek/8139too.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/realtek/8139too.c
++++ b/drivers/net/ethernet/realtek/8139too.c
+@@ -2135,11 +2135,12 @@ static int rtl8139_poll(struct napi_stru
+       if (likely(RTL_R16(IntrStatus) & RxAckBits))
+               work_done += rtl8139_rx(dev, tp, budget);
+ 
+-      if (work_done < budget && napi_complete_done(napi, work_done)) {
++      if (work_done < budget) {
+               unsigned long flags;
+ 
+               spin_lock_irqsave(&tp->lock, flags);
+-              RTL_W16_F(IntrMask, rtl8139_intr_mask);
++              if (napi_complete_done(napi, work_done))
++                      RTL_W16_F(IntrMask, rtl8139_intr_mask);
+               spin_unlock_irqrestore(&tp->lock, flags);
+       }
+       spin_unlock(&tp->rx_lock);
diff --git a/queue-4.13/bpf-do-not-disable-enable-bh-in-bpf_map_free_id.patch b/queue-4.13/bpf-do-not-disable-enable-bh-in-bpf_map_free_id.patch

new file mode 100644 (file)

index 0000000..4c8f271
--- /dev/null
+++ b/queue-4.13/bpf-do-not-disable-enable-bh-in-bpf_map_free_id.patch
@@ -0,0 +1,95 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 19 Sep 2017 09:15:59 -0700
+Subject: bpf: do not disable/enable BH in bpf_map_free_id()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 930651a75bf1ba6893a8b8475270664ebdb6cf4a ]
+
+syzkaller reported the following splat [1]
+
+Since hard irq are disabled by the caller, bpf_map_free_id()
+should not try to enable/disable BH.
+
+Another solution would be to change htab_map_delete_elem() to
+defer the free_htab_elem() call after
+raw_spin_unlock_irqrestore(&b->lock, flags), but this might not be
+enough to cover other code paths.
+
+[1]
+WARNING: CPU: 1 PID: 8052 at kernel/softirq.c:161 __local_bh_enable_ip
++0x1e/0x160 kernel/softirq.c:161
+Kernel panic - not syncing: panic_on_warn set ...
+
+CPU: 1 PID: 8052 Comm: syz-executor1 Not tainted 4.13.0-next-20170915+
+#23
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
+Google 01/01/2011
+Call Trace:
+ __dump_stack lib/dump_stack.c:16 [inline]
+ dump_stack+0x194/0x257 lib/dump_stack.c:52
+ panic+0x1e4/0x417 kernel/panic.c:181
+ __warn+0x1c4/0x1d9 kernel/panic.c:542
+ report_bug+0x211/0x2d0 lib/bug.c:183
+ fixup_bug+0x40/0x90 arch/x86/kernel/traps.c:178
+ do_trap_no_signal arch/x86/kernel/traps.c:212 [inline]
+ do_trap+0x260/0x390 arch/x86/kernel/traps.c:261
+ do_error_trap+0x120/0x390 arch/x86/kernel/traps.c:298
+ do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:311
+ invalid_op+0x18/0x20 arch/x86/entry/entry_64.S:905
+RIP: 0010:__local_bh_enable_ip+0x1e/0x160 kernel/softirq.c:161
+RSP: 0018:ffff8801cdcd7748 EFLAGS: 00010046
+RAX: 0000000000000082 RBX: 0000000000000201 RCX: 0000000000000000
+RDX: 1ffffffff0b5933c RSI: 0000000000000201 RDI: ffffffff85ac99e0
+RBP: ffff8801cdcd7758 R08: ffffffff85b87158 R09: 1ffff10039b9aec6
+R10: ffff8801c99f24c0 R11: 0000000000000002 R12: ffffffff817b0b47
+R13: dffffc0000000000 R14: ffff8801cdcd77e8 R15: 0000000000000001
+ __raw_spin_unlock_bh include/linux/spinlock_api_smp.h:176 [inline]
+ _raw_spin_unlock_bh+0x30/0x40 kernel/locking/spinlock.c:207
+ spin_unlock_bh include/linux/spinlock.h:361 [inline]
+ bpf_map_free_id kernel/bpf/syscall.c:197 [inline]
+ __bpf_map_put+0x267/0x320 kernel/bpf/syscall.c:227
+ bpf_map_put+0x1a/0x20 kernel/bpf/syscall.c:235
+ bpf_map_fd_put_ptr+0x15/0x20 kernel/bpf/map_in_map.c:96
+ free_htab_elem+0xc3/0x1b0 kernel/bpf/hashtab.c:658
+ htab_map_delete_elem+0x74d/0x970 kernel/bpf/hashtab.c:1063
+ map_delete_elem kernel/bpf/syscall.c:633 [inline]
+ SYSC_bpf kernel/bpf/syscall.c:1479 [inline]
+ SyS_bpf+0x2188/0x46a0 kernel/bpf/syscall.c:1451
+ entry_SYSCALL_64_fastpath+0x1f/0xbe
+
+Fixes: f3f1c054c288 ("bpf: Introduce bpf_map ID")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Martin KaFai Lau <kafai@fb.com>
+Acked-by: Martin KaFai Lau <kafai@fb.com>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/syscall.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/kernel/bpf/syscall.c
++++ b/kernel/bpf/syscall.c
+@@ -144,15 +144,17 @@ static int bpf_map_alloc_id(struct bpf_m
+ 
+ static void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
+ {
++      unsigned long flags;
++
+       if (do_idr_lock)
+-              spin_lock_bh(&map_idr_lock);
++              spin_lock_irqsave(&map_idr_lock, flags);
+       else
+               __acquire(&map_idr_lock);
+ 
+       idr_remove(&map_idr, map->id);
+ 
+       if (do_idr_lock)
+-              spin_unlock_bh(&map_idr_lock);
++              spin_unlock_irqrestore(&map_idr_lock, flags);
+       else
+               __release(&map_idr_lock);
+ }
diff --git a/queue-4.13/bpf-fix-bpf_tail_call-x64-jit.patch b/queue-4.13/bpf-fix-bpf_tail_call-x64-jit.patch

new file mode 100644 (file)

index 0000000..60d5b4d
--- /dev/null
+++ b/queue-4.13/bpf-fix-bpf_tail_call-x64-jit.patch
@@ -0,0 +1,70 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Alexei Starovoitov <ast@fb.com>
+Date: Tue, 3 Oct 2017 15:37:20 -0700
+Subject: bpf: fix bpf_tail_call() x64 JIT
+
+From: Alexei Starovoitov <ast@fb.com>
+
+
+[ Upstream commit 90caccdd8cc0215705f18b92771b449b01e2474a ]
+
+- bpf prog_array just like all other types of bpf array accepts 32-bit index.
+  Clarify that in the comment.
+- fix x64 JIT of bpf_tail_call which was incorrectly loading 8 instead of 4 bytes
+- tighten corresponding check in the interpreter to stay consistent
+
+The JIT bug can be triggered after introduction of BPF_F_NUMA_NODE flag
+in commit 96eabe7a40aa in 4.14. Before that the map_flags would stay zero and
+though JIT code is wrong it will check bounds correctly.
+Hence two fixes tags. All other JITs don't have this problem.
+
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Fixes: 96eabe7a40aa ("bpf: Allow selecting numa node during map creation")
+Fixes: b52f00e6a715 ("x86: bpf_jit: implement bpf_tail_call() helper")
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Martin KaFai Lau <kafai@fb.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/net/bpf_jit_comp.c |    4 ++--
+ include/uapi/linux/bpf.h    |    2 +-
+ kernel/bpf/core.c           |    2 +-
+ 3 files changed, 4 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/net/bpf_jit_comp.c
++++ b/arch/x86/net/bpf_jit_comp.c
+@@ -282,9 +282,9 @@ static void emit_bpf_tail_call(u8 **ppro
+       /* if (index >= array->map.max_entries)
+        *   goto out;
+        */
+-      EMIT4(0x48, 0x8B, 0x46,                   /* mov rax, qword ptr [rsi + 16] */
++      EMIT2(0x89, 0xD2);                        /* mov edx, edx */
++      EMIT3(0x39, 0x56,                         /* cmp dword ptr [rsi + 16], edx */
+             offsetof(struct bpf_array, map.max_entries));
+-      EMIT3(0x48, 0x39, 0xD0);                  /* cmp rax, rdx */
+ #define OFFSET1 47 /* number of bytes to jump */
+       EMIT2(X86_JBE, OFFSET1);                  /* jbe out */
+       label1 = cnt;
+--- a/include/uapi/linux/bpf.h
++++ b/include/uapi/linux/bpf.h
+@@ -294,7 +294,7 @@ union bpf_attr {
+  *     jump into another BPF program
+  *     @ctx: context pointer passed to next program
+  *     @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
+- *     @index: index inside array that selects specific program to run
++ *     @index: 32-bit index inside array that selects specific program to run
+  *     Return: 0 on success or negative error
+  *
+  * int bpf_clone_redirect(skb, ifindex, flags)
+--- a/kernel/bpf/core.c
++++ b/kernel/bpf/core.c
+@@ -1010,7 +1010,7 @@ select_insn:
+               struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2;
+               struct bpf_array *array = container_of(map, struct bpf_array, map);
+               struct bpf_prog *prog;
+-              u64 index = BPF_R3;
++              u32 index = BPF_R3;
+ 
+               if (unlikely(index >= array->map.max_entries))
+                       goto out;
diff --git a/queue-4.13/bpf-one-perf-event-close-won-t-free-bpf-program-attached-by-another-perf-event.patch b/queue-4.13/bpf-one-perf-event-close-won-t-free-bpf-program-attached-by-another-perf-event.patch

new file mode 100644 (file)

index 0000000..494b4b1
--- /dev/null
+++ b/queue-4.13/bpf-one-perf-event-close-won-t-free-bpf-program-attached-by-another-perf-event.patch
@@ -0,0 +1,64 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Yonghong Song <yhs@fb.com>
+Date: Mon, 18 Sep 2017 16:38:36 -0700
+Subject: bpf: one perf event close won't free bpf program attached by another perf event
+
+From: Yonghong Song <yhs@fb.com>
+
+
+[ Upstream commit ec9dd352d591f0c90402ec67a317c1ed4fb2e638 ]
+
+This patch fixes a bug exhibited by the following scenario:
+  1. fd1 = perf_event_open with attr.config = ID1
+  2. attach bpf program prog1 to fd1
+  3. fd2 = perf_event_open with attr.config = ID1
+     <this will be successful>
+  4. user program closes fd2 and prog1 is detached from the tracepoint.
+  5. user program with fd1 does not work properly, as the tracepoint
+     produces no output any more.
+
+The issue happens at step 4. Multiple perf_event_open can be called
+successfully, but there is only one bpf prog pointer in the tp_event. In the
+current logic, any fd release for the same tp_event will free
+the tp_event->prog.
+
+The fix is to free tp_event->prog only when the closing fd
+corresponds to the one which registered the program.
+
+Signed-off-by: Yonghong Song <yhs@fb.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/trace_events.h |    1 +
+ kernel/events/core.c         |    3 ++-
+ 2 files changed, 3 insertions(+), 1 deletion(-)
+
+--- a/include/linux/trace_events.h
++++ b/include/linux/trace_events.h
+@@ -277,6 +277,7 @@ struct trace_event_call {
+       int                             perf_refcount;
+       struct hlist_head __percpu      *perf_events;
+       struct bpf_prog                 *prog;
++      struct perf_event               *bpf_prog_owner;
+ 
+       int     (*perf_perm)(struct trace_event_call *,
+                            struct perf_event *);
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -8121,6 +8121,7 @@ static int perf_event_set_bpf_prog(struc
+               }
+       }
+       event->tp_event->prog = prog;
++      event->tp_event->bpf_prog_owner = event;
+ 
+       return 0;
+ }
+@@ -8135,7 +8136,7 @@ static void perf_event_free_bpf_prog(str
+               return;
+ 
+       prog = event->tp_event->prog;
+-      if (prog) {
++      if (prog && event->tp_event->bpf_prog_owner == event) {
+               event->tp_event->prog = NULL;
+               bpf_prog_put(prog);
+       }
diff --git a/queue-4.13/bpf-verifier-reject-bpf_alu64-bpf_end.patch b/queue-4.13/bpf-verifier-reject-bpf_alu64-bpf_end.patch

new file mode 100644 (file)

index 0000000..d458cc5
--- /dev/null
+++ b/queue-4.13/bpf-verifier-reject-bpf_alu64-bpf_end.patch
@@ -0,0 +1,61 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Edward Cree <ecree@solarflare.com>
+Date: Fri, 15 Sep 2017 14:37:38 +0100
+Subject: bpf/verifier: reject BPF_ALU64|BPF_END
+
+From: Edward Cree <ecree@solarflare.com>
+
+
+[ Upstream commit e67b8a685c7c984e834e3181ef4619cd7025a136 ]
+
+Neither ___bpf_prog_run nor the JITs accept it.
+Also adds a new test case.
+
+Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)")
+Signed-off-by: Edward Cree <ecree@solarflare.com>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/verifier.c                       |    3 ++-
+ tools/testing/selftests/bpf/test_verifier.c |   16 ++++++++++++++++
+ 2 files changed, 18 insertions(+), 1 deletion(-)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -1978,7 +1978,8 @@ static int check_alu_op(struct bpf_verif
+                       }
+               } else {
+                       if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
+-                          (insn->imm != 16 && insn->imm != 32 && insn->imm != 64)) {
++                          (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
++                          BPF_CLASS(insn->code) == BPF_ALU64) {
+                               verbose("BPF_END uses reserved fields\n");
+                               return -EINVAL;
+                       }
+--- a/tools/testing/selftests/bpf/test_verifier.c
++++ b/tools/testing/selftests/bpf/test_verifier.c
+@@ -6009,6 +6009,22 @@ static struct bpf_test tests[] = {
+               .result = REJECT,
+               .result_unpriv = REJECT,
+       },
++      {
++              "invalid 64-bit BPF_END",
++              .insns = {
++                      BPF_MOV32_IMM(BPF_REG_0, 0),
++                      {
++                              .code  = BPF_ALU64 | BPF_END | BPF_TO_LE,
++                              .dst_reg = BPF_REG_0,
++                              .src_reg = 0,
++                              .off   = 0,
++                              .imm   = 32,
++                      },
++                      BPF_EXIT_INSN(),
++              },
++              .errstr = "BPF_END uses reserved fields",
++              .result = REJECT,
++      },
+ };
+ 
+ static int probe_filter_length(const struct bpf_insn *fp)
diff --git a/queue-4.13/ip6_gre-ip6gre_tap-device-should-keep-dst.patch b/queue-4.13/ip6_gre-ip6gre_tap-device-should-keep-dst.patch

new file mode 100644 (file)

index 0000000..a8e25b4
--- /dev/null
+++ b/queue-4.13/ip6_gre-ip6gre_tap-device-should-keep-dst.patch
@@ -0,0 +1,32 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Thu, 28 Sep 2017 13:23:50 +0800
+Subject: ip6_gre: ip6gre_tap device should keep dst
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit 2d40557cc702ed8e5edd9bd422233f86652d932e ]
+
+The patch 'ip_gre: ipgre_tap device should keep dst' fixed
+an issue that ipgre_tap mtu couldn't be updated in tx path.
+
+The same fix is needed for ip6gre_tap as well.
+
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_gre.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -1311,6 +1311,7 @@ static void ip6gre_tap_setup(struct net_
+       dev->features |= NETIF_F_NETNS_LOCAL;
+       dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+       dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
++      netif_keep_dst(dev);
+ }
+ 
+ static bool ip6gre_netlink_encap_parms(struct nlattr *data[],
diff --git a/queue-4.13/ip6_gre-skb_push-ipv6hdr-before-packing-the-header-in-ip6gre_header.patch b/queue-4.13/ip6_gre-skb_push-ipv6hdr-before-packing-the-header-in-ip6gre_header.patch

new file mode 100644 (file)

index 0000000..3cd5fff
--- /dev/null
+++ b/queue-4.13/ip6_gre-skb_push-ipv6hdr-before-packing-the-header-in-ip6gre_header.patch
@@ -0,0 +1,76 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Fri, 15 Sep 2017 12:00:07 +0800
+Subject: ip6_gre: skb_push ipv6hdr before packing the header in ip6gre_header
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit 76cc0d3282d4b933fa144fa41fbc5318e0fdca24 ]
+
+Now in ip6gre_header before packing the ipv6 header, it skb_push t->hlen
+which only includes encap_hlen + tun_hlen. It means greh and inner header
+would be overwritten by ipv6 stuff and ipv6h might have no chance to set
+up.
+
+Jianlin found this issue when using remote any on ip6_gre, the packets he
+captured on gre dev are truncated:
+
+22:50:26.210866 Out ethertype IPv6 (0x86dd), length 120: truncated-ip6 -\
+8128 bytes missing!(flowlabel 0x92f40, hlim 0, next-header Options (0)  \
+payload length: 8192) ::1:2000:0 > ::1:0:86dd: HBH [trunc] ip-proto-128 \
+8184
+
+It should also skb_push ipv6hdr so that ipv6h points to the right position
+to set ipv6 stuff up.
+
+This patch is to skb_push hlen + sizeof(*ipv6h) and also fix some indents
+in ip6gre_header.
+
+Fixes: c12b395a4664 ("gre: Support GRE over IPv6")
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_gre.c |   21 +++++++++++----------
+ 1 file changed, 11 insertions(+), 10 deletions(-)
+
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -940,24 +940,25 @@ done:
+ }
+ 
+ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
+-                      unsigned short type,
+-                      const void *daddr, const void *saddr, unsigned int len)
++                       unsigned short type, const void *daddr,
++                       const void *saddr, unsigned int len)
+ {
+       struct ip6_tnl *t = netdev_priv(dev);
+-      struct ipv6hdr *ipv6h = skb_push(skb, t->hlen);
+-      __be16 *p = (__be16 *)(ipv6h+1);
++      struct ipv6hdr *ipv6h;
++      __be16 *p;
+ 
+-      ip6_flow_hdr(ipv6h, 0,
+-                   ip6_make_flowlabel(dev_net(dev), skb,
+-                                      t->fl.u.ip6.flowlabel, true,
+-                                      &t->fl.u.ip6));
++      ipv6h = skb_push(skb, t->hlen + sizeof(*ipv6h));
++      ip6_flow_hdr(ipv6h, 0, ip6_make_flowlabel(dev_net(dev), skb,
++                                                t->fl.u.ip6.flowlabel,
++                                                true, &t->fl.u.ip6));
+       ipv6h->hop_limit = t->parms.hop_limit;
+       ipv6h->nexthdr = NEXTHDR_GRE;
+       ipv6h->saddr = t->parms.laddr;
+       ipv6h->daddr = t->parms.raddr;
+ 
+-      p[0]            = t->parms.o_flags;
+-      p[1]            = htons(type);
++      p = (__be16 *)(ipv6h + 1);
++      p[0] = t->parms.o_flags;
++      p[1] = htons(type);
+ 
+       /*
+        *      Set the source hardware address.
diff --git a/queue-4.13/ip6_tunnel-do-not-allow-loading-ip6_tunnel-if-ipv6-is-disabled-in-cmdline.patch b/queue-4.13/ip6_tunnel-do-not-allow-loading-ip6_tunnel-if-ipv6-is-disabled-in-cmdline.patch

new file mode 100644 (file)

index 0000000..37c0067
--- /dev/null
+++ b/queue-4.13/ip6_tunnel-do-not-allow-loading-ip6_tunnel-if-ipv6-is-disabled-in-cmdline.patch
@@ -0,0 +1,61 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Fri, 15 Sep 2017 15:58:33 +0800
+Subject: ip6_tunnel: do not allow loading ip6_tunnel if ipv6 is disabled in cmdline
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit 8c22dab03ad072e45060c299c70d02a4f6fc4aab ]
+
+If ipv6 has been disabled from cmdline since kernel started, it makes
+no sense to allow users to create any ip6 tunnel. Otherwise, it could
+cause some potential problems.
+
+Jianlin found a kernel crash caused by this in ip6_gre when he set
+ipv6.disable=1 in grub:
+
+[  209.588865] Unable to handle kernel paging request for data at address 0x00000080
+[  209.588872] Faulting instruction address: 0xc000000000a3aa6c
+[  209.588879] Oops: Kernel access of bad area, sig: 11 [#1]
+[  209.589062] NIP [c000000000a3aa6c] fib_rules_lookup+0x4c/0x260
+[  209.589071] LR [c000000000b9ad90] fib6_rule_lookup+0x50/0xb0
+[  209.589076] Call Trace:
+[  209.589097] fib6_rule_lookup+0x50/0xb0
+[  209.589106] rt6_lookup+0xc4/0x110
+[  209.589116] ip6gre_tnl_link_config+0x214/0x2f0 [ip6_gre]
+[  209.589125] ip6gre_newlink+0x138/0x3a0 [ip6_gre]
+[  209.589134] rtnl_newlink+0x798/0xb80
+[  209.589142] rtnetlink_rcv_msg+0xec/0x390
+[  209.589151] netlink_rcv_skb+0x138/0x150
+[  209.589159] rtnetlink_rcv+0x48/0x70
+[  209.589169] netlink_unicast+0x538/0x640
+[  209.589175] netlink_sendmsg+0x40c/0x480
+[  209.589184] ___sys_sendmsg+0x384/0x4e0
+[  209.589194] SyS_sendmsg+0xd4/0x140
+[  209.589201] SyS_socketcall+0x3e0/0x4f0
+[  209.589209] system_call+0x38/0xe0
+
+This patch is to return -EOPNOTSUPP in ip6_tunnel_init if ipv6 has been
+disabled from cmdline.
+
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_tunnel.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/ipv6/ip6_tunnel.c
++++ b/net/ipv6/ip6_tunnel.c
+@@ -2258,6 +2258,9 @@ static int __init ip6_tunnel_init(void)
+ {
+       int  err;
+ 
++      if (!ipv6_mod_enabled())
++              return -EOPNOTSUPP;
++
+       err = register_pernet_device(&ip6_tnl_net_ops);
+       if (err < 0)
+               goto out_pernet;
diff --git a/queue-4.13/ip6_tunnel-update-mtu-properly-for-arphrd_ether-tunnel-device-in-tx-path.patch b/queue-4.13/ip6_tunnel-update-mtu-properly-for-arphrd_ether-tunnel-device-in-tx-path.patch

new file mode 100644 (file)

index 0000000..efcdc30
--- /dev/null
+++ b/queue-4.13/ip6_tunnel-update-mtu-properly-for-arphrd_ether-tunnel-device-in-tx-path.patch
@@ -0,0 +1,49 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Thu, 28 Sep 2017 13:24:07 +0800
+Subject: ip6_tunnel: update mtu properly for ARPHRD_ETHER tunnel device in tx path
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit d41bb33ba33b8f8debe54ed36be6925eb496e354 ]
+
+Now when updating mtu in tx path, it doesn't consider ARPHRD_ETHER tunnel
+device, like ip6gre_tap tunnel, for which it should also subtract ether
+header to get the correct mtu.
+
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_tunnel.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/net/ipv6/ip6_tunnel.c
++++ b/net/ipv6/ip6_tunnel.c
+@@ -1043,6 +1043,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, st
+       struct dst_entry *dst = NULL, *ndst = NULL;
+       struct net_device *tdev;
+       int mtu;
++      unsigned int eth_hlen = t->dev->type == ARPHRD_ETHER ? ETH_HLEN : 0;
+       unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen;
+       unsigned int max_headroom = psh_hlen;
+       bool use_cache = false;
+@@ -1124,7 +1125,7 @@ route_lookup:
+                                    t->parms.name);
+               goto tx_err_dst_release;
+       }
+-      mtu = dst_mtu(dst) - psh_hlen - t->tun_hlen;
++      mtu = dst_mtu(dst) - eth_hlen - psh_hlen - t->tun_hlen;
+       if (encap_limit >= 0) {
+               max_headroom += 8;
+               mtu -= 8;
+@@ -1133,7 +1134,7 @@ route_lookup:
+               mtu = IPV6_MIN_MTU;
+       if (skb_dst(skb) && !t->parms.collect_md)
+               skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
+-      if (skb->len - t->tun_hlen > mtu && !skb_is_gso(skb)) {
++      if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
+               *pmtu = mtu;
+               err = -EMSGSIZE;
+               goto tx_err_dst_release;
diff --git a/queue-4.13/ipv4-early-demux-can-return-an-error-code.patch b/queue-4.13/ipv4-early-demux-can-return-an-error-code.patch

new file mode 100644 (file)

index 0000000..70ddd10
--- /dev/null
+++ b/queue-4.13/ipv4-early-demux-can-return-an-error-code.patch
@@ -0,0 +1,212 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Thu, 28 Sep 2017 15:51:36 +0200
+Subject: IPv4: early demux can return an error code
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+
+[ Upstream commit 7487449c86c65202b3b725c4524cb48dd65e4e6f ]
+
+Currently no error is emitted, but this infrastructure will
+be used by the next patch to allow source address validation
+for mcast sockets.
+Since early demux can do a route lookup and an ipv4 route
+lookup can return an error code this is consistent with the
+current ipv4 route infrastructure.
+
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/protocol.h |    4 ++--
+ include/net/tcp.h      |    2 +-
+ include/net/udp.h      |    2 +-
+ net/ipv4/ip_input.c    |   25 +++++++++++++++----------
+ net/ipv4/tcp_ipv4.c    |    9 +++++----
+ net/ipv4/udp.c         |   11 ++++++-----
+ 6 files changed, 30 insertions(+), 23 deletions(-)
+
+--- a/include/net/protocol.h
++++ b/include/net/protocol.h
+@@ -39,8 +39,8 @@
+ 
+ /* This is used to register protocols. */
+ struct net_protocol {
+-      void                    (*early_demux)(struct sk_buff *skb);
+-      void                    (*early_demux_handler)(struct sk_buff *skb);
++      int                     (*early_demux)(struct sk_buff *skb);
++      int                     (*early_demux_handler)(struct sk_buff *skb);
+       int                     (*handler)(struct sk_buff *skb);
+       void                    (*err_handler)(struct sk_buff *skb, u32 info);
+       unsigned int            no_policy:1,
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -347,7 +347,7 @@ void tcp_v4_err(struct sk_buff *skb, u32
+ 
+ void tcp_shutdown(struct sock *sk, int how);
+ 
+-void tcp_v4_early_demux(struct sk_buff *skb);
++int tcp_v4_early_demux(struct sk_buff *skb);
+ int tcp_v4_rcv(struct sk_buff *skb);
+ 
+ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
+--- a/include/net/udp.h
++++ b/include/net/udp.h
+@@ -259,7 +259,7 @@ static inline struct sk_buff *skb_recv_u
+       return __skb_recv_udp(sk, flags, noblock, &peeked, &off, err);
+ }
+ 
+-void udp_v4_early_demux(struct sk_buff *skb);
++int udp_v4_early_demux(struct sk_buff *skb);
+ bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst);
+ int udp_get_port(struct sock *sk, unsigned short snum,
+                int (*saddr_cmp)(const struct sock *,
+--- a/net/ipv4/ip_input.c
++++ b/net/ipv4/ip_input.c
+@@ -311,9 +311,10 @@ drop:
+ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+ {
+       const struct iphdr *iph = ip_hdr(skb);
+-      struct rtable *rt;
++      int (*edemux)(struct sk_buff *skb);
+       struct net_device *dev = skb->dev;
+-      void (*edemux)(struct sk_buff *skb);
++      struct rtable *rt;
++      int err;
+ 
+       /* if ingress device is enslaved to an L3 master device pass the
+        * skb to its handler for processing
+@@ -331,7 +332,9 @@ static int ip_rcv_finish(struct net *net
+ 
+               ipprot = rcu_dereference(inet_protos[protocol]);
+               if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
+-                      edemux(skb);
++                      err = edemux(skb);
++                      if (unlikely(err))
++                              goto drop_error;
+                       /* must reload iph, skb->head might have changed */
+                       iph = ip_hdr(skb);
+               }
+@@ -342,13 +345,10 @@ static int ip_rcv_finish(struct net *net
+        *      how the packet travels inside Linux networking.
+        */
+       if (!skb_valid_dst(skb)) {
+-              int err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
+-                                             iph->tos, dev);
+-              if (unlikely(err)) {
+-                      if (err == -EXDEV)
+-                              __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
+-                      goto drop;
+-              }
++              err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
++                                         iph->tos, dev);
++              if (unlikely(err))
++                      goto drop_error;
+       }
+ 
+ #ifdef CONFIG_IP_ROUTE_CLASSID
+@@ -399,6 +399,11 @@ static int ip_rcv_finish(struct net *net
+ drop:
+       kfree_skb(skb);
+       return NET_RX_DROP;
++
++drop_error:
++      if (err == -EXDEV)
++              __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
++      goto drop;
+ }
+ 
+ /*
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -1504,23 +1504,23 @@ csum_err:
+ }
+ EXPORT_SYMBOL(tcp_v4_do_rcv);
+ 
+-void tcp_v4_early_demux(struct sk_buff *skb)
++int tcp_v4_early_demux(struct sk_buff *skb)
+ {
+       const struct iphdr *iph;
+       const struct tcphdr *th;
+       struct sock *sk;
+ 
+       if (skb->pkt_type != PACKET_HOST)
+-              return;
++              return 0;
+ 
+       if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
+-              return;
++              return 0;
+ 
+       iph = ip_hdr(skb);
+       th = tcp_hdr(skb);
+ 
+       if (th->doff < sizeof(struct tcphdr) / 4)
+-              return;
++              return 0;
+ 
+       sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
+                                      iph->saddr, th->source,
+@@ -1539,6 +1539,7 @@ void tcp_v4_early_demux(struct sk_buff *
+                               skb_dst_set_noref(skb, dst);
+               }
+       }
++      return 0;
+ }
+ 
+ /* Packet is added to VJ-style prequeue for processing in process
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -2217,7 +2217,7 @@ static struct sock *__udp4_lib_demux_loo
+       return NULL;
+ }
+ 
+-void udp_v4_early_demux(struct sk_buff *skb)
++int udp_v4_early_demux(struct sk_buff *skb)
+ {
+       struct net *net = dev_net(skb->dev);
+       const struct iphdr *iph;
+@@ -2229,7 +2229,7 @@ void udp_v4_early_demux(struct sk_buff *
+ 
+       /* validate the packet */
+       if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr)))
+-              return;
++              return 0;
+ 
+       iph = ip_hdr(skb);
+       uh = udp_hdr(skb);
+@@ -2239,14 +2239,14 @@ void udp_v4_early_demux(struct sk_buff *
+               struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
+ 
+               if (!in_dev)
+-                      return;
++                      return 0;
+ 
+               /* we are supposed to accept bcast packets */
+               if (skb->pkt_type == PACKET_MULTICAST) {
+                       ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr,
+                                              iph->protocol);
+                       if (!ours)
+-                              return;
++                              return 0;
+               }
+ 
+               sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr,
+@@ -2257,7 +2257,7 @@ void udp_v4_early_demux(struct sk_buff *
+       }
+ 
+       if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
+-              return;
++              return 0;
+ 
+       skb->sk = sk;
+       skb->destructor = sock_efree;
+@@ -2272,6 +2272,7 @@ void udp_v4_early_demux(struct sk_buff *
+                */
+               skb_dst_set_noref(skb, dst);
+       }
++      return 0;
+ }
+ 
+ int udp_rcv(struct sk_buff *skb)
diff --git a/queue-4.13/isdn-i4l-fetch-the-ppp_write-buffer-in-one-shot.patch b/queue-4.13/isdn-i4l-fetch-the-ppp_write-buffer-in-one-shot.patch

new file mode 100644 (file)

index 0000000..b7cb88a
--- /dev/null
+++ b/queue-4.13/isdn-i4l-fetch-the-ppp_write-buffer-in-one-shot.patch
@@ -0,0 +1,104 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Meng Xu <mengxu.gatech@gmail.com>
+Date: Tue, 19 Sep 2017 21:49:55 -0400
+Subject: isdn/i4l: fetch the ppp_write buffer in one shot
+
+From: Meng Xu <mengxu.gatech@gmail.com>
+
+
+[ Upstream commit 02388bf87f72e1d47174cd8f81c34443920eb5a0 ]
+
+In isdn_ppp_write(), the header (i.e., protobuf) of the buffer is
+fetched twice from userspace. The first fetch is used to peek at the
+protocol of the message and reset the huptimer if necessary; while the
+second fetch copies in the whole buffer. However, given that buf resides
+in userspace memory, a user process can race to change its memory content
+across fetches. By doing so, we can either avoid resetting the huptimer
+for any type of packets (by first setting proto to PPP_LCP and later
+change to the actual type) or force resetting the huptimer for LCP
+packets.
+
+This patch changes this double-fetch behavior into two single fetches
+decided by condition (lp->isdn_device < 0 || lp->isdn_channel < 0).
+A more detailed discussion can be found at
+https://marc.info/?l=linux-kernel&m=150586376926123&w=2
+
+Signed-off-by: Meng Xu <mengxu.gatech@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/isdn/i4l/isdn_ppp.c |   37 +++++++++++++++++++++++++------------
+ 1 file changed, 25 insertions(+), 12 deletions(-)
+
+--- a/drivers/isdn/i4l/isdn_ppp.c
++++ b/drivers/isdn/i4l/isdn_ppp.c
+@@ -825,7 +825,6 @@ isdn_ppp_write(int min, struct file *fil
+       isdn_net_local *lp;
+       struct ippp_struct *is;
+       int proto;
+-      unsigned char protobuf[4];
+ 
+       is = file->private_data;
+ 
+@@ -839,24 +838,28 @@ isdn_ppp_write(int min, struct file *fil
+       if (!lp)
+               printk(KERN_DEBUG "isdn_ppp_write: lp == NULL\n");
+       else {
+-              /*
+-               * Don't reset huptimer for
+-               * LCP packets. (Echo requests).
+-               */
+-              if (copy_from_user(protobuf, buf, 4))
+-                      return -EFAULT;
+-              proto = PPP_PROTOCOL(protobuf);
+-              if (proto != PPP_LCP)
+-                      lp->huptimer = 0;
++              if (lp->isdn_device < 0 || lp->isdn_channel < 0) {
++                      unsigned char protobuf[4];
++                      /*
++                       * Don't reset huptimer for
++                       * LCP packets. (Echo requests).
++                       */
++                      if (copy_from_user(protobuf, buf, 4))
++                              return -EFAULT;
++
++                      proto = PPP_PROTOCOL(protobuf);
++                      if (proto != PPP_LCP)
++                              lp->huptimer = 0;
+ 
+-              if (lp->isdn_device < 0 || lp->isdn_channel < 0)
+                       return 0;
++              }
+ 
+               if ((dev->drv[lp->isdn_device]->flags & DRV_FLAG_RUNNING) &&
+                   lp->dialstate == 0 &&
+                   (lp->flags & ISDN_NET_CONNECTED)) {
+                       unsigned short hl;
+                       struct sk_buff *skb;
++                      unsigned char *cpy_buf;
+                       /*
+                        * we need to reserve enough space in front of
+                        * sk_buff. old call to dev_alloc_skb only reserved
+@@ -869,11 +872,21 @@ isdn_ppp_write(int min, struct file *fil
+                               return count;
+                       }
+                       skb_reserve(skb, hl);
+-                      if (copy_from_user(skb_put(skb, count), buf, count))
++                      cpy_buf = skb_put(skb, count);
++                      if (copy_from_user(cpy_buf, buf, count))
+                       {
+                               kfree_skb(skb);
+                               return -EFAULT;
+                       }
++
++                      /*
++                       * Don't reset huptimer for
++                       * LCP packets. (Echo requests).
++                       */
++                      proto = PPP_PROTOCOL(cpy_buf);
++                      if (proto != PPP_LCP)
++                              lp->huptimer = 0;
++
+                       if (is->debug & 0x40) {
+                               printk(KERN_DEBUG "ppp xmit: len %d\n", (int) skb->len);
+                               isdn_ppp_frame_log("xmit", skb->data, skb->len, 32, is->unit, lp->ppp_slot);
diff --git a/queue-4.13/l2tp-fix-l2tp_eth-module-loading.patch b/queue-4.13/l2tp-fix-l2tp_eth-module-loading.patch

new file mode 100644 (file)

index 0000000..865eb00
--- /dev/null
+++ b/queue-4.13/l2tp-fix-l2tp_eth-module-loading.patch
@@ -0,0 +1,145 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Guillaume Nault <g.nault@alphalink.fr>
+Date: Thu, 28 Sep 2017 15:44:38 +0200
+Subject: l2tp: fix l2tp_eth module loading
+
+From: Guillaume Nault <g.nault@alphalink.fr>
+
+
+[ Upstream commit 9f775ead5e570e7e19015b9e4e2f3dd6e71a5935 ]
+
+The l2tp_eth module crashes if its netlink callbacks are run when the
+pernet data aren't initialised.
+
+We should normally register_pernet_device() before the genl callbacks.
+However, the pernet data only maintain a list of l2tpeth interfaces,
+and this list is never used. So let's just drop pernet handling
+instead.
+
+Fixes: d9e31d17ceba ("l2tp: Add L2TP ethernet pseudowire support")
+Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/l2tp/l2tp_eth.c |   51 ++-------------------------------------------------
+ 1 file changed, 2 insertions(+), 49 deletions(-)
+
+--- a/net/l2tp/l2tp_eth.c
++++ b/net/l2tp/l2tp_eth.c
+@@ -44,7 +44,6 @@ struct l2tp_eth {
+       struct net_device       *dev;
+       struct sock             *tunnel_sock;
+       struct l2tp_session     *session;
+-      struct list_head        list;
+       atomic_long_t           tx_bytes;
+       atomic_long_t           tx_packets;
+       atomic_long_t           tx_dropped;
+@@ -58,17 +57,6 @@ struct l2tp_eth_sess {
+       struct net_device       *dev;
+ };
+ 
+-/* per-net private data for this module */
+-static unsigned int l2tp_eth_net_id;
+-struct l2tp_eth_net {
+-      struct list_head l2tp_eth_dev_list;
+-      spinlock_t l2tp_eth_lock;
+-};
+-
+-static inline struct l2tp_eth_net *l2tp_eth_pernet(struct net *net)
+-{
+-      return net_generic(net, l2tp_eth_net_id);
+-}
+ 
+ static int l2tp_eth_dev_init(struct net_device *dev)
+ {
+@@ -84,12 +72,6 @@ static int l2tp_eth_dev_init(struct net_
+ 
+ static void l2tp_eth_dev_uninit(struct net_device *dev)
+ {
+-      struct l2tp_eth *priv = netdev_priv(dev);
+-      struct l2tp_eth_net *pn = l2tp_eth_pernet(dev_net(dev));
+-
+-      spin_lock(&pn->l2tp_eth_lock);
+-      list_del_init(&priv->list);
+-      spin_unlock(&pn->l2tp_eth_lock);
+       dev_put(dev);
+ }
+ 
+@@ -272,7 +254,6 @@ static int l2tp_eth_create(struct net *n
+       struct l2tp_eth *priv;
+       struct l2tp_eth_sess *spriv;
+       int rc;
+-      struct l2tp_eth_net *pn;
+ 
+       tunnel = l2tp_tunnel_find(net, tunnel_id);
+       if (!tunnel) {
+@@ -310,7 +291,6 @@ static int l2tp_eth_create(struct net *n
+       priv = netdev_priv(dev);
+       priv->dev = dev;
+       priv->session = session;
+-      INIT_LIST_HEAD(&priv->list);
+ 
+       priv->tunnel_sock = tunnel->sock;
+       session->recv_skb = l2tp_eth_dev_recv;
+@@ -331,10 +311,6 @@ static int l2tp_eth_create(struct net *n
+       strlcpy(session->ifname, dev->name, IFNAMSIZ);
+ 
+       dev_hold(dev);
+-      pn = l2tp_eth_pernet(dev_net(dev));
+-      spin_lock(&pn->l2tp_eth_lock);
+-      list_add(&priv->list, &pn->l2tp_eth_dev_list);
+-      spin_unlock(&pn->l2tp_eth_lock);
+ 
+       return 0;
+ 
+@@ -347,22 +323,6 @@ out:
+       return rc;
+ }
+ 
+-static __net_init int l2tp_eth_init_net(struct net *net)
+-{
+-      struct l2tp_eth_net *pn = net_generic(net, l2tp_eth_net_id);
+-
+-      INIT_LIST_HEAD(&pn->l2tp_eth_dev_list);
+-      spin_lock_init(&pn->l2tp_eth_lock);
+-
+-      return 0;
+-}
+-
+-static struct pernet_operations l2tp_eth_net_ops = {
+-      .init = l2tp_eth_init_net,
+-      .id   = &l2tp_eth_net_id,
+-      .size = sizeof(struct l2tp_eth_net),
+-};
+-
+ 
+ static const struct l2tp_nl_cmd_ops l2tp_eth_nl_cmd_ops = {
+       .session_create = l2tp_eth_create,
+@@ -376,25 +336,18 @@ static int __init l2tp_eth_init(void)
+ 
+       err = l2tp_nl_register_ops(L2TP_PWTYPE_ETH, &l2tp_eth_nl_cmd_ops);
+       if (err)
+-              goto out;
+-
+-      err = register_pernet_device(&l2tp_eth_net_ops);
+-      if (err)
+-              goto out_unreg;
++              goto err;
+ 
+       pr_info("L2TP ethernet pseudowire support (L2TPv3)\n");
+ 
+       return 0;
+ 
+-out_unreg:
+-      l2tp_nl_unregister_ops(L2TP_PWTYPE_ETH);
+-out:
++err:
+       return err;
+ }
+ 
+ static void __exit l2tp_eth_exit(void)
+ {
+-      unregister_pernet_device(&l2tp_eth_net_ops);
+       l2tp_nl_unregister_ops(L2TP_PWTYPE_ETH);
+ }
+ 
diff --git a/queue-4.13/l2tp-fix-race-condition-in-l2tp_tunnel_delete.patch b/queue-4.13/l2tp-fix-race-condition-in-l2tp_tunnel_delete.patch

new file mode 100644 (file)

index 0000000..668b50a
--- /dev/null
+++ b/queue-4.13/l2tp-fix-race-condition-in-l2tp_tunnel_delete.patch
@@ -0,0 +1,85 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Sabrina Dubroca <sd@queasysnail.net>
+Date: Tue, 26 Sep 2017 16:16:43 +0200
+Subject: l2tp: fix race condition in l2tp_tunnel_delete
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+
+[ Upstream commit 62b982eeb4589b2e6d7c01a90590e3a4c2b2ca19 ]
+
+If we try to delete the same tunnel twice, the first delete operation
+does a lookup (l2tp_tunnel_get), finds the tunnel, calls
+l2tp_tunnel_delete, which queues it for deletion by
+l2tp_tunnel_del_work.
+
+The second delete operation also finds the tunnel and calls
+l2tp_tunnel_delete. If the workqueue has already fired and started
+running l2tp_tunnel_del_work, then l2tp_tunnel_delete will queue the
+same tunnel a second time, and try to free the socket again.
+
+Add a dead flag to prevent firing the workqueue twice. Then we can
+remove the check of queue_work's result that was meant to prevent that
+race but doesn't.
+
+Reproducer:
+
+    ip l2tp add tunnel tunnel_id 3000 peer_tunnel_id 4000 local 192.168.0.2 remote 192.168.0.1 encap udp udp_sport 5000 udp_dport 6000
+    ip l2tp add session name l2tp1 tunnel_id 3000 session_id 1000 peer_session_id 2000
+    ip link set l2tp1 up
+    ip l2tp del tunnel tunnel_id 3000
+    ip l2tp del tunnel tunnel_id 3000
+
+Fixes: f8ccac0e4493 ("l2tp: put tunnel socket release on a workqueue")
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Acked-by: Guillaume Nault <g.nault@alphalink.fr>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/l2tp/l2tp_core.c |   10 ++++------
+ net/l2tp/l2tp_core.h |    5 ++++-
+ 2 files changed, 8 insertions(+), 7 deletions(-)
+
+--- a/net/l2tp/l2tp_core.c
++++ b/net/l2tp/l2tp_core.c
+@@ -1665,14 +1665,12 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create);
+ 
+ /* This function is used by the netlink TUNNEL_DELETE command.
+  */
+-int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
++void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
+ {
+-      l2tp_tunnel_inc_refcount(tunnel);
+-      if (false == queue_work(l2tp_wq, &tunnel->del_work)) {
+-              l2tp_tunnel_dec_refcount(tunnel);
+-              return 1;
++      if (!test_and_set_bit(0, &tunnel->dead)) {
++              l2tp_tunnel_inc_refcount(tunnel);
++              queue_work(l2tp_wq, &tunnel->del_work);
+       }
+-      return 0;
+ }
+ EXPORT_SYMBOL_GPL(l2tp_tunnel_delete);
+ 
+--- a/net/l2tp/l2tp_core.h
++++ b/net/l2tp/l2tp_core.h
+@@ -160,6 +160,9 @@ struct l2tp_tunnel_cfg {
+ 
+ struct l2tp_tunnel {
+       int                     magic;          /* Should be L2TP_TUNNEL_MAGIC */
++
++      unsigned long           dead;
++
+       struct rcu_head rcu;
+       rwlock_t                hlist_lock;     /* protect session_hlist */
+       struct hlist_head       session_hlist[L2TP_HASH_SIZE];
+@@ -248,7 +251,7 @@ int l2tp_tunnel_create(struct net *net,
+                      u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg,
+                      struct l2tp_tunnel **tunnelp);
+ void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
+-int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
++void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
+ struct l2tp_session *l2tp_session_create(int priv_size,
+                                        struct l2tp_tunnel *tunnel,
+                                        u32 session_id, u32 peer_session_id,
diff --git a/queue-4.13/mlxsw-spectrum-fix-eeprom-access-in-case-of-sfp-sfp.patch b/queue-4.13/mlxsw-spectrum-fix-eeprom-access-in-case-of-sfp-sfp.patch

new file mode 100644 (file)

index 0000000..141fc67
--- /dev/null
+++ b/queue-4.13/mlxsw-spectrum-fix-eeprom-access-in-case-of-sfp-sfp.patch
@@ -0,0 +1,65 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Arkadi Sharshevsky <arkadis@mellanox.com>
+Date: Mon, 11 Sep 2017 09:42:26 +0200
+Subject: mlxsw: spectrum: Fix EEPROM access in case of SFP/SFP+
+
+From: Arkadi Sharshevsky <arkadis@mellanox.com>
+
+
+[ Upstream commit 4400081b631af69abc63cea3352680e3d85e0c39 ]
+
+The current code does not correctly handle access to the upper page
+of an SFP/SFP+ EEPROM. In that case the offset should be relative to
+that page and the I2C address should be changed.
+
+Fixes: 2ea109039cd3 ("mlxsw: spectrum: Add support for access cable info via ethtool")
+Reported-by: Florian Klink <flokli@flokli.de>
+Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
+Reviewed-by: Ido Schimmel <idosch@mellanox.com>
+Signed-off-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlxsw/spectrum.c |   19 +++++++++++++++++--
+ 1 file changed, 17 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+@@ -2519,7 +2519,9 @@ out:
+       return err;
+ }
+ 
+-#define MLXSW_SP_QSFP_I2C_ADDR 0x50
++#define MLXSW_SP_I2C_ADDR_LOW 0x50
++#define MLXSW_SP_I2C_ADDR_HIGH 0x51
++#define MLXSW_SP_EEPROM_PAGE_LENGTH 256
+ 
+ static int mlxsw_sp_query_module_eeprom(struct mlxsw_sp_port *mlxsw_sp_port,
+                                       u16 offset, u16 size, void *data,
+@@ -2528,12 +2530,25 @@ static int mlxsw_sp_query_module_eeprom(
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+       char eeprom_tmp[MLXSW_SP_REG_MCIA_EEPROM_SIZE];
+       char mcia_pl[MLXSW_REG_MCIA_LEN];
++      u16 i2c_addr;
+       int status;
+       int err;
+ 
+       size = min_t(u16, size, MLXSW_SP_REG_MCIA_EEPROM_SIZE);
++
++      if (offset < MLXSW_SP_EEPROM_PAGE_LENGTH &&
++          offset + size > MLXSW_SP_EEPROM_PAGE_LENGTH)
++              /* Cross pages read, read until offset 256 in low page */
++              size = MLXSW_SP_EEPROM_PAGE_LENGTH - offset;
++
++      i2c_addr = MLXSW_SP_I2C_ADDR_LOW;
++      if (offset >= MLXSW_SP_EEPROM_PAGE_LENGTH) {
++              i2c_addr = MLXSW_SP_I2C_ADDR_HIGH;
++              offset -= MLXSW_SP_EEPROM_PAGE_LENGTH;
++      }
++
+       mlxsw_reg_mcia_pack(mcia_pl, mlxsw_sp_port->mapping.module,
+-                          0, 0, offset, size, MLXSW_SP_QSFP_I2C_ADDR);
++                          0, 0, offset, size, i2c_addr);
+ 
+       err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mcia), mcia_pl);
+       if (err)
diff --git a/queue-4.13/mlxsw-spectrum-prevent-mirred-related-crash-on-removal.patch b/queue-4.13/mlxsw-spectrum-prevent-mirred-related-crash-on-removal.patch

new file mode 100644 (file)

index 0000000..0ef6755
--- /dev/null
+++ b/queue-4.13/mlxsw-spectrum-prevent-mirred-related-crash-on-removal.patch
@@ -0,0 +1,98 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Yuval Mintz <yuvalm@mellanox.com>
+Date: Tue, 12 Sep 2017 08:50:53 +0200
+Subject: mlxsw: spectrum: Prevent mirred-related crash on removal
+
+From: Yuval Mintz <yuvalm@mellanox.com>
+
+
+[ Upstream commit 6399ebcccffa12e65bc15eda039d37673264ebce ]
+
+When removing the offloading of mirred actions under
+matchall classifiers, mlxsw would find the destination port
+associated with the offloaded action and utilize it for undoing
+the configuration.
+
+Depending on the order by which ports are removed, it's possible that
+the destination port would get removed before the source port.
+In such a scenario, when actions would be flushed for the source port
+mlxsw would perform an illegal dereference as the destination port is
+no longer listed.
+
+Since the only item necessary for undoing the configuration on the
+destination side is the port-id and that in turn is already maintained
+by mlxsw on the source-port, simply stop trying to access the
+destination port and use the port-id directly instead.
+
+Fixes: 763b4b70af ("mlxsw: spectrum: Add support in matchall mirror TC offloading")
+Signed-off-by: Yuval Mintz <yuvalm@mellanox.com>
+Signed-off-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlxsw/spectrum.c |   19 +++++++++----------
+ 1 file changed, 9 insertions(+), 10 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+@@ -572,15 +572,14 @@ static void mlxsw_sp_span_entry_destroy(
+ }
+ 
+ static struct mlxsw_sp_span_entry *
+-mlxsw_sp_span_entry_find(struct mlxsw_sp_port *port)
++mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port)
+ {
+-      struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+       int i;
+ 
+       for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+               struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+ 
+-              if (curr->used && curr->local_port == port->local_port)
++              if (curr->used && curr->local_port == local_port)
+                       return curr;
+       }
+       return NULL;
+@@ -591,7 +590,8 @@ static struct mlxsw_sp_span_entry
+ {
+       struct mlxsw_sp_span_entry *span_entry;
+ 
+-      span_entry = mlxsw_sp_span_entry_find(port);
++      span_entry = mlxsw_sp_span_entry_find(port->mlxsw_sp,
++                                            port->local_port);
+       if (span_entry) {
+               /* Already exists, just take a reference */
+               span_entry->ref_count++;
+@@ -780,12 +780,13 @@ err_port_bind:
+ }
+ 
+ static void mlxsw_sp_span_mirror_remove(struct mlxsw_sp_port *from,
+-                                      struct mlxsw_sp_port *to,
++                                      u8 destination_port,
+                                       enum mlxsw_sp_span_type type)
+ {
+       struct mlxsw_sp_span_entry *span_entry;
+ 
+-      span_entry = mlxsw_sp_span_entry_find(to);
++      span_entry = mlxsw_sp_span_entry_find(from->mlxsw_sp,
++                                            destination_port);
+       if (!span_entry) {
+               netdev_err(from->dev, "no span entry found\n");
+               return;
+@@ -1560,14 +1561,12 @@ static void
+ mlxsw_sp_port_del_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port,
+                                     struct mlxsw_sp_port_mall_mirror_tc_entry *mirror)
+ {
+-      struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+       enum mlxsw_sp_span_type span_type;
+-      struct mlxsw_sp_port *to_port;
+ 
+-      to_port = mlxsw_sp->ports[mirror->to_local_port];
+       span_type = mirror->ingress ?
+                       MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
+-      mlxsw_sp_span_mirror_remove(mlxsw_sp_port, to_port, span_type);
++      mlxsw_sp_span_mirror_remove(mlxsw_sp_port, mirror->to_local_port,
++                                  span_type);
+ }
+ 
+ static int
diff --git a/queue-4.13/net-bonding-fix-tlb_dynamic_lb-default-value.patch b/queue-4.13/net-bonding-fix-tlb_dynamic_lb-default-value.patch

new file mode 100644 (file)

index 0000000..3000edb
--- /dev/null
+++ b/queue-4.13/net-bonding-fix-tlb_dynamic_lb-default-value.patch
@@ -0,0 +1,65 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Date: Tue, 12 Sep 2017 15:10:05 +0300
+Subject: net: bonding: fix tlb_dynamic_lb default value
+
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+
+
+[ Upstream commit f13ad104b4e886a03e75f130daf579ef9bf33dfc ]
+
+Commit 8b426dc54cf4 ("bonding: remove hardcoded value") changed the
+default value for tlb_dynamic_lb which led to either broken ALB mode
+(since tlb_dynamic_lb can be changed only in TLB) or setting TLB mode
+with tlb_dynamic_lb equal to 0.
+The first issue was recently fixed by setting tlb_dynamic_lb to 1 always
+when switching to ALB mode, but the default value is still wrong and
+we'll enter TLB mode with tlb_dynamic_lb equal to 0 if the mode is
+changed via netlink or sysfs. In order to restore the previous behaviour
+and default value simply remove the mode check around the default param
+initialization for tlb_dynamic_lb which will always set it to 1 as
+before.
+
+Fixes: 8b426dc54cf4 ("bonding: remove hardcoded value")
+Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Acked-by: Mahesh Bandewar <maheshb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/bonding/bond_main.c |   17 +++++++----------
+ 1 file changed, 7 insertions(+), 10 deletions(-)
+
+--- a/drivers/net/bonding/bond_main.c
++++ b/drivers/net/bonding/bond_main.c
+@@ -4289,7 +4289,7 @@ static int bond_check_params(struct bond
+       int bond_mode   = BOND_MODE_ROUNDROBIN;
+       int xmit_hashtype = BOND_XMIT_POLICY_LAYER2;
+       int lacp_fast = 0;
+-      int tlb_dynamic_lb = 0;
++      int tlb_dynamic_lb;
+ 
+       /* Convert string parameters. */
+       if (mode) {
+@@ -4601,16 +4601,13 @@ static int bond_check_params(struct bond
+       }
+       ad_user_port_key = valptr->value;
+ 
+-      if ((bond_mode == BOND_MODE_TLB) || (bond_mode == BOND_MODE_ALB)) {
+-              bond_opt_initstr(&newval, "default");
+-              valptr = bond_opt_parse(bond_opt_get(BOND_OPT_TLB_DYNAMIC_LB),
+-                                      &newval);
+-              if (!valptr) {
+-                      pr_err("Error: No tlb_dynamic_lb default value");
+-                      return -EINVAL;
+-              }
+-              tlb_dynamic_lb = valptr->value;
++      bond_opt_initstr(&newval, "default");
++      valptr = bond_opt_parse(bond_opt_get(BOND_OPT_TLB_DYNAMIC_LB), &newval);
++      if (!valptr) {
++              pr_err("Error: No tlb_dynamic_lb default value");
++              return -EINVAL;
+       }
++      tlb_dynamic_lb = valptr->value;
+ 
+       if (lp_interval == 0) {
+               pr_warn("Warning: ip_interval must be between 1 and %d, so it was reset to %d\n",
diff --git a/queue-4.13/net-bonding-fix-transmit-load-balancing-in-balance-alb-mode-if-specified-by-sysfs.patch b/queue-4.13/net-bonding-fix-transmit-load-balancing-in-balance-alb-mode-if-specified-by-sysfs.patch

new file mode 100644 (file)

index 0000000..1e6a470
--- /dev/null
+++ b/queue-4.13/net-bonding-fix-transmit-load-balancing-in-balance-alb-mode-if-specified-by-sysfs.patch
@@ -0,0 +1,66 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Kosuke Tatsukawa <tatsu@ab.jp.nec.com>
+Date: Wed, 6 Sep 2017 22:47:59 +0000
+Subject: net: bonding: Fix transmit load balancing in balance-alb mode if specified by sysfs
+
+From: Kosuke Tatsukawa <tatsu@ab.jp.nec.com>
+
+
+[ Upstream commit c6644d07eff6588b2dedf881279fb0d1c7783970 ]
+
+Commit cbf5ecb30560 ("net: bonding: Fix transmit load balancing in
+balance-alb mode") tried to fix transmit dynamic load balancing in
+balance-alb mode, which wasn't working after commit 8b426dc54cf4
+("bonding: remove hardcoded value").
+
+It turned out that my previous patch only fixed the case when
+balance-alb was specified as bonding module parameter, and not when
+balance-alb mode was set using /sys/class/net/*/bonding/mode (the most
+common usage).  In the latter case, tlb_dynamic_lb was set up according
+to the default mode of the bonding interface, which happens to be
+balance-rr.
+
+This additional patch addresses this issue by setting up tlb_dynamic_lb
+to 1 if "mode" is set to balance-alb through the sysfs interface.
+
+I didn't add code to change tlb_dynamic_lb back to the default value for
+other modes, because "mode" is usually set up only once during
+initialization, and it's not worthwhile to change the static variable
+bonding_defaults in bond_main.c to a global variable just for this
+purpose.
+
+Commit 8b426dc54cf4 also changes the value of tlb_dynamic_lb for
+balance-tlb mode if it is set up using the sysfs interface.  I didn't
+change that behavior, because the value of tlb_dynamic_lb can be changed
+using the sysfs interface for balance-tlb, and I didn't like changing
+the default value back and forth for balance-tlb.
+
+As for balance-alb, /sys/class/net/*/bonding/tlb_dynamic_lb cannot be
+written to.  However, I think balance-alb with tlb_dynamic_lb set to 0
+is not an intended usage, so there is little use making it writable at
+this moment.
+
+Fixes: 8b426dc54cf4 ("bonding: remove hardcoded value")
+Reported-by: Reinis Rozitis <r@roze.lv>
+Signed-off-by: Kosuke Tatsukawa <tatsu@ab.jp.nec.com>
+Cc: stable@vger.kernel.org  # v4.12+
+Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Acked-by: Mahesh Bandewar <maheshb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/bonding/bond_options.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/net/bonding/bond_options.c
++++ b/drivers/net/bonding/bond_options.c
+@@ -754,6 +754,9 @@ static int bond_option_mode_set(struct b
+                          bond->params.miimon);
+       }
+ 
++      if (newval->value == BOND_MODE_ALB)
++              bond->params.tlb_dynamic_lb = 1;
++
+       /* don't cache arp_validate between modes */
+       bond->params.arp_validate = BOND_ARP_VALIDATE_NONE;
+       bond->params.mode = newval->value;
diff --git a/queue-4.13/net-change-skb-mac_header-when-generic-xdp-calls-adjust_head.patch b/queue-4.13/net-change-skb-mac_header-when-generic-xdp-calls-adjust_head.patch

new file mode 100644 (file)

index 0000000..946d5eb
--- /dev/null
+++ b/queue-4.13/net-change-skb-mac_header-when-generic-xdp-calls-adjust_head.patch
@@ -0,0 +1,35 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Edward Cree <ecree@solarflare.com>
+Date: Tue, 19 Sep 2017 18:45:56 +0100
+Subject: net: change skb->mac_header when Generic XDP calls adjust_head
+
+From: Edward Cree <ecree@solarflare.com>
+
+
+[ Upstream commit 92dd5452c1be873a1193561f4f691763103d22ac ]
+
+Since XDP's view of the packet includes the MAC header, moving the start-
+ of-packet with bpf_xdp_adjust_head needs to also update the offset of the
+ MAC header (which is relative to skb->head, not to the skb->data that was
+ changed).
+Without this, tcpdump sees packets starting from the old MAC header rather
+ than the new one, at least in my tests on the loopback device.
+
+Fixes: b5cdae3291f7 ("net: Generic XDP")
+Signed-off-by: Edward Cree <ecree@solarflare.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -4408,6 +4408,7 @@ static u32 netif_receive_generic_xdp(str
+               __skb_pull(skb, off);
+       else if (off < 0)
+               __skb_push(skb, -off);
++      skb->mac_header += off;
+ 
+       switch (act) {
+       case XDP_TX:
diff --git a/queue-4.13/net-dsa-fix-network-device-registration-order.patch b/queue-4.13/net-dsa-fix-network-device-registration-order.patch

new file mode 100644 (file)

index 0000000..26044b2
--- /dev/null
+++ b/queue-4.13/net-dsa-fix-network-device-registration-order.patch
@@ -0,0 +1,75 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Mon, 25 Sep 2017 15:55:53 -0700
+Subject: net: dsa: Fix network device registration order
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+
+[ Upstream commit e804441cfe0b60f6c430901946a69c01eac09df1 ]
+
+We cannot register the network device first, then set its carrier off
+and finally connect it to a PHY: doing that leaves a window during
+which the carrier is at best inconsistent, and at worst the device is
+not usable without a down/up sequence, since the network device is
+visible to user space with possibly no PHY device attached.
+
+Re-order steps so that they make logical sense. This fixes some devices
+where the port was not usable after e.g: an unbind then bind of the
+driver.
+
+Fixes: 0071f56e46da ("dsa: Register netdev before phy")
+Fixes: 91da11f870f0 ("net: Distributed Switch Architecture protocol support")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dsa/slave.c |   28 +++++++++++++++++-----------
+ 1 file changed, 17 insertions(+), 11 deletions(-)
+
+--- a/net/dsa/slave.c
++++ b/net/dsa/slave.c
+@@ -1180,26 +1180,32 @@ int dsa_slave_create(struct dsa_switch *
+       p->old_duplex = -1;
+ 
+       ds->ports[port].netdev = slave_dev;
+-      ret = register_netdev(slave_dev);
+-      if (ret) {
+-              netdev_err(master, "error %d registering interface %s\n",
+-                         ret, slave_dev->name);
+-              ds->ports[port].netdev = NULL;
+-              free_netdev(slave_dev);
+-              return ret;
+-      }
+ 
+       netif_carrier_off(slave_dev);
+ 
+       ret = dsa_slave_phy_setup(p, slave_dev);
+       if (ret) {
+               netdev_err(master, "error %d setting up slave phy\n", ret);
+-              unregister_netdev(slave_dev);
+-              free_netdev(slave_dev);
+-              return ret;
++              goto out_free;
++      }
++
++      ret = register_netdev(slave_dev);
++      if (ret) {
++              netdev_err(master, "error %d registering interface %s\n",
++                         ret, slave_dev->name);
++              goto out_phy;
+       }
+ 
+       return 0;
++
++out_phy:
++      phy_disconnect(p->phy);
++      if (of_phy_is_fixed_link(p->dp->dn))
++              of_phy_deregister_fixed_link(p->dp->dn);
++out_free:
++      free_netdev(slave_dev);
++      ds->ports[port].netdev = NULL;
++      return ret;
+ }
+ 
+ void dsa_slave_destroy(struct net_device *slave_dev)
diff --git a/queue-4.13/net-dsa-mv88e6xxx-allow-dsa-and-cpu-ports-in-multiple-vlans.patch b/queue-4.13/net-dsa-mv88e6xxx-allow-dsa-and-cpu-ports-in-multiple-vlans.patch

new file mode 100644 (file)

index 0000000..a0357f8
--- /dev/null
+++ b/queue-4.13/net-dsa-mv88e6xxx-allow-dsa-and-cpu-ports-in-multiple-vlans.patch
@@ -0,0 +1,36 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Andrew Lunn <andrew@lunn.ch>
+Date: Mon, 25 Sep 2017 23:32:20 +0200
+Subject: net: dsa: mv88e6xxx: Allow dsa and cpu ports in multiple vlans
+
+From: Andrew Lunn <andrew@lunn.ch>
+
+
+[ Upstream commit db06ae41945b14feb7f696dcafe8048cc37e8a20 ]
+
+Ports with the same VLAN must all be in the same bridge. However the
+CPU and DSA ports need to be in multiple VLANs spread over multiple
+bridges. So exclude them when performing this test.
+
+Fixes: b2f81d304cee ("net: dsa: add CPU and DSA ports as VLAN members")
+Signed-off-by: Andrew Lunn <andrew@lunn.ch>
+Reviewed-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/mv88e6xxx/chip.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/net/dsa/mv88e6xxx/chip.c
++++ b/drivers/net/dsa/mv88e6xxx/chip.c
+@@ -1184,6 +1184,10 @@ static int mv88e6xxx_port_check_hw_vlan(
+       };
+       int i, err;
+ 
++      /* DSA and CPU ports have to be members of multiple vlans */
++      if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port))
++              return 0;
++
+       if (!vid_begin)
+               return -EOPNOTSUPP;
+ 
diff --git a/queue-4.13/net-dsa-mv88e6xxx-lock-mutex-when-freeing-irqs.patch b/queue-4.13/net-dsa-mv88e6xxx-lock-mutex-when-freeing-irqs.patch

new file mode 100644 (file)

index 0000000..a38dcb5
--- /dev/null
+++ b/queue-4.13/net-dsa-mv88e6xxx-lock-mutex-when-freeing-irqs.patch
@@ -0,0 +1,35 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+Date: Tue, 26 Sep 2017 14:57:21 -0400
+Subject: net: dsa: mv88e6xxx: lock mutex when freeing IRQs
+
+From: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+
+
+[ Upstream commit b32ca44a88def4bf92626d8777494c6f14638c42 ]
+
+mv88e6xxx_g2_irq_free locks the registers mutex, but not
+mv88e6xxx_g1_irq_free, which results in a stack trace from
+assert_reg_lock when unloading the mv88e6xxx module. Fix this.
+
+Fixes: 3460a5770ce9 ("net: dsa: mv88e6xxx: Mask g1 interrupts and free interrupt")
+Signed-off-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/mv88e6xxx/chip.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/dsa/mv88e6xxx/chip.c
++++ b/drivers/net/dsa/mv88e6xxx/chip.c
+@@ -4019,7 +4019,9 @@ static void mv88e6xxx_remove(struct mdio
+       if (chip->irq > 0) {
+               if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_INT))
+                       mv88e6xxx_g2_irq_free(chip);
++              mutex_lock(&chip->reg_lock);
+               mv88e6xxx_g1_irq_free(chip);
++              mutex_unlock(&chip->reg_lock);
+       }
+ }
+ 
diff --git a/queue-4.13/net-emac-fix-napi-poll-list-corruption.patch b/queue-4.13/net-emac-fix-napi-poll-list-corruption.patch

new file mode 100644 (file)

index 0000000..d496a5f
--- /dev/null
+++ b/queue-4.13/net-emac-fix-napi-poll-list-corruption.patch
@@ -0,0 +1,54 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Christian Lamparter <chunkeey@googlemail.com>
+Date: Tue, 19 Sep 2017 19:35:18 +0200
+Subject: net: emac: Fix napi poll list corruption
+
+From: Christian Lamparter <chunkeey@googlemail.com>
+
+
+[ Upstream commit f55956065ec94e3e9371463d693a1029c4cc3007 ]
+
+This patch is pretty much a carbon copy of
+commit 3079c652141f ("caif: Fix napi poll list corruption")
+with "caif" replaced by "emac".
+
+The commit d75b1ade567f ("net: less interrupt masking in NAPI")
+breaks emac.
+
+It is now required that if the entire budget is consumed when poll
+returns, the napi poll_list must remain empty.  However, like some
+other drivers emac tries to do a last-ditch check and if there is
+more work it will call napi_reschedule and then immediately process
+some of this new work.  Should the entire budget be consumed while
+processing such new work then we will violate the new caller
+contract.
+
+This patch fixes this by not touching any work when we reschedule
+in emac.
+
+Signed-off-by: Christian Lamparter <chunkeey@googlemail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/ibm/emac/mal.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/ibm/emac/mal.c
++++ b/drivers/net/ethernet/ibm/emac/mal.c
+@@ -402,7 +402,7 @@ static int mal_poll(struct napi_struct *
+       unsigned long flags;
+ 
+       MAL_DBG2(mal, "poll(%d)" NL, budget);
+- again:
++
+       /* Process TX skbs */
+       list_for_each(l, &mal->poll_list) {
+               struct mal_commac *mc =
+@@ -451,7 +451,6 @@ static int mal_poll(struct napi_struct *
+                       spin_lock_irqsave(&mal->lock, flags);
+                       mal_disable_eob_irq(mal);
+                       spin_unlock_irqrestore(&mal->lock, flags);
+-                      goto again;
+               }
+               mc->ops->poll_tx(mc->dev);
+       }
diff --git a/queue-4.13/net-ipv6-fix-regression-of-no-rtm_deladdr-sent-after-dad-failure.patch b/queue-4.13/net-ipv6-fix-regression-of-no-rtm_deladdr-sent-after-dad-failure.patch

new file mode 100644 (file)

index 0000000..d3186a7
--- /dev/null
+++ b/queue-4.13/net-ipv6-fix-regression-of-no-rtm_deladdr-sent-after-dad-failure.patch
@@ -0,0 +1,45 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Mike Manning <mmanning@brocade.com>
+Date: Mon, 4 Sep 2017 15:52:55 +0100
+Subject: net: ipv6: fix regression of no RTM_DELADDR sent after DAD failure
+
+From: Mike Manning <mmanning@brocade.com>
+
+
+[ Upstream commit 6819a14ecbe2e089e5c5bb74edecafdde2028a00 ]
+
+Commit f784ad3d79e5 ("ipv6: do not send RTM_DELADDR for tentative
+addresses") incorrectly assumes that no RTM_NEWADDR are sent for
+addresses in tentative state, as this does happen for the standard
+IPv6 use-case of DAD failure, see the call to ipv6_ifa_notify() in
+addrconf_dad_stop(). So as a result of this change, no RTM_DELADDR is
+sent after DAD failure for a link-local when strict DAD (accept_dad=2)
+is configured, or on the next admin down in other cases. The absence
+of this notification breaks backwards compatibility and causes problems
+after DAD failure if this notification was being relied on. The
+solution is to allow RTM_DELADDR to still be sent after DAD failure.
+
+Fixes: f784ad3d79e5 ("ipv6: do not send RTM_DELADDR for tentative addresses")
+Signed-off-by: Mike Manning <mmanning@brocade.com>
+Cc: Mahesh Bandewar <maheshb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/addrconf.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -4982,9 +4982,10 @@ static void inet6_ifa_notify(int event,
+ 
+       /* Don't send DELADDR notification for TENTATIVE address,
+        * since NEWADDR notification is sent only after removing
+-       * TENTATIVE flag.
++       * TENTATIVE flag, if DAD has not failed.
+        */
+-      if (ifa->flags & IFA_F_TENTATIVE && event == RTM_DELADDR)
++      if (ifa->flags & IFA_F_TENTATIVE && !(ifa->flags & IFA_F_DADFAILED) &&
++          event == RTM_DELADDR)
+               return;
+ 
+       skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC);
diff --git a/queue-4.13/net-mlx5e-ipoib-fix-access-to-invalid-memory-address.patch b/queue-4.13/net-mlx5e-ipoib-fix-access-to-invalid-memory-address.patch

new file mode 100644 (file)

index 0000000..ea0a654
--- /dev/null
+++ b/queue-4.13/net-mlx5e-ipoib-fix-access-to-invalid-memory-address.patch
@@ -0,0 +1,42 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Roi Dayan <roid@mellanox.com>
+Date: Mon, 21 Aug 2017 12:04:50 +0300
+Subject: net/mlx5e: IPoIB, Fix access to invalid memory address
+
+From: Roi Dayan <roid@mellanox.com>
+
+
+[ Upstream commit 38e8a5c040d3ec99a8351c688dcdf0f549611565 ]
+
+When cleaning rdma netdevice we need to save the mdev pointer
+because priv is released when we release netdev.
+
+This bug was found using the kernel address sanitizer (KASAN).
+use-after-free in mlx5_rdma_netdev_free+0xe3/0x100 [mlx5_core]
+
+Fixes: 48935bbb7ae8 ("net/mlx5e: IPoIB, Add netdevice profile skeleton")
+Signed-off-by: Roi Dayan <roid@mellanox.com>
+Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+@@ -572,12 +572,13 @@ void mlx5_rdma_netdev_free(struct net_de
+ {
+       struct mlx5e_priv          *priv    = mlx5i_epriv(netdev);
+       const struct mlx5e_profile *profile = priv->profile;
++      struct mlx5_core_dev       *mdev    = priv->mdev;
+ 
+       mlx5e_detach_netdev(priv);
+       profile->cleanup(priv);
+       destroy_workqueue(priv->wq);
+       free_netdev(netdev);
+ 
+-      mlx5e_destroy_mdev_resources(priv->mdev);
++      mlx5e_destroy_mdev_resources(mdev);
+ }
+ EXPORT_SYMBOL(mlx5_rdma_netdev_free);
diff --git a/queue-4.13/net-phy-fix-mask-value-write-on-gmii2rgmii-converter-speed-register.patch b/queue-4.13/net-phy-fix-mask-value-write-on-gmii2rgmii-converter-speed-register.patch

new file mode 100644 (file)

index 0000000..65320ca
--- /dev/null
+++ b/queue-4.13/net-phy-fix-mask-value-write-on-gmii2rgmii-converter-speed-register.patch
@@ -0,0 +1,39 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Fahad Kunnathadi <fahad.kunnathadi@dexceldesigns.com>
+Date: Fri, 15 Sep 2017 12:01:58 +0530
+Subject: net: phy: Fix mask value write on gmii2rgmii converter speed register
+
+From: Fahad Kunnathadi <fahad.kunnathadi@dexceldesigns.com>
+
+
+[ Upstream commit f2654a4781318dc7ab8d6cde66f1fa39eab980a9 ]
+
+To clear the Speed Selection in the MDIO control register (0x10),
+i.e. to clear bits 6 and 13 while keeping the other bits unchanged,
+the mask value has to be inverted with the bitwise NOT operator (~)
+before the AND operation.
+
+This patch clears current speed selection before writing the
+new speed settings to gmii2rgmii converter
+
+Fixes: f411a6160bd4 ("net: phy: Add gmiitorgmii converter support")
+
+Signed-off-by: Fahad Kunnathadi <fahad.kunnathadi@dexceldesigns.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/xilinx_gmii2rgmii.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/phy/xilinx_gmii2rgmii.c
++++ b/drivers/net/phy/xilinx_gmii2rgmii.c
+@@ -44,7 +44,7 @@ static int xgmiitorgmii_read_status(stru
+       priv->phy_drv->read_status(phydev);
+ 
+       val = mdiobus_read(phydev->mdio.bus, priv->addr, XILINX_GMII2RGMII_REG);
+-      val &= XILINX_GMII2RGMII_SPEED_MASK;
++      val &= ~XILINX_GMII2RGMII_SPEED_MASK;
+ 
+       if (phydev->speed == SPEED_1000)
+               val |= BMCR_SPEED1000;
diff --git a/queue-4.13/net-qcom-emac-specify-the-correct-size-when-mapping-a-dma-buffer.patch b/queue-4.13/net-qcom-emac-specify-the-correct-size-when-mapping-a-dma-buffer.patch

new file mode 100644 (file)

index 0000000..29b4a83
--- /dev/null
+++ b/queue-4.13/net-qcom-emac-specify-the-correct-size-when-mapping-a-dma-buffer.patch
@@ -0,0 +1,36 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Timur Tabi <timur@codeaurora.org>
+Date: Fri, 22 Sep 2017 15:32:44 -0500
+Subject: net: qcom/emac: specify the correct size when mapping a DMA buffer
+
+From: Timur Tabi <timur@codeaurora.org>
+
+
+[ Upstream commit a93ad944f4ff9a797abff17c73fc4b1e4a1d9141 ]
+
+When mapping the RX DMA buffers, the driver was accidentally specifying
+zero for the buffer length.  Under normal circumstances, SWIOTLB does not
+need to allocate a bounce buffer, so the address is just mapped without
+checking the size field.  This is why the error was not detected earlier.
+
+Fixes: b9b17debc69d ("net: emac: emac gigabit ethernet controller driver")
+Cc: stable@vger.kernel.org
+Signed-off-by: Timur Tabi <timur@codeaurora.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/qualcomm/emac/emac-mac.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c
++++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
+@@ -876,7 +876,8 @@ static void emac_mac_rx_descs_refill(str
+ 
+               curr_rxbuf->dma_addr =
+                       dma_map_single(adpt->netdev->dev.parent, skb->data,
+-                                     curr_rxbuf->length, DMA_FROM_DEVICE);
++                                     adpt->rxbuf_size, DMA_FROM_DEVICE);
++
+               ret = dma_mapping_error(adpt->netdev->dev.parent,
+                                       curr_rxbuf->dma_addr);
+               if (ret) {
diff --git a/queue-4.13/net-rtnetlink-fix-info-leak-in-rtm_getstats-call.patch b/queue-4.13/net-rtnetlink-fix-info-leak-in-rtm_getstats-call.patch

new file mode 100644 (file)

index 0000000..035293e
--- /dev/null
+++ b/queue-4.13/net-rtnetlink-fix-info-leak-in-rtm_getstats-call.patch
@@ -0,0 +1,37 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Date: Tue, 3 Oct 2017 13:20:48 +0300
+Subject: net: rtnetlink: fix info leak in RTM_GETSTATS call
+
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+
+
+[ Upstream commit ce024f42c2e28b6bce4ecc1e891b42f57f753892 ]
+
+When RTM_GETSTATS was added the fields of its header struct were not all
+initialized when returning the result thus leaking 4 bytes of information
+to user-space per rtnl_fill_statsinfo call, so initialize them now. Thanks
+to Alexander Potapenko for the detailed report and bisection.
+
+Reported-by: Alexander Potapenko <glider@google.com>
+Fixes: 10c9ead9f3c6 ("rtnetlink: add new RTM_GETSTATS message to dump link stats")
+Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Acked-by: Roopa Prabhu <roopa@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/rtnetlink.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -3867,6 +3867,9 @@ static int rtnl_fill_statsinfo(struct sk
+               return -EMSGSIZE;
+ 
+       ifsm = nlmsg_data(nlh);
++      ifsm->family = PF_UNSPEC;
++      ifsm->pad1 = 0;
++      ifsm->pad2 = 0;
+       ifsm->ifindex = dev->ifindex;
+       ifsm->filter_mask = filter_mask;
+ 
diff --git a/queue-4.13/net-sched-cls_matchall-fix-crash-when-used-with-classful-qdisc.patch b/queue-4.13/net-sched-cls_matchall-fix-crash-when-used-with-classful-qdisc.patch

new file mode 100644 (file)

index 0000000..e1df12b
--- /dev/null
+++ b/queue-4.13/net-sched-cls_matchall-fix-crash-when-used-with-classful-qdisc.patch
@@ -0,0 +1,53 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Davide Caratti <dcaratti@redhat.com>
+Date: Sat, 16 Sep 2017 14:02:21 +0200
+Subject: net/sched: cls_matchall: fix crash when used with classful qdisc
+
+From: Davide Caratti <dcaratti@redhat.com>
+
+
+[ Upstream commit 3ff4cbec87da48b0ec1f7b6196607b034de0c680 ]
+
+this script, edited from Linux Advanced Routing and Traffic Control guide
+
+tc q a dev en0 root handle 1: htb default a
+tc c a dev en0 parent 1:  classid 1:1 htb rate 6mbit burst 15k
+tc c a dev en0 parent 1:1 classid 1:a htb rate 5mbit ceil 6mbit burst 15k
+tc c a dev en0 parent 1:1 classid 1:b htb rate 1mbit ceil 6mbit burst 15k
+tc f a dev en0 parent 1:0 prio 1 $clsname $clsargs classid 1:b
+ping $address -c1
+tc -s c s dev en0
+
+classifies traffic to 1:b or 1:a, depending on whether or not the packet
+matches the pattern $clsargs of filter $clsname. However, when $clsname is
+'matchall', a systematic crash can be observed in htb_classify(). HTB and
+classful qdiscs don't assign initial value to struct tcf_result, but then
+they expect it to contain valid values after filters have been run. Thus,
+current 'matchall' ignores the TCA_MATCHALL_CLASSID attribute, configured
+by user, and makes HTB (and classful qdiscs) dereference random pointers.
+
+By assigning head->res to *res in mall_classify(), before the actions are
+invoked, we fix this crash and enable TCA_MATCHALL_CLASSID functionality,
+that had no effect on 'matchall' classifier since its first introduction.
+
+BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1460213
+Reported-by: Jiri Benc <jbenc@redhat.com>
+Fixes: b87f7936a932 ("net/sched: introduce Match-all classifier")
+Signed-off-by: Davide Caratti <dcaratti@redhat.com>
+Acked-by: Yotam Gigi <yotamg@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/cls_matchall.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/sched/cls_matchall.c
++++ b/net/sched/cls_matchall.c
+@@ -32,6 +32,7 @@ static int mall_classify(struct sk_buff
+       if (tc_skip_sw(head->flags))
+               return -1;
+ 
++      *res = head->res;
+       return tcf_exts_exec(skb, &head->exts, res);
+ }
+ 
diff --git a/queue-4.13/net-sched-fix-use-after-free-in-tcf_action_destroy-and-tcf_del_walker.patch b/queue-4.13/net-sched-fix-use-after-free-in-tcf_action_destroy-and-tcf_del_walker.patch

new file mode 100644 (file)

index 0000000..4bf8873
--- /dev/null
+++ b/queue-4.13/net-sched-fix-use-after-free-in-tcf_action_destroy-and-tcf_del_walker.patch
@@ -0,0 +1,60 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Jiri Pirko <jiri@mellanox.com>
+Date: Wed, 13 Sep 2017 17:32:37 +0200
+Subject: net: sched: fix use-after-free in tcf_action_destroy and tcf_del_walker
+
+From: Jiri Pirko <jiri@mellanox.com>
+
+
+[ Upstream commit 255cd50f207ae8ec7b22663246c833407744e634 ]
+
+Recent commit d7fb60b9cafb ("net_sched: get rid of tcfa_rcu") removed
+freeing in call_rcu, which changed already existing hard-to-hit
+race condition into 100% hit:
+
+[  598.599825] BUG: unable to handle kernel NULL pointer dereference at 0000000000000030
+[  598.607782] IP: tcf_action_destroy+0xc0/0x140
+
+Or:
+
+[   40.858924] BUG: unable to handle kernel NULL pointer dereference at 0000000000000030
+[   40.862840] IP: tcf_generic_walker+0x534/0x820
+
+Fix this by storing the ops and using them directly for the module_put call.
+
+Fixes: a85a970af265 ("net_sched: move tc_action into tcf_common")
+Signed-off-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/act_api.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/sched/act_api.c
++++ b/net/sched/act_api.c
+@@ -174,7 +174,7 @@ static int tcf_del_walker(struct tcf_has
+               hlist_for_each_entry_safe(p, n, head, tcfa_head) {
+                       ret = __tcf_hash_release(p, false, true);
+                       if (ret == ACT_P_DELETED) {
+-                              module_put(p->ops->owner);
++                              module_put(ops->owner);
+                               n_i++;
+                       } else if (ret < 0)
+                               goto nla_put_failure;
+@@ -506,13 +506,15 @@ EXPORT_SYMBOL(tcf_action_exec);
+ 
+ int tcf_action_destroy(struct list_head *actions, int bind)
+ {
++      const struct tc_action_ops *ops;
+       struct tc_action *a, *tmp;
+       int ret = 0;
+ 
+       list_for_each_entry_safe(a, tmp, actions, list) {
++              ops = a->ops;
+               ret = __tcf_hash_release(a, bind, true);
+               if (ret == ACT_P_DELETED)
+-                      module_put(a->ops->owner);
++                      module_put(ops->owner);
+               else if (ret < 0)
+                       return ret;
+       }
diff --git a/queue-4.13/net-set-sk_prot_creator-when-cloning-sockets-to-the-right-proto.patch b/queue-4.13/net-set-sk_prot_creator-when-cloning-sockets-to-the-right-proto.patch

new file mode 100644 (file)

index 0000000..abc1f09
--- /dev/null
+++ b/queue-4.13/net-set-sk_prot_creator-when-cloning-sockets-to-the-right-proto.patch
@@ -0,0 +1,107 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Christoph Paasch <cpaasch@apple.com>
+Date: Tue, 26 Sep 2017 17:38:50 -0700
+Subject: net: Set sk_prot_creator when cloning sockets to the right proto
+
+From: Christoph Paasch <cpaasch@apple.com>
+
+
+[ Upstream commit 9d538fa60bad4f7b23193c89e843797a1cf71ef3 ]
+
+sk->sk_prot and sk->sk_prot_creator can differ when the app uses
+IPV6_ADDRFORM (transforming an IPv6-socket to an IPv4-one).
+Which is why sk_prot_creator is there to make sure that sk_prot_free()
+does the kmem_cache_free() on the right kmem_cache slab.
+
+Now, if such a socket gets transformed back to a listening socket (using
+connect() with AF_UNSPEC) we will allocate an IPv4 tcp_sock through
+sk_clone_lock() when a new connection comes in. But sk_prot_creator will
+still point to the IPv6 kmem_cache (as everything got copied in
+sk_clone_lock()). When freeing, we will thus put this
+memory back into the IPv6 kmem_cache although it was allocated in the
+IPv4 cache. I have seen memory corruption happening because of this.
+
+With slub-debugging and MEMCG_KMEM enabled this gives the warning
+       "cache_from_obj: Wrong slab cache. TCPv6 but object is from TCP"
+
+A C-program to trigger this:
+
+void main(void)
+{
+        int fd = socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP);
+        int new_fd, newest_fd, client_fd;
+        struct sockaddr_in6 bind_addr;
+        struct sockaddr_in bind_addr4, client_addr1, client_addr2;
+        struct sockaddr unsp;
+        int val;
+
+        memset(&bind_addr, 0, sizeof(bind_addr));
+        bind_addr.sin6_family = AF_INET6;
+        bind_addr.sin6_port = ntohs(42424);
+
+        memset(&client_addr1, 0, sizeof(client_addr1));
+        client_addr1.sin_family = AF_INET;
+        client_addr1.sin_port = ntohs(42424);
+        client_addr1.sin_addr.s_addr = inet_addr("127.0.0.1");
+
+        memset(&client_addr2, 0, sizeof(client_addr2));
+        client_addr2.sin_family = AF_INET;
+        client_addr2.sin_port = ntohs(42421);
+        client_addr2.sin_addr.s_addr = inet_addr("127.0.0.1");
+
+        memset(&unsp, 0, sizeof(unsp));
+        unsp.sa_family = AF_UNSPEC;
+
+        bind(fd, (struct sockaddr *)&bind_addr, sizeof(bind_addr));
+
+        listen(fd, 5);
+
+        client_fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+        connect(client_fd, (struct sockaddr *)&client_addr1, sizeof(client_addr1));
+        new_fd = accept(fd, NULL, NULL);
+        close(fd);
+
+        val = AF_INET;
+        setsockopt(new_fd, SOL_IPV6, IPV6_ADDRFORM, &val, sizeof(val));
+
+        connect(new_fd, &unsp, sizeof(unsp));
+
+        memset(&bind_addr4, 0, sizeof(bind_addr4));
+        bind_addr4.sin_family = AF_INET;
+        bind_addr4.sin_port = ntohs(42421);
+        bind(new_fd, (struct sockaddr *)&bind_addr4, sizeof(bind_addr4));
+
+        listen(new_fd, 5);
+
+        client_fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+        connect(client_fd, (struct sockaddr *)&client_addr2, sizeof(client_addr2));
+
+        newest_fd = accept(new_fd, NULL, NULL);
+        close(new_fd);
+
+        close(client_fd);
+        close(new_fd);
+}
+
+As far as I can see, this bug has been there since the beginning of the
+git-days.
+
+Signed-off-by: Christoph Paasch <cpaasch@apple.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sock.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1646,6 +1646,8 @@ struct sock *sk_clone_lock(const struct
+ 
+               sock_copy(newsk, sk);
+ 
++              newsk->sk_prot_creator = sk->sk_prot;
++
+               /* SANITY */
+               if (likely(newsk->sk_net_refcnt))
+                       get_net(sock_net(newsk));
diff --git a/queue-4.13/net-stmmac-cocci-spatch-of_table.patch b/queue-4.13/net-stmmac-cocci-spatch-of_table.patch

new file mode 100644 (file)

index 0000000..08b6462
--- /dev/null
+++ b/queue-4.13/net-stmmac-cocci-spatch-of_table.patch
@@ -0,0 +1,30 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Thomas Meyer <thomas@m3y3r.de>
+Date: Thu, 21 Sep 2017 08:24:27 +0200
+Subject: net: stmmac: Cocci spatch "of_table"
+
+From: Thomas Meyer <thomas@m3y3r.de>
+
+
+[ Upstream commit f0ef1f4f2b772c0a1c8b35a6ae3edf974cc110dd ]
+
+Make sure (of/i2c/platform)_device_id tables are NULL terminated.
+Found by coccinelle spatch "misc/of_table.cocci"
+
+Signed-off-by: Thomas Meyer <thomas@m3y3r.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+@@ -315,6 +315,7 @@ static int stmmac_dt_phy(struct plat_stm
+               { .compatible = "allwinner,sun8i-h3-emac" },
+               { .compatible = "allwinner,sun8i-v3s-emac" },
+               { .compatible = "allwinner,sun50i-a64-emac" },
++              {},
+       };
+ 
+       /* If phy-handle property is passed from DT, use it as the PHY */
diff --git a/queue-4.13/net_sched-always-reset-qdisc-backlog-in-qdisc_reset.patch b/queue-4.13/net_sched-always-reset-qdisc-backlog-in-qdisc_reset.patch

new file mode 100644 (file)

index 0000000..180b554
--- /dev/null
+++ b/queue-4.13/net_sched-always-reset-qdisc-backlog-in-qdisc_reset.patch
@@ -0,0 +1,33 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+Date: Wed, 20 Sep 2017 15:45:36 +0300
+Subject: net_sched: always reset qdisc backlog in qdisc_reset()
+
+From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+
+
+[ Upstream commit c8e1812960eeae42e2183154927028511c4bc566 ]
+
+An SKB stored in qdisc->gso_skb is also counted in the backlog.
+
+Some qdiscs don't reset the backlog to zero in ->reset();
+for example, sfq just dequeues and frees all queued skbs.
+
+Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too")
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_generic.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/sched/sch_generic.c
++++ b/net/sched/sch_generic.c
+@@ -681,6 +681,7 @@ void qdisc_reset(struct Qdisc *qdisc)
+               qdisc->gso_skb = NULL;
+       }
+       qdisc->q.qlen = 0;
++      qdisc->qstats.backlog = 0;
+ }
+ EXPORT_SYMBOL(qdisc_reset);
+ 
diff --git a/queue-4.13/net_sched-gen_estimator-fix-scaling-error-in-bytes-packets-samples.patch b/queue-4.13/net_sched-gen_estimator-fix-scaling-error-in-bytes-packets-samples.patch

new file mode 100644 (file)

index 0000000..986db75
--- /dev/null
+++ b/queue-4.13/net_sched-gen_estimator-fix-scaling-error-in-bytes-packets-samples.patch
@@ -0,0 +1,63 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 13 Sep 2017 11:16:45 -0700
+Subject: net_sched: gen_estimator: fix scaling error in bytes/packets samples
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit ca558e185972d8ecd308760abf972f5d408bcff0 ]
+
+Denys reported wrong rate estimations with HTB classes.
+
+It appears the bug was added in linux-4.10, since my tests
+were using intervals of one second only.
+
+HTB using 4 sec default rate estimators, reported rates
+were 4x higher.
+
+We need to properly scale the bytes/packets samples before
+integrating them in EWMA.
+
+Tested:
+ echo 1 >/sys/module/sch_htb/parameters/htb_rate_est
+
+ Set up HTB with one class with a rate/ceil of 5Gbit
+
+ Generate traffic on this class
+
+ tc -s -d cl sh dev eth0 classid 7002:11
+class htb 7002:11 parent 7002:1 prio 5 quantum 200000 rate 5Gbit ceil
+5Gbit linklayer ethernet burst 80000b/1 mpu 0b cburst 80000b/1 mpu 0b
+level 0 rate_handle 1
+ Sent 1488215421648 bytes 982969243 pkt (dropped 0, overlimits 0
+requeues 0)
+ rate 5Gbit 412814pps backlog 136260b 2p requeues 0
+ TCP pkts/rtx 982969327/45 bytes 1488215557414/68130
+ lended: 22732826 borrowed: 0 giants: 0
+ tokens: -1684 ctokens: -1684
+
+Fixes: 1c0d32fde5bd ("net_sched: gen_estimator: complete rewrite of rate estimators")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Denys Fedoryshchenko <nuclearcat@nuclearcat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/gen_estimator.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/core/gen_estimator.c
++++ b/net/core/gen_estimator.c
+@@ -83,10 +83,10 @@ static void est_timer(unsigned long arg)
+       u64 rate, brate;
+ 
+       est_fetch_counters(est, &b);
+-      brate = (b.bytes - est->last_bytes) << (8 - est->ewma_log);
++      brate = (b.bytes - est->last_bytes) << (10 - est->ewma_log - est->intvl_log);
+       brate -= (est->avbps >> est->ewma_log);
+ 
+-      rate = (u64)(b.packets - est->last_packets) << (8 - est->ewma_log);
++      rate = (u64)(b.packets - est->last_packets) << (10 - est->ewma_log - est->intvl_log);
+       rate -= (est->avpps >> est->ewma_log);
+ 
+       write_seqcount_begin(&est->seq);
diff --git a/queue-4.13/netlink-do-not-proceed-if-dump-s-start-errs.patch b/queue-4.13/netlink-do-not-proceed-if-dump-s-start-errs.patch

new file mode 100644 (file)

index 0000000..a2a45f9
--- /dev/null
+++ b/queue-4.13/netlink-do-not-proceed-if-dump-s-start-errs.patch
@@ -0,0 +1,49 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: "Jason A. Donenfeld" <Jason@zx2c4.com>
+Date: Thu, 28 Sep 2017 00:41:44 +0200
+Subject: netlink: do not proceed if dump's start() errs
+
+From: "Jason A. Donenfeld" <Jason@zx2c4.com>
+
+
+[ Upstream commit fef0035c0f31322d417d1954bba5ab959bf91183 ]
+
+Drivers that use the start method for netlink dumping rely on dumpit not
+being called if start fails. For example, ila_xlat.c allocates memory
+and assigns it to cb->args[0] in its start() function. It might fail to
+do that and return -ENOMEM instead. However, even when returning an
+error, dumpit will be called, which, in the example above, quickly
+dereferences the memory in cb->args[0], which will OOPS the kernel. This
+is but one example of how this goes wrong.
+
+Since start() has always been a function with an int return type, it
+therefore makes sense to use it properly, rather than ignoring it. This
+patch thus returns early and does not call dumpit() when start() fails.
+
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Cc: Johannes Berg <johannes@sipsolutions.net>
+Reviewed-by: Johannes Berg <johannes@sipsolutions.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netlink/af_netlink.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -2262,10 +2262,13 @@ int __netlink_dump_start(struct sock *ss
+ 
+       mutex_unlock(nlk->cb_mutex);
+ 
++      ret = 0;
+       if (cb->start)
+-              cb->start(cb);
++              ret = cb->start(cb);
++
++      if (!ret)
++              ret = netlink_dump(sk);
+ 
+-      ret = netlink_dump(sk);
+       sock_put(sk);
+ 
+       if (ret)
diff --git a/queue-4.13/openvswitch-fix-an-error-handling-path-in-ovs_nla_init_match_and_action.patch b/queue-4.13/openvswitch-fix-an-error-handling-path-in-ovs_nla_init_match_and_action.patch

new file mode 100644 (file)

index 0000000..f738f01
--- /dev/null
+++ b/queue-4.13/openvswitch-fix-an-error-handling-path-in-ovs_nla_init_match_and_action.patch
@@ -0,0 +1,34 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+Date: Mon, 11 Sep 2017 21:56:20 +0200
+Subject: openvswitch: Fix an error handling path in 'ovs_nla_init_match_and_action()'
+
+From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+
+
+[ Upstream commit 5829e62ac17a40ab08c1b905565604a4b5fa7af6 ]
+
+All other error handling paths in this function go through the 'error'
+label. This one should do the same.
+
+Fixes: 9cc9a5cb176c ("datapath: Avoid using stack larger than 1024.")
+Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+Acked-by: Pravin B Shelar <pshelar@ovn.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/openvswitch/datapath.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/openvswitch/datapath.c
++++ b/net/openvswitch/datapath.c
+@@ -1126,7 +1126,8 @@ static int ovs_nla_init_match_and_action
+               if (!a[OVS_FLOW_ATTR_KEY]) {
+                       OVS_NLERR(log,
+                                 "Flow key attribute not present in set flow.");
+-                      return -EINVAL;
++                      error = -EINVAL;
++                      goto error;
+               }
+ 
+               *acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key,
diff --git a/queue-4.13/packet-hold-bind-lock-when-rebinding-to-fanout-hook.patch b/queue-4.13/packet-hold-bind-lock-when-rebinding-to-fanout-hook.patch

new file mode 100644 (file)

index 0000000..99cfb28
--- /dev/null
+++ b/queue-4.13/packet-hold-bind-lock-when-rebinding-to-fanout-hook.patch
@@ -0,0 +1,74 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Willem de Bruijn <willemb@google.com>
+Date: Thu, 14 Sep 2017 17:14:41 -0400
+Subject: packet: hold bind lock when rebinding to fanout hook
+
+From: Willem de Bruijn <willemb@google.com>
+
+
+[ Upstream commit 008ba2a13f2d04c947adc536d19debb8fe66f110 ]
+
+Packet socket bind operations must hold the po->bind_lock. This keeps
+po->running consistent with whether the socket is actually on a ptype
+list to receive packets.
+
+fanout_add unbinds a socket and its packet_rcv/tpacket_rcv call, then
+binds the fanout object to receive through packet_rcv_fanout.
+
+Make it hold the po->bind_lock when testing po->running and rebinding.
+Else, it can race with other rebind operations, such as that in
+packet_set_ring from packet_rcv to tpacket_rcv. Concurrent updates
+can result in a socket being added to a fanout group twice, causing
+use-after-free KASAN bug reports, among others.
+
+Reported independently by both trinity and syzkaller.
+Verified that the syzkaller reproducer passes after this patch.
+
+Fixes: dc99f600698d ("packet: Add fanout support.")
+Reported-by: nixiaoming <nixiaoming@huawei.com>
+Signed-off-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c |   16 +++++++++++-----
+ 1 file changed, 11 insertions(+), 5 deletions(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -1686,10 +1686,6 @@ static int fanout_add(struct sock *sk, u
+ 
+       mutex_lock(&fanout_mutex);
+ 
+-      err = -EINVAL;
+-      if (!po->running)
+-              goto out;
+-
+       err = -EALREADY;
+       if (po->fanout)
+               goto out;
+@@ -1751,7 +1747,10 @@ static int fanout_add(struct sock *sk, u
+               list_add(&match->list, &fanout_list);
+       }
+       err = -EINVAL;
+-      if (match->type == type &&
++
++      spin_lock(&po->bind_lock);
++      if (po->running &&
++          match->type == type &&
+           match->prot_hook.type == po->prot_hook.type &&
+           match->prot_hook.dev == po->prot_hook.dev) {
+               err = -ENOSPC;
+@@ -1763,6 +1762,13 @@ static int fanout_add(struct sock *sk, u
+                       err = 0;
+               }
+       }
++      spin_unlock(&po->bind_lock);
++
++      if (err && !refcount_read(&match->sk_ref)) {
++              list_del(&match->list);
++              kfree(match);
++      }
++
+ out:
+       if (err && rollover) {
+               kfree(rollover);
diff --git a/queue-4.13/packet-in-packet_do_bind-test-fanout-with-bind_lock-held.patch b/queue-4.13/packet-in-packet_do_bind-test-fanout-with-bind_lock-held.patch

new file mode 100644 (file)

index 0000000..e516b0b
--- /dev/null
+++ b/queue-4.13/packet-in-packet_do_bind-test-fanout-with-bind_lock-held.patch
@@ -0,0 +1,53 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Willem de Bruijn <willemb@google.com>
+Date: Tue, 26 Sep 2017 12:19:37 -0400
+Subject: packet: in packet_do_bind, test fanout with bind_lock held
+
+From: Willem de Bruijn <willemb@google.com>
+
+
+[ Upstream commit 4971613c1639d8e5f102c4e797c3bf8f83a5a69e ]
+
+Once a socket has po->fanout set, it remains a member of the group
+until it is destroyed. The prot_hook must be constant and identical
+across sockets in the group.
+
+If fanout_add races with packet_do_bind between the test of po->fanout
+and taking the lock, the bind call may make type or dev inconsistent
+with that of the fanout group.
+
+Hold po->bind_lock when testing po->fanout to avoid this race.
+
+I had to introduce artificial delay (local_bh_enable) to actually
+observe the race.
+
+Fixes: dc99f600698d ("packet: Add fanout support.")
+Signed-off-by: Willem de Bruijn <willemb@google.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c |    8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -3071,13 +3071,15 @@ static int packet_do_bind(struct sock *s
+       int ret = 0;
+       bool unlisted = false;
+ 
+-      if (po->fanout)
+-              return -EINVAL;
+-
+       lock_sock(sk);
+       spin_lock(&po->bind_lock);
+       rcu_read_lock();
+ 
++      if (po->fanout) {
++              ret = -EINVAL;
++              goto out_unlock;
++      }
++
+       if (name) {
+               dev = dev_get_by_name_rcu(sock_net(sk), name);
+               if (!dev) {
diff --git a/queue-4.13/packet-only-test-po-has_vnet_hdr-once-in-packet_snd.patch b/queue-4.13/packet-only-test-po-has_vnet_hdr-once-in-packet_snd.patch

new file mode 100644 (file)

index 0000000..22c6bd3
--- /dev/null
+++ b/queue-4.13/packet-only-test-po-has_vnet_hdr-once-in-packet_snd.patch
@@ -0,0 +1,53 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Willem de Bruijn <willemb@google.com>
+Date: Tue, 26 Sep 2017 12:20:17 -0400
+Subject: packet: only test po->has_vnet_hdr once in packet_snd
+
+From: Willem de Bruijn <willemb@google.com>
+
+
+[ Upstream commit da7c9561015e93d10fe6aab73e9288e0d09d65a6 ]
+
+Packet socket option po->has_vnet_hdr can be updated concurrently with
+other operations if no ring is attached.
+
+Do not test the option twice in packet_snd, as the value may change in
+between calls. A race on setsockopt disable may cause a packet > mtu
+to be sent without having GSO options set.
+
+Fixes: bfd5f4a3d605 ("packet: Add GSO/csum offload support.")
+Signed-off-by: Willem de Bruijn <willemb@google.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -2842,6 +2842,7 @@ static int packet_snd(struct socket *soc
+       struct virtio_net_hdr vnet_hdr = { 0 };
+       int offset = 0;
+       struct packet_sock *po = pkt_sk(sk);
++      bool has_vnet_hdr = false;
+       int hlen, tlen, linear;
+       int extra_len = 0;
+ 
+@@ -2885,6 +2886,7 @@ static int packet_snd(struct socket *soc
+               err = packet_snd_vnet_parse(msg, &len, &vnet_hdr);
+               if (err)
+                       goto out_unlock;
++              has_vnet_hdr = true;
+       }
+ 
+       if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
+@@ -2943,7 +2945,7 @@ static int packet_snd(struct socket *soc
+       skb->priority = sk->sk_priority;
+       skb->mark = sockc.mark;
+ 
+-      if (po->has_vnet_hdr) {
++      if (has_vnet_hdr) {
+               err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le());
+               if (err)
+                       goto out_free;
diff --git a/queue-4.13/sctp-potential-read-out-of-bounds-in-sctp_ulpevent_type_enabled.patch b/queue-4.13/sctp-potential-read-out-of-bounds-in-sctp_ulpevent_type_enabled.patch

new file mode 100644 (file)

index 0000000..79acc98
--- /dev/null
+++ b/queue-4.13/sctp-potential-read-out-of-bounds-in-sctp_ulpevent_type_enabled.patch
@@ -0,0 +1,46 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Thu, 14 Sep 2017 02:00:54 +0300
+Subject: sctp: potential read out of bounds in sctp_ulpevent_type_enabled()
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+
+[ Upstream commit fa5f7b51fc3080c2b195fa87c7eca7c05e56f673 ]
+
+This code causes a static checker warning because Smatch doesn't trust
+anything that comes from skb->data.  I've reviewed this code and I do
+think skb->data can be controlled by the user here.
+
+The sctp_event_subscribe struct has 13 __u8 fields and we want to see
+if ours is non-zero.  sn_type can be any value in the 0-USHRT_MAX range.
+We're subtracting SCTP_SN_TYPE_BASE which is 1 << 15 so we could read
+either before the start of the struct or after the end.
+
+This is a very old bug and it's surprising that it would go undetected
+for so long but my theory is that it just doesn't have a big impact so
+it would be hard to notice.
+
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/sctp/ulpevent.h |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/include/net/sctp/ulpevent.h
++++ b/include/net/sctp/ulpevent.h
+@@ -153,8 +153,12 @@ __u16 sctp_ulpevent_get_notification_typ
+ static inline int sctp_ulpevent_type_enabled(__u16 sn_type,
+                                            struct sctp_event_subscribe *mask)
+ {
++      int offset = sn_type - SCTP_SN_TYPE_BASE;
+       char *amask = (char *) mask;
+-      return amask[sn_type - SCTP_SN_TYPE_BASE];
++
++      if (offset >= sizeof(struct sctp_event_subscribe))
++              return 0;
++      return amask[offset];
+ }
+ 
+ /* Given an event subscription, is this event enabled? */
diff --git a/queue-4.13/series b/queue-4.13/series

index b518d50e1e8fb8b84c9ced0ca9a52ca9bdff0283..e6fc21bc0a954becddd3c6894779257416d371ab 100644 (file)
--- a/queue-4.13/series
+++ b/queue-4.13/series
@@ -1,2 +1,50 @@
  imx-media-of-avoid-uninitialized-variable-warning.patch
  usb-dwc3-ep0-fix-dma-starvation-by-assigning-req-trb-on-ep0.patch
+mlxsw-spectrum-fix-eeprom-access-in-case-of-sfp-sfp.patch
+net-bonding-fix-transmit-load-balancing-in-balance-alb-mode-if-specified-by-sysfs.patch
+openvswitch-fix-an-error-handling-path-in-ovs_nla_init_match_and_action.patch
+mlxsw-spectrum-prevent-mirred-related-crash-on-removal.patch
+net-bonding-fix-tlb_dynamic_lb-default-value.patch
+net_sched-gen_estimator-fix-scaling-error-in-bytes-packets-samples.patch
+net-sched-fix-use-after-free-in-tcf_action_destroy-and-tcf_del_walker.patch
+sctp-potential-read-out-of-bounds-in-sctp_ulpevent_type_enabled.patch
+tcp-update-skb-skb_mstamp-more-carefully.patch
+bpf-verifier-reject-bpf_alu64-bpf_end.patch
+tcp-fix-data-delivery-rate.patch
+udpv6-fix-the-checksum-computation-when-hw-checksum-does-not-apply.patch
+ip6_gre-skb_push-ipv6hdr-before-packing-the-header-in-ip6gre_header.patch
+net-phy-fix-mask-value-write-on-gmii2rgmii-converter-speed-register.patch
+ip6_tunnel-do-not-allow-loading-ip6_tunnel-if-ipv6-is-disabled-in-cmdline.patch
+net-sched-cls_matchall-fix-crash-when-used-with-classful-qdisc.patch
+8139too-revisit-napi_complete_done-usage.patch
+bpf-do-not-disable-enable-bh-in-bpf_map_free_id.patch
+tcp-fastopen-fix-on-syn-data-transmit-failure.patch
+net-emac-fix-napi-poll-list-corruption.patch
+net-ipv6-fix-regression-of-no-rtm_deladdr-sent-after-dad-failure.patch
+packet-hold-bind-lock-when-rebinding-to-fanout-hook.patch
+bpf-one-perf-event-close-won-t-free-bpf-program-attached-by-another-perf-event.patch
+net-change-skb-mac_header-when-generic-xdp-calls-adjust_head.patch
+isdn-i4l-fetch-the-ppp_write-buffer-in-one-shot.patch
+net_sched-always-reset-qdisc-backlog-in-qdisc_reset.patch
+net-stmmac-cocci-spatch-of_table.patch
+net-qcom-emac-specify-the-correct-size-when-mapping-a-dma-buffer.patch
+vti-fix-use-after-free-in-vti_tunnel_xmit-vti6_tnl_xmit.patch
+l2tp-fix-race-condition-in-l2tp_tunnel_delete.patch
+tun-bail-out-from-tun_get_user-if-the-skb-is-empty.patch
+net-dsa-mv88e6xxx-allow-dsa-and-cpu-ports-in-multiple-vlans.patch
+net-dsa-fix-network-device-registration-order.patch
+packet-in-packet_do_bind-test-fanout-with-bind_lock-held.patch
+packet-only-test-po-has_vnet_hdr-once-in-packet_snd.patch
+net-dsa-mv88e6xxx-lock-mutex-when-freeing-irqs.patch
+net-set-sk_prot_creator-when-cloning-sockets-to-the-right-proto.patch
+net-mlx5e-ipoib-fix-access-to-invalid-memory-address.patch
+netlink-do-not-proceed-if-dump-s-start-errs.patch
+ip6_gre-ip6gre_tap-device-should-keep-dst.patch
+ip6_tunnel-update-mtu-properly-for-arphrd_ether-tunnel-device-in-tx-path.patch
+ipv4-early-demux-can-return-an-error-code.patch
+udp-perform-source-validation-for-mcast-early-demux.patch
+tipc-use-only-positive-error-codes-in-messages.patch
+l2tp-fix-l2tp_eth-module-loading.patch
+socket-bpf-fix-possible-use-after-free.patch
+net-rtnetlink-fix-info-leak-in-rtm_getstats-call.patch
+bpf-fix-bpf_tail_call-x64-jit.patch
diff --git a/queue-4.13/socket-bpf-fix-possible-use-after-free.patch b/queue-4.13/socket-bpf-fix-possible-use-after-free.patch

new file mode 100644 (file)

index 0000000..7053ba7
--- /dev/null
+++ b/queue-4.13/socket-bpf-fix-possible-use-after-free.patch
@@ -0,0 +1,71 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 2 Oct 2017 12:20:51 -0700
+Subject: socket, bpf: fix possible use after free
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit eefca20eb20c66b06cf5ed09b49b1a7caaa27b7b ]
+
+Starting from linux-4.4, 3WHS no longer takes the listener lock.
+
+Since this time, we might hit a use-after-free in sk_filter_charge(),
+if the filter we got in the memcpy() of the listener content
+just happened to be replaced by a thread changing listener BPF filter.
+
+To fix this, we need to make sure the filter refcount is not already
+zero before incrementing it again.
+
+Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/filter.c |   12 ++++++++----
+ net/core/sock.c   |    5 ++++-
+ 2 files changed, 12 insertions(+), 5 deletions(-)
+
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -975,10 +975,14 @@ static bool __sk_filter_charge(struct so
+ 
+ bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
+ {
+-      bool ret = __sk_filter_charge(sk, fp);
+-      if (ret)
+-              refcount_inc(&fp->refcnt);
+-      return ret;
++      if (!refcount_inc_not_zero(&fp->refcnt))
++              return false;
++
++      if (!__sk_filter_charge(sk, fp)) {
++              sk_filter_release(fp);
++              return false;
++      }
++      return true;
+ }
+ 
+ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1675,13 +1675,16 @@ struct sock *sk_clone_lock(const struct
+ 
+               sock_reset_flag(newsk, SOCK_DONE);
+ 
+-              filter = rcu_dereference_protected(newsk->sk_filter, 1);
++              rcu_read_lock();
++              filter = rcu_dereference(sk->sk_filter);
+               if (filter != NULL)
+                       /* though it's an empty new sock, the charging may fail
+                        * if sysctl_optmem_max was changed between creation of
+                        * original socket and cloning
+                        */
+                       is_charged = sk_filter_charge(newsk, filter);
++              RCU_INIT_POINTER(newsk->sk_filter, filter);
++              rcu_read_unlock();
+ 
+               if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
+                       /* We need to make sure that we don't uncharge the new
diff --git a/queue-4.13/tcp-fastopen-fix-on-syn-data-transmit-failure.patch b/queue-4.13/tcp-fastopen-fix-on-syn-data-transmit-failure.patch

new file mode 100644 (file)

index 0000000..59f566b
--- /dev/null
+++ b/queue-4.13/tcp-fastopen-fix-on-syn-data-transmit-failure.patch
@@ -0,0 +1,97 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 19 Sep 2017 10:05:57 -0700
+Subject: tcp: fastopen: fix on syn-data transmit failure
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit b5b7db8d680464b1d631fd016f5e093419f0bfd9 ]
+
+Our recent change exposed a bug in TCP Fastopen Client that syzkaller
+found right away [1]
+
+When we prepare skb with SYN+DATA, we attempt to transmit it,
+and we update socket state as if the transmit was a success.
+
+In socket RTX queue we have two skbs, one with the SYN alone,
+and a second one containing the DATA.
+
+When (malicious) ACK comes in, we now complain that second one had no
+skb_mstamp.
+
+The proper fix is to make sure that if the transmit failed, we do not
+pretend we sent the DATA skb, and make it our send_head.
+
+When 3WHS completes, we can now send the DATA right away, without having
+to wait for a timeout.
+
+[1]
+WARNING: CPU: 0 PID: 100189 at net/ipv4/tcp_input.c:3117 tcp_clean_rtx_queue+0x2057/0x2ab0 net/ipv4/tcp_input.c:3117()
+
+ WARN_ON_ONCE(last_ackt == 0);
+
+Modules linked in:
+CPU: 0 PID: 100189 Comm: syz-executor1 Not tainted
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+ 0000000000000000 ffff8800b35cb1d8 ffffffff81cad00d 0000000000000000
+ ffffffff828a4347 ffff88009f86c080 ffffffff8316eb20 0000000000000d7f
+ ffff8800b35cb220 ffffffff812c33c2 ffff8800baad2440 00000009d46575c0
+Call Trace:
+ [<ffffffff81cad00d>] __dump_stack
+ [<ffffffff81cad00d>] dump_stack+0xc1/0x124
+ [<ffffffff812c33c2>] warn_slowpath_common+0xe2/0x150
+ [<ffffffff812c361e>] warn_slowpath_null+0x2e/0x40
+ [<ffffffff828a4347>] tcp_clean_rtx_queue+0x2057/0x2ab0 n
+ [<ffffffff828ae6fd>] tcp_ack+0x151d/0x3930
+ [<ffffffff828baa09>] tcp_rcv_state_process+0x1c69/0x4fd0
+ [<ffffffff828efb7f>] tcp_v4_do_rcv+0x54f/0x7c0
+ [<ffffffff8258aacb>] sk_backlog_rcv
+ [<ffffffff8258aacb>] __release_sock+0x12b/0x3a0
+ [<ffffffff8258ad9e>] release_sock+0x5e/0x1c0
+ [<ffffffff8294a785>] inet_wait_for_connect
+ [<ffffffff8294a785>] __inet_stream_connect+0x545/0xc50
+ [<ffffffff82886f08>] tcp_sendmsg_fastopen
+ [<ffffffff82886f08>] tcp_sendmsg+0x2298/0x35a0
+ [<ffffffff82952515>] inet_sendmsg+0xe5/0x520
+ [<ffffffff8257152f>] sock_sendmsg_nosec
+ [<ffffffff8257152f>] sock_sendmsg+0xcf/0x110
+
+Fixes: 8c72c65b426b ("tcp: update skb->skb_mstamp more carefully")
+Fixes: 783237e8daf1 ("net-tcp: Fast Open client - sending SYN-data")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Cc: Neal Cardwell <ncardwell@google.com>
+Cc: Yuchung Cheng <ycheng@google.com>
+Acked-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c |    9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -3420,6 +3420,10 @@ static int tcp_send_syn_data(struct sock
+               goto done;
+       }
+ 
++      /* data was not sent, this is our new send_head */
++      sk->sk_send_head = syn_data;
++      tp->packets_out -= tcp_skb_pcount(syn_data);
++
+ fallback:
+       /* Send a regular SYN with Fast Open cookie request option */
+       if (fo->cookie.len > 0)
+@@ -3472,6 +3476,11 @@ int tcp_connect(struct sock *sk)
+        */
+       tp->snd_nxt = tp->write_seq;
+       tp->pushed_seq = tp->write_seq;
++      buff = tcp_send_head(sk);
++      if (unlikely(buff)) {
++              tp->snd_nxt     = TCP_SKB_CB(buff)->seq;
++              tp->pushed_seq  = TCP_SKB_CB(buff)->seq;
++      }
+       TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
+ 
+       /* Timer for repeating the SYN until an answer. */
diff --git a/queue-4.13/tcp-fix-data-delivery-rate.patch b/queue-4.13/tcp-fix-data-delivery-rate.patch

new file mode 100644 (file)

index 0000000..098188c
--- /dev/null
+++ b/queue-4.13/tcp-fix-data-delivery-rate.patch
@@ -0,0 +1,46 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 15 Sep 2017 16:47:42 -0700
+Subject: tcp: fix data delivery rate
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit fc22579917eb7e13433448a342f1cb1592920940 ]
+
+Now that skb->skb_mstamp is updated later, we also need to call
+tcp_rate_skb_sent() after the update is done.
+
+Fixes: 8c72c65b426b ("tcp: update skb->skb_mstamp more carefully")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c |    7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -1002,8 +1002,6 @@ static int tcp_transmit_skb(struct sock
+       if (clone_it) {
+               TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
+                       - tp->snd_una;
+-              tcp_rate_skb_sent(sk, skb);
+-
+               oskb = skb;
+               if (unlikely(skb_cloned(skb)))
+                       skb = pskb_copy(skb, gfp_mask);
+@@ -1128,9 +1126,10 @@ static int tcp_transmit_skb(struct sock
+               tcp_enter_cwr(sk);
+               err = net_xmit_eval(err);
+       }
+-      if (!err && oskb)
++      if (!err && oskb) {
+               oskb->skb_mstamp = tp->tcp_mstamp;
+-
++              tcp_rate_skb_sent(sk, oskb);
++      }
+       return err;
+ }
+ 
diff --git a/queue-4.13/tcp-update-skb-skb_mstamp-more-carefully.patch b/queue-4.13/tcp-update-skb-skb_mstamp-more-carefully.patch

new file mode 100644 (file)

index 0000000..a4bfe7d
--- /dev/null
+++ b/queue-4.13/tcp-update-skb-skb_mstamp-more-carefully.patch
@@ -0,0 +1,143 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 13 Sep 2017 20:30:39 -0700
+Subject: tcp: update skb->skb_mstamp more carefully
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 8c72c65b426b47b3c166a8fef0d8927fe5e8a28d ]
+
+liujian reported a problem in TCP_USER_TIMEOUT processing with a patch
+in tcp_probe_timer() :
+      https://www.spinics.net/lists/netdev/msg454496.html
+
+After investigations, the root cause of the problem is that we update
+skb->skb_mstamp of skbs in write queue, even if the attempt to send a
+clone or copy of it failed. One reason being a routing problem.
+
+This patch prevents this, solving liujian's issue.
+
+It also removes a potential RTT miscalculation, since
+__tcp_retransmit_skb() is not OR-ing TCP_SKB_CB(skb)->sacked with
+TCPCB_EVER_RETRANS if a failure happens, but skb->skb_mstamp has
+been changed.
+
+A future ACK would then lead to a very small RTT sample and min_rtt
+would then be lowered to this too small value.
+
+Tested:
+
+# cat user_timeout.pkt
+--local_ip=192.168.102.64
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 `ifconfig tun0 192.168.102.64/16; ip ro add 192.0.2.1 dev tun0`
+
+   +0 < S 0:0(0) win 0 <mss 1460>
+   +0 > S. 0:0(0) ack 1 <mss 1460>
+
+  +.1 < . 1:1(0) ack 1 win 65530
+   +0 accept(3, ..., ...) = 4
+
+   +0 setsockopt(4, SOL_TCP, TCP_USER_TIMEOUT, [3000], 4) = 0
+   +0 write(4, ..., 24) = 24
+   +0 > P. 1:25(24) ack 1 win 29200
+   +.1 < . 1:1(0) ack 25 win 65530
+
+//change the ipaddress
+   +1 `ifconfig tun0 192.168.0.10/16`
+
+   +1 write(4, ..., 24) = 24
+   +1 write(4, ..., 24) = 24
+   +1 write(4, ..., 24) = 24
+   +1 write(4, ..., 24) = 24
+
+   +0 `ifconfig tun0 192.168.102.64/16`
+   +0 < . 1:2(1) ack 25 win 65530
+   +0 `ifconfig tun0 192.168.0.10/16`
+
+   +3 write(4, ..., 24) = -1
+
+# ./packetdrill user_timeout.pkt
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: liujian <liujian56@huawei.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Acked-by: Yuchung Cheng <ycheng@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c |   19 ++++++++++++-------
+ 1 file changed, 12 insertions(+), 7 deletions(-)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -991,6 +991,7 @@ static int tcp_transmit_skb(struct sock
+       struct tcp_skb_cb *tcb;
+       struct tcp_out_options opts;
+       unsigned int tcp_options_size, tcp_header_size;
++      struct sk_buff *oskb = NULL;
+       struct tcp_md5sig_key *md5;
+       struct tcphdr *th;
+       int err;
+@@ -998,12 +999,12 @@ static int tcp_transmit_skb(struct sock
+       BUG_ON(!skb || !tcp_skb_pcount(skb));
+       tp = tcp_sk(sk);
+ 
+-      skb->skb_mstamp = tp->tcp_mstamp;
+       if (clone_it) {
+               TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
+                       - tp->snd_una;
+               tcp_rate_skb_sent(sk, skb);
+ 
++              oskb = skb;
+               if (unlikely(skb_cloned(skb)))
+                       skb = pskb_copy(skb, gfp_mask);
+               else
+@@ -1011,6 +1012,7 @@ static int tcp_transmit_skb(struct sock
+               if (unlikely(!skb))
+                       return -ENOBUFS;
+       }
++      skb->skb_mstamp = tp->tcp_mstamp;
+ 
+       inet = inet_sk(sk);
+       tcb = TCP_SKB_CB(skb);
+@@ -1122,12 +1124,14 @@ static int tcp_transmit_skb(struct sock
+ 
+       err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
+ 
+-      if (likely(err <= 0))
+-              return err;
+-
+-      tcp_enter_cwr(sk);
++      if (unlikely(err > 0)) {
++              tcp_enter_cwr(sk);
++              err = net_xmit_eval(err);
++      }
++      if (!err && oskb)
++              oskb->skb_mstamp = tp->tcp_mstamp;
+ 
+-      return net_xmit_eval(err);
++      return err;
+ }
+ 
+ /* This routine just queues the buffer for sending.
+@@ -2866,10 +2870,11 @@ int __tcp_retransmit_skb(struct sock *sk
+                    skb_headroom(skb) >= 0xFFFF)) {
+               struct sk_buff *nskb;
+ 
+-              skb->skb_mstamp = tp->tcp_mstamp;
+               nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
+               err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
+                            -ENOBUFS;
++              if (!err)
++                      skb->skb_mstamp = tp->tcp_mstamp;
+       } else {
+               err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
+       }
diff --git a/queue-4.13/tipc-use-only-positive-error-codes-in-messages.patch b/queue-4.13/tipc-use-only-positive-error-codes-in-messages.patch

new file mode 100644 (file)

index 0000000..aeb408d
--- /dev/null
+++ b/queue-4.13/tipc-use-only-positive-error-codes-in-messages.patch
@@ -0,0 +1,38 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
+Date: Fri, 29 Sep 2017 10:02:54 +0200
+Subject: tipc: use only positive error codes in messages
+
+From: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
+
+
+[ Upstream commit aad06212d36cf34859428a0a279e5c14ee5c9e26 ]
+
+In commit e3a77561e7d32 ("tipc: split up function tipc_msg_eval()"),
+we have updated the function tipc_msg_lookup_dest() to set the error
+codes to negative values at destination lookup failures. Thus when
+the function sets the error code to -TIPC_ERR_NO_NAME, it is inserted
+into the 4 bit error field of the message header as 0xf instead of
+TIPC_ERR_NO_NAME (1). The value 0xf is an unknown error code.
+
+In this commit, we set only positive error codes.
+
+Fixes: e3a77561e7d32 ("tipc: split up function tipc_msg_eval()")
+Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tipc/msg.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/tipc/msg.c
++++ b/net/tipc/msg.c
+@@ -551,7 +551,7 @@ bool tipc_msg_lookup_dest(struct net *ne
+               return false;
+       if (msg_errcode(msg))
+               return false;
+-      *err = -TIPC_ERR_NO_NAME;
++      *err = TIPC_ERR_NO_NAME;
+       if (skb_linearize(skb))
+               return false;
+       msg = buf_msg(skb);
diff --git a/queue-4.13/tun-bail-out-from-tun_get_user-if-the-skb-is-empty.patch b/queue-4.13/tun-bail-out-from-tun_get_user-if-the-skb-is-empty.patch

new file mode 100644 (file)

index 0000000..3c7905f
--- /dev/null
+++ b/queue-4.13/tun-bail-out-from-tun_get_user-if-the-skb-is-empty.patch
@@ -0,0 +1,111 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Alexander Potapenko <glider@google.com>
+Date: Thu, 28 Sep 2017 11:32:37 +0200
+Subject: tun: bail out from tun_get_user() if the skb is empty
+
+From: Alexander Potapenko <glider@google.com>
+
+
+[ Upstream commit 2580c4c17aee3ad58e9751012bad278dd074ccae ]
+
+KMSAN (https://github.com/google/kmsan) reported accessing uninitialized
+skb->data[0] in the case the skb is empty (i.e. skb->len is 0):
+
+================================================
+BUG: KMSAN: use of uninitialized memory in tun_get_user+0x19ba/0x3770
+CPU: 0 PID: 3051 Comm: probe Not tainted 4.13.0+ #3140
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
+Call Trace:
+...
+ __msan_warning_32+0x66/0xb0 mm/kmsan/kmsan_instr.c:477
+ tun_get_user+0x19ba/0x3770 drivers/net/tun.c:1301
+ tun_chr_write_iter+0x19f/0x300 drivers/net/tun.c:1365
+ call_write_iter ./include/linux/fs.h:1743
+ new_sync_write fs/read_write.c:457
+ __vfs_write+0x6c3/0x7f0 fs/read_write.c:470
+ vfs_write+0x3e4/0x770 fs/read_write.c:518
+ SYSC_write+0x12f/0x2b0 fs/read_write.c:565
+ SyS_write+0x55/0x80 fs/read_write.c:557
+ do_syscall_64+0x242/0x330 arch/x86/entry/common.c:284
+ entry_SYSCALL64_slow_path+0x25/0x25 arch/x86/entry/entry_64.S:245
+...
+origin:
+...
+ kmsan_poison_shadow+0x6e/0xc0 mm/kmsan/kmsan.c:211
+ slab_alloc_node mm/slub.c:2732
+ __kmalloc_node_track_caller+0x351/0x370 mm/slub.c:4351
+ __kmalloc_reserve net/core/skbuff.c:138
+ __alloc_skb+0x26a/0x810 net/core/skbuff.c:231
+ alloc_skb ./include/linux/skbuff.h:903
+ alloc_skb_with_frags+0x1d7/0xc80 net/core/skbuff.c:4756
+ sock_alloc_send_pskb+0xabf/0xfe0 net/core/sock.c:2037
+ tun_alloc_skb drivers/net/tun.c:1144
+ tun_get_user+0x9a8/0x3770 drivers/net/tun.c:1274
+ tun_chr_write_iter+0x19f/0x300 drivers/net/tun.c:1365
+ call_write_iter ./include/linux/fs.h:1743
+ new_sync_write fs/read_write.c:457
+ __vfs_write+0x6c3/0x7f0 fs/read_write.c:470
+ vfs_write+0x3e4/0x770 fs/read_write.c:518
+ SYSC_write+0x12f/0x2b0 fs/read_write.c:565
+ SyS_write+0x55/0x80 fs/read_write.c:557
+ do_syscall_64+0x242/0x330 arch/x86/entry/common.c:284
+ return_from_SYSCALL_64+0x0/0x6a arch/x86/entry/entry_64.S:245
+================================================
+
+Make sure tun_get_user() doesn't touch skb->data[0] unless there is
+actual data.
+
+C reproducer below:
+==========================
+    // autogenerated by syzkaller (http://github.com/google/syzkaller)
+
+    #define _GNU_SOURCE
+
+    #include <fcntl.h>
+    #include <linux/if_tun.h>
+    #include <netinet/ip.h>
+    #include <net/if.h>
+    #include <string.h>
+    #include <sys/ioctl.h>
+
+    int main()
+    {
+      int sock = socket(PF_INET, SOCK_STREAM, IPPROTO_IP);
+      int tun_fd = open("/dev/net/tun", O_RDWR);
+      struct ifreq req;
+      memset(&req, 0, sizeof(struct ifreq));
+      strcpy((char*)&req.ifr_name, "gre0");
+      req.ifr_flags = IFF_UP | IFF_MULTICAST;
+      ioctl(tun_fd, TUNSETIFF, &req);
+      ioctl(sock, SIOCSIFFLAGS, "gre0");
+      write(tun_fd, "hi", 0);
+      return 0;
+    }
+==========================
+
+Signed-off-by: Alexander Potapenko <glider@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tun.c |    8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -1298,11 +1298,13 @@ static ssize_t tun_get_user(struct tun_s
+       switch (tun->flags & TUN_TYPE_MASK) {
+       case IFF_TUN:
+               if (tun->flags & IFF_NO_PI) {
+-                      switch (skb->data[0] & 0xf0) {
+-                      case 0x40:
++                      u8 ip_version = skb->len ? (skb->data[0] >> 4) : 0;
++
++                      switch (ip_version) {
++                      case 4:
+                               pi.proto = htons(ETH_P_IP);
+                               break;
+-                      case 0x60:
++                      case 6:
+                               pi.proto = htons(ETH_P_IPV6);
+                               break;
+                       default:
diff --git a/queue-4.13/udp-perform-source-validation-for-mcast-early-demux.patch b/queue-4.13/udp-perform-source-validation-for-mcast-early-demux.patch

new file mode 100644 (file)

index 0000000..cfee690
--- /dev/null
+++ b/queue-4.13/udp-perform-source-validation-for-mcast-early-demux.patch
@@ -0,0 +1,191 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Thu, 28 Sep 2017 15:51:37 +0200
+Subject: udp: perform source validation for mcast early demux
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+
+[ Upstream commit bc044e8db7962e727a75b591b9851ff2ac5cf846 ]
+
+The UDP early demux can leverage the rx dst cache even for
+multicast unconnected sockets.
+
+In such a scenario the ipv4 source address is validated only on
+the first packet in the given flow. After that, when we fetch
+the dst entry from the socket rx cache, we stop enforcing
+the rp_filter and we even start accepting any kind of martian
+addresses.
+
+Disabling the dst cache for unconnected multicast sockets will
+cause a large performance regression, nearly reducing by half the
+max ingress tput.
+
+Instead we factor out a route helper to completely validate an
+skb source address for multicast packets and we call it from
+the UDP early demux for mcast packets landing on unconnected
+sockets, after successfully fetching the related cached dst entry.
+
+This still gives a measurable, but limited performance
+regression:
+
+                  rp_filter = 0    rp_filter = 1
+edmux disabled:   1182 Kpps        1127 Kpps
+edmux before:     2238 Kpps        2238 Kpps
+edmux after:      2037 Kpps        2019 Kpps
+
+The above figures are on top of current net tree.
+Applying the net-next commit 6e617de84e87 ("net: avoid a full
+fib lookup when rp_filter is disabled.") the delta with
+rp_filter == 0 will decrease even more.
+
+Fixes: 421b3885bf6d ("udp: ipv4: Add udp early demux")
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/route.h |    4 +++-
+ net/ipv4/route.c    |   46 ++++++++++++++++++++++++++--------------------
+ net/ipv4/udp.c      |   13 ++++++++++++-
+ 3 files changed, 41 insertions(+), 22 deletions(-)
+
+--- a/include/net/route.h
++++ b/include/net/route.h
+@@ -175,7 +175,9 @@ static inline struct rtable *ip_route_ou
+       fl4->fl4_gre_key = gre_key;
+       return ip_route_output_key(net, fl4);
+ }
+-
++int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
++                        u8 tos, struct net_device *dev,
++                        struct in_device *in_dev, u32 *itag);
+ int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src,
+                        u8 tos, struct net_device *devin);
+ int ip_route_input_rcu(struct sk_buff *skb, __be32 dst, __be32 src,
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -1520,43 +1520,56 @@ struct rtable *rt_dst_alloc(struct net_d
+ EXPORT_SYMBOL(rt_dst_alloc);
+ 
+ /* called in rcu_read_lock() section */
+-static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+-                              u8 tos, struct net_device *dev, int our)
++int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
++                        u8 tos, struct net_device *dev,
++                        struct in_device *in_dev, u32 *itag)
+ {
+-      struct rtable *rth;
+-      struct in_device *in_dev = __in_dev_get_rcu(dev);
+-      unsigned int flags = RTCF_MULTICAST;
+-      u32 itag = 0;
+       int err;
+ 
+       /* Primary sanity checks. */
+-
+       if (!in_dev)
+               return -EINVAL;
+ 
+       if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
+           skb->protocol != htons(ETH_P_IP))
+-              goto e_inval;
++              return -EINVAL;
+ 
+       if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
+-              goto e_inval;
++              return -EINVAL;
+ 
+       if (ipv4_is_zeronet(saddr)) {
+               if (!ipv4_is_local_multicast(daddr))
+-                      goto e_inval;
++                      return -EINVAL;
+       } else {
+               err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
+-                                        in_dev, &itag);
++                                        in_dev, itag);
+               if (err < 0)
+-                      goto e_err;
++                      return err;
+       }
++      return 0;
++}
++
++/* called in rcu_read_lock() section */
++static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
++                           u8 tos, struct net_device *dev, int our)
++{
++      struct in_device *in_dev = __in_dev_get_rcu(dev);
++      unsigned int flags = RTCF_MULTICAST;
++      struct rtable *rth;
++      u32 itag = 0;
++      int err;
++
++      err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag);
++      if (err)
++              return err;
++
+       if (our)
+               flags |= RTCF_LOCAL;
+ 
+       rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
+                          IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
+       if (!rth)
+-              goto e_nobufs;
++              return -ENOBUFS;
+ 
+ #ifdef CONFIG_IP_ROUTE_CLASSID
+       rth->dst.tclassid = itag;
+@@ -1572,13 +1585,6 @@ static int ip_route_input_mc(struct sk_b
+ 
+       skb_dst_set(skb, &rth->dst);
+       return 0;
+-
+-e_nobufs:
+-      return -ENOBUFS;
+-e_inval:
+-      return -EINVAL;
+-e_err:
+-      return err;
+ }
+ 
+ 
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -2220,6 +2220,7 @@ static struct sock *__udp4_lib_demux_loo
+ int udp_v4_early_demux(struct sk_buff *skb)
+ {
+       struct net *net = dev_net(skb->dev);
++      struct in_device *in_dev = NULL;
+       const struct iphdr *iph;
+       const struct udphdr *uh;
+       struct sock *sk = NULL;
+@@ -2236,7 +2237,7 @@ int udp_v4_early_demux(struct sk_buff *s
+ 
+       if (skb->pkt_type == PACKET_BROADCAST ||
+           skb->pkt_type == PACKET_MULTICAST) {
+-              struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
++              in_dev = __in_dev_get_rcu(skb->dev);
+ 
+               if (!in_dev)
+                       return 0;
+@@ -2266,11 +2267,21 @@ int udp_v4_early_demux(struct sk_buff *s
+       if (dst)
+               dst = dst_check(dst, 0);
+       if (dst) {
++              u32 itag = 0;
++
+               /* set noref for now.
+                * any place which wants to hold dst has to call
+                * dst_hold_safe()
+                */
+               skb_dst_set_noref(skb, dst);
++
++              /* for unconnected multicast sockets we need to validate
++               * the source on each packet
++               */
++              if (!inet_sk(sk)->inet_daddr && in_dev)
++                      return ip_mc_validate_source(skb, iph->daddr,
++                                                   iph->saddr, iph->tos,
++                                                   skb->dev, in_dev, &itag);
+       }
+       return 0;
+ }
diff --git a/queue-4.13/udpv6-fix-the-checksum-computation-when-hw-checksum-does-not-apply.patch b/queue-4.13/udpv6-fix-the-checksum-computation-when-hw-checksum-does-not-apply.patch

new file mode 100644 (file)

index 0000000..f082b27
--- /dev/null
+++ b/queue-4.13/udpv6-fix-the-checksum-computation-when-hw-checksum-does-not-apply.patch
@@ -0,0 +1,36 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
+Date: Wed, 13 Sep 2017 19:30:51 -0600
+Subject: udpv6: Fix the checksum computation when HW checksum does not apply
+
+From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
+
+
+[ Upstream commit 63ecc3d9436f8012e49dc846d6cb0a85a3433517 ]
+
+While trying an ESP transport mode encryption for UDPv6 packets of
+datagram size 1436 with MTU 1500, checksum error was observed in
+the secondary fragment.
+
+This error occurs due to the UDP payload checksum being missed out
+when computing the full checksum for these packets in
+udp6_hwcsum_outgoing().
+
+Fixes: d39d938c8228 ("ipv6: Introduce udpv6_send_skb()")
+Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/udp.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -1011,6 +1011,7 @@ static void udp6_hwcsum_outgoing(struct
+                */
+               offset = skb_transport_offset(skb);
+               skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
++              csum = skb->csum;
+ 
+               skb->ip_summed = CHECKSUM_NONE;
+ 
diff --git a/queue-4.13/vti-fix-use-after-free-in-vti_tunnel_xmit-vti6_tnl_xmit.patch b/queue-4.13/vti-fix-use-after-free-in-vti_tunnel_xmit-vti6_tnl_xmit.patch

new file mode 100644 (file)

index 0000000..a6a4249
--- /dev/null
+++ b/queue-4.13/vti-fix-use-after-free-in-vti_tunnel_xmit-vti6_tnl_xmit.patch
@@ -0,0 +1,98 @@
+From foo@baz Mon Oct  9 09:32:35 CEST 2017
+From: Alexey Kodanev <alexey.kodanev@oracle.com>
+Date: Tue, 26 Sep 2017 15:14:29 +0300
+Subject: vti: fix use after free in vti_tunnel_xmit/vti6_tnl_xmit
+
+From: Alexey Kodanev <alexey.kodanev@oracle.com>
+
+
+[ Upstream commit 36f6ee22d2d66046e369757ec6bbe1c482957ba6 ]
+
+When running LTP IPsec tests, KASan might report:
+
+BUG: KASAN: use-after-free in vti_tunnel_xmit+0xeee/0xff0 [ip_vti]
+Read of size 4 at addr ffff880dc6ad1980 by task swapper/0/0
+...
+Call Trace:
+  <IRQ>
+  dump_stack+0x63/0x89
+  print_address_description+0x7c/0x290
+  kasan_report+0x28d/0x370
+  ? vti_tunnel_xmit+0xeee/0xff0 [ip_vti]
+  __asan_report_load4_noabort+0x19/0x20
+  vti_tunnel_xmit+0xeee/0xff0 [ip_vti]
+  ? vti_init_net+0x190/0x190 [ip_vti]
+  ? save_stack_trace+0x1b/0x20
+  ? save_stack+0x46/0xd0
+  dev_hard_start_xmit+0x147/0x510
+  ? icmp_echo.part.24+0x1f0/0x210
+  __dev_queue_xmit+0x1394/0x1c60
+...
+Freed by task 0:
+  save_stack_trace+0x1b/0x20
+  save_stack+0x46/0xd0
+  kasan_slab_free+0x70/0xc0
+  kmem_cache_free+0x81/0x1e0
+  kfree_skbmem+0xb1/0xe0
+  kfree_skb+0x75/0x170
+  kfree_skb_list+0x3e/0x60
+  __dev_queue_xmit+0x1298/0x1c60
+  dev_queue_xmit+0x10/0x20
+  neigh_resolve_output+0x3a8/0x740
+  ip_finish_output2+0x5c0/0xe70
+  ip_finish_output+0x4ba/0x680
+  ip_output+0x1c1/0x3a0
+  xfrm_output_resume+0xc65/0x13d0
+  xfrm_output+0x1e4/0x380
+  xfrm4_output_finish+0x5c/0x70
+
+This can be fixed by reading skb->len before calling dst_output().
+
+Fixes: b9959fd3b0fa ("vti: switch to new ip tunnel code")
+Fixes: 22e1b23dafa8 ("vti6: Support inter address family tunneling.")
+Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_vti.c  |    3 ++-
+ net/ipv6/ip6_vti.c |    3 ++-
+ 2 files changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/ip_vti.c
++++ b/net/ipv4/ip_vti.c
+@@ -168,6 +168,7 @@ static netdev_tx_t vti_xmit(struct sk_bu
+       struct ip_tunnel_parm *parms = &tunnel->parms;
+       struct dst_entry *dst = skb_dst(skb);
+       struct net_device *tdev;        /* Device to other host */
++      int pkt_len = skb->len;
+       int err;
+       int mtu;
+ 
+@@ -229,7 +230,7 @@ static netdev_tx_t vti_xmit(struct sk_bu
+ 
+       err = dst_output(tunnel->net, skb->sk, skb);
+       if (net_xmit_eval(err) == 0)
+-              err = skb->len;
++              err = pkt_len;
+       iptunnel_xmit_stats(dev, err);
+       return NETDEV_TX_OK;
+ 
+--- a/net/ipv6/ip6_vti.c
++++ b/net/ipv6/ip6_vti.c
+@@ -445,6 +445,7 @@ vti6_xmit(struct sk_buff *skb, struct ne
+       struct dst_entry *dst = skb_dst(skb);
+       struct net_device *tdev;
+       struct xfrm_state *x;
++      int pkt_len = skb->len;
+       int err = -1;
+       int mtu;
+ 
+@@ -502,7 +503,7 @@ vti6_xmit(struct sk_buff *skb, struct ne
+               struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
+ 
+               u64_stats_update_begin(&tstats->syncp);
+-              tstats->tx_bytes += skb->len;
++              tstats->tx_bytes += pkt_len;
+               tstats->tx_packets++;
+               u64_stats_update_end(&tstats->syncp);
+       } else {
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Mon, 9 Oct 2017 07:33:30 +0000 (09:33 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Mon, 9 Oct 2017 07:33:30 +0000 (09:33 +0200)
queue-4.13/8139too-revisit-napi_complete_done-usage.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/bpf-do-not-disable-enable-bh-in-bpf_map_free_id.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/bpf-fix-bpf_tail_call-x64-jit.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/bpf-one-perf-event-close-won-t-free-bpf-program-attached-by-another-perf-event.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/bpf-verifier-reject-bpf_alu64-bpf_end.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/ip6_gre-ip6gre_tap-device-should-keep-dst.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/ip6_gre-skb_push-ipv6hdr-before-packing-the-header-in-ip6gre_header.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/ip6_tunnel-do-not-allow-loading-ip6_tunnel-if-ipv6-is-disabled-in-cmdline.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/ip6_tunnel-update-mtu-properly-for-arphrd_ether-tunnel-device-in-tx-path.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/ipv4-early-demux-can-return-an-error-code.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/isdn-i4l-fetch-the-ppp_write-buffer-in-one-shot.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/l2tp-fix-l2tp_eth-module-loading.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/l2tp-fix-race-condition-in-l2tp_tunnel_delete.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/mlxsw-spectrum-fix-eeprom-access-in-case-of-sfp-sfp.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/mlxsw-spectrum-prevent-mirred-related-crash-on-removal.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/net-bonding-fix-tlb_dynamic_lb-default-value.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/net-bonding-fix-transmit-load-balancing-in-balance-alb-mode-if-specified-by-sysfs.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/net-change-skb-mac_header-when-generic-xdp-calls-adjust_head.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/net-dsa-fix-network-device-registration-order.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/net-dsa-mv88e6xxx-allow-dsa-and-cpu-ports-in-multiple-vlans.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/net-dsa-mv88e6xxx-lock-mutex-when-freeing-irqs.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/net-emac-fix-napi-poll-list-corruption.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/net-ipv6-fix-regression-of-no-rtm_deladdr-sent-after-dad-failure.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/net-mlx5e-ipoib-fix-access-to-invalid-memory-address.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/net-phy-fix-mask-value-write-on-gmii2rgmii-converter-speed-register.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/net-qcom-emac-specify-the-correct-size-when-mapping-a-dma-buffer.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/net-rtnetlink-fix-info-leak-in-rtm_getstats-call.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/net-sched-cls_matchall-fix-crash-when-used-with-classful-qdisc.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/net-sched-fix-use-after-free-in-tcf_action_destroy-and-tcf_del_walker.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/net-set-sk_prot_creator-when-cloning-sockets-to-the-right-proto.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/net-stmmac-cocci-spatch-of_table.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/net_sched-always-reset-qdisc-backlog-in-qdisc_reset.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/net_sched-gen_estimator-fix-scaling-error-in-bytes-packets-samples.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/netlink-do-not-proceed-if-dump-s-start-errs.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/openvswitch-fix-an-error-handling-path-in-ovs_nla_init_match_and_action.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/packet-hold-bind-lock-when-rebinding-to-fanout-hook.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/packet-in-packet_do_bind-test-fanout-with-bind_lock-held.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/packet-only-test-po-has_vnet_hdr-once-in-packet_snd.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/sctp-potential-read-out-of-bounds-in-sctp_ulpevent_type_enabled.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/series		patch \| blob \| blame \| history
queue-4.13/socket-bpf-fix-possible-use-after-free.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/tcp-fastopen-fix-on-syn-data-transmit-failure.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/tcp-fix-data-delivery-rate.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/tcp-update-skb-skb_mstamp-more-carefully.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/tipc-use-only-positive-error-codes-in-messages.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/tun-bail-out-from-tun_get_user-if-the-skb-is-empty.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/udp-perform-source-validation-for-mcast-early-demux.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/udpv6-fix-the-checksum-computation-when-hw-checksum-does-not-apply.patch	[new file with mode: 0644]	patch \| blob
queue-4.13/vti-fix-use-after-free-in-vti_tunnel_xmit-vti6_tnl_xmit.patch	[new file with mode: 0644]	patch \| blob