git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.4-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 16 May 2016 18:25:50 +0000 (11:25 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 16 May 2016 18:25:50 +0000 (11:25 -0700)
added patches:
atl2-disable-unimplemented-scatter-gather-feature.patch
bpf-fix-check_map_func_compatibility-logic.patch
bpf-fix-double-fdput-in-replace_map_fd_with_map_ptr.patch
bpf-fix-refcnt-overflow.patch
bpf-verifier-reject-invalid-ld_abs-bpf_dw-instruction.patch
bridge-fix-igmp-mld-query-parsing.patch
cdc_mbim-apply-ndp-to-end-quirk-to-all-huawei-devices.patch
decnet-do-not-build-routes-to-devices-without-decnet-private-data.patch
drm-i915-bail-out-of-pipe-config-compute-loop-on-lpt.patch
drm-i915-bdw-add-missing-delay-during-l3-sqc-credit-programming.patch
drm-radeon-fix-dp-link-training-issue-with-second-4k-monitor.patch
drm-radeon-fix-pll-sharing-on-dce6.1-v2.patch
gre-do-not-pull-header-in-icmp-error-processing.patch
ipv4-fib-don-t-warn-when-primary-address-is-missing-if-in_dev-is-dead.patch
net-bridge-fix-old-ioctl-unlocked-net-device-walk.patch
net-fec-only-clear-a-queue-s-work-bit-if-the-queue-was-emptied.patch
net-fix-a-kernel-infoleak-in-x25-module.patch
net-fix-infoleak-in-llc.patch
net-fix-infoleak-in-rtnetlink.patch
net-implement-net_dbg_ratelimited-for-config_dynamic_debug-case.patch
net-mlx4_en-fix-endianness-bug-in-ipv6-csum-calculation.patch
net-mlx4_en-fix-spurious-timestamping-callbacks.patch
net-mlx5e-device-s-mtu-field-is-u16-and-not-int.patch
net-mlx5e-fix-minimum-mtu.patch
net-route-enforce-hoplimit-max-value.patch
net-sched-do-not-requeue-a-null-skb.patch
net-thunderx-avoid-exposing-kernel-stack.patch
net-use-skb_postpush_rcsum-instead-of-own-implementations.patch
net_sched-introduce-qdisc_replace-helper.patch
net_sched-update-hierarchical-backlog-too.patch
netem-segment-gso-packets-on-enqueue.patch
openvswitch-use-flow-protocol-when-recalculating-ipv6-checksums.patch
packet-fix-heap-info-leak-in-packet_diag_mclist-sock_diag-interface.patch
route-do-not-cache-fib-route-info-on-local-routes-with-oif.patch
samples-bpf-fix-trace_output-example.patch
sch_dsmark-update-backlog-as-well.patch
sch_htb-update-backlog-as-well.patch
soreuseport-fix-ordering-for-mixed-v4-v6-sockets.patch
tcp-refresh-skb-timestamp-at-retransmit-time.patch
uapi-glibc-compat-fix-compile-errors-when-glibc-net-if.h-included-before-linux-if.h-mime-version-1.0.patch
vlan-pull-on-__vlan_insert_tag-error-path-and-fix-csum-correction.patch
vsock-do-not-disconnect-socket-when-peer-has-shutdown-send-only.patch

43 files changed:
queue-4.4/atl2-disable-unimplemented-scatter-gather-feature.patch [new file with mode: 0644]
queue-4.4/bpf-fix-check_map_func_compatibility-logic.patch [new file with mode: 0644]
queue-4.4/bpf-fix-double-fdput-in-replace_map_fd_with_map_ptr.patch [new file with mode: 0644]
queue-4.4/bpf-fix-refcnt-overflow.patch [new file with mode: 0644]
queue-4.4/bpf-verifier-reject-invalid-ld_abs-bpf_dw-instruction.patch [new file with mode: 0644]
queue-4.4/bridge-fix-igmp-mld-query-parsing.patch [new file with mode: 0644]
queue-4.4/cdc_mbim-apply-ndp-to-end-quirk-to-all-huawei-devices.patch [new file with mode: 0644]
queue-4.4/decnet-do-not-build-routes-to-devices-without-decnet-private-data.patch [new file with mode: 0644]
queue-4.4/drm-i915-bail-out-of-pipe-config-compute-loop-on-lpt.patch [new file with mode: 0644]
queue-4.4/drm-i915-bdw-add-missing-delay-during-l3-sqc-credit-programming.patch [new file with mode: 0644]
queue-4.4/drm-radeon-fix-dp-link-training-issue-with-second-4k-monitor.patch [new file with mode: 0644]
queue-4.4/drm-radeon-fix-pll-sharing-on-dce6.1-v2.patch [new file with mode: 0644]
queue-4.4/gre-do-not-pull-header-in-icmp-error-processing.patch [new file with mode: 0644]
queue-4.4/ipv4-fib-don-t-warn-when-primary-address-is-missing-if-in_dev-is-dead.patch [new file with mode: 0644]
queue-4.4/net-bridge-fix-old-ioctl-unlocked-net-device-walk.patch [new file with mode: 0644]
queue-4.4/net-fec-only-clear-a-queue-s-work-bit-if-the-queue-was-emptied.patch [new file with mode: 0644]
queue-4.4/net-fix-a-kernel-infoleak-in-x25-module.patch [new file with mode: 0644]
queue-4.4/net-fix-infoleak-in-llc.patch [new file with mode: 0644]
queue-4.4/net-fix-infoleak-in-rtnetlink.patch [new file with mode: 0644]
queue-4.4/net-implement-net_dbg_ratelimited-for-config_dynamic_debug-case.patch [new file with mode: 0644]
queue-4.4/net-mlx4_en-fix-endianness-bug-in-ipv6-csum-calculation.patch [new file with mode: 0644]
queue-4.4/net-mlx4_en-fix-spurious-timestamping-callbacks.patch [new file with mode: 0644]
queue-4.4/net-mlx5e-device-s-mtu-field-is-u16-and-not-int.patch [new file with mode: 0644]
queue-4.4/net-mlx5e-fix-minimum-mtu.patch [new file with mode: 0644]
queue-4.4/net-route-enforce-hoplimit-max-value.patch [new file with mode: 0644]
queue-4.4/net-sched-do-not-requeue-a-null-skb.patch [new file with mode: 0644]
queue-4.4/net-thunderx-avoid-exposing-kernel-stack.patch [new file with mode: 0644]
queue-4.4/net-use-skb_postpush_rcsum-instead-of-own-implementations.patch [new file with mode: 0644]
queue-4.4/net_sched-introduce-qdisc_replace-helper.patch [new file with mode: 0644]
queue-4.4/net_sched-update-hierarchical-backlog-too.patch [new file with mode: 0644]
queue-4.4/netem-segment-gso-packets-on-enqueue.patch [new file with mode: 0644]
queue-4.4/openvswitch-use-flow-protocol-when-recalculating-ipv6-checksums.patch [new file with mode: 0644]
queue-4.4/packet-fix-heap-info-leak-in-packet_diag_mclist-sock_diag-interface.patch [new file with mode: 0644]
queue-4.4/route-do-not-cache-fib-route-info-on-local-routes-with-oif.patch [new file with mode: 0644]
queue-4.4/samples-bpf-fix-trace_output-example.patch [new file with mode: 0644]
queue-4.4/sch_dsmark-update-backlog-as-well.patch [new file with mode: 0644]
queue-4.4/sch_htb-update-backlog-as-well.patch [new file with mode: 0644]
queue-4.4/series
queue-4.4/soreuseport-fix-ordering-for-mixed-v4-v6-sockets.patch [new file with mode: 0644]
queue-4.4/tcp-refresh-skb-timestamp-at-retransmit-time.patch [new file with mode: 0644]
queue-4.4/uapi-glibc-compat-fix-compile-errors-when-glibc-net-if.h-included-before-linux-if.h-mime-version-1.0.patch [new file with mode: 0644]
queue-4.4/vlan-pull-on-__vlan_insert_tag-error-path-and-fix-csum-correction.patch [new file with mode: 0644]
queue-4.4/vsock-do-not-disconnect-socket-when-peer-has-shutdown-send-only.patch [new file with mode: 0644]

diff --git a/queue-4.4/atl2-disable-unimplemented-scatter-gather-feature.patch b/queue-4.4/atl2-disable-unimplemented-scatter-gather-feature.patch
new file mode 100644 (file)
index 0000000..444a01e
--- /dev/null
@@ -0,0 +1,40 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: Ben Hutchings <ben@decadent.org.uk>
+Date: Wed, 20 Apr 2016 23:23:08 +0100
+Subject: atl2: Disable unimplemented scatter/gather feature
+
+From: Ben Hutchings <ben@decadent.org.uk>
+
+[ Upstream commit f43bfaeddc79effbf3d0fcb53ca477cca66f3db8 ]
+
+atl2 includes NETIF_F_SG in hw_features even though it has no support
+for non-linear skbs.  This bug was originally harmless since the
+driver does not claim to implement checksum offload and that used to
+be a requirement for SG.
+
+Now that SG and checksum offload are independent features, if you
+explicitly enable SG *and* use one of the rare protocols that can use
+SG without checksum offload, this potentially leaks sensitive
+information (before you notice that it just isn't working).  Therefore
+this obscure bug has been designated CVE-2016-2117.
+
+Reported-by: Justin Yackoski <jyackoski@crypto-nite.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Fixes: ec5f06156423 ("net: Kill link between CSUM and SG features.")
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/atheros/atlx/atl2.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/atheros/atlx/atl2.c
++++ b/drivers/net/ethernet/atheros/atlx/atl2.c
+@@ -1412,7 +1412,7 @@ static int atl2_probe(struct pci_dev *pd
+       err = -EIO;
+-      netdev->hw_features = NETIF_F_SG | NETIF_F_HW_VLAN_CTAG_RX;
++      netdev->hw_features = NETIF_F_HW_VLAN_CTAG_RX;
+       netdev->features |= (NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX);
+       /* Init PHY as early as possible due to power saving issue  */
diff --git a/queue-4.4/bpf-fix-check_map_func_compatibility-logic.patch b/queue-4.4/bpf-fix-check_map_func_compatibility-logic.patch
new file mode 100644 (file)
index 0000000..dc26b2c
--- /dev/null
@@ -0,0 +1,101 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: Alexei Starovoitov <ast@fb.com>
+Date: Wed, 27 Apr 2016 18:56:21 -0700
+Subject: bpf: fix check_map_func_compatibility logic
+
+From: Alexei Starovoitov <ast@fb.com>
+
+[ Upstream commit 6aff67c85c9e5a4bc99e5211c1bac547936626ca ]
+
+The commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU conuter")
+introduced clever way to check bpf_helper<->map_type compatibility.
+Later on commit a43eec304259 ("bpf: introduce bpf_perf_event_output() helper") adjusted
+the logic and inadvertently broke it.
+Get rid of the clever bool compare and go back to two-way check
+from map and from helper perspective.
+
+Fixes: a43eec304259 ("bpf: introduce bpf_perf_event_output() helper")
+Reported-by: Jann Horn <jannh@google.com>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/verifier.c |   53 ++++++++++++++++++++++++++++++--------------------
+ 1 file changed, 32 insertions(+), 21 deletions(-)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -239,15 +239,6 @@ static const char * const reg_type_str[]
+       [CONST_IMM]             = "imm",
+ };
+-static const struct {
+-      int map_type;
+-      int func_id;
+-} func_limit[] = {
+-      {BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call},
+-      {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read},
+-      {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_output},
+-};
+-
+ static void print_verifier_state(struct verifier_env *env)
+ {
+       enum bpf_reg_type t;
+@@ -898,24 +889,44 @@ static int check_func_arg(struct verifie
+ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
+ {
+-      bool bool_map, bool_func;
+-      int i;
+-
+       if (!map)
+               return 0;
+-      for (i = 0; i < ARRAY_SIZE(func_limit); i++) {
+-              bool_map = (map->map_type == func_limit[i].map_type);
+-              bool_func = (func_id == func_limit[i].func_id);
+-              /* only when map & func pair match it can continue.
+-               * don't allow any other map type to be passed into
+-               * the special func;
+-               */
+-              if (bool_func && bool_map != bool_func)
+-                      return -EINVAL;
++      /* We need a two way check, first is from map perspective ... */
++      switch (map->map_type) {
++      case BPF_MAP_TYPE_PROG_ARRAY:
++              if (func_id != BPF_FUNC_tail_call)
++                      goto error;
++              break;
++      case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
++              if (func_id != BPF_FUNC_perf_event_read &&
++                  func_id != BPF_FUNC_perf_event_output)
++                      goto error;
++              break;
++      default:
++              break;
++      }
++
++      /* ... and second from the function itself. */
++      switch (func_id) {
++      case BPF_FUNC_tail_call:
++              if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
++                      goto error;
++              break;
++      case BPF_FUNC_perf_event_read:
++      case BPF_FUNC_perf_event_output:
++              if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
++                      goto error;
++              break;
++      default:
++              break;
+       }
+       return 0;
++error:
++      verbose("cannot pass map_type %d into func %d\n",
++              map->map_type, func_id);
++      return -EINVAL;
+ }
+ static int check_call(struct verifier_env *env, int func_id)
diff --git a/queue-4.4/bpf-fix-double-fdput-in-replace_map_fd_with_map_ptr.patch b/queue-4.4/bpf-fix-double-fdput-in-replace_map_fd_with_map_ptr.patch
new file mode 100644 (file)
index 0000000..f79a8c4
--- /dev/null
@@ -0,0 +1,46 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: Jann Horn <jannh@google.com>
+Date: Tue, 26 Apr 2016 22:26:26 +0200
+Subject: bpf: fix double-fdput in replace_map_fd_with_map_ptr()
+
+From: Jann Horn <jannh@google.com>
+
+[ Upstream commit 8358b02bf67d3a5d8a825070e1aa73f25fb2e4c7 ]
+
+When bpf(BPF_PROG_LOAD, ...) was invoked with a BPF program whose bytecode
+references a non-map file descriptor as a map file descriptor, the error
+handling code called fdput() twice instead of once (in __bpf_map_get() and
+in replace_map_fd_with_map_ptr()). If the file descriptor table of the
+current task is shared, this causes f_count to be decremented too much,
+allowing the struct file to be freed while it is still in use
+(use-after-free). This can be exploited to gain root privileges by an
+unprivileged user.
+
+This bug was introduced in
+commit 0246e64d9a5f ("bpf: handle pseudo BPF_LD_IMM64 insn"), but is only
+exploitable since
+commit 1be7f75d1668 ("bpf: enable non-root eBPF programs") because
+previously, CAP_SYS_ADMIN was required to reach the vulnerable code.
+
+(posted publicly according to request by maintainer)
+
+Signed-off-by: Jann Horn <jannh@google.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/verifier.c |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -2004,7 +2004,6 @@ static int replace_map_fd_with_map_ptr(s
+                       if (IS_ERR(map)) {
+                               verbose("fd %d is not pointing to valid bpf_map\n",
+                                       insn->imm);
+-                              fdput(f);
+                               return PTR_ERR(map);
+                       }
diff --git a/queue-4.4/bpf-fix-refcnt-overflow.patch b/queue-4.4/bpf-fix-refcnt-overflow.patch
new file mode 100644 (file)
index 0000000..e69c7fc
--- /dev/null
@@ -0,0 +1,152 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: Alexei Starovoitov <ast@fb.com>
+Date: Wed, 27 Apr 2016 18:56:20 -0700
+Subject: bpf: fix refcnt overflow
+
+From: Alexei Starovoitov <ast@fb.com>
+
+[ Upstream commit 92117d8443bc5afacc8d5ba82e541946310f106e ]
+
+On a system with >32Gbyte of physical memory and infinite RLIMIT_MEMLOCK,
+the malicious application may overflow 32-bit bpf program refcnt.
+It's also possible to overflow map refcnt on 1Tb system.
+Impose 32k hard limit which means that the same bpf program or
+map cannot be shared by more than 32k processes.
+
+Fixes: 1be7f75d1668 ("bpf: enable non-root eBPF programs")
+Reported-by: Jann Horn <jannh@google.com>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/bpf.h   |    3 ++-
+ kernel/bpf/inode.c    |    7 ++++---
+ kernel/bpf/syscall.c  |   24 ++++++++++++++++++++----
+ kernel/bpf/verifier.c |   11 +++++++----
+ 4 files changed, 33 insertions(+), 12 deletions(-)
+
+--- a/include/linux/bpf.h
++++ b/include/linux/bpf.h
+@@ -165,12 +165,13 @@ void bpf_register_prog_type(struct bpf_p
+ void bpf_register_map_type(struct bpf_map_type_list *tl);
+ struct bpf_prog *bpf_prog_get(u32 ufd);
++struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog);
+ void bpf_prog_put(struct bpf_prog *prog);
+ void bpf_prog_put_rcu(struct bpf_prog *prog);
+ struct bpf_map *bpf_map_get_with_uref(u32 ufd);
+ struct bpf_map *__bpf_map_get(struct fd f);
+-void bpf_map_inc(struct bpf_map *map, bool uref);
++struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref);
+ void bpf_map_put_with_uref(struct bpf_map *map);
+ void bpf_map_put(struct bpf_map *map);
+--- a/kernel/bpf/inode.c
++++ b/kernel/bpf/inode.c
+@@ -31,10 +31,10 @@ static void *bpf_any_get(void *raw, enum
+ {
+       switch (type) {
+       case BPF_TYPE_PROG:
+-              atomic_inc(&((struct bpf_prog *)raw)->aux->refcnt);
++              raw = bpf_prog_inc(raw);
+               break;
+       case BPF_TYPE_MAP:
+-              bpf_map_inc(raw, true);
++              raw = bpf_map_inc(raw, true);
+               break;
+       default:
+               WARN_ON_ONCE(1);
+@@ -277,7 +277,8 @@ static void *bpf_obj_do_get(const struct
+               goto out;
+       raw = bpf_any_get(inode->i_private, *type);
+-      touch_atime(&path);
++      if (!IS_ERR(raw))
++              touch_atime(&path);
+       path_put(&path);
+       return raw;
+--- a/kernel/bpf/syscall.c
++++ b/kernel/bpf/syscall.c
+@@ -181,11 +181,18 @@ struct bpf_map *__bpf_map_get(struct fd
+       return f.file->private_data;
+ }
+-void bpf_map_inc(struct bpf_map *map, bool uref)
++/* prog's and map's refcnt limit */
++#define BPF_MAX_REFCNT 32768
++
++struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
+ {
+-      atomic_inc(&map->refcnt);
++      if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
++              atomic_dec(&map->refcnt);
++              return ERR_PTR(-EBUSY);
++      }
+       if (uref)
+               atomic_inc(&map->usercnt);
++      return map;
+ }
+ struct bpf_map *bpf_map_get_with_uref(u32 ufd)
+@@ -197,7 +204,7 @@ struct bpf_map *bpf_map_get_with_uref(u3
+       if (IS_ERR(map))
+               return map;
+-      bpf_map_inc(map, true);
++      map = bpf_map_inc(map, true);
+       fdput(f);
+       return map;
+@@ -580,6 +587,15 @@ static struct bpf_prog *__bpf_prog_get(s
+       return f.file->private_data;
+ }
++struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
++{
++      if (atomic_inc_return(&prog->aux->refcnt) > BPF_MAX_REFCNT) {
++              atomic_dec(&prog->aux->refcnt);
++              return ERR_PTR(-EBUSY);
++      }
++      return prog;
++}
++
+ /* called by sockets/tracing/seccomp before attaching program to an event
+  * pairs with bpf_prog_put()
+  */
+@@ -592,7 +608,7 @@ struct bpf_prog *bpf_prog_get(u32 ufd)
+       if (IS_ERR(prog))
+               return prog;
+-      atomic_inc(&prog->aux->refcnt);
++      prog = bpf_prog_inc(prog);
+       fdput(f);
+       return prog;
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -2023,15 +2023,18 @@ static int replace_map_fd_with_map_ptr(s
+                               return -E2BIG;
+                       }
+-                      /* remember this map */
+-                      env->used_maps[env->used_map_cnt++] = map;
+-
+                       /* hold the map. If the program is rejected by verifier,
+                        * the map will be released by release_maps() or it
+                        * will be used by the valid program until it's unloaded
+                        * and all maps are released in free_bpf_prog_info()
+                        */
+-                      bpf_map_inc(map, false);
++                      map = bpf_map_inc(map, false);
++                      if (IS_ERR(map)) {
++                              fdput(f);
++                              return PTR_ERR(map);
++                      }
++                      env->used_maps[env->used_map_cnt++] = map;
++
+                       fdput(f);
+ next_insn:
+                       insn++;
diff --git a/queue-4.4/bpf-verifier-reject-invalid-ld_abs-bpf_dw-instruction.patch b/queue-4.4/bpf-verifier-reject-invalid-ld_abs-bpf_dw-instruction.patch
new file mode 100644 (file)
index 0000000..04af299
--- /dev/null
@@ -0,0 +1,32 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: Alexei Starovoitov <ast@fb.com>
+Date: Tue, 12 Apr 2016 10:26:19 -0700
+Subject: bpf/verifier: reject invalid LD_ABS | BPF_DW instruction
+
+From: Alexei Starovoitov <ast@fb.com>
+
+[ Upstream commit d82bccc69041a51f7b7b9b4a36db0772f4cdba21 ]
+
+verifier must check for reserved size bits in instruction opcode and
+reject BPF_LD | BPF_ABS | BPF_DW and BPF_LD | BPF_IND | BPF_DW instructions,
+otherwise interpreter will WARN_RATELIMIT on them during execution.
+
+Fixes: ddd872bc3098 ("bpf: verifier: add checks for BPF_ABS | BPF_IND instructions")
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/verifier.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -1348,6 +1348,7 @@ static int check_ld_abs(struct verifier_
+       }
+       if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
++          BPF_SIZE(insn->code) == BPF_DW ||
+           (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
+               verbose("BPF_LD_ABS uses reserved fields\n");
+               return -EINVAL;
diff --git a/queue-4.4/bridge-fix-igmp-mld-query-parsing.patch b/queue-4.4/bridge-fix-igmp-mld-query-parsing.patch
new file mode 100644 (file)
index 0000000..c9f8cfc
--- /dev/null
@@ -0,0 +1,92 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
+Date: Wed, 4 May 2016 17:25:02 +0200
+Subject: bridge: fix igmp / mld query parsing
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
+
+[ Upstream commit 856ce5d083e14571d051301fe3c65b32b8cbe321 ]
+
+With the newly introduced helper functions the skb pulling is hidden
+in the checksumming function - and undone before returning to the
+caller.
+
+The IGMP and MLD query parsing functions in the bridge still
+assumed that the skb is pointing to the beginning of the IGMP/MLD
+message while it is now kept at the beginning of the IPv4/6 header.
+
+If there is a querier somewhere else, then this either causes
+the multicast snooping to stay disabled even though it could be
+enabled. Or, if we have the querier enabled too, then this can
+create unnecessary IGMP / MLD query messages on the link.
+
+Fixing this by taking the offset between IP and IGMP/MLD header into
+account, too.
+
+Fixes: 9afd85c9e455 ("net: Export IGMP/MLD message validation code")
+Reported-by: Simon Wunderlich <sw@simonwunderlich.de>
+Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_multicast.c |   12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+--- a/net/bridge/br_multicast.c
++++ b/net/bridge/br_multicast.c
+@@ -1270,6 +1270,7 @@ static int br_ip4_multicast_query(struct
+       struct br_ip saddr;
+       unsigned long max_delay;
+       unsigned long now = jiffies;
++      unsigned int offset = skb_transport_offset(skb);
+       __be32 group;
+       int err = 0;
+@@ -1280,14 +1281,14 @@ static int br_ip4_multicast_query(struct
+       group = ih->group;
+-      if (skb->len == sizeof(*ih)) {
++      if (skb->len == offset + sizeof(*ih)) {
+               max_delay = ih->code * (HZ / IGMP_TIMER_SCALE);
+               if (!max_delay) {
+                       max_delay = 10 * HZ;
+                       group = 0;
+               }
+-      } else if (skb->len >= sizeof(*ih3)) {
++      } else if (skb->len >= offset + sizeof(*ih3)) {
+               ih3 = igmpv3_query_hdr(skb);
+               if (ih3->nsrcs)
+                       goto out;
+@@ -1348,6 +1349,7 @@ static int br_ip6_multicast_query(struct
+       struct br_ip saddr;
+       unsigned long max_delay;
+       unsigned long now = jiffies;
++      unsigned int offset = skb_transport_offset(skb);
+       const struct in6_addr *group = NULL;
+       bool is_general_query;
+       int err = 0;
+@@ -1357,8 +1359,8 @@ static int br_ip6_multicast_query(struct
+           (port && port->state == BR_STATE_DISABLED))
+               goto out;
+-      if (skb->len == sizeof(*mld)) {
+-              if (!pskb_may_pull(skb, sizeof(*mld))) {
++      if (skb->len == offset + sizeof(*mld)) {
++              if (!pskb_may_pull(skb, offset + sizeof(*mld))) {
+                       err = -EINVAL;
+                       goto out;
+               }
+@@ -1367,7 +1369,7 @@ static int br_ip6_multicast_query(struct
+               if (max_delay)
+                       group = &mld->mld_mca;
+       } else {
+-              if (!pskb_may_pull(skb, sizeof(*mld2q))) {
++              if (!pskb_may_pull(skb, offset + sizeof(*mld2q))) {
+                       err = -EINVAL;
+                       goto out;
+               }
diff --git a/queue-4.4/cdc_mbim-apply-ndp-to-end-quirk-to-all-huawei-devices.patch b/queue-4.4/cdc_mbim-apply-ndp-to-end-quirk-to-all-huawei-devices.patch
new file mode 100644 (file)
index 0000000..e7b2952
--- /dev/null
@@ -0,0 +1,52 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
+Date: Tue, 12 Apr 2016 16:11:12 +0200
+Subject: cdc_mbim: apply "NDP to end" quirk to all Huawei devices
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
+
+[ Upstream commit c5b5343cfbc9f46af65033fa4f407d7b7d98371d ]
+
+We now have a positive report of another Huawei device needing
+this quirk: The ME906s-158 (12d1:15c1).  This is an m.2 form
+factor modem with no obvious relationship to the E3372 (12d1:157d)
+we already have a quirk entry for.  This is reason enough to
+believe the quirk might be necessary for any number of current
+and future Huawei devices.
+
+Applying the quirk to all Huawei devices, since it is crucial
+to any device affected by the firmware bug, while the impact
+on non-affected devices is negligible.
+
+The quirk can if necessary be disabled per-device by writing
+N to /sys/class/net/<iface>/cdc_ncm/ndp_to_end
+
+Reported-by: Andreas Fett <andreas.fett@secunet.com>
+Signed-off-by: Bjørn Mork <bjorn@mork.no>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/cdc_mbim.c |    9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/usb/cdc_mbim.c
++++ b/drivers/net/usb/cdc_mbim.c
+@@ -617,8 +617,13 @@ static const struct usb_device_id mbim_d
+       { USB_VENDOR_AND_INTERFACE_INFO(0x0bdb, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE),
+         .driver_info = (unsigned long)&cdc_mbim_info,
+       },
+-      /* Huawei E3372 fails unless NDP comes after the IP packets */
+-      { USB_DEVICE_AND_INTERFACE_INFO(0x12d1, 0x157d, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE),
++
++      /* Some Huawei devices, ME906s-158 (12d1:15c1) and E3372
++       * (12d1:157d), are known to fail unless the NDP is placed
++       * after the IP packets.  Applying the quirk to all Huawei
++       * devices is broader than necessary, but harmless.
++       */
++      { USB_VENDOR_AND_INTERFACE_INFO(0x12d1, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE),
+         .driver_info = (unsigned long)&cdc_mbim_info_ndp_to_end,
+       },
+       /* default entry */
diff --git a/queue-4.4/decnet-do-not-build-routes-to-devices-without-decnet-private-data.patch b/queue-4.4/decnet-do-not-build-routes-to-devices-without-decnet-private-data.patch
new file mode 100644 (file)
index 0000000..de557b2
--- /dev/null
@@ -0,0 +1,53 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: "David S. Miller" <davem@davemloft.net>
+Date: Sun, 10 Apr 2016 23:01:30 -0400
+Subject: decnet: Do not build routes to devices without decnet private data.
+
+From: "David S. Miller" <davem@davemloft.net>
+
+[ Upstream commit a36a0d4008488fa545c74445d69eaf56377d5d4e ]
+
+In particular, make sure we check for decnet private presence
+for loopback devices.
+
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/decnet/dn_route.c |    9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/net/decnet/dn_route.c
++++ b/net/decnet/dn_route.c
+@@ -1034,10 +1034,13 @@ source_ok:
+       if (!fld.daddr) {
+               fld.daddr = fld.saddr;
+-              err = -EADDRNOTAVAIL;
+               if (dev_out)
+                       dev_put(dev_out);
++              err = -EINVAL;
+               dev_out = init_net.loopback_dev;
++              if (!dev_out->dn_ptr)
++                      goto out;
++              err = -EADDRNOTAVAIL;
+               dev_hold(dev_out);
+               if (!fld.daddr) {
+                       fld.daddr =
+@@ -1110,6 +1113,8 @@ source_ok:
+               if (dev_out == NULL)
+                       goto out;
+               dn_db = rcu_dereference_raw(dev_out->dn_ptr);
++              if (!dn_db)
++                      goto e_inval;
+               /* Possible improvement - check all devices for local addr */
+               if (dn_dev_islocal(dev_out, fld.daddr)) {
+                       dev_put(dev_out);
+@@ -1151,6 +1156,8 @@ select_source:
+                       dev_put(dev_out);
+               dev_out = init_net.loopback_dev;
+               dev_hold(dev_out);
++              if (!dev_out->dn_ptr)
++                      goto e_inval;
+               fld.flowidn_oif = dev_out->ifindex;
+               if (res.fi)
+                       dn_fib_info_put(res.fi);
diff --git a/queue-4.4/drm-i915-bail-out-of-pipe-config-compute-loop-on-lpt.patch b/queue-4.4/drm-i915-bail-out-of-pipe-config-compute-loop-on-lpt.patch
new file mode 100644 (file)
index 0000000..fb37cea
--- /dev/null
@@ -0,0 +1,48 @@
+From 2700818ac9f935d8590715eecd7e8cadbca552b6 Mon Sep 17 00:00:00 2001
+From: Daniel Vetter <daniel.vetter@ffwll.ch>
+Date: Tue, 3 May 2016 10:33:01 +0200
+Subject: drm/i915: Bail out of pipe config compute loop on LPT
+
+From: Daniel Vetter <daniel.vetter@ffwll.ch>
+
+commit 2700818ac9f935d8590715eecd7e8cadbca552b6 upstream.
+
+LPT is pch, so might run into the fdi bandwidth constraint (especially
+since it has only 2 lanes). But right now we just force pipe_bpp back
+to 24, resulting in a nice loop (which we bail out with a loud
+WARN_ON). Fix this.
+
+Cc: Chris Wilson <chris@chris-wilson.co.uk>
+Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
+References: https://bugs.freedesktop.org/show_bug.cgi?id=93477
+Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
+Tested-by: Chris Wilson <chris@chris-wilson.co.uk>
+Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
+Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
+Link: http://patchwork.freedesktop.org/patch/msgid/1462264381-7573-1-git-send-email-daniel.vetter@ffwll.ch
+(cherry picked from commit f58a1acc7e4a1f37d26124ce4c875c647fbcc61f)
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/i915/intel_crt.c |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/i915/intel_crt.c
++++ b/drivers/gpu/drm/i915/intel_crt.c
+@@ -248,8 +248,14 @@ static bool intel_crt_compute_config(str
+               pipe_config->has_pch_encoder = true;
+       /* LPT FDI RX only supports 8bpc. */
+-      if (HAS_PCH_LPT(dev))
++      if (HAS_PCH_LPT(dev)) {
++              if (pipe_config->bw_constrained && pipe_config->pipe_bpp < 24) {
++                      DRM_DEBUG_KMS("LPT only supports 24bpp\n");
++                      return false;
++              }
++
+               pipe_config->pipe_bpp = 24;
++      }
+       /* FDI must always be 2.7 GHz */
+       if (HAS_DDI(dev)) {
diff --git a/queue-4.4/drm-i915-bdw-add-missing-delay-during-l3-sqc-credit-programming.patch b/queue-4.4/drm-i915-bdw-add-missing-delay-during-l3-sqc-credit-programming.patch
new file mode 100644 (file)
index 0000000..01d869b
--- /dev/null
@@ -0,0 +1,42 @@
+From d6a862fe8c48229ba342648bcd535b2404724603 Mon Sep 17 00:00:00 2001
+From: Imre Deak <imre.deak@intel.com>
+Date: Tue, 3 May 2016 15:54:19 +0300
+Subject: drm/i915/bdw: Add missing delay during L3 SQC credit programming
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Imre Deak <imre.deak@intel.com>
+
+commit d6a862fe8c48229ba342648bcd535b2404724603 upstream.
+
+BSpec requires us to wait ~100 clocks before re-enabling clock gating,
+so make sure we do this.
+
+CC: Ville Syrjälä <ville.syrjala@linux.intel.com>
+Signed-off-by: Imre Deak <imre.deak@intel.com>
+Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
+Link: http://patchwork.freedesktop.org/patch/msgid/1462280061-1457-2-git-send-email-imre.deak@intel.com
+(cherry picked from commit 48e5d68d28f00c0cadac5a830980ff3222781abb)
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/i915/intel_pm.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/drivers/gpu/drm/i915/intel_pm.c
++++ b/drivers/gpu/drm/i915/intel_pm.c
+@@ -6620,6 +6620,12 @@ static void broadwell_init_clock_gating(
+       misccpctl = I915_READ(GEN7_MISCCPCTL);
+       I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
+       I915_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT);
++      /*
++       * Wait at least 100 clocks before re-enabling clock gating. See
++       * the definition of L3SQCREG1 in BSpec.
++       */
++      POSTING_READ(GEN8_L3SQCREG1);
++      udelay(1);
+       I915_WRITE(GEN7_MISCCPCTL, misccpctl);
+       /*
diff --git a/queue-4.4/drm-radeon-fix-dp-link-training-issue-with-second-4k-monitor.patch b/queue-4.4/drm-radeon-fix-dp-link-training-issue-with-second-4k-monitor.patch
new file mode 100644 (file)
index 0000000..fb67089
--- /dev/null
@@ -0,0 +1,42 @@
+From 1a738347df2ee4977459a8776fe2c62196bdcb1b Mon Sep 17 00:00:00 2001
+From: Arindam Nath <arindam.nath@amd.com>
+Date: Wed, 4 May 2016 23:39:59 +0530
+Subject: drm/radeon: fix DP link training issue with second 4K monitor
+
+From: Arindam Nath <arindam.nath@amd.com>
+
+commit 1a738347df2ee4977459a8776fe2c62196bdcb1b upstream.
+
+There is an issue observed when we hotplug a second DP
+4K monitor to the system. Sometimes, the link training
+fails for the second monitor after HPD interrupt
+generation.
+
+The issue happens when some queued or deferred transactions
+are already present on the AUX channel when we initiate
+a new transaction to (say) get DPCD or during link training.
+
+We set AUX_IGNORE_HPD_DISCON bit in the AUX_CONTROL
+register so that we can ignore any such deferred
+transactions when a new AUX transaction is initiated.
+
+Signed-off-by: Arindam Nath <arindam.nath@amd.com>
+Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/radeon/radeon_dp_auxch.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/radeon/radeon_dp_auxch.c
++++ b/drivers/gpu/drm/radeon/radeon_dp_auxch.c
+@@ -105,7 +105,7 @@ radeon_dp_aux_transfer_native(struct drm
+       tmp &= AUX_HPD_SEL(0x7);
+       tmp |= AUX_HPD_SEL(chan->rec.hpd);
+-      tmp |= AUX_EN | AUX_LS_READ_EN;
++      tmp |= AUX_EN | AUX_LS_READ_EN | AUX_HPD_DISCON(0x1);
+       WREG32(AUX_CONTROL + aux_offset[instance], tmp);
diff --git a/queue-4.4/drm-radeon-fix-pll-sharing-on-dce6.1-v2.patch b/queue-4.4/drm-radeon-fix-pll-sharing-on-dce6.1-v2.patch
new file mode 100644 (file)
index 0000000..8d5d3d1
--- /dev/null
@@ -0,0 +1,71 @@
+From e3c00d87845ab375f90fa6e10a5e72a3a5778cd3 Mon Sep 17 00:00:00 2001
+From: Lucas Stach <dev@lynxeye.de>
+Date: Thu, 5 May 2016 10:16:44 -0400
+Subject: drm/radeon: fix PLL sharing on DCE6.1 (v2)
+
+From: Lucas Stach <dev@lynxeye.de>
+
+commit e3c00d87845ab375f90fa6e10a5e72a3a5778cd3 upstream.
+
+On DCE6.1 PPLL2 is exclusively available to UNIPHYA, so it should not
+be taken into consideration when looking for an already enabled PLL
+to be shared with other outputs.
+
+This fixes the broken VGA port (TRAVIS DP->VGA bridge) on my Richland
+based laptop, where the internal display is connected to UNIPHYA through
+a TRAVIS DP->LVDS bridge.
+
+Bug:
+https://bugs.freedesktop.org/show_bug.cgi?id=78987
+
+v2: agd: add check in radeon_get_shared_nondp_ppll as well, drop
+    extra parameter.
+
+Signed-off-by: Lucas Stach <dev@lynxeye.de>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/radeon/atombios_crtc.c |   10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/drivers/gpu/drm/radeon/atombios_crtc.c
++++ b/drivers/gpu/drm/radeon/atombios_crtc.c
+@@ -1739,6 +1739,7 @@ static u32 radeon_get_pll_use_mask(struc
+ static int radeon_get_shared_dp_ppll(struct drm_crtc *crtc)
+ {
+       struct drm_device *dev = crtc->dev;
++      struct radeon_device *rdev = dev->dev_private;
+       struct drm_crtc *test_crtc;
+       struct radeon_crtc *test_radeon_crtc;
+@@ -1748,6 +1749,10 @@ static int radeon_get_shared_dp_ppll(str
+               test_radeon_crtc = to_radeon_crtc(test_crtc);
+               if (test_radeon_crtc->encoder &&
+                   ENCODER_MODE_IS_DP(atombios_get_encoder_mode(test_radeon_crtc->encoder))) {
++                      /* PPLL2 is exclusive to UNIPHYA on DCE61 */
++                      if (ASIC_IS_DCE61(rdev) && !ASIC_IS_DCE8(rdev) &&
++                          test_radeon_crtc->pll_id == ATOM_PPLL2)
++                              continue;
+                       /* for DP use the same PLL for all */
+                       if (test_radeon_crtc->pll_id != ATOM_PPLL_INVALID)
+                               return test_radeon_crtc->pll_id;
+@@ -1769,6 +1774,7 @@ static int radeon_get_shared_nondp_ppll(
+ {
+       struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+       struct drm_device *dev = crtc->dev;
++      struct radeon_device *rdev = dev->dev_private;
+       struct drm_crtc *test_crtc;
+       struct radeon_crtc *test_radeon_crtc;
+       u32 adjusted_clock, test_adjusted_clock;
+@@ -1784,6 +1790,10 @@ static int radeon_get_shared_nondp_ppll(
+               test_radeon_crtc = to_radeon_crtc(test_crtc);
+               if (test_radeon_crtc->encoder &&
+                   !ENCODER_MODE_IS_DP(atombios_get_encoder_mode(test_radeon_crtc->encoder))) {
++                      /* PPLL2 is exclusive to UNIPHYA on DCE61 */
++                      if (ASIC_IS_DCE61(rdev) && !ASIC_IS_DCE8(rdev) &&
++                          test_radeon_crtc->pll_id == ATOM_PPLL2)
++                              continue;
+                       /* check if we are already driving this connector with another crtc */
+                       if (test_radeon_crtc->connector == radeon_crtc->connector) {
+                               /* if we are, return that pll */
diff --git a/queue-4.4/gre-do-not-pull-header-in-icmp-error-processing.patch b/queue-4.4/gre-do-not-pull-header-in-icmp-error-processing.patch
new file mode 100644 (file)
index 0000000..2908d02
--- /dev/null
@@ -0,0 +1,73 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: Jiri Benc <jbenc@redhat.com>
+Date: Fri, 29 Apr 2016 23:31:32 +0200
+Subject: gre: do not pull header in ICMP error processing
+
+From: Jiri Benc <jbenc@redhat.com>
+
+[ Upstream commit b7f8fe251e4609e2a437bd2c2dea01e61db6849c ]
+
+iptunnel_pull_header expects that IP header was already pulled; with this
+expectation, it pulls the tunnel header. This is not true in gre_err.
+Furthermore, ipv4_update_pmtu and ipv4_redirect expect that skb->data points
+to the IP header.
+
+We cannot pull the tunnel header in this path. It's just a matter of not
+calling iptunnel_pull_header - we don't need any of its effects.
+
+Fixes: bda7bb463436 ("gre: Allow multiple protocol listener for gre protocol.")
+Signed-off-by: Jiri Benc <jbenc@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_gre.c |   11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+--- a/net/ipv4/ip_gre.c
++++ b/net/ipv4/ip_gre.c
+@@ -180,6 +180,7 @@ static __be16 tnl_flags_to_gre_flags(__b
+       return flags;
+ }
++/* Fills in tpi and returns header length to be pulled. */
+ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
+                           bool *csum_err)
+ {
+@@ -239,7 +240,7 @@ static int parse_gre_header(struct sk_bu
+                               return -EINVAL;
+               }
+       }
+-      return iptunnel_pull_header(skb, hdr_len, tpi->proto);
++      return hdr_len;
+ }
+ static void ipgre_err(struct sk_buff *skb, u32 info,
+@@ -342,7 +343,7 @@ static void gre_err(struct sk_buff *skb,
+       struct tnl_ptk_info tpi;
+       bool csum_err = false;
+-      if (parse_gre_header(skb, &tpi, &csum_err)) {
++      if (parse_gre_header(skb, &tpi, &csum_err) < 0) {
+               if (!csum_err)          /* ignore csum errors. */
+                       return;
+       }
+@@ -420,6 +421,7 @@ static int gre_rcv(struct sk_buff *skb)
+ {
+       struct tnl_ptk_info tpi;
+       bool csum_err = false;
++      int hdr_len;
+ #ifdef CONFIG_NET_IPGRE_BROADCAST
+       if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
+@@ -429,7 +431,10 @@ static int gre_rcv(struct sk_buff *skb)
+       }
+ #endif
+-      if (parse_gre_header(skb, &tpi, &csum_err) < 0)
++      hdr_len = parse_gre_header(skb, &tpi, &csum_err);
++      if (hdr_len < 0)
++              goto drop;
++      if (iptunnel_pull_header(skb, hdr_len, tpi.proto) < 0)
+               goto drop;
+       if (ipgre_rcv(skb, &tpi) == PACKET_RCVD)
diff --git a/queue-4.4/ipv4-fib-don-t-warn-when-primary-address-is-missing-if-in_dev-is-dead.patch b/queue-4.4/ipv4-fib-don-t-warn-when-primary-address-is-missing-if-in_dev-is-dead.patch
new file mode 100644 (file)
index 0000000..d5365bc
--- /dev/null
@@ -0,0 +1,40 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Thu, 21 Apr 2016 22:23:31 +0200
+Subject: ipv4/fib: don't warn when primary address is missing if in_dev is dead
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 391a20333b8393ef2e13014e6e59d192c5594471 ]
+
+After commit fbd40ea0180a ("ipv4: Don't do expensive useless work
+during inetdev destroy.") when deleting an interface,
+fib_del_ifaddr() can be executed without any primary address
+present on the dead interface.
+
+The above is safe, but triggers some "bug: prim == NULL" warnings.
+
+This commit avoids the warning if the in_dev is dead.
+
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_frontend.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/fib_frontend.c
++++ b/net/ipv4/fib_frontend.c
+@@ -904,7 +904,11 @@ void fib_del_ifaddr(struct in_ifaddr *if
+       if (ifa->ifa_flags & IFA_F_SECONDARY) {
+               prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
+               if (!prim) {
+-                      pr_warn("%s: bug: prim == NULL\n", __func__);
++                      /* if the device has been deleted, we don't perform
++                       * address promotion
++                       */
++                      if (!in_dev->dead)
++                              pr_warn("%s: bug: prim == NULL\n", __func__);
+                       return;
+               }
+               if (iprim && iprim != prim) {
diff --git a/queue-4.4/net-bridge-fix-old-ioctl-unlocked-net-device-walk.patch b/queue-4.4/net-bridge-fix-old-ioctl-unlocked-net-device-walk.patch
new file mode 100644 (file)
index 0000000..abff2f7
--- /dev/null
@@ -0,0 +1,68 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Date: Wed, 4 May 2016 16:18:45 +0200
+Subject: net: bridge: fix old ioctl unlocked net device walk
+
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+
+[ Upstream commit 31ca0458a61a502adb7ed192bf9716c6d05791a5 ]
+
+get_bridge_ifindices() is used from the old "deviceless" bridge ioctl
+calls which aren't called with rtnl held. The comment above says that it is
+called with rtnl but that is not really the case.
+Here's a sample output from a test ASSERT_RTNL() which I put in
+get_bridge_ifindices and executed "brctl show":
+[  957.422726] RTNL: assertion failed at net/bridge//br_ioctl.c (30)
+[  957.422925] CPU: 0 PID: 1862 Comm: brctl Tainted: G        W  O
+4.6.0-rc4+ #157
+[  957.423009] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
+BIOS 1.8.1-20150318_183358- 04/01/2014
+[  957.423009]  0000000000000000 ffff880058adfdf0 ffffffff8138dec5
+0000000000000400
+[  957.423009]  ffffffff81ce8380 ffff880058adfe58 ffffffffa05ead32
+0000000000000001
+[  957.423009]  00007ffec1a444b0 0000000000000400 ffff880053c19130
+0000000000008940
+[  957.423009] Call Trace:
+[  957.423009]  [<ffffffff8138dec5>] dump_stack+0x85/0xc0
+[  957.423009]  [<ffffffffa05ead32>]
+br_ioctl_deviceless_stub+0x212/0x2e0 [bridge]
+[  957.423009]  [<ffffffff81515beb>] sock_ioctl+0x22b/0x290
+[  957.423009]  [<ffffffff8126ba75>] do_vfs_ioctl+0x95/0x700
+[  957.423009]  [<ffffffff8126c159>] SyS_ioctl+0x79/0x90
+[  957.423009]  [<ffffffff8163a4c0>] entry_SYSCALL_64_fastpath+0x23/0xc1
+
+Since it only reads bridge ifindices, we can use rcu to safely walk the net
+device list. Also remove the wrong rtnl comment above.
+
+Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_ioctl.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/net/bridge/br_ioctl.c
++++ b/net/bridge/br_ioctl.c
+@@ -21,18 +21,19 @@
+ #include <asm/uaccess.h>
+ #include "br_private.h"
+-/* called with RTNL */
+ static int get_bridge_ifindices(struct net *net, int *indices, int num)
+ {
+       struct net_device *dev;
+       int i = 0;
+-      for_each_netdev(net, dev) {
++      rcu_read_lock();
++      for_each_netdev_rcu(net, dev) {
+               if (i >= num)
+                       break;
+               if (dev->priv_flags & IFF_EBRIDGE)
+                       indices[i++] = dev->ifindex;
+       }
++      rcu_read_unlock();
+       return i;
+ }
diff --git a/queue-4.4/net-fec-only-clear-a-queue-s-work-bit-if-the-queue-was-emptied.patch b/queue-4.4/net-fec-only-clear-a-queue-s-work-bit-if-the-queue-was-emptied.patch
new file mode 100644 (file)
index 0000000..07d99a7
--- /dev/null
@@ -0,0 +1,51 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
+Date: Tue, 3 May 2016 16:38:53 +0200
+Subject: net: fec: only clear a queue's work bit if the queue was emptied
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
+
+[ Upstream commit 1c021bb717a70aaeaa4b25c91f43c2aeddd922de ]
+
+In the receive path a queue's work bit was cleared unconditionally even
+if fec_enet_rx_queue only read out a part of the available packets from
+the hardware. This resulted in not reading any packets in the next napi
+turn and so packets were delayed or lost.
+
+The obvious fix is to only clear a queue's bit when the queue was
+emptied.
+
+Fixes: 4d494cdc92b3 ("net: fec: change data structure to support multiqueue")
+Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Reviewed-by: Lucas Stach <l.stach@pengutronix.de>
+Tested-by: Fugang Duan <fugang.duan@nxp.com>
+Acked-by: Fugang Duan <fugang.duan@nxp.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/freescale/fec_main.c |   10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/freescale/fec_main.c
++++ b/drivers/net/ethernet/freescale/fec_main.c
+@@ -1557,9 +1557,15 @@ fec_enet_rx(struct net_device *ndev, int
+       struct fec_enet_private *fep = netdev_priv(ndev);
+       for_each_set_bit(queue_id, &fep->work_rx, FEC_ENET_MAX_RX_QS) {
+-              clear_bit(queue_id, &fep->work_rx);
+-              pkt_received += fec_enet_rx_queue(ndev,
++              int ret;
++
++              ret = fec_enet_rx_queue(ndev,
+                                       budget - pkt_received, queue_id);
++
++              if (ret < budget - pkt_received)
++                      clear_bit(queue_id, &fep->work_rx);
++
++              pkt_received += ret;
+       }
+       return pkt_received;
+ }
diff --git a/queue-4.4/net-fix-a-kernel-infoleak-in-x25-module.patch b/queue-4.4/net-fix-a-kernel-infoleak-in-x25-module.patch
new file mode 100644 (file)
index 0000000..b461685
--- /dev/null
@@ -0,0 +1,32 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: Kangjie Lu <kangjielu@gmail.com>
+Date: Sun, 8 May 2016 12:10:14 -0400
+Subject: net: fix a kernel infoleak in x25 module
+
+From: Kangjie Lu <kangjielu@gmail.com>
+
+[ Upstream commit 79e48650320e6fba48369fccf13fd045315b19b8 ]
+
+Stack object "dte_facilities" is allocated in x25_rx_call_request(),
+which is supposed to be initialized in x25_negotiate_facilities.
+However, 5 fields (8 bytes in total) are not initialized. This
+object is then copied to userland via copy_to_user, thus infoleak
+occurs.
+
+Signed-off-by: Kangjie Lu <kjlu@gatech.edu>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/x25/x25_facilities.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/x25/x25_facilities.c
++++ b/net/x25/x25_facilities.c
+@@ -277,6 +277,7 @@ int x25_negotiate_facilities(struct sk_b
+       memset(&theirs, 0, sizeof(theirs));
+       memcpy(new, ours, sizeof(*new));
++      memset(dte, 0, sizeof(*dte));
+       len = x25_parse_facilities(skb, &theirs, dte, &x25->vc_facil_mask);
+       if (len < 0)
diff --git a/queue-4.4/net-fix-infoleak-in-llc.patch b/queue-4.4/net-fix-infoleak-in-llc.patch
new file mode 100644 (file)
index 0000000..313d87a
--- /dev/null
@@ -0,0 +1,32 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: Kangjie Lu <kangjielu@gmail.com>
+Date: Tue, 3 May 2016 16:35:05 -0400
+Subject: net: fix infoleak in llc
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Kangjie Lu <kangjielu@gmail.com>
+
+[ Upstream commit b8670c09f37bdf2847cc44f36511a53afc6161fd ]
+
+The stack object “info” has a total size of 12 bytes. Its last byte
+is padding which is not initialized and leaked via “put_cmsg”.
+
+Signed-off-by: Kangjie Lu <kjlu@gatech.edu>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/llc/af_llc.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/llc/af_llc.c
++++ b/net/llc/af_llc.c
+@@ -626,6 +626,7 @@ static void llc_cmsg_rcv(struct msghdr *
+       if (llc->cmsg_flags & LLC_CMSG_PKTINFO) {
+               struct llc_pktinfo info;
++              memset(&info, 0, sizeof(info));
+               info.lpi_ifindex = llc_sk(skb->sk)->dev->ifindex;
+               llc_pdu_decode_dsap(skb, &info.lpi_sap);
+               llc_pdu_decode_da(skb, info.lpi_mac);
diff --git a/queue-4.4/net-fix-infoleak-in-rtnetlink.patch b/queue-4.4/net-fix-infoleak-in-rtnetlink.patch
new file mode 100644 (file)
index 0000000..42dc574
--- /dev/null
@@ -0,0 +1,50 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: Kangjie Lu <kangjielu@gmail.com>
+Date: Tue, 3 May 2016 16:46:24 -0400
+Subject: net: fix infoleak in rtnetlink
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Kangjie Lu <kangjielu@gmail.com>
+
+[ Upstream commit 5f8e44741f9f216e33736ea4ec65ca9ac03036e6 ]
+
+The stack object “map” has a total size of 32 bytes. Its last 4
+bytes are padding generated by compiler. These padding bytes are
+not initialized and sent out via “nla_put”.
+
+Signed-off-by: Kangjie Lu <kjlu@gatech.edu>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/rtnetlink.c |   18 ++++++++++--------
+ 1 file changed, 10 insertions(+), 8 deletions(-)
+
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -1175,14 +1175,16 @@ static noinline_for_stack int rtnl_fill_
+ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
+ {
+-      struct rtnl_link_ifmap map = {
+-              .mem_start   = dev->mem_start,
+-              .mem_end     = dev->mem_end,
+-              .base_addr   = dev->base_addr,
+-              .irq         = dev->irq,
+-              .dma         = dev->dma,
+-              .port        = dev->if_port,
+-      };
++      struct rtnl_link_ifmap map;
++
++      memset(&map, 0, sizeof(map));
++      map.mem_start   = dev->mem_start;
++      map.mem_end     = dev->mem_end;
++      map.base_addr   = dev->base_addr;
++      map.irq         = dev->irq;
++      map.dma         = dev->dma;
++      map.port        = dev->if_port;
++
+       if (nla_put(skb, IFLA_MAP, sizeof(map), &map))
+               return -EMSGSIZE;
diff --git a/queue-4.4/net-implement-net_dbg_ratelimited-for-config_dynamic_debug-case.patch b/queue-4.4/net-implement-net_dbg_ratelimited-for-config_dynamic_debug-case.patch
new file mode 100644 (file)
index 0000000..7e74d39
--- /dev/null
@@ -0,0 +1,59 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: Tim Bingham <tbingham@akamai.com>
+Date: Fri, 29 Apr 2016 13:30:23 -0400
+Subject: net: Implement net_dbg_ratelimited() for CONFIG_DYNAMIC_DEBUG case
+
+From: Tim Bingham <tbingham@akamai.com>
+
+[ Upstream commit 2c94b53738549d81dc7464a32117d1f5112c64d3 ]
+
+Prior to commit d92cff89a0c8 ("net_dbg_ratelimited: turn into no-op
+when !DEBUG") the implementation of net_dbg_ratelimited() was buggy
+for both the DEBUG and CONFIG_DYNAMIC_DEBUG cases.
+
+The bug was that net_ratelimit() was being called and, despite
+returning true, nothing was being printed to the console. This
+resulted in messages like the following -
+
+"net_ratelimit: %d callbacks suppressed"
+
+with no other output nearby.
+
+After commit d92cff89a0c8 ("net_dbg_ratelimited: turn into no-op when
+!DEBUG") the bug is fixed for the DEBUG case. However, there's no
+output at all for CONFIG_DYNAMIC_DEBUG case.
+
+This patch restores debug output (if enabled) for the
+CONFIG_DYNAMIC_DEBUG case.
+
+Add a definition of net_dbg_ratelimited() for the CONFIG_DYNAMIC_DEBUG
+case. The implementation takes care to check that dynamic debugging is
+enabled before calling net_ratelimit().
+
+Fixes: d92cff89a0c8 ("net_dbg_ratelimited: turn into no-op when !DEBUG")
+Signed-off-by: Tim Bingham <tbingham@akamai.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/net.h |   10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/include/linux/net.h
++++ b/include/linux/net.h
+@@ -245,7 +245,15 @@ do {                                                              \
+       net_ratelimited_function(pr_warn, fmt, ##__VA_ARGS__)
+ #define net_info_ratelimited(fmt, ...)                                \
+       net_ratelimited_function(pr_info, fmt, ##__VA_ARGS__)
+-#if defined(DEBUG)
++#if defined(CONFIG_DYNAMIC_DEBUG)
++#define net_dbg_ratelimited(fmt, ...)                                 \
++do {                                                                  \
++      DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt);                 \
++      if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT) &&        \
++          net_ratelimit())                                            \
++              __dynamic_pr_debug(&descriptor, fmt, ##__VA_ARGS__);    \
++} while (0)
++#elif defined(DEBUG)
+ #define net_dbg_ratelimited(fmt, ...)                         \
+       net_ratelimited_function(pr_debug, fmt, ##__VA_ARGS__)
+ #else
diff --git a/queue-4.4/net-mlx4_en-fix-endianness-bug-in-ipv6-csum-calculation.patch b/queue-4.4/net-mlx4_en-fix-endianness-bug-in-ipv6-csum-calculation.patch
new file mode 100644 (file)
index 0000000..d2687ed
--- /dev/null
@@ -0,0 +1,35 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: Daniel Jurgens <danielj@mellanox.com>
+Date: Wed, 4 May 2016 15:00:33 +0300
+Subject: net/mlx4_en: Fix endianness bug in IPV6 csum calculation
+
+From: Daniel Jurgens <danielj@mellanox.com>
+
+[ Upstream commit 82d69203df634b4dfa765c94f60ce9482bcc44d6 ]
+
+Use htons instead of unconditionally byte swapping nexthdr.  On little
+endian systems shifting the byte is correct behavior, but it results in
+incorrect csums on big endian architectures.
+
+Fixes: f8c6455bb04b ('net/mlx4_en: Extend checksum offloading by CHECKSUM COMPLETE')
+Signed-off-by: Daniel Jurgens <danielj@mellanox.com>
+Reviewed-by: Carol Soto <clsoto@us.ibm.com>
+Tested-by: Carol Soto <clsoto@us.ibm.com>
+Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/en_rx.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
++++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+@@ -704,7 +704,7 @@ static int get_fixed_ipv6_csum(__wsum hw
+       if (ipv6h->nexthdr == IPPROTO_FRAGMENT || ipv6h->nexthdr == IPPROTO_HOPOPTS)
+               return -1;
+-      hw_checksum = csum_add(hw_checksum, (__force __wsum)(ipv6h->nexthdr << 8));
++      hw_checksum = csum_add(hw_checksum, (__force __wsum)htons(ipv6h->nexthdr));
+       csum_pseudo_hdr = csum_partial(&ipv6h->saddr,
+                                      sizeof(ipv6h->saddr) + sizeof(ipv6h->daddr), 0);
diff --git a/queue-4.4/net-mlx4_en-fix-spurious-timestamping-callbacks.patch b/queue-4.4/net-mlx4_en-fix-spurious-timestamping-callbacks.patch
new file mode 100644 (file)
index 0000000..60ab209
--- /dev/null
@@ -0,0 +1,46 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Sat, 23 Apr 2016 11:35:46 -0700
+Subject: net/mlx4_en: fix spurious timestamping callbacks
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit fc96256c906362e845d848d0f6a6354450059e81 ]
+
+When multiple skb are TX-completed in a row, we might incorrectly keep
+a timestamp of a prior skb and cause extra work.
+
+Fixes: ec693d47010e8 ("net/mlx4_en: Add HW timestamping (TS) support")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Willem de Bruijn <willemb@google.com>
+Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/en_tx.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
++++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+@@ -400,7 +400,6 @@ static bool mlx4_en_process_tx_cq(struct
+       u32 packets = 0;
+       u32 bytes = 0;
+       int factor = priv->cqe_factor;
+-      u64 timestamp = 0;
+       int done = 0;
+       int budget = priv->tx_work_limit;
+       u32 last_nr_txbb;
+@@ -440,9 +439,12 @@ static bool mlx4_en_process_tx_cq(struct
+               new_index = be16_to_cpu(cqe->wqe_index) & size_mask;
+               do {
++                      u64 timestamp = 0;
++
+                       txbbs_skipped += last_nr_txbb;
+                       ring_index = (ring_index + last_nr_txbb) & size_mask;
+-                      if (ring->tx_info[ring_index].ts_requested)
++
++                      if (unlikely(ring->tx_info[ring_index].ts_requested))
+                               timestamp = mlx4_en_get_cqe_ts(cqe);
+                       /* free next descriptor */
diff --git a/queue-4.4/net-mlx5e-device-s-mtu-field-is-u16-and-not-int.patch b/queue-4.4/net-mlx5e-device-s-mtu-field-is-u16-and-not-int.patch
new file mode 100644 (file)
index 0000000..d7e7acb
--- /dev/null
@@ -0,0 +1,110 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: Saeed Mahameed <saeedm@mellanox.com>
+Date: Fri, 22 Apr 2016 00:33:03 +0300
+Subject: net/mlx5e: Device's mtu field is u16 and not int
+
+From: Saeed Mahameed <saeedm@mellanox.com>
+
+[ Upstream commit 046339eaab26804f52f6604877f5674f70815b26 ]
+
+For set/query MTU port firmware commands the MTU field
+is 16 bits, here I changed all the "int mtu" parameters
+of the functions wrapping those firmware commands to be u16.
+
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/hw/mlx5/main.c                 |    4 ++--
+ drivers/net/ethernet/mellanox/mlx5/core/en_main.c |    4 ++--
+ drivers/net/ethernet/mellanox/mlx5/core/port.c    |   10 +++++-----
+ include/linux/mlx5/driver.h                       |    6 +++---
+ 4 files changed, 12 insertions(+), 12 deletions(-)
+
+--- a/drivers/infiniband/hw/mlx5/main.c
++++ b/drivers/infiniband/hw/mlx5/main.c
+@@ -405,8 +405,8 @@ static int mlx5_query_hca_port(struct ib
+       struct mlx5_ib_dev *dev = to_mdev(ibdev);
+       struct mlx5_core_dev *mdev = dev->mdev;
+       struct mlx5_hca_vport_context *rep;
+-      int max_mtu;
+-      int oper_mtu;
++      u16 max_mtu;
++      u16 oper_mtu;
+       int err;
+       u8 ib_link_width_oper;
+       u8 vl_hw_cap;
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+@@ -1372,7 +1372,7 @@ static int mlx5e_set_dev_port_mtu(struct
+ {
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+       struct mlx5_core_dev *mdev = priv->mdev;
+-      int hw_mtu;
++      u16 hw_mtu;
+       int err;
+       err = mlx5_set_port_mtu(mdev, MLX5E_SW2HW_MTU(netdev->mtu), 1);
+@@ -1896,7 +1896,7 @@ static int mlx5e_change_mtu(struct net_d
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+       struct mlx5_core_dev *mdev = priv->mdev;
+       bool was_opened;
+-      int max_mtu;
++      u16 max_mtu;
+       int err = 0;
+       mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
+--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
+@@ -246,8 +246,8 @@ int mlx5_query_port_admin_status(struct
+ }
+ EXPORT_SYMBOL_GPL(mlx5_query_port_admin_status);
+-static void mlx5_query_port_mtu(struct mlx5_core_dev *dev, int *admin_mtu,
+-                              int *max_mtu, int *oper_mtu, u8 port)
++static void mlx5_query_port_mtu(struct mlx5_core_dev *dev, u16 *admin_mtu,
++                              u16 *max_mtu, u16 *oper_mtu, u8 port)
+ {
+       u32 in[MLX5_ST_SZ_DW(pmtu_reg)];
+       u32 out[MLX5_ST_SZ_DW(pmtu_reg)];
+@@ -267,7 +267,7 @@ static void mlx5_query_port_mtu(struct m
+               *admin_mtu = MLX5_GET(pmtu_reg, out, admin_mtu);
+ }
+-int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu, u8 port)
++int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port)
+ {
+       u32 in[MLX5_ST_SZ_DW(pmtu_reg)];
+       u32 out[MLX5_ST_SZ_DW(pmtu_reg)];
+@@ -282,14 +282,14 @@ int mlx5_set_port_mtu(struct mlx5_core_d
+ }
+ EXPORT_SYMBOL_GPL(mlx5_set_port_mtu);
+-void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu,
++void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, u16 *max_mtu,
+                            u8 port)
+ {
+       mlx5_query_port_mtu(dev, NULL, max_mtu, NULL, port);
+ }
+ EXPORT_SYMBOL_GPL(mlx5_query_port_max_mtu);
+-void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu,
++void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, u16 *oper_mtu,
+                             u8 port)
+ {
+       mlx5_query_port_mtu(dev, NULL, NULL, oper_mtu, port);
+--- a/include/linux/mlx5/driver.h
++++ b/include/linux/mlx5/driver.h
+@@ -792,9 +792,9 @@ int mlx5_set_port_admin_status(struct ml
+ int mlx5_query_port_admin_status(struct mlx5_core_dev *dev,
+                                enum mlx5_port_status *status);
+-int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu, u8 port);
+-void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, u8 port);
+-void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu,
++int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port);
++void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, u16 *max_mtu, u8 port);
++void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, u16 *oper_mtu,
+                             u8 port);
+ int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev,
diff --git a/queue-4.4/net-mlx5e-fix-minimum-mtu.patch b/queue-4.4/net-mlx5e-fix-minimum-mtu.patch
new file mode 100644 (file)
index 0000000..df5b47c
--- /dev/null
@@ -0,0 +1,58 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: Saeed Mahameed <saeedm@mellanox.com>
+Date: Fri, 22 Apr 2016 00:33:04 +0300
+Subject: net/mlx5e: Fix minimum MTU
+
+From: Saeed Mahameed <saeedm@mellanox.com>
+
+[ Upstream commit d8edd2469ace550db707798180d1c84d81f93bca ]
+
+The minimum MTU that can be set on a ConnectX-4 device is 68.
+
+This fixes the case where a user tries to set an invalid MTU:
+the driver fails to satisfy the request and the interface
+stays down.
+
+It is better to report an error and continue working with old
+mtu.
+
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_main.c |   11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+@@ -1891,22 +1891,27 @@ static int mlx5e_set_features(struct net
+       return err;
+ }
++#define MXL5_HW_MIN_MTU 64
++#define MXL5E_MIN_MTU (MXL5_HW_MIN_MTU + ETH_FCS_LEN)
++
+ static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu)
+ {
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+       struct mlx5_core_dev *mdev = priv->mdev;
+       bool was_opened;
+       u16 max_mtu;
++      u16 min_mtu;
+       int err = 0;
+       mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
+       max_mtu = MLX5E_HW2SW_MTU(max_mtu);
++      min_mtu = MLX5E_HW2SW_MTU(MXL5E_MIN_MTU);
+-      if (new_mtu > max_mtu) {
++      if (new_mtu > max_mtu || new_mtu < min_mtu) {
+               netdev_err(netdev,
+-                         "%s: Bad MTU (%d) > (%d) Max\n",
+-                         __func__, new_mtu, max_mtu);
++                         "%s: Bad MTU (%d), valid range is: [%d..%d]\n",
++                         __func__, new_mtu, min_mtu, max_mtu);
+               return -EINVAL;
+       }
diff --git a/queue-4.4/net-route-enforce-hoplimit-max-value.patch b/queue-4.4/net-route-enforce-hoplimit-max-value.patch
new file mode 100644 (file)
index 0000000..db93618
--- /dev/null
@@ -0,0 +1,51 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Fri, 13 May 2016 18:33:41 +0200
+Subject: net/route: enforce hoplimit max value
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 626abd59e51d4d8c6367e03aae252a8aa759ac78 ]
+
+Currently, when creating or updating a route, neither the ipv4 nor
+the ipv6 code performs any check on the hoplimit value.
+
+The caller can e.g. set hoplimit to 256, and when such a route is
+used, packets will be sent with hoplimit/ttl equal to 0.
+
+This commit adds checks for the RTAX_HOPLIMIT value, in both the ipv4
+and ipv6 route code, substituting any value greater than 255 with 255.
+
+This is consistent with what is currently done for ADVMSS and MTU
+in the ipv4 code.
+
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_semantics.c |    2 ++
+ net/ipv6/route.c         |    2 ++
+ 2 files changed, 4 insertions(+)
+
+--- a/net/ipv4/fib_semantics.c
++++ b/net/ipv4/fib_semantics.c
+@@ -975,6 +975,8 @@ fib_convert_metrics(struct fib_info *fi,
+                       val = 65535 - 40;
+               if (type == RTAX_MTU && val > 65535 - 15)
+                       val = 65535 - 15;
++              if (type == RTAX_HOPLIMIT && val > 255)
++                      val = 255;
+               if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
+                       return -EINVAL;
+               fi->fib_metrics[type - 1] = val;
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -1727,6 +1727,8 @@ static int ip6_convert_metrics(struct mx
+               } else {
+                       val = nla_get_u32(nla);
+               }
++              if (type == RTAX_HOPLIMIT && val > 255)
++                      val = 255;
+               if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
+                       goto err;
diff --git a/queue-4.4/net-sched-do-not-requeue-a-null-skb.patch b/queue-4.4/net-sched-do-not-requeue-a-null-skb.patch
new file mode 100644 (file)
index 0000000..b8b2f4b
--- /dev/null
@@ -0,0 +1,45 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: Lars Persson <lars.persson@axis.com>
+Date: Tue, 12 Apr 2016 08:45:52 +0200
+Subject: net: sched: do not requeue a NULL skb
+
+From: Lars Persson <lars.persson@axis.com>
+
+[ Upstream commit 3dcd493fbebfd631913df6e2773cc295d3bf7d22 ]
+
+A failure in validate_xmit_skb_list() triggered an unconditional call
+to dev_requeue_skb with skb=NULL. This slowly grows the queue
+discipline's qlen count until all traffic through the queue stops.
+
+We take the optimistic approach and continue running the queue after a
+failure since it is unknown if later packets also will fail in the
+validate path.
+
+Fixes: 55a93b3ea780 ("qdisc: validate skb without holding lock")
+Signed-off-by: Lars Persson <larper@axis.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_generic.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/net/sched/sch_generic.c
++++ b/net/sched/sch_generic.c
+@@ -159,12 +159,15 @@ int sch_direct_xmit(struct sk_buff *skb,
+       if (validate)
+               skb = validate_xmit_skb_list(skb, dev);
+-      if (skb) {
++      if (likely(skb)) {
+               HARD_TX_LOCK(dev, txq, smp_processor_id());
+               if (!netif_xmit_frozen_or_stopped(txq))
+                       skb = dev_hard_start_xmit(skb, dev, txq, &ret);
+               HARD_TX_UNLOCK(dev, txq);
++      } else {
++              spin_lock(root_lock);
++              return qdisc_qlen(q);
+       }
+       spin_lock(root_lock);
diff --git a/queue-4.4/net-thunderx-avoid-exposing-kernel-stack.patch b/queue-4.4/net-thunderx-avoid-exposing-kernel-stack.patch
new file mode 100644 (file)
index 0000000..c37d076
--- /dev/null
@@ -0,0 +1,53 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: "xypron.glpk@gmx.de" <xypron.glpk@gmx.de>
+Date: Mon, 9 May 2016 00:46:18 +0200
+Subject: net: thunderx: avoid exposing kernel stack
+
+From: "xypron.glpk@gmx.de" <xypron.glpk@gmx.de>
+
+[ Upstream commit 161de2caf68c549c266e571ffba8e2163886fb10 ]
+
+Reserved fields should be set to zero to avoid exposing
+bits from the kernel stack.
+
+Signed-off-by: Heinrich Schuchardt <xypron.glpk@gmx.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/cavium/thunder/nicvf_queues.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
++++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+@@ -550,6 +550,7 @@ static void nicvf_rcv_queue_config(struc
+               nicvf_config_vlan_stripping(nic, nic->netdev->features);
+       /* Enable Receive queue */
++      memset(&rq_cfg, 0, sizeof(struct rq_cfg));
+       rq_cfg.ena = 1;
+       rq_cfg.tcp_ena = 0;
+       nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx, *(u64 *)&rq_cfg);
+@@ -582,6 +583,7 @@ void nicvf_cmp_queue_config(struct nicvf
+                             qidx, (u64)(cq->dmem.phys_base));
+       /* Enable Completion queue */
++      memset(&cq_cfg, 0, sizeof(struct cq_cfg));
+       cq_cfg.ena = 1;
+       cq_cfg.reset = 0;
+       cq_cfg.caching = 0;
+@@ -630,6 +632,7 @@ static void nicvf_snd_queue_config(struc
+                             qidx, (u64)(sq->dmem.phys_base));
+       /* Enable send queue  & set queue size */
++      memset(&sq_cfg, 0, sizeof(struct sq_cfg));
+       sq_cfg.ena = 1;
+       sq_cfg.reset = 0;
+       sq_cfg.ldwb = 0;
+@@ -666,6 +669,7 @@ static void nicvf_rbdr_config(struct nic
+       /* Enable RBDR  & set queue size */
+       /* Buffer size should be in multiples of 128 bytes */
++      memset(&rbdr_cfg, 0, sizeof(struct rbdr_cfg));
+       rbdr_cfg.ena = 1;
+       rbdr_cfg.reset = 0;
+       rbdr_cfg.ldwb = 0;
diff --git a/queue-4.4/net-use-skb_postpush_rcsum-instead-of-own-implementations.patch b/queue-4.4/net-use-skb_postpush_rcsum-instead-of-own-implementations.patch
new file mode 100644 (file)
index 0000000..6076d9b
--- /dev/null
@@ -0,0 +1,111 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Sat, 20 Feb 2016 00:29:30 +0100
+Subject: net: use skb_postpush_rcsum instead of own implementations
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ Upstream commit 6b83d28a55a891a9d70fc61ccb1c138e47dcbe74 ]
+
+Replace individual implementations with the recently introduced
+skb_postpush_rcsum() helper.
+
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Tom Herbert <tom@herbertland.com>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/skbuff.c              |    4 +---
+ net/ipv6/reassembly.c          |    6 ++----
+ net/openvswitch/actions.c      |    8 +++-----
+ net/openvswitch/vport-netdev.c |    2 +-
+ net/openvswitch/vport.h        |    7 -------
+ 5 files changed, 7 insertions(+), 20 deletions(-)
+
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -4433,9 +4433,7 @@ int skb_vlan_push(struct sk_buff *skb, _
+               skb->mac_len += VLAN_HLEN;
+               __skb_pull(skb, offset);
+-              if (skb->ip_summed == CHECKSUM_COMPLETE)
+-                      skb->csum = csum_add(skb->csum, csum_partial(skb->data
+-                                      + (2 * ETH_ALEN), VLAN_HLEN, 0));
++              skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);
+       }
+       __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
+       return 0;
+--- a/net/ipv6/reassembly.c
++++ b/net/ipv6/reassembly.c
+@@ -496,10 +496,8 @@ static int ip6_frag_reasm(struct frag_qu
+       IP6CB(head)->flags |= IP6SKB_FRAGMENTED;
+       /* Yes, and fold redundant checksum back. 8) */
+-      if (head->ip_summed == CHECKSUM_COMPLETE)
+-              head->csum = csum_partial(skb_network_header(head),
+-                                        skb_network_header_len(head),
+-                                        head->csum);
++      skb_postpush_rcsum(head, skb_network_header(head),
++                         skb_network_header_len(head));
+       rcu_read_lock();
+       IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
+--- a/net/openvswitch/actions.c
++++ b/net/openvswitch/actions.c
+@@ -158,9 +158,7 @@ static int push_mpls(struct sk_buff *skb
+       new_mpls_lse = (__be32 *)skb_mpls_header(skb);
+       *new_mpls_lse = mpls->mpls_lse;
+-      if (skb->ip_summed == CHECKSUM_COMPLETE)
+-              skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse,
+-                                                           MPLS_HLEN, 0));
++      skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN);
+       hdr = eth_hdr(skb);
+       hdr->h_proto = mpls->mpls_ethertype;
+@@ -280,7 +278,7 @@ static int set_eth_addr(struct sk_buff *
+       ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst,
+                              mask->eth_dst);
+-      ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
++      skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
+       ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source);
+       ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest);
+@@ -639,7 +637,7 @@ static int ovs_vport_output(struct net *
+       /* Reconstruct the MAC header.  */
+       skb_push(skb, data->l2_len);
+       memcpy(skb->data, &data->l2_data, data->l2_len);
+-      ovs_skb_postpush_rcsum(skb, skb->data, data->l2_len);
++      skb_postpush_rcsum(skb, skb->data, data->l2_len);
+       skb_reset_mac_header(skb);
+       ovs_vport_send(vport, skb);
+--- a/net/openvswitch/vport-netdev.c
++++ b/net/openvswitch/vport-netdev.c
+@@ -58,7 +58,7 @@ static void netdev_port_receive(struct s
+               return;
+       skb_push(skb, ETH_HLEN);
+-      ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
++      skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
+       ovs_vport_receive(vport, skb, skb_tunnel_info(skb));
+       return;
+ error:
+--- a/net/openvswitch/vport.h
++++ b/net/openvswitch/vport.h
+@@ -184,13 +184,6 @@ static inline struct vport *vport_from_p
+ int ovs_vport_receive(struct vport *, struct sk_buff *,
+                     const struct ip_tunnel_info *);
+-static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb,
+-                                    const void *start, unsigned int len)
+-{
+-      if (skb->ip_summed == CHECKSUM_COMPLETE)
+-              skb->csum = csum_add(skb->csum, csum_partial(start, len, 0));
+-}
+-
+ static inline const char *ovs_vport_name(struct vport *vport)
+ {
+       return vport->dev->name;
diff --git a/queue-4.4/net_sched-introduce-qdisc_replace-helper.patch b/queue-4.4/net_sched-introduce-qdisc_replace-helper.patch
new file mode 100644 (file)
index 0000000..50201a3
--- /dev/null
@@ -0,0 +1,257 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: WANG Cong <xiyou.wangcong@gmail.com>
+Date: Thu, 25 Feb 2016 14:55:00 -0800
+Subject: net_sched: introduce qdisc_replace() helper
+
+From: WANG Cong <xiyou.wangcong@gmail.com>
+
+[ Upstream commit 86a7996cc8a078793670d82ed97d5a99bb4e8496 ]
+
+Remove nearly duplicated code and prepare for the following patch.
+
+Cc: Jamal Hadi Salim <jhs@mojatatu.com>
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/sch_generic.h |   17 +++++++++++++++++
+ net/sched/sch_cbq.c       |    7 +------
+ net/sched/sch_drr.c       |    6 +-----
+ net/sched/sch_dsmark.c    |    8 +-------
+ net/sched/sch_hfsc.c      |    6 +-----
+ net/sched/sch_htb.c       |    9 +--------
+ net/sched/sch_multiq.c    |    8 +-------
+ net/sched/sch_netem.c     |   10 +---------
+ net/sched/sch_prio.c      |    8 +-------
+ net/sched/sch_qfq.c       |    6 +-----
+ net/sched/sch_red.c       |    7 +------
+ net/sched/sch_sfb.c       |    7 +------
+ net/sched/sch_tbf.c       |    8 +-------
+ 13 files changed, 29 insertions(+), 78 deletions(-)
+
+--- a/include/net/sch_generic.h
++++ b/include/net/sch_generic.h
+@@ -698,6 +698,23 @@ static inline void qdisc_reset_queue(str
+       sch->qstats.backlog = 0;
+ }
++static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new,
++                                        struct Qdisc **pold)
++{
++      struct Qdisc *old;
++
++      sch_tree_lock(sch);
++      old = *pold;
++      *pold = new;
++      if (old != NULL) {
++              qdisc_tree_decrease_qlen(old, old->q.qlen);
++              qdisc_reset(old);
++      }
++      sch_tree_unlock(sch);
++
++      return old;
++}
++
+ static inline unsigned int __qdisc_queue_drop(struct Qdisc *sch,
+                                             struct sk_buff_head *list)
+ {
+--- a/net/sched/sch_cbq.c
++++ b/net/sched/sch_cbq.c
+@@ -1624,13 +1624,8 @@ static int cbq_graft(struct Qdisc *sch,
+                       new->reshape_fail = cbq_reshape_fail;
+ #endif
+       }
+-      sch_tree_lock(sch);
+-      *old = cl->q;
+-      cl->q = new;
+-      qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
+-      qdisc_reset(*old);
+-      sch_tree_unlock(sch);
++      *old = qdisc_replace(sch, new, &cl->q);
+       return 0;
+ }
+--- a/net/sched/sch_drr.c
++++ b/net/sched/sch_drr.c
+@@ -226,11 +226,7 @@ static int drr_graft_class(struct Qdisc
+                       new = &noop_qdisc;
+       }
+-      sch_tree_lock(sch);
+-      drr_purge_queue(cl);
+-      *old = cl->qdisc;
+-      cl->qdisc = new;
+-      sch_tree_unlock(sch);
++      *old = qdisc_replace(sch, new, &cl->qdisc);
+       return 0;
+ }
+--- a/net/sched/sch_dsmark.c
++++ b/net/sched/sch_dsmark.c
+@@ -73,13 +73,7 @@ static int dsmark_graft(struct Qdisc *sc
+                       new = &noop_qdisc;
+       }
+-      sch_tree_lock(sch);
+-      *old = p->q;
+-      p->q = new;
+-      qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
+-      qdisc_reset(*old);
+-      sch_tree_unlock(sch);
+-
++      *old = qdisc_replace(sch, new, &p->q);
+       return 0;
+ }
+--- a/net/sched/sch_hfsc.c
++++ b/net/sched/sch_hfsc.c
+@@ -1215,11 +1215,7 @@ hfsc_graft_class(struct Qdisc *sch, unsi
+                       new = &noop_qdisc;
+       }
+-      sch_tree_lock(sch);
+-      hfsc_purge_queue(sch, cl);
+-      *old = cl->qdisc;
+-      cl->qdisc = new;
+-      sch_tree_unlock(sch);
++      *old = qdisc_replace(sch, new, &cl->qdisc);
+       return 0;
+ }
+--- a/net/sched/sch_htb.c
++++ b/net/sched/sch_htb.c
+@@ -1163,14 +1163,7 @@ static int htb_graft(struct Qdisc *sch,
+                                    cl->common.classid)) == NULL)
+               return -ENOBUFS;
+-      sch_tree_lock(sch);
+-      *old = cl->un.leaf.q;
+-      cl->un.leaf.q = new;
+-      if (*old != NULL) {
+-              qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
+-              qdisc_reset(*old);
+-      }
+-      sch_tree_unlock(sch);
++      *old = qdisc_replace(sch, new, &cl->un.leaf.q);
+       return 0;
+ }
+--- a/net/sched/sch_multiq.c
++++ b/net/sched/sch_multiq.c
+@@ -303,13 +303,7 @@ static int multiq_graft(struct Qdisc *sc
+       if (new == NULL)
+               new = &noop_qdisc;
+-      sch_tree_lock(sch);
+-      *old = q->queues[band];
+-      q->queues[band] = new;
+-      qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
+-      qdisc_reset(*old);
+-      sch_tree_unlock(sch);
+-
++      *old = qdisc_replace(sch, new, &q->queues[band]);
+       return 0;
+ }
+--- a/net/sched/sch_netem.c
++++ b/net/sched/sch_netem.c
+@@ -1037,15 +1037,7 @@ static int netem_graft(struct Qdisc *sch
+ {
+       struct netem_sched_data *q = qdisc_priv(sch);
+-      sch_tree_lock(sch);
+-      *old = q->qdisc;
+-      q->qdisc = new;
+-      if (*old) {
+-              qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
+-              qdisc_reset(*old);
+-      }
+-      sch_tree_unlock(sch);
+-
++      *old = qdisc_replace(sch, new, &q->qdisc);
+       return 0;
+ }
+--- a/net/sched/sch_prio.c
++++ b/net/sched/sch_prio.c
+@@ -268,13 +268,7 @@ static int prio_graft(struct Qdisc *sch,
+       if (new == NULL)
+               new = &noop_qdisc;
+-      sch_tree_lock(sch);
+-      *old = q->queues[band];
+-      q->queues[band] = new;
+-      qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
+-      qdisc_reset(*old);
+-      sch_tree_unlock(sch);
+-
++      *old = qdisc_replace(sch, new, &q->queues[band]);
+       return 0;
+ }
+--- a/net/sched/sch_qfq.c
++++ b/net/sched/sch_qfq.c
+@@ -617,11 +617,7 @@ static int qfq_graft_class(struct Qdisc
+                       new = &noop_qdisc;
+       }
+-      sch_tree_lock(sch);
+-      qfq_purge_queue(cl);
+-      *old = cl->qdisc;
+-      cl->qdisc = new;
+-      sch_tree_unlock(sch);
++      *old = qdisc_replace(sch, new, &cl->qdisc);
+       return 0;
+ }
+--- a/net/sched/sch_red.c
++++ b/net/sched/sch_red.c
+@@ -313,12 +313,7 @@ static int red_graft(struct Qdisc *sch,
+       if (new == NULL)
+               new = &noop_qdisc;
+-      sch_tree_lock(sch);
+-      *old = q->qdisc;
+-      q->qdisc = new;
+-      qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
+-      qdisc_reset(*old);
+-      sch_tree_unlock(sch);
++      *old = qdisc_replace(sch, new, &q->qdisc);
+       return 0;
+ }
+--- a/net/sched/sch_sfb.c
++++ b/net/sched/sch_sfb.c
+@@ -606,12 +606,7 @@ static int sfb_graft(struct Qdisc *sch,
+       if (new == NULL)
+               new = &noop_qdisc;
+-      sch_tree_lock(sch);
+-      *old = q->qdisc;
+-      q->qdisc = new;
+-      qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
+-      qdisc_reset(*old);
+-      sch_tree_unlock(sch);
++      *old = qdisc_replace(sch, new, &q->qdisc);
+       return 0;
+ }
+--- a/net/sched/sch_tbf.c
++++ b/net/sched/sch_tbf.c
+@@ -502,13 +502,7 @@ static int tbf_graft(struct Qdisc *sch,
+       if (new == NULL)
+               new = &noop_qdisc;
+-      sch_tree_lock(sch);
+-      *old = q->qdisc;
+-      q->qdisc = new;
+-      qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
+-      qdisc_reset(*old);
+-      sch_tree_unlock(sch);
+-
++      *old = qdisc_replace(sch, new, &q->qdisc);
+       return 0;
+ }
diff --git a/queue-4.4/net_sched-update-hierarchical-backlog-too.patch b/queue-4.4/net_sched-update-hierarchical-backlog-too.patch
new file mode 100644 (file)
index 0000000..a80ad7c
--- /dev/null
@@ -0,0 +1,675 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: WANG Cong <xiyou.wangcong@gmail.com>
+Date: Thu, 25 Feb 2016 14:55:01 -0800
+Subject: net_sched: update hierarchical backlog too
+
+From: WANG Cong <xiyou.wangcong@gmail.com>
+
+[ Upstream commit 2ccccf5fb43ff62b2b96cc58d95fc0b3596516e4 ]
+
+When the bottom qdisc decides to, for example, drop some packet,
+it calls qdisc_tree_decrease_qlen() to update the queue length
+for all its ancestors. We need to update the backlog too, to
+keep the stats on the root qdisc accurate.
+
+Cc: Jamal Hadi Salim <jhs@mojatatu.com>
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/codel.h       |    4 ++++
+ include/net/sch_generic.h |    5 +++--
+ net/sched/sch_api.c       |    8 +++++---
+ net/sched/sch_cbq.c       |    5 +++--
+ net/sched/sch_choke.c     |    6 ++++--
+ net/sched/sch_codel.c     |   10 ++++++----
+ net/sched/sch_drr.c       |    3 ++-
+ net/sched/sch_fq.c        |    4 +++-
+ net/sched/sch_fq_codel.c  |   17 ++++++++++++-----
+ net/sched/sch_hfsc.c      |    3 ++-
+ net/sched/sch_hhf.c       |   10 +++++++---
+ net/sched/sch_htb.c       |   10 ++++++----
+ net/sched/sch_multiq.c    |    8 +++++---
+ net/sched/sch_netem.c     |    3 ++-
+ net/sched/sch_pie.c       |    5 +++--
+ net/sched/sch_prio.c      |    7 ++++---
+ net/sched/sch_qfq.c       |    3 ++-
+ net/sched/sch_red.c       |    3 ++-
+ net/sched/sch_sfb.c       |    3 ++-
+ net/sched/sch_sfq.c       |   16 +++++++++-------
+ net/sched/sch_tbf.c       |    7 +++++--
+ 21 files changed, 91 insertions(+), 49 deletions(-)
+
+--- a/include/net/codel.h
++++ b/include/net/codel.h
+@@ -162,12 +162,14 @@ struct codel_vars {
+  * struct codel_stats - contains codel shared variables and stats
+  * @maxpacket:        largest packet we've seen so far
+  * @drop_count:       temp count of dropped packets in dequeue()
++ * @drop_len: bytes of dropped packets in dequeue()
+  * ecn_mark:  number of packets we ECN marked instead of dropping
+  * ce_mark:   number of packets CE marked because sojourn time was above ce_threshold
+  */
+ struct codel_stats {
+       u32             maxpacket;
+       u32             drop_count;
++      u32             drop_len;
+       u32             ecn_mark;
+       u32             ce_mark;
+ };
+@@ -308,6 +310,7 @@ static struct sk_buff *codel_dequeue(str
+                                                                 vars->rec_inv_sqrt);
+                                       goto end;
+                               }
++                              stats->drop_len += qdisc_pkt_len(skb);
+                               qdisc_drop(skb, sch);
+                               stats->drop_count++;
+                               skb = dequeue_func(vars, sch);
+@@ -330,6 +333,7 @@ static struct sk_buff *codel_dequeue(str
+               if (params->ecn && INET_ECN_set_ce(skb)) {
+                       stats->ecn_mark++;
+               } else {
++                      stats->drop_len += qdisc_pkt_len(skb);
+                       qdisc_drop(skb, sch);
+                       stats->drop_count++;
+--- a/include/net/sch_generic.h
++++ b/include/net/sch_generic.h
+@@ -396,7 +396,8 @@ struct Qdisc *dev_graft_qdisc(struct net
+                             struct Qdisc *qdisc);
+ void qdisc_reset(struct Qdisc *qdisc);
+ void qdisc_destroy(struct Qdisc *qdisc);
+-void qdisc_tree_decrease_qlen(struct Qdisc *qdisc, unsigned int n);
++void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, unsigned int n,
++                             unsigned int len);
+ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
+                         const struct Qdisc_ops *ops);
+ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
+@@ -707,7 +708,7 @@ static inline struct Qdisc *qdisc_replac
+       old = *pold;
+       *pold = new;
+       if (old != NULL) {
+-              qdisc_tree_decrease_qlen(old, old->q.qlen);
++              qdisc_tree_reduce_backlog(old, old->q.qlen, old->qstats.backlog);
+               qdisc_reset(old);
+       }
+       sch_tree_unlock(sch);
+--- a/net/sched/sch_api.c
++++ b/net/sched/sch_api.c
+@@ -744,14 +744,15 @@ static u32 qdisc_alloc_handle(struct net
+       return 0;
+ }
+-void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
++void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
++                             unsigned int len)
+ {
+       const struct Qdisc_class_ops *cops;
+       unsigned long cl;
+       u32 parentid;
+       int drops;
+-      if (n == 0)
++      if (n == 0 && len == 0)
+               return;
+       drops = max_t(int, n, 0);
+       rcu_read_lock();
+@@ -774,11 +775,12 @@ void qdisc_tree_decrease_qlen(struct Qdi
+                       cops->put(sch, cl);
+               }
+               sch->q.qlen -= n;
++              sch->qstats.backlog -= len;
+               __qdisc_qstats_drop(sch, drops);
+       }
+       rcu_read_unlock();
+ }
+-EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
++EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
+ static void notify_and_destroy(struct net *net, struct sk_buff *skb,
+                              struct nlmsghdr *n, u32 clid,
+--- a/net/sched/sch_cbq.c
++++ b/net/sched/sch_cbq.c
+@@ -1909,7 +1909,7 @@ static int cbq_delete(struct Qdisc *sch,
+ {
+       struct cbq_sched_data *q = qdisc_priv(sch);
+       struct cbq_class *cl = (struct cbq_class *)arg;
+-      unsigned int qlen;
++      unsigned int qlen, backlog;
+       if (cl->filters || cl->children || cl == &q->link)
+               return -EBUSY;
+@@ -1917,8 +1917,9 @@ static int cbq_delete(struct Qdisc *sch,
+       sch_tree_lock(sch);
+       qlen = cl->q->q.qlen;
++      backlog = cl->q->qstats.backlog;
+       qdisc_reset(cl->q);
+-      qdisc_tree_decrease_qlen(cl->q, qlen);
++      qdisc_tree_reduce_backlog(cl->q, qlen, backlog);
+       if (cl->next_alive)
+               cbq_deactivate_class(cl);
+--- a/net/sched/sch_choke.c
++++ b/net/sched/sch_choke.c
+@@ -128,8 +128,8 @@ static void choke_drop_by_idx(struct Qdi
+               choke_zap_tail_holes(q);
+       qdisc_qstats_backlog_dec(sch, skb);
++      qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
+       qdisc_drop(skb, sch);
+-      qdisc_tree_decrease_qlen(sch, 1);
+       --sch->q.qlen;
+ }
+@@ -456,6 +456,7 @@ static int choke_change(struct Qdisc *sc
+               old = q->tab;
+               if (old) {
+                       unsigned int oqlen = sch->q.qlen, tail = 0;
++                      unsigned dropped = 0;
+                       while (q->head != q->tail) {
+                               struct sk_buff *skb = q->tab[q->head];
+@@ -467,11 +468,12 @@ static int choke_change(struct Qdisc *sc
+                                       ntab[tail++] = skb;
+                                       continue;
+                               }
++                              dropped += qdisc_pkt_len(skb);
+                               qdisc_qstats_backlog_dec(sch, skb);
+                               --sch->q.qlen;
+                               qdisc_drop(skb, sch);
+                       }
+-                      qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen);
++                      qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped);
+                       q->head = 0;
+                       q->tail = tail;
+               }
+--- a/net/sched/sch_codel.c
++++ b/net/sched/sch_codel.c
+@@ -79,12 +79,13 @@ static struct sk_buff *codel_qdisc_deque
+       skb = codel_dequeue(sch, &q->params, &q->vars, &q->stats, dequeue);
+-      /* We cant call qdisc_tree_decrease_qlen() if our qlen is 0,
++      /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0,
+        * or HTB crashes. Defer it for next round.
+        */
+       if (q->stats.drop_count && sch->q.qlen) {
+-              qdisc_tree_decrease_qlen(sch, q->stats.drop_count);
++              qdisc_tree_reduce_backlog(sch, q->stats.drop_count, q->stats.drop_len);
+               q->stats.drop_count = 0;
++              q->stats.drop_len = 0;
+       }
+       if (skb)
+               qdisc_bstats_update(sch, skb);
+@@ -116,7 +117,7 @@ static int codel_change(struct Qdisc *sc
+ {
+       struct codel_sched_data *q = qdisc_priv(sch);
+       struct nlattr *tb[TCA_CODEL_MAX + 1];
+-      unsigned int qlen;
++      unsigned int qlen, dropped = 0;
+       int err;
+       if (!opt)
+@@ -156,10 +157,11 @@ static int codel_change(struct Qdisc *sc
+       while (sch->q.qlen > sch->limit) {
+               struct sk_buff *skb = __skb_dequeue(&sch->q);
++              dropped += qdisc_pkt_len(skb);
+               qdisc_qstats_backlog_dec(sch, skb);
+               qdisc_drop(skb, sch);
+       }
+-      qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
++      qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
+       sch_tree_unlock(sch);
+       return 0;
+--- a/net/sched/sch_drr.c
++++ b/net/sched/sch_drr.c
+@@ -53,9 +53,10 @@ static struct drr_class *drr_find_class(
+ static void drr_purge_queue(struct drr_class *cl)
+ {
+       unsigned int len = cl->qdisc->q.qlen;
++      unsigned int backlog = cl->qdisc->qstats.backlog;
+       qdisc_reset(cl->qdisc);
+-      qdisc_tree_decrease_qlen(cl->qdisc, len);
++      qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
+ }
+ static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = {
+--- a/net/sched/sch_fq.c
++++ b/net/sched/sch_fq.c
+@@ -662,6 +662,7 @@ static int fq_change(struct Qdisc *sch,
+       struct fq_sched_data *q = qdisc_priv(sch);
+       struct nlattr *tb[TCA_FQ_MAX + 1];
+       int err, drop_count = 0;
++      unsigned drop_len = 0;
+       u32 fq_log;
+       if (!opt)
+@@ -736,10 +737,11 @@ static int fq_change(struct Qdisc *sch,
+               if (!skb)
+                       break;
++              drop_len += qdisc_pkt_len(skb);
+               kfree_skb(skb);
+               drop_count++;
+       }
+-      qdisc_tree_decrease_qlen(sch, drop_count);
++      qdisc_tree_reduce_backlog(sch, drop_count, drop_len);
+       sch_tree_unlock(sch);
+       return err;
+--- a/net/sched/sch_fq_codel.c
++++ b/net/sched/sch_fq_codel.c
+@@ -175,7 +175,7 @@ static unsigned int fq_codel_qdisc_drop(
+ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+ {
+       struct fq_codel_sched_data *q = qdisc_priv(sch);
+-      unsigned int idx;
++      unsigned int idx, prev_backlog;
+       struct fq_codel_flow *flow;
+       int uninitialized_var(ret);
+@@ -203,6 +203,7 @@ static int fq_codel_enqueue(struct sk_bu
+       if (++sch->q.qlen <= sch->limit)
+               return NET_XMIT_SUCCESS;
++      prev_backlog = sch->qstats.backlog;
+       q->drop_overlimit++;
+       /* Return Congestion Notification only if we dropped a packet
+        * from this flow.
+@@ -211,7 +212,7 @@ static int fq_codel_enqueue(struct sk_bu
+               return NET_XMIT_CN;
+       /* As we dropped a packet, better let upper stack know this */
+-      qdisc_tree_decrease_qlen(sch, 1);
++      qdisc_tree_reduce_backlog(sch, 1, prev_backlog - sch->qstats.backlog);
+       return NET_XMIT_SUCCESS;
+ }
+@@ -241,6 +242,7 @@ static struct sk_buff *fq_codel_dequeue(
+       struct fq_codel_flow *flow;
+       struct list_head *head;
+       u32 prev_drop_count, prev_ecn_mark;
++      unsigned int prev_backlog;
+ begin:
+       head = &q->new_flows;
+@@ -259,6 +261,7 @@ begin:
+       prev_drop_count = q->cstats.drop_count;
+       prev_ecn_mark = q->cstats.ecn_mark;
++      prev_backlog = sch->qstats.backlog;
+       skb = codel_dequeue(sch, &q->cparams, &flow->cvars, &q->cstats,
+                           dequeue);
+@@ -276,12 +279,14 @@ begin:
+       }
+       qdisc_bstats_update(sch, skb);
+       flow->deficit -= qdisc_pkt_len(skb);
+-      /* We cant call qdisc_tree_decrease_qlen() if our qlen is 0,
++      /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0,
+        * or HTB crashes. Defer it for next round.
+        */
+       if (q->cstats.drop_count && sch->q.qlen) {
+-              qdisc_tree_decrease_qlen(sch, q->cstats.drop_count);
++              qdisc_tree_reduce_backlog(sch, q->cstats.drop_count,
++                                        q->cstats.drop_len);
+               q->cstats.drop_count = 0;
++              q->cstats.drop_len = 0;
+       }
+       return skb;
+ }
+@@ -372,11 +377,13 @@ static int fq_codel_change(struct Qdisc
+       while (sch->q.qlen > sch->limit) {
+               struct sk_buff *skb = fq_codel_dequeue(sch);
++              q->cstats.drop_len += qdisc_pkt_len(skb);
+               kfree_skb(skb);
+               q->cstats.drop_count++;
+       }
+-      qdisc_tree_decrease_qlen(sch, q->cstats.drop_count);
++      qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, q->cstats.drop_len);
+       q->cstats.drop_count = 0;
++      q->cstats.drop_len = 0;
+       sch_tree_unlock(sch);
+       return 0;
+--- a/net/sched/sch_hfsc.c
++++ b/net/sched/sch_hfsc.c
+@@ -895,9 +895,10 @@ static void
+ hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl)
+ {
+       unsigned int len = cl->qdisc->q.qlen;
++      unsigned int backlog = cl->qdisc->qstats.backlog;
+       qdisc_reset(cl->qdisc);
+-      qdisc_tree_decrease_qlen(cl->qdisc, len);
++      qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
+ }
+ static void
+--- a/net/sched/sch_hhf.c
++++ b/net/sched/sch_hhf.c
+@@ -382,6 +382,7 @@ static int hhf_enqueue(struct sk_buff *s
+       struct hhf_sched_data *q = qdisc_priv(sch);
+       enum wdrr_bucket_idx idx;
+       struct wdrr_bucket *bucket;
++      unsigned int prev_backlog;
+       idx = hhf_classify(skb, sch);
+@@ -409,6 +410,7 @@ static int hhf_enqueue(struct sk_buff *s
+       if (++sch->q.qlen <= sch->limit)
+               return NET_XMIT_SUCCESS;
++      prev_backlog = sch->qstats.backlog;
+       q->drop_overlimit++;
+       /* Return Congestion Notification only if we dropped a packet from this
+        * bucket.
+@@ -417,7 +419,7 @@ static int hhf_enqueue(struct sk_buff *s
+               return NET_XMIT_CN;
+       /* As we dropped a packet, better let upper stack know this. */
+-      qdisc_tree_decrease_qlen(sch, 1);
++      qdisc_tree_reduce_backlog(sch, 1, prev_backlog - sch->qstats.backlog);
+       return NET_XMIT_SUCCESS;
+ }
+@@ -527,7 +529,7 @@ static int hhf_change(struct Qdisc *sch,
+ {
+       struct hhf_sched_data *q = qdisc_priv(sch);
+       struct nlattr *tb[TCA_HHF_MAX + 1];
+-      unsigned int qlen;
++      unsigned int qlen, prev_backlog;
+       int err;
+       u64 non_hh_quantum;
+       u32 new_quantum = q->quantum;
+@@ -577,12 +579,14 @@ static int hhf_change(struct Qdisc *sch,
+       }
+       qlen = sch->q.qlen;
++      prev_backlog = sch->qstats.backlog;
+       while (sch->q.qlen > sch->limit) {
+               struct sk_buff *skb = hhf_dequeue(sch);
+               kfree_skb(skb);
+       }
+-      qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
++      qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen,
++                                prev_backlog - sch->qstats.backlog);
+       sch_tree_unlock(sch);
+       return 0;
+--- a/net/sched/sch_htb.c
++++ b/net/sched/sch_htb.c
+@@ -1265,7 +1265,6 @@ static int htb_delete(struct Qdisc *sch,
+ {
+       struct htb_sched *q = qdisc_priv(sch);
+       struct htb_class *cl = (struct htb_class *)arg;
+-      unsigned int qlen;
+       struct Qdisc *new_q = NULL;
+       int last_child = 0;
+@@ -1285,9 +1284,11 @@ static int htb_delete(struct Qdisc *sch,
+       sch_tree_lock(sch);
+       if (!cl->level) {
+-              qlen = cl->un.leaf.q->q.qlen;
++              unsigned int qlen = cl->un.leaf.q->q.qlen;
++              unsigned int backlog = cl->un.leaf.q->qstats.backlog;
++
+               qdisc_reset(cl->un.leaf.q);
+-              qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
++              qdisc_tree_reduce_backlog(cl->un.leaf.q, qlen, backlog);
+       }
+       /* delete from hash and active; remainder in destroy_class */
+@@ -1421,10 +1422,11 @@ static int htb_change_class(struct Qdisc
+               sch_tree_lock(sch);
+               if (parent && !parent->level) {
+                       unsigned int qlen = parent->un.leaf.q->q.qlen;
++                      unsigned int backlog = parent->un.leaf.q->qstats.backlog;
+                       /* turn parent into inner node */
+                       qdisc_reset(parent->un.leaf.q);
+-                      qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
++                      qdisc_tree_reduce_backlog(parent->un.leaf.q, qlen, backlog);
+                       qdisc_destroy(parent->un.leaf.q);
+                       if (parent->prio_activity)
+                               htb_deactivate(q, parent);
+--- a/net/sched/sch_multiq.c
++++ b/net/sched/sch_multiq.c
+@@ -218,7 +218,8 @@ static int multiq_tune(struct Qdisc *sch
+               if (q->queues[i] != &noop_qdisc) {
+                       struct Qdisc *child = q->queues[i];
+                       q->queues[i] = &noop_qdisc;
+-                      qdisc_tree_decrease_qlen(child, child->q.qlen);
++                      qdisc_tree_reduce_backlog(child, child->q.qlen,
++                                                child->qstats.backlog);
+                       qdisc_destroy(child);
+               }
+       }
+@@ -238,8 +239,9 @@ static int multiq_tune(struct Qdisc *sch
+                               q->queues[i] = child;
+                               if (old != &noop_qdisc) {
+-                                      qdisc_tree_decrease_qlen(old,
+-                                                               old->q.qlen);
++                                      qdisc_tree_reduce_backlog(old,
++                                                                old->q.qlen,
++                                                                old->qstats.backlog);
+                                       qdisc_destroy(old);
+                               }
+                               sch_tree_unlock(sch);
+--- a/net/sched/sch_netem.c
++++ b/net/sched/sch_netem.c
+@@ -598,7 +598,8 @@ deliver:
+                               if (unlikely(err != NET_XMIT_SUCCESS)) {
+                                       if (net_xmit_drop_count(err)) {
+                                               qdisc_qstats_drop(sch);
+-                                              qdisc_tree_decrease_qlen(sch, 1);
++                                              qdisc_tree_reduce_backlog(sch, 1,
++                                                                        qdisc_pkt_len(skb));
+                                       }
+                               }
+                               goto tfifo_dequeue;
+--- a/net/sched/sch_pie.c
++++ b/net/sched/sch_pie.c
+@@ -183,7 +183,7 @@ static int pie_change(struct Qdisc *sch,
+ {
+       struct pie_sched_data *q = qdisc_priv(sch);
+       struct nlattr *tb[TCA_PIE_MAX + 1];
+-      unsigned int qlen;
++      unsigned int qlen, dropped = 0;
+       int err;
+       if (!opt)
+@@ -232,10 +232,11 @@ static int pie_change(struct Qdisc *sch,
+       while (sch->q.qlen > sch->limit) {
+               struct sk_buff *skb = __skb_dequeue(&sch->q);
++              dropped += qdisc_pkt_len(skb);
+               qdisc_qstats_backlog_dec(sch, skb);
+               qdisc_drop(skb, sch);
+       }
+-      qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
++      qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
+       sch_tree_unlock(sch);
+       return 0;
+--- a/net/sched/sch_prio.c
++++ b/net/sched/sch_prio.c
+@@ -191,7 +191,7 @@ static int prio_tune(struct Qdisc *sch,
+               struct Qdisc *child = q->queues[i];
+               q->queues[i] = &noop_qdisc;
+               if (child != &noop_qdisc) {
+-                      qdisc_tree_decrease_qlen(child, child->q.qlen);
++                      qdisc_tree_reduce_backlog(child, child->q.qlen, child->qstats.backlog);
+                       qdisc_destroy(child);
+               }
+       }
+@@ -210,8 +210,9 @@ static int prio_tune(struct Qdisc *sch,
+                               q->queues[i] = child;
+                               if (old != &noop_qdisc) {
+-                                      qdisc_tree_decrease_qlen(old,
+-                                                               old->q.qlen);
++                                      qdisc_tree_reduce_backlog(old,
++                                                                old->q.qlen,
++                                                                old->qstats.backlog);
+                                       qdisc_destroy(old);
+                               }
+                               sch_tree_unlock(sch);
+--- a/net/sched/sch_qfq.c
++++ b/net/sched/sch_qfq.c
+@@ -220,9 +220,10 @@ static struct qfq_class *qfq_find_class(
+ static void qfq_purge_queue(struct qfq_class *cl)
+ {
+       unsigned int len = cl->qdisc->q.qlen;
++      unsigned int backlog = cl->qdisc->qstats.backlog;
+       qdisc_reset(cl->qdisc);
+-      qdisc_tree_decrease_qlen(cl->qdisc, len);
++      qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
+ }
+ static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = {
+--- a/net/sched/sch_red.c
++++ b/net/sched/sch_red.c
+@@ -210,7 +210,8 @@ static int red_change(struct Qdisc *sch,
+       q->flags = ctl->flags;
+       q->limit = ctl->limit;
+       if (child) {
+-              qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
++              qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
++                                        q->qdisc->qstats.backlog);
+               qdisc_destroy(q->qdisc);
+               q->qdisc = child;
+       }
+--- a/net/sched/sch_sfb.c
++++ b/net/sched/sch_sfb.c
+@@ -510,7 +510,8 @@ static int sfb_change(struct Qdisc *sch,
+       sch_tree_lock(sch);
+-      qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
++      qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
++                                q->qdisc->qstats.backlog);
+       qdisc_destroy(q->qdisc);
+       q->qdisc = child;
+--- a/net/sched/sch_sfq.c
++++ b/net/sched/sch_sfq.c
+@@ -346,7 +346,7 @@ static int
+ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+ {
+       struct sfq_sched_data *q = qdisc_priv(sch);
+-      unsigned int hash;
++      unsigned int hash, dropped;
+       sfq_index x, qlen;
+       struct sfq_slot *slot;
+       int uninitialized_var(ret);
+@@ -461,7 +461,7 @@ enqueue:
+               return NET_XMIT_SUCCESS;
+       qlen = slot->qlen;
+-      sfq_drop(sch);
++      dropped = sfq_drop(sch);
+       /* Return Congestion Notification only if we dropped a packet
+        * from this flow.
+        */
+@@ -469,7 +469,7 @@ enqueue:
+               return NET_XMIT_CN;
+       /* As we dropped a packet, better let upper stack know this */
+-      qdisc_tree_decrease_qlen(sch, 1);
++      qdisc_tree_reduce_backlog(sch, 1, dropped);
+       return NET_XMIT_SUCCESS;
+ }
+@@ -537,6 +537,7 @@ static void sfq_rehash(struct Qdisc *sch
+       struct sfq_slot *slot;
+       struct sk_buff_head list;
+       int dropped = 0;
++      unsigned int drop_len = 0;
+       __skb_queue_head_init(&list);
+@@ -565,6 +566,7 @@ static void sfq_rehash(struct Qdisc *sch
+                       if (x >= SFQ_MAX_FLOWS) {
+ drop:
+                               qdisc_qstats_backlog_dec(sch, skb);
++                              drop_len += qdisc_pkt_len(skb);
+                               kfree_skb(skb);
+                               dropped++;
+                               continue;
+@@ -594,7 +596,7 @@ drop:
+               }
+       }
+       sch->q.qlen -= dropped;
+-      qdisc_tree_decrease_qlen(sch, dropped);
++      qdisc_tree_reduce_backlog(sch, dropped, drop_len);
+ }
+ static void sfq_perturbation(unsigned long arg)
+@@ -618,7 +620,7 @@ static int sfq_change(struct Qdisc *sch,
+       struct sfq_sched_data *q = qdisc_priv(sch);
+       struct tc_sfq_qopt *ctl = nla_data(opt);
+       struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
+-      unsigned int qlen;
++      unsigned int qlen, dropped = 0;
+       struct red_parms *p = NULL;
+       if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
+@@ -667,8 +669,8 @@ static int sfq_change(struct Qdisc *sch,
+       qlen = sch->q.qlen;
+       while (sch->q.qlen > q->limit)
+-              sfq_drop(sch);
+-      qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
++              dropped += sfq_drop(sch);
++      qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
+       del_timer(&q->perturb_timer);
+       if (q->perturb_period) {
+--- a/net/sched/sch_tbf.c
++++ b/net/sched/sch_tbf.c
+@@ -160,6 +160,7 @@ static int tbf_segment(struct sk_buff *s
+       struct tbf_sched_data *q = qdisc_priv(sch);
+       struct sk_buff *segs, *nskb;
+       netdev_features_t features = netif_skb_features(skb);
++      unsigned int len = 0, prev_len = qdisc_pkt_len(skb);
+       int ret, nb;
+       segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+@@ -172,6 +173,7 @@ static int tbf_segment(struct sk_buff *s
+               nskb = segs->next;
+               segs->next = NULL;
+               qdisc_skb_cb(segs)->pkt_len = segs->len;
++              len += segs->len;
+               ret = qdisc_enqueue(segs, q->qdisc);
+               if (ret != NET_XMIT_SUCCESS) {
+                       if (net_xmit_drop_count(ret))
+@@ -183,7 +185,7 @@ static int tbf_segment(struct sk_buff *s
+       }
+       sch->q.qlen += nb;
+       if (nb > 1)
+-              qdisc_tree_decrease_qlen(sch, 1 - nb);
++              qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
+       consume_skb(skb);
+       return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
+ }
+@@ -399,7 +401,8 @@ static int tbf_change(struct Qdisc *sch,
+       sch_tree_lock(sch);
+       if (child) {
+-              qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
++              qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
++                                        q->qdisc->qstats.backlog);
+               qdisc_destroy(q->qdisc);
+               q->qdisc = child;
+       }
diff --git a/queue-4.4/netem-segment-gso-packets-on-enqueue.patch b/queue-4.4/netem-segment-gso-packets-on-enqueue.patch
new file mode 100644 (file)
index 0000000..5c6bd8c
--- /dev/null
@@ -0,0 +1,166 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: Neil Horman <nhorman@tuxdriver.com>
+Date: Mon, 2 May 2016 12:20:15 -0400
+Subject: netem: Segment GSO packets on enqueue
+
+From: Neil Horman <nhorman@tuxdriver.com>
+
+[ Upstream commit 6071bd1aa13ed9e41824bafad845b7b7f4df5cfd ]
+
+This was recently reported to me, and reproduced on the latest net kernel,
+when attempting to run netperf from a host that had a netem qdisc attached
+to the egress interface:
+
+[  788.073771] ---------------------[ cut here ]---------------------------
+[  788.096716] WARNING: at net/core/dev.c:2253 skb_warn_bad_offload+0xcd/0xda()
+[  788.129521] bnx2: caps=(0x00000001801949b3, 0x0000000000000000) len=2962
+data_len=0 gso_size=1448 gso_type=1 ip_summed=3
+[  788.182150] Modules linked in: sch_netem kvm_amd kvm crc32_pclmul ipmi_ssif
+ghash_clmulni_intel sp5100_tco amd64_edac_mod aesni_intel lrw gf128mul
+glue_helper ablk_helper edac_mce_amd cryptd pcspkr sg edac_core hpilo ipmi_si
+i2c_piix4 k10temp fam15h_power hpwdt ipmi_msghandler shpchp acpi_power_meter
+pcc_cpufreq nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c
+sd_mod crc_t10dif crct10dif_generic mgag200 syscopyarea sysfillrect sysimgblt
+i2c_algo_bit drm_kms_helper ahci ata_generic pata_acpi ttm libahci
+crct10dif_pclmul pata_atiixp tg3 libata crct10dif_common drm crc32c_intel ptp
+serio_raw bnx2 r8169 hpsa pps_core i2c_core mii dm_mirror dm_region_hash dm_log
+dm_mod
+[  788.465294] CPU: 16 PID: 0 Comm: swapper/16 Tainted: G        W
+------------   3.10.0-327.el7.x86_64 #1
+[  788.511521] Hardware name: HP ProLiant DL385p Gen8, BIOS A28 12/17/2012
+[  788.542260]  ffff880437c036b8 f7afc56532a53db9 ffff880437c03670
+ffffffff816351f1
+[  788.576332]  ffff880437c036a8 ffffffff8107b200 ffff880633e74200
+ffff880231674000
+[  788.611943]  0000000000000001 0000000000000003 0000000000000000
+ffff880437c03710
+[  788.647241] Call Trace:
+[  788.658817]  <IRQ>  [<ffffffff816351f1>] dump_stack+0x19/0x1b
+[  788.686193]  [<ffffffff8107b200>] warn_slowpath_common+0x70/0xb0
+[  788.713803]  [<ffffffff8107b29c>] warn_slowpath_fmt+0x5c/0x80
+[  788.741314]  [<ffffffff812f92f3>] ? ___ratelimit+0x93/0x100
+[  788.767018]  [<ffffffff81637f49>] skb_warn_bad_offload+0xcd/0xda
+[  788.796117]  [<ffffffff8152950c>] skb_checksum_help+0x17c/0x190
+[  788.823392]  [<ffffffffa01463a1>] netem_enqueue+0x741/0x7c0 [sch_netem]
+[  788.854487]  [<ffffffff8152cb58>] dev_queue_xmit+0x2a8/0x570
+[  788.880870]  [<ffffffff8156ae1d>] ip_finish_output+0x53d/0x7d0
+...
+
+The problem occurs because netem is not prepared to handle GSO packets (as it
+uses skb_checksum_help in its enqueue path, which cannot manipulate these
+frames).
+
+The solution I think is to simply segment the skb in a similar fashion to the
+way we do in __dev_queue_xmit (via validate_xmit_skb), with some minor changes.
+When we decide to corrupt an skb, if the frame is GSO, we segment it, corrupt
+the first segment, and enqueue the remaining ones.
+
+tested successfully by myself on the latest net kernel, to which this applies
+
+Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
+CC: Jamal Hadi Salim <jhs@mojatatu.com>
+CC: "David S. Miller" <davem@davemloft.net>
+CC: netem@lists.linux-foundation.org
+CC: eric.dumazet@gmail.com
+CC: stephen@networkplumber.org
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_netem.c |   61 ++++++++++++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 59 insertions(+), 2 deletions(-)
+
+--- a/net/sched/sch_netem.c
++++ b/net/sched/sch_netem.c
+@@ -395,6 +395,25 @@ static void tfifo_enqueue(struct sk_buff
+       sch->q.qlen++;
+ }
++/* netem can't properly corrupt a megapacket (like we get from GSO), so instead
++ * when we statistically choose to corrupt one, we instead segment it, returning
++ * the first packet to be corrupted, and re-enqueue the remaining frames
++ */
++static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch)
++{
++      struct sk_buff *segs;
++      netdev_features_t features = netif_skb_features(skb);
++
++      segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
++
++      if (IS_ERR_OR_NULL(segs)) {
++              qdisc_reshape_fail(skb, sch);
++              return NULL;
++      }
++      consume_skb(skb);
++      return segs;
++}
++
+ /*
+  * Insert one skb into qdisc.
+  * Note: parent depends on return value to account for queue length.
+@@ -407,7 +426,11 @@ static int netem_enqueue(struct sk_buff
+       /* We don't fill cb now as skb_unshare() may invalidate it */
+       struct netem_skb_cb *cb;
+       struct sk_buff *skb2;
++      struct sk_buff *segs = NULL;
++      unsigned int len = 0, last_len, prev_len = qdisc_pkt_len(skb);
++      int nb = 0;
+       int count = 1;
++      int rc = NET_XMIT_SUCCESS;
+       /* Random duplication */
+       if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
+@@ -453,10 +476,23 @@ static int netem_enqueue(struct sk_buff
+        * do it now in software before we mangle it.
+        */
+       if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
++              if (skb_is_gso(skb)) {
++                      segs = netem_segment(skb, sch);
++                      if (!segs)
++                              return NET_XMIT_DROP;
++              } else {
++                      segs = skb;
++              }
++
++              skb = segs;
++              segs = segs->next;
++
+               if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
+                   (skb->ip_summed == CHECKSUM_PARTIAL &&
+-                   skb_checksum_help(skb)))
+-                      return qdisc_drop(skb, sch);
++                   skb_checksum_help(skb))) {
++                      rc = qdisc_drop(skb, sch);
++                      goto finish_segs;
++              }
+               skb->data[prandom_u32() % skb_headlen(skb)] ^=
+                       1<<(prandom_u32() % 8);
+@@ -516,6 +552,27 @@ static int netem_enqueue(struct sk_buff
+               sch->qstats.requeues++;
+       }
++finish_segs:
++      if (segs) {
++              while (segs) {
++                      skb2 = segs->next;
++                      segs->next = NULL;
++                      qdisc_skb_cb(segs)->pkt_len = segs->len;
++                      last_len = segs->len;
++                      rc = qdisc_enqueue(segs, sch);
++                      if (rc != NET_XMIT_SUCCESS) {
++                              if (net_xmit_drop_count(rc))
++                                      qdisc_qstats_drop(sch);
++                      } else {
++                              nb++;
++                              len += last_len;
++                      }
++                      segs = skb2;
++              }
++              sch->q.qlen += nb;
++              if (nb > 1)
++                      qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
++      }
+       return NET_XMIT_SUCCESS;
+ }
diff --git a/queue-4.4/openvswitch-use-flow-protocol-when-recalculating-ipv6-checksums.patch b/queue-4.4/openvswitch-use-flow-protocol-when-recalculating-ipv6-checksums.patch
new file mode 100644 (file)
index 0000000..27bcd75
--- /dev/null
@@ -0,0 +1,45 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: Simon Horman <simon.horman@netronome.com>
+Date: Thu, 21 Apr 2016 11:49:15 +1000
+Subject: openvswitch: use flow protocol when recalculating ipv6 checksums
+
+From: Simon Horman <simon.horman@netronome.com>
+
+[ Upstream commit b4f70527f052b0c00be4d7cac562baa75b212df5 ]
+
+When using masked actions the ipv6_proto field of an action
+to set IPv6 fields may be zero rather than the prevailing protocol
+which will result in skipping checksum recalculation.
+
+This patch resolves the problem by relying on the protocol
+in the flow key rather than that in the set field action.
+
+Fixes: 83d2b9ba1abc ("net: openvswitch: Support masked set actions.")
+Cc: Jarno Rajahalme <jrajahalme@nicira.com>
+Signed-off-by: Simon Horman <simon.horman@netronome.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/openvswitch/actions.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/openvswitch/actions.c
++++ b/net/openvswitch/actions.c
+@@ -461,7 +461,7 @@ static int set_ipv6(struct sk_buff *skb,
+               mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked);
+               if (unlikely(memcmp(saddr, masked, sizeof(masked)))) {
+-                      set_ipv6_addr(skb, key->ipv6_proto, saddr, masked,
++                      set_ipv6_addr(skb, flow_key->ip.proto, saddr, masked,
+                                     true);
+                       memcpy(&flow_key->ipv6.addr.src, masked,
+                              sizeof(flow_key->ipv6.addr.src));
+@@ -483,7 +483,7 @@ static int set_ipv6(struct sk_buff *skb,
+                                                            NULL, &flags)
+                                              != NEXTHDR_ROUTING);
+-                      set_ipv6_addr(skb, key->ipv6_proto, daddr, masked,
++                      set_ipv6_addr(skb, flow_key->ip.proto, daddr, masked,
+                                     recalc_csum);
+                       memcpy(&flow_key->ipv6.addr.dst, masked,
+                              sizeof(flow_key->ipv6.addr.dst));
diff --git a/queue-4.4/packet-fix-heap-info-leak-in-packet_diag_mclist-sock_diag-interface.patch b/queue-4.4/packet-fix-heap-info-leak-in-packet_diag_mclist-sock_diag-interface.patch
new file mode 100644 (file)
index 0000000..ab1b814
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: Mathias Krause <minipli@googlemail.com>
+Date: Sun, 10 Apr 2016 12:52:28 +0200
+Subject: packet: fix heap info leak in PACKET_DIAG_MCLIST sock_diag interface
+
+From: Mathias Krause <minipli@googlemail.com>
+
+[ Upstream commit 309cf37fe2a781279b7675d4bb7173198e532867 ]
+
+Because we fail to wipe the remainder of i->addr[] in packet_mc_add(),
+pdiag_put_mclist() leaks uninitialized heap bytes via the
+PACKET_DIAG_MCLIST netlink attribute.
+
+Fix this by explicitly memset(0)ing the remaining bytes in i->addr[].
+
+Fixes: eea68e2f1a00 ("packet: Report socket mclist info via diag module")
+Signed-off-by: Mathias Krause <minipli@googlemail.com>
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Cc: Pavel Emelyanov <xemul@parallels.com>
+Acked-by: Pavel Emelyanov <xemul@virtuozzo.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -3436,6 +3436,7 @@ static int packet_mc_add(struct sock *sk
+       i->ifindex = mreq->mr_ifindex;
+       i->alen = mreq->mr_alen;
+       memcpy(i->addr, mreq->mr_address, i->alen);
++      memset(i->addr + i->alen, 0, sizeof(i->addr) - i->alen);
+       i->count = 1;
+       i->next = po->mclist;
+       po->mclist = i;
diff --git a/queue-4.4/route-do-not-cache-fib-route-info-on-local-routes-with-oif.patch b/queue-4.4/route-do-not-cache-fib-route-info-on-local-routes-with-oif.patch
new file mode 100644 (file)
index 0000000..b30feeb
--- /dev/null
@@ -0,0 +1,59 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: Chris Friesen <chris.friesen@windriver.com>
+Date: Fri, 8 Apr 2016 15:21:30 -0600
+Subject: route: do not cache fib route info on local routes with oif
+
+From: Chris Friesen <chris.friesen@windriver.com>
+
+[ Upstream commit d6d5e999e5df67f8ec20b6be45e2229455ee3699 ]
+
+For local routes that require a particular output interface we do not want
+to cache the result.  Caching the result causes incorrect behaviour when
+there are multiple source addresses on the interface.  The end result
+being that if the intended recipient is waiting on that interface for the
+packet he won't receive it because it will be delivered on the loopback
+interface and the IP_PKTINFO ipi_ifindex will be set to the loopback
+interface as well.
+
+This can be tested by running a program such as "dhcp_release" which
+attempts to inject a packet on a particular interface so that it is
+received by another program on the same board.  The receiving process
+should see an IP_PKTINFO ipi_ifindex value of the source interface
+(e.g., eth1) instead of the loopback interface (e.g., lo).  The packet
+will still appear on the loopback interface in tcpdump but the important
+aspect is that the CMSG info is correct.
+
+Sample dhcp_release command line:
+
+   dhcp_release eth1 192.168.204.222 02:11:33:22:44:66
+
+Signed-off-by: Allain Legacy <allain.legacy@windriver.com>
+Signed off-by: Chris Friesen <chris.friesen@windriver.com>
+Reviewed-by: Julian Anastasov <ja@ssi.bg>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/route.c |   12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -2045,6 +2045,18 @@ static struct rtable *__mkroute_output(c
+                */
+               if (fi && res->prefixlen < 4)
+                       fi = NULL;
++      } else if ((type == RTN_LOCAL) && (orig_oif != 0) &&
++                 (orig_oif != dev_out->ifindex)) {
++              /* For local routes that require a particular output interface
++               * we do not want to cache the result.  Caching the result
++               * causes incorrect behaviour when there are multiple source
++               * addresses on the interface, the end result being that if the
++               * intended recipient is waiting on that interface for the
++               * packet he won't receive it because it will be delivered on
++               * the loopback interface and the IP_PKTINFO ipi_ifindex will
++               * be set to the loopback interface as well.
++               */
++              fi = NULL;
+       }
+       fnhe = NULL;
diff --git a/queue-4.4/samples-bpf-fix-trace_output-example.patch b/queue-4.4/samples-bpf-fix-trace_output-example.patch
new file mode 100644 (file)
index 0000000..1f7c5ec
--- /dev/null
@@ -0,0 +1,31 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: Alexei Starovoitov <ast@fb.com>
+Date: Wed, 27 Apr 2016 18:56:22 -0700
+Subject: samples/bpf: fix trace_output example
+
+From: Alexei Starovoitov <ast@fb.com>
+
+[ Upstream commit 569cc39d39385a74b23145496bca2df5ac8b2fb8 ]
+
+llvm cannot always recognize memset as builtin function and optimize
+it away, so just delete it. It was a leftover from testing
+of bpf_perf_event_output() with large data structures.
+
+Fixes: 39111695b1b8 ("samples: bpf: add bpf_perf_event_output example")
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ samples/bpf/trace_output_kern.c |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/samples/bpf/trace_output_kern.c
++++ b/samples/bpf/trace_output_kern.c
+@@ -18,7 +18,6 @@ int bpf_prog1(struct pt_regs *ctx)
+               u64 cookie;
+       } data;
+-      memset(&data, 0, sizeof(data));
+       data.pid = bpf_get_current_pid_tgid();
+       data.cookie = 0x12345678;
diff --git a/queue-4.4/sch_dsmark-update-backlog-as-well.patch b/queue-4.4/sch_dsmark-update-backlog-as-well.patch
new file mode 100644 (file)
index 0000000..747fe57
--- /dev/null
@@ -0,0 +1,45 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: WANG Cong <xiyou.wangcong@gmail.com>
+Date: Thu, 25 Feb 2016 14:55:03 -0800
+Subject: sch_dsmark: update backlog as well
+
+From: WANG Cong <xiyou.wangcong@gmail.com>
+
+[ Upstream commit bdf17661f63a79c3cb4209b970b1cc39e34f7543 ]
+
+Similarly, we need to update backlog too when we update qlen.
+
+Cc: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_dsmark.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/sched/sch_dsmark.c
++++ b/net/sched/sch_dsmark.c
+@@ -258,6 +258,7 @@ static int dsmark_enqueue(struct sk_buff
+               return err;
+       }
++      qdisc_qstats_backlog_inc(sch, skb);
+       sch->q.qlen++;
+       return NET_XMIT_SUCCESS;
+@@ -280,6 +281,7 @@ static struct sk_buff *dsmark_dequeue(st
+               return NULL;
+       qdisc_bstats_update(sch, skb);
++      qdisc_qstats_backlog_dec(sch, skb);
+       sch->q.qlen--;
+       index = skb->tc_index & (p->indices - 1);
+@@ -395,6 +397,7 @@ static void dsmark_reset(struct Qdisc *s
+       pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
+       qdisc_reset(p->q);
++      sch->qstats.backlog = 0;
+       sch->q.qlen = 0;
+ }
diff --git a/queue-4.4/sch_htb-update-backlog-as-well.patch b/queue-4.4/sch_htb-update-backlog-as-well.patch
new file mode 100644 (file)
index 0000000..3192395
--- /dev/null
@@ -0,0 +1,68 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: WANG Cong <xiyou.wangcong@gmail.com>
+Date: Thu, 25 Feb 2016 14:55:02 -0800
+Subject: sch_htb: update backlog as well
+
+From: WANG Cong <xiyou.wangcong@gmail.com>
+
+[ Upstream commit 431e3a8e36a05a37126f34b41aa3a5a6456af04e ]
+
+We saw qlen!=0 but backlog==0 on our production machine:
+
+qdisc htb 1: dev eth0 root refcnt 2 r2q 10 default 1 direct_packets_stat 0 ver 3.17
+ Sent 172680457356 bytes 222469449 pkt (dropped 0, overlimits 123575834 requeues 0)
+ backlog 0b 72p requeues 0
+
+The problem is we only count qlen for HTB qdisc but not backlog.
+We need to update backlog too when we update qlen, so that we
+can at least know the average packet length.
+
+Cc: Jamal Hadi Salim <jhs@mojatatu.com>
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_htb.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/net/sched/sch_htb.c
++++ b/net/sched/sch_htb.c
+@@ -600,6 +600,7 @@ static int htb_enqueue(struct sk_buff *s
+               htb_activate(q, cl);
+       }
++      qdisc_qstats_backlog_inc(sch, skb);
+       sch->q.qlen++;
+       return NET_XMIT_SUCCESS;
+ }
+@@ -889,6 +890,7 @@ static struct sk_buff *htb_dequeue(struc
+ ok:
+               qdisc_bstats_update(sch, skb);
+               qdisc_unthrottled(sch);
++              qdisc_qstats_backlog_dec(sch, skb);
+               sch->q.qlen--;
+               return skb;
+       }
+@@ -955,6 +957,7 @@ static unsigned int htb_drop(struct Qdis
+                       unsigned int len;
+                       if (cl->un.leaf.q->ops->drop &&
+                           (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
++                              sch->qstats.backlog -= len;
+                               sch->q.qlen--;
+                               if (!cl->un.leaf.q->q.qlen)
+                                       htb_deactivate(q, cl);
+@@ -984,12 +987,12 @@ static void htb_reset(struct Qdisc *sch)
+                       }
+                       cl->prio_activity = 0;
+                       cl->cmode = HTB_CAN_SEND;
+-
+               }
+       }
+       qdisc_watchdog_cancel(&q->watchdog);
+       __skb_queue_purge(&q->direct_queue);
+       sch->q.qlen = 0;
++      sch->qstats.backlog = 0;
+       memset(q->hlevel, 0, sizeof(q->hlevel));
+       memset(q->row_mask, 0, sizeof(q->row_mask));
+       for (i = 0; i < TC_HTB_NUMPRIO; i++)
index 5cc2dc36ff42325b42ddddd84877dca6a4dd2224..eb6b070c7444837774377f721856a3ca532ab394 100644 (file)
@@ -1,3 +1,41 @@
+decnet-do-not-build-routes-to-devices-without-decnet-private-data.patch
+route-do-not-cache-fib-route-info-on-local-routes-with-oif.patch
+packet-fix-heap-info-leak-in-packet_diag_mclist-sock_diag-interface.patch
+net-sched-do-not-requeue-a-null-skb.patch
+bpf-verifier-reject-invalid-ld_abs-bpf_dw-instruction.patch
+cdc_mbim-apply-ndp-to-end-quirk-to-all-huawei-devices.patch
+soreuseport-fix-ordering-for-mixed-v4-v6-sockets.patch
+net-use-skb_postpush_rcsum-instead-of-own-implementations.patch
+vlan-pull-on-__vlan_insert_tag-error-path-and-fix-csum-correction.patch
+atl2-disable-unimplemented-scatter-gather-feature.patch
+openvswitch-use-flow-protocol-when-recalculating-ipv6-checksums.patch
+net-mlx5e-device-s-mtu-field-is-u16-and-not-int.patch
+net-mlx5e-fix-minimum-mtu.patch
+ipv4-fib-don-t-warn-when-primary-address-is-missing-if-in_dev-is-dead.patch
+net-mlx4_en-fix-spurious-timestamping-callbacks.patch
+bpf-fix-double-fdput-in-replace_map_fd_with_map_ptr.patch
+bpf-fix-refcnt-overflow.patch
+bpf-fix-check_map_func_compatibility-logic.patch
+samples-bpf-fix-trace_output-example.patch
+net-implement-net_dbg_ratelimited-for-config_dynamic_debug-case.patch
+gre-do-not-pull-header-in-icmp-error-processing.patch
+net_sched-introduce-qdisc_replace-helper.patch
+net_sched-update-hierarchical-backlog-too.patch
+sch_htb-update-backlog-as-well.patch
+sch_dsmark-update-backlog-as-well.patch
+netem-segment-gso-packets-on-enqueue.patch
+net-fec-only-clear-a-queue-s-work-bit-if-the-queue-was-emptied.patch
+net-fix-infoleak-in-llc.patch
+net-fix-infoleak-in-rtnetlink.patch
+net-mlx4_en-fix-endianness-bug-in-ipv6-csum-calculation.patch
+vsock-do-not-disconnect-socket-when-peer-has-shutdown-send-only.patch
+net-bridge-fix-old-ioctl-unlocked-net-device-walk.patch
+bridge-fix-igmp-mld-query-parsing.patch
+uapi-glibc-compat-fix-compile-errors-when-glibc-net-if.h-included-before-linux-if.h-mime-version-1.0.patch
+net-fix-a-kernel-infoleak-in-x25-module.patch
+net-thunderx-avoid-exposing-kernel-stack.patch
+tcp-refresh-skb-timestamp-at-retransmit-time.patch
+net-route-enforce-hoplimit-max-value.patch
 ocfs2-revert-using-ocfs2_acl_chmod-to-avoid-inode-cluster-lock-hang.patch
 ocfs2-fix-posix_acl_create-deadlock.patch
 zsmalloc-fix-zs_can_compact-integer-overflow.patch
@@ -28,3 +66,7 @@ tools-lib-traceevent-do-not-reassign-parg-after-collapse_tree.patch
 get_rock_ridge_filename-handle-malformed-nm-entries.patch
 input-max8997-haptic-fix-null-pointer-dereference.patch
 revert-videobuf2-v4l2-verify-planes-array-in-buffer-dequeueing.patch
+drm-radeon-fix-pll-sharing-on-dce6.1-v2.patch
+drm-i915-bail-out-of-pipe-config-compute-loop-on-lpt.patch
+drm-i915-bdw-add-missing-delay-during-l3-sqc-credit-programming.patch
+drm-radeon-fix-dp-link-training-issue-with-second-4k-monitor.patch
diff --git a/queue-4.4/soreuseport-fix-ordering-for-mixed-v4-v6-sockets.patch b/queue-4.4/soreuseport-fix-ordering-for-mixed-v4-v6-sockets.patch
new file mode 100644 (file)
index 0000000..7965b08
--- /dev/null
@@ -0,0 +1,130 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: Craig Gallek <kraig@google.com>
+Date: Tue, 12 Apr 2016 13:11:25 -0400
+Subject: soreuseport: fix ordering for mixed v4/v6 sockets
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Craig Gallek <kraig@google.com>
+
+[ Upstream commit d894ba18d4e449b3a7f6eb491f16c9e02933736e ]
+
+With the SO_REUSEPORT socket option, it is possible to create sockets
+in the AF_INET and AF_INET6 domains which are bound to the same IPv4 address.
+This is only possible with SO_REUSEPORT and when not using IPV6_V6ONLY on
+the AF_INET6 sockets.
+
+Prior to the commits referenced below, an incoming IPv4 packet would
+always be routed to a socket of type AF_INET when this mixed-mode was used.
+After those changes, the same packet would be routed to the most recently
+bound socket (if this happened to be an AF_INET6 socket, it would
+have an IPv4 mapped IPv6 address).
+
+The change in behavior occurred because the recent SO_REUSEPORT optimizations
+short-circuit the socket scoring logic as soon as they find a match.  They
+did not take into account the scoring logic that favors AF_INET sockets
+over AF_INET6 sockets in the event of a tie.
+
+To fix this problem, this patch changes the insertion order of AF_INET
+and AF_INET6 addresses in the TCP and UDP socket lists when the sockets
+have SO_REUSEPORT set.  AF_INET sockets will be inserted at the head of the
+list and AF_INET6 sockets with SO_REUSEPORT set will always be inserted at
+the tail of the list.  This will force AF_INET sockets to always be
+considered first.
+
+Fixes: e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection")
+Fixes: 125e80b88687 ("soreuseport: fast reuseport TCP socket selection")
+
+Reported-by: Maciej Żenczykowski <maze@google.com>
+Signed-off-by: Craig Gallek <kraig@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/rculist_nulls.h |   39 +++++++++++++++++++++++++++++++++++++++
+ include/net/sock.h            |    6 +++++-
+ net/ipv4/udp.c                |    9 +++++++--
+ 3 files changed, 51 insertions(+), 3 deletions(-)
+
+--- a/include/linux/rculist_nulls.h
++++ b/include/linux/rculist_nulls.h
+@@ -98,6 +98,45 @@ static inline void hlist_nulls_add_head_
+       if (!is_a_nulls(first))
+               first->pprev = &n->next;
+ }
++
++/**
++ * hlist_nulls_add_tail_rcu
++ * @n: the element to add to the hash list.
++ * @h: the list to add to.
++ *
++ * Description:
++ * Adds the specified element to the end of the specified hlist_nulls,
++ * while permitting racing traversals.  NOTE: tail insertion requires
++ * list traversal.
++ *
++ * The caller must take whatever precautions are necessary
++ * (such as holding appropriate locks) to avoid racing
++ * with another list-mutation primitive, such as hlist_nulls_add_head_rcu()
++ * or hlist_nulls_del_rcu(), running on this same list.
++ * However, it is perfectly legal to run concurrently with
++ * the _rcu list-traversal primitives, such as
++ * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency
++ * problems on Alpha CPUs.  Regardless of the type of CPU, the
++ * list-traversal primitive must be guarded by rcu_read_lock().
++ */
++static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
++                                      struct hlist_nulls_head *h)
++{
++      struct hlist_nulls_node *i, *last = NULL;
++
++      for (i = hlist_nulls_first_rcu(h); !is_a_nulls(i);
++           i = hlist_nulls_next_rcu(i))
++              last = i;
++
++      if (last) {
++              n->next = last->next;
++              n->pprev = &last->next;
++              rcu_assign_pointer(hlist_nulls_next_rcu(last), n);
++      } else {
++              hlist_nulls_add_head_rcu(n, h);
++      }
++}
++
+ /**
+  * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type
+  * @tpos:     the type * to use as a loop cursor.
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -649,7 +649,11 @@ static inline void sk_add_node_rcu(struc
+ static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
+ {
+-      hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
++      if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
++          sk->sk_family == AF_INET6)
++              hlist_nulls_add_tail_rcu(&sk->sk_nulls_node, list);
++      else
++              hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
+ }
+ static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -296,8 +296,13 @@ found:
+               hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
+               spin_lock(&hslot2->lock);
+-              hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
+-                                       &hslot2->head);
++              if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
++                      sk->sk_family == AF_INET6)
++                      hlist_nulls_add_tail_rcu(&udp_sk(sk)->udp_portaddr_node,
++                                               &hslot2->head);
++              else
++                      hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
++                                               &hslot2->head);
+               hslot2->count++;
+               spin_unlock(&hslot2->lock);
+       }
diff --git a/queue-4.4/tcp-refresh-skb-timestamp-at-retransmit-time.patch b/queue-4.4/tcp-refresh-skb-timestamp-at-retransmit-time.patch
new file mode 100644 (file)
index 0000000..d7013e2
--- /dev/null
@@ -0,0 +1,39 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 9 May 2016 20:55:16 -0700
+Subject: tcp: refresh skb timestamp at retransmit time
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 10a81980fc47e64ffac26a073139813d3f697b64 ]
+
+In the very unlikely case that __tcp_retransmit_skb() cannot use the cloning
+done in tcp_transmit_skb(), we need to refresh skb_mstamp before doing
+the copy and transmit; otherwise the TCP TS val will be an exact copy of the
+original transmit.
+
+Fixes: 7faee5c0d514 ("tcp: remove TCP_SKB_CB(skb)->when")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Yuchung Cheng <ycheng@google.com>
+Acked-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -2625,8 +2625,10 @@ int __tcp_retransmit_skb(struct sock *sk
+        */
+       if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) ||
+                    skb_headroom(skb) >= 0xFFFF)) {
+-              struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
+-                                                 GFP_ATOMIC);
++              struct sk_buff *nskb;
++
++              skb_mstamp_get(&skb->skb_mstamp);
++              nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
+               err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
+                            -ENOBUFS;
+       } else {
diff --git a/queue-4.4/uapi-glibc-compat-fix-compile-errors-when-glibc-net-if.h-included-before-linux-if.h-mime-version-1.0.patch b/queue-4.4/uapi-glibc-compat-fix-compile-errors-when-glibc-net-if.h-included-before-linux-if.h-mime-version-1.0.patch
new file mode 100644 (file)
index 0000000..3e72e7e
--- /dev/null
@@ -0,0 +1,243 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: Mikko Rapeli <mikko.rapeli@iki.fi>
+Date: Sun, 24 Apr 2016 17:45:00 +0200
+Subject: uapi glibc compat: fix compile errors when glibc net/if.h included before linux/if.h
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mikko Rapeli <mikko.rapeli@iki.fi>
+
+[ Upstream commit 4a91cb61bb995e5571098188092e296192309c77 ]
+
+glibc's net/if.h contains copies of definitions from linux/if.h and these
+conflict and cause build failures if both files are included by application
+source code. Changes in uapi headers, which fixed header file dependencies to
+include linux/if.h when it was needed, e.g. commit 1ffad83d, made the
+net/if.h and linux/if.h incompatibilities visible as build failures for
+userspace applications like iproute2 and xtables-addons.
+
+This patch fixes compile errors when glibc net/if.h is included before
+linux/if.h:
+
+./linux/if.h:99:21: error: redeclaration of enumerator ‘IFF_NOARP’
+./linux/if.h:98:23: error: redeclaration of enumerator ‘IFF_RUNNING’
+./linux/if.h:97:26: error: redeclaration of enumerator ‘IFF_NOTRAILERS’
+./linux/if.h:96:27: error: redeclaration of enumerator ‘IFF_POINTOPOINT’
+./linux/if.h:95:24: error: redeclaration of enumerator ‘IFF_LOOPBACK’
+./linux/if.h:94:21: error: redeclaration of enumerator ‘IFF_DEBUG’
+./linux/if.h:93:25: error: redeclaration of enumerator ‘IFF_BROADCAST’
+./linux/if.h:92:19: error: redeclaration of enumerator ‘IFF_UP’
+./linux/if.h:252:8: error: redefinition of ‘struct ifconf’
+./linux/if.h:203:8: error: redefinition of ‘struct ifreq’
+./linux/if.h:169:8: error: redefinition of ‘struct ifmap’
+./linux/if.h:107:23: error: redeclaration of enumerator ‘IFF_DYNAMIC’
+./linux/if.h:106:25: error: redeclaration of enumerator ‘IFF_AUTOMEDIA’
+./linux/if.h:105:23: error: redeclaration of enumerator ‘IFF_PORTSEL’
+./linux/if.h:104:25: error: redeclaration of enumerator ‘IFF_MULTICAST’
+./linux/if.h:103:21: error: redeclaration of enumerator ‘IFF_SLAVE’
+./linux/if.h:102:22: error: redeclaration of enumerator ‘IFF_MASTER’
+./linux/if.h:101:24: error: redeclaration of enumerator ‘IFF_ALLMULTI’
+./linux/if.h:100:23: error: redeclaration of enumerator ‘IFF_PROMISC’
+
+The cases where linux/if.h is included before net/if.h need a similar fix on
+the glibc side, or the order of include files can be changed in userspace
+code as a workaround.
+
+This change was tested in x86 userspace on Debian unstable with
+scripts/headers_compile_test.sh:
+
+$ make headers_install && \
+  cd usr/include && ../../scripts/headers_compile_test.sh -l -k
+...
+cc -Wall -c -nostdinc -I /usr/lib/gcc/i586-linux-gnu/5/include -I /usr/lib/gcc/i586-linux-gnu/5/include-fixed -I . -I /home/mcfrisk/src/linux-2.6/usr/headers_compile_test_include.2uX2zH -I /home/mcfrisk/src/linux-2.6/usr/headers_compile_test_include.2uX2zH/i586-linux-gnu -o /dev/null ./linux/if.h_libc_before_kernel.h
+PASSED libc before kernel test: ./linux/if.h
+
+Reported-by: Jan Engelhardt <jengelh@inai.de>
+Reported-by: Josh Boyer <jwboyer@fedoraproject.org>
+Reported-by: Stephen Hemminger <shemming@brocade.com>
+Reported-by: Waldemar Brodkorb <mail@waldemar-brodkorb.de>
+Cc: Gabriel Laskar <gabriel@lse.epita.fr>
+Signed-off-by: Mikko Rapeli <mikko.rapeli@iki.fi>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/uapi/linux/if.h          |   28 ++++++++++++++++++++++++
+ include/uapi/linux/libc-compat.h |   44 +++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 72 insertions(+)
+
+--- a/include/uapi/linux/if.h
++++ b/include/uapi/linux/if.h
+@@ -19,14 +19,20 @@
+ #ifndef _LINUX_IF_H
+ #define _LINUX_IF_H
++#include <linux/libc-compat.h>          /* for compatibility with glibc */
+ #include <linux/types.h>              /* for "__kernel_caddr_t" et al */
+ #include <linux/socket.h>             /* for "struct sockaddr" et al  */
+ #include <linux/compiler.h>           /* for "__user" et al           */
++#if __UAPI_DEF_IF_IFNAMSIZ
+ #define       IFNAMSIZ        16
++#endif /* __UAPI_DEF_IF_IFNAMSIZ */
+ #define       IFALIASZ        256
+ #include <linux/hdlc/ioctl.h>
++/* For glibc compatibility. An empty enum does not compile. */
++#if __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO != 0 && \
++    __UAPI_DEF_IF_NET_DEVICE_FLAGS != 0
+ /**
+  * enum net_device_flags - &struct net_device flags
+  *
+@@ -68,6 +74,8 @@
+  * @IFF_ECHO: echo sent packets. Volatile.
+  */
+ enum net_device_flags {
++/* for compatibility with glibc net/if.h */
++#if __UAPI_DEF_IF_NET_DEVICE_FLAGS
+       IFF_UP                          = 1<<0,  /* sysfs */
+       IFF_BROADCAST                   = 1<<1,  /* volatile */
+       IFF_DEBUG                       = 1<<2,  /* sysfs */
+@@ -84,11 +92,17 @@ enum net_device_flags {
+       IFF_PORTSEL                     = 1<<13, /* sysfs */
+       IFF_AUTOMEDIA                   = 1<<14, /* sysfs */
+       IFF_DYNAMIC                     = 1<<15, /* sysfs */
++#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS */
++#if __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO
+       IFF_LOWER_UP                    = 1<<16, /* volatile */
+       IFF_DORMANT                     = 1<<17, /* volatile */
+       IFF_ECHO                        = 1<<18, /* volatile */
++#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO */
+ };
++#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO != 0 && __UAPI_DEF_IF_NET_DEVICE_FLAGS != 0 */
++/* for compatibility with glibc net/if.h */
++#if __UAPI_DEF_IF_NET_DEVICE_FLAGS
+ #define IFF_UP                                IFF_UP
+ #define IFF_BROADCAST                 IFF_BROADCAST
+ #define IFF_DEBUG                     IFF_DEBUG
+@@ -105,9 +119,13 @@ enum net_device_flags {
+ #define IFF_PORTSEL                   IFF_PORTSEL
+ #define IFF_AUTOMEDIA                 IFF_AUTOMEDIA
+ #define IFF_DYNAMIC                   IFF_DYNAMIC
++#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS */
++
++#if __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO
+ #define IFF_LOWER_UP                  IFF_LOWER_UP
+ #define IFF_DORMANT                   IFF_DORMANT
+ #define IFF_ECHO                      IFF_ECHO
++#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO */
+ #define IFF_VOLATILE  (IFF_LOOPBACK|IFF_POINTOPOINT|IFF_BROADCAST|IFF_ECHO|\
+               IFF_MASTER|IFF_SLAVE|IFF_RUNNING|IFF_LOWER_UP|IFF_DORMANT)
+@@ -166,6 +184,8 @@ enum {
+  *    being very small might be worth keeping for clean configuration.
+  */
++/* for compatibility with glibc net/if.h */
++#if __UAPI_DEF_IF_IFMAP
+ struct ifmap {
+       unsigned long mem_start;
+       unsigned long mem_end;
+@@ -175,6 +195,7 @@ struct ifmap {
+       unsigned char port;
+       /* 3 bytes spare */
+ };
++#endif /* __UAPI_DEF_IF_IFMAP */
+ struct if_settings {
+       unsigned int type;      /* Type of physical device or protocol */
+@@ -200,6 +221,8 @@ struct if_settings {
+  * remainder may be interface specific.
+  */
++/* for compatibility with glibc net/if.h */
++#if __UAPI_DEF_IF_IFREQ
+ struct ifreq {
+ #define IFHWADDRLEN   6
+       union
+@@ -223,6 +246,7 @@ struct ifreq {
+               struct  if_settings ifru_settings;
+       } ifr_ifru;
+ };
++#endif /* __UAPI_DEF_IF_IFREQ */
+ #define ifr_name      ifr_ifrn.ifrn_name      /* interface name       */
+ #define ifr_hwaddr    ifr_ifru.ifru_hwaddr    /* MAC address          */
+@@ -249,6 +273,8 @@ struct ifreq {
+  * must know all networks accessible).
+  */
++/* for compatibility with glibc net/if.h */
++#if __UAPI_DEF_IF_IFCONF
+ struct ifconf  {
+       int     ifc_len;                        /* size of buffer       */
+       union {
+@@ -256,6 +282,8 @@ struct ifconf  {
+               struct ifreq __user *ifcu_req;
+       } ifc_ifcu;
+ };
++#endif /* __UAPI_DEF_IF_IFCONF */
++
+ #define       ifc_buf ifc_ifcu.ifcu_buf               /* buffer address       */
+ #define       ifc_req ifc_ifcu.ifcu_req               /* array of structures  */
+--- a/include/uapi/linux/libc-compat.h
++++ b/include/uapi/linux/libc-compat.h
+@@ -51,6 +51,40 @@
+ /* We have included glibc headers... */
+ #if defined(__GLIBC__)
++/* Coordinate with glibc net/if.h header. */
++#if defined(_NET_IF_H)
++
++/* GLIBC headers included first so don't define anything
++ * that would already be defined. */
++
++#define __UAPI_DEF_IF_IFCONF 0
++#define __UAPI_DEF_IF_IFMAP 0
++#define __UAPI_DEF_IF_IFNAMSIZ 0
++#define __UAPI_DEF_IF_IFREQ 0
++/* Everything up to IFF_DYNAMIC, matches net/if.h until glibc 2.23 */
++#define __UAPI_DEF_IF_NET_DEVICE_FLAGS 0
++/* For the future if glibc adds IFF_LOWER_UP, IFF_DORMANT and IFF_ECHO */
++#ifndef __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO
++#define __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO 1
++#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO */
++
++#else /* _NET_IF_H */
++
++/* Linux headers included first, and we must define everything
++ * we need. The expectation is that glibc will check the
++ * __UAPI_DEF_* defines and adjust appropriately. */
++
++#define __UAPI_DEF_IF_IFCONF 1
++#define __UAPI_DEF_IF_IFMAP 1
++#define __UAPI_DEF_IF_IFNAMSIZ 1
++#define __UAPI_DEF_IF_IFREQ 1
++/* Everything up to IFF_DYNAMIC, matches net/if.h until glibc 2.23 */
++#define __UAPI_DEF_IF_NET_DEVICE_FLAGS 1
++/* For the future if glibc adds IFF_LOWER_UP, IFF_DORMANT and IFF_ECHO */
++#define __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO 1
++
++#endif /* _NET_IF_H */
++
+ /* Coordinate with glibc netinet/in.h header. */
+ #if defined(_NETINET_IN_H)
+@@ -117,6 +151,16 @@
+  * that we need. */
+ #else /* !defined(__GLIBC__) */
++/* Definitions for if.h */
++#define __UAPI_DEF_IF_IFCONF 1
++#define __UAPI_DEF_IF_IFMAP 1
++#define __UAPI_DEF_IF_IFNAMSIZ 1
++#define __UAPI_DEF_IF_IFREQ 1
++/* Everything up to IFF_DYNAMIC, matches net/if.h until glibc 2.23 */
++#define __UAPI_DEF_IF_NET_DEVICE_FLAGS 1
++/* For the future if glibc adds IFF_LOWER_UP, IFF_DORMANT and IFF_ECHO */
++#define __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO 1
++
+ /* Definitions for in.h */
+ #define __UAPI_DEF_IN_ADDR            1
+ #define __UAPI_DEF_IN_IPPROTO         1
diff --git a/queue-4.4/vlan-pull-on-__vlan_insert_tag-error-path-and-fix-csum-correction.patch b/queue-4.4/vlan-pull-on-__vlan_insert_tag-error-path-and-fix-csum-correction.patch
new file mode 100644 (file)
index 0000000..bae707d
--- /dev/null
@@ -0,0 +1,51 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Sat, 16 Apr 2016 02:27:58 +0200
+Subject: vlan: pull on __vlan_insert_tag error path and fix csum correction
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ Upstream commit 9241e2df4fbc648a92ea0752918e05c26255649e ]
+
+When __vlan_insert_tag() fails from skb_vlan_push() path due to the
+skb_cow_head(), we need to undo the __skb_push() in the error path
+as well that was done earlier to move skb->data pointer to mac header.
+
+Moreover, I noticed that when in the non-error path the __skb_pull()
+is done and the original offset to mac header was non-zero, we fixup
+from a wrong skb->data offset in the checksum complete processing.
+
+So the skb_postpush_rcsum() really needs to be done before __skb_pull()
+where skb->data still points to the mac header start and thus operates
+under the same conditions as in __vlan_insert_tag().
+
+Fixes: 93515d53b133 ("net: move vlan pop/push functions into common code")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/skbuff.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -4427,13 +4427,16 @@ int skb_vlan_push(struct sk_buff *skb, _
+               __skb_push(skb, offset);
+               err = __vlan_insert_tag(skb, skb->vlan_proto,
+                                       skb_vlan_tag_get(skb));
+-              if (err)
++              if (err) {
++                      __skb_pull(skb, offset);
+                       return err;
++              }
++
+               skb->protocol = skb->vlan_proto;
+               skb->mac_len += VLAN_HLEN;
+-              __skb_pull(skb, offset);
+               skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);
++              __skb_pull(skb, offset);
+       }
+       __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
+       return 0;
diff --git a/queue-4.4/vsock-do-not-disconnect-socket-when-peer-has-shutdown-send-only.patch b/queue-4.4/vsock-do-not-disconnect-socket-when-peer-has-shutdown-send-only.patch
new file mode 100644 (file)
index 0000000..b57879b
--- /dev/null
@@ -0,0 +1,66 @@
+From foo@baz Mon May 16 11:21:32 PDT 2016
+From: Ian Campbell <ian.campbell@docker.com>
+Date: Wed, 4 May 2016 14:21:53 +0100
+Subject: VSOCK: do not disconnect socket when peer has shutdown SEND only
+
+From: Ian Campbell <ian.campbell@docker.com>
+
+[ Upstream commit dedc58e067d8c379a15a8a183c5db318201295bb ]
+
+The peer may be expecting a reply having sent a request and then done a
+shutdown(SHUT_WR), so tearing down the whole socket at this point seems
+wrong and breaks for me with a client which does a SHUT_WR.
+
+Looking at other socket families' stream_recvmsg callbacks, doing a shutdown
+here does not seem to be the norm, and removing it does not seem to have
+had any adverse effects that I can see.
+
+I'm using Stefan's RFC virtio transport patches, I'm unsure of the impact
+on the vmci transport.
+
+Signed-off-by: Ian Campbell <ian.campbell@docker.com>
+Cc: "David S. Miller" <davem@davemloft.net>
+Cc: Stefan Hajnoczi <stefanha@redhat.com>
+Cc: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
+Cc: Andy King <acking@vmware.com>
+Cc: Dmitry Torokhov <dtor@vmware.com>
+Cc: Jorgen Hansen <jhansen@vmware.com>
+Cc: Adit Ranadive <aditr@vmware.com>
+Cc: netdev@vger.kernel.org
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/vmw_vsock/af_vsock.c |   21 +--------------------
+ 1 file changed, 1 insertion(+), 20 deletions(-)
+
+--- a/net/vmw_vsock/af_vsock.c
++++ b/net/vmw_vsock/af_vsock.c
+@@ -1794,27 +1794,8 @@ vsock_stream_recvmsg(struct socket *sock
+       else if (sk->sk_shutdown & RCV_SHUTDOWN)
+               err = 0;
+-      if (copied > 0) {
+-              /* We only do these additional bookkeeping/notification steps
+-               * if we actually copied something out of the queue pair
+-               * instead of just peeking ahead.
+-               */
+-
+-              if (!(flags & MSG_PEEK)) {
+-                      /* If the other side has shutdown for sending and there
+-                       * is nothing more to read, then modify the socket
+-                       * state.
+-                       */
+-                      if (vsk->peer_shutdown & SEND_SHUTDOWN) {
+-                              if (vsock_stream_has_data(vsk) <= 0) {
+-                                      sk->sk_state = SS_UNCONNECTED;
+-                                      sock_set_flag(sk, SOCK_DONE);
+-                                      sk->sk_state_change(sk);
+-                              }
+-                      }
+-              }
++      if (copied > 0)
+               err = copied;
+-      }
+ out_wait:
+       finish_wait(sk_sleep(sk), &wait);