git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.4-stable patches
author     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Tue, 17 Dec 2019 19:27:02 +0000 (20:27 +0100)
committer  Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Tue, 17 Dec 2019 19:27:02 +0000 (20:27 +0100)
added patches:
act_ct-support-asymmetric-conntrack.patch
cls_flower-fix-the-behavior-using-port-ranges-with-hw-offload.patch
fixed-updating-of-ethertype-in-function-skb_mpls_pop.patch
gre-refetch-erspan-header-from-skb-data-after-pskb_may_pull.patch
hsr-fix-a-null-pointer-dereference-in-hsr_dev_xmit.patch
inet-protect-against-too-small-mtu-values.patch
ionic-keep-users-rss-hash-across-lif-reset.patch
mqprio-fix-out-of-bounds-access-in-mqprio_dump.patch
net-bridge-deny-dev_set_mac_address-when-unregistering.patch
net-core-rename-indirect-block-ingress-cb-function.patch
net-dsa-fix-flow-dissection-on-tx-path.patch
net-ethernet-ti-cpsw-fix-extra-rx-interrupt.patch
net-fixed-updating-of-ethertype-in-skb_mpls_push.patch
net-ipv6-add-net-argument-to-ip6_dst_lookup_flow.patch
net-ipv6_stub-use-ip6_dst_lookup_flow-instead-of-ip6_dst_lookup.patch
net-mlx5e-ethtool-fix-analysis-of-speed-setting.patch
net-mlx5e-fix-freeing-flow-with-kfree-and-not-kvfree.patch
net-mlx5e-fix-sff-8472-eeprom-length.patch
net-mlx5e-fix-translation-of-link-mode-into-speed.patch
net-mlx5e-fix-txq-indices-to-be-sequential.patch
net-mlx5e-query-global-pause-state-before-setting-prio2buffer.patch
net-mscc-ocelot-unregister-the-ptp-clock-on-deinit.patch
net-sched-allow-indirect-blocks-to-bind-to-clsact-in-tc.patch
net-sched-fix-dump-qlen-for-sch_mq-sch_mqprio-with-nolock-subqueues.patch
net-sysfs-call-dev_hold-always-in-netdev_queue_add_kobject.patch
net-thunderx-start-phy-before-starting-autonegotiation.patch
net-tls-fix-return-values-to-avoid-enotsupp.patch
net_sched-validate-tca_kind-attribute-in-tc_chain_tmplt_add.patch
openvswitch-support-asymmetric-conntrack.patch
page_pool-do-not-release-pool-until-inflight-0.patch
r8169-add-missing-rx-enabling-for-wol-on-rtl8125.patch
tcp-fix-rejected-syncookies-due-to-stale-timestamps.patch
tcp-md5-fix-potential-overestimation-of-tcp-option-space.patch
tcp-protect-accesses-to-.ts_recent_stamp-with-read-write-_once.patch
tcp-tighten-acceptance-of-acks-not-matching-a-child-socket.patch
tipc-fix-ordering-of-tipc-module-init-and-exit-routine.patch
xdp-obtain-the-mem_id-mutex-before-trying-to-remove-an-entry.patch

38 files changed:
queue-5.4/act_ct-support-asymmetric-conntrack.patch [new file with mode: 0644]
queue-5.4/cls_flower-fix-the-behavior-using-port-ranges-with-hw-offload.patch [new file with mode: 0644]
queue-5.4/fixed-updating-of-ethertype-in-function-skb_mpls_pop.patch [new file with mode: 0644]
queue-5.4/gre-refetch-erspan-header-from-skb-data-after-pskb_may_pull.patch [new file with mode: 0644]
queue-5.4/hsr-fix-a-null-pointer-dereference-in-hsr_dev_xmit.patch [new file with mode: 0644]
queue-5.4/inet-protect-against-too-small-mtu-values.patch [new file with mode: 0644]
queue-5.4/ionic-keep-users-rss-hash-across-lif-reset.patch [new file with mode: 0644]
queue-5.4/mqprio-fix-out-of-bounds-access-in-mqprio_dump.patch [new file with mode: 0644]
queue-5.4/net-bridge-deny-dev_set_mac_address-when-unregistering.patch [new file with mode: 0644]
queue-5.4/net-core-rename-indirect-block-ingress-cb-function.patch [new file with mode: 0644]
queue-5.4/net-dsa-fix-flow-dissection-on-tx-path.patch [new file with mode: 0644]
queue-5.4/net-ethernet-ti-cpsw-fix-extra-rx-interrupt.patch [new file with mode: 0644]
queue-5.4/net-fixed-updating-of-ethertype-in-skb_mpls_push.patch [new file with mode: 0644]
queue-5.4/net-ipv6-add-net-argument-to-ip6_dst_lookup_flow.patch [new file with mode: 0644]
queue-5.4/net-ipv6_stub-use-ip6_dst_lookup_flow-instead-of-ip6_dst_lookup.patch [new file with mode: 0644]
queue-5.4/net-mlx5e-ethtool-fix-analysis-of-speed-setting.patch [new file with mode: 0644]
queue-5.4/net-mlx5e-fix-freeing-flow-with-kfree-and-not-kvfree.patch [new file with mode: 0644]
queue-5.4/net-mlx5e-fix-sff-8472-eeprom-length.patch [new file with mode: 0644]
queue-5.4/net-mlx5e-fix-translation-of-link-mode-into-speed.patch [new file with mode: 0644]
queue-5.4/net-mlx5e-fix-txq-indices-to-be-sequential.patch [new file with mode: 0644]
queue-5.4/net-mlx5e-query-global-pause-state-before-setting-prio2buffer.patch [new file with mode: 0644]
queue-5.4/net-mscc-ocelot-unregister-the-ptp-clock-on-deinit.patch [new file with mode: 0644]
queue-5.4/net-sched-allow-indirect-blocks-to-bind-to-clsact-in-tc.patch [new file with mode: 0644]
queue-5.4/net-sched-fix-dump-qlen-for-sch_mq-sch_mqprio-with-nolock-subqueues.patch [new file with mode: 0644]
queue-5.4/net-sysfs-call-dev_hold-always-in-netdev_queue_add_kobject.patch [new file with mode: 0644]
queue-5.4/net-thunderx-start-phy-before-starting-autonegotiation.patch [new file with mode: 0644]
queue-5.4/net-tls-fix-return-values-to-avoid-enotsupp.patch [new file with mode: 0644]
queue-5.4/net_sched-validate-tca_kind-attribute-in-tc_chain_tmplt_add.patch [new file with mode: 0644]
queue-5.4/openvswitch-support-asymmetric-conntrack.patch [new file with mode: 0644]
queue-5.4/page_pool-do-not-release-pool-until-inflight-0.patch [new file with mode: 0644]
queue-5.4/r8169-add-missing-rx-enabling-for-wol-on-rtl8125.patch [new file with mode: 0644]
queue-5.4/series [new file with mode: 0644]
queue-5.4/tcp-fix-rejected-syncookies-due-to-stale-timestamps.patch [new file with mode: 0644]
queue-5.4/tcp-md5-fix-potential-overestimation-of-tcp-option-space.patch [new file with mode: 0644]
queue-5.4/tcp-protect-accesses-to-.ts_recent_stamp-with-read-write-_once.patch [new file with mode: 0644]
queue-5.4/tcp-tighten-acceptance-of-acks-not-matching-a-child-socket.patch [new file with mode: 0644]
queue-5.4/tipc-fix-ordering-of-tipc-module-init-and-exit-routine.patch [new file with mode: 0644]
queue-5.4/xdp-obtain-the-mem_id-mutex-before-trying-to-remove-an-entry.patch [new file with mode: 0644]

diff --git a/queue-5.4/act_ct-support-asymmetric-conntrack.patch b/queue-5.4/act_ct-support-asymmetric-conntrack.patch
new file mode 100644 (file)
index 0000000..41dd164
--- /dev/null
@@ -0,0 +1,56 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Aaron Conole <aconole@redhat.com>
+Date: Tue, 3 Dec 2019 16:34:14 -0500
+Subject: act_ct: support asymmetric conntrack
+
+From: Aaron Conole <aconole@redhat.com>
+
+[ Upstream commit 95219afbb980f10934de9f23a3e199be69c5ed09 ]
+
+The act_ct TC module shares a common conntrack and NAT infrastructure
+exposed via netfilter.  It's possible that a packet needs both SNAT and
+DNAT manipulation, e.g. due to a tuple collision.  Netfilter can support
+this because it runs through the NAT table twice - once on ingress and
+again after egress.  The act_ct action doesn't have such a capability.
+
+Like the netfilter hook infrastructure, we should run through NAT twice
+to keep the symmetry.
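+
+As a consolidated sketch of the intended behaviour (mirroring the
+hunks below, with added comments):
+
+      err = ct_nat_execute(skb, ct, ctinfo, range, maniptype);
+      if (err == NF_ACCEPT &&
+          ct->status & IPS_SRC_NAT && ct->status & IPS_DST_NAT) {
+              /* the entry needs both SNAT and DNAT: run a second
+               * pass with the opposite manip type, like the two
+               * netfilter NAT hook traversals would
+               */
+              maniptype = maniptype == NF_NAT_MANIP_SRC ?
+                          NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC;
+              err = ct_nat_execute(skb, ct, ctinfo, range, maniptype);
+      }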
+
+Fixes: b57dc7c13ea9 ("net/sched: Introduce action ct")
+Signed-off-by: Aaron Conole <aconole@redhat.com>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/act_ct.c |   13 ++++++++++++-
+ 1 file changed, 12 insertions(+), 1 deletion(-)
+
+--- a/net/sched/act_ct.c
++++ b/net/sched/act_ct.c
+@@ -329,6 +329,7 @@ static int tcf_ct_act_nat(struct sk_buff
+                         bool commit)
+ {
+ #if IS_ENABLED(CONFIG_NF_NAT)
++      int err;
+       enum nf_nat_manip_type maniptype;
+       if (!(ct_action & TCA_CT_ACT_NAT))
+@@ -359,7 +360,17 @@ static int tcf_ct_act_nat(struct sk_buff
+               return NF_ACCEPT;
+       }
+-      return ct_nat_execute(skb, ct, ctinfo, range, maniptype);
++      err = ct_nat_execute(skb, ct, ctinfo, range, maniptype);
++      if (err == NF_ACCEPT &&
++          ct->status & IPS_SRC_NAT && ct->status & IPS_DST_NAT) {
++              if (maniptype == NF_NAT_MANIP_SRC)
++                      maniptype = NF_NAT_MANIP_DST;
++              else
++                      maniptype = NF_NAT_MANIP_SRC;
++
++              err = ct_nat_execute(skb, ct, ctinfo, range, maniptype);
++      }
++      return err;
+ #else
+       return NF_ACCEPT;
+ #endif
diff --git a/queue-5.4/cls_flower-fix-the-behavior-using-port-ranges-with-hw-offload.patch b/queue-5.4/cls_flower-fix-the-behavior-using-port-ranges-with-hw-offload.patch
new file mode 100644 (file)
index 0000000..082661e
--- /dev/null
@@ -0,0 +1,305 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Yoshiki Komachi <komachi.yoshiki@gmail.com>
+Date: Tue, 3 Dec 2019 19:40:12 +0900
+Subject: cls_flower: Fix the behavior using port ranges with hw-offload
+
+From: Yoshiki Komachi <komachi.yoshiki@gmail.com>
+
+[ Upstream commit 8ffb055beae58574d3e77b4bf9d4d15eace1ca27 ]
+
+The recent commit 5c72299fba9d ("net: sched: cls_flower: Classify
+packets using port ranges") added filtering based on port ranges
+to tc flower. However, the commit missed the necessary changes in the
+hw-offload code, so the feature ended up generating incorrect offloaded
+flow keys in the NIC.
+
+A more detailed example is below:
+
+$ tc qdisc add dev eth0 ingress
+$ tc filter add dev eth0 ingress protocol ip flower ip_proto tcp \
+  dst_port 100-200 action drop
+
+With the setup above, an exact match filter with dst_port == 0 will be
+installed in the NIC by hw-offload. IOW, the NIC will have a rule which
+is equivalent to the following one.
+
+$ tc qdisc add dev eth0 ingress
+$ tc filter add dev eth0 ingress protocol ip flower ip_proto tcp \
+  dst_port 0 action drop
+
+The behavior was caused by the flow dissector, which extracts packet
+data into the flow key in tc flower. More specifically, regardless
+of exact match or specified port ranges, fl_init_dissector() set the
+FLOW_DISSECTOR_KEY_PORTS flag in struct flow_dissector to extract port
+numbers from the skb in skb_flow_dissect(), called by fl_classify(). Note
+that device drivers receive the same struct flow_dissector object as
+used in skb_flow_dissect(). Thus, offload drivers could not identify
+which of the two was intended, because the FLOW_DISSECTOR_KEY_PORTS
+flag was set in struct flow_dissector in either case.
+
+This patch adds a new FLOW_DISSECTOR_KEY_PORTS_RANGE flag and a new
+tp_range field in struct fl_flow_key so that offload drivers can
+recognize which kind of filter they are given. At this point, when
+filters based on port ranges are passed to drivers, the drivers return
+the EOPNOTSUPP error because they do not support the feature (the newly
+created FLOW_DISSECTOR_KEY_PORTS_RANGE flag).
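+
+For illustration only (a hypothetical driver callback, not part of
+this patch), a driver that handles only exact port matches can now
+detect and reject range-based filters explicitly:
+
+      static int foo_flower_parse(struct flow_cls_offload *f)
+      {
+              struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+
+              /* port ranges now use a dedicated dissector key */
+              if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS_RANGE))
+                      return -EOPNOTSUPP;
+
+              /* handle exact-match keys as before */
+              return 0;
+      }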
+
+Fixes: 5c72299fba9d ("net: sched: cls_flower: Classify packets using port ranges")
+Signed-off-by: Yoshiki Komachi <komachi.yoshiki@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/flow_dissector.h |    1 
+ net/core/flow_dissector.c    |   37 ++++++++++---
+ net/sched/cls_flower.c       |  118 ++++++++++++++++++++++++-------------------
+ 3 files changed, 95 insertions(+), 61 deletions(-)
+
+--- a/include/net/flow_dissector.h
++++ b/include/net/flow_dissector.h
+@@ -229,6 +229,7 @@ enum flow_dissector_key_id {
+       FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_ipv4_addrs */
+       FLOW_DISSECTOR_KEY_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */
+       FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */
++      FLOW_DISSECTOR_KEY_PORTS_RANGE, /* struct flow_dissector_key_ports */
+       FLOW_DISSECTOR_KEY_ICMP, /* struct flow_dissector_key_icmp */
+       FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */
+       FLOW_DISSECTOR_KEY_TIPC, /* struct flow_dissector_key_tipc */
+--- a/net/core/flow_dissector.c
++++ b/net/core/flow_dissector.c
+@@ -684,6 +684,31 @@ __skb_flow_dissect_tcp(const struct sk_b
+ }
+ static void
++__skb_flow_dissect_ports(const struct sk_buff *skb,
++                       struct flow_dissector *flow_dissector,
++                       void *target_container, void *data, int nhoff,
++                       u8 ip_proto, int hlen)
++{
++      enum flow_dissector_key_id dissector_ports = FLOW_DISSECTOR_KEY_MAX;
++      struct flow_dissector_key_ports *key_ports;
++
++      if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS))
++              dissector_ports = FLOW_DISSECTOR_KEY_PORTS;
++      else if (dissector_uses_key(flow_dissector,
++                                  FLOW_DISSECTOR_KEY_PORTS_RANGE))
++              dissector_ports = FLOW_DISSECTOR_KEY_PORTS_RANGE;
++
++      if (dissector_ports == FLOW_DISSECTOR_KEY_MAX)
++              return;
++
++      key_ports = skb_flow_dissector_target(flow_dissector,
++                                            dissector_ports,
++                                            target_container);
++      key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
++                                              data, hlen);
++}
++
++static void
+ __skb_flow_dissect_ipv4(const struct sk_buff *skb,
+                       struct flow_dissector *flow_dissector,
+                       void *target_container, void *data, const struct iphdr *iph)
+@@ -852,7 +877,6 @@ bool __skb_flow_dissect(const struct net
+       struct flow_dissector_key_control *key_control;
+       struct flow_dissector_key_basic *key_basic;
+       struct flow_dissector_key_addrs *key_addrs;
+-      struct flow_dissector_key_ports *key_ports;
+       struct flow_dissector_key_icmp *key_icmp;
+       struct flow_dissector_key_tags *key_tags;
+       struct flow_dissector_key_vlan *key_vlan;
+@@ -1300,14 +1324,9 @@ ip_proto_again:
+               break;
+       }
+-      if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS) &&
+-          !(key_control->flags & FLOW_DIS_IS_FRAGMENT)) {
+-              key_ports = skb_flow_dissector_target(flow_dissector,
+-                                                    FLOW_DISSECTOR_KEY_PORTS,
+-                                                    target_container);
+-              key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
+-                                                      data, hlen);
+-      }
++      if (!(key_control->flags & FLOW_DIS_IS_FRAGMENT))
++              __skb_flow_dissect_ports(skb, flow_dissector, target_container,
++                                       data, nhoff, ip_proto, hlen);
+       if (dissector_uses_key(flow_dissector,
+                              FLOW_DISSECTOR_KEY_ICMP)) {
+--- a/net/sched/cls_flower.c
++++ b/net/sched/cls_flower.c
+@@ -54,8 +54,13 @@ struct fl_flow_key {
+       struct flow_dissector_key_ip ip;
+       struct flow_dissector_key_ip enc_ip;
+       struct flow_dissector_key_enc_opts enc_opts;
+-      struct flow_dissector_key_ports tp_min;
+-      struct flow_dissector_key_ports tp_max;
++      union {
++              struct flow_dissector_key_ports tp;
++              struct {
++                      struct flow_dissector_key_ports tp_min;
++                      struct flow_dissector_key_ports tp_max;
++              };
++      } tp_range;
+       struct flow_dissector_key_ct ct;
+ } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
+@@ -198,19 +203,19 @@ static bool fl_range_port_dst_cmp(struct
+ {
+       __be16 min_mask, max_mask, min_val, max_val;
+-      min_mask = htons(filter->mask->key.tp_min.dst);
+-      max_mask = htons(filter->mask->key.tp_max.dst);
+-      min_val = htons(filter->key.tp_min.dst);
+-      max_val = htons(filter->key.tp_max.dst);
++      min_mask = htons(filter->mask->key.tp_range.tp_min.dst);
++      max_mask = htons(filter->mask->key.tp_range.tp_max.dst);
++      min_val = htons(filter->key.tp_range.tp_min.dst);
++      max_val = htons(filter->key.tp_range.tp_max.dst);
+       if (min_mask && max_mask) {
+-              if (htons(key->tp.dst) < min_val ||
+-                  htons(key->tp.dst) > max_val)
++              if (htons(key->tp_range.tp.dst) < min_val ||
++                  htons(key->tp_range.tp.dst) > max_val)
+                       return false;
+               /* skb does not have min and max values */
+-              mkey->tp_min.dst = filter->mkey.tp_min.dst;
+-              mkey->tp_max.dst = filter->mkey.tp_max.dst;
++              mkey->tp_range.tp_min.dst = filter->mkey.tp_range.tp_min.dst;
++              mkey->tp_range.tp_max.dst = filter->mkey.tp_range.tp_max.dst;
+       }
+       return true;
+ }
+@@ -221,19 +226,19 @@ static bool fl_range_port_src_cmp(struct
+ {
+       __be16 min_mask, max_mask, min_val, max_val;
+-      min_mask = htons(filter->mask->key.tp_min.src);
+-      max_mask = htons(filter->mask->key.tp_max.src);
+-      min_val = htons(filter->key.tp_min.src);
+-      max_val = htons(filter->key.tp_max.src);
++      min_mask = htons(filter->mask->key.tp_range.tp_min.src);
++      max_mask = htons(filter->mask->key.tp_range.tp_max.src);
++      min_val = htons(filter->key.tp_range.tp_min.src);
++      max_val = htons(filter->key.tp_range.tp_max.src);
+       if (min_mask && max_mask) {
+-              if (htons(key->tp.src) < min_val ||
+-                  htons(key->tp.src) > max_val)
++              if (htons(key->tp_range.tp.src) < min_val ||
++                  htons(key->tp_range.tp.src) > max_val)
+                       return false;
+               /* skb does not have min and max values */
+-              mkey->tp_min.src = filter->mkey.tp_min.src;
+-              mkey->tp_max.src = filter->mkey.tp_max.src;
++              mkey->tp_range.tp_min.src = filter->mkey.tp_range.tp_min.src;
++              mkey->tp_range.tp_max.src = filter->mkey.tp_range.tp_max.src;
+       }
+       return true;
+ }
+@@ -715,23 +720,25 @@ static void fl_set_key_val(struct nlattr
+ static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key,
+                                struct fl_flow_key *mask)
+ {
+-      fl_set_key_val(tb, &key->tp_min.dst,
+-                     TCA_FLOWER_KEY_PORT_DST_MIN, &mask->tp_min.dst,
+-                     TCA_FLOWER_UNSPEC, sizeof(key->tp_min.dst));
+-      fl_set_key_val(tb, &key->tp_max.dst,
+-                     TCA_FLOWER_KEY_PORT_DST_MAX, &mask->tp_max.dst,
+-                     TCA_FLOWER_UNSPEC, sizeof(key->tp_max.dst));
+-      fl_set_key_val(tb, &key->tp_min.src,
+-                     TCA_FLOWER_KEY_PORT_SRC_MIN, &mask->tp_min.src,
+-                     TCA_FLOWER_UNSPEC, sizeof(key->tp_min.src));
+-      fl_set_key_val(tb, &key->tp_max.src,
+-                     TCA_FLOWER_KEY_PORT_SRC_MAX, &mask->tp_max.src,
+-                     TCA_FLOWER_UNSPEC, sizeof(key->tp_max.src));
+-
+-      if ((mask->tp_min.dst && mask->tp_max.dst &&
+-           htons(key->tp_max.dst) <= htons(key->tp_min.dst)) ||
+-           (mask->tp_min.src && mask->tp_max.src &&
+-            htons(key->tp_max.src) <= htons(key->tp_min.src)))
++      fl_set_key_val(tb, &key->tp_range.tp_min.dst,
++                     TCA_FLOWER_KEY_PORT_DST_MIN, &mask->tp_range.tp_min.dst,
++                     TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_min.dst));
++      fl_set_key_val(tb, &key->tp_range.tp_max.dst,
++                     TCA_FLOWER_KEY_PORT_DST_MAX, &mask->tp_range.tp_max.dst,
++                     TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_max.dst));
++      fl_set_key_val(tb, &key->tp_range.tp_min.src,
++                     TCA_FLOWER_KEY_PORT_SRC_MIN, &mask->tp_range.tp_min.src,
++                     TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_min.src));
++      fl_set_key_val(tb, &key->tp_range.tp_max.src,
++                     TCA_FLOWER_KEY_PORT_SRC_MAX, &mask->tp_range.tp_max.src,
++                     TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_max.src));
++
++      if ((mask->tp_range.tp_min.dst && mask->tp_range.tp_max.dst &&
++           htons(key->tp_range.tp_max.dst) <=
++               htons(key->tp_range.tp_min.dst)) ||
++          (mask->tp_range.tp_min.src && mask->tp_range.tp_max.src &&
++           htons(key->tp_range.tp_max.src) <=
++               htons(key->tp_range.tp_min.src)))
+               return -EINVAL;
+       return 0;
+@@ -1320,9 +1327,10 @@ static void fl_init_dissector(struct flo
+                            FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
+       FL_KEY_SET_IF_MASKED(mask, keys, cnt,
+                            FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
+-      if (FL_KEY_IS_MASKED(mask, tp) ||
+-          FL_KEY_IS_MASKED(mask, tp_min) || FL_KEY_IS_MASKED(mask, tp_max))
+-              FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_PORTS, tp);
++      FL_KEY_SET_IF_MASKED(mask, keys, cnt,
++                           FLOW_DISSECTOR_KEY_PORTS, tp);
++      FL_KEY_SET_IF_MASKED(mask, keys, cnt,
++                           FLOW_DISSECTOR_KEY_PORTS_RANGE, tp_range);
+       FL_KEY_SET_IF_MASKED(mask, keys, cnt,
+                            FLOW_DISSECTOR_KEY_IP, ip);
+       FL_KEY_SET_IF_MASKED(mask, keys, cnt,
+@@ -1371,8 +1379,10 @@ static struct fl_flow_mask *fl_create_ne
+       fl_mask_copy(newmask, mask);
+-      if ((newmask->key.tp_min.dst && newmask->key.tp_max.dst) ||
+-          (newmask->key.tp_min.src && newmask->key.tp_max.src))
++      if ((newmask->key.tp_range.tp_min.dst &&
++           newmask->key.tp_range.tp_max.dst) ||
++          (newmask->key.tp_range.tp_min.src &&
++           newmask->key.tp_range.tp_max.src))
+               newmask->flags |= TCA_FLOWER_MASK_FLAGS_RANGE;
+       err = fl_init_mask_hashtable(newmask);
+@@ -1970,18 +1980,22 @@ static int fl_dump_key_val(struct sk_buf
+ static int fl_dump_key_port_range(struct sk_buff *skb, struct fl_flow_key *key,
+                                 struct fl_flow_key *mask)
+ {
+-      if (fl_dump_key_val(skb, &key->tp_min.dst, TCA_FLOWER_KEY_PORT_DST_MIN,
+-                          &mask->tp_min.dst, TCA_FLOWER_UNSPEC,
+-                          sizeof(key->tp_min.dst)) ||
+-          fl_dump_key_val(skb, &key->tp_max.dst, TCA_FLOWER_KEY_PORT_DST_MAX,
+-                          &mask->tp_max.dst, TCA_FLOWER_UNSPEC,
+-                          sizeof(key->tp_max.dst)) ||
+-          fl_dump_key_val(skb, &key->tp_min.src, TCA_FLOWER_KEY_PORT_SRC_MIN,
+-                          &mask->tp_min.src, TCA_FLOWER_UNSPEC,
+-                          sizeof(key->tp_min.src)) ||
+-          fl_dump_key_val(skb, &key->tp_max.src, TCA_FLOWER_KEY_PORT_SRC_MAX,
+-                          &mask->tp_max.src, TCA_FLOWER_UNSPEC,
+-                          sizeof(key->tp_max.src)))
++      if (fl_dump_key_val(skb, &key->tp_range.tp_min.dst,
++                          TCA_FLOWER_KEY_PORT_DST_MIN,
++                          &mask->tp_range.tp_min.dst, TCA_FLOWER_UNSPEC,
++                          sizeof(key->tp_range.tp_min.dst)) ||
++          fl_dump_key_val(skb, &key->tp_range.tp_max.dst,
++                          TCA_FLOWER_KEY_PORT_DST_MAX,
++                          &mask->tp_range.tp_max.dst, TCA_FLOWER_UNSPEC,
++                          sizeof(key->tp_range.tp_max.dst)) ||
++          fl_dump_key_val(skb, &key->tp_range.tp_min.src,
++                          TCA_FLOWER_KEY_PORT_SRC_MIN,
++                          &mask->tp_range.tp_min.src, TCA_FLOWER_UNSPEC,
++                          sizeof(key->tp_range.tp_min.src)) ||
++          fl_dump_key_val(skb, &key->tp_range.tp_max.src,
++                          TCA_FLOWER_KEY_PORT_SRC_MAX,
++                          &mask->tp_range.tp_max.src, TCA_FLOWER_UNSPEC,
++                          sizeof(key->tp_range.tp_max.src)))
+               return -1;
+       return 0;
diff --git a/queue-5.4/fixed-updating-of-ethertype-in-function-skb_mpls_pop.patch b/queue-5.4/fixed-updating-of-ethertype-in-function-skb_mpls_pop.patch
new file mode 100644 (file)
index 0000000..18a70aa
--- /dev/null
@@ -0,0 +1,105 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Martin Varghese <martin.varghese@nokia.com>
+Date: Mon, 2 Dec 2019 10:49:51 +0530
+Subject: Fixed updating of ethertype in function skb_mpls_pop
+
+From: Martin Varghese <martin.varghese@nokia.com>
+
+[ Upstream commit 040b5cfbcefa263ccf2c118c4938308606bb7ed8 ]
+
+skb_mpls_pop() was not updating the ethertype of an ethernet packet if
+the packet was originally received from a non-ARPHRD_ETHER device.
+
+In the OVS datapath flow below, since the device corresponding to port 7
+is an l3 device (ARPHRD_NONE), the skb_mpls_pop function does not update
+the ethertype of the packet even though the previous push_eth action had
+added an ethernet header to the packet.
+
+recirc_id(0),in_port(7),eth_type(0x8847),
+mpls(label=12/0xfffff,tc=0/0,ttl=0/0x0,bos=1/1),
+actions:push_eth(src=00:00:00:00:00:00,dst=00:00:00:00:00:00),
+pop_mpls(eth_type=0x800),4
+
+Fixes: ed246cee09b9 ("net: core: move pop MPLS functionality from OvS to core helper")
+Signed-off-by: Martin Varghese <martin.varghese@nokia.com>
+Acked-by: Pravin B Shelar <pshelar@ovn.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/skbuff.h    |    3 ++-
+ net/core/skbuff.c         |    6 ++++--
+ net/openvswitch/actions.c |    3 ++-
+ net/sched/act_mpls.c      |    4 +++-
+ 4 files changed, 11 insertions(+), 5 deletions(-)
+
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -3528,7 +3528,8 @@ int skb_vlan_pop(struct sk_buff *skb);
+ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci);
+ int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto,
+                 int mac_len);
+-int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len);
++int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len,
++               bool ethernet);
+ int skb_mpls_update_lse(struct sk_buff *skb, __be32 mpls_lse);
+ int skb_mpls_dec_ttl(struct sk_buff *skb);
+ struct sk_buff *pskb_extract(struct sk_buff *skb, int off, int to_copy,
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -5529,12 +5529,14 @@ EXPORT_SYMBOL_GPL(skb_mpls_push);
+  * @skb: buffer
+  * @next_proto: ethertype of header after popped MPLS header
+  * @mac_len: length of the MAC header
++ * @ethernet: flag to indicate if ethernet header is present in packet
+  *
+  * Expects skb->data at mac header.
+  *
+  * Returns 0 on success, -errno otherwise.
+  */
+-int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len)
++int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len,
++               bool ethernet)
+ {
+       int err;
+@@ -5553,7 +5555,7 @@ int skb_mpls_pop(struct sk_buff *skb, __
+       skb_reset_mac_header(skb);
+       skb_set_network_header(skb, mac_len);
+-      if (skb->dev && skb->dev->type == ARPHRD_ETHER) {
++      if (ethernet) {
+               struct ethhdr *hdr;
+               /* use mpls_hdr() to get ethertype to account for VLANs. */
+--- a/net/openvswitch/actions.c
++++ b/net/openvswitch/actions.c
+@@ -179,7 +179,8 @@ static int pop_mpls(struct sk_buff *skb,
+ {
+       int err;
+-      err = skb_mpls_pop(skb, ethertype, skb->mac_len);
++      err = skb_mpls_pop(skb, ethertype, skb->mac_len,
++                         ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET);
+       if (err)
+               return err;
+--- a/net/sched/act_mpls.c
++++ b/net/sched/act_mpls.c
+@@ -1,6 +1,7 @@
+ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+ /* Copyright (C) 2019 Netronome Systems, Inc. */
++#include <linux/if_arp.h>
+ #include <linux/init.h>
+ #include <linux/kernel.h>
+ #include <linux/module.h>
+@@ -76,7 +77,8 @@ static int tcf_mpls_act(struct sk_buff *
+       switch (p->tcfm_action) {
+       case TCA_MPLS_ACT_POP:
+-              if (skb_mpls_pop(skb, p->tcfm_proto, mac_len))
++              if (skb_mpls_pop(skb, p->tcfm_proto, mac_len,
++                               skb->dev && skb->dev->type == ARPHRD_ETHER))
+                       goto drop;
+               break;
+       case TCA_MPLS_ACT_PUSH:
diff --git a/queue-5.4/gre-refetch-erspan-header-from-skb-data-after-pskb_may_pull.patch b/queue-5.4/gre-refetch-erspan-header-from-skb-data-after-pskb_may_pull.patch
new file mode 100644 (file)
index 0000000..9ba5269
--- /dev/null
@@ -0,0 +1,41 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Cong Wang <xiyou.wangcong@gmail.com>
+Date: Thu, 5 Dec 2019 19:39:02 -0800
+Subject: gre: refetch erspan header from skb->data after pskb_may_pull()
+
+From: Cong Wang <xiyou.wangcong@gmail.com>
+
+[ Upstream commit 0e4940928c26527ce8f97237fef4c8a91cd34207 ]
+
+After pskb_may_pull() we should always refetch the header
+pointers from skb->data in case it got reallocated.
+
+In gre_parse_header(), the erspan header is still fetched
+from the 'options' pointer, which is computed before
+pskb_may_pull().
+
+Found this during code review of a KMSAN bug report.
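+
+The general pattern (a sketch of the fix below, with the reasoning
+spelled out in comments):
+
+      if (!pskb_may_pull(skb, nhs + hdr_len + sizeof(*ershdr)))
+              return -EINVAL;
+      /* pskb_may_pull() may have reallocated skb->head, so recompute
+       * the header pointer from skb->data instead of reusing a stale
+       * pointer taken before the pull
+       */
+      ershdr = (struct erspan_base_hdr *)(skb->data + nhs + hdr_len);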
+
+Fixes: cb73ee40b1b3 ("net: ip_gre: use erspan key field for tunnel lookup")
+Cc: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Acked-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
+Acked-by: William Tu <u9012063@gmail.com>
+Reviewed-by: Simon Horman <simon.horman@netronome.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/gre_demux.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/gre_demux.c
++++ b/net/ipv4/gre_demux.c
+@@ -127,7 +127,7 @@ int gre_parse_header(struct sk_buff *skb
+               if (!pskb_may_pull(skb, nhs + hdr_len + sizeof(*ershdr)))
+                       return -EINVAL;
+-              ershdr = (struct erspan_base_hdr *)options;
++              ershdr = (struct erspan_base_hdr *)(skb->data + nhs + hdr_len);
+               tpi->key = cpu_to_be32(get_session_id(ershdr));
+       }
diff --git a/queue-5.4/hsr-fix-a-null-pointer-dereference-in-hsr_dev_xmit.patch b/queue-5.4/hsr-fix-a-null-pointer-dereference-in-hsr_dev_xmit.patch
new file mode 100644 (file)
index 0000000..6a0d0bd
--- /dev/null
@@ -0,0 +1,97 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Taehee Yoo <ap420073@gmail.com>
+Date: Thu, 5 Dec 2019 07:23:39 +0000
+Subject: hsr: fix a NULL pointer dereference in hsr_dev_xmit()
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit df95467b6d2bfce49667ee4b71c67249b01957f7 ]
+
+hsr_dev_xmit() calls hsr_port_get_hsr() to find the master node, which
+returns NULL if the master node does not exist in the list.
+But hsr_dev_xmit() doesn't check the returned pointer, so a NULL
+pointer dereference can occur.
+
+Test commands:
+    ip netns add nst
+    ip link add veth0 type veth peer name veth1
+    ip link add veth2 type veth peer name veth3
+    ip link set veth1 netns nst
+    ip link set veth3 netns nst
+    ip link set veth0 up
+    ip link set veth2 up
+    ip link add hsr0 type hsr slave1 veth0 slave2 veth2
+    ip a a 192.168.100.1/24 dev hsr0
+    ip link set hsr0 up
+    ip netns exec nst ip link set veth1 up
+    ip netns exec nst ip link set veth3 up
+    ip netns exec nst ip link add hsr1 type hsr slave1 veth1 slave2 veth3
+    ip netns exec nst ip a a 192.168.100.2/24 dev hsr1
+    ip netns exec nst ip link set hsr1 up
+    hping3 192.168.100.2 -2 --flood &
+    modprobe -rv hsr
+
+Splat looks like:
+[  217.351122][ T1635] kasan: CONFIG_KASAN_INLINE enabled
+[  217.352969][ T1635] kasan: GPF could be caused by NULL-ptr deref or user memory access
+[  217.354297][ T1635] general protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI
+[  217.355507][ T1635] CPU: 1 PID: 1635 Comm: hping3 Not tainted 5.4.0+ #192
+[  217.356472][ T1635] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
+[  217.357804][ T1635] RIP: 0010:hsr_dev_xmit+0x34/0x90 [hsr]
+[  217.373010][ T1635] Code: 48 8d be 00 0c 00 00 be 04 00 00 00 48 83 ec 08 e8 21 be ff ff 48 8d 78 10 48 ba 00 b
+[  217.376919][ T1635] RSP: 0018:ffff8880cd8af058 EFLAGS: 00010202
+[  217.377571][ T1635] RAX: 0000000000000000 RBX: ffff8880acde6840 RCX: 0000000000000002
+[  217.379465][ T1635] RDX: dffffc0000000000 RSI: 0000000000000004 RDI: 0000000000000010
+[  217.380274][ T1635] RBP: ffff8880acde6840 R08: ffffed101b440d5d R09: 0000000000000001
+[  217.381078][ T1635] R10: 0000000000000001 R11: ffffed101b440d5c R12: ffff8880bffcc000
+[  217.382023][ T1635] R13: ffff8880bffcc088 R14: 0000000000000000 R15: ffff8880ca675c00
+[  217.383094][ T1635] FS:  00007f060d9d1740(0000) GS:ffff8880da000000(0000) knlGS:0000000000000000
+[  217.384289][ T1635] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  217.385009][ T1635] CR2: 00007faf15381dd0 CR3: 00000000d523c001 CR4: 00000000000606e0
+[  217.385940][ T1635] Call Trace:
+[  217.386544][ T1635]  dev_hard_start_xmit+0x160/0x740
+[  217.387114][ T1635]  __dev_queue_xmit+0x1961/0x2e10
+[  217.388118][ T1635]  ? check_object+0xaf/0x260
+[  217.391466][ T1635]  ? __alloc_skb+0xb9/0x500
+[  217.392017][ T1635]  ? init_object+0x6b/0x80
+[  217.392629][ T1635]  ? netdev_core_pick_tx+0x2e0/0x2e0
+[  217.393175][ T1635]  ? __alloc_skb+0xb9/0x500
+[  217.393727][ T1635]  ? rcu_read_lock_sched_held+0x90/0xc0
+[  217.394331][ T1635]  ? rcu_read_lock_bh_held+0xa0/0xa0
+[  217.395013][ T1635]  ? kasan_unpoison_shadow+0x30/0x40
+[  217.395668][ T1635]  ? __kasan_kmalloc.constprop.4+0xa0/0xd0
+[  217.396280][ T1635]  ? __kmalloc_node_track_caller+0x3a8/0x3f0
+[  217.399007][ T1635]  ? __kasan_kmalloc.constprop.4+0xa0/0xd0
+[  217.400093][ T1635]  ? __kmalloc_reserve.isra.46+0x2e/0xb0
+[  217.401118][ T1635]  ? memset+0x1f/0x40
+[  217.402529][ T1635]  ? __alloc_skb+0x317/0x500
+[  217.404915][ T1635]  ? arp_xmit+0xca/0x2c0
+[ ... ]
+
+Fixes: 311633b60406 ("hsr: switch ->dellink() to ->ndo_uninit()")
+Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/hsr/hsr_device.c |    9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/net/hsr/hsr_device.c
++++ b/net/hsr/hsr_device.c
+@@ -227,8 +227,13 @@ static int hsr_dev_xmit(struct sk_buff *
+       struct hsr_port *master;
+       master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
+-      skb->dev = master->dev;
+-      hsr_forward_skb(skb, master);
++      if (master) {
++              skb->dev = master->dev;
++              hsr_forward_skb(skb, master);
++      } else {
++              atomic_long_inc(&dev->tx_dropped);
++              dev_kfree_skb_any(skb);
++      }
+       return NETDEV_TX_OK;
+ }
diff --git a/queue-5.4/inet-protect-against-too-small-mtu-values.patch b/queue-5.4/inet-protect-against-too-small-mtu-values.patch
new file mode 100644 (file)
index 0000000..9c478e1
--- /dev/null
@@ -0,0 +1,177 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 5 Dec 2019 20:43:46 -0800
+Subject: inet: protect against too small mtu values.
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 501a90c945103e8627406763dac418f20f3837b2 ]
+
+syzbot was once again able to crash a host by setting a very small mtu
+on the loopback device.
+
+Let's make inetdev_valid_mtu() available in include/net/ip.h,
+and use it in ip_setup_cork(), so that we protect both ip_append_page()
+and __ip_append_data().
+
+Also add a READ_ONCE() when the device mtu is read.
+
+Pair this lockless read with a WRITE_ONCE() in __dev_set_mtu(),
+even if other code paths might write to this field.
+
+Add a big comment in include/linux/netdevice.h about dev->mtu
+needing READ_ONCE()/WRITE_ONCE() annotations.
+
+Hopefully we will add the missing ones in followup patches.
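+
+The annotation pattern, as a minimal sketch:
+
+      /* writer side, usually under RTNL */
+      WRITE_ONCE(dev->mtu, new_mtu);
+
+      /* lockless reader side */
+      unsigned int mtu = READ_ONCE(dev->mtu);
+
+      if (!inetdev_valid_mtu(mtu))
+              return -ENETUNREACH;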
+
+[1]
+
+refcount_t: saturated; leaking memory.
+WARNING: CPU: 0 PID: 9464 at lib/refcount.c:22 refcount_warn_saturate+0x138/0x1f0 lib/refcount.c:22
+Kernel panic - not syncing: panic_on_warn set ...
+CPU: 0 PID: 9464 Comm: syz-executor850 Not tainted 5.4.0-syzkaller #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ __dump_stack lib/dump_stack.c:77 [inline]
+ dump_stack+0x197/0x210 lib/dump_stack.c:118
+ panic+0x2e3/0x75c kernel/panic.c:221
+ __warn.cold+0x2f/0x3e kernel/panic.c:582
+ report_bug+0x289/0x300 lib/bug.c:195
+ fixup_bug arch/x86/kernel/traps.c:174 [inline]
+ fixup_bug arch/x86/kernel/traps.c:169 [inline]
+ do_error_trap+0x11b/0x200 arch/x86/kernel/traps.c:267
+ do_invalid_op+0x37/0x50 arch/x86/kernel/traps.c:286
+ invalid_op+0x23/0x30 arch/x86/entry/entry_64.S:1027
+RIP: 0010:refcount_warn_saturate+0x138/0x1f0 lib/refcount.c:22
+Code: 06 31 ff 89 de e8 c8 f5 e6 fd 84 db 0f 85 6f ff ff ff e8 7b f4 e6 fd 48 c7 c7 e0 71 4f 88 c6 05 56 a6 a4 06 01 e8 c7 a8 b7 fd <0f> 0b e9 50 ff ff ff e8 5c f4 e6 fd 0f b6 1d 3d a6 a4 06 31 ff 89
+RSP: 0018:ffff88809689f550 EFLAGS: 00010286
+RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
+RDX: 0000000000000000 RSI: ffffffff815e4336 RDI: ffffed1012d13e9c
+RBP: ffff88809689f560 R08: ffff88809c50a3c0 R09: fffffbfff15d31b1
+R10: fffffbfff15d31b0 R11: ffffffff8ae98d87 R12: 0000000000000001
+R13: 0000000000040100 R14: ffff888099041104 R15: ffff888218d96e40
+ refcount_add include/linux/refcount.h:193 [inline]
+ skb_set_owner_w+0x2b6/0x410 net/core/sock.c:1999
+ sock_wmalloc+0xf1/0x120 net/core/sock.c:2096
+ ip_append_page+0x7ef/0x1190 net/ipv4/ip_output.c:1383
+ udp_sendpage+0x1c7/0x480 net/ipv4/udp.c:1276
+ inet_sendpage+0xdb/0x150 net/ipv4/af_inet.c:821
+ kernel_sendpage+0x92/0xf0 net/socket.c:3794
+ sock_sendpage+0x8b/0xc0 net/socket.c:936
+ pipe_to_sendpage+0x2da/0x3c0 fs/splice.c:458
+ splice_from_pipe_feed fs/splice.c:512 [inline]
+ __splice_from_pipe+0x3ee/0x7c0 fs/splice.c:636
+ splice_from_pipe+0x108/0x170 fs/splice.c:671
+ generic_splice_sendpage+0x3c/0x50 fs/splice.c:842
+ do_splice_from fs/splice.c:861 [inline]
+ direct_splice_actor+0x123/0x190 fs/splice.c:1035
+ splice_direct_to_actor+0x3b4/0xa30 fs/splice.c:990
+ do_splice_direct+0x1da/0x2a0 fs/splice.c:1078
+ do_sendfile+0x597/0xd00 fs/read_write.c:1464
+ __do_sys_sendfile64 fs/read_write.c:1525 [inline]
+ __se_sys_sendfile64 fs/read_write.c:1511 [inline]
+ __x64_sys_sendfile64+0x1dd/0x220 fs/read_write.c:1511
+ do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294
+ entry_SYSCALL_64_after_hwframe+0x49/0xbe
+RIP: 0033:0x441409
+Code: e8 ac e8 ff ff 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 eb 08 fc ff c3 66 2e 0f 1f 84 00 00 00 00
+RSP: 002b:00007fffb64c4f78 EFLAGS: 00000246 ORIG_RAX: 0000000000000028
+RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000441409
+RDX: 0000000000000000 RSI: 0000000000000006 RDI: 0000000000000005
+RBP: 0000000000073b8a R08: 0000000000000010 R09: 0000000000000010
+R10: 0000000000010001 R11: 0000000000000246 R12: 0000000000402180
+R13: 0000000000402210 R14: 0000000000000000 R15: 0000000000000000
+Kernel Offset: disabled
+Rebooting in 86400 seconds..
+
+Fixes: 1470ddf7f8ce ("inet: Remove explicit write references to sk/inet in ip_append_data")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/netdevice.h |    5 +++++
+ include/net/ip.h          |    5 +++++
+ net/core/dev.c            |    3 ++-
+ net/ipv4/devinet.c        |    5 -----
+ net/ipv4/ip_output.c      |   13 ++++++++-----
+ 5 files changed, 20 insertions(+), 11 deletions(-)
+
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -1867,6 +1867,11 @@ struct net_device {
+       unsigned char           if_port;
+       unsigned char           dma;
++      /* Note : dev->mtu is often read without holding a lock.
++       * Writers usually hold RTNL.
++       * It is recommended to use READ_ONCE() to annotate the reads,
++       * and to use WRITE_ONCE() to annotate the writes.
++       */
+       unsigned int            mtu;
+       unsigned int            min_mtu;
+       unsigned int            max_mtu;
+--- a/include/net/ip.h
++++ b/include/net/ip.h
+@@ -760,4 +760,9 @@ int ip_misc_proc_init(void);
+ int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto, u8 family,
+                               struct netlink_ext_ack *extack);
++static inline bool inetdev_valid_mtu(unsigned int mtu)
++{
++      return likely(mtu >= IPV4_MIN_MTU);
++}
++
+ #endif        /* _IP_H */
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -7967,7 +7967,8 @@ int __dev_set_mtu(struct net_device *dev
+       if (ops->ndo_change_mtu)
+               return ops->ndo_change_mtu(dev, new_mtu);
+-      dev->mtu = new_mtu;
++      /* Pairs with all the lockless reads of dev->mtu in the stack */
++      WRITE_ONCE(dev->mtu, new_mtu);
+       return 0;
+ }
+ EXPORT_SYMBOL(__dev_set_mtu);
+--- a/net/ipv4/devinet.c
++++ b/net/ipv4/devinet.c
+@@ -1496,11 +1496,6 @@ skip:
+       }
+ }
+-static bool inetdev_valid_mtu(unsigned int mtu)
+-{
+-      return mtu >= IPV4_MIN_MTU;
+-}
+-
+ static void inetdev_send_gratuitous_arp(struct net_device *dev,
+                                       struct in_device *in_dev)
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -1258,15 +1258,18 @@ static int ip_setup_cork(struct sock *sk
+               cork->addr = ipc->addr;
+       }
+-      /*
+-       * We steal reference to this route, caller should not release it
+-       */
+-      *rtp = NULL;
+       cork->fragsize = ip_sk_use_pmtu(sk) ?
+-                       dst_mtu(&rt->dst) : rt->dst.dev->mtu;
++                       dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu);
++
++      if (!inetdev_valid_mtu(cork->fragsize))
++              return -ENETUNREACH;
+       cork->gso_size = ipc->gso_size;
++
+       cork->dst = &rt->dst;
++      /* We stole this route, caller should not release it. */
++      *rtp = NULL;
++
+       cork->length = 0;
+       cork->ttl = ipc->ttl;
+       cork->tos = ipc->tos;
diff --git a/queue-5.4/ionic-keep-users-rss-hash-across-lif-reset.patch b/queue-5.4/ionic-keep-users-rss-hash-across-lif-reset.patch
new file mode 100644 (file)
index 0000000..7b8c6a8
--- /dev/null
@@ -0,0 +1,71 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Shannon Nelson <snelson@pensando.io>
+Date: Tue, 3 Dec 2019 14:17:34 -0800
+Subject: ionic: keep users rss hash across lif reset
+
+From: Shannon Nelson <snelson@pensando.io>
+
+[ Upstream commit ffac2027e18f006f42630f2e01a8a9bd8dc664b5 ]
+
+If the user has specified their own RSS hash key, don't
+lose it across queue resets such as DOWN/UP, an MTU change,
+or a change in the number of channels.  This is fixed by moving
+the key initialization a little earlier in the lif
+creation.
+
+Also, let's clean up the RSS config a little better on
+the way down by setting it all to 0.
+Fixes: aa3198819bea ("ionic: Add RSS support")
+Signed-off-by: Shannon Nelson <snelson@pensando.io>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/pensando/ionic/ionic_lif.c |   16 ++++++++++------
+ 1 file changed, 10 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
++++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+@@ -1364,12 +1364,9 @@ int ionic_lif_rss_config(struct ionic_li
+ static int ionic_lif_rss_init(struct ionic_lif *lif)
+ {
+-      u8 rss_key[IONIC_RSS_HASH_KEY_SIZE];
+       unsigned int tbl_sz;
+       unsigned int i;
+-      netdev_rss_key_fill(rss_key, IONIC_RSS_HASH_KEY_SIZE);
+-
+       lif->rss_types = IONIC_RSS_TYPE_IPV4     |
+                        IONIC_RSS_TYPE_IPV4_TCP |
+                        IONIC_RSS_TYPE_IPV4_UDP |
+@@ -1382,12 +1379,18 @@ static int ionic_lif_rss_init(struct ion
+       for (i = 0; i < tbl_sz; i++)
+               lif->rss_ind_tbl[i] = ethtool_rxfh_indir_default(i, lif->nxqs);
+-      return ionic_lif_rss_config(lif, lif->rss_types, rss_key, NULL);
++      return ionic_lif_rss_config(lif, lif->rss_types, NULL, NULL);
+ }
+-static int ionic_lif_rss_deinit(struct ionic_lif *lif)
++static void ionic_lif_rss_deinit(struct ionic_lif *lif)
+ {
+-      return ionic_lif_rss_config(lif, 0x0, NULL, NULL);
++      int tbl_sz;
++
++      tbl_sz = le16_to_cpu(lif->ionic->ident.lif.eth.rss_ind_tbl_sz);
++      memset(lif->rss_ind_tbl, 0, tbl_sz);
++      memset(lif->rss_hash_key, 0, IONIC_RSS_HASH_KEY_SIZE);
++
++      ionic_lif_rss_config(lif, 0x0, NULL, NULL);
+ }
+ static void ionic_txrx_disable(struct ionic_lif *lif)
+@@ -1710,6 +1713,7 @@ static struct ionic_lif *ionic_lif_alloc
+               dev_err(dev, "Failed to allocate rss indirection table, aborting\n");
+               goto err_out_free_qcqs;
+       }
++      netdev_rss_key_fill(lif->rss_hash_key, IONIC_RSS_HASH_KEY_SIZE);
+       list_add_tail(&lif->list, &ionic->lifs);
diff --git a/queue-5.4/mqprio-fix-out-of-bounds-access-in-mqprio_dump.patch b/queue-5.4/mqprio-fix-out-of-bounds-access-in-mqprio_dump.patch
new file mode 100644 (file)
index 0000000..ec33728
--- /dev/null
@@ -0,0 +1,42 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Vladyslav Tarasiuk <vladyslavt@mellanox.com>
+Date: Fri, 6 Dec 2019 13:51:05 +0000
+Subject: mqprio: Fix out-of-bounds access in mqprio_dump
+
+From: Vladyslav Tarasiuk <vladyslavt@mellanox.com>
+
+[ Upstream commit 9f104c7736904ac72385bbb48669e0c923ca879b ]
+
+When a user runs a command like
+tc qdisc add dev eth1 root mqprio
+a KASAN stack-out-of-bounds warning is emitted.
+Currently, the NLA_ALIGN macro used in mqprio_dump provides too large
+a buffer size as the argument for nla_put and memcpy down the call stack.
+The flow looks like this:
+1. nla_put expects the exact object size as an argument;
+2. Later it provides this size to memcpy;
+3. To calculate the correct padding for the SKB, nla_put applies the
+   NLA_ALIGN macro itself.
+
+Therefore, NLA_ALIGN should not be applied to the nla_put parameter.
+Otherwise it will lead to an out-of-bounds memory access in memcpy.
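+
+In short (sketch):
+
+      /* wrong: asks nla_put() to copy more bytes than 'opt' holds */
+      nla_put(skb, TCA_OPTIONS, NLA_ALIGN(sizeof(opt)), &opt);
+
+      /* right: pass the exact payload size; nla_put() applies
+       * NLA_ALIGN() itself when reserving skb space
+       */
+      nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt);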
+
+Fixes: 4e8b86c06269 ("mqprio: Introduce new hardware offload mode and shaper in mqprio")
+Signed-off-by: Vladyslav Tarasiuk <vladyslavt@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_mqprio.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/sched/sch_mqprio.c
++++ b/net/sched/sch_mqprio.c
+@@ -433,7 +433,7 @@ static int mqprio_dump(struct Qdisc *sch
+               opt.offset[tc] = dev->tc_to_txq[tc].offset;
+       }
+-      if (nla_put(skb, TCA_OPTIONS, NLA_ALIGN(sizeof(opt)), &opt))
++      if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
+               goto nla_put_failure;
+       if ((priv->flags & TC_MQPRIO_F_MODE) &&
diff --git a/queue-5.4/net-bridge-deny-dev_set_mac_address-when-unregistering.patch b/queue-5.4/net-bridge-deny-dev_set_mac_address-when-unregistering.patch
new file mode 100644 (file)
index 0000000..c1ecb2e
--- /dev/null
@@ -0,0 +1,76 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Date: Tue, 3 Dec 2019 16:48:06 +0200
+Subject: net: bridge: deny dev_set_mac_address() when unregistering
+
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+
+[ Upstream commit c4b4c421857dc7b1cf0dccbd738472360ff2cd70 ]
+
+We have an interesting memory leak in the bridge when it is being
+unregistered and is a slave to a master device which changes the
+mac of its slaves on unregister (e.g. bond, team). This is a very
+unusual setup, but we do end up leaking 1 fdb entry because
+dev_set_mac_address() causes the bridge to insert the new mac address
+into its table after all fdbs are flushed, i.e. after dellink() on the
+bridge has finished. When NETDEV_UNREGISTER is then sent, the bond/team
+releases the bridge and calls dev_set_mac_address() to restore its
+original address, and that in turn adds an fdb in the bridge.
+One fix is to check the bridge dev's reg_state in its
+ndo_set_mac_address callback and return an error if the bridge is not in
+NETREG_REGISTERED.
+
+Easy steps to reproduce:
+ 1. add bond in mode != A/B
+ 2. add any slave to the bond
+ 3. add bridge dev as a slave to the bond
+ 4. destroy the bridge device
+
+Trace:
+ unreferenced object 0xffff888035c4d080 (size 128):
+   comm "ip", pid 4068, jiffies 4296209429 (age 1413.753s)
+   hex dump (first 32 bytes):
+     41 1d c9 36 80 88 ff ff 00 00 00 00 00 00 00 00  A..6............
+     d2 19 c9 5e 3f d7 00 00 00 00 00 00 00 00 00 00  ...^?...........
+   backtrace:
+     [<00000000ddb525dc>] kmem_cache_alloc+0x155/0x26f
+     [<00000000633ff1e0>] fdb_create+0x21/0x486 [bridge]
+     [<0000000092b17e9c>] fdb_insert+0x91/0xdc [bridge]
+     [<00000000f2a0f0ff>] br_fdb_change_mac_address+0xb3/0x175 [bridge]
+     [<000000001de02dbd>] br_stp_change_bridge_id+0xf/0xff [bridge]
+     [<00000000ac0e32b1>] br_set_mac_address+0x76/0x99 [bridge]
+     [<000000006846a77f>] dev_set_mac_address+0x63/0x9b
+     [<00000000d30738fc>] __bond_release_one+0x3f6/0x455 [bonding]
+     [<00000000fc7ec01d>] bond_netdev_event+0x2f2/0x400 [bonding]
+     [<00000000305d7795>] notifier_call_chain+0x38/0x56
+     [<0000000028885d4a>] call_netdevice_notifiers+0x1e/0x23
+     [<000000008279477b>] rollback_registered_many+0x353/0x6a4
+     [<0000000018ef753a>] unregister_netdevice_many+0x17/0x6f
+     [<00000000ba854b7a>] rtnl_delete_link+0x3c/0x43
+     [<00000000adf8618d>] rtnl_dellink+0x1dc/0x20a
+     [<000000009b6395fd>] rtnetlink_rcv_msg+0x23d/0x268
+
+Fixes: 43598813386f ("bridge: add local MAC address to forwarding table (v2)")
+Reported-by: syzbot+2add91c08eb181fea1bf@syzkaller.appspotmail.com
+Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_device.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/net/bridge/br_device.c
++++ b/net/bridge/br_device.c
+@@ -245,6 +245,12 @@ static int br_set_mac_address(struct net
+       if (!is_valid_ether_addr(addr->sa_data))
+               return -EADDRNOTAVAIL;
++      /* dev_set_mac_addr() can be called by a master device on bridge's
++       * NETDEV_UNREGISTER, but since it's being destroyed do nothing
++       */
++      if (dev->reg_state != NETREG_REGISTERED)
++              return -EBUSY;
++
+       spin_lock_bh(&br->lock);
+       if (!ether_addr_equal(dev->dev_addr, addr->sa_data)) {
+               /* Mac address will be changed in br_stp_change_bridge_id(). */
diff --git a/queue-5.4/net-core-rename-indirect-block-ingress-cb-function.patch b/queue-5.4/net-core-rename-indirect-block-ingress-cb-function.patch
new file mode 100644 (file)
index 0000000..41a7ede
--- /dev/null
@@ -0,0 +1,205 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: John Hurley <john.hurley@netronome.com>
+Date: Thu, 5 Dec 2019 17:03:34 +0000
+Subject: net: core: rename indirect block ingress cb function
+
+From: John Hurley <john.hurley@netronome.com>
+
+[ Upstream commit dbad3408896c3c5722ec9cda065468b3df16c5bf ]
+
+With indirect blocks, a driver can register for callbacks from a device
+that it does not 'own', for example, a tunnel device. When registering to
+or unregistering from a new device, a callback is triggered to generate
+a bind/unbind event. This, in turn, allows the driver to receive any
+existing rules or to properly clean up installed rules.
+
+When first added, it was assumed that all indirect block registrations
+would be for ingress offloads. However, the NFP driver can, in some
+instances, support clsact qdisc binds for egress offload.
+
+Change the name of the indirect block callback command in flow_offload to
+remove the 'ingress' identifier from it. While this does not change
+functionality, a follow-up patch will implement a more generic
+callback than the ones currently supporting only ingress offload.
+
+Fixes: 4d12ba42787b ("nfp: flower: allow offloading of matches on 'internal' ports")
+Signed-off-by: John Hurley <john.hurley@netronome.com>
+Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/flow_offload.h        |   15 +++++-------
+ net/core/flow_offload.c           |   45 ++++++++++++++++++--------------------
+ net/netfilter/nf_tables_offload.c |    6 ++---
+ net/sched/cls_api.c               |    4 +--
+ 4 files changed, 34 insertions(+), 36 deletions(-)
+
+--- a/include/net/flow_offload.h
++++ b/include/net/flow_offload.h
+@@ -380,19 +380,18 @@ static inline void flow_block_init(struc
+ typedef int flow_indr_block_bind_cb_t(struct net_device *dev, void *cb_priv,
+                                     enum tc_setup_type type, void *type_data);
+-typedef void flow_indr_block_ing_cmd_t(struct net_device *dev,
+-                                      flow_indr_block_bind_cb_t *cb,
+-                                      void *cb_priv,
+-                                      enum flow_block_command command);
++typedef void flow_indr_block_cmd_t(struct net_device *dev,
++                                 flow_indr_block_bind_cb_t *cb, void *cb_priv,
++                                 enum flow_block_command command);
+-struct flow_indr_block_ing_entry {
+-      flow_indr_block_ing_cmd_t *cb;
++struct flow_indr_block_entry {
++      flow_indr_block_cmd_t *cb;
+       struct list_head        list;
+ };
+-void flow_indr_add_block_ing_cb(struct flow_indr_block_ing_entry *entry);
++void flow_indr_add_block_cb(struct flow_indr_block_entry *entry);
+-void flow_indr_del_block_ing_cb(struct flow_indr_block_ing_entry *entry);
++void flow_indr_del_block_cb(struct flow_indr_block_entry *entry);
+ int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
+                                 flow_indr_block_bind_cb_t *cb,
+--- a/net/core/flow_offload.c
++++ b/net/core/flow_offload.c
+@@ -283,7 +283,7 @@ int flow_block_cb_setup_simple(struct fl
+ }
+ EXPORT_SYMBOL(flow_block_cb_setup_simple);
+-static LIST_HEAD(block_ing_cb_list);
++static LIST_HEAD(block_cb_list);
+ static struct rhashtable indr_setup_block_ht;
+@@ -391,20 +391,19 @@ static void flow_indr_block_cb_del(struc
+       kfree(indr_block_cb);
+ }
+-static DEFINE_MUTEX(flow_indr_block_ing_cb_lock);
++static DEFINE_MUTEX(flow_indr_block_cb_lock);
+-static void flow_block_ing_cmd(struct net_device *dev,
+-                             flow_indr_block_bind_cb_t *cb,
+-                             void *cb_priv,
+-                             enum flow_block_command command)
++static void flow_block_cmd(struct net_device *dev,
++                         flow_indr_block_bind_cb_t *cb, void *cb_priv,
++                         enum flow_block_command command)
+ {
+-      struct flow_indr_block_ing_entry *entry;
++      struct flow_indr_block_entry *entry;
+-      mutex_lock(&flow_indr_block_ing_cb_lock);
+-      list_for_each_entry(entry, &block_ing_cb_list, list) {
++      mutex_lock(&flow_indr_block_cb_lock);
++      list_for_each_entry(entry, &block_cb_list, list) {
+               entry->cb(dev, cb, cb_priv, command);
+       }
+-      mutex_unlock(&flow_indr_block_ing_cb_lock);
++      mutex_unlock(&flow_indr_block_cb_lock);
+ }
+ int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
+@@ -424,8 +423,8 @@ int __flow_indr_block_cb_register(struct
+       if (err)
+               goto err_dev_put;
+-      flow_block_ing_cmd(dev, indr_block_cb->cb, indr_block_cb->cb_priv,
+-                         FLOW_BLOCK_BIND);
++      flow_block_cmd(dev, indr_block_cb->cb, indr_block_cb->cb_priv,
++                     FLOW_BLOCK_BIND);
+       return 0;
+@@ -464,8 +463,8 @@ void __flow_indr_block_cb_unregister(str
+       if (!indr_block_cb)
+               return;
+-      flow_block_ing_cmd(dev, indr_block_cb->cb, indr_block_cb->cb_priv,
+-                         FLOW_BLOCK_UNBIND);
++      flow_block_cmd(dev, indr_block_cb->cb, indr_block_cb->cb_priv,
++                     FLOW_BLOCK_UNBIND);
+       flow_indr_block_cb_del(indr_block_cb);
+       flow_indr_block_dev_put(indr_dev);
+@@ -499,21 +498,21 @@ void flow_indr_block_call(struct net_dev
+ }
+ EXPORT_SYMBOL_GPL(flow_indr_block_call);
+-void flow_indr_add_block_ing_cb(struct flow_indr_block_ing_entry *entry)
++void flow_indr_add_block_cb(struct flow_indr_block_entry *entry)
+ {
+-      mutex_lock(&flow_indr_block_ing_cb_lock);
+-      list_add_tail(&entry->list, &block_ing_cb_list);
+-      mutex_unlock(&flow_indr_block_ing_cb_lock);
++      mutex_lock(&flow_indr_block_cb_lock);
++      list_add_tail(&entry->list, &block_cb_list);
++      mutex_unlock(&flow_indr_block_cb_lock);
+ }
+-EXPORT_SYMBOL_GPL(flow_indr_add_block_ing_cb);
++EXPORT_SYMBOL_GPL(flow_indr_add_block_cb);
+-void flow_indr_del_block_ing_cb(struct flow_indr_block_ing_entry *entry)
++void flow_indr_del_block_cb(struct flow_indr_block_entry *entry)
+ {
+-      mutex_lock(&flow_indr_block_ing_cb_lock);
++      mutex_lock(&flow_indr_block_cb_lock);
+       list_del(&entry->list);
+-      mutex_unlock(&flow_indr_block_ing_cb_lock);
++      mutex_unlock(&flow_indr_block_cb_lock);
+ }
+-EXPORT_SYMBOL_GPL(flow_indr_del_block_ing_cb);
++EXPORT_SYMBOL_GPL(flow_indr_del_block_cb);
+ static int __init init_flow_indr_rhashtable(void)
+ {
+--- a/net/netfilter/nf_tables_offload.c
++++ b/net/netfilter/nf_tables_offload.c
+@@ -455,7 +455,7 @@ static int nft_offload_netdev_event(stru
+       return NOTIFY_DONE;
+ }
+-static struct flow_indr_block_ing_entry block_ing_entry = {
++static struct flow_indr_block_entry block_ing_entry = {
+       .cb     = nft_indr_block_cb,
+       .list   = LIST_HEAD_INIT(block_ing_entry.list),
+ };
+@@ -472,13 +472,13 @@ int nft_offload_init(void)
+       if (err < 0)
+               return err;
+-      flow_indr_add_block_ing_cb(&block_ing_entry);
++      flow_indr_add_block_cb(&block_ing_entry);
+       return 0;
+ }
+ void nft_offload_exit(void)
+ {
+-      flow_indr_del_block_ing_cb(&block_ing_entry);
++      flow_indr_del_block_cb(&block_ing_entry);
+       unregister_netdevice_notifier(&nft_offload_netdev_notifier);
+ }
+--- a/net/sched/cls_api.c
++++ b/net/sched/cls_api.c
+@@ -3632,7 +3632,7 @@ static struct pernet_operations tcf_net_
+       .size = sizeof(struct tcf_net),
+ };
+-static struct flow_indr_block_ing_entry block_ing_entry = {
++static struct flow_indr_block_entry block_ing_entry = {
+       .cb = tc_indr_block_get_and_ing_cmd,
+       .list = LIST_HEAD_INIT(block_ing_entry.list),
+ };
+@@ -3649,7 +3649,7 @@ static int __init tc_filter_init(void)
+       if (err)
+               goto err_register_pernet_subsys;
+-      flow_indr_add_block_ing_cb(&block_ing_entry);
++      flow_indr_add_block_cb(&block_ing_entry);
+       rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
+                     RTNL_FLAG_DOIT_UNLOCKED);
diff --git a/queue-5.4/net-dsa-fix-flow-dissection-on-tx-path.patch b/queue-5.4/net-dsa-fix-flow-dissection-on-tx-path.patch
new file mode 100644 (file)
index 0000000..2885df9
--- /dev/null
@@ -0,0 +1,114 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Alexander Lobakin <alobakin@dlink.ru>
+Date: Thu, 5 Dec 2019 13:02:35 +0300
+Subject: net: dsa: fix flow dissection on Tx path
+
+From: Alexander Lobakin <alobakin@dlink.ru>
+
+[ Upstream commit 8bef0af09a5415df761b04fa487a6c34acae74bc ]
+
+Commit 43e665287f93 ("net-next: dsa: fix flow dissection") added the
+ability to override the protocol and network offset during flow
+dissection for DSA-enabled devices (i.e. controllers shipped as switch
+CPU ports) in order to fix skb hashing for RPS on the Rx path.
+
+However, skb_hash() and the added part of the code can be invoked not
+only on the Rx path, but also on the Tx path if we have a multi-queued
+device and:
+ - the kernel is running on a UP system, or
+ - XPS is not configured.
+
+The call stack in these two cases will look like: dev_queue_xmit() ->
+__dev_queue_xmit() -> netdev_core_pick_tx() -> netdev_pick_tx() ->
+skb_tx_hash() -> skb_get_hash().
+
+The problem is that skbs queued for Tx already have both the network
+offset and the correct protocol set up, even after the DSA tagger has
+inserted a CPU tag, so calling tag_ops->flow_dissect() on this path
+only breaks flow dissection and hashing.
+
+This can be observed by adding debug prints just before and right after
+tag_ops->flow_dissect() call to the related block of code:
+
+Before the patch:
+
+Rx path (RPS):
+
+[   19.240001] Rx: proto: 0x00f8, nhoff: 0     /* ETH_P_XDSA */
+[   19.244271] tag_ops->flow_dissect()
+[   19.247811] Rx: proto: 0x0800, nhoff: 8     /* ETH_P_IP */
+
+[   19.215435] Rx: proto: 0x00f8, nhoff: 0     /* ETH_P_XDSA */
+[   19.219746] tag_ops->flow_dissect()
+[   19.223241] Rx: proto: 0x0806, nhoff: 8     /* ETH_P_ARP */
+
+[   18.654057] Rx: proto: 0x00f8, nhoff: 0     /* ETH_P_XDSA */
+[   18.658332] tag_ops->flow_dissect()
+[   18.661826] Rx: proto: 0x8100, nhoff: 8     /* ETH_P_8021Q */
+
+Tx path (UP system):
+
+[   18.759560] Tx: proto: 0x0800, nhoff: 26    /* ETH_P_IP */
+[   18.763933] tag_ops->flow_dissect()
+[   18.767485] Tx: proto: 0x920b, nhoff: 34    /* junk */
+
+[   22.800020] Tx: proto: 0x0806, nhoff: 26    /* ETH_P_ARP */
+[   22.804392] tag_ops->flow_dissect()
+[   22.807921] Tx: proto: 0x920b, nhoff: 34    /* junk */
+
+[   16.898342] Tx: proto: 0x86dd, nhoff: 26    /* ETH_P_IPV6 */
+[   16.902705] tag_ops->flow_dissect()
+[   16.906227] Tx: proto: 0x920b, nhoff: 34    /* junk */
+
+After:
+
+Rx path (RPS):
+
+[   16.520993] Rx: proto: 0x00f8, nhoff: 0     /* ETH_P_XDSA */
+[   16.525260] tag_ops->flow_dissect()
+[   16.528808] Rx: proto: 0x0800, nhoff: 8     /* ETH_P_IP */
+
+[   15.484807] Rx: proto: 0x00f8, nhoff: 0     /* ETH_P_XDSA */
+[   15.490417] tag_ops->flow_dissect()
+[   15.495223] Rx: proto: 0x0806, nhoff: 8     /* ETH_P_ARP */
+
+[   17.134621] Rx: proto: 0x00f8, nhoff: 0     /* ETH_P_XDSA */
+[   17.138895] tag_ops->flow_dissect()
+[   17.142388] Rx: proto: 0x8100, nhoff: 8     /* ETH_P_8021Q */
+
+Tx path (UP system):
+
+[   15.499558] Tx: proto: 0x0800, nhoff: 26    /* ETH_P_IP */
+
+[   20.664689] Tx: proto: 0x0806, nhoff: 26    /* ETH_P_ARP */
+
+[   18.565782] Tx: proto: 0x86dd, nhoff: 26    /* ETH_P_IPV6 */
+
+In order to fix that, we can add the check 'proto == htons(ETH_P_XDSA)'
+to prevent the code from calling tag_ops->flow_dissect() on Tx.
+I also decided to initialize the 'offset' variable so tagger callbacks
+can now safely leave it untouched without provoking chaos.
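+
+For illustration, a minimal stand-alone sketch of the resulting guard
+(the demo_* names and the DEMO_ETH_P_XDSA value are assumptions for the
+sketch, not part of the kernel patch):
+
+  #include <stdbool.h>
+  #include <stdint.h>
+
+  #define DEMO_ETH_P_XDSA 0x00f8  /* matches the proto in the logs above */
+
+  /* Only packets still carrying the DSA ethertype (the Rx case) are
+   * re-dissected; Tx skbs that already have a correct proto are left
+   * alone.
+   */
+  static bool demo_should_flow_dissect(bool uses_dsa, uint16_t proto)
+  {
+          return uses_dsa && proto == DEMO_ETH_P_XDSA;
+  }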
+
+Fixes: 43e665287f93 ("net-next: dsa: fix flow dissection")
+Signed-off-by: Alexander Lobakin <alobakin@dlink.ru>
+Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/flow_dissector.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/net/core/flow_dissector.c
++++ b/net/core/flow_dissector.c
+@@ -870,9 +870,10 @@ bool __skb_flow_dissect(const struct net
+               nhoff = skb_network_offset(skb);
+               hlen = skb_headlen(skb);
+ #if IS_ENABLED(CONFIG_NET_DSA)
+-              if (unlikely(skb->dev && netdev_uses_dsa(skb->dev))) {
++              if (unlikely(skb->dev && netdev_uses_dsa(skb->dev) &&
++                           proto == htons(ETH_P_XDSA))) {
+                       const struct dsa_device_ops *ops;
+-                      int offset;
++                      int offset = 0;
+                       ops = skb->dev->dsa_ptr->tag_ops;
+                       if (ops->flow_dissect &&
diff --git a/queue-5.4/net-ethernet-ti-cpsw-fix-extra-rx-interrupt.patch b/queue-5.4/net-ethernet-ti-cpsw-fix-extra-rx-interrupt.patch
new file mode 100644 (file)
index 0000000..c8c17cd
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Grygorii Strashko <grygorii.strashko@ti.com>
+Date: Fri, 6 Dec 2019 14:28:20 +0200
+Subject: net: ethernet: ti: cpsw: fix extra rx interrupt
+
+From: Grygorii Strashko <grygorii.strashko@ti.com>
+
+[ Upstream commit 51302f77bedab8768b761ed1899c08f89af9e4e2 ]
+
+Currently the RX interrupt is triggered twice every time, because in
+cpsw_rx_interrupt() it is acked first and then disabled. So a pending
+interrupt always remains when the RX interrupt is enabled again in the
+NAPI handler.
+
+Fix it by first disabling the IRQ and then acking it.
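+
+A minimal sketch of the resulting ordering (the demo_* register model
+is an assumption for illustration, not the real cpsw register layout):
+
+  #include <stdint.h>
+
+  struct demo_cpsw {
+          volatile uint32_t *rx_en; /* RX interrupt enable register */
+          volatile uint32_t *eoi;   /* end-of-interrupt (ack) register */
+  };
+
+  static void demo_rx_interrupt(struct demo_cpsw *cpsw)
+  {
+          *cpsw->rx_en = 0; /* disable the RX interrupt first ...     */
+          *cpsw->eoi = 1;   /* ... then ack, so no new pending IRQ is
+                             * latched before NAPI re-enables rx_en.  */
+  }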
+
+Fixes: 870915feabdc ("drivers: net: cpsw: remove disable_irq/enable_irq as irq can be masked from cpsw itself")
+Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/ti/cpsw.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/ti/cpsw.c
++++ b/drivers/net/ethernet/ti/cpsw.c
+@@ -890,8 +890,8 @@ static irqreturn_t cpsw_rx_interrupt(int
+ {
+       struct cpsw_common *cpsw = dev_id;
+-      cpdma_ctlr_eoi(cpsw->dma, CPDMA_EOI_RX);
+       writel(0, &cpsw->wr_regs->rx_en);
++      cpdma_ctlr_eoi(cpsw->dma, CPDMA_EOI_RX);
+       if (cpsw->quirk_irq) {
+               disable_irq_nosync(cpsw->irqs_table[0]);
diff --git a/queue-5.4/net-fixed-updating-of-ethertype-in-skb_mpls_push.patch b/queue-5.4/net-fixed-updating-of-ethertype-in-skb_mpls_push.patch
new file mode 100644 (file)
index 0000000..388dac1
--- /dev/null
@@ -0,0 +1,87 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Martin Varghese <martin.varghese@nokia.com>
+Date: Thu, 5 Dec 2019 05:57:22 +0530
+Subject: net: Fixed updating of ethertype in skb_mpls_push()
+
+From: Martin Varghese <martin.varghese@nokia.com>
+
+[ Upstream commit d04ac224b1688f005a84f764cfe29844f8e9da08 ]
+
+skb_mpls_push() was not updating the ethertype of an Ethernet packet
+if the packet was originally received from a non-ARPHRD_ETHER device.
+
+In the OVS datapath flow below, since the device corresponding to
+port 7 is an L3 device (ARPHRD_NONE), skb_mpls_push() does not update
+the ethertype of the packet even though the preceding push_eth action
+had added an Ethernet header to the packet.
+
+recirc_id(0),in_port(7),eth_type(0x0800),ipv4(tos=0/0xfc,ttl=64,frag=no),
+actions:push_eth(src=00:00:00:00:00:00,dst=00:00:00:00:00:00),
+push_mpls(label=13,tc=0,ttl=64,bos=1,eth_type=0x8847),4
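+
+A minimal sketch of the behavioral change (demo_mpls_push() is a
+hypothetical stand-in for the helper; the real signature change is in
+the diff below):
+
+  #include <stdbool.h>
+  #include <stdint.h>
+
+  /* The caller now states explicitly whether the skb carries an
+   * Ethernet header, instead of the helper guessing from the input
+   * device type, which is wrong for ARPHRD_NONE devices that had an
+   * Ethernet header pushed later, as in the flow above.
+   */
+  static void demo_mpls_push(uint16_t *eth_type, uint16_t mpls_proto,
+                             bool ethernet)
+  {
+          if (ethernet) /* was: skb->dev && skb->dev->type == ARPHRD_ETHER */
+                  *eth_type = mpls_proto; /* e.g. 0x8847 for MPLS unicast */
+  }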
+
+Fixes: 8822e270d697 ("net: core: move push MPLS functionality from OvS to core helper")
+Signed-off-by: Martin Varghese <martin.varghese@nokia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/skbuff.h    |    2 +-
+ net/core/skbuff.c         |    4 ++--
+ net/openvswitch/actions.c |    3 ++-
+ net/sched/act_mpls.c      |    3 ++-
+ 4 files changed, 7 insertions(+), 5 deletions(-)
+
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -3527,7 +3527,7 @@ int __skb_vlan_pop(struct sk_buff *skb,
+ int skb_vlan_pop(struct sk_buff *skb);
+ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci);
+ int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto,
+-                int mac_len);
++                int mac_len, bool ethernet);
+ int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len,
+                bool ethernet);
+ int skb_mpls_update_lse(struct sk_buff *skb, __be32 mpls_lse);
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -5484,7 +5484,7 @@ static void skb_mod_eth_type(struct sk_b
+  * Returns 0 on success, -errno otherwise.
+  */
+ int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto,
+-                int mac_len)
++                int mac_len, bool ethernet)
+ {
+       struct mpls_shim_hdr *lse;
+       int err;
+@@ -5515,7 +5515,7 @@ int skb_mpls_push(struct sk_buff *skb, _
+       lse->label_stack_entry = mpls_lse;
+       skb_postpush_rcsum(skb, lse, MPLS_HLEN);
+-      if (skb->dev && skb->dev->type == ARPHRD_ETHER)
++      if (ethernet)
+               skb_mod_eth_type(skb, eth_hdr(skb), mpls_proto);
+       skb->protocol = mpls_proto;
+--- a/net/openvswitch/actions.c
++++ b/net/openvswitch/actions.c
+@@ -166,7 +166,8 @@ static int push_mpls(struct sk_buff *skb
+       int err;
+       err = skb_mpls_push(skb, mpls->mpls_lse, mpls->mpls_ethertype,
+-                          skb->mac_len);
++                          skb->mac_len,
++                          ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET);
+       if (err)
+               return err;
+--- a/net/sched/act_mpls.c
++++ b/net/sched/act_mpls.c
+@@ -83,7 +83,8 @@ static int tcf_mpls_act(struct sk_buff *
+               break;
+       case TCA_MPLS_ACT_PUSH:
+               new_lse = tcf_mpls_get_lse(NULL, p, !eth_p_mpls(skb->protocol));
+-              if (skb_mpls_push(skb, new_lse, p->tcfm_proto, mac_len))
++              if (skb_mpls_push(skb, new_lse, p->tcfm_proto, mac_len,
++                                skb->dev && skb->dev->type == ARPHRD_ETHER))
+                       goto drop;
+               break;
+       case TCA_MPLS_ACT_MODIFY:
diff --git a/queue-5.4/net-ipv6-add-net-argument-to-ip6_dst_lookup_flow.patch b/queue-5.4/net-ipv6-add-net-argument-to-ip6_dst_lookup_flow.patch
new file mode 100644 (file)
index 0000000..8519708
--- /dev/null
@@ -0,0 +1,220 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Sabrina Dubroca <sd@queasysnail.net>
+Date: Wed, 4 Dec 2019 15:35:52 +0100
+Subject: net: ipv6: add net argument to ip6_dst_lookup_flow
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+[ Upstream commit c4e85f73afb6384123e5ef1bba3315b2e3ad031e ]
+
+This will be used in the conversion of ipv6_stub to ip6_dst_lookup_flow,
+as some modules currently pass a net argument without a socket to
+ip6_dst_lookup. This is equivalent to commit 343d60aada5a ("ipv6: change
+ipv6_stub_impl.ipv6_dst_lookup to take net argument").
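+
+A sketch of what the extra argument enables for socket-less callers
+(demo_lookup_no_sock() is a hypothetical caller; the prototype matches
+the diff below):
+
+  struct net;
+  struct sock;
+  struct flowi6;
+  struct in6_addr;
+  struct dst_entry;
+
+  struct dst_entry *ip6_dst_lookup_flow(struct net *net,
+                                        const struct sock *sk,
+                                        struct flowi6 *fl6,
+                                        const struct in6_addr *final_dst);
+
+  static struct dst_entry *demo_lookup_no_sock(struct net *net,
+                                               struct flowi6 *fl6)
+  {
+          /* sk may now be NULL; the namespace comes from net instead
+           * of sock_net(sk).
+           */
+          return ip6_dst_lookup_flow(net, NULL, fl6, NULL);
+  }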
+
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/ipv6.h               |    2 +-
+ net/dccp/ipv6.c                  |    6 +++---
+ net/ipv6/af_inet6.c              |    2 +-
+ net/ipv6/datagram.c              |    2 +-
+ net/ipv6/inet6_connection_sock.c |    4 ++--
+ net/ipv6/ip6_output.c            |    8 ++++----
+ net/ipv6/raw.c                   |    2 +-
+ net/ipv6/syncookies.c            |    2 +-
+ net/ipv6/tcp_ipv6.c              |    4 ++--
+ net/l2tp/l2tp_ip6.c              |    2 +-
+ net/sctp/ipv6.c                  |    4 ++--
+ 11 files changed, 19 insertions(+), 19 deletions(-)
+
+--- a/include/net/ipv6.h
++++ b/include/net/ipv6.h
+@@ -1017,7 +1017,7 @@ static inline struct sk_buff *ip6_finish
+ int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
+                  struct flowi6 *fl6);
+-struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
++struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
+                                     const struct in6_addr *final_dst);
+ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
+                                        const struct in6_addr *final_dst,
+--- a/net/dccp/ipv6.c
++++ b/net/dccp/ipv6.c
+@@ -210,7 +210,7 @@ static int dccp_v6_send_response(const s
+       final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
+       rcu_read_unlock();
+-      dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
++      dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
+       if (IS_ERR(dst)) {
+               err = PTR_ERR(dst);
+               dst = NULL;
+@@ -282,7 +282,7 @@ static void dccp_v6_ctl_send_reset(const
+       security_skb_classify_flow(rxskb, flowi6_to_flowi(&fl6));
+       /* sk = NULL, but it is safe for now. RST socket required. */
+-      dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
++      dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
+       if (!IS_ERR(dst)) {
+               skb_dst_set(skb, dst);
+               ip6_xmit(ctl_sk, skb, &fl6, 0, NULL, 0, 0);
+@@ -912,7 +912,7 @@ static int dccp_v6_connect(struct sock *
+       opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
+       final_p = fl6_update_dst(&fl6, opt, &final);
+-      dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
++      dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
+       if (IS_ERR(dst)) {
+               err = PTR_ERR(dst);
+               goto failure;
+--- a/net/ipv6/af_inet6.c
++++ b/net/ipv6/af_inet6.c
+@@ -765,7 +765,7 @@ int inet6_sk_rebuild_header(struct sock
+                                        &final);
+               rcu_read_unlock();
+-              dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
++              dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
+               if (IS_ERR(dst)) {
+                       sk->sk_route_caps = 0;
+                       sk->sk_err_soft = -PTR_ERR(dst);
+--- a/net/ipv6/datagram.c
++++ b/net/ipv6/datagram.c
+@@ -85,7 +85,7 @@ int ip6_datagram_dst_update(struct sock
+       final_p = fl6_update_dst(&fl6, opt, &final);
+       rcu_read_unlock();
+-      dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
++      dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
+       if (IS_ERR(dst)) {
+               err = PTR_ERR(dst);
+               goto out;
+--- a/net/ipv6/inet6_connection_sock.c
++++ b/net/ipv6/inet6_connection_sock.c
+@@ -48,7 +48,7 @@ struct dst_entry *inet6_csk_route_req(co
+       fl6->flowi6_uid = sk->sk_uid;
+       security_req_classify_flow(req, flowi6_to_flowi(fl6));
+-      dst = ip6_dst_lookup_flow(sk, fl6, final_p);
++      dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p);
+       if (IS_ERR(dst))
+               return NULL;
+@@ -103,7 +103,7 @@ static struct dst_entry *inet6_csk_route
+       dst = __inet6_csk_dst_check(sk, np->dst_cookie);
+       if (!dst) {
+-              dst = ip6_dst_lookup_flow(sk, fl6, final_p);
++              dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p);
+               if (!IS_ERR(dst))
+                       ip6_dst_store(sk, dst, NULL, NULL);
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -1144,19 +1144,19 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup);
+  *    It returns a valid dst pointer on success, or a pointer encoded
+  *    error code.
+  */
+-struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
++struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
+                                     const struct in6_addr *final_dst)
+ {
+       struct dst_entry *dst = NULL;
+       int err;
+-      err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
++      err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
+       if (err)
+               return ERR_PTR(err);
+       if (final_dst)
+               fl6->daddr = *final_dst;
+-      return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
++      return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
+ }
+ EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
+@@ -1188,7 +1188,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow
+       if (dst)
+               return dst;
+-      dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
++      dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
+       if (connected && !IS_ERR(dst))
+               ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
+--- a/net/ipv6/raw.c
++++ b/net/ipv6/raw.c
+@@ -925,7 +925,7 @@ static int rawv6_sendmsg(struct sock *sk
+       fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
+-      dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
++      dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
+       if (IS_ERR(dst)) {
+               err = PTR_ERR(dst);
+               goto out;
+--- a/net/ipv6/syncookies.c
++++ b/net/ipv6/syncookies.c
+@@ -235,7 +235,7 @@ struct sock *cookie_v6_check(struct sock
+               fl6.flowi6_uid = sk->sk_uid;
+               security_req_classify_flow(req, flowi6_to_flowi(&fl6));
+-              dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
++              dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
+               if (IS_ERR(dst))
+                       goto out_free;
+       }
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -275,7 +275,7 @@ static int tcp_v6_connect(struct sock *s
+       security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+-      dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
++      dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
+       if (IS_ERR(dst)) {
+               err = PTR_ERR(dst);
+               goto failure;
+@@ -906,7 +906,7 @@ static void tcp_v6_send_response(const s
+        * Underlying function will use this to retrieve the network
+        * namespace
+        */
+-      dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
++      dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
+       if (!IS_ERR(dst)) {
+               skb_dst_set(buff, dst);
+               ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass,
+--- a/net/l2tp/l2tp_ip6.c
++++ b/net/l2tp/l2tp_ip6.c
+@@ -615,7 +615,7 @@ static int l2tp_ip6_sendmsg(struct sock
+       fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
+-      dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
++      dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
+       if (IS_ERR(dst)) {
+               err = PTR_ERR(dst);
+               goto out;
+--- a/net/sctp/ipv6.c
++++ b/net/sctp/ipv6.c
+@@ -275,7 +275,7 @@ static void sctp_v6_get_dst(struct sctp_
+       final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
+       rcu_read_unlock();
+-      dst = ip6_dst_lookup_flow(sk, fl6, final_p);
++      dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p);
+       if (!asoc || saddr)
+               goto out;
+@@ -328,7 +328,7 @@ static void sctp_v6_get_dst(struct sctp_
+               fl6->saddr = laddr->a.v6.sin6_addr;
+               fl6->fl6_sport = laddr->a.v6.sin6_port;
+               final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
+-              bdst = ip6_dst_lookup_flow(sk, fl6, final_p);
++              bdst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p);
+               if (IS_ERR(bdst))
+                       continue;
diff --git a/queue-5.4/net-ipv6_stub-use-ip6_dst_lookup_flow-instead-of-ip6_dst_lookup.patch b/queue-5.4/net-ipv6_stub-use-ip6_dst_lookup_flow-instead-of-ip6_dst_lookup.patch
new file mode 100644 (file)
index 0000000..e1666ff
--- /dev/null
@@ -0,0 +1,243 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Sabrina Dubroca <sd@queasysnail.net>
+Date: Wed, 4 Dec 2019 15:35:53 +0100
+Subject: net: ipv6_stub: use ip6_dst_lookup_flow instead of ip6_dst_lookup
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+[ Upstream commit 6c8991f41546c3c472503dff1ea9daaddf9331c2 ]
+
+ipv6_stub uses the ip6_dst_lookup function to allow other modules to
+perform IPv6 lookups. However, this function skips the XFRM layer
+entirely.
+
+All users of ipv6_stub->ip6_dst_lookup use ip_route_output_flow (via the
+ip_route_output_key and ip_route_output helpers) for their IPv4 lookups,
+which calls xfrm_lookup_route(). This patch fixes this inconsistent
+behavior by switching the stub to ip6_dst_lookup_flow, which also calls
+xfrm_lookup_route().
+
+This requires some changes in all the callers, as these two functions
+take different arguments and have different return types.
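+
+The recurring conversion pattern, sketched with stand-ins for the
+kernel's pointer-encoded error helpers (the demo_* names are
+assumptions; each caller hunk below follows this shape):
+
+  #include <stdint.h>
+
+  struct demo_dst;
+
+  static inline int demo_is_err(const void *p)
+  {
+          return (uintptr_t)p >= (uintptr_t)-4095; /* like IS_ERR() */
+  }
+
+  static long demo_caller(struct demo_dst *(*lookup_flow)(void))
+  {
+          /* old: int err = lookup(..., &dst, ...); if (err) ...    */
+          struct demo_dst *dst = lookup_flow();
+
+          if (demo_is_err(dst))
+                  return (long)(intptr_t)dst; /* like PTR_ERR() */
+          return 0;
+  }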
+
+Fixes: 5f81bd2e5d80 ("ipv6: export a stub for IPv6 symbols used by vxlan")
+Reported-by: Xiumei Mu <xmu@redhat.com>
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/core/addr.c                      |    7 +++----
+ drivers/infiniband/sw/rxe/rxe_net.c                 |    8 +++++---
+ drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c |    8 ++++----
+ drivers/net/geneve.c                                |    4 +++-
+ drivers/net/vxlan.c                                 |    8 +++-----
+ include/net/ipv6_stubs.h                            |    6 ++++--
+ net/core/lwt_bpf.c                                  |    4 +---
+ net/ipv6/addrconf_core.c                            |   11 ++++++-----
+ net/ipv6/af_inet6.c                                 |    2 +-
+ net/mpls/af_mpls.c                                  |    7 +++----
+ net/tipc/udp_media.c                                |    9 ++++++---
+ 11 files changed, 39 insertions(+), 35 deletions(-)
+
+--- a/drivers/infiniband/core/addr.c
++++ b/drivers/infiniband/core/addr.c
+@@ -421,16 +421,15 @@ static int addr6_resolve(struct sockaddr
+                               (const struct sockaddr_in6 *)dst_sock;
+       struct flowi6 fl6;
+       struct dst_entry *dst;
+-      int ret;
+       memset(&fl6, 0, sizeof fl6);
+       fl6.daddr = dst_in->sin6_addr;
+       fl6.saddr = src_in->sin6_addr;
+       fl6.flowi6_oif = addr->bound_dev_if;
+-      ret = ipv6_stub->ipv6_dst_lookup(addr->net, NULL, &dst, &fl6);
+-      if (ret < 0)
+-              return ret;
++      dst = ipv6_stub->ipv6_dst_lookup_flow(addr->net, NULL, &fl6, NULL);
++      if (IS_ERR(dst))
++              return PTR_ERR(dst);
+       if (ipv6_addr_any(&src_in->sin6_addr))
+               src_in->sin6_addr = fl6.saddr;
+--- a/drivers/infiniband/sw/rxe/rxe_net.c
++++ b/drivers/infiniband/sw/rxe/rxe_net.c
+@@ -117,10 +117,12 @@ static struct dst_entry *rxe_find_route6
+       memcpy(&fl6.daddr, daddr, sizeof(*daddr));
+       fl6.flowi6_proto = IPPROTO_UDP;
+-      if (unlikely(ipv6_stub->ipv6_dst_lookup(sock_net(recv_sockets.sk6->sk),
+-                                              recv_sockets.sk6->sk, &ndst, &fl6))) {
++      ndst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(recv_sockets.sk6->sk),
++                                             recv_sockets.sk6->sk, &fl6,
++                                             NULL);
++      if (unlikely(IS_ERR(ndst))) {
+               pr_err_ratelimited("no route to %pI6\n", daddr);
+-              goto put;
++              return NULL;
+       }
+       if (unlikely(ndst->error)) {
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+@@ -144,10 +144,10 @@ static int mlx5e_route_lookup_ipv6(struc
+ #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
+       int ret;
+-      ret = ipv6_stub->ipv6_dst_lookup(dev_net(mirred_dev), NULL, &dst,
+-                                       fl6);
+-      if (ret < 0)
+-              return ret;
++      dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(mirred_dev), NULL, fl6,
++                                            NULL);
++      if (IS_ERR(dst))
++              return PTR_ERR(dst);
+       if (!(*out_ttl))
+               *out_ttl = ip6_dst_hoplimit(dst);
+--- a/drivers/net/geneve.c
++++ b/drivers/net/geneve.c
+@@ -853,7 +853,9 @@ static struct dst_entry *geneve_get_v6_d
+               if (dst)
+                       return dst;
+       }
+-      if (ipv6_stub->ipv6_dst_lookup(geneve->net, gs6->sock->sk, &dst, fl6)) {
++      dst = ipv6_stub->ipv6_dst_lookup_flow(geneve->net, gs6->sock->sk, fl6,
++                                            NULL);
++      if (IS_ERR(dst)) {
+               netdev_dbg(dev, "no route to %pI6\n", &fl6->daddr);
+               return ERR_PTR(-ENETUNREACH);
+       }
+--- a/drivers/net/vxlan.c
++++ b/drivers/net/vxlan.c
+@@ -2276,7 +2276,6 @@ static struct dst_entry *vxlan6_get_rout
+       bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
+       struct dst_entry *ndst;
+       struct flowi6 fl6;
+-      int err;
+       if (!sock6)
+               return ERR_PTR(-EIO);
+@@ -2299,10 +2298,9 @@ static struct dst_entry *vxlan6_get_rout
+       fl6.fl6_dport = dport;
+       fl6.fl6_sport = sport;
+-      err = ipv6_stub->ipv6_dst_lookup(vxlan->net,
+-                                       sock6->sock->sk,
+-                                       &ndst, &fl6);
+-      if (unlikely(err < 0)) {
++      ndst = ipv6_stub->ipv6_dst_lookup_flow(vxlan->net, sock6->sock->sk,
++                                             &fl6, NULL);
++      if (unlikely(IS_ERR(ndst))) {
+               netdev_dbg(dev, "no route to %pI6\n", daddr);
+               return ERR_PTR(-ENETUNREACH);
+       }
+--- a/include/net/ipv6_stubs.h
++++ b/include/net/ipv6_stubs.h
+@@ -24,8 +24,10 @@ struct ipv6_stub {
+                                const struct in6_addr *addr);
+       int (*ipv6_sock_mc_drop)(struct sock *sk, int ifindex,
+                                const struct in6_addr *addr);
+-      int (*ipv6_dst_lookup)(struct net *net, struct sock *sk,
+-                             struct dst_entry **dst, struct flowi6 *fl6);
++      struct dst_entry *(*ipv6_dst_lookup_flow)(struct net *net,
++                                                const struct sock *sk,
++                                                struct flowi6 *fl6,
++                                                const struct in6_addr *final_dst);
+       int (*ipv6_route_input)(struct sk_buff *skb);
+       struct fib6_table *(*fib6_get_table)(struct net *net, u32 id);
+--- a/net/core/lwt_bpf.c
++++ b/net/core/lwt_bpf.c
+@@ -230,9 +230,7 @@ static int bpf_lwt_xmit_reroute(struct s
+               fl6.daddr = iph6->daddr;
+               fl6.saddr = iph6->saddr;
+-              err = ipv6_stub->ipv6_dst_lookup(net, skb->sk, &dst, &fl6);
+-              if (unlikely(err))
+-                      goto err;
++              dst = ipv6_stub->ipv6_dst_lookup_flow(net, skb->sk, &fl6, NULL);
+               if (IS_ERR(dst)) {
+                       err = PTR_ERR(dst);
+                       goto err;
+--- a/net/ipv6/addrconf_core.c
++++ b/net/ipv6/addrconf_core.c
+@@ -129,11 +129,12 @@ int inet6addr_validator_notifier_call_ch
+ }
+ EXPORT_SYMBOL(inet6addr_validator_notifier_call_chain);
+-static int eafnosupport_ipv6_dst_lookup(struct net *net, struct sock *u1,
+-                                      struct dst_entry **u2,
+-                                      struct flowi6 *u3)
++static struct dst_entry *eafnosupport_ipv6_dst_lookup_flow(struct net *net,
++                                                         const struct sock *sk,
++                                                         struct flowi6 *fl6,
++                                                         const struct in6_addr *final_dst)
+ {
+-      return -EAFNOSUPPORT;
++      return ERR_PTR(-EAFNOSUPPORT);
+ }
+ static int eafnosupport_ipv6_route_input(struct sk_buff *skb)
+@@ -190,7 +191,7 @@ static int eafnosupport_ip6_del_rt(struc
+ }
+ const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
+-      .ipv6_dst_lookup   = eafnosupport_ipv6_dst_lookup,
++      .ipv6_dst_lookup_flow = eafnosupport_ipv6_dst_lookup_flow,
+       .ipv6_route_input  = eafnosupport_ipv6_route_input,
+       .fib6_get_table    = eafnosupport_fib6_get_table,
+       .fib6_table_lookup = eafnosupport_fib6_table_lookup,
+--- a/net/ipv6/af_inet6.c
++++ b/net/ipv6/af_inet6.c
+@@ -946,7 +946,7 @@ static int ipv6_route_input(struct sk_bu
+ static const struct ipv6_stub ipv6_stub_impl = {
+       .ipv6_sock_mc_join = ipv6_sock_mc_join,
+       .ipv6_sock_mc_drop = ipv6_sock_mc_drop,
+-      .ipv6_dst_lookup   = ip6_dst_lookup,
++      .ipv6_dst_lookup_flow = ip6_dst_lookup_flow,
+       .ipv6_route_input  = ipv6_route_input,
+       .fib6_get_table    = fib6_get_table,
+       .fib6_table_lookup = fib6_table_lookup,
+--- a/net/mpls/af_mpls.c
++++ b/net/mpls/af_mpls.c
+@@ -617,16 +617,15 @@ static struct net_device *inet6_fib_look
+       struct net_device *dev;
+       struct dst_entry *dst;
+       struct flowi6 fl6;
+-      int err;
+       if (!ipv6_stub)
+               return ERR_PTR(-EAFNOSUPPORT);
+       memset(&fl6, 0, sizeof(fl6));
+       memcpy(&fl6.daddr, addr, sizeof(struct in6_addr));
+-      err = ipv6_stub->ipv6_dst_lookup(net, NULL, &dst, &fl6);
+-      if (err)
+-              return ERR_PTR(err);
++      dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
++      if (IS_ERR(dst))
++              return ERR_CAST(dst);
+       dev = dst->dev;
+       dev_hold(dev);
+--- a/net/tipc/udp_media.c
++++ b/net/tipc/udp_media.c
+@@ -195,10 +195,13 @@ static int tipc_udp_xmit(struct net *net
+                               .saddr = src->ipv6,
+                               .flowi6_proto = IPPROTO_UDP
+                       };
+-                      err = ipv6_stub->ipv6_dst_lookup(net, ub->ubsock->sk,
+-                                                       &ndst, &fl6);
+-                      if (err)
++                      ndst = ipv6_stub->ipv6_dst_lookup_flow(net,
++                                                             ub->ubsock->sk,
++                                                             &fl6, NULL);
++                      if (IS_ERR(ndst)) {
++                              err = PTR_ERR(ndst);
+                               goto tx_error;
++                      }
+                       dst_cache_set_ip6(cache, ndst, &fl6.saddr);
+               }
+               ttl = ip6_dst_hoplimit(ndst);
diff --git a/queue-5.4/net-mlx5e-ethtool-fix-analysis-of-speed-setting.patch b/queue-5.4/net-mlx5e-ethtool-fix-analysis-of-speed-setting.patch
new file mode 100644 (file)
index 0000000..e3ce559
--- /dev/null
@@ -0,0 +1,57 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Aya Levin <ayal@mellanox.com>
+Date: Sun, 1 Dec 2019 16:33:55 +0200
+Subject: net/mlx5e: ethtool, Fix analysis of speed setting
+
+From: Aya Levin <ayal@mellanox.com>
+
+[ Upstream commit 3d7cadae51f1b7f28358e36d0a1ce3f0ae2eee60 ]
+
+When setting the speed to 100G via ethtool (with autonegotiation off),
+only 25G*4 is configured, while the user, who has advanced HW that
+supports extended PTYS, also expects 50G*2 to be configured.
+With this patch, when extended PTYS mode is available, PTYS is
+configured via the extended fields.
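+
+A sketch of the fixed decision (a boolean reduction of the helper in
+the diff below; the names are simplified):
+
+  #include <stdbool.h>
+
+  /* With autoneg off, extended PTYS is used whenever the device
+   * supports it, rather than being inferred from the requested speed
+   * (which missed the 50G*2 mode at 100G).
+   */
+  static bool demo_ext_requested(bool autoneg_enable, bool ext_link_mode,
+                                 bool ext_supported)
+  {
+          return autoneg_enable ? ext_link_mode : ext_supported;
+  }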
+
+Fixes: 4b95840a6ced ("net/mlx5e: Fix matching of speed to PRM link modes")
+Signed-off-by: Aya Levin <ayal@mellanox.com>
+Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c |   13 +++----------
+ 1 file changed, 3 insertions(+), 10 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+@@ -1027,18 +1027,11 @@ static bool ext_link_mode_requested(cons
+       return bitmap_intersects(modes, adver, __ETHTOOL_LINK_MODE_MASK_NBITS);
+ }
+-static bool ext_speed_requested(u32 speed)
+-{
+-#define MLX5E_MAX_PTYS_LEGACY_SPEED 100000
+-      return !!(speed > MLX5E_MAX_PTYS_LEGACY_SPEED);
+-}
+-
+-static bool ext_requested(u8 autoneg, const unsigned long *adver, u32 speed)
++static bool ext_requested(u8 autoneg, const unsigned long *adver, bool ext_supported)
+ {
+       bool ext_link_mode = ext_link_mode_requested(adver);
+-      bool ext_speed = ext_speed_requested(speed);
+-      return  autoneg == AUTONEG_ENABLE ? ext_link_mode : ext_speed;
++      return  autoneg == AUTONEG_ENABLE ? ext_link_mode : ext_supported;
+ }
+ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
+@@ -1065,8 +1058,8 @@ int mlx5e_ethtool_set_link_ksettings(str
+       autoneg = link_ksettings->base.autoneg;
+       speed = link_ksettings->base.speed;
+-      ext = ext_requested(autoneg, adver, speed),
+       ext_supported = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
++      ext = ext_requested(autoneg, adver, ext_supported);
+       if (!ext_supported && ext)
+               return -EOPNOTSUPP;
diff --git a/queue-5.4/net-mlx5e-fix-freeing-flow-with-kfree-and-not-kvfree.patch b/queue-5.4/net-mlx5e-fix-freeing-flow-with-kfree-and-not-kvfree.patch
new file mode 100644 (file)
index 0000000..f202017
--- /dev/null
@@ -0,0 +1,31 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Roi Dayan <roid@mellanox.com>
+Date: Wed, 4 Dec 2019 11:25:43 +0200
+Subject: net/mlx5e: Fix freeing flow with kfree() and not kvfree()
+
+From: Roi Dayan <roid@mellanox.com>
+
+[ Upstream commit a23dae79fb6555c808528707c6389345d0b0c189 ]
+
+Flows are allocated with kzalloc() so free with kfree().
+
+Fixes: 04de7dda7394 ("net/mlx5e: Infrastructure for duplicated offloading of TC flows")
+Signed-off-by: Roi Dayan <roid@mellanox.com>
+Reviewed-by: Eli Britstein <elibr@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -1616,7 +1616,7 @@ static void __mlx5e_tc_del_fdb_peer_flow
+       flow_flag_clear(flow, DUP);
+       mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
+-      kvfree(flow->peer_flow);
++      kfree(flow->peer_flow);
+       flow->peer_flow = NULL;
+ }
diff --git a/queue-5.4/net-mlx5e-fix-sff-8472-eeprom-length.patch b/queue-5.4/net-mlx5e-fix-sff-8472-eeprom-length.patch
new file mode 100644 (file)
index 0000000..4debebc
--- /dev/null
@@ -0,0 +1,32 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Eran Ben Elisha <eranbe@mellanox.com>
+Date: Thu, 5 Dec 2019 10:30:22 +0200
+Subject: net/mlx5e: Fix SFF 8472 eeprom length
+
+From: Eran Ben Elisha <eranbe@mellanox.com>
+
+[ Upstream commit c431f8597863a91eea6024926e0c1b179cfa4852 ]
+
+The SFF-8472 EEPROM length is 512 bytes. Fix the module info return
+value to support reading 512 bytes.
+
+Fixes: ace329f4ab3b ("net/mlx5e: ethtool, Remove unsupported SFP EEPROM high pages query")
+Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
+Reviewed-by: Aya Levin <ayal@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+@@ -1643,7 +1643,7 @@ static int mlx5e_get_module_info(struct
+               break;
+       case MLX5_MODULE_ID_SFP:
+               modinfo->type       = ETH_MODULE_SFF_8472;
+-              modinfo->eeprom_len = MLX5_EEPROM_PAGE_LENGTH;
++              modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+               break;
+       default:
+               netdev_err(priv->netdev, "%s: cable type not recognized:0x%x\n",
diff --git a/queue-5.4/net-mlx5e-fix-translation-of-link-mode-into-speed.patch b/queue-5.4/net-mlx5e-fix-translation-of-link-mode-into-speed.patch
new file mode 100644 (file)
index 0000000..7fbe5d4
--- /dev/null
@@ -0,0 +1,33 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Aya Levin <ayal@mellanox.com>
+Date: Sun, 1 Dec 2019 14:45:25 +0200
+Subject: net/mlx5e: Fix translation of link mode into speed
+
+From: Aya Levin <ayal@mellanox.com>
+
+[ Upstream commit 6d485e5e555436d2c13accdb10807328c4158a17 ]
+
+Add a missing value to the translation of PTYS ext_eth_proto_oper into
+its corresponding speed. When ext_eth_proto_oper bit 10 is set, ethtool
+shows an unknown speed. With this fix, ethtool shows the speed as 100G,
+as expected.
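+
+A sketch of the lookup this patch extends (the index and the added
+entry mirror the table in the diff below; the demo name is an
+assumption):
+
+  /* Speed in Mbps, keyed by the ext_eth_proto_oper bit number. */
+  static const unsigned int demo_ext_speed[] = {
+          [10] = 100000, /* 100GAUI-2 / 100GBASE-CR2/KR2: the entry
+                          * this patch adds */
+  };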
+
+Fixes: a08b4ed1373d ("net/mlx5: Add support to ext_* fields introduced in Port Type and Speed register")
+Signed-off-by: Aya Levin <ayal@mellanox.com>
+Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/port.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
+@@ -73,6 +73,7 @@ static const u32 mlx5e_ext_link_speed[ML
+       [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2] = 50000,
+       [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR]   = 50000,
+       [MLX5E_CAUI_4_100GBASE_CR4_KR4]         = 100000,
++      [MLX5E_100GAUI_2_100GBASE_CR2_KR2]      = 100000,
+       [MLX5E_200GAUI_4_200GBASE_CR4_KR4]      = 200000,
+       [MLX5E_400GAUI_8]                       = 400000,
+ };
diff --git a/queue-5.4/net-mlx5e-fix-txq-indices-to-be-sequential.patch b/queue-5.4/net-mlx5e-fix-txq-indices-to-be-sequential.patch
new file mode 100644 (file)
index 0000000..d0e9c70
--- /dev/null
@@ -0,0 +1,150 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Eran Ben Elisha <eranbe@mellanox.com>
+Date: Mon, 25 Nov 2019 12:11:49 +0200
+Subject: net/mlx5e: Fix TXQ indices to be sequential
+
+From: Eran Ben Elisha <eranbe@mellanox.com>
+
+[ Upstream commit c55d8b108caa2ec1ae8dddd02cb9d3a740f7c838 ]
+
+The cited patch changed the (channel index, tc) => (TXQ index) mapping
+to a static one, in order to keep indices consistent when changing the
+number of channels or TCs.
+
+For 32 channels (OOB) and 8 TCs, the real number of TXQs is 256.
+When reducing the number of channels to 8, the real number of TXQs
+changes to 64.
+This indexing method is buggy:
+- For channel #0, TC 3, the TXQ index is 96.
+- Index 8 is not valid, as there is no such TXQ from the driver's
+  perspective (it represents channel #8, TC 0, which is not valid with
+  the above configuration).
+
+As part of its select-queue routine, the driver calls netdev_pick_tx(),
+which returns an index in the range of the real number of TXQs.
+Depending on the return value, with the examples above, the driver
+could have returned an index larger than the real number of TX queues,
+or crashed the kernel while trying to read the invalid address of an
+SQ that was never allocated.
+
+Fix that by allocating sequential TXQ indices and holding a new mapping
+between (channel index, tc) => (real TXQ index). This mapping is
+updated as part of priv channels activation, and is used in
+mlx5e_select_queue() to find the selected queue index.
+
+The existing indices mapping (channel_tc2txq) is no longer needed, as
+it is used only for statistics structures and can be calculated at run
+time. Delete its definition and updates.
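+
+A sketch of the new sequential mapping (demo_txq_index() is a
+simplified model of the computation used in the diff below):
+
+  /* With num_channels = 8 and 8 TCs, indices 0..63 are all valid,
+   * and (channel 0, TC 3) maps to 0 + 3 * 8 = 24 instead of 96.
+   */
+  static int demo_txq_index(int ch_ix, int tc, int num_channels)
+  {
+          return ch_ix + tc * num_channels; /* was: ch_ix + tc * max_nch */
+  }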
+
+Fixes: 8bfaf07f7806 ("net/mlx5e: Present SW stats when state is not opened")
+Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en.h       |    2 -
+ drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |   31 ++++++++-------------
+ drivers/net/ethernet/mellanox/mlx5/core/en_stats.c |    2 -
+ drivers/net/ethernet/mellanox/mlx5/core/en_tx.c    |    2 -
+ 4 files changed, 15 insertions(+), 22 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
+@@ -816,7 +816,7 @@ struct mlx5e_xsk {
+ struct mlx5e_priv {
+       /* priv data path fields - start */
+       struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC];
+-      int channel_tc2txq[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC];
++      int channel_tc2realtxq[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC];
+ #ifdef CONFIG_MLX5_CORE_EN_DCB
+       struct mlx5e_dcbx_dp       dcbx_dp;
+ #endif
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+@@ -1693,11 +1693,10 @@ static int mlx5e_open_sqs(struct mlx5e_c
+                         struct mlx5e_params *params,
+                         struct mlx5e_channel_param *cparam)
+ {
+-      struct mlx5e_priv *priv = c->priv;
+       int err, tc;
+       for (tc = 0; tc < params->num_tc; tc++) {
+-              int txq_ix = c->ix + tc * priv->max_nch;
++              int txq_ix = c->ix + tc * params->num_channels;
+               err = mlx5e_open_txqsq(c, c->priv->tisn[c->lag_port][tc], txq_ix,
+                                      params, &cparam->sq, &c->sq[tc], tc);
+@@ -2878,26 +2877,21 @@ static void mlx5e_netdev_set_tcs(struct
+               netdev_set_tc_queue(netdev, tc, nch, 0);
+ }
+-static void mlx5e_build_tc2txq_maps(struct mlx5e_priv *priv)
++static void mlx5e_build_txq_maps(struct mlx5e_priv *priv)
+ {
+-      int i, tc;
++      int i, ch;
+-      for (i = 0; i < priv->max_nch; i++)
+-              for (tc = 0; tc < priv->profile->max_tc; tc++)
+-                      priv->channel_tc2txq[i][tc] = i + tc * priv->max_nch;
+-}
++      ch = priv->channels.num;
+-static void mlx5e_build_tx2sq_maps(struct mlx5e_priv *priv)
+-{
+-      struct mlx5e_channel *c;
+-      struct mlx5e_txqsq *sq;
+-      int i, tc;
++      for (i = 0; i < ch; i++) {
++              int tc;
++
++              for (tc = 0; tc < priv->channels.params.num_tc; tc++) {
++                      struct mlx5e_channel *c = priv->channels.c[i];
++                      struct mlx5e_txqsq *sq = &c->sq[tc];
+-      for (i = 0; i < priv->channels.num; i++) {
+-              c = priv->channels.c[i];
+-              for (tc = 0; tc < c->num_tc; tc++) {
+-                      sq = &c->sq[tc];
+                       priv->txq2sq[sq->txq_ix] = sq;
++                      priv->channel_tc2realtxq[i][tc] = i + tc * ch;
+               }
+       }
+ }
+@@ -2912,7 +2906,7 @@ void mlx5e_activate_priv_channels(struct
+       netif_set_real_num_tx_queues(netdev, num_txqs);
+       netif_set_real_num_rx_queues(netdev, num_rxqs);
+-      mlx5e_build_tx2sq_maps(priv);
++      mlx5e_build_txq_maps(priv);
+       mlx5e_activate_channels(&priv->channels);
+       mlx5e_xdp_tx_enable(priv);
+       netif_tx_start_all_queues(priv->netdev);
+@@ -5028,7 +5022,6 @@ static int mlx5e_nic_init(struct mlx5_co
+       if (err)
+               mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
+       mlx5e_build_nic_netdev(netdev);
+-      mlx5e_build_tc2txq_maps(priv);
+       mlx5e_health_create_reporters(priv);
+       return 0;
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+@@ -1601,7 +1601,7 @@ static int mlx5e_grp_channels_fill_strin
+                       for (j = 0; j < NUM_SQ_STATS; j++)
+                               sprintf(data + (idx++) * ETH_GSTRING_LEN,
+                                       sq_stats_desc[j].format,
+-                                      priv->channel_tc2txq[i][tc]);
++                                      i + tc * max_nch);
+       for (i = 0; i < max_nch; i++) {
+               for (j = 0; j < NUM_XSKSQ_STATS * is_xsk; j++)
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+@@ -93,7 +93,7 @@ u16 mlx5e_select_queue(struct net_device
+       if (txq_ix >= num_channels)
+               txq_ix = priv->txq2sq[txq_ix]->ch_ix;
+-      return priv->channel_tc2txq[txq_ix][up];
++      return priv->channel_tc2realtxq[txq_ix][up];
+ }
+ static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb)
diff --git a/queue-5.4/net-mlx5e-query-global-pause-state-before-setting-prio2buffer.patch b/queue-5.4/net-mlx5e-query-global-pause-state-before-setting-prio2buffer.patch
new file mode 100644 (file)
index 0000000..b85f28e
--- /dev/null
@@ -0,0 +1,78 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Huy Nguyen <huyn@mellanox.com>
+Date: Fri, 6 Sep 2019 09:28:46 -0500
+Subject: net/mlx5e: Query global pause state before setting prio2buffer
+
+From: Huy Nguyen <huyn@mellanox.com>
+
+[ Upstream commit 73e6551699a32fac703ceea09214d6580edcf2d5 ]
+
+When the user changes the prio2buffer mapping while global pause is
+enabled, the mlx5 driver incorrectly sets all active buffers (buffers
+that have at least one priority mapped) to lossy.
+
+Solution:
+If global pause is enabled, set all the active buffers to lossless in
+the prio2buffer command.
+Also, add an error message for when the buffer size is not large
+enough to meet the xoff threshold.
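+
+A sketch of the added decision (a boolean reduction of fill_pfc_en()
+in the diff below):
+
+  #include <stdbool.h>
+  #include <stdint.h>
+
+  /* Global pause overrides the per-priority PFC mask, so every
+   * active buffer is treated as lossless.
+   */
+  static uint8_t demo_effective_pfc_en(bool g_rx_pause, bool g_tx_pause,
+                                       uint8_t pfc_en)
+  {
+          return (g_rx_pause || g_tx_pause) ? 0xff : pfc_en;
+  }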
+
+Fixes: 0696d60853d5 ("net/mlx5e: Receive buffer configuration")
+Signed-off-by: Huy Nguyen <huyn@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c |   27 +++++++++++++--
+ 1 file changed, 25 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
+@@ -155,8 +155,11 @@ static int update_xoff_threshold(struct
+               }
+               if (port_buffer->buffer[i].size <
+-                  (xoff + max_mtu + (1 << MLX5E_BUFFER_CELL_SHIFT)))
++                  (xoff + max_mtu + (1 << MLX5E_BUFFER_CELL_SHIFT))) {
++                      pr_err("buffer_size[%d]=%d is not enough for lossless buffer\n",
++                             i, port_buffer->buffer[i].size);
+                       return -ENOMEM;
++              }
+               port_buffer->buffer[i].xoff = port_buffer->buffer[i].size - xoff;
+               port_buffer->buffer[i].xon  =
+@@ -232,6 +235,26 @@ static int update_buffer_lossy(unsigned
+       return 0;
+ }
++static int fill_pfc_en(struct mlx5_core_dev *mdev, u8 *pfc_en)
++{
++      u32 g_rx_pause, g_tx_pause;
++      int err;
++
++      err = mlx5_query_port_pause(mdev, &g_rx_pause, &g_tx_pause);
++      if (err)
++              return err;
++
++      /* If global pause enabled, set all active buffers to lossless.
++       * Otherwise, check PFC setting.
++       */
++      if (g_rx_pause || g_tx_pause)
++              *pfc_en = 0xff;
++      else
++              err = mlx5_query_port_pfc(mdev, pfc_en, NULL);
++
++      return err;
++}
++
+ #define MINIMUM_MAX_MTU 9216
+ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
+                                   u32 change, unsigned int mtu,
+@@ -277,7 +300,7 @@ int mlx5e_port_manual_buffer_config(stru
+       if (change & MLX5E_PORT_BUFFER_PRIO2BUFFER) {
+               update_prio2buffer = true;
+-              err = mlx5_query_port_pfc(priv->mdev, &curr_pfc_en, NULL);
++              err = fill_pfc_en(priv->mdev, &curr_pfc_en);
+               if (err)
+                       return err;
diff --git a/queue-5.4/net-mscc-ocelot-unregister-the-ptp-clock-on-deinit.patch b/queue-5.4/net-mscc-ocelot-unregister-the-ptp-clock-on-deinit.patch
new file mode 100644 (file)
index 0000000..31738b5
--- /dev/null
@@ -0,0 +1,121 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+Date: Tue, 3 Dec 2019 17:45:35 +0200
+Subject: net: mscc: ocelot: unregister the PTP clock on deinit
+
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+
+[ Upstream commit 9385973fe8db9743fa93bf17245635be4eb8c4a6 ]
+
+Currently a switch driver deinit frees the regmaps, but the PTP clock is
+still out there, available to user space via /dev/ptpN. Any PTP
+operation is a ticking time bomb, since it will attempt to use the freed
+regmaps and thus trigger kernel panics:
+
+[    4.291746] fsl_enetc 0000:00:00.2 eth1: error -22 setting up slave phy
+[    4.291871] mscc_felix 0000:00:00.5: Failed to register DSA switch: -22
+[    4.308666] mscc_felix: probe of 0000:00:00.5 failed with error -22
+[    6.358270] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000088
+[    6.367090] Mem abort info:
+[    6.369888]   ESR = 0x96000046
+[    6.369891]   EC = 0x25: DABT (current EL), IL = 32 bits
+[    6.369892]   SET = 0, FnV = 0
+[    6.369894]   EA = 0, S1PTW = 0
+[    6.369895] Data abort info:
+[    6.369897]   ISV = 0, ISS = 0x00000046
+[    6.369899]   CM = 0, WnR = 1
+[    6.369902] user pgtable: 4k pages, 48-bit VAs, pgdp=00000020d58c7000
+[    6.369904] [0000000000000088] pgd=00000020d5912003, pud=00000020d5915003, pmd=0000000000000000
+[    6.369914] Internal error: Oops: 96000046 [#1] PREEMPT SMP
+[    6.420443] Modules linked in:
+[    6.423506] CPU: 1 PID: 262 Comm: phc_ctl Not tainted 5.4.0-03625-gb7b2a5dadd7f #204
+[    6.431273] Hardware name: LS1028A RDB Board (DT)
+[    6.435989] pstate: 40000085 (nZcv daIf -PAN -UAO)
+[    6.440802] pc : css_release+0x24/0x58
+[    6.444561] lr : regmap_read+0x40/0x78
+[    6.448316] sp : ffff800010513cc0
+[    6.451636] x29: ffff800010513cc0 x28: ffff002055873040
+[    6.456963] x27: 0000000000000000 x26: 0000000000000000
+[    6.462289] x25: 0000000000000000 x24: 0000000000000000
+[    6.467617] x23: 0000000000000000 x22: 0000000000000080
+[    6.472944] x21: ffff800010513d44 x20: 0000000000000080
+[    6.478270] x19: 0000000000000000 x18: 0000000000000000
+[    6.483596] x17: 0000000000000000 x16: 0000000000000000
+[    6.488921] x15: 0000000000000000 x14: 0000000000000000
+[    6.494247] x13: 0000000000000000 x12: 0000000000000000
+[    6.499573] x11: 0000000000000000 x10: 0000000000000000
+[    6.504899] x9 : 0000000000000000 x8 : 0000000000000000
+[    6.510225] x7 : 0000000000000000 x6 : ffff800010513cf0
+[    6.515550] x5 : 0000000000000000 x4 : 0000000fffffffe0
+[    6.520876] x3 : 0000000000000088 x2 : ffff800010513d44
+[    6.526202] x1 : ffffcada668ea000 x0 : ffffcada64d8b0c0
+[    6.531528] Call trace:
+[    6.533977]  css_release+0x24/0x58
+[    6.537385]  regmap_read+0x40/0x78
+[    6.540795]  __ocelot_read_ix+0x6c/0xa0
+[    6.544641]  ocelot_ptp_gettime64+0x4c/0x110
+[    6.548921]  ptp_clock_gettime+0x4c/0x58
+[    6.552853]  pc_clock_gettime+0x5c/0xa8
+[    6.556699]  __arm64_sys_clock_gettime+0x68/0xc8
+[    6.561331]  el0_svc_common.constprop.2+0x7c/0x178
+[    6.566133]  el0_svc_handler+0x34/0xa0
+[    6.569891]  el0_sync_handler+0x114/0x1d0
+[    6.573908]  el0_sync+0x140/0x180
+[    6.577232] Code: d503201f b00119a1 91022263 b27b7be4 (f9004663)
+[    6.583349] ---[ end trace d196b9b14cdae2da ]---
+[    6.587977] Kernel panic - not syncing: Fatal exception
+[    6.593216] SMP: stopping secondary CPUs
+[    6.597151] Kernel Offset: 0x4ada54400000 from 0xffff800010000000
+[    6.603261] PHYS_OFFSET: 0xffffd0a7c0000000
+[    6.607454] CPU features: 0x10002,21806008
+[    6.611558] Memory Limit: none
+
+And now that ocelot->ptp_clock is checked at exit, prevent a potential
+error where ptp_clock_register() returns a pointer-encoded error that
+would otherwise be kept in the ocelot private data structure. From now
+on, ocelot->ptp_clock is either NULL or a valid pointer.
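+
+A sketch of the resulting lifetime rule (the demo_* types stand in for
+the ocelot structures; the real changes are in the diff below):
+
+  struct demo_ptp_clock;
+
+  struct demo_ocelot {
+          struct demo_ptp_clock *ptp_clock; /* NULL or valid, never
+                                             * an error pointer */
+  };
+
+  static void demo_deinit(struct demo_ocelot *oc,
+                          void (*unregister)(struct demo_ptp_clock *))
+  {
+          if (oc->ptp_clock) /* NULL when PHC support is absent */
+                  unregister(oc->ptp_clock);
+  }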
+
+Fixes: 4e3b0468e6d7 ("net: mscc: PTP Hardware Clock (PHC) support")
+Cc: Antoine Tenart <antoine.tenart@bootlin.com>
+Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mscc/ocelot.c |   14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/mscc/ocelot.c
++++ b/drivers/net/ethernet/mscc/ocelot.c
+@@ -1979,14 +1979,18 @@ static struct ptp_clock_info ocelot_ptp_
+ static int ocelot_init_timestamp(struct ocelot *ocelot)
+ {
++      struct ptp_clock *ptp_clock;
++
+       ocelot->ptp_info = ocelot_ptp_clock_info;
+-      ocelot->ptp_clock = ptp_clock_register(&ocelot->ptp_info, ocelot->dev);
+-      if (IS_ERR(ocelot->ptp_clock))
+-              return PTR_ERR(ocelot->ptp_clock);
++      ptp_clock = ptp_clock_register(&ocelot->ptp_info, ocelot->dev);
++      if (IS_ERR(ptp_clock))
++              return PTR_ERR(ptp_clock);
+       /* Check if PHC support is missing at the configuration level */
+-      if (!ocelot->ptp_clock)
++      if (!ptp_clock)
+               return 0;
++      ocelot->ptp_clock = ptp_clock;
++
+       ocelot_write(ocelot, SYS_PTP_CFG_PTP_STAMP_WID(30), SYS_PTP_CFG);
+       ocelot_write(ocelot, 0xffffffff, ANA_TABLES_PTP_ID_LOW);
+       ocelot_write(ocelot, 0xffffffff, ANA_TABLES_PTP_ID_HIGH);
+@@ -2213,6 +2217,8 @@ void ocelot_deinit(struct ocelot *ocelot
+       destroy_workqueue(ocelot->stats_queue);
+       mutex_destroy(&ocelot->stats_lock);
+       ocelot_ace_deinit();
++      if (ocelot->ptp_clock)
++              ptp_clock_unregister(ocelot->ptp_clock);
+       for (i = 0; i < ocelot->num_phys_ports; i++) {
+               port = ocelot->ports[i];
diff --git a/queue-5.4/net-sched-allow-indirect-blocks-to-bind-to-clsact-in-tc.patch b/queue-5.4/net-sched-allow-indirect-blocks-to-bind-to-clsact-in-tc.patch
new file mode 100644 (file)
index 0000000..2d0f886
--- /dev/null
@@ -0,0 +1,139 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: John Hurley <john.hurley@netronome.com>
+Date: Thu, 5 Dec 2019 17:03:35 +0000
+Subject: net: sched: allow indirect blocks to bind to clsact in TC
+
+From: John Hurley <john.hurley@netronome.com>
+
+[ Upstream commit 25a443f74bcff2c4d506a39eae62fc15ad7c618a ]
+
+When a device is bound to a clsact qdisc, bind events are triggered to
+registered drivers for both ingress and egress. However, if a driver
+registers to such a device using the indirect block routines, it is
+assumed to be interested only in ingress offload, and so only ingress
+bind/unbind messages are replayed.
+
+The NFP driver supports the offload of some egress filters when
+registering to a block with a qdisc of type clsact. However, on
+unregister, if the block is still active, it will not receive an egress
+unbind notification, which can prevent proper cleanup of other
+registered callbacks.
+
+Modify the indirect block callback command in TC to send messages of
+ingress and/or egress bind depending on the qdisc in use. NFP currently
+supports egress offload only for TC flower, so the changes are limited
+to TC.
+
+Fixes: 4d12ba42787b ("nfp: flower: allow offloading of matches on 'internal' ports")
+Signed-off-by: John Hurley <john.hurley@netronome.com>
+Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/cls_api.c |   52 +++++++++++++++++++++++++++++++++-------------------
+ 1 file changed, 33 insertions(+), 19 deletions(-)
+
+--- a/net/sched/cls_api.c
++++ b/net/sched/cls_api.c
+@@ -626,15 +626,15 @@ static void tcf_chain_flush(struct tcf_c
+ static int tcf_block_setup(struct tcf_block *block,
+                          struct flow_block_offload *bo);
+-static void tc_indr_block_ing_cmd(struct net_device *dev,
+-                                struct tcf_block *block,
+-                                flow_indr_block_bind_cb_t *cb,
+-                                void *cb_priv,
+-                                enum flow_block_command command)
++static void tc_indr_block_cmd(struct net_device *dev, struct tcf_block *block,
++                            flow_indr_block_bind_cb_t *cb, void *cb_priv,
++                            enum flow_block_command command, bool ingress)
+ {
+       struct flow_block_offload bo = {
+               .command        = command,
+-              .binder_type    = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS,
++              .binder_type    = ingress ?
++                                FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS :
++                                FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS,
+               .net            = dev_net(dev),
+               .block_shared   = tcf_block_non_null_shared(block),
+       };
+@@ -652,9 +652,10 @@ static void tc_indr_block_ing_cmd(struct
+       up_write(&block->cb_lock);
+ }
+-static struct tcf_block *tc_dev_ingress_block(struct net_device *dev)
++static struct tcf_block *tc_dev_block(struct net_device *dev, bool ingress)
+ {
+       const struct Qdisc_class_ops *cops;
++      const struct Qdisc_ops *ops;
+       struct Qdisc *qdisc;
+       if (!dev_ingress_queue(dev))
+@@ -664,24 +665,37 @@ static struct tcf_block *tc_dev_ingress_
+       if (!qdisc)
+               return NULL;
+-      cops = qdisc->ops->cl_ops;
++      ops = qdisc->ops;
++      if (!ops)
++              return NULL;
++
++      if (!ingress && !strcmp("ingress", ops->id))
++              return NULL;
++
++      cops = ops->cl_ops;
+       if (!cops)
+               return NULL;
+       if (!cops->tcf_block)
+               return NULL;
+-      return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL);
++      return cops->tcf_block(qdisc,
++                             ingress ? TC_H_MIN_INGRESS : TC_H_MIN_EGRESS,
++                             NULL);
+ }
+-static void tc_indr_block_get_and_ing_cmd(struct net_device *dev,
+-                                        flow_indr_block_bind_cb_t *cb,
+-                                        void *cb_priv,
+-                                        enum flow_block_command command)
++static void tc_indr_block_get_and_cmd(struct net_device *dev,
++                                    flow_indr_block_bind_cb_t *cb,
++                                    void *cb_priv,
++                                    enum flow_block_command command)
+ {
+-      struct tcf_block *block = tc_dev_ingress_block(dev);
++      struct tcf_block *block;
++
++      block = tc_dev_block(dev, true);
++      tc_indr_block_cmd(dev, block, cb, cb_priv, command, true);
+-      tc_indr_block_ing_cmd(dev, block, cb, cb_priv, command);
++      block = tc_dev_block(dev, false);
++      tc_indr_block_cmd(dev, block, cb, cb_priv, command, false);
+ }
+ static void tc_indr_block_call(struct tcf_block *block,
+@@ -3632,9 +3646,9 @@ static struct pernet_operations tcf_net_
+       .size = sizeof(struct tcf_net),
+ };
+-static struct flow_indr_block_entry block_ing_entry = {
+-      .cb = tc_indr_block_get_and_ing_cmd,
+-      .list = LIST_HEAD_INIT(block_ing_entry.list),
++static struct flow_indr_block_entry block_entry = {
++      .cb = tc_indr_block_get_and_cmd,
++      .list = LIST_HEAD_INIT(block_entry.list),
+ };
+ static int __init tc_filter_init(void)
+@@ -3649,7 +3663,7 @@ static int __init tc_filter_init(void)
+       if (err)
+               goto err_register_pernet_subsys;
+-      flow_indr_add_block_cb(&block_ing_entry);
++      flow_indr_add_block_cb(&block_entry);
+       rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
+                     RTNL_FLAG_DOIT_UNLOCKED);
diff --git a/queue-5.4/net-sched-fix-dump-qlen-for-sch_mq-sch_mqprio-with-nolock-subqueues.patch b/queue-5.4/net-sched-fix-dump-qlen-for-sch_mq-sch_mqprio-with-nolock-subqueues.patch
new file mode 100644 (file)
index 0000000..55fd440
--- /dev/null
@@ -0,0 +1,42 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Dust Li <dust.li@linux.alibaba.com>
+Date: Tue, 3 Dec 2019 11:17:40 +0800
+Subject: net: sched: fix dump qlen for sch_mq/sch_mqprio with NOLOCK subqueues
+
+From: Dust Li <dust.li@linux.alibaba.com>
+
+[ Upstream commit 2f23cd42e19c22c24ff0e221089b7b6123b117c5 ]
+
+sch->q.qlen isn't updated in mq_dump() and mqprio_dump() when the
+subqueue is a NOLOCK qdisc.
+
+Fixes: ce679e8df7ed ("net: sched: add support for TCQ_F_NOLOCK subqueues to sch_mqprio")
+Signed-off-by: Dust Li <dust.li@linux.alibaba.com>
+Signed-off-by: Tony Lu <tonylu@linux.alibaba.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_mq.c     |    1 +
+ net/sched/sch_mqprio.c |    1 +
+ 2 files changed, 2 insertions(+)
+
+--- a/net/sched/sch_mq.c
++++ b/net/sched/sch_mq.c
+@@ -153,6 +153,7 @@ static int mq_dump(struct Qdisc *sch, st
+                       __gnet_stats_copy_queue(&sch->qstats,
+                                               qdisc->cpu_qstats,
+                                               &qdisc->qstats, qlen);
++                      sch->q.qlen             += qlen;
+               } else {
+                       sch->q.qlen             += qdisc->q.qlen;
+                       sch->bstats.bytes       += qdisc->bstats.bytes;
+--- a/net/sched/sch_mqprio.c
++++ b/net/sched/sch_mqprio.c
+@@ -411,6 +411,7 @@ static int mqprio_dump(struct Qdisc *sch
+                       __gnet_stats_copy_queue(&sch->qstats,
+                                               qdisc->cpu_qstats,
+                                               &qdisc->qstats, qlen);
++                      sch->q.qlen             += qlen;
+               } else {
+                       sch->q.qlen             += qdisc->q.qlen;
+                       sch->bstats.bytes       += qdisc->bstats.bytes;
diff --git a/queue-5.4/net-sysfs-call-dev_hold-always-in-netdev_queue_add_kobject.patch b/queue-5.4/net-sysfs-call-dev_hold-always-in-netdev_queue_add_kobject.patch
new file mode 100644 (file)
index 0000000..3563418
--- /dev/null
@@ -0,0 +1,46 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Jouni Hogander <jouni.hogander@unikie.com>
+Date: Thu, 5 Dec 2019 15:57:07 +0200
+Subject: net-sysfs: Call dev_hold always in netdev_queue_add_kobject
+
+From: Jouni Hogander <jouni.hogander@unikie.com>
+
+[ Upstream commit e0b60903b434a7ee21ba8d8659f207ed84101e89 ]
+
+dev_hold() always has to be called in netdev_queue_add_kobject().
+Otherwise the usage count drops below 0 if kobject_init_and_add()
+fails.
+
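+The ordering requirement, sketched (an illustration of the pattern,
+not the literal driver code): once kobject_init_and_add() has run,
+kobject_put() invokes the ktype release function, which does a
+dev_put(); the matching dev_hold() must therefore happen first, even
+on the error path.
+
+  dev_hold(queue->dev);  /* reference owned by the kobject from here on */
+
+  error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
+                               "tx-%u", index);
+  if (error)
+          kobject_put(kobj);  /* release drops the reference we took */
+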
+Fixes: b8eb718348b8 ("net-sysfs: Fix reference count leak in rx|netdev_queue_add_kobject")
+Reported-by: Hulk Robot <hulkci@huawei.com>
+Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Cc: David Miller <davem@davemloft.net>
+Cc: Lukas Bulwahn <lukas.bulwahn@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/net-sysfs.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/net/core/net-sysfs.c
++++ b/net/core/net-sysfs.c
+@@ -1459,14 +1459,17 @@ static int netdev_queue_add_kobject(stru
+       struct kobject *kobj = &queue->kobj;
+       int error = 0;
++      /* Kobject_put later will trigger netdev_queue_release call
++       * which decreases dev refcount: Take that reference here
++       */
++      dev_hold(queue->dev);
++
+       kobj->kset = dev->queues_kset;
+       error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
+                                    "tx-%u", index);
+       if (error)
+               goto err;
+-      dev_hold(queue->dev);
+-
+ #ifdef CONFIG_BQL
+       error = sysfs_create_group(kobj, &dql_group);
+       if (error)
diff --git a/queue-5.4/net-thunderx-start-phy-before-starting-autonegotiation.patch b/queue-5.4/net-thunderx-start-phy-before-starting-autonegotiation.patch
new file mode 100644 (file)
index 0000000..8550e58
--- /dev/null
@@ -0,0 +1,39 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Mian Yousaf Kaukab <ykaukab@suse.de>
+Date: Thu, 5 Dec 2019 10:41:16 +0100
+Subject: net: thunderx: start phy before starting autonegotiation
+
+From: Mian Yousaf Kaukab <ykaukab@suse.de>
+
+[ Upstream commit a350d2e7adbb57181d33e3aa6f0565632747feaa ]
+
+Since commit 2b3e88ea6528 ("net: phy: improve phy state checking")
+phy_start_aneg() expects phy state to be >= PHY_UP. Call phy_start()
+before calling phy_start_aneg() during probe so that autonegotiation
+is initiated.
+
+As phy_start() takes care of calling phy_start_aneg(), drop the explicit
+call to phy_start_aneg().
+
+Networking fails on Octeon TX without this patch.
+
+Fixes: 2b3e88ea6528 ("net: phy: improve phy state checking")
+Signed-off-by: Mian Yousaf Kaukab <ykaukab@suse.de>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/cavium/thunder/thunder_bgx.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
++++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+@@ -1115,7 +1115,7 @@ static int bgx_lmac_enable(struct bgx *b
+                                      phy_interface_mode(lmac->lmac_type)))
+                       return -ENODEV;
+-              phy_start_aneg(lmac->phydev);
++              phy_start(lmac->phydev);
+               return 0;
+       }
diff --git a/queue-5.4/net-tls-fix-return-values-to-avoid-enotsupp.patch b/queue-5.4/net-tls-fix-return-values-to-avoid-enotsupp.patch
new file mode 100644 (file)
index 0000000..1ef1963
--- /dev/null
@@ -0,0 +1,147 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Valentin Vidic <vvidic@valentin-vidic.from.hr>
+Date: Thu, 5 Dec 2019 07:41:18 +0100
+Subject: net/tls: Fix return values to avoid ENOTSUPP
+
+From: Valentin Vidic <vvidic@valentin-vidic.from.hr>
+
+[ Upstream commit 4a5cdc604b9cf645e6fa24d8d9f055955c3c8516 ]
+
+ENOTSUPP is not exposed to userspace, so applications cannot decode
+the error, for example:
+
+  setsockopt failed, 524, Unknown error 524
+
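+A userspace illustration of the problem (a standalone sketch, not part
+of the patch): strerror() has no text for the kernel-internal value
+524, but it does for EOPNOTSUPP.
+
+  #include <errno.h>
+  #include <stdio.h>
+  #include <string.h>
+
+  int main(void)
+  {
+          printf("%s\n", strerror(524));        /* "Unknown error 524" */
+          printf("%s\n", strerror(EOPNOTSUPP)); /* "Operation not supported" */
+          return 0;
+  }
+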
+Signed-off-by: Valentin Vidic <vvidic@valentin-vidic.from.hr>
+Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tls/tls_device.c              |    8 ++++----
+ net/tls/tls_main.c                |    4 ++--
+ net/tls/tls_sw.c                  |    8 ++++----
+ tools/testing/selftests/net/tls.c |    8 ++------
+ 4 files changed, 12 insertions(+), 16 deletions(-)
+
+--- a/net/tls/tls_device.c
++++ b/net/tls/tls_device.c
+@@ -417,7 +417,7 @@ static int tls_push_data(struct sock *sk
+       if (flags &
+           ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_SENDPAGE_NOTLAST))
+-              return -ENOTSUPP;
++              return -EOPNOTSUPP;
+       if (sk->sk_err)
+               return -sk->sk_err;
+@@ -560,7 +560,7 @@ int tls_device_sendpage(struct sock *sk,
+       lock_sock(sk);
+       if (flags & MSG_OOB) {
+-              rc = -ENOTSUPP;
++              rc = -EOPNOTSUPP;
+               goto out;
+       }
+@@ -999,7 +999,7 @@ int tls_set_device_offload(struct sock *
+       }
+       if (!(netdev->features & NETIF_F_HW_TLS_TX)) {
+-              rc = -ENOTSUPP;
++              rc = -EOPNOTSUPP;
+               goto release_netdev;
+       }
+@@ -1071,7 +1071,7 @@ int tls_set_device_offload_rx(struct soc
+       }
+       if (!(netdev->features & NETIF_F_HW_TLS_RX)) {
+-              rc = -ENOTSUPP;
++              rc = -EOPNOTSUPP;
+               goto release_netdev;
+       }
+--- a/net/tls/tls_main.c
++++ b/net/tls/tls_main.c
+@@ -482,7 +482,7 @@ static int do_tls_setsockopt_conf(struct
+       /* check version */
+       if (crypto_info->version != TLS_1_2_VERSION &&
+           crypto_info->version != TLS_1_3_VERSION) {
+-              rc = -ENOTSUPP;
++              rc = -EINVAL;
+               goto err_crypto_info;
+       }
+@@ -778,7 +778,7 @@ static int tls_init(struct sock *sk)
+        * share the ulp context.
+        */
+       if (sk->sk_state != TCP_ESTABLISHED)
+-              return -ENOTSUPP;
++              return -ENOTCONN;
+       tls_build_proto(sk);
+--- a/net/tls/tls_sw.c
++++ b/net/tls/tls_sw.c
+@@ -900,7 +900,7 @@ int tls_sw_sendmsg(struct sock *sk, stru
+       int ret = 0;
+       if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
+-              return -ENOTSUPP;
++              return -EOPNOTSUPP;
+       mutex_lock(&tls_ctx->tx_lock);
+       lock_sock(sk);
+@@ -1215,7 +1215,7 @@ int tls_sw_sendpage_locked(struct sock *
+       if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
+                     MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY |
+                     MSG_NO_SHARED_FRAGS))
+-              return -ENOTSUPP;
++              return -EOPNOTSUPP;
+       return tls_sw_do_sendpage(sk, page, offset, size, flags);
+ }
+@@ -1228,7 +1228,7 @@ int tls_sw_sendpage(struct sock *sk, str
+       if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
+                     MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY))
+-              return -ENOTSUPP;
++              return -EOPNOTSUPP;
+       mutex_lock(&tls_ctx->tx_lock);
+       lock_sock(sk);
+@@ -1927,7 +1927,7 @@ ssize_t tls_sw_splice_read(struct socket
+               /* splice does not support reading control messages */
+               if (ctx->control != TLS_RECORD_TYPE_DATA) {
+-                      err = -ENOTSUPP;
++                      err = -EINVAL;
+                       goto splice_read_end;
+               }
+--- a/tools/testing/selftests/net/tls.c
++++ b/tools/testing/selftests/net/tls.c
+@@ -25,10 +25,6 @@
+ #define TLS_PAYLOAD_MAX_LEN 16384
+ #define SOL_TLS 282
+-#ifndef ENOTSUPP
+-#define ENOTSUPP 524
+-#endif
+-
+ FIXTURE(tls_basic)
+ {
+       int fd, cfd;
+@@ -1205,11 +1201,11 @@ TEST(non_established) {
+       /* TLS ULP not supported */
+       if (errno == ENOENT)
+               return;
+-      EXPECT_EQ(errno, ENOTSUPP);
++      EXPECT_EQ(errno, ENOTCONN);
+       ret = setsockopt(sfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
+       EXPECT_EQ(ret, -1);
+-      EXPECT_EQ(errno, ENOTSUPP);
++      EXPECT_EQ(errno, ENOTCONN);
+       ret = getsockname(sfd, &addr, &len);
+       ASSERT_EQ(ret, 0);
diff --git a/queue-5.4/net_sched-validate-tca_kind-attribute-in-tc_chain_tmplt_add.patch b/queue-5.4/net_sched-validate-tca_kind-attribute-in-tc_chain_tmplt_add.patch
new file mode 100644 (file)
index 0000000..711bb8b
--- /dev/null
@@ -0,0 +1,109 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Sat, 7 Dec 2019 11:34:45 -0800
+Subject: net_sched: validate TCA_KIND attribute in tc_chain_tmplt_add()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 2dd5616ecdcebdf5a8d007af64e040d4e9214efe ]
+
+Use the new tcf_proto_check_kind() helper to make sure user
+provided value is well formed.
+
+BUG: KMSAN: uninit-value in string_nocheck lib/vsprintf.c:606 [inline]
+BUG: KMSAN: uninit-value in string+0x4be/0x600 lib/vsprintf.c:668
+CPU: 0 PID: 12358 Comm: syz-executor.1 Not tainted 5.4.0-rc8-syzkaller #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ __dump_stack lib/dump_stack.c:77 [inline]
+ dump_stack+0x1c9/0x220 lib/dump_stack.c:118
+ kmsan_report+0x128/0x220 mm/kmsan/kmsan_report.c:108
+ __msan_warning+0x64/0xc0 mm/kmsan/kmsan_instr.c:245
+ string_nocheck lib/vsprintf.c:606 [inline]
+ string+0x4be/0x600 lib/vsprintf.c:668
+ vsnprintf+0x218f/0x3210 lib/vsprintf.c:2510
+ __request_module+0x2b1/0x11c0 kernel/kmod.c:143
+ tcf_proto_lookup_ops+0x171/0x700 net/sched/cls_api.c:139
+ tc_chain_tmplt_add net/sched/cls_api.c:2730 [inline]
+ tc_ctl_chain+0x1904/0x38a0 net/sched/cls_api.c:2850
+ rtnetlink_rcv_msg+0x115a/0x1580 net/core/rtnetlink.c:5224
+ netlink_rcv_skb+0x431/0x620 net/netlink/af_netlink.c:2477
+ rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:5242
+ netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline]
+ netlink_unicast+0xf3e/0x1020 net/netlink/af_netlink.c:1328
+ netlink_sendmsg+0x110f/0x1330 net/netlink/af_netlink.c:1917
+ sock_sendmsg_nosec net/socket.c:637 [inline]
+ sock_sendmsg net/socket.c:657 [inline]
+ ___sys_sendmsg+0x14ff/0x1590 net/socket.c:2311
+ __sys_sendmsg net/socket.c:2356 [inline]
+ __do_sys_sendmsg net/socket.c:2365 [inline]
+ __se_sys_sendmsg+0x305/0x460 net/socket.c:2363
+ __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2363
+ do_syscall_64+0xb6/0x160 arch/x86/entry/common.c:291
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+RIP: 0033:0x45a649
+Code: ad b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00
+RSP: 002b:00007f0790795c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
+RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 000000000045a649
+RDX: 0000000000000000 RSI: 0000000020000300 RDI: 0000000000000006
+RBP: 000000000075bfc8 R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000246 R12: 00007f07907966d4
+R13: 00000000004c8db5 R14: 00000000004df630 R15: 00000000ffffffff
+
+Uninit was created at:
+ kmsan_save_stack_with_flags mm/kmsan/kmsan.c:149 [inline]
+ kmsan_internal_poison_shadow+0x5c/0x110 mm/kmsan/kmsan.c:132
+ kmsan_slab_alloc+0x97/0x100 mm/kmsan/kmsan_hooks.c:86
+ slab_alloc_node mm/slub.c:2773 [inline]
+ __kmalloc_node_track_caller+0xe27/0x11a0 mm/slub.c:4381
+ __kmalloc_reserve net/core/skbuff.c:141 [inline]
+ __alloc_skb+0x306/0xa10 net/core/skbuff.c:209
+ alloc_skb include/linux/skbuff.h:1049 [inline]
+ netlink_alloc_large_skb net/netlink/af_netlink.c:1174 [inline]
+ netlink_sendmsg+0x783/0x1330 net/netlink/af_netlink.c:1892
+ sock_sendmsg_nosec net/socket.c:637 [inline]
+ sock_sendmsg net/socket.c:657 [inline]
+ ___sys_sendmsg+0x14ff/0x1590 net/socket.c:2311
+ __sys_sendmsg net/socket.c:2356 [inline]
+ __do_sys_sendmsg net/socket.c:2365 [inline]
+ __se_sys_sendmsg+0x305/0x460 net/socket.c:2363
+ __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2363
+ do_syscall_64+0xb6/0x160 arch/x86/entry/common.c:291
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+Fixes: 6f96c3c6904c ("net_sched: fix backward compatibility for TCA_KIND")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
+Cc: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Cc: Jamal Hadi Salim <jhs@mojatatu.com>
+Cc: Jiri Pirko <jiri@resnulli.us>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/cls_api.c |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/net/sched/cls_api.c
++++ b/net/sched/cls_api.c
+@@ -2721,13 +2721,19 @@ static int tc_chain_tmplt_add(struct tcf
+                             struct netlink_ext_ack *extack)
+ {
+       const struct tcf_proto_ops *ops;
++      char name[IFNAMSIZ];
+       void *tmplt_priv;
+       /* If kind is not set, user did not specify template. */
+       if (!tca[TCA_KIND])
+               return 0;
+-      ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), true, extack);
++      if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
++              NL_SET_ERR_MSG(extack, "Specified TC chain template name too long");
++              return -EINVAL;
++      }
++
++      ops = tcf_proto_lookup_ops(name, true, extack);
+       if (IS_ERR(ops))
+               return PTR_ERR(ops);
+       if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
diff --git a/queue-5.4/openvswitch-support-asymmetric-conntrack.patch b/queue-5.4/openvswitch-support-asymmetric-conntrack.patch
new file mode 100644 (file)
index 0000000..d63053d
--- /dev/null
@@ -0,0 +1,46 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Aaron Conole <aconole@redhat.com>
+Date: Tue, 3 Dec 2019 16:34:13 -0500
+Subject: openvswitch: support asymmetric conntrack
+
+From: Aaron Conole <aconole@redhat.com>
+
+[ Upstream commit 5d50aa83e2c8e91ced2cca77c198b468ca9210f4 ]
+
+The openvswitch module shares a common conntrack and NAT infrastructure
+exposed via netfilter.  It's possible that a packet needs both SNAT and
+DNAT manipulation, due to e.g. tuple collision.  Netfilter can support
+this because it runs through the NAT table twice - once on ingress and
+again after egress.  The openvswitch module doesn't have such a
+capability.
+
+Like the netfilter hook infrastructure, we should run through NAT twice to
+keep the symmetry.
+
+Fixes: 05752523e565 ("openvswitch: Interface with NAT.")
+Signed-off-by: Aaron Conole <aconole@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/openvswitch/conntrack.c |   11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+--- a/net/openvswitch/conntrack.c
++++ b/net/openvswitch/conntrack.c
+@@ -903,6 +903,17 @@ static int ovs_ct_nat(struct net *net, s
+       }
+       err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype);
++      if (err == NF_ACCEPT &&
++          ct->status & IPS_SRC_NAT && ct->status & IPS_DST_NAT) {
++              if (maniptype == NF_NAT_MANIP_SRC)
++                      maniptype = NF_NAT_MANIP_DST;
++              else
++                      maniptype = NF_NAT_MANIP_SRC;
++
++              err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range,
++                                       maniptype);
++      }
++
+       /* Mark NAT done if successful and update the flow key. */
+       if (err == NF_ACCEPT)
+               ovs_nat_update_key(key, skb, maniptype);
diff --git a/queue-5.4/page_pool-do-not-release-pool-until-inflight-0.patch b/queue-5.4/page_pool-do-not-release-pool-until-inflight-0.patch
new file mode 100644 (file)
index 0000000..0c9ebe9
--- /dev/null
@@ -0,0 +1,565 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Jonathan Lemon <jonathan.lemon@gmail.com>
+Date: Thu, 14 Nov 2019 14:13:00 -0800
+Subject: page_pool: do not release pool until inflight == 0.
+
+From: Jonathan Lemon <jonathan.lemon@gmail.com>
+
+[ Upstream commit c3f812cea0d7006469d1cf33a4a9f0a12bb4b3a3 ]
+
+The page pool keeps track of the number of pages in flight, and
+it isn't safe to remove the pool until all pages are returned.
+
+Disallow removing the pool until all pages are back, so the pool
+is always available for page producers.
+
+Make the page pool responsible for its own delayed destruction
+instead of relying on XDP, so the page pool can be used without
+the xdp memory model.
+
+When all pages are returned, free the pool and notify xdp if the
+pool is registered with the xdp memory system.  Have the callback
+perform a table walk since some drivers (cpsw) may share the pool
+among multiple xdp_rxq_info.
+
+Note that the increment of pages_state_release_cnt may bring inflight
+down to 0, in which case the pool is released.
+
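+The accounting that makes this safe, sketched (illustrative only; the
+real helpers live in net/core/page_pool.c):
+
+  #include <stdint.h>
+
+  /* signed difference of two free-running u32 counters stays correct
+   * across wraparound, like the kernel's _distance(hold, release)
+   */
+  static int32_t inflight(uint32_t hold_cnt, uint32_t release_cnt)
+  {
+          return (int32_t)(hold_cnt - release_cnt);
+  }
+  /* inflight() == 0  =>  every page handed out has been returned,
+   * so the pool may finally be freed
+   */
+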
+Fixes: d956a048cd3f ("xdp: force mem allocator removal and periodic warning")
+Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
+Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
+Acked-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |    4 
+ include/net/page_pool.h                           |   52 ++-------
+ include/net/xdp_priv.h                            |    4 
+ include/trace/events/xdp.h                        |   19 ---
+ net/core/page_pool.c                              |  122 +++++++++++++---------
+ net/core/xdp.c                                    |  121 +++++++--------------
+ 6 files changed, 139 insertions(+), 183 deletions(-)
+
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+@@ -1502,10 +1502,8 @@ static void free_dma_rx_desc_resources(s
+                                         rx_q->dma_erx, rx_q->dma_rx_phy);
+               kfree(rx_q->buf_pool);
+-              if (rx_q->page_pool) {
+-                      page_pool_request_shutdown(rx_q->page_pool);
++              if (rx_q->page_pool)
+                       page_pool_destroy(rx_q->page_pool);
+-              }
+       }
+ }
+--- a/include/net/page_pool.h
++++ b/include/net/page_pool.h
+@@ -70,7 +70,12 @@ struct page_pool_params {
+ struct page_pool {
+       struct page_pool_params p;
+-        u32 pages_state_hold_cnt;
++      struct delayed_work release_dw;
++      void (*disconnect)(void *);
++      unsigned long defer_start;
++      unsigned long defer_warn;
++
++      u32 pages_state_hold_cnt;
+       /*
+        * Data structure for allocation side
+@@ -129,25 +134,19 @@ inline enum dma_data_direction page_pool
+ struct page_pool *page_pool_create(const struct page_pool_params *params);
+-void __page_pool_free(struct page_pool *pool);
+-static inline void page_pool_free(struct page_pool *pool)
+-{
+-      /* When page_pool isn't compiled-in, net/core/xdp.c doesn't
+-       * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
+-       */
+ #ifdef CONFIG_PAGE_POOL
+-      __page_pool_free(pool);
+-#endif
+-}
+-
+-/* Drivers use this instead of page_pool_free */
++void page_pool_destroy(struct page_pool *pool);
++void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *));
++#else
+ static inline void page_pool_destroy(struct page_pool *pool)
+ {
+-      if (!pool)
+-              return;
++}
+-      page_pool_free(pool);
++static inline void page_pool_use_xdp_mem(struct page_pool *pool,
++                                       void (*disconnect)(void *))
++{
+ }
++#endif
+ /* Never call this directly, use helpers below */
+ void __page_pool_put_page(struct page_pool *pool,
+@@ -170,24 +169,6 @@ static inline void page_pool_recycle_dir
+       __page_pool_put_page(pool, page, true);
+ }
+-/* API user MUST have disconnected alloc-side (not allowed to call
+- * page_pool_alloc_pages()) before calling this.  The free-side can
+- * still run concurrently, to handle in-flight packet-pages.
+- *
+- * A request to shutdown can fail (with false) if there are still
+- * in-flight packet-pages.
+- */
+-bool __page_pool_request_shutdown(struct page_pool *pool);
+-static inline bool page_pool_request_shutdown(struct page_pool *pool)
+-{
+-      bool safe_to_remove = false;
+-
+-#ifdef CONFIG_PAGE_POOL
+-      safe_to_remove = __page_pool_request_shutdown(pool);
+-#endif
+-      return safe_to_remove;
+-}
+-
+ /* Disconnects a page (from a page_pool).  API users can have a need
+  * to disconnect a page (from a page_pool), to allow it to be used as
+  * a regular page (that will eventually be returned to the normal
+@@ -216,11 +197,6 @@ static inline bool is_page_pool_compiled
+ #endif
+ }
+-static inline void page_pool_get(struct page_pool *pool)
+-{
+-      refcount_inc(&pool->user_cnt);
+-}
+-
+ static inline bool page_pool_put(struct page_pool *pool)
+ {
+       return refcount_dec_and_test(&pool->user_cnt);
+--- a/include/net/xdp_priv.h
++++ b/include/net/xdp_priv.h
+@@ -12,12 +12,8 @@ struct xdp_mem_allocator {
+               struct page_pool *page_pool;
+               struct zero_copy_allocator *zc_alloc;
+       };
+-      int disconnect_cnt;
+-      unsigned long defer_start;
+       struct rhash_head node;
+       struct rcu_head rcu;
+-      struct delayed_work defer_wq;
+-      unsigned long defer_warn;
+ };
+ #endif /* __LINUX_NET_XDP_PRIV_H__ */
+--- a/include/trace/events/xdp.h
++++ b/include/trace/events/xdp.h
+@@ -317,19 +317,15 @@ __MEM_TYPE_MAP(__MEM_TYPE_TP_FN)
+ TRACE_EVENT(mem_disconnect,
+-      TP_PROTO(const struct xdp_mem_allocator *xa,
+-               bool safe_to_remove, bool force),
++      TP_PROTO(const struct xdp_mem_allocator *xa),
+-      TP_ARGS(xa, safe_to_remove, force),
++      TP_ARGS(xa),
+       TP_STRUCT__entry(
+               __field(const struct xdp_mem_allocator *,       xa)
+               __field(u32,            mem_id)
+               __field(u32,            mem_type)
+               __field(const void *,   allocator)
+-              __field(bool,           safe_to_remove)
+-              __field(bool,           force)
+-              __field(int,            disconnect_cnt)
+       ),
+       TP_fast_assign(
+@@ -337,19 +333,12 @@ TRACE_EVENT(mem_disconnect,
+               __entry->mem_id         = xa->mem.id;
+               __entry->mem_type       = xa->mem.type;
+               __entry->allocator      = xa->allocator;
+-              __entry->safe_to_remove = safe_to_remove;
+-              __entry->force          = force;
+-              __entry->disconnect_cnt = xa->disconnect_cnt;
+       ),
+-      TP_printk("mem_id=%d mem_type=%s allocator=%p"
+-                " safe_to_remove=%s force=%s disconnect_cnt=%d",
++      TP_printk("mem_id=%d mem_type=%s allocator=%p",
+                 __entry->mem_id,
+                 __print_symbolic(__entry->mem_type, __MEM_TYPE_SYM_TAB),
+-                __entry->allocator,
+-                __entry->safe_to_remove ? "true" : "false",
+-                __entry->force ? "true" : "false",
+-                __entry->disconnect_cnt
++                __entry->allocator
+       )
+ );
+--- a/net/core/page_pool.c
++++ b/net/core/page_pool.c
+@@ -18,6 +18,9 @@
+ #include <trace/events/page_pool.h>
++#define DEFER_TIME (msecs_to_jiffies(1000))
++#define DEFER_WARN_INTERVAL (60 * HZ)
++
+ static int page_pool_init(struct page_pool *pool,
+                         const struct page_pool_params *params)
+ {
+@@ -193,22 +196,14 @@ static s32 page_pool_inflight(struct pag
+ {
+       u32 release_cnt = atomic_read(&pool->pages_state_release_cnt);
+       u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt);
+-      s32 distance;
+-
+-      distance = _distance(hold_cnt, release_cnt);
+-
+-      trace_page_pool_inflight(pool, distance, hold_cnt, release_cnt);
+-      return distance;
+-}
++      s32 inflight;
+-static bool __page_pool_safe_to_destroy(struct page_pool *pool)
+-{
+-      s32 inflight = page_pool_inflight(pool);
++      inflight = _distance(hold_cnt, release_cnt);
+-      /* The distance should not be able to become negative */
++      trace_page_pool_inflight(pool, inflight, hold_cnt, release_cnt);
+       WARN(inflight < 0, "Negative(%d) inflight packet-pages", inflight);
+-      return (inflight == 0);
++      return inflight;
+ }
+ /* Cleanup page_pool state from page */
+@@ -216,6 +211,7 @@ static void __page_pool_clean_page(struc
+                                  struct page *page)
+ {
+       dma_addr_t dma;
++      int count;
+       if (!(pool->p.flags & PP_FLAG_DMA_MAP))
+               goto skip_dma_unmap;
+@@ -227,9 +223,11 @@ static void __page_pool_clean_page(struc
+                            DMA_ATTR_SKIP_CPU_SYNC);
+       page->dma_addr = 0;
+ skip_dma_unmap:
+-      atomic_inc(&pool->pages_state_release_cnt);
+-      trace_page_pool_state_release(pool, page,
+-                            atomic_read(&pool->pages_state_release_cnt));
++      /* This may be the last page returned, releasing the pool, so
++       * it is not safe to reference pool afterwards.
++       */
++      count = atomic_inc_return(&pool->pages_state_release_cnt);
++      trace_page_pool_state_release(pool, page, count);
+ }
+ /* unmap the page and clean our state */
+@@ -338,31 +336,10 @@ static void __page_pool_empty_ring(struc
+       }
+ }
+-static void __warn_in_flight(struct page_pool *pool)
++static void page_pool_free(struct page_pool *pool)
+ {
+-      u32 release_cnt = atomic_read(&pool->pages_state_release_cnt);
+-      u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt);
+-      s32 distance;
+-
+-      distance = _distance(hold_cnt, release_cnt);
+-
+-      /* Drivers should fix this, but only problematic when DMA is used */
+-      WARN(1, "Still in-flight pages:%d hold:%u released:%u",
+-           distance, hold_cnt, release_cnt);
+-}
+-
+-void __page_pool_free(struct page_pool *pool)
+-{
+-      /* Only last user actually free/release resources */
+-      if (!page_pool_put(pool))
+-              return;
+-
+-      WARN(pool->alloc.count, "API usage violation");
+-      WARN(!ptr_ring_empty(&pool->ring), "ptr_ring is not empty");
+-
+-      /* Can happen due to forced shutdown */
+-      if (!__page_pool_safe_to_destroy(pool))
+-              __warn_in_flight(pool);
++      if (pool->disconnect)
++              pool->disconnect(pool);
+       ptr_ring_cleanup(&pool->ring, NULL);
+@@ -371,12 +348,8 @@ void __page_pool_free(struct page_pool *
+       kfree(pool);
+ }
+-EXPORT_SYMBOL(__page_pool_free);
+-/* Request to shutdown: release pages cached by page_pool, and check
+- * for in-flight pages
+- */
+-bool __page_pool_request_shutdown(struct page_pool *pool)
++static void page_pool_scrub(struct page_pool *pool)
+ {
+       struct page *page;
+@@ -393,7 +366,64 @@ bool __page_pool_request_shutdown(struct
+        * be in-flight.
+        */
+       __page_pool_empty_ring(pool);
++}
++
++static int page_pool_release(struct page_pool *pool)
++{
++      int inflight;
++
++      page_pool_scrub(pool);
++      inflight = page_pool_inflight(pool);
++      if (!inflight)
++              page_pool_free(pool);
++
++      return inflight;
++}
++
++static void page_pool_release_retry(struct work_struct *wq)
++{
++      struct delayed_work *dwq = to_delayed_work(wq);
++      struct page_pool *pool = container_of(dwq, typeof(*pool), release_dw);
++      int inflight;
++
++      inflight = page_pool_release(pool);
++      if (!inflight)
++              return;
++
++      /* Periodic warning */
++      if (time_after_eq(jiffies, pool->defer_warn)) {
++              int sec = (s32)((u32)jiffies - (u32)pool->defer_start) / HZ;
++
++              pr_warn("%s() stalled pool shutdown %d inflight %d sec\n",
++                      __func__, inflight, sec);
++              pool->defer_warn = jiffies + DEFER_WARN_INTERVAL;
++      }
++
++      /* Still not ready to be disconnected, retry later */
++      schedule_delayed_work(&pool->release_dw, DEFER_TIME);
++}
++
++void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *))
++{
++      refcount_inc(&pool->user_cnt);
++      pool->disconnect = disconnect;
++}
++
++void page_pool_destroy(struct page_pool *pool)
++{
++      if (!pool)
++              return;
++
++      if (!page_pool_put(pool))
++              return;
++
++      if (!page_pool_release(pool))
++              return;
++
++      pool->defer_start = jiffies;
++      pool->defer_warn  = jiffies + DEFER_WARN_INTERVAL;
+-      return __page_pool_safe_to_destroy(pool);
++      INIT_DELAYED_WORK(&pool->release_dw, page_pool_release_retry);
++      schedule_delayed_work(&pool->release_dw, DEFER_TIME);
+ }
+-EXPORT_SYMBOL(__page_pool_request_shutdown);
++EXPORT_SYMBOL(page_pool_destroy);
+--- a/net/core/xdp.c
++++ b/net/core/xdp.c
+@@ -70,10 +70,6 @@ static void __xdp_mem_allocator_rcu_free
+       xa = container_of(rcu, struct xdp_mem_allocator, rcu);
+-      /* Allocator have indicated safe to remove before this is called */
+-      if (xa->mem.type == MEM_TYPE_PAGE_POOL)
+-              page_pool_free(xa->page_pool);
+-
+       /* Allow this ID to be reused */
+       ida_simple_remove(&mem_id_pool, xa->mem.id);
+@@ -85,62 +81,57 @@ static void __xdp_mem_allocator_rcu_free
+       kfree(xa);
+ }
+-static bool __mem_id_disconnect(int id, bool force)
++static void mem_xa_remove(struct xdp_mem_allocator *xa)
+ {
+-      struct xdp_mem_allocator *xa;
+-      bool safe_to_remove = true;
++      trace_mem_disconnect(xa);
+       mutex_lock(&mem_id_lock);
+-      xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params);
+-      if (!xa) {
+-              mutex_unlock(&mem_id_lock);
+-              WARN(1, "Request remove non-existing id(%d), driver bug?", id);
+-              return true;
+-      }
+-      xa->disconnect_cnt++;
+-
+-      /* Detects in-flight packet-pages for page_pool */
+-      if (xa->mem.type == MEM_TYPE_PAGE_POOL)
+-              safe_to_remove = page_pool_request_shutdown(xa->page_pool);
+-
+-      trace_mem_disconnect(xa, safe_to_remove, force);
+-
+-      if ((safe_to_remove || force) &&
+-          !rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params))
++      if (!rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params))
+               call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
+       mutex_unlock(&mem_id_lock);
+-      return (safe_to_remove|force);
+ }
+-#define DEFER_TIME (msecs_to_jiffies(1000))
+-#define DEFER_WARN_INTERVAL (30 * HZ)
+-#define DEFER_MAX_RETRIES 120
++static void mem_allocator_disconnect(void *allocator)
++{
++      struct xdp_mem_allocator *xa;
++      struct rhashtable_iter iter;
++
++      rhashtable_walk_enter(mem_id_ht, &iter);
++      do {
++              rhashtable_walk_start(&iter);
++
++              while ((xa = rhashtable_walk_next(&iter)) && !IS_ERR(xa)) {
++                      if (xa->allocator == allocator)
++                              mem_xa_remove(xa);
++              }
++
++              rhashtable_walk_stop(&iter);
++
++      } while (xa == ERR_PTR(-EAGAIN));
++      rhashtable_walk_exit(&iter);
++}
+-static void mem_id_disconnect_defer_retry(struct work_struct *wq)
++static void mem_id_disconnect(int id)
+ {
+-      struct delayed_work *dwq = to_delayed_work(wq);
+-      struct xdp_mem_allocator *xa = container_of(dwq, typeof(*xa), defer_wq);
+-      bool force = false;
++      struct xdp_mem_allocator *xa;
+-      if (xa->disconnect_cnt > DEFER_MAX_RETRIES)
+-              force = true;
++      mutex_lock(&mem_id_lock);
+-      if (__mem_id_disconnect(xa->mem.id, force))
++      xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params);
++      if (!xa) {
++              mutex_unlock(&mem_id_lock);
++              WARN(1, "Request remove non-existing id(%d), driver bug?", id);
+               return;
++      }
+-      /* Periodic warning */
+-      if (time_after_eq(jiffies, xa->defer_warn)) {
+-              int sec = (s32)((u32)jiffies - (u32)xa->defer_start) / HZ;
++      trace_mem_disconnect(xa);
+-              pr_warn("%s() stalled mem.id=%u shutdown %d attempts %d sec\n",
+-                      __func__, xa->mem.id, xa->disconnect_cnt, sec);
+-              xa->defer_warn = jiffies + DEFER_WARN_INTERVAL;
+-      }
++      if (!rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params))
++              call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
+-      /* Still not ready to be disconnected, retry later */
+-      schedule_delayed_work(&xa->defer_wq, DEFER_TIME);
++      mutex_unlock(&mem_id_lock);
+ }
+ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
+@@ -153,38 +144,21 @@ void xdp_rxq_info_unreg_mem_model(struct
+               return;
+       }
+-      if (xdp_rxq->mem.type != MEM_TYPE_PAGE_POOL &&
+-          xdp_rxq->mem.type != MEM_TYPE_ZERO_COPY) {
+-              return;
+-      }
+-
+       if (id == 0)
+               return;
+-      if (__mem_id_disconnect(id, false))
+-              return;
+-
+-      /* Could not disconnect, defer new disconnect attempt to later */
+-      mutex_lock(&mem_id_lock);
++      if (xdp_rxq->mem.type == MEM_TYPE_ZERO_COPY)
++              return mem_id_disconnect(id);
+-      xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params);
+-      if (!xa) {
+-              mutex_unlock(&mem_id_lock);
+-              return;
++      if (xdp_rxq->mem.type == MEM_TYPE_PAGE_POOL) {
++              rcu_read_lock();
++              xa = rhashtable_lookup(mem_id_ht, &id, mem_id_rht_params);
++              page_pool_destroy(xa->page_pool);
++              rcu_read_unlock();
+       }
+-      xa->defer_start = jiffies;
+-      xa->defer_warn  = jiffies + DEFER_WARN_INTERVAL;
+-
+-      INIT_DELAYED_WORK(&xa->defer_wq, mem_id_disconnect_defer_retry);
+-      mutex_unlock(&mem_id_lock);
+-      schedule_delayed_work(&xa->defer_wq, DEFER_TIME);
+ }
+ EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg_mem_model);
+-/* This unregister operation will also cleanup and destroy the
+- * allocator. The page_pool_free() operation is first called when it's
+- * safe to remove, possibly deferred to a workqueue.
+- */
+ void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
+ {
+       /* Simplify driver cleanup code paths, allow unreg "unused" */
+@@ -371,7 +345,7 @@ int xdp_rxq_info_reg_mem_model(struct xd
+       }
+       if (type == MEM_TYPE_PAGE_POOL)
+-              page_pool_get(xdp_alloc->page_pool);
++              page_pool_use_xdp_mem(allocator, mem_allocator_disconnect);
+       mutex_unlock(&mem_id_lock);
+@@ -402,15 +376,8 @@ static void __xdp_return(void *data, str
+               /* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
+               xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
+               page = virt_to_head_page(data);
+-              if (likely(xa)) {
+-                      napi_direct &= !xdp_return_frame_no_direct();
+-                      page_pool_put_page(xa->page_pool, page, napi_direct);
+-              } else {
+-                      /* Hopefully stack show who to blame for late return */
+-                      WARN_ONCE(1, "page_pool gone mem.id=%d", mem->id);
+-                      trace_mem_return_failed(mem, page);
+-                      put_page(page);
+-              }
++              napi_direct &= !xdp_return_frame_no_direct();
++              page_pool_put_page(xa->page_pool, page, napi_direct);
+               rcu_read_unlock();
+               break;
+       case MEM_TYPE_PAGE_SHARED:
diff --git a/queue-5.4/r8169-add-missing-rx-enabling-for-wol-on-rtl8125.patch b/queue-5.4/r8169-add-missing-rx-enabling-for-wol-on-rtl8125.patch
new file mode 100644 (file)
index 0000000..be9eb2d
--- /dev/null
@@ -0,0 +1,32 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Heiner Kallweit <hkallweit1@gmail.com>
+Date: Fri, 6 Dec 2019 23:27:15 +0100
+Subject: r8169: add missing RX enabling for WoL on RTL8125
+
+From: Heiner Kallweit <hkallweit1@gmail.com>
+
+[ Upstream commit 00222d1394104f0fd6c01ca9f578afec9e0f148b ]
+
+RTL8125 also requires RX to be enabled for WoL.
+
+v2: add missing Fixes tag
+
+Fixes: f1bce4ad2f1c ("r8169: add support for RTL8125")
+Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/realtek/r8169_main.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/realtek/r8169_main.c
++++ b/drivers/net/ethernet/realtek/r8169_main.c
+@@ -3920,7 +3920,7 @@ static void rtl_wol_suspend_quirk(struct
+       case RTL_GIGA_MAC_VER_32:
+       case RTL_GIGA_MAC_VER_33:
+       case RTL_GIGA_MAC_VER_34:
+-      case RTL_GIGA_MAC_VER_37 ... RTL_GIGA_MAC_VER_51:
++      case RTL_GIGA_MAC_VER_37 ... RTL_GIGA_MAC_VER_61:
+               RTL_W32(tp, RxConfig, RTL_R32(tp, RxConfig) |
+                       AcceptBroadcast | AcceptMulticast | AcceptMyPhys);
+               break;
diff --git a/queue-5.4/series b/queue-5.4/series
new file mode 100644 (file)
index 0000000..4c6a4cc
--- /dev/null
@@ -0,0 +1,37 @@
+inet-protect-against-too-small-mtu-values.patch
+mqprio-fix-out-of-bounds-access-in-mqprio_dump.patch
+net-bridge-deny-dev_set_mac_address-when-unregistering.patch
+net-dsa-fix-flow-dissection-on-tx-path.patch
+net-ethernet-ti-cpsw-fix-extra-rx-interrupt.patch
+net-sched-fix-dump-qlen-for-sch_mq-sch_mqprio-with-nolock-subqueues.patch
+net_sched-validate-tca_kind-attribute-in-tc_chain_tmplt_add.patch
+net-sysfs-call-dev_hold-always-in-netdev_queue_add_kobject.patch
+net-thunderx-start-phy-before-starting-autonegotiation.patch
+net-tls-fix-return-values-to-avoid-enotsupp.patch
+openvswitch-support-asymmetric-conntrack.patch
+tcp-md5-fix-potential-overestimation-of-tcp-option-space.patch
+tipc-fix-ordering-of-tipc-module-init-and-exit-routine.patch
+net-mlx5e-query-global-pause-state-before-setting-prio2buffer.patch
+net-ipv6-add-net-argument-to-ip6_dst_lookup_flow.patch
+net-ipv6_stub-use-ip6_dst_lookup_flow-instead-of-ip6_dst_lookup.patch
+tcp-fix-rejected-syncookies-due-to-stale-timestamps.patch
+tcp-tighten-acceptance-of-acks-not-matching-a-child-socket.patch
+tcp-protect-accesses-to-.ts_recent_stamp-with-read-write-_once.patch
+net-core-rename-indirect-block-ingress-cb-function.patch
+net-sched-allow-indirect-blocks-to-bind-to-clsact-in-tc.patch
+cls_flower-fix-the-behavior-using-port-ranges-with-hw-offload.patch
+gre-refetch-erspan-header-from-skb-data-after-pskb_may_pull.patch
+fixed-updating-of-ethertype-in-function-skb_mpls_pop.patch
+hsr-fix-a-null-pointer-dereference-in-hsr_dev_xmit.patch
+net-fixed-updating-of-ethertype-in-skb_mpls_push.patch
+net-mlx5e-fix-txq-indices-to-be-sequential.patch
+act_ct-support-asymmetric-conntrack.patch
+net-mlx5e-fix-sff-8472-eeprom-length.patch
+net-mlx5e-fix-freeing-flow-with-kfree-and-not-kvfree.patch
+net-mlx5e-fix-translation-of-link-mode-into-speed.patch
+net-mlx5e-ethtool-fix-analysis-of-speed-setting.patch
+page_pool-do-not-release-pool-until-inflight-0.patch
+xdp-obtain-the-mem_id-mutex-before-trying-to-remove-an-entry.patch
+ionic-keep-users-rss-hash-across-lif-reset.patch
+net-mscc-ocelot-unregister-the-ptp-clock-on-deinit.patch
+r8169-add-missing-rx-enabling-for-wol-on-rtl8125.patch
diff --git a/queue-5.4/tcp-fix-rejected-syncookies-due-to-stale-timestamps.patch b/queue-5.4/tcp-fix-rejected-syncookies-due-to-stale-timestamps.patch
new file mode 100644 (file)
index 0000000..42cf863
--- /dev/null
@@ -0,0 +1,117 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Guillaume Nault <gnault@redhat.com>
+Date: Fri, 6 Dec 2019 12:38:36 +0100
+Subject: tcp: fix rejected syncookies due to stale timestamps
+
+From: Guillaume Nault <gnault@redhat.com>
+
+[ Upstream commit 04d26e7b159a396372646a480f4caa166d1b6720 ]
+
+If no synflood happens for a long enough period of time, then the
+synflood timestamp isn't refreshed and jiffies can advance so much
+that time_after32() can't accurately compare them any more.
+
+Therefore, we can end up in a situation where time_after32(now,
+last_overflow + HZ) returns false, just because these two values are
+too far apart. In that case, the synflood timestamp isn't updated as
+it should be, which can trick tcp_synq_no_recent_overflow() into
+rejecting valid syncookies.
+
+For example, let's consider the following scenario on a system
+with HZ=1000:
+
+  * The synflood timestamp is 0, either because that's the timestamp
+    of the last synflood or, more commonly, because we're working with
+    a freshly created socket.
+
+  * We receive a new SYN, which triggers synflood protection. Let's say
+    that this happens when jiffies == 2147484649 (that is,
+    'synflood timestamp' + HZ + 2^31 + 1).
+
+  * Then tcp_synq_overflow() doesn't update the synflood timestamp,
+    because time_after32(2147484649, 1000) returns false.
+    With:
+      - 2147484649: the value of jiffies, aka. 'now'.
+      - 1000: the value of 'last_overflow' + HZ.
+
+  * A bit later, we receive the ACK completing the 3WHS. But
+    cookie_v[46]_check() rejects it because tcp_synq_no_recent_overflow()
+    says that we're not under synflood. That's because
+    time_after32(2147484649, 120000) returns false.
+    With:
+      - 2147484649: the value of jiffies, aka. 'now'.
+      - 120000: the value of 'last_overflow' + TCP_SYNCOOKIE_VALID.
+
+    Of course, in reality jiffies would have increased a bit, but this
+    condition will last for the next 119 seconds, which is more than
+    enough to accommodate jiffies' growth.
+
+Fix this by updating the overflow timestamp whenever jiffies isn't
+within the [last_overflow, last_overflow + HZ] range. That shouldn't
+have any performance impact since the update still happens at most once
+per second.
+
+Now we're guaranteed to have fresh timestamps while under synflood, so
+tcp_synq_no_recent_overflow() can safely use it with time_after32() in
+such situations.
+
+Stale timestamps can still make tcp_synq_no_recent_overflow() return
+the wrong verdict when not under synflood. This will be handled in the
+next patch.
+
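+The arithmetic failure is easy to reproduce in userspace (a sketch
+using the same macro definition as include/linux/time.h, with the
+HZ=1000 numbers from the scenario above):
+
+  #include <stdint.h>
+  #include <stdio.h>
+
+  #define time_after32(a, b) ((int32_t)((uint32_t)(b) - (uint32_t)(a)) < 0)
+
+  int main(void)
+  {
+          uint32_t last_overflow = 0, HZ = 1000;
+          uint32_t now = last_overflow + HZ + (1u << 31) + 1;
+
+          /* prints 0: 'now' is so far ahead that it looks like the past */
+          printf("%d\n", time_after32(now, last_overflow + HZ));
+          return 0;
+  }
+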
+For 64 bits architectures, the problem was introduced with the
+conversion of ->tw_ts_recent_stamp to 32 bits integer by commit
+cca9bab1b72c ("tcp: use monotonic timestamps for PAWS").
+The problem has always been there on 32 bits architectures.
+
+Fixes: cca9bab1b72c ("tcp: use monotonic timestamps for PAWS")
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Guillaume Nault <gnault@redhat.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/time.h |   13 +++++++++++++
+ include/net/tcp.h    |    5 +++--
+ 2 files changed, 16 insertions(+), 2 deletions(-)
+
+--- a/include/linux/time.h
++++ b/include/linux/time.h
+@@ -96,4 +96,17 @@ static inline bool itimerspec64_valid(co
+  */
+ #define time_after32(a, b)    ((s32)((u32)(b) - (u32)(a)) < 0)
+ #define time_before32(b, a)   time_after32(a, b)
++
++/**
++ * time_between32 - check if a 32-bit timestamp is within a given time range
++ * @t:        the time which may be within [l,h]
++ * @l:        the lower bound of the range
++ * @h:        the higher bound of the range
++ *
+ * time_between32(@t, @l, @h) returns true if @l <= @t <= @h. All operands are
++ * treated as 32-bit integers.
++ *
++ * Equivalent to !(time_before32(@t, @l) || time_after32(@t, @h)).
++ */
++#define time_between32(t, l, h) ((u32)(h) - (u32)(l) >= (u32)(t) - (u32)(l))
+ #endif
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -494,14 +494,15 @@ static inline void tcp_synq_overflow(con
+               reuse = rcu_dereference(sk->sk_reuseport_cb);
+               if (likely(reuse)) {
+                       last_overflow = READ_ONCE(reuse->synq_overflow_ts);
+-                      if (time_after32(now, last_overflow + HZ))
++                      if (!time_between32(now, last_overflow,
++                                          last_overflow + HZ))
+                               WRITE_ONCE(reuse->synq_overflow_ts, now);
+                       return;
+               }
+       }
+       last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+-      if (time_after32(now, last_overflow + HZ))
++      if (!time_between32(now, last_overflow, last_overflow + HZ))
+               tcp_sk(sk)->rx_opt.ts_recent_stamp = now;
+ }
diff --git a/queue-5.4/tcp-md5-fix-potential-overestimation-of-tcp-option-space.patch b/queue-5.4/tcp-md5-fix-potential-overestimation-of-tcp-option-space.patch
new file mode 100644 (file)
index 0000000..b111ed0
--- /dev/null
@@ -0,0 +1,46 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 5 Dec 2019 10:10:15 -0800
+Subject: tcp: md5: fix potential overestimation of TCP option space
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 9424e2e7ad93ffffa88f882c9bc5023570904b55 ]
+
+Back in 2008, Adam Langley fixed the corner case of packets for flows
+having all of the following options : MD5 TS SACK
+
+Since MD5 needs 20 bytes and TS needs 12 bytes, no SACK block
+can be cooked from the remaining 8 bytes.
+
+tcp_established_options() correctly sets opts->num_sack_blocks
+to zero, but returns 36 instead of 32.
+
+This means TCP cooks packets with 4 extra bytes at the end
+of options, containing uninitialized bytes.
+
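+Worked through with the aligned option lengths from include/net/tcp.h
+(an illustration of the bug, not new code):
+
+  option space:    MAX_TCP_OPTION_SPACE            = 40 bytes
+  MD5 + TS:        20 + 12                         = 32 bytes
+  remaining:       40 - 32                         =  8 bytes
+  num_sack_blocks: (8 - TCPOLEN_SACK_BASE_ALIGNED) / TCPOLEN_SACK_PERBLOCK
+                 = (8 - 4) / 8                     =  0
+  old size:        32 + 4 + 0 * 8                  = 36 bytes  (4 too many)
+  fixed size:      32 bytes (the SACK terms are skipped when
+                   num_sack_blocks == 0)
+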
+Fixes: 33ad798c924b ("tcp: options clean up")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -755,8 +755,9 @@ static unsigned int tcp_established_opti
+                       min_t(unsigned int, eff_sacks,
+                             (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
+                             TCPOLEN_SACK_PERBLOCK);
+-              size += TCPOLEN_SACK_BASE_ALIGNED +
+-                      opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
++              if (likely(opts->num_sack_blocks))
++                      size += TCPOLEN_SACK_BASE_ALIGNED +
++                              opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
+       }
+       return size;
diff --git a/queue-5.4/tcp-protect-accesses-to-.ts_recent_stamp-with-read-write-_once.patch b/queue-5.4/tcp-protect-accesses-to-.ts_recent_stamp-with-read-write-_once.patch
new file mode 100644 (file)
index 0000000..5fa2d5a
--- /dev/null
@@ -0,0 +1,50 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Guillaume Nault <gnault@redhat.com>
+Date: Fri, 6 Dec 2019 12:38:49 +0100
+Subject: tcp: Protect accesses to .ts_recent_stamp with {READ,WRITE}_ONCE()
+
+From: Guillaume Nault <gnault@redhat.com>
+
+[ Upstream commit 721c8dafad26ccfa90ff659ee19755e3377b829d ]
+
+Syncookies borrow the ->rx_opt.ts_recent_stamp field to store the
+timestamp of the last synflood. Protect them with READ_ONCE() and
+WRITE_ONCE() since reads and writes aren't serialised.
+
+Use of .rx_opt.ts_recent_stamp for storing the synflood timestamp was
+introduced by a0f82f64e269 ("syncookies: remove last_synq_overflow from
+struct tcp_sock"). But unprotected accesses were already there when
+timestamp was stored in .last_synq_overflow.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Guillaume Nault <gnault@redhat.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/tcp.h |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -501,9 +501,9 @@ static inline void tcp_synq_overflow(con
+               }
+       }
+-      last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
++      last_overflow = READ_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp);
+       if (!time_between32(now, last_overflow, last_overflow + HZ))
+-              tcp_sk(sk)->rx_opt.ts_recent_stamp = now;
++              WRITE_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp, now);
+ }
+ /* syncookies: no recent synqueue overflow on this listening socket? */
+@@ -524,7 +524,7 @@ static inline bool tcp_synq_no_recent_ov
+               }
+       }
+-      last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
++      last_overflow = READ_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp);
+       /* If last_overflow <= jiffies <= last_overflow + TCP_SYNCOOKIE_VALID,
+        * then we're under synflood. However, we have to use
diff --git a/queue-5.4/tcp-tighten-acceptance-of-acks-not-matching-a-child-socket.patch b/queue-5.4/tcp-tighten-acceptance-of-acks-not-matching-a-child-socket.patch
new file mode 100644 (file)
index 0000000..42bb23c
--- /dev/null
@@ -0,0 +1,86 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Guillaume Nault <gnault@redhat.com>
+Date: Fri, 6 Dec 2019 12:38:43 +0100
+Subject: tcp: tighten acceptance of ACKs not matching a child socket
+
+From: Guillaume Nault <gnault@redhat.com>
+
+[ Upstream commit cb44a08f8647fd2e8db5cc9ac27cd8355fa392d8 ]
+
+When no synflood occurs, the synflood timestamp isn't updated.
+Therefore it can be so old that time_after32() can consider it to be
+in the future.
+
+That's a problem for tcp_synq_no_recent_overflow() as it may report
+that a recent overflow occurred while, in fact, it's just that jiffies
+has grown past 'last_overflow' + TCP_SYNCOOKIE_VALID + 2^31.
+
+Spurious detection of recent overflows leads to extra syncookie
+verification in cookie_v[46]_check(). At that point, the verification
+should fail and the packet be dropped. But we should have dropped the
+packet earlier, as we didn't even send a syncookie.
+
+Let's refine tcp_synq_no_recent_overflow() to report a recent overflow
+only if jiffies is within the
+[last_overflow, last_overflow + TCP_SYNCOOKIE_VALID] interval. This
+way, no spurious recent overflow is reported when jiffies wraps and
+'last_overflow' moves into the future from the point of view of
+time_after32().
+
+However, if jiffies wraps and enters the
+[last_overflow, last_overflow + TCP_SYNCOOKIE_VALID] interval (with
+'last_overflow' being a stale synflood timestamp), then
+tcp_synq_no_recent_overflow() still erroneously reports an
+overflow. In such cases, we have to rely on syncookie verification
+to drop the packet. We unfortunately have no way to differentiate
+between a fresh and a stale syncookie timestamp.
+
+In practice, using last_overflow as the lower bound is problematic.
+If the synflood timestamp is concurrently updated between the time
+we read jiffies and the moment we store the timestamp in
+'last_overflow', then 'now' becomes smaller than 'last_overflow' and
+tcp_synq_no_recent_overflow() returns true, potentially dropping a
+valid syncookie.
+
+Reading jiffies after loading the timestamp could fix the problem,
+but that'd require a memory barrier. Let's just accommodate
+potential timestamp growth instead and extend the interval, using
+'last_overflow - HZ' as the lower bound.
+
+Signed-off-by: Guillaume Nault <gnault@redhat.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/tcp.h |   16 +++++++++++++---
+ 1 file changed, 13 insertions(+), 3 deletions(-)
+
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -518,13 +518,23 @@ static inline bool tcp_synq_no_recent_ov
+               reuse = rcu_dereference(sk->sk_reuseport_cb);
+               if (likely(reuse)) {
+                       last_overflow = READ_ONCE(reuse->synq_overflow_ts);
+-                      return time_after32(now, last_overflow +
+-                                          TCP_SYNCOOKIE_VALID);
++                      return !time_between32(now, last_overflow - HZ,
++                                             last_overflow +
++                                             TCP_SYNCOOKIE_VALID);
+               }
+       }
+       last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+-      return time_after32(now, last_overflow + TCP_SYNCOOKIE_VALID);
++
++      /* If last_overflow <= jiffies <= last_overflow + TCP_SYNCOOKIE_VALID,
++       * then we're under synflood. However, we have to use
++       * 'last_overflow - HZ' as lower bound. That's because a concurrent
++       * tcp_synq_overflow() could update .ts_recent_stamp after we read
++       * jiffies but before we store .ts_recent_stamp into last_overflow,
++       * which could lead to rejecting a valid syncookie.
++       */
++      return !time_between32(now, last_overflow - HZ,
++                             last_overflow + TCP_SYNCOOKIE_VALID);
+ }
+ static inline u32 tcp_cookie_time(void)
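The interval test rests on modular 32-bit arithmetic: t lies inside
[l, h] (with h - l below 2^31) exactly when (u32)(t - l) <= (u32)(h - l),
no matter where jiffies has wrapped. The time_between32() helper the
patch relies on is built on that comparison; here is a self-contained
sketch of the trick (illustrative code, not the kernel macro):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    static int between32(uint32_t t, uint32_t l, uint32_t h)
    {
        /* Wrap-safe: both sides are reduced modulo 2^32. */
        return (uint32_t)(h - l) >= (uint32_t)(t - l);
    }

    int main(void)
    {
        uint32_t valid = 128;            /* stands in for TCP_SYNCOOKIE_VALID */
        uint32_t last = UINT32_MAX - 10; /* stale stamp near the wrap */

        /* Plain case, no wrap involved. */
        assert(between32(1000, 900, 900 + valid));

        /* The window's upper end wraps past zero, yet the check still
         * answers correctly on both sides of the boundary. */
        assert(between32(last + 5, last, last + valid));
        assert(!between32(last - 5, last, last + valid));

        puts("wrap-safe interval checks ok");
        return 0;
    }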
diff --git a/queue-5.4/tipc-fix-ordering-of-tipc-module-init-and-exit-routine.patch b/queue-5.4/tipc-fix-ordering-of-tipc-module-init-and-exit-routine.patch
new file mode 100644
index 0000000..09c55e1
--- /dev/null
@@ -0,0 +1,159 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Taehee Yoo <ap420073@gmail.com>
+Date: Fri, 6 Dec 2019 05:25:48 +0000
+Subject: tipc: fix ordering of tipc module init and exit routine
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit 9cf1cd8ee3ee09ef2859017df2058e2f53c5347f ]
+
+TIPC uses the generic netlink infrastructure for its set/get/dump
+commands. So, when the tipc module is inserted, the init function
+calls genl_register_family().
+After genl_register_family(), set/get/dump commands are immediately
+allowed, and their callbacks internally use net_generic data.
+That data is allocated by register_pernet_device(), but this is
+called after genl_register_family() in the __init function.
+So, these callbacks could use uninitialized net_generic data.
+
+Test commands:
+    #SHELL1
+    while :
+    do
+        modprobe tipc
+        modprobe -rv tipc
+    done
+
+    #SHELL2
+    while :
+    do
+        tipc link list
+    done
+
+Splat looks like:
+[   59.616322][ T2788] kasan: CONFIG_KASAN_INLINE enabled
+[   59.617234][ T2788] kasan: GPF could be caused by NULL-ptr deref or user memory access
+[   59.618398][ T2788] general protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI
+[   59.619389][ T2788] CPU: 3 PID: 2788 Comm: tipc Not tainted 5.4.0+ #194
+[   59.620231][ T2788] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
+[   59.621428][ T2788] RIP: 0010:tipc_bcast_get_broadcast_mode+0x131/0x310 [tipc]
+[   59.622379][ T2788] Code: c7 c6 ef 8b 38 c0 65 ff 0d 84 83 c9 3f e8 d7 a5 f2 e3 48 8d bb 38 11 00 00 48 b8 00 00 00 00
+[   59.622550][ T2780] NET: Registered protocol family 30
+[   59.624627][ T2788] RSP: 0018:ffff88804b09f578 EFLAGS: 00010202
+[   59.624630][ T2788] RAX: dffffc0000000000 RBX: 0000000000000011 RCX: 000000008bc66907
+[   59.624631][ T2788] RDX: 0000000000000229 RSI: 000000004b3cf4cc RDI: 0000000000001149
+[   59.624633][ T2788] RBP: ffff88804b09f588 R08: 0000000000000003 R09: fffffbfff4fb3df1
+[   59.624635][ T2788] R10: fffffbfff50318f8 R11: ffff888066cadc18 R12: ffffffffa6cc2f40
+[   59.624637][ T2788] R13: 1ffff11009613eba R14: ffff8880662e9328 R15: ffff8880662e9328
+[   59.624639][ T2788] FS:  00007f57d8f7b740(0000) GS:ffff88806cc00000(0000) knlGS:0000000000000000
+[   59.624645][ T2788] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[   59.625875][ T2780] tipc: Started in single node mode
+[   59.626128][ T2788] CR2: 00007f57d887a8c0 CR3: 000000004b140002 CR4: 00000000000606e0
+[   59.633991][ T2788] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[   59.635195][ T2788] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+[   59.636478][ T2788] Call Trace:
+[   59.637025][ T2788]  tipc_nl_add_bc_link+0x179/0x1470 [tipc]
+[   59.638219][ T2788]  ? lock_downgrade+0x6e0/0x6e0
+[   59.638923][ T2788]  ? __tipc_nl_add_link+0xf90/0xf90 [tipc]
+[   59.639533][ T2788]  ? tipc_nl_node_dump_link+0x318/0xa50 [tipc]
+[   59.640160][ T2788]  ? mutex_lock_io_nested+0x1380/0x1380
+[   59.640746][ T2788]  tipc_nl_node_dump_link+0x4fd/0xa50 [tipc]
+[   59.641356][ T2788]  ? tipc_nl_node_reset_link_stats+0x340/0x340 [tipc]
+[   59.642088][ T2788]  ? __skb_ext_del+0x270/0x270
+[   59.642594][ T2788]  genl_lock_dumpit+0x85/0xb0
+[   59.643050][ T2788]  netlink_dump+0x49c/0xed0
+[   59.643529][ T2788]  ? __netlink_sendskb+0xc0/0xc0
+[   59.644044][ T2788]  ? __netlink_dump_start+0x190/0x800
+[   59.644617][ T2788]  ? __mutex_unlock_slowpath+0xd0/0x670
+[   59.645177][ T2788]  __netlink_dump_start+0x5a0/0x800
+[   59.645692][ T2788]  genl_rcv_msg+0xa75/0xe90
+[   59.646144][ T2788]  ? __lock_acquire+0xdfe/0x3de0
+[   59.646692][ T2788]  ? genl_family_rcv_msg_attrs_parse+0x320/0x320
+[   59.647340][ T2788]  ? genl_lock_dumpit+0xb0/0xb0
+[   59.647821][ T2788]  ? genl_unlock+0x20/0x20
+[   59.648290][ T2788]  ? genl_parallel_done+0xe0/0xe0
+[   59.648787][ T2788]  ? find_held_lock+0x39/0x1d0
+[   59.649276][ T2788]  ? genl_rcv+0x15/0x40
+[   59.649722][ T2788]  ? lock_contended+0xcd0/0xcd0
+[   59.650296][ T2788]  netlink_rcv_skb+0x121/0x350
+[   59.650828][ T2788]  ? genl_family_rcv_msg_attrs_parse+0x320/0x320
+[   59.651491][ T2788]  ? netlink_ack+0x940/0x940
+[   59.651953][ T2788]  ? lock_acquire+0x164/0x3b0
+[   59.652449][ T2788]  genl_rcv+0x24/0x40
+[   59.652841][ T2788]  netlink_unicast+0x421/0x600
+[ ... ]
+
+Fixes: 7e4369057806 ("tipc: fix a slab object leak")
+Fixes: a62fbccecd62 ("tipc: make subscriber server support net namespace")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Acked-by: Jon Maloy <jon.maloy@ericsson.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tipc/core.c |   29 +++++++++++++++--------------
+ 1 file changed, 15 insertions(+), 14 deletions(-)
+
+--- a/net/tipc/core.c
++++ b/net/tipc/core.c
+@@ -125,14 +125,6 @@ static int __init tipc_init(void)
+       sysctl_tipc_rmem[1] = RCVBUF_DEF;
+       sysctl_tipc_rmem[2] = RCVBUF_MAX;
+-      err = tipc_netlink_start();
+-      if (err)
+-              goto out_netlink;
+-
+-      err = tipc_netlink_compat_start();
+-      if (err)
+-              goto out_netlink_compat;
+-
+       err = tipc_register_sysctl();
+       if (err)
+               goto out_sysctl;
+@@ -153,8 +145,21 @@ static int __init tipc_init(void)
+       if (err)
+               goto out_bearer;
++      err = tipc_netlink_start();
++      if (err)
++              goto out_netlink;
++
++      err = tipc_netlink_compat_start();
++      if (err)
++              goto out_netlink_compat;
++
+       pr_info("Started in single node mode\n");
+       return 0;
++
++out_netlink_compat:
++      tipc_netlink_stop();
++out_netlink:
++      tipc_bearer_cleanup();
+ out_bearer:
+       unregister_pernet_device(&tipc_topsrv_net_ops);
+ out_pernet_topsrv:
+@@ -164,22 +169,18 @@ out_socket:
+ out_pernet:
+       tipc_unregister_sysctl();
+ out_sysctl:
+-      tipc_netlink_compat_stop();
+-out_netlink_compat:
+-      tipc_netlink_stop();
+-out_netlink:
+       pr_err("Unable to start in single node mode\n");
+       return err;
+ }
+ static void __exit tipc_exit(void)
+ {
++      tipc_netlink_compat_stop();
++      tipc_netlink_stop();
+       tipc_bearer_cleanup();
+       unregister_pernet_device(&tipc_topsrv_net_ops);
+       tipc_socket_stop();
+       unregister_pernet_device(&tipc_net_ops);
+-      tipc_netlink_stop();
+-      tipc_netlink_compat_stop();
+       tipc_unregister_sysctl();
+       pr_info("Deactivated\n");
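The reordering follows a general rule for module init paths: register
the externally visible entry point last, tear it down first on exit, and
unwind a partial failure in exact reverse order of registration. A
compact userspace sketch of that shape (every function name here is an
illustrative stand-in, not the tipc API):

    #include <stdio.h>

    static int  register_pernet(void)    { puts("pernet up");    return 0; }
    static void unregister_pernet(void)  { puts("pernet down");  }
    static int  register_netlink(void)   { puts("netlink up");   return 0; }
    static void unregister_netlink(void) { puts("netlink down"); }

    static int module_init_model(void)
    {
        int err;

        err = register_pernet();   /* backing state comes up first */
        if (err)
            goto out;
        err = register_netlink();  /* entry point last: from here on,
                                    * callbacks may touch pernet state */
        if (err)
            goto out_pernet;
        return 0;

    out_pernet:
        unregister_pernet();
    out:
        return err;
    }

    static void module_exit_model(void)
    {
        unregister_netlink();      /* cut off new requests first */
        unregister_pernet();       /* then free what they relied on */
    }

    int main(void)
    {
        if (!module_init_model())
            module_exit_model();
        return 0;
    }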
diff --git a/queue-5.4/xdp-obtain-the-mem_id-mutex-before-trying-to-remove-an-entry.patch b/queue-5.4/xdp-obtain-the-mem_id-mutex-before-trying-to-remove-an-entry.patch
new file mode 100644
index 0000000..48a43f6
--- /dev/null
@@ -0,0 +1,60 @@
+From foo@baz Tue 17 Dec 2019 08:13:44 PM CET
+From: Jonathan Lemon <jonathan.lemon@gmail.com>
+Date: Tue, 3 Dec 2019 14:01:14 -0800
+Subject: xdp: obtain the mem_id mutex before trying to remove an entry.
+
+From: Jonathan Lemon <jonathan.lemon@gmail.com>
+
+[ Upstream commit 86c76c09898332143be365c702cf8d586ed4ed21 ]
+
+A lockdep splat was observed when trying to remove an xdp memory
+model from the table: the mutex was only obtained when removing
+the entry, not before the table walk started.
+
+Fix the splat by obtaining the lock before starting the table walk.
+
+Fixes: c3f812cea0d7 ("page_pool: do not release pool until inflight == 0.")
+Reported-by: Grygorii Strashko <grygorii.strashko@ti.com>
+Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
+Tested-by: Grygorii Strashko <grygorii.strashko@ti.com>
+Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
+Acked-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/xdp.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/net/core/xdp.c
++++ b/net/core/xdp.c
+@@ -85,12 +85,8 @@ static void mem_xa_remove(struct xdp_mem
+ {
+       trace_mem_disconnect(xa);
+-      mutex_lock(&mem_id_lock);
+-
+       if (!rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params))
+               call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
+-
+-      mutex_unlock(&mem_id_lock);
+ }
+ static void mem_allocator_disconnect(void *allocator)
+@@ -98,6 +94,8 @@ static void mem_allocator_disconnect(voi
+       struct xdp_mem_allocator *xa;
+       struct rhashtable_iter iter;
++      mutex_lock(&mem_id_lock);
++
+       rhashtable_walk_enter(mem_id_ht, &iter);
+       do {
+               rhashtable_walk_start(&iter);
+@@ -111,6 +109,8 @@ static void mem_allocator_disconnect(voi
+       } while (xa == ERR_PTR(-EAGAIN));
+       rhashtable_walk_exit(&iter);
++
++      mutex_unlock(&mem_id_lock);
+ }
+ static void mem_id_disconnect(int id)
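The point of the fix is structural: the table walk and the per-entry
removal must share one critical section, instead of the removal taking
the mutex by itself while the walk runs unprotected. A small pthread
model of the corrected locking shape (a list stands in for the
rhashtable; names are illustrative, not the kernel API):

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct entry { void *allocator; struct entry *next; };

    static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct entry *table;

    /* Called with table_lock held -- no locking of its own, mirroring
     * how mem_xa_remove() dropped its private mutex_lock(). */
    static void remove_entry(struct entry **pp)
    {
        struct entry *e = *pp;

        *pp = e->next;
        free(e);
    }

    static void disconnect(void *allocator)
    {
        struct entry **pp;

        pthread_mutex_lock(&table_lock);   /* lock BEFORE the walk */
        for (pp = &table; *pp; ) {
            if ((*pp)->allocator == allocator)
                remove_entry(pp);
            else
                pp = &(*pp)->next;
        }
        pthread_mutex_unlock(&table_lock);
    }

    int main(void)
    {
        static int a, b;
        struct entry *e1 = malloc(sizeof(*e1));
        struct entry *e2 = malloc(sizeof(*e2));

        e1->allocator = &a; e1->next = e2;
        e2->allocator = &b; e2->next = NULL;
        table = e1;

        disconnect(&a);
        printf("entries left: %d\n", table && !table->next ? 1 : -1);
        disconnect(&b);
        return 0;
    }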