From 51a424d0070e24d6aaa083bc4cff06b02d0bddfb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 17 Jul 2020 08:48:49 +0200 Subject: [PATCH] 5.7-stable patches added patches: bridge-mcast-fix-mld2-report-ipv6-payload-length-check.patch cgroup-fix-cgroup_sk_alloc-for-sk_clone_lock.patch cgroup-fix-sock_cgroup_data-on-big-endian.patch ethtool-fix-genlmsg_put-failure-handling-in-ethnl_default_dumpit.patch genetlink-get-rid-of-family-attrbuf.patch genetlink-remove-genl_bind.patch hsr-fix-interface-leak-in-error-path-of-hsr_dev_finalize.patch ip-fix-so_mark-in-rst-ack-and-icmp-packets.patch ipv4-fill-fl4_icmp_-type-code-in-ping_v4_sendmsg.patch ipv6-fib6_select_path-can-not-use-out-path-for-nexthop-objects.patch ipv6-fix-use-of-anycast-address-with-loopback.patch l2tp-remove-skb_dst_set-from-l2tp_xmit_skb.patch llc-make-sure-applications-use-arphrd_ether.patch mptcp-fix-dss-map-generation-on-fin-retransmission.patch net-added-pointer-check-for-dst-ops-neigh_lookup-in-dst_neigh_lookup_skb.patch net-dsa-microchip-set-the-correct-number-of-ports.patch net-ipa-always-check-for-stopped-channel.patch net-ipa-introduce-ipa_cmd_tag_process.patch net-ipv4-fix-wrong-type-conversion-from-hint-to-rt-in-ip_route_use_hint.patch net-qrtr-free-flow-in-__qrtr_node_release.patch net-rmnet-do-not-allow-to-add-multiple-bridge-interfaces.patch net-usb-qmi_wwan-add-support-for-quectel-eg95-lte-modem.patch net_sched-fix-a-memory-leak-in-atm_tc_init.patch sched-consistently-handle-layer3-header-accesses-in-the-presence-of-vlans.patch tcp-fix-so_rcvlowat-possible-hangs-under-high-mem-pressure.patch tcp-make-sure-listeners-don-t-initialize-congestion-control-state.patch tcp-md5-add-missing-memory-barriers-in-tcp_md5_do_add-tcp_md5_hash_key.patch tcp-md5-allow-changing-md5-keys-in-all-socket-states.patch tcp-md5-do-not-send-silly-options-in-syncookies.patch tcp-md5-refine-tcp_md5_do_add-tcp_md5_hash_key-barriers.patch vlan-consolidate-vlan-parsing-code-and-limit-max-parsing-depth.patch --- ...ld2-report-ipv6-payload-length-check.patch | 37 ++ ...ix-cgroup_sk_alloc-for-sk_clone_lock.patch | 168 ++++++ ...p-fix-sock_cgroup_data-on-big-endian.patch | 38 ++ ...ure-handling-in-ethnl_default_dumpit.patch | 100 ++++ .../genetlink-get-rid-of-family-attrbuf.patch | 202 +++++++ queue-5.7/genetlink-remove-genl_bind.patch | 134 +++++ ...ak-in-error-path-of-hsr_dev_finalize.patch | 108 ++++ ...-so_mark-in-rst-ack-and-icmp-packets.patch | 114 ++++ ...4_icmp_-type-code-in-ping_v4_sendmsg.patch | 50 ++ ...not-use-out-path-for-nexthop-objects.patch | 73 +++ ...use-of-anycast-address-with-loopback.patch | 43 ++ ...emove-skb_dst_set-from-l2tp_xmit_skb.patch | 60 +++ ...e-sure-applications-use-arphrd_ether.patch | 160 ++++++ ...map-generation-on-fin-retransmission.patch | 56 ++ ...neigh_lookup-in-dst_neigh_lookup_skb.patch | 121 +++++ ...chip-set-the-correct-number-of-ports.patch | 49 ++ ...ipa-always-check-for-stopped-channel.patch | 53 ++ ...et-ipa-introduce-ipa_cmd_tag_process.patch | 78 +++ ...from-hint-to-rt-in-ip_route_use_hint.patch | 30 ++ ...rtr-free-flow-in-__qrtr_node_release.patch | 57 ++ ...ow-to-add-multiple-bridge-interfaces.patch | 84 +++ ...d-support-for-quectel-eg95-lte-modem.patch | 41 ++ ...hed-fix-a-memory-leak-in-atm_tc_init.patch | 51 ++ ...er-accesses-in-the-presence-of-vlans.patch | 496 ++++++++++++++++++ queue-5.7/series | 31 ++ ...ssible-hangs-under-high-mem-pressure.patch | 39 ++ ...-initialize-congestion-control-state.patch | 145 +++++ ...s-in-tcp_md5_do_add-tcp_md5_hash_key.patch | 64 +++ 
...anging-md5-keys-in-all-socket-states.patch | 66 +++ ...not-send-silly-options-in-syncookies.patch | 82 +++ ...md5_do_add-tcp_md5_hash_key-barriers.patch | 90 ++++ ...ing-code-and-limit-max-parsing-depth.patch | 136 +++++ 32 files changed, 3056 insertions(+) create mode 100644 queue-5.7/bridge-mcast-fix-mld2-report-ipv6-payload-length-check.patch create mode 100644 queue-5.7/cgroup-fix-cgroup_sk_alloc-for-sk_clone_lock.patch create mode 100644 queue-5.7/cgroup-fix-sock_cgroup_data-on-big-endian.patch create mode 100644 queue-5.7/ethtool-fix-genlmsg_put-failure-handling-in-ethnl_default_dumpit.patch create mode 100644 queue-5.7/genetlink-get-rid-of-family-attrbuf.patch create mode 100644 queue-5.7/genetlink-remove-genl_bind.patch create mode 100644 queue-5.7/hsr-fix-interface-leak-in-error-path-of-hsr_dev_finalize.patch create mode 100644 queue-5.7/ip-fix-so_mark-in-rst-ack-and-icmp-packets.patch create mode 100644 queue-5.7/ipv4-fill-fl4_icmp_-type-code-in-ping_v4_sendmsg.patch create mode 100644 queue-5.7/ipv6-fib6_select_path-can-not-use-out-path-for-nexthop-objects.patch create mode 100644 queue-5.7/ipv6-fix-use-of-anycast-address-with-loopback.patch create mode 100644 queue-5.7/l2tp-remove-skb_dst_set-from-l2tp_xmit_skb.patch create mode 100644 queue-5.7/llc-make-sure-applications-use-arphrd_ether.patch create mode 100644 queue-5.7/mptcp-fix-dss-map-generation-on-fin-retransmission.patch create mode 100644 queue-5.7/net-added-pointer-check-for-dst-ops-neigh_lookup-in-dst_neigh_lookup_skb.patch create mode 100644 queue-5.7/net-dsa-microchip-set-the-correct-number-of-ports.patch create mode 100644 queue-5.7/net-ipa-always-check-for-stopped-channel.patch create mode 100644 queue-5.7/net-ipa-introduce-ipa_cmd_tag_process.patch create mode 100644 queue-5.7/net-ipv4-fix-wrong-type-conversion-from-hint-to-rt-in-ip_route_use_hint.patch create mode 100644 queue-5.7/net-qrtr-free-flow-in-__qrtr_node_release.patch create mode 100644 queue-5.7/net-rmnet-do-not-allow-to-add-multiple-bridge-interfaces.patch create mode 100644 queue-5.7/net-usb-qmi_wwan-add-support-for-quectel-eg95-lte-modem.patch create mode 100644 queue-5.7/net_sched-fix-a-memory-leak-in-atm_tc_init.patch create mode 100644 queue-5.7/sched-consistently-handle-layer3-header-accesses-in-the-presence-of-vlans.patch create mode 100644 queue-5.7/tcp-fix-so_rcvlowat-possible-hangs-under-high-mem-pressure.patch create mode 100644 queue-5.7/tcp-make-sure-listeners-don-t-initialize-congestion-control-state.patch create mode 100644 queue-5.7/tcp-md5-add-missing-memory-barriers-in-tcp_md5_do_add-tcp_md5_hash_key.patch create mode 100644 queue-5.7/tcp-md5-allow-changing-md5-keys-in-all-socket-states.patch create mode 100644 queue-5.7/tcp-md5-do-not-send-silly-options-in-syncookies.patch create mode 100644 queue-5.7/tcp-md5-refine-tcp_md5_do_add-tcp_md5_hash_key-barriers.patch create mode 100644 queue-5.7/vlan-consolidate-vlan-parsing-code-and-limit-max-parsing-depth.patch diff --git a/queue-5.7/bridge-mcast-fix-mld2-report-ipv6-payload-length-check.patch b/queue-5.7/bridge-mcast-fix-mld2-report-ipv6-payload-length-check.patch new file mode 100644 index 00000000000..953d2ad6ef1 --- /dev/null +++ b/queue-5.7/bridge-mcast-fix-mld2-report-ipv6-payload-length-check.patch @@ -0,0 +1,37 @@ +From foo@baz Fri 17 Jul 2020 08:39:20 AM CEST +From: "Linus Lüssing" +Date: Sun, 5 Jul 2020 21:10:17 +0200 +Subject: bridge: mcast: Fix MLD2 Report IPv6 payload length check + +From: "Linus Lüssing" + +[ Upstream commit 5fc6266af7b427243da24f3443a50cd4584aac06 ] 
+ +Commit e57f61858b7c ("net: bridge: mcast: fix stale nsrcs pointer in +igmp3/mld2 report handling") introduced a bug in the IPv6 header payload +length check which would potentially lead to rejecting a valid MLD2 Report: + +The check needs to take into account the 2 bytes for the "Number of +Sources" field in the "Multicast Address Record" before reading it. +And not the size of a pointer to this field. + +Fixes: e57f61858b7c ("net: bridge: mcast: fix stale nsrcs pointer in igmp3/mld2 report handling") +Acked-by: Nikolay Aleksandrov +Signed-off-by: Linus Lüssing +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_multicast.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/bridge/br_multicast.c ++++ b/net/bridge/br_multicast.c +@@ -1007,7 +1007,7 @@ static int br_ip6_multicast_mld2_report( + nsrcs_offset = len + offsetof(struct mld2_grec, grec_nsrcs); + + if (skb_transport_offset(skb) + ipv6_transport_len(skb) < +- nsrcs_offset + sizeof(_nsrcs)) ++ nsrcs_offset + sizeof(__nsrcs)) + return -EINVAL; + + _nsrcs = skb_header_pointer(skb, nsrcs_offset, diff --git a/queue-5.7/cgroup-fix-cgroup_sk_alloc-for-sk_clone_lock.patch b/queue-5.7/cgroup-fix-cgroup_sk_alloc-for-sk_clone_lock.patch new file mode 100644 index 00000000000..14a681a78c6 --- /dev/null +++ b/queue-5.7/cgroup-fix-cgroup_sk_alloc-for-sk_clone_lock.patch @@ -0,0 +1,168 @@ +From foo@baz Fri 17 Jul 2020 08:39:20 AM CEST +From: Cong Wang +Date: Thu, 2 Jul 2020 11:52:56 -0700 +Subject: cgroup: fix cgroup_sk_alloc() for sk_clone_lock() + +From: Cong Wang + +[ Upstream commit ad0f75e5f57ccbceec13274e1e242f2b5a6397ed ] + +When we clone a socket in sk_clone_lock(), its sk_cgrp_data is +copied, so the cgroup refcnt must be taken too. And, unlike the +sk_alloc() path, sock_update_netprioidx() is not called here. +Therefore, it is safe and necessary to grab the cgroup refcnt +even when cgroup_sk_alloc is disabled. + +sk_clone_lock() is in BH context anyway, the in_interrupt() +would terminate this function if called there. And for sk_alloc() +skcd->val is always zero. So it's safe to factor out the code +to make it more readable. + +The global variable 'cgroup_sk_alloc_disabled' is used to determine +whether to take these reference counts. It is impossible to make +the reference counting correct unless we save this bit of information +in skcd->val. So, add a new bit there to record whether the socket +has already taken the reference counts. This obviously relies on +kmalloc() to align cgroup pointers to at least 4 bytes, +ARCH_KMALLOC_MINALIGN is certainly larger than that. + +This bug seems to be introduced since the beginning, commit +d979a39d7242 ("cgroup: duplicate cgroup reference when cloning sockets") +tried to fix it but not compeletely. It seems not easy to trigger until +the recent commit 090e28b229af +("netprio_cgroup: Fix unlimited memory leak of v2 cgroups") was merged. + +Fixes: bd1060a1d671 ("sock, cgroup: add sock->sk_cgroup") +Reported-by: Cameron Berkenpas +Reported-by: Peter Geis +Reported-by: Lu Fengqi +Reported-by: Daniël Sonck +Reported-by: Zhang Qiang +Tested-by: Cameron Berkenpas +Tested-by: Peter Geis +Tested-by: Thomas Lamprecht +Cc: Daniel Borkmann +Cc: Zefan Li +Cc: Tejun Heo +Cc: Roman Gushchin +Signed-off-by: Cong Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/cgroup-defs.h | 6 ++++-- + include/linux/cgroup.h | 4 +++- + kernel/cgroup/cgroup.c | 31 +++++++++++++++++++------------ + net/core/sock.c | 2 +- + 4 files changed, 27 insertions(+), 16 deletions(-) + +--- a/include/linux/cgroup-defs.h ++++ b/include/linux/cgroup-defs.h +@@ -790,7 +790,8 @@ struct sock_cgroup_data { + union { + #ifdef __LITTLE_ENDIAN + struct { +- u8 is_data; ++ u8 is_data : 1; ++ u8 no_refcnt : 1; + u8 padding; + u16 prioidx; + u32 classid; +@@ -800,7 +801,8 @@ struct sock_cgroup_data { + u32 classid; + u16 prioidx; + u8 padding; +- u8 is_data; ++ u8 no_refcnt : 1; ++ u8 is_data : 1; + } __packed; + #endif + u64 val; +--- a/include/linux/cgroup.h ++++ b/include/linux/cgroup.h +@@ -822,6 +822,7 @@ extern spinlock_t cgroup_sk_update_lock; + + void cgroup_sk_alloc_disable(void); + void cgroup_sk_alloc(struct sock_cgroup_data *skcd); ++void cgroup_sk_clone(struct sock_cgroup_data *skcd); + void cgroup_sk_free(struct sock_cgroup_data *skcd); + + static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd) +@@ -835,7 +836,7 @@ static inline struct cgroup *sock_cgroup + */ + v = READ_ONCE(skcd->val); + +- if (v & 1) ++ if (v & 3) + return &cgrp_dfl_root.cgrp; + + return (struct cgroup *)(unsigned long)v ?: &cgrp_dfl_root.cgrp; +@@ -847,6 +848,7 @@ static inline struct cgroup *sock_cgroup + #else /* CONFIG_CGROUP_DATA */ + + static inline void cgroup_sk_alloc(struct sock_cgroup_data *skcd) {} ++static inline void cgroup_sk_clone(struct sock_cgroup_data *skcd) {} + static inline void cgroup_sk_free(struct sock_cgroup_data *skcd) {} + + #endif /* CONFIG_CGROUP_DATA */ +--- a/kernel/cgroup/cgroup.c ++++ b/kernel/cgroup/cgroup.c +@@ -6447,18 +6447,8 @@ void cgroup_sk_alloc_disable(void) + + void cgroup_sk_alloc(struct sock_cgroup_data *skcd) + { +- if (cgroup_sk_alloc_disabled) +- return; +- +- /* Socket clone path */ +- if (skcd->val) { +- /* +- * We might be cloning a socket which is left in an empty +- * cgroup and the cgroup might have already been rmdir'd. +- * Don't use cgroup_get_live(). +- */ +- cgroup_get(sock_cgroup_ptr(skcd)); +- cgroup_bpf_get(sock_cgroup_ptr(skcd)); ++ if (cgroup_sk_alloc_disabled) { ++ skcd->no_refcnt = 1; + return; + } + +@@ -6483,10 +6473,27 @@ void cgroup_sk_alloc(struct sock_cgroup_ + rcu_read_unlock(); + } + ++void cgroup_sk_clone(struct sock_cgroup_data *skcd) ++{ ++ if (skcd->val) { ++ if (skcd->no_refcnt) ++ return; ++ /* ++ * We might be cloning a socket which is left in an empty ++ * cgroup and the cgroup might have already been rmdir'd. ++ * Don't use cgroup_get_live(). 
++ */ ++ cgroup_get(sock_cgroup_ptr(skcd)); ++ cgroup_bpf_get(sock_cgroup_ptr(skcd)); ++ } ++} ++ + void cgroup_sk_free(struct sock_cgroup_data *skcd) + { + struct cgroup *cgrp = sock_cgroup_ptr(skcd); + ++ if (skcd->no_refcnt) ++ return; + cgroup_bpf_put(cgrp); + cgroup_put(cgrp); + } +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -1837,7 +1837,7 @@ struct sock *sk_clone_lock(const struct + /* sk->sk_memcg will be populated at accept() time */ + newsk->sk_memcg = NULL; + +- cgroup_sk_alloc(&newsk->sk_cgrp_data); ++ cgroup_sk_clone(&newsk->sk_cgrp_data); + + rcu_read_lock(); + filter = rcu_dereference(sk->sk_filter); diff --git a/queue-5.7/cgroup-fix-sock_cgroup_data-on-big-endian.patch b/queue-5.7/cgroup-fix-sock_cgroup_data-on-big-endian.patch new file mode 100644 index 00000000000..d2f9507200d --- /dev/null +++ b/queue-5.7/cgroup-fix-sock_cgroup_data-on-big-endian.patch @@ -0,0 +1,38 @@ +From foo@baz Fri 17 Jul 2020 08:39:20 AM CEST +From: Cong Wang +Date: Thu, 9 Jul 2020 16:28:44 -0700 +Subject: cgroup: Fix sock_cgroup_data on big-endian. + +From: Cong Wang + +[ Upstream commit 14b032b8f8fce03a546dcf365454bec8c4a58d7d ] + +In order for no_refcnt and is_data to be the lowest order two +bits in the 'val' we have to pad out the bitfield of the u8. + +Fixes: ad0f75e5f57c ("cgroup: fix cgroup_sk_alloc() for sk_clone_lock()") +Reported-by: Guenter Roeck +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/cgroup-defs.h | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/include/linux/cgroup-defs.h ++++ b/include/linux/cgroup-defs.h +@@ -792,6 +792,7 @@ struct sock_cgroup_data { + struct { + u8 is_data : 1; + u8 no_refcnt : 1; ++ u8 unused : 6; + u8 padding; + u16 prioidx; + u32 classid; +@@ -801,6 +802,7 @@ struct sock_cgroup_data { + u32 classid; + u16 prioidx; + u8 padding; ++ u8 unused : 6; + u8 no_refcnt : 1; + u8 is_data : 1; + } __packed; diff --git a/queue-5.7/ethtool-fix-genlmsg_put-failure-handling-in-ethnl_default_dumpit.patch b/queue-5.7/ethtool-fix-genlmsg_put-failure-handling-in-ethnl_default_dumpit.patch new file mode 100644 index 00000000000..3917e147742 --- /dev/null +++ b/queue-5.7/ethtool-fix-genlmsg_put-failure-handling-in-ethnl_default_dumpit.patch @@ -0,0 +1,100 @@ +From foo@baz Fri 17 Jul 2020 08:39:20 AM CEST +From: Michal Kubecek +Date: Thu, 9 Jul 2020 12:11:50 +0200 +Subject: ethtool: fix genlmsg_put() failure handling in ethnl_default_dumpit() + +From: Michal Kubecek + +[ Upstream commit 365f9ae4ee36037e2a9268fe7296065356840b4c ] + +If the genlmsg_put() call in ethnl_default_dumpit() fails, we bail out +without checking if we already have some messages in current skb like we do +with ethnl_default_dump_one() failure later. Therefore if existing messages +almost fill up the buffer so that there is not enough space even for +netlink and genetlink header, we lose all prepared messages and return and +error. + +Rather than duplicating the skb->len check, move the genlmsg_put(), +genlmsg_cancel() and genlmsg_end() calls into ethnl_default_dump_one(). +This is also more logical as all message composition will be in +ethnl_default_dump_one() and only iteration logic will be left in +ethnl_default_dumpit(). + +Fixes: 728480f12442 ("ethtool: default handlers for GET requests") +Reported-by: Jakub Kicinski +Signed-off-by: Michal Kubecek +Reviewed-by: Jakub Kicinski +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ethtool/netlink.c | 27 +++++++++++++-------------- + 1 file changed, 13 insertions(+), 14 deletions(-) + +--- a/net/ethtool/netlink.c ++++ b/net/ethtool/netlink.c +@@ -376,10 +376,17 @@ err_dev: + } + + static int ethnl_default_dump_one(struct sk_buff *skb, struct net_device *dev, +- const struct ethnl_dump_ctx *ctx) ++ const struct ethnl_dump_ctx *ctx, ++ struct netlink_callback *cb) + { ++ void *ehdr; + int ret; + ++ ehdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, ++ ðtool_genl_family, 0, ctx->ops->reply_cmd); ++ if (!ehdr) ++ return -EMSGSIZE; ++ + ethnl_init_reply_data(ctx->reply_data, ctx->ops, dev); + rtnl_lock(); + ret = ctx->ops->prepare_data(ctx->req_info, ctx->reply_data, NULL); +@@ -395,6 +402,10 @@ out: + if (ctx->ops->cleanup_data) + ctx->ops->cleanup_data(ctx->reply_data); + ctx->reply_data->dev = NULL; ++ if (ret < 0) ++ genlmsg_cancel(skb, ehdr); ++ else ++ genlmsg_end(skb, ehdr); + return ret; + } + +@@ -411,7 +422,6 @@ static int ethnl_default_dumpit(struct s + int s_idx = ctx->pos_idx; + int h, idx = 0; + int ret = 0; +- void *ehdr; + + rtnl_lock(); + for (h = ctx->pos_hash; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { +@@ -431,26 +441,15 @@ restart_chain: + dev_hold(dev); + rtnl_unlock(); + +- ehdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, +- cb->nlh->nlmsg_seq, +- ðtool_genl_family, 0, +- ctx->ops->reply_cmd); +- if (!ehdr) { +- dev_put(dev); +- ret = -EMSGSIZE; +- goto out; +- } +- ret = ethnl_default_dump_one(skb, dev, ctx); ++ ret = ethnl_default_dump_one(skb, dev, ctx, cb); + dev_put(dev); + if (ret < 0) { +- genlmsg_cancel(skb, ehdr); + if (ret == -EOPNOTSUPP) + goto lock_and_cont; + if (likely(skb->len)) + ret = skb->len; + goto out; + } +- genlmsg_end(skb, ehdr); + lock_and_cont: + rtnl_lock(); + if (net->dev_base_seq != seq) { diff --git a/queue-5.7/genetlink-get-rid-of-family-attrbuf.patch b/queue-5.7/genetlink-get-rid-of-family-attrbuf.patch new file mode 100644 index 00000000000..fdfe45afad0 --- /dev/null +++ b/queue-5.7/genetlink-get-rid-of-family-attrbuf.patch @@ -0,0 +1,202 @@ +From foo@baz Fri 17 Jul 2020 08:39:20 AM CEST +From: Cong Wang +Date: Sat, 27 Jun 2020 00:12:24 -0700 +Subject: genetlink: get rid of family->attrbuf + +From: Cong Wang + +[ Upstream commit bf64ff4c2aac65d680dc639a511c781cf6b6ec08 ] + +genl_family_rcv_msg_attrs_parse() reuses the global family->attrbuf +when family->parallel_ops is false. However, family->attrbuf is not +protected by any lock on the genl_family_rcv_msg_doit() code path. + +This leads to several different consequences, one of them is UAF, +like the following: + +genl_family_rcv_msg_doit(): genl_start(): + genl_family_rcv_msg_attrs_parse() + attrbuf = family->attrbuf + __nlmsg_parse(attrbuf); + genl_family_rcv_msg_attrs_parse() + attrbuf = family->attrbuf + __nlmsg_parse(attrbuf); + info->attrs = attrs; + cb->data = info; + +netlink_unicast_kernel(): + consume_skb() + genl_lock_dumpit(): + genl_dumpit_info(cb)->attrs + +Note family->attrbuf is an array of pointers to the skb data, once +the skb is freed, any dereference of family->attrbuf will be a UAF. + +Maybe we could serialize the family->attrbuf with genl_mutex too, but +that would make the locking more complicated. Instead, we can just get +rid of family->attrbuf and always allocate attrbuf from heap like the +family->parallel_ops==true code path. This may add some performance +overhead but comparing with taking the global genl_mutex, it still +looks better. 
+ +Fixes: 75cdbdd08900 ("net: ieee802154: have genetlink code to parse the attrs during dumpit") +Fixes: 057af7071344 ("net: tipc: have genetlink code to parse the attrs during dumpit") +Reported-and-tested-by: syzbot+3039ddf6d7b13daf3787@syzkaller.appspotmail.com +Reported-and-tested-by: syzbot+80cad1e3cb4c41cde6ff@syzkaller.appspotmail.com +Reported-and-tested-by: syzbot+736bcbcb11b60d0c0792@syzkaller.appspotmail.com +Reported-and-tested-by: syzbot+520f8704db2b68091d44@syzkaller.appspotmail.com +Reported-and-tested-by: syzbot+c96e4dfb32f8987fdeed@syzkaller.appspotmail.com +Cc: Jiri Pirko +Signed-off-by: Cong Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/genetlink.h | 2 -- + net/netlink/genetlink.c | 48 +++++++++++++----------------------------------- + 2 files changed, 13 insertions(+), 37 deletions(-) + +--- a/include/net/genetlink.h ++++ b/include/net/genetlink.h +@@ -35,7 +35,6 @@ struct genl_info; + * do additional, common, filtering and return an error + * @post_doit: called after an operation's doit callback, it may + * undo operations done by pre_doit, for example release locks +- * @attrbuf: buffer to store parsed attributes (private) + * @mcgrps: multicast groups used by this family + * @n_mcgrps: number of multicast groups + * @mcgrp_offset: starting number of multicast group IDs in this family +@@ -58,7 +57,6 @@ struct genl_family { + void (*post_doit)(const struct genl_ops *ops, + struct sk_buff *skb, + struct genl_info *info); +- struct nlattr ** attrbuf; /* private */ + const struct genl_ops * ops; + const struct genl_multicast_group *mcgrps; + unsigned int n_ops; +--- a/net/netlink/genetlink.c ++++ b/net/netlink/genetlink.c +@@ -351,22 +351,11 @@ int genl_register_family(struct genl_fam + start = end = GENL_ID_VFS_DQUOT; + } + +- if (family->maxattr && !family->parallel_ops) { +- family->attrbuf = kmalloc_array(family->maxattr + 1, +- sizeof(struct nlattr *), +- GFP_KERNEL); +- if (family->attrbuf == NULL) { +- err = -ENOMEM; +- goto errout_locked; +- } +- } else +- family->attrbuf = NULL; +- + family->id = idr_alloc_cyclic(&genl_fam_idr, family, + start, end + 1, GFP_KERNEL); + if (family->id < 0) { + err = family->id; +- goto errout_free; ++ goto errout_locked; + } + + err = genl_validate_assign_mc_groups(family); +@@ -385,8 +374,6 @@ int genl_register_family(struct genl_fam + + errout_remove: + idr_remove(&genl_fam_idr, family->id); +-errout_free: +- kfree(family->attrbuf); + errout_locked: + genl_unlock_all(); + return err; +@@ -419,8 +406,6 @@ int genl_unregister_family(const struct + atomic_read(&genl_sk_destructing_cnt) == 0); + genl_unlock(); + +- kfree(family->attrbuf); +- + genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0); + + return 0; +@@ -485,30 +470,23 @@ genl_family_rcv_msg_attrs_parse(const st + if (!family->maxattr) + return NULL; + +- if (family->parallel_ops) { +- attrbuf = kmalloc_array(family->maxattr + 1, +- sizeof(struct nlattr *), GFP_KERNEL); +- if (!attrbuf) +- return ERR_PTR(-ENOMEM); +- } else { +- attrbuf = family->attrbuf; +- } ++ attrbuf = kmalloc_array(family->maxattr + 1, ++ sizeof(struct nlattr *), GFP_KERNEL); ++ if (!attrbuf) ++ return ERR_PTR(-ENOMEM); + + err = __nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr, + family->policy, validate, extack); + if (err) { +- if (family->parallel_ops) +- kfree(attrbuf); ++ kfree(attrbuf); + return ERR_PTR(err); + } + return attrbuf; + } + +-static void genl_family_rcv_msg_attrs_free(const struct genl_family *family, +- struct nlattr **attrbuf) 
++static void genl_family_rcv_msg_attrs_free(struct nlattr **attrbuf) + { +- if (family->parallel_ops) +- kfree(attrbuf); ++ kfree(attrbuf); + } + + struct genl_start_context { +@@ -542,7 +520,7 @@ static int genl_start(struct netlink_cal + no_attrs: + info = genl_dumpit_info_alloc(); + if (!info) { +- genl_family_rcv_msg_attrs_free(ctx->family, attrs); ++ genl_family_rcv_msg_attrs_free(attrs); + return -ENOMEM; + } + info->family = ctx->family; +@@ -559,7 +537,7 @@ no_attrs: + } + + if (rc) { +- genl_family_rcv_msg_attrs_free(info->family, info->attrs); ++ genl_family_rcv_msg_attrs_free(info->attrs); + genl_dumpit_info_free(info); + cb->data = NULL; + } +@@ -588,7 +566,7 @@ static int genl_lock_done(struct netlink + rc = ops->done(cb); + genl_unlock(); + } +- genl_family_rcv_msg_attrs_free(info->family, info->attrs); ++ genl_family_rcv_msg_attrs_free(info->attrs); + genl_dumpit_info_free(info); + return rc; + } +@@ -601,7 +579,7 @@ static int genl_parallel_done(struct net + + if (ops->done) + rc = ops->done(cb); +- genl_family_rcv_msg_attrs_free(info->family, info->attrs); ++ genl_family_rcv_msg_attrs_free(info->attrs); + genl_dumpit_info_free(info); + return rc; + } +@@ -694,7 +672,7 @@ static int genl_family_rcv_msg_doit(cons + family->post_doit(ops, skb, &info); + + out: +- genl_family_rcv_msg_attrs_free(family, attrbuf); ++ genl_family_rcv_msg_attrs_free(attrbuf); + + return err; + } diff --git a/queue-5.7/genetlink-remove-genl_bind.patch b/queue-5.7/genetlink-remove-genl_bind.patch new file mode 100644 index 00000000000..82690902949 --- /dev/null +++ b/queue-5.7/genetlink-remove-genl_bind.patch @@ -0,0 +1,134 @@ +From foo@baz Fri 17 Jul 2020 08:39:20 AM CEST +From: Sean Tranchetti +Date: Tue, 30 Jun 2020 11:50:17 -0600 +Subject: genetlink: remove genl_bind + +From: Sean Tranchetti + +[ Upstream commit 1e82a62fec613844da9e558f3493540a5b7a7b67 ] + +A potential deadlock can occur during registering or unregistering a +new generic netlink family between the main nl_table_lock and the +cb_lock where each thread wants the lock held by the other, as +demonstrated below. + +1) Thread 1 is performing a netlink_bind() operation on a socket. As part + of this call, it will call netlink_lock_table(), incrementing the + nl_table_users count to 1. +2) Thread 2 is registering (or unregistering) a genl_family via the + genl_(un)register_family() API. The cb_lock semaphore will be taken for + writing. +3) Thread 1 will call genl_bind() as part of the bind operation to handle + subscribing to GENL multicast groups at the request of the user. It will + attempt to take the cb_lock semaphore for reading, but it will fail and + be scheduled away, waiting for Thread 2 to finish the write. +4) Thread 2 will call netlink_table_grab() during the (un)registration + call. However, as Thread 1 has incremented nl_table_users, it will not + be able to proceed, and both threads will be stuck waiting for the + other. + +genl_bind() is a noop, unless a genl_family implements the mcast_bind() +function to handle setting up family-specific multicast operations. Since +no one in-tree uses this functionality as Cong pointed out, simply removing +the genl_bind() function will remove the possibility for deadlock, as there +is no attempt by Thread 1 above to take the cb_lock semaphore. + +Fixes: c380d9a7afff ("genetlink: pass multicast bind/unbind to families") +Suggested-by: Cong Wang +Acked-by: Johannes Berg +Reported-by: kernel test robot +Signed-off-by: Sean Tranchetti +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/genetlink.h | 8 ------- + net/netlink/genetlink.c | 49 ------------------------------------------------ + 2 files changed, 57 deletions(-) + +--- a/include/net/genetlink.h ++++ b/include/net/genetlink.h +@@ -35,12 +35,6 @@ struct genl_info; + * do additional, common, filtering and return an error + * @post_doit: called after an operation's doit callback, it may + * undo operations done by pre_doit, for example release locks +- * @mcast_bind: a socket bound to the given multicast group (which +- * is given as the offset into the groups array) +- * @mcast_unbind: a socket was unbound from the given multicast group. +- * Note that unbind() will not be called symmetrically if the +- * generic netlink family is removed while there are still open +- * sockets. + * @attrbuf: buffer to store parsed attributes (private) + * @mcgrps: multicast groups used by this family + * @n_mcgrps: number of multicast groups +@@ -64,8 +58,6 @@ struct genl_family { + void (*post_doit)(const struct genl_ops *ops, + struct sk_buff *skb, + struct genl_info *info); +- int (*mcast_bind)(struct net *net, int group); +- void (*mcast_unbind)(struct net *net, int group); + struct nlattr ** attrbuf; /* private */ + const struct genl_ops * ops; + const struct genl_multicast_group *mcgrps; +--- a/net/netlink/genetlink.c ++++ b/net/netlink/genetlink.c +@@ -1088,60 +1088,11 @@ static struct genl_family genl_ctrl __ro + .netnsok = true, + }; + +-static int genl_bind(struct net *net, int group) +-{ +- struct genl_family *f; +- int err = -ENOENT; +- unsigned int id; +- +- down_read(&cb_lock); +- +- idr_for_each_entry(&genl_fam_idr, f, id) { +- if (group >= f->mcgrp_offset && +- group < f->mcgrp_offset + f->n_mcgrps) { +- int fam_grp = group - f->mcgrp_offset; +- +- if (!f->netnsok && net != &init_net) +- err = -ENOENT; +- else if (f->mcast_bind) +- err = f->mcast_bind(net, fam_grp); +- else +- err = 0; +- break; +- } +- } +- up_read(&cb_lock); +- +- return err; +-} +- +-static void genl_unbind(struct net *net, int group) +-{ +- struct genl_family *f; +- unsigned int id; +- +- down_read(&cb_lock); +- +- idr_for_each_entry(&genl_fam_idr, f, id) { +- if (group >= f->mcgrp_offset && +- group < f->mcgrp_offset + f->n_mcgrps) { +- int fam_grp = group - f->mcgrp_offset; +- +- if (f->mcast_unbind) +- f->mcast_unbind(net, fam_grp); +- break; +- } +- } +- up_read(&cb_lock); +-} +- + static int __net_init genl_pernet_init(struct net *net) + { + struct netlink_kernel_cfg cfg = { + .input = genl_rcv, + .flags = NL_CFG_F_NONROOT_RECV, +- .bind = genl_bind, +- .unbind = genl_unbind, + }; + + /* we'll bump the group number right afterwards */ diff --git a/queue-5.7/hsr-fix-interface-leak-in-error-path-of-hsr_dev_finalize.patch b/queue-5.7/hsr-fix-interface-leak-in-error-path-of-hsr_dev_finalize.patch new file mode 100644 index 00000000000..710de7b2f84 --- /dev/null +++ b/queue-5.7/hsr-fix-interface-leak-in-error-path-of-hsr_dev_finalize.patch @@ -0,0 +1,108 @@ +From foo@baz Fri 17 Jul 2020 08:39:20 AM CEST +From: Taehee Yoo +Date: Thu, 2 Jul 2020 17:06:19 +0000 +Subject: hsr: fix interface leak in error path of hsr_dev_finalize() + +From: Taehee Yoo + +[ Upstream commit ccfc9df1352be5b2f391091e18c4b2395d30ce78 ] + +To release hsr(upper) interface, it should release +its own lower interfaces first. +Then, hsr(upper) interface can be released safely. +In the current code of error path of hsr_dev_finalize(), it releases hsr +interface before releasing a lower interface. 
+So, a warning occurs, which warns about the leak of lower interfaces. +In order to fix this problem, changing the ordering of the error path of +hsr_dev_finalize() is needed. + +Test commands: + ip link add dummy0 type dummy + ip link add dummy1 type dummy + ip link add dummy2 type dummy + ip link add hsr0 type hsr slave1 dummy0 slave2 dummy1 + ip link add hsr1 type hsr slave1 dummy2 slave2 dummy0 + +Splat looks like: +[ 214.923127][ C2] WARNING: CPU: 2 PID: 1093 at net/core/dev.c:8992 rollback_registered_many+0x986/0xcf0 +[ 214.923129][ C2] Modules linked in: hsr dummy openvswitch nsh nf_conncount nf_nat nf_conntrack nf_defrag_ipx +[ 214.923154][ C2] CPU: 2 PID: 1093 Comm: ip Not tainted 5.8.0-rc2+ #623 +[ 214.923156][ C2] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 +[ 214.923157][ C2] RIP: 0010:rollback_registered_many+0x986/0xcf0 +[ 214.923160][ C2] Code: 41 8b 4e cc 45 31 c0 31 d2 4c 89 ee 48 89 df e8 e0 47 ff ff 85 c0 0f 84 cd fc ff ff 5 +[ 214.923162][ C2] RSP: 0018:ffff8880c5156f28 EFLAGS: 00010287 +[ 214.923165][ C2] RAX: ffff8880d1dad458 RBX: ffff8880bd1b9000 RCX: ffffffffb929d243 +[ 214.923167][ C2] RDX: 1ffffffff77e63f0 RSI: 0000000000000008 RDI: ffffffffbbf31f80 +[ 214.923168][ C2] RBP: dffffc0000000000 R08: fffffbfff77e63f1 R09: fffffbfff77e63f1 +[ 214.923170][ C2] R10: ffffffffbbf31f87 R11: 0000000000000001 R12: ffff8880c51570a0 +[ 214.923172][ C2] R13: ffff8880bd1b90b8 R14: ffff8880c5157048 R15: ffff8880d1dacc40 +[ 214.923174][ C2] FS: 00007fdd257a20c0(0000) GS:ffff8880da200000(0000) knlGS:0000000000000000 +[ 214.923175][ C2] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 214.923177][ C2] CR2: 00007ffd78beb038 CR3: 00000000be544005 CR4: 00000000000606e0 +[ 214.923179][ C2] Call Trace: +[ 214.923180][ C2] ? netif_set_real_num_tx_queues+0x780/0x780 +[ 214.923182][ C2] ? dev_validate_mtu+0x140/0x140 +[ 214.923183][ C2] ? synchronize_rcu.part.79+0x85/0xd0 +[ 214.923185][ C2] ? synchronize_rcu_expedited+0xbb0/0xbb0 +[ 214.923187][ C2] rollback_registered+0xc8/0x170 +[ 214.923188][ C2] ? rollback_registered_many+0xcf0/0xcf0 +[ 214.923190][ C2] unregister_netdevice_queue+0x18b/0x240 +[ 214.923191][ C2] hsr_dev_finalize+0x56e/0x6e0 [hsr] +[ 214.923192][ C2] hsr_newlink+0x36b/0x450 [hsr] +[ 214.923194][ C2] ? hsr_dellink+0x70/0x70 [hsr] +[ 214.923195][ C2] ? rtnl_create_link+0x2e4/0xb00 +[ 214.923197][ C2] ? __netlink_ns_capable+0xc3/0xf0 +[ 214.923198][ C2] __rtnl_newlink+0xbdb/0x1270 +[ ... ] + +Fixes: e0a4b99773d3 ("hsr: use upper/lower device infrastructure") +Reported-by: syzbot+7f1c020f68dab95aab59@syzkaller.appspotmail.com +Signed-off-by: Taehee Yoo +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/hsr/hsr_device.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +--- a/net/hsr/hsr_device.c ++++ b/net/hsr/hsr_device.c +@@ -417,6 +417,7 @@ int hsr_dev_finalize(struct net_device * + unsigned char multicast_spec, u8 protocol_version, + struct netlink_ext_ack *extack) + { ++ bool unregister = false; + struct hsr_priv *hsr; + int res; + +@@ -468,25 +469,27 @@ int hsr_dev_finalize(struct net_device * + if (res) + goto err_unregister; + ++ unregister = true; ++ + res = hsr_add_port(hsr, slave[0], HSR_PT_SLAVE_A, extack); + if (res) +- goto err_add_slaves; ++ goto err_unregister; + + res = hsr_add_port(hsr, slave[1], HSR_PT_SLAVE_B, extack); + if (res) +- goto err_add_slaves; ++ goto err_unregister; + + hsr_debugfs_init(hsr, hsr_dev); + mod_timer(&hsr->prune_timer, jiffies + msecs_to_jiffies(PRUNE_PERIOD)); + + return 0; + +-err_add_slaves: +- unregister_netdevice(hsr_dev); + err_unregister: + hsr_del_ports(hsr); + err_add_master: + hsr_del_self_node(hsr); + ++ if (unregister) ++ unregister_netdevice(hsr_dev); + return res; + } diff --git a/queue-5.7/ip-fix-so_mark-in-rst-ack-and-icmp-packets.patch b/queue-5.7/ip-fix-so_mark-in-rst-ack-and-icmp-packets.patch new file mode 100644 index 00000000000..f0a7031dca8 --- /dev/null +++ b/queue-5.7/ip-fix-so_mark-in-rst-ack-and-icmp-packets.patch @@ -0,0 +1,114 @@ +From foo@baz Fri 17 Jul 2020 08:39:20 AM CEST +From: Willem de Bruijn +Date: Wed, 1 Jul 2020 16:00:06 -0400 +Subject: ip: Fix SO_MARK in RST, ACK and ICMP packets + +From: Willem de Bruijn + +[ Upstream commit 0da7536fb47f51df89ccfcb1fa09f249d9accec5 ] + +When no full socket is available, skbs are sent over a per-netns +control socket. Its sk_mark is temporarily adjusted to match that +of the real (request or timewait) socket or to reflect an incoming +skb, so that the outgoing skb inherits this in __ip_make_skb. + +Introduction of the socket cookie mark field broke this. Now the +skb is set through the cookie and cork: + + # init sockc.mark from sk_mark or cmsg +ip_append_data + ip_setup_cork # convert sockc.mark to cork mark +ip_push_pending_frames + ip_finish_skb + __ip_make_skb # set skb->mark to cork mark + +But I missed these special control sockets. Update all callers of +__ip(6)_make_skb that were originally missed. + +For IPv6, the same two icmp(v6) paths are affected. The third +case is not, as commit 92e55f412cff ("tcp: don't annotate +mark on control socket from tcp_v6_send_response()") replaced +the ctl_sk->sk_mark with passing the mark field directly as a +function argument. That commit predates the commit that +introduced the bug. + +Fixes: c6af0c227a22 ("ip: support SO_MARK cmsg") +Signed-off-by: Willem de Bruijn +Reported-by: Martin KaFai Lau +Reviewed-by: Martin KaFai Lau +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/icmp.c | 4 ++-- + net/ipv4/ip_output.c | 2 +- + net/ipv6/icmp.c | 4 ++-- + 3 files changed, 5 insertions(+), 5 deletions(-) + +--- a/net/ipv4/icmp.c ++++ b/net/ipv4/icmp.c +@@ -427,7 +427,7 @@ static void icmp_reply(struct icmp_bxm * + + ipcm_init(&ipc); + inet->tos = ip_hdr(skb)->tos; +- sk->sk_mark = mark; ++ ipc.sockc.mark = mark; + daddr = ipc.addr = ip_hdr(skb)->saddr; + saddr = fib_compute_spec_dst(skb); + +@@ -710,10 +710,10 @@ void __icmp_send(struct sk_buff *skb_in, + icmp_param.skb = skb_in; + icmp_param.offset = skb_network_offset(skb_in); + inet_sk(sk)->tos = tos; +- sk->sk_mark = mark; + ipcm_init(&ipc); + ipc.addr = iph->saddr; + ipc.opt = &icmp_param.replyopts.opt; ++ ipc.sockc.mark = mark; + + rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark, + type, code, &icmp_param); +--- a/net/ipv4/ip_output.c ++++ b/net/ipv4/ip_output.c +@@ -1702,7 +1702,7 @@ void ip_send_unicast_reply(struct sock * + sk->sk_protocol = ip_hdr(skb)->protocol; + sk->sk_bound_dev_if = arg->bound_dev_if; + sk->sk_sndbuf = sysctl_wmem_default; +- sk->sk_mark = fl4.flowi4_mark; ++ ipc.sockc.mark = fl4.flowi4_mark; + err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, + len, 0, &ipc, &rt, MSG_DONTWAIT); + if (unlikely(err)) { +--- a/net/ipv6/icmp.c ++++ b/net/ipv6/icmp.c +@@ -566,7 +566,6 @@ static void icmp6_send(struct sk_buff *s + fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL); + security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); + +- sk->sk_mark = mark; + np = inet6_sk(sk); + + if (!icmpv6_xrlim_allow(sk, type, &fl6)) +@@ -583,6 +582,7 @@ static void icmp6_send(struct sk_buff *s + fl6.flowi6_oif = np->ucast_oif; + + ipcm6_init_sk(&ipc6, np); ++ ipc6.sockc.mark = mark; + fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); + + dst = icmpv6_route_lookup(net, skb, sk, &fl6); +@@ -751,7 +751,6 @@ static void icmpv6_echo_reply(struct sk_ + sk = icmpv6_xmit_lock(net); + if (!sk) + goto out_bh_enable; +- sk->sk_mark = mark; + np = inet6_sk(sk); + + if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) +@@ -779,6 +778,7 @@ static void icmpv6_echo_reply(struct sk_ + ipcm6_init_sk(&ipc6, np); + ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); + ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb)); ++ ipc6.sockc.mark = mark; + + if (ip6_append_data(sk, icmpv6_getfrag, &msg, + skb->len + sizeof(struct icmp6hdr), diff --git a/queue-5.7/ipv4-fill-fl4_icmp_-type-code-in-ping_v4_sendmsg.patch b/queue-5.7/ipv4-fill-fl4_icmp_-type-code-in-ping_v4_sendmsg.patch new file mode 100644 index 00000000000..2b4223fcf17 --- /dev/null +++ b/queue-5.7/ipv4-fill-fl4_icmp_-type-code-in-ping_v4_sendmsg.patch @@ -0,0 +1,50 @@ +From foo@baz Fri 17 Jul 2020 08:39:20 AM CEST +From: Sabrina Dubroca +Date: Fri, 3 Jul 2020 17:00:32 +0200 +Subject: ipv4: fill fl4_icmp_{type,code} in ping_v4_sendmsg + +From: Sabrina Dubroca + +[ Upstream commit 5eff06902394425c722f0a44d9545909a8800f79 ] + +IPv4 ping sockets don't set fl4.fl4_icmp_{type,code}, which leads to +incomplete IPsec ACQUIRE messages being sent to userspace. Currently, +both raw sockets and IPv6 ping sockets set those fields. 
+ +Expected output of "ip xfrm monitor": + acquire proto esp + sel src 10.0.2.15/32 dst 8.8.8.8/32 proto icmp type 8 code 0 dev ens4 + policy src 10.0.2.15/32 dst 8.8.8.8/32 + + +Currently with ping sockets: + acquire proto esp + sel src 10.0.2.15/32 dst 8.8.8.8/32 proto icmp type 0 code 0 dev ens4 + policy src 10.0.2.15/32 dst 8.8.8.8/32 + + +The Libreswan test suite found this problem after Fedora changed the +value for the sysctl net.ipv4.ping_group_range. + +Fixes: c319b4d76b9e ("net: ipv4: add IPPROTO_ICMP socket kind") +Reported-by: Paul Wouters +Tested-by: Paul Wouters +Signed-off-by: Sabrina Dubroca +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ping.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/ipv4/ping.c ++++ b/net/ipv4/ping.c +@@ -786,6 +786,9 @@ static int ping_v4_sendmsg(struct sock * + inet_sk_flowi_flags(sk), faddr, saddr, 0, 0, + sk->sk_uid); + ++ fl4.fl4_icmp_type = user_icmph.type; ++ fl4.fl4_icmp_code = user_icmph.code; ++ + security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); + rt = ip_route_output_flow(net, &fl4, sk); + if (IS_ERR(rt)) { diff --git a/queue-5.7/ipv6-fib6_select_path-can-not-use-out-path-for-nexthop-objects.patch b/queue-5.7/ipv6-fib6_select_path-can-not-use-out-path-for-nexthop-objects.patch new file mode 100644 index 00000000000..d705ed28990 --- /dev/null +++ b/queue-5.7/ipv6-fib6_select_path-can-not-use-out-path-for-nexthop-objects.patch @@ -0,0 +1,73 @@ +From foo@baz Fri 17 Jul 2020 08:39:20 AM CEST +From: David Ahern +Date: Mon, 6 Jul 2020 11:45:07 -0600 +Subject: ipv6: fib6_select_path can not use out path for nexthop objects + +From: David Ahern + +[ Upstream commit 34fe5a1cf95c3f114068fc16d919c9cf4b00e428 ] + +Brian reported a crash in IPv6 code when using rpfilter with a setup +running FRR and external nexthop objects. The root cause of the crash +is fib6_select_path setting fib6_nh in the result to NULL because of +an improper check for nexthop objects. + +More specifically, rpfilter invokes ip6_route_lookup with flowi6_oif +set causing fib6_select_path to be called with have_oif_match set. +fib6_select_path has early check on have_oif_match and jumps to the +out label which presumes a builtin fib6_nh. This path is invalid for +nexthop objects; for external nexthops fib6_select_path needs to just +return if the fib6_nh has already been set in the result otherwise it +returns after the call to nexthop_path_fib6_result. Update the check +on have_oif_match to not bail on external nexthops. + +Update selftests for this problem. + +Fixes: f88d8ea67fbd ("ipv6: Plumb support for nexthop object in a fib6_info") +Reported-by: Brian Rak +Signed-off-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/route.c | 5 ++++- + tools/testing/selftests/net/fib_nexthops.sh | 13 +++++++++++++ + 2 files changed, 17 insertions(+), 1 deletion(-) + +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -431,9 +431,12 @@ void fib6_select_path(const struct net * + struct fib6_info *sibling, *next_sibling; + struct fib6_info *match = res->f6i; + +- if ((!match->fib6_nsiblings && !match->nh) || have_oif_match) ++ if (!match->nh && (!match->fib6_nsiblings || have_oif_match)) + goto out; + ++ if (match->nh && have_oif_match && res->nh) ++ return; ++ + /* We might have already computed the hash for ICMPv6 errors. In such + * case it will always be non-zero. Otherwise now is the time to do it. 
+ */ +--- a/tools/testing/selftests/net/fib_nexthops.sh ++++ b/tools/testing/selftests/net/fib_nexthops.sh +@@ -512,6 +512,19 @@ ipv6_fcnal_runtime() + run_cmd "$IP nexthop add id 86 via 2001:db8:91::2 dev veth1" + run_cmd "$IP ro add 2001:db8:101::1/128 nhid 81" + ++ # rpfilter and default route ++ $IP nexthop flush >/dev/null 2>&1 ++ run_cmd "ip netns exec me ip6tables -t mangle -I PREROUTING 1 -m rpfilter --invert -j DROP" ++ run_cmd "$IP nexthop add id 91 via 2001:db8:91::2 dev veth1" ++ run_cmd "$IP nexthop add id 92 via 2001:db8:92::2 dev veth3" ++ run_cmd "$IP nexthop add id 93 group 91/92" ++ run_cmd "$IP -6 ro add default nhid 91" ++ run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1" ++ log_test $? 0 "Nexthop with default route and rpfilter" ++ run_cmd "$IP -6 ro replace default nhid 93" ++ run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1" ++ log_test $? 0 "Nexthop with multipath default route and rpfilter" ++ + # TO-DO: + # existing route with old nexthop; append route with new nexthop + # existing route with old nexthop; replace route with new diff --git a/queue-5.7/ipv6-fix-use-of-anycast-address-with-loopback.patch b/queue-5.7/ipv6-fix-use-of-anycast-address-with-loopback.patch new file mode 100644 index 00000000000..21407ff5d7a --- /dev/null +++ b/queue-5.7/ipv6-fix-use-of-anycast-address-with-loopback.patch @@ -0,0 +1,43 @@ +From foo@baz Fri 17 Jul 2020 08:39:20 AM CEST +From: David Ahern +Date: Tue, 7 Jul 2020 07:39:24 -0600 +Subject: ipv6: Fix use of anycast address with loopback + +From: David Ahern + +[ Upstream commit aea23c323d89836bcdcee67e49def997ffca043b ] + +Thomas reported a regression with IPv6 and anycast using the following +reproducer: + + echo 1 > /proc/sys/net/ipv6/conf/all/forwarding + ip -6 a add fc12::1/16 dev lo + sleep 2 + echo "pinging lo" + ping6 -c 2 fc12:: + +The conversion of addrconf_f6i_alloc to use ip6_route_info_create missed +the use of fib6_is_reject which checks addresses added to the loopback +interface and sets the REJECT flag as needed. Update fib6_is_reject for +loopback checks to handle RTF_ANYCAST addresses. + +Fixes: c7a1ce397ada ("ipv6: Change addrconf_f6i_alloc to use ip6_route_info_create") +Reported-by: thomas.gambier@nexedi.com +Signed-off-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/route.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -3402,7 +3402,7 @@ static bool fib6_is_reject(u32 flags, st + if ((flags & RTF_REJECT) || + (dev && (dev->flags & IFF_LOOPBACK) && + !(addr_type & IPV6_ADDR_LOOPBACK) && +- !(flags & RTF_LOCAL))) ++ !(flags & (RTF_ANYCAST | RTF_LOCAL)))) + return true; + + return false; diff --git a/queue-5.7/l2tp-remove-skb_dst_set-from-l2tp_xmit_skb.patch b/queue-5.7/l2tp-remove-skb_dst_set-from-l2tp_xmit_skb.patch new file mode 100644 index 00000000000..c505c1d1ba8 --- /dev/null +++ b/queue-5.7/l2tp-remove-skb_dst_set-from-l2tp_xmit_skb.patch @@ -0,0 +1,60 @@ +From foo@baz Fri 17 Jul 2020 08:39:20 AM CEST +From: Xin Long +Date: Tue, 7 Jul 2020 02:02:32 +0800 +Subject: l2tp: remove skb_dst_set() from l2tp_xmit_skb() + +From: Xin Long + +[ Upstream commit 27d53323664c549b5bb2dfaaf6f7ad6e0376a64e ] + +In the tx path of l2tp, l2tp_xmit_skb() calls skb_dst_set() to set +skb's dst. However, it will eventually call inet6_csk_xmit() or +ip_queue_xmit() where skb's dst will be overwritten by: + + skb_dst_set_noref(skb, dst); + +without releasing the old dst in skb. 
Then it causes dst/dev refcnt leak: + + unregister_netdevice: waiting for eth0 to become free. Usage count = 1 + +This can be reproduced by simply running: + + # modprobe l2tp_eth && modprobe l2tp_ip + # sh ./tools/testing/selftests/net/l2tp.sh + +So before going to inet6_csk_xmit() or ip_queue_xmit(), skb's dst +should be dropped. This patch is to fix it by removing skb_dst_set() +from l2tp_xmit_skb() and moving skb_dst_drop() into l2tp_xmit_core(). + +Fixes: 3557baabf280 ("[L2TP]: PPP over L2TP driver core") +Reported-by: Hangbin Liu +Signed-off-by: Xin Long +Acked-by: James Chapman +Tested-by: James Chapman +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/l2tp/l2tp_core.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +--- a/net/l2tp/l2tp_core.c ++++ b/net/l2tp/l2tp_core.c +@@ -1028,6 +1028,7 @@ static void l2tp_xmit_core(struct l2tp_s + + /* Queue the packet to IP for output */ + skb->ignore_df = 1; ++ skb_dst_drop(skb); + #if IS_ENABLED(CONFIG_IPV6) + if (l2tp_sk_is_v6(tunnel->sock)) + error = inet6_csk_xmit(tunnel->sock, skb, NULL); +@@ -1099,10 +1100,6 @@ int l2tp_xmit_skb(struct l2tp_session *s + goto out_unlock; + } + +- /* Get routing info from the tunnel socket */ +- skb_dst_drop(skb); +- skb_dst_set(skb, sk_dst_check(sk, 0)); +- + inet = inet_sk(sk); + fl = &inet->cork.fl; + switch (tunnel->encap) { diff --git a/queue-5.7/llc-make-sure-applications-use-arphrd_ether.patch b/queue-5.7/llc-make-sure-applications-use-arphrd_ether.patch new file mode 100644 index 00000000000..cfdde567b92 --- /dev/null +++ b/queue-5.7/llc-make-sure-applications-use-arphrd_ether.patch @@ -0,0 +1,160 @@ +From foo@baz Fri 17 Jul 2020 08:39:20 AM CEST +From: Eric Dumazet +Date: Sat, 27 Jun 2020 13:31:50 -0700 +Subject: llc: make sure applications use ARPHRD_ETHER + +From: Eric Dumazet + +[ Upstream commit a9b1110162357689a34992d5c925852948e5b9fd ] + +syzbot was to trigger a bug by tricking AF_LLC with +non sensible addr->sllc_arphrd + +It seems clear LLC requires an Ethernet device. + +Back in commit abf9d537fea2 ("llc: add support for SO_BINDTODEVICE") +Octavian Purdila added possibility for application to use a zero +value for sllc_arphrd, convert it to ARPHRD_ETHER to not cause +regressions on existing applications. 
+ +BUG: KASAN: use-after-free in __read_once_size include/linux/compiler.h:199 [inline] +BUG: KASAN: use-after-free in list_empty include/linux/list.h:268 [inline] +BUG: KASAN: use-after-free in waitqueue_active include/linux/wait.h:126 [inline] +BUG: KASAN: use-after-free in wq_has_sleeper include/linux/wait.h:160 [inline] +BUG: KASAN: use-after-free in skwq_has_sleeper include/net/sock.h:2092 [inline] +BUG: KASAN: use-after-free in sock_def_write_space+0x642/0x670 net/core/sock.c:2813 +Read of size 8 at addr ffff88801e0b4078 by task ksoftirqd/3/27 + +CPU: 3 PID: 27 Comm: ksoftirqd/3 Not tainted 5.5.0-rc1-syzkaller #0 +Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014 +Call Trace: + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x197/0x210 lib/dump_stack.c:118 + print_address_description.constprop.0.cold+0xd4/0x30b mm/kasan/report.c:374 + __kasan_report.cold+0x1b/0x41 mm/kasan/report.c:506 + kasan_report+0x12/0x20 mm/kasan/common.c:639 + __asan_report_load8_noabort+0x14/0x20 mm/kasan/generic_report.c:135 + __read_once_size include/linux/compiler.h:199 [inline] + list_empty include/linux/list.h:268 [inline] + waitqueue_active include/linux/wait.h:126 [inline] + wq_has_sleeper include/linux/wait.h:160 [inline] + skwq_has_sleeper include/net/sock.h:2092 [inline] + sock_def_write_space+0x642/0x670 net/core/sock.c:2813 + sock_wfree+0x1e1/0x260 net/core/sock.c:1958 + skb_release_head_state+0xeb/0x260 net/core/skbuff.c:652 + skb_release_all+0x16/0x60 net/core/skbuff.c:663 + __kfree_skb net/core/skbuff.c:679 [inline] + consume_skb net/core/skbuff.c:838 [inline] + consume_skb+0xfb/0x410 net/core/skbuff.c:832 + __dev_kfree_skb_any+0xa4/0xd0 net/core/dev.c:2967 + dev_kfree_skb_any include/linux/netdevice.h:3650 [inline] + e1000_unmap_and_free_tx_resource.isra.0+0x21b/0x3a0 drivers/net/ethernet/intel/e1000/e1000_main.c:1963 + e1000_clean_tx_irq drivers/net/ethernet/intel/e1000/e1000_main.c:3854 [inline] + e1000_clean+0x4cc/0x1d10 drivers/net/ethernet/intel/e1000/e1000_main.c:3796 + napi_poll net/core/dev.c:6532 [inline] + net_rx_action+0x508/0x1120 net/core/dev.c:6600 + __do_softirq+0x262/0x98c kernel/softirq.c:292 + run_ksoftirqd kernel/softirq.c:603 [inline] + run_ksoftirqd+0x8e/0x110 kernel/softirq.c:595 + smpboot_thread_fn+0x6a3/0xa40 kernel/smpboot.c:165 + kthread+0x361/0x430 kernel/kthread.c:255 + ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:352 + +Allocated by task 8247: + save_stack+0x23/0x90 mm/kasan/common.c:72 + set_track mm/kasan/common.c:80 [inline] + __kasan_kmalloc mm/kasan/common.c:513 [inline] + __kasan_kmalloc.constprop.0+0xcf/0xe0 mm/kasan/common.c:486 + kasan_slab_alloc+0xf/0x20 mm/kasan/common.c:521 + slab_post_alloc_hook mm/slab.h:584 [inline] + slab_alloc mm/slab.c:3320 [inline] + kmem_cache_alloc+0x121/0x710 mm/slab.c:3484 + sock_alloc_inode+0x1c/0x1d0 net/socket.c:240 + alloc_inode+0x68/0x1e0 fs/inode.c:230 + new_inode_pseudo+0x19/0xf0 fs/inode.c:919 + sock_alloc+0x41/0x270 net/socket.c:560 + __sock_create+0xc2/0x730 net/socket.c:1384 + sock_create net/socket.c:1471 [inline] + __sys_socket+0x103/0x220 net/socket.c:1513 + __do_sys_socket net/socket.c:1522 [inline] + __se_sys_socket net/socket.c:1520 [inline] + __ia32_sys_socket+0x73/0xb0 net/socket.c:1520 + do_syscall_32_irqs_on arch/x86/entry/common.c:337 [inline] + do_fast_syscall_32+0x27b/0xe16 arch/x86/entry/common.c:408 + entry_SYSENTER_compat+0x70/0x7f arch/x86/entry/entry_64_compat.S:139 + +Freed by task 17: + save_stack+0x23/0x90 
mm/kasan/common.c:72 + set_track mm/kasan/common.c:80 [inline] + kasan_set_free_info mm/kasan/common.c:335 [inline] + __kasan_slab_free+0x102/0x150 mm/kasan/common.c:474 + kasan_slab_free+0xe/0x10 mm/kasan/common.c:483 + __cache_free mm/slab.c:3426 [inline] + kmem_cache_free+0x86/0x320 mm/slab.c:3694 + sock_free_inode+0x20/0x30 net/socket.c:261 + i_callback+0x44/0x80 fs/inode.c:219 + __rcu_reclaim kernel/rcu/rcu.h:222 [inline] + rcu_do_batch kernel/rcu/tree.c:2183 [inline] + rcu_core+0x570/0x1540 kernel/rcu/tree.c:2408 + rcu_core_si+0x9/0x10 kernel/rcu/tree.c:2417 + __do_softirq+0x262/0x98c kernel/softirq.c:292 + +The buggy address belongs to the object at ffff88801e0b4000 + which belongs to the cache sock_inode_cache of size 1152 +The buggy address is located 120 bytes inside of + 1152-byte region [ffff88801e0b4000, ffff88801e0b4480) +The buggy address belongs to the page: +page:ffffea0000782d00 refcount:1 mapcount:0 mapping:ffff88807aa59c40 index:0xffff88801e0b4ffd +raw: 00fffe0000000200 ffffea00008e6c88 ffffea0000782d48 ffff88807aa59c40 +raw: ffff88801e0b4ffd ffff88801e0b4000 0000000100000003 0000000000000000 +page dumped because: kasan: bad access detected + +Memory state around the buggy address: + ffff88801e0b3f00: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc + ffff88801e0b3f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc +>ffff88801e0b4000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ^ + ffff88801e0b4080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ffff88801e0b4100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + +Fixes: abf9d537fea2 ("llc: add support for SO_BINDTODEVICE") +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/llc/af_llc.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +--- a/net/llc/af_llc.c ++++ b/net/llc/af_llc.c +@@ -273,6 +273,10 @@ static int llc_ui_autobind(struct socket + + if (!sock_flag(sk, SOCK_ZAPPED)) + goto out; ++ if (!addr->sllc_arphrd) ++ addr->sllc_arphrd = ARPHRD_ETHER; ++ if (addr->sllc_arphrd != ARPHRD_ETHER) ++ goto out; + rc = -ENODEV; + if (sk->sk_bound_dev_if) { + llc->dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); +@@ -328,7 +332,9 @@ static int llc_ui_bind(struct socket *so + if (unlikely(!sock_flag(sk, SOCK_ZAPPED) || addrlen != sizeof(*addr))) + goto out; + rc = -EAFNOSUPPORT; +- if (unlikely(addr->sllc_family != AF_LLC)) ++ if (!addr->sllc_arphrd) ++ addr->sllc_arphrd = ARPHRD_ETHER; ++ if (unlikely(addr->sllc_family != AF_LLC || addr->sllc_arphrd != ARPHRD_ETHER)) + goto out; + dprintk("%s: binding %02X\n", __func__, addr->sllc_sap); + rc = -ENODEV; +@@ -336,8 +342,6 @@ static int llc_ui_bind(struct socket *so + if (sk->sk_bound_dev_if) { + llc->dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if); + if (llc->dev) { +- if (!addr->sllc_arphrd) +- addr->sllc_arphrd = llc->dev->type; + if (is_zero_ether_addr(addr->sllc_mac)) + memcpy(addr->sllc_mac, llc->dev->dev_addr, + IFHWADDRLEN); diff --git a/queue-5.7/mptcp-fix-dss-map-generation-on-fin-retransmission.patch b/queue-5.7/mptcp-fix-dss-map-generation-on-fin-retransmission.patch new file mode 100644 index 00000000000..02d962a5be1 --- /dev/null +++ b/queue-5.7/mptcp-fix-dss-map-generation-on-fin-retransmission.patch @@ -0,0 +1,56 @@ +From foo@baz Fri 17 Jul 2020 08:39:20 AM CEST +From: Paolo Abeni +Date: Fri, 3 Jul 2020 18:06:04 +0200 +Subject: mptcp: fix DSS map generation on fin retransmission + +From: Paolo Abeni + +[ Upstream commit 9c29e36152748fd623fcff6cc8f538550f9eeafc ] 
+ +The RFC 8684 mandates that no-data DATA FIN packets should carry +a DSS with 0 sequence number and data len equal to 1. Currently, +on FIN retransmission we re-use the existing mapping; if the previous +fin transmission was part of a partially acked data packet, we could +end-up writing in the egress packet a non-compliant DSS. + +The above will be detected by a "Bad mapping" warning on the receiver +side. + +This change addresses the issue explicitly checking for 0 len packet +when adding the DATA_FIN option. + +Fixes: 6d0060f600ad ("mptcp: Write MPTCP DSS headers to outgoing data packets") +Reported-by: syzbot+42a07faa5923cfaeb9c9@syzkaller.appspotmail.com +Tested-by: Christoph Paasch +Reviewed-by: Christoph Paasch +Reviewed-by: Mat Martineau +Signed-off-by: Paolo Abeni +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/options.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/net/mptcp/options.c ++++ b/net/mptcp/options.c +@@ -449,9 +449,9 @@ static bool mptcp_established_options_mp + } + + static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow, +- struct mptcp_ext *ext) ++ struct sk_buff *skb, struct mptcp_ext *ext) + { +- if (!ext->use_map) { ++ if (!ext->use_map || !skb->len) { + /* RFC6824 requires a DSS mapping with specific values + * if DATA_FIN is set but no data payload is mapped + */ +@@ -503,7 +503,7 @@ static bool mptcp_established_options_ds + opts->ext_copy = *mpext; + + if (skb && tcp_fin && subflow->data_fin_tx_enable) +- mptcp_write_data_fin(subflow, &opts->ext_copy); ++ mptcp_write_data_fin(subflow, skb, &opts->ext_copy); + ret = true; + } + diff --git a/queue-5.7/net-added-pointer-check-for-dst-ops-neigh_lookup-in-dst_neigh_lookup_skb.patch b/queue-5.7/net-added-pointer-check-for-dst-ops-neigh_lookup-in-dst_neigh_lookup_skb.patch new file mode 100644 index 00000000000..57ae05d70c6 --- /dev/null +++ b/queue-5.7/net-added-pointer-check-for-dst-ops-neigh_lookup-in-dst_neigh_lookup_skb.patch @@ -0,0 +1,121 @@ +From foo@baz Fri 17 Jul 2020 08:39:20 AM CEST +From: Martin Varghese +Date: Sun, 5 Jul 2020 14:23:49 +0530 +Subject: net: Added pointer check for dst->ops->neigh_lookup in dst_neigh_lookup_skb + +From: Martin Varghese + +[ Upstream commit 394de110a73395de2ca4516b0de435e91b11b604 ] + +The packets from tunnel devices (eg bareudp) may have only +metadata in the dst pointer of skb. Hence a pointer check of +neigh_lookup is needed in dst_neigh_lookup_skb + +Kernel crashes when packets from bareudp device is processed in +the kernel neighbour subsytem. + +[ 133.384484] BUG: kernel NULL pointer dereference, address: 0000000000000000 +[ 133.385240] #PF: supervisor instruction fetch in kernel mode +[ 133.385828] #PF: error_code(0x0010) - not-present page +[ 133.386603] PGD 0 P4D 0 +[ 133.386875] Oops: 0010 [#1] SMP PTI +[ 133.387275] CPU: 0 PID: 5045 Comm: ping Tainted: G W 5.8.0-rc2+ #15 +[ 133.388052] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 +[ 133.391076] RIP: 0010:0x0 +[ 133.392401] Code: Bad RIP value. 
+[ 133.394029] RSP: 0018:ffffb79980003d50 EFLAGS: 00010246 +[ 133.396656] RAX: 0000000080000102 RBX: ffff9de2fe0d6600 RCX: ffff9de2fe5e9d00 +[ 133.399018] RDX: 0000000000000000 RSI: ffff9de2fe5e9d00 RDI: ffff9de2fc21b400 +[ 133.399685] RBP: ffff9de2fe5e9d00 R08: 0000000000000000 R09: 0000000000000000 +[ 133.400350] R10: ffff9de2fbc6be22 R11: ffff9de2fe0d6600 R12: ffff9de2fc21b400 +[ 133.401010] R13: ffff9de2fe0d6628 R14: 0000000000000001 R15: 0000000000000003 +[ 133.401667] FS: 00007fe014918740(0000) GS:ffff9de2fec00000(0000) knlGS:0000000000000000 +[ 133.402412] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 133.402948] CR2: ffffffffffffffd6 CR3: 000000003bb72000 CR4: 00000000000006f0 +[ 133.403611] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[ 133.404270] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +[ 133.404933] Call Trace: +[ 133.405169] +[ 133.405367] __neigh_update+0x5a4/0x8f0 +[ 133.405734] arp_process+0x294/0x820 +[ 133.406076] ? __netif_receive_skb_core+0x866/0xe70 +[ 133.406557] arp_rcv+0x129/0x1c0 +[ 133.406882] __netif_receive_skb_one_core+0x95/0xb0 +[ 133.407340] process_backlog+0xa7/0x150 +[ 133.407705] net_rx_action+0x2af/0x420 +[ 133.408457] __do_softirq+0xda/0x2a8 +[ 133.408813] asm_call_on_stack+0x12/0x20 +[ 133.409290] +[ 133.409519] do_softirq_own_stack+0x39/0x50 +[ 133.410036] do_softirq+0x50/0x60 +[ 133.410401] __local_bh_enable_ip+0x50/0x60 +[ 133.410871] ip_finish_output2+0x195/0x530 +[ 133.411288] ip_output+0x72/0xf0 +[ 133.411673] ? __ip_finish_output+0x1f0/0x1f0 +[ 133.412122] ip_send_skb+0x15/0x40 +[ 133.412471] raw_sendmsg+0x853/0xab0 +[ 133.412855] ? insert_pfn+0xfe/0x270 +[ 133.413827] ? vvar_fault+0xec/0x190 +[ 133.414772] sock_sendmsg+0x57/0x80 +[ 133.415685] __sys_sendto+0xdc/0x160 +[ 133.416605] ? syscall_trace_enter+0x1d4/0x2b0 +[ 133.417679] ? __audit_syscall_exit+0x1d9/0x280 +[ 133.418753] ? __prepare_exit_to_usermode+0x5d/0x1a0 +[ 133.419819] __x64_sys_sendto+0x24/0x30 +[ 133.420848] do_syscall_64+0x4d/0x90 +[ 133.421768] entry_SYSCALL_64_after_hwframe+0x44/0xa9 +[ 133.422833] RIP: 0033:0x7fe013689c03 +[ 133.423749] Code: Bad RIP value. 
+[ 133.424624] RSP: 002b:00007ffc7288f418 EFLAGS: 00000246 ORIG_RAX: 000000000000002c +[ 133.425940] RAX: ffffffffffffffda RBX: 000056151fc63720 RCX: 00007fe013689c03 +[ 133.427225] RDX: 0000000000000040 RSI: 000056151fc63720 RDI: 0000000000000003 +[ 133.428481] RBP: 00007ffc72890b30 R08: 000056151fc60500 R09: 0000000000000010 +[ 133.429757] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000040 +[ 133.431041] R13: 000056151fc636e0 R14: 000056151fc616bc R15: 0000000000000080 +[ 133.432481] Modules linked in: mpls_iptunnel act_mirred act_tunnel_key cls_flower sch_ingress veth mpls_router ip_tunnel bareudp ip6_udp_tunnel udp_tunnel macsec udp_diag inet_diag unix_diag af_packet_diag netlink_diag binfmt_misc xt_MASQUERADE iptable_nat xt_addrtype xt_conntrack nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 br_netfilter bridge stp llc ebtable_filter ebtables overlay ip6table_filter ip6_tables iptable_filter sunrpc ext4 mbcache jbd2 pcspkr i2c_piix4 virtio_balloon joydev ip_tables xfs libcrc32c ata_generic qxl pata_acpi drm_ttm_helper ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm ata_piix libata virtio_net net_failover virtio_console failover virtio_blk i2c_core virtio_pci virtio_ring serio_raw floppy virtio dm_mirror dm_region_hash dm_log dm_mod +[ 133.444045] CR2: 0000000000000000 +[ 133.445082] ---[ end trace f4aeee1958fd1638 ]--- +[ 133.446236] RIP: 0010:0x0 +[ 133.447180] Code: Bad RIP value. +[ 133.448152] RSP: 0018:ffffb79980003d50 EFLAGS: 00010246 +[ 133.449363] RAX: 0000000080000102 RBX: ffff9de2fe0d6600 RCX: ffff9de2fe5e9d00 +[ 133.450835] RDX: 0000000000000000 RSI: ffff9de2fe5e9d00 RDI: ffff9de2fc21b400 +[ 133.452237] RBP: ffff9de2fe5e9d00 R08: 0000000000000000 R09: 0000000000000000 +[ 133.453722] R10: ffff9de2fbc6be22 R11: ffff9de2fe0d6600 R12: ffff9de2fc21b400 +[ 133.455149] R13: ffff9de2fe0d6628 R14: 0000000000000001 R15: 0000000000000003 +[ 133.456520] FS: 00007fe014918740(0000) GS:ffff9de2fec00000(0000) knlGS:0000000000000000 +[ 133.458046] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 133.459342] CR2: ffffffffffffffd6 CR3: 000000003bb72000 CR4: 00000000000006f0 +[ 133.460782] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[ 133.462240] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +[ 133.463697] Kernel panic - not syncing: Fatal exception in interrupt +[ 133.465226] Kernel Offset: 0xfa00000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) +[ 133.467025] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- + +Fixes: aaa0c23cb901 ("Fix dst_neigh_lookup/dst_neigh_lookup_skb return value handling bug") +Signed-off-by: Martin Varghese +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/dst.h | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +--- a/include/net/dst.h ++++ b/include/net/dst.h +@@ -400,7 +400,15 @@ static inline struct neighbour *dst_neig + static inline struct neighbour *dst_neigh_lookup_skb(const struct dst_entry *dst, + struct sk_buff *skb) + { +- struct neighbour *n = dst->ops->neigh_lookup(dst, skb, NULL); ++ struct neighbour *n = NULL; ++ ++ /* The packets from tunnel devices (eg bareudp) may have only ++ * metadata in the dst pointer of skb. Hence a pointer check of ++ * neigh_lookup is needed. ++ */ ++ if (dst->ops->neigh_lookup) ++ n = dst->ops->neigh_lookup(dst, skb, NULL); ++ + return IS_ERR(n) ? 
NULL : n; + } + diff --git a/queue-5.7/net-dsa-microchip-set-the-correct-number-of-ports.patch b/queue-5.7/net-dsa-microchip-set-the-correct-number-of-ports.patch new file mode 100644 index 00000000000..8b47149470b --- /dev/null +++ b/queue-5.7/net-dsa-microchip-set-the-correct-number-of-ports.patch @@ -0,0 +1,49 @@ +From foo@baz Fri 17 Jul 2020 08:39:20 AM CEST +From: Codrin Ciubotariu +Date: Thu, 2 Jul 2020 12:44:50 +0300 +Subject: net: dsa: microchip: set the correct number of ports + +From: Codrin Ciubotariu + +[ Upstream commit af199a1a9cb02ec0194804bd46c174b6db262075 ] + +The number of ports is incorrectly set to the maximum available for a DSA +switch. Even if the extra ports are not used, this causes some functions +to be called later, like port_disable() and port_stp_state_set(). If the +driver doesn't check the port index, it will end up modifying unknown +registers. + +Fixes: b987e98e50ab ("dsa: add DSA switch driver for Microchip KSZ9477") +Signed-off-by: Codrin Ciubotariu +Reviewed-by: Andrew Lunn +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/microchip/ksz8795.c | 3 +++ + drivers/net/dsa/microchip/ksz9477.c | 3 +++ + 2 files changed, 6 insertions(+) + +--- a/drivers/net/dsa/microchip/ksz8795.c ++++ b/drivers/net/dsa/microchip/ksz8795.c +@@ -1271,6 +1271,9 @@ static int ksz8795_switch_init(struct ks + /* set the real number of ports */ + dev->ds->num_ports = dev->port_cnt; + ++ /* set the real number of ports */ ++ dev->ds->num_ports = dev->port_cnt; ++ + return 0; + } + +--- a/drivers/net/dsa/microchip/ksz9477.c ++++ b/drivers/net/dsa/microchip/ksz9477.c +@@ -516,6 +516,9 @@ static int ksz9477_port_vlan_filtering(s + PORT_VLAN_LOOKUP_VID_0, false); + } + ++ /* set the real number of ports */ ++ dev->ds->num_ports = dev->port_cnt; ++ + return 0; + } + diff --git a/queue-5.7/net-ipa-always-check-for-stopped-channel.patch b/queue-5.7/net-ipa-always-check-for-stopped-channel.patch new file mode 100644 index 00000000000..cbd1e66cf5c --- /dev/null +++ b/queue-5.7/net-ipa-always-check-for-stopped-channel.patch @@ -0,0 +1,53 @@ +From foo@baz Fri 17 Jul 2020 08:39:20 AM CEST +From: Alex Elder +Date: Tue, 30 Jun 2020 07:44:42 -0500 +Subject: net: ipa: always check for stopped channel + +From: Alex Elder + +[ Upstream commit 5468cbcddf47f674829c6ada190283108a63d7b5 ] + +In gsi_channel_stop(), there's a check to see if the channel might +have entered STOPPED state since a previous call, which might have +timed out before stopping completed. + +That check actually belongs in gsi_channel_stop_command(), which is +called repeatedly by gsi_channel_stop() for RX channels. + +Fixes: 650d1603825d ("soc: qcom: ipa: the generic software interface") +Signed-off-by: Alex Elder +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ipa/gsi.c | 13 ++++++------- + 1 file changed, 6 insertions(+), 7 deletions(-) + +--- a/drivers/net/ipa/gsi.c ++++ b/drivers/net/ipa/gsi.c +@@ -490,6 +490,12 @@ static int gsi_channel_stop_command(stru + enum gsi_channel_state state = channel->state; + int ret; + ++ /* Channel could have entered STOPPED state since last call ++ * if it timed out. If so, we're done. 
++ */ ++ if (state == GSI_CHANNEL_STATE_STOPPED) ++ return 0; ++ + if (state != GSI_CHANNEL_STATE_STARTED && + state != GSI_CHANNEL_STATE_STOP_IN_PROC) + return -EINVAL; +@@ -773,13 +779,6 @@ int gsi_channel_stop(struct gsi *gsi, u3 + + gsi_channel_freeze(channel); + +- /* Channel could have entered STOPPED state since last call if the +- * STOP command timed out. We won't stop a channel if stopping it +- * was successful previously (so we still want the freeze above). +- */ +- if (channel->state == GSI_CHANNEL_STATE_STOPPED) +- return 0; +- + /* RX channels might require a little time to enter STOPPED state */ + retries = channel->toward_ipa ? 0 : GSI_CHANNEL_STOP_RX_RETRIES; + diff --git a/queue-5.7/net-ipa-introduce-ipa_cmd_tag_process.patch b/queue-5.7/net-ipa-introduce-ipa_cmd_tag_process.patch new file mode 100644 index 00000000000..1a767fdab55 --- /dev/null +++ b/queue-5.7/net-ipa-introduce-ipa_cmd_tag_process.patch @@ -0,0 +1,78 @@ +From foo@baz Fri 17 Jul 2020 08:39:20 AM CEST +From: Alex Elder +Date: Tue, 30 Jun 2020 07:44:44 -0500 +Subject: net: ipa: introduce ipa_cmd_tag_process() + +From: Alex Elder + +[ Upstream commit 6cb63ea6a39eac9640d109f274a237b34350c183 ] + +Create a new function ipa_cmd_tag_process() that simply allocates a +transaction, adds a tag process command to it to clear the hardware +pipeline, and commits the transaction. + +Call it in from ipa_endpoint_suspend(), after suspending the modem +endpoints but before suspending the AP command TX and AP LAN RX +endpoints (which are used by the tag sequence). + +Signed-off-by: Alex Elder +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ipa/ipa_cmd.c | 15 +++++++++++++++ + drivers/net/ipa/ipa_cmd.h | 8 ++++++++ + drivers/net/ipa/ipa_endpoint.c | 2 ++ + 3 files changed, 25 insertions(+) + +--- a/drivers/net/ipa/ipa_cmd.c ++++ b/drivers/net/ipa/ipa_cmd.c +@@ -645,6 +645,21 @@ u32 ipa_cmd_tag_process_count(void) + return 4; + } + ++void ipa_cmd_tag_process(struct ipa *ipa) ++{ ++ u32 count = ipa_cmd_tag_process_count(); ++ struct gsi_trans *trans; ++ ++ trans = ipa_cmd_trans_alloc(ipa, count); ++ if (trans) { ++ ipa_cmd_tag_process_add(trans); ++ gsi_trans_commit_wait(trans); ++ } else { ++ dev_err(&ipa->pdev->dev, ++ "error allocating %u entry tag transaction\n", count); ++ } ++} ++ + static struct ipa_cmd_info * + ipa_cmd_info_alloc(struct ipa_endpoint *endpoint, u32 tre_count) + { +--- a/drivers/net/ipa/ipa_cmd.h ++++ b/drivers/net/ipa/ipa_cmd.h +@@ -183,6 +183,14 @@ void ipa_cmd_tag_process_add(struct gsi_ + u32 ipa_cmd_tag_process_count(void); + + /** ++ * ipa_cmd_tag_process() - Perform a tag process ++ * ++ * @Return: The number of elements to allocate in a transaction ++ * to hold tag process commands ++ */ ++void ipa_cmd_tag_process(struct ipa *ipa); ++ ++/** + * ipa_cmd_trans_alloc() - Allocate a transaction for the command TX endpoint + * @ipa: IPA pointer + * @tre_count: Number of elements in the transaction +--- a/drivers/net/ipa/ipa_endpoint.c ++++ b/drivers/net/ipa/ipa_endpoint.c +@@ -1485,6 +1485,8 @@ void ipa_endpoint_suspend(struct ipa *ip + if (ipa->modem_netdev) + ipa_modem_suspend(ipa->modem_netdev); + ++ ipa_cmd_tag_process(ipa); ++ + ipa_endpoint_suspend_one(ipa->name_map[IPA_ENDPOINT_AP_LAN_RX]); + ipa_endpoint_suspend_one(ipa->name_map[IPA_ENDPOINT_AP_COMMAND_TX]); + } diff --git a/queue-5.7/net-ipv4-fix-wrong-type-conversion-from-hint-to-rt-in-ip_route_use_hint.patch b/queue-5.7/net-ipv4-fix-wrong-type-conversion-from-hint-to-rt-in-ip_route_use_hint.patch new 
file mode 100644 index 00000000000..eb79972a4af --- /dev/null +++ b/queue-5.7/net-ipv4-fix-wrong-type-conversion-from-hint-to-rt-in-ip_route_use_hint.patch @@ -0,0 +1,30 @@ +From foo@baz Fri 17 Jul 2020 08:39:20 AM CEST +From: Miaohe Lin +Date: Sat, 27 Jun 2020 15:47:51 +0800 +Subject: net: ipv4: Fix wrong type conversion from hint to rt in ip_route_use_hint() + +From: Miaohe Lin + +[ Upstream commit 2ce578ca9444bb44da66b9a494f56e7ec12e6466 ] + +We can't cast sk_buff to rtable by (struct rtable *)hint. Use skb_rtable(). + +Fixes: 02b24941619f ("ipv4: use dst hint for ipv4 list receive") +Signed-off-by: Miaohe Lin +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/route.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -2027,7 +2027,7 @@ int ip_route_use_hint(struct sk_buff *sk + const struct sk_buff *hint) + { + struct in_device *in_dev = __in_dev_get_rcu(dev); +- struct rtable *rt = (struct rtable *)hint; ++ struct rtable *rt = skb_rtable(hint); + struct net *net = dev_net(dev); + int err = -EINVAL; + u32 tag = 0; diff --git a/queue-5.7/net-qrtr-free-flow-in-__qrtr_node_release.patch b/queue-5.7/net-qrtr-free-flow-in-__qrtr_node_release.patch new file mode 100644 index 00000000000..666c1e1457e --- /dev/null +++ b/queue-5.7/net-qrtr-free-flow-in-__qrtr_node_release.patch @@ -0,0 +1,57 @@ +From foo@baz Fri 17 Jul 2020 08:39:20 AM CEST +From: Carl Huang +Date: Tue, 30 Jun 2020 14:52:51 +0800 +Subject: net: qrtr: free flow in __qrtr_node_release + +From: Carl Huang + +[ Upstream commit 28541f3d324f6de1e545e2875283b6cef95c5d36 ] + +The flow is allocated in qrtr_tx_wait, but not freed when qrtr node +is released. (*slot) becomes NULL after radix_tree_iter_delete is +called in __qrtr_node_release. The fix is to save (*slot) to a +vairable and then free it. + +This memory leak is catched when kmemleak is enabled in kernel, +the report looks like below: + +unreferenced object 0xffffa0de69e08420 (size 32): + comm "kworker/u16:3", pid 176, jiffies 4294918275 (age 82858.876s) + hex dump (first 32 bytes): + 00 00 00 00 00 00 00 00 28 84 e0 69 de a0 ff ff ........(..i.... + 28 84 e0 69 de a0 ff ff 03 00 00 00 00 00 00 00 (..i............ + backtrace: + [<00000000e252af0a>] qrtr_node_enqueue+0x38e/0x400 [qrtr] + [<000000009cea437f>] qrtr_sendmsg+0x1e0/0x2a0 [qrtr] + [<000000008bddbba4>] sock_sendmsg+0x5b/0x60 + [<0000000003beb43a>] qmi_send_message.isra.3+0xbe/0x110 [qmi_helpers] + [<000000009c9ae7de>] qmi_send_request+0x1c/0x20 [qmi_helpers] + +Signed-off-by: Carl Huang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/qrtr/qrtr.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/qrtr/qrtr.c ++++ b/net/qrtr/qrtr.c +@@ -166,6 +166,7 @@ static void __qrtr_node_release(struct k + { + struct qrtr_node *node = container_of(kref, struct qrtr_node, ref); + struct radix_tree_iter iter; ++ struct qrtr_tx_flow *flow; + unsigned long flags; + void __rcu **slot; + +@@ -181,8 +182,9 @@ static void __qrtr_node_release(struct k + + /* Free tx flow counters */ + radix_tree_for_each_slot(slot, &node->qrtr_tx_flow, &iter, 0) { ++ flow = *slot; + radix_tree_iter_delete(&node->qrtr_tx_flow, &iter, slot); +- kfree(*slot); ++ kfree(flow); + } + kfree(node); + } diff --git a/queue-5.7/net-rmnet-do-not-allow-to-add-multiple-bridge-interfaces.patch b/queue-5.7/net-rmnet-do-not-allow-to-add-multiple-bridge-interfaces.patch new file mode 100644 index 00000000000..5c69bd2b8f8 --- /dev/null +++ b/queue-5.7/net-rmnet-do-not-allow-to-add-multiple-bridge-interfaces.patch @@ -0,0 +1,84 @@ +From foo@baz Fri 17 Jul 2020 08:39:20 AM CEST +From: Taehee Yoo +Date: Thu, 2 Jul 2020 17:08:55 +0000 +Subject: net: rmnet: do not allow to add multiple bridge interfaces + +From: Taehee Yoo + +[ Upstream commit 2fb2799a2abb39d7dbb48abb3baa1133bf5e921a ] + +rmnet can have only two bridge interface. +One of them is a link interface and another one is added by +the master operation. +rmnet interface shouldn't allow adding additional +bridge interfaces by mater operation. +But, there is no code to deny additional interfaces. +So, interface leak occurs. + +Test commands: + ip link add dummy0 type dummy + ip link add dummy1 type dummy + ip link add dummy2 type dummy + ip link add rmnet0 link dummy0 type rmnet mux_id 1 + ip link set dummy1 master rmnet0 + ip link set dummy2 master rmnet0 + ip link del rmnet0 + +In the above test command, the dummy0 was attached to rmnet as VND mode. +Then, dummy1 was attached to rmnet0 as BRIDGE mode. +At this point, dummy0 mode is switched from VND to BRIDGE automatically. +Then, dummy2 is attached to rmnet as BRIDGE mode. +At this point, rmnet0 should deny this operation. +But, rmnet0 doesn't deny this. +So that below splat occurs when the rmnet0 interface is deleted. 
+ +Splat looks like: +[ 186.684787][ C2] WARNING: CPU: 2 PID: 1009 at net/core/dev.c:8992 rollback_registered_many+0x986/0xcf0 +[ 186.684788][ C2] Modules linked in: rmnet dummy openvswitch nsh nf_conncount nf_nat nf_conntrack nf_defrag_x +[ 186.684805][ C2] CPU: 2 PID: 1009 Comm: ip Not tainted 5.8.0-rc1+ #621 +[ 186.684807][ C2] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 +[ 186.684808][ C2] RIP: 0010:rollback_registered_many+0x986/0xcf0 +[ 186.684811][ C2] Code: 41 8b 4e cc 45 31 c0 31 d2 4c 89 ee 48 89 df e8 e0 47 ff ff 85 c0 0f 84 cd fc ff ff 5 +[ 186.684812][ C2] RSP: 0018:ffff8880cd9472e0 EFLAGS: 00010287 +[ 186.684815][ C2] RAX: ffff8880cc56da58 RBX: ffff8880ab21c000 RCX: ffffffff9329d323 +[ 186.684816][ C2] RDX: 1ffffffff2be6410 RSI: 0000000000000008 RDI: ffffffff95f32080 +[ 186.684818][ C2] RBP: dffffc0000000000 R08: fffffbfff2be6411 R09: fffffbfff2be6411 +[ 186.684819][ C2] R10: ffffffff95f32087 R11: 0000000000000001 R12: ffff8880cd947480 +[ 186.684820][ C2] R13: ffff8880ab21c0b8 R14: ffff8880cd947400 R15: ffff8880cdf10640 +[ 186.684822][ C2] FS: 00007f00843890c0(0000) GS:ffff8880d4e00000(0000) knlGS:0000000000000000 +[ 186.684823][ C2] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 186.684825][ C2] CR2: 000055b8ab1077b8 CR3: 00000000ab612006 CR4: 00000000000606e0 +[ 186.684826][ C2] Call Trace: +[ 186.684827][ C2] ? lockdep_hardirqs_on_prepare+0x379/0x540 +[ 186.684829][ C2] ? netif_set_real_num_tx_queues+0x780/0x780 +[ 186.684830][ C2] ? rmnet_unregister_real_device+0x56/0x90 [rmnet] +[ 186.684831][ C2] ? __kasan_slab_free+0x126/0x150 +[ 186.684832][ C2] ? kfree+0xdc/0x320 +[ 186.684834][ C2] ? rmnet_unregister_real_device+0x56/0x90 [rmnet] +[ 186.684835][ C2] unregister_netdevice_many.part.135+0x13/0x1b0 +[ 186.684836][ C2] rtnl_delete_link+0xbc/0x100 +[ ... ] +[ 238.440071][ T1009] unregister_netdevice: waiting for rmnet0 to become free. Usage count = 1 + +Fixes: 037f9cdf72fb ("net: rmnet: use upper/lower device infrastructure") +Signed-off-by: Taehee Yoo +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c ++++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +@@ -434,6 +434,11 @@ int rmnet_add_bridge(struct net_device * + return -EINVAL; + } + ++ if (port->rmnet_mode != RMNET_EPMODE_VND) { ++ NL_SET_ERR_MSG_MOD(extack, "more than one bridge dev attached"); ++ return -EINVAL; ++ } ++ + if (rmnet_is_real_dev_registered(slave_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "slave cannot be another rmnet dev"); diff --git a/queue-5.7/net-usb-qmi_wwan-add-support-for-quectel-eg95-lte-modem.patch b/queue-5.7/net-usb-qmi_wwan-add-support-for-quectel-eg95-lte-modem.patch new file mode 100644 index 00000000000..22e0097906c --- /dev/null +++ b/queue-5.7/net-usb-qmi_wwan-add-support-for-quectel-eg95-lte-modem.patch @@ -0,0 +1,41 @@ +From foo@baz Fri 17 Jul 2020 08:39:20 AM CEST +From: AceLan Kao +Date: Tue, 7 Jul 2020 16:14:45 +0800 +Subject: net: usb: qmi_wwan: add support for Quectel EG95 LTE modem + +From: AceLan Kao + +[ Upstream commit f815dd5cf48b905eeecf0a2b990e9b7ab048b4f1 ] + +Add support for Quectel Wireless Solutions Co., Ltd. 
EG95 LTE modem + +T: Bus=01 Lev=01 Prnt=01 Port=02 Cnt=02 Dev#= 5 Spd=480 MxCh= 0 +D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 +P: Vendor=2c7c ProdID=0195 Rev=03.18 +S: Manufacturer=Android +S: Product=Android +C: #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=500mA +I: If#=0x0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) +I: If#=0x1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) +I: If#=0x2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) +I: If#=0x3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) +I: If#=0x4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) + +Signed-off-by: AceLan Kao +Acked-by: Bjørn Mork +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/qmi_wwan.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/usb/qmi_wwan.c ++++ b/drivers/net/usb/qmi_wwan.c +@@ -1370,6 +1370,7 @@ static const struct usb_device_id produc + {QMI_QUIRK_SET_DTR(0x1e0e, 0x9001, 5)}, /* SIMCom 7100E, 7230E, 7600E ++ */ + {QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)}, /* Quectel EC21 Mini PCIe */ + {QMI_QUIRK_SET_DTR(0x2c7c, 0x0191, 4)}, /* Quectel EG91 */ ++ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0195, 4)}, /* Quectel EG95 */ + {QMI_FIXED_INTF(0x2c7c, 0x0296, 4)}, /* Quectel BG96 */ + {QMI_QUIRK_SET_DTR(0x2cb7, 0x0104, 4)}, /* Fibocom NL678 series */ + {QMI_FIXED_INTF(0x0489, 0xe0b4, 0)}, /* Foxconn T77W968 LTE */ diff --git a/queue-5.7/net_sched-fix-a-memory-leak-in-atm_tc_init.patch b/queue-5.7/net_sched-fix-a-memory-leak-in-atm_tc_init.patch new file mode 100644 index 00000000000..88927b5cf9a --- /dev/null +++ b/queue-5.7/net_sched-fix-a-memory-leak-in-atm_tc_init.patch @@ -0,0 +1,51 @@ +From foo@baz Fri 17 Jul 2020 08:39:20 AM CEST +From: Cong Wang +Date: Wed, 8 Jul 2020 20:13:59 -0700 +Subject: net_sched: fix a memory leak in atm_tc_init() + +From: Cong Wang + +[ Upstream commit 306381aec7c2b5a658eebca008c8a1b666536cba ] + +When tcf_block_get() fails inside atm_tc_init(), +atm_tc_put() is called to release the qdisc p->link.q. +But the flow->ref prevents it to do so, as the flow->ref +is still zero. + +Fix this by moving the p->link.ref initialization before +tcf_block_get(). + +Fixes: 6529eaba33f0 ("net: sched: introduce tcf block infractructure") +Reported-and-tested-by: syzbot+d411cff6ab29cc2c311b@syzkaller.appspotmail.com +Cc: Jamal Hadi Salim +Cc: Jiri Pirko +Signed-off-by: Cong Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_atm.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/net/sched/sch_atm.c ++++ b/net/sched/sch_atm.c +@@ -553,16 +553,16 @@ static int atm_tc_init(struct Qdisc *sch + if (!p->link.q) + p->link.q = &noop_qdisc; + pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q); ++ p->link.vcc = NULL; ++ p->link.sock = NULL; ++ p->link.common.classid = sch->handle; ++ p->link.ref = 1; + + err = tcf_block_get(&p->link.block, &p->link.filter_list, sch, + extack); + if (err) + return err; + +- p->link.vcc = NULL; +- p->link.sock = NULL; +- p->link.common.classid = sch->handle; +- p->link.ref = 1; + tasklet_init(&p->task, sch_atm_dequeue, (unsigned long)sch); + return 0; + } diff --git a/queue-5.7/sched-consistently-handle-layer3-header-accesses-in-the-presence-of-vlans.patch b/queue-5.7/sched-consistently-handle-layer3-header-accesses-in-the-presence-of-vlans.patch new file mode 100644 index 00000000000..676b702337d --- /dev/null +++ b/queue-5.7/sched-consistently-handle-layer3-header-accesses-in-the-presence-of-vlans.patch @@ -0,0 +1,496 @@ +From foo@baz Fri 17 Jul 2020 08:39:20 AM CEST +From: "Toke Høiland-Jørgensen" +Date: Fri, 3 Jul 2020 22:26:43 +0200 +Subject: sched: consistently handle layer3 header accesses in the presence of VLANs + +From: "Toke Høiland-Jørgensen" + +[ Upstream commit d7bf2ebebc2bd61ab95e2a8e33541ef282f303d4 ] + +There are a couple of places in net/sched/ that check skb->protocol and act +on the value there. However, in the presence of VLAN tags, the value stored +in skb->protocol can be inconsistent based on whether VLAN acceleration is +enabled. The commit quoted in the Fixes tag below fixed the users of +skb->protocol to use a helper that will always see the VLAN ethertype. + +However, most of the callers don't actually handle the VLAN ethertype, but +expect to find the IP header type in the protocol field. This means that +things like changing the ECN field, or parsing diffserv values, stops +working if there's a VLAN tag, or if there are multiple nested VLAN +tags (QinQ). + +To fix this, change the helper to take an argument that indicates whether +the caller wants to skip the VLAN tags or not. When skipping VLAN tags, we +make sure to skip all of them, so behaviour is consistent even in QinQ +mode. + +To make the helper usable from the ECN code, move it to if_vlan.h instead +of pkt_sched.h. + +v3: +- Remove empty lines +- Move vlan variable definitions inside loop in skb_protocol() +- Also use skb_protocol() helper in IP{,6}_ECN_decapsulate() and + bpf_skb_ecn_set_ce() + +v2: +- Use eth_type_vlan() helper in skb_protocol() +- Also fix code that reads skb->protocol directly +- Change a couple of 'if/else if' statements to switch constructs to avoid + calling the helper twice + +Reported-by: Ilya Ponetayev +Fixes: d8b9605d2697 ("net: sched: fix skb->protocol use in case of accelerated vlan path") +Signed-off-by: Toke Høiland-Jørgensen +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/if_vlan.h | 28 ++++++++++++++++++++++++++++ + include/net/inet_ecn.h | 25 +++++++++++++++++-------- + include/net/pkt_sched.h | 11 ----------- + net/core/filter.c | 10 +++++++--- + net/sched/act_connmark.c | 9 ++++++--- + net/sched/act_csum.c | 2 +- + net/sched/act_ct.c | 9 ++++----- + net/sched/act_ctinfo.c | 9 ++++++--- + net/sched/act_mpls.c | 2 +- + net/sched/act_skbedit.c | 2 +- + net/sched/cls_api.c | 2 +- + net/sched/cls_flow.c | 8 ++++---- + net/sched/cls_flower.c | 2 +- + net/sched/em_ipset.c | 2 +- + net/sched/em_ipt.c | 2 +- + net/sched/em_meta.c | 2 +- + net/sched/sch_cake.c | 4 ++-- + net/sched/sch_dsmark.c | 6 +++--- + net/sched/sch_teql.c | 2 +- + 19 files changed, 86 insertions(+), 51 deletions(-) + +--- a/include/linux/if_vlan.h ++++ b/include/linux/if_vlan.h +@@ -308,6 +308,34 @@ static inline bool eth_type_vlan(__be16 + } + } + ++/* A getter for the SKB protocol field which will handle VLAN tags consistently ++ * whether VLAN acceleration is enabled or not. ++ */ ++static inline __be16 skb_protocol(const struct sk_buff *skb, bool skip_vlan) ++{ ++ unsigned int offset = skb_mac_offset(skb) + sizeof(struct ethhdr); ++ __be16 proto = skb->protocol; ++ ++ if (!skip_vlan) ++ /* VLAN acceleration strips the VLAN header from the skb and ++ * moves it to skb->vlan_proto ++ */ ++ return skb_vlan_tag_present(skb) ? skb->vlan_proto : proto; ++ ++ while (eth_type_vlan(proto)) { ++ struct vlan_hdr vhdr, *vh; ++ ++ vh = skb_header_pointer(skb, offset, sizeof(vhdr), &vhdr); ++ if (!vh) ++ break; ++ ++ proto = vh->h_vlan_encapsulated_proto; ++ offset += sizeof(vhdr); ++ } ++ ++ return proto; ++} ++ + static inline bool vlan_hw_offload_capable(netdev_features_t features, + __be16 proto) + { +--- a/include/net/inet_ecn.h ++++ b/include/net/inet_ecn.h +@@ -4,6 +4,7 @@ + + #include + #include ++#include + + #include + #include +@@ -172,7 +173,7 @@ static inline void ipv6_copy_dscp(unsign + + static inline int INET_ECN_set_ce(struct sk_buff *skb) + { +- switch (skb->protocol) { ++ switch (skb_protocol(skb, true)) { + case cpu_to_be16(ETH_P_IP): + if (skb_network_header(skb) + sizeof(struct iphdr) <= + skb_tail_pointer(skb)) +@@ -191,7 +192,7 @@ static inline int INET_ECN_set_ce(struct + + static inline int INET_ECN_set_ect1(struct sk_buff *skb) + { +- switch (skb->protocol) { ++ switch (skb_protocol(skb, true)) { + case cpu_to_be16(ETH_P_IP): + if (skb_network_header(skb) + sizeof(struct iphdr) <= + skb_tail_pointer(skb)) +@@ -272,12 +273,16 @@ static inline int IP_ECN_decapsulate(con + { + __u8 inner; + +- if (skb->protocol == htons(ETH_P_IP)) ++ switch (skb_protocol(skb, true)) { ++ case htons(ETH_P_IP): + inner = ip_hdr(skb)->tos; +- else if (skb->protocol == htons(ETH_P_IPV6)) ++ break; ++ case htons(ETH_P_IPV6): + inner = ipv6_get_dsfield(ipv6_hdr(skb)); +- else ++ break; ++ default: + return 0; ++ } + + return INET_ECN_decapsulate(skb, oiph->tos, inner); + } +@@ -287,12 +292,16 @@ static inline int IP6_ECN_decapsulate(co + { + __u8 inner; + +- if (skb->protocol == htons(ETH_P_IP)) ++ switch (skb_protocol(skb, true)) { ++ case htons(ETH_P_IP): + inner = ip_hdr(skb)->tos; +- else if (skb->protocol == htons(ETH_P_IPV6)) ++ break; ++ case htons(ETH_P_IPV6): + inner = ipv6_get_dsfield(ipv6_hdr(skb)); +- else ++ break; ++ default: + return 0; ++ } + + return INET_ECN_decapsulate(skb, ipv6_get_dsfield(oipv6h), inner); + } +--- a/include/net/pkt_sched.h ++++ b/include/net/pkt_sched.h +@@ -136,17 +136,6 @@ static inline void 
qdisc_run(struct Qdis + } + } + +-static inline __be16 tc_skb_protocol(const struct sk_buff *skb) +-{ +- /* We need to take extra care in case the skb came via +- * vlan accelerated path. In that case, use skb->vlan_proto +- * as the original vlan header was already stripped. +- */ +- if (skb_vlan_tag_present(skb)) +- return skb->vlan_proto; +- return skb->protocol; +-} +- + /* Calculate maximal size of packet seen by hard_start_xmit + routine of this device. + */ +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -5724,12 +5724,16 @@ BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk + { + unsigned int iphdr_len; + +- if (skb->protocol == cpu_to_be16(ETH_P_IP)) ++ switch (skb_protocol(skb, true)) { ++ case cpu_to_be16(ETH_P_IP): + iphdr_len = sizeof(struct iphdr); +- else if (skb->protocol == cpu_to_be16(ETH_P_IPV6)) ++ break; ++ case cpu_to_be16(ETH_P_IPV6): + iphdr_len = sizeof(struct ipv6hdr); +- else ++ break; ++ default: + return 0; ++ } + + if (skb_headlen(skb) < iphdr_len) + return 0; +--- a/net/sched/act_connmark.c ++++ b/net/sched/act_connmark.c +@@ -43,17 +43,20 @@ static int tcf_connmark_act(struct sk_bu + tcf_lastuse_update(&ca->tcf_tm); + bstats_update(&ca->tcf_bstats, skb); + +- if (skb->protocol == htons(ETH_P_IP)) { ++ switch (skb_protocol(skb, true)) { ++ case htons(ETH_P_IP): + if (skb->len < sizeof(struct iphdr)) + goto out; + + proto = NFPROTO_IPV4; +- } else if (skb->protocol == htons(ETH_P_IPV6)) { ++ break; ++ case htons(ETH_P_IPV6): + if (skb->len < sizeof(struct ipv6hdr)) + goto out; + + proto = NFPROTO_IPV6; +- } else { ++ break; ++ default: + goto out; + } + +--- a/net/sched/act_csum.c ++++ b/net/sched/act_csum.c +@@ -587,7 +587,7 @@ static int tcf_csum_act(struct sk_buff * + goto drop; + + update_flags = params->update_flags; +- protocol = tc_skb_protocol(skb); ++ protocol = skb_protocol(skb, false); + again: + switch (protocol) { + case cpu_to_be16(ETH_P_IP): +--- a/net/sched/act_ct.c ++++ b/net/sched/act_ct.c +@@ -622,7 +622,7 @@ static u8 tcf_ct_skb_nf_family(struct sk + { + u8 family = NFPROTO_UNSPEC; + +- switch (skb->protocol) { ++ switch (skb_protocol(skb, true)) { + case htons(ETH_P_IP): + family = NFPROTO_IPV4; + break; +@@ -746,6 +746,7 @@ static int ct_nat_execute(struct sk_buff + const struct nf_nat_range2 *range, + enum nf_nat_manip_type maniptype) + { ++ __be16 proto = skb_protocol(skb, true); + int hooknum, err = NF_ACCEPT; + + /* See HOOK2MANIP(). 
*/ +@@ -757,14 +758,13 @@ static int ct_nat_execute(struct sk_buff + switch (ctinfo) { + case IP_CT_RELATED: + case IP_CT_RELATED_REPLY: +- if (skb->protocol == htons(ETH_P_IP) && ++ if (proto == htons(ETH_P_IP) && + ip_hdr(skb)->protocol == IPPROTO_ICMP) { + if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, + hooknum)) + err = NF_DROP; + goto out; +- } else if (IS_ENABLED(CONFIG_IPV6) && +- skb->protocol == htons(ETH_P_IPV6)) { ++ } else if (IS_ENABLED(CONFIG_IPV6) && proto == htons(ETH_P_IPV6)) { + __be16 frag_off; + u8 nexthdr = ipv6_hdr(skb)->nexthdr; + int hdrlen = ipv6_skip_exthdr(skb, +@@ -1559,4 +1559,3 @@ MODULE_AUTHOR("Yossi Kuperman "); + MODULE_DESCRIPTION("Connection tracking action"); + MODULE_LICENSE("GPL v2"); +- +--- a/net/sched/act_ctinfo.c ++++ b/net/sched/act_ctinfo.c +@@ -96,19 +96,22 @@ static int tcf_ctinfo_act(struct sk_buff + action = READ_ONCE(ca->tcf_action); + + wlen = skb_network_offset(skb); +- if (tc_skb_protocol(skb) == htons(ETH_P_IP)) { ++ switch (skb_protocol(skb, true)) { ++ case htons(ETH_P_IP): + wlen += sizeof(struct iphdr); + if (!pskb_may_pull(skb, wlen)) + goto out; + + proto = NFPROTO_IPV4; +- } else if (tc_skb_protocol(skb) == htons(ETH_P_IPV6)) { ++ break; ++ case htons(ETH_P_IPV6): + wlen += sizeof(struct ipv6hdr); + if (!pskb_may_pull(skb, wlen)) + goto out; + + proto = NFPROTO_IPV6; +- } else { ++ break; ++ default: + goto out; + } + +--- a/net/sched/act_mpls.c ++++ b/net/sched/act_mpls.c +@@ -82,7 +82,7 @@ static int tcf_mpls_act(struct sk_buff * + goto drop; + break; + case TCA_MPLS_ACT_PUSH: +- new_lse = tcf_mpls_get_lse(NULL, p, !eth_p_mpls(skb->protocol)); ++ new_lse = tcf_mpls_get_lse(NULL, p, !eth_p_mpls(skb_protocol(skb, true))); + if (skb_mpls_push(skb, new_lse, p->tcfm_proto, mac_len, + skb->dev && skb->dev->type == ARPHRD_ETHER)) + goto drop; +--- a/net/sched/act_skbedit.c ++++ b/net/sched/act_skbedit.c +@@ -41,7 +41,7 @@ static int tcf_skbedit_act(struct sk_buf + if (params->flags & SKBEDIT_F_INHERITDSFIELD) { + int wlen = skb_network_offset(skb); + +- switch (tc_skb_protocol(skb)) { ++ switch (skb_protocol(skb, true)) { + case htons(ETH_P_IP): + wlen += sizeof(struct iphdr); + if (!pskb_may_pull(skb, wlen)) +--- a/net/sched/cls_api.c ++++ b/net/sched/cls_api.c +@@ -1589,7 +1589,7 @@ static inline int __tcf_classify(struct + reclassify: + #endif + for (; tp; tp = rcu_dereference_bh(tp->next)) { +- __be16 protocol = tc_skb_protocol(skb); ++ __be16 protocol = skb_protocol(skb, false); + int err; + + if (tp->protocol != protocol && +--- a/net/sched/cls_flow.c ++++ b/net/sched/cls_flow.c +@@ -80,7 +80,7 @@ static u32 flow_get_dst(const struct sk_ + if (dst) + return ntohl(dst); + +- return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); ++ return addr_fold(skb_dst(skb)) ^ (__force u16)skb_protocol(skb, true); + } + + static u32 flow_get_proto(const struct sk_buff *skb, +@@ -104,7 +104,7 @@ static u32 flow_get_proto_dst(const stru + if (flow->ports.ports) + return ntohs(flow->ports.dst); + +- return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); ++ return addr_fold(skb_dst(skb)) ^ (__force u16)skb_protocol(skb, true); + } + + static u32 flow_get_iif(const struct sk_buff *skb) +@@ -151,7 +151,7 @@ static u32 flow_get_nfct(const struct sk + static u32 flow_get_nfct_src(const struct sk_buff *skb, + const struct flow_keys *flow) + { +- switch (tc_skb_protocol(skb)) { ++ switch (skb_protocol(skb, true)) { + case htons(ETH_P_IP): + return ntohl(CTTUPLE(skb, src.u3.ip)); + case htons(ETH_P_IPV6): +@@ -164,7 
+164,7 @@ fallback: + static u32 flow_get_nfct_dst(const struct sk_buff *skb, + const struct flow_keys *flow) + { +- switch (tc_skb_protocol(skb)) { ++ switch (skb_protocol(skb, true)) { + case htons(ETH_P_IP): + return ntohl(CTTUPLE(skb, dst.u3.ip)); + case htons(ETH_P_IPV6): +--- a/net/sched/cls_flower.c ++++ b/net/sched/cls_flower.c +@@ -312,7 +312,7 @@ static int fl_classify(struct sk_buff *s + /* skb_flow_dissect() does not set n_proto in case an unknown + * protocol, so do it rather here. + */ +- skb_key.basic.n_proto = skb->protocol; ++ skb_key.basic.n_proto = skb_protocol(skb, false); + skb_flow_dissect_tunnel_info(skb, &mask->dissector, &skb_key); + skb_flow_dissect_ct(skb, &mask->dissector, &skb_key, + fl_ct_info_to_flower_map, +--- a/net/sched/em_ipset.c ++++ b/net/sched/em_ipset.c +@@ -59,7 +59,7 @@ static int em_ipset_match(struct sk_buff + }; + int ret, network_offset; + +- switch (tc_skb_protocol(skb)) { ++ switch (skb_protocol(skb, true)) { + case htons(ETH_P_IP): + state.pf = NFPROTO_IPV4; + if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) +--- a/net/sched/em_ipt.c ++++ b/net/sched/em_ipt.c +@@ -212,7 +212,7 @@ static int em_ipt_match(struct sk_buff * + struct nf_hook_state state; + int ret; + +- switch (tc_skb_protocol(skb)) { ++ switch (skb_protocol(skb, true)) { + case htons(ETH_P_IP): + if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) + return 0; +--- a/net/sched/em_meta.c ++++ b/net/sched/em_meta.c +@@ -195,7 +195,7 @@ META_COLLECTOR(int_priority) + META_COLLECTOR(int_protocol) + { + /* Let userspace take care of the byte ordering */ +- dst->value = tc_skb_protocol(skb); ++ dst->value = skb_protocol(skb, false); + } + + META_COLLECTOR(int_pkttype) +--- a/net/sched/sch_cake.c ++++ b/net/sched/sch_cake.c +@@ -591,7 +591,7 @@ static void cake_update_flowkeys(struct + struct nf_conntrack_tuple tuple = {}; + bool rev = !skb->_nfct; + +- if (tc_skb_protocol(skb) != htons(ETH_P_IP)) ++ if (skb_protocol(skb, true) != htons(ETH_P_IP)) + return; + + if (!nf_ct_get_tuple_skb(&tuple, skb)) +@@ -1520,7 +1520,7 @@ static u8 cake_handle_diffserv(struct sk + u16 *buf, buf_; + u8 dscp; + +- switch (tc_skb_protocol(skb)) { ++ switch (skb_protocol(skb, true)) { + case htons(ETH_P_IP): + buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_); + if (unlikely(!buf)) +--- a/net/sched/sch_dsmark.c ++++ b/net/sched/sch_dsmark.c +@@ -210,7 +210,7 @@ static int dsmark_enqueue(struct sk_buff + if (p->set_tc_index) { + int wlen = skb_network_offset(skb); + +- switch (tc_skb_protocol(skb)) { ++ switch (skb_protocol(skb, true)) { + case htons(ETH_P_IP): + wlen += sizeof(struct iphdr); + if (!pskb_may_pull(skb, wlen) || +@@ -303,7 +303,7 @@ static struct sk_buff *dsmark_dequeue(st + index = skb->tc_index & (p->indices - 1); + pr_debug("index %d->%d\n", skb->tc_index, index); + +- switch (tc_skb_protocol(skb)) { ++ switch (skb_protocol(skb, true)) { + case htons(ETH_P_IP): + ipv4_change_dsfield(ip_hdr(skb), p->mv[index].mask, + p->mv[index].value); +@@ -320,7 +320,7 @@ static struct sk_buff *dsmark_dequeue(st + */ + if (p->mv[index].mask != 0xff || p->mv[index].value) + pr_warn("%s: unsupported protocol %d\n", +- __func__, ntohs(tc_skb_protocol(skb))); ++ __func__, ntohs(skb_protocol(skb, true))); + break; + } + +--- a/net/sched/sch_teql.c ++++ b/net/sched/sch_teql.c +@@ -239,7 +239,7 @@ __teql_resolve(struct sk_buff *skb, stru + char haddr[MAX_ADDR_LEN]; + + neigh_ha_snapshot(haddr, n, dev); +- err = dev_hard_header(skb, dev, ntohs(tc_skb_protocol(skb)), ++ err = dev_hard_header(skb, 
dev, ntohs(skb_protocol(skb, false)), + haddr, NULL, skb->len); + + if (err < 0) diff --git a/queue-5.7/series b/queue-5.7/series index e69de29bb2d..dbe252556e4 100644 --- a/queue-5.7/series +++ b/queue-5.7/series @@ -0,0 +1,31 @@ +bridge-mcast-fix-mld2-report-ipv6-payload-length-check.patch +genetlink-remove-genl_bind.patch +ipv4-fill-fl4_icmp_-type-code-in-ping_v4_sendmsg.patch +ipv6-fib6_select_path-can-not-use-out-path-for-nexthop-objects.patch +ipv6-fix-use-of-anycast-address-with-loopback.patch +l2tp-remove-skb_dst_set-from-l2tp_xmit_skb.patch +llc-make-sure-applications-use-arphrd_ether.patch +net-added-pointer-check-for-dst-ops-neigh_lookup-in-dst_neigh_lookup_skb.patch +net-dsa-microchip-set-the-correct-number-of-ports.patch +net-qrtr-free-flow-in-__qrtr_node_release.patch +net_sched-fix-a-memory-leak-in-atm_tc_init.patch +net-usb-qmi_wwan-add-support-for-quectel-eg95-lte-modem.patch +sched-consistently-handle-layer3-header-accesses-in-the-presence-of-vlans.patch +tcp-fix-so_rcvlowat-possible-hangs-under-high-mem-pressure.patch +tcp-make-sure-listeners-don-t-initialize-congestion-control-state.patch +tcp-md5-add-missing-memory-barriers-in-tcp_md5_do_add-tcp_md5_hash_key.patch +tcp-md5-do-not-send-silly-options-in-syncookies.patch +vlan-consolidate-vlan-parsing-code-and-limit-max-parsing-depth.patch +tcp-md5-refine-tcp_md5_do_add-tcp_md5_hash_key-barriers.patch +tcp-md5-allow-changing-md5-keys-in-all-socket-states.patch +cgroup-fix-cgroup_sk_alloc-for-sk_clone_lock.patch +cgroup-fix-sock_cgroup_data-on-big-endian.patch +net-ipa-always-check-for-stopped-channel.patch +net-ipa-introduce-ipa_cmd_tag_process.patch +ip-fix-so_mark-in-rst-ack-and-icmp-packets.patch +genetlink-get-rid-of-family-attrbuf.patch +net-ipv4-fix-wrong-type-conversion-from-hint-to-rt-in-ip_route_use_hint.patch +ethtool-fix-genlmsg_put-failure-handling-in-ethnl_default_dumpit.patch +mptcp-fix-dss-map-generation-on-fin-retransmission.patch +net-rmnet-do-not-allow-to-add-multiple-bridge-interfaces.patch +hsr-fix-interface-leak-in-error-path-of-hsr_dev_finalize.patch diff --git a/queue-5.7/tcp-fix-so_rcvlowat-possible-hangs-under-high-mem-pressure.patch b/queue-5.7/tcp-fix-so_rcvlowat-possible-hangs-under-high-mem-pressure.patch new file mode 100644 index 00000000000..06423127a32 --- /dev/null +++ b/queue-5.7/tcp-fix-so_rcvlowat-possible-hangs-under-high-mem-pressure.patch @@ -0,0 +1,39 @@ +From foo@baz Fri 17 Jul 2020 08:39:20 AM CEST +From: Eric Dumazet +Date: Tue, 30 Jun 2020 13:51:28 -0700 +Subject: tcp: fix SO_RCVLOWAT possible hangs under high mem pressure + +From: Eric Dumazet + +[ Upstream commit ba3bb0e76ccd464bb66665a1941fabe55dadb3ba ] + +Whenever tcp_try_rmem_schedule() returns an error, we are under +trouble and should make sure to wakeup readers so that they +can drain socket queues and eventually make room. + +Fixes: 03f45c883c6f ("tcp: avoid extra wakeups for SO_RCVLOWAT users") +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -4570,6 +4570,7 @@ static void tcp_data_queue_ofo(struct so + + if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP); ++ sk->sk_data_ready(sk); + tcp_drop(sk, skb); + return; + } +@@ -4816,6 +4817,7 @@ queue_and_out: + sk_forced_mem_schedule(sk, skb->truesize); + else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP); ++ sk->sk_data_ready(sk); + goto drop; + } + diff --git a/queue-5.7/tcp-make-sure-listeners-don-t-initialize-congestion-control-state.patch b/queue-5.7/tcp-make-sure-listeners-don-t-initialize-congestion-control-state.patch new file mode 100644 index 00000000000..8bce11ff395 --- /dev/null +++ b/queue-5.7/tcp-make-sure-listeners-don-t-initialize-congestion-control-state.patch @@ -0,0 +1,145 @@ +From foo@baz Fri 17 Jul 2020 08:39:20 AM CEST +From: Christoph Paasch +Date: Wed, 8 Jul 2020 16:18:34 -0700 +Subject: tcp: make sure listeners don't initialize congestion-control state + +From: Christoph Paasch + +[ Upstream commit ce69e563b325f620863830c246a8698ccea52048 ] + +syzkaller found its way into setsockopt with TCP_CONGESTION "cdg". +tcp_cdg_init() does a kcalloc to store the gradients. As sk_clone_lock +just copies all the memory, the allocated pointer will be copied as +well, if the app called setsockopt(..., TCP_CONGESTION) on the listener. +If now the socket will be destroyed before the congestion-control +has properly been initialized (through a call to tcp_init_transfer), we +will end up freeing memory that does not belong to that particular +socket, opening the door to a double-free: + +[ 11.413102] ================================================================== +[ 11.414181] BUG: KASAN: double-free or invalid-free in tcp_cleanup_congestion_control+0x58/0xd0 +[ 11.415329] +[ 11.415560] CPU: 3 PID: 4884 Comm: syz-executor.5 Not tainted 5.8.0-rc2 #80 +[ 11.416544] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 +[ 11.418148] Call Trace: +[ 11.418534] +[ 11.418834] dump_stack+0x7d/0xb0 +[ 11.419297] print_address_description.constprop.0+0x1a/0x210 +[ 11.422079] kasan_report_invalid_free+0x51/0x80 +[ 11.423433] __kasan_slab_free+0x15e/0x170 +[ 11.424761] kfree+0x8c/0x230 +[ 11.425157] tcp_cleanup_congestion_control+0x58/0xd0 +[ 11.425872] tcp_v4_destroy_sock+0x57/0x5a0 +[ 11.426493] inet_csk_destroy_sock+0x153/0x2c0 +[ 11.427093] tcp_v4_syn_recv_sock+0xb29/0x1100 +[ 11.427731] tcp_get_cookie_sock+0xc3/0x4a0 +[ 11.429457] cookie_v4_check+0x13d0/0x2500 +[ 11.433189] tcp_v4_do_rcv+0x60e/0x780 +[ 11.433727] tcp_v4_rcv+0x2869/0x2e10 +[ 11.437143] ip_protocol_deliver_rcu+0x23/0x190 +[ 11.437810] ip_local_deliver+0x294/0x350 +[ 11.439566] __netif_receive_skb_one_core+0x15d/0x1a0 +[ 11.441995] process_backlog+0x1b1/0x6b0 +[ 11.443148] net_rx_action+0x37e/0xc40 +[ 11.445361] __do_softirq+0x18c/0x61a +[ 11.445881] asm_call_on_stack+0x12/0x20 +[ 11.446409] +[ 11.446716] do_softirq_own_stack+0x34/0x40 +[ 11.447259] do_softirq.part.0+0x26/0x30 +[ 11.447827] __local_bh_enable_ip+0x46/0x50 +[ 11.448406] ip_finish_output2+0x60f/0x1bc0 +[ 11.450109] __ip_queue_xmit+0x71c/0x1b60 +[ 11.451861] __tcp_transmit_skb+0x1727/0x3bb0 +[ 11.453789] tcp_rcv_state_process+0x3070/0x4d3a +[ 11.456810] tcp_v4_do_rcv+0x2ad/0x780 +[ 11.457995] 
__release_sock+0x14b/0x2c0 +[ 11.458529] release_sock+0x4a/0x170 +[ 11.459005] __inet_stream_connect+0x467/0xc80 +[ 11.461435] inet_stream_connect+0x4e/0xa0 +[ 11.462043] __sys_connect+0x204/0x270 +[ 11.465515] __x64_sys_connect+0x6a/0xb0 +[ 11.466088] do_syscall_64+0x3e/0x70 +[ 11.466617] entry_SYSCALL_64_after_hwframe+0x44/0xa9 +[ 11.467341] RIP: 0033:0x7f56046dc469 +[ 11.467844] Code: Bad RIP value. +[ 11.468282] RSP: 002b:00007f5604dccdd8 EFLAGS: 00000246 ORIG_RAX: 000000000000002a +[ 11.469326] RAX: ffffffffffffffda RBX: 000000000068bf00 RCX: 00007f56046dc469 +[ 11.470379] RDX: 0000000000000010 RSI: 0000000020000000 RDI: 0000000000000004 +[ 11.471311] RBP: 00000000ffffffff R08: 0000000000000000 R09: 0000000000000000 +[ 11.472286] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 +[ 11.473341] R13: 000000000041427c R14: 00007f5604dcd5c0 R15: 0000000000000003 +[ 11.474321] +[ 11.474527] Allocated by task 4884: +[ 11.475031] save_stack+0x1b/0x40 +[ 11.475548] __kasan_kmalloc.constprop.0+0xc2/0xd0 +[ 11.476182] tcp_cdg_init+0xf0/0x150 +[ 11.476744] tcp_init_congestion_control+0x9b/0x3a0 +[ 11.477435] tcp_set_congestion_control+0x270/0x32f +[ 11.478088] do_tcp_setsockopt.isra.0+0x521/0x1a00 +[ 11.478744] __sys_setsockopt+0xff/0x1e0 +[ 11.479259] __x64_sys_setsockopt+0xb5/0x150 +[ 11.479895] do_syscall_64+0x3e/0x70 +[ 11.480395] entry_SYSCALL_64_after_hwframe+0x44/0xa9 +[ 11.481097] +[ 11.481321] Freed by task 4872: +[ 11.481783] save_stack+0x1b/0x40 +[ 11.482230] __kasan_slab_free+0x12c/0x170 +[ 11.482839] kfree+0x8c/0x230 +[ 11.483240] tcp_cleanup_congestion_control+0x58/0xd0 +[ 11.483948] tcp_v4_destroy_sock+0x57/0x5a0 +[ 11.484502] inet_csk_destroy_sock+0x153/0x2c0 +[ 11.485144] tcp_close+0x932/0xfe0 +[ 11.485642] inet_release+0xc1/0x1c0 +[ 11.486131] __sock_release+0xc0/0x270 +[ 11.486697] sock_close+0xc/0x10 +[ 11.487145] __fput+0x277/0x780 +[ 11.487632] task_work_run+0xeb/0x180 +[ 11.488118] __prepare_exit_to_usermode+0x15a/0x160 +[ 11.488834] do_syscall_64+0x4a/0x70 +[ 11.489326] entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +Wei Wang fixed a part of these CDG-malloc issues with commit c12014440750 +("tcp: memset ca_priv data to 0 properly"). + +This patch here fixes the listener-scenario: We make sure that listeners +setting the congestion-control through setsockopt won't initialize it +(thus CDG never allocates on listeners). For those who use AF_UNSPEC to +reuse a socket, tcp_disconnect() is changed to cleanup afterwards. + +(The issue can be reproduced at least down to v4.4.x.) + +Cc: Wei Wang +Cc: Eric Dumazet +Fixes: 2b0a8c9eee81 ("tcp: add CDG congestion control") +Signed-off-by: Christoph Paasch +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp.c | 3 +++ + net/ipv4/tcp_cong.c | 2 +- + 2 files changed, 4 insertions(+), 1 deletion(-) + +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -2635,6 +2635,9 @@ int tcp_disconnect(struct sock *sk, int + tp->window_clamp = 0; + tp->delivered = 0; + tp->delivered_ce = 0; ++ if (icsk->icsk_ca_ops->release) ++ icsk->icsk_ca_ops->release(sk); ++ memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); + tcp_set_ca_state(sk, TCP_CA_Open); + tp->is_sack_reneg = 0; + tcp_clear_retrans(tp); +--- a/net/ipv4/tcp_cong.c ++++ b/net/ipv4/tcp_cong.c +@@ -197,7 +197,7 @@ static void tcp_reinit_congestion_contro + icsk->icsk_ca_setsockopt = 1; + memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); + +- if (sk->sk_state != TCP_CLOSE) ++ if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) + tcp_init_congestion_control(sk); + } + diff --git a/queue-5.7/tcp-md5-add-missing-memory-barriers-in-tcp_md5_do_add-tcp_md5_hash_key.patch b/queue-5.7/tcp-md5-add-missing-memory-barriers-in-tcp_md5_do_add-tcp_md5_hash_key.patch new file mode 100644 index 00000000000..66ba80bed88 --- /dev/null +++ b/queue-5.7/tcp-md5-add-missing-memory-barriers-in-tcp_md5_do_add-tcp_md5_hash_key.patch @@ -0,0 +1,64 @@ +From foo@baz Fri 17 Jul 2020 08:39:20 AM CEST +From: Eric Dumazet +Date: Tue, 30 Jun 2020 16:41:01 -0700 +Subject: tcp: md5: add missing memory barriers in tcp_md5_do_add()/tcp_md5_hash_key() + +From: Eric Dumazet + +[ Upstream commit 6a2febec338df7e7699a52d00b2e1207dcf65b28 ] + +MD5 keys are read with RCU protection, and tcp_md5_do_add() +might update in-place a prior key. + +Normally, typical RCU updates would allocate a new piece +of memory. In this case only key->key and key->keylen might +be updated, and we do not care if an incoming packet could +see the old key, the new one, or some intermediate value, +since changing the key on a live flow is known to be problematic +anyway. + +We only want to make sure that in the case key->keylen +is changed, cpus in tcp_md5_hash_key() wont try to use +uninitialized data, or crash because key->keylen was +read twice to feed sg_init_one() and ahash_request_set_crypt() + +Fixes: 9ea88a153001 ("tcp: md5: check md5 signature without socket lock") +Signed-off-by: Eric Dumazet +Cc: Mathieu Desnoyers +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp.c | 7 +++++-- + net/ipv4/tcp_ipv4.c | 3 +++ + 2 files changed, 8 insertions(+), 2 deletions(-) + +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -3880,10 +3880,13 @@ EXPORT_SYMBOL(tcp_md5_hash_skb_data); + + int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key) + { ++ u8 keylen = key->keylen; + struct scatterlist sg; + +- sg_init_one(&sg, key->key, key->keylen); +- ahash_request_set_crypt(hp->md5_req, &sg, NULL, key->keylen); ++ smp_rmb(); /* paired with smp_wmb() in tcp_md5_do_add() */ ++ ++ sg_init_one(&sg, key->key, keylen); ++ ahash_request_set_crypt(hp->md5_req, &sg, NULL, keylen); + return crypto_ahash_update(hp->md5_req); + } + EXPORT_SYMBOL(tcp_md5_hash_key); +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -1105,6 +1105,9 @@ int tcp_md5_do_add(struct sock *sk, cons + if (key) { + /* Pre-existing entry - just update that one. 
*/ + memcpy(key->key, newkey, newkeylen); ++ ++ smp_wmb(); /* pairs with smp_rmb() in tcp_md5_hash_key() */ ++ + key->keylen = newkeylen; + return 0; + } diff --git a/queue-5.7/tcp-md5-allow-changing-md5-keys-in-all-socket-states.patch b/queue-5.7/tcp-md5-allow-changing-md5-keys-in-all-socket-states.patch new file mode 100644 index 00000000000..d4e20093ef8 --- /dev/null +++ b/queue-5.7/tcp-md5-allow-changing-md5-keys-in-all-socket-states.patch @@ -0,0 +1,66 @@ +From foo@baz Fri 17 Jul 2020 08:39:20 AM CEST +From: Eric Dumazet +Date: Wed, 1 Jul 2020 18:39:33 -0700 +Subject: tcp: md5: allow changing MD5 keys in all socket states + +From: Eric Dumazet + +[ Upstream commit 1ca0fafd73c5268e8fc4b997094b8bb2bfe8deea ] + +This essentially reverts commit 721230326891 ("tcp: md5: reject TCP_MD5SIG +or TCP_MD5SIG_EXT on established sockets") + +Mathieu reported that many vendors BGP implementations can +actually switch TCP MD5 on established flows. + +Quoting Mathieu : + Here is a list of a few network vendors along with their behavior + with respect to TCP MD5: + + - Cisco: Allows for password to be changed, but within the hold-down + timer (~180 seconds). + - Juniper: When password is initially set on active connection it will + reset, but after that any subsequent password changes no network + resets. + - Nokia: No notes on if they flap the tcp connection or not. + - Ericsson/RedBack: Allows for 2 password (old/new) to co-exist until + both sides are ok with new passwords. + - Meta-Switch: Expects the password to be set before a connection is + attempted, but no further info on whether they reset the TCP + connection on a change. + - Avaya: Disable the neighbor, then set password, then re-enable. + - Zebos: Would normally allow the change when socket connected. + +We can revert my prior change because commit 9424e2e7ad93 ("tcp: md5: fix potential +overestimation of TCP option space") removed the leak of 4 kernel bytes to +the wire that was the main reason for my patch. + +While doing my investigations, I found a bug when a MD5 key is changed, leading +to these commits that stable teams want to consider before backporting this revert : + + Commit 6a2febec338d ("tcp: md5: add missing memory barriers in tcp_md5_do_add()/tcp_md5_hash_key()") + Commit e6ced831ef11 ("tcp: md5: refine tcp_md5_do_add()/tcp_md5_hash_key() barriers") + +Fixes: 721230326891 "tcp: md5: reject TCP_MD5SIG or TCP_MD5SIG_EXT on established sockets" +Signed-off-by: Eric Dumazet +Reported-by: Mathieu Desnoyers +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -3093,10 +3093,7 @@ static int do_tcp_setsockopt(struct sock + #ifdef CONFIG_TCP_MD5SIG + case TCP_MD5SIG: + case TCP_MD5SIG_EXT: +- if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) +- err = tp->af_specific->md5_parse(sk, optname, optval, optlen); +- else +- err = -EINVAL; ++ err = tp->af_specific->md5_parse(sk, optname, optval, optlen); + break; + #endif + case TCP_USER_TIMEOUT: diff --git a/queue-5.7/tcp-md5-do-not-send-silly-options-in-syncookies.patch b/queue-5.7/tcp-md5-do-not-send-silly-options-in-syncookies.patch new file mode 100644 index 00000000000..c7fb625ffc0 --- /dev/null +++ b/queue-5.7/tcp-md5-do-not-send-silly-options-in-syncookies.patch @@ -0,0 +1,82 @@ +From foo@baz Fri 17 Jul 2020 08:39:20 AM CEST +From: Eric Dumazet +Date: Wed, 1 Jul 2020 12:41:23 -0700 +Subject: tcp: md5: do not send silly options in SYNCOOKIES + +From: Eric Dumazet + +[ Upstream commit e114e1e8ac9d31f25b9dd873bab5d80c1fc482ca ] + +Whenever cookie_init_timestamp() has been used to encode +ECN,SACK,WSCALE options, we can not remove the TS option in the SYNACK. + +Otherwise, tcp_synack_options() will still advertize options like WSCALE +that we can not deduce later when receiving the packet from the client +to complete 3WHS. + +Note that modern linux TCP stacks wont use MD5+TS+SACK in a SYN packet, +but we can not know for sure that all TCP stacks have the same logic. + +Before the fix a tcpdump would exhibit this wrong exchange : + +10:12:15.464591 IP C > S: Flags [S], seq 4202415601, win 65535, options [nop,nop,md5 valid,mss 1400,sackOK,TS val 456965269 ecr 0,nop,wscale 8], length 0 +10:12:15.464602 IP S > C: Flags [S.], seq 253516766, ack 4202415602, win 65535, options [nop,nop,md5 valid,mss 1400,nop,nop,sackOK,nop,wscale 8], length 0 +10:12:15.464611 IP C > S: Flags [.], ack 1, win 256, options [nop,nop,md5 valid], length 0 +10:12:15.464678 IP C > S: Flags [P.], seq 1:13, ack 1, win 256, options [nop,nop,md5 valid], length 12 +10:12:15.464685 IP S > C: Flags [.], ack 13, win 65535, options [nop,nop,md5 valid], length 0 + +After this patch the exchange looks saner : + +11:59:59.882990 IP C > S: Flags [S], seq 517075944, win 65535, options [nop,nop,md5 valid,mss 1400,sackOK,TS val 1751508483 ecr 0,nop,wscale 8], length 0 +11:59:59.883002 IP S > C: Flags [S.], seq 1902939253, ack 517075945, win 65535, options [nop,nop,md5 valid,mss 1400,sackOK,TS val 1751508479 ecr 1751508483,nop,wscale 8], length 0 +11:59:59.883012 IP C > S: Flags [.], ack 1, win 256, options [nop,nop,md5 valid,nop,nop,TS val 1751508483 ecr 1751508479], length 0 +11:59:59.883114 IP C > S: Flags [P.], seq 1:13, ack 1, win 256, options [nop,nop,md5 valid,nop,nop,TS val 1751508483 ecr 1751508479], length 12 +11:59:59.883122 IP S > C: Flags [.], ack 13, win 256, options [nop,nop,md5 valid,nop,nop,TS val 1751508483 ecr 1751508483], length 0 +11:59:59.883152 IP S > C: Flags [P.], seq 1:13, ack 13, win 256, options [nop,nop,md5 valid,nop,nop,TS val 1751508484 ecr 1751508483], length 12 +11:59:59.883170 IP C > S: Flags [.], ack 13, win 256, options [nop,nop,md5 valid,nop,nop,TS val 1751508484 ecr 1751508484], length 0 + +Of course, no SACK block will ever be added later, but nothing should break. +Technically, we could remove the 4 nops included in MD5+TS options, +but again some stacks could break seeing not conventional alignment. 
+ +Fixes: 4957faade11b ("TCPCT part 1g: Responder Cookie => Initiator") +Signed-off-by: Eric Dumazet +Cc: Florian Westphal +Cc: Mathieu Desnoyers +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_output.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -700,7 +700,8 @@ static unsigned int tcp_synack_options(c + unsigned int mss, struct sk_buff *skb, + struct tcp_out_options *opts, + const struct tcp_md5sig_key *md5, +- struct tcp_fastopen_cookie *foc) ++ struct tcp_fastopen_cookie *foc, ++ enum tcp_synack_type synack_type) + { + struct inet_request_sock *ireq = inet_rsk(req); + unsigned int remaining = MAX_TCP_OPTION_SPACE; +@@ -715,7 +716,8 @@ static unsigned int tcp_synack_options(c + * rather than TS in order to fit in better with old, + * buggy kernels, but that was deemed to be unnecessary. + */ +- ireq->tstamp_ok &= !ireq->sack_ok; ++ if (synack_type != TCP_SYNACK_COOKIE) ++ ireq->tstamp_ok &= !ireq->sack_ok; + } + #endif + +@@ -3388,7 +3390,7 @@ struct sk_buff *tcp_make_synack(const st + #endif + skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4); + tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5, +- foc) + sizeof(*th); ++ foc, synack_type) + sizeof(*th); + + skb_push(skb, tcp_header_size); + skb_reset_transport_header(skb); diff --git a/queue-5.7/tcp-md5-refine-tcp_md5_do_add-tcp_md5_hash_key-barriers.patch b/queue-5.7/tcp-md5-refine-tcp_md5_do_add-tcp_md5_hash_key-barriers.patch new file mode 100644 index 00000000000..08e4d552ef7 --- /dev/null +++ b/queue-5.7/tcp-md5-refine-tcp_md5_do_add-tcp_md5_hash_key-barriers.patch @@ -0,0 +1,90 @@ +From foo@baz Fri 17 Jul 2020 08:39:20 AM CEST +From: Eric Dumazet +Date: Wed, 1 Jul 2020 11:43:04 -0700 +Subject: tcp: md5: refine tcp_md5_do_add()/tcp_md5_hash_key() barriers + +From: Eric Dumazet + +[ Upstream commit e6ced831ef11a2a06e8d00aad9d4fc05b610bf38 ] + +My prior fix went a bit too far, according to Herbert and Mathieu. + +Since we accept that concurrent TCP MD5 lookups might see inconsistent +keys, we can use READ_ONCE()/WRITE_ONCE() instead of smp_rmb()/smp_wmb() + +Clearing all key->key[] is needed to avoid possible KMSAN reports, +if key->keylen is increased. Since tcp_md5_do_add() is not fast path, +using __GFP_ZERO to clear all struct tcp_md5sig_key is simpler. + +data_race() was added in linux-5.8 and will prevent KCSAN reports, +this can safely be removed in stable backports, if data_race() is +not yet backported. + +v2: use data_race() both in tcp_md5_hash_key() and tcp_md5_do_add() + +Fixes: 6a2febec338d ("tcp: md5: add missing memory barriers in tcp_md5_do_add()/tcp_md5_hash_key()") +Signed-off-by: Eric Dumazet +Cc: Mathieu Desnoyers +Cc: Herbert Xu +Cc: Marco Elver +Reviewed-by: Mathieu Desnoyers +Acked-by: Herbert Xu +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp.c | 6 +++--- + net/ipv4/tcp_ipv4.c | 14 ++++++++++---- + 2 files changed, 13 insertions(+), 7 deletions(-) + +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -3880,13 +3880,13 @@ EXPORT_SYMBOL(tcp_md5_hash_skb_data); + + int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key) + { +- u8 keylen = key->keylen; ++ u8 keylen = READ_ONCE(key->keylen); /* paired with WRITE_ONCE() in tcp_md5_do_add */ + struct scatterlist sg; + +- smp_rmb(); /* paired with smp_wmb() in tcp_md5_do_add() */ +- + sg_init_one(&sg, key->key, keylen); + ahash_request_set_crypt(hp->md5_req, &sg, NULL, keylen); ++ ++ /* tcp_md5_do_add() might change key->key under us */ + return crypto_ahash_update(hp->md5_req); + } + EXPORT_SYMBOL(tcp_md5_hash_key); +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -1103,12 +1103,18 @@ int tcp_md5_do_add(struct sock *sk, cons + + key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen, l3index); + if (key) { +- /* Pre-existing entry - just update that one. */ ++ /* Pre-existing entry - just update that one. ++ * Note that the key might be used concurrently. ++ */ + memcpy(key->key, newkey, newkeylen); + +- smp_wmb(); /* pairs with smp_rmb() in tcp_md5_hash_key() */ ++ /* Pairs with READ_ONCE() in tcp_md5_hash_key(). ++ * Also note that a reader could catch new key->keylen value ++ * but old key->key[], this is the reason we use __GFP_ZERO ++ * at sock_kmalloc() time below these lines. ++ */ ++ WRITE_ONCE(key->keylen, newkeylen); + +- key->keylen = newkeylen; + return 0; + } + +@@ -1124,7 +1130,7 @@ int tcp_md5_do_add(struct sock *sk, cons + rcu_assign_pointer(tp->md5sig_info, md5sig); + } + +- key = sock_kmalloc(sk, sizeof(*key), gfp); ++ key = sock_kmalloc(sk, sizeof(*key), gfp | __GFP_ZERO); + if (!key) + return -ENOMEM; + if (!tcp_alloc_md5sig_pool()) { diff --git a/queue-5.7/vlan-consolidate-vlan-parsing-code-and-limit-max-parsing-depth.patch b/queue-5.7/vlan-consolidate-vlan-parsing-code-and-limit-max-parsing-depth.patch new file mode 100644 index 00000000000..953c7803514 --- /dev/null +++ b/queue-5.7/vlan-consolidate-vlan-parsing-code-and-limit-max-parsing-depth.patch @@ -0,0 +1,136 @@ +From foo@baz Fri 17 Jul 2020 08:39:20 AM CEST +From: "Toke Høiland-Jørgensen" +Date: Tue, 7 Jul 2020 13:03:25 +0200 +Subject: vlan: consolidate VLAN parsing code and limit max parsing depth + +From: "Toke Høiland-Jørgensen" + +[ Upstream commit 469aceddfa3ed16e17ee30533fae45e90f62efd8 ] + +Toshiaki pointed out that we now have two very similar functions to extract +the L3 protocol number in the presence of VLAN tags. And Daniel pointed out +that the unbounded parsing loop makes it possible for maliciously crafted +packets to loop through potentially hundreds of tags. + +Fix both of these issues by consolidating the two parsing functions and +limiting the VLAN tag parsing to a max depth of 8 tags. As part of this, +switch over __vlan_get_protocol() to use skb_header_pointer() instead of +pskb_may_pull(), to avoid the possible side effects of the latter and keep +the skb pointer 'const' through all the parsing functions. + +v2: +- Use limit of 8 tags instead of 32 (matching XMIT_RECURSION_LIMIT) + +Reported-by: Toshiaki Makita +Reported-by: Daniel Borkmann +Fixes: d7bf2ebebc2b ("sched: consistently handle layer3 header accesses in the presence of VLANs") +Signed-off-by: Toke Høiland-Jørgensen +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/if_vlan.h | 57 ++++++++++++++++++------------------------------ + 1 file changed, 22 insertions(+), 35 deletions(-) + +--- a/include/linux/if_vlan.h ++++ b/include/linux/if_vlan.h +@@ -25,6 +25,8 @@ + #define VLAN_ETH_DATA_LEN 1500 /* Max. octets in payload */ + #define VLAN_ETH_FRAME_LEN 1518 /* Max. octets in frame sans FCS */ + ++#define VLAN_MAX_DEPTH 8 /* Max. number of nested VLAN tags parsed */ ++ + /* + * struct vlan_hdr - vlan header + * @h_vlan_TCI: priority and VLAN ID +@@ -308,34 +310,6 @@ static inline bool eth_type_vlan(__be16 + } + } + +-/* A getter for the SKB protocol field which will handle VLAN tags consistently +- * whether VLAN acceleration is enabled or not. +- */ +-static inline __be16 skb_protocol(const struct sk_buff *skb, bool skip_vlan) +-{ +- unsigned int offset = skb_mac_offset(skb) + sizeof(struct ethhdr); +- __be16 proto = skb->protocol; +- +- if (!skip_vlan) +- /* VLAN acceleration strips the VLAN header from the skb and +- * moves it to skb->vlan_proto +- */ +- return skb_vlan_tag_present(skb) ? skb->vlan_proto : proto; +- +- while (eth_type_vlan(proto)) { +- struct vlan_hdr vhdr, *vh; +- +- vh = skb_header_pointer(skb, offset, sizeof(vhdr), &vhdr); +- if (!vh) +- break; +- +- proto = vh->h_vlan_encapsulated_proto; +- offset += sizeof(vhdr); +- } +- +- return proto; +-} +- + static inline bool vlan_hw_offload_capable(netdev_features_t features, + __be16 proto) + { +@@ -605,10 +579,10 @@ static inline int vlan_get_tag(const str + * Returns the EtherType of the packet, regardless of whether it is + * vlan encapsulated (normal or hardware accelerated) or not. + */ +-static inline __be16 __vlan_get_protocol(struct sk_buff *skb, __be16 type, ++static inline __be16 __vlan_get_protocol(const struct sk_buff *skb, __be16 type, + int *depth) + { +- unsigned int vlan_depth = skb->mac_len; ++ unsigned int vlan_depth = skb->mac_len, parse_depth = VLAN_MAX_DEPTH; + + /* if type is 802.1Q/AD then the header should already be + * present at mac_len - VLAN_HLEN (if mac_len > 0), or at +@@ -623,13 +597,12 @@ static inline __be16 __vlan_get_protocol + vlan_depth = ETH_HLEN; + } + do { +- struct vlan_hdr *vh; ++ struct vlan_hdr vhdr, *vh; + +- if (unlikely(!pskb_may_pull(skb, +- vlan_depth + VLAN_HLEN))) ++ vh = skb_header_pointer(skb, vlan_depth, sizeof(vhdr), &vhdr); ++ if (unlikely(!vh || !--parse_depth)) + return 0; + +- vh = (struct vlan_hdr *)(skb->data + vlan_depth); + type = vh->h_vlan_encapsulated_proto; + vlan_depth += VLAN_HLEN; + } while (eth_type_vlan(type)); +@@ -648,11 +621,25 @@ static inline __be16 __vlan_get_protocol + * Returns the EtherType of the packet, regardless of whether it is + * vlan encapsulated (normal or hardware accelerated) or not. + */ +-static inline __be16 vlan_get_protocol(struct sk_buff *skb) ++static inline __be16 vlan_get_protocol(const struct sk_buff *skb) + { + return __vlan_get_protocol(skb, skb->protocol, NULL); + } + ++/* A getter for the SKB protocol field which will handle VLAN tags consistently ++ * whether VLAN acceleration is enabled or not. ++ */ ++static inline __be16 skb_protocol(const struct sk_buff *skb, bool skip_vlan) ++{ ++ if (!skip_vlan) ++ /* VLAN acceleration strips the VLAN header from the skb and ++ * moves it to skb->vlan_proto ++ */ ++ return skb_vlan_tag_present(skb) ? skb->vlan_proto : skb->protocol; ++ ++ return vlan_get_protocol(skb); ++} ++ + static inline void vlan_set_encap_proto(struct sk_buff *skb, + struct vlan_hdr *vhdr) + { -- 2.47.3
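
For reference, the tcp-md5-allow-changing-md5-keys-in-all-socket-states patch above lifts the CLOSE/LISTEN restriction, so user space may install or replace a TCP-MD5 key on a socket that is already connected. A minimal user-space sketch of that call follows; struct tcp_md5sig, TCP_MD5SIG and TCP_MD5SIG_MAXKEYLEN come from the kernel UAPI header, while the peer address and key string are made-up example values.

/* Install or replace a TCP-MD5 key on an already connected IPv4 socket.
 * The peer address and key passed by the caller are illustrative only.
 */
#include <string.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/tcp.h>		/* struct tcp_md5sig, TCP_MD5SIG */

static int set_md5_key(int fd, const char *peer_ip, const char *key)
{
	struct tcp_md5sig md5;
	struct sockaddr_in *sin = (struct sockaddr_in *)&md5.tcpm_addr;
	size_t keylen = strlen(key);

	if (keylen > TCP_MD5SIG_MAXKEYLEN)	/* 80 bytes max */
		return -1;

	memset(&md5, 0, sizeof(md5));
	sin->sin_family = AF_INET;
	if (inet_pton(AF_INET, peer_ip, &sin->sin_addr) != 1)
		return -1;

	md5.tcpm_keylen = keylen;
	memcpy(md5.tcpm_key, key, keylen);

	/* Before the revert this failed with EINVAL unless the socket was in
	 * CLOSE or LISTEN state; with the patch it also succeeds on
	 * ESTABLISHED sockets, matching common BGP implementations.
	 */
	return setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
}

A caller would typically invoke set_md5_key(fd, "192.0.2.1", "newsecret") on both peers, accepting whatever overlap of old and new keys the two implementations tolerate.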
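
The tcp-md5-refine-tcp_md5_do_add-tcp_md5_hash_key-barriers patch replaces the earlier smp_wmb()/smp_rmb() pair with WRITE_ONCE()/READ_ONCE() on key->keylen and zero-allocates the key, so a reader racing with an update sees at worst a mix of old and new key bytes (or trailing zeroes) but never an out-of-range length over uninitialized memory. The stand-alone sketch below shows that publication pattern; the volatile-cast READ_ONCE/WRITE_ONCE stand-ins and the simplified struct are assumptions for a user-space build, not the kernel definitions.

/* "Update the bytes, then publish the length", as in tcp_md5_do_add() and
 * tcp_md5_hash_key().  READ_ONCE/WRITE_ONCE are approximated with volatile
 * accesses here; the kernel macros additionally keep KCSAN quiet.
 */
#include <string.h>

#define WRITE_ONCE(x, val)	(*(volatile __typeof__(x) *)&(x) = (val))
#define READ_ONCE(x)		(*(volatile __typeof__(x) *)&(x))

struct md5_key {
	unsigned char key[80];	/* zero-filled at allocation (__GFP_ZERO) */
	unsigned char keylen;
};

/* Writer: may run while readers are hashing with the old key. */
static void key_update(struct md5_key *k, const void *newkey, unsigned char newlen)
{
	memcpy(k->key, newkey, newlen);	/* readers may observe a torn key... */
	WRITE_ONCE(k->keylen, newlen);	/* ...the length is stored last */
}

/* Reader: snapshot the length once, then use only that many bytes. */
static unsigned char key_snapshot(struct md5_key *k, unsigned char *buf)
{
	unsigned char len = READ_ONCE(k->keylen);

	/* Worst case: stale or mixed key bytes padded with zeroes, never
	 * uninitialized memory, because the struct was cleared on allocation.
	 */
	memcpy(buf, k->key, len);
	return len;
}

As the commit message notes, an inconsistent key during the race is accepted; the zero-initialised buffer is what keeps a larger, newly published length from exposing stale heap contents.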
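
Finally, the vlan-consolidate patch caps nested VLAN parsing at VLAN_MAX_DEPTH (8 tags) and reads each header through skb_header_pointer(), so a maliciously stacked frame cannot keep __vlan_get_protocol() looping or trigger pskb_may_pull() side effects. A buffer-based user-space sketch of the same bounded walk is shown below; it operates on a raw Ethernet frame rather than an skb, and the constants simply mirror the 802.1Q/802.1ad EtherTypes.

/* Return the L3 EtherType of a frame carrying up to 8 stacked VLAN tags,
 * mirroring the bounded loop in __vlan_get_protocol().  Returns 0 for a
 * truncated frame or a tag stack deeper than the limit.
 */
#include <stdint.h>
#include <stddef.h>
#include <string.h>
#include <arpa/inet.h>		/* ntohs() */

#define ETH_P_8021Q	0x8100
#define ETH_P_8021AD	0x88A8
#define VLAN_MAX_DEPTH	8

static uint16_t be16_at(const uint8_t *p)
{
	uint16_t v;

	memcpy(&v, p, sizeof(v));	/* avoids unaligned loads */
	return ntohs(v);
}

static uint16_t frame_l3_proto(const uint8_t *frame, size_t len)
{
	size_t off = 2 * 6;		/* skip destination and source MAC */
	int depth = VLAN_MAX_DEPTH;
	uint16_t proto;

	if (len < off + 2)
		return 0;
	proto = be16_at(frame + off);
	off += 2;

	while (proto == ETH_P_8021Q || proto == ETH_P_8021AD) {
		/* Each tag is 2 bytes of TCI plus 2 bytes encapsulated proto. */
		if (depth-- == 0 || len < off + 4)
			return 0;
		proto = be16_at(frame + off + 2);
		off += 4;
	}
	return proto;
}

As in the kernel helper, hitting the depth limit is reported the same way as a malformed frame (a zero protocol), which callers already treat as "nothing we can classify".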