]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.9-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 2 Dec 2020 09:55:20 +0000 (10:55 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 2 Dec 2020 09:55:20 +0000 (10:55 +0100)
added patches:
devlink-hold-rtnl-lock-while-reading-netdev-attributes.patch
devlink-make-sure-devlink-instance-and-port-are-in-same-net-namespace.patch
ipv6-addrlabel-fix-possible-memory-leak-in-ip6addrlbl_net_init.patch
mptcp-fix-null-ptr-dereference-on-bad-mpj.patch
net-af_iucv-set-correct-sk_protocol-for-child-sockets.patch
net-openvswitch-fix-ttl-decrement-action-netlink-message-format.patch
net-packet-fix-packet-receive-on-l3-devices-without-visible-hard-header.patch
net-tls-missing-received-data-after-fast-remote-close.patch
net-tls-protect-from-calling-tls_dev_del-for-tls-rx-twice.patch
rose-fix-null-pointer-dereference-in-rose_send_frame.patch
sock-set-sk_err-to-ee_errno-on-dequeue-from-errq.patch
tcp-set-inet_ecn_xmit-configuration-in-tcp_reinit_congestion_control.patch
tun-honor-iocb_nowait-flag.patch
usbnet-ipheth-fix-connectivity-with-ios-14.patch
vsock-virtio-discard-packets-only-when-socket-is-really-closed.patch

16 files changed:
queue-5.9/devlink-hold-rtnl-lock-while-reading-netdev-attributes.patch [new file with mode: 0644]
queue-5.9/devlink-make-sure-devlink-instance-and-port-are-in-same-net-namespace.patch [new file with mode: 0644]
queue-5.9/ipv6-addrlabel-fix-possible-memory-leak-in-ip6addrlbl_net_init.patch [new file with mode: 0644]
queue-5.9/mptcp-fix-null-ptr-dereference-on-bad-mpj.patch [new file with mode: 0644]
queue-5.9/net-af_iucv-set-correct-sk_protocol-for-child-sockets.patch [new file with mode: 0644]
queue-5.9/net-openvswitch-fix-ttl-decrement-action-netlink-message-format.patch [new file with mode: 0644]
queue-5.9/net-packet-fix-packet-receive-on-l3-devices-without-visible-hard-header.patch [new file with mode: 0644]
queue-5.9/net-tls-missing-received-data-after-fast-remote-close.patch [new file with mode: 0644]
queue-5.9/net-tls-protect-from-calling-tls_dev_del-for-tls-rx-twice.patch [new file with mode: 0644]
queue-5.9/rose-fix-null-pointer-dereference-in-rose_send_frame.patch [new file with mode: 0644]
queue-5.9/series [new file with mode: 0644]
queue-5.9/sock-set-sk_err-to-ee_errno-on-dequeue-from-errq.patch [new file with mode: 0644]
queue-5.9/tcp-set-inet_ecn_xmit-configuration-in-tcp_reinit_congestion_control.patch [new file with mode: 0644]
queue-5.9/tun-honor-iocb_nowait-flag.patch [new file with mode: 0644]
queue-5.9/usbnet-ipheth-fix-connectivity-with-ios-14.patch [new file with mode: 0644]
queue-5.9/vsock-virtio-discard-packets-only-when-socket-is-really-closed.patch [new file with mode: 0644]

diff --git a/queue-5.9/devlink-hold-rtnl-lock-while-reading-netdev-attributes.patch b/queue-5.9/devlink-hold-rtnl-lock-while-reading-netdev-attributes.patch
new file mode 100644 (file)
index 0000000..f74f5b1
--- /dev/null
@@ -0,0 +1,55 @@
+From foo@baz Wed Dec  2 10:40:54 AM CET 2020
+From: Parav Pandit <parav@nvidia.com>
+Date: Wed, 25 Nov 2020 11:16:19 +0200
+Subject: devlink: Hold rtnl lock while reading netdev attributes
+
+From: Parav Pandit <parav@nvidia.com>
+
+[ Upstream commit b187c9b4178b87954dbc94e78a7094715794714f ]
+
+A netdevice of a devlink port can be moved to different net namespace
+than its parent devlink instance.
+This scenario occurs when devlink reload is not used.
+
+When netdevice is undergoing migration to net namespace, its ifindex
+and name may change.
+
+In such use case, devlink port query may read stale netdev attributes.
+
+Fix it by reading them under rtnl lock.
+
+Fixes: bfcd3a466172 ("Introduce devlink infrastructure")
+Signed-off-by: Parav Pandit <parav@nvidia.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/devlink.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/net/core/devlink.c
++++ b/net/core/devlink.c
+@@ -616,6 +616,8 @@ static int devlink_nl_port_fill(struct s
+       if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
+               goto nla_put_failure;
++      /* Hold rtnl lock while accessing port's netdev attributes. */
++      rtnl_lock();
+       spin_lock_bh(&devlink_port->type_lock);
+       if (nla_put_u16(msg, DEVLINK_ATTR_PORT_TYPE, devlink_port->type))
+               goto nla_put_failure_type_locked;
+@@ -642,6 +644,7 @@ static int devlink_nl_port_fill(struct s
+                       goto nla_put_failure_type_locked;
+       }
+       spin_unlock_bh(&devlink_port->type_lock);
++      rtnl_unlock();
+       if (devlink_nl_port_attrs_put(msg, devlink_port))
+               goto nla_put_failure;
+       if (devlink_nl_port_function_attrs_put(msg, devlink_port, extack))
+@@ -652,6 +655,7 @@ static int devlink_nl_port_fill(struct s
+ nla_put_failure_type_locked:
+       spin_unlock_bh(&devlink_port->type_lock);
++      rtnl_unlock();
+ nla_put_failure:
+       genlmsg_cancel(msg, hdr);
+       return -EMSGSIZE;
diff --git a/queue-5.9/devlink-make-sure-devlink-instance-and-port-are-in-same-net-namespace.patch b/queue-5.9/devlink-make-sure-devlink-instance-and-port-are-in-same-net-namespace.patch
new file mode 100644 (file)
index 0000000..b56fb23
--- /dev/null
@@ -0,0 +1,38 @@
+From foo@baz Wed Dec  2 10:40:54 AM CET 2020
+From: Parav Pandit <parav@nvidia.com>
+Date: Wed, 25 Nov 2020 11:16:20 +0200
+Subject: devlink: Make sure devlink instance and port are in same net namespace
+
+From: Parav Pandit <parav@nvidia.com>
+
+[ Upstream commit a7b43649507dae4e55ff0087cad4e4dd1c6d5b99 ]
+
+When devlink reload operation is not used, netdev of an Ethernet port may
+be present in different net namespace than the net namespace of the
+devlink instance.
+
+Ensure that both the devlink instance and devlink port netdev are located
+in same net namespace.
+
+Fixes: 070c63f20f6c ("net: devlink: allow to change namespaces during reload")
+Signed-off-by: Parav Pandit <parav@nvidia.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/devlink.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/core/devlink.c
++++ b/net/core/devlink.c
+@@ -626,9 +626,10 @@ static int devlink_nl_port_fill(struct s
+                       devlink_port->desired_type))
+               goto nla_put_failure_type_locked;
+       if (devlink_port->type == DEVLINK_PORT_TYPE_ETH) {
++              struct net *net = devlink_net(devlink_port->devlink);
+               struct net_device *netdev = devlink_port->type_dev;
+-              if (netdev &&
++              if (netdev && net_eq(net, dev_net(netdev)) &&
+                   (nla_put_u32(msg, DEVLINK_ATTR_PORT_NETDEV_IFINDEX,
+                                netdev->ifindex) ||
+                    nla_put_string(msg, DEVLINK_ATTR_PORT_NETDEV_NAME,
diff --git a/queue-5.9/ipv6-addrlabel-fix-possible-memory-leak-in-ip6addrlbl_net_init.patch b/queue-5.9/ipv6-addrlabel-fix-possible-memory-leak-in-ip6addrlbl_net_init.patch
new file mode 100644 (file)
index 0000000..1065295
--- /dev/null
@@ -0,0 +1,84 @@
+From foo@baz Wed Dec  2 10:40:54 AM CET 2020
+From: Wang Hai <wanghai38@huawei.com>
+Date: Tue, 24 Nov 2020 15:17:28 +0800
+Subject: ipv6: addrlabel: fix possible memory leak in ip6addrlbl_net_init
+
+From: Wang Hai <wanghai38@huawei.com>
+
+[ Upstream commit e255e11e66da8281e337e4e352956e8a4999fca4 ]
+
+kmemleak reports a memory leak as follows:
+
+unreferenced object 0xffff8880059c6a00 (size 64):
+  comm "ip", pid 23696, jiffies 4296590183 (age 1755.384s)
+  hex dump (first 32 bytes):
+    20 01 00 10 00 00 00 00 00 00 00 00 00 00 00 00   ...............
+    1c 00 00 00 00 00 00 00 00 00 00 00 07 00 00 00  ................
+  backtrace:
+    [<00000000aa4e7a87>] ip6addrlbl_add+0x90/0xbb0
+    [<0000000070b8d7f1>] ip6addrlbl_net_init+0x109/0x170
+    [<000000006a9ca9d4>] ops_init+0xa8/0x3c0
+    [<000000002da57bf2>] setup_net+0x2de/0x7e0
+    [<000000004e52d573>] copy_net_ns+0x27d/0x530
+    [<00000000b07ae2b4>] create_new_namespaces+0x382/0xa30
+    [<000000003b76d36f>] unshare_nsproxy_namespaces+0xa1/0x1d0
+    [<0000000030653721>] ksys_unshare+0x3a4/0x780
+    [<0000000007e82e40>] __x64_sys_unshare+0x2d/0x40
+    [<0000000031a10c08>] do_syscall_64+0x33/0x40
+    [<0000000099df30e7>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+We should free all rules when we catch an error in ip6addrlbl_net_init(),
+otherwise a memory leak will occur.
+
+Fixes: 2a8cc6c89039 ("[IPV6] ADDRCONF: Support RFC3484 configurable address selection policy table.")
+Reported-by: Hulk Robot <hulkci@huawei.com>
+Signed-off-by: Wang Hai <wanghai38@huawei.com>
+Link: https://lore.kernel.org/r/20201124071728.8385-1-wanghai38@huawei.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/addrlabel.c |   26 +++++++++++++++++---------
+ 1 file changed, 17 insertions(+), 9 deletions(-)
+
+--- a/net/ipv6/addrlabel.c
++++ b/net/ipv6/addrlabel.c
+@@ -306,7 +306,9 @@ static int ip6addrlbl_del(struct net *ne
+ /* add default label */
+ static int __net_init ip6addrlbl_net_init(struct net *net)
+ {
+-      int err = 0;
++      struct ip6addrlbl_entry *p = NULL;
++      struct hlist_node *n;
++      int err;
+       int i;
+       ADDRLABEL(KERN_DEBUG "%s\n", __func__);
+@@ -315,14 +317,20 @@ static int __net_init ip6addrlbl_net_ini
+       INIT_HLIST_HEAD(&net->ipv6.ip6addrlbl_table.head);
+       for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) {
+-              int ret = ip6addrlbl_add(net,
+-                                       ip6addrlbl_init_table[i].prefix,
+-                                       ip6addrlbl_init_table[i].prefixlen,
+-                                       0,
+-                                       ip6addrlbl_init_table[i].label, 0);
+-              /* XXX: should we free all rules when we catch an error? */
+-              if (ret && (!err || err != -ENOMEM))
+-                      err = ret;
++              err = ip6addrlbl_add(net,
++                                   ip6addrlbl_init_table[i].prefix,
++                                   ip6addrlbl_init_table[i].prefixlen,
++                                   0,
++                                   ip6addrlbl_init_table[i].label, 0);
++              if (err)
++                      goto err_ip6addrlbl_add;
++      }
++      return 0;
++
++err_ip6addrlbl_add:
++      hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) {
++              hlist_del_rcu(&p->list);
++              kfree_rcu(p, rcu);
+       }
+       return err;
+ }
diff --git a/queue-5.9/mptcp-fix-null-ptr-dereference-on-bad-mpj.patch b/queue-5.9/mptcp-fix-null-ptr-dereference-on-bad-mpj.patch
new file mode 100644 (file)
index 0000000..0abf556
--- /dev/null
@@ -0,0 +1,47 @@
+From foo@baz Wed Dec  2 10:40:54 AM CET 2020
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Thu, 26 Nov 2020 15:17:53 +0100
+Subject: mptcp: fix NULL ptr dereference on bad MPJ
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit d3ab78858f1451351221061a1c365495df196500 ]
+
+If an msk listener receives an MPJ carrying an invalid token, it
+will zero the request socket msk entry. That should later
+cause fallback and subflow reset - as per RFC - at
+subflow_syn_recv_sock() time due to failing hmac validation.
+
+Since commit 4cf8b7e48a09 ("subflow: introduce and use
+mptcp_can_accept_new_subflow()"), we unconditionally dereference
+- in mptcp_can_accept_new_subflow - the subflow request msk
+before performing hmac validation. In the above scenario we
+hit a NULL ptr dereference.
+
+Address the issue doing the hmac validation earlier.
+
+Fixes: 4cf8b7e48a09 ("subflow: introduce and use mptcp_can_accept_new_subflow()")
+Tested-by: Davide Caratti <dcaratti@redhat.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Link: https://lore.kernel.org/r/03b2cfa3ac80d8fc18272edc6442a9ddf0b1e34e.1606400227.git.pabeni@redhat.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/subflow.c |    5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -542,9 +542,8 @@ create_msk:
+                       fallback = true;
+       } else if (subflow_req->mp_join) {
+               mptcp_get_options(skb, &mp_opt);
+-              if (!mp_opt.mp_join ||
+-                  !mptcp_can_accept_new_subflow(subflow_req->msk) ||
+-                  !subflow_hmac_valid(req, &mp_opt)) {
++              if (!mp_opt.mp_join || !subflow_hmac_valid(req, &mp_opt) ||
++                  !mptcp_can_accept_new_subflow(subflow_req->msk)) {
+                       SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
+                       fallback = true;
+               }
diff --git a/queue-5.9/net-af_iucv-set-correct-sk_protocol-for-child-sockets.patch b/queue-5.9/net-af_iucv-set-correct-sk_protocol-for-child-sockets.patch
new file mode 100644 (file)
index 0000000..b40436c
--- /dev/null
@@ -0,0 +1,45 @@
+From foo@baz Wed Dec  2 10:40:54 AM CET 2020
+From: Julian Wiedmann <jwi@linux.ibm.com>
+Date: Fri, 20 Nov 2020 11:06:57 +0100
+Subject: net/af_iucv: set correct sk_protocol for child sockets
+
+From: Julian Wiedmann <jwi@linux.ibm.com>
+
+[ Upstream commit c5dab0941fcdc9664eb0ec0d4d51433216d91336 ]
+
+Child sockets erroneously inherit their parent's sk_type (ie. SOCK_*),
+instead of the PF_IUCV protocol that the parent was created with in
+iucv_sock_create().
+
+We're currently not using sk->sk_protocol ourselves, so this shouldn't
+have much impact (except eg. getting the output in skb_dump() right).
+
+Fixes: eac3731bd04c ("[S390]: Add AF_IUCV socket support")
+Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
+Link: https://lore.kernel.org/r/20201120100657.34407-1-jwi@linux.ibm.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/iucv/af_iucv.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/iucv/af_iucv.c
++++ b/net/iucv/af_iucv.c
+@@ -1645,7 +1645,7 @@ static int iucv_callback_connreq(struct
+       }
+       /* Create the new socket */
+-      nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC, 0);
++      nsk = iucv_sock_alloc(NULL, sk->sk_protocol, GFP_ATOMIC, 0);
+       if (!nsk) {
+               err = pr_iucv->path_sever(path, user_data);
+               iucv_path_free(path);
+@@ -1851,7 +1851,7 @@ static int afiucv_hs_callback_syn(struct
+               goto out;
+       }
+-      nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC, 0);
++      nsk = iucv_sock_alloc(NULL, sk->sk_protocol, GFP_ATOMIC, 0);
+       bh_lock_sock(sk);
+       if ((sk->sk_state != IUCV_LISTEN) ||
+           sk_acceptq_is_full(sk) ||
diff --git a/queue-5.9/net-openvswitch-fix-ttl-decrement-action-netlink-message-format.patch b/queue-5.9/net-openvswitch-fix-ttl-decrement-action-netlink-message-format.patch
new file mode 100644 (file)
index 0000000..6a0c3b3
--- /dev/null
@@ -0,0 +1,170 @@
+From foo@baz Wed Dec  2 10:40:54 AM CET 2020
+From: Eelco Chaudron <echaudro@redhat.com>
+Date: Tue, 24 Nov 2020 07:34:44 -0500
+Subject: net: openvswitch: fix TTL decrement action netlink message format
+
+From: Eelco Chaudron <echaudro@redhat.com>
+
+[ Upstream commit 69929d4c49e182f8526d42c43b37b460d562d3a0 ]
+
+Currently, the openvswitch module is not accepting the correctly formatted
+netlink message for the TTL decrement action. For both setting and getting
+the dec_ttl action, the actions should be nested in the
+OVS_DEC_TTL_ATTR_ACTION attribute as mentioned in the openvswitch.h uapi.
+
+When the original patch was sent, it was tested with a private OVS userspace
+implementation. This implementation was unfortunately not upstreamed and
+reviewed, hence an erroneous version of this patch was sent out.
+
+Leaving the patch as-is would cause problems as the kernel module could
+interpret additional attributes as actions and vice-versa, due to the
+actions not being encapsulated/nested within the actual attribute, but
+being concatenated after it.
+
+Fixes: 744676e77720 ("openvswitch: add TTL decrement action")
+Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
+Link: https://lore.kernel.org/r/160622121495.27296.888010441924340582.stgit@wsfd-netdev64.ntdv.lab.eng.bos.redhat.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/uapi/linux/openvswitch.h |    2 +
+ net/openvswitch/actions.c        |    7 +--
+ net/openvswitch/flow_netlink.c   |   74 ++++++++++++++++++++++++++++-----------
+ 3 files changed, 60 insertions(+), 23 deletions(-)
+
+--- a/include/uapi/linux/openvswitch.h
++++ b/include/uapi/linux/openvswitch.h
+@@ -1058,4 +1058,6 @@ enum ovs_dec_ttl_attr {
+       __OVS_DEC_TTL_ATTR_MAX
+ };
++#define OVS_DEC_TTL_ATTR_MAX (__OVS_DEC_TTL_ATTR_MAX - 1)
++
+ #endif /* _LINUX_OPENVSWITCH_H */
+--- a/net/openvswitch/actions.c
++++ b/net/openvswitch/actions.c
+@@ -970,14 +970,13 @@ static int dec_ttl_exception_handler(str
+ {
+       /* The first action is always 'OVS_DEC_TTL_ATTR_ARG'. */
+       struct nlattr *dec_ttl_arg = nla_data(attr);
+-      int rem = nla_len(attr);
+       if (nla_len(dec_ttl_arg)) {
+-              struct nlattr *actions = nla_next(dec_ttl_arg, &rem);
++              struct nlattr *actions = nla_data(dec_ttl_arg);
+               if (actions)
+-                      return clone_execute(dp, skb, key, 0, actions, rem,
+-                                           last, false);
++                      return clone_execute(dp, skb, key, 0, nla_data(actions),
++                                           nla_len(actions), last, false);
+       }
+       consume_skb(skb);
+       return 0;
+--- a/net/openvswitch/flow_netlink.c
++++ b/net/openvswitch/flow_netlink.c
+@@ -2503,28 +2503,42 @@ static int validate_and_copy_dec_ttl(str
+                                    __be16 eth_type, __be16 vlan_tci,
+                                    u32 mpls_label_count, bool log)
+ {
+-      int start, err;
+-      u32 nested = true;
++      const struct nlattr *attrs[OVS_DEC_TTL_ATTR_MAX + 1];
++      int start, action_start, err, rem;
++      const struct nlattr *a, *actions;
++
++      memset(attrs, 0, sizeof(attrs));
++      nla_for_each_nested(a, attr, rem) {
++              int type = nla_type(a);
++
++              /* Ignore unknown attributes to be future proof. */
++              if (type > OVS_DEC_TTL_ATTR_MAX)
++                      continue;
+-      if (!nla_len(attr))
+-              return ovs_nla_add_action(sfa, OVS_ACTION_ATTR_DEC_TTL,
+-                                        NULL, 0, log);
++              if (!type || attrs[type])
++                      return -EINVAL;
++
++              attrs[type] = a;
++      }
++
++      actions = attrs[OVS_DEC_TTL_ATTR_ACTION];
++      if (rem || !actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
++              return -EINVAL;
+       start = add_nested_action_start(sfa, OVS_ACTION_ATTR_DEC_TTL, log);
+       if (start < 0)
+               return start;
+-      err = ovs_nla_add_action(sfa, OVS_DEC_TTL_ATTR_ACTION, &nested,
+-                               sizeof(nested), log);
+-
+-      if (err)
+-              return err;
++      action_start = add_nested_action_start(sfa, OVS_DEC_TTL_ATTR_ACTION, log);
++      if (action_start < 0)
++              return start;
+-      err = __ovs_nla_copy_actions(net, attr, key, sfa, eth_type,
++      err = __ovs_nla_copy_actions(net, actions, key, sfa, eth_type,
+                                    vlan_tci, mpls_label_count, log);
+       if (err)
+               return err;
++      add_nested_action_end(*sfa, action_start);
+       add_nested_action_end(*sfa, start);
+       return 0;
+ }
+@@ -3487,20 +3501,42 @@ out:
+ static int dec_ttl_action_to_attr(const struct nlattr *attr,
+                                 struct sk_buff *skb)
+ {
+-      int err = 0, rem = nla_len(attr);
+-      struct nlattr *start;
++      struct nlattr *start, *action_start;
++      const struct nlattr *a;
++      int err = 0, rem;
+       start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_DEC_TTL);
+-
+       if (!start)
+               return -EMSGSIZE;
+-      err = ovs_nla_put_actions(nla_data(attr), rem, skb);
+-      if (err)
+-              nla_nest_cancel(skb, start);
+-      else
+-              nla_nest_end(skb, start);
++      nla_for_each_attr(a, nla_data(attr), nla_len(attr), rem) {
++              switch (nla_type(a)) {
++              case OVS_DEC_TTL_ATTR_ACTION:
++
++                      action_start = nla_nest_start_noflag(skb, OVS_DEC_TTL_ATTR_ACTION);
++                      if (!action_start) {
++                              err = -EMSGSIZE;
++                              goto out;
++                      }
++                      err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
++                      if (err)
++                              goto out;
++
++                      nla_nest_end(skb, action_start);
++                      break;
++
++              default:
++                      /* Ignore all other option to be future compatible */
++                      break;
++              }
++      }
++
++      nla_nest_end(skb, start);
++      return 0;
++
++out:
++      nla_nest_cancel(skb, start);
+       return err;
+ }
diff --git a/queue-5.9/net-packet-fix-packet-receive-on-l3-devices-without-visible-hard-header.patch b/queue-5.9/net-packet-fix-packet-receive-on-l3-devices-without-visible-hard-header.patch
new file mode 100644 (file)
index 0000000..177a5bb
--- /dev/null
@@ -0,0 +1,136 @@
+From foo@baz Wed Dec  2 10:40:54 AM CET 2020
+From: Eyal Birger <eyal.birger@gmail.com>
+Date: Sat, 21 Nov 2020 08:28:17 +0200
+Subject: net/packet: fix packet receive on L3 devices without visible hard header
+
+From: Eyal Birger <eyal.birger@gmail.com>
+
+[ Upstream commit d549699048b4b5c22dd710455bcdb76966e55aa3 ]
+
+In the patchset merged by commit b9fcf0a0d826
+("Merge branch 'support-AF_PACKET-for-layer-3-devices'") L3 devices which
+did not have header_ops were given one for the purpose of protocol parsing
+on af_packet transmit path.
+
+That change made af_packet receive path regard these devices as having a
+visible L3 header and therefore aligned incoming skb->data to point to the
+skb's mac_header. Some devices, such as ipip, xfrmi, and others, do not
+reset their mac_header prior to ingress and therefore their incoming
+packets became malformed.
+
+Ideally these devices would reset their mac headers, or af_packet would be
+able to rely on dev->hard_header_len being 0 for such cases, but it seems
+this is not the case.
+
+Fix by changing af_packet RX ll visibility criteria to include the
+existence of a '.create()' header operation, which is used when creating
+a device hard header - via dev_hard_header() - by upper layers, and does
+not exist in these L3 devices.
+
+As this predicate may be useful in other situations, add it as a common
+dev_has_header() helper in netdevice.h.
+
+Fixes: b9fcf0a0d826 ("Merge branch 'support-AF_PACKET-for-layer-3-devices'")
+Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
+Acked-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Acked-by: Willem de Bruijn <willemb@google.com>
+Link: https://lore.kernel.org/r/20201121062817.3178900-1-eyal.birger@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/netdevice.h |    5 +++++
+ net/packet/af_packet.c    |   38 +++++++++++++++++++++-----------------
+ 2 files changed, 26 insertions(+), 17 deletions(-)
+
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -3103,6 +3103,11 @@ static inline bool dev_validate_header(c
+       return false;
+ }
++static inline bool dev_has_header(const struct net_device *dev)
++{
++      return dev->header_ops && dev->header_ops->create;
++}
++
+ typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr,
+                          int len, int size);
+ int register_gifconf(unsigned int family, gifconf_func_t *gifconf);
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -93,38 +93,42 @@
+ /*
+    Assumptions:
+-   - if device has no dev->hard_header routine, it adds and removes ll header
+-     inside itself. In this case ll header is invisible outside of device,
+-     but higher levels still should reserve dev->hard_header_len.
+-     Some devices are enough clever to reallocate skb, when header
+-     will not fit to reserved space (tunnel), another ones are silly
+-     (PPP).
++   - If the device has no dev->header_ops->create, there is no LL header
++     visible above the device. In this case, its hard_header_len should be 0.
++     The device may prepend its own header internally. In this case, its
++     needed_headroom should be set to the space needed for it to add its
++     internal header.
++     For example, a WiFi driver pretending to be an Ethernet driver should
++     set its hard_header_len to be the Ethernet header length, and set its
++     needed_headroom to be (the real WiFi header length - the fake Ethernet
++     header length).
+    - packet socket receives packets with pulled ll header,
+      so that SOCK_RAW should push it back.
+ On receive:
+ -----------
+-Incoming, dev->hard_header!=NULL
++Incoming, dev_has_header(dev) == true
+    mac_header -> ll header
+    data       -> data
+-Outgoing, dev->hard_header!=NULL
++Outgoing, dev_has_header(dev) == true
+    mac_header -> ll header
+    data       -> ll header
+-Incoming, dev->hard_header==NULL
+-   mac_header -> UNKNOWN position. It is very likely, that it points to ll
+-               header.  PPP makes it, that is wrong, because introduce
+-               assymetry between rx and tx paths.
++Incoming, dev_has_header(dev) == false
++   mac_header -> data
++     However drivers often make it point to the ll header.
++     This is incorrect because the ll header should be invisible to us.
+    data       -> data
+-Outgoing, dev->hard_header==NULL
+-   mac_header -> data. ll header is still not built!
++Outgoing, dev_has_header(dev) == false
++   mac_header -> data. ll header is invisible to us.
+    data       -> data
+ Resume
+-  If dev->hard_header==NULL we are unlikely to restore sensible ll header.
++  If dev_has_header(dev) == false we are unable to restore the ll header,
++    because it is invisible to us.
+ On transmit:
+@@ -2066,7 +2070,7 @@ static int packet_rcv(struct sk_buff *sk
+       skb->dev = dev;
+-      if (dev->header_ops) {
++      if (dev_has_header(dev)) {
+               /* The device has an explicit notion of ll header,
+                * exported to higher levels.
+                *
+@@ -2195,7 +2199,7 @@ static int tpacket_rcv(struct sk_buff *s
+       if (!net_eq(dev_net(dev), sock_net(sk)))
+               goto drop;
+-      if (dev->header_ops) {
++      if (dev_has_header(dev)) {
+               if (sk->sk_type != SOCK_DGRAM)
+                       skb_push(skb, skb->data - skb_mac_header(skb));
+               else if (skb->pkt_type == PACKET_OUTGOING) {
diff --git a/queue-5.9/net-tls-missing-received-data-after-fast-remote-close.patch b/queue-5.9/net-tls-missing-received-data-after-fast-remote-close.patch
new file mode 100644 (file)
index 0000000..f154028
--- /dev/null
@@ -0,0 +1,50 @@
+From foo@baz Wed Dec  2 10:40:54 AM CET 2020
+From: Vadim Fedorenko <vfedorenko@novek.ru>
+Date: Thu, 19 Nov 2020 18:59:48 +0300
+Subject: net/tls: missing received data after fast remote close
+
+From: Vadim Fedorenko <vfedorenko@novek.ru>
+
+[ Upstream commit 20ffc7adf53a5fd3d19751fbff7895bcca66686e ]
+
+If a TCP socket receives a FIN after some data and the
+parser hasn't started before the data is read, the caller will receive
+an empty buffer. This behavior differs from a plain TCP socket and
+requires special handling in user-space.
+The flow that triggers the race is simple. Server sends small
+amount of data right after the connection is configured to use TLS
+and closes the connection. In this case receiver sees TLS Handshake
+data, configures TLS socket right after Change Cipher Spec record.
+While the configuration is in process, TCP socket receives small
+Application Data record, Encrypted Alert record and FIN packet. So
+the TCP socket changes sk_shutdown to RCV_SHUTDOWN and sk_flag with
+SK_DONE bit set. The received data is not parsed upon arrival and is
+never sent to user-space.
+
+Patch unpauses parser directly if we have unparsed data in tcp
+receive queue.
+
+Fixes: fcf4793e278e ("tls: check RCV_SHUTDOWN in tls_wait_data")
+Signed-off-by: Vadim Fedorenko <vfedorenko@novek.ru>
+Link: https://lore.kernel.org/r/1605801588-12236-1-git-send-email-vfedorenko@novek.ru
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tls/tls_sw.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/net/tls/tls_sw.c
++++ b/net/tls/tls_sw.c
+@@ -1295,6 +1295,12 @@ static struct sk_buff *tls_wait_data(str
+                       return NULL;
+               }
++              if (!skb_queue_empty(&sk->sk_receive_queue)) {
++                      __strp_unpause(&ctx->strp);
++                      if (ctx->recv_pkt)
++                              return ctx->recv_pkt;
++              }
++
+               if (sk->sk_shutdown & RCV_SHUTDOWN)
+                       return NULL;
diff --git a/queue-5.9/net-tls-protect-from-calling-tls_dev_del-for-tls-rx-twice.patch b/queue-5.9/net-tls-protect-from-calling-tls_dev_del-for-tls-rx-twice.patch
new file mode 100644 (file)
index 0000000..5346d39
--- /dev/null
@@ -0,0 +1,65 @@
+From foo@baz Wed Dec  2 10:40:54 AM CET 2020
+From: Maxim Mikityanskiy <maximmi@mellanox.com>
+Date: Wed, 25 Nov 2020 14:18:10 -0800
+Subject: net/tls: Protect from calling tls_dev_del for TLS RX twice
+
+From: Maxim Mikityanskiy <maximmi@mellanox.com>
+
+[ Upstream commit 025cc2fb6a4e84e9a0552c0017dcd1c24b7ac7da ]
+
+tls_device_offload_cleanup_rx doesn't clear tls_ctx->netdev after
+calling tls_dev_del if TLS TX offload is also enabled. Clearing
+tls_ctx->netdev gets postponed until tls_device_gc_task. It leaves a
+time frame when tls_device_down may get called and call tls_dev_del for
+RX one extra time, confusing the driver, which may lead to a crash.
+
+This patch corrects this racy behavior by adding a flag to prevent
+tls_device_down from calling tls_dev_del the second time.
+
+Fixes: e8f69799810c ("net/tls: Add generic NIC offload infrastructure")
+Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Link: https://lore.kernel.org/r/20201125221810.69870-1-saeedm@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/tls.h    |    6 ++++++
+ net/tls/tls_device.c |    5 ++++-
+ 2 files changed, 10 insertions(+), 1 deletion(-)
+
+--- a/include/net/tls.h
++++ b/include/net/tls.h
+@@ -199,6 +199,12 @@ enum tls_context_flags {
+        * to be atomic.
+        */
+       TLS_TX_SYNC_SCHED = 1,
++      /* tls_dev_del was called for the RX side, device state was released,
++       * but tls_ctx->netdev might still be kept, because TX-side driver
++       * resources might not be released yet. Used to prevent the second
++       * tls_dev_del call in tls_device_down if it happens simultaneously.
++       */
++      TLS_RX_DEV_CLOSED = 2,
+ };
+ struct cipher_context {
+--- a/net/tls/tls_device.c
++++ b/net/tls/tls_device.c
+@@ -1262,6 +1262,8 @@ void tls_device_offload_cleanup_rx(struc
+       if (tls_ctx->tx_conf != TLS_HW) {
+               dev_put(netdev);
+               tls_ctx->netdev = NULL;
++      } else {
++              set_bit(TLS_RX_DEV_CLOSED, &tls_ctx->flags);
+       }
+ out:
+       up_read(&device_offload_lock);
+@@ -1291,7 +1293,8 @@ static int tls_device_down(struct net_de
+               if (ctx->tx_conf == TLS_HW)
+                       netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
+                                                       TLS_OFFLOAD_CTX_DIR_TX);
+-              if (ctx->rx_conf == TLS_HW)
++              if (ctx->rx_conf == TLS_HW &&
++                  !test_bit(TLS_RX_DEV_CLOSED, &ctx->flags))
+                       netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
+                                                       TLS_OFFLOAD_CTX_DIR_RX);
+               WRITE_ONCE(ctx->netdev, NULL);
diff --git a/queue-5.9/rose-fix-null-pointer-dereference-in-rose_send_frame.patch b/queue-5.9/rose-fix-null-pointer-dereference-in-rose_send_frame.patch
new file mode 100644 (file)
index 0000000..154266e
--- /dev/null
@@ -0,0 +1,64 @@
+From foo@baz Wed Dec  2 10:40:54 AM CET 2020
+From: Anmol Karn <anmol.karan123@gmail.com>
+Date: Fri, 20 Nov 2020 00:40:43 +0530
+Subject: rose: Fix Null pointer dereference in rose_send_frame()
+
+From: Anmol Karn <anmol.karan123@gmail.com>
+
+[ Upstream commit 3b3fd068c56e3fbea30090859216a368398e39bf ]
+
+rose_send_frame() dereferences `neigh->dev` when called from
+rose_transmit_clear_request(), and the first occurrence of the
+`neigh` is in rose_loopback_timer() as `rose_loopback_neigh`,
+and it is initialized in rose_add_loopback_neigh() as NULL.
+i.e. when `rose_loopback_neigh` is used in rose_loopback_timer()
+its `->dev` was still NULL and rose_loopback_timer() was calling
+rose_rx_call_request() without checking for NULL.
+
+- net/rose/rose_link.c
+This bug seems to get triggered in this line:
+
+rose_call = (ax25_address *)neigh->dev->dev_addr;
+
+Fix it by adding NULL checking for `rose_loopback_neigh->dev`
+in rose_loopback_timer().
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Suggested-by: Jakub Kicinski <kuba@kernel.org>
+Reported-by: syzbot+a1c743815982d9496393@syzkaller.appspotmail.com
+Tested-by: syzbot+a1c743815982d9496393@syzkaller.appspotmail.com
+Link: https://syzkaller.appspot.com/bug?id=9d2a7ca8c7f2e4b682c97578dfa3f236258300b3
+Signed-off-by: Anmol Karn <anmol.karan123@gmail.com>
+Link: https://lore.kernel.org/r/20201119191043.28813-1-anmol.karan123@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/rose/rose_loopback.c |   17 +++++++++++++----
+ 1 file changed, 13 insertions(+), 4 deletions(-)
+
+--- a/net/rose/rose_loopback.c
++++ b/net/rose/rose_loopback.c
+@@ -96,10 +96,19 @@ static void rose_loopback_timer(struct t
+               }
+               if (frametype == ROSE_CALL_REQUEST) {
+-                      if ((dev = rose_dev_get(dest)) != NULL) {
+-                              if (rose_rx_call_request(skb, dev, rose_loopback_neigh, lci_o) == 0)
+-                                      kfree_skb(skb);
+-                      } else {
++                      if (!rose_loopback_neigh->dev) {
++                              kfree_skb(skb);
++                              continue;
++                      }
++
++                      dev = rose_dev_get(dest);
++                      if (!dev) {
++                              kfree_skb(skb);
++                              continue;
++                      }
++
++                      if (rose_rx_call_request(skb, dev, rose_loopback_neigh, lci_o) == 0) {
++                              dev_put(dev);
+                               kfree_skb(skb);
+                       }
+               } else {
diff --git a/queue-5.9/series b/queue-5.9/series
new file mode 100644 (file)
index 0000000..9b35755
--- /dev/null
@@ -0,0 +1,15 @@
+devlink-hold-rtnl-lock-while-reading-netdev-attributes.patch
+devlink-make-sure-devlink-instance-and-port-are-in-same-net-namespace.patch
+ipv6-addrlabel-fix-possible-memory-leak-in-ip6addrlbl_net_init.patch
+net-af_iucv-set-correct-sk_protocol-for-child-sockets.patch
+net-openvswitch-fix-ttl-decrement-action-netlink-message-format.patch
+net-tls-missing-received-data-after-fast-remote-close.patch
+net-tls-protect-from-calling-tls_dev_del-for-tls-rx-twice.patch
+rose-fix-null-pointer-dereference-in-rose_send_frame.patch
+sock-set-sk_err-to-ee_errno-on-dequeue-from-errq.patch
+tcp-set-inet_ecn_xmit-configuration-in-tcp_reinit_congestion_control.patch
+tun-honor-iocb_nowait-flag.patch
+usbnet-ipheth-fix-connectivity-with-ios-14.patch
+vsock-virtio-discard-packets-only-when-socket-is-really-closed.patch
+mptcp-fix-null-ptr-dereference-on-bad-mpj.patch
+net-packet-fix-packet-receive-on-l3-devices-without-visible-hard-header.patch
diff --git a/queue-5.9/sock-set-sk_err-to-ee_errno-on-dequeue-from-errq.patch b/queue-5.9/sock-set-sk_err-to-ee_errno-on-dequeue-from-errq.patch
new file mode 100644 (file)
index 0000000..7f17020
--- /dev/null
@@ -0,0 +1,47 @@
+From foo@baz Wed Dec  2 10:40:54 AM CET 2020
+From: Willem de Bruijn <willemb@google.com>
+Date: Thu, 26 Nov 2020 10:12:20 -0500
+Subject: sock: set sk_err to ee_errno on dequeue from errq
+
+From: Willem de Bruijn <willemb@google.com>
+
+[ Upstream commit 985f7337421a811cb354ca93882f943c8335a6f5 ]
+
+When setting sk_err, set it to ee_errno, not ee_origin.
+
+Commit f5f99309fa74 ("sock: do not set sk_err in
+sock_dequeue_err_skb") disabled updating sk_err on errq dequeue,
+which is correct for most error types (origins):
+
+  -       sk->sk_err = err;
+
+Commit 38b257938ac6 ("sock: reset sk_err when the error queue is
+empty") reenabled the behavior for ICMP origins, which do require it:
+
+  +       if (icmp_next)
+  +               sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_origin;
+
+But it should read from ee_errno, not ee_origin.
+
+Fixes: 38b257938ac6 ("sock: reset sk_err when the error queue is empty")
+Reported-by: Ayush Ranjan <ayushranjan@google.com>
+Signed-off-by: Willem de Bruijn <willemb@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Link: https://lore.kernel.org/r/20201126151220.2819322-1-willemdebruijn.kernel@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/skbuff.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -4555,7 +4555,7 @@ struct sk_buff *sock_dequeue_err_skb(str
+       if (skb && (skb_next = skb_peek(q))) {
+               icmp_next = is_icmp_err_skb(skb_next);
+               if (icmp_next)
+-                      sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_origin;
++                      sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_errno;
+       }
+       spin_unlock_irqrestore(&q->lock, flags);
diff --git a/queue-5.9/tcp-set-inet_ecn_xmit-configuration-in-tcp_reinit_congestion_control.patch b/queue-5.9/tcp-set-inet_ecn_xmit-configuration-in-tcp_reinit_congestion_control.patch
new file mode 100644 (file)
index 0000000..948a333
--- /dev/null
@@ -0,0 +1,49 @@
+From foo@baz Wed Dec  2 10:40:54 AM CET 2020
+From: Alexander Duyck <alexanderduyck@fb.com>
+Date: Thu, 19 Nov 2020 13:23:58 -0800
+Subject: tcp: Set INET_ECN_xmit configuration in tcp_reinit_congestion_control
+
+From: Alexander Duyck <alexanderduyck@fb.com>
+
+[ Upstream commit 55472017a4219ca965a957584affdb17549ae4a4 ]
+
+When setting congestion control via a BPF program it is seen that the
+SYN/ACK for packets within a given flow will not include the ECT0 flag. A
+bit of simple printk debugging shows that when this is configured without
+BPF we will see the INET_ECN_xmit value initialized in
+tcp_assign_congestion_control however when we configure this via BPF the
+socket is in the closed state and as such it isn't configured, and I do not
+see it being initialized when we transition the socket into the listen
+state. The result of this is that the ECT0 bit is configured based on
+whatever the default state is for the socket.
+
+An easy way to reproduce this is to monitor the following with tcpdump:
+tools/testing/selftests/bpf/test_progs -t bpf_tcp_ca
+
+Without this patch the SYN/ACK will follow whatever the default is. If dctcp
+all SYN/ACK packets will have the ECT0 bit set, and if it is not then ECT0
+will be cleared on all SYN/ACK packets. With this patch applied the SYN/ACK
+bit matches the value seen on the other packets in the given stream.
+
+Fixes: 91b5b21c7c16 ("bpf: Add support for changing congestion control")
+Signed-off-by: Alexander Duyck <alexanderduyck@fb.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_cong.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/net/ipv4/tcp_cong.c
++++ b/net/ipv4/tcp_cong.c
+@@ -197,6 +197,11 @@ static void tcp_reinit_congestion_contro
+       icsk->icsk_ca_setsockopt = 1;
+       memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
++      if (ca->flags & TCP_CONG_NEEDS_ECN)
++              INET_ECN_xmit(sk);
++      else
++              INET_ECN_dontxmit(sk);
++
+       if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
+               tcp_init_congestion_control(sk);
+ }
diff --git a/queue-5.9/tun-honor-iocb_nowait-flag.patch b/queue-5.9/tun-honor-iocb_nowait-flag.patch
new file mode 100644 (file)
index 0000000..0554741
--- /dev/null
@@ -0,0 +1,59 @@
+From foo@baz Wed Dec  2 10:40:54 AM CET 2020
+From: Jens Axboe <axboe@kernel.dk>
+Date: Fri, 20 Nov 2020 07:59:54 -0700
+Subject: tun: honor IOCB_NOWAIT flag
+
+From: Jens Axboe <axboe@kernel.dk>
+
+[ Upstream commit 5aac0390a63b8718237a61dd0d24a29201d1c94a ]
+
+tun only checks the file O_NONBLOCK flag, but it should also be checking
+the iocb IOCB_NOWAIT flag. Any fops using ->read/write_iter() should check
+both, otherwise it breaks users that correctly expect O_NONBLOCK semantics
+if IOCB_NOWAIT is set.
+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Link: https://lore.kernel.org/r/e9451860-96cc-c7c7-47b8-fe42cadd5f4c@kernel.dk
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tun.c |   14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -1979,12 +1979,15 @@ static ssize_t tun_chr_write_iter(struct
+       struct tun_file *tfile = file->private_data;
+       struct tun_struct *tun = tun_get(tfile);
+       ssize_t result;
++      int noblock = 0;
+       if (!tun)
+               return -EBADFD;
+-      result = tun_get_user(tun, tfile, NULL, from,
+-                            file->f_flags & O_NONBLOCK, false);
++      if ((file->f_flags & O_NONBLOCK) || (iocb->ki_flags & IOCB_NOWAIT))
++              noblock = 1;
++
++      result = tun_get_user(tun, tfile, NULL, from, noblock, false);
+       tun_put(tun);
+       return result;
+@@ -2203,10 +2206,15 @@ static ssize_t tun_chr_read_iter(struct
+       struct tun_file *tfile = file->private_data;
+       struct tun_struct *tun = tun_get(tfile);
+       ssize_t len = iov_iter_count(to), ret;
++      int noblock = 0;
+       if (!tun)
+               return -EBADFD;
+-      ret = tun_do_read(tun, tfile, to, file->f_flags & O_NONBLOCK, NULL);
++
++      if ((file->f_flags & O_NONBLOCK) || (iocb->ki_flags & IOCB_NOWAIT))
++              noblock = 1;
++
++      ret = tun_do_read(tun, tfile, to, noblock, NULL);
+       ret = min_t(ssize_t, ret, len);
+       if (ret > 0)
+               iocb->ki_pos = ret;
diff --git a/queue-5.9/usbnet-ipheth-fix-connectivity-with-ios-14.patch b/queue-5.9/usbnet-ipheth-fix-connectivity-with-ios-14.patch
new file mode 100644 (file)
index 0000000..68e54bd
--- /dev/null
@@ -0,0 +1,49 @@
+From foo@baz Wed Dec  2 10:40:54 AM CET 2020
+From: Yves-Alexis Perez <corsac@corsac.net>
+Date: Thu, 19 Nov 2020 18:24:39 +0100
+Subject: usbnet: ipheth: fix connectivity with iOS 14
+
+From: Yves-Alexis Perez <corsac@corsac.net>
+
+[ Upstream commit f33d9e2b48a34e1558b67a473a1fc1d6e793f93c ]
+
+Starting with iOS 14 released in September 2020, connectivity using the
+personal hotspot USB tethering function of iOS devices is broken.
+
+Communication between the host and the device (for example ICMP traffic
+or DNS resolution using the DNS service running in the device itself)
+works fine, but communication to endpoints further away doesn't work.
+
+Investigation on the matter shows that no UDP and ICMP traffic from the
+tethered host is reaching the Internet at all. For TCP traffic there are
+exchanges between tethered host and server but packets are modified in
+transit leading to impossible communication.
+
+After some trials Matti Vuorela discovered that reducing the URB buffer
+size by two bytes restored the previous behavior. While a better
+solution might exist to fix the issue, since the protocol is not
+publicly documented and considering the small size of the fix, let's do
+that.
+
+Tested-by: Matti Vuorela <matti.vuorela@bitfactor.fi>
+Signed-off-by: Yves-Alexis Perez <corsac@corsac.net>
+Link: https://lore.kernel.org/linux-usb/CAAn0qaXmysJ9vx3ZEMkViv_B19ju-_ExN8Yn_uSefxpjS6g4Lw@mail.gmail.com/
+Link: https://github.com/libimobiledevice/libimobiledevice/issues/1038
+Link: https://lore.kernel.org/r/20201119172439.94988-1-corsac@corsac.net
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/ipheth.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/usb/ipheth.c
++++ b/drivers/net/usb/ipheth.c
+@@ -59,7 +59,7 @@
+ #define IPHETH_USBINTF_SUBCLASS 253
+ #define IPHETH_USBINTF_PROTO    1
+-#define IPHETH_BUF_SIZE         1516
++#define IPHETH_BUF_SIZE         1514
+ #define IPHETH_IP_ALIGN               2       /* padding at front of URB */
+ #define IPHETH_TX_TIMEOUT       (5 * HZ)
diff --git a/queue-5.9/vsock-virtio-discard-packets-only-when-socket-is-really-closed.patch b/queue-5.9/vsock-virtio-discard-packets-only-when-socket-is-really-closed.patch
new file mode 100644 (file)
index 0000000..0094056
--- /dev/null
@@ -0,0 +1,73 @@
+From foo@baz Wed Dec  2 10:40:54 AM CET 2020
+From: Stefano Garzarella <sgarzare@redhat.com>
+Date: Fri, 20 Nov 2020 11:47:36 +0100
+Subject: vsock/virtio: discard packets only when socket is really closed
+
+From: Stefano Garzarella <sgarzare@redhat.com>
+
+[ Upstream commit 3fe356d58efae54dade9ec94ea7c919ed20cf4db ]
+
+Starting from commit 8692cefc433f ("virtio_vsock: Fix race condition
+in virtio_transport_recv_pkt"), we discard packets in
+virtio_transport_recv_pkt() if the socket has been released.
+
+When the socket is connected, we schedule a delayed work to wait for the
+RST packet from the other peer, even if SHUTDOWN_MASK is set in
+sk->sk_shutdown.
+This is done to complete the virtio-vsock shutdown algorithm, releasing
+the port assigned to the socket definitively only when the other peer
+has consumed all the packets.
+
+If we discard the RST packet received, the socket will be closed only
+when the VSOCK_CLOSE_TIMEOUT is reached.
+
+Sergio discovered the issue while running ab(1) HTTP benchmark using
+libkrun [1] and observing a latency increase with that commit.
+
+To avoid this issue, we discard packets only if the socket is really
+closed (SOCK_DONE flag is set).
+We also set SOCK_DONE in virtio_transport_release() when we don't need
+to wait for any packets from the other peer (we didn't schedule the delayed
+work). In this case we remove the socket from the vsock lists, releasing
+the port assigned.
+
+[1] https://github.com/containers/libkrun
+
+Fixes: 8692cefc433f ("virtio_vsock: Fix race condition in virtio_transport_recv_pkt")
+Cc: justin.he@arm.com
+Reported-by: Sergio Lopez <slp@redhat.com>
+Tested-by: Sergio Lopez <slp@redhat.com>
+Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
+Acked-by: Jia He <justin.he@arm.com>
+Link: https://lore.kernel.org/r/20201120104736.73749-1-sgarzare@redhat.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/vmw_vsock/virtio_transport_common.c |    8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/net/vmw_vsock/virtio_transport_common.c
++++ b/net/vmw_vsock/virtio_transport_common.c
+@@ -841,8 +841,10 @@ void virtio_transport_release(struct vso
+               virtio_transport_free_pkt(pkt);
+       }
+-      if (remove_sock)
++      if (remove_sock) {
++              sock_set_flag(sk, SOCK_DONE);
+               vsock_remove_sock(vsk);
++      }
+ }
+ EXPORT_SYMBOL_GPL(virtio_transport_release);
+@@ -1132,8 +1134,8 @@ void virtio_transport_recv_pkt(struct vi
+       lock_sock(sk);
+-      /* Check if sk has been released before lock_sock */
+-      if (sk->sk_shutdown == SHUTDOWN_MASK) {
++      /* Check if sk has been closed before lock_sock */
++      if (sock_flag(sk, SOCK_DONE)) {
+               (void)virtio_transport_reset_no_sock(t, pkt);
+               release_sock(sk);
+               sock_put(sk);