From f1c732a0e942d2781d1b69c239b36e4fe9881afa Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 1 Feb 2017 09:11:09 +0100 Subject: [PATCH] 4.9-stable patches added patches: af_unix-move-unix_mknod-out-of-bindlock.patch ax25-fix-segfault-after-sock-connection-timeout.patch bridge-netlink-call-br_changelink-during-br_dev_newlink.patch ip6_tunnel-account-for-tunnel-header-in-tunnel-mtu.patch ipv6-addrconf-avoid-addrconf_disable_change-using-rcu-read-side-lock.patch lwtunnel-fix-autoload-of-lwt-modules.patch lwtunnel-fix-oops-on-state-free-after-encap-module-unload.patch mlx4-do-not-call-napi_schedule-without-care.patch mlxsw-pci-fix-eqe-structure-definition.patch mlxsw-spectrum-fix-memory-leak-at-skb-reallocation.patch mlxsw-switchx2-fix-memory-leak-at-skb-reallocation.patch net-dsa-bring-back-device-detaching-in-dsa_slave_suspend.patch net-fix-harmonize_features-vs-netif_f_highdma.patch net-ipv4-fix-table-id-in-getroute-response.patch net-lwtunnel-handle-lwtunnel_fill_encap-failure.patch net-mlx5e-do-not-recycle-pages-from-emergency-reserve.patch net-mpls-fix-multipath-selection-for-lsr-use-case.patch net-phy-bcm63xx-utilize-correct-config_intr-function.patch net-sched-actions-fix-refcnt-when-geting-of-action-after-bind.patch net-specify-the-owning-module-for-lwtunnel-ops.patch net-systemport-decouple-flow-control-from-__bcm_sysport_tx_reclaim.patch netvsc-add-rcu_read-locking-to-netvsc-callback.patch openvswitch-maintain-correct-checksum-state-in-conntrack-actions.patch qmi_wwan-cdc_ether-add-device-id-for-hp-lt2523-novatel-e371-wwan-card.patch r8152-don-t-execute-runtime-suspend-if-the-tx-is-not-empty.patch r8152-fix-the-sw-rx-checksum-is-unavailable.patch ravb-do-not-use-zero-length-alignment-dma-descriptor.patch tcp-fix-tcp_fastopen-unaligned-access-complaints-on-sparc.patch tcp-initialize-max-window-for-a-new-fastopen-socket.patch virtio-don-t-set-virtio_net_hdr_f_data_valid-on-xmit.patch virtio-net-restore-virtio_hdr_f_data_valid-on-receiving.patch 
vxlan-fix-byte-order-of-vxlan-gpe-port-number.patch --- ...unix-move-unix_mknod-out-of-bindlock.patch | 92 +++++++ ...gfault-after-sock-connection-timeout.patch | 36 +++ ...-br_changelink-during-br_dev_newlink.patch | 72 +++++ ...ount-for-tunnel-header-in-tunnel-mtu.patch | 52 ++++ ...able_change-using-rcu-read-side-lock.patch | 43 +++ ...lwtunnel-fix-autoload-of-lwt-modules.patch | 247 ++++++++++++++++++ ...state-free-after-encap-module-unload.patch | 96 +++++++ ...-not-call-napi_schedule-without-care.patch | 43 +++ ...xsw-pci-fix-eqe-structure-definition.patch | 51 ++++ ...-fix-memory-leak-at-skb-reallocation.patch | 39 +++ ...-fix-memory-leak-at-skb-reallocation.patch | 38 +++ ...evice-detaching-in-dsa_slave_suspend.patch | 35 +++ ...armonize_features-vs-netif_f_highdma.patch | 44 ++++ ...v4-fix-table-id-in-getroute-response.patch | 35 +++ ...l-handle-lwtunnel_fill_encap-failure.patch | 61 +++++ ...recycle-pages-from-emergency-reserve.patch | 42 +++ ...multipath-selection-for-lsr-use-case.patch | 162 ++++++++++++ ...utilize-correct-config_intr-function.patch | 72 +++++ ...cnt-when-geting-of-action-after-bind.patch | 146 +++++++++++ ...y-the-owning-module-for-lwtunnel-ops.patch | 73 ++++++ ...ontrol-from-__bcm_sysport_tx_reclaim.patch | 93 +++++++ ...-rcu_read-locking-to-netvsc-callback.patch | 49 ++++ ...-checksum-state-in-conntrack-actions.patch | 99 +++++++ ...for-hp-lt2523-novatel-e371-wwan-card.patch | 65 +++++ ...ntime-suspend-if-the-tx-is-not-empty.patch | 40 +++ ...ix-the-sw-rx-checksum-is-unavailable.patch | 47 ++++ ...zero-length-alignment-dma-descriptor.patch | 167 ++++++++++++ ...unaligned-access-complaints-on-sparc.patch | 61 +++++ ...max-window-for-a-new-fastopen-socket.patch | 58 ++++ ...-virtio_net_hdr_f_data_valid-on-xmit.patch | 42 +++ ...virtio_hdr_f_data_valid-on-receiving.patch | 94 +++++++ ...-byte-order-of-vxlan-gpe-port-number.patch | 34 +++ 32 files changed, 2328 insertions(+) create mode 100644 
queue-4.9/af_unix-move-unix_mknod-out-of-bindlock.patch create mode 100644 queue-4.9/ax25-fix-segfault-after-sock-connection-timeout.patch create mode 100644 queue-4.9/bridge-netlink-call-br_changelink-during-br_dev_newlink.patch create mode 100644 queue-4.9/ip6_tunnel-account-for-tunnel-header-in-tunnel-mtu.patch create mode 100644 queue-4.9/ipv6-addrconf-avoid-addrconf_disable_change-using-rcu-read-side-lock.patch create mode 100644 queue-4.9/lwtunnel-fix-autoload-of-lwt-modules.patch create mode 100644 queue-4.9/lwtunnel-fix-oops-on-state-free-after-encap-module-unload.patch create mode 100644 queue-4.9/mlx4-do-not-call-napi_schedule-without-care.patch create mode 100644 queue-4.9/mlxsw-pci-fix-eqe-structure-definition.patch create mode 100644 queue-4.9/mlxsw-spectrum-fix-memory-leak-at-skb-reallocation.patch create mode 100644 queue-4.9/mlxsw-switchx2-fix-memory-leak-at-skb-reallocation.patch create mode 100644 queue-4.9/net-dsa-bring-back-device-detaching-in-dsa_slave_suspend.patch create mode 100644 queue-4.9/net-fix-harmonize_features-vs-netif_f_highdma.patch create mode 100644 queue-4.9/net-ipv4-fix-table-id-in-getroute-response.patch create mode 100644 queue-4.9/net-lwtunnel-handle-lwtunnel_fill_encap-failure.patch create mode 100644 queue-4.9/net-mlx5e-do-not-recycle-pages-from-emergency-reserve.patch create mode 100644 queue-4.9/net-mpls-fix-multipath-selection-for-lsr-use-case.patch create mode 100644 queue-4.9/net-phy-bcm63xx-utilize-correct-config_intr-function.patch create mode 100644 queue-4.9/net-sched-actions-fix-refcnt-when-geting-of-action-after-bind.patch create mode 100644 queue-4.9/net-specify-the-owning-module-for-lwtunnel-ops.patch create mode 100644 queue-4.9/net-systemport-decouple-flow-control-from-__bcm_sysport_tx_reclaim.patch create mode 100644 queue-4.9/netvsc-add-rcu_read-locking-to-netvsc-callback.patch create mode 100644 queue-4.9/openvswitch-maintain-correct-checksum-state-in-conntrack-actions.patch create mode 100644 
queue-4.9/qmi_wwan-cdc_ether-add-device-id-for-hp-lt2523-novatel-e371-wwan-card.patch create mode 100644 queue-4.9/r8152-don-t-execute-runtime-suspend-if-the-tx-is-not-empty.patch create mode 100644 queue-4.9/r8152-fix-the-sw-rx-checksum-is-unavailable.patch create mode 100644 queue-4.9/ravb-do-not-use-zero-length-alignment-dma-descriptor.patch create mode 100644 queue-4.9/tcp-fix-tcp_fastopen-unaligned-access-complaints-on-sparc.patch create mode 100644 queue-4.9/tcp-initialize-max-window-for-a-new-fastopen-socket.patch create mode 100644 queue-4.9/virtio-don-t-set-virtio_net_hdr_f_data_valid-on-xmit.patch create mode 100644 queue-4.9/virtio-net-restore-virtio_hdr_f_data_valid-on-receiving.patch create mode 100644 queue-4.9/vxlan-fix-byte-order-of-vxlan-gpe-port-number.patch diff --git a/queue-4.9/af_unix-move-unix_mknod-out-of-bindlock.patch b/queue-4.9/af_unix-move-unix_mknod-out-of-bindlock.patch new file mode 100644 index 00000000000..d389ea31882 --- /dev/null +++ b/queue-4.9/af_unix-move-unix_mknod-out-of-bindlock.patch @@ -0,0 +1,92 @@ +From foo@baz Wed Feb 1 08:50:24 CET 2017 +From: WANG Cong +Date: Mon, 23 Jan 2017 11:17:35 -0800 +Subject: af_unix: move unix_mknod() out of bindlock + +From: WANG Cong + + +[ Upstream commit 0fb44559ffd67de8517098b81f675fa0210f13f0 ] + +Dmitry reported a deadlock scenario: + +unix_bind() path: +u->bindlock ==> sb_writer + +do_splice() path: +sb_writer ==> pipe->mutex ==> u->bindlock + +In the unix_bind() code path, unix_mknod() does not have to +be done with u->bindlock held, since it is a pure fs operation, +so we can just move unix_mknod() out. + +Reported-by: Dmitry Vyukov +Tested-by: Dmitry Vyukov +Cc: Rainer Weikusat +Cc: Al Viro +Signed-off-by: Cong Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/unix/af_unix.c | 27 ++++++++++++++++----------- + 1 file changed, 16 insertions(+), 11 deletions(-) + +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -995,6 +995,7 @@ static int unix_bind(struct socket *sock + unsigned int hash; + struct unix_address *addr; + struct hlist_head *list; ++ struct path path = { NULL, NULL }; + + err = -EINVAL; + if (sunaddr->sun_family != AF_UNIX) +@@ -1010,9 +1011,20 @@ static int unix_bind(struct socket *sock + goto out; + addr_len = err; + ++ if (sun_path[0]) { ++ umode_t mode = S_IFSOCK | ++ (SOCK_INODE(sock)->i_mode & ~current_umask()); ++ err = unix_mknod(sun_path, mode, &path); ++ if (err) { ++ if (err == -EEXIST) ++ err = -EADDRINUSE; ++ goto out; ++ } ++ } ++ + err = mutex_lock_interruptible(&u->bindlock); + if (err) +- goto out; ++ goto out_put; + + err = -EINVAL; + if (u->addr) +@@ -1029,16 +1041,6 @@ static int unix_bind(struct socket *sock + atomic_set(&addr->refcnt, 1); + + if (sun_path[0]) { +- struct path path; +- umode_t mode = S_IFSOCK | +- (SOCK_INODE(sock)->i_mode & ~current_umask()); +- err = unix_mknod(sun_path, mode, &path); +- if (err) { +- if (err == -EEXIST) +- err = -EADDRINUSE; +- unix_release_addr(addr); +- goto out_up; +- } + addr->hash = UNIX_HASH_SIZE; + hash = d_real_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1); + spin_lock(&unix_table_lock); +@@ -1065,6 +1067,9 @@ out_unlock: + spin_unlock(&unix_table_lock); + out_up: + mutex_unlock(&u->bindlock); ++out_put: ++ if (err) ++ path_put(&path); + out: + return err; + } diff --git a/queue-4.9/ax25-fix-segfault-after-sock-connection-timeout.patch b/queue-4.9/ax25-fix-segfault-after-sock-connection-timeout.patch new file mode 100644 index 00000000000..6a060d73700 --- /dev/null +++ b/queue-4.9/ax25-fix-segfault-after-sock-connection-timeout.patch @@ -0,0 +1,36 @@ +From foo@baz Wed Feb 1 08:50:24 CET 2017 +From: Basil Gunn +Date: Sat, 14 Jan 2017 12:18:55 -0800 +Subject: ax25: Fix segfault after 
sock connection timeout + +From: Basil Gunn + + +[ Upstream commit 8a367e74c0120ef68c8c70d5a025648c96626dff ] + +The ax.25 socket connection timed out & the sock struct has been +previously taken down, i.e. the sock struct is now a NULL pointer. Checking +the sock_flag causes the segfault. Check if the socket struct pointer +is NULL before checking sock_flag. This segfault is seen in +timed out netrom connections. + +Please submit to -stable. + +Signed-off-by: Basil Gunn +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ax25/ax25_subr.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ax25/ax25_subr.c ++++ b/net/ax25/ax25_subr.c +@@ -264,7 +264,7 @@ void ax25_disconnect(ax25_cb *ax25, int + { + ax25_clear_queues(ax25); + +- if (!sock_flag(ax25->sk, SOCK_DESTROY)) ++ if (!ax25->sk || !sock_flag(ax25->sk, SOCK_DESTROY)) + ax25_stop_heartbeat(ax25); + ax25_stop_t1timer(ax25); + ax25_stop_t2timer(ax25); diff --git a/queue-4.9/bridge-netlink-call-br_changelink-during-br_dev_newlink.patch b/queue-4.9/bridge-netlink-call-br_changelink-during-br_dev_newlink.patch new file mode 100644 index 00000000000..d05f59f942a --- /dev/null +++ b/queue-4.9/bridge-netlink-call-br_changelink-during-br_dev_newlink.patch @@ -0,0 +1,72 @@ +From foo@baz Wed Feb 1 08:50:24 CET 2017 +From: Ivan Vecera +Date: Fri, 20 Jan 2017 18:12:17 +0100 +Subject: bridge: netlink: call br_changelink() during br_dev_newlink() + +From: Ivan Vecera + + +[ Upstream commit b6677449dff674cf5b81429b11d5c7f358852ef9 ] + +Any bridge options specified during link creation (e.g. ip link add) +are ignored as br_dev_newlink() does not process them. +Use br_changelink() to do it. + +Fixes: 133235161721 ("bridge: implement rtnl_link_ops->changelink") +Signed-off-by: Ivan Vecera +Reviewed-by: Jiri Pirko +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_netlink.c | 33 +++++++++++++++++++-------------- + 1 file changed, 19 insertions(+), 14 deletions(-) + +--- a/net/bridge/br_netlink.c ++++ b/net/bridge/br_netlink.c +@@ -781,20 +781,6 @@ static int br_validate(struct nlattr *tb + return 0; + } + +-static int br_dev_newlink(struct net *src_net, struct net_device *dev, +- struct nlattr *tb[], struct nlattr *data[]) +-{ +- struct net_bridge *br = netdev_priv(dev); +- +- if (tb[IFLA_ADDRESS]) { +- spin_lock_bh(&br->lock); +- br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS])); +- spin_unlock_bh(&br->lock); +- } +- +- return register_netdevice(dev); +-} +- + static int br_port_slave_changelink(struct net_device *brdev, + struct net_device *dev, + struct nlattr *tb[], +@@ -1093,6 +1079,25 @@ static int br_changelink(struct net_devi + return 0; + } + ++static int br_dev_newlink(struct net *src_net, struct net_device *dev, ++ struct nlattr *tb[], struct nlattr *data[]) ++{ ++ struct net_bridge *br = netdev_priv(dev); ++ int err; ++ ++ if (tb[IFLA_ADDRESS]) { ++ spin_lock_bh(&br->lock); ++ br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS])); ++ spin_unlock_bh(&br->lock); ++ } ++ ++ err = br_changelink(dev, tb, data); ++ if (err) ++ return err; ++ ++ return register_netdevice(dev); ++} ++ + static size_t br_get_size(const struct net_device *brdev) + { + return nla_total_size(sizeof(u32)) + /* IFLA_BR_FORWARD_DELAY */ diff --git a/queue-4.9/ip6_tunnel-account-for-tunnel-header-in-tunnel-mtu.patch b/queue-4.9/ip6_tunnel-account-for-tunnel-header-in-tunnel-mtu.patch new file mode 100644 index 00000000000..18ddfb07e69 --- /dev/null +++ b/queue-4.9/ip6_tunnel-account-for-tunnel-header-in-tunnel-mtu.patch @@ -0,0 +1,52 @@ +From foo@baz Wed Feb 1 08:50:24 CET 2017 +From: Jakub Sitnicki +Date: Fri, 13 Jan 2017 10:12:20 +0100 +Subject: ip6_tunnel: Account for tunnel header in tunnel MTU + +From: Jakub Sitnicki + + +[ Upstream commit 
02ca0423fd65a0a9c4d70da0dbb8f4b8503f08c7 ] + +With ip6gre we have a tunnel header which also makes the tunnel MTU +smaller. We need to reserve room for it. Previously we were using up +space reserved for the Tunnel Encapsulation Limit option +header (RFC 2473). + +Also, after commit b05229f44228 ("gre6: Cleanup GREv6 transmit path, +call common GRE functions") our contract with the caller has +changed. Now we check if the packet length exceeds the tunnel MTU after +the tunnel header has been pushed, unlike before. + +This is reflected in the check where we look at the packet length minus +the size of the tunnel header, which is already accounted for in tunnel +MTU. + +Fixes: b05229f44228 ("gre6: Cleanup GREv6 transmit path, call common GRE functions") +Signed-off-by: Jakub Sitnicki +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_tunnel.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/ipv6/ip6_tunnel.c ++++ b/net/ipv6/ip6_tunnel.c +@@ -1108,7 +1108,7 @@ route_lookup: + t->parms.name); + goto tx_err_dst_release; + } +- mtu = dst_mtu(dst) - psh_hlen; ++ mtu = dst_mtu(dst) - psh_hlen - t->tun_hlen; + if (encap_limit >= 0) { + max_headroom += 8; + mtu -= 8; +@@ -1117,7 +1117,7 @@ route_lookup: + mtu = IPV6_MIN_MTU; + if (skb_dst(skb) && !t->parms.collect_md) + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); +- if (skb->len > mtu && !skb_is_gso(skb)) { ++ if (skb->len - t->tun_hlen > mtu && !skb_is_gso(skb)) { + *pmtu = mtu; + err = -EMSGSIZE; + goto tx_err_dst_release; diff --git a/queue-4.9/ipv6-addrconf-avoid-addrconf_disable_change-using-rcu-read-side-lock.patch b/queue-4.9/ipv6-addrconf-avoid-addrconf_disable_change-using-rcu-read-side-lock.patch new file mode 100644 index 00000000000..9f7fbadbc0f --- /dev/null +++ b/queue-4.9/ipv6-addrconf-avoid-addrconf_disable_change-using-rcu-read-side-lock.patch @@ -0,0 +1,43 @@ +From foo@baz Wed Feb 1 08:50:24 CET 2017 +From: Kefeng Wang +Date: Thu, 
19 Jan 2017 16:26:21 +0800 +Subject: ipv6: addrconf: Avoid addrconf_disable_change() using RCU read-side lock + +From: Kefeng Wang + + +[ Upstream commit 03e4deff4987f79c34112c5ba4eb195d4f9382b0 ] + +Just like commit 4acd4945cd1e ("ipv6: addrconf: Avoid calling +netdevice notifiers with RCU read-side lock"), it is unnecessary +to make addrconf_disable_change() use RCU iteration over the +netdev list, since it already holds the RTNL lock, or we may meet +Illegal context switch in RCU read-side critical section. + +Signed-off-by: Kefeng Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/addrconf.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -5515,8 +5515,7 @@ static void addrconf_disable_change(stru + struct net_device *dev; + struct inet6_dev *idev; + +- rcu_read_lock(); +- for_each_netdev_rcu(net, dev) { ++ for_each_netdev(net, dev) { + idev = __in6_dev_get(dev); + if (idev) { + int changed = (!idev->cnf.disable_ipv6) ^ (!newf); +@@ -5525,7 +5524,6 @@ static void addrconf_disable_change(stru + dev_disable_change(idev); + } + } +- rcu_read_unlock(); + } + + static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf) diff --git a/queue-4.9/lwtunnel-fix-autoload-of-lwt-modules.patch b/queue-4.9/lwtunnel-fix-autoload-of-lwt-modules.patch new file mode 100644 index 00000000000..5d5b995dc93 --- /dev/null +++ b/queue-4.9/lwtunnel-fix-autoload-of-lwt-modules.patch @@ -0,0 +1,247 @@ +From foo@baz Wed Feb 1 08:50:24 CET 2017 +From: David Ahern +Date: Tue, 17 Jan 2017 14:57:36 -0800 +Subject: lwtunnel: fix autoload of lwt modules + +From: David Ahern + + +[ Upstream commit 9ed59592e3e379b2e9557dc1d9e9ec8fcbb33f16] + +Trying to add an mpls encap route when the MPLS modules are not loaded +hangs. 
For example: + + CONFIG_MPLS=y + CONFIG_NET_MPLS_GSO=m + CONFIG_MPLS_ROUTING=m + CONFIG_MPLS_IPTUNNEL=m + + $ ip route add 10.10.10.10/32 encap mpls 100 via inet 10.100.1.2 + +The ip command hangs: +root 880 826 0 21:25 pts/0 00:00:00 ip route add 10.10.10.10/32 encap mpls 100 via inet 10.100.1.2 + + $ cat /proc/880/stack + [] call_usermodehelper_exec+0xd6/0x134 + [] __request_module+0x27b/0x30a + [] lwtunnel_build_state+0xe4/0x178 + [] fib_create_info+0x47f/0xdd4 + [] fib_table_insert+0x90/0x41f + [] inet_rtm_newroute+0x4b/0x52 + ... + +modprobe is trying to load rtnl-lwt-MPLS: + +root 881 5 0 21:25 ? 00:00:00 /sbin/modprobe -q -- rtnl-lwt-MPLS + +and it hangs after loading mpls_router: + + $ cat /proc/881/stack + [] rtnl_lock+0x12/0x14 + [] register_netdevice_notifier+0x16/0x179 + [] mpls_init+0x25/0x1000 [mpls_router] + [] do_one_initcall+0x8e/0x13f + [] do_init_module+0x5a/0x1e5 + [] load_module+0x13bd/0x17d6 + ... + +The problem is that lwtunnel_build_state is called with rtnl lock +held preventing mpls_init from registering. + +Given the potential references held by the time lwtunnel_build_state is +called, it cannot drop the rtnl lock to load the module. So, extract the module +loading code from lwtunnel_build_state into a new function to validate +the encap type. The new function is called while converting the user +request into a fib_config which is well before any table, device or +fib entries are examined. + +Fixes: 745041e2aaf1 ("lwtunnel: autoload of lwt modules") +Signed-off-by: David Ahern +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/lwtunnel.h | 11 ++++++++ + net/core/lwtunnel.c | 62 +++++++++++++++++++++++++++++++++++++++++++----- + net/ipv4/fib_frontend.c | 8 ++++++ + net/ipv6/route.c | 12 ++++++++- + 4 files changed, 86 insertions(+), 7 deletions(-) + +--- a/include/net/lwtunnel.h ++++ b/include/net/lwtunnel.h +@@ -106,6 +106,8 @@ int lwtunnel_encap_add_ops(const struct + unsigned int num); + int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, + unsigned int num); ++int lwtunnel_valid_encap_type(u16 encap_type); ++int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len); + int lwtunnel_build_state(struct net_device *dev, u16 encap_type, + struct nlattr *encap, + unsigned int family, const void *cfg, +@@ -168,6 +170,15 @@ static inline int lwtunnel_encap_del_ops + { + return -EOPNOTSUPP; + } ++ ++static inline int lwtunnel_valid_encap_type(u16 encap_type) ++{ ++ return -EOPNOTSUPP; ++} ++static inline int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len) ++{ ++ return -EOPNOTSUPP; ++} + + static inline int lwtunnel_build_state(struct net_device *dev, u16 encap_type, + struct nlattr *encap, +--- a/net/core/lwtunnel.c ++++ b/net/core/lwtunnel.c +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include + + #ifdef CONFIG_MODULES + +@@ -110,25 +111,74 @@ int lwtunnel_build_state(struct net_devi + ret = -EOPNOTSUPP; + rcu_read_lock(); + ops = rcu_dereference(lwtun_encaps[encap_type]); ++ if (likely(ops && ops->build_state)) ++ ret = ops->build_state(dev, encap, family, cfg, lws); ++ rcu_read_unlock(); ++ ++ return ret; ++} ++EXPORT_SYMBOL(lwtunnel_build_state); ++ ++int lwtunnel_valid_encap_type(u16 encap_type) ++{ ++ const struct lwtunnel_encap_ops *ops; ++ int ret = -EINVAL; ++ ++ if (encap_type == LWTUNNEL_ENCAP_NONE || ++ encap_type > LWTUNNEL_ENCAP_MAX) ++ return ret; ++ ++ rcu_read_lock(); ++ ops = rcu_dereference(lwtun_encaps[encap_type]); ++ rcu_read_unlock(); + #ifdef CONFIG_MODULES + if 
(!ops) { + const char *encap_type_str = lwtunnel_encap_str(encap_type); + + if (encap_type_str) { +- rcu_read_unlock(); ++ __rtnl_unlock(); + request_module("rtnl-lwt-%s", encap_type_str); ++ rtnl_lock(); ++ + rcu_read_lock(); + ops = rcu_dereference(lwtun_encaps[encap_type]); ++ rcu_read_unlock(); + } + } + #endif +- if (likely(ops && ops->build_state)) +- ret = ops->build_state(dev, encap, family, cfg, lws); +- rcu_read_unlock(); ++ return ops ? 0 : -EOPNOTSUPP; ++} ++EXPORT_SYMBOL(lwtunnel_valid_encap_type); + +- return ret; ++int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining) ++{ ++ struct rtnexthop *rtnh = (struct rtnexthop *)attr; ++ struct nlattr *nla_entype; ++ struct nlattr *attrs; ++ struct nlattr *nla; ++ u16 encap_type; ++ int attrlen; ++ ++ while (rtnh_ok(rtnh, remaining)) { ++ attrlen = rtnh_attrlen(rtnh); ++ if (attrlen > 0) { ++ attrs = rtnh_attrs(rtnh); ++ nla = nla_find(attrs, attrlen, RTA_ENCAP); ++ nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); ++ ++ if (nla_entype) { ++ encap_type = nla_get_u16(nla_entype); ++ ++ if (lwtunnel_valid_encap_type(encap_type) != 0) ++ return -EOPNOTSUPP; ++ } ++ } ++ rtnh = rtnh_next(rtnh, &remaining); ++ } ++ ++ return 0; + } +-EXPORT_SYMBOL(lwtunnel_build_state); ++EXPORT_SYMBOL(lwtunnel_valid_encap_type_attr); + + int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate) + { +--- a/net/ipv4/fib_frontend.c ++++ b/net/ipv4/fib_frontend.c +@@ -46,6 +46,7 @@ + #include + #include + #include ++#include + #include + + #ifndef CONFIG_IP_MULTIPLE_TABLES +@@ -676,6 +677,10 @@ static int rtm_to_fib_config(struct net + cfg->fc_mx_len = nla_len(attr); + break; + case RTA_MULTIPATH: ++ err = lwtunnel_valid_encap_type_attr(nla_data(attr), ++ nla_len(attr)); ++ if (err < 0) ++ goto errout; + cfg->fc_mp = nla_data(attr); + cfg->fc_mp_len = nla_len(attr); + break; +@@ -690,6 +695,9 @@ static int rtm_to_fib_config(struct net + break; + case RTA_ENCAP_TYPE: + cfg->fc_encap_type = 
nla_get_u16(attr); ++ err = lwtunnel_valid_encap_type(cfg->fc_encap_type); ++ if (err < 0) ++ goto errout; + break; + } + } +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -2885,6 +2885,11 @@ static int rtm_to_fib6_config(struct sk_ + if (tb[RTA_MULTIPATH]) { + cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]); + cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]); ++ ++ err = lwtunnel_valid_encap_type_attr(cfg->fc_mp, ++ cfg->fc_mp_len); ++ if (err < 0) ++ goto errout; + } + + if (tb[RTA_PREF]) { +@@ -2898,9 +2903,14 @@ static int rtm_to_fib6_config(struct sk_ + if (tb[RTA_ENCAP]) + cfg->fc_encap = tb[RTA_ENCAP]; + +- if (tb[RTA_ENCAP_TYPE]) ++ if (tb[RTA_ENCAP_TYPE]) { + cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]); + ++ err = lwtunnel_valid_encap_type(cfg->fc_encap_type); ++ if (err < 0) ++ goto errout; ++ } ++ + if (tb[RTA_EXPIRES]) { + unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ); + diff --git a/queue-4.9/lwtunnel-fix-oops-on-state-free-after-encap-module-unload.patch b/queue-4.9/lwtunnel-fix-oops-on-state-free-after-encap-module-unload.patch new file mode 100644 index 00000000000..9ad6d943cd5 --- /dev/null +++ b/queue-4.9/lwtunnel-fix-oops-on-state-free-after-encap-module-unload.patch @@ -0,0 +1,96 @@ +From foo@baz Wed Feb 1 08:50:24 CET 2017 +From: Robert Shearman +Date: Tue, 24 Jan 2017 16:26:48 +0000 +Subject: lwtunnel: Fix oops on state free after encap module unload + +From: Robert Shearman + + +[ Upstream commit 85c814016ce3b371016c2c054a905fa2492f5a65 ] + +When attempting to free lwtunnel state after the module for the encap +has been unloaded an oops occurs: + +BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 +IP: lwtstate_free+0x18/0x40 +[..] +task: ffff88003e372380 task.stack: ffffc900001fc000 +RIP: 0010:lwtstate_free+0x18/0x40 +RSP: 0018:ffff88003fd83e88 EFLAGS: 00010246 +RAX: 0000000000000000 RBX: ffff88002bbb3380 RCX: ffff88000c91a300 +[..] +Call Trace: + + free_fib_info_rcu+0x195/0x1a0 + ? 
rt_fibinfo_free+0x50/0x50 + rcu_process_callbacks+0x2d3/0x850 + ? rcu_process_callbacks+0x296/0x850 + __do_softirq+0xe4/0x4cb + irq_exit+0xb0/0xc0 + smp_apic_timer_interrupt+0x3d/0x50 + apic_timer_interrupt+0x93/0xa0 +[..] +Code: e8 6e c6 fc ff 89 d8 5b 5d c3 bb de ff ff ff eb f4 66 90 66 66 66 66 90 55 48 89 e5 53 0f b7 07 48 89 fb 48 8b 04 c5 00 81 d5 81 <48> 8b 40 08 48 85 c0 74 13 ff d0 48 8d 7b 20 be 20 00 00 00 e8 + +The problem is that after the module for the encap has been unloaded, the +corresponding ops is removed and is thus NULL here. + +Modules implementing lwtunnel ops should not be allowed to unload +while there is state alive using those ops, so grab the module +reference for the ops on creating lwtunnel state and of course release +the reference when freeing the state. + +Fixes: 1104d9ba443a ("lwtunnel: Add destroy state operation") +Signed-off-by: Robert Shearman +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/lwtunnel.h | 5 +---- + net/core/lwtunnel.c | 14 +++++++++++++- + 2 files changed, 14 insertions(+), 5 deletions(-) + +--- a/include/net/lwtunnel.h ++++ b/include/net/lwtunnel.h +@@ -48,10 +48,7 @@ struct lwtunnel_encap_ops { + }; + + #ifdef CONFIG_LWTUNNEL +-static inline void lwtstate_free(struct lwtunnel_state *lws) +-{ +- kfree(lws); +-} ++void lwtstate_free(struct lwtunnel_state *lws); + + static inline struct lwtunnel_state * + lwtstate_get(struct lwtunnel_state *lws) +--- a/net/core/lwtunnel.c ++++ b/net/core/lwtunnel.c +@@ -66,6 +66,15 @@ EXPORT_SYMBOL(lwtunnel_state_alloc); + static const struct lwtunnel_encap_ops __rcu * + lwtun_encaps[LWTUNNEL_ENCAP_MAX + 1] __read_mostly; + ++void lwtstate_free(struct lwtunnel_state *lws) ++{ ++ const struct lwtunnel_encap_ops *ops = lwtun_encaps[lws->type]; ++ ++ kfree(lws); ++ module_put(ops->owner); ++} ++EXPORT_SYMBOL(lwtstate_free); ++ + int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops, + unsigned int num) + { +@@ -111,8 +120,11 @@ int 
lwtunnel_build_state(struct net_devi + ret = -EOPNOTSUPP; + rcu_read_lock(); + ops = rcu_dereference(lwtun_encaps[encap_type]); +- if (likely(ops && ops->build_state)) ++ if (likely(ops && ops->build_state && try_module_get(ops->owner))) { + ret = ops->build_state(dev, encap, family, cfg, lws); ++ if (ret) ++ module_put(ops->owner); ++ } + rcu_read_unlock(); + + return ret; diff --git a/queue-4.9/mlx4-do-not-call-napi_schedule-without-care.patch b/queue-4.9/mlx4-do-not-call-napi_schedule-without-care.patch new file mode 100644 index 00000000000..963f0be150c --- /dev/null +++ b/queue-4.9/mlx4-do-not-call-napi_schedule-without-care.patch @@ -0,0 +1,43 @@ +From foo@baz Wed Feb 1 08:50:24 CET 2017 +From: Eric Dumazet +Date: Fri, 13 Jan 2017 08:39:24 -0800 +Subject: mlx4: do not call napi_schedule() without care + +From: Eric Dumazet + + +[ Upstream commit 8cf699ec849f4ca1413cea01289bd7d37dbcc626 ] + +Disable BH around the call to napi_schedule() to avoid following warning + +[ 52.095499] NOHZ: local_softirq_pending 08 +[ 52.421291] NOHZ: local_softirq_pending 08 +[ 52.608313] NOHZ: local_softirq_pending 08 + +Fixes: 8d59de8f7bb3 ("net/mlx4_en: Process all completions in RX rings after port goes up") +Signed-off-by: Eric Dumazet +Cc: Erez Shitrit +Cc: Eugenia Emantayev +Cc: Tariq Toukan +Acked-by: Tariq Toukan +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c ++++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +@@ -1740,8 +1740,11 @@ int mlx4_en_start_port(struct net_device + /* Process all completions if exist to prevent + * the queues freezing if they are full + */ +- for (i = 0; i < priv->rx_ring_num; i++) ++ for (i = 0; i < priv->rx_ring_num; i++) { ++ local_bh_disable(); + napi_schedule(&priv->rx_cq[i]->napi); ++ local_bh_enable(); ++ } + + netif_tx_start_all_queues(dev); + netif_device_attach(dev); diff --git a/queue-4.9/mlxsw-pci-fix-eqe-structure-definition.patch b/queue-4.9/mlxsw-pci-fix-eqe-structure-definition.patch new file mode 100644 index 00000000000..4427524b82b --- /dev/null +++ b/queue-4.9/mlxsw-pci-fix-eqe-structure-definition.patch @@ -0,0 +1,51 @@ +From foo@baz Wed Feb 1 08:50:24 CET 2017 +From: Elad Raz +Date: Thu, 12 Jan 2017 09:10:39 +0100 +Subject: mlxsw: pci: Fix EQE structure definition + +From: Elad Raz + + +[ Upstream commit 28e46a0f2e03ab4ed0e23cace1ea89a68c8c115b ] + +The event_data starts from address 0x00-0x0C and not from 0x08-0x014. This +leads to duplication with other fields in the Event Queue Element such as +sub-type, cqn and owner. + +Fixes: eda6500a987a0 ("mlxsw: Add PCI bus implementation") +Signed-off-by: Elad Raz +Signed-off-by: Jiri Pirko +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlxsw/pci.h | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlxsw/pci.h ++++ b/drivers/net/ethernet/mellanox/mlxsw/pci.h +@@ -211,21 +211,21 @@ MLXSW_ITEM32(pci, eqe, owner, 0x0C, 0, 1 + /* pci_eqe_cmd_token + * Command completion event - token + */ +-MLXSW_ITEM32(pci, eqe, cmd_token, 0x08, 16, 16); ++MLXSW_ITEM32(pci, eqe, cmd_token, 0x00, 16, 16); + + /* pci_eqe_cmd_status + * Command completion event - status + */ +-MLXSW_ITEM32(pci, eqe, cmd_status, 0x08, 0, 8); ++MLXSW_ITEM32(pci, eqe, cmd_status, 0x00, 0, 8); + + /* pci_eqe_cmd_out_param_h + * Command completion event - output parameter - higher part + */ +-MLXSW_ITEM32(pci, eqe, cmd_out_param_h, 0x0C, 0, 32); ++MLXSW_ITEM32(pci, eqe, cmd_out_param_h, 0x04, 0, 32); + + /* pci_eqe_cmd_out_param_l + * Command completion event - output parameter - lower part + */ +-MLXSW_ITEM32(pci, eqe, cmd_out_param_l, 0x10, 0, 32); ++MLXSW_ITEM32(pci, eqe, cmd_out_param_l, 0x08, 0, 32); + + #endif diff --git a/queue-4.9/mlxsw-spectrum-fix-memory-leak-at-skb-reallocation.patch b/queue-4.9/mlxsw-spectrum-fix-memory-leak-at-skb-reallocation.patch new file mode 100644 index 00000000000..2f45ae9ade5 --- /dev/null +++ b/queue-4.9/mlxsw-spectrum-fix-memory-leak-at-skb-reallocation.patch @@ -0,0 +1,39 @@ +From foo@baz Wed Feb 1 08:50:24 CET 2017 +From: Arkadi Sharshevsky +Date: Thu, 12 Jan 2017 09:10:37 +0100 +Subject: mlxsw: spectrum: Fix memory leak at skb reallocation + +From: Arkadi Sharshevsky + + +[ Upstream commit 36bf38d158d3482119b3e159c0619b3c1539b508 ] + +During transmission the skb is checked for headroom in order to +add vendor specific header. In case the skb needs to be re-allocated, +skb_realloc_headroom() is called to make a private copy of the original, +but doesn't release it. 
Current code assumes that the original skb is +released during reallocation and only releases it at the error path +which causes a memory leak. + +Fix this by adding the original skb release to the main path. + +Fixes: 56ade8fe3fe1 ("mlxsw: spectrum: Add initial support for Spectrum ASIC") +Signed-off-by: Arkadi Sharshevsky +Reviewed-by: Ido Schimmel +Signed-off-by: Jiri Pirko +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +@@ -684,6 +684,7 @@ static netdev_tx_t mlxsw_sp_port_xmit(st + dev_kfree_skb_any(skb_orig); + return NETDEV_TX_OK; + } ++ dev_consume_skb_any(skb_orig); + } + + if (eth_skb_pad(skb)) { diff --git a/queue-4.9/mlxsw-switchx2-fix-memory-leak-at-skb-reallocation.patch b/queue-4.9/mlxsw-switchx2-fix-memory-leak-at-skb-reallocation.patch new file mode 100644 index 00000000000..01da781e112 --- /dev/null +++ b/queue-4.9/mlxsw-switchx2-fix-memory-leak-at-skb-reallocation.patch @@ -0,0 +1,38 @@ +From foo@baz Wed Feb 1 08:50:24 CET 2017 +From: Arkadi Sharshevsky +Date: Thu, 12 Jan 2017 09:10:38 +0100 +Subject: mlxsw: switchx2: Fix memory leak at skb reallocation + +From: Arkadi Sharshevsky + + +[ Upstream commit 400fc0106dd8c27ed84781c929c1a184785b9c79 ] + +During transmission the skb is checked for headroom in order to +add vendor specific header. In case the skb needs to be re-allocated, +skb_realloc_headroom() is called to make a private copy of the original, +but doesn't release it. Current code assumes that the original skb is +released during reallocation and only releases it at the error path +which causes a memory leak. + +Fix this by adding the original skb release to the main path. 
+ +Fixes: d003462a50de ("mlxsw: Simplify mlxsw_sx_port_xmit function") +Signed-off-by: Arkadi Sharshevsky +Signed-off-by: Jiri Pirko +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlxsw/switchx2.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +@@ -314,6 +314,7 @@ static netdev_tx_t mlxsw_sx_port_xmit(st + dev_kfree_skb_any(skb_orig); + return NETDEV_TX_OK; + } ++ dev_consume_skb_any(skb_orig); + } + mlxsw_sx_txhdr_construct(skb, &tx_info); + /* TX header is consumed by HW on the way so we shouldn't count its diff --git a/queue-4.9/net-dsa-bring-back-device-detaching-in-dsa_slave_suspend.patch b/queue-4.9/net-dsa-bring-back-device-detaching-in-dsa_slave_suspend.patch new file mode 100644 index 00000000000..750bc944081 --- /dev/null +++ b/queue-4.9/net-dsa-bring-back-device-detaching-in-dsa_slave_suspend.patch @@ -0,0 +1,35 @@ +From foo@baz Wed Feb 1 08:50:24 CET 2017 +From: Florian Fainelli +Date: Wed, 25 Jan 2017 09:10:41 -0800 +Subject: net: dsa: Bring back device detaching in dsa_slave_suspend() + +From: Florian Fainelli + + +[ Upstream commit f154be241d22298d2b63c9b613f619fa1086ea75 ] + +Commit 448b4482c671 ("net: dsa: Add lockdep class to tx queues to avoid +lockdep splat") removed the netif_device_detach() call done in +dsa_slave_suspend() which is necessary, and paired with a corresponding +netif_device_attach(), bring it back. + +Fixes: 448b4482c671 ("net: dsa: Add lockdep class to tx queues to avoid lockdep splat") +Signed-off-by: Florian Fainelli +Reviewed-by: Andrew Lunn +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dsa/slave.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/dsa/slave.c ++++ b/net/dsa/slave.c +@@ -1201,6 +1201,8 @@ int dsa_slave_suspend(struct net_device + { + struct dsa_slave_priv *p = netdev_priv(slave_dev); + ++ netif_device_detach(slave_dev); ++ + if (p->phy) { + phy_stop(p->phy); + p->old_pause = -1; diff --git a/queue-4.9/net-fix-harmonize_features-vs-netif_f_highdma.patch b/queue-4.9/net-fix-harmonize_features-vs-netif_f_highdma.patch new file mode 100644 index 00000000000..d8038e0f178 --- /dev/null +++ b/queue-4.9/net-fix-harmonize_features-vs-netif_f_highdma.patch @@ -0,0 +1,44 @@ +From foo@baz Wed Feb 1 08:50:24 CET 2017 +From: Eric Dumazet +Date: Wed, 18 Jan 2017 12:12:17 -0800 +Subject: net: fix harmonize_features() vs NETIF_F_HIGHDMA + +From: Eric Dumazet + + +[ Upstream commit 7be2c82cfd5d28d7adb66821a992604eb6dd112e ] + +Ashizuka reported a highmem oddity and sent a patch for freescale +fec driver. + +But the problem root cause is that core networking stack +must ensure no skb with highmem fragment is ever sent through +a device that does not assert NETIF_F_HIGHDMA in its features. + +We need to call illegal_highdma() from harmonize_features() +regardless of CSUM checks. + +Fixes: ec5f06156423 ("net: Kill link between CSUM and SG features.") +Signed-off-by: Eric Dumazet +Cc: Pravin Shelar +Reported-by: "Ashizuka, Yuusuke" +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -2815,9 +2815,9 @@ static netdev_features_t harmonize_featu + if (skb->ip_summed != CHECKSUM_NONE && + !can_checksum_protocol(features, type)) { + features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); +- } else if (illegal_highdma(skb->dev, skb)) { +- features &= ~NETIF_F_SG; + } ++ if (illegal_highdma(skb->dev, skb)) ++ features &= ~NETIF_F_SG; + + return features; + } diff --git a/queue-4.9/net-ipv4-fix-table-id-in-getroute-response.patch b/queue-4.9/net-ipv4-fix-table-id-in-getroute-response.patch new file mode 100644 index 00000000000..2284d4732c7 --- /dev/null +++ b/queue-4.9/net-ipv4-fix-table-id-in-getroute-response.patch @@ -0,0 +1,35 @@ +From foo@baz Wed Feb 1 08:50:24 CET 2017 +From: David Ahern +Date: Wed, 11 Jan 2017 15:42:17 -0800 +Subject: net: ipv4: fix table id in getroute response + +From: David Ahern + + +[ Upstream commit 8a430ed50bb1b19ca14a46661f3b1b35f2fb5c39 ] + +rtm_table is an 8-bit field while table ids are allowed up to u32. Commit +709772e6e065 ("net: Fix routing tables with id > 255 for legacy software") +added the preference to set rtm_table in dumps to RT_TABLE_COMPAT if the +table id is > 255. The table id returned on get route requests should do +the same. + +Fixes: c36ba6603a11 ("net: Allow user to get table id from route lookup") +Signed-off-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/route.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -2440,7 +2440,7 @@ static int rt_fill_info(struct net *net, + r->rtm_dst_len = 32; + r->rtm_src_len = 0; + r->rtm_tos = fl4->flowi4_tos; +- r->rtm_table = table_id; ++ r->rtm_table = table_id < 256 ? 
table_id : RT_TABLE_COMPAT; + if (nla_put_u32(skb, RTA_TABLE, table_id)) + goto nla_put_failure; + r->rtm_type = rt->rt_type; diff --git a/queue-4.9/net-lwtunnel-handle-lwtunnel_fill_encap-failure.patch b/queue-4.9/net-lwtunnel-handle-lwtunnel_fill_encap-failure.patch new file mode 100644 index 00000000000..11f431808e0 --- /dev/null +++ b/queue-4.9/net-lwtunnel-handle-lwtunnel_fill_encap-failure.patch @@ -0,0 +1,61 @@ +From foo@baz Wed Feb 1 08:50:24 CET 2017 +From: David Ahern +Date: Wed, 11 Jan 2017 14:29:54 -0800 +Subject: net: lwtunnel: Handle lwtunnel_fill_encap failure + +From: David Ahern + + +[ Upstream commit ea7a80858f57d8878b1499ea0f1b8a635cc48de7 ] + +Handle failure in lwtunnel_fill_encap adding attributes to skb. + +Fixes: 571e722676fe ("ipv4: support for fib route lwtunnel encap attributes") +Fixes: 19e42e451506 ("ipv6: support for fib route lwtunnel encap attributes") +Signed-off-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/fib_semantics.c | 11 +++++++---- + net/ipv6/route.c | 3 ++- + 2 files changed, 9 insertions(+), 5 deletions(-) + +--- a/net/ipv4/fib_semantics.c ++++ b/net/ipv4/fib_semantics.c +@@ -1278,8 +1278,9 @@ int fib_dump_info(struct sk_buff *skb, u + nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid)) + goto nla_put_failure; + #endif +- if (fi->fib_nh->nh_lwtstate) +- lwtunnel_fill_encap(skb, fi->fib_nh->nh_lwtstate); ++ if (fi->fib_nh->nh_lwtstate && ++ lwtunnel_fill_encap(skb, fi->fib_nh->nh_lwtstate) < 0) ++ goto nla_put_failure; + } + #ifdef CONFIG_IP_ROUTE_MULTIPATH + if (fi->fib_nhs > 1) { +@@ -1315,8 +1316,10 @@ int fib_dump_info(struct sk_buff *skb, u + nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid)) + goto nla_put_failure; + #endif +- if (nh->nh_lwtstate) +- lwtunnel_fill_encap(skb, nh->nh_lwtstate); ++ if (nh->nh_lwtstate && ++ lwtunnel_fill_encap(skb, nh->nh_lwtstate) < 0) ++ goto nla_put_failure; ++ + /* length of rtnetlink header + attributes */ + rtnh->rtnh_len = 
nlmsg_get_pos(skb) - (void *) rtnh; + } endfor_nexthops(fi); +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -3306,7 +3306,8 @@ static int rt6_fill_node(struct net *net + if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags))) + goto nla_put_failure; + +- lwtunnel_fill_encap(skb, rt->dst.lwtstate); ++ if (lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0) ++ goto nla_put_failure; + + nlmsg_end(skb, nlh); + return 0; diff --git a/queue-4.9/net-mlx5e-do-not-recycle-pages-from-emergency-reserve.patch b/queue-4.9/net-mlx5e-do-not-recycle-pages-from-emergency-reserve.patch new file mode 100644 index 00000000000..b3d52882840 --- /dev/null +++ b/queue-4.9/net-mlx5e-do-not-recycle-pages-from-emergency-reserve.patch @@ -0,0 +1,42 @@ +From foo@baz Wed Feb 1 08:50:24 CET 2017 +From: Eric Dumazet +Date: Wed, 18 Jan 2017 23:03:08 -0800 +Subject: net/mlx5e: Do not recycle pages from emergency reserve + +From: Eric Dumazet + + +[ Upstream commit e048fc50d7bde23136e098e04a324d7e3404408d ] + +A driver using dev_alloc_page() must not reuse a page allocated from +emergency memory reserve. + +Otherwise all packets using this page will be immediately dropped, +unless for very specific sockets having SOCK_MEMALLOC bit set. + +This issue might be hard to debug, because only a fraction of received +packets would be dropped. + +Fixes: 4415a0319f92 ("net/mlx5e: Implement RX mapped page cache for page recycle") +Signed-off-by: Eric Dumazet +Cc: Tariq Toukan +Cc: Saeed Mahameed +Acked-by: Saeed Mahameed +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +@@ -193,6 +193,9 @@ static inline bool mlx5e_rx_cache_put(st + return false; + } + ++ if (unlikely(page_is_pfmemalloc(dma_info->page))) ++ return false; ++ + cache->page_cache[cache->tail] = *dma_info; + cache->tail = tail_next; + return true; diff --git a/queue-4.9/net-mpls-fix-multipath-selection-for-lsr-use-case.patch b/queue-4.9/net-mpls-fix-multipath-selection-for-lsr-use-case.patch new file mode 100644 index 00000000000..6bdc14b54ff --- /dev/null +++ b/queue-4.9/net-mpls-fix-multipath-selection-for-lsr-use-case.patch @@ -0,0 +1,162 @@ +From foo@baz Wed Feb 1 08:50:24 CET 2017 +From: David Ahern +Date: Fri, 20 Jan 2017 12:58:34 -0800 +Subject: net: mpls: Fix multipath selection for LSR use case + +From: David Ahern + + +[ Upstream commit 9f427a0e474a67b454420c131709600d44850486 ] + +MPLS multipath for LSR is broken -- always selecting the first nexthop +in the one label case. For example: + + $ ip -f mpls ro ls + 100 + nexthop as to 200 via inet 172.16.2.2 dev virt12 + nexthop as to 300 via inet 172.16.3.2 dev virt13 + 101 + nexthop as to 201 via inet6 2000:2::2 dev virt12 + nexthop as to 301 via inet6 2000:3::2 dev virt13 + +In this example incoming packets have a single MPLS label which means +BOS bit is set. The BOS bit is passed from mpls_forward down to +mpls_multipath_hash which never processes the hash loop because BOS is 1. + +Update mpls_multipath_hash to process the entire label stack. mpls_hdr_len +tracks the total mpls header length on each pass (on pass N mpls_hdr_len +is N * sizeof(mpls_shim_hdr)). 
When the label is found with the BOS set +it verifies the skb has sufficient header for ipv4 or ipv6, and find the +IPv4 and IPv6 header by using the last mpls_hdr pointer and adding 1 to +advance past it. + +With these changes I have verified the code correctly sees the label, +BOS, IPv4 and IPv6 addresses in the network header and icmp/tcp/udp +traffic for ipv4 and ipv6 are distributed across the nexthops. + +Fixes: 1c78efa8319ca ("mpls: flow-based multipath selection") +Acked-by: Robert Shearman +Signed-off-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/mpls/af_mpls.c | 48 +++++++++++++++++++++++++----------------------- + 1 file changed, 25 insertions(+), 23 deletions(-) + +--- a/net/mpls/af_mpls.c ++++ b/net/mpls/af_mpls.c +@@ -98,18 +98,19 @@ bool mpls_pkt_too_big(const struct sk_bu + } + EXPORT_SYMBOL_GPL(mpls_pkt_too_big); + +-static u32 mpls_multipath_hash(struct mpls_route *rt, +- struct sk_buff *skb, bool bos) ++static u32 mpls_multipath_hash(struct mpls_route *rt, struct sk_buff *skb) + { + struct mpls_entry_decoded dec; ++ unsigned int mpls_hdr_len = 0; + struct mpls_shim_hdr *hdr; + bool eli_seen = false; + int label_index; + u32 hash = 0; + +- for (label_index = 0; label_index < MAX_MP_SELECT_LABELS && !bos; ++ for (label_index = 0; label_index < MAX_MP_SELECT_LABELS; + label_index++) { +- if (!pskb_may_pull(skb, sizeof(*hdr) * label_index)) ++ mpls_hdr_len += sizeof(*hdr); ++ if (!pskb_may_pull(skb, mpls_hdr_len)) + break; + + /* Read and decode the current label */ +@@ -134,37 +135,38 @@ static u32 mpls_multipath_hash(struct mp + eli_seen = true; + } + +- bos = dec.bos; +- if (bos && pskb_may_pull(skb, sizeof(*hdr) * label_index + +- sizeof(struct iphdr))) { ++ if (!dec.bos) ++ continue; ++ ++ /* found bottom label; does skb have room for a header? 
*/ ++ if (pskb_may_pull(skb, mpls_hdr_len + sizeof(struct iphdr))) { + const struct iphdr *v4hdr; + +- v4hdr = (const struct iphdr *)(mpls_hdr(skb) + +- label_index); ++ v4hdr = (const struct iphdr *)(hdr + 1); + if (v4hdr->version == 4) { + hash = jhash_3words(ntohl(v4hdr->saddr), + ntohl(v4hdr->daddr), + v4hdr->protocol, hash); + } else if (v4hdr->version == 6 && +- pskb_may_pull(skb, sizeof(*hdr) * label_index + +- sizeof(struct ipv6hdr))) { ++ pskb_may_pull(skb, mpls_hdr_len + ++ sizeof(struct ipv6hdr))) { + const struct ipv6hdr *v6hdr; + +- v6hdr = (const struct ipv6hdr *)(mpls_hdr(skb) + +- label_index); +- ++ v6hdr = (const struct ipv6hdr *)(hdr + 1); + hash = __ipv6_addr_jhash(&v6hdr->saddr, hash); + hash = __ipv6_addr_jhash(&v6hdr->daddr, hash); + hash = jhash_1word(v6hdr->nexthdr, hash); + } + } ++ ++ break; + } + + return hash; + } + + static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, +- struct sk_buff *skb, bool bos) ++ struct sk_buff *skb) + { + int alive = ACCESS_ONCE(rt->rt_nhn_alive); + u32 hash = 0; +@@ -180,7 +182,7 @@ static struct mpls_nh *mpls_select_multi + if (alive <= 0) + return NULL; + +- hash = mpls_multipath_hash(rt, skb, bos); ++ hash = mpls_multipath_hash(rt, skb); + nh_index = hash % alive; + if (alive == rt->rt_nhn) + goto out; +@@ -278,17 +280,11 @@ static int mpls_forward(struct sk_buff * + hdr = mpls_hdr(skb); + dec = mpls_entry_decode(hdr); + +- /* Pop the label */ +- skb_pull(skb, sizeof(*hdr)); +- skb_reset_network_header(skb); +- +- skb_orphan(skb); +- + rt = mpls_route_input_rcu(net, dec.label); + if (!rt) + goto drop; + +- nh = mpls_select_multipath(rt, skb, dec.bos); ++ nh = mpls_select_multipath(rt, skb); + if (!nh) + goto drop; + +@@ -297,6 +293,12 @@ static int mpls_forward(struct sk_buff * + if (!mpls_output_possible(out_dev)) + goto drop; + ++ /* Pop the label */ ++ skb_pull(skb, sizeof(*hdr)); ++ skb_reset_network_header(skb); ++ ++ skb_orphan(skb); ++ + if (skb_warn_if_lro(skb)) + goto drop; + diff 
--git a/queue-4.9/net-phy-bcm63xx-utilize-correct-config_intr-function.patch b/queue-4.9/net-phy-bcm63xx-utilize-correct-config_intr-function.patch new file mode 100644 index 00000000000..69490302e50 --- /dev/null +++ b/queue-4.9/net-phy-bcm63xx-utilize-correct-config_intr-function.patch @@ -0,0 +1,72 @@ +From foo@baz Wed Feb 1 08:50:24 CET 2017 +From: Daniel Gonzalez Cabanelas +Date: Tue, 17 Jan 2017 16:26:55 -0800 +Subject: net: phy: bcm63xx: Utilize correct config_intr function + +From: Daniel Gonzalez Cabanelas + + +[ Upstream commit cd33b3e0da43522ff8e8f2b2b71d3d08298512b0 ] + +Commit a1cba5613edf ("net: phy: Add Broadcom phy library for common +interfaces") made the BCM63xx PHY driver utilize bcm_phy_config_intr() +which would appear to do the right thing, except that it does not write +to the MII_BCM63XX_IR register but to MII_BCM54XX_ECR which is +different. + +This would cause invalid link parameters and events to be +generated by the PHY interrupt. + +Fixes: a1cba5613edf ("net: phy: Add Broadcom phy library for common interfaces") +Signed-off-by: Daniel Gonzalez Cabanelas +Signed-off-by: Florian Fainelli +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/bcm63xx.c | 21 +++++++++++++++++++-- + 1 file changed, 19 insertions(+), 2 deletions(-) + +--- a/drivers/net/phy/bcm63xx.c ++++ b/drivers/net/phy/bcm63xx.c +@@ -21,6 +21,23 @@ MODULE_DESCRIPTION("Broadcom 63xx intern + MODULE_AUTHOR("Maxime Bizon "); + MODULE_LICENSE("GPL"); + ++static int bcm63xx_config_intr(struct phy_device *phydev) ++{ ++ int reg, err; ++ ++ reg = phy_read(phydev, MII_BCM63XX_IR); ++ if (reg < 0) ++ return reg; ++ ++ if (phydev->interrupts == PHY_INTERRUPT_ENABLED) ++ reg &= ~MII_BCM63XX_IR_GMASK; ++ else ++ reg |= MII_BCM63XX_IR_GMASK; ++ ++ err = phy_write(phydev, MII_BCM63XX_IR, reg); ++ return err; ++} ++ + static int bcm63xx_config_init(struct phy_device *phydev) + { + int reg, err; +@@ -55,7 +72,7 @@ static struct phy_driver bcm63xx_driver[ + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .ack_interrupt = bcm_phy_ack_intr, +- .config_intr = bcm_phy_config_intr, ++ .config_intr = bcm63xx_config_intr, + }, { + /* same phy as above, with just a different OUI */ + .phy_id = 0x002bdc00, +@@ -67,7 +84,7 @@ static struct phy_driver bcm63xx_driver[ + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .ack_interrupt = bcm_phy_ack_intr, +- .config_intr = bcm_phy_config_intr, ++ .config_intr = bcm63xx_config_intr, + } }; + + module_phy_driver(bcm63xx_driver); diff --git a/queue-4.9/net-sched-actions-fix-refcnt-when-geting-of-action-after-bind.patch b/queue-4.9/net-sched-actions-fix-refcnt-when-geting-of-action-after-bind.patch new file mode 100644 index 00000000000..903353c7092 --- /dev/null +++ b/queue-4.9/net-sched-actions-fix-refcnt-when-geting-of-action-after-bind.patch @@ -0,0 +1,146 @@ +From foo@baz Wed Feb 1 08:50:24 CET 2017 +From: Jamal Hadi Salim +Date: Sun, 15 Jan 2017 10:14:06 -0500 +Subject: net sched actions: fix refcnt when GETing of action after bind + +From: Jamal Hadi Salim + + +[ Upstream commit 
0faa9cb5b3836a979864a6357e01d2046884ad52 ] + +Demonstrating the issue: + +.. add a drop action +$sudo $TC actions add action drop index 10 + +.. retrieve it +$ sudo $TC -s actions get action gact index 10 + + action order 1: gact action drop + random type none pass val 0 + index 10 ref 2 bind 0 installed 29 sec used 29 sec + Action statistics: + Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) + backlog 0b 0p requeues 0 + +... bug 1 above: reference is two. + Reference is actually 1 but we forget to subtract 1. + +... do a GET again and we see the same issue + try a few times and nothing changes +~$ sudo $TC -s actions get action gact index 10 + + action order 1: gact action drop + random type none pass val 0 + index 10 ref 2 bind 0 installed 31 sec used 31 sec + Action statistics: + Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) + backlog 0b 0p requeues 0 + +... lets try to bind the action to a filter.. +$ sudo $TC qdisc add dev lo ingress +$ sudo $TC filter add dev lo parent ffff: protocol ip prio 1 \ + u32 match ip dst 127.0.0.1/32 flowid 1:1 action gact index 10 + +... and now a few GETs: +$ sudo $TC -s actions get action gact index 10 + + action order 1: gact action drop + random type none pass val 0 + index 10 ref 3 bind 1 installed 204 sec used 204 sec + Action statistics: + Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) + backlog 0b 0p requeues 0 + +$ sudo $TC -s actions get action gact index 10 + + action order 1: gact action drop + random type none pass val 0 + index 10 ref 4 bind 1 installed 206 sec used 206 sec + Action statistics: + Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) + backlog 0b 0p requeues 0 + +$ sudo $TC -s actions get action gact index 10 + + action order 1: gact action drop + random type none pass val 0 + index 10 ref 5 bind 1 installed 235 sec used 235 sec + Action statistics: + Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) + backlog 0b 0p requeues 0 + +.... 
as can be observed the reference count keeps going up. + +After the fix + +$ sudo $TC actions add action drop index 10 +$ sudo $TC -s actions get action gact index 10 + + action order 1: gact action drop + random type none pass val 0 + index 10 ref 1 bind 0 installed 4 sec used 4 sec + Action statistics: + Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) + backlog 0b 0p requeues 0 + +$ sudo $TC -s actions get action gact index 10 + + action order 1: gact action drop + random type none pass val 0 + index 10 ref 1 bind 0 installed 6 sec used 6 sec + Action statistics: + Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) + backlog 0b 0p requeues 0 + +$ sudo $TC qdisc add dev lo ingress +$ sudo $TC filter add dev lo parent ffff: protocol ip prio 1 \ + u32 match ip dst 127.0.0.1/32 flowid 1:1 action gact index 10 + +$ sudo $TC -s actions get action gact index 10 + + action order 1: gact action drop + random type none pass val 0 + index 10 ref 2 bind 1 installed 32 sec used 32 sec + Action statistics: + Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) + backlog 0b 0p requeues 0 + +$ sudo $TC -s actions get action gact index 10 + + action order 1: gact action drop + random type none pass val 0 + index 10 ref 2 bind 1 installed 33 sec used 33 sec + Action statistics: + Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) + backlog 0b 0p requeues 0 + +Fixes: aecc5cefc389 ("net sched actions: fix GETing actions") +Signed-off-by: Jamal Hadi Salim +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/act_api.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/net/sched/act_api.c ++++ b/net/sched/act_api.c +@@ -903,8 +903,6 @@ tca_action_gd(struct net *net, struct nl + goto err; + } + act->order = i; +- if (event == RTM_GETACTION) +- act->tcfa_refcnt++; + list_add_tail(&act->list, &actions); + } + +@@ -917,7 +915,8 @@ tca_action_gd(struct net *net, struct nl + return ret; + } + err: +- tcf_action_destroy(&actions, 0); ++ if (event != RTM_GETACTION) ++ tcf_action_destroy(&actions, 0); + return ret; + } + diff --git a/queue-4.9/net-specify-the-owning-module-for-lwtunnel-ops.patch b/queue-4.9/net-specify-the-owning-module-for-lwtunnel-ops.patch new file mode 100644 index 00000000000..0142bc4f4b3 --- /dev/null +++ b/queue-4.9/net-specify-the-owning-module-for-lwtunnel-ops.patch @@ -0,0 +1,73 @@ +From foo@baz Wed Feb 1 08:50:24 CET 2017 +From: Robert Shearman +Date: Tue, 24 Jan 2017 16:26:47 +0000 +Subject: net: Specify the owning module for lwtunnel ops + +From: Robert Shearman + + +[ Upstream commit 88ff7334f25909802140e690c0e16433e485b0a0 ] + +Modules implementing lwtunnel ops should not be allowed to unload +while there is state alive using those ops, so specify the owning +module for all lwtunnel ops. + +Signed-off-by: Robert Shearman +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/lwtunnel.h | 2 ++ + net/ipv4/ip_tunnel_core.c | 2 ++ + net/ipv6/ila/ila_lwt.c | 1 + + net/mpls/mpls_iptunnel.c | 1 + + 4 files changed, 6 insertions(+) + +--- a/include/net/lwtunnel.h ++++ b/include/net/lwtunnel.h +@@ -43,6 +43,8 @@ struct lwtunnel_encap_ops { + int (*get_encap_size)(struct lwtunnel_state *lwtstate); + int (*cmp_encap)(struct lwtunnel_state *a, struct lwtunnel_state *b); + int (*xmit)(struct sk_buff *skb); ++ ++ struct module *owner; + }; + + #ifdef CONFIG_LWTUNNEL +--- a/net/ipv4/ip_tunnel_core.c ++++ b/net/ipv4/ip_tunnel_core.c +@@ -313,6 +313,7 @@ static const struct lwtunnel_encap_ops i + .fill_encap = ip_tun_fill_encap_info, + .get_encap_size = ip_tun_encap_nlsize, + .cmp_encap = ip_tun_cmp_encap, ++ .owner = THIS_MODULE, + }; + + static const struct nla_policy ip6_tun_policy[LWTUNNEL_IP6_MAX + 1] = { +@@ -403,6 +404,7 @@ static const struct lwtunnel_encap_ops i + .fill_encap = ip6_tun_fill_encap_info, + .get_encap_size = ip6_tun_encap_nlsize, + .cmp_encap = ip_tun_cmp_encap, ++ .owner = THIS_MODULE, + }; + + void __init ip_tunnel_core_init(void) +--- a/net/ipv6/ila/ila_lwt.c ++++ b/net/ipv6/ila/ila_lwt.c +@@ -164,6 +164,7 @@ static const struct lwtunnel_encap_ops i + .fill_encap = ila_fill_encap_info, + .get_encap_size = ila_encap_nlsize, + .cmp_encap = ila_encap_cmp, ++ .owner = THIS_MODULE, + }; + + int ila_lwt_init(void) +--- a/net/mpls/mpls_iptunnel.c ++++ b/net/mpls/mpls_iptunnel.c +@@ -218,6 +218,7 @@ static const struct lwtunnel_encap_ops m + .fill_encap = mpls_fill_encap_info, + .get_encap_size = mpls_encap_nlsize, + .cmp_encap = mpls_encap_cmp, ++ .owner = THIS_MODULE, + }; + + static int __init mpls_iptunnel_init(void) diff --git a/queue-4.9/net-systemport-decouple-flow-control-from-__bcm_sysport_tx_reclaim.patch b/queue-4.9/net-systemport-decouple-flow-control-from-__bcm_sysport_tx_reclaim.patch new file mode 100644 index 00000000000..220c2fc66b9 --- /dev/null +++ 
b/queue-4.9/net-systemport-decouple-flow-control-from-__bcm_sysport_tx_reclaim.patch @@ -0,0 +1,93 @@ +From foo@baz Wed Feb 1 08:50:24 CET 2017 +From: Florian Fainelli +Date: Thu, 12 Jan 2017 12:09:09 -0800 +Subject: net: systemport: Decouple flow control from __bcm_sysport_tx_reclaim + +From: Florian Fainelli + + +[ Upstream commit 148d3d021cf9724fcf189ce4e525a094bbf5ce89 ] + +The __bcm_sysport_tx_reclaim() function is used to reclaim transmit +resources in different places within the driver. Most of them should +not affect the state of the transmit flow control. + +Introduce bcm_sysport_tx_clean() which cleans the ring, but does not +re-enable flow control towards the networking stack, and make +bcm_sysport_tx_reclaim() do the actual transmit queue flow control. + +Fixes: 80105befdb4b ("net: systemport: add Broadcom SYSTEMPORT Ethernet MAC driver") +Signed-off-by: Florian Fainelli +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bcmsysport.c | 25 ++++++++++++++++++------- + 1 file changed, 18 insertions(+), 7 deletions(-) + +--- a/drivers/net/ethernet/broadcom/bcmsysport.c ++++ b/drivers/net/ethernet/broadcom/bcmsysport.c +@@ -710,11 +710,8 @@ static unsigned int __bcm_sysport_tx_rec + unsigned int c_index, last_c_index, last_tx_cn, num_tx_cbs; + unsigned int pkts_compl = 0, bytes_compl = 0; + struct bcm_sysport_cb *cb; +- struct netdev_queue *txq; + u32 hw_ind; + +- txq = netdev_get_tx_queue(ndev, ring->index); +- + /* Compute how many descriptors have been processed since last call */ + hw_ind = tdma_readl(priv, TDMA_DESC_RING_PROD_CONS_INDEX(ring->index)); + c_index = (hw_ind >> RING_CONS_INDEX_SHIFT) & RING_CONS_INDEX_MASK; +@@ -745,9 +742,6 @@ static unsigned int __bcm_sysport_tx_rec + + ring->c_index = c_index; + +- if (netif_tx_queue_stopped(txq) && pkts_compl) +- netif_tx_wake_queue(txq); +- + netif_dbg(priv, tx_done, ndev, + "ring=%d c_index=%d pkts_compl=%d, bytes_compl=%d\n", + ring->index, 
ring->c_index, pkts_compl, bytes_compl); +@@ -759,16 +753,33 @@ static unsigned int __bcm_sysport_tx_rec + static unsigned int bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv, + struct bcm_sysport_tx_ring *ring) + { ++ struct netdev_queue *txq; + unsigned int released; + unsigned long flags; + ++ txq = netdev_get_tx_queue(priv->netdev, ring->index); ++ + spin_lock_irqsave(&ring->lock, flags); + released = __bcm_sysport_tx_reclaim(priv, ring); ++ if (released) ++ netif_tx_wake_queue(txq); ++ + spin_unlock_irqrestore(&ring->lock, flags); + + return released; + } + ++/* Locked version of the per-ring TX reclaim, but does not wake the queue */ ++static void bcm_sysport_tx_clean(struct bcm_sysport_priv *priv, ++ struct bcm_sysport_tx_ring *ring) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ring->lock, flags); ++ __bcm_sysport_tx_reclaim(priv, ring); ++ spin_unlock_irqrestore(&ring->lock, flags); ++} ++ + static int bcm_sysport_tx_poll(struct napi_struct *napi, int budget) + { + struct bcm_sysport_tx_ring *ring = +@@ -1253,7 +1264,7 @@ static void bcm_sysport_fini_tx_ring(str + napi_disable(&ring->napi); + netif_napi_del(&ring->napi); + +- bcm_sysport_tx_reclaim(priv, ring); ++ bcm_sysport_tx_clean(priv, ring); + + kfree(ring->cbs); + ring->cbs = NULL; diff --git a/queue-4.9/netvsc-add-rcu_read-locking-to-netvsc-callback.patch b/queue-4.9/netvsc-add-rcu_read-locking-to-netvsc-callback.patch new file mode 100644 index 00000000000..00eca2e6b7b --- /dev/null +++ b/queue-4.9/netvsc-add-rcu_read-locking-to-netvsc-callback.patch @@ -0,0 +1,49 @@ +From foo@baz Wed Feb 1 08:50:24 CET 2017 +From: stephen hemminger +Date: Wed, 11 Jan 2017 09:16:32 -0800 +Subject: netvsc: add rcu_read locking to netvsc callback + +From: stephen hemminger + + +[ Upstream commit 0719e72ccb801829a3d735d187ca8417f0930459 ] + +The receive callback (in tasklet context) is using RCU to get reference +to associated VF network device but this is not safe. RCU read lock +needs to be held. 
Found by running with full lockdep debugging +enabled. + +Fixes: f207c10d9823 ("hv_netvsc: use RCU to protect vf_netdev") +Signed-off-by: Stephen Hemminger +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/netvsc_drv.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -659,6 +659,7 @@ int netvsc_recv_callback(struct hv_devic + * policy filters on the host). Deliver these via the VF + * interface in the guest. + */ ++ rcu_read_lock(); + vf_netdev = rcu_dereference(net_device_ctx->vf_netdev); + if (vf_netdev && (vf_netdev->flags & IFF_UP)) + net = vf_netdev; +@@ -667,6 +668,7 @@ int netvsc_recv_callback(struct hv_devic + skb = netvsc_alloc_recv_skb(net, packet, csum_info, *data, vlan_tci); + if (unlikely(!skb)) { + ++net->stats.rx_dropped; ++ rcu_read_unlock(); + return NVSP_STAT_FAIL; + } + +@@ -696,6 +698,7 @@ int netvsc_recv_callback(struct hv_devic + * TODO - use NAPI? + */ + netif_rx(skb); ++ rcu_read_unlock(); + + return 0; + } diff --git a/queue-4.9/openvswitch-maintain-correct-checksum-state-in-conntrack-actions.patch b/queue-4.9/openvswitch-maintain-correct-checksum-state-in-conntrack-actions.patch new file mode 100644 index 00000000000..559f8e02a0e --- /dev/null +++ b/queue-4.9/openvswitch-maintain-correct-checksum-state-in-conntrack-actions.patch @@ -0,0 +1,99 @@ +From foo@baz Wed Feb 1 08:50:24 CET 2017 +From: Lance Richardson +Date: Thu, 12 Jan 2017 19:33:18 -0500 +Subject: openvswitch: maintain correct checksum state in conntrack actions + +From: Lance Richardson + + +[ Upstream commit 75f01a4c9cc291ff5cb28ca1216adb163b7a20ee ] + +When executing conntrack actions on skbuffs with checksum mode +CHECKSUM_COMPLETE, the checksum must be updated to account for +header pushes and pulls. 
Otherwise we get "hw csum failure" +logs similar to this (ICMP packet received on geneve tunnel +via ixgbe NIC): + +[ 405.740065] genev_sys_6081: hw csum failure +[ 405.740106] CPU: 3 PID: 0 Comm: swapper/3 Tainted: G I 4.10.0-rc3+ #1 +[ 405.740108] Call Trace: +[ 405.740110] +[ 405.740113] dump_stack+0x63/0x87 +[ 405.740116] netdev_rx_csum_fault+0x3a/0x40 +[ 405.740118] __skb_checksum_complete+0xcf/0xe0 +[ 405.740120] nf_ip_checksum+0xc8/0xf0 +[ 405.740124] icmp_error+0x1de/0x351 [nf_conntrack_ipv4] +[ 405.740132] nf_conntrack_in+0xe1/0x550 [nf_conntrack] +[ 405.740137] ? find_bucket.isra.2+0x62/0x70 [openvswitch] +[ 405.740143] __ovs_ct_lookup+0x95/0x980 [openvswitch] +[ 405.740145] ? netif_rx_internal+0x44/0x110 +[ 405.740149] ovs_ct_execute+0x147/0x4b0 [openvswitch] +[ 405.740153] do_execute_actions+0x22e/0xa70 [openvswitch] +[ 405.740157] ovs_execute_actions+0x40/0x120 [openvswitch] +[ 405.740161] ovs_dp_process_packet+0x84/0x120 [openvswitch] +[ 405.740166] ovs_vport_receive+0x73/0xd0 [openvswitch] +[ 405.740168] ? udp_rcv+0x1a/0x20 +[ 405.740170] ? ip_local_deliver_finish+0x93/0x1e0 +[ 405.740172] ? ip_local_deliver+0x6f/0xe0 +[ 405.740174] ? ip_rcv_finish+0x3a0/0x3a0 +[ 405.740176] ? ip_rcv_finish+0xdb/0x3a0 +[ 405.740177] ? ip_rcv+0x2a7/0x400 +[ 405.740180] ? __netif_receive_skb_core+0x970/0xa00 +[ 405.740185] netdev_frame_hook+0xd3/0x160 [openvswitch] +[ 405.740187] __netif_receive_skb_core+0x1dc/0xa00 +[ 405.740194] ? 
ixgbe_clean_rx_irq+0x46d/0xa20 [ixgbe] +[ 405.740197] __netif_receive_skb+0x18/0x60 +[ 405.740199] netif_receive_skb_internal+0x40/0xb0 +[ 405.740201] napi_gro_receive+0xcd/0x120 +[ 405.740204] gro_cell_poll+0x57/0x80 [geneve] +[ 405.740206] net_rx_action+0x260/0x3c0 +[ 405.740209] __do_softirq+0xc9/0x28c +[ 405.740211] irq_exit+0xd9/0xf0 +[ 405.740213] do_IRQ+0x51/0xd0 +[ 405.740215] common_interrupt+0x93/0x93 + +Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action") +Signed-off-by: Lance Richardson +Acked-by: Pravin B Shelar +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/openvswitch/conntrack.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/net/openvswitch/conntrack.c ++++ b/net/openvswitch/conntrack.c +@@ -514,7 +514,7 @@ static int ovs_ct_nat_execute(struct sk_ + int hooknum, nh_off, err = NF_ACCEPT; + + nh_off = skb_network_offset(skb); +- skb_pull(skb, nh_off); ++ skb_pull_rcsum(skb, nh_off); + + /* See HOOK2MANIP(). */ + if (maniptype == NF_NAT_MANIP_SRC) +@@ -579,6 +579,7 @@ static int ovs_ct_nat_execute(struct sk_ + err = nf_nat_packet(ct, ctinfo, hooknum, skb); + push: + skb_push(skb, nh_off); ++ skb_postpush_rcsum(skb, skb->data, nh_off); + + return err; + } +@@ -890,7 +891,7 @@ int ovs_ct_execute(struct net *net, stru + + /* The conntrack module expects to be working at L3. 
*/ + nh_ofs = skb_network_offset(skb); +- skb_pull(skb, nh_ofs); ++ skb_pull_rcsum(skb, nh_ofs); + + if (key->ip.frag != OVS_FRAG_TYPE_NONE) { + err = handle_fragments(net, key, info->zone.id, skb); +@@ -904,6 +905,7 @@ int ovs_ct_execute(struct net *net, stru + err = ovs_ct_lookup(net, key, info, skb); + + skb_push(skb, nh_ofs); ++ skb_postpush_rcsum(skb, skb->data, nh_ofs); + if (err) + kfree_skb(skb); + return err; diff --git a/queue-4.9/qmi_wwan-cdc_ether-add-device-id-for-hp-lt2523-novatel-e371-wwan-card.patch b/queue-4.9/qmi_wwan-cdc_ether-add-device-id-for-hp-lt2523-novatel-e371-wwan-card.patch new file mode 100644 index 00000000000..c4fe42632f4 --- /dev/null +++ b/queue-4.9/qmi_wwan-cdc_ether-add-device-id-for-hp-lt2523-novatel-e371-wwan-card.patch @@ -0,0 +1,65 @@ +From foo@baz Wed Feb 1 08:50:24 CET 2017 +From: Bjørn Mork +Date: Tue, 24 Jan 2017 10:45:38 +0100 +Subject: qmi_wwan/cdc_ether: add device ID for HP lt2523 (Novatel E371) WWAN card + +From: Bjørn Mork + + +[ Upstream commit 5b9f57516337b523f7466a53939aaaea7b78141b ] + +Another rebranded Novatel E371. qmi_wwan should drive this device, while +cdc_ether should ignore it. Even though the USB descriptors are plain +CDC-ETHER that USB interface is a QMI interface. Ref commit 7fdb7846c9ca +("qmi_wwan/cdc_ether: add device IDs for Dell 5804 (Novatel E371) WWAN +card") + +Cc: Dan Williams +Signed-off-by: Bjørn Mork +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/cdc_ether.c | 8 ++++++++ + drivers/net/usb/qmi_wwan.c | 7 +++++++ + 2 files changed, 15 insertions(+) + +--- a/drivers/net/usb/cdc_ether.c ++++ b/drivers/net/usb/cdc_ether.c +@@ -531,6 +531,7 @@ static const struct driver_info wwan_inf + #define SAMSUNG_VENDOR_ID 0x04e8 + #define LENOVO_VENDOR_ID 0x17ef + #define NVIDIA_VENDOR_ID 0x0955 ++#define HP_VENDOR_ID 0x03f0 + + static const struct usb_device_id products[] = { + /* BLACKLIST !! 
+@@ -677,6 +678,13 @@ static const struct usb_device_id produc + .driver_info = 0, + }, + ++/* HP lt2523 (Novatel E371) - handled by qmi_wwan */ ++{ ++ USB_DEVICE_AND_INTERFACE_INFO(HP_VENDOR_ID, 0x421d, USB_CLASS_COMM, ++ USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), ++ .driver_info = 0, ++}, ++ + /* AnyDATA ADU960S - handled by qmi_wwan */ + { + USB_DEVICE_AND_INTERFACE_INFO(0x16d5, 0x650a, USB_CLASS_COMM, +--- a/drivers/net/usb/qmi_wwan.c ++++ b/drivers/net/usb/qmi_wwan.c +@@ -654,6 +654,13 @@ static const struct usb_device_id produc + USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&qmi_wwan_info, + }, ++ { /* HP lt2523 (Novatel E371) */ ++ USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x421d, ++ USB_CLASS_COMM, ++ USB_CDC_SUBCLASS_ETHERNET, ++ USB_CDC_PROTO_NONE), ++ .driver_info = (unsigned long)&qmi_wwan_info, ++ }, + { /* HP lt4112 LTE/HSPA+ Gobi 4G Module (Huawei me906e) */ + USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x581d, USB_CLASS_VENDOR_SPEC, 1, 7), + .driver_info = (unsigned long)&qmi_wwan_info, diff --git a/queue-4.9/r8152-don-t-execute-runtime-suspend-if-the-tx-is-not-empty.patch b/queue-4.9/r8152-don-t-execute-runtime-suspend-if-the-tx-is-not-empty.patch new file mode 100644 index 00000000000..94475177da2 --- /dev/null +++ b/queue-4.9/r8152-don-t-execute-runtime-suspend-if-the-tx-is-not-empty.patch @@ -0,0 +1,40 @@ +From foo@baz Wed Feb 1 08:50:24 CET 2017 +From: hayeswang +Date: Mon, 23 Jan 2017 14:18:43 +0800 +Subject: r8152: don't execute runtime suspend if the tx is not empty + +From: hayeswang + + +[ Upstream commit 6a0b76c04ec157c88ca943debf78a8ee58469f2d ] + +Runtime suspend shouldn't be executed if the tx queue is not empty, +because the device is not idle. + +Signed-off-by: Hayes Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/r8152.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/net/usb/r8152.c ++++ b/drivers/net/usb/r8152.c +@@ -32,7 +32,7 @@ + #define NETNEXT_VERSION "08" + + /* Information for net */ +-#define NET_VERSION "6" ++#define NET_VERSION "7" + + #define DRIVER_VERSION "v1." NETNEXT_VERSION "." NET_VERSION + #define DRIVER_AUTHOR "Realtek linux nic maintainers " +@@ -3572,6 +3572,8 @@ static bool delay_autosuspend(struct r81 + */ + if (!sw_linking && tp->rtl_ops.in_nway(tp)) + return true; ++ else if (!skb_queue_empty(&tp->tx_queue)) ++ return true; + else + return false; + } diff --git a/queue-4.9/r8152-fix-the-sw-rx-checksum-is-unavailable.patch b/queue-4.9/r8152-fix-the-sw-rx-checksum-is-unavailable.patch new file mode 100644 index 00000000000..6dcfb2cbdf9 --- /dev/null +++ b/queue-4.9/r8152-fix-the-sw-rx-checksum-is-unavailable.patch @@ -0,0 +1,47 @@ +From foo@baz Wed Feb 1 08:50:24 CET 2017 +From: hayeswang +Date: Wed, 11 Jan 2017 16:25:34 +0800 +Subject: r8152: fix the sw rx checksum is unavailable + +From: hayeswang + + +[ Upstream commit 19c0f40d4fca3a47b8f784a627f0467f0138ccc8 ] + +Fix the hw rx checksum is always enabled, and the user couldn't switch +it to sw rx checksum. + +Note that the RTL_VER_01 only support sw rx checksum only. Besides, +the hw rx checksum for RTL_VER_02 is disabled after +commit b9a321b48af4 ("r8152: Fix broken RX checksums."). Re-enable it. + +Signed-off-by: Hayes Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/r8152.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/drivers/net/usb/r8152.c ++++ b/drivers/net/usb/r8152.c +@@ -1730,7 +1730,7 @@ static u8 r8152_rx_csum(struct r8152 *tp + u8 checksum = CHECKSUM_NONE; + u32 opts2, opts3; + +- if (tp->version == RTL_VER_01 || tp->version == RTL_VER_02) ++ if (!(tp->netdev->features & NETIF_F_RXCSUM)) + goto return_result; + + opts2 = le32_to_cpu(rx_desc->opts2); +@@ -4358,6 +4358,11 @@ static int rtl8152_probe(struct usb_inte + NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | + NETIF_F_IPV6_CSUM | NETIF_F_TSO6; + ++ if (tp->version == RTL_VER_01) { ++ netdev->features &= ~NETIF_F_RXCSUM; ++ netdev->hw_features &= ~NETIF_F_RXCSUM; ++ } ++ + netdev->ethtool_ops = &ops; + netif_set_gso_max_size(netdev, RTL_LIMITED_TSO_SIZE); + diff --git a/queue-4.9/ravb-do-not-use-zero-length-alignment-dma-descriptor.patch b/queue-4.9/ravb-do-not-use-zero-length-alignment-dma-descriptor.patch new file mode 100644 index 00000000000..b0f866d30b8 --- /dev/null +++ b/queue-4.9/ravb-do-not-use-zero-length-alignment-dma-descriptor.patch @@ -0,0 +1,167 @@ +From foo@baz Wed Feb 1 08:50:24 CET 2017 +From: Masaru Nagai +Date: Mon, 16 Jan 2017 11:45:21 +0100 +Subject: ravb: do not use zero-length alignment DMA descriptor + +From: Masaru Nagai + + +[ Upstream commit 8ec3e8a192ba6f13be4522ee81227c792c86fb1a ] + +Due to alignment requirements of the hardware transmissions are split into +two DMA descriptors, a small padding descriptor of 0 - 3 bytes in length +followed by a descriptor for rest of the packet. + +In the case of IP packets the first descriptor will never be zero due to +the way that the stack aligns buffers for IP packets. However, for non-IP +packets it may be zero. + +In that case it has been reported that timeouts occur, presumably because +transmission stops at the first zero-length DMA descriptor and thus the +packet is not transmitted. 
However, in my environment a BUG is triggered as +follows: + +[ 20.381417] ------------[ cut here ]------------ +[ 20.386054] kernel BUG at lib/swiotlb.c:495! +[ 20.390324] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP +[ 20.395805] Modules linked in: +[ 20.398862] CPU: 0 PID: 2089 Comm: mz Not tainted 4.10.0-rc3-00001-gf13ad2db193f #162 +[ 20.406689] Hardware name: Renesas Salvator-X board based on r8a7796 (DT) +[ 20.413474] task: ffff80063b1f1900 task.stack: ffff80063a71c000 +[ 20.419404] PC is at swiotlb_tbl_map_single+0x178/0x2ec +[ 20.424625] LR is at map_single+0x4c/0x98 +[ 20.428629] pc : [] lr : [] pstate: 800001c5 +[ 20.436019] sp : ffff80063a71f9b0 +[ 20.439327] x29: ffff80063a71f9b0 x28: ffff80063a20d500 +[ 20.444636] x27: ffff000008ed5000 x26: 0000000000000000 +[ 20.449944] x25: 000000067abe2adc x24: 0000000000000000 +[ 20.455252] x23: 0000000000200000 x22: 0000000000000001 +[ 20.460559] x21: 0000000000175ffe x20: ffff80063b2a0010 +[ 20.465866] x19: 0000000000000000 x18: 0000ffffcae6fb20 +[ 20.471173] x17: 0000ffffa09ba018 x16: ffff0000087c8b70 +[ 20.476480] x15: 0000ffffa084f588 x14: 0000ffffa09cfa14 +[ 20.481787] x13: 0000ffffcae87ff0 x12: 000000000063abe2 +[ 20.487098] x11: ffff000008096360 x10: ffff80063abe2adc +[ 20.492407] x9 : 0000000000000000 x8 : 0000000000000000 +[ 20.497718] x7 : 0000000000000000 x6 : ffff000008ed50d0 +[ 20.503028] x5 : 0000000000000000 x4 : 0000000000000001 +[ 20.508338] x3 : 0000000000000000 x2 : 000000067abe2adc +[ 20.513648] x1 : 00000000bafff000 x0 : 0000000000000000 +[ 20.518958] +[ 20.520446] Process mz (pid: 2089, stack limit = 0xffff80063a71c000) +[ 20.526798] Stack: (0xffff80063a71f9b0 to 0xffff80063a720000) +[ 20.532543] f9a0: ffff80063a71fa30 ffff00000839c680 +[ 20.540374] f9c0: ffff80063b2a0010 ffff80063b2a0010 0000000000000001 0000000000000000 +[ 20.548204] f9e0: 000000000000006e ffff80063b23c000 ffff80063b23c000 0000000000000000 +[ 20.556034] fa00: ffff80063b23c000 ffff80063a20d500 000000013b1f1900 
0000000000000000 +[ 20.563864] fa20: ffff80063ffd18e0 ffff80063b2a0010 ffff80063a71fa60 ffff00000839cd10 +[ 20.571694] fa40: ffff80063b2a0010 0000000000000000 ffff80063ffd18e0 000000067abe2adc +[ 20.579524] fa60: ffff80063a71fa90 ffff000008096380 ffff80063b2a0010 0000000000000000 +[ 20.587353] fa80: 0000000000000000 0000000000000001 ffff80063a71fac0 ffff00000864f770 +[ 20.595184] faa0: ffff80063b23caf0 0000000000000000 0000000000000000 0000000000000140 +[ 20.603014] fac0: ffff80063a71fb60 ffff0000087e6498 ffff80063a20d500 ffff80063b23c000 +[ 20.610843] fae0: 0000000000000000 ffff000008daeaf0 0000000000000000 ffff000008daeb00 +[ 20.618673] fb00: ffff80063a71fc0c ffff000008da7000 ffff80063b23c090 ffff80063a44f000 +[ 20.626503] fb20: 0000000000000000 ffff000008daeb00 ffff80063a71fc0c ffff000008da7000 +[ 20.634333] fb40: ffff80063b23c090 0000000000000000 ffff800600000037 ffff0000087e63d8 +[ 20.642163] fb60: ffff80063a71fbc0 ffff000008807510 ffff80063a692400 ffff80063a20d500 +[ 20.649993] fb80: ffff80063a44f000 ffff80063b23c000 ffff80063a69249c 0000000000000000 +[ 20.657823] fba0: 0000000000000000 ffff80063a087800 ffff80063b23c000 ffff80063a20d500 +[ 20.665653] fbc0: ffff80063a71fc10 ffff0000087e67dc ffff80063a20d500 ffff80063a692400 +[ 20.673483] fbe0: ffff80063b23c000 0000000000000000 ffff80063a44f000 ffff80063a69249c +[ 20.681312] fc00: ffff80063a5f1a10 000000103a087800 ffff80063a71fc70 ffff0000087e6b24 +[ 20.689142] fc20: ffff80063a5f1a80 ffff80063a71fde8 000000000000000f 00000000000005ea +[ 20.696972] fc40: ffff80063a5f1a10 0000000000000000 000000000000000f ffff00000887fbd0 +[ 20.704802] fc60: fffffff43a5f1a80 0000000000000000 ffff80063a71fc80 ffff000008880240 +[ 20.712632] fc80: ffff80063a71fd90 ffff0000087c7a34 ffff80063afc7180 0000000000000000 +[ 20.720462] fca0: 0000ffffcae6fe18 0000000000000014 0000000060000000 0000000000000015 +[ 20.728292] fcc0: 0000000000000123 00000000000000ce ffff0000088d2000 ffff80063b1f1900 +[ 20.736122] fce0: 0000000000008933 
ffff000008e7cb80 ffff80063a71fd80 ffff0000087c50a4 +[ 20.743951] fd00: 0000000000008933 ffff000008e7cb80 ffff000008e7cb80 000000100000000e +[ 20.751781] fd20: ffff80063a71fe4c 0000ffff00000300 0000000000000123 0000000000000000 +[ 20.759611] fd40: 0000000000000000 ffff80063b1f0000 000000000000000e 0000000000000300 +[ 20.767441] fd60: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 +[ 20.775271] fd80: 0000000000000000 0000000000000000 ffff80063a71fda0 ffff0000087c8c20 +[ 20.783100] fda0: 0000000000000000 ffff000008082f30 0000000000000000 0000800637260000 +[ 20.790930] fdc0: ffffffffffffffff 0000ffffa0903078 0000000000000000 000000001ea87232 +[ 20.798760] fde0: 000000000000000f ffff80063a71fe40 ffff800600000014 ffff000000000001 +[ 20.806590] fe00: 0000000000000000 0000000000000000 ffff80063a71fde8 0000000000000000 +[ 20.814420] fe20: 0000000000000000 0000000000000000 0000000000000000 0000000000000001 +[ 20.822249] fe40: 0000000203000011 0000000000000000 0000000000000000 ffff80063a68aa00 +[ 20.830079] fe60: ffff80063a68aa00 0000000000000003 0000000000008933 ffff0000081f1b9c +[ 20.837909] fe80: 0000000000000000 ffff000008082f30 0000000000000000 0000800637260000 +[ 20.845739] fea0: ffffffffffffffff 0000ffffa07ca81c 0000000060000000 0000000000000015 +[ 20.853569] fec0: 0000000000000003 000000001ea87232 000000000000000f 0000000000000000 +[ 20.861399] fee0: 0000ffffcae6fe18 0000000000000014 0000000000000300 0000000000000000 +[ 20.869228] ff00: 00000000000000ce 0000000000000000 00000000ffffffff 0000000000000000 +[ 20.877059] ff20: 0000000000000002 0000ffffcae87ff0 0000ffffa09cfa14 0000ffffa084f588 +[ 20.884888] ff40: 0000000000000000 0000ffffa09ba018 0000ffffcae6fb20 000000001ea87010 +[ 20.892718] ff60: 0000ffffa09b9000 0000ffffcae6fe30 0000ffffcae6fe18 000000000000000f +[ 20.900548] ff80: 0000000000000003 000000001ea87232 0000000000000000 0000000000000000 +[ 20.908378] ffa0: 0000000000000000 0000ffffcae6fdc0 0000ffffa09a7824 0000ffffcae6fdc0 +[ 
20.916208] ffc0: 0000ffffa0903078 0000000060000000 0000000000000003 00000000000000ce +[ 20.924038] ffe0: 0000000000000000 0000000000000000 ffffffffffffffff ffffffffffffffff +[ 20.931867] Call trace: +[ 20.934312] Exception stack(0xffff80063a71f7e0 to 0xffff80063a71f910) +[ 20.940750] f7e0: 0000000000000000 0001000000000000 ffff80063a71f9b0 ffff00000839c4c0 +[ 20.948580] f800: ffff80063a71f840 ffff00000888a6e4 ffff80063a24c418 ffff80063a24c448 +[ 20.956410] f820: 0000000000000000 ffff00000811cd54 ffff80063a71f860 ffff80063a24c458 +[ 20.964240] f840: ffff80063a71f870 ffff00000888b258 ffff80063a24c418 0000000000000001 +[ 20.972070] f860: ffff80063a71f910 ffff80063a7b7028 ffff80063a71f890 ffff0000088825e4 +[ 20.979899] f880: 0000000000000000 00000000bafff000 000000067abe2adc 0000000000000000 +[ 20.987729] f8a0: 0000000000000001 0000000000000000 ffff000008ed50d0 0000000000000000 +[ 20.995560] f8c0: 0000000000000000 0000000000000000 ffff80063abe2adc ffff000008096360 +[ 21.003390] f8e0: 000000000063abe2 0000ffffcae87ff0 0000ffffa09cfa14 0000ffffa084f588 +[ 21.011219] f900: ffff0000087c8b70 0000ffffa09ba018 +[ 21.016097] [] swiotlb_tbl_map_single+0x178/0x2ec +[ 21.022362] [] map_single+0x4c/0x98 +[ 21.027411] [] swiotlb_map_page+0xa4/0x138 +[ 21.033072] [] __swiotlb_map_page+0x20/0x7c +[ 21.038821] [] ravb_start_xmit+0x174/0x668 +[ 21.044484] [] dev_hard_start_xmit+0x8c/0x120 +[ 21.050407] [] sch_direct_xmit+0x108/0x1a0 +[ 21.056064] [] __dev_queue_xmit+0x194/0x4cc +[ 21.061807] [] dev_queue_xmit+0x10/0x18 +[ 21.067214] [] packet_sendmsg+0xf40/0x1220 +[ 21.072873] [] sock_sendmsg+0x18/0x2c +[ 21.078097] [] SyS_sendto+0xb0/0xf0 +[ 21.083150] [] el0_svc_naked+0x24/0x28 +[ 21.088462] Code: d34bfef7 2a1803f3 1a9f86d6 35fff878 (d4210000) +[ 21.094611] ---[ end trace 5bc544ad491f3814 ]--- +[ 21.099234] Kernel panic - not syncing: Fatal exception in interrupt +[ 21.105587] Kernel Offset: disabled +[ 21.109073] Memory Limit: none +[ 21.112126] ---[ end Kernel panic - not syncing: 
Fatal exception in interrupt + +Fixes: 2f45d1902acf ("ravb: minimize TX data copying") +Signed-off-by: Masaru Nagai +Acked-by: Sergei Shtylyov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/renesas/ravb_main.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +--- a/drivers/net/ethernet/renesas/ravb_main.c ++++ b/drivers/net/ethernet/renesas/ravb_main.c +@@ -1508,6 +1508,19 @@ static netdev_tx_t ravb_start_xmit(struc + buffer = PTR_ALIGN(priv->tx_align[q], DPTR_ALIGN) + + entry / NUM_TX_DESC * DPTR_ALIGN; + len = PTR_ALIGN(skb->data, DPTR_ALIGN) - skb->data; ++ /* Zero length DMA descriptors are problematic as they seem to ++ * terminate DMA transfers. Avoid them by simply using a length of ++ * DPTR_ALIGN (4) when skb data is aligned to DPTR_ALIGN. ++ * ++ * As skb is guaranteed to have at least ETH_ZLEN (60) bytes of ++ * data by the call to skb_put_padto() above this is safe with ++ * respect to both the length of the first DMA descriptor (len) ++ * overflowing the available data and the length of the second DMA ++ * descriptor (skb->len - len) being negative. 
++ */ ++ if (len == 0) ++ len = DPTR_ALIGN; ++ + memcpy(buffer, skb->data, len); + dma_addr = dma_map_single(ndev->dev.parent, buffer, len, DMA_TO_DEVICE); + if (dma_mapping_error(ndev->dev.parent, dma_addr)) diff --git a/queue-4.9/tcp-fix-tcp_fastopen-unaligned-access-complaints-on-sparc.patch b/queue-4.9/tcp-fix-tcp_fastopen-unaligned-access-complaints-on-sparc.patch new file mode 100644 index 00000000000..06a601cf676 --- /dev/null +++ b/queue-4.9/tcp-fix-tcp_fastopen-unaligned-access-complaints-on-sparc.patch @@ -0,0 +1,61 @@ +From foo@baz Wed Feb 1 08:50:24 CET 2017 +From: Shannon Nelson +Date: Thu, 12 Jan 2017 14:24:58 -0800 +Subject: tcp: fix tcp_fastopen unaligned access complaints on sparc + +From: Shannon Nelson + + +[ Upstream commit 003c941057eaa868ca6fedd29a274c863167230d ] + +Fix up a data alignment issue on sparc by swapping the order +of the cookie byte array field with the length field in +struct tcp_fastopen_cookie, and making it a proper union +to clean up the typecasting. + +This addresses log complaints like these: + log_unaligned: 113 callbacks suppressed + Kernel unaligned access at TPC[976490] tcp_try_fastopen+0x2d0/0x360 + Kernel unaligned access at TPC[9764ac] tcp_try_fastopen+0x2ec/0x360 + Kernel unaligned access at TPC[9764c8] tcp_try_fastopen+0x308/0x360 + Kernel unaligned access at TPC[9764e4] tcp_try_fastopen+0x324/0x360 + Kernel unaligned access at TPC[976490] tcp_try_fastopen+0x2d0/0x360 + +Cc: Eric Dumazet +Signed-off-by: Shannon Nelson +Acked-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/tcp.h | 7 ++++++- + net/ipv4/tcp_fastopen.c | 2 +- + 2 files changed, 7 insertions(+), 2 deletions(-) + +--- a/include/linux/tcp.h ++++ b/include/linux/tcp.h +@@ -62,8 +62,13 @@ static inline unsigned int tcp_optlen(co + + /* TCP Fast Open Cookie as stored in memory */ + struct tcp_fastopen_cookie { ++ union { ++ u8 val[TCP_FASTOPEN_COOKIE_MAX]; ++#if IS_ENABLED(CONFIG_IPV6) ++ struct in6_addr addr; ++#endif ++ }; + s8 len; +- u8 val[TCP_FASTOPEN_COOKIE_MAX]; + bool exp; /* In RFC6994 experimental option format */ + }; + +--- a/net/ipv4/tcp_fastopen.c ++++ b/net/ipv4/tcp_fastopen.c +@@ -113,7 +113,7 @@ static bool tcp_fastopen_cookie_gen(stru + struct tcp_fastopen_cookie tmp; + + if (__tcp_fastopen_cookie_gen(&ip6h->saddr, &tmp)) { +- struct in6_addr *buf = (struct in6_addr *) tmp.val; ++ struct in6_addr *buf = &tmp.addr; + int i; + + for (i = 0; i < 4; i++) diff --git a/queue-4.9/tcp-initialize-max-window-for-a-new-fastopen-socket.patch b/queue-4.9/tcp-initialize-max-window-for-a-new-fastopen-socket.patch new file mode 100644 index 00000000000..cd0ccc0ff69 --- /dev/null +++ b/queue-4.9/tcp-initialize-max-window-for-a-new-fastopen-socket.patch @@ -0,0 +1,58 @@ +From foo@baz Wed Feb 1 08:50:24 CET 2017 +From: Alexey Kodanev +Date: Thu, 19 Jan 2017 16:36:39 +0300 +Subject: tcp: initialize max window for a new fastopen socket + +From: Alexey Kodanev + + +[ Upstream commit 0dbd7ff3ac5017a46033a9d0a87a8267d69119d9 ] + +Found that if we run LTP netstress test with large MSS (65K), +the first attempt from server to send data comparable to this +MSS on fastopen connection will be delayed by the probe timer. + +Here is an example: + + < S seq 0:0 win 43690 options [mss 65495 wscale 7 tfo cookie] length 32 + > S. seq 0:0 ack 1 win 43690 options [mss 65495 wscale 7] length 0 + < . ack 1 win 342 length 0 + +Inside tcp_sendmsg(), tcp_send_mss() returns max MSS in 'mss_now', +as well as in 'size_goal'. 
This results the segment not queued for +transmition until all the data copied from user buffer. Then, inside +__tcp_push_pending_frames(), it breaks on send window test and +continues with the check probe timer. + +Fragmentation occurs in tcp_write_wakeup()... + ++0.2 > P. seq 1:43777 ack 1 win 342 length 43776 + < . ack 43777, win 1365 length 0 + > P. seq 43777:65001 ack 1 win 342 options [...] length 21224 + ... + +This also contradicts with the fact that we should bound to the half +of the window if it is large. + +Fix this flaw by correctly initializing max_window. Before that, it +could have large values that affect further calculations of 'size_goal'. + +Fixes: 168a8f58059a ("tcp: TCP Fast Open Server - main code path") +Signed-off-by: Alexey Kodanev +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_fastopen.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ipv4/tcp_fastopen.c ++++ b/net/ipv4/tcp_fastopen.c +@@ -205,6 +205,7 @@ static struct sock *tcp_fastopen_create_ + * scaled. So correct it appropriately. + */ + tp->snd_wnd = ntohs(tcp_hdr(skb)->window); ++ tp->max_window = tp->snd_wnd; + + /* Activate the retrans timer so that SYNACK can be retransmitted. 
+ * The request socket is not added to the ehash diff --git a/queue-4.9/virtio-don-t-set-virtio_net_hdr_f_data_valid-on-xmit.patch b/queue-4.9/virtio-don-t-set-virtio_net_hdr_f_data_valid-on-xmit.patch new file mode 100644 index 00000000000..674959c7c1f --- /dev/null +++ b/queue-4.9/virtio-don-t-set-virtio_net_hdr_f_data_valid-on-xmit.patch @@ -0,0 +1,42 @@ +From foo@baz Wed Feb 1 08:50:24 CET 2017 +From: Rolf Neugebauer +Date: Tue, 17 Jan 2017 18:13:51 +0000 +Subject: virtio: don't set VIRTIO_NET_HDR_F_DATA_VALID on xmit + +From: Rolf Neugebauer + + +[ Upstream commit 501db511397fd6efff3aa5b4e8de415b55559550 ] + +This patch part reverts fd2a0437dc33 and e858fae2b0b8 which introduced a +subtle change in how the virtio_net flags are derived from the SKBs +ip_summed field. + +With the above commits, the flags are set to VIRTIO_NET_HDR_F_DATA_VALID +when ip_summed == CHECKSUM_UNNECESSARY, thus treating it differently to +ip_summed == CHECKSUM_NONE, which should be the same. + +Further, the virtio spec 1.0 / CS04 explicitly says that +VIRTIO_NET_HDR_F_DATA_VALID must not be set by the driver. + +Fixes: fd2a0437dc33 ("virtio_net: introduce virtio_net_hdr_{from,to}_skb") +Fixes: e858fae2b0b8 (" virtio_net: use common code for virtio_net_hdr and skb GSO conversion") +Signed-off-by: Rolf Neugebauer +Acked-by: Michael S. Tsirkin +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/virtio_net.h | 2 -- + 1 file changed, 2 deletions(-) + +--- a/include/linux/virtio_net.h ++++ b/include/linux/virtio_net.h +@@ -91,8 +91,6 @@ static inline int virtio_net_hdr_from_sk + skb_checksum_start_offset(skb)); + hdr->csum_offset = __cpu_to_virtio16(little_endian, + skb->csum_offset); +- } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) { +- hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID; + } /* else everything is zero */ + + return 0; diff --git a/queue-4.9/virtio-net-restore-virtio_hdr_f_data_valid-on-receiving.patch b/queue-4.9/virtio-net-restore-virtio_hdr_f_data_valid-on-receiving.patch new file mode 100644 index 00000000000..3d1545df0f7 --- /dev/null +++ b/queue-4.9/virtio-net-restore-virtio_hdr_f_data_valid-on-receiving.patch @@ -0,0 +1,94 @@ +From foo@baz Wed Feb 1 08:50:24 CET 2017 +From: Jason Wang +Date: Fri, 20 Jan 2017 14:32:42 +0800 +Subject: virtio-net: restore VIRTIO_HDR_F_DATA_VALID on receiving + +From: Jason Wang + + +[ Upstream commit 6391a4481ba0796805d6581e42f9f0418c099e34 ] + +Commit 501db511397f ("virtio: don't set VIRTIO_NET_HDR_F_DATA_VALID on +xmit") in fact disables VIRTIO_HDR_F_DATA_VALID on receiving path too, +fixing this by adding a hint (has_data_valid) and set it only on the +receiving path. + +Cc: Rolf Neugebauer +Signed-off-by: Jason Wang +Acked-by: Rolf Neugebauer +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/macvtap.c | 2 +- + drivers/net/tun.c | 2 +- + drivers/net/virtio_net.c | 2 +- + include/linux/virtio_net.h | 6 +++++- + net/packet/af_packet.c | 2 +- + 5 files changed, 9 insertions(+), 5 deletions(-) + +--- a/drivers/net/macvtap.c ++++ b/drivers/net/macvtap.c +@@ -827,7 +827,7 @@ static ssize_t macvtap_put_user(struct m + return -EINVAL; + + ret = virtio_net_hdr_from_skb(skb, &vnet_hdr, +- macvtap_is_little_endian(q)); ++ macvtap_is_little_endian(q), true); + if (ret) + BUG(); + +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -1374,7 +1374,7 @@ static ssize_t tun_put_user(struct tun_s + return -EINVAL; + + ret = virtio_net_hdr_from_skb(skb, &gso, +- tun_is_little_endian(tun)); ++ tun_is_little_endian(tun), true); + if (ret) { + struct skb_shared_info *sinfo = skb_shinfo(skb); + pr_err("unexpected GSO type: " +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -840,7 +840,7 @@ static int xmit_skb(struct send_queue *s + hdr = skb_vnet_hdr(skb); + + if (virtio_net_hdr_from_skb(skb, &hdr->hdr, +- virtio_is_little_endian(vi->vdev))) ++ virtio_is_little_endian(vi->vdev), false)) + BUG(); + + if (vi->mergeable_rx_bufs) +--- a/include/linux/virtio_net.h ++++ b/include/linux/virtio_net.h +@@ -56,7 +56,8 @@ static inline int virtio_net_hdr_to_skb( + + static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, + struct virtio_net_hdr *hdr, +- bool little_endian) ++ bool little_endian, ++ bool has_data_valid) + { + memset(hdr, 0, sizeof(*hdr)); + +@@ -91,6 +92,9 @@ static inline int virtio_net_hdr_from_sk + skb_checksum_start_offset(skb)); + hdr->csum_offset = __cpu_to_virtio16(little_endian, + skb->csum_offset); ++ } else if (has_data_valid && ++ skb->ip_summed == CHECKSUM_UNNECESSARY) { ++ hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID; + } /* else everything is zero */ + + return 0; +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -1972,7 +1972,7 @@ static int 
__packet_rcv_vnet(const struc + { + *vnet_hdr = (const struct virtio_net_hdr) { 0 }; + +- if (virtio_net_hdr_from_skb(skb, vnet_hdr, vio_le())) ++ if (virtio_net_hdr_from_skb(skb, vnet_hdr, vio_le(), true)) + BUG(); + + return 0; diff --git a/queue-4.9/vxlan-fix-byte-order-of-vxlan-gpe-port-number.patch b/queue-4.9/vxlan-fix-byte-order-of-vxlan-gpe-port-number.patch new file mode 100644 index 00000000000..2cc337ffdd1 --- /dev/null +++ b/queue-4.9/vxlan-fix-byte-order-of-vxlan-gpe-port-number.patch @@ -0,0 +1,34 @@ +From foo@baz Wed Feb 1 08:50:24 CET 2017 +From: Lance Richardson +Date: Mon, 16 Jan 2017 18:37:58 -0500 +Subject: vxlan: fix byte order of vxlan-gpe port number + +From: Lance Richardson + + +[ Upstream commit d5ff72d9af73bc3cbaa3edb541333a851f8c7295 ] + +vxlan->cfg.dst_port is in network byte order, so an htons() +is needed here. Also reduced comment length to stay closer +to 80 column width (still slightly over, however). + +Fixes: e1e5314de08b ("vxlan: implement GPE") +Signed-off-by: Lance Richardson +Acked-by: Jiri Benc +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vxlan.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/vxlan.c ++++ b/drivers/net/vxlan.c +@@ -2887,7 +2887,7 @@ static int vxlan_dev_configure(struct ne + memcpy(&vxlan->cfg, conf, sizeof(*conf)); + if (!vxlan->cfg.dst_port) { + if (conf->flags & VXLAN_F_GPE) +- vxlan->cfg.dst_port = 4790; /* IANA assigned VXLAN-GPE port */ ++ vxlan->cfg.dst_port = htons(4790); /* IANA VXLAN-GPE port */ + else + vxlan->cfg.dst_port = default_port; + } -- 2.47.3