git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.9-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 8 Mar 2018 18:19:04 +0000 (10:19 -0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 8 Mar 2018 18:19:04 +0000 (10:19 -0800)
added patches:
bridge-check-brport-attr-show-in-brport_show.patch
fib_semantics-don-t-match-route-with-mismatching-tclassid.patch
hdlc_ppp-carrier-detect-ok-don-t-turn-off-negotiation.patch
ipv6-sit-work-around-bogus-gcc-8-wrestrict-warning.patch
mlxsw-spectrum_switchdev-check-success-of-fdb-add-operation.patch
net-fix-race-on-decreasing-number-of-tx-queues.patch
net-ipv4-don-t-allow-setting-net.ipv4.route.min_pmtu-below-68.patch
net-phy-fix-phy_start-to-consider-phy_ignore_interrupt.patch
netlink-ensure-to-loop-over-all-netns-in-genlmsg_multicast_allns.patch
ppp-prevent-unregistered-channels-from-connecting-to-ppp-units.patch
rxrpc-fix-send-in-rxrpc_send_data_packet.patch
s390-qeth-fix-double-free-on-ip-add-remove-race.patch
s390-qeth-fix-ip-address-lookup-for-l3-devices.patch
s390-qeth-fix-ip-removal-on-offline-cards.patch
s390-qeth-fix-ipa-command-submission-race.patch
s390-qeth-fix-overestimated-count-of-buffer-elements.patch
s390-qeth-fix-setip-command-handling.patch
s390-qeth-fix-underestimated-count-of-buffer-elements.patch
sctp-fix-dst-refcnt-leak-in-sctp_v4_get_dst.patch
sctp-fix-dst-refcnt-leak-in-sctp_v6_get_dst.patch
sctp-verify-size-of-a-new-chunk-in-_sctp_make_chunk.patch
tcp-honor-the-eor-bit-in-tcp_mtu_probe.patch
tcp_bbr-better-deal-with-suboptimal-gso.patch
udplite-fix-partial-checksum-initialization.patch

25 files changed:
queue-4.9/bridge-check-brport-attr-show-in-brport_show.patch [new file with mode: 0644]
queue-4.9/fib_semantics-don-t-match-route-with-mismatching-tclassid.patch [new file with mode: 0644]
queue-4.9/hdlc_ppp-carrier-detect-ok-don-t-turn-off-negotiation.patch [new file with mode: 0644]
queue-4.9/ipv6-sit-work-around-bogus-gcc-8-wrestrict-warning.patch [new file with mode: 0644]
queue-4.9/mlxsw-spectrum_switchdev-check-success-of-fdb-add-operation.patch [new file with mode: 0644]
queue-4.9/net-fix-race-on-decreasing-number-of-tx-queues.patch [new file with mode: 0644]
queue-4.9/net-ipv4-don-t-allow-setting-net.ipv4.route.min_pmtu-below-68.patch [new file with mode: 0644]
queue-4.9/net-phy-fix-phy_start-to-consider-phy_ignore_interrupt.patch [new file with mode: 0644]
queue-4.9/netlink-ensure-to-loop-over-all-netns-in-genlmsg_multicast_allns.patch [new file with mode: 0644]
queue-4.9/ppp-prevent-unregistered-channels-from-connecting-to-ppp-units.patch [new file with mode: 0644]
queue-4.9/rxrpc-fix-send-in-rxrpc_send_data_packet.patch [new file with mode: 0644]
queue-4.9/s390-qeth-fix-double-free-on-ip-add-remove-race.patch [new file with mode: 0644]
queue-4.9/s390-qeth-fix-ip-address-lookup-for-l3-devices.patch [new file with mode: 0644]
queue-4.9/s390-qeth-fix-ip-removal-on-offline-cards.patch [new file with mode: 0644]
queue-4.9/s390-qeth-fix-ipa-command-submission-race.patch [new file with mode: 0644]
queue-4.9/s390-qeth-fix-overestimated-count-of-buffer-elements.patch [new file with mode: 0644]
queue-4.9/s390-qeth-fix-setip-command-handling.patch [new file with mode: 0644]
queue-4.9/s390-qeth-fix-underestimated-count-of-buffer-elements.patch [new file with mode: 0644]
queue-4.9/sctp-fix-dst-refcnt-leak-in-sctp_v4_get_dst.patch [new file with mode: 0644]
queue-4.9/sctp-fix-dst-refcnt-leak-in-sctp_v6_get_dst.patch [new file with mode: 0644]
queue-4.9/sctp-verify-size-of-a-new-chunk-in-_sctp_make_chunk.patch [new file with mode: 0644]
queue-4.9/series
queue-4.9/tcp-honor-the-eor-bit-in-tcp_mtu_probe.patch [new file with mode: 0644]
queue-4.9/tcp_bbr-better-deal-with-suboptimal-gso.patch [new file with mode: 0644]
queue-4.9/udplite-fix-partial-checksum-initialization.patch [new file with mode: 0644]

diff --git a/queue-4.9/bridge-check-brport-attr-show-in-brport_show.patch b/queue-4.9/bridge-check-brport-attr-show-in-brport_show.patch
new file mode 100644 (file)
index 0000000..4f1946d
--- /dev/null
@@ -0,0 +1,48 @@
+From foo@baz Thu Mar  8 06:55:02 PST 2018
+From: Xin Long <lucien.xin@gmail.com>
+Date: Mon, 12 Feb 2018 17:15:40 +0800
+Subject: bridge: check brport attr show in brport_show
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit 1b12580af1d0677c3c3a19e35bfe5d59b03f737f ]
+
+Now br_sysfs_if file flush doesn't have attr show. To read it will
+cause kernel panic after users chmod u+r this file.
+
+Xiong found this issue when running the commands:
+
+  ip link add br0 type bridge
+  ip link add type veth
+  ip link set veth0 master br0
+  chmod u+r /sys/devices/virtual/net/veth0/brport/flush
+  timeout 3 cat /sys/devices/virtual/net/veth0/brport/flush
+
+kernel crashed with a NULL pointer dereference call trace.
+
+This patch fixes it by returning -EINVAL when brport_attr->show
+is NULL, just the same as the check for brport_attr->store in
+brport_store().
+
+Fixes: 9cf637473c85 ("bridge: add sysfs hook to flush forwarding table")
+Reported-by: Xiong Zhou <xzhou@redhat.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_sysfs_if.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/bridge/br_sysfs_if.c
++++ b/net/bridge/br_sysfs_if.c
+@@ -230,6 +230,9 @@ static ssize_t brport_show(struct kobjec
+       struct brport_attribute *brport_attr = to_brport_attr(attr);
+       struct net_bridge_port *p = to_brport(kobj);
++      if (!brport_attr->show)
++              return -EINVAL;
++
+       return brport_attr->show(p, buf);
+ }
diff --git a/queue-4.9/fib_semantics-don-t-match-route-with-mismatching-tclassid.patch b/queue-4.9/fib_semantics-don-t-match-route-with-mismatching-tclassid.patch
new file mode 100644 (file)
index 0000000..a1100c7
--- /dev/null
@@ -0,0 +1,66 @@
+From foo@baz Thu Mar  8 06:55:02 PST 2018
+From: Stefano Brivio <sbrivio@redhat.com>
+Date: Thu, 15 Feb 2018 09:46:03 +0100
+Subject: fib_semantics: Don't match route with mismatching tclassid
+
+From: Stefano Brivio <sbrivio@redhat.com>
+
+
+[ Upstream commit a8c6db1dfd1b1d18359241372bb204054f2c3174 ]
+
+In fib_nh_match(), if output interface or gateway are passed in
+the FIB configuration, we don't have to check next hops of
+multipath routes to conclude whether we have a match or not.
+
+However, we might still have routes with different realms
+matching the same output interface and gateway configuration,
+and this needs to cause the match to fail. Otherwise the first
+route inserted in the FIB will match, regardless of the realms:
+
+ # ip route add 1.1.1.1 dev eth0 table 1234 realms 1/2
+ # ip route append 1.1.1.1 dev eth0 table 1234 realms 3/4
+ # ip route list table 1234
+ 1.1.1.1 dev eth0 scope link realms 1/2
+ 1.1.1.1 dev eth0 scope link realms 3/4
+ # ip route del 1.1.1.1 dev eth0 table 1234 realms 3/4
+ # ip route list table 1234
+ 1.1.1.1 dev eth0 scope link realms 3/4
+
+whereas route with realms 3/4 should have been deleted instead.
+
+Explicitly check for fc_flow passed in the FIB configuration
+(this comes from RTA_FLOW extracted by rtm_to_fib_config()) and
+fail matching if it differs from nh_tclassid.
+
+The handling of RTA_FLOW for multipath routes later in
+fib_nh_match() is still needed, as we can have multiple RTA_FLOW
+attributes that need to be matched against the tclassid of each
+next hop.
+
+v2: Check that fc_flow is set before discarding the match, so
+    that the user can still select the first matching rule by
+    not specifying any realm, as suggested by David Ahern.
+
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_semantics.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/net/ipv4/fib_semantics.c
++++ b/net/ipv4/fib_semantics.c
+@@ -640,6 +640,11 @@ int fib_nh_match(struct fib_config *cfg,
+                                           fi->fib_nh, cfg))
+                           return 1;
+               }
++#ifdef CONFIG_IP_ROUTE_CLASSID
++              if (cfg->fc_flow &&
++                  cfg->fc_flow != fi->fib_nh->nh_tclassid)
++                      return 1;
++#endif
+               if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
+                   (!cfg->fc_gw  || cfg->fc_gw == fi->fib_nh->nh_gw))
+                       return 0;
diff --git a/queue-4.9/hdlc_ppp-carrier-detect-ok-don-t-turn-off-negotiation.patch b/queue-4.9/hdlc_ppp-carrier-detect-ok-don-t-turn-off-negotiation.patch
new file mode 100644 (file)
index 0000000..9939939
--- /dev/null
@@ -0,0 +1,37 @@
+From foo@baz Thu Mar  8 06:55:02 PST 2018
+From: Denis Du <dudenis2000@yahoo.ca>
+Date: Sat, 24 Feb 2018 16:51:42 -0500
+Subject: hdlc_ppp: carrier detect ok, don't turn off negotiation
+
+From: Denis Du <dudenis2000@yahoo.ca>
+
+
+[ Upstream commit b6c3bad1ba83af1062a7ff6986d9edc4f3d7fc8e ]
+
+Sometimes a physical line has just enough noise to make the protocol
+handshaking fail while carrier detect is still good. Then, after the noise
+is removed, nothing triggers the protocol to start again, so the link
+never comes back. The fix is to not terminate the protocol handshaking
+while the carrier is still on.
+
+Signed-off-by: Denis Du <dudenis2000@yahoo.ca>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/wan/hdlc_ppp.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/wan/hdlc_ppp.c
++++ b/drivers/net/wan/hdlc_ppp.c
+@@ -574,7 +574,10 @@ static void ppp_timer(unsigned long arg)
+                       ppp_cp_event(proto->dev, proto->pid, TO_GOOD, 0, 0,
+                                    0, NULL);
+                       proto->restart_counter--;
+-              } else
++              } else if (netif_carrier_ok(proto->dev))
++                      ppp_cp_event(proto->dev, proto->pid, TO_GOOD, 0, 0,
++                                   0, NULL);
++              else
+                       ppp_cp_event(proto->dev, proto->pid, TO_BAD, 0, 0,
+                                    0, NULL);
+               break;
diff --git a/queue-4.9/ipv6-sit-work-around-bogus-gcc-8-wrestrict-warning.patch b/queue-4.9/ipv6-sit-work-around-bogus-gcc-8-wrestrict-warning.patch
new file mode 100644 (file)
index 0000000..7ad6980
--- /dev/null
@@ -0,0 +1,49 @@
+From foo@baz Thu Mar  8 06:55:02 PST 2018
+From: Arnd Bergmann <arnd@arndb.de>
+Date: Thu, 22 Feb 2018 16:55:34 +0100
+Subject: ipv6 sit: work around bogus gcc-8 -Wrestrict warning
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+
+[ Upstream commit ca79bec237f5809a7c3c59bd41cd0880aa889966 ]
+
+gcc-8 has a new warning that detects overlapping input and output arguments
+in memcpy(). It triggers for sit_init_net() calling ipip6_tunnel_clone_6rd(),
+which is actually correct:
+
+net/ipv6/sit.c: In function 'sit_init_net':
+net/ipv6/sit.c:192:3: error: 'memcpy' source argument is the same as destination [-Werror=restrict]
+
+The problem here is that the logic detecting the memcpy() arguments finds them
+to be the same, but the conditional that tests for the input and output of
+ipip6_tunnel_clone_6rd() to be identical is not a compile-time constant.
+
+We know that netdev_priv(t->dev) is the same as t for a tunnel device,
+and comparing "dev" directly here lets the compiler figure out as well
+that 'dev == sitn->fb_tunnel_dev' when called from sit_init_net(), so
+it no longer warns.
+
+This code is old, so Cc stable to make sure that we don't get the warning
+for older kernels built with new gcc.
+
+Cc: Martin Sebor <msebor@gmail.com>
+Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83456
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/sit.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv6/sit.c
++++ b/net/ipv6/sit.c
+@@ -176,7 +176,7 @@ static void ipip6_tunnel_clone_6rd(struc
+ #ifdef CONFIG_IPV6_SIT_6RD
+       struct ip_tunnel *t = netdev_priv(dev);
+-      if (t->dev == sitn->fb_tunnel_dev) {
++      if (dev == sitn->fb_tunnel_dev) {
+               ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0);
+               t->ip6rd.relay_prefix = 0;
+               t->ip6rd.prefixlen = 16;
diff --git a/queue-4.9/mlxsw-spectrum_switchdev-check-success-of-fdb-add-operation.patch b/queue-4.9/mlxsw-spectrum_switchdev-check-success-of-fdb-add-operation.patch
new file mode 100644 (file)
index 0000000..4a2b5fe
--- /dev/null
@@ -0,0 +1,104 @@
+From foo@baz Thu Mar  8 06:55:02 PST 2018
+From: Shalom Toledo <shalomt@mellanox.com>
+Date: Thu, 1 Mar 2018 11:37:05 +0100
+Subject: mlxsw: spectrum_switchdev: Check success of FDB add operation
+
+From: Shalom Toledo <shalomt@mellanox.com>
+
+
+[ Upstream commit 0a8a1bf17e3af34f1f8d2368916a6327f8b3bfd5 ]
+
+Until now, we assumed that in case of error when adding FDB entries, the
+write operation will fail, but this is not the case. Instead, we need to
+check that the number of entries reported in the response is equal to
+the number of entries specified in the request.
+
+Fixes: 56ade8fe3fe1 ("mlxsw: spectrum: Add initial support for Spectrum ASIC")
+Reported-by: Ido Schimmel <idosch@mellanox.com>
+Signed-off-by: Shalom Toledo <shalomt@mellanox.com>
+Reviewed-by: Ido Schimmel <idosch@mellanox.com>
+Signed-off-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c |   29 +++++++++++++--
+ 1 file changed, 27 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+@@ -809,6 +809,7 @@ static int __mlxsw_sp_port_fdb_uc_op(str
+                                    bool dynamic)
+ {
+       char *sfd_pl;
++      u8 num_rec;
+       int err;
+       sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL);
+@@ -818,9 +819,16 @@ static int __mlxsw_sp_port_fdb_uc_op(str
+       mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0);
+       mlxsw_reg_sfd_uc_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic),
+                             mac, fid, action, local_port);
++      num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl);
+       err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl);
+-      kfree(sfd_pl);
++      if (err)
++              goto out;
++
++      if (num_rec != mlxsw_reg_sfd_num_rec_get(sfd_pl))
++              err = -EBUSY;
++out:
++      kfree(sfd_pl);
+       return err;
+ }
+@@ -845,6 +853,7 @@ static int mlxsw_sp_port_fdb_uc_lag_op(s
+                                      bool adding, bool dynamic)
+ {
+       char *sfd_pl;
++      u8 num_rec;
+       int err;
+       sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL);
+@@ -855,9 +864,16 @@ static int mlxsw_sp_port_fdb_uc_lag_op(s
+       mlxsw_reg_sfd_uc_lag_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic),
+                                 mac, fid, MLXSW_REG_SFD_REC_ACTION_NOP,
+                                 lag_vid, lag_id);
++      num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl);
+       err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl);
+-      kfree(sfd_pl);
++      if (err)
++              goto out;
++      if (num_rec != mlxsw_reg_sfd_num_rec_get(sfd_pl))
++              err = -EBUSY;
++
++out:
++      kfree(sfd_pl);
+       return err;
+ }
+@@ -891,6 +907,7 @@ static int mlxsw_sp_port_mdb_op(struct m
+                               u16 fid, u16 mid, bool adding)
+ {
+       char *sfd_pl;
++      u8 num_rec;
+       int err;
+       sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL);
+@@ -900,7 +917,15 @@ static int mlxsw_sp_port_mdb_op(struct m
+       mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0);
+       mlxsw_reg_sfd_mc_pack(sfd_pl, 0, addr, fid,
+                             MLXSW_REG_SFD_REC_ACTION_NOP, mid);
++      num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl);
+       err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl);
++      if (err)
++              goto out;
++
++      if (num_rec != mlxsw_reg_sfd_num_rec_get(sfd_pl))
++              err = -EBUSY;
++
++out:
+       kfree(sfd_pl);
+       return err;
+ }
diff --git a/queue-4.9/net-fix-race-on-decreasing-number-of-tx-queues.patch b/queue-4.9/net-fix-race-on-decreasing-number-of-tx-queues.patch
new file mode 100644 (file)
index 0000000..c3e5e4b
--- /dev/null
@@ -0,0 +1,68 @@
+From foo@baz Thu Mar  8 06:55:02 PST 2018
+From: Jakub Kicinski <jakub.kicinski@netronome.com>
+Date: Mon, 12 Feb 2018 21:35:31 -0800
+Subject: net: fix race on decreasing number of TX queues
+
+From: Jakub Kicinski <jakub.kicinski@netronome.com>
+
+
+[ Upstream commit ac5b70198adc25c73fba28de4f78adcee8f6be0b ]
+
+netif_set_real_num_tx_queues() can be called when netdev is up.
+That usually happens when user requests change of number of
+channels/rings with ethtool -L.  The procedure for changing
+the number of queues involves resetting the qdiscs and setting
+dev->real_num_tx_queues to the new value.  When the new value is
+lower than the old one, extra care has to be taken to ensure
+ordering of accesses to the number of queues vs qdisc reset.
+
+Currently the queues are reset before new dev->real_num_tx_queues
+is assigned, leaving a window of time where packets can be
+enqueued onto the queues going down, leading to a likely
+crash in the drivers, since most drivers don't check if TX
+skbs are assigned to an active queue.
+
+Fixes: e6484930d7c7 ("net: allocate tx queues in register_netdevice")
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c |   11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -2199,8 +2199,11 @@ EXPORT_SYMBOL(netif_set_xps_queue);
+  */
+ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
+ {
++      bool disabling;
+       int rc;
++      disabling = txq < dev->real_num_tx_queues;
++
+       if (txq < 1 || txq > dev->num_tx_queues)
+               return -EINVAL;
+@@ -2216,15 +2219,19 @@ int netif_set_real_num_tx_queues(struct
+               if (dev->num_tc)
+                       netif_setup_tc(dev, txq);
+-              if (txq < dev->real_num_tx_queues) {
++              dev->real_num_tx_queues = txq;
++
++              if (disabling) {
++                      synchronize_net();
+                       qdisc_reset_all_tx_gt(dev, txq);
+ #ifdef CONFIG_XPS
+                       netif_reset_xps_queues_gt(dev, txq);
+ #endif
+               }
++      } else {
++              dev->real_num_tx_queues = txq;
+       }
+-      dev->real_num_tx_queues = txq;
+       return 0;
+ }
+ EXPORT_SYMBOL(netif_set_real_num_tx_queues);
diff --git a/queue-4.9/net-ipv4-don-t-allow-setting-net.ipv4.route.min_pmtu-below-68.patch b/queue-4.9/net-ipv4-don-t-allow-setting-net.ipv4.route.min_pmtu-below-68.patch
new file mode 100644 (file)
index 0000000..90db42f
--- /dev/null
@@ -0,0 +1,64 @@
+From foo@baz Thu Mar  8 06:55:02 PST 2018
+From: Sabrina Dubroca <sd@queasysnail.net>
+Date: Mon, 26 Feb 2018 16:13:43 +0100
+Subject: net: ipv4: don't allow setting net.ipv4.route.min_pmtu below 68
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+
+[ Upstream commit c7272c2f1229125f74f22dcdd59de9bbd804f1c8 ]
+
+According to RFC 1191 sections 3 and 4, ICMP frag-needed messages
+indicating an MTU below 68 should be rejected:
+
+    A host MUST never reduce its estimate of the Path MTU below 68
+    octets.
+
+and (talking about ICMP frag-needed's Next-Hop MTU field):
+
+    This field will never contain a value less than 68, since every
+    router "must be able to forward a datagram of 68 octets without
+    fragmentation".
+
+Furthermore, by letting net.ipv4.route.min_pmtu be set to negative
+values, we can end up with a very large PMTU when (-1) is cast into u32.
+
+Let's also make ip_rt_min_pmtu a u32, since it's only ever compared to
+unsigned ints.
+
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/route.c |    8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -126,10 +126,13 @@ static int ip_rt_redirect_silence __read
+ static int ip_rt_error_cost __read_mostly     = HZ;
+ static int ip_rt_error_burst __read_mostly    = 5 * HZ;
+ static int ip_rt_mtu_expires __read_mostly    = 10 * 60 * HZ;
+-static int ip_rt_min_pmtu __read_mostly               = 512 + 20 + 20;
++static u32 ip_rt_min_pmtu __read_mostly               = 512 + 20 + 20;
+ static int ip_rt_min_advmss __read_mostly     = 256;
+ static int ip_rt_gc_timeout __read_mostly     = RT_GC_TIMEOUT;
++
++static int ip_min_valid_pmtu __read_mostly    = IPV4_MIN_MTU;
++
+ /*
+  *    Interface to generic destination cache.
+  */
+@@ -2772,7 +2775,8 @@ static struct ctl_table ipv4_route_table
+               .data           = &ip_rt_min_pmtu,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+-              .proc_handler   = proc_dointvec,
++              .proc_handler   = proc_dointvec_minmax,
++              .extra1         = &ip_min_valid_pmtu,
+       },
+       {
+               .procname       = "min_adv_mss",
diff --git a/queue-4.9/net-phy-fix-phy_start-to-consider-phy_ignore_interrupt.patch b/queue-4.9/net-phy-fix-phy_start-to-consider-phy_ignore_interrupt.patch
new file mode 100644 (file)
index 0000000..6d52565
--- /dev/null
@@ -0,0 +1,34 @@
+From foo@baz Thu Mar  8 06:55:02 PST 2018
+From: Heiner Kallweit <hkallweit1@gmail.com>
+Date: Thu, 8 Feb 2018 21:01:48 +0100
+Subject: net: phy: fix phy_start to consider PHY_IGNORE_INTERRUPT
+
+From: Heiner Kallweit <hkallweit1@gmail.com>
+
+
+[ Upstream commit 08f5138512180a479ce6b9d23b825c9f4cd3be77 ]
+
+This condition wasn't adjusted when PHY_IGNORE_INTERRUPT (-2) was added
+long ago. In case of PHY_IGNORE_INTERRUPT the MAC interrupt indicates
+also PHY state changes and we should do what the symbol says.
+
+Fixes: 84a527a41f38 ("net: phylib: fix interrupts re-enablement in phy_start")
+Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
+Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/phy.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/phy/phy.c
++++ b/drivers/net/phy/phy.c
+@@ -925,7 +925,7 @@ void phy_start(struct phy_device *phydev
+               break;
+       case PHY_HALTED:
+               /* make sure interrupts are re-enabled for the PHY */
+-              if (phydev->irq != PHY_POLL) {
++              if (phy_interrupt_is_valid(phydev)) {
+                       err = phy_enable_interrupts(phydev);
+                       if (err < 0)
+                               break;
diff --git a/queue-4.9/netlink-ensure-to-loop-over-all-netns-in-genlmsg_multicast_allns.patch b/queue-4.9/netlink-ensure-to-loop-over-all-netns-in-genlmsg_multicast_allns.patch
new file mode 100644 (file)
index 0000000..323f11e
--- /dev/null
@@ -0,0 +1,62 @@
+From foo@baz Thu Mar  8 06:55:02 PST 2018
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Date: Tue, 6 Feb 2018 14:48:32 +0100
+Subject: netlink: ensure to loop over all netns in genlmsg_multicast_allns()
+
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+
+
+[ Upstream commit cb9f7a9a5c96a773bbc9c70660dc600cfff82f82 ]
+
+Nowadays, nlmsg_multicast() returns only 0 or -ESRCH but this was not the
+case when commit 134e63756d5f was pushed.
+However, there was no reason to stop the loop if a netns does not have
+listeners.
+Return -ESRCH only if there were no listeners in any netns.
+
+To avoid having the same problem in the future, I didn't take the
+assumption that nlmsg_multicast() returns only 0 or -ESRCH.
+
+Fixes: 134e63756d5f ("genetlink: make netns aware")
+CC: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netlink/genetlink.c |   12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+--- a/net/netlink/genetlink.c
++++ b/net/netlink/genetlink.c
+@@ -1103,6 +1103,7 @@ static int genlmsg_mcast(struct sk_buff
+ {
+       struct sk_buff *tmp;
+       struct net *net, *prev = NULL;
++      bool delivered = false;
+       int err;
+       for_each_net_rcu(net) {
+@@ -1114,14 +1115,21 @@ static int genlmsg_mcast(struct sk_buff
+                       }
+                       err = nlmsg_multicast(prev->genl_sock, tmp,
+                                             portid, group, flags);
+-                      if (err)
++                      if (!err)
++                              delivered = true;
++                      else if (err != -ESRCH)
+                               goto error;
+               }
+               prev = net;
+       }
+-      return nlmsg_multicast(prev->genl_sock, skb, portid, group, flags);
++      err = nlmsg_multicast(prev->genl_sock, skb, portid, group, flags);
++      if (!err)
++              delivered = true;
++      else if (err != -ESRCH)
++              goto error;
++      return delivered ? 0 : -ESRCH;
+  error:
+       kfree_skb(skb);
+       return err;
diff --git a/queue-4.9/ppp-prevent-unregistered-channels-from-connecting-to-ppp-units.patch b/queue-4.9/ppp-prevent-unregistered-channels-from-connecting-to-ppp-units.patch
new file mode 100644 (file)
index 0000000..607d279
--- /dev/null
@@ -0,0 +1,60 @@
+From foo@baz Thu Mar  8 06:55:02 PST 2018
+From: Guillaume Nault <g.nault@alphalink.fr>
+Date: Fri, 2 Mar 2018 18:41:16 +0100
+Subject: ppp: prevent unregistered channels from connecting to PPP units
+
+From: Guillaume Nault <g.nault@alphalink.fr>
+
+
+[ Upstream commit 77f840e3e5f09c6d7d727e85e6e08276dd813d11 ]
+
+PPP units don't hold any reference on the channels connected to them.
+It is the channel's responsibility to ensure that it disconnects from
+its unit before being destroyed.
+In practice, this is ensured by ppp_unregister_channel() disconnecting
+the channel from the unit before dropping a reference on the channel.
+
+However, it is possible for an unregistered channel to connect to a PPP
+unit: register a channel with ppp_register_net_channel(), attach a
+/dev/ppp file to it with ioctl(PPPIOCATTCHAN), unregister the channel
+with ppp_unregister_channel() and finally connect the /dev/ppp file to
+a PPP unit with ioctl(PPPIOCCONNECT).
+
+Once in this situation, the channel is only held by the /dev/ppp file,
+which can be released at anytime and free the channel without letting
+the parent PPP unit know. Then the ppp structure ends up with dangling
+pointers in its ->channels list.
+
+Prevent this scenario by forbidding unregistered channels from
+connecting to PPP units. This maintains the code logic by keeping
+ppp_unregister_channel() responsible for disconnecting the channel if
+necessary and avoids modification on the reference counting mechanism.
+
+This issue seems to predate git history (successfully reproduced on
+Linux 2.6.26 and earlier PPP commits are unrelated).
+
+Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ppp/ppp_generic.c |    9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/drivers/net/ppp/ppp_generic.c
++++ b/drivers/net/ppp/ppp_generic.c
+@@ -3157,6 +3157,15 @@ ppp_connect_channel(struct channel *pch,
+               goto outl;
+       ppp_lock(ppp);
++      spin_lock_bh(&pch->downl);
++      if (!pch->chan) {
++              /* Don't connect unregistered channels */
++              spin_unlock_bh(&pch->downl);
++              ppp_unlock(ppp);
++              ret = -ENOTCONN;
++              goto outl;
++      }
++      spin_unlock_bh(&pch->downl);
+       if (pch->file.hdrlen > ppp->file.hdrlen)
+               ppp->file.hdrlen = pch->file.hdrlen;
+       hdrlen = pch->file.hdrlen + 2;  /* for protocol bytes */
diff --git a/queue-4.9/rxrpc-fix-send-in-rxrpc_send_data_packet.patch b/queue-4.9/rxrpc-fix-send-in-rxrpc_send_data_packet.patch
new file mode 100644 (file)
index 0000000..c43221e
--- /dev/null
@@ -0,0 +1,37 @@
+From foo@baz Thu Mar  8 06:55:02 PST 2018
+From: David Howells <dhowells@redhat.com>
+Date: Thu, 22 Feb 2018 14:38:14 +0000
+Subject: rxrpc: Fix send in rxrpc_send_data_packet()
+
+From: David Howells <dhowells@redhat.com>
+
+
+[ Upstream commit 93c62c45ed5fad1b87e3a45835b251cd68de9c46 ]
+
+All the kernel_sendmsg() calls in rxrpc_send_data_packet() need to send
+both parts of the iov[] buffer, but one of them does not.  Fix it so that
+it does.
+
+Without this, short IPv6 rxrpc DATA packets may be seen that have the rxrpc
+header included, but no payload.
+
+Fixes: 5a924b8951f8 ("rxrpc: Don't store the rxrpc header in the Tx queue sk_buffs")
+Reported-by: Marc Dionne <marc.dionne@auristor.com>
+Signed-off-by: David Howells <dhowells@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/rxrpc/output.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/rxrpc/output.c
++++ b/net/rxrpc/output.c
+@@ -391,7 +391,7 @@ send_fragmentable:
+                                       (char *)&opt, sizeof(opt));
+               if (ret == 0) {
+                       ret = kernel_sendmsg(conn->params.local->socket, &msg,
+-                                           iov, 1, iov[0].iov_len);
++                                           iov, 2, len);
+                       opt = IPV6_PMTUDISC_DO;
+                       kernel_setsockopt(conn->params.local->socket,
diff --git a/queue-4.9/s390-qeth-fix-double-free-on-ip-add-remove-race.patch b/queue-4.9/s390-qeth-fix-double-free-on-ip-add-remove-race.patch
new file mode 100644 (file)
index 0000000..447a16a
--- /dev/null
@@ -0,0 +1,47 @@
+From foo@baz Thu Mar  8 06:55:02 PST 2018
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Date: Tue, 27 Feb 2018 18:58:14 +0100
+Subject: s390/qeth: fix double-free on IP add/remove race
+
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+
+
+[ Upstream commit 14d066c3531a87f727968cacd85bd95c75f59843 ]
+
+Registering an IPv4 address with the HW takes quite a while, so we
+temporarily drop the ip_htable lock. Any concurrent add/remove of the
+same IP adjusts the IP's use count, and (on remove) is then blocked by
+addr->in_progress.
+After the register call has completed, we check the use count for
+concurrently attempted add/remove calls - and possibly straight-away
+deregister the IP again. This happens via l3_delete_ip(), which
+1) looks up the queried IP in the htable (getting a reference to the
+   *same* queried object),
+2) deregisters the IP from the HW, and
+3) frees the IP object.
+
+The caller in l3_add_ip() then does a second free on the same object.
+
+For this case, skip all the extra checks and lookups in l3_delete_ip()
+and just deregister & free the IP object ourselves.
+
+Fixes: 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback")
+Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/s390/net/qeth_l3_main.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/s390/net/qeth_l3_main.c
++++ b/drivers/s390/net/qeth_l3_main.c
+@@ -323,7 +323,8 @@ int qeth_l3_add_ip(struct qeth_card *car
+                               (rc == IPA_RC_LAN_OFFLINE)) {
+                       addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING;
+                       if (addr->ref_counter < 1) {
+-                              qeth_l3_delete_ip(card, addr);
++                              qeth_l3_deregister_addr_entry(card, addr);
++                              hash_del(&addr->hnode);
+                               kfree(addr);
+                       }
+               } else {
diff --git a/queue-4.9/s390-qeth-fix-ip-address-lookup-for-l3-devices.patch b/queue-4.9/s390-qeth-fix-ip-address-lookup-for-l3-devices.patch
new file mode 100644 (file)
index 0000000..21f122d
--- /dev/null
@@ -0,0 +1,255 @@
+From foo@baz Thu Mar  8 06:55:02 PST 2018
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Date: Tue, 27 Feb 2018 18:58:16 +0100
+Subject: s390/qeth: fix IP address lookup for L3 devices
+
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+
+
+[ Upstream commit c5c48c58b259bb8f0482398370ee539d7a12df3e ]
+
+Current code ("qeth_l3_ip_from_hash()") matches a queried address object
+against objects in the IP table by IP address, Mask/Prefix Length and
+MAC address ("qeth_l3_ipaddrs_is_equal()"). But what callers actually
+require is either
+a) "is this IP address registered" (ie. match by IP address only),
+before adding a new address.
+b) or "is this address object registered" (ie. match all relevant
+   attributes), before deleting an address.
+
+Right now
+1. the ADD path is too strict in its lookup, and eg. doesn't detect
+conflicts between an existing NORMAL address and a new VIPA address
+(because the NORMAL address will have mask != 0, while VIPA has
+a mask == 0),
+2. the DELETE path is not strict enough, and eg. allows del_rxip() to
+delete a VIPA address as long as the IP address matches.
+
+Fix all this by adding helpers (_addr_match_ip() and _addr_match_all())
+that do the appropriate checking.
+
+Note that the ADD path for NORMAL addresses is special, as qeth keeps
+track of how many times such an address is in use (and there is no
+immediate way of returning errors to the caller). So when a requested
+NORMAL address _fully_ matches an existing one, it's not considered a
+conflict and we merely increment the refcount.
+
+Fixes: 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback")
+Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/s390/net/qeth_l3.h      |   34 ++++++++++++++
+ drivers/s390/net/qeth_l3_main.c |   91 ++++++++++++++++++----------------------
+ 2 files changed, 74 insertions(+), 51 deletions(-)
+
+--- a/drivers/s390/net/qeth_l3.h
++++ b/drivers/s390/net/qeth_l3.h
+@@ -39,8 +39,40 @@ struct qeth_ipaddr {
+                       unsigned int pfxlen;
+               } a6;
+       } u;
+-
+ };
++
++static inline bool qeth_l3_addr_match_ip(struct qeth_ipaddr *a1,
++                                       struct qeth_ipaddr *a2)
++{
++      if (a1->proto != a2->proto)
++              return false;
++      if (a1->proto == QETH_PROT_IPV6)
++              return ipv6_addr_equal(&a1->u.a6.addr, &a2->u.a6.addr);
++      return a1->u.a4.addr == a2->u.a4.addr;
++}
++
++static inline bool qeth_l3_addr_match_all(struct qeth_ipaddr *a1,
++                                        struct qeth_ipaddr *a2)
++{
++      /* Assumes that the pair was obtained via qeth_l3_find_addr_by_ip(),
++       * so 'proto' and 'addr' match for sure.
++       *
++       * For ucast:
++       * -    'mac' is always 0.
++       * -    'mask'/'pfxlen' for RXIP/VIPA is always 0. For NORMAL, matching
++       *      values are required to avoid mixups in takeover eligibility.
++       *
++       * For mcast,
++       * -    'mac' is mapped from the IP, and thus always matches.
++       * -    'mask'/'pfxlen' is always 0.
++       */
++      if (a1->type != a2->type)
++              return false;
++      if (a1->proto == QETH_PROT_IPV6)
++              return a1->u.a6.pfxlen == a2->u.a6.pfxlen;
++      return a1->u.a4.mask == a2->u.a4.mask;
++}
++
+ static inline  u64 qeth_l3_ipaddr_hash(struct qeth_ipaddr *addr)
+ {
+       u64  ret = 0;
+--- a/drivers/s390/net/qeth_l3_main.c
++++ b/drivers/s390/net/qeth_l3_main.c
+@@ -154,6 +154,24 @@ int qeth_l3_string_to_ipaddr(const char
+               return -EINVAL;
+ }
++static struct qeth_ipaddr *qeth_l3_find_addr_by_ip(struct qeth_card *card,
++                                                 struct qeth_ipaddr *query)
++{
++      u64 key = qeth_l3_ipaddr_hash(query);
++      struct qeth_ipaddr *addr;
++
++      if (query->is_multicast) {
++              hash_for_each_possible(card->ip_mc_htable, addr, hnode, key)
++                      if (qeth_l3_addr_match_ip(addr, query))
++                              return addr;
++      } else {
++              hash_for_each_possible(card->ip_htable,  addr, hnode, key)
++                      if (qeth_l3_addr_match_ip(addr, query))
++                              return addr;
++      }
++      return NULL;
++}
++
+ static void qeth_l3_convert_addr_to_bits(u8 *addr, u8 *bits, int len)
+ {
+       int i, j;
+@@ -207,34 +225,6 @@ static bool qeth_l3_is_addr_covered_by_i
+       return rc;
+ }
+-inline int
+-qeth_l3_ipaddrs_is_equal(struct qeth_ipaddr *addr1, struct qeth_ipaddr *addr2)
+-{
+-      return addr1->proto == addr2->proto &&
+-              !memcmp(&addr1->u, &addr2->u, sizeof(addr1->u))  &&
+-              !memcmp(&addr1->mac, &addr2->mac, sizeof(addr1->mac));
+-}
+-
+-static struct qeth_ipaddr *
+-qeth_l3_ip_from_hash(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
+-{
+-      struct qeth_ipaddr *addr;
+-
+-      if (tmp_addr->is_multicast) {
+-              hash_for_each_possible(card->ip_mc_htable,  addr,
+-                              hnode, qeth_l3_ipaddr_hash(tmp_addr))
+-                      if (qeth_l3_ipaddrs_is_equal(tmp_addr, addr))
+-                              return addr;
+-      } else {
+-              hash_for_each_possible(card->ip_htable,  addr,
+-                              hnode, qeth_l3_ipaddr_hash(tmp_addr))
+-                      if (qeth_l3_ipaddrs_is_equal(tmp_addr, addr))
+-                              return addr;
+-      }
+-
+-      return NULL;
+-}
+-
+ int qeth_l3_delete_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
+ {
+       int rc = 0;
+@@ -249,8 +239,8 @@ int qeth_l3_delete_ip(struct qeth_card *
+               QETH_CARD_HEX(card, 4, ((char *)&tmp_addr->u.a6.addr) + 8, 8);
+       }
+-      addr = qeth_l3_ip_from_hash(card, tmp_addr);
+-      if (!addr)
++      addr = qeth_l3_find_addr_by_ip(card, tmp_addr);
++      if (!addr || !qeth_l3_addr_match_all(addr, tmp_addr))
+               return -ENOENT;
+       addr->ref_counter--;
+@@ -272,6 +262,7 @@ int qeth_l3_add_ip(struct qeth_card *car
+ {
+       int rc = 0;
+       struct qeth_ipaddr *addr;
++      char buf[40];
+       QETH_CARD_TEXT(card, 4, "addip");
+@@ -282,8 +273,20 @@ int qeth_l3_add_ip(struct qeth_card *car
+               QETH_CARD_HEX(card, 4, ((char *)&tmp_addr->u.a6.addr) + 8, 8);
+       }
+-      addr = qeth_l3_ip_from_hash(card, tmp_addr);
+-      if (!addr) {
++      addr = qeth_l3_find_addr_by_ip(card, tmp_addr);
++      if (addr) {
++              if (tmp_addr->type != QETH_IP_TYPE_NORMAL)
++                      return -EADDRINUSE;
++              if (qeth_l3_addr_match_all(addr, tmp_addr)) {
++                      addr->ref_counter++;
++                      return 0;
++              }
++              qeth_l3_ipaddr_to_string(tmp_addr->proto, (u8 *)&tmp_addr->u,
++                                       buf);
++              dev_warn(&card->gdev->dev,
++                       "Registering IP address %s failed\n", buf);
++              return -EADDRINUSE;
++      } else {
+               addr = qeth_l3_get_addr_buffer(tmp_addr->proto);
+               if (!addr)
+                       return -ENOMEM;
+@@ -331,11 +334,7 @@ int qeth_l3_add_ip(struct qeth_card *car
+                       hash_del(&addr->hnode);
+                       kfree(addr);
+               }
+-      } else {
+-                      if (addr->type == QETH_IP_TYPE_NORMAL)
+-                              addr->ref_counter++;
+       }
+-
+       return rc;
+ }
+@@ -719,12 +718,7 @@ int qeth_l3_add_vipa(struct qeth_card *c
+               return -ENOMEM;
+       spin_lock_bh(&card->ip_lock);
+-
+-      if (qeth_l3_ip_from_hash(card, ipaddr))
+-              rc = -EEXIST;
+-      else
+-              qeth_l3_add_ip(card, ipaddr);
+-
++      rc = qeth_l3_add_ip(card, ipaddr);
+       spin_unlock_bh(&card->ip_lock);
+       kfree(ipaddr);
+@@ -787,12 +781,7 @@ int qeth_l3_add_rxip(struct qeth_card *c
+               return -ENOMEM;
+       spin_lock_bh(&card->ip_lock);
+-
+-      if (qeth_l3_ip_from_hash(card, ipaddr))
+-              rc = -EEXIST;
+-      else
+-              qeth_l3_add_ip(card, ipaddr);
+-
++      rc = qeth_l3_add_ip(card, ipaddr);
+       spin_unlock_bh(&card->ip_lock);
+       kfree(ipaddr);
+@@ -1437,8 +1426,9 @@ qeth_l3_add_mc_to_hash(struct qeth_card
+               memcpy(tmp->mac, buf, sizeof(tmp->mac));
+               tmp->is_multicast = 1;
+-              ipm = qeth_l3_ip_from_hash(card, tmp);
++              ipm = qeth_l3_find_addr_by_ip(card, tmp);
+               if (ipm) {
++                      /* for mcast, by-IP match means full match */
+                       ipm->disp_flag = QETH_DISP_ADDR_DO_NOTHING;
+               } else {
+                       ipm = qeth_l3_get_addr_buffer(QETH_PROT_IPV4);
+@@ -1521,8 +1511,9 @@ qeth_l3_add_mc6_to_hash(struct qeth_card
+                      sizeof(struct in6_addr));
+               tmp->is_multicast = 1;
+-              ipm = qeth_l3_ip_from_hash(card, tmp);
++              ipm = qeth_l3_find_addr_by_ip(card, tmp);
+               if (ipm) {
++                      /* for mcast, by-IP match means full match */
+                       ipm->disp_flag = QETH_DISP_ADDR_DO_NOTHING;
+                       continue;
+               }
diff --git a/queue-4.9/s390-qeth-fix-ip-removal-on-offline-cards.patch b/queue-4.9/s390-qeth-fix-ip-removal-on-offline-cards.patch
new file mode 100644 (file)
index 0000000..edec5c6
--- /dev/null
@@ -0,0 +1,58 @@
+From foo@baz Thu Mar  8 06:55:02 PST 2018
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Date: Tue, 27 Feb 2018 18:58:13 +0100
+Subject: s390/qeth: fix IP removal on offline cards
+
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+
+
+[ Upstream commit 98d823ab1fbdcb13abc25b420f9bb71bade42056 ]
+
+If the HW is not reachable, then none of the IPs in qeth's internal
+table has been registered with the HW yet. So when deleting such an IP,
+there's no need to stage it for deregistration - just drop it from
+the table.
+
+This fixes the "add-delete-add" scenario on an offline card, where the
+second "add" merely increments the IP's use count. But as the IP is
+still set to DISP_ADDR_DELETE from the previous "delete" step,
+l3_recover_ip() won't register it with the HW when the card goes online.
+
+Fixes: 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback")
+Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/s390/net/qeth_l3_main.c |   14 +++-----------
+ 1 file changed, 3 insertions(+), 11 deletions(-)
+
+--- a/drivers/s390/net/qeth_l3_main.c
++++ b/drivers/s390/net/qeth_l3_main.c
+@@ -259,12 +259,8 @@ int qeth_l3_delete_ip(struct qeth_card *
+       if (addr->in_progress)
+               return -EINPROGRESS;
+-      if (!qeth_card_hw_is_reachable(card)) {
+-              addr->disp_flag = QETH_DISP_ADDR_DELETE;
+-              return 0;
+-      }
+-
+-      rc = qeth_l3_deregister_addr_entry(card, addr);
++      if (qeth_card_hw_is_reachable(card))
++              rc = qeth_l3_deregister_addr_entry(card, addr);
+       hash_del(&addr->hnode);
+       kfree(addr);
+@@ -406,11 +402,7 @@ static void qeth_l3_recover_ip(struct qe
+       spin_lock_bh(&card->ip_lock);
+       hash_for_each_safe(card->ip_htable, i, tmp, addr, hnode) {
+-              if (addr->disp_flag == QETH_DISP_ADDR_DELETE) {
+-                      qeth_l3_deregister_addr_entry(card, addr);
+-                      hash_del(&addr->hnode);
+-                      kfree(addr);
+-              } else if (addr->disp_flag == QETH_DISP_ADDR_ADD) {
++              if (addr->disp_flag == QETH_DISP_ADDR_ADD) {
+                       if (addr->proto == QETH_PROT_IPV4) {
+                               addr->in_progress = 1;
+                               spin_unlock_bh(&card->ip_lock);
diff --git a/queue-4.9/s390-qeth-fix-ipa-command-submission-race.patch b/queue-4.9/s390-qeth-fix-ipa-command-submission-race.patch
new file mode 100644 (file)
index 0000000..f129d17
--- /dev/null
@@ -0,0 +1,84 @@
+From foo@baz Thu Mar  8 06:55:02 PST 2018
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Date: Tue, 27 Feb 2018 18:58:17 +0100
+Subject: s390/qeth: fix IPA command submission race
+
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+
+
+[ Upstream commit d22ffb5a712f9211ffd104c38fc17cbfb1b5e2b0 ]
+
+If multiple IPA commands are built & sent out concurrently,
+fill_ipacmd_header() may assign a seqno value to a command that's
+different from what send_control_data() later assigns to this command's
+reply.
+This is due to other commands passing through send_control_data(),
+and incrementing card->seqno.ipa along the way.
+
+So one IPA command has no reply that's waiting for its seqno, while some
+other IPA command has multiple reply objects waiting for it.
+Only one of those waiting replies wins, and the other(s) times out and
+triggers a recovery via send_ipa_cmd().
+
+Fix this by making sure that the same seqno value is assigned to
+a command and its reply object.
+Do so immediately before submitting the command & while holding the
+irq_pending "lock", to produce nicely ascending seqnos.
+
+As a side effect, *all* IPA commands now use a reply object that's
+waiting for its actual seqno. Previously, early IPA commands that were
+submitted while the card was still DOWN used the "catch-all" IDX seqno.
+
+Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/s390/net/qeth_core_main.c |   19 ++++++++++---------
+ 1 file changed, 10 insertions(+), 9 deletions(-)
+
+--- a/drivers/s390/net/qeth_core_main.c
++++ b/drivers/s390/net/qeth_core_main.c
+@@ -2064,25 +2064,26 @@ int qeth_send_control_data(struct qeth_c
+       }
+       reply->callback = reply_cb;
+       reply->param = reply_param;
+-      if (card->state == CARD_STATE_DOWN)
+-              reply->seqno = QETH_IDX_COMMAND_SEQNO;
+-      else
+-              reply->seqno = card->seqno.ipa++;
++
+       init_waitqueue_head(&reply->wait_q);
+-      spin_lock_irqsave(&card->lock, flags);
+-      list_add_tail(&reply->list, &card->cmd_waiter_list);
+-      spin_unlock_irqrestore(&card->lock, flags);
+       QETH_DBF_HEX(CTRL, 2, iob->data, QETH_DBF_CTRL_LEN);
+       while (atomic_cmpxchg(&card->write.irq_pending, 0, 1)) ;
+-      qeth_prepare_control_data(card, len, iob);
+       if (IS_IPA(iob->data)) {
+               cmd = __ipa_cmd(iob);
++              cmd->hdr.seqno = card->seqno.ipa++;
++              reply->seqno = cmd->hdr.seqno;
+               event_timeout = QETH_IPA_TIMEOUT;
+       } else {
++              reply->seqno = QETH_IDX_COMMAND_SEQNO;
+               event_timeout = QETH_TIMEOUT;
+       }
++      qeth_prepare_control_data(card, len, iob);
++
++      spin_lock_irqsave(&card->lock, flags);
++      list_add_tail(&reply->list, &card->cmd_waiter_list);
++      spin_unlock_irqrestore(&card->lock, flags);
+       timeout = jiffies + event_timeout;
+@@ -2873,7 +2874,7 @@ static void qeth_fill_ipacmd_header(stru
+       memset(cmd, 0, sizeof(struct qeth_ipa_cmd));
+       cmd->hdr.command = command;
+       cmd->hdr.initiator = IPA_CMD_INITIATOR_HOST;
+-      cmd->hdr.seqno = card->seqno.ipa;
++      /* cmd->hdr.seqno is set by qeth_send_control_data() */
+       cmd->hdr.adapter_type = qeth_get_ipa_adp_type(card->info.link_type);
+       cmd->hdr.rel_adapter_no = (__u8) card->info.portno;
+       if (card->options.layer2)
diff --git a/queue-4.9/s390-qeth-fix-overestimated-count-of-buffer-elements.patch b/queue-4.9/s390-qeth-fix-overestimated-count-of-buffer-elements.patch
new file mode 100644 (file)
index 0000000..d661461
--- /dev/null
@@ -0,0 +1,74 @@
+From foo@baz Thu Mar  8 06:55:02 PST 2018
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Date: Tue, 27 Feb 2018 18:58:12 +0100
+Subject: s390/qeth: fix overestimated count of buffer elements
+
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+
+
+[ Upstream commit 12472af89632beb1ed8dea29d4efe208ca05b06a ]
+
+qeth_get_elements_for_range() doesn't know how to handle a 0-length
+range (ie. start == end), and returns 1 when it should return 0.
+Such ranges occur on TSO skbs, where the L2/L3/L4 headers (and thus all
+of the skb's linear data) are skipped when mapping the skb into regular
+buffer elements.
+
+This overestimation may cause several performance-related issues:
+1. sub-optimal IO buffer selection, where the next buffer gets selected
+   even though the skb would actually still fit into the current buffer.
+2. forced linearization, if the element count for a non-linear skb
+   exceeds QETH_MAX_BUFFER_ELEMENTS.
+
+Rather than modifying qeth_get_elements_for_range() and adding overhead
+to every caller, fix up those callers that are at risk of passing a
+0-length range.
+
+Fixes: 2863c61334aa ("qeth: refactor calculation of SBALE count")
+Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/s390/net/qeth_core_main.c |   10 ++++++----
+ drivers/s390/net/qeth_l3_main.c   |   11 ++++++-----
+ 2 files changed, 12 insertions(+), 9 deletions(-)
+
+--- a/drivers/s390/net/qeth_core_main.c
++++ b/drivers/s390/net/qeth_core_main.c
+@@ -3854,10 +3854,12 @@ EXPORT_SYMBOL_GPL(qeth_get_elements_for_
+ int qeth_get_elements_no(struct qeth_card *card,
+                    struct sk_buff *skb, int extra_elems, int data_offset)
+ {
+-      int elements = qeth_get_elements_for_range(
+-                              (addr_t)skb->data + data_offset,
+-                              (addr_t)skb->data + skb_headlen(skb)) +
+-                      qeth_get_elements_for_frags(skb);
++      addr_t end = (addr_t)skb->data + skb_headlen(skb);
++      int elements = qeth_get_elements_for_frags(skb);
++      addr_t start = (addr_t)skb->data + data_offset;
++
++      if (start != end)
++              elements += qeth_get_elements_for_range(start, end);
+       if ((elements + extra_elems) > QETH_MAX_BUFFER_ELEMENTS(card)) {
+               QETH_DBF_MESSAGE(2, "Invalid size of IP packet "
+--- a/drivers/s390/net/qeth_l3_main.c
++++ b/drivers/s390/net/qeth_l3_main.c
+@@ -2784,11 +2784,12 @@ static void qeth_tso_fill_header(struct
+ static int qeth_l3_get_elements_no_tso(struct qeth_card *card,
+                       struct sk_buff *skb, int extra_elems)
+ {
+-      addr_t tcpdptr = (addr_t)tcp_hdr(skb) + tcp_hdrlen(skb);
+-      int elements = qeth_get_elements_for_range(
+-                              tcpdptr,
+-                              (addr_t)skb->data + skb_headlen(skb)) +
+-                              qeth_get_elements_for_frags(skb);
++      addr_t start = (addr_t)tcp_hdr(skb) + tcp_hdrlen(skb);
++      addr_t end = (addr_t)skb->data + skb_headlen(skb);
++      int elements = qeth_get_elements_for_frags(skb);
++
++      if (start != end)
++              elements += qeth_get_elements_for_range(start, end);
+       if ((elements + extra_elems) > QETH_MAX_BUFFER_ELEMENTS(card)) {
+               QETH_DBF_MESSAGE(2,
diff --git a/queue-4.9/s390-qeth-fix-setip-command-handling.patch b/queue-4.9/s390-qeth-fix-setip-command-handling.patch
new file mode 100644 (file)
index 0000000..44c8957
--- /dev/null
@@ -0,0 +1,76 @@
+From foo@baz Thu Mar  8 06:55:02 PST 2018
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Date: Fri, 9 Feb 2018 11:03:50 +0100
+Subject: s390/qeth: fix SETIP command handling
+
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+
+
+[ Upstream commit 1c5b2216fbb973a9410e0b06389740b5c1289171 ]
+
+send_control_data() applies some special handling to SETIP v4 IPA
+commands. But current code parses *all* command types for the SETIP
+command code. Limit the command code check to IPA commands.
+
+Fixes: 5b54e16f1a54 ("qeth: do not spin for SETIP ip assist command")
+Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/s390/net/qeth_core.h      |    5 +++++
+ drivers/s390/net/qeth_core_main.c |   14 ++++++++------
+ 2 files changed, 13 insertions(+), 6 deletions(-)
+
+--- a/drivers/s390/net/qeth_core.h
++++ b/drivers/s390/net/qeth_core.h
+@@ -592,6 +592,11 @@ struct qeth_cmd_buffer {
+       void (*callback) (struct qeth_channel *, struct qeth_cmd_buffer *);
+ };
++static inline struct qeth_ipa_cmd *__ipa_cmd(struct qeth_cmd_buffer *iob)
++{
++      return (struct qeth_ipa_cmd *)(iob->data + IPA_PDU_HEADER_SIZE);
++}
++
+ /**
+  * definition of a qeth channel, used for read and write
+  */
+--- a/drivers/s390/net/qeth_core_main.c
++++ b/drivers/s390/net/qeth_core_main.c
+@@ -2050,7 +2050,7 @@ int qeth_send_control_data(struct qeth_c
+       unsigned long flags;
+       struct qeth_reply *reply = NULL;
+       unsigned long timeout, event_timeout;
+-      struct qeth_ipa_cmd *cmd;
++      struct qeth_ipa_cmd *cmd = NULL;
+       QETH_CARD_TEXT(card, 2, "sendctl");
+@@ -2077,10 +2077,13 @@ int qeth_send_control_data(struct qeth_c
+       while (atomic_cmpxchg(&card->write.irq_pending, 0, 1)) ;
+       qeth_prepare_control_data(card, len, iob);
+-      if (IS_IPA(iob->data))
++      if (IS_IPA(iob->data)) {
++              cmd = __ipa_cmd(iob);
+               event_timeout = QETH_IPA_TIMEOUT;
+-      else
++      } else {
+               event_timeout = QETH_TIMEOUT;
++      }
++
+       timeout = jiffies + event_timeout;
+       QETH_CARD_TEXT(card, 6, "noirqpnd");
+@@ -2105,9 +2108,8 @@ int qeth_send_control_data(struct qeth_c
+       /* we have only one long running ipassist, since we can ensure
+          process context of this command we can sleep */
+-      cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+-      if ((cmd->hdr.command == IPA_CMD_SETIP) &&
+-          (cmd->hdr.prot_version == QETH_PROT_IPV4)) {
++      if (cmd && cmd->hdr.command == IPA_CMD_SETIP &&
++          cmd->hdr.prot_version == QETH_PROT_IPV4) {
+               if (!wait_event_timeout(reply->wait_q,
+                   atomic_read(&reply->received), event_timeout))
+                       goto time_err;
diff --git a/queue-4.9/s390-qeth-fix-underestimated-count-of-buffer-elements.patch b/queue-4.9/s390-qeth-fix-underestimated-count-of-buffer-elements.patch
new file mode 100644 (file)
index 0000000..4a4b76c
--- /dev/null
@@ -0,0 +1,40 @@
+From foo@baz Thu Mar  8 06:55:02 PST 2018
+From: Ursula Braun <ubraun@linux.vnet.ibm.com>
+Date: Fri, 9 Feb 2018 11:03:49 +0100
+Subject: s390/qeth: fix underestimated count of buffer elements
+
+From: Ursula Braun <ubraun@linux.vnet.ibm.com>
+
+
+[ Upstream commit 89271c65edd599207dd982007900506283c90ae3 ]
+
+For a memory range/skb where the last byte falls onto a page boundary
+(ie. 'end' is of the form xxx...xxx001), the PFN_UP() part of the
+calculation currently doesn't round up to the next PFN due to an
+off-by-one error.
+Thus qeth believes that the skb occupies one page less than it
+actually does, and may select an IO buffer that doesn't have enough spare
+buffer elements to fit all of the skb's data.
+HW detects this as a malformed buffer descriptor, and raises an
+exception which then triggers device recovery.
+
+Fixes: 2863c61334aa ("qeth: refactor calculation of SBALE count")
+Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
+Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/s390/net/qeth_core.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/s390/net/qeth_core.h
++++ b/drivers/s390/net/qeth_core.h
+@@ -849,7 +849,7 @@ struct qeth_trap_id {
+  */
+ static inline int qeth_get_elements_for_range(addr_t start, addr_t end)
+ {
+-      return PFN_UP(end - 1) - PFN_DOWN(start);
++      return PFN_UP(end) - PFN_DOWN(start);
+ }
+ static inline int qeth_get_micros(void)
diff --git a/queue-4.9/sctp-fix-dst-refcnt-leak-in-sctp_v4_get_dst.patch b/queue-4.9/sctp-fix-dst-refcnt-leak-in-sctp_v4_get_dst.patch
new file mode 100644 (file)
index 0000000..a2487df
--- /dev/null
@@ -0,0 +1,86 @@
+From foo@baz Thu Mar  8 06:55:02 PST 2018
+From: Tommi Rantala <tommi.t.rantala@nokia.com>
+Date: Mon, 5 Feb 2018 21:48:14 +0200
+Subject: sctp: fix dst refcnt leak in sctp_v4_get_dst
+
+From: Tommi Rantala <tommi.t.rantala@nokia.com>
+
+
+[ Upstream commit 4a31a6b19f9ddf498c81f5c9b089742b7472a6f8 ]
+
+Fix dst reference count leak in sctp_v4_get_dst() introduced in commit
+410f03831 ("sctp: add routing output fallback"):
+
+When walking the address_list, successive ip_route_output_key() calls
+may return the same rt->dst with the reference incremented on each call.
+
+The code would not decrement the dst refcount when the dst pointer was
+identical from the previous iteration, causing the dst refcnt leak.
+
+Testcase:
+  ip netns add TEST
+  ip netns exec TEST ip link set lo up
+  ip link add dummy0 type dummy
+  ip link add dummy1 type dummy
+  ip link add dummy2 type dummy
+  ip link set dev dummy0 netns TEST
+  ip link set dev dummy1 netns TEST
+  ip link set dev dummy2 netns TEST
+  ip netns exec TEST ip addr add 192.168.1.1/24 dev dummy0
+  ip netns exec TEST ip link set dummy0 up
+  ip netns exec TEST ip addr add 192.168.1.2/24 dev dummy1
+  ip netns exec TEST ip link set dummy1 up
+  ip netns exec TEST ip addr add 192.168.1.3/24 dev dummy2
+  ip netns exec TEST ip link set dummy2 up
+  ip netns exec TEST sctp_test -H 192.168.1.2 -P 20002 -h 192.168.1.1 -p 20000 -s -B 192.168.1.3
+  ip netns del TEST
+
+In 4.4 and 4.9 kernels this results in:
+  [  354.179591] unregister_netdevice: waiting for lo to become free. Usage count = 1
+  [  364.419674] unregister_netdevice: waiting for lo to become free. Usage count = 1
+  [  374.663664] unregister_netdevice: waiting for lo to become free. Usage count = 1
+  [  384.903717] unregister_netdevice: waiting for lo to become free. Usage count = 1
+  [  395.143724] unregister_netdevice: waiting for lo to become free. Usage count = 1
+  [  405.383645] unregister_netdevice: waiting for lo to become free. Usage count = 1
+  ...
+
+Fixes: 410f03831 ("sctp: add routing output fallback")
+Fixes: 0ca50d12f ("sctp: fix src address selection if using secondary addresses")
+Signed-off-by: Tommi Rantala <tommi.t.rantala@nokia.com>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Acked-by: Neil Horman <nhorman@tuxdriver.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/protocol.c |   10 ++++------
+ 1 file changed, 4 insertions(+), 6 deletions(-)
+
+--- a/net/sctp/protocol.c
++++ b/net/sctp/protocol.c
+@@ -510,22 +510,20 @@ static void sctp_v4_get_dst(struct sctp_
+               if (IS_ERR(rt))
+                       continue;
+-              if (!dst)
+-                      dst = &rt->dst;
+-
+               /* Ensure the src address belongs to the output
+                * interface.
+                */
+               odev = __ip_dev_find(sock_net(sk), laddr->a.v4.sin_addr.s_addr,
+                                    false);
+               if (!odev || odev->ifindex != fl4->flowi4_oif) {
+-                      if (&rt->dst != dst)
++                      if (!dst)
++                              dst = &rt->dst;
++                      else
+                               dst_release(&rt->dst);
+                       continue;
+               }
+-              if (dst != &rt->dst)
+-                      dst_release(dst);
++              dst_release(dst);
+               dst = &rt->dst;
+               break;
+       }
diff --git a/queue-4.9/sctp-fix-dst-refcnt-leak-in-sctp_v6_get_dst.patch b/queue-4.9/sctp-fix-dst-refcnt-leak-in-sctp_v6_get_dst.patch
new file mode 100644 (file)
index 0000000..1d98eb1
--- /dev/null
@@ -0,0 +1,57 @@
+From foo@baz Thu Mar  8 06:55:02 PST 2018
+From: Alexey Kodanev <alexey.kodanev@oracle.com>
+Date: Mon, 5 Feb 2018 15:10:35 +0300
+Subject: sctp: fix dst refcnt leak in sctp_v6_get_dst()
+
+From: Alexey Kodanev <alexey.kodanev@oracle.com>
+
+
+[ Upstream commit 957d761cf91cdbb175ad7d8f5472336a4d54dbf2 ]
+
+When going through the bind address list in sctp_v6_get_dst() and
+the previously found address is better ('matchlen > bmatchlen'),
+the code continues to the next iteration without releasing currently
+held destination.
+
+Fix it by releasing 'bdst' before continuing to the next iteration, and
+instead of introducing one more '!IS_ERR(bdst)' check for dst_release(),
+move the already existing one right after ip6_dst_lookup_flow(), i.e. we
+shouldn't proceed further if we get an error for the route lookup.
+
+Fixes: dbc2b5e9a09e ("sctp: fix src address selection if using secondary addresses for ipv6")
+Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
+Acked-by: Neil Horman <nhorman@tuxdriver.com>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/ipv6.c |   10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+--- a/net/sctp/ipv6.c
++++ b/net/sctp/ipv6.c
+@@ -324,8 +324,10 @@ static void sctp_v6_get_dst(struct sctp_
+               final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
+               bdst = ip6_dst_lookup_flow(sk, fl6, final_p);
+-              if (!IS_ERR(bdst) &&
+-                  ipv6_chk_addr(dev_net(bdst->dev),
++              if (IS_ERR(bdst))
++                      continue;
++
++              if (ipv6_chk_addr(dev_net(bdst->dev),
+                                 &laddr->a.v6.sin6_addr, bdst->dev, 1)) {
+                       if (!IS_ERR_OR_NULL(dst))
+                               dst_release(dst);
+@@ -334,8 +336,10 @@ static void sctp_v6_get_dst(struct sctp_
+               }
+               bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a);
+-              if (matchlen > bmatchlen)
++              if (matchlen > bmatchlen) {
++                      dst_release(bdst);
+                       continue;
++              }
+               if (!IS_ERR_OR_NULL(dst))
+                       dst_release(dst);
diff --git a/queue-4.9/sctp-verify-size-of-a-new-chunk-in-_sctp_make_chunk.patch b/queue-4.9/sctp-verify-size-of-a-new-chunk-in-_sctp_make_chunk.patch
new file mode 100644 (file)
index 0000000..f634215
--- /dev/null
@@ -0,0 +1,86 @@
+From foo@baz Thu Mar  8 06:55:02 PST 2018
+From: Alexey Kodanev <alexey.kodanev@oracle.com>
+Date: Fri, 9 Feb 2018 17:35:23 +0300
+Subject: sctp: verify size of a new chunk in _sctp_make_chunk()
+
+From: Alexey Kodanev <alexey.kodanev@oracle.com>
+
+
+[ Upstream commit 07f2c7ab6f8d0a7e7c5764c4e6cc9c52951b9d9c ]
+
+When SCTP makes INIT or INIT_ACK packet the total chunk length
+can exceed SCTP_MAX_CHUNK_LEN which leads to kernel panic when
+transmitting these packets, e.g. the crash on sending INIT_ACK:
+
+[  597.804948] skbuff: skb_over_panic: text:00000000ffae06e4 len:120168
+               put:120156 head:000000007aa47635 data:00000000d991c2de
+               tail:0x1d640 end:0xfec0 dev:<NULL>
+...
+[  597.976970] ------------[ cut here ]------------
+[  598.033408] kernel BUG at net/core/skbuff.c:104!
+[  600.314841] Call Trace:
+[  600.345829]  <IRQ>
+[  600.371639]  ? sctp_packet_transmit+0x2095/0x26d0 [sctp]
+[  600.436934]  skb_put+0x16c/0x200
+[  600.477295]  sctp_packet_transmit+0x2095/0x26d0 [sctp]
+[  600.540630]  ? sctp_packet_config+0x890/0x890 [sctp]
+[  600.601781]  ? __sctp_packet_append_chunk+0x3b4/0xd00 [sctp]
+[  600.671356]  ? sctp_cmp_addr_exact+0x3f/0x90 [sctp]
+[  600.731482]  sctp_outq_flush+0x663/0x30d0 [sctp]
+[  600.788565]  ? sctp_make_init+0xbf0/0xbf0 [sctp]
+[  600.845555]  ? sctp_check_transmitted+0x18f0/0x18f0 [sctp]
+[  600.912945]  ? sctp_outq_tail+0x631/0x9d0 [sctp]
+[  600.969936]  sctp_cmd_interpreter.isra.22+0x3be1/0x5cb0 [sctp]
+[  601.041593]  ? sctp_sf_do_5_1B_init+0x85f/0xc30 [sctp]
+[  601.104837]  ? sctp_generate_t1_cookie_event+0x20/0x20 [sctp]
+[  601.175436]  ? sctp_eat_data+0x1710/0x1710 [sctp]
+[  601.233575]  sctp_do_sm+0x182/0x560 [sctp]
+[  601.284328]  ? sctp_has_association+0x70/0x70 [sctp]
+[  601.345586]  ? sctp_rcv+0xef4/0x32f0 [sctp]
+[  601.397478]  ? sctp6_rcv+0xa/0x20 [sctp]
+...
+
+Here the chunk size for INIT_ACK packet becomes too big, mostly
+because of the state cookie (INIT packet has large size with
+many address parameters), plus additional server parameters.
+
+Later this chunk causes the panic in skb_put_data():
+
+  sctp_packet_transmit()
+      sctp_packet_pack()
+          skb_put_data(nskb, chunk->skb->data, chunk->skb->len);
+
+'nskb' (head skb) was previously allocated with packet->size
+from u16 'chunk->chunk_hdr->length'.
+
+As suggested by Marcelo we should check the chunk's length in
+_sctp_make_chunk() before trying to allocate skb for it and
+discard a chunk if its size is bigger than SCTP_MAX_CHUNK_LEN.
+
+Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Acked-by: Neil Horman <nhorman@tuxdriver.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/sm_make_chunk.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/net/sctp/sm_make_chunk.c
++++ b/net/sctp/sm_make_chunk.c
+@@ -1373,9 +1373,14 @@ static struct sctp_chunk *_sctp_make_chu
+       sctp_chunkhdr_t *chunk_hdr;
+       struct sk_buff *skb;
+       struct sock *sk;
++      int chunklen;
++
++      chunklen = SCTP_PAD4(sizeof(*chunk_hdr) + paylen);
++      if (chunklen > SCTP_MAX_CHUNK_LEN)
++              goto nodata;
+       /* No need to allocate LL here, as this is only a chunk. */
+-      skb = alloc_skb(SCTP_PAD4(sizeof(sctp_chunkhdr_t) + paylen), gfp);
++      skb = alloc_skb(chunklen, gfp);
+       if (!skb)
+               goto nodata;
index 046dcd0f89970902866bf36223fc5d4c905be8b9..051ff48119ad506dea79b3bfe132fd32d51c3603 100644 (file)
@@ -30,3 +30,29 @@ x86-speculation-use-indirect-branch-prediction-barrier-in-context-switch.patch
 md-only-allow-remove_and_add_spares-when-no-sync_thread-running.patch
 netlink-put-module-reference-if-dump-start-fails.patch
 x86-apic-vector-handle-legacy-irq-data-correctly.patch
+bridge-check-brport-attr-show-in-brport_show.patch
+fib_semantics-don-t-match-route-with-mismatching-tclassid.patch
+hdlc_ppp-carrier-detect-ok-don-t-turn-off-negotiation.patch
+ipv6-sit-work-around-bogus-gcc-8-wrestrict-warning.patch
+net-fix-race-on-decreasing-number-of-tx-queues.patch
+net-ipv4-don-t-allow-setting-net.ipv4.route.min_pmtu-below-68.patch
+netlink-ensure-to-loop-over-all-netns-in-genlmsg_multicast_allns.patch
+ppp-prevent-unregistered-channels-from-connecting-to-ppp-units.patch
+udplite-fix-partial-checksum-initialization.patch
+sctp-fix-dst-refcnt-leak-in-sctp_v4_get_dst.patch
+mlxsw-spectrum_switchdev-check-success-of-fdb-add-operation.patch
+net-phy-fix-phy_start-to-consider-phy_ignore_interrupt.patch
+tcp-honor-the-eor-bit-in-tcp_mtu_probe.patch
+rxrpc-fix-send-in-rxrpc_send_data_packet.patch
+tcp_bbr-better-deal-with-suboptimal-gso.patch
+sctp-fix-dst-refcnt-leak-in-sctp_v6_get_dst.patch
+s390-qeth-fix-underestimated-count-of-buffer-elements.patch
+s390-qeth-fix-setip-command-handling.patch
+s390-qeth-fix-overestimated-count-of-buffer-elements.patch
+s390-qeth-fix-ip-removal-on-offline-cards.patch
+s390-qeth-fix-double-free-on-ip-add-remove-race.patch
+s390-qeth-fix-ip-address-lookup-for-l3-devices.patch
+s390-qeth-fix-ipa-command-submission-race.patch
+
+sctp-verify-size-of-a-new-chunk-in-_sctp_make_chunk.patch
+
diff --git a/queue-4.9/tcp-honor-the-eor-bit-in-tcp_mtu_probe.patch b/queue-4.9/tcp-honor-the-eor-bit-in-tcp_mtu_probe.patch
new file mode 100644 (file)
index 0000000..b1646a8
--- /dev/null
@@ -0,0 +1,69 @@
+From foo@baz Thu Mar  8 06:55:02 PST 2018
+From: Ilya Lesokhin <ilyal@mellanox.com>
+Date: Mon, 12 Feb 2018 12:57:04 +0200
+Subject: tcp: Honor the eor bit in tcp_mtu_probe
+
+From: Ilya Lesokhin <ilyal@mellanox.com>
+
+
+[ Upstream commit 808cf9e38cd7923036a99f459ccc8cf2955e47af ]
+
+Avoid SKB coalescing if eor bit is set in one of the relevant
+SKBs.
+
+Fixes: c134ecb87817 ("tcp: Make use of MSG_EOR in tcp_sendmsg")
+Signed-off-by: Ilya Lesokhin <ilyal@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c |   25 +++++++++++++++++++++++++
+ 1 file changed, 25 insertions(+)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -1907,6 +1907,24 @@ static inline void tcp_mtu_check_reprobe
+       }
+ }
++static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len)
++{
++      struct sk_buff *skb, *next;
++
++      skb = tcp_send_head(sk);
++      tcp_for_write_queue_from_safe(skb, next, sk) {
++              if (len <= skb->len)
++                      break;
++
++              if (unlikely(TCP_SKB_CB(skb)->eor))
++                      return false;
++
++              len -= skb->len;
++      }
++
++      return true;
++}
++
+ /* Create a new MTU probe if we are ready.
+  * MTU probe is regularly attempting to increase the path MTU by
+  * deliberately sending larger packets.  This discovers routing
+@@ -1979,6 +1997,9 @@ static int tcp_mtu_probe(struct sock *sk
+                       return 0;
+       }
++      if (!tcp_can_coalesce_send_queue_head(sk, probe_size))
++              return -1;
++
+       /* We're allowed to probe.  Build it now. */
+       nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false);
+       if (!nskb)
+@@ -2014,6 +2035,10 @@ static int tcp_mtu_probe(struct sock *sk
+                       /* We've eaten all the data from this skb.
+                        * Throw it away. */
+                       TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
++                      /* If this is the last SKB we copy and eor is set
++                       * we need to propagate it to the new skb.
++                       */
++                      TCP_SKB_CB(nskb)->eor = TCP_SKB_CB(skb)->eor;
+                       tcp_unlink_write_queue(skb, sk);
+                       sk_wmem_free_skb(sk, skb);
+               } else {
diff --git a/queue-4.9/tcp_bbr-better-deal-with-suboptimal-gso.patch b/queue-4.9/tcp_bbr-better-deal-with-suboptimal-gso.patch
new file mode 100644 (file)
index 0000000..974755f
--- /dev/null
@@ -0,0 +1,88 @@
+From foo@baz Thu Mar  8 06:55:02 PST 2018
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 21 Feb 2018 06:43:03 -0800
+Subject: tcp_bbr: better deal with suboptimal GSO
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 350c9f484bde93ef229682eedd98cd5f74350f7f ]
+
+BBR uses tcp_tso_autosize() in an attempt to probe what would be the
+burst sizes and to adjust cwnd in bbr_target_cwnd() with following
+gold formula :
+
+/* Allow enough full-sized skbs in flight to utilize end systems. */
+cwnd += 3 * bbr->tso_segs_goal;
+
+But GSO can be lacking or be constrained to very small
+units (ip link set dev ... gso_max_segs 2)
+
+What we really want is to have enough packets in flight so that both
+GSO and GRO are efficient.
+
+So in the case GSO is off or downgraded, we still want to have the same
+number of packets in flight as if GSO/TSO was fully operational, so
+that GRO can hopefully be working efficiently.
+
+To fix this issue, we make tcp_tso_autosize() unaware of
+sk->sk_gso_max_segs
+
+Only tcp_tso_segs() has to enforce the gso_max_segs limit.
+
+Tested:
+
+ethtool -K eth0 tso off gso off
+tc qd replace dev eth0 root pfifo_fast
+
+Before patch:
+for f in {1..5}; do ./super_netperf 1 -H lpaa24 -- -K bbr; done
+    691  (ss -temoi shows cwnd is stuck around 6 )
+    667
+    651
+    631
+    517
+
+After patch :
+# for f in {1..5}; do ./super_netperf 1 -H lpaa24 -- -K bbr; done
+   1733 (ss -temoi shows cwnd is around 386 )
+   1778
+   1746
+   1781
+   1718
+
+Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Oleksandr Natalenko <oleksandr@natalenko.name>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c |    8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -1580,7 +1580,7 @@ u32 tcp_tso_autosize(const struct sock *
+        */
+       segs = max_t(u32, bytes / mss_now, min_tso_segs);
+-      return min_t(u32, segs, sk->sk_gso_max_segs);
++      return segs;
+ }
+ EXPORT_SYMBOL(tcp_tso_autosize);
+@@ -1592,8 +1592,10 @@ static u32 tcp_tso_segs(struct sock *sk,
+       const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
+       u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
+-      return tso_segs ? :
+-              tcp_tso_autosize(sk, mss_now, sysctl_tcp_min_tso_segs);
++      if (!tso_segs)
++              tso_segs = tcp_tso_autosize(sk, mss_now,
++                                          sysctl_tcp_min_tso_segs);
++      return min_t(u32, tso_segs, sk->sk_gso_max_segs);
+ }
+ /* Returns the portion of skb which can be sent right away */
diff --git a/queue-4.9/udplite-fix-partial-checksum-initialization.patch b/queue-4.9/udplite-fix-partial-checksum-initialization.patch
new file mode 100644 (file)
index 0000000..9e5f5fc
--- /dev/null
@@ -0,0 +1,76 @@
+From foo@baz Thu Mar  8 06:55:02 PST 2018
+From: Alexey Kodanev <alexey.kodanev@oracle.com>
+Date: Thu, 15 Feb 2018 20:18:43 +0300
+Subject: udplite: fix partial checksum initialization
+
+From: Alexey Kodanev <alexey.kodanev@oracle.com>
+
+
+[ Upstream commit 15f35d49c93f4fa9875235e7bf3e3783d2dd7a1b ]
+
+Since UDP-Lite is always using checksum, the following path is
+triggered when calculating pseudo header for it:
+
+  udp4_csum_init() or udp6_csum_init()
+    skb_checksum_init_zero_check()
+      __skb_checksum_validate_complete()
+
+The problem can appear if skb->len is less than CHECKSUM_BREAK. In
+this particular case __skb_checksum_validate_complete() also invokes
+__skb_checksum_complete(skb). If UDP-Lite is using partial checksum
+that covers only part of a packet, the function will return bad
+checksum and the packet will be dropped.
+
+It can be fixed if we skip skb_checksum_init_zero_check() and only
+set the required pseudo header checksum for UDP-Lite with partial
+checksum before udp4_csum_init()/udp6_csum_init() functions return.
+
+Fixes: ed70fcfcee95 ("net: Call skb_checksum_init in IPv4")
+Fixes: e4f45b7f40bd ("net: Call skb_checksum_init in IPv6")
+Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/udplite.h   |    1 +
+ net/ipv4/udp.c          |    5 +++++
+ net/ipv6/ip6_checksum.c |    5 +++++
+ 3 files changed, 11 insertions(+)
+
+--- a/include/net/udplite.h
++++ b/include/net/udplite.h
+@@ -62,6 +62,7 @@ static inline int udplite_checksum_init(
+               UDP_SKB_CB(skb)->cscov = cscov;
+               if (skb->ip_summed == CHECKSUM_COMPLETE)
+                       skb->ip_summed = CHECKSUM_NONE;
++              skb->csum_valid = 0;
+         }
+       return 0;
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -1713,6 +1713,11 @@ static inline int udp4_csum_init(struct
+               err = udplite_checksum_init(skb, uh);
+               if (err)
+                       return err;
++
++              if (UDP_SKB_CB(skb)->partial_cov) {
++                      skb->csum = inet_compute_pseudo(skb, proto);
++                      return 0;
++              }
+       }
+       /* Note, we are only interested in != 0 or == 0, thus the
+--- a/net/ipv6/ip6_checksum.c
++++ b/net/ipv6/ip6_checksum.c
+@@ -72,6 +72,11 @@ int udp6_csum_init(struct sk_buff *skb,
+               err = udplite_checksum_init(skb, uh);
+               if (err)
+                       return err;
++
++              if (UDP_SKB_CB(skb)->partial_cov) {
++                      skb->csum = ip6_compute_pseudo(skb, proto);
++                      return 0;
++              }
+       }
+       /* To support RFC 6936 (allow zero checksum in UDP/IPV6 for tunnels)