git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.12-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 15 Sep 2017 06:21:25 +0000 (23:21 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 15 Sep 2017 06:21:25 +0000 (23:21 -0700)
added patches:
bpf-fix-map-value-attribute-for-hash-of-maps.patch
bridge-switchdev-clear-forward-mark-when-transmitting-packet.patch
cxgb4-fix-stack-out-of-bounds-read-due-to-wrong-size-to-t4_record_mbox.patch
fsl-man-inherit-parent-device-and-of_node.patch
gianfar-fix-tx-flow-control-deactivation.patch
ip6_gre-update-mtu-properly-in-ip6gre_err.patch
ipv6-accept-64k-1-packet-length-in-ip6_find_1stfragopt.patch
ipv6-add-rcu-grace-period-before-freeing-fib6_node.patch
ipv6-do-not-set-sk_destruct-in-ipv6_addrform-sockopt.patch
ipv6-fix-memory-leak-with-multiple-tables-during-netns-destruction.patch
ipv6-fix-sparse-warning-on-rt6i_node.patch
ipv6-fix-typo-in-fib6_net_exit.patch
ipv6-set-dst.obsolete-when-a-cached-route-has-expired.patch
kcm-do-not-attach-pf_kcm-sockets-to-avoid-deadlock.patch
macsec-add-genl-family-module-alias.patch
mlxsw-spectrum-forbid-linking-to-devices-that-have-uppers.patch
net-bcmgenet-be-drop-monitor-friendly.patch
net-dsa-bcm_sf2-fix-number-of-cfp-entries-for-bcm7278.patch
net-fec-allow-reception-of-frames-bigger-than-1522-bytes.patch
net-mlx5-e-switch-unload-the-representors-in-the-correct-order.patch
net-mlx5-fix-arm-srq-command-for-issi-version-0.patch
net-mlx5e-check-for-qos-capability-in-dcbnl_initialize.patch
net-mlx5e-don-t-override-user-rss-upon-set-channels.patch
net-mlx5e-fix-cq-moderation-mode-not-set-properly.patch
net-mlx5e-fix-dangling-page-pointer-on-dma-mapping-error.patch
net-mlx5e-fix-dcb_cap_attr_dcbx-capability-for-dcbnl-getcap.patch
net-mlx5e-fix-inline-header-size-for-small-packets.patch
net-mlx5e-properly-resolve-tc-offloaded-ipv6-vxlan-tunnel-source-address.patch
net-mvpp2-fix-the-mac-address-used-when-using-ppv2.2.patch
net-systemport-be-drop-monitor-friendly.patch
net-systemport-free-dma-coherent-descriptors-on-errors.patch
netvsc-fix-deadlock-betwen-link-status-and-removal.patch
packet-don-t-write-vnet-header-beyond-end-of-buffer.patch
qlge-avoid-memcpy-buffer-overflow.patch
revert-net-fix-percpu-memory-leaks.patch
revert-net-phy-correctly-process-phy_halted-in-phy_stop_machine.patch
revert-net-use-lib-percpu_counter-api-for-fragmentation-mem-accounting.patch
sctp-avoid-out-of-bounds-reads-from-address-storage.patch
sctp-fix-missing-wake-ups-in-some-situations.patch
tipc-fix-tipc_sk_reinit-handling-of-eagain.patch
udp-on-peeking-bad-csum-drop-packets-even-if-not-at-head.patch
udp6-set-rx_dst_cookie-on-rx_dst-updates.patch
vhost_net-correctly-check-tx-avail-during-rx-busy-polling.patch

46 files changed:
queue-4.12/bpf-fix-map-value-attribute-for-hash-of-maps.patch [new file with mode: 0644]
queue-4.12/bridge-switchdev-clear-forward-mark-when-transmitting-packet.patch [new file with mode: 0644]
queue-4.12/cxgb4-fix-stack-out-of-bounds-read-due-to-wrong-size-to-t4_record_mbox.patch [new file with mode: 0644]
queue-4.12/fsl-man-inherit-parent-device-and-of_node.patch [new file with mode: 0644]
queue-4.12/gianfar-fix-tx-flow-control-deactivation.patch [new file with mode: 0644]
queue-4.12/ip6_gre-update-mtu-properly-in-ip6gre_err.patch [new file with mode: 0644]
queue-4.12/ipv6-accept-64k-1-packet-length-in-ip6_find_1stfragopt.patch [new file with mode: 0644]
queue-4.12/ipv6-add-rcu-grace-period-before-freeing-fib6_node.patch [new file with mode: 0644]
queue-4.12/ipv6-do-not-set-sk_destruct-in-ipv6_addrform-sockopt.patch [new file with mode: 0644]
queue-4.12/ipv6-fix-memory-leak-with-multiple-tables-during-netns-destruction.patch [new file with mode: 0644]
queue-4.12/ipv6-fix-sparse-warning-on-rt6i_node.patch [new file with mode: 0644]
queue-4.12/ipv6-fix-typo-in-fib6_net_exit.patch [new file with mode: 0644]
queue-4.12/ipv6-set-dst.obsolete-when-a-cached-route-has-expired.patch [new file with mode: 0644]
queue-4.12/kcm-do-not-attach-pf_kcm-sockets-to-avoid-deadlock.patch [new file with mode: 0644]
queue-4.12/macsec-add-genl-family-module-alias.patch [new file with mode: 0644]
queue-4.12/mlxsw-spectrum-forbid-linking-to-devices-that-have-uppers.patch [new file with mode: 0644]
queue-4.12/net-bcmgenet-be-drop-monitor-friendly.patch [new file with mode: 0644]
queue-4.12/net-dsa-bcm_sf2-fix-number-of-cfp-entries-for-bcm7278.patch [new file with mode: 0644]
queue-4.12/net-fec-allow-reception-of-frames-bigger-than-1522-bytes.patch [new file with mode: 0644]
queue-4.12/net-mlx5-e-switch-unload-the-representors-in-the-correct-order.patch [new file with mode: 0644]
queue-4.12/net-mlx5-fix-arm-srq-command-for-issi-version-0.patch [new file with mode: 0644]
queue-4.12/net-mlx5e-check-for-qos-capability-in-dcbnl_initialize.patch [new file with mode: 0644]
queue-4.12/net-mlx5e-don-t-override-user-rss-upon-set-channels.patch [new file with mode: 0644]
queue-4.12/net-mlx5e-fix-cq-moderation-mode-not-set-properly.patch [new file with mode: 0644]
queue-4.12/net-mlx5e-fix-dangling-page-pointer-on-dma-mapping-error.patch [new file with mode: 0644]
queue-4.12/net-mlx5e-fix-dcb_cap_attr_dcbx-capability-for-dcbnl-getcap.patch [new file with mode: 0644]
queue-4.12/net-mlx5e-fix-inline-header-size-for-small-packets.patch [new file with mode: 0644]
queue-4.12/net-mlx5e-properly-resolve-tc-offloaded-ipv6-vxlan-tunnel-source-address.patch [new file with mode: 0644]
queue-4.12/net-mvpp2-fix-the-mac-address-used-when-using-ppv2.2.patch [new file with mode: 0644]
queue-4.12/net-systemport-be-drop-monitor-friendly.patch [new file with mode: 0644]
queue-4.12/net-systemport-free-dma-coherent-descriptors-on-errors.patch [new file with mode: 0644]
queue-4.12/netvsc-fix-deadlock-betwen-link-status-and-removal.patch [new file with mode: 0644]
queue-4.12/packet-don-t-write-vnet-header-beyond-end-of-buffer.patch [new file with mode: 0644]
queue-4.12/qlge-avoid-memcpy-buffer-overflow.patch [new file with mode: 0644]
queue-4.12/revert-net-fix-percpu-memory-leaks.patch [new file with mode: 0644]
queue-4.12/revert-net-phy-correctly-process-phy_halted-in-phy_stop_machine.patch [new file with mode: 0644]
queue-4.12/revert-net-use-lib-percpu_counter-api-for-fragmentation-mem-accounting.patch [new file with mode: 0644]
queue-4.12/sctp-avoid-out-of-bounds-reads-from-address-storage.patch [new file with mode: 0644]
queue-4.12/sctp-fix-missing-wake-ups-in-some-situations.patch [new file with mode: 0644]
queue-4.12/series [new file with mode: 0644]
queue-4.12/tipc-fix-tipc_sk_reinit-handling-of-eagain.patch [new file with mode: 0644]
queue-4.12/udp-on-peeking-bad-csum-drop-packets-even-if-not-at-head.patch [new file with mode: 0644]
queue-4.12/udp6-set-rx_dst_cookie-on-rx_dst-updates.patch [new file with mode: 0644]
queue-4.12/vhost_net-correctly-check-tx-avail-during-rx-busy-polling.patch [new file with mode: 0644]
queue-4.13/series [new file with mode: 0644]
queue-4.9/series [new file with mode: 0644]

diff --git a/queue-4.12/bpf-fix-map-value-attribute-for-hash-of-maps.patch b/queue-4.12/bpf-fix-map-value-attribute-for-hash-of-maps.patch
new file mode 100644 (file)
index 0000000..36c275e
--- /dev/null
@@ -0,0 +1,103 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Wed, 23 Aug 2017 00:06:09 +0200
+Subject: bpf: fix map value attribute for hash of maps
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+
+[ Upstream commit 33ba43ed0afc13a29b1314e3e45a9938d310ba13 ]
+
+Currently, iproute2's BPF ELF loader works fine with array of maps
+when retrieving the fd from a pinned node and doing a selfcheck
+against the provided map attributes from the object file, but we
+fail to do the same for hash of maps and thus refuse to get the
+map from pinned node.
+
+Reason is that when allocating hash of maps, fd_htab_map_alloc() will
+set the value size to sizeof(void *), and any user space map creation
+requests are forced to set 4 bytes as value size. Thus, selfcheck
+will complain about exposed 8 bytes on 64 bit archs vs. 4 bytes from
+object file as value size. Contract is that fdinfo or BPF_MAP_GET_FD_BY_ID
+returns the value size used to create the map.
+
+Fix it by handling it the same way as we do for array of maps, which
+means that we leave value size at 4 bytes and in the allocation phase
+round up value size to 8 bytes. alloc_htab_elem() needs an adjustment
+in order to copy rounded up 8 bytes due to bpf_fd_htab_map_update_elem()
+calling into htab_map_update_elem() with the pointer of the map
+pointer as value. Unlike array of maps where we just xchg(), we're
+using the generic htab_map_update_elem() callback also used from helper
+calls, which published the key/value already on return, so we need
+to ensure to memcpy() the right size.
+
+Fixes: bcc6b1b7ebf8 ("bpf: Add hash of maps support")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Acked-by: Martin KaFai Lau <kafai@fb.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/hashtab.c |   30 +++++++++++++++++-------------
+ 1 file changed, 17 insertions(+), 13 deletions(-)
+
+--- a/kernel/bpf/hashtab.c
++++ b/kernel/bpf/hashtab.c
+@@ -652,12 +652,27 @@ static void pcpu_copy_value(struct bpf_h
+       }
+ }
++static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
++{
++      return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS &&
++             BITS_PER_LONG == 64;
++}
++
++static u32 htab_size_value(const struct bpf_htab *htab, bool percpu)
++{
++      u32 size = htab->map.value_size;
++
++      if (percpu || fd_htab_map_needs_adjust(htab))
++              size = round_up(size, 8);
++      return size;
++}
++
+ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
+                                        void *value, u32 key_size, u32 hash,
+                                        bool percpu, bool onallcpus,
+                                        struct htab_elem *old_elem)
+ {
+-      u32 size = htab->map.value_size;
++      u32 size = htab_size_value(htab, percpu);
+       bool prealloc = htab_is_prealloc(htab);
+       struct htab_elem *l_new, **pl_new;
+       void __percpu *pptr;
+@@ -696,9 +711,6 @@ static struct htab_elem *alloc_htab_elem
+       memcpy(l_new->key, key, key_size);
+       if (percpu) {
+-              /* round up value_size to 8 bytes */
+-              size = round_up(size, 8);
+-
+               if (prealloc) {
+                       pptr = htab_elem_get_ptr(l_new, key_size);
+               } else {
+@@ -1209,17 +1221,9 @@ const struct bpf_map_ops htab_lru_percpu
+ static struct bpf_map *fd_htab_map_alloc(union bpf_attr *attr)
+ {
+-      struct bpf_map *map;
+-
+       if (attr->value_size != sizeof(u32))
+               return ERR_PTR(-EINVAL);
+-
+-      /* pointer is stored internally */
+-      attr->value_size = sizeof(void *);
+-      map = htab_map_alloc(attr);
+-      attr->value_size = sizeof(u32);
+-
+-      return map;
++      return htab_map_alloc(attr);
+ }
+ static void fd_htab_map_free(struct bpf_map *map)
diff --git a/queue-4.12/bridge-switchdev-clear-forward-mark-when-transmitting-packet.patch b/queue-4.12/bridge-switchdev-clear-forward-mark-when-transmitting-packet.patch
new file mode 100644 (file)
index 0000000..d7aeddc
--- /dev/null
@@ -0,0 +1,51 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Ido Schimmel <idosch@mellanox.com>
+Date: Fri, 1 Sep 2017 12:22:25 +0300
+Subject: bridge: switchdev: Clear forward mark when transmitting packet
+
+From: Ido Schimmel <idosch@mellanox.com>
+
+
+[ Upstream commit 79e99bdd60b484af9afe0147e85a13e66d5c1cdb ]
+
+Commit 6bc506b4fb06 ("bridge: switchdev: Add forward mark support for
+stacked devices") added the 'offload_fwd_mark' bit to the skb in order
+to allow drivers to indicate to the bridge driver that they already
+forwarded the packet in L2.
+
+In case the bit is set, before transmitting the packet from each port,
+the port's mark is compared with the mark stored in the skb's control
+block. If both marks are equal, we know the packet arrived from a switch
+device that already forwarded the packet and it's not re-transmitted.
+
+However, if the packet is transmitted from the bridge device itself
+(e.g., br0), we should clear the 'offload_fwd_mark' bit as the mark
+stored in the skb's control block isn't valid.
+
+This scenario can happen in rare cases where a packet was trapped during
+L3 forwarding and forwarded by the kernel to a bridge device.
+
+Fixes: 6bc506b4fb06 ("bridge: switchdev: Add forward mark support for stacked devices")
+Signed-off-by: Ido Schimmel <idosch@mellanox.com>
+Reported-by: Yotam Gigi <yotamg@mellanox.com>
+Tested-by: Yotam Gigi <yotamg@mellanox.com>
+Reviewed-by: Jiri Pirko <jiri@mellanox.com>
+Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_device.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/bridge/br_device.c
++++ b/net/bridge/br_device.c
+@@ -53,6 +53,9 @@ netdev_tx_t br_dev_xmit(struct sk_buff *
+       brstats->tx_bytes += skb->len;
+       u64_stats_update_end(&brstats->syncp);
++#ifdef CONFIG_NET_SWITCHDEV
++      skb->offload_fwd_mark = 0;
++#endif
+       BR_INPUT_SKB_CB(skb)->brdev = dev;
+       skb_reset_mac_header(skb);
diff --git a/queue-4.12/cxgb4-fix-stack-out-of-bounds-read-due-to-wrong-size-to-t4_record_mbox.patch b/queue-4.12/cxgb4-fix-stack-out-of-bounds-read-due-to-wrong-size-to-t4_record_mbox.patch
new file mode 100644 (file)
index 0000000..78246a4
--- /dev/null
@@ -0,0 +1,69 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Stefano Brivio <sbrivio@redhat.com>
+Date: Fri, 25 Aug 2017 22:48:48 +0200
+Subject: cxgb4: Fix stack out-of-bounds read due to wrong size to t4_record_mbox()
+
+From: Stefano Brivio <sbrivio@redhat.com>
+
+
+[ Upstream commit 0f3086868e8889a823a6e0f3d299102aa895d947 ]
+
+Passing commands for logging to t4_record_mbox() with size
+MBOX_LEN, when the actual command size is smaller,
+causes out-of-bounds stack accesses in t4_record_mbox() while
+copying command words here:
+
+       for (i = 0; i < size / 8; i++)
+               entry->cmd[i] = be64_to_cpu(cmd[i]);
+
+Up to 48 bytes from the stack are then leaked to debugfs.
+
+This happens whenever we send (and log) commands described by
+structs fw_sched_cmd (32 bytes leaked), fw_vi_rxmode_cmd (48),
+fw_hello_cmd (48), fw_bye_cmd (48), fw_initialize_cmd (48),
+fw_reset_cmd (48), fw_pfvf_cmd (32), fw_eq_eth_cmd (16),
+fw_eq_ctrl_cmd (32), fw_eq_ofld_cmd (32), fw_acl_mac_cmd(16),
+fw_rss_glb_config_cmd(32), fw_rss_vi_config_cmd(32),
+fw_devlog_cmd(32), fw_vi_enable_cmd(48), fw_port_cmd(32),
+fw_sched_cmd(32), fw_devlog_cmd(32).
+
+The cxgb4vf driver got this right instead.
+
+When we call t4_record_mbox() to log a command reply, a MBOX_LEN
+size can be used though, as get_mbox_rpl() will fill cmd_rpl up
+completely.
+
+Fixes: 7f080c3f2ff0 ("cxgb4: Add support to enable logging of firmware mailbox commands")
+Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/chelsio/cxgb4/t4_hw.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
++++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+@@ -369,12 +369,12 @@ int t4_wr_mbox_meat_timeout(struct adapt
+               list_del(&entry.list);
+               spin_unlock(&adap->mbox_lock);
+               ret = (v == MBOX_OWNER_FW) ? -EBUSY : -ETIMEDOUT;
+-              t4_record_mbox(adap, cmd, MBOX_LEN, access, ret);
++              t4_record_mbox(adap, cmd, size, access, ret);
+               return ret;
+       }
+       /* Copy in the new mailbox command and send it on its way ... */
+-      t4_record_mbox(adap, cmd, MBOX_LEN, access, 0);
++      t4_record_mbox(adap, cmd, size, access, 0);
+       for (i = 0; i < size; i += 8)
+               t4_write_reg64(adap, data_reg + i, be64_to_cpu(*p++));
+@@ -426,7 +426,7 @@ int t4_wr_mbox_meat_timeout(struct adapt
+       }
+       ret = (pcie_fw & PCIE_FW_ERR_F) ? -ENXIO : -ETIMEDOUT;
+-      t4_record_mbox(adap, cmd, MBOX_LEN, access, ret);
++      t4_record_mbox(adap, cmd, size, access, ret);
+       dev_err(adap->pdev_dev, "command %#x in mailbox %d timed out\n",
+               *(const u8 *)cmd, mbox);
+       t4_report_fw_error(adap);
diff --git a/queue-4.12/fsl-man-inherit-parent-device-and-of_node.patch b/queue-4.12/fsl-man-inherit-parent-device-and-of_node.patch
new file mode 100644 (file)
index 0000000..3e8262a
--- /dev/null
@@ -0,0 +1,40 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Tue, 22 Aug 2017 15:24:47 -0700
+Subject: fsl/man: Inherit parent device and of_node
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+
+[ Upstream commit a1a50c8e4c241a505b7270e1a3c6e50d94e794b1 ]
+
+Junote Cai reported that he was not able to get a DSA setup involving the
+Freescale DPAA/FMAN driver to work and narrowed it down to
+of_find_net_device_by_node(). This function requires the network device's
+device reference to be correctly set which is the case here, though we have
+lost any device_node association there.
+
+The problem is that dpaa_eth_add_device() allocates a "dpaa-ethernet" platform
+device, and later on dpaa_eth_probe() is called but SET_NETDEV_DEV() won't be
+propagating &pdev->dev.of_node properly. Fix this by inheriting both the parent
+device and the of_node when dpaa_eth_add_device() creates the platform device.
+
+Fixes: 3933961682a3 ("fsl/fman: Add FMan MAC driver")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/freescale/fman/mac.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/ethernet/freescale/fman/mac.c
++++ b/drivers/net/ethernet/freescale/fman/mac.c
+@@ -623,6 +623,8 @@ static struct platform_device *dpaa_eth_
+               goto no_mem;
+       }
++      pdev->dev.of_node = node;
++      pdev->dev.parent = priv->dev;
+       set_dma_ops(&pdev->dev, get_dma_ops(priv->dev));
+       ret = platform_device_add_data(pdev, &data, sizeof(data));
diff --git a/queue-4.12/gianfar-fix-tx-flow-control-deactivation.patch b/queue-4.12/gianfar-fix-tx-flow-control-deactivation.patch
new file mode 100644 (file)
index 0000000..d263bf9
--- /dev/null
@@ -0,0 +1,39 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Claudiu Manoil <claudiu.manoil@nxp.com>
+Date: Mon, 4 Sep 2017 10:45:28 +0300
+Subject: gianfar: Fix Tx flow control deactivation
+
+From: Claudiu Manoil <claudiu.manoil@nxp.com>
+
+
+[ Upstream commit 5d621672bc1a1e5090c1ac5432a18c79e0e13e03 ]
+
+The wrong register is checked for the Tx flow control bit,
+it should have been maccfg1 not maccfg2.
+This went unnoticed for so long probably because the impact is
+hardly visible, not to mention the tangled code from adjust_link().
+First, link flow control (i.e. handling of Rx/Tx link level pause frames)
+is disabled by default (needs to be enabled via 'ethtool -A').
+Secondly, maccfg2 always returns 0 for tx_flow_oldval (except for a few
+old boards), which results in Tx flow control remaining always on
+once activated.
+
+Fixes: 45b679c9a3ccd9e34f28e6ec677b812a860eb8eb ("gianfar: Implement PAUSE frame generation support")
+Signed-off-by: Claudiu Manoil <claudiu.manoil@nxp.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/freescale/gianfar.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/freescale/gianfar.c
++++ b/drivers/net/ethernet/freescale/gianfar.c
+@@ -3687,7 +3687,7 @@ static noinline void gfar_update_link_st
+               u32 tempval1 = gfar_read(&regs->maccfg1);
+               u32 tempval = gfar_read(&regs->maccfg2);
+               u32 ecntrl = gfar_read(&regs->ecntrl);
+-              u32 tx_flow_oldval = (tempval & MACCFG1_TX_FLOW);
++              u32 tx_flow_oldval = (tempval1 & MACCFG1_TX_FLOW);
+               if (phydev->duplex != priv->oldduplex) {
+                       if (!(phydev->duplex))
diff --git a/queue-4.12/ip6_gre-update-mtu-properly-in-ip6gre_err.patch b/queue-4.12/ip6_gre-update-mtu-properly-in-ip6gre_err.patch
new file mode 100644 (file)
index 0000000..88f98df
--- /dev/null
@@ -0,0 +1,46 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Tue, 5 Sep 2017 17:26:33 +0800
+Subject: ip6_gre: update mtu properly in ip6gre_err
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit 5c25f30c93fdc5bf25e62101aeaae7a4f9b421b3 ]
+
+Now when processing ICMPV6_PKT_TOOBIG, ip6gre_err only subtracts the
+offset of gre header from mtu info. The expected mtu of gre device
+should also subtract gre header. Otherwise, the next packets still
+can't be sent out.
+
+Jianlin found this issue when using the topo:
+  client(ip6gre)<---->(nic1)route(nic2)<----->(ip6gre)server
+
+and reducing nic2's mtu, then both tcp and sctp's performance with
+big size data became 0.
+
+This patch is to fix it by also subtracting grehdr (tun->tun_hlen)
+from mtu info when updating gre device's mtu in ip6gre_err(). It
+also needs to subtract ETH_HLEN if gre dev's type is ARPHRD_ETHER.
+
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_gre.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -432,7 +432,9 @@ static void ip6gre_err(struct sk_buff *s
+               }
+               break;
+       case ICMPV6_PKT_TOOBIG:
+-              mtu = be32_to_cpu(info) - offset;
++              mtu = be32_to_cpu(info) - offset - t->tun_hlen;
++              if (t->dev->type == ARPHRD_ETHER)
++                      mtu -= ETH_HLEN;
+               if (mtu < IPV6_MIN_MTU)
+                       mtu = IPV6_MIN_MTU;
+               t->dev->mtu = mtu;
diff --git a/queue-4.12/ipv6-accept-64k-1-packet-length-in-ip6_find_1stfragopt.patch b/queue-4.12/ipv6-accept-64k-1-packet-length-in-ip6_find_1stfragopt.patch
new file mode 100644 (file)
index 0000000..d247d5d
--- /dev/null
@@ -0,0 +1,50 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Stefano Brivio <sbrivio@redhat.com>
+Date: Fri, 18 Aug 2017 14:40:53 +0200
+Subject: ipv6: accept 64k - 1 packet length in ip6_find_1stfragopt()
+
+From: Stefano Brivio <sbrivio@redhat.com>
+
+
+[ Upstream commit 3de33e1ba0506723ab25734e098cf280ecc34756 ]
+
+A packet length of exactly IPV6_MAXPLEN is allowed, we should
+refuse parsing options only if the size is 64KiB or more.
+
+While at it, remove one extra variable and one assignment which
+were also introduced by the commit that introduced the size
+check. Checking the sum 'offset + len' and only later adding
+'len' to 'offset' doesn't provide any advantage over directly
+summing to 'offset' and checking it.
+
+Fixes: 6399f1fae4ec ("ipv6: avoid overflow of offset in ip6_find_1stfragopt")
+Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/output_core.c |    6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+--- a/net/ipv6/output_core.c
++++ b/net/ipv6/output_core.c
+@@ -86,7 +86,6 @@ int ip6_find_1stfragopt(struct sk_buff *
+       while (offset <= packet_len) {
+               struct ipv6_opt_hdr *exthdr;
+-              unsigned int len;
+               switch (**nexthdr) {
+@@ -112,10 +111,9 @@ int ip6_find_1stfragopt(struct sk_buff *
+               exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
+                                                offset);
+-              len = ipv6_optlen(exthdr);
+-              if (len + offset >= IPV6_MAXPLEN)
++              offset += ipv6_optlen(exthdr);
++              if (offset > IPV6_MAXPLEN)
+                       return -EINVAL;
+-              offset += len;
+               *nexthdr = &exthdr->nexthdr;
+       }
diff --git a/queue-4.12/ipv6-add-rcu-grace-period-before-freeing-fib6_node.patch b/queue-4.12/ipv6-add-rcu-grace-period-before-freeing-fib6_node.patch
new file mode 100644 (file)
index 0000000..5a1e06c
--- /dev/null
@@ -0,0 +1,162 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Wei Wang <weiwan@google.com>
+Date: Mon, 21 Aug 2017 09:47:10 -0700
+Subject: ipv6: add rcu grace period before freeing fib6_node
+
+From: Wei Wang <weiwan@google.com>
+
+
+[ Upstream commit c5cff8561d2d0006e972bd114afd51f082fee77c ]
+
+We currently keep rt->rt6i_node pointing to the fib6_node for the route.
+And some functions make use of this pointer to dereference the fib6_node
+from rt structure, e.g. rt6_check(). However, as there is neither
+refcount nor rcu taken when dereferencing rt->rt6i_node, it could
+potentially cause crashes as rt->rt6i_node could be set to NULL by other
+CPUs when doing a route deletion.
+This patch introduces an rcu grace period before freeing fib6_node and
+makes sure the functions that dereference it take rcu_read_lock().
+
+Note: there is no "Fixes" tag because this bug was there in a very
+early stage.
+
+Signed-off-by: Wei Wang <weiwan@google.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Martin KaFai Lau <kafai@fb.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/ip6_fib.h |   29 ++++++++++++++++++++++++++++-
+ net/ipv6/ip6_fib.c    |   20 ++++++++++++++++----
+ net/ipv6/route.c      |   14 +++++++++++---
+ 3 files changed, 55 insertions(+), 8 deletions(-)
+
+--- a/include/net/ip6_fib.h
++++ b/include/net/ip6_fib.h
+@@ -70,6 +70,7 @@ struct fib6_node {
+       __u16                   fn_flags;
+       int                     fn_sernum;
+       struct rt6_info         *rr_ptr;
++      struct rcu_head         rcu;
+ };
+ #ifndef CONFIG_IPV6_SUBTREES
+@@ -167,13 +168,39 @@ static inline void rt6_update_expires(st
+       rt0->rt6i_flags |= RTF_EXPIRES;
+ }
++/* Function to safely get fn->sernum for passed in rt
++ * and store result in passed in cookie.
++ * Return true if we can get cookie safely
++ * Return false if not
++ */
++static inline bool rt6_get_cookie_safe(const struct rt6_info *rt,
++                                     u32 *cookie)
++{
++      struct fib6_node *fn;
++      bool status = false;
++
++      rcu_read_lock();
++      fn = rcu_dereference(rt->rt6i_node);
++
++      if (fn) {
++              *cookie = fn->fn_sernum;
++              status = true;
++      }
++
++      rcu_read_unlock();
++      return status;
++}
++
+ static inline u32 rt6_get_cookie(const struct rt6_info *rt)
+ {
++      u32 cookie = 0;
++
+       if (rt->rt6i_flags & RTF_PCPU ||
+           (unlikely(rt->dst.flags & DST_NOCACHE) && rt->dst.from))
+               rt = (struct rt6_info *)(rt->dst.from);
++      rt6_get_cookie_safe(rt, &cookie);
+-      return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
++      return cookie;
+ }
+ static inline void ip6_rt_put(struct rt6_info *rt)
+--- a/net/ipv6/ip6_fib.c
++++ b/net/ipv6/ip6_fib.c
+@@ -148,11 +148,23 @@ static struct fib6_node *node_alloc(void
+       return fn;
+ }
+-static void node_free(struct fib6_node *fn)
++static void node_free_immediate(struct fib6_node *fn)
++{
++      kmem_cache_free(fib6_node_kmem, fn);
++}
++
++static void node_free_rcu(struct rcu_head *head)
+ {
++      struct fib6_node *fn = container_of(head, struct fib6_node, rcu);
++
+       kmem_cache_free(fib6_node_kmem, fn);
+ }
++static void node_free(struct fib6_node *fn)
++{
++      call_rcu(&fn->rcu, node_free_rcu);
++}
++
+ static void rt6_rcu_free(struct rt6_info *rt)
+ {
+       call_rcu(&rt->dst.rcu_head, dst_rcu_free);
+@@ -599,9 +611,9 @@ insert_above:
+               if (!in || !ln) {
+                       if (in)
+-                              node_free(in);
++                              node_free_immediate(in);
+                       if (ln)
+-                              node_free(ln);
++                              node_free_immediate(ln);
+                       return ERR_PTR(-ENOMEM);
+               }
+@@ -1035,7 +1047,7 @@ int fib6_add(struct fib6_node *root, str
+                                  root, and then (in failure) stale node
+                                  in main tree.
+                                */
+-                              node_free(sfn);
++                              node_free_immediate(sfn);
+                               err = PTR_ERR(sn);
+                               goto failure;
+                       }
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -1289,7 +1289,9 @@ static void rt6_dst_from_metrics_check(s
+ static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
+ {
+-      if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
++      u32 rt_cookie;
++
++      if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
+               return NULL;
+       if (rt6_check_expired(rt))
+@@ -1357,8 +1359,14 @@ static void ip6_link_failure(struct sk_b
+               if (rt->rt6i_flags & RTF_CACHE) {
+                       dst_hold(&rt->dst);
+                       ip6_del_rt(rt);
+-              } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
+-                      rt->rt6i_node->fn_sernum = -1;
++              } else {
++                      struct fib6_node *fn;
++
++                      rcu_read_lock();
++                      fn = rcu_dereference(rt->rt6i_node);
++                      if (fn && (rt->rt6i_flags & RTF_DEFAULT))
++                              fn->fn_sernum = -1;
++                      rcu_read_unlock();
+               }
+       }
+ }
diff --git a/queue-4.12/ipv6-do-not-set-sk_destruct-in-ipv6_addrform-sockopt.patch b/queue-4.12/ipv6-do-not-set-sk_destruct-in-ipv6_addrform-sockopt.patch
new file mode 100644 (file)
index 0000000..272b51a
--- /dev/null
@@ -0,0 +1,61 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Mon, 28 Aug 2017 10:45:01 +0800
+Subject: ipv6: do not set sk_destruct in IPV6_ADDRFORM sockopt
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit e8d411d2980723b8f8ba8e4dd78b694c5fd9ea3e ]
+
+ChunYu found a kernel warn_on during syzkaller fuzzing:
+
+[40226.038539] WARNING: CPU: 5 PID: 23720 at net/ipv4/af_inet.c:152 inet_sock_destruct+0x78d/0x9a0
+[40226.144849] Call Trace:
+[40226.147590]  <IRQ>
+[40226.149859]  dump_stack+0xe2/0x186
+[40226.176546]  __warn+0x1a4/0x1e0
+[40226.180066]  warn_slowpath_null+0x31/0x40
+[40226.184555]  inet_sock_destruct+0x78d/0x9a0
+[40226.246355]  __sk_destruct+0xfa/0x8c0
+[40226.290612]  rcu_process_callbacks+0xaa0/0x18a0
+[40226.336816]  __do_softirq+0x241/0x75e
+[40226.367758]  irq_exit+0x1f6/0x220
+[40226.371458]  smp_apic_timer_interrupt+0x7b/0xa0
+[40226.376507]  apic_timer_interrupt+0x93/0xa0
+
+The warn_on happened when sk->sk_rmem_alloc wasn't 0 in inet_sock_destruct.
+As after commit f970bd9e3a06 ("udp: implement memory accounting helpers"),
+udp has changed to use udp_destruct_sock as sk_destruct where it would
+udp_rmem_release all rmem.
+
+But IPV6_ADDRFORM sockopt sets sk_destruct with inet_sock_destruct after
+changing family to PF_INET. If rmem is not 0 at that time, and there is
+no place to release rmem before calling inet_sock_destruct, the warn_on
+will be triggered.
+
+This patch is to fix it by not setting sk_destruct in IPV6_ADDRFORM sockopt
+any more. As IPV6_ADDRFORM sockopt only works for tcp and udp. TCP sock has
+already set its sk_destruct with inet_sock_destruct and UDP has set with
+udp_destruct_sock since they're created.
+
+Fixes: f970bd9e3a06 ("udp: implement memory accounting helpers")
+Reported-by: ChunYu Wang <chunwang@redhat.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ipv6_sockglue.c |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/net/ipv6/ipv6_sockglue.c
++++ b/net/ipv6/ipv6_sockglue.c
+@@ -242,7 +242,6 @@ static int do_ipv6_setsockopt(struct soc
+                       pktopt = xchg(&np->pktoptions, NULL);
+                       kfree_skb(pktopt);
+-                      sk->sk_destruct = inet_sock_destruct;
+                       /*
+                        * ... and add it to the refcnt debug socks count
+                        * in the new family. -acme
diff --git a/queue-4.12/ipv6-fix-memory-leak-with-multiple-tables-during-netns-destruction.patch b/queue-4.12/ipv6-fix-memory-leak-with-multiple-tables-during-netns-destruction.patch
new file mode 100644 (file)
index 0000000..29f8323
--- /dev/null
@@ -0,0 +1,76 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Sabrina Dubroca <sd@queasysnail.net>
+Date: Fri, 8 Sep 2017 10:26:19 +0200
+Subject: ipv6: fix memory leak with multiple tables during netns destruction
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+
+[ Upstream commit ba1cc08d9488c94cb8d94f545305688b72a2a300 ]
+
+fib6_net_exit only frees the main and local tables. If another table was
+created with fib6_alloc_table, we leak it when the netns is destroyed.
+
+Fix this in the same way ip_fib_net_exit cleans up tables, by walking
+through the whole hashtable of fib6_table's. We can get rid of the
+special cases for local and main, since they're also part of the
+hashtable.
+
+Reproducer:
+    ip netns add x
+    ip -net x -6 rule add from 6003:1::/64 table 100
+    ip netns del x
+
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Fixes: 58f09b78b730 ("[NETNS][IPV6] ip6_fib - make it per network namespace")
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_fib.c |   25 +++++++++++++++++++------
+ 1 file changed, 19 insertions(+), 6 deletions(-)
+
+--- a/net/ipv6/ip6_fib.c
++++ b/net/ipv6/ip6_fib.c
+@@ -201,6 +201,12 @@ static void rt6_release(struct rt6_info
+       }
+ }
++static void fib6_free_table(struct fib6_table *table)
++{
++      inetpeer_invalidate_tree(&table->tb6_peers);
++      kfree(table);
++}
++
+ static void fib6_link_table(struct net *net, struct fib6_table *tb)
+ {
+       unsigned int h;
+@@ -1911,15 +1917,22 @@ out_timer:
+ static void fib6_net_exit(struct net *net)
+ {
++      unsigned int i;
++
+       rt6_ifdown(net, NULL);
+       del_timer_sync(&net->ipv6.ip6_fib_timer);
+-#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+-      inetpeer_invalidate_tree(&net->ipv6.fib6_local_tbl->tb6_peers);
+-      kfree(net->ipv6.fib6_local_tbl);
+-#endif
+-      inetpeer_invalidate_tree(&net->ipv6.fib6_main_tbl->tb6_peers);
+-      kfree(net->ipv6.fib6_main_tbl);
++      for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
++              struct hlist_head *head = &net->ipv6.fib_table_hash[i];
++              struct hlist_node *tmp;
++              struct fib6_table *tb;
++
++              hlist_for_each_entry_safe(tb, tmp, head, tb6_hlist) {
++                      hlist_del(&tb->tb6_hlist);
++                      fib6_free_table(tb);
++              }
++      }
++
+       kfree(net->ipv6.fib_table_hash);
+       kfree(net->ipv6.rt6_stats);
+ }
diff --git a/queue-4.12/ipv6-fix-sparse-warning-on-rt6i_node.patch b/queue-4.12/ipv6-fix-sparse-warning-on-rt6i_node.patch
new file mode 100644 (file)
index 0000000..7a136ba
--- /dev/null
@@ -0,0 +1,110 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Wei Wang <weiwan@google.com>
+Date: Fri, 25 Aug 2017 15:03:10 -0700
+Subject: ipv6: fix sparse warning on rt6i_node
+
+From: Wei Wang <weiwan@google.com>
+
+
+[ Upstream commit 4e587ea71bf924f7dac621f1351653bd41e446cb ]
+
+Commit c5cff8561d2d adds rcu grace period before freeing fib6_node. This
+generates a new sparse warning on rt->rt6i_node related code:
+  net/ipv6/route.c:1394:30: error: incompatible types in comparison
+  expression (different address spaces)
+  ./include/net/ip6_fib.h:187:14: error: incompatible types in comparison
+  expression (different address spaces)
+
+This commit adds "__rcu" tag for rt6i_node and makes sure corresponding
+rcu API is used for it.
+After this fix, sparse no longer generates the above warning.
+
+Fixes: c5cff8561d2d ("ipv6: add rcu grace period before freeing fib6_node")
+Signed-off-by: Wei Wang <weiwan@google.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Martin KaFai Lau <kafai@fb.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/ip6_fib.h |    2 +-
+ net/ipv6/addrconf.c   |    2 +-
+ net/ipv6/ip6_fib.c    |   11 +++++++----
+ net/ipv6/route.c      |    3 ++-
+ 4 files changed, 11 insertions(+), 7 deletions(-)
+
+--- a/include/net/ip6_fib.h
++++ b/include/net/ip6_fib.h
+@@ -105,7 +105,7 @@ struct rt6_info {
+        * the same cache line.
+        */
+       struct fib6_table               *rt6i_table;
+-      struct fib6_node                *rt6i_node;
++      struct fib6_node __rcu          *rt6i_node;
+       struct in6_addr                 rt6i_gateway;
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -5541,7 +5541,7 @@ static void __ipv6_ifa_notify(int event,
+                * our DAD process, so we don't need
+                * to do it again
+                */
+-              if (!(ifp->rt->rt6i_node))
++              if (!rcu_access_pointer(ifp->rt->rt6i_node))
+                       ip6_ins_rt(ifp->rt);
+               if (ifp->idev->cnf.forwarding)
+                       addrconf_join_anycast(ifp);
+--- a/net/ipv6/ip6_fib.c
++++ b/net/ipv6/ip6_fib.c
+@@ -887,7 +887,7 @@ add:
+               rt->dst.rt6_next = iter;
+               *ins = rt;
+-              rt->rt6i_node = fn;
++              rcu_assign_pointer(rt->rt6i_node, fn);
+               atomic_inc(&rt->rt6i_ref);
+               if (!info->skip_notify)
+                       inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
+@@ -913,7 +913,7 @@ add:
+                       return err;
+               *ins = rt;
+-              rt->rt6i_node = fn;
++              rcu_assign_pointer(rt->rt6i_node, fn);
+               rt->dst.rt6_next = iter->dst.rt6_next;
+               atomic_inc(&rt->rt6i_ref);
+               if (!info->skip_notify)
+@@ -1475,8 +1475,9 @@ static void fib6_del_route(struct fib6_n
+ int fib6_del(struct rt6_info *rt, struct nl_info *info)
+ {
++      struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node,
++                                  lockdep_is_held(&rt->rt6i_table->tb6_lock));
+       struct net *net = info->nl_net;
+-      struct fib6_node *fn = rt->rt6i_node;
+       struct rt6_info **rtp;
+ #if RT6_DEBUG >= 2
+@@ -1665,7 +1666,9 @@ static int fib6_clean_node(struct fib6_w
+                       if (res) {
+ #if RT6_DEBUG >= 2
+                               pr_debug("%s: del failed: rt=%p@%p err=%d\n",
+-                                       __func__, rt, rt->rt6i_node, res);
++                                       __func__, rt,
++                                       rcu_access_pointer(rt->rt6i_node),
++                                       res);
+ #endif
+                               continue;
+                       }
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -1383,7 +1383,8 @@ static void rt6_do_update_pmtu(struct rt
+ static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
+ {
+       return !(rt->rt6i_flags & RTF_CACHE) &&
+-              (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
++              (rt->rt6i_flags & RTF_PCPU ||
++               rcu_access_pointer(rt->rt6i_node));
+ }
+ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
diff --git a/queue-4.12/ipv6-fix-typo-in-fib6_net_exit.patch b/queue-4.12/ipv6-fix-typo-in-fib6_net_exit.patch
new file mode 100644 (file)
index 0000000..bcb7747
--- /dev/null
@@ -0,0 +1,31 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 8 Sep 2017 15:48:47 -0700
+Subject: ipv6: fix typo in fib6_net_exit()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 32a805baf0fb70b6dbedefcd7249ac7f580f9e3b ]
+
+IPv6 FIB should use FIB6_TABLE_HASHSZ, not FIB_TABLE_HASHSZ.
+
+Fixes: ba1cc08d9488 ("ipv6: fix memory leak with multiple tables during netns destruction")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_fib.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv6/ip6_fib.c
++++ b/net/ipv6/ip6_fib.c
+@@ -1922,7 +1922,7 @@ static void fib6_net_exit(struct net *ne
+       rt6_ifdown(net, NULL);
+       del_timer_sync(&net->ipv6.ip6_fib_timer);
+-      for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
++      for (i = 0; i < FIB6_TABLE_HASHSZ; i++) {
+               struct hlist_head *head = &net->ipv6.fib_table_hash[i];
+               struct hlist_node *tmp;
+               struct fib6_table *tb;
diff --git a/queue-4.12/ipv6-set-dst.obsolete-when-a-cached-route-has-expired.patch b/queue-4.12/ipv6-set-dst.obsolete-when-a-cached-route-has-expired.patch
new file mode 100644 (file)
index 0000000..3370c2e
--- /dev/null
@@ -0,0 +1,69 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Sat, 26 Aug 2017 20:10:10 +0800
+Subject: ipv6: set dst.obsolete when a cached route has expired
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit 1e2ea8ad37be25a7cdcc974945935829d534d5d3 ]
+
+Now it doesn't check for the cached route expiration in ipv6's
+dst_ops->check(), because it trusts dst_gc that would clean the
+cached route up when it's expired.
+
+The problem is in dst_gc, it would clean the cached route only
+when its refcount is 1. Some other module (like xfrm) may keep
+holding it, and that module only releases it when dst_ops->check()
+fails.
+
+But without checking for the cached route expiration, .check()
+may always return true. Meanwhile, without releasing the cached
+route, dst_gc couldn't del it. It will cause this cached route
+never to expire.
+
+This patch is to set dst.obsolete with DST_OBSOLETE_KILL in .gc
+when it's expired, and check obsolete != DST_OBSOLETE_FORCE_CHK
+in .check.
+
+Note that this is even needed when ipv6 dst_gc timer is removed
+one day. It would set dst.obsolete in .redirect and .update_pmtu
+instead, and check for cached route expiration when getting it,
+just like what ipv4 route does.
+
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_fib.c |    4 +++-
+ net/ipv6/route.c   |    3 ++-
+ 2 files changed, 5 insertions(+), 2 deletions(-)
+
+--- a/net/ipv6/ip6_fib.c
++++ b/net/ipv6/ip6_fib.c
+@@ -1790,8 +1790,10 @@ static int fib6_age(struct rt6_info *rt,
+               }
+               gc_args->more++;
+       } else if (rt->rt6i_flags & RTF_CACHE) {
++              if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout))
++                      rt->dst.obsolete = DST_OBSOLETE_KILL;
+               if (atomic_read(&rt->dst.__refcnt) == 0 &&
+-                  time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
++                  rt->dst.obsolete == DST_OBSOLETE_KILL) {
+                       RT6_TRACE("aging clone %p\n", rt);
+                       return -1;
+               } else if (rt->rt6i_flags & RTF_GATEWAY) {
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -444,7 +444,8 @@ static bool rt6_check_expired(const stru
+               if (time_after(jiffies, rt->dst.expires))
+                       return true;
+       } else if (rt->dst.from) {
+-              return rt6_check_expired((struct rt6_info *) rt->dst.from);
++              return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
++                     rt6_check_expired((struct rt6_info *)rt->dst.from);
+       }
+       return false;
+ }
diff --git a/queue-4.12/kcm-do-not-attach-pf_kcm-sockets-to-avoid-deadlock.patch b/queue-4.12/kcm-do-not-attach-pf_kcm-sockets-to-avoid-deadlock.patch
new file mode 100644 (file)
index 0000000..9c5ee1d
--- /dev/null
@@ -0,0 +1,40 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 30 Aug 2017 09:29:31 -0700
+Subject: kcm: do not attach PF_KCM sockets to avoid deadlock
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 351050ecd6523374b370341cc29fe61e2201556b ]
+
+syzkaller had no problem to trigger a deadlock, attaching a KCM socket
+to another one (or itself). (original syzkaller report was a very
+confusing lockdep splat during a sendmsg())
+
+It seems KCM claims to only support TCP, but no enforcement is done,
+so we might need to add additional checks.
+
+Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Acked-by: Tom Herbert <tom@quantonium.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/kcm/kcmsock.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/net/kcm/kcmsock.c
++++ b/net/kcm/kcmsock.c
+@@ -1383,6 +1383,10 @@ static int kcm_attach(struct socket *soc
+       if (!csk)
+               return -EINVAL;
++      /* We must prevent loops or risk deadlock ! */
++      if (csk->sk_family == PF_KCM)
++              return -EOPNOTSUPP;
++
+       psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL);
+       if (!psock)
+               return -ENOMEM;
diff --git a/queue-4.12/macsec-add-genl-family-module-alias.patch b/queue-4.12/macsec-add-genl-family-module-alias.patch
new file mode 100644 (file)
index 0000000..ba4eea2
--- /dev/null
@@ -0,0 +1,31 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Sabrina Dubroca <sd@queasysnail.net>
+Date: Tue, 22 Aug 2017 15:36:08 +0200
+Subject: macsec: add genl family module alias
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+
+[ Upstream commit 78362998f58c7c271e2719dcd0aaced435c801f9 ]
+
+This helps tools such as wpa_supplicant start even if the macsec
+module isn't loaded yet.
+
+Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver")
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/macsec.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/macsec.c
++++ b/drivers/net/macsec.c
+@@ -3518,6 +3518,7 @@ module_init(macsec_init);
+ module_exit(macsec_exit);
+ MODULE_ALIAS_RTNL_LINK("macsec");
++MODULE_ALIAS_GENL_FAMILY("macsec");
+ MODULE_DESCRIPTION("MACsec IEEE 802.1AE");
+ MODULE_LICENSE("GPL v2");
diff --git a/queue-4.12/mlxsw-spectrum-forbid-linking-to-devices-that-have-uppers.patch b/queue-4.12/mlxsw-spectrum-forbid-linking-to-devices-that-have-uppers.patch
new file mode 100644 (file)
index 0000000..eef17f3
--- /dev/null
@@ -0,0 +1,89 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Ido Schimmel <idosch@mellanox.com>
+Date: Fri, 1 Sep 2017 10:52:31 +0200
+Subject: mlxsw: spectrum: Forbid linking to devices that have uppers
+
+From: Ido Schimmel <idosch@mellanox.com>
+
+
+[ Upstream commit 25cc72a33835ed8a6f53180a822cadab855852ac ]
+
+The mlxsw driver relies on NETDEV_CHANGEUPPER events to configure the
+device in case a port is enslaved to a master netdev such as bridge or
+bond.
+
+Since the driver ignores events unrelated to its ports and their
+uppers, it's possible to engineer situations in which the device's data
+path differs from the kernel's.
+
+One example of such a situation is when a port is enslaved to a bond
+that is already enslaved to a bridge. When the bond was enslaved the
+driver ignored the event - as the bond wasn't one of its uppers - and
+therefore a bridge port instance isn't created in the device.
+
+Until such configurations are supported forbid them by checking that the
+upper device doesn't have uppers of its own.
+
+Fixes: 0d65fc13042f ("mlxsw: spectrum: Implement LAG port join/leave")
+Signed-off-by: Ido Schimmel <idosch@mellanox.com>
+Reported-by: Nogah Frankel <nogahf@mellanox.com>
+Tested-by: Nogah Frankel <nogahf@mellanox.com>
+Signed-off-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlxsw/spectrum.c |    6 ++++++
+ include/linux/netdevice.h                      |    2 ++
+ net/core/dev.c                                 |    3 ++-
+ 3 files changed, 10 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+@@ -4110,6 +4110,8 @@ static int mlxsw_sp_netdevice_port_upper
+                       return -EINVAL;
+               if (!info->linking)
+                       break;
++              if (netdev_has_any_upper_dev(upper_dev))
++                      return -EINVAL;
+               /* HW limitation forbids to put ports to multiple bridges. */
+               if (netif_is_bridge_master(upper_dev) &&
+                   !mlxsw_sp_master_bridge_check(mlxsw_sp, upper_dev))
+@@ -4274,6 +4276,10 @@ static int mlxsw_sp_netdevice_bridge_eve
+               if (is_vlan_dev(upper_dev) &&
+                   br_dev != mlxsw_sp->master_bridge.dev)
+                       return -EINVAL;
++              if (!info->linking)
++                      break;
++              if (netdev_has_any_upper_dev(upper_dev))
++                      return -EINVAL;
+               break;
+       case NETDEV_CHANGEUPPER:
+               upper_dev = info->upper_dev;
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -3868,6 +3868,8 @@ int netdev_walk_all_upper_dev_rcu(struct
+ bool netdev_has_upper_dev_all_rcu(struct net_device *dev,
+                                 struct net_device *upper_dev);
++bool netdev_has_any_upper_dev(struct net_device *dev);
++
+ void *netdev_lower_get_next_private(struct net_device *dev,
+                                   struct list_head **iter);
+ void *netdev_lower_get_next_private_rcu(struct net_device *dev,
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -5590,12 +5590,13 @@ EXPORT_SYMBOL(netdev_has_upper_dev_all_r
+  * Find out if a device is linked to an upper device and return true in case
+  * it is. The caller must hold the RTNL lock.
+  */
+-static bool netdev_has_any_upper_dev(struct net_device *dev)
++bool netdev_has_any_upper_dev(struct net_device *dev)
+ {
+       ASSERT_RTNL();
+       return !list_empty(&dev->adj_list.upper);
+ }
++EXPORT_SYMBOL(netdev_has_any_upper_dev);
+ /**
+  * netdev_master_upper_dev_get - Get master upper device
diff --git a/queue-4.12/net-bcmgenet-be-drop-monitor-friendly.patch b/queue-4.12/net-bcmgenet-be-drop-monitor-friendly.patch
new file mode 100644 (file)
index 0000000..135557e
--- /dev/null
@@ -0,0 +1,44 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Thu, 24 Aug 2017 15:56:29 -0700
+Subject: net: bcmgenet: Be drop monitor friendly
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+
+[ Upstream commit d4fec855905fa8bd5fb1c59f73ad2d74a944876a ]
+
+There are 3 spots where we call dev_kfree_skb() but we are actually
+just doing a normal SKB consumption: __bcmgenet_tx_reclaim() for normal
+TX reclamation, bcmgenet_alloc_rx_buffers() during the initial RX ring
+setup and bcmgenet_free_rx_buffers() during RX ring cleanup.
+
+Fixes: d6707bec5986 ("net: bcmgenet: rewrite bcmgenet_rx_refill()")
+Fixes: f48bed16a756 ("net: bcmgenet: Free skb after last Tx frag")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/genet/bcmgenet.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
++++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+@@ -1203,7 +1203,7 @@ static struct enet_cb *bcmgenet_get_txcb
+ /* Simple helper to free a control block's resources */
+ static void bcmgenet_free_cb(struct enet_cb *cb)
+ {
+-      dev_kfree_skb_any(cb->skb);
++      dev_consume_skb_any(cb->skb);
+       cb->skb = NULL;
+       dma_unmap_addr_set(cb, dma_addr, 0);
+ }
+@@ -1868,7 +1868,7 @@ static int bcmgenet_alloc_rx_buffers(str
+               cb = ring->cbs + i;
+               skb = bcmgenet_rx_refill(priv, cb);
+               if (skb)
+-                      dev_kfree_skb_any(skb);
++                      dev_consume_skb_any(skb);
+               if (!cb->skb)
+                       return -ENOMEM;
+       }
diff --git a/queue-4.12/net-dsa-bcm_sf2-fix-number-of-cfp-entries-for-bcm7278.patch b/queue-4.12/net-dsa-bcm_sf2-fix-number-of-cfp-entries-for-bcm7278.patch
new file mode 100644 (file)
index 0000000..c24bbb9
--- /dev/null
@@ -0,0 +1,106 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Wed, 30 Aug 2017 12:39:33 -0700
+Subject: net: dsa: bcm_sf2: Fix number of CFP entries for BCM7278
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+
+[ Upstream commit df191632f814357ee4d646421662d866028b569d ]
+
+BCM7278 has only 128 entries while BCM7445 has the full 256 entries set,
+fix that.
+
+Fixes: 7318166cacad ("net: dsa: bcm_sf2: Add support for ethtool::rxnfc")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Reviewed-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/bcm_sf2.c     |    4 ++++
+ drivers/net/dsa/bcm_sf2.h     |    1 +
+ drivers/net/dsa/bcm_sf2_cfp.c |    8 ++++----
+ 3 files changed, 9 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/dsa/bcm_sf2.c
++++ b/drivers/net/dsa/bcm_sf2.c
+@@ -1055,6 +1055,7 @@ struct bcm_sf2_of_data {
+       u32 type;
+       const u16 *reg_offsets;
+       unsigned int core_reg_align;
++      unsigned int num_cfp_rules;
+ };
+ /* Register offsets for the SWITCH_REG_* block */
+@@ -1078,6 +1079,7 @@ static const struct bcm_sf2_of_data bcm_
+       .type           = BCM7445_DEVICE_ID,
+       .core_reg_align = 0,
+       .reg_offsets    = bcm_sf2_7445_reg_offsets,
++      .num_cfp_rules  = 256,
+ };
+ static const u16 bcm_sf2_7278_reg_offsets[] = {
+@@ -1100,6 +1102,7 @@ static const struct bcm_sf2_of_data bcm_
+       .type           = BCM7278_DEVICE_ID,
+       .core_reg_align = 1,
+       .reg_offsets    = bcm_sf2_7278_reg_offsets,
++      .num_cfp_rules  = 128,
+ };
+ static const struct of_device_id bcm_sf2_of_match[] = {
+@@ -1156,6 +1159,7 @@ static int bcm_sf2_sw_probe(struct platf
+       priv->type = data->type;
+       priv->reg_offsets = data->reg_offsets;
+       priv->core_reg_align = data->core_reg_align;
++      priv->num_cfp_rules = data->num_cfp_rules;
+       /* Auto-detection using standard registers will not work, so
+        * provide an indication of what kind of device we are for
+--- a/drivers/net/dsa/bcm_sf2.h
++++ b/drivers/net/dsa/bcm_sf2.h
+@@ -72,6 +72,7 @@ struct bcm_sf2_priv {
+       u32                             type;
+       const u16                       *reg_offsets;
+       unsigned int                    core_reg_align;
++      unsigned int                    num_cfp_rules;
+       /* spinlock protecting access to the indirect registers */
+       spinlock_t                      indir_lock;
+--- a/drivers/net/dsa/bcm_sf2_cfp.c
++++ b/drivers/net/dsa/bcm_sf2_cfp.c
+@@ -98,7 +98,7 @@ static inline void bcm_sf2_cfp_rule_addr
+ {
+       u32 reg;
+-      WARN_ON(addr >= CFP_NUM_RULES);
++      WARN_ON(addr >= priv->num_cfp_rules);
+       reg = core_readl(priv, CORE_CFP_ACC);
+       reg &= ~(XCESS_ADDR_MASK << XCESS_ADDR_SHIFT);
+@@ -109,7 +109,7 @@ static inline void bcm_sf2_cfp_rule_addr
+ static inline unsigned int bcm_sf2_cfp_rule_size(struct bcm_sf2_priv *priv)
+ {
+       /* Entry #0 is reserved */
+-      return CFP_NUM_RULES - 1;
++      return priv->num_cfp_rules - 1;
+ }
+ static int bcm_sf2_cfp_rule_set(struct dsa_switch *ds, int port,
+@@ -523,7 +523,7 @@ static int bcm_sf2_cfp_rule_get_all(stru
+               if (!(reg & OP_STR_DONE))
+                       break;
+-      } while (index < CFP_NUM_RULES);
++      } while (index < priv->num_cfp_rules);
+       /* Put the TCAM size here */
+       nfc->data = bcm_sf2_cfp_rule_size(priv);
+@@ -544,7 +544,7 @@ int bcm_sf2_get_rxnfc(struct dsa_switch
+       case ETHTOOL_GRXCLSRLCNT:
+               /* Subtract the default, unusable rule */
+               nfc->rule_cnt = bitmap_weight(priv->cfp.used,
+-                                            CFP_NUM_RULES) - 1;
++                                            priv->num_cfp_rules) - 1;
+               /* We support specifying rule locations */
+               nfc->data |= RX_CLS_LOC_SPECIAL;
+               break;
diff --git a/queue-4.12/net-fec-allow-reception-of-frames-bigger-than-1522-bytes.patch b/queue-4.12/net-fec-allow-reception-of-frames-bigger-than-1522-bytes.patch
new file mode 100644 (file)
index 0000000..b03fc7c
--- /dev/null
@@ -0,0 +1,60 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Andrew Lunn <andrew@lunn.ch>
+Date: Sun, 30 Jul 2017 19:36:05 +0200
+Subject: net: fec: Allow reception of frames bigger than 1522 bytes
+
+From: Andrew Lunn <andrew@lunn.ch>
+
+
+[ Upstream commit fbbeefdd21049fcf9437c809da3828b210577f36 ]
+
+The FEC Receive Control Register has a 14 bit field indicating the
+longest frame that may be received. It is being set to 1522. Frames
+longer than this are discarded, but counted as being in error.
+
+When using DSA, frames from the switch have an additional header,
+either 4 or 8 bytes if a Marvell switch is used. Thus a full MTU frame
+of 1522 bytes received by the switch on a port becomes 1530 bytes when
+passed to the host via the FEC interface.
+
+Change the maximum receive size to 2048 - 64, where 64 is the maximum
+rx_alignment applied on the receive buffer for AVB capable FEC
+cores. Use this value also for the maximum receive buffer size. The
+driver is already allocating a receive SKB of 2048 bytes, so this
+change should not have any significant effects.
+
+Tested on imx51, imx6, vf610.
+
+Signed-off-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/freescale/fec_main.c |    8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/freescale/fec_main.c
++++ b/drivers/net/ethernet/freescale/fec_main.c
+@@ -173,10 +173,12 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet
+ #endif /* CONFIG_M5272 */
+ /* The FEC stores dest/src/type/vlan, data, and checksum for receive packets.
++ *
++ * 2048 byte skbufs are allocated. However, alignment requirements
++ * varies between FEC variants. Worst case is 64, so round down by 64.
+  */
+-#define PKT_MAXBUF_SIZE               1522
++#define PKT_MAXBUF_SIZE               (round_down(2048 - 64, 64))
+ #define PKT_MINBUF_SIZE               64
+-#define PKT_MAXBLR_SIZE               1536
+ /* FEC receive acceleration */
+ #define FEC_RACC_IPDIS                (1 << 1)
+@@ -848,7 +850,7 @@ static void fec_enet_enable_ring(struct
+       for (i = 0; i < fep->num_rx_queues; i++) {
+               rxq = fep->rx_queue[i];
+               writel(rxq->bd.dma, fep->hwp + FEC_R_DES_START(i));
+-              writel(PKT_MAXBLR_SIZE, fep->hwp + FEC_R_BUFF_SIZE(i));
++              writel(PKT_MAXBUF_SIZE, fep->hwp + FEC_R_BUFF_SIZE(i));
+               /* enable DMA1/2 */
+               if (i)
diff --git a/queue-4.12/net-mlx5-e-switch-unload-the-representors-in-the-correct-order.patch b/queue-4.12/net-mlx5-e-switch-unload-the-representors-in-the-correct-order.patch
new file mode 100644 (file)
index 0000000..959aace
--- /dev/null
@@ -0,0 +1,38 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Shahar Klein <shahark@mellanox.com>
+Date: Tue, 1 Aug 2017 15:29:55 +0300
+Subject: net/mlx5: E-Switch, Unload the representors in the correct order
+
+From: Shahar Klein <shahark@mellanox.com>
+
+
+[ Upstream commit 191220396db840822fc818edf03c49f0c02eb237 ]
+
+When changing from switchdev to legacy mode, all the representor port
+devices (uplink nic and reps) are cleaned up. Part of this cleaning
+process is removing the neigh entries and the hash table containing them.
+However, a representor neigh entry might be linked to the uplink port
+hash table and if the uplink nic is cleaned first the cleaning of the
+representor will end up in null deref.
+Fix that by unloading the representors in the opposite order of load.
+
+Fixes: cb67b832921c ("net/mlx5e: Introduce SRIOV VF representors")
+Signed-off-by: Shahar Klein <shahark@mellanox.com>
+Reviewed-by: Roi Dayan <roid@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+@@ -818,7 +818,7 @@ void esw_offloads_cleanup(struct mlx5_es
+       struct mlx5_eswitch_rep *rep;
+       int vport;
+-      for (vport = 0; vport < nvports; vport++) {
++      for (vport = nvports - 1; vport >= 0; vport--) {
+               rep = &esw->offloads.vport_reps[vport];
+               if (!rep->valid)
+                       continue;
diff --git a/queue-4.12/net-mlx5-fix-arm-srq-command-for-issi-version-0.patch b/queue-4.12/net-mlx5-fix-arm-srq-command-for-issi-version-0.patch
new file mode 100644 (file)
index 0000000..50489c7
--- /dev/null
@@ -0,0 +1,47 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Noa Osherovich <noaos@mellanox.com>
+Date: Sun, 30 Jul 2017 13:55:48 +0300
+Subject: net/mlx5: Fix arm SRQ command for ISSI version 0
+
+From: Noa Osherovich <noaos@mellanox.com>
+
+
+[ Upstream commit 672d0880b7798a917bcc622308f25a0fbb991dab ]
+
+Support for ISSI version 0 was recently broken as the arm_srq_cmd
+command, which is used only for ISSI version 0, was given the opcode
+for ISSI version 1 instead of ISSI version 0.
+
+Change arm_srq_cmd to use the correct command opcode for ISSI version
+0.
+
+Fixes: af1ba291c5e4 ('{net, IB}/mlx5: Refactor internal SRQ API')
+Signed-off-by: Noa Osherovich <noaos@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/srq.c |   12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
+@@ -201,13 +201,13 @@ static int destroy_srq_cmd(struct mlx5_c
+ static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+                      u16 lwm, int is_srq)
+ {
+-      /* arm_srq structs missing using identical xrc ones */
+-      u32 srq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0};
+-      u32 srq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0};
++      u32 srq_in[MLX5_ST_SZ_DW(arm_rq_in)] = {0};
++      u32 srq_out[MLX5_ST_SZ_DW(arm_rq_out)] = {0};
+-      MLX5_SET(arm_xrc_srq_in, srq_in, opcode,   MLX5_CMD_OP_ARM_XRC_SRQ);
+-      MLX5_SET(arm_xrc_srq_in, srq_in, xrc_srqn, srq->srqn);
+-      MLX5_SET(arm_xrc_srq_in, srq_in, lwm,      lwm);
++      MLX5_SET(arm_rq_in, srq_in, opcode, MLX5_CMD_OP_ARM_RQ);
++      MLX5_SET(arm_rq_in, srq_in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ);
++      MLX5_SET(arm_rq_in, srq_in, srq_number, srq->srqn);
++      MLX5_SET(arm_rq_in, srq_in, lwm,      lwm);
+       return  mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
+                             srq_out, sizeof(srq_out));
diff --git a/queue-4.12/net-mlx5e-check-for-qos-capability-in-dcbnl_initialize.patch b/queue-4.12/net-mlx5e-check-for-qos-capability-in-dcbnl_initialize.patch
new file mode 100644 (file)
index 0000000..ad22576
--- /dev/null
@@ -0,0 +1,35 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Huy Nguyen <huyn@mellanox.com>
+Date: Thu, 13 Jul 2017 13:45:11 -0500
+Subject: net/mlx5e: Check for qos capability in dcbnl_initialize
+
+From: Huy Nguyen <huyn@mellanox.com>
+
+
+[ Upstream commit 33c52b6718d2a6cb414440c98560818910d896dc ]
+
+qos capability is the master capability bit that determines
+if the DCBX is supported for the PCI function. If this bit is off,
+driver cannot run any dcbx code.
+
+Fixes: e207b7e99176 ("net/mlx5e: ConnectX-4 firmware support for DCBX")
+Signed-off-by: Huy Nguyen <huyn@mellanox.com>
+Reviewed-by: Parav Pandit <parav@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+@@ -754,6 +754,9 @@ void mlx5e_dcbnl_initialize(struct mlx5e
+ {
+       struct mlx5e_dcbx *dcbx = &priv->dcbx;
++      if (!MLX5_CAP_GEN(priv->mdev, qos))
++              return;
++
+       if (MLX5_CAP_GEN(priv->mdev, dcbx))
+               mlx5e_dcbnl_query_dcbx_mode(priv, &dcbx->mode);
diff --git a/queue-4.12/net-mlx5e-don-t-override-user-rss-upon-set-channels.patch b/queue-4.12/net-mlx5e-don-t-override-user-rss-upon-set-channels.patch
new file mode 100644 (file)
index 0000000..e1169c9
--- /dev/null
@@ -0,0 +1,48 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Inbar Karmy <inbark@mellanox.com>
+Date: Mon, 14 Aug 2017 16:12:16 +0300
+Subject: net/mlx5e: Don't override user RSS upon set channels
+
+From: Inbar Karmy <inbark@mellanox.com>
+
+
+[ Upstream commit 5a8e12678c767ccf8bb16d6237569e4a707d655b ]
+
+Currently, increasing the number of combined channels is changing
+the RSS spread to use the new created channels.
+Prevent the RSS spread change in case the user explicitly declared it,
+to avoid overriding user configuration.
+
+Tested:
+when RSS default:
+
+# ethtool -L ens8 combined 4
+RSS spread will change and point to 4 channels.
+
+# ethtool -X ens8 equal 4
+# ethtool -L ens8 combined 6
+RSS will not change after increasing the number of the channels.
+
+Fixes: 8bf368620486 ('ethtool: ensure channel counts are within bounds during SCHANNELS')
+Signed-off-by: Inbar Karmy <inbark@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+@@ -580,8 +580,10 @@ static int mlx5e_set_channels(struct net
+       new_channels.params = priv->channels.params;
+       new_channels.params.num_channels = count;
+-      mlx5e_build_default_indir_rqt(priv->mdev, new_channels.params.indirection_rqt,
+-                                    MLX5E_INDIR_RQT_SIZE, count);
++      if (!netif_is_rxfh_configured(priv->netdev))
++              mlx5e_build_default_indir_rqt(priv->mdev,
++                                            new_channels.params.indirection_rqt,
++                                            MLX5E_INDIR_RQT_SIZE, count);
+       if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+               priv->channels.params = new_channels.params;
diff --git a/queue-4.12/net-mlx5e-fix-cq-moderation-mode-not-set-properly.patch b/queue-4.12/net-mlx5e-fix-cq-moderation-mode-not-set-properly.patch
new file mode 100644 (file)
index 0000000..58472d9
--- /dev/null
@@ -0,0 +1,32 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Tal Gilboa <talgi@mellanox.com>
+Date: Mon, 28 Aug 2017 18:45:08 +0300
+Subject: net/mlx5e: Fix CQ moderation mode not set properly
+
+From: Tal Gilboa <talgi@mellanox.com>
+
+
+[ Upstream commit 1213ad28f9595a08e3877248bbba1a25c40225d6 ]
+
+cq_period_mode assignment was mistakenly removed so it was always set to "0",
+which is EQE based moderation, regardless of the device CAPs and
+requested value in ethtool.
+
+Fixes: 6a9764efb255 ("net/mlx5e: Isolate open_channels from priv->params")
+Signed-off-by: Tal Gilboa <talgi@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_main.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+@@ -1936,6 +1936,7 @@ static void mlx5e_build_rx_cq_param(stru
+       }
+       mlx5e_build_common_cq_param(priv, param);
++      param->cq_period_mode = params->rx_cq_period_mode;
+ }
+ static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
diff --git a/queue-4.12/net-mlx5e-fix-dangling-page-pointer-on-dma-mapping-error.patch b/queue-4.12/net-mlx5e-fix-dangling-page-pointer-on-dma-mapping-error.patch
new file mode 100644 (file)
index 0000000..2441f3a
--- /dev/null
@@ -0,0 +1,40 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Eran Ben Elisha <eranbe@mellanox.com>
+Date: Wed, 16 Aug 2017 14:37:11 +0300
+Subject: net/mlx5e: Fix dangling page pointer on DMA mapping error
+
+From: Eran Ben Elisha <eranbe@mellanox.com>
+
+
+[ Upstream commit 0556ce72ab16156af6c94cdc7964e4310acc97c0 ]
+
+Function mlx5e_dealloc_rx_wqe is using page pointer value as an
+indication to valid DMA mapping. In case that the mapping failed, we
+released the page but kept the dangling pointer. Store the page pointer
+only after the DMA mapping passed to avoid invalid page DMA unmap.
+
+Fixes: bc77b240b3c5 ("net/mlx5e: Add fragmented memory support for RX multi packet WQE")
+Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_rx.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+@@ -216,13 +216,13 @@ static inline int mlx5e_page_alloc_mappe
+       if (unlikely(!page))
+               return -ENOMEM;
+-      dma_info->page = page;
+       dma_info->addr = dma_map_page(rq->pdev, page, 0,
+                                     RQ_PAGE_SIZE(rq), rq->buff.map_dir);
+       if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) {
+               put_page(page);
+               return -ENOMEM;
+       }
++      dma_info->page = page;
+       return 0;
+ }
diff --git a/queue-4.12/net-mlx5e-fix-dcb_cap_attr_dcbx-capability-for-dcbnl-getcap.patch b/queue-4.12/net-mlx5e-fix-dcb_cap_attr_dcbx-capability-for-dcbnl-getcap.patch
new file mode 100644 (file)
index 0000000..05f4ce8
--- /dev/null
@@ -0,0 +1,96 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Huy Nguyen <huyn@mellanox.com>
+Date: Mon, 10 Jul 2017 14:00:23 -0500
+Subject: net/mlx5e: Fix DCB_CAP_ATTR_DCBX capability for DCBNL getcap.
+
+From: Huy Nguyen <huyn@mellanox.com>
+
+
+[ Upstream commit 9e10bf1d349787f373484d835efe2dbb5f9c5614 ]
+
+Current code doesn't report DCB_CAP_DCBX_HOST capability when queried
+through getcap. User space lldptool expects capability to have HOST mode
+set when it wants to configure DCBX CEE mode. In absence of HOST mode
+capability, lldptool fails to switch to CEE mode.
+
+This fix returns DCB_CAP_DCBX_HOST capability when port's DCBX
+controlled mode is under software control.
+
+Fixes: 3a6a931dfb8e ("net/mlx5e: Support DCBX CEE API")
+Signed-off-by: Huy Nguyen <huyn@mellanox.com>
+Reviewed-by: Parav Pandit <parav@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en.h       |    1 +
+ drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c |   21 ++++++++++++---------
+ 2 files changed, 13 insertions(+), 9 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
+@@ -258,6 +258,7 @@ struct mlx5e_dcbx {
+       /* The only setting that cannot be read from FW */
+       u8                         tc_tsa[IEEE_8021QAZ_MAX_TCS];
++      u8                         cap;
+ };
+ #endif
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+@@ -288,13 +288,8 @@ static int mlx5e_dcbnl_ieee_setpfc(struc
+ static u8 mlx5e_dcbnl_getdcbx(struct net_device *dev)
+ {
+       struct mlx5e_priv *priv = netdev_priv(dev);
+-      struct mlx5e_dcbx *dcbx = &priv->dcbx;
+-      u8 mode = DCB_CAP_DCBX_VER_IEEE | DCB_CAP_DCBX_VER_CEE;
+-
+-      if (dcbx->mode == MLX5E_DCBX_PARAM_VER_OPER_HOST)
+-              mode |= DCB_CAP_DCBX_HOST;
+-      return mode;
++      return priv->dcbx.cap;
+ }
+ static u8 mlx5e_dcbnl_setdcbx(struct net_device *dev, u8 mode)
+@@ -312,6 +307,7 @@ static u8 mlx5e_dcbnl_setdcbx(struct net
+               /* set dcbx to fw controlled */
+               if (!mlx5e_dcbnl_set_dcbx_mode(priv, MLX5E_DCBX_PARAM_VER_OPER_AUTO)) {
+                       dcbx->mode = MLX5E_DCBX_PARAM_VER_OPER_AUTO;
++                      dcbx->cap &= ~DCB_CAP_DCBX_HOST;
+                       return 0;
+               }
+@@ -324,6 +320,8 @@ static u8 mlx5e_dcbnl_setdcbx(struct net
+       if (mlx5e_dcbnl_switch_to_host_mode(netdev_priv(dev)))
+               return 1;
++      dcbx->cap = mode;
++
+       return 0;
+ }
+@@ -628,9 +626,9 @@ static u8 mlx5e_dcbnl_getcap(struct net_
+               *cap = false;
+               break;
+       case DCB_CAP_ATTR_DCBX:
+-              *cap = (DCB_CAP_DCBX_LLD_MANAGED |
+-                      DCB_CAP_DCBX_VER_CEE |
+-                      DCB_CAP_DCBX_STATIC);
++              *cap = priv->dcbx.cap |
++                     DCB_CAP_DCBX_VER_CEE |
++                     DCB_CAP_DCBX_VER_IEEE;
+               break;
+       default:
+               *cap = 0;
+@@ -760,5 +758,10 @@ void mlx5e_dcbnl_initialize(struct mlx5e
+       if (MLX5_CAP_GEN(priv->mdev, dcbx))
+               mlx5e_dcbnl_query_dcbx_mode(priv, &dcbx->mode);
++      priv->dcbx.cap = DCB_CAP_DCBX_VER_CEE |
++                       DCB_CAP_DCBX_VER_IEEE;
++      if (priv->dcbx.mode == MLX5E_DCBX_PARAM_VER_OPER_HOST)
++              priv->dcbx.cap |= DCB_CAP_DCBX_HOST;
++
+       mlx5e_ets_init(priv);
+ }
diff --git a/queue-4.12/net-mlx5e-fix-inline-header-size-for-small-packets.patch b/queue-4.12/net-mlx5e-fix-inline-header-size-for-small-packets.patch
new file mode 100644 (file)
index 0000000..55c38f1
--- /dev/null
@@ -0,0 +1,64 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Moshe Shemesh <moshe@mellanox.com>
+Date: Tue, 8 Aug 2017 15:56:37 +0300
+Subject: net/mlx5e: Fix inline header size for small packets
+
+From: Moshe Shemesh <moshe@mellanox.com>
+
+
+[ Upstream commit 6aace17e64f4aa1c49802c46bd10688968b3787f ]
+
+Fix inline header size, make sure it is not greater than skb len.
+This bug affects small packets, for example L2 packets with size < 18.
+
+Fixes: ae76715d153e ("net/mlx5e: Check the minimum inline header mode before xmit")
+Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_tx.c |   17 ++++++++++-------
+ 1 file changed, 10 insertions(+), 7 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+@@ -127,10 +127,10 @@ static inline int mlx5e_skb_l3_header_of
+               return mlx5e_skb_l2_header_offset(skb);
+ }
+-static inline unsigned int mlx5e_calc_min_inline(enum mlx5_inline_modes mode,
+-                                               struct sk_buff *skb)
++static inline u16 mlx5e_calc_min_inline(enum mlx5_inline_modes mode,
++                                      struct sk_buff *skb)
+ {
+-      int hlen;
++      u16 hlen;
+       switch (mode) {
+       case MLX5_INLINE_MODE_NONE:
+@@ -139,19 +139,22 @@ static inline unsigned int mlx5e_calc_mi
+               hlen = eth_get_headlen(skb->data, skb_headlen(skb));
+               if (hlen == ETH_HLEN && !skb_vlan_tag_present(skb))
+                       hlen += VLAN_HLEN;
+-              return hlen;
++              break;
+       case MLX5_INLINE_MODE_IP:
+               /* When transport header is set to zero, it means no transport
+                * header. When transport header is set to 0xff's, it means
+                * transport header wasn't set.
+                */
+-              if (skb_transport_offset(skb))
+-                      return mlx5e_skb_l3_header_offset(skb);
++              if (skb_transport_offset(skb)) {
++                      hlen = mlx5e_skb_l3_header_offset(skb);
++                      break;
++              }
+               /* fall through */
+       case MLX5_INLINE_MODE_L2:
+       default:
+-              return mlx5e_skb_l2_header_offset(skb);
++              hlen = mlx5e_skb_l2_header_offset(skb);
+       }
++      return min_t(u16, hlen, skb->len);
+ }
+ static inline void mlx5e_tx_skb_pull_inline(unsigned char **skb_data,
diff --git a/queue-4.12/net-mlx5e-properly-resolve-tc-offloaded-ipv6-vxlan-tunnel-source-address.patch b/queue-4.12/net-mlx5e-properly-resolve-tc-offloaded-ipv6-vxlan-tunnel-source-address.patch
new file mode 100644 (file)
index 0000000..f2b9008
--- /dev/null
@@ -0,0 +1,46 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Paul Blakey <paulb@mellanox.com>
+Date: Tue, 22 Aug 2017 13:51:56 +0300
+Subject: net/mlx5e: Properly resolve TC offloaded ipv6 vxlan tunnel source address
+
+From: Paul Blakey <paulb@mellanox.com>
+
+
+[ Upstream commit 08820528c9d3ff0d0eda047d7ef5ecac2da1ef6c ]
+
+Currently if vxlan tunnel ipv6 src isn't supplied the driver fails to
+resolve it as part of the route lookup. The resulting encap header
+is left with a zeroed out ipv6 src address so the packets are sent
+with this src ip.
+
+Use an appropriate route lookup API that also resolves the source
+ipv6 address if it's not supplied.
+
+Fixes: ce99f6b97fcd ('net/mlx5e: Support SRIOV TC encapsulation offloads for IPv6 tunnels')
+Signed-off-by: Paul Blakey <paulb@mellanox.com>
+Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
+Reviewed-by: Roi Dayan <roid@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c |    8 +++-----
+ 1 file changed, 3 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -1262,12 +1262,10 @@ static int mlx5e_route_lookup_ipv6(struc
+       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+       int ret;
+-      dst = ip6_route_output(dev_net(mirred_dev), NULL, fl6);
+-      ret = dst->error;
+-      if (ret) {
+-              dst_release(dst);
++      ret = ipv6_stub->ipv6_dst_lookup(dev_net(mirred_dev), NULL, &dst,
++                                       fl6);
++      if (ret < 0)
+               return ret;
+-      }
+       *out_ttl = ip6_dst_hoplimit(dst);
diff --git a/queue-4.12/net-mvpp2-fix-the-mac-address-used-when-using-ppv2.2.patch b/queue-4.12/net-mvpp2-fix-the-mac-address-used-when-using-ppv2.2.patch
new file mode 100644 (file)
index 0000000..c4c6181
--- /dev/null
@@ -0,0 +1,39 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Antoine Tenart <antoine.tenart@free-electrons.com>
+Date: Fri, 25 Aug 2017 16:14:17 +0200
+Subject: net: mvpp2: fix the mac address used when using PPv2.2
+
+From: Antoine Tenart <antoine.tenart@free-electrons.com>
+
+
+[ Upstream commit 4c22868264516fe0c42817a87f37efb44254e7a9 ]
+
+The mac address is only retrieved from h/w when using PPv2.1. Otherwise
+the variable holding it is still checked and used if it contains a valid
+value. As the variable isn't initialized to an invalid mac address
+value, we end up with random mac addresses which can be the same for all
+the ports handled by this PPv2 driver.
+
+Fix this by initializing the h/w mac address variable to {0}, which is
+an invalid mac address value. This way the random assignment fallback
+is called and all ports end up with their own addresses.
+
+Signed-off-by: Antoine Tenart <antoine.tenart@free-electrons.com>
+Fixes: 2697582144dd ("net: mvpp2: handle misc PPv2.1/PPv2.2 differences")
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/marvell/mvpp2.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/marvell/mvpp2.c
++++ b/drivers/net/ethernet/marvell/mvpp2.c
+@@ -6465,7 +6465,7 @@ static int mvpp2_port_probe(struct platf
+       struct resource *res;
+       const char *dt_mac_addr;
+       const char *mac_from;
+-      char hw_mac_addr[ETH_ALEN];
++      char hw_mac_addr[ETH_ALEN] = {0};
+       u32 id;
+       int features;
+       int phy_mode;
diff --git a/queue-4.12/net-systemport-be-drop-monitor-friendly.patch b/queue-4.12/net-systemport-be-drop-monitor-friendly.patch
new file mode 100644 (file)
index 0000000..8d8688b
--- /dev/null
@@ -0,0 +1,35 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Thu, 24 Aug 2017 15:20:41 -0700
+Subject: net: systemport: Be drop monitor friendly
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+
+[ Upstream commit c45182eb967af11e9482168be5be41aa22e5d321 ]
+
+Utilize dev_consume_skb_any(cb->skb) in bcm_sysport_free_cb() which is
+used when a TX packet is completed, as well as when the RX ring is
+cleaned on shutdown. Neither of these two cases is a packet drop, so be
+drop monitor friendly.
+
+Suggested-by: Eric Dumazet <edumazet@gmail.com>
+Fixes: 80105befdb4b ("net: systemport: add Broadcom SYSTEMPORT Ethernet MAC driver")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bcmsysport.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/broadcom/bcmsysport.c
++++ b/drivers/net/ethernet/broadcom/bcmsysport.c
+@@ -593,7 +593,7 @@ static int bcm_sysport_set_coalesce(stru
+ static void bcm_sysport_free_cb(struct bcm_sysport_cb *cb)
+ {
+-      dev_kfree_skb_any(cb->skb);
++      dev_consume_skb_any(cb->skb);
+       cb->skb = NULL;
+       dma_unmap_addr_set(cb, dma_addr, 0);
+ }
diff --git a/queue-4.12/net-systemport-free-dma-coherent-descriptors-on-errors.patch b/queue-4.12/net-systemport-free-dma-coherent-descriptors-on-errors.patch
new file mode 100644 (file)
index 0000000..e143808
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Thu, 24 Aug 2017 16:01:13 -0700
+Subject: net: systemport: Free DMA coherent descriptors on errors
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+
+[ Upstream commit c2062ee3d9615828109ffe8089fbf69bed394d05 ]
+
+In case bcm_sysport_init_tx_ring() is not able to allocate ring->cbs, we
+would return with an error, and call bcm_sysport_fini_tx_ring() and it
+would see that ring->cbs is NULL and do nothing. This would leak the
+coherent DMA descriptor area, so we need to free it on error before
+returning.
+
+Reported-by: Eric Dumazet <edumazet@gmail.com>
+Fixes: 80105befdb4b ("net: systemport: add Broadcom SYSTEMPORT Ethernet MAC driver")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bcmsysport.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/ethernet/broadcom/bcmsysport.c
++++ b/drivers/net/ethernet/broadcom/bcmsysport.c
+@@ -1342,6 +1342,8 @@ static int bcm_sysport_init_tx_ring(stru
+       ring->cbs = kcalloc(size, sizeof(struct bcm_sysport_cb), GFP_KERNEL);
+       if (!ring->cbs) {
++              dma_free_coherent(kdev, sizeof(struct dma_desc),
++                                ring->desc_cpu, ring->desc_dma);
+               netif_err(priv, hw, priv->netdev, "CB allocation failed\n");
+               return -ENOMEM;
+       }
diff --git a/queue-4.12/netvsc-fix-deadlock-betwen-link-status-and-removal.patch b/queue-4.12/netvsc-fix-deadlock-betwen-link-status-and-removal.patch
new file mode 100644 (file)
index 0000000..e0a0d2d
--- /dev/null
@@ -0,0 +1,41 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: stephen hemminger <stephen@networkplumber.org>
+Date: Thu, 24 Aug 2017 16:49:16 -0700
+Subject: netvsc: fix deadlock betwen link status and removal
+
+From: stephen hemminger <stephen@networkplumber.org>
+
+
+[ Upstream commit 9b4e946ce14e20d7addbfb7d9139e604f9fda107 ]
+
+There is a deadlock possible when canceling the link status
+delayed work queue. The removal process is run with RTNL held,
+and the link status callback is acquiring RTNL.
+
+Resolve the issue by using trylock and rescheduling.
+If cancel is in progress, that blocks it from happening.
+
+Fixes: 122a5f6410f4 ("staging: hv: use delayed_work for netvsc_send_garp()")
+Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc_drv.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -1270,7 +1270,12 @@ static void netvsc_link_change(struct wo
+       bool notify = false, reschedule = false;
+       unsigned long flags, next_reconfig, delay;
+-      rtnl_lock();
++      /* if changes are happening, comeback later */
++      if (!rtnl_trylock()) {
++              schedule_delayed_work(&ndev_ctx->dwork, LINKCHANGE_INT);
++              return;
++      }
++
+       net_device = rtnl_dereference(ndev_ctx->nvdev);
+       if (!net_device)
+               goto out_unlock;
diff --git a/queue-4.12/packet-don-t-write-vnet-header-beyond-end-of-buffer.patch b/queue-4.12/packet-don-t-write-vnet-header-beyond-end-of-buffer.patch
new file mode 100644 (file)
index 0000000..d17aa91
--- /dev/null
@@ -0,0 +1,73 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Benjamin Poirier <bpoirier@suse.com>
+Date: Mon, 28 Aug 2017 14:29:41 -0400
+Subject: packet: Don't write vnet header beyond end of buffer
+
+From: Benjamin Poirier <bpoirier@suse.com>
+
+
+[ Upstream commit edbd58be15a957f6a760c4a514cd475217eb97fd ]
+
+... which may happen with certain values of tp_reserve and maclen.
+
+Fixes: 58d19b19cd99 ("packet: vnet_hdr support for tpacket_rcv")
+Signed-off-by: Benjamin Poirier <bpoirier@suse.com>
+Cc: Willem de Bruijn <willemb@google.com>
+Acked-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c |   12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -2192,6 +2192,7 @@ static int tpacket_rcv(struct sk_buff *s
+       struct timespec ts;
+       __u32 ts_status;
+       bool is_drop_n_account = false;
++      bool do_vnet = false;
+       /* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT.
+        * We may add members to them until current aligned size without forcing
+@@ -2242,8 +2243,10 @@ static int tpacket_rcv(struct sk_buff *s
+               netoff = TPACKET_ALIGN(po->tp_hdrlen +
+                                      (maclen < 16 ? 16 : maclen)) +
+                                      po->tp_reserve;
+-              if (po->has_vnet_hdr)
++              if (po->has_vnet_hdr) {
+                       netoff += sizeof(struct virtio_net_hdr);
++                      do_vnet = true;
++              }
+               macoff = netoff - maclen;
+       }
+       if (po->tp_version <= TPACKET_V2) {
+@@ -2260,8 +2263,10 @@ static int tpacket_rcv(struct sk_buff *s
+                                       skb_set_owner_r(copy_skb, sk);
+                       }
+                       snaplen = po->rx_ring.frame_size - macoff;
+-                      if ((int)snaplen < 0)
++                      if ((int)snaplen < 0) {
+                               snaplen = 0;
++                              do_vnet = false;
++                      }
+               }
+       } else if (unlikely(macoff + snaplen >
+                           GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len)) {
+@@ -2274,6 +2279,7 @@ static int tpacket_rcv(struct sk_buff *s
+               if (unlikely((int)snaplen < 0)) {
+                       snaplen = 0;
+                       macoff = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len;
++                      do_vnet = false;
+               }
+       }
+       spin_lock(&sk->sk_receive_queue.lock);
+@@ -2299,7 +2305,7 @@ static int tpacket_rcv(struct sk_buff *s
+       }
+       spin_unlock(&sk->sk_receive_queue.lock);
+-      if (po->has_vnet_hdr) {
++      if (do_vnet) {
+               if (virtio_net_hdr_from_skb(skb, h.raw + macoff -
+                                           sizeof(struct virtio_net_hdr),
+                                           vio_le(), true)) {
diff --git a/queue-4.12/qlge-avoid-memcpy-buffer-overflow.patch b/queue-4.12/qlge-avoid-memcpy-buffer-overflow.patch
new file mode 100644 (file)
index 0000000..e728d6d
--- /dev/null
@@ -0,0 +1,44 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Arnd Bergmann <arnd@arndb.de>
+Date: Wed, 23 Aug 2017 15:59:49 +0200
+Subject: qlge: avoid memcpy buffer overflow
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+
+[ Upstream commit e58f95831e7468d25eb6e41f234842ecfe6f014f ]
+
+gcc-8.0.0 (snapshot) points out that we copy a variable-length string
+into a fixed length field using memcpy() with the destination length,
+and that ends up copying whatever follows the string:
+
+    inlined from 'ql_core_dump' at drivers/net/ethernet/qlogic/qlge/qlge_dbg.c:1106:2:
+drivers/net/ethernet/qlogic/qlge/qlge_dbg.c:708:2: error: 'memcpy' reading 15 bytes from a region of size 14 [-Werror=stringop-overflow=]
+  memcpy(seg_hdr->description, desc, (sizeof(seg_hdr->description)) - 1);
+
+Changing it to use strncpy() will instead zero-pad the destination,
+which seems to be the right thing to do here.
+
+The bug is probably harmless, but it seems like a good idea to address
+it in stable kernels as well, if only for the purpose of building with
+gcc-8 without warnings.
+
+Fixes: a61f80261306 ("qlge: Add ethtool register dump function.")
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/qlogic/qlge/qlge_dbg.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c
++++ b/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c
+@@ -724,7 +724,7 @@ static void ql_build_coredump_seg_header
+       seg_hdr->cookie = MPI_COREDUMP_COOKIE;
+       seg_hdr->segNum = seg_number;
+       seg_hdr->segSize = seg_size;
+-      memcpy(seg_hdr->description, desc, (sizeof(seg_hdr->description)) - 1);
++      strncpy(seg_hdr->description, desc, (sizeof(seg_hdr->description)) - 1);
+ }
+ /*
diff --git a/queue-4.12/revert-net-fix-percpu-memory-leaks.patch b/queue-4.12/revert-net-fix-percpu-memory-leaks.patch
new file mode 100644 (file)
index 0000000..575afd6
--- /dev/null
@@ -0,0 +1,151 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Jesper Dangaard Brouer <brouer@redhat.com>
+Date: Fri, 1 Sep 2017 11:26:13 +0200
+Subject: Revert "net: fix percpu memory leaks"
+
+From: Jesper Dangaard Brouer <brouer@redhat.com>
+
+
+[ Upstream commit 5a63643e583b6a9789d7a225ae076fb4e603991c ]
+
+This reverts commit 1d6119baf0610f813eb9d9580eb4fd16de5b4ceb.
+
+After reverting commit 6d7b857d541e ("net: use lib/percpu_counter API
+for fragmentation mem accounting"), there is no need for this
+fix-up patch.  As percpu_counter is no longer used, it can no
+longer leak memory.
+
+Fixes: 6d7b857d541e ("net: use lib/percpu_counter API for fragmentation mem accounting")
+Fixes: 1d6119baf061 ("net: fix percpu memory leaks")
+Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/inet_frag.h                 |    7 +------
+ net/ieee802154/6lowpan/reassembly.c     |   11 +++--------
+ net/ipv4/ip_fragment.c                  |   12 +++---------
+ net/ipv6/netfilter/nf_conntrack_reasm.c |   12 +++---------
+ net/ipv6/reassembly.c                   |   12 +++---------
+ 5 files changed, 13 insertions(+), 41 deletions(-)
+
+--- a/include/net/inet_frag.h
++++ b/include/net/inet_frag.h
+@@ -103,15 +103,10 @@ struct inet_frags {
+ int inet_frags_init(struct inet_frags *);
+ void inet_frags_fini(struct inet_frags *);
+-static inline int inet_frags_init_net(struct netns_frags *nf)
++static inline void inet_frags_init_net(struct netns_frags *nf)
+ {
+       atomic_set(&nf->mem, 0);
+-      return 0;
+ }
+-static inline void inet_frags_uninit_net(struct netns_frags *nf)
+-{
+-}
+-
+ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f);
+ void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f);
+--- a/net/ieee802154/6lowpan/reassembly.c
++++ b/net/ieee802154/6lowpan/reassembly.c
+@@ -580,19 +580,14 @@ static int __net_init lowpan_frags_init_
+ {
+       struct netns_ieee802154_lowpan *ieee802154_lowpan =
+               net_ieee802154_lowpan(net);
+-      int res;
+       ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
+       ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
+       ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
+-      res = inet_frags_init_net(&ieee802154_lowpan->frags);
+-      if (res)
+-              return res;
+-      res = lowpan_frags_ns_sysctl_register(net);
+-      if (res)
+-              inet_frags_uninit_net(&ieee802154_lowpan->frags);
+-      return res;
++      inet_frags_init_net(&ieee802154_lowpan->frags);
++
++      return lowpan_frags_ns_sysctl_register(net);
+ }
+ static void __net_exit lowpan_frags_exit_net(struct net *net)
+--- a/net/ipv4/ip_fragment.c
++++ b/net/ipv4/ip_fragment.c
+@@ -844,8 +844,6 @@ static void __init ip4_frags_ctl_registe
+ static int __net_init ipv4_frags_init_net(struct net *net)
+ {
+-      int res;
+-
+       /* Fragment cache limits.
+        *
+        * The fragment memory accounting code, (tries to) account for
+@@ -871,13 +869,9 @@ static int __net_init ipv4_frags_init_ne
+       net->ipv4.frags.max_dist = 64;
+-      res = inet_frags_init_net(&net->ipv4.frags);
+-      if (res)
+-              return res;
+-      res = ip4_frags_ns_ctl_register(net);
+-      if (res)
+-              inet_frags_uninit_net(&net->ipv4.frags);
+-      return res;
++      inet_frags_init_net(&net->ipv4.frags);
++
++      return ip4_frags_ns_ctl_register(net);
+ }
+ static void __net_exit ipv4_frags_exit_net(struct net *net)
+--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
+@@ -622,18 +622,12 @@ EXPORT_SYMBOL_GPL(nf_ct_frag6_gather);
+ static int nf_ct_net_init(struct net *net)
+ {
+-      int res;
+-
+       net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
+       net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
+       net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
+-      res = inet_frags_init_net(&net->nf_frag.frags);
+-      if (res)
+-              return res;
+-      res = nf_ct_frag6_sysctl_register(net);
+-      if (res)
+-              inet_frags_uninit_net(&net->nf_frag.frags);
+-      return res;
++      inet_frags_init_net(&net->nf_frag.frags);
++
++      return nf_ct_frag6_sysctl_register(net);
+ }
+ static void nf_ct_net_exit(struct net *net)
+--- a/net/ipv6/reassembly.c
++++ b/net/ipv6/reassembly.c
+@@ -714,19 +714,13 @@ static void ip6_frags_sysctl_unregister(
+ static int __net_init ipv6_frags_init_net(struct net *net)
+ {
+-      int res;
+-
+       net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
+       net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
+       net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
+-      res = inet_frags_init_net(&net->ipv6.frags);
+-      if (res)
+-              return res;
+-      res = ip6_frags_ns_sysctl_register(net);
+-      if (res)
+-              inet_frags_uninit_net(&net->ipv6.frags);
+-      return res;
++      inet_frags_init_net(&net->ipv6.frags);
++
++      return ip6_frags_ns_sysctl_register(net);
+ }
+ static void __net_exit ipv6_frags_exit_net(struct net *net)
diff --git a/queue-4.12/revert-net-phy-correctly-process-phy_halted-in-phy_stop_machine.patch b/queue-4.12/revert-net-phy-correctly-process-phy_halted-in-phy_stop_machine.patch
new file mode 100644 (file)
index 0000000..d780a26
--- /dev/null
@@ -0,0 +1,104 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Wed, 30 Aug 2017 17:49:29 -0700
+Subject: Revert "net: phy: Correctly process PHY_HALTED in phy_stop_machine()"
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+
+[ Upstream commit ebc8254aeae34226d0bc8fda309fd9790d4dccfe ]
+
+This reverts commit 7ad813f208533cebfcc32d3d7474dc1677d1b09a ("net: phy:
+Correctly process PHY_HALTED in phy_stop_machine()") because it is
+creating the possibility for a NULL pointer dereference.
+
+David Daney provided the following call trace and diagram of events:
+
+When ndo_stop() is called we call:
+
+ phy_disconnect()
+    +---> phy_stop_interrupts() implies: phydev->irq = PHY_POLL;
+    +---> phy_stop_machine()
+    |      +---> phy_state_machine()
+    |              +----> queue_delayed_work(): Work queued.
+    +--->phy_detach() implies: phydev->attached_dev = NULL;
+
+Now at a later time the queued work does:
+
+ phy_state_machine()
+    +---->netif_carrier_off(phydev->attached_dev): Oh no! It is NULL:
+
+ CPU 12 Unable to handle kernel paging request at virtual address
+0000000000000048, epc == ffffffff80de37ec, ra == ffffffff80c7c
+Oops[#1]:
+CPU: 12 PID: 1502 Comm: kworker/12:1 Not tainted 4.9.43-Cavium-Octeon+ #1
+Workqueue: events_power_efficient phy_state_machine
+task: 80000004021ed100 task.stack: 8000000409d70000
+$ 0   : 0000000000000000 ffffffff84720060 0000000000000048 0000000000000004
+$ 4   : 0000000000000000 0000000000000001 0000000000000004 0000000000000000
+$ 8   : 0000000000000000 0000000000000000 00000000ffff98f3 0000000000000000
+$12   : 8000000409d73fe0 0000000000009c00 ffffffff846547c8 000000000000af3b
+$16   : 80000004096bab68 80000004096babd0 0000000000000000 80000004096ba800
+$20   : 0000000000000000 0000000000000000 ffffffff81090000 0000000000000008
+$24   : 0000000000000061 ffffffff808637b0
+$28   : 8000000409d70000 8000000409d73cf0 80000000271bd300 ffffffff80c7804c
+Hi    : 000000000000002a
+Lo    : 000000000000003f
+epc   : ffffffff80de37ec netif_carrier_off+0xc/0x58
+ra    : ffffffff80c7804c phy_state_machine+0x48c/0x4f8
+Status: 14009ce3        KX SX UX KERNEL EXL IE
+Cause : 00800008 (ExcCode 02)
+BadVA : 0000000000000048
+PrId  : 000d9501 (Cavium Octeon III)
+Modules linked in:
+Process kworker/12:1 (pid: 1502, threadinfo=8000000409d70000,
+task=80000004021ed100, tls=0000000000000000)
+Stack : 8000000409a54000 80000004096bab68 80000000271bd300 80000000271c1e00
+        0000000000000000 ffffffff808a1708 8000000409a54000 80000000271bd300
+        80000000271bd320 8000000409a54030 ffffffff80ff0f00 0000000000000001
+        ffffffff81090000 ffffffff808a1ac0 8000000402182080 ffffffff84650000
+        8000000402182080 ffffffff84650000 ffffffff80ff0000 8000000409a54000
+        ffffffff808a1970 0000000000000000 80000004099e8000 8000000402099240
+        0000000000000000 ffffffff808a8598 0000000000000000 8000000408eeeb00
+        8000000409a54000 00000000810a1d00 0000000000000000 8000000409d73de8
+        8000000409d73de8 0000000000000088 000000000c009c00 8000000409d73e08
+        8000000409d73e08 8000000402182080 ffffffff808a84d0 8000000402182080
+        ...
+Call Trace:
+[<ffffffff80de37ec>] netif_carrier_off+0xc/0x58
+[<ffffffff80c7804c>] phy_state_machine+0x48c/0x4f8
+[<ffffffff808a1708>] process_one_work+0x158/0x368
+[<ffffffff808a1ac0>] worker_thread+0x150/0x4c0
+[<ffffffff808a8598>] kthread+0xc8/0xe0
+[<ffffffff808617f0>] ret_from_kernel_thread+0x14/0x1c
+
+The original motivation for this change originated from Marc Gonzales
+indicating that his network driver did not have its adjust_link callback
+executing with phydev->link = 0 while he was expecting it.
+
+PHYLIB has never made any such guarantee, because phy_stop() merely
+tells the workqueue to move into the PHY_HALTED state, which will happen
+asynchronously.
+
+Reported-by: Geert Uytterhoeven <geert+renesas@glider.be>
+Reported-by: David Daney <ddaney.cavm@gmail.com>
+Fixes: 7ad813f20853 ("net: phy: Correctly process PHY_HALTED in phy_stop_machine()")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/phy.c |    3 ---
+ 1 file changed, 3 deletions(-)
+
+--- a/drivers/net/phy/phy.c
++++ b/drivers/net/phy/phy.c
+@@ -749,9 +749,6 @@ void phy_stop_machine(struct phy_device
+       if (phydev->state > PHY_UP && phydev->state != PHY_HALTED)
+               phydev->state = PHY_UP;
+       mutex_unlock(&phydev->lock);
+-
+-      /* Now we can run the state machine synchronously */
+-      phy_state_machine(&phydev->state_queue.work);
+ }
+ /**
diff --git a/queue-4.12/revert-net-use-lib-percpu_counter-api-for-fragmentation-mem-accounting.patch b/queue-4.12/revert-net-use-lib-percpu_counter-api-for-fragmentation-mem-accounting.patch
new file mode 100644 (file)
index 0000000..216ceb7
--- /dev/null
@@ -0,0 +1,141 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Jesper Dangaard Brouer <brouer@redhat.com>
+Date: Fri, 1 Sep 2017 11:26:08 +0200
+Subject: Revert "net: use lib/percpu_counter API for fragmentation mem accounting"
+
+From: Jesper Dangaard Brouer <brouer@redhat.com>
+
+
+[ Upstream commit fb452a1aa3fd4034d7999e309c5466ff2d7005aa ]
+
+This reverts commit 6d7b857d541ecd1d9bd997c97242d4ef94b19de2.
+
+There is a bug in the fragmentation code's use of the percpu_counter API
+that can cause issues on systems with many CPUs.
+
+The frag_mem_limit() just reads the global counter (fbc->count),
+without considering that other CPUs can have up to batch size (130K)
+that hasn't been subtracted yet.  Due to the 3MBytes lower thresh limit,
+this becomes dangerous at >=24 CPUs (3*1024*1024/130000=24).
+
+The correct API usage would be to use __percpu_counter_compare() which
+does the right thing, and takes into account the number of (online)
+CPUs and batch size, to account for this and call __percpu_counter_sum()
+when needed.
+
+We choose to revert the use of the lib/percpu_counter API for frag
+memory accounting for several reasons:
+
+1) On systems with CPUs > 24, the heavier fully locked
+   __percpu_counter_sum() is always invoked, which will be more
+   expensive than the atomic_t that is reverted to.
+
+Given systems with more than 24 CPUs are becoming common this doesn't
+seem like a good option.  To mitigate this, the batch size could be
+decreased and thresh be increased.
+
+2) The add_frag_mem_limit+sub_frag_mem_limit pairs happen on the RX
+   CPU, before SKBs are pushed into sockets on remote CPUs.  Given
+   NICs can only hash on L2 part of the IP-header, the NIC-RXq's will
+   likely be limited.  Thus, a fair chance that atomic add+dec happen
+   on the same CPU.
+
+Revert note: commit 1d6119baf061 ("net: fix percpu memory leaks")
+removed init_frag_mem_limit() and instead uses inet_frags_init_net().
+After this revert, inet_frags_uninit_net() becomes empty.
+
+Fixes: 6d7b857d541e ("net: use lib/percpu_counter API for fragmentation mem accounting")
+Fixes: 1d6119baf061 ("net: fix percpu memory leaks")
+Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
+Acked-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/inet_frag.h  |   30 +++++++++---------------------
+ net/ipv4/inet_fragment.c |    4 +---
+ 2 files changed, 10 insertions(+), 24 deletions(-)
+
+--- a/include/net/inet_frag.h
++++ b/include/net/inet_frag.h
+@@ -1,14 +1,9 @@
+ #ifndef __NET_FRAG_H__
+ #define __NET_FRAG_H__
+-#include <linux/percpu_counter.h>
+-
+ struct netns_frags {
+-      /* The percpu_counter "mem" need to be cacheline aligned.
+-       *  mem.count must not share cacheline with other writers
+-       */
+-      struct percpu_counter   mem ____cacheline_aligned_in_smp;
+-
++      /* Keep atomic mem on separate cachelines in structs that include it */
++      atomic_t                mem ____cacheline_aligned_in_smp;
+       /* sysctls */
+       int                     timeout;
+       int                     high_thresh;
+@@ -110,11 +105,11 @@ void inet_frags_fini(struct inet_frags *
+ static inline int inet_frags_init_net(struct netns_frags *nf)
+ {
+-      return percpu_counter_init(&nf->mem, 0, GFP_KERNEL);
++      atomic_set(&nf->mem, 0);
++      return 0;
+ }
+ static inline void inet_frags_uninit_net(struct netns_frags *nf)
+ {
+-      percpu_counter_destroy(&nf->mem);
+ }
+ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f);
+@@ -140,31 +135,24 @@ static inline bool inet_frag_evicting(st
+ /* Memory Tracking Functions. */
+-/* The default percpu_counter batch size is not big enough to scale to
+- * fragmentation mem acct sizes.
+- * The mem size of a 64K fragment is approx:
+- *  (44 fragments * 2944 truesize) + frag_queue struct(200) = 129736 bytes
+- */
+-static unsigned int frag_percpu_counter_batch = 130000;
+-
+ static inline int frag_mem_limit(struct netns_frags *nf)
+ {
+-      return percpu_counter_read(&nf->mem);
++      return atomic_read(&nf->mem);
+ }
+ static inline void sub_frag_mem_limit(struct netns_frags *nf, int i)
+ {
+-      percpu_counter_add_batch(&nf->mem, -i, frag_percpu_counter_batch);
++      atomic_sub(i, &nf->mem);
+ }
+ static inline void add_frag_mem_limit(struct netns_frags *nf, int i)
+ {
+-      percpu_counter_add_batch(&nf->mem, i, frag_percpu_counter_batch);
++      atomic_add(i, &nf->mem);
+ }
+-static inline unsigned int sum_frag_mem_limit(struct netns_frags *nf)
++static inline int sum_frag_mem_limit(struct netns_frags *nf)
+ {
+-      return percpu_counter_sum_positive(&nf->mem);
++      return atomic_read(&nf->mem);
+ }
+ /* RFC 3168 support :
+--- a/net/ipv4/inet_fragment.c
++++ b/net/ipv4/inet_fragment.c
+@@ -234,10 +234,8 @@ evict_again:
+       cond_resched();
+       if (read_seqretry(&f->rnd_seqlock, seq) ||
+-          percpu_counter_sum(&nf->mem))
++          sum_frag_mem_limit(nf))
+               goto evict_again;
+-
+-      percpu_counter_destroy(&nf->mem);
+ }
+ EXPORT_SYMBOL(inet_frags_exit_net);
diff --git a/queue-4.12/sctp-avoid-out-of-bounds-reads-from-address-storage.patch b/queue-4.12/sctp-avoid-out-of-bounds-reads-from-address-storage.patch
new file mode 100644 (file)
index 0000000..ee27215
--- /dev/null
@@ -0,0 +1,189 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Stefano Brivio <sbrivio@redhat.com>
+Date: Wed, 23 Aug 2017 13:27:13 +0200
+Subject: sctp: Avoid out-of-bounds reads from address storage
+
+From: Stefano Brivio <sbrivio@redhat.com>
+
+
+[ Upstream commit ee6c88bb754e3d363e568da78086adfedb692447 ]
+
+inet_diag_msg_sctp{,l}addr_fill() and sctp_get_sctp_info() copy
+sizeof(sockaddr_storage) bytes to fill in sockaddr structs used
+to export diagnostic information to userspace.
+
+However, the memory allocated to store sockaddr information is
+smaller than that and depends on the address family, so we leak
+up to 100 uninitialized bytes to userspace. Just use the size of
+the source structs instead, in all the three cases this is what
+userspace expects. Zero out the remaining memory.
+
+Unused bytes (i.e. when IPv4 addresses are used) in source
+structs sctp_sockaddr_entry and sctp_transport are already
+cleared by sctp_add_bind_addr() and sctp_transport_new(),
+respectively.
+
+Noticed while testing KASAN-enabled kernel with 'ss':
+
+[ 2326.885243] BUG: KASAN: slab-out-of-bounds in inet_sctp_diag_fill+0x42c/0x6c0 [sctp_diag] at addr ffff881be8779800
+[ 2326.896800] Read of size 128 by task ss/9527
+[ 2326.901564] CPU: 0 PID: 9527 Comm: ss Not tainted 4.11.0-22.el7a.x86_64 #1
+[ 2326.909236] Hardware name: Dell Inc. PowerEdge R730/072T6D, BIOS 2.4.3 01/17/2017
+[ 2326.917585] Call Trace:
+[ 2326.920312]  dump_stack+0x63/0x8d
+[ 2326.924014]  kasan_object_err+0x21/0x70
+[ 2326.928295]  kasan_report+0x288/0x540
+[ 2326.932380]  ? inet_sctp_diag_fill+0x42c/0x6c0 [sctp_diag]
+[ 2326.938500]  ? skb_put+0x8b/0xd0
+[ 2326.942098]  ? memset+0x31/0x40
+[ 2326.945599]  check_memory_region+0x13c/0x1a0
+[ 2326.950362]  memcpy+0x23/0x50
+[ 2326.953669]  inet_sctp_diag_fill+0x42c/0x6c0 [sctp_diag]
+[ 2326.959596]  ? inet_diag_msg_sctpasoc_fill+0x460/0x460 [sctp_diag]
+[ 2326.966495]  ? __lock_sock+0x102/0x150
+[ 2326.970671]  ? sock_def_wakeup+0x60/0x60
+[ 2326.975048]  ? remove_wait_queue+0xc0/0xc0
+[ 2326.979619]  sctp_diag_dump+0x44a/0x760 [sctp_diag]
+[ 2326.985063]  ? sctp_ep_dump+0x280/0x280 [sctp_diag]
+[ 2326.990504]  ? memset+0x31/0x40
+[ 2326.994007]  ? mutex_lock+0x12/0x40
+[ 2326.997900]  __inet_diag_dump+0x57/0xb0 [inet_diag]
+[ 2327.003340]  ? __sys_sendmsg+0x150/0x150
+[ 2327.007715]  inet_diag_dump+0x4d/0x80 [inet_diag]
+[ 2327.012979]  netlink_dump+0x1e6/0x490
+[ 2327.017064]  __netlink_dump_start+0x28e/0x2c0
+[ 2327.021924]  inet_diag_handler_cmd+0x189/0x1a0 [inet_diag]
+[ 2327.028045]  ? inet_diag_rcv_msg_compat+0x1b0/0x1b0 [inet_diag]
+[ 2327.034651]  ? inet_diag_dump_compat+0x190/0x190 [inet_diag]
+[ 2327.040965]  ? __netlink_lookup+0x1b9/0x260
+[ 2327.045631]  sock_diag_rcv_msg+0x18b/0x1e0
+[ 2327.050199]  netlink_rcv_skb+0x14b/0x180
+[ 2327.054574]  ? sock_diag_bind+0x60/0x60
+[ 2327.058850]  sock_diag_rcv+0x28/0x40
+[ 2327.062837]  netlink_unicast+0x2e7/0x3b0
+[ 2327.067212]  ? netlink_attachskb+0x330/0x330
+[ 2327.071975]  ? kasan_check_write+0x14/0x20
+[ 2327.076544]  netlink_sendmsg+0x5be/0x730
+[ 2327.080918]  ? netlink_unicast+0x3b0/0x3b0
+[ 2327.085486]  ? kasan_check_write+0x14/0x20
+[ 2327.090057]  ? selinux_socket_sendmsg+0x24/0x30
+[ 2327.095109]  ? netlink_unicast+0x3b0/0x3b0
+[ 2327.099678]  sock_sendmsg+0x74/0x80
+[ 2327.103567]  ___sys_sendmsg+0x520/0x530
+[ 2327.107844]  ? __get_locked_pte+0x178/0x200
+[ 2327.112510]  ? copy_msghdr_from_user+0x270/0x270
+[ 2327.117660]  ? vm_insert_page+0x360/0x360
+[ 2327.122133]  ? vm_insert_pfn_prot+0xb4/0x150
+[ 2327.126895]  ? vm_insert_pfn+0x32/0x40
+[ 2327.131077]  ? vvar_fault+0x71/0xd0
+[ 2327.134968]  ? special_mapping_fault+0x69/0x110
+[ 2327.140022]  ? __do_fault+0x42/0x120
+[ 2327.144008]  ? __handle_mm_fault+0x1062/0x17a0
+[ 2327.148965]  ? __fget_light+0xa7/0xc0
+[ 2327.153049]  __sys_sendmsg+0xcb/0x150
+[ 2327.157133]  ? __sys_sendmsg+0xcb/0x150
+[ 2327.161409]  ? SyS_shutdown+0x140/0x140
+[ 2327.165688]  ? exit_to_usermode_loop+0xd0/0xd0
+[ 2327.170646]  ? __do_page_fault+0x55d/0x620
+[ 2327.175216]  ? __sys_sendmsg+0x150/0x150
+[ 2327.179591]  SyS_sendmsg+0x12/0x20
+[ 2327.183384]  do_syscall_64+0xe3/0x230
+[ 2327.187471]  entry_SYSCALL64_slow_path+0x25/0x25
+[ 2327.192622] RIP: 0033:0x7f41d18fa3b0
+[ 2327.196608] RSP: 002b:00007ffc3b731218 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
+[ 2327.205055] RAX: ffffffffffffffda RBX: 00007ffc3b731380 RCX: 00007f41d18fa3b0
+[ 2327.213017] RDX: 0000000000000000 RSI: 00007ffc3b731340 RDI: 0000000000000003
+[ 2327.220978] RBP: 0000000000000002 R08: 0000000000000004 R09: 0000000000000040
+[ 2327.228939] R10: 00007ffc3b730f30 R11: 0000000000000246 R12: 0000000000000003
+[ 2327.236901] R13: 00007ffc3b731340 R14: 00007ffc3b7313d0 R15: 0000000000000084
+[ 2327.244865] Object at ffff881be87797e0, in cache kmalloc-64 size: 64
+[ 2327.251953] Allocated:
+[ 2327.254581] PID = 9484
+[ 2327.257215]  save_stack_trace+0x1b/0x20
+[ 2327.261485]  save_stack+0x46/0xd0
+[ 2327.265179]  kasan_kmalloc+0xad/0xe0
+[ 2327.269165]  kmem_cache_alloc_trace+0xe6/0x1d0
+[ 2327.274138]  sctp_add_bind_addr+0x58/0x180 [sctp]
+[ 2327.279400]  sctp_do_bind+0x208/0x310 [sctp]
+[ 2327.284176]  sctp_bind+0x61/0xa0 [sctp]
+[ 2327.288455]  inet_bind+0x5f/0x3a0
+[ 2327.292151]  SYSC_bind+0x1a4/0x1e0
+[ 2327.295944]  SyS_bind+0xe/0x10
+[ 2327.299349]  do_syscall_64+0xe3/0x230
+[ 2327.303433]  return_from_SYSCALL_64+0x0/0x6a
+[ 2327.308194] Freed:
+[ 2327.310434] PID = 4131
+[ 2327.313065]  save_stack_trace+0x1b/0x20
+[ 2327.317344]  save_stack+0x46/0xd0
+[ 2327.321040]  kasan_slab_free+0x73/0xc0
+[ 2327.325220]  kfree+0x96/0x1a0
+[ 2327.328530]  dynamic_kobj_release+0x15/0x40
+[ 2327.333195]  kobject_release+0x99/0x1e0
+[ 2327.337472]  kobject_put+0x38/0x70
+[ 2327.341266]  free_notes_attrs+0x66/0x80
+[ 2327.345545]  mod_sysfs_teardown+0x1a5/0x270
+[ 2327.350211]  free_module+0x20/0x2a0
+[ 2327.354099]  SyS_delete_module+0x2cb/0x2f0
+[ 2327.358667]  do_syscall_64+0xe3/0x230
+[ 2327.362750]  return_from_SYSCALL_64+0x0/0x6a
+[ 2327.367510] Memory state around the buggy address:
+[ 2327.372855]  ffff881be8779700: fc fc fc fc 00 00 00 00 00 00 00 00 fc fc fc fc
+[ 2327.380914]  ffff881be8779780: fb fb fb fb fb fb fb fb fc fc fc fc 00 00 00 00
+[ 2327.388972] >ffff881be8779800: 00 00 00 00 fc fc fc fc fb fb fb fb fb fb fb fb
+[ 2327.397031]                                ^
+[ 2327.401792]  ffff881be8779880: fc fc fc fc fb fb fb fb fb fb fb fb fc fc fc fc
+[ 2327.409850]  ffff881be8779900: 00 00 00 00 00 04 fc fc fc fc fc fc 00 00 00 00
+[ 2327.417907] ==================================================================
+
+This fixes CVE-2017-7558.
+
+References: https://bugzilla.redhat.com/show_bug.cgi?id=1480266
+Fixes: 8f840e47f190 ("sctp: add the sctp_diag.c file")
+Cc: Xin Long <lucien.xin@gmail.com>
+Cc: Vlad Yasevich <vyasevich@gmail.com>
+Cc: Neil Horman <nhorman@tuxdriver.com>
+Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Reviewed-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/sctp_diag.c |    7 +++++--
+ net/sctp/socket.c    |    3 +--
+ 2 files changed, 6 insertions(+), 4 deletions(-)
+
+--- a/net/sctp/sctp_diag.c
++++ b/net/sctp/sctp_diag.c
+@@ -70,7 +70,8 @@ static int inet_diag_msg_sctpladdrs_fill
+       info = nla_data(attr);
+       list_for_each_entry_rcu(laddr, address_list, list) {
+-              memcpy(info, &laddr->a, addrlen);
++              memcpy(info, &laddr->a, sizeof(laddr->a));
++              memset(info + sizeof(laddr->a), 0, addrlen - sizeof(laddr->a));
+               info += addrlen;
+       }
+@@ -93,7 +94,9 @@ static int inet_diag_msg_sctpaddrs_fill(
+       info = nla_data(attr);
+       list_for_each_entry(from, &asoc->peer.transport_addr_list,
+                           transports) {
+-              memcpy(info, &from->ipaddr, addrlen);
++              memcpy(info, &from->ipaddr, sizeof(from->ipaddr));
++              memset(info + sizeof(from->ipaddr), 0,
++                     addrlen - sizeof(from->ipaddr));
+               info += addrlen;
+       }
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -4538,8 +4538,7 @@ int sctp_get_sctp_info(struct sock *sk,
+       info->sctpi_ictrlchunks = asoc->stats.ictrlchunks;
+       prim = asoc->peer.primary_path;
+-      memcpy(&info->sctpi_p_address, &prim->ipaddr,
+-             sizeof(struct sockaddr_storage));
++      memcpy(&info->sctpi_p_address, &prim->ipaddr, sizeof(prim->ipaddr));
+       info->sctpi_p_state = prim->state;
+       info->sctpi_p_cwnd = prim->cwnd;
+       info->sctpi_p_srtt = prim->srtt;
diff --git a/queue-4.12/sctp-fix-missing-wake-ups-in-some-situations.patch b/queue-4.12/sctp-fix-missing-wake-ups-in-some-situations.patch
new file mode 100644 (file)
index 0000000..1a07803
--- /dev/null
@@ -0,0 +1,52 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Date: Fri, 8 Sep 2017 11:35:21 -0300
+Subject: sctp: fix missing wake ups in some situations
+
+From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+
+
+[ Upstream commit 7906b00f5cd1cd484fced7fcda892176e3202c8a ]
+
+Commit fb586f25300f ("sctp: delay calls to sk_data_ready() as much as
+possible") minimized the number of wake ups that are triggered in case
+the association receives a packet with multiple data chunks on it and/or
+when io_events are enabled and then commit 0970f5b36659 ("sctp: signal
+sk_data_ready earlier on data chunks reception") moved the wake up to as
+soon as possible. It thus relies on the state machine running later to
+clean the flag that the event was already generated.
+
+The issue is that there are 2 call paths that call
+sctp_ulpq_tail_event() outside of the state machine, causing the flag to
+linger and possibly omitting a needed wake up in the sequence.
+
+One of the call paths is when enabling SCTP_SENDER_DRY_EVENTS via
+setsockopt(SCTP_EVENTS), as noticed by Harald Welte. The other is when
+partial reliability triggers removal of chunks from the send queue when
+the application calls sendmsg().
+
+This commit fixes it by not setting the flag in case the socket is not
+owned by the user, as it won't be cleaned later. This works for
+user-initiated calls and also for rx path processing.
+
+Fixes: fb586f25300f ("sctp: delay calls to sk_data_ready() as much as possible")
+Reported-by: Harald Welte <laforge@gnumonks.org>
+Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/ulpqueue.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/sctp/ulpqueue.c
++++ b/net/sctp/ulpqueue.c
+@@ -265,7 +265,8 @@ int sctp_ulpq_tail_event(struct sctp_ulp
+               sctp_ulpq_clear_pd(ulpq);
+       if (queue == &sk->sk_receive_queue && !sp->data_ready_signalled) {
+-              sp->data_ready_signalled = 1;
++              if (!sock_owned_by_user(sk))
++                      sp->data_ready_signalled = 1;
+               sk->sk_data_ready(sk);
+       }
+       return 1;
diff --git a/queue-4.12/series b/queue-4.12/series
new file mode 100644 (file)
index 0000000..cb6188f
--- /dev/null
@@ -0,0 +1,43 @@
+ipv6-accept-64k-1-packet-length-in-ip6_find_1stfragopt.patch
+ipv6-add-rcu-grace-period-before-freeing-fib6_node.patch
+ipv6-fix-sparse-warning-on-rt6i_node.patch
+macsec-add-genl-family-module-alias.patch
+udp-on-peeking-bad-csum-drop-packets-even-if-not-at-head.patch
+bpf-fix-map-value-attribute-for-hash-of-maps.patch
+fsl-man-inherit-parent-device-and-of_node.patch
+sctp-avoid-out-of-bounds-reads-from-address-storage.patch
+qlge-avoid-memcpy-buffer-overflow.patch
+tipc-fix-tipc_sk_reinit-handling-of-eagain.patch
+net-systemport-be-drop-monitor-friendly.patch
+net-bcmgenet-be-drop-monitor-friendly.patch
+net-systemport-free-dma-coherent-descriptors-on-errors.patch
+netvsc-fix-deadlock-betwen-link-status-and-removal.patch
+udp6-set-rx_dst_cookie-on-rx_dst-updates.patch
+net-mvpp2-fix-the-mac-address-used-when-using-ppv2.2.patch
+cxgb4-fix-stack-out-of-bounds-read-due-to-wrong-size-to-t4_record_mbox.patch
+ipv6-set-dst.obsolete-when-a-cached-route-has-expired.patch
+ipv6-do-not-set-sk_destruct-in-ipv6_addrform-sockopt.patch
+packet-don-t-write-vnet-header-beyond-end-of-buffer.patch
+kcm-do-not-attach-pf_kcm-sockets-to-avoid-deadlock.patch
+net-dsa-bcm_sf2-fix-number-of-cfp-entries-for-bcm7278.patch
+net-mlx5e-check-for-qos-capability-in-dcbnl_initialize.patch
+net-mlx5e-fix-dcb_cap_attr_dcbx-capability-for-dcbnl-getcap.patch
+net-mlx5-fix-arm-srq-command-for-issi-version-0.patch
+net-mlx5e-fix-dangling-page-pointer-on-dma-mapping-error.patch
+net-mlx5e-don-t-override-user-rss-upon-set-channels.patch
+net-mlx5e-properly-resolve-tc-offloaded-ipv6-vxlan-tunnel-source-address.patch
+net-mlx5-e-switch-unload-the-representors-in-the-correct-order.patch
+net-mlx5e-fix-inline-header-size-for-small-packets.patch
+net-mlx5e-fix-cq-moderation-mode-not-set-properly.patch
+revert-net-phy-correctly-process-phy_halted-in-phy_stop_machine.patch
+net-fec-allow-reception-of-frames-bigger-than-1522-bytes.patch
+mlxsw-spectrum-forbid-linking-to-devices-that-have-uppers.patch
+bridge-switchdev-clear-forward-mark-when-transmitting-packet.patch
+revert-net-use-lib-percpu_counter-api-for-fragmentation-mem-accounting.patch
+revert-net-fix-percpu-memory-leaks.patch
+gianfar-fix-tx-flow-control-deactivation.patch
+vhost_net-correctly-check-tx-avail-during-rx-busy-polling.patch
+ip6_gre-update-mtu-properly-in-ip6gre_err.patch
+ipv6-fix-memory-leak-with-multiple-tables-during-netns-destruction.patch
+ipv6-fix-typo-in-fib6_net_exit.patch
+sctp-fix-missing-wake-ups-in-some-situations.patch
diff --git a/queue-4.12/tipc-fix-tipc_sk_reinit-handling-of-eagain.patch b/queue-4.12/tipc-fix-tipc_sk_reinit-handling-of-eagain.patch
new file mode 100644 (file)
index 0000000..7c7ee91
--- /dev/null
@@ -0,0 +1,58 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Bob Peterson <rpeterso@redhat.com>
+Date: Wed, 23 Aug 2017 10:43:02 -0400
+Subject: tipc: Fix tipc_sk_reinit handling of -EAGAIN
+
+From: Bob Peterson <rpeterso@redhat.com>
+
+
+[ Upstream commit 6c7e983b220f89e03286dc70a41c7ef3a8b409df ]
+
+In 9dbbfb0ab6680c6a85609041011484e6658e7d3c function tipc_sk_reinit
+had additional logic added to loop in the event that function
+rhashtable_walk_next() returned -EAGAIN. No worries.
+
+However, if rhashtable_walk_start returns -EAGAIN, it does "continue",
+and therefore skips the call to rhashtable_walk_stop(). That has
+the effect of calling rcu_read_lock() without its paired call to
+rcu_read_unlock(). Since rcu_read_lock() may be nested, the problem
+may not be apparent for a while, especially since resize events may
+be rare. But the comments to rhashtable_walk_start() state:
+
+ * ...Note that we take the RCU lock in all
+ * cases including when we return an error.  So you must always call
+ * rhashtable_walk_stop to clean up.
+
+This patch replaces the continue with a goto and label to ensure a
+matching call to rhashtable_walk_stop().
+
+Signed-off-by: Bob Peterson <rpeterso@redhat.com>
+Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tipc/socket.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/net/tipc/socket.c
++++ b/net/tipc/socket.c
+@@ -2255,8 +2255,8 @@ void tipc_sk_reinit(struct net *net)
+       do {
+               tsk = ERR_PTR(rhashtable_walk_start(&iter));
+-              if (tsk)
+-                      continue;
++              if (IS_ERR(tsk))
++                      goto walk_stop;
+               while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) {
+                       spin_lock_bh(&tsk->sk.sk_lock.slock);
+@@ -2265,7 +2265,7 @@ void tipc_sk_reinit(struct net *net)
+                       msg_set_orignode(msg, tn->own_addr);
+                       spin_unlock_bh(&tsk->sk.sk_lock.slock);
+               }
+-
++walk_stop:
+               rhashtable_walk_stop(&iter);
+       } while (tsk == ERR_PTR(-EAGAIN));
+ }
diff --git a/queue-4.12/udp-on-peeking-bad-csum-drop-packets-even-if-not-at-head.patch b/queue-4.12/udp-on-peeking-bad-csum-drop-packets-even-if-not-at-head.patch
new file mode 100644 (file)
index 0000000..0b93846
--- /dev/null
@@ -0,0 +1,51 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 22 Aug 2017 09:39:28 -0700
+Subject: udp: on peeking bad csum, drop packets even if not at head
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit fd6055a806edc4019be1b9fb7d25262599bca5b1 ]
+
+When peeking, if a bad csum is discovered, the skb is unlinked from
+the queue with __sk_queue_drop_skb and the peek operation restarted.
+
+__sk_queue_drop_skb only drops packets that match the queue head.
+
+This fails if the skb was found after the head, using SO_PEEK_OFF
+socket option. This causes an infinite loop.
+
+We MUST drop this problematic skb, and we can simply check if skb was
+already removed by another thread, by looking at skb->next :
+
+This pointer is set to NULL by the  __skb_unlink() operation, that might
+have happened only under the spinlock protection.
+
+Many thanks to syzkaller team (and particularly Dmitry Vyukov who
+provided us nice C reproducers exhibiting the lockup) and Willem de
+Bruijn who provided first version for this patch and a test program.
+
+Fixes: 627d2d6b5500 ("udp: enable MSG_PEEK at non-zero offset")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Cc: Willem de Bruijn <willemb@google.com>
+Acked-by: Paolo Abeni <pabeni@redhat.com>
+Acked-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/datagram.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/core/datagram.c
++++ b/net/core/datagram.c
+@@ -345,7 +345,7 @@ int __sk_queue_drop_skb(struct sock *sk,
+       if (flags & MSG_PEEK) {
+               err = -ENOENT;
+               spin_lock_bh(&sk->sk_receive_queue.lock);
+-              if (skb == skb_peek(&sk->sk_receive_queue)) {
++              if (skb->next) {
+                       __skb_unlink(skb, &sk->sk_receive_queue);
+                       atomic_dec(&skb->users);
+                       if (destructor)
diff --git a/queue-4.12/udp6-set-rx_dst_cookie-on-rx_dst-updates.patch b/queue-4.12/udp6-set-rx_dst_cookie-on-rx_dst-updates.patch
new file mode 100644 (file)
index 0000000..b456962
--- /dev/null
@@ -0,0 +1,89 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Fri, 25 Aug 2017 14:31:01 +0200
+Subject: udp6: set rx_dst_cookie on rx_dst updates
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+
+[ Upstream commit 64f0f5d18a47c703c85576375cc010e83dac6a48 ]
+
+Currently, in the udp6 code, the dst cookie is not initialized/updated
+concurrently with the RX dst used by early demux.
+
+As a result, the dst_check() in the early_demux path always fails,
+the rx dst cache is always invalidated, and we can't really
+leverage significant gain from the demux lookup.
+
+Fix it by adding a udp6-specific variant of sk_rx_dst_set() and using it
+to set the dst cookie when the dst entry is really changed.
+
+The issue is there since the introduction of early demux for ipv6.
+
+Fixes: 5425077d73e0 ("net: ipv6: Add early demux handler for UDP unicast")
+Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/udp.h |    2 +-
+ net/ipv4/udp.c    |    3 ++-
+ net/ipv6/udp.c    |   11 ++++++++++-
+ 3 files changed, 13 insertions(+), 3 deletions(-)
+
+--- a/include/net/udp.h
++++ b/include/net/udp.h
+@@ -265,7 +265,7 @@ static inline struct sk_buff *skb_recv_u
+ }
+ void udp_v4_early_demux(struct sk_buff *skb);
+-void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst);
++bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst);
+ int udp_get_port(struct sock *sk, unsigned short snum,
+                int (*saddr_cmp)(const struct sock *,
+                                 const struct sock *));
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -1762,13 +1762,14 @@ drop:
+ /* For TCP sockets, sk_rx_dst is protected by socket lock
+  * For UDP, we use xchg() to guard against concurrent changes.
+  */
+-void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
++bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
+ {
+       struct dst_entry *old;
+       dst_hold(dst);
+       old = xchg(&sk->sk_rx_dst, dst);
+       dst_release(old);
++      return old != dst;
+ }
+ /*
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -752,6 +752,15 @@ start_lookup:
+       return 0;
+ }
++static void udp6_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
++{
++      if (udp_sk_rx_dst_set(sk, dst)) {
++              const struct rt6_info *rt = (const struct rt6_info *)dst;
++
++              inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
++      }
++}
++
+ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
+                  int proto)
+ {
+@@ -801,7 +810,7 @@ int __udp6_lib_rcv(struct sk_buff *skb,
+               int ret;
+               if (unlikely(sk->sk_rx_dst != dst))
+-                      udp_sk_rx_dst_set(sk, dst);
++                      udp6_sk_rx_dst_set(sk, dst);
+               ret = udpv6_queue_rcv_skb(sk, skb);
+               sock_put(sk);
diff --git a/queue-4.12/vhost_net-correctly-check-tx-avail-during-rx-busy-polling.patch b/queue-4.12/vhost_net-correctly-check-tx-avail-during-rx-busy-polling.patch
new file mode 100644 (file)
index 0000000..97adce2
--- /dev/null
@@ -0,0 +1,47 @@
+From foo@baz Thu Sep 14 23:20:23 PDT 2017
+From: Jason Wang <jasowang@redhat.com>
+Date: Tue, 5 Sep 2017 09:22:05 +0800
+Subject: vhost_net: correctly check tx avail during rx busy polling
+
+From: Jason Wang <jasowang@redhat.com>
+
+
+[ Upstream commit 8b949bef9172ca69d918e93509a4ecb03d0355e0 ]
+
+In the past we checked tx avail through vhost_enable_notify(), which is
+wrong since it only checks whether or not the guest has filled more
+available buffers since the last avail idx synchronization, which was
+just done by vhost_vq_avail_empty() before. What we really want is to
+check for pending buffers in the avail ring. Fix this by calling
+vhost_vq_avail_empty() instead.
+
+This issue can be noticed by running the netperf TCP_RR benchmark as a
+client from the guest (but not the host). With this fix, TCP_RR from guest
+to localhost recovers from 1375.91 trans per sec to 55235.28 trans per
+sec on my laptop (Intel(R) Core(TM) i7-5600U CPU @ 2.60GHz).
+
+Fixes: 030881372460 ("vhost_net: basic polling support")
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vhost/net.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/drivers/vhost/net.c
++++ b/drivers/vhost/net.c
+@@ -557,8 +557,13 @@ static int vhost_net_rx_peek_head_len(st
+               preempt_enable();
+-              if (vhost_enable_notify(&net->dev, vq))
++              if (!vhost_vq_avail_empty(&net->dev, vq))
+                       vhost_poll_queue(&vq->poll);
++              else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
++                      vhost_disable_notify(&net->dev, vq);
++                      vhost_poll_queue(&vq->poll);
++              }
++
+               mutex_unlock(&vq->mutex);
+               len = peek_head_len(sk);
diff --git a/queue-4.13/series b/queue-4.13/series
new file mode 100644 (file)
index 0000000..f64f0e3
--- /dev/null
@@ -0,0 +1,11 @@
+revert-net-use-lib-percpu_counter-api-for-fragmentation-mem-accounting.patch
+revert-net-fix-percpu-memory-leaks.patch
+gianfar-fix-tx-flow-control-deactivation.patch
+vhost_net-correctly-check-tx-avail-during-rx-busy-polling.patch
+ip6_gre-update-mtu-properly-in-ip6gre_err.patch
+udp-drop-head-states-only-when-all-skb-references-are-gone.patch
+ipv6-fix-memory-leak-with-multiple-tables-during-netns-destruction.patch
+ipv6-fix-typo-in-fib6_net_exit.patch
+sctp-fix-missing-wake-ups-in-some-situations.patch
+tcp-fix-a-request-socket-leak.patch
+ip_tunnel-fix-setting-ttl-and-tos-value-in-collect_md-mode.patch
diff --git a/queue-4.9/series b/queue-4.9/series
new file mode 100644 (file)
index 0000000..9fd985c
--- /dev/null
@@ -0,0 +1,25 @@
+ipv6-accept-64k-1-packet-length-in-ip6_find_1stfragopt.patch
+ipv6-add-rcu-grace-period-before-freeing-fib6_node.patch
+ipv6-fix-sparse-warning-on-rt6i_node.patch
+macsec-add-genl-family-module-alias.patch
+udp-on-peeking-bad-csum-drop-packets-even-if-not-at-head.patch
+fsl-man-inherit-parent-device-and-of_node.patch
+sctp-avoid-out-of-bounds-reads-from-address-storage.patch
+qlge-avoid-memcpy-buffer-overflow.patch
+netvsc-fix-deadlock-betwen-link-status-and-removal.patch
+cxgb4-fix-stack-out-of-bounds-read-due-to-wrong-size-to-t4_record_mbox.patch
+packet-don-t-write-vnet-header-beyond-end-of-buffer.patch
+kcm-do-not-attach-pf_kcm-sockets-to-avoid-deadlock.patch
+revert-net-phy-correctly-process-phy_halted-in-phy_stop_machine.patch
+tcp-initialize-rcv_mss-to-tcp_min_mss-instead-of-0.patch
+mlxsw-spectrum-forbid-linking-to-devices-that-have-uppers.patch
+bridge-switchdev-clear-forward-mark-when-transmitting-packet.patch
+revert-net-use-lib-percpu_counter-api-for-fragmentation-mem-accounting.patch
+revert-net-fix-percpu-memory-leaks.patch
+gianfar-fix-tx-flow-control-deactivation.patch
+vhost_net-correctly-check-tx-avail-during-rx-busy-polling.patch
+ip6_gre-update-mtu-properly-in-ip6gre_err.patch
+ipv6-fix-memory-leak-with-multiple-tables-during-netns-destruction.patch
+ipv6-fix-typo-in-fib6_net_exit.patch
+sctp-fix-missing-wake-ups-in-some-situations.patch
+ip_tunnel-fix-setting-ttl-and-tos-value-in-collect_md-mode.patch