From: Greg Kroah-Hartman Date: Mon, 12 Oct 2020 12:02:33 +0000 (+0200) Subject: 5.8-stable patches X-Git-Tag: v4.4.239~10 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=5194f90fa4750ca13358b631f409b6d0b7fb17d6;p=thirdparty%2Fkernel%2Fstable-queue.git 5.8-stable patches added patches: i2c-imx-fix-reset-of-i2sr_ial-flag.patch mm-khugepaged-recalculate-min_free_kbytes-after-memory-hotplug-as-expected-by-khugepaged.patch mm-validate-inode-in-mapping_set_error.patch net-bridge-fdb-don-t-flush-ext_learn-entries.patch net-core-check-length-before-updating-ethertype-in-skb_mpls_-push-pop.patch net-mlx5e-fix-driver-s-declaration-to-support-gre-offload.patch net-tls-race-causes-kernel-panic.patch netlink-fix-policy-dump-leak.patch tcp-fix-receive-window-update-in-tcp_add_backlog.patch --- diff --git a/queue-5.8/i2c-imx-fix-reset-of-i2sr_ial-flag.patch b/queue-5.8/i2c-imx-fix-reset-of-i2sr_ial-flag.patch new file mode 100644 index 00000000000..9f3244998dd --- /dev/null +++ b/queue-5.8/i2c-imx-fix-reset-of-i2sr_ial-flag.patch @@ -0,0 +1,71 @@ +From fa4d30556883f2eaab425b88ba9904865a4d00f3 Mon Sep 17 00:00:00 2001 +From: Christian Eggers +Date: Wed, 7 Oct 2020 10:45:22 +0200 +Subject: i2c: imx: Fix reset of I2SR_IAL flag +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Christian Eggers + +commit fa4d30556883f2eaab425b88ba9904865a4d00f3 upstream. + +According to the "VFxxx Controller Reference Manual" (and the comment +block starting at line 97), Vybrid requires writing a one for clearing +an interrupt flag. Syncing the method for clearing I2SR_IIF in +i2c_imx_isr(). 
+ +Signed-off-by: Christian Eggers +Fixes: 4b775022f6fd ("i2c: imx: add struct to hold more configurable quirks") +Reviewed-by: Uwe Kleine-König +Cc: stable@vger.kernel.org +Signed-off-by: Wolfram Sang +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/i2c/busses/i2c-imx.c | 20 +++++++++++++++----- + 1 file changed, 15 insertions(+), 5 deletions(-) + +--- a/drivers/i2c/busses/i2c-imx.c ++++ b/drivers/i2c/busses/i2c-imx.c +@@ -412,6 +412,19 @@ static void i2c_imx_dma_free(struct imx_ + dma->chan_using = NULL; + } + ++static void i2c_imx_clear_irq(struct imx_i2c_struct *i2c_imx, unsigned int bits) ++{ ++ unsigned int temp; ++ ++ /* ++ * i2sr_clr_opcode is the value to clear all interrupts. Here we want to ++ * clear only <bits>, so we write ~i2sr_clr_opcode with just <bits> ++ * toggled. This is required because i.MX needs W1C and Vybrid uses W0C. ++ */ ++ temp = ~i2c_imx->hwdata->i2sr_clr_opcode ^ bits; ++ imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2SR); ++} ++ + static int i2c_imx_bus_busy(struct imx_i2c_struct *i2c_imx, int for_busy, bool atomic) + { + unsigned long orig_jiffies = jiffies; +@@ -424,8 +437,7 @@ static int i2c_imx_bus_busy(struct imx_i + + /* check for arbitration lost */ + if (temp & I2SR_IAL) { +- temp &= ~I2SR_IAL; +- imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2SR); ++ i2c_imx_clear_irq(i2c_imx, I2SR_IAL); + return -EAGAIN; + } + +@@ -623,9 +635,7 @@ static irqreturn_t i2c_imx_isr(int irq, + if (temp & I2SR_IIF) { + /* save status register */ + i2c_imx->i2csr = temp; +- temp &= ~I2SR_IIF; +- temp |= (i2c_imx->hwdata->i2sr_clr_opcode & I2SR_IIF); +- imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2SR); ++ i2c_imx_clear_irq(i2c_imx, I2SR_IIF); + wake_up(&i2c_imx->queue); + return IRQ_HANDLED; + } diff --git a/queue-5.8/mm-khugepaged-recalculate-min_free_kbytes-after-memory-hotplug-as-expected-by-khugepaged.patch b/queue-5.8/mm-khugepaged-recalculate-min_free_kbytes-after-memory-hotplug-as-expected-by-khugepaged.patch new file mode 100644 index 00000000000..1904d06c71b 
--- /dev/null +++ b/queue-5.8/mm-khugepaged-recalculate-min_free_kbytes-after-memory-hotplug-as-expected-by-khugepaged.patch @@ -0,0 +1,114 @@ +From 4aab2be0983031a05cb4a19696c9da5749523426 Mon Sep 17 00:00:00 2001 +From: Vijay Balakrishna +Date: Sat, 10 Oct 2020 23:16:40 -0700 +Subject: mm: khugepaged: recalculate min_free_kbytes after memory hotplug as expected by khugepaged + +From: Vijay Balakrishna + +commit 4aab2be0983031a05cb4a19696c9da5749523426 upstream. + +When memory is hotplug added or removed the min_free_kbytes should be +recalculated based on what is expected by khugepaged. Currently after +hotplug, min_free_kbytes will be set to a lower default and higher +default set when THP enabled is lost. + +This change restores min_free_kbytes as expected for THP consumers. + +[vijayb@linux.microsoft.com: v5] + Link: https://lkml.kernel.org/r/1601398153-5517-1-git-send-email-vijayb@linux.microsoft.com + +Fixes: f000565adb77 ("thp: set recommended min free kbytes") +Signed-off-by: Vijay Balakrishna +Signed-off-by: Andrew Morton +Reviewed-by: Pavel Tatashin +Acked-by: Michal Hocko +Cc: Allen Pais +Cc: Andrea Arcangeli +Cc: "Kirill A. 
Shutemov" +Cc: Oleg Nesterov +Cc: Song Liu +Cc: +Link: https://lkml.kernel.org/r/1600305709-2319-2-git-send-email-vijayb@linux.microsoft.com +Link: https://lkml.kernel.org/r/1600204258-13683-1-git-send-email-vijayb@linux.microsoft.com +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/khugepaged.h | 5 +++++ + mm/khugepaged.c | 13 +++++++++++-- + mm/page_alloc.c | 3 +++ + 3 files changed, 19 insertions(+), 2 deletions(-) + +--- a/include/linux/khugepaged.h ++++ b/include/linux/khugepaged.h +@@ -15,6 +15,7 @@ extern int __khugepaged_enter(struct mm_ + extern void __khugepaged_exit(struct mm_struct *mm); + extern int khugepaged_enter_vma_merge(struct vm_area_struct *vma, + unsigned long vm_flags); ++extern void khugepaged_min_free_kbytes_update(void); + #ifdef CONFIG_SHMEM + extern void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr); + #else +@@ -85,6 +86,10 @@ static inline void collapse_pte_mapped_t + unsigned long addr) + { + } ++ ++static inline void khugepaged_min_free_kbytes_update(void) ++{ ++} + #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + + #endif /* _LINUX_KHUGEPAGED_H */ +--- a/mm/khugepaged.c ++++ b/mm/khugepaged.c +@@ -56,6 +56,9 @@ enum scan_result { + #define CREATE_TRACE_POINTS + #include + ++static struct task_struct *khugepaged_thread __read_mostly; ++static DEFINE_MUTEX(khugepaged_mutex); ++ + /* default scan 8*512 pte (or vmas) every 30 second */ + static unsigned int khugepaged_pages_to_scan __read_mostly; + static unsigned int khugepaged_pages_collapsed; +@@ -2304,8 +2307,6 @@ static void set_recommended_min_free_kby + + int start_stop_khugepaged(void) + { +- static struct task_struct *khugepaged_thread __read_mostly; +- static DEFINE_MUTEX(khugepaged_mutex); + int err = 0; + + mutex_lock(&khugepaged_mutex); +@@ -2332,3 +2333,11 @@ fail: + mutex_unlock(&khugepaged_mutex); + return err; + } ++ ++void khugepaged_min_free_kbytes_update(void) ++{ ++ mutex_lock(&khugepaged_mutex); ++ if 
(khugepaged_enabled() && khugepaged_thread) ++ set_recommended_min_free_kbytes(); ++ mutex_unlock(&khugepaged_mutex); ++} +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -69,6 +69,7 @@ + #include + #include + #include ++#include <linux/khugepaged.h> + + #include + #include +@@ -7884,6 +7885,8 @@ int __meminit init_per_zone_wmark_min(vo + setup_min_slab_ratio(); + #endif + ++ khugepaged_min_free_kbytes_update(); ++ + return 0; + } + postcore_initcall(init_per_zone_wmark_min) diff --git a/queue-5.8/mm-validate-inode-in-mapping_set_error.patch b/queue-5.8/mm-validate-inode-in-mapping_set_error.patch new file mode 100644 index 00000000000..7ff44814cbe --- /dev/null +++ b/queue-5.8/mm-validate-inode-in-mapping_set_error.patch @@ -0,0 +1,44 @@ +From 8b7b2eb131d3476062ffd34358785b44be25172f Mon Sep 17 00:00:00 2001 +From: Minchan Kim +Date: Sat, 10 Oct 2020 23:16:37 -0700 +Subject: mm: validate inode in mapping_set_error() + +From: Minchan Kim + +commit 8b7b2eb131d3476062ffd34358785b44be25172f upstream. + +The swap address_space doesn't have host. Thus, it makes kernel crash once +swap write meets error. Fix it. 
+ +Fixes: 735e4ae5ba28 ("vfs: track per-sb writeback errors and report them to syncfs") +Signed-off-by: Minchan Kim +Signed-off-by: Andrew Morton +Acked-by: Jeff Layton +Cc: Jan Kara +Cc: Andres Freund +Cc: Matthew Wilcox +Cc: Al Viro +Cc: Christoph Hellwig +Cc: Dave Chinner +Cc: David Howells +Cc: +Link: https://lkml.kernel.org/r/20201010000650.750063-1-minchan@kernel.org +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/pagemap.h | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/include/linux/pagemap.h ++++ b/include/linux/pagemap.h +@@ -54,7 +54,8 @@ static inline void mapping_set_error(str + __filemap_set_wb_err(mapping, error); + + /* Record it in superblock */ +- errseq_set(&mapping->host->i_sb->s_wb_err, error); ++ if (mapping->host) ++ errseq_set(&mapping->host->i_sb->s_wb_err, error); + + /* Record it in flags for now, for legacy callers */ + if (error == -ENOSPC) diff --git a/queue-5.8/net-bridge-fdb-don-t-flush-ext_learn-entries.patch b/queue-5.8/net-bridge-fdb-don-t-flush-ext_learn-entries.patch new file mode 100644 index 00000000000..3147baf6a07 --- /dev/null +++ b/queue-5.8/net-bridge-fdb-don-t-flush-ext_learn-entries.patch @@ -0,0 +1,41 @@ +From f2f3729fb65c5c2e6db234e6316b71a7bdc4b30b Mon Sep 17 00:00:00 2001 +From: Nikolay Aleksandrov +Date: Mon, 28 Sep 2020 18:30:02 +0300 +Subject: net: bridge: fdb: don't flush ext_learn entries + +From: Nikolay Aleksandrov + +commit f2f3729fb65c5c2e6db234e6316b71a7bdc4b30b upstream. + +When a user-space software manages fdb entries externally it should +set the ext_learn flag which marks the fdb entry as externally managed +and avoids expiring it (they're treated as static fdbs). Unfortunately +on events where fdb entries are flushed (STP down, netlink fdb flush +etc) these fdbs are also deleted automatically by the bridge. That in turn +causes trouble for the managing user-space software (e.g. in MLAG setups +we lose remote fdb entries on port flaps). 
+These entries are completely externally managed so we should avoid +automatically deleting them, the only exception are offloaded entries +(i.e. BR_FDB_ADDED_BY_EXT_LEARN + BR_FDB_OFFLOADED). They are flushed as +before. + +Fixes: eb100e0e24a2 ("net: bridge: allow to add externally learned entries from user-space") +Signed-off-by: Nikolay Aleksandrov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman + +--- + net/bridge/br_fdb.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/bridge/br_fdb.c ++++ b/net/bridge/br_fdb.c +@@ -404,6 +404,8 @@ void br_fdb_delete_by_port(struct net_br + + if (!do_all) + if (test_bit(BR_FDB_STATIC, &f->flags) || ++ (test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &f->flags) && ++ !test_bit(BR_FDB_OFFLOADED, &f->flags)) || + (vid && f->key.vlan_id != vid)) + continue; + diff --git a/queue-5.8/net-core-check-length-before-updating-ethertype-in-skb_mpls_-push-pop.patch b/queue-5.8/net-core-check-length-before-updating-ethertype-in-skb_mpls_-push-pop.patch new file mode 100644 index 00000000000..f2a3dddd2e6 --- /dev/null +++ b/queue-5.8/net-core-check-length-before-updating-ethertype-in-skb_mpls_-push-pop.patch @@ -0,0 +1,48 @@ +From 4296adc3e32f5d544a95061160fe7e127be1b9ff Mon Sep 17 00:00:00 2001 +From: Guillaume Nault +Date: Fri, 2 Oct 2020 21:53:08 +0200 +Subject: net/core: check length before updating Ethertype in skb_mpls_{push,pop} + +From: Guillaume Nault + +commit 4296adc3e32f5d544a95061160fe7e127be1b9ff upstream. + +Openvswitch allows to drop a packet's Ethernet header, therefore +skb_mpls_push() and skb_mpls_pop() might be called with ethernet=true +and mac_len=0. In that case the pointer passed to skb_mod_eth_type() +doesn't point to an Ethernet header and the new Ethertype is written at +unexpected locations. + +Fix this by verifying that mac_len is big enough to contain an Ethernet +header. 
+ +Fixes: fa4e0f8855fc ("net/sched: fix corrupted L2 header with MPLS 'push' and 'pop' actions") +Signed-off-by: Guillaume Nault +Acked-by: Davide Caratti +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman + +--- + net/core/skbuff.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -5621,7 +5621,7 @@ int skb_mpls_push(struct sk_buff *skb, _ + lse->label_stack_entry = mpls_lse; + skb_postpush_rcsum(skb, lse, MPLS_HLEN); + +- if (ethernet) ++ if (ethernet && mac_len >= ETH_HLEN) + skb_mod_eth_type(skb, eth_hdr(skb), mpls_proto); + skb->protocol = mpls_proto; + +@@ -5661,7 +5661,7 @@ int skb_mpls_pop(struct sk_buff *skb, __ + skb_reset_mac_header(skb); + skb_set_network_header(skb, mac_len); + +- if (ethernet) { ++ if (ethernet && mac_len >= ETH_HLEN) { + struct ethhdr *hdr; + + /* use mpls_hdr() to get ethertype to account for VLANs. */ diff --git a/queue-5.8/net-mlx5e-fix-driver-s-declaration-to-support-gre-offload.patch b/queue-5.8/net-mlx5e-fix-driver-s-declaration-to-support-gre-offload.patch new file mode 100644 index 00000000000..9ddd7321b6c --- /dev/null +++ b/queue-5.8/net-mlx5e-fix-driver-s-declaration-to-support-gre-offload.patch @@ -0,0 +1,60 @@ +From 3d093bc2369003b4ce6c3522d9b383e47c40045d Mon Sep 17 00:00:00 2001 +From: Aya Levin +Date: Sun, 9 Aug 2020 12:34:21 +0300 +Subject: net/mlx5e: Fix driver's declaration to support GRE offload + +From: Aya Levin + +commit 3d093bc2369003b4ce6c3522d9b383e47c40045d upstream. + +Declare GRE offload support with respect to the inner protocol. Add a +list of supported inner protocols on which the driver can offload +checksum and GSO. For other protocols, inform the stack to do the needed +operations. There is no noticeable impact on GRE performance. 
+ +Fixes: 2729984149e6 ("net/mlx5e: Support TSO and TX checksum offloads for GRE tunnels") +Signed-off-by: Aya Levin +Reviewed-by: Moshe Shemesh +Reviewed-by: Tariq Toukan +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 19 ++++++++++++++++++- + 1 file changed, 18 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +@@ -4323,6 +4323,21 @@ void mlx5e_del_vxlan_port(struct net_dev + mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 0); + } + ++static bool mlx5e_gre_tunnel_inner_proto_offload_supported(struct mlx5_core_dev *mdev, ++ struct sk_buff *skb) ++{ ++ switch (skb->inner_protocol) { ++ case htons(ETH_P_IP): ++ case htons(ETH_P_IPV6): ++ case htons(ETH_P_TEB): ++ return true; ++ case htons(ETH_P_MPLS_UC): ++ case htons(ETH_P_MPLS_MC): ++ return MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_gre); ++ } ++ return false; ++} ++ + static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv, + struct sk_buff *skb, + netdev_features_t features) +@@ -4345,7 +4360,9 @@ static netdev_features_t mlx5e_tunnel_fe + + switch (proto) { + case IPPROTO_GRE: +- return features; ++ if (mlx5e_gre_tunnel_inner_proto_offload_supported(priv->mdev, skb)) ++ return features; ++ break; + case IPPROTO_IPIP: + case IPPROTO_IPV6: + if (mlx5e_tunnel_proto_supported(priv->mdev, IPPROTO_IPIP)) diff --git a/queue-5.8/net-tls-race-causes-kernel-panic.patch b/queue-5.8/net-tls-race-causes-kernel-panic.patch new file mode 100644 index 00000000000..e4d91a28925 --- /dev/null +++ b/queue-5.8/net-tls-race-causes-kernel-panic.patch @@ -0,0 +1,75 @@ +From 38f7e1c0c43dd25b06513137bb6fd35476f9ec6d Mon Sep 17 00:00:00 2001 +From: Rohit Maheshwari +Date: Thu, 24 Sep 2020 12:28:45 +0530 +Subject: net/tls: race causes kernel panic + +From: Rohit Maheshwari + +commit 38f7e1c0c43dd25b06513137bb6fd35476f9ec6d upstream. 
+ +BUG: kernel NULL pointer dereference, address: 00000000000000b8 + #PF: supervisor read access in kernel mode + #PF: error_code(0x0000) - not-present page + PGD 80000008b6fef067 P4D 80000008b6fef067 PUD 8b6fe6067 PMD 0 + Oops: 0000 [#1] SMP PTI + CPU: 12 PID: 23871 Comm: kworker/12:80 Kdump: loaded Tainted: G S + 5.9.0-rc3+ #1 + Hardware name: Supermicro X10SRA-F/X10SRA-F, BIOS 2.1 03/29/2018 + Workqueue: events tx_work_handler [tls] + RIP: 0010:tx_work_handler+0x1b/0x70 [tls] + Code: dc fe ff ff e8 16 d4 a3 f6 66 0f 1f 44 00 00 0f 1f 44 00 00 55 53 48 8b + 6f 58 48 8b bd a0 04 00 00 48 85 ff 74 1c 48 8b 47 28 <48> 8b 90 b8 00 00 00 83 + e2 02 75 0c f0 48 0f ba b0 b8 00 00 00 00 + RSP: 0018:ffffa44ace61fe88 EFLAGS: 00010286 + RAX: 0000000000000000 RBX: ffff91da9e45cc30 RCX: dead000000000122 + RDX: 0000000000000001 RSI: ffff91da9e45cc38 RDI: ffff91d95efac200 + RBP: ffff91da133fd780 R08: 0000000000000000 R09: 000073746e657665 + R10: 8080808080808080 R11: 0000000000000000 R12: ffff91dad7d30700 + R13: ffff91dab6561080 R14: 0ffff91dad7d3070 R15: ffff91da9e45cc38 + FS: 0000000000000000(0000) GS:ffff91dad7d00000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 00000000000000b8 CR3: 0000000906478003 CR4: 00000000003706e0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + Call Trace: + process_one_work+0x1a7/0x370 + worker_thread+0x30/0x370 + ? process_one_work+0x370/0x370 + kthread+0x114/0x130 + ? kthread_park+0x80/0x80 + ret_from_fork+0x22/0x30 + +tls_sw_release_resources_tx() waits for encrypt_pending, which +can have race, so we need similar changes as in commit +0cada33241d9de205522e3858b18e506ca5cce2c here as well. + +Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance") +Signed-off-by: Rohit Maheshwari +Acked-by: Jakub Kicinski +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman + +--- + net/tls/tls_sw.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +--- a/net/tls/tls_sw.c ++++ b/net/tls/tls_sw.c +@@ -2142,10 +2142,15 @@ void tls_sw_release_resources_tx(struct + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); + struct tls_rec *rec, *tmp; ++ int pending; + + /* Wait for any pending async encryptions to complete */ +- smp_store_mb(ctx->async_notify, true); +- if (atomic_read(&ctx->encrypt_pending)) ++ spin_lock_bh(&ctx->encrypt_compl_lock); ++ ctx->async_notify = true; ++ pending = atomic_read(&ctx->encrypt_pending); ++ spin_unlock_bh(&ctx->encrypt_compl_lock); ++ ++ if (pending) + crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + + tls_tx_records(sk, -1); diff --git a/queue-5.8/netlink-fix-policy-dump-leak.patch b/queue-5.8/netlink-fix-policy-dump-leak.patch new file mode 100644 index 00000000000..e52e7a96a4c --- /dev/null +++ b/queue-5.8/netlink-fix-policy-dump-leak.patch @@ -0,0 +1,114 @@ +From a95bc734e60449e7b073ff7ff70c35083b290ae9 Mon Sep 17 00:00:00 2001 +From: Johannes Berg +Date: Fri, 2 Oct 2020 09:46:04 +0200 +Subject: netlink: fix policy dump leak + +From: Johannes Berg + +commit a95bc734e60449e7b073ff7ff70c35083b290ae9 upstream. + +If userspace doesn't complete the policy dump, we leak the +allocated state. Fix this. + +Fixes: d07dcf9aadd6 ("netlink: add infrastructure to expose policies to userspace") +Signed-off-by: Johannes Berg +Reviewed-by: Jakub Kicinski +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman + +--- + include/net/netlink.h | 3 ++- + net/netlink/genetlink.c | 9 ++++++++- + net/netlink/policy.c | 24 ++++++++++-------------- + 3 files changed, 20 insertions(+), 16 deletions(-) + +--- a/include/net/netlink.h ++++ b/include/net/netlink.h +@@ -1936,7 +1936,8 @@ void nla_get_range_signed(const struct n + int netlink_policy_dump_start(const struct nla_policy *policy, + unsigned int maxtype, + unsigned long *state); +-bool netlink_policy_dump_loop(unsigned long *state); ++bool netlink_policy_dump_loop(unsigned long state); + int netlink_policy_dump_write(struct sk_buff *skb, unsigned long state); ++void netlink_policy_dump_free(unsigned long state); + + #endif +--- a/net/netlink/genetlink.c ++++ b/net/netlink/genetlink.c +@@ -1079,7 +1079,7 @@ static int ctrl_dumppolicy(struct sk_buf + if (err) + return err; + +- while (netlink_policy_dump_loop(&cb->args[1])) { ++ while (netlink_policy_dump_loop(cb->args[1])) { + void *hdr; + struct nlattr *nest; + +@@ -1113,6 +1113,12 @@ nla_put_failure: + return skb->len; + } + ++static int ctrl_dumppolicy_done(struct netlink_callback *cb) ++{ ++ netlink_policy_dump_free(cb->args[1]); ++ return 0; ++} ++ + static const struct genl_ops genl_ctrl_ops[] = { + { + .cmd = CTRL_CMD_GETFAMILY, +@@ -1123,6 +1129,7 @@ static const struct genl_ops genl_ctrl_o + { + .cmd = CTRL_CMD_GETPOLICY, + .dumpit = ctrl_dumppolicy, ++ .done = ctrl_dumppolicy_done, + }, + }; + +--- a/net/netlink/policy.c ++++ b/net/netlink/policy.c +@@ -84,7 +84,6 @@ int netlink_policy_dump_start(const stru + unsigned int policy_idx; + int err; + +- /* also returns 0 if "*_state" is our ERR_PTR() end marker */ + if (*_state) + return 0; + +@@ -140,21 +139,11 @@ static bool netlink_policy_dump_finished + !state->policies[state->policy_idx].policy; + } + +-bool netlink_policy_dump_loop(unsigned long *_state) ++bool netlink_policy_dump_loop(unsigned long _state) + { +- struct nl_policy_dump *state = (void *)*_state; +- +- if 
(IS_ERR(state)) +- return false; +- +- if (netlink_policy_dump_finished(state)) { +- kfree(state); +- /* store end marker instead of freed state */ +- *_state = (unsigned long)ERR_PTR(-ENOENT); +- return false; +- } ++ struct nl_policy_dump *state = (void *)_state; + +- return true; ++ return !netlink_policy_dump_finished(state); + } + + int netlink_policy_dump_write(struct sk_buff *skb, unsigned long _state) +@@ -309,3 +298,10 @@ nla_put_failure: + nla_nest_cancel(skb, policy); + return -ENOBUFS; + } ++ ++void netlink_policy_dump_free(unsigned long _state) ++{ ++ struct nl_policy_dump *state = (void *)_state; ++ ++ kfree(state); ++} diff --git a/queue-5.8/series b/queue-5.8/series index 05b9f0b30bb..35433793c02 100644 --- a/queue-5.8/series +++ b/queue-5.8/series @@ -109,3 +109,12 @@ i2c-meson-fix-clock-setting-overwrite.patch-20597 afs-fix-deadlock-between-writeback-and-truncate.patch perf-fix-task_function_call-error-handling.patch mmc-core-don-t-set-limits.discard_granularity-as-0.patch +mm-validate-inode-in-mapping_set_error.patch +mm-khugepaged-recalculate-min_free_kbytes-after-memory-hotplug-as-expected-by-khugepaged.patch +tcp-fix-receive-window-update-in-tcp_add_backlog.patch +netlink-fix-policy-dump-leak.patch +net-core-check-length-before-updating-ethertype-in-skb_mpls_-push-pop.patch +i2c-imx-fix-reset-of-i2sr_ial-flag.patch +net-bridge-fdb-don-t-flush-ext_learn-entries.patch +net-tls-race-causes-kernel-panic.patch +net-mlx5e-fix-driver-s-declaration-to-support-gre-offload.patch diff --git a/queue-5.8/tcp-fix-receive-window-update-in-tcp_add_backlog.patch b/queue-5.8/tcp-fix-receive-window-update-in-tcp_add_backlog.patch new file mode 100644 index 00000000000..21445ae254c --- /dev/null +++ b/queue-5.8/tcp-fix-receive-window-update-in-tcp_add_backlog.patch @@ -0,0 +1,89 @@ +From 86bccd0367130f481ca99ba91de1c6a5aa1c78c1 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Mon, 5 Oct 2020 06:48:13 -0700 +Subject: tcp: fix receive window update in 
tcp_add_backlog() + +From: Eric Dumazet + +commit 86bccd0367130f481ca99ba91de1c6a5aa1c78c1 upstream. + +We got reports from GKE customers flows being reset by netfilter +conntrack unless nf_conntrack_tcp_be_liberal is set to 1. + +Traces seemed to suggest ACK packet being dropped by the +packet capture, or more likely that ACK were received in the +wrong order. + + wscale=7, SYN and SYNACK not shown here. + + This ACK allows the sender to send 1871*128 bytes from seq 51359321 : + New right edge of the window -> 51359321+1871*128=51598809 + + 09:17:23.389210 IP A > B: Flags [.], ack 51359321, win 1871, options [nop,nop,TS val 10 ecr 999], length 0 + + 09:17:23.389212 IP B > A: Flags [.], seq 51422681:51424089, ack 1577, win 268, options [nop,nop,TS val 999 ecr 10], length 1408 + 09:17:23.389214 IP A > B: Flags [.], ack 51422681, win 1376, options [nop,nop,TS val 10 ecr 999], length 0 + 09:17:23.389253 IP B > A: Flags [.], seq 51424089:51488857, ack 1577, win 268, options [nop,nop,TS val 999 ecr 10], length 64768 + 09:17:23.389272 IP A > B: Flags [.], ack 51488857, win 859, options [nop,nop,TS val 10 ecr 999], length 0 + 09:17:23.389275 IP B > A: Flags [.], seq 51488857:51521241, ack 1577, win 268, options [nop,nop,TS val 999 ecr 10], length 32384 + + Receiver now allows to send 606*128=77568 from seq 51521241 : + New right edge of the window -> 51521241+606*128=51598809 + + 09:17:23.389296 IP A > B: Flags [.], ack 51521241, win 606, options [nop,nop,TS val 10 ecr 999], length 0 + + 09:17:23.389308 IP B > A: Flags [.], seq 51521241:51553625, ack 1577, win 268, options [nop,nop,TS val 999 ecr 10], length 32384 + + It seems the sender exceeds RWIN allowance, since 51611353 > 51598809 + + 09:17:23.389346 IP B > A: Flags [.], seq 51553625:51611353, ack 1577, win 268, options [nop,nop,TS val 999 ecr 10], length 57728 + 09:17:23.389356 IP B > A: Flags [.], seq 51611353:51618393, ack 1577, win 268, options [nop,nop,TS val 999 ecr 10], length 7040 + + 09:17:23.389367 IP A > 
B: Flags [.], ack 51611353, win 0, options [nop,nop,TS val 10 ecr 999], length 0 + + netfilter conntrack is not happy and sends RST + + 09:17:23.389389 IP A > B: Flags [R], seq 92176528, win 0, length 0 + 09:17:23.389488 IP B > A: Flags [R], seq 174478967, win 0, length 0 + + Now imagine ACK were delivered out of order and tcp_add_backlog() sets window based on wrong packet. + New right edge of the window -> 51521241+859*128=51631193 + +Normally TCP stack handles OOO packets just fine, but it +turns out tcp_add_backlog() does not. It can update the window +field of the aggregated packet even if the ACK sequence +of the last received packet is too old. + +Many thanks to Alexandre Ferrieux for independently reporting the issue +and suggesting a fix. + +Fixes: 4f693b55c3d2 ("tcp: implement coalescing on backlog queue") +Signed-off-by: Eric Dumazet +Reported-by: Alexandre Ferrieux +Acked-by: Soheil Hassas Yeganeh +Acked-by: Neal Cardwell +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman + +--- + net/ipv4/tcp_ipv4.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -1787,12 +1787,12 @@ bool tcp_add_backlog(struct sock *sk, st + + __skb_pull(skb, hdrlen); + if (skb_try_coalesce(tail, skb, &fragstolen, &delta)) { +- thtail->window = th->window; +- + TCP_SKB_CB(tail)->end_seq = TCP_SKB_CB(skb)->end_seq; + +- if (after(TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(tail)->ack_seq)) ++ if (likely(!before(TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(tail)->ack_seq))) { + TCP_SKB_CB(tail)->ack_seq = TCP_SKB_CB(skb)->ack_seq; ++ thtail->window = th->window; ++ } + + /* We have to update both TCP_SKB_CB(tail)->tcp_flags and + * thtail->fin, so that the fast path in tcp_rcv_established()