From: Greg Kroah-Hartman Date: Mon, 9 Dec 2019 18:17:30 +0000 (+0100) Subject: 4.19-stable patches X-Git-Tag: v5.4.3~47 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=0677ac08f48505477321e0131e9b851013119ac8;p=thirdparty%2Fkernel%2Fstable-queue.git 4.19-stable patches added patches: cifs-fix-null-pointer-dereference-in-smb2_push_mandatory_locks.patch cifs-fix-smb2-oplock-break-processing.patch x86-mm-32-sync-only-to-vmalloc_end-in-vmalloc_sync_all.patch x86-pci-avoid-amd-fch-xhci-usb-pme-from-d0-defect.patch xfrm-interface-avoid-corruption-on-changelink.patch xfrm-interface-fix-list-corruption-for-x-netns.patch xfrm-interface-fix-management-of-phydev.patch xfrm-interface-fix-memory-leak-on-creation.patch --- diff --git a/queue-4.19/cifs-fix-null-pointer-dereference-in-smb2_push_mandatory_locks.patch b/queue-4.19/cifs-fix-null-pointer-dereference-in-smb2_push_mandatory_locks.patch new file mode 100644 index 00000000000..71b393ae7f3 --- /dev/null +++ b/queue-4.19/cifs-fix-null-pointer-dereference-in-smb2_push_mandatory_locks.patch @@ -0,0 +1,72 @@ +From 6f582b273ec23332074d970a7fb25bef835df71f Mon Sep 17 00:00:00 2001 +From: Pavel Shilovsky +Date: Wed, 27 Nov 2019 16:18:39 -0800 +Subject: CIFS: Fix NULL-pointer dereference in smb2_push_mandatory_locks + +From: Pavel Shilovsky + +commit 6f582b273ec23332074d970a7fb25bef835df71f upstream. + +Currently when the client creates a cifsFileInfo structure for +a newly opened file, it allocates a list of byte-range locks +with a pointer to the new cfile and attaches this list to the +inode's lock list. The latter happens before initializing all +other fields, e.g. cfile->tlink. Thus a partially initialized +cifsFileInfo structure becomes available to other threads that +walk through the inode's lock list. One example of such a thread +may be an oplock break worker thread that tries to push all +cached byte-range locks. 
This causes NULL-pointer dereference +in smb2_push_mandatory_locks() when accessing cfile->tlink: + +[598428.945633] BUG: kernel NULL pointer dereference, address: 0000000000000038 +... +[598428.945749] Workqueue: cifsoplockd cifs_oplock_break [cifs] +[598428.945793] RIP: 0010:smb2_push_mandatory_locks+0xd6/0x5a0 [cifs] +... +[598428.945834] Call Trace: +[598428.945870] ? cifs_revalidate_mapping+0x45/0x90 [cifs] +[598428.945901] cifs_oplock_break+0x13d/0x450 [cifs] +[598428.945909] process_one_work+0x1db/0x380 +[598428.945914] worker_thread+0x4d/0x400 +[598428.945921] kthread+0x104/0x140 +[598428.945925] ? process_one_work+0x380/0x380 +[598428.945931] ? kthread_park+0x80/0x80 +[598428.945937] ret_from_fork+0x35/0x40 + +Fix this by reordering initialization steps of the cifsFileInfo +structure: initialize all the fields first and then add the new +byte-range lock list to the inode's lock list. + +Cc: Stable +Signed-off-by: Pavel Shilovsky +Reviewed-by: Aurelien Aptel +Signed-off-by: Steve French +Signed-off-by: Greg Kroah-Hartman + +--- + fs/cifs/file.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +--- a/fs/cifs/file.c ++++ b/fs/cifs/file.c +@@ -312,9 +312,6 @@ cifs_new_fileinfo(struct cifs_fid *fid, + INIT_LIST_HEAD(&fdlocks->locks); + fdlocks->cfile = cfile; + cfile->llist = fdlocks; +- cifs_down_write(&cinode->lock_sem); +- list_add(&fdlocks->llist, &cinode->llist); +- up_write(&cinode->lock_sem); + + cfile->count = 1; + cfile->pid = current->tgid; +@@ -338,6 +335,10 @@ cifs_new_fileinfo(struct cifs_fid *fid, + oplock = 0; + } + ++ cifs_down_write(&cinode->lock_sem); ++ list_add(&fdlocks->llist, &cinode->llist); ++ up_write(&cinode->lock_sem); ++ + spin_lock(&tcon->open_file_lock); + if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock) + oplock = fid->pending_open->oplock; diff --git a/queue-4.19/cifs-fix-smb2-oplock-break-processing.patch b/queue-4.19/cifs-fix-smb2-oplock-break-processing.patch new file mode 100644 index 
00000000000..1abd7cf7add --- /dev/null +++ b/queue-4.19/cifs-fix-smb2-oplock-break-processing.patch @@ -0,0 +1,67 @@ +From fa9c2362497fbd64788063288dc4e74daf977ebb Mon Sep 17 00:00:00 2001 +From: Pavel Shilovsky +Date: Thu, 31 Oct 2019 14:18:57 -0700 +Subject: CIFS: Fix SMB2 oplock break processing + +From: Pavel Shilovsky + +commit fa9c2362497fbd64788063288dc4e74daf977ebb upstream. + +Even when mounting modern protocol version the server may be +configured without supporting SMB2.1 leases and the client +uses SMB2 oplock to optimize IO performance through local caching. + +However there is a problem in oplock break handling that leads +to missing a break notification on the client who has a file +opened. It later causes big latencies to other clients that +are trying to open the same file. + +The problem reproduces when there are multiple shares from the +same server mounted on the client. The processing code tries to +match persistent and volatile file ids from the break notification +with an open file but it skips all shares besides the first one. +Fix this by looking up in all shares belonging to the server that +issued the oplock break. 
+ +Cc: Stable +Signed-off-by: Pavel Shilovsky +Signed-off-by: Steve French +Signed-off-by: Greg Kroah-Hartman + +--- + fs/cifs/smb2misc.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +--- a/fs/cifs/smb2misc.c ++++ b/fs/cifs/smb2misc.c +@@ -673,10 +673,10 @@ smb2_is_valid_oplock_break(char *buffer, + spin_lock(&cifs_tcp_ses_lock); + list_for_each(tmp, &server->smb_ses_list) { + ses = list_entry(tmp, struct cifs_ses, smb_ses_list); ++ + list_for_each(tmp1, &ses->tcon_list) { + tcon = list_entry(tmp1, struct cifs_tcon, tcon_list); + +- cifs_stats_inc(&tcon->stats.cifs_stats.num_oplock_brks); + spin_lock(&tcon->open_file_lock); + list_for_each(tmp2, &tcon->openFileList) { + cfile = list_entry(tmp2, struct cifsFileInfo, +@@ -688,6 +688,8 @@ smb2_is_valid_oplock_break(char *buffer, + continue; + + cifs_dbg(FYI, "file id match, oplock break\n"); ++ cifs_stats_inc( ++ &tcon->stats.cifs_stats.num_oplock_brks); + cinode = CIFS_I(d_inode(cfile->dentry)); + spin_lock(&cfile->file_info_lock); + if (!CIFS_CACHE_WRITE(cinode) && +@@ -720,9 +722,6 @@ smb2_is_valid_oplock_break(char *buffer, + return true; + } + spin_unlock(&tcon->open_file_lock); +- spin_unlock(&cifs_tcp_ses_lock); +- cifs_dbg(FYI, "No matching file for oplock break\n"); +- return true; + } + } + spin_unlock(&cifs_tcp_ses_lock); diff --git a/queue-4.19/series b/queue-4.19/series index 84ec01c5eff..a4260a59949 100644 --- a/queue-4.19/series +++ b/queue-4.19/series @@ -212,3 +212,11 @@ input-synaptics-rmi4-don-t-increment-rmiaddr-for-smbus-transfers.patch input-goodix-add-upside-down-quirk-for-teclast-x89-tablet.patch coresight-etm4x-fix-input-validation-for-sysfs.patch input-fix-memory-leak-in-psxpad_spi_probe.patch +x86-mm-32-sync-only-to-vmalloc_end-in-vmalloc_sync_all.patch +x86-pci-avoid-amd-fch-xhci-usb-pme-from-d0-defect.patch +xfrm-interface-fix-memory-leak-on-creation.patch +xfrm-interface-avoid-corruption-on-changelink.patch +xfrm-interface-fix-list-corruption-for-x-netns.patch 
+xfrm-interface-fix-management-of-phydev.patch +cifs-fix-null-pointer-dereference-in-smb2_push_mandatory_locks.patch +cifs-fix-smb2-oplock-break-processing.patch diff --git a/queue-4.19/x86-mm-32-sync-only-to-vmalloc_end-in-vmalloc_sync_all.patch b/queue-4.19/x86-mm-32-sync-only-to-vmalloc_end-in-vmalloc_sync_all.patch new file mode 100644 index 00000000000..87c3eb6e3d5 --- /dev/null +++ b/queue-4.19/x86-mm-32-sync-only-to-vmalloc_end-in-vmalloc_sync_all.patch @@ -0,0 +1,93 @@ +From 9a62d20027da3164a22244d9f022c0c987261687 Mon Sep 17 00:00:00 2001 +From: Joerg Roedel +Date: Tue, 26 Nov 2019 11:09:42 +0100 +Subject: x86/mm/32: Sync only to VMALLOC_END in vmalloc_sync_all() + +From: Joerg Roedel + +commit 9a62d20027da3164a22244d9f022c0c987261687 upstream. + +The job of vmalloc_sync_all() is to help the lazy freeing of vmalloc() +ranges: before such vmap ranges are reused we make sure that they are +unmapped from every task's page tables. + +This is really easy on pagetable setups where the kernel page tables +are shared between all tasks - this is the case on 32-bit kernels +with SHARED_KERNEL_PMD = 1. + +But on !SHARED_KERNEL_PMD 32-bit kernels this involves iterating +over the pgd_list and clearing all pmd entries in the pgds that +are cleared in the init_mm.pgd, which is the reference pagetable +that the vmalloc() code uses. 
+ +In that context the current practice of vmalloc_sync_all() iterating +until FIXADDR_TOP is buggy: + + for (address = VMALLOC_START & PMD_MASK; + address >= TASK_SIZE_MAX && address < FIXADDR_TOP; + address += PMD_SIZE) { + struct page *page; + +Because iterating up to FIXADDR_TOP will involve a lot of non-vmalloc +address ranges: + + VMALLOC -> PKMAP -> LDT -> CPU_ENTRY_AREA -> FIX_ADDR + +This is mostly harmless for the FIX_ADDR and CPU_ENTRY_AREA ranges +that don't clear their pmds, but it's lethal for the LDT range, +which relies on having different mappings in different processes, +and 'synchronizing' them in the vmalloc sense corrupts those +pagetable entries (clearing them). + +This got particularly prominent with PTI, which turns SHARED_KERNEL_PMD +off and makes this the dominant mapping mode on 32-bit. + +To make LDT working again vmalloc_sync_all() must only iterate over +the volatile parts of the kernel address range that are identical +between all processes. + +So the correct check in vmalloc_sync_all() is "address < VMALLOC_END" +to make sure the VMALLOC areas are synchronized and the LDT +mapping is not falsely overwritten. + +The CPU_ENTRY_AREA and the FIXMAP area are no longer synced either, +but this is not really a problem since their PMDs get established +during bootup and never change. + +This change fixes the ldt_gdt selftest in my setup. + +[ mingo: Fixed up the changelog to explain the logic and modified the + copying to only happen up until VMALLOC_END. 
] + +Reported-by: Borislav Petkov +Tested-by: Borislav Petkov +Signed-off-by: Joerg Roedel +Cc: +Cc: Andy Lutomirski +Cc: Borislav Petkov +Cc: Dave Hansen +Cc: Joerg Roedel +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: hpa@zytor.com +Fixes: 7757d607c6b3: ("x86/pti: Allow CONFIG_PAGE_TABLE_ISOLATION for x86_32") +Link: https://lkml.kernel.org/r/20191126111119.GA110513@gmail.com +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/mm/fault.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/mm/fault.c ++++ b/arch/x86/mm/fault.c +@@ -281,7 +281,7 @@ void vmalloc_sync_all(void) + return; + + for (address = VMALLOC_START & PMD_MASK; +- address >= TASK_SIZE_MAX && address < FIXADDR_TOP; ++ address >= TASK_SIZE_MAX && address < VMALLOC_END; + address += PMD_SIZE) { + struct page *page; + diff --git a/queue-4.19/x86-pci-avoid-amd-fch-xhci-usb-pme-from-d0-defect.patch b/queue-4.19/x86-pci-avoid-amd-fch-xhci-usb-pme-from-d0-defect.patch new file mode 100644 index 00000000000..fdcd06124de --- /dev/null +++ b/queue-4.19/x86-pci-avoid-amd-fch-xhci-usb-pme-from-d0-defect.patch @@ -0,0 +1,53 @@ +From 7e8ce0e2b036dbc6617184317983aea4f2c52099 Mon Sep 17 00:00:00 2001 +From: Kai-Heng Feng +Date: Mon, 2 Sep 2019 22:52:52 +0800 +Subject: x86/PCI: Avoid AMD FCH XHCI USB PME# from D0 defect + +From: Kai-Heng Feng + +commit 7e8ce0e2b036dbc6617184317983aea4f2c52099 upstream. + +The AMD FCH USB XHCI Controller advertises support for generating PME# +while in D0. When in D0, it does signal PME# for USB 3.0 connect events, +but not for USB 2.0 or USB 1.1 connect events, which means the controller +doesn't wake correctly for those events. + + 00:10.0 USB controller [0c03]: Advanced Micro Devices, Inc. 
[AMD] FCH USB XHCI Controller [1022:7914] (rev 20) (prog-if 30 [XHCI]) + Subsystem: Dell FCH USB XHCI Controller [1028:087e] + Capabilities: [50] Power Management version 3 + Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA PME(D0+,D1-,D2-,D3hot+,D3cold+) + +Clear PCI_PM_CAP_PME_D0 in dev->pme_support to indicate the device will not +assert PME# from D0 so we don't rely on it. + +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=203673 +Link: https://lore.kernel.org/r/20190902145252.32111-1-kai.heng.feng@canonical.com +Signed-off-by: Kai-Heng Feng +Signed-off-by: Bjorn Helgaas +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/pci/fixup.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +--- a/arch/x86/pci/fixup.c ++++ b/arch/x86/pci/fixup.c +@@ -589,6 +589,17 @@ static void pci_fixup_amd_ehci_pme(struc + DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x7808, pci_fixup_amd_ehci_pme); + + /* ++ * Device [1022:7914] ++ * When in D0, PME# doesn't get asserted when plugging USB 2.0 device. 
++ */ ++static void pci_fixup_amd_fch_xhci_pme(struct pci_dev *dev) ++{ ++ dev_info(&dev->dev, "PME# does not work under D0, disabling it\n"); ++ dev->pme_support &= ~(PCI_PM_CAP_PME_D0 >> PCI_PM_CAP_PME_SHIFT); ++} ++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x7914, pci_fixup_amd_fch_xhci_pme); ++ ++/* + * Apple MacBook Pro: Avoid [mem 0x7fa00000-0x7fbfffff] + * + * Using the [mem 0x7fa00000-0x7fbfffff] region, e.g., by assigning it to diff --git a/queue-4.19/xfrm-interface-avoid-corruption-on-changelink.patch b/queue-4.19/xfrm-interface-avoid-corruption-on-changelink.patch new file mode 100644 index 00000000000..a048c67d5f2 --- /dev/null +++ b/queue-4.19/xfrm-interface-avoid-corruption-on-changelink.patch @@ -0,0 +1,62 @@ +From e9e7e85d75f3731079ffd77c1a66f037aef04fe7 Mon Sep 17 00:00:00 2001 +From: Nicolas Dichtel +Date: Mon, 15 Jul 2019 12:00:20 +0200 +Subject: xfrm interface: avoid corruption on changelink + +From: Nicolas Dichtel + +commit e9e7e85d75f3731079ffd77c1a66f037aef04fe7 upstream. + +The new parameters must not be stored in the netdev_priv() before +validation, it may corrupt the interface. Note also that if data is NULL, +only a memset() is done. 
+ +$ ip link add xfrm1 type xfrm dev lo if_id 1 +$ ip link add xfrm2 type xfrm dev lo if_id 2 +$ ip link set xfrm1 type xfrm dev lo if_id 2 +RTNETLINK answers: File exists +$ ip -d link list dev xfrm1 +5: xfrm1@lo: mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 + link/none 00:00:00:00:00:00 brd 00:00:00:00:00:00 promiscuity 0 minmtu 68 maxmtu 1500 + xfrm if_id 0x2 addrgenmode eui64 numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535 + +=> "if_id 0x2" + +Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces") +Signed-off-by: Nicolas Dichtel +Tested-by: Julien Floret +Signed-off-by: Steffen Klassert +Signed-off-by: Greg Kroah-Hartman + +--- + net/xfrm/xfrm_interface.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/net/xfrm/xfrm_interface.c ++++ b/net/xfrm/xfrm_interface.c +@@ -674,12 +674,12 @@ static int xfrmi_changelink(struct net_d + struct nlattr *data[], + struct netlink_ext_ack *extack) + { +- struct xfrm_if *xi = netdev_priv(dev); + struct net *net = dev_net(dev); ++ struct xfrm_if_parms p; ++ struct xfrm_if *xi; + +- xfrmi_netlink_parms(data, &xi->p); +- +- xi = xfrmi_locate(net, &xi->p); ++ xfrmi_netlink_parms(data, &p); ++ xi = xfrmi_locate(net, &p); + if (!xi) { + xi = netdev_priv(dev); + } else { +@@ -687,7 +687,7 @@ static int xfrmi_changelink(struct net_d + return -EEXIST; + } + +- return xfrmi_update(xi, &xi->p); ++ return xfrmi_update(xi, &p); + } + + static size_t xfrmi_get_size(const struct net_device *dev) diff --git a/queue-4.19/xfrm-interface-fix-list-corruption-for-x-netns.patch b/queue-4.19/xfrm-interface-fix-list-corruption-for-x-netns.patch new file mode 100644 index 00000000000..31d343d717f --- /dev/null +++ b/queue-4.19/xfrm-interface-fix-list-corruption-for-x-netns.patch @@ -0,0 +1,62 @@ +From c5d1030f23002430c2a336b2b629b9d6f72b3564 Mon Sep 17 00:00:00 2001 +From: Nicolas Dichtel +Date: Mon, 15 Jul 2019 12:00:22 +0200 +Subject: xfrm interface: fix list corruption for x-netns 
+ +From: Nicolas Dichtel + +commit c5d1030f23002430c2a336b2b629b9d6f72b3564 upstream. + +dev_net(dev) is the netns of the device and xi->net is the link netns, +where the device has been linked. +changelink() must operate in the link netns to avoid a corruption of +the xfrm lists. + +Note that xi->net and dev_net(xi->physdev) are always the same. + +Before the patch, the xfrmi lists may be corrupted and can later trigger a +kernel panic. + +Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces") +Reported-by: Julien Floret +Signed-off-by: Nicolas Dichtel +Tested-by: Julien Floret +Signed-off-by: Steffen Klassert +Signed-off-by: Greg Kroah-Hartman + +--- + net/xfrm/xfrm_interface.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/net/xfrm/xfrm_interface.c ++++ b/net/xfrm/xfrm_interface.c +@@ -505,7 +505,7 @@ static int xfrmi_change(struct xfrm_if * + + static int xfrmi_update(struct xfrm_if *xi, struct xfrm_if_parms *p) + { +- struct net *net = dev_net(xi->dev); ++ struct net *net = xi->net; + struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id); + int err; + +@@ -674,9 +674,9 @@ static int xfrmi_changelink(struct net_d + struct nlattr *data[], + struct netlink_ext_ack *extack) + { +- struct net *net = dev_net(dev); ++ struct xfrm_if *xi = netdev_priv(dev); ++ struct net *net = xi->net; + struct xfrm_if_parms p; +- struct xfrm_if *xi; + + xfrmi_netlink_parms(data, &p); + xi = xfrmi_locate(net, &p); +@@ -718,7 +718,7 @@ struct net *xfrmi_get_link_net(const str + { + struct xfrm_if *xi = netdev_priv(dev); + +- return dev_net(xi->phydev); ++ return xi->net; + } + + static const struct nla_policy xfrmi_policy[IFLA_XFRM_MAX + 1] = { diff --git a/queue-4.19/xfrm-interface-fix-management-of-phydev.patch b/queue-4.19/xfrm-interface-fix-management-of-phydev.patch new file mode 100644 index 00000000000..ffa35fceae1 --- /dev/null +++ b/queue-4.19/xfrm-interface-fix-management-of-phydev.patch @@ -0,0 +1,125 @@ +From 
22d6552f827ef76ade3edf6bbb3f05048a0a7d8b Mon Sep 17 00:00:00 2001 +From: Nicolas Dichtel +Date: Mon, 15 Jul 2019 12:00:23 +0200 +Subject: xfrm interface: fix management of phydev + +From: Nicolas Dichtel + +commit 22d6552f827ef76ade3edf6bbb3f05048a0a7d8b upstream. + +With the current implementation, phydev cannot be removed: + +$ ip link add dummy type dummy +$ ip link add xfrm1 type xfrm dev dummy if_id 1 +$ ip l d dummy + kernel:[77938.465445] unregister_netdevice: waiting for dummy to become free. Usage count = 1 + +Manage it like in ip tunnels, i.e. just keep the ifindex. Note that the side +effect is that the phydev is now optional. + +Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces") +Signed-off-by: Nicolas Dichtel +Tested-by: Julien Floret +Signed-off-by: Steffen Klassert +Signed-off-by: Greg Kroah-Hartman + +--- + include/net/xfrm.h | 1 - + net/xfrm/xfrm_interface.c | 34 ++++++++++++++++++---------------- + 2 files changed, 18 insertions(+), 17 deletions(-) + +--- a/include/net/xfrm.h ++++ b/include/net/xfrm.h +@@ -1062,7 +1062,6 @@ struct xfrm_if_parms { + struct xfrm_if { + struct xfrm_if __rcu *next; /* next interface in list */ + struct net_device *dev; /* virtual device associated with interface */ +- struct net_device *phydev; /* physical device */ + struct net *net; /* netns for packet i/o */ + struct xfrm_if_parms p; /* interface parms */ + +--- a/net/xfrm/xfrm_interface.c ++++ b/net/xfrm/xfrm_interface.c +@@ -177,7 +177,6 @@ static void xfrmi_dev_uninit(struct net_ + struct xfrmi_net *xfrmn = net_generic(xi->net, xfrmi_net_id); + + xfrmi_unlink(xfrmn, xi); +- dev_put(xi->phydev); + dev_put(dev); + } + +@@ -364,7 +363,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_ + goto tx_err; + } + +- fl.flowi_oif = xi->phydev->ifindex; ++ fl.flowi_oif = xi->p.link; + + ret = xfrmi_xmit2(skb, dev, &fl); + if (ret < 0) +@@ -553,7 +552,7 @@ static int xfrmi_get_iflink(const struct + { + struct xfrm_if *xi = netdev_priv(dev); + +- return xi->phydev->ifindex; ++ 
return xi->p.link; + } + + +@@ -579,12 +578,14 @@ static void xfrmi_dev_setup(struct net_d + dev->needs_free_netdev = true; + dev->priv_destructor = xfrmi_dev_free; + netif_keep_dst(dev); ++ ++ eth_broadcast_addr(dev->broadcast); + } + + static int xfrmi_dev_init(struct net_device *dev) + { + struct xfrm_if *xi = netdev_priv(dev); +- struct net_device *phydev = xi->phydev; ++ struct net_device *phydev = __dev_get_by_index(xi->net, xi->p.link); + int err; + + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); +@@ -599,13 +600,19 @@ static int xfrmi_dev_init(struct net_dev + + dev->features |= NETIF_F_LLTX; + +- dev->needed_headroom = phydev->needed_headroom; +- dev->needed_tailroom = phydev->needed_tailroom; +- +- if (is_zero_ether_addr(dev->dev_addr)) +- eth_hw_addr_inherit(dev, phydev); +- if (is_zero_ether_addr(dev->broadcast)) +- memcpy(dev->broadcast, phydev->broadcast, dev->addr_len); ++ if (phydev) { ++ dev->needed_headroom = phydev->needed_headroom; ++ dev->needed_tailroom = phydev->needed_tailroom; ++ ++ if (is_zero_ether_addr(dev->dev_addr)) ++ eth_hw_addr_inherit(dev, phydev); ++ if (is_zero_ether_addr(dev->broadcast)) ++ memcpy(dev->broadcast, phydev->broadcast, ++ dev->addr_len); ++ } else { ++ eth_hw_addr_random(dev); ++ eth_broadcast_addr(dev->broadcast); ++ } + + return 0; + } +@@ -655,13 +662,8 @@ static int xfrmi_newlink(struct net *src + xi->p = p; + xi->net = net; + xi->dev = dev; +- xi->phydev = dev_get_by_index(net, p.link); +- if (!xi->phydev) +- return -ENODEV; + + err = xfrmi_create(dev); +- if (err < 0) +- dev_put(xi->phydev); + return err; + } + diff --git a/queue-4.19/xfrm-interface-fix-memory-leak-on-creation.patch b/queue-4.19/xfrm-interface-fix-memory-leak-on-creation.patch new file mode 100644 index 00000000000..303c3efbed8 --- /dev/null +++ b/queue-4.19/xfrm-interface-fix-memory-leak-on-creation.patch @@ -0,0 +1,221 @@ +From 56c5ee1a5823e9cf5288b84ae6364cb4112f8225 Mon Sep 17 00:00:00 2001 +From: Nicolas Dichtel +Date: 
Tue, 2 Jul 2019 17:51:39 +0200 +Subject: xfrm interface: fix memory leak on creation + +From: Nicolas Dichtel + +commit 56c5ee1a5823e9cf5288b84ae6364cb4112f8225 upstream. + +The following commands produce a backtrace and return an error but the xfrm +interface is created (in the wrong netns): +$ ip netns add foo +$ ip netns add bar +$ ip -n foo netns set bar 0 +$ ip -n foo link add xfrmi0 link-netnsid 0 type xfrm dev lo if_id 23 +RTNETLINK answers: Invalid argument +$ ip -n bar link ls xfrmi0 +2: xfrmi0@lo: mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 + link/none 00:00:00:00:00:00 brd 00:00:00:00:00:00 + +Here is the backtrace: +[ 79.879174] WARNING: CPU: 0 PID: 1178 at net/core/dev.c:8172 rollback_registered_many+0x86/0x3c1 +[ 79.880260] Modules linked in: xfrm_interface nfsv3 nfs_acl auth_rpcgss nfsv4 nfs lockd grace sunrpc fscache button parport_pc parport serio_raw evdev pcspkr loop ext4 crc16 mbcache jbd2 crc32c_generic ide_cd_mod ide_gd_mod cdrom ata_$ +eneric ata_piix libata scsi_mod 8139too piix psmouse i2c_piix4 ide_core 8139cp mii i2c_core floppy +[ 79.883698] CPU: 0 PID: 1178 Comm: ip Not tainted 5.2.0-rc6+ #106 +[ 79.884462] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 +[ 79.885447] RIP: 0010:rollback_registered_many+0x86/0x3c1 +[ 79.886120] Code: 01 e8 d7 7d c6 ff 0f 0b 48 8b 45 00 4c 8b 20 48 8d 58 90 49 83 ec 70 48 8d 7b 70 48 39 ef 74 44 8a 83 d0 04 00 00 84 c0 75 1f <0f> 0b e8 61 cd ff ff 48 b8 00 01 00 00 00 00 ad de 48 89 43 70 66 +[ 79.888667] RSP: 0018:ffffc900015ab740 EFLAGS: 00010246 +[ 79.889339] RAX: ffff8882353e5700 RBX: ffff8882353e56a0 RCX: ffff8882353e5710 +[ 79.890174] RDX: ffffc900015ab7e0 RSI: ffffc900015ab7e0 RDI: ffff8882353e5710 +[ 79.891029] RBP: ffffc900015ab7e0 R08: ffffc900015ab7e0 R09: ffffc900015ab7e0 +[ 79.891866] R10: ffffc900015ab7a0 R11: ffffffff82233fec R12: ffffc900015ab770 +[ 79.892728] R13: ffffffff81eb7ec0 R14: ffff88822ed6cf00 R15: 00000000ffffffea +[ 
79.893557] FS: 00007ff350f31740(0000) GS:ffff888237a00000(0000) knlGS:0000000000000000 +[ 79.894581] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 79.895317] CR2: 00000000006c8580 CR3: 000000022c272000 CR4: 00000000000006f0 +[ 79.896137] Call Trace: +[ 79.896464] unregister_netdevice_many+0x12/0x6c +[ 79.896998] __rtnl_newlink+0x6e2/0x73b +[ 79.897446] ? __kmalloc_node_track_caller+0x15e/0x185 +[ 79.898039] ? pskb_expand_head+0x5f/0x1fe +[ 79.898556] ? stack_access_ok+0xd/0x2c +[ 79.899009] ? deref_stack_reg+0x12/0x20 +[ 79.899462] ? stack_access_ok+0xd/0x2c +[ 79.899927] ? stack_access_ok+0xd/0x2c +[ 79.900404] ? __module_text_address+0x9/0x4f +[ 79.900910] ? is_bpf_text_address+0x5/0xc +[ 79.901390] ? kernel_text_address+0x67/0x7b +[ 79.901884] ? __kernel_text_address+0x1a/0x25 +[ 79.902397] ? unwind_get_return_address+0x12/0x23 +[ 79.903122] ? __cmpxchg_double_slab.isra.37+0x46/0x77 +[ 79.903772] rtnl_newlink+0x43/0x56 +[ 79.904217] rtnetlink_rcv_msg+0x200/0x24c + +In fact, each time a xfrm interface was created, a netdev was allocated +by __rtnl_newlink()/rtnl_create_link() and then another one by +xfrmi_newlink()/xfrmi_create(). Only the second one was registered, it's +why the previous commands produce a backtrace: dev_change_net_namespace() +was called on a netdev with reg_state set to NETREG_UNINITIALIZED (the +first one). 
+ +CC: Lorenzo Colitti +CC: Benedict Wong +CC: Steffen Klassert +CC: Shannon Nelson +CC: Antony Antony +CC: Eyal Birger +Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces") +Reported-by: Julien Floret +Signed-off-by: Nicolas Dichtel +Signed-off-by: Steffen Klassert +Signed-off-by: Greg Kroah-Hartman + +--- + net/xfrm/xfrm_interface.c | 98 +++++++++++++--------------------------------- + 1 file changed, 28 insertions(+), 70 deletions(-) + +--- a/net/xfrm/xfrm_interface.c ++++ b/net/xfrm/xfrm_interface.c +@@ -133,7 +133,7 @@ static void xfrmi_dev_free(struct net_de + free_percpu(dev->tstats); + } + +-static int xfrmi_create2(struct net_device *dev) ++static int xfrmi_create(struct net_device *dev) + { + struct xfrm_if *xi = netdev_priv(dev); + struct net *net = dev_net(dev); +@@ -156,54 +156,7 @@ out: + return err; + } + +-static struct xfrm_if *xfrmi_create(struct net *net, struct xfrm_if_parms *p) +-{ +- struct net_device *dev; +- struct xfrm_if *xi; +- char name[IFNAMSIZ]; +- int err; +- +- if (p->name[0]) { +- strlcpy(name, p->name, IFNAMSIZ); +- } else { +- err = -EINVAL; +- goto failed; +- } +- +- dev = alloc_netdev(sizeof(*xi), name, NET_NAME_UNKNOWN, xfrmi_dev_setup); +- if (!dev) { +- err = -EAGAIN; +- goto failed; +- } +- +- dev_net_set(dev, net); +- +- xi = netdev_priv(dev); +- xi->p = *p; +- xi->net = net; +- xi->dev = dev; +- xi->phydev = dev_get_by_index(net, p->link); +- if (!xi->phydev) { +- err = -ENODEV; +- goto failed_free; +- } +- +- err = xfrmi_create2(dev); +- if (err < 0) +- goto failed_dev_put; +- +- return xi; +- +-failed_dev_put: +- dev_put(xi->phydev); +-failed_free: +- free_netdev(dev); +-failed: +- return ERR_PTR(err); +-} +- +-static struct xfrm_if *xfrmi_locate(struct net *net, struct xfrm_if_parms *p, +- int create) ++static struct xfrm_if *xfrmi_locate(struct net *net, struct xfrm_if_parms *p) + { + struct xfrm_if __rcu **xip; + struct xfrm_if *xi; +@@ -211,17 +164,11 @@ static struct xfrm_if *xfrmi_locate(stru + + for (xip = 
&xfrmn->xfrmi[0]; + (xi = rtnl_dereference(*xip)) != NULL; +- xip = &xi->next) { +- if (xi->p.if_id == p->if_id) { +- if (create) +- return ERR_PTR(-EEXIST); +- ++ xip = &xi->next) ++ if (xi->p.if_id == p->if_id) + return xi; +- } +- } +- if (!create) +- return ERR_PTR(-ENODEV); +- return xfrmi_create(net, p); ++ ++ return NULL; + } + + static void xfrmi_dev_uninit(struct net_device *dev) +@@ -689,21 +636,33 @@ static int xfrmi_newlink(struct net *src + struct netlink_ext_ack *extack) + { + struct net *net = dev_net(dev); +- struct xfrm_if_parms *p; ++ struct xfrm_if_parms p; + struct xfrm_if *xi; ++ int err; + +- xi = netdev_priv(dev); +- p = &xi->p; +- +- xfrmi_netlink_parms(data, p); ++ xfrmi_netlink_parms(data, &p); + + if (!tb[IFLA_IFNAME]) + return -EINVAL; + +- nla_strlcpy(p->name, tb[IFLA_IFNAME], IFNAMSIZ); ++ nla_strlcpy(p.name, tb[IFLA_IFNAME], IFNAMSIZ); + +- xi = xfrmi_locate(net, p, 1); +- return PTR_ERR_OR_ZERO(xi); ++ xi = xfrmi_locate(net, &p); ++ if (xi) ++ return -EEXIST; ++ ++ xi = netdev_priv(dev); ++ xi->p = p; ++ xi->net = net; ++ xi->dev = dev; ++ xi->phydev = dev_get_by_index(net, p.link); ++ if (!xi->phydev) ++ return -ENODEV; ++ ++ err = xfrmi_create(dev); ++ if (err < 0) ++ dev_put(xi->phydev); ++ return err; + } + + static void xfrmi_dellink(struct net_device *dev, struct list_head *head) +@@ -720,9 +679,8 @@ static int xfrmi_changelink(struct net_d + + xfrmi_netlink_parms(data, &xi->p); + +- xi = xfrmi_locate(net, &xi->p, 0); +- +- if (IS_ERR_OR_NULL(xi)) { ++ xi = xfrmi_locate(net, &xi->p); ++ if (!xi) { + xi = netdev_priv(dev); + } else { + if (xi->dev != dev)