]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.19-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 9 Dec 2019 18:17:30 +0000 (19:17 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 9 Dec 2019 18:17:30 +0000 (19:17 +0100)
added patches:
cifs-fix-null-pointer-dereference-in-smb2_push_mandatory_locks.patch
cifs-fix-smb2-oplock-break-processing.patch
x86-mm-32-sync-only-to-vmalloc_end-in-vmalloc_sync_all.patch
x86-pci-avoid-amd-fch-xhci-usb-pme-from-d0-defect.patch
xfrm-interface-avoid-corruption-on-changelink.patch
xfrm-interface-fix-list-corruption-for-x-netns.patch
xfrm-interface-fix-management-of-phydev.patch
xfrm-interface-fix-memory-leak-on-creation.patch

queue-4.19/cifs-fix-null-pointer-dereference-in-smb2_push_mandatory_locks.patch [new file with mode: 0644]
queue-4.19/cifs-fix-smb2-oplock-break-processing.patch [new file with mode: 0644]
queue-4.19/series
queue-4.19/x86-mm-32-sync-only-to-vmalloc_end-in-vmalloc_sync_all.patch [new file with mode: 0644]
queue-4.19/x86-pci-avoid-amd-fch-xhci-usb-pme-from-d0-defect.patch [new file with mode: 0644]
queue-4.19/xfrm-interface-avoid-corruption-on-changelink.patch [new file with mode: 0644]
queue-4.19/xfrm-interface-fix-list-corruption-for-x-netns.patch [new file with mode: 0644]
queue-4.19/xfrm-interface-fix-management-of-phydev.patch [new file with mode: 0644]
queue-4.19/xfrm-interface-fix-memory-leak-on-creation.patch [new file with mode: 0644]

diff --git a/queue-4.19/cifs-fix-null-pointer-dereference-in-smb2_push_mandatory_locks.patch b/queue-4.19/cifs-fix-null-pointer-dereference-in-smb2_push_mandatory_locks.patch
new file mode 100644 (file)
index 0000000..71b393a
--- /dev/null
@@ -0,0 +1,72 @@
+From 6f582b273ec23332074d970a7fb25bef835df71f Mon Sep 17 00:00:00 2001
+From: Pavel Shilovsky <pshilov@microsoft.com>
+Date: Wed, 27 Nov 2019 16:18:39 -0800
+Subject: CIFS: Fix NULL-pointer dereference in smb2_push_mandatory_locks
+
+From: Pavel Shilovsky <pshilov@microsoft.com>
+
+commit 6f582b273ec23332074d970a7fb25bef835df71f upstream.
+
+Currently when the client creates a cifsFileInfo structure for
+a newly opened file, it allocates a list of byte-range locks
+with a pointer to the new cfile and attaches this list to the
+inode's lock list. The latter happens before initializing all
+other fields, e.g. cfile->tlink. Thus a partially initialized
+cifsFileInfo structure becomes available to other threads that
+walk through the inode's lock list. One example of such a thread
+may be an oplock break worker thread that tries to push all
+cached byte-range locks. This causes NULL-pointer dereference
+in smb2_push_mandatory_locks() when accessing cfile->tlink:
+
+[598428.945633] BUG: kernel NULL pointer dereference, address: 0000000000000038
+...
+[598428.945749] Workqueue: cifsoplockd cifs_oplock_break [cifs]
+[598428.945793] RIP: 0010:smb2_push_mandatory_locks+0xd6/0x5a0 [cifs]
+...
+[598428.945834] Call Trace:
+[598428.945870]  ? cifs_revalidate_mapping+0x45/0x90 [cifs]
+[598428.945901]  cifs_oplock_break+0x13d/0x450 [cifs]
+[598428.945909]  process_one_work+0x1db/0x380
+[598428.945914]  worker_thread+0x4d/0x400
+[598428.945921]  kthread+0x104/0x140
+[598428.945925]  ? process_one_work+0x380/0x380
+[598428.945931]  ? kthread_park+0x80/0x80
+[598428.945937]  ret_from_fork+0x35/0x40
+
+Fix this by reordering initialization steps of the cifsFileInfo
+structure: initialize all the fields first and then add the new
+byte-range lock list to the inode's lock list.
+
+Cc: Stable <stable@vger.kernel.org>
+Signed-off-by: Pavel Shilovsky <pshilov@microsoft.com>
+Reviewed-by: Aurelien Aptel <aaptel@suse.com>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/file.c |    7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/fs/cifs/file.c
++++ b/fs/cifs/file.c
+@@ -312,9 +312,6 @@ cifs_new_fileinfo(struct cifs_fid *fid,
+       INIT_LIST_HEAD(&fdlocks->locks);
+       fdlocks->cfile = cfile;
+       cfile->llist = fdlocks;
+-      cifs_down_write(&cinode->lock_sem);
+-      list_add(&fdlocks->llist, &cinode->llist);
+-      up_write(&cinode->lock_sem);
+       cfile->count = 1;
+       cfile->pid = current->tgid;
+@@ -338,6 +335,10 @@ cifs_new_fileinfo(struct cifs_fid *fid,
+               oplock = 0;
+       }
++      cifs_down_write(&cinode->lock_sem);
++      list_add(&fdlocks->llist, &cinode->llist);
++      up_write(&cinode->lock_sem);
++
+       spin_lock(&tcon->open_file_lock);
+       if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
+               oplock = fid->pending_open->oplock;
diff --git a/queue-4.19/cifs-fix-smb2-oplock-break-processing.patch b/queue-4.19/cifs-fix-smb2-oplock-break-processing.patch
new file mode 100644 (file)
index 0000000..1abd7cf
--- /dev/null
@@ -0,0 +1,67 @@
+From fa9c2362497fbd64788063288dc4e74daf977ebb Mon Sep 17 00:00:00 2001
+From: Pavel Shilovsky <pshilov@microsoft.com>
+Date: Thu, 31 Oct 2019 14:18:57 -0700
+Subject: CIFS: Fix SMB2 oplock break processing
+
+From: Pavel Shilovsky <pshilov@microsoft.com>
+
+commit fa9c2362497fbd64788063288dc4e74daf977ebb upstream.
+
+Even when mounting modern protocol version the server may be
+configured without supporting SMB2.1 leases and the client
+uses SMB2 oplock to optimize IO performance through local caching.
+
+However there is a problem in oplock break handling that leads
+to missing a break notification on the client who has a file
+opened. It later causes big latencies to other clients that
+are trying to open the same file.
+
+The problem reproduces when there are multiple shares from the
+same server mounted on the client. The processing code tries to
+match persistent and volatile file ids from the break notification
+with an open file but it skips all shares besides the first one.
+Fix this by looking up in all shares belonging to the server that
+issued the oplock break.
+
+Cc: Stable <stable@vger.kernel.org>
+Signed-off-by: Pavel Shilovsky <pshilov@microsoft.com>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/smb2misc.c |    7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+--- a/fs/cifs/smb2misc.c
++++ b/fs/cifs/smb2misc.c
+@@ -673,10 +673,10 @@ smb2_is_valid_oplock_break(char *buffer,
+       spin_lock(&cifs_tcp_ses_lock);
+       list_for_each(tmp, &server->smb_ses_list) {
+               ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
++
+               list_for_each(tmp1, &ses->tcon_list) {
+                       tcon = list_entry(tmp1, struct cifs_tcon, tcon_list);
+-                      cifs_stats_inc(&tcon->stats.cifs_stats.num_oplock_brks);
+                       spin_lock(&tcon->open_file_lock);
+                       list_for_each(tmp2, &tcon->openFileList) {
+                               cfile = list_entry(tmp2, struct cifsFileInfo,
+@@ -688,6 +688,8 @@ smb2_is_valid_oplock_break(char *buffer,
+                                       continue;
+                               cifs_dbg(FYI, "file id match, oplock break\n");
++                              cifs_stats_inc(
++                                  &tcon->stats.cifs_stats.num_oplock_brks);
+                               cinode = CIFS_I(d_inode(cfile->dentry));
+                               spin_lock(&cfile->file_info_lock);
+                               if (!CIFS_CACHE_WRITE(cinode) &&
+@@ -720,9 +722,6 @@ smb2_is_valid_oplock_break(char *buffer,
+                               return true;
+                       }
+                       spin_unlock(&tcon->open_file_lock);
+-                      spin_unlock(&cifs_tcp_ses_lock);
+-                      cifs_dbg(FYI, "No matching file for oplock break\n");
+-                      return true;
+               }
+       }
+       spin_unlock(&cifs_tcp_ses_lock);
index 84ec01c5efff0612e3fcfc03becc7bafb07c00d4..a4260a599498a42591e5c94a6d3d2a65e53d9b7a 100644 (file)
@@ -212,3 +212,11 @@ input-synaptics-rmi4-don-t-increment-rmiaddr-for-smbus-transfers.patch
 input-goodix-add-upside-down-quirk-for-teclast-x89-tablet.patch
 coresight-etm4x-fix-input-validation-for-sysfs.patch
 input-fix-memory-leak-in-psxpad_spi_probe.patch
+x86-mm-32-sync-only-to-vmalloc_end-in-vmalloc_sync_all.patch
+x86-pci-avoid-amd-fch-xhci-usb-pme-from-d0-defect.patch
+xfrm-interface-fix-memory-leak-on-creation.patch
+xfrm-interface-avoid-corruption-on-changelink.patch
+xfrm-interface-fix-list-corruption-for-x-netns.patch
+xfrm-interface-fix-management-of-phydev.patch
+cifs-fix-null-pointer-dereference-in-smb2_push_mandatory_locks.patch
+cifs-fix-smb2-oplock-break-processing.patch
diff --git a/queue-4.19/x86-mm-32-sync-only-to-vmalloc_end-in-vmalloc_sync_all.patch b/queue-4.19/x86-mm-32-sync-only-to-vmalloc_end-in-vmalloc_sync_all.patch
new file mode 100644 (file)
index 0000000..87c3eb6
--- /dev/null
@@ -0,0 +1,93 @@
+From 9a62d20027da3164a22244d9f022c0c987261687 Mon Sep 17 00:00:00 2001
+From: Joerg Roedel <jroedel@suse.de>
+Date: Tue, 26 Nov 2019 11:09:42 +0100
+Subject: x86/mm/32: Sync only to VMALLOC_END in vmalloc_sync_all()
+
+From: Joerg Roedel <jroedel@suse.de>
+
+commit 9a62d20027da3164a22244d9f022c0c987261687 upstream.
+
+The job of vmalloc_sync_all() is to help the lazy freeing of vmalloc()
+ranges: before such vmap ranges are reused we make sure that they are
+unmapped from every task's page tables.
+
+This is really easy on pagetable setups where the kernel page tables
+are shared between all tasks - this is the case on 32-bit kernels
+with SHARED_KERNEL_PMD = 1.
+
+But on !SHARED_KERNEL_PMD 32-bit kernels this involves iterating
+over the pgd_list and clearing all pmd entries in the pgds that
+are cleared in the init_mm.pgd, which is the reference pagetable
+that the vmalloc() code uses.
+
+In that context the current practice of vmalloc_sync_all() iterating
+until FIXADDR_TOP is buggy:
+
+        for (address = VMALLOC_START & PMD_MASK;
+             address >= TASK_SIZE_MAX && address < FIXADDR_TOP;
+             address += PMD_SIZE) {
+                struct page *page;
+
+Because iterating up to FIXADDR_TOP will involve a lot of non-vmalloc
+address ranges:
+
+       VMALLOC -> PKMAP -> LDT -> CPU_ENTRY_AREA -> FIX_ADDR
+
+This is mostly harmless for the FIX_ADDR and CPU_ENTRY_AREA ranges
+that don't clear their pmds, but it's lethal for the LDT range,
+which relies on having different mappings in different processes,
+and 'synchronizing' them in the vmalloc sense corrupts those
+pagetable entries (clearing them).
+
+This got particularly prominent with PTI, which turns SHARED_KERNEL_PMD
+off and makes this the dominant mapping mode on 32-bit.
+
+To make the LDT work again, vmalloc_sync_all() must only iterate over
+the volatile parts of the kernel address range that are identical
+between all processes.
+
+So the correct check in vmalloc_sync_all() is "address < VMALLOC_END"
+to make sure the VMALLOC areas are synchronized and the LDT
+mapping is not falsely overwritten.
+
+The CPU_ENTRY_AREA and the FIXMAP area are no longer synced either,
+but this is not really a problem since their PMDs get established
+during bootup and never change.
+
+This change fixes the ldt_gdt selftest in my setup.
+
+[ mingo: Fixed up the changelog to explain the logic and modified the
+         copying to only happen up until VMALLOC_END. ]
+
+Reported-by: Borislav Petkov <bp@suse.de>
+Tested-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Cc: <stable@vger.kernel.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Joerg Roedel <joro@8bytes.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: hpa@zytor.com
+Fixes: 7757d607c6b3: ("x86/pti: Allow CONFIG_PAGE_TABLE_ISOLATION for x86_32")
+Link: https://lkml.kernel.org/r/20191126111119.GA110513@gmail.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/mm/fault.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/mm/fault.c
++++ b/arch/x86/mm/fault.c
+@@ -281,7 +281,7 @@ void vmalloc_sync_all(void)
+               return;
+       for (address = VMALLOC_START & PMD_MASK;
+-           address >= TASK_SIZE_MAX && address < FIXADDR_TOP;
++           address >= TASK_SIZE_MAX && address < VMALLOC_END;
+            address += PMD_SIZE) {
+               struct page *page;
diff --git a/queue-4.19/x86-pci-avoid-amd-fch-xhci-usb-pme-from-d0-defect.patch b/queue-4.19/x86-pci-avoid-amd-fch-xhci-usb-pme-from-d0-defect.patch
new file mode 100644 (file)
index 0000000..fdcd061
--- /dev/null
@@ -0,0 +1,53 @@
+From 7e8ce0e2b036dbc6617184317983aea4f2c52099 Mon Sep 17 00:00:00 2001
+From: Kai-Heng Feng <kai.heng.feng@canonical.com>
+Date: Mon, 2 Sep 2019 22:52:52 +0800
+Subject: x86/PCI: Avoid AMD FCH XHCI USB PME# from D0 defect
+
+From: Kai-Heng Feng <kai.heng.feng@canonical.com>
+
+commit 7e8ce0e2b036dbc6617184317983aea4f2c52099 upstream.
+
+The AMD FCH USB XHCI Controller advertises support for generating PME#
+while in D0.  When in D0, it does signal PME# for USB 3.0 connect events,
+but not for USB 2.0 or USB 1.1 connect events, which means the controller
+doesn't wake correctly for those events.
+
+  00:10.0 USB controller [0c03]: Advanced Micro Devices, Inc. [AMD] FCH USB XHCI Controller [1022:7914] (rev 20) (prog-if 30 [XHCI])
+        Subsystem: Dell FCH USB XHCI Controller [1028:087e]
+        Capabilities: [50] Power Management version 3
+                Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA PME(D0+,D1-,D2-,D3hot+,D3cold+)
+
+Clear PCI_PM_CAP_PME_D0 in dev->pme_support to indicate the device will not
+assert PME# from D0 so we don't rely on it.
+
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=203673
+Link: https://lore.kernel.org/r/20190902145252.32111-1-kai.heng.feng@canonical.com
+Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/pci/fixup.c |   11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+--- a/arch/x86/pci/fixup.c
++++ b/arch/x86/pci/fixup.c
+@@ -589,6 +589,17 @@ static void pci_fixup_amd_ehci_pme(struc
+ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x7808, pci_fixup_amd_ehci_pme);
+ /*
++ * Device [1022:7914]
++ * When in D0, PME# doesn't get asserted when plugging USB 2.0 device.
++ */
++static void pci_fixup_amd_fch_xhci_pme(struct pci_dev *dev)
++{
++      dev_info(&dev->dev, "PME# does not work under D0, disabling it\n");
++      dev->pme_support &= ~(PCI_PM_CAP_PME_D0 >> PCI_PM_CAP_PME_SHIFT);
++}
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x7914, pci_fixup_amd_fch_xhci_pme);
++
++/*
+  * Apple MacBook Pro: Avoid [mem 0x7fa00000-0x7fbfffff]
+  *
+  * Using the [mem 0x7fa00000-0x7fbfffff] region, e.g., by assigning it to
diff --git a/queue-4.19/xfrm-interface-avoid-corruption-on-changelink.patch b/queue-4.19/xfrm-interface-avoid-corruption-on-changelink.patch
new file mode 100644 (file)
index 0000000..a048c67
--- /dev/null
@@ -0,0 +1,62 @@
+From e9e7e85d75f3731079ffd77c1a66f037aef04fe7 Mon Sep 17 00:00:00 2001
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Date: Mon, 15 Jul 2019 12:00:20 +0200
+Subject: xfrm interface: avoid corruption on changelink
+
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+
+commit e9e7e85d75f3731079ffd77c1a66f037aef04fe7 upstream.
+
+The new parameters must not be stored in the netdev_priv() before
+validation, it may corrupt the interface. Note also that if data is NULL,
+only a memset() is done.
+
+$ ip link add xfrm1 type xfrm dev lo if_id 1
+$ ip link add xfrm2 type xfrm dev lo if_id 2
+$ ip link set xfrm1 type xfrm dev lo if_id 2
+RTNETLINK answers: File exists
+$ ip -d link list dev xfrm1
+5: xfrm1@lo: <NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
+    link/none 00:00:00:00:00:00 brd 00:00:00:00:00:00 promiscuity 0 minmtu 68 maxmtu 1500
+    xfrm if_id 0x2 addrgenmode eui64 numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535
+
+=> "if_id 0x2"
+
+Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces")
+Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Tested-by: Julien Floret <julien.floret@6wind.com>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/xfrm/xfrm_interface.c |   10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/net/xfrm/xfrm_interface.c
++++ b/net/xfrm/xfrm_interface.c
+@@ -674,12 +674,12 @@ static int xfrmi_changelink(struct net_d
+                          struct nlattr *data[],
+                          struct netlink_ext_ack *extack)
+ {
+-      struct xfrm_if *xi = netdev_priv(dev);
+       struct net *net = dev_net(dev);
++      struct xfrm_if_parms p;
++      struct xfrm_if *xi;
+-      xfrmi_netlink_parms(data, &xi->p);
+-
+-      xi = xfrmi_locate(net, &xi->p);
++      xfrmi_netlink_parms(data, &p);
++      xi = xfrmi_locate(net, &p);
+       if (!xi) {
+               xi = netdev_priv(dev);
+       } else {
+@@ -687,7 +687,7 @@ static int xfrmi_changelink(struct net_d
+                       return -EEXIST;
+       }
+-      return xfrmi_update(xi, &xi->p);
++      return xfrmi_update(xi, &p);
+ }
+ static size_t xfrmi_get_size(const struct net_device *dev)
diff --git a/queue-4.19/xfrm-interface-fix-list-corruption-for-x-netns.patch b/queue-4.19/xfrm-interface-fix-list-corruption-for-x-netns.patch
new file mode 100644 (file)
index 0000000..31d343d
--- /dev/null
@@ -0,0 +1,62 @@
+From c5d1030f23002430c2a336b2b629b9d6f72b3564 Mon Sep 17 00:00:00 2001
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Date: Mon, 15 Jul 2019 12:00:22 +0200
+Subject: xfrm interface: fix list corruption for x-netns
+
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+
+commit c5d1030f23002430c2a336b2b629b9d6f72b3564 upstream.
+
+dev_net(dev) is the netns of the device and xi->net is the link netns,
+where the device has been linked.
+changelink() must operate in the link netns to avoid a corruption of
+the xfrm lists.
+
+Note that xi->net and dev_net(xi->physdev) are always the same.
+
+Before the patch, the xfrmi lists may be corrupted and can later trigger a
+kernel panic.
+
+Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces")
+Reported-by: Julien Floret <julien.floret@6wind.com>
+Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Tested-by: Julien Floret <julien.floret@6wind.com>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/xfrm/xfrm_interface.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/net/xfrm/xfrm_interface.c
++++ b/net/xfrm/xfrm_interface.c
+@@ -505,7 +505,7 @@ static int xfrmi_change(struct xfrm_if *
+ static int xfrmi_update(struct xfrm_if *xi, struct xfrm_if_parms *p)
+ {
+-      struct net *net = dev_net(xi->dev);
++      struct net *net = xi->net;
+       struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
+       int err;
+@@ -674,9 +674,9 @@ static int xfrmi_changelink(struct net_d
+                          struct nlattr *data[],
+                          struct netlink_ext_ack *extack)
+ {
+-      struct net *net = dev_net(dev);
++      struct xfrm_if *xi = netdev_priv(dev);
++      struct net *net = xi->net;
+       struct xfrm_if_parms p;
+-      struct xfrm_if *xi;
+       xfrmi_netlink_parms(data, &p);
+       xi = xfrmi_locate(net, &p);
+@@ -718,7 +718,7 @@ struct net *xfrmi_get_link_net(const str
+ {
+       struct xfrm_if *xi = netdev_priv(dev);
+-      return dev_net(xi->phydev);
++      return xi->net;
+ }
+ static const struct nla_policy xfrmi_policy[IFLA_XFRM_MAX + 1] = {
diff --git a/queue-4.19/xfrm-interface-fix-management-of-phydev.patch b/queue-4.19/xfrm-interface-fix-management-of-phydev.patch
new file mode 100644 (file)
index 0000000..ffa35fc
--- /dev/null
@@ -0,0 +1,125 @@
+From 22d6552f827ef76ade3edf6bbb3f05048a0a7d8b Mon Sep 17 00:00:00 2001
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Date: Mon, 15 Jul 2019 12:00:23 +0200
+Subject: xfrm interface: fix management of phydev
+
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+
+commit 22d6552f827ef76ade3edf6bbb3f05048a0a7d8b upstream.
+
+With the current implementation, phydev cannot be removed:
+
+$ ip link add dummy type dummy
+$ ip link add xfrm1 type xfrm dev dummy if_id 1
+$ ip l d dummy
+ kernel:[77938.465445] unregister_netdevice: waiting for dummy to become free. Usage count = 1
+
+Manage it like in ip tunnels, i.e. just keep the ifindex. Note that the side
+effect is that the phydev is now optional.
+
+Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces")
+Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Tested-by: Julien Floret <julien.floret@6wind.com>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/net/xfrm.h        |    1 -
+ net/xfrm/xfrm_interface.c |   34 ++++++++++++++++++----------------
+ 2 files changed, 18 insertions(+), 17 deletions(-)
+
+--- a/include/net/xfrm.h
++++ b/include/net/xfrm.h
+@@ -1062,7 +1062,6 @@ struct xfrm_if_parms {
+ struct xfrm_if {
+       struct xfrm_if __rcu *next;     /* next interface in list */
+       struct net_device *dev;         /* virtual device associated with interface */
+-      struct net_device *phydev;      /* physical device */
+       struct net *net;                /* netns for packet i/o */
+       struct xfrm_if_parms p;         /* interface parms */
+--- a/net/xfrm/xfrm_interface.c
++++ b/net/xfrm/xfrm_interface.c
+@@ -177,7 +177,6 @@ static void xfrmi_dev_uninit(struct net_
+       struct xfrmi_net *xfrmn = net_generic(xi->net, xfrmi_net_id);
+       xfrmi_unlink(xfrmn, xi);
+-      dev_put(xi->phydev);
+       dev_put(dev);
+ }
+@@ -364,7 +363,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_
+               goto tx_err;
+       }
+-      fl.flowi_oif = xi->phydev->ifindex;
++      fl.flowi_oif = xi->p.link;
+       ret = xfrmi_xmit2(skb, dev, &fl);
+       if (ret < 0)
+@@ -553,7 +552,7 @@ static int xfrmi_get_iflink(const struct
+ {
+       struct xfrm_if *xi = netdev_priv(dev);
+-      return xi->phydev->ifindex;
++      return xi->p.link;
+ }
+@@ -579,12 +578,14 @@ static void xfrmi_dev_setup(struct net_d
+       dev->needs_free_netdev  = true;
+       dev->priv_destructor    = xfrmi_dev_free;
+       netif_keep_dst(dev);
++
++      eth_broadcast_addr(dev->broadcast);
+ }
+ static int xfrmi_dev_init(struct net_device *dev)
+ {
+       struct xfrm_if *xi = netdev_priv(dev);
+-      struct net_device *phydev = xi->phydev;
++      struct net_device *phydev = __dev_get_by_index(xi->net, xi->p.link);
+       int err;
+       dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+@@ -599,13 +600,19 @@ static int xfrmi_dev_init(struct net_dev
+       dev->features |= NETIF_F_LLTX;
+-      dev->needed_headroom = phydev->needed_headroom;
+-      dev->needed_tailroom = phydev->needed_tailroom;
+-
+-      if (is_zero_ether_addr(dev->dev_addr))
+-              eth_hw_addr_inherit(dev, phydev);
+-      if (is_zero_ether_addr(dev->broadcast))
+-              memcpy(dev->broadcast, phydev->broadcast, dev->addr_len);
++      if (phydev) {
++              dev->needed_headroom = phydev->needed_headroom;
++              dev->needed_tailroom = phydev->needed_tailroom;
++
++              if (is_zero_ether_addr(dev->dev_addr))
++                      eth_hw_addr_inherit(dev, phydev);
++              if (is_zero_ether_addr(dev->broadcast))
++                      memcpy(dev->broadcast, phydev->broadcast,
++                             dev->addr_len);
++      } else {
++              eth_hw_addr_random(dev);
++              eth_broadcast_addr(dev->broadcast);
++      }
+       return 0;
+ }
+@@ -655,13 +662,8 @@ static int xfrmi_newlink(struct net *src
+       xi->p = p;
+       xi->net = net;
+       xi->dev = dev;
+-      xi->phydev = dev_get_by_index(net, p.link);
+-      if (!xi->phydev)
+-              return -ENODEV;
+       err = xfrmi_create(dev);
+-      if (err < 0)
+-              dev_put(xi->phydev);
+       return err;
+ }
diff --git a/queue-4.19/xfrm-interface-fix-memory-leak-on-creation.patch b/queue-4.19/xfrm-interface-fix-memory-leak-on-creation.patch
new file mode 100644 (file)
index 0000000..303c3ef
--- /dev/null
@@ -0,0 +1,221 @@
+From 56c5ee1a5823e9cf5288b84ae6364cb4112f8225 Mon Sep 17 00:00:00 2001
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Date: Tue, 2 Jul 2019 17:51:39 +0200
+Subject: xfrm interface: fix memory leak on creation
+
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+
+commit 56c5ee1a5823e9cf5288b84ae6364cb4112f8225 upstream.
+
+The following commands produce a backtrace and return an error but the xfrm
+interface is created (in the wrong netns):
+$ ip netns add foo
+$ ip netns add bar
+$ ip -n foo netns set bar 0
+$ ip -n foo link add xfrmi0 link-netnsid 0 type xfrm dev lo if_id 23
+RTNETLINK answers: Invalid argument
+$ ip -n bar link ls xfrmi0
+2: xfrmi0@lo: <NOARP,M-DOWN> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
+    link/none 00:00:00:00:00:00 brd 00:00:00:00:00:00
+
+Here is the backtrace:
+[   79.879174] WARNING: CPU: 0 PID: 1178 at net/core/dev.c:8172 rollback_registered_many+0x86/0x3c1
+[   79.880260] Modules linked in: xfrm_interface nfsv3 nfs_acl auth_rpcgss nfsv4 nfs lockd grace sunrpc fscache button parport_pc parport serio_raw evdev pcspkr loop ext4 crc16 mbcache jbd2 crc32c_generic ide_cd_mod ide_gd_mod cdrom ata_$
+eneric ata_piix libata scsi_mod 8139too piix psmouse i2c_piix4 ide_core 8139cp mii i2c_core floppy
+[   79.883698] CPU: 0 PID: 1178 Comm: ip Not tainted 5.2.0-rc6+ #106
+[   79.884462] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014
+[   79.885447] RIP: 0010:rollback_registered_many+0x86/0x3c1
+[   79.886120] Code: 01 e8 d7 7d c6 ff 0f 0b 48 8b 45 00 4c 8b 20 48 8d 58 90 49 83 ec 70 48 8d 7b 70 48 39 ef 74 44 8a 83 d0 04 00 00 84 c0 75 1f <0f> 0b e8 61 cd ff ff 48 b8 00 01 00 00 00 00 ad de 48 89 43 70 66
+[   79.888667] RSP: 0018:ffffc900015ab740 EFLAGS: 00010246
+[   79.889339] RAX: ffff8882353e5700 RBX: ffff8882353e56a0 RCX: ffff8882353e5710
+[   79.890174] RDX: ffffc900015ab7e0 RSI: ffffc900015ab7e0 RDI: ffff8882353e5710
+[   79.891029] RBP: ffffc900015ab7e0 R08: ffffc900015ab7e0 R09: ffffc900015ab7e0
+[   79.891866] R10: ffffc900015ab7a0 R11: ffffffff82233fec R12: ffffc900015ab770
+[   79.892728] R13: ffffffff81eb7ec0 R14: ffff88822ed6cf00 R15: 00000000ffffffea
+[   79.893557] FS:  00007ff350f31740(0000) GS:ffff888237a00000(0000) knlGS:0000000000000000
+[   79.894581] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[   79.895317] CR2: 00000000006c8580 CR3: 000000022c272000 CR4: 00000000000006f0
+[   79.896137] Call Trace:
+[   79.896464]  unregister_netdevice_many+0x12/0x6c
+[   79.896998]  __rtnl_newlink+0x6e2/0x73b
+[   79.897446]  ? __kmalloc_node_track_caller+0x15e/0x185
+[   79.898039]  ? pskb_expand_head+0x5f/0x1fe
+[   79.898556]  ? stack_access_ok+0xd/0x2c
+[   79.899009]  ? deref_stack_reg+0x12/0x20
+[   79.899462]  ? stack_access_ok+0xd/0x2c
+[   79.899927]  ? stack_access_ok+0xd/0x2c
+[   79.900404]  ? __module_text_address+0x9/0x4f
+[   79.900910]  ? is_bpf_text_address+0x5/0xc
+[   79.901390]  ? kernel_text_address+0x67/0x7b
+[   79.901884]  ? __kernel_text_address+0x1a/0x25
+[   79.902397]  ? unwind_get_return_address+0x12/0x23
+[   79.903122]  ? __cmpxchg_double_slab.isra.37+0x46/0x77
+[   79.903772]  rtnl_newlink+0x43/0x56
+[   79.904217]  rtnetlink_rcv_msg+0x200/0x24c
+
+In fact, each time a xfrm interface was created, a netdev was allocated
+by __rtnl_newlink()/rtnl_create_link() and then another one by
+xfrmi_newlink()/xfrmi_create(). Only the second one was registered, which is
+why the previous commands produce a backtrace: dev_change_net_namespace()
+was called on a netdev with reg_state set to NETREG_UNINITIALIZED (the
+first one).
+
+CC: Lorenzo Colitti <lorenzo@google.com>
+CC: Benedict Wong <benedictwong@google.com>
+CC: Steffen Klassert <steffen.klassert@secunet.com>
+CC: Shannon Nelson <shannon.nelson@oracle.com>
+CC: Antony Antony <antony@phenome.org>
+CC: Eyal Birger <eyal.birger@gmail.com>
+Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces")
+Reported-by: Julien Floret <julien.floret@6wind.com>
+Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/xfrm/xfrm_interface.c |   98 +++++++++++++---------------------------------
+ 1 file changed, 28 insertions(+), 70 deletions(-)
+
+--- a/net/xfrm/xfrm_interface.c
++++ b/net/xfrm/xfrm_interface.c
+@@ -133,7 +133,7 @@ static void xfrmi_dev_free(struct net_de
+       free_percpu(dev->tstats);
+ }
+-static int xfrmi_create2(struct net_device *dev)
++static int xfrmi_create(struct net_device *dev)
+ {
+       struct xfrm_if *xi = netdev_priv(dev);
+       struct net *net = dev_net(dev);
+@@ -156,54 +156,7 @@ out:
+       return err;
+ }
+-static struct xfrm_if *xfrmi_create(struct net *net, struct xfrm_if_parms *p)
+-{
+-      struct net_device *dev;
+-      struct xfrm_if *xi;
+-      char name[IFNAMSIZ];
+-      int err;
+-
+-      if (p->name[0]) {
+-              strlcpy(name, p->name, IFNAMSIZ);
+-      } else {
+-              err = -EINVAL;
+-              goto failed;
+-      }
+-
+-      dev = alloc_netdev(sizeof(*xi), name, NET_NAME_UNKNOWN, xfrmi_dev_setup);
+-      if (!dev) {
+-              err = -EAGAIN;
+-              goto failed;
+-      }
+-
+-      dev_net_set(dev, net);
+-
+-      xi = netdev_priv(dev);
+-      xi->p = *p;
+-      xi->net = net;
+-      xi->dev = dev;
+-      xi->phydev = dev_get_by_index(net, p->link);
+-      if (!xi->phydev) {
+-              err = -ENODEV;
+-              goto failed_free;
+-      }
+-
+-      err = xfrmi_create2(dev);
+-      if (err < 0)
+-              goto failed_dev_put;
+-
+-      return xi;
+-
+-failed_dev_put:
+-      dev_put(xi->phydev);
+-failed_free:
+-      free_netdev(dev);
+-failed:
+-      return ERR_PTR(err);
+-}
+-
+-static struct xfrm_if *xfrmi_locate(struct net *net, struct xfrm_if_parms *p,
+-                                 int create)
++static struct xfrm_if *xfrmi_locate(struct net *net, struct xfrm_if_parms *p)
+ {
+       struct xfrm_if __rcu **xip;
+       struct xfrm_if *xi;
+@@ -211,17 +164,11 @@ static struct xfrm_if *xfrmi_locate(stru
+       for (xip = &xfrmn->xfrmi[0];
+            (xi = rtnl_dereference(*xip)) != NULL;
+-           xip = &xi->next) {
+-              if (xi->p.if_id == p->if_id) {
+-                      if (create)
+-                              return ERR_PTR(-EEXIST);
+-
++           xip = &xi->next)
++              if (xi->p.if_id == p->if_id)
+                       return xi;
+-              }
+-      }
+-      if (!create)
+-              return ERR_PTR(-ENODEV);
+-      return xfrmi_create(net, p);
++
++      return NULL;
+ }
+ static void xfrmi_dev_uninit(struct net_device *dev)
+@@ -689,21 +636,33 @@ static int xfrmi_newlink(struct net *src
+                       struct netlink_ext_ack *extack)
+ {
+       struct net *net = dev_net(dev);
+-      struct xfrm_if_parms *p;
++      struct xfrm_if_parms p;
+       struct xfrm_if *xi;
++      int err;
+-      xi = netdev_priv(dev);
+-      p = &xi->p;
+-
+-      xfrmi_netlink_parms(data, p);
++      xfrmi_netlink_parms(data, &p);
+       if (!tb[IFLA_IFNAME])
+               return -EINVAL;
+-      nla_strlcpy(p->name, tb[IFLA_IFNAME], IFNAMSIZ);
++      nla_strlcpy(p.name, tb[IFLA_IFNAME], IFNAMSIZ);
+-      xi = xfrmi_locate(net, p, 1);
+-      return PTR_ERR_OR_ZERO(xi);
++      xi = xfrmi_locate(net, &p);
++      if (xi)
++              return -EEXIST;
++
++      xi = netdev_priv(dev);
++      xi->p = p;
++      xi->net = net;
++      xi->dev = dev;
++      xi->phydev = dev_get_by_index(net, p.link);
++      if (!xi->phydev)
++              return -ENODEV;
++
++      err = xfrmi_create(dev);
++      if (err < 0)
++              dev_put(xi->phydev);
++      return err;
+ }
+ static void xfrmi_dellink(struct net_device *dev, struct list_head *head)
+@@ -720,9 +679,8 @@ static int xfrmi_changelink(struct net_d
+       xfrmi_netlink_parms(data, &xi->p);
+-      xi = xfrmi_locate(net, &xi->p, 0);
+-
+-      if (IS_ERR_OR_NULL(xi)) {
++      xi = xfrmi_locate(net, &xi->p);
++      if (!xi) {
+               xi = netdev_priv(dev);
+       } else {
+               if (xi->dev != dev)