--- /dev/null
+From 60588bfa223ff675b95f866249f90616613fbe31 Mon Sep 17 00:00:00 2001
+From: Cheng Jian <cj.chengjian@huawei.com>
+Date: Fri, 13 Dec 2019 10:45:30 +0800
+Subject: sched/fair: Optimize select_idle_cpu
+
+From: Cheng Jian <cj.chengjian@huawei.com>
+
+commit 60588bfa223ff675b95f866249f90616613fbe31 upstream.
+
+select_idle_cpu() scans the LLC domain for idle CPUs, which is
+always expensive. So the following commit:
+
+ 1ad3aaf3fcd2 ("sched/core: Implement new approach to scale select_idle_cpu()")
+
+introduces a way to limit how many CPUs we scan.
+
+But it consumes some of the 'nr' attempts on CPUs that are not
+allowed for the task and thus wastes those attempts. The function
+always returns nr_cpumask_bits, and we can't find a CPU
+on which our task is allowed to run.
+
+A cpumask may be too big to put on the stack, so, as in select_idle_core(),
+use the per-CPU 'select_idle_mask' cpumask to prevent stack overflow.
+
+Fixes: 1ad3aaf3fcd2 ("sched/core: Implement new approach to scale select_idle_cpu()")
+Signed-off-by: Cheng Jian <cj.chengjian@huawei.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
+Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
+Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
+Link: https://lkml.kernel.org/r/20191213024530.28052-1-cj.chengjian@huawei.com
+Signed-off-by: Yang Wei <yang.wei@linux.alibaba.com>
+Tested-by: Yang Wei <yang.wei@linux.alibaba.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sched/fair.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -5779,6 +5779,7 @@ static inline int select_idle_smt(struct
+ */
+ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
+ {
++ struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
+ struct sched_domain *this_sd;
+ u64 avg_cost, avg_idle;
+ u64 time, cost;
+@@ -5809,11 +5810,11 @@ static int select_idle_cpu(struct task_s
+
+ time = local_clock();
+
+- for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
++ cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed);
++
++ for_each_cpu_wrap(cpu, cpus, target) {
+ if (!--nr)
+ return -1;
+- if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
+- continue;
+ if (idle_cpu(cpu))
+ break;
+ }
--- /dev/null
+From foo@baz Tue Jun 8 07:08:15 PM CEST 2021
+From: Jan Beulich <jbeulich@suse.com>
+Date: Tue, 18 May 2021 18:13:42 +0200
+Subject: xen-pciback: redo VF placement in the virtual topology
+
+From: Jan Beulich <jbeulich@suse.com>
+
+The commit referenced below was incomplete: It merely affected what
+would get written to the vdev-<N> xenstore node. The guest would still
+find the function at the original function number as long as
+__xen_pcibk_get_pci_dev() wouldn't be in sync. The same goes for AER wrt
+__xen_pcibk_get_pcifront_dev().
+
+Undo overriding the function to zero and instead make sure that VFs at
+function zero remain alone in their slot. This has the added benefit of
+improving overall capacity, considering that there's only a total of 32
+slots available right now (PCI segment and bus can both only ever be
+zero at present).
+
+This is upstream commit 4ba50e7c423c29639878c00573288869aa627068.
+
+Fixes: 8a5248fe10b1 ("xen PV passthru: assign SR-IOV virtual functions to separate virtual slots")
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Link: https://lore.kernel.org/r/8def783b-404c-3452-196d-3f3fd4d72c9e@suse.com
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/xen/xen-pciback/vpci.c | 14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+--- a/drivers/xen/xen-pciback/vpci.c
++++ b/drivers/xen/xen-pciback/vpci.c
+@@ -69,7 +69,7 @@ static int __xen_pcibk_add_pci_dev(struc
+ struct pci_dev *dev, int devid,
+ publish_pci_dev_cb publish_cb)
+ {
+- int err = 0, slot, func = -1;
++ int err = 0, slot, func = PCI_FUNC(dev->devfn);
+ struct pci_dev_entry *t, *dev_entry;
+ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
+
+@@ -94,23 +94,26 @@ static int __xen_pcibk_add_pci_dev(struc
+
+ /*
+ * Keep multi-function devices together on the virtual PCI bus, except
+- * virtual functions.
++ * that we want to keep virtual functions at func 0 on their own. They
++ * aren't multi-function devices and hence their presence at func 0
++ * may cause guests to not scan the other functions.
+ */
+- if (!dev->is_virtfn) {
++ if (!dev->is_virtfn || func) {
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
+ if (list_empty(&vpci_dev->dev_list[slot]))
+ continue;
+
+ t = list_entry(list_first(&vpci_dev->dev_list[slot]),
+ struct pci_dev_entry, list);
++ if (t->dev->is_virtfn && !PCI_FUNC(t->dev->devfn))
++ continue;
+
+ if (match_slot(dev, t->dev)) {
+ pr_info("vpci: %s: assign to virtual slot %d func %d\n",
+ pci_name(dev), slot,
+- PCI_FUNC(dev->devfn));
++ func);
+ list_add_tail(&dev_entry->list,
+ &vpci_dev->dev_list[slot]);
+- func = PCI_FUNC(dev->devfn);
+ goto unlock;
+ }
+ }
+@@ -123,7 +126,6 @@ static int __xen_pcibk_add_pci_dev(struc
+ pci_name(dev), slot);
+ list_add_tail(&dev_entry->list,
+ &vpci_dev->dev_list[slot]);
+- func = dev->is_virtfn ? 0 : PCI_FUNC(dev->devfn);
+ goto unlock;
+ }
+ }