--- /dev/null
+From a5a8ef937cfa79167f4b2a5602092b8d14fd6b9a Mon Sep 17 00:00:00 2001
+From: Colin Xu <colin.xu@intel.com>
+Date: Mon, 9 Nov 2020 15:39:22 +0800
+Subject: drm/i915/gvt: Fix virtual display setup for BXT/APL
+
+From: Colin Xu <colin.xu@intel.com>
+
+commit a5a8ef937cfa79167f4b2a5602092b8d14fd6b9a upstream.
+
+Program display related vregs to proper values at initialization, and set
+up the virtual monitor and hotplug.
+
+vGPU virtual display vregs inherit their values from pregs. The virtual DP
+monitor is always set up on PORT_B for BXT/APL. However, the host may have
+a monitor connected on another port, or no monitor connected at all.
+Without properly set up PIPE/DDI/PLL related vregs, the guest driver may
+not set up the virtual display as expected, and the guest desktop may not
+be created.
+Since only one virtual display is supported, enable PIPE_A only, and
+enable transcoder/DDI/PLL based on which port is set up for BXT/APL.
+
+V2:
+Revise commit message.
+
+V3:
+set_edid should be on PORT_B for BXT.
+Inject hpd event for BXT.
+
+V4:
+Temporarily disable vfio edid on BXT/APL until issue fixed.
+
+V5:
+Rebase to use new HPD define GEN8_DE_PORT_HOTPLUG for BXT.
+Put vfio edid disabling on BXT/APL to a separate patch.
+
+Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
+Signed-off-by: Colin Xu <colin.xu@intel.com>
+Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
+Link: http://patchwork.freedesktop.org/patch/msgid/20201109073922.757759-1-colin.xu@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/i915/gvt/display.c | 173 +++++++++++++++++++++++++++++++++++++
+ drivers/gpu/drm/i915/gvt/mmio.c | 5 +
+ 2 files changed, 178 insertions(+)
+
+--- a/drivers/gpu/drm/i915/gvt/display.c
++++ b/drivers/gpu/drm/i915/gvt/display.c
+@@ -173,21 +173,161 @@ static void emulate_monitor_status_chang
+ int pipe;
+
+ if (IS_BROXTON(dev_priv)) {
++ enum transcoder trans;
++ enum port port;
++
++ /* Clear PIPE, DDI, PHY, HPD before setting new */
+ vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &= ~(BXT_DE_PORT_HP_DDIA |
+ BXT_DE_PORT_HP_DDIB |
+ BXT_DE_PORT_HP_DDIC);
+
++ for_each_pipe(dev_priv, pipe) {
++ vgpu_vreg_t(vgpu, PIPECONF(pipe)) &=
++ ~(PIPECONF_ENABLE | I965_PIPECONF_ACTIVE);
++ vgpu_vreg_t(vgpu, DSPCNTR(pipe)) &= ~DISPLAY_PLANE_ENABLE;
++ vgpu_vreg_t(vgpu, SPRCTL(pipe)) &= ~SPRITE_ENABLE;
++ vgpu_vreg_t(vgpu, CURCNTR(pipe)) &= ~MCURSOR_MODE;
++ vgpu_vreg_t(vgpu, CURCNTR(pipe)) |= MCURSOR_MODE_DISABLE;
++ }
++
++ for (trans = TRANSCODER_A; trans <= TRANSCODER_EDP; trans++) {
++ vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(trans)) &=
++ ~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
++ TRANS_DDI_PORT_MASK | TRANS_DDI_FUNC_ENABLE);
++ }
++ vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
++ ~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
++ TRANS_DDI_PORT_MASK);
++
++ for (port = PORT_A; port <= PORT_C; port++) {
++ vgpu_vreg_t(vgpu, BXT_PHY_CTL(port)) &=
++ ~BXT_PHY_LANE_ENABLED;
++ vgpu_vreg_t(vgpu, BXT_PHY_CTL(port)) |=
++ (BXT_PHY_CMNLANE_POWERDOWN_ACK |
++ BXT_PHY_LANE_POWERDOWN_ACK);
++
++ vgpu_vreg_t(vgpu, BXT_PORT_PLL_ENABLE(port)) &=
++ ~(PORT_PLL_POWER_STATE | PORT_PLL_POWER_ENABLE |
++ PORT_PLL_REF_SEL | PORT_PLL_LOCK |
++ PORT_PLL_ENABLE);
++
++ vgpu_vreg_t(vgpu, DDI_BUF_CTL(port)) &=
++ ~(DDI_INIT_DISPLAY_DETECTED |
++ DDI_BUF_CTL_ENABLE);
++ vgpu_vreg_t(vgpu, DDI_BUF_CTL(port)) |= DDI_BUF_IS_IDLE;
++ }
++
++ vgpu_vreg_t(vgpu, BXT_P_CR_GT_DISP_PWRON) &= ~(BIT(0) | BIT(1));
++ vgpu_vreg_t(vgpu, BXT_PORT_CL1CM_DW0(DPIO_PHY0)) &=
++ ~PHY_POWER_GOOD;
++ vgpu_vreg_t(vgpu, BXT_PORT_CL1CM_DW0(DPIO_PHY1)) &=
++ ~PHY_POWER_GOOD;
++ vgpu_vreg_t(vgpu, BXT_PHY_CTL_FAMILY(DPIO_PHY0)) &= ~BIT(30);
++ vgpu_vreg_t(vgpu, BXT_PHY_CTL_FAMILY(DPIO_PHY1)) &= ~BIT(30);
++
++ vgpu_vreg_t(vgpu, SFUSE_STRAP) &= ~SFUSE_STRAP_DDIB_DETECTED;
++ vgpu_vreg_t(vgpu, SFUSE_STRAP) &= ~SFUSE_STRAP_DDIC_DETECTED;
++
++ /*
++ * Only 1 PIPE enabled in current vGPU display and PIPE_A is
++ * tied to TRANSCODER_A in HW, so it's safe to assume PIPE_A,
++ * TRANSCODER_A can be enabled. PORT_x depends on the input of
++ * setup_virtual_dp_monitor.
++ */
++ vgpu_vreg_t(vgpu, PIPECONF(PIPE_A)) |= PIPECONF_ENABLE;
++ vgpu_vreg_t(vgpu, PIPECONF(PIPE_A)) |= I965_PIPECONF_ACTIVE;
++
++ /*
++ * Golden M/N are calculated based on:
++ * 24 bpp, 4 lanes, 154000 pixel clk (from virtual EDID),
++ * DP link clk 1620 MHz and non-constant_n.
++ * TODO: calculate DP link symbol clk and stream clk m/n.
++ */
++ vgpu_vreg_t(vgpu, PIPE_DATA_M1(TRANSCODER_A)) = 63 << TU_SIZE_SHIFT;
++ vgpu_vreg_t(vgpu, PIPE_DATA_M1(TRANSCODER_A)) |= 0x5b425e;
++ vgpu_vreg_t(vgpu, PIPE_DATA_N1(TRANSCODER_A)) = 0x800000;
++ vgpu_vreg_t(vgpu, PIPE_LINK_M1(TRANSCODER_A)) = 0x3cd6e;
++ vgpu_vreg_t(vgpu, PIPE_LINK_N1(TRANSCODER_A)) = 0x80000;
++
++ /* Enable per-DDI/PORT vreg */
+ if (intel_vgpu_has_monitor_on_port(vgpu, PORT_A)) {
++ vgpu_vreg_t(vgpu, BXT_P_CR_GT_DISP_PWRON) |= BIT(1);
++ vgpu_vreg_t(vgpu, BXT_PORT_CL1CM_DW0(DPIO_PHY1)) |=
++ PHY_POWER_GOOD;
++ vgpu_vreg_t(vgpu, BXT_PHY_CTL_FAMILY(DPIO_PHY1)) |=
++ BIT(30);
++ vgpu_vreg_t(vgpu, BXT_PHY_CTL(PORT_A)) |=
++ BXT_PHY_LANE_ENABLED;
++ vgpu_vreg_t(vgpu, BXT_PHY_CTL(PORT_A)) &=
++ ~(BXT_PHY_CMNLANE_POWERDOWN_ACK |
++ BXT_PHY_LANE_POWERDOWN_ACK);
++ vgpu_vreg_t(vgpu, BXT_PORT_PLL_ENABLE(PORT_A)) |=
++ (PORT_PLL_POWER_STATE | PORT_PLL_POWER_ENABLE |
++ PORT_PLL_REF_SEL | PORT_PLL_LOCK |
++ PORT_PLL_ENABLE);
++ vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_A)) |=
++ (DDI_BUF_CTL_ENABLE | DDI_INIT_DISPLAY_DETECTED);
++ vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_A)) &=
++ ~DDI_BUF_IS_IDLE;
++ vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_EDP)) |=
++ (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
++ TRANS_DDI_FUNC_ENABLE);
+ vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |=
+ BXT_DE_PORT_HP_DDIA;
+ }
+
+ if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) {
++ vgpu_vreg_t(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIB_DETECTED;
++ vgpu_vreg_t(vgpu, BXT_P_CR_GT_DISP_PWRON) |= BIT(0);
++ vgpu_vreg_t(vgpu, BXT_PORT_CL1CM_DW0(DPIO_PHY0)) |=
++ PHY_POWER_GOOD;
++ vgpu_vreg_t(vgpu, BXT_PHY_CTL_FAMILY(DPIO_PHY0)) |=
++ BIT(30);
++ vgpu_vreg_t(vgpu, BXT_PHY_CTL(PORT_B)) |=
++ BXT_PHY_LANE_ENABLED;
++ vgpu_vreg_t(vgpu, BXT_PHY_CTL(PORT_B)) &=
++ ~(BXT_PHY_CMNLANE_POWERDOWN_ACK |
++ BXT_PHY_LANE_POWERDOWN_ACK);
++ vgpu_vreg_t(vgpu, BXT_PORT_PLL_ENABLE(PORT_B)) |=
++ (PORT_PLL_POWER_STATE | PORT_PLL_POWER_ENABLE |
++ PORT_PLL_REF_SEL | PORT_PLL_LOCK |
++ PORT_PLL_ENABLE);
++ vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_B)) |=
++ DDI_BUF_CTL_ENABLE;
++ vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_B)) &=
++ ~DDI_BUF_IS_IDLE;
++ vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |=
++ (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
++ (PORT_B << TRANS_DDI_PORT_SHIFT) |
++ TRANS_DDI_FUNC_ENABLE);
+ vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |=
+ BXT_DE_PORT_HP_DDIB;
+ }
+
+ if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) {
++ vgpu_vreg_t(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIC_DETECTED;
++ vgpu_vreg_t(vgpu, BXT_P_CR_GT_DISP_PWRON) |= BIT(0);
++ vgpu_vreg_t(vgpu, BXT_PORT_CL1CM_DW0(DPIO_PHY0)) |=
++ PHY_POWER_GOOD;
++ vgpu_vreg_t(vgpu, BXT_PHY_CTL_FAMILY(DPIO_PHY0)) |=
++ BIT(30);
++ vgpu_vreg_t(vgpu, BXT_PHY_CTL(PORT_C)) |=
++ BXT_PHY_LANE_ENABLED;
++ vgpu_vreg_t(vgpu, BXT_PHY_CTL(PORT_C)) &=
++ ~(BXT_PHY_CMNLANE_POWERDOWN_ACK |
++ BXT_PHY_LANE_POWERDOWN_ACK);
++ vgpu_vreg_t(vgpu, BXT_PORT_PLL_ENABLE(PORT_C)) |=
++ (PORT_PLL_POWER_STATE | PORT_PLL_POWER_ENABLE |
++ PORT_PLL_REF_SEL | PORT_PLL_LOCK |
++ PORT_PLL_ENABLE);
++ vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_C)) |=
++ DDI_BUF_CTL_ENABLE;
++ vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_C)) &=
++ ~DDI_BUF_IS_IDLE;
++ vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |=
++ (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
++ (PORT_B << TRANS_DDI_PORT_SHIFT) |
++ TRANS_DDI_FUNC_ENABLE);
+ vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |=
+ BXT_DE_PORT_HP_DDIC;
+ }
+@@ -519,6 +659,39 @@ void intel_vgpu_emulate_hotplug(struct i
+ vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |=
+ PORTD_HOTPLUG_STATUS_MASK;
+ intel_vgpu_trigger_virtual_event(vgpu, DP_D_HOTPLUG);
++ } else if (IS_BROXTON(dev_priv)) {
++ if (connected) {
++ if (intel_vgpu_has_monitor_on_port(vgpu, PORT_A)) {
++ vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= BXT_DE_PORT_HP_DDIA;
++ }
++ if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) {
++ vgpu_vreg_t(vgpu, SFUSE_STRAP) |=
++ SFUSE_STRAP_DDIB_DETECTED;
++ vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= BXT_DE_PORT_HP_DDIB;
++ }
++ if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) {
++ vgpu_vreg_t(vgpu, SFUSE_STRAP) |=
++ SFUSE_STRAP_DDIC_DETECTED;
++ vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= BXT_DE_PORT_HP_DDIC;
++ }
++ } else {
++ if (intel_vgpu_has_monitor_on_port(vgpu, PORT_A)) {
++ vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &= ~BXT_DE_PORT_HP_DDIA;
++ }
++ if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) {
++ vgpu_vreg_t(vgpu, SFUSE_STRAP) &=
++ ~SFUSE_STRAP_DDIB_DETECTED;
++ vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &= ~BXT_DE_PORT_HP_DDIB;
++ }
++ if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) {
++ vgpu_vreg_t(vgpu, SFUSE_STRAP) &=
++ ~SFUSE_STRAP_DDIC_DETECTED;
++ vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &= ~BXT_DE_PORT_HP_DDIC;
++ }
++ }
++ vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |=
++ PORTB_HOTPLUG_STATUS_MASK;
++ intel_vgpu_trigger_virtual_event(vgpu, DP_B_HOTPLUG);
+ }
+ }
+
+--- a/drivers/gpu/drm/i915/gvt/mmio.c
++++ b/drivers/gpu/drm/i915/gvt/mmio.c
+@@ -280,6 +280,11 @@ void intel_vgpu_reset_mmio(struct intel_
+ vgpu_vreg_t(vgpu, BXT_PHY_CTL(PORT_C)) |=
+ BXT_PHY_CMNLANE_POWERDOWN_ACK |
+ BXT_PHY_LANE_POWERDOWN_ACK;
++ vgpu_vreg_t(vgpu, SKL_FUSE_STATUS) |=
++ SKL_FUSE_DOWNLOAD_STATUS |
++ SKL_FUSE_PG_DIST_STATUS(SKL_PG0) |
++ SKL_FUSE_PG_DIST_STATUS(SKL_PG1) |
++ SKL_FUSE_PG_DIST_STATUS(SKL_PG2);
+ }
+ } else {
+ #define GVT_GEN8_MMIO_RESET_OFFSET (0x44200)
--- /dev/null
+From 2b5715fc17386a6223490d5b8f08d031999b0c0b Mon Sep 17 00:00:00 2001
+From: Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
+Date: Fri, 5 Feb 2021 09:14:28 +0100
+Subject: RDMA/srp: Fix support for unpopulated and unbalanced NUMA nodes
+
+From: Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
+
+commit 2b5715fc17386a6223490d5b8f08d031999b0c0b upstream.
+
+The current code computes a number of channels per SRP target and spreads
+them equally across all online NUMA nodes. Each channel is then assigned
+a CPU within this node.
+
+In the case of unbalanced, or even unpopulated nodes, some channels do not
+get a CPU associated and thus do not get connected. This causes the SRP
+connection to fail.
+
+This patch solves the issue by rewriting channel computation and
+allocation:
+
+- Drop channel to node/CPU association as it had no real effect on
+ locality but added unnecessary complexity.
+
+- Tweak the number of channels allocated to reduce CPU contention when
+ possible:
+ - Up to one channel per CPU (instead of up to 4 by node)
+ - At least 4 channels per node, unless ch_count module parameter is
+ used.
+
+Link: https://lore.kernel.org/r/9cb4d9d3-30ad-2276-7eff-e85f7ddfb411@suse.com
+Signed-off-by: Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
+Reviewed-by: Bart Van Assche <bvanassche@acm.org>
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Cc: Yi Zhang <yi.zhang@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/ulp/srp/ib_srp.c | 116 ++++++++++++++----------------------
+ 1 file changed, 48 insertions(+), 68 deletions(-)
+
+--- a/drivers/infiniband/ulp/srp/ib_srp.c
++++ b/drivers/infiniband/ulp/srp/ib_srp.c
+@@ -3624,7 +3624,7 @@ static ssize_t srp_create_target(struct
+ struct srp_rdma_ch *ch;
+ struct srp_device *srp_dev = host->srp_dev;
+ struct ib_device *ibdev = srp_dev->dev;
+- int ret, node_idx, node, cpu, i;
++ int ret, i, ch_idx;
+ unsigned int max_sectors_per_mr, mr_per_cmd = 0;
+ bool multich = false;
+ uint32_t max_iu_len;
+@@ -3749,81 +3749,61 @@ static ssize_t srp_create_target(struct
+ goto out;
+
+ ret = -ENOMEM;
+- if (target->ch_count == 0)
++ if (target->ch_count == 0) {
+ target->ch_count =
+- max_t(unsigned int, num_online_nodes(),
+- min(ch_count ?:
+- min(4 * num_online_nodes(),
+- ibdev->num_comp_vectors),
+- num_online_cpus()));
++ min(ch_count ?:
++ max(4 * num_online_nodes(),
++ ibdev->num_comp_vectors),
++ num_online_cpus());
++ }
++
+ target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
+ GFP_KERNEL);
+ if (!target->ch)
+ goto out;
+
+- node_idx = 0;
+- for_each_online_node(node) {
+- const int ch_start = (node_idx * target->ch_count /
+- num_online_nodes());
+- const int ch_end = ((node_idx + 1) * target->ch_count /
+- num_online_nodes());
+- const int cv_start = node_idx * ibdev->num_comp_vectors /
+- num_online_nodes();
+- const int cv_end = (node_idx + 1) * ibdev->num_comp_vectors /
+- num_online_nodes();
+- int cpu_idx = 0;
+-
+- for_each_online_cpu(cpu) {
+- if (cpu_to_node(cpu) != node)
+- continue;
+- if (ch_start + cpu_idx >= ch_end)
+- continue;
+- ch = &target->ch[ch_start + cpu_idx];
+- ch->target = target;
+- ch->comp_vector = cv_start == cv_end ? cv_start :
+- cv_start + cpu_idx % (cv_end - cv_start);
+- spin_lock_init(&ch->lock);
+- INIT_LIST_HEAD(&ch->free_tx);
+- ret = srp_new_cm_id(ch);
+- if (ret)
+- goto err_disconnect;
+-
+- ret = srp_create_ch_ib(ch);
+- if (ret)
+- goto err_disconnect;
+-
+- ret = srp_alloc_req_data(ch);
+- if (ret)
+- goto err_disconnect;
+-
+- ret = srp_connect_ch(ch, max_iu_len, multich);
+- if (ret) {
+- char dst[64];
+-
+- if (target->using_rdma_cm)
+- snprintf(dst, sizeof(dst), "%pIS",
+- &target->rdma_cm.dst);
+- else
+- snprintf(dst, sizeof(dst), "%pI6",
+- target->ib_cm.orig_dgid.raw);
+- shost_printk(KERN_ERR, target->scsi_host,
+- PFX "Connection %d/%d to %s failed\n",
+- ch_start + cpu_idx,
+- target->ch_count, dst);
+- if (node_idx == 0 && cpu_idx == 0) {
+- goto free_ch;
+- } else {
+- srp_free_ch_ib(target, ch);
+- srp_free_req_data(target, ch);
+- target->ch_count = ch - target->ch;
+- goto connected;
+- }
++ for (ch_idx = 0; ch_idx < target->ch_count; ++ch_idx) {
++ ch = &target->ch[ch_idx];
++ ch->target = target;
++ ch->comp_vector = ch_idx % ibdev->num_comp_vectors;
++ spin_lock_init(&ch->lock);
++ INIT_LIST_HEAD(&ch->free_tx);
++ ret = srp_new_cm_id(ch);
++ if (ret)
++ goto err_disconnect;
++
++ ret = srp_create_ch_ib(ch);
++ if (ret)
++ goto err_disconnect;
++
++ ret = srp_alloc_req_data(ch);
++ if (ret)
++ goto err_disconnect;
++
++ ret = srp_connect_ch(ch, max_iu_len, multich);
++ if (ret) {
++ char dst[64];
++
++ if (target->using_rdma_cm)
++ snprintf(dst, sizeof(dst), "%pIS",
++ &target->rdma_cm.dst);
++ else
++ snprintf(dst, sizeof(dst), "%pI6",
++ target->ib_cm.orig_dgid.raw);
++ shost_printk(KERN_ERR, target->scsi_host,
++ PFX "Connection %d/%d to %s failed\n",
++ ch_idx,
++ target->ch_count, dst);
++ if (ch_idx == 0) {
++ goto free_ch;
++ } else {
++ srp_free_ch_ib(target, ch);
++ srp_free_req_data(target, ch);
++ target->ch_count = ch - target->ch;
++ goto connected;
+ }
+-
+- multich = true;
+- cpu_idx++;
+ }
+- node_idx++;
++ multich = true;
+ }
+
+ connected: