--- /dev/null
+From 2f31115e940c4afd49b99c33123534e2ac924ffb Mon Sep 17 00:00:00 2001
+From: Ming Lei <ming.lei@redhat.com>
+Date: Tue, 13 Mar 2018 17:42:41 +0800
+Subject: scsi: core: introduce force_blk_mq
+
+From: Ming Lei <ming.lei@redhat.com>
+
+commit 2f31115e940c4afd49b99c33123534e2ac924ffb upstream.
+
+This patch introduces 'force_blk_mq' to the scsi_host_template so that
+drivers that have no desire to support the legacy I/O path can signal
+blk-mq-only support.
+
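+For illustration, a minimal sketch of how a low-level driver would opt
+in (the driver and template names here are hypothetical; the other
+fields are standard scsi_host_template members):
+
+	static struct scsi_host_template example_template = {
+		.module			= THIS_MODULE,
+		.name			= "example",
+		.queuecommand		= example_queuecommand,
+		.this_id		= -1,
+		/* never fall back to the legacy request path */
+		.force_blk_mq		= 1,
+	};
+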
+[mkp: commit desc]
+
+Cc: Omar Sandoval <osandov@fb.com>
+Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
+Cc: James Bottomley <james.bottomley@hansenpartnership.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Don Brace <don.brace@microsemi.com>
+Cc: Kashyap Desai <kashyap.desai@broadcom.com>
+Cc: Mike Snitzer <snitzer@redhat.com>
+Cc: Laurence Oberman <loberman@redhat.com>
+Signed-off-by: Ming Lei <ming.lei@redhat.com>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/scsi/hosts.c | 1 +
+ include/scsi/scsi_host.h | 3 +++
+ 2 files changed, 4 insertions(+)
+
+--- a/drivers/scsi/hosts.c
++++ b/drivers/scsi/hosts.c
+@@ -474,6 +474,7 @@ struct Scsi_Host *scsi_host_alloc(struct
+ shost->dma_boundary = 0xffffffff;
+
+ shost->use_blk_mq = scsi_use_blk_mq;
++ shost->use_blk_mq = scsi_use_blk_mq || shost->hostt->force_blk_mq;
+
+ device_initialize(&shost->shost_gendev);
+ dev_set_name(&shost->shost_gendev, "host%d", shost->host_no);
+--- a/include/scsi/scsi_host.h
++++ b/include/scsi/scsi_host.h
+@@ -452,6 +452,9 @@ struct scsi_host_template {
+ /* True if the controller does not support WRITE SAME */
+ unsigned no_write_same:1;
+
++ /* True if the low-level driver supports blk-mq only */
++ unsigned force_blk_mq:1;
++
+ /*
+ * Countdown for host blocking with no commands outstanding.
+ */
--- /dev/null
+From 8b834bff1b73dce46f4e9f5e84af6f73fed8b0ef Mon Sep 17 00:00:00 2001
+From: Ming Lei <ming.lei@redhat.com>
+Date: Tue, 13 Mar 2018 17:42:39 +0800
+Subject: scsi: hpsa: fix selection of reply queue
+
+From: Ming Lei <ming.lei@redhat.com>
+
+commit 8b834bff1b73dce46f4e9f5e84af6f73fed8b0ef upstream.
+
+Since commit 84676c1f21e8 ("genirq/affinity: assign vectors to all
+possible CPUs") we could end up with an MSI-X vector that did not have
+any online CPUs mapped. This would lead to I/O hangs since there was no
+CPU to receive the completion.
+
+Retrieve IRQ affinity information using pci_irq_get_affinity() and use
+this mapping to choose a reply queue.
+
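+In sketch form, the fix keeps a per-possible-CPU table (reply_map,
+added to struct ctlr_info below) and consults it at submission time;
+this extract mirrors the hunks that follow:
+
+	/* setup: invert each vector's affinity mask into a CPU->queue
+	 * table; fall back to queue 0 if no affinity info is exposed
+	 */
+	for (queue = 0; queue < h->msix_vectors; queue++) {
+		mask = pci_irq_get_affinity(h->pdev, queue);
+		if (!mask)
+			goto fallback;
+		for_each_cpu(cpu, mask)
+			h->reply_map[cpu] = queue;
+	}
+
+	/* submission: the submitting CPU always maps to a live queue */
+	reply_queue = h->reply_map[raw_smp_processor_id()];
+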
+[mkp: tweaked commit desc]
+
+Cc: Hannes Reinecke <hare@suse.de>
+Cc: "Martin K. Petersen" <martin.petersen@oracle.com>,
+Cc: James Bottomley <james.bottomley@hansenpartnership.com>,
+Cc: Christoph Hellwig <hch@lst.de>,
+Cc: Don Brace <don.brace@microsemi.com>
+Cc: Kashyap Desai <kashyap.desai@broadcom.com>
+Cc: Laurence Oberman <loberman@redhat.com>
+Cc: Meelis Roos <mroos@linux.ee>
+Cc: Artem Bityutskiy <artem.bityutskiy@intel.com>
+Cc: Mike Snitzer <snitzer@redhat.com>
+Fixes: 84676c1f21e8 ("genirq/affinity: assign vectors to all possible CPUs")
+Signed-off-by: Ming Lei <ming.lei@redhat.com>
+Tested-by: Laurence Oberman <loberman@redhat.com>
+Tested-by: Don Brace <don.brace@microsemi.com>
+Tested-by: Artem Bityutskiy <artem.bityutskiy@intel.com>
+Acked-by: Don Brace <don.brace@microsemi.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/scsi/hpsa.c | 73 ++++++++++++++++++++++++++++++++++++++--------------
+ drivers/scsi/hpsa.h |  1 +
+ 2 files changed, 55 insertions(+), 19 deletions(-)
+
+--- a/drivers/scsi/hpsa.c
++++ b/drivers/scsi/hpsa.c
+@@ -1040,11 +1040,7 @@ static void set_performant_mode(struct c
+ c->busaddr |= 1 | (h->blockFetchTable[c->Header.SGList] << 1);
+ if (unlikely(!h->msix_vectors))
+ return;
+- if (likely(reply_queue == DEFAULT_REPLY_QUEUE))
+- c->Header.ReplyQueue =
+- raw_smp_processor_id() % h->nreply_queues;
+- else
+- c->Header.ReplyQueue = reply_queue % h->nreply_queues;
++ c->Header.ReplyQueue = reply_queue;
+ }
+ }
+
+@@ -1058,10 +1054,7 @@ static void set_ioaccel1_performant_mode
+ * Tell the controller to post the reply to the queue for this
+ * processor. This seems to give the best I/O throughput.
+ */
+- if (likely(reply_queue == DEFAULT_REPLY_QUEUE))
+- cp->ReplyQueue = smp_processor_id() % h->nreply_queues;
+- else
+- cp->ReplyQueue = reply_queue % h->nreply_queues;
++ cp->ReplyQueue = reply_queue;
+ /*
+ * Set the bits in the address sent down to include:
+ * - performant mode bit (bit 0)
+@@ -1082,10 +1075,7 @@ static void set_ioaccel2_tmf_performant_
+ /* Tell the controller to post the reply to the queue for this
+ * processor. This seems to give the best I/O throughput.
+ */
+- if (likely(reply_queue == DEFAULT_REPLY_QUEUE))
+- cp->reply_queue = smp_processor_id() % h->nreply_queues;
+- else
+- cp->reply_queue = reply_queue % h->nreply_queues;
++ cp->reply_queue = reply_queue;
+ /* Set the bits in the address sent down to include:
+ * - performant mode bit not used in ioaccel mode 2
+ * - pull count (bits 0-3)
+@@ -1104,10 +1094,7 @@ static void set_ioaccel2_performant_mode
+ * Tell the controller to post the reply to the queue for this
+ * processor. This seems to give the best I/O throughput.
+ */
+- if (likely(reply_queue == DEFAULT_REPLY_QUEUE))
+- cp->reply_queue = smp_processor_id() % h->nreply_queues;
+- else
+- cp->reply_queue = reply_queue % h->nreply_queues;
++ cp->reply_queue = reply_queue;
+ /*
+ * Set the bits in the address sent down to include:
+ * - performant mode bit not used in ioaccel mode 2
+@@ -1152,6 +1139,8 @@ static void __enqueue_cmd_and_start_io(s
+ {
+ dial_down_lockup_detection_during_fw_flash(h, c);
+ atomic_inc(&h->commands_outstanding);
++
++ reply_queue = h->reply_map[raw_smp_processor_id()];
+ switch (c->cmd_type) {
+ case CMD_IOACCEL1:
+ set_ioaccel1_performant_mode(h, c, reply_queue);
+@@ -7244,6 +7233,26 @@ static void hpsa_disable_interrupt_mode(
+ h->msix_vectors = 0;
+ }
+
++static void hpsa_setup_reply_map(struct ctlr_info *h)
++{
++ const struct cpumask *mask;
++ unsigned int queue, cpu;
++
++ for (queue = 0; queue < h->msix_vectors; queue++) {
++ mask = pci_irq_get_affinity(h->pdev, queue);
++ if (!mask)
++ goto fallback;
++
++ for_each_cpu(cpu, mask)
++ h->reply_map[cpu] = queue;
++ }
++ return;
++
++fallback:
++ for_each_possible_cpu(cpu)
++ h->reply_map[cpu] = 0;
++}
++
+ /* If MSI/MSI-X is supported by the kernel we will try to enable it on
+ * controllers that are capable. If not, we use legacy INTx mode.
+ */
+@@ -7639,6 +7648,10 @@ static int hpsa_pci_init(struct ctlr_inf
+ err = hpsa_interrupt_mode(h);
+ if (err)
+ goto clean1;
++
++ /* setup mapping between CPU and reply queue */
++ hpsa_setup_reply_map(h);
++
+ err = hpsa_pci_find_memory_BAR(h->pdev, &h->paddr);
+ if (err)
+ goto clean2; /* intmode+region, pci */
+@@ -8284,6 +8297,28 @@ static struct workqueue_struct *hpsa_cre
+ return wq;
+ }
+
++static void hpda_free_ctlr_info(struct ctlr_info *h)
++{
++ kfree(h->reply_map);
++ kfree(h);
++}
++
++static struct ctlr_info *hpda_alloc_ctlr_info(void)
++{
++ struct ctlr_info *h;
++
++ h = kzalloc(sizeof(*h), GFP_KERNEL);
++ if (!h)
++ return NULL;
++
++ h->reply_map = kzalloc(sizeof(*h->reply_map) * nr_cpu_ids, GFP_KERNEL);
++ if (!h->reply_map) {
++ kfree(h);
++ return NULL;
++ }
++ return h;
++}
++
+ static int hpsa_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
+ {
+ int dac, rc;
+@@ -8321,7 +8356,7 @@ reinit_after_soft_reset:
+ * the driver. See comments in hpsa.h for more info.
+ */
+ BUILD_BUG_ON(sizeof(struct CommandList) % COMMANDLIST_ALIGNMENT);
+- h = kzalloc(sizeof(*h), GFP_KERNEL);
++ h = hpda_alloc_ctlr_info();
+ if (!h) {
+ dev_err(&pdev->dev, "Failed to allocate controller head\n");
+ return -ENOMEM;
+@@ -8726,7 +8761,7 @@ static void hpsa_remove_one(struct pci_d
+ h->lockup_detected = NULL; /* init_one 2 */
+ /* (void) pci_disable_pcie_error_reporting(pdev); */ /* init_one 1 */
+
+- kfree(h); /* init_one 1 */
++ hpda_free_ctlr_info(h); /* init_one 1 */
+ }
+
+ static int hpsa_suspend(__attribute__((unused)) struct pci_dev *pdev,
+--- a/drivers/scsi/hpsa.h
++++ b/drivers/scsi/hpsa.h
+@@ -158,6 +158,7 @@ struct bmic_controller_parameters {
+ #pragma pack()
+
+ struct ctlr_info {
++ unsigned int *reply_map;
+ int ctlr;
+ char devname[8];
+ char *product_name;
--- /dev/null
+From b5b6e8c8d3b4cbeb447a0f10c7d5de3caa573299 Mon Sep 17 00:00:00 2001
+From: Ming Lei <ming.lei@redhat.com>
+Date: Tue, 13 Mar 2018 17:42:42 +0800
+Subject: scsi: virtio_scsi: fix IO hang caused by automatic irq vector affinity
+
+From: Ming Lei <ming.lei@redhat.com>
+
+commit b5b6e8c8d3b4cbeb447a0f10c7d5de3caa573299 upstream.
+
+Since commit 84676c1f21e8 ("genirq/affinity: assign vectors to all
+possible CPUs") it is possible to end up in a scenario where only
+offline CPUs are mapped to an interrupt vector.
+
+This is only an issue for the legacy I/O path since with blk-mq/scsi-mq
+an I/O can't be submitted to a hardware queue if the queue isn't mapped
+to an online CPU.
+
+Fix this issue by forcing virtio-scsi to use blk-mq.
+
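+Concretely, the fix reduces to tagging both host templates with the new
+scsi_host_template flag (full hunks below), e.g.:
+
+	static struct scsi_host_template virtscsi_host_template_multi = {
+		...
+		.map_queues		= virtscsi_map_queues,
+		.track_queue_depth	= 1,
+		.force_blk_mq		= 1,
+	};
+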
+[mkp: commit desc]
+
+Cc: Omar Sandoval <osandov@fb.com>
+Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
+Cc: James Bottomley <james.bottomley@hansenpartnership.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Don Brace <don.brace@microsemi.com>
+Cc: Kashyap Desai <kashyap.desai@broadcom.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Mike Snitzer <snitzer@redhat.com>
+Cc: Laurence Oberman <loberman@redhat.com>
+Fixes: 84676c1f21e8 ("genirq/affinity: assign vectors to all possible CPUs")
+Signed-off-by: Ming Lei <ming.lei@redhat.com>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Acked-by: Paolo Bonzini <pbonzini@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/scsi/virtio_scsi.c | 59 ++-------------------------------------------
+ 1 file changed, 3 insertions(+), 56 deletions(-)
+
+--- a/drivers/scsi/virtio_scsi.c
++++ b/drivers/scsi/virtio_scsi.c
+@@ -91,9 +91,6 @@ struct virtio_scsi_vq {
+ struct virtio_scsi_target_state {
+ seqcount_t tgt_seq;
+
+- /* Count of outstanding requests. */
+- atomic_t reqs;
+-
+ /* Currently active virtqueue for requests sent to this target. */
+ struct virtio_scsi_vq *req_vq;
+ };
+@@ -152,8 +149,6 @@ static void virtscsi_complete_cmd(struct
+ struct virtio_scsi_cmd *cmd = buf;
+ struct scsi_cmnd *sc = cmd->sc;
+ struct virtio_scsi_cmd_resp *resp = &cmd->resp.cmd;
+- struct virtio_scsi_target_state *tgt =
+- scsi_target(sc->device)->hostdata;
+
+ dev_dbg(&sc->device->sdev_gendev,
+ "cmd %p response %u status %#02x sense_len %u\n",
+@@ -210,8 +205,6 @@ static void virtscsi_complete_cmd(struct
+ }
+
+ sc->scsi_done(sc);
+-
+- atomic_dec(&tgt->reqs);
+ }
+
+ static void virtscsi_vq_done(struct virtio_scsi *vscsi,
+@@ -580,10 +573,7 @@ static int virtscsi_queuecommand_single(
+ struct scsi_cmnd *sc)
+ {
+ struct virtio_scsi *vscsi = shost_priv(sh);
+- struct virtio_scsi_target_state *tgt =
+- scsi_target(sc->device)->hostdata;
+
+- atomic_inc(&tgt->reqs);
+ return virtscsi_queuecommand(vscsi, &vscsi->req_vqs[0], sc);
+ }
+
+@@ -596,55 +586,11 @@ static struct virtio_scsi_vq *virtscsi_p
+ return &vscsi->req_vqs[hwq];
+ }
+
+-static struct virtio_scsi_vq *virtscsi_pick_vq(struct virtio_scsi *vscsi,
+- struct virtio_scsi_target_state *tgt)
+-{
+- struct virtio_scsi_vq *vq;
+- unsigned long flags;
+- u32 queue_num;
+-
+- local_irq_save(flags);
+- if (atomic_inc_return(&tgt->reqs) > 1) {
+- unsigned long seq;
+-
+- do {
+- seq = read_seqcount_begin(&tgt->tgt_seq);
+- vq = tgt->req_vq;
+- } while (read_seqcount_retry(&tgt->tgt_seq, seq));
+- } else {
+- /* no writes can be concurrent because of atomic_t */
+- write_seqcount_begin(&tgt->tgt_seq);
+-
+- /* keep previous req_vq if a reader just arrived */
+- if (unlikely(atomic_read(&tgt->reqs) > 1)) {
+- vq = tgt->req_vq;
+- goto unlock;
+- }
+-
+- queue_num = smp_processor_id();
+- while (unlikely(queue_num >= vscsi->num_queues))
+- queue_num -= vscsi->num_queues;
+- tgt->req_vq = vq = &vscsi->req_vqs[queue_num];
+- unlock:
+- write_seqcount_end(&tgt->tgt_seq);
+- }
+- local_irq_restore(flags);
+-
+- return vq;
+-}
+-
+ static int virtscsi_queuecommand_multi(struct Scsi_Host *sh,
+ struct scsi_cmnd *sc)
+ {
+ struct virtio_scsi *vscsi = shost_priv(sh);
+- struct virtio_scsi_target_state *tgt =
+- scsi_target(sc->device)->hostdata;
+- struct virtio_scsi_vq *req_vq;
+-
+- if (shost_use_blk_mq(sh))
+- req_vq = virtscsi_pick_vq_mq(vscsi, sc);
+- else
+- req_vq = virtscsi_pick_vq(vscsi, tgt);
++ struct virtio_scsi_vq *req_vq = virtscsi_pick_vq_mq(vscsi, sc);
+
+ return virtscsi_queuecommand(vscsi, req_vq, sc);
+ }
+@@ -775,7 +721,6 @@ static int virtscsi_target_alloc(struct
+ return -ENOMEM;
+
+ seqcount_init(&tgt->tgt_seq);
+- atomic_set(&tgt->reqs, 0);
+ tgt->req_vq = &vscsi->req_vqs[0];
+
+ starget->hostdata = tgt;
+@@ -823,6 +768,7 @@ static struct scsi_host_template virtscs
+ .target_alloc = virtscsi_target_alloc,
+ .target_destroy = virtscsi_target_destroy,
+ .track_queue_depth = 1,
++ .force_blk_mq = 1,
+ };
+
+ static struct scsi_host_template virtscsi_host_template_multi = {
+@@ -844,6 +790,7 @@ static struct scsi_host_template virtscs
+ .target_destroy = virtscsi_target_destroy,
+ .map_queues = virtscsi_map_queues,
+ .track_queue_depth = 1,
++ .force_blk_mq = 1,
+ };
+
+ #define virtscsi_config_get(vdev, fld) \
parisc-enable-config_mlongcalls-by-default.patch
parisc-define-mb-and-add-memory-barriers-to-assembler-unlock-sequences.patch
+scsi-hpsa-fix-selection-of-reply-queue.patch
+scsi-core-introduce-force_blk_mq.patch
+scsi-virtio_scsi-fix-io-hang-caused-by-automatic-irq-vector-affinity.patch