scsi: storvsc: Prefer returning channel with the same CPU as on the I/O issuing CPU
author Long Li <longli@microsoft.com>
Thu, 2 Oct 2025 05:05:30 +0000 (22:05 -0700)
committer Martin K. Petersen <martin.petersen@oracle.com>
Tue, 7 Oct 2025 02:04:57 +0000 (22:04 -0400)
When selecting an outgoing channel for I/O, storvsc tries to pick a
channel whose returning CPU is not the same as the issuing CPU. This
worked well in the past, but it does not work well when Hyper-V exposes
a large number of channels (up to the number of CPUs). Using a
different CPU for the returning channel is not efficient on Hyper-V.

Change this behavior by preferring the channel with the same CPU as the
current I/O issuing CPU whenever possible.
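
In outline, the fast path of storvsc_do_io() becomes the following (a
condensed sketch of the hunk below; the NULL-channel slow path, locking,
and the matching get_og_chn() change are omitted):

	/* 1. Prefer the channel bound to the I/O issuing CPU, if it has room. */
	outgoing_channel = READ_ONCE(stor_device->stor_chns[q_num]);
	if (outgoing_channel &&
	    hv_get_avail_to_write_percent(&outgoing_channel->outbound) >
			ring_avail_percent_lowater)
		goto found_channel;
	/* 2. Otherwise scan channels on the same NUMA node, then ...      */
	/* 3. ... channels on any node; if every channel is at or below    */
	/*    the low-water mark, fall back to the same-CPU channel.       */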

Tests have shown improvements in newer Hyper-V/Azure environments, and
no regression in older Hyper-V/Azure environments.

Tested-by: Raheel Abdul Faizy <rabdulfaizy@microsoft.com>
Signed-off-by: Long Li <longli@microsoft.com>
Message-Id: <1759381530-7414-1-git-send-email-longli@linux.microsoft.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/storvsc_drv.c

index 7449743930d2ed27c7e9bf301d221fbf978b9dcc..7fb57dca86e2a5cd2abe586645708d0be7576153 100644
@@ -1406,14 +1406,19 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
        }
 
        /*
-        * Our channel array is sparsley populated and we
+        * Our channel array could be sparsley populated and we
         * initiated I/O on a processor/hw-q that does not
         * currently have a designated channel. Fix this.
         * The strategy is simple:
-        * I. Ensure NUMA locality
-        * II. Distribute evenly (best effort)
+        * I. Prefer the channel associated with the current CPU
+        * II. Ensure NUMA locality
+        * III. Distribute evenly (best effort)
         */
 
+       /* Prefer the channel on the I/O issuing processor/hw-q */
+       if (cpumask_test_cpu(q_num, &stor_device->alloced_cpus))
+               return stor_device->stor_chns[q_num];
+
        node_mask = cpumask_of_node(cpu_to_node(q_num));
 
        num_channels = 0;
@@ -1469,59 +1474,48 @@ static int storvsc_do_io(struct hv_device *device,
        /* See storvsc_change_target_cpu(). */
        outgoing_channel = READ_ONCE(stor_device->stor_chns[q_num]);
        if (outgoing_channel != NULL) {
-               if (outgoing_channel->target_cpu == q_num) {
-                       /*
-                        * Ideally, we want to pick a different channel if
-                        * available on the same NUMA node.
-                        */
-                       node_mask = cpumask_of_node(cpu_to_node(q_num));
-                       for_each_cpu_wrap(tgt_cpu,
-                                &stor_device->alloced_cpus, q_num + 1) {
-                               if (!cpumask_test_cpu(tgt_cpu, node_mask))
-                                       continue;
-                               if (tgt_cpu == q_num)
-                                       continue;
-                               channel = READ_ONCE(
-                                       stor_device->stor_chns[tgt_cpu]);
-                               if (channel == NULL)
-                                       continue;
-                               if (hv_get_avail_to_write_percent(
-                                                       &channel->outbound)
-                                               > ring_avail_percent_lowater) {
-                                       outgoing_channel = channel;
-                                       goto found_channel;
-                               }
-                       }
+               if (hv_get_avail_to_write_percent(&outgoing_channel->outbound)
+                               > ring_avail_percent_lowater)
+                       goto found_channel;
 
-                       /*
-                        * All the other channels on the same NUMA node are
-                        * busy. Try to use the channel on the current CPU
-                        */
-                       if (hv_get_avail_to_write_percent(
-                                               &outgoing_channel->outbound)
-                                       > ring_avail_percent_lowater)
+               /*
+                * Channel is busy, try to find a channel on the same NUMA node
+                */
+               node_mask = cpumask_of_node(cpu_to_node(q_num));
+               for_each_cpu_wrap(tgt_cpu, &stor_device->alloced_cpus,
+                                 q_num + 1) {
+                       if (!cpumask_test_cpu(tgt_cpu, node_mask))
+                               continue;
+                       channel = READ_ONCE(stor_device->stor_chns[tgt_cpu]);
+                       if (!channel)
+                               continue;
+                       if (hv_get_avail_to_write_percent(&channel->outbound)
+                                       > ring_avail_percent_lowater) {
+                               outgoing_channel = channel;
                                goto found_channel;
+                       }
+               }
 
-                       /*
-                        * If we reach here, all the channels on the current
-                        * NUMA node are busy. Try to find a channel in
-                        * other NUMA nodes
-                        */
-                       for_each_cpu(tgt_cpu, &stor_device->alloced_cpus) {
-                               if (cpumask_test_cpu(tgt_cpu, node_mask))
-                                       continue;
-                               channel = READ_ONCE(
-                                       stor_device->stor_chns[tgt_cpu]);
-                               if (channel == NULL)
-                                       continue;
-                               if (hv_get_avail_to_write_percent(
-                                                       &channel->outbound)
-                                               > ring_avail_percent_lowater) {
-                                       outgoing_channel = channel;
-                                       goto found_channel;
-                               }
+               /*
+                * If we reach here, all the channels on the current
+                * NUMA node are busy. Try to find a channel in
+                * all NUMA nodes
+                */
+               for_each_cpu_wrap(tgt_cpu, &stor_device->alloced_cpus,
+                                 q_num + 1) {
+                       channel = READ_ONCE(stor_device->stor_chns[tgt_cpu]);
+                       if (!channel)
+                               continue;
+                       if (hv_get_avail_to_write_percent(&channel->outbound)
+                                       > ring_avail_percent_lowater) {
+                               outgoing_channel = channel;
+                               goto found_channel;
                        }
                }
+               /*
+                * If we reach here, all the channels are busy. Use the
+                * original channel found.
+                */
        } else {
                spin_lock_irqsave(&stor_device->lock, flags);
                outgoing_channel = stor_device->stor_chns[q_num];