From: Greg Kroah-Hartman
Date: Fri, 4 Sep 2020 11:57:39 +0000 (+0200)
Subject: 4.19-stable patches
X-Git-Tag: v5.4.63~3
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=60c3670b755bfe3485a3bc960921dd0f381e11a7;p=thirdparty%2Fkernel%2Fstable-queue.git

4.19-stable patches

added patches:
	scsi-target-tcmu-fix-size-in-calls-to-tcmu_flush_dcache_range.patch
	scsi-target-tcmu-optimize-use-of-flush_dcache_page.patch
---

diff --git a/queue-4.19/scsi-target-tcmu-fix-size-in-calls-to-tcmu_flush_dcache_range.patch b/queue-4.19/scsi-target-tcmu-fix-size-in-calls-to-tcmu_flush_dcache_range.patch
new file mode 100644
index 00000000000..82ebfd16d36
--- /dev/null
+++ b/queue-4.19/scsi-target-tcmu-fix-size-in-calls-to-tcmu_flush_dcache_range.patch
@@ -0,0 +1,66 @@
+From 8c4e0f212398cdd1eb4310a5981d06a723cdd24f Mon Sep 17 00:00:00 2001
+From: Bodo Stroesser
+Date: Thu, 28 May 2020 21:31:08 +0200
+Subject: scsi: target: tcmu: Fix size in calls to tcmu_flush_dcache_range
+
+From: Bodo Stroesser
+
+commit 8c4e0f212398cdd1eb4310a5981d06a723cdd24f upstream.
+
+1) If the remaining ring space before the end of the ring is smaller than
+   the next cmd to write, tcmu writes a padding entry which fills the
+   remaining space at the end of the ring.
+
+   Then tcmu calls tcmu_flush_dcache_range() with the size of struct
+   tcmu_cmd_entry as data length to flush. If the space filled by the
+   padding was smaller than tcmu_cmd_entry, tcmu_flush_dcache_range() is
+   called for an address range reaching behind the end of the vmalloc'ed
+   ring.
+
+   tcmu_flush_dcache_range() in a loop calls
+   flush_dcache_page(virt_to_page(start)); for every page being part of
+   the range. On x86 the line is optimized out by the compiler, as
+   flush_dcache_page() is empty on x86.
+
+   But I assume the above can cause trouble on other architectures that
+   really have a flush_dcache_page(). For paddings, only the header part
+   of an entry is relevant. Due to alignment rules, the header always
+   fits in the remaining space if padding is needed. So
+   tcmu_flush_dcache_range() can safely be called with sizeof(entry->hdr)
+   as the length here.
+
+2) After it has written a command to the cmd ring, tcmu calls
+   tcmu_flush_dcache_range() using the size of a struct tcmu_cmd_entry as
+   data length to flush. But if a command needs many iovecs, the real
+   size of the command may be bigger than tcmu_cmd_entry, so a part of
+   the written command is not flushed then.
+
+Link: https://lore.kernel.org/r/20200528193108.9085-1-bstroesser@ts.fujitsu.com
+Acked-by: Mike Christie
+Signed-off-by: Bodo Stroesser
+Signed-off-by: Martin K. Petersen
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/target/target_core_user.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/target/target_core_user.c
++++ b/drivers/target/target_core_user.c
+@@ -1018,7 +1018,7 @@ static int queue_cmd_ring(struct tcmu_cm
+ 	entry->hdr.cmd_id = 0; /* not used for PAD */
+ 	entry->hdr.kflags = 0;
+ 	entry->hdr.uflags = 0;
+-	tcmu_flush_dcache_range(entry, sizeof(*entry));
++	tcmu_flush_dcache_range(entry, sizeof(entry->hdr));
+
+ 	UPDATE_HEAD(mb->cmd_head, pad_size, udev->cmdr_size);
+ 	tcmu_flush_dcache_range(mb, sizeof(*mb));
+@@ -1083,7 +1083,7 @@ static int queue_cmd_ring(struct tcmu_cm
+ 	cdb_off = CMDR_OFF + cmd_head + base_command_size;
+ 	memcpy((void *) mb + cdb_off, se_cmd->t_task_cdb, scsi_command_size(se_cmd->t_task_cdb));
+ 	entry->req.cdb_off = cdb_off;
+-	tcmu_flush_dcache_range(entry, sizeof(*entry));
++	tcmu_flush_dcache_range(entry, command_size);
+
+ 	UPDATE_HEAD(mb->cmd_head, command_size, udev->cmdr_size);
+ 	tcmu_flush_dcache_range(mb, sizeof(*mb));
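
For readers without the source at hand: the commit message above says the
wrapper loops over the range calling flush_dcache_page(virt_to_page(start))
for every page. The sketch below is reconstructed from that description
alone; the rounding helpers and exact shape are assumptions in common
kernel idiom, not the verbatim 4.19 macro.

/*
 * Hedged sketch of a range-flush wrapper as described above.  It rounds
 * [vaddr, vaddr + size) out to page boundaries and flushes every page in
 * between.  This is why passing sizeof(*entry) for a padding entry that
 * is smaller than struct tcmu_cmd_entry can flush a page behind the end
 * of the vmalloc'ed ring.
 */
#define tcmu_flush_dcache_range(vaddr, size)				\
	do {								\
		unsigned long start = (unsigned long)(vaddr);		\
		unsigned long end = round_up(start + (size), PAGE_SIZE); \
									\
		start = round_down(start, PAGE_SIZE);			\
		while (start < end) {					\
			flush_dcache_page(virt_to_page(start));		\
			start += PAGE_SIZE;				\
		}							\
	} while (0)

With a padding entry of, say, 32 bytes sitting in the last 32 bytes of the
ring, size = sizeof(*entry) rounds 'end' up past the ring's last page,
which is exactly the out-of-bounds flush case 1) describes; using
sizeof(entry->hdr) keeps the flushed range inside the padding.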
diff --git a/queue-4.19/scsi-target-tcmu-optimize-use-of-flush_dcache_page.patch b/queue-4.19/scsi-target-tcmu-optimize-use-of-flush_dcache_page.patch
new file mode 100644
index 00000000000..c0c34072323
--- /dev/null
+++ b/queue-4.19/scsi-target-tcmu-optimize-use-of-flush_dcache_page.patch
@@ -0,0 +1,94 @@
+From 3c58f737231e2c8cbf543a09d84d8c8e80e05e43 Mon Sep 17 00:00:00 2001
+From: Bodo Stroesser
+Date: Thu, 18 Jun 2020 15:16:31 +0200
+Subject: scsi: target: tcmu: Optimize use of flush_dcache_page
+
+From: Bodo Stroesser
+
+commit 3c58f737231e2c8cbf543a09d84d8c8e80e05e43 upstream.
+
+(scatter|gather)_data_area() need to flush the dcache after writing data
+to or before reading data from a page in the uio data area. The two
+routines are able to handle data transfer to/from such a page in
+fragments, and flush the cache after each fragment is copied by calling
+the wrapper tcmu_flush_dcache_range().
+
+That means:
+
+1) flush_dcache_page() can be called multiple times for the same page.
+
+2) Calling flush_dcache_page() indirectly using the wrapper does not make
+   sense, because each call of the wrapper is for one single page only
+   and the calling routine already has the correct page pointer.
+
+Change (scatter|gather)_data_area() such that, instead of calling
+tcmu_flush_dcache_range() before/after each memcpy, they now call
+flush_dcache_page() before unmapping a page (when writing is complete for
+that page) or after mapping a page (when starting to read the page).
+
+After this change, only calls to tcmu_flush_dcache_range() for addresses
+in the vmalloc'ed command ring are left over.
+
+The patch was tested on ARM with kernels 4.19.118 and 5.7.2.
+
+Link: https://lore.kernel.org/r/20200618131632.32748-2-bstroesser@ts.fujitsu.com
+Tested-by: JiangYu
+Tested-by: Daniel Meyerholt
+Acked-by: Mike Christie
+Signed-off-by: Bodo Stroesser
+Signed-off-by: Martin K. Petersen
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/target/target_core_user.c |   11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/drivers/target/target_core_user.c
++++ b/drivers/target/target_core_user.c
+@@ -687,8 +687,10 @@ static void scatter_data_area(struct tcm
+ 		from = kmap_atomic(sg_page(sg)) + sg->offset;
+ 		while (sg_remaining > 0) {
+ 			if (block_remaining == 0) {
+-				if (to)
++				if (to) {
++					flush_dcache_page(page);
+ 					kunmap_atomic(to);
++				}
+
+ 				block_remaining = DATA_BLOCK_SIZE;
+ 				dbi = tcmu_cmd_get_dbi(tcmu_cmd);
+@@ -733,7 +735,6 @@ static void scatter_data_area(struct tcm
+ 				memcpy(to + offset,
+ 				       from + sg->length - sg_remaining,
+ 				       copy_bytes);
+-				tcmu_flush_dcache_range(to, copy_bytes);
+ 			}
+
+ 			sg_remaining -= copy_bytes;
+@@ -742,8 +743,10 @@ static void scatter_data_area(struct tcm
+ 		kunmap_atomic(from - sg->offset);
+ 	}
+
+-	if (to)
++	if (to) {
++		flush_dcache_page(page);
+ 		kunmap_atomic(to);
++	}
+ }
+
+ static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd,
+@@ -789,13 +792,13 @@ static void gather_data_area(struct tcmu
+ 			dbi = tcmu_cmd_get_dbi(cmd);
+ 			page = tcmu_get_block_page(udev, dbi);
+ 			from = kmap_atomic(page);
++			flush_dcache_page(page);
+ 		}
+ 		copy_bytes = min_t(size_t, sg_remaining,
+ 				   block_remaining);
+ 		if (read_len < copy_bytes)
+ 			copy_bytes = read_len;
+ 		offset = DATA_BLOCK_SIZE - block_remaining;
+-		tcmu_flush_dcache_range(from, copy_bytes);
+ 		memcpy(to + sg->length - sg_remaining, from + offset,
+ 		       copy_bytes);
+
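
To see the new flush placement at a glance, here is an illustrative sketch
of the pattern both routines follow after this patch: one
flush_dcache_page() per kmap'ed page instead of one
tcmu_flush_dcache_range() per copied fragment. Variable names are taken
from the hunks above; the loop bookkeeping (dbi iteration, fragment
arithmetic) is elided.

	struct page *page = tcmu_get_block_page(udev, dbi);
	void *to, *from;

	/* scatter (kernel sg -> uio data area): copy every fragment
	 * destined for this page first, then flush once, just before
	 * the page is unmapped. */
	to = kmap_atomic(page);
	/* ... one or more memcpy() calls fill parts of this page ... */
	flush_dcache_page(page);
	kunmap_atomic(to);

	/* gather (uio data area -> kernel sg): flush once, right after
	 * mapping and before the first fragment is copied out, so the
	 * CPU sees what userspace wrote through its mmap. */
	from = kmap_atomic(page);
	flush_dcache_page(page);
	/* ... one or more memcpy() calls read parts of this page ... */
	kunmap_atomic(from);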
diff --git a/queue-4.19/series b/queue-4.19/series
index 079a4e65165..515c06d3d52 100644
--- a/queue-4.19/series
+++ b/queue-4.19/series
@@ -1,3 +1,5 @@
 hid-core-correctly-handle-reportsize-being-zero.patch
 hid-core-sanitize-event-code-and-type-when-mapping-input.patch
 perf-record-stat-explicitly-call-out-event-modifiers-in-the-documentation.patch
+scsi-target-tcmu-fix-size-in-calls-to-tcmu_flush_dcache_range.patch
+scsi-target-tcmu-optimize-use-of-flush_dcache_page.patch