From: Greg Kroah-Hartman
Date: Fri, 4 Sep 2020 11:57:39 +0000 (+0200)
Subject: 4.19-stable patches
X-Git-Tag: v5.4.63~3
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=60c3670b755bfe3485a3bc960921dd0f381e11a7;p=thirdparty%2Fkernel%2Fstable-queue.git

4.19-stable patches

added patches:
	scsi-target-tcmu-fix-size-in-calls-to-tcmu_flush_dcache_range.patch
	scsi-target-tcmu-optimize-use-of-flush_dcache_page.patch
---

diff --git a/queue-4.19/scsi-target-tcmu-fix-size-in-calls-to-tcmu_flush_dcache_range.patch b/queue-4.19/scsi-target-tcmu-fix-size-in-calls-to-tcmu_flush_dcache_range.patch
new file mode 100644
index 00000000000..82ebfd16d36
--- /dev/null
+++ b/queue-4.19/scsi-target-tcmu-fix-size-in-calls-to-tcmu_flush_dcache_range.patch
@@ -0,0 +1,66 @@
+From 8c4e0f212398cdd1eb4310a5981d06a723cdd24f Mon Sep 17 00:00:00 2001
+From: Bodo Stroesser
+Date: Thu, 28 May 2020 21:31:08 +0200
+Subject: scsi: target: tcmu: Fix size in calls to tcmu_flush_dcache_range
+
+From: Bodo Stroesser
+
+commit 8c4e0f212398cdd1eb4310a5981d06a723cdd24f upstream.
+
+1) If the remaining ring space before the end of the ring is smaller than
+   the next cmd to write, tcmu writes a padding entry which fills the
+   remaining space at the end of the ring.
+
+   Then tcmu calls tcmu_flush_dcache_range() with the size of struct
+   tcmu_cmd_entry as data length to flush. If the space filled by the
+   padding was smaller than tcmu_cmd_entry, tcmu_flush_dcache_range() is
+   called for an address range reaching behind the end of the vmalloc'ed
+   ring.
+
+   tcmu_flush_dcache_range() in a loop calls
+   flush_dcache_page(virt_to_page(start)); for every page being part of
+   the range. On x86 the line is optimized out by the compiler, as
+   flush_dcache_page() is empty on x86.
+
+   But I assume the above can cause trouble on other architectures that
+   really have a flush_dcache_page(). For paddings, only the header part
+   of an entry is relevant. Due to alignment rules, the header always
+   fits in the remaining space if padding is needed. So
+   tcmu_flush_dcache_range() can safely be called with sizeof(entry->hdr)
+   as the length here.
+
+2) After it has written a command to the cmd ring, tcmu calls
+   tcmu_flush_dcache_range() using the size of a struct tcmu_cmd_entry as
+   data length to flush. But if a command needs many iovecs, the real
+   size of the command may be bigger than tcmu_cmd_entry, so a part of
+   the written command is not flushed then.
+
+Link: https://lore.kernel.org/r/20200528193108.9085-1-bstroesser@ts.fujitsu.com
+Acked-by: Mike Christie
+Signed-off-by: Bodo Stroesser
+Signed-off-by: Martin K. Petersen
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/target/target_core_user.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/target/target_core_user.c
++++ b/drivers/target/target_core_user.c
+@@ -1018,7 +1018,7 @@ static int queue_cmd_ring(struct tcmu_cm
+ 	entry->hdr.cmd_id = 0; /* not used for PAD */
+ 	entry->hdr.kflags = 0;
+ 	entry->hdr.uflags = 0;
+-	tcmu_flush_dcache_range(entry, sizeof(*entry));
++	tcmu_flush_dcache_range(entry, sizeof(entry->hdr));
+
+ 	UPDATE_HEAD(mb->cmd_head, pad_size, udev->cmdr_size);
+ 	tcmu_flush_dcache_range(mb, sizeof(*mb));
+@@ -1083,7 +1083,7 @@ static int queue_cmd_ring(struct tcmu_cm
+ 	cdb_off = CMDR_OFF + cmd_head + base_command_size;
+ 	memcpy((void *) mb + cdb_off, se_cmd->t_task_cdb, scsi_command_size(se_cmd->t_task_cdb));
+ 	entry->req.cdb_off = cdb_off;
+-	tcmu_flush_dcache_range(entry, sizeof(*entry));
++	tcmu_flush_dcache_range(entry, command_size);
+
+ 	UPDATE_HEAD(mb->cmd_head, command_size, udev->cmdr_size);
+ 	tcmu_flush_dcache_range(mb, sizeof(*mb));
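
For readers without the source at hand: the commit message above says the
wrapper loops over the range calling flush_dcache_page(virt_to_page(start))
for every page. The sketch below is reconstructed from that description
alone; the rounding helpers and exact shape are assumptions in common
kernel idiom, not the verbatim 4.19 macro.

/*
 * Hedged sketch of a range-flush wrapper as described above.  It rounds
 * [vaddr, vaddr + size) out to page boundaries and flushes every page in
 * between.  This is why passing sizeof(*entry) for a padding entry that
 * is smaller than struct tcmu_cmd_entry can flush a page behind the end
 * of the vmalloc'ed ring.
 */
#define tcmu_flush_dcache_range(vaddr, size)				\
	do {								\
		unsigned long start = (unsigned long)(vaddr);		\
		unsigned long end = round_up(start + (size), PAGE_SIZE); \
									\
		start = round_down(start, PAGE_SIZE);			\
		while (start < end) {					\
			flush_dcache_page(virt_to_page(start));		\
			start += PAGE_SIZE;				\
		}							\
	} while (0)

With a padding entry of, say, 32 bytes sitting in the last 32 bytes of the
ring, size = sizeof(*entry) rounds 'end' up past the ring's last page,
which is exactly the out-of-bounds flush case 1) describes; using
sizeof(entry->hdr) keeps the flushed range inside the padding.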
diff --git a/queue-4.19/scsi-target-tcmu-optimize-use-of-flush_dcache_page.patch b/queue-4.19/scsi-target-tcmu-optimize-use-of-flush_dcache_page.patch
new file mode 100644
index 00000000000..c0c34072323
--- /dev/null
+++ b/queue-4.19/scsi-target-tcmu-optimize-use-of-flush_dcache_page.patch
@@ -0,0 +1,94 @@
+From 3c58f737231e2c8cbf543a09d84d8c8e80e05e43 Mon Sep 17 00:00:00 2001
+From: Bodo Stroesser
+Date: Thu, 18 Jun 2020 15:16:31 +0200
+Subject: scsi: target: tcmu: Optimize use of flush_dcache_page
+
+From: Bodo Stroesser
+
+commit 3c58f737231e2c8cbf543a09d84d8c8e80e05e43 upstream.
+
+(scatter|gather)_data_area() need to flush the dcache after writing data
+to or before reading data from a page in the uio data area. The two
+routines are able to handle data transfer to/from such a page in
+fragments, and flush the cache after each fragment is copied by calling
+the wrapper tcmu_flush_dcache_range().
+
+That means:
+
+1) flush_dcache_page() can be called multiple times for the same page.
+
+2) Calling flush_dcache_page() indirectly using the wrapper does not make
+   sense, because each call of the wrapper is for one single page only
+   and the calling routine already has the correct page pointer.
+
+Change (scatter|gather)_data_area() such that, instead of calling
+tcmu_flush_dcache_range() before/after each memcpy, they now call
+flush_dcache_page() before unmapping a page (when writing is complete for
+that page) or after mapping a page (when starting to read the page).
+
+After this change, only calls to tcmu_flush_dcache_range() for addresses
+in the vmalloc'ed command ring are left over.
+
+The patch was tested on ARM with kernels 4.19.118 and 5.7.2.
+
+Link: https://lore.kernel.org/r/20200618131632.32748-2-bstroesser@ts.fujitsu.com
+Tested-by: JiangYu
+Tested-by: Daniel Meyerholt
+Acked-by: Mike Christie
+Signed-off-by: Bodo Stroesser
+Signed-off-by: Martin K. Petersen
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/target/target_core_user.c |   11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/drivers/target/target_core_user.c
++++ b/drivers/target/target_core_user.c
+@@ -687,8 +687,10 @@ static void scatter_data_area(struct tcm
+ 		from = kmap_atomic(sg_page(sg)) + sg->offset;
+ 		while (sg_remaining > 0) {
+ 			if (block_remaining == 0) {
+-				if (to)
++				if (to) {
++					flush_dcache_page(page);
+ 					kunmap_atomic(to);
++				}
+
+ 				block_remaining = DATA_BLOCK_SIZE;
+ 				dbi = tcmu_cmd_get_dbi(tcmu_cmd);
+@@ -733,7 +735,6 @@ static void scatter_data_area(struct tcm
+ 				memcpy(to + offset,
+ 				       from + sg->length - sg_remaining,
+ 				       copy_bytes);
+-				tcmu_flush_dcache_range(to, copy_bytes);
+ 			}
+
+ 			sg_remaining -= copy_bytes;
+@@ -742,8 +743,10 @@ static void scatter_data_area(struct tcm
+ 		kunmap_atomic(from - sg->offset);
+ 	}
+
+-	if (to)
++	if (to) {
++		flush_dcache_page(page);
+ 		kunmap_atomic(to);
++	}
+ }
+
+ static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd,
+@@ -789,13 +792,13 @@ static void gather_data_area(struct tcmu
+ 			dbi = tcmu_cmd_get_dbi(cmd);
+ 			page = tcmu_get_block_page(udev, dbi);
+ 			from = kmap_atomic(page);
++			flush_dcache_page(page);
+ 		}
+ 		copy_bytes = min_t(size_t, sg_remaining,
+ 				   block_remaining);
+ 		if (read_len < copy_bytes)
+ 			copy_bytes = read_len;
+ 		offset = DATA_BLOCK_SIZE - block_remaining;
+-		tcmu_flush_dcache_range(from, copy_bytes);
+ 		memcpy(to + sg->length - sg_remaining, from + offset,
+ 		       copy_bytes);
+
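
To see the new flush placement at a glance, here is an illustrative sketch
of the pattern both routines follow after this patch: one
flush_dcache_page() per kmap'ed page instead of one
tcmu_flush_dcache_range() per copied fragment. Variable names are taken
from the hunks above; the loop bookkeeping (dbi iteration, fragment
arithmetic) is elided.

	struct page *page = tcmu_get_block_page(udev, dbi);
	void *to, *from;

	/* scatter (kernel sg -> uio data area): copy every fragment
	 * destined for this page first, then flush once, just before
	 * the page is unmapped. */
	to = kmap_atomic(page);
	/* ... one or more memcpy() calls fill parts of this page ... */
	flush_dcache_page(page);
	kunmap_atomic(to);

	/* gather (uio data area -> kernel sg): flush once, right after
	 * mapping and before the first fragment is copied out, so the
	 * CPU sees what userspace wrote through its mmap. */
	from = kmap_atomic(page);
	flush_dcache_page(page);
	/* ... one or more memcpy() calls read parts of this page ... */
	kunmap_atomic(from);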
diff --git a/queue-4.19/series b/queue-4.19/series
index 079a4e65165..515c06d3d52 100644
--- a/queue-4.19/series
+++ b/queue-4.19/series
@@ -1,3 +1,5 @@
 hid-core-correctly-handle-reportsize-being-zero.patch
 hid-core-sanitize-event-code-and-type-when-mapping-input.patch
 perf-record-stat-explicitly-call-out-event-modifiers-in-the-documentation.patch
+scsi-target-tcmu-fix-size-in-calls-to-tcmu_flush_dcache_range.patch
+scsi-target-tcmu-optimize-use-of-flush_dcache_page.patch