--- /dev/null
+From stable-owner@vger.kernel.org Tue May 9 16:31:38 2023
+From: Vasily Gorbik <gor@linux.ibm.com>
+Date: Tue, 9 May 2023 16:31:17 +0200
+Subject: s390/mm: fix direct map accounting
+To: stable@vger.kernel.org
+Cc: Heiko Carstens <hca@linux.ibm.com>, Alexander Gordeev <agordeev@linux.ibm.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <patch-2.thread-961a23.git-961a2364d134.your-ad-here.call-01683642007-ext-1116@work.hours>
+Content-Disposition: inline
+
+From: Heiko Carstens <hca@linux.ibm.com>
+
+[ Upstream commit 81e8479649853ffafc714aca4a9c0262efd3160a ]
+
+Commit bb1520d581a3 ("s390/mm: start kernel with DAT enabled") did not
+implement direct map accounting in the early page table setup code. As
+a result, the reported values are now bogus:
+
+$ cat /proc/meminfo
+...
+DirectMap4k: 5120 kB
+DirectMap1M: 18446744073709546496 kB
+DirectMap2G: 0 kB
+
+Fix this by adding the missing accounting. The result looks sane again:
+
+$ cat /proc/meminfo
+...
+DirectMap4k: 6156 kB
+DirectMap1M: 2091008 kB
+DirectMap2G: 6291456 kB
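+
+The huge DirectMap1M value is simply a negative counter printed as an
+unsigned 64-bit number (2^64 - 5120 = 18446744073709546496): splits of
+1M mappings were subtracted while the boot-time mappings themselves were
+never added, leaving the counter at -5 segments (-5 * 1024 kB = -5120 kB,
+consistent with the 5120 kB of 4K entries those five splits produced).
+
+For reference, a condensed sketch of the accounting and reporting path
+(simplified from arch/s390/include/asm/pgtable.h and
+arch/s390/mm/pageattr.c; the seq_printf formats are abbreviated here):
+
+	atomic_long_t direct_pages_count[PG_DIRECT_MAP_MAX];
+
+	static inline void update_page_count(int level, long count)
+	{
+		if (IS_ENABLED(CONFIG_PROC_FS))
+			atomic_long_add(count, &direct_pages_count[level]);
+	}
+
+	void arch_report_meminfo(struct seq_file *m)
+	{
+		/* 4K = 4 kB (<< 2), 1M = 1024 kB (<< 10), 2G = 2097152 kB (<< 21) */
+		seq_printf(m, "DirectMap4k: %lu kB\n",
+			   atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_4K]) << 2);
+		seq_printf(m, "DirectMap1M: %lu kB\n",
+			   atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_1M]) << 10);
+		seq_printf(m, "DirectMap2G: %lu kB\n",
+			   atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_2G]) << 21);
+	}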
+
+Fixes: bb1520d581a3 ("s390/mm: start kernel with DAT enabled")
+Reviewed-by: Alexander Gordeev <agordeev@linux.ibm.com>
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/boot/vmem.c | 19 ++++++++++++++++---
+ arch/s390/include/asm/pgtable.h | 2 +-
+ arch/s390/mm/pageattr.c | 2 +-
+ 3 files changed, 18 insertions(+), 5 deletions(-)
+
+--- a/arch/s390/boot/vmem.c
++++ b/arch/s390/boot/vmem.c
+@@ -10,6 +10,10 @@
+ #include "decompressor.h"
+ #include "boot.h"
+
++#ifdef CONFIG_PROC_FS
++atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);
++#endif
++
+ #define init_mm (*(struct mm_struct *)vmlinux.init_mm_off)
+ #define swapper_pg_dir vmlinux.swapper_pg_dir_off
+ #define invalid_pg_dir vmlinux.invalid_pg_dir_off
+@@ -126,7 +130,7 @@ static bool can_large_pmd(pmd_t *pm_dir,
+ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end,
+ enum populate_mode mode)
+ {
+- unsigned long next;
++ unsigned long pages = 0;
+ pte_t *pte, entry;
+
+ pte = pte_offset_kernel(pmd, addr);
+@@ -135,14 +139,17 @@ static void pgtable_pte_populate(pmd_t *
+ entry = __pte(_pa(addr, mode));
+ entry = set_pte_bit(entry, PAGE_KERNEL_EXEC);
+ set_pte(pte, entry);
++ pages++;
+ }
+ }
++ if (mode == POPULATE_DIRECT)
++ update_page_count(PG_DIRECT_MAP_4K, pages);
+ }
+
+ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end,
+ enum populate_mode mode)
+ {
+- unsigned long next;
++ unsigned long next, pages = 0;
+ pmd_t *pmd, entry;
+ pte_t *pte;
+
+@@ -154,6 +161,7 @@ static void pgtable_pmd_populate(pud_t *
+ entry = __pmd(_pa(addr, mode));
+ entry = set_pmd_bit(entry, SEGMENT_KERNEL_EXEC);
+ set_pmd(pmd, entry);
++ pages++;
+ continue;
+ }
+ pte = boot_pte_alloc();
+@@ -163,12 +171,14 @@ static void pgtable_pmd_populate(pud_t *
+ }
+ pgtable_pte_populate(pmd, addr, next, mode);
+ }
++ if (mode == POPULATE_DIRECT)
++ update_page_count(PG_DIRECT_MAP_1M, pages);
+ }
+
+ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end,
+ enum populate_mode mode)
+ {
+- unsigned long next;
++ unsigned long next, pages = 0;
+ pud_t *pud, entry;
+ pmd_t *pmd;
+
+@@ -180,6 +190,7 @@ static void pgtable_pud_populate(p4d_t *
+ entry = __pud(_pa(addr, mode));
+ entry = set_pud_bit(entry, REGION3_KERNEL_EXEC);
+ set_pud(pud, entry);
++ pages++;
+ continue;
+ }
+ pmd = boot_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+@@ -189,6 +200,8 @@ static void pgtable_pud_populate(p4d_t *
+ }
+ pgtable_pmd_populate(pud, addr, next, mode);
+ }
++ if (mode == POPULATE_DIRECT)
++ update_page_count(PG_DIRECT_MAP_2G, pages);
+ }
+
+ static void pgtable_p4d_populate(pgd_t *pgd, unsigned long addr, unsigned long end,
+--- a/arch/s390/include/asm/pgtable.h
++++ b/arch/s390/include/asm/pgtable.h
+@@ -34,7 +34,7 @@ enum {
+ PG_DIRECT_MAP_MAX
+ };
+
+-extern atomic_long_t direct_pages_count[PG_DIRECT_MAP_MAX];
++extern atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);
+
+ static inline void update_page_count(int level, long count)
+ {
+--- a/arch/s390/mm/pageattr.c
++++ b/arch/s390/mm/pageattr.c
+@@ -41,7 +41,7 @@ void __storage_key_init_range(unsigned l
+ }
+
+ #ifdef CONFIG_PROC_FS
+-atomic_long_t direct_pages_count[PG_DIRECT_MAP_MAX];
++atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);
+
+ void arch_report_meminfo(struct seq_file *m)
+ {
--- /dev/null
+From stable-owner@vger.kernel.org Tue May 9 16:34:51 2023
+From: Vasily Gorbik <gor@linux.ibm.com>
+Date: Tue, 9 May 2023 16:31:14 +0200
+Subject: s390/mm: rename POPULATE_ONE2ONE to POPULATE_DIRECT
+To: stable@vger.kernel.org
+Cc: Heiko Carstens <hca@linux.ibm.com>, Alexander Gordeev <agordeev@linux.ibm.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <patch-1.thread-961a23.git-0b29fdfb1223.your-ad-here.call-01683642007-ext-1116@work.hours>
+Content-Disposition: inline
+
+From: Heiko Carstens <hca@linux.ibm.com>
+
+[ Upstream commit 07fdd6627f7f9c72ed68d531653b56df81da9996 ]
+
+Architectures generally use the "direct map" wording for mapping the whole
+physical memory. Use that wording as well in arch/s390/boot/vmem.c, instead
+of "one to one" in order to avoid confusion.
+
+This also matches what is already done in arch/s390/mm/vmem.c.
+
+Reviewed-by: Alexander Gordeev <agordeev@linux.ibm.com>
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/boot/vmem.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/arch/s390/boot/vmem.c
++++ b/arch/s390/boot/vmem.c
+@@ -29,7 +29,7 @@ unsigned long __bootdata(pgalloc_low);
+
+ enum populate_mode {
+ POPULATE_NONE,
+- POPULATE_ONE2ONE,
++ POPULATE_DIRECT,
+ POPULATE_ABS_LOWCORE,
+ };
+
+@@ -102,7 +102,7 @@ static unsigned long _pa(unsigned long a
+ switch (mode) {
+ case POPULATE_NONE:
+ return -1;
+- case POPULATE_ONE2ONE:
++ case POPULATE_DIRECT:
+ return addr;
+ case POPULATE_ABS_LOWCORE:
+ return __abs_lowcore_pa(addr);
+@@ -251,9 +251,9 @@ void setup_vmem(unsigned long asce_limit
+ * the lowcore and create the identity mapping only afterwards.
+ */
+ pgtable_populate_init();
+- pgtable_populate(0, sizeof(struct lowcore), POPULATE_ONE2ONE);
++ pgtable_populate(0, sizeof(struct lowcore), POPULATE_DIRECT);
+ for_each_mem_detect_usable_block(i, &start, &end)
+- pgtable_populate(start, end, POPULATE_ONE2ONE);
++ pgtable_populate(start, end, POPULATE_DIRECT);
+ pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore),
+ POPULATE_ABS_LOWCORE);
+ pgtable_populate(__memcpy_real_area, __memcpy_real_area + PAGE_SIZE,
ext4-remove-a-bug_on-in-ext4_mb_release_group_pa.patch
ext4-fix-invalid-free-tracking-in-ext4_xattr_move_to_block.patch
x86-amd_nb-add-pci-id-for-family-19h-model-78h.patch
+x86-fix-clear_user_rep_good-exception-handling-annotation.patch
+spi-fsl-spi-re-organise-transfer-bits_per_word-adaptation.patch
+spi-fsl-cpm-use-16-bit-mode-for-large-transfers-with-even-size.patch
+s390-mm-rename-populate_one2one-to-populate_direct.patch
+s390-mm-fix-direct-map-accounting.patch
--- /dev/null
+From fc96ec826bced75cc6b9c07a4ac44bbf651337ab Mon Sep 17 00:00:00 2001
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+Date: Sat, 1 Apr 2023 19:59:48 +0200
+Subject: spi: fsl-cpm: Use 16 bit mode for large transfers with even size
+
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+
+commit fc96ec826bced75cc6b9c07a4ac44bbf651337ab upstream.
+
+On CPM, the RISC core is a lot more efficient when doing transfers
+in 16-bit chunks than in 8-bit chunks, but unfortunately the
+words need to be byte swapped, as seen in a previous commit.
+
+So, for large transfers with an even size, allocate a temporary tx
+buffer and byte-swap the data before and after the transfer.
+
+This change allows setting a higher speed for the transfer. For
+instance, on an MPC 8xx (CPM1 comms RISC processor), the documentation
+says that a transfer in byte mode at 1 kbit/s uses 0.200% of CPM load
+at 25 MHz, while a word transfer at the same speed uses 0.032% of CPM
+load. This means the speed can be about 6 times higher in word mode
+(0.200 / 0.032 = 6.25) for the same CPM load.
+
+For the time being, only do it on CPM1 as there must be a
+trade-off between the CPM load reduction and the CPU load required
+to byte swap the data.
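+
+Note the asymmetry in the implementation: tx data is swapped into a
+freshly allocated bounce buffer, because t->tx_buf is const and must not
+be modified, while rx data can be swapped in place once the transfer has
+completed. The swap itself uses the standard byteorder helpers, roughly
+(condensed sketch, buffer names illustrative):
+
+	/* before the transfer: store CPU-order words as little-endian */
+	for (i = 0; i < len / 2; i++)
+		dst[i] = cpu_to_le16p(src + i);	/* swab on BE, no-op on LE */
+
+	/* after the transfer: convert rx words back to CPU order */
+	for (i = 0; i < len; i += 2)
+		le16_to_cpus(rx_buf + i);	/* in-place 16-bit swap */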
+
+Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+Link: https://lore.kernel.org/r/f2e981f20f92dd28983c3949702a09248c23845c.1680371809.git.christophe.leroy@csgroup.eu
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/spi/spi-fsl-cpm.c | 23 +++++++++++++++++++++++
+ drivers/spi/spi-fsl-spi.c | 3 +++
+ 2 files changed, 26 insertions(+)
+
+--- a/drivers/spi/spi-fsl-cpm.c
++++ b/drivers/spi/spi-fsl-cpm.c
+@@ -21,6 +21,7 @@
+ #include <linux/spi/spi.h>
+ #include <linux/types.h>
+ #include <linux/platform_device.h>
++#include <linux/byteorder/generic.h>
+
+ #include "spi-fsl-cpm.h"
+ #include "spi-fsl-lib.h"
+@@ -120,6 +121,21 @@ int fsl_spi_cpm_bufs(struct mpc8xxx_spi
+ mspi->rx_dma = mspi->dma_dummy_rx;
+ mspi->map_rx_dma = 0;
+ }
++ if (t->bits_per_word == 16 && t->tx_buf) {
++ const u16 *src = t->tx_buf;
++ u16 *dst;
++ int i;
++
++ dst = kmalloc(t->len, GFP_KERNEL);
++ if (!dst)
++ return -ENOMEM;
++
++ for (i = 0; i < t->len >> 1; i++)
++ dst[i] = cpu_to_le16p(src + i);
++
++ mspi->tx = dst;
++ mspi->map_tx_dma = 1;
++ }
+
+ if (mspi->map_tx_dma) {
+ void *nonconst_tx = (void *)mspi->tx; /* shut up gcc */
+@@ -173,6 +189,13 @@ void fsl_spi_cpm_bufs_complete(struct mp
+ if (mspi->map_rx_dma)
+ dma_unmap_single(dev, mspi->rx_dma, t->len, DMA_FROM_DEVICE);
+ mspi->xfer_in_progress = NULL;
++
++ if (t->bits_per_word == 16 && t->rx_buf) {
++ int i;
++
++ for (i = 0; i < t->len; i += 2)
++ le16_to_cpus(t->rx_buf + i);
++ }
+ }
+ EXPORT_SYMBOL_GPL(fsl_spi_cpm_bufs_complete);
+
+--- a/drivers/spi/spi-fsl-spi.c
++++ b/drivers/spi/spi-fsl-spi.c
+@@ -351,6 +351,9 @@ static int fsl_spi_prepare_message(struc
+ return -EINVAL;
+ if (t->bits_per_word == 16 || t->bits_per_word == 32)
+ t->bits_per_word = 8; /* pretend its 8 bits */
++ if (t->bits_per_word == 8 && t->len >= 256 &&
++ (mpc8xxx_spi->flags & SPI_CPM1))
++ t->bits_per_word = 16;
+ }
+ }
+ return fsl_spi_setup_transfer(m->spi, first);
--- /dev/null
+From 8a5299a1278eadf1e08a598a5345c376206f171e Mon Sep 17 00:00:00 2001
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+Date: Sat, 1 Apr 2023 19:59:47 +0200
+Subject: spi: fsl-spi: Re-organise transfer bits_per_word adaptation
+
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+
+commit 8a5299a1278eadf1e08a598a5345c376206f171e upstream.
+
+The fsl-spi driver performs bits_per_word modifications for different
+reasons:
+- On CPU mode, to minimise the number of interrupts
+- On CPM/QE mode, to work around the controller byte order
+
+For CPU mode that's done in fsl_spi_prepare_message() while
+for CPM mode that's done in fsl_spi_setup_transfer().
+
+Reunify all of it in fsl_spi_prepare_message(), and catch
+impossible cases early through master's bits_per_word_mask
+instead of returning EINVAL later.
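+
+The mask-based rejection is the standard SPI core mechanism: each bit
+n-1 set in bits_per_word_mask advertises support for n bits per word,
+and the core validates every transfer against the mask before calling
+into the driver. Roughly (macros from <linux/spi/spi.h>):
+
+	/* CPM/QE: 4..8, 16 and 32 bits per word are workable */
+	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 8) |
+				     SPI_BPW_MASK(16) | SPI_BPW_MASK(32);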
+
+Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+Link: https://lore.kernel.org/r/0ce96fe96e8b07cba0613e4097cfd94d09b8919a.1680371809.git.christophe.leroy@csgroup.eu
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/spi/spi-fsl-spi.c | 46 +++++++++++++++++++++-------------------------
+ 1 file changed, 21 insertions(+), 25 deletions(-)
+
+--- a/drivers/spi/spi-fsl-spi.c
++++ b/drivers/spi/spi-fsl-spi.c
+@@ -177,26 +177,6 @@ static int mspi_apply_cpu_mode_quirks(st
+ return bits_per_word;
+ }
+
+-static int mspi_apply_qe_mode_quirks(struct spi_mpc8xxx_cs *cs,
+- struct spi_device *spi,
+- int bits_per_word)
+-{
+- /* CPM/QE uses Little Endian for words > 8
+- * so transform 16 and 32 bits words into 8 bits
+- * Unfortnatly that doesn't work for LSB so
+- * reject these for now */
+- /* Note: 32 bits word, LSB works iff
+- * tfcr/rfcr is set to CPMFCR_GBL */
+- if (spi->mode & SPI_LSB_FIRST &&
+- bits_per_word > 8)
+- return -EINVAL;
+- if (bits_per_word <= 8)
+- return bits_per_word;
+- if (bits_per_word == 16 || bits_per_word == 32)
+- return 8; /* pretend its 8 bits */
+- return -EINVAL;
+-}
+-
+ static int fsl_spi_setup_transfer(struct spi_device *spi,
+ struct spi_transfer *t)
+ {
+@@ -224,9 +204,6 @@ static int fsl_spi_setup_transfer(struct
+ bits_per_word = mspi_apply_cpu_mode_quirks(cs, spi,
+ mpc8xxx_spi,
+ bits_per_word);
+- else
+- bits_per_word = mspi_apply_qe_mode_quirks(cs, spi,
+- bits_per_word);
+
+ if (bits_per_word < 0)
+ return bits_per_word;
+@@ -361,6 +338,19 @@ static int fsl_spi_prepare_message(struc
+ t->bits_per_word = 32;
+ else if ((t->len & 1) == 0)
+ t->bits_per_word = 16;
++ } else {
++ /*
++ * CPM/QE uses Little Endian for words > 8
++ * so transform 16 and 32 bits words into 8 bits
++ * Unfortnatly that doesn't work for LSB so
++ * reject these for now
++ * Note: 32 bits word, LSB works iff
++ * tfcr/rfcr is set to CPMFCR_GBL
++ */
++ if (m->spi->mode & SPI_LSB_FIRST && t->bits_per_word > 8)
++ return -EINVAL;
++ if (t->bits_per_word == 16 || t->bits_per_word == 32)
++ t->bits_per_word = 8; /* pretend its 8 bits */
+ }
+ }
+ return fsl_spi_setup_transfer(m->spi, first);
+@@ -594,8 +584,14 @@ static struct spi_master *fsl_spi_probe(
+ if (mpc8xxx_spi->type == TYPE_GRLIB)
+ fsl_spi_grlib_probe(dev);
+
+- master->bits_per_word_mask =
+- (SPI_BPW_RANGE_MASK(4, 16) | SPI_BPW_MASK(32)) &
++ if (mpc8xxx_spi->flags & SPI_CPM_MODE)
++ master->bits_per_word_mask =
++ (SPI_BPW_RANGE_MASK(4, 8) | SPI_BPW_MASK(16) | SPI_BPW_MASK(32));
++ else
++ master->bits_per_word_mask =
++ (SPI_BPW_RANGE_MASK(4, 16) | SPI_BPW_MASK(32));
++
++ master->bits_per_word_mask &=
+ SPI_BPW_RANGE_MASK(1, mpc8xxx_spi->max_bits_per_word);
+
+ if (mpc8xxx_spi->flags & SPI_QE_CPU_MODE)
--- /dev/null
+From 6904bfe4df72f9cb8e5fb465e1075b39402c5c6c Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Sun, 14 May 2023 15:46:19 -0700
+Subject: x86: fix clear_user_rep_good() exception handling annotation
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+This code no longer exists in mainline, because it was removed in
+commit d2c95f9d6802 ("x86: don't use REP_GOOD or ERMS for user memory
+clearing") upstream.
+
+However, rather than backport the full range of x86 memory clearing and
+copying cleanups, fix the exception table annotation placement for the
+final 'rep stosb' in clear_user_rep_good(): rather than pointing at the
+actual instruction that did the user space access, it pointed to the
+register move just before it.
+
+That made sense from a code flow standpoint, but not from an actual
+usage standpoint: it means that if the user access takes an exception,
+the exception handler won't actually find the instruction in the
+exception tables.
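+
+Conceptually, the fixup path does something like this (heavily
+simplified sketch; the real logic lives in the x86 fault and exception
+table handling code):
+
+	const struct exception_table_entry *e;
+
+	e = search_exception_tables(regs->ip);	/* exact match on fault IP */
+	if (e)
+		regs->ip = ex_fixup_addr(e);	/* resume at the fixup */
+	else
+		oops();		/* no entry: treated as a kernel bug */
+
+With the label on the 'mov', the faulting 'rep stosb' IP has no entry.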
+
+As a result, rather than fixing it up and returning -EFAULT, it would
+then turn it into a kernel oops report instead, something like:
+
+ BUG: unable to handle page fault for address: 0000000020081000
+ #PF: supervisor write access in kernel mode
+ #PF: error_code(0x0002) - not-present page
+ ...
+ RIP: 0010:clear_user_rep_good+0x1c/0x30 arch/x86/lib/clear_page_64.S:147
+ ...
+ Call Trace:
+ __clear_user arch/x86/include/asm/uaccess_64.h:103 [inline]
+ clear_user arch/x86/include/asm/uaccess_64.h:124 [inline]
+ iov_iter_zero+0x709/0x1290 lib/iov_iter.c:800
+ iomap_dio_hole_iter fs/iomap/direct-io.c:389 [inline]
+ iomap_dio_iter fs/iomap/direct-io.c:440 [inline]
+ __iomap_dio_rw+0xe3d/0x1cd0 fs/iomap/direct-io.c:601
+ iomap_dio_rw+0x40/0xa0 fs/iomap/direct-io.c:689
+ ext4_dio_read_iter fs/ext4/file.c:94 [inline]
+ ext4_file_read_iter+0x4be/0x690 fs/ext4/file.c:145
+ call_read_iter include/linux/fs.h:2183 [inline]
+ do_iter_readv_writev+0x2e0/0x3b0 fs/read_write.c:733
+ do_iter_read+0x2f2/0x750 fs/read_write.c:796
+ vfs_readv+0xe5/0x150 fs/read_write.c:916
+ do_preadv+0x1b6/0x270 fs/read_write.c:1008
+ __do_sys_preadv2 fs/read_write.c:1070 [inline]
+ __se_sys_preadv2 fs/read_write.c:1061 [inline]
+ __x64_sys_preadv2+0xef/0x150 fs/read_write.c:1061
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+which then looks like a filesystem bug rather than the incorrect
+exception annotation that it is.
+
+[ The alternative to this one-liner fix is to take the upstream series
+ that cleans this all up:
+
+ 68674f94ffc9 ("x86: don't use REP_GOOD or ERMS for small memory copies")
+ 20f3337d350c ("x86: don't use REP_GOOD or ERMS for small memory clearing")
+ adfcf4231b8c ("x86: don't use REP_GOOD or ERMS for user memory copies")
+ * d2c95f9d6802 ("x86: don't use REP_GOOD or ERMS for user memory clearing")
+ 3639a535587d ("x86: move stac/clac from user copy routines into callers")
+ 577e6a7fd50d ("x86: inline the 'rep movs' in user copies for the FSRM case")
+ 8c9b6a88b7e2 ("x86: improve on the non-rep 'clear_user' function")
+ 427fda2c8a49 ("x86: improve on the non-rep 'copy_user' function")
+ * e046fe5a36a9 ("x86: set FSRS automatically on AMD CPUs that have FSRM")
+ e1f2750edc4a ("x86: remove 'zerorest' argument from __copy_user_nocache()")
+ 034ff37d3407 ("x86: rewrite '__copy_user_nocache' function")
+
+ with either the whole series or at a minimum the two marked commits
+ being needed to fix this issue ]
+
+Reported-by: syzbot <syzbot+401145a9a237779feb26@syzkaller.appspotmail.com>
+Link: https://syzkaller.appspot.com/bug?extid=401145a9a237779feb26
+Fixes: 0db7058e8e23 ("x86/clear_user: Make it faster")
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: stable@kernel.org
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/lib/clear_page_64.S | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/lib/clear_page_64.S
++++ b/arch/x86/lib/clear_page_64.S
+@@ -142,8 +142,8 @@ SYM_FUNC_START(clear_user_rep_good)
+ and $7, %edx
+ jz .Lrep_good_exit
+
+-.Lrep_good_bytes:
+ mov %edx, %ecx
++.Lrep_good_bytes:
+ rep stosb
+
+ .Lrep_good_exit: