]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.0-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 20 May 2019 11:28:37 +0000 (13:28 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 20 May 2019 11:28:37 +0000 (13:28 +0200)
added patches:
ext4-don-t-update-s_rev_level-if-not-required.patch
ext4-fix-block-validity-checks-for-journal-inodes-using-indirect-blocks.patch
ext4-fix-compile-error-when-using-buffer_trace.patch
ext4-unsigned-int-compared-against-zero.patch
iov_iter-optimize-page_copy_sane.patch
kbuild-turn-auto.conf.cmd-into-a-mandatory-include-file.patch
libnvdimm-namespace-fix-label-tracking-error.patch
s390-mm-convert-to-the-generic-get_user_pages_fast-code.patch
s390-mm-make-the-pxd_offset-functions-more-robust.patch
xen-pvh-correctly-setup-the-pv-efi-interface-for-dom0.patch
xen-pvh-set-xen_domain_type-to-hvm-in-xen_pvh_init.patch

12 files changed:
queue-5.0/ext4-don-t-update-s_rev_level-if-not-required.patch [new file with mode: 0644]
queue-5.0/ext4-fix-block-validity-checks-for-journal-inodes-using-indirect-blocks.patch [new file with mode: 0644]
queue-5.0/ext4-fix-compile-error-when-using-buffer_trace.patch [new file with mode: 0644]
queue-5.0/ext4-unsigned-int-compared-against-zero.patch [new file with mode: 0644]
queue-5.0/iov_iter-optimize-page_copy_sane.patch [new file with mode: 0644]
queue-5.0/kbuild-turn-auto.conf.cmd-into-a-mandatory-include-file.patch [new file with mode: 0644]
queue-5.0/libnvdimm-namespace-fix-label-tracking-error.patch [new file with mode: 0644]
queue-5.0/s390-mm-convert-to-the-generic-get_user_pages_fast-code.patch [new file with mode: 0644]
queue-5.0/s390-mm-make-the-pxd_offset-functions-more-robust.patch [new file with mode: 0644]
queue-5.0/series
queue-5.0/xen-pvh-correctly-setup-the-pv-efi-interface-for-dom0.patch [new file with mode: 0644]
queue-5.0/xen-pvh-set-xen_domain_type-to-hvm-in-xen_pvh_init.patch [new file with mode: 0644]

diff --git a/queue-5.0/ext4-don-t-update-s_rev_level-if-not-required.patch b/queue-5.0/ext4-don-t-update-s_rev_level-if-not-required.patch
new file mode 100644 (file)
index 0000000..bc5c750
--- /dev/null
@@ -0,0 +1,92 @@
+From c9e716eb9b3455a83ed7c5f5a81256a3da779a95 Mon Sep 17 00:00:00 2001
+From: Andreas Dilger <adilger@dilger.ca>
+Date: Thu, 14 Feb 2019 17:52:18 -0500
+Subject: ext4: don't update s_rev_level if not required
+
+From: Andreas Dilger <adilger@dilger.ca>
+
+commit c9e716eb9b3455a83ed7c5f5a81256a3da779a95 upstream.
+
+Don't update the superblock s_rev_level during mount if it isn't
+actually necessary, only if superblock features are being set by
+the kernel.  This was originally added for ext3 since it always
+set the INCOMPAT_RECOVER and HAS_JOURNAL features during mount,
+but this is not needed since no journal mode was added to ext4.
+
+That will allow Geert to mount his 20-year-old ext2 rev 0.0 m68k
+filesystem, as a testament of the backward compatibility of ext4.
+
+Fixes: 0390131ba84f ("ext4: Allow ext4 to run without a journal")
+Signed-off-by: Andreas Dilger <adilger@dilger.ca>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/ext4.h  |    6 +++++-
+ fs/ext4/inode.c |    1 -
+ fs/ext4/super.c |    1 -
+ 3 files changed, 5 insertions(+), 3 deletions(-)
+
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -1665,6 +1665,8 @@ static inline void ext4_clear_state_flag
+ #define EXT4_FEATURE_INCOMPAT_INLINE_DATA     0x8000 /* data in inode */
+ #define EXT4_FEATURE_INCOMPAT_ENCRYPT         0x10000
++extern void ext4_update_dynamic_rev(struct super_block *sb);
++
+ #define EXT4_FEATURE_COMPAT_FUNCS(name, flagname) \
+ static inline bool ext4_has_feature_##name(struct super_block *sb) \
+ { \
+@@ -1673,6 +1675,7 @@ static inline bool ext4_has_feature_##na
+ } \
+ static inline void ext4_set_feature_##name(struct super_block *sb) \
+ { \
++      ext4_update_dynamic_rev(sb); \
+       EXT4_SB(sb)->s_es->s_feature_compat |= \
+               cpu_to_le32(EXT4_FEATURE_COMPAT_##flagname); \
+ } \
+@@ -1690,6 +1693,7 @@ static inline bool ext4_has_feature_##na
+ } \
+ static inline void ext4_set_feature_##name(struct super_block *sb) \
+ { \
++      ext4_update_dynamic_rev(sb); \
+       EXT4_SB(sb)->s_es->s_feature_ro_compat |= \
+               cpu_to_le32(EXT4_FEATURE_RO_COMPAT_##flagname); \
+ } \
+@@ -1707,6 +1711,7 @@ static inline bool ext4_has_feature_##na
+ } \
+ static inline void ext4_set_feature_##name(struct super_block *sb) \
+ { \
++      ext4_update_dynamic_rev(sb); \
+       EXT4_SB(sb)->s_es->s_feature_incompat |= \
+               cpu_to_le32(EXT4_FEATURE_INCOMPAT_##flagname); \
+ } \
+@@ -2675,7 +2680,6 @@ do {                                                                     \
+ #endif
+-extern void ext4_update_dynamic_rev(struct super_block *sb);
+ extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
+                                       __u32 compat);
+ extern int ext4_update_rocompat_feature(handle_t *handle,
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -5355,7 +5355,6 @@ static int ext4_do_update_inode(handle_t
+               err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
+               if (err)
+                       goto out_brelse;
+-              ext4_update_dynamic_rev(sb);
+               ext4_set_feature_large_file(sb);
+               ext4_handle_sync(handle);
+               err = ext4_handle_dirty_super(handle, sb);
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -2259,7 +2259,6 @@ static int ext4_setup_super(struct super
+               es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
+       le16_add_cpu(&es->s_mnt_count, 1);
+       ext4_update_tstamp(es, s_mtime);
+-      ext4_update_dynamic_rev(sb);
+       if (sbi->s_journal)
+               ext4_set_feature_journal_needs_recovery(sb);
diff --git a/queue-5.0/ext4-fix-block-validity-checks-for-journal-inodes-using-indirect-blocks.patch b/queue-5.0/ext4-fix-block-validity-checks-for-journal-inodes-using-indirect-blocks.patch
new file mode 100644 (file)
index 0000000..dd17d55
--- /dev/null
@@ -0,0 +1,42 @@
+From 170417c8c7bb2cbbdd949bf5c443c0c8f24a203b Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Wed, 15 May 2019 00:51:19 -0400
+Subject: ext4: fix block validity checks for journal inodes using indirect blocks
+
+From: Theodore Ts'o <tytso@mit.edu>
+
+commit 170417c8c7bb2cbbdd949bf5c443c0c8f24a203b upstream.
+
+Commit 345c0dbf3a30 ("ext4: protect journal inode's blocks using
+block_validity") failed to add an exception for the journal inode in
+ext4_check_blockref(), which is the function used by ext4_get_branch()
+for indirect blocks.  This caused attempts to read from the ext3-style
+journals to fail with:
+
+[  848.968550] EXT4-fs error (device sdb7): ext4_get_branch:171: inode #8: block 30343695: comm jbd2/sdb7-8: invalid block
+
+Fix this by adding the missing exception check.
+
+Fixes: 345c0dbf3a30 ("ext4: protect journal inode's blocks using block_validity")
+Reported-by: Arthur Marsh <arthur.marsh@internode.on.net>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/block_validity.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/fs/ext4/block_validity.c
++++ b/fs/ext4/block_validity.c
+@@ -276,6 +276,11 @@ int ext4_check_blockref(const char *func
+       __le32 *bref = p;
+       unsigned int blk;
++      if (ext4_has_feature_journal(inode->i_sb) &&
++          (inode->i_ino ==
++           le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum)))
++              return 0;
++
+       while (bref < p+max) {
+               blk = le32_to_cpu(*bref++);
+               if (blk &&
diff --git a/queue-5.0/ext4-fix-compile-error-when-using-buffer_trace.patch b/queue-5.0/ext4-fix-compile-error-when-using-buffer_trace.patch
new file mode 100644 (file)
index 0000000..8ea593d
--- /dev/null
@@ -0,0 +1,39 @@
+From ddccb6dbe780d68133191477571cb7c69e17bb8c Mon Sep 17 00:00:00 2001
+From: "zhangyi (F)" <yi.zhang@huawei.com>
+Date: Thu, 21 Feb 2019 11:29:10 -0500
+Subject: ext4: fix compile error when using BUFFER_TRACE
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: zhangyi (F) <yi.zhang@huawei.com>
+
+commit ddccb6dbe780d68133191477571cb7c69e17bb8c upstream.
+
+Fix compile error below when using BUFFER_TRACE.
+
+fs/ext4/inode.c: In function ‘ext4_expand_extra_isize’:
+fs/ext4/inode.c:5979:19: error: request for member ‘bh’ in something not a structure or union
+  BUFFER_TRACE(iloc.bh, "get_write_access");
+
+Fixes: c03b45b853f58 ("ext4, project: expand inode extra size if possible")
+Signed-off-by: zhangyi (F) <yi.zhang@huawei.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/inode.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -6006,7 +6006,7 @@ int ext4_expand_extra_isize(struct inode
+       ext4_write_lock_xattr(inode, &no_expand);
+-      BUFFER_TRACE(iloc.bh, "get_write_access");
++      BUFFER_TRACE(iloc->bh, "get_write_access");
+       error = ext4_journal_get_write_access(handle, iloc->bh);
+       if (error) {
+               brelse(iloc->bh);
diff --git a/queue-5.0/ext4-unsigned-int-compared-against-zero.patch b/queue-5.0/ext4-unsigned-int-compared-against-zero.patch
new file mode 100644 (file)
index 0000000..85dc8e9
--- /dev/null
@@ -0,0 +1,35 @@
+From fbbbbd2f28aec991f3fbc248df211550fbdfd58c Mon Sep 17 00:00:00 2001
+From: Colin Ian King <colin.king@canonical.com>
+Date: Fri, 10 May 2019 22:06:38 -0400
+Subject: ext4: unsigned int compared against zero
+
+From: Colin Ian King <colin.king@canonical.com>
+
+commit fbbbbd2f28aec991f3fbc248df211550fbdfd58c upstream.
+
+There are two cases where u32 variables n and err are being checked
+for less than zero error values, the checks is always false because
+the variables are not signed. Fix this by making the variables ints.
+
+Addresses-Coverity: ("Unsigned compared against 0")
+Fixes: 345c0dbf3a30 ("ext4: protect journal inode's blocks using block_validity")
+Signed-off-by: Colin Ian King <colin.king@canonical.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/block_validity.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/block_validity.c
++++ b/fs/ext4/block_validity.c
+@@ -142,7 +142,8 @@ static int ext4_protect_reserved_inode(s
+       struct inode *inode;
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct ext4_map_blocks map;
+-      u32 i = 0, err = 0, num, n;
++      u32 i = 0, num;
++      int err = 0, n;
+       if ((ino < EXT4_ROOT_INO) ||
+           (ino > le32_to_cpu(sbi->s_es->s_inodes_count)))
diff --git a/queue-5.0/iov_iter-optimize-page_copy_sane.patch b/queue-5.0/iov_iter-optimize-page_copy_sane.patch
new file mode 100644 (file)
index 0000000..db29bb0
--- /dev/null
@@ -0,0 +1,56 @@
+From 6daef95b8c914866a46247232a048447fff97279 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 26 Feb 2019 10:42:39 -0800
+Subject: iov_iter: optimize page_copy_sane()
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit 6daef95b8c914866a46247232a048447fff97279 upstream.
+
+Avoid cache line miss dereferencing struct page if we can.
+
+page_copy_sane() mostly deals with order-0 pages.
+
+Extra cache line miss is visible on TCP recvmsg() calls dealing
+with GRO packets (typically 45 page frags are attached to one skb).
+
+Bringing the 45 struct pages into cpu cache while copying the data
+is not free, since the freeing of the skb (and associated
+page frags put_page()) can happen after cache lines have been evicted.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Matthew Wilcox <willy@infradead.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ lib/iov_iter.c |   17 +++++++++++++++--
+ 1 file changed, 15 insertions(+), 2 deletions(-)
+
+--- a/lib/iov_iter.c
++++ b/lib/iov_iter.c
+@@ -861,8 +861,21 @@ EXPORT_SYMBOL(_copy_from_iter_full_nocac
+ static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
+ {
+-      struct page *head = compound_head(page);
+-      size_t v = n + offset + page_address(page) - page_address(head);
++      struct page *head;
++      size_t v = n + offset;
++
++      /*
++       * The general case needs to access the page order in order
++       * to compute the page size.
++       * However, we mostly deal with order-0 pages and thus can
++       * avoid a possible cache line miss for requests that fit all
++       * page orders.
++       */
++      if (n <= v && v <= PAGE_SIZE)
++              return true;
++
++      head = compound_head(page);
++      v += (page - head) << PAGE_SHIFT;
+       if (likely(n <= v && v <= (PAGE_SIZE << compound_order(head))))
+               return true;
diff --git a/queue-5.0/kbuild-turn-auto.conf.cmd-into-a-mandatory-include-file.patch b/queue-5.0/kbuild-turn-auto.conf.cmd-into-a-mandatory-include-file.patch
new file mode 100644 (file)
index 0000000..3a6ae6f
--- /dev/null
@@ -0,0 +1,92 @@
+From d2f8ae0e4c5c754f1b2a7b8388d19a1a977e698a Mon Sep 17 00:00:00 2001
+From: Masahiro Yamada <yamada.masahiro@socionext.com>
+Date: Sun, 12 May 2019 11:13:48 +0900
+Subject: kbuild: turn auto.conf.cmd into a mandatory include file
+
+From: Masahiro Yamada <yamada.masahiro@socionext.com>
+
+commit d2f8ae0e4c5c754f1b2a7b8388d19a1a977e698a upstream.
+
+syncconfig is responsible for keeping auto.conf up-to-date, so if it
+fails for any reason, the build must be terminated immediately.
+
+However, since commit 9390dff66a52 ("kbuild: invoke syncconfig if
+include/config/auto.conf.cmd is missing"), Kbuild continues running
+even after syncconfig fails.
+
+You can confirm this by intentionally making syncconfig error out:
+
+#  diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c
+#  index 08ba146..307b9de 100644
+#  --- a/scripts/kconfig/confdata.c
+#  +++ b/scripts/kconfig/confdata.c
+#  @@ -1023,6 +1023,9 @@ int conf_write_autoconf(int overwrite)
+#          FILE *out, *tristate, *out_h;
+#          int i;
+#
+#  +       if (overwrite)
+#  +               return 1;
+#  +
+#          if (!overwrite && is_present(autoconf_name))
+#                  return 0;
+
+Then, syncconfig fails, but Make would not stop:
+
+  $ make -s mrproper allyesconfig defconfig
+  $ make
+  scripts/kconfig/conf  --syncconfig Kconfig
+
+  *** Error during sync of the configuration.
+
+  make[2]: *** [scripts/kconfig/Makefile;69: syncconfig] Error 1
+  make[1]: *** [Makefile;557: syncconfig] Error 2
+  make: *** [include/config/auto.conf.cmd] Deleting file 'include/config/tristate.conf'
+  make: Failed to remake makefile 'include/config/auto.conf'.
+    SYSTBL  arch/x86/include/generated/asm/syscalls_32.h
+    SYSHDR  arch/x86/include/generated/asm/unistd_32_ia32.h
+    SYSHDR  arch/x86/include/generated/asm/unistd_64_x32.h
+    SYSTBL  arch/x86/include/generated/asm/syscalls_64.h
+  [ continue running ... ]
+
+The reason is in the behavior of a pattern rule with multi-targets.
+
+  %/auto.conf %/auto.conf.cmd %/tristate.conf: $(KCONFIG_CONFIG)
+          $(Q)$(MAKE) -f $(srctree)/Makefile syncconfig
+
+GNU Make knows this rule is responsible for making all the three files
+simultaneously. As far as examined, auto.conf.cmd is the target in
+question when this rule is invoked. It is probably because auto.conf.cmd
+is included below the inclusion of auto.conf.
+
+The inclusion of auto.conf is mandatory, while that of auto.conf.cmd
+is optional. GNU Make does not care about the failure in the process
+of updating optional include files.
+
+I filed this issue (https://savannah.gnu.org/bugs/?56301) in case this
+behavior could be improved somehow in future releases of GNU Make.
+Anyway, it is quite easy to fix our Makefile.
+
+Given that auto.conf is already a mandatory include file, there is no
+reason to keep auto.conf.cmd optional. Make it mandatory as well.
+
+Cc: linux-stable <stable@vger.kernel.org> # 5.0+
+Fixes: 9390dff66a52 ("kbuild: invoke syncconfig if include/config/auto.conf.cmd is missing")
+Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
+[commented out diff above to keep patch happy - gregkh]
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ Makefile |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/Makefile
++++ b/Makefile
+@@ -642,7 +642,7 @@ ifeq ($(may-sync-config),1)
+ # Read in dependencies to all Kconfig* files, make sure to run syncconfig if
+ # changes are detected. This should be included after arch/$(SRCARCH)/Makefile
+ # because some architectures define CROSS_COMPILE there.
+--include include/config/auto.conf.cmd
++include include/config/auto.conf.cmd
+ # To avoid any implicit rule to kick in, define an empty command
+ $(KCONFIG_CONFIG): ;
diff --git a/queue-5.0/libnvdimm-namespace-fix-label-tracking-error.patch b/queue-5.0/libnvdimm-namespace-fix-label-tracking-error.patch
new file mode 100644 (file)
index 0000000..c7a3b51
--- /dev/null
@@ -0,0 +1,158 @@
+From c4703ce11c23423d4b46e3d59aef7979814fd608 Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Tue, 30 Apr 2019 21:51:21 -0700
+Subject: libnvdimm/namespace: Fix label tracking error
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit c4703ce11c23423d4b46e3d59aef7979814fd608 upstream.
+
+Users have reported intermittent occurrences of DIMM initialization
+failures due to duplicate allocations of address capacity detected in
+the labels, or errors of the form below, both have the same root cause.
+
+    nd namespace1.4: failed to track label: 0
+    WARNING: CPU: 17 PID: 1381 at drivers/nvdimm/label.c:863
+
+    RIP: 0010:__pmem_label_update+0x56c/0x590 [libnvdimm]
+    Call Trace:
+     ? nd_pmem_namespace_label_update+0xd6/0x160 [libnvdimm]
+     nd_pmem_namespace_label_update+0xd6/0x160 [libnvdimm]
+     uuid_store+0x17e/0x190 [libnvdimm]
+     kernfs_fop_write+0xf0/0x1a0
+     vfs_write+0xb7/0x1b0
+     ksys_write+0x57/0xd0
+     do_syscall_64+0x60/0x210
+
+Unfortunately those reports were typically with a busy parallel
+namespace creation / destruction loop making it difficult to see the
+components of the bug. However, Jane provided a simple reproducer using
+the work-in-progress sub-section implementation.
+
+When ndctl is reconfiguring a namespace it may take an existing defunct
+/ disabled namespace and reconfigure it with a new uuid and other
+parameters. Critically namespace_update_uuid() takes existing address
+resources and renames them for the new namespace to use / reconfigure as
+it sees fit. The bug is that this rename only happens in the resource
+tracking tree. Existing labels with the old uuid are not reaped leading
+to a scenario where multiple active labels reference the same span of
+address range.
+
+Teach namespace_update_uuid() to flag any references to the old uuid for
+reaping at the next label update attempt.
+
+Cc: <stable@vger.kernel.org>
+Fixes: bf9bccc14c05 ("libnvdimm: pmem label sets and namespace instantiation")
+Link: https://github.com/pmem/ndctl/issues/91
+Reported-by: Jane Chu <jane.chu@oracle.com>
+Reported-by: Jeff Moyer <jmoyer@redhat.com>
+Reported-by: Erwin Tsaur <erwin.tsaur@oracle.com>
+Cc: Johannes Thumshirn <jthumshirn@suse.de>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/nvdimm/label.c          |   29 ++++++++++++++++-------------
+ drivers/nvdimm/namespace_devs.c |   15 +++++++++++++++
+ drivers/nvdimm/nd.h             |    4 ++++
+ 3 files changed, 35 insertions(+), 13 deletions(-)
+
+--- a/drivers/nvdimm/label.c
++++ b/drivers/nvdimm/label.c
+@@ -753,6 +753,17 @@ static const guid_t *to_abstraction_guid
+               return &guid_null;
+ }
++static void reap_victim(struct nd_mapping *nd_mapping,
++              struct nd_label_ent *victim)
++{
++      struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
++      u32 slot = to_slot(ndd, victim->label);
++
++      dev_dbg(ndd->dev, "free: %d\n", slot);
++      nd_label_free_slot(ndd, slot);
++      victim->label = NULL;
++}
++
+ static int __pmem_label_update(struct nd_region *nd_region,
+               struct nd_mapping *nd_mapping, struct nd_namespace_pmem *nspm,
+               int pos, unsigned long flags)
+@@ -760,9 +771,9 @@ static int __pmem_label_update(struct nd
+       struct nd_namespace_common *ndns = &nspm->nsio.common;
+       struct nd_interleave_set *nd_set = nd_region->nd_set;
+       struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+-      struct nd_label_ent *label_ent, *victim = NULL;
+       struct nd_namespace_label *nd_label;
+       struct nd_namespace_index *nsindex;
++      struct nd_label_ent *label_ent;
+       struct nd_label_id label_id;
+       struct resource *res;
+       unsigned long *free;
+@@ -831,18 +842,10 @@ static int __pmem_label_update(struct nd
+       list_for_each_entry(label_ent, &nd_mapping->labels, list) {
+               if (!label_ent->label)
+                       continue;
+-              if (memcmp(nspm->uuid, label_ent->label->uuid,
+-                                      NSLABEL_UUID_LEN) != 0)
+-                      continue;
+-              victim = label_ent;
+-              list_move_tail(&victim->list, &nd_mapping->labels);
+-              break;
+-      }
+-      if (victim) {
+-              dev_dbg(ndd->dev, "free: %d\n", slot);
+-              slot = to_slot(ndd, victim->label);
+-              nd_label_free_slot(ndd, slot);
+-              victim->label = NULL;
++              if (test_and_clear_bit(ND_LABEL_REAP, &label_ent->flags)
++                              || memcmp(nspm->uuid, label_ent->label->uuid,
++                                      NSLABEL_UUID_LEN) == 0)
++                      reap_victim(nd_mapping, label_ent);
+       }
+       /* update index */
+--- a/drivers/nvdimm/namespace_devs.c
++++ b/drivers/nvdimm/namespace_devs.c
+@@ -1247,12 +1247,27 @@ static int namespace_update_uuid(struct
+       for (i = 0; i < nd_region->ndr_mappings; i++) {
+               struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+               struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
++              struct nd_label_ent *label_ent;
+               struct resource *res;
+               for_each_dpa_resource(ndd, res)
+                       if (strcmp(res->name, old_label_id.id) == 0)
+                               sprintf((void *) res->name, "%s",
+                                               new_label_id.id);
++
++              mutex_lock(&nd_mapping->lock);
++              list_for_each_entry(label_ent, &nd_mapping->labels, list) {
++                      struct nd_namespace_label *nd_label = label_ent->label;
++                      struct nd_label_id label_id;
++
++                      if (!nd_label)
++                              continue;
++                      nd_label_gen_id(&label_id, nd_label->uuid,
++                                      __le32_to_cpu(nd_label->flags));
++                      if (strcmp(old_label_id.id, label_id.id) == 0)
++                              set_bit(ND_LABEL_REAP, &label_ent->flags);
++              }
++              mutex_unlock(&nd_mapping->lock);
+       }
+       kfree(*old_uuid);
+  out:
+--- a/drivers/nvdimm/nd.h
++++ b/drivers/nvdimm/nd.h
+@@ -113,8 +113,12 @@ struct nd_percpu_lane {
+       spinlock_t lock;
+ };
++enum nd_label_flags {
++      ND_LABEL_REAP,
++};
+ struct nd_label_ent {
+       struct list_head list;
++      unsigned long flags;
+       struct nd_namespace_label *label;
+ };
diff --git a/queue-5.0/s390-mm-convert-to-the-generic-get_user_pages_fast-code.patch b/queue-5.0/s390-mm-convert-to-the-generic-get_user_pages_fast-code.patch
new file mode 100644 (file)
index 0000000..99999f9
--- /dev/null
@@ -0,0 +1,359 @@
+From 1a42010cdc26bb7e5912984f3c91b8c6d55f089a Mon Sep 17 00:00:00 2001
+From: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Date: Tue, 23 Apr 2019 10:53:21 +0200
+Subject: s390/mm: convert to the generic get_user_pages_fast code
+
+From: Martin Schwidefsky <schwidefsky@de.ibm.com>
+
+commit 1a42010cdc26bb7e5912984f3c91b8c6d55f089a upstream.
+
+Define the gup_fast_permitted to check against the asce_limit of the
+mm attached to the current task, then replace the s390 specific gup
+code with the generic implementation in mm/gup.c.
+
+Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/Kconfig               |    1 
+ arch/s390/include/asm/pgtable.h |   12 +
+ arch/s390/mm/Makefile           |    2 
+ arch/s390/mm/gup.c              |  291 ----------------------------------------
+ 4 files changed, 14 insertions(+), 292 deletions(-)
+
+--- a/arch/s390/Kconfig
++++ b/arch/s390/Kconfig
+@@ -148,6 +148,7 @@ config S390
+       select HAVE_FUNCTION_TRACER
+       select HAVE_FUTEX_CMPXCHG if FUTEX
+       select HAVE_GCC_PLUGINS
++      select HAVE_GENERIC_GUP
+       select HAVE_KERNEL_BZIP2
+       select HAVE_KERNEL_GZIP
+       select HAVE_KERNEL_LZ4
+--- a/arch/s390/include/asm/pgtable.h
++++ b/arch/s390/include/asm/pgtable.h
+@@ -1264,6 +1264,18 @@ static inline pte_t *pte_offset(pmd_t *p
+ #define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
+ #define pte_unmap(pte) do { } while (0)
++static inline bool gup_fast_permitted(unsigned long start, int nr_pages)
++{
++      unsigned long len, end;
++
++      len = (unsigned long) nr_pages << PAGE_SHIFT;
++      end = start + len;
++      if (end < start)
++              return false;
++      return end <= current->mm->context.asce_limit;
++}
++#define gup_fast_permitted gup_fast_permitted
++
+ #define pfn_pte(pfn,pgprot) mk_pte_phys(__pa((pfn) << PAGE_SHIFT),(pgprot))
+ #define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT)
+ #define pte_page(x) pfn_to_page(pte_pfn(x))
+--- a/arch/s390/mm/Makefile
++++ b/arch/s390/mm/Makefile
+@@ -4,7 +4,7 @@
+ #
+ obj-y         := init.o fault.o extmem.o mmap.o vmem.o maccess.o
+-obj-y         += page-states.o gup.o pageattr.o pgtable.o pgalloc.o
++obj-y         += page-states.o pageattr.o pgtable.o pgalloc.o
+ obj-$(CONFIG_CMM)             += cmm.o
+ obj-$(CONFIG_HUGETLB_PAGE)    += hugetlbpage.o
+--- a/arch/s390/mm/gup.c
++++ /dev/null
+@@ -1,291 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0
+-/*
+- *  Lockless get_user_pages_fast for s390
+- *
+- *  Copyright IBM Corp. 2010
+- *  Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+- */
+-#include <linux/sched.h>
+-#include <linux/mm.h>
+-#include <linux/hugetlb.h>
+-#include <linux/vmstat.h>
+-#include <linux/pagemap.h>
+-#include <linux/rwsem.h>
+-#include <asm/pgtable.h>
+-
+-/*
+- * The performance critical leaf functions are made noinline otherwise gcc
+- * inlines everything into a single function which results in too much
+- * register pressure.
+- */
+-static inline int gup_pte_range(pmd_t pmd, unsigned long addr,
+-              unsigned long end, int write, struct page **pages, int *nr)
+-{
+-      struct page *head, *page;
+-      unsigned long mask;
+-      pte_t *ptep, pte;
+-
+-      mask = (write ? _PAGE_PROTECT : 0) | _PAGE_INVALID | _PAGE_SPECIAL;
+-
+-      ptep = pte_offset_map(&pmd, addr);
+-      do {
+-              pte = *ptep;
+-              barrier();
+-              /* Similar to the PMD case, NUMA hinting must take slow path */
+-              if (pte_protnone(pte))
+-                      return 0;
+-              if ((pte_val(pte) & mask) != 0)
+-                      return 0;
+-              VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+-              page = pte_page(pte);
+-              head = compound_head(page);
+-              if (!page_cache_get_speculative(head))
+-                      return 0;
+-              if (unlikely(pte_val(pte) != pte_val(*ptep))) {
+-                      put_page(head);
+-                      return 0;
+-              }
+-              VM_BUG_ON_PAGE(compound_head(page) != head, page);
+-              pages[*nr] = page;
+-              (*nr)++;
+-
+-      } while (ptep++, addr += PAGE_SIZE, addr != end);
+-
+-      return 1;
+-}
+-
+-static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
+-              unsigned long end, int write, struct page **pages, int *nr)
+-{
+-      struct page *head, *page;
+-      unsigned long mask;
+-      int refs;
+-
+-      mask = (write ? _SEGMENT_ENTRY_PROTECT : 0) | _SEGMENT_ENTRY_INVALID;
+-      if ((pmd_val(pmd) & mask) != 0)
+-              return 0;
+-      VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT));
+-
+-      refs = 0;
+-      head = pmd_page(pmd);
+-      page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+-      do {
+-              VM_BUG_ON(compound_head(page) != head);
+-              pages[*nr] = page;
+-              (*nr)++;
+-              page++;
+-              refs++;
+-      } while (addr += PAGE_SIZE, addr != end);
+-
+-      if (!page_cache_add_speculative(head, refs)) {
+-              *nr -= refs;
+-              return 0;
+-      }
+-
+-      if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) {
+-              *nr -= refs;
+-              while (refs--)
+-                      put_page(head);
+-              return 0;
+-      }
+-
+-      return 1;
+-}
+-
+-
+-static inline int gup_pmd_range(pud_t pud, unsigned long addr,
+-              unsigned long end, int write, struct page **pages, int *nr)
+-{
+-      unsigned long next;
+-      pmd_t *pmdp, pmd;
+-
+-      pmdp = pmd_offset(&pud, addr);
+-      do {
+-              pmd = *pmdp;
+-              barrier();
+-              next = pmd_addr_end(addr, end);
+-              if (pmd_none(pmd))
+-                      return 0;
+-              if (unlikely(pmd_large(pmd))) {
+-                      /*
+-                       * NUMA hinting faults need to be handled in the GUP
+-                       * slowpath for accounting purposes and so that they
+-                       * can be serialised against THP migration.
+-                       */
+-                      if (pmd_protnone(pmd))
+-                              return 0;
+-                      if (!gup_huge_pmd(pmdp, pmd, addr, next,
+-                                        write, pages, nr))
+-                              return 0;
+-              } else if (!gup_pte_range(pmd, addr, next,
+-                                        write, pages, nr))
+-                      return 0;
+-      } while (pmdp++, addr = next, addr != end);
+-
+-      return 1;
+-}
+-
+-static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr,
+-              unsigned long end, int write, struct page **pages, int *nr)
+-{
+-      struct page *head, *page;
+-      unsigned long mask;
+-      int refs;
+-
+-      mask = (write ? _REGION_ENTRY_PROTECT : 0) | _REGION_ENTRY_INVALID;
+-      if ((pud_val(pud) & mask) != 0)
+-              return 0;
+-      VM_BUG_ON(!pfn_valid(pud_pfn(pud)));
+-
+-      refs = 0;
+-      head = pud_page(pud);
+-      page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
+-      do {
+-              VM_BUG_ON_PAGE(compound_head(page) != head, page);
+-              pages[*nr] = page;
+-              (*nr)++;
+-              page++;
+-              refs++;
+-      } while (addr += PAGE_SIZE, addr != end);
+-
+-      if (!page_cache_add_speculative(head, refs)) {
+-              *nr -= refs;
+-              return 0;
+-      }
+-
+-      if (unlikely(pud_val(pud) != pud_val(*pudp))) {
+-              *nr -= refs;
+-              while (refs--)
+-                      put_page(head);
+-              return 0;
+-      }
+-
+-      return 1;
+-}
+-
+-static inline int gup_pud_range(p4d_t p4d, unsigned long addr,
+-              unsigned long end, int write, struct page **pages, int *nr)
+-{
+-      unsigned long next;
+-      pud_t *pudp, pud;
+-
+-      pudp = pud_offset(&p4d, addr);
+-      do {
+-              pud = *pudp;
+-              barrier();
+-              next = pud_addr_end(addr, end);
+-              if (pud_none(pud))
+-                      return 0;
+-              if (unlikely(pud_large(pud))) {
+-                      if (!gup_huge_pud(pudp, pud, addr, next, write, pages,
+-                                        nr))
+-                              return 0;
+-              } else if (!gup_pmd_range(pud, addr, next, write, pages,
+-                                        nr))
+-                      return 0;
+-      } while (pudp++, addr = next, addr != end);
+-
+-      return 1;
+-}
+-
+-static inline int gup_p4d_range(pgd_t pgd, unsigned long addr,
+-              unsigned long end, int write, struct page **pages, int *nr)
+-{
+-      unsigned long next;
+-      p4d_t *p4dp, p4d;
+-
+-      p4dp = p4d_offset(&pgd, addr);
+-      do {
+-              p4d = *p4dp;
+-              barrier();
+-              next = p4d_addr_end(addr, end);
+-              if (p4d_none(p4d))
+-                      return 0;
+-              if (!gup_pud_range(p4d, addr, next, write, pages, nr))
+-                      return 0;
+-      } while (p4dp++, addr = next, addr != end);
+-
+-      return 1;
+-}
+-
+-/*
+- * Like get_user_pages_fast() except its IRQ-safe in that it won't fall
+- * back to the regular GUP.
+- * Note a difference with get_user_pages_fast: this always returns the
+- * number of pages pinned, 0 if no pages were pinned.
+- */
+-int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
+-                        struct page **pages)
+-{
+-      struct mm_struct *mm = current->mm;
+-      unsigned long addr, len, end;
+-      unsigned long next, flags;
+-      pgd_t *pgdp, pgd;
+-      int nr = 0;
+-
+-      start &= PAGE_MASK;
+-      addr = start;
+-      len = (unsigned long) nr_pages << PAGE_SHIFT;
+-      end = start + len;
+-      if ((end <= start) || (end > mm->context.asce_limit))
+-              return 0;
+-      /*
+-       * local_irq_save() doesn't prevent pagetable teardown, but does
+-       * prevent the pagetables from being freed on s390.
+-       *
+-       * So long as we atomically load page table pointers versus teardown,
+-       * we can follow the address down to the the page and take a ref on it.
+-       */
+-      local_irq_save(flags);
+-      pgdp = pgd_offset(mm, addr);
+-      do {
+-              pgd = *pgdp;
+-              barrier();
+-              next = pgd_addr_end(addr, end);
+-              if (pgd_none(pgd))
+-                      break;
+-              if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
+-                      break;
+-      } while (pgdp++, addr = next, addr != end);
+-      local_irq_restore(flags);
+-
+-      return nr;
+-}
+-
+-/**
+- * get_user_pages_fast() - pin user pages in memory
+- * @start:    starting user address
+- * @nr_pages: number of pages from start to pin
+- * @write:    whether pages will be written to
+- * @pages:    array that receives pointers to the pages pinned.
+- *            Should be at least nr_pages long.
+- *
+- * Attempt to pin user pages in memory without taking mm->mmap_sem.
+- * If not successful, it will fall back to taking the lock and
+- * calling get_user_pages().
+- *
+- * Returns number of pages pinned. This may be fewer than the number
+- * requested. If nr_pages is 0 or negative, returns 0. If no pages
+- * were pinned, returns -errno.
+- */
+-int get_user_pages_fast(unsigned long start, int nr_pages, int write,
+-                      struct page **pages)
+-{
+-      int nr, ret;
+-
+-      might_sleep();
+-      start &= PAGE_MASK;
+-      nr = __get_user_pages_fast(start, nr_pages, write, pages);
+-      if (nr == nr_pages)
+-              return nr;
+-
+-      /* Try to get the remaining pages with get_user_pages */
+-      start += nr << PAGE_SHIFT;
+-      pages += nr;
+-      ret = get_user_pages_unlocked(start, nr_pages - nr, pages,
+-                                    write ? FOLL_WRITE : 0);
+-      /* Have to be a bit careful with return values */
+-      if (nr > 0)
+-              ret = (ret < 0) ? nr : ret + nr;
+-      return ret;
+-}
diff --git a/queue-5.0/s390-mm-make-the-pxd_offset-functions-more-robust.patch b/queue-5.0/s390-mm-make-the-pxd_offset-functions-more-robust.patch
new file mode 100644 (file)
index 0000000..d933a1b
--- /dev/null
@@ -0,0 +1,250 @@
+From d1874a0c2805fcfa9162c972d6b7541e57adb542 Mon Sep 17 00:00:00 2001
+From: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Date: Tue, 23 Apr 2019 10:51:12 +0200
+Subject: s390/mm: make the pxd_offset functions more robust
+
+From: Martin Schwidefsky <schwidefsky@de.ibm.com>
+
+commit d1874a0c2805fcfa9162c972d6b7541e57adb542 upstream.
+
+Change the way how pgd_offset, p4d_offset, pud_offset and pmd_offset
+walk the page tables. pgd_offset now always calculates the index for
+the top-level page table and adds it to the pgd, this is either a
+segment table offset for a 2-level setup, a region-3 offset for 3-levels,
+region-2 offset for 4-levels, or a region-1 offset for a 5-level setup.
+The other three functions p4d_offset, pud_offset and pmd_offset will
+only add the respective offset if they dereference the passed pointer.
+
+With the new way of walking the page tables a sequence like this from
+mm/gup.c now works:
+
+     pgdp = pgd_offset(current->mm, addr);
+     pgd = READ_ONCE(*pgdp);
+     p4dp = p4d_offset(&pgd, addr);
+     p4d = READ_ONCE(*p4dp);
+     pudp = pud_offset(&p4d, addr);
+     pud = READ_ONCE(*pudp);
+     pmdp = pmd_offset(&pud, addr);
+     pmd = READ_ONCE(*pmdp);
+
+Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/include/asm/pgtable.h |   67 +++++++++++++++++++++++++---------------
+ arch/s390/mm/gup.c              |   33 +++++++------------
+ 2 files changed, 55 insertions(+), 45 deletions(-)
+
+--- a/arch/s390/include/asm/pgtable.h
++++ b/arch/s390/include/asm/pgtable.h
+@@ -1203,42 +1203,67 @@ static inline pte_t mk_pte(struct page *
+ #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
+ #define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1))
+-#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
+-#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+-#define pgd_offset_raw(pgd, addr) ((pgd) + pgd_index(addr))
+-
+ #define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN)
+ #define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN)
+ #define p4d_deref(pud) (p4d_val(pud) & _REGION_ENTRY_ORIGIN)
+ #define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN)
+-static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
++/*
++ * The pgd_offset function *always* adds the index for the top-level
++ * region/segment table. This is done to get a sequence like the
++ * following to work:
++ *    pgdp = pgd_offset(current->mm, addr);
++ *    pgd = READ_ONCE(*pgdp);
++ *    p4dp = p4d_offset(&pgd, addr);
++ *    ...
++ * The subsequent p4d_offset, pud_offset and pmd_offset functions
++ * only add an index if they dereferenced the pointer.
++ */
++static inline pgd_t *pgd_offset_raw(pgd_t *pgd, unsigned long address)
+ {
+-      p4d_t *p4d = (p4d_t *) pgd;
++      unsigned long rste;
++      unsigned int shift;
+-      if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1)
+-              p4d = (p4d_t *) pgd_deref(*pgd);
+-      return p4d + p4d_index(address);
++      /* Get the first entry of the top level table */
++      rste = pgd_val(*pgd);
++      /* Pick up the shift from the table type of the first entry */
++      shift = ((rste & _REGION_ENTRY_TYPE_MASK) >> 2) * 11 + 20;
++      return pgd + ((address >> shift) & (PTRS_PER_PGD - 1));
+ }
+-static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
++#define pgd_offset(mm, address) pgd_offset_raw(READ_ONCE((mm)->pgd), address)
++#define pgd_offset_k(address) pgd_offset(&init_mm, address)
++
++static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
+ {
+-      pud_t *pud = (pud_t *) p4d;
++      if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R1)
++              return (p4d_t *) pgd_deref(*pgd) + p4d_index(address);
++      return (p4d_t *) pgd;
++}
+-      if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
+-              pud = (pud_t *) p4d_deref(*p4d);
+-      return pud + pud_index(address);
++static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
++{
++      if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R2)
++              return (pud_t *) p4d_deref(*p4d) + pud_index(address);
++      return (pud_t *) p4d;
+ }
+ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
+ {
+-      pmd_t *pmd = (pmd_t *) pud;
++      if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R3)
++              return (pmd_t *) pud_deref(*pud) + pmd_index(address);
++      return (pmd_t *) pud;
++}
+-      if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
+-              pmd = (pmd_t *) pud_deref(*pud);
+-      return pmd + pmd_index(address);
++static inline pte_t *pte_offset(pmd_t *pmd, unsigned long address)
++{
++      return (pte_t *) pmd_deref(*pmd) + pte_index(address);
+ }
++#define pte_offset_kernel(pmd, address) pte_offset(pmd, address)
++#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
++#define pte_unmap(pte) do { } while (0)
++
+ #define pfn_pte(pfn,pgprot) mk_pte_phys(__pa((pfn) << PAGE_SHIFT),(pgprot))
+ #define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT)
+ #define pte_page(x) pfn_to_page(pte_pfn(x))
+@@ -1248,12 +1273,6 @@ static inline pmd_t *pmd_offset(pud_t *p
+ #define p4d_page(p4d) pfn_to_page(p4d_pfn(p4d))
+ #define pgd_page(pgd) pfn_to_page(pgd_pfn(pgd))
+-/* Find an entry in the lowest level page table.. */
+-#define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr))
+-#define pte_offset_kernel(pmd, address) pte_offset(pmd,address)
+-#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
+-#define pte_unmap(pte) do { } while (0)
+-
+ static inline pmd_t pmd_wrprotect(pmd_t pmd)
+ {
+       pmd_val(pmd) &= ~_SEGMENT_ENTRY_WRITE;
+--- a/arch/s390/mm/gup.c
++++ b/arch/s390/mm/gup.c
+@@ -18,7 +18,7 @@
+  * inlines everything into a single function which results in too much
+  * register pressure.
+  */
+-static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
++static inline int gup_pte_range(pmd_t pmd, unsigned long addr,
+               unsigned long end, int write, struct page **pages, int *nr)
+ {
+       struct page *head, *page;
+@@ -27,7 +27,7 @@ static inline int gup_pte_range(pmd_t *p
+       mask = (write ? _PAGE_PROTECT : 0) | _PAGE_INVALID | _PAGE_SPECIAL;
+-      ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr);
++      ptep = pte_offset_map(&pmd, addr);
+       do {
+               pte = *ptep;
+               barrier();
+@@ -93,16 +93,13 @@ static inline int gup_huge_pmd(pmd_t *pm
+ }
+-static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
++static inline int gup_pmd_range(pud_t pud, unsigned long addr,
+               unsigned long end, int write, struct page **pages, int *nr)
+ {
+       unsigned long next;
+       pmd_t *pmdp, pmd;
+-      pmdp = (pmd_t *) pudp;
+-      if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
+-              pmdp = (pmd_t *) pud_deref(pud);
+-      pmdp += pmd_index(addr);
++      pmdp = pmd_offset(&pud, addr);
+       do {
+               pmd = *pmdp;
+               barrier();
+@@ -120,7 +117,7 @@ static inline int gup_pmd_range(pud_t *p
+                       if (!gup_huge_pmd(pmdp, pmd, addr, next,
+                                         write, pages, nr))
+                               return 0;
+-              } else if (!gup_pte_range(pmdp, pmd, addr, next,
++              } else if (!gup_pte_range(pmd, addr, next,
+                                         write, pages, nr))
+                       return 0;
+       } while (pmdp++, addr = next, addr != end);
+@@ -166,16 +163,13 @@ static int gup_huge_pud(pud_t *pudp, pud
+       return 1;
+ }
+-static inline int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr,
++static inline int gup_pud_range(p4d_t p4d, unsigned long addr,
+               unsigned long end, int write, struct page **pages, int *nr)
+ {
+       unsigned long next;
+       pud_t *pudp, pud;
+-      pudp = (pud_t *) p4dp;
+-      if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
+-              pudp = (pud_t *) p4d_deref(p4d);
+-      pudp += pud_index(addr);
++      pudp = pud_offset(&p4d, addr);
+       do {
+               pud = *pudp;
+               barrier();
+@@ -186,7 +180,7 @@ static inline int gup_pud_range(p4d_t *p
+                       if (!gup_huge_pud(pudp, pud, addr, next, write, pages,
+                                         nr))
+                               return 0;
+-              } else if (!gup_pmd_range(pudp, pud, addr, next, write, pages,
++              } else if (!gup_pmd_range(pud, addr, next, write, pages,
+                                         nr))
+                       return 0;
+       } while (pudp++, addr = next, addr != end);
+@@ -194,23 +188,20 @@ static inline int gup_pud_range(p4d_t *p
+       return 1;
+ }
+-static inline int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
++static inline int gup_p4d_range(pgd_t pgd, unsigned long addr,
+               unsigned long end, int write, struct page **pages, int *nr)
+ {
+       unsigned long next;
+       p4d_t *p4dp, p4d;
+-      p4dp = (p4d_t *) pgdp;
+-      if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1)
+-              p4dp = (p4d_t *) pgd_deref(pgd);
+-      p4dp += p4d_index(addr);
++      p4dp = p4d_offset(&pgd, addr);
+       do {
+               p4d = *p4dp;
+               barrier();
+               next = p4d_addr_end(addr, end);
+               if (p4d_none(p4d))
+                       return 0;
+-              if (!gup_pud_range(p4dp, p4d, addr, next, write, pages, nr))
++              if (!gup_pud_range(p4d, addr, next, write, pages, nr))
+                       return 0;
+       } while (p4dp++, addr = next, addr != end);
+@@ -253,7 +244,7 @@ int __get_user_pages_fast(unsigned long
+               next = pgd_addr_end(addr, end);
+               if (pgd_none(pgd))
+                       break;
+-              if (!gup_p4d_range(pgdp, pgd, addr, next, write, pages, &nr))
++              if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
+                       break;
+       } while (pgdp++, addr = next, addr != end);
+       local_irq_restore(flags);
index a24d2b868eb1405e5aab253e3b5c474c083b23b4..88513d65889c69aeed72a91536cfda56f6372376 100644 (file)
@@ -110,3 +110,14 @@ jbd2-fix-potential-double-free.patch
 kvm-fix-the-bitmap-range-to-copy-during-clear-dirty.patch
 kvm-x86-skip-efer-vs.-guest-cpuid-checks-for-host-initiated-writes.patch
 kvm-lapic-busy-wait-for-timer-to-expire-when-using-hv_timer.patch
+kbuild-turn-auto.conf.cmd-into-a-mandatory-include-file.patch
+xen-pvh-set-xen_domain_type-to-hvm-in-xen_pvh_init.patch
+xen-pvh-correctly-setup-the-pv-efi-interface-for-dom0.patch
+libnvdimm-namespace-fix-label-tracking-error.patch
+iov_iter-optimize-page_copy_sane.patch
+s390-mm-make-the-pxd_offset-functions-more-robust.patch
+s390-mm-convert-to-the-generic-get_user_pages_fast-code.patch
+ext4-unsigned-int-compared-against-zero.patch
+ext4-fix-block-validity-checks-for-journal-inodes-using-indirect-blocks.patch
+ext4-fix-compile-error-when-using-buffer_trace.patch
+ext4-don-t-update-s_rev_level-if-not-required.patch
diff --git a/queue-5.0/xen-pvh-correctly-setup-the-pv-efi-interface-for-dom0.patch b/queue-5.0/xen-pvh-correctly-setup-the-pv-efi-interface-for-dom0.patch
new file mode 100644 (file)
index 0000000..1687369
--- /dev/null
@@ -0,0 +1,154 @@
+From 72813bfbf0276a97c82af038efb5f02dcdd9e310 Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne <roger.pau@citrix.com>
+Date: Tue, 23 Apr 2019 15:04:16 +0200
+Subject: xen/pvh: correctly setup the PV EFI interface for dom0
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Roger Pau Monne <roger.pau@citrix.com>
+
+commit 72813bfbf0276a97c82af038efb5f02dcdd9e310 upstream.
+
+This involves initializing the boot params EFI related fields and the
+efi global variable.
+
+Without this fix a PVH dom0 doesn't detect when booted from EFI, and
+thus doesn't support accessing any of the EFI related data.
+
+Reported-by: PGNet Dev <pgnet.dev@gmail.com>
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: stable@vger.kernel.org # 4.19+
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/platform/pvh/enlighten.c |    8 ++++----
+ arch/x86/xen/efi.c                |   12 ++++++------
+ arch/x86/xen/enlighten_pv.c       |    2 +-
+ arch/x86/xen/enlighten_pvh.c      |    6 +++++-
+ arch/x86/xen/xen-ops.h            |    4 ++--
+ 5 files changed, 18 insertions(+), 14 deletions(-)
+
+--- a/arch/x86/platform/pvh/enlighten.c
++++ b/arch/x86/platform/pvh/enlighten.c
+@@ -44,8 +44,6 @@ void __init __weak mem_map_via_hcall(str
+ static void __init init_pvh_bootparams(bool xen_guest)
+ {
+-      memset(&pvh_bootparams, 0, sizeof(pvh_bootparams));
+-
+       if ((pvh_start_info.version > 0) && (pvh_start_info.memmap_entries)) {
+               struct hvm_memmap_table_entry *ep;
+               int i;
+@@ -103,7 +101,7 @@ static void __init init_pvh_bootparams(b
+  * If we are trying to boot a Xen PVH guest, it is expected that the kernel
+  * will have been configured to provide the required override for this routine.
+  */
+-void __init __weak xen_pvh_init(void)
++void __init __weak xen_pvh_init(struct boot_params *boot_params)
+ {
+       xen_raw_printk("Error: Missing xen PVH initialization\n");
+       BUG();
+@@ -112,7 +110,7 @@ void __init __weak xen_pvh_init(void)
+ static void hypervisor_specific_init(bool xen_guest)
+ {
+       if (xen_guest)
+-              xen_pvh_init();
++              xen_pvh_init(&pvh_bootparams);
+ }
+ /*
+@@ -131,6 +129,8 @@ void __init xen_prepare_pvh(void)
+               BUG();
+       }
++      memset(&pvh_bootparams, 0, sizeof(pvh_bootparams));
++
+       hypervisor_specific_init(xen_guest);
+       init_pvh_bootparams(xen_guest);
+--- a/arch/x86/xen/efi.c
++++ b/arch/x86/xen/efi.c
+@@ -158,7 +158,7 @@ static enum efi_secureboot_mode xen_efi_
+       return efi_secureboot_mode_unknown;
+ }
+-void __init xen_efi_init(void)
++void __init xen_efi_init(struct boot_params *boot_params)
+ {
+       efi_system_table_t *efi_systab_xen;
+@@ -167,12 +167,12 @@ void __init xen_efi_init(void)
+       if (efi_systab_xen == NULL)
+               return;
+-      strncpy((char *)&boot_params.efi_info.efi_loader_signature, "Xen",
+-                      sizeof(boot_params.efi_info.efi_loader_signature));
+-      boot_params.efi_info.efi_systab = (__u32)__pa(efi_systab_xen);
+-      boot_params.efi_info.efi_systab_hi = (__u32)(__pa(efi_systab_xen) >> 32);
++      strncpy((char *)&boot_params->efi_info.efi_loader_signature, "Xen",
++                      sizeof(boot_params->efi_info.efi_loader_signature));
++      boot_params->efi_info.efi_systab = (__u32)__pa(efi_systab_xen);
++      boot_params->efi_info.efi_systab_hi = (__u32)(__pa(efi_systab_xen) >> 32);
+-      boot_params.secure_boot = xen_efi_get_secureboot();
++      boot_params->secure_boot = xen_efi_get_secureboot();
+       set_bit(EFI_BOOT, &efi.flags);
+       set_bit(EFI_PARAVIRT, &efi.flags);
+--- a/arch/x86/xen/enlighten_pv.c
++++ b/arch/x86/xen/enlighten_pv.c
+@@ -1403,7 +1403,7 @@ asmlinkage __visible void __init xen_sta
+       /* We need this for printk timestamps */
+       xen_setup_runstate_info(0);
+-      xen_efi_init();
++      xen_efi_init(&boot_params);
+       /* Start the world */
+ #ifdef CONFIG_X86_32
+--- a/arch/x86/xen/enlighten_pvh.c
++++ b/arch/x86/xen/enlighten_pvh.c
+@@ -13,6 +13,8 @@
+ #include <xen/interface/memory.h>
++#include "xen-ops.h"
++
+ /*
+  * PVH variables.
+  *
+@@ -21,7 +23,7 @@
+  */
+ bool xen_pvh __attribute__((section(".data"))) = 0;
+-void __init xen_pvh_init(void)
++void __init xen_pvh_init(struct boot_params *boot_params)
+ {
+       u32 msr;
+       u64 pfn;
+@@ -33,6 +35,8 @@ void __init xen_pvh_init(void)
+       msr = cpuid_ebx(xen_cpuid_base() + 2);
+       pfn = __pa(hypercall_page);
+       wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
++
++      xen_efi_init(boot_params);
+ }
+ void __init mem_map_via_hcall(struct boot_params *boot_params_p)
+--- a/arch/x86/xen/xen-ops.h
++++ b/arch/x86/xen/xen-ops.h
+@@ -122,9 +122,9 @@ static inline void __init xen_init_vga(c
+ void __init xen_init_apic(void);
+ #ifdef CONFIG_XEN_EFI
+-extern void xen_efi_init(void);
++extern void xen_efi_init(struct boot_params *boot_params);
+ #else
+-static inline void __init xen_efi_init(void)
++static inline void __init xen_efi_init(struct boot_params *boot_params)
+ {
+ }
+ #endif
diff --git a/queue-5.0/xen-pvh-set-xen_domain_type-to-hvm-in-xen_pvh_init.patch b/queue-5.0/xen-pvh-set-xen_domain_type-to-hvm-in-xen_pvh_init.patch
new file mode 100644 (file)
index 0000000..b3349e8
--- /dev/null
@@ -0,0 +1,34 @@
+From c9f804d64bb93c8dbf957df1d7e9de11380e522d Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne <roger.pau@citrix.com>
+Date: Tue, 23 Apr 2019 15:04:15 +0200
+Subject: xen/pvh: set xen_domain_type to HVM in xen_pvh_init
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Roger Pau Monne <roger.pau@citrix.com>
+
+commit c9f804d64bb93c8dbf957df1d7e9de11380e522d upstream.
+
+Or else xen_domain() returns false despite xen_pvh being set.
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: stable@vger.kernel.org # 4.19+
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/xen/enlighten_pvh.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/xen/enlighten_pvh.c
++++ b/arch/x86/xen/enlighten_pvh.c
+@@ -27,6 +27,7 @@ void __init xen_pvh_init(void)
+       u64 pfn;
+       xen_pvh = 1;
++      xen_domain_type = XEN_HVM_DOMAIN;
+       xen_start_flags = pvh_start_info.flags;
+       msr = cpuid_ebx(xen_cpuid_base() + 2);