--- /dev/null
+From 2247bb335ab9c40058484cac36ea74ee652f3b7b Mon Sep 17 00:00:00 2001
+From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+Date: Fri, 7 Oct 2016 17:01:07 -0700
+Subject: mm/hugetlb: fix memory offline with hugepage size > memory block size
+
+From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+
+commit 2247bb335ab9c40058484cac36ea74ee652f3b7b upstream.
+
+Patch series "mm/hugetlb: memory offline issues with hugepages", v4.
+
+This addresses several issues with hugepages and memory offline. While
+the first patch fixes a panic, and is therefore rather important, the
+last patch is just a performance optimization.
+
+The second patch fixes a theoretical issue with reserved hugepages,
+while still leaving some ugly usability issue, see description.
+
+This patch (of 3):
+
+dissolve_free_huge_pages() will either run into the VM_BUG_ON() or a
+list corruption and addressing exception when trying to set a memory
+block offline that is part (but not the first part) of a "gigantic"
+hugetlb page with a size > memory block size.
+
+When no other smaller hugetlb page sizes are present, the VM_BUG_ON()
+will trigger directly. In the other case we will run into an addressing
+exception later, because dissolve_free_huge_page() will not work on the
+head page of the compound hugetlb page which will result in a NULL
+hstate from page_hstate().
+
+To fix this, first remove the VM_BUG_ON() because it is wrong, and then
+use the compound head page in dissolve_free_huge_page(). This means
+that an unused pre-allocated gigantic page that has any part of itself
+inside the memory block that is going offline will be dissolved
+completely. Losing an unused gigantic hugepage is preferable to failing
+the memory offline, for example in the situation where a (possibly
+faulty) memory DIMM needs to go offline.
+
+Changes for v4.4 stable:
+ - make it apply w/o commit c1470b33 "mm/hugetlb: fix incorrect
+ hugepages count during mem hotplug"
+
+Fixes: c8721bbb ("mm: memory-hotplug: enable memory hotplug to handle hugepage")
+Link: http://lkml.kernel.org/r/20160926172811.94033-2-gerald.schaefer@de.ibm.com
+Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.vnet.ibm.com>
+Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
+Cc: Rui Teng <rui.teng@linux.vnet.ibm.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/hugetlb.c | 13 +++++++------
+ 1 file changed, 7 insertions(+), 6 deletions(-)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -1416,12 +1416,13 @@ static void dissolve_free_huge_page(stru
+ {
+ spin_lock(&hugetlb_lock);
+ if (PageHuge(page) && !page_count(page)) {
+- struct hstate *h = page_hstate(page);
+- int nid = page_to_nid(page);
+- list_del(&page->lru);
++ struct page *head = compound_head(page);
++ struct hstate *h = page_hstate(head);
++ int nid = page_to_nid(head);
++ list_del(&head->lru);
+ h->free_huge_pages--;
+ h->free_huge_pages_node[nid]--;
+- update_and_free_page(h, page);
++ update_and_free_page(h, head);
+ }
+ spin_unlock(&hugetlb_lock);
+ }
+@@ -1429,7 +1430,8 @@ static void dissolve_free_huge_page(stru
+ /*
+ * Dissolve free hugepages in a given pfn range. Used by memory hotplug to
+ * make specified memory blocks removable from the system.
+- * Note that start_pfn should aligned with (minimum) hugepage size.
++ * Note that this will dissolve a free gigantic hugepage completely, if any
++ * part of it lies within the given range.
+ */
+ void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
+ {
+@@ -1438,7 +1440,6 @@ void dissolve_free_huge_pages(unsigned l
+ if (!hugepages_supported())
+ return;
+
+- VM_BUG_ON(!IS_ALIGNED(start_pfn, 1 << minimum_order));
+ for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << minimum_order)
+ dissolve_free_huge_page(pfn_to_page(pfn));
+ }
--- /dev/null
+From 073931017b49d9458aa351605b43a7e34598caef Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Mon, 19 Sep 2016 17:39:09 +0200
+Subject: posix_acl: Clear SGID bit when setting file permissions
+
+From: Jan Kara <jack@suse.cz>
+
+commit 073931017b49d9458aa351605b43a7e34598caef upstream.
+
+When file permissions are modified via chmod(2) and the user is not in
+the owning group or capable of CAP_FSETID, the setgid bit is cleared in
+inode_change_ok(). Setting a POSIX ACL via setxattr(2) sets the file
+permissions as well as the new ACL, but doesn't clear the setgid bit in
+a similar way; this allows to bypass the check in chmod(2). Fix that.
+
+References: CVE-2016-7097
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Jeff Layton <jlayton@redhat.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/9p/acl.c | 40 +++++++++++++++++-----------------------
+ fs/btrfs/acl.c | 6 ++----
+ fs/ceph/acl.c | 6 ++----
+ fs/ext2/acl.c | 12 ++++--------
+ fs/ext4/acl.c | 12 ++++--------
+ fs/f2fs/acl.c | 6 ++----
+ fs/gfs2/acl.c | 12 +++---------
+ fs/hfsplus/posix_acl.c | 4 ++--
+ fs/jffs2/acl.c | 9 ++++-----
+ fs/jfs/acl.c | 6 ++----
+ fs/ocfs2/acl.c | 10 ++++------
+ fs/posix_acl.c | 31 +++++++++++++++++++++++++++++++
+ fs/reiserfs/xattr_acl.c | 8 ++------
+ fs/xfs/xfs_acl.c | 13 ++++---------
+ include/linux/posix_acl.h | 1 +
+ 15 files changed, 84 insertions(+), 92 deletions(-)
+
+--- a/fs/9p/acl.c
++++ b/fs/9p/acl.c
+@@ -282,32 +282,26 @@ static int v9fs_xattr_set_acl(const stru
+ switch (handler->flags) {
+ case ACL_TYPE_ACCESS:
+ if (acl) {
+- umode_t mode = inode->i_mode;
+- retval = posix_acl_equiv_mode(acl, &mode);
+- if (retval < 0)
++ struct iattr iattr;
++
++ retval = posix_acl_update_mode(inode, &iattr.ia_mode, &acl);
++ if (retval)
+ goto err_out;
+- else {
+- struct iattr iattr;
+- if (retval == 0) {
+- /*
+- * ACL can be represented
+- * by the mode bits. So don't
+- * update ACL.
+- */
+- acl = NULL;
+- value = NULL;
+- size = 0;
+- }
+- /* Updte the mode bits */
+- iattr.ia_mode = ((mode & S_IALLUGO) |
+- (inode->i_mode & ~S_IALLUGO));
+- iattr.ia_valid = ATTR_MODE;
+- /* FIXME should we update ctime ?
+- * What is the following setxattr update the
+- * mode ?
++ if (!acl) {
++ /*
++ * ACL can be represented
++ * by the mode bits. So don't
++ * update ACL.
+ */
+- v9fs_vfs_setattr_dotl(dentry, &iattr);
++ value = NULL;
++ size = 0;
+ }
++ iattr.ia_valid = ATTR_MODE;
++ /* FIXME should we update ctime ?
++ * What is the following setxattr update the
++ * mode ?
++ */
++ v9fs_vfs_setattr_dotl(dentry, &iattr);
+ }
+ break;
+ case ACL_TYPE_DEFAULT:
+--- a/fs/btrfs/acl.c
++++ b/fs/btrfs/acl.c
+@@ -83,11 +83,9 @@ static int __btrfs_set_acl(struct btrfs_
+ case ACL_TYPE_ACCESS:
+ name = POSIX_ACL_XATTR_ACCESS;
+ if (acl) {
+- ret = posix_acl_equiv_mode(acl, &inode->i_mode);
+- if (ret < 0)
++ ret = posix_acl_update_mode(inode, &inode->i_mode, &acl);
++ if (ret)
+ return ret;
+- if (ret == 0)
+- acl = NULL;
+ }
+ ret = 0;
+ break;
+--- a/fs/ceph/acl.c
++++ b/fs/ceph/acl.c
+@@ -94,11 +94,9 @@ int ceph_set_acl(struct inode *inode, st
+ case ACL_TYPE_ACCESS:
+ name = POSIX_ACL_XATTR_ACCESS;
+ if (acl) {
+- ret = posix_acl_equiv_mode(acl, &new_mode);
+- if (ret < 0)
++ ret = posix_acl_update_mode(inode, &new_mode, &acl);
++ if (ret)
+ goto out;
+- if (ret == 0)
+- acl = NULL;
+ }
+ break;
+ case ACL_TYPE_DEFAULT:
+--- a/fs/ext2/acl.c
++++ b/fs/ext2/acl.c
+@@ -193,15 +193,11 @@ ext2_set_acl(struct inode *inode, struct
+ case ACL_TYPE_ACCESS:
+ name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS;
+ if (acl) {
+- error = posix_acl_equiv_mode(acl, &inode->i_mode);
+- if (error < 0)
++ error = posix_acl_update_mode(inode, &inode->i_mode, &acl);
++ if (error)
+ return error;
+- else {
+- inode->i_ctime = CURRENT_TIME_SEC;
+- mark_inode_dirty(inode);
+- if (error == 0)
+- acl = NULL;
+- }
++ inode->i_ctime = CURRENT_TIME_SEC;
++ mark_inode_dirty(inode);
+ }
+ break;
+
+--- a/fs/ext4/acl.c
++++ b/fs/ext4/acl.c
+@@ -196,15 +196,11 @@ __ext4_set_acl(handle_t *handle, struct
+ case ACL_TYPE_ACCESS:
+ name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
+ if (acl) {
+- error = posix_acl_equiv_mode(acl, &inode->i_mode);
+- if (error < 0)
++ error = posix_acl_update_mode(inode, &inode->i_mode, &acl);
++ if (error)
+ return error;
+- else {
+- inode->i_ctime = ext4_current_time(inode);
+- ext4_mark_inode_dirty(handle, inode);
+- if (error == 0)
+- acl = NULL;
+- }
++ inode->i_ctime = ext4_current_time(inode);
++ ext4_mark_inode_dirty(handle, inode);
+ }
+ break;
+
+--- a/fs/f2fs/acl.c
++++ b/fs/f2fs/acl.c
+@@ -214,12 +214,10 @@ static int __f2fs_set_acl(struct inode *
+ case ACL_TYPE_ACCESS:
+ name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
+ if (acl) {
+- error = posix_acl_equiv_mode(acl, &inode->i_mode);
+- if (error < 0)
++ error = posix_acl_update_mode(inode, &inode->i_mode, &acl);
++ if (error)
+ return error;
+ set_acl_inode(fi, inode->i_mode);
+- if (error == 0)
+- acl = NULL;
+ }
+ break;
+
+--- a/fs/gfs2/acl.c
++++ b/fs/gfs2/acl.c
+@@ -79,17 +79,11 @@ int gfs2_set_acl(struct inode *inode, st
+ if (type == ACL_TYPE_ACCESS) {
+ umode_t mode = inode->i_mode;
+
+- error = posix_acl_equiv_mode(acl, &mode);
+- if (error < 0)
++ error = posix_acl_update_mode(inode, &inode->i_mode, &acl);
++ if (error)
+ return error;
+-
+- if (error == 0)
+- acl = NULL;
+-
+- if (mode != inode->i_mode) {
+- inode->i_mode = mode;
++ if (mode != inode->i_mode)
+ mark_inode_dirty(inode);
+- }
+ }
+
+ if (acl) {
+--- a/fs/hfsplus/posix_acl.c
++++ b/fs/hfsplus/posix_acl.c
+@@ -68,8 +68,8 @@ int hfsplus_set_posix_acl(struct inode *
+ case ACL_TYPE_ACCESS:
+ xattr_name = POSIX_ACL_XATTR_ACCESS;
+ if (acl) {
+- err = posix_acl_equiv_mode(acl, &inode->i_mode);
+- if (err < 0)
++ err = posix_acl_update_mode(inode, &inode->i_mode, &acl);
++ if (err)
+ return err;
+ }
+ err = 0;
+--- a/fs/jffs2/acl.c
++++ b/fs/jffs2/acl.c
+@@ -235,9 +235,10 @@ int jffs2_set_acl(struct inode *inode, s
+ case ACL_TYPE_ACCESS:
+ xprefix = JFFS2_XPREFIX_ACL_ACCESS;
+ if (acl) {
+- umode_t mode = inode->i_mode;
+- rc = posix_acl_equiv_mode(acl, &mode);
+- if (rc < 0)
++ umode_t mode;
++
++ rc = posix_acl_update_mode(inode, &mode, &acl);
++ if (rc)
+ return rc;
+ if (inode->i_mode != mode) {
+ struct iattr attr;
+@@ -249,8 +250,6 @@ int jffs2_set_acl(struct inode *inode, s
+ if (rc < 0)
+ return rc;
+ }
+- if (rc == 0)
+- acl = NULL;
+ }
+ break;
+ case ACL_TYPE_DEFAULT:
+--- a/fs/jfs/acl.c
++++ b/fs/jfs/acl.c
+@@ -84,13 +84,11 @@ static int __jfs_set_acl(tid_t tid, stru
+ case ACL_TYPE_ACCESS:
+ ea_name = POSIX_ACL_XATTR_ACCESS;
+ if (acl) {
+- rc = posix_acl_equiv_mode(acl, &inode->i_mode);
+- if (rc < 0)
++ rc = posix_acl_update_mode(inode, &inode->i_mode, &acl);
++ if (rc)
+ return rc;
+ inode->i_ctime = CURRENT_TIME;
+ mark_inode_dirty(inode);
+- if (rc == 0)
+- acl = NULL;
+ }
+ break;
+ case ACL_TYPE_DEFAULT:
+--- a/fs/ocfs2/acl.c
++++ b/fs/ocfs2/acl.c
+@@ -241,13 +241,11 @@ int ocfs2_set_acl(handle_t *handle,
+ case ACL_TYPE_ACCESS:
+ name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS;
+ if (acl) {
+- umode_t mode = inode->i_mode;
+- ret = posix_acl_equiv_mode(acl, &mode);
+- if (ret < 0)
+- return ret;
++ umode_t mode;
+
+- if (ret == 0)
+- acl = NULL;
++ ret = posix_acl_update_mode(inode, &mode, &acl);
++ if (ret)
++ return ret;
+
+ ret = ocfs2_acl_set_mode(inode, di_bh,
+ handle, mode);
+--- a/fs/posix_acl.c
++++ b/fs/posix_acl.c
+@@ -592,6 +592,37 @@ no_mem:
+ }
+ EXPORT_SYMBOL_GPL(posix_acl_create);
+
++/**
++ * posix_acl_update_mode - update mode in set_acl
++ *
++ * Update the file mode when setting an ACL: compute the new file permission
++ * bits based on the ACL. In addition, if the ACL is equivalent to the new
++ * file mode, set *acl to NULL to indicate that no ACL should be set.
++ *
++ * As with chmod, clear the setgit bit if the caller is not in the owning group
++ * or capable of CAP_FSETID (see inode_change_ok).
++ *
++ * Called from set_acl inode operations.
++ */
++int posix_acl_update_mode(struct inode *inode, umode_t *mode_p,
++ struct posix_acl **acl)
++{
++ umode_t mode = inode->i_mode;
++ int error;
++
++ error = posix_acl_equiv_mode(*acl, &mode);
++ if (error < 0)
++ return error;
++ if (error == 0)
++ *acl = NULL;
++ if (!in_group_p(inode->i_gid) &&
++ !capable_wrt_inode_uidgid(inode, CAP_FSETID))
++ mode &= ~S_ISGID;
++ *mode_p = mode;
++ return 0;
++}
++EXPORT_SYMBOL(posix_acl_update_mode);
++
+ /*
+ * Fix up the uids and gids in posix acl extended attributes in place.
+ */
+--- a/fs/reiserfs/xattr_acl.c
++++ b/fs/reiserfs/xattr_acl.c
+@@ -246,13 +246,9 @@ __reiserfs_set_acl(struct reiserfs_trans
+ case ACL_TYPE_ACCESS:
+ name = POSIX_ACL_XATTR_ACCESS;
+ if (acl) {
+- error = posix_acl_equiv_mode(acl, &inode->i_mode);
+- if (error < 0)
++ error = posix_acl_update_mode(inode, &inode->i_mode, &acl);
++ if (error)
+ return error;
+- else {
+- if (error == 0)
+- acl = NULL;
+- }
+ }
+ break;
+ case ACL_TYPE_DEFAULT:
+--- a/fs/xfs/xfs_acl.c
++++ b/fs/xfs/xfs_acl.c
+@@ -288,16 +288,11 @@ xfs_set_acl(struct inode *inode, struct
+ return error;
+
+ if (type == ACL_TYPE_ACCESS) {
+- umode_t mode = inode->i_mode;
+- error = posix_acl_equiv_mode(acl, &mode);
+-
+- if (error <= 0) {
+- acl = NULL;
+-
+- if (error < 0)
+- return error;
+- }
++ umode_t mode;
+
++ error = posix_acl_update_mode(inode, &mode, &acl);
++ if (error)
++ return error;
+ error = xfs_set_mode(inode, mode);
+ if (error)
+ return error;
+--- a/include/linux/posix_acl.h
++++ b/include/linux/posix_acl.h
+@@ -95,6 +95,7 @@ extern int set_posix_acl(struct inode *,
+ extern int posix_acl_chmod(struct inode *, umode_t);
+ extern int posix_acl_create(struct inode *, umode_t *, struct posix_acl **,
+ struct posix_acl **);
++extern int posix_acl_update_mode(struct inode *, umode_t *, struct posix_acl **);
+
+ extern int simple_set_acl(struct inode *, struct posix_acl *, int);
+ extern int simple_acl_create(struct inode *, struct inode *);