--- /dev/null
+From foo@baz Tue 03 Mar 2020 04:52:04 PM CET
+From: Ajay Kaher <akaher@vmware.com>
+Date: Wed, 26 Feb 2020 01:46:14 +0530
+Subject: fs: prevent page refcount overflow in pipe_buf_get
+To: <gregkh@linuxfoundation.org>
+Cc: <torvalds@linux-foundation.org>, <willy@infradead.org>, <jannh@google.com>, <vbabka@suse.cz>, <will.deacon@arm.com>, <punit.agrawal@arm.com>, <steve.capper@arm.com>, <kirill.shutemov@linux.intel.com>, <aneesh.kumar@linux.vnet.ibm.com>, <catalin.marinas@arm.com>, <n-horiguchi@ah.jp.nec.com>, <mark.rutland@arm.com>, <mhocko@suse.com>, <mike.kravetz@oracle.com>, <akpm@linux-foundation.org>, <mszeredi@redhat.com>, <viro@zeniv.linux.org.uk>, <stable@vger.kernel.org>, <srivatsab@vmware.com>, <srivatsa@csail.mit.edu>, <amakhalov@vmware.com>, <srinidhir@vmware.com>, <bvikas@vmware.com>, <anishs@vmware.com>, <vsirnapalli@vmware.com>, <sharathg@vmware.com>, <srostedt@vmware.com>, <akaher@vmware.com>, <stable@kernel.org>
+Message-ID: <1582661774-30925-8-git-send-email-akaher@vmware.com>
+
+From: Ajay Kaher <akaher@vmware.com>
+
+From: Matthew Wilcox <willy@infradead.org>
+
+commit 15fab63e1e57be9fdb5eec1bbc5916e9825e9acb upstream.
+
+Change pipe_buf_get() to return a bool indicating whether it succeeded
+in raising the refcount of the page (if the thing in the pipe is a page).
+This removes another mechanism for overflowing the page refcount. All
+callers converted to handle a failure.
+
+Reported-by: Jann Horn <jannh@google.com>
+Signed-off-by: Matthew Wilcox <willy@infradead.org>
+Cc: stable@kernel.org
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+[ 4.4.y backport notes:
+ Regarding the change in generic_pipe_buf_get(), note that
+ page_cache_get() is the same as get_page(). See mainline commit
+ 09cbfeaf1a5a6 "mm, fs: get rid of PAGE_CACHE_* and
+ page_cache_{get,release} macros" for context. ]
+Signed-off-by: Ajay Kaher <akaher@vmware.com>
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/fuse/dev.c | 12 ++++++------
+ fs/pipe.c | 4 ++--
+ fs/splice.c | 12 ++++++++++--
+ include/linux/pipe_fs_i.h | 10 ++++++----
+ kernel/trace/trace.c | 6 +++++-
+ 5 files changed, 29 insertions(+), 15 deletions(-)
+
+--- a/fs/fuse/dev.c
++++ b/fs/fuse/dev.c
+@@ -2031,10 +2031,8 @@ static ssize_t fuse_dev_splice_write(str
+ rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len;
+
+ ret = -EINVAL;
+- if (rem < len) {
+- pipe_unlock(pipe);
+- goto out;
+- }
++ if (rem < len)
++ goto out_free;
+
+ rem = len;
+ while (rem) {
+@@ -2052,7 +2050,9 @@ static ssize_t fuse_dev_splice_write(str
+ pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
+ pipe->nrbufs--;
+ } else {
+- pipe_buf_get(pipe, ibuf);
++ if (!pipe_buf_get(pipe, ibuf))
++ goto out_free;
++
+ *obuf = *ibuf;
+ obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
+ obuf->len = rem;
+@@ -2075,13 +2075,13 @@ static ssize_t fuse_dev_splice_write(str
+ ret = fuse_dev_do_write(fud, &cs, len);
+
+ pipe_lock(pipe);
++out_free:
+ for (idx = 0; idx < nbuf; idx++) {
+ struct pipe_buffer *buf = &bufs[idx];
+ buf->ops->release(pipe, buf);
+ }
+ pipe_unlock(pipe);
+
+-out:
+ kfree(bufs);
+ return ret;
+ }
+--- a/fs/pipe.c
++++ b/fs/pipe.c
+@@ -178,9 +178,9 @@ EXPORT_SYMBOL(generic_pipe_buf_steal);
+ * in the tee() system call, when we duplicate the buffers in one
+ * pipe into another.
+ */
+-void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
++bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
+ {
+- page_cache_get(buf->page);
++ return try_get_page(buf->page);
+ }
+ EXPORT_SYMBOL(generic_pipe_buf_get);
+
+--- a/fs/splice.c
++++ b/fs/splice.c
+@@ -1876,7 +1876,11 @@ retry:
+ * Get a reference to this pipe buffer,
+ * so we can copy the contents over.
+ */
+- pipe_buf_get(ipipe, ibuf);
++ if (!pipe_buf_get(ipipe, ibuf)) {
++ if (ret == 0)
++ ret = -EFAULT;
++ break;
++ }
+ *obuf = *ibuf;
+
+ /*
+@@ -1948,7 +1952,11 @@ static int link_pipe(struct pipe_inode_i
+ * Get a reference to this pipe buffer,
+ * so we can copy the contents over.
+ */
+- pipe_buf_get(ipipe, ibuf);
++ if (!pipe_buf_get(ipipe, ibuf)) {
++ if (ret == 0)
++ ret = -EFAULT;
++ break;
++ }
+
+ obuf = opipe->bufs + nbuf;
+ *obuf = *ibuf;
+--- a/include/linux/pipe_fs_i.h
++++ b/include/linux/pipe_fs_i.h
+@@ -112,18 +112,20 @@ struct pipe_buf_operations {
+ /*
+ * Get a reference to the pipe buffer.
+ */
+- void (*get)(struct pipe_inode_info *, struct pipe_buffer *);
++ bool (*get)(struct pipe_inode_info *, struct pipe_buffer *);
+ };
+
+ /**
+ * pipe_buf_get - get a reference to a pipe_buffer
+ * @pipe: the pipe that the buffer belongs to
+ * @buf: the buffer to get a reference to
++ *
++ * Return: %true if the reference was successfully obtained.
+ */
+-static inline void pipe_buf_get(struct pipe_inode_info *pipe,
++static inline __must_check bool pipe_buf_get(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
+ {
+- buf->ops->get(pipe, buf);
++ return buf->ops->get(pipe, buf);
+ }
+
+ /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual
+@@ -148,7 +150,7 @@ struct pipe_inode_info *alloc_pipe_info(
+ void free_pipe_info(struct pipe_inode_info *);
+
+ /* Generic pipe buffer ops functions */
+-void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
++bool generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
+ int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *);
+ int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
+ void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *);
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -5749,12 +5749,16 @@ static void buffer_pipe_buf_release(stru
+ buf->private = 0;
+ }
+
+-static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
++static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
+ {
+ struct buffer_ref *ref = (struct buffer_ref *)buf->private;
+
++ if (ref->ref > INT_MAX/2)
++ return false;
++
+ ref->ref++;
++ return true;
+ }
+
+ /* Pipe buffer operations for a buffer. */
--- /dev/null
+From foo@baz Tue 03 Mar 2020 04:52:04 PM CET
+From: Ajay Kaher <akaher@vmware.com>
+Date: Wed, 26 Feb 2020 01:46:09 +0530
+Subject: mm: add 'try_get_page()' helper function
+To: <gregkh@linuxfoundation.org>
+Cc: <torvalds@linux-foundation.org>, <willy@infradead.org>, <jannh@google.com>, <vbabka@suse.cz>, <will.deacon@arm.com>, <punit.agrawal@arm.com>, <steve.capper@arm.com>, <kirill.shutemov@linux.intel.com>, <aneesh.kumar@linux.vnet.ibm.com>, <catalin.marinas@arm.com>, <n-horiguchi@ah.jp.nec.com>, <mark.rutland@arm.com>, <mhocko@suse.com>, <mike.kravetz@oracle.com>, <akpm@linux-foundation.org>, <mszeredi@redhat.com>, <viro@zeniv.linux.org.uk>, <stable@vger.kernel.org>, <srivatsab@vmware.com>, <srivatsa@csail.mit.edu>, <amakhalov@vmware.com>, <srinidhir@vmware.com>, <bvikas@vmware.com>, <anishs@vmware.com>, <vsirnapalli@vmware.com>, <sharathg@vmware.com>, <srostedt@vmware.com>, <akaher@vmware.com>, <stable@kernel.org>
+Message-ID: <1582661774-30925-3-git-send-email-akaher@vmware.com>
+
+From: Ajay Kaher <akaher@vmware.com>
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 88b1a17dfc3ed7728316478fae0f5ad508f50397 upstream.
+
+This is the same as the traditional 'get_page()' function, but instead
+of unconditionally incrementing the reference count of the page, it only
+does so if the count was "safe". It returns whether the reference count
+was incremented (and is marked __must_check, since the caller obviously
+has to be aware of it).
+
+Also like 'get_page()', you can't use this function unless you already
+had a reference to the page. The intent is that you can use this
+exactly like get_page(), but in situations where you want to limit the
+maximum reference count.
+
+The code currently does an unconditional WARN_ON_ONCE() if we ever hit
+the reference count issues (either zero or negative), as a notification
+that the conditional non-increment actually happened.
+
+NOTE! The count access for the "safety" check is inherently racy, but
+that doesn't matter since the buffer we use is basically half the range
+of the reference count (ie we look at the sign of the count).
+
+Acked-by: Matthew Wilcox <willy@infradead.org>
+Cc: Jann Horn <jannh@google.com>
+Cc: stable@kernel.org
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+[ 4.4.y backport notes:
+ Srivatsa:
+ - Adapted try_get_page() to match the get_page()
+ implementation in 4.4.y, except for the refcount check.
+ - Added try_get_page_foll() which will be needed
+ in a subsequent patch. ]
+Signed-off-by: Srivatsa S. Bhat (VMware) <srivatsa@csail.mit.edu>
+Signed-off-by: Ajay Kaher <akaher@vmware.com>
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/mm.h | 12 ++++++++++++
+ mm/internal.h | 23 +++++++++++++++++++++++
+ 2 files changed, 35 insertions(+)
+
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -505,6 +505,18 @@ static inline void get_page(struct page
+ atomic_inc(&page->_count);
+ }
+
++static inline __must_check bool try_get_page(struct page *page)
++{
++ if (unlikely(PageTail(page)))
++ if (likely(__get_page_tail(page)))
++ return true;
++
++ if (WARN_ON_ONCE(atomic_read(&page->_count) <= 0))
++ return false;
++ atomic_inc(&page->_count);
++ return true;
++}
++
+ static inline struct page *virt_to_head_page(const void *x)
+ {
+ struct page *page = virt_to_page(x);
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -112,6 +112,29 @@ static inline void get_page_foll(struct
+ }
+ }
+
++static inline __must_check bool try_get_page_foll(struct page *page)
++{
++ if (unlikely(PageTail(page))) {
++ if (WARN_ON_ONCE(atomic_read(&compound_head(page)->_count) <= 0))
++ return false;
++ /*
++ * This is safe only because
++ * __split_huge_page_refcount() can't run under
++ * get_page_foll() because we hold the proper PT lock.
++ */
++ __get_page_tail_foll(page, true);
++ } else {
++ /*
++ * Getting a normal page or the head of a compound page
++ * requires to already have an elevated page->_count.
++ */
++ if (WARN_ON_ONCE(atomic_read(&page->_count) <= 0))
++ return false;
++ atomic_inc(&page->_count);
++ }
++ return true;
++}
++
+ extern unsigned long highest_memmap_pfn;
+
+ /*
--- /dev/null
+From foo@baz Tue 03 Mar 2020 04:52:04 PM CET
+From: Ajay Kaher <akaher@vmware.com>
+Date: Wed, 26 Feb 2020 01:46:11 +0530
+Subject: mm, gup: ensure real head page is ref-counted when using hugepages
+To: <gregkh@linuxfoundation.org>
+Cc: <torvalds@linux-foundation.org>, <willy@infradead.org>, <jannh@google.com>, <vbabka@suse.cz>, <will.deacon@arm.com>, <punit.agrawal@arm.com>, <steve.capper@arm.com>, <kirill.shutemov@linux.intel.com>, <aneesh.kumar@linux.vnet.ibm.com>, <catalin.marinas@arm.com>, <n-horiguchi@ah.jp.nec.com>, <mark.rutland@arm.com>, <mhocko@suse.com>, <mike.kravetz@oracle.com>, <akpm@linux-foundation.org>, <mszeredi@redhat.com>, <viro@zeniv.linux.org.uk>, <stable@vger.kernel.org>, <srivatsab@vmware.com>, <srivatsa@csail.mit.edu>, <amakhalov@vmware.com>, <srinidhir@vmware.com>, <bvikas@vmware.com>, <anishs@vmware.com>, <vsirnapalli@vmware.com>, <sharathg@vmware.com>, <srostedt@vmware.com>, <akaher@vmware.com>, Hillf Danton <hillf.zj@alibaba-inc.com>
+Message-ID: <1582661774-30925-5-git-send-email-akaher@vmware.com>
+
+From: Ajay Kaher <akaher@vmware.com>
+
+From: Punit Agrawal <punit.agrawal@arm.com>
+
+commit d63206ee32b6e64b0e12d46e5d6004afd9913713 upstream.
+
+When speculatively taking references to a hugepage using
+page_cache_add_speculative() in gup_huge_pmd(), it is assumed that the
+page returned by pmd_page() is the head page. Although normally true,
+this assumption doesn't hold when the hugepage comprises successive
+page table entries, such as when using the contiguous bit on arm64 at PTE or
+PMD levels.
+
+This can be addressed by ensuring that the page passed to
+page_cache_add_speculative() is the real head or by de-referencing the
+head page within the function.
+
+We take the first approach to keep the usage pattern aligned with
+page_cache_get_speculative() where users already pass the appropriate
+page, i.e., the de-referenced head.
+
+Apply the same logic to fix gup_huge_[pud|pgd]() as well.
+
+[punit.agrawal@arm.com: fix arm64 ltp failure]
+ Link: http://lkml.kernel.org/r/20170619170145.25577-5-punit.agrawal@arm.com
+Link: http://lkml.kernel.org/r/20170522133604.11392-3-punit.agrawal@arm.com
+Signed-off-by: Punit Agrawal <punit.agrawal@arm.com>
+Acked-by: Steve Capper <steve.capper@arm.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
+Cc: Mike Kravetz <mike.kravetz@oracle.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Ajay Kaher <akaher@vmware.com>
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/gup.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+--- a/mm/gup.c
++++ b/mm/gup.c
+@@ -1130,8 +1130,7 @@ static int gup_huge_pmd(pmd_t orig, pmd_
+ return 0;
+
+ refs = 0;
+- head = pmd_page(orig);
+- page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
++ page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+ tail = page;
+ do {
+ pages[*nr] = page;
+@@ -1140,6 +1139,7 @@ static int gup_huge_pmd(pmd_t orig, pmd_
+ refs++;
+ } while (addr += PAGE_SIZE, addr != end);
+
++ head = compound_head(pmd_page(orig));
+ if (!page_cache_add_speculative(head, refs)) {
+ *nr -= refs;
+ return 0;
+@@ -1176,8 +1176,7 @@ static int gup_huge_pud(pud_t orig, pud_
+ return 0;
+
+ refs = 0;
+- head = pud_page(orig);
+- page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
++ page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
+ tail = page;
+ do {
+ pages[*nr] = page;
+@@ -1186,6 +1185,7 @@ static int gup_huge_pud(pud_t orig, pud_
+ refs++;
+ } while (addr += PAGE_SIZE, addr != end);
+
++ head = compound_head(pud_page(orig));
+ if (!page_cache_add_speculative(head, refs)) {
+ *nr -= refs;
+ return 0;
+@@ -1218,8 +1218,7 @@ static int gup_huge_pgd(pgd_t orig, pgd_
+ return 0;
+
+ refs = 0;
+- head = pgd_page(orig);
+- page = head + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT);
++ page = pgd_page(orig) + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT);
+ tail = page;
+ do {
+ pages[*nr] = page;
+@@ -1228,6 +1227,7 @@ static int gup_huge_pgd(pgd_t orig, pgd_
+ refs++;
+ } while (addr += PAGE_SIZE, addr != end);
+
++ head = compound_head(pgd_page(orig));
+ if (!page_cache_add_speculative(head, refs)) {
+ *nr -= refs;
+ return 0;
--- /dev/null
+From foo@baz Tue 03 Mar 2020 04:52:04 PM CET
+From: Ajay Kaher <akaher@vmware.com>
+Date: Wed, 26 Feb 2020 01:46:10 +0530
+Subject: mm, gup: remove broken VM_BUG_ON_PAGE compound check for hugepages
+To: <gregkh@linuxfoundation.org>
+Cc: <torvalds@linux-foundation.org>, <willy@infradead.org>, <jannh@google.com>, <vbabka@suse.cz>, <will.deacon@arm.com>, <punit.agrawal@arm.com>, <steve.capper@arm.com>, <kirill.shutemov@linux.intel.com>, <aneesh.kumar@linux.vnet.ibm.com>, <catalin.marinas@arm.com>, <n-horiguchi@ah.jp.nec.com>, <mark.rutland@arm.com>, <mhocko@suse.com>, <mike.kravetz@oracle.com>, <akpm@linux-foundation.org>, <mszeredi@redhat.com>, <viro@zeniv.linux.org.uk>, <stable@vger.kernel.org>, <srivatsab@vmware.com>, <srivatsa@csail.mit.edu>, <amakhalov@vmware.com>, <srinidhir@vmware.com>, <bvikas@vmware.com>, <anishs@vmware.com>, <vsirnapalli@vmware.com>, <sharathg@vmware.com>, <srostedt@vmware.com>, <akaher@vmware.com>, Hillf Danton <hillf.zj@alibaba-inc.com>
+Message-ID: <1582661774-30925-4-git-send-email-akaher@vmware.com>
+
+From: Ajay Kaher <akaher@vmware.com>
+
+From: Will Deacon <will.deacon@arm.com>
+
+commit a3e328556d41bb61c55f9dfcc62d6a826ea97b85 upstream.
+
+When operating on hugepages with DEBUG_VM enabled, the GUP code checks
+the compound head for each tail page prior to calling
+page_cache_add_speculative. This is broken, because on the fast-GUP
+path (where we don't hold any page table locks) we can be racing with a
+concurrent invocation of split_huge_page_to_list.
+
+split_huge_page_to_list deals with this race by using page_ref_freeze to
+freeze the page and force concurrent GUPs to fail whilst the component
+pages are modified. This modification includes clearing the
+compound_head field for the tail pages, so checking this prior to a
+successful call to page_cache_add_speculative can lead to false
+positives: In fact, page_cache_add_speculative *already* has this check
+once the page refcount has been successfully updated, so we can simply
+remove the broken calls to VM_BUG_ON_PAGE.
+
+Link: http://lkml.kernel.org/r/20170522133604.11392-2-punit.agrawal@arm.com
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Signed-off-by: Punit Agrawal <punit.agrawal@arm.com>
+Acked-by: Steve Capper <steve.capper@arm.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Mike Kravetz <mike.kravetz@oracle.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Srivatsa S. Bhat (VMware) <srivatsa@csail.mit.edu>
+Signed-off-by: Ajay Kaher <akaher@vmware.com>
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/gup.c | 3 ---
+ 1 file changed, 3 deletions(-)
+
+--- a/mm/gup.c
++++ b/mm/gup.c
+@@ -1134,7 +1134,6 @@ static int gup_huge_pmd(pmd_t orig, pmd_
+ page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+ tail = page;
+ do {
+- VM_BUG_ON_PAGE(compound_head(page) != head, page);
+ pages[*nr] = page;
+ (*nr)++;
+ page++;
+@@ -1181,7 +1180,6 @@ static int gup_huge_pud(pud_t orig, pud_
+ page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
+ tail = page;
+ do {
+- VM_BUG_ON_PAGE(compound_head(page) != head, page);
+ pages[*nr] = page;
+ (*nr)++;
+ page++;
+@@ -1224,7 +1222,6 @@ static int gup_huge_pgd(pgd_t orig, pgd_
+ page = head + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT);
+ tail = page;
+ do {
+- VM_BUG_ON_PAGE(compound_head(page) != head, page);
+ pages[*nr] = page;
+ (*nr)++;
+ page++;
--- /dev/null
+From foo@baz Tue 03 Mar 2020 04:52:04 PM CET
+From: Ajay Kaher <akaher@vmware.com>
+Date: Wed, 26 Feb 2020 01:46:08 +0530
+Subject: mm: make page ref count overflow check tighter and more explicit
+To: <gregkh@linuxfoundation.org>
+Cc: <torvalds@linux-foundation.org>, <willy@infradead.org>, <jannh@google.com>, <vbabka@suse.cz>, <will.deacon@arm.com>, <punit.agrawal@arm.com>, <steve.capper@arm.com>, <kirill.shutemov@linux.intel.com>, <aneesh.kumar@linux.vnet.ibm.com>, <catalin.marinas@arm.com>, <n-horiguchi@ah.jp.nec.com>, <mark.rutland@arm.com>, <mhocko@suse.com>, <mike.kravetz@oracle.com>, <akpm@linux-foundation.org>, <mszeredi@redhat.com>, <viro@zeniv.linux.org.uk>, <stable@vger.kernel.org>, <srivatsab@vmware.com>, <srivatsa@csail.mit.edu>, <amakhalov@vmware.com>, <srinidhir@vmware.com>, <bvikas@vmware.com>, <anishs@vmware.com>, <vsirnapalli@vmware.com>, <sharathg@vmware.com>, <srostedt@vmware.com>, <akaher@vmware.com>, <stable@kernel.org>
+Message-ID: <1582661774-30925-2-git-send-email-akaher@vmware.com>
+
+From: Ajay Kaher <akaher@vmware.com>
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit f958d7b528b1b40c44cfda5eabe2d82760d868c3 upstream.
+
+We have a VM_BUG_ON() to check that the page reference count doesn't
+underflow (or get close to overflow) by checking the sign of the count.
+
+That's all fine, but we actually want to allow people to use a "get page
+ref unless it's already very high" helper function, and we want that one
+to use the sign of the page ref (without triggering this VM_BUG_ON).
+
+Change the VM_BUG_ON to only check for small underflows (or _very_ close
+to overflowing), and ignore overflows which have strayed into negative
+territory.
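+
+Concretely, the new check relies on unsigned wraparound. A minimal
+illustration of the arithmetic (not the macro itself, which open-codes
+this on page->_count), assuming a 32-bit int:
+
+	/* true only for count == 0 or count in the range -127..-1 */
+	static inline bool ref_zero_or_close_to_overflow(int count)
+	{
+		return (unsigned int)count + 127u <= 127u;
+	}
+	/*
+	 * count    0:  0 + 127 = 127        -> flagged
+	 * count   -1:  wraps to 126         -> flagged
+	 * count -127:  wraps to 0           -> flagged
+	 * count -128:  wraps to 0xffffffff  -> not flagged
+	 * count    1:  1 + 127 = 128        -> not flagged
+	 */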
+
+Acked-by: Matthew Wilcox <willy@infradead.org>
+Cc: Jann Horn <jannh@google.com>
+Cc: stable@kernel.org
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+[ 4.4.y backport notes:
+ Ajay: Open-coded atomic refcount access due to missing
+ page_ref_count() helper in 4.4.y
+ Srivatsa: Added overflow check to get_page_foll() and related code. ]
+Signed-off-by: Srivatsa S. Bhat (VMware) <srivatsa@csail.mit.edu>
+Signed-off-by: Ajay Kaher <akaher@vmware.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/mm.h | 6 +++++-
+ mm/internal.h | 5 +++--
+ 2 files changed, 8 insertions(+), 3 deletions(-)
+
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -488,6 +488,10 @@ static inline void get_huge_page_tail(st
+
+ extern bool __get_page_tail(struct page *page);
+
++/* 127: arbitrary random number, small enough to assemble well */
++#define page_ref_zero_or_close_to_overflow(page) \
++ ((unsigned int) atomic_read(&page->_count) + 127u <= 127u)
++
+ static inline void get_page(struct page *page)
+ {
+ if (unlikely(PageTail(page)))
+@@ -497,7 +501,7 @@ static inline void get_page(struct page
+ * Getting a normal page or the head of a compound page
+ * requires to already have an elevated page->_count.
+ */
+- VM_BUG_ON_PAGE(atomic_read(&page->_count) <= 0, page);
++ VM_BUG_ON_PAGE(page_ref_zero_or_close_to_overflow(page), page);
+ atomic_inc(&page->_count);
+ }
+
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -81,7 +81,8 @@ static inline void __get_page_tail_foll(
+ * speculative page access (like in
+ * page_cache_get_speculative()) on tail pages.
+ */
+- VM_BUG_ON_PAGE(atomic_read(&compound_head(page)->_count) <= 0, page);
++ VM_BUG_ON_PAGE(page_ref_zero_or_close_to_overflow(compound_head(page)),
++ page);
+ if (get_page_head)
+ atomic_inc(&compound_head(page)->_count);
+ get_huge_page_tail(page);
+@@ -106,7 +107,7 @@ static inline void get_page_foll(struct
+ * Getting a normal page or the head of a compound page
+ * requires to already have an elevated page->_count.
+ */
+- VM_BUG_ON_PAGE(atomic_read(&page->_count) <= 0, page);
++ VM_BUG_ON_PAGE(page_ref_zero_or_close_to_overflow(page), page);
+ atomic_inc(&page->_count);
+ }
+ }
--- /dev/null
+From foo@baz Tue 03 Mar 2020 04:52:04 PM CET
+From: Ajay Kaher <akaher@vmware.com>
+Date: Wed, 26 Feb 2020 01:46:12 +0530
+Subject: mm: prevent get_user_pages() from overflowing page refcount
+To: <gregkh@linuxfoundation.org>
+Cc: <torvalds@linux-foundation.org>, <willy@infradead.org>, <jannh@google.com>, <vbabka@suse.cz>, <will.deacon@arm.com>, <punit.agrawal@arm.com>, <steve.capper@arm.com>, <kirill.shutemov@linux.intel.com>, <aneesh.kumar@linux.vnet.ibm.com>, <catalin.marinas@arm.com>, <n-horiguchi@ah.jp.nec.com>, <mark.rutland@arm.com>, <mhocko@suse.com>, <mike.kravetz@oracle.com>, <akpm@linux-foundation.org>, <mszeredi@redhat.com>, <viro@zeniv.linux.org.uk>, <stable@vger.kernel.org>, <srivatsab@vmware.com>, <srivatsa@csail.mit.edu>, <amakhalov@vmware.com>, <srinidhir@vmware.com>, <bvikas@vmware.com>, <anishs@vmware.com>, <vsirnapalli@vmware.com>, <sharathg@vmware.com>, <srostedt@vmware.com>, <akaher@vmware.com>, <stable@kernel.org>
+Message-ID: <1582661774-30925-6-git-send-email-akaher@vmware.com>
+
+From: Ajay Kaher <akaher@vmware.com>
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 8fde12ca79aff9b5ba951fce1a2641901b8d8e64 upstream.
+
+If the page refcount wraps around past zero, it will be freed while
+there are still four billion references to it. One of the possible
+avenues for an attacker to try to make this happen is by doing direct IO
+on a page multiple times. This patch makes get_user_pages() refuse to
+take a new page reference if there are already more than two billion
+references to the page.
+
+Reported-by: Jann Horn <jannh@google.com>
+Acked-by: Matthew Wilcox <willy@infradead.org>
+Cc: stable@kernel.org
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+[ 4.4.y backport notes:
+ Ajay: - Added local variable 'err' with-in follow_hugetlb_page()
+ from 2be7cfed995e, to resolve compilation error
+ - Added page_ref_count()
+ - Added missing refcount overflow checks on x86 and s390
+ (Vlastimil, thanks for this change)
+ Srivatsa: - Replaced call to get_page_foll() with try_get_page_foll() ]
+Signed-off-by: Srivatsa S. Bhat (VMware) <srivatsa@csail.mit.edu>
+Signed-off-by: Ajay Kaher <akaher@vmware.com>
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/mm/gup.c | 6 ++++--
+ arch/x86/mm/gup.c | 9 ++++++++-
+ include/linux/mm.h | 5 +++++
+ mm/gup.c | 42 +++++++++++++++++++++++++++++++++---------
+ mm/hugetlb.c | 16 +++++++++++++++-
+ 5 files changed, 65 insertions(+), 13 deletions(-)
+
+--- a/arch/s390/mm/gup.c
++++ b/arch/s390/mm/gup.c
+@@ -37,7 +37,8 @@ static inline int gup_pte_range(pmd_t *p
+ return 0;
+ VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+ page = pte_page(pte);
+- if (!page_cache_get_speculative(page))
++ if (WARN_ON_ONCE(page_ref_count(page) < 0)
++ || !page_cache_get_speculative(page))
+ return 0;
+ if (unlikely(pte_val(pte) != pte_val(*ptep))) {
+ put_page(page);
+@@ -76,7 +77,8 @@ static inline int gup_huge_pmd(pmd_t *pm
+ refs++;
+ } while (addr += PAGE_SIZE, addr != end);
+
+- if (!page_cache_add_speculative(head, refs)) {
++ if (WARN_ON_ONCE(page_ref_count(head) < 0)
++ || !page_cache_add_speculative(head, refs)) {
+ *nr -= refs;
+ return 0;
+ }
+--- a/arch/x86/mm/gup.c
++++ b/arch/x86/mm/gup.c
+@@ -95,7 +95,10 @@ static noinline int gup_pte_range(pmd_t
+ }
+ VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+ page = pte_page(pte);
+- get_page(page);
++ if (unlikely(!try_get_page(page))) {
++ pte_unmap(ptep);
++ return 0;
++ }
+ SetPageReferenced(page);
+ pages[*nr] = page;
+ (*nr)++;
+@@ -132,6 +135,8 @@ static noinline int gup_huge_pmd(pmd_t p
+
+ refs = 0;
+ head = pmd_page(pmd);
++ if (WARN_ON_ONCE(page_ref_count(head) <= 0))
++ return 0;
+ page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+ do {
+ VM_BUG_ON_PAGE(compound_head(page) != head, page);
+@@ -208,6 +213,8 @@ static noinline int gup_huge_pud(pud_t p
+
+ refs = 0;
+ head = pud_page(pud);
++ if (WARN_ON_ONCE(page_ref_count(head) <= 0))
++ return 0;
+ page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
+ do {
+ VM_BUG_ON_PAGE(compound_head(page) != head, page);
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -488,6 +488,11 @@ static inline void get_huge_page_tail(st
+
+ extern bool __get_page_tail(struct page *page);
+
++static inline int page_ref_count(struct page *page)
++{
++ return atomic_read(&page->_count);
++}
++
+ /* 127: arbitrary random number, small enough to assemble well */
+ #define page_ref_zero_or_close_to_overflow(page) \
+ ((unsigned int) atomic_read(&page->_count) + 127u <= 127u)
+--- a/mm/gup.c
++++ b/mm/gup.c
+@@ -126,8 +126,12 @@ retry:
+ }
+ }
+
+- if (flags & FOLL_GET)
+- get_page_foll(page);
++ if (flags & FOLL_GET) {
++ if (unlikely(!try_get_page_foll(page))) {
++ page = ERR_PTR(-ENOMEM);
++ goto out;
++ }
++ }
+ if (flags & FOLL_TOUCH) {
+ if ((flags & FOLL_WRITE) &&
+ !pte_dirty(pte) && !PageDirty(page))
+@@ -289,7 +293,10 @@ static int get_gate_page(struct mm_struc
+ goto unmap;
+ *page = pte_page(*pte);
+ }
+- get_page(*page);
++ if (unlikely(!try_get_page(*page))) {
++ ret = -ENOMEM;
++ goto unmap;
++ }
+ out:
+ ret = 0;
+ unmap:
+@@ -1053,6 +1060,20 @@ struct page *get_dump_page(unsigned long
+ */
+ #ifdef CONFIG_HAVE_GENERIC_RCU_GUP
+
++/*
++ * Return the compound head page with ref appropriately incremented,
++ * or NULL if that failed.
++ */
++static inline struct page *try_get_compound_head(struct page *page, int refs)
++{
++ struct page *head = compound_head(page);
++ if (WARN_ON_ONCE(atomic_read(&head->_count) < 0))
++ return NULL;
++ if (unlikely(!page_cache_add_speculative(head, refs)))
++ return NULL;
++ return head;
++}
++
+ #ifdef __HAVE_ARCH_PTE_SPECIAL
+ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
+ int write, struct page **pages, int *nr)
+@@ -1083,6 +1104,9 @@ static int gup_pte_range(pmd_t pmd, unsi
+ VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+ page = pte_page(pte);
+
++ if (WARN_ON_ONCE(page_ref_count(page) < 0))
++ goto pte_unmap;
++
+ if (!page_cache_get_speculative(page))
+ goto pte_unmap;
+
+@@ -1139,8 +1163,8 @@ static int gup_huge_pmd(pmd_t orig, pmd_
+ refs++;
+ } while (addr += PAGE_SIZE, addr != end);
+
+- head = compound_head(pmd_page(orig));
+- if (!page_cache_add_speculative(head, refs)) {
++ head = try_get_compound_head(pmd_page(orig), refs);
++ if (!head) {
+ *nr -= refs;
+ return 0;
+ }
+@@ -1185,8 +1209,8 @@ static int gup_huge_pud(pud_t orig, pud_
+ refs++;
+ } while (addr += PAGE_SIZE, addr != end);
+
+- head = compound_head(pud_page(orig));
+- if (!page_cache_add_speculative(head, refs)) {
++ head = try_get_compound_head(pud_page(orig), refs);
++ if (!head) {
+ *nr -= refs;
+ return 0;
+ }
+@@ -1227,8 +1251,8 @@ static int gup_huge_pgd(pgd_t orig, pgd_
+ refs++;
+ } while (addr += PAGE_SIZE, addr != end);
+
+- head = compound_head(pgd_page(orig));
+- if (!page_cache_add_speculative(head, refs)) {
++ head = try_get_compound_head(pgd_page(orig), refs);
++ if (!head) {
+ *nr -= refs;
+ return 0;
+ }
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -3886,6 +3886,7 @@ long follow_hugetlb_page(struct mm_struc
+ unsigned long vaddr = *position;
+ unsigned long remainder = *nr_pages;
+ struct hstate *h = hstate_vma(vma);
++ int err = -EFAULT;
+
+ while (vaddr < vma->vm_end && remainder) {
+ pte_t *pte;
+@@ -3957,6 +3958,19 @@ long follow_hugetlb_page(struct mm_struc
+
+ pfn_offset = (vaddr & ~huge_page_mask(h)) >> PAGE_SHIFT;
+ page = pte_page(huge_ptep_get(pte));
++
++ /*
++ * Instead of doing 'try_get_page_foll()' below in the same_page
++ * loop, just check the count once here.
++ */
++ if (unlikely(page_count(page) <= 0)) {
++ if (pages) {
++ spin_unlock(ptl);
++ remainder = 0;
++ err = -ENOMEM;
++ break;
++ }
++ }
+ same_page:
+ if (pages) {
+ pages[i] = mem_map_offset(page, pfn_offset);
+@@ -3983,7 +3997,7 @@ same_page:
+ *nr_pages = remainder;
+ *position = vaddr;
+
+- return i ? i : -EFAULT;
++ return i ? i : err;
+ }
+
+ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
--- /dev/null
+From foo@baz Tue 03 Mar 2020 04:52:04 PM CET
+From: Ajay Kaher <akaher@vmware.com>
+Date: Wed, 26 Feb 2020 01:46:13 +0530
+Subject: pipe: add pipe_buf_get() helper
+To: <gregkh@linuxfoundation.org>
+Cc: <torvalds@linux-foundation.org>, <willy@infradead.org>, <jannh@google.com>, <vbabka@suse.cz>, <will.deacon@arm.com>, <punit.agrawal@arm.com>, <steve.capper@arm.com>, <kirill.shutemov@linux.intel.com>, <aneesh.kumar@linux.vnet.ibm.com>, <catalin.marinas@arm.com>, <n-horiguchi@ah.jp.nec.com>, <mark.rutland@arm.com>, <mhocko@suse.com>, <mike.kravetz@oracle.com>, <akpm@linux-foundation.org>, <mszeredi@redhat.com>, <viro@zeniv.linux.org.uk>, <stable@vger.kernel.org>, <srivatsab@vmware.com>, <srivatsa@csail.mit.edu>, <amakhalov@vmware.com>, <srinidhir@vmware.com>, <bvikas@vmware.com>, <anishs@vmware.com>, <vsirnapalli@vmware.com>, <sharathg@vmware.com>, <srostedt@vmware.com>, <akaher@vmware.com>
+Message-ID: <1582661774-30925-7-git-send-email-akaher@vmware.com>
+
+From: Ajay Kaher <akaher@vmware.com>
+
+From: Miklos Szeredi <mszeredi@redhat.com>
+
+commit 7bf2d1df80822ec056363627e2014990f068f7aa upstream.
+
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Ajay Kaher <akaher@vmware.com>
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/fuse/dev.c | 2 +-
+ fs/splice.c | 4 ++--
+ include/linux/pipe_fs_i.h | 11 +++++++++++
+ 3 files changed, 14 insertions(+), 3 deletions(-)
+
+--- a/fs/fuse/dev.c
++++ b/fs/fuse/dev.c
+@@ -2052,7 +2052,7 @@ static ssize_t fuse_dev_splice_write(str
+ pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
+ pipe->nrbufs--;
+ } else {
+- ibuf->ops->get(pipe, ibuf);
++ pipe_buf_get(pipe, ibuf);
+ *obuf = *ibuf;
+ obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
+ obuf->len = rem;
+--- a/fs/splice.c
++++ b/fs/splice.c
+@@ -1876,7 +1876,7 @@ retry:
+ * Get a reference to this pipe buffer,
+ * so we can copy the contents over.
+ */
+- ibuf->ops->get(ipipe, ibuf);
++ pipe_buf_get(ipipe, ibuf);
+ *obuf = *ibuf;
+
+ /*
+@@ -1948,7 +1948,7 @@ static int link_pipe(struct pipe_inode_i
+ * Get a reference to this pipe buffer,
+ * so we can copy the contents over.
+ */
+- ibuf->ops->get(ipipe, ibuf);
++ pipe_buf_get(ipipe, ibuf);
+
+ obuf = opipe->bufs + nbuf;
+ *obuf = *ibuf;
+--- a/include/linux/pipe_fs_i.h
++++ b/include/linux/pipe_fs_i.h
+@@ -115,6 +115,17 @@ struct pipe_buf_operations {
+ void (*get)(struct pipe_inode_info *, struct pipe_buffer *);
+ };
+
++/**
++ * pipe_buf_get - get a reference to a pipe_buffer
++ * @pipe: the pipe that the buffer belongs to
++ * @buf: the buffer to get a reference to
++ */
++static inline void pipe_buf_get(struct pipe_inode_info *pipe,
++ struct pipe_buffer *buf)
++{
++ buf->ops->get(pipe, buf);
++}
++
+ /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual
+ memory allocation, whereas PIPE_BUF makes atomicity guarantees. */
+ #define PIPE_SIZE PAGE_SIZE
namei-only-return-echild-from-follow_dotdot_rcu.patch
kvm-check-for-a-bad-hva-before-dropping-into-the-ghc-slow-path.patch
slip-stop-double-free-sl-dev-in-slip_open.patch
+mm-make-page-ref-count-overflow-check-tighter-and-more-explicit.patch
+mm-add-try_get_page-helper-function.patch
+mm-gup-remove-broken-vm_bug_on_page-compound-check-for-hugepages.patch
+mm-gup-ensure-real-head-page-is-ref-counted-when-using-hugepages.patch
+mm-prevent-get_user_pages-from-overflowing-page-refcount.patch
+pipe-add-pipe_buf_get-helper.patch
+fs-prevent-page-refcount-overflow-in-pipe_buf_get.patch