From: Greg Kroah-Hartman Date: Tue, 3 Mar 2020 15:52:21 +0000 (+0100) Subject: 4.4-stable patches X-Git-Tag: v4.19.108~22 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=9257afc34c1afd1dfee85cbb23658827f9d093a9;p=thirdparty%2Fkernel%2Fstable-queue.git 4.4-stable patches added patches: fs-prevent-page-refcount-overflow-in-pipe_buf_get.patch mm-add-try_get_page-helper-function.patch mm-gup-ensure-real-head-page-is-ref-counted-when-using-hugepages.patch mm-gup-remove-broken-vm_bug_on_page-compound-check-for-hugepages.patch mm-make-page-ref-count-overflow-check-tighter-and-more-explicit.patch mm-prevent-get_user_pages-from-overflowing-page-refcount.patch pipe-add-pipe_buf_get-helper.patch --- diff --git a/queue-4.4/fs-prevent-page-refcount-overflow-in-pipe_buf_get.patch b/queue-4.4/fs-prevent-page-refcount-overflow-in-pipe_buf_get.patch new file mode 100644 index 00000000000..5a3dc717b3a --- /dev/null +++ b/queue-4.4/fs-prevent-page-refcount-overflow-in-pipe_buf_get.patch @@ -0,0 +1,177 @@ +From foo@baz Tue 03 Mar 2020 04:52:04 PM CET +From: Ajay Kaher +Date: Wed, 26 Feb 2020 01:46:14 +0530 +Subject: fs: prevent page refcount overflow in pipe_buf_get +To: +Cc: , , , , , , , , , , , , , , , , , , , , , , , , , , , , +Message-ID: <1582661774-30925-8-git-send-email-akaher@vmware.com> + +From: Ajay Kaher + +From: Matthew Wilcox + +commit 15fab63e1e57be9fdb5eec1bbc5916e9825e9acb upstream. + +Change pipe_buf_get() to return a bool indicating whether it succeeded +in raising the refcount of the page (if the thing in the pipe is a page). +This removes another mechanism for overflowing the page refcount. All +callers converted to handle a failure. + +Reported-by: Jann Horn +Signed-off-by: Matthew Wilcox +Cc: stable@kernel.org +Signed-off-by: Linus Torvalds +[ 4.4.y backport notes: + Regarding the change in generic_pipe_buf_get(), note that + page_cache_get() is the same as get_page(). See mainline commit + 09cbfeaf1a5a6 "mm, fs: get rid of PAGE_CACHE_* and + page_cache_{get,release} macros" for context. ] +Signed-off-by: Ajay Kaher +Signed-off-by: Vlastimil Babka +Signed-off-by: Greg Kroah-Hartman +--- + fs/fuse/dev.c | 12 ++++++------ + fs/pipe.c | 4 ++-- + fs/splice.c | 12 ++++++++++-- + include/linux/pipe_fs_i.h | 10 ++++++---- + kernel/trace/trace.c | 6 +++++- + 5 files changed, 29 insertions(+), 15 deletions(-) + +--- a/fs/fuse/dev.c ++++ b/fs/fuse/dev.c +@@ -2031,10 +2031,8 @@ static ssize_t fuse_dev_splice_write(str + rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len; + + ret = -EINVAL; +- if (rem < len) { +- pipe_unlock(pipe); +- goto out; +- } ++ if (rem < len) ++ goto out_free; + + rem = len; + while (rem) { +@@ -2052,7 +2050,9 @@ static ssize_t fuse_dev_splice_write(str + pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); + pipe->nrbufs--; + } else { +- pipe_buf_get(pipe, ibuf); ++ if (!pipe_buf_get(pipe, ibuf)) ++ goto out_free; ++ + *obuf = *ibuf; + obuf->flags &= ~PIPE_BUF_FLAG_GIFT; + obuf->len = rem; +@@ -2075,13 +2075,13 @@ static ssize_t fuse_dev_splice_write(str + ret = fuse_dev_do_write(fud, &cs, len); + + pipe_lock(pipe); ++out_free: + for (idx = 0; idx < nbuf; idx++) { + struct pipe_buffer *buf = &bufs[idx]; + buf->ops->release(pipe, buf); + } + pipe_unlock(pipe); + +-out: + kfree(bufs); + return ret; + } +--- a/fs/pipe.c ++++ b/fs/pipe.c +@@ -178,9 +178,9 @@ EXPORT_SYMBOL(generic_pipe_buf_steal); + * in the tee() system call, when we duplicate the buffers in one + * pipe into another. 
+ */ +-void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf) ++bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf) + { +- page_cache_get(buf->page); ++ return try_get_page(buf->page); + } + EXPORT_SYMBOL(generic_pipe_buf_get); + +--- a/fs/splice.c ++++ b/fs/splice.c +@@ -1876,7 +1876,11 @@ retry: + * Get a reference to this pipe buffer, + * so we can copy the contents over. + */ +- pipe_buf_get(ipipe, ibuf); ++ if (!pipe_buf_get(ipipe, ibuf)) { ++ if (ret == 0) ++ ret = -EFAULT; ++ break; ++ } + *obuf = *ibuf; + + /* +@@ -1948,7 +1952,11 @@ static int link_pipe(struct pipe_inode_i + * Get a reference to this pipe buffer, + * so we can copy the contents over. + */ +- pipe_buf_get(ipipe, ibuf); ++ if (!pipe_buf_get(ipipe, ibuf)) { ++ if (ret == 0) ++ ret = -EFAULT; ++ break; ++ } + + obuf = opipe->bufs + nbuf; + *obuf = *ibuf; +--- a/include/linux/pipe_fs_i.h ++++ b/include/linux/pipe_fs_i.h +@@ -112,18 +112,20 @@ struct pipe_buf_operations { + /* + * Get a reference to the pipe buffer. + */ +- void (*get)(struct pipe_inode_info *, struct pipe_buffer *); ++ bool (*get)(struct pipe_inode_info *, struct pipe_buffer *); + }; + + /** + * pipe_buf_get - get a reference to a pipe_buffer + * @pipe: the pipe that the buffer belongs to + * @buf: the buffer to get a reference to ++ * ++ * Return: %true if the reference was successfully obtained. + */ +-static inline void pipe_buf_get(struct pipe_inode_info *pipe, ++static inline __must_check bool pipe_buf_get(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) + { +- buf->ops->get(pipe, buf); ++ return buf->ops->get(pipe, buf); + } + + /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual +@@ -148,7 +150,7 @@ struct pipe_inode_info *alloc_pipe_info( + void free_pipe_info(struct pipe_inode_info *); + + /* Generic pipe buffer ops functions */ +-void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); ++bool generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); + int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *); + int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *); + void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -5749,12 +5749,16 @@ static void buffer_pipe_buf_release(stru + buf->private = 0; + } + +-static void buffer_pipe_buf_get(struct pipe_inode_info *pipe, ++static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) + { + struct buffer_ref *ref = (struct buffer_ref *)buf->private; + ++ if (ref->ref > INT_MAX/2) ++ return false; ++ + ref->ref++; ++ return true; + } + + /* Pipe buffer operations for a buffer. */ diff --git a/queue-4.4/mm-add-try_get_page-helper-function.patch b/queue-4.4/mm-add-try_get_page-helper-function.patch new file mode 100644 index 00000000000..f717d79b6f9 --- /dev/null +++ b/queue-4.4/mm-add-try_get_page-helper-function.patch @@ -0,0 +1,105 @@ +From foo@baz Tue 03 Mar 2020 04:52:04 PM CET +From: Ajay Kaher +Date: Wed, 26 Feb 2020 01:46:09 +0530 +Subject: mm: add 'try_get_page()' helper function +To: +Cc: , , , , , , , , , , , , , , , , , , , , , , , , , , , , +Message-ID: <1582661774-30925-3-git-send-email-akaher@vmware.com> + +From: Ajay Kaher + +From: Linus Torvalds + +commit 88b1a17dfc3ed7728316478fae0f5ad508f50397 upsteam. 
+ +This is the same as the traditional 'get_page()' function, but instead +of unconditionally incrementing the reference count of the page, it only +does so if the count was "safe". It returns whether the reference count +was incremented (and is marked __must_check, since the caller obviously +has to be aware of it). + +Also like 'get_page()', you can't use this function unless you already +had a reference to the page. The intent is that you can use this +exactly like get_page(), but in situations where you want to limit the +maximum reference count. + +The code currently does an unconditional WARN_ON_ONCE() if we ever hit +the reference count issues (either zero or negative), as a notification +that the conditional non-increment actually happened. + +NOTE! The count access for the "safety" check is inherently racy, but +that doesn't matter since the buffer we use is basically half the range +of the reference count (ie we look at the sign of the count). + +Acked-by: Matthew Wilcox +Cc: Jann Horn +Cc: stable@kernel.org +Signed-off-by: Linus Torvalds +[ 4.4.y backport notes: + Srivatsa: + - Adapted try_get_page() to match the get_page() + implementation in 4.4.y, except for the refcount check. + - Added try_get_page_foll() which will be needed + in a subsequent patch. ] +Signed-off-by: Srivatsa S. Bhat (VMware) +Signed-off-by: Ajay Kaher +Signed-off-by: Vlastimil Babka +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/mm.h | 12 ++++++++++++ + mm/internal.h | 23 +++++++++++++++++++++++ + 2 files changed, 35 insertions(+) + +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -505,6 +505,18 @@ static inline void get_page(struct page + atomic_inc(&page->_count); + } + ++static inline __must_check bool try_get_page(struct page *page) ++{ ++ if (unlikely(PageTail(page))) ++ if (likely(__get_page_tail(page))) ++ return true; ++ ++ if (WARN_ON_ONCE(atomic_read(&page->_count) <= 0)) ++ return false; ++ atomic_inc(&page->_count); ++ return true; ++} ++ + static inline struct page *virt_to_head_page(const void *x) + { + struct page *page = virt_to_page(x); +--- a/mm/internal.h ++++ b/mm/internal.h +@@ -112,6 +112,29 @@ static inline void get_page_foll(struct + } + } + ++static inline __must_check bool try_get_page_foll(struct page *page) ++{ ++ if (unlikely(PageTail(page))) { ++ if (WARN_ON_ONCE(atomic_read(&compound_head(page)->_count) <= 0)) ++ return false; ++ /* ++ * This is safe only because ++ * __split_huge_page_refcount() can't run under ++ * get_page_foll() because we hold the proper PT lock. ++ */ ++ __get_page_tail_foll(page, true); ++ } else { ++ /* ++ * Getting a normal page or the head of a compound page ++ * requires to already have an elevated page->_count. 
++ */ ++ if (WARN_ON_ONCE(atomic_read(&page->_count) <= 0)) ++ return false; ++ atomic_inc(&page->_count); ++ } ++ return true; ++} ++ + extern unsigned long highest_memmap_pfn; + + /* diff --git a/queue-4.4/mm-gup-ensure-real-head-page-is-ref-counted-when-using-hugepages.patch b/queue-4.4/mm-gup-ensure-real-head-page-is-ref-counted-when-using-hugepages.patch new file mode 100644 index 00000000000..225bf4eeab4 --- /dev/null +++ b/queue-4.4/mm-gup-ensure-real-head-page-is-ref-counted-when-using-hugepages.patch @@ -0,0 +1,110 @@ +From foo@baz Tue 03 Mar 2020 04:52:04 PM CET +From: Ajay Kaher +Date: Wed, 26 Feb 2020 01:46:11 +0530 +Subject: mm, gup: ensure real head page is ref-counted when using hugepages +To: +Cc: , , , , , , , , , , , , , , , , , , , , , , , , , , , , Hillf Danton +Message-ID: <1582661774-30925-5-git-send-email-akaher@vmware.com> + +From: Ajay Kaher + +From: Punit Agrawal + +commit d63206ee32b6e64b0e12d46e5d6004afd9913713 upstream. + +When speculatively taking references to a hugepage using +page_cache_add_speculative() in gup_huge_pmd(), it is assumed that the +page returned by pmd_page() is the head page. Although normally true, +this assumption doesn't hold when the hugepage comprises of successive +page table entries such as when using contiguous bit on arm64 at PTE or +PMD levels. + +This can be addressed by ensuring that the page passed to +page_cache_add_speculative() is the real head or by de-referencing the +head page within the function. + +We take the first approach to keep the usage pattern aligned with +page_cache_get_speculative() where users already pass the appropriate +page, i.e., the de-referenced head. + +Apply the same logic to fix gup_huge_[pud|pgd]() as well. + +[punit.agrawal@arm.com: fix arm64 ltp failure] + Link: http://lkml.kernel.org/r/20170619170145.25577-5-punit.agrawal@arm.com +Link: http://lkml.kernel.org/r/20170522133604.11392-3-punit.agrawal@arm.com +Signed-off-by: Punit Agrawal +Acked-by: Steve Capper +Cc: Michal Hocko +Cc: "Kirill A. 
Shutemov" +Cc: Aneesh Kumar K.V +Cc: Catalin Marinas +Cc: Will Deacon +Cc: Naoya Horiguchi +Cc: Mark Rutland +Cc: Hillf Danton +Cc: Mike Kravetz +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Ajay Kaher +Signed-off-by: Vlastimil Babka +Signed-off-by: Greg Kroah-Hartman +--- + mm/gup.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +--- a/mm/gup.c ++++ b/mm/gup.c +@@ -1130,8 +1130,7 @@ static int gup_huge_pmd(pmd_t orig, pmd_ + return 0; + + refs = 0; +- head = pmd_page(orig); +- page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); ++ page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); + tail = page; + do { + pages[*nr] = page; +@@ -1140,6 +1139,7 @@ static int gup_huge_pmd(pmd_t orig, pmd_ + refs++; + } while (addr += PAGE_SIZE, addr != end); + ++ head = compound_head(pmd_page(orig)); + if (!page_cache_add_speculative(head, refs)) { + *nr -= refs; + return 0; +@@ -1176,8 +1176,7 @@ static int gup_huge_pud(pud_t orig, pud_ + return 0; + + refs = 0; +- head = pud_page(orig); +- page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); ++ page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); + tail = page; + do { + pages[*nr] = page; +@@ -1186,6 +1185,7 @@ static int gup_huge_pud(pud_t orig, pud_ + refs++; + } while (addr += PAGE_SIZE, addr != end); + ++ head = compound_head(pud_page(orig)); + if (!page_cache_add_speculative(head, refs)) { + *nr -= refs; + return 0; +@@ -1218,8 +1218,7 @@ static int gup_huge_pgd(pgd_t orig, pgd_ + return 0; + + refs = 0; +- head = pgd_page(orig); +- page = head + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT); ++ page = pgd_page(orig) + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT); + tail = page; + do { + pages[*nr] = page; +@@ -1228,6 +1227,7 @@ static int gup_huge_pgd(pgd_t orig, pgd_ + refs++; + } while (addr += PAGE_SIZE, addr != end); + ++ head = compound_head(pgd_page(orig)); + if (!page_cache_add_speculative(head, refs)) { + *nr -= refs; + return 0; diff --git a/queue-4.4/mm-gup-remove-broken-vm_bug_on_page-compound-check-for-hugepages.patch b/queue-4.4/mm-gup-remove-broken-vm_bug_on_page-compound-check-for-hugepages.patch new file mode 100644 index 00000000000..6dc6935a699 --- /dev/null +++ b/queue-4.4/mm-gup-remove-broken-vm_bug_on_page-compound-check-for-hugepages.patch @@ -0,0 +1,77 @@ +From foo@baz Tue 03 Mar 2020 04:52:04 PM CET +From: Ajay Kaher +Date: Wed, 26 Feb 2020 01:46:10 +0530 +Subject: mm, gup: remove broken VM_BUG_ON_PAGE compound check for hugepages +To: +Cc: , , , , , , , , , , , , , , , , , , , , , , , , , , , , Hillf Danton +Message-ID: <1582661774-30925-4-git-send-email-akaher@vmware.com> + +From: Ajay Kaher + +From: Will Deacon + +commit a3e328556d41bb61c55f9dfcc62d6a826ea97b85 upstream. + +When operating on hugepages with DEBUG_VM enabled, the GUP code checks +the compound head for each tail page prior to calling +page_cache_add_speculative. This is broken, because on the fast-GUP +path (where we don't hold any page table locks) we can be racing with a +concurrent invocation of split_huge_page_to_list. + +split_huge_page_to_list deals with this race by using page_ref_freeze to +freeze the page and force concurrent GUPs to fail whilst the component +pages are modified. 
This modification includes clearing the +compound_head field for the tail pages, so checking this prior to a +successful call to page_cache_add_speculative can lead to false +positives: In fact, page_cache_add_speculative *already* has this check +once the page refcount has been successfully updated, so we can simply +remove the broken calls to VM_BUG_ON_PAGE. + +Link: http://lkml.kernel.org/r/20170522133604.11392-2-punit.agrawal@arm.com +Signed-off-by: Will Deacon +Signed-off-by: Punit Agrawal +Acked-by: Steve Capper +Acked-by: Kirill A. Shutemov +Cc: Aneesh Kumar K.V +Cc: Catalin Marinas +Cc: Naoya Horiguchi +Cc: Mark Rutland +Cc: Hillf Danton +Cc: Michal Hocko +Cc: Mike Kravetz +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Srivatsa S. Bhat (VMware) +Signed-off-by: Ajay Kaher +Signed-off-by: Vlastimil Babka +Signed-off-by: Greg Kroah-Hartman +--- + mm/gup.c | 3 --- + 1 file changed, 3 deletions(-) + +--- a/mm/gup.c ++++ b/mm/gup.c +@@ -1134,7 +1134,6 @@ static int gup_huge_pmd(pmd_t orig, pmd_ + page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); + tail = page; + do { +- VM_BUG_ON_PAGE(compound_head(page) != head, page); + pages[*nr] = page; + (*nr)++; + page++; +@@ -1181,7 +1180,6 @@ static int gup_huge_pud(pud_t orig, pud_ + page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); + tail = page; + do { +- VM_BUG_ON_PAGE(compound_head(page) != head, page); + pages[*nr] = page; + (*nr)++; + page++; +@@ -1224,7 +1222,6 @@ static int gup_huge_pgd(pgd_t orig, pgd_ + page = head + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT); + tail = page; + do { +- VM_BUG_ON_PAGE(compound_head(page) != head, page); + pages[*nr] = page; + (*nr)++; + page++; diff --git a/queue-4.4/mm-make-page-ref-count-overflow-check-tighter-and-more-explicit.patch b/queue-4.4/mm-make-page-ref-count-overflow-check-tighter-and-more-explicit.patch new file mode 100644 index 00000000000..c428c99df24 --- /dev/null +++ b/queue-4.4/mm-make-page-ref-count-overflow-check-tighter-and-more-explicit.patch @@ -0,0 +1,84 @@ +From foo@baz Tue 03 Mar 2020 04:52:04 PM CET +From: Ajay Kaher +Date: Wed, 26 Feb 2020 01:46:08 +0530 +Subject: mm: make page ref count overflow check tighter and more explicit +To: +Cc: , , , , , , , , , , , , , , , , , , , , , , , , , , , , +Message-ID: <1582661774-30925-2-git-send-email-akaher@vmware.com> + +From: Ajay Kaher + +From: Linus Torvalds + +commit f958d7b528b1b40c44cfda5eabe2d82760d868c3 upsteam. + +We have a VM_BUG_ON() to check that the page reference count doesn't +underflow (or get close to overflow) by checking the sign of the count. + +That's all fine, but we actually want to allow people to use a "get page +ref unless it's already very high" helper function, and we want that one +to use the sign of the page ref (without triggering this VM_BUG_ON). + +Change the VM_BUG_ON to only check for small underflows (or _very_ close +to overflowing), and ignore overflows which have strayed into negative +territory. + +Acked-by: Matthew Wilcox +Cc: Jann Horn +Cc: stable@kernel.org +Signed-off-by: Linus Torvalds +[ 4.4.y backport notes: + Ajay: Open-coded atomic refcount access due to missing + page_ref_count() helper in 4.4.y + Srivatsa: Added overflow check to get_page_foll() and related code. ] +Signed-off-by: Srivatsa S. 
Bhat (VMware) +Signed-off-by: Ajay Kaher +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/mm.h | 6 +++++- + mm/internal.h | 5 +++-- + 2 files changed, 8 insertions(+), 3 deletions(-) + +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -488,6 +488,10 @@ static inline void get_huge_page_tail(st + + extern bool __get_page_tail(struct page *page); + ++/* 127: arbitrary random number, small enough to assemble well */ ++#define page_ref_zero_or_close_to_overflow(page) \ ++ ((unsigned int) atomic_read(&page->_count) + 127u <= 127u) ++ + static inline void get_page(struct page *page) + { + if (unlikely(PageTail(page))) +@@ -497,7 +501,7 @@ static inline void get_page(struct page + * Getting a normal page or the head of a compound page + * requires to already have an elevated page->_count. + */ +- VM_BUG_ON_PAGE(atomic_read(&page->_count) <= 0, page); ++ VM_BUG_ON_PAGE(page_ref_zero_or_close_to_overflow(page), page); + atomic_inc(&page->_count); + } + +--- a/mm/internal.h ++++ b/mm/internal.h +@@ -81,7 +81,8 @@ static inline void __get_page_tail_foll( + * speculative page access (like in + * page_cache_get_speculative()) on tail pages. + */ +- VM_BUG_ON_PAGE(atomic_read(&compound_head(page)->_count) <= 0, page); ++ VM_BUG_ON_PAGE(page_ref_zero_or_close_to_overflow(compound_head(page)), ++ page); + if (get_page_head) + atomic_inc(&compound_head(page)->_count); + get_huge_page_tail(page); +@@ -106,7 +107,7 @@ static inline void get_page_foll(struct + * Getting a normal page or the head of a compound page + * requires to already have an elevated page->_count. + */ +- VM_BUG_ON_PAGE(atomic_read(&page->_count) <= 0, page); ++ VM_BUG_ON_PAGE(page_ref_zero_or_close_to_overflow(page), page); + atomic_inc(&page->_count); + } + } diff --git a/queue-4.4/mm-prevent-get_user_pages-from-overflowing-page-refcount.patch b/queue-4.4/mm-prevent-get_user_pages-from-overflowing-page-refcount.patch new file mode 100644 index 00000000000..bbb98f80300 --- /dev/null +++ b/queue-4.4/mm-prevent-get_user_pages-from-overflowing-page-refcount.patch @@ -0,0 +1,244 @@ +From foo@baz Tue 03 Mar 2020 04:52:04 PM CET +From: Ajay Kaher +Date: Wed, 26 Feb 2020 01:46:12 +0530 +Subject: mm: prevent get_user_pages() from overflowing page refcount +To: +Cc: , , , , , , , , , , , , , , , , , , , , , , , , , , , , +Message-ID: <1582661774-30925-6-git-send-email-akaher@vmware.com> + +From: Ajay Kaher + +From: Linus Torvalds + +commit 8fde12ca79aff9b5ba951fce1a2641901b8d8e64 upstream. + +If the page refcount wraps around past zero, it will be freed while +there are still four billion references to it. One of the possible +avenues for an attacker to try to make this happen is by doing direct IO +on a page multiple times. This patch makes get_user_pages() refuse to +take a new page reference if there are already more than two billion +references to the page. + +Reported-by: Jann Horn +Acked-by: Matthew Wilcox +Cc: stable@kernel.org +Signed-off-by: Linus Torvalds +[ 4.4.y backport notes: + Ajay: - Added local variable 'err' with-in follow_hugetlb_page() + from 2be7cfed995e, to resolve compilation error + - Added page_ref_count() + - Added missing refcount overflow checks on x86 and s390 + (Vlastimil, thanks for this change) + Srivatsa: - Replaced call to get_page_foll() with try_get_page_foll() ] +Signed-off-by: Srivatsa S. 
Bhat (VMware) +Signed-off-by: Ajay Kaher +Signed-off-by: Vlastimil Babka +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/mm/gup.c | 6 ++++-- + arch/x86/mm/gup.c | 9 ++++++++- + include/linux/mm.h | 5 +++++ + mm/gup.c | 42 +++++++++++++++++++++++++++++++++--------- + mm/hugetlb.c | 16 +++++++++++++++- + 5 files changed, 65 insertions(+), 13 deletions(-) + +--- a/arch/s390/mm/gup.c ++++ b/arch/s390/mm/gup.c +@@ -37,7 +37,8 @@ static inline int gup_pte_range(pmd_t *p + return 0; + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + page = pte_page(pte); +- if (!page_cache_get_speculative(page)) ++ if (WARN_ON_ONCE(page_ref_count(page) < 0) ++ || !page_cache_get_speculative(page)) + return 0; + if (unlikely(pte_val(pte) != pte_val(*ptep))) { + put_page(page); +@@ -76,7 +77,8 @@ static inline int gup_huge_pmd(pmd_t *pm + refs++; + } while (addr += PAGE_SIZE, addr != end); + +- if (!page_cache_add_speculative(head, refs)) { ++ if (WARN_ON_ONCE(page_ref_count(head) < 0) ++ || !page_cache_add_speculative(head, refs)) { + *nr -= refs; + return 0; + } +--- a/arch/x86/mm/gup.c ++++ b/arch/x86/mm/gup.c +@@ -95,7 +95,10 @@ static noinline int gup_pte_range(pmd_t + } + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + page = pte_page(pte); +- get_page(page); ++ if (unlikely(!try_get_page(page))) { ++ pte_unmap(ptep); ++ return 0; ++ } + SetPageReferenced(page); + pages[*nr] = page; + (*nr)++; +@@ -132,6 +135,8 @@ static noinline int gup_huge_pmd(pmd_t p + + refs = 0; + head = pmd_page(pmd); ++ if (WARN_ON_ONCE(page_ref_count(head) <= 0)) ++ return 0; + page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); + do { + VM_BUG_ON_PAGE(compound_head(page) != head, page); +@@ -208,6 +213,8 @@ static noinline int gup_huge_pud(pud_t p + + refs = 0; + head = pud_page(pud); ++ if (WARN_ON_ONCE(page_ref_count(head) <= 0)) ++ return 0; + page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); + do { + VM_BUG_ON_PAGE(compound_head(page) != head, page); +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -488,6 +488,11 @@ static inline void get_huge_page_tail(st + + extern bool __get_page_tail(struct page *page); + ++static inline int page_ref_count(struct page *page) ++{ ++ return atomic_read(&page->_count); ++} ++ + /* 127: arbitrary random number, small enough to assemble well */ + #define page_ref_zero_or_close_to_overflow(page) \ + ((unsigned int) atomic_read(&page->_count) + 127u <= 127u) +--- a/mm/gup.c ++++ b/mm/gup.c +@@ -126,8 +126,12 @@ retry: + } + } + +- if (flags & FOLL_GET) +- get_page_foll(page); ++ if (flags & FOLL_GET) { ++ if (unlikely(!try_get_page_foll(page))) { ++ page = ERR_PTR(-ENOMEM); ++ goto out; ++ } ++ } + if (flags & FOLL_TOUCH) { + if ((flags & FOLL_WRITE) && + !pte_dirty(pte) && !PageDirty(page)) +@@ -289,7 +293,10 @@ static int get_gate_page(struct mm_struc + goto unmap; + *page = pte_page(*pte); + } +- get_page(*page); ++ if (unlikely(!try_get_page(*page))) { ++ ret = -ENOMEM; ++ goto unmap; ++ } + out: + ret = 0; + unmap: +@@ -1053,6 +1060,20 @@ struct page *get_dump_page(unsigned long + */ + #ifdef CONFIG_HAVE_GENERIC_RCU_GUP + ++/* ++ * Return the compund head page with ref appropriately incremented, ++ * or NULL if that failed. 
++ */ ++static inline struct page *try_get_compound_head(struct page *page, int refs) ++{ ++ struct page *head = compound_head(page); ++ if (WARN_ON_ONCE(atomic_read(&head->_count) < 0)) ++ return NULL; ++ if (unlikely(!page_cache_add_speculative(head, refs))) ++ return NULL; ++ return head; ++} ++ + #ifdef __HAVE_ARCH_PTE_SPECIAL + static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) +@@ -1083,6 +1104,9 @@ static int gup_pte_range(pmd_t pmd, unsi + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + page = pte_page(pte); + ++ if (WARN_ON_ONCE(page_ref_count(page) < 0)) ++ goto pte_unmap; ++ + if (!page_cache_get_speculative(page)) + goto pte_unmap; + +@@ -1139,8 +1163,8 @@ static int gup_huge_pmd(pmd_t orig, pmd_ + refs++; + } while (addr += PAGE_SIZE, addr != end); + +- head = compound_head(pmd_page(orig)); +- if (!page_cache_add_speculative(head, refs)) { ++ head = try_get_compound_head(pmd_page(orig), refs); ++ if (!head) { + *nr -= refs; + return 0; + } +@@ -1185,8 +1209,8 @@ static int gup_huge_pud(pud_t orig, pud_ + refs++; + } while (addr += PAGE_SIZE, addr != end); + +- head = compound_head(pud_page(orig)); +- if (!page_cache_add_speculative(head, refs)) { ++ head = try_get_compound_head(pud_page(orig), refs); ++ if (!head) { + *nr -= refs; + return 0; + } +@@ -1227,8 +1251,8 @@ static int gup_huge_pgd(pgd_t orig, pgd_ + refs++; + } while (addr += PAGE_SIZE, addr != end); + +- head = compound_head(pgd_page(orig)); +- if (!page_cache_add_speculative(head, refs)) { ++ head = try_get_compound_head(pgd_page(orig), refs); ++ if (!head) { + *nr -= refs; + return 0; + } +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -3886,6 +3886,7 @@ long follow_hugetlb_page(struct mm_struc + unsigned long vaddr = *position; + unsigned long remainder = *nr_pages; + struct hstate *h = hstate_vma(vma); ++ int err = -EFAULT; + + while (vaddr < vma->vm_end && remainder) { + pte_t *pte; +@@ -3957,6 +3958,19 @@ long follow_hugetlb_page(struct mm_struc + + pfn_offset = (vaddr & ~huge_page_mask(h)) >> PAGE_SHIFT; + page = pte_page(huge_ptep_get(pte)); ++ ++ /* ++ * Instead of doing 'try_get_page_foll()' below in the same_page ++ * loop, just check the count once here. ++ */ ++ if (unlikely(page_count(page) <= 0)) { ++ if (pages) { ++ spin_unlock(ptl); ++ remainder = 0; ++ err = -ENOMEM; ++ break; ++ } ++ } + same_page: + if (pages) { + pages[i] = mem_map_offset(page, pfn_offset); +@@ -3983,7 +3997,7 @@ same_page: + *nr_pages = remainder; + *position = vaddr; + +- return i ? i : -EFAULT; ++ return i ? i : err; + } + + unsigned long hugetlb_change_protection(struct vm_area_struct *vma, diff --git a/queue-4.4/pipe-add-pipe_buf_get-helper.patch b/queue-4.4/pipe-add-pipe_buf_get-helper.patch new file mode 100644 index 00000000000..21d346966df --- /dev/null +++ b/queue-4.4/pipe-add-pipe_buf_get-helper.patch @@ -0,0 +1,76 @@ +From foo@baz Tue 03 Mar 2020 04:52:04 PM CET +From: Ajay Kaher +Date: Wed, 26 Feb 2020 01:46:13 +0530 +Subject: pipe: add pipe_buf_get() helper +To: +Cc: , , , , , , , , , , , , , , , , , , , , , , , , , , , +Message-ID: <1582661774-30925-7-git-send-email-akaher@vmware.com> + +From: Ajay Kaher + +From: Miklos Szeredi + +commit 7bf2d1df80822ec056363627e2014990f068f7aa upstream. 
+ +Signed-off-by: Miklos Szeredi +Signed-off-by: Al Viro +Signed-off-by: Ajay Kaher +Signed-off-by: Vlastimil Babka +Signed-off-by: Greg Kroah-Hartman +--- + fs/fuse/dev.c | 2 +- + fs/splice.c | 4 ++-- + include/linux/pipe_fs_i.h | 11 +++++++++++ + 3 files changed, 14 insertions(+), 3 deletions(-) + +--- a/fs/fuse/dev.c ++++ b/fs/fuse/dev.c +@@ -2052,7 +2052,7 @@ static ssize_t fuse_dev_splice_write(str + pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); + pipe->nrbufs--; + } else { +- ibuf->ops->get(pipe, ibuf); ++ pipe_buf_get(pipe, ibuf); + *obuf = *ibuf; + obuf->flags &= ~PIPE_BUF_FLAG_GIFT; + obuf->len = rem; +--- a/fs/splice.c ++++ b/fs/splice.c +@@ -1876,7 +1876,7 @@ retry: + * Get a reference to this pipe buffer, + * so we can copy the contents over. + */ +- ibuf->ops->get(ipipe, ibuf); ++ pipe_buf_get(ipipe, ibuf); + *obuf = *ibuf; + + /* +@@ -1948,7 +1948,7 @@ static int link_pipe(struct pipe_inode_i + * Get a reference to this pipe buffer, + * so we can copy the contents over. + */ +- ibuf->ops->get(ipipe, ibuf); ++ pipe_buf_get(ipipe, ibuf); + + obuf = opipe->bufs + nbuf; + *obuf = *ibuf; +--- a/include/linux/pipe_fs_i.h ++++ b/include/linux/pipe_fs_i.h +@@ -115,6 +115,17 @@ struct pipe_buf_operations { + void (*get)(struct pipe_inode_info *, struct pipe_buffer *); + }; + ++/** ++ * pipe_buf_get - get a reference to a pipe_buffer ++ * @pipe: the pipe that the buffer belongs to ++ * @buf: the buffer to get a reference to ++ */ ++static inline void pipe_buf_get(struct pipe_inode_info *pipe, ++ struct pipe_buffer *buf) ++{ ++ buf->ops->get(pipe, buf); ++} ++ + /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual + memory allocation, whereas PIPE_BUF makes atomicity guarantees. */ + #define PIPE_SIZE PAGE_SIZE diff --git a/queue-4.4/series b/queue-4.4/series index 08c4dc81428..bf0c18fb2eb 100644 --- a/queue-4.4/series +++ b/queue-4.4/series @@ -30,3 +30,10 @@ net-netlink-cap-max-groups-which-will-be-considered-in-netlink_bind.patch namei-only-return-echild-from-follow_dotdot_rcu.patch kvm-check-for-a-bad-hva-before-dropping-into-the-ghc-slow-path.patch slip-stop-double-free-sl-dev-in-slip_open.patch +mm-make-page-ref-count-overflow-check-tighter-and-more-explicit.patch +mm-add-try_get_page-helper-function.patch +mm-gup-remove-broken-vm_bug_on_page-compound-check-for-hugepages.patch +mm-gup-ensure-real-head-page-is-ref-counted-when-using-hugepages.patch +mm-prevent-get_user_pages-from-overflowing-page-refcount.patch +pipe-add-pipe_buf_get-helper.patch +fs-prevent-page-refcount-overflow-in-pipe_buf_get.patch
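
All seven patches above enforce the same rule: never take another reference to a page (or pipe buffer) whose reference count is zero, negative, or close to wrapping, and make the caller handle the refusal. What follows is a minimal, self-contained userspace sketch of that rule, not kernel code: struct fake_page, try_get_fake_page() and ref_zero_or_close_to_overflow() are illustrative stand-ins for struct page, the try_get_page() helper and the page_ref_zero_or_close_to_overflow() macro added by this series, with C11 atomics standing in for the kernel's atomic_t. Only the simple (non-compound-page) path is modelled; the real helpers also handle tail pages, and buffer_pipe_buf_get() applies the analogous upper-bound check ref > INT_MAX/2.

/*
 * Illustrative userspace model only -- the names below are stand-ins,
 * not kernel API.  try_get_fake_page() mirrors the head-page path of
 * try_get_page(); ref_zero_or_close_to_overflow() mirrors the
 * page_ref_zero_or_close_to_overflow() macro from this series.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct fake_page {
	atomic_int _count;		/* stands in for page->_count */
};

/* True when the count is 0, negative, or within 127 of wrapping. */
static bool ref_zero_or_close_to_overflow(struct fake_page *page)
{
	return (unsigned int)atomic_load(&page->_count) + 127u <= 127u;
}

/*
 * Take a reference only if the current count is "safe" (strictly
 * positive); otherwise refuse, instead of letting a later increment
 * wrap the count past zero.  The read-then-increment is racy, but the
 * guard band is half the range of the counter, so a race cannot cross it.
 */
static bool try_get_fake_page(struct fake_page *page)
{
	if (atomic_load(&page->_count) <= 0)
		return false;
	atomic_fetch_add(&page->_count, 1);
	return true;
}

int main(void)
{
	struct fake_page p = { ._count = 1 };

	/* Normal case: one existing reference, taking another succeeds. */
	printf("first get:  %s\n", try_get_fake_page(&p) ? "ok" : "refused");

	/* Simulate a count pushed into negative territory by an overflow
	 * attempt: further gets are refused and the overflow check fires. */
	atomic_store(&p._count, -1);
	printf("after wrap: %s\n", try_get_fake_page(&p) ? "ok" : "refused");
	printf("overflow check: %s\n",
	       ref_zero_or_close_to_overflow(&p) ? "triggered" : "clear");

	return 0;
}

As in the kernel checks, the test is deliberately tolerant of races: because the refusal threshold sits half the counter's range (or, for the macro, a 127-entry window) away from the point of actual overflow, concurrent increments between the read and the add cannot push the count past zero before the check starts failing.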