From 250858505a7aa0c082dd7961d931dad7e17eda4d Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Tue, 4 Jun 2019 16:45:38 +0200
Subject: [PATCH] 4.9-stable patches

added patches:
	binder-replace-p-with-pk-for-stable.patch
	binder-replace-p-with-pk.patch
	brcmfmac-add-length-checks-in-scheduled-scan-result-handler.patch
	brcmfmac-add-subtype-check-for-event-handling-in-data-path.patch
	brcmfmac-assure-ssid-length-from-firmware-is-limited.patch
	coredump-fix-race-condition-between-mmget_not_zero-get_task_mm-and-core-dumping.patch
	fs-prevent-page-refcount-overflow-in-pipe_buf_get.patch
	mm-gup-ensure-real-head-page-is-ref-counted-when-using-hugepages.patch
	mm-gup-remove-broken-vm_bug_on_page-compound-check-for-hugepages.patch
	mm-make-page-ref-count-overflow-check-tighter-and-more-explicit.patch
	mm-prevent-get_user_pages-from-overflowing-page-refcount.patch
---
 .../binder-replace-p-with-pk-for-stable.patch | 146 ++++++++++++
 queue-4.9/binder-replace-p-with-pk.patch | 59 +++++
 ...cks-in-scheduled-scan-result-handler.patch | 72 ++++++
 ...heck-for-event-handling-in-data-path.patch | 102 +++++++++
 ...ssid-length-from-firmware-is-limited.patch | 35 +++
 ...ot_zero-get_task_mm-and-core-dumping.patch | 208 ++++++++++++++++++
 ...ge-refcount-overflow-in-pipe_buf_get.patch | 164 ++++++++++++++
 ...-is-ref-counted-when-using-hugepages.patch | 104 +++++++++
 ...on_page-compound-check-for-hugepages.patch | 70 ++++++
 ...flow-check-tighter-and-more-explicit.patch | 51 +++++
 ...pages-from-overflowing-page-refcount.patch | 172 +++++++++++++++
 queue-4.9/series | 11 +
 12 files changed, 1194 insertions(+)
 create mode 100644 queue-4.9/binder-replace-p-with-pk-for-stable.patch
 create mode 100644 queue-4.9/binder-replace-p-with-pk.patch
 create mode 100644 queue-4.9/brcmfmac-add-length-checks-in-scheduled-scan-result-handler.patch
 create mode 100644 queue-4.9/brcmfmac-add-subtype-check-for-event-handling-in-data-path.patch
 create mode 100644 queue-4.9/brcmfmac-assure-ssid-length-from-firmware-is-limited.patch
 create mode 100644 queue-4.9/coredump-fix-race-condition-between-mmget_not_zero-get_task_mm-and-core-dumping.patch
 create mode 100644 queue-4.9/fs-prevent-page-refcount-overflow-in-pipe_buf_get.patch
 create mode 100644 queue-4.9/mm-gup-ensure-real-head-page-is-ref-counted-when-using-hugepages.patch
 create mode 100644 queue-4.9/mm-gup-remove-broken-vm_bug_on_page-compound-check-for-hugepages.patch
 create mode 100644 queue-4.9/mm-make-page-ref-count-overflow-check-tighter-and-more-explicit.patch
 create mode 100644 queue-4.9/mm-prevent-get_user_pages-from-overflowing-page-refcount.patch

diff --git a/queue-4.9/binder-replace-p-with-pk-for-stable.patch b/queue-4.9/binder-replace-p-with-pk-for-stable.patch
new file mode 100644
index 00000000000..7529aaafa1d
--- /dev/null
+++ b/queue-4.9/binder-replace-p-with-pk-for-stable.patch
@@ -0,0 +1,146 @@
+From foo@baz Tue 04 Jun 2019 04:44:10 PM CEST
+From: Ben Hutchings
+Date: Wed, 29 May 2019 18:02:44 +0100
+Subject: binder: Replace "%p" with "%pK" for stable
+
+From: Ben Hutchings
+
+This was done as part of upstream commits fdfb4a99b6ab "binder:
+separate binder allocator structure from binder proc", 19c987241ca1
+"binder: separate out binder_alloc functions", and 7a4408c6bd3e
+"binder: make sure accesses to proc/thread are safe". However, those
+commits made lots of other changes that are not suitable for stable.
+ +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + drivers/android/binder.c | 28 ++++++++++++++-------------- + 1 file changed, 14 insertions(+), 14 deletions(-) + +--- a/drivers/android/binder.c ++++ b/drivers/android/binder.c +@@ -488,7 +488,7 @@ static void binder_insert_free_buffer(st + new_buffer_size = binder_buffer_size(proc, new_buffer); + + binder_debug(BINDER_DEBUG_BUFFER_ALLOC, +- "%d: add free buffer, size %zd, at %p\n", ++ "%d: add free buffer, size %zd, at %pK\n", + proc->pid, new_buffer_size, new_buffer); + + while (*p) { +@@ -566,7 +566,7 @@ static int binder_update_page_range(stru + struct mm_struct *mm; + + binder_debug(BINDER_DEBUG_BUFFER_ALLOC, +- "%d: %s pages %p-%p\n", proc->pid, ++ "%d: %s pages %pK-%pK\n", proc->pid, + allocate ? "allocate" : "free", start, end); + + if (end <= start) +@@ -606,7 +606,7 @@ static int binder_update_page_range(stru + BUG_ON(*page); + *page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); + if (*page == NULL) { +- pr_err("%d: binder_alloc_buf failed for page at %p\n", ++ pr_err("%d: binder_alloc_buf failed for page at %pK\n", + proc->pid, page_addr); + goto err_alloc_page_failed; + } +@@ -615,7 +615,7 @@ static int binder_update_page_range(stru + flush_cache_vmap((unsigned long)page_addr, + (unsigned long)page_addr + PAGE_SIZE); + if (ret != 1) { +- pr_err("%d: binder_alloc_buf failed to map page at %p in kernel\n", ++ pr_err("%d: binder_alloc_buf failed to map page at %pK in kernel\n", + proc->pid, page_addr); + goto err_map_kernel_failed; + } +@@ -719,7 +719,7 @@ static struct binder_buffer *binder_allo + } + + binder_debug(BINDER_DEBUG_BUFFER_ALLOC, +- "%d: binder_alloc_buf size %zd got buffer %p size %zd\n", ++ "%d: binder_alloc_buf size %zd got buffer %pK size %zd\n", + proc->pid, size, buffer, buffer_size); + + has_page_addr = +@@ -749,7 +749,7 @@ static struct binder_buffer *binder_allo + binder_insert_free_buffer(proc, new_buffer); + } + binder_debug(BINDER_DEBUG_BUFFER_ALLOC, +- "%d: binder_alloc_buf size %zd got %p\n", ++ "%d: binder_alloc_buf size %zd got %pK\n", + proc->pid, size, buffer); + buffer->data_size = data_size; + buffer->offsets_size = offsets_size; +@@ -789,7 +789,7 @@ static void binder_delete_free_buffer(st + if (buffer_end_page(prev) == buffer_end_page(buffer)) + free_page_end = 0; + binder_debug(BINDER_DEBUG_BUFFER_ALLOC, +- "%d: merge free, buffer %p share page with %p\n", ++ "%d: merge free, buffer %pK share page with %pK\n", + proc->pid, buffer, prev); + } + +@@ -802,14 +802,14 @@ static void binder_delete_free_buffer(st + buffer_start_page(buffer)) + free_page_start = 0; + binder_debug(BINDER_DEBUG_BUFFER_ALLOC, +- "%d: merge free, buffer %p share page with %p\n", ++ "%d: merge free, buffer %pK share page with %pK\n", + proc->pid, buffer, prev); + } + } + list_del(&buffer->entry); + if (free_page_start || free_page_end) { + binder_debug(BINDER_DEBUG_BUFFER_ALLOC, +- "%d: merge free, buffer %p do not share page%s%s with %p or %p\n", ++ "%d: merge free, buffer %pK do not share page%s%s with %pK or %pK\n", + proc->pid, buffer, free_page_start ? "" : " end", + free_page_end ? "" : " start", prev, next); + binder_update_page_range(proc, 0, free_page_start ? 
+@@ -830,7 +830,7 @@ static void binder_free_buf(struct binde + ALIGN(buffer->offsets_size, sizeof(void *)); + + binder_debug(BINDER_DEBUG_BUFFER_ALLOC, +- "%d: binder_free_buf %p size %zd buffer_size %zd\n", ++ "%d: binder_free_buf %pK size %zd buffer_size %zd\n", + proc->pid, buffer, size, buffer_size); + + BUG_ON(buffer->free); +@@ -2930,7 +2930,7 @@ static int binder_mmap(struct file *filp + #ifdef CONFIG_CPU_CACHE_VIPT + if (cache_is_vipt_aliasing()) { + while (CACHE_COLOUR((vma->vm_start ^ (uint32_t)proc->buffer))) { +- pr_info("binder_mmap: %d %lx-%lx maps %p bad alignment\n", proc->pid, vma->vm_start, vma->vm_end, proc->buffer); ++ pr_info("binder_mmap: %d %lx-%lx maps %pK bad alignment\n", proc->pid, vma->vm_start, vma->vm_end, proc->buffer); + vma->vm_start += PAGE_SIZE; + } + } +@@ -3191,7 +3191,7 @@ static void binder_deferred_release(stru + + page_addr = proc->buffer + i * PAGE_SIZE; + binder_debug(BINDER_DEBUG_BUFFER_ALLOC, +- "%s: %d: page %d at %p not freed\n", ++ "%s: %d: page %d at %pK not freed\n", + __func__, proc->pid, i, page_addr); + unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE); + __free_page(proc->pages[i]); +@@ -3294,7 +3294,7 @@ static void print_binder_transaction(str + static void print_binder_buffer(struct seq_file *m, const char *prefix, + struct binder_buffer *buffer) + { +- seq_printf(m, "%s %d: %p size %zd:%zd %s\n", ++ seq_printf(m, "%s %d: %pK size %zd:%zd %s\n", + prefix, buffer->debug_id, buffer->data, + buffer->data_size, buffer->offsets_size, + buffer->transaction ? "active" : "delivered"); +@@ -3397,7 +3397,7 @@ static void print_binder_node(struct seq + + static void print_binder_ref(struct seq_file *m, struct binder_ref *ref) + { +- seq_printf(m, " ref %d: desc %d %snode %d s %d w %d d %p\n", ++ seq_printf(m, " ref %d: desc %d %snode %d s %d w %d d %pK\n", + ref->debug_id, ref->desc, ref->node->proc ? "" : "dead ", + ref->node->debug_id, ref->strong, ref->weak, ref->death); + } diff --git a/queue-4.9/binder-replace-p-with-pk.patch b/queue-4.9/binder-replace-p-with-pk.patch new file mode 100644 index 00000000000..89c8e81b392 --- /dev/null +++ b/queue-4.9/binder-replace-p-with-pk.patch @@ -0,0 +1,59 @@ +From foo@baz Tue 04 Jun 2019 04:44:10 PM CEST +From: Todd Kjos +Date: Wed, 7 Feb 2018 13:57:37 -0800 +Subject: binder: replace "%p" with "%pK" + +From: Todd Kjos + +commit 8ca86f1639ec5890d400fff9211aca22d0a392eb upstream. + +The format specifier "%p" can leak kernel addresses. Use +"%pK" instead. There were 4 remaining cases in binder.c. 
+ +Signed-off-by: Todd Kjos +Signed-off-by: Greg Kroah-Hartman +[bwh: Backported to 4.9: adjust context] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + drivers/android/binder.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/android/binder.c ++++ b/drivers/android/binder.c +@@ -1260,7 +1260,7 @@ static void binder_transaction_buffer_re + int debug_id = buffer->debug_id; + + binder_debug(BINDER_DEBUG_TRANSACTION, +- "%d buffer release %d, size %zd-%zd, failed at %p\n", ++ "%d buffer release %d, size %zd-%zd, failed at %pK\n", + proc->pid, buffer->debug_id, + buffer->data_size, buffer->offsets_size, failed_at); + +@@ -2123,7 +2123,7 @@ static int binder_thread_write(struct bi + } + } + binder_debug(BINDER_DEBUG_DEAD_BINDER, +- "%d:%d BC_DEAD_BINDER_DONE %016llx found %p\n", ++ "%d:%d BC_DEAD_BINDER_DONE %016llx found %pK\n", + proc->pid, thread->pid, (u64)cookie, + death); + if (death == NULL) { +@@ -3272,7 +3272,7 @@ static void print_binder_transaction(str + struct binder_transaction *t) + { + seq_printf(m, +- "%s %d: %p from %d:%d to %d:%d code %x flags %x pri %ld r%d", ++ "%s %d: %pK from %d:%d to %d:%d code %x flags %x pri %ld r%d", + prefix, t->debug_id, t, + t->from ? t->from->proc->pid : 0, + t->from ? t->from->pid : 0, +@@ -3286,7 +3286,7 @@ static void print_binder_transaction(str + if (t->buffer->target_node) + seq_printf(m, " node %d", + t->buffer->target_node->debug_id); +- seq_printf(m, " size %zd:%zd data %p\n", ++ seq_printf(m, " size %zd:%zd data %pK\n", + t->buffer->data_size, t->buffer->offsets_size, + t->buffer->data); + } diff --git a/queue-4.9/brcmfmac-add-length-checks-in-scheduled-scan-result-handler.patch b/queue-4.9/brcmfmac-add-length-checks-in-scheduled-scan-result-handler.patch new file mode 100644 index 00000000000..2c734acad92 --- /dev/null +++ b/queue-4.9/brcmfmac-add-length-checks-in-scheduled-scan-result-handler.patch @@ -0,0 +1,72 @@ +From foo@baz Tue 04 Jun 2019 04:44:10 PM CEST +From: Arend Van Spriel +Date: Thu, 6 Apr 2017 13:14:40 +0100 +Subject: brcmfmac: add length checks in scheduled scan result handler + +From: Arend Van Spriel + +commit 4835f37e3bafc138f8bfa3cbed2920dd56fed283 upstream. + +Assure the event data buffer is long enough to hold the array +of netinfo items and that SSID length does not exceed the maximum +of 32 characters as per 802.11 spec. 
+ +Reviewed-by: Hante Meuleman +Reviewed-by: Pieter-Paul Giesberts +Reviewed-by: Franky Lin +Signed-off-by: Arend van Spriel +Signed-off-by: Kalle Valo +[bwh: Backported to 4.9: + - Move the assignment to "data" along with the assignment to "netinfo_start" + that depends on it + - Adjust context, indentation] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 14 +++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c ++++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +@@ -3220,6 +3220,7 @@ brcmf_notify_sched_scan_results(struct b + struct brcmf_pno_scanresults_le *pfn_result; + u32 result_count; + u32 status; ++ u32 datalen; + + brcmf_dbg(SCAN, "Enter\n"); + +@@ -3245,6 +3246,14 @@ brcmf_notify_sched_scan_results(struct b + if (result_count > 0) { + int i; + ++ data += sizeof(struct brcmf_pno_scanresults_le); ++ netinfo_start = (struct brcmf_pno_net_info_le *)data; ++ datalen = e->datalen - ((void *)netinfo_start - (void *)pfn_result); ++ if (datalen < result_count * sizeof(*netinfo)) { ++ brcmf_err("insufficient event data\n"); ++ goto out_err; ++ } ++ + request = kzalloc(sizeof(*request), GFP_KERNEL); + ssid = kcalloc(result_count, sizeof(*ssid), GFP_KERNEL); + channel = kcalloc(result_count, sizeof(*channel), GFP_KERNEL); +@@ -3254,9 +3263,6 @@ brcmf_notify_sched_scan_results(struct b + } + + request->wiphy = wiphy; +- data += sizeof(struct brcmf_pno_scanresults_le); +- netinfo_start = (struct brcmf_pno_net_info_le *)data; +- + for (i = 0; i < result_count; i++) { + netinfo = &netinfo_start[i]; + if (!netinfo) { +@@ -3266,6 +3272,8 @@ brcmf_notify_sched_scan_results(struct b + goto out_err; + } + ++ if (netinfo->SSID_len > IEEE80211_MAX_SSID_LEN) ++ netinfo->SSID_len = IEEE80211_MAX_SSID_LEN; + brcmf_dbg(SCAN, "SSID:%s Channel:%d\n", + netinfo->SSID, netinfo->channel); + memcpy(ssid[i].ssid, netinfo->SSID, netinfo->SSID_len); diff --git a/queue-4.9/brcmfmac-add-subtype-check-for-event-handling-in-data-path.patch b/queue-4.9/brcmfmac-add-subtype-check-for-event-handling-in-data-path.patch new file mode 100644 index 00000000000..3fe53e6863c --- /dev/null +++ b/queue-4.9/brcmfmac-add-subtype-check-for-event-handling-in-data-path.patch @@ -0,0 +1,102 @@ +From foo@baz Tue 04 Jun 2019 04:44:10 PM CEST +From: Arend van Spriel +Date: Thu, 14 Feb 2019 13:43:48 +0100 +Subject: brcmfmac: add subtype check for event handling in data path + +From: Arend van Spriel + +commit a4176ec356c73a46c07c181c6d04039fafa34a9f upstream. + +For USB there is no separate channel being used to pass events +from firmware to the host driver and as such are passed over the +data path. In order to detect mock event messages an additional +check is needed on event subtype. This check is added conditionally +using unlikely() keyword. 
+ +Reviewed-by: Hante Meuleman +Reviewed-by: Pieter-Paul Giesberts +Reviewed-by: Franky Lin +Signed-off-by: Arend van Spriel +Signed-off-by: Kalle Valo +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c | 5 ++-- + drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.h | 16 ++++++++++---- + drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c | 2 - + 3 files changed, 16 insertions(+), 7 deletions(-) + +--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c ++++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c +@@ -339,7 +339,8 @@ void brcmf_rx_frame(struct device *dev, + } else { + /* Process special event packets */ + if (handle_event) +- brcmf_fweh_process_skb(ifp->drvr, skb); ++ brcmf_fweh_process_skb(ifp->drvr, skb, ++ BCMILCP_SUBTYPE_VENDOR_LONG); + + brcmf_netif_rx(ifp, skb); + } +@@ -356,7 +357,7 @@ void brcmf_rx_event(struct device *dev, + if (brcmf_rx_hdrpull(drvr, skb, &ifp)) + return; + +- brcmf_fweh_process_skb(ifp->drvr, skb); ++ brcmf_fweh_process_skb(ifp->drvr, skb, 0); + brcmu_pkt_buf_free_skb(skb); + } + +--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.h ++++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.h +@@ -181,7 +181,7 @@ enum brcmf_fweh_event_code { + */ + #define BRCM_OUI "\x00\x10\x18" + #define BCMILCP_BCM_SUBTYPE_EVENT 1 +- ++#define BCMILCP_SUBTYPE_VENDOR_LONG 32769 + + /** + * struct brcm_ethhdr - broadcom specific ether header. +@@ -302,10 +302,10 @@ void brcmf_fweh_process_event(struct brc + void brcmf_fweh_p2pdev_setup(struct brcmf_if *ifp, bool ongoing); + + static inline void brcmf_fweh_process_skb(struct brcmf_pub *drvr, +- struct sk_buff *skb) ++ struct sk_buff *skb, u16 stype) + { + struct brcmf_event *event_packet; +- u16 usr_stype; ++ u16 subtype, usr_stype; + + /* only process events when protocol matches */ + if (skb->protocol != cpu_to_be16(ETH_P_LINK_CTL)) +@@ -314,8 +314,16 @@ static inline void brcmf_fweh_process_sk + if ((skb->len + ETH_HLEN) < sizeof(*event_packet)) + return; + +- /* check for BRCM oui match */ + event_packet = (struct brcmf_event *)skb_mac_header(skb); ++ ++ /* check subtype if needed */ ++ if (unlikely(stype)) { ++ subtype = get_unaligned_be16(&event_packet->hdr.subtype); ++ if (subtype != stype) ++ return; ++ } ++ ++ /* check for BRCM oui match */ + if (memcmp(BRCM_OUI, &event_packet->hdr.oui[0], + sizeof(event_packet->hdr.oui))) + return; +--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c ++++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c +@@ -1114,7 +1114,7 @@ static void brcmf_msgbuf_process_event(s + + skb->protocol = eth_type_trans(skb, ifp->ndev); + +- brcmf_fweh_process_skb(ifp->drvr, skb); ++ brcmf_fweh_process_skb(ifp->drvr, skb, 0); + + exit: + brcmu_pkt_buf_free_skb(skb); diff --git a/queue-4.9/brcmfmac-assure-ssid-length-from-firmware-is-limited.patch b/queue-4.9/brcmfmac-assure-ssid-length-from-firmware-is-limited.patch new file mode 100644 index 00000000000..183d29fd149 --- /dev/null +++ b/queue-4.9/brcmfmac-assure-ssid-length-from-firmware-is-limited.patch @@ -0,0 +1,35 @@ +From foo@baz Tue 04 Jun 2019 04:44:10 PM CEST +From: Arend van Spriel +Date: Thu, 14 Feb 2019 13:43:47 +0100 +Subject: brcmfmac: assure SSID length from firmware is limited + +From: Arend van Spriel + +commit 1b5e2423164b3670e8bc9174e4762d297990deff upstream. + +The SSID length as received from firmware should not exceed +IEEE80211_MAX_SSID_LEN as that would result in heap overflow. 
+ +Reviewed-by: Hante Meuleman +Reviewed-by: Pieter-Paul Giesberts +Reviewed-by: Franky Lin +Signed-off-by: Arend van Spriel +Signed-off-by: Kalle Valo +[bwh: Backported to 4.9: adjust context] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c ++++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +@@ -3579,6 +3579,8 @@ brcmf_wowl_nd_results(struct brcmf_if *i + + data += sizeof(struct brcmf_pno_scanresults_le); + netinfo = (struct brcmf_pno_net_info_le *)data; ++ if (netinfo->SSID_len > IEEE80211_MAX_SSID_LEN) ++ netinfo->SSID_len = IEEE80211_MAX_SSID_LEN; + memcpy(cfg->wowl.nd->ssid.ssid, netinfo->SSID, netinfo->SSID_len); + cfg->wowl.nd->ssid.ssid_len = netinfo->SSID_len; + cfg->wowl.nd->n_channels = 1; diff --git a/queue-4.9/coredump-fix-race-condition-between-mmget_not_zero-get_task_mm-and-core-dumping.patch b/queue-4.9/coredump-fix-race-condition-between-mmget_not_zero-get_task_mm-and-core-dumping.patch new file mode 100644 index 00000000000..3c8aaf25111 --- /dev/null +++ b/queue-4.9/coredump-fix-race-condition-between-mmget_not_zero-get_task_mm-and-core-dumping.patch @@ -0,0 +1,208 @@ +From foo@baz Tue 04 Jun 2019 04:44:10 PM CEST +From: Andrea Arcangeli +Date: Thu, 18 Apr 2019 17:50:52 -0700 +Subject: coredump: fix race condition between mmget_not_zero()/get_task_mm() and core dumping + +From: Andrea Arcangeli + +commit 04f5866e41fb70690e28397487d8bd8eea7d712a upstream. + +The core dumping code has always run without holding the mmap_sem for +writing, despite that is the only way to ensure that the entire vma +layout will not change from under it. Only using some signal +serialization on the processes belonging to the mm is not nearly enough. +This was pointed out earlier. For example in Hugh's post from Jul 2017: + + https://lkml.kernel.org/r/alpine.LSU.2.11.1707191716030.2055@eggly.anvils + + "Not strictly relevant here, but a related note: I was very surprised + to discover, only quite recently, how handle_mm_fault() may be called + without down_read(mmap_sem) - when core dumping. That seems a + misguided optimization to me, which would also be nice to correct" + +In particular because the growsdown and growsup can move the +vm_start/vm_end the various loops the core dump does around the vma will +not be consistent if page faults can happen concurrently. + +Pretty much all users calling mmget_not_zero()/get_task_mm() and then +taking the mmap_sem had the potential to introduce unexpected side +effects in the core dumping code. + +Adding mmap_sem for writing around the ->core_dump invocation is a +viable long term fix, but it requires removing all copy user and page +faults and to replace them with get_dump_page() for all binary formats +which is not suitable as a short term fix. + +For the time being this solution manually covers the places that can +confuse the core dump either by altering the vma layout or the vma flags +while it runs. Once ->core_dump runs under mmap_sem for writing the +function mmget_still_valid() can be dropped. + +Allowing mmap_sem protected sections to run in parallel with the +coredump provides some minor parallelism advantage to the swapoff code +(which seems to be safe enough by never mangling any vma field and can +keep doing swapins in parallel to the core dumping) and to some other +corner case. 
+ +In order to facilitate the backporting I added "Fixes: 86039bd3b4e6" +however the side effect of this same race condition in /proc/pid/mem +should be reproducible since before 2.6.12-rc2 so I couldn't add any +other "Fixes:" because there's no hash beyond the git genesis commit. + +Because find_extend_vma() is the only location outside of the process +context that could modify the "mm" structures under mmap_sem for +reading, by adding the mmget_still_valid() check to it, all other cases +that take the mmap_sem for reading don't need the new check after +mmget_not_zero()/get_task_mm(). The expand_stack() in page fault +context also doesn't need the new check, because all tasks under core +dumping are frozen. + +Link: http://lkml.kernel.org/r/20190325224949.11068-1-aarcange@redhat.com +Fixes: 86039bd3b4e6 ("userfaultfd: add new syscall to provide memory externalization") +Signed-off-by: Andrea Arcangeli +Reported-by: Jann Horn +Suggested-by: Oleg Nesterov +Acked-by: Peter Xu +Reviewed-by: Mike Rapoport +Reviewed-by: Oleg Nesterov +Reviewed-by: Jann Horn +Acked-by: Jason Gunthorpe +Acked-by: Michal Hocko +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +[bwh: Backported to 4.9: + - Drop changes in Infiniband and userfaultfd_event_wait_completion() + - Adjust filename, context] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/proc/task_mmu.c | 18 ++++++++++++++++++ + fs/userfaultfd.c | 7 +++++++ + include/linux/sched.h | 21 +++++++++++++++++++++ + mm/mmap.c | 7 ++++++- + 4 files changed, 52 insertions(+), 1 deletion(-) + +--- a/fs/proc/task_mmu.c ++++ b/fs/proc/task_mmu.c +@@ -1057,6 +1057,24 @@ static ssize_t clear_refs_write(struct f + count = -EINTR; + goto out_mm; + } ++ /* ++ * Avoid to modify vma->vm_flags ++ * without locked ops while the ++ * coredump reads the vm_flags. ++ */ ++ if (!mmget_still_valid(mm)) { ++ /* ++ * Silently return "count" ++ * like if get_task_mm() ++ * failed. FIXME: should this ++ * function have returned ++ * -ESRCH if get_task_mm() ++ * failed like if ++ * get_proc_task() fails? ++ */ ++ up_write(&mm->mmap_sem); ++ goto out_mm; ++ } + for (vma = mm->mmap; vma; vma = vma->vm_next) { + vma->vm_flags &= ~VM_SOFTDIRTY; + vma_set_page_prot(vma); +--- a/fs/userfaultfd.c ++++ b/fs/userfaultfd.c +@@ -479,6 +479,8 @@ static int userfaultfd_release(struct in + * taking the mmap_sem for writing. 
+ */ + down_write(&mm->mmap_sem); ++ if (!mmget_still_valid(mm)) ++ goto skip_mm; + prev = NULL; + for (vma = mm->mmap; vma; vma = vma->vm_next) { + cond_resched(); +@@ -501,6 +503,7 @@ static int userfaultfd_release(struct in + vma->vm_flags = new_flags; + vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; + } ++skip_mm: + up_write(&mm->mmap_sem); + mmput(mm); + wakeup: +@@ -802,6 +805,8 @@ static int userfaultfd_register(struct u + goto out; + + down_write(&mm->mmap_sem); ++ if (!mmget_still_valid(mm)) ++ goto out_unlock; + vma = find_vma_prev(mm, start, &prev); + if (!vma) + goto out_unlock; +@@ -947,6 +952,8 @@ static int userfaultfd_unregister(struct + goto out; + + down_write(&mm->mmap_sem); ++ if (!mmget_still_valid(mm)) ++ goto out_unlock; + vma = find_vma_prev(mm, start, &prev); + if (!vma) + goto out_unlock; +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -2938,6 +2938,27 @@ static inline bool mmget_not_zero(struct + return atomic_inc_not_zero(&mm->mm_users); + } + ++/* ++ * This has to be called after a get_task_mm()/mmget_not_zero() ++ * followed by taking the mmap_sem for writing before modifying the ++ * vmas or anything the coredump pretends not to change from under it. ++ * ++ * NOTE: find_extend_vma() called from GUP context is the only place ++ * that can modify the "mm" (notably the vm_start/end) under mmap_sem ++ * for reading and outside the context of the process, so it is also ++ * the only case that holds the mmap_sem for reading that must call ++ * this function. Generally if the mmap_sem is hold for reading ++ * there's no need of this check after get_task_mm()/mmget_not_zero(). ++ * ++ * This function can be obsoleted and the check can be removed, after ++ * the coredump code will hold the mmap_sem for writing before ++ * invoking the ->core_dump methods. ++ */ ++static inline bool mmget_still_valid(struct mm_struct *mm) ++{ ++ return likely(!mm->core_state); ++} ++ + /* mmput gets rid of the mappings and all user-space */ + extern void mmput(struct mm_struct *); + #ifdef CONFIG_MMU +--- a/mm/mmap.c ++++ b/mm/mmap.c +@@ -44,6 +44,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -2448,7 +2449,8 @@ find_extend_vma(struct mm_struct *mm, un + vma = find_vma_prev(mm, addr, &prev); + if (vma && (vma->vm_start <= addr)) + return vma; +- if (!prev || expand_stack(prev, addr)) ++ /* don't alter vm_end if the coredump is running */ ++ if (!prev || !mmget_still_valid(mm) || expand_stack(prev, addr)) + return NULL; + if (prev->vm_flags & VM_LOCKED) + populate_vma_page_range(prev, addr, prev->vm_end, NULL); +@@ -2474,6 +2476,9 @@ find_extend_vma(struct mm_struct *mm, un + return vma; + if (!(vma->vm_flags & VM_GROWSDOWN)) + return NULL; ++ /* don't alter vm_start if the coredump is running */ ++ if (!mmget_still_valid(mm)) ++ return NULL; + start = vma->vm_start; + if (expand_stack(vma, addr)) + return NULL; diff --git a/queue-4.9/fs-prevent-page-refcount-overflow-in-pipe_buf_get.patch b/queue-4.9/fs-prevent-page-refcount-overflow-in-pipe_buf_get.patch new file mode 100644 index 00000000000..a4d74e9da13 --- /dev/null +++ b/queue-4.9/fs-prevent-page-refcount-overflow-in-pipe_buf_get.patch @@ -0,0 +1,164 @@ +From foo@baz Tue 04 Jun 2019 04:44:10 PM CEST +From: Matthew Wilcox +Date: Fri, 5 Apr 2019 14:02:10 -0700 +Subject: fs: prevent page refcount overflow in pipe_buf_get + +From: Matthew Wilcox + +commit 15fab63e1e57be9fdb5eec1bbc5916e9825e9acb upstream. 
+ +Change pipe_buf_get() to return a bool indicating whether it succeeded +in raising the refcount of the page (if the thing in the pipe is a page). +This removes another mechanism for overflowing the page refcount. All +callers converted to handle a failure. + +Reported-by: Jann Horn +Signed-off-by: Matthew Wilcox +Signed-off-by: Linus Torvalds +[bwh: Backported to 4.9: adjust context] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/fuse/dev.c | 12 ++++++------ + fs/pipe.c | 4 ++-- + fs/splice.c | 12 ++++++++++-- + include/linux/pipe_fs_i.h | 10 ++++++---- + kernel/trace/trace.c | 6 +++++- + 5 files changed, 29 insertions(+), 15 deletions(-) + +--- a/fs/fuse/dev.c ++++ b/fs/fuse/dev.c +@@ -1975,10 +1975,8 @@ static ssize_t fuse_dev_splice_write(str + rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len; + + ret = -EINVAL; +- if (rem < len) { +- pipe_unlock(pipe); +- goto out; +- } ++ if (rem < len) ++ goto out_free; + + rem = len; + while (rem) { +@@ -1996,7 +1994,9 @@ static ssize_t fuse_dev_splice_write(str + pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); + pipe->nrbufs--; + } else { +- pipe_buf_get(pipe, ibuf); ++ if (!pipe_buf_get(pipe, ibuf)) ++ goto out_free; ++ + *obuf = *ibuf; + obuf->flags &= ~PIPE_BUF_FLAG_GIFT; + obuf->len = rem; +@@ -2019,11 +2019,11 @@ static ssize_t fuse_dev_splice_write(str + ret = fuse_dev_do_write(fud, &cs, len); + + pipe_lock(pipe); ++out_free: + for (idx = 0; idx < nbuf; idx++) + pipe_buf_release(pipe, &bufs[idx]); + pipe_unlock(pipe); + +-out: + kfree(bufs); + return ret; + } +--- a/fs/pipe.c ++++ b/fs/pipe.c +@@ -193,9 +193,9 @@ EXPORT_SYMBOL(generic_pipe_buf_steal); + * in the tee() system call, when we duplicate the buffers in one + * pipe into another. + */ +-void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf) ++bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf) + { +- get_page(buf->page); ++ return try_get_page(buf->page); + } + EXPORT_SYMBOL(generic_pipe_buf_get); + +--- a/fs/splice.c ++++ b/fs/splice.c +@@ -1585,7 +1585,11 @@ retry: + * Get a reference to this pipe buffer, + * so we can copy the contents over. + */ +- pipe_buf_get(ipipe, ibuf); ++ if (!pipe_buf_get(ipipe, ibuf)) { ++ if (ret == 0) ++ ret = -EFAULT; ++ break; ++ } + *obuf = *ibuf; + + /* +@@ -1659,7 +1663,11 @@ static int link_pipe(struct pipe_inode_i + * Get a reference to this pipe buffer, + * so we can copy the contents over. + */ +- pipe_buf_get(ipipe, ibuf); ++ if (!pipe_buf_get(ipipe, ibuf)) { ++ if (ret == 0) ++ ret = -EFAULT; ++ break; ++ } + + obuf = opipe->bufs + nbuf; + *obuf = *ibuf; +--- a/include/linux/pipe_fs_i.h ++++ b/include/linux/pipe_fs_i.h +@@ -107,18 +107,20 @@ struct pipe_buf_operations { + /* + * Get a reference to the pipe buffer. + */ +- void (*get)(struct pipe_inode_info *, struct pipe_buffer *); ++ bool (*get)(struct pipe_inode_info *, struct pipe_buffer *); + }; + + /** + * pipe_buf_get - get a reference to a pipe_buffer + * @pipe: the pipe that the buffer belongs to + * @buf: the buffer to get a reference to ++ * ++ * Return: %true if the reference was successfully obtained. 
+ */ +-static inline void pipe_buf_get(struct pipe_inode_info *pipe, ++static inline __must_check bool pipe_buf_get(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) + { +- buf->ops->get(pipe, buf); ++ return buf->ops->get(pipe, buf); + } + + /** +@@ -178,7 +180,7 @@ struct pipe_inode_info *alloc_pipe_info( + void free_pipe_info(struct pipe_inode_info *); + + /* Generic pipe buffer ops functions */ +-void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); ++bool generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); + int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *); + int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *); + void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -6145,12 +6145,16 @@ static void buffer_pipe_buf_release(stru + buf->private = 0; + } + +-static void buffer_pipe_buf_get(struct pipe_inode_info *pipe, ++static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) + { + struct buffer_ref *ref = (struct buffer_ref *)buf->private; + ++ if (ref->ref > INT_MAX/2) ++ return false; ++ + ref->ref++; ++ return true; + } + + /* Pipe buffer operations for a buffer. */ diff --git a/queue-4.9/mm-gup-ensure-real-head-page-is-ref-counted-when-using-hugepages.patch b/queue-4.9/mm-gup-ensure-real-head-page-is-ref-counted-when-using-hugepages.patch new file mode 100644 index 00000000000..4e8ebd58c85 --- /dev/null +++ b/queue-4.9/mm-gup-ensure-real-head-page-is-ref-counted-when-using-hugepages.patch @@ -0,0 +1,104 @@ +From foo@baz Tue 04 Jun 2019 04:44:10 PM CEST +From: Punit Agrawal +Date: Thu, 6 Jul 2017 15:39:39 -0700 +Subject: mm, gup: ensure real head page is ref-counted when using hugepages + +From: Punit Agrawal + +commit d63206ee32b6e64b0e12d46e5d6004afd9913713 upstream. + +When speculatively taking references to a hugepage using +page_cache_add_speculative() in gup_huge_pmd(), it is assumed that the +page returned by pmd_page() is the head page. Although normally true, +this assumption doesn't hold when the hugepage comprises of successive +page table entries such as when using contiguous bit on arm64 at PTE or +PMD levels. + +This can be addressed by ensuring that the page passed to +page_cache_add_speculative() is the real head or by de-referencing the +head page within the function. + +We take the first approach to keep the usage pattern aligned with +page_cache_get_speculative() where users already pass the appropriate +page, i.e., the de-referenced head. + +Apply the same logic to fix gup_huge_[pud|pgd]() as well. + +[punit.agrawal@arm.com: fix arm64 ltp failure] + Link: http://lkml.kernel.org/r/20170619170145.25577-5-punit.agrawal@arm.com +Link: http://lkml.kernel.org/r/20170522133604.11392-3-punit.agrawal@arm.com +Signed-off-by: Punit Agrawal +Acked-by: Steve Capper +Cc: Michal Hocko +Cc: "Kirill A. 
Shutemov" +Cc: Aneesh Kumar K.V +Cc: Catalin Marinas +Cc: Will Deacon +Cc: Naoya Horiguchi +Cc: Mark Rutland +Cc: Hillf Danton +Cc: Mike Kravetz +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + mm/gup.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +--- a/mm/gup.c ++++ b/mm/gup.c +@@ -1313,8 +1313,7 @@ static int gup_huge_pmd(pmd_t orig, pmd_ + return 0; + + refs = 0; +- head = pmd_page(orig); +- page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); ++ page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); + do { + pages[*nr] = page; + (*nr)++; +@@ -1322,6 +1321,7 @@ static int gup_huge_pmd(pmd_t orig, pmd_ + refs++; + } while (addr += PAGE_SIZE, addr != end); + ++ head = compound_head(pmd_page(orig)); + if (!page_cache_add_speculative(head, refs)) { + *nr -= refs; + return 0; +@@ -1347,8 +1347,7 @@ static int gup_huge_pud(pud_t orig, pud_ + return 0; + + refs = 0; +- head = pud_page(orig); +- page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); ++ page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); + do { + pages[*nr] = page; + (*nr)++; +@@ -1356,6 +1355,7 @@ static int gup_huge_pud(pud_t orig, pud_ + refs++; + } while (addr += PAGE_SIZE, addr != end); + ++ head = compound_head(pud_page(orig)); + if (!page_cache_add_speculative(head, refs)) { + *nr -= refs; + return 0; +@@ -1382,8 +1382,7 @@ static int gup_huge_pgd(pgd_t orig, pgd_ + return 0; + + refs = 0; +- head = pgd_page(orig); +- page = head + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT); ++ page = pgd_page(orig) + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT); + do { + pages[*nr] = page; + (*nr)++; +@@ -1391,6 +1390,7 @@ static int gup_huge_pgd(pgd_t orig, pgd_ + refs++; + } while (addr += PAGE_SIZE, addr != end); + ++ head = compound_head(pgd_page(orig)); + if (!page_cache_add_speculative(head, refs)) { + *nr -= refs; + return 0; diff --git a/queue-4.9/mm-gup-remove-broken-vm_bug_on_page-compound-check-for-hugepages.patch b/queue-4.9/mm-gup-remove-broken-vm_bug_on_page-compound-check-for-hugepages.patch new file mode 100644 index 00000000000..70f08b978f1 --- /dev/null +++ b/queue-4.9/mm-gup-remove-broken-vm_bug_on_page-compound-check-for-hugepages.patch @@ -0,0 +1,70 @@ +From foo@baz Tue 04 Jun 2019 04:44:10 PM CEST +From: Will Deacon +Date: Thu, 6 Jul 2017 15:39:36 -0700 +Subject: mm, gup: remove broken VM_BUG_ON_PAGE compound check for hugepages + +From: Will Deacon + +commit a3e328556d41bb61c55f9dfcc62d6a826ea97b85 upstream. + +When operating on hugepages with DEBUG_VM enabled, the GUP code checks +the compound head for each tail page prior to calling +page_cache_add_speculative. This is broken, because on the fast-GUP +path (where we don't hold any page table locks) we can be racing with a +concurrent invocation of split_huge_page_to_list. + +split_huge_page_to_list deals with this race by using page_ref_freeze to +freeze the page and force concurrent GUPs to fail whilst the component +pages are modified. This modification includes clearing the +compound_head field for the tail pages, so checking this prior to a +successful call to page_cache_add_speculative can lead to false +positives: In fact, page_cache_add_speculative *already* has this check +once the page refcount has been successfully updated, so we can simply +remove the broken calls to VM_BUG_ON_PAGE. 
+ +Link: http://lkml.kernel.org/r/20170522133604.11392-2-punit.agrawal@arm.com +Signed-off-by: Will Deacon +Signed-off-by: Punit Agrawal +Acked-by: Steve Capper +Acked-by: Kirill A. Shutemov +Cc: Aneesh Kumar K.V +Cc: Catalin Marinas +Cc: Naoya Horiguchi +Cc: Mark Rutland +Cc: Hillf Danton +Cc: Michal Hocko +Cc: Mike Kravetz +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + mm/gup.c | 3 --- + 1 file changed, 3 deletions(-) + +--- a/mm/gup.c ++++ b/mm/gup.c +@@ -1316,7 +1316,6 @@ static int gup_huge_pmd(pmd_t orig, pmd_ + head = pmd_page(orig); + page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); + do { +- VM_BUG_ON_PAGE(compound_head(page) != head, page); + pages[*nr] = page; + (*nr)++; + page++; +@@ -1351,7 +1350,6 @@ static int gup_huge_pud(pud_t orig, pud_ + head = pud_page(orig); + page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); + do { +- VM_BUG_ON_PAGE(compound_head(page) != head, page); + pages[*nr] = page; + (*nr)++; + page++; +@@ -1387,7 +1385,6 @@ static int gup_huge_pgd(pgd_t orig, pgd_ + head = pgd_page(orig); + page = head + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT); + do { +- VM_BUG_ON_PAGE(compound_head(page) != head, page); + pages[*nr] = page; + (*nr)++; + page++; diff --git a/queue-4.9/mm-make-page-ref-count-overflow-check-tighter-and-more-explicit.patch b/queue-4.9/mm-make-page-ref-count-overflow-check-tighter-and-more-explicit.patch new file mode 100644 index 00000000000..0d2f5484d9e --- /dev/null +++ b/queue-4.9/mm-make-page-ref-count-overflow-check-tighter-and-more-explicit.patch @@ -0,0 +1,51 @@ +From foo@baz Tue 04 Jun 2019 04:44:10 PM CEST +From: Linus Torvalds +Date: Thu, 11 Apr 2019 10:06:20 -0700 +Subject: mm: make page ref count overflow check tighter and more explicit + +From: Linus Torvalds + +commit f958d7b528b1b40c44cfda5eabe2d82760d868c3 upstream. + +We have a VM_BUG_ON() to check that the page reference count doesn't +underflow (or get close to overflow) by checking the sign of the count. + +That's all fine, but we actually want to allow people to use a "get page +ref unless it's already very high" helper function, and we want that one +to use the sign of the page ref (without triggering this VM_BUG_ON). + +Change the VM_BUG_ON to only check for small underflows (or _very_ close +to overflowing), and ignore overflows which have strayed into negative +territory. + +Acked-by: Matthew Wilcox +Cc: Jann Horn +Signed-off-by: Linus Torvalds +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/mm.h | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -763,6 +763,10 @@ static inline bool is_zone_device_page(c + } + #endif + ++/* 127: arbitrary random number, small enough to assemble well */ ++#define page_ref_zero_or_close_to_overflow(page) \ ++ ((unsigned int) page_ref_count(page) + 127u <= 127u) ++ + static inline void get_page(struct page *page) + { + page = compound_head(page); +@@ -770,7 +774,7 @@ static inline void get_page(struct page + * Getting a normal page or the head of a compound page + * requires to already have an elevated page->_refcount. 
+	 */
+-	VM_BUG_ON_PAGE(page_ref_count(page) <= 0, page);
++	VM_BUG_ON_PAGE(page_ref_zero_or_close_to_overflow(page), page);
+ 	page_ref_inc(page);
+ 
+ 	if (unlikely(is_zone_device_page(page)))
diff --git a/queue-4.9/mm-prevent-get_user_pages-from-overflowing-page-refcount.patch b/queue-4.9/mm-prevent-get_user_pages-from-overflowing-page-refcount.patch
new file mode 100644
index 00000000000..9d36255ebae
--- /dev/null
+++ b/queue-4.9/mm-prevent-get_user_pages-from-overflowing-page-refcount.patch
@@ -0,0 +1,172 @@
+From foo@baz Tue 04 Jun 2019 04:44:10 PM CEST
+From: Linus Torvalds
+Date: Thu, 11 Apr 2019 10:49:19 -0700
+Subject: mm: prevent get_user_pages() from overflowing page refcount
+
+From: Linus Torvalds
+
+commit 8fde12ca79aff9b5ba951fce1a2641901b8d8e64 upstream.
+
+If the page refcount wraps around past zero, it will be freed while
+there are still four billion references to it. One of the possible
+avenues for an attacker to try to make this happen is by doing direct IO
+on a page multiple times. This patch makes get_user_pages() refuse to
+take a new page reference if there are already more than two billion
+references to the page.
+
+Reported-by: Jann Horn
+Acked-by: Matthew Wilcox
+Signed-off-by: Linus Torvalds
+[bwh: Backported to 4.9:
+ - Add the "err" variable in follow_hugetlb_page()
+ - Adjust context]
+Signed-off-by: Ben Hutchings
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/gup.c | 45 ++++++++++++++++++++++++++++++++++-----------
+ mm/hugetlb.c | 16 +++++++++++++++-
+ 2 files changed, 49 insertions(+), 12 deletions(-)
+
+--- a/mm/gup.c
++++ b/mm/gup.c
+@@ -153,7 +153,10 @@ retry:
+ 	}
+ 
+ 	if (flags & FOLL_GET) {
+-		get_page(page);
++		if (unlikely(!try_get_page(page))) {
++			page = ERR_PTR(-ENOMEM);
++			goto out;
++		}
+ 
+ 		/* drop the pgmap reference now that we hold the page */
+ 		if (pgmap) {
+@@ -292,7 +295,10 @@ struct page *follow_page_mask(struct vm_
+ 		if (pmd_trans_unstable(pmd))
+ 			ret = -EBUSY;
+ 	} else {
+-		get_page(page);
++		if (unlikely(!try_get_page(page))) {
++			spin_unlock(ptl);
++			return ERR_PTR(-ENOMEM);
++		}
+ 		spin_unlock(ptl);
+ 		lock_page(page);
+ 		ret = split_huge_page(page);
+@@ -348,7 +354,10 @@ static int get_gate_page(struct mm_struc
+ 		goto unmap;
+ 	*page = pte_page(*pte);
+ 	}
+-	get_page(*page);
++	if (unlikely(!try_get_page(*page))) {
++		ret = -ENOMEM;
++		goto unmap;
++	}
+ out:
+ 	ret = 0;
+ unmap:
+@@ -1231,6 +1240,20 @@ struct page *get_dump_page(unsigned long
+  */
+ #ifdef CONFIG_HAVE_GENERIC_RCU_GUP
+ 
++/*
++ * Return the compound head page with ref appropriately incremented,
++ * or NULL if that failed.
++ */ ++static inline struct page *try_get_compound_head(struct page *page, int refs) ++{ ++ struct page *head = compound_head(page); ++ if (WARN_ON_ONCE(page_ref_count(head) < 0)) ++ return NULL; ++ if (unlikely(!page_cache_add_speculative(head, refs))) ++ return NULL; ++ return head; ++} ++ + #ifdef __HAVE_ARCH_PTE_SPECIAL + static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) +@@ -1263,9 +1286,9 @@ static int gup_pte_range(pmd_t pmd, unsi + + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + page = pte_page(pte); +- head = compound_head(page); + +- if (!page_cache_get_speculative(head)) ++ head = try_get_compound_head(page, 1); ++ if (!head) + goto pte_unmap; + + if (unlikely(pte_val(pte) != pte_val(*ptep))) { +@@ -1321,8 +1344,8 @@ static int gup_huge_pmd(pmd_t orig, pmd_ + refs++; + } while (addr += PAGE_SIZE, addr != end); + +- head = compound_head(pmd_page(orig)); +- if (!page_cache_add_speculative(head, refs)) { ++ head = try_get_compound_head(pmd_page(orig), refs); ++ if (!head) { + *nr -= refs; + return 0; + } +@@ -1355,8 +1378,8 @@ static int gup_huge_pud(pud_t orig, pud_ + refs++; + } while (addr += PAGE_SIZE, addr != end); + +- head = compound_head(pud_page(orig)); +- if (!page_cache_add_speculative(head, refs)) { ++ head = try_get_compound_head(pud_page(orig), refs); ++ if (!head) { + *nr -= refs; + return 0; + } +@@ -1390,8 +1413,8 @@ static int gup_huge_pgd(pgd_t orig, pgd_ + refs++; + } while (addr += PAGE_SIZE, addr != end); + +- head = compound_head(pgd_page(orig)); +- if (!page_cache_add_speculative(head, refs)) { ++ head = try_get_compound_head(pgd_page(orig), refs); ++ if (!head) { + *nr -= refs; + return 0; + } +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -3984,6 +3984,7 @@ long follow_hugetlb_page(struct mm_struc + unsigned long vaddr = *position; + unsigned long remainder = *nr_pages; + struct hstate *h = hstate_vma(vma); ++ int err = -EFAULT; + + while (vaddr < vma->vm_end && remainder) { + pte_t *pte; +@@ -4055,6 +4056,19 @@ long follow_hugetlb_page(struct mm_struc + + pfn_offset = (vaddr & ~huge_page_mask(h)) >> PAGE_SHIFT; + page = pte_page(huge_ptep_get(pte)); ++ ++ /* ++ * Instead of doing 'try_get_page()' below in the same_page ++ * loop, just check the count once here. ++ */ ++ if (unlikely(page_count(page) <= 0)) { ++ if (pages) { ++ spin_unlock(ptl); ++ remainder = 0; ++ err = -ENOMEM; ++ break; ++ } ++ } + same_page: + if (pages) { + pages[i] = mem_map_offset(page, pfn_offset); +@@ -4081,7 +4095,7 @@ same_page: + *nr_pages = remainder; + *position = vaddr; + +- return i ? i : -EFAULT; ++ return i ? 
i : err; + } + + #ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE diff --git a/queue-4.9/series b/queue-4.9/series index b30551eed28..151f715b326 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -47,3 +47,14 @@ cifs-cifs_read_allocate_pages-don-t-iterate-through-whole-page-array-on-enomem.p gcc-plugins-fix-build-failures-under-darwin-host.patch drm-vmwgfx-don-t-send-drm-sysfs-hotplug-events-on-initial-master-set.patch drm-rockchip-shutdown-drm-subsystem-on-shutdown.patch +brcmfmac-add-length-checks-in-scheduled-scan-result-handler.patch +brcmfmac-assure-ssid-length-from-firmware-is-limited.patch +brcmfmac-add-subtype-check-for-event-handling-in-data-path.patch +binder-replace-p-with-pk-for-stable.patch +binder-replace-p-with-pk.patch +fs-prevent-page-refcount-overflow-in-pipe_buf_get.patch +mm-gup-remove-broken-vm_bug_on_page-compound-check-for-hugepages.patch +mm-gup-ensure-real-head-page-is-ref-counted-when-using-hugepages.patch +mm-prevent-get_user_pages-from-overflowing-page-refcount.patch +mm-make-page-ref-count-overflow-check-tighter-and-more-explicit.patch +coredump-fix-race-condition-between-mmget_not_zero-get_task_mm-and-core-dumping.patch -- 2.47.2