From: Sasha Levin Date: Mon, 7 Sep 2020 21:46:39 +0000 (-0400) Subject: Fixes for 4.9 X-Git-Tag: v4.14.197~27^2~7 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=761a83a360aed7f2bd09940f726def1ee0791b61;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 4.9 Signed-off-by: Sasha Levin --- diff --git a/queue-4.9/btrfs-fix-potential-deadlock-in-the-search-ioctl.patch b/queue-4.9/btrfs-fix-potential-deadlock-in-the-search-ioctl.patch new file mode 100644 index 00000000000..c89a592fda0 --- /dev/null +++ b/queue-4.9/btrfs-fix-potential-deadlock-in-the-search-ioctl.patch @@ -0,0 +1,227 @@ +From 653ed1dbeff5ee16782ac8e3927ffe0e79472610 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 10 Aug 2020 11:42:27 -0400 +Subject: btrfs: fix potential deadlock in the search ioctl + +From: Josef Bacik + +[ Upstream commit a48b73eca4ceb9b8a4b97f290a065335dbcd8a04 ] + +With the conversion of the tree locks to rwsem I got the following +lockdep splat: + + ====================================================== + WARNING: possible circular locking dependency detected + 5.8.0-rc7-00165-g04ec4da5f45f-dirty #922 Not tainted + ------------------------------------------------------ + compsize/11122 is trying to acquire lock: + ffff889fabca8768 (&mm->mmap_lock#2){++++}-{3:3}, at: __might_fault+0x3e/0x90 + + but task is already holding lock: + ffff889fe720fe40 (btrfs-fs-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x39/0x180 + + which lock already depends on the new lock. 
+ + the existing dependency chain (in reverse order) is: + + -> #2 (btrfs-fs-00){++++}-{3:3}: + down_write_nested+0x3b/0x70 + __btrfs_tree_lock+0x24/0x120 + btrfs_search_slot+0x756/0x990 + btrfs_lookup_inode+0x3a/0xb4 + __btrfs_update_delayed_inode+0x93/0x270 + btrfs_async_run_delayed_root+0x168/0x230 + btrfs_work_helper+0xd4/0x570 + process_one_work+0x2ad/0x5f0 + worker_thread+0x3a/0x3d0 + kthread+0x133/0x150 + ret_from_fork+0x1f/0x30 + + -> #1 (&delayed_node->mutex){+.+.}-{3:3}: + __mutex_lock+0x9f/0x930 + btrfs_delayed_update_inode+0x50/0x440 + btrfs_update_inode+0x8a/0xf0 + btrfs_dirty_inode+0x5b/0xd0 + touch_atime+0xa1/0xd0 + btrfs_file_mmap+0x3f/0x60 + mmap_region+0x3a4/0x640 + do_mmap+0x376/0x580 + vm_mmap_pgoff+0xd5/0x120 + ksys_mmap_pgoff+0x193/0x230 + do_syscall_64+0x50/0x90 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + + -> #0 (&mm->mmap_lock#2){++++}-{3:3}: + __lock_acquire+0x1272/0x2310 + lock_acquire+0x9e/0x360 + __might_fault+0x68/0x90 + _copy_to_user+0x1e/0x80 + copy_to_sk.isra.32+0x121/0x300 + search_ioctl+0x106/0x200 + btrfs_ioctl_tree_search_v2+0x7b/0xf0 + btrfs_ioctl+0x106f/0x30a0 + ksys_ioctl+0x83/0xc0 + __x64_sys_ioctl+0x16/0x20 + do_syscall_64+0x50/0x90 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + + other info that might help us debug this: + + Chain exists of: + &mm->mmap_lock#2 --> &delayed_node->mutex --> btrfs-fs-00 + + Possible unsafe locking scenario: + + CPU0 CPU1 + ---- ---- + lock(btrfs-fs-00); + lock(&delayed_node->mutex); + lock(btrfs-fs-00); + lock(&mm->mmap_lock#2); + + *** DEADLOCK *** + + 1 lock held by compsize/11122: + #0: ffff889fe720fe40 (btrfs-fs-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x39/0x180 + + stack backtrace: + CPU: 17 PID: 11122 Comm: compsize Kdump: loaded Not tainted 5.8.0-rc7-00165-g04ec4da5f45f-dirty #922 + Hardware name: Quanta Tioga Pass Single Side 01-0030993006/Tioga Pass Single Side, BIOS F08_3A18 12/20/2018 + Call Trace: + dump_stack+0x78/0xa0 + check_noncircular+0x165/0x180 + 
__lock_acquire+0x1272/0x2310 + lock_acquire+0x9e/0x360 + ? __might_fault+0x3e/0x90 + ? find_held_lock+0x72/0x90 + __might_fault+0x68/0x90 + ? __might_fault+0x3e/0x90 + _copy_to_user+0x1e/0x80 + copy_to_sk.isra.32+0x121/0x300 + ? btrfs_search_forward+0x2a6/0x360 + search_ioctl+0x106/0x200 + btrfs_ioctl_tree_search_v2+0x7b/0xf0 + btrfs_ioctl+0x106f/0x30a0 + ? __do_sys_newfstat+0x5a/0x70 + ? ksys_ioctl+0x83/0xc0 + ksys_ioctl+0x83/0xc0 + __x64_sys_ioctl+0x16/0x20 + do_syscall_64+0x50/0x90 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +The problem is we're doing a copy_to_user() while holding tree locks, +which can deadlock if we have to do a page fault for the copy_to_user(). +This exists even without my locking changes, so it needs to be fixed. +Rework the search ioctl to do the pre-fault and then +copy_to_user_nofault for the copying. + +CC: stable@vger.kernel.org # 4.4+ +Reviewed-by: Filipe Manana +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/extent_io.c | 8 ++++---- + fs/btrfs/extent_io.h | 6 +++--- + fs/btrfs/ioctl.c | 27 ++++++++++++++++++++------- + 3 files changed, 27 insertions(+), 14 deletions(-) + +diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c +index fa22bb29eee6f..d6c827a9ebc56 100644 +--- a/fs/btrfs/extent_io.c ++++ b/fs/btrfs/extent_io.c +@@ -5488,9 +5488,9 @@ void read_extent_buffer(const struct extent_buffer *eb, void *dstv, + } + } + +-int read_extent_buffer_to_user(const struct extent_buffer *eb, +- void __user *dstv, +- unsigned long start, unsigned long len) ++int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb, ++ void __user *dstv, ++ unsigned long start, unsigned long len) + { + size_t cur; + size_t offset; +@@ -5511,7 +5511,7 @@ int read_extent_buffer_to_user(const struct extent_buffer *eb, + + cur = min(len, (PAGE_SIZE - offset)); + kaddr = page_address(page); +- if (copy_to_user(dst, kaddr + offset, cur)) { ++ if (probe_user_write(dst, 
kaddr + offset, cur)) { + ret = -EFAULT; + break; + } +diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h +index 9ecdc9584df77..75c03aa1800fe 100644 +--- a/fs/btrfs/extent_io.h ++++ b/fs/btrfs/extent_io.h +@@ -401,9 +401,9 @@ int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv, + void read_extent_buffer(const struct extent_buffer *eb, void *dst, + unsigned long start, + unsigned long len); +-int read_extent_buffer_to_user(const struct extent_buffer *eb, +- void __user *dst, unsigned long start, +- unsigned long len); ++int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb, ++ void __user *dst, unsigned long start, ++ unsigned long len); + void write_extent_buffer(struct extent_buffer *eb, const void *src, + unsigned long start, unsigned long len); + void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, +diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c +index eefe103c65daa..6db46daeed16b 100644 +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -2041,9 +2041,14 @@ static noinline int copy_to_sk(struct btrfs_path *path, + sh.len = item_len; + sh.transid = found_transid; + +- /* copy search result header */ +- if (copy_to_user(ubuf + *sk_offset, &sh, sizeof(sh))) { +- ret = -EFAULT; ++ /* ++ * Copy search result header. If we fault then loop again so we ++ * can fault in the pages and -EFAULT there if there's a ++ * problem. Otherwise we'll fault and then copy the buffer in ++ * properly this next time through ++ */ ++ if (probe_user_write(ubuf + *sk_offset, &sh, sizeof(sh))) { ++ ret = 0; + goto out; + } + +@@ -2051,10 +2056,14 @@ static noinline int copy_to_sk(struct btrfs_path *path, + + if (item_len) { + char __user *up = ubuf + *sk_offset; +- /* copy the item */ +- if (read_extent_buffer_to_user(leaf, up, +- item_off, item_len)) { +- ret = -EFAULT; ++ /* ++ * Copy the item, same behavior as above, but reset the ++ * * sk_offset so we copy the full thing again. 
++ */ ++ if (read_extent_buffer_to_user_nofault(leaf, up, ++ item_off, item_len)) { ++ ret = 0; ++ *sk_offset -= sizeof(sh); + goto out; + } + +@@ -2142,6 +2151,10 @@ static noinline int search_ioctl(struct inode *inode, + key.offset = sk->min_offset; + + while (1) { ++ ret = fault_in_pages_writeable(ubuf, *buf_size - sk_offset); ++ if (ret) ++ break; ++ + ret = btrfs_search_forward(root, &key, path, sk->min_transid); + if (ret != 0) { + if (ret > 0) +-- +2.25.1 + diff --git a/queue-4.9/btrfs-remove-extraneous-extent_buffer_get-from-tree_.patch b/queue-4.9/btrfs-remove-extraneous-extent_buffer_get-from-tree_.patch new file mode 100644 index 00000000000..d1acba2b59b --- /dev/null +++ b/queue-4.9/btrfs-remove-extraneous-extent_buffer_get-from-tree_.patch @@ -0,0 +1,43 @@ +From a82b289db5fdd2692f61d2d55418c84302960120 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 15 Aug 2018 18:26:54 +0300 +Subject: btrfs: Remove extraneous extent_buffer_get from tree_mod_log_rewind + +From: Nikolay Borisov + +[ Upstream commit 24cee18a1c1d7c731ea5987e0c99daea22ae7f4a ] + +When a rewound buffer is created it already has a ref count of 1 and the +dummy flag set. Then another ref is taken bumping the count to 2. +Finally when this buffer is released from btrfs_release_path the extra +reference is decremented by the special handling code in +free_extent_buffer. + +However, this special code is in fact redundant since a ref count of 1 is +still correct since the buffer is only accessed via btrfs_path struct. +This paves the way forward of removing the special handling in +free_extent_buffer. 
+ +Signed-off-by: Nikolay Borisov +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/ctree.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c +index 78d4c8c22b4ac..406ae49baa076 100644 +--- a/fs/btrfs/ctree.c ++++ b/fs/btrfs/ctree.c +@@ -1360,7 +1360,6 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path, + btrfs_tree_read_unlock_blocking(eb); + free_extent_buffer(eb); + +- extent_buffer_get(eb_rewin); + btrfs_tree_read_lock(eb_rewin); + __tree_mod_log_rewind(fs_info, eb_rewin, time_seq, tm); + WARN_ON(btrfs_header_nritems(eb_rewin) > +-- +2.25.1 + diff --git a/queue-4.9/btrfs-remove-redundant-extent_buffer_get-in-get_old_.patch b/queue-4.9/btrfs-remove-redundant-extent_buffer_get-in-get_old_.patch new file mode 100644 index 00000000000..c6d1cdd1a70 --- /dev/null +++ b/queue-4.9/btrfs-remove-redundant-extent_buffer_get-in-get_old_.patch @@ -0,0 +1,42 @@ +From 26cee799d11b5fa06dc2b0cfba0302a76dbf8564 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 15 Aug 2018 18:26:53 +0300 +Subject: btrfs: Remove redundant extent_buffer_get in get_old_root + +From: Nikolay Borisov + +[ Upstream commit 6c122e2a0c515cfb3f3a9cefb5dad4cb62109c78 ] + +get_old_root is used only by btrfs_search_old_slot to initialise the +path structure. The old root is always a cloned buffer (either via alloc +dummy or via btrfs_clone_extent_buffer) and its reference count is 2: 1 +from allocation, 1 from extent_buffer_get call in get_old_root. + +This latter explicit ref count acquire operation is in fact unnecessary +since the semantic is such that the newly allocated buffer is handed +over to the btrfs_path for lifetime management. Considering this just +remove the extra extent_buffer_get in get_old_root. 
+ +Signed-off-by: Nikolay Borisov +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/ctree.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c +index b5ebb43b1824f..78d4c8c22b4ac 100644 +--- a/fs/btrfs/ctree.c ++++ b/fs/btrfs/ctree.c +@@ -1430,7 +1430,6 @@ get_old_root(struct btrfs_root *root, u64 time_seq) + + if (!eb) + return NULL; +- extent_buffer_get(eb); + btrfs_tree_read_lock(eb); + if (old_root) { + btrfs_set_header_bytenr(eb, eb->start); +-- +2.25.1 + diff --git a/queue-4.9/btrfs-set-the-lockdep-class-for-log-tree-extent-buff.patch b/queue-4.9/btrfs-set-the-lockdep-class-for-log-tree-extent-buff.patch new file mode 100644 index 00000000000..8f86e4ce09a --- /dev/null +++ b/queue-4.9/btrfs-set-the-lockdep-class-for-log-tree-extent-buff.patch @@ -0,0 +1,59 @@ +From e71bcfe20072cccaf68f486e2b7b1d34e737b890 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 10 Aug 2020 11:42:31 -0400 +Subject: btrfs: set the lockdep class for log tree extent buffers + +From: Josef Bacik + +[ Upstream commit d3beaa253fd6fa40b8b18a216398e6e5376a9d21 ] + +These are special extent buffers that get rewound in order to lookup +the state of the tree at a specific point in time. As such they do not +go through the normal initialization paths that set their lockdep class, +so handle them appropriately when they are created and before they are +locked. 
+ +CC: stable@vger.kernel.org # 4.4+ +Reviewed-by: Filipe Manana +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/ctree.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c +index 406ae49baa076..65689cbc362db 100644 +--- a/fs/btrfs/ctree.c ++++ b/fs/btrfs/ctree.c +@@ -1360,6 +1360,8 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path, + btrfs_tree_read_unlock_blocking(eb); + free_extent_buffer(eb); + ++ btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb_rewin), ++ eb_rewin, btrfs_header_level(eb_rewin)); + btrfs_tree_read_lock(eb_rewin); + __tree_mod_log_rewind(fs_info, eb_rewin, time_seq, tm); + WARN_ON(btrfs_header_nritems(eb_rewin) > +@@ -1429,7 +1431,6 @@ get_old_root(struct btrfs_root *root, u64 time_seq) + + if (!eb) + return NULL; +- btrfs_tree_read_lock(eb); + if (old_root) { + btrfs_set_header_bytenr(eb, eb->start); + btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV); +@@ -1437,6 +1438,9 @@ get_old_root(struct btrfs_root *root, u64 time_seq) + btrfs_set_header_level(eb, old_root->level); + btrfs_set_header_generation(eb, old_generation); + } ++ btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), eb, ++ btrfs_header_level(eb)); ++ btrfs_tree_read_lock(eb); + if (tm) + __tree_mod_log_rewind(root->fs_info, eb, time_seq, tm); + else +-- +2.25.1 + diff --git a/queue-4.9/net-usb-qmi_wwan-add-telit-0x1050-composition.patch b/queue-4.9/net-usb-qmi_wwan-add-telit-0x1050-composition.patch new file mode 100644 index 00000000000..919ce6d91df --- /dev/null +++ b/queue-4.9/net-usb-qmi_wwan-add-telit-0x1050-composition.patch @@ -0,0 +1,39 @@ +From df17609fd5edfef13305089a81056497d2ff7351 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 9 Oct 2019 11:07:18 +0200 +Subject: net: usb: qmi_wwan: add Telit 0x1050 composition +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 
+Content-Transfer-Encoding: 8bit + +From: Daniele Palmas + +[ Upstream commit e0ae2c578d3909e60e9448207f5d83f785f1129f ] + +This patch adds support for Telit FN980 0x1050 composition + +0x1050: tty, adb, rmnet, tty, tty, tty, tty + +Signed-off-by: Daniele Palmas +Acked-by: Bjørn Mork +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/usb/qmi_wwan.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c +index 254a27295f41d..97a83d351a100 100644 +--- a/drivers/net/usb/qmi_wwan.c ++++ b/drivers/net/usb/qmi_wwan.c +@@ -923,6 +923,7 @@ static const struct usb_device_id products[] = { + {QMI_FIXED_INTF(0x2357, 0x9000, 4)}, /* TP-LINK MA260 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1031, 3)}, /* Telit LE910C1-EUX */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1040, 2)}, /* Telit LE922A */ ++ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1050, 2)}, /* Telit FN980 */ + {QMI_FIXED_INTF(0x1bc7, 0x1100, 3)}, /* Telit ME910 */ + {QMI_FIXED_INTF(0x1bc7, 0x1101, 3)}, /* Telit ME910 dual modem */ + {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ +-- +2.25.1 + diff --git a/queue-4.9/series b/queue-4.9/series index 3d46e3ca156..08079bd8b3d 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -28,3 +28,10 @@ iommu-vt-d-serialize-iommu-gcmd-register-modificatio.patch thermal-ti-soc-thermal-fix-bogus-thermal-shutdowns-f.patch include-linux-log2.h-add-missing-around-n-in-roundup.patch btrfs-drop-path-before-adding-new-uuid-tree-entry.patch +btrfs-remove-redundant-extent_buffer_get-in-get_old_.patch +btrfs-remove-extraneous-extent_buffer_get-from-tree_.patch +btrfs-set-the-lockdep-class-for-log-tree-extent-buff.patch +uaccess-add-non-pagefault-user-space-read-functions.patch +uaccess-add-non-pagefault-user-space-write-function.patch +btrfs-fix-potential-deadlock-in-the-search-ioctl.patch +net-usb-qmi_wwan-add-telit-0x1050-composition.patch diff --git a/queue-4.9/uaccess-add-non-pagefault-user-space-read-functions.patch 
b/queue-4.9/uaccess-add-non-pagefault-user-space-read-functions.patch new file mode 100644 index 00000000000..08179415cee --- /dev/null +++ b/queue-4.9/uaccess-add-non-pagefault-user-space-read-functions.patch @@ -0,0 +1,229 @@ +From e27cc7f30a73ce56fafeef2f19d9a8b3348e1d1f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 15 May 2019 14:38:18 +0900 +Subject: uaccess: Add non-pagefault user-space read functions + +From: Masami Hiramatsu + +[ Upstream commit 3d7081822f7f9eab867d9bcc8fd635208ec438e0 ] + +Add probe_user_read(), strncpy_from_unsafe_user() and +strnlen_unsafe_user() which allow callers to access user-space +in IRQ context. + +Current probe_kernel_read() and strncpy_from_unsafe() are +not available for user-space memory, because they set +KERNEL_DS while accessing data. On some arch, user address +space and kernel address space can co-exist, but on others +they can not. In that case, setting KERNEL_DS means given +address is treated as a kernel address space. +Also strnlen_user() is only available from user context since +it can sleep if pagefault is enabled. + +To access user-space memory without pagefault, we need +these new functions which set USER_DS while accessing +the data. 
+ +Link: http://lkml.kernel.org/r/155789869802.26965.4940338412595759063.stgit@devnote2 + +Acked-by: Ingo Molnar +Signed-off-by: Masami Hiramatsu +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Sasha Levin +--- + include/linux/uaccess.h | 14 +++++ + mm/maccess.c | 122 ++++++++++++++++++++++++++++++++++++++-- + 2 files changed, 130 insertions(+), 6 deletions(-) + +diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h +index 9442423979c1c..6d27d58ca4e04 100644 +--- a/include/linux/uaccess.h ++++ b/include/linux/uaccess.h +@@ -90,6 +90,17 @@ static inline unsigned long __copy_from_user_nocache(void *to, + extern long probe_kernel_read(void *dst, const void *src, size_t size); + extern long __probe_kernel_read(void *dst, const void *src, size_t size); + ++/* ++ * probe_user_read(): safely attempt to read from a location in user space ++ * @dst: pointer to the buffer that shall take the data ++ * @src: address to read from ++ * @size: size of the data chunk ++ * ++ * Safely read from address @src to the buffer at @dst. If a kernel fault ++ * happens, handle that and return -EFAULT. 
++ */ ++extern long probe_user_read(void *dst, const void __user *src, size_t size); ++ + /* + * probe_kernel_write(): safely attempt to write to a location + * @dst: address to write to +@@ -103,6 +114,9 @@ extern long notrace probe_kernel_write(void *dst, const void *src, size_t size); + extern long notrace __probe_kernel_write(void *dst, const void *src, size_t size); + + extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count); ++extern long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr, ++ long count); ++extern long strnlen_unsafe_user(const void __user *unsafe_addr, long count); + + /** + * probe_kernel_address(): safely attempt to read from a location +diff --git a/mm/maccess.c b/mm/maccess.c +index 78f9274dd49d0..5ebf9a3ca3674 100644 +--- a/mm/maccess.c ++++ b/mm/maccess.c +@@ -5,8 +5,20 @@ + #include + #include + ++static __always_inline long ++probe_read_common(void *dst, const void __user *src, size_t size) ++{ ++ long ret; ++ ++ pagefault_disable(); ++ ret = __copy_from_user_inatomic(dst, src, size); ++ pagefault_enable(); ++ ++ return ret ? -EFAULT : 0; ++} ++ + /** +- * probe_kernel_read(): safely attempt to read from a location ++ * probe_kernel_read(): safely attempt to read from a kernel-space location + * @dst: pointer to the buffer that shall take the data + * @src: address to read from + * @size: size of the data chunk +@@ -29,16 +41,40 @@ long __probe_kernel_read(void *dst, const void *src, size_t size) + mm_segment_t old_fs = get_fs(); + + set_fs(KERNEL_DS); +- pagefault_disable(); +- ret = __copy_from_user_inatomic(dst, +- (__force const void __user *)src, size); +- pagefault_enable(); ++ ret = probe_read_common(dst, (__force const void __user *)src, size); + set_fs(old_fs); + +- return ret ? 
-EFAULT : 0; ++ return ret; + } + EXPORT_SYMBOL_GPL(probe_kernel_read); + ++/** ++ * probe_user_read(): safely attempt to read from a user-space location ++ * @dst: pointer to the buffer that shall take the data ++ * @src: address to read from. This must be a user address. ++ * @size: size of the data chunk ++ * ++ * Safely read from user address @src to the buffer at @dst. If a kernel fault ++ * happens, handle that and return -EFAULT. ++ */ ++ ++long __weak probe_user_read(void *dst, const void __user *src, size_t size) ++ __attribute__((alias("__probe_user_read"))); ++ ++long __probe_user_read(void *dst, const void __user *src, size_t size) ++{ ++ long ret = -EFAULT; ++ mm_segment_t old_fs = get_fs(); ++ ++ set_fs(USER_DS); ++ if (access_ok(VERIFY_READ, src, size)) ++ ret = probe_read_common(dst, src, size); ++ set_fs(old_fs); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(probe_user_read); ++ + /** + * probe_kernel_write(): safely attempt to write to a location + * @dst: address to write to +@@ -66,6 +102,7 @@ long __probe_kernel_write(void *dst, const void *src, size_t size) + } + EXPORT_SYMBOL_GPL(probe_kernel_write); + ++ + /** + * strncpy_from_unsafe: - Copy a NUL terminated string from unsafe address. + * @dst: Destination address, in kernel space. This buffer must be at +@@ -105,3 +142,76 @@ long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count) + + return ret ? -EFAULT : src - unsafe_addr; + } ++ ++/** ++ * strncpy_from_unsafe_user: - Copy a NUL terminated string from unsafe user ++ * address. ++ * @dst: Destination address, in kernel space. This buffer must be at ++ * least @count bytes long. ++ * @unsafe_addr: Unsafe user address. ++ * @count: Maximum number of bytes to copy, including the trailing NUL. ++ * ++ * Copies a NUL-terminated string from unsafe user address to kernel buffer. ++ * ++ * On success, returns the length of the string INCLUDING the trailing NUL. 
++ * ++ * If access fails, returns -EFAULT (some data may have been copied ++ * and the trailing NUL added). ++ * ++ * If @count is smaller than the length of the string, copies @count-1 bytes, ++ * sets the last byte of @dst buffer to NUL and returns @count. ++ */ ++long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr, ++ long count) ++{ ++ mm_segment_t old_fs = get_fs(); ++ long ret; ++ ++ if (unlikely(count <= 0)) ++ return 0; ++ ++ set_fs(USER_DS); ++ pagefault_disable(); ++ ret = strncpy_from_user(dst, unsafe_addr, count); ++ pagefault_enable(); ++ set_fs(old_fs); ++ ++ if (ret >= count) { ++ ret = count; ++ dst[ret - 1] = '\0'; ++ } else if (ret > 0) { ++ ret++; ++ } ++ ++ return ret; ++} ++ ++/** ++ * strnlen_unsafe_user: - Get the size of a user string INCLUDING final NUL. ++ * @unsafe_addr: The string to measure. ++ * @count: Maximum count (including NUL) ++ * ++ * Get the size of a NUL-terminated string in user space without pagefault. ++ * ++ * Returns the size of the string INCLUDING the terminating NUL. ++ * ++ * If the string is too long, returns a number larger than @count. User ++ * has to check the return value against "> count". ++ * On exception (or invalid count), returns 0. ++ * ++ * Unlike strnlen_user, this can be used from IRQ handler etc. because ++ * it disables pagefaults. 
++ */ ++long strnlen_unsafe_user(const void __user *unsafe_addr, long count) ++{ ++ mm_segment_t old_fs = get_fs(); ++ int ret; ++ ++ set_fs(USER_DS); ++ pagefault_disable(); ++ ret = strnlen_user(unsafe_addr, count); ++ pagefault_enable(); ++ set_fs(old_fs); ++ ++ return ret; ++} +-- +2.25.1 + diff --git a/queue-4.9/uaccess-add-non-pagefault-user-space-write-function.patch b/queue-4.9/uaccess-add-non-pagefault-user-space-write-function.patch new file mode 100644 index 00000000000..054dc85e86f --- /dev/null +++ b/queue-4.9/uaccess-add-non-pagefault-user-space-write-function.patch @@ -0,0 +1,130 @@ +From 408ea363d4f73136e5711defe2673ed3a69bae77 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 2 Nov 2019 00:17:56 +0100 +Subject: uaccess: Add non-pagefault user-space write function + +From: Daniel Borkmann + +[ Upstream commit 1d1585ca0f48fe7ed95c3571f3e4a82b2b5045dc ] + +Commit 3d7081822f7f ("uaccess: Add non-pagefault user-space read functions") +missed to add probe write function, therefore factor out a probe_write_common() +helper with most logic of probe_kernel_write() except setting KERNEL_DS, and +add a new probe_user_write() helper so it can be used from BPF side. + +Again, on some archs, the user address space and kernel address space can +co-exist and be overlapping, so in such case, setting KERNEL_DS would mean +that the given address is treated as being in kernel address space. 
+ +Signed-off-by: Daniel Borkmann +Signed-off-by: Alexei Starovoitov +Acked-by: Andrii Nakryiko +Cc: Masami Hiramatsu +Link: https://lore.kernel.org/bpf/9df2542e68141bfa3addde631441ee45503856a8.1572649915.git.daniel@iogearbox.net +Signed-off-by: Sasha Levin +--- + include/linux/uaccess.h | 12 +++++++++++ + mm/maccess.c | 45 +++++++++++++++++++++++++++++++++++++---- + 2 files changed, 53 insertions(+), 4 deletions(-) + +diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h +index 6d27d58ca4e04..cc5ba47062e87 100644 +--- a/include/linux/uaccess.h ++++ b/include/linux/uaccess.h +@@ -113,6 +113,18 @@ extern long probe_user_read(void *dst, const void __user *src, size_t size); + extern long notrace probe_kernel_write(void *dst, const void *src, size_t size); + extern long notrace __probe_kernel_write(void *dst, const void *src, size_t size); + ++/* ++ * probe_user_write(): safely attempt to write to a location in user space ++ * @dst: address to write to ++ * @src: pointer to the data that shall be written ++ * @size: size of the data chunk ++ * ++ * Safely write to address @dst from the buffer at @src. If a kernel fault ++ * happens, handle that and return -EFAULT. ++ */ ++extern long notrace probe_user_write(void __user *dst, const void *src, size_t size); ++extern long notrace __probe_user_write(void __user *dst, const void *src, size_t size); ++ + extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count); + extern long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr, + long count); +diff --git a/mm/maccess.c b/mm/maccess.c +index 5ebf9a3ca3674..03ea550f5a743 100644 +--- a/mm/maccess.c ++++ b/mm/maccess.c +@@ -17,6 +17,18 @@ probe_read_common(void *dst, const void __user *src, size_t size) + return ret ? 
-EFAULT : 0; + } + ++static __always_inline long ++probe_write_common(void __user *dst, const void *src, size_t size) ++{ ++ long ret; ++ ++ pagefault_disable(); ++ ret = __copy_to_user_inatomic(dst, src, size); ++ pagefault_enable(); ++ ++ return ret ? -EFAULT : 0; ++} ++ + /** + * probe_kernel_read(): safely attempt to read from a kernel-space location + * @dst: pointer to the buffer that shall take the data +@@ -84,6 +96,7 @@ EXPORT_SYMBOL_GPL(probe_user_read); + * Safely write to address @dst from the buffer at @src. If a kernel fault + * happens, handle that and return -EFAULT. + */ ++ + long __weak probe_kernel_write(void *dst, const void *src, size_t size) + __attribute__((alias("__probe_kernel_write"))); + +@@ -93,15 +106,39 @@ long __probe_kernel_write(void *dst, const void *src, size_t size) + mm_segment_t old_fs = get_fs(); + + set_fs(KERNEL_DS); +- pagefault_disable(); +- ret = __copy_to_user_inatomic((__force void __user *)dst, src, size); +- pagefault_enable(); ++ ret = probe_write_common((__force void __user *)dst, src, size); + set_fs(old_fs); + +- return ret ? -EFAULT : 0; ++ return ret; + } + EXPORT_SYMBOL_GPL(probe_kernel_write); + ++/** ++ * probe_user_write(): safely attempt to write to a user-space location ++ * @dst: address to write to ++ * @src: pointer to the data that shall be written ++ * @size: size of the data chunk ++ * ++ * Safely write to address @dst from the buffer at @src. If a kernel fault ++ * happens, handle that and return -EFAULT. 
++ */ ++ ++long __weak probe_user_write(void __user *dst, const void *src, size_t size) ++ __attribute__((alias("__probe_user_write"))); ++ ++long __probe_user_write(void __user *dst, const void *src, size_t size) ++{ ++ long ret = -EFAULT; ++ mm_segment_t old_fs = get_fs(); ++ ++ set_fs(USER_DS); ++ if (access_ok(VERIFY_WRITE, dst, size)) ++ ret = probe_write_common(dst, src, size); ++ set_fs(old_fs); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(probe_user_write); + + /** + * strncpy_from_unsafe: - Copy a NUL terminated string from unsafe address. +-- +2.25.1 +