From: Greg Kroah-Hartman Date: Fri, 26 Jul 2019 13:24:02 +0000 (+0200) Subject: 5.1-stable patches X-Git-Tag: v5.2.4~10 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=0dde005b6422865ad5a23499743629b80ad7bfc5;p=thirdparty%2Fkernel%2Fstable-queue.git 5.1-stable patches added patches: dma-buf-balance-refcount-inbalance.patch dma-buf-discard-old-fence_excl-on-retrying-get_fences_rcu-for-realloc.patch ext4-allow-directory-holes.patch ext4-don-t-allow-any-modifications-to-an-immutable-file.patch ext4-enforce-the-immutable-flag-on-open-files.patch ext4-use-jbd2_inode-dirty-range-scoping.patch gpio-davinci-silence-error-prints-in-case-of-eprobe_defer.patch gpiolib-of-fix-a-memory-leak-in-of_gpio_flags_quirks.patch jbd2-introduce-jbd2_inode-dirty-range-scoping.patch mips-lb60-fix-pin-mappings.patch mm-add-filemap_fdatawait_range_keep_errors.patch perf-core-fix-exclusive-events-grouping.patch perf-core-fix-race-between-close-and-fork.patch perf-script-assume-native_arch-for-pipe-mode.patch --- diff --git a/queue-5.1/dma-buf-balance-refcount-inbalance.patch b/queue-5.1/dma-buf-balance-refcount-inbalance.patch new file mode 100644 index 00000000000..a2fe0c7d4bd --- /dev/null +++ b/queue-5.1/dma-buf-balance-refcount-inbalance.patch @@ -0,0 +1,42 @@ +From 5e383a9798990c69fc759a4930de224bb497e62c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= +Date: Thu, 6 Dec 2018 11:18:40 -0500 +Subject: dma-buf: balance refcount inbalance +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jérôme Glisse + +commit 5e383a9798990c69fc759a4930de224bb497e62c upstream. + +The debugfs take reference on fence without dropping them. + +Signed-off-by: Jérôme Glisse +Cc: Christian König +Cc: Daniel Vetter +Cc: Sumit Semwal +Cc: linux-media@vger.kernel.org +Cc: dri-devel@lists.freedesktop.org +Cc: linaro-mm-sig@lists.linaro.org +Cc: Stéphane Marchesin +Cc: stable@vger.kernel.org +Reviewed-by: Christian König +Signed-off-by: Sumit Semwal +Link: https://patchwork.freedesktop.org/patch/msgid/20181206161840.6578-1-jglisse@redhat.com +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/dma-buf/dma-buf.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/dma-buf/dma-buf.c ++++ b/drivers/dma-buf/dma-buf.c +@@ -1068,6 +1068,7 @@ static int dma_buf_debug_show(struct seq + fence->ops->get_driver_name(fence), + fence->ops->get_timeline_name(fence), + dma_fence_is_signaled(fence) ? "" : "un"); ++ dma_fence_put(fence); + } + rcu_read_unlock(); + diff --git a/queue-5.1/dma-buf-discard-old-fence_excl-on-retrying-get_fences_rcu-for-realloc.patch b/queue-5.1/dma-buf-discard-old-fence_excl-on-retrying-get_fences_rcu-for-realloc.patch new file mode 100644 index 00000000000..38ba59ad1c4 --- /dev/null +++ b/queue-5.1/dma-buf-discard-old-fence_excl-on-retrying-get_fences_rcu-for-realloc.patch @@ -0,0 +1,45 @@ +From f5b07b04e5f090a85d1e96938520f2b2b58e4a8e Mon Sep 17 00:00:00 2001 +From: Chris Wilson +Date: Tue, 4 Jun 2019 13:53:23 +0100 +Subject: dma-buf: Discard old fence_excl on retrying get_fences_rcu for realloc +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Chris Wilson + +commit f5b07b04e5f090a85d1e96938520f2b2b58e4a8e upstream. + +If we have to drop the seqcount & rcu lock to perform a krealloc, we +have to restart the loop. In doing so, be careful not to lose track of +the already acquired exclusive fence. 
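A minimal sketch of the bug class, for review context only; it is not
part of the patch. The loop shape and the names obj, shared, sz and
need_realloc are hypothetical, while dma_fence_get_rcu(),
dma_fence_put() and krealloc() are the real kernel APIs. Any restart
that skips the dma_fence_put() leaks one reference on the exclusive
fence per retry:

	struct dma_fence *excl = NULL;

	for (;;) {
		rcu_read_lock();
		excl = rcu_dereference(obj->fence_excl);
		if (excl && !dma_fence_get_rcu(excl))
			excl = NULL;	/* fence freed under us, retry */

		if (need_realloc) {
			rcu_read_unlock();
			dma_fence_put(excl);	/* the fix: balance the ref */
			excl = NULL;
			shared = krealloc(shared, sz, GFP_KERNEL);
			continue;	/* excl is re-acquired on retry */
		}

		rcu_read_unlock();
		break;
	}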
+ +Fixes: fedf54132d24 ("dma-buf: Restart reservation_object_get_fences_rcu() after writes") +Signed-off-by: Chris Wilson +Cc: Daniel Vetter +Cc: Maarten Lankhorst +Cc: Christian König +Cc: Alex Deucher +Cc: Sumit Semwal +Cc: stable@vger.kernel.org #v4.10 +Reviewed-by: Christian König +Link: https://patchwork.freedesktop.org/patch/msgid/20190604125323.21396-1-chris@chris-wilson.co.uk +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/dma-buf/reservation.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/drivers/dma-buf/reservation.c ++++ b/drivers/dma-buf/reservation.c +@@ -357,6 +357,10 @@ int reservation_object_get_fences_rcu(st + GFP_NOWAIT | __GFP_NOWARN); + if (!nshared) { + rcu_read_unlock(); ++ ++ dma_fence_put(fence_excl); ++ fence_excl = NULL; ++ + nshared = krealloc(shared, sz, GFP_KERNEL); + if (nshared) { + shared = nshared; diff --git a/queue-5.1/ext4-allow-directory-holes.patch b/queue-5.1/ext4-allow-directory-holes.patch new file mode 100644 index 00000000000..6a335714365 --- /dev/null +++ b/queue-5.1/ext4-allow-directory-holes.patch @@ -0,0 +1,198 @@ +From 4e19d6b65fb4fc42e352ce9883649e049da14743 Mon Sep 17 00:00:00 2001 +From: Theodore Ts'o +Date: Thu, 20 Jun 2019 21:19:02 -0400 +Subject: ext4: allow directory holes + +From: Theodore Ts'o + +commit 4e19d6b65fb4fc42e352ce9883649e049da14743 upstream. + +The largedir feature was intended to allow ext4 directories to have +unmapped directory blocks (e.g., directory holes). And so the +released e2fsprogs no longer enforces this for largedir file systems; +however, the corresponding change to the kernel-side code was not made. + +This commit fixes this oversight. + +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/dir.c | 19 +++++++++---------- + fs/ext4/namei.c | 45 +++++++++++++++++++++++++++++++++++++-------- + 2 files changed, 46 insertions(+), 18 deletions(-) + +--- a/fs/ext4/dir.c ++++ b/fs/ext4/dir.c +@@ -108,7 +108,6 @@ static int ext4_readdir(struct file *fil + struct inode *inode = file_inode(file); + struct super_block *sb = inode->i_sb; + struct buffer_head *bh = NULL; +- int dir_has_error = 0; + struct fscrypt_str fstr = FSTR_INIT(NULL, 0); + + if (IS_ENCRYPTED(inode)) { +@@ -144,8 +143,6 @@ static int ext4_readdir(struct file *fil + return err; + } + +- offset = ctx->pos & (sb->s_blocksize - 1); +- + while (ctx->pos < inode->i_size) { + struct ext4_map_blocks map; + +@@ -154,9 +151,18 @@ static int ext4_readdir(struct file *fil + goto errout; + } + cond_resched(); ++ offset = ctx->pos & (sb->s_blocksize - 1); + map.m_lblk = ctx->pos >> EXT4_BLOCK_SIZE_BITS(sb); + map.m_len = 1; + err = ext4_map_blocks(NULL, inode, &map, 0); ++ if (err == 0) { ++ /* m_len should never be zero but let's avoid ++ * an infinite loop if it somehow is */ ++ if (map.m_len == 0) ++ map.m_len = 1; ++ ctx->pos += map.m_len * sb->s_blocksize; ++ continue; ++ } + if (err > 0) { + pgoff_t index = map.m_pblk >> + (PAGE_SHIFT - inode->i_blkbits); +@@ -175,13 +181,6 @@ static int ext4_readdir(struct file *fil + } + + if (!bh) { +- if (!dir_has_error) { +- EXT4_ERROR_FILE(file, 0, +- "directory contains a " +- "hole at offset %llu", +- (unsigned long long) ctx->pos); +- dir_has_error = 1; +- } + /* corrupt size? 
Maybe no more blocks to read */ + if (ctx->pos > inode->i_blocks << 9) + break; +--- a/fs/ext4/namei.c ++++ b/fs/ext4/namei.c +@@ -81,8 +81,18 @@ static struct buffer_head *ext4_append(h + static int ext4_dx_csum_verify(struct inode *inode, + struct ext4_dir_entry *dirent); + ++/* ++ * Hints to ext4_read_dirblock regarding whether we expect a directory ++ * block being read to be an index block, or a block containing ++ * directory entries (and if the latter, whether it was found via a ++ * logical block in an htree index block). This is used to control ++ * what sort of sanity checkinig ext4_read_dirblock() will do on the ++ * directory block read from the storage device. EITHER will means ++ * the caller doesn't know what kind of directory block will be read, ++ * so no specific verification will be done. ++ */ + typedef enum { +- EITHER, INDEX, DIRENT ++ EITHER, INDEX, DIRENT, DIRENT_HTREE + } dirblock_type_t; + + #define ext4_read_dirblock(inode, block, type) \ +@@ -108,11 +118,14 @@ static struct buffer_head *__ext4_read_d + + return bh; + } +- if (!bh) { ++ if (!bh && (type == INDEX || type == DIRENT_HTREE)) { + ext4_error_inode(inode, func, line, block, +- "Directory hole found"); ++ "Directory hole found for htree %s block", ++ (type == INDEX) ? "index" : "leaf"); + return ERR_PTR(-EFSCORRUPTED); + } ++ if (!bh) ++ return NULL; + dirent = (struct ext4_dir_entry *) bh->b_data; + /* Determine whether or not we have an index block */ + if (is_dx(inode)) { +@@ -979,7 +992,7 @@ static int htree_dirblock_to_tree(struct + + dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n", + (unsigned long)block)); +- bh = ext4_read_dirblock(dir, block, DIRENT); ++ bh = ext4_read_dirblock(dir, block, DIRENT_HTREE); + if (IS_ERR(bh)) + return PTR_ERR(bh); + +@@ -1509,7 +1522,7 @@ static struct buffer_head * ext4_dx_find + return (struct buffer_head *) frame; + do { + block = dx_get_block(frame->at); +- bh = ext4_read_dirblock(dir, block, DIRENT); ++ bh = ext4_read_dirblock(dir, block, DIRENT_HTREE); + if (IS_ERR(bh)) + goto errout; + +@@ -2079,6 +2092,11 @@ static int ext4_add_entry(handle_t *hand + blocks = dir->i_size >> sb->s_blocksize_bits; + for (block = 0; block < blocks; block++) { + bh = ext4_read_dirblock(dir, block, DIRENT); ++ if (bh == NULL) { ++ bh = ext4_bread(handle, dir, block, ++ EXT4_GET_BLOCKS_CREATE); ++ goto add_to_new_block; ++ } + if (IS_ERR(bh)) { + retval = PTR_ERR(bh); + bh = NULL; +@@ -2099,6 +2117,7 @@ static int ext4_add_entry(handle_t *hand + brelse(bh); + } + bh = ext4_append(handle, dir, &block); ++add_to_new_block: + if (IS_ERR(bh)) { + retval = PTR_ERR(bh); + bh = NULL; +@@ -2143,7 +2162,7 @@ again: + return PTR_ERR(frame); + entries = frame->entries; + at = frame->at; +- bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT); ++ bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT_HTREE); + if (IS_ERR(bh)) { + err = PTR_ERR(bh); + bh = NULL; +@@ -2691,7 +2710,10 @@ bool ext4_empty_dir(struct inode *inode) + EXT4_ERROR_INODE(inode, "invalid size"); + return true; + } +- bh = ext4_read_dirblock(inode, 0, EITHER); ++ /* The first directory block must not be a hole, ++ * so treat it as DIRENT_HTREE ++ */ ++ bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE); + if (IS_ERR(bh)) + return true; + +@@ -2713,6 +2735,10 @@ bool ext4_empty_dir(struct inode *inode) + brelse(bh); + lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb); + bh = ext4_read_dirblock(inode, lblock, EITHER); ++ if (bh == NULL) { ++ offset += sb->s_blocksize; ++ continue; ++ } + if 
(IS_ERR(bh)) + return true; + de = (struct ext4_dir_entry_2 *) bh->b_data; +@@ -3256,7 +3282,10 @@ static struct buffer_head *ext4_get_firs + struct buffer_head *bh; + + if (!ext4_has_inline_data(inode)) { +- bh = ext4_read_dirblock(inode, 0, EITHER); ++ /* The first directory block must not be a hole, so ++ * treat it as DIRENT_HTREE ++ */ ++ bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE); + if (IS_ERR(bh)) { + *retval = PTR_ERR(bh); + return NULL; diff --git a/queue-5.1/ext4-don-t-allow-any-modifications-to-an-immutable-file.patch b/queue-5.1/ext4-don-t-allow-any-modifications-to-an-immutable-file.patch new file mode 100644 index 00000000000..416a2819b37 --- /dev/null +++ b/queue-5.1/ext4-don-t-allow-any-modifications-to-an-immutable-file.patch @@ -0,0 +1,98 @@ +From 2e53840362771c73eb0a5ff71611507e64e8eecd Mon Sep 17 00:00:00 2001 +From: "Darrick J. Wong" +Date: Sun, 9 Jun 2019 21:41:41 -0400 +Subject: ext4: don't allow any modifications to an immutable file + +From: Darrick J. Wong + +commit 2e53840362771c73eb0a5ff71611507e64e8eecd upstream. + +Don't allow any modifications to a file that's marked immutable, which +means that we have to flush all the writable pages to make the readonly +and we have to check the setattr/setflags parameters more closely. + +Signed-off-by: Darrick J. Wong +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/ioctl.c | 46 +++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 45 insertions(+), 1 deletion(-) + +--- a/fs/ext4/ioctl.c ++++ b/fs/ext4/ioctl.c +@@ -269,6 +269,29 @@ static int uuid_is_zero(__u8 u[16]) + } + #endif + ++/* ++ * If immutable is set and we are not clearing it, we're not allowed to change ++ * anything else in the inode. Don't error out if we're only trying to set ++ * immutable on an immutable file. ++ */ ++static int ext4_ioctl_check_immutable(struct inode *inode, __u32 new_projid, ++ unsigned int flags) ++{ ++ struct ext4_inode_info *ei = EXT4_I(inode); ++ unsigned int oldflags = ei->i_flags; ++ ++ if (!(oldflags & EXT4_IMMUTABLE_FL) || !(flags & EXT4_IMMUTABLE_FL)) ++ return 0; ++ ++ if ((oldflags & ~EXT4_IMMUTABLE_FL) != (flags & ~EXT4_IMMUTABLE_FL)) ++ return -EPERM; ++ if (ext4_has_feature_project(inode->i_sb) && ++ __kprojid_val(ei->i_projid) != new_projid) ++ return -EPERM; ++ ++ return 0; ++} ++ + static int ext4_ioctl_setflags(struct inode *inode, + unsigned int flags) + { +@@ -322,6 +345,20 @@ static int ext4_ioctl_setflags(struct in + goto flags_out; + } + ++ /* ++ * Wait for all pending directio and then flush all the dirty pages ++ * for this file. The flush marks all the pages readonly, so any ++ * subsequent attempt to write to the file (particularly mmap pages) ++ * will come through the filesystem and fail. 
++ */ ++ if (S_ISREG(inode->i_mode) && !IS_IMMUTABLE(inode) && ++ (flags & EXT4_IMMUTABLE_FL)) { ++ inode_dio_wait(inode); ++ err = filemap_write_and_wait(inode->i_mapping); ++ if (err) ++ goto flags_out; ++ } ++ + handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); +@@ -751,7 +788,11 @@ long ext4_ioctl(struct file *filp, unsig + return err; + + inode_lock(inode); +- err = ext4_ioctl_setflags(inode, flags); ++ err = ext4_ioctl_check_immutable(inode, ++ from_kprojid(&init_user_ns, ei->i_projid), ++ flags); ++ if (!err) ++ err = ext4_ioctl_setflags(inode, flags); + inode_unlock(inode); + mnt_drop_write_file(filp); + return err; +@@ -1121,6 +1162,9 @@ resizefs_out: + goto out; + flags = (ei->i_flags & ~EXT4_FL_XFLAG_VISIBLE) | + (flags & EXT4_FL_XFLAG_VISIBLE); ++ err = ext4_ioctl_check_immutable(inode, fa.fsx_projid, flags); ++ if (err) ++ goto out; + err = ext4_ioctl_setflags(inode, flags); + if (err) + goto out; diff --git a/queue-5.1/ext4-enforce-the-immutable-flag-on-open-files.patch b/queue-5.1/ext4-enforce-the-immutable-flag-on-open-files.patch new file mode 100644 index 00000000000..72c822f6a77 --- /dev/null +++ b/queue-5.1/ext4-enforce-the-immutable-flag-on-open-files.patch @@ -0,0 +1,70 @@ +From 02b016ca7f99229ae6227e7b2fc950c4e140d74a Mon Sep 17 00:00:00 2001 +From: Theodore Ts'o +Date: Sun, 9 Jun 2019 22:04:33 -0400 +Subject: ext4: enforce the immutable flag on open files + +From: Theodore Ts'o + +commit 02b016ca7f99229ae6227e7b2fc950c4e140d74a upstream. + +According to the chattr man page, "a file with the 'i' attribute +cannot be modified..." Historically, this was only enforced when the +file was opened, per the rest of the description, "... and the file +can not be opened in write mode". + +There is general agreement that we should standardize all file systems +to prevent modifications even for files that were opened at the time +the immutable flag is set. Eventually, a change to enforce this at +the VFS layer should be landing in mainline. Until then, enforce this +at the ext4 level to prevent xfstests generic/553 from failing. + +Signed-off-by: Theodore Ts'o +Cc: "Darrick J. Wong" +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/file.c | 4 ++++ + fs/ext4/inode.c | 11 +++++++++++ + 2 files changed, 15 insertions(+) + +--- a/fs/ext4/file.c ++++ b/fs/ext4/file.c +@@ -165,6 +165,10 @@ static ssize_t ext4_write_checks(struct + ret = generic_write_checks(iocb, from); + if (ret <= 0) + return ret; ++ ++ if (unlikely(IS_IMMUTABLE(inode))) ++ return -EPERM; ++ + /* + * If we have encountered a bitmap-format file, the size limit + * is smaller than s_maxbytes, which is for extent-mapped files. 
+--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -5514,6 +5514,14 @@ int ext4_setattr(struct dentry *dentry, + if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + return -EIO; + ++ if (unlikely(IS_IMMUTABLE(inode))) ++ return -EPERM; ++ ++ if (unlikely(IS_APPEND(inode) && ++ (ia_valid & (ATTR_MODE | ATTR_UID | ++ ATTR_GID | ATTR_TIMES_SET)))) ++ return -EPERM; ++ + error = setattr_prepare(dentry, attr); + if (error) + return error; +@@ -6184,6 +6192,9 @@ vm_fault_t ext4_page_mkwrite(struct vm_f + get_block_t *get_block; + int retries = 0; + ++ if (unlikely(IS_IMMUTABLE(inode))) ++ return VM_FAULT_SIGBUS; ++ + sb_start_pagefault(inode->i_sb); + file_update_time(vma->vm_file); + diff --git a/queue-5.1/ext4-use-jbd2_inode-dirty-range-scoping.patch b/queue-5.1/ext4-use-jbd2_inode-dirty-range-scoping.patch new file mode 100644 index 00000000000..bca6f29d79a --- /dev/null +++ b/queue-5.1/ext4-use-jbd2_inode-dirty-range-scoping.patch @@ -0,0 +1,96 @@ +From 73131fbb003b3691cfcf9656f234b00da497fcd6 Mon Sep 17 00:00:00 2001 +From: Ross Zwisler +Date: Thu, 20 Jun 2019 17:26:26 -0400 +Subject: ext4: use jbd2_inode dirty range scoping + +From: Ross Zwisler + +commit 73131fbb003b3691cfcf9656f234b00da497fcd6 upstream. + +Use the newly introduced jbd2_inode dirty range scoping to prevent us +from waiting forever when trying to complete a journal transaction. + +Signed-off-by: Ross Zwisler +Signed-off-by: Theodore Ts'o +Reviewed-by: Jan Kara +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/ext4_jbd2.h | 12 ++++++------ + fs/ext4/inode.c | 13 ++++++++++--- + fs/ext4/move_extent.c | 3 ++- + 3 files changed, 18 insertions(+), 10 deletions(-) + +--- a/fs/ext4/ext4_jbd2.h ++++ b/fs/ext4/ext4_jbd2.h +@@ -361,20 +361,20 @@ static inline int ext4_journal_force_com + } + + static inline int ext4_jbd2_inode_add_write(handle_t *handle, +- struct inode *inode) ++ struct inode *inode, loff_t start_byte, loff_t length) + { + if (ext4_handle_valid(handle)) +- return jbd2_journal_inode_add_write(handle, +- EXT4_I(inode)->jinode); ++ return jbd2_journal_inode_ranged_write(handle, ++ EXT4_I(inode)->jinode, start_byte, length); + return 0; + } + + static inline int ext4_jbd2_inode_add_wait(handle_t *handle, +- struct inode *inode) ++ struct inode *inode, loff_t start_byte, loff_t length) + { + if (ext4_handle_valid(handle)) +- return jbd2_journal_inode_add_wait(handle, +- EXT4_I(inode)->jinode); ++ return jbd2_journal_inode_ranged_wait(handle, ++ EXT4_I(inode)->jinode, start_byte, length); + return 0; + } + +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -727,10 +727,16 @@ out_sem: + !(flags & EXT4_GET_BLOCKS_ZERO) && + !ext4_is_quota_file(inode) && + ext4_should_order_data(inode)) { ++ loff_t start_byte = ++ (loff_t)map->m_lblk << inode->i_blkbits; ++ loff_t length = (loff_t)map->m_len << inode->i_blkbits; ++ + if (flags & EXT4_GET_BLOCKS_IO_SUBMIT) +- ret = ext4_jbd2_inode_add_wait(handle, inode); ++ ret = ext4_jbd2_inode_add_wait(handle, inode, ++ start_byte, length); + else +- ret = ext4_jbd2_inode_add_write(handle, inode); ++ ret = ext4_jbd2_inode_add_write(handle, inode, ++ start_byte, length); + if (ret) + return ret; + } +@@ -4081,7 +4087,8 @@ static int __ext4_block_zero_page_range( + err = 0; + mark_buffer_dirty(bh); + if (ext4_should_order_data(inode)) +- err = ext4_jbd2_inode_add_write(handle, inode); ++ err = ext4_jbd2_inode_add_write(handle, inode, from, ++ length); + } + + unlock: +--- a/fs/ext4/move_extent.c ++++ b/fs/ext4/move_extent.c +@@ -390,7 +390,8 @@ data_copy: 
+ + /* Even in case of data=writeback it is reasonable to pin + * inode to transaction, to prevent unexpected data loss */ +- *err = ext4_jbd2_inode_add_write(handle, orig_inode); ++ *err = ext4_jbd2_inode_add_write(handle, orig_inode, ++ (loff_t)orig_page_offset << PAGE_SHIFT, replaced_size); + + unlock_pages: + unlock_page(pagep[0]); diff --git a/queue-5.1/gpio-davinci-silence-error-prints-in-case-of-eprobe_defer.patch b/queue-5.1/gpio-davinci-silence-error-prints-in-case-of-eprobe_defer.patch new file mode 100644 index 00000000000..8f9457eda05 --- /dev/null +++ b/queue-5.1/gpio-davinci-silence-error-prints-in-case-of-eprobe_defer.patch @@ -0,0 +1,35 @@ +From 541e4095f388c196685685633c950cb9b97f8039 Mon Sep 17 00:00:00 2001 +From: Keerthy +Date: Mon, 8 Jul 2019 14:19:04 +0530 +Subject: gpio: davinci: silence error prints in case of EPROBE_DEFER + +From: Keerthy + +commit 541e4095f388c196685685633c950cb9b97f8039 upstream. + +Silence error prints in case of EPROBE_DEFER. This avoids +multiple/duplicate defer prints during boot. + +Cc: +Signed-off-by: Keerthy +Signed-off-by: Bartosz Golaszewski +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpio/gpio-davinci.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/drivers/gpio/gpio-davinci.c ++++ b/drivers/gpio/gpio-davinci.c +@@ -242,8 +242,9 @@ static int davinci_gpio_probe(struct pla + for (i = 0; i < nirq; i++) { + chips->irqs[i] = platform_get_irq(pdev, i); + if (chips->irqs[i] < 0) { +- dev_info(dev, "IRQ not populated, err = %d\n", +- chips->irqs[i]); ++ if (chips->irqs[i] != -EPROBE_DEFER) ++ dev_info(dev, "IRQ not populated, err = %d\n", ++ chips->irqs[i]); + return chips->irqs[i]; + } + } diff --git a/queue-5.1/gpiolib-of-fix-a-memory-leak-in-of_gpio_flags_quirks.patch b/queue-5.1/gpiolib-of-fix-a-memory-leak-in-of_gpio_flags_quirks.patch new file mode 100644 index 00000000000..6d4b5fa51c3 --- /dev/null +++ b/queue-5.1/gpiolib-of-fix-a-memory-leak-in-of_gpio_flags_quirks.patch @@ -0,0 +1,34 @@ +From 89fea04c85e85f21ef4937611055abce82330d48 Mon Sep 17 00:00:00 2001 +From: Nishka Dasgupta +Date: Sat, 6 Jul 2019 19:04:22 +0530 +Subject: gpiolib: of: fix a memory leak in of_gpio_flags_quirks() + +From: Nishka Dasgupta + +commit 89fea04c85e85f21ef4937611055abce82330d48 upstream. + +Each iteration of for_each_child_of_node puts the previous node, but in +the case of a break from the middle of the loop, there is no put, thus +causing a memory leak. Hence add an of_node_put before the break. +Issue found with Coccinelle. + +Cc: +Signed-off-by: Nishka Dasgupta +[Bartosz: tweaked the commit message] +Signed-off-by: Bartosz Golaszewski +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpio/gpiolib-of.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/gpio/gpiolib-of.c ++++ b/drivers/gpio/gpiolib-of.c +@@ -155,6 +155,7 @@ static void of_gpio_flags_quirks(struct + of_node_full_name(child)); + *flags |= OF_GPIO_ACTIVE_LOW; + } ++ of_node_put(child); + break; + } + } diff --git a/queue-5.1/jbd2-introduce-jbd2_inode-dirty-range-scoping.patch b/queue-5.1/jbd2-introduce-jbd2_inode-dirty-range-scoping.patch new file mode 100644 index 00000000000..d050cd35ee1 --- /dev/null +++ b/queue-5.1/jbd2-introduce-jbd2_inode-dirty-range-scoping.patch @@ -0,0 +1,251 @@ +From 6ba0e7dc64a5adcda2fbe65adc466891795d639e Mon Sep 17 00:00:00 2001 +From: Ross Zwisler +Date: Thu, 20 Jun 2019 17:24:56 -0400 +Subject: jbd2: introduce jbd2_inode dirty range scoping + +From: Ross Zwisler + +commit 6ba0e7dc64a5adcda2fbe65adc466891795d639e upstream. 
+ +Currently both journal_submit_inode_data_buffers() and +journal_finish_inode_data_buffers() operate on the entire address space +of each of the inodes associated with a given journal entry. The +consequence of this is that if we have an inode where we are constantly +appending dirty pages we can end up waiting for an indefinite amount of +time in journal_finish_inode_data_buffers() while we wait for all the +pages under writeback to be written out. + +The easiest way to cause this type of workload is do just dd from +/dev/zero to a file until it fills the entire filesystem. This can +cause journal_finish_inode_data_buffers() to wait for the duration of +the entire dd operation. + +We can improve this situation by scoping each of the inode dirty ranges +associated with a given transaction. We do this via the jbd2_inode +structure so that the scoping is contained within jbd2 and so that it +follows the lifetime and locking rules for that structure. + +This allows us to limit the writeback & wait in +journal_submit_inode_data_buffers() and +journal_finish_inode_data_buffers() respectively to the dirty range for +a given struct jdb2_inode, keeping us from waiting forever if the inode +in question is still being appended to. + +Signed-off-by: Ross Zwisler +Signed-off-by: Theodore Ts'o +Reviewed-by: Jan Kara +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + fs/jbd2/commit.c | 23 +++++++++++++++++------ + fs/jbd2/journal.c | 4 ++++ + fs/jbd2/transaction.c | 49 ++++++++++++++++++++++++++++--------------------- + include/linux/jbd2.h | 22 ++++++++++++++++++++++ + 4 files changed, 71 insertions(+), 27 deletions(-) + +--- a/fs/jbd2/commit.c ++++ b/fs/jbd2/commit.c +@@ -187,14 +187,15 @@ static int journal_wait_on_commit_record + * use writepages() because with dealyed allocation we may be doing + * block allocation in writepages(). + */ +-static int journal_submit_inode_data_buffers(struct address_space *mapping) ++static int journal_submit_inode_data_buffers(struct address_space *mapping, ++ loff_t dirty_start, loff_t dirty_end) + { + int ret; + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = mapping->nrpages * 2, +- .range_start = 0, +- .range_end = i_size_read(mapping->host), ++ .range_start = dirty_start, ++ .range_end = dirty_end, + }; + + ret = generic_writepages(mapping, &wbc); +@@ -218,6 +219,9 @@ static int journal_submit_data_buffers(j + + spin_lock(&journal->j_list_lock); + list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { ++ loff_t dirty_start = jinode->i_dirty_start; ++ loff_t dirty_end = jinode->i_dirty_end; ++ + if (!(jinode->i_flags & JI_WRITE_DATA)) + continue; + mapping = jinode->i_vfs_inode->i_mapping; +@@ -230,7 +234,8 @@ static int journal_submit_data_buffers(j + * only allocated blocks here. 
+ */ + trace_jbd2_submit_inode_data(jinode->i_vfs_inode); +- err = journal_submit_inode_data_buffers(mapping); ++ err = journal_submit_inode_data_buffers(mapping, dirty_start, ++ dirty_end); + if (!ret) + ret = err; + spin_lock(&journal->j_list_lock); +@@ -257,12 +262,16 @@ static int journal_finish_inode_data_buf + /* For locking, see the comment in journal_submit_data_buffers() */ + spin_lock(&journal->j_list_lock); + list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { ++ loff_t dirty_start = jinode->i_dirty_start; ++ loff_t dirty_end = jinode->i_dirty_end; ++ + if (!(jinode->i_flags & JI_WAIT_DATA)) + continue; + jinode->i_flags |= JI_COMMIT_RUNNING; + spin_unlock(&journal->j_list_lock); +- err = filemap_fdatawait_keep_errors( +- jinode->i_vfs_inode->i_mapping); ++ err = filemap_fdatawait_range_keep_errors( ++ jinode->i_vfs_inode->i_mapping, dirty_start, ++ dirty_end); + if (!ret) + ret = err; + spin_lock(&journal->j_list_lock); +@@ -282,6 +291,8 @@ static int journal_finish_inode_data_buf + &jinode->i_transaction->t_inode_list); + } else { + jinode->i_transaction = NULL; ++ jinode->i_dirty_start = 0; ++ jinode->i_dirty_end = 0; + } + } + spin_unlock(&journal->j_list_lock); +--- a/fs/jbd2/journal.c ++++ b/fs/jbd2/journal.c +@@ -94,6 +94,8 @@ EXPORT_SYMBOL(jbd2_journal_try_to_free_b + EXPORT_SYMBOL(jbd2_journal_force_commit); + EXPORT_SYMBOL(jbd2_journal_inode_add_write); + EXPORT_SYMBOL(jbd2_journal_inode_add_wait); ++EXPORT_SYMBOL(jbd2_journal_inode_ranged_write); ++EXPORT_SYMBOL(jbd2_journal_inode_ranged_wait); + EXPORT_SYMBOL(jbd2_journal_init_jbd_inode); + EXPORT_SYMBOL(jbd2_journal_release_jbd_inode); + EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); +@@ -2574,6 +2576,8 @@ void jbd2_journal_init_jbd_inode(struct + jinode->i_next_transaction = NULL; + jinode->i_vfs_inode = inode; + jinode->i_flags = 0; ++ jinode->i_dirty_start = 0; ++ jinode->i_dirty_end = 0; + INIT_LIST_HEAD(&jinode->i_list); + } + +--- a/fs/jbd2/transaction.c ++++ b/fs/jbd2/transaction.c +@@ -2565,7 +2565,7 @@ void jbd2_journal_refile_buffer(journal_ + * File inode in the inode list of the handle's transaction + */ + static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode, +- unsigned long flags) ++ unsigned long flags, loff_t start_byte, loff_t end_byte) + { + transaction_t *transaction = handle->h_transaction; + journal_t *journal; +@@ -2577,26 +2577,17 @@ static int jbd2_journal_file_inode(handl + jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino, + transaction->t_tid); + +- /* +- * First check whether inode isn't already on the transaction's +- * lists without taking the lock. Note that this check is safe +- * without the lock as we cannot race with somebody removing inode +- * from the transaction. The reason is that we remove inode from the +- * transaction only in journal_release_jbd_inode() and when we commit +- * the transaction. We are guarded from the first case by holding +- * a reference to the inode. We are safe against the second case +- * because if jinode->i_transaction == transaction, commit code +- * cannot touch the transaction because we hold reference to it, +- * and if jinode->i_next_transaction == transaction, commit code +- * will only file the inode where we want it. 
+- */ +- if ((jinode->i_transaction == transaction || +- jinode->i_next_transaction == transaction) && +- (jinode->i_flags & flags) == flags) +- return 0; +- + spin_lock(&journal->j_list_lock); + jinode->i_flags |= flags; ++ ++ if (jinode->i_dirty_end) { ++ jinode->i_dirty_start = min(jinode->i_dirty_start, start_byte); ++ jinode->i_dirty_end = max(jinode->i_dirty_end, end_byte); ++ } else { ++ jinode->i_dirty_start = start_byte; ++ jinode->i_dirty_end = end_byte; ++ } ++ + /* Is inode already attached where we need it? */ + if (jinode->i_transaction == transaction || + jinode->i_next_transaction == transaction) +@@ -2631,12 +2622,28 @@ done: + int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *jinode) + { + return jbd2_journal_file_inode(handle, jinode, +- JI_WRITE_DATA | JI_WAIT_DATA); ++ JI_WRITE_DATA | JI_WAIT_DATA, 0, LLONG_MAX); + } + + int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *jinode) + { +- return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA); ++ return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA, 0, ++ LLONG_MAX); ++} ++ ++int jbd2_journal_inode_ranged_write(handle_t *handle, ++ struct jbd2_inode *jinode, loff_t start_byte, loff_t length) ++{ ++ return jbd2_journal_file_inode(handle, jinode, ++ JI_WRITE_DATA | JI_WAIT_DATA, start_byte, ++ start_byte + length - 1); ++} ++ ++int jbd2_journal_inode_ranged_wait(handle_t *handle, struct jbd2_inode *jinode, ++ loff_t start_byte, loff_t length) ++{ ++ return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA, ++ start_byte, start_byte + length - 1); + } + + /* +--- a/include/linux/jbd2.h ++++ b/include/linux/jbd2.h +@@ -454,6 +454,22 @@ struct jbd2_inode { + * @i_flags: Flags of inode [j_list_lock] + */ + unsigned long i_flags; ++ ++ /** ++ * @i_dirty_start: ++ * ++ * Offset in bytes where the dirty range for this inode starts. ++ * [j_list_lock] ++ */ ++ loff_t i_dirty_start; ++ ++ /** ++ * @i_dirty_end: ++ * ++ * Inclusive offset in bytes where the dirty range for this inode ++ * ends. [j_list_lock] ++ */ ++ loff_t i_dirty_end; + }; + + struct jbd2_revoke_table_s; +@@ -1400,6 +1416,12 @@ extern int jbd2_journal_force_commit( + extern int jbd2_journal_force_commit_nested(journal_t *); + extern int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *inode); + extern int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *inode); ++extern int jbd2_journal_inode_ranged_write(handle_t *handle, ++ struct jbd2_inode *inode, loff_t start_byte, ++ loff_t length); ++extern int jbd2_journal_inode_ranged_wait(handle_t *handle, ++ struct jbd2_inode *inode, loff_t start_byte, ++ loff_t length); + extern int jbd2_journal_begin_ordered_truncate(journal_t *journal, + struct jbd2_inode *inode, loff_t new_size); + extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode); diff --git a/queue-5.1/mips-lb60-fix-pin-mappings.patch b/queue-5.1/mips-lb60-fix-pin-mappings.patch new file mode 100644 index 00000000000..2d622d61f3d --- /dev/null +++ b/queue-5.1/mips-lb60-fix-pin-mappings.patch @@ -0,0 +1,68 @@ +From 1323c3b72a987de57141cabc44bf9cd83656bc70 Mon Sep 17 00:00:00 2001 +From: Paul Cercueil +Date: Tue, 4 Jun 2019 18:33:11 +0200 +Subject: MIPS: lb60: Fix pin mappings + +From: Paul Cercueil + +commit 1323c3b72a987de57141cabc44bf9cd83656bc70 upstream. + +The pin mappings introduced in commit 636f8ba67fb6 +("MIPS: JZ4740: Qi LB60: Add pinctrl configuration for several drivers") +are completely wrong. 
The pinctrl driver name is incorrect, and the +function and group fields are swapped. + +Fixes: 636f8ba67fb6 ("MIPS: JZ4740: Qi LB60: Add pinctrl configuration for several drivers") +Cc: +Signed-off-by: Paul Cercueil +Reviewed-by: Linus Walleij +Signed-off-by: Paul Burton +Cc: Ralf Baechle +Cc: James Hogan +Cc: od@zcrc.me +Cc: linux-mips@vger.kernel.org +Cc: linux-kernel@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + arch/mips/jz4740/board-qi_lb60.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +--- a/arch/mips/jz4740/board-qi_lb60.c ++++ b/arch/mips/jz4740/board-qi_lb60.c +@@ -469,27 +469,27 @@ static unsigned long pin_cfg_bias_disabl + static struct pinctrl_map pin_map[] __initdata = { + /* NAND pin configuration */ + PIN_MAP_MUX_GROUP_DEFAULT("jz4740-nand", +- "10010000.jz4740-pinctrl", "nand", "nand-cs1"), ++ "10010000.pin-controller", "nand-cs1", "nand"), + + /* fbdev pin configuration */ + PIN_MAP_MUX_GROUP("jz4740-fb", PINCTRL_STATE_DEFAULT, +- "10010000.jz4740-pinctrl", "lcd", "lcd-8bit"), ++ "10010000.pin-controller", "lcd-8bit", "lcd"), + PIN_MAP_MUX_GROUP("jz4740-fb", PINCTRL_STATE_SLEEP, +- "10010000.jz4740-pinctrl", "lcd", "lcd-no-pins"), ++ "10010000.pin-controller", "lcd-no-pins", "lcd"), + + /* MMC pin configuration */ + PIN_MAP_MUX_GROUP_DEFAULT("jz4740-mmc.0", +- "10010000.jz4740-pinctrl", "mmc", "mmc-1bit"), ++ "10010000.pin-controller", "mmc-1bit", "mmc"), + PIN_MAP_MUX_GROUP_DEFAULT("jz4740-mmc.0", +- "10010000.jz4740-pinctrl", "mmc", "mmc-4bit"), ++ "10010000.pin-controller", "mmc-4bit", "mmc"), + PIN_MAP_CONFIGS_PIN_DEFAULT("jz4740-mmc.0", +- "10010000.jz4740-pinctrl", "PD0", pin_cfg_bias_disable), ++ "10010000.pin-controller", "PD0", pin_cfg_bias_disable), + PIN_MAP_CONFIGS_PIN_DEFAULT("jz4740-mmc.0", +- "10010000.jz4740-pinctrl", "PD2", pin_cfg_bias_disable), ++ "10010000.pin-controller", "PD2", pin_cfg_bias_disable), + + /* PWM pin configuration */ + PIN_MAP_MUX_GROUP_DEFAULT("jz4740-pwm", +- "10010000.jz4740-pinctrl", "pwm4", "pwm4"), ++ "10010000.pin-controller", "pwm4", "pwm4"), + }; + + diff --git a/queue-5.1/mm-add-filemap_fdatawait_range_keep_errors.patch b/queue-5.1/mm-add-filemap_fdatawait_range_keep_errors.patch new file mode 100644 index 00000000000..00eb57ef45b --- /dev/null +++ b/queue-5.1/mm-add-filemap_fdatawait_range_keep_errors.patch @@ -0,0 +1,67 @@ +From aa0bfcd939c30617385ffa28682c062d78050eba Mon Sep 17 00:00:00 2001 +From: Ross Zwisler +Date: Thu, 20 Jun 2019 17:05:37 -0400 +Subject: mm: add filemap_fdatawait_range_keep_errors() + +From: Ross Zwisler + +commit aa0bfcd939c30617385ffa28682c062d78050eba upstream. + +In the spirit of filemap_fdatawait_range() and +filemap_fdatawait_keep_errors(), introduce +filemap_fdatawait_range_keep_errors() which both takes a range upon +which to wait and does not clear errors from the address space. 
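A hedged usage sketch, not taken from the patch: the wrapper name is
invented, and only filemap_fdatawait_range_keep_errors() is the helper
added below. A journalling caller can wait on just the byte range its
transaction dirtied while leaving the address_space error bits in
place for whoever owns error reporting (e.g. a later fsync()):

	static int wait_on_txn_range(struct address_space *mapping,
				     loff_t start_byte, loff_t end_byte)
	{
		/*
		 * Unlike filemap_fdatawait_range(), the _keep_errors
		 * variant checks AS_EIO/AS_ENOSPC without clearing
		 * them, so the error stays visible to later waiters.
		 */
		return filemap_fdatawait_range_keep_errors(mapping,
							   start_byte,
							   end_byte);
	}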
+ +Signed-off-by: Ross Zwisler +Signed-off-by: Theodore Ts'o +Reviewed-by: Jan Kara +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/fs.h | 2 ++ + mm/filemap.c | 22 ++++++++++++++++++++++ + 2 files changed, 24 insertions(+) + +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -2703,6 +2703,8 @@ extern int filemap_flush(struct address_ + extern int filemap_fdatawait_keep_errors(struct address_space *mapping); + extern int filemap_fdatawait_range(struct address_space *, loff_t lstart, + loff_t lend); ++extern int filemap_fdatawait_range_keep_errors(struct address_space *mapping, ++ loff_t start_byte, loff_t end_byte); + + static inline int filemap_fdatawait(struct address_space *mapping) + { +--- a/mm/filemap.c ++++ b/mm/filemap.c +@@ -548,6 +548,28 @@ int filemap_fdatawait_range(struct addre + EXPORT_SYMBOL(filemap_fdatawait_range); + + /** ++ * filemap_fdatawait_range_keep_errors - wait for writeback to complete ++ * @mapping: address space structure to wait for ++ * @start_byte: offset in bytes where the range starts ++ * @end_byte: offset in bytes where the range ends (inclusive) ++ * ++ * Walk the list of under-writeback pages of the given address space in the ++ * given range and wait for all of them. Unlike filemap_fdatawait_range(), ++ * this function does not clear error status of the address space. ++ * ++ * Use this function if callers don't handle errors themselves. Expected ++ * call sites are system-wide / filesystem-wide data flushers: e.g. sync(2), ++ * fsfreeze(8) ++ */ ++int filemap_fdatawait_range_keep_errors(struct address_space *mapping, ++ loff_t start_byte, loff_t end_byte) ++{ ++ __filemap_fdatawait_range(mapping, start_byte, end_byte); ++ return filemap_check_and_keep_errors(mapping); ++} ++EXPORT_SYMBOL(filemap_fdatawait_range_keep_errors); ++ ++/** + * file_fdatawait_range - wait for writeback to complete + * @file: file pointing to address space structure to wait for + * @start_byte: offset in bytes where the range starts diff --git a/queue-5.1/perf-core-fix-exclusive-events-grouping.patch b/queue-5.1/perf-core-fix-exclusive-events-grouping.patch new file mode 100644 index 00000000000..e5ec81f117e --- /dev/null +++ b/queue-5.1/perf-core-fix-exclusive-events-grouping.patch @@ -0,0 +1,171 @@ +From 8a58ddae23796c733c5dfbd717538d89d036c5bd Mon Sep 17 00:00:00 2001 +From: Alexander Shishkin +Date: Mon, 1 Jul 2019 14:07:55 +0300 +Subject: perf/core: Fix exclusive events' grouping + +From: Alexander Shishkin + +commit 8a58ddae23796c733c5dfbd717538d89d036c5bd upstream. + +So far, we tried to disallow grouping exclusive events for the fear of +complications they would cause with moving between contexts. Specifically, +moving a software group to a hardware context would violate the exclusivity +rules if both groups contain matching exclusive events. + +This attempt was, however, unsuccessful: the check that we have in the +perf_event_open() syscall is both wrong (looks at wrong PMU) and +insufficient (group leader may still be exclusive), as can be illustrated +by running: + + $ perf record -e '{intel_pt//,cycles}' uname + $ perf record -e '{cycles,intel_pt//}' uname + +ultimately successfully. + +Furthermore, we are completely free to trigger the exclusivity violation +by: + + perf -e '{cycles,intel_pt//}' -e '{intel_pt//,instructions}' + +even though the helpful perf record will not allow that, the ABI will. + +The warning later in the perf_event_open() path will also not trigger, because +it's also wrong. 
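The fix below validates the whole group up front. As a sketch of that
invariant (the helper name group_is_installable() is invented;
exclusive_event_installable() and for_each_sibling_event() are the
real symbols used in the diff): a group may only be installed into a
context if the leader and every sibling pass the exclusivity check
against the events already in that context.

	static bool group_is_installable(struct perf_event *leader,
					 struct perf_event_context *ctx)
	{
		struct perf_event *sibling;

		if (!exclusive_event_installable(leader, ctx))
			return false;

		for_each_sibling_event(sibling, leader)
			if (!exclusive_event_installable(sibling, ctx))
				return false;

		return true;
	}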
+ +Fix all this by validating the original group before moving, getting rid +of broken safeguards and placing a useful one to perf_install_in_context(). + +Signed-off-by: Alexander Shishkin +Signed-off-by: Peter Zijlstra (Intel) +Cc: +Cc: Arnaldo Carvalho de Melo +Cc: Jiri Olsa +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Stephane Eranian +Cc: Thomas Gleixner +Cc: Vince Weaver +Cc: mathieu.poirier@linaro.org +Cc: will.deacon@arm.com +Fixes: bed5b25ad9c8a ("perf: Add a pmu capability for "exclusive" events") +Link: https://lkml.kernel.org/r/20190701110755.24646-1-alexander.shishkin@linux.intel.com +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/perf_event.h | 5 +++++ + kernel/events/core.c | 34 ++++++++++++++++++++++------------ + 2 files changed, 27 insertions(+), 12 deletions(-) + +--- a/include/linux/perf_event.h ++++ b/include/linux/perf_event.h +@@ -1044,6 +1044,11 @@ static inline int in_software_context(st + return event->ctx->pmu->task_ctx_nr == perf_sw_context; + } + ++static inline int is_exclusive_pmu(struct pmu *pmu) ++{ ++ return pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE; ++} ++ + extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; + + extern void ___perf_sw_event(u32, u64, struct pt_regs *, u64); +--- a/kernel/events/core.c ++++ b/kernel/events/core.c +@@ -2543,6 +2543,9 @@ unlock: + return ret; + } + ++static bool exclusive_event_installable(struct perf_event *event, ++ struct perf_event_context *ctx); ++ + /* + * Attach a performance event to a context. + * +@@ -2557,6 +2560,8 @@ perf_install_in_context(struct perf_even + + lockdep_assert_held(&ctx->mutex); + ++ WARN_ON_ONCE(!exclusive_event_installable(event, ctx)); ++ + if (event->cpu != -1) + event->cpu = cpu; + +@@ -4348,7 +4353,7 @@ static int exclusive_event_init(struct p + { + struct pmu *pmu = event->pmu; + +- if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE)) ++ if (!is_exclusive_pmu(pmu)) + return 0; + + /* +@@ -4379,7 +4384,7 @@ static void exclusive_event_destroy(stru + { + struct pmu *pmu = event->pmu; + +- if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE)) ++ if (!is_exclusive_pmu(pmu)) + return; + + /* see comment in exclusive_event_init() */ +@@ -4399,14 +4404,15 @@ static bool exclusive_event_match(struct + return false; + } + +-/* Called under the same ctx::mutex as perf_install_in_context() */ + static bool exclusive_event_installable(struct perf_event *event, + struct perf_event_context *ctx) + { + struct perf_event *iter_event; + struct pmu *pmu = event->pmu; + +- if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE)) ++ lockdep_assert_held(&ctx->mutex); ++ ++ if (!is_exclusive_pmu(pmu)) + return true; + + list_for_each_entry(iter_event, &ctx->event_list, event_entry) { +@@ -10899,11 +10905,6 @@ SYSCALL_DEFINE5(perf_event_open, + goto err_alloc; + } + +- if ((pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE) && group_leader) { +- err = -EBUSY; +- goto err_context; +- } +- + /* + * Look up the group leader (we will attach this event to it): + */ +@@ -10991,6 +10992,18 @@ SYSCALL_DEFINE5(perf_event_open, + move_group = 0; + } + } ++ ++ /* ++ * Failure to create exclusive events returns -EBUSY. 
++ */ ++ err = -EBUSY; ++ if (!exclusive_event_installable(group_leader, ctx)) ++ goto err_locked; ++ ++ for_each_sibling_event(sibling, group_leader) { ++ if (!exclusive_event_installable(sibling, ctx)) ++ goto err_locked; ++ } + } else { + mutex_lock(&ctx->mutex); + } +@@ -11027,9 +11040,6 @@ SYSCALL_DEFINE5(perf_event_open, + * because we need to serialize with concurrent event creation. + */ + if (!exclusive_event_installable(event, ctx)) { +- /* exclusive and group stuff are assumed mutually exclusive */ +- WARN_ON_ONCE(move_group); +- + err = -EBUSY; + goto err_locked; + } diff --git a/queue-5.1/perf-core-fix-race-between-close-and-fork.patch b/queue-5.1/perf-core-fix-race-between-close-and-fork.patch new file mode 100644 index 00000000000..f6e3ca40046 --- /dev/null +++ b/queue-5.1/perf-core-fix-race-between-close-and-fork.patch @@ -0,0 +1,183 @@ +From 1cf8dfe8a661f0462925df943140e9f6d1ea5233 Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra +Date: Sat, 13 Jul 2019 11:21:25 +0200 +Subject: perf/core: Fix race between close() and fork() + +From: Peter Zijlstra + +commit 1cf8dfe8a661f0462925df943140e9f6d1ea5233 upstream. + +Syzcaller reported the following Use-after-Free bug: + + close() clone() + + copy_process() + perf_event_init_task() + perf_event_init_context() + mutex_lock(parent_ctx->mutex) + inherit_task_group() + inherit_group() + inherit_event() + mutex_lock(event->child_mutex) + // expose event on child list + list_add_tail() + mutex_unlock(event->child_mutex) + mutex_unlock(parent_ctx->mutex) + + ... + goto bad_fork_* + + bad_fork_cleanup_perf: + perf_event_free_task() + + perf_release() + perf_event_release_kernel() + list_for_each_entry() + mutex_lock(ctx->mutex) + mutex_lock(event->child_mutex) + // event is from the failing inherit + // on the other CPU + perf_remove_from_context() + list_move() + mutex_unlock(event->child_mutex) + mutex_unlock(ctx->mutex) + + mutex_lock(ctx->mutex) + list_for_each_entry_safe() + // event already stolen + mutex_unlock(ctx->mutex) + + delayed_free_task() + free_task() + + list_for_each_entry_safe() + list_del() + free_event() + _free_event() + // and so event->hw.target + // is the already freed failed clone() + if (event->hw.target) + put_task_struct(event->hw.target) + // WHOOPSIE, already quite dead + +Which puts the lie to the the comment on perf_event_free_task(): +'unexposed, unused context' not so much. + +Which is a 'fun' confluence of fail; copy_process() doing an +unconditional free_task() and not respecting refcounts, and perf having +creative locking. In particular: + + 82d94856fa22 ("perf/core: Fix lock inversion between perf,trace,cpuhp") + +seems to have overlooked this 'fun' parade. + +Solve it by using the fact that detached events still have a reference +count on their (previous) context. With this perf_event_free_task() +can detect when events have escaped and wait for their destruction. 
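Distilled from the hunks below (surrounding code elided), the
wait/wake pairing at the heart of the fix:

	/* perf_event_release_kernel(): after stealing a child event */
	void *var = &child->ctx->refcount;

	free_event(child);
	smp_mb();	/* pairs with wait_var_event() */
	wake_up_var(var);

	/* perf_event_free_task(): failed-fork teardown */
	wait_var_event(&ctx->refcount, refcount_read(&ctx->refcount) == 1);
	put_ctx(ctx);	/* must be last */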
+ +Debugged-by: Alexander Shishkin +Reported-by: syzbot+a24c397a29ad22d86c98@syzkaller.appspotmail.com +Signed-off-by: Peter Zijlstra (Intel) +Acked-by: Mark Rutland +Cc: +Cc: Alexander Shishkin +Cc: Arnaldo Carvalho de Melo +Cc: Jiri Olsa +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Stephane Eranian +Cc: Thomas Gleixner +Cc: Vince Weaver +Fixes: 82d94856fa22 ("perf/core: Fix lock inversion between perf,trace,cpuhp") +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/events/core.c | 49 +++++++++++++++++++++++++++++++++++++++++-------- + 1 file changed, 41 insertions(+), 8 deletions(-) + +--- a/kernel/events/core.c ++++ b/kernel/events/core.c +@@ -4459,12 +4459,20 @@ static void _free_event(struct perf_even + if (event->destroy) + event->destroy(event); + +- if (event->ctx) +- put_ctx(event->ctx); +- ++ /* ++ * Must be after ->destroy(), due to uprobe_perf_close() using ++ * hw.target. ++ */ + if (event->hw.target) + put_task_struct(event->hw.target); + ++ /* ++ * perf_event_free_task() relies on put_ctx() being 'last', in particular ++ * all task references must be cleaned up. ++ */ ++ if (event->ctx) ++ put_ctx(event->ctx); ++ + exclusive_event_destroy(event); + module_put(event->pmu->module); + +@@ -4644,8 +4652,17 @@ again: + mutex_unlock(&event->child_mutex); + + list_for_each_entry_safe(child, tmp, &free_list, child_list) { ++ void *var = &child->ctx->refcount; ++ + list_del(&child->child_list); + free_event(child); ++ ++ /* ++ * Wake any perf_event_free_task() waiting for this event to be ++ * freed. ++ */ ++ smp_mb(); /* pairs with wait_var_event() */ ++ wake_up_var(var); + } + + no_ctx: +@@ -11506,11 +11523,11 @@ static void perf_free_event(struct perf_ + } + + /* +- * Free an unexposed, unused context as created by inheritance by +- * perf_event_init_task below, used by fork() in case of fail. ++ * Free a context as created by inheritance by perf_event_init_task() below, ++ * used by fork() in case of fail. + * +- * Not all locks are strictly required, but take them anyway to be nice and +- * help out with the lockdep assertions. ++ * Even though the task has never lived, the context and events have been ++ * exposed through the child_list, so we must take care tearing it all down. + */ + void perf_event_free_task(struct task_struct *task) + { +@@ -11540,7 +11557,23 @@ void perf_event_free_task(struct task_st + perf_free_event(event, ctx); + + mutex_unlock(&ctx->mutex); +- put_ctx(ctx); ++ ++ /* ++ * perf_event_release_kernel() could've stolen some of our ++ * child events and still have them on its free_list. In that ++ * case we must wait for these events to have been freed (in ++ * particular all their references to this task must've been ++ * dropped). ++ * ++ * Without this copy_process() will unconditionally free this ++ * task (irrespective of its reference count) and ++ * _free_event()'s put_task_struct(event->hw.target) will be a ++ * use-after-free. ++ * ++ * Wait for all events to drop their context reference. 
++ */ ++ wait_var_event(&ctx->refcount, refcount_read(&ctx->refcount) == 1); ++ put_ctx(ctx); /* must be last */ + } + } + diff --git a/queue-5.1/perf-script-assume-native_arch-for-pipe-mode.patch b/queue-5.1/perf-script-assume-native_arch-for-pipe-mode.patch new file mode 100644 index 00000000000..2a0a9cdabc3 --- /dev/null +++ b/queue-5.1/perf-script-assume-native_arch-for-pipe-mode.patch @@ -0,0 +1,55 @@ +From 9d49169c5958e429ffa6874fbef734ae7502ad65 Mon Sep 17 00:00:00 2001 +From: Song Liu +Date: Thu, 20 Jun 2019 18:44:38 -0700 +Subject: perf script: Assume native_arch for pipe mode + +From: Song Liu + +commit 9d49169c5958e429ffa6874fbef734ae7502ad65 upstream. + +In pipe mode, session->header.env.arch is not populated until the events +are processed. Therefore, the following command crashes: + + perf record -o - | perf script + +(gdb) bt + +It fails when we try to compare env.arch against uts.machine: + + if (!strcmp(uts.machine, session->header.env.arch) || + (!strcmp(uts.machine, "x86_64") && + !strcmp(session->header.env.arch, "i386"))) + native_arch = true; + +In pipe mode, it is tricky to find env.arch at this stage. To keep it +simple, let's just assume native_arch is always true for pipe mode. + +Reported-by: David Carrillo Cisneros +Signed-off-by: Song Liu +Tested-by: Arnaldo Carvalho de Melo +Cc: Andi Kleen +Cc: Jiri Olsa +Cc: Namhyung Kim +Cc: kernel-team@fb.com +Cc: stable@vger.kernel.org #v5.1+ +Fixes: 3ab481a1cfe1 ("perf script: Support insn output for normal samples") +Link: http://lkml.kernel.org/r/20190621014438.810342-1-songliubraving@fb.com +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Greg Kroah-Hartman + +--- + tools/perf/builtin-script.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/tools/perf/builtin-script.c ++++ b/tools/perf/builtin-script.c +@@ -3669,7 +3669,8 @@ int cmd_script(int argc, const char **ar + goto out_delete; + + uname(&uts); +- if (!strcmp(uts.machine, session->header.env.arch) || ++ if (data.is_pipe || /* assume pipe_mode indicates native_arch */ ++ !strcmp(uts.machine, session->header.env.arch) || + (!strcmp(uts.machine, "x86_64") && + !strcmp(session->header.env.arch, "i386"))) + native_arch = true; diff --git a/queue-5.1/series b/queue-5.1/series index 3778b0ae08c..c119dd336c8 100644 --- a/queue-5.1/series +++ b/queue-5.1/series @@ -40,3 +40,17 @@ net-mlx5e-fix-port-tunnel-gre-entropy-control.patch net-mlx5e-rx-fix-checksum-calculation-for-new-hardware.patch net-mlx5e-fix-return-value-from-timeout-recover-function.patch net-mlx5e-fix-error-flow-in-tx-reporter-diagnose.patch +dma-buf-balance-refcount-inbalance.patch +dma-buf-discard-old-fence_excl-on-retrying-get_fences_rcu-for-realloc.patch +gpiolib-of-fix-a-memory-leak-in-of_gpio_flags_quirks.patch +gpio-davinci-silence-error-prints-in-case-of-eprobe_defer.patch +mips-lb60-fix-pin-mappings.patch +perf-script-assume-native_arch-for-pipe-mode.patch +perf-core-fix-exclusive-events-grouping.patch +perf-core-fix-race-between-close-and-fork.patch +ext4-don-t-allow-any-modifications-to-an-immutable-file.patch +ext4-enforce-the-immutable-flag-on-open-files.patch +mm-add-filemap_fdatawait_range_keep_errors.patch +jbd2-introduce-jbd2_inode-dirty-range-scoping.patch +ext4-use-jbd2_inode-dirty-range-scoping.patch +ext4-allow-directory-holes.patch