git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.9-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 26 Jul 2019 13:20:52 +0000 (15:20 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 26 Jul 2019 13:20:52 +0000 (15:20 +0200)
added patches:
ext4-allow-directory-holes.patch
ext4-don-t-allow-any-modifications-to-an-immutable-file.patch
perf-core-fix-exclusive-events-grouping.patch

queue-4.9/ext4-allow-directory-holes.patch [new file with mode: 0644]
queue-4.9/ext4-don-t-allow-any-modifications-to-an-immutable-file.patch [new file with mode: 0644]
queue-4.9/perf-core-fix-exclusive-events-grouping.patch [new file with mode: 0644]
queue-4.9/series

diff --git a/queue-4.9/ext4-allow-directory-holes.patch b/queue-4.9/ext4-allow-directory-holes.patch
new file mode 100644 (file)
index 0000000..dcb6d51
--- /dev/null
@@ -0,0 +1,198 @@
+From 4e19d6b65fb4fc42e352ce9883649e049da14743 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Thu, 20 Jun 2019 21:19:02 -0400
+Subject: ext4: allow directory holes
+
+From: Theodore Ts'o <tytso@mit.edu>
+
+commit 4e19d6b65fb4fc42e352ce9883649e049da14743 upstream.
+
+The largedir feature was intended to allow ext4 directories to have
+unmapped directory blocks (e.g., directory holes).  And so the
+released e2fsprogs no longer enforces this for largedir file systems;
+however, the corresponding change to the kernel-side code was not made.
+
+This commit fixes this oversight.
+
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/dir.c   |   19 +++++++++----------
+ fs/ext4/namei.c |   45 +++++++++++++++++++++++++++++++++++++--------
+ 2 files changed, 46 insertions(+), 18 deletions(-)
+
+--- a/fs/ext4/dir.c
++++ b/fs/ext4/dir.c
+@@ -106,7 +106,6 @@ static int ext4_readdir(struct file *fil
+       struct inode *inode = file_inode(file);
+       struct super_block *sb = inode->i_sb;
+       struct buffer_head *bh = NULL;
+-      int dir_has_error = 0;
+       struct fscrypt_str fstr = FSTR_INIT(NULL, 0);
+       if (ext4_encrypted_inode(inode)) {
+@@ -142,8 +141,6 @@ static int ext4_readdir(struct file *fil
+                       return err;
+       }
+-      offset = ctx->pos & (sb->s_blocksize - 1);
+-
+       while (ctx->pos < inode->i_size) {
+               struct ext4_map_blocks map;
+@@ -152,9 +149,18 @@ static int ext4_readdir(struct file *fil
+                       goto errout;
+               }
+               cond_resched();
++              offset = ctx->pos & (sb->s_blocksize - 1);
+               map.m_lblk = ctx->pos >> EXT4_BLOCK_SIZE_BITS(sb);
+               map.m_len = 1;
+               err = ext4_map_blocks(NULL, inode, &map, 0);
++              if (err == 0) {
++                      /* m_len should never be zero but let's avoid
++                       * an infinite loop if it somehow is */
++                      if (map.m_len == 0)
++                              map.m_len = 1;
++                      ctx->pos += map.m_len * sb->s_blocksize;
++                      continue;
++              }
+               if (err > 0) {
+                       pgoff_t index = map.m_pblk >>
+                                       (PAGE_SHIFT - inode->i_blkbits);
+@@ -173,13 +179,6 @@ static int ext4_readdir(struct file *fil
+               }
+               if (!bh) {
+-                      if (!dir_has_error) {
+-                              EXT4_ERROR_FILE(file, 0,
+-                                              "directory contains a "
+-                                              "hole at offset %llu",
+-                                         (unsigned long long) ctx->pos);
+-                              dir_has_error = 1;
+-                      }
+                       /* corrupt size?  Maybe no more blocks to read */
+                       if (ctx->pos > inode->i_blocks << 9)
+                               break;
+--- a/fs/ext4/namei.c
++++ b/fs/ext4/namei.c
+@@ -79,8 +79,18 @@ static struct buffer_head *ext4_append(h
+ static int ext4_dx_csum_verify(struct inode *inode,
+                              struct ext4_dir_entry *dirent);
++/*
++ * Hints to ext4_read_dirblock regarding whether we expect a directory
++ * block being read to be an index block, or a block containing
++ * directory entries (and if the latter, whether it was found via a
++ * logical block in an htree index block).  This is used to control
++ * what sort of sanity checking ext4_read_dirblock() will do on the
++ * directory block read from the storage device.  EITHER means
++ * the caller doesn't know what kind of directory block will be read,
++ * so no specific verification will be done.
++ */
+ typedef enum {
+-      EITHER, INDEX, DIRENT
++      EITHER, INDEX, DIRENT, DIRENT_HTREE
+ } dirblock_type_t;
+ #define ext4_read_dirblock(inode, block, type) \
+@@ -106,11 +116,14 @@ static struct buffer_head *__ext4_read_d
+               return bh;
+       }
+-      if (!bh) {
++      if (!bh && (type == INDEX || type == DIRENT_HTREE)) {
+               ext4_error_inode(inode, func, line, block,
+-                               "Directory hole found");
++                               "Directory hole found for htree %s block",
++                               (type == INDEX) ? "index" : "leaf");
+               return ERR_PTR(-EFSCORRUPTED);
+       }
++      if (!bh)
++              return NULL;
+       dirent = (struct ext4_dir_entry *) bh->b_data;
+       /* Determine whether or not we have an index block */
+       if (is_dx(inode)) {
+@@ -960,7 +973,7 @@ static int htree_dirblock_to_tree(struct
+       dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
+                                                       (unsigned long)block));
+-      bh = ext4_read_dirblock(dir, block, DIRENT);
++      bh = ext4_read_dirblock(dir, block, DIRENT_HTREE);
+       if (IS_ERR(bh))
+               return PTR_ERR(bh);
+@@ -1537,7 +1550,7 @@ static struct buffer_head * ext4_dx_find
+               return (struct buffer_head *) frame;
+       do {
+               block = dx_get_block(frame->at);
+-              bh = ext4_read_dirblock(dir, block, DIRENT);
++              bh = ext4_read_dirblock(dir, block, DIRENT_HTREE);
+               if (IS_ERR(bh))
+                       goto errout;
+@@ -2142,6 +2155,11 @@ static int ext4_add_entry(handle_t *hand
+       blocks = dir->i_size >> sb->s_blocksize_bits;
+       for (block = 0; block < blocks; block++) {
+               bh = ext4_read_dirblock(dir, block, DIRENT);
++              if (bh == NULL) {
++                      bh = ext4_bread(handle, dir, block,
++                                      EXT4_GET_BLOCKS_CREATE);
++                      goto add_to_new_block;
++              }
+               if (IS_ERR(bh)) {
+                       retval = PTR_ERR(bh);
+                       bh = NULL;
+@@ -2162,6 +2180,7 @@ static int ext4_add_entry(handle_t *hand
+               brelse(bh);
+       }
+       bh = ext4_append(handle, dir, &block);
++add_to_new_block:
+       if (IS_ERR(bh)) {
+               retval = PTR_ERR(bh);
+               bh = NULL;
+@@ -2203,7 +2222,7 @@ static int ext4_dx_add_entry(handle_t *h
+               return PTR_ERR(frame);
+       entries = frame->entries;
+       at = frame->at;
+-      bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT);
++      bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT_HTREE);
+       if (IS_ERR(bh)) {
+               err = PTR_ERR(bh);
+               bh = NULL;
+@@ -2719,7 +2738,10 @@ bool ext4_empty_dir(struct inode *inode)
+               EXT4_ERROR_INODE(inode, "invalid size");
+               return true;
+       }
+-      bh = ext4_read_dirblock(inode, 0, EITHER);
++      /* The first directory block must not be a hole,
++       * so treat it as DIRENT_HTREE
++       */
++      bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE);
+       if (IS_ERR(bh))
+               return true;
+@@ -2741,6 +2763,10 @@ bool ext4_empty_dir(struct inode *inode)
+                       brelse(bh);
+                       lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb);
+                       bh = ext4_read_dirblock(inode, lblock, EITHER);
++                      if (bh == NULL) {
++                              offset += sb->s_blocksize;
++                              continue;
++                      }
+                       if (IS_ERR(bh))
+                               return true;
+                       de = (struct ext4_dir_entry_2 *) bh->b_data;
+@@ -3302,7 +3328,10 @@ static struct buffer_head *ext4_get_firs
+       struct buffer_head *bh;
+       if (!ext4_has_inline_data(inode)) {
+-              bh = ext4_read_dirblock(inode, 0, EITHER);
++              /* The first directory block must not be a hole, so
++               * treat it as DIRENT_HTREE
++               */
++              bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE);
+               if (IS_ERR(bh)) {
+                       *retval = PTR_ERR(bh);
+                       return NULL;
diff --git a/queue-4.9/ext4-don-t-allow-any-modifications-to-an-immutable-file.patch b/queue-4.9/ext4-don-t-allow-any-modifications-to-an-immutable-file.patch
new file mode 100644 (file)
index 0000000..a7527e1
--- /dev/null
@@ -0,0 +1,98 @@
+From 2e53840362771c73eb0a5ff71611507e64e8eecd Mon Sep 17 00:00:00 2001
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+Date: Sun, 9 Jun 2019 21:41:41 -0400
+Subject: ext4: don't allow any modifications to an immutable file
+
+From: Darrick J. Wong <darrick.wong@oracle.com>
+
+commit 2e53840362771c73eb0a5ff71611507e64e8eecd upstream.
+
+Don't allow any modifications to a file that's marked immutable, which
+means that we have to flush all the writable pages to make them readonly
+and we have to check the setattr/setflags parameters more closely.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/ioctl.c |   46 +++++++++++++++++++++++++++++++++++++++++++++-
+ 1 file changed, 45 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/ioctl.c
++++ b/fs/ext4/ioctl.c
+@@ -201,6 +201,29 @@ static int uuid_is_zero(__u8 u[16])
+       return 1;
+ }
++/*
++ * If immutable is set and we are not clearing it, we're not allowed to change
++ * anything else in the inode.  Don't error out if we're only trying to set
++ * immutable on an immutable file.
++ */
++static int ext4_ioctl_check_immutable(struct inode *inode, __u32 new_projid,
++                                    unsigned int flags)
++{
++      struct ext4_inode_info *ei = EXT4_I(inode);
++      unsigned int oldflags = ei->i_flags;
++
++      if (!(oldflags & EXT4_IMMUTABLE_FL) || !(flags & EXT4_IMMUTABLE_FL))
++              return 0;
++
++      if ((oldflags & ~EXT4_IMMUTABLE_FL) != (flags & ~EXT4_IMMUTABLE_FL))
++              return -EPERM;
++      if (ext4_has_feature_project(inode->i_sb) &&
++          __kprojid_val(ei->i_projid) != new_projid)
++              return -EPERM;
++
++      return 0;
++}
++
+ static int ext4_ioctl_setflags(struct inode *inode,
+                              unsigned int flags)
+ {
+@@ -251,6 +274,20 @@ static int ext4_ioctl_setflags(struct in
+       } else if (oldflags & EXT4_EOFBLOCKS_FL)
+               ext4_truncate(inode);
++      /*
++       * Wait for all pending directio and then flush all the dirty pages
++       * for this file.  The flush marks all the pages readonly, so any
++       * subsequent attempt to write to the file (particularly mmap pages)
++       * will come through the filesystem and fail.
++       */
++      if (S_ISREG(inode->i_mode) && !IS_IMMUTABLE(inode) &&
++          (flags & EXT4_IMMUTABLE_FL)) {
++              inode_dio_wait(inode);
++              err = filemap_write_and_wait(inode->i_mapping);
++              if (err)
++                      goto flags_out;
++      }
++
+       handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
+       if (IS_ERR(handle)) {
+               err = PTR_ERR(handle);
+@@ -462,7 +499,11 @@ long ext4_ioctl(struct file *filp, unsig
+               flags = ext4_mask_flags(inode->i_mode, flags);
+               inode_lock(inode);
+-              err = ext4_ioctl_setflags(inode, flags);
++              err = ext4_ioctl_check_immutable(inode,
++                              from_kprojid(&init_user_ns, ei->i_projid),
++                              flags);
++              if (!err)
++                      err = ext4_ioctl_setflags(inode, flags);
+               inode_unlock(inode);
+               mnt_drop_write_file(filp);
+               return err;
+@@ -884,6 +925,9 @@ resizefs_out:
+               inode_lock(inode);
+               flags = (ei->i_flags & ~EXT4_FL_XFLAG_VISIBLE) |
+                        (flags & EXT4_FL_XFLAG_VISIBLE);
++              err = ext4_ioctl_check_immutable(inode, fa.fsx_projid, flags);
++              if (err)
++                      goto out;
+               err = ext4_ioctl_setflags(inode, flags);
+               inode_unlock(inode);
+               mnt_drop_write_file(filp);
diff --git a/queue-4.9/perf-core-fix-exclusive-events-grouping.patch b/queue-4.9/perf-core-fix-exclusive-events-grouping.patch
new file mode 100644 (file)
index 0000000..47dec77
--- /dev/null
@@ -0,0 +1,171 @@
+From 8a58ddae23796c733c5dfbd717538d89d036c5bd Mon Sep 17 00:00:00 2001
+From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Date: Mon, 1 Jul 2019 14:07:55 +0300
+Subject: perf/core: Fix exclusive events' grouping
+
+From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+
+commit 8a58ddae23796c733c5dfbd717538d89d036c5bd upstream.
+
+So far, we tried to disallow grouping exclusive events for the fear of
+complications they would cause with moving between contexts. Specifically,
+moving a software group to a hardware context would violate the exclusivity
+rules if both groups contain matching exclusive events.
+
+This attempt was, however, unsuccessful: the check that we have in the
+perf_event_open() syscall is both wrong (looks at wrong PMU) and
+insufficient (group leader may still be exclusive), as can be illustrated
+by running:
+
+  $ perf record -e '{intel_pt//,cycles}' uname
+  $ perf record -e '{cycles,intel_pt//}' uname
+
+ultimately successfully.
+
+Furthermore, we are completely free to trigger the exclusivity violation
+by:
+
+   perf -e '{cycles,intel_pt//}' -e '{intel_pt//,instructions}'
+
+even though the helpful perf record will not allow that, the ABI will.
+
+The warning later in the perf_event_open() path will also not trigger, because
+it's also wrong.
+
+Fix all this by validating the original group before moving, getting rid
+of broken safeguards and placing a useful one to perf_install_in_context().
+
+Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: <stable@vger.kernel.org>
+Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
+Cc: Jiri Olsa <jolsa@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Stephane Eranian <eranian@google.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Vince Weaver <vincent.weaver@maine.edu>
+Cc: mathieu.poirier@linaro.org
+Cc: will.deacon@arm.com
+Fixes: bed5b25ad9c8a ("perf: Add a pmu capability for "exclusive" events")
+Link: https://lkml.kernel.org/r/20190701110755.24646-1-alexander.shishkin@linux.intel.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/perf_event.h |    5 +++++
+ kernel/events/core.c       |   34 ++++++++++++++++++++++------------
+ 2 files changed, 27 insertions(+), 12 deletions(-)
+
+--- a/include/linux/perf_event.h
++++ b/include/linux/perf_event.h
+@@ -1016,6 +1016,11 @@ static inline int is_software_event(stru
+       return event->event_caps & PERF_EV_CAP_SOFTWARE;
+ }
++static inline int is_exclusive_pmu(struct pmu *pmu)
++{
++      return pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE;
++}
++
+ extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
+ extern void ___perf_sw_event(u32, u64, struct pt_regs *, u64);
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -2324,6 +2324,9 @@ unlock:
+       return ret;
+ }
++static bool exclusive_event_installable(struct perf_event *event,
++                                      struct perf_event_context *ctx);
++
+ /*
+  * Attach a performance event to a context.
+  *
+@@ -2338,6 +2341,8 @@ perf_install_in_context(struct perf_even
+       lockdep_assert_held(&ctx->mutex);
++      WARN_ON_ONCE(!exclusive_event_installable(event, ctx));
++
+       if (event->cpu != -1)
+               event->cpu = cpu;
+@@ -3994,7 +3999,7 @@ static int exclusive_event_init(struct p
+ {
+       struct pmu *pmu = event->pmu;
+-      if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE))
++      if (!is_exclusive_pmu(pmu))
+               return 0;
+       /*
+@@ -4025,7 +4030,7 @@ static void exclusive_event_destroy(stru
+ {
+       struct pmu *pmu = event->pmu;
+-      if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE))
++      if (!is_exclusive_pmu(pmu))
+               return;
+       /* see comment in exclusive_event_init() */
+@@ -4045,14 +4050,15 @@ static bool exclusive_event_match(struct
+       return false;
+ }
+-/* Called under the same ctx::mutex as perf_install_in_context() */
+ static bool exclusive_event_installable(struct perf_event *event,
+                                       struct perf_event_context *ctx)
+ {
+       struct perf_event *iter_event;
+       struct pmu *pmu = event->pmu;
+-      if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE))
++      lockdep_assert_held(&ctx->mutex);
++
++      if (!is_exclusive_pmu(pmu))
+               return true;
+       list_for_each_entry(iter_event, &ctx->event_list, event_entry) {
+@@ -9833,11 +9839,6 @@ SYSCALL_DEFINE5(perf_event_open,
+               goto err_alloc;
+       }
+-      if ((pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE) && group_leader) {
+-              err = -EBUSY;
+-              goto err_context;
+-      }
+-
+       /*
+        * Look up the group leader (we will attach this event to it):
+        */
+@@ -9925,6 +9926,18 @@ SYSCALL_DEFINE5(perf_event_open,
+                               move_group = 0;
+                       }
+               }
++
++              /*
++               * Failure to create exclusive events returns -EBUSY.
++               */
++              err = -EBUSY;
++              if (!exclusive_event_installable(group_leader, ctx))
++                      goto err_locked;
++
++              for_each_sibling_event(sibling, group_leader) {
++                      if (!exclusive_event_installable(sibling, ctx))
++                              goto err_locked;
++              }
+       } else {
+               mutex_lock(&ctx->mutex);
+       }
+@@ -9944,9 +9957,6 @@ SYSCALL_DEFINE5(perf_event_open,
+        * because we need to serialize with concurrent event creation.
+        */
+       if (!exclusive_event_installable(event, ctx)) {
+-              /* exclusive and group stuff are assumed mutually exclusive */
+-              WARN_ON_ONCE(move_group);
+-
+               err = -EBUSY;
+               goto err_locked;
+       }
index 00baa7bacd5db67796bfa4a28811343275cb7256..a2e60ad0245ad117e41703a07ab36e2897422c64 100644 (file)
@@ -125,3 +125,6 @@ dm-bufio-fix-deadlock-with-loop-device.patch
 compiler.h-kasan-avoid-duplicating-__read_once_size_.patch
 compiler.h-add-read_word_at_a_time-function.patch
 lib-strscpy-shut-up-kasan-false-positives-in-strscpy.patch
+perf-core-fix-exclusive-events-grouping.patch
+ext4-don-t-allow-any-modifications-to-an-immutable-file.patch
+ext4-allow-directory-holes.patch