]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.14-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 24 Mar 2020 08:02:37 +0000 (09:02 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 24 Mar 2020 08:02:37 +0000 (09:02 +0100)
added patches:
futex-fix-inode-life-time-issue.patch
futex-unbreak-futex-hashing.patch

queue-4.14/futex-fix-inode-life-time-issue.patch [new file with mode: 0644]
queue-4.14/futex-unbreak-futex-hashing.patch [new file with mode: 0644]
queue-4.14/series

diff --git a/queue-4.14/futex-fix-inode-life-time-issue.patch b/queue-4.14/futex-fix-inode-life-time-issue.patch
new file mode 100644 (file)
index 0000000..2b332c7
--- /dev/null
@@ -0,0 +1,222 @@
+From 8019ad13ef7f64be44d4f892af9c840179009254 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Wed, 4 Mar 2020 11:28:31 +0100
+Subject: futex: Fix inode life-time issue
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 8019ad13ef7f64be44d4f892af9c840179009254 upstream.
+
+As reported by Jann, ihold() does not in fact guarantee inode
+persistence. And instead of making it so, replace the usage of inode
+pointers with a per boot, machine wide, unique inode identifier.
+
+This sequence number is global, but shared (file backed) futexes are
+rare enough that this should not become a performance issue.
+
+Reported-by: Jann Horn <jannh@google.com>
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/inode.c            |    1 
+ include/linux/fs.h    |    1 
+ include/linux/futex.h |   17 +++++----
+ kernel/futex.c        |   89 +++++++++++++++++++++++++++++---------------------
+ 4 files changed, 65 insertions(+), 43 deletions(-)
+
+--- a/fs/inode.c
++++ b/fs/inode.c
+@@ -135,6 +135,7 @@ int inode_init_always(struct super_block
+       inode->i_sb = sb;
+       inode->i_blkbits = sb->s_blocksize_bits;
+       inode->i_flags = 0;
++      atomic64_set(&inode->i_sequence, 0);
+       atomic_set(&inode->i_count, 1);
+       inode->i_op = &empty_iops;
+       inode->i_fop = &no_open_fops;
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -645,6 +645,7 @@ struct inode {
+               struct rcu_head         i_rcu;
+       };
+       u64                     i_version;
++      atomic64_t              i_sequence; /* see futex */
+       atomic_t                i_count;
+       atomic_t                i_dio_count;
+       atomic_t                i_writecount;
+--- a/include/linux/futex.h
++++ b/include/linux/futex.h
+@@ -34,23 +34,26 @@ long do_futex(u32 __user *uaddr, int op,
+ union futex_key {
+       struct {
++              u64 i_seq;
+               unsigned long pgoff;
+-              struct inode *inode;
+-              int offset;
++              unsigned int offset;
+       } shared;
+       struct {
++              union {
++                      struct mm_struct *mm;
++                      u64 __tmp;
++              };
+               unsigned long address;
+-              struct mm_struct *mm;
+-              int offset;
++              unsigned int offset;
+       } private;
+       struct {
++              u64 ptr;
+               unsigned long word;
+-              void *ptr;
+-              int offset;
++              unsigned int offset;
+       } both;
+ };
+-#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } }
++#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } }
+ #ifdef CONFIG_FUTEX
+ enum {
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -445,7 +445,7 @@ static void get_futex_key_refs(union fut
+       switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
+       case FUT_OFF_INODE:
+-              ihold(key->shared.inode); /* implies smp_mb(); (B) */
++              smp_mb();               /* explicit smp_mb(); (B) */
+               break;
+       case FUT_OFF_MMSHARED:
+               futex_get_mm(key); /* implies smp_mb(); (B) */
+@@ -479,7 +479,6 @@ static void drop_futex_key_refs(union fu
+       switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
+       case FUT_OFF_INODE:
+-              iput(key->shared.inode);
+               break;
+       case FUT_OFF_MMSHARED:
+               mmdrop(key->private.mm);
+@@ -487,6 +486,46 @@ static void drop_futex_key_refs(union fu
+       }
+ }
++/*
++ * Generate a machine wide unique identifier for this inode.
++ *
++ * This relies on u64 not wrapping in the life-time of the machine; which with
++ * 1ns resolution means almost 585 years.
++ *
++ * This further relies on the fact that a well formed program will not unmap
++ * the file while it has a (shared) futex waiting on it. This mapping will have
++ * a file reference which pins the mount and inode.
++ *
++ * If for some reason an inode gets evicted and read back in again, it will get
++ * a new sequence number and will _NOT_ match, even though it is the exact same
++ * file.
++ *
++ * It is important that match_futex() will never have a false-positive, esp.
++ * for PI futexes that can mess up the state. The above argues that false-negatives
++ * are only possible for malformed programs.
++ */
++static u64 get_inode_sequence_number(struct inode *inode)
++{
++      static atomic64_t i_seq;
++      u64 old;
++
++      /* Does the inode already have a sequence number? */
++      old = atomic64_read(&inode->i_sequence);
++      if (likely(old))
++              return old;
++
++      for (;;) {
++              u64 new = atomic64_add_return(1, &i_seq);
++              if (WARN_ON_ONCE(!new))
++                      continue;
++
++              old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new);
++              if (old)
++                      return old;
++              return new;
++      }
++}
++
+ /**
+  * get_futex_key() - Get parameters which are the keys for a futex
+  * @uaddr:    virtual address of the futex
+@@ -499,9 +538,15 @@ static void drop_futex_key_refs(union fu
+  *
+  * The key words are stored in @key on success.
+  *
+- * For shared mappings, it's (page->index, file_inode(vma->vm_file),
+- * offset_within_page).  For private mappings, it's (uaddr, current->mm).
+- * We can usually work out the index without swapping in the page.
++ * For shared mappings (when @fshared), the key is:
++ *   ( inode->i_sequence, page->index, offset_within_page )
++ * [ also see get_inode_sequence_number() ]
++ *
++ * For private mappings (or when !@fshared), the key is:
++ *   ( current->mm, address, 0 )
++ *
++ * This allows (cross process, where applicable) identification of the futex
++ * without keeping the page pinned for the duration of the FUTEX_WAIT.
+  *
+  * lock_page() might sleep, the caller should not hold a spinlock.
+  */
+@@ -641,8 +686,6 @@ again:
+               key->private.mm = mm;
+               key->private.address = address;
+-              get_futex_key_refs(key); /* implies smp_mb(); (B) */
+-
+       } else {
+               struct inode *inode;
+@@ -674,40 +717,14 @@ again:
+                       goto again;
+               }
+-              /*
+-               * Take a reference unless it is about to be freed. Previously
+-               * this reference was taken by ihold under the page lock
+-               * pinning the inode in place so i_lock was unnecessary. The
+-               * only way for this check to fail is if the inode was
+-               * truncated in parallel which is almost certainly an
+-               * application bug. In such a case, just retry.
+-               *
+-               * We are not calling into get_futex_key_refs() in file-backed
+-               * cases, therefore a successful atomic_inc return below will
+-               * guarantee that get_futex_key() will still imply smp_mb(); (B).
+-               */
+-              if (!atomic_inc_not_zero(&inode->i_count)) {
+-                      rcu_read_unlock();
+-                      put_page(page);
+-
+-                      goto again;
+-              }
+-
+-              /* Should be impossible but lets be paranoid for now */
+-              if (WARN_ON_ONCE(inode->i_mapping != mapping)) {
+-                      err = -EFAULT;
+-                      rcu_read_unlock();
+-                      iput(inode);
+-
+-                      goto out;
+-              }
+-
+               key->both.offset |= FUT_OFF_INODE; /* inode-based key */
+-              key->shared.inode = inode;
++              key->shared.i_seq = get_inode_sequence_number(inode);
+               key->shared.pgoff = basepage_index(tail);
+               rcu_read_unlock();
+       }
++      get_futex_key_refs(key); /* implies smp_mb(); (B) */
++
+ out:
+       put_page(page);
+       return err;
diff --git a/queue-4.14/futex-unbreak-futex-hashing.patch b/queue-4.14/futex-unbreak-futex-hashing.patch
new file mode 100644 (file)
index 0000000..c9197d1
--- /dev/null
@@ -0,0 +1,45 @@
+From 8d67743653dce5a0e7aa500fcccb237cde7ad88e Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 8 Mar 2020 19:07:17 +0100
+Subject: futex: Unbreak futex hashing
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 8d67743653dce5a0e7aa500fcccb237cde7ad88e upstream.
+
+The recent futex inode life time fix changed the ordering of the futex key
+union struct members, but forgot to adjust the hash function accordingly,
+
+As a result the hashing omits the leading 64bit and even hashes beyond the
+futex key causing a bad hash distribution which led to a ~100% performance
+regression.
+
+Hand in the futex key pointer instead of a random struct member and make
+the size calculation based of the struct offset.
+
+Fixes: 8019ad13ef7f ("futex: Fix inode life-time issue")
+Reported-by: Rong Chen <rong.a.chen@intel.com>
+Decoded-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Rong Chen <rong.a.chen@intel.com>
+Link: https://lkml.kernel.org/r/87h7yy90ve.fsf@nanos.tec.linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/futex.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -401,9 +401,9 @@ static inline int hb_waiters_pending(str
+  */
+ static struct futex_hash_bucket *hash_futex(union futex_key *key)
+ {
+-      u32 hash = jhash2((u32*)&key->both.word,
+-                        (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
++      u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4,
+                         key->both.offset);
++
+       return &futex_queues[hash & (futex_hashsize - 1)];
+ }
index 7378958d1c274d44b8f1b7b081c486c613d26b16..803245dfe89485bccd1a456307e93e810b878ac1 100644 (file)
@@ -42,3 +42,5 @@ usb-cdc-acm-fix-rounding-error-in-tiocsserial.patch
 iio-adc-at91-sama5d2_adc-fix-channel-configuration-f.patch
 iio-adc-at91-sama5d2_adc-fix-differential-channels-i.patch
 kbuild-disable-wpointer-to-enum-cast.patch
+futex-fix-inode-life-time-issue.patch
+futex-unbreak-futex-hashing.patch