From a2fa31f991e0576ba71e6ceb0b3a3aab7c80e16f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 24 Mar 2020 09:02:37 +0100 Subject: [PATCH] 4.14-stable patches added patches: futex-fix-inode-life-time-issue.patch futex-unbreak-futex-hashing.patch --- .../futex-fix-inode-life-time-issue.patch | 222 ++++++++++++++++++ queue-4.14/futex-unbreak-futex-hashing.patch | 45 ++++ queue-4.14/series | 2 + 3 files changed, 269 insertions(+) create mode 100644 queue-4.14/futex-fix-inode-life-time-issue.patch create mode 100644 queue-4.14/futex-unbreak-futex-hashing.patch diff --git a/queue-4.14/futex-fix-inode-life-time-issue.patch b/queue-4.14/futex-fix-inode-life-time-issue.patch new file mode 100644 index 00000000000..2b332c7a519 --- /dev/null +++ b/queue-4.14/futex-fix-inode-life-time-issue.patch @@ -0,0 +1,222 @@ +From 8019ad13ef7f64be44d4f892af9c840179009254 Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra +Date: Wed, 4 Mar 2020 11:28:31 +0100 +Subject: futex: Fix inode life-time issue + +From: Peter Zijlstra + +commit 8019ad13ef7f64be44d4f892af9c840179009254 upstream. + +As reported by Jann, ihold() does not in fact guarantee inode +persistence. And instead of making it so, replace the usage of inode +pointers with a per boot, machine wide, unique inode identifier. + +This sequence number is global, but shared (file backed) futexes are +rare enough that this should not become a performance issue. + +Reported-by: Jann Horn +Suggested-by: Linus Torvalds +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Greg Kroah-Hartman + +--- + fs/inode.c | 1 + include/linux/fs.h | 1 + include/linux/futex.h | 17 +++++---- + kernel/futex.c | 89 +++++++++++++++++++++++++++++--------------------- + 4 files changed, 65 insertions(+), 43 deletions(-) + +--- a/fs/inode.c ++++ b/fs/inode.c +@@ -135,6 +135,7 @@ int inode_init_always(struct super_block + inode->i_sb = sb; + inode->i_blkbits = sb->s_blocksize_bits; + inode->i_flags = 0; ++ atomic64_set(&inode->i_sequence, 0); + atomic_set(&inode->i_count, 1); + inode->i_op = &empty_iops; + inode->i_fop = &no_open_fops; +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -645,6 +645,7 @@ struct inode { + struct rcu_head i_rcu; + }; + u64 i_version; ++ atomic64_t i_sequence; /* see futex */ + atomic_t i_count; + atomic_t i_dio_count; + atomic_t i_writecount; +--- a/include/linux/futex.h ++++ b/include/linux/futex.h +@@ -34,23 +34,26 @@ long do_futex(u32 __user *uaddr, int op, + + union futex_key { + struct { ++ u64 i_seq; + unsigned long pgoff; +- struct inode *inode; +- int offset; ++ unsigned int offset; + } shared; + struct { ++ union { ++ struct mm_struct *mm; ++ u64 __tmp; ++ }; + unsigned long address; +- struct mm_struct *mm; +- int offset; ++ unsigned int offset; + } private; + struct { ++ u64 ptr; + unsigned long word; +- void *ptr; +- int offset; ++ unsigned int offset; + } both; + }; + +-#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } } ++#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } } + + #ifdef CONFIG_FUTEX + enum { +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -445,7 +445,7 @@ static void get_futex_key_refs(union fut + + switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { + case FUT_OFF_INODE: +- ihold(key->shared.inode); /* implies smp_mb(); (B) */ ++ smp_mb(); /* explicit smp_mb(); (B) */ + break; + case FUT_OFF_MMSHARED: + futex_get_mm(key); /* implies smp_mb(); (B) */ +@@ -479,7 +479,6 @@ static void drop_futex_key_refs(union fu + + switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { + case FUT_OFF_INODE: +- iput(key->shared.inode); + break; + case FUT_OFF_MMSHARED: + mmdrop(key->private.mm); +@@ -487,6 +486,46 @@ static void drop_futex_key_refs(union fu + } + } + ++/* ++ * Generate a machine wide unique identifier for this inode. ++ * ++ * This relies on u64 not wrapping in the life-time of the machine; which with ++ * 1ns resolution means almost 585 years. ++ * ++ * This further relies on the fact that a well formed program will not unmap ++ * the file while it has a (shared) futex waiting on it. This mapping will have ++ * a file reference which pins the mount and inode. ++ * ++ * If for some reason an inode gets evicted and read back in again, it will get ++ * a new sequence number and will _NOT_ match, even though it is the exact same ++ * file. ++ * ++ * It is important that match_futex() will never have a false-positive, esp. ++ * for PI futexes that can mess up the state. The above argues that false-negatives ++ * are only possible for malformed programs. ++ */ ++static u64 get_inode_sequence_number(struct inode *inode) ++{ ++ static atomic64_t i_seq; ++ u64 old; ++ ++ /* Does the inode already have a sequence number? */ ++ old = atomic64_read(&inode->i_sequence); ++ if (likely(old)) ++ return old; ++ ++ for (;;) { ++ u64 new = atomic64_add_return(1, &i_seq); ++ if (WARN_ON_ONCE(!new)) ++ continue; ++ ++ old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new); ++ if (old) ++ return old; ++ return new; ++ } ++} ++ + /** + * get_futex_key() - Get parameters which are the keys for a futex + * @uaddr: virtual address of the futex +@@ -499,9 +538,15 @@ static void drop_futex_key_refs(union fu + * + * The key words are stored in @key on success. + * +- * For shared mappings, it's (page->index, file_inode(vma->vm_file), +- * offset_within_page). For private mappings, it's (uaddr, current->mm). +- * We can usually work out the index without swapping in the page. ++ * For shared mappings (when @fshared), the key is: ++ * ( inode->i_sequence, page->index, offset_within_page ) ++ * [ also see get_inode_sequence_number() ] ++ * ++ * For private mappings (or when !@fshared), the key is: ++ * ( current->mm, address, 0 ) ++ * ++ * This allows (cross process, where applicable) identification of the futex ++ * without keeping the page pinned for the duration of the FUTEX_WAIT. + * + * lock_page() might sleep, the caller should not hold a spinlock. + */ +@@ -641,8 +686,6 @@ again: + key->private.mm = mm; + key->private.address = address; + +- get_futex_key_refs(key); /* implies smp_mb(); (B) */ +- + } else { + struct inode *inode; + +@@ -674,40 +717,14 @@ again: + goto again; + } + +- /* +- * Take a reference unless it is about to be freed. Previously +- * this reference was taken by ihold under the page lock +- * pinning the inode in place so i_lock was unnecessary. The +- * only way for this check to fail is if the inode was +- * truncated in parallel which is almost certainly an +- * application bug. In such a case, just retry. +- * +- * We are not calling into get_futex_key_refs() in file-backed +- * cases, therefore a successful atomic_inc return below will +- * guarantee that get_futex_key() will still imply smp_mb(); (B). +- */ +- if (!atomic_inc_not_zero(&inode->i_count)) { +- rcu_read_unlock(); +- put_page(page); +- +- goto again; +- } +- +- /* Should be impossible but lets be paranoid for now */ +- if (WARN_ON_ONCE(inode->i_mapping != mapping)) { +- err = -EFAULT; +- rcu_read_unlock(); +- iput(inode); +- +- goto out; +- } +- + key->both.offset |= FUT_OFF_INODE; /* inode-based key */ +- key->shared.inode = inode; ++ key->shared.i_seq = get_inode_sequence_number(inode); + key->shared.pgoff = basepage_index(tail); + rcu_read_unlock(); + } + ++ get_futex_key_refs(key); /* implies smp_mb(); (B) */ ++ + out: + put_page(page); + return err; diff --git a/queue-4.14/futex-unbreak-futex-hashing.patch b/queue-4.14/futex-unbreak-futex-hashing.patch new file mode 100644 index 00000000000..c9197d17995 --- /dev/null +++ b/queue-4.14/futex-unbreak-futex-hashing.patch @@ -0,0 +1,45 @@ +From 8d67743653dce5a0e7aa500fcccb237cde7ad88e Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Sun, 8 Mar 2020 19:07:17 +0100 +Subject: futex: Unbreak futex hashing + +From: Thomas Gleixner + +commit 8d67743653dce5a0e7aa500fcccb237cde7ad88e upstream. + +The recent futex inode life time fix changed the ordering of the futex key +union struct members, but forgot to adjust the hash function accordingly, + +As a result the hashing omits the leading 64bit and even hashes beyond the +futex key causing a bad hash distribution which led to a ~100% performance +regression. + +Hand in the futex key pointer instead of a random struct member and make +the size calculation based of the struct offset. + +Fixes: 8019ad13ef7f ("futex: Fix inode life-time issue") +Reported-by: Rong Chen +Decoded-by: Linus Torvalds +Signed-off-by: Thomas Gleixner +Tested-by: Rong Chen +Link: https://lkml.kernel.org/r/87h7yy90ve.fsf@nanos.tec.linutronix.de +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/futex.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -401,9 +401,9 @@ static inline int hb_waiters_pending(str + */ + static struct futex_hash_bucket *hash_futex(union futex_key *key) + { +- u32 hash = jhash2((u32*)&key->both.word, +- (sizeof(key->both.word)+sizeof(key->both.ptr))/4, ++ u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4, + key->both.offset); ++ + return &futex_queues[hash & (futex_hashsize - 1)]; + } + diff --git a/queue-4.14/series b/queue-4.14/series index 7378958d1c2..803245dfe89 100644 --- a/queue-4.14/series +++ b/queue-4.14/series @@ -42,3 +42,5 @@ usb-cdc-acm-fix-rounding-error-in-tiocsserial.patch iio-adc-at91-sama5d2_adc-fix-channel-configuration-f.patch iio-adc-at91-sama5d2_adc-fix-differential-channels-i.patch kbuild-disable-wpointer-to-enum-cast.patch +futex-fix-inode-life-time-issue.patch +futex-unbreak-futex-hashing.patch -- 2.47.3