From: Greg Kroah-Hartman
Date: Wed, 26 Oct 2016 08:43:53 +0000 (+0200)
Subject: 4.4-stable patches
X-Git-Tag: v4.8.5~11
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=0921959b5a2027283a9ac4d400227e08b1198c62;p=thirdparty%2Fkernel%2Fstable-queue.git

4.4-stable patches

added patches:
      arc-don-t-leak-bits-of-kernel-stack-into-coredump.patch
      fs-super.c-fix-race-between-freeze_super-and-thaw_super.patch
      ipc-sem.c-fix-complex_count-vs.-simple-op-race.patch
      lightnvm-ensure-that-nvm_dev_ops-can-be-used-without-config_nvm.patch
      metag-only-define-atomic_dec_if_positive-conditionally.patch
      mm-filemap-don-t-plant-shadow-entries-without-radix-tree-node.patch
---
diff --git a/queue-4.4/arc-don-t-leak-bits-of-kernel-stack-into-coredump.patch b/queue-4.4/arc-don-t-leak-bits-of-kernel-stack-into-coredump.patch
new file mode 100644
index 00000000000..05380024406
--- /dev/null
+++ b/queue-4.4/arc-don-t-leak-bits-of-kernel-stack-into-coredump.patch
@@ -0,0 +1,50 @@
+From 7798bf2140ebcc36eafec6a4194fffd8d585d471 Mon Sep 17 00:00:00 2001
+From: Al Viro
+Date: Sat, 10 Sep 2016 16:31:04 -0400
+Subject: arc: don't leak bits of kernel stack into coredump
+
+From: Al Viro
+
+commit 7798bf2140ebcc36eafec6a4194fffd8d585d471 upstream.
+
+On faulting sigreturn we do get SIGSEGV, all right, but anything
+we'd put into pt_regs could end up in the coredump. And since
+__copy_from_user() never zeroed on arc, we'd better bugger off
+on its failure without copying random uninitialized bits of
+kernel stack into pt_regs...
+
+Signed-off-by: Al Viro
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/arc/kernel/signal.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/arch/arc/kernel/signal.c
++++ b/arch/arc/kernel/signal.c
+@@ -107,13 +107,13 @@ static int restore_usr_regs(struct pt_re
+ 	struct user_regs_struct uregs;
+ 
+ 	err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set));
+-	if (!err)
+-		set_current_blocked(&set);
+-
+ 	err |= __copy_from_user(&uregs.scratch,
+ 				&(sf->uc.uc_mcontext.regs.scratch),
+ 				sizeof(sf->uc.uc_mcontext.regs.scratch));
++	if (err)
++		return err;
+ 
++	set_current_blocked(&set);
+ 	regs->bta	= uregs.scratch.bta;
+ 	regs->lp_start	= uregs.scratch.lp_start;
+ 	regs->lp_end	= uregs.scratch.lp_end;
+@@ -138,7 +138,7 @@ static int restore_usr_regs(struct pt_re
+ 	regs->r0 = uregs.scratch.r0;
+ 	regs->sp = uregs.scratch.sp;
+ 
+-	return err;
++	return 0;
+ }
+ 
+ static inline int is_do_ss_needed(unsigned int magic)
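To make the leak pattern concrete, here is a minimal user-space sketch of the rule the fix enforces. copy_in() is a hypothetical stand-in for arc's __copy_from_user() (nonzero return on fault, destination possibly left partially written); it is not a kernel API:

    #include <stdio.h>
    #include <string.h>

    struct regs { unsigned long bta, lp_start, r0; };

    /* Stand-in for the pre-fix arc __copy_from_user(): reports a fault
     * with a nonzero return but does NOT zero the destination, so dst
     * may keep whatever (stack) garbage it held before the call. */
    static int copy_in(void *dst, const void *src, size_t n)
    {
            if (!src)
                    return -1;      /* simulated fault */
            memcpy(dst, src, n);
            return 0;
    }

    /* The fixed shape of restore_usr_regs(): check the accumulated
     * error and bail out before one possibly-uninitialized byte is
     * published into *regs. */
    static int restore_regs(struct regs *regs, const struct regs *user)
    {
            struct regs tmp;        /* uninitialized until copy_in() succeeds */
            int err = copy_in(&tmp, user, sizeof(tmp));

            if (err)
                    return err;     /* nothing leaks into *regs */
            *regs = tmp;
            return 0;
    }

    int main(void)
    {
            struct regs r = { 0 };

            printf("faulting restore -> %d\n", restore_regs(&r, NULL));
            printf("r.r0 still %lu: no garbage was copied\n", r.r0);
            return 0;
    }

The old code instead returned err only at the very end, after pt_regs had already been filled from the failed copy; on a faulting sigreturn those bytes could reach the coredump.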
diff --git a/queue-4.4/fs-super.c-fix-race-between-freeze_super-and-thaw_super.patch b/queue-4.4/fs-super.c-fix-race-between-freeze_super-and-thaw_super.patch
new file mode 100644
index 00000000000..d1abc9de7fe
--- /dev/null
+++ b/queue-4.4/fs-super.c-fix-race-between-freeze_super-and-thaw_super.patch
@@ -0,0 +1,49 @@
+From 89f39af129382a40d7cd1f6914617282cfeee28e Mon Sep 17 00:00:00 2001
+From: Oleg Nesterov
+Date: Mon, 26 Sep 2016 18:07:48 +0200
+Subject: fs/super.c: fix race between freeze_super() and thaw_super()
+
+From: Oleg Nesterov
+
+commit 89f39af129382a40d7cd1f6914617282cfeee28e upstream.
+
+Change thaw_super() to check frozen != SB_FREEZE_COMPLETE rather than
+frozen == SB_UNFROZEN, otherwise it can race with freeze_super() which
+drops sb->s_umount after SB_FREEZE_WRITE to preserve the lock ordering.
+
+In this case thaw_super() will wrongly call s_op->unfreeze_fs() before
+it was actually frozen, and call sb_freeze_unlock() which leads to the
+unbalanced percpu_up_write(). Unfortunately lockdep can't detect this,
+so this triggers misc BUG_ON()'s in kernel/rcu/sync.c.
+
+Reported-and-tested-by: Nikolay Borisov
+Signed-off-by: Oleg Nesterov
+Signed-off-by: Al Viro
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/super.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/fs/super.c
++++ b/fs/super.c
+@@ -1326,8 +1326,8 @@ int freeze_super(struct super_block *sb)
+ 		}
+ 	}
+ 	/*
+-	 * This is just for debugging purposes so that fs can warn if it
+-	 * sees write activity when frozen is set to SB_FREEZE_COMPLETE.
++	 * For debugging purposes so that fs can warn if it sees write activity
++	 * when frozen is set to SB_FREEZE_COMPLETE, and for thaw_super().
+ 	 */
+ 	sb->s_writers.frozen = SB_FREEZE_COMPLETE;
+ 	up_write(&sb->s_umount);
+@@ -1346,7 +1346,7 @@ int thaw_super(struct super_block *sb)
+ 	int error;
+ 
+ 	down_write(&sb->s_umount);
+-	if (sb->s_writers.frozen == SB_UNFROZEN) {
++	if (sb->s_writers.frozen != SB_FREEZE_COMPLETE) {
+ 		up_write(&sb->s_umount);
+ 		return -EINVAL;
+ 	}
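The thaw-side check is easiest to see as a state test. A compressed sketch follows; the state names mirror the kernel's sb_writers freeze levels, but the values and the demo itself are illustrative only:

    #include <stdio.h>

    enum {
            SB_UNFROZEN,
            SB_FREEZE_WRITE,        /* freeze_super() may drop s_umount here */
            SB_FREEZE_PAGEFAULT,
            SB_FREEZE_FS,
            SB_FREEZE_COMPLETE
    };

    /* thaw as patched: only a *fully* frozen superblock may be thawed.
     * The old test (reject only state == SB_UNFROZEN) let a thaw slip
     * into the window between SB_FREEZE_WRITE and SB_FREEZE_COMPLETE. */
    static int thaw(int state)
    {
            if (state != SB_FREEZE_COMPLETE)
                    return -22;     /* -EINVAL: not (fully) frozen */
            return 0;               /* safe to call ->unfreeze_fs() etc. */
    }

    int main(void)
    {
            printf("thaw(UNFROZEN)        = %d\n", thaw(SB_UNFROZEN));        /* rejected, as before */
            printf("thaw(FREEZE_WRITE)    = %d\n", thaw(SB_FREEZE_WRITE));    /* now rejected too */
            printf("thaw(FREEZE_COMPLETE) = %d\n", thaw(SB_FREEZE_COMPLETE)); /* allowed */
            return 0;
    }

freeze_super() parks the superblock in the intermediate states while s_umount is temporarily dropped; only the != SB_FREEZE_COMPLETE test keeps a concurrent thaw_super() out of that window.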
diff --git a/queue-4.4/ipc-sem.c-fix-complex_count-vs.-simple-op-race.patch b/queue-4.4/ipc-sem.c-fix-complex_count-vs.-simple-op-race.patch
new file mode 100644
index 00000000000..ab8f503f67f
--- /dev/null
+++ b/queue-4.4/ipc-sem.c-fix-complex_count-vs.-simple-op-race.patch
@@ -0,0 +1,313 @@
+From 5864a2fd3088db73d47942370d0f7210a807b9bc Mon Sep 17 00:00:00 2001
+From: Manfred Spraul
+Date: Tue, 11 Oct 2016 13:54:50 -0700
+Subject: ipc/sem.c: fix complex_count vs. simple op race
+
+From: Manfred Spraul
+
+commit 5864a2fd3088db73d47942370d0f7210a807b9bc upstream.
+
+Commit 6d07b68ce16a ("ipc/sem.c: optimize sem_lock()") introduced a
+race:
+
+sem_lock has a fast path that allows parallel simple operations.
+There are two reasons why a simple operation cannot run in parallel:
+ - a non-simple operation is ongoing (sma->sem_perm.lock held)
+ - a complex operation is sleeping (sma->complex_count != 0)
+
+As both facts are stored independently, a thread can bypass the current
+checks by sleeping in the right positions. See below for more details
+(or kernel bugzilla 105651).
+
+The patch fixes that by creating one variable (complex_mode)
+that tracks both reasons why parallel operations are not possible.
+
+The patch also updates stale documentation regarding the locking.
+
+With regards to stable kernels:
+The patch is required for all kernels that include the
+commit 6d07b68ce16a ("ipc/sem.c: optimize sem_lock()") (3.10?)
+
+The alternative is to revert the patch that introduced the race.
+
+The patch is safe for backporting, i.e. it makes no assumptions
+about memory barriers in spin_unlock_wait().
+
+Background:
+Here is the race of the current implementation:
+
+Thread A: (simple op)
+- does the first "sma->complex_count == 0" test
+
+Thread B: (complex op)
+- does sem_lock(): This includes an array scan. But the scan can't
+  find Thread A, because Thread A does not own sem->lock yet.
+- the thread does the operation, increases complex_count,
+  drops sem_lock, sleeps
+
+Thread A:
+- spin_lock(&sem->lock), spin_is_locked(sma->sem_perm.lock)
+- sleeps before the complex_count test
+
+Thread C: (complex op)
+- does sem_lock (no array scan, complex_count==1)
+- wakes up Thread B.
+- decrements complex_count
+
+Thread A:
+- does the complex_count test
+
+Bug:
+Now both thread A and thread C operate on the same array, without
+any synchronization.
+
+Fixes: 6d07b68ce16a ("ipc/sem.c: optimize sem_lock()")
+Link: http://lkml.kernel.org/r/1469123695-5661-1-git-send-email-manfred@colorfullife.com
+Reported-by:
+Cc: "H. Peter Anvin"
+Cc: Peter Zijlstra
+Cc: Davidlohr Bueso
+Cc: Thomas Gleixner
+Cc: Ingo Molnar
+Cc: <1vier1@web.de>
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ include/linux/sem.h |    1 
+ ipc/sem.c           |  130 ++++++++++++++++++++++++++++++----------------------
+ 2 files changed, 76 insertions(+), 55 deletions(-)
+
+--- a/include/linux/sem.h
++++ b/include/linux/sem.h
+@@ -21,6 +21,7 @@ struct sem_array {
+ 	struct list_head list_id;	/* undo requests on this array */
+ 	int		sem_nsems;	/* no. of semaphores in array */
+ 	int		complex_count;	/* pending complex operations */
++	bool		complex_mode;	/* no parallel simple ops */
+ };
+ 
+ #ifdef CONFIG_SYSVIPC
+--- a/ipc/sem.c
++++ b/ipc/sem.c
+@@ -155,14 +155,21 @@ static int sysvipc_sem_proc_show(struct
+ 
+ /*
+  * Locking:
++ * a) global sem_lock() for read/write
+  *	sem_undo.id_next,
+  *	sem_array.complex_count,
+- *	sem_array.pending{_alter,_cont},
+- *	sem_array.sem_undo: global sem_lock() for read/write
+- *	sem_undo.proc_next: only "current" is allowed to read/write that field.
++ *	sem_array.complex_mode
++ *	sem_array.pending{_alter,_const},
++ *	sem_array.sem_undo
+  *
++ * b) global or semaphore sem_lock() for read/write:
+  *	sem_array.sem_base[i].pending_{const,alter}:
+- *		global or semaphore sem_lock() for read/write
++ *	sem_array.complex_mode (for read)
++ *
++ * c) special:
++ *	sem_undo_list.list_proc:
++ *	* undo_list->lock for write
++ *	* rcu for read
+  */
+ 
+ #define sc_semmsl	sem_ctls[0]
+@@ -263,24 +270,25 @@ static void sem_rcu_free(struct rcu_head
+ #define ipc_smp_acquire__after_spin_is_unlocked()	smp_rmb()
+ 
+ /*
+- * Wait until all currently ongoing simple ops have completed.
++ * Enter the mode suitable for non-simple operations:
+  * Caller must own sem_perm.lock.
+- * New simple ops cannot start, because simple ops first check
+- *	that sem_perm.lock is free.
+- *	that a) sem_perm.lock is free and b) complex_count is 0.
+  */
+-static void sem_wait_array(struct sem_array *sma)
++static void complexmode_enter(struct sem_array *sma)
+ {
+ 	int i;
+ 	struct sem *sem;
+ 
+-	if (sma->complex_count)  {
+-		/* The thread that increased sma->complex_count waited on
+-		 * all sem->lock locks. Thus we don't need to wait again.
+-		 */
++	if (sma->complex_mode) {
++		/* We are already in complex_mode. Nothing to do */
+ 		return;
+ 	}
+ 
++	/* We need a full barrier after seting complex_mode:
++	 * The write to complex_mode must be visible
++	 * before we read the first sem->lock spinlock state.
++	 */
++	smp_store_mb(sma->complex_mode, true);
++
+ 	for (i = 0; i < sma->sem_nsems; i++) {
+ 		sem = sma->sem_base + i;
+ 		spin_unlock_wait(&sem->lock);
+@@ -289,6 +297,28 @@ static void sem_wait_array(struct sem_ar
+ }
+ 
+ /*
++ * Try to leave the mode that disallows simple operations:
++ * Caller must own sem_perm.lock.
++ */
++static void complexmode_tryleave(struct sem_array *sma)
++{
++	if (sma->complex_count)  {
++		/* Complex ops are sleeping.
++		 * We must stay in complex mode
++		 */
++		return;
++	}
++	/*
++	 * Immediately after setting complex_mode to false,
++	 * a simple op can start. Thus: all memory writes
++	 * performed by the current operation must be visible
++	 * before we set complex_mode to false.
++	 */
++	smp_store_release(&sma->complex_mode, false);
++}
++
++#define SEM_GLOBAL_LOCK	(-1)
++/*
+  * If the request contains only one semaphore operation, and there are
+  * no complex transactions pending, lock only the semaphore involved.
+  * Otherwise, lock the entire semaphore array, since we either have
+@@ -304,56 +334,42 @@ static inline int sem_lock(struct sem_ar
+ 		/* Complex operation - acquire a full lock */
+ 		ipc_lock_object(&sma->sem_perm);
+ 
+-		/* And wait until all simple ops that are processed
+-		 * right now have dropped their locks.
+-		 */
+-		sem_wait_array(sma);
+-		return -1;
++		/* Prevent parallel simple ops */
++		complexmode_enter(sma);
++		return SEM_GLOBAL_LOCK;
+ 	}
+ 
+ 	/*
+ 	 * Only one semaphore affected - try to optimize locking.
+-	 * The rules are:
+-	 * - optimized locking is possible if no complex operation
+-	 *   is either enqueued or processed right now.
+-	 * - The test for enqueued complex ops is simple:
+-	 *      sma->complex_count != 0
+-	 * - Testing for complex ops that are processed right now is
+-	 *   a bit more difficult. Complex ops acquire the full lock
+-	 *   and first wait that the running simple ops have completed.
+-	 *   (see above)
+-	 * Thus: If we own a simple lock and the global lock is free
+-	 *	and complex_count is now 0, then it will stay 0 and
+-	 *	thus just locking sem->lock is sufficient.
++	 * Optimized locking is possible if no complex operation
++	 * is either enqueued or processed right now.
++	 *
++	 * Both facts are tracked by complex_mode.
+ 	 */
+ 	sem = sma->sem_base + sops->sem_num;
+ 
+-	if (sma->complex_count == 0) {
++	/*
++	 * Initial check for complex_mode. Just an optimization,
++	 * no locking, no memory barrier.
++	 */
++	if (!sma->complex_mode) {
+ 		/*
+ 		 * It appears that no complex operation is around.
+ 		 * Acquire the per-semaphore lock.
+ 		 */
+ 		spin_lock(&sem->lock);
+ 
+-		/* Then check that the global lock is free */
+-		if (!spin_is_locked(&sma->sem_perm.lock)) {
+-			/*
+-			 * We need a memory barrier with acquire semantics,
+-			 * otherwise we can race with another thread that does:
+-			 *	complex_count++;
+-			 *	spin_unlock(sem_perm.lock);
+-			 */
+-			ipc_smp_acquire__after_spin_is_unlocked();
++		/*
++		 * See 51d7d5205d33
++		 * ("powerpc: Add smp_mb() to arch_spin_is_locked()"):
++		 * A full barrier is required: the write of sem->lock
++		 * must be visible before the read is executed
++		 */
++		smp_mb();
+ 
+-			/*
+-			 * Now repeat the test of complex_count:
+-			 * It can't change anymore until we drop sem->lock.
+-			 * Thus: if is now 0, then it will stay 0.
+-			 */
+-			if (sma->complex_count == 0) {
+-				/* fast path successful! */
+-				return sops->sem_num;
+-			}
++		if (!smp_load_acquire(&sma->complex_mode)) {
++			/* fast path successful! */
++			return sops->sem_num;
+ 		}
+ 		spin_unlock(&sem->lock);
+ 	}
+@@ -373,15 +389,16 @@ static inline int sem_lock(struct sem_ar
+ 		/* Not a false alarm, thus complete the sequence for a
+ 		 * full lock.
+ 		 */
+-		sem_wait_array(sma);
+-		return -1;
++		complexmode_enter(sma);
++		return SEM_GLOBAL_LOCK;
+ 	}
+ }
+ 
+ static inline void sem_unlock(struct sem_array *sma, int locknum)
+ {
+-	if (locknum == -1) {
++	if (locknum == SEM_GLOBAL_LOCK) {
+ 		unmerge_queues(sma);
++		complexmode_tryleave(sma);
+ 		ipc_unlock_object(&sma->sem_perm);
+ 	} else {
+ 		struct sem *sem = sma->sem_base + locknum;
+@@ -533,6 +550,7 @@ static int newary(struct ipc_namespace *
+ 	}
+ 
+ 	sma->complex_count = 0;
++	sma->complex_mode = true; /* dropped by sem_unlock below */
+ 	INIT_LIST_HEAD(&sma->pending_alter);
+ 	INIT_LIST_HEAD(&sma->pending_const);
+ 	INIT_LIST_HEAD(&sma->list_id);
+@@ -2186,10 +2204,10 @@ static int sysvipc_sem_proc_show(struct
+ 	/*
+ 	 * The proc interface isn't aware of sem_lock(), it calls
+ 	 * ipc_lock_object() directly (in sysvipc_find_ipc).
+-	 * In order to stay compatible with sem_lock(), we must wait until
+-	 * all simple semop() calls have left their critical regions.
++	 * In order to stay compatible with sem_lock(), we must
++	 * enter / leave complex_mode.
+ 	 */
+-	sem_wait_array(sma);
++	complexmode_enter(sma);
+ 
+ 	sem_otime = get_semotime(sma);
+ 
+@@ -2206,6 +2224,8 @@ static int sysvipc_sem_proc_show(struct
+ 		   sem_otime,
+ 		   sma->sem_ctime);
+ 
++	complexmode_tryleave(sma);
++
+ 	return 0;
+ }
+ #endif
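For readers without the tree at hand, here is a rough user-space model of the repaired protocol. C11 atomics and a pthread mutex per semaphore slot stand in for the kernel's smp_store_mb()/smp_load_acquire() and spin_unlock_wait(), so this is a sketch of the idea, not a faithful rendition of the kernel's memory-ordering argument:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <pthread.h>

    #define NSEMS 4

    static pthread_mutex_t slot_lock[NSEMS] = {
            PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
            PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER
    };
    static pthread_mutex_t global_lock = PTHREAD_MUTEX_INITIALIZER;
    static atomic_bool complex_mode;        /* the one flag both paths agree on */

    /* Complex path, called with global_lock held: raise the flag first
     * (a seq_cst store stands in for smp_store_mb()), then drain every
     * in-flight simple op by passing through its slot lock (a blunt
     * user-space substitute for spin_unlock_wait()). */
    static void complexmode_enter(void)
    {
            int i;

            if (atomic_load_explicit(&complex_mode, memory_order_relaxed))
                    return;                         /* already in complex mode */
            atomic_store(&complex_mode, true);
            for (i = 0; i < NSEMS; i++) {
                    pthread_mutex_lock(&slot_lock[i]);
                    pthread_mutex_unlock(&slot_lock[i]);
            }
    }

    /* Called with global_lock held, when the last complex op leaves. */
    static void complexmode_tryleave(int complex_count)
    {
            if (complex_count == 0)
                    atomic_store_explicit(&complex_mode, false,
                                          memory_order_release);
    }

    /* Simple path: optimistic check, lock the slot, then re-check the
     * flag before trusting the fast path. Only one flag is tested, so
     * there is no window where "global lock free" and "complex_count
     * zero" are sampled at two different moments -- the bug above. */
    static bool simple_lock(int semnum)
    {
            if (atomic_load_explicit(&complex_mode, memory_order_relaxed))
                    return false;           /* caller takes global_lock instead */
            pthread_mutex_lock(&slot_lock[semnum]);
            if (!atomic_load_explicit(&complex_mode, memory_order_acquire))
                    return true;            /* fast path: slot lock is enough */
            pthread_mutex_unlock(&slot_lock[semnum]);
            return false;
    }

    int main(void)
    {
            if (simple_lock(1)) {
                    /* ... simple operation on slot 1 ... */
                    pthread_mutex_unlock(&slot_lock[1]);
            }
            pthread_mutex_lock(&global_lock);
            complexmode_enter();
            /* ... complex operation across all slots ... */
            complexmode_tryleave(0);
            pthread_mutex_unlock(&global_lock);
            puts("ok");
            return 0;
    }

The essential change mirrors the patch: both reasons to exclude simple ops live in one variable, so the fast path re-checks a single flag under its slot lock instead of sampling sem_perm.lock and complex_count separately.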
diff --git a/queue-4.4/lightnvm-ensure-that-nvm_dev_ops-can-be-used-without-config_nvm.patch b/queue-4.4/lightnvm-ensure-that-nvm_dev_ops-can-be-used-without-config_nvm.patch
new file mode 100644
index 00000000000..8f4bbdee300
--- /dev/null
+++ b/queue-4.4/lightnvm-ensure-that-nvm_dev_ops-can-be-used-without-config_nvm.patch
@@ -0,0 +1,177 @@
+From a7fd9a4f3e8179bab31e4637236ebb0e0b7867c6 Mon Sep 17 00:00:00 2001
+From: Jens Axboe
+Date: Wed, 13 Jan 2016 13:04:11 -0700
+Subject: lightnvm: ensure that nvm_dev_ops can be used without CONFIG_NVM
+
+From: Jens Axboe
+
+commit a7fd9a4f3e8179bab31e4637236ebb0e0b7867c6 upstream.
+
+null_blk defines an empty version of this ops structure if CONFIG_NVM
+isn't set, but it doesn't know the type. Move those bits out of the
+protection of CONFIG_NVM in the main lightnvm include.
+
+Signed-off-by: Jens Axboe
+[pebolle: backport to v4.4]
+Signed-off-by: Paul Bolle
+Signed-off-by: Greg Kroah-Hartman
+
+
+---
+ include/linux/lightnvm.h |  121 ++++++++++++++++++++++++-----------------------
+ 1 file changed, 64 insertions(+), 57 deletions(-)
+
+--- a/include/linux/lightnvm.h
++++ b/include/linux/lightnvm.h
+@@ -1,6 +1,8 @@
+ #ifndef NVM_H
+ #define NVM_H
+ 
++#include <linux/types.h>
++
+ enum {
+ 	NVM_IO_OK = 0,
+ 	NVM_IO_REQUEUE = 1,
+@@ -11,10 +13,71 @@ enum {
+ 	NVM_IOTYPE_GC = 1,
+ };
+ 
++#define NVM_BLK_BITS (16)
++#define NVM_PG_BITS  (16)
++#define NVM_SEC_BITS (8)
++#define NVM_PL_BITS  (8)
++#define NVM_LUN_BITS (8)
++#define NVM_CH_BITS  (8)
++
++struct ppa_addr {
++	/* Generic structure for all addresses */
++	union {
++		struct {
++			u64 blk		: NVM_BLK_BITS;
++			u64 pg		: NVM_PG_BITS;
++			u64 sec		: NVM_SEC_BITS;
++			u64 pl		: NVM_PL_BITS;
++			u64 lun		: NVM_LUN_BITS;
++			u64 ch		: NVM_CH_BITS;
++		} g;
++
++		u64 ppa;
++	};
++};
++
++struct nvm_rq;
++struct nvm_id;
++struct nvm_dev;
++
++typedef int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *);
++typedef int (nvm_bb_update_fn)(struct ppa_addr, int, u8 *, void *);
++typedef int (nvm_id_fn)(struct nvm_dev *, struct nvm_id *);
++typedef int (nvm_get_l2p_tbl_fn)(struct nvm_dev *, u64, u32,
++				nvm_l2p_update_fn *, void *);
++typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, int,
++				nvm_bb_update_fn *, void *);
++typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct nvm_rq *, int);
++typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
++typedef int (nvm_erase_blk_fn)(struct nvm_dev *, struct nvm_rq *);
++typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *);
++typedef void (nvm_destroy_dma_pool_fn)(void *);
++typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t,
++								dma_addr_t *);
++typedef void (nvm_dev_dma_free_fn)(void *, void*, dma_addr_t);
++
++struct nvm_dev_ops {
++	nvm_id_fn		*identity;
++	nvm_get_l2p_tbl_fn	*get_l2p_tbl;
++	nvm_op_bb_tbl_fn	*get_bb_tbl;
++	nvm_op_set_bb_fn	*set_bb_tbl;
++
++	nvm_submit_io_fn	*submit_io;
++	nvm_erase_blk_fn	*erase_block;
++
++	nvm_create_dma_pool_fn	*create_dma_pool;
++	nvm_destroy_dma_pool_fn	*destroy_dma_pool;
++	nvm_dev_dma_alloc_fn	*dev_dma_alloc;
++	nvm_dev_dma_free_fn	*dev_dma_free;
++
++	unsigned int		max_phys_sect;
++};
++
++
++
+ #ifdef CONFIG_NVM
+ 
+ #include <linux/blkdev.h>
+-#include <linux/types.h>
+ #include <linux/file.h>
+ #include <linux/dmapool.h>
+ 
+@@ -126,29 +189,6 @@ struct nvm_tgt_instance {
+ #define NVM_VERSION_MINOR 0
+ #define NVM_VERSION_PATCH 0
+ 
+-#define NVM_BLK_BITS (16)
+-#define NVM_PG_BITS  (16)
+-#define NVM_SEC_BITS (8)
+-#define NVM_PL_BITS  (8)
+-#define NVM_LUN_BITS (8)
+-#define NVM_CH_BITS  (8)
+-
+-struct ppa_addr {
+-	/* Generic structure for all addresses */
+-	union {
+-		struct {
+-			u64 blk		: NVM_BLK_BITS;
+-			u64 pg		: NVM_PG_BITS;
+-			u64 sec		: NVM_SEC_BITS;
+-			u64 pl		: NVM_PL_BITS;
+-			u64 lun		: NVM_LUN_BITS;
+-			u64 ch		: NVM_CH_BITS;
+-		} g;
+-
+-		u64 ppa;
+-	};
+-};
+-
+ struct nvm_rq {
+ 	struct nvm_tgt_instance *ins;
+ 	struct nvm_dev *dev;
+@@ -182,39 +222,6 @@ static inline void *nvm_rq_to_pdu(struct
+ 
+ struct nvm_block;
+ 
+-typedef int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *);
+-typedef int (nvm_bb_update_fn)(struct ppa_addr, int, u8 *, void *);
+-typedef int (nvm_id_fn)(struct nvm_dev *, struct nvm_id *);
+-typedef int (nvm_get_l2p_tbl_fn)(struct nvm_dev *, u64, u32,
+-				nvm_l2p_update_fn *, void *);
+-typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, int,
+-				nvm_bb_update_fn *, void *);
+-typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct nvm_rq *, int);
+-typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
+-typedef int (nvm_erase_blk_fn)(struct nvm_dev *, struct nvm_rq *);
+-typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *);
+-typedef void (nvm_destroy_dma_pool_fn)(void *);
+-typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t,
+-								dma_addr_t *);
+-typedef void (nvm_dev_dma_free_fn)(void *, void*, dma_addr_t);
+-
+-struct nvm_dev_ops {
+-	nvm_id_fn		*identity;
+-	nvm_get_l2p_tbl_fn	*get_l2p_tbl;
+-	nvm_op_bb_tbl_fn	*get_bb_tbl;
+-	nvm_op_set_bb_fn	*set_bb_tbl;
+-
+-	nvm_submit_io_fn	*submit_io;
+-	nvm_erase_blk_fn	*erase_block;
+-
+-	nvm_create_dma_pool_fn	*create_dma_pool;
+-	nvm_destroy_dma_pool_fn	*destroy_dma_pool;
+-	nvm_dev_dma_alloc_fn	*dev_dma_alloc;
+-	nvm_dev_dma_free_fn	*dev_dma_free;
+-
+-	unsigned int		max_phys_sect;
+-};
+-
+ struct nvm_lun {
+ 	int id;
+ 
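The underlying header pattern is generic: keep the type definitions outside the config guard and stub only the functions, so code built without the feature can still name the types. A minimal sketch with made-up identifiers (feature_ops, CONFIG_FEATURE — not from the lightnvm API):

    #include <stdio.h>

    /* The ops type is visible unconditionally; only the implementation
     * is guarded, so a driver can always define an (empty) instance. */
    struct feature_ops {
            int (*submit)(int arg);         /* may be NULL in the empty case */
    };

    #ifdef CONFIG_FEATURE
    int feature_register(const struct feature_ops *ops);   /* real code elsewhere */
    #else
    static inline int feature_register(const struct feature_ops *ops)
    {
            (void)ops;
            return 0;                       /* feature compiled out: accept and ignore */
    }
    #endif

    /* What null_blk does with nvm_dev_ops: an empty instance that must
     * compile whether or not the feature is configured in. */
    static struct feature_ops empty_ops;

    int main(void)
    {
            printf("register: %d\n", feature_register(&empty_ops));
            return 0;
    }

Before the patch the equivalent of struct feature_ops lived inside the #ifdef, so the CONFIG_FEATURE=n build of the caller could not even declare empty_ops.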
diff --git a/queue-4.4/metag-only-define-atomic_dec_if_positive-conditionally.patch b/queue-4.4/metag-only-define-atomic_dec_if_positive-conditionally.patch
new file mode 100644
index 00000000000..ec3e0cc7d4a
--- /dev/null
+++ b/queue-4.4/metag-only-define-atomic_dec_if_positive-conditionally.patch
@@ -0,0 +1,45 @@
+From 35d04077ad96ed33ceea2501f5a4f1eacda77218 Mon Sep 17 00:00:00 2001
+From: Guenter Roeck
+Date: Fri, 7 Oct 2016 10:40:59 -0700
+Subject: metag: Only define atomic_dec_if_positive conditionally
+
+From: Guenter Roeck
+
+commit 35d04077ad96ed33ceea2501f5a4f1eacda77218 upstream.
+
+The definition of atomic_dec_if_positive() assumes that
+atomic_sub_if_positive() exists, which is only the case if
+metag specific atomics are used. This results in the following
+build error when trying to build metag1_defconfig.
+
+kernel/ucount.c: In function 'dec_ucount':
+kernel/ucount.c:211: error:
+	implicit declaration of function 'atomic_sub_if_positive'
+
+Moving the definition of atomic_dec_if_positive() into the metag
+conditional code fixes the problem.
+
+Fixes: 6006c0d8ce94 ("metag: Atomics, locks and bitops")
+Signed-off-by: Guenter Roeck
+Signed-off-by: James Hogan
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/metag/include/asm/atomic.h |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/arch/metag/include/asm/atomic.h
++++ b/arch/metag/include/asm/atomic.h
+@@ -39,11 +39,10 @@
+ #define atomic_dec(v) atomic_sub(1, (v))
+ 
+ #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
++#define atomic_dec_if_positive(v)       atomic_sub_if_positive(1, v)
+ 
+ #endif
+ 
+-#define atomic_dec_if_positive(v)       atomic_sub_if_positive(1, v)
+-
+ #include <asm-generic/atomic64.h>
+ 
+ #endif /* __ASM_METAG_ATOMIC_H */
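For comparison, on architectures that lack a native atomic_sub_if_positive(), the usual way to get atomic_dec_if_positive() semantics is a compare-exchange loop. A user-space sketch of those semantics in C11 atomics (illustrative only, not the metag implementation):

    #include <stdatomic.h>
    #include <stdio.h>

    /* Decrement only if the result stays >= 0; return the would-be new
     * value either way, so a negative return means "refused, *v kept". */
    static int atomic_dec_if_positive(atomic_int *v)
    {
            int old = atomic_load(v);
            int new;

            do {
                    new = old - 1;
                    if (new < 0)
                            break;          /* leave *v untouched */
            } while (!atomic_compare_exchange_weak(v, &old, new));

            return new;
    }

    int main(void)
    {
            atomic_int v = 1;

            printf("%d\n", atomic_dec_if_positive(&v));   /* 0: decremented */
            printf("%d\n", atomic_dec_if_positive(&v));   /* -1: refused, v stays 0 */
            return 0;
    }

Callers such as dec_ucount() rely on exactly this contract, which is why the stray unconditional #define (expanding to a function that may not exist) broke the metag1_defconfig build.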
diff --git a/queue-4.4/mm-filemap-don-t-plant-shadow-entries-without-radix-tree-node.patch b/queue-4.4/mm-filemap-don-t-plant-shadow-entries-without-radix-tree-node.patch
new file mode 100644
index 00000000000..b6ca7c88def
--- /dev/null
+++ b/queue-4.4/mm-filemap-don-t-plant-shadow-entries-without-radix-tree-node.patch
@@ -0,0 +1,84 @@
+From d3798ae8c6f3767c726403c2ca6ecc317752c9dd Mon Sep 17 00:00:00 2001
+From: Johannes Weiner
+Date: Tue, 4 Oct 2016 22:02:08 +0200
+Subject: mm: filemap: don't plant shadow entries without radix tree node
+
+From: Johannes Weiner
+
+commit d3798ae8c6f3767c726403c2ca6ecc317752c9dd upstream.
+
+When the underflow checks were added to workingset_node_shadow_dec(),
+they triggered immediately:
+
+  kernel BUG at ./include/linux/swap.h:276!
+  invalid opcode: 0000 [#1] SMP
+  Modules linked in: isofs usb_storage fuse xt_CHECKSUM ipt_MASQUERADE nf_nat_masquerade_ipv4 tun nf_conntrack_netbios_ns nf_conntrack_broadcast ip6t_REJECT nf_reject_ipv6
+   soundcore wmi acpi_als pinctrl_sunrisepoint kfifo_buf tpm_tis industrialio acpi_pad pinctrl_intel tpm_tis_core tpm nfsd auth_rpcgss nfs_acl lockd grace sunrpc dm_crypt
+  CPU: 0 PID: 20929 Comm: blkid Not tainted 4.8.0-rc8-00087-gbe67d60ba944 #1
+  Hardware name: System manufacturer System Product Name/Z170-K, BIOS 1803 05/06/2016
+  task: ffff8faa93ecd940 task.stack: ffff8faa7f478000
+  RIP: page_cache_tree_insert+0xf1/0x100
+  Call Trace:
+    __add_to_page_cache_locked+0x12e/0x270
+    add_to_page_cache_lru+0x4e/0xe0
+    mpage_readpages+0x112/0x1d0
+    blkdev_readpages+0x1d/0x20
+    __do_page_cache_readahead+0x1ad/0x290
+    force_page_cache_readahead+0xaa/0x100
+    page_cache_sync_readahead+0x3f/0x50
+    generic_file_read_iter+0x5af/0x740
+    blkdev_read_iter+0x35/0x40
+    __vfs_read+0xe1/0x130
+    vfs_read+0x96/0x130
+    SyS_read+0x55/0xc0
+    entry_SYSCALL_64_fastpath+0x13/0x8f
+  Code: 03 00 48 8b 5d d8 65 48 33 1c 25 28 00 00 00 44 89 e8 75 19 48 83 c4 18 5b 41 5c 41 5d 41 5e 5d c3 0f 0b 41 bd ef ff ff ff eb d7 <0f> 0b e8 88 68 ef ff 0f 1f 84 00
+  RIP page_cache_tree_insert+0xf1/0x100
+
+This is a long-standing bug in the way shadow entries are accounted in
+the radix tree nodes. The shrinker needs to know when radix tree nodes
+contain only shadow entries, no pages, so node->count is split in half
+to count shadows in the upper bits and pages in the lower bits.
+
+Unfortunately, the radix tree implementation doesn't know of this and
+assumes all entries are in node->count. When there is a shadow entry
+directly in root->rnode and the tree is later extended, the radix tree
+implementation will copy that entry into the new node and bump its
+node->count, i.e. increases the page count bits. Once the shadow gets
+removed and we subtract from the upper counter, node->count underflows
+and triggers the warning. Afterwards, without node->count reaching 0
+again, the radix tree node is leaked.
+
+Limit shadow entries to when we have actual radix tree nodes and can
+count them properly. That means we lose the ability to detect refaults
+from files that had only the first page faulted in at eviction time.
+
+Fixes: 449dd6984d0e ("mm: keep page cache radix tree nodes in check")
+Signed-off-by: Johannes Weiner
+Reported-and-tested-by: Linus Torvalds
+Reviewed-by: Jan Kara
+Cc: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/filemap.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -122,6 +122,14 @@ static void page_cache_tree_delete(struc
+ 
+ 	__radix_tree_lookup(&mapping->page_tree, page->index, &node, &slot);
+ 
++	if (!node) {
++		/*
++		 * We need a node to properly account shadow
++		 * entries. Don't plant any without. XXX
++		 */
++		shadow = NULL;
++	}
++
+ 	if (shadow) {
+ 		mapping->nrshadows++;
+ 		/*
diff --git a/queue-4.4/series b/queue-4.4/series
index 1519eb2b1f5..c422ff9d65a 100644
--- a/queue-4.4/series
+++ b/queue-4.4/series
@@ -63,3 +63,9 @@ nfsv4-nfs4_copy_delegation_stateid-must-fail-if-the-delegation-is-invalid.patch
 nfsv4-open-state-recovery-must-account-for-file-permission-changes.patch
 nfsv4.2-fix-a-reference-leak-in-nfs42_proc_layoutstats_generic.patch
 scsi-fix-use-after-free.patch
+metag-only-define-atomic_dec_if_positive-conditionally.patch
+mm-filemap-don-t-plant-shadow-entries-without-radix-tree-node.patch
+ipc-sem.c-fix-complex_count-vs.-simple-op-race.patch
+lightnvm-ensure-that-nvm_dev_ops-can-be-used-without-config_nvm.patch
+arc-don-t-leak-bits-of-kernel-stack-into-coredump.patch
+fs-super.c-fix-race-between-freeze_super-and-thaw_super.patch
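Returning to the mm/filemap patch above: the underflow is an artifact of packing two counters into one word. A toy model — the 16/16 bit split and the helper names are illustrative, not the kernel's actual node->count layout:

    #include <stdio.h>

    #define SHADOW_SHIFT    16
    #define PAGE_MASK_LO    0xffffu

    static unsigned int count;     /* stands in for node->count */

    static void shadow_dec(void)  { count -= 1u << SHADOW_SHIFT; }
    static void generic_inc(void) { count++; }  /* radix tree: "just an entry" */

    int main(void)
    {
            /* A shadow sits in root->rnode, so nothing is accounted yet.
             * The tree grows; generic code copies the entry into a node
             * and counts it as an ordinary page (low bits): */
            generic_inc();          /* pages=1, shadows=0 -- but it is a shadow */

            /* Reclaim removes the shadow and decrements the *upper* half: */
            shadow_dec();           /* 0x00001 - 0x10000: underflow */

            printf("pages=%u shadows=%u raw=0x%x\n",
                   count & PAGE_MASK_LO, count >> SHADOW_SHIFT, count);
            /* raw can never get back to 0, so the node would be leaked. */
            return 0;
    }

Because the removal path subtracts from the upper half while the tree extension incremented the lower half, the combined word never returns to zero — which is both the BUG_ON quoted in the changelog and the node leak it describes.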