From a63fb04cc301da359074177865cf00a38bcce0f7 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Sun, 14 Apr 2013 10:30:13 -0700
Subject: [PATCH] 3.4-stable patches

added patches:
kobject-fix-kset_find_obj-race-with-concurrent-last-kobject_put.patch
kref-implement-kref_get_unless_zero-v3.patch
vfs-revert-spurious-fix-to-spinning-prevention-in-prune_icache_sb.patch
---
 ...ace-with-concurrent-last-kobject_put.patch | 91 +++++++++++++++++++
 ...ef-implement-kref_get_unless_zero-v3.patch | 58 ++++++++++++
 queue-3.4/series | 3 +
 ...inning-prevention-in-prune_icache_sb.patch | 39 ++++++++
 4 files changed, 191 insertions(+)
 create mode 100644 queue-3.4/kobject-fix-kset_find_obj-race-with-concurrent-last-kobject_put.patch
 create mode 100644 queue-3.4/kref-implement-kref_get_unless_zero-v3.patch
 create mode 100644 queue-3.4/vfs-revert-spurious-fix-to-spinning-prevention-in-prune_icache_sb.patch

diff --git a/queue-3.4/kobject-fix-kset_find_obj-race-with-concurrent-last-kobject_put.patch b/queue-3.4/kobject-fix-kset_find_obj-race-with-concurrent-last-kobject_put.patch
new file mode 100644
index 00000000000..c5d424e52c3
--- /dev/null
+++ b/queue-3.4/kobject-fix-kset_find_obj-race-with-concurrent-last-kobject_put.patch
@@ -0,0 +1,91 @@
+From a49b7e82cab0f9b41f483359be83f44fbb6b4979 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds
+Date: Sat, 13 Apr 2013 15:15:30 -0700
+Subject: kobject: fix kset_find_obj() race with concurrent last kobject_put()
+
+From: Linus Torvalds
+
+commit a49b7e82cab0f9b41f483359be83f44fbb6b4979 upstream.
+
+Anatol Pomozov identified a race condition that hits module unloading
+and re-loading. To quote Anatol:
+
+ "This is a race condition that exists between kset_find_obj() and
+  kobject_put(). kset_find_obj() might return a kobject whose refcount
+  is already 0 if that kobject is being freed by kobject_put() in another
+  thread.
+
+  Here is the timeline for the crash, for the case where kset_find_obj()
+  searches for an object that nobody holds while another thread is doing
+  kobject_put() on the same kobject:
+
+  THREAD A (calls kset_find_obj())       THREAD B (calls kobject_put())
+  spin_lock()
+                                         atomic_dec_return(kobj->kref), counter gets zero here
+                                         ... starts kobject cleanup ....
+                                         spin_lock() // WAIT thread A in kobj_kset_leave()
+  iterate over kset->list
+  atomic_inc(kobj->kref) (counter becomes 1)
+  spin_unlock()
+                                         spin_lock() // taken
+                                         // it does not know that thread A increased the counter, so it
+                                         removes obj from the list
+                                         spin_unlock()
+                                         vfree(module) // frees the module object containing the kobj
+
+  // kobj points to freed memory area!!
+  kobject_put(kobj) // OOPS!!!!
+
+  The race above happens because module.c tries to use kset_find_obj()
+  when somebody unloads a module. The module.c code was introduced in
+  commit 6494a93d55fa"
+
+Anatol supplied a patch specific to module.c that worked around the
+problem by simply not using kset_find_obj() at all, but rather than
+apply a local band-aid, this just fixes kset_find_obj() to be
+thread-safe using the proper model of refusing to take a new reference
+if the refcount has already dropped to zero.
+
+See examples of this proper refcount handling not only in the kref
+documentation, but in various other equivalent uses of this pattern by
+grepping for atomic_inc_not_zero().
+
+[ Side note: the module race does indicate that module loading and
+  unloading is not properly serialized wrt sysfs information using the
+  module mutex. That may require further thought, but this is the
+  correct fix at the kobject layer regardless. ]
+
+Reported-analyzed-and-tested-by: Anatol Pomozov
+Cc: Al Viro
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ lib/kobject.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/lib/kobject.c
++++ b/lib/kobject.c
+@@ -531,6 +531,13 @@ struct kobject *kobject_get(struct kobje
+ 	return kobj;
+ }
+
++static struct kobject *kobject_get_unless_zero(struct kobject *kobj)
++{
++	if (!kref_get_unless_zero(&kobj->kref))
++		kobj = NULL;
++	return kobj;
++}
++
+ /*
+  * kobject_cleanup - free kobject resources.
+  * @kobj: object to cleanup
+@@ -753,7 +760,7 @@ struct kobject *kset_find_obj(struct kse
+
+ 	list_for_each_entry(k, &kset->list, entry) {
+ 		if (kobject_name(k) && !strcmp(kobject_name(k), name)) {
+-			ret = kobject_get(k);
++			ret = kobject_get_unless_zero(k);
+ 			break;
+ 		}
+ 	}
diff --git a/queue-3.4/kref-implement-kref_get_unless_zero-v3.patch b/queue-3.4/kref-implement-kref_get_unless_zero-v3.patch
new file mode 100644
index 00000000000..dc02b37027f
--- /dev/null
+++ b/queue-3.4/kref-implement-kref_get_unless_zero-v3.patch
@@ -0,0 +1,58 @@
+From 4b20db3de8dab005b07c74161cb041db8c5ff3a7 Mon Sep 17 00:00:00 2001
+From: Thomas Hellstrom
+Date: Tue, 6 Nov 2012 11:31:49 +0000
+Subject: kref: Implement kref_get_unless_zero v3
+
+From: Thomas Hellstrom
+
+commit 4b20db3de8dab005b07c74161cb041db8c5ff3a7 upstream.
+
+This function is intended to simplify locking around refcounting for
+objects that can be looked up from a lookup structure, and which are
+removed from that lookup structure in the object destructor.
+Operations on such objects require at least a read lock around
+lookup + kref_get, and a write lock around kref_put + remove from lookup
+structure. Furthermore, RCU implementations become extremely tricky.
+With a lookup followed by a kref_get_unless_zero *with return value check*
+locking in the kref_put path can be deferred to the actual removal from
+the lookup structure and RCU lookups become trivial.
+
+v2: Formatting fixes.
+v3: Invert the return value.
+
+Signed-off-by: Thomas Hellstrom
+Signed-off-by: Dave Airlie
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ include/linux/kref.h | 21 +++++++++++++++++++++
+ 1 file changed, 21 insertions(+)
+
+--- a/include/linux/kref.h
++++ b/include/linux/kref.h
+@@ -93,4 +93,25 @@ static inline int kref_put(struct kref *
+ {
+ 	return kref_sub(kref, 1, release);
+ }
++
++/**
++ * kref_get_unless_zero - Increment refcount for object unless it is zero.
++ * @kref: object.
++ *
++ * Return non-zero if the increment succeeded. Otherwise return 0.
++ *
++ * This function is intended to simplify locking around refcounting for
++ * objects that can be looked up from a lookup structure, and which are
++ * removed from that lookup structure in the object destructor.
++ * Operations on such objects require at least a read lock around
++ * lookup + kref_get, and a write lock around kref_put + remove from lookup
++ * structure. Furthermore, RCU implementations become extremely tricky.
++ * With a lookup followed by a kref_get_unless_zero *with return value check*
++ * locking in the kref_put path can be deferred to the actual removal from
++ * the lookup structure and RCU lookups become trivial.
++ */
++static inline int __must_check kref_get_unless_zero(struct kref *kref)
++{
++	return atomic_add_unless(&kref->refcount, 1, 0);
++}
+ #endif /* _KREF_H_ */
diff --git a/queue-3.4/series b/queue-3.4/series
index d12e5292fd4..c78f65d0429 100644
--- a/queue-3.4/series
+++ b/queue-3.4/series
@@ -6,3 +6,6 @@ drm-i915-use-the-correct-size-of-the-gtt-for-placing-the-per-process-entries.pat
 scsi-libsas-fix-handling-vacant-phy-in-sas_set_ex_phy.patch
 cifs-allow-passwords-which-begin-with-a-delimitor.patch
 target-fix-incorrect-fallthrough-of-alua-standby-offline-transition-cdbs.patch
+vfs-revert-spurious-fix-to-spinning-prevention-in-prune_icache_sb.patch
+kref-implement-kref_get_unless_zero-v3.patch
+kobject-fix-kset_find_obj-race-with-concurrent-last-kobject_put.patch
diff --git a/queue-3.4/vfs-revert-spurious-fix-to-spinning-prevention-in-prune_icache_sb.patch b/queue-3.4/vfs-revert-spurious-fix-to-spinning-prevention-in-prune_icache_sb.patch
new file mode 100644
index 00000000000..ccbdb54eb3a
--- /dev/null
+++ b/queue-3.4/vfs-revert-spurious-fix-to-spinning-prevention-in-prune_icache_sb.patch
@@ -0,0 +1,39 @@
+From 5b55d708335a9e3e4f61f2dadf7511502205ccd1 Mon Sep 17 00:00:00 2001
+From: Suleiman Souhlal
+Date: Sat, 13 Apr 2013 16:03:06 -0700
+Subject: vfs: Revert spurious fix to spinning prevention in prune_icache_sb
+
+From: Suleiman Souhlal
+
+commit 5b55d708335a9e3e4f61f2dadf7511502205ccd1 upstream.
+
+Revert commit 62a3ddef6181 ("vfs: fix spinning prevention in prune_icache_sb").
+
+This commit doesn't look right: since we are looking at the tail of the
+list (sb->s_inode_lru.prev), if we want to skip an inode we should put
+it back at the head of the list instead of the tail, otherwise we will
+keep spinning on it.
+
+Discovered when investigating why prune_icache_sb came top in perf
+reports of a swapping load.
+
+Signed-off-by: Suleiman Souhlal
+Signed-off-by: Hugh Dickins
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/inode.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/inode.c
++++ b/fs/inode.c
+@@ -705,7 +705,7 @@ void prune_icache_sb(struct super_block
+ 		 * inode to the back of the list so we don't spin on it.
+ 		 */
+ 		if (!spin_trylock(&inode->i_lock)) {
+-			list_move_tail(&inode->i_lru, &sb->s_inode_lru);
++			list_move(&inode->i_lru, &sb->s_inode_lru);
+ 			continue;
+ 		}
+
-- 
2.47.3
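
The pattern the kobject and kref patches above rely on -- only take a reference during a lookup if the refcount has not already hit zero -- can be illustrated outside the kernel. The following is a minimal user-space sketch, assuming a C11 compiler with <stdatomic.h>; every name in it (sketch_kref, sketch_kobj, find_obj, the one-slot table) is invented for illustration and only mirrors the shape of the kernel API, it is not the kernel code itself.

/*
 * User-space sketch of the kref_get_unless_zero() / kset_find_obj() pattern.
 * All identifiers here are hypothetical stand-ins for the kernel objects.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <string.h>

struct sketch_kref { atomic_int refcount; };

static void sketch_kref_init(struct sketch_kref *k)
{
	atomic_init(&k->refcount, 1);
}

/* Mirrors kref_get_unless_zero(): take a reference only if one is still held. */
static int sketch_kref_get_unless_zero(struct sketch_kref *k)
{
	int old = atomic_load(&k->refcount);

	do {
		if (old == 0)
			return 0;	/* destruction already in flight */
	} while (!atomic_compare_exchange_weak(&k->refcount, &old, old + 1));
	return 1;
}

struct sketch_kobj {
	const char *name;
	struct sketch_kref kref;
};

/* A one-slot stand-in for kset->list; the real kset holds a linked list. */
static struct sketch_kobj *slot;

/* Mirrors the fixed kset_find_obj(): only return the object if the get succeeded. */
static struct sketch_kobj *find_obj(const char *name)
{
	/* (the real kset_find_obj() holds kset->list_lock around this) */
	if (slot && !strcmp(slot->name, name) &&
	    sketch_kref_get_unless_zero(&slot->kref))
		return slot;
	return NULL;
}

int main(void)
{
	struct sketch_kobj obj = { .name = "example" };

	sketch_kref_init(&obj.kref);
	slot = &obj;

	/* Normal case: the object is live, so the lookup takes a reference. */
	printf("live lookup: %p\n", (void *)find_obj("example"));
	atomic_fetch_sub(&obj.kref.refcount, 1);	/* drop that reference again */

	/*
	 * Simulate the race window: another thread's final kobject_put() has
	 * dropped the count to zero but has not yet unlinked the object.
	 * A plain kobject_get() would resurrect it; get_unless_zero refuses.
	 */
	atomic_store(&obj.kref.refcount, 0);
	printf("lookup during teardown: %p\n", (void *)find_obj("example"));
	return 0;
}

The compare-and-swap loop is the same idea as the kernel's atomic_add_unless(&kref->refcount, 1, 0): once the count has been observed at zero the increment is refused, so a concurrent lookup can never hand out an object whose teardown has already begun.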
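
The prune_icache_sb revert is about which end of the LRU a busy inode is parked on while the shrinker walks the list from the tail. The sketch below uses a tiny stand-in for the kernel's list_head, under the same caveat (node, prune and the other names are invented, not kernel code), to show why requeueing to the tail makes the walker spin on the same entry while requeueing to the head lets it make progress.

/*
 * Sketch of the list_move() vs list_move_tail() behaviour behind the
 * prune_icache_sb revert. All identifiers are hypothetical.
 */
#include <stdbool.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

static void list_init(struct list_head *h) { h->next = h->prev = h; }

static void list_del(struct list_head *e)
{
	e->prev->next = e->next;
	e->next->prev = e->prev;
}

static void list_add(struct list_head *e, struct list_head *h)		/* to head */
{
	e->next = h->next; e->prev = h;
	h->next->prev = e; h->next = e;
}

static void list_add_tail(struct list_head *e, struct list_head *h)	/* to tail */
{
	e->prev = h->prev; e->next = h;
	h->prev->next = e; h->prev = e;
}

struct node { struct list_head lru; const char *name; bool busy; };

#define node_of(p) ((struct node *)(p))	/* lru is the first member */

/*
 * One bounded pruning pass, modelled on prune_icache_sb(): always look at the
 * tail. A busy candidate is requeued; move_to_head selects between the
 * list_move behaviour (reverted to) and the list_move_tail behaviour.
 */
static void prune(struct list_head *lru, int max_iters, bool move_to_head)
{
	while (max_iters-- > 0 && lru->prev != lru) {
		struct node *n = node_of(lru->prev);

		if (n->busy) {			/* cannot lock it: skip it */
			list_del(&n->lru);
			if (move_to_head)
				list_add(&n->lru, lru);		/* out of the way */
			else
				list_add_tail(&n->lru, lru);	/* right back where we look */
			continue;
		}
		printf("pruned %s\n", n->name);
		list_del(&n->lru);
	}
}

int main(void)
{
	struct list_head lru;
	struct node a = { .name = "a", .busy = false };
	struct node b = { .name = "b", .busy = true };	/* like an inode we can't lock */

	list_init(&lru);
	list_add(&a.lru, &lru);
	list_add_tail(&b.lru, &lru);	/* b sits at the tail, where prune looks first */

	/* Tail-requeue semantics: the pass spins on b and never reaches a. */
	prune(&lru, 4, false);
	/* Head-requeue semantics: b is parked at the head and a gets pruned. */
	prune(&lru, 4, true);
	return 0;
}

Running it prints nothing for the first pass (the busy entry keeps reappearing at the tail under inspection) and "pruned a" for the second, which is the behaviour difference between list_move_tail() and list_move() in the reverted hunk.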