From: Greg Kroah-Hartman Date: Wed, 4 Jan 2017 17:39:41 +0000 (+0100) Subject: 4.8-stable patches X-Git-Tag: v4.9.1~4 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=60721223e7ce354669ff1c7a6823db398e433d7d;p=thirdparty%2Fkernel%2Fstable-queue.git 4.8-stable patches added patches: driver-core-fix-race-between-creating-querying-glue-dir-and-its-cleanup.patch revert-netfilter-move-nat-hlist_head-to-nf_conn.patch revert-netfilter-nat-convert-nat-bysrc-hash-to.patch --- diff --git a/queue-4.8/driver-core-fix-race-between-creating-querying-glue-dir-and-its-cleanup.patch b/queue-4.8/driver-core-fix-race-between-creating-querying-glue-dir-and-its-cleanup.patch new file mode 100644 index 00000000000..87a479a6745 --- /dev/null +++ b/queue-4.8/driver-core-fix-race-between-creating-querying-glue-dir-and-its-cleanup.patch @@ -0,0 +1,126 @@ +From cebf8fd16900fdfd58c0028617944f808f97fe50 Mon Sep 17 00:00:00 2001 +From: Ming Lei +Date: Sun, 10 Jul 2016 19:27:36 +0800 +Subject: driver core: fix race between creating/querying glue dir and its cleanup + +From: Ming Lei + +commit cebf8fd16900fdfd58c0028617944f808f97fe50 upstream. + +The global mutex of 'gdp_mutex' is used to serialize creating/querying +glue dir and its cleanup. Turns out it isn't a perfect way because +part(kobj_kset_leave()) of the actual cleanup action() is done inside +the release handler of the glue dir kobject. That means gdp_mutex has +to be held before releasing the last reference count of the glue dir +kobject. + +This patch moves glue dir's cleanup after kobject_del() in device_del() +for avoiding the race. + +Cc: Yijing Wang +Reported-by: Chandra Sekhar Lingutla +Signed-off-by: Ming Lei +Cc: Jiri Slaby +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/base/core.c | 39 +++++++++++++++++++++++++++++---------- + 1 file changed, 29 insertions(+), 10 deletions(-) + +--- a/drivers/base/core.c ++++ b/drivers/base/core.c +@@ -836,11 +836,29 @@ static struct kobject *get_device_parent + return NULL; + } + ++static inline bool live_in_glue_dir(struct kobject *kobj, ++ struct device *dev) ++{ ++ if (!kobj || !dev->class || ++ kobj->kset != &dev->class->p->glue_dirs) ++ return false; ++ return true; ++} ++ ++static inline struct kobject *get_glue_dir(struct device *dev) ++{ ++ return dev->kobj.parent; ++} ++ ++/* ++ * make sure cleaning up dir as the last step, we need to make ++ * sure .release handler of kobject is run with holding the ++ * global lock ++ */ + static void cleanup_glue_dir(struct device *dev, struct kobject *glue_dir) + { + /* see if we live in a "glue" directory */ +- if (!glue_dir || !dev->class || +- glue_dir->kset != &dev->class->p->glue_dirs) ++ if (!live_in_glue_dir(glue_dir, dev)) + return; + + mutex_lock(&gdp_mutex); +@@ -848,11 +866,6 @@ static void cleanup_glue_dir(struct devi + mutex_unlock(&gdp_mutex); + } + +-static void cleanup_device_parent(struct device *dev) +-{ +- cleanup_glue_dir(dev, dev->kobj.parent); +-} +- + static int device_add_class_symlinks(struct device *dev) + { + struct device_node *of_node = dev_of_node(dev); +@@ -1028,6 +1041,7 @@ int device_add(struct device *dev) + struct kobject *kobj; + struct class_interface *class_intf; + int error = -EINVAL; ++ struct kobject *glue_dir = NULL; + + dev = get_device(dev); + if (!dev) +@@ -1072,8 +1086,10 @@ int device_add(struct device *dev) + /* first, register with generic layer. */ + /* we require the name to be set before, and pass NULL */ + error = kobject_add(&dev->kobj, dev->kobj.parent, NULL); +- if (error) ++ if (error) { ++ glue_dir = get_glue_dir(dev); + goto Error; ++ } + + /* notify platform of device entry */ + if (platform_notify) +@@ -1154,9 +1170,10 @@ done: + device_remove_file(dev, &dev_attr_uevent); + attrError: + kobject_uevent(&dev->kobj, KOBJ_REMOVE); ++ glue_dir = get_glue_dir(dev); + kobject_del(&dev->kobj); + Error: +- cleanup_device_parent(dev); ++ cleanup_glue_dir(dev, glue_dir); + put_device(parent); + name_error: + kfree(dev->p); +@@ -1232,6 +1249,7 @@ EXPORT_SYMBOL_GPL(put_device); + void device_del(struct device *dev) + { + struct device *parent = dev->parent; ++ struct kobject *glue_dir = NULL; + struct class_interface *class_intf; + + /* Notify clients of device removal. This call must come +@@ -1276,8 +1294,9 @@ void device_del(struct device *dev) + blocking_notifier_call_chain(&dev->bus->p->bus_notifier, + BUS_NOTIFY_REMOVED_DEVICE, dev); + kobject_uevent(&dev->kobj, KOBJ_REMOVE); +- cleanup_device_parent(dev); ++ glue_dir = get_glue_dir(dev); + kobject_del(&dev->kobj); ++ cleanup_glue_dir(dev, glue_dir); + put_device(parent); + } + EXPORT_SYMBOL_GPL(device_del); diff --git a/queue-4.8/revert-netfilter-move-nat-hlist_head-to-nf_conn.patch b/queue-4.8/revert-netfilter-move-nat-hlist_head-to-nf_conn.patch new file mode 100644 index 00000000000..dd3bb4b91ea --- /dev/null +++ b/queue-4.8/revert-netfilter-move-nat-hlist_head-to-nf_conn.patch @@ -0,0 +1,170 @@ +From 394d96406cae0936778587a09d8be0d998132166 Mon Sep 17 00:00:00 2001 +From: Greg Kroah-Hartman +Date: Wed, 4 Jan 2017 18:29:16 +0100 +Subject: [PATCH 2/2] Revert "netfilter: move nat hlist_head to nf_conn" + +This reverts commit 7c9664351980aaa6a4b8837a314360b3a4ad382a as it is +not working properly. Please move to 4.9 to get the full fix. + +Reported-by: Pablo Neira Ayuso +Cc: Florian Westphal +Signed-off-by: Greg Kroah-Hartman +--- + include/net/netfilter/nf_conntrack.h | 3 -- + include/net/netfilter/nf_conntrack_extend.h | 3 ++ + include/net/netfilter/nf_nat.h | 2 + + net/netfilter/nf_conntrack_extend.c | 15 +++++++++++- + net/netfilter/nf_nat_core.c | 33 ++++++++++++++++++++++------ + 5 files changed, 44 insertions(+), 12 deletions(-) + +--- a/include/net/netfilter/nf_conntrack.h ++++ b/include/net/netfilter/nf_conntrack.h +@@ -117,9 +117,6 @@ struct nf_conn { + /* Extensions */ + struct nf_ct_ext *ext; + +-#if IS_ENABLED(CONFIG_NF_NAT) +- struct hlist_node nat_bysource; +-#endif + /* Storage reserved for other modules, must be the last member */ + union nf_conntrack_proto proto; + }; +--- a/include/net/netfilter/nf_conntrack_extend.h ++++ b/include/net/netfilter/nf_conntrack_extend.h +@@ -99,6 +99,9 @@ void *__nf_ct_ext_add_length(struct nf_c + struct nf_ct_ext_type { + /* Destroys relationships (can be NULL). */ + void (*destroy)(struct nf_conn *ct); ++ /* Called when realloacted (can be NULL). ++ Contents has already been moved. */ ++ void (*move)(void *new, void *old); + + enum nf_ct_ext_id id; + +--- a/include/net/netfilter/nf_nat.h ++++ b/include/net/netfilter/nf_nat.h +@@ -29,6 +29,8 @@ struct nf_conn; + + /* The structure embedded in the conntrack structure. */ + struct nf_conn_nat { ++ struct hlist_node bysource; ++ struct nf_conn *ct; + union nf_conntrack_nat_help help; + #if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV4) || \ + IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV6) +--- a/net/netfilter/nf_conntrack_extend.c ++++ b/net/netfilter/nf_conntrack_extend.c +@@ -73,7 +73,7 @@ void *__nf_ct_ext_add_length(struct nf_c + size_t var_alloc_len, gfp_t gfp) + { + struct nf_ct_ext *old, *new; +- int newlen, newoff; ++ int i, newlen, newoff; + struct nf_ct_ext_type *t; + + /* Conntrack must not be confirmed to avoid races on reallocation. */ +@@ -99,8 +99,19 @@ void *__nf_ct_ext_add_length(struct nf_c + return NULL; + + if (new != old) { ++ for (i = 0; i < NF_CT_EXT_NUM; i++) { ++ if (!__nf_ct_ext_exist(old, i)) ++ continue; ++ ++ rcu_read_lock(); ++ t = rcu_dereference(nf_ct_ext_types[i]); ++ if (t && t->move) ++ t->move((void *)new + new->offset[i], ++ (void *)old + old->offset[i]); ++ rcu_read_unlock(); ++ } + kfree_rcu(old, rcu); +- rcu_assign_pointer(ct->ext, new); ++ ct->ext = new; + } + + new->offset[id] = newoff; +--- a/net/netfilter/nf_nat_core.c ++++ b/net/netfilter/nf_nat_core.c +@@ -198,9 +198,11 @@ find_appropriate_src(struct net *net, + const struct nf_nat_range *range) + { + unsigned int h = hash_by_src(net, tuple); ++ const struct nf_conn_nat *nat; + const struct nf_conn *ct; + +- hlist_for_each_entry_rcu(ct, &nf_nat_bysource[h], nat_bysource) { ++ hlist_for_each_entry_rcu(nat, &nf_nat_bysource[h], bysource) { ++ ct = nat->ct; + if (same_src(ct, tuple) && + net_eq(net, nf_ct_net(ct)) && + nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) { +@@ -434,7 +436,8 @@ nf_nat_setup_info(struct nf_conn *ct, + spin_lock_bh(&nf_nat_lock); + /* nf_conntrack_alter_reply might re-allocate extension aera */ + nat = nfct_nat(ct); +- hlist_add_head_rcu(&ct->nat_bysource, ++ nat->ct = ct; ++ hlist_add_head_rcu(&nat->bysource, + &nf_nat_bysource[srchash]); + spin_unlock_bh(&nf_nat_lock); + } +@@ -541,7 +544,7 @@ static int nf_nat_proto_clean(struct nf_ + if (nf_nat_proto_remove(ct, data)) + return 1; + +- if (!nat) ++ if (!nat || !nat->ct) + return 0; + + /* This netns is being destroyed, and conntrack has nat null binding. +@@ -554,8 +557,9 @@ static int nf_nat_proto_clean(struct nf_ + return 1; + + spin_lock_bh(&nf_nat_lock); +- hlist_del_rcu(&ct->nat_bysource); ++ hlist_del_rcu(&nat->bysource); + ct->status &= ~IPS_NAT_DONE_MASK; ++ nat->ct = NULL; + spin_unlock_bh(&nf_nat_lock); + + add_timer(&ct->timeout); +@@ -685,13 +689,27 @@ static void nf_nat_cleanup_conntrack(str + { + struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT); + +- if (!nat) ++ if (nat == NULL || nat->ct == NULL) + return; + +- NF_CT_ASSERT(ct->status & IPS_SRC_NAT_DONE); ++ NF_CT_ASSERT(nat->ct->status & IPS_SRC_NAT_DONE); ++ ++ spin_lock_bh(&nf_nat_lock); ++ hlist_del_rcu(&nat->bysource); ++ spin_unlock_bh(&nf_nat_lock); ++} ++ ++static void nf_nat_move_storage(void *new, void *old) ++{ ++ struct nf_conn_nat *new_nat = new; ++ struct nf_conn_nat *old_nat = old; ++ struct nf_conn *ct = old_nat->ct; ++ ++ if (!ct || !(ct->status & IPS_SRC_NAT_DONE)) ++ return; + + spin_lock_bh(&nf_nat_lock); +- hlist_del_rcu(&ct->nat_bysource); ++ hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource); + spin_unlock_bh(&nf_nat_lock); + } + +@@ -699,6 +717,7 @@ static struct nf_ct_ext_type nat_extend + .len = sizeof(struct nf_conn_nat), + .align = __alignof__(struct nf_conn_nat), + .destroy = nf_nat_cleanup_conntrack, ++ .move = nf_nat_move_storage, + .id = NF_CT_EXT_NAT, + .flags = NF_CT_EXT_F_PREALLOC, + }; diff --git a/queue-4.8/revert-netfilter-nat-convert-nat-bysrc-hash-to.patch b/queue-4.8/revert-netfilter-nat-convert-nat-bysrc-hash-to.patch new file mode 100644 index 00000000000..96b2afa6126 --- /dev/null +++ b/queue-4.8/revert-netfilter-nat-convert-nat-bysrc-hash-to.patch @@ -0,0 +1,265 @@ +From 810d7bd7c769096eb352b2878a1641968e58319d Mon Sep 17 00:00:00 2001 +From: Greg Kroah-Hartman +Date: Wed, 4 Jan 2017 18:27:19 +0100 +Subject: [PATCH 1/2] Revert "netfilter: nat: convert nat bysrc hash to + rhashtable" + +This reverts commit 870190a9ec9075205c0fa795a09fa931694a3ff1 as it is +not working properly. Please move to 4.9 to get the full fix. + +Reported-by: Pablo Neira Ayuso +Cc: Florian Westphal +Signed-off-by: Greg Kroah-Hartman +--- + include/net/netfilter/nf_conntrack.h | 3 + include/net/netfilter/nf_nat.h | 1 + net/netfilter/nf_nat_core.c | 122 ++++++++++++++++------------------- + 3 files changed, 57 insertions(+), 69 deletions(-) + +--- a/include/net/netfilter/nf_conntrack.h ++++ b/include/net/netfilter/nf_conntrack.h +@@ -17,7 +17,6 @@ + #include + #include + #include +-#include + + #include + #include +@@ -119,7 +118,7 @@ struct nf_conn { + struct nf_ct_ext *ext; + + #if IS_ENABLED(CONFIG_NF_NAT) +- struct rhash_head nat_bysource; ++ struct hlist_node nat_bysource; + #endif + /* Storage reserved for other modules, must be the last member */ + union nf_conntrack_proto proto; +--- a/include/net/netfilter/nf_nat.h ++++ b/include/net/netfilter/nf_nat.h +@@ -1,6 +1,5 @@ + #ifndef _NF_NAT_H + #define _NF_NAT_H +-#include + #include + #include + #include +--- a/net/netfilter/nf_nat_core.c ++++ b/net/netfilter/nf_nat_core.c +@@ -30,19 +30,17 @@ + #include + #include + ++static DEFINE_SPINLOCK(nf_nat_lock); ++ + static DEFINE_MUTEX(nf_nat_proto_mutex); + static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO] + __read_mostly; + static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO] + __read_mostly; + +-struct nf_nat_conn_key { +- const struct net *net; +- const struct nf_conntrack_tuple *tuple; +- const struct nf_conntrack_zone *zone; +-}; +- +-static struct rhashtable nf_nat_bysource_table; ++static struct hlist_head *nf_nat_bysource __read_mostly; ++static unsigned int nf_nat_htable_size __read_mostly; ++static unsigned int nf_nat_hash_rnd __read_mostly; + + inline const struct nf_nat_l3proto * + __nf_nat_l3proto_find(u8 family) +@@ -121,17 +119,19 @@ int nf_xfrm_me_harder(struct net *net, s + EXPORT_SYMBOL(nf_xfrm_me_harder); + #endif /* CONFIG_XFRM */ + +-static u32 nf_nat_bysource_hash(const void *data, u32 len, u32 seed) ++/* We keep an extra hash for each conntrack, for fast searching. */ ++static inline unsigned int ++hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple) + { +- const struct nf_conntrack_tuple *t; +- const struct nf_conn *ct = data; ++ unsigned int hash; ++ ++ get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd)); + +- t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + /* Original src, to ensure we map it consistently if poss. */ ++ hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32), ++ tuple->dst.protonum ^ nf_nat_hash_rnd ^ net_hash_mix(n)); + +- seed ^= net_hash_mix(nf_ct_net(ct)); +- return jhash2((const u32 *)&t->src, sizeof(t->src) / sizeof(u32), +- t->dst.protonum ^ seed); ++ return reciprocal_scale(hash, nf_nat_htable_size); + } + + /* Is this tuple already taken? (not by us) */ +@@ -187,26 +187,6 @@ same_src(const struct nf_conn *ct, + t->src.u.all == tuple->src.u.all); + } + +-static int nf_nat_bysource_cmp(struct rhashtable_compare_arg *arg, +- const void *obj) +-{ +- const struct nf_nat_conn_key *key = arg->key; +- const struct nf_conn *ct = obj; +- +- return same_src(ct, key->tuple) && +- net_eq(nf_ct_net(ct), key->net) && +- nf_ct_zone_equal(ct, key->zone, IP_CT_DIR_ORIGINAL); +-} +- +-static struct rhashtable_params nf_nat_bysource_params = { +- .head_offset = offsetof(struct nf_conn, nat_bysource), +- .obj_hashfn = nf_nat_bysource_hash, +- .obj_cmpfn = nf_nat_bysource_cmp, +- .nelem_hint = 256, +- .min_size = 1024, +- .nulls_base = (1U << RHT_BASE_SHIFT), +-}; +- + /* Only called for SRC manip */ + static int + find_appropriate_src(struct net *net, +@@ -217,23 +197,23 @@ find_appropriate_src(struct net *net, + struct nf_conntrack_tuple *result, + const struct nf_nat_range *range) + { ++ unsigned int h = hash_by_src(net, tuple); + const struct nf_conn *ct; +- struct nf_nat_conn_key key = { +- .net = net, +- .tuple = tuple, +- .zone = zone +- }; +- +- ct = rhashtable_lookup_fast(&nf_nat_bysource_table, &key, +- nf_nat_bysource_params); +- if (!ct) +- return 0; + +- nf_ct_invert_tuplepr(result, +- &ct->tuplehash[IP_CT_DIR_REPLY].tuple); +- result->dst = tuple->dst; ++ hlist_for_each_entry_rcu(ct, &nf_nat_bysource[h], nat_bysource) { ++ if (same_src(ct, tuple) && ++ net_eq(net, nf_ct_net(ct)) && ++ nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) { ++ /* Copy source part from reply tuple. */ ++ nf_ct_invert_tuplepr(result, ++ &ct->tuplehash[IP_CT_DIR_REPLY].tuple); ++ result->dst = tuple->dst; + +- return in_range(l3proto, l4proto, result, range); ++ if (in_range(l3proto, l4proto, result, range)) ++ return 1; ++ } ++ } ++ return 0; + } + + /* For [FUTURE] fragmentation handling, we want the least-used +@@ -405,6 +385,7 @@ nf_nat_setup_info(struct nf_conn *ct, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype) + { ++ struct net *net = nf_ct_net(ct); + struct nf_conntrack_tuple curr_tuple, new_tuple; + struct nf_conn_nat *nat; + +@@ -446,13 +427,16 @@ nf_nat_setup_info(struct nf_conn *ct, + } + + if (maniptype == NF_NAT_MANIP_SRC) { +- int err; ++ unsigned int srchash; + +- err = rhashtable_insert_fast(&nf_nat_bysource_table, +- &ct->nat_bysource, +- nf_nat_bysource_params); +- if (err) +- return NF_DROP; ++ srchash = hash_by_src(net, ++ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); ++ spin_lock_bh(&nf_nat_lock); ++ /* nf_conntrack_alter_reply might re-allocate extension aera */ ++ nat = nfct_nat(ct); ++ hlist_add_head_rcu(&ct->nat_bysource, ++ &nf_nat_bysource[srchash]); ++ spin_unlock_bh(&nf_nat_lock); + } + + /* It's done. */ +@@ -569,10 +553,10 @@ static int nf_nat_proto_clean(struct nf_ + if (!del_timer(&ct->timeout)) + return 1; + ++ spin_lock_bh(&nf_nat_lock); ++ hlist_del_rcu(&ct->nat_bysource); + ct->status &= ~IPS_NAT_DONE_MASK; +- +- rhashtable_remove_fast(&nf_nat_bysource_table, &ct->nat_bysource, +- nf_nat_bysource_params); ++ spin_unlock_bh(&nf_nat_lock); + + add_timer(&ct->timeout); + +@@ -704,8 +688,11 @@ static void nf_nat_cleanup_conntrack(str + if (!nat) + return; + +- rhashtable_remove_fast(&nf_nat_bysource_table, &ct->nat_bysource, +- nf_nat_bysource_params); ++ NF_CT_ASSERT(ct->status & IPS_SRC_NAT_DONE); ++ ++ spin_lock_bh(&nf_nat_lock); ++ hlist_del_rcu(&ct->nat_bysource); ++ spin_unlock_bh(&nf_nat_lock); + } + + static struct nf_ct_ext_type nat_extend __read_mostly = { +@@ -840,13 +827,16 @@ static int __init nf_nat_init(void) + { + int ret; + +- ret = rhashtable_init(&nf_nat_bysource_table, &nf_nat_bysource_params); +- if (ret) +- return ret; ++ /* Leave them the same for the moment. */ ++ nf_nat_htable_size = nf_conntrack_htable_size; ++ ++ nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0); ++ if (!nf_nat_bysource) ++ return -ENOMEM; + + ret = nf_ct_extend_register(&nat_extend); + if (ret < 0) { +- rhashtable_destroy(&nf_nat_bysource_table); ++ nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size); + printk(KERN_ERR "nf_nat_core: Unable to register extension\n"); + return ret; + } +@@ -870,7 +860,7 @@ static int __init nf_nat_init(void) + return 0; + + cleanup_extend: +- rhashtable_destroy(&nf_nat_bysource_table); ++ nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size); + nf_ct_extend_unregister(&nat_extend); + return ret; + } +@@ -888,8 +878,8 @@ static void __exit nf_nat_cleanup(void) + #endif + for (i = 0; i < NFPROTO_NUMPROTO; i++) + kfree(nf_nat_l4protos[i]); +- +- rhashtable_destroy(&nf_nat_bysource_table); ++ synchronize_net(); ++ nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size); + } + + MODULE_LICENSE("GPL"); diff --git a/queue-4.8/series b/queue-4.8/series index 078bb95bb14..8c9dcb5bd1b 100644 --- a/queue-4.8/series +++ b/queue-4.8/series @@ -80,3 +80,6 @@ xen-gntdev-use-vm_mixedmap-instead-of-vm_io-to-avoid-numa-balancing.patch arm-xen-use-alloc_percpu-rather-than-__alloc_percpu.patch xfs-set-agi-buffer-type-in-xlog_recover_clear_agi_bucket.patch arm64-mark-reserved-memblock-regions-explicitly-in-iomem.patch +revert-netfilter-nat-convert-nat-bysrc-hash-to.patch +revert-netfilter-move-nat-hlist_head-to-nf_conn.patch +driver-core-fix-race-between-creating-querying-glue-dir-and-its-cleanup.patch