3.0-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Thu, 27 Sep 2012 18:41:10 +0000 (11:41 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Thu, 27 Sep 2012 18:41:10 +0000 (11:41 -0700)
added patches:
drop_monitor-dont-sleep-in-atomic-context.patch
drop_monitor-fix-sleeping-in-invalid-context-warning.patch
drop_monitor-make-updating-data-skb-smp-safe.patch
drop_monitor-prevent-init-path-from-scheduling-on-the-wrong-cpu.patch

queue-3.0/drop_monitor-dont-sleep-in-atomic-context.patch [new file with mode: 0644]
queue-3.0/drop_monitor-fix-sleeping-in-invalid-context-warning.patch [new file with mode: 0644]
queue-3.0/drop_monitor-make-updating-data-skb-smp-safe.patch [new file with mode: 0644]
queue-3.0/drop_monitor-prevent-init-path-from-scheduling-on-the-wrong-cpu.patch [new file with mode: 0644]
queue-3.0/series

diff --git a/queue-3.0/drop_monitor-dont-sleep-in-atomic-context.patch b/queue-3.0/drop_monitor-dont-sleep-in-atomic-context.patch
new file mode 100644 (file)
index 0000000..08ea8cf
--- /dev/null
@@ -0,0 +1,243 @@
+From bec4596b4e6770c7037f21f6bd27567b152dc0d6 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 4 Jun 2012 00:18:19 +0000
+Subject: drop_monitor: dont sleep in atomic context
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit bec4596b4e6770c7037f21f6bd27567b152dc0d6 upstream.
+
+drop_monitor calls several sleeping functions while in atomic context.
+
+ BUG: sleeping function called from invalid context at mm/slub.c:943
+ in_atomic(): 1, irqs_disabled(): 0, pid: 2103, name: kworker/0:2
+ Pid: 2103, comm: kworker/0:2 Not tainted 3.5.0-rc1+ #55
+ Call Trace:
+  [<ffffffff810697ca>] __might_sleep+0xca/0xf0
+  [<ffffffff811345a3>] kmem_cache_alloc_node+0x1b3/0x1c0
+  [<ffffffff8105578c>] ? queue_delayed_work_on+0x11c/0x130
+  [<ffffffff815343fb>] __alloc_skb+0x4b/0x230
+  [<ffffffffa00b0360>] ? reset_per_cpu_data+0x160/0x160 [drop_monitor]
+  [<ffffffffa00b022f>] reset_per_cpu_data+0x2f/0x160 [drop_monitor]
+  [<ffffffffa00b03ab>] send_dm_alert+0x4b/0xb0 [drop_monitor]
+  [<ffffffff810568e0>] process_one_work+0x130/0x4c0
+  [<ffffffff81058249>] worker_thread+0x159/0x360
+  [<ffffffff810580f0>] ? manage_workers.isra.27+0x240/0x240
+  [<ffffffff8105d403>] kthread+0x93/0xa0
+  [<ffffffff816be6d4>] kernel_thread_helper+0x4/0x10
+  [<ffffffff8105d370>] ? kthread_freezable_should_stop+0x80/0x80
+  [<ffffffff816be6d0>] ? gs_change+0xb/0xb
+
+Rework the logic to call the sleeping functions in the right context.
+
+Use the standard timer/workqueue API to let the system choose any cpu to
+perform the allocation and the netlink send.
+
+Also avoid a loop if reset_per_cpu_data() cannot allocate memory: use
+mod_timer() to wait 1/10 second before the next try.
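+
+A minimal sketch of the resulting pattern (schematic, not the exact driver
+code): the timer runs in atomic context and only schedules the work item;
+the work item runs in process context, where sleeping allocations and the
+netlink send are allowed, and the timer is re-armed if the allocation fails.
+
+	/* timer callback: atomic context, so defer the real work */
+	static void sched_send_work(unsigned long _data)
+	{
+		struct per_cpu_dm_data *data = (struct per_cpu_dm_data *)_data;
+
+		schedule_work(&data->dm_alert_work);
+	}
+
+	/* work handler: process context, sleeping is fine here */
+	static void send_dm_alert(struct work_struct *work)
+	{
+		struct per_cpu_dm_data *data =
+			container_of(work, struct per_cpu_dm_data, dm_alert_work);
+		struct sk_buff *skb = reset_per_cpu_data(data);	/* GFP_KERNEL alloc inside */
+
+		if (skb)
+			genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL);
+		/* on allocation failure, reset_per_cpu_data() re-arms the timer:
+		 * mod_timer(&data->send_timer, jiffies + HZ / 10);
+		 */
+	}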
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Neil Horman <nhorman@tuxdriver.com>
+Reviewed-by: Neil Horman <nhorman@tuxdriver.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Cc: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/core/drop_monitor.c |  101 +++++++++++++++---------------------------------
+ 1 file changed, 33 insertions(+), 68 deletions(-)
+
+--- a/net/core/drop_monitor.c
++++ b/net/core/drop_monitor.c
+@@ -33,9 +33,6 @@
+ #define TRACE_ON 1
+ #define TRACE_OFF 0
+-static void send_dm_alert(struct work_struct *unused);
+-
+-
+ /*
+  * Globals, our netlink socket pointer
+  * and the work handle that will send up
+@@ -45,11 +42,10 @@ static int trace_state = TRACE_OFF;
+ static DEFINE_MUTEX(trace_state_mutex);
+ struct per_cpu_dm_data {
+-      struct work_struct dm_alert_work;
+-      struct sk_buff __rcu *skb;
+-      atomic_t dm_hit_count;
+-      struct timer_list send_timer;
+-      int cpu;
++      spinlock_t              lock;
++      struct sk_buff          *skb;
++      struct work_struct      dm_alert_work;
++      struct timer_list       send_timer;
+ };
+ struct dm_hw_stat_delta {
+@@ -75,13 +71,13 @@ static int dm_delay = 1;
+ static unsigned long dm_hw_check_delta = 2*HZ;
+ static LIST_HEAD(hw_stats_list);
+-static void reset_per_cpu_data(struct per_cpu_dm_data *data)
++static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
+ {
+       size_t al;
+       struct net_dm_alert_msg *msg;
+       struct nlattr *nla;
+       struct sk_buff *skb;
+-      struct sk_buff *oskb = rcu_dereference_protected(data->skb, 1);
++      unsigned long flags;
+       al = sizeof(struct net_dm_alert_msg);
+       al += dm_hit_limit * sizeof(struct net_dm_drop_point);
+@@ -96,65 +92,40 @@ static void reset_per_cpu_data(struct pe
+                                 sizeof(struct net_dm_alert_msg));
+               msg = nla_data(nla);
+               memset(msg, 0, al);
+-      } else
+-              schedule_work_on(data->cpu, &data->dm_alert_work);
+-
+-      /*
+-       * Don't need to lock this, since we are guaranteed to only
+-       * run this on a single cpu at a time.
+-       * Note also that we only update data->skb if the old and new skb
+-       * pointers don't match.  This ensures that we don't continually call
+-       * synchornize_rcu if we repeatedly fail to alloc a new netlink message.
+-       */
+-      if (skb != oskb) {
+-              rcu_assign_pointer(data->skb, skb);
+-
+-              synchronize_rcu();
+-
+-              atomic_set(&data->dm_hit_count, dm_hit_limit);
++      } else {
++              mod_timer(&data->send_timer, jiffies + HZ / 10);
+       }
++      spin_lock_irqsave(&data->lock, flags);
++      swap(data->skb, skb);
++      spin_unlock_irqrestore(&data->lock, flags);
++
++      return skb;
+ }
+-static void send_dm_alert(struct work_struct *unused)
++static void send_dm_alert(struct work_struct *work)
+ {
+       struct sk_buff *skb;
+-      struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data);
++      struct per_cpu_dm_data *data;
+-      WARN_ON_ONCE(data->cpu != smp_processor_id());
++      data = container_of(work, struct per_cpu_dm_data, dm_alert_work);
+-      /*
+-       * Grab the skb we're about to send
+-       */
+-      skb = rcu_dereference_protected(data->skb, 1);
++      skb = reset_per_cpu_data(data);
+-      /*
+-       * Replace it with a new one
+-       */
+-      reset_per_cpu_data(data);
+-
+-      /*
+-       * Ship it!
+-       */
+       if (skb)
+               genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL);
+-
+-      put_cpu_var(dm_cpu_data);
+ }
+ /*
+  * This is the timer function to delay the sending of an alert
+  * in the event that more drops will arrive during the
+- * hysteresis period.  Note that it operates under the timer interrupt
+- * so we don't need to disable preemption here
++ * hysteresis period.
+  */
+-static void sched_send_work(unsigned long unused)
++static void sched_send_work(unsigned long _data)
+ {
+-      struct per_cpu_dm_data *data =  &get_cpu_var(dm_cpu_data);
++      struct per_cpu_dm_data *data = (struct per_cpu_dm_data *)_data;
+-      schedule_work_on(smp_processor_id(), &data->dm_alert_work);
+-
+-      put_cpu_var(dm_cpu_data);
++      schedule_work(&data->dm_alert_work);
+ }
+ static void trace_drop_common(struct sk_buff *skb, void *location)
+@@ -164,22 +135,17 @@ static void trace_drop_common(struct sk_
+       struct nlattr *nla;
+       int i;
+       struct sk_buff *dskb;
+-      struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data);
+-
++      struct per_cpu_dm_data *data;
++      unsigned long flags;
+-      rcu_read_lock();
+-      dskb = rcu_dereference(data->skb);
++      local_irq_save(flags);
++      data = &__get_cpu_var(dm_cpu_data);
++      spin_lock(&data->lock);
++      dskb = data->skb;
+       if (!dskb)
+               goto out;
+-      if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) {
+-              /*
+-               * we're already at zero, discard this hit
+-               */
+-              goto out;
+-      }
+-
+       nlh = (struct nlmsghdr *)dskb->data;
+       nla = genlmsg_data(nlmsg_data(nlh));
+       msg = nla_data(nla);
+@@ -189,7 +155,8 @@ static void trace_drop_common(struct sk_
+                       goto out;
+               }
+       }
+-
++      if (msg->entries == dm_hit_limit)
++              goto out;
+       /*
+        * We need to create a new entry
+        */
+@@ -201,13 +168,11 @@ static void trace_drop_common(struct sk_
+       if (!timer_pending(&data->send_timer)) {
+               data->send_timer.expires = jiffies + dm_delay * HZ;
+-              add_timer_on(&data->send_timer, smp_processor_id());
++              add_timer(&data->send_timer);
+       }
+ out:
+-      rcu_read_unlock();
+-      put_cpu_var(dm_cpu_data);
+-      return;
++      spin_unlock_irqrestore(&data->lock, flags);
+ }
+ static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location)
+@@ -405,11 +370,11 @@ static int __init init_net_drop_monitor(
+       for_each_present_cpu(cpu) {
+               data = &per_cpu(dm_cpu_data, cpu);
+-              data->cpu = cpu;
+               INIT_WORK(&data->dm_alert_work, send_dm_alert);
+               init_timer(&data->send_timer);
+-              data->send_timer.data = cpu;
++              data->send_timer.data = (unsigned long)data;
+               data->send_timer.function = sched_send_work;
++              spin_lock_init(&data->lock);
+               reset_per_cpu_data(data);
+       }
diff --git a/queue-3.0/drop_monitor-fix-sleeping-in-invalid-context-warning.patch b/queue-3.0/drop_monitor-fix-sleeping-in-invalid-context-warning.patch
new file mode 100644 (file)
index 0000000..c4e1c9e
--- /dev/null
@@ -0,0 +1,114 @@
+From cde2e9a651b76d8db36ae94cd0febc82b637e5dd Mon Sep 17 00:00:00 2001
+From: Neil Horman <nhorman@tuxdriver.com>
+Date: Fri, 27 Apr 2012 10:11:48 +0000
+Subject: drop_monitor: fix sleeping in invalid context warning
+
+From: Neil Horman <nhorman@tuxdriver.com>
+
+commit cde2e9a651b76d8db36ae94cd0febc82b637e5dd upstream.
+
+Eric Dumazet pointed out to me this warning from the drop_monitor protocol:
+
+[   38.352571] BUG: sleeping function called from invalid context at kernel/mutex.c:85
+[   38.352576] in_atomic(): 1, irqs_disabled(): 0, pid: 4415, name: dropwatch
+[   38.352580] Pid: 4415, comm: dropwatch Not tainted 3.4.0-rc2+ #71
+[   38.352582] Call Trace:
+[   38.352592]  [<ffffffff8153aaf0>] ? trace_napi_poll_hit+0xd0/0xd0
+[   38.352599]  [<ffffffff81063f2a>] __might_sleep+0xca/0xf0
+[   38.352606]  [<ffffffff81655b16>] mutex_lock+0x26/0x50
+[   38.352610]  [<ffffffff8153aaf0>] ? trace_napi_poll_hit+0xd0/0xd0
+[   38.352616]  [<ffffffff810b72d9>] tracepoint_probe_register+0x29/0x90
+[   38.352621]  [<ffffffff8153a585>] set_all_monitor_traces+0x105/0x170
+[   38.352625]  [<ffffffff8153a8ca>] net_dm_cmd_trace+0x2a/0x40
+[   38.352630]  [<ffffffff8154a81a>] genl_rcv_msg+0x21a/0x2b0
+[   38.352636]  [<ffffffff810f8029>] ? zone_statistics+0x99/0xc0
+[   38.352640]  [<ffffffff8154a600>] ? genl_rcv+0x30/0x30
+[   38.352645]  [<ffffffff8154a059>] netlink_rcv_skb+0xa9/0xd0
+[   38.352649]  [<ffffffff8154a5f0>] genl_rcv+0x20/0x30
+[   38.352653]  [<ffffffff81549a7e>] netlink_unicast+0x1ae/0x1f0
+[   38.352658]  [<ffffffff81549d76>] netlink_sendmsg+0x2b6/0x310
+[   38.352663]  [<ffffffff8150824f>] sock_sendmsg+0x10f/0x130
+[   38.352668]  [<ffffffff8150abe0>] ? move_addr_to_kernel+0x60/0xb0
+[   38.352673]  [<ffffffff81515f04>] ? verify_iovec+0x64/0xe0
+[   38.352677]  [<ffffffff81509c46>] __sys_sendmsg+0x386/0x390
+[   38.352682]  [<ffffffff810ffaf9>] ? handle_mm_fault+0x139/0x210
+[   38.352687]  [<ffffffff8165b5bc>] ? do_page_fault+0x1ec/0x4f0
+[   38.352693]  [<ffffffff8106ba4d>] ? set_next_entity+0x9d/0xb0
+[   38.352699]  [<ffffffff81310b49>] ? tty_ldisc_deref+0x9/0x10
+[   38.352703]  [<ffffffff8106d363>] ? pick_next_task_fair+0x63/0x140
+[   38.352708]  [<ffffffff8150b8d4>] sys_sendmsg+0x44/0x80
+[   38.352713]  [<ffffffff8165f8e2>] system_call_fastpath+0x16/0x1b
+
+It stems from holding a spinlock (trace_state_lock) while attempting to register
+or unregister tracepoint hooks, making in_atomic() true in this context and
+leading to the warning when the tracepoint code calls might_sleep() while it is
+taking a mutex.  Since trace_state_lock is only used to prevent trace protocol
+state races and to serialize hardware stat list updates on the rcu write side,
+we can simply convert the spinlock to a mutex to avoid this problem.
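+
+A minimal sketch of the locking change (schematic; set_trace_state() is an
+illustrative name for the state-switching helper): tracepoint registration may
+sleep, so serialization has to use a sleeping lock rather than a spinlock.
+
+	static DEFINE_MUTEX(trace_state_mutex);
+	static int trace_state = TRACE_OFF;
+
+	static int set_trace_state(int new_state)
+	{
+		int rc = 0;
+
+		mutex_lock(&trace_state_mutex);	/* sleeping lock: may sleep under it */
+		if (new_state == trace_state)
+			rc = -EAGAIN;
+		else
+			trace_state = new_state; /* tracepoint (un)registration, which
+						  * may sleep, happens at this point */
+		mutex_unlock(&trace_state_mutex);
+		return rc;
+	}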
+
+Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
+Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
+CC: David Miller <davem@davemloft.net>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Cc: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/core/drop_monitor.c |   14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+--- a/net/core/drop_monitor.c
++++ b/net/core/drop_monitor.c
+@@ -42,7 +42,7 @@ static void send_dm_alert(struct work_st
+  * netlink alerts
+  */
+ static int trace_state = TRACE_OFF;
+-static DEFINE_SPINLOCK(trace_state_lock);
++static DEFINE_MUTEX(trace_state_mutex);
+ struct per_cpu_dm_data {
+       struct work_struct dm_alert_work;
+@@ -213,7 +213,7 @@ static int set_all_monitor_traces(int st
+       struct dm_hw_stat_delta *new_stat = NULL;
+       struct dm_hw_stat_delta *temp;
+-      spin_lock(&trace_state_lock);
++      mutex_lock(&trace_state_mutex);
+       if (state == trace_state) {
+               rc = -EAGAIN;
+@@ -252,7 +252,7 @@ static int set_all_monitor_traces(int st
+               rc = -EINPROGRESS;
+ out_unlock:
+-      spin_unlock(&trace_state_lock);
++      mutex_unlock(&trace_state_mutex);
+       return rc;
+ }
+@@ -295,12 +295,12 @@ static int dropmon_net_event(struct noti
+               new_stat->dev = dev;
+               new_stat->last_rx = jiffies;
+-              spin_lock(&trace_state_lock);
++              mutex_lock(&trace_state_mutex);
+               list_add_rcu(&new_stat->list, &hw_stats_list);
+-              spin_unlock(&trace_state_lock);
++              mutex_unlock(&trace_state_mutex);
+               break;
+       case NETDEV_UNREGISTER:
+-              spin_lock(&trace_state_lock);
++              mutex_lock(&trace_state_mutex);
+               list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) {
+                       if (new_stat->dev == dev) {
+                               new_stat->dev = NULL;
+@@ -311,7 +311,7 @@ static int dropmon_net_event(struct noti
+                               }
+                       }
+               }
+-              spin_unlock(&trace_state_lock);
++              mutex_unlock(&trace_state_mutex);
+               break;
+       }
+ out:
diff --git a/queue-3.0/drop_monitor-make-updating-data-skb-smp-safe.patch b/queue-3.0/drop_monitor-make-updating-data-skb-smp-safe.patch
new file mode 100644 (file)
index 0000000..24988dd
--- /dev/null
@@ -0,0 +1,186 @@
+From 3885ca785a3618593226687ced84f3f336dc3860 Mon Sep 17 00:00:00 2001
+From: Neil Horman <nhorman@tuxdriver.com>
+Date: Fri, 27 Apr 2012 10:11:49 +0000
+Subject: drop_monitor: Make updating data->skb smp safe
+
+From: Neil Horman <nhorman@tuxdriver.com>
+
+commit 3885ca785a3618593226687ced84f3f336dc3860 upstream.
+
+Eric Dumazet pointed out to me that the drop_monitor protocol has some holes in
+its smp protections.  Specifically, it is possible to replace data->skb while it
+is being written.  This patch corrects that by making data->skb an rcu-protected
+variable, which prevents it from being overwritten while a tracepoint is
+modifying it.
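+
+A schematic sketch of the RCU pattern this introduces (fragments only, names
+abbreviated for illustration): the tracepoint path reads the skb pointer under
+rcu_read_lock(), while the alert path publishes a replacement with
+rcu_assign_pointer() and waits for in-flight readers with synchronize_rcu()
+before reusing the old buffer.
+
+	/* reader side: the drop tracepoint */
+	rcu_read_lock();
+	dskb = rcu_dereference(data->skb);
+	if (dskb) {
+		/* append the drop point to dskb ... */
+	}
+	rcu_read_unlock();
+
+	/* writer side: swapping in a freshly allocated skb */
+	rcu_assign_pointer(data->skb, skb);
+	synchronize_rcu();	/* no reader can still see the old skb */
+	atomic_set(&data->dm_hit_count, dm_hit_limit);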
+
+Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
+Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
+CC: David Miller <davem@davemloft.net>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Cc: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/core/drop_monitor.c |   70 +++++++++++++++++++++++++++++++++++++-----------
+ 1 file changed, 54 insertions(+), 16 deletions(-)
+
+--- a/net/core/drop_monitor.c
++++ b/net/core/drop_monitor.c
+@@ -46,7 +46,7 @@ static DEFINE_MUTEX(trace_state_mutex);
+ struct per_cpu_dm_data {
+       struct work_struct dm_alert_work;
+-      struct sk_buff *skb;
++      struct sk_buff __rcu *skb;
+       atomic_t dm_hit_count;
+       struct timer_list send_timer;
+ };
+@@ -73,35 +73,58 @@ static int dm_hit_limit = 64;
+ static int dm_delay = 1;
+ static unsigned long dm_hw_check_delta = 2*HZ;
+ static LIST_HEAD(hw_stats_list);
++static int initialized = 0;
+ static void reset_per_cpu_data(struct per_cpu_dm_data *data)
+ {
+       size_t al;
+       struct net_dm_alert_msg *msg;
+       struct nlattr *nla;
++      struct sk_buff *skb;
++      struct sk_buff *oskb = rcu_dereference_protected(data->skb, 1);
+       al = sizeof(struct net_dm_alert_msg);
+       al += dm_hit_limit * sizeof(struct net_dm_drop_point);
+       al += sizeof(struct nlattr);
+-      data->skb = genlmsg_new(al, GFP_KERNEL);
+-      genlmsg_put(data->skb, 0, 0, &net_drop_monitor_family,
+-                      0, NET_DM_CMD_ALERT);
+-      nla = nla_reserve(data->skb, NLA_UNSPEC, sizeof(struct net_dm_alert_msg));
+-      msg = nla_data(nla);
+-      memset(msg, 0, al);
+-      atomic_set(&data->dm_hit_count, dm_hit_limit);
++      skb = genlmsg_new(al, GFP_KERNEL);
++
++      if (skb) {
++              genlmsg_put(skb, 0, 0, &net_drop_monitor_family,
++                              0, NET_DM_CMD_ALERT);
++              nla = nla_reserve(skb, NLA_UNSPEC,
++                                sizeof(struct net_dm_alert_msg));
++              msg = nla_data(nla);
++              memset(msg, 0, al);
++      } else if (initialized)
++              schedule_work_on(smp_processor_id(), &data->dm_alert_work);
++
++      /*
++       * Don't need to lock this, since we are guaranteed to only
++       * run this on a single cpu at a time.
++       * Note also that we only update data->skb if the old and new skb
++       * pointers don't match.  This ensures that we don't continually call
++       * synchornize_rcu if we repeatedly fail to alloc a new netlink message.
++       */
++      if (skb != oskb) {
++              rcu_assign_pointer(data->skb, skb);
++
++              synchronize_rcu();
++
++              atomic_set(&data->dm_hit_count, dm_hit_limit);
++      }
++
+ }
+ static void send_dm_alert(struct work_struct *unused)
+ {
+       struct sk_buff *skb;
+-      struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);
++      struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data);
+       /*
+        * Grab the skb we're about to send
+        */
+-      skb = data->skb;
++      skb = rcu_dereference_protected(data->skb, 1);
+       /*
+        * Replace it with a new one
+@@ -111,8 +134,10 @@ static void send_dm_alert(struct work_st
+       /*
+        * Ship it!
+        */
+-      genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL);
++      if (skb)
++              genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL);
++      put_cpu_var(dm_cpu_data);
+ }
+ /*
+@@ -123,9 +148,11 @@ static void send_dm_alert(struct work_st
+  */
+ static void sched_send_work(unsigned long unused)
+ {
+-      struct per_cpu_dm_data *data =  &__get_cpu_var(dm_cpu_data);
++      struct per_cpu_dm_data *data =  &get_cpu_var(dm_cpu_data);
++
++      schedule_work_on(smp_processor_id(), &data->dm_alert_work);
+-      schedule_work(&data->dm_alert_work);
++      put_cpu_var(dm_cpu_data);
+ }
+ static void trace_drop_common(struct sk_buff *skb, void *location)
+@@ -134,9 +161,16 @@ static void trace_drop_common(struct sk_
+       struct nlmsghdr *nlh;
+       struct nlattr *nla;
+       int i;
+-      struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);
++      struct sk_buff *dskb;
++      struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data);
++      rcu_read_lock();
++      dskb = rcu_dereference(data->skb);
++
++      if (!dskb)
++              goto out;
++
+       if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) {
+               /*
+                * we're already at zero, discard this hit
+@@ -144,7 +178,7 @@ static void trace_drop_common(struct sk_
+               goto out;
+       }
+-      nlh = (struct nlmsghdr *)data->skb->data;
++      nlh = (struct nlmsghdr *)dskb->data;
+       nla = genlmsg_data(nlmsg_data(nlh));
+       msg = nla_data(nla);
+       for (i = 0; i < msg->entries; i++) {
+@@ -157,7 +191,7 @@ static void trace_drop_common(struct sk_
+       /*
+        * We need to create a new entry
+        */
+-      __nla_reserve_nohdr(data->skb, sizeof(struct net_dm_drop_point));
++      __nla_reserve_nohdr(dskb, sizeof(struct net_dm_drop_point));
+       nla->nla_len += NLA_ALIGN(sizeof(struct net_dm_drop_point));
+       memcpy(msg->points[msg->entries].pc, &location, sizeof(void *));
+       msg->points[msg->entries].count = 1;
+@@ -169,6 +203,8 @@ static void trace_drop_common(struct sk_
+       }
+ out:
++      rcu_read_unlock();
++      put_cpu_var(dm_cpu_data);
+       return;
+ }
+@@ -374,6 +410,8 @@ static int __init init_net_drop_monitor(
+               data->send_timer.function = sched_send_work;
+       }
++      initialized = 1;
++
+       goto out;
+ out_unreg:
diff --git a/queue-3.0/drop_monitor-prevent-init-path-from-scheduling-on-the-wrong-cpu.patch b/queue-3.0/drop_monitor-prevent-init-path-from-scheduling-on-the-wrong-cpu.patch
new file mode 100644 (file)
index 0000000..d8dd7ec
--- /dev/null
@@ -0,0 +1,91 @@
+From 4fdcfa12843bca38d0c9deff70c8720e4e8f515f Mon Sep 17 00:00:00 2001
+From: Neil Horman <nhorman@tuxdriver.com>
+Date: Tue, 1 May 2012 08:18:02 +0000
+Subject: drop_monitor: prevent init path from scheduling on the wrong cpu
+
+From: Neil Horman <nhorman@tuxdriver.com>
+
+commit 4fdcfa12843bca38d0c9deff70c8720e4e8f515f upstream.
+
+I just noticed, after some recent updates, that the init path for the drop
+monitor protocol has a minor error.  drop_monitor maintains a per-cpu structure
+that gets initialized from a single cpu.  Normally this is fine, as the protocol
+isn't in use yet, but I recently made a change that causes a failed skb
+allocation to reschedule itself.  Given the current code, the implication is
+that this workqueue reschedule will take place on the wrong cpu.  If drop
+monitor is used early during the boot process, it is possible that two cpus will
+access a single per-cpu structure in parallel, possibly leading to data
+corruption.
+
+This patch fixes the situation by storing the cpu number that a given instance
+of this per-cpu data should be accessed from.  When a reschedule is needed, the
+work is scheduled on the cpu stored in the struct, rather than on the currently
+executing cpu.
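+
+A minimal sketch of the fix (schematic): record the owning cpu when the per-cpu
+data is set up, and use it as the target of any deferred reschedule instead of
+smp_processor_id(), which during init is whichever cpu happens to run the loop.
+
+	for_each_present_cpu(cpu) {
+		data = &per_cpu(dm_cpu_data, cpu);
+		data->cpu = cpu;			/* remember the owning cpu */
+		INIT_WORK(&data->dm_alert_work, send_dm_alert);
+		init_timer(&data->send_timer);
+		data->send_timer.data = cpu;
+		data->send_timer.function = sched_send_work;
+		reset_per_cpu_data(data);		/* a failed alloc now does:
+							 * schedule_work_on(data->cpu, ...) */
+	}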
+
+Tested successfully by myself.
+
+Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
+CC: David Miller <davem@davemloft.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Cc: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/core/drop_monitor.c |   12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+--- a/net/core/drop_monitor.c
++++ b/net/core/drop_monitor.c
+@@ -49,6 +49,7 @@ struct per_cpu_dm_data {
+       struct sk_buff __rcu *skb;
+       atomic_t dm_hit_count;
+       struct timer_list send_timer;
++      int cpu;
+ };
+ struct dm_hw_stat_delta {
+@@ -73,7 +74,6 @@ static int dm_hit_limit = 64;
+ static int dm_delay = 1;
+ static unsigned long dm_hw_check_delta = 2*HZ;
+ static LIST_HEAD(hw_stats_list);
+-static int initialized = 0;
+ static void reset_per_cpu_data(struct per_cpu_dm_data *data)
+ {
+@@ -96,8 +96,8 @@ static void reset_per_cpu_data(struct pe
+                                 sizeof(struct net_dm_alert_msg));
+               msg = nla_data(nla);
+               memset(msg, 0, al);
+-      } else if (initialized)
+-              schedule_work_on(smp_processor_id(), &data->dm_alert_work);
++      } else
++              schedule_work_on(data->cpu, &data->dm_alert_work);
+       /*
+        * Don't need to lock this, since we are guaranteed to only
+@@ -121,6 +121,8 @@ static void send_dm_alert(struct work_st
+       struct sk_buff *skb;
+       struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data);
++      WARN_ON_ONCE(data->cpu != smp_processor_id());
++
+       /*
+        * Grab the skb we're about to send
+        */
+@@ -403,14 +405,14 @@ static int __init init_net_drop_monitor(
+       for_each_present_cpu(cpu) {
+               data = &per_cpu(dm_cpu_data, cpu);
+-              reset_per_cpu_data(data);
++              data->cpu = cpu;
+               INIT_WORK(&data->dm_alert_work, send_dm_alert);
+               init_timer(&data->send_timer);
+               data->send_timer.data = cpu;
+               data->send_timer.function = sched_send_work;
++              reset_per_cpu_data(data);
+       }
+-      initialized = 1;
+       goto out;
diff --git a/queue-3.0/series b/queue-3.0/series
index 68be2194f119cc216d3b1883baf8da802004d830..8cf8f11456367faa465e90c3fbb4b22cfe8c23cd 100644 (file)
@@ -106,3 +106,7 @@ cpufreq-acpi-fix-not-loading-acpi-cpufreq-driver-regression.patch
 sched-fix-race-in-task_group.patch
 media-lirc_sir-make-device-registration-work.patch
 ecryptfs-improve-statfs-reporting.patch
+drop_monitor-fix-sleeping-in-invalid-context-warning.patch
+drop_monitor-make-updating-data-skb-smp-safe.patch
+drop_monitor-prevent-init-path-from-scheduling-on-the-wrong-cpu.patch
+drop_monitor-dont-sleep-in-atomic-context.patch