add nfsd oops and sched race fixes to 2.6.24 queue

author Chris Wright <chrisw@sous-sol.org>

Mon, 17 Mar 2008 04:44:25 +0000 (21:44 -0700)

committer Chris Wright <chrisw@sous-sol.org>

Mon, 17 Mar 2008 04:44:25 +0000 (21:44 -0700)
author Chris Wright <chrisw@sous-sol.org>
Mon, 17 Mar 2008 04:44:25 +0000 (21:44 -0700)
committer Chris Wright <chrisw@sous-sol.org>
Mon, 17 Mar 2008 04:44:25 +0000 (21:44 -0700)
diff --git a/queue-2.6.24/nfsd-fix-oops-on-access-from-high-numbered-ports.patch b/queue-2.6.24/nfsd-fix-oops-on-access-from-high-numbered-ports.patch

new file mode 100644 (file)

index 0000000..20d04f3
--- /dev/null
+++ b/queue-2.6.24/nfsd-fix-oops-on-access-from-high-numbered-ports.patch
@@ -0,0 +1,55 @@
+From b663c6fd98c9cf586279db03cec3257c413efd00 Mon Sep 17 00:00:00 2001
+From: J. Bruce Fields <bfields@citi.umich.edu>
+Date: Fri, 14 Mar 2008 19:37:11 -0400
+Message-ID: <20080314233711.GN2119@fieldses.org>
+Subject: nfsd: fix oops on access from high-numbered ports
+
+This bug was always here, but before my commit 6fa02839bf9412e18e77
+("recheck for secure ports in fh_verify"), it could only be triggered by
+failure of a kmalloc().  After that commit it could be triggered by a
+client making a request from a non-reserved port for access to an export
+marked "secure".  (Exports are "secure" by default.)
+
+The result is a struct svc_export with a reference count one too low,
+resulting in likely oopses next time the export is accessed.
+
+The reference counting here is not straightforward; a later patch will
+clean up fh_verify().
+
+Thanks to Lukas Hejtmanek for the bug report and followup.
+
+Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
+Cc: Lukas Hejtmanek <xhejtman@ics.muni.cz>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ fs/nfsd/nfsfh.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/nfsd/nfsfh.c
++++ b/fs/nfsd/nfsfh.c
+@@ -231,6 +231,7 @@ fh_verify(struct svc_rqst *rqstp, struct
+               fhp->fh_dentry = dentry;
+               fhp->fh_export = exp;
+               nfsd_nr_verified++;
++              cache_get(&exp->h);
+       } else {
+               /*
+                * just rechecking permissions
+@@ -240,6 +241,7 @@ fh_verify(struct svc_rqst *rqstp, struct
+               dprintk("nfsd: fh_verify - just checking\n");
+               dentry = fhp->fh_dentry;
+               exp = fhp->fh_export;
++              cache_get(&exp->h);
+               /*
+                * Set user creds for this exportpoint; necessary even
+                * in the "just checking" case because this may be a
+@@ -251,8 +253,6 @@ fh_verify(struct svc_rqst *rqstp, struct
+               if (error)
+                       goto out;
+       }
+-      cache_get(&exp->h);
+-
+ 
+       error = nfsd_mode_check(rqstp, dentry->d_inode->i_mode, type);
+       if (error)
diff --git a/queue-2.6.24/sched-fix-race-in-schedule.patch b/queue-2.6.24/sched-fix-race-in-schedule.patch

new file mode 100644 (file)

index 0000000..cd06e46
--- /dev/null
+++ b/queue-2.6.24/sched-fix-race-in-schedule.patch
@@ -0,0 +1,134 @@
+From 0e1f34833bd9170ccc93ab759e48e695917fa48f Mon Sep 17 00:00:00 2001
+From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
+Date: Mon, 10 Mar 2008 11:01:20 -0700
+Message-Id: <1205535434.6423.2.camel@lappy>
+Subject: sched: fix race in schedule()
+
+Fix a hard to trigger crash seen in the -rt kernel that also affects
+the vanilla scheduler.
+
+There is a race condition between schedule() and some dequeue/enqueue
+functions; rt_mutex_setprio(), __setscheduler() and sched_move_task().
+
+When scheduling to idle, idle_balance() is called to pull tasks from
+other busy processors. It might drop the rq lock, which means that those
+3 functions can encounter on_rq=0 and running=1. The current task should
+be put whenever it is running.
+
+Here is a possible scenario:
+
+   CPU0                               CPU1
+    |                              schedule()
+    |                              ->deactivate_task()
+    |                              ->idle_balance()
+    |                              -->load_balance_newidle()
+rt_mutex_setprio()                     |
+    |                              --->double_lock_balance()
+    *get lock                          *rel lock
+    * on_rq=0, running=1               |
+    * sched_class is changed           |
+    *rel lock                          *get lock
+    :                                  |
+                                       :
+                                   ->put_prev_task_rt()
+                                   ->pick_next_task_fair()
+                                       => panic
+
+The current process of CPU1 (P1) is scheduling. P1 is deactivated, and the
+scheduler looks for another process on another CPU's runqueue because CPU1
+will be idle. idle_balance(), load_balance_newidle() and
+double_lock_balance() are called, and double_lock_balance() could drop
+the rq lock. Meanwhile, CPU0 is trying to boost the priority of P1. As a
+result of the boost, only P1's prio and sched_class are changed to RT;
+the sched entities of P1 and P1's group are never put. This leaves the
+cfs_rq invalid, because the cfs_rq has curr and no leaf, yet
+pick_next_task_fair() is still called, and the kernel panics.
+
+Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+[chrisw@sous-sol.org: backport to 2.6.24.3]
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+---
+ kernel/sched.c |   36 ++++++++++++++++--------------------
+ 1 file changed, 16 insertions(+), 20 deletions(-)
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -4028,11 +4028,10 @@ void rt_mutex_setprio(struct task_struct
+       oldprio = p->prio;
+       on_rq = p->se.on_rq;
+       running = task_current(rq, p);
+-      if (on_rq) {
++      if (on_rq)
+               dequeue_task(rq, p, 0);
+-              if (running)
+-                      p->sched_class->put_prev_task(rq, p);
+-      }
++      if (running)
++              p->sched_class->put_prev_task(rq, p);
+ 
+       if (rt_prio(prio))
+               p->sched_class = &rt_sched_class;
+@@ -4041,9 +4040,9 @@ void rt_mutex_setprio(struct task_struct
+ 
+       p->prio = prio;
+ 
++      if (running)
++              p->sched_class->set_curr_task(rq);
+       if (on_rq) {
+-              if (running)
+-                      p->sched_class->set_curr_task(rq);
+               enqueue_task(rq, p, 0);
+               /*
+                * Reschedule if we are currently running on this runqueue and
+@@ -4339,18 +4338,17 @@ recheck:
+       update_rq_clock(rq);
+       on_rq = p->se.on_rq;
+       running = task_current(rq, p);
+-      if (on_rq) {
++      if (on_rq)
+               deactivate_task(rq, p, 0);
+-              if (running)
+-                      p->sched_class->put_prev_task(rq, p);
+-      }
++      if (running)
++              p->sched_class->put_prev_task(rq, p);
+ 
+       oldprio = p->prio;
+       __setscheduler(rq, p, policy, param->sched_priority);
+ 
++      if (running)
++              p->sched_class->set_curr_task(rq);
+       if (on_rq) {
+-              if (running)
+-                      p->sched_class->set_curr_task(rq);
+               activate_task(rq, p, 0);
+               /*
+                * Reschedule if we are currently running on this runqueue and
+@@ -7110,19 +7108,17 @@ void sched_move_task(struct task_struct 
+       running = task_current(rq, tsk);
+       on_rq = tsk->se.on_rq;
+ 
+-      if (on_rq) {
++      if (on_rq)
+               dequeue_task(rq, tsk, 0);
+-              if (unlikely(running))
+-                      tsk->sched_class->put_prev_task(rq, tsk);
+-      }
++      if (unlikely(running))
++              tsk->sched_class->put_prev_task(rq, tsk);
+ 
+       set_task_cfs_rq(tsk, task_cpu(tsk));
+ 
+-      if (on_rq) {
+-              if (unlikely(running))
+-                      tsk->sched_class->set_curr_task(rq);
++      if (unlikely(running))
++              tsk->sched_class->set_curr_task(rq);
++      if (on_rq)
+               enqueue_task(rq, tsk, 0);
+-      }
+ 
+ done:
+       task_rq_unlock(rq, &flags);
diff --git a/queue-2.6.24/series b/queue-2.6.24/series

index 2502ab360db07f8ad6b4ab44eede44c34a69e4f4..b2f143f47d346e2a0ef13346e35ac3fb29e8da6d 100644 (file)
--- a/queue-2.6.24/series
+++ b/queue-2.6.24/series
@@ -60,3 +60,5 @@ fix-default-compose-table-initialization.patch
  scsi-gdth-bugfix-for-the-at-exit-problems.patch
  scsi-gdth-fix-to-internal-commands-execution.patch
  scsi-mpt-fusion-don-t-oops-if-numphys-0.patch
+sched-fix-race-in-schedule.patch
+nfsd-fix-oops-on-access-from-high-numbered-ports.patch
author	Chris Wright <chrisw@sous-sol.org>
	Mon, 17 Mar 2008 04:44:25 +0000 (21:44 -0700)
committer	Chris Wright <chrisw@sous-sol.org>
	Mon, 17 Mar 2008 04:44:25 +0000 (21:44 -0700)
queue-2.6.24/nfsd-fix-oops-on-access-from-high-numbered-ports.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.24/sched-fix-race-in-schedule.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.24/series		patch \| blob \| blame \| history