git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
more 2.6.18.1 patches
author Greg Kroah-Hartman <gregkh@suse.de>
Tue, 10 Oct 2006 06:38:55 +0000 (23:38 -0700)
committer Greg Kroah-Hartman <gregkh@suse.de>
Tue, 10 Oct 2006 06:38:55 +0000 (23:38 -0700)
queue-2.6.18/alsa-fix-initiailization-of-user-space-controls.patch [new file with mode: 0644]
queue-2.6.18/fix-longstanding-load-balancing-bug-in-the-scheduler.patch [new file with mode: 0644]
queue-2.6.18/ib-mthca-fix-lid-used-for-sending-traps.patch [new file with mode: 0644]
queue-2.6.18/jbd-fix-commit-of-ordered-data-buffers.patch [new file with mode: 0644]
queue-2.6.18/s390-user-readable-uninitialised-kernel-memory.patch [new file with mode: 0644]
queue-2.6.18/series
queue-2.6.18/usb-allow-compile-in-g_ether-fix-typo.patch [new file with mode: 0644]
queue-2.6.18/zone_reclaim-dynamic-slab-reclaim.patch [new file with mode: 0644]

diff --git a/queue-2.6.18/alsa-fix-initiailization-of-user-space-controls.patch b/queue-2.6.18/alsa-fix-initiailization-of-user-space-controls.patch
new file mode 100644 (file)
index 0000000..f519643
--- /dev/null
@@ -0,0 +1,33 @@
+From stable-bounces@linux.kernel.org Mon Sep 25 03:51:05 2006
+Date: Mon, 25 Sep 2006 11:49:01 +0200
+Message-ID: <s5h7izs8eeq.wl%tiwai@suse.de>
+From: Takashi Iwai <tiwai@suse.de>
+To: stable@kernel.org
+Subject: ALSA: Fix initialization of user-space controls
+
+From: Takashi Iwai <tiwai@suse.de>
+
+ALSA: Fix initialization of user-space controls
+
+Fix an assertion triggered when accessing a user-defined control due to a
+missing initialization (appears only when CONFIG_SND_DEBUG is enabled).
+
+  ALSA sound/core/control.c:660: BUG? (info->access == 0)
+
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ sound/core/control.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- linux-2.6.18.orig/sound/core/control.c
++++ linux-2.6.18/sound/core/control.c
+@@ -997,6 +997,7 @@ static int snd_ctl_elem_add(struct snd_c
+       if (ue == NULL)
+               return -ENOMEM;
+       ue->info = *info;
++      ue->info.access = 0;
+       ue->elem_data = (char *)ue + sizeof(*ue);
+       ue->elem_data_size = private_size;
+       kctl.private_free = snd_ctl_elem_user_free;
diff --git a/queue-2.6.18/fix-longstanding-load-balancing-bug-in-the-scheduler.patch b/queue-2.6.18/fix-longstanding-load-balancing-bug-in-the-scheduler.patch
new file mode 100644 (file)
index 0000000..e95a777
--- /dev/null
@@ -0,0 +1,215 @@
+From stable-bounces@linux.kernel.org Mon Sep 25 23:34:14 2006
+Message-Id: <200609260630.k8Q6UpbB011991@shell0.pdx.osdl.net>
+To: torvalds@osdl.org
+From: Christoph Lameter <christoph@sgi.com>
+Date: Mon, 25 Sep 2006 23:30:51 -0700
+Cc: akpm@osdl.org, nickpiggin@yahoo.com.au, suresh.b.siddha@intel.com,
+        christoph@sgi.com, pwil3058@bigpond.net.au, mingo@elte.hu,
+        hawkes@sgi.com, stable@kernel.org, clameter@sgi.com
+Subject: Fix longstanding load balancing bug in the scheduler
+
+From: Christoph Lameter <christoph@sgi.com>
+
+The scheduler will stop load balancing if the most busy processor contains
+processes pinned via processor affinity.
+
+The scheduler currently does only one search for the busiest cpu.  If it
+cannot pull any tasks away from the busiest cpu because they are pinned,
+the scheduler goes into a corner and sulks, leaving the idle processors idle.
+
+For example, if processor 0 is busy running four tasks pinned via taskset,
+processor 1 is running nothing, and someone has just started two processes
+on processor 2, then the scheduler will not move one of the two processes
+away from processor 2.
+
+This patch fixes that issue by forcing the scheduler to come out of its
+corner and retrying the load balancing by considering other processors for
+load balancing.
+
+This patch was originally developed by John Hawkes and discussed at
+http://marc.theaimsgroup.com/?l=linux-kernel&m=113901368523205&w=2.
+
+I have removed extraneous material and gone back to equipping struct rq
+with the cpu the queue is associated with since this makes the patch much
+easier and it is likely that others in the future will have the same
+difficulty of figuring out which processor owns which runqueue.
+
+The overhead added through these patches is a single word on the stack if
+the kernel is configured to support 32 cpus or less (32 bit).  For 32 bit
+environments the maximum number of cpus that can be configured is 255, which
+would result in an additional 32 bytes on the stack.  On IA64 up to
+1k cpus can be configured, which will result in an additional 128
+bytes on the stack.  The maximum additional cache footprint is one
+cacheline.  Typically memory use will be much less than a cacheline and the
+additional cpumask will be placed on the stack in a cacheline that already
+contains other local variables.
+
+
+Signed-off-by: Christoph Lameter <clameter@sgi.com>
+Cc: John Hawkes <hawkes@sgi.com>
+Cc: "Siddha, Suresh B" <suresh.b.siddha@intel.com>
+Cc: Ingo Molnar <mingo@elte.hu>
+Cc: Nick Piggin <nickpiggin@yahoo.com.au>
+Cc: Peter Williams <pwil3058@bigpond.net.au>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ kernel/sched.c |   54 ++++++++++++++++++++++++++++++++++++++++++++++--------
+ 1 file changed, 46 insertions(+), 8 deletions(-)
+
+--- linux-2.6.18.orig/kernel/sched.c
++++ linux-2.6.18/kernel/sched.c
+@@ -238,6 +238,7 @@ struct rq {
+       /* For active balancing */
+       int active_balance;
+       int push_cpu;
++      int cpu;                /* cpu of this runqueue */
+       struct task_struct *migration_thread;
+       struct list_head migration_queue;
+@@ -267,6 +268,15 @@ struct rq {
+ static DEFINE_PER_CPU(struct rq, runqueues);
++static inline int cpu_of(struct rq *rq)
++{
++#ifdef CONFIG_SMP
++      return rq->cpu;
++#else
++      return 0;
++#endif
++}
++
+ /*
+  * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
+  * See detach_destroy_domains: synchronize_sched for details.
+@@ -2211,7 +2221,8 @@ out:
+  */
+ static struct sched_group *
+ find_busiest_group(struct sched_domain *sd, int this_cpu,
+-                 unsigned long *imbalance, enum idle_type idle, int *sd_idle)
++                 unsigned long *imbalance, enum idle_type idle, int *sd_idle,
++                 cpumask_t *cpus)
+ {
+       struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
+       unsigned long max_load, avg_load, total_load, this_load, total_pwr;
+@@ -2248,7 +2259,12 @@ find_busiest_group(struct sched_domain *
+               sum_weighted_load = sum_nr_running = avg_load = 0;
+               for_each_cpu_mask(i, group->cpumask) {
+-                      struct rq *rq = cpu_rq(i);
++                      struct rq *rq;
++
++                      if (!cpu_isset(i, *cpus))
++                              continue;
++
++                      rq = cpu_rq(i);
+                       if (*sd_idle && !idle_cpu(i))
+                               *sd_idle = 0;
+@@ -2466,13 +2482,17 @@ ret:
+  */
+ static struct rq *
+ find_busiest_queue(struct sched_group *group, enum idle_type idle,
+-                 unsigned long imbalance)
++                 unsigned long imbalance, cpumask_t *cpus)
+ {
+       struct rq *busiest = NULL, *rq;
+       unsigned long max_load = 0;
+       int i;
+       for_each_cpu_mask(i, group->cpumask) {
++
++              if (!cpu_isset(i, *cpus))
++                      continue;
++
+               rq = cpu_rq(i);
+               if (rq->nr_running == 1 && rq->raw_weighted_load > imbalance)
+@@ -2511,6 +2531,7 @@ static int load_balance(int this_cpu, st
+       struct sched_group *group;
+       unsigned long imbalance;
+       struct rq *busiest;
++      cpumask_t cpus = CPU_MASK_ALL;
+       if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&
+           !sched_smt_power_savings)
+@@ -2518,13 +2539,15 @@ static int load_balance(int this_cpu, st
+       schedstat_inc(sd, lb_cnt[idle]);
+-      group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle);
++redo:
++      group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle,
++                                                      &cpus);
+       if (!group) {
+               schedstat_inc(sd, lb_nobusyg[idle]);
+               goto out_balanced;
+       }
+-      busiest = find_busiest_queue(group, idle, imbalance);
++      busiest = find_busiest_queue(group, idle, imbalance, &cpus);
+       if (!busiest) {
+               schedstat_inc(sd, lb_nobusyq[idle]);
+               goto out_balanced;
+@@ -2549,8 +2572,12 @@ static int load_balance(int this_cpu, st
+               double_rq_unlock(this_rq, busiest);
+               /* All tasks on this runqueue were pinned by CPU affinity */
+-              if (unlikely(all_pinned))
++              if (unlikely(all_pinned)) {
++                      cpu_clear(cpu_of(busiest), cpus);
++                      if (!cpus_empty(cpus))
++                              goto redo;
+                       goto out_balanced;
++              }
+       }
+       if (!nr_moved) {
+@@ -2639,18 +2666,22 @@ load_balance_newidle(int this_cpu, struc
+       unsigned long imbalance;
+       int nr_moved = 0;
+       int sd_idle = 0;
++      cpumask_t cpus = CPU_MASK_ALL;
+       if (sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings)
+               sd_idle = 1;
+       schedstat_inc(sd, lb_cnt[NEWLY_IDLE]);
+-      group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE, &sd_idle);
++redo:
++      group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE,
++                              &sd_idle, &cpus);
+       if (!group) {
+               schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]);
+               goto out_balanced;
+       }
+-      busiest = find_busiest_queue(group, NEWLY_IDLE, imbalance);
++      busiest = find_busiest_queue(group, NEWLY_IDLE, imbalance,
++                              &cpus);
+       if (!busiest) {
+               schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]);
+               goto out_balanced;
+@@ -2668,6 +2699,12 @@ load_balance_newidle(int this_cpu, struc
+                                       minus_1_or_zero(busiest->nr_running),
+                                       imbalance, sd, NEWLY_IDLE, NULL);
+               spin_unlock(&busiest->lock);
++
++              if (!nr_moved) {
++                      cpu_clear(cpu_of(busiest), cpus);
++                      if (!cpus_empty(cpus))
++                              goto redo;
++              }
+       }
+       if (!nr_moved) {
+@@ -6747,6 +6784,7 @@ void __init sched_init(void)
+                       rq->cpu_load[j] = 0;
+               rq->active_balance = 0;
+               rq->push_cpu = 0;
++              rq->cpu = i;
+               rq->migration_thread = NULL;
+               INIT_LIST_HEAD(&rq->migration_queue);
+ #endif
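
A minimal standalone sketch of the retry idea above, not the kernel code: keep a candidate mask, and when the busiest cpu turns out to hold only pinned tasks, clear it from the mask and search again instead of giving up.  The helper, the toy load/pinned tables, and NCPUS are invented for the illustration.

#include <stdbool.h>
#include <stdio.h>

#define NCPUS 4

/* Toy per-cpu state: runnable load and whether all of its tasks are pinned. */
static int  load[NCPUS]   = { 4, 0, 2, 0 };
static bool pinned[NCPUS] = { true, false, false, false };

/* Pick the busiest candidate cpu that has more than one task, or -1. */
static int find_busiest(const bool *cand)
{
	int i, best = -1;

	for (i = 0; i < NCPUS; i++)
		if (cand[i] && load[i] > 1 && (best < 0 || load[i] > load[best]))
			best = i;
	return best;
}

int main(void)
{
	bool cand[NCPUS] = { true, true, true, true };	/* like CPU_MASK_ALL */
	int busiest;

	while ((busiest = find_busiest(cand)) >= 0) {
		if (!pinned[busiest]) {
			printf("pull a task from cpu %d\n", busiest);
			break;
		}
		/* All tasks pinned: exclude this cpu and redo the search. */
		cand[busiest] = false;
	}
	return 0;
}
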
diff --git a/queue-2.6.18/ib-mthca-fix-lid-used-for-sending-traps.patch b/queue-2.6.18/ib-mthca-fix-lid-used-for-sending-traps.patch
new file mode 100644 (file)
index 0000000..3307d6c
--- /dev/null
@@ -0,0 +1,38 @@
+From stable-bounces@linux.kernel.org Wed Sep 20 15:47:56 2006
+To: stable@kernel.org
+From: Roland Dreier <rdreier@cisco.com>
+Date: Wed, 20 Sep 2006 15:47:16 -0700
+Message-ID: <adak63yjguj.fsf@cisco.com>
+Subject: IB/mthca: Fix lid used for sending traps
+
+From: Jack Morgenstein <jackm@dev.mellanox.co.il>
+
+The SM LID that traps are sent to is incorrectly set to the port LID.  This
+is a regression from 2.6.17 -- after a PortInfo MAD is received, no
+traps are sent to the SM LID.  The traps go to the loopback interface
+instead, and are dropped there.  The SM LID should be taken from the
+sm_lid of the PortInfo response.
+
+The bug was introduced by commit 12bbb2b7be7f5564952ebe0196623e97464b8ac5:
+       IB/mthca: Add client reregister event generation
+
+Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
+Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il>
+Signed-off-by: Roland Dreier <rolandd@cisco.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/infiniband/hw/mthca/mthca_mad.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- linux-2.6.18.orig/drivers/infiniband/hw/mthca/mthca_mad.c
++++ linux-2.6.18/drivers/infiniband/hw/mthca/mthca_mad.c
+@@ -119,7 +119,7 @@ static void smp_snoop(struct ib_device *
+                       mthca_update_rate(to_mdev(ibdev), port_num);
+                       update_sm_ah(to_mdev(ibdev), port_num,
+-                                   be16_to_cpu(pinfo->lid),
++                                   be16_to_cpu(pinfo->sm_lid),
+                                    pinfo->neighbormtu_mastersmsl & 0xf);
+                       event.device           = ibdev;
diff --git a/queue-2.6.18/jbd-fix-commit-of-ordered-data-buffers.patch b/queue-2.6.18/jbd-fix-commit-of-ordered-data-buffers.patch
new file mode 100644 (file)
index 0000000..76e7c0e
--- /dev/null
@@ -0,0 +1,239 @@
+From stable-bounces@linux.kernel.org Mon Sep 11 15:14:16 2006
+Message-Id: <200609112213.k8BMDbPC029844@shell0.pdx.osdl.net>
+To: mm-commits@vger.kernel.org
+From: akpm@osdl.org
+Date: Mon, 11 Sep 2006 15:13:37 -0700
+Cc: pbadari@us.ibm.com, jack@suse.cz, stable@kernel.org
+Subject: jbd: fix commit of ordered data buffers
+
+From: Jan Kara <jack@suse.cz>
+
+The original commit code assumes that when a buffer on the BJ_SyncData list
+is locked, it is being written to disk.  But this is not true and hence it
+can lead to potential data loss on a crash.  The code also didn't account
+for the fact that journal_dirty_data() can steal buffers from the committing
+transaction and hence could write buffers that no longer belong to the
+committing transaction.  Finally, it could happen that we tried to write out
+one buffer several times.
+
+The patch below tries to solve these problems by a complete rewrite of the
+data commit code.  We go through buffers on t_sync_datalist, lock buffers
+needing write-out and store them in an array.  Buffers are also immediately
+refiled to the BJ_Locked list or unfiled (if the write-out is completed).  When
+the array is full or we have to block on a buffer lock, we submit all
+accumulated buffers for IO.
+
+[suitable for 2.6.18.x around the 2.6.19-rc2 timeframe]
+
+Signed-off-by: Jan Kara <jack@suse.cz>
+Cc: Badari Pulavarty <pbadari@us.ibm.com>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/jbd/commit.c |  182 ++++++++++++++++++++++++++++++++++----------------------
+ 1 file changed, 113 insertions(+), 69 deletions(-)
+
+--- linux-2.6.18.orig/fs/jbd/commit.c
++++ linux-2.6.18/fs/jbd/commit.c
+@@ -160,6 +160,117 @@ static int journal_write_commit_record(j
+       return (ret == -EIO);
+ }
++void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
++{
++      int i;
++
++      for (i = 0; i < bufs; i++) {
++              wbuf[i]->b_end_io = end_buffer_write_sync;
++              /* We use-up our safety reference in submit_bh() */
++              submit_bh(WRITE, wbuf[i]);
++      }
++}
++
++/*
++ *  Submit all the data buffers to disk
++ */
++static void journal_submit_data_buffers(journal_t *journal,
++                              transaction_t *commit_transaction)
++{
++      struct journal_head *jh;
++      struct buffer_head *bh;
++      int locked;
++      int bufs = 0;
++      struct buffer_head **wbuf = journal->j_wbuf;
++
++      /*
++       * Whenever we unlock the journal and sleep, things can get added
++       * onto ->t_sync_datalist, so we have to keep looping back to
++       * write_out_data until we *know* that the list is empty.
++       *
++       * Cleanup any flushed data buffers from the data list.  Even in
++       * abort mode, we want to flush this out as soon as possible.
++       */
++write_out_data:
++      cond_resched();
++      spin_lock(&journal->j_list_lock);
++
++      while (commit_transaction->t_sync_datalist) {
++              jh = commit_transaction->t_sync_datalist;
++              bh = jh2bh(jh);
++              locked = 0;
++
++              /* Get reference just to make sure buffer does not disappear
++               * when we are forced to drop various locks */
++              get_bh(bh);
++              /* If the buffer is dirty, we need to submit IO and hence
++               * we need the buffer lock. We try to lock the buffer without
++               * blocking. If we fail, we need to drop j_list_lock and do
++               * blocking lock_buffer().
++               */
++              if (buffer_dirty(bh)) {
++                      if (test_set_buffer_locked(bh)) {
++                              BUFFER_TRACE(bh, "needs blocking lock");
++                              spin_unlock(&journal->j_list_lock);
++                              /* Write out all data to prevent deadlocks */
++                              journal_do_submit_data(wbuf, bufs);
++                              bufs = 0;
++                              lock_buffer(bh);
++                              spin_lock(&journal->j_list_lock);
++                      }
++                      locked = 1;
++              }
++              /* We have to get bh_state lock. Again out of order, sigh. */
++              if (!inverted_lock(journal, bh)) {
++                      jbd_lock_bh_state(bh);
++                      spin_lock(&journal->j_list_lock);
++              }
++              /* Someone already cleaned up the buffer? */
++              if (!buffer_jbd(bh)
++                      || jh->b_transaction != commit_transaction
++                      || jh->b_jlist != BJ_SyncData) {
++                      jbd_unlock_bh_state(bh);
++                      if (locked)
++                              unlock_buffer(bh);
++                      BUFFER_TRACE(bh, "already cleaned up");
++                      put_bh(bh);
++                      continue;
++              }
++              if (locked && test_clear_buffer_dirty(bh)) {
++                      BUFFER_TRACE(bh, "needs writeout, adding to array");
++                      wbuf[bufs++] = bh;
++                      __journal_file_buffer(jh, commit_transaction,
++                                              BJ_Locked);
++                      jbd_unlock_bh_state(bh);
++                      if (bufs == journal->j_wbufsize) {
++                              spin_unlock(&journal->j_list_lock);
++                              journal_do_submit_data(wbuf, bufs);
++                              bufs = 0;
++                              goto write_out_data;
++                      }
++              }
++              else {
++                      BUFFER_TRACE(bh, "writeout complete: unfile");
++                      __journal_unfile_buffer(jh);
++                      jbd_unlock_bh_state(bh);
++                      if (locked)
++                              unlock_buffer(bh);
++                      journal_remove_journal_head(bh);
++                      /* Once for our safety reference, once for
++                       * journal_remove_journal_head() */
++                      put_bh(bh);
++                      put_bh(bh);
++              }
++
++              if (lock_need_resched(&journal->j_list_lock)) {
++                      spin_unlock(&journal->j_list_lock);
++                      goto write_out_data;
++              }
++      }
++      spin_unlock(&journal->j_list_lock);
++      journal_do_submit_data(wbuf, bufs);
++}
++
+ /*
+  * journal_commit_transaction
+  *
+@@ -313,80 +424,13 @@ void journal_commit_transaction(journal_
+        * Now start flushing things to disk, in the order they appear
+        * on the transaction lists.  Data blocks go first.
+        */
+-
+       err = 0;
+-      /*
+-       * Whenever we unlock the journal and sleep, things can get added
+-       * onto ->t_sync_datalist, so we have to keep looping back to
+-       * write_out_data until we *know* that the list is empty.
+-       */
+-      bufs = 0;
+-      /*
+-       * Cleanup any flushed data buffers from the data list.  Even in
+-       * abort mode, we want to flush this out as soon as possible.
+-       */
+-write_out_data:
+-      cond_resched();
+-      spin_lock(&journal->j_list_lock);
+-
+-      while (commit_transaction->t_sync_datalist) {
+-              struct buffer_head *bh;
+-
+-              jh = commit_transaction->t_sync_datalist;
+-              commit_transaction->t_sync_datalist = jh->b_tnext;
+-              bh = jh2bh(jh);
+-              if (buffer_locked(bh)) {
+-                      BUFFER_TRACE(bh, "locked");
+-                      if (!inverted_lock(journal, bh))
+-                              goto write_out_data;
+-                      __journal_temp_unlink_buffer(jh);
+-                      __journal_file_buffer(jh, commit_transaction,
+-                                              BJ_Locked);
+-                      jbd_unlock_bh_state(bh);
+-                      if (lock_need_resched(&journal->j_list_lock)) {
+-                              spin_unlock(&journal->j_list_lock);
+-                              goto write_out_data;
+-                      }
+-              } else {
+-                      if (buffer_dirty(bh)) {
+-                              BUFFER_TRACE(bh, "start journal writeout");
+-                              get_bh(bh);
+-                              wbuf[bufs++] = bh;
+-                              if (bufs == journal->j_wbufsize) {
+-                                      jbd_debug(2, "submit %d writes\n",
+-                                                      bufs);
+-                                      spin_unlock(&journal->j_list_lock);
+-                                      ll_rw_block(SWRITE, bufs, wbuf);
+-                                      journal_brelse_array(wbuf, bufs);
+-                                      bufs = 0;
+-                                      goto write_out_data;
+-                              }
+-                      } else {
+-                              BUFFER_TRACE(bh, "writeout complete: unfile");
+-                              if (!inverted_lock(journal, bh))
+-                                      goto write_out_data;
+-                              __journal_unfile_buffer(jh);
+-                              jbd_unlock_bh_state(bh);
+-                              journal_remove_journal_head(bh);
+-                              put_bh(bh);
+-                              if (lock_need_resched(&journal->j_list_lock)) {
+-                                      spin_unlock(&journal->j_list_lock);
+-                                      goto write_out_data;
+-                              }
+-                      }
+-              }
+-      }
+-
+-      if (bufs) {
+-              spin_unlock(&journal->j_list_lock);
+-              ll_rw_block(SWRITE, bufs, wbuf);
+-              journal_brelse_array(wbuf, bufs);
+-              spin_lock(&journal->j_list_lock);
+-      }
++      journal_submit_data_buffers(journal, commit_transaction);
+       /*
+        * Wait for all previously submitted IO to complete.
+        */
++      spin_lock(&journal->j_list_lock);
+       while (commit_transaction->t_locked_list) {
+               struct buffer_head *bh;
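
A toy sketch of the batching pattern the new journal_submit_data_buffers() follows, not the jbd code itself: buffers are collected into a fixed-size array, and the whole batch is submitted either when the array fills up or just before the walker has to block, so no pending I/O is held back across a sleep.  BATCH, queue_buffer() and the buffer numbers are invented for the demonstration.

#include <stdio.h>

#define BATCH 3			/* stands in for journal->j_wbufsize */

static int batch[BATCH], nbatch;

static void submit_batch(void)	/* stands in for journal_do_submit_data() */
{
	int i;

	for (i = 0; i < nbatch; i++)
		printf("submit buffer %d\n", batch[i]);
	nbatch = 0;
}

static void queue_buffer(int bh, int must_block)
{
	if (must_block)		/* about to sleep: push out what we have first */
		submit_batch();
	batch[nbatch++] = bh;
	if (nbatch == BATCH)	/* array full: push the batch out now */
		submit_batch();
}

int main(void)
{
	int bh;

	for (bh = 1; bh <= 7; bh++)
		queue_buffer(bh, bh == 5 /* pretend buffer 5 needs a blocking lock */);
	submit_batch();		/* flush whatever is left at the end */
	return 0;
}
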
diff --git a/queue-2.6.18/s390-user-readable-uninitialised-kernel-memory.patch b/queue-2.6.18/s390-user-readable-uninitialised-kernel-memory.patch
new file mode 100644 (file)
index 0000000..f6fe990
--- /dev/null
@@ -0,0 +1,69 @@
+From stable-bounces@linux.kernel.org Thu Sep 28 06:32:54 2006
+Date: Thu, 28 Sep 2006 15:31:52 +0200
+From: Martin Schwidefsky <schwidefsky@de.ibm.com>
+To: gregkh@suse.de, bunk@stusta.de
+Message-ID: <20060928133152.GA10672@skybase>
+Content-Disposition: inline
+Cc: stable@kernel.org
+Subject: S390: user readable uninitialised kernel memory (CVE-2006-5174)
+
+From: Martin Schwidefsky <schwidefsky@de.ibm.com>
+
+[S390] user readable uninitialised kernel memory.
+
+A user space program can read uninitialised kernel memory
+by appending to a file from a bad address and then reading
+the result back. The cause is the copy_from_user function,
+which does not clear the remaining bytes of the kernel
+buffer after it gets a fault on the user space address.
+
+Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/s390/lib/uaccess.S   |   12 +++++++++++-
+ arch/s390/lib/uaccess64.S |   12 +++++++++++-
+ 2 files changed, 22 insertions(+), 2 deletions(-)
+
+--- linux-2.6.18.orig/arch/s390/lib/uaccess.S
++++ linux-2.6.18/arch/s390/lib/uaccess.S
+@@ -40,7 +40,17 @@ __copy_from_user_asm:
+       # move with the reduced length which is < 256
+ 5:    mvcp    0(%r5,%r2),0(%r4),%r0
+       slr     %r3,%r5
+-6:    lr      %r2,%r3
++      alr     %r2,%r5
++6:    lgr     %r5,%r3         # copy remaining size
++      ahi     %r5,-1          # subtract 1 for xc loop
++      bras    %r4,8f
++      xc      0(1,%2),0(%2)
++7:    xc      0(256,%2),0(%2)
++      la      %r2,256(%r2)
++8:    ahji    %r5,-256
++      jnm     7b
++      ex      %r5,0(%r2)
++9:    lr      %r2,%r3
+       br      %r14
+         .section __ex_table,"a"
+       .long   0b,4b
+--- linux-2.6.18.orig/arch/s390/lib/uaccess64.S
++++ linux-2.6.18/arch/s390/lib/uaccess64.S
+@@ -40,7 +40,17 @@ __copy_from_user_asm:
+       # move with the reduced length which is < 256
+ 5:    mvcp    0(%r5,%r2),0(%r4),%r0
+       slgr    %r3,%r5
+-6:    lgr     %r2,%r3
++      algr    %r2,%r5
++6:    lgr     %r5,%r3         # copy remaining size
++      aghi    %r5,-1          # subtract 1 for xc loop
++      bras    %r4,8f
++      xc      0(1,%r2),0(%r2)
++7:    xc      0(256,%r2),0(%r2)
++      la      %r2,256(%r2)
++8:    aghi    %r5,-256
++      jnm     7b
++      ex      %r5,0(%r2)
++9:    lgr     %r2,%r3
+       br      %r14
+         .section __ex_table,"a"
+       .quad   0b,4b
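
The fix above restores a general copy_from_user() guarantee; the standalone toy below shows that contract rather than the s390 assembler: if the low-level copy faults partway through, the tail of the kernel buffer that was never written must be zeroed so stale kernel data cannot leak back to user space.  raw_copy() and its 4-byte fault point are invented for the demonstration.

#include <stdio.h>
#include <string.h>

/* Toy stand-in for the low-level copy: succeeds for at most 4 bytes and
 * returns the number of bytes NOT copied, like the real primitives do. */
static size_t raw_copy(char *dst, const char *src, size_t n)
{
	size_t done = n < 4 ? n : 4;

	memcpy(dst, src, done);
	return n - done;
}

/* The contract the patch restores: zero whatever raw_copy() left untouched. */
static size_t copy_from_user_sketch(char *to, const char *from, size_t n)
{
	size_t left = raw_copy(to, from, n);

	if (left)
		memset(to + n - left, 0, left);
	return left;
}

int main(void)
{
	char kbuf[8];
	size_t i;

	memset(kbuf, 0xAA, sizeof(kbuf));	/* pretend stale kernel data */
	copy_from_user_sketch(kbuf, "12345678", sizeof(kbuf));
	for (i = 0; i < sizeof(kbuf); i++)
		printf("%02x ", (unsigned char)kbuf[i]);
	printf("\n");				/* tail reads back as 00, not aa */
	return 0;
}
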
diff --git a/queue-2.6.18/series b/queue-2.6.18/series
index e39b176fd70507b618dd48bbfc65149d26e15734..c3c8e5ba232f974995db722136f586cfd90294e1 100644 (file)
@@ -9,3 +9,10 @@ video-pvrusb2-improve-24xxx-config-option-description.patch
 video-pvrusb2-suppress-compiler-warning.patch
 video-pvrusb2-limit-hor-res-for-24xxx-devices.patch
 zd1211rw-zd1211b-asic-fwt-not-jointly-decoder.patch
+s390-user-readable-uninitialised-kernel-memory.patch
+ib-mthca-fix-lid-used-for-sending-traps.patch
+usb-allow-compile-in-g_ether-fix-typo.patch
+alsa-fix-initiailization-of-user-space-controls.patch
+jbd-fix-commit-of-ordered-data-buffers.patch
+fix-longstanding-load-balancing-bug-in-the-scheduler.patch
+zone_reclaim-dynamic-slab-reclaim.patch
diff --git a/queue-2.6.18/usb-allow-compile-in-g_ether-fix-typo.patch b/queue-2.6.18/usb-allow-compile-in-g_ether-fix-typo.patch
new file mode 100644 (file)
index 0000000..7510798
--- /dev/null
@@ -0,0 +1,39 @@
+From stable-bounces@linux.kernel.org Fri Sep 22 20:18:48 2006
+Date: Fri, 22 Sep 2006 20:17:48 -0700
+To: stable@kernel.org
+Cc: Andrew Morton <akpm@osdl.org>
+Message-Id: <20060922201748.bf3245d6.akpm@osdl.org>
+Subject: USB: Allow compile in g_ether, fix typo
+
+From: Tony Lindgren <tony@atomide.com>
+
+Allows g_ether to be compiled in and fixes a typo in the MUSB_HDRC config option.
+
+Signed-off-by: Tony Lindgren <tony@atomide.com>
+Cc: David Brownell <david-b@pacbell.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/usb/gadget/ether.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- linux-2.6.18.orig/drivers/usb/gadget/ether.c
++++ linux-2.6.18/drivers/usb/gadget/ether.c
+@@ -262,7 +262,7 @@ MODULE_PARM_DESC(host_addr, "Host Ethern
+ #define DEV_CONFIG_CDC
+ #endif
+-#ifdef CONFIG_USB_GADGET_MUSBHDRC
++#ifdef CONFIG_USB_GADGET_MUSB_HDRC
+ #define DEV_CONFIG_CDC
+ #endif
+@@ -2564,7 +2564,7 @@ static struct usb_gadget_driver eth_driv
+       .function       = (char *) driver_desc,
+       .bind           = eth_bind,
+-      .unbind         = __exit_p(eth_unbind),
++      .unbind         = eth_unbind,
+       .setup          = eth_setup,
+       .disconnect     = eth_disconnect,
diff --git a/queue-2.6.18/zone_reclaim-dynamic-slab-reclaim.patch b/queue-2.6.18/zone_reclaim-dynamic-slab-reclaim.patch
new file mode 100644 (file)
index 0000000..5cce46b
--- /dev/null
@@ -0,0 +1,315 @@
+From stable-bounces@linux.kernel.org Mon Oct  2 10:46:22 2006
+Date: Mon, 2 Oct 2006 10:45:24 -0700 (PDT)
+From: Christoph Lameter <clameter@sgi.com>
+To: stable@kernel.org
+Message-ID: <Pine.LNX.4.64.0610021044190.12969@schroedinger.engr.sgi.com>
+Subject: zone_reclaim: dynamic slab reclaim
+
+From: Christoph Lameter <clameter@sgi.com>
+
+http://www.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=0ff38490c836dc379ff7ec45b10a15a662f4e5f6
+
+
+Currently one can enable slab reclaim by setting an explicit option in
+/proc/sys/vm/zone_reclaim_mode.  Slab reclaim is then used as a final
+option if the freeing of unmapped file backed pages is not enough to free
+enough pages to allow a local allocation.
+
+However, that means that the slab can grow excessively and that most memory
+of a node may be used by slabs.  We have had a case where a machine with
+46GB of memory was using 40-42GB for slab.  Zone reclaim was effective in
+dealing with pagecache pages.  However, slab reclaim was only done during
+global reclaim (which is a bit rare on NUMA systems).
+
+This patch implements slab reclaim during zone reclaim.  Zone reclaim
+occurs if there is a danger of an off node allocation.  At that point we
+
+1. Shrink the per node page cache if the number of pagecache
+   pages is more than min_unmapped_ratio percent of pages in a zone.
+
+2. Shrink the slab cache if the number of the node's reclaimable slab pages
+   (patch depends on an earlier one that implements that counter)
+   is more than min_slab_ratio (a new /proc/sys/vm tunable).
+
+The shrinking of the slab cache is a bit problematic since it is not node
+specific.  So we simply calculate what point in the slab we want to reach
+(current per node slab use minus the number of pages that need to be
+allocated) and then repeatedly run the global reclaim until that is
+unsuccessful or we have reached the limit.  I hope we will have zone based
+slab reclaim at some point which will make that easier.
+
+The default for min_slab_ratio is 5%.
+
+Also remove the slab option from /proc/sys/vm/zone_reclaim_mode.
+
+[akpm@osdl.org: cleanups]
+Signed-off-by: Christoph Lameter <clameter@sgi.com>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+Signed-off-by: Linus Torvalds <torvalds@osdl.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ Documentation/sysctl/vm.txt |   27 +++++++++++++++-----
+ include/linux/mmzone.h      |    3 ++
+ include/linux/swap.h        |    1 
+ include/linux/sysctl.h      |    1 
+ kernel/sysctl.c             |   11 ++++++++
+ mm/page_alloc.c             |   17 ++++++++++++
+ mm/vmscan.c                 |   58 ++++++++++++++++++++++++++++----------------
+ 7 files changed, 90 insertions(+), 28 deletions(-)
+
+--- linux-2.6.18.orig/Documentation/sysctl/vm.txt
++++ linux-2.6.18/Documentation/sysctl/vm.txt
+@@ -29,6 +29,7 @@ Currently, these files are in /proc/sys/
+ - drop-caches
+ - zone_reclaim_mode
+ - min_unmapped_ratio
++- min_slab_ratio
+ - panic_on_oom
+ ==============================================================
+@@ -138,7 +139,6 @@ This is value ORed together of
+ 1     = Zone reclaim on
+ 2     = Zone reclaim writes dirty pages out
+ 4     = Zone reclaim swaps pages
+-8     = Also do a global slab reclaim pass
+ zone_reclaim_mode is set during bootup to 1 if it is determined that pages
+ from remote zones will cause a measurable performance reduction. The
+@@ -162,18 +162,13 @@ Allowing regular swap effectively restri
+ node unless explicitly overridden by memory policies or cpuset
+ configurations.
+-It may be advisable to allow slab reclaim if the system makes heavy
+-use of files and builds up large slab caches. However, the slab
+-shrink operation is global, may take a long time and free slabs
+-in all nodes of the system.
+-
+ =============================================================
+ min_unmapped_ratio:
+ This is available only on NUMA kernels.
+-A percentage of the file backed pages in each zone.  Zone reclaim will only
++A percentage of the total pages in each zone.  Zone reclaim will only
+ occur if more than this percentage of pages are file backed and unmapped.
+ This is to insure that a minimal amount of local pages is still available for
+ file I/O even if the node is overallocated.
+@@ -182,6 +177,24 @@ The default is 1 percent.
+ =============================================================
++min_slab_ratio:
++
++This is available only on NUMA kernels.
++
++A percentage of the total pages in each zone.  On Zone reclaim
++(fallback from the local zone occurs) slabs will be reclaimed if more
++than this percentage of pages in a zone are reclaimable slab pages.
++This insures that the slab growth stays under control even in NUMA
++systems that rarely perform global reclaim.
++
++The default is 5 percent.
++
++Note that slab reclaim is triggered in a per zone / node fashion.
++The process of reclaiming slab memory is currently not node specific
++and may not be fast.
++
++=============================================================
++
+ panic_on_oom
+ This enables or disables panic on out-of-memory feature.  If this is set to 1,
+--- linux-2.6.18.orig/include/linux/mmzone.h
++++ linux-2.6.18/include/linux/mmzone.h
+@@ -155,6 +155,7 @@ struct zone {
+        * zone reclaim becomes active if more unmapped pages exist.
+        */
+       unsigned long           min_unmapped_ratio;
++      unsigned long           min_slab_pages;
+       struct per_cpu_pageset  *pageset[NR_CPUS];
+ #else
+       struct per_cpu_pageset  pageset[NR_CPUS];
+@@ -421,6 +422,8 @@ int percpu_pagelist_fraction_sysctl_hand
+                                       void __user *, size_t *, loff_t *);
+ int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
+                       struct file *, void __user *, size_t *, loff_t *);
++int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
++                      struct file *, void __user *, size_t *, loff_t *);
+ #include <linux/topology.h>
+ /* Returns the number of the current Node. */
+--- linux-2.6.18.orig/include/linux/swap.h
++++ linux-2.6.18/include/linux/swap.h
+@@ -190,6 +190,7 @@ extern long vm_total_pages;
+ #ifdef CONFIG_NUMA
+ extern int zone_reclaim_mode;
+ extern int sysctl_min_unmapped_ratio;
++extern int sysctl_min_slab_ratio;
+ extern int zone_reclaim(struct zone *, gfp_t, unsigned int);
+ #else
+ #define zone_reclaim_mode 0
+--- linux-2.6.18.orig/include/linux/sysctl.h
++++ linux-2.6.18/include/linux/sysctl.h
+@@ -191,6 +191,7 @@ enum
+       VM_MIN_UNMAPPED=32,     /* Set min percent of unmapped pages */
+       VM_PANIC_ON_OOM=33,     /* panic at out-of-memory */
+       VM_VDSO_ENABLED=34,     /* map VDSO into new processes? */
++      VM_MIN_SLAB=35,          /* Percent pages ignored by zone reclaim */
+ };
+--- linux-2.6.18.orig/kernel/sysctl.c
++++ linux-2.6.18/kernel/sysctl.c
+@@ -943,6 +943,17 @@ static ctl_table vm_table[] = {
+               .extra1         = &zero,
+               .extra2         = &one_hundred,
+       },
++      {
++              .ctl_name       = VM_MIN_SLAB,
++              .procname       = "min_slab_ratio",
++              .data           = &sysctl_min_slab_ratio,
++              .maxlen         = sizeof(sysctl_min_slab_ratio),
++              .mode           = 0644,
++              .proc_handler   = &sysctl_min_slab_ratio_sysctl_handler,
++              .strategy       = &sysctl_intvec,
++              .extra1         = &zero,
++              .extra2         = &one_hundred,
++      },
+ #endif
+ #ifdef CONFIG_X86_32
+       {
+--- linux-2.6.18.orig/mm/page_alloc.c
++++ linux-2.6.18/mm/page_alloc.c
+@@ -2008,6 +2008,7 @@ static void __meminit free_area_init_cor
+ #ifdef CONFIG_NUMA
+               zone->min_unmapped_ratio = (realsize*sysctl_min_unmapped_ratio)
+                                               / 100;
++              zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100;
+ #endif
+               zone->name = zone_names[j];
+               spin_lock_init(&zone->lock);
+@@ -2318,6 +2319,22 @@ int sysctl_min_unmapped_ratio_sysctl_han
+                               sysctl_min_unmapped_ratio) / 100;
+       return 0;
+ }
++
++int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
++      struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
++{
++      struct zone *zone;
++      int rc;
++
++      rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
++      if (rc)
++              return rc;
++
++      for_each_zone(zone)
++              zone->min_slab_pages = (zone->present_pages *
++                              sysctl_min_slab_ratio) / 100;
++      return 0;
++}
+ #endif
+ /*
+--- linux-2.6.18.orig/mm/vmscan.c
++++ linux-2.6.18/mm/vmscan.c
+@@ -1510,7 +1510,6 @@ int zone_reclaim_mode __read_mostly;
+ #define RECLAIM_ZONE (1<<0)   /* Run shrink_cache on the zone */
+ #define RECLAIM_WRITE (1<<1)  /* Writeout pages during reclaim */
+ #define RECLAIM_SWAP (1<<2)   /* Swap pages out during reclaim */
+-#define RECLAIM_SLAB (1<<3)   /* Do a global slab shrink if the zone is out of memory */
+ /*
+  * Priority for ZONE_RECLAIM. This determines the fraction of pages
+@@ -1526,6 +1525,12 @@ int zone_reclaim_mode __read_mostly;
+ int sysctl_min_unmapped_ratio = 1;
+ /*
++ * If the number of slab pages in a zone grows beyond this percentage then
++ * slab reclaim needs to occur.
++ */
++int sysctl_min_slab_ratio = 5;
++
++/*
+  * Try to free up some pages from this zone through reclaim.
+  */
+ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
+@@ -1556,29 +1561,37 @@ static int __zone_reclaim(struct zone *z
+       reclaim_state.reclaimed_slab = 0;
+       p->reclaim_state = &reclaim_state;
+-      /*
+-       * Free memory by calling shrink zone with increasing priorities
+-       * until we have enough memory freed.
+-       */
+-      priority = ZONE_RECLAIM_PRIORITY;
+-      do {
+-              nr_reclaimed += shrink_zone(priority, zone, &sc);
+-              priority--;
+-      } while (priority >= 0 && nr_reclaimed < nr_pages);
++      if (zone_page_state(zone, NR_FILE_PAGES) -
++              zone_page_state(zone, NR_FILE_MAPPED) >
++              zone->min_unmapped_ratio) {
++              /*
++               * Free memory by calling shrink zone with increasing
++               * priorities until we have enough memory freed.
++               */
++              priority = ZONE_RECLAIM_PRIORITY;
++              do {
++                      nr_reclaimed += shrink_zone(priority, zone, &sc);
++                      priority--;
++              } while (priority >= 0 && nr_reclaimed < nr_pages);
++      }
+-      if (nr_reclaimed < nr_pages && (zone_reclaim_mode & RECLAIM_SLAB)) {
++      if (zone_page_state(zone, NR_SLAB) > zone->min_slab_pages) {
+               /*
+                * shrink_slab() does not currently allow us to determine how
+-               * many pages were freed in this zone. So we just shake the slab
+-               * a bit and then go off node for this particular allocation
+-               * despite possibly having freed enough memory to allocate in
+-               * this zone.  If we freed local memory then the next
+-               * allocations will be local again.
++               * many pages were freed in this zone. So we take the current
++               * number of slab pages and shake the slab until it is reduced
++               * by the same nr_pages that we used for reclaiming unmapped
++               * pages.
+                *
+-               * shrink_slab will free memory on all zones and may take
+-               * a long time.
++               * Note that shrink_slab will free memory on all zones and may
++               * take a long time.
+                */
+-              shrink_slab(sc.nr_scanned, gfp_mask, order);
++              unsigned long limit = zone_page_state(zone,
++                              NR_SLAB) - nr_pages;
++
++              while (shrink_slab(sc.nr_scanned, gfp_mask, order) &&
++                      zone_page_state(zone, NR_SLAB) > limit)
++                      ;
+       }
+       p->reclaim_state = NULL;
+@@ -1592,7 +1605,8 @@ int zone_reclaim(struct zone *zone, gfp_
+       int node_id;
+       /*
+-       * Zone reclaim reclaims unmapped file backed pages.
++       * Zone reclaim reclaims unmapped file backed pages and
++       * slab pages if we are over the defined limits.
+        *
+        * A small portion of unmapped file backed pages is needed for
+        * file I/O otherwise pages read by file I/O will be immediately
+@@ -1601,7 +1615,9 @@ int zone_reclaim(struct zone *zone, gfp_
+        * unmapped file backed pages.
+        */
+       if (zone_page_state(zone, NR_FILE_PAGES) -
+-          zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_ratio)
++          zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_ratio
++          && zone_page_state(zone, NR_SLAB)
++                      <= zone->min_slab_pages)
+               return 0;
+       /*
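
A compact sketch of the reclaim policy described above, using made-up zone numbers rather than real kernel state: the page cache is shrunk only while unmapped file pages exceed the min_unmapped threshold, and the slab is shaken one global pass at a time only while it sits above min_slab_pages, stopping once roughly nr_pages worth has been trimmed.  All struct fields, helpers and constants here are illustrative.

#include <stdio.h>

/* Toy zone counters, all in pages; the min_* fields play the role of the
 * precomputed min_unmapped_ratio / min_slab_pages thresholds. */
struct zone_model {
	long unmapped_file_pages;
	long slab_pages;
	long min_unmapped_pages;
	long min_slab_pages;
};

static long shrink_pagecache(struct zone_model *z, long want)
{
	long got = want < z->unmapped_file_pages ? want : z->unmapped_file_pages;

	z->unmapped_file_pages -= got;
	return got;
}

/* One global slab pass; pretend each pass frees up to 16 pages. */
static long shrink_slab_once(struct zone_model *z)
{
	long got = z->slab_pages < 16 ? z->slab_pages : 16;

	z->slab_pages -= got;
	return got;
}

static void zone_reclaim_sketch(struct zone_model *z, long nr_pages)
{
	if (z->unmapped_file_pages > z->min_unmapped_pages)
		shrink_pagecache(z, nr_pages);

	if (z->slab_pages > z->min_slab_pages) {
		/* Aim to shave nr_pages off the slab, one pass at a time. */
		long target = z->slab_pages - nr_pages;

		while (z->slab_pages > target && shrink_slab_once(z))
			;
	}
}

int main(void)
{
	struct zone_model z = { 200, 500, 100, 400 };

	zone_reclaim_sketch(&z, 32);
	printf("unmapped=%ld slab=%ld\n", z.unmapped_file_pages, z.slab_pages);
	return 0;
}
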