more .25 patches

author Greg Kroah-Hartman <gregkh@suse.de>

Wed, 16 Jul 2008 14:59:07 +0000 (07:59 -0700)

committer Greg Kroah-Hartman <gregkh@suse.de>

Wed, 16 Jul 2008 14:59:07 +0000 (07:59 -0700)
author Greg Kroah-Hartman <gregkh@suse.de>
Wed, 16 Jul 2008 14:59:07 +0000 (07:59 -0700)
committer Greg Kroah-Hartman <gregkh@suse.de>
Wed, 16 Jul 2008 14:59:07 +0000 (07:59 -0700)
diff --git a/queue-2.6.25/b43-do-not-return-tx_busy-from-op_tx.patch b/queue-2.6.25/b43-do-not-return-tx_busy-from-op_tx.patch

new file mode 100644 (file)

index 0000000..6db383b
--- /dev/null
+++ b/queue-2.6.25/b43-do-not-return-tx_busy-from-op_tx.patch
@@ -0,0 +1,52 @@
+From stable-bounces@linux.kernel.org Wed Jul  2 16:42:53 2008
+From: Michael Buesch <mb@bu3sch.de>
+Date: Thu, 3 Jul 2008 01:04:29 +0200
+Subject: b43: Do not return TX_BUSY from op_tx
+To: stable@kernel.org
+Cc: linux-wireless@vger.kernel.org, John Linville <linville@tuxdriver.com>, bcm43xx-dev@lists.berlios.de
+Message-ID: <200807030104.30100.mb@bu3sch.de>
+Content-Disposition: inline
+
+From: Michael Buesch <mb@bu3sch.de>
+
+Never return TX_BUSY from op_tx. It doesn't make sense to return
+TX_BUSY, if we can not transmit the packet.
+Drop the packet and return TX_OK.
+This will fix the resume hang.
+
+Upstream commit is
+66193a7cef2239bfd1b9b96e304770facf7a49c7
+
+Signed-off-by: Michael Buesch <mb@bu3sch.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+
+---
+ drivers/net/wireless/b43/main.c |    9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/wireless/b43/main.c
++++ b/drivers/net/wireless/b43/main.c
+@@ -2607,7 +2607,7 @@ static int b43_op_tx(struct ieee80211_hw
+       int err;
+ 
+       if (unlikely(!dev))
+-              return NETDEV_TX_BUSY;
++              goto drop_packet;
+ 
+       /* Transmissions on seperate queues can run concurrently. */
+       read_lock_irqsave(&wl->tx_lock, flags);
+@@ -2619,7 +2619,12 @@ static int b43_op_tx(struct ieee80211_hw
+       read_unlock_irqrestore(&wl->tx_lock, flags);
+ 
+       if (unlikely(err))
+-              return NETDEV_TX_BUSY;
++              goto drop_packet;
++      return NETDEV_TX_OK;
++
++drop_packet:
++      /* We can not transmit this packet. Drop it. */
++      dev_kfree_skb_any(skb);
+       return NETDEV_TX_OK;
+ }
+ 
diff --git a/queue-2.6.25/b43-fix-possible-mmio-access-while-device-is-down.patch b/queue-2.6.25/b43-fix-possible-mmio-access-while-device-is-down.patch

new file mode 100644 (file)

index 0000000..f93dfc5
--- /dev/null
+++ b/queue-2.6.25/b43-fix-possible-mmio-access-while-device-is-down.patch
@@ -0,0 +1,37 @@
+From stable-bounces@linux.kernel.org Wed Jul  2 18:24:51 2008
+From: Michael Buesch <mb@bu3sch.de>
+Date: Thu, 3 Jul 2008 02:04:33 +0200
+Subject: b43: Fix possible MMIO access while device is down
+To: stable@kernel.org
+Cc: linux-wireless@vger.kernel.org, John Linville <linville@tuxdriver.com>, bcm43xx-dev@lists.berlios.de
+Message-ID: <200807030204.33987.mb@bu3sch.de>
+Content-Disposition: inline
+
+From: Michael Buesch <mb@bu3sch.de>
+
+This fixes a possible MMIO access while the device is still down
+from a suspend cycle. MMIO accesses with the device powered down
+may cause crashes on certain devices.
+
+Upstream commit is
+33598cf261e393f2b3349cb55509e358014bfd1f
+
+Signed-off-by: Michael Buesch <mb@bu3sch.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/net/wireless/b43/leds.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/net/wireless/b43/leds.c
++++ b/drivers/net/wireless/b43/leds.c
+@@ -72,6 +72,9 @@ static void b43_led_brightness_set(struc
+       struct b43_wldev *dev = led->dev;
+       bool radio_enabled;
+ 
++      if (unlikely(b43_status(dev) < B43_STAT_INITIALIZED))
++              return;
++
+       /* Checking the radio-enabled status here is slightly racy,
+        * but we want to avoid the locking overhead and we don't care
+        * whether the LED has the wrong state for a second. */
diff --git a/queue-2.6.25/b43legacy-do-not-return-tx_busy-from-op_tx.patch b/queue-2.6.25/b43legacy-do-not-return-tx_busy-from-op_tx.patch

new file mode 100644 (file)

index 0000000..c3ced51
--- /dev/null
+++ b/queue-2.6.25/b43legacy-do-not-return-tx_busy-from-op_tx.patch
@@ -0,0 +1,40 @@
+From stable-bounces@linux.kernel.org Wed Jul  2 16:42:46 2008
+From: Michael Buesch <mb@bu3sch.de>
+Date: Thu, 3 Jul 2008 01:06:32 +0200
+Subject: b43legacy: Do not return TX_BUSY from op_tx
+To: stable@kernel.org
+Cc: linux-wireless@vger.kernel.org, John Linville <linville@tuxdriver.com>, Stefano Brivio <stefano.brivio@polimi.it>, bcm43xx-dev@lists.berlios.de
+Message-ID: <200807030106.32960.mb@bu3sch.de>
+Content-Disposition: inline
+
+
+Never return TX_BUSY from op_tx. It doesn't make sense to return
+TX_BUSY, if we can not transmit the packet.
+Drop the packet and return TX_OK.
+
+Upstream commit is
+eb803e419ca6be06ece2e42027bb4ebd8ec09f91
+
+Signed-off-by: Michael Buesch <mb@bu3sch.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+
+---
+ drivers/net/wireless/b43legacy/main.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/wireless/b43legacy/main.c
++++ b/drivers/net/wireless/b43legacy/main.c
+@@ -2350,8 +2350,10 @@ static int b43legacy_op_tx(struct ieee80
+       } else
+               err = b43legacy_dma_tx(dev, skb, ctl);
+ out:
+-      if (unlikely(err))
+-              return NETDEV_TX_BUSY;
++      if (unlikely(err)) {
++              /* Drop the packet. */
++              dev_kfree_skb_any(skb);
++      }
+       return NETDEV_TX_OK;
+ }
+ 
diff --git a/queue-2.6.25/block-fix-the-starving-writes-bug-in-the-anticipatory-io-scheduler.patch b/queue-2.6.25/block-fix-the-starving-writes-bug-in-the-anticipatory-io-scheduler.patch

new file mode 100644 (file)

index 0000000..2ff1783
--- /dev/null
+++ b/queue-2.6.25/block-fix-the-starving-writes-bug-in-the-anticipatory-io-scheduler.patch
@@ -0,0 +1,129 @@
+From stable-bounces@linux.kernel.org Wed Jul  2 19:45:38 2008
+From: Divyesh Shah <dpshah@google.com>
+Date: Thu, 3 Jul 2008 02:45:26 GMT
+Subject: block: Fix the starving writes bug in the anticipatory IO scheduler
+To: jejb@kernel.org, stable@kernel.org
+Message-ID: <200807030245.m632jQI1007478@hera.kernel.org>
+
+From: Divyesh Shah <dpshah@google.com>
+
+commit d585d0b9d73ed999cc7b8cf3cac4a5b01abb544e upstream
+
+AS scheduler alternates between issuing read and write batches. It does
+the batch switch only after all requests from the previous batch are
+completed.
+
+When switching to a write batch, if there is an on-going read request,
+it waits for its completion and indicates its intention of switching by
+setting ad->changed_batch and the new direction but does not update the
+batch_expire_time for the new write batch which it does in the case of
+no previous pending requests.
+On completion of the read request, it sees that we were waiting for the
+switch and schedules work for kblockd right away and resets the
+ad->changed_batch flag.
+Now when kblockd enters dispatch_request where it is expected to pick
+up a write request, it in turn ends the write batch because the
+batch_expire_time was not updated and shows the expire timestamp for
+the previous batch.
+
+This results in the write starvation for all the cases where there is
+the intention for switching to a write batch, but there is a previous
+in-flight read request and the batch gets reverted to a read_batch
+right away.
+
+This also holds true in the reverse case (switching from a write batch
+to a read batch with an in-flight write request).
+
+I've checked that this bug exists on 2.6.11, 2.6.18, 2.6.24 and
+linux-2.6-block git HEAD. I've tested the fix on x86 platforms with
+SCSI drives where the driver asks for the next request while a current
+request is in-flight.
+
+This patch is based off linux-2.6-block git HEAD.
+
+Bug reproduction:
+A simple scenario which reproduces this bug is:
+- dd if=/dev/hda3 of=/dev/null &
+- lilo
+   The lilo takes forever to complete.
+
+This can also be reproduced fairly easily with the earlier dd and
+another test
+program doing msync().
+
+The example test program below should print out a message after every
+iteration
+but it simply hangs forever. With this bugfix it makes forward progress.
+
+====
+Example test program using msync() (thanks to suleiman AT google DOT
+com)
+
+inline uint64_t
+rdtsc(void)
+{
+         int64_t tsc;
+
+         __asm __volatile("rdtsc" : "=A" (tsc));
+         return (tsc);
+}
+
+int
+main(int argc, char **argv)
+{
+         struct stat st;
+         uint64_t e, s, t;
+         char *p, q;
+         long i;
+         int fd;
+
+         if (argc < 2) {
+                 printf("Usage: %s <file>\n", argv[0]);
+                 return (1);
+         }
+
+         if ((fd = open(argv[1], O_RDWR | O_NOATIME)) < 0)
+                 err(1, "open");
+
+         if (fstat(fd, &st) < 0)
+                 err(1, "fstat");
+
+         p = mmap(NULL, st.st_size, PROT_READ | PROT_WRITE,
+MAP_SHARED, fd, 0);
+
+         t = 0;
+         for (i = 0; i < 1000; i++) {
+                 *p = 0;
+                 msync(p, 4096, MS_SYNC);
+                 s = rdtsc();
+                *p = 0;
+                 __asm __volatile(""::: "memory");
+                 e = rdtsc();
+                 if (argc > 2)
+                         printf("%d: %lld cycles %jd %jd\n",
+                                i, e - s, (intmax_t)s, (intmax_t)e);
+                 t += e - s;
+         }
+         printf("average time: %lld cycles\n", t / 1000);
+         return (0);
+}
+
+Acked-by: Nick Piggin <npiggin@suse.de>
+Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ block/as-iosched.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/block/as-iosched.c
++++ b/block/as-iosched.c
+@@ -831,6 +831,8 @@ static void as_completed_request(struct 
+       }
+ 
+       if (ad->changed_batch && ad->nr_dispatched == 1) {
++              ad->current_batch_expires = jiffies +
++                                      ad->batch_expire[ad->batch_data_dir];
+               kblockd_schedule_work(&ad->antic_work);
+               ad->changed_batch = 0;
+ 
diff --git a/queue-2.6.25/block-properly-notify-block-layer-of-sync-writes.patch b/queue-2.6.25/block-properly-notify-block-layer-of-sync-writes.patch

new file mode 100644 (file)

index 0000000..9f8e05a
--- /dev/null
+++ b/queue-2.6.25/block-properly-notify-block-layer-of-sync-writes.patch
@@ -0,0 +1,112 @@
+From 18ce3751ccd488c78d3827e9f6bf54e6322676fb Mon Sep 17 00:00:00 2001
+From: Jens Axboe <jens.axboe@oracle.com>
+Date: Tue, 1 Jul 2008 09:07:34 +0200
+Subject: block: Properly notify block layer of sync writes
+
+From: Jens Axboe <jens.axboe@oracle.com>
+
+commit 18ce3751ccd488c78d3827e9f6bf54e6322676fb upstream
+
+fsync_buffers_list() and sync_dirty_buffer() both issue async writes and
+then immediately wait on them. Conceptually, that makes them sync writes
+and we should treat them as such so that the IO schedulers can handle
+them appropriately.
+
+This patch fixes a write starvation issue that Lin Ming reported, where
+kjournald is stuck for more than 2 minutes because of a large number of
+synchronous IO in the system:
+
+INFO: task kjournald:20558 blocked for more than 120 seconds.
+"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this
+message.
+kjournald     D ffff810010820978  6712 20558      2
+ffff81022ddb1d10 0000000000000046 ffff81022e7baa10 ffffffff803ba6f2
+ffff81022ecd0000 ffff8101e6dc9160 ffff81022ecd0348 000000008048b6cb
+0000000000000086 ffff81022c4e8d30 0000000000000000 ffffffff80247537
+Call Trace:
+[<ffffffff803ba6f2>] kobject_get+0x12/0x17
+[<ffffffff80247537>] getnstimeofday+0x2f/0x83
+[<ffffffff8029c1ac>] sync_buffer+0x0/0x3f
+[<ffffffff8066d195>] io_schedule+0x5d/0x9f
+[<ffffffff8029c1e7>] sync_buffer+0x3b/0x3f
+[<ffffffff8066d3f0>] __wait_on_bit+0x40/0x6f
+[<ffffffff8029c1ac>] sync_buffer+0x0/0x3f
+[<ffffffff8066d48b>] out_of_line_wait_on_bit+0x6c/0x78
+[<ffffffff80243909>] wake_bit_function+0x0/0x23
+[<ffffffff8029e3ad>] sync_dirty_buffer+0x98/0xcb
+[<ffffffff8030056b>] journal_commit_transaction+0x97d/0xcb6
+[<ffffffff8023a676>] lock_timer_base+0x26/0x4b
+[<ffffffff8030300a>] kjournald+0xc1/0x1fb
+[<ffffffff802438db>] autoremove_wake_function+0x0/0x2e
+[<ffffffff80302f49>] kjournald+0x0/0x1fb
+[<ffffffff802437bb>] kthread+0x47/0x74
+[<ffffffff8022de51>] schedule_tail+0x28/0x5d
+[<ffffffff8020cac8>] child_rip+0xa/0x12
+[<ffffffff80243774>] kthread+0x0/0x74
+[<ffffffff8020cabe>] child_rip+0x0/0x12
+
+Lin Ming confirms that this patch fixes the issue. I've run tests with
+it for the past week and no ill effects have been observed, so I'm
+proposing it for inclusion into 2.6.26.
+
+Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/buffer.c        |   13 ++++++++-----
+ include/linux/fs.h |    1 +
+ 2 files changed, 9 insertions(+), 5 deletions(-)
+
+--- a/fs/buffer.c
++++ b/fs/buffer.c
+@@ -818,7 +818,7 @@ static int fsync_buffers_list(spinlock_t
+                                * contents - it is a noop if I/O is still in
+                                * flight on potentially older contents.
+                                */
+-                              ll_rw_block(SWRITE, 1, &bh);
++                              ll_rw_block(SWRITE_SYNC, 1, &bh);
+                               brelse(bh);
+                               spin_lock(lock);
+                       }
+@@ -2952,16 +2952,19 @@ void ll_rw_block(int rw, int nr, struct 
+       for (i = 0; i < nr; i++) {
+               struct buffer_head *bh = bhs[i];
+ 
+-              if (rw == SWRITE)
++              if (rw == SWRITE || rw == SWRITE_SYNC)
+                       lock_buffer(bh);
+               else if (test_set_buffer_locked(bh))
+                       continue;
+ 
+-              if (rw == WRITE || rw == SWRITE) {
++              if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC) {
+                       if (test_clear_buffer_dirty(bh)) {
+                               bh->b_end_io = end_buffer_write_sync;
+                               get_bh(bh);
+-                              submit_bh(WRITE, bh);
++                              if (rw == SWRITE_SYNC)
++                                      submit_bh(WRITE_SYNC, bh);
++                              else
++                                      submit_bh(WRITE, bh);
+                               continue;
+                       }
+               } else {
+@@ -2990,7 +2993,7 @@ int sync_dirty_buffer(struct buffer_head
+       if (test_clear_buffer_dirty(bh)) {
+               get_bh(bh);
+               bh->b_end_io = end_buffer_write_sync;
+-              ret = submit_bh(WRITE, bh);
++              ret = submit_bh(WRITE_SYNC, bh);
+               wait_on_buffer(bh);
+               if (buffer_eopnotsupp(bh)) {
+                       clear_buffer_eopnotsupp(bh);
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -83,6 +83,7 @@ extern int dir_notify_enable;
+ #define READ_SYNC     (READ | (1 << BIO_RW_SYNC))
+ #define READ_META     (READ | (1 << BIO_RW_META))
+ #define WRITE_SYNC    (WRITE | (1 << BIO_RW_SYNC))
++#define SWRITE_SYNC   (SWRITE | (1 << BIO_RW_SYNC))
+ #define WRITE_BARRIER ((1 << BIO_RW) | (1 << BIO_RW_BARRIER))
+ 
+ #define SEL_IN                1
diff --git a/queue-2.6.25/mac80211-detect-driver-tx-bugs.patch b/queue-2.6.25/mac80211-detect-driver-tx-bugs.patch

new file mode 100644 (file)

index 0000000..4eda46c
--- /dev/null
+++ b/queue-2.6.25/mac80211-detect-driver-tx-bugs.patch
@@ -0,0 +1,54 @@
+From stable-bounces@linux.kernel.org Wed Jul  2 19:06:00 2008
+From: Larry Finger <Larry.Finger@lwfinger.net>
+Date: Wed, 02 Jul 2008 20:36:31 -0500
+Subject: mac80211: detect driver tx bugs
+To: stable@kernel.org, Johannes Berg <johannes@sipsolutions.net>
+Cc: linux-wireless@vger.kernel.org
+Message-ID: <486c2d1f.ntPY4RgDHDbYfPvX%Larry.Finger@lwfinger.net>
+
+From: Johannes Berg <johannes@sipsolutions.net>
+
+When a driver rejects a frame in its ->tx() callback, it must also
+stop queues, otherwise mac80211 can go into a loop here. Detect this
+situation and abort the loop after five retries, warning about the
+driver bug.
+
+This patch was added to mainline as
+commit ef3a62d272f033989e83eb1f26505f93f93e3e69.
+
+Thanks to Larry Finger <Larry.Finger@lwfinger.net> for doing the -stable
+port.
+
+Cc: Larry Finger <Larry.Finger@lwfinger.net>
+Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+
+---
+ net/mac80211/tx.c |    9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/net/mac80211/tx.c
++++ b/net/mac80211/tx.c
+@@ -1090,7 +1090,7 @@ static int ieee80211_tx(struct net_devic
+       ieee80211_tx_handler *handler;
+       struct ieee80211_txrx_data tx;
+       ieee80211_txrx_result res = TXRX_DROP, res_prepare;
+-      int ret, i;
++      int ret, i, retries = 0;
+ 
+       WARN_ON(__ieee80211_queue_pending(local, control->queue));
+ 
+@@ -1181,6 +1181,13 @@ retry:
+               if (!__ieee80211_queue_stopped(local, control->queue)) {
+                       clear_bit(IEEE80211_LINK_STATE_PENDING,
+                                 &local->state[control->queue]);
++                      retries++;
++                      /*
++                       * Driver bug, it's rejecting packets but
++                       * not stopping queues.
++                       */
++                      if (WARN_ON_ONCE(retries > 5))
++                              goto drop;
+                       goto retry;
+               }
+               memcpy(&store->control, control,
diff --git a/queue-2.6.25/md-don-t-acknowlege-that-stripe-expand-is-complete-until-it-really-is.patch b/queue-2.6.25/md-don-t-acknowlege-that-stripe-expand-is-complete-until-it-really-is.patch

new file mode 100644 (file)

index 0000000..b437fcf
--- /dev/null
+++ b/queue-2.6.25/md-don-t-acknowlege-that-stripe-expand-is-complete-until-it-really-is.patch
@@ -0,0 +1,54 @@
+From stable-bounces@linux.kernel.org Wed Jul  2 19:45:44 2008
+From: Neil Brown <neilb@notabene.brown>
+Date: Thu, 3 Jul 2008 02:45:35 GMT
+Subject: md: Don't acknowlege that stripe-expand is complete until it really is.
+To: jejb@kernel.org, stable@kernel.org
+Message-ID: <200807030245.m632jZjW007566@hera.kernel.org>
+
+From: Neil Brown <neilb@notabene.brown>
+
+commit efe311431869b40d67911820a309f9a1a41306f3 upstream
+
+We shouldn't acknowledge that a stripe has been expanded (When
+reshaping a raid5 by adding a device) until the moved data has
+actually been written out.  However we are currently
+acknowledging (by calling md_done_sync) when the POST_XOR
+is complete and before the write.
+
+So track in s.locked whether there are pending writes, and don't
+call md_done_sync yet if there are.
+
+Note: we also set R5_LOCKED on devices which we are about to
+read from.  This probably isn't technically necessary, but is
+usually done when writing a block, and justifies the use of
+s.locked here.
+
+This bug can lead to a crash if an array is stopped while a reshape
+is in progress.
+
+Signed-off-by: Neil Brown <neilb@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/md/raid5.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/md/raid5.c
++++ b/drivers/md/raid5.c
+@@ -2861,6 +2861,8 @@ static void handle_stripe5(struct stripe
+ 
+               for (i = conf->raid_disks; i--; ) {
+                       set_bit(R5_Wantwrite, &sh->dev[i].flags);
++                      set_bit(R5_LOCKED, &dev->flags);
++                      s.locked++;
+                       if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
+                               sh->ops.count++;
+               }
+@@ -2874,6 +2876,7 @@ static void handle_stripe5(struct stripe
+                       conf->raid_disks);
+               s.locked += handle_write_operations5(sh, 1, 1);
+       } else if (s.expanded &&
++                 s.locked == 0 &&
+               !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
+               clear_bit(STRIPE_EXPAND_READY, &sh->state);
+               atomic_dec(&conf->reshape_stripes);
diff --git a/queue-2.6.25/md-ensure-interrupted-recovery-completed-properly.patch b/queue-2.6.25/md-ensure-interrupted-recovery-completed-properly.patch

new file mode 100644 (file)

index 0000000..d34e65f
--- /dev/null
+++ b/queue-2.6.25/md-ensure-interrupted-recovery-completed-properly.patch
@@ -0,0 +1,66 @@
+From stable-bounces@linux.kernel.org Wed Jul  2 19:45:47 2008
+From: Neil Brown <neilb@notabene.brown>
+Date: Thu, 3 Jul 2008 02:45:38 GMT
+Subject: md: Ensure interrupted recovery completed properly (v1 metadata plus bitmap)
+To: jejb@kernel.org, stable@kernel.org
+Message-ID: <200807030245.m632jc6s007589@hera.kernel.org>
+
+From: Neil Brown <neilb@notabene.brown>
+
+commit 8c2e870a625bd336b2e7a65a97c1836acef07322 upstream
+
+If, while assembling an array, we find a device which is not fully
+in-sync with the array, it is important to set the "fullsync" flags.
+This is an exact analog to the setting of this flag in hot_add_disk
+methods.
+
+Currently, only v1.x metadata supports having devices in an array
+which are not fully in-sync (it keeps track of how in sync they are).
+The 'fullsync' flag only makes a difference when a write-intent bitmap
+is being used.  In this case it tells recovery to ignore the bitmap
+and recover all blocks.
+
+This fix is already in place for raid1, but not raid5/6 or raid10.
+
+So without this fix, a raid10 or raid4/5/6 array with version 1.x
+metadata and a write-intent bitmap, that is stopped in the middle
+of a recovery, will appear to complete the recovery instantly
+after it is reassembled, but the recovery will not be correct.
+
+If you might have an array like that, issuing
+   echo repair > /sys/block/mdXX/md/sync_action
+
+will make sure recovery completes properly.
+
+Signed-off-by: Neil Brown <neilb@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/md/raid10.c |    2 ++
+ drivers/md/raid5.c  |    4 +++-
+ 2 files changed, 5 insertions(+), 1 deletion(-)
+
+--- a/drivers/md/raid10.c
++++ b/drivers/md/raid10.c
+@@ -2102,6 +2102,8 @@ static int run(mddev_t *mddev)
+                   !test_bit(In_sync, &disk->rdev->flags)) {
+                       disk->head_position = 0;
+                       mddev->degraded++;
++                      if (disk->rdev)
++                              conf->fullsync = 1;
+               }
+       }
+ 
+--- a/drivers/md/raid5.c
++++ b/drivers/md/raid5.c
+@@ -4166,7 +4166,9 @@ static int run(mddev_t *mddev)
+                               " disk %d\n", bdevname(rdev->bdev,b),
+                               raid_disk);
+                       working_disks++;
+-              }
++              } else
++                      /* Cannot rely on bitmap to complete recovery */
++                      conf->fullsync = 1;
+       }
+ 
+       /*
diff --git a/queue-2.6.25/md-fix-error-paths-if-md_probe-fails.patch b/queue-2.6.25/md-fix-error-paths-if-md_probe-fails.patch

new file mode 100644 (file)

index 0000000..6d13cef
--- /dev/null
+++ b/queue-2.6.25/md-fix-error-paths-if-md_probe-fails.patch
@@ -0,0 +1,40 @@
+From stable-bounces@linux.kernel.org Wed Jul  2 19:45:41 2008
+From: Neil Brown <neilb@notabene.brown>
+Date: Thu, 3 Jul 2008 02:45:30 GMT
+Subject: md: Fix error paths if md_probe fails.
+To: jejb@kernel.org, stable@kernel.org
+Message-ID: <200807030245.m632jUqe007534@hera.kernel.org>
+
+From: Neil Brown <neilb@notabene.brown>
+
+commit 9bbbca3a0ee09293108b67835c6bdf6196d7bcb3 upstream
+
+md_probe can fail (e.g. alloc_disk could fail) without
+returning an error (as it always returns NULL).
+So when we call mddev_find immediately afterwards, we need
+to check that md_probe actually succeeded.  This means checking
+that mddev->gendisk is non-NULL.
+
+Cc: Dave Jones <davej@redhat.com>
+Signed-off-by: Neil Brown <neilb@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/md/md.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/md/md.c
++++ b/drivers/md/md.c
+@@ -3804,8 +3804,10 @@ static void autorun_devices(int part)
+ 
+               md_probe(dev, NULL, NULL);
+               mddev = mddev_find(dev);
+-              if (!mddev) {
+-                      printk(KERN_ERR 
++              if (!mddev || !mddev->gendisk) {
++                      if (mddev)
++                              mddev_put(mddev);
++                      printk(KERN_ERR
+                               "md: cannot allocate memory for md drive.\n");
+                       break;
+               }
diff --git a/queue-2.6.25/series b/queue-2.6.25/series

index 4b527a33035238edabca335250d2f9bdb71f6c7d..a3bf32658cab31e012b2aca3930e0a574779ed71 100644 (file)
--- a/queue-2.6.25/series
+++ b/queue-2.6.25/series
@@ -1 +1,10 @@
+b43legacy-do-not-return-tx_busy-from-op_tx.patch
+b43-do-not-return-tx_busy-from-op_tx.patch
+b43-fix-possible-mmio-access-while-device-is-down.patch
+mac80211-detect-driver-tx-bugs.patch
+block-fix-the-starving-writes-bug-in-the-anticipatory-io-scheduler.patch
+md-fix-error-paths-if-md_probe-fails.patch
+md-don-t-acknowlege-that-stripe-expand-is-complete-until-it-really-is.patch
+md-ensure-interrupted-recovery-completed-properly.patch
+block-properly-notify-block-layer-of-sync-writes.patch
  xen-mask-unwanted-pte-bits-in-__supported_pte_mask.patch
author	Greg Kroah-Hartman <gregkh@suse.de>
	Wed, 16 Jul 2008 14:59:07 +0000 (07:59 -0700)
committer	Greg Kroah-Hartman <gregkh@suse.de>
	Wed, 16 Jul 2008 14:59:07 +0000 (07:59 -0700)
queue-2.6.25/b43-do-not-return-tx_busy-from-op_tx.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.25/b43-fix-possible-mmio-access-while-device-is-down.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.25/b43legacy-do-not-return-tx_busy-from-op_tx.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.25/block-fix-the-starving-writes-bug-in-the-anticipatory-io-scheduler.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.25/block-properly-notify-block-layer-of-sync-writes.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.25/mac80211-detect-driver-tx-bugs.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.25/md-don-t-acknowlege-that-stripe-expand-is-complete-until-it-really-is.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.25/md-ensure-interrupted-recovery-completed-properly.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.25/md-fix-error-paths-if-md_probe-fails.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.25/series		patch \| blob \| blame \| history