3.4-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>    Tue, 24 Jul 2012 20:28:45 +0000 (13:28 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>    Tue, 24 Jul 2012 20:28:45 +0000 (13:28 -0700)
added patches:
dm-raid1-fix-crash-with-mirror-recovery-and-discard.patch
dm-raid1-set-discard_zeroes_data_unsupported.patch
dm-thin-do-not-send-discards-to-shared-blocks.patch
mips-properly-align-the-.data..init_task-section.patch
ore-fix-nfs-crash-by-supporting-any-unaligned-raid-io.patch
ore-remove-support-of-partial-io-request-nfs-crash.patch
pnfs-obj-don-t-leak-objio_state-if-ore_write-read-fails.patch
ubifs-fix-a-bug-in-empty-space-fix-up.patch

queue-3.4/dm-raid1-fix-crash-with-mirror-recovery-and-discard.patch [new file with mode: 0644]
queue-3.4/dm-raid1-set-discard_zeroes_data_unsupported.patch [new file with mode: 0644]
queue-3.4/dm-thin-do-not-send-discards-to-shared-blocks.patch [new file with mode: 0644]
queue-3.4/mips-properly-align-the-.data..init_task-section.patch [new file with mode: 0644]
queue-3.4/ore-fix-nfs-crash-by-supporting-any-unaligned-raid-io.patch [new file with mode: 0644]
queue-3.4/ore-remove-support-of-partial-io-request-nfs-crash.patch [new file with mode: 0644]
queue-3.4/pnfs-obj-don-t-leak-objio_state-if-ore_write-read-fails.patch [new file with mode: 0644]
queue-3.4/series
queue-3.4/ubifs-fix-a-bug-in-empty-space-fix-up.patch [new file with mode: 0644]

diff --git a/queue-3.4/dm-raid1-fix-crash-with-mirror-recovery-and-discard.patch b/queue-3.4/dm-raid1-fix-crash-with-mirror-recovery-and-discard.patch
new file mode 100644 (file)
index 0000000..0db4c8a
--- /dev/null
@@ -0,0 +1,110 @@
+From 751f188dd5ab95b3f2b5f2f467c38aae5a2877eb Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Fri, 20 Jul 2012 14:25:03 +0100
+Subject: dm raid1: fix crash with mirror recovery and discard
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit 751f188dd5ab95b3f2b5f2f467c38aae5a2877eb upstream.
+
+This patch fixes a crash when a discard request is sent during mirror
+recovery.
+
+Firstly, some background.  Generally, the following sequence happens during
+mirror synchronization:
+- function do_recovery is called
+- do_recovery calls dm_rh_recovery_prepare
+- dm_rh_recovery_prepare uses a semaphore to limit the number of
+  simultaneously recovered regions (by default the semaphore value is 1,
+  so only one region at a time is recovered)
+- dm_rh_recovery_prepare calls __rh_recovery_prepare, which asks the
+  log driver for the next region to recover. Then, it sets the region
+  state to DM_RH_RECOVERING. If there are no pending I/Os on this
+  region, the region is added to the quiesced_regions list. If there
+  are pending I/Os, the region is not added to any list; it is added
+  to the quiesced_regions list later (by the dm_rh_dec function) when
+  all I/Os finish.
+- when the region is on the quiesced_regions list, there are no I/Os
+  in flight on this region. The region is popped from the list in the
+  dm_rh_recovery_start function. Then, a kcopyd job is started in the
+  recover function.
+- when the kcopyd job finishes, recovery_complete is called. It calls
+  dm_rh_recovery_end, which adds the region to the recovered_regions
+  or failed_recovered_regions list (depending on whether the copy
+  operation was successful).
+
+The above mechanism assumes that if the region is in DM_RH_RECOVERING
+state, no new I/Os are started on this region. When I/O is started,
+dm_rh_inc_pending is called, which increases reg->pending count. When
+I/O is finished, dm_rh_dec is called. It decreases reg->pending count.
+If the count is zero and the region was in DM_RH_RECOVERING state,
+dm_rh_dec adds it to the quiesced_regions list.
+
+Consequently, if we call dm_rh_inc_pending/dm_rh_dec while the region is
+in the DM_RH_RECOVERING state, it could be added to the quiesced_regions
+list multiple times, or it could be added to this list while kcopyd is
+copying data (it is assumed that the region is not on any list while
+kcopyd does its job). This results in memory corruption and a crash.
+
+There already exist bypasses for REQ_FLUSH requests: REQ_FLUSH requests
+do not belong to any region, so they are always added to the sync list
+in do_writes. dm_rh_inc_pending does not increase count for REQ_FLUSH
+requests. In mirror_end_io, dm_rh_dec is never called for REQ_FLUSH
+requests. These bypasses avoid the crash possibility described above.
+
+These bypasses were improperly implemented for REQ_DISCARD when
+the mirror target gained discard support in commit
+5fc2ffeabb9ee0fc0e71ff16b49f34f0ed3d05b4 (dm raid1: support discard).
+
+In do_writes, REQ_DISCARD requests are always added to the sync queue and
+immediately dispatched (even if the region is in DM_RH_RECOVERING).  However,
+dm_rh_inc and dm_rh_dec are called for REQ_DISCARD requests.  So this violates
+the rule that no I/Os are started on DM_RH_RECOVERING regions, and causes the
+list corruption described above.
+
+This patch changes it so that REQ_DISCARD requests follow the same path
+as REQ_FLUSH. This avoids the crash.
+
+Reference: https://bugzilla.redhat.com/837607
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Signed-off-by: Alasdair G Kergon <agk@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/dm-raid1.c       |    2 +-
+ drivers/md/dm-region-hash.c |    5 ++++-
+ 2 files changed, 5 insertions(+), 2 deletions(-)
+
+--- a/drivers/md/dm-raid1.c
++++ b/drivers/md/dm-raid1.c
+@@ -1214,7 +1214,7 @@ static int mirror_end_io(struct dm_targe
+        * We need to dec pending if this was a write.
+        */
+       if (rw == WRITE) {
+-              if (!(bio->bi_rw & REQ_FLUSH))
++              if (!(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD)))
+                       dm_rh_dec(ms->rh, map_context->ll);
+               return error;
+       }
+--- a/drivers/md/dm-region-hash.c
++++ b/drivers/md/dm-region-hash.c
+@@ -404,6 +404,9 @@ void dm_rh_mark_nosync(struct dm_region_
+               return;
+       }
++      if (bio->bi_rw & REQ_DISCARD)
++              return;
++
+       /* We must inform the log that the sync count has changed. */
+       log->type->set_region_sync(log, region, 0);
+@@ -524,7 +527,7 @@ void dm_rh_inc_pending(struct dm_region_
+       struct bio *bio;
+       for (bio = bios->head; bio; bio = bio->bi_next) {
+-              if (bio->bi_rw & REQ_FLUSH)
++              if (bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))
+                       continue;
+               rh_inc(rh, dm_rh_bio_to_region(rh, bio));
+       }
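
The corruption described above is ordinary doubly-linked-list breakage: adding the same region node to quiesced_regions twice leaves the list pointing at itself. A minimal userspace sketch of that failure mode (simplified list helpers in the spirit of the kernel's list.h; not the actual dm code):

    #include <stdio.h>

    struct list_head { struct list_head *next, *prev; };

    static void list_add_tail(struct list_head *n, struct list_head *head)
    {
            n->prev = head->prev;
            n->next = head;
            head->prev->next = n;
            head->prev = n;
    }

    int main(void)
    {
            struct list_head quiesced = { &quiesced, &quiesced };
            struct list_head reg;

            list_add_tail(&reg, &quiesced);
            /* Second add of the same node, as when dm_rh_dec() re-queues a
             * region that was already handed to kcopyd: the node now points
             * at itself, so any traversal loops forever and list_del()
             * corrupts neighbouring memory. */
            list_add_tail(&reg, &quiesced);
            printf("reg.next points to itself: %d\n", reg.next == &reg);
            return 0;
    }
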
diff --git a/queue-3.4/dm-raid1-set-discard_zeroes_data_unsupported.patch b/queue-3.4/dm-raid1-set-discard_zeroes_data_unsupported.patch
new file mode 100644 (file)
index 0000000..7977f82
--- /dev/null
@@ -0,0 +1,39 @@
+From 7c8d3a42fe1c58a7e8fd3f6a013e7d7b474ff931 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Fri, 20 Jul 2012 14:25:07 +0100
+Subject: dm raid1: set discard_zeroes_data_unsupported
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit 7c8d3a42fe1c58a7e8fd3f6a013e7d7b474ff931 upstream.
+
+We can't guarantee that REQ_DISCARD on dm-mirror zeroes the data even if
+the underlying disks support zero on discard.  So this patch sets
+ti->discard_zeroes_data_unsupported.
+
+For example, if the mirror is in the process of resynchronizing, it may
+happen that kcopyd reads a piece of data, then a discard is sent on the
+same area, and then kcopyd writes the piece of data to another leg.
+Consequently, the data is not zeroed.
+
+The flag was made available by commit 983c7db347db8ce2d8453fd1d89b7a4bb6920d56
+(dm crypt: always disable discard_zeroes_data).
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Signed-off-by: Alasdair G Kergon <agk@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/dm-raid1.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/md/dm-raid1.c
++++ b/drivers/md/dm-raid1.c
+@@ -1084,6 +1084,7 @@ static int mirror_ctr(struct dm_target *
+       ti->split_io = dm_rh_get_region_size(ms->rh);
+       ti->num_flush_requests = 1;
+       ti->num_discard_requests = 1;
++      ti->discard_zeroes_data_unsupported = 1;
+       ms->kmirrord_wq = alloc_workqueue("kmirrord",
+                                         WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0);
diff --git a/queue-3.4/dm-thin-do-not-send-discards-to-shared-blocks.patch b/queue-3.4/dm-thin-do-not-send-discards-to-shared-blocks.patch
new file mode 100644 (file)
index 0000000..e540c01
--- /dev/null
@@ -0,0 +1,55 @@
+From 650d2a06b4fe1cc1d218c20e256650f68bf0ca31 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Fri, 20 Jul 2012 14:25:05 +0100
+Subject: dm thin: do not send discards to shared blocks
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit 650d2a06b4fe1cc1d218c20e256650f68bf0ca31 upstream.
+
+When process_discard receives a partial discard that doesn't cover a
+full block, it sends this discard down to that block. Unfortunately, the
+block can be shared and the discard would corrupt the other snapshots
+sharing this block.
+
+This patch detects block sharing and ends the discard with success when
+sending it to the shared block.
+
+The above change means that if the device supports discard it can't be
+guaranteed that a discard request zeroes data. Therefore, we set
+ti->discard_zeroes_data_unsupported.
+
+Thin target discard support with this bug arrived in commit
+104655fd4dcebd50068ef30253a001da72e3a081 (dm thin: support discards).
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Alasdair G Kergon <agk@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/dm-thin.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/drivers/md/dm-thin.c
++++ b/drivers/md/dm-thin.c
+@@ -1240,7 +1240,10 @@ static void process_discard(struct thin_
+                       cell_release_singleton(cell, bio);
+                       cell_release_singleton(cell2, bio);
+-                      remap_and_issue(tc, bio, lookup_result.block);
++                      if ((!lookup_result.shared) && pool->pf.discard_passdown)
++                              remap_and_issue(tc, bio, lookup_result.block);
++                      else
++                              bio_endio(bio, 0);
+               }
+               break;
+@@ -2575,6 +2578,7 @@ static int thin_ctr(struct dm_target *ti
+       if (tc->pool->pf.discard_enabled) {
+               ti->discards_supported = 1;
+               ti->num_discard_requests = 1;
++              ti->discard_zeroes_data_unsupported = 1;
+       }
+       dm_put(pool_md);
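
A way to picture the rule this patch enforces: a thin-pool data block may only see a real discard when exactly one virtual device still references it and pass-down is enabled. A toy refcount model of that decision (illustrative only; dm-thin actually tracks sharing in its metadata B-tree):

    #include <stdbool.h>
    #include <stdio.h>

    /* Toy model: one physical data block, reference-counted by the thin
     * volumes and snapshots that map it. */
    struct block { int refcount; char data; };

    static void process_discard(struct block *b, bool discard_passdown)
    {
            if (b->refcount == 1 && discard_passdown)
                    b->data = 0;    /* exclusively owned: safe to pass down */
            /* otherwise: end the bio with success, leave the data alone */
    }

    int main(void)
    {
            struct block shared = { .refcount = 2, .data = 'x' };

            process_discard(&shared, true);
            printf("snapshot still sees: %c\n", shared.data); /* 'x' */
            return 0;
    }
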
diff --git a/queue-3.4/mips-properly-align-the-.data..init_task-section.patch b/queue-3.4/mips-properly-align-the-.data..init_task-section.patch
new file mode 100644 (file)
index 0000000..f34bc81
--- /dev/null
@@ -0,0 +1,66 @@
+From 7b1c0d26a8e272787f0f9fcc5f3e8531df3b3409 Mon Sep 17 00:00:00 2001
+From: David Daney <david.daney@cavium.com>
+Date: Thu, 19 Jul 2012 09:11:14 +0200
+Subject: MIPS: Properly align the .data..init_task section.
+
+From: David Daney <david.daney@cavium.com>
+
+commit 7b1c0d26a8e272787f0f9fcc5f3e8531df3b3409 upstream.
+
+Improper alignment can lead to unbootable systems and/or random
+crashes.
+
+[ralf@linux-mips.org: This is a long-standing bug since
+6eb10bc9e2deab06630261cd05c4cb1e9a60e980 (kernel.org) resp.
+c422a10917f75fd19fa7fe070aaaa23e384dae6f (lmo) [MIPS: Clean up linker script
+using new linker script macros.], so it dates back to 2.6.32.]
+
+Signed-off-by: David Daney <david.daney@cavium.com>
+Cc: linux-mips@linux-mips.org
+Patchwork: https://patchwork.linux-mips.org/patch/3881/
+Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/mips/include/asm/thread_info.h |    4 ++--
+ arch/mips/kernel/vmlinux.lds.S      |    3 ++-
+ 2 files changed, 4 insertions(+), 3 deletions(-)
+
+--- a/arch/mips/include/asm/thread_info.h
++++ b/arch/mips/include/asm/thread_info.h
+@@ -60,6 +60,8 @@ struct thread_info {
+ register struct thread_info *__current_thread_info __asm__("$28");
+ #define current_thread_info()  __current_thread_info
++#endif /* !__ASSEMBLY__ */
++
+ /* thread information allocation */
+ #if defined(CONFIG_PAGE_SIZE_4KB) && defined(CONFIG_32BIT)
+ #define THREAD_SIZE_ORDER (1)
+@@ -97,8 +99,6 @@ register struct thread_info *__current_t
+ #define free_thread_info(info) kfree(info)
+-#endif /* !__ASSEMBLY__ */
+-
+ #define PREEMPT_ACTIVE                0x10000000
+ /*
+--- a/arch/mips/kernel/vmlinux.lds.S
++++ b/arch/mips/kernel/vmlinux.lds.S
+@@ -1,5 +1,6 @@
+ #include <asm/asm-offsets.h>
+ #include <asm/page.h>
++#include <asm/thread_info.h>
+ #include <asm-generic/vmlinux.lds.h>
+ #undef mips
+@@ -72,7 +73,7 @@ SECTIONS
+       .data : {       /* Data */
+               . = . + DATAOFFSET;             /* for CONFIG_MAPPED_KERNEL */
+-              INIT_TASK_DATA(PAGE_SIZE)
++              INIT_TASK_DATA(THREAD_SIZE)
+               NOSAVE_DATA
+               CACHELINE_ALIGNED_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT)
+               READ_MOSTLY_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT)
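
Why THREAD_SIZE alignment (and not just PAGE_SIZE) matters here: MIPS recovers the current thread_info by masking low bits off the stack pointer, so the init task's stack placed in .data..init_task must start on a THREAD_SIZE boundary. A userspace sketch of the masking trick (the 16 KiB THREAD_SIZE and the addresses are assumptions for illustration):

    #include <stdint.h>
    #include <stdio.h>

    #define THREAD_SIZE 0x4000UL    /* assumption: 16 KiB kernel stacks */

    struct thread_info { int cpu; };

    /* Recover the thread_info at the base of the current kernel stack.
     * Only correct if every stack base is THREAD_SIZE-aligned. */
    static struct thread_info *ti_from_sp(uintptr_t sp)
    {
            return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
    }

    int main(void)
    {
            uintptr_t aligned_base = 0x80040000UL;  /* 16 KiB-aligned */
            uintptr_t sp = aligned_base + THREAD_SIZE - 64;

            printf("recovered: %#lx\n",
                   (unsigned long)(uintptr_t)ti_from_sp(sp));
            /* With INIT_TASK_DATA(PAGE_SIZE) the base may land at, say,
             * 0x80041000: the mask then yields 0x80040000 -- 4 KiB below
             * the real thread_info -- and the kernel reads garbage. */
            return 0;
    }
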
diff --git a/queue-3.4/ore-fix-nfs-crash-by-supporting-any-unaligned-raid-io.patch b/queue-3.4/ore-fix-nfs-crash-by-supporting-any-unaligned-raid-io.patch
new file mode 100644 (file)
index 0000000..4fa9929
--- /dev/null
@@ -0,0 +1,208 @@
+From 9ff19309a9623f2963ac5a136782ea4d8b5d67fb Mon Sep 17 00:00:00 2001
+From: Boaz Harrosh <bharrosh@panasas.com>
+Date: Fri, 8 Jun 2012 01:19:07 +0300
+Subject: ore: Fix NFS crash by supporting any unaligned RAID IO
+
+From: Boaz Harrosh <bharrosh@panasas.com>
+
+commit 9ff19309a9623f2963ac5a136782ea4d8b5d67fb upstream.
+
+In RAID_5/6 we used to not permit an IO whose end byte is not
+stripe_size aligned and which spans more than one stripe, i.e.
+the caller had to check whether the actual number of transferred
+bytes after submission was shorter, and would then need to
+resubmit a new IO with the remainder.
+
+Exofs supports this, and NFS was supposed to support it as well
+with its short-write mechanism. But late testing exposed a CRASH
+when this is used with non-RPC layout-drivers.
+
+The change at the NFS level is deep and risky; in its place the fix
+at the ORE level to lift the limitation is actually clean and simple.
+So here it is below.
+
+The principle here is that in the case of IO that is unaligned on
+both ends, beginning and end, we will send two read requests: one
+like the old code, before the calculation of the first stripe, and
+also a new one, before the calculation of the last stripe. If either
+"boundary" is aligned, or the complete IO is within a single stripe,
+we do a single read like before.
+
+The code is kept clean and simple by splitting the old _read_4_write
+into 3 even parts:
+1. _read_4_write_first_stripe
+2. _read_4_write_last_stripe
+3. _read_4_write_execute
+
+1+3 are called at the same place as before, 2+3 before the last
+stripe, and in the case of everything within a single stripe, 1+2+3
+are performed additively.
+
+Why did I not think of it before? Well, I had a stroke of
+genius because I have stared at this code for 2 years, and did
+not find this simple solution, till today. Not that I did not try.
+
+This solution is much better for NFS than the previously proposed
+solution, because there the short write was dealt with out-of-band
+after IO_done, which would cause a seeky IO pattern, whereas here
+we execute in order. In both solutions we do 2 separate reads, only
+here we do it within a single IO request. (And we actually combine
+two writes into a single submission.)
+
+NFS/exofs code need not change since the ORE API communicates the new
+shorter length on return; what will happen is that this case simply
+will not occur anymore.
+
+hurray!!
+
+[Stable: this is an NFS bug since the 3.2 kernel; should apply cleanly]
+Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/exofs/ore_raid.c |   67 +++++++++++++++++++++++++++-------------------------
+ 1 file changed, 36 insertions(+), 31 deletions(-)
+
+--- a/fs/exofs/ore_raid.c
++++ b/fs/exofs/ore_raid.c
+@@ -461,16 +461,12 @@ static void _mark_read4write_pages_uptod
+  * ios->sp2d[p][*], xor is calculated the same way. These pages are
+  * allocated/freed and don't go through cache
+  */
+-static int _read_4_write(struct ore_io_state *ios)
++static int _read_4_write_first_stripe(struct ore_io_state *ios)
+ {
+-      struct ore_io_state *ios_read;
+       struct ore_striping_info read_si;
+       struct __stripe_pages_2d *sp2d = ios->sp2d;
+       u64 offset = ios->si.first_stripe_start;
+-      u64 last_stripe_end;
+-      unsigned bytes_in_stripe = ios->si.bytes_in_stripe;
+-      unsigned i, c, p, min_p = sp2d->pages_in_unit, max_p = -1;
+-      int ret;
++      unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1;
+       if (offset == ios->offset) /* Go to start collect $200 */
+               goto read_last_stripe;
+@@ -478,6 +474,9 @@ static int _read_4_write(struct ore_io_s
+       min_p = _sp2d_min_pg(sp2d);
+       max_p = _sp2d_max_pg(sp2d);
++      ORE_DBGMSG("stripe_start=0x%llx ios->offset=0x%llx min_p=%d max_p=%d\n",
++                 offset, ios->offset, min_p, max_p);
++
+       for (c = 0; ; c++) {
+               ore_calc_stripe_info(ios->layout, offset, 0, &read_si);
+               read_si.obj_offset += min_p * PAGE_SIZE;
+@@ -512,6 +511,18 @@ static int _read_4_write(struct ore_io_s
+       }
+ read_last_stripe:
++      return 0;
++}
++
++static int _read_4_write_last_stripe(struct ore_io_state *ios)
++{
++      struct ore_striping_info read_si;
++      struct __stripe_pages_2d *sp2d = ios->sp2d;
++      u64 offset;
++      u64 last_stripe_end;
++      unsigned bytes_in_stripe = ios->si.bytes_in_stripe;
++      unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1;
++
+       offset = ios->offset + ios->length;
+       if (offset % PAGE_SIZE)
+               _add_to_r4w_last_page(ios, &offset);
+@@ -527,15 +538,15 @@ read_last_stripe:
+       c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1,
+                      ios->layout->mirrors_p1, read_si.par_dev, read_si.dev);
+-      BUG_ON(ios->si.first_stripe_start + bytes_in_stripe != last_stripe_end);
+-      /* unaligned IO must be within a single stripe */
+-
+       if (min_p == sp2d->pages_in_unit) {
+               /* Didn't do it yet */
+               min_p = _sp2d_min_pg(sp2d);
+               max_p = _sp2d_max_pg(sp2d);
+       }
++      ORE_DBGMSG("offset=0x%llx stripe_end=0x%llx min_p=%d max_p=%d\n",
++                 offset, last_stripe_end, min_p, max_p);
++
+       while (offset < last_stripe_end) {
+               struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
+@@ -568,6 +579,15 @@ read_last_stripe:
+       }
+ read_it:
++      return 0;
++}
++
++static int _read_4_write_execute(struct ore_io_state *ios)
++{
++      struct ore_io_state *ios_read;
++      unsigned i;
++      int ret;
++
+       ios_read = ios->ios_read_4_write;
+       if (!ios_read)
+               return 0;
+@@ -591,6 +611,8 @@ read_it:
+       }
+       _mark_read4write_pages_uptodate(ios_read, ret);
++      ore_put_io_state(ios_read);
++      ios->ios_read_4_write = NULL; /* Might need a reuse at last stripe */
+       return 0;
+ }
+@@ -626,8 +648,11 @@ int _ore_add_parity_unit(struct ore_io_s
+                       /* If first stripe, Read in all read4write pages
+                        * (if needed) before we calculate the first parity.
+                        */
+-                      _read_4_write(ios);
++                      _read_4_write_first_stripe(ios);
+               }
++              if (!cur_len) /* If last stripe r4w pages of last stripe */
++                      _read_4_write_last_stripe(ios);
++              _read_4_write_execute(ios);
+               for (i = 0; i < num_pages; i++) {
+                       pages[i] = _raid_page_alloc();
+@@ -654,34 +679,14 @@ int _ore_add_parity_unit(struct ore_io_s
+ int _ore_post_alloc_raid_stuff(struct ore_io_state *ios)
+ {
+-      struct ore_layout *layout = ios->layout;
+-
+       if (ios->parity_pages) {
++              struct ore_layout *layout = ios->layout;
+               unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE;
+-              unsigned stripe_size = ios->si.bytes_in_stripe;
+-              u64 last_stripe, first_stripe;
+               if (_sp2d_alloc(pages_in_unit, layout->group_width,
+                               layout->parity, &ios->sp2d)) {
+                       return -ENOMEM;
+               }
+-
+-              /* Round io down to last full strip */
+-              first_stripe = div_u64(ios->offset, stripe_size);
+-              last_stripe = div_u64(ios->offset + ios->length, stripe_size);
+-
+-              /* If an IO spans more then a single stripe it must end at
+-               * a stripe boundary. The reminder at the end is pushed into the
+-               * next IO.
+-               */
+-              if (last_stripe != first_stripe) {
+-                      ios->length = last_stripe * stripe_size - ios->offset;
+-
+-                      BUG_ON(!ios->length);
+-                      ios->nr_pages = (ios->length + PAGE_SIZE - 1) /
+-                                      PAGE_SIZE;
+-                      ios->si.length = ios->length; /*make it consistent */
+-              }
+       }
+       return 0;
+ }
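
Schematically, the three helpers compose exactly as the commit message describes: 1+3 for an unaligned start, 2+3 for an unaligned end, 1+2+3 when the whole IO sits in one stripe. A sketch of that call pattern only (stubs stand in for the real helpers in fs/exofs/ore_raid.c):

    #include <stdio.h>

    struct ore_io_state { int unused; };

    /* Stubs standing in for the real fs/exofs/ore_raid.c helpers. */
    static int _read_4_write_first_stripe(struct ore_io_state *ios)
    { (void)ios; puts("queue reads before first stripe"); return 0; }
    static int _read_4_write_last_stripe(struct ore_io_state *ios)
    { (void)ios; puts("queue reads before last stripe"); return 0; }
    static int _read_4_write_execute(struct ore_io_state *ios)
    { (void)ios; puts("execute queued reads"); return 0; }

    static void add_parity_unit(struct ore_io_state *ios,
                                int first_stripe, int last_stripe)
    {
            if (first_stripe)
                    _read_4_write_first_stripe(ios);    /* part 1 */
            if (last_stripe)
                    _read_4_write_last_stripe(ios);     /* part 2 */
            _read_4_write_execute(ios);                 /* part 3 */
    }

    int main(void)
    {
            struct ore_io_state ios = { 0 };

            /* IO unaligned at both ends within a single stripe: 1+2+3. */
            add_parity_unit(&ios, 1, 1);
            return 0;
    }
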
diff --git a/queue-3.4/ore-remove-support-of-partial-io-request-nfs-crash.patch b/queue-3.4/ore-remove-support-of-partial-io-request-nfs-crash.patch
new file mode 100644 (file)
index 0000000..cc25150
--- /dev/null
@@ -0,0 +1,47 @@
+From 62b62ad873f2accad9222a4d7ffbe1e93f6714c1 Mon Sep 17 00:00:00 2001
+From: Boaz Harrosh <bharrosh@panasas.com>
+Date: Fri, 8 Jun 2012 04:30:40 +0300
+Subject: ore: Remove support of partial IO request (NFS crash)
+
+From: Boaz Harrosh <bharrosh@panasas.com>
+
+commit 62b62ad873f2accad9222a4d7ffbe1e93f6714c1 upstream.
+
+Due to OOM situations the ORE might fail to allocate all the resources
+needed for IO of the full request. If some progress was possible
+it would proceed with a partial/short request, for the sake of
+forward progress.
+
+Since this crashes the NFS core, and exofs is just fine without it, just
+remove this contraption and fail.
+
+TODO:
+       Support real forward progress with some reserved allocations
+       of resources, such as mem pools and/or bio_sets
+
+[Bug since 3.2 Kernel]
+CC: Benny Halevy <bhalevy@tonian.com>
+Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/exofs/ore.c |    8 +-------
+ 1 file changed, 1 insertion(+), 7 deletions(-)
+
+--- a/fs/exofs/ore.c
++++ b/fs/exofs/ore.c
+@@ -735,13 +735,7 @@ static int _prepare_for_striping(struct
+ out:
+       ios->numdevs = devs_in_group;
+       ios->pages_consumed = cur_pg;
+-      if (unlikely(ret)) {
+-              if (length == ios->length)
+-                      return ret;
+-              else
+-                      ios->length -= length;
+-      }
+-      return 0;
++      return ret;
+ }
+ int ore_create(struct ore_io_state *ios)
diff --git a/queue-3.4/pnfs-obj-don-t-leak-objio_state-if-ore_write-read-fails.patch b/queue-3.4/pnfs-obj-don-t-leak-objio_state-if-ore_write-read-fails.patch
new file mode 100644 (file)
index 0000000..22f8b56
--- /dev/null
@@ -0,0 +1,43 @@
+From 9909d45a8557455ca5f8ee7af0f253debc851f1a Mon Sep 17 00:00:00 2001
+From: Boaz Harrosh <bharrosh@panasas.com>
+Date: Fri, 8 Jun 2012 05:29:40 +0300
+Subject: pnfs-obj: don't leak objio_state if ore_write/read fails
+
+From: Boaz Harrosh <bharrosh@panasas.com>
+
+commit 9909d45a8557455ca5f8ee7af0f253debc851f1a upstream.
+
+[Bug since 3.2 Kernel]
+Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfs/objlayout/objio_osd.c |    9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/fs/nfs/objlayout/objio_osd.c
++++ b/fs/nfs/objlayout/objio_osd.c
+@@ -453,7 +453,10 @@ int objio_read_pagelist(struct nfs_read_
+       objios->ios->done = _read_done;
+       dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
+               rdata->args.offset, rdata->args.count);
+-      return ore_read(objios->ios);
++      ret = ore_read(objios->ios);
++      if (unlikely(ret))
++              objio_free_result(&objios->oir);
++      return ret;
+ }
+ /*
+@@ -537,8 +540,10 @@ int objio_write_pagelist(struct nfs_writ
+       dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
+               wdata->args.offset, wdata->args.count);
+       ret = ore_write(objios->ios);
+-      if (unlikely(ret))
++      if (unlikely(ret)) {
++              objio_free_result(&objios->oir);
+               return ret;
++      }
+       if (objios->sync)
+               _write_done(objios->ios, objios);
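
The rule this fix restores is the usual asynchronous-submit ownership convention: once submission succeeds, the completion callback frees the IO state; if submission itself fails, the callback never runs and the submitter must free. A generic sketch of that convention (the names loosely mirror the patch, but this is not the pnfs-obj or ORE code):

    #include <stdio.h>
    #include <stdlib.h>

    struct io_state { int unused; };

    static void objio_free_result(struct io_state *s) { free(s); }

    /* Stand-in for ore_read()/ore_write(): 0 means the IO was submitted
     * and the completion callback now owns (and will free) the state. */
    static int submit_io(struct io_state *s) { (void)s; return -12; }

    static int read_pagelist(void)
    {
            struct io_state *s = calloc(1, sizeof(*s));
            int ret;

            if (!s)
                    return -12;
            ret = submit_io(s);
            if (ret)
                    objio_free_result(s); /* no callback coming: free here
                                           * or leak, as before this fix */
            return ret;
    }

    int main(void)
    {
            printf("read_pagelist() = %d\n", read_pagelist());
            return 0;
    }
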
diff --git a/queue-3.4/series b/queue-3.4/series
index a4f64941c1bcd19eac5041aae9bf6e8bd005e433..cc3091d9b6e20ddc3d3cb093b108c0ea93e8185c 100644 (file)
@@ -11,3 +11,11 @@ mm-fix-lost-kswapd-wakeup-in-kswapd_stop.patch
 hid-add-battery-quirk-for-apple-wireless-ansi.patch
 hid-add-sennheiser-btd500usb-device-support.patch
 hid-multitouch-add-support-for-baanto-touchscreen.patch
+mips-properly-align-the-.data..init_task-section.patch
+ubifs-fix-a-bug-in-empty-space-fix-up.patch
+ore-fix-nfs-crash-by-supporting-any-unaligned-raid-io.patch
+ore-remove-support-of-partial-io-request-nfs-crash.patch
+pnfs-obj-don-t-leak-objio_state-if-ore_write-read-fails.patch
+dm-thin-do-not-send-discards-to-shared-blocks.patch
+dm-raid1-fix-crash-with-mirror-recovery-and-discard.patch
+dm-raid1-set-discard_zeroes_data_unsupported.patch
diff --git a/queue-3.4/ubifs-fix-a-bug-in-empty-space-fix-up.patch b/queue-3.4/ubifs-fix-a-bug-in-empty-space-fix-up.patch
new file mode 100644 (file)
index 0000000..34fa278
--- /dev/null
@@ -0,0 +1,65 @@
+From c6727932cfdb13501108b16c38463c09d5ec7a74 Mon Sep 17 00:00:00 2001
+From: Artem Bityutskiy <Artem.Bityutskiy@linux.intel.com>
+Date: Sat, 14 Jul 2012 14:33:09 +0300
+Subject: UBIFS: fix a bug in empty space fix-up
+
+From: Artem Bityutskiy <Artem.Bityutskiy@linux.intel.com>
+
+commit c6727932cfdb13501108b16c38463c09d5ec7a74 upstream.
+
+UBIFS has a feature called "empty space fix-up" which is a quirk to work
+around limitations of dumb flasher programs, namely those flashers that
+are unable to skip NAND pages full of 0xFFs while flashing, which leaves
+the empty space at the end of half-filled eraseblocks unusable for UBIFS.
+This feature is relatively new (introduced in v3.0).
+
+The fix-up routine (fixup_free_space()) is executed only once at the very first
+mount if the superblock has the 'space_fixup' flag set (can be done with -F
+option of mkfs.ubifs). It basically reads all the UBIFS data and metadata and
+writes it back to the same LEB. The routine assumes the image is pristine and
+does not have anything in the journal.
+
+There was a bug in 'fixup_free_space()' where it fixed up the log
+incorrectly. All but one LEB of the log of a pristine file-system are
+empty, and that one contains just a commit start node. 'fixup_free_space()'
+simply unmapped this LEB, which resulted in wiping the commit start node.
+As a result, some users were unable to mount the file-system next time
+with the following symptom:
+
+UBIFS error (pid 1): replay_log_leb: first log node at LEB 3:0 is not CS node
+UBIFS error (pid 1): replay_log_leb: log error detected while replaying the log at LEB 3:0
+
+The root cause of this bug was that 'fixup_free_space()' wrongly assumed
+that the beginning of empty space in the log head (c->lhead_offs) was known
+on mount. However, this is not the case - it was always 0. UBIFS does not
+store it in the master node and instead finds it out by scanning the log on
+every mount.
+
+The fix is simple - just pass the commit start node size instead of 0 to
+'fixup_leb()'.
+
+Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@linux.intel.com>
+Reported-by: Iwo Mergler <Iwo.Mergler@netcommwireless.com>
+Tested-by: Iwo Mergler <Iwo.Mergler@netcommwireless.com>
+Reported-by: James Nute <newten82@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ubifs/sb.c |    8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/fs/ubifs/sb.c
++++ b/fs/ubifs/sb.c
+@@ -724,8 +724,12 @@ static int fixup_free_space(struct ubifs
+               lnum = ubifs_next_log_lnum(c, lnum);
+       }
+-      /* Fixup the current log head */
+-      err = fixup_leb(c, c->lhead_lnum, c->lhead_offs);
++      /*
++       * Fixup the log head, which contains only a CS node at the
++       * beginning.
++       */
++      err = fixup_leb(c, c->lhead_lnum,
++                      ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size));
+       if (err)
+               goto out;
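
The new second argument to 'fixup_leb()' is the commit start node size rounded up to the flash's minimum IO unit. A small sketch of that rounding with a kernel-style ALIGN() macro (the concrete sizes are assumptions for illustration):

    #include <stdio.h>

    /* Kernel-style ALIGN(): round x up to a multiple of power-of-two a. */
    #define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

    int main(void)
    {
            unsigned int cs_node_sz = 32;    /* assumption: CS node size */
            unsigned int min_io_size = 2048; /* assumption: NAND page size */

            /* The log head is not empty -- it holds the commit start node --
             * so fix-up must preserve the first aligned chunk covering it
             * instead of starting at offset 0 (which unmapped the CS node). */
            printf("fix up from offset %u\n", ALIGN(cs_node_sz, min_io_size));
            return 0;
    }
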