--- /dev/null
+From e8088073c9610af017fd47fddd104a2c3afb32e8 Mon Sep 17 00:00:00 2001
+From: Joe Thornber <ejt@redhat.com>
+Date: Fri, 21 Dec 2012 20:23:31 +0000
+Subject: dm thin: fix race between simultaneous io and discards to same block
+
+From: Joe Thornber <ejt@redhat.com>
+
+commit e8088073c9610af017fd47fddd104a2c3afb32e8 upstream.
+
+There is a race when discard bios and non-discard bios are issued
+simultaneously to the same block.
+
+Discard support is expensive for all thin devices precisely because you
+have to be careful to quiesce the area you're discarding. DM thin must
+handle this conflicting IO pattern (simultaneous non-discard vs discard)
+even though a sane application shouldn't be issuing such IO.
+
+The race manifests as follows:
+
+1. A non-discard bio is mapped in thin_bio_map.
+ This doesn't lock out parallel activity to the same block.
+
+2. A discard bio is issued to the same block as the non-discard bio.
+
+3. The discard bio is locked in a dm_bio_prison_cell in process_discard
+ to lock out parallel activity against the same block.
+
+4. The non-discard bio's mapping continues and its all_io_entry is
+ incremented so the bio is accounted for in the thin pool's all_io_ds,
+ which is a dm_deferred_set used to track time locality of non-discard IO
+ (see the sketch below).
+
+5. The non-discard bio is finally locked in a dm_bio_prison_cell in
+ process_bio.
+
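+For reference, before this patch the all_io_ds accounting in step 4 was
+done in thin_hook_bio() with no dm_bio_prison_cell held (simplified
+excerpt; see the thin_hook_bio hunk in the diff below):
+
+  /* inside thin_hook_bio(), before this patch: */
+  h->tc = tc;
+  h->shared_read_entry = NULL;
+  /*
+   * No cell is held at this point, so a discard racing with this bio
+   * may already have detained the same block (steps 2 and 3 above).
+   */
+  h->all_io_entry = bio->bi_rw & REQ_DISCARD ?
+          NULL : dm_deferred_entry_inc(pool->all_io_ds);
+  h->overwrite_mapping = NULL;
+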
+The race can result in deadlock, leaving the block layer hanging waiting
+for completion of a discard bio that never completes, e.g.:
+
+INFO: task ruby:15354 blocked for more than 120 seconds.
+"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+ruby D ffffffff8160f0e0 0 15354 15314 0x00000000
+ ffff8802fb08bc58 0000000000000082 ffff8802fb08bfd8 0000000000012900
+ ffff8802fb08a010 0000000000012900 0000000000012900 0000000000012900
+ ffff8802fb08bfd8 0000000000012900 ffff8803324b9480 ffff88032c6f14c0
+Call Trace:
+ [<ffffffff814e5a19>] schedule+0x29/0x70
+ [<ffffffff814e3d85>] schedule_timeout+0x195/0x220
+ [<ffffffffa06b9bc1>] ? _dm_request+0x111/0x160 [dm_mod]
+ [<ffffffff814e589e>] wait_for_common+0x11e/0x190
+ [<ffffffff8107a170>] ? try_to_wake_up+0x2b0/0x2b0
+ [<ffffffff814e59ed>] wait_for_completion+0x1d/0x20
+ [<ffffffff81233289>] blkdev_issue_discard+0x219/0x260
+ [<ffffffff81233e79>] blkdev_ioctl+0x6e9/0x7b0
+ [<ffffffff8119a65c>] block_ioctl+0x3c/0x40
+ [<ffffffff8117539c>] do_vfs_ioctl+0x8c/0x340
+ [<ffffffff8119a547>] ? block_llseek+0x67/0xb0
+ [<ffffffff811756f1>] sys_ioctl+0xa1/0xb0
+ [<ffffffff810561f6>] ? sys_rt_sigprocmask+0x86/0xd0
+ [<ffffffff814ef099>] system_call_fastpath+0x16/0x1b
+
+The thinp-test-suite's test_discard_random_sectors reliably hits this
+deadlock on fast SSD storage.
+
+The fix for this race is that the all_io_entry for a bio must be
+incremented whilst the dm_bio_prison_cell is held for the bio's
+associated virtual and physical blocks. That cell locking wasn't
+occurring early enough in thin_bio_map, so this patch adds it there.
+
+Care is taken to always call the new function inc_all_io_entry() with
+the relevant cells locked, but they are generally unlocked before
+calling issue() to try to avoid holding the cells locked across
+generic_make_request().
+
+Also, now that thin_bio_map may lock bios in a cell, process_bio() is no
+longer the only code path that does so. Because of this we must be sure
+to use cell_defer_except() to release all non-holder entries that were
+added by the other path, since those bios must be deferred.
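+
+As a simplified sketch (mirroring the thin_bio_map() hunk in the diff
+below), the remap fast path now orders the operations like this:
+
+  build_virtual_key(tc->td, block, &key);
+  if (dm_bio_detain(tc->pool->prison, &key, bio, &cell1))
+          return DM_MAPIO_SUBMITTED;
+
+  build_data_key(tc->td, result.block, &key);
+  if (dm_bio_detain(tc->pool->prison, &key, bio, &cell2)) {
+          cell_defer_except(tc, cell1);
+          return DM_MAPIO_SUBMITTED;
+  }
+
+  /*
+   * Both the virtual and data cells are held here, so the all_io_ds
+   * entry cannot race with a discard that has detained the same block.
+   */
+  inc_all_io_entry(tc->pool, bio);
+  cell_defer_except(tc, cell2);
+  cell_defer_except(tc, cell1);
+
+  remap(tc, bio, result.block);
+  return DM_MAPIO_REMAPPED;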
+
+This patch depends on "dm thin: replace dm_cell_release_singleton with
+cell_defer_except".
+
+Signed-off-by: Joe Thornber <ejt@redhat.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Alasdair G Kergon <agk@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/dm-thin.c | 84 +++++++++++++++++++++++++++++++++++----------------
+ 1 file changed, 59 insertions(+), 25 deletions(-)
+
+--- a/drivers/md/dm-thin.c
++++ b/drivers/md/dm-thin.c
+@@ -368,6 +368,17 @@ static int bio_triggers_commit(struct th
+ dm_thin_changed_this_transaction(tc->td);
+ }
+
++static void inc_all_io_entry(struct pool *pool, struct bio *bio)
++{
++ struct dm_thin_endio_hook *h;
++
++ if (bio->bi_rw & REQ_DISCARD)
++ return;
++
++ h = dm_get_mapinfo(bio)->ptr;
++ h->all_io_entry = dm_deferred_entry_inc(pool->all_io_ds);
++}
++
+ static void issue(struct thin_c *tc, struct bio *bio)
+ {
+ struct pool *pool = tc->pool;
+@@ -596,13 +607,15 @@ static void process_prepared_discard_pas
+ {
+ struct thin_c *tc = m->tc;
+
++ inc_all_io_entry(tc->pool, m->bio);
++ cell_defer_except(tc, m->cell);
++ cell_defer_except(tc, m->cell2);
++
+ if (m->pass_discard)
+ remap_and_issue(tc, m->bio, m->data_block);
+ else
+ bio_endio(m->bio, 0);
+
+- cell_defer_except(tc, m->cell);
+- cell_defer_except(tc, m->cell2);
+ mempool_free(m, tc->pool->mapping_pool);
+ }
+
+@@ -710,6 +723,7 @@ static void schedule_copy(struct thin_c
+ h->overwrite_mapping = m;
+ m->bio = bio;
+ save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
++ inc_all_io_entry(pool, bio);
+ remap_and_issue(tc, bio, data_dest);
+ } else {
+ struct dm_io_region from, to;
+@@ -779,6 +793,7 @@ static void schedule_zero(struct thin_c
+ h->overwrite_mapping = m;
+ m->bio = bio;
+ save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
++ inc_all_io_entry(pool, bio);
+ remap_and_issue(tc, bio, data_block);
+ } else {
+ int r;
+@@ -961,13 +976,15 @@ static void process_discard(struct thin_
+ wake_worker(pool);
+ }
+ } else {
++ inc_all_io_entry(pool, bio);
++ cell_defer_except(tc, cell);
++ cell_defer_except(tc, cell2);
++
+ /*
+ * The DM core makes sure that the discard doesn't span
+ * a block boundary. So we submit the discard of a
+ * partial block appropriately.
+ */
+- cell_defer_except(tc, cell);
+- cell_defer_except(tc, cell2);
+ if ((!lookup_result.shared) && pool->pf.discard_passdown)
+ remap_and_issue(tc, bio, lookup_result.block);
+ else
+@@ -1039,8 +1056,9 @@ static void process_shared_bio(struct th
+ struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr;
+
+ h->shared_read_entry = dm_deferred_entry_inc(pool->shared_read_ds);
+-
++ inc_all_io_entry(pool, bio);
+ cell_defer_except(tc, cell);
++
+ remap_and_issue(tc, bio, lookup_result->block);
+ }
+ }
+@@ -1055,7 +1073,9 @@ static void provision_block(struct thin_
+ * Remap empty bios (flushes) immediately, without provisioning.
+ */
+ if (!bio->bi_size) {
++ inc_all_io_entry(tc->pool, bio);
+ cell_defer_except(tc, cell);
++
+ remap_and_issue(tc, bio, 0);
+ return;
+ }
+@@ -1110,26 +1130,22 @@ static void process_bio(struct thin_c *t
+ r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
+ switch (r) {
+ case 0:
+- /*
+- * We can release this cell now. This thread is the only
+- * one that puts bios into a cell, and we know there were
+- * no preceding bios.
+- */
+- /*
+- * TODO: this will probably have to change when discard goes
+- * back in.
+- */
+- cell_defer_except(tc, cell);
+-
+- if (lookup_result.shared)
++ if (lookup_result.shared) {
+ process_shared_bio(tc, bio, block, &lookup_result);
+- else
++ cell_defer_except(tc, cell);
++ } else {
++ inc_all_io_entry(tc->pool, bio);
++ cell_defer_except(tc, cell);
++
+ remap_and_issue(tc, bio, lookup_result.block);
++ }
+ break;
+
+ case -ENODATA:
+ if (bio_data_dir(bio) == READ && tc->origin_dev) {
++ inc_all_io_entry(tc->pool, bio);
+ cell_defer_except(tc, cell);
++
+ remap_to_origin_and_issue(tc, bio);
+ } else
+ provision_block(tc, bio, block, cell);
+@@ -1155,8 +1171,10 @@ static void process_bio_read_only(struct
+ case 0:
+ if (lookup_result.shared && (rw == WRITE) && bio->bi_size)
+ bio_io_error(bio);
+- else
++ else {
++ inc_all_io_entry(tc->pool, bio);
+ remap_and_issue(tc, bio, lookup_result.block);
++ }
+ break;
+
+ case -ENODATA:
+@@ -1166,6 +1184,7 @@ static void process_bio_read_only(struct
+ }
+
+ if (tc->origin_dev) {
++ inc_all_io_entry(tc->pool, bio);
+ remap_to_origin_and_issue(tc, bio);
+ break;
+ }
+@@ -1346,7 +1365,7 @@ static struct dm_thin_endio_hook *thin_h
+
+ h->tc = tc;
+ h->shared_read_entry = NULL;
+- h->all_io_entry = bio->bi_rw & REQ_DISCARD ? NULL : dm_deferred_entry_inc(pool->all_io_ds);
++ h->all_io_entry = NULL;
+ h->overwrite_mapping = NULL;
+
+ return h;
+@@ -1363,6 +1382,8 @@ static int thin_bio_map(struct dm_target
+ dm_block_t block = get_bio_block(tc, bio);
+ struct dm_thin_device *td = tc->td;
+ struct dm_thin_lookup_result result;
++ struct dm_bio_prison_cell *cell1, *cell2;
++ struct dm_cell_key key;
+
+ map_context->ptr = thin_hook_bio(tc, bio);
+
+@@ -1399,12 +1420,25 @@ static int thin_bio_map(struct dm_target
+ * shared flag will be set in their case.
+ */
+ thin_defer_bio(tc, bio);
+- r = DM_MAPIO_SUBMITTED;
+- } else {
+- remap(tc, bio, result.block);
+- r = DM_MAPIO_REMAPPED;
++ return DM_MAPIO_SUBMITTED;
+ }
+- break;
++
++ build_virtual_key(tc->td, block, &key);
++ if (dm_bio_detain(tc->pool->prison, &key, bio, &cell1))
++ return DM_MAPIO_SUBMITTED;
++
++ build_data_key(tc->td, result.block, &key);
++ if (dm_bio_detain(tc->pool->prison, &key, bio, &cell2)) {
++ cell_defer_except(tc, cell1);
++ return DM_MAPIO_SUBMITTED;
++ }
++
++ inc_all_io_entry(tc->pool, bio);
++ cell_defer_except(tc, cell2);
++ cell_defer_except(tc, cell1);
++
++ remap(tc, bio, result.block);
++ return DM_MAPIO_REMAPPED;
+
+ case -ENODATA:
+ if (get_pool_mode(tc->pool) == PM_READ_ONLY) {
--- /dev/null
+From 9120963578320532dfb3a9a7947e8d05b39900b5 Mon Sep 17 00:00:00 2001
+From: Ralf Baechle <ralf@linux-mips.org>
+Date: Thu, 20 Dec 2012 12:47:51 +0100
+Subject: Revert "MIPS: Optimise TLB handlers for MIPS32/64 R2 cores."
+
+From: Ralf Baechle <ralf@linux-mips.org>
+
+commit 9120963578320532dfb3a9a7947e8d05b39900b5 upstream.
+
+This reverts commit ff401e52100dcdc85e572d1ad376d3307b3fe28e.
+
+The optimisation being reverted breaks MIPS64 R2 cores such as Broadcom's.
+
+Signed-off-by: Jayachandran C <jchandra@broadcom.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/mips/mm/tlbex.c | 16 ----------------
+ 1 file changed, 16 deletions(-)
+
+--- a/arch/mips/mm/tlbex.c
++++ b/arch/mips/mm/tlbex.c
+@@ -952,13 +952,6 @@ build_get_pgde32(u32 **p, unsigned int t
+ #endif
+ uasm_i_mfc0(p, tmp, C0_BADVADDR); /* get faulting address */
+ uasm_i_lw(p, ptr, uasm_rel_lo(pgdc), ptr);
+-
+- if (cpu_has_mips_r2) {
+- uasm_i_ext(p, tmp, tmp, PGDIR_SHIFT, (32 - PGDIR_SHIFT));
+- uasm_i_ins(p, ptr, tmp, PGD_T_LOG2, (32 - PGDIR_SHIFT));
+- return;
+- }
+-
+ uasm_i_srl(p, tmp, tmp, PGDIR_SHIFT); /* get pgd only bits */
+ uasm_i_sll(p, tmp, tmp, PGD_T_LOG2);
+ uasm_i_addu(p, ptr, ptr, tmp); /* add in pgd offset */
+@@ -994,15 +987,6 @@ static void __cpuinit build_adjust_conte
+
+ static void __cpuinit build_get_ptep(u32 **p, unsigned int tmp, unsigned int ptr)
+ {
+- if (cpu_has_mips_r2) {
+- /* PTE ptr offset is obtained from BadVAddr */
+- UASM_i_MFC0(p, tmp, C0_BADVADDR);
+- UASM_i_LW(p, ptr, 0, ptr);
+- uasm_i_ext(p, tmp, tmp, PAGE_SHIFT+1, PGDIR_SHIFT-PAGE_SHIFT-1);
+- uasm_i_ins(p, ptr, tmp, PTE_T_LOG2+1, PGDIR_SHIFT-PAGE_SHIFT-1);
+- return;
+- }
+-
+ /*
+ * Bug workaround for the Nevada. It seems as if under certain
+ * circumstances the move from cp0_context might produce a