From: Greg Kroah-Hartman
Date: Tue, 2 Feb 2010 23:10:06 +0000 (-0800)
Subject: .32 patches
X-Git-Tag: v2.6.32.8~7
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=fdf1765ec06d69f5b2b756863b952d9159172b69;p=thirdparty%2Fkernel%2Fstable-queue.git

.32 patches
---

diff --git a/queue-2.6.32/block-fix-bio_add_page-for-non-trivial-merge_bvec_fn-case.patch b/queue-2.6.32/block-fix-bio_add_page-for-non-trivial-merge_bvec_fn-case.patch
new file mode 100644
index 00000000000..2a7a00a449b
--- /dev/null
+++ b/queue-2.6.32/block-fix-bio_add_page-for-non-trivial-merge_bvec_fn-case.patch
@@ -0,0 +1,46 @@
+From 1d6165851cd8e3f919d446cd6da35dee44e8837e Mon Sep 17 00:00:00 2001
+From: Dmitry Monakhov
+Date: Wed, 27 Jan 2010 22:44:36 +0300
+Subject: block: fix bio_add_page for non trivial merge_bvec_fn case
+
+From: Dmitry Monakhov
+
+commit 1d6165851cd8e3f919d446cd6da35dee44e8837e upstream.
+
+We have to properly decrease bi_size in order to merge_bvec_fn return
+right result. Otherwise this result in false merge rejects for two
+absolutely valid bio_vecs. This may cause significant performance
+penalty for example fs_block_size == 1k and block device is raid0 with
+small chunk_size = 8k. Then it is impossible to merge 7-th fs-block in
+to bio which already has 6 fs-blocks.
+
+Signed-off-by: Dmitry Monakhov
+Signed-off-by: Jens Axboe
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/bio.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/fs/bio.c
++++ b/fs/bio.c
+@@ -542,13 +542,18 @@ static int __bio_add_page(struct request
+ 
+ 	if (page == prev->bv_page &&
+ 	    offset == prev->bv_offset + prev->bv_len) {
++		unsigned int prev_bv_len = prev->bv_len;
+ 		prev->bv_len += len;
+ 
+ 		if (q->merge_bvec_fn) {
+ 			struct bvec_merge_data bvm = {
++				/* prev_bvec is already charged in
++				   bi_size, discharge it in order to
++				   simulate merging updated prev_bvec
++				   as new bvec. */
+ 				.bi_bdev = bio->bi_bdev,
+ 				.bi_sector = bio->bi_sector,
+-				.bi_size = bio->bi_size,
++				.bi_size = bio->bi_size - prev_bv_len,
+ 				.bi_rw = bio->bi_rw,
+ 			};
+ 
diff --git a/queue-2.6.32/iwlwifi-set-default-aggregation-frame-count-limit-to-31.patch b/queue-2.6.32/iwlwifi-set-default-aggregation-frame-count-limit-to-31.patch
index 582cd7d0a27..73f0886e5d8 100644
--- a/queue-2.6.32/iwlwifi-set-default-aggregation-frame-count-limit-to-31.patch
+++ b/queue-2.6.32/iwlwifi-set-default-aggregation-frame-count-limit-to-31.patch
@@ -18,9 +18,13 @@ Signed-off-by: Reinette Chatre
 Signed-off-by: John W. Linville
 Signed-off-by: Greg Kroah-Hartman
 
+---
+ drivers/net/wireless/iwlwifi/iwl-agn-rs.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
 --- a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
 +++ b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
-@@ -2800,7 +2800,7 @@ static void rs_fill_link_cmd(struct iwl_priv *priv,
+@@ -2808,7 +2808,7 @@ static void rs_fill_link_cmd(struct iwl_
  			repeat_rate--;
  	}
 
diff --git a/queue-2.6.32/libata-retry-link-resume-if-necessary.patch b/queue-2.6.32/libata-retry-link-resume-if-necessary.patch
new file mode 100644
index 00000000000..5d1198d0db4
--- /dev/null
+++ b/queue-2.6.32/libata-retry-link-resume-if-necessary.patch
@@ -0,0 +1,89 @@
+From 5040ab67a2c6d5710ba497dc52a8f7035729d7b0 Mon Sep 17 00:00:00 2001
+From: Tejun Heo
+Date: Mon, 11 Jan 2010 11:14:44 +0900
+Subject: libata: retry link resume if necessary
+
+From: Tejun Heo
+
+commit 5040ab67a2c6d5710ba497dc52a8f7035729d7b0 upstream.
+
+Interestingly, when SIDPR is used in ata_piix, writes to DET in
+SControl sometimes get ignored leading to detection failure.  Update
+sata_link_resume() such that it reads back SControl after clearing DET
+and retry if it's not clear.
+
+Signed-off-by: Tejun Heo
+Reported-by: fengxiangjun
+Reported-by: Jim Faulkner
+Signed-off-by: Jeff Garzik
+Signed-off-by: Greg Kroah-Hartman
+
+--- a/drivers/ata/libata-core.c
++++ b/drivers/ata/libata-core.c
+@@ -3790,21 +3790,45 @@ int sata_link_debounce(struct ata_link *link, const unsigned long *params,
+ int sata_link_resume(struct ata_link *link, const unsigned long *params,
+ 		     unsigned long deadline)
+ {
++	int tries = ATA_LINK_RESUME_TRIES;
+ 	u32 scontrol, serror;
+ 	int rc;
+ 
+ 	if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol)))
+ 		return rc;
+ 
+-	scontrol = (scontrol & 0x0f0) | 0x300;
++	/*
++	 * Writes to SControl sometimes get ignored under certain
++	 * controllers (ata_piix SIDPR). Make sure DET actually is
++	 * cleared.
++	 */
++	do {
++		scontrol = (scontrol & 0x0f0) | 0x300;
++		if ((rc = sata_scr_write(link, SCR_CONTROL, scontrol)))
++			return rc;
++		/*
++		 * Some PHYs react badly if SStatus is pounded
++		 * immediately after resuming. Delay 200ms before
++		 * debouncing.
++		 */
++		msleep(200);
+ 
+-	if ((rc = sata_scr_write(link, SCR_CONTROL, scontrol)))
+-		return rc;
++		/* is SControl restored correctly? */
++		if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol)))
++			return rc;
++	} while ((scontrol & 0xf0f) != 0x300 && --tries);
+ 
+-	/* Some PHYs react badly if SStatus is pounded immediately
+-	 * after resuming. Delay 200ms before debouncing.
+-	 */
+-	msleep(200);
++	if ((scontrol & 0xf0f) != 0x300) {
++		ata_link_printk(link, KERN_ERR,
++				"failed to resume link (SControl %X)\n",
++				scontrol);
++		return 0;
++	}
++
++	if (tries < ATA_LINK_RESUME_TRIES)
++		ata_link_printk(link, KERN_WARNING,
++				"link resume succeeded after %d retries\n",
++				ATA_LINK_RESUME_TRIES - tries);
+ 
+ 	if ((rc = sata_link_debounce(link, params, deadline)))
+ 		return rc;
+diff --git a/include/linux/libata.h b/include/linux/libata.h
+index 6a9c4dd..7311225 100644
+--- a/include/linux/libata.h
++++ b/include/linux/libata.h
+@@ -354,6 +354,9 @@ enum {
+ 	/* max tries if error condition is still set after ->error_handler */
+ 	ATA_EH_MAX_TRIES	= 5,
+ 
++	/* sometimes resuming a link requires several retries */
++	ATA_LINK_RESUME_TRIES	= 5,
++
+ 	/* how hard are we gonna try to probe/recover devices */
+ 	ATA_PROBE_MAX_TRIES	= 3,
+ 	ATA_EH_DEV_TRIES	= 3,
diff --git a/queue-2.6.32/mm-percpu-vmap-fix-rcu-list-walking.patch b/queue-2.6.32/mm-percpu-vmap-fix-rcu-list-walking.patch
new file mode 100644
index 00000000000..eb869fdcf6f
--- /dev/null
+++ b/queue-2.6.32/mm-percpu-vmap-fix-rcu-list-walking.patch
@@ -0,0 +1,107 @@
+From de5604231ce4bc8db1bc1dcd27d8540cbedf1518 Mon Sep 17 00:00:00 2001
+From: Nick Piggin
+Date: Mon, 1 Feb 2010 22:24:18 +1100
+Subject: mm: percpu-vmap fix RCU list walking
+
+From: Nick Piggin
+
+commit de5604231ce4bc8db1bc1dcd27d8540cbedf1518 upstream.
+
+RCU list walking of the per-cpu vmap cache was broken. It did not use
+RCU primitives, and also the union of free_list and rcu_head is
+obviously wrong (because free_list is indeed the list we are RCU
+walking).
+
+While we are there, remove a couple of unused fields from an earlier
+iteration.
+
+These APIs aren't actually used anywhere, because of problems with the
+XFS conversion. Christoph has now verified that the problems are solved
+with these patches. Also it is an exported interface, so I think it
+will be good to be merged now (and Christoph wants to get the XFS
+changes into their local tree).
+
+Cc: linux-mm@kvack.org
+Tested-by: Christoph Hellwig
+Signed-off-by: Nick Piggin
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/vmalloc.c | 20 ++++++--------------
+ 1 file changed, 6 insertions(+), 14 deletions(-)
+
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -667,8 +667,6 @@ static bool vmap_initialized __read_most
+ struct vmap_block_queue {
+ 	spinlock_t lock;
+ 	struct list_head free;
+-	struct list_head dirty;
+-	unsigned int nr_dirty;
+ };
+ 
+ struct vmap_block {
+@@ -678,10 +676,8 @@ struct vmap_block {
+ 	unsigned long free, dirty;
+ 	DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS);
+ 	DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);
+-	union {
+-		struct list_head free_list;
+-		struct rcu_head rcu_head;
+-	};
++	struct list_head free_list;
++	struct rcu_head rcu_head;
+ };
+ 
+ /* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
+@@ -757,7 +753,7 @@ static struct vmap_block *new_vmap_block
+ 	vbq = &get_cpu_var(vmap_block_queue);
+ 	vb->vbq = vbq;
+ 	spin_lock(&vbq->lock);
+-	list_add(&vb->free_list, &vbq->free);
++	list_add_rcu(&vb->free_list, &vbq->free);
+ 	spin_unlock(&vbq->lock);
+ 	put_cpu_var(vmap_cpu_blocks);
+ 
+@@ -776,8 +772,6 @@ static void free_vmap_block(struct vmap_
+ 	struct vmap_block *tmp;
+ 	unsigned long vb_idx;
+ 
+-	BUG_ON(!list_empty(&vb->free_list));
+-
+ 	vb_idx = addr_to_vb_idx(vb->va->va_start);
+ 	spin_lock(&vmap_block_tree_lock);
+ 	tmp = radix_tree_delete(&vmap_block_tree, vb_idx);
+@@ -816,7 +810,7 @@ again:
+ 		vb->free -= 1UL << order;
+ 		if (vb->free == 0) {
+ 			spin_lock(&vbq->lock);
+-			list_del_init(&vb->free_list);
++			list_del_rcu(&vb->free_list);
+ 			spin_unlock(&vbq->lock);
+ 		}
+ 		spin_unlock(&vb->lock);
+@@ -860,11 +854,11 @@ static void vb_free(const void *addr, un
+ 	BUG_ON(!vb);
+ 
+ 	spin_lock(&vb->lock);
+-	bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order);
++	BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order));
+ 
+ 	vb->dirty += 1UL << order;
+ 	if (vb->dirty == VMAP_BBMAP_BITS) {
+-		BUG_ON(vb->free || !list_empty(&vb->free_list));
++		BUG_ON(vb->free);
+ 		spin_unlock(&vb->lock);
+ 		free_vmap_block(vb);
+ 	} else
+@@ -1033,8 +1027,6 @@ void __init vmalloc_init(void)
+ 		vbq = &per_cpu(vmap_block_queue, i);
+ 		spin_lock_init(&vbq->lock);
+ 		INIT_LIST_HEAD(&vbq->free);
+-		INIT_LIST_HEAD(&vbq->dirty);
+-		vbq->nr_dirty = 0;
+ 	}
+ 
+ 	/* Import existing vmlist entries. */
diff --git a/queue-2.6.32/mm-purge-fragmented-percpu-vmap-blocks.patch b/queue-2.6.32/mm-purge-fragmented-percpu-vmap-blocks.patch
new file mode 100644
index 00000000000..17576530308
--- /dev/null
+++ b/queue-2.6.32/mm-purge-fragmented-percpu-vmap-blocks.patch
@@ -0,0 +1,174 @@
+From 02b709df817c0db174f249cc59e5f7fd01b64d92 Mon Sep 17 00:00:00 2001
+From: Nick Piggin
+Date: Mon, 1 Feb 2010 22:25:57 +1100
+Subject: mm: purge fragmented percpu vmap blocks
+
+From: Nick Piggin
+
+commit 02b709df817c0db174f249cc59e5f7fd01b64d92 upstream.
+
+Improve handling of fragmented per-CPU vmaps. We previously don't free
+up per-CPU maps until all its addresses have been used and freed. So
+fragmented blocks could fill up vmalloc space even if they actually had
+no active vmap regions within them.
+
+Add some logic to allow all CPUs to have these blocks purged in the case
+of failure to allocate a new vm area, and also put some logic to trim
+such blocks of a current CPU if we hit them in the allocation path (so
+as to avoid a large build up of them).
+
+Christoph reported some vmap allocation failures when using the per CPU
+vmap APIs in XFS, which cannot be reproduced after this patch and the
+previous bug fix.
+
+Cc: linux-mm@kvack.org
+Tested-by: Christoph Hellwig
+Signed-off-by: Nick Piggin
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/vmalloc.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++--------
+ 1 file changed, 80 insertions(+), 11 deletions(-)
+
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -509,6 +509,9 @@ static unsigned long lazy_max_pages(void
+ 
+ static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
+ 
++/* for per-CPU blocks */
++static void purge_fragmented_blocks_allcpus(void);
++
+ /*
+  * Purges all lazily-freed vmap areas.
+  *
+@@ -539,6 +542,9 @@ static void __purge_vmap_area_lazy(unsig
+ 	} else
+ 		spin_lock(&purge_lock);
+ 
++	if (sync)
++		purge_fragmented_blocks_allcpus();
++
+ 	rcu_read_lock();
+ 	list_for_each_entry_rcu(va, &vmap_area_list, list) {
+ 		if (va->flags & VM_LAZY_FREE) {
+@@ -678,6 +684,7 @@ struct vmap_block {
+ 	DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);
+ 	struct list_head free_list;
+ 	struct rcu_head rcu_head;
++	struct list_head purge;
+ };
+ 
+ /* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
+@@ -782,12 +789,61 @@ static void free_vmap_block(struct vmap_
+ 	call_rcu(&vb->rcu_head, rcu_free_vb);
+ }
+ 
++static void purge_fragmented_blocks(int cpu)
++{
++	LIST_HEAD(purge);
++	struct vmap_block *vb;
++	struct vmap_block *n_vb;
++	struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
++
++	rcu_read_lock();
++	list_for_each_entry_rcu(vb, &vbq->free, free_list) {
++
++		if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS))
++			continue;
++
++		spin_lock(&vb->lock);
++		if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) {
++			vb->free = 0; /* prevent further allocs after releasing lock */
++			vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */
++			bitmap_fill(vb->alloc_map, VMAP_BBMAP_BITS);
++			bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS);
++			spin_lock(&vbq->lock);
++			list_del_rcu(&vb->free_list);
++			spin_unlock(&vbq->lock);
++			spin_unlock(&vb->lock);
++			list_add_tail(&vb->purge, &purge);
++		} else
++			spin_unlock(&vb->lock);
++	}
++	rcu_read_unlock();
++
++	list_for_each_entry_safe(vb, n_vb, &purge, purge) {
++		list_del(&vb->purge);
++		free_vmap_block(vb);
++	}
++}
++
++static void purge_fragmented_blocks_thiscpu(void)
++{
++	purge_fragmented_blocks(smp_processor_id());
++}
++
++static void purge_fragmented_blocks_allcpus(void)
++{
++	int cpu;
++
++	for_each_possible_cpu(cpu)
++		purge_fragmented_blocks(cpu);
++}
++
+ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
+ {
+ 	struct vmap_block_queue *vbq;
+ 	struct vmap_block *vb;
+ 	unsigned long addr = 0;
+ 	unsigned int order;
++	int purge = 0;
+ 
+ 	BUG_ON(size & ~PAGE_MASK);
+ 	BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
+@@ -800,24 +856,37 @@ again:
+ 		int i;
+ 
+ 		spin_lock(&vb->lock);
++		if (vb->free < 1UL << order)
++			goto next;
+ 		i = bitmap_find_free_region(vb->alloc_map,
+ 					VMAP_BBMAP_BITS, order);
+ 
+-		if (i >= 0) {
+-			addr = vb->va->va_start + (i << PAGE_SHIFT);
+-			BUG_ON(addr_to_vb_idx(addr) !=
+-					addr_to_vb_idx(vb->va->va_start));
+-			vb->free -= 1UL << order;
+-			if (vb->free == 0) {
+-				spin_lock(&vbq->lock);
+-				list_del_rcu(&vb->free_list);
+-				spin_unlock(&vbq->lock);
++		if (i < 0) {
++			if (vb->free + vb->dirty == VMAP_BBMAP_BITS) {
++				/* fragmented and no outstanding allocations */
++				BUG_ON(vb->dirty != VMAP_BBMAP_BITS);
++				purge = 1;
+ 			}
+-			spin_unlock(&vb->lock);
+-			break;
++			goto next;
++		}
++		addr = vb->va->va_start + (i << PAGE_SHIFT);
++		BUG_ON(addr_to_vb_idx(addr) !=
++				addr_to_vb_idx(vb->va->va_start));
++		vb->free -= 1UL << order;
++		if (vb->free == 0) {
++			spin_lock(&vbq->lock);
++			list_del_rcu(&vb->free_list);
++			spin_unlock(&vbq->lock);
+ 		}
+ 		spin_unlock(&vb->lock);
++		break;
++next:
++		spin_unlock(&vb->lock);
+ 	}
++
++	if (purge)
++		purge_fragmented_blocks_thiscpu();
++
+ 	put_cpu_var(vmap_cpu_blocks);
+ 	rcu_read_unlock();
+ 
diff --git a/queue-2.6.32/series b/queue-2.6.32/series
index 3de3c683331..5cb135a7928 100644
--- a/queue-2.6.32/series
+++ b/queue-2.6.32/series
@@ -49,3 +49,7 @@ ax25-netrom-rose-fix-timer-oopses.patch
 kvm-allow-userspace-to-adjust-kvmclock-offset.patch
 oprofile-x86-add-xeon-7500-series-support.patch
 oprofile-x86-fix-crash-when-profiling-more-than-28-events.patch
+libata-retry-link-resume-if-necessary.patch
+mm-percpu-vmap-fix-rcu-list-walking.patch
+mm-purge-fragmented-percpu-vmap-blocks.patch
+block-fix-bio_add_page-for-non-trivial-merge_bvec_fn-case.patch