--- /dev/null
+From 1d6165851cd8e3f919d446cd6da35dee44e8837e Mon Sep 17 00:00:00 2001
+From: Dmitry Monakhov <dmonakhov@openvz.org>
+Date: Wed, 27 Jan 2010 22:44:36 +0300
+Subject: block: fix bio_add_page for non trivial merge_bvec_fn case
+
+From: Dmitry Monakhov <dmonakhov@openvz.org>
+
+commit 1d6165851cd8e3f919d446cd6da35dee44e8837e upstream.
+
+We have to properly decrease bi_size in order for merge_bvec_fn to
+return the right result. Otherwise this results in false merge rejects
+for two absolutely valid bio_vecs. This may cause a significant
+performance penalty, for example when fs_block_size == 1k and the block
+device is raid0 with a small chunk_size = 8k. Then it is impossible to
+merge the 7th fs-block into a bio which already has 6 fs-blocks.
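+
+A rough worked illustration of that reject (assuming 4k pages; this is
+a sketch of the arithmetic, not a real trace): the bio already holds 6
+contiguous 1k fs-blocks, so bi_size = 6k, and the last 2k of that sits
+in the bvec for the current page. When the 7th 1k block merges into
+that bvec, prev->bv_len grows from 2k to 3k, but bi_size still includes
+the old 2k, so merge_bvec_fn effectively sees 6k already in the bio
+plus a 3k segment, i.e. 9k against the 8k chunk, and rejects the merge.
+With prev_bv_len subtracted the query becomes 4k + 3k = 7k <= 8k and
+the merge is accepted.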
+
+Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
+Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/bio.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/fs/bio.c
++++ b/fs/bio.c
+@@ -542,13 +542,18 @@ static int __bio_add_page(struct request
+
+ if (page == prev->bv_page &&
+ offset == prev->bv_offset + prev->bv_len) {
++ unsigned int prev_bv_len = prev->bv_len;
+ prev->bv_len += len;
+
+ if (q->merge_bvec_fn) {
+ struct bvec_merge_data bvm = {
++ /* prev_bvec is already charged in
++ bi_size, discharge it in order to
++ simulate merging updated prev_bvec
++ as new bvec. */
+ .bi_bdev = bio->bi_bdev,
+ .bi_sector = bio->bi_sector,
+- .bi_size = bio->bi_size,
++ .bi_size = bio->bi_size - prev_bv_len,
+ .bi_rw = bio->bi_rw,
+ };
+
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ drivers/net/wireless/iwlwifi/iwl-agn-rs.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
--- a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
-@@ -2800,7 +2800,7 @@ static void rs_fill_link_cmd(struct iwl_priv *priv,
+@@ -2808,7 +2808,7 @@ static void rs_fill_link_cmd(struct iwl_
repeat_rate--;
}
--- /dev/null
+From 5040ab67a2c6d5710ba497dc52a8f7035729d7b0 Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Mon, 11 Jan 2010 11:14:44 +0900
+Subject: libata: retry link resume if necessary
+
+From: Tejun Heo <tj@kernel.org>
+
+commit 5040ab67a2c6d5710ba497dc52a8f7035729d7b0 upstream.
+
+Interestingly, when SIDPR is used in ata_piix, writes to DET in
+SControl sometimes get ignored, leading to detection failure. Update
+sata_link_resume() such that it reads back SControl after clearing DET
+and retries if it's not clear.
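+
+As an aside on the masks involved (standard SATA SControl bit layout:
+DET = bits 3:0, SPD = bits 7:4, IPM = bits 11:8):
+
+  (scontrol & 0x0f0) | 0x300    keep SPD, set IPM = 3 (power-management
+                                transitions disabled), clear DET
+  (scontrol & 0xf0f) != 0x300   read-back check: did DET really go to
+                                0, and is IPM still 3?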
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Reported-by: fengxiangjun <fengxiangjun@neusoft.com>
+Reported-by: Jim Faulkner <jfaulkne@ccs.neu.edu>
+Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+--- a/drivers/ata/libata-core.c
++++ b/drivers/ata/libata-core.c
+@@ -3790,21 +3790,45 @@ int sata_link_debounce(struct ata_link *link, const unsigned long *params,
+ int sata_link_resume(struct ata_link *link, const unsigned long *params,
+ unsigned long deadline)
+ {
++ int tries = ATA_LINK_RESUME_TRIES;
+ u32 scontrol, serror;
+ int rc;
+
+ if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol)))
+ return rc;
+
+- scontrol = (scontrol & 0x0f0) | 0x300;
++ /*
++ * Writes to SControl sometimes get ignored under certain
++ * controllers (ata_piix SIDPR). Make sure DET actually is
++ * cleared.
++ */
++ do {
++ scontrol = (scontrol & 0x0f0) | 0x300;
++ if ((rc = sata_scr_write(link, SCR_CONTROL, scontrol)))
++ return rc;
++ /*
++ * Some PHYs react badly if SStatus is pounded
++ * immediately after resuming. Delay 200ms before
++ * debouncing.
++ */
++ msleep(200);
+
+- if ((rc = sata_scr_write(link, SCR_CONTROL, scontrol)))
+- return rc;
++ /* is SControl restored correctly? */
++ if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol)))
++ return rc;
++ } while ((scontrol & 0xf0f) != 0x300 && --tries);
+
+- /* Some PHYs react badly if SStatus is pounded immediately
+- * after resuming. Delay 200ms before debouncing.
+- */
+- msleep(200);
++ if ((scontrol & 0xf0f) != 0x300) {
++ ata_link_printk(link, KERN_ERR,
++ "failed to resume link (SControl %X)\n",
++ scontrol);
++ return 0;
++ }
++
++ if (tries < ATA_LINK_RESUME_TRIES)
++ ata_link_printk(link, KERN_WARNING,
++ "link resume succeeded after %d retries\n",
++ ATA_LINK_RESUME_TRIES - tries);
+
+ if ((rc = sata_link_debounce(link, params, deadline)))
+ return rc;
+diff --git a/include/linux/libata.h b/include/linux/libata.h
+index 6a9c4dd..7311225 100644
+--- a/include/linux/libata.h
++++ b/include/linux/libata.h
+@@ -354,6 +354,9 @@ enum {
+ /* max tries if error condition is still set after ->error_handler */
+ ATA_EH_MAX_TRIES = 5,
+
++ /* sometimes resuming a link requires several retries */
++ ATA_LINK_RESUME_TRIES = 5,
++
+ /* how hard are we gonna try to probe/recover devices */
+ ATA_PROBE_MAX_TRIES = 3,
+ ATA_EH_DEV_TRIES = 3,
--- /dev/null
+From de5604231ce4bc8db1bc1dcd27d8540cbedf1518 Mon Sep 17 00:00:00 2001
+From: Nick Piggin <npiggin@suse.de>
+Date: Mon, 1 Feb 2010 22:24:18 +1100
+Subject: mm: percpu-vmap fix RCU list walking
+
+From: Nick Piggin <npiggin@suse.de>
+
+commit de5604231ce4bc8db1bc1dcd27d8540cbedf1518 upstream.
+
+RCU list walking of the per-cpu vmap cache was broken. It did not use
+RCU primitives, and the union of free_list and rcu_head was also
+obviously wrong (because free_list is the very list we are RCU
+walking).
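+
+A minimal sketch of why that shared storage is unsafe (illustrative,
+not code from the tree):
+
+	union {
+		struct list_head free_list;	/* node on vbq->free, RCU-walked */
+		struct rcu_head  rcu_head;	/* reused by call_rcu() on free */
+	};
+
+Once free_vmap_block() hands the block to call_rcu(), the RCU callback
+machinery rewrites those bytes, yet a reader inside rcu_read_lock() may
+still be stepping through vbq->free via this very node, so it can chase
+a next pointer that no longer points at a vmap_block.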
+
+While we are there, remove a couple of unused fields from an earlier
+iteration.
+
+These APIs aren't actually used anywhere, because of problems with the
+XFS conversion. Christoph has now verified that the problems are solved
+with these patches. Also, it is an exported interface, so I think it
+would be good to merge it now (and Christoph wants to get the XFS
+changes into their local tree).
+
+Cc: linux-mm@kvack.org
+Tested-by: Christoph Hellwig <hch@infradead.org>
+Signed-off-by: Nick Piggin <npiggin@suse.de>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ mm/vmalloc.c | 20 ++++++--------------
+ 1 file changed, 6 insertions(+), 14 deletions(-)
+
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -667,8 +667,6 @@ static bool vmap_initialized __read_most
+ struct vmap_block_queue {
+ spinlock_t lock;
+ struct list_head free;
+- struct list_head dirty;
+- unsigned int nr_dirty;
+ };
+
+ struct vmap_block {
+@@ -678,10 +676,8 @@ struct vmap_block {
+ unsigned long free, dirty;
+ DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS);
+ DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);
+- union {
+- struct list_head free_list;
+- struct rcu_head rcu_head;
+- };
++ struct list_head free_list;
++ struct rcu_head rcu_head;
+ };
+
+ /* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
+@@ -757,7 +753,7 @@ static struct vmap_block *new_vmap_block
+ vbq = &get_cpu_var(vmap_block_queue);
+ vb->vbq = vbq;
+ spin_lock(&vbq->lock);
+- list_add(&vb->free_list, &vbq->free);
++ list_add_rcu(&vb->free_list, &vbq->free);
+ spin_unlock(&vbq->lock);
+ put_cpu_var(vmap_cpu_blocks);
+
+@@ -776,8 +772,6 @@ static void free_vmap_block(struct vmap_
+ struct vmap_block *tmp;
+ unsigned long vb_idx;
+
+- BUG_ON(!list_empty(&vb->free_list));
+-
+ vb_idx = addr_to_vb_idx(vb->va->va_start);
+ spin_lock(&vmap_block_tree_lock);
+ tmp = radix_tree_delete(&vmap_block_tree, vb_idx);
+@@ -816,7 +810,7 @@ again:
+ vb->free -= 1UL << order;
+ if (vb->free == 0) {
+ spin_lock(&vbq->lock);
+- list_del_init(&vb->free_list);
++ list_del_rcu(&vb->free_list);
+ spin_unlock(&vbq->lock);
+ }
+ spin_unlock(&vb->lock);
+@@ -860,11 +854,11 @@ static void vb_free(const void *addr, un
+ BUG_ON(!vb);
+
+ spin_lock(&vb->lock);
+- bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order);
++ BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order));
+
+ vb->dirty += 1UL << order;
+ if (vb->dirty == VMAP_BBMAP_BITS) {
+- BUG_ON(vb->free || !list_empty(&vb->free_list));
++ BUG_ON(vb->free);
+ spin_unlock(&vb->lock);
+ free_vmap_block(vb);
+ } else
+@@ -1033,8 +1027,6 @@ void __init vmalloc_init(void)
+ vbq = &per_cpu(vmap_block_queue, i);
+ spin_lock_init(&vbq->lock);
+ INIT_LIST_HEAD(&vbq->free);
+- INIT_LIST_HEAD(&vbq->dirty);
+- vbq->nr_dirty = 0;
+ }
+
+ /* Import existing vmlist entries. */
--- /dev/null
+From 02b709df817c0db174f249cc59e5f7fd01b64d92 Mon Sep 17 00:00:00 2001
+From: Nick Piggin <npiggin@suse.de>
+Date: Mon, 1 Feb 2010 22:25:57 +1100
+Subject: mm: purge fragmented percpu vmap blocks
+
+From: Nick Piggin <npiggin@suse.de>
+
+commit 02b709df817c0db174f249cc59e5f7fd01b64d92 upstream.
+
+Improve handling of fragmented per-CPU vmaps. Previously we did not
+free up a per-CPU block until all of its addresses had been used and
+freed, so fragmented blocks could fill up vmalloc space even if they
+actually had no active vmap regions within them.
+
+Add logic to purge these blocks on all CPUs when allocation of a new
+vm area fails, and also to trim such blocks on the current CPU if we
+hit them in the allocation path (so as to avoid a large build-up of
+them).
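+
+Condensed restatement of the test the new purge path uses (see
+purge_fragmented_blocks() in the diff below; the local variable name is
+only for illustration):
+
+	/* no live mappings remain: every unit is either still free or
+	 * already freed (dirty); the fully-dirty case is instead freed by
+	 * vb_free() through the normal path */
+	fragmented = vb->free + vb->dirty == VMAP_BBMAP_BITS &&
+		     vb->dirty != VMAP_BBMAP_BITS;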
+
+Christoph reported some vmap allocation failures when using the per-CPU
+vmap APIs in XFS, which can no longer be reproduced after this patch and
+the previous bug fix.
+
+Cc: linux-mm@kvack.org
+Tested-by: Christoph Hellwig <hch@infradead.org>
+Signed-off-by: Nick Piggin <npiggin@suse.de>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ mm/vmalloc.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++--------
+ 1 file changed, 80 insertions(+), 11 deletions(-)
+
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -509,6 +509,9 @@ static unsigned long lazy_max_pages(void
+
+ static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
+
++/* for per-CPU blocks */
++static void purge_fragmented_blocks_allcpus(void);
++
+ /*
+ * Purges all lazily-freed vmap areas.
+ *
+@@ -539,6 +542,9 @@ static void __purge_vmap_area_lazy(unsig
+ } else
+ spin_lock(&purge_lock);
+
++ if (sync)
++ purge_fragmented_blocks_allcpus();
++
+ rcu_read_lock();
+ list_for_each_entry_rcu(va, &vmap_area_list, list) {
+ if (va->flags & VM_LAZY_FREE) {
+@@ -678,6 +684,7 @@ struct vmap_block {
+ DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);
+ struct list_head free_list;
+ struct rcu_head rcu_head;
++ struct list_head purge;
+ };
+
+ /* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
+@@ -782,12 +789,61 @@ static void free_vmap_block(struct vmap_
+ call_rcu(&vb->rcu_head, rcu_free_vb);
+ }
+
++static void purge_fragmented_blocks(int cpu)
++{
++ LIST_HEAD(purge);
++ struct vmap_block *vb;
++ struct vmap_block *n_vb;
++ struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
++
++ rcu_read_lock();
++ list_for_each_entry_rcu(vb, &vbq->free, free_list) {
++
++ if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS))
++ continue;
++
++ spin_lock(&vb->lock);
++ if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) {
++ vb->free = 0; /* prevent further allocs after releasing lock */
++ vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */
++ bitmap_fill(vb->alloc_map, VMAP_BBMAP_BITS);
++ bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS);
++ spin_lock(&vbq->lock);
++ list_del_rcu(&vb->free_list);
++ spin_unlock(&vbq->lock);
++ spin_unlock(&vb->lock);
++ list_add_tail(&vb->purge, &purge);
++ } else
++ spin_unlock(&vb->lock);
++ }
++ rcu_read_unlock();
++
++ list_for_each_entry_safe(vb, n_vb, &purge, purge) {
++ list_del(&vb->purge);
++ free_vmap_block(vb);
++ }
++}
++
++static void purge_fragmented_blocks_thiscpu(void)
++{
++ purge_fragmented_blocks(smp_processor_id());
++}
++
++static void purge_fragmented_blocks_allcpus(void)
++{
++ int cpu;
++
++ for_each_possible_cpu(cpu)
++ purge_fragmented_blocks(cpu);
++}
++
+ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
+ {
+ struct vmap_block_queue *vbq;
+ struct vmap_block *vb;
+ unsigned long addr = 0;
+ unsigned int order;
++ int purge = 0;
+
+ BUG_ON(size & ~PAGE_MASK);
+ BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
+@@ -800,24 +856,37 @@ again:
+ int i;
+
+ spin_lock(&vb->lock);
++ if (vb->free < 1UL << order)
++ goto next;
+ i = bitmap_find_free_region(vb->alloc_map,
+ VMAP_BBMAP_BITS, order);
+
+- if (i >= 0) {
+- addr = vb->va->va_start + (i << PAGE_SHIFT);
+- BUG_ON(addr_to_vb_idx(addr) !=
+- addr_to_vb_idx(vb->va->va_start));
+- vb->free -= 1UL << order;
+- if (vb->free == 0) {
+- spin_lock(&vbq->lock);
+- list_del_rcu(&vb->free_list);
+- spin_unlock(&vbq->lock);
++ if (i < 0) {
++ if (vb->free + vb->dirty == VMAP_BBMAP_BITS) {
++ /* fragmented and no outstanding allocations */
++ BUG_ON(vb->dirty != VMAP_BBMAP_BITS);
++ purge = 1;
+ }
+- spin_unlock(&vb->lock);
+- break;
++ goto next;
++ }
++ addr = vb->va->va_start + (i << PAGE_SHIFT);
++ BUG_ON(addr_to_vb_idx(addr) !=
++ addr_to_vb_idx(vb->va->va_start));
++ vb->free -= 1UL << order;
++ if (vb->free == 0) {
++ spin_lock(&vbq->lock);
++ list_del_rcu(&vb->free_list);
++ spin_unlock(&vbq->lock);
+ }
+ spin_unlock(&vb->lock);
++ break;
++next:
++ spin_unlock(&vb->lock);
+ }
++
++ if (purge)
++ purge_fragmented_blocks_thiscpu();
++
+ put_cpu_var(vmap_cpu_blocks);
+ rcu_read_unlock();
+
kvm-allow-userspace-to-adjust-kvmclock-offset.patch
oprofile-x86-add-xeon-7500-series-support.patch
oprofile-x86-fix-crash-when-profiling-more-than-28-events.patch
+libata-retry-link-resume-if-necessary.patch
+mm-percpu-vmap-fix-rcu-list-walking.patch
+mm-purge-fragmented-percpu-vmap-blocks.patch
+block-fix-bio_add_page-for-non-trivial-merge_bvec_fn-case.patch