--- /dev/null
+From 1d6165851cd8e3f919d446cd6da35dee44e8837e Mon Sep 17 00:00:00 2001
+From: Dmitry Monakhov <dmonakhov@openvz.org>
+Date: Wed, 27 Jan 2010 22:44:36 +0300
+Subject: block: fix bio_add_page for non trivial merge_bvec_fn case
+
+From: Dmitry Monakhov <dmonakhov@openvz.org>
+
+commit 1d6165851cd8e3f919d446cd6da35dee44e8837e upstream.
+
+We have to properly decrease bi_size in order for merge_bvec_fn to
+return the right result. Otherwise this results in false merge rejects
+for two absolutely valid bio_vecs. This may cause a significant
+performance penalty, for example when fs_block_size == 1k and the block
+device is raid0 with a small chunk_size = 8k. Then it is impossible to
+merge the 7th fs-block into a bio which already has 6 fs-blocks.
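+
+A rough worked illustration of that reject (assuming 4k pages; this is
+a sketch of the arithmetic, not a real trace): the bio already holds 6
+contiguous 1k fs-blocks, so bi_size = 6k, and the last 2k of that sits
+in the bvec for the current page. When the 7th 1k block merges into
+that bvec, prev->bv_len grows from 2k to 3k, but bi_size still includes
+the old 2k, so merge_bvec_fn effectively sees 6k already in the bio
+plus a 3k segment, i.e. 9k against the 8k chunk, and rejects the merge.
+With prev_bv_len subtracted the query becomes 4k + 3k = 7k <= 8k and
+the merge is accepted.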
+
+Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
+Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/bio.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/fs/bio.c
++++ b/fs/bio.c
+@@ -542,13 +542,18 @@ static int __bio_add_page(struct request
+
+ if (page == prev->bv_page &&
+ offset == prev->bv_offset + prev->bv_len) {
++ unsigned int prev_bv_len = prev->bv_len;
+ prev->bv_len += len;
+
+ if (q->merge_bvec_fn) {
+ struct bvec_merge_data bvm = {
++ /* prev_bvec is already charged in
++ bi_size, discharge it in order to
++ simulate merging updated prev_bvec
++ as new bvec. */
+ .bi_bdev = bio->bi_bdev,
+ .bi_sector = bio->bi_sector,
+- .bi_size = bio->bi_size,
++ .bi_size = bio->bi_size - prev_bv_len,
+ .bi_rw = bio->bi_rw,
+ };
+
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ drivers/net/wireless/iwlwifi/iwl-agn-rs.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
--- a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
-@@ -2800,7 +2800,7 @@ static void rs_fill_link_cmd(struct iwl_priv *priv,
+@@ -2808,7 +2808,7 @@ static void rs_fill_link_cmd(struct iwl_
repeat_rate--;
}
--- /dev/null
+From 5040ab67a2c6d5710ba497dc52a8f7035729d7b0 Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Mon, 11 Jan 2010 11:14:44 +0900
+Subject: libata: retry link resume if necessary
+
+From: Tejun Heo <tj@kernel.org>
+
+commit 5040ab67a2c6d5710ba497dc52a8f7035729d7b0 upstream.
+
+Interestingly, when SIDPR is used in ata_piix, writes to DET in
+SControl sometimes get ignored, leading to detection failure. Update
+sata_link_resume() such that it reads back SControl after clearing DET
+and retries if it's not clear.
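+
+As an aside on the masks involved (standard SATA SControl bit layout:
+DET = bits 3:0, SPD = bits 7:4, IPM = bits 11:8):
+
+  (scontrol & 0x0f0) | 0x300    keep SPD, set IPM = 3 (power-management
+                                transitions disabled), clear DET
+  (scontrol & 0xf0f) != 0x300   read-back check: did DET really go to
+                                0, and is IPM still 3?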
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Reported-by: fengxiangjun <fengxiangjun@neusoft.com>
+Reported-by: Jim Faulkner <jfaulkne@ccs.neu.edu>
+Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+--- a/drivers/ata/libata-core.c
++++ b/drivers/ata/libata-core.c
+@@ -3790,21 +3790,45 @@ int sata_link_debounce(struct ata_link *link, const unsigned long *params,
+ int sata_link_resume(struct ata_link *link, const unsigned long *params,
+ unsigned long deadline)
+ {
++ int tries = ATA_LINK_RESUME_TRIES;
+ u32 scontrol, serror;
+ int rc;
+
+ if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol)))
+ return rc;
+
+- scontrol = (scontrol & 0x0f0) | 0x300;
++ /*
++ * Writes to SControl sometimes get ignored under certain
++ * controllers (ata_piix SIDPR). Make sure DET actually is
++ * cleared.
++ */
++ do {
++ scontrol = (scontrol & 0x0f0) | 0x300;
++ if ((rc = sata_scr_write(link, SCR_CONTROL, scontrol)))
++ return rc;
++ /*
++ * Some PHYs react badly if SStatus is pounded
++ * immediately after resuming. Delay 200ms before
++ * debouncing.
++ */
++ msleep(200);
+
+- if ((rc = sata_scr_write(link, SCR_CONTROL, scontrol)))
+- return rc;
++ /* is SControl restored correctly? */
++ if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol)))
++ return rc;
++ } while ((scontrol & 0xf0f) != 0x300 && --tries);
+
+- /* Some PHYs react badly if SStatus is pounded immediately
+- * after resuming. Delay 200ms before debouncing.
+- */
+- msleep(200);
++ if ((scontrol & 0xf0f) != 0x300) {
++ ata_link_printk(link, KERN_ERR,
++ "failed to resume link (SControl %X)\n",
++ scontrol);
++ return 0;
++ }
++
++ if (tries < ATA_LINK_RESUME_TRIES)
++ ata_link_printk(link, KERN_WARNING,
++ "link resume succeeded after %d retries\n",
++ ATA_LINK_RESUME_TRIES - tries);
+
+ if ((rc = sata_link_debounce(link, params, deadline)))
+ return rc;
+diff --git a/include/linux/libata.h b/include/linux/libata.h
+index 6a9c4dd..7311225 100644
+--- a/include/linux/libata.h
++++ b/include/linux/libata.h
+@@ -354,6 +354,9 @@ enum {
+ /* max tries if error condition is still set after ->error_handler */
+ ATA_EH_MAX_TRIES = 5,
+
++ /* sometimes resuming a link requires several retries */
++ ATA_LINK_RESUME_TRIES = 5,
++
+ /* how hard are we gonna try to probe/recover devices */
+ ATA_PROBE_MAX_TRIES = 3,
+ ATA_EH_DEV_TRIES = 3,
--- /dev/null
+From de5604231ce4bc8db1bc1dcd27d8540cbedf1518 Mon Sep 17 00:00:00 2001
+From: Nick Piggin <npiggin@suse.de>
+Date: Mon, 1 Feb 2010 22:24:18 +1100
+Subject: mm: percpu-vmap fix RCU list walking
+
+From: Nick Piggin <npiggin@suse.de>
+
+commit de5604231ce4bc8db1bc1dcd27d8540cbedf1518 upstream.
+
+RCU list walking of the per-cpu vmap cache was broken. It did not use
+RCU primitives, and the union of free_list and rcu_head was also
+obviously wrong (because free_list is the very list we are RCU
+walking).
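+
+A minimal sketch of why that shared storage is unsafe (illustrative,
+not code from the tree):
+
+	union {
+		struct list_head free_list;	/* node on vbq->free, RCU-walked */
+		struct rcu_head  rcu_head;	/* reused by call_rcu() on free */
+	};
+
+Once free_vmap_block() hands the block to call_rcu(), the RCU callback
+machinery rewrites those bytes, yet a reader inside rcu_read_lock() may
+still be stepping through vbq->free via this very node, so it can chase
+a next pointer that no longer points at a vmap_block.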
+
+While we are there, remove a couple of unused fields from an earlier
+iteration.
+
+These APIs aren't actually used anywhere, because of problems with the
+XFS conversion. Christoph has now verified that the problems are solved
+with these patches. Also, it is an exported interface, so I think it
+would be good to merge it now (and Christoph wants to get the XFS
+changes into their local tree).
+
+Cc: linux-mm@kvack.org
+Tested-by: Christoph Hellwig <hch@infradead.org>
+Signed-off-by: Nick Piggin <npiggin@suse.de>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ mm/vmalloc.c | 20 ++++++--------------
+ 1 file changed, 6 insertions(+), 14 deletions(-)
+
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -667,8 +667,6 @@ static bool vmap_initialized __read_most
+ struct vmap_block_queue {
+ spinlock_t lock;
+ struct list_head free;
+- struct list_head dirty;
+- unsigned int nr_dirty;
+ };
+
+ struct vmap_block {
+@@ -678,10 +676,8 @@ struct vmap_block {
+ unsigned long free, dirty;
+ DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS);
+ DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);
+- union {
+- struct list_head free_list;
+- struct rcu_head rcu_head;
+- };
++ struct list_head free_list;
++ struct rcu_head rcu_head;
+ };
+
+ /* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
+@@ -757,7 +753,7 @@ static struct vmap_block *new_vmap_block
+ vbq = &get_cpu_var(vmap_block_queue);
+ vb->vbq = vbq;
+ spin_lock(&vbq->lock);
+- list_add(&vb->free_list, &vbq->free);
++ list_add_rcu(&vb->free_list, &vbq->free);
+ spin_unlock(&vbq->lock);
+ put_cpu_var(vmap_cpu_blocks);
+
+@@ -776,8 +772,6 @@ static void free_vmap_block(struct vmap_
+ struct vmap_block *tmp;
+ unsigned long vb_idx;
+
+- BUG_ON(!list_empty(&vb->free_list));
+-
+ vb_idx = addr_to_vb_idx(vb->va->va_start);
+ spin_lock(&vmap_block_tree_lock);
+ tmp = radix_tree_delete(&vmap_block_tree, vb_idx);
+@@ -816,7 +810,7 @@ again:
+ vb->free -= 1UL << order;
+ if (vb->free == 0) {
+ spin_lock(&vbq->lock);
+- list_del_init(&vb->free_list);
++ list_del_rcu(&vb->free_list);
+ spin_unlock(&vbq->lock);
+ }
+ spin_unlock(&vb->lock);
+@@ -860,11 +854,11 @@ static void vb_free(const void *addr, un
+ BUG_ON(!vb);
+
+ spin_lock(&vb->lock);
+- bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order);
++ BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order));
+
+ vb->dirty += 1UL << order;
+ if (vb->dirty == VMAP_BBMAP_BITS) {
+- BUG_ON(vb->free || !list_empty(&vb->free_list));
++ BUG_ON(vb->free);
+ spin_unlock(&vb->lock);
+ free_vmap_block(vb);
+ } else
+@@ -1033,8 +1027,6 @@ void __init vmalloc_init(void)
+ vbq = &per_cpu(vmap_block_queue, i);
+ spin_lock_init(&vbq->lock);
+ INIT_LIST_HEAD(&vbq->free);
+- INIT_LIST_HEAD(&vbq->dirty);
+- vbq->nr_dirty = 0;
+ }
+
+ /* Import existing vmlist entries. */
--- /dev/null
+From 02b709df817c0db174f249cc59e5f7fd01b64d92 Mon Sep 17 00:00:00 2001
+From: Nick Piggin <npiggin@suse.de>
+Date: Mon, 1 Feb 2010 22:25:57 +1100
+Subject: mm: purge fragmented percpu vmap blocks
+
+From: Nick Piggin <npiggin@suse.de>
+
+commit 02b709df817c0db174f249cc59e5f7fd01b64d92 upstream.
+
+Improve handling of fragmented per-CPU vmaps. Previously we did not
+free up a per-CPU block until all of its addresses had been used and
+freed, so fragmented blocks could fill up vmalloc space even if they
+actually had no active vmap regions within them.
+
+Add logic to purge these blocks on all CPUs when allocation of a new
+vm area fails, and also to trim such blocks on the current CPU if we
+hit them in the allocation path (so as to avoid a large build-up of
+them).
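+
+Condensed restatement of the test the new purge path uses (see
+purge_fragmented_blocks() in the diff below; the local variable name is
+only for illustration):
+
+	/* no live mappings remain: every unit is either still free or
+	 * already freed (dirty); the fully-dirty case is instead freed by
+	 * vb_free() through the normal path */
+	fragmented = vb->free + vb->dirty == VMAP_BBMAP_BITS &&
+		     vb->dirty != VMAP_BBMAP_BITS;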
+
+Christoph reported some vmap allocation failures when using the per-CPU
+vmap APIs in XFS, which can no longer be reproduced after this patch and
+the previous bug fix.
+
+Cc: linux-mm@kvack.org
+Tested-by: Christoph Hellwig <hch@infradead.org>
+Signed-off-by: Nick Piggin <npiggin@suse.de>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ mm/vmalloc.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++--------
+ 1 file changed, 80 insertions(+), 11 deletions(-)
+
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -509,6 +509,9 @@ static unsigned long lazy_max_pages(void
+
+ static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
+
++/* for per-CPU blocks */
++static void purge_fragmented_blocks_allcpus(void);
++
+ /*
+ * Purges all lazily-freed vmap areas.
+ *
+@@ -539,6 +542,9 @@ static void __purge_vmap_area_lazy(unsig
+ } else
+ spin_lock(&purge_lock);
+
++ if (sync)
++ purge_fragmented_blocks_allcpus();
++
+ rcu_read_lock();
+ list_for_each_entry_rcu(va, &vmap_area_list, list) {
+ if (va->flags & VM_LAZY_FREE) {
+@@ -678,6 +684,7 @@ struct vmap_block {
+ DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);
+ struct list_head free_list;
+ struct rcu_head rcu_head;
++ struct list_head purge;
+ };
+
+ /* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
+@@ -782,12 +789,61 @@ static void free_vmap_block(struct vmap_
+ call_rcu(&vb->rcu_head, rcu_free_vb);
+ }
+
++static void purge_fragmented_blocks(int cpu)
++{
++ LIST_HEAD(purge);
++ struct vmap_block *vb;
++ struct vmap_block *n_vb;
++ struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
++
++ rcu_read_lock();
++ list_for_each_entry_rcu(vb, &vbq->free, free_list) {
++
++ if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS))
++ continue;
++
++ spin_lock(&vb->lock);
++ if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) {
++ vb->free = 0; /* prevent further allocs after releasing lock */
++ vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */
++ bitmap_fill(vb->alloc_map, VMAP_BBMAP_BITS);
++ bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS);
++ spin_lock(&vbq->lock);
++ list_del_rcu(&vb->free_list);
++ spin_unlock(&vbq->lock);
++ spin_unlock(&vb->lock);
++ list_add_tail(&vb->purge, &purge);
++ } else
++ spin_unlock(&vb->lock);
++ }
++ rcu_read_unlock();
++
++ list_for_each_entry_safe(vb, n_vb, &purge, purge) {
++ list_del(&vb->purge);
++ free_vmap_block(vb);
++ }
++}
++
++static void purge_fragmented_blocks_thiscpu(void)
++{
++ purge_fragmented_blocks(smp_processor_id());
++}
++
++static void purge_fragmented_blocks_allcpus(void)
++{
++ int cpu;
++
++ for_each_possible_cpu(cpu)
++ purge_fragmented_blocks(cpu);
++}
++
+ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
+ {
+ struct vmap_block_queue *vbq;
+ struct vmap_block *vb;
+ unsigned long addr = 0;
+ unsigned int order;
++ int purge = 0;
+
+ BUG_ON(size & ~PAGE_MASK);
+ BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
+@@ -800,24 +856,37 @@ again:
+ int i;
+
+ spin_lock(&vb->lock);
++ if (vb->free < 1UL << order)
++ goto next;
+ i = bitmap_find_free_region(vb->alloc_map,
+ VMAP_BBMAP_BITS, order);
+
+- if (i >= 0) {
+- addr = vb->va->va_start + (i << PAGE_SHIFT);
+- BUG_ON(addr_to_vb_idx(addr) !=
+- addr_to_vb_idx(vb->va->va_start));
+- vb->free -= 1UL << order;
+- if (vb->free == 0) {
+- spin_lock(&vbq->lock);
+- list_del_rcu(&vb->free_list);
+- spin_unlock(&vbq->lock);
++ if (i < 0) {
++ if (vb->free + vb->dirty == VMAP_BBMAP_BITS) {
++ /* fragmented and no outstanding allocations */
++ BUG_ON(vb->dirty != VMAP_BBMAP_BITS);
++ purge = 1;
+ }
+- spin_unlock(&vb->lock);
+- break;
++ goto next;
++ }
++ addr = vb->va->va_start + (i << PAGE_SHIFT);
++ BUG_ON(addr_to_vb_idx(addr) !=
++ addr_to_vb_idx(vb->va->va_start));
++ vb->free -= 1UL << order;
++ if (vb->free == 0) {
++ spin_lock(&vbq->lock);
++ list_del_rcu(&vb->free_list);
++ spin_unlock(&vbq->lock);
+ }
+ spin_unlock(&vb->lock);
++ break;
++next:
++ spin_unlock(&vb->lock);
+ }
++
++ if (purge)
++ purge_fragmented_blocks_thiscpu();
++
+ put_cpu_var(vmap_cpu_blocks);
+ rcu_read_unlock();
+
kvm-allow-userspace-to-adjust-kvmclock-offset.patch
oprofile-x86-add-xeon-7500-series-support.patch
oprofile-x86-fix-crash-when-profiling-more-than-28-events.patch
+libata-retry-link-resume-if-necessary.patch
+mm-percpu-vmap-fix-rcu-list-walking.patch
+mm-purge-fragmented-percpu-vmap-blocks.patch
+block-fix-bio_add_page-for-non-trivial-merge_bvec_fn-case.patch