--- /dev/null
+From stable-bounces@linux.kernel.org Tue Apr 17 14:38:46 2007
+From: David Miller <davem@davemloft.net>
+Date: Tue, 17 Apr 2007 14:37:25 -0700 (PDT)
+Subject: Fix sparc64 SBUS IOMMU allocator
+To: stable@kernel.org
+Cc: bunk@stusta.de
+Message-ID: <20070417.143725.72712787.davem@davemloft.net>
+
+From: David Miller <davem@davemloft.net>
+
+[SPARC64]: Fix SBUS IOMMU allocation code.
+
+There are several IOMMU allocator bugs. Instead of trying to fix this
+overly complicated code, just mirror the PCI IOMMU arena allocator
+which is very stable and well stress tested.
+
+I tried to make the code as identical as possible so we can switch
+sun4u PCI and SBUS over to a common piece of IOMMU code. All that
+will be needed are two callbacks, one to do a full IOMMU flush and one
+to do a streaming buffer flush.
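+
+As a rough sketch of that direction (purely illustrative -- the names
+below are invented here and are not part of this patch or of any
+kernel release), the shared arena code could be handed something like:
+
+    /* hypothetical sketch only, not kernel API */
+    struct iommu_flush_ops {
+            /* flush the whole IOMMU TLB */
+            void (*flushall)(void *iommu_priv);
+            /* flush the streaming buffer for a DVMA range */
+            void (*strbuf_flush)(void *iommu_priv, u32 dvma,
+                                 unsigned long npages, int direction);
+    };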
+
+This patch gets rid of a lot of hangs and mysterious crashes on SBUS
+sparc64 systems, at least for me.
+
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/sparc64/kernel/sbus.c | 566 ++++++++++++++++++---------------------------
+ 1 file changed, 235 insertions(+), 331 deletions(-)
+
+--- a/arch/sparc64/kernel/sbus.c
++++ b/arch/sparc64/kernel/sbus.c
+@@ -24,48 +24,25 @@
+
+ #include "iommu_common.h"
+
+-/* These should be allocated on an SMP_CACHE_BYTES
+- * aligned boundary for optimal performance.
+- *
+- * On SYSIO, using an 8K page size we have 1GB of SBUS
+- * DMA space mapped. We divide this space into equally
+- * sized clusters. We allocate a DMA mapping from the
+- * cluster that matches the order of the allocation, or
+- * if the order is greater than the number of clusters,
+- * we try to allocate from the last cluster.
+- */
+-
+-#define NCLUSTERS 8UL
+-#define ONE_GIG (1UL * 1024UL * 1024UL * 1024UL)
+-#define CLUSTER_SIZE (ONE_GIG / NCLUSTERS)
+-#define CLUSTER_MASK (CLUSTER_SIZE - 1)
+-#define CLUSTER_NPAGES (CLUSTER_SIZE >> IO_PAGE_SHIFT)
+ #define MAP_BASE ((u32)0xc0000000)
+
++struct sbus_iommu_arena {
++ unsigned long *map;
++ unsigned int hint;
++ unsigned int limit;
++};
++
+ struct sbus_iommu {
+-/*0x00*/spinlock_t lock;
++ spinlock_t lock;
+
+-/*0x08*/iopte_t *page_table;
+-/*0x10*/unsigned long strbuf_regs;
+-/*0x18*/unsigned long iommu_regs;
+-/*0x20*/unsigned long sbus_control_reg;
+-
+-/*0x28*/volatile unsigned long strbuf_flushflag;
+-
+- /* If NCLUSTERS is ever decresed to 4 or lower,
+- * you must increase the size of the type of
+- * these counters. You have been duly warned. -DaveM
+- */
+-/*0x30*/struct {
+- u16 next;
+- u16 flush;
+- } alloc_info[NCLUSTERS];
+-
+- /* The lowest used consistent mapping entry. Since
+- * we allocate consistent maps out of cluster 0 this
+- * is relative to the beginning of closter 0.
+- */
+-/*0x50*/u32 lowest_consistent_map;
++ struct sbus_iommu_arena arena;
++
++ iopte_t *page_table;
++ unsigned long strbuf_regs;
++ unsigned long iommu_regs;
++ unsigned long sbus_control_reg;
++
++ volatile unsigned long strbuf_flushflag;
+ };
+
+ /* Offsets from iommu_regs */
+@@ -91,19 +68,6 @@ static void __iommu_flushall(struct sbus
+ tag += 8UL;
+ }
+ upa_readq(iommu->sbus_control_reg);
+-
+- for (entry = 0; entry < NCLUSTERS; entry++) {
+- iommu->alloc_info[entry].flush =
+- iommu->alloc_info[entry].next;
+- }
+-}
+-
+-static void iommu_flush(struct sbus_iommu *iommu, u32 base, unsigned long npages)
+-{
+- while (npages--)
+- upa_writeq(base + (npages << IO_PAGE_SHIFT),
+- iommu->iommu_regs + IOMMU_FLUSH);
+- upa_readq(iommu->sbus_control_reg);
+ }
+
+ /* Offsets from strbuf_regs */
+@@ -156,178 +120,115 @@ static void sbus_strbuf_flush(struct sbu
+ base, npages);
+ }
+
+-static iopte_t *alloc_streaming_cluster(struct sbus_iommu *iommu, unsigned long npages)
++/* Based largely upon the ppc64 iommu allocator. */
++static long sbus_arena_alloc(struct sbus_iommu *iommu, unsigned long npages)
+ {
+- iopte_t *iopte, *limit, *first, *cluster;
+- unsigned long cnum, ent, nent, flush_point, found;
+-
+- cnum = 0;
+- nent = 1;
+- while ((1UL << cnum) < npages)
+- cnum++;
+- if(cnum >= NCLUSTERS) {
+- nent = 1UL << (cnum - NCLUSTERS);
+- cnum = NCLUSTERS - 1;
+- }
+- iopte = iommu->page_table + (cnum * CLUSTER_NPAGES);
+-
+- if (cnum == 0)
+- limit = (iommu->page_table +
+- iommu->lowest_consistent_map);
+- else
+- limit = (iopte + CLUSTER_NPAGES);
+-
+- iopte += ((ent = iommu->alloc_info[cnum].next) << cnum);
+- flush_point = iommu->alloc_info[cnum].flush;
+-
+- first = iopte;
+- cluster = NULL;
+- found = 0;
+- for (;;) {
+- if (iopte_val(*iopte) == 0UL) {
+- found++;
+- if (!cluster)
+- cluster = iopte;
++ struct sbus_iommu_arena *arena = &iommu->arena;
++ unsigned long n, i, start, end, limit;
++ int pass;
++
++ limit = arena->limit;
++ start = arena->hint;
++ pass = 0;
++
++again:
++ n = find_next_zero_bit(arena->map, limit, start);
++ end = n + npages;
++ if (unlikely(end >= limit)) {
++ if (likely(pass < 1)) {
++ limit = start;
++ start = 0;
++ __iommu_flushall(iommu);
++ pass++;
++ goto again;
+ } else {
+- /* Used cluster in the way */
+- cluster = NULL;
+- found = 0;
++ /* Scanned the whole thing, give up. */
++ return -1;
+ }
++ }
+
+- if (found == nent)
+- break;
+-
+- iopte += (1 << cnum);
+- ent++;
+- if (iopte >= limit) {
+- iopte = (iommu->page_table + (cnum * CLUSTER_NPAGES));
+- ent = 0;
+-
+- /* Multiple cluster allocations must not wrap */
+- cluster = NULL;
+- found = 0;
++ for (i = n; i < end; i++) {
++ if (test_bit(i, arena->map)) {
++ start = i + 1;
++ goto again;
+ }
+- if (ent == flush_point)
+- __iommu_flushall(iommu);
+- if (iopte == first)
+- goto bad;
+ }
+
+- /* ent/iopte points to the last cluster entry we're going to use,
+- * so save our place for the next allocation.
+- */
+- if ((iopte + (1 << cnum)) >= limit)
+- ent = 0;
+- else
+- ent = ent + 1;
+- iommu->alloc_info[cnum].next = ent;
+- if (ent == flush_point)
+- __iommu_flushall(iommu);
+-
+- /* I've got your streaming cluster right here buddy boy... */
+- return cluster;
+-
+-bad:
+- printk(KERN_EMERG "sbus: alloc_streaming_cluster of npages(%ld) failed!\n",
+- npages);
+- return NULL;
++ for (i = n; i < end; i++)
++ __set_bit(i, arena->map);
++
++ arena->hint = end;
++
++ return n;
+ }
+
+-static void free_streaming_cluster(struct sbus_iommu *iommu, u32 base, unsigned long npages)
++static void sbus_arena_free(struct sbus_iommu_arena *arena, unsigned long base, unsigned long npages)
+ {
+- unsigned long cnum, ent, nent;
+- iopte_t *iopte;
++ unsigned long i;
+
+- cnum = 0;
+- nent = 1;
+- while ((1UL << cnum) < npages)
+- cnum++;
+- if(cnum >= NCLUSTERS) {
+- nent = 1UL << (cnum - NCLUSTERS);
+- cnum = NCLUSTERS - 1;
+- }
+- ent = (base & CLUSTER_MASK) >> (IO_PAGE_SHIFT + cnum);
+- iopte = iommu->page_table + ((base - MAP_BASE) >> IO_PAGE_SHIFT);
+- do {
+- iopte_val(*iopte) = 0UL;
+- iopte += 1 << cnum;
+- } while(--nent);
+-
+- /* If the global flush might not have caught this entry,
+- * adjust the flush point such that we will flush before
+- * ever trying to reuse it.
+- */
+-#define between(X,Y,Z) (((Z) - (Y)) >= ((X) - (Y)))
+- if (between(ent, iommu->alloc_info[cnum].next, iommu->alloc_info[cnum].flush))
+- iommu->alloc_info[cnum].flush = ent;
+-#undef between
++ for (i = base; i < (base + npages); i++)
++ __clear_bit(i, arena->map);
+ }
+
+-/* We allocate consistent mappings from the end of cluster zero. */
+-static iopte_t *alloc_consistent_cluster(struct sbus_iommu *iommu, unsigned long npages)
++static void sbus_iommu_table_init(struct sbus_iommu *iommu, unsigned int tsbsize)
+ {
+- iopte_t *iopte;
++ unsigned long tsbbase, order, sz, num_tsb_entries;
+
+- iopte = iommu->page_table + (1 * CLUSTER_NPAGES);
+- while (iopte > iommu->page_table) {
+- iopte--;
+- if (!(iopte_val(*iopte) & IOPTE_VALID)) {
+- unsigned long tmp = npages;
+-
+- while (--tmp) {
+- iopte--;
+- if (iopte_val(*iopte) & IOPTE_VALID)
+- break;
+- }
+- if (tmp == 0) {
+- u32 entry = (iopte - iommu->page_table);
++ num_tsb_entries = tsbsize / sizeof(iopte_t);
+
+- if (entry < iommu->lowest_consistent_map)
+- iommu->lowest_consistent_map = entry;
+- return iopte;
+- }
+- }
++ /* Setup initial software IOMMU state. */
++ spin_lock_init(&iommu->lock);
++
++ /* Allocate and initialize the free area map. */
++ sz = num_tsb_entries / 8;
++ sz = (sz + 7UL) & ~7UL;
++ iommu->arena.map = kzalloc(sz, GFP_KERNEL);
++ if (!iommu->arena.map) {
++ prom_printf("PCI_IOMMU: Error, kmalloc(arena.map) failed.\n");
++ prom_halt();
++ }
++ iommu->arena.limit = num_tsb_entries;
++
++ /* Now allocate and setup the IOMMU page table itself. */
++ order = get_order(tsbsize);
++ tsbbase = __get_free_pages(GFP_KERNEL, order);
++ if (!tsbbase) {
++ prom_printf("IOMMU: Error, gfp(tsb) failed.\n");
++ prom_halt();
+ }
+- return NULL;
++ iommu->page_table = (iopte_t *)tsbbase;
++ memset(iommu->page_table, 0, tsbsize);
+ }
+
+-static void free_consistent_cluster(struct sbus_iommu *iommu, u32 base, unsigned long npages)
++static inline iopte_t *alloc_npages(struct sbus_iommu *iommu, unsigned long npages)
+ {
+- iopte_t *iopte = iommu->page_table + ((base - MAP_BASE) >> IO_PAGE_SHIFT);
++ long entry;
+
+- if ((iopte - iommu->page_table) == iommu->lowest_consistent_map) {
+- iopte_t *walk = iopte + npages;
+- iopte_t *limit;
++ entry = sbus_arena_alloc(iommu, npages);
++ if (unlikely(entry < 0))
++ return NULL;
+
+- limit = iommu->page_table + CLUSTER_NPAGES;
+- while (walk < limit) {
+- if (iopte_val(*walk) != 0UL)
+- break;
+- walk++;
+- }
+- iommu->lowest_consistent_map =
+- (walk - iommu->page_table);
+- }
++ return iommu->page_table + entry;
++}
+
+- while (npages--)
+- *iopte++ = __iopte(0UL);
++static inline void free_npages(struct sbus_iommu *iommu, dma_addr_t base, unsigned long npages)
++{
++ sbus_arena_free(&iommu->arena, base >> IO_PAGE_SHIFT, npages);
+ }
+
+ void *sbus_alloc_consistent(struct sbus_dev *sdev, size_t size, dma_addr_t *dvma_addr)
+ {
+- unsigned long order, first_page, flags;
+ struct sbus_iommu *iommu;
+ iopte_t *iopte;
++ unsigned long flags, order, first_page;
+ void *ret;
+ int npages;
+
+- if (size <= 0 || sdev == NULL || dvma_addr == NULL)
+- return NULL;
+-
+ size = IO_PAGE_ALIGN(size);
+ order = get_order(size);
+ if (order >= 10)
+ return NULL;
++
+ first_page = __get_free_pages(GFP_KERNEL|__GFP_COMP, order);
+ if (first_page == 0UL)
+ return NULL;
+@@ -336,108 +237,121 @@ void *sbus_alloc_consistent(struct sbus_
+ iommu = sdev->bus->iommu;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+- iopte = alloc_consistent_cluster(iommu, size >> IO_PAGE_SHIFT);
+- if (iopte == NULL) {
+- spin_unlock_irqrestore(&iommu->lock, flags);
++ iopte = alloc_npages(iommu, size >> IO_PAGE_SHIFT);
++ spin_unlock_irqrestore(&iommu->lock, flags);
++
++ if (unlikely(iopte == NULL)) {
+ free_pages(first_page, order);
+ return NULL;
+ }
+
+- /* Ok, we're committed at this point. */
+- *dvma_addr = MAP_BASE + ((iopte - iommu->page_table) << IO_PAGE_SHIFT);
++ *dvma_addr = (MAP_BASE +
++ ((iopte - iommu->page_table) << IO_PAGE_SHIFT));
+ ret = (void *) first_page;
+ npages = size >> IO_PAGE_SHIFT;
++ first_page = __pa(first_page);
+ while (npages--) {
+- *iopte++ = __iopte(IOPTE_VALID | IOPTE_CACHE | IOPTE_WRITE |
+- (__pa(first_page) & IOPTE_PAGE));
++ iopte_val(*iopte) = (IOPTE_VALID | IOPTE_CACHE |
++ IOPTE_WRITE |
++ (first_page & IOPTE_PAGE));
++ iopte++;
+ first_page += IO_PAGE_SIZE;
+ }
+- iommu_flush(iommu, *dvma_addr, size >> IO_PAGE_SHIFT);
+- spin_unlock_irqrestore(&iommu->lock, flags);
+
+ return ret;
+ }
+
+ void sbus_free_consistent(struct sbus_dev *sdev, size_t size, void *cpu, dma_addr_t dvma)
+ {
+- unsigned long order, npages;
+ struct sbus_iommu *iommu;
+-
+- if (size <= 0 || sdev == NULL || cpu == NULL)
+- return;
++ iopte_t *iopte;
++ unsigned long flags, order, npages;
+
+ npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
+ iommu = sdev->bus->iommu;
++ iopte = iommu->page_table +
++ ((dvma - MAP_BASE) >> IO_PAGE_SHIFT);
++
++ spin_lock_irqsave(&iommu->lock, flags);
++
++ free_npages(iommu, dvma - MAP_BASE, npages);
+
+- spin_lock_irq(&iommu->lock);
+- free_consistent_cluster(iommu, dvma, npages);
+- iommu_flush(iommu, dvma, npages);
+- spin_unlock_irq(&iommu->lock);
++ spin_unlock_irqrestore(&iommu->lock, flags);
+
+ order = get_order(size);
+ if (order < 10)
+ free_pages((unsigned long)cpu, order);
+ }
+
+-dma_addr_t sbus_map_single(struct sbus_dev *sdev, void *ptr, size_t size, int dir)
++dma_addr_t sbus_map_single(struct sbus_dev *sdev, void *ptr, size_t sz, int direction)
+ {
+- struct sbus_iommu *iommu = sdev->bus->iommu;
+- unsigned long npages, pbase, flags;
+- iopte_t *iopte;
+- u32 dma_base, offset;
+- unsigned long iopte_bits;
++ struct sbus_iommu *iommu;
++ iopte_t *base;
++ unsigned long flags, npages, oaddr;
++ unsigned long i, base_paddr;
++ u32 bus_addr, ret;
++ unsigned long iopte_protection;
++
++ iommu = sdev->bus->iommu;
+
+- if (dir == SBUS_DMA_NONE)
++ if (unlikely(direction == SBUS_DMA_NONE))
+ BUG();
+
+- pbase = (unsigned long) ptr;
+- offset = (u32) (pbase & ~IO_PAGE_MASK);
+- size = (IO_PAGE_ALIGN(pbase + size) - (pbase & IO_PAGE_MASK));
+- pbase = (unsigned long) __pa(pbase & IO_PAGE_MASK);
++ oaddr = (unsigned long)ptr;
++ npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
++ npages >>= IO_PAGE_SHIFT;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+- npages = size >> IO_PAGE_SHIFT;
+- iopte = alloc_streaming_cluster(iommu, npages);
+- if (iopte == NULL)
+- goto bad;
+- dma_base = MAP_BASE + ((iopte - iommu->page_table) << IO_PAGE_SHIFT);
+- npages = size >> IO_PAGE_SHIFT;
+- iopte_bits = IOPTE_VALID | IOPTE_STBUF | IOPTE_CACHE;
+- if (dir != SBUS_DMA_TODEVICE)
+- iopte_bits |= IOPTE_WRITE;
+- while (npages--) {
+- *iopte++ = __iopte(iopte_bits | (pbase & IOPTE_PAGE));
+- pbase += IO_PAGE_SIZE;
+- }
+- npages = size >> IO_PAGE_SHIFT;
++ base = alloc_npages(iommu, npages);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+- return (dma_base | offset);
++ if (unlikely(!base))
++ BUG();
+
+-bad:
+- spin_unlock_irqrestore(&iommu->lock, flags);
+- BUG();
+- return 0;
++ bus_addr = (MAP_BASE +
++ ((base - iommu->page_table) << IO_PAGE_SHIFT));
++ ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
++ base_paddr = __pa(oaddr & IO_PAGE_MASK);
++
++ iopte_protection = IOPTE_VALID | IOPTE_STBUF | IOPTE_CACHE;
++ if (direction != SBUS_DMA_TODEVICE)
++ iopte_protection |= IOPTE_WRITE;
++
++ for (i = 0; i < npages; i++, base++, base_paddr += IO_PAGE_SIZE)
++ iopte_val(*base) = iopte_protection | base_paddr;
++
++ return ret;
+ }
+
+-void sbus_unmap_single(struct sbus_dev *sdev, dma_addr_t dma_addr, size_t size, int direction)
++void sbus_unmap_single(struct sbus_dev *sdev, dma_addr_t bus_addr, size_t sz, int direction)
+ {
+ struct sbus_iommu *iommu = sdev->bus->iommu;
+- u32 dma_base = dma_addr & IO_PAGE_MASK;
+- unsigned long flags;
++ iopte_t *base;
++ unsigned long flags, npages, i;
+
+- size = (IO_PAGE_ALIGN(dma_addr + size) - dma_base);
++ if (unlikely(direction == SBUS_DMA_NONE))
++ BUG();
++
++ npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
++ npages >>= IO_PAGE_SHIFT;
++ base = iommu->page_table +
++ ((bus_addr - MAP_BASE) >> IO_PAGE_SHIFT);
++
++ bus_addr &= IO_PAGE_MASK;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+- free_streaming_cluster(iommu, dma_base, size >> IO_PAGE_SHIFT);
+- sbus_strbuf_flush(iommu, dma_base, size >> IO_PAGE_SHIFT, direction);
++ sbus_strbuf_flush(iommu, bus_addr, npages, direction);
++ for (i = 0; i < npages; i++)
++ iopte_val(base[i]) = 0UL;
++ free_npages(iommu, bus_addr - MAP_BASE, npages);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ }
+
+ #define SG_ENT_PHYS_ADDRESS(SG) \
+ (__pa(page_address((SG)->page)) + (SG)->offset)
+
+-static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg, int nused, int nelems, unsigned long iopte_bits)
++static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg,
++ int nused, int nelems, unsigned long iopte_protection)
+ {
+ struct scatterlist *dma_sg = sg;
+ struct scatterlist *sg_end = sg + nelems;
+@@ -462,7 +376,7 @@ static inline void fill_sg(iopte_t *iopt
+ for (;;) {
+ unsigned long tmp;
+
+- tmp = (unsigned long) SG_ENT_PHYS_ADDRESS(sg);
++ tmp = SG_ENT_PHYS_ADDRESS(sg);
+ len = sg->length;
+ if (((tmp ^ pteval) >> IO_PAGE_SHIFT) != 0UL) {
+ pteval = tmp & IO_PAGE_MASK;
+@@ -478,7 +392,7 @@ static inline void fill_sg(iopte_t *iopt
+ sg++;
+ }
+
+- pteval = ((pteval & IOPTE_PAGE) | iopte_bits);
++ pteval = iopte_protection | (pteval & IOPTE_PAGE);
+ while (len > 0) {
+ *iopte++ = __iopte(pteval);
+ pteval += IO_PAGE_SIZE;
+@@ -509,103 +423,111 @@ static inline void fill_sg(iopte_t *iopt
+ }
+ }
+
+-int sbus_map_sg(struct sbus_dev *sdev, struct scatterlist *sg, int nents, int dir)
++int sbus_map_sg(struct sbus_dev *sdev, struct scatterlist *sglist, int nelems, int direction)
+ {
+- struct sbus_iommu *iommu = sdev->bus->iommu;
+- unsigned long flags, npages;
+- iopte_t *iopte;
++ struct sbus_iommu *iommu;
++ unsigned long flags, npages, iopte_protection;
++ iopte_t *base;
+ u32 dma_base;
+ struct scatterlist *sgtmp;
+ int used;
+- unsigned long iopte_bits;
+-
+- if (dir == SBUS_DMA_NONE)
+- BUG();
+
+ /* Fast path single entry scatterlists. */
+- if (nents == 1) {
+- sg->dma_address =
++ if (nelems == 1) {
++ sglist->dma_address =
+ sbus_map_single(sdev,
+- (page_address(sg->page) + sg->offset),
+- sg->length, dir);
+- sg->dma_length = sg->length;
++ (page_address(sglist->page) + sglist->offset),
++ sglist->length, direction);
++ sglist->dma_length = sglist->length;
+ return 1;
+ }
+
+- npages = prepare_sg(sg, nents);
++ iommu = sdev->bus->iommu;
++
++ if (unlikely(direction == SBUS_DMA_NONE))
++ BUG();
++
++ npages = prepare_sg(sglist, nelems);
+
+ spin_lock_irqsave(&iommu->lock, flags);
+- iopte = alloc_streaming_cluster(iommu, npages);
+- if (iopte == NULL)
+- goto bad;
+- dma_base = MAP_BASE + ((iopte - iommu->page_table) << IO_PAGE_SHIFT);
++ base = alloc_npages(iommu, npages);
++ spin_unlock_irqrestore(&iommu->lock, flags);
++
++ if (unlikely(base == NULL))
++ BUG();
++
++ dma_base = MAP_BASE +
++ ((base - iommu->page_table) << IO_PAGE_SHIFT);
+
+ /* Normalize DVMA addresses. */
+- sgtmp = sg;
+- used = nents;
++ used = nelems;
+
++ sgtmp = sglist;
+ while (used && sgtmp->dma_length) {
+ sgtmp->dma_address += dma_base;
+ sgtmp++;
+ used--;
+ }
+- used = nents - used;
++ used = nelems - used;
++
++ iopte_protection = IOPTE_VALID | IOPTE_STBUF | IOPTE_CACHE;
++ if (direction != SBUS_DMA_TODEVICE)
++ iopte_protection |= IOPTE_WRITE;
+
+- iopte_bits = IOPTE_VALID | IOPTE_STBUF | IOPTE_CACHE;
+- if (dir != SBUS_DMA_TODEVICE)
+- iopte_bits |= IOPTE_WRITE;
++ fill_sg(base, sglist, used, nelems, iopte_protection);
+
+- fill_sg(iopte, sg, used, nents, iopte_bits);
+ #ifdef VERIFY_SG
+- verify_sglist(sg, nents, iopte, npages);
++ verify_sglist(sglist, nelems, base, npages);
+ #endif
+- spin_unlock_irqrestore(&iommu->lock, flags);
+
+ return used;
+-
+-bad:
+- spin_unlock_irqrestore(&iommu->lock, flags);
+- BUG();
+- return 0;
+ }
+
+-void sbus_unmap_sg(struct sbus_dev *sdev, struct scatterlist *sg, int nents, int direction)
++void sbus_unmap_sg(struct sbus_dev *sdev, struct scatterlist *sglist, int nelems, int direction)
+ {
+- unsigned long size, flags;
+ struct sbus_iommu *iommu;
+- u32 dvma_base;
+- int i;
++ iopte_t *base;
++ unsigned long flags, i, npages;
++ u32 bus_addr;
+
+- /* Fast path single entry scatterlists. */
+- if (nents == 1) {
+- sbus_unmap_single(sdev, sg->dma_address, sg->dma_length, direction);
+- return;
+- }
++ if (unlikely(direction == SBUS_DMA_NONE))
++ BUG();
+
+- dvma_base = sg[0].dma_address & IO_PAGE_MASK;
+- for (i = 0; i < nents; i++) {
+- if (sg[i].dma_length == 0)
++ iommu = sdev->bus->iommu;
++
++ bus_addr = sglist->dma_address & IO_PAGE_MASK;
++
++ for (i = 1; i < nelems; i++)
++ if (sglist[i].dma_length == 0)
+ break;
+- }
+ i--;
+- size = IO_PAGE_ALIGN(sg[i].dma_address + sg[i].dma_length) - dvma_base;
++ npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) -
++ bus_addr) >> IO_PAGE_SHIFT;
++
++ base = iommu->page_table +
++ ((bus_addr - MAP_BASE) >> IO_PAGE_SHIFT);
+
+- iommu = sdev->bus->iommu;
+ spin_lock_irqsave(&iommu->lock, flags);
+- free_streaming_cluster(iommu, dvma_base, size >> IO_PAGE_SHIFT);
+- sbus_strbuf_flush(iommu, dvma_base, size >> IO_PAGE_SHIFT, direction);
++ sbus_strbuf_flush(iommu, bus_addr, npages, direction);
++ for (i = 0; i < npages; i++)
++ iopte_val(base[i]) = 0UL;
++ free_npages(iommu, bus_addr - MAP_BASE, npages);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ }
+
+-void sbus_dma_sync_single_for_cpu(struct sbus_dev *sdev, dma_addr_t base, size_t size, int direction)
++void sbus_dma_sync_single_for_cpu(struct sbus_dev *sdev, dma_addr_t bus_addr, size_t sz, int direction)
+ {
+- struct sbus_iommu *iommu = sdev->bus->iommu;
+- unsigned long flags;
++ struct sbus_iommu *iommu;
++ unsigned long flags, npages;
++
++ iommu = sdev->bus->iommu;
+
+- size = (IO_PAGE_ALIGN(base + size) - (base & IO_PAGE_MASK));
++ npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
++ npages >>= IO_PAGE_SHIFT;
++ bus_addr &= IO_PAGE_MASK;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+- sbus_strbuf_flush(iommu, base & IO_PAGE_MASK, size >> IO_PAGE_SHIFT, direction);
++ sbus_strbuf_flush(iommu, bus_addr, npages, direction);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ }
+
+@@ -613,23 +535,25 @@ void sbus_dma_sync_single_for_device(str
+ {
+ }
+
+-void sbus_dma_sync_sg_for_cpu(struct sbus_dev *sdev, struct scatterlist *sg, int nents, int direction)
++void sbus_dma_sync_sg_for_cpu(struct sbus_dev *sdev, struct scatterlist *sglist, int nelems, int direction)
+ {
+- struct sbus_iommu *iommu = sdev->bus->iommu;
+- unsigned long flags, size;
+- u32 base;
+- int i;
++ struct sbus_iommu *iommu;
++ unsigned long flags, npages, i;
++ u32 bus_addr;
++
++ iommu = sdev->bus->iommu;
+
+- base = sg[0].dma_address & IO_PAGE_MASK;
+- for (i = 0; i < nents; i++) {
+- if (sg[i].dma_length == 0)
++ bus_addr = sglist[0].dma_address & IO_PAGE_MASK;
++ for (i = 0; i < nelems; i++) {
++ if (!sglist[i].dma_length)
+ break;
+ }
+ i--;
+- size = IO_PAGE_ALIGN(sg[i].dma_address + sg[i].dma_length) - base;
++ npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length)
++ - bus_addr) >> IO_PAGE_SHIFT;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+- sbus_strbuf_flush(iommu, base, size >> IO_PAGE_SHIFT, direction);
++ sbus_strbuf_flush(iommu, bus_addr, npages, direction);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ }
+
+@@ -1104,7 +1028,7 @@ static void __init sbus_iommu_init(int _
+ struct linux_prom64_registers *pr;
+ struct device_node *dp;
+ struct sbus_iommu *iommu;
+- unsigned long regs, tsb_base;
++ unsigned long regs;
+ u64 control;
+ int i;
+
+@@ -1132,14 +1056,6 @@ static void __init sbus_iommu_init(int _
+
+ memset(iommu, 0, sizeof(*iommu));
+
+- /* We start with no consistent mappings. */
+- iommu->lowest_consistent_map = CLUSTER_NPAGES;
+-
+- for (i = 0; i < NCLUSTERS; i++) {
+- iommu->alloc_info[i].flush = 0;
+- iommu->alloc_info[i].next = 0;
+- }
+-
+ /* Setup spinlock. */
+ spin_lock_init(&iommu->lock);
+
+@@ -1159,25 +1075,13 @@ static void __init sbus_iommu_init(int _
+ sbus->portid, regs);
+
+ /* Setup for TSB_SIZE=7, TBW_SIZE=0, MMU_DE=1, MMU_EN=1 */
++ sbus_iommu_table_init(iommu, IO_TSB_SIZE);
++
+ control = upa_readq(iommu->iommu_regs + IOMMU_CONTROL);
+ control = ((7UL << 16UL) |
+ (0UL << 2UL) |
+ (1UL << 1UL) |
+ (1UL << 0UL));
+-
+- /* Using the above configuration we need 1MB iommu page
+- * table (128K ioptes * 8 bytes per iopte). This is
+- * page order 7 on UltraSparc.
+- */
+- tsb_base = __get_free_pages(GFP_ATOMIC, get_order(IO_TSB_SIZE));
+- if (tsb_base == 0UL) {
+- prom_printf("sbus_iommu_init: Fatal error, cannot alloc TSB table.\n");
+- prom_halt();
+- }
+-
+- iommu->page_table = (iopte_t *) tsb_base;
+- memset(iommu->page_table, 0, IO_TSB_SIZE);
+-
+ upa_writeq(control, iommu->iommu_regs + IOMMU_CONTROL);
+
+ /* Clean out any cruft in the IOMMU using
+@@ -1195,7 +1099,7 @@ static void __init sbus_iommu_init(int _
+ upa_readq(iommu->sbus_control_reg);
+
+ /* Give the TSB to SYSIO. */
+- upa_writeq(__pa(tsb_base), iommu->iommu_regs + IOMMU_TSBBASE);
++ upa_writeq(__pa(iommu->page_table), iommu->iommu_regs + IOMMU_TSBBASE);
+
+ /* Setup streaming buffer, DE=1 SB_EN=1 */
+ control = (1UL << 1UL) | (1UL << 0UL);
--- /dev/null
+From stable-bounces@linux.kernel.org Sun Apr 15 13:24:49 2007
+From: Jiri Kosina <jkosina@suse.cz>
+Date: Sun, 15 Apr 2007 22:30:15 +0200 (CEST)
+Subject: HID: zeroing of bytes in output fields is bogus
+To: stable@kernel.org
+Message-ID: <alpine.LSU.0.98.0704152228530.18903@jikos.suse.cz>
+
+From: Jiri Kosina <jkosina@suse.cz>
+
+HID: zeroing of bytes in output fields is bogus
+
+This patch removes bogus zeroing of unused bits in output reports,
+introduced in Simon's patch in commit d4ae650a.
+According to the specification, any sane device should not care
+about values of unused bits.
+
+What is worse, the zeroing is done in a way which is broken and
+might clear certain bits in output reports which are actually
+_used_ - a device that has multiple fields with one value of
+the size 1 bit each might serve as an example of why this is
+bogus - the second call of hid_output_report() would clear the
+first bit of report, which has already been set up previously.
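+
+A toy illustration of that failure mode (a standalone sketch, not the
+HID code itself; set_report_bit() is a made-up stand-in for implement()):
+
+    #include <stdio.h>
+
+    /* made-up helper: set one bit at a bit offset in a report buffer */
+    static void set_report_bit(unsigned char *data, unsigned offset)
+    {
+            data[offset / 8] |= 1u << (offset % 8);
+    }
+
+    int main(void)
+    {
+            unsigned char report[1] = { 0 };
+            unsigned count = 1, size = 1;   /* two fields, 1 bit each */
+
+            set_report_bit(report, 0);      /* first field, bit offset 0 */
+            /* the removed zeroing, run for the second field:
+             * (count*size-1)/8 == 0, so byte 0 is wiped, losing bit 0 */
+            report[(count * size - 1) / 8] = 0;
+            set_report_bit(report, 1);      /* second field, bit offset 1 */
+            printf("report[0] = 0x%02x (0x03 expected)\n", report[0]);
+            return 0;
+    }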
+
+This patch will break LEDs on SpaceNavigator, because this device
+is broken and takes into account the bits which it shouldn't touch.
+The quirk for this particular device will be provided in a separate
+patch.
+
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/hid/hid-core.c | 4 ----
+ 1 file changed, 4 deletions(-)
+
+--- a/drivers/hid/hid-core.c
++++ b/drivers/hid/hid-core.c
+@@ -876,10 +876,6 @@ static void hid_output_field(struct hid_
+ unsigned size = field->report_size;
+ unsigned n;
+
+- /* make sure the unused bits in the last byte are zeros */
+- if (count > 0 && size > 0)
+- data[(count*size-1)/8] = 0;
+-
+ for (n = 0; n < count; n++) {
+ if (field->logical_minimum < 0) /* signed values */
+ implement(data, offset + n * size, size, s32ton(field->value[n], size));
--- /dev/null
+From hugh_dickins@symantec.com Fri Apr 13 10:27:15 2007
+From: Hugh Dickins <hugh@veritas.com>
+Date: Fri, 13 Apr 2007 18:27:10 +0100 (BST)
+Subject: [PATCH 3/4] holepunch: fix disconnected pages after second truncate
+To: Greg KH <gregkh@suse.de>, Adrian Bunk <bunk@stusta.de>
+Cc: Miklos Szeredi <mszeredi@suse.cz>, stable@kernel.org
+Message-ID: <Pine.LNX.4.64.0704131826190.7279@blonde.wat.veritas.com>
+
+From: Hugh Dickins <hugh@veritas.com>
+
+shmem_truncate_range has its own truncate_inode_pages_range, to free any
+pages racily instantiated while it was in progress: a SHMEM_PAGEIN flag
+is set when this might have happened. But holepunching gets no chance
+to clear that flag at the start of vmtruncate_range, so it's always set
+(unless a truncate came just before), so holepunch almost always does
+this second truncate_inode_pages_range.
+
+shmem holepunch has unlikely swap<->file races hereabouts whatever we do
+(without a fuller rework than is fit for this release): I was going to
+skip the second truncate in the punch_hole case, but Miklos points out
+that would make holepunch correctness more vulnerable to swapoff. So
+keep the second truncate, but follow it by an unmap_mapping_range to
+eliminate the disconnected pages (freed from pagecache while still
+mapped in userspace) that it might have left behind.
+
+Signed-off-by: Hugh Dickins <hugh@veritas.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ mm/shmem.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -674,8 +674,16 @@ done2:
+ * generic_delete_inode did it, before we lowered next_index.
+ * Also, though shmem_getpage checks i_size before adding to
+ * cache, no recheck after: so fix the narrow window there too.
++ *
++ * Recalling truncate_inode_pages_range and unmap_mapping_range
++ * every time for punch_hole (which never got a chance to clear
++ * SHMEM_PAGEIN at the start of vmtruncate_range) is expensive,
++ * yet hardly ever necessary: try to optimize them out later.
+ */
+ truncate_inode_pages_range(inode->i_mapping, start, end);
++ if (punch_hole)
++ unmap_mapping_range(inode->i_mapping, start,
++ end - start, 1);
+ }
+
+ spin_lock(&info->lock);
--- /dev/null
+From hugh_dickins@symantec.com Fri Apr 13 10:28:00 2007
+From: Hugh Dickins <hugh@veritas.com>
+Date: Fri, 13 Apr 2007 18:27:55 +0100 (BST)
+Subject: holepunch: fix mmap_sem i_mutex deadlock
+To: Greg KH <gregkh@suse.de>, Adrian Bunk <bunk@stusta.de>
+Cc: Miklos Szeredi <mszeredi@suse.cz>, stable@kernel.org
+Message-ID: <Pine.LNX.4.64.0704131827160.7279@blonde.wat.veritas.com>
+
+From: Hugh Dickins <hugh@veritas.com>
+
+sys_madvise has down_write of mmap_sem, then madvise_remove calls
+vmtruncate_range which takes i_mutex and i_alloc_sem: no, we can
+easily devise deadlocks from that ordering.
+
+Make madvise_remove drop mmap_sem while calling vmtruncate_range: luckily,
+since madvise_remove doesn't split or merge vmas, it's easy to handle
+this case with a NULL prev, without restructuring sys_madvise. (Though
+sad to retake mmap_sem when it's unlikely to be needed, and certainly
+down_read is sufficient for MADV_REMOVE, unlike the other madvices.)
+
+Signed-off-by: Hugh Dickins <hugh@veritas.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ mm/madvise.c | 19 ++++++++++++++-----
+ 1 file changed, 14 insertions(+), 5 deletions(-)
+
+--- a/mm/madvise.c
++++ b/mm/madvise.c
+@@ -159,9 +159,10 @@ static long madvise_remove(struct vm_are
+ unsigned long start, unsigned long end)
+ {
+ struct address_space *mapping;
+- loff_t offset, endoff;
++ loff_t offset, endoff;
++ int error;
+
+- *prev = vma;
++ *prev = NULL; /* tell sys_madvise we drop mmap_sem */
+
+ if (vma->vm_flags & (VM_LOCKED|VM_NONLINEAR|VM_HUGETLB))
+ return -EINVAL;
+@@ -180,7 +181,12 @@ static long madvise_remove(struct vm_are
+ + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
+ endoff = (loff_t)(end - vma->vm_start - 1)
+ + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
+- return vmtruncate_range(mapping->host, offset, endoff);
++
++ /* vmtruncate_range needs to take i_mutex and i_alloc_sem */
++ up_write(&current->mm->mmap_sem);
++ error = vmtruncate_range(mapping->host, offset, endoff);
++ down_write(&current->mm->mmap_sem);
++ return error;
+ }
+
+ static long
+@@ -315,12 +321,15 @@ asmlinkage long sys_madvise(unsigned lon
+ if (error)
+ goto out;
+ start = tmp;
+- if (start < prev->vm_end)
++ if (prev && start < prev->vm_end)
+ start = prev->vm_end;
+ error = unmapped_error;
+ if (start >= end)
+ goto out;
+- vma = prev->vm_next;
++ if (prev)
++ vma = prev->vm_next;
++ else /* madvise_remove dropped mmap_sem */
++ vma = find_vma(current->mm, start);
+ }
+ out:
+ up_write(&current->mm->mmap_sem);
--- /dev/null
+From hugh_dickins@symantec.com Fri Apr 13 10:26:22 2007
+From: Hugh Dickins <hugh@veritas.com>
+Date: Fri, 13 Apr 2007 18:26:13 +0100 (BST)
+Subject: [PATCH 2/4] holepunch: fix shmem_truncate_range punch locking
+To: Greg KH <gregkh@suse.de>, Adrian Bunk <bunk@stusta.de>
+Cc: Miklos Szeredi <mszeredi@suse.cz>, stable@kernel.org
+Message-ID: <Pine.LNX.4.64.0704131825140.7279@blonde.wat.veritas.com>
+
+From: Hugh Dickins <hugh@veritas.com>
+
+Miklos Szeredi observes that during truncation of shmem page directories,
+info->lock is released to improve latency (after lowering i_size and
+next_index to exclude races); but this is quite wrong for holepunching,
+which receives no such protection from i_size or next_index, and is left
+vulnerable to races with shmem_unuse, shmem_getpage and shmem_writepage.
+
+Hold info->lock throughout when holepunching? No, any user could prevent
+rescheduling for far too long. Instead take info->lock just when needed:
+in shmem_free_swp when removing the swap entries, and whenever removing
+a directory page from the level above. But so long as we remove before
+scanning, we can safely skip taking the lock at the lower levels, except
+at misaligned start and end of the hole.
+
+Signed-off-by: Hugh Dickins <hugh@veritas.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ mm/shmem.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++---------------
+ 1 file changed, 73 insertions(+), 23 deletions(-)
+
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -402,26 +402,38 @@ static swp_entry_t *shmem_swp_alloc(stru
+ /*
+ * shmem_free_swp - free some swap entries in a directory
+ *
+- * @dir: pointer to the directory
+- * @edir: pointer after last entry of the directory
++ * @dir: pointer to the directory
++ * @edir: pointer after last entry of the directory
++ * @punch_lock: pointer to spinlock when needed for the holepunch case
+ */
+-static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir)
++static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir,
++ spinlock_t *punch_lock)
+ {
++ spinlock_t *punch_unlock = NULL;
+ swp_entry_t *ptr;
+ int freed = 0;
+
+ for (ptr = dir; ptr < edir; ptr++) {
+ if (ptr->val) {
++ if (unlikely(punch_lock)) {
++ punch_unlock = punch_lock;
++ punch_lock = NULL;
++ spin_lock(punch_unlock);
++ if (!ptr->val)
++ continue;
++ }
+ free_swap_and_cache(*ptr);
+ *ptr = (swp_entry_t){0};
+ freed++;
+ }
+ }
++ if (punch_unlock)
++ spin_unlock(punch_unlock);
+ return freed;
+ }
+
+-static int shmem_map_and_free_swp(struct page *subdir,
+- int offset, int limit, struct page ***dir)
++static int shmem_map_and_free_swp(struct page *subdir, int offset,
++ int limit, struct page ***dir, spinlock_t *punch_lock)
+ {
+ swp_entry_t *ptr;
+ int freed = 0;
+@@ -431,7 +443,8 @@ static int shmem_map_and_free_swp(struct
+ int size = limit - offset;
+ if (size > LATENCY_LIMIT)
+ size = LATENCY_LIMIT;
+- freed += shmem_free_swp(ptr+offset, ptr+offset+size);
++ freed += shmem_free_swp(ptr+offset, ptr+offset+size,
++ punch_lock);
+ if (need_resched()) {
+ shmem_swp_unmap(ptr);
+ if (*dir) {
+@@ -482,6 +495,8 @@ static void shmem_truncate_range(struct
+ int offset;
+ int freed;
+ int punch_hole;
++ spinlock_t *needs_lock;
++ spinlock_t *punch_lock;
+ unsigned long upper_limit;
+
+ inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+@@ -495,6 +510,7 @@ static void shmem_truncate_range(struct
+ limit = info->next_index;
+ upper_limit = SHMEM_MAX_INDEX;
+ info->next_index = idx;
++ needs_lock = NULL;
+ punch_hole = 0;
+ } else {
+ if (end + 1 >= inode->i_size) { /* we may free a little more */
+@@ -505,6 +521,7 @@ static void shmem_truncate_range(struct
+ limit = (end + 1) >> PAGE_CACHE_SHIFT;
+ upper_limit = limit;
+ }
++ needs_lock = &info->lock;
+ punch_hole = 1;
+ }
+
+@@ -521,7 +538,7 @@ static void shmem_truncate_range(struct
+ size = limit;
+ if (size > SHMEM_NR_DIRECT)
+ size = SHMEM_NR_DIRECT;
+- nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size);
++ nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size, needs_lock);
+ }
+
+ /*
+@@ -531,6 +548,19 @@ static void shmem_truncate_range(struct
+ if (!topdir || limit <= SHMEM_NR_DIRECT)
+ goto done2;
+
++ /*
++ * The truncation case has already dropped info->lock, and we're safe
++ * because i_size and next_index have already been lowered, preventing
++ * access beyond. But in the punch_hole case, we still need to take
++ * the lock when updating the swap directory, because there might be
++ * racing accesses by shmem_getpage(SGP_CACHE), shmem_unuse_inode or
++ * shmem_writepage. However, whenever we find we can remove a whole
++ * directory page (not at the misaligned start or end of the range),
++ * we first NULLify its pointer in the level above, and then have no
++ * need to take the lock when updating its contents: needs_lock and
++ * punch_lock (either pointing to info->lock or NULL) manage this.
++ */
++
+ upper_limit -= SHMEM_NR_DIRECT;
+ limit -= SHMEM_NR_DIRECT;
+ idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0;
+@@ -552,7 +582,13 @@ static void shmem_truncate_range(struct
+ diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) %
+ ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE;
+ if (!diroff && !offset && upper_limit >= stage) {
+- *dir = NULL;
++ if (needs_lock) {
++ spin_lock(needs_lock);
++ *dir = NULL;
++ spin_unlock(needs_lock);
++ needs_lock = NULL;
++ } else
++ *dir = NULL;
+ nr_pages_to_free++;
+ list_add(&middir->lru, &pages_to_free);
+ }
+@@ -578,8 +614,16 @@ static void shmem_truncate_range(struct
+ }
+ stage = idx + ENTRIES_PER_PAGEPAGE;
+ middir = *dir;
++ if (punch_hole)
++ needs_lock = &info->lock;
+ if (upper_limit >= stage) {
+- *dir = NULL;
++ if (needs_lock) {
++ spin_lock(needs_lock);
++ *dir = NULL;
++ spin_unlock(needs_lock);
++ needs_lock = NULL;
++ } else
++ *dir = NULL;
+ nr_pages_to_free++;
+ list_add(&middir->lru, &pages_to_free);
+ }
+@@ -588,31 +632,37 @@ static void shmem_truncate_range(struct
+ dir = shmem_dir_map(middir);
+ diroff = 0;
+ }
++ punch_lock = needs_lock;
+ subdir = dir[diroff];
+- if (subdir && page_private(subdir)) {
++ if (subdir && !offset && upper_limit-idx >= ENTRIES_PER_PAGE) {
++ if (needs_lock) {
++ spin_lock(needs_lock);
++ dir[diroff] = NULL;
++ spin_unlock(needs_lock);
++ punch_lock = NULL;
++ } else
++ dir[diroff] = NULL;
++ nr_pages_to_free++;
++ list_add(&subdir->lru, &pages_to_free);
++ }
++ if (subdir && page_private(subdir) /* has swap entries */) {
+ size = limit - idx;
+ if (size > ENTRIES_PER_PAGE)
+ size = ENTRIES_PER_PAGE;
+ freed = shmem_map_and_free_swp(subdir,
+- offset, size, &dir);
++ offset, size, &dir, punch_lock);
+ if (!dir)
+ dir = shmem_dir_map(middir);
+ nr_swaps_freed += freed;
+- if (offset)
++ if (offset || punch_lock) {
+ spin_lock(&info->lock);
+- set_page_private(subdir, page_private(subdir) - freed);
+- if (offset)
++ set_page_private(subdir,
++ page_private(subdir) - freed);
+ spin_unlock(&info->lock);
+- if (!punch_hole)
+- BUG_ON(page_private(subdir) > offset);
+- }
+- if (offset)
+- offset = 0;
+- else if (subdir && upper_limit - idx >= ENTRIES_PER_PAGE) {
+- dir[diroff] = NULL;
+- nr_pages_to_free++;
+- list_add(&subdir->lru, &pages_to_free);
++ } else
++ BUG_ON(page_private(subdir) != freed);
+ }
++ offset = 0;
+ }
+ done1:
+ shmem_dir_unmap(dir);
--- /dev/null
+From hugh_dickins@symantec.com Fri Apr 13 10:25:06 2007
+From: Hugh Dickins <hugh@veritas.com>
+Date: Fri, 13 Apr 2007 18:25:00 +0100 (BST)
+Subject: holepunch: fix shmem_truncate_range punching too far
+To: Greg KH <gregkh@suse.de>, Adrian Bunk <bunk@stusta.de>
+Cc: Miklos Szeredi <mszeredi@suse.cz>, stable@kernel.org
+Message-ID: <Pine.LNX.4.64.0704131819410.7279@blonde.wat.veritas.com>
+
+From: Hugh Dickins <hugh@veritas.com>
+
+Miklos Szeredi observes BUG_ON(!entry) in shmem_writepage() triggered
+in rare circumstances, because shmem_truncate_range() erroneously
+removes partially truncated directory pages at the end of the range:
+later reclaim on pages pointing to these removed directories triggers
+the BUG. Indeed, and it can also cause data loss beyond the hole.
+
+Fix this as in the patch proposed by Miklos, but distinguish between
+"limit" (how far we need to search: ignore truncation's next_index
+optimization in the holepunch case - if there are races it's more
+consistent to act on the whole range specified) and "upper_limit"
+(how far we can free directory pages: generally we must be careful
+to keep partially punched pages, but can relax at end of file -
+i_size being held stable by i_mutex).
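+
+A worked example of the limit/upper_limit split (illustrative numbers:
+4k PAGE_CACHE_SIZE, 4MB file):
+
+    punch bytes 8192..20479 (end+1 < i_size):
+        limit = upper_limit = 20480 >> PAGE_CACHE_SHIFT = 5
+    punch bytes 8192 to beyond end of file (end+1 >= i_size):
+        limit = i_size in pages = 1024, upper_limit = SHMEM_MAX_INDEX
+    truncate to 8192 (end == -1):
+        limit = old next_index, upper_limit = SHMEM_MAX_INDEX
+
+Directory pages are freed only when they fall wholly below upper_limit,
+so an interior punch keeps every partially covered directory page,
+while the end-of-file cases may still free them.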
+
+Signed-off-by: Hugh Dickins <hugh@veritas.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+
+---
+ mm/shmem.c | 32 +++++++++++++++++++++-----------
+ 1 file changed, 21 insertions(+), 11 deletions(-)
+
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -481,7 +481,8 @@ static void shmem_truncate_range(struct
+ long nr_swaps_freed = 0;
+ int offset;
+ int freed;
+- int punch_hole = 0;
++ int punch_hole;
++ unsigned long upper_limit;
+
+ inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+ idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+@@ -492,11 +493,18 @@ static void shmem_truncate_range(struct
+ info->flags |= SHMEM_TRUNCATE;
+ if (likely(end == (loff_t) -1)) {
+ limit = info->next_index;
++ upper_limit = SHMEM_MAX_INDEX;
+ info->next_index = idx;
++ punch_hole = 0;
+ } else {
+- limit = (end + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+- if (limit > info->next_index)
+- limit = info->next_index;
++ if (end + 1 >= inode->i_size) { /* we may free a little more */
++ limit = (inode->i_size + PAGE_CACHE_SIZE - 1) >>
++ PAGE_CACHE_SHIFT;
++ upper_limit = SHMEM_MAX_INDEX;
++ } else {
++ limit = (end + 1) >> PAGE_CACHE_SHIFT;
++ upper_limit = limit;
++ }
+ punch_hole = 1;
+ }
+
+@@ -520,10 +528,10 @@ static void shmem_truncate_range(struct
+ * If there are no indirect blocks or we are punching a hole
+ * below indirect blocks, nothing to be done.
+ */
+- if (!topdir || (punch_hole && (limit <= SHMEM_NR_DIRECT)))
++ if (!topdir || limit <= SHMEM_NR_DIRECT)
+ goto done2;
+
+- BUG_ON(limit <= SHMEM_NR_DIRECT);
++ upper_limit -= SHMEM_NR_DIRECT;
+ limit -= SHMEM_NR_DIRECT;
+ idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0;
+ offset = idx % ENTRIES_PER_PAGE;
+@@ -543,7 +551,7 @@ static void shmem_truncate_range(struct
+ if (*dir) {
+ diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) %
+ ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE;
+- if (!diroff && !offset) {
++ if (!diroff && !offset && upper_limit >= stage) {
+ *dir = NULL;
+ nr_pages_to_free++;
+ list_add(&middir->lru, &pages_to_free);
+@@ -570,9 +578,11 @@ static void shmem_truncate_range(struct
+ }
+ stage = idx + ENTRIES_PER_PAGEPAGE;
+ middir = *dir;
+- *dir = NULL;
+- nr_pages_to_free++;
+- list_add(&middir->lru, &pages_to_free);
++ if (upper_limit >= stage) {
++ *dir = NULL;
++ nr_pages_to_free++;
++ list_add(&middir->lru, &pages_to_free);
++ }
+ shmem_dir_unmap(dir);
+ cond_resched();
+ dir = shmem_dir_map(middir);
+@@ -598,7 +608,7 @@ static void shmem_truncate_range(struct
+ }
+ if (offset)
+ offset = 0;
+- else if (subdir && !page_private(subdir)) {
++ else if (subdir && upper_limit - idx >= ENTRIES_PER_PAGE) {
+ dir[diroff] = NULL;
+ nr_pages_to_free++;
+ list_add(&subdir->lru, &pages_to_free);
--- /dev/null
+From stable-bounces@linux.kernel.org Mon Apr 16 14:19:25 2007
+From: Roland Dreier <rdreier@cisco.com>
+Date: Mon, 16 Apr 2007 14:17:42 -0700
+Subject: IB/mthca: Fix data corruption after FMR unmap on Sinai
+To: stable@kernel.org
+Cc: mst@mellanox.co.il, general@lists.openfabrics.org
+Message-ID: <adatzvg6mtl.fsf@cisco.com>
+
+From: Michael S. Tsirkin <mst@dev.mellanox.co.il>
+
+In mthca_arbel_fmr_unmap(), the high bits of the key are masked off.
+This gets rid of the effect of adjust_key(), which makes sure that
+bits 3 and 23 of the key are equal when the Sinai throughput
+optimization is enabled, and so it may happen that an FMR will end up
+with bits 3 and 23 in the key being different. This causes data
+corruption, because when enabling the throughput optimization, the
+driver promises the HCA firmware that bits 3 and 23 of all memory keys
+will always be equal.
+
+Fix by re-applying adjust_key() after masking the key.
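+
+A small worked illustration of how the masking breaks that promise (the
+MPT count below is a made-up value; only the arithmetic matters):
+
+    /* illustrative only: say dev->limits.num_mpts == 1 << 17 */
+    u32 key = 0x00800008;       /* bits 3 and 23 both set, as promised  */
+    key &= (1 << 17) - 1;       /* mask 0x1ffff clears bit 23, keeps 3  */
+    /* key is now 0x00000008: bits 3 and 23 differ, so the invariant is
+     * broken until adjust_key() is applied to the masked value again   */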
+
+Thanks to Or Gerlitz for reproducing the problem, and Ariel Shahar for
+help in debugging.
+
+Signed-off-by: Michael S. Tsirkin <mst@dev.mellanox.co.il>
+Signed-off-by: Roland Dreier <rolandd@cisco.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/infiniband/hw/mthca/mthca_mr.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/infiniband/hw/mthca/mthca_mr.c
++++ b/drivers/infiniband/hw/mthca/mthca_mr.c
+@@ -751,6 +751,7 @@ void mthca_arbel_fmr_unmap(struct mthca_
+
+ key = arbel_key_to_hw_index(fmr->ibmr.lkey);
+ key &= dev->limits.num_mpts - 1;
++ key = adjust_key(dev, key);
+ fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);
+
+ fmr->maps = 0;
--- /dev/null
+From stable-bounces@linux.kernel.org Mon Apr 16 19:03:16 2007
+From: NeilBrown <neilb@suse.de>
+Date: Tue, 17 Apr 2007 12:01:41 +1000
+Subject: knfsd: Use a spinlock to protect sk_info_authunix
+To: Andrew Morton <akpm@linux-foundation.org>
+Cc: stable@kernel.org, Gabriel Barazer <gabriel@oxeva.fr>, nfs@lists.sourceforge.net, linux-kernel@vger.kernel.org, Greg Banks <gnb@melbourne.sgi.com>
+Message-ID: <1070417020141.28483@suse.de>
+
+From: NeilBrown <neilb@suse.de>
+
+sk_info_authunix is not being protected properly so the object that
+it points to can be cache_put twice, leading to corruption.
+
+We borrow svsk->sk_defer_lock to provide the protection. We should probably
+rename that lock to have a more generic name - later.
+
+Thanks to Gabriel for reporting this.
+
+Cc: Greg Banks <gnb@melbourne.sgi.com>
+Cc: Gabriel Barazer <gabriel@oxeva.fr>
+Signed-off-by: Neil Brown <neilb@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ net/sunrpc/svcauth_unix.c | 21 ++++++++++++++++-----
+ 1 file changed, 16 insertions(+), 5 deletions(-)
+
+--- a/net/sunrpc/svcauth_unix.c
++++ b/net/sunrpc/svcauth_unix.c
+@@ -383,7 +383,10 @@ void svcauth_unix_purge(void)
+ static inline struct ip_map *
+ ip_map_cached_get(struct svc_rqst *rqstp)
+ {
+- struct ip_map *ipm = rqstp->rq_sock->sk_info_authunix;
++ struct ip_map *ipm;
++ struct svc_sock *svsk = rqstp->rq_sock;
++ spin_lock_bh(&svsk->sk_defer_lock);
++ ipm = svsk->sk_info_authunix;
+ if (ipm != NULL) {
+ if (!cache_valid(&ipm->h)) {
+ /*
+@@ -391,12 +394,14 @@ ip_map_cached_get(struct svc_rqst *rqstp
+ * remembered, e.g. by a second mount from the
+ * same IP address.
+ */
+- rqstp->rq_sock->sk_info_authunix = NULL;
++ svsk->sk_info_authunix = NULL;
++ spin_unlock_bh(&svsk->sk_defer_lock);
+ cache_put(&ipm->h, &ip_map_cache);
+ return NULL;
+ }
+ cache_get(&ipm->h);
+ }
++ spin_unlock_bh(&svsk->sk_defer_lock);
+ return ipm;
+ }
+
+@@ -405,9 +410,15 @@ ip_map_cached_put(struct svc_rqst *rqstp
+ {
+ struct svc_sock *svsk = rqstp->rq_sock;
+
+- if (svsk->sk_sock->type == SOCK_STREAM && svsk->sk_info_authunix == NULL)
+- svsk->sk_info_authunix = ipm; /* newly cached, keep the reference */
+- else
++ spin_lock_bh(&svsk->sk_defer_lock);
++ if (svsk->sk_sock->type == SOCK_STREAM &&
++ svsk->sk_info_authunix == NULL) {
++ /* newly cached, keep the reference */
++ svsk->sk_info_authunix = ipm;
++ ipm = NULL;
++ }
++ spin_unlock_bh(&svsk->sk_defer_lock);
++ if (ipm)
+ cache_put(&ipm->h, &ip_map_cache);
+ }
+
--- /dev/null
+From stable-bounces@linux.kernel.org Sun Apr 22 02:29:31 2007
+From: Avi Kivity <avi@qumranet.com>
+Date: Sun, 22 Apr 2007 12:28:05 +0300
+Subject: KVM: MMU: Fix guest writes to nonpae pde
+To: stable@kernel.org
+Cc: kvm-devel@lists.sourceforge.net, linux-kernel@vger.kernel.org, Avi Kivity <avi@qumranet.com>
+Message-ID: <11772340852200-git-send-email-avi@qumranet.com>
+
+From: Avi Kivity <avi@qumranet.com>
+
+KVM shadow page tables are always in pae mode, regardless of the guest
+setting. This means that a guest pde (mapping 4MB of memory) is mapped
+to two shadow pdes (mapping 2MB each).
+
+When the guest writes to a pte or pde, we intercept the write and emulate it.
+We also remove any shadowed mappings corresponding to the write. Since the
+mmu did not account for the doubling in the number of pdes, it removed the
+wrong entry, resulting in a mismatch between shadow page tables and guest
+page tables, followed shortly by guest memory corruption.
+
+This patch fixes the problem by detecting the special case of writing to
+a non-pae pde and adjusting the address and number of shadow pdes zapped
+accordingly.
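+
+Worked through with an illustrative entry (the numbers are only an
+example): suppose the guest writes non-pae pde number 5.
+
+    byte offset in the guest table:  5 * 4       = 20
+    page_offset <<= 1  (32->64)                  = 40
+    page_offset &= ~7  (kill rounding error)     = 40
+    page_offset <<= 1  (1 guest pde = 2 shadow)  = 80
+    80 / sizeof(u64)   = shadow entry 10, npte = 2
+
+so shadow pdes 10 and 11 are zapped - the two 2MB shadow pdes backing
+the single 4MB guest pde.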
+
+Acked-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Avi Kivity <avi@qumranet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/kvm/mmu.c | 47 +++++++++++++++++++++++++++++++++++------------
+ 1 file changed, 35 insertions(+), 12 deletions(-)
+
+--- a/drivers/kvm/mmu.c
++++ b/drivers/kvm/mmu.c
+@@ -1093,22 +1093,40 @@ out:
+ return r;
+ }
+
++static void mmu_pre_write_zap_pte(struct kvm_vcpu *vcpu,
++ struct kvm_mmu_page *page,
++ u64 *spte)
++{
++ u64 pte;
++ struct kvm_mmu_page *child;
++
++ pte = *spte;
++ if (is_present_pte(pte)) {
++ if (page->role.level == PT_PAGE_TABLE_LEVEL)
++ rmap_remove(vcpu, spte);
++ else {
++ child = page_header(pte & PT64_BASE_ADDR_MASK);
++ mmu_page_remove_parent_pte(vcpu, child, spte);
++ }
++ }
++ *spte = 0;
++}
++
+ void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes)
+ {
+ gfn_t gfn = gpa >> PAGE_SHIFT;
+ struct kvm_mmu_page *page;
+- struct kvm_mmu_page *child;
+ struct hlist_node *node, *n;
+ struct hlist_head *bucket;
+ unsigned index;
+ u64 *spte;
+- u64 pte;
+ unsigned offset = offset_in_page(gpa);
+ unsigned pte_size;
+ unsigned page_offset;
+ unsigned misaligned;
+ int level;
+ int flooded = 0;
++ int npte;
+
+ pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes);
+ if (gfn == vcpu->last_pt_write_gfn) {
+@@ -1144,22 +1162,27 @@ void kvm_mmu_pre_write(struct kvm_vcpu *
+ }
+ page_offset = offset;
+ level = page->role.level;
++ npte = 1;
+ if (page->role.glevels == PT32_ROOT_LEVEL) {
+- page_offset <<= 1; /* 32->64 */
++ page_offset <<= 1; /* 32->64 */
++ /*
++ * A 32-bit pde maps 4MB while the shadow pdes map
++ * only 2MB. So we need to double the offset again
++ * and zap two pdes instead of one.
++ */
++ if (level == PT32_ROOT_LEVEL) {
++ page_offset &= ~7; /* kill rounding error */
++ page_offset <<= 1;
++ npte = 2;
++ }
+ page_offset &= ~PAGE_MASK;
+ }
+ spte = __va(page->page_hpa);
+ spte += page_offset / sizeof(*spte);
+- pte = *spte;
+- if (is_present_pte(pte)) {
+- if (level == PT_PAGE_TABLE_LEVEL)
+- rmap_remove(vcpu, spte);
+- else {
+- child = page_header(pte & PT64_BASE_ADDR_MASK);
+- mmu_page_remove_parent_pte(vcpu, child, spte);
+- }
++ while (npte--) {
++ mmu_pre_write_zap_pte(vcpu, page, spte);
++ ++spte;
+ }
+- *spte = 0;
+ }
+ }
+
--- /dev/null
+From stable-bounces@linux.kernel.org Sun Apr 22 02:30:01 2007
+From: Avi Kivity <avi@qumranet.com>
+Date: Sun, 22 Apr 2007 12:28:49 +0300
+Subject: KVM: MMU: Fix host memory corruption on i386 with >= 4GB ram
+To: stable@kernel.org
+Cc: kvm-devel@lists.sourceforge.net, linux-kernel@vger.kernel.org, Avi Kivity <avi@qumranet.com>
+Message-ID: <11772341294121-git-send-email-avi@qumranet.com>
+
+From: Avi Kivity <avi@qumranet.com>
+
+PAGE_MASK is an unsigned long, so using it to mask physical addresses on
+i386 (which are 64-bit wide) leads to truncation. This can result in
+page->private of unrelated memory pages being modified, with disastrous
+results.
+
+Fix by not using PAGE_MASK for physical addresses; instead calculate
+the correct value directly from PAGE_SIZE. Also fix a similar BUG_ON().
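+
+A standalone sketch of the truncation (the physical address is made up;
+unsigned int stands in for i386's 32-bit unsigned long):
+
+    #include <stdio.h>
+
+    int main(void)
+    {
+            /* i386: PAGE_MASK is a 32-bit unsigned long, 0xfffff000 */
+            unsigned int page_mask = ~(4096u - 1);
+            unsigned long long phys = 0x123456000ULL;  /* PAE, above 4GB */
+
+            /* the mask zero-extends, wiping bits 32 and up */
+            printf("broken: %#llx\n", phys & page_mask);
+            printf("fixed:  %#llx\n", phys & ~(unsigned long long)(4096 - 1));
+            return 0;
+    }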
+
+Acked-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Avi Kivity <avi@qumranet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/kvm/mmu.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/kvm/mmu.c
++++ b/drivers/kvm/mmu.c
+@@ -131,7 +131,7 @@ static int dbg = 1;
+ (((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1))
+
+
+-#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & PAGE_MASK)
++#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
+ #define PT64_DIR_BASE_ADDR_MASK \
+ (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + PT64_LEVEL_BITS)) - 1))
+
+@@ -406,8 +406,8 @@ static void rmap_write_protect(struct kv
+ spte = desc->shadow_ptes[0];
+ }
+ BUG_ON(!spte);
+- BUG_ON((*spte & PT64_BASE_ADDR_MASK) !=
+- page_to_pfn(page) << PAGE_SHIFT);
++ BUG_ON((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT
++ != page_to_pfn(page));
+ BUG_ON(!(*spte & PT_PRESENT_MASK));
+ BUG_ON(!(*spte & PT_WRITABLE_MASK));
+ rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
--- /dev/null
+knfsd-use-a-spinlock-to-protect-sk_info_authunix.patch
+ib-mthca-fix-data-corruption-after-fmr-unmap-on-sinai.patch
+hid-zeroing-of-bytes-in-output-fields-is-bogus.patch
+kvm-mmu-fix-guest-writes-to-nonpae-pde.patch
+kvm-mmu-fix-host-memory-corruption-on-i386-with-4gb-ram.patch
+holepunch-fix-shmem_truncate_range-punching-too-far.patch
+holepunch-fix-shmem_truncate_range-punch-locking.patch
+holepunch-fix-disconnected-pages-after-second-truncate.patch
+holepunch-fix-mmap_sem-i_mutex-deadlock.patch
+fix-sparc64-sbus-iommu-allocator.patch