git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.4-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 6 May 2014 22:52:57 +0000 (15:52 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 6 May 2014 22:52:57 +0000 (15:52 -0700)
added patches:
b43-fix-machine-check-error-due-to-improper-access-of-b43_mmio_psm_phy_hdr.patch
crypto-ghash-clmulni-intel-use-c-implementation-for-setkey.patch
framebuffer-fix-cfb_copyarea.patch
libata-ahci-accommodate-tag-ordered-controllers.patch
mach64-fix-cursor-when-character-width-is-not-a-multiple-of-8-pixels.patch
mach64-use-unaligned-access.patch
matroxfb-restore-the-registers-m_access-and-m_pitch.patch

queue-3.4/b43-fix-machine-check-error-due-to-improper-access-of-b43_mmio_psm_phy_hdr.patch [new file with mode: 0644]
queue-3.4/crypto-ghash-clmulni-intel-use-c-implementation-for-setkey.patch [new file with mode: 0644]
queue-3.4/framebuffer-fix-cfb_copyarea.patch [new file with mode: 0644]
queue-3.4/libata-ahci-accommodate-tag-ordered-controllers.patch [new file with mode: 0644]
queue-3.4/mach64-fix-cursor-when-character-width-is-not-a-multiple-of-8-pixels.patch [new file with mode: 0644]
queue-3.4/mach64-use-unaligned-access.patch [new file with mode: 0644]
queue-3.4/matroxfb-restore-the-registers-m_access-and-m_pitch.patch [new file with mode: 0644]
queue-3.4/series

diff --git a/queue-3.4/b43-fix-machine-check-error-due-to-improper-access-of-b43_mmio_psm_phy_hdr.patch b/queue-3.4/b43-fix-machine-check-error-due-to-improper-access-of-b43_mmio_psm_phy_hdr.patch
new file mode 100644 (file)
index 0000000..86f3381
--- /dev/null
@@ -0,0 +1,65 @@
+From 12cd43c6ed6da7bf7c5afbd74da6959cda6d056b Mon Sep 17 00:00:00 2001
+From: Rafał Miłecki <zajec5@gmail.com>
+Date: Sat, 5 Apr 2014 18:08:25 +0200
+Subject: b43: Fix machine check error due to improper access of B43_MMIO_PSM_PHY_HDR
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Rafał Miłecki <zajec5@gmail.com>
+
+commit 12cd43c6ed6da7bf7c5afbd74da6959cda6d056b upstream.
+
+Register B43_MMIO_PSM_PHY_HDR is a 16-bit one, so accessing it with 32-bit
+functions isn't safe. On my machine it causes a delayed (!) CPU exception:
+
+Disabling lock debugging due to kernel taint
+mce: [Hardware Error]: CPU 0: Machine Check Exception: 4 Bank 4: b200000000070f0f
+mce: [Hardware Error]: TSC 164083803dc
+mce: [Hardware Error]: PROCESSOR 2:20fc2 TIME 1396650505 SOCKET 0 APIC 0 microcode 0
+mce: [Hardware Error]: Run the above through 'mcelog --ascii'
+mce: [Hardware Error]: Machine check: Processor context corrupt
+Kernel panic - not syncing: Fatal machine check on current CPU
+Kernel Offset: 0x0 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffff9fffffff)
+
+Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
+Acked-by: Larry Finger <Larry.Finger@lwfinger.net>
+Signed-off-by: John W. Linville <linville@tuxdriver.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/wireless/b43/phy_n.c |   14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+--- a/drivers/net/wireless/b43/phy_n.c
++++ b/drivers/net/wireless/b43/phy_n.c
+@@ -4599,22 +4599,22 @@ static void b43_nphy_channel_setup(struc
+       int ch = new_channel->hw_value;
+       u16 old_band_5ghz;
+-      u32 tmp32;
++      u16 tmp16;
+       old_band_5ghz =
+               b43_phy_read(dev, B43_NPHY_BANDCTL) & B43_NPHY_BANDCTL_5GHZ;
+       if (new_channel->band == IEEE80211_BAND_5GHZ && !old_band_5ghz) {
+-              tmp32 = b43_read32(dev, B43_MMIO_PSM_PHY_HDR);
+-              b43_write32(dev, B43_MMIO_PSM_PHY_HDR, tmp32 | 4);
++              tmp16 = b43_read16(dev, B43_MMIO_PSM_PHY_HDR);
++              b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp16 | 4);
+               b43_phy_set(dev, B43_PHY_B_BBCFG, 0xC000);
+-              b43_write32(dev, B43_MMIO_PSM_PHY_HDR, tmp32);
++              b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp16);
+               b43_phy_set(dev, B43_NPHY_BANDCTL, B43_NPHY_BANDCTL_5GHZ);
+       } else if (new_channel->band == IEEE80211_BAND_2GHZ && old_band_5ghz) {
+               b43_phy_mask(dev, B43_NPHY_BANDCTL, ~B43_NPHY_BANDCTL_5GHZ);
+-              tmp32 = b43_read32(dev, B43_MMIO_PSM_PHY_HDR);
+-              b43_write32(dev, B43_MMIO_PSM_PHY_HDR, tmp32 | 4);
++              tmp16 = b43_read16(dev, B43_MMIO_PSM_PHY_HDR);
++              b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp16 | 4);
+               b43_phy_mask(dev, B43_PHY_B_BBCFG, 0x3FFF);
+-              b43_write32(dev, B43_MMIO_PSM_PHY_HDR, tmp32);
++              b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp16);
+       }
+       b43_chantab_phy_upload(dev, e);
diff --git a/queue-3.4/crypto-ghash-clmulni-intel-use-c-implementation-for-setkey.patch b/queue-3.4/crypto-ghash-clmulni-intel-use-c-implementation-for-setkey.patch
new file mode 100644 (file)
index 0000000..9abd16b
--- /dev/null
@@ -0,0 +1,105 @@
+From 8ceee72808d1ae3fb191284afc2257a2be964725 Mon Sep 17 00:00:00 2001
+From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+Date: Thu, 27 Mar 2014 18:14:40 +0100
+Subject: crypto: ghash-clmulni-intel - use C implementation for setkey()
+
+From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+
+commit 8ceee72808d1ae3fb191284afc2257a2be964725 upstream.
+
+The GHASH setkey() function uses SSE registers but fails to call
+kernel_fpu_begin()/kernel_fpu_end(). Instead of adding these calls, and
+then having to deal with the restriction that they cannot be called from
+interrupt context, move the setkey() implementation to the C domain.
+
+Note that setkey() does not use any particular SSE features and is not
+expected to become a performance bottleneck.
+
+Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+Acked-by: H. Peter Anvin <hpa@linux.intel.com>
+Fixes: 0e1227d356e9b (crypto: ghash - Add PCLMULQDQ accelerated implementation)
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/crypto/ghash-clmulni-intel_asm.S  |   28 ----------------------------
+ arch/x86/crypto/ghash-clmulni-intel_glue.c |   14 +++++++++++---
+ 2 files changed, 11 insertions(+), 31 deletions(-)
+
+--- a/arch/x86/crypto/ghash-clmulni-intel_asm.S
++++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S
+@@ -24,10 +24,6 @@
+ .align 16
+ .Lbswap_mask:
+       .octa 0x000102030405060708090a0b0c0d0e0f
+-.Lpoly:
+-      .octa 0xc2000000000000000000000000000001
+-.Ltwo_one:
+-      .octa 0x00000001000000000000000000000001
+ #define DATA  %xmm0
+ #define SHASH %xmm1
+@@ -131,27 +127,3 @@ ENTRY(clmul_ghash_update)
+       movups DATA, (%rdi)
+ .Lupdate_just_ret:
+       ret
+-
+-/*
+- * void clmul_ghash_setkey(be128 *shash, const u8 *key);
+- *
+- * Calculate hash_key << 1 mod poly
+- */
+-ENTRY(clmul_ghash_setkey)
+-      movaps .Lbswap_mask, BSWAP
+-      movups (%rsi), %xmm0
+-      PSHUFB_XMM BSWAP %xmm0
+-      movaps %xmm0, %xmm1
+-      psllq $1, %xmm0
+-      psrlq $63, %xmm1
+-      movaps %xmm1, %xmm2
+-      pslldq $8, %xmm1
+-      psrldq $8, %xmm2
+-      por %xmm1, %xmm0
+-      # reduction
+-      pshufd $0b00100100, %xmm2, %xmm1
+-      pcmpeqd .Ltwo_one, %xmm1
+-      pand .Lpoly, %xmm1
+-      pxor %xmm1, %xmm0
+-      movups %xmm0, (%rdi)
+-      ret
+--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
++++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
+@@ -30,8 +30,6 @@ void clmul_ghash_mul(char *dst, const be
+ void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
+                       const be128 *shash);
+-void clmul_ghash_setkey(be128 *shash, const u8 *key);
+-
+ struct ghash_async_ctx {
+       struct cryptd_ahash *cryptd_tfm;
+ };
+@@ -58,13 +56,23 @@ static int ghash_setkey(struct crypto_sh
+                       const u8 *key, unsigned int keylen)
+ {
+       struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
++      be128 *x = (be128 *)key;
++      u64 a, b;
+       if (keylen != GHASH_BLOCK_SIZE) {
+               crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+               return -EINVAL;
+       }
+-      clmul_ghash_setkey(&ctx->shash, key);
++      /* perform multiplication by 'x' in GF(2^128) */
++      a = be64_to_cpu(x->a);
++      b = be64_to_cpu(x->b);
++
++      ctx->shash.a = (__be64)((b << 1) | (a >> 63));
++      ctx->shash.b = (__be64)((a << 1) | (b >> 63));
++
++      if (a >> 63)
++              ctx->shash.b ^= cpu_to_be64(0xc2);
+       return 0;
+ }
diff --git a/queue-3.4/framebuffer-fix-cfb_copyarea.patch b/queue-3.4/framebuffer-fix-cfb_copyarea.patch
new file mode 100644 (file)
index 0000000..3513e07
--- /dev/null
@@ -0,0 +1,400 @@
+From 00a9d699bc85052d2d3ed56251cd928024ce06a3 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Thu, 23 Jan 2014 14:39:29 -0500
+Subject: framebuffer: fix cfb_copyarea
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit 00a9d699bc85052d2d3ed56251cd928024ce06a3 upstream.
+
+The function cfb_copyarea is buggy when the copy operation is not aligned on
+a long boundary (4 bytes on 32-bit machines, 8 bytes on 64-bit machines).
+
+How to reproduce:
+- use x86-64 machine
+- use a framebuffer driver without acceleration (for example uvesafb)
+- set the framebuffer to 8-bit depth
+       (for example fbset -a 1024x768-60 -depth 8)
+- load a font with character width that is not a multiple of 8 pixels
+       note: the console-tools package cannot load a font that has
+       width different from 8 pixels. You need to install the packages
+       "kbd" and "console-terminus" and use the program "setfont" to
+       set font width (for example: setfont Uni2-Terminus20x10)
+- move some text left and right on the bash command line and you get a
+       screen corruption
+
+To expose more bugs, put this line to the end of uvesafb_init_info:
+info->flags |= FBINFO_HWACCEL_COPYAREA | FBINFO_READS_FAST;
+- Now framebuffer console will use cfb_copyarea for console scrolling.
+You get a screen corruption when console is scrolled.
+
+This patch is a rewrite of cfb_copyarea. It fixes the bugs; with this
+patch, console scrolling in 8-bit depth with a font width that is not a
+multiple of 8 pixels works fine.
+
+The cfb_copyarea code was very buggy and it looks like it was written
+and never tried with a non-8-pixel font.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/video/cfbcopyarea.c |  153 ++++++++++++++++++++++----------------------
+ 1 file changed, 78 insertions(+), 75 deletions(-)
+
+--- a/drivers/video/cfbcopyarea.c
++++ b/drivers/video/cfbcopyarea.c
+@@ -43,13 +43,22 @@
+      */
+ static void
+-bitcpy(struct fb_info *p, unsigned long __iomem *dst, int dst_idx,
+-              const unsigned long __iomem *src, int src_idx, int bits,
++bitcpy(struct fb_info *p, unsigned long __iomem *dst, unsigned dst_idx,
++              const unsigned long __iomem *src, unsigned src_idx, int bits,
+               unsigned n, u32 bswapmask)
+ {
+       unsigned long first, last;
+       int const shift = dst_idx-src_idx;
+-      int left, right;
++
++#if 0
++      /*
++       * If you suspect bug in this function, compare it with this simple
++       * memmove implementation.
++       */
++      fb_memmove((char *)dst + ((dst_idx & (bits - 1))) / 8,
++                 (char *)src + ((src_idx & (bits - 1))) / 8, n / 8);
++      return;
++#endif
+       first = fb_shifted_pixels_mask_long(p, dst_idx, bswapmask);
+       last = ~fb_shifted_pixels_mask_long(p, (dst_idx+n) % bits, bswapmask);
+@@ -98,9 +107,8 @@ bitcpy(struct fb_info *p, unsigned long
+               unsigned long d0, d1;
+               int m;
+-              right = shift & (bits - 1);
+-              left = -shift & (bits - 1);
+-              bswapmask &= shift;
++              int const left = shift & (bits - 1);
++              int const right = -shift & (bits - 1);
+               if (dst_idx+n <= bits) {
+                       // Single destination word
+@@ -110,15 +118,15 @@ bitcpy(struct fb_info *p, unsigned long
+                       d0 = fb_rev_pixels_in_long(d0, bswapmask);
+                       if (shift > 0) {
+                               // Single source word
+-                              d0 >>= right;
++                              d0 <<= left;
+                       } else if (src_idx+n <= bits) {
+                               // Single source word
+-                              d0 <<= left;
++                              d0 >>= right;
+                       } else {
+                               // 2 source words
+                               d1 = FB_READL(src + 1);
+                               d1 = fb_rev_pixels_in_long(d1, bswapmask);
+-                              d0 = d0<<left | d1>>right;
++                              d0 = d0 >> right | d1 << left;
+                       }
+                       d0 = fb_rev_pixels_in_long(d0, bswapmask);
+                       FB_WRITEL(comp(d0, FB_READL(dst), first), dst);
+@@ -135,60 +143,59 @@ bitcpy(struct fb_info *p, unsigned long
+                       if (shift > 0) {
+                               // Single source word
+                               d1 = d0;
+-                              d0 >>= right;
+-                              dst++;
++                              d0 <<= left;
+                               n -= bits - dst_idx;
+                       } else {
+                               // 2 source words
+                               d1 = FB_READL(src++);
+                               d1 = fb_rev_pixels_in_long(d1, bswapmask);
+-                              d0 = d0<<left | d1>>right;
+-                              dst++;
++                              d0 = d0 >> right | d1 << left;
+                               n -= bits - dst_idx;
+                       }
+                       d0 = fb_rev_pixels_in_long(d0, bswapmask);
+                       FB_WRITEL(comp(d0, FB_READL(dst), first), dst);
+                       d0 = d1;
++                      dst++;
+                       // Main chunk
+                       m = n % bits;
+                       n /= bits;
+                       while ((n >= 4) && !bswapmask) {
+                               d1 = FB_READL(src++);
+-                              FB_WRITEL(d0 << left | d1 >> right, dst++);
++                              FB_WRITEL(d0 >> right | d1 << left, dst++);
+                               d0 = d1;
+                               d1 = FB_READL(src++);
+-                              FB_WRITEL(d0 << left | d1 >> right, dst++);
++                              FB_WRITEL(d0 >> right | d1 << left, dst++);
+                               d0 = d1;
+                               d1 = FB_READL(src++);
+-                              FB_WRITEL(d0 << left | d1 >> right, dst++);
++                              FB_WRITEL(d0 >> right | d1 << left, dst++);
+                               d0 = d1;
+                               d1 = FB_READL(src++);
+-                              FB_WRITEL(d0 << left | d1 >> right, dst++);
++                              FB_WRITEL(d0 >> right | d1 << left, dst++);
+                               d0 = d1;
+                               n -= 4;
+                       }
+                       while (n--) {
+                               d1 = FB_READL(src++);
+                               d1 = fb_rev_pixels_in_long(d1, bswapmask);
+-                              d0 = d0 << left | d1 >> right;
++                              d0 = d0 >> right | d1 << left;
+                               d0 = fb_rev_pixels_in_long(d0, bswapmask);
+                               FB_WRITEL(d0, dst++);
+                               d0 = d1;
+                       }
+                       // Trailing bits
+-                      if (last) {
+-                              if (m <= right) {
++                      if (m) {
++                              if (m <= bits - right) {
+                                       // Single source word
+-                                      d0 <<= left;
++                                      d0 >>= right;
+                               } else {
+                                       // 2 source words
+                                       d1 = FB_READL(src);
+                                       d1 = fb_rev_pixels_in_long(d1,
+                                                               bswapmask);
+-                                      d0 = d0<<left | d1>>right;
++                                      d0 = d0 >> right | d1 << left;
+                               }
+                               d0 = fb_rev_pixels_in_long(d0, bswapmask);
+                               FB_WRITEL(comp(d0, FB_READL(dst), last), dst);
+@@ -202,43 +209,46 @@ bitcpy(struct fb_info *p, unsigned long
+      */
+ static void
+-bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, int dst_idx,
+-              const unsigned long __iomem *src, int src_idx, int bits,
++bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, unsigned dst_idx,
++              const unsigned long __iomem *src, unsigned src_idx, int bits,
+               unsigned n, u32 bswapmask)
+ {
+       unsigned long first, last;
+       int shift;
+-      dst += (n-1)/bits;
+-      src += (n-1)/bits;
+-      if ((n-1) % bits) {
+-              dst_idx += (n-1) % bits;
+-              dst += dst_idx >> (ffs(bits) - 1);
+-              dst_idx &= bits - 1;
+-              src_idx += (n-1) % bits;
+-              src += src_idx >> (ffs(bits) - 1);
+-              src_idx &= bits - 1;
+-      }
++#if 0
++      /*
++       * If you suspect bug in this function, compare it with this simple
++       * memmove implementation.
++       */
++      fb_memmove((char *)dst + ((dst_idx & (bits - 1))) / 8,
++                 (char *)src + ((src_idx & (bits - 1))) / 8, n / 8);
++      return;
++#endif
++
++      dst += (dst_idx + n - 1) / bits;
++      src += (src_idx + n - 1) / bits;
++      dst_idx = (dst_idx + n - 1) % bits;
++      src_idx = (src_idx + n - 1) % bits;
+       shift = dst_idx-src_idx;
+-      first = fb_shifted_pixels_mask_long(p, bits - 1 - dst_idx, bswapmask);
+-      last = ~fb_shifted_pixels_mask_long(p, bits - 1 - ((dst_idx-n) % bits),
+-                                          bswapmask);
++      first = ~fb_shifted_pixels_mask_long(p, (dst_idx + 1) % bits, bswapmask);
++      last = fb_shifted_pixels_mask_long(p, (bits + dst_idx + 1 - n) % bits, bswapmask);
+       if (!shift) {
+               // Same alignment for source and dest
+               if ((unsigned long)dst_idx+1 >= n) {
+                       // Single word
+-                      if (last)
+-                              first &= last;
+-                      FB_WRITEL( comp( FB_READL(src), FB_READL(dst), first), dst);
++                      if (first)
++                              last &= first;
++                      FB_WRITEL( comp( FB_READL(src), FB_READL(dst), last), dst);
+               } else {
+                       // Multiple destination words
+                       // Leading bits
+-                      if (first != ~0UL) {
++                      if (first) {
+                               FB_WRITEL( comp( FB_READL(src), FB_READL(dst), first), dst);
+                               dst--;
+                               src--;
+@@ -262,7 +272,7 @@ bitcpy_rev(struct fb_info *p, unsigned l
+                               FB_WRITEL(FB_READL(src--), dst--);
+                       // Trailing bits
+-                      if (last)
++                      if (last != -1UL)
+                               FB_WRITEL( comp( FB_READL(src), FB_READL(dst), last), dst);
+               }
+       } else {
+@@ -270,29 +280,28 @@ bitcpy_rev(struct fb_info *p, unsigned l
+               unsigned long d0, d1;
+               int m;
+-              int const left = -shift & (bits-1);
+-              int const right = shift & (bits-1);
+-              bswapmask &= shift;
++              int const left = shift & (bits-1);
++              int const right = -shift & (bits-1);
+               if ((unsigned long)dst_idx+1 >= n) {
+                       // Single destination word
+-                      if (last)
+-                              first &= last;
++                      if (first)
++                              last &= first;
+                       d0 = FB_READL(src);
+                       if (shift < 0) {
+                               // Single source word
+-                              d0 <<= left;
++                              d0 >>= right;
+                       } else if (1+(unsigned long)src_idx >= n) {
+                               // Single source word
+-                              d0 >>= right;
++                              d0 <<= left;
+                       } else {
+                               // 2 source words
+                               d1 = FB_READL(src - 1);
+                               d1 = fb_rev_pixels_in_long(d1, bswapmask);
+-                              d0 = d0>>right | d1<<left;
++                              d0 = d0 << left | d1 >> right;
+                       }
+                       d0 = fb_rev_pixels_in_long(d0, bswapmask);
+-                      FB_WRITEL(comp(d0, FB_READL(dst), first), dst);
++                      FB_WRITEL(comp(d0, FB_READL(dst), last), dst);
+               } else {
+                       // Multiple destination words
+                       /** We must always remember the last value read, because in case
+@@ -307,12 +316,12 @@ bitcpy_rev(struct fb_info *p, unsigned l
+                       if (shift < 0) {
+                               // Single source word
+                               d1 = d0;
+-                              d0 <<= left;
++                              d0 >>= right;
+                       } else {
+                               // 2 source words
+                               d1 = FB_READL(src--);
+                               d1 = fb_rev_pixels_in_long(d1, bswapmask);
+-                              d0 = d0>>right | d1<<left;
++                              d0 = d0 << left | d1 >> right;
+                       }
+                       d0 = fb_rev_pixels_in_long(d0, bswapmask);
+                       FB_WRITEL(comp(d0, FB_READL(dst), first), dst);
+@@ -325,39 +334,39 @@ bitcpy_rev(struct fb_info *p, unsigned l
+                       n /= bits;
+                       while ((n >= 4) && !bswapmask) {
+                               d1 = FB_READL(src--);
+-                              FB_WRITEL(d0 >> right | d1 << left, dst--);
++                              FB_WRITEL(d0 << left | d1 >> right, dst--);
+                               d0 = d1;
+                               d1 = FB_READL(src--);
+-                              FB_WRITEL(d0 >> right | d1 << left, dst--);
++                              FB_WRITEL(d0 << left | d1 >> right, dst--);
+                               d0 = d1;
+                               d1 = FB_READL(src--);
+-                              FB_WRITEL(d0 >> right | d1 << left, dst--);
++                              FB_WRITEL(d0 << left | d1 >> right, dst--);
+                               d0 = d1;
+                               d1 = FB_READL(src--);
+-                              FB_WRITEL(d0 >> right | d1 << left, dst--);
++                              FB_WRITEL(d0 << left | d1 >> right, dst--);
+                               d0 = d1;
+                               n -= 4;
+                       }
+                       while (n--) {
+                               d1 = FB_READL(src--);
+                               d1 = fb_rev_pixels_in_long(d1, bswapmask);
+-                              d0 = d0 >> right | d1 << left;
++                              d0 = d0 << left | d1 >> right;
+                               d0 = fb_rev_pixels_in_long(d0, bswapmask);
+                               FB_WRITEL(d0, dst--);
+                               d0 = d1;
+                       }
+                       // Trailing bits
+-                      if (last) {
+-                              if (m <= left) {
++                      if (m) {
++                              if (m <= bits - left) {
+                                       // Single source word
+-                                      d0 >>= right;
++                                      d0 <<= left;
+                               } else {
+                                       // 2 source words
+                                       d1 = FB_READL(src);
+                                       d1 = fb_rev_pixels_in_long(d1,
+                                                               bswapmask);
+-                                      d0 = d0>>right | d1<<left;
++                                      d0 = d0 << left | d1 >> right;
+                               }
+                               d0 = fb_rev_pixels_in_long(d0, bswapmask);
+                               FB_WRITEL(comp(d0, FB_READL(dst), last), dst);
+@@ -371,9 +380,9 @@ void cfb_copyarea(struct fb_info *p, con
+       u32 dx = area->dx, dy = area->dy, sx = area->sx, sy = area->sy;
+       u32 height = area->height, width = area->width;
+       unsigned long const bits_per_line = p->fix.line_length*8u;
+-      unsigned long __iomem *dst = NULL, *src = NULL;
++      unsigned long __iomem *base = NULL;
+       int bits = BITS_PER_LONG, bytes = bits >> 3;
+-      int dst_idx = 0, src_idx = 0, rev_copy = 0;
++      unsigned dst_idx = 0, src_idx = 0, rev_copy = 0;
+       u32 bswapmask = fb_compute_bswapmask(p);
+       if (p->state != FBINFO_STATE_RUNNING)
+@@ -389,7 +398,7 @@ void cfb_copyarea(struct fb_info *p, con
+       // split the base of the framebuffer into a long-aligned address and the
+       // index of the first bit
+-      dst = src = (unsigned long __iomem *)((unsigned long)p->screen_base & ~(bytes-1));
++      base = (unsigned long __iomem *)((unsigned long)p->screen_base & ~(bytes-1));
+       dst_idx = src_idx = 8*((unsigned long)p->screen_base & (bytes-1));
+       // add offset of source and target area
+       dst_idx += dy*bits_per_line + dx*p->var.bits_per_pixel;
+@@ -402,20 +411,14 @@ void cfb_copyarea(struct fb_info *p, con
+               while (height--) {
+                       dst_idx -= bits_per_line;
+                       src_idx -= bits_per_line;
+-                      dst += dst_idx >> (ffs(bits) - 1);
+-                      dst_idx &= (bytes - 1);
+-                      src += src_idx >> (ffs(bits) - 1);
+-                      src_idx &= (bytes - 1);
+-                      bitcpy_rev(p, dst, dst_idx, src, src_idx, bits,
++                      bitcpy_rev(p, base + (dst_idx / bits), dst_idx % bits,
++                              base + (src_idx / bits), src_idx % bits, bits,
+                               width*p->var.bits_per_pixel, bswapmask);
+               }
+       } else {
+               while (height--) {
+-                      dst += dst_idx >> (ffs(bits) - 1);
+-                      dst_idx &= (bytes - 1);
+-                      src += src_idx >> (ffs(bits) - 1);
+-                      src_idx &= (bytes - 1);
+-                      bitcpy(p, dst, dst_idx, src, src_idx, bits,
++                      bitcpy(p, base + (dst_idx / bits), dst_idx % bits,
++                              base + (src_idx / bits), src_idx % bits, bits,
+                               width*p->var.bits_per_pixel, bswapmask);
+                       dst_idx += bits_per_line;
+                       src_idx += bits_per_line;
diff --git a/queue-3.4/libata-ahci-accommodate-tag-ordered-controllers.patch b/queue-3.4/libata-ahci-accommodate-tag-ordered-controllers.patch
new file mode 100644 (file)
index 0000000..88dbfdb
--- /dev/null
@@ -0,0 +1,96 @@
+From 8a4aeec8d2d6a3edeffbdfae451cdf05cbf0fefd Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Thu, 17 Apr 2014 11:48:21 -0700
+Subject: libata/ahci: accommodate tag ordered controllers
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit 8a4aeec8d2d6a3edeffbdfae451cdf05cbf0fefd upstream.
+
+The AHCI spec allows implementations to issue commands in tag order
+rather than FIFO order:
+
+       5.3.2.12 P:SelectCmd
+       HBA sets pSlotLoc = (pSlotLoc + 1) mod (CAP.NCS + 1)
+       or HBA selects the command to issue that has had the
+       PxCI bit set to '1' longer than any other command
+       pending to be issued.
+
+The result is that commands posted sequentially (time-wise) may play out
+of sequence when issued by hardware.
+
+This behavior has likely been hidden by drives that arrange for commands
+to complete in issue order.  However, it appears recent drives (two from
+different vendors that we have found so far) inflict out-of-order
+completions as a matter of course.  So, we need to take care to maintain
+ordered submission, otherwise we risk triggering a drive to fall out of
+sequential-io automation and back to random-io processing, which incurs
+large latency and degrades throughput.
+
+This issue was found in simple benchmarks where QD=2 seq-write
+performance was 30-50% *greater* than QD=32 seq-write performance.
+
+Tagging for -stable and making the change globally since it has a low
+risk-to-reward ratio.  Also, word is that recent versions of an unnamed
+OS also do it this way now.  So, drives in the field are already
+experienced with this tag ordering scheme.
+
+Cc: Dave Jiang <dave.jiang@intel.com>
+Cc: Ed Ciechanowski <ed.ciechanowski@intel.com>
+Reviewed-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/ata/libata-core.c |   21 +++++++++++++--------
+ include/linux/libata.h    |    1 +
+ 2 files changed, 14 insertions(+), 8 deletions(-)
+
+--- a/drivers/ata/libata-core.c
++++ b/drivers/ata/libata-core.c
+@@ -4700,21 +4700,26 @@ void swap_buf_le16(u16 *buf, unsigned in
+ static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap)
+ {
+       struct ata_queued_cmd *qc = NULL;
+-      unsigned int i;
++      unsigned int i, tag;
+       /* no command while frozen */
+       if (unlikely(ap->pflags & ATA_PFLAG_FROZEN))
+               return NULL;
+-      /* the last tag is reserved for internal command. */
+-      for (i = 0; i < ATA_MAX_QUEUE - 1; i++)
+-              if (!test_and_set_bit(i, &ap->qc_allocated)) {
+-                      qc = __ata_qc_from_tag(ap, i);
++      for (i = 0; i < ATA_MAX_QUEUE; i++) {
++              tag = (i + ap->last_tag + 1) % ATA_MAX_QUEUE;
++
++              /* the last tag is reserved for internal command. */
++              if (tag == ATA_TAG_INTERNAL)
++                      continue;
++
++              if (!test_and_set_bit(tag, &ap->qc_allocated)) {
++                      qc = __ata_qc_from_tag(ap, tag);
++                      qc->tag = tag;
++                      ap->last_tag = tag;
+                       break;
+               }
+-
+-      if (qc)
+-              qc->tag = i;
++      }
+       return qc;
+ }
+--- a/include/linux/libata.h
++++ b/include/linux/libata.h
+@@ -762,6 +762,7 @@ struct ata_port {
+       unsigned long           qc_allocated;
+       unsigned int            qc_active;
+       int                     nr_active_links; /* #links with active qcs */
++      unsigned int            last_tag;       /* track next tag hw expects */
+       struct ata_link         link;           /* host default link */
+       struct ata_link         *slave_link;    /* see ata_slave_link_init() */
diff --git a/queue-3.4/mach64-fix-cursor-when-character-width-is-not-a-multiple-of-8-pixels.patch b/queue-3.4/mach64-fix-cursor-when-character-width-is-not-a-multiple-of-8-pixels.patch
new file mode 100644 (file)
index 0000000..3899435
--- /dev/null
@@ -0,0 +1,88 @@
+From 43751a1b8ee2e70ce392bf31ef3133da324e68b3 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Thu, 23 Jan 2014 14:41:59 -0500
+Subject: mach64: fix cursor when character width is not a multiple of 8 pixels
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit 43751a1b8ee2e70ce392bf31ef3133da324e68b3 upstream.
+
+This patch fixes the hardware cursor on mach64 when font width is not a
+multiple of 8 pixels.
+
+If you load such a font, the cursor is expanded to the next 8-pixel
+boundary and a part of the next character after the cursor is not
+visible.
+For example, when you load a font with 12-pixel width, the cursor width
+is 16 pixels and when the cursor is displayed, 4 pixels of the next
+character are not visible.
+
+The reason is this: atyfb_cursor is called with proper parameters to
+load an image that is 12-pixel wide. However, the number is aligned on
+the next 8-pixel boundary on the line
+"unsigned int width = (cursor->image.width + 7) >> 3;" and the whole
+function acts as if it was loading a 16-pixel image.
+
+This patch fixes it so that the value written to the framebuffer is
+padded with 0xaaaa (the transparent pattern) when the image size is not
+a multiple of 8 pixels. The transparent pattern ensures that the cursor
+does not interfere with the next character.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/video/aty/mach64_cursor.c |   22 ++++++++++++++++------
+ 1 file changed, 16 insertions(+), 6 deletions(-)
+
+--- a/drivers/video/aty/mach64_cursor.c
++++ b/drivers/video/aty/mach64_cursor.c
+@@ -5,6 +5,7 @@
+ #include <linux/fb.h>
+ #include <linux/init.h>
+ #include <linux/string.h>
++#include "../fb_draw.h"
+ #include <asm/io.h>
+@@ -157,24 +158,33 @@ static int atyfb_cursor(struct fb_info *
+           for (i = 0; i < height; i++) {
+               for (j = 0; j < width; j++) {
++                      u16 l = 0xaaaa;
+                       b = *src++;
+                       m = *msk++;
+                       switch (cursor->rop) {
+                       case ROP_XOR:
+                           // Upper 4 bits of mask data
+-                          fb_writeb(cursor_bits_lookup[(b ^ m) >> 4], dst++);
++                          l = cursor_bits_lookup[(b ^ m) >> 4] |
+                           // Lower 4 bits of mask
+-                          fb_writeb(cursor_bits_lookup[(b ^ m) & 0x0f],
+-                                    dst++);
++                                  (cursor_bits_lookup[(b ^ m) & 0x0f] << 8);
+                           break;
+                       case ROP_COPY:
+                           // Upper 4 bits of mask data
+-                          fb_writeb(cursor_bits_lookup[(b & m) >> 4], dst++);
++                          l = cursor_bits_lookup[(b & m) >> 4] |
+                           // Lower 4 bits of mask
+-                          fb_writeb(cursor_bits_lookup[(b & m) & 0x0f],
+-                                    dst++);
++                                  (cursor_bits_lookup[(b & m) & 0x0f] << 8);
+                           break;
+                       }
++                      /*
++                       * If cursor size is not a multiple of 8 characters
++                       * we must pad it with transparent pattern (0xaaaa).
++                       */
++                      if ((j + 1) * 8 > cursor->image.width) {
++                              l = comp(l, 0xaaaa,
++                                  (1 << ((cursor->image.width & 7) * 2)) - 1);
++                      }
++                      fb_writeb(l & 0xff, dst++);
++                      fb_writeb(l >> 8, dst++);
+               }
+               dst += offset;
+           }
diff --git a/queue-3.4/mach64-use-unaligned-access.patch b/queue-3.4/mach64-use-unaligned-access.patch
new file mode 100644 (file)
index 0000000..123bf72
--- /dev/null
@@ -0,0 +1,46 @@
+From c29dd8696dc5dbd50b3ac441b8a26751277ba520 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Thu, 23 Jan 2014 14:41:09 -0500
+Subject: mach64: use unaligned access
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit c29dd8696dc5dbd50b3ac441b8a26751277ba520 upstream.
+
+This patch fixes mach64 to use unaligned access to the font bitmap.
+
+This fixes unaligned access warning on sparc64 when 14x8 font is loaded.
+
+On x86(64), unaligned access is handled in hardware, so both functions
+le32_to_cpup and get_unaligned_le32 perform the same operation.
+
+On RISC machines, unaligned access is not handled in hardware, so we
+better use get_unaligned_le32 to avoid the unaligned trap and warning.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/video/aty/mach64_accel.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/video/aty/mach64_accel.c
++++ b/drivers/video/aty/mach64_accel.c
+@@ -4,6 +4,7 @@
+  */
+ #include <linux/delay.h>
++#include <asm/unaligned.h>
+ #include <linux/fb.h>
+ #include <video/mach64.h>
+ #include "atyfb.h"
+@@ -419,7 +420,7 @@ void atyfb_imageblit(struct fb_info *inf
+               u32 *pbitmap, dwords = (src_bytes + 3) / 4;
+               for (pbitmap = (u32*)(image->data); dwords; dwords--, pbitmap++) {
+                       wait_for_fifo(1, par);
+-                      aty_st_le32(HOST_DATA0, le32_to_cpup(pbitmap), par);
++                      aty_st_le32(HOST_DATA0, get_unaligned_le32(pbitmap), par);
+               }
+       }
diff --git a/queue-3.4/matroxfb-restore-the-registers-m_access-and-m_pitch.patch b/queue-3.4/matroxfb-restore-the-registers-m_access-and-m_pitch.patch
new file mode 100644 (file)
index 0000000..384d1c6
--- /dev/null
@@ -0,0 +1,157 @@
+From a772d4736641ec1b421ad965e13457c17379fc86 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Thu, 23 Jan 2014 14:39:04 -0500
+Subject: matroxfb: restore the registers M_ACCESS and M_PITCH
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit a772d4736641ec1b421ad965e13457c17379fc86 upstream.
+
+When X11 is running and the user switches back to console, the card
+modifies the content of registers M_MACCESS and M_PITCH in periodic
+intervals.
+
+This patch fixes it by restoring the content of these registers before
+issuing any accelerator command.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/video/matrox/matroxfb_accel.c |   38 +++++++++++++++++++++++++---------
+ drivers/video/matrox/matroxfb_base.h  |    2 +
+ 2 files changed, 30 insertions(+), 10 deletions(-)
+
+--- a/drivers/video/matrox/matroxfb_accel.c
++++ b/drivers/video/matrox/matroxfb_accel.c
+@@ -192,10 +192,18 @@ void matrox_cfbX_init(struct matrox_fb_i
+       minfo->accel.m_dwg_rect = M_DWG_TRAP | M_DWG_SOLID | M_DWG_ARZERO | M_DWG_SGNZERO | M_DWG_SHIFTZERO;
+       if (isMilleniumII(minfo)) minfo->accel.m_dwg_rect |= M_DWG_TRANSC;
+       minfo->accel.m_opmode = mopmode;
++      minfo->accel.m_access = maccess;
++      minfo->accel.m_pitch = mpitch;
+ }
+ EXPORT_SYMBOL(matrox_cfbX_init);
++static void matrox_accel_restore_maccess(struct matrox_fb_info *minfo)
++{
++      mga_outl(M_MACCESS, minfo->accel.m_access);
++      mga_outl(M_PITCH, minfo->accel.m_pitch);
++}
++
+ static void matrox_accel_bmove(struct matrox_fb_info *minfo, int vxres, int sy,
+                              int sx, int dy, int dx, int height, int width)
+ {
+@@ -207,7 +215,8 @@ static void matrox_accel_bmove(struct ma
+       CRITBEGIN
+       if ((dy < sy) || ((dy == sy) && (dx <= sx))) {
+-              mga_fifo(2);
++              mga_fifo(4);
++              matrox_accel_restore_maccess(minfo);
+               mga_outl(M_DWGCTL, M_DWG_BITBLT | M_DWG_SHIFTZERO | M_DWG_SGNZERO |
+                        M_DWG_BFCOL | M_DWG_REPLACE);
+               mga_outl(M_AR5, vxres);
+@@ -215,7 +224,8 @@ static void matrox_accel_bmove(struct ma
+               start = sy*vxres+sx+curr_ydstorg(minfo);
+               end = start+width;
+       } else {
+-              mga_fifo(3);
++              mga_fifo(5);
++              matrox_accel_restore_maccess(minfo);
+               mga_outl(M_DWGCTL, M_DWG_BITBLT | M_DWG_SHIFTZERO | M_DWG_BFCOL | M_DWG_REPLACE);
+               mga_outl(M_SGN, 5);
+               mga_outl(M_AR5, -vxres);
+@@ -224,7 +234,8 @@ static void matrox_accel_bmove(struct ma
+               start = end+width;
+               dy += height-1;
+       }
+-      mga_fifo(4);
++      mga_fifo(6);
++      matrox_accel_restore_maccess(minfo);
+       mga_outl(M_AR0, end);
+       mga_outl(M_AR3, start);
+       mga_outl(M_FXBNDRY, ((dx+width)<<16) | dx);
+@@ -246,7 +257,8 @@ static void matrox_accel_bmove_lin(struc
+       CRITBEGIN
+       if ((dy < sy) || ((dy == sy) && (dx <= sx))) {
+-              mga_fifo(2);
++              mga_fifo(4);
++              matrox_accel_restore_maccess(minfo);
+               mga_outl(M_DWGCTL, M_DWG_BITBLT | M_DWG_SHIFTZERO | M_DWG_SGNZERO |
+                       M_DWG_BFCOL | M_DWG_REPLACE);
+               mga_outl(M_AR5, vxres);
+@@ -254,7 +266,8 @@ static void matrox_accel_bmove_lin(struc
+               start = sy*vxres+sx+curr_ydstorg(minfo);
+               end = start+width;
+       } else {
+-              mga_fifo(3);
++              mga_fifo(5);
++              matrox_accel_restore_maccess(minfo);
+               mga_outl(M_DWGCTL, M_DWG_BITBLT | M_DWG_SHIFTZERO | M_DWG_BFCOL | M_DWG_REPLACE);
+               mga_outl(M_SGN, 5);
+               mga_outl(M_AR5, -vxres);
+@@ -263,7 +276,8 @@ static void matrox_accel_bmove_lin(struc
+               start = end+width;
+               dy += height-1;
+       }
+-      mga_fifo(5);
++      mga_fifo(7);
++      matrox_accel_restore_maccess(minfo);
+       mga_outl(M_AR0, end);
+       mga_outl(M_AR3, start);
+       mga_outl(M_FXBNDRY, ((dx+width)<<16) | dx);
+@@ -298,7 +312,8 @@ static void matroxfb_accel_clear(struct
+       CRITBEGIN
+-      mga_fifo(5);
++      mga_fifo(7);
++      matrox_accel_restore_maccess(minfo);
+       mga_outl(M_DWGCTL, minfo->accel.m_dwg_rect | M_DWG_REPLACE);
+       mga_outl(M_FCOL, color);
+       mga_outl(M_FXBNDRY, ((sx + width) << 16) | sx);
+@@ -341,7 +356,8 @@ static void matroxfb_cfb4_clear(struct m
+       width >>= 1;
+       sx >>= 1;
+       if (width) {
+-              mga_fifo(5);
++              mga_fifo(7);
++              matrox_accel_restore_maccess(minfo);
+               mga_outl(M_DWGCTL, minfo->accel.m_dwg_rect | M_DWG_REPLACE2);
+               mga_outl(M_FCOL, bgx);
+               mga_outl(M_FXBNDRY, ((sx + width) << 16) | sx);
+@@ -415,7 +431,8 @@ static void matroxfb_1bpp_imageblit(stru
+       CRITBEGIN
+-      mga_fifo(3);
++      mga_fifo(5);
++      matrox_accel_restore_maccess(minfo);
+       if (easy)
+               mga_outl(M_DWGCTL, M_DWG_ILOAD | M_DWG_SGNZERO | M_DWG_SHIFTZERO | M_DWG_BMONOWF | M_DWG_LINEAR | M_DWG_REPLACE);
+       else
+@@ -425,7 +442,8 @@ static void matroxfb_1bpp_imageblit(stru
+       fxbndry = ((xx + width - 1) << 16) | xx;
+       mmio = minfo->mmio.vbase;
+-      mga_fifo(6);
++      mga_fifo(8);
++      matrox_accel_restore_maccess(minfo);
+       mga_writel(mmio, M_FXBNDRY, fxbndry);
+       mga_writel(mmio, M_AR0, ar0);
+       mga_writel(mmio, M_AR3, 0);
+--- a/drivers/video/matrox/matroxfb_base.h
++++ b/drivers/video/matrox/matroxfb_base.h
+@@ -307,6 +307,8 @@ struct matrox_accel_data {
+ #endif
+       u_int32_t       m_dwg_rect;
+       u_int32_t       m_opmode;
++      u_int32_t       m_access;
++      u_int32_t       m_pitch;
+ };
+ struct v4l2_queryctrl;
index a70eaca99b84ca48ab5dcd9f83a6e7701cec9e31..cb1bd6b0cba63dd9b42c57dc715ec6ddf729f128 100644 (file)
@@ -4,3 +4,10 @@ floppy-don-t-write-kernel-only-members-to-fdrawcmd-ioctl-output.patch
 mips-hibernate-flush-tlb-entries-in-swsusp_arch_resume.patch
 virtio_balloon-don-t-softlockup-on-huge-balloon-changes.patch
 mpt2sas-don-t-disable-device-twice-at-suspend.patch
+crypto-ghash-clmulni-intel-use-c-implementation-for-setkey.patch
+framebuffer-fix-cfb_copyarea.patch
+matroxfb-restore-the-registers-m_access-and-m_pitch.patch
+mach64-use-unaligned-access.patch
+mach64-fix-cursor-when-character-width-is-not-a-multiple-of-8-pixels.patch
+b43-fix-machine-check-error-due-to-improper-access-of-b43_mmio_psm_phy_hdr.patch
+libata-ahci-accommodate-tag-ordered-controllers.patch