From b85de3fa436071bf8313c8f585fcf6af42f4fbf0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 6 May 2014 15:52:57 -0700 Subject: [PATCH] 3.4-stable patches added patches: b43-fix-machine-check-error-due-to-improper-access-of-b43_mmio_psm_phy_hdr.patch crypto-ghash-clmulni-intel-use-c-implementation-for-setkey.patch framebuffer-fix-cfb_copyarea.patch libata-ahci-accommodate-tag-ordered-controllers.patch mach64-fix-cursor-when-character-width-is-not-a-multiple-of-8-pixels.patch mach64-use-unaligned-access.patch matroxfb-restore-the-registers-m_access-and-m_pitch.patch --- ...roper-access-of-b43_mmio_psm_phy_hdr.patch | 65 +++ ...ntel-use-c-implementation-for-setkey.patch | 105 +++++ queue-3.4/framebuffer-fix-cfb_copyarea.patch | 400 ++++++++++++++++++ ...-accommodate-tag-ordered-controllers.patch | 96 +++++ ...-width-is-not-a-multiple-of-8-pixels.patch | 88 ++++ queue-3.4/mach64-use-unaligned-access.patch | 46 ++ ...e-the-registers-m_access-and-m_pitch.patch | 157 +++++++ queue-3.4/series | 7 + 8 files changed, 964 insertions(+) create mode 100644 queue-3.4/b43-fix-machine-check-error-due-to-improper-access-of-b43_mmio_psm_phy_hdr.patch create mode 100644 queue-3.4/crypto-ghash-clmulni-intel-use-c-implementation-for-setkey.patch create mode 100644 queue-3.4/framebuffer-fix-cfb_copyarea.patch create mode 100644 queue-3.4/libata-ahci-accommodate-tag-ordered-controllers.patch create mode 100644 queue-3.4/mach64-fix-cursor-when-character-width-is-not-a-multiple-of-8-pixels.patch create mode 100644 queue-3.4/mach64-use-unaligned-access.patch create mode 100644 queue-3.4/matroxfb-restore-the-registers-m_access-and-m_pitch.patch diff --git a/queue-3.4/b43-fix-machine-check-error-due-to-improper-access-of-b43_mmio_psm_phy_hdr.patch b/queue-3.4/b43-fix-machine-check-error-due-to-improper-access-of-b43_mmio_psm_phy_hdr.patch new file mode 100644 index 00000000000..86f3381c5a7 --- /dev/null +++ 
b/queue-3.4/b43-fix-machine-check-error-due-to-improper-access-of-b43_mmio_psm_phy_hdr.patch @@ -0,0 +1,65 @@ +From 12cd43c6ed6da7bf7c5afbd74da6959cda6d056b Mon Sep 17 00:00:00 2001 +From: Rafał Miłecki +Date: Sat, 5 Apr 2014 18:08:25 +0200 +Subject: b43: Fix machine check error due to improper access of B43_MMIO_PSM_PHY_HDR +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Rafał Miłecki + +commit 12cd43c6ed6da7bf7c5afbd74da6959cda6d056b upstream. + +Register B43_MMIO_PSM_PHY_HDR is 16 bit one, so accessing it with 32b +functions isn't safe. On my machine it causes delayed (!) CPU exception: + +Disabling lock debugging due to kernel taint +mce: [Hardware Error]: CPU 0: Machine Check Exception: 4 Bank 4: b200000000070f0f +mce: [Hardware Error]: TSC 164083803dc +mce: [Hardware Error]: PROCESSOR 2:20fc2 TIME 1396650505 SOCKET 0 APIC 0 microcode 0 +mce: [Hardware Error]: Run the above through 'mcelog --ascii' +mce: [Hardware Error]: Machine check: Processor context corrupt +Kernel panic - not syncing: Fatal machine check on current CPU +Kernel Offset: 0x0 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffff9fffffff) + +Signed-off-by: Rafał Miłecki +Acked-by: Larry Finger +Signed-off-by: John W. 
Linville +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/wireless/b43/phy_n.c | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +--- a/drivers/net/wireless/b43/phy_n.c ++++ b/drivers/net/wireless/b43/phy_n.c +@@ -4599,22 +4599,22 @@ static void b43_nphy_channel_setup(struc + int ch = new_channel->hw_value; + + u16 old_band_5ghz; +- u32 tmp32; ++ u16 tmp16; + + old_band_5ghz = + b43_phy_read(dev, B43_NPHY_BANDCTL) & B43_NPHY_BANDCTL_5GHZ; + if (new_channel->band == IEEE80211_BAND_5GHZ && !old_band_5ghz) { +- tmp32 = b43_read32(dev, B43_MMIO_PSM_PHY_HDR); +- b43_write32(dev, B43_MMIO_PSM_PHY_HDR, tmp32 | 4); ++ tmp16 = b43_read16(dev, B43_MMIO_PSM_PHY_HDR); ++ b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp16 | 4); + b43_phy_set(dev, B43_PHY_B_BBCFG, 0xC000); +- b43_write32(dev, B43_MMIO_PSM_PHY_HDR, tmp32); ++ b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp16); + b43_phy_set(dev, B43_NPHY_BANDCTL, B43_NPHY_BANDCTL_5GHZ); + } else if (new_channel->band == IEEE80211_BAND_2GHZ && old_band_5ghz) { + b43_phy_mask(dev, B43_NPHY_BANDCTL, ~B43_NPHY_BANDCTL_5GHZ); +- tmp32 = b43_read32(dev, B43_MMIO_PSM_PHY_HDR); +- b43_write32(dev, B43_MMIO_PSM_PHY_HDR, tmp32 | 4); ++ tmp16 = b43_read16(dev, B43_MMIO_PSM_PHY_HDR); ++ b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp16 | 4); + b43_phy_mask(dev, B43_PHY_B_BBCFG, 0x3FFF); +- b43_write32(dev, B43_MMIO_PSM_PHY_HDR, tmp32); ++ b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp16); + } + + b43_chantab_phy_upload(dev, e); diff --git a/queue-3.4/crypto-ghash-clmulni-intel-use-c-implementation-for-setkey.patch b/queue-3.4/crypto-ghash-clmulni-intel-use-c-implementation-for-setkey.patch new file mode 100644 index 00000000000..9abd16b5f22 --- /dev/null +++ b/queue-3.4/crypto-ghash-clmulni-intel-use-c-implementation-for-setkey.patch @@ -0,0 +1,105 @@ +From 8ceee72808d1ae3fb191284afc2257a2be964725 Mon Sep 17 00:00:00 2001 +From: Ard Biesheuvel +Date: Thu, 27 Mar 2014 18:14:40 +0100 +Subject: crypto: ghash-clmulni-intel - use C 
implementation for setkey() + +From: Ard Biesheuvel + +commit 8ceee72808d1ae3fb191284afc2257a2be964725 upstream. + +The GHASH setkey() function uses SSE registers but fails to call +kernel_fpu_begin()/kernel_fpu_end(). Instead of adding these calls, and +then having to deal with the restriction that they cannot be called from +interrupt context, move the setkey() implementation to the C domain. + +Note that setkey() does not use any particular SSE features and is not +expected to become a performance bottleneck. + +Signed-off-by: Ard Biesheuvel +Acked-by: H. Peter Anvin +Fixes: 0e1227d356e9b (crypto: ghash - Add PCLMULQDQ accelerated implementation) +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/crypto/ghash-clmulni-intel_asm.S | 28 ---------------------------- + arch/x86/crypto/ghash-clmulni-intel_glue.c | 14 +++++++++++--- + 2 files changed, 11 insertions(+), 31 deletions(-) + +--- a/arch/x86/crypto/ghash-clmulni-intel_asm.S ++++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S +@@ -24,10 +24,6 @@ + .align 16 + .Lbswap_mask: + .octa 0x000102030405060708090a0b0c0d0e0f +-.Lpoly: +- .octa 0xc2000000000000000000000000000001 +-.Ltwo_one: +- .octa 0x00000001000000000000000000000001 + + #define DATA %xmm0 + #define SHASH %xmm1 +@@ -131,27 +127,3 @@ ENTRY(clmul_ghash_update) + movups DATA, (%rdi) + .Lupdate_just_ret: + ret +- +-/* +- * void clmul_ghash_setkey(be128 *shash, const u8 *key); +- * +- * Calculate hash_key << 1 mod poly +- */ +-ENTRY(clmul_ghash_setkey) +- movaps .Lbswap_mask, BSWAP +- movups (%rsi), %xmm0 +- PSHUFB_XMM BSWAP %xmm0 +- movaps %xmm0, %xmm1 +- psllq $1, %xmm0 +- psrlq $63, %xmm1 +- movaps %xmm1, %xmm2 +- pslldq $8, %xmm1 +- psrldq $8, %xmm2 +- por %xmm1, %xmm0 +- # reduction +- pshufd $0b00100100, %xmm2, %xmm1 +- pcmpeqd .Ltwo_one, %xmm1 +- pand .Lpoly, %xmm1 +- pxor %xmm1, %xmm0 +- movups %xmm0, (%rdi) +- ret +--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c ++++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c +@@ -30,8 
+30,6 @@ void clmul_ghash_mul(char *dst, const be + void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, + const be128 *shash); + +-void clmul_ghash_setkey(be128 *shash, const u8 *key); +- + struct ghash_async_ctx { + struct cryptd_ahash *cryptd_tfm; + }; +@@ -58,13 +56,23 @@ static int ghash_setkey(struct crypto_sh + const u8 *key, unsigned int keylen) + { + struct ghash_ctx *ctx = crypto_shash_ctx(tfm); ++ be128 *x = (be128 *)key; ++ u64 a, b; + + if (keylen != GHASH_BLOCK_SIZE) { + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + +- clmul_ghash_setkey(&ctx->shash, key); ++ /* perform multiplication by 'x' in GF(2^128) */ ++ a = be64_to_cpu(x->a); ++ b = be64_to_cpu(x->b); ++ ++ ctx->shash.a = (__be64)((b << 1) | (a >> 63)); ++ ctx->shash.b = (__be64)((a << 1) | (b >> 63)); ++ ++ if (a >> 63) ++ ctx->shash.b ^= cpu_to_be64(0xc2); + + return 0; + } diff --git a/queue-3.4/framebuffer-fix-cfb_copyarea.patch b/queue-3.4/framebuffer-fix-cfb_copyarea.patch new file mode 100644 index 00000000000..3513e07dec5 --- /dev/null +++ b/queue-3.4/framebuffer-fix-cfb_copyarea.patch @@ -0,0 +1,400 @@ +From 00a9d699bc85052d2d3ed56251cd928024ce06a3 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Thu, 23 Jan 2014 14:39:29 -0500 +Subject: framebuffer: fix cfb_copyarea + +From: Mikulas Patocka + +commit 00a9d699bc85052d2d3ed56251cd928024ce06a3 upstream. + +The function cfb_copyarea is buggy when the copy operation is not aligned on +long boundary (4 bytes on 32-bit machines, 8 bytes on 64-bit machines). + +How to reproduce: +- use x86-64 machine +- use a framebuffer driver without acceleration (for example uvesafb) +- set the framebuffer to 8-bit depth + (for example fbset -a 1024x768-60 -depth 8) +- load a font with character width that is not a multiple of 8 pixels + note: the console-tools package cannot load a font that has + width different from 8 pixels. 
You need to install the packages + "kbd" and "console-terminus" and use the program "setfont" to + set font width (for example: setfont Uni2-Terminus20x10) +- move some text left and right on the bash command line and you get a + screen corruption + +To expose more bugs, put this line to the end of uvesafb_init_info: +info->flags |= FBINFO_HWACCEL_COPYAREA | FBINFO_READS_FAST; +- Now framebuffer console will use cfb_copyarea for console scrolling. +You get a screen corruption when console is scrolled. + +This patch is a rewrite of cfb_copyarea. It fixes the bugs, with this +patch, console scrolling in 8-bit depth with a font width that is not a +multiple of 8 pixels works fine. + +The cfb_copyarea code was very buggy and it looks like it was written +and never tried with non-8-pixel font. + +Signed-off-by: Mikulas Patocka +Signed-off-by: Tomi Valkeinen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/video/cfbcopyarea.c | 153 ++++++++++++++++++++++---------------------- + 1 file changed, 78 insertions(+), 75 deletions(-) + +--- a/drivers/video/cfbcopyarea.c ++++ b/drivers/video/cfbcopyarea.c +@@ -43,13 +43,22 @@ + */ + + static void +-bitcpy(struct fb_info *p, unsigned long __iomem *dst, int dst_idx, +- const unsigned long __iomem *src, int src_idx, int bits, ++bitcpy(struct fb_info *p, unsigned long __iomem *dst, unsigned dst_idx, ++ const unsigned long __iomem *src, unsigned src_idx, int bits, + unsigned n, u32 bswapmask) + { + unsigned long first, last; + int const shift = dst_idx-src_idx; +- int left, right; ++ ++#if 0 ++ /* ++ * If you suspect bug in this function, compare it with this simple ++ * memmove implementation. 
++ */ ++ fb_memmove((char *)dst + ((dst_idx & (bits - 1))) / 8, ++ (char *)src + ((src_idx & (bits - 1))) / 8, n / 8); ++ return; ++#endif + + first = fb_shifted_pixels_mask_long(p, dst_idx, bswapmask); + last = ~fb_shifted_pixels_mask_long(p, (dst_idx+n) % bits, bswapmask); +@@ -98,9 +107,8 @@ bitcpy(struct fb_info *p, unsigned long + unsigned long d0, d1; + int m; + +- right = shift & (bits - 1); +- left = -shift & (bits - 1); +- bswapmask &= shift; ++ int const left = shift & (bits - 1); ++ int const right = -shift & (bits - 1); + + if (dst_idx+n <= bits) { + // Single destination word +@@ -110,15 +118,15 @@ bitcpy(struct fb_info *p, unsigned long + d0 = fb_rev_pixels_in_long(d0, bswapmask); + if (shift > 0) { + // Single source word +- d0 >>= right; ++ d0 <<= left; + } else if (src_idx+n <= bits) { + // Single source word +- d0 <<= left; ++ d0 >>= right; + } else { + // 2 source words + d1 = FB_READL(src + 1); + d1 = fb_rev_pixels_in_long(d1, bswapmask); +- d0 = d0<<left | d1>>right; ++ d0 = d0 >> right | d1 << left; + } + d0 = fb_rev_pixels_in_long(d0, bswapmask); + FB_WRITEL(comp(d0, FB_READL(dst), first), dst); +@@ -135,60 +143,59 @@ bitcpy(struct fb_info *p, unsigned long + if (shift > 0) { + // Single source word + d1 = d0; +- d0 >>= right; +- dst++; ++ d0 <<= left; + n -= bits - dst_idx; + } else { + // 2 source words + d1 = FB_READL(src++); + d1 = fb_rev_pixels_in_long(d1, bswapmask); + +- d0 = d0<<left | d1>>right; +- dst++; ++ d0 = d0 >> right | d1 << left; + n -= bits - dst_idx; + } + d0 = fb_rev_pixels_in_long(d0, bswapmask); + FB_WRITEL(comp(d0, FB_READL(dst), first), dst); + d0 = d1; ++ dst++; + + // Main chunk + m = n % bits; + n /= bits; + while ((n >= 4) && !bswapmask) { + d1 = FB_READL(src++); +- FB_WRITEL(d0 << left | d1 >> right, dst++); ++ FB_WRITEL(d0 >> right | d1 << left, dst++); + d0 = d1; + d1 = FB_READL(src++); +- FB_WRITEL(d0 << left | d1 >> right, dst++); ++ FB_WRITEL(d0 >> right | d1 << left, dst++); + d0 = d1; + d1 = FB_READL(src++); +- FB_WRITEL(d0 << 
left | d1 >> right, dst++); ++ FB_WRITEL(d0 >> right | d1 << left, dst++); + d0 = d1; + d1 = FB_READL(src++); +- FB_WRITEL(d0 << left | d1 >> right, dst++); ++ FB_WRITEL(d0 >> right | d1 << left, dst++); + d0 = d1; + n -= 4; + } + while (n--) { + d1 = FB_READL(src++); + d1 = fb_rev_pixels_in_long(d1, bswapmask); +- d0 = d0 << left | d1 >> right; ++ d0 = d0 >> right | d1 << left; + d0 = fb_rev_pixels_in_long(d0, bswapmask); + FB_WRITEL(d0, dst++); + d0 = d1; + } + + // Trailing bits +- if (last) { +- if (m <= right) { ++ if (m) { ++ if (m <= bits - right) { + // Single source word +- d0 <<= left; ++ d0 >>= right; + } else { + // 2 source words + d1 = FB_READL(src); + d1 = fb_rev_pixels_in_long(d1, + bswapmask); +- d0 = d0<<left | d1>>right; ++ d0 = d0 >> right | d1 << left; + } + d0 = fb_rev_pixels_in_long(d0, bswapmask); + FB_WRITEL(comp(d0, FB_READL(dst), last), dst); +@@ -202,43 +209,46 @@ bitcpy(struct fb_info *p, unsigned long + */ + + static void +-bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, int dst_idx, +- const unsigned long __iomem *src, int src_idx, int bits, ++bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, unsigned dst_idx, ++ const unsigned long __iomem *src, unsigned src_idx, int bits, + unsigned n, u32 bswapmask) + { + unsigned long first, last; + int shift; + +- dst += (n-1)/bits; +- src += (n-1)/bits; +- if ((n-1) % bits) { +- dst_idx += (n-1) % bits; +- dst += dst_idx >> (ffs(bits) - 1); +- dst_idx &= bits - 1; +- src_idx += (n-1) % bits; +- src += src_idx >> (ffs(bits) - 1); +- src_idx &= bits - 1; +- } ++#if 0 ++ /* ++ * If you suspect bug in this function, compare it with this simple ++ * memmove implementation. 
++ */ ++ fb_memmove((char *)dst + ((dst_idx & (bits - 1))) / 8, ++ (char *)src + ((src_idx & (bits - 1))) / 8, n / 8); ++ return; ++#endif ++ ++ dst += (dst_idx + n - 1) / bits; ++ src += (src_idx + n - 1) / bits; ++ dst_idx = (dst_idx + n - 1) % bits; ++ src_idx = (src_idx + n - 1) % bits; + + shift = dst_idx-src_idx; + +- first = fb_shifted_pixels_mask_long(p, bits - 1 - dst_idx, bswapmask); +- last = ~fb_shifted_pixels_mask_long(p, bits - 1 - ((dst_idx-n) % bits), +- bswapmask); ++ first = ~fb_shifted_pixels_mask_long(p, (dst_idx + 1) % bits, bswapmask); ++ last = fb_shifted_pixels_mask_long(p, (bits + dst_idx + 1 - n) % bits, bswapmask); + + if (!shift) { + // Same alignment for source and dest + + if ((unsigned long)dst_idx+1 >= n) { + // Single word +- if (last) +- first &= last; +- FB_WRITEL( comp( FB_READL(src), FB_READL(dst), first), dst); ++ if (first) ++ last &= first; ++ FB_WRITEL( comp( FB_READL(src), FB_READL(dst), last), dst); + } else { + // Multiple destination words + + // Leading bits +- if (first != ~0UL) { ++ if (first) { + FB_WRITEL( comp( FB_READL(src), FB_READL(dst), first), dst); + dst--; + src--; +@@ -262,7 +272,7 @@ bitcpy_rev(struct fb_info *p, unsigned l + FB_WRITEL(FB_READL(src--), dst--); + + // Trailing bits +- if (last) ++ if (last != -1UL) + FB_WRITEL( comp( FB_READL(src), FB_READL(dst), last), dst); + } + } else { +@@ -270,29 +280,28 @@ bitcpy_rev(struct fb_info *p, unsigned l + unsigned long d0, d1; + int m; + +- int const left = -shift & (bits-1); +- int const right = shift & (bits-1); +- bswapmask &= shift; ++ int const left = shift & (bits-1); ++ int const right = -shift & (bits-1); + + if ((unsigned long)dst_idx+1 >= n) { + // Single destination word +- if (last) +- first &= last; ++ if (first) ++ last &= first; + d0 = FB_READL(src); + if (shift < 0) { + // Single source word +- d0 <<= left; ++ d0 >>= right; + } else if (1+(unsigned long)src_idx >= n) { + // Single source word +- d0 >>= right; ++ d0 <<= left; + } else { + // 
2 source words + d1 = FB_READL(src - 1); + d1 = fb_rev_pixels_in_long(d1, bswapmask); +- d0 = d0>>right | d1<<left; ++ d0 = d0 << left | d1 >> right; + } + d0 = fb_rev_pixels_in_long(d0, bswapmask); +- FB_WRITEL(comp(d0, FB_READL(dst), first), dst); ++ FB_WRITEL(comp(d0, FB_READL(dst), last), dst); + } else { + // Multiple destination words + /** We must always remember the last value read, because in case +@@ -307,12 +316,12 @@ bitcpy_rev(struct fb_info *p, unsigned l + if (shift < 0) { + // Single source word + d1 = d0; +- d0 <<= left; ++ d0 >>= right; + } else { + // 2 source words + d1 = FB_READL(src--); + d1 = fb_rev_pixels_in_long(d1, bswapmask); +- d0 = d0>>right | d1<<left; ++ d0 = d0 << left | d1 >> right; + } + d0 = fb_rev_pixels_in_long(d0, bswapmask); + FB_WRITEL(comp(d0, FB_READL(dst), first), dst); +@@ -325,39 +334,39 @@ bitcpy_rev(struct fb_info *p, unsigned l + n /= bits; + while ((n >= 4) && !bswapmask) { + d1 = FB_READL(src--); +- FB_WRITEL(d0 >> right | d1 << left, dst--); ++ FB_WRITEL(d0 << left | d1 >> right, dst--); + d0 = d1; + d1 = FB_READL(src--); +- FB_WRITEL(d0 >> right | d1 << left, dst--); ++ FB_WRITEL(d0 << left | d1 >> right, dst--); + d0 = d1; + d1 = FB_READL(src--); +- FB_WRITEL(d0 >> right | d1 << left, dst--); ++ FB_WRITEL(d0 << left | d1 >> right, dst--); + d0 = d1; + d1 = FB_READL(src--); +- FB_WRITEL(d0 >> right | d1 << left, dst--); ++ FB_WRITEL(d0 << left | d1 >> right, dst--); + d0 = d1; + n -= 4; + } + while (n--) { + d1 = FB_READL(src--); + d1 = fb_rev_pixels_in_long(d1, bswapmask); +- d0 = d0 >> right | d1 << left; ++ d0 = d0 << left | d1 >> right; + d0 = fb_rev_pixels_in_long(d0, bswapmask); + FB_WRITEL(d0, dst--); + d0 = d1; + } + + // Trailing bits +- if (last) { +- if (m <= left) { ++ if (m) { ++ if (m <= bits - left) { + // Single source word +- d0 >>= right; ++ d0 <<= left; + } else { + // 2 source words + d1 = FB_READL(src); + d1 = fb_rev_pixels_in_long(d1, + bswapmask); +- d0 = d0>>right | d1<<left; ++ d0 = d0 << left | d1 >> right; + } + d0 = fb_rev_pixels_in_long(d0, bswapmask); + FB_WRITEL(comp(d0, 
FB_READL(dst), last), dst); +@@ -371,9 +380,9 @@ void cfb_copyarea(struct fb_info *p, con + u32 dx = area->dx, dy = area->dy, sx = area->sx, sy = area->sy; + u32 height = area->height, width = area->width; + unsigned long const bits_per_line = p->fix.line_length*8u; +- unsigned long __iomem *dst = NULL, *src = NULL; ++ unsigned long __iomem *base = NULL; + int bits = BITS_PER_LONG, bytes = bits >> 3; +- int dst_idx = 0, src_idx = 0, rev_copy = 0; ++ unsigned dst_idx = 0, src_idx = 0, rev_copy = 0; + u32 bswapmask = fb_compute_bswapmask(p); + + if (p->state != FBINFO_STATE_RUNNING) +@@ -389,7 +398,7 @@ void cfb_copyarea(struct fb_info *p, con + + // split the base of the framebuffer into a long-aligned address and the + // index of the first bit +- dst = src = (unsigned long __iomem *)((unsigned long)p->screen_base & ~(bytes-1)); ++ base = (unsigned long __iomem *)((unsigned long)p->screen_base & ~(bytes-1)); + dst_idx = src_idx = 8*((unsigned long)p->screen_base & (bytes-1)); + // add offset of source and target area + dst_idx += dy*bits_per_line + dx*p->var.bits_per_pixel; +@@ -402,20 +411,14 @@ void cfb_copyarea(struct fb_info *p, con + while (height--) { + dst_idx -= bits_per_line; + src_idx -= bits_per_line; +- dst += dst_idx >> (ffs(bits) - 1); +- dst_idx &= (bytes - 1); +- src += src_idx >> (ffs(bits) - 1); +- src_idx &= (bytes - 1); +- bitcpy_rev(p, dst, dst_idx, src, src_idx, bits, ++ bitcpy_rev(p, base + (dst_idx / bits), dst_idx % bits, ++ base + (src_idx / bits), src_idx % bits, bits, + width*p->var.bits_per_pixel, bswapmask); + } + } else { + while (height--) { +- dst += dst_idx >> (ffs(bits) - 1); +- dst_idx &= (bytes - 1); +- src += src_idx >> (ffs(bits) - 1); +- src_idx &= (bytes - 1); +- bitcpy(p, dst, dst_idx, src, src_idx, bits, ++ bitcpy(p, base + (dst_idx / bits), dst_idx % bits, ++ base + (src_idx / bits), src_idx % bits, bits, + width*p->var.bits_per_pixel, bswapmask); + dst_idx += bits_per_line; + src_idx += bits_per_line; diff --git 
a/queue-3.4/libata-ahci-accommodate-tag-ordered-controllers.patch b/queue-3.4/libata-ahci-accommodate-tag-ordered-controllers.patch new file mode 100644 index 00000000000..88dbfdbffb5 --- /dev/null +++ b/queue-3.4/libata-ahci-accommodate-tag-ordered-controllers.patch @@ -0,0 +1,96 @@ +From 8a4aeec8d2d6a3edeffbdfae451cdf05cbf0fefd Mon Sep 17 00:00:00 2001 +From: Dan Williams +Date: Thu, 17 Apr 2014 11:48:21 -0700 +Subject: libata/ahci: accommodate tag ordered controllers + +From: Dan Williams + +commit 8a4aeec8d2d6a3edeffbdfae451cdf05cbf0fefd upstream. + +The AHCI spec allows implementations to issue commands in tag order +rather than FIFO order: + + 5.3.2.12 P:SelectCmd + HBA sets pSlotLoc = (pSlotLoc + 1) mod (CAP.NCS + 1) + or HBA selects the command to issue that has had the + PxCI bit set to '1' longer than any other command + pending to be issued. + +The result is that commands posted sequentially (time-wise) may play out +of sequence when issued by hardware. + +This behavior has likely been hidden by drives that arrange for commands +to complete in issue order. However, it appears recent drives (two from +different vendors that we have found so far) inflict out-of-order +completions as a matter of course. So, we need to take care to maintain +ordered submission, otherwise we risk triggering a drive to fall out of +sequential-io automation and back to random-io processing, which incurs +large latency and degrades throughput. + +This issue was found in simple benchmarks where QD=2 seq-write +performance was 30-50% *greater* than QD=32 seq-write performance. + +Tagging for -stable and making the change globally since it has a low +risk-to-reward ratio. Also, word is that recent versions of an unnamed +OS also does it this way now. So, drives in the field are already +experienced with this tag ordering scheme. 
+ +Cc: Dave Jiang +Cc: Ed Ciechanowski +Reviewed-by: Matthew Wilcox +Signed-off-by: Dan Williams +Signed-off-by: Tejun Heo +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/ata/libata-core.c | 21 +++++++++++++-------- + include/linux/libata.h | 1 + + 2 files changed, 14 insertions(+), 8 deletions(-) + +--- a/drivers/ata/libata-core.c ++++ b/drivers/ata/libata-core.c +@@ -4700,21 +4700,26 @@ void swap_buf_le16(u16 *buf, unsigned in + static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap) + { + struct ata_queued_cmd *qc = NULL; +- unsigned int i; ++ unsigned int i, tag; + + /* no command while frozen */ + if (unlikely(ap->pflags & ATA_PFLAG_FROZEN)) + return NULL; + +- /* the last tag is reserved for internal command. */ +- for (i = 0; i < ATA_MAX_QUEUE - 1; i++) +- if (!test_and_set_bit(i, &ap->qc_allocated)) { +- qc = __ata_qc_from_tag(ap, i); ++ for (i = 0; i < ATA_MAX_QUEUE; i++) { ++ tag = (i + ap->last_tag + 1) % ATA_MAX_QUEUE; ++ ++ /* the last tag is reserved for internal command. 
*/ ++ if (tag == ATA_TAG_INTERNAL) ++ continue; ++ ++ if (!test_and_set_bit(tag, &ap->qc_allocated)) { ++ qc = __ata_qc_from_tag(ap, tag); ++ qc->tag = tag; ++ ap->last_tag = tag; + break; + } +- +- if (qc) +- qc->tag = i; ++ } + + return qc; + } +--- a/include/linux/libata.h ++++ b/include/linux/libata.h +@@ -762,6 +762,7 @@ struct ata_port { + unsigned long qc_allocated; + unsigned int qc_active; + int nr_active_links; /* #links with active qcs */ ++ unsigned int last_tag; /* track next tag hw expects */ + + struct ata_link link; /* host default link */ + struct ata_link *slave_link; /* see ata_slave_link_init() */ diff --git a/queue-3.4/mach64-fix-cursor-when-character-width-is-not-a-multiple-of-8-pixels.patch b/queue-3.4/mach64-fix-cursor-when-character-width-is-not-a-multiple-of-8-pixels.patch new file mode 100644 index 00000000000..389943573f8 --- /dev/null +++ b/queue-3.4/mach64-fix-cursor-when-character-width-is-not-a-multiple-of-8-pixels.patch @@ -0,0 +1,88 @@ +From 43751a1b8ee2e70ce392bf31ef3133da324e68b3 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Thu, 23 Jan 2014 14:41:59 -0500 +Subject: mach64: fix cursor when character width is not a multiple of 8 pixels + +From: Mikulas Patocka + +commit 43751a1b8ee2e70ce392bf31ef3133da324e68b3 upstream. + +This patch fixes the hardware cursor on mach64 when font width is not a +multiple of 8 pixels. + +If you load such a font, the cursor is expanded to the next 8-byte +boundary and a part of the next character after the cursor is not +visible. +For example, when you load a font with 12-pixel width, the cursor width +is 16 pixels and when the cursor is displayed, 4 pixels of the next +character are not visible. + +The reason is this: atyfb_cursor is called with proper parameters to +load an image that is 12-pixel wide. 
However, the number is aligned on +the next 8-pixel boundary on the line +"unsigned int width = (cursor->image.width + 7) >> 3;" and the whole +function acts as if it was loading a 16-pixel image. + +This patch fixes it so that the value written to the framebuffer is +padded with 0xaaaa (the transparent pattern) when the image size is not +a multiple of 8 pixels. The transparent pattern causes that the cursor +will not interfere with the next character. + +Signed-off-by: Mikulas Patocka +Signed-off-by: Tomi Valkeinen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/video/aty/mach64_cursor.c | 22 ++++++++++++++++------ + 1 file changed, 16 insertions(+), 6 deletions(-) + +--- a/drivers/video/aty/mach64_cursor.c ++++ b/drivers/video/aty/mach64_cursor.c +@@ -5,6 +5,7 @@ + #include + #include + #include ++#include "../fb_draw.h" + + #include + +@@ -157,24 +158,33 @@ static int atyfb_cursor(struct fb_info * + + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { ++ u16 l = 0xaaaa; + b = *src++; + m = *msk++; + switch (cursor->rop) { + case ROP_XOR: + // Upper 4 bits of mask data +- fb_writeb(cursor_bits_lookup[(b ^ m) >> 4], dst++); ++ l = cursor_bits_lookup[(b ^ m) >> 4] | + // Lower 4 bits of mask +- fb_writeb(cursor_bits_lookup[(b ^ m) & 0x0f], +- dst++); ++ (cursor_bits_lookup[(b ^ m) & 0x0f] << 8); + break; + case ROP_COPY: + // Upper 4 bits of mask data +- fb_writeb(cursor_bits_lookup[(b & m) >> 4], dst++); ++ l = cursor_bits_lookup[(b & m) >> 4] | + // Lower 4 bits of mask +- fb_writeb(cursor_bits_lookup[(b & m) & 0x0f], +- dst++); ++ (cursor_bits_lookup[(b & m) & 0x0f] << 8); + break; + } ++ /* ++ * If cursor size is not a multiple of 8 characters ++ * we must pad it with transparent pattern (0xaaaa). 
++ */ ++ if ((j + 1) * 8 > cursor->image.width) { ++ l = comp(l, 0xaaaa, ++ (1 << ((cursor->image.width & 7) * 2)) - 1); ++ } ++ fb_writeb(l & 0xff, dst++); ++ fb_writeb(l >> 8, dst++); + } + dst += offset; + } diff --git a/queue-3.4/mach64-use-unaligned-access.patch b/queue-3.4/mach64-use-unaligned-access.patch new file mode 100644 index 00000000000..123bf725586 --- /dev/null +++ b/queue-3.4/mach64-use-unaligned-access.patch @@ -0,0 +1,46 @@ +From c29dd8696dc5dbd50b3ac441b8a26751277ba520 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Thu, 23 Jan 2014 14:41:09 -0500 +Subject: mach64: use unaligned access + +From: Mikulas Patocka + +commit c29dd8696dc5dbd50b3ac441b8a26751277ba520 upstream. + +This patch fixes mach64 to use unaligned access to the font bitmap. + +This fixes unaligned access warning on sparc64 when 14x8 font is loaded. + +On x86(64), unaligned access is handled in hardware, so both functions +le32_to_cpup and get_unaligned_le32 perform the same operation. + +On RISC machines, unaligned access is not handled in hardware, so we +better use get_unaligned_le32 to avoid the unaligned trap and warning. + +Signed-off-by: Mikulas Patocka +Signed-off-by: Tomi Valkeinen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/video/aty/mach64_accel.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/video/aty/mach64_accel.c ++++ b/drivers/video/aty/mach64_accel.c +@@ -4,6 +4,7 @@ + */ + + #include ++#include + #include + #include