--- /dev/null
+From 12cd43c6ed6da7bf7c5afbd74da6959cda6d056b Mon Sep 17 00:00:00 2001
+From: Rafał Miłecki <zajec5@gmail.com>
+Date: Sat, 5 Apr 2014 18:08:25 +0200
+Subject: b43: Fix machine check error due to improper access of B43_MMIO_PSM_PHY_HDR
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Rafał Miłecki <zajec5@gmail.com>
+
+commit 12cd43c6ed6da7bf7c5afbd74da6959cda6d056b upstream.
+
+Register B43_MMIO_PSM_PHY_HDR is a 16-bit register, so accessing it with
+32-bit functions isn't safe. On my machine it causes a delayed (!) CPU exception:
+
+Disabling lock debugging due to kernel taint
+mce: [Hardware Error]: CPU 0: Machine Check Exception: 4 Bank 4: b200000000070f0f
+mce: [Hardware Error]: TSC 164083803dc
+mce: [Hardware Error]: PROCESSOR 2:20fc2 TIME 1396650505 SOCKET 0 APIC 0 microcode 0
+mce: [Hardware Error]: Run the above through 'mcelog --ascii'
+mce: [Hardware Error]: Machine check: Processor context corrupt
+Kernel panic - not syncing: Fatal machine check on current CPU
+Kernel Offset: 0x0 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffff9fffffff)
+
+Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
+Acked-by: Larry Finger <Larry.Finger@lwfinger.net>
+Signed-off-by: John W. Linville <linville@tuxdriver.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/wireless/b43/phy_n.c | 14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+--- a/drivers/net/wireless/b43/phy_n.c
++++ b/drivers/net/wireless/b43/phy_n.c
+@@ -4599,22 +4599,22 @@ static void b43_nphy_channel_setup(struc
+ int ch = new_channel->hw_value;
+
+ u16 old_band_5ghz;
+- u32 tmp32;
++ u16 tmp16;
+
+ old_band_5ghz =
+ b43_phy_read(dev, B43_NPHY_BANDCTL) & B43_NPHY_BANDCTL_5GHZ;
+ if (new_channel->band == IEEE80211_BAND_5GHZ && !old_band_5ghz) {
+- tmp32 = b43_read32(dev, B43_MMIO_PSM_PHY_HDR);
+- b43_write32(dev, B43_MMIO_PSM_PHY_HDR, tmp32 | 4);
++ tmp16 = b43_read16(dev, B43_MMIO_PSM_PHY_HDR);
++ b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp16 | 4);
+ b43_phy_set(dev, B43_PHY_B_BBCFG, 0xC000);
+- b43_write32(dev, B43_MMIO_PSM_PHY_HDR, tmp32);
++ b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp16);
+ b43_phy_set(dev, B43_NPHY_BANDCTL, B43_NPHY_BANDCTL_5GHZ);
+ } else if (new_channel->band == IEEE80211_BAND_2GHZ && old_band_5ghz) {
+ b43_phy_mask(dev, B43_NPHY_BANDCTL, ~B43_NPHY_BANDCTL_5GHZ);
+- tmp32 = b43_read32(dev, B43_MMIO_PSM_PHY_HDR);
+- b43_write32(dev, B43_MMIO_PSM_PHY_HDR, tmp32 | 4);
++ tmp16 = b43_read16(dev, B43_MMIO_PSM_PHY_HDR);
++ b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp16 | 4);
+ b43_phy_mask(dev, B43_PHY_B_BBCFG, 0x3FFF);
+- b43_write32(dev, B43_MMIO_PSM_PHY_HDR, tmp32);
++ b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp16);
+ }
+
+ b43_chantab_phy_upload(dev, e);
--- /dev/null
+From 8ceee72808d1ae3fb191284afc2257a2be964725 Mon Sep 17 00:00:00 2001
+From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+Date: Thu, 27 Mar 2014 18:14:40 +0100
+Subject: crypto: ghash-clmulni-intel - use C implementation for setkey()
+
+From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+
+commit 8ceee72808d1ae3fb191284afc2257a2be964725 upstream.
+
+The GHASH setkey() function uses SSE registers but fails to call
+kernel_fpu_begin()/kernel_fpu_end(). Instead of adding these calls, and
+then having to deal with the restriction that they cannot be called from
+interrupt context, move the setkey() implementation to the C domain.
+
+Note that setkey() does not use any particular SSE features and is not
+expected to become a performance bottleneck.
+
+Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+Acked-by: H. Peter Anvin <hpa@linux.intel.com>
+Fixes: 0e1227d356e9b (crypto: ghash - Add PCLMULQDQ accelerated implementation)
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/crypto/ghash-clmulni-intel_asm.S | 28 ----------------------------
+ arch/x86/crypto/ghash-clmulni-intel_glue.c | 14 +++++++++++---
+ 2 files changed, 11 insertions(+), 31 deletions(-)
+
+--- a/arch/x86/crypto/ghash-clmulni-intel_asm.S
++++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S
+@@ -24,10 +24,6 @@
+ .align 16
+ .Lbswap_mask:
+ .octa 0x000102030405060708090a0b0c0d0e0f
+-.Lpoly:
+- .octa 0xc2000000000000000000000000000001
+-.Ltwo_one:
+- .octa 0x00000001000000000000000000000001
+
+ #define DATA %xmm0
+ #define SHASH %xmm1
+@@ -131,27 +127,3 @@ ENTRY(clmul_ghash_update)
+ movups DATA, (%rdi)
+ .Lupdate_just_ret:
+ ret
+-
+-/*
+- * void clmul_ghash_setkey(be128 *shash, const u8 *key);
+- *
+- * Calculate hash_key << 1 mod poly
+- */
+-ENTRY(clmul_ghash_setkey)
+- movaps .Lbswap_mask, BSWAP
+- movups (%rsi), %xmm0
+- PSHUFB_XMM BSWAP %xmm0
+- movaps %xmm0, %xmm1
+- psllq $1, %xmm0
+- psrlq $63, %xmm1
+- movaps %xmm1, %xmm2
+- pslldq $8, %xmm1
+- psrldq $8, %xmm2
+- por %xmm1, %xmm0
+- # reduction
+- pshufd $0b00100100, %xmm2, %xmm1
+- pcmpeqd .Ltwo_one, %xmm1
+- pand .Lpoly, %xmm1
+- pxor %xmm1, %xmm0
+- movups %xmm0, (%rdi)
+- ret
+--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
++++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
+@@ -30,8 +30,6 @@ void clmul_ghash_mul(char *dst, const be
+ void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
+ const be128 *shash);
+
+-void clmul_ghash_setkey(be128 *shash, const u8 *key);
+-
+ struct ghash_async_ctx {
+ struct cryptd_ahash *cryptd_tfm;
+ };
+@@ -58,13 +56,23 @@ static int ghash_setkey(struct crypto_sh
+ const u8 *key, unsigned int keylen)
+ {
+ struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
++ be128 *x = (be128 *)key;
++ u64 a, b;
+
+ if (keylen != GHASH_BLOCK_SIZE) {
+ crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+ return -EINVAL;
+ }
+
+- clmul_ghash_setkey(&ctx->shash, key);
++ /* perform multiplication by 'x' in GF(2^128) */
++ a = be64_to_cpu(x->a);
++ b = be64_to_cpu(x->b);
++
++ ctx->shash.a = (__be64)((b << 1) | (a >> 63));
++ ctx->shash.b = (__be64)((a << 1) | (b >> 63));
++
++ if (a >> 63)
++ ctx->shash.b ^= cpu_to_be64(0xc2);
+
+ return 0;
+ }
--- /dev/null
+From 00a9d699bc85052d2d3ed56251cd928024ce06a3 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Thu, 23 Jan 2014 14:39:29 -0500
+Subject: framebuffer: fix cfb_copyarea
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit 00a9d699bc85052d2d3ed56251cd928024ce06a3 upstream.
+
+The function cfb_copyarea is buggy when the copy operation is not aligned on
+long boundary (4 bytes on 32-bit machines, 8 bytes on 64-bit machines).
+
+How to reproduce:
+- use x86-64 machine
+- use a framebuffer driver without acceleration (for example uvesafb)
+- set the framebuffer to 8-bit depth
+ (for example fbset -a 1024x768-60 -depth 8)
+- load a font with character width that is not a multiple of 8 pixels
+ note: the console-tools package cannot load a font that has
+ width different from 8 pixels. You need to install the packages
+ "kbd" and "console-terminus" and use the program "setfont" to
+ set font width (for example: setfont Uni2-Terminus20x10)
+- move some text left and right on the bash command line and you get a
+ screen corruption
+
+To expose more bugs, put this line to the end of uvesafb_init_info:
+info->flags |= FBINFO_HWACCEL_COPYAREA | FBINFO_READS_FAST;
+- Now framebuffer console will use cfb_copyarea for console scrolling.
+You get a screen corruption when console is scrolled.
+
+This patch is a rewrite of cfb_copyarea. It fixes the bugs; with this
+patch, console scrolling in 8-bit depth with a font width that is not a
+multiple of 8 pixels works fine.
+
+The cfb_copyarea code was very buggy and it looks like it was written
+and never tried with a non-8-pixel font.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/video/cfbcopyarea.c | 153 ++++++++++++++++++++++----------------------
+ 1 file changed, 78 insertions(+), 75 deletions(-)
+
+--- a/drivers/video/cfbcopyarea.c
++++ b/drivers/video/cfbcopyarea.c
+@@ -43,13 +43,22 @@
+ */
+
+ static void
+-bitcpy(struct fb_info *p, unsigned long __iomem *dst, int dst_idx,
+- const unsigned long __iomem *src, int src_idx, int bits,
++bitcpy(struct fb_info *p, unsigned long __iomem *dst, unsigned dst_idx,
++ const unsigned long __iomem *src, unsigned src_idx, int bits,
+ unsigned n, u32 bswapmask)
+ {
+ unsigned long first, last;
+ int const shift = dst_idx-src_idx;
+- int left, right;
++
++#if 0
++ /*
++ * If you suspect bug in this function, compare it with this simple
++ * memmove implementation.
++ */
++ fb_memmove((char *)dst + ((dst_idx & (bits - 1))) / 8,
++ (char *)src + ((src_idx & (bits - 1))) / 8, n / 8);
++ return;
++#endif
+
+ first = fb_shifted_pixels_mask_long(p, dst_idx, bswapmask);
+ last = ~fb_shifted_pixels_mask_long(p, (dst_idx+n) % bits, bswapmask);
+@@ -98,9 +107,8 @@ bitcpy(struct fb_info *p, unsigned long
+ unsigned long d0, d1;
+ int m;
+
+- right = shift & (bits - 1);
+- left = -shift & (bits - 1);
+- bswapmask &= shift;
++ int const left = shift & (bits - 1);
++ int const right = -shift & (bits - 1);
+
+ if (dst_idx+n <= bits) {
+ // Single destination word
+@@ -110,15 +118,15 @@ bitcpy(struct fb_info *p, unsigned long
+ d0 = fb_rev_pixels_in_long(d0, bswapmask);
+ if (shift > 0) {
+ // Single source word
+- d0 >>= right;
++ d0 <<= left;
+ } else if (src_idx+n <= bits) {
+ // Single source word
+- d0 <<= left;
++ d0 >>= right;
+ } else {
+ // 2 source words
+ d1 = FB_READL(src + 1);
+ d1 = fb_rev_pixels_in_long(d1, bswapmask);
+- d0 = d0<<left | d1>>right;
++ d0 = d0 >> right | d1 << left;
+ }
+ d0 = fb_rev_pixels_in_long(d0, bswapmask);
+ FB_WRITEL(comp(d0, FB_READL(dst), first), dst);
+@@ -135,60 +143,59 @@ bitcpy(struct fb_info *p, unsigned long
+ if (shift > 0) {
+ // Single source word
+ d1 = d0;
+- d0 >>= right;
+- dst++;
++ d0 <<= left;
+ n -= bits - dst_idx;
+ } else {
+ // 2 source words
+ d1 = FB_READL(src++);
+ d1 = fb_rev_pixels_in_long(d1, bswapmask);
+
+- d0 = d0<<left | d1>>right;
+- dst++;
++ d0 = d0 >> right | d1 << left;
+ n -= bits - dst_idx;
+ }
+ d0 = fb_rev_pixels_in_long(d0, bswapmask);
+ FB_WRITEL(comp(d0, FB_READL(dst), first), dst);
+ d0 = d1;
++ dst++;
+
+ // Main chunk
+ m = n % bits;
+ n /= bits;
+ while ((n >= 4) && !bswapmask) {
+ d1 = FB_READL(src++);
+- FB_WRITEL(d0 << left | d1 >> right, dst++);
++ FB_WRITEL(d0 >> right | d1 << left, dst++);
+ d0 = d1;
+ d1 = FB_READL(src++);
+- FB_WRITEL(d0 << left | d1 >> right, dst++);
++ FB_WRITEL(d0 >> right | d1 << left, dst++);
+ d0 = d1;
+ d1 = FB_READL(src++);
+- FB_WRITEL(d0 << left | d1 >> right, dst++);
++ FB_WRITEL(d0 >> right | d1 << left, dst++);
+ d0 = d1;
+ d1 = FB_READL(src++);
+- FB_WRITEL(d0 << left | d1 >> right, dst++);
++ FB_WRITEL(d0 >> right | d1 << left, dst++);
+ d0 = d1;
+ n -= 4;
+ }
+ while (n--) {
+ d1 = FB_READL(src++);
+ d1 = fb_rev_pixels_in_long(d1, bswapmask);
+- d0 = d0 << left | d1 >> right;
++ d0 = d0 >> right | d1 << left;
+ d0 = fb_rev_pixels_in_long(d0, bswapmask);
+ FB_WRITEL(d0, dst++);
+ d0 = d1;
+ }
+
+ // Trailing bits
+- if (last) {
+- if (m <= right) {
++ if (m) {
++ if (m <= bits - right) {
+ // Single source word
+- d0 <<= left;
++ d0 >>= right;
+ } else {
+ // 2 source words
+ d1 = FB_READL(src);
+ d1 = fb_rev_pixels_in_long(d1,
+ bswapmask);
+- d0 = d0<<left | d1>>right;
++ d0 = d0 >> right | d1 << left;
+ }
+ d0 = fb_rev_pixels_in_long(d0, bswapmask);
+ FB_WRITEL(comp(d0, FB_READL(dst), last), dst);
+@@ -202,43 +209,46 @@ bitcpy(struct fb_info *p, unsigned long
+ */
+
+ static void
+-bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, int dst_idx,
+- const unsigned long __iomem *src, int src_idx, int bits,
++bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, unsigned dst_idx,
++ const unsigned long __iomem *src, unsigned src_idx, int bits,
+ unsigned n, u32 bswapmask)
+ {
+ unsigned long first, last;
+ int shift;
+
+- dst += (n-1)/bits;
+- src += (n-1)/bits;
+- if ((n-1) % bits) {
+- dst_idx += (n-1) % bits;
+- dst += dst_idx >> (ffs(bits) - 1);
+- dst_idx &= bits - 1;
+- src_idx += (n-1) % bits;
+- src += src_idx >> (ffs(bits) - 1);
+- src_idx &= bits - 1;
+- }
++#if 0
++ /*
++ * If you suspect bug in this function, compare it with this simple
++ * memmove implementation.
++ */
++ fb_memmove((char *)dst + ((dst_idx & (bits - 1))) / 8,
++ (char *)src + ((src_idx & (bits - 1))) / 8, n / 8);
++ return;
++#endif
++
++ dst += (dst_idx + n - 1) / bits;
++ src += (src_idx + n - 1) / bits;
++ dst_idx = (dst_idx + n - 1) % bits;
++ src_idx = (src_idx + n - 1) % bits;
+
+ shift = dst_idx-src_idx;
+
+- first = fb_shifted_pixels_mask_long(p, bits - 1 - dst_idx, bswapmask);
+- last = ~fb_shifted_pixels_mask_long(p, bits - 1 - ((dst_idx-n) % bits),
+- bswapmask);
++ first = ~fb_shifted_pixels_mask_long(p, (dst_idx + 1) % bits, bswapmask);
++ last = fb_shifted_pixels_mask_long(p, (bits + dst_idx + 1 - n) % bits, bswapmask);
+
+ if (!shift) {
+ // Same alignment for source and dest
+
+ if ((unsigned long)dst_idx+1 >= n) {
+ // Single word
+- if (last)
+- first &= last;
+- FB_WRITEL( comp( FB_READL(src), FB_READL(dst), first), dst);
++ if (first)
++ last &= first;
++ FB_WRITEL( comp( FB_READL(src), FB_READL(dst), last), dst);
+ } else {
+ // Multiple destination words
+
+ // Leading bits
+- if (first != ~0UL) {
++ if (first) {
+ FB_WRITEL( comp( FB_READL(src), FB_READL(dst), first), dst);
+ dst--;
+ src--;
+@@ -262,7 +272,7 @@ bitcpy_rev(struct fb_info *p, unsigned l
+ FB_WRITEL(FB_READL(src--), dst--);
+
+ // Trailing bits
+- if (last)
++ if (last != -1UL)
+ FB_WRITEL( comp( FB_READL(src), FB_READL(dst), last), dst);
+ }
+ } else {
+@@ -270,29 +280,28 @@ bitcpy_rev(struct fb_info *p, unsigned l
+ unsigned long d0, d1;
+ int m;
+
+- int const left = -shift & (bits-1);
+- int const right = shift & (bits-1);
+- bswapmask &= shift;
++ int const left = shift & (bits-1);
++ int const right = -shift & (bits-1);
+
+ if ((unsigned long)dst_idx+1 >= n) {
+ // Single destination word
+- if (last)
+- first &= last;
++ if (first)
++ last &= first;
+ d0 = FB_READL(src);
+ if (shift < 0) {
+ // Single source word
+- d0 <<= left;
++ d0 >>= right;
+ } else if (1+(unsigned long)src_idx >= n) {
+ // Single source word
+- d0 >>= right;
++ d0 <<= left;
+ } else {
+ // 2 source words
+ d1 = FB_READL(src - 1);
+ d1 = fb_rev_pixels_in_long(d1, bswapmask);
+- d0 = d0>>right | d1<<left;
++ d0 = d0 << left | d1 >> right;
+ }
+ d0 = fb_rev_pixels_in_long(d0, bswapmask);
+- FB_WRITEL(comp(d0, FB_READL(dst), first), dst);
++ FB_WRITEL(comp(d0, FB_READL(dst), last), dst);
+ } else {
+ // Multiple destination words
+ /** We must always remember the last value read, because in case
+@@ -307,12 +316,12 @@ bitcpy_rev(struct fb_info *p, unsigned l
+ if (shift < 0) {
+ // Single source word
+ d1 = d0;
+- d0 <<= left;
++ d0 >>= right;
+ } else {
+ // 2 source words
+ d1 = FB_READL(src--);
+ d1 = fb_rev_pixels_in_long(d1, bswapmask);
+- d0 = d0>>right | d1<<left;
++ d0 = d0 << left | d1 >> right;
+ }
+ d0 = fb_rev_pixels_in_long(d0, bswapmask);
+ FB_WRITEL(comp(d0, FB_READL(dst), first), dst);
+@@ -325,39 +334,39 @@ bitcpy_rev(struct fb_info *p, unsigned l
+ n /= bits;
+ while ((n >= 4) && !bswapmask) {
+ d1 = FB_READL(src--);
+- FB_WRITEL(d0 >> right | d1 << left, dst--);
++ FB_WRITEL(d0 << left | d1 >> right, dst--);
+ d0 = d1;
+ d1 = FB_READL(src--);
+- FB_WRITEL(d0 >> right | d1 << left, dst--);
++ FB_WRITEL(d0 << left | d1 >> right, dst--);
+ d0 = d1;
+ d1 = FB_READL(src--);
+- FB_WRITEL(d0 >> right | d1 << left, dst--);
++ FB_WRITEL(d0 << left | d1 >> right, dst--);
+ d0 = d1;
+ d1 = FB_READL(src--);
+- FB_WRITEL(d0 >> right | d1 << left, dst--);
++ FB_WRITEL(d0 << left | d1 >> right, dst--);
+ d0 = d1;
+ n -= 4;
+ }
+ while (n--) {
+ d1 = FB_READL(src--);
+ d1 = fb_rev_pixels_in_long(d1, bswapmask);
+- d0 = d0 >> right | d1 << left;
++ d0 = d0 << left | d1 >> right;
+ d0 = fb_rev_pixels_in_long(d0, bswapmask);
+ FB_WRITEL(d0, dst--);
+ d0 = d1;
+ }
+
+ // Trailing bits
+- if (last) {
+- if (m <= left) {
++ if (m) {
++ if (m <= bits - left) {
+ // Single source word
+- d0 >>= right;
++ d0 <<= left;
+ } else {
+ // 2 source words
+ d1 = FB_READL(src);
+ d1 = fb_rev_pixels_in_long(d1,
+ bswapmask);
+- d0 = d0>>right | d1<<left;
++ d0 = d0 << left | d1 >> right;
+ }
+ d0 = fb_rev_pixels_in_long(d0, bswapmask);
+ FB_WRITEL(comp(d0, FB_READL(dst), last), dst);
+@@ -371,9 +380,9 @@ void cfb_copyarea(struct fb_info *p, con
+ u32 dx = area->dx, dy = area->dy, sx = area->sx, sy = area->sy;
+ u32 height = area->height, width = area->width;
+ unsigned long const bits_per_line = p->fix.line_length*8u;
+- unsigned long __iomem *dst = NULL, *src = NULL;
++ unsigned long __iomem *base = NULL;
+ int bits = BITS_PER_LONG, bytes = bits >> 3;
+- int dst_idx = 0, src_idx = 0, rev_copy = 0;
++ unsigned dst_idx = 0, src_idx = 0, rev_copy = 0;
+ u32 bswapmask = fb_compute_bswapmask(p);
+
+ if (p->state != FBINFO_STATE_RUNNING)
+@@ -389,7 +398,7 @@ void cfb_copyarea(struct fb_info *p, con
+
+ // split the base of the framebuffer into a long-aligned address and the
+ // index of the first bit
+- dst = src = (unsigned long __iomem *)((unsigned long)p->screen_base & ~(bytes-1));
++ base = (unsigned long __iomem *)((unsigned long)p->screen_base & ~(bytes-1));
+ dst_idx = src_idx = 8*((unsigned long)p->screen_base & (bytes-1));
+ // add offset of source and target area
+ dst_idx += dy*bits_per_line + dx*p->var.bits_per_pixel;
+@@ -402,20 +411,14 @@ void cfb_copyarea(struct fb_info *p, con
+ while (height--) {
+ dst_idx -= bits_per_line;
+ src_idx -= bits_per_line;
+- dst += dst_idx >> (ffs(bits) - 1);
+- dst_idx &= (bytes - 1);
+- src += src_idx >> (ffs(bits) - 1);
+- src_idx &= (bytes - 1);
+- bitcpy_rev(p, dst, dst_idx, src, src_idx, bits,
++ bitcpy_rev(p, base + (dst_idx / bits), dst_idx % bits,
++ base + (src_idx / bits), src_idx % bits, bits,
+ width*p->var.bits_per_pixel, bswapmask);
+ }
+ } else {
+ while (height--) {
+- dst += dst_idx >> (ffs(bits) - 1);
+- dst_idx &= (bytes - 1);
+- src += src_idx >> (ffs(bits) - 1);
+- src_idx &= (bytes - 1);
+- bitcpy(p, dst, dst_idx, src, src_idx, bits,
++ bitcpy(p, base + (dst_idx / bits), dst_idx % bits,
++ base + (src_idx / bits), src_idx % bits, bits,
+ width*p->var.bits_per_pixel, bswapmask);
+ dst_idx += bits_per_line;
+ src_idx += bits_per_line;
--- /dev/null
+From 8a4aeec8d2d6a3edeffbdfae451cdf05cbf0fefd Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Thu, 17 Apr 2014 11:48:21 -0700
+Subject: libata/ahci: accommodate tag ordered controllers
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit 8a4aeec8d2d6a3edeffbdfae451cdf05cbf0fefd upstream.
+
+The AHCI spec allows implementations to issue commands in tag order
+rather than FIFO order:
+
+ 5.3.2.12 P:SelectCmd
+ HBA sets pSlotLoc = (pSlotLoc + 1) mod (CAP.NCS + 1)
+ or HBA selects the command to issue that has had the
+ PxCI bit set to '1' longer than any other command
+ pending to be issued.
+
+The result is that commands posted sequentially (time-wise) may play out
+of sequence when issued by hardware.
+
+This behavior has likely been hidden by drives that arrange for commands
+to complete in issue order. However, it appears recent drives (two from
+different vendors that we have found so far) inflict out-of-order
+completions as a matter of course. So, we need to take care to maintain
+ordered submission, otherwise we risk triggering a drive to fall out of
+sequential-io automation and back to random-io processing, which incurs
+large latency and degrades throughput.
+
+This issue was found in simple benchmarks where QD=2 seq-write
+performance was 30-50% *greater* than QD=32 seq-write performance.
+
+Tagging for -stable and making the change globally since it has a low
+risk-to-reward ratio. Also, word is that recent versions of an unnamed
+OS also do it this way now. So, drives in the field are already
+experienced with this tag ordering scheme.
+
+Cc: Dave Jiang <dave.jiang@intel.com>
+Cc: Ed Ciechanowski <ed.ciechanowski@intel.com>
+Reviewed-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/ata/libata-core.c | 21 +++++++++++++--------
+ include/linux/libata.h | 1 +
+ 2 files changed, 14 insertions(+), 8 deletions(-)
+
+--- a/drivers/ata/libata-core.c
++++ b/drivers/ata/libata-core.c
+@@ -4700,21 +4700,26 @@ void swap_buf_le16(u16 *buf, unsigned in
+ static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap)
+ {
+ struct ata_queued_cmd *qc = NULL;
+- unsigned int i;
++ unsigned int i, tag;
+
+ /* no command while frozen */
+ if (unlikely(ap->pflags & ATA_PFLAG_FROZEN))
+ return NULL;
+
+- /* the last tag is reserved for internal command. */
+- for (i = 0; i < ATA_MAX_QUEUE - 1; i++)
+- if (!test_and_set_bit(i, &ap->qc_allocated)) {
+- qc = __ata_qc_from_tag(ap, i);
++ for (i = 0; i < ATA_MAX_QUEUE; i++) {
++ tag = (i + ap->last_tag + 1) % ATA_MAX_QUEUE;
++
++ /* the last tag is reserved for internal command. */
++ if (tag == ATA_TAG_INTERNAL)
++ continue;
++
++ if (!test_and_set_bit(tag, &ap->qc_allocated)) {
++ qc = __ata_qc_from_tag(ap, tag);
++ qc->tag = tag;
++ ap->last_tag = tag;
+ break;
+ }
+-
+- if (qc)
+- qc->tag = i;
++ }
+
+ return qc;
+ }
+--- a/include/linux/libata.h
++++ b/include/linux/libata.h
+@@ -762,6 +762,7 @@ struct ata_port {
+ unsigned long qc_allocated;
+ unsigned int qc_active;
+ int nr_active_links; /* #links with active qcs */
++ unsigned int last_tag; /* track next tag hw expects */
+
+ struct ata_link link; /* host default link */
+ struct ata_link *slave_link; /* see ata_slave_link_init() */
--- /dev/null
+From 43751a1b8ee2e70ce392bf31ef3133da324e68b3 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Thu, 23 Jan 2014 14:41:59 -0500
+Subject: mach64: fix cursor when character width is not a multiple of 8 pixels
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit 43751a1b8ee2e70ce392bf31ef3133da324e68b3 upstream.
+
+This patch fixes the hardware cursor on mach64 when font width is not a
+multiple of 8 pixels.
+
+If you load such a font, the cursor is expanded to the next 8-pixel
+boundary and a part of the next character after the cursor is not
+visible.
+For example, when you load a font with 12-pixel width, the cursor width
+is 16 pixels and when the cursor is displayed, 4 pixels of the next
+character are not visible.
+
+The reason is this: atyfb_cursor is called with proper parameters to
+load an image that is 12 pixels wide. However, the number is aligned on
+the next 8-pixel boundary on the line
+"unsigned int width = (cursor->image.width + 7) >> 3;" and the whole
+function acts as if it was loading a 16-pixel image.
+
+This patch fixes it so that the value written to the framebuffer is
+padded with 0xaaaa (the transparent pattern) when the image size is not
+a multiple of 8 pixels. The transparent pattern ensures that the cursor
+will not interfere with the next character.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/video/aty/mach64_cursor.c | 22 ++++++++++++++++------
+ 1 file changed, 16 insertions(+), 6 deletions(-)
+
+--- a/drivers/video/aty/mach64_cursor.c
++++ b/drivers/video/aty/mach64_cursor.c
+@@ -5,6 +5,7 @@
+ #include <linux/fb.h>
+ #include <linux/init.h>
+ #include <linux/string.h>
++#include "../fb_draw.h"
+
+ #include <asm/io.h>
+
+@@ -157,24 +158,33 @@ static int atyfb_cursor(struct fb_info *
+
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j++) {
++ u16 l = 0xaaaa;
+ b = *src++;
+ m = *msk++;
+ switch (cursor->rop) {
+ case ROP_XOR:
+ // Upper 4 bits of mask data
+- fb_writeb(cursor_bits_lookup[(b ^ m) >> 4], dst++);
++ l = cursor_bits_lookup[(b ^ m) >> 4] |
+ // Lower 4 bits of mask
+- fb_writeb(cursor_bits_lookup[(b ^ m) & 0x0f],
+- dst++);
++ (cursor_bits_lookup[(b ^ m) & 0x0f] << 8);
+ break;
+ case ROP_COPY:
+ // Upper 4 bits of mask data
+- fb_writeb(cursor_bits_lookup[(b & m) >> 4], dst++);
++ l = cursor_bits_lookup[(b & m) >> 4] |
+ // Lower 4 bits of mask
+- fb_writeb(cursor_bits_lookup[(b & m) & 0x0f],
+- dst++);
++ (cursor_bits_lookup[(b & m) & 0x0f] << 8);
+ break;
+ }
++ /*
++ * If cursor size is not a multiple of 8 characters
++ * we must pad it with transparent pattern (0xaaaa).
++ */
++ if ((j + 1) * 8 > cursor->image.width) {
++ l = comp(l, 0xaaaa,
++ (1 << ((cursor->image.width & 7) * 2)) - 1);
++ }
++ fb_writeb(l & 0xff, dst++);
++ fb_writeb(l >> 8, dst++);
+ }
+ dst += offset;
+ }
--- /dev/null
+From c29dd8696dc5dbd50b3ac441b8a26751277ba520 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Thu, 23 Jan 2014 14:41:09 -0500
+Subject: mach64: use unaligned access
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit c29dd8696dc5dbd50b3ac441b8a26751277ba520 upstream.
+
+This patch fixes mach64 to use unaligned access to the font bitmap.
+
+This fixes unaligned access warning on sparc64 when 14x8 font is loaded.
+
+On x86(64), unaligned access is handled in hardware, so both functions
+le32_to_cpup and get_unaligned_le32 perform the same operation.
+
+On RISC machines, unaligned access is not handled in hardware, so we
+better use get_unaligned_le32 to avoid the unaligned trap and warning.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/video/aty/mach64_accel.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/video/aty/mach64_accel.c
++++ b/drivers/video/aty/mach64_accel.c
+@@ -4,6 +4,7 @@
+ */
+
+ #include <linux/delay.h>
++#include <asm/unaligned.h>
+ #include <linux/fb.h>
+ #include <video/mach64.h>
+ #include "atyfb.h"
+@@ -419,7 +420,7 @@ void atyfb_imageblit(struct fb_info *inf
+ u32 *pbitmap, dwords = (src_bytes + 3) / 4;
+ for (pbitmap = (u32*)(image->data); dwords; dwords--, pbitmap++) {
+ wait_for_fifo(1, par);
+- aty_st_le32(HOST_DATA0, le32_to_cpup(pbitmap), par);
++ aty_st_le32(HOST_DATA0, get_unaligned_le32(pbitmap), par);
+ }
+ }
+
--- /dev/null
+From a772d4736641ec1b421ad965e13457c17379fc86 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Thu, 23 Jan 2014 14:39:04 -0500
+Subject: matroxfb: restore the registers M_ACCESS and M_PITCH
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit a772d4736641ec1b421ad965e13457c17379fc86 upstream.
+
+When X11 is running and the user switches back to console, the card
+modifies the content of registers M_MACCESS and M_PITCH in periodic
+intervals.
+
+This patch fixes it by restoring the content of these registers before
+issuing any accelerator command.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/video/matrox/matroxfb_accel.c | 38 +++++++++++++++++++++++++---------
+ drivers/video/matrox/matroxfb_base.h | 2 +
+ 2 files changed, 30 insertions(+), 10 deletions(-)
+
+--- a/drivers/video/matrox/matroxfb_accel.c
++++ b/drivers/video/matrox/matroxfb_accel.c
+@@ -192,10 +192,18 @@ void matrox_cfbX_init(struct matrox_fb_i
+ minfo->accel.m_dwg_rect = M_DWG_TRAP | M_DWG_SOLID | M_DWG_ARZERO | M_DWG_SGNZERO | M_DWG_SHIFTZERO;
+ if (isMilleniumII(minfo)) minfo->accel.m_dwg_rect |= M_DWG_TRANSC;
+ minfo->accel.m_opmode = mopmode;
++ minfo->accel.m_access = maccess;
++ minfo->accel.m_pitch = mpitch;
+ }
+
+ EXPORT_SYMBOL(matrox_cfbX_init);
+
++static void matrox_accel_restore_maccess(struct matrox_fb_info *minfo)
++{
++ mga_outl(M_MACCESS, minfo->accel.m_access);
++ mga_outl(M_PITCH, minfo->accel.m_pitch);
++}
++
+ static void matrox_accel_bmove(struct matrox_fb_info *minfo, int vxres, int sy,
+ int sx, int dy, int dx, int height, int width)
+ {
+@@ -207,7 +215,8 @@ static void matrox_accel_bmove(struct ma
+ CRITBEGIN
+
+ if ((dy < sy) || ((dy == sy) && (dx <= sx))) {
+- mga_fifo(2);
++ mga_fifo(4);
++ matrox_accel_restore_maccess(minfo);
+ mga_outl(M_DWGCTL, M_DWG_BITBLT | M_DWG_SHIFTZERO | M_DWG_SGNZERO |
+ M_DWG_BFCOL | M_DWG_REPLACE);
+ mga_outl(M_AR5, vxres);
+@@ -215,7 +224,8 @@ static void matrox_accel_bmove(struct ma
+ start = sy*vxres+sx+curr_ydstorg(minfo);
+ end = start+width;
+ } else {
+- mga_fifo(3);
++ mga_fifo(5);
++ matrox_accel_restore_maccess(minfo);
+ mga_outl(M_DWGCTL, M_DWG_BITBLT | M_DWG_SHIFTZERO | M_DWG_BFCOL | M_DWG_REPLACE);
+ mga_outl(M_SGN, 5);
+ mga_outl(M_AR5, -vxres);
+@@ -224,7 +234,8 @@ static void matrox_accel_bmove(struct ma
+ start = end+width;
+ dy += height-1;
+ }
+- mga_fifo(4);
++ mga_fifo(6);
++ matrox_accel_restore_maccess(minfo);
+ mga_outl(M_AR0, end);
+ mga_outl(M_AR3, start);
+ mga_outl(M_FXBNDRY, ((dx+width)<<16) | dx);
+@@ -246,7 +257,8 @@ static void matrox_accel_bmove_lin(struc
+ CRITBEGIN
+
+ if ((dy < sy) || ((dy == sy) && (dx <= sx))) {
+- mga_fifo(2);
++ mga_fifo(4);
++ matrox_accel_restore_maccess(minfo);
+ mga_outl(M_DWGCTL, M_DWG_BITBLT | M_DWG_SHIFTZERO | M_DWG_SGNZERO |
+ M_DWG_BFCOL | M_DWG_REPLACE);
+ mga_outl(M_AR5, vxres);
+@@ -254,7 +266,8 @@ static void matrox_accel_bmove_lin(struc
+ start = sy*vxres+sx+curr_ydstorg(minfo);
+ end = start+width;
+ } else {
+- mga_fifo(3);
++ mga_fifo(5);
++ matrox_accel_restore_maccess(minfo);
+ mga_outl(M_DWGCTL, M_DWG_BITBLT | M_DWG_SHIFTZERO | M_DWG_BFCOL | M_DWG_REPLACE);
+ mga_outl(M_SGN, 5);
+ mga_outl(M_AR5, -vxres);
+@@ -263,7 +276,8 @@ static void matrox_accel_bmove_lin(struc
+ start = end+width;
+ dy += height-1;
+ }
+- mga_fifo(5);
++ mga_fifo(7);
++ matrox_accel_restore_maccess(minfo);
+ mga_outl(M_AR0, end);
+ mga_outl(M_AR3, start);
+ mga_outl(M_FXBNDRY, ((dx+width)<<16) | dx);
+@@ -298,7 +312,8 @@ static void matroxfb_accel_clear(struct
+
+ CRITBEGIN
+
+- mga_fifo(5);
++ mga_fifo(7);
++ matrox_accel_restore_maccess(minfo);
+ mga_outl(M_DWGCTL, minfo->accel.m_dwg_rect | M_DWG_REPLACE);
+ mga_outl(M_FCOL, color);
+ mga_outl(M_FXBNDRY, ((sx + width) << 16) | sx);
+@@ -341,7 +356,8 @@ static void matroxfb_cfb4_clear(struct m
+ width >>= 1;
+ sx >>= 1;
+ if (width) {
+- mga_fifo(5);
++ mga_fifo(7);
++ matrox_accel_restore_maccess(minfo);
+ mga_outl(M_DWGCTL, minfo->accel.m_dwg_rect | M_DWG_REPLACE2);
+ mga_outl(M_FCOL, bgx);
+ mga_outl(M_FXBNDRY, ((sx + width) << 16) | sx);
+@@ -415,7 +431,8 @@ static void matroxfb_1bpp_imageblit(stru
+
+ CRITBEGIN
+
+- mga_fifo(3);
++ mga_fifo(5);
++ matrox_accel_restore_maccess(minfo);
+ if (easy)
+ mga_outl(M_DWGCTL, M_DWG_ILOAD | M_DWG_SGNZERO | M_DWG_SHIFTZERO | M_DWG_BMONOWF | M_DWG_LINEAR | M_DWG_REPLACE);
+ else
+@@ -425,7 +442,8 @@ static void matroxfb_1bpp_imageblit(stru
+ fxbndry = ((xx + width - 1) << 16) | xx;
+ mmio = minfo->mmio.vbase;
+
+- mga_fifo(6);
++ mga_fifo(8);
++ matrox_accel_restore_maccess(minfo);
+ mga_writel(mmio, M_FXBNDRY, fxbndry);
+ mga_writel(mmio, M_AR0, ar0);
+ mga_writel(mmio, M_AR3, 0);
+--- a/drivers/video/matrox/matroxfb_base.h
++++ b/drivers/video/matrox/matroxfb_base.h
+@@ -307,6 +307,8 @@ struct matrox_accel_data {
+ #endif
+ u_int32_t m_dwg_rect;
+ u_int32_t m_opmode;
++ u_int32_t m_access;
++ u_int32_t m_pitch;
+ };
+
+ struct v4l2_queryctrl;
mips-hibernate-flush-tlb-entries-in-swsusp_arch_resume.patch
virtio_balloon-don-t-softlockup-on-huge-balloon-changes.patch
mpt2sas-don-t-disable-device-twice-at-suspend.patch
+crypto-ghash-clmulni-intel-use-c-implementation-for-setkey.patch
+framebuffer-fix-cfb_copyarea.patch
+matroxfb-restore-the-registers-m_access-and-m_pitch.patch
+mach64-use-unaligned-access.patch
+mach64-fix-cursor-when-character-width-is-not-a-multiple-of-8-pixels.patch
+b43-fix-machine-check-error-due-to-improper-access-of-b43_mmio_psm_phy_hdr.patch
+libata-ahci-accommodate-tag-ordered-controllers.patch