From b85de3fa436071bf8313c8f585fcf6af42f4fbf0 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 6 May 2014 15:52:57 -0700
Subject: [PATCH] 3.4-stable patches

added patches:
	b43-fix-machine-check-error-due-to-improper-access-of-b43_mmio_psm_phy_hdr.patch
	crypto-ghash-clmulni-intel-use-c-implementation-for-setkey.patch
	framebuffer-fix-cfb_copyarea.patch
	libata-ahci-accommodate-tag-ordered-controllers.patch
	mach64-fix-cursor-when-character-width-is-not-a-multiple-of-8-pixels.patch
	mach64-use-unaligned-access.patch
	matroxfb-restore-the-registers-m_access-and-m_pitch.patch
---
 ...roper-access-of-b43_mmio_psm_phy_hdr.patch |  65 +++
 ...ntel-use-c-implementation-for-setkey.patch | 105 +++++
 queue-3.4/framebuffer-fix-cfb_copyarea.patch  | 400 ++++++++++++++++++
 ...-accommodate-tag-ordered-controllers.patch |  96 +++++
 ...-width-is-not-a-multiple-of-8-pixels.patch |  88 ++++
 queue-3.4/mach64-use-unaligned-access.patch   |  46 ++
 ...e-the-registers-m_access-and-m_pitch.patch | 157 +++++++
 queue-3.4/series                              |   7 +
 8 files changed, 964 insertions(+)
 create mode 100644 queue-3.4/b43-fix-machine-check-error-due-to-improper-access-of-b43_mmio_psm_phy_hdr.patch
 create mode 100644 queue-3.4/crypto-ghash-clmulni-intel-use-c-implementation-for-setkey.patch
 create mode 100644 queue-3.4/framebuffer-fix-cfb_copyarea.patch
 create mode 100644 queue-3.4/libata-ahci-accommodate-tag-ordered-controllers.patch
 create mode 100644 queue-3.4/mach64-fix-cursor-when-character-width-is-not-a-multiple-of-8-pixels.patch
 create mode 100644 queue-3.4/mach64-use-unaligned-access.patch
 create mode 100644 queue-3.4/matroxfb-restore-the-registers-m_access-and-m_pitch.patch

diff --git a/queue-3.4/b43-fix-machine-check-error-due-to-improper-access-of-b43_mmio_psm_phy_hdr.patch b/queue-3.4/b43-fix-machine-check-error-due-to-improper-access-of-b43_mmio_psm_phy_hdr.patch
new file mode 100644
index 00000000000..86f3381c5a7
--- /dev/null
+++ b/queue-3.4/b43-fix-machine-check-error-due-to-improper-access-of-b43_mmio_psm_phy_hdr.patch
@@ -0,0 +1,65 @@
+From 12cd43c6ed6da7bf7c5afbd74da6959cda6d056b Mon Sep 17 00:00:00 2001
+From: Rafał Miłecki <zajec5@gmail.com>
+Date: Sat, 5 Apr 2014 18:08:25 +0200
+Subject: b43: Fix machine check error due to improper access of B43_MMIO_PSM_PHY_HDR
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Rafał Miłecki <zajec5@gmail.com>
+
+commit 12cd43c6ed6da7bf7c5afbd74da6959cda6d056b upstream.
+
+Register B43_MMIO_PSM_PHY_HDR is 16 bit one, so accessing it with 32b
+functions isn't safe. On my machine it causes delayed (!) CPU exception:
+
+Disabling lock debugging due to kernel taint
+mce: [Hardware Error]: CPU 0: Machine Check Exception: 4 Bank 4: b200000000070f0f
+mce: [Hardware Error]: TSC 164083803dc
+mce: [Hardware Error]: PROCESSOR 2:20fc2 TIME 1396650505 SOCKET 0 APIC 0 microcode 0
+mce: [Hardware Error]: Run the above through 'mcelog --ascii'
+mce: [Hardware Error]: Machine check: Processor context corrupt
+Kernel panic - not syncing: Fatal machine check on current CPU
+Kernel Offset: 0x0 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffff9fffffff)
+
+Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
+Acked-by: Larry Finger <Larry.Finger@lwfinger.net>
+Signed-off-by: John W. Linville <linville@tuxdriver.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/wireless/b43/phy_n.c |   14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+--- a/drivers/net/wireless/b43/phy_n.c
++++ b/drivers/net/wireless/b43/phy_n.c
+@@ -4599,22 +4599,22 @@ static void b43_nphy_channel_setup(struc
+ 	int ch = new_channel->hw_value;
+ 
+ 	u16 old_band_5ghz;
+-	u32 tmp32;
++	u16 tmp16;
+ 
+ 	old_band_5ghz =
+ 		b43_phy_read(dev, B43_NPHY_BANDCTL) & B43_NPHY_BANDCTL_5GHZ;
+ 	if (new_channel->band == IEEE80211_BAND_5GHZ && !old_band_5ghz) {
+-		tmp32 = b43_read32(dev, B43_MMIO_PSM_PHY_HDR);
+-		b43_write32(dev, B43_MMIO_PSM_PHY_HDR, tmp32 | 4);
++		tmp16 = b43_read16(dev, B43_MMIO_PSM_PHY_HDR);
++		b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp16 | 4);
+ 		b43_phy_set(dev, B43_PHY_B_BBCFG, 0xC000);
+-		b43_write32(dev, B43_MMIO_PSM_PHY_HDR, tmp32);
++		b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp16);
+ 		b43_phy_set(dev, B43_NPHY_BANDCTL, B43_NPHY_BANDCTL_5GHZ);
+ 	} else if (new_channel->band == IEEE80211_BAND_2GHZ && old_band_5ghz) {
+ 		b43_phy_mask(dev, B43_NPHY_BANDCTL, ~B43_NPHY_BANDCTL_5GHZ);
+-		tmp32 = b43_read32(dev, B43_MMIO_PSM_PHY_HDR);
+-		b43_write32(dev, B43_MMIO_PSM_PHY_HDR, tmp32 | 4);
++		tmp16 = b43_read16(dev, B43_MMIO_PSM_PHY_HDR);
++		b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp16 | 4);
+ 		b43_phy_mask(dev, B43_PHY_B_BBCFG, 0x3FFF);
+-		b43_write32(dev, B43_MMIO_PSM_PHY_HDR, tmp32);
++		b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp16);
+ 	}
+ 
+ 	b43_chantab_phy_upload(dev, e);
diff --git a/queue-3.4/crypto-ghash-clmulni-intel-use-c-implementation-for-setkey.patch b/queue-3.4/crypto-ghash-clmulni-intel-use-c-implementation-for-setkey.patch
new file mode 100644
index 00000000000..9abd16b5f22
--- /dev/null
+++ b/queue-3.4/crypto-ghash-clmulni-intel-use-c-implementation-for-setkey.patch
@@ -0,0 +1,105 @@
+From 8ceee72808d1ae3fb191284afc2257a2be964725 Mon Sep 17 00:00:00 2001
+From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+Date: Thu, 27 Mar 2014 18:14:40 +0100
+Subject: crypto: ghash-clmulni-intel - use C implementation for setkey()
+
+From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+
+commit 8ceee72808d1ae3fb191284afc2257a2be964725 upstream.
+
+The GHASH setkey() function uses SSE registers but fails to call
+kernel_fpu_begin()/kernel_fpu_end(). Instead of adding these calls, and
+then having to deal with the restriction that they cannot be called from
+interrupt context, move the setkey() implementation to the C domain.
+
+Note that setkey() does not use any particular SSE features and is not
+expected to become a performance bottleneck.
+
+Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+Acked-by: H. Peter Anvin <hpa@linux.intel.com>
+Fixes: 0e1227d356e9b (crypto: ghash - Add PCLMULQDQ accelerated implementation)
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/crypto/ghash-clmulni-intel_asm.S  |   28 ----------------------------
+ arch/x86/crypto/ghash-clmulni-intel_glue.c |   14 +++++++++++---
+ 2 files changed, 11 insertions(+), 31 deletions(-)
+
+--- a/arch/x86/crypto/ghash-clmulni-intel_asm.S
++++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S
+@@ -24,10 +24,6 @@
+ .align 16
+ .Lbswap_mask:
+ 	.octa 0x000102030405060708090a0b0c0d0e0f
+-.Lpoly:
+-	.octa 0xc2000000000000000000000000000001
+-.Ltwo_one:
+-	.octa 0x00000001000000000000000000000001
+ 
+ #define DATA	%xmm0
+ #define SHASH	%xmm1
+@@ -131,27 +127,3 @@ ENTRY(clmul_ghash_update)
+ 	movups DATA, (%rdi)
+ .Lupdate_just_ret:
+ 	ret
+-
+-/*
+- * void clmul_ghash_setkey(be128 *shash, const u8 *key);
+- *
+- * Calculate hash_key << 1 mod poly
+- */
+-ENTRY(clmul_ghash_setkey)
+-	movaps .Lbswap_mask, BSWAP
+-	movups (%rsi), %xmm0
+-	PSHUFB_XMM BSWAP %xmm0
+-	movaps %xmm0, %xmm1
+-	psllq $1, %xmm0
+-	psrlq $63, %xmm1
+-	movaps %xmm1, %xmm2
+-	pslldq $8, %xmm1
+-	psrldq $8, %xmm2
+-	por %xmm1, %xmm0
+-	# reduction
+-	pshufd $0b00100100, %xmm2, %xmm1
+-	pcmpeqd .Ltwo_one, %xmm1
+-	pand .Lpoly, %xmm1
+-	pxor %xmm1, %xmm0
+-	movups %xmm0, (%rdi)
+-	ret
+--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
++++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
+@@ -30,8 +30,6 @@ void clmul_ghash_mul(char *dst, const be
+ void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
+ 			const be128 *shash);
+ 
+-void clmul_ghash_setkey(be128 *shash, const u8 *key);
+-
+ struct ghash_async_ctx {
+ 	struct cryptd_ahash *cryptd_tfm;
+ };
+@@ -58,13 +56,23 @@ static int ghash_setkey(struct crypto_sh
+ 			const u8 *key, unsigned int keylen)
+ {
+ 	struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
++	be128 *x = (be128 *)key;
++	u64 a, b;
+ 
+ 	if (keylen != GHASH_BLOCK_SIZE) {
+ 		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+ 		return -EINVAL;
+ 	}
+ 
+-	clmul_ghash_setkey(&ctx->shash, key);
++	/* perform multiplication by 'x' in GF(2^128) */
++	a = be64_to_cpu(x->a);
++	b = be64_to_cpu(x->b);
++
++	ctx->shash.a = (__be64)((b << 1) | (a >> 63));
++	ctx->shash.b = (__be64)((a << 1) | (b >> 63));
++
++	if (a >> 63)
++		ctx->shash.b ^= cpu_to_be64(0xc2);
+ 
+ 	return 0;
+ }
diff --git a/queue-3.4/framebuffer-fix-cfb_copyarea.patch b/queue-3.4/framebuffer-fix-cfb_copyarea.patch
new file mode 100644
index 00000000000..3513e07dec5
--- /dev/null
+++ b/queue-3.4/framebuffer-fix-cfb_copyarea.patch
@@ -0,0 +1,400 @@
+From 00a9d699bc85052d2d3ed56251cd928024ce06a3 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Thu, 23 Jan 2014 14:39:29 -0500
+Subject: framebuffer: fix cfb_copyarea
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit 00a9d699bc85052d2d3ed56251cd928024ce06a3 upstream.
+
+The function cfb_copyarea is buggy when the copy operation is not aligned on
+long boundary (4 bytes on 32-bit machines, 8 bytes on 64-bit machines).
+
+How to reproduce:
+- use x86-64 machine
+- use a framebuffer driver without acceleration (for example uvesafb)
+- set the framebuffer to 8-bit depth
+	(for example fbset -a 1024x768-60 -depth 8)
+- load a font with character width that is not a multiple of 8 pixels
+	note: the console-tools package cannot load a font that has
+	width different from 8 pixels. You need to install the packages
+	"kbd" and "console-terminus" and use the program "setfont" to
+	set font width (for example: setfont Uni2-Terminus20x10)
+- move some text left and right on the bash command line and you get a
+	screen corruption
+
+To expose more bugs, put this line to the end of uvesafb_init_info:
+info->flags |= FBINFO_HWACCEL_COPYAREA | FBINFO_READS_FAST;
+- Now framebuffer console will use cfb_copyarea for console scrolling.
+You get a screen corruption when console is scrolled.
+
+This patch is a rewrite of cfb_copyarea. It fixes the bugs, with this
+patch, console scrolling in 8-bit depth with a font width that is not a
+multiple of 8 pixels works fine.
+
+The cfb_copyarea code was very buggy and it looks like it was written
+and never tried with non-8-pixel font.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/video/cfbcopyarea.c |  153 ++++++++++++++++++++++----------------------
+ 1 file changed, 78 insertions(+), 75 deletions(-)
+
+--- a/drivers/video/cfbcopyarea.c
++++ b/drivers/video/cfbcopyarea.c
+@@ -43,13 +43,22 @@
+      */
+ 
+ static void
+-bitcpy(struct fb_info *p, unsigned long __iomem *dst, int dst_idx,
+-		const unsigned long __iomem *src, int src_idx, int bits,
++bitcpy(struct fb_info *p, unsigned long __iomem *dst, unsigned dst_idx,
++		const unsigned long __iomem *src, unsigned src_idx, int bits,
+ 		unsigned n, u32 bswapmask)
+ {
+ 	unsigned long first, last;
+ 	int const shift = dst_idx-src_idx;
+-	int left, right;
++
++#if 0
++	/*
++	 * If you suspect bug in this function, compare it with this simple
++	 * memmove implementation.
++	 */
++	fb_memmove((char *)dst + ((dst_idx & (bits - 1))) / 8,
++		   (char *)src + ((src_idx & (bits - 1))) / 8, n / 8);
++	return;
++#endif
+ 
+ 	first = fb_shifted_pixels_mask_long(p, dst_idx, bswapmask);
+ 	last = ~fb_shifted_pixels_mask_long(p, (dst_idx+n) % bits, bswapmask);
+@@ -98,9 +107,8 @@ bitcpy(struct fb_info *p, unsigned long
+ 		unsigned long d0, d1;
+ 		int m;
+ 
+-		right = shift & (bits - 1);
+-		left = -shift & (bits - 1);
+-		bswapmask &= shift;
++		int const left = shift & (bits - 1);
++		int const right = -shift & (bits - 1);
+ 
+ 		if (dst_idx+n <= bits) {
+ 			// Single destination word
+@@ -110,15 +118,15 @@ bitcpy(struct fb_info *p, unsigned long
+ 			d0 = fb_rev_pixels_in_long(d0, bswapmask);
+ 			if (shift > 0) {
+ 				// Single source word
+-				d0 >>= right;
++				d0 <<= left;
+ 			} else if (src_idx+n <= bits) {
+ 				// Single source word
+-				d0 <<= left;
++				d0 >>= right;
+ 			} else {
+ 				// 2 source words
+ 				d1 = FB_READL(src + 1);
+ 				d1 = fb_rev_pixels_in_long(d1, bswapmask);
+-				d0 = d0<<left | d1>>right;
++				d0 = d0 >> right | d1 << left;
+ 			}
+ 			d0 = fb_rev_pixels_in_long(d0, bswapmask);
+ 			FB_WRITEL(comp(d0, FB_READL(dst), first), dst);
+@@ -135,60 +143,59 @@ bitcpy(struct fb_info *p, unsigned long
+ 			if (shift > 0) {
+ 				// Single source word
+ 				d1 = d0;
+-				d0 >>= right;
+-				dst++;
++				d0 <<= left;
+ 				n -= bits - dst_idx;
+ 			} else {
+ 				// 2 source words
+ 				d1 = FB_READL(src++);
+ 				d1 = fb_rev_pixels_in_long(d1, bswapmask);
+ 
+-				d0 = d0<<left | d1>>right;
+-				dst++;
++				d0 = d0 >> right | d1 << left;
+ 				n -= bits - dst_idx;
+ 			}
+ 			d0 = fb_rev_pixels_in_long(d0, bswapmask);
+ 			FB_WRITEL(comp(d0, FB_READL(dst), first), dst);
+ 			d0 = d1;
++			dst++;
+ 
+ 			// Main chunk
+ 			m = n % bits;
+ 			n /= bits;
+ 			while ((n >= 4) && !bswapmask) {
+ 				d1 = FB_READL(src++);
+-				FB_WRITEL(d0 << left | d1 >> right, dst++);
++				FB_WRITEL(d0 >> right | d1 << left, dst++);
+ 				d0 = d1;
+ 				d1 = FB_READL(src++);
+-				FB_WRITEL(d0 << left | d1 >> right, dst++);
++				FB_WRITEL(d0 >> right | d1 << left, dst++);
+ 				d0 = d1;
+ 				d1 = FB_READL(src++);
+-				FB_WRITEL(d0 << left | d1 >> right, dst++);
++				FB_WRITEL(d0 >> right | d1 << left, dst++);
+ 				d0 = d1;
+ 				d1 = FB_READL(src++);
+-				FB_WRITEL(d0 << left | d1 >> right, dst++);
++				FB_WRITEL(d0 >> right | d1 << left, dst++);
+ 				d0 = d1;
+ 				n -= 4;
+ 			}
+ 			while (n--) {
+ 				d1 = FB_READL(src++);
+ 				d1 = fb_rev_pixels_in_long(d1, bswapmask);
+-				d0 = d0 << left | d1 >> right;
++				d0 = d0 >> right | d1 << left;
+ 				d0 = fb_rev_pixels_in_long(d0, bswapmask);
+ 				FB_WRITEL(d0, dst++);
+ 				d0 = d1;
+ 			}
+ 
+ 			// Trailing bits
+-			if (last) {
+-				if (m <= right) {
++			if (m) {
++				if (m <= bits - right) {
+ 					// Single source word
+-					d0 <<= left;
++					d0 >>= right;
+ 				} else {
+ 					// 2 source words
+ 					d1 = FB_READL(src);
+ 					d1 = fb_rev_pixels_in_long(d1,
+ 								bswapmask);
+-					d0 = d0<<left | d1>>right;
++					d0 = d0 >> right | d1 << left;
+ 				}
+ 				d0 = fb_rev_pixels_in_long(d0, bswapmask);
+ 				FB_WRITEL(comp(d0, FB_READL(dst), last), dst);
+@@ -202,43 +209,46 @@ bitcpy(struct fb_info *p, unsigned long
+      */
+ 
+ static void
+-bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, int dst_idx,
+-		const unsigned long __iomem *src, int src_idx, int bits,
++bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, unsigned dst_idx,
++		const unsigned long __iomem *src, unsigned src_idx, int bits,
+ 		unsigned n, u32 bswapmask)
+ {
+ 	unsigned long first, last;
+ 	int shift;
+ 
+-	dst += (n-1)/bits;
+-	src += (n-1)/bits;
+-	if ((n-1) % bits) {
+-		dst_idx += (n-1) % bits;
+-		dst += dst_idx >> (ffs(bits) - 1);
+-		dst_idx &= bits - 1;
+-		src_idx += (n-1) % bits;
+-		src += src_idx >> (ffs(bits) - 1);
+-		src_idx &= bits - 1;
+-	}
++#if 0
++	/*
++	 * If you suspect bug in this function, compare it with this simple
++	 * memmove implementation.
++	 */
++	fb_memmove((char *)dst + ((dst_idx & (bits - 1))) / 8,
++		   (char *)src + ((src_idx & (bits - 1))) / 8, n / 8);
++	return;
++#endif
++
++	dst += (dst_idx + n - 1) / bits;
++	src += (src_idx + n - 1) / bits;
++	dst_idx = (dst_idx + n - 1) % bits;
++	src_idx = (src_idx + n - 1) % bits;
+ 
+ 	shift = dst_idx-src_idx;
+ 
+-	first = fb_shifted_pixels_mask_long(p, bits - 1 - dst_idx, bswapmask);
+-	last = ~fb_shifted_pixels_mask_long(p, bits - 1 - ((dst_idx-n) % bits),
+-					    bswapmask);
++	first = ~fb_shifted_pixels_mask_long(p, (dst_idx + 1) % bits, bswapmask);
++	last = fb_shifted_pixels_mask_long(p, (bits + dst_idx + 1 - n) % bits, bswapmask);
+ 
+ 	if (!shift) {
+ 		// Same alignment for source and dest
+ 
+ 		if ((unsigned long)dst_idx+1 >= n) {
+ 			// Single word
+-			if (last)
+-				first &= last;
+-			FB_WRITEL( comp( FB_READL(src), FB_READL(dst), first), dst);
++			if (first)
++				last &= first;
++			FB_WRITEL( comp( FB_READL(src), FB_READL(dst), last), dst);
+ 		} else {
+ 			// Multiple destination words
+ 
+ 			// Leading bits
+-			if (first != ~0UL) {
++			if (first) {
+ 				FB_WRITEL( comp( FB_READL(src), FB_READL(dst), first), dst);
+ 				dst--;
+ 				src--;
+@@ -262,7 +272,7 @@ bitcpy_rev(struct fb_info *p, unsigned l
+ 				FB_WRITEL(FB_READL(src--), dst--);
+ 
+ 			// Trailing bits
+-			if (last)
++			if (last != -1UL)
+ 				FB_WRITEL( comp( FB_READL(src), FB_READL(dst), last), dst);
+ 		}
+ 	} else {
+@@ -270,29 +280,28 @@ bitcpy_rev(struct fb_info *p, unsigned l
+ 		unsigned long d0, d1;
+ 		int m;
+ 
+-		int const left = -shift & (bits-1);
+-		int const right = shift & (bits-1);
+-		bswapmask &= shift;
++		int const left = shift & (bits-1);
++		int const right = -shift & (bits-1);
+ 
+ 		if ((unsigned long)dst_idx+1 >= n) {
+ 			// Single destination word
+-			if (last)
+-				first &= last;
++			if (first)
++				last &= first;
+ 			d0 = FB_READL(src);
+ 			if (shift < 0) {
+ 				// Single source word
+-				d0 <<= left;
++				d0 >>= right;
+ 			} else if (1+(unsigned long)src_idx >= n) {
+ 				// Single source word
+-				d0 >>= right;
++				d0 <<= left;
+ 			} else {
+ 				// 2 source words
+ 				d1 = FB_READL(src - 1);
+ 				d1 = fb_rev_pixels_in_long(d1, bswapmask);
+-				d0 = d0>>right | d1<<left;
++				d0 = d0 << left | d1 >> right;
+ 			}
+ 			d0 = fb_rev_pixels_in_long(d0, bswapmask);
+-			FB_WRITEL(comp(d0, FB_READL(dst), first), dst);
++			FB_WRITEL(comp(d0, FB_READL(dst), last), dst);
+ 		} else {
+ 			// Multiple destination words
+ 			/** We must always remember the last value read, because in case
+@@ -307,12 +316,12 @@ bitcpy_rev(struct fb_info *p, unsigned l
+ 			if (shift < 0) {
+ 				// Single source word
+ 				d1 = d0;
+-				d0 <<= left;
++				d0 >>= right;
+ 			} else {
+ 				// 2 source words
+ 				d1 = FB_READL(src--);
+ 				d1 = fb_rev_pixels_in_long(d1, bswapmask);
+-				d0 = d0>>right | d1<<left;
++				d0 = d0 << left | d1 >> right;
+ 			}
+ 			d0 = fb_rev_pixels_in_long(d0, bswapmask);
+ 			FB_WRITEL(comp(d0, FB_READL(dst), first), dst);
+@@ -325,39 +334,39 @@ bitcpy_rev(struct fb_info *p, unsigned l
+ 			n /= bits;
+ 			while ((n >= 4) && !bswapmask) {
+ 				d1 = FB_READL(src--);
+-				FB_WRITEL(d0 >> right | d1 << left, dst--);
++				FB_WRITEL(d0 << left | d1 >> right, dst--);
+ 				d0 = d1;
+ 				d1 = FB_READL(src--);
+-				FB_WRITEL(d0 >> right | d1 << left, dst--);
++				FB_WRITEL(d0 << left | d1 >> right, dst--);
+ 				d0 = d1;
+ 				d1 = FB_READL(src--);
+-				FB_WRITEL(d0 >> right | d1 << left, dst--);
++				FB_WRITEL(d0 << left | d1 >> right, dst--);
+ 				d0 = d1;
+ 				d1 = FB_READL(src--);
+-				FB_WRITEL(d0 >> right | d1 << left, dst--);
++				FB_WRITEL(d0 << left | d1 >> right, dst--);
+ 				d0 = d1;
+ 				n -= 4;
+ 			}
+ 			while (n--) {
+ 				d1 = FB_READL(src--);
+ 				d1 = fb_rev_pixels_in_long(d1, bswapmask);
+-				d0 = d0 >> right | d1 << left;
++				d0 = d0 << left | d1 >> right;
+ 				d0 = fb_rev_pixels_in_long(d0, bswapmask);
+ 				FB_WRITEL(d0, dst--);
+ 				d0 = d1;
+ 			}
+ 
+ 			// Trailing bits
+-			if (last) {
+-				if (m <= left) {
++			if (m) {
++				if (m <= bits - left) {
+ 					// Single source word
+-					d0 >>= right;
++					d0 <<= left;
+ 				} else {
+ 					// 2 source words
+ 					d1 = FB_READL(src);
+ 					d1 = fb_rev_pixels_in_long(d1,
+ 								bswapmask);
+-					d0 = d0>>right | d1<<left;
++					d0 = d0 << left | d1 >> right;
+ 				}
+ 				d0 = fb_rev_pixels_in_long(d0, bswapmask);
+ 				FB_WRITEL(comp(d0, FB_READL(dst), last), dst);
+@@ -371,9 +380,9 @@ void cfb_copyarea(struct fb_info *p, con
+ 	u32 dx = area->dx, dy = area->dy, sx = area->sx, sy = area->sy;
+ 	u32 height = area->height, width = area->width;
+ 	unsigned long const bits_per_line = p->fix.line_length*8u;
+-	unsigned long __iomem *dst = NULL, *src = NULL;
++	unsigned long __iomem *base = NULL;
+ 	int bits = BITS_PER_LONG, bytes = bits >> 3;
+-	int dst_idx = 0, src_idx = 0, rev_copy = 0;
++	unsigned dst_idx = 0, src_idx = 0, rev_copy = 0;
+ 	u32 bswapmask = fb_compute_bswapmask(p);
+ 
+ 	if (p->state != FBINFO_STATE_RUNNING)
+@@ -389,7 +398,7 @@ void cfb_copyarea(struct fb_info *p, con
+ 
+ 	// split the base of the framebuffer into a long-aligned address and the
+ 	// index of the first bit
+-	dst = src = (unsigned long __iomem *)((unsigned long)p->screen_base & ~(bytes-1));
++	base = (unsigned long __iomem *)((unsigned long)p->screen_base & ~(bytes-1));
+ 	dst_idx = src_idx = 8*((unsigned long)p->screen_base & (bytes-1));
+ 	// add offset of source and target area
+ 	dst_idx += dy*bits_per_line + dx*p->var.bits_per_pixel;
+@@ -402,20 +411,14 @@ void cfb_copyarea(struct fb_info *p, con
+ 		while (height--) {
+ 			dst_idx -= bits_per_line;
+ 			src_idx -= bits_per_line;
+-			dst += dst_idx >> (ffs(bits) - 1);
+-			dst_idx &= (bytes - 1);
+-			src += src_idx >> (ffs(bits) - 1);
+-			src_idx &= (bytes - 1);
+-			bitcpy_rev(p, dst, dst_idx, src, src_idx, bits,
++			bitcpy_rev(p, base + (dst_idx / bits), dst_idx % bits,
++				base + (src_idx / bits), src_idx % bits, bits,
+ 				width*p->var.bits_per_pixel, bswapmask);
+ 		}
+ 	} else {
+ 		while (height--) {
+-			dst += dst_idx >> (ffs(bits) - 1);
+-			dst_idx &= (bytes - 1);
+-			src += src_idx >> (ffs(bits) - 1);
+-			src_idx &= (bytes - 1);
+-			bitcpy(p, dst, dst_idx, src, src_idx, bits,
++			bitcpy(p, base + (dst_idx / bits), dst_idx % bits,
++				base + (src_idx / bits), src_idx % bits, bits,
+ 				width*p->var.bits_per_pixel, bswapmask);
+ 			dst_idx += bits_per_line;
+ 			src_idx += bits_per_line;
diff --git a/queue-3.4/libata-ahci-accommodate-tag-ordered-controllers.patch b/queue-3.4/libata-ahci-accommodate-tag-ordered-controllers.patch
new file mode 100644
index 00000000000..88dbfdbffb5
--- /dev/null
+++ b/queue-3.4/libata-ahci-accommodate-tag-ordered-controllers.patch
@@ -0,0 +1,96 @@
+From 8a4aeec8d2d6a3edeffbdfae451cdf05cbf0fefd Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Thu, 17 Apr 2014 11:48:21 -0700
+Subject: libata/ahci: accommodate tag ordered controllers
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit 8a4aeec8d2d6a3edeffbdfae451cdf05cbf0fefd upstream.
+
+The AHCI spec allows implementations to issue commands in tag order
+rather than FIFO order:
+
+	5.3.2.12 P:SelectCmd
+	HBA sets pSlotLoc = (pSlotLoc + 1) mod (CAP.NCS + 1)
+	or HBA selects the command to issue that has had the
+	PxCI bit set to '1' longer than any other command
+	pending to be issued.
+
+The result is that commands posted sequentially (time-wise) may play out
+of sequence when issued by hardware.
+
+This behavior has likely been hidden by drives that arrange for commands
+to complete in issue order.  However, it appears recent drives (two from
+different vendors that we have found so far) inflict out-of-order
+completions as a matter of course.  So, we need to take care to maintain
+ordered submission, otherwise we risk triggering a drive to fall out of
+sequential-io automation and back to random-io processing, which incurs
+large latency and degrades throughput.
+
+This issue was found in simple benchmarks where QD=2 seq-write
+performance was 30-50% *greater* than QD=32 seq-write performance.
+
+Tagging for -stable and making the change globally since it has a low
+risk-to-reward ratio.  Also, word is that recent versions of an unnamed
+OS also do it this way now.  So, drives in the field are already
+experienced with this tag ordering scheme.
+
+Cc: Dave Jiang <dave.jiang@intel.com>
+Cc: Ed Ciechanowski <ed.ciechanowski@intel.com>
+Reviewed-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/ata/libata-core.c |   21 +++++++++++++--------
+ include/linux/libata.h    |    1 +
+ 2 files changed, 14 insertions(+), 8 deletions(-)
+
+--- a/drivers/ata/libata-core.c
++++ b/drivers/ata/libata-core.c
+@@ -4700,21 +4700,26 @@ void swap_buf_le16(u16 *buf, unsigned in
+ static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap)
+ {
+ 	struct ata_queued_cmd *qc = NULL;
+-	unsigned int i;
++	unsigned int i, tag;
+ 
+ 	/* no command while frozen */
+ 	if (unlikely(ap->pflags & ATA_PFLAG_FROZEN))
+ 		return NULL;
+ 
+-	/* the last tag is reserved for internal command. */
+-	for (i = 0; i < ATA_MAX_QUEUE - 1; i++)
+-		if (!test_and_set_bit(i, &ap->qc_allocated)) {
+-			qc = __ata_qc_from_tag(ap, i);
++	for (i = 0; i < ATA_MAX_QUEUE; i++) {
++		tag = (i + ap->last_tag + 1) % ATA_MAX_QUEUE;
++
++		/* the last tag is reserved for internal command. */
++		if (tag == ATA_TAG_INTERNAL)
++			continue;
++
++		if (!test_and_set_bit(tag, &ap->qc_allocated)) {
++			qc = __ata_qc_from_tag(ap, tag);
++			qc->tag = tag;
++			ap->last_tag = tag;
+ 			break;
+ 		}
+-
+-	if (qc)
+-		qc->tag = i;
++	}
+ 
+ 	return qc;
+ }
+--- a/include/linux/libata.h
++++ b/include/linux/libata.h
+@@ -762,6 +762,7 @@ struct ata_port {
+ 	unsigned long		qc_allocated;
+ 	unsigned int		qc_active;
+ 	int			nr_active_links; /* #links with active qcs */
++	unsigned int		last_tag;	/* track next tag hw expects */
+ 
+ 	struct ata_link		link;		/* host default link */
+ 	struct ata_link		*slave_link;	/* see ata_slave_link_init() */
diff --git a/queue-3.4/mach64-fix-cursor-when-character-width-is-not-a-multiple-of-8-pixels.patch b/queue-3.4/mach64-fix-cursor-when-character-width-is-not-a-multiple-of-8-pixels.patch
new file mode 100644
index 00000000000..389943573f8
--- /dev/null
+++ b/queue-3.4/mach64-fix-cursor-when-character-width-is-not-a-multiple-of-8-pixels.patch
@@ -0,0 +1,88 @@
+From 43751a1b8ee2e70ce392bf31ef3133da324e68b3 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Thu, 23 Jan 2014 14:41:59 -0500
+Subject: mach64: fix cursor when character width is not a multiple of 8 pixels
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit 43751a1b8ee2e70ce392bf31ef3133da324e68b3 upstream.
+
+This patch fixes the hardware cursor on mach64 when font width is not a
+multiple of 8 pixels.
+
+If you load such a font, the cursor is expanded to the next 8-pixel
+boundary and a part of the next character after the cursor is not
+visible.
+For example, when you load a font with 12-pixel width, the cursor width
+is 16 pixels and when the cursor is displayed, 4 pixels of the next
+character are not visible.
+
+The reason is this: atyfb_cursor is called with proper parameters to
+load an image that is 12-pixel wide. However, the number is aligned on
+the next 8-pixel boundary on the line
+"unsigned int width = (cursor->image.width + 7) >> 3;" and the whole
+function acts as if it was loading a 16-pixel image.
+
+This patch fixes it so that the value written to the framebuffer is
+padded with 0xaaaa (the transparent pattern) when the image size is not
+a multiple of 8 pixels. The transparent pattern ensures that the cursor
+does not interfere with the next character.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/video/aty/mach64_cursor.c |   22 ++++++++++++++++------
+ 1 file changed, 16 insertions(+), 6 deletions(-)
+
+--- a/drivers/video/aty/mach64_cursor.c
++++ b/drivers/video/aty/mach64_cursor.c
+@@ -5,6 +5,7 @@
+ #include <linux/fb.h>
+ #include <linux/init.h>
+ #include <linux/string.h>
++#include "../fb_draw.h"
+ 
+ #include <asm/io.h>
+ 
+@@ -157,24 +158,33 @@ static int atyfb_cursor(struct fb_info *
+ 
+ 	    for (i = 0; i < height; i++) {
+ 		for (j = 0; j < width; j++) {
++			u16 l = 0xaaaa;
+ 			b = *src++;
+ 			m = *msk++;
+ 			switch (cursor->rop) {
+ 			case ROP_XOR:
+ 			    // Upper 4 bits of mask data
+-			    fb_writeb(cursor_bits_lookup[(b ^ m) >> 4], dst++);
++			    l = cursor_bits_lookup[(b ^ m) >> 4] |
+ 			    // Lower 4 bits of mask
+-			    fb_writeb(cursor_bits_lookup[(b ^ m) & 0x0f],
+-				      dst++);
++				    (cursor_bits_lookup[(b ^ m) & 0x0f] << 8);
+ 			    break;
+ 			case ROP_COPY:
+ 			    // Upper 4 bits of mask data
+-			    fb_writeb(cursor_bits_lookup[(b & m) >> 4], dst++);
++			    l = cursor_bits_lookup[(b & m) >> 4] |
+ 			    // Lower 4 bits of mask
+-			    fb_writeb(cursor_bits_lookup[(b & m) & 0x0f],
+-				      dst++);
++				    (cursor_bits_lookup[(b & m) & 0x0f] << 8);
+ 			    break;
+ 			}
++			/*
++			 * If cursor size is not a multiple of 8 characters
++			 * we must pad it with transparent pattern (0xaaaa).
++			 */
++			if ((j + 1) * 8 > cursor->image.width) {
++				l = comp(l, 0xaaaa,
++				    (1 << ((cursor->image.width & 7) * 2)) - 1);
++			}
++			fb_writeb(l & 0xff, dst++);
++			fb_writeb(l >> 8, dst++);
+ 		}
+ 		dst += offset;
+ 	    }
diff --git a/queue-3.4/mach64-use-unaligned-access.patch b/queue-3.4/mach64-use-unaligned-access.patch
new file mode 100644
index 00000000000..123bf725586
--- /dev/null
+++ b/queue-3.4/mach64-use-unaligned-access.patch
@@ -0,0 +1,46 @@
+From c29dd8696dc5dbd50b3ac441b8a26751277ba520 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Thu, 23 Jan 2014 14:41:09 -0500
+Subject: mach64: use unaligned access
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit c29dd8696dc5dbd50b3ac441b8a26751277ba520 upstream.
+
+This patch fixes mach64 to use unaligned access to the font bitmap.
+
+This fixes unaligned access warning on sparc64 when 14x8 font is loaded.
+
+On x86(64), unaligned access is handled in hardware, so both functions
+le32_to_cpup and get_unaligned_le32 perform the same operation.
+
+On RISC machines, unaligned access is not handled in hardware, so we
+better use get_unaligned_le32 to avoid the unaligned trap and warning.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/video/aty/mach64_accel.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/video/aty/mach64_accel.c
++++ b/drivers/video/aty/mach64_accel.c
+@@ -4,6 +4,7 @@
+  */
+ 
+ #include <linux/delay.h>
++#include <asm/unaligned.h>
+ #include <linux/fb.h>
+ #include <video/mach64.h>
+ #include "atyfb.h"
+@@ -419,7 +420,7 @@ void atyfb_imageblit(struct fb_info *inf
+ 		u32 *pbitmap, dwords = (src_bytes + 3) / 4;
+ 		for (pbitmap = (u32*)(image->data); dwords; dwords--, pbitmap++) {
+ 			wait_for_fifo(1, par);
+-			aty_st_le32(HOST_DATA0, le32_to_cpup(pbitmap), par);
++			aty_st_le32(HOST_DATA0, get_unaligned_le32(pbitmap), par);
+ 		}
+ 	}
+ 
diff --git a/queue-3.4/matroxfb-restore-the-registers-m_access-and-m_pitch.patch b/queue-3.4/matroxfb-restore-the-registers-m_access-and-m_pitch.patch
new file mode 100644
index 00000000000..384d1c660f1
--- /dev/null
+++ b/queue-3.4/matroxfb-restore-the-registers-m_access-and-m_pitch.patch
@@ -0,0 +1,157 @@
+From a772d4736641ec1b421ad965e13457c17379fc86 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Thu, 23 Jan 2014 14:39:04 -0500
+Subject: matroxfb: restore the registers M_ACCESS and M_PITCH
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit a772d4736641ec1b421ad965e13457c17379fc86 upstream.
+
+When X11 is running and the user switches back to console, the card
+modifies the content of registers M_MACCESS and M_PITCH at periodic
+intervals.
+
+This patch fixes it by restoring the content of these registers before
+issuing any accelerator command.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/video/matrox/matroxfb_accel.c |   38 +++++++++++++++++++++++++---------
+ drivers/video/matrox/matroxfb_base.h  |    2 +
+ 2 files changed, 30 insertions(+), 10 deletions(-)
+
+--- a/drivers/video/matrox/matroxfb_accel.c
++++ b/drivers/video/matrox/matroxfb_accel.c
+@@ -192,10 +192,18 @@ void matrox_cfbX_init(struct matrox_fb_i
+ 	minfo->accel.m_dwg_rect = M_DWG_TRAP | M_DWG_SOLID | M_DWG_ARZERO | M_DWG_SGNZERO | M_DWG_SHIFTZERO;
+ 	if (isMilleniumII(minfo)) minfo->accel.m_dwg_rect |= M_DWG_TRANSC;
+ 	minfo->accel.m_opmode = mopmode;
++	minfo->accel.m_access = maccess;
++	minfo->accel.m_pitch = mpitch;
+ }
+ 
+ EXPORT_SYMBOL(matrox_cfbX_init);
+ 
++static void matrox_accel_restore_maccess(struct matrox_fb_info *minfo)
++{
++	mga_outl(M_MACCESS, minfo->accel.m_access);
++	mga_outl(M_PITCH, minfo->accel.m_pitch);
++}
++
+ static void matrox_accel_bmove(struct matrox_fb_info *minfo, int vxres, int sy,
+ 			       int sx, int dy, int dx, int height, int width)
+ {
+@@ -207,7 +215,8 @@ static void matrox_accel_bmove(struct ma
+ 	CRITBEGIN
+ 
+ 	if ((dy < sy) || ((dy == sy) && (dx <= sx))) {
+-		mga_fifo(2);
++		mga_fifo(4);
++		matrox_accel_restore_maccess(minfo);
+ 		mga_outl(M_DWGCTL, M_DWG_BITBLT | M_DWG_SHIFTZERO | M_DWG_SGNZERO |
+ 			 M_DWG_BFCOL | M_DWG_REPLACE);
+ 		mga_outl(M_AR5, vxres);
+@@ -215,7 +224,8 @@ static void matrox_accel_bmove(struct ma
+ 		start = sy*vxres+sx+curr_ydstorg(minfo);
+ 		end = start+width;
+ 	} else {
+-		mga_fifo(3);
++		mga_fifo(5);
++		matrox_accel_restore_maccess(minfo);
+ 		mga_outl(M_DWGCTL, M_DWG_BITBLT | M_DWG_SHIFTZERO | M_DWG_BFCOL | M_DWG_REPLACE);
+ 		mga_outl(M_SGN, 5);
+ 		mga_outl(M_AR5, -vxres);
+@@ -224,7 +234,8 @@ static void matrox_accel_bmove(struct ma
+ 		start = end+width;
+ 		dy += height-1;
+ 	}
+-	mga_fifo(4);
++	mga_fifo(6);
++	matrox_accel_restore_maccess(minfo);
+ 	mga_outl(M_AR0, end);
+ 	mga_outl(M_AR3, start);
+ 	mga_outl(M_FXBNDRY, ((dx+width)<<16) | dx);
+@@ -246,7 +257,8 @@ static void matrox_accel_bmove_lin(struc
+ 	CRITBEGIN
+ 
+ 	if ((dy < sy) || ((dy == sy) && (dx <= sx))) {
+-		mga_fifo(2);
++		mga_fifo(4);
++		matrox_accel_restore_maccess(minfo);
+ 		mga_outl(M_DWGCTL, M_DWG_BITBLT | M_DWG_SHIFTZERO | M_DWG_SGNZERO |
+ 			M_DWG_BFCOL | M_DWG_REPLACE);
+ 		mga_outl(M_AR5, vxres);
+@@ -254,7 +266,8 @@ static void matrox_accel_bmove_lin(struc
+ 		start = sy*vxres+sx+curr_ydstorg(minfo);
+ 		end = start+width;
+ 	} else {
+-		mga_fifo(3);
++		mga_fifo(5);
++		matrox_accel_restore_maccess(minfo);
+ 		mga_outl(M_DWGCTL, M_DWG_BITBLT | M_DWG_SHIFTZERO | M_DWG_BFCOL | M_DWG_REPLACE);
+ 		mga_outl(M_SGN, 5);
+ 		mga_outl(M_AR5, -vxres);
+@@ -263,7 +276,8 @@ static void matrox_accel_bmove_lin(struc
+ 		start = end+width;
+ 		dy += height-1;
+ 	}
+-	mga_fifo(5);
++	mga_fifo(7);
++	matrox_accel_restore_maccess(minfo);
+ 	mga_outl(M_AR0, end);
+ 	mga_outl(M_AR3, start);
+ 	mga_outl(M_FXBNDRY, ((dx+width)<<16) | dx);
+@@ -298,7 +312,8 @@ static void matroxfb_accel_clear(struct
+ 
+ 	CRITBEGIN
+ 
+-	mga_fifo(5);
++	mga_fifo(7);
++	matrox_accel_restore_maccess(minfo);
+ 	mga_outl(M_DWGCTL, minfo->accel.m_dwg_rect | M_DWG_REPLACE);
+ 	mga_outl(M_FCOL, color);
+ 	mga_outl(M_FXBNDRY, ((sx + width) << 16) | sx);
+@@ -341,7 +356,8 @@ static void matroxfb_cfb4_clear(struct m
+ 	width >>= 1;
+ 	sx >>= 1;
+ 	if (width) {
+-		mga_fifo(5);
++		mga_fifo(7);
++		matrox_accel_restore_maccess(minfo);
+ 		mga_outl(M_DWGCTL, minfo->accel.m_dwg_rect | M_DWG_REPLACE2);
+ 		mga_outl(M_FCOL, bgx);
+ 		mga_outl(M_FXBNDRY, ((sx + width) << 16) | sx);
+@@ -415,7 +431,8 @@ static void matroxfb_1bpp_imageblit(stru
+ 
+ 	CRITBEGIN
+ 
+-	mga_fifo(3);
++	mga_fifo(5);
++	matrox_accel_restore_maccess(minfo);
+ 	if (easy)
+ 		mga_outl(M_DWGCTL, M_DWG_ILOAD | M_DWG_SGNZERO | M_DWG_SHIFTZERO | M_DWG_BMONOWF | M_DWG_LINEAR | M_DWG_REPLACE);
+ 	else
+@@ -425,7 +442,8 @@ static void matroxfb_1bpp_imageblit(stru
+ 	fxbndry = ((xx + width - 1) << 16) | xx;
+ 	mmio = minfo->mmio.vbase;
+ 
+-	mga_fifo(6);
++	mga_fifo(8);
++	matrox_accel_restore_maccess(minfo);
+ 	mga_writel(mmio, M_FXBNDRY, fxbndry);
+ 	mga_writel(mmio, M_AR0, ar0);
+ 	mga_writel(mmio, M_AR3, 0);
+--- a/drivers/video/matrox/matroxfb_base.h
++++ b/drivers/video/matrox/matroxfb_base.h
+@@ -307,6 +307,8 @@ struct matrox_accel_data {
+ #endif
+ 	u_int32_t	m_dwg_rect;
+ 	u_int32_t	m_opmode;
++	u_int32_t	m_access;
++	u_int32_t	m_pitch;
+ };
+ 
+ struct v4l2_queryctrl;
diff --git a/queue-3.4/series b/queue-3.4/series
index a70eaca99b8..cb1bd6b0cba 100644
--- a/queue-3.4/series
+++ b/queue-3.4/series
@@ -4,3 +4,10 @@ floppy-don-t-write-kernel-only-members-to-fdrawcmd-ioctl-output.patch
 mips-hibernate-flush-tlb-entries-in-swsusp_arch_resume.patch
 virtio_balloon-don-t-softlockup-on-huge-balloon-changes.patch
 mpt2sas-don-t-disable-device-twice-at-suspend.patch
+crypto-ghash-clmulni-intel-use-c-implementation-for-setkey.patch
+framebuffer-fix-cfb_copyarea.patch
+matroxfb-restore-the-registers-m_access-and-m_pitch.patch
+mach64-use-unaligned-access.patch
+mach64-fix-cursor-when-character-width-is-not-a-multiple-of-8-pixels.patch
+b43-fix-machine-check-error-due-to-improper-access-of-b43_mmio_psm_phy_hdr.patch
+libata-ahci-accommodate-tag-ordered-controllers.patch
-- 
2.47.3