From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Fri, 13 Jan 2012 00:42:10 +0000 (-0800)
Subject: 3.2-stable patches
X-Git-Tag: v3.1.10~20
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=7b5b1623c9ac496437366bb3d9e47a9dd5f4c130;p=thirdparty%2Fkernel%2Fstable-queue.git

3.2-stable patches

added patches:
	hid-bump-maximum-global-item-tag-report-size-to-96-bytes.patch
	hid-wiimote-select-input_ff_memless.patch
	ima-fix-invalid-memory-reference.patch
	ima-free-duplicate-measurement-memory.patch
	md-raid1-perform-bad-block-tests-for-writemostly-devices-too.patch
	nfs-fix-recent-breakage-to-nfs-error-handling.patch
	nfs-fix-regression-in-handling-of-context-option-in-nfsv4.patch
	nfs-retry-mounting-nfsroot.patch
	nfsv4.1-fix-backchannel-slotid-off-by-one-bug.patch
	nfsv4-include-bitmap-in-nfsv4-get-acl-data.patch
	pci-fix-pci_exp_type_rc_ec-value.patch
	pci-msi-disable-msi-interrupts-when-we-initialize-a-pci-device.patch
	pnfs-obj-must-return-layout-on-io-error.patch
	pnfs-obj-pnfs-errors-are-communicated-on-iodata-pnfs_error.patch
	pnp-work-around-dell-1536-1546-bios-mmconfig-bug-that-breaks-usb.patch
	scsi-mpt2sas-fix-for-memory-allocation-error-for-large-host-credits.patch
	scsi-mpt2sas-release-spinlock-for-the-raid-device-list-before-blocking-it.patch
	slub-fix-a-possible-memleak-in-__slab_alloc.patch
	ubi-fix-missing-scrub-when-there-is-a-bit-flip.patch
	ubi-fix-use-after-free-on-error-path.patch
	x86-pci-amd-factor-out-mmconfig-discovery.patch
	x86-pci-build-amd_bus.o-only-when-config_amd_nb-y.patch
	x86-pci-ignore-cpu-non-addressable-_crs-reserved-memory-resources.patch
	xen-xenbus-reject-replies-with-payload-xenstore_payload_max.patch
---

diff --git a/queue-3.2/hid-bump-maximum-global-item-tag-report-size-to-96-bytes.patch b/queue-3.2/hid-bump-maximum-global-item-tag-report-size-to-96-bytes.patch
new file mode 100644
index 00000000000..fa469e8d71c
--- /dev/null
+++ b/queue-3.2/hid-bump-maximum-global-item-tag-report-size-to-96-bytes.patch
@@ -0,0 +1,32 @@
+From e46e927b9b7e8d95526e69322855243882b7e1a3 Mon Sep 17 00:00:00 2001
+From: Chase Douglas <chase.douglas@canonical.com>
+Date: Mon, 7 Nov 2011 11:08:05 -0800
+Subject: HID: bump maximum global item tag report size to 96 bytes
+
+From: Chase Douglas <chase.douglas@canonical.com>
+
+commit e46e927b9b7e8d95526e69322855243882b7e1a3 upstream.
+
+This allows the latest N-Trig devices to function properly.
+
+BugLink: https://bugs.launchpad.net/bugs/724831
+
+Signed-off-by: Chase Douglas <chase.douglas@canonical.com>
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/hid/hid-core.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/hid/hid-core.c
++++ b/drivers/hid/hid-core.c
+@@ -362,7 +362,7 @@ static int hid_parser_global(struct hid_
+ 
+ 	case HID_GLOBAL_ITEM_TAG_REPORT_SIZE:
+ 		parser->global.report_size = item_udata(item);
+-		if (parser->global.report_size > 32) {
++		if (parser->global.report_size > 96) {
+ 			dbg_hid("invalid report_size %d\n",
+ 					parser->global.report_size);
+ 			return -1;
diff --git a/queue-3.2/hid-wiimote-select-input_ff_memless.patch b/queue-3.2/hid-wiimote-select-input_ff_memless.patch
new file mode 100644
index 00000000000..926fff51b95
--- /dev/null
+++ b/queue-3.2/hid-wiimote-select-input_ff_memless.patch
@@ -0,0 +1,31 @@
+From ef6f41157f3864d9bf42671b2ed66062dcafb72e Mon Sep 17 00:00:00 2001
+From: David Herrmann <dh.herrmann@googlemail.com>
+Date: Wed, 7 Dec 2011 21:33:59 +0100
+Subject: HID: wiimote: Select INPUT_FF_MEMLESS
+
+From: David Herrmann <dh.herrmann@googlemail.com>
+
+commit ef6f41157f3864d9bf42671b2ed66062dcafb72e upstream.
+
+We depend on memless force-feedback support, therefore correctly select the
+related config options.
+
+Reported-by: Randy Dunlap <rdunlap@xenotime.net>
+Signed-off-by: David Herrmann <dh.herrmann@googlemail.com>
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/hid/Kconfig |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/hid/Kconfig
++++ b/drivers/hid/Kconfig
+@@ -620,6 +620,7 @@ config HID_WIIMOTE
+ 	depends on BT_HIDP
+ 	depends on LEDS_CLASS
+ 	select POWER_SUPPLY
++	select INPUT_FF_MEMLESS
+ 	---help---
+ 	Support for the Nintendo Wii Remote bluetooth device.
+ 
diff --git a/queue-3.2/ima-fix-invalid-memory-reference.patch b/queue-3.2/ima-fix-invalid-memory-reference.patch
new file mode 100644
index 00000000000..15cc3b41a74
--- /dev/null
+++ b/queue-3.2/ima-fix-invalid-memory-reference.patch
@@ -0,0 +1,66 @@
+From 7b7e5916aa2f46e57f8bd8cb89c34620ebfda5da Mon Sep 17 00:00:00 2001
+From: Roberto Sassu <roberto.sassu@polito.it>
+Date: Mon, 19 Dec 2011 15:57:28 +0100
+Subject: ima: fix invalid memory reference
+
+From: Roberto Sassu <roberto.sassu@polito.it>
+
+commit 7b7e5916aa2f46e57f8bd8cb89c34620ebfda5da upstream.
+
+Don't free a valid measurement entry on TPM PCR extend failure.
+
+Signed-off-by: Roberto Sassu <roberto.sassu@polito.it>
+Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ security/integrity/ima/ima_queue.c |   16 +++++++++++-----
+ 1 file changed, 11 insertions(+), 5 deletions(-)
+
+--- a/security/integrity/ima/ima_queue.c
++++ b/security/integrity/ima/ima_queue.c
+@@ -23,6 +23,8 @@
+ #include <linux/slab.h>
+ #include "ima.h"
+ 
++#define AUDIT_CAUSE_LEN_MAX 32
++
+ LIST_HEAD(ima_measurements);	/* list of all measurements */
+ 
+ /* key: inode (before secure-hashing a file) */
+@@ -94,7 +96,8 @@ static int ima_pcr_extend(const u8 *hash
+ 
+ 	result = tpm_pcr_extend(TPM_ANY_NUM, CONFIG_IMA_MEASURE_PCR_IDX, hash);
+ 	if (result != 0)
+-		pr_err("IMA: Error Communicating to TPM chip\n");
++		pr_err("IMA: Error Communicating to TPM chip, result: %d\n",
++		       result);
+ 	return result;
+ }
+ 
+@@ -106,8 +109,9 @@ int ima_add_template_entry(struct ima_te
+ {
+ 	u8 digest[IMA_DIGEST_SIZE];
+ 	const char *audit_cause = "hash_added";
++	char tpm_audit_cause[AUDIT_CAUSE_LEN_MAX];
+ 	int audit_info = 1;
+-	int result = 0;
++	int result = 0, tpmresult = 0;
+ 
+ 	mutex_lock(&ima_extend_list_mutex);
+ 	if (!violation) {
+@@ -129,9 +133,11 @@ int ima_add_template_entry(struct ima_te
+ 	if (violation)		/* invalidate pcr */
+ 		memset(digest, 0xff, sizeof digest);
+ 
+-	result = ima_pcr_extend(digest);
+-	if (result != 0) {
+-		audit_cause = "TPM error";
++	tpmresult = ima_pcr_extend(digest);
++	if (tpmresult != 0) {
++		snprintf(tpm_audit_cause, AUDIT_CAUSE_LEN_MAX, "TPM_error(%d)",
++			 tpmresult);
++		audit_cause = tpm_audit_cause;
+ 		audit_info = 0;
+ 	}
+ out:
diff --git a/queue-3.2/ima-free-duplicate-measurement-memory.patch b/queue-3.2/ima-free-duplicate-measurement-memory.patch
new file mode 100644
index 00000000000..7bf1f0c5307
--- /dev/null
+++ b/queue-3.2/ima-free-duplicate-measurement-memory.patch
@@ -0,0 +1,48 @@
+From 45fae7493970d7c45626ccd96d4a74f5f1eea5a9 Mon Sep 17 00:00:00 2001
+From: Roberto Sassu <roberto.sassu@polito.it>
+Date: Mon, 19 Dec 2011 15:57:27 +0100
+Subject: ima: free duplicate measurement memory
+
+From: Roberto Sassu <roberto.sassu@polito.it>
+
+commit 45fae7493970d7c45626ccd96d4a74f5f1eea5a9 upstream.
+
+Info about new measurements is cached in the iint for performance.  When
+the inode is flushed from cache, the associated iint is flushed as well.
+Subsequent access to the inode will cause the inode to be re-measured and
+will attempt to add a duplicate entry to the measurement list.
+
+This patch frees the duplicate measurement memory, fixing a memory leak.
+
+Signed-off-by: Roberto Sassu <roberto.sassu@polito.it>
+Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ security/integrity/ima/ima_api.c   |    4 ++--
+ security/integrity/ima/ima_queue.c |    1 +
+ 2 files changed, 3 insertions(+), 2 deletions(-)
+
+--- a/security/integrity/ima/ima_api.c
++++ b/security/integrity/ima/ima_api.c
+@@ -178,8 +178,8 @@ void ima_store_measurement(struct integr
+ 	strncpy(entry->template.file_name, filename, IMA_EVENT_NAME_LEN_MAX);
+ 
+ 	result = ima_store_template(entry, violation, inode);
+-	if (!result)
++	if (!result || result == -EEXIST)
+ 		iint->flags |= IMA_MEASURED;
+-	else
++	if (result < 0)
+ 		kfree(entry);
+ }
+--- a/security/integrity/ima/ima_queue.c
++++ b/security/integrity/ima/ima_queue.c
+@@ -114,6 +114,7 @@ int ima_add_template_entry(struct ima_te
+ 		memcpy(digest, entry->digest, sizeof digest);
+ 		if (ima_lookup_digest_entry(digest)) {
+ 			audit_cause = "hash_exists";
++			result = -EEXIST;
+ 			goto out;
+ 		}
+ 	}
diff --git a/queue-3.2/md-raid1-perform-bad-block-tests-for-writemostly-devices-too.patch b/queue-3.2/md-raid1-perform-bad-block-tests-for-writemostly-devices-too.patch
new file mode 100644
index 00000000000..41b3668b8dc
--- /dev/null
+++ b/queue-3.2/md-raid1-perform-bad-block-tests-for-writemostly-devices-too.patch
@@ -0,0 +1,53 @@
+From 307729c8bc5b5a41361af8af95906eee7552acb1 Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.de>
+Date: Mon, 9 Jan 2012 01:41:51 +1100
+Subject: md/raid1: perform bad-block tests for WriteMostly devices too.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: NeilBrown <neilb@suse.de>
+
+commit 307729c8bc5b5a41361af8af95906eee7552acb1 upstream.
+
+We normally try to avoid reading from write-mostly devices, but when
+we do we really have to check for bad blocks and be sure not to
+try reading them.
+
+With the current code, best_good_sectors might not get set and that
+causes zero-length read requests to be sent down which is very
+confusing.
+
+This bug was introduced in commit d2eb35acfdccbe2 and so the patch
+is suitable for 3.1.x and 3.2.x
+
+Reported-and-tested-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
+Reported-and-tested-by: Art -kwaak- van Breemen <ard@telegraafnet.nl>
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/md/raid1.c |   11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+--- a/drivers/md/raid1.c
++++ b/drivers/md/raid1.c
+@@ -525,8 +525,17 @@ static int read_balance(struct r1conf *c
+ 		if (test_bit(WriteMostly, &rdev->flags)) {
+ 			/* Don't balance among write-mostly, just
+ 			 * use the first as a last resort */
+-			if (best_disk < 0)
++			if (best_disk < 0) {
++				if (is_badblock(rdev, this_sector, sectors,
++						&first_bad, &bad_sectors)) {
++					if (first_bad < this_sector)
++						/* Cannot use this */
++						continue;
++					best_good_sectors = first_bad - this_sector;
++				} else
++					best_good_sectors = sectors;
+ 				best_disk = disk;
++			}
+ 			continue;
+ 		}
+ 		/* This is a reasonable device to use.  It might
diff --git a/queue-3.2/nfs-fix-recent-breakage-to-nfs-error-handling.patch b/queue-3.2/nfs-fix-recent-breakage-to-nfs-error-handling.patch
new file mode 100644
index 00000000000..8f55da04720
--- /dev/null
+++ b/queue-3.2/nfs-fix-recent-breakage-to-nfs-error-handling.patch
@@ -0,0 +1,58 @@
+From 2edb6bc3852c681c0d948245bd55108dc6407604 Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.de>
+Date: Wed, 16 Nov 2011 11:46:31 +1100
+Subject: NFS - fix recent breakage to NFS error handling.
+
+From: NeilBrown <neilb@suse.de>
+
+commit 2edb6bc3852c681c0d948245bd55108dc6407604 upstream.
+
+From c6d615d2b97fe305cbf123a8751ced859dca1d5e Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.de>
+Date: Wed, 16 Nov 2011 09:39:05 +1100
+Subject: NFS - fix recent breakage to NFS error handling.
+
+commit 02c24a82187d5a628c68edfe71ae60dc135cd178 made a small and
+presumably unintended change to write error handling in NFS.
+
+Previously an error from filemap_write_and_wait_range would only be of
+interest if nfs_file_fsync did not return an error.  After this commit,
+an error from filemap_write_and_wait_range would mean that (the rest of)
+nfs_file_fsync would not even be called.
+
+This means that:
+ 1/ you are more likely to see EIO than e.g. EDQUOT or ENOSPC.
+ 2/ NFS_CONTEXT_ERROR_WRITE remains set for longer so more writes are
+    synchronous.
+
+This patch restores previous behaviour.
+
+Cc: Josef Bacik <josef@redhat.com>
+Cc: Jan Kara <jack@suse.cz>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/nfs/file.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/nfs/file.c
++++ b/fs/nfs/file.c
+@@ -272,13 +272,13 @@ nfs_file_fsync(struct file *file, loff_t
+ 			datasync);
+ 
+ 	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+-	if (ret)
+-		return ret;
+ 	mutex_lock(&inode->i_mutex);
+ 
+ 	nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
+ 	have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
+ 	status = nfs_commit_inode(inode, FLUSH_SYNC);
++	if (status >= 0 && ret < 0)
++		status = ret;
+ 	have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
+ 	if (have_error)
+ 		ret = xchg(&ctx->error, 0);
diff --git a/queue-3.2/nfs-fix-regression-in-handling-of-context-option-in-nfsv4.patch b/queue-3.2/nfs-fix-regression-in-handling-of-context-option-in-nfsv4.patch
new file mode 100644
index 00000000000..11bd2a4f749
--- /dev/null
+++ b/queue-3.2/nfs-fix-regression-in-handling-of-context-option-in-nfsv4.patch
@@ -0,0 +1,144 @@
+From 8a0d551a59ac92d8ff048d6cb29d3a02073e81e8 Mon Sep 17 00:00:00 2001
+From: Jeff Layton <jlayton@redhat.com>
+Date: Tue, 20 Dec 2011 06:57:45 -0500
+Subject: nfs: fix regression in handling of context= option in NFSv4
+
+From: Jeff Layton <jlayton@redhat.com>
+
+commit 8a0d551a59ac92d8ff048d6cb29d3a02073e81e8 upstream.
+
+Setting the security context of a NFSv4 mount via the context= mount
+option is currently broken. The NFSv4 codepath allocates a parsed
+options struct, and then parses the mount options to fill it. It
+eventually calls nfs4_remote_mount which calls security_init_mnt_opts.
+That clobbers the lsm_opts struct that was populated earlier. This bug
+also looks like it causes a small memory leak on each v4 mount where
+context= is used.
+
+Fix this by moving the initialization of the lsm_opts into
+nfs_alloc_parsed_mount_data. Also, add a destructor for
+nfs_parsed_mount_data to make it easier to free all of the allocations
+hanging off of it, and to ensure that the security_free_mnt_opts is
+called whenever security_init_mnt_opts is.
+
+I believe this regression was introduced quite some time ago, probably
+by commit c02d7adf.
+
+Signed-off-by: Jeff Layton <jlayton@redhat.com>
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/nfs/super.c |   43 +++++++++++++++++++------------------------
+ 1 file changed, 19 insertions(+), 24 deletions(-)
+
+--- a/fs/nfs/super.c
++++ b/fs/nfs/super.c
+@@ -909,10 +909,24 @@ static struct nfs_parsed_mount_data *nfs
+ 		data->auth_flavor_len	= 1;
+ 		data->version		= version;
+ 		data->minorversion	= 0;
++		security_init_mnt_opts(&data->lsm_opts);
+ 	}
+ 	return data;
+ }
+ 
++static void nfs_free_parsed_mount_data(struct nfs_parsed_mount_data *data)
++{
++	if (data) {
++		kfree(data->client_address);
++		kfree(data->mount_server.hostname);
++		kfree(data->nfs_server.export_path);
++		kfree(data->nfs_server.hostname);
++		kfree(data->fscache_uniq);
++		security_free_mnt_opts(&data->lsm_opts);
++		kfree(data);
++	}
++}
++
+ /*
+  * Sanity-check a server address provided by the mount command.
+  *
+@@ -2220,9 +2234,7 @@ static struct dentry *nfs_fs_mount(struc
+ 	data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION);
+ 	mntfh = nfs_alloc_fhandle();
+ 	if (data == NULL || mntfh == NULL)
+-		goto out_free_fh;
+-
+-	security_init_mnt_opts(&data->lsm_opts);
++		goto out;
+ 
+ 	/* Validate the mount data */
+ 	error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name);
+@@ -2234,8 +2246,6 @@ static struct dentry *nfs_fs_mount(struc
+ #ifdef CONFIG_NFS_V4
+ 	if (data->version == 4) {
+ 		mntroot = nfs4_try_mount(flags, dev_name, data);
+-		kfree(data->client_address);
+-		kfree(data->nfs_server.export_path);
+ 		goto out;
+ 	}
+ #endif	/* CONFIG_NFS_V4 */
+@@ -2290,13 +2300,8 @@ static struct dentry *nfs_fs_mount(struc
+ 	s->s_flags |= MS_ACTIVE;
+ 
+ out:
+-	kfree(data->nfs_server.hostname);
+-	kfree(data->mount_server.hostname);
+-	kfree(data->fscache_uniq);
+-	security_free_mnt_opts(&data->lsm_opts);
+-out_free_fh:
++	nfs_free_parsed_mount_data(data);
+ 	nfs_free_fhandle(mntfh);
+-	kfree(data);
+ 	return mntroot;
+ 
+ out_err_nosb:
+@@ -2623,9 +2628,7 @@ nfs4_remote_mount(struct file_system_typ
+ 
+ 	mntfh = nfs_alloc_fhandle();
+ 	if (data == NULL || mntfh == NULL)
+-		goto out_free_fh;
+-
+-	security_init_mnt_opts(&data->lsm_opts);
++		goto out;
+ 
+ 	/* Get a volume representation */
+ 	server = nfs4_create_server(data, mntfh);
+@@ -2677,13 +2680,10 @@ nfs4_remote_mount(struct file_system_typ
+ 
+ 	s->s_flags |= MS_ACTIVE;
+ 
+-	security_free_mnt_opts(&data->lsm_opts);
+ 	nfs_free_fhandle(mntfh);
+ 	return mntroot;
+ 
+ out:
+-	security_free_mnt_opts(&data->lsm_opts);
+-out_free_fh:
+ 	nfs_free_fhandle(mntfh);
+ 	return ERR_PTR(error);
+ 
+@@ -2838,7 +2838,7 @@ static struct dentry *nfs4_mount(struct
+ 
+ 	data = nfs_alloc_parsed_mount_data(4);
+ 	if (data == NULL)
+-		goto out_free_data;
++		goto out;
+ 
+ 	/* Validate the mount data */
+ 	error = nfs4_validate_mount_data(raw_data, data, dev_name);
+@@ -2852,12 +2852,7 @@ static struct dentry *nfs4_mount(struct
+ 		error = PTR_ERR(res);
+ 
+ out:
+-	kfree(data->client_address);
+-	kfree(data->nfs_server.export_path);
+-	kfree(data->nfs_server.hostname);
+-	kfree(data->fscache_uniq);
+-out_free_data:
+-	kfree(data);
++	nfs_free_parsed_mount_data(data);
+ 	dprintk("<-- nfs4_mount() = %d%s\n", error,
+ 			error != 0 ? " [error]" : "");
+ 	return res;
diff --git a/queue-3.2/nfs-retry-mounting-nfsroot.patch b/queue-3.2/nfs-retry-mounting-nfsroot.patch
new file mode 100644
index 00000000000..100f62fbea3
--- /dev/null
+++ b/queue-3.2/nfs-retry-mounting-nfsroot.patch
@@ -0,0 +1,92 @@
+From 43717c7daebf10b43f12e68512484b3095bb1ba5 Mon Sep 17 00:00:00 2001
+From: Chuck Lever <chuck.lever@oracle.com>
+Date: Mon, 5 Dec 2011 15:40:30 -0500
+Subject: NFS: Retry mounting NFSROOT
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+commit 43717c7daebf10b43f12e68512484b3095bb1ba5 upstream.
+
+Lukas Razik <linux@razik.name> reports that on his SPARC system,
+booting with an NFS root file system stopped working after commit
+56463e50 "NFS: Use super.c for NFSROOT mount option parsing."
+
+We found that the network switch to which Lukas' client was attached
+was delaying access to the LAN after the client's NIC driver reported
+that its link was up.  The delay was longer than the timeouts used in
+the NFS client during mounting.
+
+NFSROOT worked for Lukas before commit 56463e50 because in those
+kernels, the client's first operation was an rpcbind request to
+determine which port the NFS server was listening on.  When that
+request failed after a long timeout, the client simply selected the
+default NFS port (2049).  By that time the switch was allowing access
+to the LAN, and the mount succeeded.
+
+Neither of these client behaviors is desirable, so reverting 56463e50
+is really not a choice.  Instead, introduce a mechanism that retries
+the NFSROOT mount request several times.  This is the same tactic that
+normal user space NFS mounts employ to overcome server and network
+delays.
+
+Signed-off-by: Lukas Razik <linux@razik.name>
+[ cel: match kernel coding style, add proper patch description ]
+[ cel: add exponential back-off ]
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Tested-by: Lukas Razik <linux@razik.name>
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ init/do_mounts.c |   35 +++++++++++++++++++++++++++++++----
+ 1 file changed, 31 insertions(+), 4 deletions(-)
+
+--- a/init/do_mounts.c
++++ b/init/do_mounts.c
+@@ -398,15 +398,42 @@ out:
+ }
+  
+ #ifdef CONFIG_ROOT_NFS
++
++#define NFSROOT_TIMEOUT_MIN	5
++#define NFSROOT_TIMEOUT_MAX	30
++#define NFSROOT_RETRY_MAX	5
++
+ static int __init mount_nfs_root(void)
+ {
+ 	char *root_dev, *root_data;
++	unsigned int timeout;
++	int try, err;
+ 
+-	if (nfs_root_data(&root_dev, &root_data) != 0)
+-		return 0;
+-	if (do_mount_root(root_dev, "nfs", root_mountflags, root_data) != 0)
++	err = nfs_root_data(&root_dev, &root_data);
++	if (err != 0)
+ 		return 0;
+-	return 1;
++
++	/*
++	 * The server or network may not be ready, so try several
++	 * times.  Stop after a few tries in case the client wants
++	 * to fall back to other boot methods.
++	 */
++	timeout = NFSROOT_TIMEOUT_MIN;
++	for (try = 1; ; try++) {
++		err = do_mount_root(root_dev, "nfs",
++					root_mountflags, root_data);
++		if (err == 0)
++			return 1;
++		if (try > NFSROOT_RETRY_MAX)
++			break;
++
++		/* Wait, in case the server refused us immediately */
++		ssleep(timeout);
++		timeout <<= 1;
++		if (timeout > NFSROOT_TIMEOUT_MAX)
++			timeout = NFSROOT_TIMEOUT_MAX;
++	}
++	return 0;
+ }
+ #endif
+ 
diff --git a/queue-3.2/nfsv4-include-bitmap-in-nfsv4-get-acl-data.patch b/queue-3.2/nfsv4-include-bitmap-in-nfsv4-get-acl-data.patch
new file mode 100644
index 00000000000..85ac36ec97a
--- /dev/null
+++ b/queue-3.2/nfsv4-include-bitmap-in-nfsv4-get-acl-data.patch
@@ -0,0 +1,303 @@
+From bf118a342f10dafe44b14451a1392c3254629a1f Mon Sep 17 00:00:00 2001
+From: Andy Adamson <andros@netapp.com>
+Date: Wed, 7 Dec 2011 11:55:27 -0500
+Subject: NFSv4: include bitmap in nfsv4 get acl data
+
+From: Andy Adamson <andros@netapp.com>
+
+commit bf118a342f10dafe44b14451a1392c3254629a1f upstream.
+
+The NFSv4 bitmap size is unbounded: a server can return an arbitrary
+sized bitmap in an FATTR4_WORD0_ACL request.  Replace using the
+nfs4_fattr_bitmap_maxsz as a guess to the maximum bitmask returned by a server
+with the inclusion of the bitmap (xdr length plus bitmasks) and the acl data
+xdr length to the (cached) acl page data.
+
+This is a general solution to commit e5012d1f "NFSv4.1: update
+nfs4_fattr_bitmap_maxsz" and fixes hitting a BUG_ON in xdr_shrink_bufhead
+when getting ACLs.
+
+Fix a bug in decode_getacl that returned -EINVAL on ACLs > page when getxattr
+was called with a NULL buffer, preventing ACL > PAGE_SIZE from being retrieved.
+
+Signed-off-by: Andy Adamson <andros@netapp.com>
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/nfs/nfs4proc.c          |   96 ++++++++++++++++++++++++++-------------------
+ fs/nfs/nfs4xdr.c           |   31 ++++++++++----
+ include/linux/nfs_xdr.h    |    5 ++
+ include/linux/sunrpc/xdr.h |    2 
+ net/sunrpc/xdr.c           |    3 -
+ 5 files changed, 89 insertions(+), 48 deletions(-)
+
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -3430,19 +3430,6 @@ static inline int nfs4_server_supports_a
+  */
+ #define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT)
+ 
+-static void buf_to_pages(const void *buf, size_t buflen,
+-		struct page **pages, unsigned int *pgbase)
+-{
+-	const void *p = buf;
+-
+-	*pgbase = offset_in_page(buf);
+-	p -= *pgbase;
+-	while (p < buf + buflen) {
+-		*(pages++) = virt_to_page(p);
+-		p += PAGE_CACHE_SIZE;
+-	}
+-}
+-
+ static int buf_to_pages_noslab(const void *buf, size_t buflen,
+ 		struct page **pages, unsigned int *pgbase)
+ {
+@@ -3539,9 +3526,19 @@ out:
+ 	nfs4_set_cached_acl(inode, acl);
+ }
+ 
++/*
++ * The getxattr API returns the required buffer length when called with a
++ * NULL buf. The NFSv4 acl tool then calls getxattr again after allocating
++ * the required buf.  On a NULL buf, we send a page of data to the server
++ * guessing that the ACL request can be serviced by a page. If so, we cache
++ * up to the page of ACL data, and the 2nd call to getxattr is serviced by
++ * the cache. If not so, we throw away the page, and cache the required
++ * length. The next getxattr call will then produce another round trip to
++ * the server, this time with the input buf of the required size.
++ */
+ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
+ {
+-	struct page *pages[NFS4ACL_MAXPAGES];
++	struct page *pages[NFS4ACL_MAXPAGES] = {NULL, };
+ 	struct nfs_getaclargs args = {
+ 		.fh = NFS_FH(inode),
+ 		.acl_pages = pages,
+@@ -3556,41 +3553,60 @@ static ssize_t __nfs4_get_acl_uncached(s
+ 		.rpc_argp = &args,
+ 		.rpc_resp = &res,
+ 	};
+-	struct page *localpage = NULL;
+-	int ret;
++	int ret = -ENOMEM, npages, i, acl_len = 0;
+ 
+-	if (buflen < PAGE_SIZE) {
+-		/* As long as we're doing a round trip to the server anyway,
+-		 * let's be prepared for a page of acl data. */
+-		localpage = alloc_page(GFP_KERNEL);
+-		resp_buf = page_address(localpage);
+-		if (localpage == NULL)
+-			return -ENOMEM;
+-		args.acl_pages[0] = localpage;
+-		args.acl_pgbase = 0;
+-		args.acl_len = PAGE_SIZE;
+-	} else {
+-		resp_buf = buf;
+-		buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase);
++	npages = (buflen + PAGE_SIZE - 1) >> PAGE_SHIFT;
++	/* As long as we're doing a round trip to the server anyway,
++	 * let's be prepared for a page of acl data. */
++	if (npages == 0)
++		npages = 1;
++
++	for (i = 0; i < npages; i++) {
++		pages[i] = alloc_page(GFP_KERNEL);
++		if (!pages[i])
++			goto out_free;
+ 	}
+-	ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), &msg, &args.seq_args, &res.seq_res, 0);
++	if (npages > 1) {
++		/* for decoding across pages */
++		args.acl_scratch = alloc_page(GFP_KERNEL);
++		if (!args.acl_scratch)
++			goto out_free;
++	}
++	args.acl_len = npages * PAGE_SIZE;
++	args.acl_pgbase = 0;
++	/* Let decode_getfacl know not to fail if the ACL data is larger than
++	 * the page we send as a guess */
++	if (buf == NULL)
++		res.acl_flags |= NFS4_ACL_LEN_REQUEST;
++	resp_buf = page_address(pages[0]);
++
++	dprintk("%s  buf %p buflen %ld npages %d args.acl_len %ld\n",
++		__func__, buf, buflen, npages, args.acl_len);
++	ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode),
++			     &msg, &args.seq_args, &res.seq_res, 0);
+ 	if (ret)
+ 		goto out_free;
+-	if (res.acl_len > args.acl_len)
+-		nfs4_write_cached_acl(inode, NULL, res.acl_len);
++
++	acl_len = res.acl_len - res.acl_data_offset;
++	if (acl_len > args.acl_len)
++		nfs4_write_cached_acl(inode, NULL, acl_len);
+ 	else
+-		nfs4_write_cached_acl(inode, resp_buf, res.acl_len);
++		nfs4_write_cached_acl(inode, resp_buf + res.acl_data_offset,
++				      acl_len);
+ 	if (buf) {
+ 		ret = -ERANGE;
+-		if (res.acl_len > buflen)
++		if (acl_len > buflen)
+ 			goto out_free;
+-		if (localpage)
+-			memcpy(buf, resp_buf, res.acl_len);
++		_copy_from_pages(buf, pages, res.acl_data_offset,
++				res.acl_len);
+ 	}
+-	ret = res.acl_len;
++	ret = acl_len;
+ out_free:
+-	if (localpage)
+-		__free_page(localpage);
++	for (i = 0; i < npages; i++)
++		if (pages[i])
++			__free_page(pages[i]);
++	if (args.acl_scratch)
++		__free_page(args.acl_scratch);
+ 	return ret;
+ }
+ 
+@@ -3621,6 +3637,8 @@ static ssize_t nfs4_proc_get_acl(struct
+ 		nfs_zap_acl_cache(inode);
+ 	ret = nfs4_read_cached_acl(inode, buf, buflen);
+ 	if (ret != -ENOENT)
++		/* -ENOENT is returned if there is no ACL or if there is an ACL
++		 * but no cached acl data, just the acl length */
+ 		return ret;
+ 	return nfs4_get_acl_uncached(inode, buf, buflen);
+ }
+--- a/fs/nfs/nfs4xdr.c
++++ b/fs/nfs/nfs4xdr.c
+@@ -2517,11 +2517,13 @@ static void nfs4_xdr_enc_getacl(struct r
+ 	encode_compound_hdr(xdr, req, &hdr);
+ 	encode_sequence(xdr, &args->seq_args, &hdr);
+ 	encode_putfh(xdr, args->fh, &hdr);
+-	replen = hdr.replen + op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz + 1;
++	replen = hdr.replen + op_decode_hdr_maxsz + 1;
+ 	encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr);
+ 
+ 	xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
+ 		args->acl_pages, args->acl_pgbase, args->acl_len);
++	xdr_set_scratch_buffer(xdr, page_address(args->acl_scratch), PAGE_SIZE);
++
+ 	encode_nops(&hdr);
+ }
+ 
+@@ -4957,17 +4959,18 @@ decode_restorefh(struct xdr_stream *xdr)
+ }
+ 
+ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
+-		size_t *acl_len)
++			 struct nfs_getaclres *res)
+ {
+-	__be32 *savep;
++	__be32 *savep, *bm_p;
+ 	uint32_t attrlen,
+ 		 bitmap[3] = {0};
+ 	struct kvec *iov = req->rq_rcv_buf.head;
+ 	int status;
+ 
+-	*acl_len = 0;
++	res->acl_len = 0;
+ 	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+ 		goto out;
++	bm_p = xdr->p;
+ 	if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
+ 		goto out;
+ 	if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
+@@ -4979,18 +4982,30 @@ static int decode_getacl(struct xdr_stre
+ 		size_t hdrlen;
+ 		u32 recvd;
+ 
++		/* The bitmap (xdr len + bitmaps) and the attr xdr len words
++		 * are stored with the acl data to handle the problem of
++		 * variable length bitmaps.*/
++		xdr->p = bm_p;
++		res->acl_data_offset = be32_to_cpup(bm_p) + 2;
++		res->acl_data_offset <<= 2;
++
+ 		/* We ignore &savep and don't do consistency checks on
+ 		 * the attr length.  Let userspace figure it out.... */
+ 		hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base;
++		attrlen += res->acl_data_offset;
+ 		recvd = req->rq_rcv_buf.len - hdrlen;
+ 		if (attrlen > recvd) {
+-			dprintk("NFS: server cheating in getattr"
+-					" acl reply: attrlen %u > recvd %u\n",
++			if (res->acl_flags & NFS4_ACL_LEN_REQUEST) {
++				/* getxattr interface called with a NULL buf */
++				res->acl_len = attrlen;
++				goto out;
++			}
++			dprintk("NFS: acl reply: attrlen %u > recvd %u\n",
+ 					attrlen, recvd);
+ 			return -EINVAL;
+ 		}
+ 		xdr_read_pages(xdr, attrlen);
+-		*acl_len = attrlen;
++		res->acl_len = attrlen;
+ 	} else
+ 		status = -EOPNOTSUPP;
+ 
+@@ -6028,7 +6043,7 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqs
+ 	status = decode_putfh(xdr);
+ 	if (status)
+ 		goto out;
+-	status = decode_getacl(xdr, rqstp, &res->acl_len);
++	status = decode_getacl(xdr, rqstp, res);
+ 
+ out:
+ 	return status;
+--- a/include/linux/nfs_xdr.h
++++ b/include/linux/nfs_xdr.h
+@@ -602,11 +602,16 @@ struct nfs_getaclargs {
+ 	size_t				acl_len;
+ 	unsigned int			acl_pgbase;
+ 	struct page **			acl_pages;
++	struct page *			acl_scratch;
+ 	struct nfs4_sequence_args 	seq_args;
+ };
+ 
++/* getxattr ACL interface flags */
++#define NFS4_ACL_LEN_REQUEST	0x0001	/* zero length getxattr buffer */
+ struct nfs_getaclres {
+ 	size_t				acl_len;
++	size_t				acl_data_offset;
++	int				acl_flags;
+ 	struct nfs4_sequence_res	seq_res;
+ };
+ 
+--- a/include/linux/sunrpc/xdr.h
++++ b/include/linux/sunrpc/xdr.h
+@@ -191,6 +191,8 @@ extern int xdr_decode_array2(struct xdr_
+ 			     struct xdr_array2_desc *desc);
+ extern int xdr_encode_array2(struct xdr_buf *buf, unsigned int base,
+ 			     struct xdr_array2_desc *desc);
++extern void _copy_from_pages(char *p, struct page **pages, size_t pgbase,
++			     size_t len);
+ 
+ /*
+  * Provide some simple tools for XDR buffer overflow-checking etc.
+--- a/net/sunrpc/xdr.c
++++ b/net/sunrpc/xdr.c
+@@ -296,7 +296,7 @@ _copy_to_pages(struct page **pages, size
+  * Copies data into an arbitrary memory location from an array of pages
+  * The copy is assumed to be non-overlapping.
+  */
+-static void
++void
+ _copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len)
+ {
+ 	struct page **pgfrom;
+@@ -324,6 +324,7 @@ _copy_from_pages(char *p, struct page **
+ 
+ 	} while ((len -= copy) != 0);
+ }
++EXPORT_SYMBOL_GPL(_copy_from_pages);
+ 
+ /*
+  * xdr_shrink_bufhead
diff --git a/queue-3.2/nfsv4.1-fix-backchannel-slotid-off-by-one-bug.patch b/queue-3.2/nfsv4.1-fix-backchannel-slotid-off-by-one-bug.patch
new file mode 100644
index 00000000000..dd42c443520
--- /dev/null
+++ b/queue-3.2/nfsv4.1-fix-backchannel-slotid-off-by-one-bug.patch
@@ -0,0 +1,28 @@
+From 61f2e5106582d02f30b6807e3f9c07463c572ccb Mon Sep 17 00:00:00 2001
+From: Andy Adamson <andros@netapp.com>
+Date: Wed, 9 Nov 2011 13:58:20 -0500
+Subject: NFSv4.1: fix backchannel slotid off-by-one bug
+
+From: Andy Adamson <andros@netapp.com>
+
+commit 61f2e5106582d02f30b6807e3f9c07463c572ccb upstream.
+
+Signed-off-by: Andy Adamson <andros@netapp.com>
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/nfs/callback_proc.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/nfs/callback_proc.c
++++ b/fs/nfs/callback_proc.c
+@@ -339,7 +339,7 @@ validate_seqid(struct nfs4_slot_table *t
+ 	dprintk("%s enter. slotid %d seqid %d\n",
+ 		__func__, args->csa_slotid, args->csa_sequenceid);
+ 
+-	if (args->csa_slotid > NFS41_BC_MAX_CALLBACKS)
++	if (args->csa_slotid >= NFS41_BC_MAX_CALLBACKS)
+ 		return htonl(NFS4ERR_BADSLOT);
+ 
+ 	slot = tbl->slots + args->csa_slotid;
diff --git a/queue-3.2/pci-fix-pci_exp_type_rc_ec-value.patch b/queue-3.2/pci-fix-pci_exp_type_rc_ec-value.patch
new file mode 100644
index 00000000000..6f5f6ba45eb
--- /dev/null
+++ b/queue-3.2/pci-fix-pci_exp_type_rc_ec-value.patch
@@ -0,0 +1,30 @@
+From 1830ea91c20b06608f7cdb2455ce05ba834b3214 Mon Sep 17 00:00:00 2001
+From: Alex Williamson <alex.williamson@redhat.com>
+Date: Wed, 16 Nov 2011 09:24:16 -0700
+Subject: PCI: Fix PCI_EXP_TYPE_RC_EC value
+
+From: Alex Williamson <alex.williamson@redhat.com>
+
+commit 1830ea91c20b06608f7cdb2455ce05ba834b3214 upstream.
+
+Spec shows this as 1010b = 0xa
+
+Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
+Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ include/linux/pci_regs.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/linux/pci_regs.h
++++ b/include/linux/pci_regs.h
+@@ -392,7 +392,7 @@
+ #define  PCI_EXP_TYPE_DOWNSTREAM 0x6	/* Downstream Port */
+ #define  PCI_EXP_TYPE_PCI_BRIDGE 0x7	/* PCI/PCI-X Bridge */
+ #define  PCI_EXP_TYPE_RC_END	0x9	/* Root Complex Integrated Endpoint */
+-#define  PCI_EXP_TYPE_RC_EC	0x10	/* Root Complex Event Collector */
++#define  PCI_EXP_TYPE_RC_EC	0xa	/* Root Complex Event Collector */
+ #define PCI_EXP_FLAGS_SLOT	0x0100	/* Slot implemented */
+ #define PCI_EXP_FLAGS_IRQ	0x3e00	/* Interrupt message number */
+ #define PCI_EXP_DEVCAP		4	/* Device capabilities */
diff --git a/queue-3.2/pci-msi-disable-msi-interrupts-when-we-initialize-a-pci-device.patch b/queue-3.2/pci-msi-disable-msi-interrupts-when-we-initialize-a-pci-device.patch
new file mode 100644
index 00000000000..9200c64656f
--- /dev/null
+++ b/queue-3.2/pci-msi-disable-msi-interrupts-when-we-initialize-a-pci-device.patch
@@ -0,0 +1,50 @@
+From a776c491ca5e38c26d9f66923ff574d041e747f4 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Mon, 17 Oct 2011 11:46:06 -0700
+Subject: PCI: msi: Disable msi interrupts when we initialize a pci device
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit a776c491ca5e38c26d9f66923ff574d041e747f4 upstream.
+
+I traced a nasty kexec on panic boot failure to the fact that we had
+screaming msi interrupts and we were not disabling the msi messages at
+kernel startup.  The booting kernel had not enabled those interrupts so
+was not prepared to handle them.
+
+I can see no reason why we would ever want to leave the msi interrupts
+enabled at boot if something else has enabled those interrupts.  The pci
+spec specifies that msi interrupts should be off by default.  Drivers
+are expected to enable the msi interrupts if they want to use them.  Our
+interrupt handling code reprograms the interrupt handlers at boot and
+will not be able to do anything useful with an unexpected interrupt.
+
+This patch applies cleanly all of the way back to 2.6.32 where I noticed
+the problem.
+
+Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
+Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/pci/msi.c |   10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/drivers/pci/msi.c
++++ b/drivers/pci/msi.c
+@@ -870,5 +870,15 @@ EXPORT_SYMBOL(pci_msi_enabled);
+ 
+ void pci_msi_init_pci_dev(struct pci_dev *dev)
+ {
++	int pos;
+ 	INIT_LIST_HEAD(&dev->msi_list);
++
++	/* Disable the msi hardware to avoid screaming interrupts
++	 * during boot.  This is the power on reset default so
++	 * usually this should be a noop.
++	 */
++	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
++	if (pos)	/* skip devices without an MSI capability */
++		msi_set_enable(dev, pos, 0);
++	msix_set_enable(dev, 0);	/* NOTE(review): unconditional; presumably locates MSI-X itself - confirm */
+ }
diff --git a/queue-3.2/pnfs-obj-must-return-layout-on-io-error.patch b/queue-3.2/pnfs-obj-must-return-layout-on-io-error.patch
new file mode 100644
index 00000000000..1e340ec2af0
--- /dev/null
+++ b/queue-3.2/pnfs-obj-must-return-layout-on-io-error.patch
@@ -0,0 +1,86 @@
+From fe0fe83585f88346557868a803a479dfaaa0688a Mon Sep 17 00:00:00 2001
+From: Boaz Harrosh <bharrosh@panasas.com>
+Date: Fri, 6 Jan 2012 09:31:20 +0200
+Subject: pnfs-obj: Must return layout on IO error
+
+From: Boaz Harrosh <bharrosh@panasas.com>
+
+commit fe0fe83585f88346557868a803a479dfaaa0688a upstream.
+
+As mandated by the standard. In case of an IO error, a pNFS
+objects layout driver must return its layout. This is because
+all device errors are reported to the server as part of the
+layout return buffer.
+
+This is implemented the same way PNFS_LAYOUTRET_ON_SETATTR
+is done, through a bit flag on the pnfs_layoutdriver_type->flags
+member. The flag is set by the layout driver that wants a
+layout_return performed at pnfs_ld_{write,read}_done in case
+of an error.
+(Though I have not defined a wrapper like pnfs_ld_layoutret_on_setattr
+ because this code is never called outside of pnfs.c and pnfs IO
+ paths)
+
+Without this patch 3.[0-2] Kernels leak memory and have an annoying
+WARN_ON after every IO error utilizing the pnfs-obj driver.
+
+Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/nfs/objlayout/objio_osd.c |    3 ++-
+ fs/nfs/pnfs.c                |   12 ++++++++++++
+ fs/nfs/pnfs.h                |    1 +
+ 3 files changed, 15 insertions(+), 1 deletion(-)
+
+--- a/fs/nfs/objlayout/objio_osd.c
++++ b/fs/nfs/objlayout/objio_osd.c
+@@ -551,7 +551,8 @@ static const struct nfs_pageio_ops objio
+ static struct pnfs_layoutdriver_type objlayout_type = {
+ 	.id = LAYOUT_OSD2_OBJECTS,
+ 	.name = "LAYOUT_OSD2_OBJECTS",
+-	.flags                   = PNFS_LAYOUTRET_ON_SETATTR,
++	.flags                   = PNFS_LAYOUTRET_ON_SETATTR |
++				   PNFS_LAYOUTRET_ON_ERROR,
+ 
+ 	.alloc_layout_hdr        = objlayout_alloc_layout_hdr,
+ 	.free_layout_hdr         = objlayout_free_layout_hdr,
+--- a/fs/nfs/pnfs.c
++++ b/fs/nfs/pnfs.c
+@@ -1178,6 +1178,15 @@ void pnfs_ld_write_done(struct nfs_write
+ 		put_lseg(data->lseg);
+ 		data->lseg = NULL;
+ 		dprintk("pnfs write error = %d\n", data->pnfs_error);
++		if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags &
++						PNFS_LAYOUTRET_ON_ERROR) {
++			/* Don't lo_commit on error, Server will needs to
++			 * preform a file recovery.
++			 */
++			clear_bit(NFS_INO_LAYOUTCOMMIT,
++				  &NFS_I(data->inode)->flags);
++			pnfs_return_layout(data->inode);
++		}
+ 	}
+ 	data->mds_ops->rpc_release(data);
+ }
+@@ -1267,6 +1276,9 @@ static void pnfs_ld_handle_read_error(st
+ 	put_lseg(data->lseg);
+ 	data->lseg = NULL;
+ 	dprintk("pnfs write error = %d\n", data->pnfs_error);
++	if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags &
++						PNFS_LAYOUTRET_ON_ERROR)
++		pnfs_return_layout(data->inode);
+ 
+ 	nfs_pageio_init_read_mds(&pgio, data->inode);
+ 
+--- a/fs/nfs/pnfs.h
++++ b/fs/nfs/pnfs.h
+@@ -68,6 +68,7 @@ enum {
+ enum layoutdriver_policy_flags {
+ 	/* Should the pNFS client commit and return the layout upon a setattr */
+ 	PNFS_LAYOUTRET_ON_SETATTR	= 1 << 0,
++	PNFS_LAYOUTRET_ON_ERROR		= 1 << 1,
+ };
+ 
+ struct nfs4_deviceid_node;
diff --git a/queue-3.2/pnfs-obj-pnfs-errors-are-communicated-on-iodata-pnfs_error.patch b/queue-3.2/pnfs-obj-pnfs-errors-are-communicated-on-iodata-pnfs_error.patch
new file mode 100644
index 00000000000..7f04a75bad5
--- /dev/null
+++ b/queue-3.2/pnfs-obj-pnfs-errors-are-communicated-on-iodata-pnfs_error.patch
@@ -0,0 +1,47 @@
+From 5c0b4129c07b902b27d3f3ebc087757f534a3abd Mon Sep 17 00:00:00 2001
+From: Boaz Harrosh <bharrosh@panasas.com>
+Date: Fri, 6 Jan 2012 09:28:12 +0200
+Subject: pnfs-obj: pNFS errors are communicated on iodata->pnfs_error
+
+From: Boaz Harrosh <bharrosh@panasas.com>
+
+commit 5c0b4129c07b902b27d3f3ebc087757f534a3abd upstream.
+
+Some time along the way pNFS IO errors were switched to
+communicate with a special iodata->pnfs_error member instead
+of the regular RPC members. But objlayout was not switched
+over.
+
+Fix that!
+Without this fix any IO error causes a hang, because IO is not
+switched to MDS and pages are never cleared or read.
+
+[Applies to 3.2.0. Same bug different patch for 3.1/0 Kernels]
+Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/nfs/objlayout/objlayout.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/nfs/objlayout/objlayout.c
++++ b/fs/nfs/objlayout/objlayout.c
+@@ -254,6 +254,8 @@ objlayout_read_done(struct objlayout_io_
+ 	oir->status = rdata->task.tk_status = status;
+ 	if (status >= 0)
+ 		rdata->res.count = status;
++	else
++		rdata->pnfs_error = status;
+ 	objlayout_iodone(oir);
+ 	/* must not use oir after this point */
+ 
+@@ -334,6 +336,8 @@ objlayout_write_done(struct objlayout_io
+ 	if (status >= 0) {
+ 		wdata->res.count = status;
+ 		wdata->verf.committed = oir->committed;
++	} else {
++		wdata->pnfs_error = status;
+ 	}
+ 	objlayout_iodone(oir);
+ 	/* must not use oir after this point */
diff --git a/queue-3.2/pnp-work-around-dell-1536-1546-bios-mmconfig-bug-that-breaks-usb.patch b/queue-3.2/pnp-work-around-dell-1536-1546-bios-mmconfig-bug-that-breaks-usb.patch
new file mode 100644
index 00000000000..914a18a07ff
--- /dev/null
+++ b/queue-3.2/pnp-work-around-dell-1536-1546-bios-mmconfig-bug-that-breaks-usb.patch
@@ -0,0 +1,98 @@
+From eb31aae8cb5eb54e234ed2d857ddac868195d911 Mon Sep 17 00:00:00 2001
+From: Bjorn Helgaas <bhelgaas@google.com>
+Date: Thu, 5 Jan 2012 14:27:24 -0700
+Subject: PNP: work around Dell 1536/1546 BIOS MMCONFIG bug that breaks USB
+
+From: Bjorn Helgaas <bhelgaas@google.com>
+
+commit eb31aae8cb5eb54e234ed2d857ddac868195d911 upstream.
+
+Some Dell BIOSes have MCFG tables that don't report the entire
+MMCONFIG area claimed by the chipset.  If we move PCI devices into
+that claimed-but-unreported area, they don't work.
+
+This quirk reads the AMD MMCONFIG MSRs and adds PNP0C01 resources as
+needed to cover the entire area.
+
+Example problem scenario:
+
+  BIOS-e820: 00000000cfec5400 - 00000000d4000000 (reserved)
+  Fam 10h mmconf [d0000000, dfffffff]
+  PCI: MMCONFIG for domain 0000 [bus 00-3f] at [mem 0xd0000000-0xd3ffffff] (base 0xd0000000)
+  pnp 00:0c: [mem 0xd0000000-0xd3ffffff]
+  pci 0000:00:12.0: reg 10: [mem 0xffb00000-0xffb00fff]
+  pci 0000:00:12.0: no compatible bridge window for [mem 0xffb00000-0xffb00fff]
+  pci 0000:00:12.0: BAR 0: assigned [mem 0xd4000000-0xd40000ff]
+
+Reported-by: Lisa Salimbas <lisa.salimbas@canonical.com>
+Reported-by: <thuban@singularity.fr>
+Tested-by: dann frazier <dann.frazier@canonical.com>
+References: https://bugzilla.kernel.org/show_bug.cgi?id=31602
+References: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/647043
+References: https://bugzilla.redhat.com/show_bug.cgi?id=770308
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/pnp/quirks.c |   42 ++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 42 insertions(+)
+
+--- a/drivers/pnp/quirks.c
++++ b/drivers/pnp/quirks.c
+@@ -295,6 +295,45 @@ static void quirk_system_pci_resources(s
+ 	}
+ }
+ 
++#ifdef CONFIG_AMD_NB
++
++#include <asm/amd_nb.h>
++
++static void quirk_amd_mmconfig_area(struct pnp_dev *dev)
++{
++	resource_size_t start, end;
++	struct pnp_resource *pnp_res;
++	struct resource *res;
++	struct resource mmconfig_res, *mmconfig;
++
++	mmconfig = amd_get_mmconfig_range(&mmconfig_res);
++	if (!mmconfig)
++		return;	/* helper returned no range: nothing to do */
++
++	list_for_each_entry(pnp_res, &dev->resources, list) {
++		res = &pnp_res->res;
++		if (res->end < mmconfig->start || res->start > mmconfig->end ||
++		    (res->start == mmconfig->start && res->end == mmconfig->end))
++			continue;	/* no overlap, or already an exact match */
++
++		dev_info(&dev->dev, FW_BUG
++			 "%pR covers only part of AMD MMCONFIG area %pR; adding more reservations\n",
++			 res, mmconfig);
++		if (mmconfig->start < res->start) {	/* range extends below res */
++			start = mmconfig->start;
++			end = res->start - 1;
++			pnp_add_mem_resource(dev, start, end, 0);
++		}
++		if (mmconfig->end > res->end) {	/* range extends above res */
++			start = res->end + 1;
++			end = mmconfig->end;
++			pnp_add_mem_resource(dev, start, end, 0);
++		}
++		break;	/* only the first overlapping resource is extended */
++	}
++}
++#endif
++
+ /*
+  *  PnP Quirks
+  *  Cards or devices that need some tweaking due to incomplete resource info
+@@ -322,6 +361,9 @@ static struct pnp_fixup pnp_fixups[] = {
+ 	/* PnP resources that might overlap PCI BARs */
+ 	{"PNP0c01", quirk_system_pci_resources},
+ 	{"PNP0c02", quirk_system_pci_resources},
++#ifdef CONFIG_AMD_NB
++	{"PNP0c01", quirk_amd_mmconfig_area},
++#endif
+ 	{""}
+ };
+ 
diff --git a/queue-3.2/scsi-mpt2sas-fix-for-memory-allocation-error-for-large-host-credits.patch b/queue-3.2/scsi-mpt2sas-fix-for-memory-allocation-error-for-large-host-credits.patch
new file mode 100644
index 00000000000..5d97461bd5d
--- /dev/null
+++ b/queue-3.2/scsi-mpt2sas-fix-for-memory-allocation-error-for-large-host-credits.patch
@@ -0,0 +1,172 @@
+From aff132d95ffe14eca96cab90597cdd010b457af7 Mon Sep 17 00:00:00 2001
+From: "nagalakshmi.nandigama@lsi.com" <nagalakshmi.nandigama@lsi.com>
+Date: Thu, 1 Dec 2011 07:53:08 +0530
+Subject: SCSI: mpt2sas : Fix for memory allocation error for large host credits
+
+From: "nagalakshmi.nandigama@lsi.com" <nagalakshmi.nandigama@lsi.com>
+
+commit aff132d95ffe14eca96cab90597cdd010b457af7 upstream.
+
+The amount of memory required for tracking chain buffers is rather
+large, and when the host credit count is big, memory allocation
+failure occurs inside __get_free_pages.
+
+The fix is to limit the number of chains to 100,000.  In addition,
+the number of host credits is limited to 30,000 IOs. However this
+limitation can be overridden using the command line option
+max_queue_depth.  The algorithm for calculating the
+reply_post_queue_depth is changed so that it is equal to
+(reply_free_queue_depth + 16), previously it was (reply_free_queue_depth * 2).
+
+Signed-off-by: Nagalakshmi Nandigama <nagalakshmi.nandigama@lsi.com>
+Signed-off-by: James Bottomley <JBottomley@Parallels.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/scsi/mpt2sas/mpt2sas_base.c  |   83 +++++++++++------------------------
+ drivers/scsi/mpt2sas/mpt2sas_scsih.c |    4 -
+ 2 files changed, 29 insertions(+), 58 deletions(-)
+
+--- a/drivers/scsi/mpt2sas/mpt2sas_base.c
++++ b/drivers/scsi/mpt2sas/mpt2sas_base.c
+@@ -65,6 +65,8 @@ static MPT_CALLBACK	mpt_callbacks[MPT_MA
+ 
+ #define FAULT_POLLING_INTERVAL 1000 /* in milliseconds */
+ 
++#define MAX_HBA_QUEUE_DEPTH	30000
++#define MAX_CHAIN_DEPTH		100000
+ static int max_queue_depth = -1;
+ module_param(max_queue_depth, int, 0);
+ MODULE_PARM_DESC(max_queue_depth, " max controller queue depth ");
+@@ -2311,8 +2313,6 @@ _base_release_memory_pools(struct MPT2SA
+ 		}
+ 		if (ioc->chain_dma_pool)
+ 			pci_pool_destroy(ioc->chain_dma_pool);
+-	}
+-	if (ioc->chain_lookup) {
+ 		free_pages((ulong)ioc->chain_lookup, ioc->chain_pages);
+ 		ioc->chain_lookup = NULL;
+ 	}
+@@ -2330,9 +2330,7 @@ static int
+ _base_allocate_memory_pools(struct MPT2SAS_ADAPTER *ioc,  int sleep_flag)
+ {
+ 	struct mpt2sas_facts *facts;
+-	u32 queue_size, queue_diff;
+ 	u16 max_sge_elements;
+-	u16 num_of_reply_frames;
+ 	u16 chains_needed_per_io;
+ 	u32 sz, total_sz, reply_post_free_sz;
+ 	u32 retry_sz;
+@@ -2359,7 +2357,8 @@ _base_allocate_memory_pools(struct MPT2S
+ 		max_request_credit = (max_queue_depth < facts->RequestCredit)
+ 		    ? max_queue_depth : facts->RequestCredit;
+ 	else
+-		max_request_credit = facts->RequestCredit;
++		max_request_credit = min_t(u16, facts->RequestCredit,
++		    MAX_HBA_QUEUE_DEPTH);
+ 
+ 	ioc->hba_queue_depth = max_request_credit;
+ 	ioc->hi_priority_depth = facts->HighPriorityCredit;
+@@ -2400,50 +2399,25 @@ _base_allocate_memory_pools(struct MPT2S
+ 	}
+ 	ioc->chains_needed_per_io = chains_needed_per_io;
+ 
+-	/* reply free queue sizing - taking into account for events */
+-	num_of_reply_frames = ioc->hba_queue_depth + 32;
+-
+-	/* number of replies frames can't be a multiple of 16 */
+-	/* decrease number of reply frames by 1 */
+-	if (!(num_of_reply_frames % 16))
+-		num_of_reply_frames--;
+-
+-	/* calculate number of reply free queue entries
+-	 *  (must be multiple of 16)
+-	 */
+-
+-	/* (we know reply_free_queue_depth is not a multiple of 16) */
+-	queue_size = num_of_reply_frames;
+-	queue_size += 16 - (queue_size % 16);
+-	ioc->reply_free_queue_depth = queue_size;
+-
+-	/* reply descriptor post queue sizing */
+-	/* this size should be the number of request frames + number of reply
+-	 * frames
+-	 */
++	/* reply free queue sizing - taking into account for 64 FW events */
++	ioc->reply_free_queue_depth = ioc->hba_queue_depth + 64;
+ 
+-	queue_size = ioc->hba_queue_depth + num_of_reply_frames + 1;
+-	/* round up to 16 byte boundary */
+-	if (queue_size % 16)
+-		queue_size += 16 - (queue_size % 16);
+-
+-	/* check against IOC maximum reply post queue depth */
+-	if (queue_size > facts->MaxReplyDescriptorPostQueueDepth) {
+-		queue_diff = queue_size -
+-		    facts->MaxReplyDescriptorPostQueueDepth;
+-
+-		/* round queue_diff up to multiple of 16 */
+-		if (queue_diff % 16)
+-			queue_diff += 16 - (queue_diff % 16);
+-
+-		/* adjust hba_queue_depth, reply_free_queue_depth,
+-		 * and queue_size
+-		 */
+-		ioc->hba_queue_depth -= (queue_diff / 2);
+-		ioc->reply_free_queue_depth -= (queue_diff / 2);
+-		queue_size = facts->MaxReplyDescriptorPostQueueDepth;
++	/* reply post queue = free queue depth rounded up to 16, plus 16 */
++	if (!(ioc->reply_free_queue_depth % 16))
++		ioc->reply_post_queue_depth = ioc->reply_free_queue_depth + 16;
++	else
++		ioc->reply_post_queue_depth = ioc->reply_free_queue_depth +
++				32 - (ioc->reply_free_queue_depth % 16);
++	if (ioc->reply_post_queue_depth >
++	    facts->MaxReplyDescriptorPostQueueDepth) {
++		ioc->reply_post_queue_depth = min_t(u16,
++		    (facts->MaxReplyDescriptorPostQueueDepth -
++		    (facts->MaxReplyDescriptorPostQueueDepth % 16)),
++		    (ioc->hba_queue_depth - (ioc->hba_queue_depth % 16)));
++		ioc->reply_free_queue_depth = ioc->reply_post_queue_depth - 16;
++		ioc->hba_queue_depth = ioc->reply_free_queue_depth - 64;
+ 	}
+-	ioc->reply_post_queue_depth = queue_size;
++
+ 
+ 	dinitprintk(ioc, printk(MPT2SAS_INFO_FMT "scatter gather: "
+ 	    "sge_in_main_msg(%d), sge_per_chain(%d), sge_per_io(%d), "
+@@ -2529,15 +2503,12 @@ _base_allocate_memory_pools(struct MPT2S
+ 	    "depth(%d)\n", ioc->name, ioc->request,
+ 	    ioc->scsiio_depth));
+ 
+-	/* loop till the allocation succeeds */
+-	do {
+-		sz = ioc->chain_depth * sizeof(struct chain_tracker);
+-		ioc->chain_pages = get_order(sz);
+-		ioc->chain_lookup = (struct chain_tracker *)__get_free_pages(
+-		    GFP_KERNEL, ioc->chain_pages);
+-		if (ioc->chain_lookup == NULL)
+-			ioc->chain_depth -= 100;
+-	} while (ioc->chain_lookup == NULL);
++	ioc->chain_depth = min_t(u32, ioc->chain_depth, MAX_CHAIN_DEPTH);
++	sz = ioc->chain_depth * sizeof(struct chain_tracker);
++	ioc->chain_pages = get_order(sz);
++
++	ioc->chain_lookup = (struct chain_tracker *)__get_free_pages(
++	    GFP_KERNEL, ioc->chain_pages);
+ 	ioc->chain_dma_pool = pci_pool_create("chain pool", ioc->pdev,
+ 	    ioc->request_sz, 16, 0);
+ 	if (!ioc->chain_dma_pool) {
+--- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c
++++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
+@@ -1007,8 +1007,8 @@ _scsih_get_chain_buffer_tracker(struct M
+ 	spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
+ 	if (list_empty(&ioc->free_chain_list)) {
+ 		spin_unlock_irqrestore(&ioc->scsi_lookup_lock, flags);
+-		printk(MPT2SAS_WARN_FMT "chain buffers not available\n",
+-		    ioc->name);
++		dfailprintk(ioc, printk(MPT2SAS_WARN_FMT "chain buffers not "
++			"available\n", ioc->name));
+ 		return NULL;
+ 	}
+ 	chain_req = list_entry(ioc->free_chain_list.next,
diff --git a/queue-3.2/scsi-mpt2sas-release-spinlock-for-the-raid-device-list-before-blocking-it.patch b/queue-3.2/scsi-mpt2sas-release-spinlock-for-the-raid-device-list-before-blocking-it.patch
new file mode 100644
index 00000000000..cd614a80b28
--- /dev/null
+++ b/queue-3.2/scsi-mpt2sas-release-spinlock-for-the-raid-device-list-before-blocking-it.patch
@@ -0,0 +1,53 @@
+From 30c43282f3d347f47f9e05199d2b14f56f3f2837 Mon Sep 17 00:00:00 2001
+From: "nagalakshmi.nandigama@lsi.com" <nagalakshmi.nandigama@lsi.com>
+Date: Thu, 1 Dec 2011 07:52:56 +0530
+Subject: SCSI: mpt2sas: Release spinlock for the raid device list before blocking it
+
+From: "nagalakshmi.nandigama@lsi.com" <nagalakshmi.nandigama@lsi.com>
+
+commit 30c43282f3d347f47f9e05199d2b14f56f3f2837 upstream.
+
+Added code to release the spinlock that is used to protect the
+raid device list before calling a function that can block. The
+blocking was causing a reschedule, and a subsequent attempt
+to acquire the same lock resulted in a panic (NMI Watchdog
+detecting a CPU lockup).
+
+Signed-off-by: Nagalakshmi Nandigama <nagalakshmi.nandigama@lsi.com>
+Signed-off-by: James Bottomley <JBottomley@Parallels.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/scsi/mpt2sas/mpt2sas_scsih.c |    7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c
++++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
+@@ -6714,6 +6714,7 @@ _scsih_mark_responding_raid_device(struc
+ 			} else
+ 				sas_target_priv_data = NULL;
+ 			raid_device->responding = 1;
++			spin_unlock_irqrestore(&ioc->raid_device_lock, flags);
+ 			starget_printk(KERN_INFO, raid_device->starget,
+ 			    "handle(0x%04x), wwid(0x%016llx)\n", handle,
+ 			    (unsigned long long)raid_device->wwid);
+@@ -6724,16 +6725,16 @@ _scsih_mark_responding_raid_device(struc
+ 			 */
+ 			_scsih_init_warpdrive_properties(ioc, raid_device);
+ 			if (raid_device->handle == handle)
+-				goto out;
++				return;
+ 			printk(KERN_INFO "\thandle changed from(0x%04x)!!!\n",
+ 			    raid_device->handle);
+ 			raid_device->handle = handle;
+ 			if (sas_target_priv_data)
+ 				sas_target_priv_data->handle = handle;
+-			goto out;
++			return;
+ 		}
+ 	}
+- out:
++
+ 	spin_unlock_irqrestore(&ioc->raid_device_lock, flags);
+ }
+ 
diff --git a/queue-3.2/series b/queue-3.2/series
index 5ba28244ddc..d0333965283 100644
--- a/queue-3.2/series
+++ b/queue-3.2/series
@@ -17,3 +17,27 @@ alsa-hda-fix-the-lost-power-setup-of-seconary-pins-after-pm-resume.patch
 drm-radeon-kms-workaround-invalid-avi-infoframe-checksum-issue.patch
 drm-radeon-kms-disable-writeback-on-pre-r300-asics.patch
 radeon-fix-disabling-pci-bus-mastering-on-big-endian-hosts.patch
+pnfs-obj-pnfs-errors-are-communicated-on-iodata-pnfs_error.patch
+pnfs-obj-must-return-layout-on-io-error.patch
+nfs-retry-mounting-nfsroot.patch
+nfsv4.1-fix-backchannel-slotid-off-by-one-bug.patch
+nfs-fix-recent-breakage-to-nfs-error-handling.patch
+nfsv4-include-bitmap-in-nfsv4-get-acl-data.patch
+nfs-fix-regression-in-handling-of-context-option-in-nfsv4.patch
+hid-bump-maximum-global-item-tag-report-size-to-96-bytes.patch
+hid-wiimote-select-input_ff_memless.patch
+ubi-fix-missing-scrub-when-there-is-a-bit-flip.patch
+ubi-fix-use-after-free-on-error-path.patch
+pci-fix-pci_exp_type_rc_ec-value.patch
+pci-msi-disable-msi-interrupts-when-we-initialize-a-pci-device.patch
+x86-pci-ignore-cpu-non-addressable-_crs-reserved-memory-resources.patch
+x86-pci-amd-factor-out-mmconfig-discovery.patch
+x86-pci-build-amd_bus.o-only-when-config_amd_nb-y.patch
+scsi-mpt2sas-release-spinlock-for-the-raid-device-list-before-blocking-it.patch
+scsi-mpt2sas-fix-for-memory-allocation-error-for-large-host-credits.patch
+xen-xenbus-reject-replies-with-payload-xenstore_payload_max.patch
+md-raid1-perform-bad-block-tests-for-writemostly-devices-too.patch
+ima-free-duplicate-measurement-memory.patch
+ima-fix-invalid-memory-reference.patch
+slub-fix-a-possible-memleak-in-__slab_alloc.patch
+pnp-work-around-dell-1536-1546-bios-mmconfig-bug-that-breaks-usb.patch
diff --git a/queue-3.2/slub-fix-a-possible-memleak-in-__slab_alloc.patch b/queue-3.2/slub-fix-a-possible-memleak-in-__slab_alloc.patch
new file mode 100644
index 00000000000..75153c4c317
--- /dev/null
+++ b/queue-3.2/slub-fix-a-possible-memleak-in-__slab_alloc.patch
@@ -0,0 +1,45 @@
+From 73736e0387ba0e6d2b703407b4d26168d31516a7 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <eric.dumazet@gmail.com>
+Date: Tue, 13 Dec 2011 04:57:06 +0100
+Subject: slub: fix a possible memleak in __slab_alloc()
+
+From: Eric Dumazet <eric.dumazet@gmail.com>
+
+commit 73736e0387ba0e6d2b703407b4d26168d31516a7 upstream.
+
+Zhihua Che reported a possible memleak in slub allocator on
+CONFIG_PREEMPT=y builds.
+
+It is possible current thread migrates right before disabling irqs in
+__slab_alloc(). We must check again c->freelist, and perform a normal
+allocation instead of scratching c->freelist.
+
+Many thanks to Zhihua Che for spotting this bug, introduced in 2.6.39
+
+V2: It's also possible an IRQ freed one (or several) object(s) and
+populated c->freelist, so it's not a CONFIG_PREEMPT only problem.
+
+Reported-by: Zhihua Che <zhihua.che@gmail.com>
+Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
+Acked-by: Christoph Lameter <cl@linux.com>
+Signed-off-by: Pekka Enberg <penberg@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ mm/slub.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -2166,6 +2166,11 @@ redo:
+ 		goto new_slab;
+ 	}
+ 
++	/* must check again c->freelist in case of cpu migration or IRQ */
++	object = c->freelist;
++	if (object)
++		goto load_freelist;
++
+ 	stat(s, ALLOC_SLOWPATH);
+ 
+ 	do {
diff --git a/queue-3.2/ubi-fix-missing-scrub-when-there-is-a-bit-flip.patch b/queue-3.2/ubi-fix-missing-scrub-when-there-is-a-bit-flip.patch
new file mode 100644
index 00000000000..a6869efed33
--- /dev/null
+++ b/queue-3.2/ubi-fix-missing-scrub-when-there-is-a-bit-flip.patch
@@ -0,0 +1,75 @@
+From e801e128b2200c40a0ec236cf2330b2586b6e05a Mon Sep 17 00:00:00 2001
+From: Bhavesh Parekh <bparekh@nvidia.com>
+Date: Wed, 30 Nov 2011 17:43:42 +0530
+Subject: UBI: fix missing scrub when there is a bit-flip
+
+From: Bhavesh Parekh <bparekh@nvidia.com>
+
+commit e801e128b2200c40a0ec236cf2330b2586b6e05a upstream.
+
+Under some cases, when scrubbing the PEB if we did not get the lock on
+the PEB it fails to scrub. Add that PEB again to the scrub list
+
+Artem: minor amendments.
+
+Signed-off-by: Bhavesh Parekh <bparekh@nvidia.com>
+Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/mtd/ubi/eba.c |    6 ++++--
+ drivers/mtd/ubi/ubi.h |    2 ++
+ drivers/mtd/ubi/wl.c  |    5 ++++-
+ 3 files changed, 10 insertions(+), 3 deletions(-)
+
+--- a/drivers/mtd/ubi/eba.c
++++ b/drivers/mtd/ubi/eba.c
+@@ -1028,12 +1028,14 @@ int ubi_eba_copy_leb(struct ubi_device *
+ 	 * 'ubi_wl_put_peb()' function on the @ubi->move_mutex. In turn, we are
+ 	 * holding @ubi->move_mutex and go sleep on the LEB lock. So, if the
+ 	 * LEB is already locked, we just do not move it and return
+-	 * %MOVE_CANCEL_RACE, which means that UBI will re-try, but later.
++	 * %MOVE_RETRY. Note, we do not return %MOVE_CANCEL_RACE here because
++	 * we do not know the reasons of the contention - it may be just a
++	 * normal I/O on this LEB, so we want to re-try.
+ 	 */
+ 	err = leb_write_trylock(ubi, vol_id, lnum);
+ 	if (err) {
+ 		dbg_wl("contention on LEB %d:%d, cancel", vol_id, lnum);
+-		return MOVE_CANCEL_RACE;
++		return MOVE_RETRY;
+ 	}
+ 
+ 	/*
+--- a/drivers/mtd/ubi/ubi.h
++++ b/drivers/mtd/ubi/ubi.h
+@@ -120,6 +120,7 @@ enum {
+  *                     PEB
+  * MOVE_CANCEL_BITFLIPS: canceled because a bit-flip was detected in the
+  *                       target PEB
++ * MOVE_RETRY: retry scrubbing the PEB
+  */
+ enum {
+ 	MOVE_CANCEL_RACE = 1,
+@@ -127,6 +128,7 @@ enum {
+ 	MOVE_TARGET_RD_ERR,
+ 	MOVE_TARGET_WR_ERR,
+ 	MOVE_CANCEL_BITFLIPS,
++	MOVE_RETRY,
+ };
+ 
+ /**
+--- a/drivers/mtd/ubi/wl.c
++++ b/drivers/mtd/ubi/wl.c
+@@ -795,7 +795,10 @@ static int wear_leveling_worker(struct u
+ 			protect = 1;
+ 			goto out_not_moved;
+ 		}
+-
++		if (err == MOVE_RETRY) {
++			scrubbing = 1;
++			goto out_not_moved;
++		}
+ 		if (err == MOVE_CANCEL_BITFLIPS || err == MOVE_TARGET_WR_ERR ||
+ 		    err == MOVE_TARGET_RD_ERR) {
+ 			/*
diff --git a/queue-3.2/ubi-fix-use-after-free-on-error-path.patch b/queue-3.2/ubi-fix-use-after-free-on-error-path.patch
new file mode 100644
index 00000000000..12212198443
--- /dev/null
+++ b/queue-3.2/ubi-fix-use-after-free-on-error-path.patch
@@ -0,0 +1,50 @@
+From e57e0d8e818512047fe379157c3f77f1b9fabffb Mon Sep 17 00:00:00 2001
+From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
+Date: Thu, 5 Jan 2012 10:47:18 +0200
+Subject: UBI: fix use-after-free on error path
+
+From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
+
+commit e57e0d8e818512047fe379157c3f77f1b9fabffb upstream.
+
+When we fail to erase a PEB, we free the corresponding erase entry object,
+but then re-schedule this object if the error code was something like -EAGAIN.
+Obviously, it is a bug to use the object after we have freed it.
+
+Reported-by: Emese Revfy <re.emese@gmail.com>
+Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/mtd/ubi/wl.c |    7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/drivers/mtd/ubi/wl.c
++++ b/drivers/mtd/ubi/wl.c
+@@ -1052,7 +1052,6 @@ static int erase_worker(struct ubi_devic
+ 
+ 	ubi_err("failed to erase PEB %d, error %d", pnum, err);
+ 	kfree(wl_wrk);
+-	kmem_cache_free(ubi_wl_entry_slab, e);
+ 
+ 	if (err == -EINTR || err == -ENOMEM || err == -EAGAIN ||
+ 	    err == -EBUSY) {
+@@ -1065,14 +1064,16 @@ static int erase_worker(struct ubi_devic
+ 			goto out_ro;
+ 		}
+ 		return err;
+-	} else if (err != -EIO) {
++	}
++
++	kmem_cache_free(ubi_wl_entry_slab, e);
++	if (err != -EIO)
+ 		/*
+ 		 * If this is not %-EIO, we have no idea what to do. Scheduling
+ 		 * this physical eraseblock for erasure again would cause
+ 		 * errors again and again. Well, lets switch to R/O mode.
+ 		 */
+ 		goto out_ro;
+-	}
+ 
+ 	/* It is %-EIO, the PEB went bad */
+ 
diff --git a/queue-3.2/x86-pci-amd-factor-out-mmconfig-discovery.patch b/queue-3.2/x86-pci-amd-factor-out-mmconfig-discovery.patch
new file mode 100644
index 00000000000..cf808645c21
--- /dev/null
+++ b/queue-3.2/x86-pci-amd-factor-out-mmconfig-discovery.patch
@@ -0,0 +1,154 @@
+From 24d25dbfa63c376323096660bfa9ad45a08870ce Mon Sep 17 00:00:00 2001
+From: Bjorn Helgaas <bhelgaas@google.com>
+Date: Thu, 5 Jan 2012 14:27:19 -0700
+Subject: x86/PCI: amd: factor out MMCONFIG discovery
+
+From: Bjorn Helgaas <bhelgaas@google.com>
+
+commit 24d25dbfa63c376323096660bfa9ad45a08870ce upstream.
+
+This factors out the AMD native MMCONFIG discovery so we can use it
+outside amd_bus.c.
+
+amd_bus.c reads AMD MSRs so it can remove the MMCONFIG area from the
+PCI resources.  We may also need the MMCONFIG information to work
+around BIOS defects in the ACPI MCFG table.
+
+Cc: Borislav Petkov <borislav.petkov@amd.com>
+Cc: Yinghai Lu <yinghai@kernel.org>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/include/asm/amd_nb.h |    2 ++
+ arch/x86/kernel/amd_nb.c      |   31 +++++++++++++++++++++++++++++++
+ arch/x86/pci/amd_bus.c        |   42 +++++++++++-------------------------------
+ 3 files changed, 44 insertions(+), 31 deletions(-)
+
+--- a/arch/x86/include/asm/amd_nb.h
++++ b/arch/x86/include/asm/amd_nb.h
+@@ -1,6 +1,7 @@
+ #ifndef _ASM_X86_AMD_NB_H
+ #define _ASM_X86_AMD_NB_H
+ 
++#include <linux/ioport.h>
+ #include <linux/pci.h>
+ 
+ struct amd_nb_bus_dev_range {
+@@ -13,6 +14,7 @@ extern const struct pci_device_id amd_nb
+ extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[];
+ 
+ extern bool early_is_amd_nb(u32 value);
++extern struct resource *amd_get_mmconfig_range(struct resource *res);
+ extern int amd_cache_northbridges(void);
+ extern void amd_flush_garts(void);
+ extern int amd_numa_init(void);
+--- a/arch/x86/kernel/amd_nb.c
++++ b/arch/x86/kernel/amd_nb.c
+@@ -119,6 +119,37 @@ bool __init early_is_amd_nb(u32 device)
+ 	return false;
+ }
+ 
++struct resource *amd_get_mmconfig_range(struct resource *res)
++{
++	u32 address;
++	u64 base, msr;
++	unsigned segn_busn_bits;
++
++	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
++		return NULL;
++
++	/* assume all cpus from fam10h have mmconfig */
++        if (boot_cpu_data.x86 < 0x10)
++		return NULL;
++
++	address = MSR_FAM10H_MMIO_CONF_BASE;
++	rdmsrl(address, msr);
++
++	/* mmconfig is not enabled */
++	if (!(msr & FAM10H_MMIO_CONF_ENABLE))
++		return NULL;
++
++	base = msr & (FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT);
++
++	segn_busn_bits = (msr >> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) &
++			 FAM10H_MMIO_CONF_BUSRANGE_MASK;
++
++	res->flags = IORESOURCE_MEM;
++	res->start = base;
++	res->end = base + (1ULL<<(segn_busn_bits + 20)) - 1;
++	return res;
++}
++
+ int amd_get_subcaches(int cpu)
+ {
+ 	struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link;
+--- a/arch/x86/pci/amd_bus.c
++++ b/arch/x86/pci/amd_bus.c
+@@ -30,34 +30,6 @@ static struct pci_hostbridge_probe pci_p
+ 	{ 0, 0x18, PCI_VENDOR_ID_AMD, 0x1300 },
+ };
+ 
+-static u64 __initdata fam10h_mmconf_start;
+-static u64 __initdata fam10h_mmconf_end;
+-static void __init get_pci_mmcfg_amd_fam10h_range(void)
+-{
+-	u32 address;
+-	u64 base, msr;
+-	unsigned segn_busn_bits;
+-
+-	/* assume all cpus from fam10h have mmconf */
+-        if (boot_cpu_data.x86 < 0x10)
+-		return;
+-
+-	address = MSR_FAM10H_MMIO_CONF_BASE;
+-	rdmsrl(address, msr);
+-
+-	/* mmconfig is not enable */
+-	if (!(msr & FAM10H_MMIO_CONF_ENABLE))
+-		return;
+-
+-	base = msr & (FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT);
+-
+-	segn_busn_bits = (msr >> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) &
+-			 FAM10H_MMIO_CONF_BUSRANGE_MASK;
+-
+-	fam10h_mmconf_start = base;
+-	fam10h_mmconf_end = base + (1ULL<<(segn_busn_bits + 20)) - 1;
+-}
+-
+ #define RANGE_NUM 16
+ 
+ /**
+@@ -85,6 +57,9 @@ static int __init early_fill_mp_bus_info
+ 	u64 val;
+ 	u32 address;
+ 	bool found;
++	struct resource fam10h_mmconf_res, *fam10h_mmconf;
++	u64 fam10h_mmconf_start;
++	u64 fam10h_mmconf_end;
+ 
+ 	if (!early_pci_allowed())
+ 		return -1;
+@@ -211,12 +186,17 @@ static int __init early_fill_mp_bus_info
+ 		subtract_range(range, RANGE_NUM, 0, end);
+ 
+ 	/* get mmconfig */
+-	get_pci_mmcfg_amd_fam10h_range();
++	fam10h_mmconf = amd_get_mmconfig_range(&fam10h_mmconf_res);
+ 	/* need to take out mmconf range */
+-	if (fam10h_mmconf_end) {
+-		printk(KERN_DEBUG "Fam 10h mmconf [%llx, %llx]\n", fam10h_mmconf_start, fam10h_mmconf_end);
++	if (fam10h_mmconf) {
++		printk(KERN_DEBUG "Fam 10h mmconf %pR\n", fam10h_mmconf);
++		fam10h_mmconf_start = fam10h_mmconf->start;
++		fam10h_mmconf_end = fam10h_mmconf->end;
+ 		subtract_range(range, RANGE_NUM, fam10h_mmconf_start,
+ 				 fam10h_mmconf_end + 1);
++	} else {
++		fam10h_mmconf_start = 0;
++		fam10h_mmconf_end = 0;
+ 	}
+ 
+ 	/* mmio resource */
diff --git a/queue-3.2/x86-pci-build-amd_bus.o-only-when-config_amd_nb-y.patch b/queue-3.2/x86-pci-build-amd_bus.o-only-when-config_amd_nb-y.patch
new file mode 100644
index 00000000000..9e779ec31e9
--- /dev/null
+++ b/queue-3.2/x86-pci-build-amd_bus.o-only-when-config_amd_nb-y.patch
@@ -0,0 +1,35 @@
+From 5cf9a4e69c1ff0ccdd1d2b7404f95c0531355274 Mon Sep 17 00:00:00 2001
+From: Bjorn Helgaas <bhelgaas@google.com>
+Date: Thu, 12 Jan 2012 08:01:40 -0700
+Subject: x86/PCI: build amd_bus.o only when CONFIG_AMD_NB=y
+
+From: Bjorn Helgaas <bhelgaas@google.com>
+
+commit 5cf9a4e69c1ff0ccdd1d2b7404f95c0531355274 upstream.
+
+We only need amd_bus.o for AMD systems with PCI.  arch/x86/pci/Makefile
+already depends on CONFIG_PCI=y, so this patch just adds the dependency
+on CONFIG_AMD_NB.
+
+Cc: Yinghai Lu <yinghai@kernel.org>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/pci/Makefile |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/pci/Makefile
++++ b/arch/x86/pci/Makefile
+@@ -18,8 +18,9 @@ obj-$(CONFIG_X86_NUMAQ)		+= numaq_32.o
+ obj-$(CONFIG_X86_MRST)		+= mrst.o
+ 
+ obj-y				+= common.o early.o
+-obj-y				+= amd_bus.o bus_numa.o
++obj-y				+= bus_numa.o
+ 
++obj-$(CONFIG_AMD_NB)		+= amd_bus.o
+ obj-$(CONFIG_PCI_CNB20LE_QUIRK)	+= broadcom_bus.o
+ 
+ ifeq ($(CONFIG_PCI_DEBUG),y)
diff --git a/queue-3.2/x86-pci-ignore-cpu-non-addressable-_crs-reserved-memory-resources.patch b/queue-3.2/x86-pci-ignore-cpu-non-addressable-_crs-reserved-memory-resources.patch
new file mode 100644
index 00000000000..dc98bf0a6d4
--- /dev/null
+++ b/queue-3.2/x86-pci-ignore-cpu-non-addressable-_crs-reserved-memory-resources.patch
@@ -0,0 +1,62 @@
+From ae5cd86455381282ece162966183d3f208c6fad7 Mon Sep 17 00:00:00 2001
+From: Gary Hade <garyhade@us.ibm.com>
+Date: Mon, 14 Nov 2011 15:42:16 -0800
+Subject: x86/PCI: Ignore CPU non-addressable _CRS reserved memory resources
+
+From: Gary Hade <garyhade@us.ibm.com>
+
+commit ae5cd86455381282ece162966183d3f208c6fad7 upstream.
+
+This assures that a _CRS reserved host bridge window or window region is
+not used if it is not addressable by the CPU.  The new code either trims
+the window to exclude the non-addressable portion or totally ignores the
+window if the entire window is non-addressable.
+
+The current code has been shown to be problematic with 32-bit non-PAE
+kernels on systems where _CRS reserves resources above 4GB.
+
+Signed-off-by: Gary Hade <garyhade@us.ibm.com>
+Reviewed-by: Bjorn Helgaas <bhelgaas@google.com>
+Cc: Thomas Renninger <trenn@novell.com>
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/pci/acpi.c |   18 ++++++++++++++++--
+ 1 file changed, 16 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/pci/acpi.c
++++ b/arch/x86/pci/acpi.c
+@@ -149,7 +149,7 @@ setup_resource(struct acpi_resource *acp
+ 	struct acpi_resource_address64 addr;
+ 	acpi_status status;
+ 	unsigned long flags;
+-	u64 start, end;
++	u64 start, orig_end, end;
+ 
+ 	status = resource_to_addr(acpi_res, &addr);
+ 	if (!ACPI_SUCCESS(status))
+@@ -165,7 +165,21 @@ setup_resource(struct acpi_resource *acp
+ 		return AE_OK;
+ 
+ 	start = addr.minimum + addr.translation_offset;
+-	end = addr.maximum + addr.translation_offset;
++	orig_end = end = addr.maximum + addr.translation_offset;
++
++	/* Exclude non-addressable range or non-addressable portion of range */
++	end = min(end, (u64)iomem_resource.end);
++	if (end <= start) {
++		dev_info(&info->bridge->dev,
++			"host bridge window [%#llx-%#llx] "
++			"(ignored, not CPU addressable)\n", start, orig_end);
++		return AE_OK;
++	} else if (orig_end != end) {
++		dev_info(&info->bridge->dev,
++			"host bridge window [%#llx-%#llx] "
++			"([%#llx-%#llx] ignored, not CPU addressable)\n",
++			start, orig_end, end + 1, orig_end);
++	}
+ 
+ 	res = &info->res[info->res_num];
+ 	res->name = info->name;
diff --git a/queue-3.2/xen-xenbus-reject-replies-with-payload-xenstore_payload_max.patch b/queue-3.2/xen-xenbus-reject-replies-with-payload-xenstore_payload_max.patch
new file mode 100644
index 00000000000..f7ea4ad9cf1
--- /dev/null
+++ b/queue-3.2/xen-xenbus-reject-replies-with-payload-xenstore_payload_max.patch
@@ -0,0 +1,73 @@
+From 9e7860cee18241633eddb36a4c34c7b61d8cecbc Mon Sep 17 00:00:00 2001
+From: Ian Campbell <Ian.Campbell@citrix.com>
+Date: Wed, 4 Jan 2012 09:34:49 +0000
+Subject: xen/xenbus: Reject replies with payload > XENSTORE_PAYLOAD_MAX.
+
+From: Ian Campbell <Ian.Campbell@citrix.com>
+
+commit 9e7860cee18241633eddb36a4c34c7b61d8cecbc upstream.
+
+Haogang Chen found out that:
+
+ There is a potential integer overflow in process_msg() that could result
+ in cross-domain attack.
+
+ 	body = kmalloc(msg->hdr.len + 1, GFP_NOIO | __GFP_HIGH);
+
+ When a malicious guest passes 0xffffffff in msg->hdr.len, the subsequent
+ call to xb_read() would write to a zero-length buffer.
+
+ The other end of this connection is always the xenstore backend daemon
+ so there is no guest (malicious or otherwise) which can do this. The
+ xenstore daemon is a trusted component in the system.
+
+ However this seems like a reasonable robustness improvement so we should
+ have it.
+
+And Ian, when reading the API docs, found that:
+        The payload length (len field of the header) is limited to 4096
+        (XENSTORE_PAYLOAD_MAX) in both directions.  If a client exceeds the
+        limit, its xenstored connection will be immediately killed by
+        xenstored, which is usually catastrophic from the client's point of
+        view.  Clients (particularly domains, which cannot just reconnect)
+        should avoid this.
+
+so this patch checks against that instead.
+
+This also avoids a potential integer overflow pointed out by Haogang Chen.
+
+Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
+Cc: Haogang Chen <haogangchen@gmail.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/xen/xenbus/xenbus_xs.c     |    6 ++++++
+ include/xen/interface/io/xs_wire.h |    3 +++
+ 2 files changed, 9 insertions(+)
+
+--- a/drivers/xen/xenbus/xenbus_xs.c
++++ b/drivers/xen/xenbus/xenbus_xs.c
+@@ -801,6 +801,12 @@ static int process_msg(void)
+ 		goto out;
+ 	}
+ 
++	if (msg->hdr.len > XENSTORE_PAYLOAD_MAX) {
++		kfree(msg);
++		err = -EINVAL;
++		goto out;
++	}
++
+ 	body = kmalloc(msg->hdr.len + 1, GFP_NOIO | __GFP_HIGH);
+ 	if (body == NULL) {
+ 		kfree(msg);
+--- a/include/xen/interface/io/xs_wire.h
++++ b/include/xen/interface/io/xs_wire.h
+@@ -87,4 +87,7 @@ struct xenstore_domain_interface {
+     XENSTORE_RING_IDX rsp_cons, rsp_prod;
+ };
+ 
++/* Violating this is very bad.  See docs/misc/xenstore.txt. */
++#define XENSTORE_PAYLOAD_MAX 4096
++
+ #endif /* _XS_WIRE_H */