git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.2-stable patches
author Greg Kroah-Hartman <gregkh@suse.de>
Fri, 13 Jan 2012 00:42:10 +0000 (16:42 -0800)
committer Greg Kroah-Hartman <gregkh@suse.de>
Fri, 13 Jan 2012 00:42:10 +0000 (16:42 -0800)
added patches:
hid-bump-maximum-global-item-tag-report-size-to-96-bytes.patch
hid-wiimote-select-input_ff_memless.patch
ima-fix-invalid-memory-reference.patch
ima-free-duplicate-measurement-memory.patch
md-raid1-perform-bad-block-tests-for-writemostly-devices-too.patch
nfs-fix-recent-breakage-to-nfs-error-handling.patch
nfs-fix-regression-in-handling-of-context-option-in-nfsv4.patch
nfs-retry-mounting-nfsroot.patch
nfsv4.1-fix-backchannel-slotid-off-by-one-bug.patch
nfsv4-include-bitmap-in-nfsv4-get-acl-data.patch
pci-fix-pci_exp_type_rc_ec-value.patch
pci-msi-disable-msi-interrupts-when-we-initialize-a-pci-device.patch
pnfs-obj-must-return-layout-on-io-error.patch
pnfs-obj-pnfs-errors-are-communicated-on-iodata-pnfs_error.patch
pnp-work-around-dell-1536-1546-bios-mmconfig-bug-that-breaks-usb.patch
scsi-mpt2sas-fix-for-memory-allocation-error-for-large-host-credits.patch
scsi-mpt2sas-release-spinlock-for-the-raid-device-list-before-blocking-it.patch
slub-fix-a-possible-memleak-in-__slab_alloc.patch
ubi-fix-missing-scrub-when-there-is-a-bit-flip.patch
ubi-fix-use-after-free-on-error-path.patch
x86-pci-amd-factor-out-mmconfig-discovery.patch
x86-pci-build-amd_bus.o-only-when-config_amd_nb-y.patch
x86-pci-ignore-cpu-non-addressable-_crs-reserved-memory-resources.patch
xen-xenbus-reject-replies-with-payload-xenstore_payload_max.patch

25 files changed:
queue-3.2/hid-bump-maximum-global-item-tag-report-size-to-96-bytes.patch [new file with mode: 0644]
queue-3.2/hid-wiimote-select-input_ff_memless.patch [new file with mode: 0644]
queue-3.2/ima-fix-invalid-memory-reference.patch [new file with mode: 0644]
queue-3.2/ima-free-duplicate-measurement-memory.patch [new file with mode: 0644]
queue-3.2/md-raid1-perform-bad-block-tests-for-writemostly-devices-too.patch [new file with mode: 0644]
queue-3.2/nfs-fix-recent-breakage-to-nfs-error-handling.patch [new file with mode: 0644]
queue-3.2/nfs-fix-regression-in-handling-of-context-option-in-nfsv4.patch [new file with mode: 0644]
queue-3.2/nfs-retry-mounting-nfsroot.patch [new file with mode: 0644]
queue-3.2/nfsv4-include-bitmap-in-nfsv4-get-acl-data.patch [new file with mode: 0644]
queue-3.2/nfsv4.1-fix-backchannel-slotid-off-by-one-bug.patch [new file with mode: 0644]
queue-3.2/pci-fix-pci_exp_type_rc_ec-value.patch [new file with mode: 0644]
queue-3.2/pci-msi-disable-msi-interrupts-when-we-initialize-a-pci-device.patch [new file with mode: 0644]
queue-3.2/pnfs-obj-must-return-layout-on-io-error.patch [new file with mode: 0644]
queue-3.2/pnfs-obj-pnfs-errors-are-communicated-on-iodata-pnfs_error.patch [new file with mode: 0644]
queue-3.2/pnp-work-around-dell-1536-1546-bios-mmconfig-bug-that-breaks-usb.patch [new file with mode: 0644]
queue-3.2/scsi-mpt2sas-fix-for-memory-allocation-error-for-large-host-credits.patch [new file with mode: 0644]
queue-3.2/scsi-mpt2sas-release-spinlock-for-the-raid-device-list-before-blocking-it.patch [new file with mode: 0644]
queue-3.2/series
queue-3.2/slub-fix-a-possible-memleak-in-__slab_alloc.patch [new file with mode: 0644]
queue-3.2/ubi-fix-missing-scrub-when-there-is-a-bit-flip.patch [new file with mode: 0644]
queue-3.2/ubi-fix-use-after-free-on-error-path.patch [new file with mode: 0644]
queue-3.2/x86-pci-amd-factor-out-mmconfig-discovery.patch [new file with mode: 0644]
queue-3.2/x86-pci-build-amd_bus.o-only-when-config_amd_nb-y.patch [new file with mode: 0644]
queue-3.2/x86-pci-ignore-cpu-non-addressable-_crs-reserved-memory-resources.patch [new file with mode: 0644]
queue-3.2/xen-xenbus-reject-replies-with-payload-xenstore_payload_max.patch [new file with mode: 0644]

diff --git a/queue-3.2/hid-bump-maximum-global-item-tag-report-size-to-96-bytes.patch b/queue-3.2/hid-bump-maximum-global-item-tag-report-size-to-96-bytes.patch
new file mode 100644 (file)
index 0000000..fa469e8
--- /dev/null
@@ -0,0 +1,32 @@
+From e46e927b9b7e8d95526e69322855243882b7e1a3 Mon Sep 17 00:00:00 2001
+From: Chase Douglas <chase.douglas@canonical.com>
+Date: Mon, 7 Nov 2011 11:08:05 -0800
+Subject: HID: bump maximum global item tag report size to 96 bytes
+
+From: Chase Douglas <chase.douglas@canonical.com>
+
+commit e46e927b9b7e8d95526e69322855243882b7e1a3 upstream.
+
+This allows the latest N-Trig devices to function properly.
+
+BugLink: https://bugs.launchpad.net/bugs/724831
+
+Signed-off-by: Chase Douglas <chase.douglas@canonical.com>
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/hid/hid-core.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/hid/hid-core.c
++++ b/drivers/hid/hid-core.c
+@@ -362,7 +362,7 @@ static int hid_parser_global(struct hid_
+       case HID_GLOBAL_ITEM_TAG_REPORT_SIZE:
+               parser->global.report_size = item_udata(item);
+-              if (parser->global.report_size > 32) {
++              if (parser->global.report_size > 96) {
+                       dbg_hid("invalid report_size %d\n",
+                                       parser->global.report_size);
+                       return -1;
diff --git a/queue-3.2/hid-wiimote-select-input_ff_memless.patch b/queue-3.2/hid-wiimote-select-input_ff_memless.patch
new file mode 100644 (file)
index 0000000..926fff5
--- /dev/null
@@ -0,0 +1,31 @@
+From ef6f41157f3864d9bf42671b2ed66062dcafb72e Mon Sep 17 00:00:00 2001
+From: David Herrmann <dh.herrmann@googlemail.com>
+Date: Wed, 7 Dec 2011 21:33:59 +0100
+Subject: HID: wiimote: Select INPUT_FF_MEMLESS
+
+From: David Herrmann <dh.herrmann@googlemail.com>
+
+commit ef6f41157f3864d9bf42671b2ed66062dcafb72e upstream.
+
+We depend on memless force-feedback support, therefore correctly select the
+related config options.
+
+Reported-by: Randy Dunlap <rdunlap@xenotime.net>
+Signed-off-by: David Herrmann <dh.herrmann@googlemail.com>
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/hid/Kconfig |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/hid/Kconfig
++++ b/drivers/hid/Kconfig
+@@ -620,6 +620,7 @@ config HID_WIIMOTE
+       depends on BT_HIDP
+       depends on LEDS_CLASS
+       select POWER_SUPPLY
++      select INPUT_FF_MEMLESS
+       ---help---
+       Support for the Nintendo Wii Remote bluetooth device.
diff --git a/queue-3.2/ima-fix-invalid-memory-reference.patch b/queue-3.2/ima-fix-invalid-memory-reference.patch
new file mode 100644 (file)
index 0000000..15cc3b4
--- /dev/null
@@ -0,0 +1,66 @@
+From 7b7e5916aa2f46e57f8bd8cb89c34620ebfda5da Mon Sep 17 00:00:00 2001
+From: Roberto Sassu <roberto.sassu@polito.it>
+Date: Mon, 19 Dec 2011 15:57:28 +0100
+Subject: ima: fix invalid memory reference
+
+From: Roberto Sassu <roberto.sassu@polito.it>
+
+commit 7b7e5916aa2f46e57f8bd8cb89c34620ebfda5da upstream.
+
+Don't free a valid measurement entry on TPM PCR extend failure.
+
+Signed-off-by: Roberto Sassu <roberto.sassu@polito.it>
+Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ security/integrity/ima/ima_queue.c |   16 +++++++++++-----
+ 1 file changed, 11 insertions(+), 5 deletions(-)
+
+--- a/security/integrity/ima/ima_queue.c
++++ b/security/integrity/ima/ima_queue.c
+@@ -23,6 +23,8 @@
+ #include <linux/slab.h>
+ #include "ima.h"
++#define AUDIT_CAUSE_LEN_MAX 32
++
+ LIST_HEAD(ima_measurements);  /* list of all measurements */
+ /* key: inode (before secure-hashing a file) */
+@@ -94,7 +96,8 @@ static int ima_pcr_extend(const u8 *hash
+       result = tpm_pcr_extend(TPM_ANY_NUM, CONFIG_IMA_MEASURE_PCR_IDX, hash);
+       if (result != 0)
+-              pr_err("IMA: Error Communicating to TPM chip\n");
++              pr_err("IMA: Error Communicating to TPM chip, result: %d\n",
++                     result);
+       return result;
+ }
+@@ -106,8 +109,9 @@ int ima_add_template_entry(struct ima_te
+ {
+       u8 digest[IMA_DIGEST_SIZE];
+       const char *audit_cause = "hash_added";
++      char tpm_audit_cause[AUDIT_CAUSE_LEN_MAX];
+       int audit_info = 1;
+-      int result = 0;
++      int result = 0, tpmresult = 0;
+       mutex_lock(&ima_extend_list_mutex);
+       if (!violation) {
+@@ -129,9 +133,11 @@ int ima_add_template_entry(struct ima_te
+       if (violation)          /* invalidate pcr */
+               memset(digest, 0xff, sizeof digest);
+-      result = ima_pcr_extend(digest);
+-      if (result != 0) {
+-              audit_cause = "TPM error";
++      tpmresult = ima_pcr_extend(digest);
++      if (tpmresult != 0) {
++              snprintf(tpm_audit_cause, AUDIT_CAUSE_LEN_MAX, "TPM_error(%d)",
++                       tpmresult);
++              audit_cause = tpm_audit_cause;
+               audit_info = 0;
+       }
+ out:
diff --git a/queue-3.2/ima-free-duplicate-measurement-memory.patch b/queue-3.2/ima-free-duplicate-measurement-memory.patch
new file mode 100644 (file)
index 0000000..7bf1f0c
--- /dev/null
@@ -0,0 +1,48 @@
+From 45fae7493970d7c45626ccd96d4a74f5f1eea5a9 Mon Sep 17 00:00:00 2001
+From: Roberto Sassu <roberto.sassu@polito.it>
+Date: Mon, 19 Dec 2011 15:57:27 +0100
+Subject: ima: free duplicate measurement memory
+
+From: Roberto Sassu <roberto.sassu@polito.it>
+
+commit 45fae7493970d7c45626ccd96d4a74f5f1eea5a9 upstream.
+
+Info about new measurements is cached in the iint for performance.  When
+the inode is flushed from cache, the associated iint is flushed as well.
+Subsequent access to the inode will cause the inode to be re-measured and
+will attempt to add a duplicate entry to the measurement list.
+
+This patch frees the duplicate measurement memory, fixing a memory leak.
+
+Signed-off-by: Roberto Sassu <roberto.sassu@polito.it>
+Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ security/integrity/ima/ima_api.c   |    4 ++--
+ security/integrity/ima/ima_queue.c |    1 +
+ 2 files changed, 3 insertions(+), 2 deletions(-)
+
+--- a/security/integrity/ima/ima_api.c
++++ b/security/integrity/ima/ima_api.c
+@@ -178,8 +178,8 @@ void ima_store_measurement(struct integr
+       strncpy(entry->template.file_name, filename, IMA_EVENT_NAME_LEN_MAX);
+       result = ima_store_template(entry, violation, inode);
+-      if (!result)
++      if (!result || result == -EEXIST)
+               iint->flags |= IMA_MEASURED;
+-      else
++      if (result < 0)
+               kfree(entry);
+ }
+--- a/security/integrity/ima/ima_queue.c
++++ b/security/integrity/ima/ima_queue.c
+@@ -114,6 +114,7 @@ int ima_add_template_entry(struct ima_te
+               memcpy(digest, entry->digest, sizeof digest);
+               if (ima_lookup_digest_entry(digest)) {
+                       audit_cause = "hash_exists";
++                      result = -EEXIST;
+                       goto out;
+               }
+       }
diff --git a/queue-3.2/md-raid1-perform-bad-block-tests-for-writemostly-devices-too.patch b/queue-3.2/md-raid1-perform-bad-block-tests-for-writemostly-devices-too.patch
new file mode 100644 (file)
index 0000000..41b3668
--- /dev/null
@@ -0,0 +1,53 @@
+From 307729c8bc5b5a41361af8af95906eee7552acb1 Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.de>
+Date: Mon, 9 Jan 2012 01:41:51 +1100
+Subject: md/raid1: perform bad-block tests for WriteMostly devices too.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: NeilBrown <neilb@suse.de>
+
+commit 307729c8bc5b5a41361af8af95906eee7552acb1 upstream.
+
+We normally try to avoid reading from write-mostly devices, but when
+we do we really have to check for bad blocks and be sure not to
+try reading them.
+
+With the current code, best_good_sectors might not get set and that
+causes zero-length read requests to be sent down which is very
+confusing.
+
+This bug was introduced in commit d2eb35acfdccbe2 and so the patch
+is suitable for 3.1.x and 3.2.x
+
+Reported-and-tested-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
+Reported-and-tested-by: Art -kwaak- van Breemen <ard@telegraafnet.nl>
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/md/raid1.c |   11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+--- a/drivers/md/raid1.c
++++ b/drivers/md/raid1.c
+@@ -525,8 +525,17 @@ static int read_balance(struct r1conf *c
+               if (test_bit(WriteMostly, &rdev->flags)) {
+                       /* Don't balance among write-mostly, just
+                        * use the first as a last resort */
+-                      if (best_disk < 0)
++                      if (best_disk < 0) {
++                              if (is_badblock(rdev, this_sector, sectors,
++                                              &first_bad, &bad_sectors)) {
++                                      if (first_bad < this_sector)
++                                              /* Cannot use this */
++                                              continue;
++                                      best_good_sectors = first_bad - this_sector;
++                              } else
++                                      best_good_sectors = sectors;
+                               best_disk = disk;
++                      }
+                       continue;
+               }
+               /* This is a reasonable device to use.  It might
diff --git a/queue-3.2/nfs-fix-recent-breakage-to-nfs-error-handling.patch b/queue-3.2/nfs-fix-recent-breakage-to-nfs-error-handling.patch
new file mode 100644 (file)
index 0000000..8f55da0
--- /dev/null
@@ -0,0 +1,58 @@
+From 2edb6bc3852c681c0d948245bd55108dc6407604 Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.de>
+Date: Wed, 16 Nov 2011 11:46:31 +1100
+Subject: NFS - fix recent breakage to NFS error handling.
+
+From: NeilBrown <neilb@suse.de>
+
+commit 2edb6bc3852c681c0d948245bd55108dc6407604 upstream.
+
+From c6d615d2b97fe305cbf123a8751ced859dca1d5e Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.de>
+Date: Wed, 16 Nov 2011 09:39:05 +1100
+Subject: NFS - fix recent breakage to NFS error handling.
+
+commit 02c24a82187d5a628c68edfe71ae60dc135cd178 made a small and
+presumably unintended change to write error handling in NFS.
+
+Previously an error from filemap_write_and_wait_range would only be of
+interest if nfs_file_fsync did not return an error.  After this commit,
+an error from filemap_write_and_wait_range would mean that (the rest of)
+nfs_file_fsync would not even be called.
+
+This means that:
+ 1/ you are more likely to see EIO than e.g. EDQUOT or ENOSPC.
+ 2/ NFS_CONTEXT_ERROR_WRITE remains set for longer so more writes are
+    synchronous.
+
+This patch restores previous behaviour.
+
+Cc: Josef Bacik <josef@redhat.com>
+Cc: Jan Kara <jack@suse.cz>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/nfs/file.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/nfs/file.c
++++ b/fs/nfs/file.c
+@@ -272,13 +272,13 @@ nfs_file_fsync(struct file *file, loff_t
+                       datasync);
+       ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+-      if (ret)
+-              return ret;
+       mutex_lock(&inode->i_mutex);
+       nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
+       have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
+       status = nfs_commit_inode(inode, FLUSH_SYNC);
++      if (status >= 0 && ret < 0)
++              status = ret;
+       have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
+       if (have_error)
+               ret = xchg(&ctx->error, 0);
diff --git a/queue-3.2/nfs-fix-regression-in-handling-of-context-option-in-nfsv4.patch b/queue-3.2/nfs-fix-regression-in-handling-of-context-option-in-nfsv4.patch
new file mode 100644 (file)
index 0000000..11bd2a4
--- /dev/null
@@ -0,0 +1,144 @@
+From 8a0d551a59ac92d8ff048d6cb29d3a02073e81e8 Mon Sep 17 00:00:00 2001
+From: Jeff Layton <jlayton@redhat.com>
+Date: Tue, 20 Dec 2011 06:57:45 -0500
+Subject: nfs: fix regression in handling of context= option in NFSv4
+
+From: Jeff Layton <jlayton@redhat.com>
+
+commit 8a0d551a59ac92d8ff048d6cb29d3a02073e81e8 upstream.
+
+Setting the security context of a NFSv4 mount via the context= mount
+option is currently broken. The NFSv4 codepath allocates a parsed
+options struct, and then parses the mount options to fill it. It
+eventually calls nfs4_remote_mount which calls security_init_mnt_opts.
+That clobbers the lsm_opts struct that was populated earlier. This bug
+also looks like it causes a small memory leak on each v4 mount where
+context= is used.
+
+Fix this by moving the initialization of the lsm_opts into
+nfs_alloc_parsed_mount_data. Also, add a destructor for
+nfs_parsed_mount_data to make it easier to free all of the allocations
+hanging off of it, and to ensure that the security_free_mnt_opts is
+called whenever security_init_mnt_opts is.
+
+I believe this regression was introduced quite some time ago, probably
+by commit c02d7adf.
+
+Signed-off-by: Jeff Layton <jlayton@redhat.com>
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/nfs/super.c |   43 +++++++++++++++++++------------------------
+ 1 file changed, 19 insertions(+), 24 deletions(-)
+
+--- a/fs/nfs/super.c
++++ b/fs/nfs/super.c
+@@ -909,10 +909,24 @@ static struct nfs_parsed_mount_data *nfs
+               data->auth_flavor_len   = 1;
+               data->version           = version;
+               data->minorversion      = 0;
++              security_init_mnt_opts(&data->lsm_opts);
+       }
+       return data;
+ }
++static void nfs_free_parsed_mount_data(struct nfs_parsed_mount_data *data)
++{
++      if (data) {
++              kfree(data->client_address);
++              kfree(data->mount_server.hostname);
++              kfree(data->nfs_server.export_path);
++              kfree(data->nfs_server.hostname);
++              kfree(data->fscache_uniq);
++              security_free_mnt_opts(&data->lsm_opts);
++              kfree(data);
++      }
++}
++
+ /*
+  * Sanity-check a server address provided by the mount command.
+  *
+@@ -2220,9 +2234,7 @@ static struct dentry *nfs_fs_mount(struc
+       data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION);
+       mntfh = nfs_alloc_fhandle();
+       if (data == NULL || mntfh == NULL)
+-              goto out_free_fh;
+-
+-      security_init_mnt_opts(&data->lsm_opts);
++              goto out;
+       /* Validate the mount data */
+       error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name);
+@@ -2234,8 +2246,6 @@ static struct dentry *nfs_fs_mount(struc
+ #ifdef CONFIG_NFS_V4
+       if (data->version == 4) {
+               mntroot = nfs4_try_mount(flags, dev_name, data);
+-              kfree(data->client_address);
+-              kfree(data->nfs_server.export_path);
+               goto out;
+       }
+ #endif        /* CONFIG_NFS_V4 */
+@@ -2290,13 +2300,8 @@ static struct dentry *nfs_fs_mount(struc
+       s->s_flags |= MS_ACTIVE;
+ out:
+-      kfree(data->nfs_server.hostname);
+-      kfree(data->mount_server.hostname);
+-      kfree(data->fscache_uniq);
+-      security_free_mnt_opts(&data->lsm_opts);
+-out_free_fh:
++      nfs_free_parsed_mount_data(data);
+       nfs_free_fhandle(mntfh);
+-      kfree(data);
+       return mntroot;
+ out_err_nosb:
+@@ -2623,9 +2628,7 @@ nfs4_remote_mount(struct file_system_typ
+       mntfh = nfs_alloc_fhandle();
+       if (data == NULL || mntfh == NULL)
+-              goto out_free_fh;
+-
+-      security_init_mnt_opts(&data->lsm_opts);
++              goto out;
+       /* Get a volume representation */
+       server = nfs4_create_server(data, mntfh);
+@@ -2677,13 +2680,10 @@ nfs4_remote_mount(struct file_system_typ
+       s->s_flags |= MS_ACTIVE;
+-      security_free_mnt_opts(&data->lsm_opts);
+       nfs_free_fhandle(mntfh);
+       return mntroot;
+ out:
+-      security_free_mnt_opts(&data->lsm_opts);
+-out_free_fh:
+       nfs_free_fhandle(mntfh);
+       return ERR_PTR(error);
+@@ -2838,7 +2838,7 @@ static struct dentry *nfs4_mount(struct
+       data = nfs_alloc_parsed_mount_data(4);
+       if (data == NULL)
+-              goto out_free_data;
++              goto out;
+       /* Validate the mount data */
+       error = nfs4_validate_mount_data(raw_data, data, dev_name);
+@@ -2852,12 +2852,7 @@ static struct dentry *nfs4_mount(struct
+               error = PTR_ERR(res);
+ out:
+-      kfree(data->client_address);
+-      kfree(data->nfs_server.export_path);
+-      kfree(data->nfs_server.hostname);
+-      kfree(data->fscache_uniq);
+-out_free_data:
+-      kfree(data);
++      nfs_free_parsed_mount_data(data);
+       dprintk("<-- nfs4_mount() = %d%s\n", error,
+                       error != 0 ? " [error]" : "");
+       return res;
diff --git a/queue-3.2/nfs-retry-mounting-nfsroot.patch b/queue-3.2/nfs-retry-mounting-nfsroot.patch
new file mode 100644 (file)
index 0000000..100f62f
--- /dev/null
@@ -0,0 +1,92 @@
+From 43717c7daebf10b43f12e68512484b3095bb1ba5 Mon Sep 17 00:00:00 2001
+From: Chuck Lever <chuck.lever@oracle.com>
+Date: Mon, 5 Dec 2011 15:40:30 -0500
+Subject: NFS: Retry mounting NFSROOT
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+commit 43717c7daebf10b43f12e68512484b3095bb1ba5 upstream.
+
+Lukas Razik <linux@razik.name> reports that on his SPARC system,
+booting with an NFS root file system stopped working after commit
+56463e50 "NFS: Use super.c for NFSROOT mount option parsing."
+
+We found that the network switch to which Lukas' client was attached
+was delaying access to the LAN after the client's NIC driver reported
+that its link was up.  The delay was longer than the timeouts used in
+the NFS client during mounting.
+
+NFSROOT worked for Lukas before commit 56463e50 because in those
+kernels, the client's first operation was an rpcbind request to
+determine which port the NFS server was listening on.  When that
+request failed after a long timeout, the client simply selected the
+default NFS port (2049).  By that time the switch was allowing access
+to the LAN, and the mount succeeded.
+
+Neither of these client behaviors is desirable, so reverting 56463e50
+is really not a choice.  Instead, introduce a mechanism that retries
+the NFSROOT mount request several times.  This is the same tactic that
+normal user space NFS mounts employ to overcome server and network
+delays.
+
+Signed-off-by: Lukas Razik <linux@razik.name>
+[ cel: match kernel coding style, add proper patch description ]
+[ cel: add exponential back-off ]
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Tested-by: Lukas Razik <linux@razik.name>
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ init/do_mounts.c |   35 +++++++++++++++++++++++++++++++----
+ 1 file changed, 31 insertions(+), 4 deletions(-)
+
+--- a/init/do_mounts.c
++++ b/init/do_mounts.c
+@@ -398,15 +398,42 @@ out:
+ }
+  
+ #ifdef CONFIG_ROOT_NFS
++
++#define NFSROOT_TIMEOUT_MIN   5
++#define NFSROOT_TIMEOUT_MAX   30
++#define NFSROOT_RETRY_MAX     5
++
+ static int __init mount_nfs_root(void)
+ {
+       char *root_dev, *root_data;
++      unsigned int timeout;
++      int try, err;
+-      if (nfs_root_data(&root_dev, &root_data) != 0)
+-              return 0;
+-      if (do_mount_root(root_dev, "nfs", root_mountflags, root_data) != 0)
++      err = nfs_root_data(&root_dev, &root_data);
++      if (err != 0)
+               return 0;
+-      return 1;
++
++      /*
++       * The server or network may not be ready, so try several
++       * times.  Stop after a few tries in case the client wants
++       * to fall back to other boot methods.
++       */
++      timeout = NFSROOT_TIMEOUT_MIN;
++      for (try = 1; ; try++) {
++              err = do_mount_root(root_dev, "nfs",
++                                      root_mountflags, root_data);
++              if (err == 0)
++                      return 1;
++              if (try > NFSROOT_RETRY_MAX)
++                      break;
++
++              /* Wait, in case the server refused us immediately */
++              ssleep(timeout);
++              timeout <<= 1;
++              if (timeout > NFSROOT_TIMEOUT_MAX)
++                      timeout = NFSROOT_TIMEOUT_MAX;
++      }
++      return 0;
+ }
+ #endif
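
The retry loop added above uses a capped exponential back-off: attempt the mount, wait, double the delay, and stop doubling once a ceiling is reached. As a rough stand-alone illustration of the same pattern (user-space C, with a hypothetical attempt() callback standing in for do_mount_root() and limits copied from the NFSROOT_* constants), a sketch might look like this:

#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

#define RETRY_TIMEOUT_MIN 5   /* seconds, mirrors NFSROOT_TIMEOUT_MIN */
#define RETRY_TIMEOUT_MAX 30  /* seconds, mirrors NFSROOT_TIMEOUT_MAX */
#define RETRY_MAX         5   /* extra attempts, mirrors NFSROOT_RETRY_MAX */

/* Retry 'attempt' with a doubling, capped delay between tries,
 * giving up after a bounded number of attempts. */
static bool retry_with_backoff(int (*attempt)(void))
{
        unsigned int timeout = RETRY_TIMEOUT_MIN;
        int try;

        for (try = 1; ; try++) {
                if (attempt() == 0)
                        return true;                    /* succeeded */
                if (try > RETRY_MAX)
                        return false;                   /* give up; caller can fall back */
                sleep(timeout);                         /* wait before retrying */
                timeout <<= 1;                          /* double the delay ... */
                if (timeout > RETRY_TIMEOUT_MAX)
                        timeout = RETRY_TIMEOUT_MAX;    /* ... but never exceed the cap */
        }
}

/* Hypothetical operation that always fails, just to exercise the loop. */
static int always_fail(void) { return -1; }

int main(void)
{
        printf("mounted: %s\n", retry_with_backoff(always_fail) ? "yes" : "no");
        return 0;
}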
diff --git a/queue-3.2/nfsv4-include-bitmap-in-nfsv4-get-acl-data.patch b/queue-3.2/nfsv4-include-bitmap-in-nfsv4-get-acl-data.patch
new file mode 100644 (file)
index 0000000..85ac36e
--- /dev/null
@@ -0,0 +1,303 @@
+From bf118a342f10dafe44b14451a1392c3254629a1f Mon Sep 17 00:00:00 2001
+From: Andy Adamson <andros@netapp.com>
+Date: Wed, 7 Dec 2011 11:55:27 -0500
+Subject: NFSv4: include bitmap in nfsv4 get acl data
+
+From: Andy Adamson <andros@netapp.com>
+
+commit bf118a342f10dafe44b14451a1392c3254629a1f upstream.
+
+The NFSv4 bitmap size is unbounded: a server can return an arbitrarily
+sized bitmap in an FATTR4_WORD0_ACL request.  Replace the use of
+nfs4_fattr_bitmap_maxsz as a guess at the maximum bitmask returned by a server
+with the inclusion of the bitmap (xdr length plus bitmasks) and the acl data
+xdr length in the (cached) acl page data.
+
+This is a general solution to commit e5012d1f "NFSv4.1: update
+nfs4_fattr_bitmap_maxsz" and fixes hitting a BUG_ON in xdr_shrink_bufhead
+when getting ACLs.
+
+Fix a bug in decode_getacl that returned -EINVAL on ACLs > page when getxattr
+was called with a NULL buffer, preventing ACL > PAGE_SIZE from being retrieved.
+
+Signed-off-by: Andy Adamson <andros@netapp.com>
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/nfs/nfs4proc.c          |   96 ++++++++++++++++++++++++++-------------------
+ fs/nfs/nfs4xdr.c           |   31 ++++++++++----
+ include/linux/nfs_xdr.h    |    5 ++
+ include/linux/sunrpc/xdr.h |    2 
+ net/sunrpc/xdr.c           |    3 -
+ 5 files changed, 89 insertions(+), 48 deletions(-)
+
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -3430,19 +3430,6 @@ static inline int nfs4_server_supports_a
+  */
+ #define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT)
+-static void buf_to_pages(const void *buf, size_t buflen,
+-              struct page **pages, unsigned int *pgbase)
+-{
+-      const void *p = buf;
+-
+-      *pgbase = offset_in_page(buf);
+-      p -= *pgbase;
+-      while (p < buf + buflen) {
+-              *(pages++) = virt_to_page(p);
+-              p += PAGE_CACHE_SIZE;
+-      }
+-}
+-
+ static int buf_to_pages_noslab(const void *buf, size_t buflen,
+               struct page **pages, unsigned int *pgbase)
+ {
+@@ -3539,9 +3526,19 @@ out:
+       nfs4_set_cached_acl(inode, acl);
+ }
++/*
++ * The getxattr API returns the required buffer length when called with a
++ * NULL buf. The NFSv4 acl tool then calls getxattr again after allocating
++ * the required buf.  On a NULL buf, we send a page of data to the server
++ * guessing that the ACL request can be serviced by a page. If so, we cache
++ * up to the page of ACL data, and the 2nd call to getxattr is serviced by
++ * the cache. If not so, we throw away the page, and cache the required
++ * length. The next getxattr call will then produce another round trip to
++ * the server, this time with the input buf of the required size.
++ */
+ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
+ {
+-      struct page *pages[NFS4ACL_MAXPAGES];
++      struct page *pages[NFS4ACL_MAXPAGES] = {NULL, };
+       struct nfs_getaclargs args = {
+               .fh = NFS_FH(inode),
+               .acl_pages = pages,
+@@ -3556,41 +3553,60 @@ static ssize_t __nfs4_get_acl_uncached(s
+               .rpc_argp = &args,
+               .rpc_resp = &res,
+       };
+-      struct page *localpage = NULL;
+-      int ret;
++      int ret = -ENOMEM, npages, i, acl_len = 0;
+-      if (buflen < PAGE_SIZE) {
+-              /* As long as we're doing a round trip to the server anyway,
+-               * let's be prepared for a page of acl data. */
+-              localpage = alloc_page(GFP_KERNEL);
+-              resp_buf = page_address(localpage);
+-              if (localpage == NULL)
+-                      return -ENOMEM;
+-              args.acl_pages[0] = localpage;
+-              args.acl_pgbase = 0;
+-              args.acl_len = PAGE_SIZE;
+-      } else {
+-              resp_buf = buf;
+-              buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase);
++      npages = (buflen + PAGE_SIZE - 1) >> PAGE_SHIFT;
++      /* As long as we're doing a round trip to the server anyway,
++       * let's be prepared for a page of acl data. */
++      if (npages == 0)
++              npages = 1;
++
++      for (i = 0; i < npages; i++) {
++              pages[i] = alloc_page(GFP_KERNEL);
++              if (!pages[i])
++                      goto out_free;
+       }
+-      ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), &msg, &args.seq_args, &res.seq_res, 0);
++      if (npages > 1) {
++              /* for decoding across pages */
++              args.acl_scratch = alloc_page(GFP_KERNEL);
++              if (!args.acl_scratch)
++                      goto out_free;
++      }
++      args.acl_len = npages * PAGE_SIZE;
++      args.acl_pgbase = 0;
++      /* Let decode_getfacl know not to fail if the ACL data is larger than
++       * the page we send as a guess */
++      if (buf == NULL)
++              res.acl_flags |= NFS4_ACL_LEN_REQUEST;
++      resp_buf = page_address(pages[0]);
++
++      dprintk("%s  buf %p buflen %ld npages %d args.acl_len %ld\n",
++              __func__, buf, buflen, npages, args.acl_len);
++      ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode),
++                           &msg, &args.seq_args, &res.seq_res, 0);
+       if (ret)
+               goto out_free;
+-      if (res.acl_len > args.acl_len)
+-              nfs4_write_cached_acl(inode, NULL, res.acl_len);
++
++      acl_len = res.acl_len - res.acl_data_offset;
++      if (acl_len > args.acl_len)
++              nfs4_write_cached_acl(inode, NULL, acl_len);
+       else
+-              nfs4_write_cached_acl(inode, resp_buf, res.acl_len);
++              nfs4_write_cached_acl(inode, resp_buf + res.acl_data_offset,
++                                    acl_len);
+       if (buf) {
+               ret = -ERANGE;
+-              if (res.acl_len > buflen)
++              if (acl_len > buflen)
+                       goto out_free;
+-              if (localpage)
+-                      memcpy(buf, resp_buf, res.acl_len);
++              _copy_from_pages(buf, pages, res.acl_data_offset,
++                              res.acl_len);
+       }
+-      ret = res.acl_len;
++      ret = acl_len;
+ out_free:
+-      if (localpage)
+-              __free_page(localpage);
++      for (i = 0; i < npages; i++)
++              if (pages[i])
++                      __free_page(pages[i]);
++      if (args.acl_scratch)
++              __free_page(args.acl_scratch);
+       return ret;
+ }
+@@ -3621,6 +3637,8 @@ static ssize_t nfs4_proc_get_acl(struct
+               nfs_zap_acl_cache(inode);
+       ret = nfs4_read_cached_acl(inode, buf, buflen);
+       if (ret != -ENOENT)
++              /* -ENOENT is returned if there is no ACL or if there is an ACL
++               * but no cached acl data, just the acl length */
+               return ret;
+       return nfs4_get_acl_uncached(inode, buf, buflen);
+ }
+--- a/fs/nfs/nfs4xdr.c
++++ b/fs/nfs/nfs4xdr.c
+@@ -2517,11 +2517,13 @@ static void nfs4_xdr_enc_getacl(struct r
+       encode_compound_hdr(xdr, req, &hdr);
+       encode_sequence(xdr, &args->seq_args, &hdr);
+       encode_putfh(xdr, args->fh, &hdr);
+-      replen = hdr.replen + op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz + 1;
++      replen = hdr.replen + op_decode_hdr_maxsz + 1;
+       encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr);
+       xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
+               args->acl_pages, args->acl_pgbase, args->acl_len);
++      xdr_set_scratch_buffer(xdr, page_address(args->acl_scratch), PAGE_SIZE);
++
+       encode_nops(&hdr);
+ }
+@@ -4957,17 +4959,18 @@ decode_restorefh(struct xdr_stream *xdr)
+ }
+ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
+-              size_t *acl_len)
++                       struct nfs_getaclres *res)
+ {
+-      __be32 *savep;
++      __be32 *savep, *bm_p;
+       uint32_t attrlen,
+                bitmap[3] = {0};
+       struct kvec *iov = req->rq_rcv_buf.head;
+       int status;
+-      *acl_len = 0;
++      res->acl_len = 0;
+       if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+               goto out;
++      bm_p = xdr->p;
+       if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
+               goto out;
+       if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
+@@ -4979,18 +4982,30 @@ static int decode_getacl(struct xdr_stre
+               size_t hdrlen;
+               u32 recvd;
++              /* The bitmap (xdr len + bitmaps) and the attr xdr len words
++               * are stored with the acl data to handle the problem of
++               * variable length bitmaps.*/
++              xdr->p = bm_p;
++              res->acl_data_offset = be32_to_cpup(bm_p) + 2;
++              res->acl_data_offset <<= 2;
++
+               /* We ignore &savep and don't do consistency checks on
+                * the attr length.  Let userspace figure it out.... */
+               hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base;
++              attrlen += res->acl_data_offset;
+               recvd = req->rq_rcv_buf.len - hdrlen;
+               if (attrlen > recvd) {
+-                      dprintk("NFS: server cheating in getattr"
+-                                      " acl reply: attrlen %u > recvd %u\n",
++                      if (res->acl_flags & NFS4_ACL_LEN_REQUEST) {
++                              /* getxattr interface called with a NULL buf */
++                              res->acl_len = attrlen;
++                              goto out;
++                      }
++                      dprintk("NFS: acl reply: attrlen %u > recvd %u\n",
+                                       attrlen, recvd);
+                       return -EINVAL;
+               }
+               xdr_read_pages(xdr, attrlen);
+-              *acl_len = attrlen;
++              res->acl_len = attrlen;
+       } else
+               status = -EOPNOTSUPP;
+@@ -6028,7 +6043,7 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqs
+       status = decode_putfh(xdr);
+       if (status)
+               goto out;
+-      status = decode_getacl(xdr, rqstp, &res->acl_len);
++      status = decode_getacl(xdr, rqstp, res);
+ out:
+       return status;
+--- a/include/linux/nfs_xdr.h
++++ b/include/linux/nfs_xdr.h
+@@ -602,11 +602,16 @@ struct nfs_getaclargs {
+       size_t                          acl_len;
+       unsigned int                    acl_pgbase;
+       struct page **                  acl_pages;
++      struct page *                   acl_scratch;
+       struct nfs4_sequence_args       seq_args;
+ };
++/* getxattr ACL interface flags */
++#define NFS4_ACL_LEN_REQUEST  0x0001  /* zero length getxattr buffer */
+ struct nfs_getaclres {
+       size_t                          acl_len;
++      size_t                          acl_data_offset;
++      int                             acl_flags;
+       struct nfs4_sequence_res        seq_res;
+ };
+--- a/include/linux/sunrpc/xdr.h
++++ b/include/linux/sunrpc/xdr.h
+@@ -191,6 +191,8 @@ extern int xdr_decode_array2(struct xdr_
+                            struct xdr_array2_desc *desc);
+ extern int xdr_encode_array2(struct xdr_buf *buf, unsigned int base,
+                            struct xdr_array2_desc *desc);
++extern void _copy_from_pages(char *p, struct page **pages, size_t pgbase,
++                           size_t len);
+ /*
+  * Provide some simple tools for XDR buffer overflow-checking etc.
+--- a/net/sunrpc/xdr.c
++++ b/net/sunrpc/xdr.c
+@@ -296,7 +296,7 @@ _copy_to_pages(struct page **pages, size
+  * Copies data into an arbitrary memory location from an array of pages
+  * The copy is assumed to be non-overlapping.
+  */
+-static void
++void
+ _copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len)
+ {
+       struct page **pgfrom;
+@@ -324,6 +324,7 @@ _copy_from_pages(char *p, struct page **
+       } while ((len -= copy) != 0);
+ }
++EXPORT_SYMBOL_GPL(_copy_from_pages);
+ /*
+  * xdr_shrink_bufhead
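
The comment added in nfs4proc.c above relies on the usual two-call getxattr convention: a first call with a NULL buffer only reports the required length, and a second call with a buffer of that size fetches the data. A minimal user-space sketch of that convention follows; the attribute name "system.nfs4_acl" is the one the NFSv4 ACL tools are generally understood to use and should be treated as an assumption here.

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/xattr.h>

int main(int argc, char **argv)
{
        const char *path = argc > 1 ? argv[1] : ".";
        /* Assumed attribute name exposed by the NFS client for v4 ACLs. */
        const char *name = "system.nfs4_acl";
        ssize_t len, got;
        char *buf;

        /* First call: NULL buffer, size 0 -> kernel returns the required length. */
        len = getxattr(path, name, NULL, 0);
        if (len < 0) {
                perror("getxattr (size probe)");
                return 1;
        }

        buf = malloc(len);
        if (!buf)
                return 1;

        /* Second call: buffer of the advertised size -> actual ACL data. */
        got = getxattr(path, name, buf, len);
        if (got < 0) {
                perror("getxattr (data)");
                free(buf);
                return 1;
        }

        printf("fetched %zd bytes of ACL data from %s\n", got, path);
        free(buf);
        return 0;
}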
diff --git a/queue-3.2/nfsv4.1-fix-backchannel-slotid-off-by-one-bug.patch b/queue-3.2/nfsv4.1-fix-backchannel-slotid-off-by-one-bug.patch
new file mode 100644 (file)
index 0000000..dd42c44
--- /dev/null
@@ -0,0 +1,28 @@
+From 61f2e5106582d02f30b6807e3f9c07463c572ccb Mon Sep 17 00:00:00 2001
+From: Andy Adamson <andros@netapp.com>
+Date: Wed, 9 Nov 2011 13:58:20 -0500
+Subject: NFSv4.1: fix backchannel slotid off-by-one bug
+
+From: Andy Adamson <andros@netapp.com>
+
+commit 61f2e5106582d02f30b6807e3f9c07463c572ccb upstream.
+
+Signed-off-by: Andy Adamson <andros@netapp.com>
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/nfs/callback_proc.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/nfs/callback_proc.c
++++ b/fs/nfs/callback_proc.c
+@@ -339,7 +339,7 @@ validate_seqid(struct nfs4_slot_table *t
+       dprintk("%s enter. slotid %d seqid %d\n",
+               __func__, args->csa_slotid, args->csa_sequenceid);
+-      if (args->csa_slotid > NFS41_BC_MAX_CALLBACKS)
++      if (args->csa_slotid >= NFS41_BC_MAX_CALLBACKS)
+               return htonl(NFS4ERR_BADSLOT);
+       slot = tbl->slots + args->csa_slotid;
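
The one-character fix above is the standard bounds check for a zero-based index: assuming NFS41_BC_MAX_CALLBACKS is the number of entries in the backchannel slot table (which the fix implies), valid slot ids run from 0 to NFS41_BC_MAX_CALLBACKS - 1, so an id equal to the maximum must already be rejected. A tiny stand-alone sketch of the same check, with a made-up table size:

#include <stdio.h>

#define MAX_SLOTS 4   /* stand-in for NFS41_BC_MAX_CALLBACKS */

static int slot_valid(unsigned int slotid)
{
        /* Valid indices are 0 .. MAX_SLOTS-1, so reject slotid >= MAX_SLOTS.
         * Using '>' here would let slotid == MAX_SLOTS through and index
         * one element past the end of the table. */
        return slotid < MAX_SLOTS;
}

int main(void)
{
        unsigned int id;

        for (id = 0; id <= MAX_SLOTS; id++)
                printf("slotid %u -> %s\n", id, slot_valid(id) ? "ok" : "rejected");
        return 0;
}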
diff --git a/queue-3.2/pci-fix-pci_exp_type_rc_ec-value.patch b/queue-3.2/pci-fix-pci_exp_type_rc_ec-value.patch
new file mode 100644 (file)
index 0000000..6f5f6ba
--- /dev/null
@@ -0,0 +1,30 @@
+From 1830ea91c20b06608f7cdb2455ce05ba834b3214 Mon Sep 17 00:00:00 2001
+From: Alex Williamson <alex.williamson@redhat.com>
+Date: Wed, 16 Nov 2011 09:24:16 -0700
+Subject: PCI: Fix PCI_EXP_TYPE_RC_EC value
+
+From: Alex Williamson <alex.williamson@redhat.com>
+
+commit 1830ea91c20b06608f7cdb2455ce05ba834b3214 upstream.
+
+Spec shows this as 1010b = 0xa
+
+Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
+Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ include/linux/pci_regs.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/linux/pci_regs.h
++++ b/include/linux/pci_regs.h
+@@ -392,7 +392,7 @@
+ #define  PCI_EXP_TYPE_DOWNSTREAM 0x6  /* Downstream Port */
+ #define  PCI_EXP_TYPE_PCI_BRIDGE 0x7  /* PCI/PCI-X Bridge */
+ #define  PCI_EXP_TYPE_RC_END  0x9     /* Root Complex Integrated Endpoint */
+-#define  PCI_EXP_TYPE_RC_EC   0x10    /* Root Complex Event Collector */
++#define  PCI_EXP_TYPE_RC_EC   0xa     /* Root Complex Event Collector */
+ #define PCI_EXP_FLAGS_SLOT    0x0100  /* Slot implemented */
+ #define PCI_EXP_FLAGS_IRQ     0x3e00  /* Interrupt message number */
+ #define PCI_EXP_DEVCAP                4       /* Device capabilities */
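
For reference, binary 1010 is 8 + 2 = 10 decimal, i.e. 0xa, which fits in the 4-bit Device/Port Type field (PCI_EXP_FLAGS_TYPE, bits 7:4 of the Express Capabilities register). The old value 0x10 is decimal 16, which cannot even be represented in a 4-bit field, so a comparison against it could never have matched a real Root Complex Event Collector.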
diff --git a/queue-3.2/pci-msi-disable-msi-interrupts-when-we-initialize-a-pci-device.patch b/queue-3.2/pci-msi-disable-msi-interrupts-when-we-initialize-a-pci-device.patch
new file mode 100644 (file)
index 0000000..9200c64
--- /dev/null
@@ -0,0 +1,50 @@
+From a776c491ca5e38c26d9f66923ff574d041e747f4 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Mon, 17 Oct 2011 11:46:06 -0700
+Subject: PCI: msi: Disable msi interrupts when we initialize a pci device
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit a776c491ca5e38c26d9f66923ff574d041e747f4 upstream.
+
+I traced a nasty kexec on panic boot failure to the fact that we had
+screaming msi interrupts and we were not disabling the msi messages at
+kernel startup.  The booting kernel had not enabled those interupts so
+was not prepared to handle them.
+
+I can see no reason why we would ever want to leave the msi interrupts
+enabled at boot if something else has enabled those interrupts.  The pci
+spec specifies that msi interrupts should be off by default.  Drivers
+are expected to enable the msi interrupts if they want to use them.  Our
+interrupt handling code reprograms the interrupt handlers at boot and
+will not be able to do anything useful with an unexpected interrupt.
+
+This patch applies cleanly all of the way back to 2.6.32 where I noticed
+the problem.
+
+Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
+Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/pci/msi.c |   10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/drivers/pci/msi.c
++++ b/drivers/pci/msi.c
+@@ -870,5 +870,15 @@ EXPORT_SYMBOL(pci_msi_enabled);
+ void pci_msi_init_pci_dev(struct pci_dev *dev)
+ {
++      int pos;
+       INIT_LIST_HEAD(&dev->msi_list);
++
++      /* Disable the msi hardware to avoid screaming interrupts
++       * during boot.  This is the power on reset default so
++       * usually this should be a noop.
++       */
++      pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
++      if (pos)
++              msi_set_enable(dev, pos, 0);
++      msix_set_enable(dev, 0);
+ }
diff --git a/queue-3.2/pnfs-obj-must-return-layout-on-io-error.patch b/queue-3.2/pnfs-obj-must-return-layout-on-io-error.patch
new file mode 100644 (file)
index 0000000..1e340ec
--- /dev/null
@@ -0,0 +1,86 @@
+From fe0fe83585f88346557868a803a479dfaaa0688a Mon Sep 17 00:00:00 2001
+From: Boaz Harrosh <bharrosh@panasas.com>
+Date: Fri, 6 Jan 2012 09:31:20 +0200
+Subject: pnfs-obj: Must return layout on IO error
+
+From: Boaz Harrosh <bharrosh@panasas.com>
+
+commit fe0fe83585f88346557868a803a479dfaaa0688a upstream.
+
+As mandated by the standard. In case of an IO error, a pNFS
+objects layout driver must return its layout. This is because
+all device errors are reported to the server as part of the
+layout return buffer.
+
+This is implemented the same way PNFS_LAYOUTRET_ON_SETATTR
+is done, through a bit flag on the pnfs_layoutdriver_type->flags
+member. The flag is set by the layout driver that wants a
+layout_return performed at pnfs_ld_{write,read}_done in case
+of an error.
+(Though I have not defined a wrapper like pnfs_ld_layoutret_on_setattr
+ because this code is never called outside of pnfs.c and pnfs IO
+ paths)
+
+Without this patch 3.[0-2] Kernels leak memory and have an annoying
+WARN_ON after every IO error utilizing the pnfs-obj driver.
+
+Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/nfs/objlayout/objio_osd.c |    3 ++-
+ fs/nfs/pnfs.c                |   12 ++++++++++++
+ fs/nfs/pnfs.h                |    1 +
+ 3 files changed, 15 insertions(+), 1 deletion(-)
+
+--- a/fs/nfs/objlayout/objio_osd.c
++++ b/fs/nfs/objlayout/objio_osd.c
+@@ -551,7 +551,8 @@ static const struct nfs_pageio_ops objio
+ static struct pnfs_layoutdriver_type objlayout_type = {
+       .id = LAYOUT_OSD2_OBJECTS,
+       .name = "LAYOUT_OSD2_OBJECTS",
+-      .flags                   = PNFS_LAYOUTRET_ON_SETATTR,
++      .flags                   = PNFS_LAYOUTRET_ON_SETATTR |
++                                 PNFS_LAYOUTRET_ON_ERROR,
+       .alloc_layout_hdr        = objlayout_alloc_layout_hdr,
+       .free_layout_hdr         = objlayout_free_layout_hdr,
+--- a/fs/nfs/pnfs.c
++++ b/fs/nfs/pnfs.c
+@@ -1178,6 +1178,15 @@ void pnfs_ld_write_done(struct nfs_write
+               put_lseg(data->lseg);
+               data->lseg = NULL;
+               dprintk("pnfs write error = %d\n", data->pnfs_error);
++              if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags &
++                                              PNFS_LAYOUTRET_ON_ERROR) {
++                      /* Don't lo_commit on error, Server will needs to
++                       * preform a file recovery.
++                       */
++                      clear_bit(NFS_INO_LAYOUTCOMMIT,
++                                &NFS_I(data->inode)->flags);
++                      pnfs_return_layout(data->inode);
++              }
+       }
+       data->mds_ops->rpc_release(data);
+ }
+@@ -1267,6 +1276,9 @@ static void pnfs_ld_handle_read_error(st
+       put_lseg(data->lseg);
+       data->lseg = NULL;
+       dprintk("pnfs write error = %d\n", data->pnfs_error);
++      if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags &
++                                              PNFS_LAYOUTRET_ON_ERROR)
++              pnfs_return_layout(data->inode);
+       nfs_pageio_init_read_mds(&pgio, data->inode);
+--- a/fs/nfs/pnfs.h
++++ b/fs/nfs/pnfs.h
+@@ -68,6 +68,7 @@ enum {
+ enum layoutdriver_policy_flags {
+       /* Should the pNFS client commit and return the layout upon a setattr */
+       PNFS_LAYOUTRET_ON_SETATTR       = 1 << 0,
++      PNFS_LAYOUTRET_ON_ERROR         = 1 << 1,
+ };
+ struct nfs4_deviceid_node;
diff --git a/queue-3.2/pnfs-obj-pnfs-errors-are-communicated-on-iodata-pnfs_error.patch b/queue-3.2/pnfs-obj-pnfs-errors-are-communicated-on-iodata-pnfs_error.patch
new file mode 100644 (file)
index 0000000..7f04a75
--- /dev/null
@@ -0,0 +1,47 @@
+From 5c0b4129c07b902b27d3f3ebc087757f534a3abd Mon Sep 17 00:00:00 2001
+From: Boaz Harrosh <bharrosh@panasas.com>
+Date: Fri, 6 Jan 2012 09:28:12 +0200
+Subject: pnfs-obj: pNFS errors are communicated on iodata->pnfs_error
+
+From: Boaz Harrosh <bharrosh@panasas.com>
+
+commit 5c0b4129c07b902b27d3f3ebc087757f534a3abd upstream.
+
+Some time along the way pNFS IO errors were switched to
+communicate with a special iodata->pnfs_error member instead
+of the regular RPC members. But objlayout was not switched
+over.
+
+Fix that!
+Without this fix any IO error leads to a hang, because IO is not
+switched to MDS and pages are never cleared or read.
+
+[Applies to 3.2.0. Same bug different patch for 3.1/0 Kernels]
+Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/nfs/objlayout/objlayout.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/nfs/objlayout/objlayout.c
++++ b/fs/nfs/objlayout/objlayout.c
+@@ -254,6 +254,8 @@ objlayout_read_done(struct objlayout_io_
+       oir->status = rdata->task.tk_status = status;
+       if (status >= 0)
+               rdata->res.count = status;
++      else
++              rdata->pnfs_error = status;
+       objlayout_iodone(oir);
+       /* must not use oir after this point */
+@@ -334,6 +336,8 @@ objlayout_write_done(struct objlayout_io
+       if (status >= 0) {
+               wdata->res.count = status;
+               wdata->verf.committed = oir->committed;
++      } else {
++              wdata->pnfs_error = status;
+       }
+       objlayout_iodone(oir);
+       /* must not use oir after this point */
diff --git a/queue-3.2/pnp-work-around-dell-1536-1546-bios-mmconfig-bug-that-breaks-usb.patch b/queue-3.2/pnp-work-around-dell-1536-1546-bios-mmconfig-bug-that-breaks-usb.patch
new file mode 100644 (file)
index 0000000..914a18a
--- /dev/null
@@ -0,0 +1,98 @@
+From eb31aae8cb5eb54e234ed2d857ddac868195d911 Mon Sep 17 00:00:00 2001
+From: Bjorn Helgaas <bhelgaas@google.com>
+Date: Thu, 5 Jan 2012 14:27:24 -0700
+Subject: PNP: work around Dell 1536/1546 BIOS MMCONFIG bug that breaks USB
+
+From: Bjorn Helgaas <bhelgaas@google.com>
+
+commit eb31aae8cb5eb54e234ed2d857ddac868195d911 upstream.
+
+Some Dell BIOSes have MCFG tables that don't report the entire
+MMCONFIG area claimed by the chipset.  If we move PCI devices into
+that claimed-but-unreported area, they don't work.
+
+This quirk reads the AMD MMCONFIG MSRs and adds PNP0C01 resources as
+needed to cover the entire area.
+
+Example problem scenario:
+
+  BIOS-e820: 00000000cfec5400 - 00000000d4000000 (reserved)
+  Fam 10h mmconf [d0000000, dfffffff]
+  PCI: MMCONFIG for domain 0000 [bus 00-3f] at [mem 0xd0000000-0xd3ffffff] (base 0xd0000000)
+  pnp 00:0c: [mem 0xd0000000-0xd3ffffff]
+  pci 0000:00:12.0: reg 10: [mem 0xffb00000-0xffb00fff]
+  pci 0000:00:12.0: no compatible bridge window for [mem 0xffb00000-0xffb00fff]
+  pci 0000:00:12.0: BAR 0: assigned [mem 0xd4000000-0xd40000ff]
+
+Reported-by: Lisa Salimbas <lisa.salimbas@canonical.com>
+Reported-by: <thuban@singularity.fr>
+Tested-by: dann frazier <dann.frazier@canonical.com>
+References: https://bugzilla.kernel.org/show_bug.cgi?id=31602
+References: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/647043
+References: https://bugzilla.redhat.com/show_bug.cgi?id=770308
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/pnp/quirks.c |   42 ++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 42 insertions(+)
+
+--- a/drivers/pnp/quirks.c
++++ b/drivers/pnp/quirks.c
+@@ -295,6 +295,45 @@ static void quirk_system_pci_resources(s
+       }
+ }
++#ifdef CONFIG_AMD_NB
++
++#include <asm/amd_nb.h>
++
++static void quirk_amd_mmconfig_area(struct pnp_dev *dev)
++{
++      resource_size_t start, end;
++      struct pnp_resource *pnp_res;
++      struct resource *res;
++      struct resource mmconfig_res, *mmconfig;
++
++      mmconfig = amd_get_mmconfig_range(&mmconfig_res);
++      if (!mmconfig)
++              return;
++
++      list_for_each_entry(pnp_res, &dev->resources, list) {
++              res = &pnp_res->res;
++              if (res->end < mmconfig->start || res->start > mmconfig->end ||
++                  (res->start == mmconfig->start && res->end == mmconfig->end))
++                      continue;
++
++              dev_info(&dev->dev, FW_BUG
++                       "%pR covers only part of AMD MMCONFIG area %pR; adding more reservations\n",
++                       res, mmconfig);
++              if (mmconfig->start < res->start) {
++                      start = mmconfig->start;
++                      end = res->start - 1;
++                      pnp_add_mem_resource(dev, start, end, 0);
++              }
++              if (mmconfig->end > res->end) {
++                      start = res->end + 1;
++                      end = mmconfig->end;
++                      pnp_add_mem_resource(dev, start, end, 0);
++              }
++              break;
++      }
++}
++#endif
++
+ /*
+  *  PnP Quirks
+  *  Cards or devices that need some tweaking due to incomplete resource info
+@@ -322,6 +361,9 @@ static struct pnp_fixup pnp_fixups[] = {
+       /* PnP resources that might overlap PCI BARs */
+       {"PNP0c01", quirk_system_pci_resources},
+       {"PNP0c02", quirk_system_pci_resources},
++#ifdef CONFIG_AMD_NB
++      {"PNP0c01", quirk_amd_mmconfig_area},
++#endif
+       {""}
+ };
diff --git a/queue-3.2/scsi-mpt2sas-fix-for-memory-allocation-error-for-large-host-credits.patch b/queue-3.2/scsi-mpt2sas-fix-for-memory-allocation-error-for-large-host-credits.patch
new file mode 100644 (file)
index 0000000..5d97461
--- /dev/null
@@ -0,0 +1,172 @@
+From aff132d95ffe14eca96cab90597cdd010b457af7 Mon Sep 17 00:00:00 2001
+From: "nagalakshmi.nandigama@lsi.com" <nagalakshmi.nandigama@lsi.com>
+Date: Thu, 1 Dec 2011 07:53:08 +0530
+Subject: SCSI: mpt2sas : Fix for memory allocation error for large host credits
+
+From: "nagalakshmi.nandigama@lsi.com" <nagalakshmi.nandigama@lsi.com>
+
+commit aff132d95ffe14eca96cab90597cdd010b457af7 upstream.
+
+The amount of memory required for tracking chain buffers is rather
+large, and when the host credit count is big, memory allocation
+failure occurs inside __get_free_pages.
+
+The fix is to limit the number of chains to 100,000.  In addition,
+the number of host credits is limited to 30,000 IOs. However this
+limitation can be overridden using the command line option
+max_queue_depth.  The algorithm for calculating the
+reply_post_queue_depth is changed so that it is equal to
+(reply_free_queue_depth + 16), previously it was (reply_free_queue_depth * 2).
+
+Signed-off-by: Nagalakshmi Nandigama <nagalakshmi.nandigama@lsi.com>
+Signed-off-by: James Bottomley <JBottomley@Parallels.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/scsi/mpt2sas/mpt2sas_base.c  |   83 +++++++++++------------------------
+ drivers/scsi/mpt2sas/mpt2sas_scsih.c |    4 -
+ 2 files changed, 29 insertions(+), 58 deletions(-)
+
+--- a/drivers/scsi/mpt2sas/mpt2sas_base.c
++++ b/drivers/scsi/mpt2sas/mpt2sas_base.c
+@@ -65,6 +65,8 @@ static MPT_CALLBACK  mpt_callbacks[MPT_MA
+ #define FAULT_POLLING_INTERVAL 1000 /* in milliseconds */
++#define MAX_HBA_QUEUE_DEPTH   30000
++#define MAX_CHAIN_DEPTH               100000
+ static int max_queue_depth = -1;
+ module_param(max_queue_depth, int, 0);
+ MODULE_PARM_DESC(max_queue_depth, " max controller queue depth ");
+@@ -2311,8 +2313,6 @@ _base_release_memory_pools(struct MPT2SA
+               }
+               if (ioc->chain_dma_pool)
+                       pci_pool_destroy(ioc->chain_dma_pool);
+-      }
+-      if (ioc->chain_lookup) {
+               free_pages((ulong)ioc->chain_lookup, ioc->chain_pages);
+               ioc->chain_lookup = NULL;
+       }
+@@ -2330,9 +2330,7 @@ static int
+ _base_allocate_memory_pools(struct MPT2SAS_ADAPTER *ioc,  int sleep_flag)
+ {
+       struct mpt2sas_facts *facts;
+-      u32 queue_size, queue_diff;
+       u16 max_sge_elements;
+-      u16 num_of_reply_frames;
+       u16 chains_needed_per_io;
+       u32 sz, total_sz, reply_post_free_sz;
+       u32 retry_sz;
+@@ -2359,7 +2357,8 @@ _base_allocate_memory_pools(struct MPT2S
+               max_request_credit = (max_queue_depth < facts->RequestCredit)
+                   ? max_queue_depth : facts->RequestCredit;
+       else
+-              max_request_credit = facts->RequestCredit;
++              max_request_credit = min_t(u16, facts->RequestCredit,
++                  MAX_HBA_QUEUE_DEPTH);
+       ioc->hba_queue_depth = max_request_credit;
+       ioc->hi_priority_depth = facts->HighPriorityCredit;
+@@ -2400,50 +2399,25 @@ _base_allocate_memory_pools(struct MPT2S
+       }
+       ioc->chains_needed_per_io = chains_needed_per_io;
+-      /* reply free queue sizing - taking into account for events */
+-      num_of_reply_frames = ioc->hba_queue_depth + 32;
+-
+-      /* number of replies frames can't be a multiple of 16 */
+-      /* decrease number of reply frames by 1 */
+-      if (!(num_of_reply_frames % 16))
+-              num_of_reply_frames--;
+-
+-      /* calculate number of reply free queue entries
+-       *  (must be multiple of 16)
+-       */
+-
+-      /* (we know reply_free_queue_depth is not a multiple of 16) */
+-      queue_size = num_of_reply_frames;
+-      queue_size += 16 - (queue_size % 16);
+-      ioc->reply_free_queue_depth = queue_size;
+-
+-      /* reply descriptor post queue sizing */
+-      /* this size should be the number of request frames + number of reply
+-       * frames
+-       */
++      /* reply free queue sizing - taking into account for 64 FW events */
++      ioc->reply_free_queue_depth = ioc->hba_queue_depth + 64;
+-      queue_size = ioc->hba_queue_depth + num_of_reply_frames + 1;
+-      /* round up to 16 byte boundary */
+-      if (queue_size % 16)
+-              queue_size += 16 - (queue_size % 16);
+-
+-      /* check against IOC maximum reply post queue depth */
+-      if (queue_size > facts->MaxReplyDescriptorPostQueueDepth) {
+-              queue_diff = queue_size -
+-                  facts->MaxReplyDescriptorPostQueueDepth;
+-
+-              /* round queue_diff up to multiple of 16 */
+-              if (queue_diff % 16)
+-                      queue_diff += 16 - (queue_diff % 16);
+-
+-              /* adjust hba_queue_depth, reply_free_queue_depth,
+-               * and queue_size
+-               */
+-              ioc->hba_queue_depth -= (queue_diff / 2);
+-              ioc->reply_free_queue_depth -= (queue_diff / 2);
+-              queue_size = facts->MaxReplyDescriptorPostQueueDepth;
++      /* align the reply post queue on the next 16 count boundary */
++      if (!ioc->reply_free_queue_depth % 16)
++              ioc->reply_post_queue_depth = ioc->reply_free_queue_depth + 16;
++      else
++              ioc->reply_post_queue_depth = ioc->reply_free_queue_depth +
++                              32 - (ioc->reply_free_queue_depth % 16);
++      if (ioc->reply_post_queue_depth >
++          facts->MaxReplyDescriptorPostQueueDepth) {
++              ioc->reply_post_queue_depth = min_t(u16,
++                  (facts->MaxReplyDescriptorPostQueueDepth -
++                  (facts->MaxReplyDescriptorPostQueueDepth % 16)),
++                  (ioc->hba_queue_depth - (ioc->hba_queue_depth % 16)));
++              ioc->reply_free_queue_depth = ioc->reply_post_queue_depth - 16;
++              ioc->hba_queue_depth = ioc->reply_free_queue_depth - 64;
+       }
+-      ioc->reply_post_queue_depth = queue_size;
++
+       dinitprintk(ioc, printk(MPT2SAS_INFO_FMT "scatter gather: "
+           "sge_in_main_msg(%d), sge_per_chain(%d), sge_per_io(%d), "
+@@ -2529,15 +2503,12 @@ _base_allocate_memory_pools(struct MPT2S
+           "depth(%d)\n", ioc->name, ioc->request,
+           ioc->scsiio_depth));
+-      /* loop till the allocation succeeds */
+-      do {
+-              sz = ioc->chain_depth * sizeof(struct chain_tracker);
+-              ioc->chain_pages = get_order(sz);
+-              ioc->chain_lookup = (struct chain_tracker *)__get_free_pages(
+-                  GFP_KERNEL, ioc->chain_pages);
+-              if (ioc->chain_lookup == NULL)
+-                      ioc->chain_depth -= 100;
+-      } while (ioc->chain_lookup == NULL);
++      ioc->chain_depth = min_t(u32, ioc->chain_depth, MAX_CHAIN_DEPTH);
++      sz = ioc->chain_depth * sizeof(struct chain_tracker);
++      ioc->chain_pages = get_order(sz);
++
++      ioc->chain_lookup = (struct chain_tracker *)__get_free_pages(
++          GFP_KERNEL, ioc->chain_pages);
+       ioc->chain_dma_pool = pci_pool_create("chain pool", ioc->pdev,
+           ioc->request_sz, 16, 0);
+       if (!ioc->chain_dma_pool) {
+--- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c
++++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
+@@ -1007,8 +1007,8 @@ _scsih_get_chain_buffer_tracker(struct M
+       spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
+       if (list_empty(&ioc->free_chain_list)) {
+               spin_unlock_irqrestore(&ioc->scsi_lookup_lock, flags);
+-              printk(MPT2SAS_WARN_FMT "chain buffers not available\n",
+-                  ioc->name);
++              dfailprintk(ioc, printk(MPT2SAS_WARN_FMT "chain buffers not "
++                      "available\n", ioc->name));
+               return NULL;
+       }
+       chain_req = list_entry(ioc->free_chain_list.next,
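
The sizing rework above replaces the old shrink-and-retry allocation loop with straight arithmetic: the reply free queue gets 64 extra entries for firmware events, the reply post queue is padded up to a 16-entry boundary, and all depths are clamped against the limits the IOC reports. A simplified standalone sketch of that round-and-clamp idea follows (made-up values, explicit parentheses around the modulo test, and not the driver's exact formula):

#include <stdio.h>
#include <stdint.h>

/* Round up to the next multiple of 16 (hypothetical helper, not from mpt2sas). */
static uint32_t roundup16(uint32_t v)
{
        return (v % 16) ? v + 16 - (v % 16) : v;
}

int main(void)
{
        uint32_t hba_queue_depth = 500;   /* pretend RequestCredit */
        uint32_t max_post_depth  = 512;   /* pretend MaxReplyDescriptorPostQueueDepth */

        /* reply free queue: request depth plus room for 64 firmware events */
        uint32_t reply_free_queue_depth = hba_queue_depth + 64;

        /* reply post queue: padded up to a 16-entry boundary */
        uint32_t reply_post_queue_depth = roundup16(reply_free_queue_depth);

        /* clamp against the controller limit and re-derive the other depths */
        if (reply_post_queue_depth > max_post_depth) {
                reply_post_queue_depth = max_post_depth - (max_post_depth % 16);
                reply_free_queue_depth = reply_post_queue_depth - 16;
                hba_queue_depth        = reply_free_queue_depth - 64;
        }

        printf("hba=%u free=%u post=%u\n",
               hba_queue_depth, reply_free_queue_depth, reply_post_queue_depth);
        return 0;
}
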
diff --git a/queue-3.2/scsi-mpt2sas-release-spinlock-for-the-raid-device-list-before-blocking-it.patch b/queue-3.2/scsi-mpt2sas-release-spinlock-for-the-raid-device-list-before-blocking-it.patch
new file mode 100644 (file)
index 0000000..cd614a8
--- /dev/null
@@ -0,0 +1,53 @@
+From 30c43282f3d347f47f9e05199d2b14f56f3f2837 Mon Sep 17 00:00:00 2001
+From: "nagalakshmi.nandigama@lsi.com" <nagalakshmi.nandigama@lsi.com>
+Date: Thu, 1 Dec 2011 07:52:56 +0530
+Subject: SCSI: mpt2sas: Release spinlock for the raid device list before blocking it
+
+From: "nagalakshmi.nandigama@lsi.com" <nagalakshmi.nandigama@lsi.com>
+
+commit 30c43282f3d347f47f9e05199d2b14f56f3f2837 upstream.
+
+Added code to release the spinlock that is used to protect the
+raid device list before calling a function that can block. The
+blocking caused a reschedule, and the rescheduled code then tried
+to acquire the same lock, resulting in a panic (the NMI watchdog
+detected a CPU lockup).
+
+Signed-off-by: Nagalakshmi Nandigama <nagalakshmi.nandigama@lsi.com>
+Signed-off-by: James Bottomley <JBottomley@Parallels.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/scsi/mpt2sas/mpt2sas_scsih.c |    7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c
++++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
+@@ -6714,6 +6714,7 @@ _scsih_mark_responding_raid_device(struc
+                       } else
+                               sas_target_priv_data = NULL;
+                       raid_device->responding = 1;
++                      spin_unlock_irqrestore(&ioc->raid_device_lock, flags);
+                       starget_printk(KERN_INFO, raid_device->starget,
+                           "handle(0x%04x), wwid(0x%016llx)\n", handle,
+                           (unsigned long long)raid_device->wwid);
+@@ -6724,16 +6725,16 @@ _scsih_mark_responding_raid_device(struc
+                        */
+                       _scsih_init_warpdrive_properties(ioc, raid_device);
+                       if (raid_device->handle == handle)
+-                              goto out;
++                              return;
+                       printk(KERN_INFO "\thandle changed from(0x%04x)!!!\n",
+                           raid_device->handle);
+                       raid_device->handle = handle;
+                       if (sas_target_priv_data)
+                               sas_target_priv_data->handle = handle;
+-                      goto out;
++                      return;
+               }
+       }
+- out:
++
+       spin_unlock_irqrestore(&ioc->raid_device_lock, flags);
+ }
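
The structural change is the classic drop-the-lock-before-you-sleep pattern: the lock is released as soon as the list entry has been marked, and the later early exits return directly instead of jumping to a shared unlock label. A minimal userspace sketch of the same pattern, using a pthread mutex to stand in for the spinlock (names invented, not driver code):

#include <pthread.h>
#include <stdio.h>

struct raid_dev {
        unsigned int handle;
        int responding;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct raid_dev devices[2] = { { 0x11, 0 }, { 0x22, 0 } };

/* Stand-in for a call that may sleep (e.g. logging or firmware I/O). */
static void may_sleep(struct raid_dev *dev)
{
        printf("handle(0x%04x) responding\n", dev->handle);
}

static void mark_responding(unsigned int handle)
{
        pthread_mutex_lock(&list_lock);
        for (int i = 0; i < 2; i++) {
                if (devices[i].handle != handle)
                        continue;
                devices[i].responding = 1;
                /* Drop the lock before anything that can block or reschedule. */
                pthread_mutex_unlock(&list_lock);
                may_sleep(&devices[i]);
                return;         /* no common "out: unlock" label any more */
        }
        pthread_mutex_unlock(&list_lock);
}

int main(void)
{
        mark_responding(0x22);
        return 0;
}
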
index 5ba28244ddca9d30795a7ee95fe42c1c7d469f58..d03339652837349b9d07f1264532f9978f8b4a32 100644 (file)
@@ -17,3 +17,27 @@ alsa-hda-fix-the-lost-power-setup-of-seconary-pins-after-pm-resume.patch
 drm-radeon-kms-workaround-invalid-avi-infoframe-checksum-issue.patch
 drm-radeon-kms-disable-writeback-on-pre-r300-asics.patch
 radeon-fix-disabling-pci-bus-mastering-on-big-endian-hosts.patch
+pnfs-obj-pnfs-errors-are-communicated-on-iodata-pnfs_error.patch
+pnfs-obj-must-return-layout-on-io-error.patch
+nfs-retry-mounting-nfsroot.patch
+nfsv4.1-fix-backchannel-slotid-off-by-one-bug.patch
+nfs-fix-recent-breakage-to-nfs-error-handling.patch
+nfsv4-include-bitmap-in-nfsv4-get-acl-data.patch
+nfs-fix-regression-in-handling-of-context-option-in-nfsv4.patch
+hid-bump-maximum-global-item-tag-report-size-to-96-bytes.patch
+hid-wiimote-select-input_ff_memless.patch
+ubi-fix-missing-scrub-when-there-is-a-bit-flip.patch
+ubi-fix-use-after-free-on-error-path.patch
+pci-fix-pci_exp_type_rc_ec-value.patch
+pci-msi-disable-msi-interrupts-when-we-initialize-a-pci-device.patch
+x86-pci-ignore-cpu-non-addressable-_crs-reserved-memory-resources.patch
+x86-pci-amd-factor-out-mmconfig-discovery.patch
+x86-pci-build-amd_bus.o-only-when-config_amd_nb-y.patch
+scsi-mpt2sas-release-spinlock-for-the-raid-device-list-before-blocking-it.patch
+scsi-mpt2sas-fix-for-memory-allocation-error-for-large-host-credits.patch
+xen-xenbus-reject-replies-with-payload-xenstore_payload_max.patch
+md-raid1-perform-bad-block-tests-for-writemostly-devices-too.patch
+ima-free-duplicate-measurement-memory.patch
+ima-fix-invalid-memory-reference.patch
+slub-fix-a-possible-memleak-in-__slab_alloc.patch
+pnp-work-around-dell-1536-1546-bios-mmconfig-bug-that-breaks-usb.patch
diff --git a/queue-3.2/slub-fix-a-possible-memleak-in-__slab_alloc.patch b/queue-3.2/slub-fix-a-possible-memleak-in-__slab_alloc.patch
new file mode 100644 (file)
index 0000000..75153c4
--- /dev/null
@@ -0,0 +1,45 @@
+From 73736e0387ba0e6d2b703407b4d26168d31516a7 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <eric.dumazet@gmail.com>
+Date: Tue, 13 Dec 2011 04:57:06 +0100
+Subject: slub: fix a possible memleak in __slab_alloc()
+
+From: Eric Dumazet <eric.dumazet@gmail.com>
+
+commit 73736e0387ba0e6d2b703407b4d26168d31516a7 upstream.
+
+Zhihua Che reported a possible memleak in slub allocator on
+CONFIG_PREEMPT=y builds.
+
+It is possible that the current thread migrates right before disabling irqs
+in __slab_alloc(). We must check c->freelist again, and perform a normal
+allocation instead of clobbering c->freelist.
+
+Many thanks to Zhihua Che for spotting this bug, introduced in 2.6.39
+
+V2: It's also possible an IRQ freed one (or several) object(s) and
+populated c->freelist, so it's not a CONFIG_PREEMPT-only problem.
+
+Reported-by: Zhihua Che <zhihua.che@gmail.com>
+Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
+Acked-by: Christoph Lameter <cl@linux.com>
+Signed-off-by: Pekka Enberg <penberg@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ mm/slub.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -2166,6 +2166,11 @@ redo:
+               goto new_slab;
+       }
++      /* must check again c->freelist in case of cpu migration or IRQ */
++      object = c->freelist;
++      if (object)
++              goto load_freelist;
++
+       stat(s, ALLOC_SLOWPATH);
+       do {
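
The fix re-checks a per-CPU snapshot once nothing can change it any more: if c->freelist became non-empty in the window before irqs were disabled, take the fast path instead of unconditionally grabbing a new slab and leaking the objects already queued there. A toy single-threaded sketch of the shape of that re-check (hypothetical names, not slub code):

#include <stdio.h>
#include <stdlib.h>

struct object { struct object *next; };

struct cpu_cache { struct object *freelist; };

/* Stand-in for local_irq_save(): past this point the freelist cannot change. */
static void enter_critical(void) { }

static struct object *slow_alloc(struct cpu_cache *c)
{
        enter_critical();

        /* Re-check the freelist: an IRQ (or a migration on PREEMPT) may have
         * refilled it between the caller's check and entering the critical
         * section. Using it avoids leaking the objects already queued there. */
        struct object *obj = c->freelist;
        if (obj) {
                c->freelist = obj->next;
                return obj;                   /* fast path, nothing leaked */
        }

        /* ... otherwise fall through to allocating a fresh slab ... */
        return calloc(1, sizeof(*obj));
}

int main(void)
{
        struct object spare = { .next = NULL };
        struct cpu_cache c = { .freelist = &spare };
        printf("%s\n", slow_alloc(&c) == &spare ? "reused freelist" : "new object");
        return 0;
}
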
diff --git a/queue-3.2/ubi-fix-missing-scrub-when-there-is-a-bit-flip.patch b/queue-3.2/ubi-fix-missing-scrub-when-there-is-a-bit-flip.patch
new file mode 100644 (file)
index 0000000..a6869ef
--- /dev/null
@@ -0,0 +1,75 @@
+From e801e128b2200c40a0ec236cf2330b2586b6e05a Mon Sep 17 00:00:00 2001
+From: Bhavesh Parekh <bparekh@nvidia.com>
+Date: Wed, 30 Nov 2011 17:43:42 +0530
+Subject: UBI: fix missing scrub when there is a bit-flip
+
+From: Bhavesh Parekh <bparekh@nvidia.com>
+
+commit e801e128b2200c40a0ec236cf2330b2586b6e05a upstream.
+
+In some cases, when scrubbing a PEB, the scrub fails if we cannot get the
+lock on that PEB. Add the PEB back to the scrub list in that case.
+
+Artem: minor amendments.
+
+Signed-off-by: Bhavesh Parekh <bparekh@nvidia.com>
+Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/mtd/ubi/eba.c |    6 ++++--
+ drivers/mtd/ubi/ubi.h |    2 ++
+ drivers/mtd/ubi/wl.c  |    5 ++++-
+ 3 files changed, 10 insertions(+), 3 deletions(-)
+
+--- a/drivers/mtd/ubi/eba.c
++++ b/drivers/mtd/ubi/eba.c
+@@ -1028,12 +1028,14 @@ int ubi_eba_copy_leb(struct ubi_device *
+        * 'ubi_wl_put_peb()' function on the @ubi->move_mutex. In turn, we are
+        * holding @ubi->move_mutex and go sleep on the LEB lock. So, if the
+        * LEB is already locked, we just do not move it and return
+-       * %MOVE_CANCEL_RACE, which means that UBI will re-try, but later.
++       * %MOVE_RETRY. Note, we do not return %MOVE_CANCEL_RACE here because
++       * we do not know the reasons of the contention - it may be just a
++       * normal I/O on this LEB, so we want to re-try.
+        */
+       err = leb_write_trylock(ubi, vol_id, lnum);
+       if (err) {
+               dbg_wl("contention on LEB %d:%d, cancel", vol_id, lnum);
+-              return MOVE_CANCEL_RACE;
++              return MOVE_RETRY;
+       }
+       /*
+--- a/drivers/mtd/ubi/ubi.h
++++ b/drivers/mtd/ubi/ubi.h
+@@ -120,6 +120,7 @@ enum {
+  *                     PEB
+  * MOVE_CANCEL_BITFLIPS: canceled because a bit-flip was detected in the
+  *                       target PEB
++ * MOVE_RETRY: retry scrubbing the PEB
+  */
+ enum {
+       MOVE_CANCEL_RACE = 1,
+@@ -127,6 +128,7 @@ enum {
+       MOVE_TARGET_RD_ERR,
+       MOVE_TARGET_WR_ERR,
+       MOVE_CANCEL_BITFLIPS,
++      MOVE_RETRY,
+ };
+ /**
+--- a/drivers/mtd/ubi/wl.c
++++ b/drivers/mtd/ubi/wl.c
+@@ -795,7 +795,10 @@ static int wear_leveling_worker(struct u
+                       protect = 1;
+                       goto out_not_moved;
+               }
+-
++              if (err == MOVE_RETRY) {
++                      scrubbing = 1;
++                      goto out_not_moved;
++              }
+               if (err == MOVE_CANCEL_BITFLIPS || err == MOVE_TARGET_WR_ERR ||
+                   err == MOVE_TARGET_RD_ERR) {
+                       /*
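
The new MOVE_RETRY return value lets the wear-levelling worker distinguish "the LEB is merely busy, scrub it again later" from a genuine cancel, and put the PEB back on the scrub queue instead of silently dropping it. A compact sketch of that error-code-driven dispatch (illustrative only, with invented names):

#include <stdio.h>

enum move_result { MOVE_OK, MOVE_CANCEL_RACE, MOVE_RETRY };

/* Stand-in for copying a LEB when its write lock could not be taken. */
static enum move_result copy_leb(int contended)
{
        return contended ? MOVE_RETRY : MOVE_OK;
}

static void requeue_for_scrub(int pnum)
{
        printf("PEB %d put back on the scrub list\n", pnum);
}

static void wear_leveling_worker(int pnum, int contended)
{
        switch (copy_leb(contended)) {
        case MOVE_OK:
                printf("PEB %d scrubbed\n", pnum);
                break;
        case MOVE_RETRY:
                /* contention only: keep the scrubbing flag and retry later */
                requeue_for_scrub(pnum);
                break;
        case MOVE_CANCEL_RACE:
                /* someone else is about to erase it; nothing to do */
                break;
        }
}

int main(void)
{
        wear_leveling_worker(7, 1);
        return 0;
}
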
diff --git a/queue-3.2/ubi-fix-use-after-free-on-error-path.patch b/queue-3.2/ubi-fix-use-after-free-on-error-path.patch
new file mode 100644 (file)
index 0000000..1221219
--- /dev/null
@@ -0,0 +1,50 @@
+From e57e0d8e818512047fe379157c3f77f1b9fabffb Mon Sep 17 00:00:00 2001
+From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
+Date: Thu, 5 Jan 2012 10:47:18 +0200
+Subject: UBI: fix use-after-free on error path
+
+From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
+
+commit e57e0d8e818512047fe379157c3f77f1b9fabffb upstream.
+
+When we fail to erase a PEB, we free the corresponding erase entry object,
+but then re-schedule this object if the error code was something like -EAGAIN.
+Obviously, it is a bug to use the object after we have freed it.
+
+Reported-by: Emese Revfy <re.emese@gmail.com>
+Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/mtd/ubi/wl.c |    7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/drivers/mtd/ubi/wl.c
++++ b/drivers/mtd/ubi/wl.c
+@@ -1052,7 +1052,6 @@ static int erase_worker(struct ubi_devic
+       ubi_err("failed to erase PEB %d, error %d", pnum, err);
+       kfree(wl_wrk);
+-      kmem_cache_free(ubi_wl_entry_slab, e);
+       if (err == -EINTR || err == -ENOMEM || err == -EAGAIN ||
+           err == -EBUSY) {
+@@ -1065,14 +1064,16 @@ static int erase_worker(struct ubi_devic
+                       goto out_ro;
+               }
+               return err;
+-      } else if (err != -EIO) {
++      }
++
++      kmem_cache_free(ubi_wl_entry_slab, e);
++      if (err != -EIO)
+               /*
+                * If this is not %-EIO, we have no idea what to do. Scheduling
+                * this physical eraseblock for erasure again would cause
+                * errors again and again. Well, lets switch to R/O mode.
+                */
+               goto out_ro;
+-      }
+       /* It is %-EIO, the PEB went bad */
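
The fix moves the freeing of the wear-levelling entry past the branch that may re-schedule it: transient errors re-queue the entry and return, and only the paths that are truly done with the object free it. The shape of the corrected flow, as a hedged standalone sketch (invented names, not UBI code):

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>

struct wl_entry { int pnum; };

static void schedule_erase(struct wl_entry *e)
{
        printf("re-scheduling erase of PEB %d\n", e->pnum);
}

static int erase_worker(struct wl_entry *e, int err)
{
        if (err == 0) {
                free(e);                /* done with the entry */
                return 0;
        }

        if (err == -EAGAIN || err == -EBUSY || err == -ENOMEM || err == -EINTR) {
                /* Transient failure: the entry is reused, so it must NOT be
                 * freed before this point (that was the use-after-free). */
                schedule_erase(e);
                return err;
        }

        /* Fatal failure: the entry is no longer needed, free it here. */
        free(e);
        return err;
}

int main(void)
{
        struct wl_entry *e = malloc(sizeof(*e));
        e->pnum = 42;
        erase_worker(e, -EAGAIN);       /* entry survives and is reused */
        free(e);
        return 0;
}
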
diff --git a/queue-3.2/x86-pci-amd-factor-out-mmconfig-discovery.patch b/queue-3.2/x86-pci-amd-factor-out-mmconfig-discovery.patch
new file mode 100644 (file)
index 0000000..cf80864
--- /dev/null
@@ -0,0 +1,154 @@
+From 24d25dbfa63c376323096660bfa9ad45a08870ce Mon Sep 17 00:00:00 2001
+From: Bjorn Helgaas <bhelgaas@google.com>
+Date: Thu, 5 Jan 2012 14:27:19 -0700
+Subject: x86/PCI: amd: factor out MMCONFIG discovery
+
+From: Bjorn Helgaas <bhelgaas@google.com>
+
+commit 24d25dbfa63c376323096660bfa9ad45a08870ce upstream.
+
+This factors out the AMD native MMCONFIG discovery so we can use it
+outside amd_bus.c.
+
+amd_bus.c reads AMD MSRs so it can remove the MMCONFIG area from the
+PCI resources.  We may also need the MMCONFIG information to work
+around BIOS defects in the ACPI MCFG table.
+
+Cc: Borislav Petkov <borislav.petkov@amd.com>
+Cc: Yinghai Lu <yinghai@kernel.org>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/include/asm/amd_nb.h |    2 ++
+ arch/x86/kernel/amd_nb.c      |   31 +++++++++++++++++++++++++++++++
+ arch/x86/pci/amd_bus.c        |   42 +++++++++++-------------------------------
+ 3 files changed, 44 insertions(+), 31 deletions(-)
+
+--- a/arch/x86/include/asm/amd_nb.h
++++ b/arch/x86/include/asm/amd_nb.h
+@@ -1,6 +1,7 @@
+ #ifndef _ASM_X86_AMD_NB_H
+ #define _ASM_X86_AMD_NB_H
++#include <linux/ioport.h>
+ #include <linux/pci.h>
+ struct amd_nb_bus_dev_range {
+@@ -13,6 +14,7 @@ extern const struct pci_device_id amd_nb
+ extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[];
+ extern bool early_is_amd_nb(u32 value);
++extern struct resource *amd_get_mmconfig_range(struct resource *res);
+ extern int amd_cache_northbridges(void);
+ extern void amd_flush_garts(void);
+ extern int amd_numa_init(void);
+--- a/arch/x86/kernel/amd_nb.c
++++ b/arch/x86/kernel/amd_nb.c
+@@ -119,6 +119,37 @@ bool __init early_is_amd_nb(u32 device)
+       return false;
+ }
++struct resource *amd_get_mmconfig_range(struct resource *res)
++{
++      u32 address;
++      u64 base, msr;
++      unsigned segn_busn_bits;
++
++      if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
++              return NULL;
++
++      /* assume all cpus from fam10h have mmconfig */
++        if (boot_cpu_data.x86 < 0x10)
++              return NULL;
++
++      address = MSR_FAM10H_MMIO_CONF_BASE;
++      rdmsrl(address, msr);
++
++      /* mmconfig is not enabled */
++      if (!(msr & FAM10H_MMIO_CONF_ENABLE))
++              return NULL;
++
++      base = msr & (FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT);
++
++      segn_busn_bits = (msr >> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) &
++                       FAM10H_MMIO_CONF_BUSRANGE_MASK;
++
++      res->flags = IORESOURCE_MEM;
++      res->start = base;
++      res->end = base + (1ULL<<(segn_busn_bits + 20)) - 1;
++      return res;
++}
++
+ int amd_get_subcaches(int cpu)
+ {
+       struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link;
+--- a/arch/x86/pci/amd_bus.c
++++ b/arch/x86/pci/amd_bus.c
+@@ -30,34 +30,6 @@ static struct pci_hostbridge_probe pci_p
+       { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1300 },
+ };
+-static u64 __initdata fam10h_mmconf_start;
+-static u64 __initdata fam10h_mmconf_end;
+-static void __init get_pci_mmcfg_amd_fam10h_range(void)
+-{
+-      u32 address;
+-      u64 base, msr;
+-      unsigned segn_busn_bits;
+-
+-      /* assume all cpus from fam10h have mmconf */
+-        if (boot_cpu_data.x86 < 0x10)
+-              return;
+-
+-      address = MSR_FAM10H_MMIO_CONF_BASE;
+-      rdmsrl(address, msr);
+-
+-      /* mmconfig is not enable */
+-      if (!(msr & FAM10H_MMIO_CONF_ENABLE))
+-              return;
+-
+-      base = msr & (FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT);
+-
+-      segn_busn_bits = (msr >> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) &
+-                       FAM10H_MMIO_CONF_BUSRANGE_MASK;
+-
+-      fam10h_mmconf_start = base;
+-      fam10h_mmconf_end = base + (1ULL<<(segn_busn_bits + 20)) - 1;
+-}
+-
+ #define RANGE_NUM 16
+ /**
+@@ -85,6 +57,9 @@ static int __init early_fill_mp_bus_info
+       u64 val;
+       u32 address;
+       bool found;
++      struct resource fam10h_mmconf_res, *fam10h_mmconf;
++      u64 fam10h_mmconf_start;
++      u64 fam10h_mmconf_end;
+       if (!early_pci_allowed())
+               return -1;
+@@ -211,12 +186,17 @@ static int __init early_fill_mp_bus_info
+               subtract_range(range, RANGE_NUM, 0, end);
+       /* get mmconfig */
+-      get_pci_mmcfg_amd_fam10h_range();
++      fam10h_mmconf = amd_get_mmconfig_range(&fam10h_mmconf_res);
+       /* need to take out mmconf range */
+-      if (fam10h_mmconf_end) {
+-              printk(KERN_DEBUG "Fam 10h mmconf [%llx, %llx]\n", fam10h_mmconf_start, fam10h_mmconf_end);
++      if (fam10h_mmconf) {
++              printk(KERN_DEBUG "Fam 10h mmconf %pR\n", fam10h_mmconf);
++              fam10h_mmconf_start = fam10h_mmconf->start;
++              fam10h_mmconf_end = fam10h_mmconf->end;
+               subtract_range(range, RANGE_NUM, fam10h_mmconf_start,
+                                fam10h_mmconf_end + 1);
++      } else {
++              fam10h_mmconf_start = 0;
++              fam10h_mmconf_end = 0;
+       }
+       /* mmio resource */
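
The helper packs the decoded MSR into a struct resource so callers only deal with a [start, end] range; the size works out to one megabyte of ECAM config space per bus (4 KiB per function x 8 functions x 32 devices), which is where the "+ 20" shift comes from. A few lines of standalone arithmetic showing that relationship (sample values only, not the MSR layout itself):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t base = 0xe0000000ULL;  /* sample MMCONFIG base from the MSR */
        unsigned busrange_bits = 8;     /* sample: window covers 256 buses   */

        /* 4 KiB of config space per function * 8 functions * 32 devices
         * = 1 MiB per bus, hence the "+ 20" shift. */
        uint64_t end = base + (1ULL << (busrange_bits + 20)) - 1;

        printf("MMCONFIG window [%#llx-%#llx] (%u buses)\n",
               (unsigned long long)base, (unsigned long long)end,
               1u << busrange_bits);
        return 0;
}
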
diff --git a/queue-3.2/x86-pci-build-amd_bus.o-only-when-config_amd_nb-y.patch b/queue-3.2/x86-pci-build-amd_bus.o-only-when-config_amd_nb-y.patch
new file mode 100644 (file)
index 0000000..9e779ec
--- /dev/null
@@ -0,0 +1,35 @@
+From 5cf9a4e69c1ff0ccdd1d2b7404f95c0531355274 Mon Sep 17 00:00:00 2001
+From: Bjorn Helgaas <bhelgaas@google.com>
+Date: Thu, 12 Jan 2012 08:01:40 -0700
+Subject: x86/PCI: build amd_bus.o only when CONFIG_AMD_NB=y
+
+From: Bjorn Helgaas <bhelgaas@google.com>
+
+commit 5cf9a4e69c1ff0ccdd1d2b7404f95c0531355274 upstream.
+
+We only need amd_bus.o for AMD systems with PCI.  arch/x86/pci/Makefile
+already depends on CONFIG_PCI=y, so this patch just adds the dependency
+on CONFIG_AMD_NB.
+
+Cc: Yinghai Lu <yinghai@kernel.org>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/pci/Makefile |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/pci/Makefile
++++ b/arch/x86/pci/Makefile
+@@ -18,8 +18,9 @@ obj-$(CONFIG_X86_NUMAQ)              += numaq_32.o
+ obj-$(CONFIG_X86_MRST)                += mrst.o
+ obj-y                         += common.o early.o
+-obj-y                         += amd_bus.o bus_numa.o
++obj-y                         += bus_numa.o
++obj-$(CONFIG_AMD_NB)          += amd_bus.o
+ obj-$(CONFIG_PCI_CNB20LE_QUIRK)       += broadcom_bus.o
+ ifeq ($(CONFIG_PCI_DEBUG),y)
diff --git a/queue-3.2/x86-pci-ignore-cpu-non-addressable-_crs-reserved-memory-resources.patch b/queue-3.2/x86-pci-ignore-cpu-non-addressable-_crs-reserved-memory-resources.patch
new file mode 100644 (file)
index 0000000..dc98bf0
--- /dev/null
@@ -0,0 +1,62 @@
+From ae5cd86455381282ece162966183d3f208c6fad7 Mon Sep 17 00:00:00 2001
+From: Gary Hade <garyhade@us.ibm.com>
+Date: Mon, 14 Nov 2011 15:42:16 -0800
+Subject: x86/PCI: Ignore CPU non-addressable _CRS reserved memory resources
+
+From: Gary Hade <garyhade@us.ibm.com>
+
+commit ae5cd86455381282ece162966183d3f208c6fad7 upstream.
+
+This ensures that a _CRS-reserved host bridge window or window region is
+not used if it is not addressable by the CPU.  The new code either trims
+the window to exclude the non-addressable portion or totally ignores the
+window if the entire window is non-addressable.
+
+The current code has been shown to be problematic with 32-bit non-PAE
+kernels on systems where _CRS reserves resources above 4GB.
+
+Signed-off-by: Gary Hade <garyhade@us.ibm.com>
+Reviewed-by: Bjorn Helgaas <bhelgaas@google.com>
+Cc: Thomas Renninger <trenn@novell.com>
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/pci/acpi.c |   18 ++++++++++++++++--
+ 1 file changed, 16 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/pci/acpi.c
++++ b/arch/x86/pci/acpi.c
+@@ -149,7 +149,7 @@ setup_resource(struct acpi_resource *acp
+       struct acpi_resource_address64 addr;
+       acpi_status status;
+       unsigned long flags;
+-      u64 start, end;
++      u64 start, orig_end, end;
+       status = resource_to_addr(acpi_res, &addr);
+       if (!ACPI_SUCCESS(status))
+@@ -165,7 +165,21 @@ setup_resource(struct acpi_resource *acp
+               return AE_OK;
+       start = addr.minimum + addr.translation_offset;
+-      end = addr.maximum + addr.translation_offset;
++      orig_end = end = addr.maximum + addr.translation_offset;
++
++      /* Exclude non-addressable range or non-addressable portion of range */
++      end = min(end, (u64)iomem_resource.end);
++      if (end <= start) {
++              dev_info(&info->bridge->dev,
++                      "host bridge window [%#llx-%#llx] "
++                      "(ignored, not CPU addressable)\n", start, orig_end);
++              return AE_OK;
++      } else if (orig_end != end) {
++              dev_info(&info->bridge->dev,
++                      "host bridge window [%#llx-%#llx] "
++                      "([%#llx-%#llx] ignored, not CPU addressable)\n",
++                      start, orig_end, end + 1, orig_end);
++      }
+       res = &info->res[info->res_num];
+       res->name = info->name;
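
The trimming logic is plain interval math against the top of the CPU-addressable range (iomem_resource.end): clamp the window's end, drop the window entirely if nothing is left, and report what was cut otherwise. A small self-contained sketch of that decision (hypothetical limit value, not the kernel's):

#include <stdio.h>
#include <stdint.h>

/* Returns 1 if the window should be used (possibly trimmed), 0 if ignored. */
static int trim_window(uint64_t start, uint64_t *end, uint64_t limit)
{
        uint64_t orig_end = *end;

        if (*end > limit)
                *end = limit;                   /* clamp to addressable range */

        if (*end <= start) {
                printf("window [%#llx-%#llx] ignored, not CPU addressable\n",
                       (unsigned long long)start, (unsigned long long)orig_end);
                return 0;
        }
        if (*end != orig_end)
                printf("window trimmed: [%#llx-%#llx] dropped\n",
                       (unsigned long long)(*end + 1),
                       (unsigned long long)orig_end);
        return 1;
}

int main(void)
{
        uint64_t limit = 0xffffffffULL;         /* pretend 32-bit non-PAE limit */
        uint64_t end = 0x17fffffffULL;          /* window reaches above 4 GiB   */

        trim_window(0xc0000000ULL, &end, limit);
        return 0;
}
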
diff --git a/queue-3.2/xen-xenbus-reject-replies-with-payload-xenstore_payload_max.patch b/queue-3.2/xen-xenbus-reject-replies-with-payload-xenstore_payload_max.patch
new file mode 100644 (file)
index 0000000..f7ea4ad
--- /dev/null
@@ -0,0 +1,73 @@
+From 9e7860cee18241633eddb36a4c34c7b61d8cecbc Mon Sep 17 00:00:00 2001
+From: Ian Campbell <Ian.Campbell@citrix.com>
+Date: Wed, 4 Jan 2012 09:34:49 +0000
+Subject: xen/xenbus: Reject replies with payload > XENSTORE_PAYLOAD_MAX.
+
+From: Ian Campbell <Ian.Campbell@citrix.com>
+
+commit 9e7860cee18241633eddb36a4c34c7b61d8cecbc upstream.
+
+Haogang Chen found out that:
+
+ There is a potential integer overflow in process_msg() that could result
+ in a cross-domain attack.
+
+       body = kmalloc(msg->hdr.len + 1, GFP_NOIO | __GFP_HIGH);
+
+ When a malicious guest passes 0xffffffff in msg->hdr.len, the subsequent
+ call to xb_read() would write to a zero-length buffer.
+
+ The other end of this connection is always the xenstore backend daemon
+ so there is no guest (malicious or otherwise) which can do this. The
+ xenstore daemon is a trusted component in the system.
+
+ However, this seems like a reasonable robustness improvement, so we should
+ have it.
+
+And when Ian read the API docs, he found that:
+        The payload length (len field of the header) is limited to 4096
+        (XENSTORE_PAYLOAD_MAX) in both directions.  If a client exceeds the
+        limit, its xenstored connection will be immediately killed by
+        xenstored, which is usually catastrophic from the client's point of
+        view.  Clients (particularly domains, which cannot just reconnect)
+        should avoid this.
+
+so this patch checks against that instead.
+
+This also avoids a potential integer overflow pointed out by Haogang Chen.
+
+Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
+Cc: Haogang Chen <haogangchen@gmail.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/xen/xenbus/xenbus_xs.c     |    6 ++++++
+ include/xen/interface/io/xs_wire.h |    3 +++
+ 2 files changed, 9 insertions(+)
+
+--- a/drivers/xen/xenbus/xenbus_xs.c
++++ b/drivers/xen/xenbus/xenbus_xs.c
+@@ -801,6 +801,12 @@ static int process_msg(void)
+               goto out;
+       }
++      if (msg->hdr.len > XENSTORE_PAYLOAD_MAX) {
++              kfree(msg);
++              err = -EINVAL;
++              goto out;
++      }
++
+       body = kmalloc(msg->hdr.len + 1, GFP_NOIO | __GFP_HIGH);
+       if (body == NULL) {
+               kfree(msg);
+--- a/include/xen/interface/io/xs_wire.h
++++ b/include/xen/interface/io/xs_wire.h
+@@ -87,4 +87,7 @@ struct xenstore_domain_interface {
+     XENSTORE_RING_IDX rsp_cons, rsp_prod;
+ };
++/* Violating this is very bad.  See docs/misc/xenstore.txt. */
++#define XENSTORE_PAYLOAD_MAX 4096
++
+ #endif /* _XS_WIRE_H */
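
The overflow being guarded against is the classic "len + 1 wraps to zero" case: with a 32-bit length of 0xffffffff, kmalloc(len + 1) asks for zero bytes and the subsequent xb_read() of len bytes runs past the buffer. Checking the length against the protocol limit first makes the addition safe. A tiny standalone demonstration of the wraparound (not xenbus code; only XENSTORE_PAYLOAD_MAX comes from the patch above):

#include <stdio.h>
#include <stdint.h>

#define XENSTORE_PAYLOAD_MAX 4096   /* protocol limit, as in xs_wire.h */

int main(void)
{
        uint32_t len = 0xffffffffu;          /* hostile header length */
        uint32_t alloc = len + 1;            /* wraps to 0 in 32-bit arithmetic */

        printf("len=%u  len+1=%u\n", len, alloc);

        if (len > XENSTORE_PAYLOAD_MAX) {
                printf("rejected before any allocation\n");
                return 1;
        }
        /* only now would it be safe to allocate len + 1 bytes */
        return 0;
}
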