]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
.38 patches
author Greg Kroah-Hartman <gregkh@suse.de>
Thu, 19 May 2011 00:40:02 +0000 (17:40 -0700)
committer Greg Kroah-Hartman <gregkh@suse.de>
Thu, 19 May 2011 00:40:02 +0000 (17:40 -0700)
23 files changed:
queue-2.6.38/block-rescan-partitions-on-invalidated-devices-on-enomedia-too.patch [new file with mode: 0644]
queue-2.6.38/cdrom-always-check_disk_change-on-open.patch [new file with mode: 0644]
queue-2.6.38/cifs-add-fallback-in-is_path_accessible-for-old-servers.patch [new file with mode: 0644]
queue-2.6.38/clocksource-install-completely-before-selecting.patch [new file with mode: 0644]
queue-2.6.38/drm-radeon-kms-fix-extended-lvds-info-parsing.patch [new file with mode: 0644]
queue-2.6.38/fix-cx88-remote-control-input.patch [new file with mode: 0644]
queue-2.6.38/libata-fix-oops-when-lpm-is-used-with-pmp.patch [new file with mode: 0644]
queue-2.6.38/megaraid_sas-sanity-check-user-supplied-length-before-passing-it-to-dma_alloc_coherent.patch [new file with mode: 0644]
queue-2.6.38/rapidio-fix-default-routing-initialization.patch [new file with mode: 0644]
queue-2.6.38/revert-mmc-fix-a-race-between-card-detect-rescan-and-clock-gate-work-instances.patch [new file with mode: 0644]
queue-2.6.38/revert-x86-amd-fix-apic-timer-erratum-400-affecting-k8-rev.a-e-processors.patch [new file with mode: 0644]
queue-2.6.38/series
queue-2.6.38/tick-clear-broadcast-active-bit-when-switching-to-oneshot.patch [new file with mode: 0644]
queue-2.6.38/tmpfs-fix-off-by-one-in-max_blocks-checks.patch [new file with mode: 0644]
queue-2.6.38/tmpfs-fix-race-between-swapoff-and-writepage.patch [new file with mode: 0644]
queue-2.6.38/tmpfs-fix-race-between-umount-and-writepage.patch [new file with mode: 0644]
queue-2.6.38/tmpfs-fix-spurious-enospc-when-racing-with-unswap.patch [new file with mode: 0644]
queue-2.6.38/v4l-release-module-if-subdev-registration-fails.patch [new file with mode: 0644]
queue-2.6.38/vmxnet3-fix-inconsistent-lro-state-after-initialization.patch [new file with mode: 0644]
queue-2.6.38/x86-amd-fix-arat-feature-setting-again.patch [new file with mode: 0644]
queue-2.6.38/x86-apic-fix-spurious-error-interrupts-triggering-on-all-non-boot-aps.patch [new file with mode: 0644]
queue-2.6.38/x86-fix-uv-bau-for-non-consecutive-nasids.patch [new file with mode: 0644]
queue-2.6.38/x86-mce-amd-fix-leaving-freed-data-in-a-list.patch [new file with mode: 0644]

diff --git a/queue-2.6.38/block-rescan-partitions-on-invalidated-devices-on-enomedia-too.patch b/queue-2.6.38/block-rescan-partitions-on-invalidated-devices-on-enomedia-too.patch
new file mode 100644 (file)
index 0000000..c1a6c5f
--- /dev/null
@@ -0,0 +1,98 @@
+From 02e352287a40bd456eb78df705bf888bc3161d3f Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Fri, 29 Apr 2011 10:15:20 +0200
+Subject: block: rescan partitions on invalidated devices on -ENOMEDIA too
+
+From: Tejun Heo <tj@kernel.org>
+
+commit 02e352287a40bd456eb78df705bf888bc3161d3f upstream.
+
+__blkdev_get() doesn't rescan partitions if disk->fops->open() fails,
+which leads to ghost partition devices lingering after medium removal
+is known to both the kernel and userland.  The behavior also creates a
+subtle inconsistency where O_NONBLOCK open, which doesn't fail even if
+there's no medium, clears the ghost partitions, which is exploited to
+work around the problem from userland.
+
+Fix it by updating __blkdev_get() to issue partition rescan after
+-ENOMEDIUM too.
+
+This was reported in the following bz.
+
+ https://bugzilla.kernel.org/show_bug.cgi?id=13029
+
+Stable: 2.6.38
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Reported-by: David Zeuthen <zeuthen@gmail.com>
+Reported-by: Martin Pitt <martin.pitt@ubuntu.com>
+Reported-by: Kay Sievers <kay.sievers@vrfy.org>
+Tested-by: Kay Sievers <kay.sievers@vrfy.org>
+Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
+Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/block_dev.c |   27 ++++++++++++++++++---------
+ 1 file changed, 18 insertions(+), 9 deletions(-)
+
+--- a/fs/block_dev.c
++++ b/fs/block_dev.c
+@@ -1099,6 +1099,7 @@ static int __blkdev_get(struct block_dev
+                       if (!bdev->bd_part)
+                               goto out_clear;
++                      ret = 0;
+                       if (disk->fops->open) {
+                               ret = disk->fops->open(bdev, mode);
+                               if (ret == -ERESTARTSYS) {
+@@ -1114,9 +1115,18 @@ static int __blkdev_get(struct block_dev
+                                       mutex_unlock(&bdev->bd_mutex);
+                                       goto restart;
+                               }
+-                              if (ret)
+-                                      goto out_clear;
+                       }
++                      /*
++                       * If the device is invalidated, rescan partition
++                       * if open succeeded or failed with -ENOMEDIUM.
++                       * The latter is necessary to prevent ghost
++                       * partitions on a removed medium.
++                       */
++                      if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
++                              rescan_partitions(disk, bdev);
++                      if (ret)
++                              goto out_clear;
++
+                       if (!bdev->bd_openers) {
+                               bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
+                               bdi = blk_get_backing_dev_info(bdev);
+@@ -1124,8 +1134,6 @@ static int __blkdev_get(struct block_dev
+                                       bdi = &default_backing_dev_info;
+                               bdev_inode_switch_bdi(bdev->bd_inode, bdi);
+                       }
+-                      if (bdev->bd_invalidated)
+-                              rescan_partitions(disk, bdev);
+               } else {
+                       struct block_device *whole;
+                       whole = bdget_disk(disk, 0);
+@@ -1152,13 +1160,14 @@ static int __blkdev_get(struct block_dev
+               put_disk(disk);
+               disk = NULL;
+               if (bdev->bd_contains == bdev) {
+-                      if (bdev->bd_disk->fops->open) {
++                      ret = 0;
++                      if (bdev->bd_disk->fops->open)
+                               ret = bdev->bd_disk->fops->open(bdev, mode);
+-                              if (ret)
+-                                      goto out_unlock_bdev;
+-                      }
+-                      if (bdev->bd_invalidated)
++                      /* the same as first opener case, read comment there */
++                      if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
+                               rescan_partitions(bdev->bd_disk, bdev);
++                      if (ret)
++                              goto out_unlock_bdev;
+               }
+       }
+       bdev->bd_openers++;
diff --git a/queue-2.6.38/cdrom-always-check_disk_change-on-open.patch b/queue-2.6.38/cdrom-always-check_disk_change-on-open.patch
new file mode 100644 (file)
index 0000000..ef40be0
--- /dev/null
@@ -0,0 +1,64 @@
+From bf2253a6f00e8fea5b026e471e9f0d0a1b3621f2 Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Fri, 29 Apr 2011 10:15:14 +0200
+Subject: cdrom: always check_disk_change() on open
+
+From: Tejun Heo <tj@kernel.org>
+
+commit bf2253a6f00e8fea5b026e471e9f0d0a1b3621f2 upstream.
+
+cdrom_open() called check_disk_change() after the rest of open path
+succeeded which leads to the following bizarre behavior.
+
+* After media change, if the device is opened without O_NONBLOCK,
+  open_for_data() naturally fails with -ENOMEDIUM and
+  check_disk_change() is never called.  The media is known to be gone
+  and the open failure makes it obvious to the userland but device
+  invalidation never happens.
+
+* But if the device is opened with O_NONBLOCK, all the checks are
+  bypassed and cdrom_open() doesn't notice that the media is not there
+  and check_disk_change() is called and invalidation happens.
+
+There's nothing to be gained by avoiding calling check_disk_change()
+on open failure.  Common cases end up calling check_disk_change()
+anyway.  All we get is inconsistent behavior.
+
+Fix it by moving check_disk_change() invocation to the top of
+cdrom_open() so that it always gets called regardless of how the rest
+of open proceeds.
+
+Stable: 2.6.38
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Reported-by: Amit Shah <amit.shah@redhat.com>
+Tested-by: Amit Shah <amit.shah@redhat.com>
+Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/cdrom/cdrom.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/cdrom/cdrom.c
++++ b/drivers/cdrom/cdrom.c
+@@ -986,6 +986,9 @@ int cdrom_open(struct cdrom_device_info
+       cdinfo(CD_OPEN, "entering cdrom_open\n"); 
++      /* open is event synchronization point, check events first */
++      check_disk_change(bdev);
++
+       /* if this was a O_NONBLOCK open and we should honor the flags,
+        * do a quick open without drive/disc integrity checks. */
+       cdi->use_count++;
+@@ -1012,9 +1015,6 @@ int cdrom_open(struct cdrom_device_info
+       cdinfo(CD_OPEN, "Use count for \"/dev/%s\" now %d\n",
+                       cdi->name, cdi->use_count);
+-      /* Do this on open.  Don't wait for mount, because they might
+-          not be mounting, but opening with O_NONBLOCK */
+-      check_disk_change(bdev);
+       return 0;
+ err_release:
+       if (CDROM_CAN(CDC_LOCK) && cdi->options & CDO_LOCK) {
diff --git a/queue-2.6.38/cifs-add-fallback-in-is_path_accessible-for-old-servers.patch b/queue-2.6.38/cifs-add-fallback-in-is_path_accessible-for-old-servers.patch
new file mode 100644 (file)
index 0000000..8d19a91
--- /dev/null
@@ -0,0 +1,36 @@
+From 221d1d797202984cb874e3ed9f1388593d34ee22 Mon Sep 17 00:00:00 2001
+From: Jeff Layton <jlayton@redhat.com>
+Date: Tue, 17 May 2011 06:40:30 -0400
+Subject: cifs: add fallback in is_path_accessible for old servers
+
+From: Jeff Layton <jlayton@redhat.com>
+
+commit 221d1d797202984cb874e3ed9f1388593d34ee22 upstream.
+
+The is_path_accessible check uses a QPathInfo call, which isn't
+supported by ancient win9x era servers. Fall back to an older
+SMBQueryInfo call if it fails with the magic error codes.
+
+Reported-and-Tested-by: Sandro Bonazzola <sandro.bonazzola@gmail.com>
+Signed-off-by: Jeff Layton <jlayton@redhat.com>
+Signed-off-by: Steve French <sfrench@us.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/cifs/connect.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/fs/cifs/connect.c
++++ b/fs/cifs/connect.c
+@@ -2657,6 +2657,11 @@ is_path_accessible(int xid, struct cifsT
+                             0 /* not legacy */, cifs_sb->local_nls,
+                             cifs_sb->mnt_cifs_flags &
+                               CIFS_MOUNT_MAP_SPECIAL_CHR);
++
++      if (rc == -EOPNOTSUPP || rc == -EINVAL)
++              rc = SMBQueryInformation(xid, tcon, full_path, pfile_info,
++                              cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
++                                CIFS_MOUNT_MAP_SPECIAL_CHR);
+       kfree(pfile_info);
+       return rc;
+ }
diff --git a/queue-2.6.38/clocksource-install-completely-before-selecting.patch b/queue-2.6.38/clocksource-install-completely-before-selecting.patch
new file mode 100644 (file)
index 0000000..dc86786
--- /dev/null
@@ -0,0 +1,58 @@
+From e05b2efb82596905ebfe88e8612ee81dec9b6592 Mon Sep 17 00:00:00 2001
+From: john stultz <johnstul@us.ibm.com>
+Date: Wed, 4 May 2011 18:16:50 -0700
+Subject: clocksource: Install completely before selecting
+
+From: john stultz <johnstul@us.ibm.com>
+
+commit e05b2efb82596905ebfe88e8612ee81dec9b6592 upstream.
+
+Christian Hoffmann reported that the command line clocksource override
+with acpi_pm timer fails:
+
+ Kernel command line: <SNIP> clocksource=acpi_pm
+ hpet clockevent registered
+ Switching to clocksource hpet
+ Override clocksource acpi_pm is not HRT compatible.
+ Cannot switch while in HRT/NOHZ mode.
+
+The watchdog code is what enables CLOCK_SOURCE_VALID_FOR_HRES, but we
+actually end up selecting the clocksource before we enqueue it into
+the watchdog list, so that's why we see the warning and fail to switch
+to acpi_pm timer as requested. That's particularly bad when we want to
+debug timekeeping related problems in early boot.
+
+Put the selection call last.
+
+Reported-by: Christian Hoffmann <email@christianhoffmann.info>
+Signed-off-by: John Stultz <johnstul@us.ibm.com>
+Link: http://lkml.kernel.org/r/%3C1304558210.2943.24.camel%40work-vm%3E
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ kernel/time/clocksource.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/kernel/time/clocksource.c
++++ b/kernel/time/clocksource.c
+@@ -685,8 +685,8 @@ int __clocksource_register_scale(struct
+       /* Add clocksource to the clcoksource list */
+       mutex_lock(&clocksource_mutex);
+       clocksource_enqueue(cs);
+-      clocksource_select();
+       clocksource_enqueue_watchdog(cs);
++      clocksource_select();
+       mutex_unlock(&clocksource_mutex);
+       return 0;
+ }
+@@ -706,8 +706,8 @@ int clocksource_register(struct clocksou
+       mutex_lock(&clocksource_mutex);
+       clocksource_enqueue(cs);
+-      clocksource_select();
+       clocksource_enqueue_watchdog(cs);
++      clocksource_select();
+       mutex_unlock(&clocksource_mutex);
+       return 0;
+ }
diff --git a/queue-2.6.38/drm-radeon-kms-fix-extended-lvds-info-parsing.patch b/queue-2.6.38/drm-radeon-kms-fix-extended-lvds-info-parsing.patch
new file mode 100644 (file)
index 0000000..1471fd1
--- /dev/null
@@ -0,0 +1,47 @@
+From 05fa7ea7d23980de0014417a0e0af2048a0f9fc1 Mon Sep 17 00:00:00 2001
+From: Alex Deucher <alexdeucher@gmail.com>
+Date: Wed, 11 May 2011 14:02:07 -0400
+Subject: drm/radeon/kms: fix extended lvds info parsing
+
+From: Alex Deucher <alexdeucher@gmail.com>
+
+commit 05fa7ea7d23980de0014417a0e0af2048a0f9fc1 upstream.
+
+On rev <= 1.1 tables, the offset is absolute,
+on newer tables, it's relative.
+
+Fixes:
+https://bugzilla.redhat.com/show_bug.cgi?id=700326
+
+Signed-off-by: Alex Deucher <alexdeucher@gmail.com>
+Reviewed-by: Jerome Glisse <jglisse@redhat.com>
+Signed-off-by: Dave Airlie <airlied@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/gpu/drm/radeon/radeon_atombios.c |   14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+--- a/drivers/gpu/drm/radeon/radeon_atombios.c
++++ b/drivers/gpu/drm/radeon/radeon_atombios.c
+@@ -1574,9 +1574,17 @@ struct radeon_encoder_atom_dig *radeon_a
+                       ATOM_FAKE_EDID_PATCH_RECORD *fake_edid_record;
+                       ATOM_PANEL_RESOLUTION_PATCH_RECORD *panel_res_record;
+                       bool bad_record = false;
+-                      u8 *record = (u8 *)(mode_info->atom_context->bios +
+-                                          data_offset +
+-                                          le16_to_cpu(lvds_info->info.usModePatchTableOffset));
++                      u8 *record;
++
++                      if ((frev == 1) && (crev < 2))
++                              /* absolute */
++                              record = (u8 *)(mode_info->atom_context->bios +
++                                              le16_to_cpu(lvds_info->info.usModePatchTableOffset));
++                      else
++                              /* relative */
++                              record = (u8 *)(mode_info->atom_context->bios +
++                                              data_offset +
++                                              le16_to_cpu(lvds_info->info.usModePatchTableOffset));
+                       while (*record != ATOM_RECORD_END_TYPE) {
+                               switch (*record) {
+                               case LCD_MODE_PATCH_RECORD_MODE_TYPE:
diff --git a/queue-2.6.38/fix-cx88-remote-control-input.patch b/queue-2.6.38/fix-cx88-remote-control-input.patch
new file mode 100644 (file)
index 0000000..b0109b9
--- /dev/null
@@ -0,0 +1,32 @@
+From 2a164d02dd34c6b49a3f0995900e0f8af102b804 Mon Sep 17 00:00:00 2001
+From: Lawrence Rust <lvr@softsystem.co.uk>
+Date: Fri, 8 Apr 2011 09:50:45 -0300
+Subject: [media] Fix cx88 remote control input
+
+From: Lawrence Rust <lvr@softsystem.co.uk>
+
+commit 2a164d02dd34c6b49a3f0995900e0f8af102b804 upstream.
+
+In the IR interrupt handler of cx88-input.c there's a 32-bit multiply
+overflow which causes IR pulse durations to be incorrectly calculated.
+
+This is a regression caused by commit 2997137be8eba.
+
+Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/media/video/cx88/cx88-input.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/media/video/cx88/cx88-input.c
++++ b/drivers/media/video/cx88/cx88-input.c
+@@ -523,7 +523,7 @@ void cx88_ir_irq(struct cx88_core *core)
+       for (todo = 32; todo > 0; todo -= bits) {
+               ev.pulse = samples & 0x80000000 ? false : true;
+               bits = min(todo, 32U - fls(ev.pulse ? samples : ~samples));
+-              ev.duration = (bits * NSEC_PER_SEC) / (1000 * ir_samplerate);
++              ev.duration = (bits * (NSEC_PER_SEC / 1000)) / ir_samplerate;
+               ir_raw_event_store_with_filter(ir->dev, &ev);
+               samples <<= bits;
+       }
diff --git a/queue-2.6.38/libata-fix-oops-when-lpm-is-used-with-pmp.patch b/queue-2.6.38/libata-fix-oops-when-lpm-is-used-with-pmp.patch
new file mode 100644 (file)
index 0000000..07ce52a
--- /dev/null
@@ -0,0 +1,75 @@
+From 5f6f12ccf3aa42cfc0c5bde9228df0c843dd63f7 Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Mon, 9 May 2011 16:04:11 +0200
+Subject: libata: fix oops when LPM is used with PMP
+
+From: Tejun Heo <tj@kernel.org>
+
+commit 5f6f12ccf3aa42cfc0c5bde9228df0c843dd63f7 upstream.
+
+ae01b2493c (libata: Implement ATA_FLAG_NO_DIPM and apply it to mcp65)
+added ATA_FLAG_NO_DIPM and made ata_eh_set_lpm() check the flag.
+However, @ap is NULL if @link points to a PMP link and thus the
+unconditional @ap->flags dereference leads to the following oops.
+
+  BUG: unable to handle kernel NULL pointer dereference at 0000000000000018
+  IP: [<ffffffff813f98e1>] ata_eh_recover+0x9a1/0x1510
+  ...
+  Pid: 295, comm: scsi_eh_4 Tainted: P            2.6.38.5-core2 #1 System76, Inc. Serval Professional/Serval Professional
+  RIP: 0010:[<ffffffff813f98e1>]  [<ffffffff813f98e1>] ata_eh_recover+0x9a1/0x1510
+  RSP: 0018:ffff880132defbf0  EFLAGS: 00010246
+  RAX: 0000000000000000 RBX: ffff880132f40000 RCX: 0000000000000000
+  RDX: ffff88013377c000 RSI: ffff880132f40000 RDI: 0000000000000000
+  RBP: ffff880132defce0 R08: ffff88013377dc58 R09: ffff880132defd98
+  R10: 0000000000000000 R11: 00000000ffffffff R12: 0000000000000000
+  R13: 0000000000000000 R14: ffff88013377c000 R15: 0000000000000000
+  FS:  0000000000000000(0000) GS:ffff8800bf700000(0000) knlGS:0000000000000000
+  CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
+  CR2: 0000000000000018 CR3: 0000000001a03000 CR4: 00000000000406e0
+  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+  DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
+  Process scsi_eh_4 (pid: 295, threadinfo ffff880132dee000, task ffff880133b416c0)
+  Stack:
+   0000000000000000 ffff880132defcc0 0000000000000000 ffff880132f42738
+   ffffffff813ee8f0 ffffffff813eefe0 ffff880132defd98 ffff88013377f190
+   ffffffffa00b3e30 ffffffff813ef030 0000000032defc60 ffff880100000000
+  Call Trace:
+   [<ffffffff81400867>] sata_pmp_error_handler+0x607/0xc30
+   [<ffffffffa00b273f>] ahci_error_handler+0x1f/0x70 [libahci]
+   [<ffffffff813faade>] ata_scsi_error+0x5be/0x900
+   [<ffffffff813cf724>] scsi_error_handler+0x124/0x650
+   [<ffffffff810834b6>] kthread+0x96/0xa0
+   [<ffffffff8100cd64>] kernel_thread_helper+0x4/0x10
+  Code: 8b 95 70 ff ff ff b8 00 00 00 00 48 3b 9a 10 2e 00 00 48 0f 44 c2 48 89 85 70 ff ff ff 48 8b 8d 70 ff ff ff f6 83 69 02 00 00 01 <48> 8b 41 18 0f 85 48 01 00 00 48 85 c9 74 12 48 8b 51 08 48 83
+  RIP  [<ffffffff813f98e1>] ata_eh_recover+0x9a1/0x1510
+   RSP <ffff880132defbf0>
+  CR2: 0000000000000018
+
+Fix it by testing @link->ap->flags instead.
+
+stable: ATA_FLAG_NO_DIPM was added during 2.6.39 cycle but was
+        backported to 2.6.37 and 38.  This is a fix for that and thus
+        also applicable to 2.6.37 and 38.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Reported-by: "Nathan A. Mourey II" <nmoureyii@ne.rr.com>
+LKML-Reference: <1304555277.2059.2.camel@localhost.localdomain>
+Cc: Connor H <cmdkhh@gmail.com>
+Signed-off-by: Jeff Garzik <jgarzik@pobox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/ata/libata-eh.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/ata/libata-eh.c
++++ b/drivers/ata/libata-eh.c
+@@ -3276,7 +3276,7 @@ static int ata_eh_set_lpm(struct ata_lin
+       struct ata_eh_context *ehc = &link->eh_context;
+       struct ata_device *dev, *link_dev = NULL, *lpm_dev = NULL;
+       enum ata_lpm_policy old_policy = link->lpm_policy;
+-      bool no_dipm = ap->flags & ATA_FLAG_NO_DIPM;
++      bool no_dipm = link->ap->flags & ATA_FLAG_NO_DIPM;
+       unsigned int hints = ATA_LPM_EMPTY | ATA_LPM_HIPM;
+       unsigned int err_mask;
+       int rc;
diff --git a/queue-2.6.38/megaraid_sas-sanity-check-user-supplied-length-before-passing-it-to-dma_alloc_coherent.patch b/queue-2.6.38/megaraid_sas-sanity-check-user-supplied-length-before-passing-it-to-dma_alloc_coherent.patch
new file mode 100644 (file)
index 0000000..8db82c0
--- /dev/null
@@ -0,0 +1,89 @@
+From 98cb7e4413d189cd2b54daf993a4667d9788c0bb Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
+Date: Wed, 19 Jan 2011 10:01:14 +0100
+Subject: [SCSI] megaraid_sas: Sanity check user supplied length before passing it to dma_alloc_coherent()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
+
+commit 98cb7e4413d189cd2b54daf993a4667d9788c0bb upstream.
+
+The ioc->sgl[i].iov_len value is supplied by the ioctl caller, and can be
+zero in some cases.  Assume that's valid and continue without error.
+
+Fixes (multiple individual reports of the same problem for quite a while):
+
+http://marc.info/?l=linux-ide&m=128941801715301
+http://bugs.debian.org/604627
+http://www.mail-archive.com/linux-poweredge@dell.com/msg02575.html
+
+megasas: Failed to alloc kernel SGL buffer for IOCTL
+
+and
+
+[   69.162538] ------------[ cut here ]------------
+[   69.162806] kernel BUG at /build/buildd/linux-2.6.32/lib/swiotlb.c:368!
+[   69.163134] invalid opcode: 0000 [#1] SMP
+[   69.163570] last sysfs file: /sys/devices/system/cpu/cpu3/cache/index2/shared_cpu_map
+[   69.163975] CPU 0
+[   69.164227] Modules linked in: fbcon tileblit font bitblit softcursor vga16fb vgastate ioatdma radeon ttm drm_kms_helper shpchp drm i2c_algo_bit lp parport floppy pata_jmicron megaraid_sas igb dca
+[   69.167419] Pid: 1206, comm: smartctl Tainted: G        W  2.6.32-25-server #45-Ubuntu X8DTN
+[   69.167843] RIP: 0010:[<ffffffff812c4dc5>]  [<ffffffff812c4dc5>] map_single+0x255/0x260
+[   69.168370] RSP: 0018:ffff88081c0ebc58  EFLAGS: 00010246
+[   69.168655] RAX: 000000000003bffc RBX: 00000000ffffffff RCX: 0000000000000002
+[   69.169000] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88001dffe000
+[   69.169346] RBP: ffff88081c0ebcb8 R08: 0000000000000000 R09: ffff880000030840
+[   69.169691] R10: 0000000000100000 R11: 0000000000000000 R12: 0000000000000000
+[   69.170036] R13: 00000000ffffffff R14: 0000000000000001 R15: 0000000000200000
+[   69.170382] FS:  00007fb8de189720(0000) GS:ffff88001de00000(0000) knlGS:0000000000000000
+[   69.170794] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[   69.171094] CR2: 00007fb8dd59237c CR3: 000000081a790000 CR4: 00000000000006f0
+[   69.171439] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[   69.171784] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
+[   69.172130] Process smartctl (pid: 1206, threadinfo ffff88081c0ea000, task ffff88081a760000)
+[   69.194513] Stack:
+[   69.205788]  0000000000000034 00000002817e3390 0000000000000000 ffff88081c0ebe00
+[   69.217739] <0> 0000000000000000 000000000003bffc 0000000000000000 0000000000000000
+[   69.241250] <0> 0000000000000000 00000000ffffffff ffff88081c5b4080 ffff88081c0ebe00
+[   69.277310] Call Trace:
+[   69.289278]  [<ffffffff812c52ac>] swiotlb_alloc_coherent+0xec/0x130
+[   69.301118]  [<ffffffff81038b31>] x86_swiotlb_alloc_coherent+0x61/0x70
+[   69.313045]  [<ffffffffa002d0ce>] megasas_mgmt_fw_ioctl+0x1ae/0x690 [megaraid_sas]
+[   69.336399]  [<ffffffffa002d748>] megasas_mgmt_ioctl_fw+0x198/0x240 [megaraid_sas]
+[   69.359346]  [<ffffffffa002f695>] megasas_mgmt_ioctl+0x35/0x50 [megaraid_sas]
+[   69.370902]  [<ffffffff81153b12>] vfs_ioctl+0x22/0xa0
+[   69.382322]  [<ffffffff8115da2a>] ? alloc_fd+0x10a/0x150
+[   69.393622]  [<ffffffff81153cb1>] do_vfs_ioctl+0x81/0x410
+[   69.404696]  [<ffffffff8155cc13>] ? do_page_fault+0x153/0x3b0
+[   69.415761]  [<ffffffff811540c1>] sys_ioctl+0x81/0xa0
+[   69.426640]  [<ffffffff810121b2>] system_call_fastpath+0x16/0x1b
+[   69.437491] Code: fe ff ff 48 8b 3d 74 38 76 00 41 bf 00 00 20 00 e8 51 f5 d7 ff 83 e0 ff 48 05 ff 07 00 00 48 c1 e8 0b 48 89 45 c8 e9 13 fe ff ff <0f> 0b eb fe 0f 1f 80 00 00 00 00 55 48 89 e5 48 83 ec 20 4c 89
+[   69.478216] RIP  [<ffffffff812c4dc5>] map_single+0x255/0x260
+[   69.489668]  RSP <ffff88081c0ebc58>
+[   69.500975] ---[ end trace 6a2181b634e2abc7 ]---
+
+Reported-by: Bokhan Artem <aptem@ngs.ru>
+Reported-by: Marc-Christian Petersen <m.c.p@gmx.de>
+Signed-off-by: Bjørn Mork <bjorn@mork.no>
+Cc: "Benz, Michael" <Michael.Benz@lsi.com>
+Signed-off-by: James Bottomley <James.Bottomley@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/scsi/megaraid/megaraid_sas_base.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/scsi/megaraid/megaraid_sas_base.c
++++ b/drivers/scsi/megaraid/megaraid_sas_base.c
+@@ -4611,6 +4611,9 @@ megasas_mgmt_fw_ioctl(struct megasas_ins
+        * For each user buffer, create a mirror buffer and copy in
+        */
+       for (i = 0; i < ioc->sge_count; i++) {
++              if (!ioc->sgl[i].iov_len)
++                      continue;
++
+               kbuff_arr[i] = dma_alloc_coherent(&instance->pdev->dev,
+                                                   ioc->sgl[i].iov_len,
+                                                   &buf_handle, GFP_KERNEL);
diff --git a/queue-2.6.38/rapidio-fix-default-routing-initialization.patch b/queue-2.6.38/rapidio-fix-default-routing-initialization.patch
new file mode 100644 (file)
index 0000000..b502ab9
--- /dev/null
@@ -0,0 +1,94 @@
+From 0bf2461fdd9008290cf429e50e4f362dafab4249 Mon Sep 17 00:00:00 2001
+From: Alexandre Bounine <alexandre.bounine@idt.com>
+Date: Tue, 17 May 2011 15:44:08 -0700
+Subject: rapidio: fix default routing initialization
+
+From: Alexandre Bounine <alexandre.bounine@idt.com>
+
+commit 0bf2461fdd9008290cf429e50e4f362dafab4249 upstream.
+
+Fix switch initialization to ensure that all switches have default routing
+disabled.  This guarantees that no unexpected RapidIO packets arrive to
+the default port set by reset and there is no default routing destination
+until it is properly configured by software.
+
+This update also unifies handling of unmapped destinations by tsi57x, IDT
+Gen1 and IDT Gen2 switches.
+
+Signed-off-by: Alexandre Bounine <alexandre.bounine@idt.com>
+Cc: Kumar Gala <galak@kernel.crashing.org>
+Cc: Matt Porter <mporter@kernel.crashing.org>
+Cc: Li Yang <leoli@freescale.com>
+Cc: Thomas Moll <thomas.moll@sysgo.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/rapidio/switches/idt_gen2.c |    9 +++++++++
+ drivers/rapidio/switches/idtcps.c   |    6 ++++++
+ drivers/rapidio/switches/tsi57x.c   |    6 ++++++
+ 3 files changed, 21 insertions(+)
+
+--- a/drivers/rapidio/switches/idt_gen2.c
++++ b/drivers/rapidio/switches/idt_gen2.c
+@@ -95,6 +95,9 @@ idtg2_route_add_entry(struct rio_mport *
+       else
+               table++;
++      if (route_port == RIO_INVALID_ROUTE)
++              route_port = IDT_DEFAULT_ROUTE;
++
+       rio_mport_write_config_32(mport, destid, hopcount,
+                                 LOCAL_RTE_CONF_DESTID_SEL, table);
+@@ -411,6 +414,12 @@ static int idtg2_switch_init(struct rio_
+       rdev->rswitch->em_handle = idtg2_em_handler;
+       rdev->rswitch->sw_sysfs = idtg2_sysfs;
++      if (do_enum) {
++              /* Ensure that default routing is disabled on startup */
++              rio_write_config_32(rdev,
++                                  RIO_STD_RTE_DEFAULT_PORT, IDT_NO_ROUTE);
++      }
++
+       return 0;
+ }
+--- a/drivers/rapidio/switches/idtcps.c
++++ b/drivers/rapidio/switches/idtcps.c
+@@ -26,6 +26,9 @@ idtcps_route_add_entry(struct rio_mport
+ {
+       u32 result;
++      if (route_port == RIO_INVALID_ROUTE)
++              route_port = CPS_DEFAULT_ROUTE;
++
+       if (table == RIO_GLOBAL_TABLE) {
+               rio_mport_write_config_32(mport, destid, hopcount,
+                               RIO_STD_RTE_CONF_DESTID_SEL_CSR, route_destid);
+@@ -130,6 +133,9 @@ static int idtcps_switch_init(struct rio
+               /* set TVAL = ~50us */
+               rio_write_config_32(rdev,
+                       rdev->phys_efptr + RIO_PORT_LINKTO_CTL_CSR, 0x8e << 8);
++              /* Ensure that default routing is disabled on startup */
++              rio_write_config_32(rdev,
++                                  RIO_STD_RTE_DEFAULT_PORT, CPS_NO_ROUTE);
+       }
+       return 0;
+--- a/drivers/rapidio/switches/tsi57x.c
++++ b/drivers/rapidio/switches/tsi57x.c
+@@ -303,6 +303,12 @@ static int tsi57x_switch_init(struct rio
+       rdev->rswitch->em_init = tsi57x_em_init;
+       rdev->rswitch->em_handle = tsi57x_em_handler;
++      if (do_enum) {
++              /* Ensure that default routing is disabled on startup */
++              rio_write_config_32(rdev, RIO_STD_RTE_DEFAULT_PORT,
++                                  RIO_INVALID_ROUTE);
++      }
++
+       return 0;
+ }
diff --git a/queue-2.6.38/revert-mmc-fix-a-race-between-card-detect-rescan-and-clock-gate-work-instances.patch b/queue-2.6.38/revert-mmc-fix-a-race-between-card-detect-rescan-and-clock-gate-work-instances.patch
new file mode 100644 (file)
index 0000000..7c33f86
--- /dev/null
@@ -0,0 +1,78 @@
+From 86f315bbb2374f1f077500ad131dd9b71856e697 Mon Sep 17 00:00:00 2001
+From: Chris Ball <cjb@laptop.org>
+Date: Mon, 16 May 2011 11:32:26 -0400
+Subject: Revert "mmc: fix a race between card-detect rescan and clock-gate work instances"
+
+From: Chris Ball <cjb@laptop.org>
+
+commit 86f315bbb2374f1f077500ad131dd9b71856e697 upstream.
+
+This reverts commit 26fc8775b51484d8c0a671198639c6d5ae60533e, which has
+been reported to cause boot/resume-time crashes for some users:
+
+https://bbs.archlinux.org/viewtopic.php?id=118751.
+
+Signed-off-by: Chris Ball <cjb@laptop.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/mmc/core/host.c  |    9 +++++----
+ include/linux/mmc/host.h |    1 +
+ 2 files changed, 6 insertions(+), 4 deletions(-)
+
+--- a/drivers/mmc/core/host.c
++++ b/drivers/mmc/core/host.c
+@@ -94,7 +94,7 @@ static void mmc_host_clk_gate_delayed(st
+               spin_unlock_irqrestore(&host->clk_lock, flags);
+               return;
+       }
+-      mmc_claim_host(host);
++      mutex_lock(&host->clk_gate_mutex);
+       spin_lock_irqsave(&host->clk_lock, flags);
+       if (!host->clk_requests) {
+               spin_unlock_irqrestore(&host->clk_lock, flags);
+@@ -104,7 +104,7 @@ static void mmc_host_clk_gate_delayed(st
+               pr_debug("%s: gated MCI clock\n", mmc_hostname(host));
+       }
+       spin_unlock_irqrestore(&host->clk_lock, flags);
+-      mmc_release_host(host);
++      mutex_unlock(&host->clk_gate_mutex);
+ }
+ /*
+@@ -130,7 +130,7 @@ void mmc_host_clk_ungate(struct mmc_host
+ {
+       unsigned long flags;
+-      mmc_claim_host(host);
++      mutex_lock(&host->clk_gate_mutex);
+       spin_lock_irqsave(&host->clk_lock, flags);
+       if (host->clk_gated) {
+               spin_unlock_irqrestore(&host->clk_lock, flags);
+@@ -140,7 +140,7 @@ void mmc_host_clk_ungate(struct mmc_host
+       }
+       host->clk_requests++;
+       spin_unlock_irqrestore(&host->clk_lock, flags);
+-      mmc_release_host(host);
++      mutex_unlock(&host->clk_gate_mutex);
+ }
+ /**
+@@ -218,6 +218,7 @@ static inline void mmc_host_clk_init(str
+       host->clk_gated = false;
+       INIT_WORK(&host->clk_gate_work, mmc_host_clk_gate_work);
+       spin_lock_init(&host->clk_lock);
++      mutex_init(&host->clk_gate_mutex);
+ }
+ /**
+--- a/include/linux/mmc/host.h
++++ b/include/linux/mmc/host.h
+@@ -183,6 +183,7 @@ struct mmc_host {
+       struct work_struct      clk_gate_work; /* delayed clock gate */
+       unsigned int            clk_old;        /* old clock value cache */
+       spinlock_t              clk_lock;       /* lock for clk fields */
++      struct mutex            clk_gate_mutex; /* mutex for clock gating */
+ #endif
+       /* host specific block data */
diff --git a/queue-2.6.38/revert-x86-amd-fix-apic-timer-erratum-400-affecting-k8-rev.a-e-processors.patch b/queue-2.6.38/revert-x86-amd-fix-apic-timer-erratum-400-affecting-k8-rev.a-e-processors.patch
new file mode 100644 (file)
index 0000000..d9e5946
--- /dev/null
@@ -0,0 +1,47 @@
+From 328935e6348c6a7cb34798a68c326f4b8372e68a Mon Sep 17 00:00:00 2001
+From: Borislav Petkov <borislav.petkov@amd.com>
+Date: Tue, 17 May 2011 14:55:18 +0200
+Subject: Revert "x86, AMD: Fix APIC timer erratum 400 affecting K8 Rev.A-E processors"
+
+From: Borislav Petkov <borislav.petkov@amd.com>
+
+commit 328935e6348c6a7cb34798a68c326f4b8372e68a upstream.
+
+This reverts commit e20a2d205c05cef6b5783df339a7d54adeb50962, as it crashes
+certain boxes with specific AMD CPU models.
+
+Moving the lower endpoint of the Erratum 400 check to accommodate
+earlier K8 revisions (A-E) opens a can of worms which is simply
+not worth to fix properly by tweaking the errata checking
+framework:
+
+* missing IntPending MSR on revisions < CG causes #GP:
+
+http://marc.info/?l=linux-kernel&m=130541471818831
+
+* makes earlier revisions use the LAPIC timer instead of the C1E
+idle routine which switches to HPET, thus not waking up in
+deeper C-states:
+
+http://lkml.org/lkml/2011/4/24/20
+
+Therefore, leave the original boundary starting with K8-revF.
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/kernel/cpu/amd.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -681,7 +681,7 @@ cpu_dev_register(amd_cpu_dev);
+  */
+ const int amd_erratum_400[] =
+-      AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0x0f, 0x4, 0x2, 0xff, 0xf),
++      AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),
+                           AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));
+ EXPORT_SYMBOL_GPL(amd_erratum_400);
index f3f7015c206e567b76716621fe00e7206c26f5fd..63a266cfb555b89ddc15616c4f03a068e47aff71 100644 (file)
@@ -43,3 +43,25 @@ hydra-fix-regression-caused-during-net_device_ops-conversion.patch
 ehea-fix-memory-hotplug-oops.patch
 libertas-fix-cmdpendingq-locking.patch
 zorro8390-fix-regression-caused-during-net_device_ops-conversion.patch
+tmpfs-fix-race-between-umount-and-writepage.patch
+tmpfs-fix-race-between-swapoff-and-writepage.patch
+tmpfs-fix-off-by-one-in-max_blocks-checks.patch
+tmpfs-fix-spurious-enospc-when-racing-with-unswap.patch
+libata-fix-oops-when-lpm-is-used-with-pmp.patch
+drm-radeon-kms-fix-extended-lvds-info-parsing.patch
+revert-mmc-fix-a-race-between-card-detect-rescan-and-clock-gate-work-instances.patch
+cifs-add-fallback-in-is_path_accessible-for-old-servers.patch
+rapidio-fix-default-routing-initialization.patch
+revert-x86-amd-fix-apic-timer-erratum-400-affecting-k8-rev.a-e-processors.patch
+x86-amd-fix-arat-feature-setting-again.patch
+block-rescan-partitions-on-invalidated-devices-on-enomedia-too.patch
+clocksource-install-completely-before-selecting.patch
+tick-clear-broadcast-active-bit-when-switching-to-oneshot.patch
+x86-apic-fix-spurious-error-interrupts-triggering-on-all-non-boot-aps.patch
+fix-cx88-remote-control-input.patch
+v4l-release-module-if-subdev-registration-fails.patch
+x86-fix-uv-bau-for-non-consecutive-nasids.patch
+x86-mce-amd-fix-leaving-freed-data-in-a-list.patch
+megaraid_sas-sanity-check-user-supplied-length-before-passing-it-to-dma_alloc_coherent.patch
+cdrom-always-check_disk_change-on-open.patch
+vmxnet3-fix-inconsistent-lro-state-after-initialization.patch
diff --git a/queue-2.6.38/tick-clear-broadcast-active-bit-when-switching-to-oneshot.patch b/queue-2.6.38/tick-clear-broadcast-active-bit-when-switching-to-oneshot.patch
new file mode 100644 (file)
index 0000000..c494dc3
--- /dev/null
@@ -0,0 +1,85 @@
+From 07f4beb0b5bbfaf36a64aa00d59e670ec578a95a Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 16 May 2011 11:07:48 +0200
+Subject: tick: Clear broadcast active bit when switching to oneshot
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 07f4beb0b5bbfaf36a64aa00d59e670ec578a95a upstream.
+
+The first cpu which switches from periodic to oneshot mode switches
+also the broadcast device into oneshot mode. The broadcast device
+serves as a backup for per cpu timers which stop in deeper
+C-states. To avoid starvation of the cpus which might be in idle and
+depend on broadcast mode it marks the other cpus as broadcast active
+and sets the broadcast expiry value of those cpus to the next tick.
+
+The oneshot mode broadcast bit for the other cpus is sticky and gets
+only cleared when those cpus exit idle. If a cpu was not idle while
+the bit got set in consequence the bit prevents that the broadcast
+device is armed on behalf of that cpu when it enters idle for the
+first time after it switched to oneshot mode.
+
+In most cases that goes unnoticed as one of the other cpus has usually
+a timer pending which keeps the broadcast device armed with a short
+timeout. Now if the only cpu which has a short timer active has the
+bit set then the broadcast device will not be armed on behalf of that
+cpu and will fire way after the expected timer expiry. In the case of
+Christian's bug report it took ~145 seconds which is about half of the
+wrap around time of HPET (the limit for that device) due to the fact
+that all other cpus had no timers armed which expired before the 145
+seconds timeframe.
+
+The solution is simply to clear the broadcast active bit
+unconditionally when a cpu switches to oneshot mode after the first
+cpu switched the broadcast device over. It's not idle at that point
+otherwise it would not be executing that code.
+
+[ I fundamentally hate that broadcast crap. Why the heck thought some
+  folks that when going into deep idle it's a brilliant concept to
+  switch off the last device which brings the cpu back from that
+  state? ]
+
+Thanks to Christian for providing all the valuable debug information!
+
+Reported-and-tested-by: Christian Hoffmann <email@christianhoffmann.info>
+Cc: John Stultz <johnstul@us.ibm.com>
+Link: http://lkml.kernel.org/r/%3Calpine.LFD.2.02.1105161105170.3078%40ionos%3E
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ kernel/time/tick-broadcast.c |   12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/kernel/time/tick-broadcast.c
++++ b/kernel/time/tick-broadcast.c
+@@ -523,10 +523,11 @@ static void tick_broadcast_init_next_eve
+  */
+ void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
+ {
++      int cpu = smp_processor_id();
++
+       /* Set it up only once ! */
+       if (bc->event_handler != tick_handle_oneshot_broadcast) {
+               int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC;
+-              int cpu = smp_processor_id();
+               bc->event_handler = tick_handle_oneshot_broadcast;
+               clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
+@@ -552,6 +553,15 @@ void tick_broadcast_setup_oneshot(struct
+                       tick_broadcast_set_event(tick_next_period, 1);
+               } else
+                       bc->next_event.tv64 = KTIME_MAX;
++      } else {
++              /*
++               * The first cpu which switches to oneshot mode sets
++               * the bit for all other cpus which are in the general
++               * (periodic) broadcast mask. So the bit is set and
++               * would prevent the first broadcast enter after this
++               * to program the bc device.
++               */
++              tick_broadcast_clear_oneshot(cpu);
+       }
+ }
diff --git a/queue-2.6.38/tmpfs-fix-off-by-one-in-max_blocks-checks.patch b/queue-2.6.38/tmpfs-fix-off-by-one-in-max_blocks-checks.patch
new file mode 100644 (file)
index 0000000..463fb13
--- /dev/null
@@ -0,0 +1,49 @@
+From fc5da22ae35d4720be59af8787a8a6d5e4da9517 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Thu, 14 Apr 2011 15:22:07 -0700
+Subject: tmpfs: fix off-by-one in max_blocks checks
+
+From: Hugh Dickins <hughd@google.com>
+
+commit fc5da22ae35d4720be59af8787a8a6d5e4da9517 upstream.
+
+If you fill up a tmpfs, df was showing
+
+  tmpfs                   460800         -         -   -  /tmp
+
+because of an off-by-one in the max_blocks checks.  Fix it so df shows
+
+  tmpfs                   460800    460800         0 100% /tmp
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ mm/shmem.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -422,7 +422,8 @@ static swp_entry_t *shmem_swp_alloc(stru
+                * a waste to allocate index if we cannot allocate data.
+                */
+               if (sbinfo->max_blocks) {
+-                      if (percpu_counter_compare(&sbinfo->used_blocks, (sbinfo->max_blocks - 1)) > 0)
++                      if (percpu_counter_compare(&sbinfo->used_blocks,
++                                              sbinfo->max_blocks - 1) >= 0)
+                               return ERR_PTR(-ENOSPC);
+                       percpu_counter_inc(&sbinfo->used_blocks);
+                       spin_lock(&inode->i_lock);
+@@ -1404,7 +1405,8 @@ repeat:
+               shmem_swp_unmap(entry);
+               sbinfo = SHMEM_SB(inode->i_sb);
+               if (sbinfo->max_blocks) {
+-                      if ((percpu_counter_compare(&sbinfo->used_blocks, sbinfo->max_blocks) > 0) ||
++                      if (percpu_counter_compare(&sbinfo->used_blocks,
++                                              sbinfo->max_blocks) >= 0 ||
+                           shmem_acct_block(info->flags)) {
+                               spin_unlock(&info->lock);
+                               error = -ENOSPC;
diff --git a/queue-2.6.38/tmpfs-fix-race-between-swapoff-and-writepage.patch b/queue-2.6.38/tmpfs-fix-race-between-swapoff-and-writepage.patch
new file mode 100644 (file)
index 0000000..f929e6e
--- /dev/null
@@ -0,0 +1,55 @@
+From 05bf86b4ccfd0f197da61c67bd372111d15a6620 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Sat, 14 May 2011 12:06:42 -0700
+Subject: tmpfs: fix race between swapoff and writepage
+
+From: Hugh Dickins <hughd@google.com>
+
+commit 05bf86b4ccfd0f197da61c67bd372111d15a6620 upstream.
+
+Shame on me!  Commit b1dea800ac39 "tmpfs: fix race between umount and
+writepage" fixed the advertized race, but introduced another: as even
+its comment makes clear, we cannot safely rely on a peek at list_empty()
+while holding no lock - until info->swapped is set, shmem_unuse_inode()
+may delete any formerly-swapped inode from the shmem_swaplist, which
+in this case would leave a swap area impossible to swapoff.
+
+Although I don't relish taking the mutex every time, I don't care much
+for the alternatives either; and at least the peek at list_empty() in
+shmem_evict_inode() (a hotter path since most inodes would never have
+been swapped) remains safe, because we already truncated the whole file.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -1037,7 +1037,6 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
+       struct address_space *mapping;
+       unsigned long index;
+       struct inode *inode;
+-      bool unlock_mutex = false;
+       BUG_ON(!PageLocked(page));
+       mapping = page->mapping;
+@@ -1072,15 +1071,14 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
+        * we've taken the spinlock, because shmem_unuse_inode() will
+        * prune a !swapped inode from the swaplist under both locks.
+        */
+-      if (swap.val && list_empty(&info->swaplist)) {
++      if (swap.val) {
+               mutex_lock(&shmem_swaplist_mutex);
+-              /* move instead of add in case we're racing */
+-              list_move_tail(&info->swaplist, &shmem_swaplist);
+-              unlock_mutex = true;
++              if (list_empty(&info->swaplist))
++                      list_add_tail(&info->swaplist, &shmem_swaplist);
+       }
+       spin_lock(&info->lock);
+-      if (unlock_mutex)
++      if (swap.val)
+               mutex_unlock(&shmem_swaplist_mutex);
+       if (index >= info->next_index) {
diff --git a/queue-2.6.38/tmpfs-fix-race-between-umount-and-writepage.patch b/queue-2.6.38/tmpfs-fix-race-between-umount-and-writepage.patch
new file mode 100644 (file)
index 0000000..9a1234a
--- /dev/null
@@ -0,0 +1,158 @@
+From b1dea800ac39599301d4bb8dcf2b1d29c2558211 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Wed, 11 May 2011 15:13:36 -0700
+Subject: tmpfs: fix race between umount and writepage
+
+From: Hugh Dickins <hughd@google.com>
+
+commit b1dea800ac39599301d4bb8dcf2b1d29c2558211 upstream.
+
+Konstantin Khlebnikov reports that a dangerous race between umount and
+shmem_writepage can be reproduced by this script:
+
+  for i in {1..300} ; do
+       mkdir $i
+       while true ; do
+               mount -t tmpfs none $i
+               dd if=/dev/zero of=$i/test bs=1M count=$(($RANDOM % 100))
+               umount $i
+       done &
+  done
+
+on a 6xCPU node with 8Gb RAM: kernel very unstable after this accident. =)
+
+Kernel log:
+
+  VFS: Busy inodes after unmount of tmpfs.
+                 Self-destruct in 5 seconds.  Have a nice day...
+
+  WARNING: at lib/list_debug.c:53 __list_del_entry+0x8d/0x98()
+  list_del corruption. prev->next should be ffff880222fdaac8, but was (null)
+  Pid: 11222, comm: mount.tmpfs Not tainted 2.6.39-rc2+ #4
+  Call Trace:
+   warn_slowpath_common+0x80/0x98
+   warn_slowpath_fmt+0x41/0x43
+   __list_del_entry+0x8d/0x98
+   evict+0x50/0x113
+   iput+0x138/0x141
+  ...
+  BUG: unable to handle kernel paging request at ffffffffffffffff
+  IP: shmem_free_blocks+0x18/0x4c
+  Pid: 10422, comm: dd Tainted: G        W   2.6.39-rc2+ #4
+  Call Trace:
+   shmem_recalc_inode+0x61/0x66
+   shmem_writepage+0xba/0x1dc
+   pageout+0x13c/0x24c
+   shrink_page_list+0x28e/0x4be
+   shrink_inactive_list+0x21f/0x382
+  ...
+
+shmem_writepage() calls igrab() on the inode for the page which came from
+page reclaim, to add it later into shmem_swaplist for swapoff operation.
+
+This igrab() can race with super-block deactivating process:
+
+  shrink_inactive_list()          deactivate_super()
+  pageout()                       tmpfs_fs_type->kill_sb()
+  shmem_writepage()               kill_litter_super()
+                                  generic_shutdown_super()
+                                   evict_inodes()
+   igrab()
+                                    atomic_read(&inode->i_count)
+                                     skip-inode
+   iput()
+                                   if (!list_empty(&sb->s_inodes))
+                                          printk("VFS: Busy inodes after...
+
+This igrab-iput pair was added in commit 1b1b32f2c6f6 "tmpfs: fix
+shmem_swaplist races" based on incorrect assumptions: igrab() protects the
+inode from concurrent eviction by deletion, but it does nothing to protect
+it from concurrent unmounting, which goes ahead despite the raised
+i_count.
+
+So this use of igrab() was wrong all along, but the race made much worse
+in 2.6.37 when commit 63997e98a3be "split invalidate_inodes()" replaced
+two attempts at invalidate_inodes() by a single evict_inodes().
+
+Konstantin posted a plausible patch, raising sb->s_active too: I'm unsure
+whether it was correct or not; but burnt once by igrab(), I am sure that
+we don't want to rely more deeply upon externals here.
+
+Fix it by adding the inode to shmem_swaplist earlier, while the page lock
+on page in page cache still secures the inode against eviction, without
+artificially raising i_count.  It was originally added later because
+shmem_unuse_inode() is liable to remove an inode from the list while it's
+unswapped; but we can guard against that by taking spinlock before
+dropping mutex.
+
+Reported-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Tested-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ mm/shmem.c |   31 ++++++++++++++++++++-----------
+ 1 file changed, 20 insertions(+), 11 deletions(-)
+
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -1037,6 +1037,7 @@ static int shmem_writepage(struct page *
+       struct address_space *mapping;
+       unsigned long index;
+       struct inode *inode;
++      bool unlock_mutex = false;
+       BUG_ON(!PageLocked(page));
+       mapping = page->mapping;
+@@ -1062,7 +1063,26 @@ static int shmem_writepage(struct page *
+       else
+               swap.val = 0;
++      /*
++       * Add inode to shmem_unuse()'s list of swapped-out inodes,
++       * if it's not already there.  Do it now because we cannot take
++       * mutex while holding spinlock, and must do so before the page
++       * is moved to swap cache, when its pagelock no longer protects
++       * the inode from eviction.  But don't unlock the mutex until
++       * we've taken the spinlock, because shmem_unuse_inode() will
++       * prune a !swapped inode from the swaplist under both locks.
++       */
++      if (swap.val && list_empty(&info->swaplist)) {
++              mutex_lock(&shmem_swaplist_mutex);
++              /* move instead of add in case we're racing */
++              list_move_tail(&info->swaplist, &shmem_swaplist);
++              unlock_mutex = true;
++      }
++
+       spin_lock(&info->lock);
++      if (unlock_mutex)
++              mutex_unlock(&shmem_swaplist_mutex);
++
+       if (index >= info->next_index) {
+               BUG_ON(!(info->flags & SHMEM_TRUNCATE));
+               goto unlock;
+@@ -1082,22 +1102,11 @@ static int shmem_writepage(struct page *
+               remove_from_page_cache(page);
+               shmem_swp_set(info, entry, swap.val);
+               shmem_swp_unmap(entry);
+-              if (list_empty(&info->swaplist))
+-                      inode = igrab(inode);
+-              else
+-                      inode = NULL;
+               spin_unlock(&info->lock);
+               swap_shmem_alloc(swap);
+               BUG_ON(page_mapped(page));
+               page_cache_release(page);       /* pagecache ref */
+               swap_writepage(page, wbc);
+-              if (inode) {
+-                      mutex_lock(&shmem_swaplist_mutex);
+-                      /* move instead of add in case we're racing */
+-                      list_move_tail(&info->swaplist, &shmem_swaplist);
+-                      mutex_unlock(&shmem_swaplist_mutex);
+-                      iput(inode);
+-              }
+               return 0;
+       }
diff --git a/queue-2.6.38/tmpfs-fix-spurious-enospc-when-racing-with-unswap.patch b/queue-2.6.38/tmpfs-fix-spurious-enospc-when-racing-with-unswap.patch
new file mode 100644 (file)
index 0000000..8b395e5
--- /dev/null
@@ -0,0 +1,86 @@
+From 59a16ead572330deb38e5848151d30ed1af754bc Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Wed, 11 May 2011 15:13:38 -0700
+Subject: tmpfs: fix spurious ENOSPC when racing with unswap
+
+From: Hugh Dickins <hughd@google.com>
+
+commit 59a16ead572330deb38e5848151d30ed1af754bc upstream.
+
+Testing the shmem_swaplist replacements for igrab() revealed another bug:
+writes to /dev/loop0 on a tmpfs file which fills its filesystem were
+sometimes failing with "Buffer I/O error"s.
+
+These came from ENOSPC failures of shmem_getpage(), when racing with
+swapoff: the same could happen when racing with another shmem_getpage(),
+pulling the page in from swap in between our find_lock_page() and our
+taking the info->lock (though not in the single-threaded loop case).
+
+This is unacceptable, and surprising that I've not noticed it before:
+it dates back many years, but (presumably) was made a lot easier to
+reproduce in 2.6.36, which sited a page preallocation in the race window.
+
+Fix it by rechecking the page cache before settling on an ENOSPC error.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Cc: Konstantin Khlebnikov <khlebnikov@openvz.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ mm/shmem.c |   32 ++++++++++++++++++++++----------
+ 1 file changed, 22 insertions(+), 10 deletions(-)
+
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -1407,20 +1407,14 @@ repeat:
+               if (sbinfo->max_blocks) {
+                       if (percpu_counter_compare(&sbinfo->used_blocks,
+                                               sbinfo->max_blocks) >= 0 ||
+-                          shmem_acct_block(info->flags)) {
+-                              spin_unlock(&info->lock);
+-                              error = -ENOSPC;
+-                              goto failed;
+-                      }
++                          shmem_acct_block(info->flags))
++                              goto nospace;
+                       percpu_counter_inc(&sbinfo->used_blocks);
+                       spin_lock(&inode->i_lock);
+                       inode->i_blocks += BLOCKS_PER_PAGE;
+                       spin_unlock(&inode->i_lock);
+-              } else if (shmem_acct_block(info->flags)) {
+-                      spin_unlock(&info->lock);
+-                      error = -ENOSPC;
+-                      goto failed;
+-              }
++              } else if (shmem_acct_block(info->flags))
++                      goto nospace;
+               if (!filepage) {
+                       int ret;
+@@ -1500,6 +1494,24 @@ done:
+       error = 0;
+       goto out;
++nospace:
++      /*
++       * Perhaps the page was brought in from swap between find_lock_page
++       * and taking info->lock?  We allow for that at add_to_page_cache_lru,
++       * but must also avoid reporting a spurious ENOSPC while working on a
++       * full tmpfs.  (When filepage has been passed in to shmem_getpage, it
++       * is already in page cache, which prevents this race from occurring.)
++       */
++      if (!filepage) {
++              struct page *page = find_get_page(mapping, idx);
++              if (page) {
++                      spin_unlock(&info->lock);
++                      page_cache_release(page);
++                      goto repeat;
++              }
++      }
++      spin_unlock(&info->lock);
++      error = -ENOSPC;
+ failed:
+       if (*pagep != filepage) {
+               unlock_page(filepage);
diff --git a/queue-2.6.38/v4l-release-module-if-subdev-registration-fails.patch b/queue-2.6.38/v4l-release-module-if-subdev-registration-fails.patch
new file mode 100644 (file)
index 0000000..1b5bbed
--- /dev/null
@@ -0,0 +1,42 @@
+From b7534f002d3c81d18abfbf57179d07d3ec763bb5 Mon Sep 17 00:00:00 2001
+From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+Date: Sat, 30 Apr 2011 10:34:05 -0300
+Subject: [media] v4l: Release module if subdev registration fails
+
+From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+
+commit b7534f002d3c81d18abfbf57179d07d3ec763bb5 upstream.
+
+If v4l2_device_register_subdev() fails, the reference to the subdev
+module taken by the function isn't released. Fix this.
+
+Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+Acked-by: Hans Verkuil <hverkuil@xs4all.nl>
+Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/media/video/v4l2-device.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/media/video/v4l2-device.c
++++ b/drivers/media/video/v4l2-device.c
+@@ -131,14 +131,17 @@ int v4l2_device_register_subdev(struct v
+       sd->v4l2_dev = v4l2_dev;
+       if (sd->internal_ops && sd->internal_ops->registered) {
+               err = sd->internal_ops->registered(sd);
+-              if (err)
++              if (err) {
++                      module_put(sd->owner);
+                       return err;
++              }
+       }
+       /* This just returns 0 if either of the two args is NULL */
+       err = v4l2_ctrl_add_handler(v4l2_dev->ctrl_handler, sd->ctrl_handler);
+       if (err) {
+               if (sd->internal_ops && sd->internal_ops->unregistered)
+                       sd->internal_ops->unregistered(sd);
++              module_put(sd->owner);
+               return err;
+       }
+       spin_lock(&v4l2_dev->lock);
diff --git a/queue-2.6.38/vmxnet3-fix-inconsistent-lro-state-after-initialization.patch b/queue-2.6.38/vmxnet3-fix-inconsistent-lro-state-after-initialization.patch
new file mode 100644 (file)
index 0000000..8953f65
--- /dev/null
@@ -0,0 +1,56 @@
+From ebde6f8acba92abfc203585198a54f47e83e2cd0 Mon Sep 17 00:00:00 2001
+From: Thomas Jarosch <thomas.jarosch@intra2net.com>
+Date: Mon, 16 May 2011 06:28:15 +0000
+Subject: vmxnet3: Fix inconsistent LRO state after initialization
+
+From: Thomas Jarosch <thomas.jarosch@intra2net.com>
+
+commit ebde6f8acba92abfc203585198a54f47e83e2cd0 upstream.
+
+During initialization of vmxnet3, the state of LRO
+gets out of sync with netdev->features.
+
+This leads to very poor TCP performance in a IP forwarding
+setup and is hitting many VMware users.
+
+Simplified call sequence:
+1. vmxnet3_declare_features() initializes "adapter->lro" to true.
+
+2. The kernel automatically disables LRO if IP forwarding is enabled,
+so vmxnet3_set_flags() gets called. This also updates netdev->features.
+
+3. Now vmxnet3_setup_driver_shared() is called. "adapter->lro" is still
+set to true and LRO gets enabled again, even though
+netdev->features shows it's disabled.
+
+Fix it by updating "adapter->lro", too.
+
+The private vmxnet3 adapter flags are scheduled for removal
+in net-next, see commit a0d2730c9571aeba793cb5d3009094ee1d8fda35
+"net: vmxnet3: convert to hw_features".
+
+Patch applies to 2.6.37 / 2.6.38 and 2.6.39-rc6.
+
+Please CC: comments.
+
+Signed-off-by: Thomas Jarosch <thomas.jarosch@intra2net.com>
+Acked-by: Stephen Hemminger <shemminger@vyatta.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/net/vmxnet3/vmxnet3_ethtool.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/net/vmxnet3/vmxnet3_ethtool.c
++++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c
+@@ -311,6 +311,9 @@ vmxnet3_set_flags(struct net_device *net
+               /* toggle the LRO feature*/
+               netdev->features ^= NETIF_F_LRO;
++              /* Update private LRO flag */
++              adapter->lro = lro_requested;
++
+               /* update harware LRO capability accordingly */
+               if (lro_requested)
+                       adapter->shared->devRead.misc.uptFeatures |=
diff --git a/queue-2.6.38/x86-amd-fix-arat-feature-setting-again.patch b/queue-2.6.38/x86-amd-fix-arat-feature-setting-again.patch
new file mode 100644 (file)
index 0000000..4d4ca8f
--- /dev/null
@@ -0,0 +1,43 @@
+From 14fb57dccb6e1defe9f89a66f548fcb24c374c1d Mon Sep 17 00:00:00 2001
+From: Borislav Petkov <borislav.petkov@amd.com>
+Date: Tue, 17 May 2011 14:55:19 +0200
+Subject: x86, AMD: Fix ARAT feature setting again
+
+From: Borislav Petkov <borislav.petkov@amd.com>
+
+commit 14fb57dccb6e1defe9f89a66f548fcb24c374c1d upstream.
+
+Trying to enable the local APIC timer on early K8 revisions
+uncovers a number of other issues with it, in conjunction with
+the C1E enter path on AMD. Fixing those causes much more churn
+and troubles than the benefit of using that timer brings so
+don't enable it on K8 at all, falling back to the original
+functionality the kernel had wrt to that.
+
+Reported-and-bisected-by: Nick Bowler <nbowler@elliptictech.com>
+Cc: Boris Ostrovsky <Boris.Ostrovsky@amd.com>
+Cc: Andreas Herrmann <andreas.herrmann3@amd.com>
+Cc: Greg Kroah-Hartman <greg@kroah.com>
+Cc: Hans Rosenfeld <hans.rosenfeld@amd.com>
+Cc: Nick Bowler <nbowler@elliptictech.com>
+Cc: Joerg-Volker-Peetz <jvpeetz@web.de>
+Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
+Link: http://lkml.kernel.org/r/1305636919-31165-3-git-send-email-bp@amd64.org
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/kernel/cpu/amd.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -596,7 +596,7 @@ static void __cpuinit init_amd(struct cp
+ #endif
+       /* As a rule processors have APIC timer running in deep C states */
+-      if (c->x86 >= 0xf && !cpu_has_amd_erratum(amd_erratum_400))
++      if (c->x86 > 0xf && !cpu_has_amd_erratum(amd_erratum_400))
+               set_cpu_cap(c, X86_FEATURE_ARAT);
+       /*
diff --git a/queue-2.6.38/x86-apic-fix-spurious-error-interrupts-triggering-on-all-non-boot-aps.patch b/queue-2.6.38/x86-apic-fix-spurious-error-interrupts-triggering-on-all-non-boot-aps.patch
new file mode 100644 (file)
index 0000000..e4e8f81
--- /dev/null
@@ -0,0 +1,94 @@
+From e503f9e4b092e2349a9477a333543de8f3c7f5d9 Mon Sep 17 00:00:00 2001
+From: Youquan Song <youquan.song@intel.com>
+Date: Fri, 22 Apr 2011 00:22:43 +0800
+Subject: x86, apic: Fix spurious error interrupts triggering on all non-boot APs
+
+From: Youquan Song <youquan.song@intel.com>
+
+commit e503f9e4b092e2349a9477a333543de8f3c7f5d9 upstream.
+
+This patch fixes a bug reported by a customer, who found
+that many unreasonable error interrupts reported on all
+non-boot CPUs (APs) during the system boot stage.
+
+According to Chapter 10 of Intel Software Developer Manual
+Volume 3A, Local APIC may signal an illegal vector error when
+an LVT entry is set as an illegal vector value (0~15) under
+FIXED delivery mode (bits 8-11 are 0), regardless of whether
+the mask bit is set or an interrupt actually happens. These
+errors are seen as error interrupts.
+
+The initial value of thermal LVT entries on all APs always reads
+0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
+sequence to them and LVT registers are reset to 0s except for
+the mask bits which are set to 1s when APs receive INIT IPI.
+
+When the BIOS takes over the thermal throttling interrupt,
+the LVT thermal delivery mode should be SMI and it is required
+from the kernel to keep AP's LVT thermal monitoring register
+programmed as such as well.
+
+This issue happens when the BIOS does not take over the thermal
+throttling interrupt: the AP's LVT thermal monitor register is then
+restored to 0x10000, which means vector 0 and fixed delivery mode, so
+all APs will signal illegal vector error interrupts.
+
+This patch checks that the interrupt delivery mode is not fixed mode
+before restoring the AP's LVT thermal monitor register.
+
+Signed-off-by: Youquan Song <youquan.song@intel.com>
+Acked-by: Suresh Siddha <suresh.b.siddha@intel.com>
+Acked-by: Yong Wang <yong.y.wang@intel.com>
+Cc: hpa@linux.intel.com
+Cc: joe@perches.com
+Cc: jbaron@redhat.com
+Cc: trenn@suse.de
+Cc: kent.liu@intel.com
+Cc: chaohong.guo@intel.com
+Link: http://lkml.kernel.org/r/1303402963-17738-1-git-send-email-youquan.song@intel.com
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/include/asm/apicdef.h           |    1 +
+ arch/x86/kernel/cpu/mcheck/therm_throt.c |   12 +++++++-----
+ 2 files changed, 8 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/include/asm/apicdef.h
++++ b/arch/x86/include/asm/apicdef.h
+@@ -78,6 +78,7 @@
+ #define               APIC_DEST_LOGICAL       0x00800
+ #define               APIC_DEST_PHYSICAL      0x00000
+ #define               APIC_DM_FIXED           0x00000
++#define               APIC_DM_FIXED_MASK      0x00700
+ #define               APIC_DM_LOWEST          0x00100
+ #define               APIC_DM_SMI             0x00200
+ #define               APIC_DM_REMRD           0x00300
+--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
++++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
+@@ -446,18 +446,20 @@ void intel_init_thermal(struct cpuinfo_x
+        */
+       rdmsr(MSR_IA32_MISC_ENABLE, l, h);
++      h = lvtthmr_init;
+       /*
+        * The initial value of thermal LVT entries on all APs always reads
+        * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
+        * sequence to them and LVT registers are reset to 0s except for
+        * the mask bits which are set to 1s when APs receive INIT IPI.
+-       * Always restore the value that BIOS has programmed on AP based on
+-       * BSP's info we saved since BIOS is always setting the same value
+-       * for all threads/cores
++       * If BIOS takes over the thermal interrupt and sets its interrupt
++       * delivery mode to SMI (not fixed), it restores the value that the
++       * BIOS has programmed on AP based on BSP's info we saved since BIOS
++       * is always setting the same value for all threads/cores.
+        */
+-      apic_write(APIC_LVTTHMR, lvtthmr_init);
++      if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED)
++              apic_write(APIC_LVTTHMR, lvtthmr_init);
+-      h = lvtthmr_init;
+       if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
+               printk(KERN_DEBUG
diff --git a/queue-2.6.38/x86-fix-uv-bau-for-non-consecutive-nasids.patch b/queue-2.6.38/x86-fix-uv-bau-for-non-consecutive-nasids.patch
new file mode 100644 (file)
index 0000000..b8b8dd5
--- /dev/null
@@ -0,0 +1,335 @@
+From 77ed23f8d995a01cd8101d84351b567bf5177a30 Mon Sep 17 00:00:00 2001
+From: Cliff Wickman <cpw@sgi.com>
+Date: Tue, 10 May 2011 08:26:43 -0500
+Subject: x86: Fix UV BAU for non-consecutive nasids
+
+From: Cliff Wickman <cpw@sgi.com>
+
+commit 77ed23f8d995a01cd8101d84351b567bf5177a30 upstream.
+
+This is a fix for the SGI Altix-UV Broadcast Assist Unit code,
+which is used for TLB flushing.
+
+Certain hardware configurations (that customers are ordering)
+cause nasids (numa address space id's) to be non-consecutive.
+Specifically, once you have more than 4 blades in an IRU
+(Individual Rack Unit - or 1/2 rack) but less than the maximum
+of 16, the nasid numbering becomes non-consecutive.  This
+currently results in a 'catastrophic error' (CATERR) detected by
+the firmware during OS boot.  The BAU is generating an 'INTD'
+request that is targeting a non-existent nasid value. Such
+configurations may also occur when a blade is configured off
+because of hardware errors. (There is one UV hub per blade.)
+
+This patch is required to support such configurations.
+
+The problem with the tlb_uv.c code is that it is using the
+consecutive hub numbers as indices to the BAU distribution bit
+map. These are simply the ordinal position of the hub or blade
+within its partition.  It should be using physical node numbers
+(pnodes), which correspond to the physical nasid values. Use of
+the hub number only works as long as the nasids in the partition
+are consecutive and increase with a stride of 1.
+
+This patch changes the index to be the pnode number, thus
+allowing nasids to be non-consecutive.
+It also provides a table in local memory for each cpu to
+translate target cpu number to target pnode and nasid.
+And it improves naming to properly reflect 'node' and 'uvhub'
+versus 'nasid'.
+
+Signed-off-by: Cliff Wickman <cpw@sgi.com>
+Link: http://lkml.kernel.org/r/E1QJmxX-0002Mz-Fk@eag09.americas.sgi.com
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/include/asm/uv/uv_bau.h |   17 +++++--
+ arch/x86/platform/uv/tlb_uv.c    |   92 ++++++++++++++++++++++++++-------------
+ 2 files changed, 76 insertions(+), 33 deletions(-)
+
+--- a/arch/x86/include/asm/uv/uv_bau.h
++++ b/arch/x86/include/asm/uv/uv_bau.h
+@@ -94,6 +94,8 @@
+ /* after this # consecutive successes, bump up the throttle if it was lowered */
+ #define COMPLETE_THRESHOLD 5
++#define UV_LB_SUBNODEID 0x10
++
+ /*
+  * number of entries in the destination side payload queue
+  */
+@@ -124,7 +126,7 @@
+  * The distribution specification (32 bytes) is interpreted as a 256-bit
+  * distribution vector. Adjacent bits correspond to consecutive even numbered
+  * nodeIDs. The result of adding the index of a given bit to the 15-bit
+- * 'base_dest_nodeid' field of the header corresponds to the
++ * 'base_dest_nasid' field of the header corresponds to the
+  * destination nodeID associated with that specified bit.
+  */
+ struct bau_target_uvhubmask {
+@@ -176,7 +178,7 @@ struct bau_msg_payload {
+ struct bau_msg_header {
+       unsigned int dest_subnodeid:6;  /* must be 0x10, for the LB */
+       /* bits 5:0 */
+-      unsigned int base_dest_nodeid:15; /* nasid of the */
++      unsigned int base_dest_nasid:15; /* nasid of the */
+       /* bits 20:6 */                   /* first bit in uvhub map */
+       unsigned int command:8; /* message type */
+       /* bits 28:21 */
+@@ -378,6 +380,10 @@ struct ptc_stats {
+       unsigned long d_rcanceled; /* number of messages canceled by resets */
+ };
++struct hub_and_pnode {
++      short uvhub;
++      short pnode;
++};
+ /*
+  * one per-cpu; to locate the software tables
+  */
+@@ -399,10 +405,12 @@ struct bau_control {
+       int baudisabled;
+       int set_bau_off;
+       short cpu;
++      short osnode;
+       short uvhub_cpu;
+       short uvhub;
+       short cpus_in_socket;
+       short cpus_in_uvhub;
++      short partition_base_pnode;
+       unsigned short message_number;
+       unsigned short uvhub_quiesce;
+       short socket_acknowledge_count[DEST_Q_SIZE];
+@@ -422,15 +430,16 @@ struct bau_control {
+       int congested_period;
+       cycles_t period_time;
+       long period_requests;
++      struct hub_and_pnode *target_hub_and_pnode;
+ };
+ static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp)
+ {
+       return constant_test_bit(uvhub, &dstp->bits[0]);
+ }
+-static inline void bau_uvhub_set(int uvhub, struct bau_target_uvhubmask *dstp)
++static inline void bau_uvhub_set(int pnode, struct bau_target_uvhubmask *dstp)
+ {
+-      __set_bit(uvhub, &dstp->bits[0]);
++      __set_bit(pnode, &dstp->bits[0]);
+ }
+ static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp,
+                                   int nbits)
+--- a/arch/x86/platform/uv/tlb_uv.c
++++ b/arch/x86/platform/uv/tlb_uv.c
+@@ -698,16 +698,17 @@ const struct cpumask *uv_flush_tlb_other
+                                         struct mm_struct *mm,
+                                         unsigned long va, unsigned int cpu)
+ {
+-      int tcpu;
+-      int uvhub;
+       int locals = 0;
+       int remotes = 0;
+       int hubs = 0;
++      int tcpu;
++      int tpnode;
+       struct bau_desc *bau_desc;
+       struct cpumask *flush_mask;
+       struct ptc_stats *stat;
+       struct bau_control *bcp;
+       struct bau_control *tbcp;
++      struct hub_and_pnode *hpp;
+       /* kernel was booted 'nobau' */
+       if (nobau)
+@@ -749,11 +750,18 @@ const struct cpumask *uv_flush_tlb_other
+       bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu;
+       bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
+-      /* cpu statistics */
+       for_each_cpu(tcpu, flush_mask) {
+-              uvhub = uv_cpu_to_blade_id(tcpu);
+-              bau_uvhub_set(uvhub, &bau_desc->distribution);
+-              if (uvhub == bcp->uvhub)
++              /*
++               * The distribution vector is a bit map of pnodes, relative
++               * to the partition base pnode (and the partition base nasid
++               * in the header).
++               * Translate cpu to pnode and hub using an array stored
++               * in local memory.
++               */
++              hpp = &bcp->socket_master->target_hub_and_pnode[tcpu];
++              tpnode = hpp->pnode - bcp->partition_base_pnode;
++              bau_uvhub_set(tpnode, &bau_desc->distribution);
++              if (hpp->uvhub == bcp->uvhub)
+                       locals++;
+               else
+                       remotes++;
+@@ -854,7 +862,7 @@ void uv_bau_message_interrupt(struct pt_
+  * an interrupt, but causes an error message to be returned to
+  * the sender.
+  */
+-static void uv_enable_timeouts(void)
++static void __init uv_enable_timeouts(void)
+ {
+       int uvhub;
+       int nuvhubs;
+@@ -1325,10 +1333,10 @@ static int __init uv_ptc_init(void)
+ }
+ /*
+- * initialize the sending side's sending buffers
++ * Initialize the sending side's sending buffers.
+  */
+ static void
+-uv_activation_descriptor_init(int node, int pnode)
++uv_activation_descriptor_init(int node, int pnode, int base_pnode)
+ {
+       int i;
+       int cpu;
+@@ -1351,11 +1359,11 @@ uv_activation_descriptor_init(int node,
+       n = pa >> uv_nshift;
+       m = pa & uv_mmask;
++      /* the 14-bit pnode */
+       uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE,
+                             (n << UV_DESC_BASE_PNODE_SHIFT | m));
+-
+       /*
+-       * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each
++       * Initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each
+        * cpu even though we only use the first one; one descriptor can
+        * describe a broadcast to 256 uv hubs.
+        */
+@@ -1364,12 +1372,13 @@ uv_activation_descriptor_init(int node,
+               memset(bd2, 0, sizeof(struct bau_desc));
+               bd2->header.sw_ack_flag = 1;
+               /*
+-               * base_dest_nodeid is the nasid of the first uvhub
+-               * in the partition. The bit map will indicate uvhub numbers,
+-               * which are 0-N in a partition. Pnodes are unique system-wide.
++               * The base_dest_nasid set in the message header is the nasid
++               * of the first uvhub in the partition. The bit map will
++               * indicate destination pnode numbers relative to that base.
++               * They may not be consecutive if nasid striding is being used.
+                */
+-              bd2->header.base_dest_nodeid = UV_PNODE_TO_NASID(uv_partition_base_pnode);
+-              bd2->header.dest_subnodeid = 0x10; /* the LB */
++              bd2->header.base_dest_nasid = UV_PNODE_TO_NASID(base_pnode);
++              bd2->header.dest_subnodeid = UV_LB_SUBNODEID;
+               bd2->header.command = UV_NET_ENDPOINT_INTD;
+               bd2->header.int_both = 1;
+               /*
+@@ -1441,7 +1450,7 @@ uv_payload_queue_init(int node, int pnod
+ /*
+  * Initialization of each UV hub's structures
+  */
+-static void __init uv_init_uvhub(int uvhub, int vector)
++static void __init uv_init_uvhub(int uvhub, int vector, int base_pnode)
+ {
+       int node;
+       int pnode;
+@@ -1449,11 +1458,11 @@ static void __init uv_init_uvhub(int uvh
+       node = uvhub_to_first_node(uvhub);
+       pnode = uv_blade_to_pnode(uvhub);
+-      uv_activation_descriptor_init(node, pnode);
++      uv_activation_descriptor_init(node, pnode, base_pnode);
+       uv_payload_queue_init(node, pnode);
+       /*
+-       * the below initialization can't be in firmware because the
+-       * messaging IRQ will be determined by the OS
++       * The below initialization can't be in firmware because the
++       * messaging IRQ will be determined by the OS.
+        */
+       apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits;
+       uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
+@@ -1490,10 +1499,11 @@ calculate_destination_timeout(void)
+ /*
+  * initialize the bau_control structure for each cpu
+  */
+-static int __init uv_init_per_cpu(int nuvhubs)
++static int __init uv_init_per_cpu(int nuvhubs, int base_part_pnode)
+ {
+       int i;
+       int cpu;
++      int tcpu;
+       int pnode;
+       int uvhub;
+       int have_hmaster;
+@@ -1527,6 +1537,15 @@ static int __init uv_init_per_cpu(int nu
+               bcp = &per_cpu(bau_control, cpu);
+               memset(bcp, 0, sizeof(struct bau_control));
+               pnode = uv_cpu_hub_info(cpu)->pnode;
++              if ((pnode - base_part_pnode) >= UV_DISTRIBUTION_SIZE) {
++                      printk(KERN_EMERG
++                              "cpu %d pnode %d-%d beyond %d; BAU disabled\n",
++                              cpu, pnode, base_part_pnode,
++                              UV_DISTRIBUTION_SIZE);
++                      return 1;
++              }
++              bcp->osnode = cpu_to_node(cpu);
++              bcp->partition_base_pnode = uv_partition_base_pnode;
+               uvhub = uv_cpu_hub_info(cpu)->numa_blade_id;
+               *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8));
+               bdp = &uvhub_descs[uvhub];
+@@ -1535,7 +1554,7 @@ static int __init uv_init_per_cpu(int nu
+               bdp->pnode = pnode;
+               /* kludge: 'assuming' one node per socket, and assuming that
+                  disabling a socket just leaves a gap in node numbers */
+-              socket = (cpu_to_node(cpu) & 1);
++              socket = bcp->osnode & 1;
+               bdp->socket_mask |= (1 << socket);
+               sdp = &bdp->socket[socket];
+               sdp->cpu_number[sdp->num_cpus] = cpu;
+@@ -1584,6 +1603,20 @@ static int __init uv_init_per_cpu(int nu
+ nextsocket:
+                       socket++;
+                       socket_mask = (socket_mask >> 1);
++                      /* each socket gets a local array of pnodes/hubs */
++                      bcp = smaster;
++                      bcp->target_hub_and_pnode = kmalloc_node(
++                              sizeof(struct hub_and_pnode) *
++                              num_possible_cpus(), GFP_KERNEL, bcp->osnode);
++                      memset(bcp->target_hub_and_pnode, 0,
++                              sizeof(struct hub_and_pnode) *
++                              num_possible_cpus());
++                      for_each_present_cpu(tcpu) {
++                              bcp->target_hub_and_pnode[tcpu].pnode =
++                                      uv_cpu_hub_info(tcpu)->pnode;
++                              bcp->target_hub_and_pnode[tcpu].uvhub =
++                                      uv_cpu_hub_info(tcpu)->numa_blade_id;
++                      }
+               }
+       }
+       kfree(uvhub_descs);
+@@ -1636,21 +1669,22 @@ static int __init uv_bau_init(void)
+       spin_lock_init(&disable_lock);
+       congested_cycles = microsec_2_cycles(congested_response_us);
+-      if (uv_init_per_cpu(nuvhubs)) {
+-              nobau = 1;
+-              return 0;
+-      }
+-
+       uv_partition_base_pnode = 0x7fffffff;
+-      for (uvhub = 0; uvhub < nuvhubs; uvhub++)
++      for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
+               if (uv_blade_nr_possible_cpus(uvhub) &&
+                       (uv_blade_to_pnode(uvhub) < uv_partition_base_pnode))
+                       uv_partition_base_pnode = uv_blade_to_pnode(uvhub);
++      }
++
++      if (uv_init_per_cpu(nuvhubs, uv_partition_base_pnode)) {
++              nobau = 1;
++              return 0;
++      }
+       vector = UV_BAU_MESSAGE;
+       for_each_possible_blade(uvhub)
+               if (uv_blade_nr_possible_cpus(uvhub))
+-                      uv_init_uvhub(uvhub, vector);
++                      uv_init_uvhub(uvhub, vector, uv_partition_base_pnode);
+       uv_enable_timeouts();
+       alloc_intr_gate(vector, uv_bau_message_intr1);
diff --git a/queue-2.6.38/x86-mce-amd-fix-leaving-freed-data-in-a-list.patch b/queue-2.6.38/x86-mce-amd-fix-leaving-freed-data-in-a-list.patch
new file mode 100644 (file)
index 0000000..c8cad64
--- /dev/null
@@ -0,0 +1,53 @@
+From d9a5ac9ef306eb5cc874f285185a15c303c50009 Mon Sep 17 00:00:00 2001
+From: Julia Lawall <julia@diku.dk>
+Date: Fri, 13 May 2011 15:52:09 +0200
+Subject: x86, mce, AMD: Fix leaving freed data in a list
+
+From: Julia Lawall <julia@diku.dk>
+
+commit d9a5ac9ef306eb5cc874f285185a15c303c50009 upstream.
+
+b may be added to a list, but is not removed before being freed
+in the case of an error.  This is done in the corresponding
+deallocation function, so the code here has been changed to
+follow that.
+
+The semantic match that finds this problem is as follows:
+(http://coccinelle.lip6.fr/)
+
+// <smpl>
+@@
+expression E,E1,E2;
+identifier l;
+@@
+
+*list_add(&E->l,E1);
+... when != E1
+    when != list_del(&E->l)
+    when != list_del_init(&E->l)
+    when != E = E2
+*kfree(E);// </smpl>
+
+Signed-off-by: Julia Lawall <julia@diku.dk>
+Cc: Borislav Petkov <borislav.petkov@amd.com>
+Cc: Robert Richter <robert.richter@amd.com>
+Cc: Yinghai Lu <yinghai@kernel.org>
+Cc: Andreas Herrmann <andreas.herrmann3@amd.com>
+Link: http://lkml.kernel.org/r/1305294731-12127-1-git-send-email-julia@diku.dk
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/kernel/cpu/mcheck/mce_amd.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
++++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
+@@ -509,6 +509,7 @@ recurse:
+ out_free:
+       if (b) {
+               kobject_put(&b->kobj);
++              list_del(&b->miscj);
+               kfree(b);
+       }
+       return err;