From: Greg Kroah-Hartman Date: Fri, 3 Sep 2010 22:38:23 +0000 (-0700) Subject: .35 patches X-Git-Tag: v2.6.27.54~38 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=7958dd71d791eea6417d2051da83b93d8cdcabe4;p=thirdparty%2Fkernel%2Fstable-queue.git .35 patches --- diff --git a/queue-2.6.35/alsa-hda-add-sony-vaio-quirk-for-alc269.patch b/queue-2.6.35/alsa-hda-add-sony-vaio-quirk-for-alc269.patch new file mode 100644 index 00000000000..e35f679f294 --- /dev/null +++ b/queue-2.6.35/alsa-hda-add-sony-vaio-quirk-for-alc269.patch @@ -0,0 +1,31 @@ +From dbbcbc073ad3132bfbc410b11546b2fb4bdf2568 Mon Sep 17 00:00:00 2001 +From: David Henningsson +Date: Mon, 23 Aug 2010 08:14:35 +0200 +Subject: ALSA: hda - Add Sony VAIO quirk for ALC269 + +From: David Henningsson + +commit dbbcbc073ad3132bfbc410b11546b2fb4bdf2568 upstream. + +The attached patch enables playback on a Sony VAIO machine. + +BugLink: http://launchpad.net/bugs/618271 + +Signed-off-by: David Henningsson +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman + +--- + sound/pci/hda/patch_realtek.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -14244,6 +14244,7 @@ static const struct alc_fixup alc269_fix + + static struct snd_pci_quirk alc269_fixup_tbl[] = { + SND_PCI_QUIRK(0x104d, 0x9071, "Sony VAIO", ALC269_FIXUP_SONY_VAIO), ++ SND_PCI_QUIRK(0x104d, 0x9077, "Sony VAIO", ALC269_FIXUP_SONY_VAIO), + {} + }; + diff --git a/queue-2.6.35/alsa-hda-rename-imic-to-int-mic-on-lenovo-nb0763.patch b/queue-2.6.35/alsa-hda-rename-imic-to-int-mic-on-lenovo-nb0763.patch new file mode 100644 index 00000000000..bf4ba19891e --- /dev/null +++ b/queue-2.6.35/alsa-hda-rename-imic-to-int-mic-on-lenovo-nb0763.patch @@ -0,0 +1,42 @@ +From 150b432f448281d5518f5229d240923f9a9c5459 Mon Sep 17 00:00:00 2001 +From: David Henningsson +Date: Thu, 29 Jul 2010 14:46:42 +0200 +Subject: ALSA: hda - Rename iMic to Int Mic on Lenovo NB0763 + +From: David Henningsson + 
+commit 150b432f448281d5518f5229d240923f9a9c5459 upstream. + +The non-standard name "iMic" makes PulseAudio ignore the microphone. +BugLink: https://launchpad.net/bugs/605101 + +Signed-off-by: David Henningsson +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman + +--- + sound/pci/hda/patch_realtek.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -7005,7 +7005,7 @@ static struct hda_input_mux alc883_lenov + .num_items = 4, + .items = { + { "Mic", 0x0 }, +- { "iMic", 0x1 }, ++ { "Int Mic", 0x1 }, + { "Line", 0x2 }, + { "CD", 0x4 }, + }, +@@ -8575,8 +8575,8 @@ static struct snd_kcontrol_new alc883_le + HDA_CODEC_MUTE("CD Playback Switch", 0x0b, 0x04, HDA_INPUT), + HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x0, HDA_INPUT), + HDA_CODEC_MUTE("Mic Playback Switch", 0x0b, 0x0, HDA_INPUT), +- HDA_CODEC_VOLUME("iMic Playback Volume", 0x0b, 0x1, HDA_INPUT), +- HDA_CODEC_MUTE("iMic Playback Switch", 0x0b, 0x1, HDA_INPUT), ++ HDA_CODEC_VOLUME("Int Mic Playback Volume", 0x0b, 0x1, HDA_INPUT), ++ HDA_CODEC_MUTE("Int Mic Playback Switch", 0x0b, 0x1, HDA_INPUT), + { } /* end */ + }; + diff --git a/queue-2.6.35/alsa-hda-use-model-auto-for-lg-r510.patch b/queue-2.6.35/alsa-hda-use-model-auto-for-lg-r510.patch new file mode 100644 index 00000000000..d6b2a1ff148 --- /dev/null +++ b/queue-2.6.35/alsa-hda-use-model-auto-for-lg-r510.patch @@ -0,0 +1,30 @@ +From 81cd3fca642cecb40a1ccef099799dcb5730734b Mon Sep 17 00:00:00 2001 +From: David Henningsson +Date: Tue, 10 Aug 2010 09:18:00 +0200 +Subject: ALSA: HDA: Use model=auto for LG R510 + +From: David Henningsson + +commit 81cd3fca642cecb40a1ccef099799dcb5730734b upstream. + +Two users report model=auto is needed to make the internal mic work properly. 
+BugLink: https://bugs.launchpad.net/bugs/495134 + +Signed-off-by: David Henningsson +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman + +--- + sound/pci/hda/patch_realtek.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -13305,7 +13305,6 @@ static struct snd_pci_quirk alc268_cfg_t + SND_PCI_QUIRK(0x14c0, 0x0025, "COMPAL IFL90/JFL-92", ALC268_TOSHIBA), + SND_PCI_QUIRK(0x152d, 0x0763, "Diverse (CPR2000)", ALC268_ACER), + SND_PCI_QUIRK(0x152d, 0x0771, "Quanta IL1", ALC267_QUANTA_IL1), +- SND_PCI_QUIRK(0x1854, 0x1775, "LG R510", ALC268_DELL), + {} + }; + diff --git a/queue-2.6.35/direct-io-move-aio_complete-into-end_io.patch b/queue-2.6.35/direct-io-move-aio_complete-into-end_io.patch new file mode 100644 index 00000000000..9d0b35ae078 --- /dev/null +++ b/queue-2.6.35/direct-io-move-aio_complete-into-end_io.patch @@ -0,0 +1,204 @@ +From 40e2e97316af6e62affab7a392e792494b8d9dde Mon Sep 17 00:00:00 2001 +From: Christoph Hellwig +Date: Sun, 18 Jul 2010 21:17:09 +0000 +Subject: direct-io: move aio_complete into ->end_io + +From: Christoph Hellwig + +commit 40e2e97316af6e62affab7a392e792494b8d9dde upstream. + +Filesystems with unwritten extent support must not complete an AIO request +until the transaction to convert the extent has been commited. That means +the aio_complete calls needs to be moved into the ->end_io callback so +that the filesystem can control when to call it exactly. + +This makes a bit of a mess out of dio_complete and the ->end_io callback +prototype even more complicated. 
+ +Signed-off-by: Christoph Hellwig +Reviewed-by: Jan Kara +Signed-off-by: Alex Elder +Cc: Chuck Ebbert +Signed-off-by: Greg Kroah-Hartman + +--- + fs/direct-io.c | 26 ++++++++++++++------------ + fs/ext4/inode.c | 10 +++++++--- + fs/ocfs2/aops.c | 7 ++++++- + fs/xfs/linux-2.6/xfs_aops.c | 7 ++++++- + fs/xfs/linux-2.6/xfs_aops.h | 2 ++ + include/linux/fs.h | 3 ++- + 6 files changed, 37 insertions(+), 18 deletions(-) + +--- a/fs/direct-io.c ++++ b/fs/direct-io.c +@@ -218,7 +218,7 @@ static struct page *dio_get_page(struct + * filesystems can use it to hold additional state between get_block calls and + * dio_complete. + */ +-static int dio_complete(struct dio *dio, loff_t offset, int ret) ++static int dio_complete(struct dio *dio, loff_t offset, int ret, bool is_async) + { + ssize_t transferred = 0; + +@@ -239,14 +239,6 @@ static int dio_complete(struct dio *dio, + transferred = dio->i_size - offset; + } + +- if (dio->end_io && dio->result) +- dio->end_io(dio->iocb, offset, transferred, +- dio->map_bh.b_private); +- +- if (dio->flags & DIO_LOCKING) +- /* lockdep: non-owner release */ +- up_read_non_owner(&dio->inode->i_alloc_sem); +- + if (ret == 0) + ret = dio->page_errors; + if (ret == 0) +@@ -254,6 +246,17 @@ static int dio_complete(struct dio *dio, + if (ret == 0) + ret = transferred; + ++ if (dio->end_io && dio->result) { ++ dio->end_io(dio->iocb, offset, transferred, ++ dio->map_bh.b_private, ret, is_async); ++ } else if (is_async) { ++ aio_complete(dio->iocb, ret, 0); ++ } ++ ++ if (dio->flags & DIO_LOCKING) ++ /* lockdep: non-owner release */ ++ up_read_non_owner(&dio->inode->i_alloc_sem); ++ + return ret; + } + +@@ -277,8 +280,7 @@ static void dio_bio_end_aio(struct bio * + spin_unlock_irqrestore(&dio->bio_lock, flags); + + if (remaining == 0) { +- int ret = dio_complete(dio, dio->iocb->ki_pos, 0); +- aio_complete(dio->iocb, ret, 0); ++ dio_complete(dio, dio->iocb->ki_pos, 0, true); + kfree(dio); + } + } +@@ -1126,7 +1128,7 @@ direct_io_worker(int rw, 
struct kiocb *i + spin_unlock_irqrestore(&dio->bio_lock, flags); + + if (ret2 == 0) { +- ret = dio_complete(dio, offset, ret); ++ ret = dio_complete(dio, offset, ret, false); + kfree(dio); + } else + BUG_ON(ret != -EIOCBQUEUED); +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -3775,7 +3775,8 @@ static ext4_io_end_t *ext4_init_io_end ( + } + + static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, +- ssize_t size, void *private) ++ ssize_t size, void *private, int ret, ++ bool is_async) + { + ext4_io_end_t *io_end = iocb->private; + struct workqueue_struct *wq; +@@ -3784,7 +3785,7 @@ static void ext4_end_io_dio(struct kiocb + + /* if not async direct IO or dio with 0 bytes write, just return */ + if (!io_end || !size) +- return; ++ goto out; + + ext_debug("ext4_end_io_dio(): io_end 0x%p" + "for inode %lu, iocb 0x%p, offset %llu, size %llu\n", +@@ -3795,7 +3796,7 @@ static void ext4_end_io_dio(struct kiocb + if (io_end->flag != EXT4_IO_UNWRITTEN){ + ext4_free_io_end(io_end); + iocb->private = NULL; +- return; ++ goto out; + } + + io_end->offset = offset; +@@ -3812,6 +3813,9 @@ static void ext4_end_io_dio(struct kiocb + list_add_tail(&io_end->list, &ei->i_completed_io_list); + spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); + iocb->private = NULL; ++out: ++ if (is_async) ++ aio_complete(iocb, ret, 0); + } + + static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) +--- a/fs/ocfs2/aops.c ++++ b/fs/ocfs2/aops.c +@@ -578,7 +578,9 @@ bail: + static void ocfs2_dio_end_io(struct kiocb *iocb, + loff_t offset, + ssize_t bytes, +- void *private) ++ void *private, ++ int ret, ++ bool is_async) + { + struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; + int level; +@@ -592,6 +594,9 @@ static void ocfs2_dio_end_io(struct kioc + if (!level) + up_read(&inode->i_alloc_sem); + ocfs2_rw_unlock(inode, level); ++ ++ if (is_async) ++ aio_complete(iocb, ret, 0); + } + + /* +--- a/fs/xfs/linux-2.6/xfs_aops.c ++++ b/fs/xfs/linux-2.6/xfs_aops.c 
+@@ -1599,7 +1599,9 @@ xfs_end_io_direct( + struct kiocb *iocb, + loff_t offset, + ssize_t size, +- void *private) ++ void *private, ++ int ret, ++ bool is_async) + { + xfs_ioend_t *ioend = iocb->private; + +@@ -1645,6 +1647,9 @@ xfs_end_io_direct( + * against double-freeing. + */ + iocb->private = NULL; ++ ++ if (is_async) ++ aio_complete(iocb, ret, 0); + } + + STATIC ssize_t +--- a/fs/xfs/linux-2.6/xfs_aops.h ++++ b/fs/xfs/linux-2.6/xfs_aops.h +@@ -37,6 +37,8 @@ typedef struct xfs_ioend { + size_t io_size; /* size of the extent */ + xfs_off_t io_offset; /* offset in the file */ + struct work_struct io_work; /* xfsdatad work queue */ ++ struct kiocb *io_iocb; ++ int io_result; + } xfs_ioend_t; + + extern const struct address_space_operations xfs_address_space_operations; +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -416,7 +416,8 @@ struct buffer_head; + typedef int (get_block_t)(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create); + typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, +- ssize_t bytes, void *private); ++ ssize_t bytes, void *private, int ret, ++ bool is_async); + + /* + * Attribute flags. These should be or-ed together to figure out what diff --git a/queue-2.6.35/ext4-move-aio-completion-after-unwritten-extent-conversion.patch b/queue-2.6.35/ext4-move-aio-completion-after-unwritten-extent-conversion.patch new file mode 100644 index 00000000000..d38fafbd925 --- /dev/null +++ b/queue-2.6.35/ext4-move-aio-completion-after-unwritten-extent-conversion.patch @@ -0,0 +1,99 @@ +From 5b3ff237bef43b9e7fb7d1eb858e29b73fd664f9 Mon Sep 17 00:00:00 2001 +From: Jiaying Zhang +Date: Tue, 27 Jul 2010 11:56:06 -0400 +Subject: ext4: move aio completion after unwritten extent conversion + +From: Jiaying Zhang + +commit 5b3ff237bef43b9e7fb7d1eb858e29b73fd664f9 upstream. + +This patch is to be applied upon Christoph's "direct-io: move aio_complete +into ->end_io" patch. 
It adds iocb and result fields to struct ext4_io_end_t, +so that we can call aio_complete from ext4_end_io_nolock() after the extent +conversion has finished. + +I have verified with Christoph's aio-dio test that used to fail after a few +runs on an original kernel but now succeeds on the patched kernel. + +See http://thread.gmane.org/gmane.comp.file-systems.ext4/19659 for details. + +Signed-off-by: Jiaying Zhang +Signed-off-by: "Theodore Ts'o" +Cc: Chuck Ebbert +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/ext4.h | 4 +++- + fs/ext4/inode.c | 17 ++++++++++++----- + 2 files changed, 15 insertions(+), 6 deletions(-) + +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -167,13 +167,15 @@ struct mpage_da_data { + }; + #define EXT4_IO_UNWRITTEN 0x1 + typedef struct ext4_io_end { +- struct list_head list; /* per-file finished AIO list */ ++ struct list_head list; /* per-file finished IO list */ + struct inode *inode; /* file being written to */ + unsigned int flag; /* unwritten or not */ + struct page *page; /* page struct for buffer write */ + loff_t offset; /* offset in the file */ + ssize_t size; /* size of the extent */ + struct work_struct work; /* data work queue */ ++ struct kiocb *iocb; /* iocb struct for AIO */ ++ int result; /* error value for AIO */ + } ext4_io_end_t; + + /* +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -3668,6 +3668,8 @@ static int ext4_end_io_nolock(ext4_io_en + return ret; + } + ++ if (io->iocb) ++ aio_complete(io->iocb, io->result, 0); + /* clear the DIO AIO unwritten flag */ + io->flag = 0; + return ret; +@@ -3767,6 +3769,8 @@ static ext4_io_end_t *ext4_init_io_end ( + io->offset = 0; + io->size = 0; + io->page = NULL; ++ io->iocb = NULL; ++ io->result = 0; + INIT_WORK(&io->work, ext4_end_io_work); + INIT_LIST_HEAD(&io->list); + } +@@ -3796,12 +3800,18 @@ static void ext4_end_io_dio(struct kiocb + if (io_end->flag != EXT4_IO_UNWRITTEN){ + ext4_free_io_end(io_end); + iocb->private = NULL; +- goto out; ++out: ++ if (is_async) ++ 
aio_complete(iocb, ret, 0); ++ return; + } + + io_end->offset = offset; + io_end->size = size; +- io_end->flag = EXT4_IO_UNWRITTEN; ++ if (is_async) { ++ io_end->iocb = iocb; ++ io_end->result = ret; ++ } + wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; + + /* queue the work to convert unwritten extents to written */ +@@ -3813,9 +3823,6 @@ static void ext4_end_io_dio(struct kiocb + list_add_tail(&io_end->list, &ei->i_completed_io_list); + spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); + iocb->private = NULL; +-out: +- if (is_async) +- aio_complete(iocb, ret, 0); + } + + static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) diff --git a/queue-2.6.35/hwmon-ads7871-fix-ads7871_probe-error-paths.patch b/queue-2.6.35/hwmon-ads7871-fix-ads7871_probe-error-paths.patch new file mode 100644 index 00000000000..98ced106ea5 --- /dev/null +++ b/queue-2.6.35/hwmon-ads7871-fix-ads7871_probe-error-paths.patch @@ -0,0 +1,85 @@ +From c12c507d7185fe4e8ada7ed9832957576eefecf8 Mon Sep 17 00:00:00 2001 +From: Axel Lin +Date: Wed, 25 Aug 2010 15:42:10 +0200 +Subject: hwmon: (ads7871) Fix ads7871_probe error paths + +From: Axel Lin + +commit c12c507d7185fe4e8ada7ed9832957576eefecf8 upstream. + +1. remove 'status' variable +2. remove unneeded initialization of 'err' variable +3. return missing error code if sysfs_create_group fail. +4. 
fix the init sequence as: + - check hardware existence + - kzalloc for ads7871_data + - sysfs_create_group + - hwmon_device_register + +Signed-off-by: Axel Lin +Signed-off-by: Jean Delvare +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/hwmon/ads7871.c | 38 +++++++++++++++++++------------------- + 1 file changed, 19 insertions(+), 19 deletions(-) + +--- a/drivers/hwmon/ads7871.c ++++ b/drivers/hwmon/ads7871.c +@@ -160,30 +160,12 @@ static const struct attribute_group ads7 + + static int __devinit ads7871_probe(struct spi_device *spi) + { +- int status, ret, err = 0; ++ int ret, err; + uint8_t val; + struct ads7871_data *pdata; + + dev_dbg(&spi->dev, "probe\n"); + +- pdata = kzalloc(sizeof(struct ads7871_data), GFP_KERNEL); +- if (!pdata) { +- err = -ENOMEM; +- goto exit; +- } +- +- status = sysfs_create_group(&spi->dev.kobj, &ads7871_group); +- if (status < 0) +- goto error_free; +- +- pdata->hwmon_dev = hwmon_device_register(&spi->dev); +- if (IS_ERR(pdata->hwmon_dev)) { +- err = PTR_ERR(pdata->hwmon_dev); +- goto error_remove; +- } +- +- spi_set_drvdata(spi, pdata); +- + /* Configure the SPI bus */ + spi->mode = (SPI_MODE_0); + spi->bits_per_word = 8; +@@ -201,6 +183,24 @@ static int __devinit ads7871_probe(struc + we need to make sure we really have a chip*/ + if (val != ret) { + err = -ENODEV; ++ goto exit; ++ } ++ ++ pdata = kzalloc(sizeof(struct ads7871_data), GFP_KERNEL); ++ if (!pdata) { ++ err = -ENOMEM; ++ goto exit; ++ } ++ ++ err = sysfs_create_group(&spi->dev.kobj, &ads7871_group); ++ if (err < 0) ++ goto error_free; ++ ++ spi_set_drvdata(spi, pdata); ++ ++ pdata->hwmon_dev = hwmon_device_register(&spi->dev); ++ if (IS_ERR(pdata->hwmon_dev)) { ++ err = PTR_ERR(pdata->hwmon_dev); + goto error_remove; + } + diff --git a/queue-2.6.35/hwmon-k8temp-differentiate-between-am2-and-asb1.patch b/queue-2.6.35/hwmon-k8temp-differentiate-between-am2-and-asb1.patch new file mode 100644 index 00000000000..bfb3f3f5d51 --- /dev/null +++ 
b/queue-2.6.35/hwmon-k8temp-differentiate-between-am2-and-asb1.patch @@ -0,0 +1,83 @@ +From a05e93f3b3fc2f53c1d0de3b17019e207c482349 Mon Sep 17 00:00:00 2001 +From: Andreas Herrmann +Date: Wed, 25 Aug 2010 15:42:12 +0200 +Subject: hwmon: (k8temp) Differentiate between AM2 and ASB1 + +From: Andreas Herrmann + +commit a05e93f3b3fc2f53c1d0de3b17019e207c482349 upstream. + +Commit 8bf0223ed515be24de0c671eedaff49e78bebc9c (hwmon, k8temp: Fix +temperature reporting for ASB1 processor revisions) fixed temperature +reporting for ASB1 CPUs. But those CPU models (model 0x6b, 0x6f, 0x7f) +were packaged both as AM2 (desktop) and ASB1 (mobile). Thus the commit +leads to wrong temperature reporting for AM2 CPU parts. + +The solution is to determine the package type for models 0x6b, 0x6f, +0x7f. + +This is done using BrandId from CPUID Fn8000_0001_EBX[15:0]. See +"Constructing the processor Name String" in "Revision Guide for AMD +NPT Family 0Fh Processors" (Rev. 3.46). + +Cc: Rudolf Marek +Reported-by: Vladislav Guberinic +Signed-off-by: Andreas Herrmann +Signed-off-by: Jean Delvare +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/hwmon/k8temp.c | 35 ++++++++++++++++++++++++++++++++--- + 1 file changed, 32 insertions(+), 3 deletions(-) + +--- a/drivers/hwmon/k8temp.c ++++ b/drivers/hwmon/k8temp.c +@@ -143,6 +143,37 @@ static const struct pci_device_id k8temp + + MODULE_DEVICE_TABLE(pci, k8temp_ids); + ++static int __devinit is_rev_g_desktop(u8 model) ++{ ++ u32 brandidx; ++ ++ if (model < 0x69) ++ return 0; ++ ++ if (model == 0xc1 || model == 0x6c || model == 0x7c) ++ return 0; ++ ++ /* ++ * Differentiate between AM2 and ASB1. ++ * See "Constructing the processor Name String" in "Revision ++ * Guide for AMD NPT Family 0Fh Processors" (33610). 
++ */ ++ brandidx = cpuid_ebx(0x80000001); ++ brandidx = (brandidx >> 9) & 0x1f; ++ ++ /* Single core */ ++ if ((model == 0x6f || model == 0x7f) && ++ (brandidx == 0x7 || brandidx == 0x9 || brandidx == 0xc)) ++ return 0; ++ ++ /* Dual core */ ++ if (model == 0x6b && ++ (brandidx == 0xb || brandidx == 0xc)) ++ return 0; ++ ++ return 1; ++} ++ + static int __devinit k8temp_probe(struct pci_dev *pdev, + const struct pci_device_id *id) + { +@@ -179,9 +210,7 @@ static int __devinit k8temp_probe(struct + "wrong - check erratum #141\n"); + } + +- if ((model >= 0x69) && +- !(model == 0xc1 || model == 0x6c || model == 0x7c || +- model == 0x6b || model == 0x6f || model == 0x7f)) { ++ if (is_rev_g_desktop(model)) { + /* + * RevG desktop CPUs (i.e. no socket S1G1 or + * ASB1 parts) need additional offset, diff --git a/queue-2.6.35/libata-sff-remove-harmful-bug_on-from-ata_bmdma_qc_issue.patch b/queue-2.6.35/libata-sff-remove-harmful-bug_on-from-ata_bmdma_qc_issue.patch new file mode 100644 index 00000000000..45c8ca78a29 --- /dev/null +++ b/queue-2.6.35/libata-sff-remove-harmful-bug_on-from-ata_bmdma_qc_issue.patch @@ -0,0 +1,37 @@ +From 55ee67f837882f28a900705a2ca1af257ab6c53d Mon Sep 17 00:00:00 2001 +From: Mark Lord +Date: Fri, 20 Aug 2010 10:13:16 -0400 +Subject: libata-sff: remove harmful BUG_ON from ata_bmdma_qc_issue + +From: Mark Lord + +commit 55ee67f837882f28a900705a2ca1af257ab6c53d upstream. + +Remove harmful BUG_ON() from ata_bmdma_qc_issue(), +as it casts too wide of a net and breaks sata_mv. +It also crashes the kernel while doing the BUG_ON(). + +There's already a WARN_ON_ONCE() further down to catch +the case of POLLING for a BMDMA operation. 
+ +Signed-off-by: Mark Lord +Signed-off-by: Jeff Garzik +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/ata/libata-sff.c | 4 ---- + 1 file changed, 4 deletions(-) + +--- a/drivers/ata/libata-sff.c ++++ b/drivers/ata/libata-sff.c +@@ -2735,10 +2735,6 @@ unsigned int ata_bmdma_qc_issue(struct a + { + struct ata_port *ap = qc->ap; + +- /* see ata_dma_blacklisted() */ +- BUG_ON((ap->flags & ATA_FLAG_PIO_POLLING) && +- qc->tf.protocol == ATAPI_PROT_DMA); +- + /* defer PIO handling to sff_qc_issue */ + if (!ata_is_dma(qc->tf.protocol)) + return ata_sff_qc_issue(qc); diff --git a/queue-2.6.35/netfilter-fix-config_compat-support.patch b/queue-2.6.35/netfilter-fix-config_compat-support.patch new file mode 100644 index 00000000000..82ca81a4fb5 --- /dev/null +++ b/queue-2.6.35/netfilter-fix-config_compat-support.patch @@ -0,0 +1,68 @@ +From cca77b7c81876d819a5806f408b3c29b5b61a815 Mon Sep 17 00:00:00 2001 +From: Florian Westphal +Date: Mon, 23 Aug 2010 14:41:22 -0700 +Subject: netfilter: fix CONFIG_COMPAT support + +From: Florian Westphal + +commit cca77b7c81876d819a5806f408b3c29b5b61a815 upstream. + +commit f3c5c1bfd430858d3a05436f82c51e53104feb6b +(netfilter: xtables: make ip_tables reentrant) forgot to +also compute the jumpstack size in the compat handlers. + +Result is that "iptables -I INPUT -j userchain" turns into -j DROP. + +Reported by Sebastian Roesner on #netfilter, closes +http://bugzilla.netfilter.org/show_bug.cgi?id=669. + +Note: arptables change is compile-tested only. + +Signed-off-by: Florian Westphal +Acked-by: Eric Dumazet +Tested-by: Mikael Pettersson +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman + +--- + net/ipv4/netfilter/arp_tables.c | 3 +++ + net/ipv4/netfilter/ip_tables.c | 3 +++ + net/ipv6/netfilter/ip6_tables.c | 3 +++ + 3 files changed, 9 insertions(+) + +--- a/net/ipv4/netfilter/arp_tables.c ++++ b/net/ipv4/netfilter/arp_tables.c +@@ -1420,6 +1420,9 @@ static int translate_compat_table(const + if (ret != 0) + break; + ++i; ++ if (strcmp(arpt_get_target(iter1)->u.user.name, ++ XT_ERROR_TARGET) == 0) ++ ++newinfo->stacksize; + } + if (ret) { + /* +--- a/net/ipv4/netfilter/ip_tables.c ++++ b/net/ipv4/netfilter/ip_tables.c +@@ -1747,6 +1747,9 @@ translate_compat_table(struct net *net, + if (ret != 0) + break; + ++i; ++ if (strcmp(ipt_get_target(iter1)->u.user.name, ++ XT_ERROR_TARGET) == 0) ++ ++newinfo->stacksize; + } + if (ret) { + /* +--- a/net/ipv6/netfilter/ip6_tables.c ++++ b/net/ipv6/netfilter/ip6_tables.c +@@ -1765,6 +1765,9 @@ translate_compat_table(struct net *net, + if (ret != 0) + break; + ++i; ++ if (strcmp(ip6t_get_target(iter1)->u.user.name, ++ XT_ERROR_TARGET) == 0) ++ ++newinfo->stacksize; + } + if (ret) { + /* diff --git a/queue-2.6.35/pata_cmd64x-revert-commit-d62f5576.patch b/queue-2.6.35/pata_cmd64x-revert-commit-d62f5576.patch new file mode 100644 index 00000000000..d9042ae840f --- /dev/null +++ b/queue-2.6.35/pata_cmd64x-revert-commit-d62f5576.patch @@ -0,0 +1,43 @@ +From aba8a08ded89a74f1ba04ae94ecc98f26e27d41c Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Tue, 17 Aug 2010 14:13:42 +0200 +Subject: pata_cmd64x: revert commit d62f5576 + +From: Tejun Heo + +commit aba8a08ded89a74f1ba04ae94ecc98f26e27d41c upstream. + +Commit d62f5576 (pata_cmd64x: fix handling of address setup timings) +incorrectly called ata_timing_compute() on UDMA mode on 0 @UT leading +to devide by zero fault. Revert it until better fix is available. +This is reported in bko#16607 by Milan Kocian who also root caused it. 
+ + https://bugzilla.kernel.org/show_bug.cgi?id=16607 + +Signed-off-by: Tejun Heo +Reported-and-root-caused-by: Milan Kocian +Cc: Bartlomiej Zolnierkiewicz +Signed-off-by: Jeff Garzik +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/ata/pata_cmd64x.c | 6 ------ + 1 file changed, 6 deletions(-) + +--- a/drivers/ata/pata_cmd64x.c ++++ b/drivers/ata/pata_cmd64x.c +@@ -121,14 +121,8 @@ static void cmd64x_set_timing(struct ata + + if (pair) { + struct ata_timing tp; +- + ata_timing_compute(pair, pair->pio_mode, &tp, T, 0); + ata_timing_merge(&t, &tp, &t, ATA_TIMING_SETUP); +- if (pair->dma_mode) { +- ata_timing_compute(pair, pair->dma_mode, +- &tp, T, 0); +- ata_timing_merge(&tp, &t, &t, ATA_TIMING_SETUP); +- } + } + } + diff --git a/queue-2.6.35/pci-msi-remove-unsafe-and-unnecessary-hardware-access.patch b/queue-2.6.35/pci-msi-remove-unsafe-and-unnecessary-hardware-access.patch new file mode 100644 index 00000000000..1fc1af7cff0 --- /dev/null +++ b/queue-2.6.35/pci-msi-remove-unsafe-and-unnecessary-hardware-access.patch @@ -0,0 +1,86 @@ +From fcd097f31a6ee207cc0c3da9cccd2a86d4334785 Mon Sep 17 00:00:00 2001 +From: Ben Hutchings +Date: Thu, 17 Jun 2010 20:16:36 +0100 +Subject: PCI: MSI: Remove unsafe and unnecessary hardware access + +From: Ben Hutchings + +commit fcd097f31a6ee207cc0c3da9cccd2a86d4334785 upstream. + +During suspend on an SMP system, {read,write}_msi_msg_desc() may be +called to mask and unmask interrupts on a device that is already in a +reduced power state. At this point memory-mapped registers including +MSI-X tables are not accessible, and config space may not be fully +functional either. + +While a device is in a reduced power state its interrupts are +effectively masked and its MSI(-X) state will be restored when it is +brought back to D0. Therefore these functions can simply read and +write msi_desc::msg for devices not in D0. 
+ +Further, read_msi_msg_desc() should only ever be used to update a +previously written message, so it can always read msi_desc::msg +and never needs to touch the hardware. + +Tested-by: "Michael Chan" +Signed-off-by: Ben Hutchings +Signed-off-by: Jesse Barnes +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/pci/msi.c | 36 ++++++++++++------------------------ + 1 file changed, 12 insertions(+), 24 deletions(-) + +--- a/drivers/pci/msi.c ++++ b/drivers/pci/msi.c +@@ -196,30 +196,15 @@ void unmask_msi_irq(unsigned int irq) + void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) + { + struct msi_desc *entry = get_irq_desc_msi(desc); +- if (entry->msi_attrib.is_msix) { +- void __iomem *base = entry->mask_base + +- entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; + +- msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR); +- msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR); +- msg->data = readl(base + PCI_MSIX_ENTRY_DATA); +- } else { +- struct pci_dev *dev = entry->dev; +- int pos = entry->msi_attrib.pos; +- u16 data; +- +- pci_read_config_dword(dev, msi_lower_address_reg(pos), +- &msg->address_lo); +- if (entry->msi_attrib.is_64) { +- pci_read_config_dword(dev, msi_upper_address_reg(pos), +- &msg->address_hi); +- pci_read_config_word(dev, msi_data_reg(pos, 1), &data); +- } else { +- msg->address_hi = 0; +- pci_read_config_word(dev, msi_data_reg(pos, 0), &data); +- } +- msg->data = data; +- } ++ /* We do not touch the hardware (which may not even be ++ * accessible at the moment) but return the last message ++ * written. Assert that this is valid, assuming that ++ * valid messages are not all-zeroes. 
*/ ++ BUG_ON(!(entry->msg.address_hi | entry->msg.address_lo | ++ entry->msg.data)); ++ ++ *msg = entry->msg; + } + + void read_msi_msg(unsigned int irq, struct msi_msg *msg) +@@ -232,7 +217,10 @@ void read_msi_msg(unsigned int irq, stru + void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) + { + struct msi_desc *entry = get_irq_desc_msi(desc); +- if (entry->msi_attrib.is_msix) { ++ ++ if (entry->dev->current_state != PCI_D0) { ++ /* Don't touch the hardware now */ ++ } else if (entry->msi_attrib.is_msix) { + void __iomem *base; + base = entry->mask_base + + entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; diff --git a/queue-2.6.35/pci-msi-restore-read_msi_msg_desc-add-get_cached_msi_msg_desc.patch b/queue-2.6.35/pci-msi-restore-read_msi_msg_desc-add-get_cached_msi_msg_desc.patch new file mode 100644 index 00000000000..1077645b5c4 --- /dev/null +++ b/queue-2.6.35/pci-msi-restore-read_msi_msg_desc-add-get_cached_msi_msg_desc.patch @@ -0,0 +1,148 @@ +From 30da55242818a8ca08583188ebcbaccd283ad4d9 Mon Sep 17 00:00:00 2001 +From: Ben Hutchings +Date: Fri, 23 Jul 2010 14:56:28 +0100 +Subject: PCI: MSI: Restore read_msi_msg_desc(); add get_cached_msi_msg_desc() + +From: Ben Hutchings + +commit 30da55242818a8ca08583188ebcbaccd283ad4d9 upstream. + +commit 2ca1af9aa3285c6a5f103ed31ad09f7399fc65d7 "PCI: MSI: Remove +unsafe and unnecessary hardware access" changed read_msi_msg_desc() to +return the last MSI message written instead of reading it from the +device, since it may be called while the device is in a reduced +power state. + +However, the pSeries platform code really does need to read messages +from the device, since they are initially written by firmware. 
+Therefore: +- Restore the previous behaviour of read_msi_msg_desc() +- Add new functions get_cached_msi_msg{,_desc}() which return the + last MSI message written +- Use the new functions where appropriate + +Acked-by: Michael Ellerman +Signed-off-by: Ben Hutchings +Signed-off-by: Jesse Barnes +Signed-off-by: Greg Kroah-Hartman + +--- + arch/ia64/kernel/msi_ia64.c | 2 - + arch/ia64/sn/kernel/msi_sn.c | 2 - + arch/x86/kernel/apic/io_apic.c | 2 - + drivers/pci/msi.c | 47 ++++++++++++++++++++++++++++++++++++----- + include/linux/msi.h | 2 + + 5 files changed, 47 insertions(+), 8 deletions(-) + +--- a/arch/ia64/kernel/msi_ia64.c ++++ b/arch/ia64/kernel/msi_ia64.c +@@ -25,7 +25,7 @@ static int ia64_set_msi_irq_affinity(uns + if (irq_prepare_move(irq, cpu)) + return -1; + +- read_msi_msg(irq, &msg); ++ get_cached_msi_msg(irq, &msg); + + addr = msg.address_lo; + addr &= MSI_ADDR_DEST_ID_MASK; +--- a/arch/ia64/sn/kernel/msi_sn.c ++++ b/arch/ia64/sn/kernel/msi_sn.c +@@ -175,7 +175,7 @@ static int sn_set_msi_irq_affinity(unsig + * Release XIO resources for the old MSI PCI address + */ + +- read_msi_msg(irq, &msg); ++ get_cached_msi_msg(irq, &msg); + sn_pdev = (struct pcidev_info *)sn_irq_info->irq_pciioinfo; + pdev = sn_pdev->pdi_linux_pcidev; + provider = SN_PCIDEV_BUSPROVIDER(pdev); +--- a/arch/x86/kernel/apic/io_apic.c ++++ b/arch/x86/kernel/apic/io_apic.c +@@ -3399,7 +3399,7 @@ static int set_msi_irq_affinity(unsigned + + cfg = desc->chip_data; + +- read_msi_msg_desc(desc, &msg); ++ get_cached_msi_msg_desc(desc, &msg); + + msg.data &= ~MSI_DATA_VECTOR_MASK; + msg.data |= MSI_DATA_VECTOR(cfg->vector); +--- a/drivers/pci/msi.c ++++ b/drivers/pci/msi.c +@@ -197,9 +197,46 @@ void read_msi_msg_desc(struct irq_desc * + { + struct msi_desc *entry = get_irq_desc_msi(desc); + +- /* We do not touch the hardware (which may not even be +- * accessible at the moment) but return the last message +- * written. 
Assert that this is valid, assuming that ++ BUG_ON(entry->dev->current_state != PCI_D0); ++ ++ if (entry->msi_attrib.is_msix) { ++ void __iomem *base = entry->mask_base + ++ entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; ++ ++ msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR); ++ msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR); ++ msg->data = readl(base + PCI_MSIX_ENTRY_DATA); ++ } else { ++ struct pci_dev *dev = entry->dev; ++ int pos = entry->msi_attrib.pos; ++ u16 data; ++ ++ pci_read_config_dword(dev, msi_lower_address_reg(pos), ++ &msg->address_lo); ++ if (entry->msi_attrib.is_64) { ++ pci_read_config_dword(dev, msi_upper_address_reg(pos), ++ &msg->address_hi); ++ pci_read_config_word(dev, msi_data_reg(pos, 1), &data); ++ } else { ++ msg->address_hi = 0; ++ pci_read_config_word(dev, msi_data_reg(pos, 0), &data); ++ } ++ msg->data = data; ++ } ++} ++ ++void read_msi_msg(unsigned int irq, struct msi_msg *msg) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ read_msi_msg_desc(desc, msg); ++} ++ ++void get_cached_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) ++{ ++ struct msi_desc *entry = get_irq_desc_msi(desc); ++ ++ /* Assert that the cache is valid, assuming that + * valid messages are not all-zeroes. 
*/ + BUG_ON(!(entry->msg.address_hi | entry->msg.address_lo | + entry->msg.data)); +@@ -207,11 +244,11 @@ void read_msi_msg_desc(struct irq_desc * + *msg = entry->msg; + } + +-void read_msi_msg(unsigned int irq, struct msi_msg *msg) ++void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg) + { + struct irq_desc *desc = irq_to_desc(irq); + +- read_msi_msg_desc(desc, msg); ++ get_cached_msi_msg_desc(desc, msg); + } + + void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) +--- a/include/linux/msi.h ++++ b/include/linux/msi.h +@@ -14,8 +14,10 @@ struct irq_desc; + extern void mask_msi_irq(unsigned int irq); + extern void unmask_msi_irq(unsigned int irq); + extern void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg); ++extern void get_cached_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg); + extern void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg); + extern void read_msi_msg(unsigned int irq, struct msi_msg *msg); ++extern void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg); + extern void write_msi_msg(unsigned int irq, struct msi_msg *msg); + + struct msi_desc { diff --git a/queue-2.6.35/perf-x86-pentium4-clear-the-p4_cccr_force_ovf-flag.patch b/queue-2.6.35/perf-x86-pentium4-clear-the-p4_cccr_force_ovf-flag.patch new file mode 100644 index 00000000000..b83edd1b35f --- /dev/null +++ b/queue-2.6.35/perf-x86-pentium4-clear-the-p4_cccr_force_ovf-flag.patch @@ -0,0 +1,35 @@ +From 8d330919927ea31fa083b5a80084dc991da813a0 Mon Sep 17 00:00:00 2001 +From: Lin Ming +Date: Wed, 25 Aug 2010 21:06:32 +0000 +Subject: perf, x86, Pentium4: Clear the P4_CCCR_FORCE_OVF flag + +From: Lin Ming + +commit 8d330919927ea31fa083b5a80084dc991da813a0 upstream. + +If on Pentium4 CPUs the FORCE_OVF flag is set then an NMI happens +on every event, which can generate a flood of NMIs. Clear it. 
+ +Reported-by: Vince Weaver +Signed-off-by: Lin Ming +Signed-off-by: Cyrill Gorcunov +Cc: Frederic Weisbecker +Cc: Peter Zijlstra +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/cpu/perf_event_p4.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/arch/x86/kernel/cpu/perf_event_p4.c ++++ b/arch/x86/kernel/cpu/perf_event_p4.c +@@ -457,6 +457,8 @@ static int p4_hw_config(struct perf_even + event->hw.config |= event->attr.config & + (p4_config_pack_escr(P4_ESCR_MASK_HT) | + p4_config_pack_cccr(P4_CCCR_MASK_HT)); ++ ++ event->hw.config &= ~P4_CCCR_FORCE_OVF; + } + + rc = x86_setup_perfctr(event); diff --git a/queue-2.6.35/sata_mv-fix-broken-dsm-trim-support-v2.patch b/queue-2.6.35/sata_mv-fix-broken-dsm-trim-support-v2.patch new file mode 100644 index 00000000000..76dbd67df8b --- /dev/null +++ b/queue-2.6.35/sata_mv-fix-broken-dsm-trim-support-v2.patch @@ -0,0 +1,117 @@ +From 44b733809a5aba7f6b15a548d31a56d25bf3851c Mon Sep 17 00:00:00 2001 +From: Mark Lord +Date: Thu, 19 Aug 2010 21:40:44 -0400 +Subject: sata_mv: fix broken DSM/TRIM support (v2) + +From: Mark Lord + +commit 44b733809a5aba7f6b15a548d31a56d25bf3851c upstream. + +Fix DSM/TRIM commands in sata_mv (v2). +These need to be issued using old-school "BM DMA", +rather than via the EDMA host queue. + +Since the chips don't have proper BM DMA status, +we need to be more careful with setting the ATA_DMA_INTR bit, +since DSM/TRIM often has a long delay between "DMA complete" +and "command complete". + +GEN_I chips don't have BM DMA, so no TRIM for them. + +Signed-off-by: Mark Lord +Signed-off-by: Jeff Garzik +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/ata/sata_mv.c | 44 +++++++++++++++++++++++++++++++++++++------- + 1 file changed, 37 insertions(+), 7 deletions(-) + +--- a/drivers/ata/sata_mv.c ++++ b/drivers/ata/sata_mv.c +@@ -1898,19 +1898,25 @@ static void mv_bmdma_start(struct ata_qu + * LOCKING: + * Inherited from caller. 
+ */ +-static void mv_bmdma_stop(struct ata_queued_cmd *qc) ++static void mv_bmdma_stop_ap(struct ata_port *ap) + { +- struct ata_port *ap = qc->ap; + void __iomem *port_mmio = mv_ap_base(ap); + u32 cmd; + + /* clear start/stop bit */ + cmd = readl(port_mmio + BMDMA_CMD); +- cmd &= ~ATA_DMA_START; +- writelfl(cmd, port_mmio + BMDMA_CMD); ++ if (cmd & ATA_DMA_START) { ++ cmd &= ~ATA_DMA_START; ++ writelfl(cmd, port_mmio + BMDMA_CMD); + +- /* one-PIO-cycle guaranteed wait, per spec, for HDMA1:0 transition */ +- ata_sff_dma_pause(ap); ++ /* one-PIO-cycle guaranteed wait, per spec, for HDMA1:0 transition */ ++ ata_sff_dma_pause(ap); ++ } ++} ++ ++static void mv_bmdma_stop(struct ata_queued_cmd *qc) ++{ ++ mv_bmdma_stop_ap(qc->ap); + } + + /** +@@ -1934,8 +1940,21 @@ static u8 mv_bmdma_status(struct ata_por + reg = readl(port_mmio + BMDMA_STATUS); + if (reg & ATA_DMA_ACTIVE) + status = ATA_DMA_ACTIVE; +- else ++ else if (reg & ATA_DMA_ERR) + status = (reg & ATA_DMA_ERR) | ATA_DMA_INTR; ++ else { ++ /* ++ * Just because DMA_ACTIVE is 0 (DMA completed), ++ * this does _not_ mean the device is "done". ++ * So we should not yet be signalling ATA_DMA_INTR ++ * in some cases. Eg. DSM/TRIM, and perhaps others. 
++ */ ++ mv_bmdma_stop_ap(ap); ++ if (ioread8(ap->ioaddr.altstatus_addr) & ATA_BUSY) ++ status = 0; ++ else ++ status = ATA_DMA_INTR; ++ } + return status; + } + +@@ -1995,6 +2014,9 @@ static void mv_qc_prep(struct ata_queued + + switch (tf->protocol) { + case ATA_PROT_DMA: ++ if (tf->command == ATA_CMD_DSM) ++ return; ++ /* fall-thru */ + case ATA_PROT_NCQ: + break; /* continue below */ + case ATA_PROT_PIO: +@@ -2094,6 +2116,8 @@ static void mv_qc_prep_iie(struct ata_qu + if ((tf->protocol != ATA_PROT_DMA) && + (tf->protocol != ATA_PROT_NCQ)) + return; ++ if (tf->command == ATA_CMD_DSM) ++ return; /* use bmdma for this */ + + /* Fill in Gen IIE command request block */ + if (!(tf->flags & ATA_TFLAG_WRITE)) +@@ -2289,6 +2313,12 @@ static unsigned int mv_qc_issue(struct a + + switch (qc->tf.protocol) { + case ATA_PROT_DMA: ++ if (qc->tf.command == ATA_CMD_DSM) { ++ if (!ap->ops->bmdma_setup) /* no bmdma on GEN_I */ ++ return AC_ERR_OTHER; ++ break; /* use bmdma for this */ ++ } ++ /* fall thru */ + case ATA_PROT_NCQ: + mv_start_edma(ap, port_mmio, pp, qc->tf.protocol); + pp->req_idx = (pp->req_idx + 1) & MV_MAX_Q_DEPTH_MASK; diff --git a/queue-2.6.35/writeback-write_cache_pages-doesn-t-terminate-at-nr_to_write-0.patch b/queue-2.6.35/writeback-write_cache_pages-doesn-t-terminate-at-nr_to_write-0.patch new file mode 100644 index 00000000000..f90f74e049a --- /dev/null +++ b/queue-2.6.35/writeback-write_cache_pages-doesn-t-terminate-at-nr_to_write-0.patch @@ -0,0 +1,71 @@ +From 546a1924224078c6f582e68f890b05b387b42653 Mon Sep 17 00:00:00 2001 +From: Dave Chinner +Date: Tue, 24 Aug 2010 11:44:34 +1000 +Subject: writeback: write_cache_pages doesn't terminate at nr_to_write <= 0 + +From: Dave Chinner + +commit 546a1924224078c6f582e68f890b05b387b42653 upstream. + +I noticed XFS writeback in 2.6.36-rc1 was much slower than it should have +been. 
Enabling writeback tracing showed: + + flush-253:16-8516 [007] 1342952.351608: wbc_writepage: bdi 253:16: towrt=1024 skip=0 mode=0 kupd=0 bgrd=1 reclm=0 cyclic=1 more=0 older=0x0 start=0x0 end=0x0 + flush-253:16-8516 [007] 1342952.351654: wbc_writepage: bdi 253:16: towrt=1023 skip=0 mode=0 kupd=0 bgrd=1 reclm=0 cyclic=1 more=0 older=0x0 start=0x0 end=0x0 + flush-253:16-8516 [000] 1342952.369520: wbc_writepage: bdi 253:16: towrt=0 skip=0 mode=0 kupd=0 bgrd=1 reclm=0 cyclic=1 more=0 older=0x0 start=0x0 end=0x0 + flush-253:16-8516 [000] 1342952.369542: wbc_writepage: bdi 253:16: towrt=-1 skip=0 mode=0 kupd=0 bgrd=1 reclm=0 cyclic=1 more=0 older=0x0 start=0x0 end=0x0 + flush-253:16-8516 [000] 1342952.369549: wbc_writepage: bdi 253:16: towrt=-2 skip=0 mode=0 kupd=0 bgrd=1 reclm=0 cyclic=1 more=0 older=0x0 start=0x0 end=0x0 + +Writeback is not terminating in background writeback if ->writepage is +returning with wbc->nr_to_write == 0, resulting in sub-optimal single page +writeback on XFS. + +Fix the write_cache_pages loop to terminate correctly when this situation +occurs and so prevent this sub-optimal background writeback pattern. This +improves sustained sequential buffered write performance from around +250MB/s to 750MB/s for a 100GB file on an XFS filesystem on my 8p test VM. + +Signed-off-by: Dave Chinner +Reviewed-by: Wu Fengguang +Reviewed-by: Christoph Hellwig +Signed-off-by: Greg Kroah-Hartman + +--- + mm/page-writeback.c | 26 ++++++++++---------------- + 1 file changed, 10 insertions(+), 16 deletions(-) + +--- a/mm/page-writeback.c ++++ b/mm/page-writeback.c +@@ -949,22 +949,16 @@ continue_unlock: + } + } + +- if (wbc->nr_to_write > 0) { +- if (--wbc->nr_to_write == 0 && +- wbc->sync_mode == WB_SYNC_NONE) { +- /* +- * We stop writing back only if we are +- * not doing integrity sync. 
In case of +- * integrity sync we have to keep going +- * because someone may be concurrently +- * dirtying pages, and we might have +- * synced a lot of newly appeared dirty +- * pages, but have not synced all of the +- * old dirty pages. +- */ +- done = 1; +- break; +- } ++ /* ++ * We stop writing back only if we are not doing ++ * integrity sync. In case of integrity sync we have to ++ * keep going until we have written all the pages ++ * we tagged for writeback prior to entering this loop. ++ */ ++ if (--wbc->nr_to_write <= 0 && ++ wbc->sync_mode == WB_SYNC_NONE) { ++ done = 1; ++ break; + } + } + pagevec_release(&pvec); diff --git a/queue-2.6.35/x86-tsc-sched-recompute-cyc2ns_offset-s-during-resume-from-sleep-states.patch b/queue-2.6.35/x86-tsc-sched-recompute-cyc2ns_offset-s-during-resume-from-sleep-states.patch new file mode 100644 index 00000000000..a4706b8364d --- /dev/null +++ b/queue-2.6.35/x86-tsc-sched-recompute-cyc2ns_offset-s-during-resume-from-sleep-states.patch @@ -0,0 +1,115 @@ +From cd7240c0b900eb6d690ccee088a6c9b46dae815a Mon Sep 17 00:00:00 2001 +From: Suresh Siddha +Date: Thu, 19 Aug 2010 17:03:38 -0700 +Subject: x86, tsc, sched: Recompute cyc2ns_offset's during resume from sleep states + +From: Suresh Siddha + +commit cd7240c0b900eb6d690ccee088a6c9b46dae815a upstream. + +TSC's get reset after suspend/resume (even on cpu's with invariant TSC +which runs at a constant rate across ACPI P-, C- and T-states). And in +some systems BIOS seem to reinit TSC to arbitrary large value (still +sync'd across cpu's) during resume. + +This leads to a scenario of scheduler rq->clock (sched_clock_cpu()) less +than rq->age_stamp (introduced in 2.6.32). This leads to a big value +returned by scale_rt_power() and the resulting big group power set by the +update_group_power() is causing improper load balancing between busy and +idle cpu's after suspend/resume. 
+ +This resulted in multi-threaded workloads (like kernel-compilation) go +slower after suspend/resume cycle on core i5 laptops. + +Fix this by recomputing cyc2ns_offset's during resume, so that +sched_clock() continues from the point where it was left off during +suspend. + +Reported-by: Florian Pritz +Signed-off-by: Suresh Siddha +Signed-off-by: Peter Zijlstra +LKML-Reference: <1282262618.2675.24.camel@sbsiddha-MOBL3.sc.intel.com> +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/tsc.h | 2 ++ + arch/x86/kernel/tsc.c | 38 ++++++++++++++++++++++++++++++++++++++ + arch/x86/power/cpu.c | 2 ++ + 3 files changed, 42 insertions(+) + +--- a/arch/x86/include/asm/tsc.h ++++ b/arch/x86/include/asm/tsc.h +@@ -59,5 +59,7 @@ extern void check_tsc_sync_source(int cp + extern void check_tsc_sync_target(void); + + extern int notsc_setup(char *); ++extern void save_sched_clock_state(void); ++extern void restore_sched_clock_state(void); + + #endif /* _ASM_X86_TSC_H */ +--- a/arch/x86/kernel/tsc.c ++++ b/arch/x86/kernel/tsc.c +@@ -626,6 +626,44 @@ static void set_cyc2ns_scale(unsigned lo + local_irq_restore(flags); + } + ++static unsigned long long cyc2ns_suspend; ++ ++void save_sched_clock_state(void) ++{ ++ if (!sched_clock_stable) ++ return; ++ ++ cyc2ns_suspend = sched_clock(); ++} ++ ++/* ++ * Even on processors with invariant TSC, TSC gets reset in some the ++ * ACPI system sleep states. And in some systems BIOS seem to reinit TSC to ++ * arbitrary value (still sync'd across cpu's) during resume from such sleep ++ * states. To cope up with this, recompute the cyc2ns_offset for each cpu so ++ * that sched_clock() continues from the point where it was left off during ++ * suspend. 
++ */ ++void restore_sched_clock_state(void) ++{ ++ unsigned long long offset; ++ unsigned long flags; ++ int cpu; ++ ++ if (!sched_clock_stable) ++ return; ++ ++ local_irq_save(flags); ++ ++ get_cpu_var(cyc2ns_offset) = 0; ++ offset = cyc2ns_suspend - sched_clock(); ++ ++ for_each_possible_cpu(cpu) ++ per_cpu(cyc2ns_offset, cpu) = offset; ++ ++ local_irq_restore(flags); ++} ++ + #ifdef CONFIG_CPU_FREQ + + /* Frequency scaling support. Adjust the TSC based timer when the cpu frequency +--- a/arch/x86/power/cpu.c ++++ b/arch/x86/power/cpu.c +@@ -113,6 +113,7 @@ static void __save_processor_state(struc + void save_processor_state(void) + { + __save_processor_state(&saved_context); ++ save_sched_clock_state(); + } + #ifdef CONFIG_X86_32 + EXPORT_SYMBOL(save_processor_state); +@@ -229,6 +230,7 @@ static void __restore_processor_state(st + void restore_processor_state(void) + { + __restore_processor_state(&saved_context); ++ restore_sched_clock_state(); + } + #ifdef CONFIG_X86_32 + EXPORT_SYMBOL(restore_processor_state); diff --git a/queue-2.6.35/xen-handle-events-as-edge-triggered.patch b/queue-2.6.35/xen-handle-events-as-edge-triggered.patch new file mode 100644 index 00000000000..dd06bbfb86e --- /dev/null +++ b/queue-2.6.35/xen-handle-events-as-edge-triggered.patch @@ -0,0 +1,44 @@ +From dffe2e1e1a1ddb566a76266136c312801c66dcf7 Mon Sep 17 00:00:00 2001 +From: Jeremy Fitzhardinge +Date: Fri, 20 Aug 2010 19:10:01 -0700 +Subject: xen: handle events as edge-triggered + +From: Jeremy Fitzhardinge + +commit dffe2e1e1a1ddb566a76266136c312801c66dcf7 upstream. + +Xen events are logically edge triggered, as Xen only calls the event +upcall when an event is newly set, but not continuously as it remains set. +As a result, use handle_edge_irq rather than handle_level_irq. 
+ +This has the important side-effect of fixing a long-standing bug of +events getting lost if: + - an event's interrupt handler is running + - the event is migrated to a different vcpu + - the event is re-triggered + +The most noticable symptom of these lost events is occasional lockups +of blkfront. + +Many thanks to Tom Kopec and Daniel Stodden in tracking this down. + +Signed-off-by: Jeremy Fitzhardinge +Cc: Tom Kopec +Cc: Daniel Stodden +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/xen/events.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/xen/events.c ++++ b/drivers/xen/events.c +@@ -363,7 +363,7 @@ int bind_evtchn_to_irq(unsigned int evtc + irq = find_unbound_irq(); + + set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, +- handle_level_irq, "event"); ++ handle_edge_irq, "event"); + + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_evtchn_info(evtchn); diff --git a/queue-2.6.35/xen-use-percpu-interrupts-for-ipis-and-virqs.patch b/queue-2.6.35/xen-use-percpu-interrupts-for-ipis-and-virqs.patch new file mode 100644 index 00000000000..646f92c8e53 --- /dev/null +++ b/queue-2.6.35/xen-use-percpu-interrupts-for-ipis-and-virqs.patch @@ -0,0 +1,73 @@ +From aaca49642b92c8a57d3ca5029a5a94019c7af69f Mon Sep 17 00:00:00 2001 +From: Jeremy Fitzhardinge +Date: Fri, 20 Aug 2010 18:57:53 -0700 +Subject: xen: use percpu interrupts for IPIs and VIRQs + +From: Jeremy Fitzhardinge + +commit aaca49642b92c8a57d3ca5029a5a94019c7af69f upstream. + +IPIs and VIRQs are inherently per-cpu event types, so treat them as such: + - use a specific percpu irq_chip implementation, and + - handle them with handle_percpu_irq + +This makes the path for delivering these interrupts more efficient +(no masking/unmasking, no locks), and it avoid problems with attempts +to migrate them. 
+ 
+Signed-off-by: Jeremy Fitzhardinge 
+Signed-off-by: Greg Kroah-Hartman 
+
+---
+ drivers/xen/events.c | 19 +++++++++++++++----
+ 1 file changed, 15 insertions(+), 4 deletions(-)
+
+--- a/drivers/xen/events.c
++++ b/drivers/xen/events.c
+@@ -107,6 +107,7 @@ static inline unsigned long *cpu_evtchn_
+ #define VALID_EVTCHN(chn) ((chn) != 0)
+ 
+ static struct irq_chip xen_dynamic_chip;
++static struct irq_chip xen_percpu_chip;
+ 
+ /* Constructor for packed IRQ information. */
+ static struct irq_info mk_unbound_info(void)
+@@ -389,8 +390,8 @@ static int bind_ipi_to_irq(unsigned int 
+ if (irq < 0)
+ goto out;
+ 
+- set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
+- handle_level_irq, "ipi");
++ set_irq_chip_and_handler_name(irq, &xen_percpu_chip,
++ handle_percpu_irq, "ipi");
+ 
+ bind_ipi.vcpu = cpu;
+ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
+@@ -430,8 +431,8 @@ static int bind_virq_to_irq(unsigned int 
+ 
+ irq = find_unbound_irq();
+ 
+- set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
+- handle_level_irq, "virq");
++ set_irq_chip_and_handler_name(irq, &xen_percpu_chip,
++ handle_percpu_irq, "virq");
+ 
+ evtchn_to_irq[evtchn] = irq;
+ irq_info[irq] = mk_virq_info(evtchn, virq);
+@@ -934,6 +935,16 @@ static struct irq_chip xen_dynamic_chip 
+ .retrigger = retrigger_dynirq,
+ };
+ 
++static struct irq_chip xen_percpu_chip __read_mostly = {
++ .name = "xen-percpu",
++
++ .disable = disable_dynirq,
++ .mask = disable_dynirq,
++ .unmask = enable_dynirq,
++
++ .ack = ack_dynirq,
++};
++
+ void __init xen_init_IRQ(void)
+ {
+ int i;
diff --git a/queue-2.6.35/xfs-ensure-we-mark-all-inodes-in-a-freed-cluster-xfs_istale.patch b/queue-2.6.35/xfs-ensure-we-mark-all-inodes-in-a-freed-cluster-xfs_istale.patch
new file mode 100644
index 00000000000..e8bb6f3a527
--- /dev/null
+++ b/queue-2.6.35/xfs-ensure-we-mark-all-inodes-in-a-freed-cluster-xfs_istale.patch
@@ -0,0 +1,151 @@
+From 5b3eed756cd37255cad1181bd86bfd0977e97953 Mon Sep 17 00:00:00 2001
+From: Dave Chinner 
+Date: Tue, 24 Aug 2010 11:42:41 +1000 +Subject: xfs: ensure we mark all inodes in a freed cluster XFS_ISTALE + +From: Dave Chinner + +commit 5b3eed756cd37255cad1181bd86bfd0977e97953 upstream. + +Under heavy load parallel metadata loads (e.g. dbench), we can fail +to mark all the inodes in a cluster being freed as XFS_ISTALE as we +skip inodes we cannot get the XFS_ILOCK_EXCL or the flush lock on. +When this happens and the inode cluster buffer has already been +marked stale and freed, inode reclaim can try to write the inode out +as it is dirty and not marked stale. This can result in writing th +metadata to an freed extent, or in the case it has already +been overwritten trigger a magic number check failure and return an +EUCLEAN error such as: + +Filesystem "ram0": inode 0x442ba1 background reclaim flush failed with 117 + +Fix this by ensuring that we hoover up all in memory inodes in the +cluster and mark them XFS_ISTALE when freeing the cluster. + +Signed-off-by: Dave Chinner +Reviewed-by: Christoph Hellwig +Signed-off-by: Greg Kroah-Hartman + +--- + fs/xfs/xfs_inode.c | 49 ++++++++++++++++++++++++++----------------------- + 1 file changed, 26 insertions(+), 23 deletions(-) + +--- a/fs/xfs/xfs_inode.c ++++ b/fs/xfs/xfs_inode.c +@@ -1927,6 +1927,11 @@ xfs_iunlink_remove( + return 0; + } + ++/* ++ * A big issue when freeing the inode cluster is is that we _cannot_ skip any ++ * inodes that are in memory - they all must be marked stale and attached to ++ * the cluster buffer. ++ */ + STATIC void + xfs_ifree_cluster( + xfs_inode_t *free_ip, +@@ -1958,8 +1963,6 @@ xfs_ifree_cluster( + } + + for (j = 0; j < nbufs; j++, inum += ninodes) { +- int found = 0; +- + blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum), + XFS_INO_TO_AGBNO(mp, inum)); + +@@ -1978,7 +1981,9 @@ xfs_ifree_cluster( + /* + * Walk the inodes already attached to the buffer and mark them + * stale. These will all have the flush locks held, so an +- * in-memory inode walk can't lock them. 
++ * in-memory inode walk can't lock them. By marking them all ++ * stale first, we will not attempt to lock them in the loop ++ * below as the XFS_ISTALE flag will be set. + */ + lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); + while (lip) { +@@ -1990,11 +1995,11 @@ xfs_ifree_cluster( + &iip->ili_flush_lsn, + &iip->ili_item.li_lsn); + xfs_iflags_set(iip->ili_inode, XFS_ISTALE); +- found++; + } + lip = lip->li_bio_list; + } + ++ + /* + * For each inode in memory attempt to add it to the inode + * buffer and set it up for being staled on buffer IO +@@ -2006,6 +2011,7 @@ xfs_ifree_cluster( + * even trying to lock them. + */ + for (i = 0; i < ninodes; i++) { ++retry: + read_lock(&pag->pag_ici_lock); + ip = radix_tree_lookup(&pag->pag_ici_root, + XFS_INO_TO_AGINO(mp, (inum + i))); +@@ -2016,38 +2022,36 @@ xfs_ifree_cluster( + continue; + } + +- /* don't try to lock/unlock the current inode */ ++ /* ++ * Don't try to lock/unlock the current inode, but we ++ * _cannot_ skip the other inodes that we did not find ++ * in the list attached to the buffer and are not ++ * already marked stale. If we can't lock it, back off ++ * and retry. ++ */ + if (ip != free_ip && + !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { + read_unlock(&pag->pag_ici_lock); +- continue; ++ delay(1); ++ goto retry; + } + read_unlock(&pag->pag_ici_lock); + +- if (!xfs_iflock_nowait(ip)) { +- if (ip != free_ip) +- xfs_iunlock(ip, XFS_ILOCK_EXCL); +- continue; +- } +- ++ xfs_iflock(ip); + xfs_iflags_set(ip, XFS_ISTALE); +- if (xfs_inode_clean(ip)) { +- ASSERT(ip != free_ip); +- xfs_ifunlock(ip); +- xfs_iunlock(ip, XFS_ILOCK_EXCL); +- continue; +- } + ++ /* ++ * we don't need to attach clean inodes or those only ++ * with unlogged changes (which we throw away, anyway). 
++ */ + iip = ip->i_itemp; +- if (!iip) { +- /* inode with unlogged changes only */ ++ if (!iip || xfs_inode_clean(ip)) { + ASSERT(ip != free_ip); + ip->i_update_core = 0; + xfs_ifunlock(ip); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + continue; + } +- found++; + + iip->ili_last_fields = iip->ili_format.ilf_fields; + iip->ili_format.ilf_fields = 0; +@@ -2063,8 +2067,7 @@ xfs_ifree_cluster( + xfs_iunlock(ip, XFS_ILOCK_EXCL); + } + +- if (found) +- xfs_trans_stale_inode_buf(tp, bp); ++ xfs_trans_stale_inode_buf(tp, bp); + xfs_trans_binval(tp, bp); + } + diff --git a/queue-2.6.35/xfs-fix-untrusted-inode-number-lookup.patch b/queue-2.6.35/xfs-fix-untrusted-inode-number-lookup.patch new file mode 100644 index 00000000000..36d1b971468 --- /dev/null +++ b/queue-2.6.35/xfs-fix-untrusted-inode-number-lookup.patch @@ -0,0 +1,74 @@ +From 4536f2ad8b330453d7ebec0746c4374eadd649b1 Mon Sep 17 00:00:00 2001 +From: Dave Chinner +Date: Tue, 24 Aug 2010 11:42:30 +1000 +Subject: xfs: fix untrusted inode number lookup + +From: Dave Chinner + +commit 4536f2ad8b330453d7ebec0746c4374eadd649b1 upstream. + +Commit 7124fe0a5b619d65b739477b3b55a20bf805b06d ("xfs: validate untrusted inode +numbers during lookup") changes the inode lookup code to do btree lookups for +untrusted inode numbers. This change made an invalid assumption about the +alignment of inodes and hence incorrectly calculated the first inode in the +cluster. As a result, some inode numbers were being incorrectly considered +invalid when they were actually valid. + +The issue was not picked up by the xfstests suite because it always runs fsr +and dump (the two utilities that utilise the bulkstat interface) on cache hot +inodes and hence the lookup code in the cold cache path was not sufficiently +exercised to uncover this intermittent problem. + +Fix the issue by relaxing the btree lookup criteria and then checking if the +record returned contains the inode number we are lookup for. 
If it we get an +incorrect record, then the inode number is invalid. + +Signed-off-by: Dave Chinner +Reviewed-by: Christoph Hellwig +Signed-off-by: Greg Kroah-Hartman + +--- + fs/xfs/xfs_ialloc.c | 16 ++++++++++------ + 1 file changed, 10 insertions(+), 6 deletions(-) + +--- a/fs/xfs/xfs_ialloc.c ++++ b/fs/xfs/xfs_ialloc.c +@@ -1217,7 +1217,6 @@ xfs_imap_lookup( + struct xfs_inobt_rec_incore rec; + struct xfs_btree_cur *cur; + struct xfs_buf *agbp; +- xfs_agino_t startino; + int error; + int i; + +@@ -1231,13 +1230,13 @@ xfs_imap_lookup( + } + + /* +- * derive and lookup the exact inode record for the given agino. If the +- * record cannot be found, then it's an invalid inode number and we +- * should abort. ++ * Lookup the inode record for the given agino. If the record cannot be ++ * found, then it's an invalid inode number and we should abort. Once ++ * we have a record, we need to ensure it contains the inode number ++ * we are looking up. + */ + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); +- startino = agino & ~(XFS_IALLOC_INODES(mp) - 1); +- error = xfs_inobt_lookup(cur, startino, XFS_LOOKUP_EQ, &i); ++ error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i); + if (!error) { + if (i) + error = xfs_inobt_get_rec(cur, &rec, &i); +@@ -1250,6 +1249,11 @@ xfs_imap_lookup( + if (error) + return error; + ++ /* check that the returned record contains the required inode */ ++ if (rec.ir_startino > agino || ++ rec.ir_startino + XFS_IALLOC_INODES(mp) <= agino) ++ return EINVAL; ++ + /* for untrusted inodes check it is allocated first */ + if ((flags & XFS_IGET_UNTRUSTED) && + (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino))) diff --git a/queue-2.6.35/xfs-move-aio-completion-after-unwritten-extent-conversion.patch b/queue-2.6.35/xfs-move-aio-completion-after-unwritten-extent-conversion.patch new file mode 100644 index 00000000000..ae12a113d17 --- /dev/null +++ b/queue-2.6.35/xfs-move-aio-completion-after-unwritten-extent-conversion.patch @@ -0,0 +1,88 @@ 
+From fb511f2150174b18b28ad54708c1adda0df39b17 Mon Sep 17 00:00:00 2001 +From: Christoph Hellwig +Date: Sun, 18 Jul 2010 21:17:10 +0000 +Subject: xfs: move aio completion after unwritten extent conversion + +From: Christoph Hellwig + +commit fb511f2150174b18b28ad54708c1adda0df39b17 upstream. + +If we write into an unwritten extent using AIO we need to complete the AIO +request after the extent conversion has finished. Without that a read could +race to see see the extent still unwritten and return zeros. For synchronous +I/O we already take care of that by flushing the xfsconvertd workqueue (which +might be a bit of overkill). + +To do that add iocb and result fields to struct xfs_ioend, so that we can +call aio_complete from xfs_end_io after the extent conversion has happened. +Note that we need a new result field as io_error is used for positive errno +values, while the AIO code can return negative error values and positive +transfer sizes. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Dave Chinner +Signed-off-by: Alex Elder +Cc: Chuck Ebbert +Signed-off-by: Greg Kroah-Hartman + +--- + fs/xfs/linux-2.6/xfs_aops.c | 19 ++++++++++++++++--- + 1 file changed, 16 insertions(+), 3 deletions(-) + +--- a/fs/xfs/linux-2.6/xfs_aops.c ++++ b/fs/xfs/linux-2.6/xfs_aops.c +@@ -275,8 +275,11 @@ xfs_end_io( + xfs_finish_ioend(ioend, 0); + /* ensure we don't spin on blocked ioends */ + delay(1); +- } else ++ } else { ++ if (ioend->io_iocb) ++ aio_complete(ioend->io_iocb, ioend->io_result, 0); + xfs_destroy_ioend(ioend); ++ } + } + + /* +@@ -309,6 +312,8 @@ xfs_alloc_ioend( + atomic_inc(&XFS_I(ioend->io_inode)->i_iocount); + ioend->io_offset = 0; + ioend->io_size = 0; ++ ioend->io_iocb = NULL; ++ ioend->io_result = 0; + + INIT_WORK(&ioend->io_work, xfs_end_io); + return ioend; +@@ -1604,6 +1609,7 @@ xfs_end_io_direct( + bool is_async) + { + xfs_ioend_t *ioend = iocb->private; ++ bool complete_aio = is_async; + + /* + * Non-NULL private data means we need to issue a transaction 
to +@@ -1629,7 +1635,14 @@ xfs_end_io_direct( + if (ioend->io_type == IO_READ) { + xfs_finish_ioend(ioend, 0); + } else if (private && size > 0) { +- xfs_finish_ioend(ioend, is_sync_kiocb(iocb)); ++ if (is_async) { ++ ioend->io_iocb = iocb; ++ ioend->io_result = ret; ++ complete_aio = false; ++ xfs_finish_ioend(ioend, 0); ++ } else { ++ xfs_finish_ioend(ioend, 1); ++ } + } else { + /* + * A direct I/O write ioend starts it's life in unwritten +@@ -1648,7 +1661,7 @@ xfs_end_io_direct( + */ + iocb->private = NULL; + +- if (is_async) ++ if (complete_aio) + aio_complete(iocb, ret, 0); + } +