--- /dev/null
+From dbbcbc073ad3132bfbc410b11546b2fb4bdf2568 Mon Sep 17 00:00:00 2001
+From: David Henningsson <david.henningsson@canonical.com>
+Date: Mon, 23 Aug 2010 08:14:35 +0200
+Subject: ALSA: hda - Add Sony VAIO quirk for ALC269
+
+From: David Henningsson <david.henningsson@canonical.com>
+
+commit dbbcbc073ad3132bfbc410b11546b2fb4bdf2568 upstream.
+
+The attached patch enables playback on a Sony VAIO machine.
+
+BugLink: http://launchpad.net/bugs/618271
+
+Signed-off-by: David Henningsson <david.henningsson@canonical.com>
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ sound/pci/hda/patch_realtek.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -14244,6 +14244,7 @@ static const struct alc_fixup alc269_fix
+
+ static struct snd_pci_quirk alc269_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x104d, 0x9071, "Sony VAIO", ALC269_FIXUP_SONY_VAIO),
++ SND_PCI_QUIRK(0x104d, 0x9077, "Sony VAIO", ALC269_FIXUP_SONY_VAIO),
+ {}
+ };
+
--- /dev/null
+From 150b432f448281d5518f5229d240923f9a9c5459 Mon Sep 17 00:00:00 2001
+From: David Henningsson <david.henningsson@canonical.com>
+Date: Thu, 29 Jul 2010 14:46:42 +0200
+Subject: ALSA: hda - Rename iMic to Int Mic on Lenovo NB0763
+
+From: David Henningsson <david.henningsson@canonical.com>
+
+commit 150b432f448281d5518f5229d240923f9a9c5459 upstream.
+
+The non-standard name "iMic" makes PulseAudio ignore the microphone.
+BugLink: https://launchpad.net/bugs/605101
+
+Signed-off-by: David Henningsson <david.henningsson@canonical.com>
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ sound/pci/hda/patch_realtek.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -7005,7 +7005,7 @@ static struct hda_input_mux alc883_lenov
+ .num_items = 4,
+ .items = {
+ { "Mic", 0x0 },
+- { "iMic", 0x1 },
++ { "Int Mic", 0x1 },
+ { "Line", 0x2 },
+ { "CD", 0x4 },
+ },
+@@ -8575,8 +8575,8 @@ static struct snd_kcontrol_new alc883_le
+ HDA_CODEC_MUTE("CD Playback Switch", 0x0b, 0x04, HDA_INPUT),
+ HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x0, HDA_INPUT),
+ HDA_CODEC_MUTE("Mic Playback Switch", 0x0b, 0x0, HDA_INPUT),
+- HDA_CODEC_VOLUME("iMic Playback Volume", 0x0b, 0x1, HDA_INPUT),
+- HDA_CODEC_MUTE("iMic Playback Switch", 0x0b, 0x1, HDA_INPUT),
++ HDA_CODEC_VOLUME("Int Mic Playback Volume", 0x0b, 0x1, HDA_INPUT),
++ HDA_CODEC_MUTE("Int Mic Playback Switch", 0x0b, 0x1, HDA_INPUT),
+ { } /* end */
+ };
+
--- /dev/null
+From 81cd3fca642cecb40a1ccef099799dcb5730734b Mon Sep 17 00:00:00 2001
+From: David Henningsson <david.henningsson@canonical.com>
+Date: Tue, 10 Aug 2010 09:18:00 +0200
+Subject: ALSA: HDA: Use model=auto for LG R510
+
+From: David Henningsson <david.henningsson@canonical.com>
+
+commit 81cd3fca642cecb40a1ccef099799dcb5730734b upstream.
+
+Two users report model=auto is needed to make the internal mic work properly.
+BugLink: https://bugs.launchpad.net/bugs/495134
+
+Signed-off-by: David Henningsson <david.henningsson@canonical.com>
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ sound/pci/hda/patch_realtek.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -13305,7 +13305,6 @@ static struct snd_pci_quirk alc268_cfg_t
+ SND_PCI_QUIRK(0x14c0, 0x0025, "COMPAL IFL90/JFL-92", ALC268_TOSHIBA),
+ SND_PCI_QUIRK(0x152d, 0x0763, "Diverse (CPR2000)", ALC268_ACER),
+ SND_PCI_QUIRK(0x152d, 0x0771, "Quanta IL1", ALC267_QUANTA_IL1),
+- SND_PCI_QUIRK(0x1854, 0x1775, "LG R510", ALC268_DELL),
+ {}
+ };
+
--- /dev/null
+From 40e2e97316af6e62affab7a392e792494b8d9dde Mon Sep 17 00:00:00 2001
+From: Christoph Hellwig <hch@infradead.org>
+Date: Sun, 18 Jul 2010 21:17:09 +0000
+Subject: direct-io: move aio_complete into ->end_io
+
+From: Christoph Hellwig <hch@infradead.org>
+
+commit 40e2e97316af6e62affab7a392e792494b8d9dde upstream.
+
+Filesystems with unwritten extent support must not complete an AIO request
+until the transaction to convert the extent has been committed. That means
+the aio_complete call needs to be moved into the ->end_io callback so
+that the filesystem can control when to call it exactly.
+
+This makes a bit of a mess out of dio_complete and makes the ->end_io
+callback prototype even more complicated.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Alex Elder <aelder@sgi.com>
+Cc: Chuck Ebbert <cebbert@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
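+
+Sketch of the resulting calling convention (illustrative only, not code from
+the commit; "myfs_end_io" is a made-up filesystem callback): dio_complete()
+now computes the final return value first and passes it, together with an
+is_async flag, to ->end_io, which becomes responsible for completing the AIO.
+
+    /* hypothetical filesystem callback using the new dio_iodone_t prototype */
+    static void myfs_end_io(struct kiocb *iocb, loff_t offset, ssize_t size,
+                            void *private, int ret, bool is_async)
+    {
+            /* convert unwritten extents / update on-disk state here ... */
+
+            /* the filesystem, not dio_complete(), now finishes the AIO */
+            if (is_async)
+                    aio_complete(iocb, ret, 0);
+    }
+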
+ fs/direct-io.c | 26 ++++++++++++++------------
+ fs/ext4/inode.c | 10 +++++++---
+ fs/ocfs2/aops.c | 7 ++++++-
+ fs/xfs/linux-2.6/xfs_aops.c | 7 ++++++-
+ fs/xfs/linux-2.6/xfs_aops.h | 2 ++
+ include/linux/fs.h | 3 ++-
+ 6 files changed, 37 insertions(+), 18 deletions(-)
+
+--- a/fs/direct-io.c
++++ b/fs/direct-io.c
+@@ -218,7 +218,7 @@ static struct page *dio_get_page(struct
+ * filesystems can use it to hold additional state between get_block calls and
+ * dio_complete.
+ */
+-static int dio_complete(struct dio *dio, loff_t offset, int ret)
++static int dio_complete(struct dio *dio, loff_t offset, int ret, bool is_async)
+ {
+ ssize_t transferred = 0;
+
+@@ -239,14 +239,6 @@ static int dio_complete(struct dio *dio,
+ transferred = dio->i_size - offset;
+ }
+
+- if (dio->end_io && dio->result)
+- dio->end_io(dio->iocb, offset, transferred,
+- dio->map_bh.b_private);
+-
+- if (dio->flags & DIO_LOCKING)
+- /* lockdep: non-owner release */
+- up_read_non_owner(&dio->inode->i_alloc_sem);
+-
+ if (ret == 0)
+ ret = dio->page_errors;
+ if (ret == 0)
+@@ -254,6 +246,17 @@ static int dio_complete(struct dio *dio,
+ if (ret == 0)
+ ret = transferred;
+
++ if (dio->end_io && dio->result) {
++ dio->end_io(dio->iocb, offset, transferred,
++ dio->map_bh.b_private, ret, is_async);
++ } else if (is_async) {
++ aio_complete(dio->iocb, ret, 0);
++ }
++
++ if (dio->flags & DIO_LOCKING)
++ /* lockdep: non-owner release */
++ up_read_non_owner(&dio->inode->i_alloc_sem);
++
+ return ret;
+ }
+
+@@ -277,8 +280,7 @@ static void dio_bio_end_aio(struct bio *
+ spin_unlock_irqrestore(&dio->bio_lock, flags);
+
+ if (remaining == 0) {
+- int ret = dio_complete(dio, dio->iocb->ki_pos, 0);
+- aio_complete(dio->iocb, ret, 0);
++ dio_complete(dio, dio->iocb->ki_pos, 0, true);
+ kfree(dio);
+ }
+ }
+@@ -1126,7 +1128,7 @@ direct_io_worker(int rw, struct kiocb *i
+ spin_unlock_irqrestore(&dio->bio_lock, flags);
+
+ if (ret2 == 0) {
+- ret = dio_complete(dio, offset, ret);
++ ret = dio_complete(dio, offset, ret, false);
+ kfree(dio);
+ } else
+ BUG_ON(ret != -EIOCBQUEUED);
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -3775,7 +3775,8 @@ static ext4_io_end_t *ext4_init_io_end (
+ }
+
+ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
+- ssize_t size, void *private)
++ ssize_t size, void *private, int ret,
++ bool is_async)
+ {
+ ext4_io_end_t *io_end = iocb->private;
+ struct workqueue_struct *wq;
+@@ -3784,7 +3785,7 @@ static void ext4_end_io_dio(struct kiocb
+
+ /* if not async direct IO or dio with 0 bytes write, just return */
+ if (!io_end || !size)
+- return;
++ goto out;
+
+ ext_debug("ext4_end_io_dio(): io_end 0x%p"
+ "for inode %lu, iocb 0x%p, offset %llu, size %llu\n",
+@@ -3795,7 +3796,7 @@ static void ext4_end_io_dio(struct kiocb
+ if (io_end->flag != EXT4_IO_UNWRITTEN){
+ ext4_free_io_end(io_end);
+ iocb->private = NULL;
+- return;
++ goto out;
+ }
+
+ io_end->offset = offset;
+@@ -3812,6 +3813,9 @@ static void ext4_end_io_dio(struct kiocb
+ list_add_tail(&io_end->list, &ei->i_completed_io_list);
+ spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
+ iocb->private = NULL;
++out:
++ if (is_async)
++ aio_complete(iocb, ret, 0);
+ }
+
+ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
+--- a/fs/ocfs2/aops.c
++++ b/fs/ocfs2/aops.c
+@@ -578,7 +578,9 @@ bail:
+ static void ocfs2_dio_end_io(struct kiocb *iocb,
+ loff_t offset,
+ ssize_t bytes,
+- void *private)
++ void *private,
++ int ret,
++ bool is_async)
+ {
+ struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
+ int level;
+@@ -592,6 +594,9 @@ static void ocfs2_dio_end_io(struct kioc
+ if (!level)
+ up_read(&inode->i_alloc_sem);
+ ocfs2_rw_unlock(inode, level);
++
++ if (is_async)
++ aio_complete(iocb, ret, 0);
+ }
+
+ /*
+--- a/fs/xfs/linux-2.6/xfs_aops.c
++++ b/fs/xfs/linux-2.6/xfs_aops.c
+@@ -1599,7 +1599,9 @@ xfs_end_io_direct(
+ struct kiocb *iocb,
+ loff_t offset,
+ ssize_t size,
+- void *private)
++ void *private,
++ int ret,
++ bool is_async)
+ {
+ xfs_ioend_t *ioend = iocb->private;
+
+@@ -1645,6 +1647,9 @@ xfs_end_io_direct(
+ * against double-freeing.
+ */
+ iocb->private = NULL;
++
++ if (is_async)
++ aio_complete(iocb, ret, 0);
+ }
+
+ STATIC ssize_t
+--- a/fs/xfs/linux-2.6/xfs_aops.h
++++ b/fs/xfs/linux-2.6/xfs_aops.h
+@@ -37,6 +37,8 @@ typedef struct xfs_ioend {
+ size_t io_size; /* size of the extent */
+ xfs_off_t io_offset; /* offset in the file */
+ struct work_struct io_work; /* xfsdatad work queue */
++ struct kiocb *io_iocb;
++ int io_result;
+ } xfs_ioend_t;
+
+ extern const struct address_space_operations xfs_address_space_operations;
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -416,7 +416,8 @@ struct buffer_head;
+ typedef int (get_block_t)(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create);
+ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
+- ssize_t bytes, void *private);
++ ssize_t bytes, void *private, int ret,
++ bool is_async);
+
+ /*
+ * Attribute flags. These should be or-ed together to figure out what
--- /dev/null
+From 5b3ff237bef43b9e7fb7d1eb858e29b73fd664f9 Mon Sep 17 00:00:00 2001
+From: Jiaying Zhang <jiayingz@google.com>
+Date: Tue, 27 Jul 2010 11:56:06 -0400
+Subject: ext4: move aio completion after unwritten extent conversion
+
+From: Jiaying Zhang <jiayingz@google.com>
+
+commit 5b3ff237bef43b9e7fb7d1eb858e29b73fd664f9 upstream.
+
+This patch is to be applied on top of Christoph's "direct-io: move aio_complete
+into ->end_io" patch. It adds iocb and result fields to struct ext4_io_end_t,
+so that we can call aio_complete from ext4_end_io_nolock() after the extent
+conversion has finished.
+
+I have verified this with Christoph's aio-dio test, which used to fail after
+a few runs on an unpatched kernel but now succeeds on the patched kernel.
+
+See http://thread.gmane.org/gmane.comp.file-systems.ext4/19659 for details.
+
+Signed-off-by: Jiaying Zhang <jiayingz@google.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Cc: Chuck Ebbert <cebbert@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
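+
+Rough flow of the deferred completion this enables (a sketch of the tail of
+ext4_end_io_nolock() after the patch, not the verbatim code):
+
+    /* runs from the dio_unwritten_wq workqueue, after the direct I/O is done */
+    ret = ext4_convert_unwritten_extents(inode, io->offset, io->size);
+    if (ret < 0)
+            return ret;                     /* conversion failed; caller handles it */
+
+    if (io->iocb)                           /* AIO: complete it only now */
+            aio_complete(io->iocb, io->result, 0);
+
+    io->flag = 0;                           /* clear the DIO AIO unwritten flag */
+    return ret;
+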
+ fs/ext4/ext4.h | 4 +++-
+ fs/ext4/inode.c | 17 ++++++++++++-----
+ 2 files changed, 15 insertions(+), 6 deletions(-)
+
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -167,13 +167,15 @@ struct mpage_da_data {
+ };
+ #define EXT4_IO_UNWRITTEN 0x1
+ typedef struct ext4_io_end {
+- struct list_head list; /* per-file finished AIO list */
++ struct list_head list; /* per-file finished IO list */
+ struct inode *inode; /* file being written to */
+ unsigned int flag; /* unwritten or not */
+ struct page *page; /* page struct for buffer write */
+ loff_t offset; /* offset in the file */
+ ssize_t size; /* size of the extent */
+ struct work_struct work; /* data work queue */
++ struct kiocb *iocb; /* iocb struct for AIO */
++ int result; /* error value for AIO */
+ } ext4_io_end_t;
+
+ /*
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -3668,6 +3668,8 @@ static int ext4_end_io_nolock(ext4_io_en
+ return ret;
+ }
+
++ if (io->iocb)
++ aio_complete(io->iocb, io->result, 0);
+ /* clear the DIO AIO unwritten flag */
+ io->flag = 0;
+ return ret;
+@@ -3767,6 +3769,8 @@ static ext4_io_end_t *ext4_init_io_end (
+ io->offset = 0;
+ io->size = 0;
+ io->page = NULL;
++ io->iocb = NULL;
++ io->result = 0;
+ INIT_WORK(&io->work, ext4_end_io_work);
+ INIT_LIST_HEAD(&io->list);
+ }
+@@ -3796,12 +3800,18 @@ static void ext4_end_io_dio(struct kiocb
+ if (io_end->flag != EXT4_IO_UNWRITTEN){
+ ext4_free_io_end(io_end);
+ iocb->private = NULL;
+- goto out;
++out:
++ if (is_async)
++ aio_complete(iocb, ret, 0);
++ return;
+ }
+
+ io_end->offset = offset;
+ io_end->size = size;
+- io_end->flag = EXT4_IO_UNWRITTEN;
++ if (is_async) {
++ io_end->iocb = iocb;
++ io_end->result = ret;
++ }
+ wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
+
+ /* queue the work to convert unwritten extents to written */
+@@ -3813,9 +3823,6 @@ static void ext4_end_io_dio(struct kiocb
+ list_add_tail(&io_end->list, &ei->i_completed_io_list);
+ spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
+ iocb->private = NULL;
+-out:
+- if (is_async)
+- aio_complete(iocb, ret, 0);
+ }
+
+ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
--- /dev/null
+From c12c507d7185fe4e8ada7ed9832957576eefecf8 Mon Sep 17 00:00:00 2001
+From: Axel Lin <axel.lin@gmail.com>
+Date: Wed, 25 Aug 2010 15:42:10 +0200
+Subject: hwmon: (ads7871) Fix ads7871_probe error paths
+
+From: Axel Lin <axel.lin@gmail.com>
+
+commit c12c507d7185fe4e8ada7ed9832957576eefecf8 upstream.
+
+1. remove 'status' variable
+2. remove unneeded initialization of 'err' variable
+3. return missing error code if sysfs_create_group fails.
+4. fix the init sequence as:
+ - check hardware existence
+ - kzalloc for ads7871_data
+ - sysfs_create_group
+ - hwmon_device_register
+
+Signed-off-by: Axel Lin <axel.lin@gmail.com>
+Signed-off-by: Jean Delvare <khali@linux-fr.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
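+
+Sketch of the resulting probe() ordering (illustrative; the chip_present()
+check stands in for the SPI register read-back done by the real driver):
+
+    static int __devinit ads7871_probe_sketch(struct spi_device *spi)
+    {
+            struct ads7871_data *pdata;
+            int err;
+
+            if (!chip_present(spi))                         /* 1. hardware first */
+                    return -ENODEV;
+
+            pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);    /* 2. then allocate */
+            if (!pdata)
+                    return -ENOMEM;
+
+            err = sysfs_create_group(&spi->dev.kobj, &ads7871_group);      /* 3. */
+            if (err < 0)
+                    goto error_free;
+
+            spi_set_drvdata(spi, pdata);
+
+            pdata->hwmon_dev = hwmon_device_register(&spi->dev);           /* 4. */
+            if (IS_ERR(pdata->hwmon_dev)) {
+                    err = PTR_ERR(pdata->hwmon_dev);
+                    goto error_remove;
+            }
+            return 0;
+
+    error_remove:
+            sysfs_remove_group(&spi->dev.kobj, &ads7871_group);
+    error_free:
+            kfree(pdata);
+            return err;
+    }
+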
+ drivers/hwmon/ads7871.c | 38 +++++++++++++++++++-------------------
+ 1 file changed, 19 insertions(+), 19 deletions(-)
+
+--- a/drivers/hwmon/ads7871.c
++++ b/drivers/hwmon/ads7871.c
+@@ -160,30 +160,12 @@ static const struct attribute_group ads7
+
+ static int __devinit ads7871_probe(struct spi_device *spi)
+ {
+- int status, ret, err = 0;
++ int ret, err;
+ uint8_t val;
+ struct ads7871_data *pdata;
+
+ dev_dbg(&spi->dev, "probe\n");
+
+- pdata = kzalloc(sizeof(struct ads7871_data), GFP_KERNEL);
+- if (!pdata) {
+- err = -ENOMEM;
+- goto exit;
+- }
+-
+- status = sysfs_create_group(&spi->dev.kobj, &ads7871_group);
+- if (status < 0)
+- goto error_free;
+-
+- pdata->hwmon_dev = hwmon_device_register(&spi->dev);
+- if (IS_ERR(pdata->hwmon_dev)) {
+- err = PTR_ERR(pdata->hwmon_dev);
+- goto error_remove;
+- }
+-
+- spi_set_drvdata(spi, pdata);
+-
+ /* Configure the SPI bus */
+ spi->mode = (SPI_MODE_0);
+ spi->bits_per_word = 8;
+@@ -201,6 +183,24 @@ static int __devinit ads7871_probe(struc
+ we need to make sure we really have a chip*/
+ if (val != ret) {
+ err = -ENODEV;
++ goto exit;
++ }
++
++ pdata = kzalloc(sizeof(struct ads7871_data), GFP_KERNEL);
++ if (!pdata) {
++ err = -ENOMEM;
++ goto exit;
++ }
++
++ err = sysfs_create_group(&spi->dev.kobj, &ads7871_group);
++ if (err < 0)
++ goto error_free;
++
++ spi_set_drvdata(spi, pdata);
++
++ pdata->hwmon_dev = hwmon_device_register(&spi->dev);
++ if (IS_ERR(pdata->hwmon_dev)) {
++ err = PTR_ERR(pdata->hwmon_dev);
+ goto error_remove;
+ }
+
--- /dev/null
+From a05e93f3b3fc2f53c1d0de3b17019e207c482349 Mon Sep 17 00:00:00 2001
+From: Andreas Herrmann <andreas.herrmann3@amd.com>
+Date: Wed, 25 Aug 2010 15:42:12 +0200
+Subject: hwmon: (k8temp) Differentiate between AM2 and ASB1
+
+From: Andreas Herrmann <andreas.herrmann3@amd.com>
+
+commit a05e93f3b3fc2f53c1d0de3b17019e207c482349 upstream.
+
+Commit 8bf0223ed515be24de0c671eedaff49e78bebc9c (hwmon, k8temp: Fix
+temperature reporting for ASB1 processor revisions) fixed temperature
+reporting for ASB1 CPUs. But those CPU models (model 0x6b, 0x6f, 0x7f)
+were packaged both as AM2 (desktop) and ASB1 (mobile). Thus the commit
+leads to wrong temperature reporting for AM2 CPU parts.
+
+The solution is to determine the package type for models 0x6b, 0x6f,
+0x7f.
+
+This is done using BrandId from CPUID Fn8000_0001_EBX[15:0]. See
+"Constructing the processor Name String" in "Revision Guide for AMD
+NPT Family 0Fh Processors" (Rev. 3.46).
+
+Cc: Rudolf Marek <r.marek@assembler.cz>
+Reported-by: Vladislav Guberinic <neosisani@gmail.com>
+Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
+Signed-off-by: Jean Delvare <khali@linux-fr.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/hwmon/k8temp.c | 35 ++++++++++++++++++++++++++++++++---
+ 1 file changed, 32 insertions(+), 3 deletions(-)
+
+--- a/drivers/hwmon/k8temp.c
++++ b/drivers/hwmon/k8temp.c
+@@ -143,6 +143,37 @@ static const struct pci_device_id k8temp
+
+ MODULE_DEVICE_TABLE(pci, k8temp_ids);
+
++static int __devinit is_rev_g_desktop(u8 model)
++{
++ u32 brandidx;
++
++ if (model < 0x69)
++ return 0;
++
++ if (model == 0xc1 || model == 0x6c || model == 0x7c)
++ return 0;
++
++ /*
++ * Differentiate between AM2 and ASB1.
++ * See "Constructing the processor Name String" in "Revision
++ * Guide for AMD NPT Family 0Fh Processors" (33610).
++ */
++ brandidx = cpuid_ebx(0x80000001);
++ brandidx = (brandidx >> 9) & 0x1f;
++
++ /* Single core */
++ if ((model == 0x6f || model == 0x7f) &&
++ (brandidx == 0x7 || brandidx == 0x9 || brandidx == 0xc))
++ return 0;
++
++ /* Dual core */
++ if (model == 0x6b &&
++ (brandidx == 0xb || brandidx == 0xc))
++ return 0;
++
++ return 1;
++}
++
+ static int __devinit k8temp_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+ {
+@@ -179,9 +210,7 @@ static int __devinit k8temp_probe(struct
+ "wrong - check erratum #141\n");
+ }
+
+- if ((model >= 0x69) &&
+- !(model == 0xc1 || model == 0x6c || model == 0x7c ||
+- model == 0x6b || model == 0x6f || model == 0x7f)) {
++ if (is_rev_g_desktop(model)) {
+ /*
+ * RevG desktop CPUs (i.e. no socket S1G1 or
+ * ASB1 parts) need additional offset,
--- /dev/null
+From 55ee67f837882f28a900705a2ca1af257ab6c53d Mon Sep 17 00:00:00 2001
+From: Mark Lord <kernel@teksavvy.com>
+Date: Fri, 20 Aug 2010 10:13:16 -0400
+Subject: libata-sff: remove harmful BUG_ON from ata_bmdma_qc_issue
+
+From: Mark Lord <kernel@teksavvy.com>
+
+commit 55ee67f837882f28a900705a2ca1af257ab6c53d upstream.
+
+Remove harmful BUG_ON() from ata_bmdma_qc_issue(),
+as it casts too wide of a net and breaks sata_mv.
+It also crashes the kernel while doing the BUG_ON().
+
+There's already a WARN_ON_ONCE() further down to catch
+the case of POLLING for a BMDMA operation.
+
+Signed-off-by: Mark Lord <mlord@pobox.com>
+Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/ata/libata-sff.c | 4 ----
+ 1 file changed, 4 deletions(-)
+
+--- a/drivers/ata/libata-sff.c
++++ b/drivers/ata/libata-sff.c
+@@ -2735,10 +2735,6 @@ unsigned int ata_bmdma_qc_issue(struct a
+ {
+ struct ata_port *ap = qc->ap;
+
+- /* see ata_dma_blacklisted() */
+- BUG_ON((ap->flags & ATA_FLAG_PIO_POLLING) &&
+- qc->tf.protocol == ATAPI_PROT_DMA);
+-
+ /* defer PIO handling to sff_qc_issue */
+ if (!ata_is_dma(qc->tf.protocol))
+ return ata_sff_qc_issue(qc);
--- /dev/null
+From cca77b7c81876d819a5806f408b3c29b5b61a815 Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Mon, 23 Aug 2010 14:41:22 -0700
+Subject: netfilter: fix CONFIG_COMPAT support
+
+From: Florian Westphal <fw@strlen.de>
+
+commit cca77b7c81876d819a5806f408b3c29b5b61a815 upstream.
+
+commit f3c5c1bfd430858d3a05436f82c51e53104feb6b
+(netfilter: xtables: make ip_tables reentrant) forgot to
+also compute the jumpstack size in the compat handlers.
+
+Result is that "iptables -I INPUT -j userchain" turns into -j DROP.
+
+Reported by Sebastian Roesner on #netfilter, closes
+http://bugzilla.netfilter.org/show_bug.cgi?id=669.
+
+Note: arptables change is compile-tested only.
+
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
+Tested-by: Mikael Pettersson <mikpe@it.uu.se>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ net/ipv4/netfilter/arp_tables.c | 3 +++
+ net/ipv4/netfilter/ip_tables.c | 3 +++
+ net/ipv6/netfilter/ip6_tables.c | 3 +++
+ 3 files changed, 9 insertions(+)
+
+--- a/net/ipv4/netfilter/arp_tables.c
++++ b/net/ipv4/netfilter/arp_tables.c
+@@ -1420,6 +1420,9 @@ static int translate_compat_table(const
+ if (ret != 0)
+ break;
+ ++i;
++ if (strcmp(arpt_get_target(iter1)->u.user.name,
++ XT_ERROR_TARGET) == 0)
++ ++newinfo->stacksize;
+ }
+ if (ret) {
+ /*
+--- a/net/ipv4/netfilter/ip_tables.c
++++ b/net/ipv4/netfilter/ip_tables.c
+@@ -1747,6 +1747,9 @@ translate_compat_table(struct net *net,
+ if (ret != 0)
+ break;
+ ++i;
++ if (strcmp(ipt_get_target(iter1)->u.user.name,
++ XT_ERROR_TARGET) == 0)
++ ++newinfo->stacksize;
+ }
+ if (ret) {
+ /*
+--- a/net/ipv6/netfilter/ip6_tables.c
++++ b/net/ipv6/netfilter/ip6_tables.c
+@@ -1765,6 +1765,9 @@ translate_compat_table(struct net *net,
+ if (ret != 0)
+ break;
+ ++i;
++ if (strcmp(ip6t_get_target(iter1)->u.user.name,
++ XT_ERROR_TARGET) == 0)
++ ++newinfo->stacksize;
+ }
+ if (ret) {
+ /*
--- /dev/null
+From aba8a08ded89a74f1ba04ae94ecc98f26e27d41c Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Tue, 17 Aug 2010 14:13:42 +0200
+Subject: pata_cmd64x: revert commit d62f5576
+
+From: Tejun Heo <tj@kernel.org>
+
+commit aba8a08ded89a74f1ba04ae94ecc98f26e27d41c upstream.
+
+Commit d62f5576 (pata_cmd64x: fix handling of address setup timings)
+incorrectly called ata_timing_compute() for UDMA modes with 0 @UT, leading
+to a divide-by-zero fault. Revert it until a better fix is available.
+This was reported in bko#16607 by Milan Kocian, who also root-caused it.
+
+ https://bugzilla.kernel.org/show_bug.cgi?id=16607
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Reported-and-root-caused-by: Milan Kocian <milan.kocian@wq.cz>
+Cc: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
+Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/ata/pata_cmd64x.c | 6 ------
+ 1 file changed, 6 deletions(-)
+
+--- a/drivers/ata/pata_cmd64x.c
++++ b/drivers/ata/pata_cmd64x.c
+@@ -121,14 +121,8 @@ static void cmd64x_set_timing(struct ata
+
+ if (pair) {
+ struct ata_timing tp;
+-
+ ata_timing_compute(pair, pair->pio_mode, &tp, T, 0);
+ ata_timing_merge(&t, &tp, &t, ATA_TIMING_SETUP);
+- if (pair->dma_mode) {
+- ata_timing_compute(pair, pair->dma_mode,
+- &tp, T, 0);
+- ata_timing_merge(&tp, &t, &t, ATA_TIMING_SETUP);
+- }
+ }
+ }
+
--- /dev/null
+From fcd097f31a6ee207cc0c3da9cccd2a86d4334785 Mon Sep 17 00:00:00 2001
+From: Ben Hutchings <bhutchings@solarflare.com>
+Date: Thu, 17 Jun 2010 20:16:36 +0100
+Subject: PCI: MSI: Remove unsafe and unnecessary hardware access
+
+From: Ben Hutchings <bhutchings@solarflare.com>
+
+commit fcd097f31a6ee207cc0c3da9cccd2a86d4334785 upstream.
+
+During suspend on an SMP system, {read,write}_msi_msg_desc() may be
+called to mask and unmask interrupts on a device that is already in a
+reduced power state. At this point memory-mapped registers including
+MSI-X tables are not accessible, and config space may not be fully
+functional either.
+
+While a device is in a reduced power state its interrupts are
+effectively masked and its MSI(-X) state will be restored when it is
+brought back to D0. Therefore these functions can simply read and
+write msi_desc::msg for devices not in D0.
+
+Further, read_msi_msg_desc() should only ever be used to update a
+previously written message, so it can always read msi_desc::msg
+and never needs to touch the hardware.
+
+Tested-by: "Michael Chan" <mchan@broadcom.com>
+Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
+Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/pci/msi.c | 36 ++++++++++++------------------------
+ 1 file changed, 12 insertions(+), 24 deletions(-)
+
+--- a/drivers/pci/msi.c
++++ b/drivers/pci/msi.c
+@@ -196,30 +196,15 @@ void unmask_msi_irq(unsigned int irq)
+ void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
+ {
+ struct msi_desc *entry = get_irq_desc_msi(desc);
+- if (entry->msi_attrib.is_msix) {
+- void __iomem *base = entry->mask_base +
+- entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
+
+- msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR);
+- msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR);
+- msg->data = readl(base + PCI_MSIX_ENTRY_DATA);
+- } else {
+- struct pci_dev *dev = entry->dev;
+- int pos = entry->msi_attrib.pos;
+- u16 data;
+-
+- pci_read_config_dword(dev, msi_lower_address_reg(pos),
+- &msg->address_lo);
+- if (entry->msi_attrib.is_64) {
+- pci_read_config_dword(dev, msi_upper_address_reg(pos),
+- &msg->address_hi);
+- pci_read_config_word(dev, msi_data_reg(pos, 1), &data);
+- } else {
+- msg->address_hi = 0;
+- pci_read_config_word(dev, msi_data_reg(pos, 0), &data);
+- }
+- msg->data = data;
+- }
++ /* We do not touch the hardware (which may not even be
++ * accessible at the moment) but return the last message
++ * written. Assert that this is valid, assuming that
++ * valid messages are not all-zeroes. */
++ BUG_ON(!(entry->msg.address_hi | entry->msg.address_lo |
++ entry->msg.data));
++
++ *msg = entry->msg;
+ }
+
+ void read_msi_msg(unsigned int irq, struct msi_msg *msg)
+@@ -232,7 +217,10 @@ void read_msi_msg(unsigned int irq, stru
+ void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
+ {
+ struct msi_desc *entry = get_irq_desc_msi(desc);
+- if (entry->msi_attrib.is_msix) {
++
++ if (entry->dev->current_state != PCI_D0) {
++ /* Don't touch the hardware now */
++ } else if (entry->msi_attrib.is_msix) {
+ void __iomem *base;
+ base = entry->mask_base +
+ entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
--- /dev/null
+From 30da55242818a8ca08583188ebcbaccd283ad4d9 Mon Sep 17 00:00:00 2001
+From: Ben Hutchings <bhutchings@solarflare.com>
+Date: Fri, 23 Jul 2010 14:56:28 +0100
+Subject: PCI: MSI: Restore read_msi_msg_desc(); add get_cached_msi_msg_desc()
+
+From: Ben Hutchings <bhutchings@solarflare.com>
+
+commit 30da55242818a8ca08583188ebcbaccd283ad4d9 upstream.
+
+commit 2ca1af9aa3285c6a5f103ed31ad09f7399fc65d7 "PCI: MSI: Remove
+unsafe and unnecessary hardware access" changed read_msi_msg_desc() to
+return the last MSI message written instead of reading it from the
+device, since it may be called while the device is in a reduced
+power state.
+
+However, the pSeries platform code really does need to read messages
+from the device, since they are initially written by firmware.
+Therefore:
+- Restore the previous behaviour of read_msi_msg_desc()
+- Add new functions get_cached_msi_msg{,_desc}() which return the
+ last MSI message written
+- Use the new functions where appropriate
+
+Acked-by: Michael Ellerman <michael@ellerman.id.au>
+Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
+Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
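+
+Typical caller after the split (a sketch in the spirit of the io_apic.c hunk
+below; "new_vector" is illustrative): the cached accessor never touches the
+device, so it is safe even while the device sits in a reduced power state,
+and the updated message is then pushed out with write_msi_msg().
+
+    struct msi_msg msg;
+
+    get_cached_msi_msg(irq, &msg);            /* last message written, no HW read */
+    msg.data &= ~MSI_DATA_VECTOR_MASK;
+    msg.data |= MSI_DATA_VECTOR(new_vector);
+    write_msi_msg(irq, &msg);                 /* program the device with the update */
+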
+ arch/ia64/kernel/msi_ia64.c | 2 -
+ arch/ia64/sn/kernel/msi_sn.c | 2 -
+ arch/x86/kernel/apic/io_apic.c | 2 -
+ drivers/pci/msi.c | 47 ++++++++++++++++++++++++++++++++++++-----
+ include/linux/msi.h | 2 +
+ 5 files changed, 47 insertions(+), 8 deletions(-)
+
+--- a/arch/ia64/kernel/msi_ia64.c
++++ b/arch/ia64/kernel/msi_ia64.c
+@@ -25,7 +25,7 @@ static int ia64_set_msi_irq_affinity(uns
+ if (irq_prepare_move(irq, cpu))
+ return -1;
+
+- read_msi_msg(irq, &msg);
++ get_cached_msi_msg(irq, &msg);
+
+ addr = msg.address_lo;
+ addr &= MSI_ADDR_DEST_ID_MASK;
+--- a/arch/ia64/sn/kernel/msi_sn.c
++++ b/arch/ia64/sn/kernel/msi_sn.c
+@@ -175,7 +175,7 @@ static int sn_set_msi_irq_affinity(unsig
+ * Release XIO resources for the old MSI PCI address
+ */
+
+- read_msi_msg(irq, &msg);
++ get_cached_msi_msg(irq, &msg);
+ sn_pdev = (struct pcidev_info *)sn_irq_info->irq_pciioinfo;
+ pdev = sn_pdev->pdi_linux_pcidev;
+ provider = SN_PCIDEV_BUSPROVIDER(pdev);
+--- a/arch/x86/kernel/apic/io_apic.c
++++ b/arch/x86/kernel/apic/io_apic.c
+@@ -3399,7 +3399,7 @@ static int set_msi_irq_affinity(unsigned
+
+ cfg = desc->chip_data;
+
+- read_msi_msg_desc(desc, &msg);
++ get_cached_msi_msg_desc(desc, &msg);
+
+ msg.data &= ~MSI_DATA_VECTOR_MASK;
+ msg.data |= MSI_DATA_VECTOR(cfg->vector);
+--- a/drivers/pci/msi.c
++++ b/drivers/pci/msi.c
+@@ -197,9 +197,46 @@ void read_msi_msg_desc(struct irq_desc *
+ {
+ struct msi_desc *entry = get_irq_desc_msi(desc);
+
+- /* We do not touch the hardware (which may not even be
+- * accessible at the moment) but return the last message
+- * written. Assert that this is valid, assuming that
++ BUG_ON(entry->dev->current_state != PCI_D0);
++
++ if (entry->msi_attrib.is_msix) {
++ void __iomem *base = entry->mask_base +
++ entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
++
++ msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR);
++ msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR);
++ msg->data = readl(base + PCI_MSIX_ENTRY_DATA);
++ } else {
++ struct pci_dev *dev = entry->dev;
++ int pos = entry->msi_attrib.pos;
++ u16 data;
++
++ pci_read_config_dword(dev, msi_lower_address_reg(pos),
++ &msg->address_lo);
++ if (entry->msi_attrib.is_64) {
++ pci_read_config_dword(dev, msi_upper_address_reg(pos),
++ &msg->address_hi);
++ pci_read_config_word(dev, msi_data_reg(pos, 1), &data);
++ } else {
++ msg->address_hi = 0;
++ pci_read_config_word(dev, msi_data_reg(pos, 0), &data);
++ }
++ msg->data = data;
++ }
++}
++
++void read_msi_msg(unsigned int irq, struct msi_msg *msg)
++{
++ struct irq_desc *desc = irq_to_desc(irq);
++
++ read_msi_msg_desc(desc, msg);
++}
++
++void get_cached_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
++{
++ struct msi_desc *entry = get_irq_desc_msi(desc);
++
++ /* Assert that the cache is valid, assuming that
+ * valid messages are not all-zeroes. */
+ BUG_ON(!(entry->msg.address_hi | entry->msg.address_lo |
+ entry->msg.data));
+@@ -207,11 +244,11 @@ void read_msi_msg_desc(struct irq_desc *
+ *msg = entry->msg;
+ }
+
+-void read_msi_msg(unsigned int irq, struct msi_msg *msg)
++void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
+ {
+ struct irq_desc *desc = irq_to_desc(irq);
+
+- read_msi_msg_desc(desc, msg);
++ get_cached_msi_msg_desc(desc, msg);
+ }
+
+ void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
+--- a/include/linux/msi.h
++++ b/include/linux/msi.h
+@@ -14,8 +14,10 @@ struct irq_desc;
+ extern void mask_msi_irq(unsigned int irq);
+ extern void unmask_msi_irq(unsigned int irq);
+ extern void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg);
++extern void get_cached_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg);
+ extern void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg);
+ extern void read_msi_msg(unsigned int irq, struct msi_msg *msg);
++extern void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg);
+ extern void write_msi_msg(unsigned int irq, struct msi_msg *msg);
+
+ struct msi_desc {
--- /dev/null
+From 8d330919927ea31fa083b5a80084dc991da813a0 Mon Sep 17 00:00:00 2001
+From: Lin Ming <ming.m.lin@intel.com>
+Date: Wed, 25 Aug 2010 21:06:32 +0000
+Subject: perf, x86, Pentium4: Clear the P4_CCCR_FORCE_OVF flag
+
+From: Lin Ming <ming.m.lin@intel.com>
+
+commit 8d330919927ea31fa083b5a80084dc991da813a0 upstream.
+
+If on Pentium4 CPUs the FORCE_OVF flag is set then an NMI happens
+on every event, which can generate a flood of NMIs. Clear it.
+
+Reported-by: Vince Weaver <vweaver1@eecs.utk.edu>
+Signed-off-by: Lin Ming <ming.m.lin@intel.com>
+Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
+Cc: Frederic Weisbecker <fweisbec@gmail.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/kernel/cpu/perf_event_p4.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/x86/kernel/cpu/perf_event_p4.c
++++ b/arch/x86/kernel/cpu/perf_event_p4.c
+@@ -457,6 +457,8 @@ static int p4_hw_config(struct perf_even
+ event->hw.config |= event->attr.config &
+ (p4_config_pack_escr(P4_ESCR_MASK_HT) |
+ p4_config_pack_cccr(P4_CCCR_MASK_HT));
++
++ event->hw.config &= ~P4_CCCR_FORCE_OVF;
+ }
+
+ rc = x86_setup_perfctr(event);
--- /dev/null
+From 44b733809a5aba7f6b15a548d31a56d25bf3851c Mon Sep 17 00:00:00 2001
+From: Mark Lord <kernel@teksavvy.com>
+Date: Thu, 19 Aug 2010 21:40:44 -0400
+Subject: sata_mv: fix broken DSM/TRIM support (v2)
+
+From: Mark Lord <kernel@teksavvy.com>
+
+commit 44b733809a5aba7f6b15a548d31a56d25bf3851c upstream.
+
+Fix DSM/TRIM commands in sata_mv (v2).
+These need to be issued using old-school "BM DMA",
+rather than via the EDMA host queue.
+
+Since the chips don't have proper BM DMA status,
+we need to be more careful with setting the ATA_DMA_INTR bit,
+since DSM/TRIM often has a long delay between "DMA complete"
+and "command complete".
+
+GEN_I chips don't have BM DMA, so no TRIM for them.
+
+Signed-off-by: Mark Lord <mlord@pobox.com>
+Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/ata/sata_mv.c | 44 +++++++++++++++++++++++++++++++++++++-------
+ 1 file changed, 37 insertions(+), 7 deletions(-)
+
+--- a/drivers/ata/sata_mv.c
++++ b/drivers/ata/sata_mv.c
+@@ -1898,19 +1898,25 @@ static void mv_bmdma_start(struct ata_qu
+ * LOCKING:
+ * Inherited from caller.
+ */
+-static void mv_bmdma_stop(struct ata_queued_cmd *qc)
++static void mv_bmdma_stop_ap(struct ata_port *ap)
+ {
+- struct ata_port *ap = qc->ap;
+ void __iomem *port_mmio = mv_ap_base(ap);
+ u32 cmd;
+
+ /* clear start/stop bit */
+ cmd = readl(port_mmio + BMDMA_CMD);
+- cmd &= ~ATA_DMA_START;
+- writelfl(cmd, port_mmio + BMDMA_CMD);
++ if (cmd & ATA_DMA_START) {
++ cmd &= ~ATA_DMA_START;
++ writelfl(cmd, port_mmio + BMDMA_CMD);
+
+- /* one-PIO-cycle guaranteed wait, per spec, for HDMA1:0 transition */
+- ata_sff_dma_pause(ap);
++ /* one-PIO-cycle guaranteed wait, per spec, for HDMA1:0 transition */
++ ata_sff_dma_pause(ap);
++ }
++}
++
++static void mv_bmdma_stop(struct ata_queued_cmd *qc)
++{
++ mv_bmdma_stop_ap(qc->ap);
+ }
+
+ /**
+@@ -1934,8 +1940,21 @@ static u8 mv_bmdma_status(struct ata_por
+ reg = readl(port_mmio + BMDMA_STATUS);
+ if (reg & ATA_DMA_ACTIVE)
+ status = ATA_DMA_ACTIVE;
+- else
++ else if (reg & ATA_DMA_ERR)
+ status = (reg & ATA_DMA_ERR) | ATA_DMA_INTR;
++ else {
++ /*
++ * Just because DMA_ACTIVE is 0 (DMA completed),
++ * this does _not_ mean the device is "done".
++ * So we should not yet be signalling ATA_DMA_INTR
++ * in some cases. Eg. DSM/TRIM, and perhaps others.
++ */
++ mv_bmdma_stop_ap(ap);
++ if (ioread8(ap->ioaddr.altstatus_addr) & ATA_BUSY)
++ status = 0;
++ else
++ status = ATA_DMA_INTR;
++ }
+ return status;
+ }
+
+@@ -1995,6 +2014,9 @@ static void mv_qc_prep(struct ata_queued
+
+ switch (tf->protocol) {
+ case ATA_PROT_DMA:
++ if (tf->command == ATA_CMD_DSM)
++ return;
++ /* fall-thru */
+ case ATA_PROT_NCQ:
+ break; /* continue below */
+ case ATA_PROT_PIO:
+@@ -2094,6 +2116,8 @@ static void mv_qc_prep_iie(struct ata_qu
+ if ((tf->protocol != ATA_PROT_DMA) &&
+ (tf->protocol != ATA_PROT_NCQ))
+ return;
++ if (tf->command == ATA_CMD_DSM)
++ return; /* use bmdma for this */
+
+ /* Fill in Gen IIE command request block */
+ if (!(tf->flags & ATA_TFLAG_WRITE))
+@@ -2289,6 +2313,12 @@ static unsigned int mv_qc_issue(struct a
+
+ switch (qc->tf.protocol) {
+ case ATA_PROT_DMA:
++ if (qc->tf.command == ATA_CMD_DSM) {
++ if (!ap->ops->bmdma_setup) /* no bmdma on GEN_I */
++ return AC_ERR_OTHER;
++ break; /* use bmdma for this */
++ }
++ /* fall thru */
+ case ATA_PROT_NCQ:
+ mv_start_edma(ap, port_mmio, pp, qc->tf.protocol);
+ pp->req_idx = (pp->req_idx + 1) & MV_MAX_Q_DEPTH_MASK;
--- /dev/null
+From 546a1924224078c6f582e68f890b05b387b42653 Mon Sep 17 00:00:00 2001
+From: Dave Chinner <dchinner@redhat.com>
+Date: Tue, 24 Aug 2010 11:44:34 +1000
+Subject: writeback: write_cache_pages doesn't terminate at nr_to_write <= 0
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 546a1924224078c6f582e68f890b05b387b42653 upstream.
+
+I noticed XFS writeback in 2.6.36-rc1 was much slower than it should have
+been. Enabling writeback tracing showed:
+
+ flush-253:16-8516 [007] 1342952.351608: wbc_writepage: bdi 253:16: towrt=1024 skip=0 mode=0 kupd=0 bgrd=1 reclm=0 cyclic=1 more=0 older=0x0 start=0x0 end=0x0
+ flush-253:16-8516 [007] 1342952.351654: wbc_writepage: bdi 253:16: towrt=1023 skip=0 mode=0 kupd=0 bgrd=1 reclm=0 cyclic=1 more=0 older=0x0 start=0x0 end=0x0
+ flush-253:16-8516 [000] 1342952.369520: wbc_writepage: bdi 253:16: towrt=0 skip=0 mode=0 kupd=0 bgrd=1 reclm=0 cyclic=1 more=0 older=0x0 start=0x0 end=0x0
+ flush-253:16-8516 [000] 1342952.369542: wbc_writepage: bdi 253:16: towrt=-1 skip=0 mode=0 kupd=0 bgrd=1 reclm=0 cyclic=1 more=0 older=0x0 start=0x0 end=0x0
+ flush-253:16-8516 [000] 1342952.369549: wbc_writepage: bdi 253:16: towrt=-2 skip=0 mode=0 kupd=0 bgrd=1 reclm=0 cyclic=1 more=0 older=0x0 start=0x0 end=0x0
+
+Writeback is not terminating in background writeback if ->writepage is
+returning with wbc->nr_to_write == 0, resulting in sub-optimal single page
+writeback on XFS.
+
+Fix the write_cache_pages loop to terminate correctly when this situation
+occurs and so prevent this sub-optimal background writeback pattern. This
+improves sustained sequential buffered write performance from around
+250MB/s to 750MB/s for a 100GB file on an XFS filesystem on my 8p test VM.
+
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Wu Fengguang <fengguang.wu@intel.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ mm/page-writeback.c | 26 ++++++++++----------------
+ 1 file changed, 10 insertions(+), 16 deletions(-)
+
+--- a/mm/page-writeback.c
++++ b/mm/page-writeback.c
+@@ -949,22 +949,16 @@ continue_unlock:
+ }
+ }
+
+- if (wbc->nr_to_write > 0) {
+- if (--wbc->nr_to_write == 0 &&
+- wbc->sync_mode == WB_SYNC_NONE) {
+- /*
+- * We stop writing back only if we are
+- * not doing integrity sync. In case of
+- * integrity sync we have to keep going
+- * because someone may be concurrently
+- * dirtying pages, and we might have
+- * synced a lot of newly appeared dirty
+- * pages, but have not synced all of the
+- * old dirty pages.
+- */
+- done = 1;
+- break;
+- }
++ /*
++ * We stop writing back only if we are not doing
++ * integrity sync. In case of integrity sync we have to
++ * keep going until we have written all the pages
++ * we tagged for writeback prior to entering this loop.
++ */
++ if (--wbc->nr_to_write <= 0 &&
++ wbc->sync_mode == WB_SYNC_NONE) {
++ done = 1;
++ break;
+ }
+ }
+ pagevec_release(&pvec);
--- /dev/null
+From cd7240c0b900eb6d690ccee088a6c9b46dae815a Mon Sep 17 00:00:00 2001
+From: Suresh Siddha <suresh.b.siddha@intel.com>
+Date: Thu, 19 Aug 2010 17:03:38 -0700
+Subject: x86, tsc, sched: Recompute cyc2ns_offset's during resume from sleep states
+
+From: Suresh Siddha <suresh.b.siddha@intel.com>
+
+commit cd7240c0b900eb6d690ccee088a6c9b46dae815a upstream.
+
+TSCs get reset after suspend/resume (even on CPUs with an invariant TSC,
+which runs at a constant rate across ACPI P-, C- and T-states). And in
+some systems the BIOS seems to reinit the TSC to an arbitrary large value
+(still sync'd across CPUs) during resume.
+
+This leads to a scenario where the scheduler's rq->clock (sched_clock_cpu())
+is less than rq->age_stamp (introduced in 2.6.32), which makes
+scale_rt_power() return a big value; the resulting big group power set by
+update_group_power() causes improper load balancing between busy and
+idle CPUs after suspend/resume.
+
+This resulted in multi-threaded workloads (like kernel compilation) going
+slower after a suspend/resume cycle on Core i5 laptops.
+
+Fix this by recomputing cyc2ns_offset's during resume, so that
+sched_clock() continues from the point where it was left off during
+suspend.
+
+Reported-by: Florian Pritz <flo@xssn.at>
+Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+LKML-Reference: <1282262618.2675.24.camel@sbsiddha-MOBL3.sc.intel.com>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
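+
+Why the offset computation restores continuity (a sketch of the arithmetic,
+using the cyc2ns relation in spirit rather than the exact kernel macros):
+
+    /*
+     * Per-cpu sched_clock() is roughly:
+     *     ns = (tsc * cyc2ns_scale >> CYC2NS_SCALE_FACTOR) + cyc2ns_offset
+     *
+     * After resume the TSC restarts from an arbitrary value. With the local
+     * cyc2ns_offset temporarily set to 0:
+     *     raw = sched_clock();              (scaled TSC only, no offset yet)
+     * and choosing
+     *     cyc2ns_offset = cyc2ns_suspend - raw;
+     * for every cpu, sched_clock() reads cyc2ns_suspend immediately after
+     * resume, i.e. it continues from where it left off at suspend.
+     */
+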
+ arch/x86/include/asm/tsc.h | 2 ++
+ arch/x86/kernel/tsc.c | 38 ++++++++++++++++++++++++++++++++++++++
+ arch/x86/power/cpu.c | 2 ++
+ 3 files changed, 42 insertions(+)
+
+--- a/arch/x86/include/asm/tsc.h
++++ b/arch/x86/include/asm/tsc.h
+@@ -59,5 +59,7 @@ extern void check_tsc_sync_source(int cp
+ extern void check_tsc_sync_target(void);
+
+ extern int notsc_setup(char *);
++extern void save_sched_clock_state(void);
++extern void restore_sched_clock_state(void);
+
+ #endif /* _ASM_X86_TSC_H */
+--- a/arch/x86/kernel/tsc.c
++++ b/arch/x86/kernel/tsc.c
+@@ -626,6 +626,44 @@ static void set_cyc2ns_scale(unsigned lo
+ local_irq_restore(flags);
+ }
+
++static unsigned long long cyc2ns_suspend;
++
++void save_sched_clock_state(void)
++{
++ if (!sched_clock_stable)
++ return;
++
++ cyc2ns_suspend = sched_clock();
++}
++
++/*
++ * Even on processors with invariant TSC, TSC gets reset in some of the
++ * ACPI system sleep states. And in some systems BIOS seem to reinit TSC to
++ * arbitrary value (still sync'd across cpu's) during resume from such sleep
++ * states. To cope up with this, recompute the cyc2ns_offset for each cpu so
++ * that sched_clock() continues from the point where it was left off during
++ * suspend.
++ */
++void restore_sched_clock_state(void)
++{
++ unsigned long long offset;
++ unsigned long flags;
++ int cpu;
++
++ if (!sched_clock_stable)
++ return;
++
++ local_irq_save(flags);
++
++ get_cpu_var(cyc2ns_offset) = 0;
++ offset = cyc2ns_suspend - sched_clock();
++
++ for_each_possible_cpu(cpu)
++ per_cpu(cyc2ns_offset, cpu) = offset;
++
++ local_irq_restore(flags);
++}
++
+ #ifdef CONFIG_CPU_FREQ
+
+ /* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
+--- a/arch/x86/power/cpu.c
++++ b/arch/x86/power/cpu.c
+@@ -113,6 +113,7 @@ static void __save_processor_state(struc
+ void save_processor_state(void)
+ {
+ __save_processor_state(&saved_context);
++ save_sched_clock_state();
+ }
+ #ifdef CONFIG_X86_32
+ EXPORT_SYMBOL(save_processor_state);
+@@ -229,6 +230,7 @@ static void __restore_processor_state(st
+ void restore_processor_state(void)
+ {
+ __restore_processor_state(&saved_context);
++ restore_sched_clock_state();
+ }
+ #ifdef CONFIG_X86_32
+ EXPORT_SYMBOL(restore_processor_state);
--- /dev/null
+From dffe2e1e1a1ddb566a76266136c312801c66dcf7 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
+Date: Fri, 20 Aug 2010 19:10:01 -0700
+Subject: xen: handle events as edge-triggered
+
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
+
+commit dffe2e1e1a1ddb566a76266136c312801c66dcf7 upstream.
+
+Xen events are logically edge triggered, as Xen only calls the event
+upcall when an event is newly set, but not continuously as it remains set.
+As a result, use handle_edge_irq rather than handle_level_irq.
+
+This has the important side-effect of fixing a long-standing bug of
+events getting lost if:
+ - an event's interrupt handler is running
+ - the event is migrated to a different vcpu
+ - the event is re-triggered
+
+The most noticeable symptom of these lost events is occasional lockups
+of blkfront.
+
+Many thanks to Tom Kopec and Daniel Stodden in tracking this down.
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
+Cc: Tom Kopec <tek@acm.org>
+Cc: Daniel Stodden <daniel.stodden@citrix.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/xen/events.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/xen/events.c
++++ b/drivers/xen/events.c
+@@ -363,7 +363,7 @@ int bind_evtchn_to_irq(unsigned int evtc
+ irq = find_unbound_irq();
+
+ set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
+- handle_level_irq, "event");
++ handle_edge_irq, "event");
+
+ evtchn_to_irq[evtchn] = irq;
+ irq_info[irq] = mk_evtchn_info(evtchn);
--- /dev/null
+From aaca49642b92c8a57d3ca5029a5a94019c7af69f Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
+Date: Fri, 20 Aug 2010 18:57:53 -0700
+Subject: xen: use percpu interrupts for IPIs and VIRQs
+
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
+
+commit aaca49642b92c8a57d3ca5029a5a94019c7af69f upstream.
+
+IPIs and VIRQs are inherently per-cpu event types, so treat them as such:
+ - use a specific percpu irq_chip implementation, and
+ - handle them with handle_percpu_irq
+
+This makes the path for delivering these interrupts more efficient
+(no masking/unmasking, no locks), and it avoids problems with attempts
+to migrate them.
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/xen/events.c | 19 +++++++++++++++----
+ 1 file changed, 15 insertions(+), 4 deletions(-)
+
+--- a/drivers/xen/events.c
++++ b/drivers/xen/events.c
+@@ -107,6 +107,7 @@ static inline unsigned long *cpu_evtchn_
+ #define VALID_EVTCHN(chn) ((chn) != 0)
+
+ static struct irq_chip xen_dynamic_chip;
++static struct irq_chip xen_percpu_chip;
+
+ /* Constructor for packed IRQ information. */
+ static struct irq_info mk_unbound_info(void)
+@@ -389,8 +390,8 @@ static int bind_ipi_to_irq(unsigned int
+ if (irq < 0)
+ goto out;
+
+- set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
+- handle_level_irq, "ipi");
++ set_irq_chip_and_handler_name(irq, &xen_percpu_chip,
++ handle_percpu_irq, "ipi");
+
+ bind_ipi.vcpu = cpu;
+ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
+@@ -430,8 +431,8 @@ static int bind_virq_to_irq(unsigned int
+
+ irq = find_unbound_irq();
+
+- set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
+- handle_level_irq, "virq");
++ set_irq_chip_and_handler_name(irq, &xen_percpu_chip,
++ handle_percpu_irq, "virq");
+
+ evtchn_to_irq[evtchn] = irq;
+ irq_info[irq] = mk_virq_info(evtchn, virq);
+@@ -934,6 +935,16 @@ static struct irq_chip xen_dynamic_chip
+ .retrigger = retrigger_dynirq,
+ };
+
++static struct irq_chip xen_percpu_chip __read_mostly = {
++ .name = "xen-percpu",
++
++ .disable = disable_dynirq,
++ .mask = disable_dynirq,
++ .unmask = enable_dynirq,
++
++ .ack = ack_dynirq,
++};
++
+ void __init xen_init_IRQ(void)
+ {
+ int i;
--- /dev/null
+From 5b3eed756cd37255cad1181bd86bfd0977e97953 Mon Sep 17 00:00:00 2001
+From: Dave Chinner <dchinner@redhat.com>
+Date: Tue, 24 Aug 2010 11:42:41 +1000
+Subject: xfs: ensure we mark all inodes in a freed cluster XFS_ISTALE
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 5b3eed756cd37255cad1181bd86bfd0977e97953 upstream.
+
+Under heavy parallel metadata load (e.g. dbench), we can fail
+to mark all the inodes in a cluster being freed as XFS_ISTALE as we
+skip inodes we cannot get the XFS_ILOCK_EXCL or the flush lock on.
+When this happens and the inode cluster buffer has already been
+marked stale and freed, inode reclaim can try to write the inode out
+as it is dirty and not marked stale. This can result in writing th
+metadata to an freed extent, or in the case it has already
+been overwritten trigger a magic number check failure and return an
+EUCLEAN error such as:
+
+Filesystem "ram0": inode 0x442ba1 background reclaim flush failed with 117
+
+Fix this by ensuring that we hoover up all in memory inodes in the
+cluster and mark them XFS_ISTALE when freeing the cluster.
+
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/xfs/xfs_inode.c | 49 ++++++++++++++++++++++++++-----------------------
+ 1 file changed, 26 insertions(+), 23 deletions(-)
+
+--- a/fs/xfs/xfs_inode.c
++++ b/fs/xfs/xfs_inode.c
+@@ -1927,6 +1927,11 @@ xfs_iunlink_remove(
+ return 0;
+ }
+
++/*
++ * A big issue when freeing the inode cluster is that we _cannot_ skip any
++ * inodes that are in memory - they all must be marked stale and attached to
++ * the cluster buffer.
++ */
+ STATIC void
+ xfs_ifree_cluster(
+ xfs_inode_t *free_ip,
+@@ -1958,8 +1963,6 @@ xfs_ifree_cluster(
+ }
+
+ for (j = 0; j < nbufs; j++, inum += ninodes) {
+- int found = 0;
+-
+ blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
+ XFS_INO_TO_AGBNO(mp, inum));
+
+@@ -1978,7 +1981,9 @@ xfs_ifree_cluster(
+ /*
+ * Walk the inodes already attached to the buffer and mark them
+ * stale. These will all have the flush locks held, so an
+- * in-memory inode walk can't lock them.
++ * in-memory inode walk can't lock them. By marking them all
++ * stale first, we will not attempt to lock them in the loop
++ * below as the XFS_ISTALE flag will be set.
+ */
+ lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
+ while (lip) {
+@@ -1990,11 +1995,11 @@ xfs_ifree_cluster(
+ &iip->ili_flush_lsn,
+ &iip->ili_item.li_lsn);
+ xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
+- found++;
+ }
+ lip = lip->li_bio_list;
+ }
+
++
+ /*
+ * For each inode in memory attempt to add it to the inode
+ * buffer and set it up for being staled on buffer IO
+@@ -2006,6 +2011,7 @@ xfs_ifree_cluster(
+ * even trying to lock them.
+ */
+ for (i = 0; i < ninodes; i++) {
++retry:
+ read_lock(&pag->pag_ici_lock);
+ ip = radix_tree_lookup(&pag->pag_ici_root,
+ XFS_INO_TO_AGINO(mp, (inum + i)));
+@@ -2016,38 +2022,36 @@ xfs_ifree_cluster(
+ continue;
+ }
+
+- /* don't try to lock/unlock the current inode */
++ /*
++ * Don't try to lock/unlock the current inode, but we
++ * _cannot_ skip the other inodes that we did not find
++ * in the list attached to the buffer and are not
++ * already marked stale. If we can't lock it, back off
++ * and retry.
++ */
+ if (ip != free_ip &&
+ !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
+ read_unlock(&pag->pag_ici_lock);
+- continue;
++ delay(1);
++ goto retry;
+ }
+ read_unlock(&pag->pag_ici_lock);
+
+- if (!xfs_iflock_nowait(ip)) {
+- if (ip != free_ip)
+- xfs_iunlock(ip, XFS_ILOCK_EXCL);
+- continue;
+- }
+-
++ xfs_iflock(ip);
+ xfs_iflags_set(ip, XFS_ISTALE);
+- if (xfs_inode_clean(ip)) {
+- ASSERT(ip != free_ip);
+- xfs_ifunlock(ip);
+- xfs_iunlock(ip, XFS_ILOCK_EXCL);
+- continue;
+- }
+
++ /*
++ * we don't need to attach clean inodes or those only
++ * with unlogged changes (which we throw away, anyway).
++ */
+ iip = ip->i_itemp;
+- if (!iip) {
+- /* inode with unlogged changes only */
++ if (!iip || xfs_inode_clean(ip)) {
+ ASSERT(ip != free_ip);
+ ip->i_update_core = 0;
+ xfs_ifunlock(ip);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ continue;
+ }
+- found++;
+
+ iip->ili_last_fields = iip->ili_format.ilf_fields;
+ iip->ili_format.ilf_fields = 0;
+@@ -2063,8 +2067,7 @@ xfs_ifree_cluster(
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ }
+
+- if (found)
+- xfs_trans_stale_inode_buf(tp, bp);
++ xfs_trans_stale_inode_buf(tp, bp);
+ xfs_trans_binval(tp, bp);
+ }
+
--- /dev/null
+From 4536f2ad8b330453d7ebec0746c4374eadd649b1 Mon Sep 17 00:00:00 2001
+From: Dave Chinner <dchinner@redhat.com>
+Date: Tue, 24 Aug 2010 11:42:30 +1000
+Subject: xfs: fix untrusted inode number lookup
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 4536f2ad8b330453d7ebec0746c4374eadd649b1 upstream.
+
+Commit 7124fe0a5b619d65b739477b3b55a20bf805b06d ("xfs: validate untrusted inode
+numbers during lookup") changes the inode lookup code to do btree lookups for
+untrusted inode numbers. This change made an invalid assumption about the
+alignment of inodes and hence incorrectly calculated the first inode in the
+cluster. As a result, some inode numbers were being incorrectly considered
+invalid when they were actually valid.
+
+The issue was not picked up by the xfstests suite because it always runs fsr
+and dump (the two utilities that utilise the bulkstat interface) on cache hot
+inodes and hence the lookup code in the cold cache path was not sufficiently
+exercised to uncover this intermittent problem.
+
+Fix the issue by relaxing the btree lookup criteria and then checking if the
+record returned contains the inode number we are looking up. If we get an
+incorrect record, then the inode number is invalid.
+
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/xfs/xfs_ialloc.c | 16 ++++++++++------
+ 1 file changed, 10 insertions(+), 6 deletions(-)
+
+--- a/fs/xfs/xfs_ialloc.c
++++ b/fs/xfs/xfs_ialloc.c
+@@ -1217,7 +1217,6 @@ xfs_imap_lookup(
+ struct xfs_inobt_rec_incore rec;
+ struct xfs_btree_cur *cur;
+ struct xfs_buf *agbp;
+- xfs_agino_t startino;
+ int error;
+ int i;
+
+@@ -1231,13 +1230,13 @@ xfs_imap_lookup(
+ }
+
+ /*
+- * derive and lookup the exact inode record for the given agino. If the
+- * record cannot be found, then it's an invalid inode number and we
+- * should abort.
++ * Lookup the inode record for the given agino. If the record cannot be
++ * found, then it's an invalid inode number and we should abort. Once
++ * we have a record, we need to ensure it contains the inode number
++ * we are looking up.
+ */
+ cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
+- startino = agino & ~(XFS_IALLOC_INODES(mp) - 1);
+- error = xfs_inobt_lookup(cur, startino, XFS_LOOKUP_EQ, &i);
++ error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
+ if (!error) {
+ if (i)
+ error = xfs_inobt_get_rec(cur, &rec, &i);
+@@ -1250,6 +1249,11 @@ xfs_imap_lookup(
+ if (error)
+ return error;
+
++ /* check that the returned record contains the required inode */
++ if (rec.ir_startino > agino ||
++ rec.ir_startino + XFS_IALLOC_INODES(mp) <= agino)
++ return EINVAL;
++
+ /* for untrusted inodes check it is allocated first */
+ if ((flags & XFS_IGET_UNTRUSTED) &&
+ (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)))
--- /dev/null
+From fb511f2150174b18b28ad54708c1adda0df39b17 Mon Sep 17 00:00:00 2001
+From: Christoph Hellwig <hch@infradead.org>
+Date: Sun, 18 Jul 2010 21:17:10 +0000
+Subject: xfs: move aio completion after unwritten extent conversion
+
+From: Christoph Hellwig <hch@infradead.org>
+
+commit fb511f2150174b18b28ad54708c1adda0df39b17 upstream.
+
+If we write into an unwritten extent using AIO we need to complete the AIO
+request after the extent conversion has finished. Without that a read could
+race to see the extent still unwritten and return zeros.  For synchronous
+I/O we already take care of that by flushing the xfsconvertd workqueue (which
+might be a bit of overkill).
+
+To do that add iocb and result fields to struct xfs_ioend, so that we can
+call aio_complete from xfs_end_io after the extent conversion has happened.
+Note that we need a new result field as io_error is used for positive errno
+values, while the AIO code can return negative error values and positive
+transfer sizes.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Alex Elder <aelder@sgi.com>
+Cc: Chuck Ebbert <cebbert@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/xfs/linux-2.6/xfs_aops.c | 19 ++++++++++++++++---
+ 1 file changed, 16 insertions(+), 3 deletions(-)
+
+--- a/fs/xfs/linux-2.6/xfs_aops.c
++++ b/fs/xfs/linux-2.6/xfs_aops.c
+@@ -275,8 +275,11 @@ xfs_end_io(
+ xfs_finish_ioend(ioend, 0);
+ /* ensure we don't spin on blocked ioends */
+ delay(1);
+- } else
++ } else {
++ if (ioend->io_iocb)
++ aio_complete(ioend->io_iocb, ioend->io_result, 0);
+ xfs_destroy_ioend(ioend);
++ }
+ }
+
+ /*
+@@ -309,6 +312,8 @@ xfs_alloc_ioend(
+ atomic_inc(&XFS_I(ioend->io_inode)->i_iocount);
+ ioend->io_offset = 0;
+ ioend->io_size = 0;
++ ioend->io_iocb = NULL;
++ ioend->io_result = 0;
+
+ INIT_WORK(&ioend->io_work, xfs_end_io);
+ return ioend;
+@@ -1604,6 +1609,7 @@ xfs_end_io_direct(
+ bool is_async)
+ {
+ xfs_ioend_t *ioend = iocb->private;
++ bool complete_aio = is_async;
+
+ /*
+ * Non-NULL private data means we need to issue a transaction to
+@@ -1629,7 +1635,14 @@ xfs_end_io_direct(
+ if (ioend->io_type == IO_READ) {
+ xfs_finish_ioend(ioend, 0);
+ } else if (private && size > 0) {
+- xfs_finish_ioend(ioend, is_sync_kiocb(iocb));
++ if (is_async) {
++ ioend->io_iocb = iocb;
++ ioend->io_result = ret;
++ complete_aio = false;
++ xfs_finish_ioend(ioend, 0);
++ } else {
++ xfs_finish_ioend(ioend, 1);
++ }
+ } else {
+ /*
+ * A direct I/O write ioend starts it's life in unwritten
+@@ -1648,7 +1661,7 @@ xfs_end_io_direct(
+ */
+ iocb->private = NULL;
+
+- if (is_async)
++ if (complete_aio)
+ aio_complete(iocb, ret, 0);
+ }
+