From 54a08b37e0b773e130be244df7d61d33b5badaaa Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 12 Sep 2014 18:48:59 -0700 Subject: [PATCH] 3.10-stable patches added patches: bluetooth-avoid-use-of-session-socket-after-the-session-gets-freed.patch bluetooth-never-linger-on-process-exit.patch md-raid10-fix-memory-leak-when-raid10-reshape-completes.patch md-raid10-fix-memory-leak-when-reshaping-a-raid10.patch md-raid6-avoid-data-corruption-during-recovery-of-double-degraded-raid6.patch rdma-iwcm-use-a-default-listen-backlog-if-needed.patch xfs-don-t-dirty-buffers-beyond-eof.patch xfs-don-t-zero-partial-page-cache-pages-during-o_direct-writes.patch xfs-don-t-zero-partial-page-cache-pages-during.patch xfs-quotacheck-leaves-dquot-buffers-without-verifiers.patch --- ...-socket-after-the-session-gets-freed.patch | 51 +++++++ ...uetooth-never-linger-on-process-exit.patch | 93 +++++++++++++ ...y-leak-when-raid10-reshape-completes.patch | 39 ++++++ ...-memory-leak-when-reshaping-a-raid10.patch | 40 ++++++ ...ng-recovery-of-double-degraded-raid6.patch | 48 +++++++ ...e-a-default-listen-backlog-if-needed.patch | 87 ++++++++++++ queue-3.10/series | 10 ++ .../xfs-don-t-dirty-buffers-beyond-eof.patch | 130 ++++++++++++++++++ ...e-cache-pages-during-o_direct-writes.patch | 47 +++++++ ...zero-partial-page-cache-pages-during.patch | 59 ++++++++ ...aves-dquot-buffers-without-verifiers.patch | 110 +++++++++++++++ 11 files changed, 714 insertions(+) create mode 100644 queue-3.10/bluetooth-avoid-use-of-session-socket-after-the-session-gets-freed.patch create mode 100644 queue-3.10/bluetooth-never-linger-on-process-exit.patch create mode 100644 queue-3.10/md-raid10-fix-memory-leak-when-raid10-reshape-completes.patch create mode 100644 queue-3.10/md-raid10-fix-memory-leak-when-reshaping-a-raid10.patch create mode 100644 queue-3.10/md-raid6-avoid-data-corruption-during-recovery-of-double-degraded-raid6.patch create mode 100644 
queue-3.10/rdma-iwcm-use-a-default-listen-backlog-if-needed.patch create mode 100644 queue-3.10/xfs-don-t-dirty-buffers-beyond-eof.patch create mode 100644 queue-3.10/xfs-don-t-zero-partial-page-cache-pages-during-o_direct-writes.patch create mode 100644 queue-3.10/xfs-don-t-zero-partial-page-cache-pages-during.patch create mode 100644 queue-3.10/xfs-quotacheck-leaves-dquot-buffers-without-verifiers.patch diff --git a/queue-3.10/bluetooth-avoid-use-of-session-socket-after-the-session-gets-freed.patch b/queue-3.10/bluetooth-avoid-use-of-session-socket-after-the-session-gets-freed.patch new file mode 100644 index 00000000000..16f2d06bbdf --- /dev/null +++ b/queue-3.10/bluetooth-avoid-use-of-session-socket-after-the-session-gets-freed.patch @@ -0,0 +1,51 @@ +From 32333edb82fb2009980eefc5518100068147ab82 Mon Sep 17 00:00:00 2001 +From: Vignesh Raman +Date: Tue, 22 Jul 2014 19:24:25 +0530 +Subject: Bluetooth: Avoid use of session socket after the session gets freed + +From: Vignesh Raman + +commit 32333edb82fb2009980eefc5518100068147ab82 upstream. + +The commits 08c30aca9e698faddebd34f81e1196295f9dc063 "Bluetooth: Remove +RFCOMM session refcnt" and 8ff52f7d04d9cc31f1e81dcf9a2ba6335ed34905 +"Bluetooth: Return RFCOMM session ptrs to avoid freed session" +allow rfcomm_recv_ua and rfcomm_session_close to delete the session +(and free the corresponding socket) and propagate NULL session pointer +to the upper callers. + +Additional fix is required to terminate the loop in rfcomm_process_rx +function to avoid use of freed 'sk' memory. + +The issue is only reproducible with kernel option CONFIG_PAGE_POISONING +enabled making freed memory being changed and filled up with fixed char +value used to unmask use-after-free issues. 
+ +Signed-off-by: Vignesh Raman +Signed-off-by: Vitaly Kuzmichev +Acked-by: Dean Jenkins +Signed-off-by: Marcel Holtmann +Signed-off-by: Greg Kroah-Hartman + +--- + net/bluetooth/rfcomm/core.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/net/bluetooth/rfcomm/core.c ++++ b/net/bluetooth/rfcomm/core.c +@@ -1857,10 +1857,13 @@ static struct rfcomm_session *rfcomm_pro + /* Get data directly from socket receive queue without copying it. */ + while ((skb = skb_dequeue(&sk->sk_receive_queue))) { + skb_orphan(skb); +- if (!skb_linearize(skb)) ++ if (!skb_linearize(skb)) { + s = rfcomm_recv_frame(s, skb); +- else ++ if (!s) ++ break; ++ } else { + kfree_skb(skb); ++ } + } + + if (s && (sk->sk_state == BT_CLOSED)) diff --git a/queue-3.10/bluetooth-never-linger-on-process-exit.patch b/queue-3.10/bluetooth-never-linger-on-process-exit.patch new file mode 100644 index 00000000000..73d10ba8a2f --- /dev/null +++ b/queue-3.10/bluetooth-never-linger-on-process-exit.patch @@ -0,0 +1,93 @@ +From 093facf3634da1b0c2cc7ed106f1983da901bbab Mon Sep 17 00:00:00 2001 +From: Vladimir Davydov +Date: Tue, 15 Jul 2014 12:25:28 +0400 +Subject: Bluetooth: never linger on process exit + +From: Vladimir Davydov + +commit 093facf3634da1b0c2cc7ed106f1983da901bbab upstream. + +If the current process is exiting, lingering on socket close will make +it unkillable, so we should avoid it. 
+ +Reproducer: + + #include + #include + + #define BTPROTO_L2CAP 0 + #define BTPROTO_SCO 2 + #define BTPROTO_RFCOMM 3 + + int main() + { + int fd; + struct linger ling; + + fd = socket(PF_BLUETOOTH, SOCK_STREAM, BTPROTO_RFCOMM); + //or: fd = socket(PF_BLUETOOTH, SOCK_DGRAM, BTPROTO_L2CAP); + //or: fd = socket(PF_BLUETOOTH, SOCK_SEQPACKET, BTPROTO_SCO); + + ling.l_onoff = 1; + ling.l_linger = 1000000000; + setsockopt(fd, SOL_SOCKET, SO_LINGER, &ling, sizeof(ling)); + + return 0; + } + +Signed-off-by: Vladimir Davydov +Signed-off-by: Marcel Holtmann +Signed-off-by: Greg Kroah-Hartman + +--- + net/bluetooth/l2cap_sock.c | 3 ++- + net/bluetooth/rfcomm/sock.c | 3 ++- + net/bluetooth/sco.c | 6 ++++-- + 3 files changed, 8 insertions(+), 4 deletions(-) + +--- a/net/bluetooth/l2cap_sock.c ++++ b/net/bluetooth/l2cap_sock.c +@@ -887,7 +887,8 @@ static int l2cap_sock_shutdown(struct so + l2cap_chan_close(chan, 0); + lock_sock(sk); + +- if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime) ++ if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime && ++ !(current->flags & PF_EXITING)) + err = bt_sock_wait_state(sk, BT_CLOSED, + sk->sk_lingertime); + } +--- a/net/bluetooth/rfcomm/sock.c ++++ b/net/bluetooth/rfcomm/sock.c +@@ -887,7 +887,8 @@ static int rfcomm_sock_shutdown(struct s + sk->sk_shutdown = SHUTDOWN_MASK; + __rfcomm_sock_close(sk); + +- if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime) ++ if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime && ++ !(current->flags & PF_EXITING)) + err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime); + } + release_sock(sk); +--- a/net/bluetooth/sco.c ++++ b/net/bluetooth/sco.c +@@ -858,7 +858,8 @@ static int sco_sock_shutdown(struct sock + sco_sock_clear_timer(sk); + __sco_sock_close(sk); + +- if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime) ++ if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime && ++ !(current->flags & PF_EXITING)) + err = bt_sock_wait_state(sk, BT_CLOSED, + sk->sk_lingertime); + } +@@ -878,7 +879,8 @@ static 
int sco_sock_release(struct socke + + sco_sock_close(sk); + +- if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime) { ++ if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime && ++ !(current->flags & PF_EXITING)) { + lock_sock(sk); + err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime); + release_sock(sk); diff --git a/queue-3.10/md-raid10-fix-memory-leak-when-raid10-reshape-completes.patch b/queue-3.10/md-raid10-fix-memory-leak-when-raid10-reshape-completes.patch new file mode 100644 index 00000000000..ae1873476bd --- /dev/null +++ b/queue-3.10/md-raid10-fix-memory-leak-when-raid10-reshape-completes.patch @@ -0,0 +1,39 @@ +From b39685526f46976bcd13aa08c82480092befa46c Mon Sep 17 00:00:00 2001 +From: NeilBrown +Date: Mon, 18 Aug 2014 13:59:50 +1000 +Subject: md/raid10: Fix memory leak when raid10 reshape completes. + +From: NeilBrown + +commit b39685526f46976bcd13aa08c82480092befa46c upstream. + +When a raid10 commences a resync/recovery/reshape it allocates +some buffer space. +When a resync/recovery completes the buffer space is freed. But not +when the reshape completes. +This can result in a small memory leak. + +There is a subtle side-effect of this bug. When a RAID10 is reshaped +to a larger array (more devices), the reshape is immediately followed +by a "resync" of the new space. This "resync" will use the buffer +space which was allocated for "reshape". This can cause problems +including a "BUG" in the SCSI layer. So this is suitable for -stable. 
+ +Fixes: 3ea7daa5d7fde47cd41f4d56c2deb949114da9d6 +Signed-off-by: NeilBrown +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/raid10.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/md/raid10.c ++++ b/drivers/md/raid10.c +@@ -2948,6 +2948,7 @@ static sector_t sync_request(struct mdde + */ + if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) { + end_reshape(conf); ++ close_sync(conf); + return 0; + } + diff --git a/queue-3.10/md-raid10-fix-memory-leak-when-reshaping-a-raid10.patch b/queue-3.10/md-raid10-fix-memory-leak-when-reshaping-a-raid10.patch new file mode 100644 index 00000000000..f2cc4b6f276 --- /dev/null +++ b/queue-3.10/md-raid10-fix-memory-leak-when-reshaping-a-raid10.patch @@ -0,0 +1,40 @@ +From ce0b0a46955d1bb389684a2605dbcaa990ba0154 Mon Sep 17 00:00:00 2001 +From: NeilBrown +Date: Mon, 18 Aug 2014 13:56:38 +1000 +Subject: md/raid10: fix memory leak when reshaping a RAID10. + +From: NeilBrown + +commit ce0b0a46955d1bb389684a2605dbcaa990ba0154 upstream. + +raid10 reshape clears unwanted bits from a bio->bi_flags using +a method which, while clumsy, worked until 3.10 when BIO_OWNS_VEC +was added. +Since then it clears that bit but shouldn't. This results in a +memory leak. + +So change to use the approved method of clearing unwanted bits. + +As this causes a memory leak which can consume all of memory +the fix is suitable for -stable. 
+ +Fixes: a38352e0ac02dbbd4fa464dc22d1352b5fbd06fd +Reported-by: mdraid.pkoch@dfgh.net (Peter Koch) +Signed-off-by: NeilBrown +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/raid10.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/md/raid10.c ++++ b/drivers/md/raid10.c +@@ -4398,7 +4398,7 @@ read_more: + read_bio->bi_private = r10_bio; + read_bio->bi_end_io = end_sync_read; + read_bio->bi_rw = READ; +- read_bio->bi_flags &= ~(BIO_POOL_MASK - 1); ++ read_bio->bi_flags &= (~0UL << BIO_RESET_BITS); + read_bio->bi_flags |= 1 << BIO_UPTODATE; + read_bio->bi_vcnt = 0; + read_bio->bi_size = 0; diff --git a/queue-3.10/md-raid6-avoid-data-corruption-during-recovery-of-double-degraded-raid6.patch b/queue-3.10/md-raid6-avoid-data-corruption-during-recovery-of-double-degraded-raid6.patch new file mode 100644 index 00000000000..1861818637f --- /dev/null +++ b/queue-3.10/md-raid6-avoid-data-corruption-during-recovery-of-double-degraded-raid6.patch @@ -0,0 +1,48 @@ +From 9c4bdf697c39805078392d5ddbbba5ae5680e0dd Mon Sep 17 00:00:00 2001 +From: NeilBrown +Date: Wed, 13 Aug 2014 09:57:07 +1000 +Subject: md/raid6: avoid data corruption during recovery of double-degraded RAID6 + +From: NeilBrown + +commit 9c4bdf697c39805078392d5ddbbba5ae5680e0dd upstream. + +During recovery of a double-degraded RAID6 it is possible for +some blocks not to be recovered properly, leading to corruption. + +If a write happens to one block in a stripe that would be written to a +missing device, and at the same time that stripe is recovering data +to the other missing device, then that recovered data may not be written. + +This patch skips, in the double-degraded case, an optimisation that is +only safe for single-degraded arrays. + +Bug was introduced in 2.6.32 and fix is suitable for any kernel since +then. In an older kernel with separate handle_stripe5() and +handle_stripe6() functions the patch must change handle_stripe6(). 
+ +Fixes: 6c0069c0ae9659e3a91b68eaed06a5c6c37f45c8 +Cc: Yuri Tikhonov +Cc: Dan Williams +Reported-by: "Manibalan P" +Tested-by: "Manibalan P" +Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1090423 +Signed-off-by: NeilBrown +Acked-by: Dan Williams +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/raid5.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/md/raid5.c ++++ b/drivers/md/raid5.c +@@ -3561,6 +3561,8 @@ static void handle_stripe(struct stripe_ + set_bit(R5_Wantwrite, &dev->flags); + if (prexor) + continue; ++ if (s.failed > 1) ++ continue; + if (!test_bit(R5_Insync, &dev->flags) || + ((i == sh->pd_idx || i == sh->qd_idx) && + s.failed == 0)) diff --git a/queue-3.10/rdma-iwcm-use-a-default-listen-backlog-if-needed.patch b/queue-3.10/rdma-iwcm-use-a-default-listen-backlog-if-needed.patch new file mode 100644 index 00000000000..6b0565376e7 --- /dev/null +++ b/queue-3.10/rdma-iwcm-use-a-default-listen-backlog-if-needed.patch @@ -0,0 +1,87 @@ +From 2f0304d21867476394cd51a54e97f7273d112261 Mon Sep 17 00:00:00 2001 +From: Steve Wise +Date: Fri, 25 Jul 2014 09:11:33 -0500 +Subject: RDMA/iwcm: Use a default listen backlog if needed + +From: Steve Wise + +commit 2f0304d21867476394cd51a54e97f7273d112261 upstream. + +If the user creates a listening cm_id with backlog of 0 the IWCM ends +up not allowing any connection requests at all. The correct behavior +is for the IWCM to pick a default value if the user backlog parameter +is zero. + +Lustre from version 1.8.8 onward uses a backlog of 0, which breaks +iwarp support without this fix. 
+ +Signed-off-by: Steve Wise +Signed-off-by: Roland Dreier +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/infiniband/core/iwcm.c | 27 +++++++++++++++++++++++++++ + 1 file changed, 27 insertions(+) + +--- a/drivers/infiniband/core/iwcm.c ++++ b/drivers/infiniband/core/iwcm.c +@@ -46,6 +46,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -65,6 +66,20 @@ struct iwcm_work { + struct list_head free_list; + }; + ++static unsigned int default_backlog = 256; ++ ++static struct ctl_table_header *iwcm_ctl_table_hdr; ++static struct ctl_table iwcm_ctl_table[] = { ++ { ++ .procname = "default_backlog", ++ .data = &default_backlog, ++ .maxlen = sizeof(default_backlog), ++ .mode = 0644, ++ .proc_handler = proc_dointvec, ++ }, ++ { } ++}; ++ + /* + * The following services provide a mechanism for pre-allocating iwcm_work + * elements. The design pre-allocates them based on the cm_id type: +@@ -419,6 +434,9 @@ int iw_cm_listen(struct iw_cm_id *cm_id, + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + ++ if (!backlog) ++ backlog = default_backlog; ++ + ret = alloc_work_entries(cm_id_priv, backlog); + if (ret) + return ret; +@@ -1024,11 +1042,20 @@ static int __init iw_cm_init(void) + if (!iwcm_wq) + return -ENOMEM; + ++ iwcm_ctl_table_hdr = register_net_sysctl(&init_net, "net/iw_cm", ++ iwcm_ctl_table); ++ if (!iwcm_ctl_table_hdr) { ++ pr_err("iw_cm: couldn't register sysctl paths\n"); ++ destroy_workqueue(iwcm_wq); ++ return -ENOMEM; ++ } ++ + return 0; + } + + static void __exit iw_cm_cleanup(void) + { ++ unregister_net_sysctl_table(iwcm_ctl_table_hdr); + destroy_workqueue(iwcm_wq); + } + diff --git a/queue-3.10/series b/queue-3.10/series index 3dfcb787f21..47d761bd968 100644 --- a/queue-3.10/series +++ b/queue-3.10/series @@ -42,3 +42,13 @@ mnt-move-the-test-for-mnt_lock_readonly-from-change_mount_flags-into-do_remount. 
mnt-correct-permission-checks-in-do_remount.patch mnt-change-the-default-remount-atime-from-relatime-to-the-existing-value.patch mnt-add-tests-for-unprivileged-remount-cases-that-have-found-to-be-faulty.patch +bluetooth-never-linger-on-process-exit.patch +bluetooth-avoid-use-of-session-socket-after-the-session-gets-freed.patch +md-raid6-avoid-data-corruption-during-recovery-of-double-degraded-raid6.patch +md-raid10-fix-memory-leak-when-reshaping-a-raid10.patch +md-raid10-fix-memory-leak-when-raid10-reshape-completes.patch +rdma-iwcm-use-a-default-listen-backlog-if-needed.patch +xfs-quotacheck-leaves-dquot-buffers-without-verifiers.patch +xfs-don-t-dirty-buffers-beyond-eof.patch +xfs-don-t-zero-partial-page-cache-pages-during-o_direct-writes.patch +xfs-don-t-zero-partial-page-cache-pages-during.patch diff --git a/queue-3.10/xfs-don-t-dirty-buffers-beyond-eof.patch b/queue-3.10/xfs-don-t-dirty-buffers-beyond-eof.patch new file mode 100644 index 00000000000..870fb4725ba --- /dev/null +++ b/queue-3.10/xfs-don-t-dirty-buffers-beyond-eof.patch @@ -0,0 +1,130 @@ +From 22e757a49cf010703fcb9c9b4ef793248c39b0c2 Mon Sep 17 00:00:00 2001 +From: Dave Chinner +Date: Tue, 2 Sep 2014 12:12:51 +1000 +Subject: xfs: don't dirty buffers beyond EOF + +From: Dave Chinner + +commit 22e757a49cf010703fcb9c9b4ef793248c39b0c2 upstream. + +generic/263 is failing fsx at this point with a page spanning +EOF that cannot be invalidated. The operations are: + +1190 mapwrite 0x52c00 thru 0x5e569 (0xb96a bytes) +1191 mapread 0x5c000 thru 0x5d636 (0x1637 bytes) +1192 write 0x5b600 thru 0x771ff (0x1bc00 bytes) + +where 1190 extends EOF from 0x54000 to 0x5e569. When the direct IO +write attempts to invalidate the cached page over this range, it +fails with -EBUSY and so any attempt to do page invalidation fails. + +The real question is this: Why can't that page be invalidated after +it has been written to disk and cleaned? 
+ +Well, there's data on the first two buffers in the page (1k block +size, 4k page), but the third buffer on the page (i.e. beyond EOF) +is failing drop_buffers because it's bh->b_state == 0x3, which is +BH_Uptodate | BH_Dirty. IOWs, there's dirty buffers beyond EOF. Say +what? + +OK, set_buffer_dirty() is called on all buffers from +__set_page_buffers_dirty(), regardless of whether the buffer is +beyond EOF or not, which means that when we get to ->writepage, +we have buffers marked dirty beyond EOF that we need to clean. +So, we need to implement our own .set_page_dirty method that +doesn't dirty buffers beyond EOF. + +This is messy because the buffer code is not meant to be shared +and it has interesting locking issues on the buffer dirty bits. +So just copy and paste it and then modify it to suit what we need. + +Note: the solutions the other filesystems and generic block code use +of marking the buffers clean in ->writepage does not work for XFS. +It still leaves dirty buffers beyond EOF and invalidations still +fail. Hence rather than play whack-a-mole, this patch simply +prevents those buffers from being dirtied in the first place. + +Signed-off-by: Dave Chinner +Reviewed-by: Brian Foster +Signed-off-by: Dave Chinner +Signed-off-by: Greg Kroah-Hartman + +--- + fs/xfs/xfs_aops.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 61 insertions(+) + +--- a/fs/xfs/xfs_aops.c ++++ b/fs/xfs/xfs_aops.c +@@ -1661,11 +1661,72 @@ xfs_vm_readpages( + return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); + } + ++/* ++ * This is basically a copy of __set_page_dirty_buffers() with one ++ * small tweak: buffers beyond EOF do not get marked dirty. If we mark them ++ * dirty, we'll never be able to clean them because we don't write buffers ++ * beyond EOF, and that means we can't invalidate pages that span EOF ++ * that have been marked dirty. 
Further, the dirty state can leak into ++ * the file interior if the file is extended, resulting in all sorts of ++ * bad things happening as the state does not match the underlying data. ++ * ++ * XXX: this really indicates that bufferheads in XFS need to die. Warts like ++ * this only exist because of bufferheads and how the generic code manages them. ++ */ ++STATIC int ++xfs_vm_set_page_dirty( ++ struct page *page) ++{ ++ struct address_space *mapping = page->mapping; ++ struct inode *inode = mapping->host; ++ loff_t end_offset; ++ loff_t offset; ++ int newly_dirty; ++ ++ if (unlikely(!mapping)) ++ return !TestSetPageDirty(page); ++ ++ end_offset = i_size_read(inode); ++ offset = page_offset(page); ++ ++ spin_lock(&mapping->private_lock); ++ if (page_has_buffers(page)) { ++ struct buffer_head *head = page_buffers(page); ++ struct buffer_head *bh = head; ++ ++ do { ++ if (offset < end_offset) ++ set_buffer_dirty(bh); ++ bh = bh->b_this_page; ++ offset += 1 << inode->i_blkbits; ++ } while (bh != head); ++ } ++ newly_dirty = !TestSetPageDirty(page); ++ spin_unlock(&mapping->private_lock); ++ ++ if (newly_dirty) { ++ /* sigh - __set_page_dirty() is static, so copy it here, too */ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&mapping->tree_lock, flags); ++ if (page->mapping) { /* Race with truncate? 
*/ ++ WARN_ON_ONCE(!PageUptodate(page)); ++ account_page_dirtied(page, mapping); ++ radix_tree_tag_set(&mapping->page_tree, ++ page_index(page), PAGECACHE_TAG_DIRTY); ++ } ++ spin_unlock_irqrestore(&mapping->tree_lock, flags); ++ __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); ++ } ++ return newly_dirty; ++} ++ + const struct address_space_operations xfs_address_space_operations = { + .readpage = xfs_vm_readpage, + .readpages = xfs_vm_readpages, + .writepage = xfs_vm_writepage, + .writepages = xfs_vm_writepages, ++ .set_page_dirty = xfs_vm_set_page_dirty, + .releasepage = xfs_vm_releasepage, + .invalidatepage = xfs_vm_invalidatepage, + .write_begin = xfs_vm_write_begin, diff --git a/queue-3.10/xfs-don-t-zero-partial-page-cache-pages-during-o_direct-writes.patch b/queue-3.10/xfs-don-t-zero-partial-page-cache-pages-during-o_direct-writes.patch new file mode 100644 index 00000000000..57eb8c20c4a --- /dev/null +++ b/queue-3.10/xfs-don-t-zero-partial-page-cache-pages-during-o_direct-writes.patch @@ -0,0 +1,47 @@ +From 834ffca6f7e345a79f6f2e2d131b0dfba8a4b67a Mon Sep 17 00:00:00 2001 +From: Dave Chinner +Date: Tue, 2 Sep 2014 12:12:52 +1000 +Subject: xfs: don't zero partial page cache pages during O_DIRECT writes + +From: Dave Chinner + +commit 834ffca6f7e345a79f6f2e2d131b0dfba8a4b67a upstream. + +Similar to direct IO reads, direct IO writes are using +truncate_pagecache_range to invalidate the page cache. This is +incorrect due to the sub-block zeroing in the page cache that +truncate_pagecache_range() triggers. + +This patch fixes things by using invalidate_inode_pages2_range +instead. It preserves the page cache invalidation, but won't zero +any pages. 
+ +Signed-off-by: Dave Chinner +Reviewed-by: Brian Foster +Reviewed-by: Christoph Hellwig +Signed-off-by: Dave Chinner +Signed-off-by: Greg Kroah-Hartman + +--- + fs/xfs/xfs_file.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +--- a/fs/xfs/xfs_file.c ++++ b/fs/xfs/xfs_file.c +@@ -677,7 +677,15 @@ xfs_file_dio_aio_write( + pos, -1); + if (ret) + goto out; +- truncate_pagecache_range(VFS_I(ip), pos, -1); ++ /* ++ * Invalidate whole pages. This can return an error if ++ * we fail to invalidate a page, but this should never ++ * happen on XFS. Warn if it does fail. ++ */ ++ ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, ++ pos >> PAGE_CACHE_SHIFT, -1); ++ WARN_ON_ONCE(ret); ++ ret = 0; + } + + /* diff --git a/queue-3.10/xfs-don-t-zero-partial-page-cache-pages-during.patch b/queue-3.10/xfs-don-t-zero-partial-page-cache-pages-during.patch new file mode 100644 index 00000000000..64de534b684 --- /dev/null +++ b/queue-3.10/xfs-don-t-zero-partial-page-cache-pages-during.patch @@ -0,0 +1,59 @@ +From 85e584da3212140ee80fd047f9058bbee0bc00d5 Mon Sep 17 00:00:00 2001 +From: Chris Mason +Date: Tue, 2 Sep 2014 12:12:52 +1000 +Subject: xfs: don't zero partial page cache pages during + O_DIRECT writes + +From: Chris Mason + +commit 85e584da3212140ee80fd047f9058bbee0bc00d5 upstream. + +xfs is using truncate_pagecache_range to invalidate the page cache +during DIO reads. This is different from the other filesystems who +only invalidate pages during DIO writes. + +truncate_pagecache_range is meant to be used when we are freeing the +underlying data structs from disk, so it will zero any partial +ranges in the page. This means a DIO read can zero out part of the +page cache page, and it is possible the page will stay in cache. + +buffered reads will find an up to date page with zeros instead of +the data actually on disk. + +This patch fixes things by using invalidate_inode_pages2_range +instead. 
It preserves the page cache invalidation, but won't zero +any pages. + +[dchinner: catch error and warn if it fails. Comment.] + +Signed-off-by: Chris Mason +Reviewed-by: Dave Chinner +Reviewed-by: Brian Foster +Reviewed-by: Christoph Hellwig +Signed-off-by: Dave Chinner +Signed-off-by: Greg Kroah-Hartman + +--- + fs/xfs/xfs_file.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +--- a/fs/xfs/xfs_file.c ++++ b/fs/xfs/xfs_file.c +@@ -298,7 +298,16 @@ xfs_file_aio_read( + xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); + return ret; + } +- truncate_pagecache_range(VFS_I(ip), pos, -1); ++ ++ /* ++ * Invalidate whole pages. This can return an error if ++ * we fail to invalidate a page, but this should never ++ * happen on XFS. Warn if it does fail. ++ */ ++ ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, ++ pos >> PAGE_CACHE_SHIFT, -1); ++ WARN_ON_ONCE(ret); ++ ret = 0; + } + xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); + } diff --git a/queue-3.10/xfs-quotacheck-leaves-dquot-buffers-without-verifiers.patch b/queue-3.10/xfs-quotacheck-leaves-dquot-buffers-without-verifiers.patch new file mode 100644 index 00000000000..8597db5c1f6 --- /dev/null +++ b/queue-3.10/xfs-quotacheck-leaves-dquot-buffers-without-verifiers.patch @@ -0,0 +1,110 @@ +From 5fd364fee81a7888af806e42ed8a91c845894f2d Mon Sep 17 00:00:00 2001 +From: Dave Chinner +Date: Mon, 4 Aug 2014 12:43:26 +1000 +Subject: xfs: quotacheck leaves dquot buffers without verifiers + +From: Dave Chinner + +commit 5fd364fee81a7888af806e42ed8a91c845894f2d upstream. + +When running xfs/305, I noticed that quotacheck was flushing dquot +buffers that did not have the xfs_dquot_buf_ops verifiers attached: + +XFS (vdb): _xfs_buf_ioapply: no ops on block 0x1dc8/0x1dc8 +ffff880052489000: 44 51 01 04 00 00 65 b8 00 00 00 00 00 00 00 00 DQ....e......... +ffff880052489010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +ffff880052489020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 
+ffff880052489030: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +CPU: 1 PID: 2376 Comm: mount Not tainted 3.16.0-rc2-dgc+ #306 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 + ffff88006fe38000 ffff88004a0ffae8 ffffffff81cf1cca 0000000000000001 + ffff88004a0ffb88 ffffffff814d50ca 000010004a0ffc70 0000000000000000 + ffff88006be56dc4 0000000000000021 0000000000001dc8 ffff88007c773d80 +Call Trace: + [] dump_stack+0x45/0x56 + [] _xfs_buf_ioapply+0x3ca/0x3d0 + [] ? wake_up_state+0x20/0x20 + [] ? xfs_bdstrat_cb+0x55/0xb0 + [] xfs_buf_iorequest+0x6b/0xd0 + [] xfs_bdstrat_cb+0x55/0xb0 + [] __xfs_buf_delwri_submit+0x15b/0x220 + [] ? xfs_buf_delwri_submit+0x30/0x90 + [] xfs_buf_delwri_submit+0x30/0x90 + [] xfs_qm_quotacheck+0x17d/0x3c0 + [] xfs_qm_mount_quotas+0x151/0x1e0 + [] xfs_mountfs+0x56c/0x7d0 + [] xfs_fs_fill_super+0x2c2/0x340 + [] mount_bdev+0x194/0x1d0 + [] ? xfs_finish_flags+0x170/0x170 + [] xfs_fs_mount+0x15/0x20 + [] mount_fs+0x39/0x1b0 + [] vfs_kern_mount+0x67/0x120 + [] do_mount+0x23e/0xad0 + [] ? __get_free_pages+0xe/0x50 + [] ? copy_mount_options+0x36/0x150 + [] SyS_mount+0x83/0xc0 + [] tracesys+0xdd/0xe2 + +This was caused by dquot buffer readahead not attaching a verifier +structure to the buffer when readahead was issued, resulting in the +followup read of the buffer finding a valid buffer and so not +attaching new verifiers to the buffer as part of the read. + +Also, when a verifier failure occurs, we then read the buffer +without verifiers. Attach the verifiers manually after this read so +that if the buffer is then written it will be verified that the +corruption has been repaired. + +Further, when flushing a dquot we don't ask for a verifier when +reading in the dquot buffer the dquot belongs to. Most of the time +this isn't an issue because the buffer is still cached, but when it +is not cached it will result in writing the dquot buffer without +having the verifier attached. 
+ +Signed-off-by: Dave Chinner +Reviewed-by: Brian Foster +Reviewed-by: Christoph Hellwig +Signed-off-by: Dave Chinner +Signed-off-by: Greg Kroah-Hartman + +--- + fs/xfs/xfs_dquot.c | 3 ++- + fs/xfs/xfs_qm.c | 8 +++++++- + 2 files changed, 9 insertions(+), 2 deletions(-) + +--- a/fs/xfs/xfs_dquot.c ++++ b/fs/xfs/xfs_dquot.c +@@ -1104,7 +1104,8 @@ xfs_qm_dqflush( + * Get the buffer containing the on-disk dquot + */ + error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno, +- mp->m_quotainfo->qi_dqchunklen, 0, &bp, NULL); ++ mp->m_quotainfo->qi_dqchunklen, 0, &bp, ++ &xfs_dquot_buf_ops); + if (error) + goto out_unlock; + +--- a/fs/xfs/xfs_qm.c ++++ b/fs/xfs/xfs_qm.c +@@ -935,6 +935,12 @@ xfs_qm_dqiter_bufs( + if (error) + break; + ++ /* ++ * A corrupt buffer might not have a verifier attached, so ++ * make sure we have the correct one attached before writeback ++ * occurs. ++ */ ++ bp->b_ops = &xfs_dquot_buf_ops; + xfs_qm_reset_dqcounts(mp, bp, firstid, type); + xfs_buf_delwri_queue(bp, buffer_list); + xfs_buf_relse(bp); +@@ -1018,7 +1024,7 @@ xfs_qm_dqiterate( + xfs_buf_readahead(mp->m_ddev_targp, + XFS_FSB_TO_DADDR(mp, rablkno), + mp->m_quotainfo->qi_dqchunklen, +- NULL); ++ &xfs_dquot_buf_ops); + rablkno++; + } + } -- 2.47.3