3.0-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Thu, 16 Aug 2012 23:21:59 +0000 (16:21 -0700)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Thu, 16 Aug 2012 23:21:59 +0000 (16:21 -0700)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 16 Aug 2012 23:21:59 +0000 (16:21 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 16 Aug 2012 23:21:59 +0000 (16:21 -0700)
diff --git a/queue-3.0/fuse-verify-all-ioctl-retry-iov-elements.patch b/queue-3.0/fuse-verify-all-ioctl-retry-iov-elements.patch

new file mode 100644 (file)

index 0000000..12f4f99
--- /dev/null
+++ b/queue-3.0/fuse-verify-all-ioctl-retry-iov-elements.patch
@@ -0,0 +1,42 @@
+From fb6ccff667712c46b4501b920ea73a326e49626a Mon Sep 17 00:00:00 2001
+From: Zach Brown <zab@redhat.com>
+Date: Tue, 24 Jul 2012 12:10:11 -0700
+Subject: fuse: verify all ioctl retry iov elements
+
+From: Zach Brown <zab@redhat.com>
+
+commit fb6ccff667712c46b4501b920ea73a326e49626a upstream.
+
+Commit 7572777eef78ebdee1ecb7c258c0ef94d35bad16 attempted to verify that
+the total iovec from the client doesn't overflow iov_length() but it
+only checked the first element.  The iovec could still overflow by
+starting with a small element.  The obvious fix is to check all the
+elements.
+
+The overflow case doesn't look dangerous to the kernel as the copy is
+limited by the length after the overflow.  This fix restores the
+intention of returning an error instead of successfully copying less
+than the iovec represented.
+
+I found this by code inspection.  I built it but don't have a test case.
+I'm cc:ing stable because the initial commit did as well.
+
+Signed-off-by: Zach Brown <zab@redhat.com>
+Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/fuse/file.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/fuse/file.c
++++ b/fs/fuse/file.c
+@@ -1710,7 +1710,7 @@ static int fuse_verify_ioctl_iov(struct
+       size_t n;
+       u32 max = FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT;
+ 
+-      for (n = 0; n < count; n++) {
++      for (n = 0; n < count; n++, iov++) {
+               if (iov->iov_len > (size_t) max)
+                       return -ENOMEM;
+               max -= iov->iov_len;
diff --git a/queue-3.0/series b/queue-3.0/series

index 591b18477ed0a6d600fe6acfc9ece0926a882488..8afbfd47318ad576c7ad5e0bb088177dee7a14de 100644 (file)
--- a/queue-3.0/series
+++ b/queue-3.0/series
@@ -1 +1,4 @@
  s390-compat-fix-mmap-compat-system-calls.patch
+fuse-verify-all-ioctl-retry-iov-elements.patch
+xen-p2m-reserve-8mb-of-_brk-space-for-p2m-leafs-when-populating-back.patch
+xen-mark-local-pages-as-foreign-in-the-m2p_override.patch
diff --git a/queue-3.0/xen-mark-local-pages-as-foreign-in-the-m2p_override.patch b/queue-3.0/xen-mark-local-pages-as-foreign-in-the-m2p_override.patch

new file mode 100644 (file)

index 0000000..71e5696
--- /dev/null
+++ b/queue-3.0/xen-mark-local-pages-as-foreign-in-the-m2p_override.patch
@@ -0,0 +1,154 @@
+From b9e0d95c041ca2d7ad297ee37c2e9cfab67a188f Mon Sep 17 00:00:00 2001
+From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
+Date: Wed, 23 May 2012 18:57:20 +0100
+Subject: xen: mark local pages as FOREIGN in the m2p_override
+
+From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
+
+commit b9e0d95c041ca2d7ad297ee37c2e9cfab67a188f upstream.
+
+When the frontend and the backend reside on the same domain, even if we
+add pages to the m2p_override, these pages will never be returned by
+mfn_to_pfn because the check "get_phys_to_machine(pfn) != mfn" will
+always fail, so the pfn of the frontend will be returned instead
+(resulting in a deadlock because the frontend pages are already locked).
+
+INFO: task qemu-system-i38:1085 blocked for more than 120 seconds.
+"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+qemu-system-i38 D ffff8800cfc137c0     0  1085      1 0x00000000
+ ffff8800c47ed898 0000000000000282 ffff8800be4596b0 00000000000137c0
+ ffff8800c47edfd8 ffff8800c47ec010 00000000000137c0 00000000000137c0
+ ffff8800c47edfd8 00000000000137c0 ffffffff82213020 ffff8800be4596b0
+Call Trace:
+ [<ffffffff81101ee0>] ? __lock_page+0x70/0x70
+ [<ffffffff81a0fdd9>] schedule+0x29/0x70
+ [<ffffffff81a0fe80>] io_schedule+0x60/0x80
+ [<ffffffff81101eee>] sleep_on_page+0xe/0x20
+ [<ffffffff81a0e1ca>] __wait_on_bit_lock+0x5a/0xc0
+ [<ffffffff81101ed7>] __lock_page+0x67/0x70
+ [<ffffffff8106f750>] ? autoremove_wake_function+0x40/0x40
+ [<ffffffff811867e6>] ? bio_add_page+0x36/0x40
+ [<ffffffff8110b692>] set_page_dirty_lock+0x52/0x60
+ [<ffffffff81186021>] bio_set_pages_dirty+0x51/0x70
+ [<ffffffff8118c6b4>] do_blockdev_direct_IO+0xb24/0xeb0
+ [<ffffffff811e71a0>] ? ext3_get_blocks_handle+0xe00/0xe00
+ [<ffffffff8118ca95>] __blockdev_direct_IO+0x55/0x60
+ [<ffffffff811e71a0>] ? ext3_get_blocks_handle+0xe00/0xe00
+ [<ffffffff811e91c8>] ext3_direct_IO+0xf8/0x390
+ [<ffffffff811e71a0>] ? ext3_get_blocks_handle+0xe00/0xe00
+ [<ffffffff81004b60>] ? xen_mc_flush+0xb0/0x1b0
+ [<ffffffff81104027>] generic_file_aio_read+0x737/0x780
+ [<ffffffff813bedeb>] ? gnttab_map_refs+0x15b/0x1e0
+ [<ffffffff811038f0>] ? find_get_pages+0x150/0x150
+ [<ffffffff8119736c>] aio_rw_vect_retry+0x7c/0x1d0
+ [<ffffffff811972f0>] ? lookup_ioctx+0x90/0x90
+ [<ffffffff81198856>] aio_run_iocb+0x66/0x1a0
+ [<ffffffff811998b8>] do_io_submit+0x708/0xb90
+ [<ffffffff81199d50>] sys_io_submit+0x10/0x20
+ [<ffffffff81a18d69>] system_call_fastpath+0x16/0x1b
+
+The explanation is in the comment within the code:
+
+We need to do this because the pages shared by the frontend
+(xen-blkfront) can be already locked (lock_page, called by
+do_read_cache_page); when the userspace backend tries to use them
+with direct_IO, mfn_to_pfn returns the pfn of the frontend, so
+do_blockdev_direct_IO is going to try to lock the same pages
+again resulting in a deadlock.
+
+A simplified call graph looks like this:
+
+pygrub                          QEMU
+-----------------------------------------------
+do_read_cache_page              io_submit
+  |                              |
+lock_page                       ext3_direct_IO
+                                 |
+                                bio_add_page
+                                 |
+                                lock_page
+
+Internally the xen-blkback uses m2p_add_override to swizzle (temporarily)
+a 'struct page' to have a different MFN (so that it can point to another
+guest). It also can easily find out whether another pfn corresponding
+to the mfn exists in the m2p, and can set the FOREIGN bit
+in the p2m, making sure that mfn_to_pfn returns the pfn of the backend.
+
+This allows the backend to perform direct_IO on these pages, but as a
+side effect prevents the frontend from using get_user_pages_fast on
+them while they are being shared with the backend.
+
+Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/xen/p2m.c |   36 ++++++++++++++++++++++++++++++++++++
+ 1 file changed, 36 insertions(+)
+
+--- a/arch/x86/xen/p2m.c
++++ b/arch/x86/xen/p2m.c
+@@ -688,6 +688,7 @@ int m2p_add_override(unsigned long mfn,
+       unsigned long uninitialized_var(address);
+       unsigned level;
+       pte_t *ptep = NULL;
++      int ret = 0;
+ 
+       pfn = page_to_pfn(page);
+       if (!PageHighMem(page)) {
+@@ -711,6 +712,24 @@ int m2p_add_override(unsigned long mfn,
+       list_add(&page->lru,  &m2p_overrides[mfn_hash(mfn)]);
+       spin_unlock_irqrestore(&m2p_override_lock, flags);
+ 
++      /* p2m(m2p(mfn)) == mfn: the mfn is already present somewhere in
++       * this domain. Set the FOREIGN_FRAME_BIT in the p2m for the other
++       * pfn so that the following mfn_to_pfn(mfn) calls will return the
++       * pfn from the m2p_override (the backend pfn) instead.
++       * We need to do this because the pages shared by the frontend
++       * (xen-blkfront) can be already locked (lock_page, called by
++       * do_read_cache_page); when the userspace backend tries to use them
++       * with direct_IO, mfn_to_pfn returns the pfn of the frontend, so
++       * do_blockdev_direct_IO is going to try to lock the same pages
++       * again resulting in a deadlock.
++       * As a side effect get_user_pages_fast might not be safe on the
++       * frontend pages while they are being shared with the backend,
++       * because mfn_to_pfn (that ends up being called by GUPF) will
++       * return the backend pfn rather than the frontend pfn. */
++      ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
++      if (ret == 0 && get_phys_to_machine(pfn) == mfn)
++              set_phys_to_machine(pfn, FOREIGN_FRAME(mfn));
++
+       return 0;
+ }
+ EXPORT_SYMBOL_GPL(m2p_add_override);
+@@ -722,6 +741,7 @@ int m2p_remove_override(struct page *pag
+       unsigned long uninitialized_var(address);
+       unsigned level;
+       pte_t *ptep = NULL;
++      int ret = 0;
+ 
+       pfn = page_to_pfn(page);
+       mfn = get_phys_to_machine(pfn);
+@@ -748,6 +768,22 @@ int m2p_remove_override(struct page *pag
+               /* No tlb flush necessary because the caller already
+                * left the pte unmapped. */
+ 
++      /* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present
++       * somewhere in this domain, even before being added to the
++       * m2p_override (see comment above in m2p_add_override).
++       * If there are no other entries in the m2p_override corresponding
++       * to this mfn, then remove the FOREIGN_FRAME_BIT from the p2m for
++       * the original pfn (the one shared by the frontend): the backend
++       * cannot do any IO on this page anymore because it has been
++       * unshared. Removing the FOREIGN_FRAME_BIT from the p2m entry of
++       * the original pfn causes mfn_to_pfn(mfn) to return the frontend
++       * pfn again. */
++      mfn &= ~FOREIGN_FRAME_BIT;
++      ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
++      if (ret == 0 && get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) &&
++                      m2p_find_override(mfn) == NULL)
++              set_phys_to_machine(pfn, mfn);
++
+       return 0;
+ }
+ EXPORT_SYMBOL_GPL(m2p_remove_override);
diff --git a/queue-3.0/xen-p2m-reserve-8mb-of-_brk-space-for-p2m-leafs-when-populating-back.patch b/queue-3.0/xen-p2m-reserve-8mb-of-_brk-space-for-p2m-leafs-when-populating-back.patch

new file mode 100644 (file)

index 0000000..abe1288
--- /dev/null
+++ b/queue-3.0/xen-p2m-reserve-8mb-of-_brk-space-for-p2m-leafs-when-populating-back.patch
@@ -0,0 +1,82 @@
+From 5bc6f9888db5739abfa0cae279b4b442e4db8049 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Date: Mon, 30 Jul 2012 10:18:05 -0400
+Subject: xen/p2m: Reserve 8MB of _brk space for P2M leafs when populating back.
+
+From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+
+commit 5bc6f9888db5739abfa0cae279b4b442e4db8049 upstream.
+
+When we release pages back during bootup:
+
+Freeing  9d-100 pfn range: 99 pages freed
+Freeing  9cf36-9d0d2 pfn range: 412 pages freed
+Freeing  9f6bd-9f6bf pfn range: 2 pages freed
+Freeing  9f714-9f7bf pfn range: 171 pages freed
+Freeing  9f7e0-9f7ff pfn range: 31 pages freed
+Freeing  9f800-100000 pfn range: 395264 pages freed
+Released 395979 pages of unused memory
+
+We then try to populate those pages back. In the P2M tree however
+the space for those leafs must be reserved - as such we use extend_brk.
+We reserve 8MB of _brk space, which means we can fit over
+1048576 PFNs - which is more than we should ever need.
+
+Without this, on certain compilations of the kernel we would hit:
+
+(XEN) domain_crash_sync called from entry.S
+(XEN) CPU:    0
+(XEN) RIP:    e033:[<ffffffff818aad3b>]
+(XEN) RFLAGS: 0000000000000206   EM: 1   CONTEXT: pv guest
+(XEN) rax: ffffffff81a7c000   rbx: 000000000000003d   rcx: 0000000000001000
+(XEN) rdx: ffffffff81a7b000   rsi: 0000000000001000   rdi: 0000000000001000
+(XEN) rbp: ffffffff81801cd8   rsp: ffffffff81801c98   r8:  0000000000100000
+(XEN) r9:  ffffffff81a7a000   r10: 0000000000000001   r11: 0000000000000003
+(XEN) r12: 0000000000000004   r13: 0000000000000004   r14: 000000000000003d
+(XEN) r15: 00000000000001e8   cr0: 000000008005003b   cr4: 00000000000006f0
+(XEN) cr3: 0000000125803000   cr2: 0000000000000000
+(XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: e02b   cs: e033
+(XEN) Guest stack trace from rsp=ffffffff81801c98:
+
+.. which is extend_brk hitting a BUG_ON.
+
+Interestingly enough, most of the time we are not going to hit this
+b/c the _brk space is quite large (v3.5):
+ ffffffff81a25000 B __brk_base
+ ffffffff81e43000 B __brk_limit
+= ~4MB.
+
+vs earlier kernels (with this back-ported), the space is smaller:
+ ffffffff81a25000 B __brk_base
+ ffffffff81a7b000 B __brk_limit
+= 344 kBytes.
+
+where we would certainly run out of space and hit the BUG_ON in extend_brk.
+
+Note that git commit c3d93f880197953f86ab90d9da4744e926b38e33
+(xen: populate correct number of pages when across mem boundary (v2))
+exposed this bug.
+
+[v1: Made it 8MB of _brk space instead of 4MB per Jan's suggestion]
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/xen/p2m.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/arch/x86/xen/p2m.c
++++ b/arch/x86/xen/p2m.c
+@@ -192,6 +192,11 @@ RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MA
+  * boundary violation will require three middle nodes. */
+ RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3);
+ 
++/* When we populate back during bootup, the amount of pages can vary. The
++ * max we have seen is 395979, but that does not mean it can't be more.
++ * Some machines can even have 3GB I/O holes. So let's reserve enough
++ * for 4GB of I/O and E820 holes. */
++RESERVE_BRK(p2m_populated, PMD_SIZE * 4);
+ static inline unsigned p2m_top_index(unsigned long pfn)
+ {
+       BUG_ON(pfn >= MAX_P2M_PFN);
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Thu, 16 Aug 2012 23:21:59 +0000 (16:21 -0700)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Thu, 16 Aug 2012 23:21:59 +0000 (16:21 -0700)
queue-3.0/fuse-verify-all-ioctl-retry-iov-elements.patch	[new file with mode: 0644]	patch \| blob
queue-3.0/series		patch \| blob \| blame \| history
queue-3.0/xen-mark-local-pages-as-foreign-in-the-m2p_override.patch	[new file with mode: 0644]	patch \| blob
queue-3.0/xen-p2m-reserve-8mb-of-_brk-space-for-p2m-leafs-when-populating-back.patch	[new file with mode: 0644]	patch \| blob