--- /dev/null
+From fb6ccff667712c46b4501b920ea73a326e49626a Mon Sep 17 00:00:00 2001
+From: Zach Brown <zab@redhat.com>
+Date: Tue, 24 Jul 2012 12:10:11 -0700
+Subject: fuse: verify all ioctl retry iov elements
+
+From: Zach Brown <zab@redhat.com>
+
+commit fb6ccff667712c46b4501b920ea73a326e49626a upstream.
+
+Commit 7572777eef78ebdee1ecb7c258c0ef94d35bad16 attempted to verify that
+the total iovec from the client doesn't overflow iov_length(), but it
+only checked the first element. The iovec could still overflow by
+starting with a small element. The obvious fix is to check all the
+elements.
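+
+As a stand-alone illustration, a minimal userspace model of the fixed
+check (the verify_iov harness, a PAGE_SHIFT of 12, and the constant
+value are assumptions for the sketch; only the loop mirrors the kernel
+code below):
+
+  #include <errno.h>
+  #include <stddef.h>
+  #include <sys/uio.h>
+
+  #define PAGE_SHIFT 12                 /* assumed: 4K pages */
+  #define FUSE_MAX_PAGES_PER_REQ 32
+
+  /* Every element must fit in the remaining budget.  Without the
+   * iov++ the loop re-tested iov[0] on each pass, so elements past
+   * the first were never checked and a later huge element slipped
+   * through. */
+  static int verify_iov(const struct iovec *iov, size_t count)
+  {
+          size_t max = (size_t)FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT;
+          size_t n;
+
+          for (n = 0; n < count; n++, iov++) {
+                  if (iov->iov_len > max)
+                          return -ENOMEM;
+                  max -= iov->iov_len;  /* shrink the budget */
+          }
+          return 0;
+  }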
+
+The overflow case doesn't look dangerous to the kernel as the copy is
+limited by the length after the overflow. This fix restores the
+intention of returning an error instead of successfully copying less
+than the iovec represented.
+
+I found this by code inspection. I built it but don't have a test case.
+I'm cc:ing stable because the initial commit did as well.
+
+Signed-off-by: Zach Brown <zab@redhat.com>
+Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/fuse/file.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/fuse/file.c
++++ b/fs/fuse/file.c
+@@ -1710,7 +1710,7 @@ static int fuse_verify_ioctl_iov(struct
+ size_t n;
+ u32 max = FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT;
+
+- for (n = 0; n < count; n++) {
++ for (n = 0; n < count; n++, iov++) {
+ if (iov->iov_len > (size_t) max)
+ return -ENOMEM;
+ max -= iov->iov_len;
--- /dev/null
+From b9e0d95c041ca2d7ad297ee37c2e9cfab67a188f Mon Sep 17 00:00:00 2001
+From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
+Date: Wed, 23 May 2012 18:57:20 +0100
+Subject: xen: mark local pages as FOREIGN in the m2p_override
+
+From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
+
+commit b9e0d95c041ca2d7ad297ee37c2e9cfab67a188f upstream.
+
+When the frontend and the backend reside in the same domain, even if we
+add pages to the m2p_override, these pages will never be returned by
+mfn_to_pfn because the check "get_phys_to_machine(pfn) != mfn" will
+always fail, so the pfn of the frontend will be returned instead
+(resulting in a deadlock because the frontend pages are already locked).
+
+INFO: task qemu-system-i38:1085 blocked for more than 120 seconds.
+"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+qemu-system-i38 D ffff8800cfc137c0 0 1085 1 0x00000000
+ ffff8800c47ed898 0000000000000282 ffff8800be4596b0 00000000000137c0
+ ffff8800c47edfd8 ffff8800c47ec010 00000000000137c0 00000000000137c0
+ ffff8800c47edfd8 00000000000137c0 ffffffff82213020 ffff8800be4596b0
+Call Trace:
+ [<ffffffff81101ee0>] ? __lock_page+0x70/0x70
+ [<ffffffff81a0fdd9>] schedule+0x29/0x70
+ [<ffffffff81a0fe80>] io_schedule+0x60/0x80
+ [<ffffffff81101eee>] sleep_on_page+0xe/0x20
+ [<ffffffff81a0e1ca>] __wait_on_bit_lock+0x5a/0xc0
+ [<ffffffff81101ed7>] __lock_page+0x67/0x70
+ [<ffffffff8106f750>] ? autoremove_wake_function+0x40/0x40
+ [<ffffffff811867e6>] ? bio_add_page+0x36/0x40
+ [<ffffffff8110b692>] set_page_dirty_lock+0x52/0x60
+ [<ffffffff81186021>] bio_set_pages_dirty+0x51/0x70
+ [<ffffffff8118c6b4>] do_blockdev_direct_IO+0xb24/0xeb0
+ [<ffffffff811e71a0>] ? ext3_get_blocks_handle+0xe00/0xe00
+ [<ffffffff8118ca95>] __blockdev_direct_IO+0x55/0x60
+ [<ffffffff811e71a0>] ? ext3_get_blocks_handle+0xe00/0xe00
+ [<ffffffff811e91c8>] ext3_direct_IO+0xf8/0x390
+ [<ffffffff811e71a0>] ? ext3_get_blocks_handle+0xe00/0xe00
+ [<ffffffff81004b60>] ? xen_mc_flush+0xb0/0x1b0
+ [<ffffffff81104027>] generic_file_aio_read+0x737/0x780
+ [<ffffffff813bedeb>] ? gnttab_map_refs+0x15b/0x1e0
+ [<ffffffff811038f0>] ? find_get_pages+0x150/0x150
+ [<ffffffff8119736c>] aio_rw_vect_retry+0x7c/0x1d0
+ [<ffffffff811972f0>] ? lookup_ioctx+0x90/0x90
+ [<ffffffff81198856>] aio_run_iocb+0x66/0x1a0
+ [<ffffffff811998b8>] do_io_submit+0x708/0xb90
+ [<ffffffff81199d50>] sys_io_submit+0x10/0x20
+ [<ffffffff81a18d69>] system_call_fastpath+0x16/0x1b
+
+The explanation is in the comment within the code:
+
+We need to do this because the pages shared by the frontend
+(xen-blkfront) can be already locked (lock_page, called by
+do_read_cache_page); when the userspace backend tries to use them
+with direct_IO, mfn_to_pfn returns the pfn of the frontend, so
+do_blockdev_direct_IO is going to try to lock the same pages
+again resulting in a deadlock.
+
+A simplified call graph looks like this:
+
+pygrub                          QEMU
+-----------------------------------------------
+do_read_cache_page              io_submit
+  |                               |
+lock_page                       ext3_direct_IO
+                                  |
+                                bio_add_page
+                                  |
+                                lock_page
+
+Internally the xen-blkback uses m2p_add_override to swizzle (temporarily)
+a 'struct page' to have a different MFN (so that it can point to another
+guest). It can also easily find out whether another pfn corresponding
+to the mfn exists in the m2p, and can set the FOREIGN bit
+in the p2m, making sure that mfn_to_pfn returns the pfn of the backend.
+
+This allows the backend to perform direct_IO on these pages, but as a
+side effect prevents the frontend from using get_user_pages_fast on
+them while they are being shared with the backend.
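+
+As a toy model of the lookup this changes (a hypothetical stand-alone
+sketch; the real mfn_to_pfn and get_phys_to_machine live in
+arch/x86/xen/ and are more involved):
+
+  #include <stdint.h>
+
+  #define FOREIGN_FRAME_BIT  (1UL << 63)        /* top bit on 64-bit */
+  #define FOREIGN_FRAME(m)   ((m) | FOREIGN_FRAME_BIT)
+
+  static unsigned long p2m[16];   /* pfn -> mfn, this domain's view */
+  static unsigned long m2p[16];   /* mfn -> pfn, hypervisor's view  */
+  static long override[16];       /* mfn -> backend pfn, -1 if none */
+
+  static unsigned long mfn_to_pfn(unsigned long mfn)
+  {
+          unsigned long pfn = m2p[mfn];
+
+          /* For a local page p2m[pfn] == mfn, so before this patch
+           * the test below always failed and the frontend pfn was
+           * returned even when an override existed.  Writing
+           * FOREIGN_FRAME(mfn) into the p2m makes the test succeed,
+           * steering the lookup to the backend pfn instead. */
+          if (p2m[pfn] != mfn && override[mfn] >= 0)
+                  return (unsigned long)override[mfn];
+
+          return pfn;
+  }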
+
+Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/xen/p2m.c | 36 ++++++++++++++++++++++++++++++++++++
+ 1 file changed, 36 insertions(+)
+
+--- a/arch/x86/xen/p2m.c
++++ b/arch/x86/xen/p2m.c
+@@ -688,6 +688,7 @@ int m2p_add_override(unsigned long mfn,
+ unsigned long uninitialized_var(address);
+ unsigned level;
+ pte_t *ptep = NULL;
++ int ret = 0;
+
+ pfn = page_to_pfn(page);
+ if (!PageHighMem(page)) {
+@@ -711,6 +712,24 @@ int m2p_add_override(unsigned long mfn,
+ list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]);
+ spin_unlock_irqrestore(&m2p_override_lock, flags);
+
++ /* p2m(m2p(mfn)) == mfn: the mfn is already present somewhere in
++ * this domain. Set the FOREIGN_FRAME_BIT in the p2m for the other
++ * pfn so that the following mfn_to_pfn(mfn) calls will return the
++ * pfn from the m2p_override (the backend pfn) instead.
++ * We need to do this because the pages shared by the frontend
++ * (xen-blkfront) can be already locked (lock_page, called by
++ * do_read_cache_page); when the userspace backend tries to use them
++ * with direct_IO, mfn_to_pfn returns the pfn of the frontend, so
++ * do_blockdev_direct_IO is going to try to lock the same pages
++ * again resulting in a deadlock.
++ * As a side effect get_user_pages_fast might not be safe on the
++ * frontend pages while they are being shared with the backend,
++ * because mfn_to_pfn (that ends up being called by GUPF) will
++ * return the backend pfn rather than the frontend pfn. */
++ ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
++ if (ret == 0 && get_phys_to_machine(pfn) == mfn)
++ set_phys_to_machine(pfn, FOREIGN_FRAME(mfn));
++
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(m2p_add_override);
+@@ -722,6 +741,7 @@ int m2p_remove_override(struct page *pag
+ unsigned long uninitialized_var(address);
+ unsigned level;
+ pte_t *ptep = NULL;
++ int ret = 0;
+
+ pfn = page_to_pfn(page);
+ mfn = get_phys_to_machine(pfn);
+@@ -748,6 +768,22 @@ int m2p_remove_override(struct page *pag
+ /* No tlb flush necessary because the caller already
+ * left the pte unmapped. */
+
++ /* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present
++ * somewhere in this domain, even before being added to the
++ * m2p_override (see comment above in m2p_add_override).
++ * If there are no other entries in the m2p_override corresponding
++ * to this mfn, then remove the FOREIGN_FRAME_BIT from the p2m for
++ * the original pfn (the one shared by the frontend): the backend
++ * cannot do any IO on this page anymore because it has been
++ * unshared. Removing the FOREIGN_FRAME_BIT from the p2m entry of
++ * the original pfn causes mfn_to_pfn(mfn) to return the frontend
++ * pfn again. */
++ mfn &= ~FOREIGN_FRAME_BIT;
++ ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
++ if (ret == 0 && get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) &&
++ m2p_find_override(mfn) == NULL)
++ set_phys_to_machine(pfn, mfn);
++
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(m2p_remove_override);
--- /dev/null
+From 5bc6f9888db5739abfa0cae279b4b442e4db8049 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Date: Mon, 30 Jul 2012 10:18:05 -0400
+Subject: xen/p2m: Reserve 8MB of _brk space for P2M leafs when populating back.
+
+From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+
+commit 5bc6f9888db5739abfa0cae279b4b442e4db8049 upstream.
+
+When we release pages back during bootup:
+
+Freeing 9d-100 pfn range: 99 pages freed
+Freeing 9cf36-9d0d2 pfn range: 412 pages freed
+Freeing 9f6bd-9f6bf pfn range: 2 pages freed
+Freeing 9f714-9f7bf pfn range: 171 pages freed
+Freeing 9f7e0-9f7ff pfn range: 31 pages freed
+Freeing 9f800-100000 pfn range: 395264 pages freed
+Released 395979 pages of unused memory
+
+We then try to populate those pages back. In the P2M tree, however,
+the space for those leafs must be reserved, and for that we use
+extend_brk. We reserve 8MB of _brk space, which can fit 1048576
+PFNs, more than we should ever need.
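+
+The arithmetic behind that claim, as a small sketch (assuming x86-64
+with 4K pages, 8-byte p2m entries and a 2MB PMD_SIZE; the program is
+only an illustration):
+
+  #include <stdio.h>
+
+  int main(void)
+  {
+          unsigned long pmd_size = 2UL << 20;     /* PMD_SIZE, x86-64 */
+          unsigned long brk = pmd_size * 4;       /* reserved: 8MB */
+          unsigned long leafs = brk / 4096;       /* p2m leaf pages */
+          unsigned long pfns = leafs * (4096 / sizeof(unsigned long));
+
+          /* prints: 2048 leaf pages -> 1048576 PFNs (4 GB covered) */
+          printf("%lu leaf pages -> %lu PFNs (%lu GB covered)\n",
+                 leafs, pfns, (pfns * 4096UL) >> 30);
+          return 0;
+  }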
+
+Without this, on certain compilations of the kernel we would hit:
+
+(XEN) domain_crash_sync called from entry.S
+(XEN) CPU: 0
+(XEN) RIP: e033:[<ffffffff818aad3b>]
+(XEN) RFLAGS: 0000000000000206 EM: 1 CONTEXT: pv guest
+(XEN) rax: ffffffff81a7c000 rbx: 000000000000003d rcx: 0000000000001000
+(XEN) rdx: ffffffff81a7b000 rsi: 0000000000001000 rdi: 0000000000001000
+(XEN) rbp: ffffffff81801cd8 rsp: ffffffff81801c98 r8: 0000000000100000
+(XEN) r9: ffffffff81a7a000 r10: 0000000000000001 r11: 0000000000000003
+(XEN) r12: 0000000000000004 r13: 0000000000000004 r14: 000000000000003d
+(XEN) r15: 00000000000001e8 cr0: 000000008005003b cr4: 00000000000006f0
+(XEN) cr3: 0000000125803000 cr2: 0000000000000000
+(XEN) ds: 0000 es: 0000 fs: 0000 gs: 0000 ss: e02b cs: e033
+(XEN) Guest stack trace from rsp=ffffffff81801c98:
+
+... which is extend_brk() hitting a BUG_ON.
+
+Interestingly enough, most of the time we are not going to hit this
+because the _brk space is quite large (v3.5):
+ ffffffff81a25000 B __brk_base
+ ffffffff81e43000 B __brk_limit
+= ~4MB.
+
+vs earlier kernels (with this back-ported), the space is smaller:
+ ffffffff81a25000 B __brk_base
+ ffffffff81a7b000 B __brk_limit
+= 344 kBytes.
+
+where we would certainly hit the BUG_ON in extend_brk.
+
+Note that git commit c3d93f880197953f86ab90d9da4744e926b38e33
+(xen: populate correct number of pages when across mem boundary (v2))
+exposed this bug.
+
+[v1: Made it 8MB of _brk space instead of 4MB per Jan's suggestion]
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/xen/p2m.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/arch/x86/xen/p2m.c
++++ b/arch/x86/xen/p2m.c
+@@ -192,6 +192,11 @@ RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MA
+ * boundary violation will require three middle nodes. */
+ RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3);
+
++/* When we populate back during bootup, the number of pages can vary. The
++ * max we have seen is 395979, but that does not mean it can't be more.
++ * Some machines can have 3GB I/O holes even. So let's reserve enough
++ * for 4GB of I/O and E820 holes. */
++RESERVE_BRK(p2m_populated, PMD_SIZE * 4);
+ static inline unsigned p2m_top_index(unsigned long pfn)
+ {
+ BUG_ON(pfn >= MAX_P2M_PFN);