xen-setup-populate-freed-mfns-from-non-ram-e820-entries-and-gaps-to-e820-ram.patch
xen-setup-combine-the-two-hypercall-functions-since-they-are-quite-similar.patch
xen-setup-update-va-mapping-when-releasing-memory-during-setup.patch
+xen-balloon-subtract-from-xen_released_pages-the-count-that-is-populated.patch
+xen-populate-correct-number-of-pages-when-across-mem-boundary-v2.patch
+xen-p2m-reserve-8mb-of-_brk-space-for-p2m-leafs-when-populating-back.patch
+xen-p2m-reuse-existing-p2m-leafs-if-they-are-filled-with-1-1-pfns-or-invalid.patch
--- /dev/null
+From 58b7b53a36b0be8081fbfc91aeea24b83c20ca1b Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Date: Tue, 29 May 2012 12:36:43 -0400
+Subject: xen/balloon: Subtract from xen_released_pages the count that is populated.
+
+From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+
+commit 58b7b53a36b0be8081fbfc91aeea24b83c20ca1b upstream.
+
+We did not take into account that xen_released_pages would be
+used outside the initial E820 parsing code. As such we
+did not subtract from xen_released_pages the count of pages
+that we had populated back (instead we just did a simple
+extra_pages = released - populated).
+
+The balloon driver uses xen_released_pages to set the initial
+current_pages count. If this is wrong (too low) then when a new
+(higher) target is set, the balloon driver will request too many pages
+from Xen."
+
+This fixes errors such as:
+
+(XEN) memory.c:133:d0 Could not allocate order=0 extent: id=0 memflags=0 (51 of 512)
+during bootup and
+free_memory : 0
+
+where the free_memory should be 128.
+
+Acked-by: David Vrabel <david.vrabel@citrix.com>
+[v1: Per David's review, made the git commit message better]
+Signed-off-by: Daniel Kiper <daniel.kiper@oracle.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/xen/setup.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/xen/setup.c
++++ b/arch/x86/xen/setup.c
+@@ -399,7 +399,8 @@ char * __init xen_memory_setup(void)
+ populated = xen_populate_chunk(map, memmap.nr_entries,
+ max_pfn, &last_pfn, xen_released_pages);
+
+- extra_pages += (xen_released_pages - populated);
++ xen_released_pages -= populated;
++ extra_pages += xen_released_pages;
+
+ if (last_pfn > max_pfn) {
+ max_pfn = min(MAX_DOMAIN_PAGES, last_pfn);
--- /dev/null
+From 5bc6f9888db5739abfa0cae279b4b442e4db8049 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Date: Mon, 30 Jul 2012 10:18:05 -0400
+Subject: xen/p2m: Reserve 8MB of _brk space for P2M leafs when populating back.
+
+From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+
+commit 5bc6f9888db5739abfa0cae279b4b442e4db8049 upstream.
+
+When we release pages back during bootup:
+
+Freeing 9d-100 pfn range: 99 pages freed
+Freeing 9cf36-9d0d2 pfn range: 412 pages freed
+Freeing 9f6bd-9f6bf pfn range: 2 pages freed
+Freeing 9f714-9f7bf pfn range: 171 pages freed
+Freeing 9f7e0-9f7ff pfn range: 31 pages freed
+Freeing 9f800-100000 pfn range: 395264 pages freed
+Released 395979 pages of unused memory
+
+We then try to populate those pages back. In the P2M tree however
+the space for those leafs must be reserved - as such we use extend_brk.
+We reserve 8MB of _brk space, which means we can fit over
+1048576 PFNs - which is more than we should ever need.
+
+Without this, on certain compilations of the kernel we would hit:
+
+(XEN) domain_crash_sync called from entry.S
+(XEN) CPU: 0
+(XEN) RIP: e033:[<ffffffff818aad3b>]
+(XEN) RFLAGS: 0000000000000206 EM: 1 CONTEXT: pv guest
+(XEN) rax: ffffffff81a7c000 rbx: 000000000000003d rcx: 0000000000001000
+(XEN) rdx: ffffffff81a7b000 rsi: 0000000000001000 rdi: 0000000000001000
+(XEN) rbp: ffffffff81801cd8 rsp: ffffffff81801c98 r8: 0000000000100000
+(XEN) r9: ffffffff81a7a000 r10: 0000000000000001 r11: 0000000000000003
+(XEN) r12: 0000000000000004 r13: 0000000000000004 r14: 000000000000003d
+(XEN) r15: 00000000000001e8 cr0: 000000008005003b cr4: 00000000000006f0
+(XEN) cr3: 0000000125803000 cr2: 0000000000000000
+(XEN) ds: 0000 es: 0000 fs: 0000 gs: 0000 ss: e02b cs: e033
+(XEN) Guest stack trace from rsp=ffffffff81801c98:
+
+.. which is extend_brk hitting a BUG_ON.
+
+Interestingly enough, most of the time we are not going to hit this
+b/c the _brk space is quite large (v3.5):
+ ffffffff81a25000 B __brk_base
+ ffffffff81e43000 B __brk_limit
+= ~4MB.
+
+vs earlier kernels (with this back-ported), the space is smaller:
+ ffffffff81a25000 B __brk_base
+ ffffffff81a7b000 B __brk_limit
+= 344 kBytes.
+
+where we would certainly hit this BUG_ON in extend_brk.
+
+Note that git commit c3d93f880197953f86ab90d9da4744e926b38e33
+(xen: populate correct number of pages when across mem boundary (v2))
+exposed this bug.
+
+[v1: Made it 8MB of _brk space instead of 4MB per Jan's suggestion]
+
+Signed-off-by: Daniel Kiper <daniel.kiper@oracle.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/xen/p2m.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/arch/x86/xen/p2m.c
++++ b/arch/x86/xen/p2m.c
+@@ -194,6 +194,11 @@ RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MA
+ * boundary violation will require three middle nodes. */
+ RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3);
+
++/* When we populate back during bootup, the amount of pages can vary. The
++ * max we have is seen is 395979, but that does not mean it can't be more.
++ * But some machines can have 3GB I/O holes even. So lets reserve enough
++ * for 4GB of I/O and E820 holes. */
++RESERVE_BRK(p2m_populated, PMD_SIZE * 4);
+ static inline unsigned p2m_top_index(unsigned long pfn)
+ {
+ BUG_ON(pfn >= MAX_P2M_PFN);
--- /dev/null
+From 250a41e0ecc433cdd553a364d0fc74c766425209 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Date: Fri, 17 Aug 2012 09:27:35 -0400
+Subject: xen/p2m: Reuse existing P2M leafs if they are filled with 1:1 PFNs or INVALID.
+
+From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+
+commit 250a41e0ecc433cdd553a364d0fc74c766425209 upstream.
+
+If a P2M leaf is completely packed with INVALID_P2M_ENTRY or with
+1:1 PFNs (so IDENTITY_FRAME type PFNs), we can swap the P2M leaf
+with either a p2m_missing or p2m_identity respectively. The old
+page (which was created via extend_brk or was grafted on from the
+mfn_list) can be re-used for setting new PFNs.
+
+This also means we can remove git commit
+5bc6f9888db5739abfa0cae279b4b442e4db8049
+(xen/p2m: Reserve 8MB of _brk space for P2M leafs when populating back),
+which tried to fix this, and make the amount that is required to be
+reserved much smaller.
+
+Signed-off-by: Daniel Kiper <daniel.kiper@oracle.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/xen/p2m.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 92 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/xen/p2m.c
++++ b/arch/x86/xen/p2m.c
+@@ -196,9 +196,11 @@ RESERVE_BRK(p2m_mid_identity, PAGE_SIZE
+
+ /* When we populate back during bootup, the amount of pages can vary. The
+ * max we have is seen is 395979, but that does not mean it can't be more.
+- * But some machines can have 3GB I/O holes even. So lets reserve enough
+- * for 4GB of I/O and E820 holes. */
+-RESERVE_BRK(p2m_populated, PMD_SIZE * 4);
++ * Some machines can have 3GB I/O holes even. With early_can_reuse_p2m_middle
++ * it can re-use Xen provided mfn_list array, so we only need to allocate at
++ * most three P2M top nodes. */
++RESERVE_BRK(p2m_populated, PAGE_SIZE * 3);
++
+ static inline unsigned p2m_top_index(unsigned long pfn)
+ {
+ BUG_ON(pfn >= MAX_P2M_PFN);
+@@ -575,12 +577,99 @@ static bool __init early_alloc_p2m(unsig
+ }
+ return true;
+ }
++
++/*
++ * Skim over the P2M tree looking at pages that are either filled with
++ * INVALID_P2M_ENTRY or with 1:1 PFNs. If found, re-use that page and
++ * replace the P2M leaf with a p2m_missing or p2m_identity.
++ * Stick the old page in the new P2M tree location.
++ */
++bool __init early_can_reuse_p2m_middle(unsigned long set_pfn, unsigned long set_mfn)
++{
++ unsigned topidx;
++ unsigned mididx;
++ unsigned ident_pfns;
++ unsigned inv_pfns;
++ unsigned long *p2m;
++ unsigned long *mid_mfn_p;
++ unsigned idx;
++ unsigned long pfn;
++
++ /* We only look when this entails a P2M middle layer */
++ if (p2m_index(set_pfn))
++ return false;
++
++ for (pfn = 0; pfn <= MAX_DOMAIN_PAGES; pfn += P2M_PER_PAGE) {
++ topidx = p2m_top_index(pfn);
++
++ if (!p2m_top[topidx])
++ continue;
++
++ if (p2m_top[topidx] == p2m_mid_missing)
++ continue;
++
++ mididx = p2m_mid_index(pfn);
++ p2m = p2m_top[topidx][mididx];
++ if (!p2m)
++ continue;
++
++ if ((p2m == p2m_missing) || (p2m == p2m_identity))
++ continue;
++
++ if ((unsigned long)p2m == INVALID_P2M_ENTRY)
++ continue;
++
++ ident_pfns = 0;
++ inv_pfns = 0;
++ for (idx = 0; idx < P2M_PER_PAGE; idx++) {
++ /* IDENTITY_PFNs are 1:1 */
++ if (p2m[idx] == IDENTITY_FRAME(pfn + idx))
++ ident_pfns++;
++ else if (p2m[idx] == INVALID_P2M_ENTRY)
++ inv_pfns++;
++ else
++ break;
++ }
++ if ((ident_pfns == P2M_PER_PAGE) || (inv_pfns == P2M_PER_PAGE))
++ goto found;
++ }
++ return false;
++found:
++ /* Found one, replace old with p2m_identity or p2m_missing */
++ p2m_top[topidx][mididx] = (ident_pfns ? p2m_identity : p2m_missing);
++ /* And the other for save/restore.. */
++ mid_mfn_p = p2m_top_mfn_p[topidx];
++ /* NOTE: Even if it is a p2m_identity it should still be point to
++ * a page filled with INVALID_P2M_ENTRY entries. */
++ mid_mfn_p[mididx] = virt_to_mfn(p2m_missing);
++
++ /* Reset where we want to stick the old page in. */
++ topidx = p2m_top_index(set_pfn);
++ mididx = p2m_mid_index(set_pfn);
++
++ /* This shouldn't happen */
++ if (WARN_ON(p2m_top[topidx] == p2m_mid_missing))
++ early_alloc_p2m(set_pfn);
++
++ if (WARN_ON(p2m_top[topidx][mididx] != p2m_missing))
++ return false;
++
++ p2m_init(p2m);
++ p2m_top[topidx][mididx] = p2m;
++ mid_mfn_p = p2m_top_mfn_p[topidx];
++ mid_mfn_p[mididx] = virt_to_mfn(p2m);
++
++ return true;
++}
+ bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+ {
+ if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
+ if (!early_alloc_p2m(pfn))
+ return false;
+
++ if (early_can_reuse_p2m_middle(pfn, mfn))
++ return __set_phys_to_machine(pfn, mfn);
++
+ if (!early_alloc_p2m_middle(pfn, false /* boundary crossover OK!*/))
+ return false;
+
--- /dev/null
+From c3d93f880197953f86ab90d9da4744e926b38e33 Mon Sep 17 00:00:00 2001
+From: "zhenzhong.duan" <zhenzhong.duan@oracle.com>
+Date: Wed, 18 Jul 2012 13:06:39 +0800
+Subject: xen: populate correct number of pages when across mem boundary (v2)
+
+From: "zhenzhong.duan" <zhenzhong.duan@oracle.com>
+
+commit c3d93f880197953f86ab90d9da4744e926b38e33 upstream.
+
+When populating pages across a mem boundary at bootup, the populated
+page count isn't correct. This is because memory gets populated into a
+non-RAM region and is then ignored.
+
+The PFN range is also wrongly aligned when the mem boundary isn't page
+aligned.
+
+For a dom0 booted with dom_mem=3368952K(0xcd9ff000-4k) dmesg diff is:
+ [ 0.000000] Freeing 9e-100 pfn range: 98 pages freed
+ [ 0.000000] 1-1 mapping on 9e->100
+ [ 0.000000] 1-1 mapping on cd9ff->100000
+ [ 0.000000] Released 98 pages of unused memory
+ [ 0.000000] Set 206435 page(s) to 1-1 mapping
+-[ 0.000000] Populating cd9fe-cda00 pfn range: 1 pages added
++[ 0.000000] Populating cd9fe-cd9ff pfn range: 1 pages added
++[ 0.000000] Populating 100000-100061 pfn range: 97 pages added
+ [ 0.000000] BIOS-provided physical RAM map:
+ [ 0.000000] Xen: 0000000000000000 - 000000000009e000 (usable)
+ [ 0.000000] Xen: 00000000000a0000 - 0000000000100000 (reserved)
+ [ 0.000000] Xen: 0000000000100000 - 00000000cd9ff000 (usable)
+ [ 0.000000] Xen: 00000000cd9ffc00 - 00000000cda53c00 (ACPI NVS)
+...
+ [ 0.000000] Xen: 0000000100000000 - 0000000100061000 (usable)
+ [ 0.000000] Xen: 0000000100061000 - 000000012c000000 (unusable)
+...
+ [ 0.000000] MEMBLOCK configuration:
+...
+-[ 0.000000] reserved[0x4] [0x000000cd9ff000-0x000000cd9ffbff], 0xc00 bytes
+-[ 0.000000] reserved[0x5] [0x00000100000000-0x00000100060fff], 0x61000 bytes
+
+Related xen memory layout:
+(XEN) Xen-e820 RAM map:
+(XEN) 0000000000000000 - 000000000009ec00 (usable)
+(XEN) 00000000000f0000 - 0000000000100000 (reserved)
+(XEN) 0000000000100000 - 00000000cd9ffc00 (usable)
+
+Signed-off-by: Zhenzhong Duan <zhenzhong.duan@oracle.com>
+[v2: If xen_do_chunk() fails to populate, abort this chunk and any others]
+Suggested by David, thanks.
+Signed-off-by: Daniel Kiper <daniel.kiper@oracle.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/xen/setup.c | 23 +++++++++--------------
+ 1 file changed, 9 insertions(+), 14 deletions(-)
+
+--- a/arch/x86/xen/setup.c
++++ b/arch/x86/xen/setup.c
+@@ -165,25 +165,24 @@ static unsigned long __init xen_populate
+ unsigned long dest_pfn;
+
+ for (i = 0, entry = list; i < map_size; i++, entry++) {
+- unsigned long credits = credits_left;
+ unsigned long s_pfn;
+ unsigned long e_pfn;
+ unsigned long pfns;
+ long capacity;
+
+- if (credits <= 0)
++ if (credits_left <= 0)
+ break;
+
+ if (entry->type != E820_RAM)
+ continue;
+
+- e_pfn = PFN_UP(entry->addr + entry->size);
++ e_pfn = PFN_DOWN(entry->addr + entry->size);
+
+ /* We only care about E820 after the xen_start_info->nr_pages */
+ if (e_pfn <= max_pfn)
+ continue;
+
+- s_pfn = PFN_DOWN(entry->addr);
++ s_pfn = PFN_UP(entry->addr);
+ /* If the E820 falls within the nr_pages, we want to start
+ * at the nr_pages PFN.
+ * If that would mean going past the E820 entry, skip it
+@@ -192,23 +191,19 @@ static unsigned long __init xen_populate
+ capacity = e_pfn - max_pfn;
+ dest_pfn = max_pfn;
+ } else {
+- /* last_pfn MUST be within E820_RAM regions */
+- if (*last_pfn && e_pfn >= *last_pfn)
+- s_pfn = *last_pfn;
+ capacity = e_pfn - s_pfn;
+ dest_pfn = s_pfn;
+ }
+- /* If we had filled this E820_RAM entry, go to the next one. */
+- if (capacity <= 0)
+- continue;
+
+- if (credits > capacity)
+- credits = capacity;
++ if (credits_left < capacity)
++ capacity = credits_left;
+
+- pfns = xen_do_chunk(dest_pfn, dest_pfn + credits, false);
++ pfns = xen_do_chunk(dest_pfn, dest_pfn + capacity, false);
+ done += pfns;
+- credits_left -= pfns;
+ *last_pfn = (dest_pfn + pfns);
++ if (pfns < capacity)
++ break;
++ credits_left -= pfns;
+ }
+ return done;
+ }