--- /dev/null
+From f742dc4a32587bff50b13dde9d8894b96851951a Mon Sep 17 00:00:00 2001
+From: Peng Tao <bergwolf@gmail.com>
+Date: Fri, 24 Aug 2012 00:27:52 +0800
+Subject: pnfsblock: fix non-aligned DIO read
+
+From: Peng Tao <bergwolf@gmail.com>
+
+commit f742dc4a32587bff50b13dde9d8894b96851951a upstream.
+
+For a DIO read, if it is not sector aligned, we should reject it
+and resend it via the MDS; otherwise there might be data corruption.
+Also teach bl_read_pagelist to handle partial page reads for DIO.
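+
+The new check reduces to sector alignment rather than page alignment,
+and the read loop learns to clip a page that the request only partly
+covers. A minimal sketch of both ideas, assuming the stock kernel
+helpers IS_ALIGNED() and min_t() (the function names here are
+illustrative; the in-tree check is is_aligned_req() and the clipping
+is open-coded in bl_read_pagelist):
+
+    #include <linux/kernel.h>  /* IS_ALIGNED(), min_t() */
+    #include <linux/pagemap.h> /* PAGE_CACHE_SIZE, PAGE_CACHE_MASK */
+
+    #define SECTOR_SHIFT 9
+    #define SECTOR_SIZE  (1 << SECTOR_SHIFT) /* 512 bytes */
+
+    /* A DIO read may use the block layout only if both its offset
+     * and its byte count are sector multiples; anything else gets
+     * resent through the MDS. */
+    static bool dio_read_is_sector_aligned(u64 offset, u32 count)
+    {
+        return IS_ALIGNED(offset, SECTOR_SIZE) &&
+               IS_ALIGNED(count, SECTOR_SIZE);
+    }
+
+    /* Clip an aligned DIO read that starts mid-page to the part of
+     * the page it actually covers. */
+    static unsigned int dio_pg_len(loff_t f_offset, size_t bytes_left)
+    {
+        unsigned int pg_offset = f_offset & ~PAGE_CACHE_MASK;
+
+        return min_t(unsigned int,
+                     PAGE_CACHE_SIZE - pg_offset, bytes_left);
+    }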
+
+Signed-off-by: Peng Tao <tao.peng@emc.com>
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfs/blocklayout/blocklayout.c | 77 +++++++++++++++++++++++----------------
+ 1 file changed, 46 insertions(+), 31 deletions(-)
+
+--- a/fs/nfs/blocklayout/blocklayout.c
++++ b/fs/nfs/blocklayout/blocklayout.c
+@@ -242,14 +242,6 @@ bl_end_par_io_read(void *data, int unuse
+ schedule_work(&rdata->task.u.tk_work);
+ }
+
+-static bool
+-bl_check_alignment(u64 offset, u32 len, unsigned long blkmask)
+-{
+- if ((offset & blkmask) || (len & blkmask))
+- return false;
+- return true;
+-}
+-
+ static enum pnfs_try_status
+ bl_read_pagelist(struct nfs_read_data *rdata)
+ {
+@@ -260,15 +252,15 @@ bl_read_pagelist(struct nfs_read_data *r
+ sector_t isect, extent_length = 0;
+ struct parallel_io *par;
+ loff_t f_offset = rdata->args.offset;
++ size_t bytes_left = rdata->args.count;
++ unsigned int pg_offset, pg_len;
+ struct page **pages = rdata->args.pages;
+ int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
++ const bool is_dio = (header->dreq != NULL);
+
+ dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
+ rdata->pages.npages, f_offset, (unsigned int)rdata->args.count);
+
+- if (!bl_check_alignment(f_offset, rdata->args.count, PAGE_CACHE_MASK))
+- goto use_mds;
+-
+ par = alloc_parallel(rdata);
+ if (!par)
+ goto use_mds;
+@@ -298,36 +290,53 @@ bl_read_pagelist(struct nfs_read_data *r
+ extent_length = min(extent_length, cow_length);
+ }
+ }
++
++ if (is_dio) {
++ pg_offset = f_offset & ~PAGE_CACHE_MASK;
++ if (pg_offset + bytes_left > PAGE_CACHE_SIZE)
++ pg_len = PAGE_CACHE_SIZE - pg_offset;
++ else
++ pg_len = bytes_left;
++
++ f_offset += pg_len;
++ bytes_left -= pg_len;
++ isect += (pg_offset >> SECTOR_SHIFT);
++ } else {
++ pg_offset = 0;
++ pg_len = PAGE_CACHE_SIZE;
++ }
++
+ hole = is_hole(be, isect);
+ if (hole && !cow_read) {
+ bio = bl_submit_bio(READ, bio);
+ /* Fill hole w/ zeroes w/o accessing device */
+ dprintk("%s Zeroing page for hole\n", __func__);
+- zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE);
++ zero_user_segment(pages[i], pg_offset, pg_len);
+ print_page(pages[i]);
+ SetPageUptodate(pages[i]);
+ } else {
+ struct pnfs_block_extent *be_read;
+
+ be_read = (hole && cow_read) ? cow_read : be;
+- bio = bl_add_page_to_bio(bio, rdata->pages.npages - i,
++ bio = do_add_page_to_bio(bio, rdata->pages.npages - i,
+ READ,
+ isect, pages[i], be_read,
+- bl_end_io_read, par);
++ bl_end_io_read, par,
++ pg_offset, pg_len);
+ if (IS_ERR(bio)) {
+ header->pnfs_error = PTR_ERR(bio);
+ bio = NULL;
+ goto out;
+ }
+ }
+- isect += PAGE_CACHE_SECTORS;
++ isect += (pg_len >> SECTOR_SHIFT);
+ extent_length -= PAGE_CACHE_SECTORS;
+ }
+ if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
+ rdata->res.eof = 1;
+- rdata->res.count = header->inode->i_size - f_offset;
++ rdata->res.count = header->inode->i_size - rdata->args.offset;
+ } else {
+- rdata->res.count = (isect << SECTOR_SHIFT) - f_offset;
++ rdata->res.count = (isect << SECTOR_SHIFT) - rdata->args.offset;
+ }
+ out:
+ bl_put_extent(be);
+@@ -676,7 +685,7 @@ bl_write_pagelist(struct nfs_write_data
+ struct bio *bio = NULL;
+ struct pnfs_block_extent *be = NULL, *cow_read = NULL;
+ sector_t isect, last_isect = 0, extent_length = 0;
+- struct parallel_io *par = NULL;
++ struct parallel_io *par;
+ loff_t offset = wdata->args.offset;
+ size_t count = wdata->args.count;
+ unsigned int pg_offset, pg_len, saved_len;
+@@ -688,10 +697,6 @@ bl_write_pagelist(struct nfs_write_data
+ NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT;
+
+ dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
+- /* Check for alignment first */
+- if (!bl_check_alignment(offset, count, PAGE_CACHE_MASK))
+- goto out_mds;
+-
+ /* At this point, wdata->pages is a (sequential) list of nfs_pages.
+ * We want to write each, and if there is an error set pnfs_error
+ * to have it redone using nfs.
+@@ -1164,32 +1169,42 @@ bl_clear_layoutdriver(struct nfs_server
+ return 0;
+ }
+
++static bool
++is_aligned_req(struct nfs_page *req, unsigned int alignment)
++{
++ return IS_ALIGNED(req->wb_offset, alignment) &&
++ IS_ALIGNED(req->wb_bytes, alignment);
++}
++
+ static void
+ bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+ {
+- if (!bl_check_alignment(req->wb_offset, req->wb_bytes, PAGE_CACHE_MASK))
++ if (pgio->pg_dreq != NULL &&
++ !is_aligned_req(req, SECTOR_SIZE))
+ nfs_pageio_reset_read_mds(pgio);
+ else
+ pnfs_generic_pg_init_read(pgio, req);
+ }
+
+-static void
+-bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
++static bool
++bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
++ struct nfs_page *req)
+ {
+- if (!bl_check_alignment(req->wb_offset, req->wb_bytes, PAGE_CACHE_MASK))
+- nfs_pageio_reset_write_mds(pgio);
+- else
+- pnfs_generic_pg_init_write(pgio, req);
++ if (pgio->pg_dreq != NULL &&
++ !is_aligned_req(req, SECTOR_SIZE))
++ return false;
++
++ return pnfs_generic_pg_test(pgio, prev, req);
+ }
+
+ static const struct nfs_pageio_ops bl_pg_read_ops = {
+ .pg_init = bl_pg_init_read,
+- .pg_test = pnfs_generic_pg_test,
++ .pg_test = bl_pg_test_read,
+ .pg_doio = pnfs_generic_pg_readpages,
+ };
+
+ static const struct nfs_pageio_ops bl_pg_write_ops = {
+- .pg_init = bl_pg_init_write,
++ .pg_init = pnfs_generic_pg_init_write,
+ .pg_test = pnfs_generic_pg_test,
+ .pg_doio = pnfs_generic_pg_writepages,
+ };
--- /dev/null
+From 96c9eae638765c2bf2ca4f5a6325484f9bb69aa7 Mon Sep 17 00:00:00 2001
+From: Peng Tao <bergwolf@gmail.com>
+Date: Fri, 24 Aug 2012 00:27:53 +0800
+Subject: pnfsblock: fix non-aligned DIO write
+
+From: Peng Tao <bergwolf@gmail.com>
+
+commit 96c9eae638765c2bf2ca4f5a6325484f9bb69aa7 upstream.
+
+For DIO writes that are not blocksize aligned, we would need to do
+internal serialization, which may slow down writers anyway. So we
+just bail out and resend them via the MDS.
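+
+The bail-out this adds boils down to the following standalone sketch
+(blksize stands in for NFS_SERVER(header->inode)->pnfs_blksize; the
+helper name is illustrative, since the patch open-codes the test in
+bl_write_pagelist and enforces page-size alignment per request in
+bl_pg_init_write/bl_pg_test_write):
+
+    #include <linux/kernel.h> /* IS_ALIGNED() */
+
+    /* A DIO write may stay on the block layout only if both its
+     * offset and its length are multiples of the server's pNFS
+     * block size; otherwise it is resent through the MDS. */
+    static bool dio_write_fits_blocklayout(loff_t offset, size_t count,
+                                           u32 blksize)
+    {
+        return IS_ALIGNED(offset, blksize) &&
+               IS_ALIGNED(count, blksize);
+    }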
+
+Signed-off-by: Peng Tao <tao.peng@emc.com>
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfs/blocklayout/blocklayout.c | 34 +++++++++++++++++++++++++++++++---
+ 1 file changed, 31 insertions(+), 3 deletions(-)
+
+--- a/fs/nfs/blocklayout/blocklayout.c
++++ b/fs/nfs/blocklayout/blocklayout.c
+@@ -685,7 +685,7 @@ bl_write_pagelist(struct nfs_write_data
+ struct bio *bio = NULL;
+ struct pnfs_block_extent *be = NULL, *cow_read = NULL;
+ sector_t isect, last_isect = 0, extent_length = 0;
+- struct parallel_io *par;
++ struct parallel_io *par = NULL;
+ loff_t offset = wdata->args.offset;
+ size_t count = wdata->args.count;
+ unsigned int pg_offset, pg_len, saved_len;
+@@ -697,6 +697,13 @@ bl_write_pagelist(struct nfs_write_data
+ NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT;
+
+ dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
++
++ if (header->dreq != NULL &&
++ (!IS_ALIGNED(offset, NFS_SERVER(header->inode)->pnfs_blksize) ||
++ !IS_ALIGNED(count, NFS_SERVER(header->inode)->pnfs_blksize))) {
++ dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n");
++ goto out_mds;
++ }
+ /* At this point, wdata->pages is a (sequential) list of nfs_pages.
+ * We want to write each, and if there is an error set pnfs_error
+ * to have it redone using nfs.
+@@ -1197,6 +1204,27 @@ bl_pg_test_read(struct nfs_pageio_descri
+ return pnfs_generic_pg_test(pgio, prev, req);
+ }
+
++void
++bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
++{
++ if (pgio->pg_dreq != NULL &&
++ !is_aligned_req(req, PAGE_CACHE_SIZE))
++ nfs_pageio_reset_write_mds(pgio);
++ else
++ pnfs_generic_pg_init_write(pgio, req);
++}
++
++static bool
++bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
++ struct nfs_page *req)
++{
++ if (pgio->pg_dreq != NULL &&
++ !is_aligned_req(req, PAGE_CACHE_SIZE))
++ return false;
++
++ return pnfs_generic_pg_test(pgio, prev, req);
++}
++
+ static const struct nfs_pageio_ops bl_pg_read_ops = {
+ .pg_init = bl_pg_init_read,
+ .pg_test = bl_pg_test_read,
+@@ -1204,8 +1232,8 @@ static const struct nfs_pageio_ops bl_pg
+ };
+
+ static const struct nfs_pageio_ops bl_pg_write_ops = {
+- .pg_init = pnfs_generic_pg_init_write,
+- .pg_test = pnfs_generic_pg_test,
++ .pg_init = bl_pg_init_write,
++ .pg_test = bl_pg_test_write,
+ .pg_doio = pnfs_generic_pg_writepages,
+ };
+
--- /dev/null
+From 1f2ff682ac951ed82cc043cf140d2851084512df Mon Sep 17 00:00:00 2001
+From: Yinghai Lu <yinghai@kernel.org>
+Date: Mon, 22 Oct 2012 16:35:18 -0700
+Subject: x86, mm: Use memblock memory loop instead of e820_RAM
+
+From: Yinghai Lu <yinghai@kernel.org>
+
+commit 1f2ff682ac951ed82cc043cf140d2851084512df upstream.
+
+We need to handle E820_RAM and E820_RESERVED_KERNEL at the same time.
+
+Also, memblock keeps page-aligned ranges for RAM, so we can avoid
+mapping partial pages.
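+
+The resulting loop, sketched as its own helper (names mirror the
+patch; for_each_mem_pfn_range() and PFN_PHYS() are the stock
+memblock/pfn helpers, and the 1UL<<32 bound is valid because this
+code runs only under CONFIG_X86_64):
+
+    #include <linux/memblock.h> /* for_each_mem_pfn_range() */
+    #include <linux/pfn.h>      /* PFN_PHYS() */
+
+    static void __init map_ram_above_4g(void)
+    {
+        unsigned long start_pfn, end_pfn;
+        unsigned long start, end;
+        int i;
+
+        /* memblock RAM ranges are page aligned and already include
+         * both E820_RAM and E820_RESERVED_KERNEL, so no e820 type
+         * filtering or partial-page clipping is needed. */
+        for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn,
+                               NULL) {
+            end = PFN_PHYS(end_pfn);
+            if (end <= (1UL << 32)) /* only map memory above 4 GiB */
+                continue;
+
+            start = PFN_PHYS(start_pfn);
+            max_pfn_mapped = init_memory_mapping(
+                max((1UL << 32), start), end);
+        }
+    }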
+
+Signed-off-by: Yinghai Lu <yinghai@kernel.org>
+Link: http://lkml.kernel.org/r/CAE9FiQVZirvaBMFYRfXMmWEcHbKSicQEHz4VAwUv0xFCk51ZNw@mail.gmail.com
+Acked-by: Jacob Shin <jacob.shin@amd.com>
+Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/setup.c | 15 ++++++++-------
+ 1 file changed, 8 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/kernel/setup.c
++++ b/arch/x86/kernel/setup.c
+@@ -920,18 +920,19 @@ void __init setup_arch(char **cmdline_p)
+ #ifdef CONFIG_X86_64
+ if (max_pfn > max_low_pfn) {
+ int i;
+- for (i = 0; i < e820.nr_map; i++) {
+- struct e820entry *ei = &e820.map[i];
++ unsigned long start, end;
++ unsigned long start_pfn, end_pfn;
+
+- if (ei->addr + ei->size <= 1UL << 32)
+- continue;
++ for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn,
++ NULL) {
+
+- if (ei->type == E820_RESERVED)
++ end = PFN_PHYS(end_pfn);
++ if (end <= (1UL<<32))
+ continue;
+
++ start = PFN_PHYS(start_pfn);
+ max_pfn_mapped = init_memory_mapping(
+- ei->addr < 1UL << 32 ? 1UL << 32 : ei->addr,
+- ei->addr + ei->size);
++ max((1UL<<32), start), end);
+ }
+
+ /* can we preseve max_low_pfn ?*/