From febb909148b66accbd1afdc059ecd4564abb4ac3 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Mon, 29 Oct 2012 11:44:16 -0700
Subject: [PATCH] 3.6-stable patches

added patches:
      pnfsblock-fix-non-aligned-dio-read.patch
      pnfsblock-fix-non-aligned-dio-write.patch
      x86-mm-use-memblock-memory-loop-instead-of-e820_ram.patch
---
 .../pnfsblock-fix-non-aligned-dio-read.patch | 189 ++++++++++++++++++
 .../pnfsblock-fix-non-aligned-dio-write.patch | 85 ++++++++
 queue-3.6/series | 3 +
 ...lock-memory-loop-instead-of-e820_ram.patch | 53 +++++
 4 files changed, 330 insertions(+)
 create mode 100644 queue-3.6/pnfsblock-fix-non-aligned-dio-read.patch
 create mode 100644 queue-3.6/pnfsblock-fix-non-aligned-dio-write.patch
 create mode 100644 queue-3.6/x86-mm-use-memblock-memory-loop-instead-of-e820_ram.patch

diff --git a/queue-3.6/pnfsblock-fix-non-aligned-dio-read.patch b/queue-3.6/pnfsblock-fix-non-aligned-dio-read.patch
new file mode 100644
index 00000000000..dbf39e53bac
--- /dev/null
+++ b/queue-3.6/pnfsblock-fix-non-aligned-dio-read.patch
@@ -0,0 +1,189 @@
+From f742dc4a32587bff50b13dde9d8894b96851951a Mon Sep 17 00:00:00 2001
+From: Peng Tao
+Date: Fri, 24 Aug 2012 00:27:52 +0800
+Subject: pnfsblock: fix non-aligned DIO read
+
+From: Peng Tao
+
+commit f742dc4a32587bff50b13dde9d8894b96851951a upstream.
+
+For DIO read, if it is not sector aligned, we should reject it
+and resend via MDS. Otherwise there might be data corruption.
+Also teach bl_read_pagelist to handle partial page reads for DIO.
+
+Signed-off-by: Peng Tao
+Signed-off-by: Trond Myklebust
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/nfs/blocklayout/blocklayout.c | 77 +++++++++++++++++++++++----------------
+ 1 file changed, 46 insertions(+), 31 deletions(-)
+
+--- a/fs/nfs/blocklayout/blocklayout.c
++++ b/fs/nfs/blocklayout/blocklayout.c
+@@ -242,14 +242,6 @@ bl_end_par_io_read(void *data, int unuse
+ 	schedule_work(&rdata->task.u.tk_work);
+ }
+ 
+-static bool
+-bl_check_alignment(u64 offset, u32 len, unsigned long blkmask)
+-{
+-	if ((offset & blkmask) || (len & blkmask))
+-		return false;
+-	return true;
+-}
+-
+ static enum pnfs_try_status
+ bl_read_pagelist(struct nfs_read_data *rdata)
+ {
+@@ -260,15 +252,15 @@ bl_read_pagelist(struct nfs_read_data *r
+ 	sector_t isect, extent_length = 0;
+ 	struct parallel_io *par;
+ 	loff_t f_offset = rdata->args.offset;
++	size_t bytes_left = rdata->args.count;
++	unsigned int pg_offset, pg_len;
+ 	struct page **pages = rdata->args.pages;
+ 	int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
++	const bool is_dio = (header->dreq != NULL);
+ 
+ 	dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
+ 		rdata->pages.npages, f_offset, (unsigned int)rdata->args.count);
+ 
+-	if (!bl_check_alignment(f_offset, rdata->args.count, PAGE_CACHE_MASK))
+-		goto use_mds;
+-
+ 	par = alloc_parallel(rdata);
+ 	if (!par)
+ 		goto use_mds;
+@@ -298,36 +290,53 @@ bl_read_pagelist(struct nfs_read_data *r
+ 				extent_length = min(extent_length, cow_length);
+ 			}
+ 		}
++
++		if (is_dio) {
++			pg_offset = f_offset & ~PAGE_CACHE_MASK;
++			if (pg_offset + bytes_left > PAGE_CACHE_SIZE)
++				pg_len = PAGE_CACHE_SIZE - pg_offset;
++			else
++				pg_len = bytes_left;
++
++			f_offset += pg_len;
++			bytes_left -= pg_len;
++			isect += (pg_offset >> SECTOR_SHIFT);
++		} else {
++			pg_offset = 0;
++			pg_len = PAGE_CACHE_SIZE;
++		}
++
+ 		hole = is_hole(be, isect);
+ 		if (hole && !cow_read) {
+ 			bio = bl_submit_bio(READ, bio);
+ 			/* Fill hole w/ zeroes w/o accessing device */
+ 			dprintk("%s Zeroing page for hole\n", __func__);
+-			zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE);
++			zero_user_segment(pages[i], pg_offset, pg_len);
+ 			print_page(pages[i]);
+ 			SetPageUptodate(pages[i]);
+ 		} else {
+ 			struct pnfs_block_extent *be_read;
+ 
+ 			be_read = (hole && cow_read) ? cow_read : be;
+-			bio = bl_add_page_to_bio(bio, rdata->pages.npages - i,
++			bio = do_add_page_to_bio(bio, rdata->pages.npages - i,
+ 						 READ,
+ 						 isect, pages[i], be_read,
+-						 bl_end_io_read, par);
++						 bl_end_io_read, par,
++						 pg_offset, pg_len);
+ 			if (IS_ERR(bio)) {
+ 				header->pnfs_error = PTR_ERR(bio);
+ 				bio = NULL;
+ 				goto out;
+ 			}
+ 		}
+-		isect += PAGE_CACHE_SECTORS;
++		isect += (pg_len >> SECTOR_SHIFT);
+ 		extent_length -= PAGE_CACHE_SECTORS;
+ 	}
+ 	if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
+ 		rdata->res.eof = 1;
+-		rdata->res.count = header->inode->i_size - f_offset;
++		rdata->res.count = header->inode->i_size - rdata->args.offset;
+ 	} else {
+-		rdata->res.count = (isect << SECTOR_SHIFT) - f_offset;
++		rdata->res.count = (isect << SECTOR_SHIFT) - rdata->args.offset;
+ 	}
+ out:
+ 	bl_put_extent(be);
+@@ -676,7 +685,7 @@ bl_write_pagelist(struct nfs_write_data
+ 	struct bio *bio = NULL;
+ 	struct pnfs_block_extent *be = NULL, *cow_read = NULL;
+ 	sector_t isect, last_isect = 0, extent_length = 0;
+-	struct parallel_io *par = NULL;
++	struct parallel_io *par;
+ 	loff_t offset = wdata->args.offset;
+ 	size_t count = wdata->args.count;
+ 	unsigned int pg_offset, pg_len, saved_len;
+@@ -688,10 +697,6 @@ bl_write_pagelist(struct nfs_write_data
+ 		NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT;
+ 
+ 	dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
+-	/* Check for alignment first */
+-	if (!bl_check_alignment(offset, count, PAGE_CACHE_MASK))
+-		goto out_mds;
+-
+ 	/* At this point, wdata->pages is a (sequential) list of nfs_pages.
+ 	 * We want to write each, and if there is an error set pnfs_error
+ 	 * to have it redone using nfs.
+@@ -1164,32 +1169,42 @@ bl_clear_layoutdriver(struct nfs_server
+ 	return 0;
+ }
+ 
++static bool
++is_aligned_req(struct nfs_page *req, unsigned int alignment)
++{
++	return IS_ALIGNED(req->wb_offset, alignment) &&
++		IS_ALIGNED(req->wb_bytes, alignment);
++}
++
+ static void
+ bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+ {
+-	if (!bl_check_alignment(req->wb_offset, req->wb_bytes, PAGE_CACHE_MASK))
++	if (pgio->pg_dreq != NULL &&
++	    !is_aligned_req(req, SECTOR_SIZE))
+ 		nfs_pageio_reset_read_mds(pgio);
+ 	else
+ 		pnfs_generic_pg_init_read(pgio, req);
+ }
+ 
+-static void
+-bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
++static bool
++bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
++		struct nfs_page *req)
+ {
+-	if (!bl_check_alignment(req->wb_offset, req->wb_bytes, PAGE_CACHE_MASK))
+-		nfs_pageio_reset_write_mds(pgio);
+-	else
+-		pnfs_generic_pg_init_write(pgio, req);
++	if (pgio->pg_dreq != NULL &&
++	    !is_aligned_req(req, SECTOR_SIZE))
++		return false;
++
++	return pnfs_generic_pg_test(pgio, prev, req);
+ }
+ 
+ static const struct nfs_pageio_ops bl_pg_read_ops = {
+ 	.pg_init = bl_pg_init_read,
+-	.pg_test = pnfs_generic_pg_test,
++	.pg_test = bl_pg_test_read,
+ 	.pg_doio = pnfs_generic_pg_readpages,
+ };
+ 
+ static const struct nfs_pageio_ops bl_pg_write_ops = {
+-	.pg_init = bl_pg_init_write,
++	.pg_init = pnfs_generic_pg_init_write,
+ 	.pg_test = pnfs_generic_pg_test,
+ 	.pg_doio = pnfs_generic_pg_writepages,
+ };
diff --git a/queue-3.6/pnfsblock-fix-non-aligned-dio-write.patch b/queue-3.6/pnfsblock-fix-non-aligned-dio-write.patch
new file mode 100644
index 00000000000..dd056e36541
--- /dev/null
+++ b/queue-3.6/pnfsblock-fix-non-aligned-dio-write.patch
@@ -0,0 +1,85 @@
+From 96c9eae638765c2bf2ca4f5a6325484f9bb69aa7 Mon Sep 17 00:00:00 2001
+From: Peng Tao
+Date: Fri, 24 Aug 2012 00:27:53 +0800
+Subject: pnfsblock: fix non-aligned DIO write
+
+From: Peng Tao
+
+commit 96c9eae638765c2bf2ca4f5a6325484f9bb69aa7 upstream.
+
+For DIO writes, if it is not blocksize aligned, we need to do
+internal serialization. It may slow down writers anyway. So we
+just bail them out and resend to MDS.
+
+Signed-off-by: Peng Tao
+Signed-off-by: Trond Myklebust
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/nfs/blocklayout/blocklayout.c | 34 +++++++++++++++++++++++++++++++---
+ 1 file changed, 31 insertions(+), 3 deletions(-)
+
+--- a/fs/nfs/blocklayout/blocklayout.c
++++ b/fs/nfs/blocklayout/blocklayout.c
+@@ -685,7 +685,7 @@ bl_write_pagelist(struct nfs_write_data
+ 	struct bio *bio = NULL;
+ 	struct pnfs_block_extent *be = NULL, *cow_read = NULL;
+ 	sector_t isect, last_isect = 0, extent_length = 0;
+-	struct parallel_io *par;
++	struct parallel_io *par = NULL;
+ 	loff_t offset = wdata->args.offset;
+ 	size_t count = wdata->args.count;
+ 	unsigned int pg_offset, pg_len, saved_len;
+@@ -697,6 +697,13 @@ bl_write_pagelist(struct nfs_write_data
+ 		NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT;
+ 
+ 	dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
++
++	if (header->dreq != NULL &&
++	    (!IS_ALIGNED(offset, NFS_SERVER(header->inode)->pnfs_blksize) ||
++	     !IS_ALIGNED(count, NFS_SERVER(header->inode)->pnfs_blksize))) {
++		dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n");
++		goto out_mds;
++	}
+ 	/* At this point, wdata->pages is a (sequential) list of nfs_pages.
+ 	 * We want to write each, and if there is an error set pnfs_error
+ 	 * to have it redone using nfs.
+@@ -1197,6 +1204,27 @@ bl_pg_test_read(struct nfs_pageio_descri
+ 	return pnfs_generic_pg_test(pgio, prev, req);
+ }
+ 
++void
++bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
++{
++	if (pgio->pg_dreq != NULL &&
++	    !is_aligned_req(req, PAGE_CACHE_SIZE))
++		nfs_pageio_reset_write_mds(pgio);
++	else
++		pnfs_generic_pg_init_write(pgio, req);
++}
++
++static bool
++bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
++		 struct nfs_page *req)
++{
++	if (pgio->pg_dreq != NULL &&
++	    !is_aligned_req(req, PAGE_CACHE_SIZE))
++		return false;
++
++	return pnfs_generic_pg_test(pgio, prev, req);
++}
++
+ static const struct nfs_pageio_ops bl_pg_read_ops = {
+ 	.pg_init = bl_pg_init_read,
+ 	.pg_test = bl_pg_test_read,
+@@ -1204,8 +1232,8 @@ static const struct nfs_pageio_ops bl_pg
+ };
+ 
+ static const struct nfs_pageio_ops bl_pg_write_ops = {
+-	.pg_init = pnfs_generic_pg_init_write,
+-	.pg_test = pnfs_generic_pg_test,
++	.pg_init = bl_pg_init_write,
++	.pg_test = bl_pg_test_write,
+ 	.pg_doio = pnfs_generic_pg_writepages,
+ };
+ 
diff --git a/queue-3.6/series b/queue-3.6/series
index fbfb6e56f4c..671f387f7d7 100644
--- a/queue-3.6/series
+++ b/queue-3.6/series
@@ -96,3 +96,6 @@ x86-mm-undo-incorrect-revert-in-arch-x86-mm-init.c.patch
 cfg80211-mac80211-avoid-state-mishmash-on-deauth.patch
 efi-defer-freeing-boot-services-memory-until-after-acpi-init.patch
 x86-efi-turn-off-efi_enabled-after-setup-on-mixed-fw-kernel.patch
+x86-mm-use-memblock-memory-loop-instead-of-e820_ram.patch
+pnfsblock-fix-non-aligned-dio-read.patch
+pnfsblock-fix-non-aligned-dio-write.patch
diff --git a/queue-3.6/x86-mm-use-memblock-memory-loop-instead-of-e820_ram.patch b/queue-3.6/x86-mm-use-memblock-memory-loop-instead-of-e820_ram.patch
new file mode 100644
index 00000000000..60d4261e005
--- /dev/null
+++ b/queue-3.6/x86-mm-use-memblock-memory-loop-instead-of-e820_ram.patch
@@ -0,0 +1,53 @@
+From 1f2ff682ac951ed82cc043cf140d2851084512df Mon Sep 17 00:00:00 2001
+From: Yinghai Lu
+Date: Mon, 22 Oct 2012 16:35:18 -0700
+Subject: x86, mm: Use memblock memory loop instead of e820_RAM
+
+From: Yinghai Lu
+
+commit 1f2ff682ac951ed82cc043cf140d2851084512df upstream.
+
+We need to handle E820_RAM and E820_RESERVED_KERNEL at the same time.
+
+Also memblock has page aligned range for ram, so we could avoid mapping
+partial pages.
+
+Signed-off-by: Yinghai Lu
+Link: http://lkml.kernel.org/r/CAE9FiQVZirvaBMFYRfXMmWEcHbKSicQEHz4VAwUv0xFCk51ZNw@mail.gmail.com
+Acked-by: Jacob Shin
+Signed-off-by: H. Peter Anvin
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/kernel/setup.c | 15 ++++++++-------
+ 1 file changed, 8 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/kernel/setup.c
++++ b/arch/x86/kernel/setup.c
+@@ -920,18 +920,19 @@ void __init setup_arch(char **cmdline_p)
+ #ifdef CONFIG_X86_64
+ 	if (max_pfn > max_low_pfn) {
+ 		int i;
+-		for (i = 0; i < e820.nr_map; i++) {
+-			struct e820entry *ei = &e820.map[i];
++		unsigned long start, end;
++		unsigned long start_pfn, end_pfn;
+ 
+-			if (ei->addr + ei->size <= 1UL << 32)
+-				continue;
++		for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn,
++					 NULL) {
+ 
+-			if (ei->type == E820_RESERVED)
++			end = PFN_PHYS(end_pfn);
++			if (end <= (1UL<<32))
+ 				continue;
+ 
++			start = PFN_PHYS(start_pfn);
+ 			max_pfn_mapped = init_memory_mapping(
+-						ei->addr < 1UL << 32 ? 1UL << 32 : ei->addr,
+-						ei->addr + ei->size);
++						max((1UL<<32), start), end);
+ 		}
+ 
+ 		/* can we preseve max_low_pfn ?*/
-- 
2.47.2