From dd4ccd413b01602ecb279034e3ca6ed104921b65 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 9 May 2013 14:09:26 -0700 Subject: [PATCH] 3.8-stable patches added patches: btrfs-compare-relevant-parts-of-delayed-tree-refs.patch btrfs-fix-extent-logging-with-o_direct-into-prealloc.patch edac-don-t-give-write-permission-to-read-only-files.patch kernel-audit_tree.c-tree-will-leak-memory-when-failure-occurs-in-audit_trim_trees.patch nfsv4.x-fix-handling-of-partially-delegated-locks.patch x86-mm-account-for-pgdir_size-alignment.patch --- ...-relevant-parts-of-delayed-tree-refs.patch | 93 +++++++++++++++++++ ...-logging-with-o_direct-into-prealloc.patch | 91 ++++++++++++++++++ ...-write-permission-to-read-only-files.patch | 60 ++++++++++++ ...n-failure-occurs-in-audit_trim_trees.patch | 38 ++++++++ ...andling-of-partially-delegated-locks.patch | 40 ++++++++ queue-3.8/series | 6 ++ ...-mm-account-for-pgdir_size-alignment.patch | 59 ++++++++++++ 7 files changed, 387 insertions(+) create mode 100644 queue-3.8/btrfs-compare-relevant-parts-of-delayed-tree-refs.patch create mode 100644 queue-3.8/btrfs-fix-extent-logging-with-o_direct-into-prealloc.patch create mode 100644 queue-3.8/edac-don-t-give-write-permission-to-read-only-files.patch create mode 100644 queue-3.8/kernel-audit_tree.c-tree-will-leak-memory-when-failure-occurs-in-audit_trim_trees.patch create mode 100644 queue-3.8/nfsv4.x-fix-handling-of-partially-delegated-locks.patch create mode 100644 queue-3.8/x86-mm-account-for-pgdir_size-alignment.patch diff --git a/queue-3.8/btrfs-compare-relevant-parts-of-delayed-tree-refs.patch b/queue-3.8/btrfs-compare-relevant-parts-of-delayed-tree-refs.patch new file mode 100644 index 00000000000..e6aa2ec6179 --- /dev/null +++ b/queue-3.8/btrfs-compare-relevant-parts-of-delayed-tree-refs.patch @@ -0,0 +1,93 @@ +From 41b0fc42800569f63e029549b75c4c9cb63f2dfd Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Mon, 1 Apr 2013 20:36:28 -0400 +Subject: Btrfs: compare relevant parts of delayed tree refs + +From: Josef Bacik + +commit 41b0fc42800569f63e029549b75c4c9cb63f2dfd upstream. + +A user reported a panic while running a balance. What was happening was he was +relocating a block, which added the reference to the relocation tree. Then +relocation would walk through the relocation tree and drop that reference and +free that block, and then it would walk down a snapshot which referenced the +same block and add another ref to the block. The problem is this was all +happening in the same transaction, so the parent block was free'ed up when we +drop our reference which was immediately available for allocation, and then it +was used _again_ to add a reference for the same block from a different +snapshot. This resulted in something like this in the delayed ref tree + +add ref to 90234880, parent=2067398656, ref_root 1766, level 1 +del ref to 90234880, parent=2067398656, ref_root 18446744073709551608, level 1 +add ref to 90234880, parent=2067398656, ref_root 1767, level 1 + +as you can see the ref_root's don't match, because when we inc the ref we use +the header owner, which is the original tree the block belonged to, instead of +the data reloc tree. Then when we remove the extent we use the reloc tree +objectid. But none of this matters, since it is a shared reference which means +only the parent matters. When the delayed ref stuff runs it adds all the +increments first, and then does all the drops, to make sure that we don't delete +the ref if we net a positive ref count. But tree blocks aren't allowed to have +multiple refs from the same block, so this panics when it tries to add the +second ref. We need the add and the drop to cancel each other out in memory so +we only do the final add. + +So to fix this we need to adjust how the delayed refs are added to the tree. +Only the ref_root matters when it is a normal backref, and only the parent +matters when it is a shared backref. So make our decision based on what ref +type we have. This allows us to keep the ref_root in memory in case anybody +wants to use it for something else, and it allows the delayed refs to be merged +properly so we don't end up with this panic. + +With this patch the users image no longer panics on mount, and it has a clean +fsck after a normal mount/umount cycle. Thanks, + +Reported-by: Roman Mamedov +Signed-off-by: Josef Bacik +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/delayed-ref.c | 24 ++++++++++++++---------- + 1 file changed, 14 insertions(+), 10 deletions(-) + +--- a/fs/btrfs/delayed-ref.c ++++ b/fs/btrfs/delayed-ref.c +@@ -36,16 +36,19 @@ + * compare two delayed tree backrefs with same bytenr and type + */ + static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2, +- struct btrfs_delayed_tree_ref *ref1) ++ struct btrfs_delayed_tree_ref *ref1, int type) + { +- if (ref1->root < ref2->root) +- return -1; +- if (ref1->root > ref2->root) +- return 1; +- if (ref1->parent < ref2->parent) +- return -1; +- if (ref1->parent > ref2->parent) +- return 1; ++ if (type == BTRFS_TREE_BLOCK_REF_KEY) { ++ if (ref1->root < ref2->root) ++ return -1; ++ if (ref1->root > ref2->root) ++ return 1; ++ } else { ++ if (ref1->parent < ref2->parent) ++ return -1; ++ if (ref1->parent > ref2->parent) ++ return 1; ++ } + return 0; + } + +@@ -109,7 +112,8 @@ static int comp_entry(struct btrfs_delay + if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY || + ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) { + return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2), +- btrfs_delayed_node_to_tree_ref(ref1)); ++ btrfs_delayed_node_to_tree_ref(ref1), ++ ref1->type); + } else if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY || + ref1->type == BTRFS_SHARED_DATA_REF_KEY) { + return comp_data_refs(btrfs_delayed_node_to_data_ref(ref2), diff --git a/queue-3.8/btrfs-fix-extent-logging-with-o_direct-into-prealloc.patch b/queue-3.8/btrfs-fix-extent-logging-with-o_direct-into-prealloc.patch new file mode 100644 index 00000000000..18835973445 --- /dev/null +++ b/queue-3.8/btrfs-fix-extent-logging-with-o_direct-into-prealloc.patch @@ -0,0 +1,91 @@ +From eb384b55ae9c2055ea00c5cc87971e182d47aefa Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Wed, 24 Apr 2013 16:32:55 -0400 +Subject: Btrfs: fix extent logging with O_DIRECT into prealloc + +From: Josef Bacik + +commit eb384b55ae9c2055ea00c5cc87971e182d47aefa upstream. + +This is the same as the fix from commit + +Btrfs: fix bad extent logging + +but for O_DIRECT. I missed this when I fixed the problem originally, we were +still using the em for the orig_start and orig_block_len, which would be the +merged extent. We need to use the actual extent from the on disk file extent +item, which we have to lookup to make sure it's ok to nocow anyway so just pass +in some pointers to hold this info. Thanks, + +Signed-off-by: Josef Bacik +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/inode.c | 21 +++++++++++++-------- + 1 file changed, 13 insertions(+), 8 deletions(-) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -5794,7 +5794,9 @@ out: + * block must be cow'd + */ + static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans, +- struct inode *inode, u64 offset, u64 len) ++ struct inode *inode, u64 offset, u64 *len, ++ u64 *orig_start, u64 *orig_block_len, ++ u64 *ram_bytes) + { + struct btrfs_path *path; + int ret; +@@ -5851,8 +5853,12 @@ static noinline int can_nocow_odirect(st + disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); + backref_offset = btrfs_file_extent_offset(leaf, fi); + ++ *orig_start = key.offset - backref_offset; ++ *orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi); ++ *ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi); ++ + extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); +- if (extent_end < offset + len) { ++ if (extent_end < offset + *len) { + /* extent doesn't include our full range, must cow */ + goto out; + } +@@ -5876,13 +5882,14 @@ static noinline int can_nocow_odirect(st + */ + disk_bytenr += backref_offset; + disk_bytenr += offset - key.offset; +- num_bytes = min(offset + len, extent_end) - offset; ++ num_bytes = min(offset + *len, extent_end) - offset; + if (csum_exist_in_range(root, disk_bytenr, num_bytes)) + goto out; + /* + * all of the above have passed, it is safe to overwrite this extent + * without cow + */ ++ *len = num_bytes; + ret = 1; + out: + btrfs_free_path(path); +@@ -6092,7 +6099,7 @@ static int btrfs_get_blocks_direct(struc + em->block_start != EXTENT_MAP_HOLE)) { + int type; + int ret; +- u64 block_start; ++ u64 block_start, orig_start, orig_block_len, ram_bytes; + + if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) + type = BTRFS_ORDERED_PREALLOC; +@@ -6110,10 +6117,8 @@ static int btrfs_get_blocks_direct(struc + if (IS_ERR(trans)) + goto must_cow; + +- if (can_nocow_odirect(trans, inode, start, len) == 1) { +- u64 orig_start = em->orig_start; +- u64 orig_block_len = em->orig_block_len; +- ++ if (can_nocow_odirect(trans, inode, start, &len, &orig_start, ++ &orig_block_len, &ram_bytes) == 1) { + if (type == BTRFS_ORDERED_PREALLOC) { + free_extent_map(em); + em = create_pinned_em(inode, start, len, diff --git a/queue-3.8/edac-don-t-give-write-permission-to-read-only-files.patch b/queue-3.8/edac-don-t-give-write-permission-to-read-only-files.patch new file mode 100644 index 00000000000..cfe0b69f1c0 --- /dev/null +++ b/queue-3.8/edac-don-t-give-write-permission-to-read-only-files.patch @@ -0,0 +1,60 @@ +From c8c64d165ccfd2274058ac84e0c680f9b48c4ec1 Mon Sep 17 00:00:00 2001 +From: "Srivatsa S. Bhat" +Date: Tue, 30 Apr 2013 15:17:16 +0530 +Subject: EDAC: Don't give write permission to read-only files + +From: "Srivatsa S. Bhat" + +commit c8c64d165ccfd2274058ac84e0c680f9b48c4ec1 upstream. + +I get the following warning on boot: + +------------[ cut here ]------------ +WARNING: at drivers/base/core.c:575 device_create_file+0x9a/0xa0() +Hardware name: -[8737R2A]- +Write permission without 'store' +... + + +Drilling down, this is related to dynamic channel ce_count attribute +files sporting a S_IWUSR mode without a ->store() function. Looking +around, it appears that they aren't supposed to have a ->store() +function. So remove the bogus write permission to get rid of the +warning. + +Signed-off-by: Srivatsa S. Bhat +Cc: Mauro Carvalho Chehab +[ shorten commit message ] +Signed-off-by: Borislav Petkov +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/edac/edac_mc_sysfs.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +--- a/drivers/edac/edac_mc_sysfs.c ++++ b/drivers/edac/edac_mc_sysfs.c +@@ -330,17 +330,17 @@ static struct device_attribute *dynamic_ + }; + + /* possible dynamic channel ce_count attribute files */ +-DEVICE_CHANNEL(ch0_ce_count, S_IRUGO | S_IWUSR, ++DEVICE_CHANNEL(ch0_ce_count, S_IRUGO, + channel_ce_count_show, NULL, 0); +-DEVICE_CHANNEL(ch1_ce_count, S_IRUGO | S_IWUSR, ++DEVICE_CHANNEL(ch1_ce_count, S_IRUGO, + channel_ce_count_show, NULL, 1); +-DEVICE_CHANNEL(ch2_ce_count, S_IRUGO | S_IWUSR, ++DEVICE_CHANNEL(ch2_ce_count, S_IRUGO, + channel_ce_count_show, NULL, 2); +-DEVICE_CHANNEL(ch3_ce_count, S_IRUGO | S_IWUSR, ++DEVICE_CHANNEL(ch3_ce_count, S_IRUGO, + channel_ce_count_show, NULL, 3); +-DEVICE_CHANNEL(ch4_ce_count, S_IRUGO | S_IWUSR, ++DEVICE_CHANNEL(ch4_ce_count, S_IRUGO, + channel_ce_count_show, NULL, 4); +-DEVICE_CHANNEL(ch5_ce_count, S_IRUGO | S_IWUSR, ++DEVICE_CHANNEL(ch5_ce_count, S_IRUGO, + channel_ce_count_show, NULL, 5); + + /* Total possible dynamic ce_count attribute file table */ diff --git a/queue-3.8/kernel-audit_tree.c-tree-will-leak-memory-when-failure-occurs-in-audit_trim_trees.patch b/queue-3.8/kernel-audit_tree.c-tree-will-leak-memory-when-failure-occurs-in-audit_trim_trees.patch new file mode 100644 index 00000000000..8f03e79c9c0 --- /dev/null +++ b/queue-3.8/kernel-audit_tree.c-tree-will-leak-memory-when-failure-occurs-in-audit_trim_trees.patch @@ -0,0 +1,38 @@ +From 12b2f117f3bf738c1a00a6f64393f1953a740bd4 Mon Sep 17 00:00:00 2001 +From: Chen Gang +Date: Mon, 29 Apr 2013 15:05:19 -0700 +Subject: kernel/audit_tree.c: tree will leak memory when failure occurs in audit_trim_trees() + +From: Chen Gang + +commit 12b2f117f3bf738c1a00a6f64393f1953a740bd4 upstream. + +audit_trim_trees() calls get_tree(). If a failure occurs we must call +put_tree(). + +[akpm@linux-foundation.org: run put_tree() before mutex_lock() for small scalability improvement] +Signed-off-by: Chen Gang +Cc: Al Viro +Cc: Eric Paris +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Jonghwan Choi +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/audit_tree.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/audit_tree.c ++++ b/kernel/audit_tree.c +@@ -617,9 +617,9 @@ void audit_trim_trees(void) + } + spin_unlock(&hash_lock); + trim_marked(tree); +- put_tree(tree); + drop_collected_mounts(root_mnt); + skip_it: ++ put_tree(tree); + mutex_lock(&audit_filter_mutex); + } + list_del(&cursor); diff --git a/queue-3.8/nfsv4.x-fix-handling-of-partially-delegated-locks.patch b/queue-3.8/nfsv4.x-fix-handling-of-partially-delegated-locks.patch new file mode 100644 index 00000000000..726623da587 --- /dev/null +++ b/queue-3.8/nfsv4.x-fix-handling-of-partially-delegated-locks.patch @@ -0,0 +1,40 @@ +From c5a2a15f8146fdfe45078df7873a6dc1006b3869 Mon Sep 17 00:00:00 2001 +From: Trond Myklebust +Date: Tue, 30 Apr 2013 12:43:42 -0400 +Subject: NFSv4.x: Fix handling of partially delegated locks + +From: Trond Myklebust + +commit c5a2a15f8146fdfe45078df7873a6dc1006b3869 upstream. + +If a NFS client receives a delegation for a file after it has taken +a lock on that file, we can currently end up in a situation where +we mistakenly skip unlocking that file. + +The following patch swaps an erroneous check in nfs4_proc_unlck for +whether or not the file has a delegation to one which checks whether +or not we hold a lock stateid for that file. + +Reported-by: Chuck Lever +Signed-off-by: Trond Myklebust +Tested-by: Chuck Lever +Signed-off-by: Greg Kroah-Hartman + +--- + fs/nfs/nfs4proc.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -4513,9 +4513,9 @@ static int nfs4_proc_unlck(struct nfs4_s + if (status != 0) + goto out; + /* Is this a delegated lock? */ +- if (test_bit(NFS_DELEGATED_STATE, &state->flags)) +- goto out; + lsp = request->fl_u.nfs4_fl.owner; ++ if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) == 0) ++ goto out; + seqid = nfs_alloc_seqid(&lsp->ls_seqid, GFP_KERNEL); + status = -ENOMEM; + if (seqid == NULL) diff --git a/queue-3.8/series b/queue-3.8/series index 018540e4f5a..632089d1a41 100644 --- a/queue-3.8/series +++ b/queue-3.8/series @@ -65,3 +65,9 @@ drm-radeon-fix-possible-segfault-when-parsing-pm-tables.patch drm-radeon-add-new-richland-pci-ids.patch drm-radeon-fix-handling-of-v6-power-tables.patch tracing-fix-ftrace_dump.patch +btrfs-compare-relevant-parts-of-delayed-tree-refs.patch +btrfs-fix-extent-logging-with-o_direct-into-prealloc.patch +edac-don-t-give-write-permission-to-read-only-files.patch +nfsv4.x-fix-handling-of-partially-delegated-locks.patch +kernel-audit_tree.c-tree-will-leak-memory-when-failure-occurs-in-audit_trim_trees.patch +x86-mm-account-for-pgdir_size-alignment.patch diff --git a/queue-3.8/x86-mm-account-for-pgdir_size-alignment.patch b/queue-3.8/x86-mm-account-for-pgdir_size-alignment.patch new file mode 100644 index 00000000000..9fe03168e3d --- /dev/null +++ b/queue-3.8/x86-mm-account-for-pgdir_size-alignment.patch @@ -0,0 +1,59 @@ +From jerry.hoemann@hp.com Thu May 9 13:59:15 2013 +From: Jerry Hoemann +Date: Tue, 30 Apr 2013 15:15:55 -0600 +Subject: x86/mm: account for PGDIR_SIZE alignment +To: tglx@linutronix.de, mingo@redhat.com, hpa@zytor.com +Cc: x86@kernel.org, jacob.shin@amd.com, gregkh@linuxfoundation.org, yinghai@kernel.org, Jerry Hoemann +Message-ID: <1367356555-16320-1-git-send-email-jerry.hoemann@hp.com> + +From: Jerry Hoemann + +Patch for -stable. Function find_early_table_space removed upstream. + +Fixes panic in alloc_low_page due to pgt_buf overflow during +init_memory_mapping. + +find_early_table_space sizes pgt_buf based upon the size of the +memory being mapped, but it does not take into account the alignment +of the memory. When the region being mapped spans a 512GB (PGDIR_SIZE) +alignment, a panic from alloc_low_pages occurs. + +kernel_physical_mapping_init takes into account PGDIR_SIZE alignment. +This causes an extra call to alloc_low_page to be made. This extra call +isn't accounted for by find_early_table_space and causes a kernel panic. + +Change is to take into account PGDIR_SIZE alignment in find_early_table_space. + +Signed-off-by: Jerry Hoemann +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/mm/init.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/arch/x86/mm/init.c ++++ b/arch/x86/mm/init.c +@@ -45,11 +45,15 @@ static void __init find_early_table_spac + int i; + unsigned long puds = 0, pmds = 0, ptes = 0, tables; + unsigned long start = 0, good_end; ++ unsigned long pgd_extra = 0; + phys_addr_t base; + + for (i = 0; i < nr_range; i++) { + unsigned long range, extra; + ++ if ((mr[i].end >> PGDIR_SHIFT) - (mr[i].start >> PGDIR_SHIFT)) ++ pgd_extra++; ++ + range = mr[i].end - mr[i].start; + puds += (range + PUD_SIZE - 1) >> PUD_SHIFT; + +@@ -74,6 +78,7 @@ static void __init find_early_table_spac + tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); + tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); + tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); ++ tables += (pgd_extra * PAGE_SIZE); + + #ifdef CONFIG_X86_32 + /* for fixmap */ -- 2.47.3