From: Greg Kroah-Hartman Date: Tue, 30 Jun 2015 00:29:36 +0000 (-0700) Subject: 3.10-stable patches X-Git-Tag: v3.10.83~19 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=841bf18e9a2962dee8fb15abd0fd0343e67ce0ef;p=thirdparty%2Fkernel%2Fstable-queue.git 3.10-stable patches added patches: btrfs-make-xattr-replace-operations-atomic.patch fs-take-i_mutex-during-prepare_binprm-for-setid-executables.patch hpsa-add-missing-pci_set_master-in-kdump-path.patch hpsa-refine-the-pci-enable-disable-handling.patch ipv6-prevent-fib6_run_gc-contention.patch ipv6-update-ip6_rt_last_gc-every-time-gc-is-run.patch sb_edac-fix-erroneous-bytes-gigabytes-conversion.patch x86-microcode-intel-guard-against-stack-overflow-in-the-loader.patch xfrm-increase-the-garbage-collector-threshold.patch --- diff --git a/queue-3.10/btrfs-make-xattr-replace-operations-atomic.patch b/queue-3.10/btrfs-make-xattr-replace-operations-atomic.patch new file mode 100644 index 00000000000..d29e1186763 --- /dev/null +++ b/queue-3.10/btrfs-make-xattr-replace-operations-atomic.patch @@ -0,0 +1,311 @@ +From 5f5bc6b1e2d5a6f827bc860ef2dc5b6f365d1339 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Sun, 9 Nov 2014 08:38:39 +0000 +Subject: Btrfs: make xattr replace operations atomic + +From: Filipe Manana + +commit 5f5bc6b1e2d5a6f827bc860ef2dc5b6f365d1339 upstream. + +Replacing a xattr consists of doing a lookup for its existing value, delete +the current value from the respective leaf, release the search path and then +finally insert the new value. This leaves a time window where readers (getxattr, +listxattrs) won't see any value for the xattr. Xattrs are used to store ACLs, +so this has security implications. 
+ +This change also fixes 2 other existing issues which were: + +*) Deleting the old xattr value without verifying first if the new xattr will + fit in the existing leaf item (in case multiple xattrs are packed in the + same item due to name hash collision); + +*) Returning -EEXIST when the flag XATTR_CREATE is given and the xattr doesn't + exist but we have an existing item that packs multiple xattrs with + the same name hash as the input xattr. In this case we should return ENOSPC. + +A test case for xfstests follows soon. + +Thanks to Alexandre Oliva for reporting the non-atomicity of the xattr replace +implementation. + +Reported-by: Alexandre Oliva +Signed-off-by: Filipe Manana +Signed-off-by: Chris Mason +[shengyong: backport to 3.10 + - FIX: CVE-2014-9710 + - adjust context + - ASSERT() was added v3.12, so we do check with if statement + - set the first parameter of btrfs_item_nr() as NULL, because it is not + used, and is removed in v3.13 +] +Signed-off-by: Sheng Yong +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ctree.c | 2 + fs/btrfs/ctree.h | 5 + + fs/btrfs/dir-item.c | 10 --- + fs/btrfs/xattr.c | 159 +++++++++++++++++++++++++++++++++------------------- + 4 files changed, 111 insertions(+), 65 deletions(-) + +--- a/fs/btrfs/ctree.c ++++ b/fs/btrfs/ctree.c +@@ -2769,7 +2769,7 @@ done: + */ + if (!p->leave_spinning) + btrfs_set_path_blocking(p); +- if (ret < 0) ++ if (ret < 0 && !p->skip_release_on_error) + btrfs_release_path(p); + return ret; + } +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -586,6 +586,7 @@ struct btrfs_path { + unsigned int skip_locking:1; + unsigned int leave_spinning:1; + unsigned int search_commit_root:1; ++ unsigned int skip_release_on_error:1; + }; + + /* +@@ -3406,6 +3407,10 @@ struct btrfs_dir_item *btrfs_lookup_xatt + int verify_dir_item(struct btrfs_root *root, + struct extent_buffer *leaf, + struct btrfs_dir_item *dir_item); ++struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, ++ struct 
btrfs_path *path, ++ const char *name, ++ int name_len); + + /* orphan.c */ + int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans, +--- a/fs/btrfs/dir-item.c ++++ b/fs/btrfs/dir-item.c +@@ -21,10 +21,6 @@ + #include "hash.h" + #include "transaction.h" + +-static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, +- struct btrfs_path *path, +- const char *name, int name_len); +- + /* + * insert a name into a directory, doing overflow properly if there is a hash + * collision. data_size indicates how big the item inserted should be. On +@@ -383,9 +379,9 @@ struct btrfs_dir_item *btrfs_lookup_xatt + * this walks through all the entries in a dir item and finds one + * for a specific name. + */ +-static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, +- struct btrfs_path *path, +- const char *name, int name_len) ++struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, ++ struct btrfs_path *path, ++ const char *name, int name_len) + { + struct btrfs_dir_item *dir_item; + unsigned long name_ptr; +--- a/fs/btrfs/xattr.c ++++ b/fs/btrfs/xattr.c +@@ -27,6 +27,7 @@ + #include "transaction.h" + #include "xattr.h" + #include "disk-io.h" ++#include "locking.h" + + + ssize_t __btrfs_getxattr(struct inode *inode, const char *name, +@@ -89,7 +90,7 @@ static int do_setxattr(struct btrfs_tran + struct inode *inode, const char *name, + const void *value, size_t size, int flags) + { +- struct btrfs_dir_item *di; ++ struct btrfs_dir_item *di = NULL; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_path *path; + size_t name_len = strlen(name); +@@ -101,84 +102,128 @@ static int do_setxattr(struct btrfs_tran + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; ++ path->skip_release_on_error = 1; ++ ++ if (!value) { ++ di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), ++ name, name_len, -1); ++ if (!di && (flags & XATTR_REPLACE)) ++ ret = -ENODATA; ++ else if (di) ++ ret = 
btrfs_delete_one_dir_name(trans, root, path, di); ++ goto out; ++ } + ++ /* ++ * For a replace we can't just do the insert blindly. ++ * Do a lookup first (read-only btrfs_search_slot), and return if xattr ++ * doesn't exist. If it exists, fall down below to the insert/replace ++ * path - we can't race with a concurrent xattr delete, because the VFS ++ * locks the inode's i_mutex before calling setxattr or removexattr. ++ */ + if (flags & XATTR_REPLACE) { +- di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name, +- name_len, -1); +- if (IS_ERR(di)) { +- ret = PTR_ERR(di); +- goto out; +- } else if (!di) { ++ if(!mutex_is_locked(&inode->i_mutex)) { ++ pr_err("BTRFS: assertion failed: %s, file: %s, line: %d", ++ "mutex_is_locked(&inode->i_mutex)", __FILE__, ++ __LINE__); ++ BUG(); ++ } ++ di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode), ++ name, name_len, 0); ++ if (!di) { + ret = -ENODATA; + goto out; + } +- ret = btrfs_delete_one_dir_name(trans, root, path, di); +- if (ret) +- goto out; + btrfs_release_path(path); ++ di = NULL; ++ } + ++ ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode), ++ name, name_len, value, size); ++ if (ret == -EOVERFLOW) { + /* +- * remove the attribute ++ * We have an existing item in a leaf, split_leaf couldn't ++ * expand it. That item might have or not a dir_item that ++ * matches our target xattr, so lets check. 
+ */ +- if (!value) +- goto out; +- } else { +- di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode), +- name, name_len, 0); +- if (IS_ERR(di)) { +- ret = PTR_ERR(di); ++ ret = 0; ++ btrfs_assert_tree_locked(path->nodes[0]); ++ di = btrfs_match_dir_item_name(root, path, name, name_len); ++ if (!di && !(flags & XATTR_REPLACE)) { ++ ret = -ENOSPC; + goto out; + } +- if (!di && !value) +- goto out; +- btrfs_release_path(path); ++ } else if (ret == -EEXIST) { ++ ret = 0; ++ di = btrfs_match_dir_item_name(root, path, name, name_len); ++ if(!di) { /* logic error */ ++ pr_err("BTRFS: assertion failed: %s, file: %s, line: %d", ++ "di", __FILE__, __LINE__); ++ BUG(); ++ } ++ } else if (ret) { ++ goto out; + } + +-again: +- ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode), +- name, name_len, value, size); +- /* +- * If we're setting an xattr to a new value but the new value is say +- * exactly BTRFS_MAX_XATTR_SIZE, we could end up with EOVERFLOW getting +- * back from split_leaf. This is because it thinks we'll be extending +- * the existing item size, but we're asking for enough space to add the +- * item itself. So if we get EOVERFLOW just set ret to EEXIST and let +- * the rest of the function figure it out. +- */ +- if (ret == -EOVERFLOW) ++ if (di && (flags & XATTR_CREATE)) { + ret = -EEXIST; ++ goto out; ++ } + +- if (ret == -EEXIST) { +- if (flags & XATTR_CREATE) +- goto out; ++ if (di) { + /* +- * We can't use the path we already have since we won't have the +- * proper locking for a delete, so release the path and +- * re-lookup to delete the thing. ++ * We're doing a replace, and it must be atomic, that is, at ++ * any point in time we have either the old or the new xattr ++ * value in the tree. We don't want readers (getxattr and ++ * listxattrs) to miss a value, this is specially important ++ * for ACLs. 
+ */ +- btrfs_release_path(path); +- di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), +- name, name_len, -1); +- if (IS_ERR(di)) { +- ret = PTR_ERR(di); +- goto out; +- } else if (!di) { +- /* Shouldn't happen but just in case... */ +- btrfs_release_path(path); +- goto again; ++ const int slot = path->slots[0]; ++ struct extent_buffer *leaf = path->nodes[0]; ++ const u16 old_data_len = btrfs_dir_data_len(leaf, di); ++ const u32 item_size = btrfs_item_size_nr(leaf, slot); ++ const u32 data_size = sizeof(*di) + name_len + size; ++ struct btrfs_item *item; ++ unsigned long data_ptr; ++ char *ptr; ++ ++ if (size > old_data_len) { ++ if (btrfs_leaf_free_space(root, leaf) < ++ (size - old_data_len)) { ++ ret = -ENOSPC; ++ goto out; ++ } + } + +- ret = btrfs_delete_one_dir_name(trans, root, path, di); +- if (ret) +- goto out; ++ if (old_data_len + name_len + sizeof(*di) == item_size) { ++ /* No other xattrs packed in the same leaf item. */ ++ if (size > old_data_len) ++ btrfs_extend_item(root, path, ++ size - old_data_len); ++ else if (size < old_data_len) ++ btrfs_truncate_item(root, path, data_size, 1); ++ } else { ++ /* There are other xattrs packed in the same item. */ ++ ret = btrfs_delete_one_dir_name(trans, root, path, di); ++ if (ret) ++ goto out; ++ btrfs_extend_item(root, path, data_size); ++ } + ++ item = btrfs_item_nr(NULL, slot); ++ ptr = btrfs_item_ptr(leaf, slot, char); ++ ptr += btrfs_item_size(leaf, item) - data_size; ++ di = (struct btrfs_dir_item *)ptr; ++ btrfs_set_dir_data_len(leaf, di, size); ++ data_ptr = ((unsigned long)(di + 1)) + name_len; ++ write_extent_buffer(leaf, value, data_ptr, size); ++ btrfs_mark_buffer_dirty(leaf); ++ } else { + /* +- * We have a value to set, so go back and try to insert it now. ++ * Insert, and we had space for the xattr, so path->slots[0] is ++ * where our xattr dir_item is and btrfs_insert_xattr_item() ++ * filled it. 
+ */ +- if (value) { +- btrfs_release_path(path); +- goto again; +- } + } + out: + btrfs_free_path(path); diff --git a/queue-3.10/fs-take-i_mutex-during-prepare_binprm-for-setid-executables.patch b/queue-3.10/fs-take-i_mutex-during-prepare_binprm-for-setid-executables.patch new file mode 100644 index 00000000000..ea3f6b05961 --- /dev/null +++ b/queue-3.10/fs-take-i_mutex-during-prepare_binprm-for-setid-executables.patch @@ -0,0 +1,123 @@ +From 5176b77f1aacdc560eaeac4685ade444bb814689 Mon Sep 17 00:00:00 2001 +From: Jann Horn +Date: Sun, 19 Apr 2015 02:48:39 +0200 +Subject: fs: take i_mutex during prepare_binprm for set[ug]id executables + +From: Jann Horn + +commit 8b01fc86b9f425899f8a3a8fc1c47d73c2c20543 upstream. + +This prevents a race between chown() and execve(), where chowning a +setuid-user binary to root would momentarily make the binary setuid +root. + +This patch was mostly written by Linus Torvalds. + +Signed-off-by: Jann Horn +Signed-off-by: Linus Torvalds +Signed-off-by: Charles Williams +Signed-off-by: Jiri Slaby +Signed-off-by: Sheng Yong +Signed-off-by: Greg Kroah-Hartman + +--- + fs/exec.c | 76 +++++++++++++++++++++++++++++++++++++++----------------------- + 1 file changed, 48 insertions(+), 28 deletions(-) + +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -1265,6 +1265,53 @@ static int check_unsafe_exec(struct linu + return res; + } + ++static void bprm_fill_uid(struct linux_binprm *bprm) ++{ ++ struct inode *inode; ++ unsigned int mode; ++ kuid_t uid; ++ kgid_t gid; ++ ++ /* clear any previous set[ug]id data from a previous binary */ ++ bprm->cred->euid = current_euid(); ++ bprm->cred->egid = current_egid(); ++ ++ if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) ++ return; ++ ++ if (current->no_new_privs) ++ return; ++ ++ inode = file_inode(bprm->file); ++ mode = ACCESS_ONCE(inode->i_mode); ++ if (!(mode & (S_ISUID|S_ISGID))) ++ return; ++ ++ /* Be careful if suid/sgid is set */ ++ mutex_lock(&inode->i_mutex); ++ ++ /* reload atomically mode/uid/gid now 
that lock held */ ++ mode = inode->i_mode; ++ uid = inode->i_uid; ++ gid = inode->i_gid; ++ mutex_unlock(&inode->i_mutex); ++ ++ /* We ignore suid/sgid if there are no mappings for them in the ns */ ++ if (!kuid_has_mapping(bprm->cred->user_ns, uid) || ++ !kgid_has_mapping(bprm->cred->user_ns, gid)) ++ return; ++ ++ if (mode & S_ISUID) { ++ bprm->per_clear |= PER_CLEAR_ON_SETID; ++ bprm->cred->euid = uid; ++ } ++ ++ if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { ++ bprm->per_clear |= PER_CLEAR_ON_SETID; ++ bprm->cred->egid = gid; ++ } ++} ++ + /* + * Fill the binprm structure from the inode. + * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes +@@ -1273,39 +1320,12 @@ static int check_unsafe_exec(struct linu + */ + int prepare_binprm(struct linux_binprm *bprm) + { +- umode_t mode; +- struct inode * inode = file_inode(bprm->file); + int retval; + +- mode = inode->i_mode; + if (bprm->file->f_op == NULL) + return -EACCES; + +- /* clear any previous set[ug]id data from a previous binary */ +- bprm->cred->euid = current_euid(); +- bprm->cred->egid = current_egid(); +- +- if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) && +- !current->no_new_privs && +- kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) && +- kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) { +- /* Set-uid? */ +- if (mode & S_ISUID) { +- bprm->per_clear |= PER_CLEAR_ON_SETID; +- bprm->cred->euid = inode->i_uid; +- } +- +- /* Set-gid? */ +- /* +- * If setgid is set but no group execute bit then this +- * is a candidate for mandatory locking, not a setgid +- * executable. 
+- */ +- if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { +- bprm->per_clear |= PER_CLEAR_ON_SETID; +- bprm->cred->egid = inode->i_gid; +- } +- } ++ bprm_fill_uid(bprm); + + /* fill in binprm security blob */ + retval = security_bprm_set_creds(bprm); diff --git a/queue-3.10/hpsa-add-missing-pci_set_master-in-kdump-path.patch b/queue-3.10/hpsa-add-missing-pci_set_master-in-kdump-path.patch new file mode 100644 index 00000000000..04977c0e603 --- /dev/null +++ b/queue-3.10/hpsa-add-missing-pci_set_master-in-kdump-path.patch @@ -0,0 +1,34 @@ +From 859c75aba20264d87dd026bab0d0ca3bff385955 Mon Sep 17 00:00:00 2001 +From: Tomas Henzl +Date: Fri, 12 Sep 2014 14:44:15 +0200 +Subject: hpsa: add missing pci_set_master in kdump path + +From: Tomas Henzl + +commit 859c75aba20264d87dd026bab0d0ca3bff385955 upstream. + +Add a call to pci_set_master(...) missing in the previous +patch "hpsa: refine the pci enable/disable handling". +Found thanks to Rob Elliot. + +Signed-off-by: Tomas Henzl +Reviewed-by: Robert Elliott +Tested-by: Robert Elliott +Signed-off-by: Christoph Hellwig +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/scsi/hpsa.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/scsi/hpsa.c ++++ b/drivers/scsi/hpsa.c +@@ -4455,7 +4455,7 @@ static int hpsa_init_reset_devices(struc + dev_warn(&pdev->dev, "failed to enable device.\n"); + return -ENODEV; + } +- ++ pci_set_master(pdev); + /* Reset the controller with a PCI power-cycle or via doorbell */ + rc = hpsa_kdump_hard_reset_controller(pdev); + diff --git a/queue-3.10/hpsa-refine-the-pci-enable-disable-handling.patch b/queue-3.10/hpsa-refine-the-pci-enable-disable-handling.patch new file mode 100644 index 00000000000..b11c53bfdc1 --- /dev/null +++ b/queue-3.10/hpsa-refine-the-pci-enable-disable-handling.patch @@ -0,0 +1,116 @@ +From 132aa220b45d60e9b20def1e9d8be9422eed9616 Mon Sep 17 00:00:00 2001 +From: Tomas Henzl +Date: Thu, 14 Aug 2014 16:12:39 +0200 +Subject: hpsa: refine the pci 
enable/disable handling + +From: Tomas Henzl + +commit 132aa220b45d60e9b20def1e9d8be9422eed9616 upstream. + +When a second(kdump) kernel starts and the hard reset method is used +the driver calls pci_disable_device without previously enabling it, +so the kernel shows a warning - +[ 16.876248] WARNING: at drivers/pci/pci.c:1431 pci_disable_device+0x84/0x90() +[ 16.882686] Device hpsa +disabling already-disabled device +... +This patch fixes it, in addition to this I tried to balance also some other pairs +of enable/disable device in the driver. +Unfortunately I wasn't able to verify the functionality for the case of a sw reset, +because of a lack of proper hw. + +Signed-off-by: Tomas Henzl +Reviewed-by: Stephen M. Cameron +Signed-off-by: Christoph Hellwig +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/scsi/hpsa.c | 42 ++++++++++++++++++++++++++++-------------- + 1 file changed, 28 insertions(+), 14 deletions(-) + +--- a/drivers/scsi/hpsa.c ++++ b/drivers/scsi/hpsa.c +@@ -3898,10 +3898,6 @@ static int hpsa_kdump_hard_reset_control + + /* Save the PCI command register */ + pci_read_config_word(pdev, 4, &command_register); +- /* Turn the board off. This is so that later pci_restore_state() +- * won't turn the board on before the rest of config space is ready. +- */ +- pci_disable_device(pdev); + pci_save_state(pdev); + + /* find the first memory BAR, so we can find the cfg table */ +@@ -3949,11 +3945,6 @@ static int hpsa_kdump_hard_reset_control + goto unmap_cfgtable; + + pci_restore_state(pdev); +- rc = pci_enable_device(pdev); +- if (rc) { +- dev_warn(&pdev->dev, "failed to enable device.\n"); +- goto unmap_cfgtable; +- } + pci_write_config_word(pdev, 4, command_register); + + /* Some devices (notably the HP Smart Array 5i Controller) +@@ -4448,6 +4439,23 @@ static int hpsa_init_reset_devices(struc + if (!reset_devices) + return 0; + ++ /* kdump kernel is loading, we don't know in which state is ++ * the pci interface. 
The dev->enable_cnt is equal zero ++ * so we call enable+disable, wait a while and switch it on. ++ */ ++ rc = pci_enable_device(pdev); ++ if (rc) { ++ dev_warn(&pdev->dev, "Failed to enable PCI device\n"); ++ return -ENODEV; ++ } ++ pci_disable_device(pdev); ++ msleep(260); /* a randomly chosen number */ ++ rc = pci_enable_device(pdev); ++ if (rc) { ++ dev_warn(&pdev->dev, "failed to enable device.\n"); ++ return -ENODEV; ++ } ++ + /* Reset the controller with a PCI power-cycle or via doorbell */ + rc = hpsa_kdump_hard_reset_controller(pdev); + +@@ -4456,10 +4464,11 @@ static int hpsa_init_reset_devices(struc + * "performant mode". Or, it might be 640x, which can't reset + * due to concerns about shared bbwc between 6402/6404 pair. + */ +- if (rc == -ENOTSUPP) +- return rc; /* just try to do the kdump anyhow. */ +- if (rc) +- return -ENODEV; ++ if (rc) { ++ if (rc != -ENOTSUPP) /* just try to do the kdump anyhow. */ ++ rc = -ENODEV; ++ goto out_disable; ++ } + + /* Now try to get the controller to respond to a no-op */ + dev_warn(&pdev->dev, "Waiting for controller to respond to no-op\n"); +@@ -4470,7 +4479,11 @@ static int hpsa_init_reset_devices(struc + dev_warn(&pdev->dev, "no-op failed%s\n", + (i < 11 ? 
"; re-trying" : "")); + } +- return 0; ++ ++out_disable: ++ ++ pci_disable_device(pdev); ++ return rc; + } + + static int hpsa_allocate_cmd_pool(struct ctlr_info *h) +@@ -4613,6 +4626,7 @@ static void hpsa_undo_allocations_after_ + iounmap(h->transtable); + if (h->cfgtable) + iounmap(h->cfgtable); ++ pci_disable_device(h->pdev); + pci_release_regions(h->pdev); + kfree(h); + } diff --git a/queue-3.10/ipv6-prevent-fib6_run_gc-contention.patch b/queue-3.10/ipv6-prevent-fib6_run_gc-contention.patch new file mode 100644 index 00000000000..c79b53a35aa --- /dev/null +++ b/queue-3.10/ipv6-prevent-fib6_run_gc-contention.patch @@ -0,0 +1,128 @@ +From 2ac3ac8f86f2fe065d746d9a9abaca867adec577 Mon Sep 17 00:00:00 2001 +From: Michal Kubeček +Date: Thu, 1 Aug 2013 10:04:14 +0200 +Subject: ipv6: prevent fib6_run_gc() contention + +From: Michal Kubeček + +commit 2ac3ac8f86f2fe065d746d9a9abaca867adec577 upstream. + +On a high-traffic router with many processors and many IPv6 dst +entries, soft lockup in fib6_run_gc() can occur when number of +entries reaches gc_thresh. + +This happens because fib6_run_gc() uses fib6_gc_lock to allow +only one thread to run the garbage collector but ip6_dst_gc() +doesn't update net->ipv6.ip6_rt_last_gc until fib6_run_gc() +returns. On a system with many entries, this can take some time +so that in the meantime, other threads pass the tests in +ip6_dst_gc() (ip6_rt_last_gc is still not updated) and wait for +the lock. They then have to run the garbage collector one after +another which blocks them for quite long. + +Resolve this by replacing special value ~0UL of expire parameter +to fib6_run_gc() by explicit "force" parameter to choose between +spin_lock_bh() and spin_trylock_bh() and call fib6_run_gc() with +force=false if gc_thresh is reached but not max_size. + +Signed-off-by: Michal Kubecek +Signed-off-by: David S. 
Miller +Cc: Konstantin Khlebnikov +Signed-off-by: Greg Kroah-Hartman + +--- + include/net/ip6_fib.h | 2 +- + net/ipv6/ip6_fib.c | 19 ++++++++----------- + net/ipv6/ndisc.c | 4 ++-- + net/ipv6/route.c | 4 ++-- + 4 files changed, 13 insertions(+), 16 deletions(-) + +--- a/include/net/ip6_fib.h ++++ b/include/net/ip6_fib.h +@@ -301,7 +301,7 @@ extern void inet6_rt_notify(int event, + struct nl_info *info); + + extern void fib6_run_gc(unsigned long expires, +- struct net *net); ++ struct net *net, bool force); + + extern void fib6_gc_cleanup(void); + +--- a/net/ipv6/ip6_fib.c ++++ b/net/ipv6/ip6_fib.c +@@ -1648,19 +1648,16 @@ static int fib6_age(struct rt6_info *rt, + + static DEFINE_SPINLOCK(fib6_gc_lock); + +-void fib6_run_gc(unsigned long expires, struct net *net) ++void fib6_run_gc(unsigned long expires, struct net *net, bool force) + { +- if (expires != ~0UL) { ++ if (force) { + spin_lock_bh(&fib6_gc_lock); +- gc_args.timeout = expires ? (int)expires : +- net->ipv6.sysctl.ip6_rt_gc_interval; +- } else { +- if (!spin_trylock_bh(&fib6_gc_lock)) { +- mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ); +- return; +- } +- gc_args.timeout = net->ipv6.sysctl.ip6_rt_gc_interval; ++ } else if (!spin_trylock_bh(&fib6_gc_lock)) { ++ mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ); ++ return; + } ++ gc_args.timeout = expires ? 
(int)expires : ++ net->ipv6.sysctl.ip6_rt_gc_interval; + + gc_args.more = icmp6_dst_gc(); + +@@ -1677,7 +1674,7 @@ void fib6_run_gc(unsigned long expires, + + static void fib6_gc_timer_cb(unsigned long arg) + { +- fib6_run_gc(0, (struct net *)arg); ++ fib6_run_gc(0, (struct net *)arg, true); + } + + static int __net_init fib6_net_init(struct net *net) +--- a/net/ipv6/ndisc.c ++++ b/net/ipv6/ndisc.c +@@ -1584,7 +1584,7 @@ static int ndisc_netdev_event(struct not + switch (event) { + case NETDEV_CHANGEADDR: + neigh_changeaddr(&nd_tbl, dev); +- fib6_run_gc(~0UL, net); ++ fib6_run_gc(0, net, false); + idev = in6_dev_get(dev); + if (!idev) + break; +@@ -1594,7 +1594,7 @@ static int ndisc_netdev_event(struct not + break; + case NETDEV_DOWN: + neigh_ifdown(&nd_tbl, dev); +- fib6_run_gc(~0UL, net); ++ fib6_run_gc(0, net, false); + break; + case NETDEV_NOTIFY_PEERS: + ndisc_send_unsol_na(dev); +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -1349,7 +1349,7 @@ static int ip6_dst_gc(struct dst_ops *op + goto out; + + net->ipv6.ip6_rt_gc_expire++; +- fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); ++ fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size); + net->ipv6.ip6_rt_last_gc = now; + entries = dst_entries_get_slow(ops); + if (entries < ops->gc_thresh) +@@ -2849,7 +2849,7 @@ int ipv6_sysctl_rtcache_flush(ctl_table + net = (struct net *)ctl->extra1; + delay = net->ipv6.sysctl.flush_delay; + proc_dointvec(ctl, write, buffer, lenp, ppos); +- fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net); ++ fib6_run_gc(delay <= 0 ? 
0 : (unsigned long)delay, net, delay > 0); + return 0; + } + diff --git a/queue-3.10/ipv6-update-ip6_rt_last_gc-every-time-gc-is-run.patch b/queue-3.10/ipv6-update-ip6_rt_last_gc-every-time-gc-is-run.patch new file mode 100644 index 00000000000..eacafb11e71 --- /dev/null +++ b/queue-3.10/ipv6-update-ip6_rt_last_gc-every-time-gc-is-run.patch @@ -0,0 +1,74 @@ +From 49a18d86f66d33a20144ecb5a34bba0d1856b260 Mon Sep 17 00:00:00 2001 +From: Michal Kubeček +Date: Thu, 1 Aug 2013 10:04:24 +0200 +Subject: ipv6: update ip6_rt_last_gc every time GC is run + +From: Michal Kubeček + +commit 49a18d86f66d33a20144ecb5a34bba0d1856b260 upstream. + +As pointed out by Eric Dumazet, net->ipv6.ip6_rt_last_gc should +hold the last time garbage collector was run so that we should +update it whenever fib6_run_gc() calls fib6_clean_all(), not only +if we got there from ip6_dst_gc(). + +Signed-off-by: Michal Kubecek +Signed-off-by: David S. Miller +Cc: Konstantin Khlebnikov +Signed-off-by: Greg Kroah-Hartman + +--- + net/ipv6/ip6_fib.c | 6 +++++- + net/ipv6/route.c | 4 +--- + 2 files changed, 6 insertions(+), 4 deletions(-) + +--- a/net/ipv6/ip6_fib.c ++++ b/net/ipv6/ip6_fib.c +@@ -1650,6 +1650,8 @@ static DEFINE_SPINLOCK(fib6_gc_lock); + + void fib6_run_gc(unsigned long expires, struct net *net, bool force) + { ++ unsigned long now; ++ + if (force) { + spin_lock_bh(&fib6_gc_lock); + } else if (!spin_trylock_bh(&fib6_gc_lock)) { +@@ -1662,10 +1664,12 @@ void fib6_run_gc(unsigned long expires, + gc_args.more = icmp6_dst_gc(); + + fib6_clean_all(net, fib6_age, 0, NULL); ++ now = jiffies; ++ net->ipv6.ip6_rt_last_gc = now; + + if (gc_args.more) + mod_timer(&net->ipv6.ip6_fib_timer, +- round_jiffies(jiffies ++ round_jiffies(now + + net->ipv6.sysctl.ip6_rt_gc_interval)); + else + del_timer(&net->ipv6.ip6_fib_timer); +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -1334,7 +1334,6 @@ static void icmp6_clean_all(int (*func)( + + static int ip6_dst_gc(struct dst_ops *ops) + { +- unsigned long now 
= jiffies; + struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops); + int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval; + int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size; +@@ -1344,13 +1343,12 @@ static int ip6_dst_gc(struct dst_ops *op + int entries; + + entries = dst_entries_get_fast(ops); +- if (time_after(rt_last_gc + rt_min_interval, now) && ++ if (time_after(rt_last_gc + rt_min_interval, jiffies) && + entries <= rt_max_size) + goto out; + + net->ipv6.ip6_rt_gc_expire++; + fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size); +- net->ipv6.ip6_rt_last_gc = now; + entries = dst_entries_get_slow(ops); + if (entries < ops->gc_thresh) + net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; diff --git a/queue-3.10/sb_edac-fix-erroneous-bytes-gigabytes-conversion.patch b/queue-3.10/sb_edac-fix-erroneous-bytes-gigabytes-conversion.patch new file mode 100644 index 00000000000..b0454621d3b --- /dev/null +++ b/queue-3.10/sb_edac-fix-erroneous-bytes-gigabytes-conversion.patch @@ -0,0 +1,142 @@ +From 0fd2dc596b4cbfe1cafc157c2b4ea30dbb95bece Mon Sep 17 00:00:00 2001 +From: Jim Snow +Date: Tue, 18 Nov 2014 14:51:09 +0100 +Subject: sb_edac: Fix erroneous bytes->gigabytes conversion + +From: Jim Snow + +commit 8c009100295597f23978c224aec5751a365bc965 upstream. 
+ +Signed-off-by: Jim Snow +Signed-off-by: Lukasz Anaczkowski +Signed-off-by: Mauro Carvalho Chehab +Signed-off-by: Jiri Slaby +Cc: Vinson Lee +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/edac/sb_edac.c | 38 ++++++++++++++++++++------------------ + 1 file changed, 20 insertions(+), 18 deletions(-) + +--- a/drivers/edac/sb_edac.c ++++ b/drivers/edac/sb_edac.c +@@ -623,7 +623,7 @@ static void get_memory_layout(const stru + u32 reg; + u64 limit, prv = 0; + u64 tmp_mb; +- u32 mb, kb; ++ u32 gb, mb; + u32 rir_way; + + /* +@@ -636,8 +636,9 @@ static void get_memory_layout(const stru + pvt->tolm = GET_TOLM(reg); + tmp_mb = (1 + pvt->tolm) >> 20; + +- mb = div_u64_rem(tmp_mb, 1000, &kb); +- edac_dbg(0, "TOLM: %u.%03u GB (0x%016Lx)\n", mb, kb, (u64)pvt->tolm); ++ gb = div_u64_rem(tmp_mb, 1024, &mb); ++ edac_dbg(0, "TOLM: %u.%03u GB (0x%016Lx)\n", ++ gb, (mb*1000)/1024, (u64)pvt->tolm); + + /* Address range is already 45:25 */ + pci_read_config_dword(pvt->pci_sad1, TOHM, +@@ -645,8 +646,9 @@ static void get_memory_layout(const stru + pvt->tohm = GET_TOHM(reg); + tmp_mb = (1 + pvt->tohm) >> 20; + +- mb = div_u64_rem(tmp_mb, 1000, &kb); +- edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)\n", mb, kb, (u64)pvt->tohm); ++ gb = div_u64_rem(tmp_mb, 1024, &mb); ++ edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)\n", ++ gb, (mb*1000)/1024, (u64)pvt->tohm); + + /* + * Step 2) Get SAD range and SAD Interleave list +@@ -668,11 +670,11 @@ static void get_memory_layout(const stru + break; + + tmp_mb = (limit + 1) >> 20; +- mb = div_u64_rem(tmp_mb, 1000, &kb); ++ gb = div_u64_rem(tmp_mb, 1024, &mb); + edac_dbg(0, "SAD#%d %s up to %u.%03u GB (0x%016Lx) Interleave: %s reg=0x%08x\n", + n_sads, + get_dram_attr(reg), +- mb, kb, ++ gb, (mb*1000)/1024, + ((u64)tmp_mb) << 20L, + INTERLEAVE_MODE(reg) ? 
"8:6" : "[8:6]XOR[18:16]", + reg); +@@ -702,9 +704,9 @@ static void get_memory_layout(const stru + break; + tmp_mb = (limit + 1) >> 20; + +- mb = div_u64_rem(tmp_mb, 1000, &kb); ++ gb = div_u64_rem(tmp_mb, 1024, &mb); + edac_dbg(0, "TAD#%d: up to %u.%03u GB (0x%016Lx), socket interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n", +- n_tads, mb, kb, ++ n_tads, gb, (mb*1000)/1024, + ((u64)tmp_mb) << 20L, + (u32)TAD_SOCK(reg), + (u32)TAD_CH(reg), +@@ -727,10 +729,10 @@ static void get_memory_layout(const stru + tad_ch_nilv_offset[j], + &reg); + tmp_mb = TAD_OFFSET(reg) >> 20; +- mb = div_u64_rem(tmp_mb, 1000, &kb); ++ gb = div_u64_rem(tmp_mb, 1024, &mb); + edac_dbg(0, "TAD CH#%d, offset #%d: %u.%03u GB (0x%016Lx), reg=0x%08x\n", + i, j, +- mb, kb, ++ gb, (mb*1000)/1024, + ((u64)tmp_mb) << 20L, + reg); + } +@@ -752,10 +754,10 @@ static void get_memory_layout(const stru + + tmp_mb = RIR_LIMIT(reg) >> 20; + rir_way = 1 << RIR_WAY(reg); +- mb = div_u64_rem(tmp_mb, 1000, &kb); ++ gb = div_u64_rem(tmp_mb, 1024, &mb); + edac_dbg(0, "CH#%d RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d, reg=0x%08x\n", + i, j, +- mb, kb, ++ gb, (mb*1000)/1024, + ((u64)tmp_mb) << 20L, + rir_way, + reg); +@@ -766,10 +768,10 @@ static void get_memory_layout(const stru + &reg); + tmp_mb = RIR_OFFSET(reg) << 6; + +- mb = div_u64_rem(tmp_mb, 1000, &kb); ++ gb = div_u64_rem(tmp_mb, 1024, &mb); + edac_dbg(0, "CH#%d RIR#%d INTL#%d, offset %u.%03u GB (0x%016Lx), tgt: %d, reg=0x%08x\n", + i, j, k, +- mb, kb, ++ gb, (mb*1000)/1024, + ((u64)tmp_mb) << 20L, + (u32)RIR_RNK_TGT(reg), + reg); +@@ -806,7 +808,7 @@ static int get_memory_error_data(struct + u8 ch_way,sck_way; + u32 tad_offset; + u32 rir_way; +- u32 mb, kb; ++ u32 mb, gb; + u64 ch_addr, offset, limit, prv = 0; + + +@@ -1022,10 +1024,10 @@ static int get_memory_error_data(struct + continue; + + limit = RIR_LIMIT(reg); +- mb = div_u64_rem(limit >> 20, 1000, &kb); ++ gb = div_u64_rem(limit >> 20, 1024, &mb); + edac_dbg(0, "RIR#%d, 
limit: 
%u.%03u GB (0x%016Lx), way: %d\n", + n_rir, +- mb, kb, ++ gb, (mb*1000)/1024, + limit, + 1 << RIR_WAY(reg)); + if (ch_addr <= limit) diff --git a/queue-3.10/series b/queue-3.10/series index f1d4f1c0ed3..f34f2ec2229 100644 --- a/queue-3.10/series +++ b/queue-3.10/series @@ -7,3 +7,12 @@ include-linux-sched.h-don-t-use-task-pid-tgid-in.patch __ptrace_may_access-should-not-deny-sub-threads.patch acpica-utilities-cleanup-to-convert-physical-address-printing-formats.patch acpica-utilities-cleanup-to-remove-useless-acpi_printf-format_xxx-helpers.patch +sb_edac-fix-erroneous-bytes-gigabytes-conversion.patch +hpsa-refine-the-pci-enable-disable-handling.patch +hpsa-add-missing-pci_set_master-in-kdump-path.patch +fs-take-i_mutex-during-prepare_binprm-for-setid-executables.patch +x86-microcode-intel-guard-against-stack-overflow-in-the-loader.patch +btrfs-make-xattr-replace-operations-atomic.patch +xfrm-increase-the-garbage-collector-threshold.patch +ipv6-prevent-fib6_run_gc-contention.patch +ipv6-update-ip6_rt_last_gc-every-time-gc-is-run.patch diff --git a/queue-3.10/x86-microcode-intel-guard-against-stack-overflow-in-the-loader.patch b/queue-3.10/x86-microcode-intel-guard-against-stack-overflow-in-the-loader.patch new file mode 100644 index 00000000000..c70c779c35c --- /dev/null +++ b/queue-3.10/x86-microcode-intel-guard-against-stack-overflow-in-the-loader.patch @@ -0,0 +1,38 @@ +From c5988181af3b41381c4d20e08ca6852f99f95417 Mon Sep 17 00:00:00 2001 +From: Quentin Casasnovas +Date: Tue, 3 Feb 2015 13:00:22 +0100 +Subject: x86/microcode/intel: Guard against stack overflow in the loader + +From: Quentin Casasnovas + +commit f84598bd7c851f8b0bf8cd0d7c3be0d73c432ff4 upstream. + +mc_saved_tmp is a static array allocated on the stack, we need to make +sure mc_saved_count stays within its bounds, otherwise we're overflowing +the stack in _save_mc(). A specially crafted microcode header could lead +to a kernel crash or potentially kernel execution. 
+ +Signed-off-by: Quentin Casasnovas +Cc: "H. Peter Anvin" +Cc: Fenghua Yu +Link: http://lkml.kernel.org/r/1422964824-22056-1-git-send-email-quentin.casasnovas@oracle.com +Signed-off-by: Borislav Petkov +Signed-off-by: Jiri Slaby +Signed-off-by: Sheng Yong +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/microcode_intel_early.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kernel/microcode_intel_early.c ++++ b/arch/x86/kernel/microcode_intel_early.c +@@ -321,7 +321,7 @@ get_matching_model_microcode(int cpu, un + unsigned int mc_saved_count = mc_saved_data->mc_saved_count; + int i; + +- while (leftover) { ++ while (leftover && mc_saved_count < ARRAY_SIZE(mc_saved_tmp)) { + mc_header = (struct microcode_header_intel *)ucode_ptr; + + mc_size = get_totalsize(mc_header); diff --git a/queue-3.10/xfrm-increase-the-garbage-collector-threshold.patch b/queue-3.10/xfrm-increase-the-garbage-collector-threshold.patch new file mode 100644 index 00000000000..fbb067e5098 --- /dev/null +++ b/queue-3.10/xfrm-increase-the-garbage-collector-threshold.patch @@ -0,0 +1,54 @@ +From eeb1b73378b560e00ff1da2ef09fed9254f4e128 Mon Sep 17 00:00:00 2001 +From: Steffen Klassert +Date: Fri, 25 Oct 2013 10:21:32 +0200 +Subject: xfrm: Increase the garbage collector threshold + +From: Steffen Klassert + +commit eeb1b73378b560e00ff1da2ef09fed9254f4e128 upstream. + +With the removal of the routing cache, we lost the +option to tweak the garbage collector threshold +along with the maximum routing cache size. So git +commit 703fb94ec ("xfrm: Fix the gc threshold value +for ipv4") moved back to a static threshold. + +It turned out that the current threshold before we +start garbage collecting is much too small for some +workloads, so increase it from 1024 to 32768. This +means that we start the garbage collector if we have +more than 32768 dst entries in the system and refuse +new allocations if we are above 65536. 
+ +Reported-by: Wolfgang Walter +Signed-off-by: Steffen Klassert +Cc: Stephen Hemminger +Signed-off-by: Greg Kroah-Hartman + +--- + net/ipv4/xfrm4_policy.c | 2 +- + net/ipv6/xfrm6_policy.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +--- a/net/ipv4/xfrm4_policy.c ++++ b/net/ipv4/xfrm4_policy.c +@@ -235,7 +235,7 @@ static struct dst_ops xfrm4_dst_ops = { + .destroy = xfrm4_dst_destroy, + .ifdown = xfrm4_dst_ifdown, + .local_out = __ip_local_out, +- .gc_thresh = 1024, ++ .gc_thresh = 32768, + }; + + static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { +--- a/net/ipv6/xfrm6_policy.c ++++ b/net/ipv6/xfrm6_policy.c +@@ -284,7 +284,7 @@ static struct dst_ops xfrm6_dst_ops = { + .destroy = xfrm6_dst_destroy, + .ifdown = xfrm6_dst_ifdown, + .local_out = __ip6_local_out, +- .gc_thresh = 1024, ++ .gc_thresh = 32768, + }; + + static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {