git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.12-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 22 Jan 2014 20:26:10 +0000 (12:26 -0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 22 Jan 2014 20:26:10 +0000 (12:26 -0800)
added patches:
fork-allow-clone_parent-after-setns-clone_newpid.patch
ftrace-x86-load-ftrace_ops-in-parameter-not-the-variable-holding-it.patch
hwmon-coretemp-fix-truncated-name-of-alarm-attributes.patch
i2c-re-instate-body-of-i2c_parent_is_i2c_adapter.patch
selinux-fix-possible-null-pointer-dereference-in-selinux_inode_permission.patch
thp-fix-copy_page_rep-gpf-by-testing-is_huge_zero_pmd-once-only.patch
vfs-fix-a-regression-in-mounting-proc.patch
vfs-in-d_path-don-t-call-d_dname-on-a-mount-point.patch
writeback-fix-data-corruption-on-nfs.patch

queue-3.12/fork-allow-clone_parent-after-setns-clone_newpid.patch [new file with mode: 0644]
queue-3.12/ftrace-x86-load-ftrace_ops-in-parameter-not-the-variable-holding-it.patch [new file with mode: 0644]
queue-3.12/hwmon-coretemp-fix-truncated-name-of-alarm-attributes.patch [new file with mode: 0644]
queue-3.12/i2c-re-instate-body-of-i2c_parent_is_i2c_adapter.patch [new file with mode: 0644]
queue-3.12/selinux-fix-possible-null-pointer-dereference-in-selinux_inode_permission.patch [new file with mode: 0644]
queue-3.12/series
queue-3.12/thp-fix-copy_page_rep-gpf-by-testing-is_huge_zero_pmd-once-only.patch [new file with mode: 0644]
queue-3.12/vfs-fix-a-regression-in-mounting-proc.patch [new file with mode: 0644]
queue-3.12/vfs-in-d_path-don-t-call-d_dname-on-a-mount-point.patch [new file with mode: 0644]
queue-3.12/writeback-fix-data-corruption-on-nfs.patch [new file with mode: 0644]

diff --git a/queue-3.12/fork-allow-clone_parent-after-setns-clone_newpid.patch b/queue-3.12/fork-allow-clone_parent-after-setns-clone_newpid.patch
new file mode 100644 (file)
index 0000000..0fe3be0
--- /dev/null
@@ -0,0 +1,73 @@
+From 1f7f4dde5c945f41a7abc2285be43d918029ecc5 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Thu, 14 Nov 2013 21:10:16 -0800
+Subject: fork:  Allow CLONE_PARENT after setns(CLONE_NEWPID)
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit 1f7f4dde5c945f41a7abc2285be43d918029ecc5 upstream.
+
+Serge Hallyn <serge.hallyn@ubuntu.com> writes:
+> Hi Oleg,
+>
+> commit 40a0d32d1eaffe6aac7324ca92604b6b3977eb0e :
+> "fork: unify and tighten up CLONE_NEWUSER/CLONE_NEWPID checks"
+> breaks lxc-attach in 3.12.  That code forks a child which does
+> setns() and then does a clone(CLONE_PARENT).  That way the
+> grandchild can be in the right namespaces (which the child was
+> not) and be a child of the original task, which is the monitor.
+>
+> lxc-attach in 3.11 was working fine with no side effects that I
+> could see.  Is there a real danger in allowing CLONE_PARENT
+> when current->nsproxy->pidns_for_children is not our pidns,
+> or was this done out of an "over-abundance of caution"?  Can we
+> safely revert that new extra check?
+
+The two fundamental things I know we can not allow are:
+- A shared signal queue aka CLONE_THREAD.  Because we compute the pid
+  and uid of the signal when we place it in the queue.
+
+- Changing the pid and by extension pid_namespace of an existing
+  process.
+
+From a parent's perspective there is nothing special about the pid
+namespace, to deny CLONE_PARENT, because the parent simply won't know or
+care.
+
+From the child's perspective all that is special really are shared signal
+queues.
+
+User mode threading with CLONE_PARENT|CLONE_VM|CLONE_SIGHAND and tasks
+in different pid namespaces is almost certainly going to break because
+it is complicated.  But shared signal handlers can look at per thread
+information to know which pid namespace a process is in, so I don't know
+of any reason not to support CLONE_PARENT|CLONE_VM|CLONE_SIGHAND threads
+at the kernel level.  It would be absolutely stupid to implement but
+that is a different thing.
+
+So hmm.
+
+Because it can do no harm, and because it is a regression let's remove
+the CLONE_PARENT check and send it stable.
+
+Acked-by: Oleg Nesterov <oleg@redhat.com>
+Acked-by: Andy Lutomirski <luto@amacapital.net>
+Acked-by: Serge E. Hallyn <serge.hallyn@ubuntu.com>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/fork.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -1175,7 +1175,7 @@ static struct task_struct *copy_process(
+        * do not allow it to share a thread group or signal handlers or
+        * parent with the forking task.
+        */
+-      if (clone_flags & (CLONE_SIGHAND | CLONE_PARENT)) {
++      if (clone_flags & CLONE_SIGHAND) {
+               if ((clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) ||
+                   (task_active_pid_ns(current) !=
+                               current->nsproxy->pid_ns_for_children))
diff --git a/queue-3.12/ftrace-x86-load-ftrace_ops-in-parameter-not-the-variable-holding-it.patch b/queue-3.12/ftrace-x86-load-ftrace_ops-in-parameter-not-the-variable-holding-it.patch
new file mode 100644 (file)
index 0000000..719e71d
--- /dev/null
@@ -0,0 +1,58 @@
+From 1739f09e33d8f66bf48ddbc3eca615574da6c4f6 Mon Sep 17 00:00:00 2001
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Wed, 13 Nov 2013 15:20:04 -0500
+Subject: ftrace/x86: Load ftrace_ops in parameter not the variable holding it
+
+From: Steven Rostedt <rostedt@goodmis.org>
+
+commit 1739f09e33d8f66bf48ddbc3eca615574da6c4f6 upstream.
+
+Function tracing callbacks expect to have the ftrace_ops that registered it
+passed to them, not the address of the variable that holds the ftrace_ops
+that registered it.
+
+Use a mov instead of a lea to store the ftrace_ops into the parameter
+of the function tracing callback.
+
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+Link: http://lkml.kernel.org/r/20131113152004.459787f9@gandalf.local.home
+Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/entry_32.S |    4 ++--
+ arch/x86/kernel/entry_64.S |    2 +-
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kernel/entry_32.S
++++ b/arch/x86/kernel/entry_32.S
+@@ -1085,7 +1085,7 @@ ENTRY(ftrace_caller)
+       pushl $0        /* Pass NULL as regs pointer */
+       movl 4*4(%esp), %eax
+       movl 0x4(%ebp), %edx
+-      leal function_trace_op, %ecx
++      movl function_trace_op, %ecx
+       subl $MCOUNT_INSN_SIZE, %eax
+ .globl ftrace_call
+@@ -1143,7 +1143,7 @@ ENTRY(ftrace_regs_caller)
+       movl 12*4(%esp), %eax   /* Load ip (1st parameter) */
+       subl $MCOUNT_INSN_SIZE, %eax    /* Adjust ip */
+       movl 0x4(%ebp), %edx    /* Load parent ip (2nd parameter) */
+-      leal function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */
++      movl function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */
+       pushl %esp              /* Save pt_regs as 4th parameter */
+ GLOBAL(ftrace_regs_call)
+--- a/arch/x86/kernel/entry_64.S
++++ b/arch/x86/kernel/entry_64.S
+@@ -88,7 +88,7 @@ END(function_hook)
+       MCOUNT_SAVE_FRAME \skip
+       /* Load the ftrace_ops into the 3rd parameter */
+-      leaq function_trace_op, %rdx
++      movq function_trace_op(%rip), %rdx
+       /* Load ip into the first parameter */
+       movq RIP(%rsp), %rdi
diff --git a/queue-3.12/hwmon-coretemp-fix-truncated-name-of-alarm-attributes.patch b/queue-3.12/hwmon-coretemp-fix-truncated-name-of-alarm-attributes.patch
new file mode 100644 (file)
index 0000000..1191be7
--- /dev/null
@@ -0,0 +1,34 @@
+From 3f9aec7610b39521c7c69d754de7265f6994c194 Mon Sep 17 00:00:00 2001
+From: Jean Delvare <khali@linux-fr.org>
+Date: Tue, 14 Jan 2014 15:59:55 +0100
+Subject: hwmon: (coretemp) Fix truncated name of alarm attributes
+
+From: Jean Delvare <khali@linux-fr.org>
+
+commit 3f9aec7610b39521c7c69d754de7265f6994c194 upstream.
+
+When the core number exceeds 9, the size of the buffer storing the
+alarm attribute name is insufficient and the attribute name is
+truncated. This causes libsensors to skip these attributes as the
+truncated name is not recognized.
+
+Reported-by: Andreas Hollmann <hollmann@in.tum.de>
+Signed-off-by: Jean Delvare <khali@linux-fr.org>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/hwmon/coretemp.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/hwmon/coretemp.c
++++ b/drivers/hwmon/coretemp.c
+@@ -52,7 +52,7 @@ MODULE_PARM_DESC(tjmax, "TjMax value in
+ #define BASE_SYSFS_ATTR_NO    2       /* Sysfs Base attr no for coretemp */
+ #define NUM_REAL_CORES                32      /* Number of Real cores per cpu */
+-#define CORETEMP_NAME_LENGTH  17      /* String Length of attrs */
++#define CORETEMP_NAME_LENGTH  19      /* String Length of attrs */
+ #define MAX_CORE_ATTRS                4       /* Maximum no of basic attrs */
+ #define TOTAL_ATTRS           (MAX_CORE_ATTRS + 1)
+ #define MAX_CORE_DATA         (NUM_REAL_CORES + BASE_SYSFS_ATTR_NO)
diff --git a/queue-3.12/i2c-re-instate-body-of-i2c_parent_is_i2c_adapter.patch b/queue-3.12/i2c-re-instate-body-of-i2c_parent_is_i2c_adapter.patch
new file mode 100644 (file)
index 0000000..f955772
--- /dev/null
@@ -0,0 +1,45 @@
+From 2fac2b891f287691c27ee8d2eeecf39571b27fea Mon Sep 17 00:00:00 2001
+From: Stephen Warren <swarren@nvidia.com>
+Date: Mon, 13 Jan 2014 14:29:04 -0700
+Subject: i2c: Re-instate body of i2c_parent_is_i2c_adapter()
+
+From: Stephen Warren <swarren@nvidia.com>
+
+commit 2fac2b891f287691c27ee8d2eeecf39571b27fea upstream.
+
+The body of i2c_parent_is_i2c_adapter() is currently guarded by
+I2C_MUX. It should be CONFIG_I2C_MUX instead.
+
+Among potentially other problems, this resulted in i2c_lock_adapter()
+only locking I2C mux child adapters, and not the parent adapter. In
+turn, this could allow inter-mingling of mux child selection and I2C
+transactions, which could result in I2C transactions being directed to
+the wrong I2C bus, and possibly even switching between busses in the
+middle of a transaction.
+
+One concrete issue caused by this bug was corrupted HDMI EDID reads
+during boot on the NVIDIA Tegra Seaboard system, although this only
+became apparent in recent linux-next, when the boot timing was changed
+just enough to trigger the race condition.
+
+Fixes: 3923172b3d70 ("i2c: reduce parent checking to a NOOP in non-I2C_MUX case")
+Cc: Phil Carmody <phil.carmody@partner.samsung.com>
+Signed-off-by: Stephen Warren <swarren@nvidia.com>
+Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/i2c.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/linux/i2c.h
++++ b/include/linux/i2c.h
+@@ -447,7 +447,7 @@ static inline void i2c_set_adapdata(stru
+ static inline struct i2c_adapter *
+ i2c_parent_is_i2c_adapter(const struct i2c_adapter *adapter)
+ {
+-#if IS_ENABLED(I2C_MUX)
++#if IS_ENABLED(CONFIG_I2C_MUX)
+       struct device *parent = adapter->dev.parent;
+       if (parent != NULL && parent->type == &i2c_adapter_type)
diff --git a/queue-3.12/selinux-fix-possible-null-pointer-dereference-in-selinux_inode_permission.patch b/queue-3.12/selinux-fix-possible-null-pointer-dereference-in-selinux_inode_permission.patch
new file mode 100644 (file)
index 0000000..9417100
--- /dev/null
@@ -0,0 +1,139 @@
+From 3dc91d4338d698ce77832985f9cb183d8eeaf6be Mon Sep 17 00:00:00 2001
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Thu, 9 Jan 2014 21:46:34 -0500
+Subject: SELinux: Fix possible NULL pointer dereference in selinux_inode_permission()
+
+From: Steven Rostedt <rostedt@goodmis.org>
+
+commit 3dc91d4338d698ce77832985f9cb183d8eeaf6be upstream.
+
+While running stress tests on adding and deleting ftrace instances I hit
+this bug:
+
+  BUG: unable to handle kernel NULL pointer dereference at 0000000000000020
+  IP: selinux_inode_permission+0x85/0x160
+  PGD 63681067 PUD 7ddbe067 PMD 0
+  Oops: 0000 [#1] PREEMPT
+  CPU: 0 PID: 5634 Comm: ftrace-test-mki Not tainted 3.13.0-rc4-test-00033-gd2a6dde-dirty #20
+  Hardware name:                  /DG965MQ, BIOS MQ96510J.86A.0372.2006.0605.1717 06/05/2006
+  task: ffff880078375800 ti: ffff88007ddb0000 task.ti: ffff88007ddb0000
+  RIP: 0010:[<ffffffff812d8bc5>]  [<ffffffff812d8bc5>] selinux_inode_permission+0x85/0x160
+  RSP: 0018:ffff88007ddb1c48  EFLAGS: 00010246
+  RAX: 0000000000000000 RBX: 0000000000800000 RCX: ffff88006dd43840
+  RDX: 0000000000000001 RSI: 0000000000000081 RDI: ffff88006ee46000
+  RBP: ffff88007ddb1c88 R08: 0000000000000000 R09: ffff88007ddb1c54
+  R10: 6e6576652f6f6f66 R11: 0000000000000003 R12: 0000000000000000
+  R13: 0000000000000081 R14: ffff88006ee46000 R15: 0000000000000000
+  FS:  00007f217b5b6700(0000) GS:ffffffff81e21000(0000) knlGS:0000000000000000
+  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  CR2: 0000000000000020 CR3: 000000006a0fe000 CR4: 00000000000007f0
+  Call Trace:
+    security_inode_permission+0x1c/0x30
+    __inode_permission+0x41/0xa0
+    inode_permission+0x18/0x50
+    link_path_walk+0x66/0x920
+    path_openat+0xa6/0x6c0
+    do_filp_open+0x43/0xa0
+    do_sys_open+0x146/0x240
+    SyS_open+0x1e/0x20
+    system_call_fastpath+0x16/0x1b
+  Code: 84 a1 00 00 00 81 e3 00 20 00 00 89 d8 83 c8 02 40 f6 c6 04 0f 45 d8 40 f6 c6 08 74 71 80 cf 02 49 8b 46 38 4c 8d 4d cc 45 31 c0 <0f> b7 50 20 8b 70 1c 48 8b 41 70 89 d9 8b 78 04 e8 36 cf ff ff
+  RIP  selinux_inode_permission+0x85/0x160
+  CR2: 0000000000000020
+
+Investigating, I found that the inode->i_security was NULL, and the
+dereference of it caused the oops.
+
+in selinux_inode_permission():
+
+       isec = inode->i_security;
+
+       rc = avc_has_perm_noaudit(sid, isec->sid, isec->sclass, perms, 0, &avd);
+
+Note, the crash came from stressing the deletion and reading of debugfs
+files.  I was not able to recreate this via normal files.  But I'm not
+sure they are safe.  It may just be that the race window is much harder
+to hit.
+
+What seems to have happened (and what I have traced), is the file is
+being opened at the same time the file or directory is being deleted.
+As the dentry and inode locks are not held during the path walk, nor are
+the inode's ref counts being incremented, there is nothing saving these
+structures from being discarded except for an rcu_read_lock().
+
+The rcu_read_lock() protects against freeing of the inode, but it does
+not protect freeing of the inode_security_struct.  Now if the freeing of
+the i_security happens with a call_rcu(), and the i_security field of
+the inode is not changed (it gets freed as the inode gets freed) then
+there will be no issue here.  (Linus Torvalds suggested not setting the
+field to NULL such that we do not need to check if it is NULL in the
+permission check).
+
+Note, this is a hack, but it fixes the problem at hand.  A real fix is
+to restructure the destroy_inode() to call all the destructor handlers
+from the RCU callback.  But that is a major job to do, and requires a
+lot of work.  For now, we just band-aid this bug with this fix (it
+works), and work on a more maintainable solution in the future.
+
+Link: http://lkml.kernel.org/r/20140109101932.0508dec7@gandalf.local.home
+Link: http://lkml.kernel.org/r/20140109182756.17abaaa8@gandalf.local.home
+
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ security/selinux/hooks.c          |   20 ++++++++++++++++++--
+ security/selinux/include/objsec.h |    5 ++++-
+ 2 files changed, 22 insertions(+), 3 deletions(-)
+
+--- a/security/selinux/hooks.c
++++ b/security/selinux/hooks.c
+@@ -220,6 +220,14 @@ static int inode_alloc_security(struct i
+       return 0;
+ }
++static void inode_free_rcu(struct rcu_head *head)
++{
++      struct inode_security_struct *isec;
++
++      isec = container_of(head, struct inode_security_struct, rcu);
++      kmem_cache_free(sel_inode_cache, isec);
++}
++
+ static void inode_free_security(struct inode *inode)
+ {
+       struct inode_security_struct *isec = inode->i_security;
+@@ -230,8 +238,16 @@ static void inode_free_security(struct i
+               list_del_init(&isec->list);
+       spin_unlock(&sbsec->isec_lock);
+-      inode->i_security = NULL;
+-      kmem_cache_free(sel_inode_cache, isec);
++      /*
++       * The inode may still be referenced in a path walk and
++       * a call to selinux_inode_permission() can be made
++       * after inode_free_security() is called. Ideally, the VFS
++       * wouldn't do this, but fixing that is a much harder
++       * job. For now, simply free the i_security via RCU, and
++       * leave the current inode->i_security pointer intact.
++       * The inode will be freed after the RCU grace period too.
++       */
++      call_rcu(&isec->rcu, inode_free_rcu);
+ }
+ static int file_alloc_security(struct file *file)
+--- a/security/selinux/include/objsec.h
++++ b/security/selinux/include/objsec.h
+@@ -38,7 +38,10 @@ struct task_security_struct {
+ struct inode_security_struct {
+       struct inode *inode;    /* back pointer to inode object */
+-      struct list_head list;  /* list of inode_security_struct */
++      union {
++              struct list_head list;  /* list of inode_security_struct */
++              struct rcu_head rcu;    /* for freeing the inode_security_struct */
++      };
+       u32 task_sid;           /* SID of creating task */
+       u32 sid;                /* SID of this object */
+       u16 sclass;             /* security class of this object */
index d61b3895a80b4cf0809b1742d4cd19018bcb09b7..97160b57d30a8a86d6408eac387873a9aa9c7a81 100644 (file)
@@ -3,3 +3,12 @@ perf-x86-amd-ibs-fix-waking-up-from-s3-for-amd-family-10h.patch
 gfs2-increase-i_writecount-during-gfs2_setattr_chown.patch
 staging-comedi-addi_apci_1032-fix-subdevice-type-flags-bug.patch
 staging-comedi-adl_pci9111-fix-incorrect-irq-passed-to-request_irq.patch
+vfs-in-d_path-don-t-call-d_dname-on-a-mount-point.patch
+vfs-fix-a-regression-in-mounting-proc.patch
+fork-allow-clone_parent-after-setns-clone_newpid.patch
+i2c-re-instate-body-of-i2c_parent_is_i2c_adapter.patch
+hwmon-coretemp-fix-truncated-name-of-alarm-attributes.patch
+writeback-fix-data-corruption-on-nfs.patch
+selinux-fix-possible-null-pointer-dereference-in-selinux_inode_permission.patch
+thp-fix-copy_page_rep-gpf-by-testing-is_huge_zero_pmd-once-only.patch
+ftrace-x86-load-ftrace_ops-in-parameter-not-the-variable-holding-it.patch
diff --git a/queue-3.12/thp-fix-copy_page_rep-gpf-by-testing-is_huge_zero_pmd-once-only.patch b/queue-3.12/thp-fix-copy_page_rep-gpf-by-testing-is_huge_zero_pmd-once-only.patch
new file mode 100644 (file)
index 0000000..7e59cac
--- /dev/null
@@ -0,0 +1,82 @@
+From eecc1e426d681351a6026a7d3e7d225f38955b6c Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Sun, 12 Jan 2014 01:25:21 -0800
+Subject: thp: fix copy_page_rep GPF by testing is_huge_zero_pmd once only
+
+From: Hugh Dickins <hughd@google.com>
+
+commit eecc1e426d681351a6026a7d3e7d225f38955b6c upstream.
+
+We see General Protection Fault on RSI in copy_page_rep: that RSI is
+what you get from a NULL struct page pointer.
+
+  RIP: 0010:[<ffffffff81154955>]  [<ffffffff81154955>] copy_page_rep+0x5/0x10
+  RSP: 0000:ffff880136e15c00  EFLAGS: 00010286
+  RAX: ffff880000000000 RBX: ffff880136e14000 RCX: 0000000000000200
+  RDX: 6db6db6db6db6db7 RSI: db73880000000000 RDI: ffff880dd0c00000
+  RBP: ffff880136e15c18 R08: 0000000000000200 R09: 000000000005987c
+  R10: 000000000005987c R11: 0000000000000200 R12: 0000000000000001
+  R13: ffffea00305aa000 R14: 0000000000000000 R15: 0000000000000000
+  FS:  00007f195752f700(0000) GS:ffff880c7fc20000(0000) knlGS:0000000000000000
+  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  CR2: 0000000093010000 CR3: 00000001458e1000 CR4: 00000000000027e0
+  Call Trace:
+    copy_user_huge_page+0x93/0xab
+    do_huge_pmd_wp_page+0x710/0x815
+    handle_mm_fault+0x15d8/0x1d70
+    __do_page_fault+0x14d/0x840
+    do_page_fault+0x2f/0x90
+    page_fault+0x22/0x30
+
+do_huge_pmd_wp_page() tests is_huge_zero_pmd(orig_pmd) four times: but
+since shrink_huge_zero_page() can free the huge_zero_page, and we have
+no hold of our own on it here (except where the fourth test holds
+page_table_lock and has checked pmd_same), it's possible for it to
+answer yes the first time, but no to the second or third test.  Change
+all those last three to tests for NULL page.
+
+(Note: this is not the same issue as trinity's DEBUG_PAGEALLOC BUG
+in copy_page_rep with RSI: ffff88009c422000, reported by Sasha Levin
+in https://lkml.org/lkml/2013/3/29/103.  I believe that one is due
+to the source page being split, and a tail page freed, while copy
+is in progress; and not a problem without DEBUG_PAGEALLOC, since
+the pmd_same check will prevent a miscopy from being made visible.)
+
+Fixes: 97ae17497e99 ("thp: implement refcounting for huge zero page")
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/huge_memory.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -1154,7 +1154,7 @@ alloc:
+               new_page = NULL;
+       if (unlikely(!new_page)) {
+-              if (is_huge_zero_pmd(orig_pmd)) {
++              if (!page) {
+                       ret = do_huge_pmd_wp_zero_page_fallback(mm, vma,
+                                       address, pmd, orig_pmd, haddr);
+               } else {
+@@ -1181,7 +1181,7 @@ alloc:
+       count_vm_event(THP_FAULT_ALLOC);
+-      if (is_huge_zero_pmd(orig_pmd))
++      if (!page)
+               clear_huge_page(new_page, haddr, HPAGE_PMD_NR);
+       else
+               copy_user_huge_page(new_page, page, haddr, vma, HPAGE_PMD_NR);
+@@ -1207,7 +1207,7 @@ alloc:
+               page_add_new_anon_rmap(new_page, vma, haddr);
+               set_pmd_at(mm, haddr, pmd, entry);
+               update_mmu_cache_pmd(vma, address, pmd);
+-              if (is_huge_zero_pmd(orig_pmd)) {
++              if (!page) {
+                       add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
+                       put_huge_zero_page();
+               } else {
diff --git a/queue-3.12/vfs-fix-a-regression-in-mounting-proc.patch b/queue-3.12/vfs-fix-a-regression-in-mounting-proc.patch
new file mode 100644 (file)
index 0000000..bafe842
--- /dev/null
@@ -0,0 +1,47 @@
+From 41301ae78a99ead04ea42672a1ab72c6f44cc81d Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Thu, 14 Nov 2013 21:22:25 -0800
+Subject: vfs: Fix a regression in mounting proc
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit 41301ae78a99ead04ea42672a1ab72c6f44cc81d upstream.
+
+Gao feng <gaofeng@cn.fujitsu.com> reported that commit
+e51db73532955dc5eaba4235e62b74b460709d5b
+userns: Better restrictions on when proc and sysfs can be mounted
+caused a regression on mounting a new instance of proc in a mount
+namespace created with user namespace privileges, when binfmt_misc
+is mounted on /proc/sys/fs/binfmt_misc.
+
+This is an unintended regression caused by the absolutely bogus empty
+directory check in fs_fully_visible.  The check fs_fully_visible replaced
+didn't even bother to attempt to verify proc was fully visible and
+hiding proc files with any kind of mount is rare.  So for now fix
+the userspace regression by allowing directory with nlink == 1
+as /proc/sys/fs/binfmt_misc has.
+
+I will have a better patch but it is not stable material, or
+last minute kernel material.  So it will have to wait.
+
+Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
+Acked-by: Gao feng <gaofeng@cn.fujitsu.com>
+Tested-by: Gao feng <gaofeng@cn.fujitsu.com>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/namespace.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -2888,7 +2888,7 @@ bool fs_fully_visible(struct file_system
+                       struct inode *inode = child->mnt_mountpoint->d_inode;
+                       if (!S_ISDIR(inode->i_mode))
+                               goto next;
+-                      if (inode->i_nlink != 2)
++                      if (inode->i_nlink > 2)
+                               goto next;
+               }
+               visible = true;
diff --git a/queue-3.12/vfs-in-d_path-don-t-call-d_dname-on-a-mount-point.patch b/queue-3.12/vfs-in-d_path-don-t-call-d_dname-on-a-mount-point.patch
new file mode 100644 (file)
index 0000000..e3e10f2
--- /dev/null
@@ -0,0 +1,65 @@
+From f48cfddc6729ef133933062320039808bafa6f45 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Fri, 8 Nov 2013 16:31:29 -0800
+Subject: vfs: In d_path don't call d_dname on a mount point
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit f48cfddc6729ef133933062320039808bafa6f45 upstream.
+
+Aditya Kali (adityakali@google.com) wrote:
+> Commit bf056bfa80596a5d14b26b17276a56a0dcb080e5:
+> "proc: Fix the namespace inode permission checks." converted
+> the namespace files into symlinks. The same commit changed
+> the way namespace bind mounts appear in /proc/mounts:
+>   $ mount --bind /proc/self/ns/ipc /mnt/ipc
+> Originally:
+>   $ cat /proc/mounts | grep ipc
+>   proc /mnt/ipc proc rw,nosuid,nodev,noexec 0 0
+>
+> After commit bf056bfa80596a5d14b26b17276a56a0dcb080e5:
+>   $ cat /proc/mounts | grep ipc
+>   proc ipc:[4026531839] proc rw,nosuid,nodev,noexec 0 0
+>
+> This breaks userspace which expects the 2nd field in
+> /proc/mounts to be a valid path.
+
+The symlink /proc/<pid>/ns/{ipc,mnt,net,pid,user,uts} point to
+dentries allocated with d_alloc_pseudo that we can mount, and
+that have interesting names printed out with d_dname.
+
+When these files are bind mounted /proc/mounts is not currently
+displaying the mount point correctly because d_dname is called instead
+of just displaying the path where the file is mounted.
+
+Solve this by adding an explicit check to distinguish mounted pseudo
+inodes and unmounted pseudo inodes.  Unmounted pseudo inodes always
+use the mount of their filesystem as the mnt_root in their path, making
+these two cases easy to distinguish.
+
+Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
+Reported-by: Aditya Kali <adityakali@google.com>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/dcache.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/fs/dcache.c
++++ b/fs/dcache.c
+@@ -3064,8 +3064,13 @@ char *d_path(const struct path *path, ch
+        * thus don't need to be hashed.  They also don't need a name until a
+        * user wants to identify the object in /proc/pid/fd/.  The little hack
+        * below allows us to generate a name for these objects on demand:
++       *
++       * Some pseudo inodes are mountable.  When they are mounted
++       * path->dentry == path->mnt->mnt_root.  In that case don't call d_dname
++       * and instead have d_path return the mounted path.
+        */
+-      if (path->dentry->d_op && path->dentry->d_op->d_dname)
++      if (path->dentry->d_op && path->dentry->d_op->d_dname &&
++          (!IS_ROOT(path->dentry) || path->dentry != path->mnt->mnt_root))
+               return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
+       rcu_read_lock();
diff --git a/queue-3.12/writeback-fix-data-corruption-on-nfs.patch b/queue-3.12/writeback-fix-data-corruption-on-nfs.patch
new file mode 100644 (file)
index 0000000..062ea6b
--- /dev/null
@@ -0,0 +1,55 @@
+From f9b0e058cbd04ada76b13afffa7e1df830543c24 Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Sat, 14 Dec 2013 04:21:26 +0800
+Subject: writeback: Fix data corruption on NFS
+
+From: Jan Kara <jack@suse.cz>
+
+commit f9b0e058cbd04ada76b13afffa7e1df830543c24 upstream.
+
+Commit 4f8ad655dbc8 "writeback: Refactor writeback_single_inode()" added
+a condition to skip clean inode. However this is wrong in WB_SYNC_ALL
+mode because there we also want to wait for outstanding writeback on
+possibly clean inode. This was causing occasional data corruption issues
+on NFS because it uses sync_inode() to make sure all outstanding writes
+are flushed to the server before truncating the inode and with
+sync_inode() returning prematurely file was sometimes extended back
+by an outstanding write after it was truncated.
+
+So modify the test to also check for pages under writeback in
+WB_SYNC_ALL mode.
+
+Fixes: 4f8ad655dbc82cf05d2edc11e66b78a42d38bf93
+Reported-and-tested-by: Dan Duval <dan.duval@oracle.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/fs-writeback.c |   15 +++++++++------
+ 1 file changed, 9 insertions(+), 6 deletions(-)
+
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -510,13 +510,16 @@ writeback_single_inode(struct inode *ino
+       }
+       WARN_ON(inode->i_state & I_SYNC);
+       /*
+-       * Skip inode if it is clean. We don't want to mess with writeback
+-       * lists in this function since flusher thread may be doing for example
+-       * sync in parallel and if we move the inode, it could get skipped. So
+-       * here we make sure inode is on some writeback list and leave it there
+-       * unless we have completely cleaned the inode.
++       * Skip inode if it is clean and we have no outstanding writeback in
++       * WB_SYNC_ALL mode. We don't want to mess with writeback lists in this
++       * function since flusher thread may be doing for example sync in
++       * parallel and if we move the inode, it could get skipped. So here we
++       * make sure inode is on some writeback list and leave it there unless
++       * we have completely cleaned the inode.
+        */
+-      if (!(inode->i_state & I_DIRTY))
++      if (!(inode->i_state & I_DIRTY) &&
++          (wbc->sync_mode != WB_SYNC_ALL ||
++           !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK)))
+               goto out;
+       inode->i_state |= I_SYNC;
+       spin_unlock(&inode->i_lock);