From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 1 Apr 2013 21:58:10 +0000 (-0700)
Subject: 3.8-stable patches
X-Git-Tag: v3.8.6~42
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=f7b3a864aba5ce137d00b47cabc783a53392e2d2;p=thirdparty%2Fkernel%2Fstable-queue.git

3.8-stable patches

added patches:
	ipc-restrict-mounting-the-mqueue-filesystem.patch
	mac80211-prevent-spurious-ht-vht-downgrade-message.patch
	media-bt8xx-fix-too-large-height-in-cropcap.patch
	pid-handle-the-exit-of-a-multi-threaded-init.patch
	scm-require-cap_sys_admin-over-the-current-pidns-to-spoof-pids.patch
	target-fix-reservation_conflict-status-regression-for-iscsi-target-special-case.patch
	userns-don-t-allow-creation-if-the-user-is-chrooted.patch
	userns-restrict-when-proc-and-sysfs-can-be-mounted.patch
	vfs-add-a-mount-flag-to-lock-read-only-bind-mounts.patch
	vfs-carefully-propogate-mounts-across-user-namespaces.patch
---

diff --git a/queue-3.8/ipc-restrict-mounting-the-mqueue-filesystem.patch b/queue-3.8/ipc-restrict-mounting-the-mqueue-filesystem.patch
new file mode 100644
index 00000000000..75149c06f19
--- /dev/null
+++ b/queue-3.8/ipc-restrict-mounting-the-mqueue-filesystem.patch
@@ -0,0 +1,51 @@
+From a636b702ed1805e988ad3d8ff8b52c060f8b341c Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Thu, 21 Mar 2013 18:13:15 -0700
+Subject: ipc: Restrict mounting the mqueue filesystem
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit a636b702ed1805e988ad3d8ff8b52c060f8b341c upstream.
+
+Only allow mounting the mqueue filesystem if the caller has CAP_SYS_ADMIN
+rights over the ipc namespace.   The principle here is if you create
+or have capabilities over it you can mount it, otherwise you get to live
+with what other people have mounted.
+
+This information is not particularly sensitive and mqueue essentially
+only reports which posix message queues exist.  Still, when creating a
+restricted environment for an application to live in, any extra
+information may be of use to someone with sufficient creativity.  The
+historical if imperfect way this information has been restricted has
+been not to allow mounts and restricting this to ipc namespace
+creators maintains the spirit of the historical restriction.
+
+Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ ipc/mqueue.c |   12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+--- a/ipc/mqueue.c
++++ b/ipc/mqueue.c
+@@ -330,8 +330,16 @@ static struct dentry *mqueue_mount(struc
+ 			 int flags, const char *dev_name,
+ 			 void *data)
+ {
+-	if (!(flags & MS_KERNMOUNT))
+-		data = current->nsproxy->ipc_ns;
++	if (!(flags & MS_KERNMOUNT)) {
++		struct ipc_namespace *ns = current->nsproxy->ipc_ns;
++		/* Don't allow mounting unless the caller has CAP_SYS_ADMIN
++		 * over the ipc namespace.
++		 */
++		if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
++			return ERR_PTR(-EPERM);
++
++		data = ns;
++	}
+ 	return mount_ns(fs_type, flags, data, mqueue_fill_super);
+ }
+ 
diff --git a/queue-3.8/mac80211-prevent-spurious-ht-vht-downgrade-message.patch b/queue-3.8/mac80211-prevent-spurious-ht-vht-downgrade-message.patch
new file mode 100644
index 00000000000..c4959e9c854
--- /dev/null
+++ b/queue-3.8/mac80211-prevent-spurious-ht-vht-downgrade-message.patch
@@ -0,0 +1,35 @@
+From 586e01ededf9b713a1512dd658806791a7ca1a50 Mon Sep 17 00:00:00 2001
+From: Johannes Berg <johannes.berg@intel.com>
+Date: Thu, 14 Feb 2013 12:13:53 +0100
+Subject: mac80211: prevent spurious HT/VHT downgrade message
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+commit 586e01ededf9b713a1512dd658806791a7ca1a50 upstream.
+
+Even when connecting to an AP that doesn't support VHT,
+and even when the local device doesn't support it either,
+the downgrade message gets printed. Suppress the message
+if HT and/or VHT is disabled.
+
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Cc: Andrew Lutomirski <luto@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/mac80211/mlme.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/net/mac80211/mlme.c
++++ b/net/mac80211/mlme.c
+@@ -3401,6 +3401,10 @@ ieee80211_determine_chantype(struct ieee
+ 	ret = 0;
+ 
+ out:
++	/* don't print the message below for VHT mismatch if VHT is disabled */
++	if (ret & IEEE80211_STA_DISABLE_VHT)
++		vht_chandef = *chandef;
++
+ 	while (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef,
+ 					IEEE80211_CHAN_DISABLED)) {
+ 		if (WARN_ON(chandef->width == NL80211_CHAN_WIDTH_20_NOHT)) {
diff --git a/queue-3.8/media-bt8xx-fix-too-large-height-in-cropcap.patch b/queue-3.8/media-bt8xx-fix-too-large-height-in-cropcap.patch
new file mode 100644
index 00000000000..db6f498a1dd
--- /dev/null
+++ b/queue-3.8/media-bt8xx-fix-too-large-height-in-cropcap.patch
@@ -0,0 +1,149 @@
+From 35ccecef6ed48a5602755ddf580c45a026a1dc05 Mon Sep 17 00:00:00 2001
+From: Hans de Goede <hdegoede@redhat.com>
+Date: Mon, 25 Mar 2013 14:45:54 -0300
+Subject: media: [REGRESSION] bt8xx: Fix too large height in cropcap
+
+From: Hans de Goede <hdegoede@redhat.com>
+
+commit 35ccecef6ed48a5602755ddf580c45a026a1dc05 upstream.
+
+Since commit a1fd287780c8e91fed4957b30c757b0c93021162:
+"[media] bttv-driver: fix two warnings"
+cropcap.defrect.height and cropcap.bounds.height for the PAL entry are 32
+resp 30 pixels too large, if a userspace app (ie xawtv) actually tries to use
+the full advertised height, the resulting image is broken in ways only a
+screenshot can describe.
+The cause of this is the fix for this warning:
+drivers/media/pci/bt8xx/bttv-driver.c:308:3: warning: initialized field overwritten [-Woverride-init]
+In this chunk of the commit:
+@@ -301,11 +301,10 @@ const struct bttv_tvnorm bttv_tvnorms[] = {
+                        /* totalwidth */ 1135,
+                        /* sqwidth */ 944,
+                        /* vdelay */ 0x20,
+-                       /* sheight */ 576,
+-                       /* videostart0 */ 23)
+                /* bt878 (and bt848?) can capture another
+                   line below active video. */
+-               .cropcap.bounds.height = (576 + 2) + 0x20 - 2,
++                       /* sheight */ (576 + 2) + 0x20 - 2,
++                       /* videostart0 */ 23)
+        },{
+                .v4l2_id        = V4L2_STD_NTSC_M | V4L2_STD_NTSC_M_KR,
+                .name           = "NTSC",
+Which replaces the overriding of cropcap.bounds.height initialization outside
+of the CROPCAP macro (which also initializes it), with passing a
+different sheight value to the CROPCAP macro.
+There are 2 problems with this warning fix:
+1) The sheight value is used twice in the CROPCAP macro, and the old code
+   only changed one resulting value.
+2) The old code increased the .cropcap.bounds.height value (and did not
+   touch the .cropcap.defrect.height value at all) by 2, whereas the fixed
+   code increases it by 32, as the fixed code passes (576 + 2) + 0x20 - 2
+   to the CROPCAP macro, but the + 0x20 - 2 is already done by the macro so
+   now is done twice for .cropcap.bounds.height, and also is applied to
+   .cropcap.defrect.height where it should not be applied at all.
+This patch fixes this by adding an extraheight parameter to the CROPCAP entry
+and using it for the PAL entry.
+
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/media/pci/bt8xx/bttv-driver.c |   20 +++++++++++++++-----
+ 1 file changed, 15 insertions(+), 5 deletions(-)
+
+--- a/drivers/media/pci/bt8xx/bttv-driver.c
++++ b/drivers/media/pci/bt8xx/bttv-driver.c
+@@ -250,17 +250,19 @@ static u8 SRAM_Table[][60] =
+    vdelay	start of active video in 2 * field lines relative to
+ 		trailing edge of /VRESET pulse (VDELAY register).
+    sheight	height of active video in 2 * field lines.
++   extraheight	Added to sheight for cropcap.bounds.height only
+    videostart0	ITU-R frame line number of the line corresponding
+ 		to vdelay in the first field. */
+ #define CROPCAP(minhdelayx1, hdelayx1, swidth, totalwidth, sqwidth,	 \
+-		vdelay, sheight, videostart0)				 \
++		vdelay, sheight, extraheight, videostart0)		 \
+ 	.cropcap.bounds.left = minhdelayx1,				 \
+ 	/* * 2 because vertically we count field lines times two, */	 \
+ 	/* e.g. 23 * 2 to 23 * 2 + 576 in PAL-BGHI defrect. */		 \
+ 	.cropcap.bounds.top = (videostart0) * 2 - (vdelay) + MIN_VDELAY, \
+ 	/* 4 is a safety margin at the end of the line. */		 \
+ 	.cropcap.bounds.width = (totalwidth) - (minhdelayx1) - 4,	 \
+-	.cropcap.bounds.height = (sheight) + (vdelay) - MIN_VDELAY,	 \
++	.cropcap.bounds.height = (sheight) + (extraheight) + (vdelay) -	 \
++				 MIN_VDELAY,				 \
+ 	.cropcap.defrect.left = hdelayx1,				 \
+ 	.cropcap.defrect.top = (videostart0) * 2,			 \
+ 	.cropcap.defrect.width = swidth,				 \
+@@ -301,9 +303,10 @@ const struct bttv_tvnorm bttv_tvnorms[]
+ 			/* totalwidth */ 1135,
+ 			/* sqwidth */ 944,
+ 			/* vdelay */ 0x20,
+-		/* bt878 (and bt848?) can capture another
+-		   line below active video. */
+-			/* sheight */ (576 + 2) + 0x20 - 2,
++			/* sheight */ 576,
++			/* bt878 (and bt848?) can capture another
++			   line below active video. */
++			/* extraheight */ 2,
+ 			/* videostart0 */ 23)
+ 	},{
+ 		.v4l2_id        = V4L2_STD_NTSC_M | V4L2_STD_NTSC_M_KR,
+@@ -330,6 +333,7 @@ const struct bttv_tvnorm bttv_tvnorms[]
+ 			/* sqwidth */ 780,
+ 			/* vdelay */ 0x1a,
+ 			/* sheight */ 480,
++			/* extraheight */ 0,
+ 			/* videostart0 */ 23)
+ 	},{
+ 		.v4l2_id        = V4L2_STD_SECAM,
+@@ -355,6 +359,7 @@ const struct bttv_tvnorm bttv_tvnorms[]
+ 			/* sqwidth */ 944,
+ 			/* vdelay */ 0x20,
+ 			/* sheight */ 576,
++			/* extraheight */ 0,
+ 			/* videostart0 */ 23)
+ 	},{
+ 		.v4l2_id        = V4L2_STD_PAL_Nc,
+@@ -380,6 +385,7 @@ const struct bttv_tvnorm bttv_tvnorms[]
+ 			/* sqwidth */ 780,
+ 			/* vdelay */ 0x1a,
+ 			/* sheight */ 576,
++			/* extraheight */ 0,
+ 			/* videostart0 */ 23)
+ 	},{
+ 		.v4l2_id        = V4L2_STD_PAL_M,
+@@ -405,6 +411,7 @@ const struct bttv_tvnorm bttv_tvnorms[]
+ 			/* sqwidth */ 780,
+ 			/* vdelay */ 0x1a,
+ 			/* sheight */ 480,
++			/* extraheight */ 0,
+ 			/* videostart0 */ 23)
+ 	},{
+ 		.v4l2_id        = V4L2_STD_PAL_N,
+@@ -430,6 +437,7 @@ const struct bttv_tvnorm bttv_tvnorms[]
+ 			/* sqwidth */ 944,
+ 			/* vdelay */ 0x20,
+ 			/* sheight */ 576,
++			/* extraheight */ 0,
+ 			/* videostart0 */ 23)
+ 	},{
+ 		.v4l2_id        = V4L2_STD_NTSC_M_JP,
+@@ -455,6 +463,7 @@ const struct bttv_tvnorm bttv_tvnorms[]
+ 			/* sqwidth */ 780,
+ 			/* vdelay */ 0x16,
+ 			/* sheight */ 480,
++			/* extraheight */ 0,
+ 			/* videostart0 */ 23)
+ 	},{
+ 		/* that one hopefully works with the strange timing
+@@ -484,6 +493,7 @@ const struct bttv_tvnorm bttv_tvnorms[]
+ 			/* sqwidth */ 944,
+ 			/* vdelay */ 0x1a,
+ 			/* sheight */ 480,
++			/* extraheight */ 0,
+ 			/* videostart0 */ 23)
+ 	}
+ };
diff --git a/queue-3.8/pid-handle-the-exit-of-a-multi-threaded-init.patch b/queue-3.8/pid-handle-the-exit-of-a-multi-threaded-init.patch
new file mode 100644
index 00000000000..6c6857ab10f
--- /dev/null
+++ b/queue-3.8/pid-handle-the-exit-of-a-multi-threaded-init.patch
@@ -0,0 +1,46 @@
+From 751c644b95bb48aaa8825f0c66abbcc184d92051 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Tue, 26 Mar 2013 02:27:11 -0700
+Subject: pid: Handle the exit of a multi-threaded init.
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit 751c644b95bb48aaa8825f0c66abbcc184d92051 upstream.
+
+When a multi-threaded init exits and the initial thread is not the
+last thread to exit, the initial thread hangs around as a zombie
+until the last thread exits.  In that case zap_pid_ns_processes
+needs to wait until there are only 2 hashed pids in the pid
+namespace, not one.
+
+v2. Replace thread_pid_vnr(me) == 1 with the test thread_group_leader(me)
+    as suggested by Oleg.
+
+Reported-by: Caj Larsson <caj@omnicloud.com>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/pid_namespace.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/kernel/pid_namespace.c
++++ b/kernel/pid_namespace.c
+@@ -181,6 +181,7 @@ void zap_pid_ns_processes(struct pid_nam
+ 	int nr;
+ 	int rc;
+ 	struct task_struct *task, *me = current;
++	int init_pids = thread_group_leader(me) ? 1 : 2;
+ 
+ 	/* Don't allow any more processes into the pid namespace */
+ 	disable_pid_allocation(pid_ns);
+@@ -230,7 +231,7 @@ void zap_pid_ns_processes(struct pid_nam
+ 	 */
+ 	for (;;) {
+ 		set_current_state(TASK_UNINTERRUPTIBLE);
+-		if (pid_ns->nr_hashed == 1)
++		if (pid_ns->nr_hashed == init_pids)
+ 			break;
+ 		schedule();
+ 	}
diff --git a/queue-3.8/scm-require-cap_sys_admin-over-the-current-pidns-to-spoof-pids.patch b/queue-3.8/scm-require-cap_sys_admin-over-the-current-pidns-to-spoof-pids.patch
new file mode 100644
index 00000000000..d05cf9c440d
--- /dev/null
+++ b/queue-3.8/scm-require-cap_sys_admin-over-the-current-pidns-to-spoof-pids.patch
@@ -0,0 +1,41 @@
+From 92f28d973cce45ef5823209aab3138eb45d8b349 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Fri, 15 Mar 2013 01:03:33 -0700
+Subject: scm: Require CAP_SYS_ADMIN over the current pidns to spoof pids.
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit 92f28d973cce45ef5823209aab3138eb45d8b349 upstream.
+
+Don't allow spoofing pids over unix domain sockets in the corner
+cases where a user has created a user namespace but has not yet
+created a pid namespace.
+
+Reported-by: Andy Lutomirski <luto@amacapital.net>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/core/scm.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/core/scm.c
++++ b/net/core/scm.c
+@@ -24,6 +24,7 @@
+ #include <linux/interrupt.h>
+ #include <linux/netdevice.h>
+ #include <linux/security.h>
++#include <linux/pid_namespace.h>
+ #include <linux/pid.h>
+ #include <linux/nsproxy.h>
+ #include <linux/slab.h>
+@@ -52,7 +53,8 @@ static __inline__ int scm_check_creds(st
+ 	if (!uid_valid(uid) || !gid_valid(gid))
+ 		return -EINVAL;
+ 
+-	if ((creds->pid == task_tgid_vnr(current) || nsown_capable(CAP_SYS_ADMIN)) &&
++	if ((creds->pid == task_tgid_vnr(current) ||
++	     ns_capable(current->nsproxy->pid_ns->user_ns, CAP_SYS_ADMIN)) &&
+ 	    ((uid_eq(uid, cred->uid)   || uid_eq(uid, cred->euid) ||
+ 	      uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) &&
+ 	    ((gid_eq(gid, cred->gid)   || gid_eq(gid, cred->egid) ||
diff --git a/queue-3.8/series b/queue-3.8/series
index 82d1382e97e..c1a8c717111 100644
--- a/queue-3.8/series
+++ b/queue-3.8/series
@@ -41,3 +41,13 @@ net-irda-add-missing-error-path-release_sock-call.patch
 nest-rename_lock-inside-vfsmount_lock.patch
 usb-ehci-fix-bug-in-itd-sitd-dma-pool-allocation.patch
 usb-xhci-fix-trb-transfer-length-macro-used-for-event-trb.patch
+target-fix-reservation_conflict-status-regression-for-iscsi-target-special-case.patch
+media-bt8xx-fix-too-large-height-in-cropcap.patch
+mac80211-prevent-spurious-ht-vht-downgrade-message.patch
+scm-require-cap_sys_admin-over-the-current-pidns-to-spoof-pids.patch
+pid-handle-the-exit-of-a-multi-threaded-init.patch
+userns-don-t-allow-creation-if-the-user-is-chrooted.patch
+vfs-add-a-mount-flag-to-lock-read-only-bind-mounts.patch
+vfs-carefully-propogate-mounts-across-user-namespaces.patch
+ipc-restrict-mounting-the-mqueue-filesystem.patch
+userns-restrict-when-proc-and-sysfs-can-be-mounted.patch
diff --git a/queue-3.8/target-fix-reservation_conflict-status-regression-for-iscsi-target-special-case.patch b/queue-3.8/target-fix-reservation_conflict-status-regression-for-iscsi-target-special-case.patch
new file mode 100644
index 00000000000..5c29459aa1a
--- /dev/null
+++ b/queue-3.8/target-fix-reservation_conflict-status-regression-for-iscsi-target-special-case.patch
@@ -0,0 +1,59 @@
+From f85eda8d75d37a3796cee7f5a906e50e3f13d9e1 Mon Sep 17 00:00:00 2001
+From: Nicholas Bellinger <nab@linux-iscsi.org>
+Date: Thu, 28 Mar 2013 23:06:00 -0700
+Subject: target: Fix RESERVATION_CONFLICT status regression for iscsi-target special case
+
+From: Nicholas Bellinger <nab@linux-iscsi.org>
+
+commit f85eda8d75d37a3796cee7f5a906e50e3f13d9e1 upstream.
+
+This patch fixes a regression introduced in v3.8-rc1 code where a failed
+target_check_reservation() check in target_setup_cmd_from_cdb() was causing
+an incorrect SAM_STAT_GOOD status to be returned during a WRITE operation
+performed by an unregistered / unreserved iscsi initiator port.
+
+This regression is only affecting iscsi-target due to a special case check
+for TCM_RESERVATION_CONFLICT within iscsi_target_erl1.c:iscsit_execute_cmd(),
+and was still correctly disallowing WRITE commands from backend submission
+for unregistered / unreserved initiator ports, while returning the incorrect
+SAM_STAT_GOOD status due to the missing SAM_STAT_RESERVATION_CONFLICT
+assignment.
+
+This regression was first introduced with:
+
+commit de103c93aff0bed0ae984274e5dc8b95899badab
+Author: Christoph Hellwig <hch@lst.de>
+Date:   Tue Nov 6 12:24:09 2012 -0800
+
+    target: pass sense_reason as a return value
+
+Go ahead and re-add the missing SAM_STAT_RESERVATION_CONFLICT assignment
+during a target_check_reservation() failure, so that iscsi-target code
+sends the correct SCSI status.
+
+All other fabrics using target_submit_cmd_*() with a RESERVATION_CONFLICT
+call to transport_generic_request_failure() are not affected by this bug.
+
+Reported-by: Jeff Leung <jleung@curriegrad2004.ca>
+Cc: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/target/target_core_transport.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/target/target_core_transport.c
++++ b/drivers/target/target_core_transport.c
+@@ -1136,8 +1136,10 @@ target_setup_cmd_from_cdb(struct se_cmd
+ 		return ret;
+ 
+ 	ret = target_check_reservation(cmd);
+-	if (ret)
++	if (ret) {
++		cmd->scsi_status = SAM_STAT_RESERVATION_CONFLICT;
+ 		return ret;
++	}
+ 
+ 	ret = dev->transport->parse_cdb(cmd);
+ 	if (ret)
diff --git a/queue-3.8/userns-don-t-allow-creation-if-the-user-is-chrooted.patch b/queue-3.8/userns-don-t-allow-creation-if-the-user-is-chrooted.patch
new file mode 100644
index 00000000000..ca479cee91f
--- /dev/null
+++ b/queue-3.8/userns-don-t-allow-creation-if-the-user-is-chrooted.patch
@@ -0,0 +1,101 @@
+From 3151527ee007b73a0ebd296010f1c0454a919c7d Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Fri, 15 Mar 2013 01:45:51 -0700
+Subject: userns:  Don't allow creation if the user is chrooted
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit 3151527ee007b73a0ebd296010f1c0454a919c7d upstream.
+
+Guarantee that the policy of which files may be accessed that is
+established by setting the root directory will not be violated
+by user namespaces by verifying that the root directory points
+to the root of the mount namespace at the time of user namespace
+creation.
+
+Changing the root is a privileged operation, and as a matter of policy
+it serves to limit unprivileged processes to files below the current
+root directory.
+
+For reasons of simplicity and comprehensibility the privilege to
+change the root directory is gated solely on the CAP_SYS_CHROOT
+capability in the user namespace.  Therefore when creating a user
+namespace we must ensure that the policy of which files may be accessed
+can not be violated by changing the root directory.
+
+Anyone who runs a process in a chroot and would like to use user
+namespaces can set up the same view of filesystems with a mount
+namespace instead.  The result is that this is not a practical
+limitation for using user namespaces.
+
+Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
+Reported-by: Andy Lutomirski <luto@amacapital.net>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/namespace.c            |   24 ++++++++++++++++++++++++
+ include/linux/fs_struct.h |    2 ++
+ kernel/user_namespace.c   |    9 +++++++++
+ 3 files changed, 35 insertions(+)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -2758,6 +2758,30 @@ bool our_mnt(struct vfsmount *mnt)
+ 	return check_mnt(real_mount(mnt));
+ }
+ 
++bool current_chrooted(void)
++{
++	/* Does the current process have a non-standard root */
++	struct path ns_root;
++	struct path fs_root;
++	bool chrooted;
++
++	/* Find the namespace root */
++	ns_root.mnt = &current->nsproxy->mnt_ns->root->mnt;
++	ns_root.dentry = ns_root.mnt->mnt_root;
++	path_get(&ns_root);
++	while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root))
++		;
++
++	get_fs_root(current->fs, &fs_root);
++
++	chrooted = !path_equal(&fs_root, &ns_root);
++
++	path_put(&fs_root);
++	path_put(&ns_root);
++
++	return chrooted;
++}
++
+ static void *mntns_get(struct task_struct *task)
+ {
+ 	struct mnt_namespace *ns = NULL;
+--- a/include/linux/fs_struct.h
++++ b/include/linux/fs_struct.h
+@@ -50,4 +50,6 @@ static inline void get_fs_root_and_pwd(s
+ 	spin_unlock(&fs->lock);
+ }
+ 
++extern bool current_chrooted(void);
++
+ #endif /* _LINUX_FS_STRUCT_H */
+--- a/kernel/user_namespace.c
++++ b/kernel/user_namespace.c
+@@ -61,6 +61,15 @@ int create_user_ns(struct cred *new)
+ 	kgid_t group = new->egid;
+ 	int ret;
+ 
++	/*
++	 * Verify that we can not violate the policy of which files
++	 * may be accessed that is specified by the root directory,
++	 * by verifing that the root directory is at the root of the
++	 * mount namespace which allows all files to be accessed.
++	 */
++	if (current_chrooted())
++		return -EPERM;
++
+ 	/* The creator needs a mapping in the parent user namespace
+ 	 * or else we won't be able to reasonably tell userspace who
+ 	 * created a user_namespace.
diff --git a/queue-3.8/userns-restrict-when-proc-and-sysfs-can-be-mounted.patch b/queue-3.8/userns-restrict-when-proc-and-sysfs-can-be-mounted.patch
new file mode 100644
index 00000000000..307c04d1b21
--- /dev/null
+++ b/queue-3.8/userns-restrict-when-proc-and-sysfs-can-be-mounted.patch
@@ -0,0 +1,148 @@
+From 87a8ebd637dafc255070f503909a053cf0d98d3f Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Sun, 24 Mar 2013 14:28:27 -0700
+Subject: userns: Restrict when proc and sysfs can be mounted
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit 87a8ebd637dafc255070f503909a053cf0d98d3f upstream.
+
+Only allow unprivileged mounts of proc and sysfs if they are already
+mounted when the user namespace is created.
+
+proc and sysfs are interesting because they have content that is
+per namespace, and so fresh mounts are needed when new namespaces
+are created while at the same time proc and sysfs have content that
+is shared between every instance.
+
+Respect the policy of who may see the shared content of proc and sysfs
+by only allowing new mounts if there was an existing mount at the time
+the user namespace was created.
+
+In practice there are only two interesting cases: proc and sysfs are
+mounted at their usual places, or proc and sysfs are not mounted at all
+(some form of mount namespace jail).
+
+Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/namespace.c                 |   21 +++++++++++++++++++++
+ fs/proc/root.c                 |    4 ++++
+ fs/sysfs/mount.c               |    4 ++++
+ include/linux/user_namespace.h |    4 ++++
+ kernel/user.c                  |    2 ++
+ kernel/user_namespace.c        |    2 ++
+ 6 files changed, 37 insertions(+)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -2789,6 +2789,27 @@ bool current_chrooted(void)
+ 	return chrooted;
+ }
+ 
++void update_mnt_policy(struct user_namespace *userns)
++{
++	struct mnt_namespace *ns = current->nsproxy->mnt_ns;
++	struct mount *mnt;
++
++	down_read(&namespace_sem);
++	list_for_each_entry(mnt, &ns->list, mnt_list) {
++		switch (mnt->mnt.mnt_sb->s_magic) {
++		case SYSFS_MAGIC:
++			userns->may_mount_sysfs = true;
++			break;
++		case PROC_SUPER_MAGIC:
++			userns->may_mount_proc = true;
++			break;
++		}
++		if (userns->may_mount_sysfs && userns->may_mount_proc)
++			break;
++	}
++	up_read(&namespace_sem);
++}
++
+ static void *mntns_get(struct task_struct *task)
+ {
+ 	struct mnt_namespace *ns = NULL;
+--- a/fs/proc/root.c
++++ b/fs/proc/root.c
+@@ -16,6 +16,7 @@
+ #include <linux/sched.h>
+ #include <linux/module.h>
+ #include <linux/bitops.h>
++#include <linux/user_namespace.h>
+ #include <linux/mount.h>
+ #include <linux/pid_namespace.h>
+ #include <linux/parser.h>
+@@ -108,6 +109,9 @@ static struct dentry *proc_mount(struct
+ 	} else {
+ 		ns = task_active_pid_ns(current);
+ 		options = data;
++
++		if (!current_user_ns()->may_mount_proc)
++			return ERR_PTR(-EPERM);
+ 	}
+ 
+ 	sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns);
+--- a/fs/sysfs/mount.c
++++ b/fs/sysfs/mount.c
+@@ -19,6 +19,7 @@
+ #include <linux/module.h>
+ #include <linux/magic.h>
+ #include <linux/slab.h>
++#include <linux/user_namespace.h>
+ 
+ #include "sysfs.h"
+ 
+@@ -111,6 +112,9 @@ static struct dentry *sysfs_mount(struct
+ 	struct super_block *sb;
+ 	int error;
+ 
++	if (!(flags & MS_KERNMOUNT) && !current_user_ns()->may_mount_sysfs)
++		return ERR_PTR(-EPERM);
++
+ 	info = kzalloc(sizeof(*info), GFP_KERNEL);
+ 	if (!info)
+ 		return ERR_PTR(-ENOMEM);
+--- a/include/linux/user_namespace.h
++++ b/include/linux/user_namespace.h
+@@ -26,6 +26,8 @@ struct user_namespace {
+ 	kuid_t			owner;
+ 	kgid_t			group;
+ 	unsigned int		proc_inum;
++	bool			may_mount_sysfs;
++	bool			may_mount_proc;
+ };
+ 
+ extern struct user_namespace init_user_ns;
+@@ -82,4 +84,6 @@ static inline void put_user_ns(struct us
+ 
+ #endif
+ 
++void update_mnt_policy(struct user_namespace *userns);
++
+ #endif /* _LINUX_USER_H */
+--- a/kernel/user.c
++++ b/kernel/user.c
+@@ -53,6 +53,8 @@ struct user_namespace init_user_ns = {
+ 	.owner = GLOBAL_ROOT_UID,
+ 	.group = GLOBAL_ROOT_GID,
+ 	.proc_inum = PROC_USER_INIT_INO,
++	.may_mount_sysfs = true,
++	.may_mount_proc = true,
+ };
+ EXPORT_SYMBOL_GPL(init_user_ns);
+ 
+--- a/kernel/user_namespace.c
++++ b/kernel/user_namespace.c
+@@ -96,6 +96,8 @@ int create_user_ns(struct cred *new)
+ 
+ 	set_cred_user_ns(new, ns);
+ 
++	update_mnt_policy(ns);
++
+ 	return 0;
+ }
+ 
diff --git a/queue-3.8/vfs-add-a-mount-flag-to-lock-read-only-bind-mounts.patch b/queue-3.8/vfs-add-a-mount-flag-to-lock-read-only-bind-mounts.patch
new file mode 100644
index 00000000000..bb7cd2ee270
--- /dev/null
+++ b/queue-3.8/vfs-add-a-mount-flag-to-lock-read-only-bind-mounts.patch
@@ -0,0 +1,49 @@
+From 90563b198e4c6674c63672fae1923da467215f45 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Fri, 22 Mar 2013 03:10:15 -0700
+Subject: vfs: Add a mount flag to lock read only bind mounts
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit 90563b198e4c6674c63672fae1923da467215f45 upstream.
+
+When a read-only bind mount is copied from a mount namespace in a higher
+privileged user namespace to a mount namespace in a lesser privileged
+user namespace, it should not be possible to remove the read-only
+restriction.
+
+Add a MNT_LOCK_READONLY mount flag to indicate that a mount must
+remain read-only.
+
+Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/namespace.c        |    3 +++
+ include/linux/mount.h |    2 ++
+ 2 files changed, 5 insertions(+)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -1736,6 +1736,9 @@ static int change_mount_flags(struct vfs
+ 	if (readonly_request == __mnt_is_readonly(mnt))
+ 		return 0;
+ 
++	if (mnt->mnt_flags & MNT_LOCK_READONLY)
++		return -EPERM;
++
+ 	if (readonly_request)
+ 		error = mnt_make_readonly(real_mount(mnt));
+ 	else
+--- a/include/linux/mount.h
++++ b/include/linux/mount.h
+@@ -47,6 +47,8 @@ struct mnt_namespace;
+ 
+ #define MNT_INTERNAL	0x4000
+ 
++#define MNT_LOCK_READONLY	0x400000
++
+ struct vfsmount {
+ 	struct dentry *mnt_root;	/* root of the mounted tree */
+ 	struct super_block *mnt_sb;	/* pointer to superblock */
diff --git a/queue-3.8/vfs-carefully-propogate-mounts-across-user-namespaces.patch b/queue-3.8/vfs-carefully-propogate-mounts-across-user-namespaces.patch
new file mode 100644
index 00000000000..0456120bace
--- /dev/null
+++ b/queue-3.8/vfs-carefully-propogate-mounts-across-user-namespaces.patch
@@ -0,0 +1,95 @@
+From 132c94e31b8bca8ea921f9f96a57d684fa4ae0a9 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Fri, 22 Mar 2013 04:08:05 -0700
+Subject: vfs: Carefully propogate mounts across user namespaces
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit 132c94e31b8bca8ea921f9f96a57d684fa4ae0a9 upstream.
+
+As a matter of policy MNT_READONLY should not be changeable if the
+original mounter had more privileges than the creator of the mount
+namespace.
+
+Add the flag CL_UNPRIVILEGED to note when we are copying a mount from
+a mount namespace that requires more privileges to a mount namespace
+that requires fewer privileges.
+
+When the CL_UNPRIVILEGED flag is set cause clone_mnt to set MNT_LOCK_READONLY
+if the MNT_READONLY flag is set on the mount being copied.
+
+This protects both mount propagation and the initial creation of a less
+privileged mount namespace.
+
+Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
+Reported-by: Andy Lutomirski <luto@amacapital.net>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/namespace.c |    6 +++++-
+ fs/pnode.c     |    6 ++++++
+ fs/pnode.h     |    1 +
+ 3 files changed, 12 insertions(+), 1 deletion(-)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -798,6 +798,10 @@ static struct mount *clone_mnt(struct mo
+ 	}
+ 
+ 	mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD;
++	/* Don't allow unprivileged users to change mount flags */
++	if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY))
++		mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
++
+ 	atomic_inc(&sb->s_active);
+ 	mnt->mnt.mnt_sb = sb;
+ 	mnt->mnt.mnt_root = dget(root);
+@@ -2368,7 +2372,7 @@ static struct mnt_namespace *dup_mnt_ns(
+ 	/* First pass: copy the tree topology */
+ 	copy_flags = CL_COPY_ALL | CL_EXPIRE;
+ 	if (user_ns != mnt_ns->user_ns)
+-		copy_flags |= CL_SHARED_TO_SLAVE;
++		copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
+ 	new = copy_tree(old, old->mnt.mnt_root, copy_flags);
+ 	if (IS_ERR(new)) {
+ 		up_write(&namespace_sem);
+--- a/fs/pnode.c
++++ b/fs/pnode.c
+@@ -9,6 +9,7 @@
+ #include <linux/mnt_namespace.h>
+ #include <linux/mount.h>
+ #include <linux/fs.h>
++#include <linux/nsproxy.h>
+ #include "internal.h"
+ #include "pnode.h"
+ 
+@@ -220,6 +221,7 @@ static struct mount *get_source(struct m
+ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry,
+ 		    struct mount *source_mnt, struct list_head *tree_list)
+ {
++	struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
+ 	struct mount *m, *child;
+ 	int ret = 0;
+ 	struct mount *prev_dest_mnt = dest_mnt;
+@@ -237,6 +239,10 @@ int propagate_mnt(struct mount *dest_mnt
+ 
+ 		source =  get_source(m, prev_dest_mnt, prev_src_mnt, &type);
+ 
++		/* Notice when we are propagating across user namespaces */
++		if (m->mnt_ns->user_ns != user_ns)
++			type |= CL_UNPRIVILEGED;
++
+ 		child = copy_tree(source, source->mnt.mnt_root, type);
+ 		if (IS_ERR(child)) {
+ 			ret = PTR_ERR(child);
+--- a/fs/pnode.h
++++ b/fs/pnode.h
+@@ -23,6 +23,7 @@
+ #define CL_MAKE_SHARED 		0x08
+ #define CL_PRIVATE 		0x10
+ #define CL_SHARED_TO_SLAVE	0x20
++#define CL_UNPRIVILEGED		0x40
+ 
+ static inline void set_mnt_shared(struct mount *mnt)
+ {