--- /dev/null
+From 81b6b06197606b4bef4e427a197aeb808e8d89e1 Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Sat, 30 Aug 2014 18:32:05 -0400
+Subject: fix EBUSY on umount() from MNT_SHRINKABLE
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit 81b6b06197606b4bef4e427a197aeb808e8d89e1 upstream.
+
+We need the parents of victims alive until namespace_unlock() gets to
+dput() of the (ex-)mountpoints. However, that screws up the "is it
+busy" checks in the case where we have shrinkable mounts that need to
+be killed. Solution: go ahead and decrement the refcounts of parents
+right in umount_tree(), and increment them again just before dropping
+rwsem in namespace_unlock() (letting the loop at the end of
+namespace_unlock() finally drop those references for good, as we do
+now). Parents can't get freed until we drop rwsem - at least one
+reference is kept until then, both when the parent is among the
+victims and when it is not. So they'll still be around when we get to
+namespace_unlock().
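+
+A minimal userspace analogue of that refcount dance (hypothetical
+names and values, not the kernel code): umount_tree() drops the
+parent's count so the busy check sees the true value,
+namespace_unlock() re-takes it before dropping the lock, and the
+final cleanup loop drops it for good.
+
+    #include <assert.h>
+
+    struct mnt { int count; };
+
+    int main(void)
+    {
+        struct mnt parent = { .count = 2 };  /* base ref + child's ref */
+
+        parent.count--;              /* umount_tree(): drop child's ref early */
+        assert(parent.count == 1);   /* "is it busy?" now sees 1, not 2 */
+
+        parent.count++;              /* namespace_unlock(): undo before up_write() */
+        /* ... up_write(&namespace_sem) would happen here ... */
+        parent.count--;              /* final loop: drop the ref for good */
+
+        assert(parent.count == 1);   /* only the base reference remains */
+        return 0;
+    }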
+
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/namespace.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -1217,6 +1217,11 @@ static void namespace_unlock(void)
+ head.first->pprev = &head.first;
+ INIT_HLIST_HEAD(&unmounted);
+
++ /* undo decrements we'd done in umount_tree() */
++ hlist_for_each_entry(mnt, &head, mnt_hash)
++ if (mnt->mnt_ex_mountpoint.mnt)
++ mntget(mnt->mnt_ex_mountpoint.mnt);
++
+ up_write(&namespace_sem);
+
+ synchronize_rcu();
+@@ -1268,6 +1273,7 @@ void umount_tree(struct mount *mnt, int
+ p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
+ if (mnt_has_parent(p)) {
+ put_mountpoint(p->mnt_mp);
++ mnt_add_count(p->mnt_parent, -1);
+ /* move the reference to mountpoint into ->mnt_ex_mountpoint */
+ p->mnt_ex_mountpoint.dentry = p->mnt_mountpoint;
+ p->mnt_ex_mountpoint.mnt = &p->mnt_parent->mnt;
--- /dev/null
+From 88b368f27a094277143d8ecd5a056116f6a41520 Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Mon, 18 Aug 2014 15:09:26 -0400
+Subject: get rid of propagate_umount() mistakenly treating slaves as busy.
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit 88b368f27a094277143d8ecd5a056116f6a41520 upstream.
+
+The check in __propagate_umount() ("has somebody explicitly mounted
+something on that slave?") is done *before* taking the already doomed
+victims out of the child lists.
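+
+A toy illustration of the ordering (hypothetical structures, not
+kernel code): a slave looks busy if its child list is non-empty, so
+already-doomed children must be unlinked before the check runs, not
+after.
+
+    #include <stdbool.h>
+    #include <stdio.h>
+
+    struct mount { int nr_children; };
+
+    static bool may_umount(const struct mount *slave)
+    {
+        /* "has somebody explicitly mounted something on that slave?" */
+        return slave->nr_children == 0;
+    }
+
+    int main(void)
+    {
+        struct mount slave = { .nr_children = 1 };  /* one doomed child */
+
+        /* buggy order: check first - the slave wrongly looks busy */
+        printf("check before unlink: %s\n", may_umount(&slave) ? "ok" : "busy");
+
+        slave.nr_children--;  /* fixed order: unlink the doomed child first */
+        printf("check after unlink:  %s\n", may_umount(&slave) ? "ok" : "busy");
+        return 0;
+    }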
+
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/namespace.c | 4 +++-
+ fs/pnode.c | 1 +
+ 2 files changed, 4 insertions(+), 1 deletion(-)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -1253,6 +1253,9 @@ void umount_tree(struct mount *mnt, int
+ hlist_add_head(&p->mnt_hash, &tmp_list);
+ }
+
++ hlist_for_each_entry(p, &tmp_list, mnt_hash)
++ list_del_init(&p->mnt_child);
++
+ if (how)
+ propagate_umount(&tmp_list);
+
+@@ -1263,7 +1266,6 @@ void umount_tree(struct mount *mnt, int
+ p->mnt_ns = NULL;
+ if (how < 2)
+ p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
+- list_del_init(&p->mnt_child);
+ if (mnt_has_parent(p)) {
+ put_mountpoint(p->mnt_mp);
+ /* move the reference to mountpoint into ->mnt_ex_mountpoint */
+--- a/fs/pnode.c
++++ b/fs/pnode.c
+@@ -381,6 +381,7 @@ static void __propagate_umount(struct mo
+ * other children
+ */
+ if (child && list_empty(&child->mnt_mounts)) {
++ list_del_init(&child->mnt_child);
+ hlist_del_init_rcu(&child->mnt_hash);
+ hlist_add_before_rcu(&child->mnt_hash, &mnt->mnt_hash);
+ }
--- /dev/null
+From db181ce011e3c033328608299cd6fac06ea50130 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Tue, 29 Jul 2014 15:50:44 -0700
+Subject: mnt: Add tests for unprivileged remount cases that have been found to be faulty
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit db181ce011e3c033328608299cd6fac06ea50130 upstream.
+
+Kenton Varda <kenton@sandstorm.io> discovered that by remounting a
+read-only bind mount read-only in a user namespace the
+MNT_LOCK_READONLY bit would be cleared, allowing an unprivileged user
+to then remount a read-only mount read-write.
+
+Upon review of the code in remount it was discovered that the code
+allowed nosuid, noexec, and nodev to be cleared. It was also
+discovered that the code was allowing the per-mount atime flags to be
+changed.
+
+The first naive patch to fix these issues contained the flaw that
+using default atime settings when remounting a filesystem could be
+disallowed.
+
+To avoid these problems in the future, add tests to ensure
+unprivileged remounts succeed and fail at the appropriate times.
+
+Acked-by: Serge E. Hallyn <serge.hallyn@ubuntu.com>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/testing/selftests/Makefile | 1
+ tools/testing/selftests/mount/Makefile | 17
+ tools/testing/selftests/mount/unprivileged-remount-test.c | 242 ++++++++++++++
+ 3 files changed, 260 insertions(+)
+
+--- a/tools/testing/selftests/Makefile
++++ b/tools/testing/selftests/Makefile
+@@ -4,6 +4,7 @@ TARGETS += efivarfs
+ TARGETS += kcmp
+ TARGETS += memory-hotplug
+ TARGETS += mqueue
++TARGETS += mount
+ TARGETS += net
+ TARGETS += ptrace
+ TARGETS += timers
+--- /dev/null
++++ b/tools/testing/selftests/mount/Makefile
+@@ -0,0 +1,17 @@
++# Makefile for mount selftests.
++
++all: unprivileged-remount-test
++
++unprivileged-remount-test: unprivileged-remount-test.c
++ gcc -Wall -O2 unprivileged-remount-test.c -o unprivileged-remount-test
++
++# Allow specific tests to be selected.
++test_unprivileged_remount: unprivileged-remount-test
++ @if [ -f /proc/self/uid_map ] ; then ./unprivileged-remount-test ; fi
++
++run_tests: all test_unprivileged_remount
++
++clean:
++ rm -f unprivileged-remount-test
++
++.PHONY: all test_unprivileged_remount
+--- /dev/null
++++ b/tools/testing/selftests/mount/unprivileged-remount-test.c
+@@ -0,0 +1,242 @@
++#define _GNU_SOURCE
++#include <sched.h>
++#include <stdio.h>
++#include <errno.h>
++#include <string.h>
++#include <sys/types.h>
++#include <sys/mount.h>
++#include <sys/wait.h>
++#include <stdlib.h>
++#include <unistd.h>
++#include <fcntl.h>
++#include <grp.h>
++#include <stdbool.h>
++#include <stdarg.h>
++
++#ifndef CLONE_NEWNS
++# define CLONE_NEWNS 0x00020000
++#endif
++#ifndef CLONE_NEWUTS
++# define CLONE_NEWUTS 0x04000000
++#endif
++#ifndef CLONE_NEWIPC
++# define CLONE_NEWIPC 0x08000000
++#endif
++#ifndef CLONE_NEWNET
++# define CLONE_NEWNET 0x40000000
++#endif
++#ifndef CLONE_NEWUSER
++# define CLONE_NEWUSER 0x10000000
++#endif
++#ifndef CLONE_NEWPID
++# define CLONE_NEWPID 0x20000000
++#endif
++
++#ifndef MS_RELATIME
++#define MS_RELATIME (1 << 21)
++#endif
++#ifndef MS_STRICTATIME
++#define MS_STRICTATIME (1 << 24)
++#endif
++
++static void die(char *fmt, ...)
++{
++ va_list ap;
++ va_start(ap, fmt);
++ vfprintf(stderr, fmt, ap);
++ va_end(ap);
++ exit(EXIT_FAILURE);
++}
++
++static void write_file(char *filename, char *fmt, ...)
++{
++ char buf[4096];
++ int fd;
++ ssize_t written;
++ int buf_len;
++ va_list ap;
++
++ va_start(ap, fmt);
++ buf_len = vsnprintf(buf, sizeof(buf), fmt, ap);
++ va_end(ap);
++ if (buf_len < 0) {
++ die("vsnprintf failed: %s\n",
++ strerror(errno));
++ }
++ if (buf_len >= sizeof(buf)) {
++ die("vsnprintf output truncated\n");
++ }
++
++ fd = open(filename, O_WRONLY);
++ if (fd < 0) {
++ die("open of %s failed: %s\n",
++ filename, strerror(errno));
++ }
++ written = write(fd, buf, buf_len);
++ if (written != buf_len) {
++ if (written >= 0) {
++ die("short write to %s\n", filename);
++ } else {
++ die("write to %s failed: %s\n",
++ filename, strerror(errno));
++ }
++ }
++ if (close(fd) != 0) {
++ die("close of %s failed: %s\n",
++ filename, strerror(errno));
++ }
++}
++
++static void create_and_enter_userns(void)
++{
++ uid_t uid;
++ gid_t gid;
++
++ uid = getuid();
++ gid = getgid();
++
++ if (unshare(CLONE_NEWUSER) !=0) {
++ die("unshare(CLONE_NEWUSER) failed: %s\n",
++ strerror(errno));
++ }
++
++ write_file("/proc/self/uid_map", "0 %d 1", uid);
++ write_file("/proc/self/gid_map", "0 %d 1", gid);
++
++ if (setgroups(0, NULL) != 0) {
++ die("setgroups failed: %s\n",
++ strerror(errno));
++ }
++ if (setgid(0) != 0) {
++ die ("setgid(0) failed %s\n",
++ strerror(errno));
++ }
++ if (setuid(0) != 0) {
++ die("setuid(0) failed %s\n",
++ strerror(errno));
++ }
++}
++
++static
++bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags)
++{
++ pid_t child;
++
++ child = fork();
++ if (child == -1) {
++ die("fork failed: %s\n",
++ strerror(errno));
++ }
++ if (child != 0) { /* parent */
++ pid_t pid;
++ int status;
++ pid = waitpid(child, &status, 0);
++ if (pid == -1) {
++ die("waitpid failed: %s\n",
++ strerror(errno));
++ }
++ if (pid != child) {
++ die("waited for %d got %d\n",
++ child, pid);
++ }
++ if (!WIFEXITED(status)) {
++ die("child did not terminate cleanly\n");
++ }
++ return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false;
++ }
++
++ create_and_enter_userns();
++ if (unshare(CLONE_NEWNS) != 0) {
++ die("unshare(CLONE_NEWNS) failed: %s\n",
++ strerror(errno));
++ }
++
++ if (mount("testing", "/tmp", "ramfs", mount_flags, NULL) != 0) {
++ die("mount of /tmp failed: %s\n",
++ strerror(errno));
++ }
++
++ create_and_enter_userns();
++
++ if (unshare(CLONE_NEWNS) != 0) {
++ die("unshare(CLONE_NEWNS) failed: %s\n",
++ strerror(errno));
++ }
++
++ if (mount("/tmp", "/tmp", "none",
++ MS_REMOUNT | MS_BIND | remount_flags, NULL) != 0) {
++ /* system("cat /proc/self/mounts"); */
++ die("remount of /tmp failed: %s\n",
++ strerror(errno));
++ }
++
++ if (mount("/tmp", "/tmp", "none",
++ MS_REMOUNT | MS_BIND | invalid_flags, NULL) == 0) {
++ /* system("cat /proc/self/mounts"); */
++ die("remount of /tmp with invalid flags "
++ "succeeded unexpectedly\n");
++ }
++ exit(EXIT_SUCCESS);
++}
++
++static bool test_unpriv_remount_simple(int mount_flags)
++{
++ return test_unpriv_remount(mount_flags, mount_flags, 0);
++}
++
++static bool test_unpriv_remount_atime(int mount_flags, int invalid_flags)
++{
++ return test_unpriv_remount(mount_flags, mount_flags, invalid_flags);
++}
++
++int main(int argc, char **argv)
++{
++ if (!test_unpriv_remount_simple(MS_RDONLY|MS_NODEV)) {
++ die("MS_RDONLY malfunctions\n");
++ }
++ if (!test_unpriv_remount_simple(MS_NODEV)) {
++ die("MS_NODEV malfunctions\n");
++ }
++ if (!test_unpriv_remount_simple(MS_NOSUID|MS_NODEV)) {
++ die("MS_NOSUID malfunctions\n");
++ }
++ if (!test_unpriv_remount_simple(MS_NOEXEC|MS_NODEV)) {
++ die("MS_NOEXEC malfunctions\n");
++ }
++ if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODEV,
++ MS_NOATIME|MS_NODEV))
++ {
++ die("MS_RELATIME malfunctions\n");
++ }
++ if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODEV,
++ MS_NOATIME|MS_NODEV))
++ {
++ die("MS_STRICTATIME malfunctions\n");
++ }
++ if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODEV,
++ MS_STRICTATIME|MS_NODEV))
++ {
++ die("MS_RELATIME malfunctions\n");
++ }
++ if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODIRATIME|MS_NODEV,
++ MS_NOATIME|MS_NODEV))
++ {
++ die("MS_RELATIME malfunctions\n");
++ }
++ if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODIRATIME|MS_NODEV,
++ MS_NOATIME|MS_NODEV))
++ {
++ die("MS_RELATIME malfunctions\n");
++ }
++ if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODIRATIME|MS_NODEV,
++ MS_STRICTATIME|MS_NODEV))
++ {
++ die("MS_RELATIME malfunctions\n");
++ }
++ if (!test_unpriv_remount(MS_STRICTATIME|MS_NODEV, MS_NODEV,
++ MS_NOATIME|MS_NODEV))
++ {
++ die("Default atime malfunctions\n");
++ }
++ return EXIT_SUCCESS;
++}
--- /dev/null
+From ffbc6f0ead47fa5a1dc9642b0331cb75c20a640e Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Mon, 28 Jul 2014 17:36:04 -0700
+Subject: mnt: Change the default remount atime from relatime to the existing value
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit ffbc6f0ead47fa5a1dc9642b0331cb75c20a640e upstream.
+
+Since March 2009 the kernel has defaulted to relatime when no
+MS_...ATIME flags are passed.
+
+Defaulting to relatime instead of the existing atime state during a
+remount is silly, and causes problems in practice for people who don't
+specify any MS_...ATIME flags and expect to get the default filesystem
+atime setting. Those users may encounter a permission error because
+the default atime setting does not work.
+
+A default that does not work and causes permission problems is
+ridiculous, so preserve the existing value to have a default
+atime setting that is always guaranteed to work.
+
+Using the default atime setting in this way is particularly
+interesting for applications built to run in restricted userspace
+environments without /proc mounted, as the existing atime mount
+options of a filesystem cannot be read from /proc/mounts.
+
+In practice this fixes user space programs that use the default atime
+setting on remount and that are broken by the permission checks
+keeping less privileged users from changing a more privileged user's
+atime settings.
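+
+A minimal sketch of the call this change affects (userspace, assuming
+a mount at /mnt that the caller is allowed to remount): passing no
+MS_...ATIME flag on a remount now keeps whatever atime setting the
+mount already had instead of silently switching it to relatime.
+
+    #include <stdio.h>
+    #include <sys/mount.h>
+
+    int main(void)
+    {
+        /* No MS_NOATIME/MS_NODIRATIME/MS_RELATIME/MS_STRICTATIME here:
+         * after this change the kernel copies the existing
+         * MNT_ATIME_MASK bits from the mount instead of defaulting
+         * to relatime. */
+        if (mount(NULL, "/mnt", NULL, MS_REMOUNT | MS_BIND, NULL) != 0)
+            perror("remount /mnt");
+        return 0;
+    }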
+
+Acked-by: Serge E. Hallyn <serge.hallyn@ubuntu.com>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/namespace.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -2464,6 +2464,14 @@ long do_mount(const char *dev_name, cons
+ if (flags & MS_RDONLY)
+ mnt_flags |= MNT_READONLY;
+
++ /* The default atime for remount is preservation */
++ if ((flags & MS_REMOUNT) &&
++ ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME |
++ MS_STRICTATIME)) == 0)) {
++ mnt_flags &= ~MNT_ATIME_MASK;
++ mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK;
++ }
++
+ flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
+ MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
+ MS_STRICTATIME);
--- /dev/null
+From 9566d6742852c527bf5af38af5cbb878dad75705 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Mon, 28 Jul 2014 17:26:07 -0700
+Subject: mnt: Correct permission checks in do_remount
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit 9566d6742852c527bf5af38af5cbb878dad75705 upstream.
+
+While investigating the issue where "mount --bind -oremount,ro ..."
+would result in a later "mount --bind -oremount,rw" succeeding even if
+the mount started off locked, I realized that there are several
+additional mount flags that should be locked and are not.
+
+In particular MNT_NOSUID, MNT_NODEV, MNT_NOEXEC, and the atime
+flags in addition to MNT_READONLY should all be locked. These
+flags are all per mount point, can all be changed with MS_BIND,
+and should not be changeable if set by a more privileged user.
+
+The following additions to the current logic are added in this patch.
+- nosuid may not be clearable by a less privileged user.
+- nodev may not be clearable by a less privileged user.
+- noexec may not be clearable by a less privileged user.
+- atime flags may not be changeable by a less privileged user.
+
+The logic with atime is that whether atime is updated on access is a
+global policy: backup software and auditing software could break if
+atime bits are not updated (when they are configured to be updated),
+and serious performance degradation could result (a DoS attack) if
+atime updates happen when they have been explicitly disabled.
+Therefore an unprivileged user should not be able to mess with the
+atime bits set by a more privileged user.
+
+The additional restrictions are implemented with the addition of
+MNT_LOCK_NOSUID, MNT_LOCK_NODEV, MNT_LOCK_NOEXEC, and MNT_LOCK_ATIME
+mnt flags.
+
+Taken together these changes and the fixes for MNT_LOCK_READONLY
+should make it safe for an unprivileged user to create a user
+namespace and to call "mount --bind -o remount,... ..." without
+the danger of mount flags being changed maliciously.
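+
+A hedged sketch of the expected failure mode (userspace; assumes the
+caller sits in an unprivileged user namespace holding a mount at /mnt
+whose nosuid flag was locked by the parent namespace): a remount that
+tries to drop the flag should now get EPERM.
+
+    #include <errno.h>
+    #include <stdio.h>
+    #include <sys/mount.h>
+
+    int main(void)
+    {
+        /* No MS_NOSUID in the new flags: with MNT_LOCK_NOSUID set,
+         * do_remount() now refuses to clear it. */
+        if (mount("/mnt", "/mnt", NULL, MS_REMOUNT | MS_BIND, NULL) == 0)
+            fprintf(stderr, "nosuid was cleared - kernel lacks the fix?\n");
+        else if (errno == EPERM)
+            printf("EPERM, as expected for a locked flag\n");
+        else
+            perror("remount");
+        return 0;
+    }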
+
+Acked-by: Serge E. Hallyn <serge.hallyn@ubuntu.com>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/namespace.c | 36 +++++++++++++++++++++++++++++++++---
+ include/linux/mount.h | 5 +++++
+ 2 files changed, 38 insertions(+), 3 deletions(-)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -887,8 +887,21 @@ static struct mount *clone_mnt(struct mo
+
+ mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED);
+ /* Don't allow unprivileged users to change mount flags */
+- if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY))
+- mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
++ if (flag & CL_UNPRIVILEGED) {
++ mnt->mnt.mnt_flags |= MNT_LOCK_ATIME;
++
++ if (mnt->mnt.mnt_flags & MNT_READONLY)
++ mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
++
++ if (mnt->mnt.mnt_flags & MNT_NODEV)
++ mnt->mnt.mnt_flags |= MNT_LOCK_NODEV;
++
++ if (mnt->mnt.mnt_flags & MNT_NOSUID)
++ mnt->mnt.mnt_flags |= MNT_LOCK_NOSUID;
++
++ if (mnt->mnt.mnt_flags & MNT_NOEXEC)
++ mnt->mnt.mnt_flags |= MNT_LOCK_NOEXEC;
++ }
+
+ /* Don't allow unprivileged users to reveal what is under a mount */
+ if ((flag & CL_UNPRIVILEGED) && list_empty(&old->mnt_expire))
+@@ -1922,6 +1935,23 @@ static int do_remount(struct path *path,
+ !(mnt_flags & MNT_READONLY)) {
+ return -EPERM;
+ }
++ if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
++ !(mnt_flags & MNT_NODEV)) {
++ return -EPERM;
++ }
++ if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
++ !(mnt_flags & MNT_NOSUID)) {
++ return -EPERM;
++ }
++ if ((mnt->mnt.mnt_flags & MNT_LOCK_NOEXEC) &&
++ !(mnt_flags & MNT_NOEXEC)) {
++ return -EPERM;
++ }
++ if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) &&
++ ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) {
++ return -EPERM;
++ }
++
+ err = security_sb_remount(sb, data);
+ if (err)
+ return err;
+@@ -2120,7 +2150,7 @@ static int do_new_mount(struct path *pat
+ */
+ if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) {
+ flags |= MS_NODEV;
+- mnt_flags |= MNT_NODEV;
++ mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV;
+ }
+ }
+
+--- a/include/linux/mount.h
++++ b/include/linux/mount.h
+@@ -45,12 +45,17 @@ struct mnt_namespace;
+ #define MNT_USER_SETTABLE_MASK (MNT_NOSUID | MNT_NODEV | MNT_NOEXEC \
+ | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME \
+ | MNT_READONLY)
++#define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME )
+
+ #define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \
+ MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED)
+
+ #define MNT_INTERNAL 0x4000
+
++#define MNT_LOCK_ATIME 0x040000
++#define MNT_LOCK_NOEXEC 0x080000
++#define MNT_LOCK_NOSUID 0x100000
++#define MNT_LOCK_NODEV 0x200000
+ #define MNT_LOCK_READONLY 0x400000
+ #define MNT_LOCKED 0x800000
+ #define MNT_DOOMED 0x1000000
--- /dev/null
+From 07b645589dcda8b7a5249e096fece2a67556f0f4 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Mon, 28 Jul 2014 17:10:56 -0700
+Subject: mnt: Move the test for MNT_LOCK_READONLY from change_mount_flags into do_remount
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit 07b645589dcda8b7a5249e096fece2a67556f0f4 upstream.
+
+There are no races as locked mount flags are guaranteed to never change.
+
+Moving the test into do_remount makes it more visible, and ensures all
+filesystem remounts pass the MNT_LOCK_READONLY permission check. This
+second case is not an issue today as filesystem remounts are guarded
+by capable(CAP_SYS_ADMIN) and thus will always fail in less privileged
+mount namespaces, but it could become an issue in the future.
+
+Acked-by: Serge E. Hallyn <serge.hallyn@ubuntu.com>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/namespace.c | 13 ++++++++++---
+ 1 file changed, 10 insertions(+), 3 deletions(-)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -1887,9 +1887,6 @@ static int change_mount_flags(struct vfs
+ if (readonly_request == __mnt_is_readonly(mnt))
+ return 0;
+
+- if (mnt->mnt_flags & MNT_LOCK_READONLY)
+- return -EPERM;
+-
+ if (readonly_request)
+ error = mnt_make_readonly(real_mount(mnt));
+ else
+@@ -1915,6 +1912,16 @@ static int do_remount(struct path *path,
+ if (path->dentry != path->mnt->mnt_root)
+ return -EINVAL;
+
++ /* Don't allow changing of locked mnt flags.
++ *
++ * No locks need to be held here while testing the various
++ * MNT_LOCK flags because those flags can never be cleared
++ * once they are set.
++ */
++ if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) &&
++ !(mnt_flags & MNT_READONLY)) {
++ return -EPERM;
++ }
+ err = security_sb_remount(sb, data);
+ if (err)
+ return err;
--- /dev/null
+From a6138db815df5ee542d848318e5dae681590fccd Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Mon, 28 Jul 2014 16:26:53 -0700
+Subject: mnt: Only change user settable mount flags in remount
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit a6138db815df5ee542d848318e5dae681590fccd upstream.
+
+Kenton Varda <kenton@sandstorm.io> discovered that by remounting a
+read-only bind mount read-only in a user namespace the
+MNT_LOCK_READONLY bit would be cleared, allowing an unprivileged user
+to then remount a read-only mount read-write.
+
+Correct this by replacing the mask of mount flags to preserve
+with a mask of mount flags that may be changed, and preserve
+all others. This ensures that any future bugs with this mask and
+remount will fail in an easy-to-detect way where new mount flags
+simply won't change.
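+
+A standalone sketch of why the inverted mask is safer (the flag
+values below mirror include/linux/mount.h, but the mask is a trimmed
+subset and this is an illustration, not kernel code): everything
+outside the user-settable set is carried over, so a flag the mask's
+author never heard of cannot be dropped.
+
+    #include <stdio.h>
+
+    #define MNT_NOSUID          0x01
+    #define MNT_NODEV           0x02
+    #define MNT_NOEXEC          0x04
+    #define MNT_READONLY        0x40
+    #define MNT_LOCK_READONLY   0x400000  /* not user settable */
+
+    #define MNT_USER_SETTABLE_MASK \
+        (MNT_NOSUID | MNT_NODEV | MNT_NOEXEC | MNT_READONLY)
+
+    int main(void)
+    {
+        unsigned int old = MNT_READONLY | MNT_LOCK_READONLY;
+        unsigned int mnt_flags = 0;  /* user asked to clear everything */
+
+        /* preserve every flag the user may not touch */
+        mnt_flags |= old & ~MNT_USER_SETTABLE_MASK;
+
+        printf("MNT_LOCK_READONLY %s\n",
+               (mnt_flags & MNT_LOCK_READONLY) ? "preserved" : "lost");
+        return 0;
+    }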
+
+Acked-by: Serge E. Hallyn <serge.hallyn@ubuntu.com>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/namespace.c | 2 +-
+ include/linux/mount.h | 4 +++-
+ 2 files changed, 4 insertions(+), 2 deletions(-)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -1928,7 +1928,7 @@ static int do_remount(struct path *path,
+ err = do_remount_sb(sb, flags, data, 0);
+ if (!err) {
+ lock_mount_hash();
+- mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK;
++ mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
+ mnt->mnt.mnt_flags = mnt_flags;
+ touch_mnt_namespace(mnt->mnt_ns);
+ unlock_mount_hash();
+--- a/include/linux/mount.h
++++ b/include/linux/mount.h
+@@ -42,7 +42,9 @@ struct mnt_namespace;
+ * flag, consider how it interacts with shared mounts.
+ */
+ #define MNT_SHARED_MASK (MNT_UNBINDABLE)
+-#define MNT_PROPAGATION_MASK (MNT_SHARED | MNT_UNBINDABLE)
++#define MNT_USER_SETTABLE_MASK (MNT_NOSUID | MNT_NODEV | MNT_NOEXEC \
++ | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME \
++ | MNT_READONLY)
+
+ #define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \
+ MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED)
--- /dev/null
+From 651e22f2701b4113989237c3048d17337dd2185c Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
+Date: Wed, 6 Aug 2014 14:11:33 -0400
+Subject: ring-buffer: Always reset iterator to reader page
+
+From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
+
+commit 651e22f2701b4113989237c3048d17337dd2185c upstream.
+
+When performing a consuming read, the ring buffer swaps out a
+page from the ring buffer with a empty page and this page that
+was swapped out becomes the new reader page. The reader page
+is owned by the reader and since it was swapped out of the ring
+buffer, writers do not have access to it (there's an exception
+to that rule, but it's out of scope for this commit).
+
+When reading the "trace" file, it is a non-consuming read, which
+means that the data in the ring buffer will not be modified.
+When the trace file is opened, a ring buffer iterator is allocated
+and writes to the ring buffer are disabled, such that the iterator
+will not have issues iterating over the data.
+
+Although writes to the ring buffer are disabled, other reads, and
+even consuming reads, are not. If a consuming read happens, then
+the iterator is reset and starts reading from the beginning again.
+
+My tests would sometimes trigger this bug on my i386 box:
+
+WARNING: CPU: 0 PID: 5175 at kernel/trace/trace.c:1527 __trace_find_cmdline+0x66/0xaa()
+Modules linked in:
+CPU: 0 PID: 5175 Comm: grep Not tainted 3.16.0-rc3-test+ #8
+Hardware name: /DG965MQ, BIOS MQ96510J.86A.0372.2006.0605.1717 06/05/2006
+ 00000000 00000000 f09c9e1c c18796b3 c1b5d74c f09c9e4c c103a0e3 c1b5154b
+ f09c9e78 00001437 c1b5d74c 000005f7 c10bd85a c10bd85a c1cac57c f09c9eb0
+ ed0e0000 f09c9e64 c103a185 00000009 f09c9e5c c1b5154b f09c9e78 f09c9e80
+Call Trace:
+ [<c18796b3>] dump_stack+0x4b/0x75
+ [<c103a0e3>] warn_slowpath_common+0x7e/0x95
+ [<c10bd85a>] ? __trace_find_cmdline+0x66/0xaa
+ [<c10bd85a>] ? __trace_find_cmdline+0x66/0xaa
+ [<c103a185>] warn_slowpath_fmt+0x33/0x35
+ [<c10bd85a>] __trace_find_cmdline+0x66/0xaa
+ [<c10bed04>] trace_find_cmdline+0x40/0x64
+ [<c10c3c16>] trace_print_context+0x27/0xec
+ [<c10c4360>] ? trace_seq_printf+0x37/0x5b
+ [<c10c0b15>] print_trace_line+0x319/0x39b
+ [<c10ba3fb>] ? ring_buffer_read+0x47/0x50
+ [<c10c13b1>] s_show+0x192/0x1ab
+ [<c10bfd9a>] ? s_next+0x5a/0x7c
+ [<c112e76e>] seq_read+0x267/0x34c
+ [<c1115a25>] vfs_read+0x8c/0xef
+ [<c112e507>] ? seq_lseek+0x154/0x154
+ [<c1115ba2>] SyS_read+0x54/0x7f
+ [<c188488e>] syscall_call+0x7/0xb
+---[ end trace 3f507febd6b4cc83 ]---
+>>>> ##### CPU 1 buffer started ####
+
+Which was the __trace_find_cmdline() function complaining about the pid
+in the event record being negative.
+
+After adding more test cases, this would trigger more often. Strangely
+enough, it would never trigger on a single test, but instead would trigger
+only when running all the tests. I believe that was the case because it
+required one of the tests to be shutting down via delayed instances while
+a new test started up.
+
+After spending several days debugging this, I found that it was caused by
+the iterator becoming corrupted. Debugging further, I found out why
+the iterator became corrupted. It happened in rb_iter_reset().
+
+As consuming reads may read only part of the reader page rather than
+all of it, there's a "read" field recording where the last read took
+place. The iterator must also start at the read position. In the rb_iter_reset()
+code, if the reader page was disconnected from the ring buffer, the iterator
+would start at the head page within the ring buffer (where writes still
+happen). But the mistake there was that it still used the "read" field
+to start the iterator on the head page, where it should always start
+at zero because readers never read from within the ring buffer where
+writes occur.
+
+I originally wrote a patch to have it set the iter->head to 0 instead
+of iter->head_page->read, but then I questioned why it wasn't always
+setting the iter to point to the reader page, as the reader page is
+still valid. The list_empty(reader_page->list) just means that it was
+successful in swapping out. But the reader_page may still have data.
+
+There was a bug report a long time ago that was not reproducible that
+had something about trace_pipe (consuming read) not matching trace
+(iterator read). This may explain why that happened.
+
+Anyway, the correct answer to this bug is to always use the reader
+page and not reset the iterator to point inside the writable ring
+buffer.
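+
+A toy model of the fixed reset (hypothetical userspace structures,
+not the kernel code): the iterator now always starts on the reader
+page at its read offset, never on a page that writers still own.
+
+    #include <stddef.h>
+    #include <stdio.h>
+
+    struct buffer_page { size_t read; };  /* last consumed offset */
+    struct cpu_buffer { struct buffer_page *reader_page; };
+    struct ring_iter { struct buffer_page *head_page; size_t head; };
+
+    static void iter_reset(struct ring_iter *it, struct cpu_buffer *cb)
+    {
+        /* old code could land on the writable head page and reuse
+         * ->read there; the fix starts on the reader page always */
+        it->head_page = cb->reader_page;
+        it->head = cb->reader_page->read;
+    }
+
+    int main(void)
+    {
+        struct buffer_page rp = { .read = 12 };
+        struct cpu_buffer cb = { .reader_page = &rp };
+        struct ring_iter it;
+
+        iter_reset(&it, &cb);
+        printf("iterator starts at offset %zu of the reader page\n", it.head);
+        return 0;
+    }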
+
+Fixes: d769041f8653 "ring_buffer: implement new locking"
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/trace/ring_buffer.c | 17 ++++++-----------
+ 1 file changed, 6 insertions(+), 11 deletions(-)
+
+--- a/kernel/trace/ring_buffer.c
++++ b/kernel/trace/ring_buffer.c
+@@ -3354,21 +3354,16 @@ static void rb_iter_reset(struct ring_bu
+ struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+
+ /* Iterator usage is expected to have record disabled */
+- if (list_empty(&cpu_buffer->reader_page->list)) {
+- iter->head_page = rb_set_head_page(cpu_buffer);
+- if (unlikely(!iter->head_page))
+- return;
+- iter->head = iter->head_page->read;
+- } else {
+- iter->head_page = cpu_buffer->reader_page;
+- iter->head = cpu_buffer->reader_page->read;
+- }
++ iter->head_page = cpu_buffer->reader_page;
++ iter->head = cpu_buffer->reader_page->read;
++
++ iter->cache_reader_page = iter->head_page;
++ iter->cache_read = iter->head;
++
+ if (iter->head)
+ iter->read_stamp = cpu_buffer->read_stamp;
+ else
+ iter->read_stamp = iter->head_page->page->time_stamp;
+- iter->cache_reader_page = cpu_buffer->reader_page;
+- iter->cache_read = cpu_buffer->read;
+ }
+
+ /**
--- /dev/null
+From 021de3d904b88b1771a3a2cfc5b75023c391e646 Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
+Date: Wed, 6 Aug 2014 15:36:31 -0400
+Subject: ring-buffer: Up rb_iter_peek() loop count to 3
+
+From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
+
+commit 021de3d904b88b1771a3a2cfc5b75023c391e646 upstream.
+
+After writing a test to try to trigger the bug that caused the
+ring buffer iterator to become corrupted, I hit another bug:
+
+ WARNING: CPU: 1 PID: 5281 at kernel/trace/ring_buffer.c:3766 rb_iter_peek+0x113/0x238()
+ Modules linked in: ipt_MASQUERADE sunrpc [...]
+ CPU: 1 PID: 5281 Comm: grep Tainted: G W 3.16.0-rc3-test+ #143
+ Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS SDBLI944.86P 05/08/2007
+ 0000000000000000 ffffffff81809a80 ffffffff81503fb0 0000000000000000
+ ffffffff81040ca1 ffff8800796d6010 ffffffff810c138d ffff8800796d6010
+ ffff880077438c80 ffff8800796d6010 ffff88007abbe600 0000000000000003
+ Call Trace:
+ [<ffffffff81503fb0>] ? dump_stack+0x4a/0x75
+ [<ffffffff81040ca1>] ? warn_slowpath_common+0x7e/0x97
+ [<ffffffff810c138d>] ? rb_iter_peek+0x113/0x238
+ [<ffffffff810c138d>] ? rb_iter_peek+0x113/0x238
+ [<ffffffff810c14df>] ? ring_buffer_iter_peek+0x2d/0x5c
+ [<ffffffff810c6f73>] ? tracing_iter_reset+0x6e/0x96
+ [<ffffffff810c74a3>] ? s_start+0xd7/0x17b
+ [<ffffffff8112b13e>] ? kmem_cache_alloc_trace+0xda/0xea
+ [<ffffffff8114cf94>] ? seq_read+0x148/0x361
+ [<ffffffff81132d98>] ? vfs_read+0x93/0xf1
+ [<ffffffff81132f1b>] ? SyS_read+0x60/0x8e
+ [<ffffffff8150bf9f>] ? tracesys+0xdd/0xe2
+
+Debugging this bug, which triggers when rb_iter_peek() loops too
+many times (more than twice), I discovered there's a case that can
+cause that function to legitimately loop 3 times!
+
+rb_iter_peek() is different from rb_buffer_peek(), as rb_buffer_peek()
+only deals with the reader page (it's for consuming reads). The
+rb_iter_peek() is for traversing the buffer without consuming it, and as
+such, it can loop for one more reason. That is, if we hit the end of
+the reader page or any page, it will go to the next page and try again.
+
+That is, we have this:
+
+ 1. iter->head > iter->head_page->page->commit
+ (rb_inc_iter() which moves the iter to the next page)
+ try again
+
+ 2. event = rb_iter_head_event()
+ event->type_len == RINGBUF_TYPE_TIME_EXTEND
+ rb_advance_iter()
+ try again
+
+ 3. read the event.
+
+But we never get to 3, because the count is greater than 2 and we
+cause the WARNING and return NULL.
+
+Up the counter to 3.
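+
+A self-contained sketch of the worst legitimate case (hypothetical,
+in the spirit of the loop rather than a copy of it): end of page,
+then a time extend, then the data event - three passes, so the
+warning threshold must be 3, not 2.
+
+    #include <stdio.h>
+
+    enum step { NEXT_PAGE, TIME_EXTEND, DATA };
+
+    int main(void)
+    {
+        enum step stream[] = { NEXT_PAGE, TIME_EXTEND, DATA };
+        int nr_loops = 0;
+
+        for (int i = 0; i < 3; i++) {
+            if (++nr_loops > 3) {  /* was "> 2" and warned here */
+                fprintf(stderr, "WARN: too many loops\n");
+                return 1;
+            }
+            if (stream[i] == DATA) {
+                printf("event read on pass %d\n", nr_loops);
+                break;
+            }
+            /* NEXT_PAGE or TIME_EXTEND: advance and try again */
+        }
+        return 0;
+    }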
+
+Fixes: 69d1b839f7ee "ring-buffer: Bind time extend and data events together"
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/trace/ring_buffer.c | 14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+--- a/kernel/trace/ring_buffer.c
++++ b/kernel/trace/ring_buffer.c
+@@ -1981,7 +1981,7 @@ rb_add_time_stamp(struct ring_buffer_eve
+
+ /**
+ * rb_update_event - update event type and data
+- * @event: the even to update
++ * @event: the event to update
+ * @type: the type of event
+ * @length: the size of the event field in the ring buffer
+ *
+@@ -3756,12 +3756,14 @@ rb_iter_peek(struct ring_buffer_iter *it
+ return NULL;
+
+ /*
+- * We repeat when a time extend is encountered.
+- * Since the time extend is always attached to a data event,
+- * we should never loop more than once.
+- * (We never hit the following condition more than twice).
++ * We repeat when a time extend is encountered or we hit
++ * the end of the page. Since the time extend is always attached
++ * to a data event, we should never loop more than three times.
++ * Once for going to next page, once on time extend, and
++ * finally once to get the event.
++ * (We never hit the following condition more than thrice).
+ */
+- if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
++ if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3))
+ return NULL;
+
+ if (rb_per_cpu_empty(cpu_buffer))
acpi-run-fixed-event-device-notifications-in-process-context.patch
acpi-scan-not-cache-_sun-value-in-struct-acpi_device_pnp.patch
acpi-cpuidle-fix-deadlock-between-cpuidle_lock-and-cpu_hotplug.lock.patch
+xen-events-fifo-reset-control-block-and-local-heads-on-resume.patch
+ring-buffer-always-reset-iterator-to-reader-page.patch
+ring-buffer-up-rb_iter_peek-loop-count-to-3.patch
+mnt-only-change-user-settable-mount-flags-in-remount.patch
+mnt-move-the-test-for-mnt_lock_readonly-from-change_mount_flags-into-do_remount.patch
+mnt-correct-permission-checks-in-do_remount.patch
+mnt-change-the-default-remount-atime-from-relatime-to-the-existing-value.patch
+mnt-add-tests-for-unprivileged-remount-cases-that-have-found-to-be-faulty.patch
+get-rid-of-propagate_umount-mistakenly-treating-slaves-as-busy.patch
+fix-ebusy-on-umount-from-mnt_shrinkable.patch
--- /dev/null
+From c12784c3d14a2110468ec4d1383f60cfd2665576 Mon Sep 17 00:00:00 2001
+From: David Vrabel <david.vrabel@citrix.com>
+Date: Thu, 31 Jul 2014 16:22:24 +0100
+Subject: xen/events/fifo: reset control block and local HEADs on resume
+
+From: David Vrabel <david.vrabel@citrix.com>
+
+commit c12784c3d14a2110468ec4d1383f60cfd2665576 upstream.
+
+When using the FIFO-based event channel ABI, if the control block or
+the local HEADs are not reset after resuming, the guest may see stale
+HEAD values and will fail to traverse the FIFO correctly.
+
+This may prevent one or more VCPUs from receiving any events following
+a resume.
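+
+A compact analogue of the reset that boot and resume now share
+(hypothetical userspace code; the real function issues an
+EVTCHNOP_init_control hypercall afterwards): clear the shared control
+block and zero the cached per-queue HEADs before re-registering.
+
+    #include <string.h>
+
+    #define PAGE_SIZE               4096
+    #define EVTCHN_FIFO_MAX_QUEUES  16
+
+    struct evtchn_queue { unsigned int head[EVTCHN_FIFO_MAX_QUEUES]; };
+
+    static void reset_control_block(void *control_block,
+                                    struct evtchn_queue *q)
+    {
+        memset(control_block, 0, PAGE_SIZE);  /* clear_page() analogue */
+        for (unsigned int i = 0; i < EVTCHN_FIFO_MAX_QUEUES; i++)
+            q->head[i] = 0;  /* forget stale local HEADs */
+        /* ...then hand the page back via EVTCHNOP_init_control */
+    }
+
+    int main(void)
+    {
+        static char page[PAGE_SIZE];
+        struct evtchn_queue q = { .head = { 7 } };  /* stale HEAD */
+
+        reset_control_block(page, &q);
+        return 0;
+    }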
+
+Signed-off-by: David Vrabel <david.vrabel@citrix.com>
+Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/xen/events/events_fifo.c | 48 ++++++++++++++++++++++-----------------
+ 1 file changed, 28 insertions(+), 20 deletions(-)
+
+--- a/drivers/xen/events/events_fifo.c
++++ b/drivers/xen/events/events_fifo.c
+@@ -99,6 +99,25 @@ static unsigned evtchn_fifo_nr_channels(
+ return event_array_pages * EVENT_WORDS_PER_PAGE;
+ }
+
++static int init_control_block(int cpu,
++ struct evtchn_fifo_control_block *control_block)
++{
++ struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu);
++ struct evtchn_init_control init_control;
++ unsigned int i;
++
++ /* Reset the control block and the local HEADs. */
++ clear_page(control_block);
++ for (i = 0; i < EVTCHN_FIFO_MAX_QUEUES; i++)
++ q->head[i] = 0;
++
++ init_control.control_gfn = virt_to_mfn(control_block);
++ init_control.offset = 0;
++ init_control.vcpu = cpu;
++
++ return HYPERVISOR_event_channel_op(EVTCHNOP_init_control, &init_control);
++}
++
+ static void free_unused_array_pages(void)
+ {
+ unsigned i;
+@@ -327,7 +346,6 @@ static void evtchn_fifo_resume(void)
+
+ for_each_possible_cpu(cpu) {
+ void *control_block = per_cpu(cpu_control_block, cpu);
+- struct evtchn_init_control init_control;
+ int ret;
+
+ if (!control_block)
+@@ -344,12 +362,7 @@ static void evtchn_fifo_resume(void)
+ continue;
+ }
+
+- init_control.control_gfn = virt_to_mfn(control_block);
+- init_control.offset = 0;
+- init_control.vcpu = cpu;
+-
+- ret = HYPERVISOR_event_channel_op(EVTCHNOP_init_control,
+- &init_control);
++ ret = init_control_block(cpu, control_block);
+ if (ret < 0)
+ BUG();
+ }
+@@ -377,30 +390,25 @@ static const struct evtchn_ops evtchn_op
+ .resume = evtchn_fifo_resume,
+ };
+
+-static int evtchn_fifo_init_control_block(unsigned cpu)
++static int evtchn_fifo_alloc_control_block(unsigned cpu)
+ {
+- struct page *control_block = NULL;
+- struct evtchn_init_control init_control;
++ void *control_block = NULL;
+ int ret = -ENOMEM;
+
+- control_block = alloc_page(GFP_KERNEL|__GFP_ZERO);
++ control_block = (void *)__get_free_page(GFP_KERNEL);
+ if (control_block == NULL)
+ goto error;
+
+- init_control.control_gfn = virt_to_mfn(page_address(control_block));
+- init_control.offset = 0;
+- init_control.vcpu = cpu;
+-
+- ret = HYPERVISOR_event_channel_op(EVTCHNOP_init_control, &init_control);
++ ret = init_control_block(cpu, control_block);
+ if (ret < 0)
+ goto error;
+
+- per_cpu(cpu_control_block, cpu) = page_address(control_block);
++ per_cpu(cpu_control_block, cpu) = control_block;
+
+ return 0;
+
+ error:
+- __free_page(control_block);
++ free_page((unsigned long)control_block);
+ return ret;
+ }
+
+@@ -414,7 +422,7 @@ static int evtchn_fifo_cpu_notification(
+ switch (action) {
+ case CPU_UP_PREPARE:
+ if (!per_cpu(cpu_control_block, cpu))
+- ret = evtchn_fifo_init_control_block(cpu);
++ ret = evtchn_fifo_alloc_control_block(cpu);
+ break;
+ default:
+ break;
+@@ -431,7 +439,7 @@ int __init xen_evtchn_fifo_init(void)
+ int cpu = get_cpu();
+ int ret;
+
+- ret = evtchn_fifo_init_control_block(cpu);
++ ret = evtchn_fifo_alloc_control_block(cpu);
+ if (ret < 0)
+ goto out;
+