From cae0d5cf4e7d585ca774a44871ce5e07a7b3f018 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Fri, 12 Feb 2016 13:00:34 -0800
Subject: [PATCH] 4.3-stable patches

added patches:
	fat-fix-fake_offset-handling-on-error-path.patch
	fs-seqfile-always-allow-oom-killer.patch
	kernel-signal.c-unexport-sigsuspend.patch
	lib-hexdump.c-truncate-output-in-case-of-overflow.patch
	memcg-fix-thresholds-for-32b-architectures.patch
	mm-hugetlb-call-huge_pte_alloc-only-if-ptep-is-null.patch
	mm-hugetlb-fix-hugepage-memory-leak-caused-by-wrong-reserve-count.patch
	mm-hugetlb.c-fix-resv-map-memory-leak-for-placeholder-entries.patch
	mm-hugetlbfs-fix-bugs-in-fallocate-hole-punch-of-areas-with-holes.patch
	mm-oom_kill.c-reverse-the-order-of-setting-tif_memdie-and-sending-sigkill.patch
	mm-slab-only-move-management-objects-off-slab-for-sizes-larger-than-kmalloc_min_size.patch
	mm-vmstat-allow-wq-concurrency-to-discover-memory-reclaim-doesn-t-make-any-progress.patch
	ocfs2-dlm-clear-refmap-bit-of-recovery-lock-while-doing-local-recovery-cleanup.patch
	ocfs2-dlm-ignore-cleaning-the-migration-mle-that-is-inuse.patch
	ocfs2-fix-bug-when-calculate-new-backup-super.patch
	ocfs2-fix-sgid-not-inherited-issue.patch
	proc-actually-make-proc_fd_permission-thread-friendly.patch
	proc-fix-esrch-error-when-writing-to-proc-pid-coredump_filter.patch
	remoteproc-avoid-stack-overflow-in-debugfs-file.patch
	sh64-fix-__nr_fgetxattr.patch
---
 ...x-fake_offset-handling-on-error-path.patch |  80 +++++++++
 .../fs-seqfile-always-allow-oom-killer.patch  |  64 +++++++
 .../kernel-signal.c-unexport-sigsuspend.patch |  64 +++++++
 ...-truncate-output-in-case-of-overflow.patch |  51 ++++++
 ...fix-thresholds-for-32b-architectures.patch | 105 +++++++++++
 ...-huge_pte_alloc-only-if-ptep-is-null.patch |  61 +++++++
 ...y-leak-caused-by-wrong-reserve-count.patch |  59 +++++++
 ...-memory-leak-for-placeholder-entries.patch |  92 ++++++++++
 ...ocate-hole-punch-of-areas-with-holes.patch | 167 ++++++++++++++++++
 ...tting-tif_memdie-and-sending-sigkill.patch |  77 ++++++++
 ...r-sizes-larger-than-kmalloc_min_size.patch |  87 +++++++++
 ...ry-reclaim-doesn-t-make-any-progress.patch | 122 +++++++++++++
 ...k-while-doing-local-recovery-cleanup.patch |  38 ++++
 ...ning-the-migration-mle-that-is-inuse.patch |  97 ++++++++++
 ...-bug-when-calculate-new-backup-super.patch |  98 ++++++++++
 .../ocfs2-fix-sgid-not-inherited-issue.patch  |  44 +++++
 ...e-proc_fd_permission-thread-friendly.patch |  53 ++++++
 ...-writing-to-proc-pid-coredump_filter.patch |  40 +++++
 ...avoid-stack-overflow-in-debugfs-file.patch |  40 +++++
 queue-4.3/series                              |  20 +++
 queue-4.3/sh64-fix-__nr_fgetxattr.patch       |  37 ++++
 21 files changed, 1496 insertions(+)
 create mode 100644 queue-4.3/fat-fix-fake_offset-handling-on-error-path.patch
 create mode 100644 queue-4.3/fs-seqfile-always-allow-oom-killer.patch
 create mode 100644 queue-4.3/kernel-signal.c-unexport-sigsuspend.patch
 create mode 100644 queue-4.3/lib-hexdump.c-truncate-output-in-case-of-overflow.patch
 create mode 100644 queue-4.3/memcg-fix-thresholds-for-32b-architectures.patch
 create mode 100644 queue-4.3/mm-hugetlb-call-huge_pte_alloc-only-if-ptep-is-null.patch
 create mode 100644 queue-4.3/mm-hugetlb-fix-hugepage-memory-leak-caused-by-wrong-reserve-count.patch
 create mode 100644 queue-4.3/mm-hugetlb.c-fix-resv-map-memory-leak-for-placeholder-entries.patch
 create mode 100644 queue-4.3/mm-hugetlbfs-fix-bugs-in-fallocate-hole-punch-of-areas-with-holes.patch
 create mode 100644 queue-4.3/mm-oom_kill.c-reverse-the-order-of-setting-tif_memdie-and-sending-sigkill.patch
 create mode 100644 queue-4.3/mm-slab-only-move-management-objects-off-slab-for-sizes-larger-than-kmalloc_min_size.patch
 create mode 100644 queue-4.3/mm-vmstat-allow-wq-concurrency-to-discover-memory-reclaim-doesn-t-make-any-progress.patch
 create mode 100644 queue-4.3/ocfs2-dlm-clear-refmap-bit-of-recovery-lock-while-doing-local-recovery-cleanup.patch
 create mode 100644 queue-4.3/ocfs2-dlm-ignore-cleaning-the-migration-mle-that-is-inuse.patch
 create mode 100644 queue-4.3/ocfs2-fix-bug-when-calculate-new-backup-super.patch
 create mode 100644 queue-4.3/ocfs2-fix-sgid-not-inherited-issue.patch
 create mode 100644 queue-4.3/proc-actually-make-proc_fd_permission-thread-friendly.patch
 create mode 100644 queue-4.3/proc-fix-esrch-error-when-writing-to-proc-pid-coredump_filter.patch
 create mode 100644 queue-4.3/remoteproc-avoid-stack-overflow-in-debugfs-file.patch
 create mode 100644 queue-4.3/sh64-fix-__nr_fgetxattr.patch

diff --git a/queue-4.3/fat-fix-fake_offset-handling-on-error-path.patch b/queue-4.3/fat-fix-fake_offset-handling-on-error-path.patch
new file mode 100644
index 00000000000..155daff64de
--- /dev/null
+++ b/queue-4.3/fat-fix-fake_offset-handling-on-error-path.patch
@@ -0,0 +1,80 @@
+From 928a477102c4fc6739883415b66987207e3502f4 Mon Sep 17 00:00:00 2001
+From: OGAWA Hirofumi
+Date: Fri, 20 Nov 2015 15:57:15 -0800
+Subject: fat: fix fake_offset handling on error path
+
+From: OGAWA Hirofumi
+
+commit 928a477102c4fc6739883415b66987207e3502f4 upstream.
+
+For the root directory, . and .. are faked (using dir_emit_dots()) and
+ctx->pos is reset from 2 to 0.
+
+A corrupted root directory could cause fat_get_entry() to fail, but
+->iterate() (fat_readdir()) reports progress to the VFS (with ctx->pos
+rewound to 0), so any following calls to ->iterate() continue to return
+the same entries again and again.
+
+The result is that userspace will never see the end of the directory,
+causing e.g. 'ls' to hang in a getdents() loop.
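+
+For illustration, this is the shape of the userspace loop that spins (a
+minimal sketch, not taken from the report; the mount point is made up,
+and readdir() is simply the libc wrapper around getdents()):
+
+  #include <dirent.h>
+  #include <stdio.h>
+
+  int main(void)
+  {
+          DIR *d = opendir("/mnt/fat");   /* FAT fs with corrupted root */
+          struct dirent *de;
+
+          if (!d)
+                  return 1;
+          /* readdir() stops only when getdents() reports no entries;
+           * with ctx->pos rewound to 0 on every error, the same entries
+           * come back forever and this loop never terminates. */
+          while ((de = readdir(d)) != NULL)
+                  printf("%s\n", de->d_name);
+          closedir(d);
+          return 0;
+  }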
+
+[hirofumi@mail.parknet.co.jp: cleanup and make sure to correct fake_offset]
+Reported-by: Vegard Nossum
+Tested-by: Vegard Nossum
+Signed-off-by: Richard Weinberger
+Signed-off-by: OGAWA Hirofumi
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/fat/dir.c | 16 +++++++++++-----
+ 1 file changed, 11 insertions(+), 5 deletions(-)
+
+--- a/fs/fat/dir.c
++++ b/fs/fat/dir.c
+@@ -610,9 +610,9 @@ parse_record:
+ 		int status = fat_parse_long(inode, &cpos, &bh, &de,
+ 					    &unicode, &nr_slots);
+ 		if (status < 0) {
+-			ctx->pos = cpos;
++			bh = NULL;
+ 			ret = status;
+-			goto out;
++			goto end_of_dir;
+ 		} else if (status == PARSE_INVALID)
+ 			goto record_end;
+ 		else if (status == PARSE_NOT_LONGNAME)
+@@ -654,8 +654,9 @@ parse_record:
+ 		fill_len = short_len;
+ 
+ start_filldir:
+-	if (!fake_offset)
+-		ctx->pos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry);
++	ctx->pos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry);
++	if (fake_offset && ctx->pos < 2)
++		ctx->pos = 2;
+ 
+ 	if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME)) {
+ 		if (!dir_emit_dot(file, ctx))
+@@ -681,14 +682,19 @@ record_end:
+ 		fake_offset = 0;
+ 		ctx->pos = cpos;
+ 		goto get_new;
++
+ end_of_dir:
+-	ctx->pos = cpos;
++	if (fake_offset && cpos < 2)
++		ctx->pos = 2;
++	else
++		ctx->pos = cpos;
+ fill_failed:
+ 	brelse(bh);
+ 	if (unicode)
+ 		__putname(unicode);
+ out:
+ 	mutex_unlock(&sbi->s_lock);
++
+ 	return ret;
+ }
+
diff --git a/queue-4.3/fs-seqfile-always-allow-oom-killer.patch b/queue-4.3/fs-seqfile-always-allow-oom-killer.patch
new file mode 100644
index 00000000000..c4919c40532
--- /dev/null
+++ b/queue-4.3/fs-seqfile-always-allow-oom-killer.patch
@@ -0,0 +1,64 @@
+From 0f930902eb8806cff8dcaef9ff9faf3cfa5fd748 Mon Sep 17 00:00:00 2001
+From: Greg Thelen
+Date: Fri, 6 Nov 2015 16:32:42 -0800
+Subject: fs, seqfile: always allow oom killer
+
+From: Greg Thelen
+
+commit 0f930902eb8806cff8dcaef9ff9faf3cfa5fd748 upstream.
+
+Since 5cec38ac866b ("fs, seq_file: fallback to vmalloc instead of oom kill
+processes") seq_buf_alloc() avoids calling the oom killer for PAGE_SIZE or
+smaller allocations; but larger allocations can use the oom killer via
+vmalloc(). Thus reads of small files can return ENOMEM, but larger files
+use the oom killer to avoid ENOMEM.
+
+The effect of this bug is that reads from /proc and other virtual
+filesystems can return ENOMEM instead of the preferred behavior - oom
+killing something (possibly the calling process). I don't know of anyone
+except Google who has noticed the issue.
+
+I suspect the fix is needed more on smaller systems where there isn't any
+reclaimable memory. But these seem like the kinds of systems which
+probably don't use the oom killer for production situations.
+
+Memory overcommit requires use of the oom killer to select a victim
+regardless of file size.
+
+Enable oom killer for small seq_buf_alloc() allocations.
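+
+Restated, the allocation policy this patch implements (derived entirely
+from the hunk below) is:
+
+  size <= PAGE_SIZE: kmalloc(GFP_KERNEL), may oom kill, no vmalloc fallback
+  size >  PAGE_SIZE: kmalloc(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN),
+                     then vmalloc(size) if that fails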
+
+Fixes: 5cec38ac866b ("fs, seq_file: fallback to vmalloc instead of oom kill processes")
+Signed-off-by: David Rientjes
+Signed-off-by: Greg Thelen
+Acked-by: Eric Dumazet
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/seq_file.c | 11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+--- a/fs/seq_file.c
++++ b/fs/seq_file.c
+@@ -25,12 +25,17 @@ static void seq_set_overflow(struct seq_
+ static void *seq_buf_alloc(unsigned long size)
+ {
+ 	void *buf;
++	gfp_t gfp = GFP_KERNEL;
+ 
+ 	/*
+-	 * __GFP_NORETRY to avoid oom-killings with high-order allocations -
+-	 * it's better to fall back to vmalloc() than to kill things.
++	 * For high order allocations, use __GFP_NORETRY to avoid oom-killing -
++	 * it's better to fall back to vmalloc() than to kill things. For small
++	 * allocations, just use GFP_KERNEL which will oom kill, thus no need
++	 * for vmalloc fallback.
+ 	 */
+-	buf = kmalloc(size, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
++	if (size > PAGE_SIZE)
++		gfp |= __GFP_NORETRY | __GFP_NOWARN;
++	buf = kmalloc(size, gfp);
+ 	if (!buf && size > PAGE_SIZE)
+ 		buf = vmalloc(size);
+ 	return buf;
diff --git a/queue-4.3/kernel-signal.c-unexport-sigsuspend.patch b/queue-4.3/kernel-signal.c-unexport-sigsuspend.patch
new file mode 100644
index 00000000000..82422de58b8
--- /dev/null
+++ b/queue-4.3/kernel-signal.c-unexport-sigsuspend.patch
@@ -0,0 +1,64 @@
+From 9d8a765211335cfdad464b90fb19f546af5706ae Mon Sep 17 00:00:00 2001
+From: Richard Weinberger
+Date: Fri, 20 Nov 2015 15:57:21 -0800
+Subject: kernel/signal.c: unexport sigsuspend()
+
+From: Richard Weinberger
+
+commit 9d8a765211335cfdad464b90fb19f546af5706ae upstream.
+
+sigsuspend() is nowhere used except in signal.c itself, so we can mark it
+static to avoid polluting the global namespace.
+
+But this patch is more than a boring cleanup patch, it fixes a real issue
+on UserModeLinux. UML has a special console driver to display ttys using
+xterm, or other terminal emulators, on the host side. Vegard reported
+that sometimes UML is unable to spawn an xterm and he's facing the
+following warning:
+
+  WARNING: CPU: 0 PID: 908 at include/linux/thread_info.h:128 sigsuspend+0xab/0xc0()
+
+It turned out that this warning makes absolutely no sense, as the UML
+xterm code calls sigsuspend() on the host side, or at least it tries. But
+as the kernel itself offers a sigsuspend() symbol, the linker chose this
+one instead of the glibc wrapper. Interestingly this code had always
+worked, but it always blocked signals on the wrong side. Some recent
+kernel change made the WARN_ON() trigger and uncovered the bug.
+
+It is a wonderful example of how much works by chance on computers. :-)
+
+Fixes: 68f3f16d9ad0f1 ("new helper: sigsuspend()")
+Signed-off-by: Richard Weinberger
+Reported-by: Vegard Nossum
+Tested-by: Vegard Nossum
+Acked-by: Oleg Nesterov
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ include/linux/signal.h | 1 -
+ kernel/signal.c | 2 +-
+ 2 files changed, 1 insertion(+), 2 deletions(-)
+
+--- a/include/linux/signal.h
++++ b/include/linux/signal.h
+@@ -239,7 +239,6 @@ extern int sigprocmask(int, sigset_t *,
+ extern void set_current_blocked(sigset_t *);
+ extern void __set_current_blocked(const sigset_t *);
+ extern int show_unhandled_signals;
+-extern int sigsuspend(sigset_t *);
+ 
+ struct sigaction {
+ #ifndef __ARCH_HAS_IRIX_SIGACTION
+--- a/kernel/signal.c
++++ b/kernel/signal.c
+@@ -3552,7 +3552,7 @@ SYSCALL_DEFINE0(pause)
+ 
+ #endif
+ 
+-int sigsuspend(sigset_t *set)
++static int sigsuspend(sigset_t *set)
+ {
+ 	current->saved_sigmask = current->blocked;
+ 	set_current_blocked(set);
diff --git a/queue-4.3/lib-hexdump.c-truncate-output-in-case-of-overflow.patch b/queue-4.3/lib-hexdump.c-truncate-output-in-case-of-overflow.patch
new file mode 100644
index 00000000000..6a09620add3
--- /dev/null
+++ b/queue-4.3/lib-hexdump.c-truncate-output-in-case-of-overflow.patch
@@ -0,0 +1,51 @@
+From 9f029f540c2f7e010e4922d44ba0dfd05da79f88 Mon Sep 17 00:00:00 2001
+From: Andy Shevchenko
+Date: Fri, 6 Nov 2015 16:31:31 -0800
+Subject: lib/hexdump.c: truncate output in case of overflow
+
+From: Andy Shevchenko
+
+commit 9f029f540c2f7e010e4922d44ba0dfd05da79f88 upstream.
+
+There is a classical off-by-one error in the case when we try to place,
+for example, 1+1 bytes as hex in a buffer of size 6. The expected result
+is a truncated output, but in reality we get 6 bytes filled followed by
+a terminating NUL.
+
+Change the logic of how we fill the output in case of byte dumping into
+limited space. This follows the snprintf() behaviour by truncating
+output even on half bytes.
+
+Fixes: 114fc1afb2de (hexdump: make it return number of bytes placed in buffer)
+Signed-off-by: Andy Shevchenko
+Reported-by: Aaro Koskinen
+Tested-by: Aaro Koskinen
+Cc: Al Viro
+Cc: Catalin Marinas
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ lib/hexdump.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/lib/hexdump.c
++++ b/lib/hexdump.c
+@@ -169,11 +169,15 @@ int hex_dump_to_buffer(const void *buf,
+ 		}
+ 	} else {
+ 		for (j = 0; j < len; j++) {
+-			if (linebuflen < lx + 3)
++			if (linebuflen < lx + 2)
+ 				goto overflow2;
+ 			ch = ptr[j];
+ 			linebuf[lx++] = hex_asc_hi(ch);
++			if (linebuflen < lx + 2)
++				goto overflow2;
+ 			linebuf[lx++] = hex_asc_lo(ch);
++			if (linebuflen < lx + 2)
++				goto overflow2;
+ 			linebuf[lx++] = ' ';
+ 		}
+ 		if (j)
diff --git a/queue-4.3/memcg-fix-thresholds-for-32b-architectures.patch b/queue-4.3/memcg-fix-thresholds-for-32b-architectures.patch
new file mode 100644
index 00000000000..4e05dc06479
--- /dev/null
+++ b/queue-4.3/memcg-fix-thresholds-for-32b-architectures.patch
@@ -0,0 +1,105 @@
+From c12176d3368b9b36ae484d323d41e94be26f9b65 Mon Sep 17 00:00:00 2001
+From: Michal Hocko
+Date: Thu, 5 Nov 2015 18:50:29 -0800
+Subject: memcg: fix thresholds for 32b architectures.
+
+From: Michal Hocko
+
+commit c12176d3368b9b36ae484d323d41e94be26f9b65 upstream.
+
+Commit 424cdc141380 ("memcg: convert threshold to bytes") has fixed a
+regression introduced by 3e32cb2e0a12 ("mm: memcontrol: lockless page
+counters") where thresholds were silently converted to use page units
+rather than bytes when interpreting the user input.
+
+The fix is not complete, though, as properly pointed out by Ben Hutchings
+during stable backport review. The page count is converted to bytes, but
+an unsigned long is used to hold the value, which is obviously not
+sufficient for 32b systems with more than 4G thresholds. The same applies
+to the usage as taken from mem_cgroup_usage, which might overflow.
+
+Let's remove this bytes vs. pages internal tracking difference and
+handle thresholds in page units internally. Change mem_cgroup_usage() to
+return the value in page units and revert 424cdc141380, because this should
+be sufficient for consistent handling. mem_cgroup_read_u64, as the
+only user of mem_cgroup_usage outside of the threshold handling code, is
+converted to give the proper result in bytes. It is doing that already
+for the page_counter output, so this is more consistent as well.
+
+The value presented to userspace is still in byte units.
+
+Fixes: 424cdc141380 ("memcg: convert threshold to bytes")
+Fixes: 3e32cb2e0a12 ("mm: memcontrol: lockless page counters")
+Signed-off-by: Michal Hocko
+Reported-by: Ben Hutchings
+Reviewed-by: Vladimir Davydov
+Acked-by: Johannes Weiner
+From: Michal Hocko
+Subject: memcg: fix thresholds for 32b architectures.
+Signed-off-by: Greg Kroah-Hartman
+
+Cc: Ben Hutchings
+Cc: Vladimir Davydov
+Cc: Johannes Weiner
+From: Andrew Morton
+Subject: memcg: fix thresholds for 32b architectures.
+
+don't attempt to inline mem_cgroup_usage()
+
+The compiler ignores the inline anyway. And __always_inlining it adds 600
+bytes of goop to the .o file.
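+
+The 32b overflow, as plain arithmetic (illustrative values, assuming 4K
+pages):
+
+  threshold = 5 GiB = 5368709120 bytes    /* > ULONG_MAX (4294967295) on 32b */
+  threshold in pages = 5368709120 >> 12 = 1310720    /* fits easily */
+
+so thresholds are tracked in pages internally and widened to u64 only at
+the reporting boundary, e.g. (u64)mem_cgroup_usage(memcg, false) * PAGE_SIZE.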
+
+Cc: Ben Hutchings
+Cc: Johannes Weiner
+Cc: Michal Hocko
+Cc: Vladimir Davydov
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+
+---
+ mm/memcontrol.c | 11 +++++------
+ 1 file changed, 5 insertions(+), 6 deletions(-)
+
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -2836,9 +2836,9 @@ static unsigned long tree_stat(struct me
+ 	return val;
+ }
+ 
+-static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
++static inline unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
+ {
+-	u64 val;
++	unsigned long val;
+ 
+ 	if (mem_cgroup_is_root(memcg)) {
+ 		val = tree_stat(memcg, MEM_CGROUP_STAT_CACHE);
+@@ -2851,7 +2851,7 @@ static inline u64 mem_cgroup_usage(struc
+ 		else
+ 			val = page_counter_read(&memcg->memsw);
+ 	}
+-	return val << PAGE_SHIFT;
++	return val;
+ }
+ 
+ enum {
+@@ -2885,9 +2885,9 @@ static u64 mem_cgroup_read_u64(struct cg
+ 	switch (MEMFILE_ATTR(cft->private)) {
+ 	case RES_USAGE:
+ 		if (counter == &memcg->memory)
+-			return mem_cgroup_usage(memcg, false);
++			return (u64)mem_cgroup_usage(memcg, false) * PAGE_SIZE;
+ 		if (counter == &memcg->memsw)
+-			return mem_cgroup_usage(memcg, true);
++			return (u64)mem_cgroup_usage(memcg, true) * PAGE_SIZE;
+ 		return (u64)page_counter_read(counter) * PAGE_SIZE;
+ 	case RES_LIMIT:
+ 		return (u64)counter->limit * PAGE_SIZE;
+@@ -3387,7 +3387,6 @@ static int __mem_cgroup_usage_register_e
+ 	ret = page_counter_memparse(args, "-1", &threshold);
+ 	if (ret)
+ 		return ret;
+-	threshold <<= PAGE_SHIFT;
+ 
+ 	mutex_lock(&memcg->thresholds_lock);
+
diff --git a/queue-4.3/mm-hugetlb-call-huge_pte_alloc-only-if-ptep-is-null.patch b/queue-4.3/mm-hugetlb-call-huge_pte_alloc-only-if-ptep-is-null.patch
new file mode 100644
index 00000000000..79a23e5533d
--- /dev/null
+++ b/queue-4.3/mm-hugetlb-call-huge_pte_alloc-only-if-ptep-is-null.patch
@@ -0,0 +1,61 @@
+From 0d777df5d8953293be090d9ab5a355db893e8357 Mon Sep 17 00:00:00 2001
+From: Naoya Horiguchi
+Date: Fri, 11 Dec 2015 13:40:49 -0800
+Subject: mm: hugetlb: call huge_pte_alloc() only if ptep is null
+
+From: Naoya Horiguchi
+
+commit 0d777df5d8953293be090d9ab5a355db893e8357 upstream.
+
+Currently at the beginning of hugetlb_fault(), we call huge_pte_offset()
+and check whether the obtained *ptep is a migration/hwpoison entry or
+not. And if not, then we get to call huge_pte_alloc(). This is racy
+because the *ptep could turn into a migration/hwpoison entry after the
+huge_pte_offset() check. This race results in a BUG_ON in
+huge_pte_alloc().
+
+We don't have to call huge_pte_alloc() when huge_pte_offset()
+returns non-NULL, so let's fix this bug by moving the code into the
+else block.
+
+Note that the *ptep could turn into a migration/hwpoison entry after
+this block, but that's not a problem because we have another
+!pte_present check later (we never go into hugetlb_no_page() in that
+case.)
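+
+Condensed, the fixed control flow at the top of hugetlb_fault() becomes
+(paraphrased from the hunk below, with the error paths trimmed):
+
+  ptep = huge_pte_offset(mm, address);
+  if (ptep) {
+          /* existing pte: it may be a migration/hwpoison entry */
+          entry = huge_ptep_get(ptep);
+          if (unlikely(is_hugetlb_entry_migration(entry)))
+                  /* wait for migration to finish, then retry */ ;
+          else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
+                  return VM_FAULT_HWPOISON_LARGE | ...;
+  } else {
+          /* no pte yet: only now is huge_pte_alloc() safe to call */
+          ptep = huge_pte_alloc(mm, address, huge_page_size(h));
+          if (!ptep)
+                  return VM_FAULT_OOM;
+  }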
+
+Fixes: 290408d4a250 ("hugetlb: hugepage migration core")
+Signed-off-by: Naoya Horiguchi
+Acked-by: Hillf Danton
+Acked-by: David Rientjes
+Cc: Hugh Dickins
+Cc: Dave Hansen
+Cc: Mel Gorman
+Cc: Joonsoo Kim
+Cc: Mike Kravetz
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/hugetlb.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -3590,12 +3590,12 @@ int hugetlb_fault(struct mm_struct *mm,
+ 		} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
+ 			return VM_FAULT_HWPOISON_LARGE |
+ 				VM_FAULT_SET_HINDEX(hstate_index(h));
++	} else {
++		ptep = huge_pte_alloc(mm, address, huge_page_size(h));
++		if (!ptep)
++			return VM_FAULT_OOM;
+ 	}
+ 
+-	ptep = huge_pte_alloc(mm, address, huge_page_size(h));
+-	if (!ptep)
+-		return VM_FAULT_OOM;
+-
+ 	mapping = vma->vm_file->f_mapping;
+ 	idx = vma_hugecache_offset(h, vma, address);
+
diff --git a/queue-4.3/mm-hugetlb-fix-hugepage-memory-leak-caused-by-wrong-reserve-count.patch b/queue-4.3/mm-hugetlb-fix-hugepage-memory-leak-caused-by-wrong-reserve-count.patch
new file mode 100644
index 00000000000..fbab19957bb
--- /dev/null
+++ b/queue-4.3/mm-hugetlb-fix-hugepage-memory-leak-caused-by-wrong-reserve-count.patch
@@ -0,0 +1,59 @@
+From a88c769548047b21f76fd71e04b6a3300ff17160 Mon Sep 17 00:00:00 2001
+From: Naoya Horiguchi
+Date: Fri, 11 Dec 2015 13:40:24 -0800
+Subject: mm: hugetlb: fix hugepage memory leak caused by wrong reserve count
+
+From: Naoya Horiguchi
+
+commit a88c769548047b21f76fd71e04b6a3300ff17160 upstream.
+
+When dequeue_huge_page_vma() in alloc_huge_page() fails, we fall back on
+alloc_buddy_huge_page() to directly create a hugepage from the buddy
+allocator.
+
+In that case, however, if alloc_buddy_huge_page() succeeds we don't
+decrement h->resv_huge_pages, which means that a successful
+hugetlb_fault() returns without releasing the reserve count. As a
+result, subsequent hugetlb_fault() calls might fail even though there
+are still free hugepages.
+
+This patch simply adds decrementing code on that code path, as shown in
+the sketch below the list.
+
+I reproduced this problem when testing a v4.3 kernel in the following situation:
+ - the test machine/VM is a NUMA system,
+ - hugepage overcommitting is enabled,
+ - most of the hugepages are allocated and there's only one free hugepage
+   which is on node 0 (for example),
+ - another program, which calls set_mempolicy(MPOL_BIND) to bind itself to
+   node 1, tries to allocate a hugepage,
+ - the allocation should fail, but the reserve count is still held.
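+
+The added accounting, shown in isolation (this mirrors the hunk below): a
+page handed out by the buddy allocator while a reservation exists must
+consume that reservation, exactly as a dequeued page would:
+
+  if (!avoid_reserve && vma_has_reserves(vma, gbl_chg)) {
+          SetPagePrivate(page);   /* remember: this page holds a reserve */
+          h->resv_huge_pages--;
+  }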
+
+Signed-off-by: Naoya Horiguchi
+Cc: David Rientjes
+Cc: Dave Hansen
+Cc: Mel Gorman
+Cc: Joonsoo Kim
+Cc: Hillf Danton
+Cc: Mike Kravetz
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/hugetlb.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -1790,7 +1790,10 @@ struct page *alloc_huge_page(struct vm_a
+ 		page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
+ 		if (!page)
+ 			goto out_uncharge_cgroup;
+-
++		if (!avoid_reserve && vma_has_reserves(vma, gbl_chg)) {
++			SetPagePrivate(page);
++			h->resv_huge_pages--;
++		}
+ 		spin_lock(&hugetlb_lock);
+ 		list_move(&page->lru, &h->hugepage_activelist);
+ 		/* Fall through */
diff --git a/queue-4.3/mm-hugetlb.c-fix-resv-map-memory-leak-for-placeholder-entries.patch b/queue-4.3/mm-hugetlb.c-fix-resv-map-memory-leak-for-placeholder-entries.patch
new file mode 100644
index 00000000000..e4a9a089b10
--- /dev/null
+++ b/queue-4.3/mm-hugetlb.c-fix-resv-map-memory-leak-for-placeholder-entries.patch
@@ -0,0 +1,92 @@
+From dbe409e4f5e5075bd9ff7f8dd5c627abf3ee38c1 Mon Sep 17 00:00:00 2001
+From: Mike Kravetz
+Date: Fri, 11 Dec 2015 13:40:52 -0800
+Subject: mm/hugetlb.c: fix resv map memory leak for placeholder entries
+
+From: Mike Kravetz
+
+commit dbe409e4f5e5075bd9ff7f8dd5c627abf3ee38c1 upstream.
+
+Dmitry Vyukov reported the following memory leak
+
+unreferenced object 0xffff88002eaafd88 (size 32):
+  comm "a.out", pid 5063, jiffies 4295774645 (age 15.810s)
+  hex dump (first 32 bytes):
+    28 e9 4e 63 00 88 ff ff 28 e9 4e 63 00 88 ff ff  (.Nc....(.Nc....
+    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+  backtrace:
+    kmalloc include/linux/slab.h:458
+    region_chg+0x2d4/0x6b0 mm/hugetlb.c:398
+    __vma_reservation_common+0x2c3/0x390 mm/hugetlb.c:1791
+    vma_needs_reservation mm/hugetlb.c:1813
+    alloc_huge_page+0x19e/0xc70 mm/hugetlb.c:1845
+    hugetlb_no_page mm/hugetlb.c:3543
+    hugetlb_fault+0x7a1/0x1250 mm/hugetlb.c:3717
+    follow_hugetlb_page+0x339/0xc70 mm/hugetlb.c:3880
+    __get_user_pages+0x542/0xf30 mm/gup.c:497
+    populate_vma_page_range+0xde/0x110 mm/gup.c:919
+    __mm_populate+0x1c7/0x310 mm/gup.c:969
+    do_mlock+0x291/0x360 mm/mlock.c:637
+    SYSC_mlock2 mm/mlock.c:658
+    SyS_mlock2+0x4b/0x70 mm/mlock.c:648
+
+Dmitry identified a potential memory leak in the routine region_chg,
+where a region descriptor is not freed on an error path.
+
+However, the root cause for the above memory leak resides in region_del.
+In this specific case, a "placeholder" entry is created in region_chg.
+The associated page allocation fails, and the placeholder entry is left
+in the reserve map. This is "by design" as the entry should be deleted
+when the map is released. The bug is in the region_del routine which is
+used to delete entries within a specific range (and when the map is
+released). region_del did not handle the case where a placeholder entry
+exactly matched the start of the range to be deleted. In this
+case, the entry would not be deleted and leaked. The fix is to take
+these special placeholder entries into account in region_del.
+
+The region_chg error path leak is also fixed.
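+
+Spelled out, a placeholder is a zero-length file_region with from == to.
+For a delete of the range [f, t) starting exactly at such an entry, the
+old test
+
+  if (rg->to <= f)
+          continue;
+
+skipped it (rg->to == f), so the entry leaked; the fixed test in the hunk
+below keeps the entry in consideration when rg->from == rg->to == f.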
+
+Fixes: feba16e25a57 ("mm/hugetlb: add region_del() to delete a specific range of entries")
+Signed-off-by: Mike Kravetz
+Reported-by: Dmitry Vyukov
+Acked-by: Hillf Danton
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/hugetlb.c | 14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -372,8 +372,10 @@ retry_locked:
+ 		spin_unlock(&resv->lock);
+ 
+ 		trg = kmalloc(sizeof(*trg), GFP_KERNEL);
+-		if (!trg)
++		if (!trg) {
++			kfree(nrg);
+ 			return -ENOMEM;
++		}
+ 
+ 		spin_lock(&resv->lock);
+ 		list_add(&trg->link, &resv->region_cache);
+@@ -483,8 +485,16 @@ static long region_del(struct resv_map *
+ retry:
+ 	spin_lock(&resv->lock);
+ 	list_for_each_entry_safe(rg, trg, head, link) {
+-		if (rg->to <= f)
++		/*
++		 * Skip regions before the range to be deleted. file_region
++		 * ranges are normally of the form [from, to). However, there
++		 * may be a "placeholder" entry in the map which is of the form
++		 * (from, to) with from == to. Check for placeholder entries
++		 * at the beginning of the range to be deleted.
++		 */
++		if (rg->to <= f && (rg->to != rg->from || rg->to != f))
+ 			continue;
++
+ 		if (rg->from >= t)
+ 			break;
+
diff --git a/queue-4.3/mm-hugetlbfs-fix-bugs-in-fallocate-hole-punch-of-areas-with-holes.patch b/queue-4.3/mm-hugetlbfs-fix-bugs-in-fallocate-hole-punch-of-areas-with-holes.patch
new file mode 100644
index 00000000000..f6bcd4031f8
--- /dev/null
+++ b/queue-4.3/mm-hugetlbfs-fix-bugs-in-fallocate-hole-punch-of-areas-with-holes.patch
@@ -0,0 +1,167 @@
+From 1817889e3b2cc1db8abb595712095129ff9156c1 Mon Sep 17 00:00:00 2001
+From: Mike Kravetz
+Date: Fri, 20 Nov 2015 15:57:13 -0800
+Subject: mm/hugetlbfs: fix bugs in fallocate hole punch of areas with holes
+
+From: Mike Kravetz
+
+commit 1817889e3b2cc1db8abb595712095129ff9156c1 upstream.
+
+Hugh Dickins pointed out problems with the new hugetlbfs fallocate hole
+punch code. These problems are in the routine remove_inode_hugepages and
+mostly occur in the case where there are holes in the range of pages to be
+removed. These holes could be the result of a previous hole punch or
+simply sparse allocation. The current code could access pages outside the
+specified range.
+
+remove_inode_hugepages handles both hole punch and truncate operations.
+Page index handling was fixed/cleaned up so that the loop index always
+matches the page being processed. The code now only makes a single pass
+through the range of pages as it was determined page faults could not race
+with truncate. A cond_resched() was added after removing up to
+PAGEVEC_SIZE pages.
+
+Some totally unnecessary code in hugetlbfs_fallocate() that remained from
+early development was also removed.
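+
+For reference, the operation being fixed is the hole-punch mode of
+fallocate(2); a minimal userspace sketch (fd is assumed to refer to a
+file on a hugetlbfs mount):
+
+  #define _GNU_SOURCE
+  #include <fcntl.h>
+  #include <linux/falloc.h>
+
+  /* Punch a hole over [offset, offset + len) without changing i_size. */
+  int punch_hole(int fd, off_t offset, off_t len)
+  {
+          return fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+                           offset, len);
+  }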
+
+Tested with fallocate tests submitted here:
+http://librelist.com/browser//libhugetlbfs/2015/6/25/patch-tests-add-tests-for-fallocate-system-call/
+And, some ftruncate tests under development
+
+Fixes: b5cec28d36f5 ("hugetlbfs: truncate_hugepages() takes a range of pages")
+Signed-off-by: Mike Kravetz
+Acked-by: Hugh Dickins
+Cc: Dave Hansen
+Cc: Naoya Horiguchi
+Cc: Davidlohr Bueso
+Cc: "Hillf Danton"
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/hugetlbfs/inode.c | 65 +++++++++++++++++++++++++--------------------------
+ 1 file changed, 32 insertions(+), 33 deletions(-)
+
+--- a/fs/hugetlbfs/inode.c
++++ b/fs/hugetlbfs/inode.c
+@@ -332,12 +332,17 @@ static void remove_huge_page(struct page
+  * truncation is indicated by end of range being LLONG_MAX
+  *	In this case, we first scan the range and release found pages.
+  *	After releasing pages, hugetlb_unreserve_pages cleans up region/reserv
+- *	maps and global counts.
++ *	maps and global counts. Page faults can not race with truncation
++ *	in this routine. hugetlb_no_page() prevents page faults in the
++ *	truncated range. It checks i_size before allocation, and again after
++ *	with the page table lock for the page held. The same lock must be
++ *	acquired to unmap a page.
+  * hole punch is indicated if end is not LLONG_MAX
+  *	In the hole punch case we scan the range and release found pages.
+  *	Only when releasing a page is the associated region/reserv map
+  *	deleted. The region/reserv map for ranges without associated
+- *	pages are not modified.
++ *	pages are not modified. Page faults can race with hole punch.
++ *	This is indicated if we find a mapped page.
+  * Note: If the passed end of range value is beyond the end of file, but
+  * not LLONG_MAX this routine still performs a hole punch operation.
+@@ -361,46 +366,37 @@ static void remove_inode_hugepages(struc
+ 	next = start;
+ 	while (next < end) {
+ 		/*
+-		 * Make sure to never grab more pages that we
+-		 * might possibly need.
++		 * Don't grab more pages than the number left in the range.
+ 		 */
+ 		if (end - next < lookup_nr)
+ 			lookup_nr = end - next;
+ 
+ 		/*
+-		 * This pagevec_lookup() may return pages past 'end',
+-		 * so we must check for page->index > end.
++		 * When no more pages are found, we are done.
+ 		 */
+-		if (!pagevec_lookup(&pvec, mapping, next, lookup_nr)) {
+-			if (next == start)
+-				break;
+-			next = start;
+-			continue;
+-		}
++		if (!pagevec_lookup(&pvec, mapping, next, lookup_nr))
++			break;
+ 
+ 		for (i = 0; i < pagevec_count(&pvec); ++i) {
+ 			struct page *page = pvec.pages[i];
+ 			u32 hash;
+ 
++			/*
++			 * The page (index) could be beyond end. This is
++			 * only possible in the punch hole case as end is
++			 * max page offset in the truncate case.
++			 */
++			next = page->index;
++			if (next >= end)
++				break;
++
+ 			hash = hugetlb_fault_mutex_hash(h, current->mm,
+ 							&pseudo_vma,
+ 							mapping, next, 0);
+ 			mutex_lock(&hugetlb_fault_mutex_table[hash]);
+ 
+ 			lock_page(page);
+-			if (page->index >= end) {
+-				unlock_page(page);
+-				mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+-				next = end;	/* we are done */
+-				break;
+-			}
+-
+-			/*
+-			 * If page is mapped, it was faulted in after being
+-			 * unmapped. Do nothing in this race case. In the
+-			 * normal case page is not mapped.
+-			 */
+-			if (!page_mapped(page)) {
++			if (likely(!page_mapped(page))) {
+ 				bool rsv_on_error = !PagePrivate(page);
+ 				/*
+ 				 * We must free the huge page and remove
+@@ -421,17 +417,23 @@ static void remove_inode_hugepages(struc
+ 					hugetlb_fix_reserve_counts(
+ 						inode, rsv_on_error);
+ 				}
++			} else {
++				/*
++				 * If page is mapped, it was faulted in after
++				 * being unmapped. It indicates a race between
++				 * hole punch and page fault. Do nothing in
++				 * this case. Getting here in a truncate
++				 * operation is a bug.
++				 */
++				BUG_ON(truncate_op);
+ 			}
+ 
+-			if (page->index > next)
+-				next = page->index;
+-
+-			++next;
+ 			unlock_page(page);
+-
+ 			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+ 		}
++		++next;
+ 		huge_pagevec_release(&pvec);
++		cond_resched();
+ 	}
+ 
+ 	if (truncate_op)
+@@ -647,9 +649,6 @@ static long hugetlbfs_fallocate(struct f
+ 	if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
+ 		i_size_write(inode, offset + len);
+ 	inode->i_ctime = CURRENT_TIME;
+-	spin_lock(&inode->i_lock);
+-	inode->i_private = NULL;
+-	spin_unlock(&inode->i_lock);
+ out:
+ 	mutex_unlock(&inode->i_mutex);
+ 	return error;
diff --git a/queue-4.3/mm-oom_kill.c-reverse-the-order-of-setting-tif_memdie-and-sending-sigkill.patch b/queue-4.3/mm-oom_kill.c-reverse-the-order-of-setting-tif_memdie-and-sending-sigkill.patch
new file mode 100644
index 00000000000..bc6b68a49fe
--- /dev/null
+++ b/queue-4.3/mm-oom_kill.c-reverse-the-order-of-setting-tif_memdie-and-sending-sigkill.patch
@@ -0,0 +1,77 @@
+From 426fb5e72d92b868912e47a1e3ca2df6eabc3872 Mon Sep 17 00:00:00 2001
+From: Tetsuo Handa
+Date: Thu, 5 Nov 2015 18:47:44 -0800
+Subject: mm/oom_kill.c: reverse the order of setting TIF_MEMDIE and sending SIGKILL
+
+From: Tetsuo Handa
+
+commit 426fb5e72d92b868912e47a1e3ca2df6eabc3872 upstream.
+
+It was confirmed that a local unprivileged user can consume all memory
+reserves and hang up the system by exploiting the time lag between when
+the OOM killer sets TIF_MEMDIE on an OOM victim and when it sends SIGKILL
+to that victim, because the printk() inside the for_each_process() loop
+at oom_kill_process() can take many seconds when there are many thread
+groups sharing the same memory.
+
+Before starting oom-depleter process:
+
+  Node 0 DMA: 3*4kB (UM) 6*8kB (U) 4*16kB (UEM) 0*32kB 0*64kB 1*128kB (M) 2*256kB (EM) 2*512kB (UE) 2*1024kB (EM) 1*2048kB (E) 1*4096kB (M) = 9980kB
+  Node 0 DMA32: 31*4kB (UEM) 27*8kB (UE) 32*16kB (UE) 13*32kB (UE) 14*64kB (UM) 7*128kB (UM) 8*256kB (UM) 8*512kB (UM) 3*1024kB (U) 4*2048kB (UM) 362*4096kB (UM) = 1503220kB
+
+As of invoking the OOM killer:
+
+  Node 0 DMA: 11*4kB (UE) 8*8kB (UEM) 6*16kB (UE) 2*32kB (EM) 0*64kB 1*128kB (U) 3*256kB (UEM) 2*512kB (UE) 3*1024kB (UEM) 1*2048kB (U) 0*4096kB = 7308kB
+  Node 0 DMA32: 1049*4kB (UEM) 507*8kB (UE) 151*16kB (UE) 53*32kB (UEM) 83*64kB (UEM) 52*128kB (EM) 25*256kB (UEM) 11*512kB (M) 6*1024kB (UM) 1*2048kB (M) 0*4096kB = 44556kB
+
+Between the thread group leader got TIF_MEMDIE and receives SIGKILL:
+
+  Node 0 DMA: 0*4kB 0*8kB 0*16kB 0*32kB 0*64kB 0*128kB 0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB = 0kB
+  Node 0 DMA32: 0*4kB 0*8kB 0*16kB 0*32kB 0*64kB 0*128kB 0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB = 0kB
+
+The oom-depleter's thread group leader which got TIF_MEMDIE started
+memset() in user space after the OOM killer set TIF_MEMDIE, and it was
+free to abuse ALLOC_NO_WATERMARKS by TIF_MEMDIE for memset() in user space
+until SIGKILL was delivered. If SIGKILL is delivered before TIF_MEMDIE is
+set, the oom-depleter can terminate without touching memory reserves.
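+
+The ordering after the fix, condensed from the hunk below:
+
+  /* make the SIGKILL pending first ... */
+  do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, true);
+  /* ... and only then grant access to memory reserves */
+  mark_oom_victim(victim);        /* sets TIF_MEMDIE */
+
+so the victim can no longer run on in user space holding TIF_MEMDIE but
+without a pending SIGKILL.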
+
+Although the possibility of hitting this time lag is very small for 3.19
+and earlier kernels because TIF_MEMDIE is set immediately before sending
+SIGKILL, preemption or long interrupts (an extreme example is SysRq-t) can
+step between and allow memory allocations which are not needed for
+terminating the OOM victim.
+
+Fixes: 83363b917a29 ("oom: make sure that TIF_MEMDIE is set under task_lock")
+Signed-off-by: Tetsuo Handa
+Acked-by: Michal Hocko
+Cc: David Rientjes
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/oom_kill.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/mm/oom_kill.c
++++ b/mm/oom_kill.c
+@@ -554,6 +554,12 @@ void oom_kill_process(struct oom_control
+ 
+ 	/* mm cannot safely be dereferenced after task_unlock(victim) */
+ 	mm = victim->mm;
++	/*
++	 * We should send SIGKILL before setting TIF_MEMDIE in order to prevent
++	 * the OOM victim from depleting the memory reserves from the user
++	 * space under its control.
++	 */
++	do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, true);
+ 	mark_oom_victim(victim);
+ 	pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB\n",
+ 		task_pid_nr(victim), victim->comm, K(victim->mm->total_vm),
+@@ -585,7 +591,6 @@ void oom_kill_process(struct oom_control
+ 	}
+ 	rcu_read_unlock();
+ 
+-	do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, true);
+ 	put_task_struct(victim);
+ }
+ #undef K
diff --git a/queue-4.3/mm-slab-only-move-management-objects-off-slab-for-sizes-larger-than-kmalloc_min_size.patch b/queue-4.3/mm-slab-only-move-management-objects-off-slab-for-sizes-larger-than-kmalloc_min_size.patch
new file mode 100644
index 00000000000..4c020025fa9
--- /dev/null
+++ b/queue-4.3/mm-slab-only-move-management-objects-off-slab-for-sizes-larger-than-kmalloc_min_size.patch
@@ -0,0 +1,87 @@
+From d4322d88f5fdf92729dd40f923013414fbb2184d Mon Sep 17 00:00:00 2001
+From: Catalin Marinas
+Date: Thu, 5 Nov 2015 18:45:54 -0800
+Subject: mm: slab: only move management objects off-slab for sizes larger than KMALLOC_MIN_SIZE
+
+From: Catalin Marinas
+
+commit d4322d88f5fdf92729dd40f923013414fbb2184d upstream.
+
+On systems with a KMALLOC_MIN_SIZE of 128 (arm64, some mips and powerpc
+configurations defining ARCH_DMA_MINALIGN to 128), the first
+kmalloc_caches[] entry to be initialised after slab_early_init = 0 is
+"kmalloc-128" with index 7. Depending on the debug kernel configuration,
+sizeof(struct kmem_cache) can be larger than 128 resulting in an
+INDEX_NODE of 8.
+
+Commit 8fc9cf420b36 ("slab: make more slab management structure off the
+slab") enables off-slab management objects for sizes starting with
+PAGE_SIZE >> 5 (128 bytes for a 4KB page configuration) and the creation
+of the "kmalloc-128" cache would try to place the management objects
+off-slab. However, since KMALLOC_MIN_SIZE is already 128 and
+freelist_size == 32 in __kmem_cache_create(), kmalloc_slab(freelist_size)
+returns NULL (kmalloc_caches[7] not populated yet). This triggers the
+following bug on arm64:
+
+  kernel BUG at /work/Linux/linux-2.6-aarch64/mm/slab.c:2283!
+  Internal error: Oops - BUG: 0 [#1] SMP
+  Modules linked in:
+  CPU: 0 PID: 0 Comm: swapper Not tainted 4.3.0-rc4+ #540
+  Hardware name: Juno (DT)
+  PC is at __kmem_cache_create+0x21c/0x280
+  LR is at __kmem_cache_create+0x210/0x280
+  [...]
+  Call trace:
+    __kmem_cache_create+0x21c/0x280
+    create_boot_cache+0x48/0x80
+    create_kmalloc_cache+0x50/0x88
+    create_kmalloc_caches+0x4c/0xf4
+    kmem_cache_init+0x100/0x118
+    start_kernel+0x214/0x33c
+
+This patch introduces an OFF_SLAB_MIN_SIZE definition to avoid off-slab
+management objects for sizes equal to or smaller than KMALLOC_MIN_SIZE.
+
+Fixes: 8fc9cf420b36 ("slab: make more slab management structure off the slab")
+Signed-off-by: Catalin Marinas
+Reported-by: Geert Uytterhoeven
+Acked-by: Christoph Lameter
+Cc: Pekka Enberg
+Cc: David Rientjes
+Cc: Joonsoo Kim
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/slab.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/mm/slab.c
++++ b/mm/slab.c
+@@ -282,6 +282,7 @@ static void kmem_cache_node_init(struct
+ 
+ #define CFLGS_OFF_SLAB		(0x80000000UL)
+ #define	OFF_SLAB(x)	((x)->flags & CFLGS_OFF_SLAB)
++#define OFF_SLAB_MIN_SIZE (max_t(size_t, PAGE_SIZE >> 5, KMALLOC_MIN_SIZE + 1))
+ 
+ #define BATCHREFILL_LIMIT	16
+ /*
+@@ -2212,7 +2213,7 @@ __kmem_cache_create (struct kmem_cache *
+ 	 * it too early on. Always use on-slab management when
+ 	 * SLAB_NOLEAKTRACE to avoid recursive calls into kmemleak)
+ 	 */
+-	if ((size >= (PAGE_SIZE >> 5)) && !slab_early_init &&
++	if (size >= OFF_SLAB_MIN_SIZE && !slab_early_init &&
+ 	    !(flags & SLAB_NOLEAKTRACE))
+ 		/*
+ 		 * Size is large, assume best to place the slab management obj
+@@ -2276,7 +2277,7 @@ __kmem_cache_create (struct kmem_cache *
+ 		/*
+ 		 * This is a possibility for one of the kmalloc_{dma,}_caches.
+ 		 * But since we go off slab only for object size greater than
+-		 * PAGE_SIZE/8, and kmalloc_{dma,}_caches get created
++		 * OFF_SLAB_MIN_SIZE, and kmalloc_{dma,}_caches get created
+ 		 * in ascending order,this should not happen at all.
+ 		 * But leave a BUG_ON for some lucky dude.
+ 		 */
diff --git a/queue-4.3/mm-vmstat-allow-wq-concurrency-to-discover-memory-reclaim-doesn-t-make-any-progress.patch b/queue-4.3/mm-vmstat-allow-wq-concurrency-to-discover-memory-reclaim-doesn-t-make-any-progress.patch
new file mode 100644
index 00000000000..3ffb87e8c8a
--- /dev/null
+++ b/queue-4.3/mm-vmstat-allow-wq-concurrency-to-discover-memory-reclaim-doesn-t-make-any-progress.patch
@@ -0,0 +1,122 @@
+From 373ccbe5927034b55bdc80b0f8b54d6e13fe8d12 Mon Sep 17 00:00:00 2001
+From: Michal Hocko
+Date: Fri, 11 Dec 2015 13:40:32 -0800
+Subject: mm, vmstat: allow WQ concurrency to discover memory reclaim doesn't make any progress
+
+From: Michal Hocko
+
+commit 373ccbe5927034b55bdc80b0f8b54d6e13fe8d12 upstream.
+
+Tetsuo Handa has reported that the system might basically livelock in
+OOM condition without triggering the OOM killer.
+
+The issue is caused by an internal dependency of the direct reclaim on
+vmstat counter updates (via zone_reclaimable) which are performed from
+the workqueue context. If all the current workers get assigned to an
+allocation request, though, they will be looping inside the allocator
+trying to reclaim memory, but zone_reclaimable can see stalled numbers,
+so it will consider a zone reclaimable even though it has been scanned
+way too much. WQ concurrency logic will not consider this situation as
+a congested workqueue, because it relies on the fact that a worker
+would sleep in such a situation. This also means that it doesn't try
+to spawn new workers or invoke the rescuer thread if one is already
+assigned to the queue.
+
+In order to fix this issue we need to do two things.
First we have to
+let the wq concurrency code know that we are in trouble, so we have to do a
+short sleep. In order to prevent the issues handled by 0e093d99763e
+("writeback: do not sleep on the congestion queue if there are no
+congested BDIs or if significant congestion is not being encountered in
+the current zone") we limit the sleep only to worker threads, which are
+the ones of interest anyway.
+
+The second thing to do is to create a dedicated workqueue for vmstat and
+mark it WQ_MEM_RECLAIM to note it participates in the reclaim and to
+have a spare worker thread for it.
+
+Signed-off-by: Michal Hocko
+Reported-by: Tetsuo Handa
+Cc: Tejun Heo
+Cc: Cristopher Lameter
+Cc: Joonsoo Kim
+Cc: Arkadiusz Miskiewicz
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/backing-dev.c | 19 ++++++++++++++++---
+ mm/vmstat.c | 6 ++++--
+ 2 files changed, 20 insertions(+), 5 deletions(-)
+
+--- a/mm/backing-dev.c
++++ b/mm/backing-dev.c
+@@ -957,8 +957,9 @@ EXPORT_SYMBOL(congestion_wait);
+  * jiffies for either a BDI to exit congestion of the given @sync queue
+  * or a write to complete.
+  *
+- * In the absence of zone congestion, cond_resched() is called to yield
+- * the processor if necessary but otherwise does not sleep.
++ * In the absence of zone congestion, a short sleep or a cond_resched is
++ * performed to yield the processor and to allow other subsystems to make
++ * a forward progress.
+  *
+  * The return value is 0 if the sleep is for the full timeout. Otherwise,
+  * it is the number of jiffies that were still remaining when the function
+@@ -978,7 +979,19 @@ long wait_iff_congested(struct zone *zon
+ 	 */
+ 	if (atomic_read(&nr_wb_congested[sync]) == 0 ||
+ 	    !test_bit(ZONE_CONGESTED, &zone->flags)) {
+-		cond_resched();
++
++		/*
++		 * Memory allocation/reclaim might be called from a WQ
++		 * context and the current implementation of the WQ
++		 * concurrency control doesn't recognize that a particular
++		 * WQ is congested if the worker thread is looping without
++		 * ever sleeping. Therefore we have to do a short sleep
++		 * here rather than calling cond_resched().
++		 */
++		if (current->flags & PF_WQ_WORKER)
++			schedule_timeout(1);
++		else
++			cond_resched();
+ 
+ 		/* In case we scheduled, work out time remaining */
+ 		ret = timeout - (jiffies - start);
+--- a/mm/vmstat.c
++++ b/mm/vmstat.c
+@@ -1357,6 +1357,7 @@ static const struct file_operations proc
+ #endif /* CONFIG_PROC_FS */
+ 
+ #ifdef CONFIG_SMP
++static struct workqueue_struct *vmstat_wq;
+ static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
+ int sysctl_stat_interval __read_mostly = HZ;
+ static cpumask_var_t cpu_stat_off;
+@@ -1369,7 +1370,7 @@ static void vmstat_update(struct work_st
+ 		 * to occur in the future. Keep on running the
+ 		 * update worker thread.
+		 */
+-		schedule_delayed_work_on(smp_processor_id(),
++		queue_delayed_work_on(smp_processor_id(), vmstat_wq,
+ 			this_cpu_ptr(&vmstat_work),
+ 			round_jiffies_relative(sysctl_stat_interval));
+ 	} else {
+@@ -1438,7 +1439,7 @@ static void vmstat_shepherd(struct work_
+ 		if (need_update(cpu) &&
+ 			cpumask_test_and_clear_cpu(cpu, cpu_stat_off))
+ 
+-			schedule_delayed_work_on(cpu,
++			queue_delayed_work_on(cpu, vmstat_wq,
+ 				&per_cpu(vmstat_work, cpu), 0);
+ 
+ 	put_online_cpus();
+@@ -1527,6 +1528,7 @@ static int __init setup_vmstat(void)
+ 
+ 	start_shepherd_timer();
+ 	cpu_notifier_register_done();
++	vmstat_wq = alloc_workqueue("vmstat", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
+ #endif
+ #ifdef CONFIG_PROC_FS
+ 	proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
diff --git a/queue-4.3/ocfs2-dlm-clear-refmap-bit-of-recovery-lock-while-doing-local-recovery-cleanup.patch b/queue-4.3/ocfs2-dlm-clear-refmap-bit-of-recovery-lock-while-doing-local-recovery-cleanup.patch
new file mode 100644
index 00000000000..85b55f42930
--- /dev/null
+++ b/queue-4.3/ocfs2-dlm-clear-refmap-bit-of-recovery-lock-while-doing-local-recovery-cleanup.patch
@@ -0,0 +1,38 @@
+From c95a51807b730e4681e2ecbdfd669ca52601959e Mon Sep 17 00:00:00 2001
+From: xuejiufei
+Date: Fri, 5 Feb 2016 15:36:47 -0800
+Subject: ocfs2/dlm: clear refmap bit of recovery lock while doing local recovery cleanup
+
+From: xuejiufei
+
+commit c95a51807b730e4681e2ecbdfd669ca52601959e upstream.
+
+When the recovery master goes down, dlm_do_local_recovery_cleanup() only
+removes the $RECOVERY lock owned by the dead node, but does not clear the
+refmap bit. This makes the umount thread fall into a dead loop, migrating
+$RECOVERY to the dead node.
+
+Signed-off-by: xuejiufei
+Reviewed-by: Joseph Qi
+Cc: Mark Fasheh
+Cc: Joel Becker
+Cc: Junxiao Bi
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/ocfs2/dlm/dlmrecovery.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/ocfs2/dlm/dlmrecovery.c
++++ b/fs/ocfs2/dlm/dlmrecovery.c
+@@ -2360,6 +2360,8 @@ static void dlm_do_local_recovery_cleanu
+ 					break;
+ 				}
+ 			}
++			dlm_lockres_clear_refmap_bit(dlm, res,
++					dead_node);
+ 			spin_unlock(&res->spinlock);
+ 			continue;
+ 		}
diff --git a/queue-4.3/ocfs2-dlm-ignore-cleaning-the-migration-mle-that-is-inuse.patch b/queue-4.3/ocfs2-dlm-ignore-cleaning-the-migration-mle-that-is-inuse.patch
new file mode 100644
index 00000000000..5fe0d6a8858
--- /dev/null
+++ b/queue-4.3/ocfs2-dlm-ignore-cleaning-the-migration-mle-that-is-inuse.patch
@@ -0,0 +1,97 @@
+From bef5502de074b6f6fa647b94b73155d675694420 Mon Sep 17 00:00:00 2001
+From: xuejiufei
+Date: Thu, 14 Jan 2016 15:17:38 -0800
+Subject: ocfs2/dlm: ignore cleaning the migration mle that is inuse
+
+From: xuejiufei
+
+commit bef5502de074b6f6fa647b94b73155d675694420 upstream.
+
+We have found that the migration source triggers a BUG because the
+refcount of the mle is already zero before the put when the target goes
+down during migration. The situation is as follows:
+
+dlm_migrate_lockres
+  dlm_add_migration_mle
+  dlm_mark_lockres_migrating
+    dlm_get_mle_inuse
+    <<<<<< Now the refcount of the mle is 2.
+    dlm_send_one_lockres and wait for the target to become the
+    new master.
+    <<<<<< o2hb detects the target down and cleans the migration
+    mle. Now the refcount is 1.
+
+dlm_migrate_lockres is then woken, and puts the mle twice when it finds
+that the target has gone down, which triggers the BUG with the following
+message:
+
+  "ERROR: bad mle: ".
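+
+The same situation as a refcount timeline (an annotated restatement of
+the diagram above):
+
+  dlm_add_migration_mle()       /* refcount 1 */
+  dlm_get_mle_inuse()           /* refcount 2 */
+  <target dies; o2hb cleans the migration mle>  /* refcount 1 */
+  dlm_put_mle()                 /* refcount 0 */
+  dlm_put_mle_inuse()           /* put on refcount 0 -> "ERROR: bad mle" */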
+
+Signed-off-by: Jiufei Xue
+Reviewed-by: Joseph Qi
+Cc: Mark Fasheh
+Cc: Joel Becker
+Cc: Junxiao Bi
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/ocfs2/dlm/dlmmaster.c | 26 +++++++++++++++-----------
+ 1 file changed, 15 insertions(+), 11 deletions(-)
+
+--- a/fs/ocfs2/dlm/dlmmaster.c
++++ b/fs/ocfs2/dlm/dlmmaster.c
+@@ -2519,6 +2519,11 @@ static int dlm_migrate_lockres(struct dl
+ 	spin_lock(&dlm->master_lock);
+ 	ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name,
+ 				    namelen, target, dlm->node_num);
++	/* get an extra reference on the mle.
++	 * otherwise the assert_master from the new
++	 * master will destroy this.
++	 */
++	dlm_get_mle_inuse(mle);
+ 	spin_unlock(&dlm->master_lock);
+ 	spin_unlock(&dlm->spinlock);
+ 
+@@ -2554,6 +2559,7 @@ fail:
+ 		if (mle_added) {
+ 			dlm_mle_detach_hb_events(dlm, mle);
+ 			dlm_put_mle(mle);
++			dlm_put_mle_inuse(mle);
+ 		} else if (mle) {
+ 			kmem_cache_free(dlm_mle_cache, mle);
+ 			mle = NULL;
+@@ -2571,17 +2577,6 @@ fail:
+ 	 * ensure that all assert_master work is flushed. */
+ 	flush_workqueue(dlm->dlm_worker);
+ 
+-	/* get an extra reference on the mle.
+-	 * otherwise the assert_master from the new
+-	 * master will destroy this.
+-	 * also, make sure that all callers of dlm_get_mle
+-	 * take both dlm->spinlock and dlm->master_lock */
+-	spin_lock(&dlm->spinlock);
+-	spin_lock(&dlm->master_lock);
+-	dlm_get_mle_inuse(mle);
+-	spin_unlock(&dlm->master_lock);
+-	spin_unlock(&dlm->spinlock);
+-
+ 	/* notify new node and send all lock state */
+ 	/* call send_one_lockres with migration flag.
+ 	 * this serves as notice to the target node that a
+@@ -3310,6 +3305,15 @@ top:
+ 				    mle->new_master != dead_node)
+ 					continue;
+ 
++				if (mle->new_master == dead_node && mle->inuse) {
++					mlog(ML_NOTICE, "%s: target %u died during "
++							"migration from %u, the MLE is "
++							"still keep used, ignore it!\n",
++							dlm->name, dead_node,
++							mle->master);
++					continue;
++				}
++
+ 				/* If we have reached this point, this mle needs to be
+ 				 * removed from the list and freed. */
+ 				dlm_clean_migration_mle(dlm, mle);
diff --git a/queue-4.3/ocfs2-fix-bug-when-calculate-new-backup-super.patch b/queue-4.3/ocfs2-fix-bug-when-calculate-new-backup-super.patch
new file mode 100644
index 00000000000..bf11d9f8d74
--- /dev/null
+++ b/queue-4.3/ocfs2-fix-bug-when-calculate-new-backup-super.patch
@@ -0,0 +1,98 @@
+From 5c9ee4cbf2a945271f25b89b137f2c03bbc3be33 Mon Sep 17 00:00:00 2001
+From: Joseph Qi
+Date: Tue, 29 Dec 2015 14:54:06 -0800
+Subject: ocfs2: fix BUG when calculate new backup super
+
+From: Joseph Qi
+
+commit 5c9ee4cbf2a945271f25b89b137f2c03bbc3be33 upstream.
+
+When resizing, it first extends the last gd. Once a backup super should
+be placed in the gd, it calculates the new backup super and updates the
+corresponding value.
+
+But it currently doesn't consider the situation where the backup super
+has already been done. In this case, it still sets the bit in the gd
+bitmap and then decreases bg_free_bits_count, which leads to a corrupted
+gd and triggers the BUG in ocfs2_block_group_set_bits:
+
+  BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits);
+
+So check whether the backup super is done and then do the updates.
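+
+The added guard, in isolation (from the hunk below): a backup super whose
+cluster falls inside the group's old size was written by an earlier
+resize and must not be set and counted again:
+
+  lgd_cluster = ocfs2_blocks_to_clusters(inode->i_sb, lgd_blkno);
+  lgd_cluster += old_bg_clusters;
+  if (lgd_cluster >= cluster)
+          continue;       /* backup super already done */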
+
+Signed-off-by: Joseph Qi
+Reviewed-by: Jiufei Xue
+Reviewed-by: Yiwen Jiang
+Cc: Mark Fasheh
+Cc: Joel Becker
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/ocfs2/resize.c | 15 ++++++++++++---
+ 1 file changed, 12 insertions(+), 3 deletions(-)
+
+--- a/fs/ocfs2/resize.c
++++ b/fs/ocfs2/resize.c
+@@ -54,11 +54,12 @@
+ static u16 ocfs2_calc_new_backup_super(struct inode *inode,
+ 				       struct ocfs2_group_desc *gd,
+ 				       u16 cl_cpg,
++				       u16 old_bg_clusters,
+ 				       int set)
+ {
+ 	int i;
+ 	u16 backups = 0;
+-	u32 cluster;
++	u32 cluster, lgd_cluster;
+ 	u64 blkno, gd_blkno, lgd_blkno = le64_to_cpu(gd->bg_blkno);
+ 
+ 	for (i = 0; i < OCFS2_MAX_BACKUP_SUPERBLOCKS; i++) {
+@@ -71,6 +72,12 @@ static u16 ocfs2_calc_new_backup_super(s
+ 		else if (gd_blkno > lgd_blkno)
+ 			break;
+ 
++		/* check if already done backup super */
++		lgd_cluster = ocfs2_blocks_to_clusters(inode->i_sb, lgd_blkno);
++		lgd_cluster += old_bg_clusters;
++		if (lgd_cluster >= cluster)
++			continue;
++
+ 		if (set)
+ 			ocfs2_set_bit(cluster % cl_cpg,
+ 				      (unsigned long *)gd->bg_bitmap);
+@@ -99,6 +106,7 @@ static int ocfs2_update_last_group_and_i
+ 	u16 chain, num_bits, backups = 0;
+ 	u16 cl_bpc = le16_to_cpu(cl->cl_bpc);
+ 	u16 cl_cpg = le16_to_cpu(cl->cl_cpg);
++	u16 old_bg_clusters;
+ 
+ 	trace_ocfs2_update_last_group_and_inode(new_clusters,
+ 						first_new_cluster);
+@@ -112,6 +120,7 @@ static int ocfs2_update_last_group_and_i
+ 
+ 	group = (struct ocfs2_group_desc *)group_bh->b_data;
+ 
++	old_bg_clusters = le16_to_cpu(group->bg_bits) / cl_bpc;
+ 	/* update the group first. */
+ 	num_bits = new_clusters * cl_bpc;
+ 	le16_add_cpu(&group->bg_bits, num_bits);
+@@ -125,7 +134,7 @@ static int ocfs2_update_last_group_and_i
+ 		     OCFS2_FEATURE_COMPAT_BACKUP_SB)) {
+ 		backups = ocfs2_calc_new_backup_super(bm_inode,
+ 						     group,
+-						     cl_cpg, 1);
++						     cl_cpg, old_bg_clusters, 1);
+ 		le16_add_cpu(&group->bg_free_bits_count, -1 * backups);
+ 	}
+ 
+@@ -163,7 +172,7 @@ out_rollback:
+ 	if (ret < 0) {
+ 		ocfs2_calc_new_backup_super(bm_inode,
+ 					    group,
+-					    cl_cpg, 0);
++					    cl_cpg, old_bg_clusters, 0);
+ 		le16_add_cpu(&group->bg_free_bits_count, backups);
+ 		le16_add_cpu(&group->bg_bits, -1 * num_bits);
+ 		le16_add_cpu(&group->bg_free_bits_count, -1 * num_bits);
diff --git a/queue-4.3/ocfs2-fix-sgid-not-inherited-issue.patch b/queue-4.3/ocfs2-fix-sgid-not-inherited-issue.patch
new file mode 100644
index 00000000000..6d8bff64432
--- /dev/null
+++ b/queue-4.3/ocfs2-fix-sgid-not-inherited-issue.patch
@@ -0,0 +1,44 @@
+From 854ee2e944b4daf795e32562a7d2f9e90ab5a6a8 Mon Sep 17 00:00:00 2001
+From: Junxiao Bi
+Date: Fri, 11 Dec 2015 13:41:03 -0800
+Subject: ocfs2: fix SGID not inherited issue
+
+From: Junxiao Bi
+
+commit 854ee2e944b4daf795e32562a7d2f9e90ab5a6a8 upstream.
+
+Commit 8f1eb48758aa ("ocfs2: fix umask ignored issue") introduced an
+issue: the SGID of a sub dir was not inherited from its parent dir. This
+is because SGID is set into "inode->i_mode" in ocfs2_get_init_inode(), but
+is later overwritten by "mode", which doesn't have SGID set.
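+
+The fix, in one line (see the hunk below): apply the umask/ACL adjustment
+to inode->i_mode itself, which already carries the inherited S_ISGID,
+rather than to the local "mode":
+
+  status = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl);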
+
+Fixes: 8f1eb48758aa ("ocfs2: fix umask ignored issue")
+Signed-off-by: Junxiao Bi
+Cc: Mark Fasheh
+Cc: Joel Becker
+Acked-by: Srinivas Eeda
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/ocfs2/namei.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/fs/ocfs2/namei.c
++++ b/fs/ocfs2/namei.c
+@@ -369,13 +369,11 @@ static int ocfs2_mknod(struct inode *dir
+ 		goto leave;
+ 	}
+ 
+-	status = posix_acl_create(dir, &mode, &default_acl, &acl);
++	status = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl);
+ 	if (status) {
+ 		mlog_errno(status);
+ 		goto leave;
+ 	}
+-	/* update inode->i_mode after mask with "umask". */
+-	inode->i_mode = mode;
+ 
+ 	handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb,
+ 							    S_ISDIR(mode),
diff --git a/queue-4.3/proc-actually-make-proc_fd_permission-thread-friendly.patch b/queue-4.3/proc-actually-make-proc_fd_permission-thread-friendly.patch
new file mode 100644
index 00000000000..588ffda4a55
--- /dev/null
+++ b/queue-4.3/proc-actually-make-proc_fd_permission-thread-friendly.patch
@@ -0,0 +1,53 @@
+From 54708d2858e79a2bdda10bf8a20c80eb96c20613 Mon Sep 17 00:00:00 2001
+From: Oleg Nesterov
+Date: Fri, 6 Nov 2015 16:30:06 -0800
+Subject: proc: actually make proc_fd_permission() thread-friendly
+
+From: Oleg Nesterov
+
+commit 54708d2858e79a2bdda10bf8a20c80eb96c20613 upstream.
+
+The commit 96d0df79f264 ("proc: make proc_fd_permission() thread-friendly")
+fixed the access to /proc/self/fd from sub-threads, but introduced another
+problem: a sub-thread can't access /proc/<tid>/fd/ or /proc/thread-self/fd
+if generic_permission() fails.
+
+Change proc_fd_permission() to check same_thread_group(pid_task(), current).
+
+Fixes: 96d0df79f264 ("proc: make proc_fd_permission() thread-friendly")
+Reported-by: "Jin, Yihua"
+Signed-off-by: Oleg Nesterov
+Cc: "Eric W. Biederman"
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/proc/fd.c | 14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+--- a/fs/proc/fd.c
++++ b/fs/proc/fd.c
+@@ -291,11 +291,19 @@ static struct dentry *proc_lookupfd(stru
+  */
+ int proc_fd_permission(struct inode *inode, int mask)
+ {
+-	int rv = generic_permission(inode, mask);
++	struct task_struct *p;
++	int rv;
++
++	rv = generic_permission(inode, mask);
+ 	if (rv == 0)
+-		return 0;
+-	if (task_tgid(current) == proc_pid(inode))
++		return rv;
++
++	rcu_read_lock();
++	p = pid_task(proc_pid(inode), PIDTYPE_PID);
++	if (p && same_thread_group(p, current))
+ 		rv = 0;
++	rcu_read_unlock();
++
+ 	return rv;
+ }
+
diff --git a/queue-4.3/proc-fix-esrch-error-when-writing-to-proc-pid-coredump_filter.patch b/queue-4.3/proc-fix-esrch-error-when-writing-to-proc-pid-coredump_filter.patch
new file mode 100644
index 00000000000..97c125fb903
--- /dev/null
+++ b/queue-4.3/proc-fix-esrch-error-when-writing-to-proc-pid-coredump_filter.patch
@@ -0,0 +1,40 @@
+From 41a0c249cb8706a2efa1ab3d59466b23a27d0c8b Mon Sep 17 00:00:00 2001
+From: Colin Ian King
+Date: Fri, 18 Dec 2015 14:22:01 -0800
+Subject: proc: fix -ESRCH error when writing to /proc/$pid/coredump_filter
+
+From: Colin Ian King
+
+commit 41a0c249cb8706a2efa1ab3d59466b23a27d0c8b upstream.
+
+Writing to /proc/$pid/coredump_filter always returns -ESRCH because commit
+774636e19ed51 ("proc: convert to kstrto*()/kstrto*_from_user()") removed
+the setting of ret after the get_proc_task call and incorrectly left it as
+-ESRCH. Instead, return 0 when successful.
+
+Example breakage:
+
+  echo 0 > /proc/self/coredump_filter
+  bash: echo: write error: No such process
+
+Fixes: 774636e19ed51 ("proc: convert to kstrto*()/kstrto*_from_user()")
+Signed-off-by: Colin Ian King
+Acked-by: Kees Cook
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/proc/base.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/proc/base.c
++++ b/fs/proc/base.c
+@@ -2484,6 +2484,7 @@ static ssize_t proc_coredump_filter_writ
+ 	mm = get_task_mm(task);
+ 	if (!mm)
+ 		goto out_no_mm;
++	ret = 0;
+ 
+ 	for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) {
+ 		if (val & mask)
diff --git a/queue-4.3/remoteproc-avoid-stack-overflow-in-debugfs-file.patch b/queue-4.3/remoteproc-avoid-stack-overflow-in-debugfs-file.patch
new file mode 100644
index 00000000000..694732e8e39
--- /dev/null
+++ b/queue-4.3/remoteproc-avoid-stack-overflow-in-debugfs-file.patch
@@ -0,0 +1,40 @@
+From 92792e48e2ae6051af30468a87994b5432da2f06 Mon Sep 17 00:00:00 2001
+From: Arnd Bergmann
+Date: Fri, 20 Nov 2015 18:26:07 +0100
+Subject: remoteproc: avoid stack overflow in debugfs file
+
+From: Arnd Bergmann
+
+commit 92792e48e2ae6051af30468a87994b5432da2f06 upstream.
+
+Recent gcc versions warn about reading from a negative offset of
+an on-stack array:
+
+drivers/remoteproc/remoteproc_debugfs.c: In function 'rproc_recovery_write':
+drivers/remoteproc/remoteproc_debugfs.c:167:9: warning: 'buf[4294967295u]' may be used uninitialized in this function [-Wmaybe-uninitialized]
+
+I don't see anything in sys_write() that prevents us from
+being called with a zero 'count' argument, so we should
+add an extra check in rproc_recovery_write() to prevent the
+access and avoid the warning.
+
+Signed-off-by: Arnd Bergmann
+Fixes: 2e37abb89a2e ("remoteproc: create a 'recovery' debugfs entry")
+Signed-off-by: Ohad Ben-Cohen
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/remoteproc/remoteproc_debugfs.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/remoteproc/remoteproc_debugfs.c
++++ b/drivers/remoteproc/remoteproc_debugfs.c
+@@ -156,7 +156,7 @@ rproc_recovery_write(struct file *filp,
+ 	char buf[10];
+ 	int ret;
+ 
+-	if (count > sizeof(buf))
++	if (count < 1 || count > sizeof(buf))
+ 		return count;
+ 
+ 	ret = copy_from_user(buf, user_buf, count);
diff --git a/queue-4.3/series b/queue-4.3/series
index 69bb5ea7ff9..604aed1b32f 100644
--- a/queue-4.3/series
+++ b/queue-4.3/series
@@ -16,3 +16,23 @@ nfsv4.1-pnfs-fixup-an-lo-plh_block_lgets-imbalance-in-layoutreturn.patch
 ocfs2-nfs-hangs-in-__ocfs2_cluster_lock-due-to-race-with-ocfs2_unblock_lock.patch
 hid-usbhid-fix-recursive-deadlock.patch
 alsa-hda-implement-loopback-control-switch-for-realtek-and-other-codecs.patch
+proc-actually-make-proc_fd_permission-thread-friendly.patch
+remoteproc-avoid-stack-overflow-in-debugfs-file.patch
+proc-fix-esrch-error-when-writing-to-proc-pid-coredump_filter.patch
+mm-slab-only-move-management-objects-off-slab-for-sizes-larger-than-kmalloc_min_size.patch
+mm-oom_kill.c-reverse-the-order-of-setting-tif_memdie-and-sending-sigkill.patch
+lib-hexdump.c-truncate-output-in-case-of-overflow.patch
+fs-seqfile-always-allow-oom-killer.patch
+memcg-fix-thresholds-for-32b-architectures.patch
+mm-hugetlb-fix-hugepage-memory-leak-caused-by-wrong-reserve-count.patch
+mm-vmstat-allow-wq-concurrency-to-discover-memory-reclaim-doesn-t-make-any-progress.patch
+mm-hugetlbfs-fix-bugs-in-fallocate-hole-punch-of-areas-with-holes.patch
+fat-fix-fake_offset-handling-on-error-path.patch
+mm-hugetlb-call-huge_pte_alloc-only-if-ptep-is-null.patch
+kernel-signal.c-unexport-sigsuspend.patch
+mm-hugetlb.c-fix-resv-map-memory-leak-for-placeholder-entries.patch
+ocfs2-fix-sgid-not-inherited-issue.patch
+ocfs2-fix-bug-when-calculate-new-backup-super.patch
+ocfs2-dlm-ignore-cleaning-the-migration-mle-that-is-inuse.patch
+ocfs2-dlm-clear-refmap-bit-of-recovery-lock-while-doing-local-recovery-cleanup.patch
+sh64-fix-__nr_fgetxattr.patch
diff --git a/queue-4.3/sh64-fix-__nr_fgetxattr.patch b/queue-4.3/sh64-fix-__nr_fgetxattr.patch
new file mode 100644
index 00000000000..b2d552c69df
--- /dev/null
+++ b/queue-4.3/sh64-fix-__nr_fgetxattr.patch
@@ -0,0 +1,37 @@
+From 2d33fa1059da4c8e816627a688d950b613ec0474 Mon Sep 17 00:00:00 2001
+From: "Dmitry V. Levin"
+Date: Fri, 11 Dec 2015 13:41:06 -0800
+Subject: sh64: fix __NR_fgetxattr
+
+From: Dmitry V. Levin
+
+commit 2d33fa1059da4c8e816627a688d950b613ec0474 upstream.
+
+According to arch/sh/kernel/syscalls_64.S and common sense, __NR_fgetxattr
+has to be defined to 259, but it isn't. Instead, it's defined to 269,
+which is of course used by another syscall, __NR_sched_setaffinity in this
+case.
+
+This bug was found by the strace test suite.
+
+Signed-off-by: Dmitry V. Levin
+Acked-by: Geert Uytterhoeven
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/sh/include/uapi/asm/unistd_64.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/sh/include/uapi/asm/unistd_64.h
++++ b/arch/sh/include/uapi/asm/unistd_64.h
+@@ -278,7 +278,7 @@
+ #define __NR_fsetxattr		256
+ #define __NR_getxattr		257
+ #define __NR_lgetxattr		258
+-#define __NR_fgetxattr		269
++#define __NR_fgetxattr		259
+ #define __NR_listxattr		260
+ #define __NR_llistxattr		261
+ #define __NR_flistxattr		262
-- 
2.47.3