From 26d099b58d3b8d138f0cf0ba31522fc40039bc72 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Sat, 3 May 2014 14:30:17 -0400
Subject: [PATCH] 3.14-stable patches

added patches:
	hung_task-check-the-value-of-sysctl_hung_task_timeout_sec.patch
	iser-target-add-missing-se_cmd-put-for-write_pending-in-tx_comp_err.patch
	iser-target-match-frmr-descriptors-to-available-session-tags.patch
	mm-hugetlb-fix-softlockup-when-a-large-number-of-hugepages-are-freed.patch
	mm-page_alloc-spill-to-remote-nodes-before-waking-kswapd.patch
	mm-try_to_unmap_cluster-should-lock_page-before-mlocking.patch
	mm-vmscan-do-not-swap-anon-pages-just-because-free-file-is-low.patch
	sh-fix-format-string-bug-in-stack-tracer.patch
---
 ...alue-of-sysctl_hung_task_timeout_sec.patch |  66 +++++
 ...put-for-write_pending-in-tx_comp_err.patch | 149 ++++++++++++
 ...escriptors-to-available-session-tags.patch | 117 +++++++++
 ...-large-number-of-hugepages-are-freed.patch |  89 +++++++
 ...to-remote-nodes-before-waking-kswapd.patch | 227 ++++++++++++++++++
 ...ter-should-lock_page-before-mlocking.patch |  90 +++++++
 ...-pages-just-because-free-file-is-low.patch |  73 ++++++
 queue-3.14/series                             |   8 +
 ...ix-format-string-bug-in-stack-tracer.patch |  40 +++
 9 files changed, 859 insertions(+)
 create mode 100644 queue-3.14/hung_task-check-the-value-of-sysctl_hung_task_timeout_sec.patch
 create mode 100644 queue-3.14/iser-target-add-missing-se_cmd-put-for-write_pending-in-tx_comp_err.patch
 create mode 100644 queue-3.14/iser-target-match-frmr-descriptors-to-available-session-tags.patch
 create mode 100644 queue-3.14/mm-hugetlb-fix-softlockup-when-a-large-number-of-hugepages-are-freed.patch
 create mode 100644 queue-3.14/mm-page_alloc-spill-to-remote-nodes-before-waking-kswapd.patch
 create mode 100644 queue-3.14/mm-try_to_unmap_cluster-should-lock_page-before-mlocking.patch
 create mode 100644 queue-3.14/mm-vmscan-do-not-swap-anon-pages-just-because-free-file-is-low.patch
 create mode 100644 queue-3.14/sh-fix-format-string-bug-in-stack-tracer.patch

diff --git a/queue-3.14/hung_task-check-the-value-of-sysctl_hung_task_timeout_sec.patch b/queue-3.14/hung_task-check-the-value-of-sysctl_hung_task_timeout_sec.patch
new file mode 100644
index 00000000000..a2c28d0fd0a
--- /dev/null
+++ b/queue-3.14/hung_task-check-the-value-of-sysctl_hung_task_timeout_sec.patch
@@ -0,0 +1,66 @@
+From 80df28476505ed4e6701c3448c63c9229a50c655 Mon Sep 17 00:00:00 2001
+From: Liu Hua
+Date: Mon, 7 Apr 2014 15:38:57 -0700
+Subject: hung_task: check the value of "sysctl_hung_task_timeout_sec"
+
+From: Liu Hua
+
+commit 80df28476505ed4e6701c3448c63c9229a50c655 upstream.
+
+As sysctl_hung_task_timeout_sec is unsigned long, when this value is
+larger than LONG_MAX/HZ, the function schedule_timeout_interruptible in
+watchdog will return immediately without sleeping and will print:
+
+	schedule_timeout: wrong timeout value ffffffffffffff83
+
+and then the function watchdog will call schedule_timeout_interruptible
+again and again. The screen will be filled with
+
+	"schedule_timeout: wrong timeout value ffffffffffffff83"
+
+This patch adds a check and correction in sysctl so that
+schedule_timeout_interruptible always gets a valid parameter.
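
[ Editor's note: the sketch below is not part of the queue. It shows, in
  plain user-space C, the overflow the changelog above describes: once the
  configured seconds exceed LONG_MAX/HZ, the seconds-to-jiffies conversion
  wraps negative, which is exactly the condition schedule_timeout()
  rejects. The HZ value and the variable names are assumptions made for
  the demo only. ]

	#include <limits.h>
	#include <stdio.h>

	#define HZ 250			/* assumed config; kernels use 100-1000 */

	int main(void)
	{
		unsigned long timeout_secs = ULONG_MAX / 2;	/* > LONG_MAX/HZ */
		long timeout_jiffies = timeout_secs * HZ;	/* wraps negative */

		/* schedule_timeout() refuses negative values with this message */
		if (timeout_jiffies < 0)
			printf("schedule_timeout: wrong timeout value %lx\n",
			       (unsigned long)timeout_jiffies);

		/* the patch clamps the sysctl to this ceiling via .extra2 */
		printf("largest safe setting: %ld (LONG_MAX/HZ)\n", LONG_MAX / HZ);
		return 0;
	}
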
+
+Signed-off-by: Liu Hua
+Tested-by: Satoru Takeuchi
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ Documentation/sysctl/kernel.txt |    1 +
+ kernel/sysctl.c                 |    6 ++++++
+ 2 files changed, 7 insertions(+)
+
+--- a/Documentation/sysctl/kernel.txt
++++ b/Documentation/sysctl/kernel.txt
+@@ -317,6 +317,7 @@ for more than this value report a warnin
+ This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
+ 
+ 0: means infinite timeout - no checking done.
++Possible values to set are in range {0..LONG_MAX/HZ}.
+ 
+ ==============================================================
+ 
+--- a/kernel/sysctl.c
++++ b/kernel/sysctl.c
+@@ -144,6 +144,11 @@ static int min_percpu_pagelist_fract = 8
+ static int ngroups_max = NGROUPS_MAX;
+ static const int cap_last_cap = CAP_LAST_CAP;
+ 
++/*this is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs */
++#ifdef CONFIG_DETECT_HUNG_TASK
++static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
++#endif
++
+ #ifdef CONFIG_INOTIFY_USER
+ #include <linux/inotify.h>
+ #endif
+@@ -995,6 +1000,7 @@ static struct ctl_table kern_table[] = {
+ 		.maxlen		= sizeof(unsigned long),
+ 		.mode		= 0644,
+ 		.proc_handler	= proc_dohung_task_timeout_secs,
++		.extra2		= &hung_task_timeout_max,
+ 	},
+ 	{
+ 		.procname	= "hung_task_warnings",
diff --git a/queue-3.14/iser-target-add-missing-se_cmd-put-for-write_pending-in-tx_comp_err.patch b/queue-3.14/iser-target-add-missing-se_cmd-put-for-write_pending-in-tx_comp_err.patch
new file mode 100644
index 00000000000..1bf6aaf13a1
--- /dev/null
+++ b/queue-3.14/iser-target-add-missing-se_cmd-put-for-write_pending-in-tx_comp_err.patch
@@ -0,0 +1,149 @@
+From nab@linux-iscsi.org Sat May 3 14:15:37 2014
+From: "Nicholas A. Bellinger"
+Date: Fri, 2 May 2014 21:26:30 +0000
+Subject: [PATCH-v3.14.y 2/2] iser-target: Add missing se_cmd put for WRITE_PENDING in tx_comp_err
+To: target-devel
+Cc: Greg-KH, stable, Nicholas Bellinger, Or Gerlitz
+Message-ID: <1399065990-30552-3-git-send-email-nab@linux-iscsi.org>
+
+
+From: Nicholas Bellinger
+
+commit 03e7848a64ed535a30f5d7fc6dede2d5a6a2534b upstream.
+
+This patch fixes a bug where outstanding RDMA_READs with WRITE_PENDING
+status require an extra target_put_sess_cmd() in isert_put_cmd() code
+when called from isert_cq_tx_comp_err() + isert_cq_drain_comp_llist()
+context during session shutdown.
+
+The extra kref PUT is required so that transport_generic_free_cmd()
+invokes the last target_put_sess_cmd() -> target_release_cmd_kref(),
+which will complete(&se_cmd->cmd_wait_comp) the outstanding se_cmd
+descriptor with WRITE_PENDING status, and wake up the completion in
+target_wait_for_sess_cmds() to invoke TFO->release_cmd().
+
+The bug was manifesting itself in target_wait_for_sess_cmds() where
+a se_cmd descriptor with WRITE_PENDING status would end up sleeping
+indefinitely.
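
[ Editor's note: not part of the queue. The hang described above is the
  generic kref-plus-completion teardown pattern: the waiter only wakes
  when the final put runs the release callback, so one missing put means
  an indefinite sleep. A minimal kernel-style sketch, with toy types
  invented for illustration: ]

	#include <linux/kernel.h>
	#include <linux/kref.h>
	#include <linux/completion.h>

	struct toy_cmd {
		struct kref		kref;	/* one ref per outstanding user */
		struct completion	done;	/* fired by the final put */
	};

	static void toy_release(struct kref *kref)
	{
		struct toy_cmd *cmd = container_of(kref, struct toy_cmd, kref);

		complete(&cmd->done);		/* wakes toy_wait_for_cmd() */
	}

	static void toy_put(struct toy_cmd *cmd)
	{
		kref_put(&cmd->kref, toy_release);
	}

	/*
	 * Shutdown side, analogous to target_wait_for_sess_cmds(): if any
	 * path (here, the WRITE_PENDING comp_err path) forgets its
	 * toy_put(), the refcount never reaches zero and this sleeps forever.
	 */
	static void toy_wait_for_cmd(struct toy_cmd *cmd)
	{
		toy_put(cmd);
		wait_for_completion(&cmd->done);
	}
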
+
+Acked-by: Sagi Grimberg
+Cc: Or Gerlitz
+Signed-off-by: Nicholas Bellinger
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/infiniband/ulp/isert/ib_isert.c |   35 +++++++++++++++++++++-----------
+ 1 file changed, 24 insertions(+), 11 deletions(-)
+
+--- a/drivers/infiniband/ulp/isert/ib_isert.c
++++ b/drivers/infiniband/ulp/isert/ib_isert.c
+@@ -1456,7 +1456,7 @@ isert_unreg_rdma(struct isert_cmd *isert
+ }
+ 
+ static void
+-isert_put_cmd(struct isert_cmd *isert_cmd)
++isert_put_cmd(struct isert_cmd *isert_cmd, bool comp_err)
+ {
+ 	struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
+ 	struct isert_conn *isert_conn = isert_cmd->conn;
+@@ -1472,8 +1472,21 @@ isert_put_cmd(struct isert_cm
+ 		list_del_init(&cmd->i_conn_node);
+ 		spin_unlock_bh(&conn->cmd_lock);
+ 
+-		if (cmd->data_direction == DMA_TO_DEVICE)
++		if (cmd->data_direction == DMA_TO_DEVICE) {
+ 			iscsit_stop_dataout_timer(cmd);
++			/*
++			 * Check for special case during comp_err where
++			 * WRITE_PENDING has been handed off from core,
++			 * but requires an extra target_put_sess_cmd()
++			 * before transport_generic_free_cmd() below.
++			 */
++			if (comp_err &&
++			    cmd->se_cmd.t_state == TRANSPORT_WRITE_PENDING) {
++				struct se_cmd *se_cmd = &cmd->se_cmd;
++
++				target_put_sess_cmd(se_cmd->se_sess, se_cmd);
++			}
++		}
+ 
+ 		device->unreg_rdma_mem(isert_cmd, isert_conn);
+ 		transport_generic_free_cmd(&cmd->se_cmd, 0);
+@@ -1528,7 +1541,7 @@ isert_unmap_tx_desc(struct iser_tx_desc
+ 
+ static void
+ isert_completion_put(struct iser_tx_desc *tx_desc, struct isert_cmd *isert_cmd,
+-		     struct ib_device *ib_dev)
++		     struct ib_device *ib_dev, bool comp_err)
+ {
+ 	if (isert_cmd->pdu_buf_dma != 0) {
+ 		pr_debug("Calling ib_dma_unmap_single for isert_cmd->pdu_buf_dma\n");
+@@ -1538,7 +1551,7 @@ isert_completion_put(struct iser_tx_desc
+ 	}
+ 
+ 	isert_unmap_tx_desc(tx_desc, ib_dev);
+-	isert_put_cmd(isert_cmd);
++	isert_put_cmd(isert_cmd, comp_err);
+ }
+ 
+ static void
+@@ -1582,14 +1595,14 @@ isert_do_control_comp(struct work_struct
+ 		iscsit_tmr_post_handler(cmd, cmd->conn);
+ 
+ 		cmd->i_state = ISTATE_SENT_STATUS;
+-		isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev);
++		isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev, false);
+ 		break;
+ 	case ISTATE_SEND_REJECT:
+ 		pr_debug("Got isert_do_control_comp ISTATE_SEND_REJECT: >>>\n");
+ 		atomic_dec(&isert_conn->post_send_buf_count);
+ 
+ 		cmd->i_state = ISTATE_SENT_STATUS;
+-		isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev);
++		isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev, false);
+ 		break;
+ 	case ISTATE_SEND_LOGOUTRSP:
+ 		pr_debug("Calling iscsit_logout_post_handler >>>>>>>>>>>>>>\n");
+@@ -1603,7 +1616,7 @@ isert_do_control_comp(struct work_struct
+ 	case ISTATE_SEND_TEXTRSP:
+ 		atomic_dec(&isert_conn->post_send_buf_count);
+ 		cmd->i_state = ISTATE_SENT_STATUS;
+-		isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev);
++		isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev, false);
+ 		break;
+ 	default:
+ 		pr_err("Unknown do_control_comp i_state %d\n", cmd->i_state);
+@@ -1634,7 +1647,7 @@ isert_response_completion(struct iser_tx
+ 	atomic_sub(wr->send_wr_num + 1, &isert_conn->post_send_buf_count);
+ 
+ 	cmd->i_state = ISTATE_SENT_STATUS;
+-	isert_completion_put(tx_desc, isert_cmd, ib_dev);
++	isert_completion_put(tx_desc, isert_cmd, ib_dev, false);
+ }
+ 
+ static void
+@@ -1715,7 +1728,7 @@ isert_cq_drain_comp_llist(struct isert_c
+ 		wr = &t->isert_cmd->rdma_wr;
+ 
+ 		atomic_sub(wr->send_wr_num + 1, &isert_conn->post_send_buf_count);
+-		isert_completion_put(t, t->isert_cmd, ib_dev);
++		isert_completion_put(t, t->isert_cmd, ib_dev, true);
+ 	}
+ }
+ 
+@@ -1734,14 +1747,14 @@ isert_cq_tx_comp_err(struct iser_tx_desc
+ 		wr = &t->isert_cmd->rdma_wr;
+ 
+ 		atomic_sub(wr->send_wr_num + 1, &isert_conn->post_send_buf_count);
+-		isert_completion_put(t, t->isert_cmd, ib_dev);
++		isert_completion_put(t, t->isert_cmd, ib_dev, true);
+ 	}
+ 	tx_desc->comp_llnode_batch = NULL;
+ 
+ 	if (!isert_cmd)
+ 		isert_unmap_tx_desc(tx_desc, ib_dev);
+ 	else
+-		isert_completion_put(tx_desc, isert_cmd, ib_dev);
++		isert_completion_put(tx_desc, isert_cmd, ib_dev, true);
+ }
+ 
+ static void
diff --git a/queue-3.14/iser-target-match-frmr-descriptors-to-available-session-tags.patch b/queue-3.14/iser-target-match-frmr-descriptors-to-available-session-tags.patch
new file mode 100644
index 00000000000..b8b7f6b2002
--- /dev/null
+++ b/queue-3.14/iser-target-match-frmr-descriptors-to-available-session-tags.patch
@@ -0,0 +1,117 @@
+From nab@linux-iscsi.org Sat May 3 14:15:20 2014
+From: "Nicholas A. Bellinger"
+Date: Fri, 2 May 2014 21:26:29 +0000
+Subject: iser-target: Match FRMR descriptors to available session tags
+To: target-devel
+Cc: Greg-KH, stable, Nicholas Bellinger, Sagi Grimberg, Or Gerlitz
+Message-ID: <1399065990-30552-2-git-send-email-nab@linux-iscsi.org>
+
+From: Nicholas Bellinger
+
+commit f46d6a8a01d6bbd83a97140f30a72a89b038807b upstream.
+
+This patch changes isert_conn_create_fastreg_pool() to follow
+logic in iscsi_target_locate_portal() for determining how many
+FRMR descriptors to allocate, based upon the number of possible
+per-session command slots that are available.
+
+This addresses an OOPS in isert_reg_rdma() where, due to the use
+of ISCSI_DEF_XMIT_CMDS_MAX, a bogus fast_reg_descriptor could end
+up being returned when the number of active tags exceeded the
+original hardcoded maximum.
+
+Note this also includes moving isert_conn_create_fastreg_pool()
+from isert_connect_request() to isert_put_login_tx() before
+posting the final Login Response PDU, in order to determine the
+se_nacl->queue_depth (e.g. number of tags) per session the target
+will be enforcing.
+
+v2 changes:
+ - Move isert_conn->conn_fr_pool list_head init into
+   isert_conn_request()
+v3 changes:
+ - Drop unnecessary list_empty() check in isert_reg_rdma()
+   (Sagi)
+
+Cc: Sagi Grimberg
+Cc: Or Gerlitz
+Signed-off-by: Nicholas Bellinger
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/infiniband/ulp/isert/ib_isert.c |   35 ++++++++++++++++++--------------
+ 1 file changed, 20 insertions(+), 15 deletions(-)
+
+--- a/drivers/infiniband/ulp/isert/ib_isert.c
++++ b/drivers/infiniband/ulp/isert/ib_isert.c
+@@ -436,11 +436,18 @@ isert_conn_create_fastreg_pool(struct is
+ {
+ 	struct fast_reg_descriptor *fr_desc;
+ 	struct isert_device *device = isert_conn->conn_device;
+-	int i, ret;
++	struct se_session *se_sess = isert_conn->conn->sess->se_sess;
++	struct se_node_acl *se_nacl = se_sess->se_node_acl;
++	int i, ret, tag_num;
++	/*
++	 * Setup the number of FRMRs based upon the number of tags
++	 * available to session in iscsi_target_locate_portal().
++	 */
++	tag_num = max_t(u32, ISCSIT_MIN_TAGS, se_nacl->queue_depth);
++	tag_num = (tag_num * 2) + ISCSIT_EXTRA_TAGS;
+ 
+-	INIT_LIST_HEAD(&isert_conn->conn_fr_pool);
+ 	isert_conn->conn_fr_pool_size = 0;
+-	for (i = 0; i < ISCSI_DEF_XMIT_CMDS_MAX; i++) {
++	for (i = 0; i < tag_num; i++) {
+ 		fr_desc = kzalloc(sizeof(*fr_desc), GFP_KERNEL);
+ 		if (!fr_desc) {
+ 			pr_err("Failed to allocate fast_reg descriptor\n");
+@@ -498,6 +505,7 @@ isert_connect_request(struct rdma_cm_id
+ 	kref_get(&isert_conn->conn_kref);
+ 	mutex_init(&isert_conn->conn_mutex);
+ 	spin_lock_init(&isert_conn->conn_lock);
++	INIT_LIST_HEAD(&isert_conn->conn_fr_pool);
+ 
+ 	cma_id->context = isert_conn;
+ 	isert_conn->conn_cm_id = cma_id;
+@@ -569,15 +577,6 @@ isert_connect_request(struct rdma_cm_id
+ 		goto out_mr;
+ 	}
+ 
+-	if (device->use_fastreg) {
+-		ret = isert_conn_create_fastreg_pool(isert_conn);
+-		if (ret) {
+-			pr_err("Conn: %p failed to create fastreg pool\n",
+-			       isert_conn);
+-			goto out_fastreg;
+-		}
+-	}
+-
+ 	ret = isert_conn_setup_qp(isert_conn, cma_id);
+ 	if (ret)
+ 		goto out_conn_dev;
+@@ -591,9 +590,6 @@ isert_connect_request(struct rdma_cm_id
+ 	return 0;
+ 
+ out_conn_dev:
+-	if (device->use_fastreg)
+-		isert_conn_free_fastreg_pool(isert_conn);
+-out_fastreg:
+ 	ib_dereg_mr(isert_conn->conn_mr);
+ out_mr:
+ 	ib_dealloc_pd(isert_conn->conn_pd);
+@@ -967,6 +963,15 @@ isert_put_login_tx(struct iscsi_conn *co
+ 	}
+ 	if (!login->login_failed) {
+ 		if (login->login_complete) {
++			if (isert_conn->conn_device->use_fastreg) {
++				ret = isert_conn_create_fastreg_pool(isert_conn);
++				if (ret) {
++					pr_err("Conn: %p failed to create"
++					       " fastreg pool\n", isert_conn);
++					return ret;
++				}
++			}
++
+ 			ret = isert_alloc_rx_descriptors(isert_conn);
+ 			if (ret)
+ 				return ret;
diff --git a/queue-3.14/mm-hugetlb-fix-softlockup-when-a-large-number-of-hugepages-are-freed.patch b/queue-3.14/mm-hugetlb-fix-softlockup-when-a-large-number-of-hugepages-are-freed.patch
new file mode 100644
index 00000000000..69b3369a33b
--- /dev/null
+++ b/queue-3.14/mm-hugetlb-fix-softlockup-when-a-large-number-of-hugepages-are-freed.patch
@@ -0,0 +1,89 @@
+From 55f67141a8927b2be3e51840da37b8a2320143ed Mon Sep 17 00:00:00 2001
+From: "Mizuma, Masayoshi"
+Date: Mon, 7 Apr 2014 15:37:54 -0700
+Subject: mm: hugetlb: fix softlockup when a large number of hugepages are freed.
+
+From: "Mizuma, Masayoshi"
+
+commit 55f67141a8927b2be3e51840da37b8a2320143ed upstream.
+
+When I decrease the value of nr_hugepage in procfs a lot, softlockup
+happens. It is because there is no chance of context switch during this
+process.
+
+On the other hand, when I allocate a large number of hugepages, there is
+some chance of context switch. Hence softlockup doesn't happen during
+this process. So it's necessary to add a context switch in the freeing
+process, just as in the allocating process, to avoid softlockup.
+
+When I freed 12 TB of hugepages with kernel-2.6.32-358.el6, the freeing
+process occupied a CPU over 150 seconds and the following softlockup
+message appeared twice or more.
+
+$ echo 6000000 > /proc/sys/vm/nr_hugepages
+$ cat /proc/sys/vm/nr_hugepages
+6000000
+$ grep ^Huge /proc/meminfo
+HugePages_Total:   6000000
+HugePages_Free:    6000000
+HugePages_Rsvd:          0
+HugePages_Surp:          0
+Hugepagesize:         2048 kB
+$ echo 0 > /proc/sys/vm/nr_hugepages
+
+BUG: soft lockup - CPU#16 stuck for 67s! [sh:12883] ...
+Pid: 12883, comm: sh Not tainted 2.6.32-358.el6.x86_64 #1
+Call Trace:
+  free_pool_huge_page+0xb8/0xd0
+  set_max_huge_pages+0x128/0x190
+  hugetlb_sysctl_handler_common+0x113/0x140
+  hugetlb_sysctl_handler+0x1e/0x20
+  proc_sys_call_handler+0x97/0xd0
+  proc_sys_write+0x14/0x20
+  vfs_write+0xb8/0x1a0
+  sys_write+0x51/0x90
+  __audit_syscall_exit+0x265/0x290
+  system_call_fastpath+0x16/0x1b
+
+I have not confirmed this problem with upstream kernels because I am not
+able to prepare a machine equipped with 12TB memory now. However I
+confirmed that the amount of decreasing hugepages was directly
+proportional to the amount of required time.
+
+I measured required times on a smaller machine. It showed that 130-145
+hugepages decreased per millisecond.
+
+  Amount of decreasing     Required time      Decreasing rate
+  hugepages                (msec)             (pages/msec)
+  ------------------------------------------------------------
+  10,000 pages == 20GB     70 -  74           135-142
+  30,000 pages == 60GB    208 - 229           131-144
+
+This means that, at this rate, decrementing 6TB of hugepages will
+trigger a softlockup with the default threshold of 20 seconds.
+
+Signed-off-by: Masayoshi Mizuma
+Cc: Joonsoo Kim
+Cc: Michal Hocko
+Cc: Wanpeng Li
+Cc: Aneesh Kumar
+Cc: KOSAKI Motohiro
+Cc: Naoya Horiguchi
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/hugetlb.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -1509,6 +1509,7 @@ static unsigned long set_max_huge_pages(
+ 	while (min_count < persistent_huge_pages(h)) {
+ 		if (!free_pool_huge_page(h, nodes_allowed, 0))
+ 			break;
++		cond_resched_lock(&hugetlb_lock);
+ 	}
+ 	while (count < persistent_huge_pages(h)) {
+ 		if (!adjust_pool_surplus(h, nodes_allowed, 1))
diff --git a/queue-3.14/mm-page_alloc-spill-to-remote-nodes-before-waking-kswapd.patch b/queue-3.14/mm-page_alloc-spill-to-remote-nodes-before-waking-kswapd.patch
new file mode 100644
index 00000000000..4b724f5aaff
--- /dev/null
+++ b/queue-3.14/mm-page_alloc-spill-to-remote-nodes-before-waking-kswapd.patch
@@ -0,0 +1,227 @@
+From 3a025760fc158b3726eac89ee95d7f29599e9dfa Mon Sep 17 00:00:00 2001
+From: Johannes Weiner
+Date: Mon, 7 Apr 2014 15:37:48 -0700
+Subject: mm: page_alloc: spill to remote nodes before waking kswapd
+
+From: Johannes Weiner
+
+commit 3a025760fc158b3726eac89ee95d7f29599e9dfa upstream.
+
+On NUMA systems, a node may start thrashing cache or even swap anonymous
+pages while there are still free pages on remote nodes.
+
+This is a result of commits 81c0a2bb515f ("mm: page_alloc: fair zone
+allocator policy") and fff4068cba48 ("mm: page_alloc: revert NUMA aspect
+of fair allocation policy").
+
+Before those changes, the allocator would first try all allowed zones,
+including those on remote nodes, before waking any kswapds. But now,
+the allocator fastpath doubles as the fairness pass, which in turn can
+only consider the local node to prevent remote spilling based on
+exhausted fairness batches alone. Remote nodes are only considered in
+the slowpath, after the kswapds are woken up. But if remote nodes still
+have free memory, kswapd should not be woken to rebalance the local node
+or it may thrash cache or swap prematurely.
+
+Fix this by adding one more unfair pass over the zonelist that is
+allowed to spill to remote nodes after the local fairness pass fails but
+before entering the slowpath and waking the kswapds.
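
[ Editor's note: not part of the queue. The toy model below compiles as
  plain C and mirrors the allocation order this changelog describes: a
  fair, local-only first pass, then a batch reset and one unfair pass
  that may spill to remote zones, and only if both fail would the
  slowpath wake kswapd. All types and the batch refill constant are
  invented for illustration. ]

	#include <stdbool.h>
	#include <stddef.h>

	struct zone {
		long	alloc_batch;	/* stands in for NR_ALLOC_BATCH */
		long	free_pages;
		bool	local;		/* on the preferred node? */
	};

	static struct zone *try_alloc(struct zone *z, size_t n, bool fair)
	{
		for (size_t i = 0; i < n; i++) {
			if (fair && (!z[i].local || z[i].alloc_batch <= 0))
				continue;	/* fair pass: local zones with batch left */
			if (z[i].free_pages > 0) {
				z[i].free_pages--;
				if (fair)
					z[i].alloc_batch--;
				return &z[i];
			}
		}
		return NULL;
	}

	static struct zone *alloc_page_model(struct zone *zones, size_t n)
	{
		struct zone *z = try_alloc(zones, n, true);	/* ALLOC_FAIR */

		if (!z) {
			for (size_t i = 0; i < n; i++)	/* reset_alloc_batches() */
				if (zones[i].local)
					zones[i].alloc_batch = 100;
			z = try_alloc(zones, n, false);	/* unfair: remote spill OK */
		}
		return z;	/* only a NULL here would wake kswapd (slowpath) */
	}
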
+
+This also gets rid of the GFP_THISNODE exemption from the fairness
+protocol because the unfair pass is no longer tied to kswapd, which
+GFP_THISNODE is not allowed to wake up.
+
+However, because remote spills can be more frequent now - we prefer them
+over local kswapd reclaim - the allocation batches on remote nodes could
+underflow more heavily. When resetting the batches, use
+atomic_long_read() directly instead of zone_page_state() to calculate the
+delta as the latter filters negative counter values.
+
+Signed-off-by: Johannes Weiner
+Acked-by: Rik van Riel
+Acked-by: Mel Gorman
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/internal.h   |    1 
+ mm/page_alloc.c |   89 ++++++++++++++++++++++++++++----------------------------
+ 2 files changed, 46 insertions(+), 44 deletions(-)
+
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -370,5 +370,6 @@ unsigned long reclaim_clean_pages_from_l
+ #define ALLOC_HIGH		0x20 /* __GFP_HIGH set */
+ #define ALLOC_CPUSET		0x40 /* check for correct cpuset */
+ #define ALLOC_CMA		0x80 /* allow allocations from CMA areas */
++#define ALLOC_FAIR		0x100 /* fair zone allocation */
+ 
+ #endif	/* __MM_INTERNAL_H */
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -1238,15 +1238,6 @@ void drain_zone_pages(struct zone *zone,
+ 	}
+ 	local_irq_restore(flags);
+ }
+-static bool gfp_thisnode_allocation(gfp_t gfp_mask)
+-{
+-	return (gfp_mask & GFP_THISNODE) == GFP_THISNODE;
+-}
+-#else
+-static bool gfp_thisnode_allocation(gfp_t gfp_mask)
+-{
+-	return false;
+-}
+ #endif
+ 
+ /*
+@@ -1583,12 +1574,7 @@ again:
+ 					  get_pageblock_migratetype(page));
+ 	}
+ 
+-	/*
+-	 * NOTE: GFP_THISNODE allocations do not partake in the kswapd
+-	 * aging protocol, so they can't be fair.
+-	 */
+-	if (!gfp_thisnode_allocation(gfp_flags))
+-		__mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
++	__mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
+ 
+ 	__count_zone_vm_events(PGALLOC, zone, 1 << order);
+ 	zone_statistics(preferred_zone, zone, gfp_flags);
+@@ -1954,23 +1940,12 @@ zonelist_scan:
+ 		 * zone size to ensure fair page aging. The zone a
+ 		 * page was allocated in should have no effect on the
+ 		 * time the page has in memory before being reclaimed.
+-		 *
+-		 * Try to stay in local zones in the fastpath. If
+-		 * that fails, the slowpath is entered, which will do
+-		 * another pass starting with the local zones, but
+-		 * ultimately fall back to remote zones that do not
+-		 * partake in the fairness round-robin cycle of this
+-		 * zonelist.
+-		 *
+-		 * NOTE: GFP_THISNODE allocations do not partake in
+-		 * the kswapd aging protocol, so they can't be fair.
+		 */
+-		if ((alloc_flags & ALLOC_WMARK_LOW) &&
+-		    !gfp_thisnode_allocation(gfp_mask)) {
+-			if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
+-				continue;
++		if (alloc_flags & ALLOC_FAIR) {
+ 			if (!zone_local(preferred_zone, zone))
+ 				continue;
++			if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
++				continue;
+ 		}
+ 		/*
+ 		 * When allocating a page cache page for writing, we
+@@ -2408,32 +2383,40 @@ __alloc_pages_high_priority(gfp_t gfp_ma
+ 	return page;
+ }
+ 
+-static void prepare_slowpath(gfp_t gfp_mask, unsigned int order,
+-			     struct zonelist *zonelist,
+-			     enum zone_type high_zoneidx,
+-			     struct zone *preferred_zone)
++static void reset_alloc_batches(struct zonelist *zonelist,
++				enum zone_type high_zoneidx,
++				struct zone *preferred_zone)
+ {
+ 	struct zoneref *z;
+ 	struct zone *zone;
+ 
+ 	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
+-		if (!(gfp_mask & __GFP_NO_KSWAPD))
+-			wakeup_kswapd(zone, order, zone_idx(preferred_zone));
+ 		/*
+ 		 * Only reset the batches of zones that were actually
+-		 * considered in the fast path, we don't want to
+-		 * thrash fairness information for zones that are not
++		 * considered in the fairness pass, we don't want to
++		 * trash fairness information for zones that are not
+ 		 * actually part of this zonelist's round-robin cycle.
+ 		 */
+ 		if (!zone_local(preferred_zone, zone))
+ 			continue;
+ 		mod_zone_page_state(zone, NR_ALLOC_BATCH,
+-			high_wmark_pages(zone) -
+-			low_wmark_pages(zone) -
+-			zone_page_state(zone, NR_ALLOC_BATCH));
++			high_wmark_pages(zone) - low_wmark_pages(zone) -
++			atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
+ 	}
+ }
+ 
++static void wake_all_kswapds(unsigned int order,
++			     struct zonelist *zonelist,
++			     enum zone_type high_zoneidx,
++			     struct zone *preferred_zone)
++{
++	struct zoneref *z;
++	struct zone *zone;
++
++	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
++		wakeup_kswapd(zone, order, zone_idx(preferred_zone));
++}
++
+ static inline int
+ gfp_to_alloc_flags(gfp_t gfp_mask)
+ {
+@@ -2522,12 +2505,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, u
+ 	 * allowed per node queues are empty and that nodes are
+ 	 * over allocated.
+ 	 */
+-	if (gfp_thisnode_allocation(gfp_mask))
++	if (IS_ENABLED(CONFIG_NUMA) &&
++	    (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
+ 		goto nopage;
+ 
+ restart:
+-	prepare_slowpath(gfp_mask, order, zonelist,
+-			 high_zoneidx, preferred_zone);
++	if (!(gfp_mask & __GFP_NO_KSWAPD))
++		wake_all_kswapds(order, zonelist, high_zoneidx, preferred_zone);
+ 
+ 	/*
+ 	 * OK, we're below the kswapd watermark and have kicked background
+@@ -2711,7 +2695,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, u
+ 	struct page *page = NULL;
+ 	int migratetype = allocflags_to_migratetype(gfp_mask);
+ 	unsigned int cpuset_mems_cookie;
+-	int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET;
++	int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR;
+ 	struct mem_cgroup *memcg = NULL;
+ 
+ 	gfp_mask &= gfp_allowed_mask;
+@@ -2752,12 +2736,29 @@ retry_cpuset:
+ 	if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
+ 		alloc_flags |= ALLOC_CMA;
+ #endif
++retry:
+ 	/* First allocation attempt */
+ 	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
+ 			zonelist, high_zoneidx, alloc_flags,
+ 			preferred_zone, migratetype);
+ 	if (unlikely(!page)) {
+ 		/*
++		 * The first pass makes sure allocations are spread
++		 * fairly within the local node. However, the local
++		 * node might have free pages left after the fairness
++		 * batches are exhausted, and remote zones haven't
++		 * even been considered yet. Try once more without
++		 * fairness, and include remote zones now, before
++		 * entering the slowpath and waking kswapd: prefer
++		 * spilling to a remote zone over swapping locally.
++		 */
++		if (alloc_flags & ALLOC_FAIR) {
++			reset_alloc_batches(zonelist, high_zoneidx,
++					    preferred_zone);
++			alloc_flags &= ~ALLOC_FAIR;
++			goto retry;
++		}
++		/*
+ 		 * Runtime PM, block IO and its error handling path
+ 		 * can deadlock because I/O on the device might not
+ 		 * complete.
diff --git a/queue-3.14/mm-try_to_unmap_cluster-should-lock_page-before-mlocking.patch b/queue-3.14/mm-try_to_unmap_cluster-should-lock_page-before-mlocking.patch
new file mode 100644
index 00000000000..3ae5ab3ad99
--- /dev/null
+++ b/queue-3.14/mm-try_to_unmap_cluster-should-lock_page-before-mlocking.patch
@@ -0,0 +1,90 @@
+From 57e68e9cd65b4b8eb4045a1e0d0746458502554c Mon Sep 17 00:00:00 2001
+From: Vlastimil Babka
+Date: Mon, 7 Apr 2014 15:37:50 -0700
+Subject: mm: try_to_unmap_cluster() should lock_page() before mlocking
+
+From: Vlastimil Babka
+
+commit 57e68e9cd65b4b8eb4045a1e0d0746458502554c upstream.
+
+A BUG_ON(!PageLocked) was triggered in mlock_vma_page() by Sasha Levin
+fuzzing with trinity. The call site try_to_unmap_cluster() does not lock
+the pages other than its check_page parameter (which is already locked).
+
+The BUG_ON in mlock_vma_page() is not documented and its purpose is
+somewhat unclear, but apparently it serializes against page migration,
+which could otherwise fail to transfer the PG_mlocked flag. This would
+not be fatal, as the page would be eventually encountered again, but
+NR_MLOCK accounting would become distorted nevertheless. This patch adds
+a comment to the BUG_ON in mlock_vma_page() and munlock_vma_page() to that
+effect.
+
+The call site try_to_unmap_cluster() is fixed so that for page !=
+check_page, trylock_page() is attempted (to avoid possible deadlocks as we
+already have check_page locked) and mlock_vma_page() is performed only
+upon success. If the page lock cannot be obtained, the page is left
+without PG_mlocked, which is again not a problem in the whole unevictable
+memory design.
+
+Signed-off-by: Vlastimil Babka
+Signed-off-by: Bob Liu
+Reported-by: Sasha Levin
+Cc: Wanpeng Li
+Cc: Michel Lespinasse
+Cc: KOSAKI Motohiro
+Acked-by: Rik van Riel
+Cc: David Rientjes
+Cc: Mel Gorman
+Cc: Hugh Dickins
+Cc: Joonsoo Kim
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/mlock.c |    2 ++
+ mm/rmap.c  |   14 ++++++++++++--
+ 2 files changed, 14 insertions(+), 2 deletions(-)
+
+--- a/mm/mlock.c
++++ b/mm/mlock.c
+@@ -79,6 +79,7 @@ void clear_page_mlock(struct page *page)
+  */
+ void mlock_vma_page(struct page *page)
+ {
++	/* Serialize with page migration */
+ 	BUG_ON(!PageLocked(page));
+ 
+ 	if (!TestSetPageMlocked(page)) {
+@@ -174,6 +175,7 @@ unsigned int munlock_vma_page(struct pag
+ 	unsigned int nr_pages;
+ 	struct zone *zone = page_zone(page);
+ 
++	/* For try_to_munlock() and to serialize with page migration */
+ 	BUG_ON(!PageLocked(page));
+ 
+ 	/*
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -1322,9 +1322,19 @@ static int try_to_unmap_cluster(unsigned
+ 		BUG_ON(!page || PageAnon(page));
+ 
+ 		if (locked_vma) {
+-			mlock_vma_page(page);	/* no-op if already mlocked */
+-			if (page == check_page)
++			if (page == check_page) {
++				/* we know we have check_page locked */
++				mlock_vma_page(page);
+ 				ret = SWAP_MLOCK;
++			} else if (trylock_page(page)) {
++				/*
++				 * If we can lock the page, perform mlock.
++				 * Otherwise leave the page alone, it will be
++				 * eventually encountered again later.
++				 */
++				mlock_vma_page(page);
++				unlock_page(page);
++			}
+ 			continue;	/* don't unmap */
+ 		}
+ 
diff --git a/queue-3.14/mm-vmscan-do-not-swap-anon-pages-just-because-free-file-is-low.patch b/queue-3.14/mm-vmscan-do-not-swap-anon-pages-just-because-free-file-is-low.patch
new file mode 100644
index 00000000000..24c63b34dac
--- /dev/null
+++ b/queue-3.14/mm-vmscan-do-not-swap-anon-pages-just-because-free-file-is-low.patch
@@ -0,0 +1,73 @@
+From 0bf1457f0cfca7bc026a82323ad34bcf58ad035d Mon Sep 17 00:00:00 2001
+From: Johannes Weiner
+Date: Tue, 8 Apr 2014 16:04:10 -0700
+Subject: mm: vmscan: do not swap anon pages just because free+file is low
+
+From: Johannes Weiner
+
+commit 0bf1457f0cfca7bc026a82323ad34bcf58ad035d upstream.
+
+Page reclaim force-scans / swaps anonymous pages when file cache drops
+below the high watermark of a zone in order to prevent what little cache
+remains from thrashing.
+
+However, on bigger machines the high watermark value can be quite large
+and when the workload is dominated by a static anonymous/shmem set, the
+file set might just be a small window of used-once cache. In such
+situations, the VM starts swapping heavily when instead it should be
+recycling the no longer used cache.
+
+This is a longer-standing problem, but it's more likely to trigger after
+commit 81c0a2bb515f ("mm: page_alloc: fair zone allocator policy")
+because file pages can no longer accumulate in a single zone and are
+dispersed into smaller fractions among the available zones.
+
+To resolve this, do not force scan anon when file pages are low but
+instead rely on the scan/rotation ratios to make the right prediction.
+
+Signed-off-by: Johannes Weiner
+Acked-by: Rafael Aquini
+Cc: Rik van Riel
+Cc: Mel Gorman
+Cc: Hugh Dickins
+Cc: Suleiman Souhlal
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/vmscan.c |   16 +---------------
+ 1 file changed, 1 insertion(+), 15 deletions(-)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -1848,7 +1848,7 @@ static void get_scan_count(struct lruvec
+ 	struct zone *zone = lruvec_zone(lruvec);
+ 	unsigned long anon_prio, file_prio;
+ 	enum scan_balance scan_balance;
+-	unsigned long anon, file, free;
++	unsigned long anon, file;
+ 	bool force_scan = false;
+ 	unsigned long ap, fp;
+ 	enum lru_list lru;
+@@ -1902,20 +1902,6 @@ static void get_scan_count(struct lruvec
+ 		get_lru_size(lruvec, LRU_INACTIVE_FILE);
+ 
+ 	/*
+-	 * If it's foreseeable that reclaiming the file cache won't be
+-	 * enough to get the zone back into a desirable shape, we have
+-	 * to swap. Better start now and leave the - probably heavily
+-	 * thrashing - remaining file pages alone.
+-	 */
+-	if (global_reclaim(sc)) {
+-		free = zone_page_state(zone, NR_FREE_PAGES);
+-		if (unlikely(file + free <= high_wmark_pages(zone))) {
+-			scan_balance = SCAN_ANON;
+-			goto out;
+-		}
+-	}
+-
+-	/*
+ 	 * There is enough inactive page cache, do not reclaim
+ 	 * anything from the anonymous working set right now.
+ 	 */
diff --git a/queue-3.14/series b/queue-3.14/series
index 3342233b4ba..2a9ecc0c009 100644
--- a/queue-3.14/series
+++ b/queue-3.14/series
@@ -130,3 +130,11 @@ usb-unbind-all-interfaces-before-rebinding-any.patch
 mtip32xx-set-queue-bounce-limit.patch
 mtip32xx-unmap-the-dma-segments-before-completing-the-io-request.patch
 mtip32xx-mtip_async_complete-bug-fixes.patch
+iser-target-match-frmr-descriptors-to-available-session-tags.patch
+iser-target-add-missing-se_cmd-put-for-write_pending-in-tx_comp_err.patch
+sh-fix-format-string-bug-in-stack-tracer.patch
+mm-page_alloc-spill-to-remote-nodes-before-waking-kswapd.patch
+mm-try_to_unmap_cluster-should-lock_page-before-mlocking.patch
+mm-hugetlb-fix-softlockup-when-a-large-number-of-hugepages-are-freed.patch
+mm-vmscan-do-not-swap-anon-pages-just-because-free-file-is-low.patch
+hung_task-check-the-value-of-sysctl_hung_task_timeout_sec.patch
diff --git a/queue-3.14/sh-fix-format-string-bug-in-stack-tracer.patch b/queue-3.14/sh-fix-format-string-bug-in-stack-tracer.patch
new file mode 100644
index 00000000000..142ff0b7463
--- /dev/null
+++ b/queue-3.14/sh-fix-format-string-bug-in-stack-tracer.patch
@@ -0,0 +1,40 @@
+From a0c32761e73c9999cbf592b702f284221fea8040 Mon Sep 17 00:00:00 2001
+From: Matt Fleming
+Date: Thu, 3 Apr 2014 14:46:20 -0700
+Subject: sh: fix format string bug in stack tracer
+
+From: Matt Fleming
+
+commit a0c32761e73c9999cbf592b702f284221fea8040 upstream.
+
+Kees reported the following error:
+
+   arch/sh/kernel/dumpstack.c: In function 'print_trace_address':
+   arch/sh/kernel/dumpstack.c:118:2: error: format not a string literal and no format arguments [-Werror=format-security]
+
+Use the "%s" format so that it's impossible to interpret 'data' as a
+format string.
+
+Signed-off-by: Matt Fleming
+Reported-by: Kees Cook
+Acked-by: Kees Cook
+Cc: Paul Mundt
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/sh/kernel/dumpstack.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/sh/kernel/dumpstack.c
++++ b/arch/sh/kernel/dumpstack.c
+@@ -115,7 +115,7 @@ static int print_trace_stack(void *data,
+  */
+ static void print_trace_address(void *data, unsigned long addr, int reliable)
+ {
+-	printk(data);
++	printk("%s", (char *)data);
+ 	printk_address(addr, reliable);
+ }
+ 
-- 
2.47.3
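
[ Editor's note: not part of the queue. The user-space demo below shows
  the bug class the last patch fixes: passing externally supplied text as
  a printf/printk format string lets stray '%' directives walk a
  nonexistent argument list. gcc's -Wformat-security flags the bad call,
  just as it did for arch/sh above. Function and variable names mimic the
  patched code but are otherwise invented. ]

	#include <stdio.h>

	static void print_trace_address_bad(const char *data)
	{
		printf(data);		/* data is parsed as a format string */
	}

	static void print_trace_address_good(const char *data)
	{
		printf("%s", data);	/* data is printed literally */
	}

	int main(void)
	{
		/* imagine this prefix came from an untrusted source */
		const char *data = " [trace] %lx %lx %lx";

		print_trace_address_good(data);	/* safe: prints the %lx literally */
		print_trace_address_bad(data);	/* undefined behavior: leaks stack */
		putchar('\n');
		return 0;
	}
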