--- /dev/null
+From 80df28476505ed4e6701c3448c63c9229a50c655 Mon Sep 17 00:00:00 2001
+From: Liu Hua <sdu.liu@huawei.com>
+Date: Mon, 7 Apr 2014 15:38:57 -0700
+Subject: hung_task: check the value of "sysctl_hung_task_timeout_sec"
+
+From: Liu Hua <sdu.liu@huawei.com>
+
+commit 80df28476505ed4e6701c3448c63c9229a50c655 upstream.
+
+Since sysctl_hung_task_timeout_sec is an unsigned long, when this value is
+larger than LONG_MAX/HZ, the function schedule_timeout_interruptible in the
+watchdog will return immediately without sleeping and print:
+
+ schedule_timeout: wrong timeout value ffffffffffffff83
+
+and then the watchdog function will call schedule_timeout_interruptible
+again and again. The screen will be filled with
+
+ "schedule_timeout: wrong timeout value ffffffffffffff83"
+
+This patch adds a check and correction in sysctl so that
+schedule_timeout_interruptible always gets a valid parameter.
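+
+For illustration, here is a minimal userspace sketch of the overflow
+(plain C, not kernel code; HZ=1000 is an assumed config value): the
+seconds-to-jiffies conversion wraps around for values above LONG_MAX/HZ,
+so schedule_timeout() sees what looks like a negative jiffies count:
+
+    #include <limits.h>
+    #include <stdio.h>
+
+    #define HZ 1000UL
+
+    int main(void)
+    {
+        /* a sysctl write larger than LONG_MAX/HZ */
+        unsigned long secs = LONG_MAX;
+        unsigned long timeout = secs * HZ;      /* unsigned wraparound */
+
+        if ((long)timeout < 0)
+            printf("wrong timeout value %lx\n", timeout);
+
+        printf("largest safe value: %lu seconds\n", LONG_MAX / HZ);
+        return 0;
+    }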
+
+Signed-off-by: Liu Hua <sdu.liu@huawei.com>
+Tested-by: Satoru Takeuchi <satoru.takeuchi@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ Documentation/sysctl/kernel.txt | 1 +
+ kernel/sysctl.c | 6 ++++++
+ 2 files changed, 7 insertions(+)
+
+--- a/Documentation/sysctl/kernel.txt
++++ b/Documentation/sysctl/kernel.txt
+@@ -317,6 +317,7 @@ for more than this value report a warnin
+ This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
+
+ 0: means infinite timeout - no checking done.
++Possible values to set are in range {0..LONG_MAX/HZ}.
+
+ ==============================================================
+
+--- a/kernel/sysctl.c
++++ b/kernel/sysctl.c
+@@ -144,6 +144,11 @@ static int min_percpu_pagelist_fract = 8
+ static int ngroups_max = NGROUPS_MAX;
+ static const int cap_last_cap = CAP_LAST_CAP;
+
++/*this is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs */
++#ifdef CONFIG_DETECT_HUNG_TASK
++static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
++#endif
++
+ #ifdef CONFIG_INOTIFY_USER
+ #include <linux/inotify.h>
+ #endif
+@@ -995,6 +1000,7 @@ static struct ctl_table kern_table[] = {
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_dohung_task_timeout_secs,
++ .extra2 = &hung_task_timeout_max,
+ },
+ {
+ .procname = "hung_task_warnings",
--- /dev/null
+From nab@linux-iscsi.org Sat May 3 14:15:37 2014
+From: "Nicholas A. Bellinger" <nab@linux-iscsi.org>
+Date: Fri, 2 May 2014 21:26:30 +0000
+Subject: [PATCH-v3.14.y 2/2] iser-target: Add missing se_cmd put for WRITE_PENDING in tx_comp_err
+To: target-devel <target-devel@vger.kernel.org>
+Cc: Greg-KH <gregkh@linuxfoundation.org>, stable <stable@vger.kernel.org>, Nicholas Bellinger <nab@linux-iscsi.org>, Or Gerlitz <ogerlitz@mellanox.com>
+Message-ID: <1399065990-30552-3-git-send-email-nab@linux-iscsi.org>
+
+
+From: Nicholas Bellinger <nab@linux-iscsi.org>
+
+commit 03e7848a64ed535a30f5d7fc6dede2d5a6a2534b upstream.
+
+This patch fixes a bug where outstanding RDMA_READs with WRITE_PENDING
+status require an extra target_put_sess_cmd() in isert_put_cmd() code
+when called from isert_cq_tx_comp_err() + isert_cq_drain_comp_llist()
+context during session shutdown.
+
+The extra kref PUT is required so that transport_generic_free_cmd()
+invokes the last target_put_sess_cmd() -> target_release_cmd_kref(),
+which will complete(&se_cmd->cmd_wait_comp) the outstanding se_cmd
+descriptor with WRITE_PENDING status, and wake the waiter in
+target_wait_for_sess_cmds() to invoke TFO->release_cmd().
+
+The bug was manifesting itself in target_wait_for_sess_cmds(), where an
+se_cmd descriptor with WRITE_PENDING status would end up sleeping
+indefinitely.
+
+Acked-by: Sagi Grimberg <sagig@mellanox.com>
+Cc: Or Gerlitz <ogerlitz@mellanox.com>
+Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/ulp/isert/ib_isert.c | 35 +++++++++++++++++++++-----------
+ 1 file changed, 24 insertions(+), 11 deletions(-)
+
+--- a/drivers/infiniband/ulp/isert/ib_isert.c
++++ b/drivers/infiniband/ulp/isert/ib_isert.c
+@@ -1456,7 +1456,7 @@ isert_unreg_rdma(struct isert_cmd *isert
+ }
+
+ static void
+-isert_put_cmd(struct isert_cmd *isert_cmd)
++isert_put_cmd(struct isert_cmd *isert_cmd, bool comp_err)
+ {
+ struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
+ struct isert_conn *isert_conn = isert_cmd->conn;
+@@ -1472,8 +1472,21 @@ isert_put_cmd(struct isert_cmd *isert_cm
+ list_del_init(&cmd->i_conn_node);
+ spin_unlock_bh(&conn->cmd_lock);
+
+- if (cmd->data_direction == DMA_TO_DEVICE)
++ if (cmd->data_direction == DMA_TO_DEVICE) {
+ iscsit_stop_dataout_timer(cmd);
++ /*
++ * Check for special case during comp_err where
++ * WRITE_PENDING has been handed off from core,
++ * but requires an extra target_put_sess_cmd()
++ * before transport_generic_free_cmd() below.
++ */
++ if (comp_err &&
++ cmd->se_cmd.t_state == TRANSPORT_WRITE_PENDING) {
++ struct se_cmd *se_cmd = &cmd->se_cmd;
++
++ target_put_sess_cmd(se_cmd->se_sess, se_cmd);
++ }
++ }
+
+ device->unreg_rdma_mem(isert_cmd, isert_conn);
+ transport_generic_free_cmd(&cmd->se_cmd, 0);
+@@ -1528,7 +1541,7 @@ isert_unmap_tx_desc(struct iser_tx_desc
+
+ static void
+ isert_completion_put(struct iser_tx_desc *tx_desc, struct isert_cmd *isert_cmd,
+- struct ib_device *ib_dev)
++ struct ib_device *ib_dev, bool comp_err)
+ {
+ if (isert_cmd->pdu_buf_dma != 0) {
+ pr_debug("Calling ib_dma_unmap_single for isert_cmd->pdu_buf_dma\n");
+@@ -1538,7 +1551,7 @@ isert_completion_put(struct iser_tx_desc
+ }
+
+ isert_unmap_tx_desc(tx_desc, ib_dev);
+- isert_put_cmd(isert_cmd);
++ isert_put_cmd(isert_cmd, comp_err);
+ }
+
+ static void
+@@ -1582,14 +1595,14 @@ isert_do_control_comp(struct work_struct
+ iscsit_tmr_post_handler(cmd, cmd->conn);
+
+ cmd->i_state = ISTATE_SENT_STATUS;
+- isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev);
++ isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev, false);
+ break;
+ case ISTATE_SEND_REJECT:
+ pr_debug("Got isert_do_control_comp ISTATE_SEND_REJECT: >>>\n");
+ atomic_dec(&isert_conn->post_send_buf_count);
+
+ cmd->i_state = ISTATE_SENT_STATUS;
+- isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev);
++ isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev, false);
+ break;
+ case ISTATE_SEND_LOGOUTRSP:
+ pr_debug("Calling iscsit_logout_post_handler >>>>>>>>>>>>>>\n");
+@@ -1603,7 +1616,7 @@ isert_do_control_comp(struct work_struct
+ case ISTATE_SEND_TEXTRSP:
+ atomic_dec(&isert_conn->post_send_buf_count);
+ cmd->i_state = ISTATE_SENT_STATUS;
+- isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev);
++ isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev, false);
+ break;
+ default:
+ pr_err("Unknown do_control_comp i_state %d\n", cmd->i_state);
+@@ -1634,7 +1647,7 @@ isert_response_completion(struct iser_tx
+ atomic_sub(wr->send_wr_num + 1, &isert_conn->post_send_buf_count);
+
+ cmd->i_state = ISTATE_SENT_STATUS;
+- isert_completion_put(tx_desc, isert_cmd, ib_dev);
++ isert_completion_put(tx_desc, isert_cmd, ib_dev, false);
+ }
+
+ static void
+@@ -1715,7 +1728,7 @@ isert_cq_drain_comp_llist(struct isert_c
+ wr = &t->isert_cmd->rdma_wr;
+
+ atomic_sub(wr->send_wr_num + 1, &isert_conn->post_send_buf_count);
+- isert_completion_put(t, t->isert_cmd, ib_dev);
++ isert_completion_put(t, t->isert_cmd, ib_dev, true);
+ }
+ }
+
+@@ -1734,14 +1747,14 @@ isert_cq_tx_comp_err(struct iser_tx_desc
+ wr = &t->isert_cmd->rdma_wr;
+
+ atomic_sub(wr->send_wr_num + 1, &isert_conn->post_send_buf_count);
+- isert_completion_put(t, t->isert_cmd, ib_dev);
++ isert_completion_put(t, t->isert_cmd, ib_dev, true);
+ }
+ tx_desc->comp_llnode_batch = NULL;
+
+ if (!isert_cmd)
+ isert_unmap_tx_desc(tx_desc, ib_dev);
+ else
+- isert_completion_put(tx_desc, isert_cmd, ib_dev);
++ isert_completion_put(tx_desc, isert_cmd, ib_dev, true);
+ }
+
+ static void
--- /dev/null
+From nab@linux-iscsi.org Sat May 3 14:15:20 2014
+From: "Nicholas A. Bellinger" <nab@linux-iscsi.org>
+Date: Fri, 2 May 2014 21:26:29 +0000
+Subject: iser-target: Match FRMR descriptors to available session tags
+To: target-devel <target-devel@vger.kernel.org>
+Cc: Greg-KH <gregkh@linuxfoundation.org>, stable <stable@vger.kernel.org>, Nicholas Bellinger <nab@linux-iscsi.org>, Sagi Grimberg <sagig@mellanox.com>, Or Gerlitz <ogerlitz@mellanox.com>
+Message-ID: <1399065990-30552-2-git-send-email-nab@linux-iscsi.org>
+
+From: Nicholas Bellinger <nab@linux-iscsi.org>
+
+commit f46d6a8a01d6bbd83a97140f30a72a89b038807b upstream.
+
+This patch changes isert_conn_create_fastreg_pool() to follow
+logic in iscsi_target_locate_portal() for determining how many
+FRMR descriptors to allocate based upon the number of possible
+per-session command slots that are available.
+
+This addresses an oops in isert_reg_rdma() where, due to the use of
+ISCSI_DEF_XMIT_CMDS_MAX, a bogus fast_reg_descriptor could end up
+being returned when the number of active tags exceeded the original
+hardcoded maximum.
+
+Note this also includes moving isert_conn_create_fastreg_pool()
+from isert_connect_request() to isert_put_login_tx() before
+posting the final Login Response PDU in order to determine the
+se_nacl->queue_depth (i.e. the number of tags) per session the target
+will be enforcing.
+
+v2 changes:
+ - Move isert_conn->conn_fr_pool list_head init into
+ isert_conn_request()
+v3 changes:
+ - Drop unnecessary list_empty() check in isert_reg_rdma()
+ (Sagi)
+
+Cc: Sagi Grimberg <sagig@mellanox.com>
+Cc: Or Gerlitz <ogerlitz@mellanox.com>
+Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/ulp/isert/ib_isert.c | 35 ++++++++++++++++++--------------
+ 1 file changed, 20 insertions(+), 15 deletions(-)
+
+--- a/drivers/infiniband/ulp/isert/ib_isert.c
++++ b/drivers/infiniband/ulp/isert/ib_isert.c
+@@ -436,11 +436,18 @@ isert_conn_create_fastreg_pool(struct is
+ {
+ struct fast_reg_descriptor *fr_desc;
+ struct isert_device *device = isert_conn->conn_device;
+- int i, ret;
++ struct se_session *se_sess = isert_conn->conn->sess->se_sess;
++ struct se_node_acl *se_nacl = se_sess->se_node_acl;
++ int i, ret, tag_num;
++ /*
++ * Setup the number of FRMRs based upon the number of tags
++ * available to session in iscsi_target_locate_portal().
++ */
++ tag_num = max_t(u32, ISCSIT_MIN_TAGS, se_nacl->queue_depth);
++ tag_num = (tag_num * 2) + ISCSIT_EXTRA_TAGS;
+
+- INIT_LIST_HEAD(&isert_conn->conn_fr_pool);
+ isert_conn->conn_fr_pool_size = 0;
+- for (i = 0; i < ISCSI_DEF_XMIT_CMDS_MAX; i++) {
++ for (i = 0; i < tag_num; i++) {
+ fr_desc = kzalloc(sizeof(*fr_desc), GFP_KERNEL);
+ if (!fr_desc) {
+ pr_err("Failed to allocate fast_reg descriptor\n");
+@@ -498,6 +505,7 @@ isert_connect_request(struct rdma_cm_id
+ kref_get(&isert_conn->conn_kref);
+ mutex_init(&isert_conn->conn_mutex);
+ spin_lock_init(&isert_conn->conn_lock);
++ INIT_LIST_HEAD(&isert_conn->conn_fr_pool);
+
+ cma_id->context = isert_conn;
+ isert_conn->conn_cm_id = cma_id;
+@@ -569,15 +577,6 @@ isert_connect_request(struct rdma_cm_id
+ goto out_mr;
+ }
+
+- if (device->use_fastreg) {
+- ret = isert_conn_create_fastreg_pool(isert_conn);
+- if (ret) {
+- pr_err("Conn: %p failed to create fastreg pool\n",
+- isert_conn);
+- goto out_fastreg;
+- }
+- }
+-
+ ret = isert_conn_setup_qp(isert_conn, cma_id);
+ if (ret)
+ goto out_conn_dev;
+@@ -591,9 +590,6 @@ isert_connect_request(struct rdma_cm_id
+ return 0;
+
+ out_conn_dev:
+- if (device->use_fastreg)
+- isert_conn_free_fastreg_pool(isert_conn);
+-out_fastreg:
+ ib_dereg_mr(isert_conn->conn_mr);
+ out_mr:
+ ib_dealloc_pd(isert_conn->conn_pd);
+@@ -967,6 +963,15 @@ isert_put_login_tx(struct iscsi_conn *co
+ }
+ if (!login->login_failed) {
+ if (login->login_complete) {
++ if (isert_conn->conn_device->use_fastreg) {
++ ret = isert_conn_create_fastreg_pool(isert_conn);
++ if (ret) {
++ pr_err("Conn: %p failed to create"
++ " fastreg pool\n", isert_conn);
++ return ret;
++ }
++ }
++
+ ret = isert_alloc_rx_descriptors(isert_conn);
+ if (ret)
+ return ret;
--- /dev/null
+From 55f67141a8927b2be3e51840da37b8a2320143ed Mon Sep 17 00:00:00 2001
+From: "Mizuma, Masayoshi" <m.mizuma@jp.fujitsu.com>
+Date: Mon, 7 Apr 2014 15:37:54 -0700
+Subject: mm: hugetlb: fix softlockup when a large number of hugepages are freed.
+
+From: "Mizuma, Masayoshi" <m.mizuma@jp.fujitsu.com>
+
+commit 55f67141a8927b2be3e51840da37b8a2320143ed upstream.
+
+When I decrease the value of nr_hugepages in procfs by a large amount, a
+soft lockup happens because there is no chance of a context switch during
+this process.
+
+On the other hand, when I allocate a large number of hugepages, there is
+some chance of a context switch, so the soft lockup doesn't happen during
+that process. It is therefore necessary to add a context switch to the
+freeing process, as in the allocating process, to avoid the soft lockup.
+
+When I freed 12 TB of hugepages with kernel-2.6.32-358.el6, the freeing
+process occupied a CPU for over 150 seconds and the following soft-lockup
+message appeared twice or more.
+
+$ echo 6000000 > /proc/sys/vm/nr_hugepages
+$ cat /proc/sys/vm/nr_hugepages
+6000000
+$ grep ^Huge /proc/meminfo
+HugePages_Total: 6000000
+HugePages_Free: 6000000
+HugePages_Rsvd: 0
+HugePages_Surp: 0
+Hugepagesize: 2048 kB
+$ echo 0 > /proc/sys/vm/nr_hugepages
+
+BUG: soft lockup - CPU#16 stuck for 67s! [sh:12883] ...
+Pid: 12883, comm: sh Not tainted 2.6.32-358.el6.x86_64 #1
+Call Trace:
+ free_pool_huge_page+0xb8/0xd0
+ set_max_huge_pages+0x128/0x190
+ hugetlb_sysctl_handler_common+0x113/0x140
+ hugetlb_sysctl_handler+0x1e/0x20
+ proc_sys_call_handler+0x97/0xd0
+ proc_sys_write+0x14/0x20
+ vfs_write+0xb8/0x1a0
+ sys_write+0x51/0x90
+ __audit_syscall_exit+0x265/0x290
+ system_call_fastpath+0x16/0x1b
+
+I have not confirmed this problem with upstream kernels because I am not
+able to prepare a machine equipped with 12 TB of memory right now. However,
+I confirmed that the time required was directly proportional to the number
+of hugepages freed.
+
+I measured the required times on a smaller machine. It showed that 130-145
+hugepages were freed per millisecond.
+
+ Amount of decreasing Required time Decreasing rate
+ hugepages (msec) (pages/msec)
+ ------------------------------------------------------------
+ 10,000 pages == 20GB 70 - 74 135-142
+ 30,000 pages == 60GB 208 - 229 131-144
+
+This means that, at this rate, freeing 6 TB of hugepages will trigger a
+soft lockup with the default threshold of 20 seconds.
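+
+(Rough arithmetic behind that estimate, assuming 2 MB hugepages and the
+mid-range rate of about 140 pages/msec: 6 TB / 2 MB = 3,145,728 pages,
+and 3,145,728 / 140 is roughly 22,500 msec, i.e. about 22 seconds, just
+over the default 20 second threshold.)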
+
+Signed-off-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: Michal Hocko <mhocko@suse.cz>
+Cc: Wanpeng Li <liwanp@linux.vnet.ibm.com>
+Cc: Aneesh Kumar <aneesh.kumar@linux.vnet.ibm.com>
+Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/hugetlb.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -1509,6 +1509,7 @@ static unsigned long set_max_huge_pages(
+ while (min_count < persistent_huge_pages(h)) {
+ if (!free_pool_huge_page(h, nodes_allowed, 0))
+ break;
++ cond_resched_lock(&hugetlb_lock);
+ }
+ while (count < persistent_huge_pages(h)) {
+ if (!adjust_pool_surplus(h, nodes_allowed, 1))
--- /dev/null
+From 3a025760fc158b3726eac89ee95d7f29599e9dfa Mon Sep 17 00:00:00 2001
+From: Johannes Weiner <hannes@cmpxchg.org>
+Date: Mon, 7 Apr 2014 15:37:48 -0700
+Subject: mm: page_alloc: spill to remote nodes before waking kswapd
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+commit 3a025760fc158b3726eac89ee95d7f29599e9dfa upstream.
+
+On NUMA systems, a node may start thrashing cache or even swap anonymous
+pages while there are still free pages on remote nodes.
+
+This is a result of commits 81c0a2bb515f ("mm: page_alloc: fair zone
+allocator policy") and fff4068cba48 ("mm: page_alloc: revert NUMA aspect
+of fair allocation policy").
+
+Before those changes, the allocator would first try all allowed zones,
+including those on remote nodes, before waking any kswapds. But now,
+the allocator fastpath doubles as the fairness pass, which in turn can
+only consider the local node to prevent remote spilling based on
+exhausted fairness batches alone. Remote nodes are only considered in
+the slowpath, after the kswapds are woken up. But if remote nodes still
+have free memory, kswapd should not be woken to rebalance the local node
+or it may thrash cache or swap prematurely.
+
+Fix this by adding one more unfair pass over the zonelist that is
+allowed to spill to remote nodes after the local fairness pass fails but
+before entering the slowpath and waking the kswapds.
+
+This also gets rid of the GFP_THISNODE exemption from the fairness
+protocol because the unfair pass is no longer tied to kswapd, which
+GFP_THISNODE is not allowed to wake up.
+
+However, because remote spills can be more frequent now - we prefer them
+over local kswapd reclaim - the allocation batches on remote nodes could
+underflow more heavily. When resetting the batches, use
+atomic_long_read() directly instead of zone_page_state() to calculate the
+delta as the latter filters negative counter values.
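+
+As a rough illustration of the resulting two-pass behaviour, here is a
+toy model (plain C, nothing from the kernel; all structures and numbers
+are invented): pass one honours the per-zone fairness batches and only
+looks at local zones, and only if it fails are the batches refilled and
+a second, unfair pass allowed to spill to remote zones, before any
+kswapd wakeup:
+
+    #include <stdbool.h>
+    #include <stdio.h>
+
+    struct zone { bool local; int batch; int free_pages; };
+
+    static int try_alloc(struct zone *zones, int nr, bool fair)
+    {
+        for (int i = 0; i < nr; i++) {
+            /* fairness pass: local zones with batch left only */
+            if (fair && (!zones[i].local || zones[i].batch <= 0))
+                continue;
+            if (zones[i].free_pages > 0) {
+                zones[i].free_pages--;
+                zones[i].batch--;
+                return i;
+            }
+        }
+        return -1;      /* nothing suitable in this pass */
+    }
+
+    int main(void)
+    {
+        struct zone zones[2] = {
+            { .local = true,  .batch = 2, .free_pages = 0 },  /* local node empty  */
+            { .local = false, .batch = 4, .free_pages = 8 },  /* remote still free */
+        };
+        int z = try_alloc(zones, 2, true);      /* ALLOC_FAIR-like pass */
+
+        if (z < 0) {
+            /* refill batches and retry unfairly, as the patch does,
+             * before the real allocator would wake kswapd */
+            for (int i = 0; i < 2; i++)
+                zones[i].batch = 4;
+            z = try_alloc(zones, 2, false);
+        }
+        if (z >= 0)
+            printf("allocated from zone %d (%s) without waking kswapd\n",
+                   z, zones[z].local ? "local" : "remote");
+        return 0;
+    }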
+
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Acked-by: Rik van Riel <riel@redhat.com>
+Acked-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/internal.h | 1
+ mm/page_alloc.c | 89 ++++++++++++++++++++++++++++----------------------------
+ 2 files changed, 46 insertions(+), 44 deletions(-)
+
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -370,5 +370,6 @@ unsigned long reclaim_clean_pages_from_l
+ #define ALLOC_HIGH 0x20 /* __GFP_HIGH set */
+ #define ALLOC_CPUSET 0x40 /* check for correct cpuset */
+ #define ALLOC_CMA 0x80 /* allow allocations from CMA areas */
++#define ALLOC_FAIR 0x100 /* fair zone allocation */
+
+ #endif /* __MM_INTERNAL_H */
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -1238,15 +1238,6 @@ void drain_zone_pages(struct zone *zone,
+ }
+ local_irq_restore(flags);
+ }
+-static bool gfp_thisnode_allocation(gfp_t gfp_mask)
+-{
+- return (gfp_mask & GFP_THISNODE) == GFP_THISNODE;
+-}
+-#else
+-static bool gfp_thisnode_allocation(gfp_t gfp_mask)
+-{
+- return false;
+-}
+ #endif
+
+ /*
+@@ -1583,12 +1574,7 @@ again:
+ get_pageblock_migratetype(page));
+ }
+
+- /*
+- * NOTE: GFP_THISNODE allocations do not partake in the kswapd
+- * aging protocol, so they can't be fair.
+- */
+- if (!gfp_thisnode_allocation(gfp_flags))
+- __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
++ __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
+
+ __count_zone_vm_events(PGALLOC, zone, 1 << order);
+ zone_statistics(preferred_zone, zone, gfp_flags);
+@@ -1954,23 +1940,12 @@ zonelist_scan:
+ * zone size to ensure fair page aging. The zone a
+ * page was allocated in should have no effect on the
+ * time the page has in memory before being reclaimed.
+- *
+- * Try to stay in local zones in the fastpath. If
+- * that fails, the slowpath is entered, which will do
+- * another pass starting with the local zones, but
+- * ultimately fall back to remote zones that do not
+- * partake in the fairness round-robin cycle of this
+- * zonelist.
+- *
+- * NOTE: GFP_THISNODE allocations do not partake in
+- * the kswapd aging protocol, so they can't be fair.
+ */
+- if ((alloc_flags & ALLOC_WMARK_LOW) &&
+- !gfp_thisnode_allocation(gfp_mask)) {
+- if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
+- continue;
++ if (alloc_flags & ALLOC_FAIR) {
+ if (!zone_local(preferred_zone, zone))
+ continue;
++ if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
++ continue;
+ }
+ /*
+ * When allocating a page cache page for writing, we
+@@ -2408,32 +2383,40 @@ __alloc_pages_high_priority(gfp_t gfp_ma
+ return page;
+ }
+
+-static void prepare_slowpath(gfp_t gfp_mask, unsigned int order,
+- struct zonelist *zonelist,
+- enum zone_type high_zoneidx,
+- struct zone *preferred_zone)
++static void reset_alloc_batches(struct zonelist *zonelist,
++ enum zone_type high_zoneidx,
++ struct zone *preferred_zone)
+ {
+ struct zoneref *z;
+ struct zone *zone;
+
+ for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
+- if (!(gfp_mask & __GFP_NO_KSWAPD))
+- wakeup_kswapd(zone, order, zone_idx(preferred_zone));
+ /*
+ * Only reset the batches of zones that were actually
+- * considered in the fast path, we don't want to
+- * thrash fairness information for zones that are not
++ * considered in the fairness pass, we don't want to
++ * trash fairness information for zones that are not
+ * actually part of this zonelist's round-robin cycle.
+ */
+ if (!zone_local(preferred_zone, zone))
+ continue;
+ mod_zone_page_state(zone, NR_ALLOC_BATCH,
+- high_wmark_pages(zone) -
+- low_wmark_pages(zone) -
+- zone_page_state(zone, NR_ALLOC_BATCH));
++ high_wmark_pages(zone) - low_wmark_pages(zone) -
++ atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
+ }
+ }
+
++static void wake_all_kswapds(unsigned int order,
++ struct zonelist *zonelist,
++ enum zone_type high_zoneidx,
++ struct zone *preferred_zone)
++{
++ struct zoneref *z;
++ struct zone *zone;
++
++ for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
++ wakeup_kswapd(zone, order, zone_idx(preferred_zone));
++}
++
+ static inline int
+ gfp_to_alloc_flags(gfp_t gfp_mask)
+ {
+@@ -2522,12 +2505,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, u
+ * allowed per node queues are empty and that nodes are
+ * over allocated.
+ */
+- if (gfp_thisnode_allocation(gfp_mask))
++ if (IS_ENABLED(CONFIG_NUMA) &&
++ (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
+ goto nopage;
+
+ restart:
+- prepare_slowpath(gfp_mask, order, zonelist,
+- high_zoneidx, preferred_zone);
++ if (!(gfp_mask & __GFP_NO_KSWAPD))
++ wake_all_kswapds(order, zonelist, high_zoneidx, preferred_zone);
+
+ /*
+ * OK, we're below the kswapd watermark and have kicked background
+@@ -2711,7 +2695,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, u
+ struct page *page = NULL;
+ int migratetype = allocflags_to_migratetype(gfp_mask);
+ unsigned int cpuset_mems_cookie;
+- int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET;
++ int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR;
+ struct mem_cgroup *memcg = NULL;
+
+ gfp_mask &= gfp_allowed_mask;
+@@ -2752,12 +2736,29 @@ retry_cpuset:
+ if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
+ alloc_flags |= ALLOC_CMA;
+ #endif
++retry:
+ /* First allocation attempt */
+ page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
+ zonelist, high_zoneidx, alloc_flags,
+ preferred_zone, migratetype);
+ if (unlikely(!page)) {
+ /*
++ * The first pass makes sure allocations are spread
++ * fairly within the local node. However, the local
++ * node might have free pages left after the fairness
++ * batches are exhausted, and remote zones haven't
++ * even been considered yet. Try once more without
++ * fairness, and include remote zones now, before
++ * entering the slowpath and waking kswapd: prefer
++ * spilling to a remote zone over swapping locally.
++ */
++ if (alloc_flags & ALLOC_FAIR) {
++ reset_alloc_batches(zonelist, high_zoneidx,
++ preferred_zone);
++ alloc_flags &= ~ALLOC_FAIR;
++ goto retry;
++ }
++ /*
+ * Runtime PM, block IO and its error handling path
+ * can deadlock because I/O on the device might not
+ * complete.
--- /dev/null
+From 57e68e9cd65b4b8eb4045a1e0d0746458502554c Mon Sep 17 00:00:00 2001
+From: Vlastimil Babka <vbabka@suse.cz>
+Date: Mon, 7 Apr 2014 15:37:50 -0700
+Subject: mm: try_to_unmap_cluster() should lock_page() before mlocking
+
+From: Vlastimil Babka <vbabka@suse.cz>
+
+commit 57e68e9cd65b4b8eb4045a1e0d0746458502554c upstream.
+
+A BUG_ON(!PageLocked) was triggered in mlock_vma_page() by Sasha Levin
+fuzzing with trinity. The call site try_to_unmap_cluster() does not lock
+the pages other than its check_page parameter (which is already locked).
+
+The BUG_ON in mlock_vma_page() is not documented and its purpose is
+somewhat unclear, but apparently it serializes against page migration,
+which could otherwise fail to transfer the PG_mlocked flag. This would
+not be fatal, as the page would be eventually encountered again, but
+NR_MLOCK accounting would become distorted nevertheless. This patch adds
+a comment to the BUG_ON in mlock_vma_page() and munlock_vma_page() to that
+effect.
+
+The call site try_to_unmap_cluster() is fixed so that for page !=
+check_page, trylock_page() is attempted (to avoid possible deadlocks as we
+already have check_page locked) and mlock_vma_page() is performed only
+upon success. If the page lock cannot be obtained, the page is left
+without PG_mlocked, which is again not a problem in the whole unevictable
+memory design.
+
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Bob Liu <bob.liu@oracle.com>
+Reported-by: Sasha Levin <sasha.levin@oracle.com>
+Cc: Wanpeng Li <liwanp@linux.vnet.ibm.com>
+Cc: Michel Lespinasse <walken@google.com>
+Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Acked-by: Rik van Riel <riel@redhat.com>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/mlock.c | 2 ++
+ mm/rmap.c | 14 ++++++++++++--
+ 2 files changed, 14 insertions(+), 2 deletions(-)
+
+--- a/mm/mlock.c
++++ b/mm/mlock.c
+@@ -79,6 +79,7 @@ void clear_page_mlock(struct page *page)
+ */
+ void mlock_vma_page(struct page *page)
+ {
++ /* Serialize with page migration */
+ BUG_ON(!PageLocked(page));
+
+ if (!TestSetPageMlocked(page)) {
+@@ -174,6 +175,7 @@ unsigned int munlock_vma_page(struct pag
+ unsigned int nr_pages;
+ struct zone *zone = page_zone(page);
+
++ /* For try_to_munlock() and to serialize with page migration */
+ BUG_ON(!PageLocked(page));
+
+ /*
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -1322,9 +1322,19 @@ static int try_to_unmap_cluster(unsigned
+ BUG_ON(!page || PageAnon(page));
+
+ if (locked_vma) {
+- mlock_vma_page(page); /* no-op if already mlocked */
+- if (page == check_page)
++ if (page == check_page) {
++ /* we know we have check_page locked */
++ mlock_vma_page(page);
+ ret = SWAP_MLOCK;
++ } else if (trylock_page(page)) {
++ /*
++ * If we can lock the page, perform mlock.
++ * Otherwise leave the page alone, it will be
++ * eventually encountered again later.
++ */
++ mlock_vma_page(page);
++ unlock_page(page);
++ }
+ continue; /* don't unmap */
+ }
+
--- /dev/null
+From 0bf1457f0cfca7bc026a82323ad34bcf58ad035d Mon Sep 17 00:00:00 2001
+From: Johannes Weiner <hannes@cmpxchg.org>
+Date: Tue, 8 Apr 2014 16:04:10 -0700
+Subject: mm: vmscan: do not swap anon pages just because free+file is low
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+commit 0bf1457f0cfca7bc026a82323ad34bcf58ad035d upstream.
+
+Page reclaim force-scans / swaps anonymous pages when file cache drops
+below the high watermark of a zone in order to prevent what little cache
+remains from thrashing.
+
+However, on bigger machines the high watermark value can be quite large
+and when the workload is dominated by a static anonymous/shmem set, the
+file set might just be a small window of used-once cache. In such
+situations, the VM starts swapping heavily when instead it should be
+recycling the no longer used cache.
+
+This is a longer-standing problem, but it's more likely to trigger after
+commit 81c0a2bb515f ("mm: page_alloc: fair zone allocator policy")
+because file pages can no longer accumulate in a single zone and are
+dispersed into smaller fractions among the available zones.
+
+To resolve this, do not force scan anon when file pages are low but
+instead rely on the scan/rotation ratios to make the right prediction.
+
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Acked-by: Rafael Aquini <aquini@redhat.com>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Suleiman Souhlal <suleiman@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/vmscan.c | 16 +---------------
+ 1 file changed, 1 insertion(+), 15 deletions(-)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -1848,7 +1848,7 @@ static void get_scan_count(struct lruvec
+ struct zone *zone = lruvec_zone(lruvec);
+ unsigned long anon_prio, file_prio;
+ enum scan_balance scan_balance;
+- unsigned long anon, file, free;
++ unsigned long anon, file;
+ bool force_scan = false;
+ unsigned long ap, fp;
+ enum lru_list lru;
+@@ -1902,20 +1902,6 @@ static void get_scan_count(struct lruvec
+ get_lru_size(lruvec, LRU_INACTIVE_FILE);
+
+ /*
+- * If it's foreseeable that reclaiming the file cache won't be
+- * enough to get the zone back into a desirable shape, we have
+- * to swap. Better start now and leave the - probably heavily
+- * thrashing - remaining file pages alone.
+- */
+- if (global_reclaim(sc)) {
+- free = zone_page_state(zone, NR_FREE_PAGES);
+- if (unlikely(file + free <= high_wmark_pages(zone))) {
+- scan_balance = SCAN_ANON;
+- goto out;
+- }
+- }
+-
+- /*
+ * There is enough inactive page cache, do not reclaim
+ * anything from the anonymous working set right now.
+ */
mtip32xx-set-queue-bounce-limit.patch
mtip32xx-unmap-the-dma-segments-before-completing-the-io-request.patch
mtip32xx-mtip_async_complete-bug-fixes.patch
+iser-target-match-frmr-descriptors-to-available-session-tags.patch
+iser-target-add-missing-se_cmd-put-for-write_pending-in-tx_comp_err.patch
+sh-fix-format-string-bug-in-stack-tracer.patch
+mm-page_alloc-spill-to-remote-nodes-before-waking-kswapd.patch
+mm-try_to_unmap_cluster-should-lock_page-before-mlocking.patch
+mm-hugetlb-fix-softlockup-when-a-large-number-of-hugepages-are-freed.patch
+mm-vmscan-do-not-swap-anon-pages-just-because-free-file-is-low.patch
+hung_task-check-the-value-of-sysctl_hung_task_timeout_sec.patch
--- /dev/null
+From a0c32761e73c9999cbf592b702f284221fea8040 Mon Sep 17 00:00:00 2001
+From: Matt Fleming <matt.fleming@intel.com>
+Date: Thu, 3 Apr 2014 14:46:20 -0700
+Subject: sh: fix format string bug in stack tracer
+
+From: Matt Fleming <matt.fleming@intel.com>
+
+commit a0c32761e73c9999cbf592b702f284221fea8040 upstream.
+
+Kees reported the following error:
+
+ arch/sh/kernel/dumpstack.c: In function 'print_trace_address':
+ arch/sh/kernel/dumpstack.c:118:2: error: format not a string literal and no format arguments [-Werror=format-security]
+
+Use the "%s" format so that it's impossible to interpret 'data' as a
+format string.
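+
+A small userspace analogue (using printf rather than printk; the string
+contents are invented) shows what -Wformat-security is guarding against:
+any '%' conversions inside 'data' would be interpreted and read
+non-existent varargs, while the "%s" form prints the same text verbatim:
+
+    #include <stdio.h>
+
+    int main(void)
+    {
+        const char *data = "  [<%p>] ";     /* hypothetical prefix text */
+
+        printf("%s", data);     /* safe: printed literally */
+        /* printf(data); */     /* unsafe: %p would consume a bogus vararg */
+        putchar('\n');
+        return 0;
+    }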
+
+Signed-off-by: Matt Fleming <matt.fleming@intel.com>
+Reported-by: Kees Cook <keescook@chromium.org>
+Acked-by: Kees Cook <keescook@chromium.org>
+Cc: Paul Mundt <lethal@linux-sh.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/sh/kernel/dumpstack.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/sh/kernel/dumpstack.c
++++ b/arch/sh/kernel/dumpstack.c
+@@ -115,7 +115,7 @@ static int print_trace_stack(void *data,
+ */
+ static void print_trace_address(void *data, unsigned long addr, int reliable)
+ {
+- printk(data);
++ printk("%s", (char *)data);
+ printk_address(addr, reliable);
+ }
+