From: Greg Kroah-Hartman Date: Sat, 26 Sep 2015 17:28:40 +0000 (-0700) Subject: 3.14-stable patches X-Git-Tag: v4.1.9~23 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=0225c50881557111e5ebcdff08f43fdd2e24fdeb;p=thirdparty%2Fkernel%2Fstable-queue.git 3.14-stable patches added patches: fs-if-a-coredump-already-exists-unlink-and-recreate-with-o_excl.patch md-raid10-always-set-reshape_safe-when-initializing-reshape_position.patch mmc-core-fix-race-condition-in-mmc_wait_data_done.patch parisc-filter-out-spurious-interrupts-in-pa-risc-irq-handler.patch parisc-use-double-word-condition-in-64bit-cas-operation.patch vmscan-fix-increasing-nr_isolated-incurred-by-putback-unevictable-pages.patch --- diff --git a/queue-3.14/fs-if-a-coredump-already-exists-unlink-and-recreate-with-o_excl.patch b/queue-3.14/fs-if-a-coredump-already-exists-unlink-and-recreate-with-o_excl.patch new file mode 100644 index 00000000000..894e78b1cab --- /dev/null +++ b/queue-3.14/fs-if-a-coredump-already-exists-unlink-and-recreate-with-o_excl.patch @@ -0,0 +1,146 @@ +From fbb1816942c04429e85dbf4c1a080accc534299e Mon Sep 17 00:00:00 2001 +From: Jann Horn +Date: Wed, 9 Sep 2015 15:38:28 -0700 +Subject: fs: if a coredump already exists, unlink and recreate with O_EXCL + +From: Jann Horn + +commit fbb1816942c04429e85dbf4c1a080accc534299e upstream. + +It was possible for an attacking user to trick root (or another user) into +writing his coredumps into an attacker-readable, pre-existing file using +rename() or link(), causing the disclosure of secret data from the victim +process' virtual memory. Depending on the configuration, it was also +possible to trick root into overwriting system files with coredumps. Fix +that issue by never writing coredumps into existing files. + +Requirements for the attack: + - The attack only applies if the victim's process has a nonzero + RLIMIT_CORE and is dumpable. 
+ - The attacker can trick the victim into coredumping into an + attacker-writable directory D, either because the core_pattern is + relative and the victim's cwd is attacker-writable or because an + absolute core_pattern pointing to a world-writable directory is used. + - The attacker has one of these: + A: on a system with protected_hardlinks=0: + execute access to a folder containing a victim-owned, + attacker-readable file on the same partition as D, and the + victim-owned file will be deleted before the main part of the attack + takes place. (In practice, there are lots of files that fulfill + this condition, e.g. entries in Debian's /var/lib/dpkg/info/.) + This does not apply to most Linux systems because most distros set + protected_hardlinks=1. + B: on a system with protected_hardlinks=1: + execute access to a folder containing a victim-owned, + attacker-readable and attacker-writable file on the same partition + as D, and the victim-owned file will be deleted before the main part + of the attack takes place. + (This seems to be uncommon.) + C: on any system, independent of protected_hardlinks: + write access to a non-sticky folder containing a victim-owned, + attacker-readable file on the same partition as D + (This seems to be uncommon.) + +The basic idea is that the attacker moves the victim-owned file to where +he expects the victim process to dump its core. The victim process dumps +its core into the existing file, and the attacker reads the coredump from +it. + +If the attacker can't move the file because he does not have write access +to the containing directory, he can instead link the file to a directory +he controls, then wait for the original link to the file to be deleted +(because the kernel checks that the link count of the corefile is 1). 
+ +A less reliable variant that requires D to be non-sticky works with link() +and does not require deletion of the original link: link() the file into +D, but then unlink() it directly before the kernel performs the link count +check. + +On systems with protected_hardlinks=0, this variant allows an attacker to +not only gain information from coredumps, but also clobber existing, +victim-writable files with coredumps. (This could theoretically lead to a +privilege escalation.) + +Signed-off-by: Jann Horn +Cc: Kees Cook +Cc: Al Viro +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/coredump.c | 38 ++++++++++++++++++++++++++++++++------ + 1 file changed, 32 insertions(+), 6 deletions(-) + +--- a/fs/coredump.c ++++ b/fs/coredump.c +@@ -498,10 +498,10 @@ void do_coredump(const siginfo_t *siginf + const struct cred *old_cred; + struct cred *cred; + int retval = 0; +- int flag = 0; + int ispipe; + struct files_struct *displaced; +- bool need_nonrelative = false; ++ /* require nonrelative corefile path and be extra careful */ ++ bool need_suid_safe = false; + bool core_dumped = false; + static atomic_t core_dump_count = ATOMIC_INIT(0); + struct coredump_params cprm = { +@@ -535,9 +535,8 @@ void do_coredump(const siginfo_t *siginf + */ + if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) { + /* Setuid core dump mode */ +- flag = O_EXCL; /* Stop rewrite attacks */ + cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ +- need_nonrelative = true; ++ need_suid_safe = true; + } + + retval = coredump_wait(siginfo->si_signo, &core_state); +@@ -618,7 +617,7 @@ void do_coredump(const siginfo_t *siginf + if (cprm.limit < binfmt->min_coredump) + goto fail_unlock; + +- if (need_nonrelative && cn.corename[0] != '/') { ++ if (need_suid_safe && cn.corename[0] != '/') { + printk(KERN_WARNING "Pid %d(%s) can only dump core "\ + "to fully qualified path!\n", + task_tgid_vnr(current), current->comm); +@@ -626,8 +625,35 @@ void 
do_coredump(const siginfo_t *siginf + goto fail_unlock; + } + ++ /* ++ * Unlink the file if it exists unless this is a SUID ++ * binary - in that case, we're running around with root ++ * privs and don't want to unlink another user's coredump. ++ */ ++ if (!need_suid_safe) { ++ mm_segment_t old_fs; ++ ++ old_fs = get_fs(); ++ set_fs(KERNEL_DS); ++ /* ++ * If it doesn't exist, that's fine. If there's some ++ * other problem, we'll catch it at the filp_open(). ++ */ ++ (void) sys_unlink((const char __user *)cn.corename); ++ set_fs(old_fs); ++ } ++ ++ /* ++ * There is a race between unlinking and creating the ++ * file, but if that causes an EEXIST here, that's ++ * fine - another process raced with us while creating ++ * the corefile, and the other process won. To userspace, ++ * what matters is that at least one of the two processes ++ * writes its coredump successfully, not which one. ++ */ + cprm.file = filp_open(cn.corename, +- O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, ++ O_CREAT | 2 | O_NOFOLLOW | ++ O_LARGEFILE | O_EXCL, + 0600); + if (IS_ERR(cprm.file)) + goto fail_unlock; diff --git a/queue-3.14/md-raid10-always-set-reshape_safe-when-initializing-reshape_position.patch b/queue-3.14/md-raid10-always-set-reshape_safe-when-initializing-reshape_position.patch new file mode 100644 index 00000000000..f309fbff370 --- /dev/null +++ b/queue-3.14/md-raid10-always-set-reshape_safe-when-initializing-reshape_position.patch @@ -0,0 +1,74 @@ +From 299b0685e31c9f3dcc2d58ee3beca761a40b44b3 Mon Sep 17 00:00:00 2001 +From: NeilBrown +Date: Mon, 6 Jul 2015 17:37:49 +1000 +Subject: md/raid10: always set reshape_safe when initializing reshape_position. + +From: NeilBrown + +commit 299b0685e31c9f3dcc2d58ee3beca761a40b44b3 upstream. + +'reshape_position' tracks where in the reshape we have reached. +'reshape_safe' tracks where in the reshape we have safely recorded +in the metadata. + +These are compared to determine when to update the metadata. 
+So it is important that reshape_safe is initialised properly. +Currently it isn't. When starting a reshape from the beginning +it usually has the correct value by luck. But when reducing the +number of devices in a RAID10, it has the wrong value and this leads +to the metadata not being updated correctly. +This can lead to corruption if the reshape is not allowed to complete. + +This patch is suitable for any -stable kernel which supports RAID10 +reshape, which is 3.5 and later. + +Fixes: 3ea7daa5d7fd ("md/raid10: add reshape support") +Signed-off-by: NeilBrown +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/raid10.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/md/raid10.c ++++ b/drivers/md/raid10.c +@@ -3585,6 +3585,7 @@ static struct r10conf *setup_conf(struct + /* far_copies must be 1 */ + conf->prev.stride = conf->dev_sectors; + } ++ conf->reshape_safe = conf->reshape_progress; + spin_lock_init(&conf->device_lock); + INIT_LIST_HEAD(&conf->retry_list); + +@@ -3793,7 +3794,6 @@ static int run(struct mddev *mddev) + } + conf->offset_diff = min_offset_diff; + +- conf->reshape_safe = conf->reshape_progress; + clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); + clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); + set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); +@@ -4138,6 +4138,7 @@ static int raid10_start_reshape(struct m + conf->reshape_progress = size; + } else + conf->reshape_progress = 0; ++ conf->reshape_safe = conf->reshape_progress; + spin_unlock_irq(&conf->device_lock); + + if (mddev->delta_disks && mddev->bitmap) { +@@ -4204,6 +4205,7 @@ abort: + rdev->new_data_offset = rdev->data_offset; + smp_wmb(); + conf->reshape_progress = MaxSector; ++ conf->reshape_safe = MaxSector; + mddev->reshape_position = MaxSector; + spin_unlock_irq(&conf->device_lock); + return ret; +@@ -4556,6 +4558,7 @@ static void end_reshape(struct r10conf * + md_finish_reshape(conf->mddev); + smp_wmb(); + conf->reshape_progress = MaxSector; ++ 
conf->reshape_safe = MaxSector; + spin_unlock_irq(&conf->device_lock); + + /* read-ahead size must cover two whole stripes, which is diff --git a/queue-3.14/mmc-core-fix-race-condition-in-mmc_wait_data_done.patch b/queue-3.14/mmc-core-fix-race-condition-in-mmc_wait_data_done.patch new file mode 100644 index 00000000000..38103853e84 --- /dev/null +++ b/queue-3.14/mmc-core-fix-race-condition-in-mmc_wait_data_done.patch @@ -0,0 +1,105 @@ +From 71f8a4b81d040b3d094424197ca2f1bf811b1245 Mon Sep 17 00:00:00 2001 +From: Jialing Fu +Date: Fri, 28 Aug 2015 11:13:09 +0800 +Subject: mmc: core: fix race condition in mmc_wait_data_done + +From: Jialing Fu + +commit 71f8a4b81d040b3d094424197ca2f1bf811b1245 upstream. + +The following panic is captured in ker3.14, but the issue still exists +in latest kernel. +--------------------------------------------------------------------- +[ 20.738217] c0 3136 (Compiler) Unable to handle kernel NULL pointer dereference +at virtual address 00000578 +...... +[ 20.738499] c0 3136 (Compiler) PC is at _raw_spin_lock_irqsave+0x24/0x60 +[ 20.738527] c0 3136 (Compiler) LR is at _raw_spin_lock_irqsave+0x20/0x60 +[ 20.740134] c0 3136 (Compiler) Call trace: +[ 20.740165] c0 3136 (Compiler) [] _raw_spin_lock_irqsave+0x24/0x60 +[ 20.740200] c0 3136 (Compiler) [] __wake_up+0x1c/0x54 +[ 20.740230] c0 3136 (Compiler) [] mmc_wait_data_done+0x28/0x34 +[ 20.740262] c0 3136 (Compiler) [] mmc_request_done+0xa4/0x220 +[ 20.740314] c0 3136 (Compiler) [] sdhci_tasklet_finish+0xac/0x264 +[ 20.740352] c0 3136 (Compiler) [] tasklet_action+0xa0/0x158 +[ 20.740382] c0 3136 (Compiler) [] __do_softirq+0x10c/0x2e4 +[ 20.740411] c0 3136 (Compiler) [] irq_exit+0x8c/0xc0 +[ 20.740439] c0 3136 (Compiler) [] handle_IRQ+0x48/0xac +[ 20.740469] c0 3136 (Compiler) [] gic_handle_irq+0x38/0x7c +---------------------------------------------------------------------- +Because in SMP, "mrq" has race condition between below two paths: +path1: CPU0: + static void mmc_wait_data_done(struct 
mmc_request *mrq) + { + mrq->host->context_info.is_done_rcv = true; + // + // If CPU0 has just finished "is_done_rcv = true" in path1, and at + // this moment, an IRQ or ICache line miss happens in CPU0. + // What happens in CPU1 (path2)? + // + // If the mmcqd thread in CPU1(path2) hasn't entered sleep mode: + // path2 would have a chance to break from wait_event_interruptible + // in mmc_wait_for_data_req_done and continue to run for the next + // mmc_request (mmc_blk_rw_rq_prep). + // + // Within mmc_blk_rw_rq_prep, mrq is cleared to 0. + // If the line below still loads host from "mrq" (depending on the + // code the compiler generated), the panic happens as we traced. + wake_up_interruptible(&mrq->host->context_info.wait); + } + +path2: CPU1: + static int mmc_wait_for_data_req_done(... + { + ... + while (1) { + wait_event_interruptible(context_info->wait, + (context_info->is_done_rcv || + context_info->is_new_req)); + static void mmc_blk_rw_rq_prep(... + { + ... + memset(brq, 0, sizeof(struct mmc_blk_request)); + +This issue occurs only rarely; however, adding mdelay(1) in +mmc_wait_data_done as below reproduces it easily. + + static void mmc_wait_data_done(struct mmc_request *mrq) + { + mrq->host->context_info.is_done_rcv = true; ++ mdelay(1); + wake_up_interruptible(&mrq->host->context_info.wait); + } + +At runtime, an IRQ or ICache line miss may happen at the same place +as the mdelay(1). + +This patch fetches the mmc_context_info pointer at the beginning of the function, +which avoids this race condition. 
+ +Signed-off-by: Jialing Fu +Tested-by: Shawn Lin +Fixes: 2220eedfd7ae ("mmc: fix async request mechanism ....") +Signed-off-by: Shawn Lin +Signed-off-by: Ulf Hansson +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/mmc/core/core.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/drivers/mmc/core/core.c ++++ b/drivers/mmc/core/core.c +@@ -329,8 +329,10 @@ EXPORT_SYMBOL(mmc_start_bkops); + */ + static void mmc_wait_data_done(struct mmc_request *mrq) + { +- mrq->host->context_info.is_done_rcv = true; +- wake_up_interruptible(&mrq->host->context_info.wait); ++ struct mmc_context_info *context_info = &mrq->host->context_info; ++ ++ context_info->is_done_rcv = true; ++ wake_up_interruptible(&context_info->wait); + } + + static void mmc_wait_done(struct mmc_request *mrq) diff --git a/queue-3.14/parisc-filter-out-spurious-interrupts-in-pa-risc-irq-handler.patch b/queue-3.14/parisc-filter-out-spurious-interrupts-in-pa-risc-irq-handler.patch new file mode 100644 index 00000000000..c4624c8c072 --- /dev/null +++ b/queue-3.14/parisc-filter-out-spurious-interrupts-in-pa-risc-irq-handler.patch @@ -0,0 +1,78 @@ +From b1b4e435e4ef7de77f07bf2a42c8380b960c2d44 Mon Sep 17 00:00:00 2001 +From: Helge Deller +Date: Thu, 3 Sep 2015 22:45:21 +0200 +Subject: parisc: Filter out spurious interrupts in PA-RISC irq handler + +From: Helge Deller + +commit b1b4e435e4ef7de77f07bf2a42c8380b960c2d44 upstream. + +When detecting a serial port on newer PA-RISC machines (with iosapic) we have a +long way to go to find the right IRQ line, registering it, then registering the +serial port and the irq handler for the serial port. During this phase spurious +interrupts for the serial port may happen which then crashes the kernel because +the action handler might not have been set up yet. + +So, basically it's a race condition between the serial port hardware and the +CPU which sets up the necessary fields in the irq structs. 
The main reason for +this race is that we unmask the serial port irqs too early without having set +up everything properly before (which isn't easily possible because we need the +IRQ number to register the serial ports). + +This patch is a work-around for this problem. It adds checks to the CPU irq +handler to verify if the IRQ action field has been initialized already. If not, +we just skip this interrupt (which isn't critical for a serial port at bootup). +The real fix would probably involve rewriting all PA-RISC specific IRQ code +(for CPU, IOSAPIC, GSC and EISA) to use IRQ domains with proper parenting of +the irq chips and proper irq enabling along this line. + +This bug has been in the PA-RISC port since the beginning, but the crashes +happened very rarely with currently used hardware. But on the latest machine +which I bought (a C8000 workstation), which uses the fastest CPUs (4 x PA8900, +1GHz) and which has the largest possible L1 cache size (64MB each), the kernel +crashed at every boot because of this race. So, without this patch the machine +would currently be unusable. + +For the record, here is the flow logic: +1. serial_init_chip() in 8250_gsc.c calls iosapic_serial_irq(). +2. iosapic_serial_irq() calls txn_alloc_irq() to find the irq. +3. iosapic_serial_irq() calls cpu_claim_irq() to register the CPU irq +4. cpu_claim_irq() unmasks the CPU irq (which it shouldn't!) +5. serial_init_chip() then registers the 8250 port. 
+Problems: +- In step 4 the CPU irq shouldn't have been registered yet, but after step 5 +- If serial irq happens between 4 and 5 have finished, the kernel will crash + +Signed-off-by: Helge Deller +Signed-off-by: Greg Kroah-Hartman + +--- + arch/parisc/kernel/irq.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/arch/parisc/kernel/irq.c ++++ b/arch/parisc/kernel/irq.c +@@ -507,8 +507,8 @@ void do_cpu_irq_mask(struct pt_regs *reg + struct pt_regs *old_regs; + unsigned long eirr_val; + int irq, cpu = smp_processor_id(); +-#ifdef CONFIG_SMP + struct irq_desc *desc; ++#ifdef CONFIG_SMP + cpumask_t dest; + #endif + +@@ -521,8 +521,12 @@ void do_cpu_irq_mask(struct pt_regs *reg + goto set_out; + irq = eirr_to_irq(eirr_val); + +-#ifdef CONFIG_SMP ++ /* Filter out spurious interrupts, mostly from serial port at bootup */ + desc = irq_to_desc(irq); ++ if (unlikely(!desc->action)) ++ goto set_out; ++ ++#ifdef CONFIG_SMP + cpumask_copy(&dest, desc->irq_data.affinity); + if (irqd_is_per_cpu(&desc->irq_data) && + !cpu_isset(smp_processor_id(), dest)) { diff --git a/queue-3.14/parisc-use-double-word-condition-in-64bit-cas-operation.patch b/queue-3.14/parisc-use-double-word-condition-in-64bit-cas-operation.patch new file mode 100644 index 00000000000..4697e5f0701 --- /dev/null +++ b/queue-3.14/parisc-use-double-word-condition-in-64bit-cas-operation.patch @@ -0,0 +1,34 @@ +From 1b59ddfcf1678de38a1f8ca9fb8ea5eebeff1843 Mon Sep 17 00:00:00 2001 +From: John David Anglin +Date: Mon, 7 Sep 2015 20:13:28 -0400 +Subject: parisc: Use double word condition in 64bit CAS operation + +From: John David Anglin + +commit 1b59ddfcf1678de38a1f8ca9fb8ea5eebeff1843 upstream. + +The attached change fixes the condition used in the "sub" instruction. +A double word comparison is needed. This fixes the 64-bit LWS CAS +operation on 64-bit kernels. + +I can now enable 64-bit atomic support in GCC. 
+ +Signed-off-by: John David Anglin +Signed-off-by: Helge Deller +Signed-off-by: Greg Kroah-Hartman + +--- + arch/parisc/kernel/syscall.S | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/parisc/kernel/syscall.S ++++ b/arch/parisc/kernel/syscall.S +@@ -821,7 +821,7 @@ cas2_action: + /* 64bit CAS */ + #ifdef CONFIG_64BIT + 19: ldd,ma 0(%sr3,%r26), %r29 +- sub,= %r29, %r25, %r0 ++ sub,*= %r29, %r25, %r0 + b,n cas2_end + 20: std,ma %r24, 0(%sr3,%r26) + copy %r0, %r28 diff --git a/queue-3.14/series b/queue-3.14/series index 28c2ce264c7..5ec0d8806a0 100644 --- a/queue-3.14/series +++ b/queue-3.14/series @@ -25,4 +25,9 @@ btrfs-check-if-previous-transaction-aborted-to-avoid-fs-corruption.patch nfsv4-don-t-set-setattr-for-o_rdonly-o_excl.patch nfs-fix-a-null-pointer-dereference-of-migration-recovery-ops-for-v4.2-client.patch nfs-nfs_set_pgio_error-sometimes-misses-errors.patch -sunrpc-xs_reset_transport-must-mark-the-connection-as-disconnected.patch +parisc-use-double-word-condition-in-64bit-cas-operation.patch +parisc-filter-out-spurious-interrupts-in-pa-risc-irq-handler.patch +vmscan-fix-increasing-nr_isolated-incurred-by-putback-unevictable-pages.patch +fs-if-a-coredump-already-exists-unlink-and-recreate-with-o_excl.patch +mmc-core-fix-race-condition-in-mmc_wait_data_done.patch +md-raid10-always-set-reshape_safe-when-initializing-reshape_position.patch diff --git a/queue-3.14/sunrpc-xs_reset_transport-must-mark-the-connection-as-disconnected.patch b/queue-3.14/sunrpc-xs_reset_transport-must-mark-the-connection-as-disconnected.patch deleted file mode 100644 index 9dd43c83860..00000000000 --- a/queue-3.14/sunrpc-xs_reset_transport-must-mark-the-connection-as-disconnected.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 0c78789e3a030615c6650fde89546cadf40ec2cc Mon Sep 17 00:00:00 2001 -From: Trond Myklebust -Date: Sat, 29 Aug 2015 13:36:30 -0700 -Subject: SUNRPC: xs_reset_transport must mark the connection as disconnected - -From: Trond Myklebust - -commit 
0c78789e3a030615c6650fde89546cadf40ec2cc upstream. - -In case the reconnection attempt fails. - -Signed-off-by: Trond Myklebust -Signed-off-by: Greg Kroah-Hartman - ---- - net/sunrpc/xprtsock.c | 1 + - 1 file changed, 1 insertion(+) - ---- a/net/sunrpc/xprtsock.c -+++ b/net/sunrpc/xprtsock.c -@@ -866,6 +866,7 @@ static void xs_reset_transport(struct so - sk->sk_user_data = NULL; - - xs_restore_old_callbacks(transport, sk); -+ xprt_clear_connected(xprt); - write_unlock_bh(&sk->sk_callback_lock); - - sk->sk_no_check = 0; diff --git a/queue-3.14/vmscan-fix-increasing-nr_isolated-incurred-by-putback-unevictable-pages.patch b/queue-3.14/vmscan-fix-increasing-nr_isolated-incurred-by-putback-unevictable-pages.patch new file mode 100644 index 00000000000..356385f5145 --- /dev/null +++ b/queue-3.14/vmscan-fix-increasing-nr_isolated-incurred-by-putback-unevictable-pages.patch @@ -0,0 +1,54 @@ +From c54839a722a02818677bcabe57e957f0ce4f841d Mon Sep 17 00:00:00 2001 +From: Jaewon Kim +Date: Tue, 8 Sep 2015 15:02:21 -0700 +Subject: vmscan: fix increasing nr_isolated incurred by putback unevictable pages + +From: Jaewon Kim + +commit c54839a722a02818677bcabe57e957f0ce4f841d upstream. + +reclaim_clean_pages_from_list() assumes that shrink_page_list() returns +number of pages removed from the candidate list. But shrink_page_list() +puts back mlocked pages without passing it to caller and without +counting as nr_reclaimed. This increases nr_isolated. + +To fix this, this patch changes shrink_page_list() to pass unevictable +pages back to caller. Caller will take care those pages. + +Minchan said: + +It fixes two issues. + +1. With unevictable page, cma_alloc will be successful. + +Exactly speaking, cma_alloc of current kernel will fail due to +unevictable pages. + +2. fix leaking of NR_ISOLATED counter of vmstat + +With it, too_many_isolated works. Otherwise, it could make hang until +the process get SIGKILL. 
+ +Signed-off-by: Jaewon Kim +Acked-by: Minchan Kim +Cc: Mel Gorman +Acked-by: Vlastimil Babka +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/vmscan.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -1087,7 +1087,7 @@ cull_mlocked: + if (PageSwapCache(page)) + try_to_free_swap(page); + unlock_page(page); +- putback_lru_page(page); ++ list_add(&page->lru, &ret_pages); + continue; + + activate_locked: