--- /dev/null
+From 60aaf933854511630e16be4efe0f96485e132de4 Mon Sep 17 00:00:00 2001
+From: majianpeng <majianpeng@gmail.com>
+Date: Thu, 14 Nov 2013 15:16:20 +1100
+Subject: md/raid5: Use conf->device_lock protect changing of multi-thread resources.
+
+From: majianpeng <majianpeng@gmail.com>
+
+commit 60aaf933854511630e16be4efe0f96485e132de4 upstream.
+This backport also incorporates upstream commit
+0c775d5208284700de423e6746259da54a42e1f5.
+
+When we change group_thread_cnt via the sysfs entry, the kernel can oops.
+
+The kernel messages are:
+[ 135.299021] BUG: unable to handle kernel NULL pointer dereference at (null)
+[ 135.299073] IP: [<ffffffff815188ab>] handle_active_stripes+0x32b/0x440
+[ 135.299107] PGD 0
+[ 135.299122] Oops: 0000 [#1] SMP
+[ 135.299144] Modules linked in: netconsole e1000e ptp pps_core
+[ 135.299188] CPU: 3 PID: 2225 Comm: md0_raid5 Not tainted 3.12.0+ #24
+[ 135.299214] Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS 080015 11/09/2011
+[ 135.299255] task: ffff8800b9638f80 ti: ffff8800b77a4000 task.ti: ffff8800b77a4000
+[ 135.299283] RIP: 0010:[<ffffffff815188ab>] [<ffffffff815188ab>] handle_active_stripes+0x32b/0x440
+[ 135.299323] RSP: 0018:ffff8800b77a5c48 EFLAGS: 00010002
+[ 135.299344] RAX: ffff880037bb5c70 RBX: 0000000000000000 RCX: 0000000000000008
+[ 135.299371] RDX: ffff880037bb5cb8 RSI: 0000000000000001 RDI: ffff880037bb5c00
+[ 135.299398] RBP: ffff8800b77a5d08 R08: 0000000000000001 R09: 0000000000000000
+[ 135.299425] R10: ffff8800b77a5c98 R11: 00000000ffffffff R12: ffff880037bb5c00
+[ 135.299452] R13: 0000000000000000 R14: 0000000000000000 R15: ffff880037bb5c70
+[ 135.299479] FS: 0000000000000000(0000) GS:ffff88013fd80000(0000) knlGS:0000000000000000
+[ 135.299510] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
+[ 135.299532] CR2: 0000000000000000 CR3: 0000000001c0b000 CR4: 00000000000407e0
+[ 135.299559] Stack:
+[ 135.299570] ffff8800b77a5c88 ffffffff8107383e ffff8800b77a5c88 ffff880037a64300
+[ 135.299611] 000000000000ec08 ffff880037bb5cb8 ffff8800b77a5c98 ffffffffffffffd8
+[ 135.299654] 000000000000ec08 ffff880037bb5c60 ffff8800b77a5c98 ffff8800b77a5c98
+[ 135.299696] Call Trace:
+[ 135.299711] [<ffffffff8107383e>] ? __wake_up+0x4e/0x70
+[ 135.299733] [<ffffffff81518f88>] raid5d+0x4c8/0x680
+[ 135.299756] [<ffffffff817174ed>] ? schedule_timeout+0x15d/0x1f0
+[ 135.299781] [<ffffffff81524c9f>] md_thread+0x11f/0x170
+[ 135.299804] [<ffffffff81069cd0>] ? wake_up_bit+0x40/0x40
+[ 135.299826] [<ffffffff81524b80>] ? md_rdev_init+0x110/0x110
+[ 135.299850] [<ffffffff81069656>] kthread+0xc6/0xd0
+[ 135.299871] [<ffffffff81069590>] ? kthread_freezable_should_stop+0x70/0x70
+[ 135.299899] [<ffffffff81722ffc>] ret_from_fork+0x7c/0xb0
+[ 135.299923] [<ffffffff81069590>] ? kthread_freezable_should_stop+0x70/0x70
+[ 135.299951] Code: ff ff ff 0f 84 d7 fe ff ff e9 5c fe ff ff 66 90 41 8b b4 24 d8 01 00 00 45 31 ed 85 f6 0f 8e 7b fd ff ff 49 8b 9c 24 d0 01 00 00 <48> 3b 1b 49 89 dd 0f 85 67 fd ff ff 48 8d 43 28 31 d2 eb 17 90
+[ 135.300005] RIP [<ffffffff815188ab>] handle_active_stripes+0x32b/0x440
+[ 135.300005] RSP <ffff8800b77a5c48>
+[ 135.300005] CR2: 0000000000000000
+[ 135.300005] ---[ end trace 504854e5bb7562ed ]---
+[ 135.300005] Kernel panic - not syncing: Fatal exception
+
+This is because raid5d() can be running while the multi-thread
+resources are changed via sysfs, so we need to provide locking.
+
+conf->device_lock is suitable, but we cannot simply call
+alloc_thread_groups under this lock as we cannot allocate memory
+while holding a spinlock.
+So change alloc_thread_groups() to allocate and return the data
+structures, then raid5_store_group_thread_cnt() can take the lock
+while updating the pointers to the data structures.
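+
+The pattern is the usual one: allocate with no lock held (allocation may
+sleep), publish the new pointers under the spinlock, and free the old
+copy after dropping it. A minimal userspace sketch of the idiom, with
+purely illustrative names (this is not the kernel code itself):
+
+	#include <pthread.h>
+	#include <stdio.h>
+	#include <stdlib.h>
+
+	struct groups {
+		int cnt;		/* stand-in for per-worker state */
+	};
+
+	static pthread_spinlock_t lock;
+	static struct groups *active;	/* readers dereference under 'lock' */
+
+	static int resize_groups(int cnt)
+	{
+		struct groups *fresh, *old;
+
+		fresh = calloc(1, sizeof(*fresh)); /* no lock held: may block */
+		if (!fresh)
+			return -1;
+		fresh->cnt = cnt;
+
+		pthread_spin_lock(&lock);	/* swap the pointers only */
+		old = active;
+		active = fresh;
+		pthread_spin_unlock(&lock);
+
+		free(old);			/* free outside the lock */
+		return 0;
+	}
+
+	int main(void)
+	{
+		pthread_spin_init(&lock, PTHREAD_PROCESS_PRIVATE);
+		resize_groups(4);
+		printf("groups: %d\n", active->cnt);
+		free(active);
+		return 0;
+	}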
+
+This fixes a bug introduced in 3.12 and so is suitable for the 3.12.x
+stable series.
+
+Fixes: b721420e8719131896b009b11edbbd27
+Signed-off-by: Jianpeng Ma <majianpeng@gmail.com>
+Signed-off-by: NeilBrown <neilb@suse.de>
+Reviewed-by: Shaohua Li <shli@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/raid5.c | 63 ++++++++++++++++++++++++++++++++---------------------
+ 1 file changed, 39 insertions(+), 24 deletions(-)
+
+--- a/drivers/md/raid5.c
++++ b/drivers/md/raid5.c
+@@ -5214,15 +5214,18 @@ raid5_show_group_thread_cnt(struct mddev
+ return 0;
+ }
+
+-static int alloc_thread_groups(struct r5conf *conf, int cnt);
++static int alloc_thread_groups(struct r5conf *conf, int cnt,
++ int *group_cnt,
++ int *worker_cnt_per_group,
++ struct r5worker_group **worker_groups);
+ static ssize_t
+ raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
+ {
+ struct r5conf *conf = mddev->private;
+ unsigned long new;
+ int err;
+- struct r5worker_group *old_groups;
+- int old_group_cnt;
++ struct r5worker_group *new_groups, *old_groups;
++ int group_cnt, worker_cnt_per_group;
+
+ if (len >= PAGE_SIZE)
+ return -EINVAL;
+@@ -5238,17 +5241,19 @@ raid5_store_group_thread_cnt(struct mdde
+ mddev_suspend(mddev);
+
+ old_groups = conf->worker_groups;
+- old_group_cnt = conf->worker_cnt_per_group;
+-
+ if (old_groups)
+ flush_workqueue(raid5_wq);
+
+- conf->worker_groups = NULL;
+- err = alloc_thread_groups(conf, new);
+- if (err) {
+- conf->worker_groups = old_groups;
+- conf->worker_cnt_per_group = old_group_cnt;
+- } else {
++ err = alloc_thread_groups(conf, new,
++ &group_cnt, &worker_cnt_per_group,
++ &new_groups);
++ if (!err) {
++ spin_lock_irq(&conf->device_lock);
++ conf->group_cnt = group_cnt;
++ conf->worker_cnt_per_group = worker_cnt_per_group;
++ conf->worker_groups = new_groups;
++ spin_unlock_irq(&conf->device_lock);
++
+ if (old_groups)
+ kfree(old_groups[0].workers);
+ kfree(old_groups);
+@@ -5278,33 +5283,36 @@ static struct attribute_group raid5_attr
+ .attrs = raid5_attrs,
+ };
+
+-static int alloc_thread_groups(struct r5conf *conf, int cnt)
++static int alloc_thread_groups(struct r5conf *conf, int cnt,
++ int *group_cnt,
++ int *worker_cnt_per_group,
++ struct r5worker_group **worker_groups)
+ {
+ int i, j;
+ ssize_t size;
+ struct r5worker *workers;
+
+- conf->worker_cnt_per_group = cnt;
++ *worker_cnt_per_group = cnt;
+ if (cnt == 0) {
+- conf->worker_groups = NULL;
++ *group_cnt = 0;
++ *worker_groups = NULL;
+ return 0;
+ }
+- conf->group_cnt = num_possible_nodes();
++ *group_cnt = num_possible_nodes();
+ size = sizeof(struct r5worker) * cnt;
+- workers = kzalloc(size * conf->group_cnt, GFP_NOIO);
+- conf->worker_groups = kzalloc(sizeof(struct r5worker_group) *
+- conf->group_cnt, GFP_NOIO);
+- if (!conf->worker_groups || !workers) {
++ workers = kzalloc(size * *group_cnt, GFP_NOIO);
++ *worker_groups = kzalloc(sizeof(struct r5worker_group) *
++ *group_cnt, GFP_NOIO);
++ if (!*worker_groups || !workers) {
+ kfree(workers);
+- kfree(conf->worker_groups);
+- conf->worker_groups = NULL;
++ kfree(*worker_groups);
+ return -ENOMEM;
+ }
+
+- for (i = 0; i < conf->group_cnt; i++) {
++ for (i = 0; i < *group_cnt; i++) {
+ struct r5worker_group *group;
+
+- group = &conf->worker_groups[i];
++ group = &(*worker_groups)[i];
+ INIT_LIST_HEAD(&group->handle_list);
+ group->conf = conf;
+ group->workers = workers + i * cnt;
+@@ -5462,6 +5470,8 @@ static struct r5conf *setup_conf(struct
+ struct md_rdev *rdev;
+ struct disk_info *disk;
+ char pers_name[6];
++ int group_cnt, worker_cnt_per_group;
++ struct r5worker_group *new_group;
+
+ if (mddev->new_level != 5
+ && mddev->new_level != 4
+@@ -5496,7 +5506,12 @@ static struct r5conf *setup_conf(struct
+ if (conf == NULL)
+ goto abort;
+ /* Don't enable multi-threading by default*/
+- if (alloc_thread_groups(conf, 0))
++ if (!alloc_thread_groups(conf, 0, &group_cnt, &worker_cnt_per_group,
++ &new_group)) {
++ conf->group_cnt = group_cnt;
++ conf->worker_cnt_per_group = worker_cnt_per_group;
++ conf->worker_groups = new_group;
++ } else
+ goto abort;
+ spin_lock_init(&conf->device_lock);
+ seqcount_init(&conf->gen_lock);
--- /dev/null
+From 72403b4a0fbdf433c1fe0127e49864658f6f6468 Mon Sep 17 00:00:00 2001
+From: Mel Gorman <mgorman@suse.de>
+Date: Tue, 12 Nov 2013 15:08:32 -0800
+Subject: mm: numa: return the number of base pages altered by protection changes
+
+From: Mel Gorman <mgorman@suse.de>
+
+commit 72403b4a0fbdf433c1fe0127e49864658f6f6468 upstream.
+
+Commit 0255d4918480 ("mm: Account for a THP NUMA hinting update as one
+PTE update") was added to account for the number of PTE updates when
+marking pages prot_numa. task_numa_work was using the old return value
+to track how much address space had been updated. Altering the return
+value causes the scanner to do more work than it is configured or
+documented to do in a single unit of work.
+
+This patch reverts that commit and accounts for the number of THP
+updates separately in vmstat. It is up to the administrator to
+interpret the pair of values correctly. This is a straightforward
+operation and likely to only be of interest when actively debugging NUMA
+balancing problems.
+
+The impact of this patch is that the NUMA PTE scanner will scan more
+slowly when THP is enabled and workloads may converge more slowly as a
+result. On the flip side, system CPU usage should be lower than recent
+tests reported. Here is an illustrative example from a short single-JVM
+specjbb test:
+
+specjbb
+ 3.12.0 3.12.0
+ vanilla acctupdates
+TPut 1 26143.00 ( 0.00%) 25747.00 ( -1.51%)
+TPut 7 185257.00 ( 0.00%) 183202.00 ( -1.11%)
+TPut 13 329760.00 ( 0.00%) 346577.00 ( 5.10%)
+TPut 19 442502.00 ( 0.00%) 460146.00 ( 3.99%)
+TPut 25 540634.00 ( 0.00%) 549053.00 ( 1.56%)
+TPut 31 512098.00 ( 0.00%) 519611.00 ( 1.47%)
+TPut 37 461276.00 ( 0.00%) 474973.00 ( 2.97%)
+TPut 43 403089.00 ( 0.00%) 414172.00 ( 2.75%)
+
+ 3.12.0 3.12.0
+               vanilla acctupdates
+User 5169.64 5184.14
+System 100.45 80.02
+Elapsed 252.75 251.85
+
+Performance is similar but note the reduction in system CPU time. While
+this run showed a performance gain, that will not be universal, but at
+least the scanner will be behaving as documented. The vmstat counters
+are obviously different; here is a straightforward interpretation of
+them from mmtests.
+
+ 3.12.0 3.12.0
+                         vanilla acctupdates
+NUMA page range updates 1408326 11043064
+NUMA huge PMD updates 0 21040
+NUMA PTE updates 1408326 291624
+
+"NUMA page range updates" == nr_pte_updates and is the value returned to
+the NUMA pte scanner. NUMA huge PMD updates were the number of THP
+updates which in combination can be used to calculate how many ptes were
+updated from userspace.
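+
+As a worked example, here is a hedged sketch (assuming x86-64 with 2MiB
+transparent huge pages, i.e. HPAGE_PMD_NR == 512; the counters are
+sampled independently, so the totals reconcile only approximately):
+
+	#include <stdio.h>
+
+	int main(void)
+	{
+		const long hpage_pmd_nr = 512;	/* 2MiB / 4KiB base pages */
+		long pte_updates  = 291624;	/* "NUMA PTE updates" above */
+		long huge_updates = 21040;	/* "NUMA huge PMD updates" */
+
+		/* base pages covered by THP updates: 21040 * 512 */
+		printf("base pages via THP: %ld\n",
+		       huge_updates * hpage_pmd_nr);
+		/* ~11.06M, close to the 11043064 "page range updates" */
+		printf("approx page range updates: %ld\n",
+		       pte_updates + huge_updates * hpage_pmd_nr);
+		return 0;
+	}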
+
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Reported-by: Alex Thorlton <athorlton@sgi.com>
+Reviewed-by: Rik van Riel <riel@redhat.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/vm_event_item.h | 1 +
+ mm/mprotect.c | 7 ++++++-
+ mm/vmstat.c | 1 +
+ 3 files changed, 8 insertions(+), 1 deletion(-)
+
+--- a/include/linux/vm_event_item.h
++++ b/include/linux/vm_event_item.h
+@@ -39,6 +39,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PS
+ PAGEOUTRUN, ALLOCSTALL, PGROTATED,
+ #ifdef CONFIG_NUMA_BALANCING
+ NUMA_PTE_UPDATES,
++ NUMA_HUGE_PTE_UPDATES,
+ NUMA_HINT_FAULTS,
+ NUMA_HINT_FAULTS_LOCAL,
+ NUMA_PAGE_MIGRATE,
+--- a/mm/mprotect.c
++++ b/mm/mprotect.c
+@@ -138,6 +138,7 @@ static inline unsigned long change_pmd_r
+ pmd_t *pmd;
+ unsigned long next;
+ unsigned long pages = 0;
++ unsigned long nr_huge_updates = 0;
+ bool all_same_node;
+
+ pmd = pmd_offset(pud, addr);
+@@ -148,7 +149,8 @@ static inline unsigned long change_pmd_r
+ split_huge_page_pmd(vma, addr, pmd);
+ else if (change_huge_pmd(vma, pmd, addr, newprot,
+ prot_numa)) {
+- pages++;
++ pages += HPAGE_PMD_NR;
++ nr_huge_updates++;
+ continue;
+ }
+ /* fall through */
+@@ -168,6 +170,9 @@ static inline unsigned long change_pmd_r
+ change_pmd_protnuma(vma->vm_mm, addr, pmd);
+ } while (pmd++, addr = next, addr != end);
+
++ if (nr_huge_updates)
++ count_vm_numa_events(NUMA_HUGE_PTE_UPDATES, nr_huge_updates);
++
+ return pages;
+ }
+
+--- a/mm/vmstat.c
++++ b/mm/vmstat.c
+@@ -812,6 +812,7 @@ const char * const vmstat_text[] = {
+
+ #ifdef CONFIG_NUMA_BALANCING
+ "numa_pte_updates",
++ "numa_huge_pte_updates",
+ "numa_hint_faults",
+ "numa_hint_faults_local",
+ "numa_pages_migrated",