]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blame - releases/4.14.98/md-raid5-fix-out-of-memory-during-raid-cache-recovery.patch
4.14-stable patches
[thirdparty/kernel/stable-queue.git] / releases / 4.14.98 / md-raid5-fix-out-of-memory-during-raid-cache-recovery.patch
CommitLineData
db596b1f
GKH
1From 483cbbeddd5fe2c80fd4141ff0748fa06c4ff146 Mon Sep 17 00:00:00 2001
2From: Alexei Naberezhnov <anaberezhnov@fb.com>
3Date: Tue, 27 Mar 2018 16:54:16 -0700
4Subject: md/raid5: fix 'out of memory' during raid cache recovery
5
6From: Alexei Naberezhnov <anaberezhnov@fb.com>
7
8commit 483cbbeddd5fe2c80fd4141ff0748fa06c4ff146 upstream.
9
10This fixes the case when md array assembly fails because raid cache recovery is
11unable to allocate a stripe, despite attempts to replay stripes and increase
12cache size. This happens because stripes released by r5c_recovery_replay_stripes
13and raid5_set_cache_size don't become available for allocation immediately.
14Released stripes are first placed on the conf->released_stripes list and require
15the md thread to merge them onto conf->inactive_list before they can be allocated.
16
17Patch allows final allocation attempt during cache recovery to wait for
18new stripes to become available for allocation.
19
20Cc: linux-raid@vger.kernel.org
21Cc: Shaohua Li <shli@kernel.org>
22Cc: linux-stable <stable@vger.kernel.org> # 4.10+
23Fixes: b4c625c67362 ("md/r5cache: r5cache recovery: part 1")
24Signed-off-by: Alexei Naberezhnov <anaberezhnov@fb.com>
25Signed-off-by: Song Liu <songliubraving@fb.com>
26Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
27
28---
29 drivers/md/raid5-cache.c | 33 ++++++++++++++++++++++-----------
30 drivers/md/raid5.c | 8 ++++++--
31 2 files changed, 28 insertions(+), 13 deletions(-)
32
33--- a/drivers/md/raid5-cache.c
34+++ b/drivers/md/raid5-cache.c
35@@ -1942,12 +1942,14 @@ out:
36 }
37
38 static struct stripe_head *
39-r5c_recovery_alloc_stripe(struct r5conf *conf,
40- sector_t stripe_sect)
41+r5c_recovery_alloc_stripe(
42+ struct r5conf *conf,
43+ sector_t stripe_sect,
44+ int noblock)
45 {
46 struct stripe_head *sh;
47
48- sh = raid5_get_active_stripe(conf, stripe_sect, 0, 1, 0);
49+ sh = raid5_get_active_stripe(conf, stripe_sect, 0, noblock, 0);
50 if (!sh)
51 return NULL; /* no more stripe available */
52
53@@ -2157,7 +2159,7 @@ r5c_recovery_analyze_meta_block(struct r
54 stripe_sect);
55
56 if (!sh) {
57- sh = r5c_recovery_alloc_stripe(conf, stripe_sect);
58+ sh = r5c_recovery_alloc_stripe(conf, stripe_sect, 1);
59 /*
60 * cannot get stripe from raid5_get_active_stripe
61 * try replay some stripes
62@@ -2166,20 +2168,29 @@ r5c_recovery_analyze_meta_block(struct r
63 r5c_recovery_replay_stripes(
64 cached_stripe_list, ctx);
65 sh = r5c_recovery_alloc_stripe(
66- conf, stripe_sect);
67+ conf, stripe_sect, 1);
68 }
69 if (!sh) {
70+ int new_size = conf->min_nr_stripes * 2;
71 pr_debug("md/raid:%s: Increasing stripe cache size to %d to recovery data on journal.\n",
72 mdname(mddev),
73- conf->min_nr_stripes * 2);
74- raid5_set_cache_size(mddev,
75- conf->min_nr_stripes * 2);
76- sh = r5c_recovery_alloc_stripe(conf,
77- stripe_sect);
78+ new_size);
79+ ret = raid5_set_cache_size(mddev, new_size);
80+ if (conf->min_nr_stripes <= new_size / 2) {
81+ pr_err("md/raid:%s: Cannot increase cache size, ret=%d, new_size=%d, min_nr_stripes=%d, max_nr_stripes=%d\n",
82+ mdname(mddev),
83+ ret,
84+ new_size,
85+ conf->min_nr_stripes,
86+ conf->max_nr_stripes);
87+ return -ENOMEM;
88+ }
89+ sh = r5c_recovery_alloc_stripe(
90+ conf, stripe_sect, 0);
91 }
92 if (!sh) {
93 pr_err("md/raid:%s: Cannot get enough stripes due to memory pressure. Recovery failed.\n",
94- mdname(mddev));
95+ mdname(mddev));
96 return -ENOMEM;
97 }
98 list_add_tail(&sh->lru, cached_stripe_list);
99--- a/drivers/md/raid5.c
100+++ b/drivers/md/raid5.c
101@@ -6336,6 +6336,7 @@ raid5_show_stripe_cache_size(struct mdde
102 int
103 raid5_set_cache_size(struct mddev *mddev, int size)
104 {
105+ int result = 0;
106 struct r5conf *conf = mddev->private;
107
108 if (size <= 16 || size > 32768)
109@@ -6352,11 +6353,14 @@ raid5_set_cache_size(struct mddev *mddev
110
111 mutex_lock(&conf->cache_size_mutex);
112 while (size > conf->max_nr_stripes)
113- if (!grow_one_stripe(conf, GFP_KERNEL))
114+ if (!grow_one_stripe(conf, GFP_KERNEL)) {
115+ conf->min_nr_stripes = conf->max_nr_stripes;
116+ result = -ENOMEM;
117 break;
118+ }
119 mutex_unlock(&conf->cache_size_mutex);
120
121- return 0;
122+ return result;
123 }
124 EXPORT_SYMBOL(raid5_set_cache_size);
125