From 67cc2b8165857ba019920d1f00d64bcc4140075d Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Tue, 31 Mar 2009 14:39:38 +1100
Subject: [PATCH] md/raid5: finish support for DDF/raid6

DDF requires RAID6 calculations over different devices in a different
order.
For md/raid6, we calculate over just the data devices, starting
immediately after the 'Q' block.
For ddf/raid6 we calculate over all devices, using zeros in place of
the P and Q blocks.

This requires unfortunately complex loops...

Signed-off-by: NeilBrown <neilb@suse.de>
---
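As a quick illustration of the ordering difference described in the commit
message, here is a minimal userspace sketch (not part of this patch and not
kernel code). The helper idx_to_slot(), the six-device geometry, and the
printed labels are hypothetical stand-ins for what raid6_idx_to_slot() and
the raid6_empty_zero_page pre-fill loops below do inside the kernel; it only
mirrors the device-to-slot mapping, not the actual syndrome math.

#include <stdio.h>

/*
 * Standalone mirror of the patched raid6_idx_to_slot(): P maps to slot
 * syndrome_disks, Q to syndrome_disks + 1, and every other device takes
 * the next free data slot in visiting order.
 */
static int idx_to_slot(int idx, int pd_idx, int qd_idx,
                       int *count, int syndrome_disks)
{
        if (idx == pd_idx)
                return syndrome_disks;
        if (idx == qd_idx)
                return syndrome_disks + 1;
        return (*count)++;
}

int main(void)
{
        /* Hypothetical 6-device stripe with P and Q on the last two devices. */
        int disks = 6, pd_idx = 4, qd_idx = 5;

        for (int ddf = 0; ddf <= 1; ddf++) {
                /* md: syndrome over the data devices only;
                 * ddf: over all devices, with zero pages in unassigned slots. */
                int syndrome_disks = ddf ? disks : disks - 2;
                /* mirrors raid6_d0(): ddf starts at device 0,
                 * md starts just after the Q block */
                int d0 = ddf ? 0 : (qd_idx == disks - 1 ? 0 : qd_idx + 1);
                int count = 0, i = d0;

                printf("%s layout: gen_syndrome() sees %d blocks\n",
                       ddf ? "ddf" : "md", syndrome_disks + 2);
                do {
                        int slot = idx_to_slot(i, pd_idx, qd_idx,
                                               &count, syndrome_disks);
                        printf("  dev %d -> slot %d%s\n", i, slot,
                               i == pd_idx ? " (P)" :
                               i == qd_idx ? " (Q)" : "");
                        i = (i + 1) % disks;
                } while (i != d0);
        }
        return 0;
}

With the md ordering, P and Q occupy the last two of the syndrome_disks+2
slots and gen_syndrome() runs over exactly `disks` blocks; with the ddf
ordering they land beyond the data range, at slots syndrome_disks and
syndrome_disks+1, and any data slot that never gets a device assigned keeps
its initial pointer, which is why the loops below pre-fill ptrs[] with
raid6_empty_zero_page before walking the stripe.
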
 drivers/md/raid5.c         |   58 +++++++++++++++++++++++++++++++--------------
 include/linux/raid/raid5.h |    1
 2 files changed, 41 insertions(+), 18 deletions(-)

--- linux-2.6.27-SLE11_BRANCH.orig/drivers/md/raid5.c
+++ linux-2.6.27-SLE11_BRANCH/drivers/md/raid5.c
@@ -138,6 +138,10 @@ static inline void raid5_set_bi_hw_segme
 /* Find first data disk in a raid6 stripe */
 static inline int raid6_d0(struct stripe_head *sh)
 {
+        if (sh->ddf_layout)
+                /* ddf always start from first device */
+                return 0;
+        /* md starts just after Q block */
         if (sh->qd_idx == sh->disks - 1)
                 return 0;
         else
@@ -154,13 +158,15 @@ static inline int raid6_next_disk(int di
  * 0 .. raid_disks-3, the parity disk is raid_disks-2 and the Q disk
  * is raid_disks-1. This help does that mapping.
  */
-static int raid6_idx_to_slot(int idx, struct stripe_head *sh, int *count)
+static int raid6_idx_to_slot(int idx, struct stripe_head *sh,
+                             int *count, int syndrome_disks)
 {
         int slot;
+
         if (idx == sh->pd_idx)
-                return sh->disks - 2;
+                return syndrome_disks;
         if (idx == sh->qd_idx)
-                return sh->disks - 1;
+                return syndrome_disks + 1;
         slot = (*count)++;
         return slot;
 }
@@ -1269,6 +1275,7 @@ static sector_t raid5_compute_sector(rai
         unsigned long chunk_number;
         unsigned int chunk_offset;
         int pd_idx, qd_idx;
+        int ddf_layout = 0;
         sector_t new_sector;
         int sectors_per_chunk = conf->chunk_size >> 9;
         int raid_disks = previous ? conf->previous_raid_disks
@@ -1388,6 +1395,7 @@ static sector_t raid5_compute_sector(rai
                         qd_idx = 0;
                 } else if (*dd_idx >= pd_idx)
                         (*dd_idx) += 2; /* D D P Q D */
+                ddf_layout = 1;
                 break;

         case ALGORITHM_ROTATING_N_RESTART:
@@ -1402,6 +1410,7 @@ static sector_t raid5_compute_sector(rai
                         qd_idx = 0;
                 } else if (*dd_idx >= pd_idx)
                         (*dd_idx) += 2; /* D D P Q D */
+                ddf_layout = 1;
                 break;

         case ALGORITHM_ROTATING_N_CONTINUE:
@@ -1409,6 +1418,7 @@ static sector_t raid5_compute_sector(rai
                 pd_idx = raid_disks - 1 - (stripe % raid_disks);
                 qd_idx = (pd_idx + raid_disks - 1) % raid_disks;
                 *dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks;
+                ddf_layout = 1;
                 break;

         case ALGORITHM_LEFT_ASYMMETRIC_6:
@@ -1456,6 +1466,7 @@ static sector_t raid5_compute_sector(rai
         if (sh) {
                 sh->pd_idx = pd_idx;
                 sh->qd_idx = qd_idx;
+                sh->ddf_layout = ddf_layout;
         }
         /*
          * Finally, compute the new sector number
@@ -1644,9 +1655,10 @@ static void compute_parity6(struct strip
 {
         raid6_conf_t *conf = sh->raid_conf;
         int i, pd_idx, qd_idx, d0_idx, disks = sh->disks, count;
+        int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
         struct bio *chosen;
         /**** FIX THIS: This could be very bad if disks is close to 256 ****/
-        void *ptrs[disks];
+        void *ptrs[syndrome_disks+2];

         pd_idx = sh->pd_idx;
         qd_idx = sh->qd_idx;
@@ -1689,23 +1701,28 @@ static void compute_parity6(struct strip
         }

         /* Note that unlike RAID-5, the ordering of the disks matters greatly.*/
-        /* FIX: Is this ordering of drives even remotely optimal? */
+
+        for (i = 0; i < disks; i++)
+                ptrs[i] = (void *)raid6_empty_zero_page;
+
         count = 0;
         i = d0_idx;
         do {
-                int slot = raid6_idx_to_slot(i, sh, &count);
+                int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
+
                 ptrs[slot] = page_address(sh->dev[i].page);
-                if (slot < sh->disks - 2 &&
+                if (slot < syndrome_disks &&
                     !test_bit(R5_UPTODATE, &sh->dev[i].flags)) {
                         printk(KERN_ERR "block %d/%d not uptodate "
                                "on parity calc\n", i, count);
                         BUG();
                 }
+
                 i = raid6_next_disk(i, disks);
         } while (i != d0_idx);
-        BUG_ON(count+2 != disks);
+        BUG_ON(count != syndrome_disks);

-        raid6_call.gen_syndrome(disks, STRIPE_SIZE, ptrs);
+        raid6_call.gen_syndrome(syndrome_disks+2, STRIPE_SIZE, ptrs);

         switch(method) {
         case RECONSTRUCT_WRITE:
@@ -1763,24 +1780,28 @@ static void compute_block_1(struct strip
 static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
 {
         int i, count, disks = sh->disks;
+        int syndrome_disks = sh->ddf_layout ? disks : disks-2;
         int d0_idx = raid6_d0(sh);
         int faila = -1, failb = -1;
         /**** FIX THIS: This could be very bad if disks is close to 256 ****/
-        void *ptrs[disks];
+        void *ptrs[syndrome_disks+2];

+        for (i = 0; i < disks ; i++)
+                ptrs[i] = (void *)raid6_empty_zero_page;
         count = 0;
         i = d0_idx;
         do {
-                int slot;
-                slot = raid6_idx_to_slot(i, sh, &count);
+                int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
+
                 ptrs[slot] = page_address(sh->dev[i].page);
+
                 if (i == dd_idx1)
                         faila = slot;
                 if (i == dd_idx2)
                         failb = slot;
                 i = raid6_next_disk(i, disks);
         } while (i != d0_idx);
-        BUG_ON(count+2 != disks);
+        BUG_ON(count != syndrome_disks);

         BUG_ON(faila == failb);
         if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; }
@@ -1789,9 +1810,9 @@ static void compute_block_2(struct strip
                 (unsigned long long)sh->sector, dd_idx1, dd_idx2,
                 faila, failb);

-        if ( failb == disks-1 ) {
+        if (failb == syndrome_disks+1) {
                 /* Q disk is one of the missing disks */
-                if ( faila == disks-2 ) {
+                if (faila == syndrome_disks) {
                         /* Missing P+Q, just recompute */
                         compute_parity6(sh, UPDATE_PARITY);
                         return;
@@ -1806,12 +1827,13 @@ static void compute_block_2(struct strip
         }

         /* We're missing D+P or D+D; */
-        if (failb == disks-2) {
+        if (failb == syndrome_disks) {
                 /* We're missing D+P. */
-                raid6_datap_recov(disks, STRIPE_SIZE, faila, ptrs);
+                raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE, faila, ptrs);
         } else {
                 /* We're missing D+D. */
-                raid6_2data_recov(disks, STRIPE_SIZE, faila, failb, ptrs);
+                raid6_2data_recov(syndrome_disks+2, STRIPE_SIZE, faila, failb,
+                                  ptrs);
         }

         /* Both the above update both missing blocks */
--- linux-2.6.27-SLE11_BRANCH.orig/include/linux/raid/raid5.h
+++ linux-2.6.27-SLE11_BRANCH/include/linux/raid/raid5.h
@@ -202,6 +202,7 @@ struct stripe_head {
         sector_t                sector;         /* sector of this row */
         short                   pd_idx;         /* parity disk index */
         short                   qd_idx;         /* 'Q' disk index for raid6 */
+        short                   ddf_layout;/* use DDF ordering to calculate Q */
         unsigned long           state;          /* state flags */
         atomic_t                count;          /* nr of active thread/requests */
         spinlock_t              lock;