From 67cc2b8165857ba019920d1f00d64bcc4140075d Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Tue, 31 Mar 2009 14:39:38 +1100
Subject: [PATCH] md/raid5: finish support for DDF/raid6

DDF requires RAID6 calculations over different devices in a different
order.
For md/raid6, we calculate over just the data devices, starting
immediately after the 'Q' block.
For ddf/raid6 we calculate over all devices, using zeros in place of
the P and Q blocks.

This requires unfortunately complex loops...

Signed-off-by: NeilBrown <neilb@suse.de>
---
 drivers/md/raid5.c         |   58 +++++++++++++++++++++++++++++++--------------
 include/linux/raid/raid5.h |    1 +
 2 files changed, 41 insertions(+), 18 deletions(-)

--- linux-2.6.27-SLE11_BRANCH.orig/drivers/md/raid5.c
+++ linux-2.6.27-SLE11_BRANCH/drivers/md/raid5.c
@@ -138,6 +138,10 @@ static inline void raid5_set_bi_hw_segme
 /* Find first data disk in a raid6 stripe */
 static inline int raid6_d0(struct stripe_head *sh)
 {
+	if (sh->ddf_layout)
+		/* ddf always start from first device */
+		return 0;
+	/* md starts just after Q block */
 	if (sh->qd_idx == sh->disks - 1)
 		return 0;
 	else
@@ -154,13 +158,15 @@ static inline int raid6_next_disk(int di
  * 0 .. raid_disks-3, the parity disk is raid_disks-2 and the Q disk
  * is raid_disks-1. This help does that mapping.
  */
-static int raid6_idx_to_slot(int idx, struct stripe_head *sh, int *count)
+static int raid6_idx_to_slot(int idx, struct stripe_head *sh,
+			     int *count, int syndrome_disks)
 {
 	int slot;
+
 	if (idx == sh->pd_idx)
-		return sh->disks - 2;
+		return syndrome_disks;
 	if (idx == sh->qd_idx)
-		return sh->disks - 1;
+		return syndrome_disks + 1;
 	slot = (*count)++;
 	return slot;
 }
@@ -1269,6 +1275,7 @@ static sector_t raid5_compute_sector(rai
 	unsigned long chunk_number;
 	unsigned int chunk_offset;
 	int pd_idx, qd_idx;
+	int ddf_layout = 0;
 	sector_t new_sector;
 	int sectors_per_chunk = conf->chunk_size >> 9;
 	int raid_disks = previous ? conf->previous_raid_disks
@@ -1388,6 +1395,7 @@ static sector_t raid5_compute_sector(rai
 				qd_idx = 0;
 			} else if (*dd_idx >= pd_idx)
 				(*dd_idx) += 2; /* D D P Q D */
+			ddf_layout = 1;
 			break;

 		case ALGORITHM_ROTATING_N_RESTART:
@@ -1402,6 +1410,7 @@ static sector_t raid5_compute_sector(rai
 				qd_idx = 0;
 			} else if (*dd_idx >= pd_idx)
 				(*dd_idx) += 2; /* D D P Q D */
+			ddf_layout = 1;
 			break;

 		case ALGORITHM_ROTATING_N_CONTINUE:
@@ -1409,6 +1418,7 @@ static sector_t raid5_compute_sector(rai
 			pd_idx = raid_disks - 1 - (stripe % raid_disks);
 			qd_idx = (pd_idx + raid_disks - 1) % raid_disks;
 			*dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks;
+			ddf_layout = 1;
 			break;

 		case ALGORITHM_LEFT_ASYMMETRIC_6:
@@ -1456,6 +1466,7 @@ static sector_t raid5_compute_sector(rai
 	if (sh) {
 		sh->pd_idx = pd_idx;
 		sh->qd_idx = qd_idx;
+		sh->ddf_layout = ddf_layout;
 	}
 	/*
 	 * Finally, compute the new sector number
@@ -1644,9 +1655,10 @@ static void compute_parity6(struct strip
 {
 	raid6_conf_t *conf = sh->raid_conf;
 	int i, pd_idx, qd_idx, d0_idx, disks = sh->disks, count;
+	int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
 	struct bio *chosen;
 	/**** FIX THIS: This could be very bad if disks is close to 256 ****/
-	void *ptrs[disks];
+	void *ptrs[syndrome_disks+2];

 	pd_idx = sh->pd_idx;
 	qd_idx = sh->qd_idx;
@@ -1689,23 +1701,28 @@ static void compute_parity6(struct strip
 	}

 	/* Note that unlike RAID-5, the ordering of the disks matters greatly.*/
-	/* FIX: Is this ordering of drives even remotely optimal? */
+
+	for (i = 0; i < disks; i++)
+		ptrs[i] = (void *)raid6_empty_zero_page;
+
 	count = 0;
 	i = d0_idx;
 	do {
-		int slot = raid6_idx_to_slot(i, sh, &count);
+		int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
+
 		ptrs[slot] = page_address(sh->dev[i].page);
-		if (slot < sh->disks - 2 &&
+		if (slot < syndrome_disks &&
 		    !test_bit(R5_UPTODATE, &sh->dev[i].flags)) {
 			printk(KERN_ERR "block %d/%d not uptodate "
 			       "on parity calc\n", i, count);
 			BUG();
 		}
+
 		i = raid6_next_disk(i, disks);
 	} while (i != d0_idx);
-	BUG_ON(count+2 != disks);
+	BUG_ON(count != syndrome_disks);

-	raid6_call.gen_syndrome(disks, STRIPE_SIZE, ptrs);
+	raid6_call.gen_syndrome(syndrome_disks+2, STRIPE_SIZE, ptrs);

 	switch(method) {
 	case RECONSTRUCT_WRITE:
@@ -1763,24 +1780,28 @@ static void compute_block_1(struct strip
 static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
 {
 	int i, count, disks = sh->disks;
+	int syndrome_disks = sh->ddf_layout ? disks : disks-2;
 	int d0_idx = raid6_d0(sh);
 	int faila = -1, failb = -1;
 	/**** FIX THIS: This could be very bad if disks is close to 256 ****/
-	void *ptrs[disks];
+	void *ptrs[syndrome_disks+2];

+	for (i = 0; i < disks ; i++)
+		ptrs[i] = (void *)raid6_empty_zero_page;
 	count = 0;
 	i = d0_idx;
 	do {
-		int slot;
-		slot = raid6_idx_to_slot(i, sh, &count);
+		int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
+
 		ptrs[slot] = page_address(sh->dev[i].page);
+
 		if (i == dd_idx1)
 			faila = slot;
 		if (i == dd_idx2)
 			failb = slot;
 		i = raid6_next_disk(i, disks);
 	} while (i != d0_idx);
-	BUG_ON(count+2 != disks);
+	BUG_ON(count != syndrome_disks);

 	BUG_ON(faila == failb);
 	if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; }
@@ -1789,9 +1810,9 @@ static void compute_block_2(struct strip
 		 (unsigned long long)sh->sector, dd_idx1, dd_idx2,
 		 faila, failb);

-	if ( failb == disks-1 ) {
+	if (failb == syndrome_disks+1) {
 		/* Q disk is one of the missing disks */
-		if ( faila == disks-2 ) {
+		if (faila == syndrome_disks) {
 			/* Missing P+Q, just recompute */
 			compute_parity6(sh, UPDATE_PARITY);
 			return;
@@ -1806,12 +1827,13 @@ static void compute_block_2(struct strip
 	}

 	/* We're missing D+P or D+D; */
-	if (failb == disks-2) {
+	if (failb == syndrome_disks) {
 		/* We're missing D+P. */
-		raid6_datap_recov(disks, STRIPE_SIZE, faila, ptrs);
+		raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE, faila, ptrs);
 	} else {
 		/* We're missing D+D. */
-		raid6_2data_recov(disks, STRIPE_SIZE, faila, failb, ptrs);
+		raid6_2data_recov(syndrome_disks+2, STRIPE_SIZE, faila, failb,
+				  ptrs);
 	}

 	/* Both the above update both missing blocks */
--- linux-2.6.27-SLE11_BRANCH.orig/include/linux/raid/raid5.h
+++ linux-2.6.27-SLE11_BRANCH/include/linux/raid/raid5.h
@@ -202,6 +202,7 @@ struct stripe_head {
 	sector_t		sector;		/* sector of this row */
 	short			pd_idx;		/* parity disk index */
 	short			qd_idx;		/* 'Q' disk index for raid6 */
+	short			ddf_layout;/* use DDF ordering to calculate Q */
 	unsigned long		state;		/* state flags */
 	atomic_t		count;		/* nr of active thread/requests */
 	spinlock_t		lock;