// SPDX-License-Identifier: GPL-2.0
/*
 * Bad block management
 *
 * - Heavily based on MD badblocks code from Neil Brown
 *
 * Copyright (c) 2015, Intel Corporation.
 */

#include <linux/badblocks.h>
#include <linux/seqlock.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/slab.h>

/*
 * Find the range that starts at or before 's' in the bad table. The
 * search starts from index 'hint' and stops at index 'hint_end' of the
 * bad table.
 */
static int prev_by_hint(struct badblocks *bb, sector_t s, int hint)
{
	int hint_end = hint + 2;
	u64 *p = bb->page;
	int ret = -1;

	while ((hint < hint_end) && ((hint + 1) <= bb->count) &&
	       (BB_OFFSET(p[hint]) <= s)) {
		if ((hint + 1) == bb->count || BB_OFFSET(p[hint + 1]) > s) {
			ret = hint;
			break;
		}
		hint++;
	}

	return ret;
}

/*
 * Find the range that starts at or before bad->start. If 'hint' is
 * provided (hint >= 0) then search the bad table from 'hint' first. It
 * is very probable that the wanted bad range can be found at the hint
 * index, which avoids unnecessary while-loop iterations.
 */
static int prev_badblocks(struct badblocks *bb, struct badblocks_context *bad,
			  int hint)
{
	sector_t s = bad->start;
	int ret = -1;
	int lo, hi;
	u64 *p;

	if (!bb->count)
		goto out;

	if (hint >= 0) {
		ret = prev_by_hint(bb, s, hint);
		if (ret >= 0)
			goto out;
	}

	lo = 0;
	hi = bb->count;
	p = bb->page;

	/* The following bisect search might be unnecessary */
	if (BB_OFFSET(p[lo]) > s)
		return -1;
	if (BB_OFFSET(p[hi - 1]) <= s)
		return hi - 1;

	/* Do bisect search in bad table */
	while (hi - lo > 1) {
		int mid = (lo + hi)/2;
		sector_t a = BB_OFFSET(p[mid]);

		if (a == s) {
			ret = mid;
			goto out;
		}

		if (a < s)
			lo = mid;
		else
			hi = mid;
	}

	if (BB_OFFSET(p[lo]) <= s)
		ret = lo;
out:
	return ret;
}
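
/*
 * Illustrative sketch (not part of the original file): how a caller
 * might reuse the index returned by one prev_badblocks() lookup as the
 * 'hint' of the next, nearby lookup, so the cheap prev_by_hint() path
 * is tried before the bisect search. The badblocks_context fields used
 * here (start/len/ack) are the ones referenced throughout this file;
 * the sector numbers are arbitrary.
 */
static int __maybe_unused prev_badblocks_hint_example(struct badblocks *bb)
{
	struct badblocks_context bad = { .start = 128, .len = 8, .ack = 1 };
	int prev;

	/* First lookup: no previous index is known, so pass -1 as the hint */
	prev = prev_badblocks(bb, &bad, -1);

	/* A nearby follow-up lookup can reuse that index as its hint */
	bad.start += bad.len;
	return prev_badblocks(bb, &bad, prev);
}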

/*
 * Return 'true' if the range indicated by 'bad' can be backward merged
 * with the bad range (from the bad table) indexed by 'behind'.
 */
static bool can_merge_behind(struct badblocks *bb,
			     struct badblocks_context *bad, int behind)
{
	sector_t sectors = bad->len;
	sector_t s = bad->start;
	u64 *p = bb->page;

	if ((s < BB_OFFSET(p[behind])) &&
	    ((s + sectors) >= BB_OFFSET(p[behind])) &&
	    ((BB_END(p[behind]) - s) <= BB_MAX_LEN) &&
	    BB_ACK(p[behind]) == bad->ack)
		return true;
	return false;
}

/*
 * Do backward merge for the range indicated by 'bad' and the bad range
 * (from the bad table) indexed by 'behind'. The return value is the
 * number of sectors merged from bad->len.
 */
static int behind_merge(struct badblocks *bb, struct badblocks_context *bad,
			int behind)
{
	sector_t sectors = bad->len;
	sector_t s = bad->start;
	u64 *p = bb->page;
	int merged = 0;

	WARN_ON(s >= BB_OFFSET(p[behind]));
	WARN_ON((s + sectors) < BB_OFFSET(p[behind]));

	if (s < BB_OFFSET(p[behind])) {
		merged = BB_OFFSET(p[behind]) - s;
		p[behind] = BB_MAKE(s, BB_LEN(p[behind]) + merged, bad->ack);

		WARN_ON((BB_LEN(p[behind]) + merged) >= BB_MAX_LEN);
	}

	return merged;
}

/*
 * Return 'true' if the range indicated by 'bad' can be forward
 * merged with the bad range (from the bad table) indexed by 'prev'.
 */
static bool can_merge_front(struct badblocks *bb, int prev,
			    struct badblocks_context *bad)
{
	sector_t s = bad->start;
	u64 *p = bb->page;

	if (BB_ACK(p[prev]) == bad->ack &&
	    (s < BB_END(p[prev]) ||
	     (s == BB_END(p[prev]) && (BB_LEN(p[prev]) < BB_MAX_LEN))))
		return true;
	return false;
}

/*
 * Do forward merge for the range indicated by 'bad' and the bad range
 * (from the bad table) indexed by 'prev'. The return value is the
 * number of sectors merged from bad->len.
 */
static int front_merge(struct badblocks *bb, int prev, struct badblocks_context *bad)
{
	sector_t sectors = bad->len;
	sector_t s = bad->start;
	u64 *p = bb->page;
	int merged = 0;

	WARN_ON(s > BB_END(p[prev]));

	if (s < BB_END(p[prev])) {
		merged = min_t(sector_t, sectors, BB_END(p[prev]) - s);
	} else {
		merged = min_t(sector_t, sectors, BB_MAX_LEN - BB_LEN(p[prev]));
		if ((prev + 1) < bb->count &&
		    merged > (BB_OFFSET(p[prev + 1]) - BB_END(p[prev]))) {
			merged = BB_OFFSET(p[prev + 1]) - BB_END(p[prev]);
		}

		p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
				  BB_LEN(p[prev]) + merged, bad->ack);
	}

	return merged;
}

/*
 * 'Combine' is a special case which can_merge_front() is not able to
 * handle: If a bad range (indexed by 'prev' from the bad table) starts
 * exactly at bad->start, and the bad range ahead of 'prev' (indexed by
 * 'prev - 1' from the bad table) ends exactly where 'prev' starts, and
 * the sum of their lengths does not exceed the BB_MAX_LEN limit, then
 * these two bad ranges (from the bad table) can be combined.
 *
 * Return 'true' if the bad ranges indexed by 'prev' and 'prev - 1' from
 * the bad table can be combined.
 */
static bool can_combine_front(struct badblocks *bb, int prev,
			      struct badblocks_context *bad)
{
	u64 *p = bb->page;

	if ((prev > 0) &&
	    (BB_OFFSET(p[prev]) == bad->start) &&
	    (BB_END(p[prev - 1]) == BB_OFFSET(p[prev])) &&
	    (BB_LEN(p[prev - 1]) + BB_LEN(p[prev]) <= BB_MAX_LEN) &&
	    (BB_ACK(p[prev - 1]) == BB_ACK(p[prev])))
		return true;
	return false;
}

/*
 * Combine the bad ranges indexed by 'prev' and 'prev - 1' (from the bad
 * table) into one larger bad range, and the new range is indexed by
 * 'prev - 1'.
 * The caller of front_combine() will decrease bb->count, therefore
 * it is unnecessary to clear p[prev] after the front merge.
 */
static void front_combine(struct badblocks *bb, int prev)
{
	u64 *p = bb->page;

	p[prev - 1] = BB_MAKE(BB_OFFSET(p[prev - 1]),
			      BB_LEN(p[prev - 1]) + BB_LEN(p[prev]),
			      BB_ACK(p[prev]));
	if ((prev + 1) < bb->count)
		memmove(p + prev, p + prev + 1, (bb->count - prev - 1) * 8);
}

/*
 * Return 'true' if the range indicated by 'bad' is exactly forward
 * overlapped with the bad range (from the bad table) indexed by
 * 'front'. Exactly forward overlap means the bad range (from the bad
 * table) indexed by 'front' does not cover the whole range indicated
 * by 'bad'.
 */
static bool overlap_front(struct badblocks *bb, int front,
			  struct badblocks_context *bad)
{
	u64 *p = bb->page;

	if (bad->start >= BB_OFFSET(p[front]) &&
	    bad->start < BB_END(p[front]))
		return true;
	return false;
}

/*
 * Return 'true' if the range indicated by 'bad' is exactly backward
 * overlapped with the bad range (from the bad table) indexed by
 * 'behind'.
 */
static bool overlap_behind(struct badblocks *bb, struct badblocks_context *bad,
			   int behind)
{
	u64 *p = bb->page;

	if (bad->start < BB_OFFSET(p[behind]) &&
	    (bad->start + bad->len) > BB_OFFSET(p[behind]))
		return true;
	return false;
}

/*
 * Return 'true' if the range indicated by 'bad' can overwrite the bad
 * range (from the bad table) indexed by 'prev'.
 *
 * The range indicated by 'bad' can overwrite the bad range indexed by
 * 'prev' when,
 * 1) The whole range indicated by 'bad' covers part or all of the bad
 *    range (from the bad table) indexed by 'prev'.
 * 2) The ack value of 'bad' is larger than or equal to the ack value of
 *    the bad range 'prev'.
 *
 * If the overwriting doesn't cover the whole bad range (from the bad
 * table) indexed by 'prev', new range(s) might be split from the
 * existing bad range,
 * 1) The overwrite covers the head or tail part of the existing bad
 *    range: 1 extra bad range will be split out and added into the bad
 *    table.
 * 2) The overwrite covers the middle of the existing bad range: 2 extra
 *    bad ranges will be split out (ahead of and after the overwritten
 *    range) and added into the bad table.
 * The number of extra split ranges of the overwriting is stored in
 * 'extra' and returned for the caller.
 */
static bool can_front_overwrite(struct badblocks *bb, int prev,
				struct badblocks_context *bad, int *extra)
{
	u64 *p = bb->page;
	int len;

	WARN_ON(!overlap_front(bb, prev, bad));

	if (BB_ACK(p[prev]) >= bad->ack)
		return false;

	if (BB_END(p[prev]) <= (bad->start + bad->len)) {
		len = BB_END(p[prev]) - bad->start;
		if (BB_OFFSET(p[prev]) == bad->start)
			*extra = 0;
		else
			*extra = 1;

		bad->len = len;
	} else {
		if (BB_OFFSET(p[prev]) == bad->start)
			*extra = 1;
		else
			/*
			 * The prev range will be split into two; besides the
			 * overwritten one, an extra slot is needed from the
			 * bad table.
			 */
			*extra = 2;
	}

	if ((bb->count + (*extra)) >= MAX_BADBLOCKS)
		return false;

	return true;
}

/*
 * Do the overwrite from the range indicated by 'bad' to the bad range
 * (from the bad table) indexed by 'prev'.
 * The previously called can_front_overwrite() will provide how many
 * extra bad range(s) might be split and added into the bad table. All
 * the splitting cases in the bad table will be handled here.
 */
static int front_overwrite(struct badblocks *bb, int prev,
			   struct badblocks_context *bad, int extra)
{
	u64 *p = bb->page;
	sector_t orig_end = BB_END(p[prev]);
	int orig_ack = BB_ACK(p[prev]);

	switch (extra) {
	case 0:
		p[prev] = BB_MAKE(BB_OFFSET(p[prev]), BB_LEN(p[prev]),
				  bad->ack);
		break;
	case 1:
		if (BB_OFFSET(p[prev]) == bad->start) {
			p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
					  bad->len, bad->ack);
			memmove(p + prev + 2, p + prev + 1,
				(bb->count - prev - 1) * 8);
			p[prev + 1] = BB_MAKE(bad->start + bad->len,
					      orig_end - BB_END(p[prev]),
					      orig_ack);
		} else {
			p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
					  bad->start - BB_OFFSET(p[prev]),
					  orig_ack);
			/*
			 * prev + 2 -> prev + 1 + 1, which is for,
			 * 1) prev + 1: the slot index of the previous one
			 * 2) + 1: one more slot for extra being 1.
			 */
			memmove(p + prev + 2, p + prev + 1,
				(bb->count - prev - 1) * 8);
			p[prev + 1] = BB_MAKE(bad->start, bad->len, bad->ack);
		}
		break;
	case 2:
		p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
				  bad->start - BB_OFFSET(p[prev]),
				  orig_ack);
		/*
		 * prev + 3 -> prev + 1 + 2, which is for,
		 * 1) prev + 1: the slot index of the previous one
		 * 2) + 2: two more slots for extra being 2.
		 */
		memmove(p + prev + 3, p + prev + 1,
			(bb->count - prev - 1) * 8);
		p[prev + 1] = BB_MAKE(bad->start, bad->len, bad->ack);
		p[prev + 2] = BB_MAKE(BB_END(p[prev + 1]),
				      orig_end - BB_END(p[prev + 1]),
				      orig_ack);
		break;
	default:
		break;
	}

	return bad->len;
}

/*
 * Explicitly insert a range indicated by 'bad' into the bad table, at
 * the location indexed by 'at'.
 */
static int insert_at(struct badblocks *bb, int at, struct badblocks_context *bad)
{
	u64 *p = bb->page;
	int len;

	WARN_ON(badblocks_full(bb));

	len = min_t(sector_t, bad->len, BB_MAX_LEN);
	if (at < bb->count)
		memmove(p + at + 1, p + at, (bb->count - at) * 8);
	p[at] = BB_MAKE(bad->start, len, bad->ack);

	return len;
}
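
/*
 * Illustrative sketch (not part of the original file): one way the
 * helper routines above might compose when recording a new bad range.
 * The real setting path also handles overwriting via
 * can_front_overwrite()/front_overwrite(), backward merging, combining
 * and locking; all of that is omitted here, and the function name and
 * error policy are hypothetical.
 */
static int __maybe_unused set_one_range_sketch(struct badblocks *bb,
					       struct badblocks_context *bad)
{
	int prev, merged;

	/* Locate the table slot that starts at or before bad->start */
	prev = prev_badblocks(bb, bad, -1);

	/* Prefer extending that slot over consuming a new one */
	if (prev >= 0 && can_merge_front(bb, prev, bad)) {
		merged = front_merge(bb, prev, bad);
		bad->start += merged;
		bad->len -= merged;
	}

	/* Whatever is left (at most BB_MAX_LEN here) gets its own slot */
	if (bad->len) {
		if (badblocks_full(bb))
			return -ENOSPC;
		merged = insert_at(bb, prev + 1, bad);
		bb->count++;
		bad->start += merged;
		bad->len -= merged;
	}

	/* A range longer than BB_MAX_LEN would need further iterations */
	return bad->len ? -EAGAIN : 0;
}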

/**
 * badblocks_check() - check a given range for bad sectors
 * @bb: the badblocks structure that holds all badblock information
 * @s: sector (start) at which to check for badblocks
 * @sectors: number of sectors to check for badblocks
 * @first_bad: pointer to store location of the first badblock
 * @bad_sectors: pointer to store number of badblocks after @first_bad
 *
 * We can record which blocks on each device are 'bad' and so just
 * fail those blocks, or that stripe, rather than the whole device.
 * Entries in the bad-block table are 64bits wide. This comprises:
 * Length of bad-range, in sectors: 0-511 for lengths 1-512
 * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
 *  A 'shift' can be set so that larger blocks are tracked and
 *  consequently larger devices can be covered.
 * 'Acknowledged' flag - 1 bit. - the most significant bit.
 *
 * Locking of the bad-block table uses a seqlock so badblocks_check
 * might need to retry if it is very unlucky.
 * We will sometimes want to check for bad blocks in a bi_end_io function,
 * so we use the write_seqlock_irq variant.
 *
 * When looking for a bad block we specify a range and want to
 * know if any block in the range is bad. So we binary-search
 * to the last range that starts at-or-before the given endpoint,
 * (or "before the sector after the target range")
 * then see if it ends after the given start.
 *
 * Return:
 *  0: there are no known bad blocks in the range
 *  1: there are known bad blocks which are all acknowledged
 * -1: there are bad blocks which have not yet been acknowledged in metadata.
 * plus the start/length of the first bad section we overlap.
 */
int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
		    sector_t *first_bad, int *bad_sectors)
{
	int hi;
	int lo;
	u64 *p = bb->page;
	int rv;
	sector_t target = s + sectors;
	unsigned seq;

	if (bb->shift > 0) {
		/* round the start down, and the end up */
		s >>= bb->shift;
		target += (1<<bb->shift) - 1;
		target >>= bb->shift;
	}
	/* 'target' is now the first block after the bad range */

retry:
	seq = read_seqbegin(&bb->lock);
	lo = 0;
	rv = 0;
	hi = bb->count;

	/* Binary search between lo and hi for 'target'
	 * i.e. for the last range that starts before 'target'
	 */
	/* INVARIANT: ranges before 'lo' and at-or-after 'hi'
	 * are known not to be the last range before target.
	 * VARIANT: hi-lo is the number of possible
	 * ranges, and decreases until it reaches 1
	 */
	while (hi - lo > 1) {
		int mid = (lo + hi) / 2;
		sector_t a = BB_OFFSET(p[mid]);

		if (a < target)
			/* This could still be the one, earlier ranges
			 * could not.
			 */
			lo = mid;
		else
			/* This and later ranges are definitely out. */
			hi = mid;
	}
	/* 'lo' might be the last that started before target, but 'hi' isn't */
	if (hi > lo) {
		/* need to check all ranges that end after 's' to see if
		 * any are unacknowledged.
		 */
		while (lo >= 0 &&
		       BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
			if (BB_OFFSET(p[lo]) < target) {
				/* starts before the end, and finishes after
				 * the start, so they must overlap
				 */
				if (rv != -1 && BB_ACK(p[lo]))
					rv = 1;
				else
					rv = -1;
				*first_bad = BB_OFFSET(p[lo]);
				*bad_sectors = BB_LEN(p[lo]);
			}
			lo--;
		}
	}

	if (read_seqretry(&bb->lock, seq))
		goto retry;

	return rv;
}
EXPORT_SYMBOL_GPL(badblocks_check);
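
/*
 * Illustrative sketch (not part of the original file): how an I/O path
 * might consult the table before issuing a read. The return convention
 * (0 / 1 / -1) and the first_bad/bad_sectors out-parameters are as
 * documented above; the function name and the pr_warn() reporting are
 * just an example policy.
 */
static void __maybe_unused check_before_read_example(struct badblocks *bb,
						     sector_t s, int sectors)
{
	sector_t first_bad;
	int bad_sectors;

	switch (badblocks_check(bb, s, sectors, &first_bad, &bad_sectors)) {
	case 0:
		/* no known bad blocks, proceed with the read */
		break;
	case 1:
		pr_warn("acknowledged bad range at %llu (%d sectors)\n",
			(unsigned long long)first_bad, bad_sectors);
		break;
	default:
		/* -1: unacknowledged bad blocks, caller may need to retry */
		pr_warn("unacknowledged bad range at %llu (%d sectors)\n",
			(unsigned long long)first_bad, bad_sectors);
		break;
	}
}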

static void badblocks_update_acked(struct badblocks *bb)
{
	u64 *p = bb->page;
	int i;
	bool unacked = false;

	if (!bb->unacked_exist)
		return;

	for (i = 0; i < bb->count ; i++) {
		if (!BB_ACK(p[i])) {
			unacked = true;
			break;
		}
	}

	if (!unacked)
		bb->unacked_exist = 0;
}

/**
 * badblocks_set() - Add a range of bad blocks to the table.
 * @bb: the badblocks structure that holds all badblock information
 * @s: first sector to mark as bad
 * @sectors: number of sectors to mark as bad
 * @acknowledged: whether to mark the bad sectors as acknowledged
 *
 * This might extend the table, or might contract it if two adjacent ranges
 * can be merged. We binary-search to find the 'insertion' point, then
 * decide how best to handle it.
 *
 * Return:
 *  0: success
 *  1: failed to set badblocks (out of space)
 */
int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
		  int acknowledged)
{
	u64 *p;
	int lo, hi;
	int rv = 0;
	unsigned long flags;

	if (bb->shift < 0)
		/* badblocks are disabled */
		return 1;

	if (bb->shift) {
		/* round the start down, and the end up */
		sector_t next = s + sectors;

		s >>= bb->shift;
		next += (1<<bb->shift) - 1;
		next >>= bb->shift;
		sectors = next - s;
	}

	write_seqlock_irqsave(&bb->lock, flags);

	p = bb->page;
	lo = 0;
	hi = bb->count;
	/* Find the last range that starts at-or-before 's' */
	while (hi - lo > 1) {
		int mid = (lo + hi) / 2;
		sector_t a = BB_OFFSET(p[mid]);

		if (a <= s)
			lo = mid;
		else
			hi = mid;
	}
	if (hi > lo && BB_OFFSET(p[lo]) > s)
		hi = lo;

	if (hi > lo) {
		/* we found a range that might merge with the start
		 * of our new range
		 */
		sector_t a = BB_OFFSET(p[lo]);
		sector_t e = a + BB_LEN(p[lo]);
		int ack = BB_ACK(p[lo]);

		if (e >= s) {
			/* Yes, we can merge with a previous range */
			if (s == a && s + sectors >= e)
				/* new range covers old */
				ack = acknowledged;
			else
				ack = ack && acknowledged;

			if (e < s + sectors)
				e = s + sectors;
			if (e - a <= BB_MAX_LEN) {
				p[lo] = BB_MAKE(a, e-a, ack);
				s = e;
			} else {
				/* does not all fit in one range,
				 * make p[lo] maximal
				 */
				if (BB_LEN(p[lo]) != BB_MAX_LEN)
					p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
				s = a + BB_MAX_LEN;
			}
			sectors = e - s;
		}
	}
	if (sectors && hi < bb->count) {
		/* 'hi' points to the first range that starts after 's'.
		 * Maybe we can merge with the start of that range
		 */
		sector_t a = BB_OFFSET(p[hi]);
		sector_t e = a + BB_LEN(p[hi]);
		int ack = BB_ACK(p[hi]);

		if (a <= s + sectors) {
			/* merging is possible */
			if (e <= s + sectors) {
				/* full overlap */
				e = s + sectors;
				ack = acknowledged;
			} else
				ack = ack && acknowledged;

			a = s;
			if (e - a <= BB_MAX_LEN) {
				p[hi] = BB_MAKE(a, e-a, ack);
				s = e;
			} else {
				p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
				s = a + BB_MAX_LEN;
			}
			sectors = e - s;
			lo = hi;
			hi++;
		}
	}
	if (sectors == 0 && hi < bb->count) {
		/* we might be able to combine lo and hi */
		/* Note: 's' is at the end of 'lo' */
		sector_t a = BB_OFFSET(p[hi]);
		int lolen = BB_LEN(p[lo]);
		int hilen = BB_LEN(p[hi]);
		int newlen = lolen + hilen - (s - a);

		if (s >= a && newlen < BB_MAX_LEN) {
			/* yes, we can combine them */
			int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);

			p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
			memmove(p + hi, p + hi + 1,
				(bb->count - hi - 1) * 8);
			bb->count--;
		}
	}
	while (sectors) {
		/* didn't merge (it all).
		 * Need to add a range just before 'hi'
		 */
		if (bb->count >= MAX_BADBLOCKS) {
			/* No room for more */
			rv = 1;
			break;
		} else {
			int this_sectors = sectors;

			memmove(p + hi + 1, p + hi,
				(bb->count - hi) * 8);
			bb->count++;

			if (this_sectors > BB_MAX_LEN)
				this_sectors = BB_MAX_LEN;
			p[hi] = BB_MAKE(s, this_sectors, acknowledged);
			sectors -= this_sectors;
			s += this_sectors;
		}
	}

	bb->changed = 1;
	if (!acknowledged)
		bb->unacked_exist = 1;
	else
		badblocks_update_acked(bb);
	write_sequnlock_irqrestore(&bb->lock, flags);

	return rv;
}
EXPORT_SYMBOL_GPL(badblocks_set);
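
/*
 * Illustrative sketch (not part of the original file): recording a
 * failed write as an unacknowledged bad range. A return of 1 means the
 * table is full (or badblocks are disabled), so this hypothetical
 * caller reports the failure and gives up on the range.
 */
static int __maybe_unused record_bad_write_example(struct badblocks *bb,
						   sector_t s, int sectors)
{
	if (badblocks_set(bb, s, sectors, 0 /* not yet acknowledged */)) {
		pr_err("badblocks table full, cannot record %d sectors at %llu\n",
		       sectors, (unsigned long long)s);
		return -ENOSPC;
	}

	/* Metadata now needs to be written out before the range is acked */
	return 0;
}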

/**
 * badblocks_clear() - Remove a range of bad blocks from the table.
 * @bb: the badblocks structure that holds all badblock information
 * @s: first sector of the range to clear
 * @sectors: number of sectors to clear
 *
 * This may involve extending the table if we split a region,
 * but it must not fail. So if the table becomes full, we just
 * drop the remove request.
 *
 * Return:
 *  0: success
 *  -ENOSPC: failed to clear badblocks (no room to split an existing range)
 */
int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
{
	u64 *p;
	int lo, hi;
	sector_t target = s + sectors;
	int rv = 0;

	if (bb->shift > 0) {
		/* When clearing we round the start up and the end down.
		 * This should not matter as the shift should align with
		 * the block size and no rounding should ever be needed.
		 * However it is better to think a block is bad when it
		 * isn't than to think a block is not bad when it is.
		 */
		s += (1<<bb->shift) - 1;
		s >>= bb->shift;
		target >>= bb->shift;
	}

	write_seqlock_irq(&bb->lock);

	p = bb->page;
	lo = 0;
	hi = bb->count;
	/* Find the last range that starts before 'target' */
	while (hi - lo > 1) {
		int mid = (lo + hi) / 2;
		sector_t a = BB_OFFSET(p[mid]);

		if (a < target)
			lo = mid;
		else
			hi = mid;
	}
	if (hi > lo) {
		/* p[lo] is the last range that could overlap the
		 * current range. Earlier ranges could also overlap,
		 * but only this one can overlap the end of the range.
		 */
		if ((BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) &&
		    (BB_OFFSET(p[lo]) < target)) {
			/* Partial overlap, leave the tail of this range */
			int ack = BB_ACK(p[lo]);
			sector_t a = BB_OFFSET(p[lo]);
			sector_t end = a + BB_LEN(p[lo]);

			if (a < s) {
				/* we need to split this range */
				if (bb->count >= MAX_BADBLOCKS) {
					rv = -ENOSPC;
					goto out;
				}
				memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
				bb->count++;
				p[lo] = BB_MAKE(a, s-a, ack);
				lo++;
			}
			p[lo] = BB_MAKE(target, end - target, ack);
			/* there is no longer an overlap */
			hi = lo;
			lo--;
		}
		while (lo >= 0 &&
		       (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) &&
		       (BB_OFFSET(p[lo]) < target)) {
			/* This range does overlap */
			if (BB_OFFSET(p[lo]) < s) {
				/* Keep the early parts of this range. */
				int ack = BB_ACK(p[lo]);
				sector_t start = BB_OFFSET(p[lo]);

				p[lo] = BB_MAKE(start, s - start, ack);
				/* now low doesn't overlap, so.. */
				break;
			}
			lo--;
		}
		/* 'lo' is strictly before, 'hi' is strictly after,
		 * anything between needs to be discarded
		 */
		if (hi - lo > 1) {
			memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
			bb->count -= (hi - lo - 1);
		}
	}

	badblocks_update_acked(bb);
	bb->changed = 1;
out:
	write_sequnlock_irq(&bb->lock);
	return rv;
}
EXPORT_SYMBOL_GPL(badblocks_clear);
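
/*
 * Illustrative sketch (not part of the original file): once a
 * hypothetical repair path has successfully rewritten a region, the
 * corresponding entries can be dropped from the table. The function
 * name and logging policy are assumptions.
 */
static void __maybe_unused clear_after_rewrite_example(struct badblocks *bb,
						       sector_t s, int sectors)
{
	/* May split an existing range; if the table is full, the entry stays */
	if (badblocks_clear(bb, s, sectors))
		pr_warn("could not clear %d sectors at %llu, range stays bad\n",
			sectors, (unsigned long long)s);
}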

/**
 * ack_all_badblocks() - Acknowledge all bad blocks in a list.
 * @bb: the badblocks structure that holds all badblock information
 *
 * This only succeeds if ->changed is clear. It is used by
 * in-kernel metadata updates.
 */
void ack_all_badblocks(struct badblocks *bb)
{
	if (bb->page == NULL || bb->changed)
		/* no point even trying */
		return;
	write_seqlock_irq(&bb->lock);

	if (bb->changed == 0 && bb->unacked_exist) {
		u64 *p = bb->page;
		int i;

		for (i = 0; i < bb->count ; i++) {
			if (!BB_ACK(p[i])) {
				sector_t start = BB_OFFSET(p[i]);
				int len = BB_LEN(p[i]);

				p[i] = BB_MAKE(start, len, 1);
			}
		}
		bb->unacked_exist = 0;
	}
	write_sequnlock_irq(&bb->lock);
}
EXPORT_SYMBOL_GPL(ack_all_badblocks);
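
/*
 * Illustrative sketch (not part of the original file): a hypothetical
 * metadata writer. Newly set ranges mark the table as 'changed' (see
 * badblocks_set() above); once the on-disk metadata describing them has
 * been written, the flag is cleared and the ranges can be acknowledged
 * in one pass. Real users handle the 'changed' flag under their own
 * locking, which is omitted here.
 */
static void __maybe_unused metadata_written_example(struct badblocks *bb)
{
	/* ... on-disk bad-block metadata written successfully here ... */
	bb->changed = 0;
	ack_all_badblocks(bb);
}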

/**
 * badblocks_show() - sysfs access to bad-blocks list
 * @bb: the badblocks structure that holds all badblock information
 * @page: buffer received from sysfs
 * @unack: whether to show unacknowledged badblocks
 *
 * Return:
 *  Length of returned data
 */
ssize_t badblocks_show(struct badblocks *bb, char *page, int unack)
{
	size_t len;
	int i;
	u64 *p = bb->page;
	unsigned seq;

	if (bb->shift < 0)
		return 0;

retry:
	seq = read_seqbegin(&bb->lock);

	len = 0;
	i = 0;

	while (len < PAGE_SIZE && i < bb->count) {
		sector_t s = BB_OFFSET(p[i]);
		unsigned int length = BB_LEN(p[i]);
		int ack = BB_ACK(p[i]);

		i++;

		if (unack && ack)
			continue;

		len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
				(unsigned long long)s << bb->shift,
				length << bb->shift);
	}
	if (unack && len == 0)
		bb->unacked_exist = 0;

	if (read_seqretry(&bb->lock, seq))
		goto retry;

	return len;
}
EXPORT_SYMBOL_GPL(badblocks_show);

/**
 * badblocks_store() - sysfs access to bad-blocks list
 * @bb: the badblocks structure that holds all badblock information
 * @page: buffer received from sysfs
 * @len: length of data received from sysfs
 * @unack: whether to store the badblocks as unacknowledged
 *
 * Return:
 *  Length of the buffer processed or -ve error.
 */
ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
			int unack)
{
	unsigned long long sector;
	int length;
	char newline;

	switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
	case 3:
		if (newline != '\n')
			return -EINVAL;
		fallthrough;
	case 2:
		if (length <= 0)
			return -EINVAL;
		break;
	default:
		return -EINVAL;
	}

	if (badblocks_set(bb, sector, length, !unack))
		return -ENOSPC;
	else
		return len;
}
EXPORT_SYMBOL_GPL(badblocks_store);
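
/*
 * Illustrative sketch (not part of the original file): roughly how a
 * driver might wire badblocks_show()/badblocks_store() up to a sysfs
 * device attribute. The attribute name, the bad_blocks_show/
 * bad_blocks_store wrappers and the assumption that the badblocks
 * pointer was stored with dev_set_drvdata() are all hypothetical;
 * attribute registration is not shown.
 */
static ssize_t bad_blocks_show(struct device *dev,
			       struct device_attribute *attr, char *page)
{
	struct badblocks *bb = dev_get_drvdata(dev);

	/* 0: list all known bad blocks, acknowledged or not */
	return badblocks_show(bb, page, 0);
}

static ssize_t bad_blocks_store(struct device *dev,
				struct device_attribute *attr,
				const char *page, size_t len)
{
	struct badblocks *bb = dev_get_drvdata(dev);

	/* 0: treat ranges written from user space as acknowledged */
	return badblocks_store(bb, page, len, 0);
}
static DEVICE_ATTR_RW(bad_blocks);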

static int __badblocks_init(struct device *dev, struct badblocks *bb,
			    int enable)
{
	bb->dev = dev;
	bb->count = 0;
	if (enable)
		bb->shift = 0;
	else
		bb->shift = -1;
	if (dev)
		bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL);
	else
		bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL);
	if (!bb->page) {
		bb->shift = -1;
		return -ENOMEM;
	}
	seqlock_init(&bb->lock);

	return 0;
}

/**
 * badblocks_init() - initialize the badblocks structure
 * @bb: the badblocks structure that holds all badblock information
 * @enable: whether to enable badblocks accounting
 *
 * Return:
 *  0: success
 *  -ve errno: on error
 */
int badblocks_init(struct badblocks *bb, int enable)
{
	return __badblocks_init(NULL, bb, enable);
}
EXPORT_SYMBOL_GPL(badblocks_init);

int devm_init_badblocks(struct device *dev, struct badblocks *bb)
{
	if (!bb)
		return -EINVAL;
	return __badblocks_init(dev, bb, 1);
}
EXPORT_SYMBOL_GPL(devm_init_badblocks);

/**
 * badblocks_exit() - free the badblocks structure
 * @bb: the badblocks structure that holds all badblock information
 */
void badblocks_exit(struct badblocks *bb)
{
	if (!bb)
		return;
	if (bb->dev)
		devm_kfree(bb->dev, bb->page);
	else
		kfree(bb->page);
	bb->page = NULL;
}
EXPORT_SYMBOL_GPL(badblocks_exit);
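
/*
 * Illustrative sketch (not part of the original file): the full
 * lifecycle of a standalone (non-devm) badblocks instance, using only
 * the exported API above. Error handling is kept minimal and the sector
 * numbers are arbitrary; a devm-managed user would instead call
 * devm_init_badblocks() and skip the explicit badblocks_exit().
 */
static int __maybe_unused badblocks_lifecycle_example(void)
{
	struct badblocks bb;
	sector_t first_bad;
	int bad_sectors;
	int ret;

	ret = badblocks_init(&bb, 1 /* enable accounting */);
	if (ret)
		return ret;

	/* Record eight bad sectors starting at sector 4096, unacknowledged */
	if (badblocks_set(&bb, 4096, 8, 0))
		pr_warn("badblocks table full\n");

	/* Any overlap with a bad range is reported via the out-parameters */
	if (badblocks_check(&bb, 4090, 16, &first_bad, &bad_sectors))
		pr_info("bad range at %llu, %d sectors\n",
			(unsigned long long)first_bad, bad_sectors);

	/* Pretend the sectors were repaired, then drop them from the table */
	badblocks_clear(&bb, 4096, 8);

	badblocks_exit(&bb);
	return 0;
}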