From b24db85c3e8a53ff0d3255b22e8e8b674572bdbc Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Thu, 23 Oct 2008 13:53:12 +0200
Subject: [PATCH] Implement block discard
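
Add support for discard requests to the block layer: a new BIO_RW_DISCARD
bio flag and matching REQ_DISCARD request flag, a prepare_discard_fn queue
callback so drivers can translate discards into hardware-specific commands,
blkdev_issue_discard() for in-kernel callers, and a BLKDISCARD ioctl for
userspace. As a first user, the MTD block translation layer gains a
->discard() method (implemented for FTL), and FAT issues discards for the
clusters it frees.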

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Hannes Reinecke <hare@suse.de>
---
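For testing: BLKDISCARD takes a {start, length} pair of 64-bit values in
bytes; both must be multiples of 512, and the range must lie within the
device. A minimal userspace sketch (the device path is a placeholder, and
the BLKDISCARD definition only exists once this patch is applied):

	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/fs.h>		/* BLKDISCARD, added by this patch */

	int main(void)
	{
		/* start and length in bytes, both 512-byte aligned */
		uint64_t range[2] = { 0, 1024 * 1024 };
		int fd = open("/dev/sdX", O_WRONLY);	/* needs FMODE_WRITE */

		if (fd < 0)
			return 1;
		if (ioctl(fd, BLKDISCARD, range) < 0)
			perror("BLKDISCARD");	/* EOPNOTSUPP if unsupported */
		return 0;
	}
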
 block/blk-barrier.c          |   69 +++++++++++++++++++++++++++++++++++++++
 block/blk-core.c             |   34 +++++++++++++------
 block/blk-merge.c            |   27 +++++++++------
 block/blk-settings.c         |   17 +++++++++
 block/blktrace.c             |   29 ++++------------
 block/compat_ioctl.c         |    1
 block/elevator.c             |   12 +++++-
 block/ioctl.c                |   76 +++++++++++++++++++++++++++++++++++++++++++
 drivers/mtd/ftl.c            |   24 +++++++++++++
 drivers/mtd/mtd_blkdevs.c    |   16 +++++++++
 fs/fat/fatent.c              |   14 +++++++
 include/linux/bio.h          |   10 +++--
 include/linux/blkdev.h       |   22 +++++++++++-
 include/linux/blktrace_api.h |    6 ++-
 include/linux/fs.h           |    5 ++
 include/linux/mtd/blktrans.h |    2 +
 16 files changed, 314 insertions(+), 50 deletions(-)

--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -315,3 +315,72 @@ int blkdev_issue_flush(struct block_devi
 	return ret;
 }
 EXPORT_SYMBOL(blkdev_issue_flush);
+
+static void blkdev_discard_end_io(struct bio *bio, int err)
+{
+	if (err) {
+		if (err == -EOPNOTSUPP)
+			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
+		clear_bit(BIO_UPTODATE, &bio->bi_flags);
+	}
+
+	bio_put(bio);
+}
+
+/**
+ * blkdev_issue_discard - queue a discard
+ * @bdev:	blockdev to issue discard for
+ * @sector:	start sector
+ * @nr_sects:	number of sectors to discard
+ *
+ * Description:
+ *    Issue a discard request for the sectors in question. Does not wait.
+ */
+int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
+			 unsigned nr_sects)
+{
+	struct request_queue *q;
+	struct bio *bio;
+	int ret = 0;
+
+	if (bdev->bd_disk == NULL)
+		return -ENXIO;
+
+	q = bdev_get_queue(bdev);
+	if (!q)
+		return -ENXIO;
+
+	if (!q->prepare_discard_fn)
+		return -EOPNOTSUPP;
+
+	while (nr_sects && !ret) {
+		bio = bio_alloc(GFP_KERNEL, 0);
+		if (!bio)
+			return -ENOMEM;
+
+		bio->bi_end_io = blkdev_discard_end_io;
+		bio->bi_bdev = bdev;
+
+		bio->bi_sector = sector;
+
+		if (nr_sects > q->max_hw_sectors) {
+			bio->bi_size = q->max_hw_sectors << 9;
+			nr_sects -= q->max_hw_sectors;
+			sector += q->max_hw_sectors;
+		} else {
+			bio->bi_size = nr_sects << 9;
+			nr_sects = 0;
+		}
+		bio_get(bio);
+		submit_bio(DISCARD_BARRIER, bio);
+
+		/* Check if it failed immediately */
+		if (bio_flagged(bio, BIO_EOPNOTSUPP))
+			ret = -EOPNOTSUPP;
+		else if (!bio_flagged(bio, BIO_UPTODATE))
+			ret = -EIO;
+		bio_put(bio);
+	}
+	return ret;
+}
+EXPORT_SYMBOL(blkdev_issue_discard);
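
Worth noting: blkdev_issue_discard() is fire-and-forget. Each bio is capped
at q->max_hw_sectors, and the extra bio_get()/bio_put() pair only keeps the
bio alive long enough to catch an immediate failure such as -EOPNOTSUPP.
A sketch of an in-kernel caller (the sector numbers are made up):

	/* discard 16384 sectors (8 MB) from sector 0, asynchronously */
	int err = blkdev_issue_discard(bdev, 0, 16384);
	if (err && err != -EOPNOTSUPP)	/* no discard support is not fatal */
		return err;
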
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1077,7 +1077,12 @@ void init_request_from_bio(struct reques
 	/*
 	 * REQ_BARRIER implies no merging, but lets make it explicit
 	 */
-	if (unlikely(bio_barrier(bio)))
+	if (unlikely(bio_discard(bio))) {
+		req->cmd_flags |= REQ_DISCARD;
+		if (bio_barrier(bio))
+			req->cmd_flags |= REQ_SOFTBARRIER;
+		req->q->prepare_discard_fn(req->q, req);
+	} else if (unlikely(bio_barrier(bio)))
 		req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
 
 	if (bio_sync(bio))
@@ -1095,7 +1100,7 @@ void init_request_from_bio(struct reques
 static int __make_request(struct request_queue *q, struct bio *bio)
 {
 	struct request *req;
-	int el_ret, nr_sectors, barrier, err;
+	int el_ret, nr_sectors, barrier, discard, err;
 	const unsigned short prio = bio_prio(bio);
 	const int sync = bio_sync(bio);
 	int rw_flags;
@@ -1110,7 +1115,14 @@ static int __make_request(struct request
 	blk_queue_bounce(q, &bio);
 
 	barrier = bio_barrier(bio);
-	if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) {
+	if (unlikely(barrier) && bio_has_data(bio) &&
+	    (q->next_ordered == QUEUE_ORDERED_NONE)) {
+		err = -EOPNOTSUPP;
+		goto end_io;
+	}
+
+	discard = bio_discard(bio);
+	if (unlikely(discard) && !q->prepare_discard_fn) {
 		err = -EOPNOTSUPP;
 		goto end_io;
 	}
@@ -1405,7 +1417,8 @@ end_io:
 
 		if (bio_check_eod(bio, nr_sectors))
 			goto end_io;
-		if (bio_empty_barrier(bio) && !q->prepare_flush_fn) {
+		if ((bio_empty_barrier(bio) && !q->prepare_flush_fn) ||
+		    (bio_discard(bio) && !q->prepare_discard_fn)) {
 			err = -EOPNOTSUPP;
 			goto end_io;
 		}
@@ -1487,7 +1500,6 @@ void submit_bio(int rw, struct bio *bio)
 	 * go through the normal accounting stuff before submission.
 	 */
 	if (bio_has_data(bio)) {
-
 		if (rw & WRITE) {
 			count_vm_events(PGPGOUT, count);
 		} else {
@@ -1881,7 +1893,7 @@ static int blk_end_io(struct request *rq
 	struct request_queue *q = rq->q;
 	unsigned long flags = 0UL;
 
-	if (bio_has_data(rq->bio)) {
+	if (bio_has_data(rq->bio) || blk_discard_rq(rq)) {
 		if (__end_that_request_first(rq, error, nr_bytes))
 			return 1;
 
@@ -1939,7 +1951,7 @@ EXPORT_SYMBOL_GPL(blk_end_request);
 **/
 int __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
 {
-	if (bio_has_data(rq->bio) &&
+	if ((bio_has_data(rq->bio) || blk_discard_rq(rq)) &&
 	    __end_that_request_first(rq, error, nr_bytes))
 		return 1;
 
@@ -2012,12 +2024,14 @@ void blk_rq_bio_prep(struct request_queu
 	   we want BIO_RW_AHEAD (bit 1) to imply REQ_FAILFAST (bit 1). */
 	rq->cmd_flags |= (bio->bi_rw & 3);
 
-	rq->nr_phys_segments = bio_phys_segments(q, bio);
-	rq->nr_hw_segments = bio_hw_segments(q, bio);
+	if (bio_has_data(bio)) {
+		rq->nr_phys_segments = bio_phys_segments(q, bio);
+		rq->nr_hw_segments = bio_hw_segments(q, bio);
+		rq->buffer = bio_data(bio);
+	}
 	rq->current_nr_sectors = bio_cur_sectors(bio);
 	rq->hard_cur_sectors = rq->current_nr_sectors;
 	rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio);
-	rq->buffer = bio_data(bio);
 	rq->data_len = bio->bi_size;
 
 	rq->bio = rq->biotail = bio;
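
Taken together, the blk-core.c changes wire discards through the request
path: __make_request() fails a discard bio early with -EOPNOTSUPP when the
queue has no prepare_discard_fn, init_request_from_bio() marks the request
with REQ_DISCARD (plus REQ_SOFTBARRIER for barrier discards) and lets the
driver's prepare_discard_fn rewrite the request, and the completion and
blk_rq_bio_prep() paths learn to cope with requests that carry sectors but
no data segments.
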
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -11,7 +11,7 @@
 
 void blk_recalc_rq_sectors(struct request *rq, int nsect)
 {
-	if (blk_fs_request(rq)) {
+	if (blk_fs_request(rq) || blk_discard_rq(rq)) {
 		rq->hard_sector += nsect;
 		rq->hard_nr_sectors -= nsect;
 
@@ -138,14 +138,18 @@ static int blk_phys_contig_segment(struc
 	if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
 		return 0;
 
-	if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
-		return 0;
 	if (bio->bi_seg_back_size + nxt->bi_seg_front_size >
 	    q->max_segment_size)
 		return 0;
 
+	if (!bio_has_data(bio))
+		return 1;
+
+	if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
+		return 0;
+
 	/*
-	 * bio and nxt are contigous in memory, check if the queue allows
+	 * bio and nxt are contiguous in memory, check if the queue allows
 	 * these two to be merged into one
 	 */
 	if (BIO_SEG_BOUNDARY(q, bio, nxt))
@@ -161,8 +165,9 @@ static int blk_hw_contig_segment(struct 
 		blk_recount_segments(q, bio);
 	if (!bio_flagged(nxt, BIO_SEG_VALID))
 		blk_recount_segments(q, nxt);
-	if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) ||
-	    BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size))
+	if (bio_has_data(bio) &&
+	    (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) ||
+	     BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size)))
 		return 0;
 	if (bio->bi_hw_back_size + nxt->bi_hw_front_size > q->max_segment_size)
 		return 0;
@@ -325,8 +330,9 @@ int ll_back_merge_fn(struct request_queu
 	if (!bio_flagged(bio, BIO_SEG_VALID))
 		blk_recount_segments(q, bio);
 	len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size;
-	if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio))
-	    && !BIOVEC_VIRT_OVERSIZE(len)) {
+	if (!bio_has_data(bio) ||
+	    (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio))
+	     && !BIOVEC_VIRT_OVERSIZE(len))) {
 		int mergeable = ll_new_mergeable(q, req, bio);
 
 		if (mergeable) {
@@ -364,8 +370,9 @@ int ll_front_merge_fn(struct request_que
 		blk_recount_segments(q, bio);
 	if (!bio_flagged(req->bio, BIO_SEG_VALID))
 		blk_recount_segments(q, req->bio);
-	if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) &&
-	    !BIOVEC_VIRT_OVERSIZE(len)) {
+	if (!bio_has_data(bio) ||
+	    (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) &&
+	     !BIOVEC_VIRT_OVERSIZE(len))) {
 		int mergeable = ll_new_mergeable(q, req, bio);
 
 		if (mergeable) {
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -33,6 +33,23 @@ void blk_queue_prep_rq(struct request_qu
 EXPORT_SYMBOL(blk_queue_prep_rq);
 
 /**
+ * blk_queue_set_discard - set a discard_sectors function for queue
+ * @q: queue
+ * @dfn: prepare_discard function
+ *
+ * It's possible for a queue to register a discard callback which is used
+ * to transform a discard request into the appropriate type for the
+ * hardware. If none is registered, then discard requests are failed
+ * with %EOPNOTSUPP.
+ *
+ */
+void blk_queue_set_discard(struct request_queue *q, prepare_discard_fn *dfn)
+{
+	q->prepare_discard_fn = dfn;
+}
+EXPORT_SYMBOL(blk_queue_set_discard);
+
+/**
  * blk_queue_merge_bvec - set a merge_bvec function for queue
  * @q: queue
  * @mbfn: merge_bvec_fn
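
The registration pattern for a driver mirrors what mtd_blkdevs.c does later
in this patch. A minimal sketch with a hypothetical driver (a real callback
might instead rewrite the request into a driver-private command):

	static int mydrv_prepare_discard(struct request_queue *q,
					 struct request *rq)
	{
		/* tag the request so the driver can decode it later */
		rq->cmd_type = REQ_TYPE_LINUX_BLOCK;
		rq->cmd[0] = REQ_LB_OP_DISCARD;
		return 0;
	}

	/* in the driver's queue setup */
	blk_queue_set_discard(q, mydrv_prepare_discard);
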
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -111,23 +111,9 @@ static int act_log_check(struct blk_trac
  */
 static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK_TC_WRITE) };
 
-/*
- * Bio action bits of interest
- */
-static u32 bio_act[9] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_ACT(BLK_TC_SYNC), 0, BLK_TC_ACT(BLK_TC_AHEAD), 0, 0, 0, BLK_TC_ACT(BLK_TC_META) };
-
-/*
- * More could be added as needed, taking care to increment the decrementer
- * to get correct indexing
- */
-#define trace_barrier_bit(rw)	\
-	(((rw) & (1 << BIO_RW_BARRIER)) >> (BIO_RW_BARRIER - 0))
-#define trace_sync_bit(rw)	\
-	(((rw) & (1 << BIO_RW_SYNC)) >> (BIO_RW_SYNC - 1))
-#define trace_ahead_bit(rw)	\
-	(((rw) & (1 << BIO_RW_AHEAD)) << (2 - BIO_RW_AHEAD))
-#define trace_meta_bit(rw)	\
-	(((rw) & (1 << BIO_RW_META)) >> (BIO_RW_META - 3))
+/* The ilog2() calls fall out because they're constant */
+#define MASK_TC_BIT(rw, __name) ( (rw & (1 << BIO_RW_ ## __name)) << \
+	(ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name) )
 
 /*
  * The worker for the various blk_add_trace*() types. Fills out a
@@ -147,10 +133,11 @@ void __blk_add_trace(struct blk_trace *b
 		return;
 
 	what |= ddir_act[rw & WRITE];
-	what |= bio_act[trace_barrier_bit(rw)];
-	what |= bio_act[trace_sync_bit(rw)];
-	what |= bio_act[trace_ahead_bit(rw)];
-	what |= bio_act[trace_meta_bit(rw)];
+	what |= MASK_TC_BIT(rw, BARRIER);
+	what |= MASK_TC_BIT(rw, SYNC);
+	what |= MASK_TC_BIT(rw, AHEAD);
+	what |= MASK_TC_BIT(rw, META);
+	what |= MASK_TC_BIT(rw, DISCARD);
 
 	pid = tsk->pid;
 	if (unlikely(act_log_check(bt, what, sector, pid)))
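
To see MASK_TC_BIT() at work on the new bit: BIO_RW_DISCARD is bit 6 of
bi_rw (added to bio.h below) and BLK_TC_DISCARD is 1 << 13, so with
BLK_TC_SHIFT = 16 the macro shifts bit 6 of rw left by
ilog2(1 << 13) + 16 - 6 = 23, landing exactly on BLK_TC_ACT(BLK_TC_DISCARD)
at bit 29. Since every operand is a constant, the ilog2() calls are
evaluated at compile time.
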
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -788,6 +788,7 @@ long compat_blkdev_ioctl(struct file *fi
 		return compat_hdio_getgeo(disk, bdev, compat_ptr(arg));
 	case BLKFLSBUF:
 	case BLKROSET:
+	case BLKDISCARD:
 	/*
 	 * the ones below are implemented in blkdev_locked_ioctl,
 	 * but we call blkdev_ioctl, which gets the lock for us
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -75,6 +75,12 @@ int elv_rq_merge_ok(struct request *rq, 
 		return 0;
 
 	/*
+	 * Don't merge file system requests and discard requests
+	 */
+	if (bio_discard(bio) != bio_discard(rq->bio))
+		return 0;
+
+	/*
 	 * different data direction or already started, don't merge
 	 */
 	if (bio_data_dir(bio) != rq_data_dir(rq))
@@ -438,6 +444,8 @@ void elv_dispatch_sort(struct request_qu
 	list_for_each_prev(entry, &q->queue_head) {
 		struct request *pos = list_entry_rq(entry);
 
+		if (blk_discard_rq(rq) != blk_discard_rq(pos))
+			break;
 		if (rq_data_dir(rq) != rq_data_dir(pos))
 			break;
 		if (pos->cmd_flags & stop_flags)
@@ -607,7 +615,7 @@ void elv_insert(struct request_queue *q,
 		break;
 
 	case ELEVATOR_INSERT_SORT:
-		BUG_ON(!blk_fs_request(rq));
+		BUG_ON(!blk_fs_request(rq) && !blk_discard_rq(rq));
 		rq->cmd_flags |= REQ_SORTED;
 		q->nr_sorted++;
 		if (rq_mergeable(rq)) {
@@ -692,7 +700,7 @@ void __elv_add_request(struct request_qu
 		 * this request is scheduling boundary, update
 		 * end_sector
 		 */
-		if (blk_fs_request(rq)) {
+		if (blk_fs_request(rq) || blk_discard_rq(rq)) {
 			q->end_sector = rq_end_sector(rq);
 			q->boundary_rq = rq;
 		}
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -111,6 +111,69 @@ static int blkdev_reread_part(struct blo
 	return res;
 }
 
+static void blk_ioc_discard_endio(struct bio *bio, int err)
+{
+	if (err) {
+		if (err == -EOPNOTSUPP)
+			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
+		clear_bit(BIO_UPTODATE, &bio->bi_flags);
+	}
+	complete(bio->bi_private);
+}
+
+static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
+			     uint64_t len)
+{
+	struct request_queue *q = bdev_get_queue(bdev);
+	int ret = 0;
+
+	if (start & 511)
+		return -EINVAL;
+	if (len & 511)
+		return -EINVAL;
+	start >>= 9;
+	len >>= 9;
+
+	if (start + len > (bdev->bd_inode->i_size >> 9))
+		return -EINVAL;
+
+	if (!q->prepare_discard_fn)
+		return -EOPNOTSUPP;
+
+	while (len && !ret) {
+		DECLARE_COMPLETION_ONSTACK(wait);
+		struct bio *bio;
+
+		bio = bio_alloc(GFP_KERNEL, 0);
+		if (!bio)
+			return -ENOMEM;
+
+		bio->bi_end_io = blk_ioc_discard_endio;
+		bio->bi_bdev = bdev;
+		bio->bi_private = &wait;
+		bio->bi_sector = start;
+
+		if (len > q->max_hw_sectors) {
+			bio->bi_size = q->max_hw_sectors << 9;
+			len -= q->max_hw_sectors;
+			start += q->max_hw_sectors;
+		} else {
+			bio->bi_size = len << 9;
+			len = 0;
+		}
+		submit_bio(DISCARD_NOBARRIER, bio);
+
+		wait_for_completion(&wait);
+
+		if (bio_flagged(bio, BIO_EOPNOTSUPP))
+			ret = -EOPNOTSUPP;
+		else if (!bio_flagged(bio, BIO_UPTODATE))
+			ret = -EIO;
+		bio_put(bio);
+	}
+	return ret;
+}
+
 static int put_ushort(unsigned long arg, unsigned short val)
 {
 	return put_user(val, (unsigned short __user *)arg);
@@ -258,6 +321,19 @@ int blkdev_ioctl(struct inode *inode, st
 		set_device_ro(bdev, n);
 		unlock_kernel();
 		return 0;
+
+	case BLKDISCARD: {
+		uint64_t range[2];
+
+		if (!(file->f_mode & FMODE_WRITE))
+			return -EBADF;
+
+		if (copy_from_user(range, (void __user *)arg, sizeof(range)))
+			return -EFAULT;
+
+		return blk_ioctl_discard(bdev, range[0], range[1]);
+	}
+
 	case HDIO_GETGEO: {
 		struct hd_geometry geo;
 
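
Unlike blkdev_issue_discard(), blk_ioctl_discard() is synchronous: it waits
on a per-chunk completion before sending the next chunk, and it submits with
DISCARD_NOBARRIER since the ioctl caller has no ordering requirement against
other I/O. The byte-based range is checked for 512-byte alignment and
against the device size before being converted to sectors.
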
--- a/drivers/mtd/ftl.c
+++ b/drivers/mtd/ftl.c
@@ -1005,6 +1005,29 @@ static int ftl_writesect(struct mtd_blkt
 	return ftl_write((void *)dev, buf, block, 1);
 }
 
+static int ftl_discardsect(struct mtd_blktrans_dev *dev,
+			   unsigned long sector, unsigned nr_sects)
+{
+	partition_t *part = (void *)dev;
+	uint32_t bsize = 1 << part->header.EraseUnitSize;
+
+	DEBUG(1, "FTL erase sector %ld for %d sectors\n",
+	      sector, nr_sects);
+
+	while (nr_sects) {
+		uint32_t old_addr = part->VirtualBlockMap[sector];
+		if (old_addr != 0xffffffff) {
+			part->VirtualBlockMap[sector] = 0xffffffff;
+			part->EUNInfo[old_addr/bsize].Deleted++;
+			if (set_bam_entry(part, old_addr, 0))
+				return -EIO;
+		}
+		nr_sects--;
+		sector++;
+	}
+
+	return 0;
+}
 /*====================================================================*/
 
 static void ftl_freepart(partition_t *part)
@@ -1069,6 +1092,7 @@ static struct mtd_blktrans_ops ftl_tr = 
 	.blksize	= SECTOR_SIZE,
 	.readsect	= ftl_readsect,
 	.writesect	= ftl_writesect,
+	.discard	= ftl_discardsect,
 	.getgeo		= ftl_getgeo,
 	.add_mtd	= ftl_add_mtd,
 	.remove_dev	= ftl_remove_dev,
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -32,6 +32,14 @@ struct mtd_blkcore_priv {
 	spinlock_t queue_lock;
 };
 
+static int blktrans_discard_request(struct request_queue *q,
+				    struct request *req)
+{
+	req->cmd_type = REQ_TYPE_LINUX_BLOCK;
+	req->cmd[0] = REQ_LB_OP_DISCARD;
+	return 0;
+}
+
 static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 			       struct mtd_blktrans_dev *dev,
 			       struct request *req)
@@ -44,6 +52,10 @@ static int do_blktrans_request(struct mt
 
 	buf = req->buffer;
 
+	if (req->cmd_type == REQ_TYPE_LINUX_BLOCK &&
+	    req->cmd[0] == REQ_LB_OP_DISCARD)
+		return !tr->discard(dev, block, nsect);
+
 	if (!blk_fs_request(req))
 		return 0;
 
@@ -367,6 +379,10 @@ int register_mtd_blktrans(struct mtd_blk
 
 	tr->blkcore_priv->rq->queuedata = tr;
 	blk_queue_hardsect_size(tr->blkcore_priv->rq, tr->blksize);
+	if (tr->discard)
+		blk_queue_set_discard(tr->blkcore_priv->rq,
+				      blktrans_discard_request);
+
 	tr->blkshift = ffs(tr->blksize) - 1;
 
 	tr->blkcore_priv->thread = kthread_run(mtd_blktrans_thread, tr,
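
The indirection deserves a note: blktrans_discard_request() runs at request
preparation time and merely tags the request (REQ_TYPE_LINUX_BLOCK with
REQ_LB_OP_DISCARD in cmd[0]); do_blktrans_request() later recognizes the tag
and calls the translation layer's ->discard() method. The leading '!'
converts ->discard()'s 0-on-success return into the nonzero-means-success
convention the mtd_blktrans request thread expects.
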
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -6,6 +6,7 @@
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/msdos_fs.h>
+#include <linux/blkdev.h>
 
 struct fatent_operations {
 	void (*ent_blocknr)(struct super_block *, int, int *, sector_t *);
@@ -535,6 +536,7 @@ int fat_free_clusters(struct inode *inod
 	struct fat_entry fatent;
 	struct buffer_head *bhs[MAX_BUF_PER_PAGE];
 	int i, err, nr_bhs;
+	int first_cl = cluster;
 
 	nr_bhs = 0;
 	fatent_init(&fatent);
@@ -551,6 +553,18 @@ int fat_free_clusters(struct inode *inod
 			goto error;
 		}
 
+		/*
+		 * Issue discard for the sectors we no longer care about,
+		 * batching contiguous clusters into one request
+		 */
+		if (cluster != fatent.entry + 1) {
+			int nr_clus = fatent.entry - first_cl + 1;
+
+			sb_issue_discard(sb, fat_clus_to_blknr(sbi, first_cl),
+					 nr_clus * sbi->sec_per_clus);
+			first_cl = cluster;
+		}
+
 		ops->ent_put(&fatent, FAT_ENT_FREE);
 		if (sbi->free_clusters != -1) {
 			sbi->free_clusters++;
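
A concrete example of the batching: if a file's chain occupies clusters 100,
101, 102 and then jumps to 200, the test fires when cluster becomes 200
(200 != 102 + 1) and the run is flushed as one discard of
3 * sbi->sec_per_clus sectors starting at fat_clus_to_blknr(sbi, 100),
after which a new run begins at cluster 200.
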
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -156,6 +156,8 @@ struct bio {
  * bit 2 -- barrier
  * bit 3 -- fail fast, don't want low level driver retries
  * bit 4 -- synchronous I/O hint: the block layer will unplug immediately
+ * bit 5 -- metadata request
+ * bit 6 -- discard sectors
  */
 #define BIO_RW		0	/* Must match RW in req flags (blkdev.h) */
 #define BIO_RW_AHEAD	1	/* Must match FAILFAST in req flags */
@@ -163,6 +165,7 @@ struct bio {
 #define BIO_RW_FAILFAST	3
 #define BIO_RW_SYNC	4
 #define BIO_RW_META	5
+#define BIO_RW_DISCARD	6
 
 /*
  * upper 16 bits of bi_rw define the io priority of this bio
@@ -192,14 +195,15 @@ struct bio {
 #define bio_failfast(bio)	((bio)->bi_rw & (1 << BIO_RW_FAILFAST))
 #define bio_rw_ahead(bio)	((bio)->bi_rw & (1 << BIO_RW_AHEAD))
 #define bio_rw_meta(bio)	((bio)->bi_rw & (1 << BIO_RW_META))
-#define bio_empty_barrier(bio)	(bio_barrier(bio) && !bio_has_data(bio))
+#define bio_discard(bio)	((bio)->bi_rw & (1 << BIO_RW_DISCARD))
+#define bio_empty_barrier(bio)	(bio_barrier(bio) && !bio_has_data(bio) && !bio_discard(bio))
 
 static inline unsigned int bio_cur_sectors(struct bio *bio)
 {
 	if (bio->bi_vcnt)
 		return bio_iovec(bio)->bv_len >> 9;
-
-	return 0;
+	else /* dataless requests such as discard */
+		return bio->bi_size >> 9;
 }
 
 static inline void *bio_data(struct bio *bio)
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -81,6 +81,7 @@ enum {
 	 */
 	REQ_LB_OP_EJECT	= 0x40,		/* eject request */
 	REQ_LB_OP_FLUSH = 0x41,		/* flush device */
+	REQ_LB_OP_DISCARD = 0x42,	/* discard sectors */
 };
 
 /*
@@ -89,6 +90,7 @@ enum {
 enum rq_flag_bits {
 	__REQ_RW,		/* not set, read. set, write */
 	__REQ_FAILFAST,		/* no low level driver retries */
+	__REQ_DISCARD,		/* request to discard sectors */
 	__REQ_SORTED,		/* elevator knows about this request */
 	__REQ_SOFTBARRIER,	/* may not be passed by ioscheduler */
 	__REQ_HARDBARRIER,	/* may not be passed by drive either */
@@ -111,6 +113,7 @@ enum rq_flag_bits {
 };
 
 #define REQ_RW		(1 << __REQ_RW)
+#define REQ_DISCARD	(1 << __REQ_DISCARD)
 #define REQ_FAILFAST	(1 << __REQ_FAILFAST)
 #define REQ_SORTED	(1 << __REQ_SORTED)
 #define REQ_SOFTBARRIER	(1 << __REQ_SOFTBARRIER)
@@ -252,6 +255,7 @@ typedef void (request_fn_proc) (struct r
 typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
 typedef int (prep_rq_fn) (struct request_queue *, struct request *);
 typedef void (unplug_fn) (struct request_queue *);
+typedef int (prepare_discard_fn) (struct request_queue *, struct request *);
 
 struct bio_vec;
 struct bvec_merge_data {
@@ -307,6 +311,7 @@ struct request_queue
 	make_request_fn		*make_request_fn;
 	prep_rq_fn		*prep_rq_fn;
 	unplug_fn		*unplug_fn;
+	prepare_discard_fn	*prepare_discard_fn;
 	merge_bvec_fn		*merge_bvec_fn;
 	prepare_flush_fn	*prepare_flush_fn;
 	softirq_done_fn		*softirq_done_fn;
@@ -536,7 +541,7 @@ enum {
 #define blk_noretry_request(rq)	((rq)->cmd_flags & REQ_FAILFAST)
 #define blk_rq_started(rq)	((rq)->cmd_flags & REQ_STARTED)
 
-#define blk_account_rq(rq)	(blk_rq_started(rq) && blk_fs_request(rq))
+#define blk_account_rq(rq)	(blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq)))
 
 #define blk_pm_suspend_request(rq)	((rq)->cmd_type == REQ_TYPE_PM_SUSPEND)
 #define blk_pm_resume_request(rq)	((rq)->cmd_type == REQ_TYPE_PM_RESUME)
@@ -546,6 +551,7 @@ enum {
 #define blk_sorted_rq(rq)	((rq)->cmd_flags & REQ_SORTED)
 #define blk_barrier_rq(rq)	((rq)->cmd_flags & REQ_HARDBARRIER)
 #define blk_fua_rq(rq)		((rq)->cmd_flags & REQ_FUA)
+#define blk_discard_rq(rq)	((rq)->cmd_flags & REQ_DISCARD)
 #define blk_bidi_rq(rq)		((rq)->next_rq != NULL)
 #define blk_empty_barrier(rq)	(blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors)
 /* rq->queuelist of dequeued request must be list_empty() */
@@ -592,7 +598,8 @@ static inline void blk_clear_queue_full(
 #define RQ_NOMERGE_FLAGS	\
 	(REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER)
 #define rq_mergeable(rq)	\
-	(!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq)))
+	(!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \
+	 (blk_discard_rq(rq) || blk_fs_request((rq))))
 
 /*
  * q->prep_rq_fn return values
@@ -797,6 +804,7 @@ extern void blk_queue_merge_bvec(struct 
 extern void blk_queue_dma_alignment(struct request_queue *, int);
 extern void blk_queue_update_dma_alignment(struct request_queue *, int);
 extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
+extern void blk_queue_set_discard(struct request_queue *, prepare_discard_fn *);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
 extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *);
 extern int blk_do_ordered(struct request_queue *, struct request **);
@@ -838,6 +846,16 @@ static inline struct request *blk_map_qu
 }
 
 extern int blkdev_issue_flush(struct block_device *, sector_t *);
+extern int blkdev_issue_discard(struct block_device *, sector_t sector,
+				unsigned nr_sects);
+
+static inline int sb_issue_discard(struct super_block *sb,
+				   sector_t block, unsigned nr_blocks)
+{
+	block <<= (sb->s_blocksize_bits - 9);
+	nr_blocks <<= (sb->s_blocksize_bits - 9);
+	return blkdev_issue_discard(sb->s_bdev, block, nr_blocks);
+}
 
 /*
  * command filter functions
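
sb_issue_discard() converts filesystem blocks to 512-byte sectors: with 4 KB
blocks (s_blocksize_bits = 12) the shift is 12 - 9 = 3, so block 100 becomes
sector 800 and a run of 8 blocks becomes 64 sectors before being handed to
blkdev_issue_discard().
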
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -23,7 +23,8 @@ enum blktrace_cat {
 	BLK_TC_NOTIFY	= 1 << 10,	/* special message */
 	BLK_TC_AHEAD	= 1 << 11,	/* readahead */
 	BLK_TC_META	= 1 << 12,	/* metadata */
-	BLK_TC_DRV_DATA	= 1 << 13,	/* binary per-driver data */
+	BLK_TC_DISCARD	= 1 << 13,	/* discard requests */
+	BLK_TC_DRV_DATA	= 1 << 14,	/* binary per-driver data */
 
 	BLK_TC_END	= 1 << 15,	/* only 16-bits, reminder */
 };
@@ -204,6 +205,9 @@ static inline void blk_add_trace_rq(stru
 	if (likely(!bt))
 		return;
 
+	if (blk_discard_rq(rq))
+		rw |= (1 << BIO_RW_DISCARD);
+
 	if (blk_pc_request(rq)) {
 		what |= BLK_TC_ACT(BLK_TC_PC);
 		__blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, sizeof(rq->cmd), rq->cmd);
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -91,7 +91,9 @@ extern int dir_notify_enable;
 #define READ_META	(READ | (1 << BIO_RW_META))
 #define WRITE_SYNC	(WRITE | (1 << BIO_RW_SYNC))
 #define SWRITE_SYNC	(SWRITE | (1 << BIO_RW_SYNC))
-#define WRITE_BARRIER	((1 << BIO_RW) | (1 << BIO_RW_BARRIER))
+#define WRITE_BARRIER	(WRITE | (1 << BIO_RW_BARRIER))
+#define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD)
+#define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER))
 
 #define SEL_IN		1
 #define SEL_OUT		2
@@ -229,6 +231,7 @@ extern int dir_notify_enable;
 #define BLKTRACESTART _IO(0x12,116)
 #define BLKTRACESTOP _IO(0x12,117)
 #define BLKTRACETEARDOWN _IO(0x12,118)
+#define BLKDISCARD _IO(0x12,119)
 
 #define BMAP_IOCTL 1		/* obsolete - kept for compatibility */
 #define FIBMAP	   _IO(0x00,1)	/* bmap access */
--- a/include/linux/mtd/blktrans.h
+++ b/include/linux/mtd/blktrans.h
@@ -41,6 +41,8 @@ struct mtd_blktrans_ops {
 		    unsigned long block, char *buffer);
 	int (*writesect)(struct mtd_blktrans_dev *dev,
 		     unsigned long block, char *buffer);
+	int (*discard)(struct mtd_blktrans_dev *dev,
+		       unsigned long block, unsigned nr_blocks);
 
 	/* Block layer ioctls */
 	int (*getgeo)(struct mtd_blktrans_dev *dev, struct hd_geometry *geo);