From b24db85c3e8a53ff0d3255b22e8e8b674572bdbc Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 23 Oct 2008 13:53:12 +0200 Subject: [PATCH] Implement block discard Signed-off-by: Jens Axboe Signed-off-by: Hannes Reinecke --- block/blk-barrier.c | 69 +++++++++++++++++++++++++++++++++++++++ block/blk-core.c | 34 +++++++++++++------ block/blk-merge.c | 27 +++++++++------ block/blk-settings.c | 17 +++++++++ block/blktrace.c | 29 ++++------------ block/compat_ioctl.c | 1 block/elevator.c | 12 +++++- block/ioctl.c | 76 +++++++++++++++++++++++++++++++++++++++++++ drivers/mtd/ftl.c | 24 +++++++++++++ drivers/mtd/mtd_blkdevs.c | 16 +++++++++ fs/fat/fatent.c | 14 +++++++ include/linux/bio.h | 10 +++-- include/linux/blkdev.h | 22 +++++++++++- include/linux/blktrace_api.h | 6 ++- include/linux/fs.h | 5 ++ include/linux/mtd/blktrans.h | 2 + 16 files changed, 314 insertions(+), 50 deletions(-) --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -315,3 +315,72 @@ int blkdev_issue_flush(struct block_devi return ret; } EXPORT_SYMBOL(blkdev_issue_flush); + +static void blkdev_discard_end_io(struct bio *bio, int err) +{ + if (err) { + if (err == -EOPNOTSUPP) + set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); + clear_bit(BIO_UPTODATE, &bio->bi_flags); + } + + bio_put(bio); +} + +/** + * blkdev_issue_discard - queue a discard + * @bdev: blockdev to issue discard for + * @sector: start sector + * @nr_sects: number of sectors to discard + * + * Description: + * Issue a discard request for the sectors in question. Does not wait. + */ +int blkdev_issue_discard(struct block_device *bdev, sector_t sector, + unsigned nr_sects) +{ + struct request_queue *q; + struct bio *bio; + int ret = 0; + + if (bdev->bd_disk == NULL) + return -ENXIO; + + q = bdev_get_queue(bdev); + if (!q) + return -ENXIO; + + if (!q->prepare_discard_fn) + return -EOPNOTSUPP; + + while (nr_sects && !ret) { + bio = bio_alloc(GFP_KERNEL, 0); + if (!bio) + return -ENOMEM; + + bio->bi_end_io = blkdev_discard_end_io; + bio->bi_bdev = bdev; + + bio->bi_sector = sector; + + if (nr_sects > q->max_hw_sectors) { + bio->bi_size = q->max_hw_sectors << 9; + nr_sects -= q->max_hw_sectors; + sector += q->max_hw_sectors; + } else { + bio->bi_size = nr_sects << 9; + nr_sects = 0; + } + bio_get(bio); + submit_bio(DISCARD_BARRIER, bio); + + /* Check if it failed immediately */ + if (bio_flagged(bio, BIO_EOPNOTSUPP)) + ret = -EOPNOTSUPP; + else if (!bio_flagged(bio, BIO_UPTODATE)) + ret = -EIO; + bio_put(bio); + } + return ret; +} +EXPORT_SYMBOL(blkdev_issue_discard); --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1077,7 +1077,12 @@ void init_request_from_bio(struct reques /* * REQ_BARRIER implies no merging, but lets make it explicit */ - if (unlikely(bio_barrier(bio))) + if (unlikely(bio_discard(bio))) { + req->cmd_flags |= REQ_DISCARD; + if (bio_barrier(bio)) + req->cmd_flags |= REQ_SOFTBARRIER; + req->q->prepare_discard_fn(req->q, req); + } else if (unlikely(bio_barrier(bio))) req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE); if (bio_sync(bio)) @@ -1095,7 +1100,7 @@ void init_request_from_bio(struct reques static int __make_request(struct request_queue *q, struct bio *bio) { struct request *req; - int el_ret, nr_sectors, barrier, err; + int el_ret, nr_sectors, barrier, discard, err; const unsigned short prio = bio_prio(bio); const int sync = bio_sync(bio); int rw_flags; @@ -1110,7 +1115,14 @@ static int __make_request(struct request blk_queue_bounce(q, &bio); barrier = bio_barrier(bio); - if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) { 
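
/*
 * Illustrative sketch, not part of the patch: a kernel-side caller of the
 * blkdev_issue_discard() helper added above.  The helper does not wait for
 * the discard bios to complete, so only immediate failures such as
 * -EOPNOTSUPP (no prepare_discard_fn registered on the queue) come back to
 * the caller.  my_release_blocks() is a made-up name.
 */
#include <linux/blkdev.h>

static void my_release_blocks(struct block_device *bdev, sector_t start,
			      unsigned nr_sects)
{
	int ret = blkdev_issue_discard(bdev, start, nr_sects);

	if (ret && ret != -EOPNOTSUPP)
		printk(KERN_WARNING "discard of %u sectors failed: %d\n",
		       nr_sects, ret);
}
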
+ if (unlikely(barrier) && bio_has_data(bio) && + (q->next_ordered == QUEUE_ORDERED_NONE)) { + err = -EOPNOTSUPP; + goto end_io; + } + + discard = bio_discard(bio); + if (unlikely(discard) && !q->prepare_discard_fn) { err = -EOPNOTSUPP; goto end_io; } @@ -1405,7 +1417,8 @@ end_io: if (bio_check_eod(bio, nr_sectors)) goto end_io; - if (bio_empty_barrier(bio) && !q->prepare_flush_fn) { + if ((bio_empty_barrier(bio) && !q->prepare_flush_fn) || + (bio_discard(bio) && !q->prepare_discard_fn)) { err = -EOPNOTSUPP; goto end_io; } @@ -1487,7 +1500,6 @@ void submit_bio(int rw, struct bio *bio) * go through the normal accounting stuff before submission. */ if (bio_has_data(bio)) { - if (rw & WRITE) { count_vm_events(PGPGOUT, count); } else { @@ -1881,7 +1893,7 @@ static int blk_end_io(struct request *rq struct request_queue *q = rq->q; unsigned long flags = 0UL; - if (bio_has_data(rq->bio)) { + if (bio_has_data(rq->bio) || blk_discard_rq(rq)) { if (__end_that_request_first(rq, error, nr_bytes)) return 1; @@ -1939,7 +1951,7 @@ EXPORT_SYMBOL_GPL(blk_end_request); **/ int __blk_end_request(struct request *rq, int error, unsigned int nr_bytes) { - if (bio_has_data(rq->bio) && + if ((bio_has_data(rq->bio) || blk_discard_rq(rq)) && __end_that_request_first(rq, error, nr_bytes)) return 1; @@ -2012,12 +2024,14 @@ void blk_rq_bio_prep(struct request_queu we want BIO_RW_AHEAD (bit 1) to imply REQ_FAILFAST (bit 1). */ rq->cmd_flags |= (bio->bi_rw & 3); - rq->nr_phys_segments = bio_phys_segments(q, bio); - rq->nr_hw_segments = bio_hw_segments(q, bio); + if (bio_has_data(bio)) { + rq->nr_phys_segments = bio_phys_segments(q, bio); + rq->nr_hw_segments = bio_hw_segments(q, bio); + rq->buffer = bio_data(bio); + } rq->current_nr_sectors = bio_cur_sectors(bio); rq->hard_cur_sectors = rq->current_nr_sectors; rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio); - rq->buffer = bio_data(bio); rq->data_len = bio->bi_size; rq->bio = rq->biotail = bio; --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -11,7 +11,7 @@ void blk_recalc_rq_sectors(struct request *rq, int nsect) { - if (blk_fs_request(rq)) { + if (blk_fs_request(rq) || blk_discard_rq(rq)) { rq->hard_sector += nsect; rq->hard_nr_sectors -= nsect; @@ -138,14 +138,18 @@ static int blk_phys_contig_segment(struc if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags)) return 0; - if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt))) - return 0; if (bio->bi_seg_back_size + nxt->bi_seg_front_size > q->max_segment_size) return 0; + if (!bio_has_data(bio)) + return 1; + + if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt))) + return 0; + /* - * bio and nxt are contigous in memory, check if the queue allows + * bio and nxt are contiguous in memory, check if the queue allows * these two to be merged into one */ if (BIO_SEG_BOUNDARY(q, bio, nxt)) @@ -161,8 +165,9 @@ static int blk_hw_contig_segment(struct blk_recount_segments(q, bio); if (!bio_flagged(nxt, BIO_SEG_VALID)) blk_recount_segments(q, nxt); - if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) || - BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size)) + if (bio_has_data(bio) && + (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) || + BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size))) return 0; if (bio->bi_hw_back_size + nxt->bi_hw_front_size > q->max_segment_size) return 0; @@ -325,8 +330,9 @@ int ll_back_merge_fn(struct request_queu if (!bio_flagged(bio, BIO_SEG_VALID)) blk_recount_segments(q, bio); len = req->biotail->bi_hw_back_size + 
bio->bi_hw_front_size; - if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio)) - && !BIOVEC_VIRT_OVERSIZE(len)) { + if (!bio_has_data(bio) || + (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio)) + && !BIOVEC_VIRT_OVERSIZE(len))) { int mergeable = ll_new_mergeable(q, req, bio); if (mergeable) { @@ -364,8 +370,9 @@ int ll_front_merge_fn(struct request_que blk_recount_segments(q, bio); if (!bio_flagged(req->bio, BIO_SEG_VALID)) blk_recount_segments(q, req->bio); - if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) && - !BIOVEC_VIRT_OVERSIZE(len)) { + if (!bio_has_data(bio) || + (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) && + !BIOVEC_VIRT_OVERSIZE(len))) { int mergeable = ll_new_mergeable(q, req, bio); if (mergeable) { --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -33,6 +33,23 @@ void blk_queue_prep_rq(struct request_qu EXPORT_SYMBOL(blk_queue_prep_rq); /** + * blk_queue_set_discard - set a discard_sectors function for queue + * @q: queue + * @dfn: prepare_discard function + * + * It's possible for a queue to register a discard callback which is used + * to transform a discard request into the appropriate type for the + * hardware. If none is registered, then discard requests are failed + * with %EOPNOTSUPP. + * + */ +void blk_queue_set_discard(struct request_queue *q, prepare_discard_fn *dfn) +{ + q->prepare_discard_fn = dfn; +} +EXPORT_SYMBOL(blk_queue_set_discard); + +/** * blk_queue_merge_bvec - set a merge_bvec function for queue * @q: queue * @mbfn: merge_bvec_fn --- a/block/blktrace.c +++ b/block/blktrace.c @@ -111,23 +111,9 @@ static int act_log_check(struct blk_trac */ static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK_TC_WRITE) }; -/* - * Bio action bits of interest - */ -static u32 bio_act[9] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_ACT(BLK_TC_SYNC), 0, BLK_TC_ACT(BLK_TC_AHEAD), 0, 0, 0, BLK_TC_ACT(BLK_TC_META) }; - -/* - * More could be added as needed, taking care to increment the decrementer - * to get correct indexing - */ -#define trace_barrier_bit(rw) \ - (((rw) & (1 << BIO_RW_BARRIER)) >> (BIO_RW_BARRIER - 0)) -#define trace_sync_bit(rw) \ - (((rw) & (1 << BIO_RW_SYNC)) >> (BIO_RW_SYNC - 1)) -#define trace_ahead_bit(rw) \ - (((rw) & (1 << BIO_RW_AHEAD)) << (2 - BIO_RW_AHEAD)) -#define trace_meta_bit(rw) \ - (((rw) & (1 << BIO_RW_META)) >> (BIO_RW_META - 3)) +/* The ilog2() calls fall out because they're constant */ +#define MASK_TC_BIT(rw, __name) ( (rw & (1 << BIO_RW_ ## __name)) << \ + (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name) ) /* * The worker for the various blk_add_trace*() types. 
Fills out a @@ -147,10 +133,11 @@ void __blk_add_trace(struct blk_trace *b return; what |= ddir_act[rw & WRITE]; - what |= bio_act[trace_barrier_bit(rw)]; - what |= bio_act[trace_sync_bit(rw)]; - what |= bio_act[trace_ahead_bit(rw)]; - what |= bio_act[trace_meta_bit(rw)]; + what |= MASK_TC_BIT(rw, BARRIER); + what |= MASK_TC_BIT(rw, SYNC); + what |= MASK_TC_BIT(rw, AHEAD); + what |= MASK_TC_BIT(rw, META); + what |= MASK_TC_BIT(rw, DISCARD); pid = tsk->pid; if (unlikely(act_log_check(bt, what, sector, pid))) --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -788,6 +788,7 @@ long compat_blkdev_ioctl(struct file *fi return compat_hdio_getgeo(disk, bdev, compat_ptr(arg)); case BLKFLSBUF: case BLKROSET: + case BLKDISCARD: /* * the ones below are implemented in blkdev_locked_ioctl, * but we call blkdev_ioctl, which gets the lock for us --- a/block/elevator.c +++ b/block/elevator.c @@ -75,6 +75,12 @@ int elv_rq_merge_ok(struct request *rq, return 0; /* + * Don't merge file system requests and discard requests + */ + if (bio_discard(bio) != bio_discard(rq->bio)) + return 0; + + /* * different data direction or already started, don't merge */ if (bio_data_dir(bio) != rq_data_dir(rq)) @@ -438,6 +444,8 @@ void elv_dispatch_sort(struct request_qu list_for_each_prev(entry, &q->queue_head) { struct request *pos = list_entry_rq(entry); + if (blk_discard_rq(rq) != blk_discard_rq(pos)) + break; if (rq_data_dir(rq) != rq_data_dir(pos)) break; if (pos->cmd_flags & stop_flags) @@ -607,7 +615,7 @@ void elv_insert(struct request_queue *q, break; case ELEVATOR_INSERT_SORT: - BUG_ON(!blk_fs_request(rq)); + BUG_ON(!blk_fs_request(rq) && !blk_discard_rq(rq)); rq->cmd_flags |= REQ_SORTED; q->nr_sorted++; if (rq_mergeable(rq)) { @@ -692,7 +700,7 @@ void __elv_add_request(struct request_qu * this request is scheduling boundary, update * end_sector */ - if (blk_fs_request(rq)) { + if (blk_fs_request(rq) || blk_discard_rq(rq)) { q->end_sector = rq_end_sector(rq); q->boundary_rq = rq; } --- a/block/ioctl.c +++ b/block/ioctl.c @@ -111,6 +111,69 @@ static int blkdev_reread_part(struct blo return res; } +static void blk_ioc_discard_endio(struct bio *bio, int err) +{ + if (err) { + if (err == -EOPNOTSUPP) + set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); + clear_bit(BIO_UPTODATE, &bio->bi_flags); + } + complete(bio->bi_private); +} + +static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, + uint64_t len) +{ + struct request_queue *q = bdev_get_queue(bdev); + int ret = 0; + + if (start & 511) + return -EINVAL; + if (len & 511) + return -EINVAL; + start >>= 9; + len >>= 9; + + if (start + len > (bdev->bd_inode->i_size >> 9)) + return -EINVAL; + + if (!q->prepare_discard_fn) + return -EOPNOTSUPP; + + while (len && !ret) { + DECLARE_COMPLETION_ONSTACK(wait); + struct bio *bio; + + bio = bio_alloc(GFP_KERNEL, 0); + if (!bio) + return -ENOMEM; + + bio->bi_end_io = blk_ioc_discard_endio; + bio->bi_bdev = bdev; + bio->bi_private = &wait; + bio->bi_sector = start; + + if (len > q->max_hw_sectors) { + bio->bi_size = q->max_hw_sectors << 9; + len -= q->max_hw_sectors; + start += q->max_hw_sectors; + } else { + bio->bi_size = len << 9; + len = 0; + } + submit_bio(DISCARD_NOBARRIER, bio); + + wait_for_completion(&wait); + + if (bio_flagged(bio, BIO_EOPNOTSUPP)) + ret = -EOPNOTSUPP; + else if (!bio_flagged(bio, BIO_UPTODATE)) + ret = -EIO; + bio_put(bio); + } + return ret; +} + static int put_ushort(unsigned long arg, unsigned short val) { return put_user(val, (unsigned short __user *)arg); @@ -258,6 +321,19 @@ int 
blkdev_ioctl(struct inode *inode, st set_device_ro(bdev, n); unlock_kernel(); return 0; + + case BLKDISCARD: { + uint64_t range[2]; + + if (!(file->f_mode & FMODE_WRITE)) + return -EBADF; + + if (copy_from_user(range, (void __user *)arg, sizeof(range))) + return -EFAULT; + + return blk_ioctl_discard(bdev, range[0], range[1]); + } + case HDIO_GETGEO: { struct hd_geometry geo; --- a/drivers/mtd/ftl.c +++ b/drivers/mtd/ftl.c @@ -1005,6 +1005,29 @@ static int ftl_writesect(struct mtd_blkt return ftl_write((void *)dev, buf, block, 1); } +static int ftl_discardsect(struct mtd_blktrans_dev *dev, + unsigned long sector, unsigned nr_sects) +{ + partition_t *part = (void *)dev; + uint32_t bsize = 1 << part->header.EraseUnitSize; + + DEBUG(1, "FTL erase sector %ld for %d sectors\n", + sector, nr_sects); + + while (nr_sects) { + uint32_t old_addr = part->VirtualBlockMap[sector]; + if (old_addr != 0xffffffff) { + part->VirtualBlockMap[sector] = 0xffffffff; + part->EUNInfo[old_addr/bsize].Deleted++; + if (set_bam_entry(part, old_addr, 0)) + return -EIO; + } + nr_sects--; + sector++; + } + + return 0; +} /*====================================================================*/ static void ftl_freepart(partition_t *part) @@ -1069,6 +1092,7 @@ static struct mtd_blktrans_ops ftl_tr = .blksize = SECTOR_SIZE, .readsect = ftl_readsect, .writesect = ftl_writesect, + .discard = ftl_discardsect, .getgeo = ftl_getgeo, .add_mtd = ftl_add_mtd, .remove_dev = ftl_remove_dev, --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -32,6 +32,14 @@ struct mtd_blkcore_priv { spinlock_t queue_lock; }; +static int blktrans_discard_request(struct request_queue *q, + struct request *req) +{ + req->cmd_type = REQ_TYPE_LINUX_BLOCK; + req->cmd[0] = REQ_LB_OP_DISCARD; + return 0; +} + static int do_blktrans_request(struct mtd_blktrans_ops *tr, struct mtd_blktrans_dev *dev, struct request *req) @@ -44,6 +52,10 @@ static int do_blktrans_request(struct mt buf = req->buffer; + if (req->cmd_type == REQ_TYPE_LINUX_BLOCK && + req->cmd[0] == REQ_LB_OP_DISCARD) + return !tr->discard(dev, block, nsect); + if (!blk_fs_request(req)) return 0; @@ -367,6 +379,10 @@ int register_mtd_blktrans(struct mtd_blk tr->blkcore_priv->rq->queuedata = tr; blk_queue_hardsect_size(tr->blkcore_priv->rq, tr->blksize); + if (tr->discard) + blk_queue_set_discard(tr->blkcore_priv->rq, + blktrans_discard_request); + tr->blkshift = ffs(tr->blksize) - 1; tr->blkcore_priv->thread = kthread_run(mtd_blktrans_thread, tr, --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -6,6 +6,7 @@ #include #include #include +#include struct fatent_operations { void (*ent_blocknr)(struct super_block *, int, int *, sector_t *); @@ -535,6 +536,7 @@ int fat_free_clusters(struct inode *inod struct fat_entry fatent; struct buffer_head *bhs[MAX_BUF_PER_PAGE]; int i, err, nr_bhs; + int first_cl = cluster; nr_bhs = 0; fatent_init(&fatent); @@ -551,6 +553,18 @@ int fat_free_clusters(struct inode *inod goto error; } + /* + * Issue discard for the sectors we no longer care about, + * batching contiguous clusters into one request + */ + if (cluster != fatent.entry + 1) { + int nr_clus = fatent.entry - first_cl + 1; + + sb_issue_discard(sb, fat_clus_to_blknr(sbi, first_cl), + nr_clus * sbi->sec_per_clus); + first_cl = cluster; + } + ops->ent_put(&fatent, FAT_ENT_FREE); if (sbi->free_clusters != -1) { sbi->free_clusters++; --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -156,6 +156,8 @@ struct bio { * bit 2 -- barrier * bit 3 -- fail fast, don't want low level driver retries * bit 4 -- 
synchronous I/O hint: the block layer will unplug immediately + * bit 5 -- metadata request + * bit 6 -- discard sectors */ #define BIO_RW 0 /* Must match RW in req flags (blkdev.h) */ #define BIO_RW_AHEAD 1 /* Must match FAILFAST in req flags */ @@ -163,6 +165,7 @@ struct bio { #define BIO_RW_FAILFAST 3 #define BIO_RW_SYNC 4 #define BIO_RW_META 5 +#define BIO_RW_DISCARD 6 /* * upper 16 bits of bi_rw define the io priority of this bio @@ -192,14 +195,15 @@ struct bio { #define bio_failfast(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST)) #define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD)) #define bio_rw_meta(bio) ((bio)->bi_rw & (1 << BIO_RW_META)) -#define bio_empty_barrier(bio) (bio_barrier(bio) && !bio_has_data(bio)) +#define bio_discard(bio) ((bio)->bi_rw & (1 << BIO_RW_DISCARD)) +#define bio_empty_barrier(bio) (bio_barrier(bio) && !bio_has_data(bio) && !bio_discard(bio)) static inline unsigned int bio_cur_sectors(struct bio *bio) { if (bio->bi_vcnt) return bio_iovec(bio)->bv_len >> 9; - - return 0; + else /* dataless requests such as discard */ + return bio->bi_size >> 9; } static inline void *bio_data(struct bio *bio) --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -81,6 +81,7 @@ enum { */ REQ_LB_OP_EJECT = 0x40, /* eject request */ REQ_LB_OP_FLUSH = 0x41, /* flush device */ + REQ_LB_OP_DISCARD = 0x42, /* discard sectors */ }; /* @@ -89,6 +90,7 @@ enum { enum rq_flag_bits { __REQ_RW, /* not set, read. set, write */ __REQ_FAILFAST, /* no low level driver retries */ + __REQ_DISCARD, /* request to discard sectors */ __REQ_SORTED, /* elevator knows about this request */ __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ __REQ_HARDBARRIER, /* may not be passed by drive either */ @@ -111,6 +113,7 @@ enum rq_flag_bits { }; #define REQ_RW (1 << __REQ_RW) +#define REQ_DISCARD (1 << __REQ_DISCARD) #define REQ_FAILFAST (1 << __REQ_FAILFAST) #define REQ_SORTED (1 << __REQ_SORTED) #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) @@ -252,6 +255,7 @@ typedef void (request_fn_proc) (struct r typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); typedef int (prep_rq_fn) (struct request_queue *, struct request *); typedef void (unplug_fn) (struct request_queue *); +typedef int (prepare_discard_fn) (struct request_queue *, struct request *); struct bio_vec; struct bvec_merge_data { @@ -307,6 +311,7 @@ struct request_queue make_request_fn *make_request_fn; prep_rq_fn *prep_rq_fn; unplug_fn *unplug_fn; + prepare_discard_fn *prepare_discard_fn; merge_bvec_fn *merge_bvec_fn; prepare_flush_fn *prepare_flush_fn; softirq_done_fn *softirq_done_fn; @@ -536,7 +541,7 @@ enum { #define blk_noretry_request(rq) ((rq)->cmd_flags & REQ_FAILFAST) #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED) -#define blk_account_rq(rq) (blk_rq_started(rq) && blk_fs_request(rq)) +#define blk_account_rq(rq) (blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq))) #define blk_pm_suspend_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND) #define blk_pm_resume_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_RESUME) @@ -546,6 +551,7 @@ enum { #define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED) #define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER) #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) +#define blk_discard_rq(rq) ((rq)->cmd_flags & REQ_DISCARD) #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) #define blk_empty_barrier(rq) (blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors) /* rq->queuelist of dequeued request must be list_empty() */ @@ 
-592,7 +598,8 @@ static inline void blk_clear_queue_full( #define RQ_NOMERGE_FLAGS \ (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) #define rq_mergeable(rq) \ - (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq))) + (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ + (blk_discard_rq(rq) || blk_fs_request((rq)))) /* * q->prep_rq_fn return values @@ -797,6 +804,7 @@ extern void blk_queue_merge_bvec(struct extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); +extern void blk_queue_set_discard(struct request_queue *, prepare_discard_fn *); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *); extern int blk_do_ordered(struct request_queue *, struct request **); @@ -838,6 +846,16 @@ static inline struct request *blk_map_qu } extern int blkdev_issue_flush(struct block_device *, sector_t *); +extern int blkdev_issue_discard(struct block_device *, sector_t sector, + unsigned nr_sects); + +static inline int sb_issue_discard(struct super_block *sb, + sector_t block, unsigned nr_blocks) +{ + block <<= (sb->s_blocksize_bits - 9); + nr_blocks <<= (sb->s_blocksize_bits - 9); + return blkdev_issue_discard(sb->s_bdev, block, nr_blocks); +} /* * command filter functions --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -23,7 +23,8 @@ enum blktrace_cat { BLK_TC_NOTIFY = 1 << 10, /* special message */ BLK_TC_AHEAD = 1 << 11, /* readahead */ BLK_TC_META = 1 << 12, /* metadata */ - BLK_TC_DRV_DATA = 1 << 13, /* binary per-driver data */ + BLK_TC_DISCARD = 1 << 13, /* discard requests */ + BLK_TC_DRV_DATA = 1 << 14, /* binary per-drivers data */ BLK_TC_END = 1 << 15, /* only 16-bits, reminder */ }; @@ -204,6 +205,9 @@ static inline void blk_add_trace_rq(stru if (likely(!bt)) return; + if (blk_discard_rq(rq)) + rw |= (1 << BIO_RW_DISCARD); + if (blk_pc_request(rq)) { what |= BLK_TC_ACT(BLK_TC_PC); __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, sizeof(rq->cmd), rq->cmd); --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -91,7 +91,9 @@ extern int dir_notify_enable; #define READ_META (READ | (1 << BIO_RW_META)) #define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC)) #define SWRITE_SYNC (SWRITE | (1 << BIO_RW_SYNC)) -#define WRITE_BARRIER ((1 << BIO_RW) | (1 << BIO_RW_BARRIER)) +#define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER)) +#define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD) +#define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER)) #define SEL_IN 1 #define SEL_OUT 2 @@ -229,6 +231,7 @@ extern int dir_notify_enable; #define BLKTRACESTART _IO(0x12,116) #define BLKTRACESTOP _IO(0x12,117) #define BLKTRACETEARDOWN _IO(0x12,118) +#define BLKDISCARD _IO(0x12,119) #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ #define FIBMAP _IO(0x00,1) /* bmap access */ --- a/include/linux/mtd/blktrans.h +++ b/include/linux/mtd/blktrans.h @@ -41,6 +41,8 @@ struct mtd_blktrans_ops { unsigned long block, char *buffer); int (*writesect)(struct mtd_blktrans_dev *dev, unsigned long block, char *buffer); + int (*discard)(struct mtd_blktrans_dev *dev, + unsigned long block, unsigned nr_blocks); /* Block layer ioctls */ int (*getgeo)(struct mtd_blktrans_dev *dev, struct hd_geometry *geo);
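
For drivers, the opt-in point is blk_queue_set_discard() from the
blk-settings.c hunk above; a queue that never registers a callback keeps
failing discards with -EOPNOTSUPP, both in __generic_make_request() and in
blkdev_issue_discard().  A minimal sketch of the registration (mydrv_* names
are made up; the callback body mirrors the mtd_blkdevs hook in this patch):

#include <linux/blkdev.h>

static int mydrv_prepare_discard(struct request_queue *q,
				 struct request *req)
{
	/* tag the request so the driver's request handler can spot it */
	req->cmd_type = REQ_TYPE_LINUX_BLOCK;
	req->cmd[0] = REQ_LB_OP_DISCARD;
	return 0;
}

static void mydrv_setup_queue(struct request_queue *q)
{
	blk_queue_set_discard(q, mydrv_prepare_discard);
}

Note that generic_make_request() checks q->prepare_discard_fn before the bio
ever reaches the driver, so even a bio-based driver that wants to see discard
bios would have to register a callback (a no-op one is enough).
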
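
On the completion side, a request-based driver sees a discard as an ordinary
request with REQ_DISCARD set (and, with the hook above, REQ_LB_OP_DISCARD in
cmd[0]) but no data segments, so only the sector range matters.  A rough
sketch using the current elv_next_request()/__blk_end_request() API;
my_trim_range() is made up and stands in for the device-specific operation:

#include <linux/blkdev.h>

static int my_trim_range(sector_t sector, unsigned long nr_sects);

static void mydrv_request_fn(struct request_queue *q)
{
	struct request *rq;

	while ((rq = elv_next_request(q)) != NULL) {
		if (blk_discard_rq(rq)) {
			int err = my_trim_range(rq->sector, rq->nr_sectors);

			blkdev_dequeue_request(rq);
			/* a discard carries no bvecs; data_len covers the range */
			__blk_end_request(rq, err, rq->data_len);
			continue;
		}
		/* normal read/write handling ... */
	}
}
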
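
Filesystems other than FAT can use the same sb_issue_discard() wrapper when
they free a contiguous run of blocks; block-to-sector conversion happens
inside the helper via s_blocksize_bits.  A sketch along the lines of the
fs/fat change above (myfs_* names are made up):

#include <linux/fs.h>
#include <linux/blkdev.h>

static void myfs_free_extent(struct super_block *sb, sector_t start_blk,
			     unsigned nr_blocks)
{
	/* update the filesystem's own free-space accounting first ... */

	/*
	 * Then tell the device it may forget the contents.  Errors,
	 * including -EOPNOTSUPP on devices without discard support, are
	 * not fatal for correctness and can be ignored here.
	 */
	sb_issue_discard(sb, start_blk, nr_blocks);
}
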
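
From userspace, the new BLKDISCARD ioctl takes a uint64_t[2] of {start,
length} in bytes, both 512-byte aligned, on a descriptor opened for writing.
A minimal test program (the ioctl number is the one added to fs.h by this
patch and is defined by hand in case installed headers do not have it yet;
run it only against a throwaway device, since the range may really be
discarded):

#include <stdint.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>

#ifndef BLKDISCARD
#define BLKDISCARD	_IO(0x12,119)	/* value added by this patch */
#endif

int main(int argc, char **argv)
{
	uint64_t range[2] = { 0, 1024 * 1024 };	/* discard the first 1 MiB */
	int fd;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <device>\n", argv[0]);
		return 1;
	}

	fd = open(argv[1], O_WRONLY);	/* BLKDISCARD requires FMODE_WRITE */
	if (fd < 0 || ioctl(fd, BLKDISCARD, range) < 0) {
		perror("BLKDISCARD");
		return 1;
	}

	close(fd);
	return 0;
}
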
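
On the blktrace side, the new MASK_TC_BIT() macro replaces the bio_act[]
table.  For the DISCARD case it expands as follows (assuming BLK_TC_SHIFT is
16 as in blktrace_api.h; BIO_RW_DISCARD = 6 and BLK_TC_DISCARD = 1 << 13 are
the values added by this patch):

	MASK_TC_BIT(rw, DISCARD)
		= (rw & (1 << BIO_RW_DISCARD))
			<< (ilog2(BLK_TC_DISCARD) + BLK_TC_SHIFT - BIO_RW_DISCARD)
		= (rw & (1 << 6)) << (13 + 16 - 6)
		= (rw & (1 << 6)) << 23

so bit 6 of bi_rw lands directly on bit 29 of the action mask, which is
BLK_TC_ACT(BLK_TC_DISCARD), with no lookup table needed.
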