From: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Subject: Block layer fixes for request-based multipathing
References: FATE#302108

This is a combined patch from linux-2.6.git. Commit-IDs:

d6c578ec08b3f07050401ed83193b3f21729213b
afac32f0c9c68698eaf7688d52de859301a0539f
ebd2bf40e9cfa4ebfa614703944f4eafdf0d2c64
509395182b6b7cf7e3c1ca2cd669506d8f43ee01
88171cad9ace4b67c5298e6504d70454296afb76

Signed-off-by: Hannes Reinecke <hare@suse.de>

---
 block/blk-core.c        |  169 +++++++++++++++++++++++++++++++++++++++++++++---
 block/blk-settings.c    |    6 +
 drivers/scsi/scsi_lib.c |   32 +++++++++
 include/linux/blkdev.h  |   12 +++
 4 files changed, 209 insertions(+), 10 deletions(-)

--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -592,7 +592,8 @@ blk_init_queue_node(request_fn_proc *rfn
 	q->request_fn		= rfn;
 	q->prep_rq_fn		= NULL;
 	q->unplug_fn		= generic_unplug_device;
-	q->queue_flags		= (1 << QUEUE_FLAG_CLUSTER);
+	q->queue_flags		= (1 << QUEUE_FLAG_CLUSTER |
+				   1 << QUEUE_FLAG_STACKABLE);
 	q->queue_lock		= lock;

 	blk_queue_segment_boundary(q, 0xffffffff);
@@ -1586,6 +1587,87 @@ void blkdev_dequeue_request(struct reque
 EXPORT_SYMBOL(blkdev_dequeue_request);

 /**
+ * blk_rq_check_limits - Helper function to check a request for the queue limit
+ * @q: the queue
+ * @rq: the request being checked
+ *
+ * Description:
+ *    @rq may have been made based on weaker limitations of upper-level queues
+ *    in request stacking drivers, and it may violate the limitation of @q.
+ *    Since the block layer and the underlying device driver trust @rq
+ *    after it is inserted to @q, it should be checked against @q before
+ *    the insertion using this generic function.
+ *
+ *    This function should also be useful for request stacking drivers
+ *    in some cases below, so export this function.
+ *    Request stacking drivers like request-based dm may change the queue
+ *    limits while requests are in the queue (e.g. dm's table swapping).
+ *    Such request stacking drivers should check those requests against
+ *    the new queue limits again when they dispatch those requests,
+ *    although such checks are also done against the old queue limits
+ *    when submitting requests.
+ */
+int blk_rq_check_limits(struct request_queue *q, struct request *rq)
+{
+	if (rq->nr_sectors > q->max_sectors ||
+	    rq->data_len > q->max_hw_sectors << 9) {
+		printk(KERN_ERR "%s: over max size limit.\n", __func__);
+		return -EIO;
+	}
+
+	/*
+	 * queue's settings related to segment counting like q->bounce_pfn
+	 * may differ from that of other stacking queues.
+	 * Recalculate it to check the request correctly on this queue's
+	 * limitation.
+	 */
+	blk_recalc_rq_segments(rq);
+	if (rq->nr_phys_segments > q->max_phys_segments ||
+	    rq->nr_phys_segments > q->max_hw_segments) {
+		printk(KERN_ERR "%s: over max segments limit.\n", __func__);
+		return -EIO;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(blk_rq_check_limits);
+
+/**
+ * blk_insert_cloned_request - Helper for stacking drivers to submit a request
+ * @q: the queue to submit the request
+ * @rq: the request being queued
+ */
+int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
+{
+	unsigned long flags;
+
+	if (blk_rq_check_limits(q, rq))
+		return -EIO;
+
+#ifdef CONFIG_FAIL_MAKE_REQUEST
+	if (rq->rq_disk && rq->rq_disk->part0.make_it_fail &&
+	    should_fail(&fail_make_request, blk_rq_bytes(rq)))
+		return -EIO;
+#endif
+
+	spin_lock_irqsave(q->queue_lock, flags);
+
+	/*
+	 * The request being submitted must be dequeued before calling this
+	 * function, because it will be linked to another request_queue.
+	 */
+	BUG_ON(blk_queued_rq(rq));
+
+	drive_stat_acct(rq, 1);
+	__elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
+
+	spin_unlock_irqrestore(q->queue_lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
+
+/**
  * __end_that_request_first - end I/O on a request
  * @req: the request being processed
  * @error: %0 for success, < %0 for error
@@ -1857,6 +1939,22 @@ void end_request(struct request *req, in
 }
 EXPORT_SYMBOL(end_request);

+static int end_that_request_data(struct request *rq, int error,
+				 unsigned int nr_bytes, unsigned int bidi_bytes)
+{
+	if (rq->bio) {
+		if (__end_that_request_first(rq, error, nr_bytes))
+			return 1;
+
+		/* Bidi request must be completed as a whole */
+		if (blk_bidi_rq(rq) &&
+		    __end_that_request_first(rq->next_rq, error, bidi_bytes))
+			return 1;
+	}
+
+	return 0;
+}
+
 /**
  * blk_end_io - Generic end_io function to complete a request.
  * @rq: the request being processed
@@ -1883,15 +1981,8 @@ static int blk_end_io(struct request *rq
 	struct request_queue *q = rq->q;
 	unsigned long flags = 0UL;

-	if (rq->bio) {
-		if (__end_that_request_first(rq, error, nr_bytes))
-			return 1;
-
-		/* Bidi request must be completed as a whole */
-		if (blk_bidi_rq(rq) &&
-		    __end_that_request_first(rq->next_rq, error, bidi_bytes))
-			return 1;
-	}
+	if (end_that_request_data(rq, error, nr_bytes, bidi_bytes))
+		return 1;

 	/* Special feature for tricky drivers */
 	if (drv_callback && drv_callback(rq))
@@ -1974,6 +2065,36 @@ int blk_end_bidi_request(struct request
 EXPORT_SYMBOL_GPL(blk_end_bidi_request);

 /**
+ * blk_update_request - Special helper function for request stacking drivers
+ * @rq: the request being processed
+ * @error: %0 for success, < %0 for error
+ * @nr_bytes: number of bytes to complete @rq
+ *
+ * Description:
+ *    Ends I/O on a number of bytes attached to @rq, but doesn't complete
+ *    the request structure even if @rq doesn't have leftover.
+ *    If @rq has leftover, sets it up for the next range of segments.
+ *
+ *    This special helper function is only for request stacking drivers
+ *    (e.g. request-based dm) so that they can handle partial completion.
+ *    Actual device drivers should use blk_end_request instead.
+ */
+void blk_update_request(struct request *rq, int error, unsigned int nr_bytes)
+{
+	if (!end_that_request_data(rq, error, nr_bytes, 0)) {
+		/*
+		 * These members are not updated in end_that_request_data()
+		 * when all bios are completed.
+		 * Update them so that the request stacking driver can find
+		 * how many bytes remain in the request later.
+		 */
+		rq->nr_sectors = rq->hard_nr_sectors = 0;
+		rq->current_nr_sectors = rq->hard_cur_sectors = 0;
+	}
+}
+EXPORT_SYMBOL_GPL(blk_update_request);
+
+/**
  * blk_end_request_callback - Special helper function for tricky drivers
  * @rq: the request being processed
  * @error: %0 for success, < %0 for error
@@ -2028,6 +2149,34 @@ void blk_rq_bio_prep(struct request_queu
 	rq->rq_disk = bio->bi_bdev->bd_disk;
 }

+/**
+ * blk_lld_busy - Check if underlying low-level drivers of a device are busy
+ * @q : the queue of the device being checked
+ *
+ * Description:
+ *    Check if underlying low-level drivers of a device are busy.
+ *    If the drivers want to export their busy state, they must set their
+ *    own exporting function using blk_queue_lld_busy() first.
+ *
+ *    Basically, this function is used only by request stacking drivers
+ *    to stop dispatching requests to underlying devices when underlying
+ *    devices are busy.  This behavior allows more I/O merging on the queue
+ *    of the request stacking driver and prevents I/O throughput regression
+ *    under bursty I/O load.
+ *
+ * Return:
+ *    0 - Not busy (The request stacking driver should dispatch request)
+ *    1 - Busy (The request stacking driver should stop dispatching request)
+ */
+int blk_lld_busy(struct request_queue *q)
+{
+	if (q->lld_busy_fn)
+		return q->lld_busy_fn(q);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(blk_lld_busy);
+
 int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
 {
 	return queue_work(kblockd_workqueue, work);
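
Usage sketch (illustration only, not part of the patch): a request stacking
driver would call the helpers above from its dispatch path roughly as in the
sketch below. The names my_dispatch_clone, lower_q and clone are hypothetical,
and the clone is assumed to have been prepared and dequeued by the stacking
driver itself. Only queues that advertise QUEUE_FLAG_STACKABLE (see the
blk_queue_stackable() macro added to blkdev.h further down) are meant to
receive such cloned requests.

#include <linux/blkdev.h>

/*
 * Hypothetical dispatch step of a request stacking driver: 'clone'
 * mirrors an original request from the stacking queue and has already
 * been dequeued there; 'lower_q' belongs to the underlying device.
 */
static int my_dispatch_clone(struct request_queue *lower_q,
			     struct request *clone)
{
	/*
	 * blk_insert_cloned_request() re-checks the clone against
	 * lower_q's limits via blk_rq_check_limits() and, if they are
	 * met, links it to lower_q for the low-level driver to execute.
	 */
	return blk_insert_cloned_request(lower_q, clone);
}
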
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -89,6 +89,12 @@ void blk_queue_rq_timed_out(struct reque
 }
 EXPORT_SYMBOL_GPL(blk_queue_rq_timed_out);

+void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn)
+{
+	q->lld_busy_fn = fn;
+}
+EXPORT_SYMBOL_GPL(blk_queue_lld_busy);
+
 /**
  * blk_queue_make_request - define an alternate make_request function for a device
  * @q: the request queue for the device to be affected
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1465,6 +1465,37 @@ static inline int scsi_host_queue_ready(
 }

 /*
+ * Busy state exporting function for request stacking drivers.
+ *
+ * For efficiency, no lock is taken to check the busy state of
+ * shost/starget/sdev, since the returned value is not guaranteed and
+ * may change after request stacking drivers call the function,
+ * regardless of whether a lock is taken or not.
+ *
+ * When scsi can't dispatch I/Os anymore and needs to kill I/Os
+ * (e.g. !sdev), scsi needs to return 'not busy'.
+ * Otherwise, request stacking drivers may hold requests forever.
+ */
+static int scsi_lld_busy(struct request_queue *q)
+{
+	struct scsi_device *sdev = q->queuedata;
+	struct Scsi_Host *shost;
+	struct scsi_target *starget;
+
+	if (!sdev)
+		return 0;
+
+	shost = sdev->host;
+	starget = scsi_target(sdev);
+
+	if (scsi_host_in_recovery(shost) || scsi_host_is_busy(shost) ||
+	    scsi_target_is_busy(starget) || scsi_device_is_busy(sdev))
+		return 1;
+
+	return 0;
+}
+
+/*
  * Kill a request for a dead device
  */
 static void scsi_kill_request(struct request *req, struct request_queue *q)
@@ -1778,6 +1809,7 @@ struct request_queue *scsi_alloc_queue(s
 	blk_queue_prep_rq(q, scsi_prep_fn);
 	blk_queue_softirq_done(q, scsi_softirq_done);
 	blk_queue_rq_timed_out(q, scsi_times_out);
+	blk_queue_lld_busy(q, scsi_lld_busy);
 	return q;
 }

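
Usage sketch (illustration only, not part of the patch): with scsi_lld_busy()
registered through blk_queue_lld_busy(), a request stacking driver can poll
the lower device before dispatching a clone. my_try_dispatch below is a
hypothetical name and the error handling is only indicative.

#include <linux/blkdev.h>
#include <linux/errno.h>

/*
 * Hypothetical pre-dispatch check in a request stacking driver.
 * Returns 0 if the clone was handed to the lower queue, -EBUSY if the
 * caller should keep it on its own queue and retry later (which also
 * allows further merging there).
 */
static int my_try_dispatch(struct request_queue *lower_q,
			   struct request *clone)
{
	/* blk_lld_busy() invokes lower_q->lld_busy_fn, e.g. scsi_lld_busy(). */
	if (blk_lld_busy(lower_q))
		return -EBUSY;

	return blk_insert_cloned_request(lower_q, clone);
}
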
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -272,6 +272,7 @@ typedef int (merge_bvec_fn) (struct requ
 typedef void (prepare_flush_fn) (struct request_queue *, struct request *);
 typedef void (softirq_done_fn)(struct request *);
 typedef int (dma_drain_needed_fn)(struct request *);
+typedef int (lld_busy_fn) (struct request_queue *q);

 enum blk_eh_timer_return {
 	BLK_EH_NOT_HANDLED,
@@ -328,6 +329,7 @@ struct request_queue
 	softirq_done_fn		*softirq_done_fn;
 	rq_timed_out_fn		*rq_timed_out_fn;
 	dma_drain_needed_fn	*dma_drain_needed;
+	lld_busy_fn		*lld_busy_fn;

 	/*
 	 * Dispatch queue sorting
@@ -443,6 +445,7 @@ struct request_queue
 #define QUEUE_FLAG_BIDI		9	/* queue supports bidi requests */
 #define QUEUE_FLAG_NOMERGES	10	/* disable merge attempts */
 #define QUEUE_FLAG_SAME_COMP	11	/* force complete on same CPU */
+#define QUEUE_FLAG_STACKABLE	13	/* supports request stacking */

 static inline int queue_is_locked(struct request_queue *q)
 {
@@ -549,6 +552,8 @@ enum {
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
 #define blk_queue_nomerges(q)	test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
 #define blk_queue_flushing(q)	((q)->ordseq)
+#define blk_queue_stackable(q)	\
+	test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags)

 #define blk_fs_request(rq)	((rq)->cmd_type == REQ_TYPE_FS)
 #define blk_pc_request(rq)	((rq)->cmd_type == REQ_TYPE_BLOCK_PC)
@@ -695,6 +700,10 @@ extern void __blk_put_request(struct req
 extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
 extern void blk_insert_request(struct request_queue *, struct request *, int, void *);
 extern void blk_requeue_request(struct request_queue *, struct request *);
+extern int blk_rq_check_limits(struct request_queue *q, struct request *rq);
+extern int blk_lld_busy(struct request_queue *q);
+extern int blk_insert_cloned_request(struct request_queue *q,
+				     struct request *rq);
 extern void blk_plug_device(struct request_queue *);
 extern void blk_plug_device_unlocked(struct request_queue *);
 extern int blk_remove_plug(struct request_queue *);
@@ -792,6 +801,8 @@ extern void blk_complete_request(struct
 extern void __blk_complete_request(struct request *);
 extern void blk_abort_request(struct request *);
 extern void blk_abort_queue(struct request_queue *);
+extern void blk_update_request(struct request *rq, int error,
+			       unsigned int nr_bytes);

 /*
  * blk_end_request() takes bytes instead of sectors as a complete size.
@@ -821,6 +832,7 @@ extern void blk_queue_update_dma_pad(str
 extern int blk_queue_dma_drain(struct request_queue *q,
 			       dma_drain_needed_fn *dma_drain_needed,
 			       void *buf, unsigned int size);
+extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn);
 extern void blk_queue_segment_boundary(struct request_queue *, unsigned long);
 extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn);
 extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *);
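
Usage sketch (illustration only, not part of the patch): blk_update_request()
is intended for the completion path of a request stacking driver, where a
clone may finish only partially. my_update_original below is a hypothetical
name; a real driver would requeue or redirect the leftover rather than merely
noting it.

#include <linux/blkdev.h>

/*
 * Hypothetical completion handling: propagate 'bytes' completed by the
 * clone to the original request without completing the original, so the
 * stacking driver keeps control of whatever is left.
 */
static void my_update_original(struct request *original, int error,
			       unsigned int bytes)
{
	/* Advances bio/segment accounting only; never frees 'original'. */
	blk_update_request(original, error, bytes);

	if (!blk_rq_bytes(original))
		blk_end_request(original, error, 0);	/* fully completed */
	/* else: leftover bytes remain for the driver to requeue or redirect */
}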