]> git.ipfire.org Git - people/ms/linux.git/blob - drivers/infiniband/ulp/srp/ib_srp.c
IB/srp: Fix a memory leak
[people/ms/linux.git] / drivers / infiniband / ulp / srp / ib_srp.c
1 /*
2 * Copyright (c) 2005 Cisco Systems. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
34
35 #include <linux/module.h>
36 #include <linux/init.h>
37 #include <linux/slab.h>
38 #include <linux/err.h>
39 #include <linux/string.h>
40 #include <linux/parser.h>
41 #include <linux/random.h>
42 #include <linux/jiffies.h>
43 #include <rdma/ib_cache.h>
44
45 #include <linux/atomic.h>
46
47 #include <scsi/scsi.h>
48 #include <scsi/scsi_device.h>
49 #include <scsi/scsi_dbg.h>
50 #include <scsi/scsi_tcq.h>
51 #include <scsi/srp.h>
52 #include <scsi/scsi_transport_srp.h>
53
54 #include "ib_srp.h"
55
56 #define DRV_NAME "ib_srp"
57 #define PFX DRV_NAME ": "
58 #define DRV_VERSION "2.0"
59 #define DRV_RELDATE "July 26, 2015"
60
61 MODULE_AUTHOR("Roland Dreier");
62 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
63 MODULE_LICENSE("Dual BSD/GPL");
64 MODULE_VERSION(DRV_VERSION);
65 MODULE_INFO(release_date, DRV_RELDATE);
66
/*
 * Backing variables for the module parameters registered right after this
 * block with module_param(). The three scatter/gather counters and
 * allow_ext_sg start out zero/false; prefer_fr, register_always and
 * topspin_workarounds are enabled by default.
 */
67 static unsigned int srp_sg_tablesize;
68 static unsigned int cmd_sg_entries;
69 static unsigned int indirect_sg_entries;
70 static bool allow_ext_sg;
71 static bool prefer_fr = true;
72 static bool register_always = true;
73 static int topspin_workarounds = 1;
74
75 module_param(srp_sg_tablesize, uint, 0444);
76 MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");
77
78 module_param(cmd_sg_entries, uint, 0444);
79 MODULE_PARM_DESC(cmd_sg_entries,
80 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");
81
82 module_param(indirect_sg_entries, uint, 0444);
83 MODULE_PARM_DESC(indirect_sg_entries,
84 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SCSI_MAX_SG_CHAIN_SEGMENTS) ")");
85
86 module_param(allow_ext_sg, bool, 0444);
87 MODULE_PARM_DESC(allow_ext_sg,
88 "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");
89
90 module_param(topspin_workarounds, int, 0444);
91 MODULE_PARM_DESC(topspin_workarounds,
92 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
93
94 module_param(prefer_fr, bool, 0444);
95 MODULE_PARM_DESC(prefer_fr,
96 "Whether to use fast registration if both FMR and fast registration are supported");
97
98 module_param(register_always, bool, 0444);
99 MODULE_PARM_DESC(register_always,
100 "Use memory registration even for contiguous memory regions");
101
102 static const struct kernel_param_ops srp_tmo_ops;
103
104 static int srp_reconnect_delay = 10;
105 module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
106 S_IRUGO | S_IWUSR);
107 MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
108
109 static int srp_fast_io_fail_tmo = 15;
110 module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
111 S_IRUGO | S_IWUSR);
112 MODULE_PARM_DESC(fast_io_fail_tmo,
113 "Number of seconds between the observation of a transport"
114 " layer error and failing all I/O. \"off\" means that this"
115 " functionality is disabled.");
116
117 static int srp_dev_loss_tmo = 600;
118 module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
119 S_IRUGO | S_IWUSR);
120 MODULE_PARM_DESC(dev_loss_tmo,
121 "Maximum number of seconds that the SRP transport should"
122 " insulate transport layer errors. After this time has been"
123 " exceeded the SCSI host is removed. Should be"
124 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
125 " if fast_io_fail_tmo has not been set. \"off\" means that"
126 " this functionality is disabled.");
127
128 static unsigned ch_count;
129 module_param(ch_count, uint, 0444);
130 MODULE_PARM_DESC(ch_count,
131 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
132
/*
 * Forward declarations of callbacks that are referenced before they are
 * defined: srp_add_one()/srp_remove_one() by srp_client, the two completion
 * handlers by srp_create_ch_ib() and srp_cm_handler() by srp_new_cm_id().
 */
133 static void srp_add_one(struct ib_device *device);
134 static void srp_remove_one(struct ib_device *device, void *client_data);
135 static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr);
136 static void srp_send_completion(struct ib_cq *cq, void *ch_ptr);
137 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
138
139 static struct scsi_transport_template *ib_srp_transport_template;
140 static struct workqueue_struct *srp_remove_wq;
141
/*
 * IB client descriptor named "srp"; wires srp_add_one() and srp_remove_one()
 * in as the add and remove callbacks.
 */
142 static struct ib_client srp_client = {
143 .name = "srp",
144 .add = srp_add_one,
145 .remove = srp_remove_one
146 };
147
148 static struct ib_sa_client srp_sa_client;
149
150 static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
151 {
152 int tmo = *(int *)kp->arg;
153
154 if (tmo >= 0)
155 return sprintf(buffer, "%d", tmo);
156 else
157 return sprintf(buffer, "off");
158 }
159
160 static int srp_tmo_set(const char *val, const struct kernel_param *kp)
161 {
162 int tmo, res;
163
164 res = srp_parse_tmo(&tmo, val);
165 if (res)
166 goto out;
167
168 if (kp->arg == &srp_reconnect_delay)
169 res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
170 srp_dev_loss_tmo);
171 else if (kp->arg == &srp_fast_io_fail_tmo)
172 res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
173 else
174 res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
175 tmo);
176 if (res)
177 goto out;
178 *(int *)kp->arg = tmo;
179
180 out:
181 return res;
182 }
183
184 static const struct kernel_param_ops srp_tmo_ops = {
185 .get = srp_tmo_get,
186 .set = srp_tmo_set,
187 };
188
189 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
190 {
191 return (struct srp_target_port *) host->hostdata;
192 }
193
194 static const char *srp_target_info(struct Scsi_Host *host)
195 {
196 return host_to_target(host)->target_name;
197 }
198
199 static int srp_target_is_topspin(struct srp_target_port *target)
200 {
201 static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
202 static const u8 cisco_oui[3] = { 0x00, 0x1b, 0x0d };
203
204 return topspin_workarounds &&
205 (!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
206 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
207 }
208
209 static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
210 gfp_t gfp_mask,
211 enum dma_data_direction direction)
212 {
213 struct srp_iu *iu;
214
215 iu = kmalloc(sizeof *iu, gfp_mask);
216 if (!iu)
217 goto out;
218
219 iu->buf = kzalloc(size, gfp_mask);
220 if (!iu->buf)
221 goto out_free_iu;
222
223 iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
224 direction);
225 if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
226 goto out_free_buf;
227
228 iu->size = size;
229 iu->direction = direction;
230
231 return iu;
232
233 out_free_buf:
234 kfree(iu->buf);
235 out_free_iu:
236 kfree(iu);
237 out:
238 return NULL;
239 }
240
241 static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
242 {
243 if (!iu)
244 return;
245
246 ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
247 iu->direction);
248 kfree(iu->buf);
249 kfree(iu);
250 }
251
252 static void srp_qp_event(struct ib_event *event, void *context)
253 {
254 pr_debug("QP event %s (%d)\n",
255 ib_event_msg(event->event), event->event);
256 }
257
258 static int srp_init_qp(struct srp_target_port *target,
259 struct ib_qp *qp)
260 {
261 struct ib_qp_attr *attr;
262 int ret;
263
264 attr = kmalloc(sizeof *attr, GFP_KERNEL);
265 if (!attr)
266 return -ENOMEM;
267
268 ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
269 target->srp_host->port,
270 be16_to_cpu(target->pkey),
271 &attr->pkey_index);
272 if (ret)
273 goto out;
274
275 attr->qp_state = IB_QPS_INIT;
276 attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
277 IB_ACCESS_REMOTE_WRITE);
278 attr->port_num = target->srp_host->port;
279
280 ret = ib_modify_qp(qp, attr,
281 IB_QP_STATE |
282 IB_QP_PKEY_INDEX |
283 IB_QP_ACCESS_FLAGS |
284 IB_QP_PORT);
285
286 out:
287 kfree(attr);
288 return ret;
289 }
290
291 static int srp_new_cm_id(struct srp_rdma_ch *ch)
292 {
293 struct srp_target_port *target = ch->target;
294 struct ib_cm_id *new_cm_id;
295
296 new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
297 srp_cm_handler, ch);
298 if (IS_ERR(new_cm_id))
299 return PTR_ERR(new_cm_id);
300
301 if (ch->cm_id)
302 ib_destroy_cm_id(ch->cm_id);
303 ch->cm_id = new_cm_id;
304 ch->path.sgid = target->sgid;
305 ch->path.dgid = target->orig_dgid;
306 ch->path.pkey = target->pkey;
307 ch->path.service_id = target->service_id;
308
309 return 0;
310 }
311
312 static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
313 {
314 struct srp_device *dev = target->srp_host->srp_dev;
315 struct ib_fmr_pool_param fmr_param;
316
317 memset(&fmr_param, 0, sizeof(fmr_param));
318 fmr_param.pool_size = target->scsi_host->can_queue;
319 fmr_param.dirty_watermark = fmr_param.pool_size / 4;
320 fmr_param.cache = 1;
321 fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
322 fmr_param.page_shift = ilog2(dev->mr_page_size);
323 fmr_param.access = (IB_ACCESS_LOCAL_WRITE |
324 IB_ACCESS_REMOTE_WRITE |
325 IB_ACCESS_REMOTE_READ);
326
327 return ib_create_fmr_pool(dev->pd, &fmr_param);
328 }
329
330 /**
331 * srp_destroy_fr_pool() - free the resources owned by a pool
332 * @pool: Fast registration pool to be destroyed.
333 */
334 static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
335 {
336 int i;
337 struct srp_fr_desc *d;
338
339 if (!pool)
340 return;
341
342 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
343 if (d->mr)
344 ib_dereg_mr(d->mr);
345 }
346 kfree(pool);
347 }
348
349 /**
350 * srp_create_fr_pool() - allocate and initialize a pool for fast registration
351 * @device: IB device to allocate fast registration descriptors for.
352 * @pd: Protection domain associated with the FR descriptors.
353 * @pool_size: Number of descriptors to allocate.
354 * @max_page_list_len: Maximum fast registration work request page list length.
355 */
356 static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
357 struct ib_pd *pd, int pool_size,
358 int max_page_list_len)
359 {
360 struct srp_fr_pool *pool;
361 struct srp_fr_desc *d;
362 struct ib_mr *mr;
363 int i, ret = -EINVAL;
364
365 if (pool_size <= 0)
366 goto err;
367 ret = -ENOMEM;
368 pool = kzalloc(sizeof(struct srp_fr_pool) +
369 pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
370 if (!pool)
371 goto err;
372 pool->size = pool_size;
373 pool->max_page_list_len = max_page_list_len;
374 spin_lock_init(&pool->lock);
375 INIT_LIST_HEAD(&pool->free_list);
376
377 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
378 mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
379 max_page_list_len);
380 if (IS_ERR(mr)) {
381 ret = PTR_ERR(mr);
382 goto destroy_pool;
383 }
384 d->mr = mr;
385 list_add_tail(&d->entry, &pool->free_list);
386 }
387
388 out:
389 return pool;
390
391 destroy_pool:
392 srp_destroy_fr_pool(pool);
393
394 err:
395 pool = ERR_PTR(ret);
396 goto out;
397 }
398
399 /**
400 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
401 * @pool: Pool to obtain descriptor from.
402 */
403 static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
404 {
405 struct srp_fr_desc *d = NULL;
406 unsigned long flags;
407
408 spin_lock_irqsave(&pool->lock, flags);
409 if (!list_empty(&pool->free_list)) {
410 d = list_first_entry(&pool->free_list, typeof(*d), entry);
411 list_del(&d->entry);
412 }
413 spin_unlock_irqrestore(&pool->lock, flags);
414
415 return d;
416 }
417
418 /**
419 * srp_fr_pool_put() - put an FR descriptor back in the free list
420 * @pool: Pool the descriptor was allocated from.
421 * @desc: Pointer to an array of fast registration descriptor pointers.
422 * @n: Number of descriptors to put back.
423 *
424 * Note: The caller must already have queued an invalidation request for
425 * desc->mr->rkey before calling this function.
426 */
427 static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
428 int n)
429 {
430 unsigned long flags;
431 int i;
432
433 spin_lock_irqsave(&pool->lock, flags);
434 for (i = 0; i < n; i++)
435 list_add(&desc[i]->entry, &pool->free_list);
436 spin_unlock_irqrestore(&pool->lock, flags);
437 }
438
439 static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
440 {
441 struct srp_device *dev = target->srp_host->srp_dev;
442
443 return srp_create_fr_pool(dev->dev, dev->pd,
444 target->scsi_host->can_queue,
445 dev->max_pages_per_mr);
446 }
447
448 /**
449 * srp_destroy_qp() - destroy an RDMA queue pair
450 * @ch: SRP RDMA channel.
451 *
452 * Change a queue pair into the error state and wait until all receive
453 * completions have been processed before destroying it. This avoids that
454 * the receive completion handler can access the queue pair while it is
455 * being destroyed.
456 */
457 static void srp_destroy_qp(struct srp_rdma_ch *ch)
458 {
459 static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
460 static struct ib_recv_wr wr = { .wr_id = SRP_LAST_WR_ID };
461 struct ib_recv_wr *bad_wr;
462 int ret;
463
464 /* Destroying a QP and reusing ch->done is only safe if not connected */
465 WARN_ON_ONCE(ch->connected);
466
467 ret = ib_modify_qp(ch->qp, &attr, IB_QP_STATE);
468 WARN_ONCE(ret, "ib_cm_init_qp_attr() returned %d\n", ret);
469 if (ret)
470 goto out;
471
472 init_completion(&ch->done);
473 ret = ib_post_recv(ch->qp, &wr, &bad_wr);
474 WARN_ONCE(ret, "ib_post_recv() returned %d\n", ret);
475 if (ret == 0)
476 wait_for_completion(&ch->done);
477
478 out:
479 ib_destroy_qp(ch->qp);
480 }
481
482 static int srp_create_ch_ib(struct srp_rdma_ch *ch)
483 {
484 struct srp_target_port *target = ch->target;
485 struct srp_device *dev = target->srp_host->srp_dev;
486 struct ib_qp_init_attr *init_attr;
487 struct ib_cq *recv_cq, *send_cq;
488 struct ib_qp *qp;
489 struct ib_fmr_pool *fmr_pool = NULL;
490 struct srp_fr_pool *fr_pool = NULL;
491 const int m = 1 + dev->use_fast_reg;
492 struct ib_cq_init_attr cq_attr = {};
493 int ret;
494
495 init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
496 if (!init_attr)
497 return -ENOMEM;
498
499 /* + 1 for SRP_LAST_WR_ID */
500 cq_attr.cqe = target->queue_size + 1;
501 cq_attr.comp_vector = ch->comp_vector;
502 recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, ch,
503 &cq_attr);
504 if (IS_ERR(recv_cq)) {
505 ret = PTR_ERR(recv_cq);
506 goto err;
507 }
508
509 cq_attr.cqe = m * target->queue_size;
510 cq_attr.comp_vector = ch->comp_vector;
511 send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, ch,
512 &cq_attr);
513 if (IS_ERR(send_cq)) {
514 ret = PTR_ERR(send_cq);
515 goto err_recv_cq;
516 }
517
518 ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);
519
520 init_attr->event_handler = srp_qp_event;
521 init_attr->cap.max_send_wr = m * target->queue_size;
522 init_attr->cap.max_recv_wr = target->queue_size + 1;
523 init_attr->cap.max_recv_sge = 1;
524 init_attr->cap.max_send_sge = 1;
525 init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
526 init_attr->qp_type = IB_QPT_RC;
527 init_attr->send_cq = send_cq;
528 init_attr->recv_cq = recv_cq;
529
530 qp = ib_create_qp(dev->pd, init_attr);
531 if (IS_ERR(qp)) {
532 ret = PTR_ERR(qp);
533 goto err_send_cq;
534 }
535
536 ret = srp_init_qp(target, qp);
537 if (ret)
538 goto err_qp;
539
540 if (dev->use_fast_reg) {
541 fr_pool = srp_alloc_fr_pool(target);
542 if (IS_ERR(fr_pool)) {
543 ret = PTR_ERR(fr_pool);
544 shost_printk(KERN_WARNING, target->scsi_host, PFX
545 "FR pool allocation failed (%d)\n", ret);
546 goto err_qp;
547 }
548 } else if (dev->use_fmr) {
549 fmr_pool = srp_alloc_fmr_pool(target);
550 if (IS_ERR(fmr_pool)) {
551 ret = PTR_ERR(fmr_pool);
552 shost_printk(KERN_WARNING, target->scsi_host, PFX
553 "FMR pool allocation failed (%d)\n", ret);
554 goto err_qp;
555 }
556 }
557
558 if (ch->qp)
559 srp_destroy_qp(ch);
560 if (ch->recv_cq)
561 ib_destroy_cq(ch->recv_cq);
562 if (ch->send_cq)
563 ib_destroy_cq(ch->send_cq);
564
565 ch->qp = qp;
566 ch->recv_cq = recv_cq;
567 ch->send_cq = send_cq;
568
569 if (dev->use_fast_reg) {
570 if (ch->fr_pool)
571 srp_destroy_fr_pool(ch->fr_pool);
572 ch->fr_pool = fr_pool;
573 } else if (dev->use_fmr) {
574 if (ch->fmr_pool)
575 ib_destroy_fmr_pool(ch->fmr_pool);
576 ch->fmr_pool = fmr_pool;
577 }
578
579 kfree(init_attr);
580 return 0;
581
582 err_qp:
583 ib_destroy_qp(qp);
584
585 err_send_cq:
586 ib_destroy_cq(send_cq);
587
588 err_recv_cq:
589 ib_destroy_cq(recv_cq);
590
591 err:
592 kfree(init_attr);
593 return ret;
594 }
595
596 /*
597 * Note: this function may be called without srp_alloc_iu_bufs() having been
598 * invoked. Hence the ch->[rt]x_ring checks.
599 */
600 static void srp_free_ch_ib(struct srp_target_port *target,
601 struct srp_rdma_ch *ch)
602 {
603 struct srp_device *dev = target->srp_host->srp_dev;
604 int i;
605
606 if (!ch->target)
607 return;
608
609 if (ch->cm_id) {
610 ib_destroy_cm_id(ch->cm_id);
611 ch->cm_id = NULL;
612 }
613
614 /* If srp_new_cm_id() succeeded but srp_create_ch_ib() not, return. */
615 if (!ch->qp)
616 return;
617
618 if (dev->use_fast_reg) {
619 if (ch->fr_pool)
620 srp_destroy_fr_pool(ch->fr_pool);
621 } else if (dev->use_fmr) {
622 if (ch->fmr_pool)
623 ib_destroy_fmr_pool(ch->fmr_pool);
624 }
625 srp_destroy_qp(ch);
626 ib_destroy_cq(ch->send_cq);
627 ib_destroy_cq(ch->recv_cq);
628
629 /*
630 * Avoid that the SCSI error handler tries to use this channel after
631 * it has been freed. The SCSI error handler can namely continue
632 * trying to perform recovery actions after scsi_remove_host()
633 * returned.
634 */
635 ch->target = NULL;
636
637 ch->qp = NULL;
638 ch->send_cq = ch->recv_cq = NULL;
639
640 if (ch->rx_ring) {
641 for (i = 0; i < target->queue_size; ++i)
642 srp_free_iu(target->srp_host, ch->rx_ring[i]);
643 kfree(ch->rx_ring);
644 ch->rx_ring = NULL;
645 }
646 if (ch->tx_ring) {
647 for (i = 0; i < target->queue_size; ++i)
648 srp_free_iu(target->srp_host, ch->tx_ring[i]);
649 kfree(ch->tx_ring);
650 ch->tx_ring = NULL;
651 }
652 }
653
654 static void srp_path_rec_completion(int status,
655 struct ib_sa_path_rec *pathrec,
656 void *ch_ptr)
657 {
658 struct srp_rdma_ch *ch = ch_ptr;
659 struct srp_target_port *target = ch->target;
660
661 ch->status = status;
662 if (status)
663 shost_printk(KERN_ERR, target->scsi_host,
664 PFX "Got failed path rec status %d\n", status);
665 else
666 ch->path = *pathrec;
667 complete(&ch->done);
668 }
669
670 static int srp_lookup_path(struct srp_rdma_ch *ch)
671 {
672 struct srp_target_port *target = ch->target;
673 int ret;
674
675 ch->path.numb_path = 1;
676
677 init_completion(&ch->done);
678
679 ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
680 target->srp_host->srp_dev->dev,
681 target->srp_host->port,
682 &ch->path,
683 IB_SA_PATH_REC_SERVICE_ID |
684 IB_SA_PATH_REC_DGID |
685 IB_SA_PATH_REC_SGID |
686 IB_SA_PATH_REC_NUMB_PATH |
687 IB_SA_PATH_REC_PKEY,
688 SRP_PATH_REC_TIMEOUT_MS,
689 GFP_KERNEL,
690 srp_path_rec_completion,
691 ch, &ch->path_query);
692 if (ch->path_query_id < 0)
693 return ch->path_query_id;
694
695 ret = wait_for_completion_interruptible(&ch->done);
696 if (ret < 0)
697 return ret;
698
699 if (ch->status < 0)
700 shost_printk(KERN_WARNING, target->scsi_host,
701 PFX "Path record query failed\n");
702
703 return ch->status;
704 }
705
706 static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
707 {
708 struct srp_target_port *target = ch->target;
709 struct {
710 struct ib_cm_req_param param;
711 struct srp_login_req priv;
712 } *req = NULL;
713 int status;
714
715 req = kzalloc(sizeof *req, GFP_KERNEL);
716 if (!req)
717 return -ENOMEM;
718
719 req->param.primary_path = &ch->path;
720 req->param.alternate_path = NULL;
721 req->param.service_id = target->service_id;
722 req->param.qp_num = ch->qp->qp_num;
723 req->param.qp_type = ch->qp->qp_type;
724 req->param.private_data = &req->priv;
725 req->param.private_data_len = sizeof req->priv;
726 req->param.flow_control = 1;
727
728 get_random_bytes(&req->param.starting_psn, 4);
729 req->param.starting_psn &= 0xffffff;
730
731 /*
732 * Pick some arbitrary defaults here; we could make these
733 * module parameters if anyone cared about setting them.
734 */
735 req->param.responder_resources = 4;
736 req->param.remote_cm_response_timeout = 20;
737 req->param.local_cm_response_timeout = 20;
738 req->param.retry_count = target->tl_retry_count;
739 req->param.rnr_retry_count = 7;
740 req->param.max_cm_retries = 15;
741
742 req->priv.opcode = SRP_LOGIN_REQ;
743 req->priv.tag = 0;
744 req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);
745 req->priv.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
746 SRP_BUF_FORMAT_INDIRECT);
747 req->priv.req_flags = (multich ? SRP_MULTICHAN_MULTI :
748 SRP_MULTICHAN_SINGLE);
749 /*
750 * In the published SRP specification (draft rev. 16a), the
751 * port identifier format is 8 bytes of ID extension followed
752 * by 8 bytes of GUID. Older drafts put the two halves in the
753 * opposite order, so that the GUID comes first.
754 *
755 * Targets conforming to these obsolete drafts can be
756 * recognized by the I/O Class they report.
757 */
758 if (target->io_class == SRP_REV10_IB_IO_CLASS) {
759 memcpy(req->priv.initiator_port_id,
760 &target->sgid.global.interface_id, 8);
761 memcpy(req->priv.initiator_port_id + 8,
762 &target->initiator_ext, 8);
763 memcpy(req->priv.target_port_id, &target->ioc_guid, 8);
764 memcpy(req->priv.target_port_id + 8, &target->id_ext, 8);
765 } else {
766 memcpy(req->priv.initiator_port_id,
767 &target->initiator_ext, 8);
768 memcpy(req->priv.initiator_port_id + 8,
769 &target->sgid.global.interface_id, 8);
770 memcpy(req->priv.target_port_id, &target->id_ext, 8);
771 memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
772 }
773
774 /*
775 * Topspin/Cisco SRP targets will reject our login unless we
776 * zero out the first 8 bytes of our initiator port ID and set
777 * the second 8 bytes to the local node GUID.
778 */
779 if (srp_target_is_topspin(target)) {
780 shost_printk(KERN_DEBUG, target->scsi_host,
781 PFX "Topspin/Cisco initiator port ID workaround "
782 "activated for target GUID %016llx\n",
783 be64_to_cpu(target->ioc_guid));
784 memset(req->priv.initiator_port_id, 0, 8);
785 memcpy(req->priv.initiator_port_id + 8,
786 &target->srp_host->srp_dev->dev->node_guid, 8);
787 }
788
789 status = ib_send_cm_req(ch->cm_id, &req->param);
790
791 kfree(req);
792
793 return status;
794 }
795
796 static bool srp_queue_remove_work(struct srp_target_port *target)
797 {
798 bool changed = false;
799
800 spin_lock_irq(&target->lock);
801 if (target->state != SRP_TARGET_REMOVED) {
802 target->state = SRP_TARGET_REMOVED;
803 changed = true;
804 }
805 spin_unlock_irq(&target->lock);
806
807 if (changed)
808 queue_work(srp_remove_wq, &target->remove_work);
809
810 return changed;
811 }
812
813 static void srp_disconnect_target(struct srp_target_port *target)
814 {
815 struct srp_rdma_ch *ch;
816 int i;
817
818 /* XXX should send SRP_I_LOGOUT request */
819
820 for (i = 0; i < target->ch_count; i++) {
821 ch = &target->ch[i];
822 ch->connected = false;
823 if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) {
824 shost_printk(KERN_DEBUG, target->scsi_host,
825 PFX "Sending CM DREQ failed\n");
826 }
827 }
828 }
829
830 static void srp_free_req_data(struct srp_target_port *target,
831 struct srp_rdma_ch *ch)
832 {
833 struct srp_device *dev = target->srp_host->srp_dev;
834 struct ib_device *ibdev = dev->dev;
835 struct srp_request *req;
836 int i;
837
838 if (!ch->req_ring)
839 return;
840
841 for (i = 0; i < target->req_ring_size; ++i) {
842 req = &ch->req_ring[i];
843 if (dev->use_fast_reg) {
844 kfree(req->fr_list);
845 } else {
846 kfree(req->fmr_list);
847 kfree(req->map_page);
848 }
849 if (req->indirect_dma_addr) {
850 ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
851 target->indirect_size,
852 DMA_TO_DEVICE);
853 }
854 kfree(req->indirect_desc);
855 }
856
857 kfree(ch->req_ring);
858 ch->req_ring = NULL;
859 }
860
861 static int srp_alloc_req_data(struct srp_rdma_ch *ch)
862 {
863 struct srp_target_port *target = ch->target;
864 struct srp_device *srp_dev = target->srp_host->srp_dev;
865 struct ib_device *ibdev = srp_dev->dev;
866 struct srp_request *req;
867 void *mr_list;
868 dma_addr_t dma_addr;
869 int i, ret = -ENOMEM;
870
871 ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
872 GFP_KERNEL);
873 if (!ch->req_ring)
874 goto out;
875
876 for (i = 0; i < target->req_ring_size; ++i) {
877 req = &ch->req_ring[i];
878 mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
879 GFP_KERNEL);
880 if (!mr_list)
881 goto out;
882 if (srp_dev->use_fast_reg) {
883 req->fr_list = mr_list;
884 } else {
885 req->fmr_list = mr_list;
886 req->map_page = kmalloc(srp_dev->max_pages_per_mr *
887 sizeof(void *), GFP_KERNEL);
888 if (!req->map_page)
889 goto out;
890 }
891 req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
892 if (!req->indirect_desc)
893 goto out;
894
895 dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
896 target->indirect_size,
897 DMA_TO_DEVICE);
898 if (ib_dma_mapping_error(ibdev, dma_addr))
899 goto out;
900
901 req->indirect_dma_addr = dma_addr;
902 }
903 ret = 0;
904
905 out:
906 return ret;
907 }
908
909 /**
910 * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
911 * @shost: SCSI host whose attributes to remove from sysfs.
912 *
913 * Note: Any attributes defined in the host template and that did not exist
914 * before invocation of this function will be ignored.
915 */
916 static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
917 {
918 struct device_attribute **attr;
919
920 for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
921 device_remove_file(&shost->shost_dev, *attr);
922 }
923
924 static void srp_remove_target(struct srp_target_port *target)
925 {
926 struct srp_rdma_ch *ch;
927 int i;
928
929 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
930
931 srp_del_scsi_host_attr(target->scsi_host);
932 srp_rport_get(target->rport);
933 srp_remove_host(target->scsi_host);
934 scsi_remove_host(target->scsi_host);
935 srp_stop_rport_timers(target->rport);
936 srp_disconnect_target(target);
937 for (i = 0; i < target->ch_count; i++) {
938 ch = &target->ch[i];
939 srp_free_ch_ib(target, ch);
940 }
941 cancel_work_sync(&target->tl_err_work);
942 srp_rport_put(target->rport);
943 for (i = 0; i < target->ch_count; i++) {
944 ch = &target->ch[i];
945 srp_free_req_data(target, ch);
946 }
947 kfree(target->ch);
948 target->ch = NULL;
949
950 spin_lock(&target->srp_host->target_lock);
951 list_del(&target->list);
952 spin_unlock(&target->srp_host->target_lock);
953
954 scsi_host_put(target->scsi_host);
955 }
956
957 static void srp_remove_work(struct work_struct *work)
958 {
959 struct srp_target_port *target =
960 container_of(work, struct srp_target_port, remove_work);
961
962 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
963
964 srp_remove_target(target);
965 }
966
967 static void srp_rport_delete(struct srp_rport *rport)
968 {
969 struct srp_target_port *target = rport->lld_data;
970
971 srp_queue_remove_work(target);
972 }
973
974 /**
975 * srp_connected_ch() - number of connected channels
976 * @target: SRP target port.
977 */
978 static int srp_connected_ch(struct srp_target_port *target)
979 {
980 int i, c = 0;
981
982 for (i = 0; i < target->ch_count; i++)
983 c += target->ch[i].connected;
984
985 return c;
986 }
987
988 static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
989 {
990 struct srp_target_port *target = ch->target;
991 int ret;
992
993 WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);
994
995 ret = srp_lookup_path(ch);
996 if (ret)
997 goto out;
998
999 while (1) {
1000 init_completion(&ch->done);
1001 ret = srp_send_req(ch, multich);
1002 if (ret)
1003 goto out;
1004 ret = wait_for_completion_interruptible(&ch->done);
1005 if (ret < 0)
1006 goto out;
1007
1008 /*
1009 * The CM event handling code will set status to
1010 * SRP_PORT_REDIRECT if we get a port redirect REJ
1011 * back, or SRP_DLID_REDIRECT if we get a lid/qp
1012 * redirect REJ back.
1013 */
1014 ret = ch->status;
1015 switch (ret) {
1016 case 0:
1017 ch->connected = true;
1018 goto out;
1019
1020 case SRP_PORT_REDIRECT:
1021 ret = srp_lookup_path(ch);
1022 if (ret)
1023 goto out;
1024 break;
1025
1026 case SRP_DLID_REDIRECT:
1027 break;
1028
1029 case SRP_STALE_CONN:
1030 shost_printk(KERN_ERR, target->scsi_host, PFX
1031 "giving up on stale connection\n");
1032 ret = -ECONNRESET;
1033 goto out;
1034
1035 default:
1036 goto out;
1037 }
1038 }
1039
1040 out:
1041 return ret <= 0 ? ret : -ENODEV;
1042 }
1043
1044 static int srp_inv_rkey(struct srp_rdma_ch *ch, u32 rkey)
1045 {
1046 struct ib_send_wr *bad_wr;
1047 struct ib_send_wr wr = {
1048 .opcode = IB_WR_LOCAL_INV,
1049 .wr_id = LOCAL_INV_WR_ID_MASK,
1050 .next = NULL,
1051 .num_sge = 0,
1052 .send_flags = 0,
1053 .ex.invalidate_rkey = rkey,
1054 };
1055
1056 return ib_post_send(ch->qp, &wr, &bad_wr);
1057 }
1058
1059 static void srp_unmap_data(struct scsi_cmnd *scmnd,
1060 struct srp_rdma_ch *ch,
1061 struct srp_request *req)
1062 {
1063 struct srp_target_port *target = ch->target;
1064 struct srp_device *dev = target->srp_host->srp_dev;
1065 struct ib_device *ibdev = dev->dev;
1066 int i, res;
1067
1068 if (!scsi_sglist(scmnd) ||
1069 (scmnd->sc_data_direction != DMA_TO_DEVICE &&
1070 scmnd->sc_data_direction != DMA_FROM_DEVICE))
1071 return;
1072
1073 if (dev->use_fast_reg) {
1074 struct srp_fr_desc **pfr;
1075
1076 for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
1077 res = srp_inv_rkey(ch, (*pfr)->mr->rkey);
1078 if (res < 0) {
1079 shost_printk(KERN_ERR, target->scsi_host, PFX
1080 "Queueing INV WR for rkey %#x failed (%d)\n",
1081 (*pfr)->mr->rkey, res);
1082 queue_work(system_long_wq,
1083 &target->tl_err_work);
1084 }
1085 }
1086 if (req->nmdesc)
1087 srp_fr_pool_put(ch->fr_pool, req->fr_list,
1088 req->nmdesc);
1089 } else if (dev->use_fmr) {
1090 struct ib_pool_fmr **pfmr;
1091
1092 for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
1093 ib_fmr_pool_unmap(*pfmr);
1094 }
1095
1096 ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
1097 scmnd->sc_data_direction);
1098 }
1099
1100 /**
1101 * srp_claim_req - Take ownership of the scmnd associated with a request.
1102 * @ch: SRP RDMA channel.
1103 * @req: SRP request.
1104 * @sdev: If not NULL, only take ownership for this SCSI device.
1105 * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
1106 * ownership of @req->scmnd if it equals @scmnd.
1107 *
1108 * Return value:
1109 * Either NULL or a pointer to the SCSI command the caller became owner of.
1110 */
1111 static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
1112 struct srp_request *req,
1113 struct scsi_device *sdev,
1114 struct scsi_cmnd *scmnd)
1115 {
1116 unsigned long flags;
1117
1118 spin_lock_irqsave(&ch->lock, flags);
1119 if (req->scmnd &&
1120 (!sdev || req->scmnd->device == sdev) &&
1121 (!scmnd || req->scmnd == scmnd)) {
1122 scmnd = req->scmnd;
1123 req->scmnd = NULL;
1124 } else {
1125 scmnd = NULL;
1126 }
1127 spin_unlock_irqrestore(&ch->lock, flags);
1128
1129 return scmnd;
1130 }
1131
1132 /**
1133 * srp_free_req() - Unmap data and add request to the free request list.
1134 * @ch: SRP RDMA channel.
1135 * @req: Request to be freed.
1136 * @scmnd: SCSI command associated with @req.
1137 * @req_lim_delta: Amount to be added to @target->req_lim.
1138 */
1139 static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
1140 struct scsi_cmnd *scmnd, s32 req_lim_delta)
1141 {
1142 unsigned long flags;
1143
1144 srp_unmap_data(scmnd, ch, req);
1145
1146 spin_lock_irqsave(&ch->lock, flags);
1147 ch->req_lim += req_lim_delta;
1148 spin_unlock_irqrestore(&ch->lock, flags);
1149 }
1150
1151 static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
1152 struct scsi_device *sdev, int result)
1153 {
1154 struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);
1155
1156 if (scmnd) {
1157 srp_free_req(ch, req, scmnd, 0);
1158 scmnd->result = result;
1159 scmnd->scsi_done(scmnd);
1160 }
1161 }
1162
1163 static void srp_terminate_io(struct srp_rport *rport)
1164 {
1165 struct srp_target_port *target = rport->lld_data;
1166 struct srp_rdma_ch *ch;
1167 struct Scsi_Host *shost = target->scsi_host;
1168 struct scsi_device *sdev;
1169 int i, j;
1170
1171 /*
1172 * Invoking srp_terminate_io() while srp_queuecommand() is running
1173 * is not safe. Hence the warning statement below.
1174 */
1175 shost_for_each_device(sdev, shost)
1176 WARN_ON_ONCE(sdev->request_queue->request_fn_active);
1177
1178 for (i = 0; i < target->ch_count; i++) {
1179 ch = &target->ch[i];
1180
1181 for (j = 0; j < target->req_ring_size; ++j) {
1182 struct srp_request *req = &ch->req_ring[j];
1183
1184 srp_finish_req(ch, req, NULL,
1185 DID_TRANSPORT_FAILFAST << 16);
1186 }
1187 }
1188 }
1189
1190 /*
1191 * It is up to the caller to ensure that srp_rport_reconnect() calls are
1192 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
1193 * srp_reset_device() or srp_reset_host() calls will occur while this function
1194 * is in progress. One way to realize that is not to call this function
1195 * directly but to call srp_reconnect_rport() instead since that last function
1196 * serializes calls of this function via rport->mutex and also blocks
1197 * srp_queuecommand() calls before invoking this function.
1198 */
1199 static int srp_rport_reconnect(struct srp_rport *rport)
1200 {
1201 struct srp_target_port *target = rport->lld_data;
1202 struct srp_rdma_ch *ch;
1203 int i, j, ret = 0;
1204 bool multich = false;
1205
1206 srp_disconnect_target(target);
1207
1208 if (target->state == SRP_TARGET_SCANNING)
1209 return -ENODEV;
1210
1211 /*
1212 * Now get a new local CM ID so that we avoid confusing the target in
1213 * case things are really fouled up. Doing so also ensures that all CM
1214 * callbacks will have finished before a new QP is allocated.
1215 */
1216 for (i = 0; i < target->ch_count; i++) {
1217 ch = &target->ch[i];
1218 ret += srp_new_cm_id(ch);
1219 }
1220 for (i = 0; i < target->ch_count; i++) {
1221 ch = &target->ch[i];
1222 for (j = 0; j < target->req_ring_size; ++j) {
1223 struct srp_request *req = &ch->req_ring[j];
1224
1225 srp_finish_req(ch, req, NULL, DID_RESET << 16);
1226 }
1227 }
1228 for (i = 0; i < target->ch_count; i++) {
1229 ch = &target->ch[i];
1230 /*
1231 * Whether or not creating a new CM ID succeeded, create a new
1232 * QP. This guarantees that all completion callback function
1233 * invocations have finished before request resetting starts.
1234 */
1235 ret += srp_create_ch_ib(ch);
1236
1237 INIT_LIST_HEAD(&ch->free_tx);
1238 for (j = 0; j < target->queue_size; ++j)
1239 list_add(&ch->tx_ring[j]->list, &ch->free_tx);
1240 }
1241
1242 target->qp_in_error = false;
1243
1244 for (i = 0; i < target->ch_count; i++) {
1245 ch = &target->ch[i];
1246 if (ret)
1247 break;
1248 ret = srp_connect_ch(ch, multich);
1249 multich = true;
1250 }
1251
1252 if (ret == 0)
1253 shost_printk(KERN_INFO, target->scsi_host,
1254 PFX "reconnect succeeded\n");
1255
1256 return ret;
1257 }
1258
1259 static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
1260 unsigned int dma_len, u32 rkey)
1261 {
1262 struct srp_direct_buf *desc = state->desc;
1263
1264 WARN_ON_ONCE(!dma_len);
1265
1266 desc->va = cpu_to_be64(dma_addr);
1267 desc->key = cpu_to_be32(rkey);
1268 desc->len = cpu_to_be32(dma_len);
1269
1270 state->total_len += dma_len;
1271 state->desc++;
1272 state->ndesc++;
1273 }
1274
1275 static int srp_map_finish_fmr(struct srp_map_state *state,
1276 struct srp_rdma_ch *ch)
1277 {
1278 struct srp_target_port *target = ch->target;
1279 struct srp_device *dev = target->srp_host->srp_dev;
1280 struct ib_pool_fmr *fmr;
1281 u64 io_addr = 0;
1282
1283 if (state->fmr.next >= state->fmr.end)
1284 return -ENOMEM;
1285
1286 WARN_ON_ONCE(!dev->use_fmr);
1287
1288 if (state->npages == 0)
1289 return 0;
1290
1291 if (state->npages == 1 && target->global_mr) {
1292 srp_map_desc(state, state->base_dma_addr, state->dma_len,
1293 target->global_mr->rkey);
1294 goto reset_state;
1295 }
1296
1297 fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
1298 state->npages, io_addr);
1299 if (IS_ERR(fmr))
1300 return PTR_ERR(fmr);
1301
1302 *state->fmr.next++ = fmr;
1303 state->nmdesc++;
1304
1305 srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask,
1306 state->dma_len, fmr->fmr->rkey);
1307
1308 reset_state:
1309 state->npages = 0;
1310 state->dma_len = 0;
1311
1312 return 0;
1313 }
1314
1315 static int srp_map_finish_fr(struct srp_map_state *state,
1316 struct srp_rdma_ch *ch)
1317 {
1318 struct srp_target_port *target = ch->target;
1319 struct srp_device *dev = target->srp_host->srp_dev;
1320 struct ib_send_wr *bad_wr;
1321 struct ib_reg_wr wr;
1322 struct srp_fr_desc *desc;
1323 u32 rkey;
1324 int n, err;
1325
1326 if (state->fr.next >= state->fr.end)
1327 return -ENOMEM;
1328
1329 WARN_ON_ONCE(!dev->use_fast_reg);
1330
1331 if (state->sg_nents == 0)
1332 return 0;
1333
1334 if (state->sg_nents == 1 && target->global_mr) {
1335 srp_map_desc(state, sg_dma_address(state->sg),
1336 sg_dma_len(state->sg),
1337 target->global_mr->rkey);
1338 return 1;
1339 }
1340
1341 desc = srp_fr_pool_get(ch->fr_pool);
1342 if (!desc)
1343 return -ENOMEM;
1344
1345 rkey = ib_inc_rkey(desc->mr->rkey);
1346 ib_update_fast_reg_key(desc->mr, rkey);
1347
1348 n = ib_map_mr_sg(desc->mr, state->sg, state->sg_nents,
1349 dev->mr_page_size);
1350 if (unlikely(n < 0))
1351 return n;
1352
1353 wr.wr.next = NULL;
1354 wr.wr.opcode = IB_WR_REG_MR;
1355 wr.wr.wr_id = FAST_REG_WR_ID_MASK;
1356 wr.wr.num_sge = 0;
1357 wr.wr.send_flags = 0;
1358 wr.mr = desc->mr;
1359 wr.key = desc->mr->rkey;
1360 wr.access = (IB_ACCESS_LOCAL_WRITE |
1361 IB_ACCESS_REMOTE_READ |
1362 IB_ACCESS_REMOTE_WRITE);
1363
1364 *state->fr.next++ = desc;
1365 state->nmdesc++;
1366
1367 srp_map_desc(state, desc->mr->iova,
1368 desc->mr->length, desc->mr->rkey);
1369
1370 err = ib_post_send(ch->qp, &wr.wr, &bad_wr);
1371 if (unlikely(err))
1372 return err;
1373
1374 return n;
1375 }
1376
1377 static int srp_map_sg_entry(struct srp_map_state *state,
1378 struct srp_rdma_ch *ch,
1379 struct scatterlist *sg, int sg_index)
1380 {
1381 struct srp_target_port *target = ch->target;
1382 struct srp_device *dev = target->srp_host->srp_dev;
1383 struct ib_device *ibdev = dev->dev;
1384 dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
1385 unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
1386 unsigned int len = 0;
1387 int ret;
1388
1389 WARN_ON_ONCE(!dma_len);
1390
1391 while (dma_len) {
1392 unsigned offset = dma_addr & ~dev->mr_page_mask;
1393 if (state->npages == dev->max_pages_per_mr || offset != 0) {
1394 ret = srp_map_finish_fmr(state, ch);
1395 if (ret)
1396 return ret;
1397 }
1398
1399 len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
1400
1401 if (!state->npages)
1402 state->base_dma_addr = dma_addr;
1403 state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
1404 state->dma_len += len;
1405 dma_addr += len;
1406 dma_len -= len;
1407 }
1408
1409 /*
1410 * If the last entry of the MR wasn't a full page, then we need to
1411 * close it out and start a new one -- we can only merge at page
1412 * boundries.
1413 */
1414 ret = 0;
1415 if (len != dev->mr_page_size)
1416 ret = srp_map_finish_fmr(state, ch);
1417 return ret;
1418 }
1419
1420 static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1421 struct srp_request *req, struct scatterlist *scat,
1422 int count)
1423 {
1424 struct scatterlist *sg;
1425 int i, ret;
1426
1427 state->desc = req->indirect_desc;
1428 state->pages = req->map_page;
1429 state->fmr.next = req->fmr_list;
1430 state->fmr.end = req->fmr_list + ch->target->cmd_sg_cnt;
1431
1432 for_each_sg(scat, sg, count, i) {
1433 ret = srp_map_sg_entry(state, ch, sg, i);
1434 if (ret)
1435 return ret;
1436 }
1437
1438 ret = srp_map_finish_fmr(state, ch);
1439 if (ret)
1440 return ret;
1441
1442 req->nmdesc = state->nmdesc;
1443
1444 return 0;
1445 }
1446
1447 static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1448 struct srp_request *req, struct scatterlist *scat,
1449 int count)
1450 {
1451 state->desc = req->indirect_desc;
1452 state->fr.next = req->fr_list;
1453 state->fr.end = req->fr_list + ch->target->cmd_sg_cnt;
1454 state->sg = scat;
1455 state->sg_nents = scsi_sg_count(req->scmnd);
1456
1457 while (state->sg_nents) {
1458 int i, n;
1459
1460 n = srp_map_finish_fr(state, ch);
1461 if (unlikely(n < 0))
1462 return n;
1463
1464 state->sg_nents -= n;
1465 for (i = 0; i < n; i++)
1466 state->sg = sg_next(state->sg);
1467 }
1468
1469 req->nmdesc = state->nmdesc;
1470
1471 return 0;
1472 }
1473
1474 static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
1475 struct srp_request *req, struct scatterlist *scat,
1476 int count)
1477 {
1478 struct srp_target_port *target = ch->target;
1479 struct srp_device *dev = target->srp_host->srp_dev;
1480 struct scatterlist *sg;
1481 int i;
1482
1483 state->desc = req->indirect_desc;
1484 for_each_sg(scat, sg, count, i) {
1485 srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
1486 ib_sg_dma_len(dev->dev, sg),
1487 target->global_mr->rkey);
1488 }
1489
1490 req->nmdesc = state->nmdesc;
1491
1492 return 0;
1493 }
1494
1495 /*
1496 * Register the indirect data buffer descriptor with the HCA.
1497 *
1498 * Note: since the indirect data buffer descriptor has been allocated with
1499 * kmalloc() it is guaranteed that this buffer is a physically contiguous
1500 * memory buffer.
1501 */
1502 static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
1503 void **next_mr, void **end_mr, u32 idb_len,
1504 __be32 *idb_rkey)
1505 {
1506 struct srp_target_port *target = ch->target;
1507 struct srp_device *dev = target->srp_host->srp_dev;
1508 struct srp_map_state state;
1509 struct srp_direct_buf idb_desc;
1510 u64 idb_pages[1];
1511 struct scatterlist idb_sg[1];
1512 int ret;
1513
1514 memset(&state, 0, sizeof(state));
1515 memset(&idb_desc, 0, sizeof(idb_desc));
1516 state.gen.next = next_mr;
1517 state.gen.end = end_mr;
1518 state.desc = &idb_desc;
1519 state.base_dma_addr = req->indirect_dma_addr;
1520 state.dma_len = idb_len;
1521
1522 if (dev->use_fast_reg) {
1523 state.sg = idb_sg;
1524 state.sg_nents = 1;
1525 sg_set_buf(idb_sg, req->indirect_desc, idb_len);
1526 idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
1527 ret = srp_map_finish_fr(&state, ch);
1528 if (ret < 0)
1529 return ret;
1530 } else if (dev->use_fmr) {
1531 state.pages = idb_pages;
1532 state.pages[0] = (req->indirect_dma_addr &
1533 dev->mr_page_mask);
1534 state.npages = 1;
1535 ret = srp_map_finish_fmr(&state, ch);
1536 if (ret < 0)
1537 return ret;
1538 } else {
1539 return -EINVAL;
1540 }
1541
1542 *idb_rkey = idb_desc.key;
1543
1544 return 0;
1545 }
1546
1547 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1548 struct srp_request *req)
1549 {
1550 struct srp_target_port *target = ch->target;
1551 struct scatterlist *scat;
1552 struct srp_cmd *cmd = req->cmd->buf;
1553 int len, nents, count, ret;
1554 struct srp_device *dev;
1555 struct ib_device *ibdev;
1556 struct srp_map_state state;
1557 struct srp_indirect_buf *indirect_hdr;
1558 u32 idb_len, table_len;
1559 __be32 idb_rkey;
1560 u8 fmt;
1561
1562 if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
1563 return sizeof (struct srp_cmd);
1564
1565 if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
1566 scmnd->sc_data_direction != DMA_TO_DEVICE) {
1567 shost_printk(KERN_WARNING, target->scsi_host,
1568 PFX "Unhandled data direction %d\n",
1569 scmnd->sc_data_direction);
1570 return -EINVAL;
1571 }
1572
1573 nents = scsi_sg_count(scmnd);
1574 scat = scsi_sglist(scmnd);
1575
1576 dev = target->srp_host->srp_dev;
1577 ibdev = dev->dev;
1578
1579 count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
1580 if (unlikely(count == 0))
1581 return -EIO;
1582
1583 fmt = SRP_DATA_DESC_DIRECT;
1584 len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);
1585
1586 if (count == 1 && target->global_mr) {
1587 /*
1588 * The midlayer only generated a single gather/scatter
1589 * entry, or DMA mapping coalesced everything to a
1590 * single entry. So a direct descriptor along with
1591 * the DMA MR suffices.
1592 */
1593 struct srp_direct_buf *buf = (void *) cmd->add_data;
1594
1595 buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
1596 buf->key = cpu_to_be32(target->global_mr->rkey);
1597 buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
1598
1599 req->nmdesc = 0;
1600 goto map_complete;
1601 }
1602
1603 /*
1604 * We have more than one scatter/gather entry, so build our indirect
1605 * descriptor table, trying to merge as many entries as we can.
1606 */
1607 indirect_hdr = (void *) cmd->add_data;
1608
1609 ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
1610 target->indirect_size, DMA_TO_DEVICE);
1611
1612 memset(&state, 0, sizeof(state));
1613 if (dev->use_fast_reg)
1614 srp_map_sg_fr(&state, ch, req, scat, count);
1615 else if (dev->use_fmr)
1616 srp_map_sg_fmr(&state, ch, req, scat, count);
1617 else
1618 srp_map_sg_dma(&state, ch, req, scat, count);
1619
1620 /* We've mapped the request, now pull as much of the indirect
1621 * descriptor table as we can into the command buffer. If this
1622 * target is not using an external indirect table, we are
1623 * guaranteed to fit into the command, as the SCSI layer won't
1624 * give us more S/G entries than we allow.
1625 */
1626 if (state.ndesc == 1) {
1627 /*
1628 * Memory registration collapsed the sg-list into one entry,
1629 * so use a direct descriptor.
1630 */
1631 struct srp_direct_buf *buf = (void *) cmd->add_data;
1632
1633 *buf = req->indirect_desc[0];
1634 goto map_complete;
1635 }
1636
1637 if (unlikely(target->cmd_sg_cnt < state.ndesc &&
1638 !target->allow_ext_sg)) {
1639 shost_printk(KERN_ERR, target->scsi_host,
1640 "Could not fit S/G list into SRP_CMD\n");
1641 return -EIO;
1642 }
1643
1644 count = min(state.ndesc, target->cmd_sg_cnt);
1645 table_len = state.ndesc * sizeof (struct srp_direct_buf);
1646 idb_len = sizeof(struct srp_indirect_buf) + table_len;
1647
1648 fmt = SRP_DATA_DESC_INDIRECT;
1649 len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
1650 len += count * sizeof (struct srp_direct_buf);
1651
1652 memcpy(indirect_hdr->desc_list, req->indirect_desc,
1653 count * sizeof (struct srp_direct_buf));
1654
1655 if (!target->global_mr) {
1656 ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
1657 idb_len, &idb_rkey);
1658 if (ret < 0)
1659 return ret;
1660 req->nmdesc++;
1661 } else {
1662 idb_rkey = target->global_mr->rkey;
1663 }
1664
1665 indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
1666 indirect_hdr->table_desc.key = idb_rkey;
1667 indirect_hdr->table_desc.len = cpu_to_be32(table_len);
1668 indirect_hdr->len = cpu_to_be32(state.total_len);
1669
1670 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1671 cmd->data_out_desc_cnt = count;
1672 else
1673 cmd->data_in_desc_cnt = count;
1674
1675 ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
1676 DMA_TO_DEVICE);
1677
1678 map_complete:
1679 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1680 cmd->buf_fmt = fmt << 4;
1681 else
1682 cmd->buf_fmt = fmt;
1683
1684 return len;
1685 }
1686
1687 /*
1688 * Return an IU and possible credit to the free pool
1689 */
1690 static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
1691 enum srp_iu_type iu_type)
1692 {
1693 unsigned long flags;
1694
1695 spin_lock_irqsave(&ch->lock, flags);
1696 list_add(&iu->list, &ch->free_tx);
1697 if (iu_type != SRP_IU_RSP)
1698 ++ch->req_lim;
1699 spin_unlock_irqrestore(&ch->lock, flags);
1700 }
1701
1702 /*
1703 * Must be called with ch->lock held to protect req_lim and free_tx.
1704 * If IU is not sent, it must be returned using srp_put_tx_iu().
1705 *
1706 * Note:
1707 * An upper limit for the number of allocated information units for each
1708 * request type is:
1709 * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
1710 * more than Scsi_Host.can_queue requests.
1711 * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
1712 * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
1713 * one unanswered SRP request to an initiator.
1714 */
1715 static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
1716 enum srp_iu_type iu_type)
1717 {
1718 struct srp_target_port *target = ch->target;
1719 s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
1720 struct srp_iu *iu;
1721
1722 srp_send_completion(ch->send_cq, ch);
1723
1724 if (list_empty(&ch->free_tx))
1725 return NULL;
1726
1727 /* Initiator responses to target requests do not consume credits */
1728 if (iu_type != SRP_IU_RSP) {
1729 if (ch->req_lim <= rsv) {
1730 ++target->zero_req_lim;
1731 return NULL;
1732 }
1733
1734 --ch->req_lim;
1735 }
1736
1737 iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
1738 list_del(&iu->list);
1739 return iu;
1740 }
1741
1742 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
1743 {
1744 struct srp_target_port *target = ch->target;
1745 struct ib_sge list;
1746 struct ib_send_wr wr, *bad_wr;
1747
1748 list.addr = iu->dma;
1749 list.length = len;
1750 list.lkey = target->lkey;
1751
1752 wr.next = NULL;
1753 wr.wr_id = (uintptr_t) iu;
1754 wr.sg_list = &list;
1755 wr.num_sge = 1;
1756 wr.opcode = IB_WR_SEND;
1757 wr.send_flags = IB_SEND_SIGNALED;
1758
1759 return ib_post_send(ch->qp, &wr, &bad_wr);
1760 }
1761
1762 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
1763 {
1764 struct srp_target_port *target = ch->target;
1765 struct ib_recv_wr wr, *bad_wr;
1766 struct ib_sge list;
1767
1768 list.addr = iu->dma;
1769 list.length = iu->size;
1770 list.lkey = target->lkey;
1771
1772 wr.next = NULL;
1773 wr.wr_id = (uintptr_t) iu;
1774 wr.sg_list = &list;
1775 wr.num_sge = 1;
1776
1777 return ib_post_recv(ch->qp, &wr, &bad_wr);
1778 }
1779
1780 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
1781 {
1782 struct srp_target_port *target = ch->target;
1783 struct srp_request *req;
1784 struct scsi_cmnd *scmnd;
1785 unsigned long flags;
1786
1787 if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
1788 spin_lock_irqsave(&ch->lock, flags);
1789 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1790 spin_unlock_irqrestore(&ch->lock, flags);
1791
1792 ch->tsk_mgmt_status = -1;
1793 if (be32_to_cpu(rsp->resp_data_len) >= 4)
1794 ch->tsk_mgmt_status = rsp->data[3];
1795 complete(&ch->tsk_mgmt_done);
1796 } else {
1797 scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
1798 if (scmnd) {
1799 req = (void *)scmnd->host_scribble;
1800 scmnd = srp_claim_req(ch, req, NULL, scmnd);
1801 }
1802 if (!scmnd) {
1803 shost_printk(KERN_ERR, target->scsi_host,
1804 "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
1805 rsp->tag, ch - target->ch, ch->qp->qp_num);
1806
1807 spin_lock_irqsave(&ch->lock, flags);
1808 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1809 spin_unlock_irqrestore(&ch->lock, flags);
1810
1811 return;
1812 }
1813 scmnd->result = rsp->status;
1814
1815 if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
1816 memcpy(scmnd->sense_buffer, rsp->data +
1817 be32_to_cpu(rsp->resp_data_len),
1818 min_t(int, be32_to_cpu(rsp->sense_data_len),
1819 SCSI_SENSE_BUFFERSIZE));
1820 }
1821
1822 if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
1823 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
1824 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
1825 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
1826 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
1827 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
1828 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
1829 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
1830
1831 srp_free_req(ch, req, scmnd,
1832 be32_to_cpu(rsp->req_lim_delta));
1833
1834 scmnd->host_scribble = NULL;
1835 scmnd->scsi_done(scmnd);
1836 }
1837 }
1838
1839 static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
1840 void *rsp, int len)
1841 {
1842 struct srp_target_port *target = ch->target;
1843 struct ib_device *dev = target->srp_host->srp_dev->dev;
1844 unsigned long flags;
1845 struct srp_iu *iu;
1846 int err;
1847
1848 spin_lock_irqsave(&ch->lock, flags);
1849 ch->req_lim += req_delta;
1850 iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
1851 spin_unlock_irqrestore(&ch->lock, flags);
1852
1853 if (!iu) {
1854 shost_printk(KERN_ERR, target->scsi_host, PFX
1855 "no IU available to send response\n");
1856 return 1;
1857 }
1858
1859 ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
1860 memcpy(iu->buf, rsp, len);
1861 ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
1862
1863 err = srp_post_send(ch, iu, len);
1864 if (err) {
1865 shost_printk(KERN_ERR, target->scsi_host, PFX
1866 "unable to post response: %d\n", err);
1867 srp_put_tx_iu(ch, iu, SRP_IU_RSP);
1868 }
1869
1870 return err;
1871 }
1872
1873 static void srp_process_cred_req(struct srp_rdma_ch *ch,
1874 struct srp_cred_req *req)
1875 {
1876 struct srp_cred_rsp rsp = {
1877 .opcode = SRP_CRED_RSP,
1878 .tag = req->tag,
1879 };
1880 s32 delta = be32_to_cpu(req->req_lim_delta);
1881
1882 if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1883 shost_printk(KERN_ERR, ch->target->scsi_host, PFX
1884 "problems processing SRP_CRED_REQ\n");
1885 }
1886
1887 static void srp_process_aer_req(struct srp_rdma_ch *ch,
1888 struct srp_aer_req *req)
1889 {
1890 struct srp_target_port *target = ch->target;
1891 struct srp_aer_rsp rsp = {
1892 .opcode = SRP_AER_RSP,
1893 .tag = req->tag,
1894 };
1895 s32 delta = be32_to_cpu(req->req_lim_delta);
1896
1897 shost_printk(KERN_ERR, target->scsi_host, PFX
1898 "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun));
1899
1900 if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1901 shost_printk(KERN_ERR, target->scsi_host, PFX
1902 "problems processing SRP_AER_REQ\n");
1903 }
1904
1905 static void srp_handle_recv(struct srp_rdma_ch *ch, struct ib_wc *wc)
1906 {
1907 struct srp_target_port *target = ch->target;
1908 struct ib_device *dev = target->srp_host->srp_dev->dev;
1909 struct srp_iu *iu = (struct srp_iu *) (uintptr_t) wc->wr_id;
1910 int res;
1911 u8 opcode;
1912
1913 ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
1914 DMA_FROM_DEVICE);
1915
1916 opcode = *(u8 *) iu->buf;
1917
1918 if (0) {
1919 shost_printk(KERN_ERR, target->scsi_host,
1920 PFX "recv completion, opcode 0x%02x\n", opcode);
1921 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
1922 iu->buf, wc->byte_len, true);
1923 }
1924
1925 switch (opcode) {
1926 case SRP_RSP:
1927 srp_process_rsp(ch, iu->buf);
1928 break;
1929
1930 case SRP_CRED_REQ:
1931 srp_process_cred_req(ch, iu->buf);
1932 break;
1933
1934 case SRP_AER_REQ:
1935 srp_process_aer_req(ch, iu->buf);
1936 break;
1937
1938 case SRP_T_LOGOUT:
1939 /* XXX Handle target logout */
1940 shost_printk(KERN_WARNING, target->scsi_host,
1941 PFX "Got target logout request\n");
1942 break;
1943
1944 default:
1945 shost_printk(KERN_WARNING, target->scsi_host,
1946 PFX "Unhandled SRP opcode 0x%02x\n", opcode);
1947 break;
1948 }
1949
1950 ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
1951 DMA_FROM_DEVICE);
1952
1953 res = srp_post_recv(ch, iu);
1954 if (res != 0)
1955 shost_printk(KERN_ERR, target->scsi_host,
1956 PFX "Recv failed with error code %d\n", res);
1957 }
1958
1959 /**
1960 * srp_tl_err_work() - handle a transport layer error
1961 * @work: Work structure embedded in an SRP target port.
1962 *
1963 * Note: This function may get invoked before the rport has been created,
1964 * hence the target->rport test.
1965 */
1966 static void srp_tl_err_work(struct work_struct *work)
1967 {
1968 struct srp_target_port *target;
1969
1970 target = container_of(work, struct srp_target_port, tl_err_work);
1971 if (target->rport)
1972 srp_start_tl_fail_timers(target->rport);
1973 }
1974
1975 static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status,
1976 bool send_err, struct srp_rdma_ch *ch)
1977 {
1978 struct srp_target_port *target = ch->target;
1979
1980 if (wr_id == SRP_LAST_WR_ID) {
1981 complete(&ch->done);
1982 return;
1983 }
1984
1985 if (ch->connected && !target->qp_in_error) {
1986 if (wr_id & LOCAL_INV_WR_ID_MASK) {
1987 shost_printk(KERN_ERR, target->scsi_host, PFX
1988 "LOCAL_INV failed with status %s (%d)\n",
1989 ib_wc_status_msg(wc_status), wc_status);
1990 } else if (wr_id & FAST_REG_WR_ID_MASK) {
1991 shost_printk(KERN_ERR, target->scsi_host, PFX
1992 "FAST_REG_MR failed status %s (%d)\n",
1993 ib_wc_status_msg(wc_status), wc_status);
1994 } else {
1995 shost_printk(KERN_ERR, target->scsi_host,
1996 PFX "failed %s status %s (%d) for iu %p\n",
1997 send_err ? "send" : "receive",
1998 ib_wc_status_msg(wc_status), wc_status,
1999 (void *)(uintptr_t)wr_id);
2000 }
2001 queue_work(system_long_wq, &target->tl_err_work);
2002 }
2003 target->qp_in_error = true;
2004 }
2005
2006 static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr)
2007 {
2008 struct srp_rdma_ch *ch = ch_ptr;
2009 struct ib_wc wc;
2010
2011 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
2012 while (ib_poll_cq(cq, 1, &wc) > 0) {
2013 if (likely(wc.status == IB_WC_SUCCESS)) {
2014 srp_handle_recv(ch, &wc);
2015 } else {
2016 srp_handle_qp_err(wc.wr_id, wc.status, false, ch);
2017 }
2018 }
2019 }
2020
2021 static void srp_send_completion(struct ib_cq *cq, void *ch_ptr)
2022 {
2023 struct srp_rdma_ch *ch = ch_ptr;
2024 struct ib_wc wc;
2025 struct srp_iu *iu;
2026
2027 while (ib_poll_cq(cq, 1, &wc) > 0) {
2028 if (likely(wc.status == IB_WC_SUCCESS)) {
2029 iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
2030 list_add(&iu->list, &ch->free_tx);
2031 } else {
2032 srp_handle_qp_err(wc.wr_id, wc.status, true, ch);
2033 }
2034 }
2035 }
2036
2037 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
2038 {
2039 struct srp_target_port *target = host_to_target(shost);
2040 struct srp_rport *rport = target->rport;
2041 struct srp_rdma_ch *ch;
2042 struct srp_request *req;
2043 struct srp_iu *iu;
2044 struct srp_cmd *cmd;
2045 struct ib_device *dev;
2046 unsigned long flags;
2047 u32 tag;
2048 u16 idx;
2049 int len, ret;
2050 const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
2051
2052 /*
2053 * The SCSI EH thread is the only context from which srp_queuecommand()
2054 * can get invoked for blocked devices (SDEV_BLOCK /
2055 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
2056 * locking the rport mutex if invoked from inside the SCSI EH.
2057 */
2058 if (in_scsi_eh)
2059 mutex_lock(&rport->mutex);
2060
2061 scmnd->result = srp_chkready(target->rport);
2062 if (unlikely(scmnd->result))
2063 goto err;
2064
2065 WARN_ON_ONCE(scmnd->request->tag < 0);
2066 tag = blk_mq_unique_tag(scmnd->request);
2067 ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
2068 idx = blk_mq_unique_tag_to_tag(tag);
2069 WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
2070 dev_name(&shost->shost_gendev), tag, idx,
2071 target->req_ring_size);
2072
2073 spin_lock_irqsave(&ch->lock, flags);
2074 iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
2075 spin_unlock_irqrestore(&ch->lock, flags);
2076
2077 if (!iu)
2078 goto err;
2079
2080 req = &ch->req_ring[idx];
2081 dev = target->srp_host->srp_dev->dev;
2082 ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
2083 DMA_TO_DEVICE);
2084
2085 scmnd->host_scribble = (void *) req;
2086
2087 cmd = iu->buf;
2088 memset(cmd, 0, sizeof *cmd);
2089
2090 cmd->opcode = SRP_CMD;
2091 int_to_scsilun(scmnd->device->lun, &cmd->lun);
2092 cmd->tag = tag;
2093 memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
2094
2095 req->scmnd = scmnd;
2096 req->cmd = iu;
2097
2098 len = srp_map_data(scmnd, ch, req);
2099 if (len < 0) {
2100 shost_printk(KERN_ERR, target->scsi_host,
2101 PFX "Failed to map data (%d)\n", len);
2102 /*
2103 * If we ran out of memory descriptors (-ENOMEM) because an
2104 * application is queuing many requests with more than
2105 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
2106 * to reduce queue depth temporarily.
2107 */
2108 scmnd->result = len == -ENOMEM ?
2109 DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
2110 goto err_iu;
2111 }
2112
2113 ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
2114 DMA_TO_DEVICE);
2115
2116 if (srp_post_send(ch, iu, len)) {
2117 shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
2118 goto err_unmap;
2119 }
2120
2121 ret = 0;
2122
2123 unlock_rport:
2124 if (in_scsi_eh)
2125 mutex_unlock(&rport->mutex);
2126
2127 return ret;
2128
2129 err_unmap:
2130 srp_unmap_data(scmnd, ch, req);
2131
2132 err_iu:
2133 srp_put_tx_iu(ch, iu, SRP_IU_CMD);
2134
2135 /*
2136 * Avoid that the loops that iterate over the request ring can
2137 * encounter a dangling SCSI command pointer.
2138 */
2139 req->scmnd = NULL;
2140
2141 err:
2142 if (scmnd->result) {
2143 scmnd->scsi_done(scmnd);
2144 ret = 0;
2145 } else {
2146 ret = SCSI_MLQUEUE_HOST_BUSY;
2147 }
2148
2149 goto unlock_rport;
2150 }
2151
2152 /*
2153 * Note: the resources allocated in this function are freed in
2154 * srp_free_ch_ib().
2155 */
2156 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
2157 {
2158 struct srp_target_port *target = ch->target;
2159 int i;
2160
2161 ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
2162 GFP_KERNEL);
2163 if (!ch->rx_ring)
2164 goto err_no_ring;
2165 ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
2166 GFP_KERNEL);
2167 if (!ch->tx_ring)
2168 goto err_no_ring;
2169
2170 for (i = 0; i < target->queue_size; ++i) {
2171 ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
2172 ch->max_ti_iu_len,
2173 GFP_KERNEL, DMA_FROM_DEVICE);
2174 if (!ch->rx_ring[i])
2175 goto err;
2176 }
2177
2178 for (i = 0; i < target->queue_size; ++i) {
2179 ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
2180 target->max_iu_len,
2181 GFP_KERNEL, DMA_TO_DEVICE);
2182 if (!ch->tx_ring[i])
2183 goto err;
2184
2185 list_add(&ch->tx_ring[i]->list, &ch->free_tx);
2186 }
2187
2188 return 0;
2189
2190 err:
2191 for (i = 0; i < target->queue_size; ++i) {
2192 srp_free_iu(target->srp_host, ch->rx_ring[i]);
2193 srp_free_iu(target->srp_host, ch->tx_ring[i]);
2194 }
2195
2196
2197 err_no_ring:
2198 kfree(ch->tx_ring);
2199 ch->tx_ring = NULL;
2200 kfree(ch->rx_ring);
2201 ch->rx_ring = NULL;
2202
2203 return -ENOMEM;
2204 }
2205
2206 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
2207 {
2208 uint64_t T_tr_ns, max_compl_time_ms;
2209 uint32_t rq_tmo_jiffies;
2210
2211 /*
2212 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
2213 * table 91), both the QP timeout and the retry count have to be set
2214 * for RC QP's during the RTR to RTS transition.
2215 */
2216 WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
2217 (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
2218
2219 /*
2220 * Set target->rq_tmo_jiffies to one second more than the largest time
2221 * it can take before an error completion is generated. See also
2222 * C9-140..142 in the IBTA spec for more information about how to
2223 * convert the QP Local ACK Timeout value to nanoseconds.
2224 */
2225 T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
2226 max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
2227 do_div(max_compl_time_ms, NSEC_PER_MSEC);
2228 rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
2229
2230 return rq_tmo_jiffies;
2231 }
2232
2233 static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2234 const struct srp_login_rsp *lrsp,
2235 struct srp_rdma_ch *ch)
2236 {
2237 struct srp_target_port *target = ch->target;
2238 struct ib_qp_attr *qp_attr = NULL;
2239 int attr_mask = 0;
2240 int ret;
2241 int i;
2242
2243 if (lrsp->opcode == SRP_LOGIN_RSP) {
2244 ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
2245 ch->req_lim = be32_to_cpu(lrsp->req_lim_delta);
2246
2247 /*
2248 * Reserve credits for task management so we don't
2249 * bounce requests back to the SCSI mid-layer.
2250 */
2251 target->scsi_host->can_queue
2252 = min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
2253 target->scsi_host->can_queue);
2254 target->scsi_host->cmd_per_lun
2255 = min_t(int, target->scsi_host->can_queue,
2256 target->scsi_host->cmd_per_lun);
2257 } else {
2258 shost_printk(KERN_WARNING, target->scsi_host,
2259 PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
2260 ret = -ECONNRESET;
2261 goto error;
2262 }
2263
2264 if (!ch->rx_ring) {
2265 ret = srp_alloc_iu_bufs(ch);
2266 if (ret)
2267 goto error;
2268 }
2269
2270 ret = -ENOMEM;
2271 qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
2272 if (!qp_attr)
2273 goto error;
2274
2275 qp_attr->qp_state = IB_QPS_RTR;
2276 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2277 if (ret)
2278 goto error_free;
2279
2280 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2281 if (ret)
2282 goto error_free;
2283
2284 for (i = 0; i < target->queue_size; i++) {
2285 struct srp_iu *iu = ch->rx_ring[i];
2286
2287 ret = srp_post_recv(ch, iu);
2288 if (ret)
2289 goto error_free;
2290 }
2291
2292 qp_attr->qp_state = IB_QPS_RTS;
2293 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2294 if (ret)
2295 goto error_free;
2296
2297 target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
2298
2299 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2300 if (ret)
2301 goto error_free;
2302
2303 ret = ib_send_cm_rtu(cm_id, NULL, 0);
2304
2305 error_free:
2306 kfree(qp_attr);
2307
2308 error:
2309 ch->status = ret;
2310 }
2311
2312 static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
2313 struct ib_cm_event *event,
2314 struct srp_rdma_ch *ch)
2315 {
2316 struct srp_target_port *target = ch->target;
2317 struct Scsi_Host *shost = target->scsi_host;
2318 struct ib_class_port_info *cpi;
2319 int opcode;
2320
2321 switch (event->param.rej_rcvd.reason) {
2322 case IB_CM_REJ_PORT_CM_REDIRECT:
2323 cpi = event->param.rej_rcvd.ari;
2324 ch->path.dlid = cpi->redirect_lid;
2325 ch->path.pkey = cpi->redirect_pkey;
2326 cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
2327 memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16);
2328
2329 ch->status = ch->path.dlid ?
2330 SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
2331 break;
2332
2333 case IB_CM_REJ_PORT_REDIRECT:
2334 if (srp_target_is_topspin(target)) {
2335 /*
2336 * Topspin/Cisco SRP gateways incorrectly send
2337 * reject reason code 25 when they mean 24
2338 * (port redirect).
2339 */
2340 memcpy(ch->path.dgid.raw,
2341 event->param.rej_rcvd.ari, 16);
2342
2343 shost_printk(KERN_DEBUG, shost,
2344 PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2345 be64_to_cpu(ch->path.dgid.global.subnet_prefix),
2346 be64_to_cpu(ch->path.dgid.global.interface_id));
2347
2348 ch->status = SRP_PORT_REDIRECT;
2349 } else {
2350 shost_printk(KERN_WARNING, shost,
2351 " REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
2352 ch->status = -ECONNRESET;
2353 }
2354 break;
2355
2356 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2357 shost_printk(KERN_WARNING, shost,
2358 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2359 ch->status = -ECONNRESET;
2360 break;
2361
2362 case IB_CM_REJ_CONSUMER_DEFINED:
2363 opcode = *(u8 *) event->private_data;
2364 if (opcode == SRP_LOGIN_REJ) {
2365 struct srp_login_rej *rej = event->private_data;
2366 u32 reason = be32_to_cpu(rej->reason);
2367
2368 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2369 shost_printk(KERN_WARNING, shost,
2370 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2371 else
2372 shost_printk(KERN_WARNING, shost, PFX
2373 "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2374 target->sgid.raw,
2375 target->orig_dgid.raw, reason);
2376 } else
2377 shost_printk(KERN_WARNING, shost,
2378 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
2379 " opcode 0x%02x\n", opcode);
2380 ch->status = -ECONNRESET;
2381 break;
2382
2383 case IB_CM_REJ_STALE_CONN:
2384 shost_printk(KERN_WARNING, shost, " REJ reason: stale connection\n");
2385 ch->status = SRP_STALE_CONN;
2386 break;
2387
2388 default:
2389 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n",
2390 event->param.rej_rcvd.reason);
2391 ch->status = -ECONNRESET;
2392 }
2393 }
2394
2395 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2396 {
2397 struct srp_rdma_ch *ch = cm_id->context;
2398 struct srp_target_port *target = ch->target;
2399 int comp = 0;
2400
2401 switch (event->event) {
2402 case IB_CM_REQ_ERROR:
2403 shost_printk(KERN_DEBUG, target->scsi_host,
2404 PFX "Sending CM REQ failed\n");
2405 comp = 1;
2406 ch->status = -ECONNRESET;
2407 break;
2408
2409 case IB_CM_REP_RECEIVED:
2410 comp = 1;
2411 srp_cm_rep_handler(cm_id, event->private_data, ch);
2412 break;
2413
2414 case IB_CM_REJ_RECEIVED:
2415 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2416 comp = 1;
2417
2418 srp_cm_rej_handler(cm_id, event, ch);
2419 break;
2420
2421 case IB_CM_DREQ_RECEIVED:
2422 shost_printk(KERN_WARNING, target->scsi_host,
2423 PFX "DREQ received - connection closed\n");
2424 ch->connected = false;
2425 if (ib_send_cm_drep(cm_id, NULL, 0))
2426 shost_printk(KERN_ERR, target->scsi_host,
2427 PFX "Sending CM DREP failed\n");
2428 queue_work(system_long_wq, &target->tl_err_work);
2429 break;
2430
2431 case IB_CM_TIMEWAIT_EXIT:
2432 shost_printk(KERN_ERR, target->scsi_host,
2433 PFX "connection closed\n");
2434 comp = 1;
2435
2436 ch->status = 0;
2437 break;
2438
2439 case IB_CM_MRA_RECEIVED:
2440 case IB_CM_DREQ_ERROR:
2441 case IB_CM_DREP_RECEIVED:
2442 break;
2443
2444 default:
2445 shost_printk(KERN_WARNING, target->scsi_host,
2446 PFX "Unhandled CM event %d\n", event->event);
2447 break;
2448 }
2449
2450 if (comp)
2451 complete(&ch->done);
2452
2453 return 0;
2454 }
2455
2456 /**
2457 * srp_change_queue_depth - setting device queue depth
2458 * @sdev: scsi device struct
2459 * @qdepth: requested queue depth
2460 *
2461 * Returns queue depth.
2462 */
2463 static int
2464 srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
2465 {
2466 if (!sdev->tagged_supported)
2467 qdepth = 1;
2468 return scsi_change_queue_depth(sdev, qdepth);
2469 }
2470
2471 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
2472 u8 func)
2473 {
2474 struct srp_target_port *target = ch->target;
2475 struct srp_rport *rport = target->rport;
2476 struct ib_device *dev = target->srp_host->srp_dev->dev;
2477 struct srp_iu *iu;
2478 struct srp_tsk_mgmt *tsk_mgmt;
2479
2480 if (!ch->connected || target->qp_in_error)
2481 return -1;
2482
2483 init_completion(&ch->tsk_mgmt_done);
2484
2485 /*
2486 * Lock the rport mutex to avoid that srp_create_ch_ib() is
2487 * invoked while a task management function is being sent.
2488 */
2489 mutex_lock(&rport->mutex);
2490 spin_lock_irq(&ch->lock);
2491 iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
2492 spin_unlock_irq(&ch->lock);
2493
2494 if (!iu) {
2495 mutex_unlock(&rport->mutex);
2496
2497 return -1;
2498 }
2499
2500 ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
2501 DMA_TO_DEVICE);
2502 tsk_mgmt = iu->buf;
2503 memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
2504
2505 tsk_mgmt->opcode = SRP_TSK_MGMT;
2506 int_to_scsilun(lun, &tsk_mgmt->lun);
2507 tsk_mgmt->tag = req_tag | SRP_TAG_TSK_MGMT;
2508 tsk_mgmt->tsk_mgmt_func = func;
2509 tsk_mgmt->task_tag = req_tag;
2510
2511 ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
2512 DMA_TO_DEVICE);
2513 if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
2514 srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
2515 mutex_unlock(&rport->mutex);
2516
2517 return -1;
2518 }
2519 mutex_unlock(&rport->mutex);
2520
2521 if (!wait_for_completion_timeout(&ch->tsk_mgmt_done,
2522 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
2523 return -1;
2524
2525 return 0;
2526 }
2527
2528 static int srp_abort(struct scsi_cmnd *scmnd)
2529 {
2530 struct srp_target_port *target = host_to_target(scmnd->device->host);
2531 struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
2532 u32 tag;
2533 u16 ch_idx;
2534 struct srp_rdma_ch *ch;
2535 int ret;
2536
2537 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
2538
2539 if (!req)
2540 return SUCCESS;
2541 tag = blk_mq_unique_tag(scmnd->request);
2542 ch_idx = blk_mq_unique_tag_to_hwq(tag);
2543 if (WARN_ON_ONCE(ch_idx >= target->ch_count))
2544 return SUCCESS;
2545 ch = &target->ch[ch_idx];
2546 if (!srp_claim_req(ch, req, NULL, scmnd))
2547 return SUCCESS;
2548 shost_printk(KERN_ERR, target->scsi_host,
2549 "Sending SRP abort for tag %#x\n", tag);
2550 if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
2551 SRP_TSK_ABORT_TASK) == 0)
2552 ret = SUCCESS;
2553 else if (target->rport->state == SRP_RPORT_LOST)
2554 ret = FAST_IO_FAIL;
2555 else
2556 ret = FAILED;
2557 srp_free_req(ch, req, scmnd, 0);
2558 scmnd->result = DID_ABORT << 16;
2559 scmnd->scsi_done(scmnd);
2560
2561 return ret;
2562 }
2563
2564 static int srp_reset_device(struct scsi_cmnd *scmnd)
2565 {
2566 struct srp_target_port *target = host_to_target(scmnd->device->host);
2567 struct srp_rdma_ch *ch;
2568 int i;
2569
2570 shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
2571
2572 ch = &target->ch[0];
2573 if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
2574 SRP_TSK_LUN_RESET))
2575 return FAILED;
2576 if (ch->tsk_mgmt_status)
2577 return FAILED;
2578
2579 for (i = 0; i < target->ch_count; i++) {
2580 ch = &target->ch[i];
2581 for (i = 0; i < target->req_ring_size; ++i) {
2582 struct srp_request *req = &ch->req_ring[i];
2583
2584 srp_finish_req(ch, req, scmnd->device, DID_RESET << 16);
2585 }
2586 }
2587
2588 return SUCCESS;
2589 }
2590
2591 static int srp_reset_host(struct scsi_cmnd *scmnd)
2592 {
2593 struct srp_target_port *target = host_to_target(scmnd->device->host);
2594
2595 shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
2596
2597 return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
2598 }
2599
2600 static int srp_slave_configure(struct scsi_device *sdev)
2601 {
2602 struct Scsi_Host *shost = sdev->host;
2603 struct srp_target_port *target = host_to_target(shost);
2604 struct request_queue *q = sdev->request_queue;
2605 unsigned long timeout;
2606
2607 if (sdev->type == TYPE_DISK) {
2608 timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
2609 blk_queue_rq_timeout(q, timeout);
2610 }
2611
2612 return 0;
2613 }
2614
2615 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
2616 char *buf)
2617 {
2618 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2619
2620 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
2621 }
2622
2623 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
2624 char *buf)
2625 {
2626 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2627
2628 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
2629 }
2630
2631 static ssize_t show_service_id(struct device *dev,
2632 struct device_attribute *attr, char *buf)
2633 {
2634 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2635
2636 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->service_id));
2637 }
2638
2639 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
2640 char *buf)
2641 {
2642 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2643
2644 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->pkey));
2645 }
2646
2647 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
2648 char *buf)
2649 {
2650 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2651
2652 return sprintf(buf, "%pI6\n", target->sgid.raw);
2653 }
2654
2655 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
2656 char *buf)
2657 {
2658 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2659 struct srp_rdma_ch *ch = &target->ch[0];
2660
2661 return sprintf(buf, "%pI6\n", ch->path.dgid.raw);
2662 }
2663
2664 static ssize_t show_orig_dgid(struct device *dev,
2665 struct device_attribute *attr, char *buf)
2666 {
2667 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2668
2669 return sprintf(buf, "%pI6\n", target->orig_dgid.raw);
2670 }
2671
2672 static ssize_t show_req_lim(struct device *dev,
2673 struct device_attribute *attr, char *buf)
2674 {
2675 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2676 struct srp_rdma_ch *ch;
2677 int i, req_lim = INT_MAX;
2678
2679 for (i = 0; i < target->ch_count; i++) {
2680 ch = &target->ch[i];
2681 req_lim = min(req_lim, ch->req_lim);
2682 }
2683 return sprintf(buf, "%d\n", req_lim);
2684 }
2685
2686 static ssize_t show_zero_req_lim(struct device *dev,
2687 struct device_attribute *attr, char *buf)
2688 {
2689 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2690
2691 return sprintf(buf, "%d\n", target->zero_req_lim);
2692 }
2693
2694 static ssize_t show_local_ib_port(struct device *dev,
2695 struct device_attribute *attr, char *buf)
2696 {
2697 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2698
2699 return sprintf(buf, "%d\n", target->srp_host->port);
2700 }
2701
2702 static ssize_t show_local_ib_device(struct device *dev,
2703 struct device_attribute *attr, char *buf)
2704 {
2705 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2706
2707 return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
2708 }
2709
2710 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
2711 char *buf)
2712 {
2713 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2714
2715 return sprintf(buf, "%d\n", target->ch_count);
2716 }
2717
2718 static ssize_t show_comp_vector(struct device *dev,
2719 struct device_attribute *attr, char *buf)
2720 {
2721 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2722
2723 return sprintf(buf, "%d\n", target->comp_vector);
2724 }
2725
2726 static ssize_t show_tl_retry_count(struct device *dev,
2727 struct device_attribute *attr, char *buf)
2728 {
2729 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2730
2731 return sprintf(buf, "%d\n", target->tl_retry_count);
2732 }
2733
2734 static ssize_t show_cmd_sg_entries(struct device *dev,
2735 struct device_attribute *attr, char *buf)
2736 {
2737 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2738
2739 return sprintf(buf, "%u\n", target->cmd_sg_cnt);
2740 }
2741
2742 static ssize_t show_allow_ext_sg(struct device *dev,
2743 struct device_attribute *attr, char *buf)
2744 {
2745 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2746
2747 return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
2748 }
2749
2750 static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL);
2751 static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL);
2752 static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL);
2753 static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
2754 static DEVICE_ATTR(sgid, S_IRUGO, show_sgid, NULL);
2755 static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL);
2756 static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL);
2757 static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL);
2758 static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL);
2759 static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL);
2760 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
2761 static DEVICE_ATTR(ch_count, S_IRUGO, show_ch_count, NULL);
2762 static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL);
2763 static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL);
2764 static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL);
2765 static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL);
2766
2767 static struct device_attribute *srp_host_attrs[] = {
2768 &dev_attr_id_ext,
2769 &dev_attr_ioc_guid,
2770 &dev_attr_service_id,
2771 &dev_attr_pkey,
2772 &dev_attr_sgid,
2773 &dev_attr_dgid,
2774 &dev_attr_orig_dgid,
2775 &dev_attr_req_lim,
2776 &dev_attr_zero_req_lim,
2777 &dev_attr_local_ib_port,
2778 &dev_attr_local_ib_device,
2779 &dev_attr_ch_count,
2780 &dev_attr_comp_vector,
2781 &dev_attr_tl_retry_count,
2782 &dev_attr_cmd_sg_entries,
2783 &dev_attr_allow_ext_sg,
2784 NULL
2785 };
2786
2787 static struct scsi_host_template srp_template = {
2788 .module = THIS_MODULE,
2789 .name = "InfiniBand SRP initiator",
2790 .proc_name = DRV_NAME,
2791 .slave_configure = srp_slave_configure,
2792 .info = srp_target_info,
2793 .queuecommand = srp_queuecommand,
2794 .change_queue_depth = srp_change_queue_depth,
2795 .eh_abort_handler = srp_abort,
2796 .eh_device_reset_handler = srp_reset_device,
2797 .eh_host_reset_handler = srp_reset_host,
2798 .skip_settle_delay = true,
2799 .sg_tablesize = SRP_DEF_SG_TABLESIZE,
2800 .can_queue = SRP_DEFAULT_CMD_SQ_SIZE,
2801 .this_id = -1,
2802 .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE,
2803 .use_clustering = ENABLE_CLUSTERING,
2804 .shost_attrs = srp_host_attrs,
2805 .track_queue_depth = 1,
2806 };
2807
2808 static int srp_sdev_count(struct Scsi_Host *host)
2809 {
2810 struct scsi_device *sdev;
2811 int c = 0;
2812
2813 shost_for_each_device(sdev, host)
2814 c++;
2815
2816 return c;
2817 }
2818
2819 /*
2820 * Return values:
2821 * < 0 upon failure. Caller is responsible for SRP target port cleanup.
2822 * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
2823 * removal has been scheduled.
2824 * 0 and target->state != SRP_TARGET_REMOVED upon success.
2825 */
2826 static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
2827 {
2828 struct srp_rport_identifiers ids;
2829 struct srp_rport *rport;
2830
2831 target->state = SRP_TARGET_SCANNING;
2832 sprintf(target->target_name, "SRP.T10:%016llX",
2833 be64_to_cpu(target->id_ext));
2834
2835 if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dma_device))
2836 return -ENODEV;
2837
2838 memcpy(ids.port_id, &target->id_ext, 8);
2839 memcpy(ids.port_id + 8, &target->ioc_guid, 8);
2840 ids.roles = SRP_RPORT_ROLE_TARGET;
2841 rport = srp_rport_add(target->scsi_host, &ids);
2842 if (IS_ERR(rport)) {
2843 scsi_remove_host(target->scsi_host);
2844 return PTR_ERR(rport);
2845 }
2846
2847 rport->lld_data = target;
2848 target->rport = rport;
2849
2850 spin_lock(&host->target_lock);
2851 list_add_tail(&target->list, &host->target_list);
2852 spin_unlock(&host->target_lock);
2853
2854 scsi_scan_target(&target->scsi_host->shost_gendev,
2855 0, target->scsi_id, SCAN_WILD_CARD, 0);
2856
2857 if (srp_connected_ch(target) < target->ch_count ||
2858 target->qp_in_error) {
2859 shost_printk(KERN_INFO, target->scsi_host,
2860 PFX "SCSI scan failed - removing SCSI host\n");
2861 srp_queue_remove_work(target);
2862 goto out;
2863 }
2864
2865 pr_debug(PFX "%s: SCSI scan succeeded - detected %d LUNs\n",
2866 dev_name(&target->scsi_host->shost_gendev),
2867 srp_sdev_count(target->scsi_host));
2868
2869 spin_lock_irq(&target->lock);
2870 if (target->state == SRP_TARGET_SCANNING)
2871 target->state = SRP_TARGET_LIVE;
2872 spin_unlock_irq(&target->lock);
2873
2874 out:
2875 return 0;
2876 }
2877
2878 static void srp_release_dev(struct device *dev)
2879 {
2880 struct srp_host *host =
2881 container_of(dev, struct srp_host, dev);
2882
2883 complete(&host->released);
2884 }
2885
2886 static struct class srp_class = {
2887 .name = "infiniband_srp",
2888 .dev_release = srp_release_dev
2889 };
2890
2891 /**
2892 * srp_conn_unique() - check whether the connection to a target is unique
2893 * @host: SRP host.
2894 * @target: SRP target port.
2895 */
2896 static bool srp_conn_unique(struct srp_host *host,
2897 struct srp_target_port *target)
2898 {
2899 struct srp_target_port *t;
2900 bool ret = false;
2901
2902 if (target->state == SRP_TARGET_REMOVED)
2903 goto out;
2904
2905 ret = true;
2906
2907 spin_lock(&host->target_lock);
2908 list_for_each_entry(t, &host->target_list, list) {
2909 if (t != target &&
2910 target->id_ext == t->id_ext &&
2911 target->ioc_guid == t->ioc_guid &&
2912 target->initiator_ext == t->initiator_ext) {
2913 ret = false;
2914 break;
2915 }
2916 }
2917 spin_unlock(&host->target_lock);
2918
2919 out:
2920 return ret;
2921 }
2922
/*
 * Target ports are added by writing
 *
 *     id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
 *     pkey=<P_Key>,service_id=<service ID>
 *
 * to the add_target sysfs attribute.
 *
 * Each option owns one bit so that the options seen so far can be tracked
 * in a mask. SRP_OPT_ALL is the set of options listed above.
 */
enum {
	SRP_OPT_ERR		= 0x0000,
	SRP_OPT_ID_EXT		= 0x0001,
	SRP_OPT_IOC_GUID	= 0x0002,
	SRP_OPT_DGID		= 0x0004,
	SRP_OPT_PKEY		= 0x0008,
	SRP_OPT_SERVICE_ID	= 0x0010,
	SRP_OPT_MAX_SECT	= 0x0020,
	SRP_OPT_MAX_CMD_PER_LUN	= 0x0040,
	SRP_OPT_IO_CLASS	= 0x0080,
	SRP_OPT_INITIATOR_EXT	= 0x0100,
	SRP_OPT_CMD_SG_ENTRIES	= 0x0200,
	SRP_OPT_ALLOW_EXT_SG	= 0x0400,
	SRP_OPT_SG_TABLESIZE	= 0x0800,
	SRP_OPT_COMP_VECTOR	= 0x1000,
	SRP_OPT_TL_RETRY_COUNT	= 0x2000,
	SRP_OPT_QUEUE_SIZE	= 0x4000,
	SRP_OPT_ALL		= (SRP_OPT_ID_EXT	|
				   SRP_OPT_IOC_GUID	|
				   SRP_OPT_DGID		|
				   SRP_OPT_PKEY		|
				   SRP_OPT_SERVICE_ID),
};
2954
2955 static const match_table_t srp_opt_tokens = {
2956 { SRP_OPT_ID_EXT, "id_ext=%s" },
2957 { SRP_OPT_IOC_GUID, "ioc_guid=%s" },
2958 { SRP_OPT_DGID, "dgid=%s" },
2959 { SRP_OPT_PKEY, "pkey=%x" },
2960 { SRP_OPT_SERVICE_ID, "service_id=%s" },
2961 { SRP_OPT_MAX_SECT, "max_sect=%d" },
2962 { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" },
2963 { SRP_OPT_IO_CLASS, "io_class=%x" },
2964 { SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" },
2965 { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" },
2966 { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" },
2967 { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" },
2968 { SRP_OPT_COMP_VECTOR, "comp_vector=%u" },
2969 { SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" },
2970 { SRP_OPT_QUEUE_SIZE, "queue_size=%d" },
2971 { SRP_OPT_ERR, NULL }
2972 };
2973
2974 static int srp_parse_options(const char *buf, struct srp_target_port *target)
2975 {
2976 char *options, *sep_opt;
2977 char *p;
2978 char dgid[3];
2979 substring_t args[MAX_OPT_ARGS];
2980 int opt_mask = 0;
2981 int token;
2982 int ret = -EINVAL;
2983 int i;
2984
2985 options = kstrdup(buf, GFP_KERNEL);
2986 if (!options)
2987 return -ENOMEM;
2988
2989 sep_opt = options;
2990 while ((p = strsep(&sep_opt, ",\n")) != NULL) {
2991 if (!*p)
2992 continue;
2993
2994 token = match_token(p, srp_opt_tokens, args);
2995 opt_mask |= token;
2996
2997 switch (token) {
2998 case SRP_OPT_ID_EXT:
2999 p = match_strdup(args);
3000 if (!p) {
3001 ret = -ENOMEM;
3002 goto out;
3003 }
3004 target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3005 kfree(p);
3006 break;
3007
3008 case SRP_OPT_IOC_GUID:
3009 p = match_strdup(args);
3010 if (!p) {
3011 ret = -ENOMEM;
3012 goto out;
3013 }
3014 target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16));
3015 kfree(p);
3016 break;
3017
3018 case SRP_OPT_DGID:
3019 p = match_strdup(args);
3020 if (!p) {
3021 ret = -ENOMEM;
3022 goto out;
3023 }
3024 if (strlen(p) != 32) {
3025 pr_warn("bad dest GID parameter '%s'\n", p);
3026 kfree(p);
3027 goto out;
3028 }
3029
3030 for (i = 0; i < 16; ++i) {
3031 strlcpy(dgid, p + i * 2, sizeof(dgid));
3032 if (sscanf(dgid, "%hhx",
3033 &target->orig_dgid.raw[i]) < 1) {
3034 ret = -EINVAL;
3035 kfree(p);
3036 goto out;
3037 }
3038 }
3039 kfree(p);
3040 break;
3041
3042 case SRP_OPT_PKEY:
3043 if (match_hex(args, &token)) {
3044 pr_warn("bad P_Key parameter '%s'\n", p);
3045 goto out;
3046 }
3047 target->pkey = cpu_to_be16(token);
3048 break;
3049
3050 case SRP_OPT_SERVICE_ID:
3051 p = match_strdup(args);
3052 if (!p) {
3053 ret = -ENOMEM;
3054 goto out;
3055 }
3056 target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16));
3057 kfree(p);
3058 break;
3059
3060 case SRP_OPT_MAX_SECT:
3061 if (match_int(args, &token)) {
3062 pr_warn("bad max sect parameter '%s'\n", p);
3063 goto out;
3064 }
3065 target->scsi_host->max_sectors = token;
3066 break;
3067
3068 case SRP_OPT_QUEUE_SIZE:
3069 if (match_int(args, &token) || token < 1) {
3070 pr_warn("bad queue_size parameter '%s'\n", p);
3071 goto out;
3072 }
3073 target->scsi_host->can_queue = token;
3074 target->queue_size = token + SRP_RSP_SQ_SIZE +
3075 SRP_TSK_MGMT_SQ_SIZE;
3076 if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3077 target->scsi_host->cmd_per_lun = token;
3078 break;
3079
3080 case SRP_OPT_MAX_CMD_PER_LUN:
3081 if (match_int(args, &token) || token < 1) {
3082 pr_warn("bad max cmd_per_lun parameter '%s'\n",
3083 p);
3084 goto out;
3085 }
3086 target->scsi_host->cmd_per_lun = token;
3087 break;
3088
3089 case SRP_OPT_IO_CLASS:
3090 if (match_hex(args, &token)) {
3091 pr_warn("bad IO class parameter '%s'\n", p);
3092 goto out;
3093 }
3094 if (token != SRP_REV10_IB_IO_CLASS &&
3095 token != SRP_REV16A_IB_IO_CLASS) {
3096 pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
3097 token, SRP_REV10_IB_IO_CLASS,
3098 SRP_REV16A_IB_IO_CLASS);
3099 goto out;
3100 }
3101 target->io_class = token;
3102 break;
3103
3104 case SRP_OPT_INITIATOR_EXT:
3105 p = match_strdup(args);
3106 if (!p) {
3107 ret = -ENOMEM;
3108 goto out;
3109 }
3110 target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3111 kfree(p);
3112 break;
3113
3114 case SRP_OPT_CMD_SG_ENTRIES:
3115 if (match_int(args, &token) || token < 1 || token > 255) {
3116 pr_warn("bad max cmd_sg_entries parameter '%s'\n",
3117 p);
3118 goto out;
3119 }
3120 target->cmd_sg_cnt = token;
3121 break;
3122
3123 case SRP_OPT_ALLOW_EXT_SG:
3124 if (match_int(args, &token)) {
3125 pr_warn("bad allow_ext_sg parameter '%s'\n", p);
3126 goto out;
3127 }
3128 target->allow_ext_sg = !!token;
3129 break;
3130
3131 case SRP_OPT_SG_TABLESIZE:
3132 if (match_int(args, &token) || token < 1 ||
3133 token > SCSI_MAX_SG_CHAIN_SEGMENTS) {
3134 pr_warn("bad max sg_tablesize parameter '%s'\n",
3135 p);
3136 goto out;
3137 }
3138 target->sg_tablesize = token;
3139 break;
3140
3141 case SRP_OPT_COMP_VECTOR:
3142 if (match_int(args, &token) || token < 0) {
3143 pr_warn("bad comp_vector parameter '%s'\n", p);
3144 goto out;
3145 }
3146 target->comp_vector = token;
3147 break;
3148
3149 case SRP_OPT_TL_RETRY_COUNT:
3150 if (match_int(args, &token) || token < 2 || token > 7) {
3151 pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
3152 p);
3153 goto out;
3154 }
3155 target->tl_retry_count = token;
3156 break;
3157
3158 default:
3159 pr_warn("unknown parameter or missing value '%s' in target creation request\n",
3160 p);
3161 goto out;
3162 }
3163 }
3164
3165 if ((opt_mask & SRP_OPT_ALL) == SRP_OPT_ALL)
3166 ret = 0;
3167 else
3168 for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i)
3169 if ((srp_opt_tokens[i].token & SRP_OPT_ALL) &&
3170 !(srp_opt_tokens[i].token & opt_mask))
3171 pr_warn("target creation request is missing parameter '%s'\n",
3172 srp_opt_tokens[i].pattern);
3173
3174 if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
3175 && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3176 pr_warn("cmd_per_lun = %d > queue_size = %d\n",
3177 target->scsi_host->cmd_per_lun,
3178 target->scsi_host->can_queue);
3179
3180 out:
3181 kfree(options);
3182 return ret;
3183 }
3184
3185 static ssize_t srp_create_target(struct device *dev,
3186 struct device_attribute *attr,
3187 const char *buf, size_t count)
3188 {
3189 struct srp_host *host =
3190 container_of(dev, struct srp_host, dev);
3191 struct Scsi_Host *target_host;
3192 struct srp_target_port *target;
3193 struct srp_rdma_ch *ch;
3194 struct srp_device *srp_dev = host->srp_dev;
3195 struct ib_device *ibdev = srp_dev->dev;
3196 int ret, node_idx, node, cpu, i;
3197 bool multich = false;
3198
3199 target_host = scsi_host_alloc(&srp_template,
3200 sizeof (struct srp_target_port));
3201 if (!target_host)
3202 return -ENOMEM;
3203
3204 target_host->transportt = ib_srp_transport_template;
3205 target_host->max_channel = 0;
3206 target_host->max_id = 1;
3207 target_host->max_lun = -1LL;
3208 target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
3209
3210 target = host_to_target(target_host);
3211
3212 target->io_class = SRP_REV16A_IB_IO_CLASS;
3213 target->scsi_host = target_host;
3214 target->srp_host = host;
3215 target->lkey = host->srp_dev->pd->local_dma_lkey;
3216 target->global_mr = host->srp_dev->global_mr;
3217 target->cmd_sg_cnt = cmd_sg_entries;
3218 target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries;
3219 target->allow_ext_sg = allow_ext_sg;
3220 target->tl_retry_count = 7;
3221 target->queue_size = SRP_DEFAULT_QUEUE_SIZE;
3222
3223 /*
3224 * Avoid that the SCSI host can be removed by srp_remove_target()
3225 * before this function returns.
3226 */
3227 scsi_host_get(target->scsi_host);
3228
3229 mutex_lock(&host->add_target_mutex);
3230
3231 ret = srp_parse_options(buf, target);
3232 if (ret)
3233 goto out;
3234
3235 target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
3236
3237 if (!srp_conn_unique(target->srp_host, target)) {
3238 shost_printk(KERN_INFO, target->scsi_host,
3239 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3240 be64_to_cpu(target->id_ext),
3241 be64_to_cpu(target->ioc_guid),
3242 be64_to_cpu(target->initiator_ext));
3243 ret = -EEXIST;
3244 goto out;
3245 }
3246
3247 if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
3248 target->cmd_sg_cnt < target->sg_tablesize) {
3249 pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
3250 target->sg_tablesize = target->cmd_sg_cnt;
3251 }
3252
3253 target_host->sg_tablesize = target->sg_tablesize;
3254 target->indirect_size = target->sg_tablesize *
3255 sizeof (struct srp_direct_buf);
3256 target->max_iu_len = sizeof (struct srp_cmd) +
3257 sizeof (struct srp_indirect_buf) +
3258 target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
3259
3260 INIT_WORK(&target->tl_err_work, srp_tl_err_work);
3261 INIT_WORK(&target->remove_work, srp_remove_work);
3262 spin_lock_init(&target->lock);
3263 ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL);
3264 if (ret)
3265 goto out;
3266
3267 ret = -ENOMEM;
3268 target->ch_count = max_t(unsigned, num_online_nodes(),
3269 min(ch_count ? :
3270 min(4 * num_online_nodes(),
3271 ibdev->num_comp_vectors),
3272 num_online_cpus()));
3273 target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
3274 GFP_KERNEL);
3275 if (!target->ch)
3276 goto out;
3277
3278 node_idx = 0;
3279 for_each_online_node(node) {
3280 const int ch_start = (node_idx * target->ch_count /
3281 num_online_nodes());
3282 const int ch_end = ((node_idx + 1) * target->ch_count /
3283 num_online_nodes());
3284 const int cv_start = (node_idx * ibdev->num_comp_vectors /
3285 num_online_nodes() + target->comp_vector)
3286 % ibdev->num_comp_vectors;
3287 const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors /
3288 num_online_nodes() + target->comp_vector)
3289 % ibdev->num_comp_vectors;
3290 int cpu_idx = 0;
3291
3292 for_each_online_cpu(cpu) {
3293 if (cpu_to_node(cpu) != node)
3294 continue;
3295 if (ch_start + cpu_idx >= ch_end)
3296 continue;
3297 ch = &target->ch[ch_start + cpu_idx];
3298 ch->target = target;
3299 ch->comp_vector = cv_start == cv_end ? cv_start :
3300 cv_start + cpu_idx % (cv_end - cv_start);
3301 spin_lock_init(&ch->lock);
3302 INIT_LIST_HEAD(&ch->free_tx);
3303 ret = srp_new_cm_id(ch);
3304 if (ret)
3305 goto err_disconnect;
3306
3307 ret = srp_create_ch_ib(ch);
3308 if (ret)
3309 goto err_disconnect;
3310
3311 ret = srp_alloc_req_data(ch);
3312 if (ret)
3313 goto err_disconnect;
3314
3315 ret = srp_connect_ch(ch, multich);
3316 if (ret) {
3317 shost_printk(KERN_ERR, target->scsi_host,
3318 PFX "Connection %d/%d failed\n",
3319 ch_start + cpu_idx,
3320 target->ch_count);
3321 if (node_idx == 0 && cpu_idx == 0) {
3322 goto err_disconnect;
3323 } else {
3324 srp_free_ch_ib(target, ch);
3325 srp_free_req_data(target, ch);
3326 target->ch_count = ch - target->ch;
3327 goto connected;
3328 }
3329 }
3330
3331 multich = true;
3332 cpu_idx++;
3333 }
3334 node_idx++;
3335 }
3336
3337 connected:
3338 target->scsi_host->nr_hw_queues = target->ch_count;
3339
3340 ret = srp_add_target(host, target);
3341 if (ret)
3342 goto err_disconnect;
3343
3344 if (target->state != SRP_TARGET_REMOVED) {
3345 shost_printk(KERN_DEBUG, target->scsi_host, PFX
3346 "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
3347 be64_to_cpu(target->id_ext),
3348 be64_to_cpu(target->ioc_guid),
3349 be16_to_cpu(target->pkey),
3350 be64_to_cpu(target->service_id),
3351 target->sgid.raw, target->orig_dgid.raw);
3352 }
3353
3354 ret = count;
3355
3356 out:
3357 mutex_unlock(&host->add_target_mutex);
3358
3359 scsi_host_put(target->scsi_host);
3360 if (ret < 0)
3361 scsi_host_put(target->scsi_host);
3362
3363 return ret;
3364
3365 err_disconnect:
3366 srp_disconnect_target(target);
3367
3368 for (i = 0; i < target->ch_count; i++) {
3369 ch = &target->ch[i];
3370 srp_free_ch_ib(target, ch);
3371 srp_free_req_data(target, ch);
3372 }
3373
3374 kfree(target->ch);
3375 goto out;
3376 }
3377
3378 static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
3379
3380 static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
3381 char *buf)
3382 {
3383 struct srp_host *host = container_of(dev, struct srp_host, dev);
3384
3385 return sprintf(buf, "%s\n", host->srp_dev->dev->name);
3386 }
3387
3388 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
3389
3390 static ssize_t show_port(struct device *dev, struct device_attribute *attr,
3391 char *buf)
3392 {
3393 struct srp_host *host = container_of(dev, struct srp_host, dev);
3394
3395 return sprintf(buf, "%d\n", host->port);
3396 }
3397
3398 static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
3399
3400 static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
3401 {
3402 struct srp_host *host;
3403
3404 host = kzalloc(sizeof *host, GFP_KERNEL);
3405 if (!host)
3406 return NULL;
3407
3408 INIT_LIST_HEAD(&host->target_list);
3409 spin_lock_init(&host->target_lock);
3410 init_completion(&host->released);
3411 mutex_init(&host->add_target_mutex);
3412 host->srp_dev = device;
3413 host->port = port;
3414
3415 host->dev.class = &srp_class;
3416 host->dev.parent = device->dev->dma_device;
3417 dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
3418
3419 if (device_register(&host->dev))
3420 goto free_host;
3421 if (device_create_file(&host->dev, &dev_attr_add_target))
3422 goto err_class;
3423 if (device_create_file(&host->dev, &dev_attr_ibdev))
3424 goto err_class;
3425 if (device_create_file(&host->dev, &dev_attr_port))
3426 goto err_class;
3427
3428 return host;
3429
3430 err_class:
3431 device_unregister(&host->dev);
3432
3433 free_host:
3434 kfree(host);
3435
3436 return NULL;
3437 }
3438
3439 static void srp_add_one(struct ib_device *device)
3440 {
3441 struct srp_device *srp_dev;
3442 struct ib_device_attr *dev_attr;
3443 struct srp_host *host;
3444 int mr_page_shift, p;
3445 u64 max_pages_per_mr;
3446
3447 dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
3448 if (!dev_attr)
3449 return;
3450
3451 if (ib_query_device(device, dev_attr)) {
3452 pr_warn("Query device failed for %s\n", device->name);
3453 goto free_attr;
3454 }
3455
3456 srp_dev = kmalloc(sizeof *srp_dev, GFP_KERNEL);
3457 if (!srp_dev)
3458 goto free_attr;
3459
3460 srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
3461 device->map_phys_fmr && device->unmap_fmr);
3462 srp_dev->has_fr = (dev_attr->device_cap_flags &
3463 IB_DEVICE_MEM_MGT_EXTENSIONS);
3464 if (!srp_dev->has_fmr && !srp_dev->has_fr)
3465 dev_warn(&device->dev, "neither FMR nor FR is supported\n");
3466
3467 srp_dev->use_fast_reg = (srp_dev->has_fr &&
3468 (!srp_dev->has_fmr || prefer_fr));
3469 srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
3470
3471 /*
3472 * Use the smallest page size supported by the HCA, down to a
3473 * minimum of 4096 bytes. We're unlikely to build large sglists
3474 * out of smaller entries.
3475 */
3476 mr_page_shift = max(12, ffs(dev_attr->page_size_cap) - 1);
3477 srp_dev->mr_page_size = 1 << mr_page_shift;
3478 srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
3479 max_pages_per_mr = dev_attr->max_mr_size;
3480 do_div(max_pages_per_mr, srp_dev->mr_page_size);
3481 srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
3482 max_pages_per_mr);
3483 if (srp_dev->use_fast_reg) {
3484 srp_dev->max_pages_per_mr =
3485 min_t(u32, srp_dev->max_pages_per_mr,
3486 dev_attr->max_fast_reg_page_list_len);
3487 }
3488 srp_dev->mr_max_size = srp_dev->mr_page_size *
3489 srp_dev->max_pages_per_mr;
3490 pr_debug("%s: mr_page_shift = %d, dev_attr->max_mr_size = %#llx, dev_attr->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
3491 device->name, mr_page_shift, dev_attr->max_mr_size,
3492 dev_attr->max_fast_reg_page_list_len,
3493 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
3494
3495 INIT_LIST_HEAD(&srp_dev->dev_list);
3496
3497 srp_dev->dev = device;
3498 srp_dev->pd = ib_alloc_pd(device);
3499 if (IS_ERR(srp_dev->pd))
3500 goto free_dev;
3501
3502 if (!register_always || (!srp_dev->has_fmr && !srp_dev->has_fr)) {
3503 srp_dev->global_mr = ib_get_dma_mr(srp_dev->pd,
3504 IB_ACCESS_LOCAL_WRITE |
3505 IB_ACCESS_REMOTE_READ |
3506 IB_ACCESS_REMOTE_WRITE);
3507 if (IS_ERR(srp_dev->global_mr))
3508 goto err_pd;
3509 } else {
3510 srp_dev->global_mr = NULL;
3511 }
3512
3513 for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
3514 host = srp_add_port(srp_dev, p);
3515 if (host)
3516 list_add_tail(&host->list, &srp_dev->dev_list);
3517 }
3518
3519 ib_set_client_data(device, &srp_client, srp_dev);
3520
3521 goto free_attr;
3522
3523 err_pd:
3524 ib_dealloc_pd(srp_dev->pd);
3525
3526 free_dev:
3527 kfree(srp_dev);
3528
3529 free_attr:
3530 kfree(dev_attr);
3531 }
3532
3533 static void srp_remove_one(struct ib_device *device, void *client_data)
3534 {
3535 struct srp_device *srp_dev;
3536 struct srp_host *host, *tmp_host;
3537 struct srp_target_port *target;
3538
3539 srp_dev = client_data;
3540 if (!srp_dev)
3541 return;
3542
3543 list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
3544 device_unregister(&host->dev);
3545 /*
3546 * Wait for the sysfs entry to go away, so that no new
3547 * target ports can be created.
3548 */
3549 wait_for_completion(&host->released);
3550
3551 /*
3552 * Remove all target ports.
3553 */
3554 spin_lock(&host->target_lock);
3555 list_for_each_entry(target, &host->target_list, list)
3556 srp_queue_remove_work(target);
3557 spin_unlock(&host->target_lock);
3558
3559 /*
3560 * Wait for tl_err and target port removal tasks.
3561 */
3562 flush_workqueue(system_long_wq);
3563 flush_workqueue(srp_remove_wq);
3564
3565 kfree(host);
3566 }
3567
3568 if (srp_dev->global_mr)
3569 ib_dereg_mr(srp_dev->global_mr);
3570 ib_dealloc_pd(srp_dev->pd);
3571
3572 kfree(srp_dev);
3573 }
3574
3575 static struct srp_function_template ib_srp_transport_functions = {
3576 .has_rport_state = true,
3577 .reset_timer_if_blocked = true,
3578 .reconnect_delay = &srp_reconnect_delay,
3579 .fast_io_fail_tmo = &srp_fast_io_fail_tmo,
3580 .dev_loss_tmo = &srp_dev_loss_tmo,
3581 .reconnect = srp_rport_reconnect,
3582 .rport_delete = srp_rport_delete,
3583 .terminate_rport_io = srp_terminate_io,
3584 };
3585
3586 static int __init srp_init_module(void)
3587 {
3588 int ret;
3589
3590 BUILD_BUG_ON(FIELD_SIZEOF(struct ib_wc, wr_id) < sizeof(void *));
3591
3592 if (srp_sg_tablesize) {
3593 pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
3594 if (!cmd_sg_entries)
3595 cmd_sg_entries = srp_sg_tablesize;
3596 }
3597
3598 if (!cmd_sg_entries)
3599 cmd_sg_entries = SRP_DEF_SG_TABLESIZE;
3600
3601 if (cmd_sg_entries > 255) {
3602 pr_warn("Clamping cmd_sg_entries to 255\n");
3603 cmd_sg_entries = 255;
3604 }
3605
3606 if (!indirect_sg_entries)
3607 indirect_sg_entries = cmd_sg_entries;
3608 else if (indirect_sg_entries < cmd_sg_entries) {
3609 pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
3610 cmd_sg_entries);
3611 indirect_sg_entries = cmd_sg_entries;
3612 }
3613
3614 srp_remove_wq = create_workqueue("srp_remove");
3615 if (!srp_remove_wq) {
3616 ret = -ENOMEM;
3617 goto out;
3618 }
3619
3620 ret = -ENOMEM;
3621 ib_srp_transport_template =
3622 srp_attach_transport(&ib_srp_transport_functions);
3623 if (!ib_srp_transport_template)
3624 goto destroy_wq;
3625
3626 ret = class_register(&srp_class);
3627 if (ret) {
3628 pr_err("couldn't register class infiniband_srp\n");
3629 goto release_tr;
3630 }
3631
3632 ib_sa_register_client(&srp_sa_client);
3633
3634 ret = ib_register_client(&srp_client);
3635 if (ret) {
3636 pr_err("couldn't register IB client\n");
3637 goto unreg_sa;
3638 }
3639
3640 out:
3641 return ret;
3642
3643 unreg_sa:
3644 ib_sa_unregister_client(&srp_sa_client);
3645 class_unregister(&srp_class);
3646
3647 release_tr:
3648 srp_release_transport(ib_srp_transport_template);
3649
3650 destroy_wq:
3651 destroy_workqueue(srp_remove_wq);
3652 goto out;
3653 }
3654
3655 static void __exit srp_cleanup_module(void)
3656 {
3657 ib_unregister_client(&srp_client);
3658 ib_sa_unregister_client(&srp_sa_client);
3659 class_unregister(&srp_class);
3660 srp_release_transport(ib_srp_transport_template);
3661 destroy_workqueue(srp_remove_wq);
3662 }
3663
3664 module_init(srp_init_module);
3665 module_exit(srp_cleanup_module);