]> git.ipfire.org Git - thirdparty/linux.git/blame - drivers/nvme/host/core.c
Merge tag 'io_uring-5.7-2020-05-22' of git://git.kernel.dk/linux-block
[thirdparty/linux.git] / drivers / nvme / host / core.c
CommitLineData
bc50ad75 1// SPDX-License-Identifier: GPL-2.0
21d34711
CH
2/*
3 * NVM Express device driver
4 * Copyright (c) 2011-2014, Intel Corporation.
21d34711
CH
5 */
6
7#include <linux/blkdev.h>
8#include <linux/blk-mq.h>
c95b708d 9#include <linux/compat.h>
5fd4ce1b 10#include <linux/delay.h>
21d34711 11#include <linux/errno.h>
1673f1f0 12#include <linux/hdreg.h>
21d34711 13#include <linux/kernel.h>
5bae7f73 14#include <linux/module.h>
958f2a0f 15#include <linux/backing-dev.h>
5bae7f73 16#include <linux/list_sort.h>
21d34711
CH
17#include <linux/slab.h>
18#include <linux/types.h>
1673f1f0
CH
19#include <linux/pr.h>
20#include <linux/ptrace.h>
21#include <linux/nvme_ioctl.h>
22#include <linux/t10-pi.h>
c5552fde 23#include <linux/pm_qos.h>
1673f1f0 24#include <asm/unaligned.h>
21d34711
CH
25
26#include "nvme.h"
038bd4cb 27#include "fabrics.h"
21d34711 28
35fe0d12
HR
29#define CREATE_TRACE_POINTS
30#include "trace.h"
31
f3ca80fc
CH
32#define NVME_MINORS (1U << MINORBITS)
33
8ae4e447
MO
34unsigned int admin_timeout = 60;
35module_param(admin_timeout, uint, 0644);
ba0ba7d3 36MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
576d55d6 37EXPORT_SYMBOL_GPL(admin_timeout);
ba0ba7d3 38
8ae4e447
MO
39unsigned int nvme_io_timeout = 30;
40module_param_named(io_timeout, nvme_io_timeout, uint, 0644);
ba0ba7d3 41MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
576d55d6 42EXPORT_SYMBOL_GPL(nvme_io_timeout);
ba0ba7d3 43
b3b1b0b0 44static unsigned char shutdown_timeout = 5;
ba0ba7d3
ML
45module_param(shutdown_timeout, byte, 0644);
46MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
47
44e44b29
CH
48static u8 nvme_max_retries = 5;
49module_param_named(max_retries, nvme_max_retries, byte, 0644);
f80ec966 50MODULE_PARM_DESC(max_retries, "max number of retries a command may have");
5bae7f73 51
9947d6a0 52static unsigned long default_ps_max_latency_us = 100000;
c5552fde
AL
53module_param(default_ps_max_latency_us, ulong, 0644);
54MODULE_PARM_DESC(default_ps_max_latency_us,
55 "max power saving latency for new devices; use PM QOS to change per device");
56
c35e30b4
AL
57static bool force_apst;
58module_param(force_apst, bool, 0644);
59MODULE_PARM_DESC(force_apst, "allow APST for newly enumerated devices even if quirked off");
60
f5d11840
JA
61static bool streams;
62module_param(streams, bool, 0644);
63MODULE_PARM_DESC(streams, "turn on support for Streams write directives");
64
b227c59b
RS
65/*
66 * nvme_wq - hosts nvme related works that are not reset or delete
67 * nvme_reset_wq - hosts nvme reset works
68 * nvme_delete_wq - hosts nvme delete works
69 *
97b2512a
NK
70 * nvme_wq will host works such as scan, aen handling, fw activation,
71 * keep-alive, periodic reconnects etc. nvme_reset_wq
b227c59b
RS
72 * runs reset works which also flush works hosted on nvme_wq for
73 * serialization purposes. nvme_delete_wq host controller deletion
74 * works which flush reset works for serialization.
75 */
9a6327d2
SG
76struct workqueue_struct *nvme_wq;
77EXPORT_SYMBOL_GPL(nvme_wq);
78
b227c59b
RS
79struct workqueue_struct *nvme_reset_wq;
80EXPORT_SYMBOL_GPL(nvme_reset_wq);
81
82struct workqueue_struct *nvme_delete_wq;
83EXPORT_SYMBOL_GPL(nvme_delete_wq);
84
ab9e00cc
CH
85static LIST_HEAD(nvme_subsystems);
86static DEFINE_MUTEX(nvme_subsystems_lock);
1673f1f0 87
9843f685 88static DEFINE_IDA(nvme_instance_ida);
a6a5149b 89static dev_t nvme_chr_devt;
f3ca80fc 90static struct class *nvme_class;
ab9e00cc 91static struct class *nvme_subsys_class;
f3ca80fc 92
84fef62d 93static int nvme_revalidate_disk(struct gendisk *disk);
12d9f070 94static void nvme_put_subsystem(struct nvme_subsystem *subsys);
cf39a6bc
SB
95static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
96 unsigned nsid);
97
98static void nvme_set_queue_dying(struct nvme_ns *ns)
99{
100 /*
101 * Revalidating a dead namespace sets capacity to 0. This will end
102 * buffered writers dirtying pages that can't be synced.
103 */
104 if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags))
105 return;
cf39a6bc
SB
106 blk_set_queue_dying(ns->queue);
107 /* Forcibly unquiesce queues to avoid blocking dispatch */
108 blk_mq_unquiesce_queue(ns->queue);
b224726d
BS
109 /*
110 * Revalidate after unblocking dispatchers that may be holding bd_butex
111 */
112 revalidate_disk(ns->disk);
cf39a6bc 113}
f3ca80fc 114
50e8d8ee
CH
115static void nvme_queue_scan(struct nvme_ctrl *ctrl)
116{
117 /*
118 * Only new queue scan work when admin and IO queues are both alive
119 */
5d02a5c1 120 if (ctrl->state == NVME_CTRL_LIVE && ctrl->tagset)
50e8d8ee
CH
121 queue_work(nvme_wq, &ctrl->scan_work);
122}
123
4c75f877
KB
124/*
125 * Use this function to proceed with scheduling reset_work for a controller
126 * that had previously been set to the resetting state. This is intended for
127 * code paths that can't be interrupted by other reset attempts. A hot removal
128 * may prevent this from succeeding.
129 */
c1ac9a4b 130int nvme_try_sched_reset(struct nvme_ctrl *ctrl)
4c75f877
KB
131{
132 if (ctrl->state != NVME_CTRL_RESETTING)
133 return -EBUSY;
134 if (!queue_work(nvme_reset_wq, &ctrl->reset_work))
135 return -EBUSY;
136 return 0;
137}
c1ac9a4b 138EXPORT_SYMBOL_GPL(nvme_try_sched_reset);
4c75f877 139
d86c4d8e
CH
140int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
141{
142 if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
143 return -EBUSY;
b227c59b 144 if (!queue_work(nvme_reset_wq, &ctrl->reset_work))
d86c4d8e
CH
145 return -EBUSY;
146 return 0;
147}
148EXPORT_SYMBOL_GPL(nvme_reset_ctrl);
149
79c48ccf 150int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
d86c4d8e
CH
151{
152 int ret;
153
154 ret = nvme_reset_ctrl(ctrl);
8000d1fd 155 if (!ret) {
d86c4d8e 156 flush_work(&ctrl->reset_work);
5d02a5c1 157 if (ctrl->state != NVME_CTRL_LIVE)
8000d1fd
NC
158 ret = -ENETRESET;
159 }
160
d86c4d8e
CH
161 return ret;
162}
79c48ccf 163EXPORT_SYMBOL_GPL(nvme_reset_ctrl_sync);
d86c4d8e 164
a686ed75 165static void nvme_do_delete_ctrl(struct nvme_ctrl *ctrl)
c5017e85 166{
77d0612d
MG
167 dev_info(ctrl->device,
168 "Removing ctrl: NQN \"%s\"\n", ctrl->opts->subsysnqn);
169
4054637c 170 flush_work(&ctrl->reset_work);
6cd53d14
CH
171 nvme_stop_ctrl(ctrl);
172 nvme_remove_namespaces(ctrl);
c5017e85 173 ctrl->ops->delete_ctrl(ctrl);
6cd53d14 174 nvme_uninit_ctrl(ctrl);
c5017e85
CH
175}
176
a686ed75
BVA
177static void nvme_delete_ctrl_work(struct work_struct *work)
178{
179 struct nvme_ctrl *ctrl =
180 container_of(work, struct nvme_ctrl, delete_work);
181
182 nvme_do_delete_ctrl(ctrl);
183}
184
c5017e85
CH
185int nvme_delete_ctrl(struct nvme_ctrl *ctrl)
186{
187 if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING))
188 return -EBUSY;
b227c59b 189 if (!queue_work(nvme_delete_wq, &ctrl->delete_work))
c5017e85
CH
190 return -EBUSY;
191 return 0;
192}
193EXPORT_SYMBOL_GPL(nvme_delete_ctrl);
194
6721c18a 195static void nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl)
c5017e85 196{
c5017e85 197 /*
01fc08ff
YY
198 * Keep a reference until nvme_do_delete_ctrl() complete,
199 * since ->delete_ctrl can free the controller.
c5017e85
CH
200 */
201 nvme_get_ctrl(ctrl);
6721c18a 202 if (nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING))
b9c77583 203 nvme_do_delete_ctrl(ctrl);
c5017e85 204 nvme_put_ctrl(ctrl);
c5017e85 205}
c5017e85 206
715ea9e0
CH
207static inline bool nvme_ns_has_pi(struct nvme_ns *ns)
208{
209 return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple);
210}
211
2f9c1736 212static blk_status_t nvme_error_status(u16 status)
27fa9bc5 213{
2f9c1736 214 switch (status & 0x7ff) {
27fa9bc5 215 case NVME_SC_SUCCESS:
2a842aca 216 return BLK_STS_OK;
27fa9bc5 217 case NVME_SC_CAP_EXCEEDED:
2a842aca 218 return BLK_STS_NOSPC;
e96fef2c 219 case NVME_SC_LBA_RANGE:
35038bff
KB
220 case NVME_SC_CMD_INTERRUPTED:
221 case NVME_SC_NS_NOT_READY:
e96fef2c
KB
222 return BLK_STS_TARGET;
223 case NVME_SC_BAD_ATTRIBUTES:
e02ab023 224 case NVME_SC_ONCS_NOT_SUPPORTED:
e96fef2c
KB
225 case NVME_SC_INVALID_OPCODE:
226 case NVME_SC_INVALID_FIELD:
227 case NVME_SC_INVALID_NS:
2a842aca 228 return BLK_STS_NOTSUPP;
e02ab023
JG
229 case NVME_SC_WRITE_FAULT:
230 case NVME_SC_READ_ERROR:
231 case NVME_SC_UNWRITTEN_BLOCK:
a751da33
CH
232 case NVME_SC_ACCESS_DENIED:
233 case NVME_SC_READ_ONLY:
e96fef2c 234 case NVME_SC_COMPARE_FAILED:
2a842aca 235 return BLK_STS_MEDIUM;
a751da33
CH
236 case NVME_SC_GUARD_CHECK:
237 case NVME_SC_APPTAG_CHECK:
238 case NVME_SC_REFTAG_CHECK:
239 case NVME_SC_INVALID_PI:
240 return BLK_STS_PROTECTION;
241 case NVME_SC_RESERVATION_CONFLICT:
242 return BLK_STS_NEXUS;
1c0d12c0
SG
243 case NVME_SC_HOST_PATH_ERROR:
244 return BLK_STS_TRANSPORT;
2a842aca
CH
245 default:
246 return BLK_STS_IOERR;
27fa9bc5
CH
247 }
248}
27fa9bc5 249
f6324b1b 250static inline bool nvme_req_needs_retry(struct request *req)
77f02a7a 251{
f6324b1b
CH
252 if (blk_noretry_request(req))
253 return false;
27fa9bc5 254 if (nvme_req(req)->status & NVME_SC_DNR)
f6324b1b 255 return false;
44e44b29 256 if (nvme_req(req)->retries >= nvme_max_retries)
f6324b1b
CH
257 return false;
258 return true;
77f02a7a
CH
259}
260
49cd84b6
KB
261static void nvme_retry_req(struct request *req)
262{
263 struct nvme_ns *ns = req->q->queuedata;
264 unsigned long delay = 0;
265 u16 crd;
266
267 /* The mask and shift result must be <= 3 */
268 crd = (nvme_req(req)->status & NVME_SC_CRD) >> 11;
269 if (ns && crd)
270 delay = ns->ctrl->crdt[crd - 1] * 100;
271
272 nvme_req(req)->retries++;
273 blk_mq_requeue_request(req, false);
274 blk_mq_delay_kick_requeue_list(req->q, delay);
275}
276
77f02a7a
CH
277void nvme_complete_rq(struct request *req)
278{
2f9c1736 279 blk_status_t status = nvme_error_status(nvme_req(req)->status);
908e4564 280
ca5554a6
JT
281 trace_nvme_complete_rq(req);
282
16686f3a
MG
283 nvme_cleanup_cmd(req);
284
6e3ca03e
SG
285 if (nvme_req(req)->ctrl->kas)
286 nvme_req(req)->ctrl->comp_seen = true;
287
908e4564 288 if (unlikely(status != BLK_STS_OK && nvme_req_needs_retry(req))) {
764e9332 289 if ((req->cmd_flags & REQ_NVME_MPATH) && nvme_failover_req(req))
32acab31 290 return;
32acab31
CH
291
292 if (!blk_queue_dying(req->q)) {
49cd84b6 293 nvme_retry_req(req);
32acab31
CH
294 return;
295 }
77f02a7a 296 }
35fe0d12
HR
297
298 nvme_trace_bio_complete(req, status);
908e4564 299 blk_mq_end_request(req, status);
77f02a7a
CH
300}
301EXPORT_SYMBOL_GPL(nvme_complete_rq);
302
7baa8572 303bool nvme_cancel_request(struct request *req, void *data, bool reserved)
c55a2fd4 304{
c55a2fd4
ML
305 dev_dbg_ratelimited(((struct nvme_ctrl *) data)->device,
306 "Cancelling I/O %d", req->tag);
307
78ca4072
ML
308 /* don't abort one completed request */
309 if (blk_mq_request_completed(req))
310 return true;
311
2dc3947b 312 nvme_req(req)->status = NVME_SC_HOST_ABORTED_CMD;
a87ccce0 313 blk_mq_complete_request(req);
7baa8572 314 return true;
c55a2fd4
ML
315}
316EXPORT_SYMBOL_GPL(nvme_cancel_request);
317
bb8d261e
CH
318bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
319 enum nvme_ctrl_state new_state)
320{
f6b6a28e 321 enum nvme_ctrl_state old_state;
0a72bbba 322 unsigned long flags;
bb8d261e
CH
323 bool changed = false;
324
0a72bbba 325 spin_lock_irqsave(&ctrl->lock, flags);
f6b6a28e
GKB
326
327 old_state = ctrl->state;
bb8d261e
CH
328 switch (new_state) {
329 case NVME_CTRL_LIVE:
330 switch (old_state) {
7d2e8008 331 case NVME_CTRL_NEW:
bb8d261e 332 case NVME_CTRL_RESETTING:
ad6a0a52 333 case NVME_CTRL_CONNECTING:
bb8d261e
CH
334 changed = true;
335 /* FALLTHRU */
336 default:
337 break;
338 }
339 break;
340 case NVME_CTRL_RESETTING:
341 switch (old_state) {
342 case NVME_CTRL_NEW:
def61eca 343 case NVME_CTRL_LIVE:
def61eca
CH
344 changed = true;
345 /* FALLTHRU */
346 default:
347 break;
348 }
349 break;
ad6a0a52 350 case NVME_CTRL_CONNECTING:
def61eca 351 switch (old_state) {
b754a32c 352 case NVME_CTRL_NEW:
3cec7f9d 353 case NVME_CTRL_RESETTING:
bb8d261e
CH
354 changed = true;
355 /* FALLTHRU */
356 default:
357 break;
358 }
359 break;
360 case NVME_CTRL_DELETING:
361 switch (old_state) {
362 case NVME_CTRL_LIVE:
363 case NVME_CTRL_RESETTING:
ad6a0a52 364 case NVME_CTRL_CONNECTING:
bb8d261e
CH
365 changed = true;
366 /* FALLTHRU */
367 default:
368 break;
369 }
370 break;
0ff9d4e1
KB
371 case NVME_CTRL_DEAD:
372 switch (old_state) {
373 case NVME_CTRL_DELETING:
374 changed = true;
375 /* FALLTHRU */
376 default:
377 break;
378 }
379 break;
bb8d261e
CH
380 default:
381 break;
382 }
bb8d261e 383
c1ac9a4b 384 if (changed) {
bb8d261e 385 ctrl->state = new_state;
c1ac9a4b
KB
386 wake_up_all(&ctrl->state_wq);
387 }
bb8d261e 388
0a72bbba 389 spin_unlock_irqrestore(&ctrl->lock, flags);
32acab31
CH
390 if (changed && ctrl->state == NVME_CTRL_LIVE)
391 nvme_kick_requeue_lists(ctrl);
bb8d261e
CH
392 return changed;
393}
394EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
395
c1ac9a4b
KB
396/*
397 * Returns true for sink states that can't ever transition back to live.
398 */
399static bool nvme_state_terminal(struct nvme_ctrl *ctrl)
400{
401 switch (ctrl->state) {
402 case NVME_CTRL_NEW:
403 case NVME_CTRL_LIVE:
404 case NVME_CTRL_RESETTING:
405 case NVME_CTRL_CONNECTING:
406 return false;
407 case NVME_CTRL_DELETING:
408 case NVME_CTRL_DEAD:
409 return true;
410 default:
411 WARN_ONCE(1, "Unhandled ctrl state:%d", ctrl->state);
412 return true;
413 }
414}
415
416/*
417 * Waits for the controller state to be resetting, or returns false if it is
418 * not possible to ever transition to that state.
419 */
420bool nvme_wait_reset(struct nvme_ctrl *ctrl)
421{
422 wait_event(ctrl->state_wq,
423 nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING) ||
424 nvme_state_terminal(ctrl));
425 return ctrl->state == NVME_CTRL_RESETTING;
426}
427EXPORT_SYMBOL_GPL(nvme_wait_reset);
428
ed754e5d
CH
429static void nvme_free_ns_head(struct kref *ref)
430{
431 struct nvme_ns_head *head =
432 container_of(ref, struct nvme_ns_head, ref);
433
32acab31 434 nvme_mpath_remove_disk(head);
ed754e5d
CH
435 ida_simple_remove(&head->subsys->ns_ida, head->instance);
436 list_del_init(&head->entry);
f5ad3991 437 cleanup_srcu_struct(&head->srcu);
12d9f070 438 nvme_put_subsystem(head->subsys);
ed754e5d
CH
439 kfree(head);
440}
441
442static void nvme_put_ns_head(struct nvme_ns_head *head)
443{
444 kref_put(&head->ref, nvme_free_ns_head);
445}
446
1673f1f0
CH
447static void nvme_free_ns(struct kref *kref)
448{
449 struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref);
450
b0b4e09c
MB
451 if (ns->ndev)
452 nvme_nvm_unregister(ns);
1673f1f0 453
1673f1f0 454 put_disk(ns->disk);
ed754e5d 455 nvme_put_ns_head(ns->head);
075790eb 456 nvme_put_ctrl(ns->ctrl);
1673f1f0
CH
457 kfree(ns);
458}
459
5bae7f73 460static void nvme_put_ns(struct nvme_ns *ns)
1673f1f0
CH
461{
462 kref_put(&ns->kref, nvme_free_ns);
463}
464
bb06ec31
JS
465static inline void nvme_clear_nvme_request(struct request *req)
466{
467 if (!(req->rq_flags & RQF_DONTPREP)) {
468 nvme_req(req)->retries = 0;
469 nvme_req(req)->flags = 0;
470 req->rq_flags |= RQF_DONTPREP;
471 }
472}
473
4160982e 474struct request *nvme_alloc_request(struct request_queue *q,
9a95e4ef 475 struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid)
21d34711 476{
aebf526b 477 unsigned op = nvme_is_write(cmd) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN;
21d34711 478 struct request *req;
21d34711 479
eb71f435 480 if (qid == NVME_QID_ANY) {
aebf526b 481 req = blk_mq_alloc_request(q, op, flags);
eb71f435 482 } else {
aebf526b 483 req = blk_mq_alloc_request_hctx(q, op, flags,
eb71f435
CH
484 qid ? qid - 1 : 0);
485 }
21d34711 486 if (IS_ERR(req))
4160982e 487 return req;
21d34711 488
21d34711 489 req->cmd_flags |= REQ_FAILFAST_DRIVER;
bb06ec31 490 nvme_clear_nvme_request(req);
d49187e9 491 nvme_req(req)->cmd = cmd;
21d34711 492
4160982e
CH
493 return req;
494}
576d55d6 495EXPORT_SYMBOL_GPL(nvme_alloc_request);
4160982e 496
f5d11840
JA
497static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable)
498{
499 struct nvme_command c;
500
501 memset(&c, 0, sizeof(c));
502
503 c.directive.opcode = nvme_admin_directive_send;
62346eae 504 c.directive.nsid = cpu_to_le32(NVME_NSID_ALL);
f5d11840
JA
505 c.directive.doper = NVME_DIR_SND_ID_OP_ENABLE;
506 c.directive.dtype = NVME_DIR_IDENTIFY;
507 c.directive.tdtype = NVME_DIR_STREAMS;
508 c.directive.endir = enable ? NVME_DIR_ENDIR : 0;
509
510 return nvme_submit_sync_cmd(ctrl->admin_q, &c, NULL, 0);
511}
512
513static int nvme_disable_streams(struct nvme_ctrl *ctrl)
514{
515 return nvme_toggle_streams(ctrl, false);
516}
517
518static int nvme_enable_streams(struct nvme_ctrl *ctrl)
519{
520 return nvme_toggle_streams(ctrl, true);
521}
522
523static int nvme_get_stream_params(struct nvme_ctrl *ctrl,
524 struct streams_directive_params *s, u32 nsid)
525{
526 struct nvme_command c;
527
528 memset(&c, 0, sizeof(c));
529 memset(s, 0, sizeof(*s));
530
531 c.directive.opcode = nvme_admin_directive_recv;
532 c.directive.nsid = cpu_to_le32(nsid);
a082b426 533 c.directive.numd = cpu_to_le32((sizeof(*s) >> 2) - 1);
f5d11840
JA
534 c.directive.doper = NVME_DIR_RCV_ST_OP_PARAM;
535 c.directive.dtype = NVME_DIR_STREAMS;
536
537 return nvme_submit_sync_cmd(ctrl->admin_q, &c, s, sizeof(*s));
538}
539
540static int nvme_configure_directives(struct nvme_ctrl *ctrl)
541{
542 struct streams_directive_params s;
543 int ret;
544
545 if (!(ctrl->oacs & NVME_CTRL_OACS_DIRECTIVES))
546 return 0;
547 if (!streams)
548 return 0;
549
550 ret = nvme_enable_streams(ctrl);
551 if (ret)
552 return ret;
553
62346eae 554 ret = nvme_get_stream_params(ctrl, &s, NVME_NSID_ALL);
f5d11840
JA
555 if (ret)
556 return ret;
557
558 ctrl->nssa = le16_to_cpu(s.nssa);
559 if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) {
560 dev_info(ctrl->device, "too few streams (%u) available\n",
561 ctrl->nssa);
562 nvme_disable_streams(ctrl);
563 return 0;
564 }
565
566 ctrl->nr_streams = min_t(unsigned, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1);
567 dev_info(ctrl->device, "Using %u streams\n", ctrl->nr_streams);
568 return 0;
569}
570
571/*
572 * Check if 'req' has a write hint associated with it. If it does, assign
573 * a valid namespace stream to the write.
574 */
575static void nvme_assign_write_stream(struct nvme_ctrl *ctrl,
576 struct request *req, u16 *control,
577 u32 *dsmgmt)
578{
579 enum rw_hint streamid = req->write_hint;
580
581 if (streamid == WRITE_LIFE_NOT_SET || streamid == WRITE_LIFE_NONE)
582 streamid = 0;
583 else {
584 streamid--;
585 if (WARN_ON_ONCE(streamid > ctrl->nr_streams))
586 return;
587
588 *control |= NVME_RW_DTYPE_STREAMS;
589 *dsmgmt |= streamid << 16;
590 }
591
592 if (streamid < ARRAY_SIZE(req->q->write_hints))
593 req->q->write_hints[streamid] += blk_rq_bytes(req) >> 9;
594}
595
8093f7ca
ML
596static inline void nvme_setup_flush(struct nvme_ns *ns,
597 struct nvme_command *cmnd)
598{
8093f7ca 599 cmnd->common.opcode = nvme_cmd_flush;
ed754e5d 600 cmnd->common.nsid = cpu_to_le32(ns->head->ns_id);
8093f7ca
ML
601}
602
fc17b653 603static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
8093f7ca
ML
604 struct nvme_command *cmnd)
605{
b35ba01e 606 unsigned short segments = blk_rq_nr_discard_segments(req), n = 0;
8093f7ca 607 struct nvme_dsm_range *range;
b35ba01e 608 struct bio *bio;
8093f7ca 609
530436c4
EH
610 /*
611 * Some devices do not consider the DSM 'Number of Ranges' field when
612 * determining how much data to DMA. Always allocate memory for maximum
613 * number of segments to prevent device reading beyond end of buffer.
614 */
615 static const size_t alloc_size = sizeof(*range) * NVME_DSM_MAX_RANGES;
616
617 range = kzalloc(alloc_size, GFP_ATOMIC | __GFP_NOWARN);
cb5b7262
JA
618 if (!range) {
619 /*
620 * If we fail allocation our range, fallback to the controller
621 * discard page. If that's also busy, it's safe to return
622 * busy, as we know we can make progress once that's freed.
623 */
624 if (test_and_set_bit_lock(0, &ns->ctrl->discard_page_busy))
625 return BLK_STS_RESOURCE;
626
627 range = page_address(ns->ctrl->discard_page);
628 }
8093f7ca 629
b35ba01e 630 __rq_for_each_bio(bio, req) {
314d48dd 631 u64 slba = nvme_sect_to_lba(ns, bio->bi_iter.bi_sector);
b35ba01e
CH
632 u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift;
633
8cb6af7b
KB
634 if (n < segments) {
635 range[n].cattr = cpu_to_le32(0);
636 range[n].nlb = cpu_to_le32(nlb);
637 range[n].slba = cpu_to_le64(slba);
638 }
b35ba01e
CH
639 n++;
640 }
641
642 if (WARN_ON_ONCE(n != segments)) {
cb5b7262
JA
643 if (virt_to_page(range) == ns->ctrl->discard_page)
644 clear_bit_unlock(0, &ns->ctrl->discard_page_busy);
645 else
646 kfree(range);
fc17b653 647 return BLK_STS_IOERR;
b35ba01e 648 }
8093f7ca 649
8093f7ca 650 cmnd->dsm.opcode = nvme_cmd_dsm;
ed754e5d 651 cmnd->dsm.nsid = cpu_to_le32(ns->head->ns_id);
f1dd03a8 652 cmnd->dsm.nr = cpu_to_le32(segments - 1);
8093f7ca
ML
653 cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
654
f9d03f96
CH
655 req->special_vec.bv_page = virt_to_page(range);
656 req->special_vec.bv_offset = offset_in_page(range);
530436c4 657 req->special_vec.bv_len = alloc_size;
f9d03f96 658 req->rq_flags |= RQF_SPECIAL_PAYLOAD;
8093f7ca 659
fc17b653 660 return BLK_STS_OK;
8093f7ca 661}
8093f7ca 662
6e02318e
CK
663static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns,
664 struct request *req, struct nvme_command *cmnd)
665{
666 if (ns->ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
667 return nvme_setup_discard(ns, req, cmnd);
668
669 cmnd->write_zeroes.opcode = nvme_cmd_write_zeroes;
670 cmnd->write_zeroes.nsid = cpu_to_le32(ns->head->ns_id);
671 cmnd->write_zeroes.slba =
314d48dd 672 cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
6e02318e
CK
673 cmnd->write_zeroes.length =
674 cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
675 cmnd->write_zeroes.control = 0;
676 return BLK_STS_OK;
677}
678
ebe6d874
CH
679static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
680 struct request *req, struct nvme_command *cmnd)
8093f7ca 681{
f5d11840 682 struct nvme_ctrl *ctrl = ns->ctrl;
8093f7ca
ML
683 u16 control = 0;
684 u32 dsmgmt = 0;
685
686 if (req->cmd_flags & REQ_FUA)
687 control |= NVME_RW_FUA;
688 if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD))
689 control |= NVME_RW_LR;
690
691 if (req->cmd_flags & REQ_RAHEAD)
692 dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
693
8093f7ca 694 cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read);
ed754e5d 695 cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id);
314d48dd 696 cmnd->rw.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
8093f7ca
ML
697 cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
698
f5d11840
JA
699 if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams)
700 nvme_assign_write_stream(ctrl, req, &control, &dsmgmt);
701
8093f7ca 702 if (ns->ms) {
715ea9e0
CH
703 /*
704 * If formated with metadata, the block layer always provides a
705 * metadata buffer if CONFIG_BLK_DEV_INTEGRITY is enabled. Else
706 * we enable the PRACT bit for protection information or set the
707 * namespace capacity to zero to prevent any I/O.
708 */
709 if (!blk_integrity_rq(req)) {
710 if (WARN_ON_ONCE(!nvme_ns_has_pi(ns)))
711 return BLK_STS_NOTSUPP;
712 control |= NVME_RW_PRINFO_PRACT;
713 }
714
8093f7ca
ML
715 switch (ns->pi_type) {
716 case NVME_NS_DPS_PI_TYPE3:
717 control |= NVME_RW_PRINFO_PRCHK_GUARD;
718 break;
719 case NVME_NS_DPS_PI_TYPE1:
720 case NVME_NS_DPS_PI_TYPE2:
721 control |= NVME_RW_PRINFO_PRCHK_GUARD |
722 NVME_RW_PRINFO_PRCHK_REF;
ddd0bc75 723 cmnd->rw.reftag = cpu_to_le32(t10_pi_ref_tag(req));
8093f7ca
ML
724 break;
725 }
8093f7ca
ML
726 }
727
728 cmnd->rw.control = cpu_to_le16(control);
729 cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
ebe6d874 730 return 0;
8093f7ca
ML
731}
732
f7f1fc36
MG
733void nvme_cleanup_cmd(struct request *req)
734{
f7f1fc36 735 if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
cb5b7262
JA
736 struct nvme_ns *ns = req->rq_disk->private_data;
737 struct page *page = req->special_vec.bv_page;
738
739 if (page == ns->ctrl->discard_page)
740 clear_bit_unlock(0, &ns->ctrl->discard_page_busy);
741 else
742 kfree(page_address(page) + req->special_vec.bv_offset);
f7f1fc36
MG
743 }
744}
745EXPORT_SYMBOL_GPL(nvme_cleanup_cmd);
746
fc17b653 747blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
8093f7ca
ML
748 struct nvme_command *cmd)
749{
fc17b653 750 blk_status_t ret = BLK_STS_OK;
8093f7ca 751
bb06ec31 752 nvme_clear_nvme_request(req);
987f699a 753
11902035 754 memset(cmd, 0, sizeof(*cmd));
aebf526b
CH
755 switch (req_op(req)) {
756 case REQ_OP_DRV_IN:
757 case REQ_OP_DRV_OUT:
d49187e9 758 memcpy(cmd, nvme_req(req)->cmd, sizeof(*cmd));
aebf526b
CH
759 break;
760 case REQ_OP_FLUSH:
8093f7ca 761 nvme_setup_flush(ns, cmd);
aebf526b 762 break;
e850fd16 763 case REQ_OP_WRITE_ZEROES:
6e02318e
CK
764 ret = nvme_setup_write_zeroes(ns, req, cmd);
765 break;
aebf526b 766 case REQ_OP_DISCARD:
8093f7ca 767 ret = nvme_setup_discard(ns, req, cmd);
aebf526b
CH
768 break;
769 case REQ_OP_READ:
770 case REQ_OP_WRITE:
ebe6d874 771 ret = nvme_setup_rw(ns, req, cmd);
aebf526b
CH
772 break;
773 default:
774 WARN_ON_ONCE(1);
fc17b653 775 return BLK_STS_IOERR;
aebf526b 776 }
8093f7ca 777
721b3917 778 cmd->common.command_id = req->tag;
5d87eb94 779 trace_nvme_setup_cmd(req, cmd);
8093f7ca
ML
780 return ret;
781}
782EXPORT_SYMBOL_GPL(nvme_setup_cmd);
783
6287b51c
SG
784static void nvme_end_sync_rq(struct request *rq, blk_status_t error)
785{
786 struct completion *waiting = rq->end_io_data;
787
788 rq->end_io_data = NULL;
789 complete(waiting);
790}
791
792static void nvme_execute_rq_polled(struct request_queue *q,
793 struct gendisk *bd_disk, struct request *rq, int at_head)
794{
795 DECLARE_COMPLETION_ONSTACK(wait);
796
797 WARN_ON_ONCE(!test_bit(QUEUE_FLAG_POLL, &q->queue_flags));
798
799 rq->cmd_flags |= REQ_HIPRI;
800 rq->end_io_data = &wait;
801 blk_execute_rq_nowait(q, bd_disk, rq, at_head, nvme_end_sync_rq);
802
803 while (!completion_done(&wait)) {
804 blk_poll(q, request_to_qc_t(rq->mq_hctx, rq), true);
805 cond_resched();
806 }
807}
808
4160982e
CH
809/*
810 * Returns 0 on success. If the result is negative, it's a Linux error code;
811 * if the result is positive, it's an NVM Express status code
812 */
813int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
d49187e9 814 union nvme_result *result, void *buffer, unsigned bufflen,
9a95e4ef 815 unsigned timeout, int qid, int at_head,
6287b51c 816 blk_mq_req_flags_t flags, bool poll)
4160982e
CH
817{
818 struct request *req;
819 int ret;
820
eb71f435 821 req = nvme_alloc_request(q, cmd, flags, qid);
4160982e
CH
822 if (IS_ERR(req))
823 return PTR_ERR(req);
824
825 req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
826
21d34711
CH
827 if (buffer && bufflen) {
828 ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL);
829 if (ret)
830 goto out;
4160982e
CH
831 }
832
6287b51c
SG
833 if (poll)
834 nvme_execute_rq_polled(req->q, NULL, req, at_head);
835 else
836 blk_execute_rq(req->q, NULL, req, at_head);
d49187e9
CH
837 if (result)
838 *result = nvme_req(req)->result;
27fa9bc5
CH
839 if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
840 ret = -EINTR;
841 else
842 ret = nvme_req(req)->status;
4160982e
CH
843 out:
844 blk_mq_free_request(req);
845 return ret;
846}
eb71f435 847EXPORT_SYMBOL_GPL(__nvme_submit_sync_cmd);
4160982e
CH
848
849int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
850 void *buffer, unsigned bufflen)
851{
eb71f435 852 return __nvme_submit_sync_cmd(q, cmd, NULL, buffer, bufflen, 0,
6287b51c 853 NVME_QID_ANY, 0, 0, false);
4160982e 854}
576d55d6 855EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd);
4160982e 856
1cad6562
CH
857static void *nvme_add_user_metadata(struct bio *bio, void __user *ubuf,
858 unsigned len, u32 seed, bool write)
859{
860 struct bio_integrity_payload *bip;
861 int ret = -ENOMEM;
862 void *buf;
863
864 buf = kmalloc(len, GFP_KERNEL);
865 if (!buf)
866 goto out;
867
868 ret = -EFAULT;
869 if (write && copy_from_user(buf, ubuf, len))
870 goto out_free_meta;
871
872 bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
873 if (IS_ERR(bip)) {
874 ret = PTR_ERR(bip);
875 goto out_free_meta;
876 }
877
878 bip->bip_iter.bi_size = len;
879 bip->bip_iter.bi_sector = seed;
880 ret = bio_integrity_add_page(bio, virt_to_page(buf), len,
881 offset_in_page(buf));
882 if (ret == len)
883 return buf;
884 ret = -ENOMEM;
885out_free_meta:
886 kfree(buf);
887out:
888 return ERR_PTR(ret);
889}
890
63263d60 891static int nvme_submit_user_cmd(struct request_queue *q,
485783ca
KB
892 struct nvme_command *cmd, void __user *ubuffer,
893 unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
65e68edc 894 u32 meta_seed, u64 *result, unsigned timeout)
4160982e 895{
7a5abb4b 896 bool write = nvme_is_write(cmd);
0b7f1f26
KB
897 struct nvme_ns *ns = q->queuedata;
898 struct gendisk *disk = ns ? ns->disk : NULL;
4160982e 899 struct request *req;
0b7f1f26
KB
900 struct bio *bio = NULL;
901 void *meta = NULL;
4160982e
CH
902 int ret;
903
eb71f435 904 req = nvme_alloc_request(q, cmd, 0, NVME_QID_ANY);
4160982e
CH
905 if (IS_ERR(req))
906 return PTR_ERR(req);
907
908 req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
bb06ec31 909 nvme_req(req)->flags |= NVME_REQ_USERCMD;
4160982e
CH
910
911 if (ubuffer && bufflen) {
21d34711
CH
912 ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
913 GFP_KERNEL);
914 if (ret)
915 goto out;
916 bio = req->bio;
74d46992 917 bio->bi_disk = disk;
1cad6562
CH
918 if (disk && meta_buffer && meta_len) {
919 meta = nvme_add_user_metadata(bio, meta_buffer, meta_len,
920 meta_seed, write);
921 if (IS_ERR(meta)) {
922 ret = PTR_ERR(meta);
0b7f1f26
KB
923 goto out_unmap;
924 }
f31a2110 925 req->cmd_flags |= REQ_INTEGRITY;
0b7f1f26
KB
926 }
927 }
1cad6562 928
0b7f1f26 929 blk_execute_rq(req->q, disk, req, 0);
27fa9bc5
CH
930 if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
931 ret = -EINTR;
932 else
933 ret = nvme_req(req)->status;
21d34711 934 if (result)
65e68edc 935 *result = le64_to_cpu(nvme_req(req)->result.u64);
0b7f1f26
KB
936 if (meta && !ret && !write) {
937 if (copy_to_user(meta_buffer, meta, meta_len))
938 ret = -EFAULT;
939 }
0b7f1f26
KB
940 kfree(meta);
941 out_unmap:
74d46992 942 if (bio)
0b7f1f26 943 blk_rq_unmap_user(bio);
21d34711
CH
944 out:
945 blk_mq_free_request(req);
946 return ret;
947}
948
2a842aca 949static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
038bd4cb
SG
950{
951 struct nvme_ctrl *ctrl = rq->end_io_data;
86880d64
JS
952 unsigned long flags;
953 bool startka = false;
038bd4cb
SG
954
955 blk_mq_free_request(rq);
956
2a842aca 957 if (status) {
038bd4cb 958 dev_err(ctrl->device,
2a842aca
CH
959 "failed nvme_keep_alive_end_io error=%d\n",
960 status);
038bd4cb
SG
961 return;
962 }
963
6e3ca03e 964 ctrl->comp_seen = false;
86880d64
JS
965 spin_lock_irqsave(&ctrl->lock, flags);
966 if (ctrl->state == NVME_CTRL_LIVE ||
967 ctrl->state == NVME_CTRL_CONNECTING)
968 startka = true;
969 spin_unlock_irqrestore(&ctrl->lock, flags);
970 if (startka)
97b2512a 971 queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ);
038bd4cb
SG
972}
973
974static int nvme_keep_alive(struct nvme_ctrl *ctrl)
975{
038bd4cb
SG
976 struct request *rq;
977
0a34e466 978 rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd, BLK_MQ_REQ_RESERVED,
038bd4cb
SG
979 NVME_QID_ANY);
980 if (IS_ERR(rq))
981 return PTR_ERR(rq);
982
983 rq->timeout = ctrl->kato * HZ;
984 rq->end_io_data = ctrl;
985
986 blk_execute_rq_nowait(rq->q, NULL, rq, 0, nvme_keep_alive_end_io);
987
988 return 0;
989}
990
991static void nvme_keep_alive_work(struct work_struct *work)
992{
993 struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
994 struct nvme_ctrl, ka_work);
6e3ca03e
SG
995 bool comp_seen = ctrl->comp_seen;
996
997 if ((ctrl->ctratt & NVME_CTRL_ATTR_TBKAS) && comp_seen) {
998 dev_dbg(ctrl->device,
999 "reschedule traffic based keep-alive timer\n");
1000 ctrl->comp_seen = false;
97b2512a 1001 queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ);
6e3ca03e
SG
1002 return;
1003 }
038bd4cb
SG
1004
1005 if (nvme_keep_alive(ctrl)) {
1006 /* allocation failure, reset the controller */
1007 dev_err(ctrl->device, "keep-alive failed\n");
39bdc590 1008 nvme_reset_ctrl(ctrl);
038bd4cb
SG
1009 return;
1010 }
1011}
1012
00b683db 1013static void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
038bd4cb
SG
1014{
1015 if (unlikely(ctrl->kato == 0))
1016 return;
1017
97b2512a 1018 queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ);
038bd4cb 1019}
038bd4cb
SG
1020
1021void nvme_stop_keep_alive(struct nvme_ctrl *ctrl)
1022{
1023 if (unlikely(ctrl->kato == 0))
1024 return;
1025
1026 cancel_delayed_work_sync(&ctrl->ka_work);
1027}
1028EXPORT_SYMBOL_GPL(nvme_stop_keep_alive);
1029
3f7f25a9 1030static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
21d34711
CH
1031{
1032 struct nvme_command c = { };
1033 int error;
1034
1035 /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
1036 c.identify.opcode = nvme_admin_identify;
986994a2 1037 c.identify.cns = NVME_ID_CNS_CTRL;
21d34711
CH
1038
1039 *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
1040 if (!*id)
1041 return -ENOMEM;
1042
1043 error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
1044 sizeof(struct nvme_id_ctrl));
1045 if (error)
1046 kfree(*id);
1047 return error;
1048}
1049
ad95a613
CK
1050static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids,
1051 struct nvme_ns_id_desc *cur)
1052{
1053 const char *warn_str = "ctrl returned bogus length:";
1054 void *data = cur;
1055
1056 switch (cur->nidt) {
1057 case NVME_NIDT_EUI64:
1058 if (cur->nidl != NVME_NIDT_EUI64_LEN) {
1059 dev_warn(ctrl->device, "%s %d for NVME_NIDT_EUI64\n",
1060 warn_str, cur->nidl);
1061 return -1;
1062 }
1063 memcpy(ids->eui64, data + sizeof(*cur), NVME_NIDT_EUI64_LEN);
1064 return NVME_NIDT_EUI64_LEN;
1065 case NVME_NIDT_NGUID:
1066 if (cur->nidl != NVME_NIDT_NGUID_LEN) {
1067 dev_warn(ctrl->device, "%s %d for NVME_NIDT_NGUID\n",
1068 warn_str, cur->nidl);
1069 return -1;
1070 }
1071 memcpy(ids->nguid, data + sizeof(*cur), NVME_NIDT_NGUID_LEN);
1072 return NVME_NIDT_NGUID_LEN;
1073 case NVME_NIDT_UUID:
1074 if (cur->nidl != NVME_NIDT_UUID_LEN) {
1075 dev_warn(ctrl->device, "%s %d for NVME_NIDT_UUID\n",
1076 warn_str, cur->nidl);
1077 return -1;
1078 }
1079 uuid_copy(&ids->uuid, data + sizeof(*cur));
1080 return NVME_NIDT_UUID_LEN;
1081 default:
1082 /* Skip unknown types */
1083 return cur->nidl;
1084 }
1085}
1086
cdbff4f2 1087static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
002fab04 1088 struct nvme_ns_ids *ids)
3b22ba26
JT
1089{
1090 struct nvme_command c = { };
1091 int status;
1092 void *data;
1093 int pos;
1094 int len;
1095
1096 c.identify.opcode = nvme_admin_identify;
1097 c.identify.nsid = cpu_to_le32(nsid);
1098 c.identify.cns = NVME_ID_CNS_NS_DESC_LIST;
1099
1100 data = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
1101 if (!data)
1102 return -ENOMEM;
1103
cdbff4f2 1104 status = nvme_submit_sync_cmd(ctrl->admin_q, &c, data,
3b22ba26 1105 NVME_IDENTIFY_DATA_SIZE);
fb314eb0
CH
1106 if (status) {
1107 dev_warn(ctrl->device,
1108 "Identify Descriptors failed (%d)\n", status);
1109 /*
1110 * Don't treat an error as fatal, as we potentially already
1111 * have a NGUID or EUI-64.
1112 */
59c7c3ca 1113 if (status > 0 && !(status & NVME_SC_DNR))
fb314eb0 1114 status = 0;
3b22ba26 1115 goto free_data;
fb314eb0 1116 }
3b22ba26
JT
1117
1118 for (pos = 0; pos < NVME_IDENTIFY_DATA_SIZE; pos += len) {
1119 struct nvme_ns_id_desc *cur = data + pos;
1120
1121 if (cur->nidl == 0)
1122 break;
1123
ad95a613
CK
1124 len = nvme_process_ns_desc(ctrl, ids, cur);
1125 if (len < 0)
1126 goto free_data;
3b22ba26
JT
1127
1128 len += sizeof(*cur);
1129 }
1130free_data:
1131 kfree(data);
1132 return status;
1133}
1134
540c801c
KB
1135static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *ns_list)
1136{
1137 struct nvme_command c = { };
1138
1139 c.identify.opcode = nvme_admin_identify;
986994a2 1140 c.identify.cns = NVME_ID_CNS_NS_ACTIVE_LIST;
540c801c 1141 c.identify.nsid = cpu_to_le32(nsid);
42595eb7
MI
1142 return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list,
1143 NVME_IDENTIFY_DATA_SIZE);
540c801c
KB
1144}
1145
331813f6
SG
1146static int nvme_identify_ns(struct nvme_ctrl *ctrl,
1147 unsigned nsid, struct nvme_id_ns **id)
21d34711
CH
1148{
1149 struct nvme_command c = { };
1150 int error;
1151
1152 /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
778f067c
MG
1153 c.identify.opcode = nvme_admin_identify;
1154 c.identify.nsid = cpu_to_le32(nsid);
986994a2 1155 c.identify.cns = NVME_ID_CNS_NS;
21d34711 1156
331813f6
SG
1157 *id = kmalloc(sizeof(**id), GFP_KERNEL);
1158 if (!*id)
1159 return -ENOMEM;
21d34711 1160
331813f6 1161 error = nvme_submit_sync_cmd(ctrl->admin_q, &c, *id, sizeof(**id));
cdbff4f2 1162 if (error) {
d0de579c 1163 dev_warn(ctrl->device, "Identify namespace failed (%d)\n", error);
331813f6 1164 kfree(*id);
cdbff4f2
CH
1165 }
1166
331813f6 1167 return error;
21d34711
CH
1168}
1169
1a87ee65
KB
1170static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid,
1171 unsigned int dword11, void *buffer, size_t buflen, u32 *result)
21d34711 1172{
15755854 1173 union nvme_result res = { 0 };
21d34711 1174 struct nvme_command c;
1cb3cce5 1175 int ret;
21d34711
CH
1176
1177 memset(&c, 0, sizeof(c));
1a87ee65 1178 c.features.opcode = op;
21d34711
CH
1179 c.features.fid = cpu_to_le32(fid);
1180 c.features.dword11 = cpu_to_le32(dword11);
1181
d49187e9 1182 ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &res,
6287b51c 1183 buffer, buflen, 0, NVME_QID_ANY, 0, 0, false);
9b47f77a 1184 if (ret >= 0 && result)
d49187e9 1185 *result = le32_to_cpu(res.u32);
1cb3cce5 1186 return ret;
21d34711
CH
1187}
1188
1a87ee65
KB
1189int nvme_set_features(struct nvme_ctrl *dev, unsigned int fid,
1190 unsigned int dword11, void *buffer, size_t buflen,
1191 u32 *result)
1192{
1193 return nvme_features(dev, nvme_admin_set_features, fid, dword11, buffer,
1194 buflen, result);
1195}
1196EXPORT_SYMBOL_GPL(nvme_set_features);
1197
1198int nvme_get_features(struct nvme_ctrl *dev, unsigned int fid,
1199 unsigned int dword11, void *buffer, size_t buflen,
1200 u32 *result)
1201{
1202 return nvme_features(dev, nvme_admin_get_features, fid, dword11, buffer,
1203 buflen, result);
1204}
1205EXPORT_SYMBOL_GPL(nvme_get_features);
1206
9a0be7ab
CH
1207int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
1208{
1209 u32 q_count = (*count - 1) | ((*count - 1) << 16);
1210 u32 result;
1211 int status, nr_io_queues;
1212
1a6fe74d 1213 status = nvme_set_features(ctrl, NVME_FEAT_NUM_QUEUES, q_count, NULL, 0,
9a0be7ab 1214 &result);
f5fa90dc 1215 if (status < 0)
9a0be7ab
CH
1216 return status;
1217
f5fa90dc
CH
1218 /*
1219 * Degraded controllers might return an error when setting the queue
1220 * count. We still want to be able to bring them online and offer
1221 * access to the admin queue, as that might be only way to fix them up.
1222 */
1223 if (status > 0) {
f0425db0 1224 dev_err(ctrl->device, "Could not set queue count (%d)\n", status);
f5fa90dc
CH
1225 *count = 0;
1226 } else {
1227 nr_io_queues = min(result & 0xffff, result >> 16) + 1;
1228 *count = min(*count, nr_io_queues);
1229 }
1230
9a0be7ab
CH
1231 return 0;
1232}
576d55d6 1233EXPORT_SYMBOL_GPL(nvme_set_queue_count);
9a0be7ab 1234
c0561f82 1235#define NVME_AEN_SUPPORTED \
85f8a435
SG
1236 (NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT | \
1237 NVME_AEN_CFG_ANA_CHANGE | NVME_AEN_CFG_DISC_CHANGE)
c0561f82
HR
1238
1239static void nvme_enable_aen(struct nvme_ctrl *ctrl)
1240{
fa441b71 1241 u32 result, supported_aens = ctrl->oaes & NVME_AEN_SUPPORTED;
c0561f82
HR
1242 int status;
1243
fa441b71
WZ
1244 if (!supported_aens)
1245 return;
1246
1247 status = nvme_set_features(ctrl, NVME_FEAT_ASYNC_EVENT, supported_aens,
1248 NULL, 0, &result);
c0561f82
HR
1249 if (status)
1250 dev_warn(ctrl->device, "Failed to configure AEN (cfg %x)\n",
fa441b71 1251 supported_aens);
93da4023
SG
1252
1253 queue_work(nvme_wq, &ctrl->async_event_work);
c0561f82
HR
1254}
1255
c95b708d
NB
1256/*
1257 * Convert integer values from ioctl structures to user pointers, silently
1258 * ignoring the upper bits in the compat case to match behaviour of 32-bit
1259 * kernels.
1260 */
1261static void __user *nvme_to_user_ptr(uintptr_t ptrval)
1262{
1263 if (in_compat_syscall())
1264 ptrval = (compat_uptr_t)ptrval;
1265 return (void __user *)ptrval;
1266}
1267
1673f1f0
CH
1268static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
1269{
1270 struct nvme_user_io io;
1271 struct nvme_command c;
1272 unsigned length, meta_len;
1273 void __user *metadata;
1274
1275 if (copy_from_user(&io, uio, sizeof(io)))
1276 return -EFAULT;
63088ec7
KB
1277 if (io.flags)
1278 return -EINVAL;
1673f1f0
CH
1279
1280 switch (io.opcode) {
1281 case nvme_cmd_write:
1282 case nvme_cmd_read:
1283 case nvme_cmd_compare:
1284 break;
1285 default:
1286 return -EINVAL;
1287 }
1288
1289 length = (io.nblocks + 1) << ns->lba_shift;
1290 meta_len = (io.nblocks + 1) * ns->ms;
c95b708d 1291 metadata = nvme_to_user_ptr(io.metadata);
1673f1f0
CH
1292
1293 if (ns->ext) {
1294 length += meta_len;
1295 meta_len = 0;
1296 } else if (meta_len) {
1297 if ((io.metadata & 3) || !io.metadata)
1298 return -EINVAL;
1299 }
1300
1301 memset(&c, 0, sizeof(c));
1302 c.rw.opcode = io.opcode;
1303 c.rw.flags = io.flags;
ed754e5d 1304 c.rw.nsid = cpu_to_le32(ns->head->ns_id);
1673f1f0
CH
1305 c.rw.slba = cpu_to_le64(io.slba);
1306 c.rw.length = cpu_to_le16(io.nblocks);
1307 c.rw.control = cpu_to_le16(io.control);
1308 c.rw.dsmgmt = cpu_to_le32(io.dsmgmt);
1309 c.rw.reftag = cpu_to_le32(io.reftag);
1310 c.rw.apptag = cpu_to_le16(io.apptag);
1311 c.rw.appmask = cpu_to_le16(io.appmask);
1312
63263d60 1313 return nvme_submit_user_cmd(ns->queue, &c,
c95b708d 1314 nvme_to_user_ptr(io.addr), length,
202359c0 1315 metadata, meta_len, lower_32_bits(io.slba), NULL, 0);
1673f1f0
CH
1316}
1317
84fef62d
KB
1318static u32 nvme_known_admin_effects(u8 opcode)
1319{
1320 switch (opcode) {
1321 case nvme_admin_format_nvm:
1322 return NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC |
1323 NVME_CMD_EFFECTS_CSE_MASK;
1324 case nvme_admin_sanitize_nvm:
1325 return NVME_CMD_EFFECTS_CSE_MASK;
1326 default:
1327 break;
1328 }
1329 return 0;
1330}
1331
1332static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
1333 u8 opcode)
1334{
1335 u32 effects = 0;
1336
1337 if (ns) {
1338 if (ctrl->effects)
1339 effects = le32_to_cpu(ctrl->effects->iocs[opcode]);
415df90b 1340 if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC))
84fef62d
KB
1341 dev_warn(ctrl->device,
1342 "IO command:%02x has unhandled effects:%08x\n",
1343 opcode, effects);
1344 return 0;
1345 }
1346
1347 if (ctrl->effects)
62843c2e 1348 effects = le32_to_cpu(ctrl->effects->acs[opcode]);
6fa0321a 1349 effects |= nvme_known_admin_effects(opcode);
84fef62d
KB
1350
1351 /*
1352 * For simplicity, IO to all namespaces is quiesced even if the command
1353 * effects say only one namespace is affected.
1354 */
1355 if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) {
e7ad43c3 1356 mutex_lock(&ctrl->scan_lock);
b9156dae
SG
1357 mutex_lock(&ctrl->subsys->lock);
1358 nvme_mpath_start_freeze(ctrl->subsys);
1359 nvme_mpath_wait_freeze(ctrl->subsys);
84fef62d
KB
1360 nvme_start_freeze(ctrl);
1361 nvme_wait_freeze(ctrl);
1362 }
1363 return effects;
1364}
1365
1366static void nvme_update_formats(struct nvme_ctrl *ctrl)
1367{
cf39a6bc 1368 struct nvme_ns *ns;
84fef62d 1369
cf39a6bc
SB
1370 down_read(&ctrl->namespaces_rwsem);
1371 list_for_each_entry(ns, &ctrl->namespaces, list)
1372 if (ns->disk && nvme_revalidate_disk(ns->disk))
1373 nvme_set_queue_dying(ns);
1374 up_read(&ctrl->namespaces_rwsem);
84fef62d
KB
1375}
1376
1377static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
1378{
1379 /*
1380 * Revalidate LBA changes prior to unfreezing. This is necessary to
1381 * prevent memory corruption if a logical block size was changed by
1382 * this command.
1383 */
1384 if (effects & NVME_CMD_EFFECTS_LBCC)
1385 nvme_update_formats(ctrl);
e7ad43c3 1386 if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) {
84fef62d 1387 nvme_unfreeze(ctrl);
b9156dae
SG
1388 nvme_mpath_unfreeze(ctrl->subsys);
1389 mutex_unlock(&ctrl->subsys->lock);
6abff1b9 1390 nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL);
e7ad43c3
KB
1391 mutex_unlock(&ctrl->scan_lock);
1392 }
84fef62d
KB
1393 if (effects & NVME_CMD_EFFECTS_CCC)
1394 nvme_init_identify(ctrl);
1395 if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC))
1396 nvme_queue_scan(ctrl);
1397}
1398
f3ca80fc 1399static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
1673f1f0
CH
1400 struct nvme_passthru_cmd __user *ucmd)
1401{
1402 struct nvme_passthru_cmd cmd;
1403 struct nvme_command c;
1404 unsigned timeout = 0;
84fef62d 1405 u32 effects;
65e68edc
MR
1406 u64 result;
1407 int status;
1408
1409 if (!capable(CAP_SYS_ADMIN))
1410 return -EACCES;
1411 if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
1412 return -EFAULT;
1413 if (cmd.flags)
1414 return -EINVAL;
1415
1416 memset(&c, 0, sizeof(c));
1417 c.common.opcode = cmd.opcode;
1418 c.common.flags = cmd.flags;
1419 c.common.nsid = cpu_to_le32(cmd.nsid);
1420 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
1421 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
1422 c.common.cdw10 = cpu_to_le32(cmd.cdw10);
1423 c.common.cdw11 = cpu_to_le32(cmd.cdw11);
1424 c.common.cdw12 = cpu_to_le32(cmd.cdw12);
1425 c.common.cdw13 = cpu_to_le32(cmd.cdw13);
1426 c.common.cdw14 = cpu_to_le32(cmd.cdw14);
1427 c.common.cdw15 = cpu_to_le32(cmd.cdw15);
1428
1429 if (cmd.timeout_ms)
1430 timeout = msecs_to_jiffies(cmd.timeout_ms);
1431
1432 effects = nvme_passthru_start(ctrl, ns, cmd.opcode);
1433 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
c95b708d
NB
1434 nvme_to_user_ptr(cmd.addr), cmd.data_len,
1435 nvme_to_user_ptr(cmd.metadata), cmd.metadata_len,
1436 0, &result, timeout);
65e68edc
MR
1437 nvme_passthru_end(ctrl, effects);
1438
1439 if (status >= 0) {
1440 if (put_user(result, &ucmd->result))
1441 return -EFAULT;
1442 }
1443
1444 return status;
1445}
1446
1447static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
1448 struct nvme_passthru_cmd64 __user *ucmd)
1449{
1450 struct nvme_passthru_cmd64 cmd;
1451 struct nvme_command c;
1452 unsigned timeout = 0;
1453 u32 effects;
1673f1f0
CH
1454 int status;
1455
1456 if (!capable(CAP_SYS_ADMIN))
1457 return -EACCES;
1458 if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
1459 return -EFAULT;
63088ec7
KB
1460 if (cmd.flags)
1461 return -EINVAL;
1673f1f0
CH
1462
1463 memset(&c, 0, sizeof(c));
1464 c.common.opcode = cmd.opcode;
1465 c.common.flags = cmd.flags;
1466 c.common.nsid = cpu_to_le32(cmd.nsid);
1467 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
1468 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
b7c8f366
CK
1469 c.common.cdw10 = cpu_to_le32(cmd.cdw10);
1470 c.common.cdw11 = cpu_to_le32(cmd.cdw11);
1471 c.common.cdw12 = cpu_to_le32(cmd.cdw12);
1472 c.common.cdw13 = cpu_to_le32(cmd.cdw13);
1473 c.common.cdw14 = cpu_to_le32(cmd.cdw14);
1474 c.common.cdw15 = cpu_to_le32(cmd.cdw15);
1673f1f0
CH
1475
1476 if (cmd.timeout_ms)
1477 timeout = msecs_to_jiffies(cmd.timeout_ms);
1478
84fef62d 1479 effects = nvme_passthru_start(ctrl, ns, cmd.opcode);
1673f1f0 1480 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
c95b708d
NB
1481 nvme_to_user_ptr(cmd.addr), cmd.data_len,
1482 nvme_to_user_ptr(cmd.metadata), cmd.metadata_len,
63263d60 1483 0, &cmd.result, timeout);
84fef62d
KB
1484 nvme_passthru_end(ctrl, effects);
1485
1673f1f0
CH
1486 if (status >= 0) {
1487 if (put_user(cmd.result, &ucmd->result))
1488 return -EFAULT;
1489 }
1490
1491 return status;
1492}
1493
32acab31
CH
1494/*
1495 * Issue ioctl requests on the first available path. Note that unlike normal
1496 * block layer requests we will not retry failed request on another controller.
1497 */
1498static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk,
1499 struct nvme_ns_head **head, int *srcu_idx)
1673f1f0 1500{
32acab31
CH
1501#ifdef CONFIG_NVME_MULTIPATH
1502 if (disk->fops == &nvme_ns_head_ops) {
100c815c
CH
1503 struct nvme_ns *ns;
1504
32acab31
CH
1505 *head = disk->private_data;
1506 *srcu_idx = srcu_read_lock(&(*head)->srcu);
100c815c
CH
1507 ns = nvme_find_path(*head);
1508 if (!ns)
1509 srcu_read_unlock(&(*head)->srcu, *srcu_idx);
1510 return ns;
32acab31
CH
1511 }
1512#endif
1513 *head = NULL;
1514 *srcu_idx = -1;
1515 return disk->private_data;
1516}
1673f1f0 1517
32acab31
CH
1518static void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx)
1519{
1520 if (head)
1521 srcu_read_unlock(&head->srcu, idx);
1522}
1673f1f0 1523
65e68edc
MR
1524static bool is_ctrl_ioctl(unsigned int cmd)
1525{
1526 if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD)
1527 return true;
1528 if (is_sed_ioctl(cmd))
1529 return true;
1530 return false;
1531}
1532
1533static int nvme_handle_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
1534 void __user *argp,
1535 struct nvme_ns_head *head,
1536 int srcu_idx)
1537{
1538 struct nvme_ctrl *ctrl = ns->ctrl;
1539 int ret;
1540
1541 nvme_get_ctrl(ns->ctrl);
1542 nvme_put_ns_from_disk(head, srcu_idx);
1543
1544 switch (cmd) {
1545 case NVME_IOCTL_ADMIN_CMD:
1546 ret = nvme_user_cmd(ctrl, NULL, argp);
1547 break;
1548 case NVME_IOCTL_ADMIN64_CMD:
1549 ret = nvme_user_cmd64(ctrl, NULL, argp);
1550 break;
1551 default:
1552 ret = sed_ioctl(ctrl->opal_dev, cmd, argp);
1553 break;
1554 }
1555 nvme_put_ctrl(ctrl);
1556 return ret;
1557}
1558
32acab31
CH
1559static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
1560 unsigned int cmd, unsigned long arg)
1673f1f0 1561{
32acab31 1562 struct nvme_ns_head *head = NULL;
90ec611a 1563 void __user *argp = (void __user *)arg;
32acab31
CH
1564 struct nvme_ns *ns;
1565 int srcu_idx, ret;
1566
1567 ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx);
1568 if (unlikely(!ns))
100c815c
CH
1569 return -EWOULDBLOCK;
1570
5fb4aac7
CH
1571 /*
1572 * Handle ioctls that apply to the controller instead of the namespace
1573 * seperately and drop the ns SRCU reference early. This avoids a
1574 * deadlock when deleting namespaces using the passthrough interface.
1575 */
65e68edc
MR
1576 if (is_ctrl_ioctl(cmd))
1577 return nvme_handle_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);
5fb4aac7 1578
90ec611a
CH
1579 switch (cmd) {
1580 case NVME_IOCTL_ID:
1581 force_successful_syscall_return();
1582 ret = ns->head->ns_id;
1583 break;
90ec611a
CH
1584 case NVME_IOCTL_IO_CMD:
1585 ret = nvme_user_cmd(ns->ctrl, ns, argp);
1586 break;
1587 case NVME_IOCTL_SUBMIT_IO:
1588 ret = nvme_submit_io(ns, argp);
1589 break;
65e68edc
MR
1590 case NVME_IOCTL_IO64_CMD:
1591 ret = nvme_user_cmd64(ns->ctrl, ns, argp);
1592 break;
90ec611a
CH
1593 default:
1594 if (ns->ndev)
1595 ret = nvme_nvm_ioctl(ns, cmd, arg);
90ec611a
CH
1596 else
1597 ret = -ENOTTY;
1598 }
1599
32acab31
CH
1600 nvme_put_ns_from_disk(head, srcu_idx);
1601 return ret;
1673f1f0 1602}
1673f1f0 1603
#ifdef CONFIG_COMPAT
/* Packed layout used to derive the compat NVME_IOCTL_SUBMIT_IO number. */
struct nvme_user_io32 {
	__u8	opcode;
	__u8	flags;
	__u16	control;
	__u16	nblocks;
	__u16	rsvd;
	__u64	metadata;
	__u64	addr;
	__u64	slba;
	__u32	dsmgmt;
	__u32	reftag;
	__u16	apptag;
	__u16	appmask;
} __attribute__((__packed__));

#define NVME_IOCTL_SUBMIT_IO32	_IOW('N', 0x42, struct nvme_user_io32)

/*
 * compat_ioctl entry point.
 *
 * NVME_IOCTL_SUBMIT_IO is defined in terms of
 * sizeof(struct nvme_user_io), and that size is not the same for a 32 bit
 * and a 64 bit compiler.  A 32 bit program running on a 64 bit kernel
 * therefore issues a different ioctl number, NVME_IOCTL_SUBMIT_IO32,
 * which is mapped back to NVME_IOCTL_SUBMIT_IO here.  All other ioctl
 * numbers are identical for 32 bit and 64 bit and are passed through
 * untouched.
 */
static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
		unsigned int cmd, unsigned long arg)
{
	if (cmd == NVME_IOCTL_SUBMIT_IO32)
		cmd = NVME_IOCTL_SUBMIT_IO;

	return nvme_ioctl(bdev, mode, cmd, arg);
}
#else
#define nvme_compat_ioctl	NULL
#endif /* CONFIG_COMPAT */
1644
1673f1f0
CH
1645static int nvme_open(struct block_device *bdev, fmode_t mode)
1646{
c6424a90
CH
1647 struct nvme_ns *ns = bdev->bd_disk->private_data;
1648
32acab31
CH
1649#ifdef CONFIG_NVME_MULTIPATH
1650 /* should never be called due to GENHD_FL_HIDDEN */
1651 if (WARN_ON_ONCE(ns->head->disk))
85088c4a 1652 goto fail;
32acab31 1653#endif
c6424a90 1654 if (!kref_get_unless_zero(&ns->kref))
85088c4a
NC
1655 goto fail;
1656 if (!try_module_get(ns->ctrl->ops->module))
1657 goto fail_put_ns;
1658
c6424a90 1659 return 0;
85088c4a
NC
1660
1661fail_put_ns:
1662 nvme_put_ns(ns);
1663fail:
1664 return -ENXIO;
1673f1f0
CH
1665}
1666
1667static void nvme_release(struct gendisk *disk, fmode_t mode)
1668{
85088c4a
NC
1669 struct nvme_ns *ns = disk->private_data;
1670
1671 module_put(ns->ctrl->ops->module);
1672 nvme_put_ns(ns);
1673f1f0
CH
1673}
1674
1675static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1676{
1677 /* some standard values */
1678 geo->heads = 1 << 6;
1679 geo->sectors = 1 << 5;
1680 geo->cylinders = get_capacity(bdev->bd_disk) >> 11;
1681 return 0;
1682}
1683
1684#ifdef CONFIG_BLK_DEV_INTEGRITY
39b7baa4 1685static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
1673f1f0
CH
1686{
1687 struct blk_integrity integrity;
1688
fa9a89fc 1689 memset(&integrity, 0, sizeof(integrity));
39b7baa4 1690 switch (pi_type) {
1673f1f0
CH
1691 case NVME_NS_DPS_PI_TYPE3:
1692 integrity.profile = &t10_pi_type3_crc;
ba36c21b
NB
1693 integrity.tag_size = sizeof(u16) + sizeof(u32);
1694 integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
1673f1f0
CH
1695 break;
1696 case NVME_NS_DPS_PI_TYPE1:
1697 case NVME_NS_DPS_PI_TYPE2:
1698 integrity.profile = &t10_pi_type1_crc;
ba36c21b
NB
1699 integrity.tag_size = sizeof(u16);
1700 integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
1673f1f0
CH
1701 break;
1702 default:
1703 integrity.profile = NULL;
1704 break;
1705 }
39b7baa4
CH
1706 integrity.tuple_size = ms;
1707 blk_integrity_register(disk, &integrity);
1708 blk_queue_max_integrity_segments(disk->queue, 1);
1673f1f0
CH
1709}
1710#else
39b7baa4 1711static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
1673f1f0
CH
1712{
1713}
1714#endif /* CONFIG_BLK_DEV_INTEGRITY */
1715
6b8190d6
SB
1716static void nvme_set_chunk_size(struct nvme_ns *ns)
1717{
e08f2ae8 1718 u32 chunk_size = nvme_lba_to_sect(ns, ns->noiob);
6b8190d6
SB
1719 blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size));
1720}
1721
26318571 1722static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
1673f1f0 1723{
3831761e 1724 struct nvme_ctrl *ctrl = ns->ctrl;
26318571 1725 struct request_queue *queue = disk->queue;
30e5e929
CH
1726 u32 size = queue_logical_block_size(queue);
1727
3831761e
JA
1728 if (!(ctrl->oncs & NVME_CTRL_ONCS_DSM)) {
1729 blk_queue_flag_clear(QUEUE_FLAG_DISCARD, queue);
1730 return;
1731 }
1732
1733 if (ctrl->nr_streams && ns->sws && ns->sgs)
1734 size *= ns->sws * ns->sgs;
08095e70 1735
b35ba01e
CH
1736 BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
1737 NVME_DSM_MAX_RANGES);
1738
b224f613 1739 queue->limits.discard_alignment = 0;
30e5e929 1740 queue->limits.discard_granularity = size;
f5d11840 1741
3831761e
JA
1742 /* If discard is already enabled, don't reset queue limits */
1743 if (blk_queue_flag_test_and_set(QUEUE_FLAG_DISCARD, queue))
1744 return;
1745
30e5e929
CH
1746 blk_queue_max_discard_sectors(queue, UINT_MAX);
1747 blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES);
e850fd16
CH
1748
1749 if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
30e5e929 1750 blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
1673f1f0
CH
1751}
1752
9f0916ab 1753static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns)
6e02318e 1754{
e08f2ae8 1755 u64 max_blocks;
6e02318e 1756
7b210e4e
CH
1757 if (!(ns->ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) ||
1758 (ns->ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES))
6e02318e
CK
1759 return;
1760 /*
1761 * Even though NVMe spec explicitly states that MDTS is not
1762 * applicable to the write-zeroes:- "The restriction does not apply to
1763 * commands that do not transfer data between the host and the
1764 * controller (e.g., Write Uncorrectable ro Write Zeroes command).".
1765 * In order to be more cautious use controller's max_hw_sectors value
1766 * to configure the maximum sectors for the write-zeroes which is
1767 * configured based on the controller's MDTS field in the
1768 * nvme_init_identify() if available.
1769 */
1770 if (ns->ctrl->max_hw_sectors == UINT_MAX)
e08f2ae8 1771 max_blocks = (u64)USHRT_MAX + 1;
6e02318e 1772 else
e08f2ae8 1773 max_blocks = ns->ctrl->max_hw_sectors + 1;
6e02318e 1774
e08f2ae8
DLM
1775 blk_queue_max_write_zeroes_sectors(disk->queue,
1776 nvme_lba_to_sect(ns, max_blocks));
6e02318e
CK
1777}
1778
538af88e 1779static int nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid,
002fab04 1780 struct nvme_id_ns *id, struct nvme_ns_ids *ids)
1673f1f0 1781{
002fab04
CH
1782 memset(ids, 0, sizeof(*ids));
1783
cdbff4f2 1784 if (ctrl->vs >= NVME_VS(1, 1, 0))
002fab04 1785 memcpy(ids->eui64, id->eui64, sizeof(id->eui64));
cdbff4f2 1786 if (ctrl->vs >= NVME_VS(1, 2, 0))
002fab04 1787 memcpy(ids->nguid, id->nguid, sizeof(id->nguid));
fb314eb0
CH
1788 if (ctrl->vs >= NVME_VS(1, 3, 0))
1789 return nvme_identify_ns_descs(ctrl, nsid, ids);
1790 return 0;
ac81bfa9
MB
1791}
1792
ed754e5d
CH
1793static bool nvme_ns_ids_valid(struct nvme_ns_ids *ids)
1794{
1795 return !uuid_is_null(&ids->uuid) ||
1796 memchr_inv(ids->nguid, 0, sizeof(ids->nguid)) ||
1797 memchr_inv(ids->eui64, 0, sizeof(ids->eui64));
1798}
1799
002fab04
CH
1800static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b)
1801{
1802 return uuid_equal(&a->uuid, &b->uuid) &&
1803 memcmp(&a->nguid, &b->nguid, sizeof(a->nguid)) == 0 &&
1804 memcmp(&a->eui64, &b->eui64, sizeof(a->eui64)) == 0;
1805}
1806
24b0b58c
CH
1807static void nvme_update_disk_info(struct gendisk *disk,
1808 struct nvme_ns *ns, struct nvme_id_ns *id)
1809{
e08f2ae8 1810 sector_t capacity = nvme_lba_to_sect(ns, le64_to_cpu(id->nsze));
cee160fd 1811 unsigned short bs = 1 << ns->lba_shift;
81adb863 1812 u32 atomic_bs, phys_bs, io_opt;
24b0b58c 1813
01fa0174
SG
1814 if (ns->lba_shift > PAGE_SHIFT) {
1815 /* unsupported block size, set capacity to 0 later */
1816 bs = (1 << 9);
1817 }
24b0b58c
CH
1818 blk_mq_freeze_queue(disk->queue);
1819 blk_integrity_unregister(disk);
1820
81adb863
BVA
1821 if (id->nabo == 0) {
1822 /*
1823 * Bit 1 indicates whether NAWUPF is defined for this namespace
1824 * and whether it should be used instead of AWUPF. If NAWUPF ==
1825 * 0 then AWUPF must be used instead.
1826 */
1827 if (id->nsfeat & (1 << 1) && id->nawupf)
1828 atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
1829 else
1830 atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
1831 } else {
1832 atomic_bs = bs;
1833 }
1834 phys_bs = bs;
1835 io_opt = bs;
1836 if (id->nsfeat & (1 << 4)) {
1837 /* NPWG = Namespace Preferred Write Granularity */
1838 phys_bs *= 1 + le16_to_cpu(id->npwg);
1839 /* NOWS = Namespace Optimal Write Size */
1840 io_opt *= 1 + le16_to_cpu(id->nows);
1841 }
1842
cee160fd 1843 blk_queue_logical_block_size(disk->queue, bs);
81adb863
BVA
1844 /*
1845 * Linux filesystems assume writing a single physical block is
1846 * an atomic operation. Hence limit the physical block size to the
1847 * value of the Atomic Write Unit Power Fail parameter.
1848 */
1849 blk_queue_physical_block_size(disk->queue, min(phys_bs, atomic_bs));
1850 blk_queue_io_min(disk->queue, phys_bs);
1851 blk_queue_io_opt(disk->queue, io_opt);
cee160fd 1852
24b0b58c
CH
1853 if (ns->ms && !ns->ext &&
1854 (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
1855 nvme_init_integrity(disk, ns->ms, ns->pi_type);
01fa0174
SG
1856 if ((ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk)) ||
1857 ns->lba_shift > PAGE_SHIFT)
24b0b58c 1858 capacity = 0;
24b0b58c 1859
cb224c3a 1860 set_capacity_revalidate_and_notify(disk, capacity, false);
b1aafb35 1861
26318571 1862 nvme_config_discard(disk, ns);
9f0916ab 1863 nvme_config_write_zeroes(disk, ns);
1293477f
CK
1864
1865 if (id->nsattr & (1 << 0))
1866 set_disk_ro(disk, true);
1867 else
1868 set_disk_ro(disk, false);
1869
24b0b58c
CH
1870 blk_mq_unfreeze_queue(disk->queue);
1871}
1872
ac81bfa9
MB
1873static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
1874{
1875 struct nvme_ns *ns = disk->private_data;
1673f1f0
CH
1876
1877 /*
1878 * If identify namespace failed, use default 512 byte block size so
1879 * block layer can use before failing read/write for 0 capacity.
1880 */
c81bfba9 1881 ns->lba_shift = id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ds;
1673f1f0
CH
1882 if (ns->lba_shift == 0)
1883 ns->lba_shift = 9;
6b8190d6 1884 ns->noiob = le16_to_cpu(id->noiob);
b5be3b39 1885 ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms);
c97f414c 1886 ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
b5be3b39
CH
1887 /* the PI implementation requires metadata equal t10 pi tuple size */
1888 if (ns->ms == sizeof(struct t10_pi_tuple))
1889 ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
1890 else
1891 ns->pi_type = 0;
1673f1f0 1892
6b8190d6
SB
1893 if (ns->noiob)
1894 nvme_set_chunk_size(ns);
24b0b58c 1895 nvme_update_disk_info(disk, ns, id);
32acab31 1896#ifdef CONFIG_NVME_MULTIPATH
8f676b85 1897 if (ns->head->disk) {
32acab31 1898 nvme_update_disk_info(ns->head->disk, ns, id);
8f676b85 1899 blk_queue_stack_limits(ns->head->disk->queue, ns->queue);
74e4d20e
SG
1900 if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) {
1901 struct backing_dev_info *info =
1902 ns->head->disk->queue->backing_dev_info;
1903
1904 info->capabilities |= BDI_CAP_STABLE_WRITES;
1905 }
1906
fab7772b 1907 revalidate_disk(ns->head->disk);
8f676b85 1908 }
32acab31 1909#endif
ac81bfa9 1910}
1673f1f0 1911
ac81bfa9
MB
1912static int nvme_revalidate_disk(struct gendisk *disk)
1913{
1914 struct nvme_ns *ns = disk->private_data;
cdbff4f2
CH
1915 struct nvme_ctrl *ctrl = ns->ctrl;
1916 struct nvme_id_ns *id;
002fab04 1917 struct nvme_ns_ids ids;
cdbff4f2 1918 int ret = 0;
ac81bfa9
MB
1919
1920 if (test_bit(NVME_NS_DEAD, &ns->flags)) {
1921 set_capacity(disk, 0);
1922 return -ENODEV;
1923 }
1924
331813f6
SG
1925 ret = nvme_identify_ns(ctrl, ns->head->ns_id, &id);
1926 if (ret)
1927 goto out;
ac81bfa9 1928
cdbff4f2
CH
1929 if (id->ncap == 0) {
1930 ret = -ENODEV;
331813f6 1931 goto free_id;
cdbff4f2 1932 }
ac81bfa9 1933
5e0fab57 1934 __nvme_revalidate_disk(disk, id);
538af88e
SG
1935 ret = nvme_report_ns_ids(ctrl, ns->head->ns_id, id, &ids);
1936 if (ret)
1937 goto free_id;
1938
ed754e5d 1939 if (!nvme_ns_ids_equal(&ns->head->ids, &ids)) {
1d5df6af 1940 dev_err(ctrl->device,
ed754e5d 1941 "identifiers changed for nsid %d\n", ns->head->ns_id);
1d5df6af
CH
1942 ret = -ENODEV;
1943 }
1944
331813f6 1945free_id:
cdbff4f2 1946 kfree(id);
331813f6 1947out:
205da243
SG
1948 /*
1949 * Only fail the function if we got a fatal error back from the
1950 * device, otherwise ignore the error and just move on.
1951 */
1952 if (ret == -ENOMEM || (ret > 0 && !(ret & NVME_SC_DNR)))
1953 ret = 0;
1954 else if (ret > 0)
331813f6 1955 ret = blk_status_to_errno(nvme_error_status(ret));
cdbff4f2 1956 return ret;
1673f1f0
CH
1957}
1958
1959static char nvme_pr_type(enum pr_type type)
1960{
1961 switch (type) {
1962 case PR_WRITE_EXCLUSIVE:
1963 return 1;
1964 case PR_EXCLUSIVE_ACCESS:
1965 return 2;
1966 case PR_WRITE_EXCLUSIVE_REG_ONLY:
1967 return 3;
1968 case PR_EXCLUSIVE_ACCESS_REG_ONLY:
1969 return 4;
1970 case PR_WRITE_EXCLUSIVE_ALL_REGS:
1971 return 5;
1972 case PR_EXCLUSIVE_ACCESS_ALL_REGS:
1973 return 6;
1974 default:
1975 return 0;
1976 }
1977};
1978
1979static int nvme_pr_command(struct block_device *bdev, u32 cdw10,
1980 u64 key, u64 sa_key, u8 op)
1981{
32acab31
CH
1982 struct nvme_ns_head *head = NULL;
1983 struct nvme_ns *ns;
1673f1f0 1984 struct nvme_command c;
32acab31 1985 int srcu_idx, ret;
1673f1f0
CH
1986 u8 data[16] = { 0, };
1987
b0d61d58
KB
1988 ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx);
1989 if (unlikely(!ns))
1990 return -EWOULDBLOCK;
1991
1673f1f0
CH
1992 put_unaligned_le64(key, &data[0]);
1993 put_unaligned_le64(sa_key, &data[8]);
1994
1995 memset(&c, 0, sizeof(c));
1996 c.common.opcode = op;
b0d61d58 1997 c.common.nsid = cpu_to_le32(ns->head->ns_id);
b7c8f366 1998 c.common.cdw10 = cpu_to_le32(cdw10);
1673f1f0 1999
b0d61d58 2000 ret = nvme_submit_sync_cmd(ns->queue, &c, data, 16);
32acab31
CH
2001 nvme_put_ns_from_disk(head, srcu_idx);
2002 return ret;
1673f1f0
CH
2003}
2004
2005static int nvme_pr_register(struct block_device *bdev, u64 old,
2006 u64 new, unsigned flags)
2007{
2008 u32 cdw10;
2009
2010 if (flags & ~PR_FL_IGNORE_KEY)
2011 return -EOPNOTSUPP;
2012
2013 cdw10 = old ? 2 : 0;
2014 cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0;
2015 cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */
2016 return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register);
2017}
2018
2019static int nvme_pr_reserve(struct block_device *bdev, u64 key,
2020 enum pr_type type, unsigned flags)
2021{
2022 u32 cdw10;
2023
2024 if (flags & ~PR_FL_IGNORE_KEY)
2025 return -EOPNOTSUPP;
2026
2027 cdw10 = nvme_pr_type(type) << 8;
2028 cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0);
2029 return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire);
2030}
2031
2032static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new,
2033 enum pr_type type, bool abort)
2034{
e9a9853c 2035 u32 cdw10 = nvme_pr_type(type) << 8 | (abort ? 2 : 1);
1673f1f0
CH
2036 return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire);
2037}
2038
2039static int nvme_pr_clear(struct block_device *bdev, u64 key)
2040{
8c0b3915 2041 u32 cdw10 = 1 | (key ? 1 << 3 : 0);
1673f1f0
CH
2042 return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_register);
2043}
2044
2045static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
2046{
e9a9853c 2047 u32 cdw10 = nvme_pr_type(type) << 8 | (key ? 1 << 3 : 0);
1673f1f0
CH
2048 return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
2049}
2050
2051static const struct pr_ops nvme_pr_ops = {
2052 .pr_register = nvme_pr_register,
2053 .pr_reserve = nvme_pr_reserve,
2054 .pr_release = nvme_pr_release,
2055 .pr_preempt = nvme_pr_preempt,
2056 .pr_clear = nvme_pr_clear,
2057};
2058
a98e58e5 2059#ifdef CONFIG_BLK_SED_OPAL
4f1244c8
CH
2060int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
2061 bool send)
a98e58e5 2062{
4f1244c8 2063 struct nvme_ctrl *ctrl = data;
a98e58e5 2064 struct nvme_command cmd;
a98e58e5
SB
2065
2066 memset(&cmd, 0, sizeof(cmd));
2067 if (send)
2068 cmd.common.opcode = nvme_admin_security_send;
2069 else
2070 cmd.common.opcode = nvme_admin_security_recv;
a98e58e5 2071 cmd.common.nsid = 0;
b7c8f366
CK
2072 cmd.common.cdw10 = cpu_to_le32(((u32)secp) << 24 | ((u32)spsp) << 8);
2073 cmd.common.cdw11 = cpu_to_le32(len);
a98e58e5
SB
2074
2075 return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len,
6287b51c 2076 ADMIN_TIMEOUT, NVME_QID_ANY, 1, 0, false);
a98e58e5
SB
2077}
2078EXPORT_SYMBOL_GPL(nvme_sec_submit);
2079#endif /* CONFIG_BLK_SED_OPAL */
2080
5bae7f73 2081static const struct block_device_operations nvme_fops = {
1673f1f0
CH
2082 .owner = THIS_MODULE,
2083 .ioctl = nvme_ioctl,
c225b610 2084 .compat_ioctl = nvme_compat_ioctl,
1673f1f0
CH
2085 .open = nvme_open,
2086 .release = nvme_release,
2087 .getgeo = nvme_getgeo,
2088 .revalidate_disk= nvme_revalidate_disk,
2089 .pr_ops = &nvme_pr_ops,
2090};
2091
32acab31
CH
2092#ifdef CONFIG_NVME_MULTIPATH
2093static int nvme_ns_head_open(struct block_device *bdev, fmode_t mode)
2094{
2095 struct nvme_ns_head *head = bdev->bd_disk->private_data;
2096
2097 if (!kref_get_unless_zero(&head->ref))
2098 return -ENXIO;
2099 return 0;
2100}
2101
2102static void nvme_ns_head_release(struct gendisk *disk, fmode_t mode)
2103{
2104 nvme_put_ns_head(disk->private_data);
2105}
2106
2107const struct block_device_operations nvme_ns_head_ops = {
2108 .owner = THIS_MODULE,
2109 .open = nvme_ns_head_open,
2110 .release = nvme_ns_head_release,
2111 .ioctl = nvme_ioctl,
c225b610 2112 .compat_ioctl = nvme_compat_ioctl,
32acab31
CH
2113 .getgeo = nvme_getgeo,
2114 .pr_ops = &nvme_pr_ops,
2115};
2116#endif /* CONFIG_NVME_MULTIPATH */
2117
5fd4ce1b
CH
2118static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
2119{
2120 unsigned long timeout =
2121 ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
2122 u32 csts, bit = enabled ? NVME_CSTS_RDY : 0;
2123 int ret;
2124
2125 while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
0df1e4f5
KB
2126 if (csts == ~0)
2127 return -ENODEV;
5fd4ce1b
CH
2128 if ((csts & NVME_CSTS_RDY) == bit)
2129 break;
2130
3e98c244 2131 usleep_range(1000, 2000);
5fd4ce1b
CH
2132 if (fatal_signal_pending(current))
2133 return -EINTR;
2134 if (time_after(jiffies, timeout)) {
1b3c47c1 2135 dev_err(ctrl->device,
94d2e705
RG
2136 "Device not ready; aborting %s, CSTS=0x%x\n",
2137 enabled ? "initialisation" : "reset", csts);
5fd4ce1b
CH
2138 return -ENODEV;
2139 }
2140 }
2141
2142 return ret;
2143}
2144
2145/*
2146 * If the device has been passed off to us in an enabled state, just clear
2147 * the enabled bit. The spec says we should set the 'shutdown notification
2148 * bits', but doing so may cause the device to complete commands to the
2149 * admin queue ... and we don't know what memory that might be pointing at!
2150 */
b5b05048 2151int nvme_disable_ctrl(struct nvme_ctrl *ctrl)
5fd4ce1b
CH
2152{
2153 int ret;
2154
2155 ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
2156 ctrl->ctrl_config &= ~NVME_CC_ENABLE;
2157
2158 ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
2159 if (ret)
2160 return ret;
54adc010 2161
b5a10c5f 2162 if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY)
54adc010
GP
2163 msleep(NVME_QUIRK_DELAY_AMOUNT);
2164
b5b05048 2165 return nvme_wait_ready(ctrl, ctrl->cap, false);
5fd4ce1b 2166}
576d55d6 2167EXPORT_SYMBOL_GPL(nvme_disable_ctrl);
5fd4ce1b 2168
c0f2f45b 2169int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
5fd4ce1b
CH
2170{
2171 /*
2172 * Default to a 4K page size, with the intention to update this
2173 * path in the future to accomodate architectures with differing
2174 * kernel and IO page sizes.
2175 */
c0f2f45b 2176 unsigned dev_page_min, page_shift = 12;
5fd4ce1b
CH
2177 int ret;
2178
c0f2f45b
SG
2179 ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap);
2180 if (ret) {
2181 dev_err(ctrl->device, "Reading CAP failed (%d)\n", ret);
2182 return ret;
2183 }
2184 dev_page_min = NVME_CAP_MPSMIN(ctrl->cap) + 12;
2185
5fd4ce1b 2186 if (page_shift < dev_page_min) {
1b3c47c1 2187 dev_err(ctrl->device,
5fd4ce1b
CH
2188 "Minimum device page size %u too large for host (%u)\n",
2189 1 << dev_page_min, 1 << page_shift);
2190 return -ENODEV;
2191 }
2192
2193 ctrl->page_size = 1 << page_shift;
2194
2195 ctrl->ctrl_config = NVME_CC_CSS_NVM;
2196 ctrl->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
60b43f62 2197 ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE;
5fd4ce1b
CH
2198 ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
2199 ctrl->ctrl_config |= NVME_CC_ENABLE;
2200
2201 ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
2202 if (ret)
2203 return ret;
c0f2f45b 2204 return nvme_wait_ready(ctrl, ctrl->cap, true);
5fd4ce1b 2205}
576d55d6 2206EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
5fd4ce1b
CH
2207
2208int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
2209{
07fbd32a 2210 unsigned long timeout = jiffies + (ctrl->shutdown_timeout * HZ);
5fd4ce1b
CH
2211 u32 csts;
2212 int ret;
2213
2214 ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
2215 ctrl->ctrl_config |= NVME_CC_SHN_NORMAL;
2216
2217 ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
2218 if (ret)
2219 return ret;
2220
2221 while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
2222 if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_CMPLT)
2223 break;
2224
2225 msleep(100);
2226 if (fatal_signal_pending(current))
2227 return -EINTR;
2228 if (time_after(jiffies, timeout)) {
1b3c47c1 2229 dev_err(ctrl->device,
5fd4ce1b
CH
2230 "Device shutdown incomplete; abort shutdown\n");
2231 return -ENODEV;
2232 }
2233 }
2234
2235 return ret;
2236}
576d55d6 2237EXPORT_SYMBOL_GPL(nvme_shutdown_ctrl);
5fd4ce1b 2238
da35825d
CH
2239static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
2240 struct request_queue *q)
2241{
7c88cb00
JA
2242 bool vwc = false;
2243
da35825d 2244 if (ctrl->max_hw_sectors) {
45686b61
CH
2245 u32 max_segments =
2246 (ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1;
2247
943e942e 2248 max_segments = min_not_zero(max_segments, ctrl->max_segments);
da35825d 2249 blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
45686b61 2250 blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
da35825d 2251 }
249159c5
KB
2252 if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
2253 is_power_of_2(ctrl->max_hw_sectors))
e6282aef 2254 blk_queue_chunk_sectors(q, ctrl->max_hw_sectors);
da35825d 2255 blk_queue_virt_boundary(q, ctrl->page_size - 1);
7c88cb00
JA
2256 if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
2257 vwc = true;
2258 blk_queue_write_cache(q, vwc, vwc);
da35825d
CH
2259}
2260
dbf86b39
JD
2261static int nvme_configure_timestamp(struct nvme_ctrl *ctrl)
2262{
2263 __le64 ts;
2264 int ret;
2265
2266 if (!(ctrl->oncs & NVME_CTRL_ONCS_TIMESTAMP))
2267 return 0;
2268
2269 ts = cpu_to_le64(ktime_to_ms(ktime_get_real()));
2270 ret = nvme_set_features(ctrl, NVME_FEAT_TIMESTAMP, 0, &ts, sizeof(ts),
2271 NULL);
2272 if (ret)
2273 dev_warn_once(ctrl->device,
2274 "could not set timestamp (%d)\n", ret);
2275 return ret;
2276}
2277
49cd84b6
KB
2278static int nvme_configure_acre(struct nvme_ctrl *ctrl)
2279{
2280 struct nvme_feat_host_behavior *host;
2281 int ret;
2282
2283 /* Don't bother enabling the feature if retry delay is not reported */
2284 if (!ctrl->crdt[0])
2285 return 0;
2286
2287 host = kzalloc(sizeof(*host), GFP_KERNEL);
2288 if (!host)
2289 return 0;
2290
2291 host->acre = NVME_ENABLE_ACRE;
2292 ret = nvme_set_features(ctrl, NVME_FEAT_HOST_BEHAVIOR, 0,
2293 host, sizeof(*host), NULL);
2294 kfree(host);
2295 return ret;
2296}
2297
634b8325 2298static int nvme_configure_apst(struct nvme_ctrl *ctrl)
c5552fde
AL
2299{
2300 /*
2301 * APST (Autonomous Power State Transition) lets us program a
2302 * table of power state transitions that the controller will
2303 * perform automatically. We configure it with a simple
2304 * heuristic: we are willing to spend at most 2% of the time
2305 * transitioning between power states. Therefore, when running
2306 * in any given state, we will enter the next lower-power
76e4ad09 2307 * non-operational state after waiting 50 * (enlat + exlat)
da87591b 2308 * microseconds, as long as that state's exit latency is under
c5552fde
AL
2309 * the requested maximum latency.
2310 *
2311 * We will not autonomously enter any non-operational state for
2312 * which the total latency exceeds ps_max_latency_us. Users
2313 * can set ps_max_latency_us to zero to turn off APST.
2314 */
2315
2316 unsigned apste;
2317 struct nvme_feat_auto_pst *table;
fb0dc399
AL
2318 u64 max_lat_us = 0;
2319 int max_ps = -1;
c5552fde
AL
2320 int ret;
2321
2322 /*
2323 * If APST isn't supported or if we haven't been initialized yet,
2324 * then don't do anything.
2325 */
2326 if (!ctrl->apsta)
634b8325 2327 return 0;
c5552fde
AL
2328
2329 if (ctrl->npss > 31) {
2330 dev_warn(ctrl->device, "NPSS is invalid; not using APST\n");
634b8325 2331 return 0;
c5552fde
AL
2332 }
2333
2334 table = kzalloc(sizeof(*table), GFP_KERNEL);
2335 if (!table)
634b8325 2336 return 0;
c5552fde 2337
76a5af84 2338 if (!ctrl->apst_enabled || ctrl->ps_max_latency_us == 0) {
c5552fde
AL
2339 /* Turn off APST. */
2340 apste = 0;
fb0dc399 2341 dev_dbg(ctrl->device, "APST disabled\n");
c5552fde
AL
2342 } else {
2343 __le64 target = cpu_to_le64(0);
2344 int state;
2345
2346 /*
2347 * Walk through all states from lowest- to highest-power.
2348 * According to the spec, lower-numbered states use more
2349 * power. NPSS, despite the name, is the index of the
2350 * lowest-power state, not the number of states.
2351 */
2352 for (state = (int)ctrl->npss; state >= 0; state--) {
da87591b 2353 u64 total_latency_us, exit_latency_us, transition_ms;
c5552fde
AL
2354
2355 if (target)
2356 table->entries[state] = target;
2357
ff5350a8
AL
2358 /*
2359 * Don't allow transitions to the deepest state
2360 * if it's quirked off.
2361 */
2362 if (state == ctrl->npss &&
2363 (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS))
2364 continue;
2365
c5552fde
AL
2366 /*
2367 * Is this state a useful non-operational state for
2368 * higher-power states to autonomously transition to?
2369 */
2370 if (!(ctrl->psd[state].flags &
2371 NVME_PS_FLAGS_NON_OP_STATE))
2372 continue;
2373
da87591b
KHF
2374 exit_latency_us =
2375 (u64)le32_to_cpu(ctrl->psd[state].exit_lat);
2376 if (exit_latency_us > ctrl->ps_max_latency_us)
c5552fde
AL
2377 continue;
2378
da87591b
KHF
2379 total_latency_us =
2380 exit_latency_us +
2381 le32_to_cpu(ctrl->psd[state].entry_lat);
2382
c5552fde
AL
2383 /*
2384 * This state is good. Use it as the APST idle
2385 * target for higher power states.
2386 */
2387 transition_ms = total_latency_us + 19;
2388 do_div(transition_ms, 20);
2389 if (transition_ms > (1 << 24) - 1)
2390 transition_ms = (1 << 24) - 1;
2391
2392 target = cpu_to_le64((state << 3) |
2393 (transition_ms << 8));
fb0dc399
AL
2394
2395 if (max_ps == -1)
2396 max_ps = state;
2397
2398 if (total_latency_us > max_lat_us)
2399 max_lat_us = total_latency_us;
c5552fde
AL
2400 }
2401
2402 apste = 1;
fb0dc399
AL
2403
2404 if (max_ps == -1) {
2405 dev_dbg(ctrl->device, "APST enabled but no non-operational states are available\n");
2406 } else {
2407 dev_dbg(ctrl->device, "APST enabled: max PS = %d, max round-trip latency = %lluus, table = %*phN\n",
2408 max_ps, max_lat_us, (int)sizeof(*table), table);
2409 }
c5552fde
AL
2410 }
2411
2412 ret = nvme_set_features(ctrl, NVME_FEAT_AUTO_PST, apste,
2413 table, sizeof(*table), NULL);
2414 if (ret)
2415 dev_err(ctrl->device, "failed to set APST feature (%d)\n", ret);
2416
2417 kfree(table);
634b8325 2418 return ret;
c5552fde
AL
2419}
2420
2421static void nvme_set_latency_tolerance(struct device *dev, s32 val)
2422{
2423 struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
2424 u64 latency;
2425
2426 switch (val) {
2427 case PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT:
2428 case PM_QOS_LATENCY_ANY:
2429 latency = U64_MAX;
2430 break;
2431
2432 default:
2433 latency = val;
2434 }
2435
2436 if (ctrl->ps_max_latency_us != latency) {
2437 ctrl->ps_max_latency_us = latency;
2438 nvme_configure_apst(ctrl);
2439 }
2440}
2441
bd4da3ab
AL
2442struct nvme_core_quirk_entry {
2443 /*
2444 * NVMe model and firmware strings are padded with spaces. For
2445 * simplicity, strings in the quirk table are padded with NULLs
2446 * instead.
2447 */
2448 u16 vid;
2449 const char *mn;
2450 const char *fr;
2451 unsigned long quirks;
2452};
2453
2454static const struct nvme_core_quirk_entry core_quirks[] = {
c5552fde 2455 {
be56945c
AL
2456 /*
2457 * This Toshiba device seems to die using any APST states. See:
2458 * https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1678184/comments/11
2459 */
2460 .vid = 0x1179,
2461 .mn = "THNSF5256GPUK TOSHIBA",
c5552fde 2462 .quirks = NVME_QUIRK_NO_APST,
cb32de1b
ML
2463 },
2464 {
2465 /*
2466 * This LiteON CL1-3D*-Q11 firmware version has a race
2467 * condition associated with actions related to suspend to idle
2468 * LiteON has resolved the problem in future firmware
2469 */
2470 .vid = 0x14a4,
2471 .fr = "22301111",
2472 .quirks = NVME_QUIRK_SIMPLE_SUSPEND,
be56945c 2473 }
bd4da3ab
AL
2474};
2475
2476/* match is null-terminated but idstr is space-padded. */
2477static bool string_matches(const char *idstr, const char *match, size_t len)
2478{
2479 size_t matchlen;
2480
2481 if (!match)
2482 return true;
2483
2484 matchlen = strlen(match);
2485 WARN_ON_ONCE(matchlen > len);
2486
2487 if (memcmp(idstr, match, matchlen))
2488 return false;
2489
2490 for (; matchlen < len; matchlen++)
2491 if (idstr[matchlen] != ' ')
2492 return false;
2493
2494 return true;
2495}
2496
2497static bool quirk_matches(const struct nvme_id_ctrl *id,
2498 const struct nvme_core_quirk_entry *q)
2499{
2500 return q->vid == le16_to_cpu(id->vid) &&
2501 string_matches(id->mn, q->mn, sizeof(id->mn)) &&
2502 string_matches(id->fr, q->fr, sizeof(id->fr));
2503}
2504
ab9e00cc
CH
2505static void nvme_init_subnqn(struct nvme_subsystem *subsys, struct nvme_ctrl *ctrl,
2506 struct nvme_id_ctrl *id)
180de007
CH
2507{
2508 size_t nqnlen;
2509 int off;
2510
6299358d
JD
2511 if(!(ctrl->quirks & NVME_QUIRK_IGNORE_DEV_SUBNQN)) {
2512 nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE);
2513 if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) {
2514 strlcpy(subsys->subnqn, id->subnqn, NVMF_NQN_SIZE);
2515 return;
2516 }
180de007 2517
6299358d
JD
2518 if (ctrl->vs >= NVME_VS(1, 2, 1))
2519 dev_warn(ctrl->device, "missing or invalid SUBNQN field.\n");
2520 }
180de007
CH
2521
2522 /* Generate a "fake" NQN per Figure 254 in NVMe 1.3 + ECN 001 */
ab9e00cc 2523 off = snprintf(subsys->subnqn, NVMF_NQN_SIZE,
3da584f5 2524 "nqn.2014.08.org.nvmexpress:%04x%04x",
180de007 2525 le16_to_cpu(id->vid), le16_to_cpu(id->ssvid));
ab9e00cc 2526 memcpy(subsys->subnqn + off, id->sn, sizeof(id->sn));
180de007 2527 off += sizeof(id->sn);
ab9e00cc 2528 memcpy(subsys->subnqn + off, id->mn, sizeof(id->mn));
180de007 2529 off += sizeof(id->mn);
ab9e00cc
CH
2530 memset(subsys->subnqn + off, 0, sizeof(subsys->subnqn) - off);
2531}
2532
e654dfd3 2533static void nvme_release_subsystem(struct device *dev)
ab9e00cc 2534{
e654dfd3
LG
2535 struct nvme_subsystem *subsys =
2536 container_of(dev, struct nvme_subsystem, dev);
2537
733e4b69
KB
2538 if (subsys->instance >= 0)
2539 ida_simple_remove(&nvme_instance_ida, subsys->instance);
ab9e00cc
CH
2540 kfree(subsys);
2541}
2542
ab9e00cc
CH
2543static void nvme_destroy_subsystem(struct kref *ref)
2544{
2545 struct nvme_subsystem *subsys =
2546 container_of(ref, struct nvme_subsystem, ref);
2547
2548 mutex_lock(&nvme_subsystems_lock);
2549 list_del(&subsys->entry);
2550 mutex_unlock(&nvme_subsystems_lock);
2551
ed754e5d 2552 ida_destroy(&subsys->ns_ida);
ab9e00cc
CH
2553 device_del(&subsys->dev);
2554 put_device(&subsys->dev);
2555}
2556
2557static void nvme_put_subsystem(struct nvme_subsystem *subsys)
2558{
2559 kref_put(&subsys->ref, nvme_destroy_subsystem);
2560}
2561
2562static struct nvme_subsystem *__nvme_find_get_subsystem(const char *subsysnqn)
2563{
2564 struct nvme_subsystem *subsys;
2565
2566 lockdep_assert_held(&nvme_subsystems_lock);
2567
c26aa572
JS
2568 /*
2569 * Fail matches for discovery subsystems. This results
2570 * in each discovery controller bound to a unique subsystem.
2571 * This avoids issues with validating controller values
2572 * that can only be true when there is a single unique subsystem.
2573 * There may be multiple and completely independent entities
2574 * that provide discovery controllers.
2575 */
2576 if (!strcmp(subsysnqn, NVME_DISC_SUBSYS_NAME))
2577 return NULL;
2578
ab9e00cc
CH
2579 list_for_each_entry(subsys, &nvme_subsystems, entry) {
2580 if (strcmp(subsys->subnqn, subsysnqn))
2581 continue;
2582 if (!kref_get_unless_zero(&subsys->ref))
2583 continue;
2584 return subsys;
2585 }
2586
2587 return NULL;
2588}
2589
1e496938
HR
/* Define device attribute subsys_attr_<_name> with a show method only. */
#define SUBSYS_ATTR_RO(_name, _mode, _show)			\
	struct device_attribute subsys_attr_##_name = \
		__ATTR(_name, _mode, _show, NULL)
2593
2594static ssize_t nvme_subsys_show_nqn(struct device *dev,
2595 struct device_attribute *attr,
2596 char *buf)
2597{
2598 struct nvme_subsystem *subsys =
2599 container_of(dev, struct nvme_subsystem, dev);
2600
2601 return snprintf(buf, PAGE_SIZE, "%s\n", subsys->subnqn);
2602}
2603static SUBSYS_ATTR_RO(subsysnqn, S_IRUGO, nvme_subsys_show_nqn);
2604
2605#define nvme_subsys_show_str_function(field) \
2606static ssize_t subsys_##field##_show(struct device *dev, \
2607 struct device_attribute *attr, char *buf) \
2608{ \
2609 struct nvme_subsystem *subsys = \
2610 container_of(dev, struct nvme_subsystem, dev); \
2611 return sprintf(buf, "%.*s\n", \
2612 (int)sizeof(subsys->field), subsys->field); \
2613} \
2614static SUBSYS_ATTR_RO(field, S_IRUGO, subsys_##field##_show);
2615
2616nvme_subsys_show_str_function(model);
2617nvme_subsys_show_str_function(serial);
2618nvme_subsys_show_str_function(firmware_rev);
2619
2620static struct attribute *nvme_subsys_attrs[] = {
2621 &subsys_attr_model.attr,
2622 &subsys_attr_serial.attr,
2623 &subsys_attr_firmware_rev.attr,
2624 &subsys_attr_subsysnqn.attr,
75c10e73
HR
2625#ifdef CONFIG_NVME_MULTIPATH
2626 &subsys_attr_iopolicy.attr,
2627#endif
1e496938
HR
2628 NULL,
2629};
2630
2631static struct attribute_group nvme_subsys_attrs_group = {
2632 .attrs = nvme_subsys_attrs,
2633};
2634
2635static const struct attribute_group *nvme_subsys_attrs_groups[] = {
2636 &nvme_subsys_attrs_group,
2637 NULL,
2638};
2639
1b1031ca
CH
2640static bool nvme_validate_cntlid(struct nvme_subsystem *subsys,
2641 struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
b837b283 2642{
1b1031ca 2643 struct nvme_ctrl *tmp;
b837b283 2644
32fd90c4
CH
2645 lockdep_assert_held(&nvme_subsystems_lock);
2646
1b1031ca 2647 list_for_each_entry(tmp, &subsys->ctrls, subsys_entry) {
e7c43fea 2648 if (nvme_state_terminal(tmp))
1b1031ca
CH
2649 continue;
2650
2651 if (tmp->cntlid == ctrl->cntlid) {
2652 dev_err(ctrl->device,
2653 "Duplicate cntlid %u with %s, rejecting\n",
2654 ctrl->cntlid, dev_name(tmp->device));
2655 return false;
2656 }
b837b283 2657
1b1031ca
CH
2658 if ((id->cmic & (1 << 1)) ||
2659 (ctrl->opts && ctrl->opts->discovery_nqn))
2660 continue;
2661
2662 dev_err(ctrl->device,
2663 "Subsystem does not support multiple controllers\n");
2664 return false;
b837b283 2665 }
b837b283 2666
1b1031ca 2667 return true;
b837b283
IR
2668}
2669
ab9e00cc
CH
2670static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
2671{
2672 struct nvme_subsystem *subsys, *found;
2673 int ret;
2674
2675 subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
2676 if (!subsys)
2677 return -ENOMEM;
733e4b69
KB
2678
2679 subsys->instance = -1;
ab9e00cc
CH
2680 mutex_init(&subsys->lock);
2681 kref_init(&subsys->ref);
2682 INIT_LIST_HEAD(&subsys->ctrls);
ed754e5d 2683 INIT_LIST_HEAD(&subsys->nsheads);
ab9e00cc
CH
2684 nvme_init_subnqn(subsys, ctrl, id);
2685 memcpy(subsys->serial, id->sn, sizeof(subsys->serial));
2686 memcpy(subsys->model, id->mn, sizeof(subsys->model));
2687 memcpy(subsys->firmware_rev, id->fr, sizeof(subsys->firmware_rev));
2688 subsys->vendor_id = le16_to_cpu(id->vid);
2689 subsys->cmic = id->cmic;
81adb863 2690 subsys->awupf = le16_to_cpu(id->awupf);
75c10e73
HR
2691#ifdef CONFIG_NVME_MULTIPATH
2692 subsys->iopolicy = NVME_IOPOLICY_NUMA;
2693#endif
ab9e00cc
CH
2694
2695 subsys->dev.class = nvme_subsys_class;
2696 subsys->dev.release = nvme_release_subsystem;
1e496938 2697 subsys->dev.groups = nvme_subsys_attrs_groups;
733e4b69 2698 dev_set_name(&subsys->dev, "nvme-subsys%d", ctrl->instance);
ab9e00cc
CH
2699 device_initialize(&subsys->dev);
2700
2701 mutex_lock(&nvme_subsystems_lock);
2702 found = __nvme_find_get_subsystem(subsys->subnqn);
2703 if (found) {
e654dfd3 2704 put_device(&subsys->dev);
ab9e00cc 2705 subsys = found;
32fd90c4 2706
1b1031ca 2707 if (!nvme_validate_cntlid(subsys, ctrl, id)) {
ab9e00cc 2708 ret = -EINVAL;
32fd90c4 2709 goto out_put_subsystem;
ab9e00cc 2710 }
ab9e00cc
CH
2711 } else {
2712 ret = device_add(&subsys->dev);
2713 if (ret) {
2714 dev_err(ctrl->device,
2715 "failed to register subsystem device.\n");
8c36e66f 2716 put_device(&subsys->dev);
ab9e00cc
CH
2717 goto out_unlock;
2718 }
ed754e5d 2719 ida_init(&subsys->ns_ida);
ab9e00cc
CH
2720 list_add_tail(&subsys->entry, &nvme_subsystems);
2721 }
2722
bc4f6e06
DC
2723 ret = sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj,
2724 dev_name(ctrl->device));
2725 if (ret) {
ab9e00cc
CH
2726 dev_err(ctrl->device,
2727 "failed to create sysfs link from subsystem.\n");
32fd90c4 2728 goto out_put_subsystem;
ab9e00cc
CH
2729 }
2730
733e4b69
KB
2731 if (!found)
2732 subsys->instance = ctrl->instance;
32fd90c4 2733 ctrl->subsys = subsys;
ab9e00cc 2734 list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
32fd90c4 2735 mutex_unlock(&nvme_subsystems_lock);
ab9e00cc
CH
2736 return 0;
2737
32fd90c4
CH
2738out_put_subsystem:
2739 nvme_put_subsystem(subsys);
ab9e00cc
CH
2740out_unlock:
2741 mutex_unlock(&nvme_subsystems_lock);
ab9e00cc 2742 return ret;
180de007
CH
2743}
2744
0e98719b
CH
2745int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp,
2746 void *log, size_t size, u64 offset)
c627c487
KB
2747{
2748 struct nvme_command c = { };
70da6094
MB
2749 unsigned long dwlen = size / 4 - 1;
2750
2751 c.get_log_page.opcode = nvme_admin_get_log_page;
0e98719b 2752 c.get_log_page.nsid = cpu_to_le32(nsid);
70da6094 2753 c.get_log_page.lid = log_page;
0e98719b 2754 c.get_log_page.lsp = lsp;
70da6094
MB
2755 c.get_log_page.numdl = cpu_to_le16(dwlen & ((1 << 16) - 1));
2756 c.get_log_page.numdu = cpu_to_le16(dwlen >> 16);
7ec6074f
MB
2757 c.get_log_page.lpol = cpu_to_le32(lower_32_bits(offset));
2758 c.get_log_page.lpou = cpu_to_le32(upper_32_bits(offset));
c627c487
KB
2759
2760 return nvme_submit_sync_cmd(ctrl->admin_q, &c, log, size);
2761}
2762
84fef62d
KB
2763static int nvme_get_effects_log(struct nvme_ctrl *ctrl)
2764{
2765 int ret;
2766
2767 if (!ctrl->effects)
2768 ctrl->effects = kzalloc(sizeof(*ctrl->effects), GFP_KERNEL);
2769
2770 if (!ctrl->effects)
2771 return 0;
2772
0e98719b
CH
2773 ret = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_CMD_EFFECTS, 0,
2774 ctrl->effects, sizeof(*ctrl->effects), 0);
84fef62d
KB
2775 if (ret) {
2776 kfree(ctrl->effects);
2777 ctrl->effects = NULL;
2778 }
2779 return ret;
180de007
CH
2780}
2781
7fd8930f
CH
2782/*
2783 * Initialize the cached copies of the Identify data and various controller
2784 * register in our nvme_ctrl structure. This should be called as soon as
2785 * the admin queue is fully up and running.
2786 */
2787int nvme_init_identify(struct nvme_ctrl *ctrl)
2788{
2789 struct nvme_id_ctrl *id;
7fd8930f 2790 int ret, page_shift;
a229dbf6 2791 u32 max_hw_sectors;
76a5af84 2792 bool prev_apst_enabled;
7fd8930f 2793
f3ca80fc
CH
2794 ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
2795 if (ret) {
1b3c47c1 2796 dev_err(ctrl->device, "Reading VS failed (%d)\n", ret);
f3ca80fc
CH
2797 return ret;
2798 }
4fba4458 2799 page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12;
c0f2f45b 2800 ctrl->sqsize = min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->sqsize);
7fd8930f 2801
8ef2074d 2802 if (ctrl->vs >= NVME_VS(1, 1, 0))
4fba4458 2803 ctrl->subsystem = NVME_CAP_NSSRC(ctrl->cap);
f3ca80fc 2804
7fd8930f
CH
2805 ret = nvme_identify_ctrl(ctrl, &id);
2806 if (ret) {
1b3c47c1 2807 dev_err(ctrl->device, "Identify Controller failed (%d)\n", ret);
7fd8930f
CH
2808 return -EIO;
2809 }
2810
84fef62d
KB
2811 if (id->lpa & NVME_CTRL_LPA_CMD_EFFECTS_LOG) {
2812 ret = nvme_get_effects_log(ctrl);
2813 if (ret < 0)
75c8b19a 2814 goto out_free;
84fef62d 2815 }
180de007 2816
a89fcca8
GP
2817 if (!(ctrl->ops->flags & NVME_F_FABRICS))
2818 ctrl->cntlid = le16_to_cpu(id->cntlid);
2819
bd4da3ab 2820 if (!ctrl->identified) {
ab9e00cc
CH
2821 int i;
2822
2823 ret = nvme_init_subsystem(ctrl, id);
2824 if (ret)
2825 goto out_free;
2826
bd4da3ab
AL
2827 /*
2828 * Check for quirks. Quirk can depend on firmware version,
2829 * so, in principle, the set of quirks present can change
2830 * across a reset. As a possible future enhancement, we
2831 * could re-scan for quirks every time we reinitialize
2832 * the device, but we'd have to make sure that the driver
2833 * behaves intelligently if the quirks change.
2834 */
bd4da3ab
AL
2835 for (i = 0; i < ARRAY_SIZE(core_quirks); i++) {
2836 if (quirk_matches(id, &core_quirks[i]))
2837 ctrl->quirks |= core_quirks[i].quirks;
2838 }
2839 }
2840
c35e30b4 2841 if (force_apst && (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)) {
f0425db0 2842 dev_warn(ctrl->device, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n");
c35e30b4
AL
2843 ctrl->quirks &= ~NVME_QUIRK_NO_DEEPEST_PS;
2844 }
2845
49cd84b6
KB
2846 ctrl->crdt[0] = le16_to_cpu(id->crdt1);
2847 ctrl->crdt[1] = le16_to_cpu(id->crdt2);
2848 ctrl->crdt[2] = le16_to_cpu(id->crdt3);
2849
8a9ae523 2850 ctrl->oacs = le16_to_cpu(id->oacs);
43e2d08d 2851 ctrl->oncs = le16_to_cpu(id->oncs);
2d466c7a 2852 ctrl->mtfa = le16_to_cpu(id->mtfa);
c0561f82 2853 ctrl->oaes = le32_to_cpu(id->oaes);
400b6a7b
GR
2854 ctrl->wctemp = le16_to_cpu(id->wctemp);
2855 ctrl->cctemp = le16_to_cpu(id->cctemp);
2856
6bf25d16 2857 atomic_set(&ctrl->abort_limit, id->acl + 1);
7fd8930f 2858 ctrl->vwc = id->vwc;
7fd8930f 2859 if (id->mdts)
a229dbf6 2860 max_hw_sectors = 1 << (id->mdts + page_shift - 9);
7fd8930f 2861 else
a229dbf6
CH
2862 max_hw_sectors = UINT_MAX;
2863 ctrl->max_hw_sectors =
2864 min_not_zero(ctrl->max_hw_sectors, max_hw_sectors);
7fd8930f 2865
da35825d 2866 nvme_set_queue_limits(ctrl, ctrl->admin_q);
07bfcd09 2867 ctrl->sgls = le32_to_cpu(id->sgls);
038bd4cb 2868 ctrl->kas = le16_to_cpu(id->kas);
0d0b660f 2869 ctrl->max_namespaces = le32_to_cpu(id->mnan);
3e53ba38 2870 ctrl->ctratt = le32_to_cpu(id->ctratt);
07bfcd09 2871
07fbd32a
MP
2872 if (id->rtd3e) {
2873 /* us -> s */
2874 u32 transition_time = le32_to_cpu(id->rtd3e) / 1000000;
2875
2876 ctrl->shutdown_timeout = clamp_t(unsigned int, transition_time,
2877 shutdown_timeout, 60);
2878
2879 if (ctrl->shutdown_timeout != shutdown_timeout)
1a3838d7 2880 dev_info(ctrl->device,
07fbd32a
MP
2881 "Shutdown timeout set to %u seconds\n",
2882 ctrl->shutdown_timeout);
2883 } else
2884 ctrl->shutdown_timeout = shutdown_timeout;
2885
c5552fde 2886 ctrl->npss = id->npss;
76a5af84
KHF
2887 ctrl->apsta = id->apsta;
2888 prev_apst_enabled = ctrl->apst_enabled;
c35e30b4
AL
2889 if (ctrl->quirks & NVME_QUIRK_NO_APST) {
2890 if (force_apst && id->apsta) {
f0425db0 2891 dev_warn(ctrl->device, "forcibly allowing APST due to nvme_core.force_apst -- use at your own risk\n");
76a5af84 2892 ctrl->apst_enabled = true;
c35e30b4 2893 } else {
76a5af84 2894 ctrl->apst_enabled = false;
c35e30b4
AL
2895 }
2896 } else {
76a5af84 2897 ctrl->apst_enabled = id->apsta;
c35e30b4 2898 }
c5552fde
AL
2899 memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd));
2900
d3d5b87d 2901 if (ctrl->ops->flags & NVME_F_FABRICS) {
07bfcd09
CH
2902 ctrl->icdoff = le16_to_cpu(id->icdoff);
2903 ctrl->ioccsz = le32_to_cpu(id->ioccsz);
2904 ctrl->iorcsz = le32_to_cpu(id->iorcsz);
2905 ctrl->maxcmd = le16_to_cpu(id->maxcmd);
2906
2907 /*
2908 * In fabrics we need to verify the cntlid matches the
2909 * admin connect
2910 */
634b8325 2911 if (ctrl->cntlid != le16_to_cpu(id->cntlid)) {
a8157ff3
JS
2912 dev_err(ctrl->device,
2913 "Mismatching cntlid: Connect %u vs Identify "
2914 "%u, rejecting\n",
2915 ctrl->cntlid, le16_to_cpu(id->cntlid));
07bfcd09 2916 ret = -EINVAL;
634b8325
KB
2917 goto out_free;
2918 }
038bd4cb
SG
2919
2920 if (!ctrl->opts->discovery_nqn && !ctrl->kas) {
f0425db0 2921 dev_err(ctrl->device,
038bd4cb
SG
2922 "keep-alive support is mandatory for fabrics\n");
2923 ret = -EINVAL;
634b8325 2924 goto out_free;
038bd4cb 2925 }
07bfcd09 2926 } else {
fe6d53c9
CH
2927 ctrl->hmpre = le32_to_cpu(id->hmpre);
2928 ctrl->hmmin = le32_to_cpu(id->hmmin);
044a9df1
CH
2929 ctrl->hmminds = le32_to_cpu(id->hmminds);
2930 ctrl->hmmaxd = le16_to_cpu(id->hmmaxd);
07bfcd09 2931 }
da35825d 2932
0d0b660f 2933 ret = nvme_mpath_init(ctrl, id);
7fd8930f 2934 kfree(id);
bd4da3ab 2935
0d0b660f
CH
2936 if (ret < 0)
2937 return ret;
2938
76a5af84 2939 if (ctrl->apst_enabled && !prev_apst_enabled)
c5552fde 2940 dev_pm_qos_expose_latency_tolerance(ctrl->device);
76a5af84 2941 else if (!ctrl->apst_enabled && prev_apst_enabled)
c5552fde
AL
2942 dev_pm_qos_hide_latency_tolerance(ctrl->device);
2943
634b8325
KB
2944 ret = nvme_configure_apst(ctrl);
2945 if (ret < 0)
2946 return ret;
dbf86b39
JD
2947
2948 ret = nvme_configure_timestamp(ctrl);
2949 if (ret < 0)
2950 return ret;
634b8325
KB
2951
2952 ret = nvme_configure_directives(ctrl);
2953 if (ret < 0)
2954 return ret;
c5552fde 2955
49cd84b6
KB
2956 ret = nvme_configure_acre(ctrl);
2957 if (ret < 0)
2958 return ret;
2959
400b6a7b
GR
2960 if (!ctrl->identified)
2961 nvme_hwmon_init(ctrl);
2962
bd4da3ab 2963 ctrl->identified = true;
c5552fde 2964
634b8325
KB
2965 return 0;
2966
2967out_free:
2968 kfree(id);
07bfcd09 2969 return ret;
7fd8930f 2970}
576d55d6 2971EXPORT_SYMBOL_GPL(nvme_init_identify);
7fd8930f 2972
f3ca80fc 2973static int nvme_dev_open(struct inode *inode, struct file *file)
1673f1f0 2974{
a6a5149b
CH
2975 struct nvme_ctrl *ctrl =
2976 container_of(inode->i_cdev, struct nvme_ctrl, cdev);
1673f1f0 2977
2b1b7e78
JW
2978 switch (ctrl->state) {
2979 case NVME_CTRL_LIVE:
2b1b7e78
JW
2980 break;
2981 default:
a6a5149b 2982 return -EWOULDBLOCK;
2b1b7e78
JW
2983 }
2984
a6a5149b 2985 file->private_data = ctrl;
f3ca80fc
CH
2986 return 0;
2987}
2988
bfd89471
CH
2989static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
2990{
2991 struct nvme_ns *ns;
2992 int ret;
2993
765cc031 2994 down_read(&ctrl->namespaces_rwsem);
bfd89471
CH
2995 if (list_empty(&ctrl->namespaces)) {
2996 ret = -ENOTTY;
2997 goto out_unlock;
2998 }
2999
3000 ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list);
3001 if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) {
1b3c47c1 3002 dev_warn(ctrl->device,
bfd89471
CH
3003 "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
3004 ret = -EINVAL;
3005 goto out_unlock;
3006 }
3007
1b3c47c1 3008 dev_warn(ctrl->device,
bfd89471
CH
3009 "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");
3010 kref_get(&ns->kref);
765cc031 3011 up_read(&ctrl->namespaces_rwsem);
bfd89471
CH
3012
3013 ret = nvme_user_cmd(ctrl, ns, argp);
3014 nvme_put_ns(ns);
3015 return ret;
3016
3017out_unlock:
765cc031 3018 up_read(&ctrl->namespaces_rwsem);
bfd89471
CH
3019 return ret;
3020}
3021
f3ca80fc
CH
3022static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
3023 unsigned long arg)
3024{
3025 struct nvme_ctrl *ctrl = file->private_data;
3026 void __user *argp = (void __user *)arg;
f3ca80fc
CH
3027
3028 switch (cmd) {
3029 case NVME_IOCTL_ADMIN_CMD:
3030 return nvme_user_cmd(ctrl, NULL, argp);
65e68edc
MR
3031 case NVME_IOCTL_ADMIN64_CMD:
3032 return nvme_user_cmd64(ctrl, NULL, argp);
f3ca80fc 3033 case NVME_IOCTL_IO_CMD:
bfd89471 3034 return nvme_dev_user_cmd(ctrl, argp);
f3ca80fc 3035 case NVME_IOCTL_RESET:
1b3c47c1 3036 dev_warn(ctrl->device, "resetting controller\n");
d86c4d8e 3037 return nvme_reset_ctrl_sync(ctrl);
f3ca80fc
CH
3038 case NVME_IOCTL_SUBSYS_RESET:
3039 return nvme_reset_subsystem(ctrl);
9ec3bb2f
KB
3040 case NVME_IOCTL_RESCAN:
3041 nvme_queue_scan(ctrl);
3042 return 0;
f3ca80fc
CH
3043 default:
3044 return -ENOTTY;
3045 }
3046}
3047
3048static const struct file_operations nvme_dev_fops = {
3049 .owner = THIS_MODULE,
3050 .open = nvme_dev_open,
f3ca80fc 3051 .unlocked_ioctl = nvme_dev_ioctl,
1832f2d8 3052 .compat_ioctl = compat_ptr_ioctl,
f3ca80fc
CH
3053};
3054
3055static ssize_t nvme_sysfs_reset(struct device *dev,
3056 struct device_attribute *attr, const char *buf,
3057 size_t count)
3058{
3059 struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
3060 int ret;
3061
d86c4d8e 3062 ret = nvme_reset_ctrl_sync(ctrl);
f3ca80fc
CH
3063 if (ret < 0)
3064 return ret;
3065 return count;
1673f1f0 3066}
f3ca80fc 3067static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset);
1673f1f0 3068
9ec3bb2f
KB
3069static ssize_t nvme_sysfs_rescan(struct device *dev,
3070 struct device_attribute *attr, const char *buf,
3071 size_t count)
3072{
3073 struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
3074
3075 nvme_queue_scan(ctrl);
3076 return count;
3077}
3078static DEVICE_ATTR(rescan_controller, S_IWUSR, NULL, nvme_sysfs_rescan);
3079
5b85b826
CH
3080static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev)
3081{
3082 struct gendisk *disk = dev_to_disk(dev);
3083
3084 if (disk->fops == &nvme_fops)
3085 return nvme_get_ns_from_dev(dev)->head;
3086 else
3087 return disk->private_data;
3088}
3089
118472ab 3090static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
5b85b826 3091 char *buf)
118472ab 3092{
5b85b826
CH
3093 struct nvme_ns_head *head = dev_to_ns_head(dev);
3094 struct nvme_ns_ids *ids = &head->ids;
3095 struct nvme_subsystem *subsys = head->subsys;
ab9e00cc
CH
3096 int serial_len = sizeof(subsys->serial);
3097 int model_len = sizeof(subsys->model);
118472ab 3098
002fab04
CH
3099 if (!uuid_is_null(&ids->uuid))
3100 return sprintf(buf, "uuid.%pU\n", &ids->uuid);
6484f5d1 3101
002fab04
CH
3102 if (memchr_inv(ids->nguid, 0, sizeof(ids->nguid)))
3103 return sprintf(buf, "eui.%16phN\n", ids->nguid);
118472ab 3104
002fab04
CH
3105 if (memchr_inv(ids->eui64, 0, sizeof(ids->eui64)))
3106 return sprintf(buf, "eui.%8phN\n", ids->eui64);
118472ab 3107
ab9e00cc
CH
3108 while (serial_len > 0 && (subsys->serial[serial_len - 1] == ' ' ||
3109 subsys->serial[serial_len - 1] == '\0'))
118472ab 3110 serial_len--;
ab9e00cc
CH
3111 while (model_len > 0 && (subsys->model[model_len - 1] == ' ' ||
3112 subsys->model[model_len - 1] == '\0'))
118472ab
KB
3113 model_len--;
3114
ab9e00cc
CH
3115 return sprintf(buf, "nvme.%04x-%*phN-%*phN-%08x\n", subsys->vendor_id,
3116 serial_len, subsys->serial, model_len, subsys->model,
5b85b826 3117 head->ns_id);
118472ab 3118}
c828a892 3119static DEVICE_ATTR_RO(wwid);
118472ab 3120
d934f984 3121static ssize_t nguid_show(struct device *dev, struct device_attribute *attr,
5b85b826 3122 char *buf)
d934f984 3123{
5b85b826 3124 return sprintf(buf, "%pU\n", dev_to_ns_head(dev)->ids.nguid);
d934f984 3125}
c828a892 3126static DEVICE_ATTR_RO(nguid);
d934f984 3127
2b9b6e86 3128static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
5b85b826 3129 char *buf)
2b9b6e86 3130{
5b85b826 3131 struct nvme_ns_ids *ids = &dev_to_ns_head(dev)->ids;
d934f984
JT
3132
3133 /* For backward compatibility expose the NGUID to userspace if
3134 * we have no UUID set
3135 */
002fab04 3136 if (uuid_is_null(&ids->uuid)) {
d934f984
JT
3137 printk_ratelimited(KERN_WARNING
3138 "No UUID available providing old NGUID\n");
002fab04 3139 return sprintf(buf, "%pU\n", ids->nguid);
d934f984 3140 }
002fab04 3141 return sprintf(buf, "%pU\n", &ids->uuid);
2b9b6e86 3142}
c828a892 3143static DEVICE_ATTR_RO(uuid);
2b9b6e86
KB
3144
3145static ssize_t eui_show(struct device *dev, struct device_attribute *attr,
5b85b826 3146 char *buf)
2b9b6e86 3147{
5b85b826 3148 return sprintf(buf, "%8ph\n", dev_to_ns_head(dev)->ids.eui64);
2b9b6e86 3149}
c828a892 3150static DEVICE_ATTR_RO(eui);
2b9b6e86
KB
3151
3152static ssize_t nsid_show(struct device *dev, struct device_attribute *attr,
5b85b826 3153 char *buf)
2b9b6e86 3154{
5b85b826 3155 return sprintf(buf, "%d\n", dev_to_ns_head(dev)->ns_id);
2b9b6e86 3156}
c828a892 3157static DEVICE_ATTR_RO(nsid);
2b9b6e86 3158
5b85b826 3159static struct attribute *nvme_ns_id_attrs[] = {
118472ab 3160 &dev_attr_wwid.attr,
2b9b6e86 3161 &dev_attr_uuid.attr,
d934f984 3162 &dev_attr_nguid.attr,
2b9b6e86
KB
3163 &dev_attr_eui.attr,
3164 &dev_attr_nsid.attr,
0d0b660f
CH
3165#ifdef CONFIG_NVME_MULTIPATH
3166 &dev_attr_ana_grpid.attr,
3167 &dev_attr_ana_state.attr,
3168#endif
2b9b6e86
KB
3169 NULL,
3170};
3171
5b85b826 3172static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj,
2b9b6e86
KB
3173 struct attribute *a, int n)
3174{
3175 struct device *dev = container_of(kobj, struct device, kobj);
5b85b826 3176 struct nvme_ns_ids *ids = &dev_to_ns_head(dev)->ids;
2b9b6e86
KB
3177
3178 if (a == &dev_attr_uuid.attr) {
a04b5de5 3179 if (uuid_is_null(&ids->uuid) &&
002fab04 3180 !memchr_inv(ids->nguid, 0, sizeof(ids->nguid)))
d934f984
JT
3181 return 0;
3182 }
3183 if (a == &dev_attr_nguid.attr) {
002fab04 3184 if (!memchr_inv(ids->nguid, 0, sizeof(ids->nguid)))
2b9b6e86
KB
3185 return 0;
3186 }
3187 if (a == &dev_attr_eui.attr) {
002fab04 3188 if (!memchr_inv(ids->eui64, 0, sizeof(ids->eui64)))
2b9b6e86
KB
3189 return 0;
3190 }
0d0b660f
CH
3191#ifdef CONFIG_NVME_MULTIPATH
3192 if (a == &dev_attr_ana_grpid.attr || a == &dev_attr_ana_state.attr) {
3193 if (dev_to_disk(dev)->fops != &nvme_fops) /* per-path attr */
3194 return 0;
3195 if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl))
3196 return 0;
3197 }
3198#endif
2b9b6e86
KB
3199 return a->mode;
3200}
3201
eb090c4c 3202static const struct attribute_group nvme_ns_id_attr_group = {
5b85b826
CH
3203 .attrs = nvme_ns_id_attrs,
3204 .is_visible = nvme_ns_id_attrs_are_visible,
2b9b6e86
KB
3205};
3206
33b14f67
HR
3207const struct attribute_group *nvme_ns_id_attr_groups[] = {
3208 &nvme_ns_id_attr_group,
3209#ifdef CONFIG_NVM
3210 &nvme_nvm_attr_group,
3211#endif
3212 NULL,
3213};
3214
931e1c22 3215#define nvme_show_str_function(field) \
779ff756
KB
3216static ssize_t field##_show(struct device *dev, \
3217 struct device_attribute *attr, char *buf) \
3218{ \
3219 struct nvme_ctrl *ctrl = dev_get_drvdata(dev); \
ab9e00cc
CH
3220 return sprintf(buf, "%.*s\n", \
3221 (int)sizeof(ctrl->subsys->field), ctrl->subsys->field); \
779ff756
KB
3222} \
3223static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);
3224
ab9e00cc
CH
3225nvme_show_str_function(model);
3226nvme_show_str_function(serial);
3227nvme_show_str_function(firmware_rev);
3228
931e1c22
ML
3229#define nvme_show_int_function(field) \
3230static ssize_t field##_show(struct device *dev, \
3231 struct device_attribute *attr, char *buf) \
3232{ \
3233 struct nvme_ctrl *ctrl = dev_get_drvdata(dev); \
3234 return sprintf(buf, "%d\n", ctrl->field); \
3235} \
3236static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);
3237
931e1c22 3238nvme_show_int_function(cntlid);
103e515e 3239nvme_show_int_function(numa_node);
2b1ff255
JS
3240nvme_show_int_function(queue_count);
3241nvme_show_int_function(sqsize);
779ff756 3242
1a353d85
ML
3243static ssize_t nvme_sysfs_delete(struct device *dev,
3244 struct device_attribute *attr, const char *buf,
3245 size_t count)
3246{
3247 struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
3248
ce151813
IR
3249 /* Can't delete non-created controllers */
3250 if (!ctrl->created)
3251 return -EBUSY;
3252
1a353d85 3253 if (device_remove_file_self(dev, attr))
c5017e85 3254 nvme_delete_ctrl_sync(ctrl);
1a353d85
ML
3255 return count;
3256}
3257static DEVICE_ATTR(delete_controller, S_IWUSR, NULL, nvme_sysfs_delete);
3258
3259static ssize_t nvme_sysfs_show_transport(struct device *dev,
3260 struct device_attribute *attr,
3261 char *buf)
3262{
3263 struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
3264
3265 return snprintf(buf, PAGE_SIZE, "%s\n", ctrl->ops->name);
3266}
3267static DEVICE_ATTR(transport, S_IRUGO, nvme_sysfs_show_transport, NULL);
3268
8432bdb2
SG
3269static ssize_t nvme_sysfs_show_state(struct device *dev,
3270 struct device_attribute *attr,
3271 char *buf)
3272{
3273 struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
3274 static const char *const state_name[] = {
3275 [NVME_CTRL_NEW] = "new",
3276 [NVME_CTRL_LIVE] = "live",
3277 [NVME_CTRL_RESETTING] = "resetting",
ad6a0a52 3278 [NVME_CTRL_CONNECTING] = "connecting",
8432bdb2
SG
3279 [NVME_CTRL_DELETING] = "deleting",
3280 [NVME_CTRL_DEAD] = "dead",
3281 };
3282
3283 if ((unsigned)ctrl->state < ARRAY_SIZE(state_name) &&
3284 state_name[ctrl->state])
3285 return sprintf(buf, "%s\n", state_name[ctrl->state]);
3286
3287 return sprintf(buf, "unknown state\n");
3288}
3289
3290static DEVICE_ATTR(state, S_IRUGO, nvme_sysfs_show_state, NULL);
3291
1a353d85
ML
3292static ssize_t nvme_sysfs_show_subsysnqn(struct device *dev,
3293 struct device_attribute *attr,
3294 char *buf)
3295{
3296 struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
3297
ab9e00cc 3298 return snprintf(buf, PAGE_SIZE, "%s\n", ctrl->subsys->subnqn);
1a353d85
ML
3299}
3300static DEVICE_ATTR(subsysnqn, S_IRUGO, nvme_sysfs_show_subsysnqn, NULL);
3301
76171c6c
SG
3302static ssize_t nvme_sysfs_show_hostnqn(struct device *dev,
3303 struct device_attribute *attr,
3304 char *buf)
3305{
3306 struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
3307
3308 return snprintf(buf, PAGE_SIZE, "%s\n", ctrl->opts->host->nqn);
3309}
3310static DEVICE_ATTR(hostnqn, S_IRUGO, nvme_sysfs_show_hostnqn, NULL);
3311
45fb19f7
SG
3312static ssize_t nvme_sysfs_show_hostid(struct device *dev,
3313 struct device_attribute *attr,
3314 char *buf)
3315{
3316 struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
3317
3318 return snprintf(buf, PAGE_SIZE, "%pU\n", &ctrl->opts->host->id);
3319}
3320static DEVICE_ATTR(hostid, S_IRUGO, nvme_sysfs_show_hostid, NULL);
3321
1a353d85
ML
3322static ssize_t nvme_sysfs_show_address(struct device *dev,
3323 struct device_attribute *attr,
3324 char *buf)
3325{
3326 struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
3327
3328 return ctrl->ops->get_address(ctrl, buf, PAGE_SIZE);
3329}
3330static DEVICE_ATTR(address, S_IRUGO, nvme_sysfs_show_address, NULL);
3331
779ff756
KB
3332static struct attribute *nvme_dev_attrs[] = {
3333 &dev_attr_reset_controller.attr,
9ec3bb2f 3334 &dev_attr_rescan_controller.attr,
779ff756
KB
3335 &dev_attr_model.attr,
3336 &dev_attr_serial.attr,
3337 &dev_attr_firmware_rev.attr,
931e1c22 3338 &dev_attr_cntlid.attr,
1a353d85
ML
3339 &dev_attr_delete_controller.attr,
3340 &dev_attr_transport.attr,
3341 &dev_attr_subsysnqn.attr,
3342 &dev_attr_address.attr,
8432bdb2 3343 &dev_attr_state.attr,
103e515e 3344 &dev_attr_numa_node.attr,
2b1ff255
JS
3345 &dev_attr_queue_count.attr,
3346 &dev_attr_sqsize.attr,
76171c6c 3347 &dev_attr_hostnqn.attr,
45fb19f7 3348 &dev_attr_hostid.attr,
779ff756
KB
3349 NULL
3350};
3351
1a353d85
ML
3352static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj,
3353 struct attribute *a, int n)
3354{
3355 struct device *dev = container_of(kobj, struct device, kobj);
3356 struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
3357
49d3d50b
CH
3358 if (a == &dev_attr_delete_controller.attr && !ctrl->ops->delete_ctrl)
3359 return 0;
3360 if (a == &dev_attr_address.attr && !ctrl->ops->get_address)
3361 return 0;
76171c6c
SG
3362 if (a == &dev_attr_hostnqn.attr && !ctrl->opts)
3363 return 0;
45fb19f7
SG
3364 if (a == &dev_attr_hostid.attr && !ctrl->opts)
3365 return 0;
1a353d85
ML
3366
3367 return a->mode;
3368}
3369
779ff756 3370static struct attribute_group nvme_dev_attrs_group = {
1a353d85
ML
3371 .attrs = nvme_dev_attrs,
3372 .is_visible = nvme_dev_attrs_are_visible,
779ff756
KB
3373};
3374
3375static const struct attribute_group *nvme_dev_attr_groups[] = {
3376 &nvme_dev_attrs_group,
3377 NULL,
3378};
3379
026d2ef7 3380static struct nvme_ns_head *nvme_find_ns_head(struct nvme_subsystem *subsys,
ed754e5d
CH
3381 unsigned nsid)
3382{
3383 struct nvme_ns_head *h;
3384
3385 lockdep_assert_held(&subsys->lock);
3386
3387 list_for_each_entry(h, &subsys->nsheads, entry) {
3388 if (h->ns_id == nsid && kref_get_unless_zero(&h->ref))
3389 return h;
3390 }
3391
3392 return NULL;
3393}
3394
3395static int __nvme_check_ids(struct nvme_subsystem *subsys,
3396 struct nvme_ns_head *new)
3397{
3398 struct nvme_ns_head *h;
3399
3400 lockdep_assert_held(&subsys->lock);
3401
3402 list_for_each_entry(h, &subsys->nsheads, entry) {
3403 if (nvme_ns_ids_valid(&new->ids) &&
2079699c 3404 !list_empty(&h->list) &&
ed754e5d
CH
3405 nvme_ns_ids_equal(&new->ids, &h->ids))
3406 return -EINVAL;
3407 }
3408
3409 return 0;
3410}
3411
3412static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
43fcd9e1
CH
3413 unsigned nsid, struct nvme_id_ns *id,
3414 struct nvme_ns_ids *ids)
ed754e5d
CH
3415{
3416 struct nvme_ns_head *head;
f3334447 3417 size_t size = sizeof(*head);
ed754e5d
CH
3418 int ret = -ENOMEM;
3419
f3334447
CH
3420#ifdef CONFIG_NVME_MULTIPATH
3421 size += num_possible_nodes() * sizeof(struct nvme_ns *);
3422#endif
3423
3424 head = kzalloc(size, GFP_KERNEL);
ed754e5d
CH
3425 if (!head)
3426 goto out;
3427 ret = ida_simple_get(&ctrl->subsys->ns_ida, 1, 0, GFP_KERNEL);
3428 if (ret < 0)
3429 goto out_free_head;
3430 head->instance = ret;
3431 INIT_LIST_HEAD(&head->list);
fd92c77f
MG
3432 ret = init_srcu_struct(&head->srcu);
3433 if (ret)
3434 goto out_ida_remove;
ed754e5d
CH
3435 head->subsys = ctrl->subsys;
3436 head->ns_id = nsid;
43fcd9e1 3437 head->ids = *ids;
ed754e5d
CH
3438 kref_init(&head->ref);
3439
ed754e5d
CH
3440 ret = __nvme_check_ids(ctrl->subsys, head);
3441 if (ret) {
3442 dev_err(ctrl->device,
3443 "duplicate IDs for nsid %d\n", nsid);
3444 goto out_cleanup_srcu;
3445 }
3446
32acab31
CH
3447 ret = nvme_mpath_alloc_disk(ctrl, head);
3448 if (ret)
3449 goto out_cleanup_srcu;
3450
ed754e5d 3451 list_add_tail(&head->entry, &ctrl->subsys->nsheads);
12d9f070
JW
3452
3453 kref_get(&ctrl->subsys->ref);
3454
ed754e5d
CH
3455 return head;
3456out_cleanup_srcu:
3457 cleanup_srcu_struct(&head->srcu);
fd92c77f 3458out_ida_remove:
ed754e5d
CH
3459 ida_simple_remove(&ctrl->subsys->ns_ida, head->instance);
3460out_free_head:
3461 kfree(head);
3462out:
538af88e
SG
3463 if (ret > 0)
3464 ret = blk_status_to_errno(nvme_error_status(ret));
ed754e5d
CH
3465 return ERR_PTR(ret);
3466}
3467
3468static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
9bd82b1a 3469 struct nvme_id_ns *id)
ed754e5d
CH
3470{
3471 struct nvme_ctrl *ctrl = ns->ctrl;
3472 bool is_shared = id->nmic & (1 << 0);
3473 struct nvme_ns_head *head = NULL;
43fcd9e1 3474 struct nvme_ns_ids ids;
ed754e5d
CH
3475 int ret = 0;
3476
43fcd9e1
CH
3477 ret = nvme_report_ns_ids(ctrl, nsid, id, &ids);
3478 if (ret)
3479 goto out;
3480
ed754e5d
CH
3481 mutex_lock(&ctrl->subsys->lock);
3482 if (is_shared)
026d2ef7 3483 head = nvme_find_ns_head(ctrl->subsys, nsid);
ed754e5d 3484 if (!head) {
43fcd9e1 3485 head = nvme_alloc_ns_head(ctrl, nsid, id, &ids);
ed754e5d
CH
3486 if (IS_ERR(head)) {
3487 ret = PTR_ERR(head);
3488 goto out_unlock;
3489 }
ed754e5d 3490 } else {
ed754e5d
CH
3491 if (!nvme_ns_ids_equal(&head->ids, &ids)) {
3492 dev_err(ctrl->device,
3493 "IDs don't match for shared namespace %d\n",
3494 nsid);
3495 ret = -EINVAL;
3496 goto out_unlock;
3497 }
ed754e5d
CH
3498 }
3499
3500 list_add_tail(&ns->siblings, &head->list);
3501 ns->head = head;
3502
3503out_unlock:
3504 mutex_unlock(&ctrl->subsys->lock);
43fcd9e1 3505out:
538af88e
SG
3506 if (ret > 0)
3507 ret = blk_status_to_errno(nvme_error_status(ret));
ed754e5d
CH
3508 return ret;
3509}
3510
5bae7f73
CH
3511static int ns_cmp(void *priv, struct list_head *a, struct list_head *b)
3512{
3513 struct nvme_ns *nsa = container_of(a, struct nvme_ns, list);
3514 struct nvme_ns *nsb = container_of(b, struct nvme_ns, list);
3515
ed754e5d 3516 return nsa->head->ns_id - nsb->head->ns_id;
5bae7f73
CH
3517}
3518
32f0c4af 3519static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
5bae7f73 3520{
32f0c4af 3521 struct nvme_ns *ns, *ret = NULL;
69d3b8ac 3522
765cc031 3523 down_read(&ctrl->namespaces_rwsem);
5bae7f73 3524 list_for_each_entry(ns, &ctrl->namespaces, list) {
ed754e5d 3525 if (ns->head->ns_id == nsid) {
2dd41228
CH
3526 if (!kref_get_unless_zero(&ns->kref))
3527 continue;
32f0c4af
KB
3528 ret = ns;
3529 break;
3530 }
ed754e5d 3531 if (ns->head->ns_id > nsid)
5bae7f73
CH
3532 break;
3533 }
765cc031 3534 up_read(&ctrl->namespaces_rwsem);
32f0c4af 3535 return ret;
5bae7f73
CH
3536}
3537
f5d11840
JA
3538static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns)
3539{
3540 struct streams_directive_params s;
3541 int ret;
3542
3543 if (!ctrl->nr_streams)
3544 return 0;
3545
ed754e5d 3546 ret = nvme_get_stream_params(ctrl, &s, ns->head->ns_id);
f5d11840
JA
3547 if (ret)
3548 return ret;
3549
3550 ns->sws = le32_to_cpu(s.sws);
3551 ns->sgs = le16_to_cpu(s.sgs);
3552
3553 if (ns->sws) {
3554 unsigned int bs = 1 << ns->lba_shift;
3555
3556 blk_queue_io_min(ns->queue, bs * ns->sws);
3557 if (ns->sgs)
3558 blk_queue_io_opt(ns->queue, bs * ns->sws * ns->sgs);
3559 }
3560
3561 return 0;
3562}
3563
adce7e98 3564static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
5bae7f73
CH
3565{
3566 struct nvme_ns *ns;
3567 struct gendisk *disk;
ac81bfa9
MB
3568 struct nvme_id_ns *id;
3569 char disk_name[DISK_NAME_LEN];
ab4ab09c 3570 int node = ctrl->numa_node, flags = GENHD_FL_EXT_DEVT, ret;
5bae7f73
CH
3571
3572 ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
3573 if (!ns)
adce7e98 3574 return;
5bae7f73
CH
3575
3576 ns->queue = blk_mq_init_queue(ctrl->tagset);
adce7e98 3577 if (IS_ERR(ns->queue))
ed754e5d 3578 goto out_free_ns;
e0596ab2 3579
7d30c81b 3580 if (ctrl->opts && ctrl->opts->data_digest)
958f2a0f
MS
3581 ns->queue->backing_dev_info->capabilities
3582 |= BDI_CAP_STABLE_WRITES;
3583
8b904b5b 3584 blk_queue_flag_set(QUEUE_FLAG_NONROT, ns->queue);
e0596ab2
LG
3585 if (ctrl->ops->flags & NVME_F_PCI_P2PDMA)
3586 blk_queue_flag_set(QUEUE_FLAG_PCI_P2PDMA, ns->queue);
3587
5bae7f73
CH
3588 ns->queue->queuedata = ns;
3589 ns->ctrl = ctrl;
3590
5bae7f73 3591 kref_init(&ns->kref);
5bae7f73 3592 ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */
5bae7f73
CH
3593
3594 blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
da35825d 3595 nvme_set_queue_limits(ctrl, ns->queue);
5bae7f73 3596
331813f6
SG
3597 ret = nvme_identify_ns(ctrl, nsid, &id);
3598 if (ret)
ac81bfa9
MB
3599 goto out_free_queue;
3600
adce7e98 3601 if (id->ncap == 0) /* no namespace (legacy quirk) */
cdbff4f2
CH
3602 goto out_free_id;
3603
ab4ab09c
HR
3604 ret = nvme_init_ns_head(ns, nsid, id);
3605 if (ret)
ed754e5d 3606 goto out_free_id;
654b4a4a 3607 nvme_setup_streams_ns(ctrl, ns);
a785dbcc 3608 nvme_set_disk_name(disk_name, ns, ctrl, &flags);
cdbff4f2 3609
3dc87dd0 3610 disk = alloc_disk_node(0, node);
adce7e98 3611 if (!disk)
ed754e5d 3612 goto out_unlink_ns;
ac81bfa9 3613
3dc87dd0
MB
3614 disk->fops = &nvme_fops;
3615 disk->private_data = ns;
3616 disk->queue = ns->queue;
32acab31 3617 disk->flags = flags;
3dc87dd0
MB
3618 memcpy(disk->disk_name, disk_name, DISK_NAME_LEN);
3619 ns->disk = disk;
3620
3621 __nvme_revalidate_disk(disk, id);
5bae7f73 3622
85136c01 3623 if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
ab4ab09c
HR
3624 ret = nvme_nvm_register(ns, disk_name, node);
3625 if (ret) {
85136c01
MB
3626 dev_warn(ctrl->device, "LightNVM init failure\n");
3627 goto out_put_disk;
3628 }
3629 }
3630
765cc031 3631 down_write(&ctrl->namespaces_rwsem);
32f0c4af 3632 list_add_tail(&ns->list, &ctrl->namespaces);
765cc031 3633 up_write(&ctrl->namespaces_rwsem);
32f0c4af 3634
d22524a4 3635 nvme_get_ctrl(ctrl);
ac81bfa9 3636
33b14f67 3637 device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups);
32acab31 3638
0d0b660f 3639 nvme_mpath_add_disk(ns, id);
a3646451 3640 nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name);
0d0b660f
CH
3641 kfree(id);
3642
adce7e98 3643 return;
85136c01 3644 out_put_disk:
132be623
NC
3645 /* prevent double queue cleanup */
3646 ns->disk->queue = NULL;
85136c01 3647 put_disk(ns->disk);
ed754e5d
CH
3648 out_unlink_ns:
3649 mutex_lock(&ctrl->subsys->lock);
3650 list_del_rcu(&ns->siblings);
3651 mutex_unlock(&ctrl->subsys->lock);
a63b8370 3652 nvme_put_ns_head(ns->head);
ac81bfa9
MB
3653 out_free_id:
3654 kfree(id);
5bae7f73
CH
3655 out_free_queue:
3656 blk_cleanup_queue(ns->queue);
3657 out_free_ns:
3658 kfree(ns);
3659}
3660
3661static void nvme_ns_remove(struct nvme_ns *ns)
3662{
646017a6
KB
3663 if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags))
3664 return;
69d3b8ac 3665
a3646451 3666 nvme_fault_inject_fini(&ns->fault_inject);
2181e455
AE
3667
3668 mutex_lock(&ns->ctrl->subsys->lock);
3669 list_del_rcu(&ns->siblings);
3670 mutex_unlock(&ns->ctrl->subsys->lock);
3671 synchronize_rcu(); /* guarantee not available in head->list */
3672 nvme_mpath_clear_current_path(ns);
3673 synchronize_srcu(&ns->head->srcu); /* wait for concurrent submissions */
3674
b0b4e09c 3675 if (ns->disk && ns->disk->flags & GENHD_FL_UP) {
5bae7f73 3676 del_gendisk(ns->disk);
5bae7f73 3677 blk_cleanup_queue(ns->queue);
bd9f5d65
ML
3678 if (blk_get_integrity(ns->disk))
3679 blk_integrity_unregister(ns->disk);
5bae7f73 3680 }
32f0c4af 3681
765cc031 3682 down_write(&ns->ctrl->namespaces_rwsem);
5bae7f73 3683 list_del_init(&ns->list);
765cc031 3684 up_write(&ns->ctrl->namespaces_rwsem);
32f0c4af 3685
479a322f 3686 nvme_mpath_check_last_path(ns);
5bae7f73
CH
3687 nvme_put_ns(ns);
3688}
3689
540c801c
KB
3690static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid)
3691{
3692 struct nvme_ns *ns;
3693
32f0c4af 3694 ns = nvme_find_get_ns(ctrl, nsid);
540c801c 3695 if (ns) {
b0b4e09c 3696 if (ns->disk && revalidate_disk(ns->disk))
540c801c 3697 nvme_ns_remove(ns);
32f0c4af 3698 nvme_put_ns(ns);
540c801c
KB
3699 } else
3700 nvme_alloc_ns(ctrl, nsid);
3701}
3702
47b0e50a
SB
3703static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
3704 unsigned nsid)
3705{
3706 struct nvme_ns *ns, *next;
6f8e0d78 3707 LIST_HEAD(rm_list);
47b0e50a 3708
765cc031 3709 down_write(&ctrl->namespaces_rwsem);
47b0e50a 3710 list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) {
cf39a6bc 3711 if (ns->head->ns_id > nsid || test_bit(NVME_NS_DEAD, &ns->flags))
6f8e0d78 3712 list_move_tail(&ns->list, &rm_list);
47b0e50a 3713 }
765cc031 3714 up_write(&ctrl->namespaces_rwsem);
6f8e0d78
JW
3715
3716 list_for_each_entry_safe(ns, next, &rm_list, list)
3717 nvme_ns_remove(ns);
3718
47b0e50a
SB
3719}
3720
540c801c
KB
3721static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
3722{
3723 struct nvme_ns *ns;
3724 __le32 *ns_list;
c8e8c77b
JL
3725 unsigned i, j, nsid, prev = 0;
3726 unsigned num_lists = DIV_ROUND_UP_ULL((u64)nn, 1024);
540c801c
KB
3727 int ret = 0;
3728
42595eb7 3729 ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
540c801c
KB
3730 if (!ns_list)
3731 return -ENOMEM;
3732
3733 for (i = 0; i < num_lists; i++) {
3734 ret = nvme_identify_ns_list(ctrl, prev, ns_list);
3735 if (ret)
47b0e50a 3736 goto free;
540c801c
KB
3737
3738 for (j = 0; j < min(nn, 1024U); j++) {
3739 nsid = le32_to_cpu(ns_list[j]);
3740 if (!nsid)
3741 goto out;
3742
3743 nvme_validate_ns(ctrl, nsid);
3744
3745 while (++prev < nsid) {
32f0c4af
KB
3746 ns = nvme_find_get_ns(ctrl, prev);
3747 if (ns) {
540c801c 3748 nvme_ns_remove(ns);
32f0c4af
KB
3749 nvme_put_ns(ns);
3750 }
540c801c
KB
3751 }
3752 }
3753 nn -= j;
3754 }
3755 out:
47b0e50a
SB
3756 nvme_remove_invalid_namespaces(ctrl, prev);
3757 free:
540c801c
KB
3758 kfree(ns_list);
3759 return ret;
3760}
3761
/*
 * Scan namespaces by validating every id from 1 to @nn in turn, then remove
 * all namespaces with an id above @nn.
 */
static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl, unsigned nn)
{
	unsigned nsid = 1;

	while (nsid <= nn) {
		nvme_validate_ns(ctrl, nsid);
		nsid++;
	}

	nvme_remove_invalid_namespaces(ctrl, nn);
}
3771
f493af37 3772static void nvme_clear_changed_ns_log(struct nvme_ctrl *ctrl)
30d90964
CH
3773{
3774 size_t log_size = NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32);
3775 __le32 *log;
f493af37 3776 int error;
30d90964
CH
3777
3778 log = kzalloc(log_size, GFP_KERNEL);
3779 if (!log)
f493af37 3780 return;
30d90964 3781
f493af37
CH
3782 /*
3783 * We need to read the log to clear the AEN, but we don't want to rely
3784 * on it for the changed namespace information as userspace could have
3785 * raced with us in reading the log page, which could cause us to miss
3786 * updates.
3787 */
0e98719b
CH
3788 error = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_CHANGED_NS, 0, log,
3789 log_size, 0);
f493af37 3790 if (error)
30d90964
CH
3791 dev_warn(ctrl->device,
3792 "reading changed ns log failed: %d\n", error);
30d90964 3793
30d90964 3794 kfree(log);
30d90964
CH
3795}
3796
5955be21 3797static void nvme_scan_work(struct work_struct *work)
5bae7f73 3798{
5955be21
CH
3799 struct nvme_ctrl *ctrl =
3800 container_of(work, struct nvme_ctrl, scan_work);
5bae7f73 3801 struct nvme_id_ctrl *id;
540c801c 3802 unsigned nn;
5bae7f73 3803
5d02a5c1
KB
3804 /* No tagset on a live ctrl means IO queues could not created */
3805 if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset)
5955be21
CH
3806 return;
3807
77016199 3808 if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) {
30d90964 3809 dev_info(ctrl->device, "rescanning namespaces.\n");
f493af37 3810 nvme_clear_changed_ns_log(ctrl);
30d90964
CH
3811 }
3812
5bae7f73
CH
3813 if (nvme_identify_ctrl(ctrl, &id))
3814 return;
540c801c 3815
e7ad43c3 3816 mutex_lock(&ctrl->scan_lock);
540c801c 3817 nn = le32_to_cpu(id->nn);
8ef2074d 3818 if (ctrl->vs >= NVME_VS(1, 1, 0) &&
540c801c
KB
3819 !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
3820 if (!nvme_scan_ns_list(ctrl, nn))
30d90964 3821 goto out_free_id;
540c801c 3822 }
5955be21 3823 nvme_scan_ns_sequential(ctrl, nn);
30d90964 3824out_free_id:
e7ad43c3 3825 mutex_unlock(&ctrl->scan_lock);
30d90964 3826 kfree(id);
765cc031 3827 down_write(&ctrl->namespaces_rwsem);
540c801c 3828 list_sort(NULL, &ctrl->namespaces, ns_cmp);
765cc031 3829 up_write(&ctrl->namespaces_rwsem);
5955be21 3830}
5bae7f73 3831
32f0c4af
KB
3832/*
3833 * This function iterates the namespace list unlocked to allow recovery from
3834 * controller failure. It is up to the caller to ensure the namespace list is
3835 * not modified by scan work while this function is executing.
3836 */
5bae7f73
CH
3837void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
3838{
3839 struct nvme_ns *ns, *next;
6f8e0d78 3840 LIST_HEAD(ns_list);
5bae7f73 3841
0157ec8d
SG
3842 /*
3843 * make sure to requeue I/O to all namespaces as these
3844 * might result from the scan itself and must complete
3845 * for the scan_work to make progress
3846 */
3847 nvme_mpath_clear_ctrl_paths(ctrl);
3848
f6c8e432
SG
3849 /* prevent racing with ns scanning */
3850 flush_work(&ctrl->scan_work);
3851
0ff9d4e1
KB
3852 /*
3853 * The dead states indicates the controller was not gracefully
3854 * disconnected. In that case, we won't be able to flush any data while
3855 * removing the namespaces' disks; fail all the queues now to avoid
3856 * potentially having to clean up the failed sync later.
3857 */
3858 if (ctrl->state == NVME_CTRL_DEAD)
3859 nvme_kill_queues(ctrl);
3860
765cc031 3861 down_write(&ctrl->namespaces_rwsem);
6f8e0d78 3862 list_splice_init(&ctrl->namespaces, &ns_list);
765cc031 3863 up_write(&ctrl->namespaces_rwsem);
6f8e0d78
JW
3864
3865 list_for_each_entry_safe(ns, next, &ns_list, list)
5bae7f73
CH
3866 nvme_ns_remove(ns);
3867}
576d55d6 3868EXPORT_SYMBOL_GPL(nvme_remove_namespaces);
5bae7f73 3869
a42f42e5
SG
3870static int nvme_class_uevent(struct device *dev, struct kobj_uevent_env *env)
3871{
3872 struct nvme_ctrl *ctrl =
3873 container_of(dev, struct nvme_ctrl, ctrl_device);
3874 struct nvmf_ctrl_options *opts = ctrl->opts;
3875 int ret;
3876
3877 ret = add_uevent_var(env, "NVME_TRTYPE=%s", ctrl->ops->name);
3878 if (ret)
3879 return ret;
3880
3881 if (opts) {
3882 ret = add_uevent_var(env, "NVME_TRADDR=%s", opts->traddr);
3883 if (ret)
3884 return ret;
3885
3886 ret = add_uevent_var(env, "NVME_TRSVCID=%s",
3887 opts->trsvcid ?: "none");
3888 if (ret)
3889 return ret;
3890
3891 ret = add_uevent_var(env, "NVME_HOST_TRADDR=%s",
3892 opts->host_traddr ?: "none");
3893 }
3894 return ret;
3895}
3896
e3d7874d
KB
3897static void nvme_aen_uevent(struct nvme_ctrl *ctrl)
3898{
3899 char *envp[2] = { NULL, NULL };
3900 u32 aen_result = ctrl->aen_result;
3901
3902 ctrl->aen_result = 0;
3903 if (!aen_result)
3904 return;
3905
3906 envp[0] = kasprintf(GFP_KERNEL, "NVME_AEN=%#08x", aen_result);
3907 if (!envp[0])
3908 return;
3909 kobject_uevent_env(&ctrl->device->kobj, KOBJ_CHANGE, envp);
3910 kfree(envp[0]);
3911}
3912
f866fc42
CH
3913static void nvme_async_event_work(struct work_struct *work)
3914{
3915 struct nvme_ctrl *ctrl =
3916 container_of(work, struct nvme_ctrl, async_event_work);
3917
e3d7874d 3918 nvme_aen_uevent(ctrl);
ad22c355 3919 ctrl->ops->submit_async_event(ctrl);
f866fc42
CH
3920}
3921
b6dccf7f
AD
3922static bool nvme_ctrl_pp_status(struct nvme_ctrl *ctrl)
3923{
3924
3925 u32 csts;
3926
3927 if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts))
3928 return false;
3929
3930 if (csts == ~0)
3931 return false;
3932
3933 return ((ctrl->ctrl_config & NVME_CC_ENABLE) && (csts & NVME_CSTS_PP));
3934}
3935
3936static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl)
3937{
b6dccf7f
AD
3938 struct nvme_fw_slot_info_log *log;
3939
3940 log = kmalloc(sizeof(*log), GFP_KERNEL);
3941 if (!log)
3942 return;
3943
f25372ff 3944 if (nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_FW_SLOT, 0, log,
0e98719b
CH
3945 sizeof(*log), 0))
3946 dev_warn(ctrl->device, "Get FW SLOT INFO log error\n");
b6dccf7f
AD
3947 kfree(log);
3948}
3949
3950static void nvme_fw_act_work(struct work_struct *work)
3951{
3952 struct nvme_ctrl *ctrl = container_of(work,
3953 struct nvme_ctrl, fw_act_work);
3954 unsigned long fw_act_timeout;
3955
3956 if (ctrl->mtfa)
3957 fw_act_timeout = jiffies +
3958 msecs_to_jiffies(ctrl->mtfa * 100);
3959 else
3960 fw_act_timeout = jiffies +
3961 msecs_to_jiffies(admin_timeout * 1000);
3962
3963 nvme_stop_queues(ctrl);
3964 while (nvme_ctrl_pp_status(ctrl)) {
3965 if (time_after(jiffies, fw_act_timeout)) {
3966 dev_warn(ctrl->device,
3967 "Fw activation timeout, reset controller\n");
4c75f877
KB
3968 nvme_try_sched_reset(ctrl);
3969 return;
b6dccf7f
AD
3970 }
3971 msleep(100);
3972 }
3973
4c75f877 3974 if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE))
b6dccf7f
AD
3975 return;
3976
3977 nvme_start_queues(ctrl);
a806c6c8 3978 /* read FW slot information to clear the AER */
b6dccf7f
AD
3979 nvme_get_fw_slot_info(ctrl);
3980}
3981
868c2392
CH
3982static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
3983{
09bd1ff4
CK
3984 u32 aer_notice_type = (result & 0xff00) >> 8;
3985
521cfb8e
CK
3986 trace_nvme_async_event(ctrl, aer_notice_type);
3987
09bd1ff4 3988 switch (aer_notice_type) {
868c2392 3989 case NVME_AER_NOTICE_NS_CHANGED:
77016199 3990 set_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events);
868c2392
CH
3991 nvme_queue_scan(ctrl);
3992 break;
3993 case NVME_AER_NOTICE_FW_ACT_STARTING:
4c75f877
KB
3994 /*
3995 * We are (ab)using the RESETTING state to prevent subsequent
3996 * recovery actions from interfering with the controller's
3997 * firmware activation.
3998 */
3999 if (nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
4000 queue_work(nvme_wq, &ctrl->fw_act_work);
868c2392 4001 break;
0d0b660f
CH
4002#ifdef CONFIG_NVME_MULTIPATH
4003 case NVME_AER_NOTICE_ANA:
4004 if (!ctrl->ana_log_buf)
4005 break;
4006 queue_work(nvme_wq, &ctrl->ana_work);
4007 break;
4008#endif
85f8a435
SG
4009 case NVME_AER_NOTICE_DISC_CHANGED:
4010 ctrl->aen_result = result;
4011 break;
868c2392
CH
4012 default:
4013 dev_warn(ctrl->device, "async event result %08x\n", result);
4014 }
4015}
4016
7bf58533 4017void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
287a63eb 4018 volatile union nvme_result *res)
f866fc42 4019{
7bf58533 4020 u32 result = le32_to_cpu(res->u32);
09bd1ff4 4021 u32 aer_type = result & 0x07;
f866fc42 4022
ad22c355 4023 if (le16_to_cpu(status) >> 1 != NVME_SC_SUCCESS)
f866fc42
CH
4024 return;
4025
09bd1ff4 4026 switch (aer_type) {
868c2392
CH
4027 case NVME_AER_NOTICE:
4028 nvme_handle_aen_notice(ctrl, result);
4029 break;
e3d7874d
KB
4030 case NVME_AER_ERROR:
4031 case NVME_AER_SMART:
4032 case NVME_AER_CSS:
4033 case NVME_AER_VS:
09bd1ff4 4034 trace_nvme_async_event(ctrl, aer_type);
e3d7874d 4035 ctrl->aen_result = result;
7bf58533
CH
4036 break;
4037 default:
4038 break;
f866fc42 4039 }
c669ccdc 4040 queue_work(nvme_wq, &ctrl->async_event_work);
f866fc42 4041}
f866fc42 4042EXPORT_SYMBOL_GPL(nvme_complete_async_event);
f3ca80fc 4043
d09f2b45 4044void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
576d55d6 4045{
0d0b660f 4046 nvme_mpath_stop(ctrl);
d09f2b45 4047 nvme_stop_keep_alive(ctrl);
f866fc42 4048 flush_work(&ctrl->async_event_work);
b6dccf7f 4049 cancel_work_sync(&ctrl->fw_act_work);
d09f2b45
SG
4050}
4051EXPORT_SYMBOL_GPL(nvme_stop_ctrl);
4052
4053void nvme_start_ctrl(struct nvme_ctrl *ctrl)
4054{
4055 if (ctrl->kato)
4056 nvme_start_keep_alive(ctrl);
4057
93da4023
SG
4058 nvme_enable_aen(ctrl);
4059
d09f2b45
SG
4060 if (ctrl->queue_count > 1) {
4061 nvme_queue_scan(ctrl);
d09f2b45
SG
4062 nvme_start_queues(ctrl);
4063 }
ce151813 4064 ctrl->created = true;
d09f2b45
SG
4065}
4066EXPORT_SYMBOL_GPL(nvme_start_ctrl);
5955be21 4067
d09f2b45
SG
4068void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
4069{
f79d5fda 4070 nvme_fault_inject_fini(&ctrl->fault_inject);
510a405d 4071 dev_pm_qos_hide_latency_tolerance(ctrl->device);
a6a5149b 4072 cdev_device_del(&ctrl->cdev, ctrl->device);
726612b6 4073 nvme_put_ctrl(ctrl);
53029b04 4074}
576d55d6 4075EXPORT_SYMBOL_GPL(nvme_uninit_ctrl);
53029b04 4076
d22524a4 4077static void nvme_free_ctrl(struct device *dev)
53029b04 4078{
d22524a4
CH
4079 struct nvme_ctrl *ctrl =
4080 container_of(dev, struct nvme_ctrl, ctrl_device);
ab9e00cc 4081 struct nvme_subsystem *subsys = ctrl->subsys;
f3ca80fc 4082
733e4b69
KB
4083 if (subsys && ctrl->instance != subsys->instance)
4084 ida_simple_remove(&nvme_instance_ida, ctrl->instance);
4085
84fef62d 4086 kfree(ctrl->effects);
0d0b660f 4087 nvme_mpath_uninit(ctrl);
092ff052 4088 __free_page(ctrl->discard_page);
f3ca80fc 4089
ab9e00cc 4090 if (subsys) {
32fd90c4 4091 mutex_lock(&nvme_subsystems_lock);
ab9e00cc 4092 list_del(&ctrl->subsys_entry);
ab9e00cc 4093 sysfs_remove_link(&subsys->dev.kobj, dev_name(ctrl->device));
32fd90c4 4094 mutex_unlock(&nvme_subsystems_lock);
ab9e00cc 4095 }
f3ca80fc
CH
4096
4097 ctrl->ops->free_ctrl(ctrl);
f3ca80fc 4098
ab9e00cc
CH
4099 if (subsys)
4100 nvme_put_subsystem(subsys);
f3ca80fc
CH
4101}
4102
4103/*
4104 * Initialize a NVMe controller structures. This needs to be called during
4105 * earliest initialization so that we have the initialized structured around
4106 * during probing.
4107 */
4108int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
4109 const struct nvme_ctrl_ops *ops, unsigned long quirks)
4110{
4111 int ret;
4112
bb8d261e
CH
4113 ctrl->state = NVME_CTRL_NEW;
4114 spin_lock_init(&ctrl->lock);
e7ad43c3 4115 mutex_init(&ctrl->scan_lock);
f3ca80fc 4116 INIT_LIST_HEAD(&ctrl->namespaces);
765cc031 4117 init_rwsem(&ctrl->namespaces_rwsem);
f3ca80fc
CH
4118 ctrl->dev = dev;
4119 ctrl->ops = ops;
4120 ctrl->quirks = quirks;
5955be21 4121 INIT_WORK(&ctrl->scan_work, nvme_scan_work);
f866fc42 4122 INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
b6dccf7f 4123 INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
c5017e85 4124 INIT_WORK(&ctrl->delete_work, nvme_delete_ctrl_work);
c1ac9a4b 4125 init_waitqueue_head(&ctrl->state_wq);
f3ca80fc 4126
230f1f9e
JS
4127 INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
4128 memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
4129 ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
4130
cb5b7262
JA
4131 BUILD_BUG_ON(NVME_DSM_MAX_RANGES * sizeof(struct nvme_dsm_range) >
4132 PAGE_SIZE);
4133 ctrl->discard_page = alloc_page(GFP_KERNEL);
4134 if (!ctrl->discard_page) {
4135 ret = -ENOMEM;
4136 goto out;
4137 }
4138
9843f685
CH
4139 ret = ida_simple_get(&nvme_instance_ida, 0, 0, GFP_KERNEL);
4140 if (ret < 0)
f3ca80fc 4141 goto out;
9843f685 4142 ctrl->instance = ret;
f3ca80fc 4143
d22524a4
CH
4144 device_initialize(&ctrl->ctrl_device);
4145 ctrl->device = &ctrl->ctrl_device;
a6a5149b 4146 ctrl->device->devt = MKDEV(MAJOR(nvme_chr_devt), ctrl->instance);
d22524a4
CH
4147 ctrl->device->class = nvme_class;
4148 ctrl->device->parent = ctrl->dev;
4149 ctrl->device->groups = nvme_dev_attr_groups;
4150 ctrl->device->release = nvme_free_ctrl;
4151 dev_set_drvdata(ctrl->device, ctrl);
4152 ret = dev_set_name(ctrl->device, "nvme%d", ctrl->instance);
4153 if (ret)
f3ca80fc 4154 goto out_release_instance;
f3ca80fc 4155
b780d741 4156 nvme_get_ctrl(ctrl);
a6a5149b
CH
4157 cdev_init(&ctrl->cdev, &nvme_dev_fops);
4158 ctrl->cdev.owner = ops->module;
4159 ret = cdev_device_add(&ctrl->cdev, ctrl->device);
d22524a4
CH
4160 if (ret)
4161 goto out_free_name;
f3ca80fc 4162
c5552fde
AL
4163 /*
4164 * Initialize latency tolerance controls. The sysfs files won't
4165 * be visible to userspace unless the device actually supports APST.
4166 */
4167 ctrl->device->power.set_latency_tolerance = nvme_set_latency_tolerance;
4168 dev_pm_qos_update_user_latency_tolerance(ctrl->device,
4169 min(default_ps_max_latency_us, (unsigned long)S32_MAX));
4170
f79d5fda
AM
4171 nvme_fault_inject_init(&ctrl->fault_inject, dev_name(ctrl->device));
4172
f3ca80fc 4173 return 0;
d22524a4 4174out_free_name:
b780d741 4175 nvme_put_ctrl(ctrl);
d6a2b953 4176 kfree_const(ctrl->device->kobj.name);
f3ca80fc 4177out_release_instance:
9843f685 4178 ida_simple_remove(&nvme_instance_ida, ctrl->instance);
f3ca80fc 4179out:
cb5b7262
JA
4180 if (ctrl->discard_page)
4181 __free_page(ctrl->discard_page);
f3ca80fc
CH
4182 return ret;
4183}
576d55d6 4184EXPORT_SYMBOL_GPL(nvme_init_ctrl);
f3ca80fc 4185
69d9a99c
KB
4186/**
4187 * nvme_kill_queues(): Ends all namespace queues
4188 * @ctrl: the dead controller that needs to end
4189 *
4190 * Call this function when the driver determines it is unable to get the
4191 * controller in a state capable of servicing IO.
4192 */
4193void nvme_kill_queues(struct nvme_ctrl *ctrl)
4194{
4195 struct nvme_ns *ns;
4196
765cc031 4197 down_read(&ctrl->namespaces_rwsem);
82654b6b 4198
443bd90f 4199 /* Forcibly unquiesce queues to avoid blocking dispatch */
751a0cc0 4200 if (ctrl->admin_q && !blk_queue_dying(ctrl->admin_q))
7dd1ab16 4201 blk_mq_unquiesce_queue(ctrl->admin_q);
443bd90f 4202
cf39a6bc
SB
4203 list_for_each_entry(ns, &ctrl->namespaces, list)
4204 nvme_set_queue_dying(ns);
806f026f 4205
765cc031 4206 up_read(&ctrl->namespaces_rwsem);
69d9a99c 4207}
237045fc 4208EXPORT_SYMBOL_GPL(nvme_kill_queues);
69d9a99c 4209
302ad8cc
KB
4210void nvme_unfreeze(struct nvme_ctrl *ctrl)
4211{
4212 struct nvme_ns *ns;
4213
765cc031 4214 down_read(&ctrl->namespaces_rwsem);
302ad8cc
KB
4215 list_for_each_entry(ns, &ctrl->namespaces, list)
4216 blk_mq_unfreeze_queue(ns->queue);
765cc031 4217 up_read(&ctrl->namespaces_rwsem);
302ad8cc
KB
4218}
4219EXPORT_SYMBOL_GPL(nvme_unfreeze);
4220
4221void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout)
4222{
4223 struct nvme_ns *ns;
4224
765cc031 4225 down_read(&ctrl->namespaces_rwsem);
302ad8cc
KB
4226 list_for_each_entry(ns, &ctrl->namespaces, list) {
4227 timeout = blk_mq_freeze_queue_wait_timeout(ns->queue, timeout);
4228 if (timeout <= 0)
4229 break;
4230 }
765cc031 4231 up_read(&ctrl->namespaces_rwsem);
302ad8cc
KB
4232}
4233EXPORT_SYMBOL_GPL(nvme_wait_freeze_timeout);
4234
4235void nvme_wait_freeze(struct nvme_ctrl *ctrl)
4236{
4237 struct nvme_ns *ns;
4238
765cc031 4239 down_read(&ctrl->namespaces_rwsem);
302ad8cc
KB
4240 list_for_each_entry(ns, &ctrl->namespaces, list)
4241 blk_mq_freeze_queue_wait(ns->queue);
765cc031 4242 up_read(&ctrl->namespaces_rwsem);
302ad8cc
KB
4243}
4244EXPORT_SYMBOL_GPL(nvme_wait_freeze);
4245
4246void nvme_start_freeze(struct nvme_ctrl *ctrl)
4247{
4248 struct nvme_ns *ns;
4249
765cc031 4250 down_read(&ctrl->namespaces_rwsem);
302ad8cc 4251 list_for_each_entry(ns, &ctrl->namespaces, list)
1671d522 4252 blk_freeze_queue_start(ns->queue);
765cc031 4253 up_read(&ctrl->namespaces_rwsem);
302ad8cc
KB
4254}
4255EXPORT_SYMBOL_GPL(nvme_start_freeze);
4256
25646264 4257void nvme_stop_queues(struct nvme_ctrl *ctrl)
363c9aac
SG
4258{
4259 struct nvme_ns *ns;
4260
765cc031 4261 down_read(&ctrl->namespaces_rwsem);
a6eaa884 4262 list_for_each_entry(ns, &ctrl->namespaces, list)
3174dd33 4263 blk_mq_quiesce_queue(ns->queue);
765cc031 4264 up_read(&ctrl->namespaces_rwsem);
363c9aac 4265}
576d55d6 4266EXPORT_SYMBOL_GPL(nvme_stop_queues);
363c9aac 4267
25646264 4268void nvme_start_queues(struct nvme_ctrl *ctrl)
363c9aac
SG
4269{
4270 struct nvme_ns *ns;
4271
765cc031 4272 down_read(&ctrl->namespaces_rwsem);
8d7b8faf 4273 list_for_each_entry(ns, &ctrl->namespaces, list)
f660174e 4274 blk_mq_unquiesce_queue(ns->queue);
765cc031 4275 up_read(&ctrl->namespaces_rwsem);
363c9aac 4276}
576d55d6 4277EXPORT_SYMBOL_GPL(nvme_start_queues);
363c9aac 4278
d6135c3a
KB
4279
4280void nvme_sync_queues(struct nvme_ctrl *ctrl)
4281{
4282 struct nvme_ns *ns;
4283
4284 down_read(&ctrl->namespaces_rwsem);
4285 list_for_each_entry(ns, &ctrl->namespaces, list)
4286 blk_sync_queue(ns->queue);
4287 up_read(&ctrl->namespaces_rwsem);
03894b7a
EN
4288
4289 if (ctrl->admin_q)
4290 blk_sync_queue(ctrl->admin_q);
d6135c3a
KB
4291}
4292EXPORT_SYMBOL_GPL(nvme_sync_queues);
4293
81101540
CH
4294/*
4295 * Check we didn't inadvertently grow the command structure sizes:
4296 */
4297static inline void _nvme_check_size(void)
4298{
4299 BUILD_BUG_ON(sizeof(struct nvme_common_command) != 64);
4300 BUILD_BUG_ON(sizeof(struct nvme_rw_command) != 64);
4301 BUILD_BUG_ON(sizeof(struct nvme_identify) != 64);
4302 BUILD_BUG_ON(sizeof(struct nvme_features) != 64);
4303 BUILD_BUG_ON(sizeof(struct nvme_download_firmware) != 64);
4304 BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64);
4305 BUILD_BUG_ON(sizeof(struct nvme_dsm_cmd) != 64);
4306 BUILD_BUG_ON(sizeof(struct nvme_write_zeroes_cmd) != 64);
4307 BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64);
4308 BUILD_BUG_ON(sizeof(struct nvme_get_log_page_command) != 64);
4309 BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
4310 BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE);
4311 BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
4312 BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
4313 BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
4314 BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
4315 BUILD_BUG_ON(sizeof(struct nvme_directive_cmd) != 64);
4316}
4317
4318
893a74b7 4319static int __init nvme_core_init(void)
5bae7f73 4320{
b227c59b 4321 int result = -ENOMEM;
5bae7f73 4322
81101540
CH
4323 _nvme_check_size();
4324
9a6327d2
SG
4325 nvme_wq = alloc_workqueue("nvme-wq",
4326 WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
4327 if (!nvme_wq)
b227c59b
RS
4328 goto out;
4329
4330 nvme_reset_wq = alloc_workqueue("nvme-reset-wq",
4331 WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
4332 if (!nvme_reset_wq)
4333 goto destroy_wq;
4334
4335 nvme_delete_wq = alloc_workqueue("nvme-delete-wq",
4336 WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
4337 if (!nvme_delete_wq)
4338 goto destroy_reset_wq;
9a6327d2 4339
a6a5149b 4340 result = alloc_chrdev_region(&nvme_chr_devt, 0, NVME_MINORS, "nvme");
f3ca80fc 4341 if (result < 0)
b227c59b 4342 goto destroy_delete_wq;
f3ca80fc
CH
4343
4344 nvme_class = class_create(THIS_MODULE, "nvme");
4345 if (IS_ERR(nvme_class)) {
4346 result = PTR_ERR(nvme_class);
4347 goto unregister_chrdev;
4348 }
a42f42e5 4349 nvme_class->dev_uevent = nvme_class_uevent;
f3ca80fc 4350
ab9e00cc
CH
4351 nvme_subsys_class = class_create(THIS_MODULE, "nvme-subsystem");
4352 if (IS_ERR(nvme_subsys_class)) {
4353 result = PTR_ERR(nvme_subsys_class);
4354 goto destroy_class;
4355 }
5bae7f73 4356 return 0;
f3ca80fc 4357
ab9e00cc
CH
4358destroy_class:
4359 class_destroy(nvme_class);
9a6327d2 4360unregister_chrdev:
a6a5149b 4361 unregister_chrdev_region(nvme_chr_devt, NVME_MINORS);
b227c59b
RS
4362destroy_delete_wq:
4363 destroy_workqueue(nvme_delete_wq);
4364destroy_reset_wq:
4365 destroy_workqueue(nvme_reset_wq);
9a6327d2
SG
4366destroy_wq:
4367 destroy_workqueue(nvme_wq);
b227c59b 4368out:
f3ca80fc 4369 return result;
5bae7f73
CH
4370}
4371
893a74b7 4372static void __exit nvme_core_exit(void)
5bae7f73 4373{
ab9e00cc 4374 class_destroy(nvme_subsys_class);
f3ca80fc 4375 class_destroy(nvme_class);
a6a5149b 4376 unregister_chrdev_region(nvme_chr_devt, NVME_MINORS);
b227c59b
RS
4377 destroy_workqueue(nvme_delete_wq);
4378 destroy_workqueue(nvme_reset_wq);
9a6327d2 4379 destroy_workqueue(nvme_wq);
f41cfd5d 4380 ida_destroy(&nvme_instance_ida);
5bae7f73 4381}
576d55d6
ML
4382
4383MODULE_LICENSE("GPL");
4384MODULE_VERSION("1.0");
4385module_init(nvme_core_init);
4386module_exit(nvme_core_exit);