reconnect_delay_timer_del(s);
}
-static coroutine_fn int nbd_receive_replies(BDRVNBDState *s, uint64_t cookie)
+static coroutine_fn int nbd_receive_replies(BDRVNBDState *s, uint64_t cookie,
+ Error **errp)
{
int ret;
uint64_t ind = COOKIE_TO_INDEX(cookie), ind2;
/* We are under mutex and cookie is 0. We have to do the dirty work. */
assert(s->reply.cookie == 0);
- ret = nbd_receive_reply(s->bs, s->ioc, &s->reply, NULL);
- if (ret <= 0) {
- ret = ret ? ret : -EIO;
+ ret = nbd_receive_reply(s->bs, s->ioc, &s->reply, s->info.mode, errp);
+ if (ret == 0) {
+ ret = -EIO;
+ error_setg(errp, "server dropped connection");
+ }
+ if (ret < 0) {
nbd_channel_error(s, ret);
return ret;
}
if (nbd_reply_is_structured(&s->reply) &&
s->info.mode < NBD_MODE_STRUCTURED) {
nbd_channel_error(s, -EINVAL);
+ error_setg(errp, "unexpected structured reply");
return -EINVAL;
}
ind2 = COOKIE_TO_INDEX(s->reply.cookie);
if (ind2 >= MAX_NBD_REQUESTS || !s->requests[ind2].coroutine) {
nbd_channel_error(s, -EINVAL);
+ error_setg(errp, "unexpected cookie value");
return -EINVAL;
}
if (s->reply.cookie == cookie) {
*/
static int nbd_parse_blockstatus_payload(BDRVNBDState *s,
NBDStructuredReplyChunk *chunk,
- uint8_t *payload, uint64_t orig_length,
- NBDExtent32 *extent, Error **errp)
+ uint8_t *payload, bool wide,
+ uint64_t orig_length,
+ NBDExtent64 *extent, Error **errp)
{
uint32_t context_id;
+ uint32_t count;
+ size_t ext_len = wide ? sizeof(*extent) : sizeof(NBDExtent32);
+ size_t pay_len = sizeof(context_id) + wide * sizeof(count) + ext_len;
/* The server succeeded, so it must have sent [at least] one extent */
- if (chunk->length < sizeof(context_id) + sizeof(*extent)) {
+ if (chunk->length < pay_len) {
error_setg(errp, "Protocol error: invalid payload for "
"NBD_REPLY_TYPE_BLOCK_STATUS");
return -EINVAL;
return -EINVAL;
}
- extent->length = payload_advance32(&payload);
- extent->flags = payload_advance32(&payload);
+ if (wide) {
+ count = payload_advance32(&payload);
+ extent->length = payload_advance64(&payload);
+ extent->flags = payload_advance64(&payload);
+ } else {
+ count = 0;
+ extent->length = payload_advance32(&payload);
+ extent->flags = payload_advance32(&payload);
+ }
if (extent->length == 0) {
error_setg(errp, "Protocol error: server sent status chunk with "
* (always a safe status, even if it loses information).
*/
if (s->info.min_block && !QEMU_IS_ALIGNED(extent->length,
- s->info.min_block)) {
+ s->info.min_block)) {
trace_nbd_parse_blockstatus_compliance("extent length is unaligned");
if (extent->length > s->info.min_block) {
extent->length = QEMU_ALIGN_DOWN(extent->length,
/*
* We used NBD_CMD_FLAG_REQ_ONE, so the server should not have
* sent us any more than one extent, nor should it have included
- * status beyond our request in that extent. However, it's easy
- * enough to ignore the server's noncompliance without killing the
+ * status beyond our request in that extent. Furthermore, a wide
+ * server should have replied with an accurate count (we left
+ * count at 0 for a narrow server). However, it's easy enough to
+ * ignore the server's noncompliance without killing the
* connection; just ignore trailing extents, and clamp things to
* the length of our request.
*/
- if (chunk->length > sizeof(context_id) + sizeof(*extent)) {
- trace_nbd_parse_blockstatus_compliance("more than one extent");
+ if (count != wide || chunk->length > pay_len) {
+ trace_nbd_parse_blockstatus_compliance("unexpected extent count");
}
if (extent->length > orig_length) {
extent->length = orig_length;
}
*request_ret = 0;
- ret = nbd_receive_replies(s, cookie);
+ ret = nbd_receive_replies(s, cookie, errp);
if (ret < 0) {
- error_setg(errp, "Connection closed");
+ error_prepend(errp, "Connection closed: ");
return -EIO;
}
assert(s->ioc);
static int coroutine_fn
nbd_co_receive_blockstatus_reply(BDRVNBDState *s, uint64_t cookie,
- uint64_t length, NBDExtent32 *extent,
+ uint64_t length, NBDExtent64 *extent,
int *request_ret, Error **errp)
{
NBDReplyChunkIter iter;
NBD_FOREACH_REPLY_CHUNK(s, iter, cookie, false, NULL, &reply, &payload) {
int ret;
NBDStructuredReplyChunk *chunk = &reply.structured;
+ bool wide;
assert(nbd_reply_is_structured(&reply));
switch (chunk->type) {
+ case NBD_REPLY_TYPE_BLOCK_STATUS_EXT:
case NBD_REPLY_TYPE_BLOCK_STATUS:
+ wide = chunk->type == NBD_REPLY_TYPE_BLOCK_STATUS_EXT;
+ if ((s->info.mode >= NBD_MODE_EXTENDED) != wide) {
+ trace_nbd_extended_headers_compliance("block_status");
+ }
if (received) {
nbd_channel_error(s, -EINVAL);
error_setg(&local_err, "Several BLOCK_STATUS chunks in reply");
}
received = true;
- ret = nbd_parse_blockstatus_payload(s, &reply.structured,
- payload, length, extent,
- &local_err);
+ ret = nbd_parse_blockstatus_payload(
+ s, &reply.structured, payload, wide,
+ length, extent, &local_err);
if (ret < 0) {
nbd_channel_error(s, ret);
nbd_iter_channel_error(&iter, ret, &local_err);
int64_t *pnum, int64_t *map, BlockDriverState **file)
{
int ret, request_ret;
- NBDExtent32 extent = { 0 };
+ NBDExtent64 extent = { 0 };
BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
Error *local_err = NULL;
if (fixedNewStyle) {
int result = 0;
+ if (max_mode >= NBD_MODE_EXTENDED) {
+ result = nbd_request_simple_option(ioc,
+ NBD_OPT_EXTENDED_HEADERS,
+ false, errp);
+ if (result) {
+ return result < 0 ? -EINVAL : NBD_MODE_EXTENDED;
+ }
+ }
if (max_mode >= NBD_MODE_STRUCTURED) {
result = nbd_request_simple_option(ioc,
NBD_OPT_STRUCTURED_REPLY,
false, errp);
- if (result < 0) {
- return -EINVAL;
+ if (result) {
+ return result < 0 ? -EINVAL : NBD_MODE_STRUCTURED;
}
}
- return result ? NBD_MODE_STRUCTURED : NBD_MODE_SIMPLE;
+ return NBD_MODE_SIMPLE;
} else {
return NBD_MODE_EXPORT_NAME;
}
}
switch (info->mode) {
+ case NBD_MODE_EXTENDED:
case NBD_MODE_STRUCTURED:
if (base_allocation) {
result = nbd_negotiate_simple_meta_context(ioc, info, errp);
*info = NULL;
result = nbd_start_negotiate(ioc, tlscreds, hostname, &sioc,
- NBD_MODE_STRUCTURED, NULL, errp);
+ NBD_MODE_EXTENDED, NULL, errp);
if (tlscreds && sioc) {
ioc = sioc;
}
switch ((NBDMode)result) {
case NBD_MODE_SIMPLE:
case NBD_MODE_STRUCTURED:
+ case NBD_MODE_EXTENDED:
/* newstyle - use NBD_OPT_LIST to populate array, then try
* NBD_OPT_INFO on each array member. If structured replies
* are enabled, also try NBD_OPT_LIST_META_CONTEXT. */
break;
}
- if (result == NBD_MODE_STRUCTURED &&
+ if (result >= NBD_MODE_STRUCTURED &&
nbd_list_meta_contexts(ioc, &array[i], errp) < 0) {
goto out;
}
int nbd_send_request(QIOChannel *ioc, NBDRequest *request)
{
- uint8_t buf[NBD_REQUEST_SIZE];
+ uint8_t buf[NBD_EXTENDED_REQUEST_SIZE];
+ size_t len;
- assert(request->mode <= NBD_MODE_STRUCTURED); /* TODO handle extended */
- assert(request->len <= UINT32_MAX);
trace_nbd_send_request(request->from, request->len, request->cookie,
request->flags, request->type,
nbd_cmd_lookup(request->type));
- stl_be_p(buf, NBD_REQUEST_MAGIC);
stw_be_p(buf + 4, request->flags);
stw_be_p(buf + 6, request->type);
stq_be_p(buf + 8, request->cookie);
stq_be_p(buf + 16, request->from);
- stl_be_p(buf + 24, request->len);
+ if (request->mode >= NBD_MODE_EXTENDED) {
+ stl_be_p(buf, NBD_EXTENDED_REQUEST_MAGIC);
+ stq_be_p(buf + 24, request->len);
+ len = NBD_EXTENDED_REQUEST_SIZE;
+ } else {
+ assert(request->len <= UINT32_MAX);
+ stl_be_p(buf, NBD_REQUEST_MAGIC);
+ stl_be_p(buf + 24, request->len);
+ len = NBD_REQUEST_SIZE;
+ }
- return nbd_write(ioc, buf, sizeof(buf), NULL);
+ return nbd_write(ioc, buf, len, NULL);
}
/* nbd_receive_simple_reply
return 0;
}
-/* nbd_receive_structured_reply_chunk
+/* nbd_receive_reply_chunk_header
* Read structured reply chunk except magic field (which should be already
- * read).
+ * read). Normalize into the compact form.
* Payload is not read.
*/
-static int nbd_receive_structured_reply_chunk(QIOChannel *ioc,
- NBDStructuredReplyChunk *chunk,
- Error **errp)
+static int nbd_receive_reply_chunk_header(QIOChannel *ioc, NBDReply *chunk,
+ Error **errp)
{
int ret;
+ size_t len;
+ uint64_t payload_len;
- assert(chunk->magic == NBD_STRUCTURED_REPLY_MAGIC);
+ if (chunk->magic == NBD_STRUCTURED_REPLY_MAGIC) {
+ len = sizeof(chunk->structured);
+ } else {
+ assert(chunk->magic == NBD_EXTENDED_REPLY_MAGIC);
+ len = sizeof(chunk->extended);
+ }
ret = nbd_read(ioc, (uint8_t *)chunk + sizeof(chunk->magic),
- sizeof(*chunk) - sizeof(chunk->magic), "structured chunk",
+ len - sizeof(chunk->magic), "structured chunk",
errp);
if (ret < 0) {
return ret;
}
- chunk->flags = be16_to_cpu(chunk->flags);
- chunk->type = be16_to_cpu(chunk->type);
- chunk->cookie = be64_to_cpu(chunk->cookie);
- chunk->length = be32_to_cpu(chunk->length);
+ /* flags, type, and cookie occupy same space between forms */
+ chunk->structured.flags = be16_to_cpu(chunk->structured.flags);
+ chunk->structured.type = be16_to_cpu(chunk->structured.type);
+ chunk->structured.cookie = be64_to_cpu(chunk->structured.cookie);
/*
* Because we use BLOCK_STATUS with REQ_ONE, and cap READ requests
* this. Even if we stopped using REQ_ONE, sane servers will cap
* the number of extents they return for block status.
*/
- if (chunk->length > NBD_MAX_BUFFER_SIZE + sizeof(NBDStructuredReadData)) {
+ if (chunk->magic == NBD_STRUCTURED_REPLY_MAGIC) {
+ payload_len = be32_to_cpu(chunk->structured.length);
+ } else {
+ /* For now, we are ignoring the extended header offset. */
+ payload_len = be64_to_cpu(chunk->extended.length);
+ chunk->magic = NBD_STRUCTURED_REPLY_MAGIC;
+ }
+ if (payload_len > NBD_MAX_BUFFER_SIZE + sizeof(NBDStructuredReadData)) {
error_setg(errp, "server chunk %" PRIu32 " (%s) payload is too long",
- chunk->type, nbd_rep_lookup(chunk->type));
+ chunk->structured.type,
+ nbd_rep_lookup(chunk->structured.type));
return -EINVAL;
}
+ chunk->structured.length = payload_len;
return 0;
}
/* nbd_receive_reply
*
- * Decreases bs->in_flight while waiting for a new reply. This yield is where
- * we wait indefinitely and the coroutine must be able to be safely reentered
- * for nbd_client_attach_aio_context().
+ * Wait for a new reply. If this yields, the coroutine must be able to be
+ * safely reentered for nbd_client_attach_aio_context(). @mode determines
+ * which reply magic we are expecting, although this normalizes the result
+ * so that the caller only has to work with compact headers.
*
* Returns 1 on success
- * 0 on eof, when no data was read (errp is not set)
- * negative errno on failure (errp is set)
+ * 0 on eof, when no data was read
+ * negative errno on failure
*/
int coroutine_fn nbd_receive_reply(BlockDriverState *bs, QIOChannel *ioc,
- NBDReply *reply, Error **errp)
+ NBDReply *reply, NBDMode mode, Error **errp)
{
int ret;
const char *type;
+ uint32_t expected;
ret = nbd_read_eof(bs, ioc, &reply->magic, sizeof(reply->magic), errp);
if (ret <= 0) {
reply->magic = be32_to_cpu(reply->magic);
+ /* Diagnose but accept wrong-width header */
switch (reply->magic) {
case NBD_SIMPLE_REPLY_MAGIC:
+ if (mode >= NBD_MODE_EXTENDED) {
+ trace_nbd_receive_wrong_header(reply->magic,
+ nbd_mode_lookup(mode));
+ }
ret = nbd_receive_simple_reply(ioc, &reply->simple, errp);
if (ret < 0) {
- break;
+ return ret;
}
trace_nbd_receive_simple_reply(reply->simple.error,
nbd_err_lookup(reply->simple.error),
reply->cookie);
break;
case NBD_STRUCTURED_REPLY_MAGIC:
- ret = nbd_receive_structured_reply_chunk(ioc, &reply->structured, errp);
+ case NBD_EXTENDED_REPLY_MAGIC:
+ expected = mode >= NBD_MODE_EXTENDED ? NBD_EXTENDED_REPLY_MAGIC
+ : NBD_STRUCTURED_REPLY_MAGIC;
+ if (reply->magic != expected) {
+ trace_nbd_receive_wrong_header(reply->magic,
+ nbd_mode_lookup(mode));
+ }
+ ret = nbd_receive_reply_chunk_header(ioc, reply, errp);
if (ret < 0) {
- break;
+ return ret;
}
type = nbd_reply_type_lookup(reply->structured.type);
- trace_nbd_receive_structured_reply_chunk(reply->structured.flags,
- reply->structured.type, type,
- reply->structured.cookie,
- reply->structured.length);
+ trace_nbd_receive_reply_chunk_header(reply->structured.flags,
+ reply->structured.type, type,
+ reply->structured.cookie,
+ reply->structured.length);
break;
default:
+ trace_nbd_receive_wrong_header(reply->magic, nbd_mode_lookup(mode));
error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", reply->magic);
return -EINVAL;
}
- if (ret < 0) {
- return ret;
- }
return 1;
}
static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
-/* NBDExportMetaContexts represents a list of contexts to be exported,
+/*
+ * NBDMetaContexts represents a list of meta contexts in use,
* as selected by NBD_OPT_SET_META_CONTEXT. Also used for
- * NBD_OPT_LIST_META_CONTEXT. */
-typedef struct NBDExportMetaContexts {
- NBDExport *exp;
+ * NBD_OPT_LIST_META_CONTEXT.
+ */
+struct NBDMetaContexts {
+ const NBDExport *exp; /* associated export */
size_t count; /* number of negotiated contexts */
bool base_allocation; /* export base:allocation context (block status) */
bool allocation_depth; /* export qemu:allocation-depth */
* export qemu:dirty-bitmap:<export bitmap name>,
* sized by exp->nr_export_bitmaps
*/
-} NBDExportMetaContexts;
+};
struct NBDClient {
int refcount;
uint32_t check_align; /* If non-zero, check for aligned client requests */
NBDMode mode;
- NBDExportMetaContexts export_meta;
+ NBDMetaContexts contexts; /* Negotiated meta contexts */
uint32_t opt; /* Current option being negotiated */
uint32_t optlen; /* remaining length of data in ioc for the option being
return nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
}
-static void nbd_check_meta_export(NBDClient *client)
+static void nbd_check_meta_export(NBDClient *client, NBDExport *exp)
{
- if (client->exp != client->export_meta.exp) {
- client->export_meta.count = 0;
+ if (exp != client->contexts.exp) {
+ client->contexts.count = 0;
}
}
[10 .. 133] reserved (0) [unless no_zeroes]
*/
trace_nbd_negotiate_handle_export_name();
+ if (client->mode >= NBD_MODE_EXTENDED) {
+ error_setg(errp, "Extended headers already negotiated");
+ return -EINVAL;
+ }
if (client->optlen > NBD_MAX_STRING_SIZE) {
error_setg(errp, "Bad length received");
return -EINVAL;
error_setg(errp, "export not found");
return -EINVAL;
}
+ nbd_check_meta_export(client, client->exp);
myflags = client->exp->nbdflags;
if (client->mode >= NBD_MODE_STRUCTURED) {
myflags |= NBD_FLAG_SEND_DF;
}
+ if (client->mode >= NBD_MODE_EXTENDED && client->contexts.count) {
+ myflags |= NBD_FLAG_BLOCK_STAT_PAYLOAD;
+ }
trace_nbd_negotiate_new_style_size_flags(client->exp->size, myflags);
stq_be_p(buf, client->exp->size);
stw_be_p(buf + 8, myflags);
QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
blk_exp_ref(&client->exp->common);
- nbd_check_meta_export(client);
return 0;
}
errp, "export '%s' not present",
sane_name);
}
+ if (client->opt == NBD_OPT_GO) {
+ nbd_check_meta_export(client, exp);
+ }
/* Don't bother sending NBD_INFO_NAME unless client requested it */
if (sendname) {
if (client->mode >= NBD_MODE_STRUCTURED) {
myflags |= NBD_FLAG_SEND_DF;
}
+ if (client->mode >= NBD_MODE_EXTENDED &&
+ (client->contexts.count || client->opt == NBD_OPT_INFO)) {
+ myflags |= NBD_FLAG_BLOCK_STAT_PAYLOAD;
+ }
trace_nbd_negotiate_new_style_size_flags(exp->size, myflags);
stq_be_p(buf, exp->size);
stw_be_p(buf + 8, myflags);
client->check_align = check_align;
QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
blk_exp_ref(&client->exp->common);
- nbd_check_meta_export(client);
rc = 1;
}
return rc;
* Handle queries to 'base' namespace. For now, only the base:allocation
* context is available. Return true if @query has been handled.
*/
-static bool nbd_meta_base_query(NBDClient *client, NBDExportMetaContexts *meta,
+static bool nbd_meta_base_query(NBDClient *client, NBDMetaContexts *meta,
const char *query)
{
if (!nbd_strshift(&query, "base:")) {
* and qemu:allocation-depth contexts are available. Return true if @query
* has been handled.
*/
-static bool nbd_meta_qemu_query(NBDClient *client, NBDExportMetaContexts *meta,
+static bool nbd_meta_qemu_query(NBDClient *client, NBDMetaContexts *meta,
const char *query)
{
size_t i;
* Return -errno on I/O error, 0 if option was completely handled by
* sending a reply about inconsistent lengths, or 1 on success. */
static int nbd_negotiate_meta_query(NBDClient *client,
- NBDExportMetaContexts *meta, Error **errp)
+ NBDMetaContexts *meta, Error **errp)
{
int ret;
g_autofree char *query = NULL;
* Handle NBD_OPT_LIST_META_CONTEXT and NBD_OPT_SET_META_CONTEXT
*
* Return -errno on I/O error, or 0 if option was completely handled. */
-static int nbd_negotiate_meta_queries(NBDClient *client,
- NBDExportMetaContexts *meta, Error **errp)
+static int nbd_negotiate_meta_queries(NBDClient *client, Error **errp)
{
int ret;
g_autofree char *export_name = NULL;
/* Mark unused to work around https://bugs.llvm.org/show_bug.cgi?id=3888 */
g_autofree G_GNUC_UNUSED bool *bitmaps = NULL;
- NBDExportMetaContexts local_meta = {0};
+ NBDMetaContexts local_meta = {0};
+ NBDMetaContexts *meta;
uint32_t nb_queries;
size_t i;
size_t count = 0;
if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
- /* Only change the caller's meta on SET. */
+ /* Only change the client's negotiated contexts on SET. */
meta = &local_meta;
+ } else {
+ meta = &client->contexts;
}
g_free(meta->bitmaps);
case NBD_OPT_STRUCTURED_REPLY:
if (length) {
ret = nbd_reject_length(client, false, errp);
+ } else if (client->mode >= NBD_MODE_EXTENDED) {
+ ret = nbd_negotiate_send_rep_err(
+ client, NBD_REP_ERR_EXT_HEADER_REQD, errp,
+ "extended headers already negotiated");
} else if (client->mode >= NBD_MODE_STRUCTURED) {
ret = nbd_negotiate_send_rep_err(
client, NBD_REP_ERR_INVALID, errp,
case NBD_OPT_LIST_META_CONTEXT:
case NBD_OPT_SET_META_CONTEXT:
- ret = nbd_negotiate_meta_queries(client, &client->export_meta,
- errp);
+ ret = nbd_negotiate_meta_queries(client, errp);
+ break;
+
+ case NBD_OPT_EXTENDED_HEADERS:
+ if (length) {
+ ret = nbd_reject_length(client, false, errp);
+ } else if (client->mode >= NBD_MODE_EXTENDED) {
+ ret = nbd_negotiate_send_rep_err(
+ client, NBD_REP_ERR_INVALID, errp,
+ "extended headers already negotiated");
+ } else {
+ ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
+ client->mode = NBD_MODE_EXTENDED;
+ }
break;
default:
static int coroutine_fn nbd_receive_request(NBDClient *client, NBDRequest *request,
Error **errp)
{
- uint8_t buf[NBD_REQUEST_SIZE];
- uint32_t magic;
+ uint8_t buf[NBD_EXTENDED_REQUEST_SIZE];
+ uint32_t magic, expect;
int ret;
+ size_t size = client->mode >= NBD_MODE_EXTENDED ?
+ NBD_EXTENDED_REQUEST_SIZE : NBD_REQUEST_SIZE;
- ret = nbd_read_eof(client, buf, sizeof(buf), errp);
+ ret = nbd_read_eof(client, buf, size, errp);
if (ret < 0) {
return ret;
}
return -EIO;
}
- /* Request
- [ 0 .. 3] magic (NBD_REQUEST_MAGIC)
- [ 4 .. 5] flags (NBD_CMD_FLAG_FUA, ...)
- [ 6 .. 7] type (NBD_CMD_READ, ...)
- [ 8 .. 15] cookie
- [16 .. 23] from
- [24 .. 27] len
+ /*
+ * Compact request
+ * [ 0 .. 3] magic (NBD_REQUEST_MAGIC)
+ * [ 4 .. 5] flags (NBD_CMD_FLAG_FUA, ...)
+ * [ 6 .. 7] type (NBD_CMD_READ, ...)
+ * [ 8 .. 15] cookie
+ * [16 .. 23] from
+ * [24 .. 27] len
+ * Extended request
+ * [ 0 .. 3] magic (NBD_EXTENDED_REQUEST_MAGIC)
+ * [ 4 .. 5] flags (NBD_CMD_FLAG_FUA, NBD_CMD_FLAG_PAYLOAD_LEN, ...)
+ * [ 6 .. 7] type (NBD_CMD_READ, ...)
+ * [ 8 .. 15] cookie
+ * [16 .. 23] from
+ * [24 .. 31] len
*/
magic = ldl_be_p(buf);
request->type = lduw_be_p(buf + 6);
request->cookie = ldq_be_p(buf + 8);
request->from = ldq_be_p(buf + 16);
- request->len = (uint32_t)ldl_be_p(buf + 24); /* widen 32 to 64 bits */
+ if (client->mode >= NBD_MODE_EXTENDED) {
+ request->len = ldq_be_p(buf + 24);
+ expect = NBD_EXTENDED_REQUEST_MAGIC;
+ } else {
+ request->len = (uint32_t)ldl_be_p(buf + 24); /* widen 32 to 64 bits */
+ expect = NBD_REQUEST_MAGIC;
+ }
trace_nbd_receive_request(magic, request->flags, request->type,
request->from, request->len);
- if (magic != NBD_REQUEST_MAGIC) {
- error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", magic);
+ if (magic != expect) {
+ error_setg(errp, "invalid magic (got 0x%" PRIx32 ", expected 0x%"
+ PRIx32 ")", magic, expect);
return -EINVAL;
}
return 0;
QTAILQ_REMOVE(&client->exp->clients, client, next);
blk_exp_unref(&client->exp->common);
}
- g_free(client->export_meta.bitmaps);
+ g_free(client->contexts.bitmaps);
g_free(client);
}
}
size_t niov, uint16_t flags, uint16_t type,
NBDRequest *request)
{
- /* TODO - handle structured vs. extended replies */
- NBDStructuredReplyChunk *chunk = iov->iov_base;
size_t i, length = 0;
for (i = 1; i < niov; i++) {
}
assert(length <= NBD_MAX_BUFFER_SIZE + sizeof(NBDStructuredReadData));
- iov[0].iov_len = sizeof(*chunk);
- stl_be_p(&chunk->magic, NBD_STRUCTURED_REPLY_MAGIC);
- stw_be_p(&chunk->flags, flags);
- stw_be_p(&chunk->type, type);
- stq_be_p(&chunk->cookie, request->cookie);
- stl_be_p(&chunk->length, length);
+ if (client->mode >= NBD_MODE_EXTENDED) {
+ NBDExtendedReplyChunk *chunk = iov->iov_base;
+
+ iov[0].iov_len = sizeof(*chunk);
+ stl_be_p(&chunk->magic, NBD_EXTENDED_REPLY_MAGIC);
+ stw_be_p(&chunk->flags, flags);
+ stw_be_p(&chunk->type, type);
+ stq_be_p(&chunk->cookie, request->cookie);
+ stq_be_p(&chunk->offset, request->from);
+ stq_be_p(&chunk->length, length);
+ } else {
+ NBDStructuredReplyChunk *chunk = iov->iov_base;
+
+ iov[0].iov_len = sizeof(*chunk);
+ stl_be_p(&chunk->magic, NBD_STRUCTURED_REPLY_MAGIC);
+ stw_be_p(&chunk->flags, flags);
+ stw_be_p(&chunk->type, type);
+ stq_be_p(&chunk->cookie, request->cookie);
+ stl_be_p(&chunk->length, length);
+ }
}
static int coroutine_fn nbd_co_send_chunk_done(NBDClient *client,
}
typedef struct NBDExtentArray {
- NBDExtent32 *extents;
+ NBDExtent64 *extents;
unsigned int nb_alloc;
unsigned int count;
uint64_t total_length;
+ bool extended;
bool can_add;
bool converted_to_be;
} NBDExtentArray;
-static NBDExtentArray *nbd_extent_array_new(unsigned int nb_alloc)
+static NBDExtentArray *nbd_extent_array_new(unsigned int nb_alloc,
+ NBDMode mode)
{
NBDExtentArray *ea = g_new0(NBDExtentArray, 1);
+ assert(mode >= NBD_MODE_STRUCTURED);
ea->nb_alloc = nb_alloc;
- ea->extents = g_new(NBDExtent32, nb_alloc);
+ ea->extents = g_new(NBDExtent64, nb_alloc);
+ ea->extended = mode >= NBD_MODE_EXTENDED;
ea->can_add = true;
return ea;
int i;
assert(!ea->converted_to_be);
+ assert(ea->extended);
ea->can_add = false;
ea->converted_to_be = true;
for (i = 0; i < ea->count; i++) {
- ea->extents[i].flags = cpu_to_be32(ea->extents[i].flags);
- ea->extents[i].length = cpu_to_be32(ea->extents[i].length);
+ ea->extents[i].length = cpu_to_be64(ea->extents[i].length);
+ ea->extents[i].flags = cpu_to_be64(ea->extents[i].flags);
}
}
+/*
+ * nbd_extent_array_convert_to_narrow
+ * Produce a compact (32-bit) copy of @ea for a client that did not
+ * negotiate extended headers: a newly allocated array of ea->count
+ * NBDExtent32 entries, with length and flags stored big-endian.
+ *
+ * ea->extents itself is left untouched, but @ea is still marked as
+ * converted (can_add cleared, converted_to_be set). Further
+ * modifications of the array after conversion are abandoned.
+ *
+ * Asserts that @ea is a non-extended array that has not yet been
+ * converted, and that every length and flags value fits in 32 bits.
+ *
+ * Returns the g_new()-allocated array; this function does not free it.
+ */
+static NBDExtent32 *nbd_extent_array_convert_to_narrow(NBDExtentArray *ea)
+{
+ int i;
+ NBDExtent32 *extents = g_new(NBDExtent32, ea->count);
+
+ assert(!ea->converted_to_be);
+ assert(!ea->extended);
+ ea->can_add = false;
+ ea->converted_to_be = true;
+
+ for (i = 0; i < ea->count; i++) {
+ assert((ea->extents[i].length | ea->extents[i].flags) <= UINT32_MAX);
+ extents[i].length = cpu_to_be32(ea->extents[i].length);
+ extents[i].flags = cpu_to_be32(ea->extents[i].flags);
+ }
+
+ return extents;
+}
+
/*
* Add extent to NBDExtentArray. If extent can't be added (no available space),
* return -1.
* would result in an incorrect range reported to the client)
*/
static int nbd_extent_array_add(NBDExtentArray *ea,
- uint32_t length, uint32_t flags)
+ uint64_t length, uint32_t flags)
{
assert(ea->can_add);
if (!length) {
return 0;
}
+ if (!ea->extended) {
+ assert(length <= UINT32_MAX);
+ }
/* Extend previous extent if flags are the same */
if (ea->count > 0 && flags == ea->extents[ea->count - 1].flags) {
- uint64_t sum = (uint64_t)length + ea->extents[ea->count - 1].length;
+ uint64_t sum = length + ea->extents[ea->count - 1].length;
- if (sum <= UINT32_MAX) {
+ /*
+ * sum cannot overflow: the block layer bounds image size at
+ * 2^63, and ea->extents[].length comes from the block layer.
+ */
+ assert(sum >= length);
+ if (sum <= UINT32_MAX || ea->extended) {
ea->extents[ea->count - 1].length = sum;
ea->total_length += length;
return 0;
}
ea->total_length += length;
- ea->extents[ea->count] = (NBDExtent32) {.length = length, .flags = flags};
+ ea->extents[ea->count] = (NBDExtent64) {.length = length, .flags = flags};
ea->count++;
return 0;
bool last, uint32_t context_id, Error **errp)
{
NBDReply hdr;
- NBDStructuredMeta chunk;
- struct iovec iov[] = {
- {.iov_base = &hdr},
- {.iov_base = &chunk, .iov_len = sizeof(chunk)},
- {.iov_base = ea->extents, .iov_len = ea->count * sizeof(ea->extents[0])}
- };
+ NBDStructuredMeta meta;
+ NBDExtendedMeta meta_ext;
+ g_autofree NBDExtent32 *extents = NULL;
+ uint16_t type;
+ struct iovec iov[] = { {.iov_base = &hdr}, {0}, {0} };
+
+ if (client->mode >= NBD_MODE_EXTENDED) {
+ type = NBD_REPLY_TYPE_BLOCK_STATUS_EXT;
+
+ iov[1].iov_base = &meta_ext;
+ iov[1].iov_len = sizeof(meta_ext);
+ stl_be_p(&meta_ext.context_id, context_id);
+ stl_be_p(&meta_ext.count, ea->count);
+
+ nbd_extent_array_convert_to_be(ea);
+ iov[2].iov_base = ea->extents;
+ iov[2].iov_len = ea->count * sizeof(ea->extents[0]);
+ } else {
+ type = NBD_REPLY_TYPE_BLOCK_STATUS;
+
+ iov[1].iov_base = &meta;
+ iov[1].iov_len = sizeof(meta);
+ stl_be_p(&meta.context_id, context_id);
- nbd_extent_array_convert_to_be(ea);
+ extents = nbd_extent_array_convert_to_narrow(ea);
+ iov[2].iov_base = extents;
+ iov[2].iov_len = ea->count * sizeof(extents[0]);
+ }
trace_nbd_co_send_extents(request->cookie, ea->count, context_id,
ea->total_length, last);
- set_be_chunk(client, iov, 3, last ? NBD_REPLY_FLAG_DONE : 0,
- NBD_REPLY_TYPE_BLOCK_STATUS, request);
- stl_be_p(&chunk.context_id, context_id);
+ set_be_chunk(client, iov, 3, last ? NBD_REPLY_FLAG_DONE : 0, type,
+ request);
return nbd_co_send_iov(client, iov, 3, errp);
}
static int
coroutine_fn nbd_co_send_block_status(NBDClient *client, NBDRequest *request,
BlockBackend *blk, uint64_t offset,
- uint32_t length, bool dont_fragment,
+ uint64_t length, bool dont_fragment,
bool last, uint32_t context_id,
Error **errp)
{
int ret;
unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
- g_autoptr(NBDExtentArray) ea = nbd_extent_array_new(nb_extents);
+ g_autoptr(NBDExtentArray) ea =
+ nbd_extent_array_new(nb_extents, client->mode);
if (context_id == NBD_META_ID_BASE_ALLOCATION) {
ret = blockstatus_to_extents(blk, offset, length, ea);
int64_t start, dirty_start, dirty_count;
int64_t end = offset + length;
bool full = false;
+ int64_t bound = es->extended ? INT64_MAX : INT32_MAX;
bdrv_dirty_bitmap_lock(bitmap);
for (start = offset;
- bdrv_dirty_bitmap_next_dirty_area(bitmap, start, end, INT32_MAX,
+ bdrv_dirty_bitmap_next_dirty_area(bitmap, start, end, bound,
&dirty_start, &dirty_count);
start = dirty_start + dirty_count)
{
NBDRequest *request,
BdrvDirtyBitmap *bitmap,
uint64_t offset,
- uint32_t length, bool dont_fragment,
+ uint64_t length, bool dont_fragment,
bool last, uint32_t context_id,
Error **errp)
{
unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
- g_autoptr(NBDExtentArray) ea = nbd_extent_array_new(nb_extents);
+ g_autoptr(NBDExtentArray) ea =
+ nbd_extent_array_new(nb_extents, client->mode);
bitmap_to_extents(bitmap, offset, length, ea);
return nbd_co_send_extents(client, request, ea, last, context_id, errp);
}
+/*
+ * nbd_co_block_status_payload_read
+ * Called when a client wants a subset of negotiated contexts via a
+ * BLOCK_STATUS payload. Check the payload for valid length and
+ * contents. On success, return 0 with request updated to effective
+ * length. If request was invalid but all payload consumed, return 0
+ * with request->len and request->contexts->count set to 0 (which will
+ * trigger an appropriate NBD_EINVAL response later on). Return
+ * negative errno if the payload was not fully consumed.
+ *
+ * The payload is an NBDBlockStatusPayload header, whose leading 64-bit
+ * big-endian value becomes the effective request->len, followed by
+ * 32-bit big-endian context ids. Each id must name a context the
+ * client negotiated and may appear at most once; anything else takes
+ * the "invalid but consumed" path above.
+ *
+ * A payload larger than NBD_MAX_BUFFER_SIZE is rejected with -EINVAL
+ * before anything is allocated or read. Past that check, this
+ * allocates request->contexts (and, once the payload has been read,
+ * request->contexts->bitmaps); nothing here frees them, so they are
+ * left for the caller to release on every later return path,
+ * including the -EIO one.
+ *
+ * NOTE(review): the assert below requires client->contexts.exp to
+ * already match client->exp - confirm this holds for a client that
+ * sends a payload without ever having negotiated meta contexts.
+ */
+static int
+nbd_co_block_status_payload_read(NBDClient *client, NBDRequest *request,
+ Error **errp)
+{
+ uint64_t payload_len = request->len;
+ g_autofree char *buf = NULL;
+ size_t count, i, nr_bitmaps;
+ uint32_t id;
+
+ if (payload_len > NBD_MAX_BUFFER_SIZE) {
+ error_setg(errp, "len (%" PRIu64 ") is larger than max len (%u)",
+ request->len, NBD_MAX_BUFFER_SIZE);
+ return -EINVAL;
+ }
+
+ assert(client->contexts.exp == client->exp);
+ nr_bitmaps = client->exp->nr_export_bitmaps;
+ request->contexts = g_new0(NBDMetaContexts, 1);
+ request->contexts->exp = client->exp;
+
+ if (payload_len % sizeof(uint32_t) ||
+ payload_len < sizeof(NBDBlockStatusPayload) ||
+ payload_len > (sizeof(NBDBlockStatusPayload) +
+ sizeof(id) * client->contexts.count)) {
+ goto skip;
+ }
+
+ buf = g_malloc(payload_len);
+ if (nbd_read(client->ioc, buf, payload_len,
+ "CMD_BLOCK_STATUS data", errp) < 0) {
+ return -EIO;
+ }
+ trace_nbd_co_receive_request_payload_received(request->cookie,
+ payload_len);
+ request->contexts->bitmaps = g_new0(bool, nr_bitmaps);
+ count = (payload_len - sizeof(NBDBlockStatusPayload)) / sizeof(id);
+ payload_len = 0;
+
+ for (i = 0; i < count; i++) {
+ id = ldl_be_p(buf + sizeof(NBDBlockStatusPayload) + sizeof(id) * i);
+ if (id == NBD_META_ID_BASE_ALLOCATION) {
+ if (!client->contexts.base_allocation ||
+ request->contexts->base_allocation) {
+ goto skip;
+ }
+ request->contexts->base_allocation = true;
+ } else if (id == NBD_META_ID_ALLOCATION_DEPTH) {
+ if (!client->contexts.allocation_depth ||
+ request->contexts->allocation_depth) {
+ goto skip;
+ }
+ request->contexts->allocation_depth = true;
+ } else {
+ unsigned idx = id - NBD_META_ID_DIRTY_BITMAP;
+
+ if (idx >= nr_bitmaps || !client->contexts.bitmaps[idx] ||
+ request->contexts->bitmaps[idx]) {
+ goto skip;
+ }
+ request->contexts->bitmaps[idx] = true;
+ }
+ }
+
+ request->len = ldq_be_p(buf);
+ request->contexts->count = count;
+ return 0;
+
+ skip:
+ trace_nbd_co_receive_block_status_payload_compliance(request->from,
+ request->len);
+ request->len = request->contexts->count = 0;
+ return nbd_drop(client->ioc, payload_len, errp);
+}
+
/* nbd_co_receive_request
* Collect a client request. Return 0 if request looks valid, -EIO to drop
* connection right away, -EAGAIN to indicate we were interrupted and the
Error **errp)
{
NBDClient *client = req->client;
+ bool extended_with_payload;
bool check_length = false;
bool check_rofs = false;
bool allocate_buffer = false;
- unsigned payload_len = 0;
+ bool payload_okay = false;
+ uint64_t payload_len = 0;
int valid_flags = NBD_CMD_FLAG_FUA;
int ret;
trace_nbd_co_receive_request_decode_type(request->cookie, request->type,
nbd_cmd_lookup(request->type));
+ extended_with_payload = client->mode >= NBD_MODE_EXTENDED &&
+ request->flags & NBD_CMD_FLAG_PAYLOAD_LEN;
+ if (extended_with_payload) {
+ payload_len = request->len;
+ check_length = true;
+ }
+
switch (request->type) {
case NBD_CMD_DISC:
/* Special case: we're going to disconnect without a reply,
break;
case NBD_CMD_WRITE:
+ if (client->mode >= NBD_MODE_EXTENDED) {
+ if (!extended_with_payload) {
+ /* The client is noncompliant. Trace it, but proceed. */
+ trace_nbd_co_receive_ext_payload_compliance(request->from,
+ request->len);
+ }
+ valid_flags |= NBD_CMD_FLAG_PAYLOAD_LEN;
+ }
+ payload_okay = true;
payload_len = request->len;
check_length = true;
allocate_buffer = true;
break;
case NBD_CMD_BLOCK_STATUS:
+ if (extended_with_payload) {
+ ret = nbd_co_block_status_payload_read(client, request, errp);
+ if (ret < 0) {
+ return ret;
+ }
+ /* payload now consumed */
+ check_length = false;
+ payload_len = 0;
+ valid_flags |= NBD_CMD_FLAG_PAYLOAD_LEN;
+ } else {
+ request->contexts = &client->contexts;
+ }
valid_flags |= NBD_CMD_FLAG_REQ_ONE;
break;
request->len, NBD_MAX_BUFFER_SIZE);
return -EINVAL;
}
+ if (payload_len && !payload_okay) {
+ /*
+ * For now, we don't support payloads on other commands; but
+ * we can keep the connection alive by ignoring the payload.
+ * We will fail the command later with NBD_EINVAL for the use
+ * of an unsupported flag (and not for access beyond bounds).
+ */
+ assert(request->type != NBD_CMD_WRITE);
+ request->len = 0;
+ }
if (allocate_buffer) {
/* READ, WRITE */
req->data = blk_try_blockalign(client->exp->common.blk,
}
}
if (payload_len) {
- /* WRITE */
- assert(req->data);
- ret = nbd_read(client->ioc, req->data, payload_len,
- "CMD_WRITE data", errp);
+ if (payload_okay) {
+ /* WRITE */
+ assert(req->data);
+ ret = nbd_read(client->ioc, req->data, payload_len,
+ "CMD_WRITE data", errp);
+ } else {
+ ret = nbd_drop(client->ioc, payload_len, errp);
+ }
if (ret < 0) {
return -EIO;
}
{
if (client->mode >= NBD_MODE_STRUCTURED && ret < 0) {
return nbd_co_send_chunk_error(client, request, -ret, error_msg, errp);
+ } else if (client->mode >= NBD_MODE_EXTENDED) {
+ return nbd_co_send_chunk_done(client, request, errp);
} else {
return nbd_co_send_simple_reply(client, request, ret < 0 ? -ret : 0,
NULL, 0, errp);
"discard failed", errp);
case NBD_CMD_BLOCK_STATUS:
- if (!request->len) {
- return nbd_send_generic_reply(client, request, -EINVAL,
- "need non-zero length", errp);
- }
- assert(request->len <= UINT32_MAX);
- if (client->export_meta.count) {
+ assert(request->contexts);
+ assert(client->mode >= NBD_MODE_EXTENDED ||
+ request->len <= UINT32_MAX);
+ if (request->contexts->count) {
bool dont_fragment = request->flags & NBD_CMD_FLAG_REQ_ONE;
- int contexts_remaining = client->export_meta.count;
+ int contexts_remaining = request->contexts->count;
- if (client->export_meta.base_allocation) {
+ if (!request->len) {
+ return nbd_send_generic_reply(client, request, -EINVAL,
+ "need non-zero length", errp);
+ }
+ if (request->contexts->base_allocation) {
ret = nbd_co_send_block_status(client, request,
exp->common.blk,
request->from,
}
}
- if (client->export_meta.allocation_depth) {
+ if (request->contexts->allocation_depth) {
ret = nbd_co_send_block_status(client, request,
exp->common.blk,
request->from, request->len,
}
}
+ assert(request->contexts->exp == client->exp);
for (i = 0; i < client->exp->nr_export_bitmaps; i++) {
- if (!client->export_meta.bitmaps[i]) {
+ if (!request->contexts->bitmaps[i]) {
continue;
}
ret = nbd_co_send_bitmap(client, request,
assert(!contexts_remaining);
return 0;
+ } else if (client->contexts.count) {
+ return nbd_send_generic_reply(client, request, -EINVAL,
+ "CMD_BLOCK_STATUS payload not valid",
+ errp);
} else {
return nbd_send_generic_reply(client, request, -EINVAL,
"CMD_BLOCK_STATUS not negotiated",
} else {
ret = nbd_handle_request(client, &request, req->data, &local_err);
}
+ if (request.contexts && request.contexts != &client->contexts) {
+ assert(request.type == NBD_CMD_BLOCK_STATUS);
+ g_free(request.contexts->bitmaps);
+ g_free(request.contexts);
+ }
if (ret < 0) {
error_prepend(&local_err, "Failed to send reply: ");
goto disconnect;
}
- /* We must disconnect after NBD_CMD_WRITE if we did not
- * read the payload.
+ /*
+ * We must disconnect after NBD_CMD_WRITE or BLOCK_STATUS with
+ * payload if we did not read the payload.
*/
if (!req->complete) {
error_setg(&local_err, "Request handling failed in intermediate state");