On macOS we need to increase unix socket buffers size on the client and
server to get good performance. We set socket buffers on macOS after
connecting or accepting a client connection.
Testing shows that setting socket receive buffer size (SO_RCVBUF) has no
effect on performance, so we set only the send buffer size (SO_SNDBUF).
This seems to work the same way as on Linux, but it is not documented.
Testing shows that optimal buffer size is 512k to 4 MiB, depending on
the test case. The difference is very small, so I chose 2 MiB.
I tested reading from qemu-nbd and writing to qemu-nbd with qemu-img and
computing a blkhash with nbdcopy and blksum.
To focus on NBD communication and get less noisy results, I tested
reading from and writing to the null-co driver. I added a read-pattern
option to the null-co driver to return data full of 0xff:
NULL="json:{'driver': 'raw', 'file': {'driver': 'null-co', 'size': '10g', 'read-pattern': 255}}"
For testing buffer size I added an environment variable for setting the
socket buffer size.
Read from qemu-nbd via qemu-img convert. In this test buffer size of 2m
is optimal (12.6 times faster).
qemu-nbd -r -t -e 0 -f raw -k /tmp/nbd.sock "$NULL" &
qemu-img convert -f raw -O raw -W -n "nbd+unix:///?socket=/tmp/nbd.sock" "$NULL"
| buffer size | time | user | system |
|-------------|---------|---------|---------|
| default | 13.361 | 2.653 | 5.702 |
| 65536 | 2.283 | 0.204 | 1.318 |
| 131072 | 1.673 | 0.062 | 1.008 |
| 262144 | 1.592 | 0.053 | 0.952 |
| 524288 | 1.496 | 0.049 | 0.887 |
| 1048576 | 1.234 | 0.047 | 0.738 |
| 2097152 | 1.060 | 0.080 | 0.602 |
| 4194304 | 1.061 | 0.076 | 0.604 |
Write to qemu-nbd with qemu-img convert. In this test buffer size of 2m
is optimal (9.2 times faster).
qemu-nbd -t -e 0 -f raw -k /tmp/nbd.sock "$NULL" &
qemu-img convert -f raw -O raw -W -n "$NULL" "nbd+unix:///?socket=/tmp/nbd.sock"
| buffer size | time | user | system |
|-------------|---------|---------|---------|
| default | 8.063 | 2.522 | 4.184 |
| 65536 | 1.472 | 0.430 | 0.867 |
| 131072 | 1.071 | 0.297 | 0.654 |
| 262144 | 1.012 | 0.239 | 0.587 |
| 524288 | 0.970 | 0.201 | 0.514 |
| 1048576 | 0.895 | 0.184 | 0.454 |
| 2097152 | 0.877 | 0.174 | 0.440 |
| 4194304 | 0.944 | 0.231 | 0.535 |
Compute a blkhash with nbdcopy, using 4 NBD connections and 256k request
size. In this test buffer size of 4m is optimal (5.1 times faster).
qemu-nbd -r -t -e 0 -f raw -k /tmp/nbd.sock "$NULL" &
nbdcopy --blkhash "nbd+unix:///?socket=/tmp/nbd.sock" null:
| buffer size | time | user | system |
|-------------|---------|---------|---------|
| default | 8.624 | 5.727 | 6.507 |
| 65536 | 2.563 | 4.760 | 2.498 |
| 131072 | 1.903 | 4.559 | 2.093 |
| 262144 | 1.759 | 4.513 | 1.935 |
| 524288 | 1.729 | 4.489 | 1.924 |
| 1048576 | 1.696 | 4.479 | 1.884 |
| 2097152 | 1.710 | 4.480 | 1.763 |
| 4194304 | 1.687 | 4.479 | 1.712 |
Compute a blkhash with blksum, using 1 NBD connection and 256k read
size. In this test buffer size of 512k is optimal (10.3 times faster).
qemu-nbd -r -t -e 0 -f raw -k /tmp/nbd.sock "$NULL" &
blksum "nbd+unix:///?socket=/tmp/nbd.sock"
| buffer size | time | user | system |
|-------------|---------|---------|---------|
| default | 13.085 | 5.664 | 6.461 |
| 65536 | 3.299 | 5.106 | 2.515 |
| 131072 | 2.396 | 4.989 | 2.069 |
| 262144 | 1.607 | 4.724 | 1.555 |
| 524288 | 1.271 | 4.528 | 1.224 |
| 1048576 | 1.294 | 4.565 | 1.333 |
| 2097152 | 1.299 | 4.569 | 1.344 |
| 4194304 | 1.291 | 4.559 | 1.327 |
Signed-off-by: Nir Soffer <nirsof@gmail.com>
Message-ID: <20250517201154.88456-3-nirsof@gmail.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
#include "qapi/clone-visitor.h"
#include "qemu/coroutine.h"
+#include "nbd/nbd-internal.h"
+
struct NBDClientConnection {
/* Initialization constants, never change */
SocketAddress *saddr; /* address to connect to */
return ret;
}
+ nbd_set_socket_send_buffer(sioc);
qio_channel_set_delay(QIO_CHANNEL(sioc), false);
if (!info) {
#include "qemu/osdep.h"
#include "trace.h"
+#include "io/channel-socket.h"
+#include "qapi/error.h"
+#include "qemu/units.h"
#include "nbd-internal.h"
/* Discard length bytes from channel. Return -errno on failure and 0 on
return "<unknown>";
}
}
+
+/*
+ * Send buffer size applied to unix stream sockets used for NBD.
+ *
+ * Testing shows that a 2 MiB send buffer is optimal. Changing the receive
+ * buffer size has no effect on performance, so only the send buffer is set.
+ *
+ * The constant is defined only when building for __APPLE__. Where it is not
+ * defined, the body of nbd_set_socket_send_buffer() is compiled out and the
+ * function does nothing.
+ */
+#if defined(__APPLE__)
+#define UNIX_STREAM_SOCKET_SEND_BUFFER_SIZE (2 * MiB)
+#endif
+
+void nbd_set_socket_send_buffer(QIOChannelSocket *sioc)
+{
+#ifdef UNIX_STREAM_SOCKET_SEND_BUFFER_SIZE
+ if (sioc->localAddr.ss_family == AF_UNIX) { /* only unix sockets are adjusted */
+ size_t size = UNIX_STREAM_SOCKET_SEND_BUFFER_SIZE;
+ Error *errp = NULL;
+
+ if (qio_channel_socket_set_send_buffer(sioc, size, &errp) < 0) {
+ warn_report_err(errp); /* not propagated: the function is void and returns normally */
+ }
+ }
+#endif /* UNIX_STREAM_SOCKET_SEND_BUFFER_SIZE */
+}
int nbd_drop(QIOChannel *ioc, size_t size, Error **errp);
+/* nbd_set_socket_send_buffer
+ * Set the socket send buffer size for optimal performance.
+ *
+ * @sioc: socket channel whose send buffer should be adjusted.
+ *
+ * The size is changed only in builds that define
+ * UNIX_STREAM_SOCKET_SEND_BUFFER_SIZE, and only when the local address
+ * family of @sioc is AF_UNIX; otherwise the call does nothing. A failure
+ * to set the size is passed to warn_report_err() and is not returned to
+ * the caller.
+ */
+void nbd_set_socket_send_buffer(QIOChannelSocket *sioc);
+
#endif
client->close_fn = close_fn;
client->owner = owner;
+ nbd_set_socket_send_buffer(sioc);
+
co = qemu_coroutine_create(nbd_co_client_start, client);
qemu_coroutine_enter(co);
}