F: drivers/crypto/virtio/
VIRTIO DRIVERS FOR S390
-M: Christian Borntraeger <borntraeger@de.ibm.com>
M: Cornelia Huck <cornelia.huck@de.ibm.com>
+M: Halil Pasic <pasic@linux.vnet.ibm.com>
L: linux-s390@vger.kernel.org
L: virtualization@lists.linux-foundation.org
L: kvm@vger.kernel.org
}
/* Discover virtqueues and write information to configuration. */
- err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names,
- &desc);
+ err = virtio_find_vqs(vdev, num_vqs, vqs, callbacks, names, &desc);
if (err)
goto out;
}
}
/* Find the queues. */
- err = portdev->vdev->config->find_vqs(portdev->vdev, nr_queues, vqs,
- io_callbacks,
- (const char **)io_names, NULL);
+ err = virtio_find_vqs(portdev->vdev, nr_queues, vqs,
+ io_callbacks,
+ (const char **)io_names, NULL);
if (err)
goto free;
names[i] = vi->data_vq[i].name;
}
- ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks,
- names, NULL);
+ ret = virtio_find_vqs(vi->vdev, total_vqs, vqs, callbacks, names, NULL);
if (ret)
goto err_find;
DRM_INFO("virgl 3d acceleration not supported by guest\n");
#endif
- ret = vgdev->vdev->config->find_vqs(vgdev->vdev, 2, vqs,
- callbacks, names, NULL);
+ ret = virtio_find_vqs(vgdev->vdev, 2, vqs, callbacks, names, NULL);
if (ret) {
DRM_ERROR("failed to find virt queues\n");
goto err_vqs;
static struct virtqueue *vop_find_vq(struct virtio_device *dev,
unsigned index,
void (*callback)(struct virtqueue *vq),
- const char *name)
+ const char *name, bool ctx)
{
struct _vop_vdev *vdev = to_vopvdev(dev);
struct vop_device *vpdev = vdev->vpdev;
le16_to_cpu(config.num), MIC_VIRTIO_RING_ALIGN,
dev,
false,
+ ctx,
(void __force *)va, vop_notify, callback, name);
if (!vq) {
err = -ENOMEM;
static int vop_find_vqs(struct virtio_device *dev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char * const names[], struct irq_affinity *desc)
+ const char * const names[], const bool *ctx,
+ struct irq_affinity *desc)
{
struct _vop_vdev *vdev = to_vopvdev(dev);
struct vop_device *vpdev = vdev->vpdev;
for (i = 0; i < nvqs; ++i) {
dev_dbg(_vop_dev(vdev), "%s: %d: %s\n",
__func__, i, names[i]);
- vqs[i] = vop_find_vq(dev, i, callbacks[i], names[i]);
+ vqs[i] = vop_find_vq(dev, i, callbacks[i], names[i],
+ ctx ? ctx[i] : false);
if (IS_ERR(vqs[i])) {
err = PTR_ERR(vqs[i]);
goto error;
goto err;
/* Get the TX virtio ring. This is a "guest side vring". */
- err = vdev->config->find_vqs(vdev, 1, &cfv->vq_tx, &vq_cbs, &names,
- NULL);
+ err = virtio_find_vqs(vdev, 1, &cfv->vq_tx, &vq_cbs, &names, NULL);
if (err)
goto err;
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/average.h>
+#include <net/route.h>
static int napi_weight = NAPI_POLL_WEIGHT;
module_param(napi_weight, int, 0444);
*/
DECLARE_EWMA(pkt_len, 0, 64)
-/* With mergeable buffers we align buffer address and use the low bits to
- * encode its true size. Buffer size is up to 1 page so we need to align to
- * square root of page size to ensure we reserve enough bits to encode the true
- * size.
- */
-#define MERGEABLE_BUFFER_MIN_ALIGN_SHIFT ((PAGE_SHIFT + 1) / 2)
-
-/* Minimum alignment for mergeable packet buffers. */
-#define MERGEABLE_BUFFER_ALIGN max(L1_CACHE_BYTES, \
- 1 << MERGEABLE_BUFFER_MIN_ALIGN_SHIFT)
-
#define VIRTNET_DRIVER_VERSION "1.0.0"
struct virtnet_stats {
/* RX: fragments + linear part + virtio header */
struct scatterlist sg[MAX_SKB_FRAGS + 2];
+ /* Min single buffer size for mergeable buffers case. */
+ unsigned int min_buf_len;
+
/* Name of this receive queue: input.$index */
char name[40];
};
netif_wake_subqueue(vi->dev, vq2txq(vq));
}
-static unsigned int mergeable_ctx_to_buf_truesize(unsigned long mrg_ctx)
-{
- unsigned int truesize = mrg_ctx & (MERGEABLE_BUFFER_ALIGN - 1);
- return (truesize + 1) * MERGEABLE_BUFFER_ALIGN;
-}
-
-static void *mergeable_ctx_to_buf_address(unsigned long mrg_ctx)
-{
- return (void *)(mrg_ctx & -MERGEABLE_BUFFER_ALIGN);
-
-}
-
-static unsigned long mergeable_buf_to_ctx(void *buf, unsigned int truesize)
-{
- unsigned int size = truesize / MERGEABLE_BUFFER_ALIGN;
- return (unsigned long)buf | (size - 1);
-}
-
/* Called from bottom half context */
static struct sk_buff *page_to_skb(struct virtnet_info *vi,
struct receive_queue *rq,
while (--*num_buf) {
unsigned int buflen;
- unsigned long ctx;
void *buf;
int off;
- ctx = (unsigned long)virtqueue_get_buf(rq->vq, &buflen);
- if (unlikely(!ctx))
+ buf = virtqueue_get_buf(rq->vq, &buflen);
+ if (unlikely(!buf))
goto err_buf;
- buf = mergeable_ctx_to_buf_address(ctx);
p = virt_to_head_page(buf);
off = buf - page_address(p);
static struct sk_buff *receive_mergeable(struct net_device *dev,
struct virtnet_info *vi,
struct receive_queue *rq,
- unsigned long ctx,
+ void *buf,
+ void *ctx,
unsigned int len)
{
- void *buf = mergeable_ctx_to_buf_address(ctx);
struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
struct page *page = virt_to_head_page(buf);
}
rcu_read_unlock();
- truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
+ if (unlikely(len > (unsigned long)ctx)) {
+ pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
+ dev->name, len, (unsigned long)ctx);
+ dev->stats.rx_length_errors++;
+ goto err_skb;
+ }
+ truesize = (unsigned long)ctx;
head_skb = page_to_skb(vi, rq, page, offset, len, truesize);
curr_skb = head_skb;
while (--num_buf) {
int num_skb_frags;
- ctx = (unsigned long)virtqueue_get_buf(rq->vq, &len);
- if (unlikely(!ctx)) {
+ buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx);
+ /* Test the returned buffer token, not ctx: ctx is only filled in by
+ * detach_buf() when a buffer was actually fetched, so it may still
+ * hold the previous buffer's value when no buffer is returned.
+ */
+ if (unlikely(!buf)) {
pr_debug("%s: rx error: %d buffers out of %d missing\n",
dev->name, num_buf,
goto err_buf;
}
- buf = mergeable_ctx_to_buf_address(ctx);
page = virt_to_head_page(buf);
+ if (unlikely(len > (unsigned long)ctx)) {
+ pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
+ dev->name, len, (unsigned long)ctx);
+ dev->stats.rx_length_errors++;
+ goto err_skb;
+ }
+ truesize = (unsigned long)ctx;
num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
head_skb->truesize += nskb->truesize;
num_skb_frags = 0;
}
- truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
if (curr_skb != head_skb) {
head_skb->data_len += len;
head_skb->len += len;
err_skb:
put_page(page);
while (--num_buf) {
- ctx = (unsigned long)virtqueue_get_buf(rq->vq, &len);
- if (unlikely(!ctx)) {
+ buf = virtqueue_get_buf(rq->vq, &len);
+ if (unlikely(!buf)) {
pr_debug("%s: rx error: %d buffers missing\n",
dev->name, num_buf);
dev->stats.rx_length_errors++;
break;
}
- page = virt_to_head_page(mergeable_ctx_to_buf_address(ctx));
+ page = virt_to_head_page(buf);
put_page(page);
}
err_buf:
}
+/* @ctx is the per-buffer context value itself (NULL when the caller has
+ * none); it is passed by value and forwarded to receive_mergeable().
+ */
static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
- void *buf, unsigned int len)
+ void *buf, unsigned int len, void *ctx)
{
struct net_device *dev = vi->dev;
struct sk_buff *skb;
pr_debug("%s: short packet %i\n", dev->name, len);
dev->stats.rx_length_errors++;
if (vi->mergeable_rx_bufs) {
- unsigned long ctx = (unsigned long)buf;
- void *base = mergeable_ctx_to_buf_address(ctx);
- put_page(virt_to_head_page(base));
+ put_page(virt_to_head_page(buf));
} else if (vi->big_packets) {
give_pages(rq, buf);
} else {
}
if (vi->mergeable_rx_bufs)
- skb = receive_mergeable(dev, vi, rq, (unsigned long)buf, len);
+ skb = receive_mergeable(dev, vi, rq, buf, ctx, len);
else if (vi->big_packets)
skb = receive_big(dev, vi, rq, buf, len);
else
return err;
}
+/* Size to use for the next mergeable receive buffer of @rq: the mergeable
+ * header plus the value read from the @avg_pkt_len EWMA, clamped to
+ * [rq->min_buf_len - hdr_len, PAGE_SIZE - hdr_len], then rounded up to a
+ * multiple of L1_CACHE_BYTES.
+ */
-static unsigned int get_mergeable_buf_len(struct ewma_pkt_len *avg_pkt_len)
+static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
+ struct ewma_pkt_len *avg_pkt_len)
{
const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
unsigned int len;
+ /* NOTE(review): rq->min_buf_len - hdr_len is an unsigned subtraction;
+ * it relies on min_buf_len >= hdr_len, which mergeable_min_buf_len()
+ * provides via max(..., hdr_len) - confirm min_buf_len is always set
+ * before this is called.
+ */
len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
- GOOD_PACKET_LEN, PAGE_SIZE - hdr_len);
- return ALIGN(len, MERGEABLE_BUFFER_ALIGN);
+ rq->min_buf_len - hdr_len, PAGE_SIZE - hdr_len);
+ return ALIGN(len, L1_CACHE_BYTES);
}
static int add_recvbuf_mergeable(struct virtnet_info *vi,
struct page_frag *alloc_frag = &rq->alloc_frag;
unsigned int headroom = virtnet_get_headroom(vi);
char *buf;
- unsigned long ctx;
+ void *ctx;
int err;
unsigned int len, hole;
- len = get_mergeable_buf_len(&rq->mrg_avg_pkt_len);
+ len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len);
if (unlikely(!skb_page_frag_refill(len + headroom, alloc_frag, gfp)))
return -ENOMEM;
buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
buf += headroom; /* advance address leaving hole at front of pkt */
- ctx = mergeable_buf_to_ctx(buf, len);
+ ctx = (void *)(unsigned long)len;
get_page(alloc_frag->page);
alloc_frag->offset += len + headroom;
hole = alloc_frag->size - alloc_frag->offset;
}
sg_init_one(rq->sg, buf, len);
- err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, (void *)ctx, gfp);
+ err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
if (err < 0)
put_page(virt_to_head_page(buf));
void *buf;
struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
- while (received < budget &&
- (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
- bytes += receive_buf(vi, rq, buf, len);
- received++;
+ if (vi->mergeable_rx_bufs) {
+ void *ctx;
+
+ while (received < budget &&
+ (buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx))) {
+ bytes += receive_buf(vi, rq, buf, len, ctx);
+ received++;
+ }
+ } else {
+ while (received < budget &&
+ (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
+ bytes += receive_buf(vi, rq, buf, len, NULL);
+ received++;
+ }
}
if (rq->vq->num_free > virtqueue_get_vring_size(rq->vq) / 2) {
virtnet_freeze_down(dev);
_remove_vq_common(vi);
- dev->config->reset(dev);
virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER);
while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
if (vi->mergeable_rx_bufs) {
- unsigned long ctx = (unsigned long)buf;
- void *base = mergeable_ctx_to_buf_address(ctx);
- put_page(virt_to_head_page(base));
+ put_page(virt_to_head_page(buf));
} else if (vi->big_packets) {
give_pages(&vi->rq[i], buf);
} else {
virtnet_free_queues(vi);
}
+/* Smallest size a single receive buffer may have so that a ring filled
+ * entirely with buffers of that size can still hold one full packet.
+ * The computation assumes the mergeable buffer header is in use.
+ */
+static unsigned int mergeable_min_buf_len(struct virtnet_info *vi,
+					  struct virtqueue *vq)
+{
+	const unsigned int hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+	unsigned int ring_entries = virtqueue_get_vring_size(vq);
+	unsigned int max_payload;
+	unsigned int max_frame;
+	unsigned int per_buf;
+
+	/* Largest payload we may be asked to receive. */
+	if (vi->big_packets)
+		max_payload = IP_MAX_MTU;
+	else
+		max_payload = vi->dev->max_mtu;
+
+	/* Spread one maximum-sized frame evenly over the whole ring. */
+	max_frame = hdr_len + ETH_HLEN + VLAN_HLEN + max_payload;
+	per_buf = DIV_ROUND_UP(max_frame, ring_entries);
+
+	/* Never go below the size of the header itself. */
+	return max(per_buf, hdr_len);
+}
+
static int virtnet_find_vqs(struct virtnet_info *vi)
{
vq_callback_t **callbacks;
int ret = -ENOMEM;
int i, total_vqs;
const char **names;
+ bool *ctx;
/* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by
* possible N-1 RX/TX queue pairs used in multiqueue mode, followed by
names = kmalloc(total_vqs * sizeof(*names), GFP_KERNEL);
if (!names)
goto err_names;
+ if (vi->mergeable_rx_bufs) {
+ ctx = kzalloc(total_vqs * sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ goto err_ctx;
+ } else {
+ ctx = NULL;
+ }
/* Parameters for control virtqueue, if any */
if (vi->has_cvq) {
sprintf(vi->sq[i].name, "output.%d", i);
names[rxq2vq(i)] = vi->rq[i].name;
names[txq2vq(i)] = vi->sq[i].name;
+ if (ctx)
+ ctx[rxq2vq(i)] = true;
}
ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks,
- names, NULL);
+ names, ctx, NULL);
if (ret)
goto err_find;
for (i = 0; i < vi->max_queue_pairs; i++) {
vi->rq[i].vq = vqs[rxq2vq(i)];
+ vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq);
vi->sq[i].vq = vqs[txq2vq(i)];
}
+ /* ctx is only consumed by find_vqs() above; free it on success as
+ * well, otherwise it is released only on the err_find path.
+ * kfree(NULL) is a no-op for the non-mergeable case.
+ */
+ kfree(ctx);
return 0;
err_find:
+ kfree(ctx);
+err_ctx:
kfree(names);
err_names:
kfree(callbacks);
BUG_ON(queue_index >= vi->max_queue_pairs);
avg = &vi->rq[queue_index].mrg_avg_pkt_len;
- return sprintf(buf, "%u\n", get_mergeable_buf_len(avg));
+ return sprintf(buf, "%u\n",
+ get_mergeable_buf_len(&vi->rq[queue_index], avg));
}
static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =
static struct virtqueue *rp_find_vq(struct virtio_device *vdev,
unsigned int id,
void (*callback)(struct virtqueue *vq),
- const char *name)
+ const char *name, bool ctx)
{
struct rproc_vdev *rvdev = vdev_to_rvdev(vdev);
struct rproc *rproc = vdev_to_rproc(vdev);
* Create the new vq, and tell virtio we're not interested in
* the 'weak' smp barriers, since we're talking with a real device.
*/
- vq = vring_new_virtqueue(id, len, rvring->align, vdev, false, addr,
- rproc_virtio_notify, callback, name);
+ vq = vring_new_virtqueue(id, len, rvring->align, vdev, false, ctx,
+ addr, rproc_virtio_notify, callback, name);
if (!vq) {
dev_err(dev, "vring_new_virtqueue %s failed\n", name);
rproc_free_vring(rvring);
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
const char * const names[],
+ const bool * ctx,
struct irq_affinity *desc)
{
int i, ret;
for (i = 0; i < nvqs; ++i) {
- vqs[i] = rp_find_vq(vdev, i, callbacks[i], names[i]);
+ vqs[i] = rp_find_vq(vdev, i, callbacks[i], names[i],
+ ctx ? ctx[i] : false);
if (IS_ERR(vqs[i])) {
ret = PTR_ERR(vqs[i]);
goto error;
init_waitqueue_head(&vrp->sendq);
/* We expect two virtqueues, rx and tx (and in this order) */
- err = vdev->config->find_vqs(vdev, 2, vqs, vq_cbs, names, NULL);
+ err = virtio_find_vqs(vdev, 2, vqs, vq_cbs, names, NULL);
if (err)
goto free_vrp;
static struct virtqueue *kvm_find_vq(struct virtio_device *vdev,
unsigned index,
void (*callback)(struct virtqueue *vq),
- const char *name)
+ const char *name, bool ctx)
{
struct kvm_device *kdev = to_kvmdev(vdev);
struct kvm_vqconfig *config;
goto out;
vq = vring_new_virtqueue(index, config->num, KVM_S390_VIRTIO_RING_ALIGN,
- vdev, true, (void *) config->address,
+ vdev, true, ctx, (void *) config->address,
kvm_notify, callback, name);
if (!vq) {
err = -ENOMEM;
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
const char * const names[],
+ const bool *ctx,
struct irq_affinity *desc)
{
struct kvm_device *kdev = to_kvmdev(vdev);
return -ENOENT;
for (i = 0; i < nvqs; ++i) {
- vqs[i] = kvm_find_vq(vdev, i, callbacks[i], names[i]);
+ vqs[i] = kvm_find_vq(vdev, i, callbacks[i], names[i],
+ ctx ? ctx[i] : false);
if (IS_ERR(vqs[i]))
goto error;
}
static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev,
int i, vq_callback_t *callback,
- const char *name,
+ const char *name, bool ctx,
struct ccw1 *ccw)
{
struct virtio_ccw_device *vcdev = to_vc_device(vdev);
}
vq = vring_new_virtqueue(i, info->num, KVM_VIRTIO_CCW_RING_ALIGN, vdev,
- true, info->queue, virtio_ccw_kvm_notify,
+ true, ctx, info->queue, virtio_ccw_kvm_notify,
callback, name);
if (!vq) {
/* For now, we fail if we can't get the requested size. */
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
const char * const names[],
+ const bool *ctx,
struct irq_affinity *desc)
{
struct virtio_ccw_device *vcdev = to_vc_device(vdev);
for (i = 0; i < nvqs; ++i) {
vqs[i] = virtio_ccw_setup_vq(vdev, i, callbacks[i], names[i],
- ccw);
+ ctx ? ctx[i] : false, ccw);
if (IS_ERR(vqs[i])) {
ret = PTR_ERR(vqs[i]);
vqs[i] = NULL;
}
/* Discover virtqueues and write information to configuration. */
- err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names,
- &desc);
+ err = virtio_find_vqs(vdev, num_vqs, vqs, callbacks, names, &desc);
if (err)
goto out;
* optionally stat.
*/
nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
- err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names,
- NULL);
+ err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL);
if (err)
return err;
static const char * const names[] = { "events", "status" };
int err;
- err = vi->vdev->config->find_vqs(vi->vdev, 2, vqs, cbs, names,
- NULL);
+ err = virtio_find_vqs(vi->vdev, 2, vqs, cbs, names, NULL);
if (err)
return err;
vi->evt = vqs[0];
static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
void (*callback)(struct virtqueue *vq),
- const char *name)
+ const char *name, bool ctx)
{
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
struct virtio_mmio_vq_info *info;
/* Create the vring */
vq = vring_create_virtqueue(index, num, VIRTIO_MMIO_VRING_ALIGN, vdev,
- true, true, vm_notify, callback, name);
+ true, true, ctx, vm_notify, callback, name);
if (!vq) {
err = -ENOMEM;
goto error_new_virtqueue;
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
const char * const names[],
+ const bool *ctx,
struct irq_affinity *desc)
{
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
return err;
for (i = 0; i < nvqs; ++i) {
- vqs[i] = vm_setup_vq(vdev, i, callbacks[i], names[i]);
+ vqs[i] = vm_setup_vq(vdev, i, callbacks[i], names[i],
+ ctx ? ctx[i] : false);
if (IS_ERR(vqs[i])) {
vm_del_vqs(vdev);
return PTR_ERR(vqs[i]);
static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index,
void (*callback)(struct virtqueue *vq),
const char *name,
+ bool ctx,
u16 msix_vec)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
if (!info)
return ERR_PTR(-ENOMEM);
- vq = vp_dev->setup_vq(vp_dev, info, index, callback, name,
+ vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, ctx,
msix_vec);
if (IS_ERR(vq))
goto out_info;
static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[], vq_callback_t *callbacks[],
const char * const names[], bool per_vq_vectors,
+ const bool *ctx,
struct irq_affinity *desc)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
else
msix_vec = VP_MSIX_VQ_VECTOR;
vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i],
+ ctx ? ctx[i] : false,
msix_vec);
if (IS_ERR(vqs[i])) {
err = PTR_ERR(vqs[i]);
static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[], vq_callback_t *callbacks[],
- const char * const names[])
+ const char * const names[], const bool *ctx)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
int i, err;
continue;
}
vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i],
+ ctx ? ctx[i] : false,
VIRTIO_MSI_NO_VECTOR);
if (IS_ERR(vqs[i])) {
err = PTR_ERR(vqs[i]);
/* the config->find_vqs() implementation */
int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[], vq_callback_t *callbacks[],
- const char * const names[], struct irq_affinity *desc)
+ const char * const names[], const bool *ctx,
+ struct irq_affinity *desc)
{
int err;
/* Try MSI-X with one vector per queue. */
- err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true, desc);
+ err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true, ctx, desc);
if (!err)
return 0;
/* Fallback: MSI-X with one vector for config, one shared for queues. */
- err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false, desc);
+ err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false, ctx, desc);
if (!err)
return 0;
/* Finally fall back to regular interrupts. */
- return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names);
+ return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names, ctx);
}
const char *vp_bus_name(struct virtio_device *vdev)
unsigned idx,
void (*callback)(struct virtqueue *vq),
const char *name,
+ bool ctx,
u16 msix_vec);
void (*del_vq)(struct virtio_pci_vq_info *info);
/* the config->find_vqs() implementation */
int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[], vq_callback_t *callbacks[],
- const char * const names[], struct irq_affinity *desc);
+ const char * const names[], const bool *ctx,
+ struct irq_affinity *desc);
const char *vp_bus_name(struct virtio_device *vdev);
/* Setup the affinity for a virtqueue:
unsigned index,
void (*callback)(struct virtqueue *vq),
const char *name,
+ bool ctx,
u16 msix_vec)
{
struct virtqueue *vq;
/* create the vring */
vq = vring_create_virtqueue(index, num,
VIRTIO_PCI_VRING_ALIGN, &vp_dev->vdev,
- true, false, vp_notify, callback, name);
+ true, false, ctx,
+ vp_notify, callback, name);
if (!vq)
return ERR_PTR(-ENOMEM);
unsigned index,
void (*callback)(struct virtqueue *vq),
const char *name,
+ bool ctx,
u16 msix_vec)
{
struct virtio_pci_common_cfg __iomem *cfg = vp_dev->common;
/* create the vring */
vq = vring_create_virtqueue(index, num,
SMP_CACHE_BYTES, &vp_dev->vdev,
- true, true, vp_notify, callback, name);
+ true, true, ctx,
+ vp_notify, callback, name);
if (!vq)
return ERR_PTR(-ENOMEM);
}
static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs,
- struct virtqueue *vqs[], vq_callback_t *callbacks[],
- const char * const names[], struct irq_affinity *desc)
+ struct virtqueue *vqs[],
+ vq_callback_t *callbacks[],
+ const char * const names[], const bool *ctx,
+ struct irq_affinity *desc)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
struct virtqueue *vq;
- int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, desc);
+ int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, ctx, desc);
if (rc)
return rc;
unsigned int out_sgs,
unsigned int in_sgs,
void *data,
+ void *ctx,
gfp_t gfp)
{
struct vring_virtqueue *vq = to_vvq(_vq);
START_USE(vq);
BUG_ON(data == NULL);
+ BUG_ON(ctx && vq->indirect);
if (unlikely(vq->broken)) {
END_USE(vq);
vq->desc_state[head].data = data;
if (indirect)
vq->desc_state[head].indir_desc = desc;
+ if (ctx)
+ vq->desc_state[head].indir_desc = ctx;
/* Put entry in available array (but don't update avail->idx until they
* do sync). */
for (sg = sgs[i]; sg; sg = sg_next(sg))
total_sg++;
}
- return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, data, gfp);
+ return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
+ data, NULL, gfp);
}
EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
void *data,
gfp_t gfp)
{
- return virtqueue_add(vq, &sg, num, 1, 0, data, gfp);
+ return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
}
EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
void *data,
gfp_t gfp)
{
- return virtqueue_add(vq, &sg, num, 0, 1, data, gfp);
+ return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
}
EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
+/**
+ * virtqueue_add_inbuf_ctx - expose input buffers to other end
+ * @vq: the struct virtqueue we're talking about.
+ * @sg: scatterlist (must be well-formed and terminated!)
+ * @num: the number of entries in @sg writable by other side
+ * @data: the token identifying the buffer.
+ * @ctx: extra context for the token
+ * @gfp: how to do memory allocations (if necessary).
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * A non-NULL @ctx may only be passed for a queue that does not use
+ * indirect descriptors: virtqueue_add() does BUG_ON(ctx && vq->indirect).
+ * The stored @ctx is handed back through virtqueue_get_buf_ctx().
+ *
+ * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
+ */
+int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
+ struct scatterlist *sg, unsigned int num,
+ void *data,
+ void *ctx,
+ gfp_t gfp)
+{
+ return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
+}
+EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
+
/**
* virtqueue_kick_prepare - first half of split virtqueue_kick call.
* @vq: the struct virtqueue
}
EXPORT_SYMBOL_GPL(virtqueue_kick);
-static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
+static void detach_buf(struct vring_virtqueue *vq, unsigned int head,
+ void **ctx)
{
unsigned int i, j;
__virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
/* Plus final descriptor */
vq->vq.num_free++;
- /* Free the indirect table, if any, now that it's unmapped. */
- if (vq->desc_state[head].indir_desc) {
+ if (vq->indirect) {
struct vring_desc *indir_desc = vq->desc_state[head].indir_desc;
- u32 len = virtio32_to_cpu(vq->vq.vdev, vq->vring.desc[head].len);
+ u32 len;
+
+ /* Free the indirect table, if any, now that it's unmapped. */
+ if (!indir_desc)
+ return;
+
+ len = virtio32_to_cpu(vq->vq.vdev, vq->vring.desc[head].len);
BUG_ON(!(vq->vring.desc[head].flags &
cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));
for (j = 0; j < len / sizeof(struct vring_desc); j++)
vring_unmap_one(vq, &indir_desc[j]);
- kfree(vq->desc_state[head].indir_desc);
+ kfree(indir_desc);
vq->desc_state[head].indir_desc = NULL;
+ } else if (ctx) {
+ *ctx = vq->desc_state[head].indir_desc;
}
}
* Returns NULL if there are no used buffers, or the "data" token
* handed to virtqueue_add_*().
*/
-void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
+void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
+ void **ctx)
{
struct vring_virtqueue *vq = to_vvq(_vq);
void *ret;
/* detach_buf clears data, so grab it now. */
ret = vq->desc_state[i].data;
- detach_buf(vq, i);
+ detach_buf(vq, i, ctx);
vq->last_used_idx++;
/* If we expect an interrupt for the next entry, tell host
* by writing event index and flush out the write before
END_USE(vq);
return ret;
}
-EXPORT_SYMBOL_GPL(virtqueue_get_buf);
+EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
+/* Fetch the next used buffer without retrieving its per-buffer context:
+ * calls virtqueue_get_buf_ctx() with a NULL ctx pointer and returns its
+ * result unchanged.
+ */
+void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
+{
+ return virtqueue_get_buf_ctx(_vq, len, NULL);
+}
+EXPORT_SYMBOL_GPL(virtqueue_get_buf);
/**
* virtqueue_disable_cb - disable callbacks
* @vq: the struct virtqueue we're talking about.
continue;
/* detach_buf clears data, so grab it now. */
buf = vq->desc_state[i].data;
- detach_buf(vq, i);
+ detach_buf(vq, i, NULL);
vq->avail_idx_shadow--;
vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
END_USE(vq);
struct vring vring,
struct virtio_device *vdev,
bool weak_barriers,
+ bool context,
bool (*notify)(struct virtqueue *),
void (*callback)(struct virtqueue *),
const char *name)
vq->last_add_time_valid = false;
#endif
- vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC);
+ vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
+ !context;
vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
/* No callback? Tell other side not to bother us. */
struct virtio_device *vdev,
bool weak_barriers,
bool may_reduce_num,
+ bool context,
bool (*notify)(struct virtqueue *),
void (*callback)(struct virtqueue *),
const char *name)
queue_size_in_bytes = vring_size(num, vring_align);
vring_init(&vring, num, queue, vring_align);
- vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers,
+ vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
notify, callback, name);
if (!vq) {
vring_free_queue(vdev, queue_size_in_bytes, queue,
unsigned int vring_align,
struct virtio_device *vdev,
bool weak_barriers,
+ bool context,
void *pages,
bool (*notify)(struct virtqueue *vq),
void (*callback)(struct virtqueue *vq),
{
struct vring vring;
vring_init(&vring, num, pages, vring_align);
- return __vring_new_virtqueue(index, vring, vdev, weak_barriers,
+ return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
notify, callback, name);
}
EXPORT_SYMBOL_GPL(vring_new_virtqueue);
struct ptr_ring {
int producer ____cacheline_aligned_in_smp;
spinlock_t producer_lock;
- int consumer ____cacheline_aligned_in_smp;
+ int consumer_head ____cacheline_aligned_in_smp; /* next valid entry */
+ int consumer_tail; /* next entry to invalidate */
spinlock_t consumer_lock;
/* Shared consumer/producer data */
/* Read-only by both the producer and the consumer */
int size ____cacheline_aligned_in_smp; /* max entries in queue */
+ int batch; /* number of entries to consume in a batch */
void **queue;
};
static inline void *__ptr_ring_peek(struct ptr_ring *r)
{
if (likely(r->size))
- return r->queue[r->consumer];
+ return r->queue[r->consumer_head];
return NULL;
}
/* Must only be called after __ptr_ring_peek returned !NULL */
static inline void __ptr_ring_discard_one(struct ptr_ring *r)
{
- r->queue[r->consumer++] = NULL;
- if (unlikely(r->consumer >= r->size))
- r->consumer = 0;
+ /* Fundamentally, what we want to do is update consumer
+ * index and zero out the entry so producer can reuse it.
+ * Doing it naively at each consume would be as simple as:
+ * r->queue[r->consumer++] = NULL;
+ * if (unlikely(r->consumer >= r->size))
+ * r->consumer = 0;
+ * but that is suboptimal when the ring is full as producer is writing
+ * out new entries in the same cache line. Defer these updates until a
+ * batch of entries has been consumed.
+ */
+ int head = r->consumer_head++;
+
+ /* Once we have processed enough entries invalidate them in
+ * the ring all at once so producer can reuse their space in the ring.
+ * We also do this when we reach end of the ring - not mandatory
+ * but helps keep the implementation simple.
+ */
+ if (unlikely(r->consumer_head - r->consumer_tail >= r->batch ||
+ r->consumer_head >= r->size)) {
+ /* Zero out entries in the reverse order: this way we touch the
+ * cache line that producer might currently be reading the last;
+ * producer won't make progress and touch other cache lines
+ * besides the first one until we write out all entries.
+ */
+ while (likely(head >= r->consumer_tail))
+ r->queue[head--] = NULL;
+ r->consumer_tail = r->consumer_head;
+ }
+ if (unlikely(r->consumer_head >= r->size)) {
+ r->consumer_head = 0;
+ r->consumer_tail = 0;
+ }
}
static inline void *__ptr_ring_consume(struct ptr_ring *r)
return kzalloc(ALIGN(size * sizeof(void *), SMP_CACHE_BYTES), gfp);
}
+/* Record the ring size and derive the consume batch size from it. */
+static inline void __ptr_ring_set_size(struct ptr_ring *r, int size)
+{
+	/* Start from the number of queue slots spanning two cache lines. */
+	int batch = SMP_CACHE_BYTES * 2 / sizeof(*(r->queue));
+
+	/* The batch must be at least 1 for __ptr_ring_discard_one to work.
+	 * On a small ring a large batch would make consumption very bursty;
+	 * this needs tuning, so for now batching is simply disabled there.
+	 */
+	if (!batch || batch > size / 2)
+		batch = 1;
+
+	r->size = size;
+	r->batch = batch;
+}
+
static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp)
{
r->queue = __ptr_ring_init_queue_alloc(size, gfp);
if (!r->queue)
return -ENOMEM;
- r->size = size;
- r->producer = r->consumer = 0;
+ __ptr_ring_set_size(r, size);
+ r->producer = r->consumer_head = r->consumer_tail = 0;
spin_lock_init(&r->producer_lock);
spin_lock_init(&r->consumer_lock);
else if (destroy)
destroy(ptr);
- r->size = size;
+ __ptr_ring_set_size(r, size);
r->producer = producer;
- r->consumer = 0;
+ r->consumer_head = 0;
+ r->consumer_tail = 0;
old = r->queue;
r->queue = queue;
void *data,
gfp_t gfp);
+int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
+ struct scatterlist sg[], unsigned int num,
+ void *data,
+ void *ctx,
+ gfp_t gfp);
+
int virtqueue_add_sgs(struct virtqueue *vq,
struct scatterlist *sgs[],
unsigned int out_sgs,
void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
+void *virtqueue_get_buf_ctx(struct virtqueue *vq, unsigned int *len,
+ void **ctx);
+
void virtqueue_disable_cb(struct virtqueue *vq);
bool virtqueue_enable_cb(struct virtqueue *vq);
* @feature_table_legacy: same as feature_table but when working in legacy mode.
* @feature_table_size_legacy: number of entries in feature table legacy array.
* @probe: the function to call when a device is found. Returns 0 or -errno.
+ * @scan: optional function to call after successful probe; intended
+ * for virtio-scsi to invoke a scan.
* @remove: the function to call when a device is removed.
* @config_changed: optional function to call when the device configuration
* changes; may be called in interrupt context.
+ * @freeze: optional function to call during suspend/hibernation.
+ * @restore: optional function to call on resume.
*/
struct virtio_driver {
struct device_driver driver;
void (*reset)(struct virtio_device *vdev);
int (*find_vqs)(struct virtio_device *, unsigned nvqs,
struct virtqueue *vqs[], vq_callback_t *callbacks[],
- const char * const names[], struct irq_affinity *desc);
+ const char * const names[], const bool *ctx,
+ struct irq_affinity *desc);
void (*del_vqs)(struct virtio_device *);
u64 (*get_features)(struct virtio_device *vdev);
int (*finalize_features)(struct virtio_device *vdev);
vq_callback_t *callbacks[] = { c };
const char *names[] = { n };
struct virtqueue *vq;
- int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL);
+ int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL,
+ NULL);
if (err < 0)
return ERR_PTR(err);
return vq;
}
+/* Find @nvqs virtqueues without per-buffer context: forwards all arguments
+ * to vdev->config->find_vqs() with a NULL ctx array and returns its result.
+ */
+static inline
+int virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs,
+ struct virtqueue *vqs[], vq_callback_t *callbacks[],
+ const char * const names[],
+ struct irq_affinity *desc)
+{
+ return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL, desc);
+}
+
+/* Find @nvqs virtqueues, forwarding @ctx unchanged to
+ * vdev->config->find_vqs() and returning its result. The transports
+ * touched by this patch read @ctx, when non-NULL, as one bool per queue
+ * (ctx[i]) and treat a NULL @ctx as false for every queue.
+ */
+static inline
+int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs,
+ struct virtqueue *vqs[], vq_callback_t *callbacks[],
+ const char * const names[], const bool *ctx,
+ struct irq_affinity *desc)
+{
+ return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, ctx,
+ desc);
+}
+
/**
* virtio_device_ready - enable vq use in probe function
* @vdev: the device
struct virtio_device *vdev,
bool weak_barriers,
bool may_reduce_num,
+ bool ctx,
bool (*notify)(struct virtqueue *vq),
void (*callback)(struct virtqueue *vq),
const char *name);
struct vring vring,
struct virtio_device *vdev,
bool weak_barriers,
+ bool ctx,
bool (*notify)(struct virtqueue *),
void (*callback)(struct virtqueue *),
const char *name);
unsigned int vring_align,
struct virtio_device *vdev,
bool weak_barriers,
+ bool ctx,
void *pages,
bool (*notify)(struct virtqueue *vq),
void (*callback)(struct virtqueue *vq),
vsock->vdev = vdev;
- ret = vsock->vdev->config->find_vqs(vsock->vdev, VSOCK_VQ_MAX,
- vsock->vqs, callbacks, names,
- NULL);
+ ret = virtio_find_vqs(vsock->vdev, VSOCK_VQ_MAX,
+ vsock->vqs, callbacks, names,
+ NULL);
if (ret < 0)
goto out;
unsigned int vring_align,
struct virtio_device *vdev,
bool weak_barriers,
+ bool ctx,
void *pages,
bool (*notify)(struct virtqueue *vq),
void (*callback)(struct virtqueue *vq),
int runcycles = 10000000;
int max_outstanding = INT_MAX;
int batch = 1;
+int param = 0;
bool do_sleep = false;
bool do_relax = false;
cpu = strtol(arg, &endptr, 0);
assert(!*endptr);
- assert(cpu >= 0 || cpu < CPU_SETSIZE);
+ assert(cpu >= 0 && cpu < CPU_SETSIZE);
self = pthread_self();
CPU_ZERO(&cpuset);
.has_arg = required_argument,
.val = 'b',
},
+ {
+ .name = "param",
+ .has_arg = required_argument,
+ .val = 'p',
+ },
{
.name = "sleep",
.has_arg = no_argument,
" [--run-cycles C (default: %d)]"
" [--batch b]"
" [--outstanding o]"
+ " [--param p]"
" [--sleep]"
" [--relax]"
" [--exit]"
assert(c > 0 && c < INT_MAX);
max_outstanding = c;
break;
+ case 'p':
+ c = strtol(optarg, &endptr, 0);
+ assert(!*endptr);
+ assert(c > 0 && c < INT_MAX);
+ param = c;
+ break;
case 'b':
c = strtol(optarg, &endptr, 0);
assert(!*endptr);
#include <stdbool.h>
+extern int param;
+
extern bool do_exit;
#if defined(__x86_64__) || defined(__i386__)
{
int ret = ptr_ring_init(&array, ring_size, 0);
assert(!ret);
+ /* Hacky way to poke at ring internals. Useful for testing though. */
+ if (param)
+ array.batch = param;
}
/* guest side */
vring_init(&info->vring, num, info->ring, 4096);
info->vq = vring_new_virtqueue(info->idx,
info->vring.num, 4096, &dev->vdev,
- true, info->ring,
+ true, false, info->ring,
vq_notify, vq_callback, "test");
assert(info->vq);
info->vq->priv = info;
test = 0;
r = ioctl(dev->control, VHOST_TEST_RUN, &test);
assert(r >= 0);
- fprintf(stderr, "spurious wakeus: 0x%llx\n", spurious);
+ fprintf(stderr, "spurious wakeups: 0x%llx\n", spurious);
}
const char optstring[] = "h";
err(1, "Could not set affinity to cpu %u", first_cpu);
vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &gvdev.vdev, true,
- guest_map, fast_vringh ? no_notify_host
+ false, guest_map,
+ fast_vringh ? no_notify_host
: parallel_notify_host,
never_callback_guest, "guest vq");
memset(__user_addr_min, 0, vring_size(RINGSIZE, ALIGN));
/* Set up guest side. */
- vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true,
+ vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true, false,
__user_addr_min,
never_notify_host, never_callback_guest,
"guest vq");
/* Force creation of direct, which we modify. */
__virtio_clear_bit(&vdev, VIRTIO_RING_F_INDIRECT_DESC);
vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true,
- __user_addr_min,
+ false, __user_addr_min,
never_notify_host,
never_callback_guest,
"guest vq");