net/xdp/xdp_umem.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /* XDP user-space packet buffer
   3  * Copyright(c) 2018 Intel Corporation.
   4  */
   5
   6 #include <linux/init.h>
   7 #include <linux/sched/mm.h>
   8 #include <linux/sched/signal.h>
   9 #include <linux/sched/task.h>
  10 #include <linux/uaccess.h>
  11 #include <linux/slab.h>
  12 #include <linux/bpf.h>
  13 #include <linux/mm.h>
  14 #include <linux/netdevice.h>
  15 #include <linux/rtnetlink.h>
  16 #include <linux/idr.h>
  17
  18 #include "xdp_umem.h"
  19 #include "xsk_queue.h"
  20
/* Smallest chunk size, in bytes, that xdp_umem_reg() accepts. */
#define XDP_UMEM_MIN_CHUNK_SIZE 2048

/* Allocator for umem->id: an id is taken in xdp_umem_create() and given
 * back in xdp_umem_release() or on the creation error path.
 */
static DEFINE_IDA(umem_ida);
  24
  25 void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
  26 {
  27         unsigned long flags;
  28
  29         spin_lock_irqsave(&umem->xsk_list_lock, flags);
  30         list_add_rcu(&xs->list, &umem->xsk_list);
  31         spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
  32 }
  33
  34 void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
  35 {
  36         unsigned long flags;
  37
  38         spin_lock_irqsave(&umem->xsk_list_lock, flags);
  39         list_del_rcu(&xs->list);
  40         spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
  41 }
  42
  43 /* The umem is stored both in the _rx struct and the _tx struct as we do
  44  * not know if the device has more tx queues than rx, or the opposite.
  45  * This might also change during run time.
  46  */
  47 static int xdp_reg_umem_at_qid(struct net_device *dev, struct xdp_umem *umem,
  48                                u16 queue_id)
  49 {
  50         if (queue_id >= max_t(unsigned int,
  51                               dev->real_num_rx_queues,
  52                               dev->real_num_tx_queues))
  53                 return -EINVAL;
  54
  55         if (queue_id < dev->real_num_rx_queues)
  56                 dev->_rx[queue_id].umem = umem;
  57         if (queue_id < dev->real_num_tx_queues)
  58                 dev->_tx[queue_id].umem = umem;
  59
  60         return 0;
  61 }
  62
  63 struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev,
  64                                        u16 queue_id)
  65 {
  66         if (queue_id < dev->real_num_rx_queues)
  67                 return dev->_rx[queue_id].umem;
  68         if (queue_id < dev->real_num_tx_queues)
  69                 return dev->_tx[queue_id].umem;
  70
  71         return NULL;
  72 }
  73 EXPORT_SYMBOL(xdp_get_umem_from_qid);
  74
  75 static void xdp_clear_umem_at_qid(struct net_device *dev, u16 queue_id)
  76 {
  77         if (queue_id < dev->real_num_rx_queues)
  78                 dev->_rx[queue_id].umem = NULL;
  79         if (queue_id < dev->real_num_tx_queues)
  80                 dev->_tx[queue_id].umem = NULL;
  81 }
  82
  83 int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
  84                         u16 queue_id, u16 flags)
  85 {
  86         bool force_zc, force_copy;
  87         struct netdev_bpf bpf;
  88         int err = 0;
  89
  90         force_zc = flags & XDP_ZEROCOPY;
  91         force_copy = flags & XDP_COPY;
  92
  93         if (force_zc && force_copy)
  94                 return -EINVAL;
  95
  96         rtnl_lock();
  97         if (xdp_get_umem_from_qid(dev, queue_id)) {
  98                 err = -EBUSY;
  99                 goto out_rtnl_unlock;
 100         }
 101
 102         err = xdp_reg_umem_at_qid(dev, umem, queue_id);
 103         if (err)
 104                 goto out_rtnl_unlock;
 105
 106         umem->dev = dev;
 107         umem->queue_id = queue_id;
 108         if (force_copy)
 109                 /* For copy-mode, we are done. */
 110                 goto out_rtnl_unlock;
 111
 112         if (!dev->netdev_ops->ndo_bpf ||
 113             !dev->netdev_ops->ndo_xsk_async_xmit) {
 114                 err = -EOPNOTSUPP;
 115                 goto err_unreg_umem;
 116         }
 117
 118         bpf.command = XDP_SETUP_XSK_UMEM;
 119         bpf.xsk.umem = umem;
 120         bpf.xsk.queue_id = queue_id;
 121
 122         err = dev->netdev_ops->ndo_bpf(dev, &bpf);
 123         if (err)
 124                 goto err_unreg_umem;
 125         rtnl_unlock();
 126
 127         dev_hold(dev);
 128         umem->zc = true;
 129         return 0;
 130
 131 err_unreg_umem:
 132         if (!force_zc)
 133                 err = 0; /* fallback to copy mode */
 134         if (err)
 135                 xdp_clear_umem_at_qid(dev, queue_id);
 136 out_rtnl_unlock:
 137         rtnl_unlock();
 138         return err;
 139 }
 140
 141 static void xdp_umem_clear_dev(struct xdp_umem *umem)
 142 {
 143         struct netdev_bpf bpf;
 144         int err;
 145
 146         if (!umem->dev)
 147                 return;
 148
 149         if (umem->zc) {
 150                 bpf.command = XDP_SETUP_XSK_UMEM;
 151                 bpf.xsk.umem = NULL;
 152                 bpf.xsk.queue_id = umem->queue_id;
 153
 154                 rtnl_lock();
 155                 err = umem->dev->netdev_ops->ndo_bpf(umem->dev, &bpf);
 156                 rtnl_unlock();
 157
 158                 if (err)
 159                         WARN(1, "failed to disable umem!\n");
 160         }
 161
 162         rtnl_lock();
 163         xdp_clear_umem_at_qid(umem->dev, umem->queue_id);
 164         rtnl_unlock();
 165
 166         if (umem->zc) {
 167                 dev_put(umem->dev);
 168                 umem->zc = false;
 169         }
 170 }
 171
 172 static void xdp_umem_unpin_pages(struct xdp_umem *umem)
 173 {
 174         unsigned int i;
 175
 176         for (i = 0; i < umem->npgs; i++) {
 177                 struct page *page = umem->pgs[i];
 178
 179                 set_page_dirty_lock(page);
 180                 put_page(page);
 181         }
 182
 183         kfree(umem->pgs);
 184         umem->pgs = NULL;
 185 }
 186
 187 static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
 188 {
 189         if (umem->user) {
 190                 atomic_long_sub(umem->npgs, &umem->user->locked_vm);
 191                 free_uid(umem->user);
 192         }
 193 }
 194
/* Tear down and free @umem.
 *
 * Called from xdp_umem_release_deferred(), i.e. from the work item that
 * xdp_put_umem() schedules once the last user reference is gone.
 */
static void xdp_umem_release(struct xdp_umem *umem)
{
	/* Detach from the device queue first (no-op if never bound). */
	xdp_umem_clear_dev(umem);

	/* Return the id taken in xdp_umem_create(). */
	ida_simple_remove(&umem_ida, umem->id);

	/* The queues are optional at this point; destroy whichever exist. */
	if (umem->fq) {
		xskq_destroy(umem->fq);
		umem->fq = NULL;
	}

	if (umem->cq) {
		xskq_destroy(umem->cq);
		umem->cq = NULL;
	}

	xsk_reuseq_destroy(umem);

	/* Drop the page references and free the umem->pgs array. */
	xdp_umem_unpin_pages(umem);

	kfree(umem->pages);
	umem->pages = NULL;

	/* Undo the locked_vm charge, then free the umem itself. */
	xdp_umem_unaccount_pages(umem);
	kfree(umem);
}
 221
 222 static void xdp_umem_release_deferred(struct work_struct *work)
 223 {
 224         struct xdp_umem *umem = container_of(work, struct xdp_umem, work);
 225
 226         xdp_umem_release(umem);
 227 }
 228
/* Take an additional user reference on @umem; dropped by xdp_put_umem(). */
void xdp_get_umem(struct xdp_umem *umem)
{
	refcount_inc(&umem->users);
}
 233
 234 void xdp_put_umem(struct xdp_umem *umem)
 235 {
 236         if (!umem)
 237                 return;
 238
 239         if (refcount_dec_and_test(&umem->users)) {
 240                 INIT_WORK(&umem->work, xdp_umem_release_deferred);
 241                 schedule_work(&umem->work);
 242         }
 243 }
 244
 245 static int xdp_umem_pin_pages(struct xdp_umem *umem)
 246 {
 247         unsigned int gup_flags = FOLL_WRITE;
 248         long npgs;
 249         int err;
 250
 251         umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs),
 252                             GFP_KERNEL | __GFP_NOWARN);
 253         if (!umem->pgs)
 254                 return -ENOMEM;
 255
 256         down_read(&current->mm->mmap_sem);
 257         npgs = get_user_pages_longterm(umem->address, umem->npgs,
 258                                        gup_flags, &umem->pgs[0], NULL);
 259         up_read(&current->mm->mmap_sem);
 260
 261         if (npgs != umem->npgs) {
 262                 if (npgs >= 0) {
 263                         umem->npgs = npgs;
 264                         err = -ENOMEM;
 265                         goto out_pin;
 266                 }
 267                 err = npgs;
 268                 goto out_pgs;
 269         }
 270         return 0;
 271
 272 out_pin:
 273         xdp_umem_unpin_pages(umem);
 274 out_pgs:
 275         kfree(umem->pgs);
 276         umem->pgs = NULL;
 277         return err;
 278 }
 279
 280 static int xdp_umem_account_pages(struct xdp_umem *umem)
 281 {
 282         unsigned long lock_limit, new_npgs, old_npgs;
 283
 284         if (capable(CAP_IPC_LOCK))
 285                 return 0;
 286
 287         lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 288         umem->user = get_uid(current_user());
 289
 290         do {
 291                 old_npgs = atomic_long_read(&umem->user->locked_vm);
 292                 new_npgs = old_npgs + umem->npgs;
 293                 if (new_npgs > lock_limit) {
 294                         free_uid(umem->user);
 295                         umem->user = NULL;
 296                         return -ENOBUFS;
 297                 }
 298         } while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs,
 299                                      new_npgs) != old_npgs);
 300         return 0;
 301 }
 302
 303 static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 304 {
 305         u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
 306         unsigned int chunks, chunks_per_page;
 307         u64 addr = mr->addr, size = mr->len;
 308         int size_chk, err, i;
 309
 310         if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
 311                 /* Strictly speaking we could support this, if:
 312                  * - huge pages, or*
 313                  * - using an IOMMU, or
 314                  * - making sure the memory area is consecutive
 315                  * but for now, we simply say "computer says no".
 316                  */
 317                 return -EINVAL;
 318         }
 319
 320         if (!is_power_of_2(chunk_size))
 321                 return -EINVAL;
 322
 323         if (!PAGE_ALIGNED(addr)) {
 324                 /* Memory area has to be page size aligned. For
 325                  * simplicity, this might change.
 326                  */
 327                 return -EINVAL;
 328         }
 329
 330         if ((addr + size) < addr)
 331                 return -EINVAL;
 332
 333         chunks = (unsigned int)div_u64(size, chunk_size);
 334         if (chunks == 0)
 335                 return -EINVAL;
 336
 337         chunks_per_page = PAGE_SIZE / chunk_size;
 338         if (chunks < chunks_per_page || chunks % chunks_per_page)
 339                 return -EINVAL;
 340
 341         headroom = ALIGN(headroom, 64);
 342
 343         size_chk = chunk_size - headroom - XDP_PACKET_HEADROOM;
 344         if (size_chk < 0)
 345                 return -EINVAL;
 346
 347         umem->address = (unsigned long)addr;
 348         umem->chunk_mask = ~((u64)chunk_size - 1);
 349         umem->size = size;
 350         umem->headroom = headroom;
 351         umem->chunk_size_nohr = chunk_size - headroom;
 352         umem->npgs = size / PAGE_SIZE;
 353         umem->pgs = NULL;
 354         umem->user = NULL;
 355         INIT_LIST_HEAD(&umem->xsk_list);
 356         spin_lock_init(&umem->xsk_list_lock);
 357
 358         refcount_set(&umem->users, 1);
 359
 360         err = xdp_umem_account_pages(umem);
 361         if (err)
 362                 return err;
 363
 364         err = xdp_umem_pin_pages(umem);
 365         if (err)
 366                 goto out_account;
 367
 368         umem->pages = kcalloc(umem->npgs, sizeof(*umem->pages), GFP_KERNEL);
 369         if (!umem->pages) {
 370                 err = -ENOMEM;
 371                 goto out_account;
 372         }
 373
 374         for (i = 0; i < umem->npgs; i++)
 375                 umem->pages[i].addr = page_address(umem->pgs[i]);
 376
 377         return 0;
 378
 379 out_account:
 380         xdp_umem_unaccount_pages(umem);
 381         return err;
 382 }
 383
 384 struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
 385 {
 386         struct xdp_umem *umem;
 387         int err;
 388
 389         umem = kzalloc(sizeof(*umem), GFP_KERNEL);
 390         if (!umem)
 391                 return ERR_PTR(-ENOMEM);
 392
 393         err = ida_simple_get(&umem_ida, 0, 0, GFP_KERNEL);
 394         if (err < 0) {
 395                 kfree(umem);
 396                 return ERR_PTR(err);
 397         }
 398         umem->id = err;
 399
 400         err = xdp_umem_reg(umem, mr);
 401         if (err) {
 402                 ida_simple_remove(&umem_ida, umem->id);
 403                 kfree(umem);
 404                 return ERR_PTR(err);
 405         }
 406
 407         return umem;
 408 }
 409
 410 bool xdp_umem_validate_queues(struct xdp_umem *umem)
 411 {
 412         return umem->fq && umem->cq;
 413 }