net/xdp/xdp_umem.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /* XDP user-space packet buffer
   3  * Copyright(c) 2018 Intel Corporation.
   4  */
   5
   6 #include <linux/init.h>
   7 #include <linux/sched/mm.h>
   8 #include <linux/sched/signal.h>
   9 #include <linux/sched/task.h>
  10 #include <linux/uaccess.h>
  11 #include <linux/slab.h>
  12 #include <linux/bpf.h>
  13 #include <linux/mm.h>
  14 #include <linux/netdevice.h>
  15 #include <linux/rtnetlink.h>
  16 #include <linux/idr.h>
  17
  18 #include "xdp_umem.h"
  19 #include "xsk_queue.h"
  20
/* Smallest chunk size, in bytes, accepted by xdp_umem_reg(). */
#define XDP_UMEM_MIN_CHUNK_SIZE 2048

/* Allocator for umem->id: an ID is taken in xdp_umem_create() and handed
 * back in xdp_umem_release().
 */
static DEFINE_IDA(umem_ida);
  24
  25 void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
  26 {
  27         unsigned long flags;
  28
  29         spin_lock_irqsave(&umem->xsk_list_lock, flags);
  30         list_add_rcu(&xs->list, &umem->xsk_list);
  31         spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
  32 }
  33
  34 void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
  35 {
  36         unsigned long flags;
  37
  38         spin_lock_irqsave(&umem->xsk_list_lock, flags);
  39         list_del_rcu(&xs->list);
  40         spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
  41 }
  42
  43 /* The umem is stored both in the _rx struct and the _tx struct as we do
  44  * not know if the device has more tx queues than rx, or the opposite.
  45  * This might also change during run time.
  46  */
  47 static int xdp_reg_umem_at_qid(struct net_device *dev, struct xdp_umem *umem,
  48                                u16 queue_id)
  49 {
  50         if (queue_id >= max_t(unsigned int,
  51                               dev->real_num_rx_queues,
  52                               dev->real_num_tx_queues))
  53                 return -EINVAL;
  54
  55         if (queue_id < dev->real_num_rx_queues)
  56                 dev->_rx[queue_id].umem = umem;
  57         if (queue_id < dev->real_num_tx_queues)
  58                 dev->_tx[queue_id].umem = umem;
  59
  60         return 0;
  61 }
  62
  63 struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev,
  64                                        u16 queue_id)
  65 {
  66         if (queue_id < dev->real_num_rx_queues)
  67                 return dev->_rx[queue_id].umem;
  68         if (queue_id < dev->real_num_tx_queues)
  69                 return dev->_tx[queue_id].umem;
  70
  71         return NULL;
  72 }
  73 EXPORT_SYMBOL(xdp_get_umem_from_qid);
  74
  75 static void xdp_clear_umem_at_qid(struct net_device *dev, u16 queue_id)
  76 {
  77         if (queue_id < dev->real_num_rx_queues)
  78                 dev->_rx[queue_id].umem = NULL;
  79         if (queue_id < dev->real_num_tx_queues)
  80                 dev->_tx[queue_id].umem = NULL;
  81 }
  82
  83 int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
  84                         u16 queue_id, u16 flags)
  85 {
  86         bool force_zc, force_copy;
  87         struct netdev_bpf bpf;
  88         int err = 0;
  89
  90         force_zc = flags & XDP_ZEROCOPY;
  91         force_copy = flags & XDP_COPY;
  92
  93         if (force_zc && force_copy)
  94                 return -EINVAL;
  95
  96         rtnl_lock();
  97         if (xdp_get_umem_from_qid(dev, queue_id)) {
  98                 err = -EBUSY;
  99                 goto out_rtnl_unlock;
 100         }
 101
 102         err = xdp_reg_umem_at_qid(dev, umem, queue_id);
 103         if (err)
 104                 goto out_rtnl_unlock;
 105
 106         umem->dev = dev;
 107         umem->queue_id = queue_id;
 108         if (force_copy)
 109                 /* For copy-mode, we are done. */
 110                 goto out_rtnl_unlock;
 111
 112         if (!dev->netdev_ops->ndo_bpf ||
 113             !dev->netdev_ops->ndo_xsk_async_xmit) {
 114                 err = -EOPNOTSUPP;
 115                 goto err_unreg_umem;
 116         }
 117
 118         bpf.command = XDP_SETUP_XSK_UMEM;
 119         bpf.xsk.umem = umem;
 120         bpf.xsk.queue_id = queue_id;
 121
 122         err = dev->netdev_ops->ndo_bpf(dev, &bpf);
 123         if (err)
 124                 goto err_unreg_umem;
 125         rtnl_unlock();
 126
 127         dev_hold(dev);
 128         umem->zc = true;
 129         return 0;
 130
 131 err_unreg_umem:
 132         if (!force_zc)
 133                 err = 0; /* fallback to copy mode */
 134         if (err)
 135                 xdp_clear_umem_at_qid(dev, queue_id);
 136 out_rtnl_unlock:
 137         rtnl_unlock();
 138         return err;
 139 }
 140
 141 static void xdp_umem_clear_dev(struct xdp_umem *umem)
 142 {
 143         struct netdev_bpf bpf;
 144         int err;
 145
 146         if (umem->zc) {
 147                 bpf.command = XDP_SETUP_XSK_UMEM;
 148                 bpf.xsk.umem = NULL;
 149                 bpf.xsk.queue_id = umem->queue_id;
 150
 151                 rtnl_lock();
 152                 err = umem->dev->netdev_ops->ndo_bpf(umem->dev, &bpf);
 153                 rtnl_unlock();
 154
 155                 if (err)
 156                         WARN(1, "failed to disable umem!\n");
 157         }
 158
 159         if (umem->dev) {
 160                 rtnl_lock();
 161                 xdp_clear_umem_at_qid(umem->dev, umem->queue_id);
 162                 rtnl_unlock();
 163         }
 164
 165         if (umem->zc) {
 166                 dev_put(umem->dev);
 167                 umem->zc = false;
 168         }
 169 }
 170
 171 static void xdp_umem_unpin_pages(struct xdp_umem *umem)
 172 {
 173         unsigned int i;
 174
 175         for (i = 0; i < umem->npgs; i++) {
 176                 struct page *page = umem->pgs[i];
 177
 178                 set_page_dirty_lock(page);
 179                 put_page(page);
 180         }
 181
 182         kfree(umem->pgs);
 183         umem->pgs = NULL;
 184 }
 185
 186 static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
 187 {
 188         if (umem->user) {
 189                 atomic_long_sub(umem->npgs, &umem->user->locked_vm);
 190                 free_uid(umem->user);
 191         }
 192 }
 193
 194 static void xdp_umem_release(struct xdp_umem *umem)
 195 {
 196         xdp_umem_clear_dev(umem);
 197
 198         ida_simple_remove(&umem_ida, umem->id);
 199
 200         if (umem->fq) {
 201                 xskq_destroy(umem->fq);
 202                 umem->fq = NULL;
 203         }
 204
 205         if (umem->cq) {
 206                 xskq_destroy(umem->cq);
 207                 umem->cq = NULL;
 208         }
 209
 210         xsk_reuseq_destroy(umem);
 211
 212         xdp_umem_unpin_pages(umem);
 213
 214         kfree(umem->pages);
 215         umem->pages = NULL;
 216
 217         xdp_umem_unaccount_pages(umem);
 218         kfree(umem);
 219 }
 220
 221 static void xdp_umem_release_deferred(struct work_struct *work)
 222 {
 223         struct xdp_umem *umem = container_of(work, struct xdp_umem, work);
 224
 225         xdp_umem_release(umem);
 226 }
 227
 228 void xdp_get_umem(struct xdp_umem *umem)
 229 {
 230         refcount_inc(&umem->users);
 231 }
 232
 233 void xdp_put_umem(struct xdp_umem *umem)
 234 {
 235         if (!umem)
 236                 return;
 237
 238         if (refcount_dec_and_test(&umem->users)) {
 239                 INIT_WORK(&umem->work, xdp_umem_release_deferred);
 240                 schedule_work(&umem->work);
 241         }
 242 }
 243
 244 static int xdp_umem_pin_pages(struct xdp_umem *umem)
 245 {
 246         unsigned int gup_flags = FOLL_WRITE;
 247         long npgs;
 248         int err;
 249
 250         umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs),
 251                             GFP_KERNEL | __GFP_NOWARN);
 252         if (!umem->pgs)
 253                 return -ENOMEM;
 254
 255         down_read(&current->mm->mmap_sem);
 256         npgs = get_user_pages(umem->address, umem->npgs,
 257                               gup_flags | FOLL_LONGTERM, &umem->pgs[0], NULL);
 258         up_read(&current->mm->mmap_sem);
 259
 260         if (npgs != umem->npgs) {
 261                 if (npgs >= 0) {
 262                         umem->npgs = npgs;
 263                         err = -ENOMEM;
 264                         goto out_pin;
 265                 }
 266                 err = npgs;
 267                 goto out_pgs;
 268         }
 269         return 0;
 270
 271 out_pin:
 272         xdp_umem_unpin_pages(umem);
 273 out_pgs:
 274         kfree(umem->pgs);
 275         umem->pgs = NULL;
 276         return err;
 277 }
 278
 279 static int xdp_umem_account_pages(struct xdp_umem *umem)
 280 {
 281         unsigned long lock_limit, new_npgs, old_npgs;
 282
 283         if (capable(CAP_IPC_LOCK))
 284                 return 0;
 285
 286         lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 287         umem->user = get_uid(current_user());
 288
 289         do {
 290                 old_npgs = atomic_long_read(&umem->user->locked_vm);
 291                 new_npgs = old_npgs + umem->npgs;
 292                 if (new_npgs > lock_limit) {
 293                         free_uid(umem->user);
 294                         umem->user = NULL;
 295                         return -ENOBUFS;
 296                 }
 297         } while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs,
 298                                      new_npgs) != old_npgs);
 299         return 0;
 300 }
 301
 302 static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 303 {
 304         u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
 305         unsigned int chunks, chunks_per_page;
 306         u64 addr = mr->addr, size = mr->len;
 307         int size_chk, err, i;
 308
 309         if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
 310                 /* Strictly speaking we could support this, if:
 311                  * - huge pages, or*
 312                  * - using an IOMMU, or
 313                  * - making sure the memory area is consecutive
 314                  * but for now, we simply say "computer says no".
 315                  */
 316                 return -EINVAL;
 317         }
 318
 319         if (!is_power_of_2(chunk_size))
 320                 return -EINVAL;
 321
 322         if (!PAGE_ALIGNED(addr)) {
 323                 /* Memory area has to be page size aligned. For
 324                  * simplicity, this might change.
 325                  */
 326                 return -EINVAL;
 327         }
 328
 329         if ((addr + size) < addr)
 330                 return -EINVAL;
 331
 332         chunks = (unsigned int)div_u64(size, chunk_size);
 333         if (chunks == 0)
 334                 return -EINVAL;
 335
 336         chunks_per_page = PAGE_SIZE / chunk_size;
 337         if (chunks < chunks_per_page || chunks % chunks_per_page)
 338                 return -EINVAL;
 339
 340         headroom = ALIGN(headroom, 64);
 341
 342         size_chk = chunk_size - headroom - XDP_PACKET_HEADROOM;
 343         if (size_chk < 0)
 344                 return -EINVAL;
 345
 346         umem->address = (unsigned long)addr;
 347         umem->chunk_mask = ~((u64)chunk_size - 1);
 348         umem->size = size;
 349         umem->headroom = headroom;
 350         umem->chunk_size_nohr = chunk_size - headroom;
 351         umem->npgs = size / PAGE_SIZE;
 352         umem->pgs = NULL;
 353         umem->user = NULL;
 354         INIT_LIST_HEAD(&umem->xsk_list);
 355         spin_lock_init(&umem->xsk_list_lock);
 356
 357         refcount_set(&umem->users, 1);
 358
 359         err = xdp_umem_account_pages(umem);
 360         if (err)
 361                 return err;
 362
 363         err = xdp_umem_pin_pages(umem);
 364         if (err)
 365                 goto out_account;
 366
 367         umem->pages = kcalloc(umem->npgs, sizeof(*umem->pages), GFP_KERNEL);
 368         if (!umem->pages) {
 369                 err = -ENOMEM;
 370                 goto out_account;
 371         }
 372
 373         for (i = 0; i < umem->npgs; i++)
 374                 umem->pages[i].addr = page_address(umem->pgs[i]);
 375
 376         return 0;
 377
 378 out_account:
 379         xdp_umem_unaccount_pages(umem);
 380         return err;
 381 }
 382
 383 struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
 384 {
 385         struct xdp_umem *umem;
 386         int err;
 387
 388         umem = kzalloc(sizeof(*umem), GFP_KERNEL);
 389         if (!umem)
 390                 return ERR_PTR(-ENOMEM);
 391
 392         err = ida_simple_get(&umem_ida, 0, 0, GFP_KERNEL);
 393         if (err < 0) {
 394                 kfree(umem);
 395                 return ERR_PTR(err);
 396         }
 397         umem->id = err;
 398
 399         err = xdp_umem_reg(umem, mr);
 400         if (err) {
 401                 ida_simple_remove(&umem_ida, umem->id);
 402                 kfree(umem);
 403                 return ERR_PTR(err);
 404         }
 405
 406         return umem;
 407 }
 408
 409 bool xdp_umem_validate_queues(struct xdp_umem *umem)
 410 {
 411         return umem->fq && umem->cq;
 412 }