It is recommended to always set the soft limit below the hard limit;
otherwise the hard limit will take precedence.
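As a minimal illustration (not part of the patch itself), a userspace sketch
that writes the hard limit first and then a lower soft limit. The group path
is hypothetical and depends on where the v1 memory controller is mounted:

    #include <stdio.h>

    /* Hypothetical cgroup-v1 group path; adjust to the real mount point. */
    #define GRP "/sys/fs/cgroup/memory/workload"

    static int write_limit(const char *file, const char *bytes)
    {
            char path[256];
            FILE *f;

            snprintf(path, sizeof(path), GRP "/%s", file);
            f = fopen(path, "w");
            if (!f) {
                    perror(path);
                    return -1;
            }
            fputs(bytes, f);
            return fclose(f);
    }

    int main(void)
    {
            /* Hard limit first (512 MiB), then a soft limit below it (256 MiB). */
            if (write_limit("memory.limit_in_bytes", "536870912"))
                    return 1;
            if (write_limit("memory.soft_limit_in_bytes", "268435456"))
                    return 1;
            return 0;
    }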
- 8. Move charges at task migration
- =================================
+.. _cgroup-v1-memory-move-charges:
+
+ 8. Move charges at task migration (DEPRECATED!)
+ ===============================================
+
+ THIS IS DEPRECATED!
+
+ It's expensive and unreliable! It's better practice to launch workload
+ tasks directly from inside their target cgroup. Use dedicated workload
+ cgroups to allow fine-grained policy adjustments without having to
+ move physical pages between control domains.
Users can move charges associated with a task along with task migration, that
is, uncharge the task's pages from the old cgroup and charge them to the new cgroup.
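Purely for illustration (and bearing the deprecation notice above in mind), a
sketch of the sequence in C: enable charge moving on the destination group,
then migrate a task into it. The paths and group name are hypothetical:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    /* Hypothetical destination group; adjust to the real mount point. */
    #define DST "/sys/fs/cgroup/memory/dst"

    static void write_str(const char *path, const char *val)
    {
            int fd = open(path, O_WRONLY);

            if (fd < 0 || write(fd, val, strlen(val)) < 0)
                    perror(path);
            if (fd >= 0)
                    close(fd);
    }

    int main(int argc, char **argv)
    {
            /* Bit 0 moves anonymous pages, bit 1 file pages; "3" moves both. */
            write_str(DST "/memory.move_charge_at_immigrate", "3");
            /* PID to migrate; writing "0" moves the writing task itself. */
            write_str(DST "/cgroup.procs", argc > 1 ? argv[1] : "0");
            return 0;
    }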
- =============
-.. _numaperf:
-
+ =======================
+ NUMA Memory Performance
+ =======================
+
NUMA Locality
=============
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-2023 Intel Corporation
+ */
+
+#include <linux/dma-buf.h>
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <linux/set_memory.h>
+#include <linux/xarray.h>
+
+#include <drm/drm_cache.h>
+#include <drm/drm_debugfs.h>
+#include <drm/drm_file.h>
+#include <drm/drm_utils.h>
+
+#include "ivpu_drv.h"
+#include "ivpu_gem.h"
+#include "ivpu_hw.h"
+#include "ivpu_mmu.h"
+#include "ivpu_mmu_context.h"
+
+MODULE_IMPORT_NS(DMA_BUF);
+
+static const struct drm_gem_object_funcs ivpu_gem_funcs;
+
+static struct lock_class_key prime_bo_lock_class_key;
+
+static int __must_check prime_alloc_pages_locked(struct ivpu_bo *bo)
+{
+ /* Pages are managed by the underlying dma-buf */
+ return 0;
+}
+
+static void prime_free_pages_locked(struct ivpu_bo *bo)
+{
+ /* Pages are managed by the underlying dma-buf */
+}
+
+static int prime_map_pages_locked(struct ivpu_bo *bo)
+{
+ struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
+ struct sg_table *sgt;
+
+ sgt = dma_buf_map_attachment_unlocked(bo->base.import_attach, DMA_BIDIRECTIONAL);
+ if (IS_ERR(sgt)) {
+ ivpu_err(vdev, "Failed to map attachment: %ld\n", PTR_ERR(sgt));
+ return PTR_ERR(sgt);
+ }
+
+ bo->sgt = sgt;
+ return 0;
+}
+
+static void prime_unmap_pages_locked(struct ivpu_bo *bo)
+{
+ dma_buf_unmap_attachment_unlocked(bo->base.import_attach, bo->sgt, DMA_BIDIRECTIONAL);
+ bo->sgt = NULL;
+}
+
+static const struct ivpu_bo_ops prime_ops = {
+ .type = IVPU_BO_TYPE_PRIME,
+ .name = "prime",
+ .alloc_pages = prime_alloc_pages_locked,
+ .free_pages = prime_free_pages_locked,
+ .map_pages = prime_map_pages_locked,
+ .unmap_pages = prime_unmap_pages_locked,
+};
+
+static int __must_check shmem_alloc_pages_locked(struct ivpu_bo *bo)
+{
+ int npages = bo->base.size >> PAGE_SHIFT;
+ struct page **pages;
+
+ pages = drm_gem_get_pages(&bo->base);
+ if (IS_ERR(pages))
+ return PTR_ERR(pages);
+
+ if (bo->flags & DRM_IVPU_BO_WC)
+ set_pages_array_wc(pages, npages);
+ else if (bo->flags & DRM_IVPU_BO_UNCACHED)
+ set_pages_array_uc(pages, npages);
+
+ bo->pages = pages;
+ return 0;
+}
+
+static void shmem_free_pages_locked(struct ivpu_bo *bo)
+{
+ if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
+ set_pages_array_wb(bo->pages, bo->base.size >> PAGE_SHIFT);
+
+ drm_gem_put_pages(&bo->base, bo->pages, true, false);
+ bo->pages = NULL;
+}
+
+static int ivpu_bo_map_pages_locked(struct ivpu_bo *bo)
+{
+ int npages = bo->base.size >> PAGE_SHIFT;
+ struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
+ struct sg_table *sgt;
+ int ret;
+
+ sgt = drm_prime_pages_to_sg(&vdev->drm, bo->pages, npages);
+ if (IS_ERR(sgt)) {
+ ivpu_err(vdev, "Failed to allocate sgtable\n");
+ return PTR_ERR(sgt);
+ }
+
+ ret = dma_map_sgtable(vdev->drm.dev, sgt, DMA_BIDIRECTIONAL, 0);
+ if (ret) {
+ ivpu_err(vdev, "Failed to map BO in IOMMU: %d\n", ret);
+ goto err_free_sgt;
+ }
+
+ bo->sgt = sgt;
+ return 0;
+
+err_free_sgt:
+ kfree(sgt);
+ return ret;
+}
+
+static void ivpu_bo_unmap_pages_locked(struct ivpu_bo *bo)
+{
+ struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
+
+ dma_unmap_sgtable(vdev->drm.dev, bo->sgt, DMA_BIDIRECTIONAL, 0);
+ sg_free_table(bo->sgt);
+ kfree(bo->sgt);
+ bo->sgt = NULL;
+}
+
+static const struct ivpu_bo_ops shmem_ops = {
+ .type = IVPU_BO_TYPE_SHMEM,
+ .name = "shmem",
+ .alloc_pages = shmem_alloc_pages_locked,
+ .free_pages = shmem_free_pages_locked,
+ .map_pages = ivpu_bo_map_pages_locked,
+ .unmap_pages = ivpu_bo_unmap_pages_locked,
+};
+
+static int __must_check internal_alloc_pages_locked(struct ivpu_bo *bo)
+{
+ unsigned int i, npages = bo->base.size >> PAGE_SHIFT;
+ struct page **pages;
+ int ret;
+
+ pages = kvmalloc_array(npages, sizeof(*bo->pages), GFP_KERNEL);
+ if (!pages)
+ return -ENOMEM;
+
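+	/*
+	 * Allocate zeroed, highmem-allowed pages one at a time so large
+	 * buffers can reschedule between allocations via cond_resched().
+	 */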
+ for (i = 0; i < npages; i++) {
+ pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
+ if (!pages[i]) {
+ ret = -ENOMEM;
+ goto err_free_pages;
+ }
+ cond_resched();
+ }
+
+ bo->pages = pages;
+ return 0;
+
+err_free_pages:
+ while (i--)
+ put_page(pages[i]);
+ kvfree(pages);
+ return ret;
+}
+
+static void internal_free_pages_locked(struct ivpu_bo *bo)
+{
+ unsigned int i, npages = bo->base.size >> PAGE_SHIFT;
+
+ for (i = 0; i < npages; i++)
+ put_page(bo->pages[i]);
+
+ kvfree(bo->pages);
+ bo->pages = NULL;
+}
+
+static const struct ivpu_bo_ops internal_ops = {
+ .type = IVPU_BO_TYPE_INTERNAL,
+ .name = "internal",
+ .alloc_pages = internal_alloc_pages_locked,
+ .free_pages = internal_free_pages_locked,
+ .map_pages = ivpu_bo_map_pages_locked,
+ .unmap_pages = ivpu_bo_unmap_pages_locked,
+};
+
+static int __must_check ivpu_bo_alloc_and_map_pages_locked(struct ivpu_bo *bo)
+{
+ struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
+ int ret;
+
+ lockdep_assert_held(&bo->lock);
+ drm_WARN_ON(&vdev->drm, bo->sgt);
+
+ ret = bo->ops->alloc_pages(bo);
+ if (ret) {
+ ivpu_err(vdev, "Failed to allocate pages for BO: %d", ret);
+ return ret;
+ }
+
+ ret = bo->ops->map_pages(bo);
+ if (ret) {
+ ivpu_err(vdev, "Failed to map pages for BO: %d", ret);
+ goto err_free_pages;
+ }
+ return ret;
+
+err_free_pages:
+ bo->ops->free_pages(bo);
+ return ret;
+}
+
+static void ivpu_bo_unmap_and_free_pages(struct ivpu_bo *bo)
+{
+ mutex_lock(&bo->lock);
+
+ WARN_ON(!bo->sgt);
+ bo->ops->unmap_pages(bo);
+ WARN_ON(bo->sgt);
+ bo->ops->free_pages(bo);
+ WARN_ON(bo->pages);
+
+ mutex_unlock(&bo->lock);
+}
+
+/*
+ * ivpu_bo_pin() - pin the backing physical pages and map them to VPU.
+ *
+ * This function pins physical memory pages, then maps the physical pages
+ * to IOMMU address space and finally updates the VPU MMU page tables
+ * to allow the VPU to translate VPU address to IOMMU address.
+ */
+int __must_check ivpu_bo_pin(struct ivpu_bo *bo)
+{
+ struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
+ int ret = 0;
+
+ mutex_lock(&bo->lock);
+
+ if (!bo->vpu_addr) {
+ ivpu_err(vdev, "vpu_addr not set for BO ctx_id: %d handle: %d\n",
+ bo->ctx->id, bo->handle);
+ ret = -EINVAL;
+ goto unlock;
+ }
+
+ if (!bo->sgt) {
+ ret = ivpu_bo_alloc_and_map_pages_locked(bo);
+ if (ret)
+ goto unlock;
+ }
+
+ if (!bo->mmu_mapped) {
+ ret = ivpu_mmu_context_map_sgt(vdev, bo->ctx, bo->vpu_addr, bo->sgt,
+ ivpu_bo_is_snooped(bo));
+ if (ret) {
+ ivpu_err(vdev, "Failed to map BO in MMU: %d\n", ret);
+ goto unlock;
+ }
+ bo->mmu_mapped = true;
+ }
+
+unlock:
+ mutex_unlock(&bo->lock);
+
+ return ret;
+}
+
+static int
+ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx,
+ const struct ivpu_addr_range *range)
+{
+ struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
+ int ret;
+
+ if (!range) {
+ if (bo->flags & DRM_IVPU_BO_HIGH_MEM)
+ range = &vdev->hw->ranges.user_high;
+ else
+ range = &vdev->hw->ranges.user_low;
+ }
+
+ mutex_lock(&ctx->lock);
+ ret = ivpu_mmu_context_insert_node_locked(ctx, range, bo->base.size, &bo->mm_node);
+ if (!ret) {
+ bo->ctx = ctx;
+ bo->vpu_addr = bo->mm_node.start;
+ list_add_tail(&bo->ctx_node, &ctx->bo_list);
+ }
+ mutex_unlock(&ctx->lock);
+
+ return ret;
+}
+
+static void ivpu_bo_free_vpu_addr(struct ivpu_bo *bo)
+{
+ struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
+ struct ivpu_mmu_context *ctx = bo->ctx;
+
+ ivpu_dbg(vdev, BO, "remove from ctx: ctx %d vpu_addr 0x%llx allocated %d mmu_mapped %d\n",
+ ctx->id, bo->vpu_addr, (bool)bo->sgt, bo->mmu_mapped);
+
+ mutex_lock(&bo->lock);
+
+ if (bo->mmu_mapped) {
+ drm_WARN_ON(&vdev->drm, !bo->sgt);
+ ivpu_mmu_context_unmap_sgt(vdev, ctx, bo->vpu_addr, bo->sgt);
+ bo->mmu_mapped = false;
+ }
+
+ mutex_lock(&ctx->lock);
+ list_del(&bo->ctx_node);
+ bo->vpu_addr = 0;
+ bo->ctx = NULL;
+ ivpu_mmu_context_remove_node_locked(ctx, &bo->mm_node);
+ mutex_unlock(&ctx->lock);
+
+ mutex_unlock(&bo->lock);
+}
+
+void ivpu_bo_remove_all_bos_from_context(struct ivpu_mmu_context *ctx)
+{
+ struct ivpu_bo *bo, *tmp;
+
+ list_for_each_entry_safe(bo, tmp, &ctx->bo_list, ctx_node)
+ ivpu_bo_free_vpu_addr(bo);
+}
+
+static struct ivpu_bo *
+ivpu_bo_alloc(struct ivpu_device *vdev, struct ivpu_mmu_context *mmu_context,
+ u64 size, u32 flags, const struct ivpu_bo_ops *ops,
+ const struct ivpu_addr_range *range, u64 user_ptr)
+{
+ struct ivpu_bo *bo;
+ int ret = 0;
+
+ if (drm_WARN_ON(&vdev->drm, size == 0 || !PAGE_ALIGNED(size)))
+ return ERR_PTR(-EINVAL);
+
+ switch (flags & DRM_IVPU_BO_CACHE_MASK) {
+ case DRM_IVPU_BO_CACHED:
+ case DRM_IVPU_BO_UNCACHED:
+ case DRM_IVPU_BO_WC:
+ break;
+ default:
+ return ERR_PTR(-EINVAL);
+ }
+
+ bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+ if (!bo)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_init(&bo->lock);
+ bo->base.funcs = &ivpu_gem_funcs;
+ bo->flags = flags;
+ bo->ops = ops;
+ bo->user_ptr = user_ptr;
+
+ if (ops->type == IVPU_BO_TYPE_SHMEM)
+ ret = drm_gem_object_init(&vdev->drm, &bo->base, size);
+ else
+ drm_gem_private_object_init(&vdev->drm, &bo->base, size);
+
+ if (ret) {
+ ivpu_err(vdev, "Failed to initialize drm object\n");
+ goto err_free;
+ }
+
+ if (flags & DRM_IVPU_BO_MAPPABLE) {
+ ret = drm_gem_create_mmap_offset(&bo->base);
+ if (ret) {
+ ivpu_err(vdev, "Failed to allocate mmap offset\n");
+ goto err_release;
+ }
+ }
+
+ if (mmu_context) {
+ ret = ivpu_bo_alloc_vpu_addr(bo, mmu_context, range);
+ if (ret) {
+ ivpu_err(vdev, "Failed to add BO to context: %d\n", ret);
+ goto err_release;
+ }
+ }
+
+ return bo;
+
+err_release:
+ drm_gem_object_release(&bo->base);
+err_free:
+ kfree(bo);
+ return ERR_PTR(ret);
+}
+
+static void ivpu_bo_free(struct drm_gem_object *obj)
+{
+ struct ivpu_bo *bo = to_ivpu_bo(obj);
+ struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
+
+ if (bo->ctx)
+ ivpu_dbg(vdev, BO, "free: ctx %d vpu_addr 0x%llx allocated %d mmu_mapped %d\n",
+ bo->ctx->id, bo->vpu_addr, (bool)bo->sgt, bo->mmu_mapped);
+ else
+ ivpu_dbg(vdev, BO, "free: ctx (released) allocated %d mmu_mapped %d\n",
+ (bool)bo->sgt, bo->mmu_mapped);
+
+ drm_WARN_ON(&vdev->drm, !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ));
+
+ vunmap(bo->kvaddr);
+
+ if (bo->ctx)
+ ivpu_bo_free_vpu_addr(bo);
+
+ if (bo->sgt)
+ ivpu_bo_unmap_and_free_pages(bo);
+
+ if (bo->base.import_attach)
+ drm_prime_gem_destroy(&bo->base, bo->sgt);
+
+ drm_gem_object_release(&bo->base);
+
+ mutex_destroy(&bo->lock);
+ kfree(bo);
+}
+
+static int ivpu_bo_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+{
+ struct ivpu_bo *bo = to_ivpu_bo(obj);
+ struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
+
+ ivpu_dbg(vdev, BO, "mmap: ctx %u handle %u vpu_addr 0x%llx size %zu type %s",
+ bo->ctx->id, bo->handle, bo->vpu_addr, bo->base.size, bo->ops->name);
+
+ if (obj->import_attach) {
+		/* Drop the reference drm_gem_mmap_obj() acquired. */
+ drm_gem_object_put(obj);
+ vma->vm_private_data = NULL;
+ return dma_buf_mmap(obj->dma_buf, vma, 0);
+ }
+
+	vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND);
+ vma->vm_page_prot = ivpu_bo_pgprot(bo, vm_get_page_prot(vma->vm_flags));
+
+ return 0;
+}
+
+static struct sg_table *ivpu_bo_get_sg_table(struct drm_gem_object *obj)
+{
+ struct ivpu_bo *bo = to_ivpu_bo(obj);
+ loff_t npages = obj->size >> PAGE_SHIFT;
+ int ret = 0;
+
+ mutex_lock(&bo->lock);
+
+ if (!bo->sgt)
+ ret = ivpu_bo_alloc_and_map_pages_locked(bo);
+
+ mutex_unlock(&bo->lock);
+
+ if (ret)
+ return ERR_PTR(ret);
+
+ return drm_prime_pages_to_sg(obj->dev, bo->pages, npages);
+}
+
+static vm_fault_t ivpu_vm_fault(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ struct drm_gem_object *obj = vma->vm_private_data;
+ struct ivpu_bo *bo = to_ivpu_bo(obj);
+ loff_t npages = obj->size >> PAGE_SHIFT;
+ pgoff_t page_offset;
+ struct page *page;
+ vm_fault_t ret;
+ int err;
+
+ mutex_lock(&bo->lock);
+
+ if (!bo->sgt) {
+ err = ivpu_bo_alloc_and_map_pages_locked(bo);
+ if (err) {
+ ret = vmf_error(err);
+ goto unlock;
+ }
+ }
+
+ /* We don't use vmf->pgoff since that has the fake offset */
+ page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
+ if (page_offset >= npages) {
+ ret = VM_FAULT_SIGBUS;
+ } else {
+ page = bo->pages[page_offset];
+ ret = vmf_insert_pfn(vma, vmf->address, page_to_pfn(page));
+ }
+
+unlock:
+ mutex_unlock(&bo->lock);
+
+ return ret;
+}
+
+static const struct vm_operations_struct ivpu_vm_ops = {
+ .fault = ivpu_vm_fault,
+ .open = drm_gem_vm_open,
+ .close = drm_gem_vm_close,
+};
+
+static const struct drm_gem_object_funcs ivpu_gem_funcs = {
+ .free = ivpu_bo_free,
+ .mmap = ivpu_bo_mmap,
+ .vm_ops = &ivpu_vm_ops,
+ .get_sg_table = ivpu_bo_get_sg_table,
+};
+
+int
+ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct ivpu_file_priv *file_priv = file->driver_priv;
+ struct ivpu_device *vdev = file_priv->vdev;
+ struct drm_ivpu_bo_create *args = data;
+ u64 size = PAGE_ALIGN(args->size);
+ struct ivpu_bo *bo;
+ int ret;
+
+ if (args->flags & ~DRM_IVPU_BO_FLAGS)
+ return -EINVAL;
+
+ if (size == 0)
+ return -EINVAL;
+
+ bo = ivpu_bo_alloc(vdev, &file_priv->ctx, size, args->flags, &shmem_ops, NULL, 0);
+ if (IS_ERR(bo)) {
+ ivpu_err(vdev, "Failed to create BO: %pe (ctx %u size %llu flags 0x%x)",
+ bo, file_priv->ctx.id, args->size, args->flags);
+ return PTR_ERR(bo);
+ }
+
+	ret = drm_gem_handle_create(file, &bo->base, &bo->handle);
+	if (!ret) {
+		args->vpu_addr = bo->vpu_addr;
+		args->handle = bo->handle;
+	}
+
+	ivpu_dbg(vdev, BO, "alloc shmem: ctx %u vpu_addr 0x%llx size %zu flags 0x%x\n",
+		 file_priv->ctx.id, bo->vpu_addr, bo->base.size, bo->flags);
+
+	/* Drop the allocation reference; on success the handle keeps the BO alive. */
+	drm_gem_object_put(&bo->base);
+
+	return ret;
+}
+
+struct ivpu_bo *
+ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags)
+{
+ const struct ivpu_addr_range *range;
+ struct ivpu_addr_range fixed_range;
+ struct ivpu_bo *bo;
+ pgprot_t prot;
+ int ret;
+
+ drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(vpu_addr));
+ drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(size));
+
+ if (vpu_addr) {
+ fixed_range.start = vpu_addr;
+ fixed_range.end = vpu_addr + size;
+ range = &fixed_range;
+ } else {
+ range = &vdev->hw->ranges.global_low;
+ }
+
+ bo = ivpu_bo_alloc(vdev, &vdev->gctx, size, flags, &internal_ops, range, 0);
+ if (IS_ERR(bo)) {
+ ivpu_err(vdev, "Failed to create BO: %pe (vpu_addr 0x%llx size %llu flags 0x%x)",
+ bo, vpu_addr, size, flags);
+ return NULL;
+ }
+
+ ret = ivpu_bo_pin(bo);
+ if (ret)
+ goto err_put;
+
+ if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
+ drm_clflush_pages(bo->pages, bo->base.size >> PAGE_SHIFT);
+
+ prot = ivpu_bo_pgprot(bo, PAGE_KERNEL);
+ bo->kvaddr = vmap(bo->pages, bo->base.size >> PAGE_SHIFT, VM_MAP, prot);
+ if (!bo->kvaddr) {
+ ivpu_err(vdev, "Failed to map BO into kernel virtual memory\n");
+ goto err_put;
+ }
+
+ ivpu_dbg(vdev, BO, "alloc internal: ctx 0 vpu_addr 0x%llx size %zu flags 0x%x\n",
+ bo->vpu_addr, bo->base.size, flags);
+
+ return bo;
+
+err_put:
+ drm_gem_object_put(&bo->base);
+ return NULL;
+}
+
+void ivpu_bo_free_internal(struct ivpu_bo *bo)
+{
+ drm_gem_object_put(&bo->base);
+}
+
+struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev, struct dma_buf *buf)
+{
+ struct ivpu_device *vdev = to_ivpu_device(dev);
+ struct dma_buf_attachment *attach;
+ struct ivpu_bo *bo;
+
+ attach = dma_buf_attach(buf, dev->dev);
+ if (IS_ERR(attach))
+ return ERR_CAST(attach);
+
+ get_dma_buf(buf);
+
+ bo = ivpu_bo_alloc(vdev, NULL, buf->size, DRM_IVPU_BO_MAPPABLE, &prime_ops, NULL, 0);
+ if (IS_ERR(bo)) {
+ ivpu_err(vdev, "Failed to import BO: %pe (size %lu)", bo, buf->size);
+ goto err_detach;
+ }
+
+ lockdep_set_class(&bo->lock, &prime_bo_lock_class_key);
+
+ bo->base.import_attach = attach;
+
+ return &bo->base;
+
+err_detach:
+ dma_buf_detach(buf, attach);
+ dma_buf_put(buf);
+ return ERR_CAST(bo);
+}
+
+int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct ivpu_file_priv *file_priv = file->driver_priv;
+ struct ivpu_device *vdev = to_ivpu_device(dev);
+ struct drm_ivpu_bo_info *args = data;
+ struct drm_gem_object *obj;
+ struct ivpu_bo *bo;
+ int ret = 0;
+
+ obj = drm_gem_object_lookup(file, args->handle);
+ if (!obj)
+ return -ENOENT;
+
+ bo = to_ivpu_bo(obj);
+
+ mutex_lock(&bo->lock);
+
+ if (!bo->ctx) {
+ ret = ivpu_bo_alloc_vpu_addr(bo, &file_priv->ctx, NULL);
+ if (ret) {
+ ivpu_err(vdev, "Failed to allocate vpu_addr: %d\n", ret);
+ goto unlock;
+ }
+ }
+
+ args->flags = bo->flags;
+ args->mmap_offset = drm_vma_node_offset_addr(&obj->vma_node);
+ args->vpu_addr = bo->vpu_addr;
+ args->size = obj->size;
+unlock:
+ mutex_unlock(&bo->lock);
+ drm_gem_object_put(obj);
+ return ret;
+}
+
+int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct drm_ivpu_bo_wait *args = data;
+ struct drm_gem_object *obj;
+ unsigned long timeout;
+ long ret;
+
+ timeout = drm_timeout_abs_to_jiffies(args->timeout_ns);
+
+ obj = drm_gem_object_lookup(file, args->handle);
+ if (!obj)
+ return -EINVAL;
+
+ ret = dma_resv_wait_timeout(obj->resv, DMA_RESV_USAGE_READ, true, timeout);
+ if (ret == 0) {
+ ret = -ETIMEDOUT;
+ } else if (ret > 0) {
+ ret = 0;
+ args->job_status = to_ivpu_bo(obj)->job_status;
+ }
+
+ drm_gem_object_put(obj);
+
+ return ret;
+}
+
+static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p)
+{
+ unsigned long dma_refcount = 0;
+
+ if (bo->base.dma_buf && bo->base.dma_buf->file)
+ dma_refcount = atomic_long_read(&bo->base.dma_buf->file->f_count);
+
+ drm_printf(p, "%5u %6d %16llx %10lu %10u %12lu %14s\n",
+ bo->ctx->id, bo->handle, bo->vpu_addr, bo->base.size,
+ kref_read(&bo->base.refcount), dma_refcount, bo->ops->name);
+}
+
+void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p)
+{
+ struct ivpu_device *vdev = to_ivpu_device(dev);
+ struct ivpu_file_priv *file_priv;
+ unsigned long ctx_id;
+ struct ivpu_bo *bo;
+
+ drm_printf(p, "%5s %6s %16s %10s %10s %12s %14s\n",
+ "ctx", "handle", "vpu_addr", "size", "refcount", "dma_refcount", "type");
+
+ mutex_lock(&vdev->gctx.lock);
+ list_for_each_entry(bo, &vdev->gctx.bo_list, ctx_node)
+ ivpu_bo_print_info(bo, p);
+ mutex_unlock(&vdev->gctx.lock);
+
+ xa_for_each(&vdev->context_xa, ctx_id, file_priv) {
+ file_priv = ivpu_file_priv_get_by_ctx_id(vdev, ctx_id);
+ if (!file_priv)
+ continue;
+
+ mutex_lock(&file_priv->ctx.lock);
+ list_for_each_entry(bo, &file_priv->ctx.bo_list, ctx_node)
+ ivpu_bo_print_info(bo, p);
+ mutex_unlock(&file_priv->ctx.lock);
+
+ ivpu_file_priv_put(&file_priv);
+ }
+}
+
+void ivpu_bo_list_print(struct drm_device *dev)
+{
+ struct drm_printer p = drm_info_printer(dev->dev);
+
+ ivpu_bo_list(dev, &p);
+}
/* Tell the block layer that this is not a rotational device */
blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
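+	/* I/O on this queue completes synchronously in the submitting context. */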
+ blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, disk->queue);
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
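+	/* Bios marked REQ_NOWAIT can be serviced without blocking. */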
+ blk_queue_flag_set(QUEUE_FLAG_NOWAIT, disk->queue);
err = add_disk(disk);
if (err)
goto out_cleanup_disk;
*/
static void end_buffer_async_read_io(struct buffer_head *bh, int uptodate)
{
- struct inode *inode = bh->b_page->mapping->host;
- /* Decrypt if needed */
- if (uptodate &&
- fscrypt_inode_uses_fs_layer_crypto(bh->b_folio->mapping->host)) {
- struct decrypt_bh_ctx *ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
++ struct inode *inode = bh->b_folio->mapping->host;
+ bool decrypt = fscrypt_inode_uses_fs_layer_crypto(inode);
+ bool verify = need_fsverity(bh);
+
+ /* Decrypt (with fscrypt) and/or verify (with fsverity) if needed. */
+ if (uptodate && (decrypt || verify)) {
+ struct postprocess_bh_ctx *ctx =
+ kmalloc(sizeof(*ctx), GFP_ATOMIC);
if (ctx) {
- INIT_WORK(&ctx->work, decrypt_bh);
ctx->bh = bh;
- fscrypt_enqueue_decrypt_work(&ctx->work);
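+			/*
+			 * When both are needed, decryption is queued first;
+			 * decrypt_bh() re-queues the verification work once
+			 * decryption completes.
+			 */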
+ if (decrypt) {
+ INIT_WORK(&ctx->work, decrypt_bh);
+ fscrypt_enqueue_decrypt_work(&ctx->work);
+ } else {
+ INIT_WORK(&ctx->work, verify_bh);
+ fsverity_enqueue_verify_work(&ctx->work);
+ }
return;
}
uptodate = 0;
return rc;
}
-static struct cifs_writedata *
-wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
- pgoff_t end, pgoff_t *index,
- unsigned int *found_pages)
+/*
+ * Extend the region to be written back to include subsequent contiguously
+ * dirty pages if possible, but don't sleep while doing so.
+ */
+static void cifs_extend_writeback(struct address_space *mapping,
+ long *_count,
+ loff_t start,
+ int max_pages,
+ size_t max_len,
+ unsigned int *_len)
{
- struct cifs_writedata *wdata;
- struct folio_batch fbatch;
- unsigned int i, idx, p, nr;
- wdata = cifs_writedata_alloc((unsigned int)tofind,
- cifs_writev_complete);
- if (!wdata)
- return NULL;
-
- folio_batch_init(&fbatch);
- *found_pages = 0;
-
-again:
- nr = filemap_get_folios_tag(mapping, index, end,
- PAGECACHE_TAG_DIRTY, &fbatch);
- if (!nr)
- goto out; /* No dirty pages left in the range */
-
- for (i = 0; i < nr; i++) {
- struct folio *folio = fbatch.folios[i];
-
- idx = 0;
- p = folio_nr_pages(folio);
-add_more:
- wdata->pages[*found_pages] = folio_page(folio, idx);
- folio_get(folio);
- if (++*found_pages == tofind) {
- folio_batch_release(&fbatch);
- goto out;
- }
- if (++idx < p)
- goto add_more;
- }
- folio_batch_release(&fbatch);
- goto again;
-out:
- return wdata;
-}
+ struct folio_batch batch;
+ struct folio *folio;
+ unsigned int psize, nr_pages;
+ size_t len = *_len;
+ pgoff_t index = (start + len) / PAGE_SIZE;
+ bool stop = true;
+ unsigned int i;
+ XA_STATE(xas, &mapping->i_pages, index);
-static unsigned int
-wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
- struct address_space *mapping,
- struct writeback_control *wbc,
- pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
-{
- unsigned int nr_pages = 0, i;
- struct page *page;
+ folio_batch_init(&batch);
- for (i = 0; i < found_pages; i++) {
- page = wdata->pages[i];
- /*
- * At this point we hold neither the i_pages lock nor the
- * page lock: the page may be truncated or invalidated
- * (changing page->mapping to NULL), or even swizzled
- * back from swapper_space to tmpfs file mapping
+ do {
+ /* Firstly, we gather up a batch of contiguous dirty pages
+ * under the RCU read lock - but we can't clear the dirty flags
+ * there if any of those pages are mapped.
*/
+ rcu_read_lock();
- if (nr_pages == 0)
- lock_page(page);
- else if (!trylock_page(page))
- break;
-
- if (unlikely(page->mapping != mapping)) {
- unlock_page(page);
- break;
- }
+ xas_for_each(&xas, folio, ULONG_MAX) {
+ stop = true;
+ if (xas_retry(&xas, folio))
+ continue;
+ if (xa_is_value(folio))
+ break;
+ if (folio_index(folio) != index)
+ break;
+ if (!folio_try_get_rcu(folio)) {
+ xas_reset(&xas);
+ continue;
+ }
+ nr_pages = folio_nr_pages(folio);
+ if (nr_pages > max_pages)
+ break;
- if (!wbc->range_cyclic && page->index > end) {
- *done = true;
- unlock_page(page);
- break;
- }
+ /* Has the page moved or been split? */
+ if (unlikely(folio != xas_reload(&xas))) {
+ folio_put(folio);
+ break;
+ }
- if (*next && (page->index != *next)) {
- /* Not next consecutive page */
- unlock_page(page);
- break;
- }
+ if (!folio_trylock(folio)) {
+ folio_put(folio);
+ break;
+ }
+ if (!folio_test_dirty(folio) || folio_test_writeback(folio)) {
+ folio_unlock(folio);
+ folio_put(folio);
+ break;
+ }
- if (wbc->sync_mode != WB_SYNC_NONE)
- wait_on_page_writeback(page);
+ max_pages -= nr_pages;
+ psize = folio_size(folio);
+ len += psize;
+ stop = false;
+ if (max_pages <= 0 || len >= max_len || *_count <= 0)
+ stop = true;
- if (PageWriteback(page) ||
- !clear_page_dirty_for_io(page)) {
- unlock_page(page);
- break;
+ index += nr_pages;
+ if (!folio_batch_add(&batch, folio))
+ break;
+ if (stop)
+ break;
}
- /*
- * This actually clears the dirty bit in the radix tree.
- * See cifs_writepage() for more commentary.
+ if (!stop)
+ xas_pause(&xas);
+ rcu_read_unlock();
+
+ /* Now, if we obtained any pages, we can shift them to being
+ * writable and mark them for caching.
*/
- set_page_writeback(page);
- if (page_offset(page) >= i_size_read(mapping->host)) {
- *done = true;
- unlock_page(page);
- end_page_writeback(page);
+ if (!folio_batch_count(&batch))
break;
- }
- wdata->pages[i] = page;
- *next = page->index + 1;
- ++nr_pages;
- }
+ for (i = 0; i < folio_batch_count(&batch); i++) {
+ folio = batch.folios[i];
+ /* The folio should be locked, dirty and not undergoing
+ * writeback from the loop above.
+ */
+ if (!folio_clear_dirty_for_io(folio))
+ WARN_ON(1);
+ if (folio_start_writeback(folio))
+ WARN_ON(1);
- /* reset index to refind any pages skipped */
- if (nr_pages == 0)
- *index = wdata->pages[0]->index + 1;
+ *_count -= folio_nr_pages(folio);
+ folio_unlock(folio);
+ }
- /* put any pages we aren't going to use */
- for (i = nr_pages; i < found_pages; i++) {
- put_page(wdata->pages[i]);
- wdata->pages[i] = NULL;
- }
+ folio_batch_release(&batch);
+ cond_resched();
+ } while (!stop);
- return nr_pages;
+ *_len = len;
}
-static int
-wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
- struct address_space *mapping, struct writeback_control *wbc)
+/*
+ * Write back the locked page and any subsequent non-locked dirty pages.
+ */
+static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
+ struct writeback_control *wbc,
+ struct folio *folio,
+ loff_t start, loff_t end)
{
+ struct inode *inode = mapping->host;
+ struct TCP_Server_Info *server;
+ struct cifs_writedata *wdata;
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifs_credits credits_on_stack;
+ struct cifs_credits *credits = &credits_on_stack;
+ struct cifsFileInfo *cfile = NULL;
+ unsigned int xid, wsize, len;
+ loff_t i_size = i_size_read(inode);
+ size_t max_len;
+ long count = wbc->nr_to_write;
int rc;
- wdata->sync_mode = wbc->sync_mode;
- wdata->nr_pages = nr_pages;
- wdata->offset = page_offset(wdata->pages[0]);
- wdata->pagesz = PAGE_SIZE;
- wdata->tailsz = min(i_size_read(mapping->host) -
- page_offset(wdata->pages[nr_pages - 1]),
- (loff_t)PAGE_SIZE);
- wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
- wdata->pid = wdata->cfile->pid;
-
- rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
- if (rc)
- return rc;
-
- if (wdata->cfile->invalidHandle)
- rc = -EAGAIN;
- else
- rc = wdata->server->ops->async_writev(wdata,
- cifs_writedata_release);
+ /* The folio should be locked, dirty and not undergoing writeback. */
+ if (folio_start_writeback(folio))
+ WARN_ON(1);
- return rc;
-}
+ count -= folio_nr_pages(folio);
+ len = folio_size(folio);
-static int
-cifs_writepage_locked(struct page *page, struct writeback_control *wbc);
+ xid = get_xid();
+ server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
-static int cifs_write_one_page(struct folio *folio,
- struct writeback_control *wbc, void *data)
-{
- struct address_space *mapping = data;
- int ret;
+ rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
+ if (rc) {
+ cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc);
+ goto err_xid;
+ }
- ret = cifs_writepage_locked(&folio->page, wbc);
- folio_unlock(folio);
- mapping_set_error(mapping, ret);
- return ret;
-}
+ rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
+ &wsize, credits);
+ if (rc != 0)
+ goto err_close;
-static int cifs_writepages(struct address_space *mapping,
- struct writeback_control *wbc)
-{
- struct inode *inode = mapping->host;
- struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
- struct TCP_Server_Info *server;
- bool done = false, scanned = false, range_whole = false;
- pgoff_t end, index;
- struct cifs_writedata *wdata;
- struct cifsFileInfo *cfile = NULL;
- int rc = 0;
- int saved_rc = 0;
- unsigned int xid;
+ wdata = cifs_writedata_alloc(cifs_writev_complete);
+ if (!wdata) {
+ rc = -ENOMEM;
+ goto err_uncredit;
+ }
- /*
- * If wsize is smaller than the page cache size, default to writing
- * one page at a time.
+ wdata->sync_mode = wbc->sync_mode;
+ wdata->offset = folio_pos(folio);
+ wdata->pid = cfile->pid;
+ wdata->credits = credits_on_stack;
+ wdata->cfile = cfile;
+ wdata->server = server;
+ cfile = NULL;
+
+ /* Find all consecutive lockable dirty pages, stopping when we find a
+ * page that is not immediately lockable, is not dirty or is missing,
+ * or we reach the end of the range.
*/
- if (cifs_sb->ctx->wsize < PAGE_SIZE)
- return write_cache_pages(mapping, wbc, cifs_write_one_page,
- mapping);
+ if (start < i_size) {
+ /* Trim the write to the EOF; the extra data is ignored. Also
+ * put an upper limit on the size of a single storedata op.
+ */
+ max_len = wsize;
+ max_len = min_t(unsigned long long, max_len, end - start + 1);
+ max_len = min_t(unsigned long long, max_len, i_size - start);
- xid = get_xid();
- if (wbc->range_cyclic) {
- index = mapping->writeback_index; /* Start from prev offset */
- end = -1;
- } else {
- index = wbc->range_start >> PAGE_SHIFT;
- end = wbc->range_end >> PAGE_SHIFT;
- if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
- range_whole = true;
- scanned = true;
+ if (len < max_len) {
+ int max_pages = INT_MAX;
+
+#ifdef CONFIG_CIFS_SMB_DIRECT
+ if (server->smbd_conn)
+ max_pages = server->smbd_conn->max_frmr_depth;
+#endif
+ max_pages -= folio_nr_pages(folio);
+
+ if (max_pages > 0)
+ cifs_extend_writeback(mapping, &count, start,
+ max_pages, max_len, &len);
+ }
+ len = min_t(loff_t, len, max_len);
}
- server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
-retry:
- while (!done && index <= end) {
- unsigned int i, nr_pages, found_pages, wsize;
- pgoff_t next = 0, tofind, saved_index = index;
- struct cifs_credits credits_on_stack;
- struct cifs_credits *credits = &credits_on_stack;
- int get_file_rc = 0;
+ wdata->bytes = len;
- if (cfile)
- cifsFileInfo_put(cfile);
+ /* We now have a contiguous set of dirty pages, each with writeback
+ * set; the first page is still locked at this point, but all the rest
+ * have been unlocked.
+ */
+ folio_unlock(folio);
- rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
+ if (start < i_size) {
+ iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages,
+ start, len);
- /* in case of an error store it to return later */
+ rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
if (rc)
- get_file_rc = rc;
+ goto err_wdata;
- rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
- &wsize, credits);
- if (rc != 0) {
- done = true;
- break;
+ if (wdata->cfile->invalidHandle)
+ rc = -EAGAIN;
+ else
+ rc = wdata->server->ops->async_writev(wdata,
+ cifs_writedata_release);
+ if (rc >= 0) {
+ kref_put(&wdata->refcount, cifs_writedata_release);
+ goto err_close;
}
+ } else {
+ /* The dirty region was entirely beyond the EOF. */
+ cifs_pages_written_back(inode, start, len);
+ rc = 0;
+ }
- tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
+err_wdata:
+ kref_put(&wdata->refcount, cifs_writedata_release);
+err_uncredit:
+ add_credits_and_wake_if(server, credits, 0);
+err_close:
+ if (cfile)
+ cifsFileInfo_put(cfile);
+err_xid:
+ free_xid(xid);
+ if (rc == 0) {
+ wbc->nr_to_write = count;
+ } else if (is_retryable_error(rc)) {
+ cifs_pages_write_redirty(inode, start, len);
+ } else {
+ cifs_pages_write_failed(inode, start, len);
+ mapping_set_error(mapping, rc);
+ }
+ /* Indication to update ctime and mtime as close is deferred */
+ set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
+ return rc;
+}
- wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
- &found_pages);
- if (!wdata) {
- rc = -ENOMEM;
- done = true;
- add_credits_and_wake_if(server, credits, 0);
- break;
- }
+/*
+ * write a region of pages back to the server
+ */
+static int cifs_writepages_region(struct address_space *mapping,
+ struct writeback_control *wbc,
+ loff_t start, loff_t end, loff_t *_next)
+{
- struct folio *folio;
- struct page *head_page;
- ssize_t ret;
- int n, skips = 0;
++ struct folio_batch fbatch;
++ int skips = 0;
- if (found_pages == 0) {
- kref_put(&wdata->refcount, cifs_writedata_release);
- add_credits_and_wake_if(server, credits, 0);
++ folio_batch_init(&fbatch);
+ do {
++ int nr;
+ pgoff_t index = start / PAGE_SIZE;
+
- n = find_get_pages_range_tag(mapping, &index, end / PAGE_SIZE,
- PAGECACHE_TAG_DIRTY, 1, &head_page);
- if (!n)
++ nr = filemap_get_folios_tag(mapping, &index, end / PAGE_SIZE,
++ PAGECACHE_TAG_DIRTY, &fbatch);
++ if (!nr)
break;
- }
- folio = page_folio(head_page);
- start = folio_pos(folio); /* May regress with THPs */
- nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
- end, &index, &next, &done);
++ for (int i = 0; i < nr; i++) {
++ ssize_t ret;
++ struct folio *folio = fbatch.folios[i];
- /* At this point we hold neither the i_pages lock nor the
- * page lock: the page may be truncated or invalidated
- * (changing page->mapping to NULL), or even swizzled
- * back from swapper_space to tmpfs file mapping
- */
- if (wbc->sync_mode != WB_SYNC_NONE) {
- ret = folio_lock_killable(folio);
- if (ret < 0) {
- folio_put(folio);
- return ret;
- /* nothing to write? */
- if (nr_pages == 0) {
- kref_put(&wdata->refcount, cifs_writedata_release);
- add_credits_and_wake_if(server, credits, 0);
- continue;
- }
++redo_folio:
++ start = folio_pos(folio); /* May regress with THPs */
+
- wdata->credits = credits_on_stack;
- wdata->cfile = cfile;
- wdata->server = server;
- cfile = NULL;
++ /* At this point we hold neither the i_pages lock nor the
++ * page lock: the page may be truncated or invalidated
++ * (changing page->mapping to NULL), or even swizzled
++ * back from swapper_space to tmpfs file mapping
++ */
++ if (wbc->sync_mode != WB_SYNC_NONE) {
++ ret = folio_lock_killable(folio);
++ if (ret < 0)
++ goto write_error;
++ } else {
++ if (!folio_trylock(folio))
++ goto skip_write;
+ }
- } else {
- if (!folio_trylock(folio)) {
- folio_put(folio);
- return 0;
+
- if (!wdata->cfile) {
- cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
- get_file_rc);
- if (is_retryable_error(get_file_rc))
- rc = get_file_rc;
- else
- rc = -EBADF;
- } else
- rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
++ if (folio_mapping(folio) != mapping ||
++ !folio_test_dirty(folio)) {
++ folio_unlock(folio);
++ goto skip_write;
+ }
- }
- if (folio_mapping(folio) != mapping ||
- !folio_test_dirty(folio)) {
- start += folio_size(folio);
- folio_unlock(folio);
- folio_put(folio);
- continue;
- }
- for (i = 0; i < nr_pages; ++i)
- unlock_page(wdata->pages[i]);
++ if (folio_test_writeback(folio) ||
++ folio_test_fscache(folio)) {
++ folio_unlock(folio);
++ if (wbc->sync_mode == WB_SYNC_NONE)
++ goto skip_write;
- if (folio_test_writeback(folio) ||
- folio_test_fscache(folio)) {
- folio_unlock(folio);
- if (wbc->sync_mode != WB_SYNC_NONE) {
- /* send failure -- clean up the mess */
- if (rc != 0) {
- add_credits_and_wake_if(server, &wdata->credits, 0);
- for (i = 0; i < nr_pages; ++i) {
- if (is_retryable_error(rc))
- redirty_page_for_writepage(wbc,
- wdata->pages[i]);
- else
- SetPageError(wdata->pages[i]);
- end_page_writeback(wdata->pages[i]);
- put_page(wdata->pages[i]);
+ folio_wait_writeback(folio);
+#ifdef CONFIG_CIFS_FSCACHE
+ folio_wait_fscache(folio);
+#endif
- } else {
- start += folio_size(folio);
++ goto redo_folio;
}
- folio_put(folio);
- if (wbc->sync_mode == WB_SYNC_NONE) {
- if (skips >= 5 || need_resched())
- break;
- skips++;
- }
- continue;
- if (!is_retryable_error(rc))
- mapping_set_error(mapping, rc);
-- }
- kref_put(&wdata->refcount, cifs_writedata_release);
- if (!folio_clear_dirty_for_io(folio))
- /* We hold the page lock - it should've been dirty. */
- WARN_ON(1);
- if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
- index = saved_index;
- continue;
- }
++ if (!folio_clear_dirty_for_io(folio))
++ /* We hold the page lock - it should've been dirty. */
++ WARN_ON(1);
- ret = cifs_write_back_from_locked_folio(mapping, wbc, folio, start, end);
- folio_put(folio);
- if (ret < 0)
- /* Return immediately if we received a signal during writing */
- if (is_interrupt_error(rc)) {
- done = true;
- break;
- }
++ ret = cifs_write_back_from_locked_folio(mapping, wbc, folio, start, end);
++ if (ret < 0)
++ goto write_error;
+
- if (rc != 0 && saved_rc == 0)
- saved_rc = rc;
++ start += ret;
++ continue;
+
- wbc->nr_to_write -= nr_pages;
- if (wbc->nr_to_write <= 0)
- done = true;
++write_error:
++ folio_batch_release(&fbatch);
++ *_next = start;
+ return ret;
- start += ret;
- index = next;
- }
++skip_write:
++ /*
++ * Too many skipped writes, or need to reschedule?
++ * Treat it as a write error without an error code.
++ */
++ if (skips >= 5 || need_resched()) {
++ ret = 0;
++ goto write_error;
++ }
+
- if (!scanned && !done) {
- /*
- * We hit the last page and there is more work to be done: wrap
- * back to the start of the file
- */
- scanned = true;
- index = 0;
- goto retry;
- }
++ /* Otherwise, just skip that folio and go on to the next */
++ skips++;
++ start += folio_size(folio);
++ continue;
++ }
+
- if (saved_rc != 0)
- rc = saved_rc;
++ folio_batch_release(&fbatch);
+ cond_resched();
+ } while (wbc->nr_to_write > 0);
- if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
- mapping->writeback_index = index;
+ *_next = start;
+ return 0;
+}
- if (cfile)
- cifsFileInfo_put(cfile);
- free_xid(xid);
- /* Indication to update ctime and mtime as close is deferred */
- set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
- return rc;
+/*
+ * Write some of the pending data back to the server
+ */
+static int cifs_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ loff_t start, next;
+ int ret;
+
+ /* We have to be careful as we can end up racing with setattr()
+ * truncating the pagecache since the caller doesn't take a lock here
+ * to prevent it.
+ */
+
+ if (wbc->range_cyclic) {
+ start = mapping->writeback_index * PAGE_SIZE;
+ ret = cifs_writepages_region(mapping, wbc, start, LLONG_MAX, &next);
+ if (ret == 0) {
+ mapping->writeback_index = next / PAGE_SIZE;
+ if (start > 0 && wbc->nr_to_write > 0) {
+ ret = cifs_writepages_region(mapping, wbc, 0,
+ start, &next);
+ if (ret == 0)
+ mapping->writeback_index =
+ next / PAGE_SIZE;
+ }
+ }
+ } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
+ ret = cifs_writepages_region(mapping, wbc, 0, LLONG_MAX, &next);
+ if (wbc->nr_to_write > 0 && ret == 0)
+ mapping->writeback_index = next / PAGE_SIZE;
+ } else {
+ ret = cifs_writepages_region(mapping, wbc,
+ wbc->range_start, wbc->range_end, &next);
+ }
+
+ return ret;
}
static int
brelse(bd->bd_bh);
}
- static int __gfs2_writepage(struct page *page, struct writeback_control *wbc,
++static int __gfs2_writepage(struct folio *folio, struct writeback_control *wbc,
+ void *data)
+{
+ struct address_space *mapping = data;
- int ret = mapping->a_ops->writepage(page, wbc);
++ int ret = mapping->a_ops->writepage(&folio->page, wbc);
+ mapping_set_error(mapping, ret);
+ return ret;
+}
+
/**
* gfs2_ail1_start_one - Start I/O on a transaction
* @sdp: The superblock
return ret;
}
- static int nfs_writepages_callback(struct page *page,
+ static int nfs_writepages_callback(struct folio *folio,
- struct writeback_control *wbc, void *data)
+ struct writeback_control *wbc, void *data)
{
- struct folio *folio = page_folio(page);
int ret;
- ret = nfs_do_writepage(&folio->page, wbc, data);
+ ret = nfs_do_writepage(folio, wbc, data);
if (ret != AOP_WRITEPAGE_ACTIVATE)
- unlock_page(page);
+ folio_unlock(folio);
return ret;
}
}
}
- static int udf_adinicb_writepage(struct page *page,
++static int udf_adinicb_writepage(struct folio *folio,
+ struct writeback_control *wbc, void *data)
+{
++ struct page *page = &folio->page;
+ struct inode *inode = page->mapping->host;
+ struct udf_inode_info *iinfo = UDF_I(inode);
+
+ BUG_ON(!PageLocked(page));
+ memcpy_to_page(page, 0, iinfo->i_data + iinfo->i_lenEAttr,
+ i_size_read(inode));
+ unlock_page(page);
+ mark_inode_dirty(inode);
+
+ return 0;
+}
+
static int udf_writepages(struct address_space *mapping,
- struct writeback_control *wbc)
+ struct writeback_control *wbc)
{
- return mpage_writepages(mapping, wbc, udf_get_block);
+ struct inode *inode = mapping->host;
+ struct udf_inode_info *iinfo = UDF_I(inode);
+
+ if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB)
+ return mpage_writepages(mapping, wbc, udf_get_block_wb);
+ return write_cache_pages(mapping, wbc, udf_adinicb_writepage, NULL);
+}
+
+static void udf_adinicb_readpage(struct page *page)
+{
+ struct inode *inode = page->mapping->host;
+ char *kaddr;
+ struct udf_inode_info *iinfo = UDF_I(inode);
+ loff_t isize = i_size_read(inode);
+
+ kaddr = kmap_local_page(page);
+ memcpy(kaddr, iinfo->i_data + iinfo->i_lenEAttr, isize);
+ memset(kaddr + isize, 0, PAGE_SIZE - isize);
+ flush_dcache_page(page);
+ SetPageUptodate(page);
+ kunmap_local(kaddr);
}
static int udf_read_folio(struct file *file, struct folio *folio)
int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size);
void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size);
- extern struct static_key_false memcg_kmem_enabled_key;
+extern struct static_key_false memcg_bpf_enabled_key;
+static inline bool memcg_bpf_enabled(void)
+{
+ return static_branch_likely(&memcg_bpf_enabled_key);
+}
+
+ extern struct static_key_false memcg_kmem_online_key;
- static inline bool memcg_kmem_enabled(void)
+ static inline bool memcg_kmem_online(void)
{
- return static_branch_likely(&memcg_kmem_enabled_key);
+ return static_branch_likely(&memcg_kmem_online_key);
}
static inline int memcg_kmem_charge_page(struct page *page, gfp_t gfp,
return NULL;
}
- static inline bool memcg_kmem_enabled(void)
+static inline bool memcg_bpf_enabled(void)
+{
+ return false;
+}
+
+ static inline bool memcg_kmem_online(void)
{
return false;
}
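As a usage note, a hypothetical (illustration-only) caller guarding explicit
page charging behind the renamed static branch; in practice most call sites
rely on __GFP_ACCOUNT, which routes through memcg_kmem_charge_page() internally:

    /* Illustration only: charge an explicitly allocated page to the
     * current memcg when kmem accounting is online.
     */
    static struct page *alloc_accounted_page(gfp_t gfp, int order)
    {
            struct page *page = alloc_pages(gfp, order);

            if (page && memcg_kmem_online() &&
                memcg_kmem_charge_page(page, gfp, order)) {
                    __free_pages(page, order);
                    return NULL;
            }
            return page;
    }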
* &struct mm_struct is freed.
*/
atomic_t mm_count;
-
+#ifdef CONFIG_SCHED_MM_CID
+ /**
+ * @cid_lock: Protect cid bitmap updates vs lookups.
+ *
+ * Prevent situations where updates to the cid bitmap happen
+ * concurrently with lookups. Those can lead to situations
+ * where a lookup cannot find a free bit simply because it was
+ * unlucky enough to load, non-atomically, bitmap words as they
+ * were being concurrently updated by the updaters.
+ */
+ raw_spinlock_t cid_lock;
+#endif
#ifdef CONFIG_MMU
- atomic_long_t pgtables_bytes; /* PTE page table pages */
+ atomic_long_t pgtables_bytes; /* size of all page tables */
#endif
int map_count; /* number of VMAs */
static inline void vma_iter_init(struct vma_iterator *vmi,
struct mm_struct *mm, unsigned long addr)
{
- vmi->mas.tree = &mm->mm_mt;
- vmi->mas.index = addr;
- vmi->mas.node = MAS_START;
+ mas_init(&vmi->mas, &mm->mm_mt, addr);
}
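For reference, a short sketch of how an iterator initialized via
vma_iter_init() is typically walked with the for_each_vma() helper; the
counting function itself is hypothetical:

    /* Count VMAs at or above addr; caller must hold mmap_lock for reading. */
    static unsigned long count_vmas_from(struct mm_struct *mm, unsigned long addr)
    {
            struct vma_iterator vmi;
            struct vm_area_struct *vma;
            unsigned long n = 0;

            vma_iter_init(&vmi, mm, addr);
            for_each_vma(vmi, vma)
                    n++;
            return n;
    }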
+#ifdef CONFIG_SCHED_MM_CID
+/* Accessor for struct mm_struct's cidmask. */
+static inline cpumask_t *mm_cidmask(struct mm_struct *mm)
+{
+ unsigned long cid_bitmap = (unsigned long)mm;
+
+ cid_bitmap += offsetof(struct mm_struct, cpu_bitmap);
+ /* Skip cpu_bitmap */
+ cid_bitmap += cpumask_size();
+ return (struct cpumask *)cid_bitmap;
+}
+
+static inline void mm_init_cid(struct mm_struct *mm)
+{
+ raw_spin_lock_init(&mm->cid_lock);
+ cpumask_clear(mm_cidmask(mm));
+}
+
+static inline unsigned int mm_cid_size(void)
+{
+ return cpumask_size();
+}
+#else /* CONFIG_SCHED_MM_CID */
+static inline void mm_init_cid(struct mm_struct *mm) { }
+static inline unsigned int mm_cid_size(void)
+{
+ return 0;
+}
+#endif /* CONFIG_SCHED_MM_CID */
+
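Because mm_cidmask() points past cpu_bitmap at the tail of struct mm_struct,
the mm allocation must reserve room for both trailing bitmaps; a one-line
sketch of the sizing, mirroring the mm_struct cache setup in kernel/fork.c:

    /* Room for struct mm_struct plus the two trailing bitmaps:
     * cpu_bitmap (cpumask_size()) and the cid bitmap (mm_cid_size()).
     */
    unsigned long mm_size = sizeof(struct mm_struct) + cpumask_size() + mm_cid_size();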
struct mmu_gather;
extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm);
extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm);
pmde = mk_huge_pmd(new, READ_ONCE(vma->vm_page_prot));
if (pmd_swp_soft_dirty(*pvmw->pmd))
pmde = pmd_mksoft_dirty(pmde);
- if (is_writable_migration_entry(entry))
- pmde = maybe_pmd_mkwrite(pmde, vma);
if (pmd_swp_uffd_wp(*pvmw->pmd))
- pmde = pmd_wrprotect(pmd_mkuffd_wp(pmde));
+ pmde = pmd_mkuffd_wp(pmde);
if (!is_migration_entry_young(entry))
pmde = pmd_mkold(pmde);
/* NOTE: this may contain setting soft-dirty on some archs */
 * conditional to this static branch, we'll have to allow modules that do
 * kmem_cache_alloc and the like to see this symbol as well
 */
- DEFINE_STATIC_KEY_FALSE(memcg_kmem_enabled_key);
- EXPORT_SYMBOL(memcg_kmem_enabled_key);
+ DEFINE_STATIC_KEY_FALSE(memcg_kmem_online_key);
+ EXPORT_SYMBOL(memcg_kmem_online_key);
+
+DEFINE_STATIC_KEY_FALSE(memcg_bpf_enabled_key);
+EXPORT_SYMBOL(memcg_bpf_enabled_key);
#endif
/**
stat->attributes_mask |= (STATX_ATTR_APPEND |
STATX_ATTR_IMMUTABLE |
STATX_ATTR_NODUMP);
- generic_fillattr(&init_user_ns, inode, stat);
+ generic_fillattr(idmap, inode, stat);
- if (shmem_is_huge(NULL, inode, 0, false))
+ if (shmem_is_huge(inode, 0, false, NULL, 0))
stat->blksize = HPAGE_PMD_SIZE;
if (request_mask & STATX_BTIME) {