Merge branch 'vfs.file'

author Christian Brauner <brauner@kernel.org>
Thu, 21 Dec 2023 12:21:52 +0000 (13:21 +0100)
committer Christian Brauner <brauner@kernel.org>
Thu, 21 Dec 2023 12:21:52 +0000 (13:21 +0100)
diff --git a/MAINTAINERS b/MAINTAINERS

index 97f51d5ec1cfd715487a616c78afd40324082dfc..d60c4888e6df565c25b0e02d73399df8e6e8a832 100644 (file)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8157,6 +8157,7 @@ F:        include/trace/events/fs_dax.h
  FILESYSTEMS (VFS and infrastructure)
  M:     Alexander Viro <viro@zeniv.linux.org.uk>
  M:     Christian Brauner <brauner@kernel.org>
+R:     Jan Kara <jack@suse.cz>
  L:     linux-fsdevel@vger.kernel.org
  S:     Maintained
  F:     fs/*
@@ -8177,6 +8178,16 @@ F:       fs/exportfs/
  F:     fs/fhandle.c
  F:     include/linux/exportfs.h
  
+FILESYSTEMS [IDMAPPED MOUNTS]
+M:     Christian Brauner <brauner@kernel.org>
+M:     Seth Forshee <sforshee@kernel.org>
+L:     linux-fsdevel@vger.kernel.org
+S:     Maintained
+F:     Documentation/filesystems/idmappings.rst
+F:     fs/mnt_idmapping.c
+F:     include/linux/mnt_idmapping.*
+F:     tools/testing/selftests/mount_setattr/
+
  FILESYSTEMS [IOMAP]
  M:     Christian Brauner <brauner@kernel.org>
  R:     Darrick J. Wong <djwong@kernel.org>
@@ -10252,16 +10263,6 @@ S:     Maintained
  W:     https://github.com/o2genum/ideapad-slidebar
  F:     drivers/input/misc/ideapad_slidebar.c
  
-IDMAPPED MOUNTS
-M:     Christian Brauner <brauner@kernel.org>
-M:     Seth Forshee <sforshee@kernel.org>
-L:     linux-fsdevel@vger.kernel.org
-S:     Maintained
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/vfs/idmapping.git
-F:     Documentation/filesystems/idmappings.rst
-F:     include/linux/mnt_idmapping.*
-F:     tools/testing/selftests/mount_setattr/
-
  IDT VersaClock 5 CLOCK DRIVER
  M:     Luca Ceresoli <luca@lucaceresoli.net>
  S:     Maintained
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c

index 238afd7335e46d7fcce8d8ad096136cf35bf70b1..4943f6b2bbee491651bdacf288e4cdbda2e49dec 100644 (file)
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -2388,7 +2388,7 @@ static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *h
         if (!eventfd)
                 return HV_STATUS_INVALID_PORT_ID;
  
-       eventfd_signal(eventfd, 1);
+       eventfd_signal(eventfd);
         return HV_STATUS_SUCCESS;
  }
  
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c

index e53fad915a626486936971757ca3ef61f5216d2c..523bb6df5ac9858a052b7da5fbb568f0488436ec 100644 (file)
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -2088,7 +2088,7 @@ static bool kvm_xen_hcall_evtchn_send(struct kvm_vcpu *vcpu, u64 param, u64 *r)
                 if (ret < 0 && ret != -ENOTCONN)
                         return false;
         } else {
-               eventfd_signal(evtchnfd->deliver.eventfd.ctx, 1);
+               eventfd_signal(evtchnfd->deliver.eventfd.ctx);
         }
  
         *r = 0;
diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c

index 9711e8fc979d9ade05bdf96bcba300c4ec27432c..3a89644f087cb30c1040f2bfc6bd44f699a88bd6 100644 (file)
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -2044,7 +2044,7 @@ static void hl_notifier_event_send(struct hl_notifier_event *notifier_event, u64
         notifier_event->events_mask |= event_mask;
  
         if (notifier_event->eventfd)
-               eventfd_signal(notifier_event->eventfd, 1);
+               eventfd_signal(notifier_event->eventfd);
  
         mutex_unlock(&notifier_event->lock);
  }
diff --git a/drivers/fpga/dfl.c b/drivers/fpga/dfl.c

index dd7a783d53b5f4d865251ec34a3c390770c9c0a2..e73f88050f08d9066990ed329cf4b163e7cc238f 100644 (file)
--- a/drivers/fpga/dfl.c
+++ b/drivers/fpga/dfl.c
@@ -1872,7 +1872,7 @@ static irqreturn_t dfl_irq_handler(int irq, void *arg)
  {
         struct eventfd_ctx *trigger = arg;
  
-       eventfd_signal(trigger, 1);
+       eventfd_signal(trigger);
         return IRQ_HANDLED;
  }
  
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c

index 01da6789d0440940c7e754d16e6866746a5614ff..b9cc629821964f931566f627b647fa91a53ef61c 100644 (file)
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -1365,7 +1365,7 @@ static void syncobj_eventfd_entry_fence_func(struct dma_fence *fence,
         struct syncobj_eventfd_entry *entry =
                 container_of(cb, struct syncobj_eventfd_entry, fence_cb);
  
-       eventfd_signal(entry->ev_fd_ctx, 1);
+       eventfd_signal(entry->ev_fd_ctx);
         syncobj_eventfd_entry_free(entry);
  }
  
@@ -1388,13 +1388,13 @@ syncobj_eventfd_entry_func(struct drm_syncobj *syncobj,
         entry->fence = fence;
  
         if (entry->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE) {
-               eventfd_signal(entry->ev_fd_ctx, 1);
+               eventfd_signal(entry->ev_fd_ctx);
                 syncobj_eventfd_entry_free(entry);
         } else {
                 ret = dma_fence_add_callback(fence, &entry->fence_cb,
                                              syncobj_eventfd_entry_fence_func);
                 if (ret == -ENOENT) {
-                       eventfd_signal(entry->ev_fd_ctx, 1);
+                       eventfd_signal(entry->ev_fd_ctx);
                         syncobj_eventfd_entry_free(entry);
                 }
         }
diff --git a/drivers/gpu/drm/i915/gvt/interrupt.c b/drivers/gpu/drm/i915/gvt/interrupt.c

index de3f5903d1a7a2a1cb17d25353383fdb34334e6e..c8e7dfc9f79109b50179596a1b0b34cf7f11447a 100644 (file)
--- a/drivers/gpu/drm/i915/gvt/interrupt.c
+++ b/drivers/gpu/drm/i915/gvt/interrupt.c
@@ -422,7 +422,7 @@ static void init_irq_map(struct intel_gvt_irq *irq)
  #define MSI_CAP_DATA(offset) (offset + 8)
  #define MSI_CAP_EN 0x1
  
-static int inject_virtual_interrupt(struct intel_vgpu *vgpu)
+static void inject_virtual_interrupt(struct intel_vgpu *vgpu)
  {
         unsigned long offset = vgpu->gvt->device_info.msi_cap_offset;
         u16 control, data;
@@ -434,10 +434,10 @@ static int inject_virtual_interrupt(struct intel_vgpu *vgpu)
  
         /* Do not generate MSI if MSIEN is disabled */
         if (!(control & MSI_CAP_EN))
-               return 0;
+               return;
  
         if (WARN(control & GENMASK(15, 1), "only support one MSI format\n"))
-               return -EINVAL;
+               return;
  
         trace_inject_msi(vgpu->id, addr, data);
  
@@ -451,10 +451,9 @@ static int inject_virtual_interrupt(struct intel_vgpu *vgpu)
          * returned and don't inject interrupt into guest.
          */
         if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
-               return -ESRCH;
-       if (vgpu->msi_trigger && eventfd_signal(vgpu->msi_trigger, 1) != 1)
-               return -EFAULT;
-       return 0;
+               return;
+       if (vgpu->msi_trigger)
+               eventfd_signal(vgpu->msi_trigger);
  }
  
  static void propagate_event(struct intel_gvt_irq *irq,
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c

index 8ba53edf23119f8077ca816116fa1cabb40703e5..869369cb5b5fa4745aaca7bc5eb7032e684bb132 100644 (file)
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -2498,7 +2498,7 @@ static void dispatch_event_fd(struct list_head *fd_list,
  
         list_for_each_entry_rcu(item, fd_list, xa_list) {
                 if (item->eventfd)
-                       eventfd_signal(item->eventfd, 1);
+                       eventfd_signal(item->eventfd);
                 else
                         deliver_event(item, data);
         }
diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c

index ac69b7f361f5bb8347f4daffb29ed99d71fb0d4e..7eb74711ac9682b2d86172b573c3ca68d2209abf 100644 (file)
--- a/drivers/misc/ocxl/file.c
+++ b/drivers/misc/ocxl/file.c
@@ -184,7 +184,7 @@ static irqreturn_t irq_handler(void *private)
  {
         struct eventfd_ctx *ev_ctx = private;
  
-       eventfd_signal(ev_ctx, 1);
+       eventfd_signal(ev_ctx);
         return IRQ_HANDLED;
  }
  
diff --git a/drivers/s390/cio/vfio_ccw_chp.c b/drivers/s390/cio/vfio_ccw_chp.c

index d3f3a611f95b41a11bde8ed2fe7bae18dad5f21b..38c176cf6295724b60db5c27529a5a32b73350af 100644 (file)
--- a/drivers/s390/cio/vfio_ccw_chp.c
+++ b/drivers/s390/cio/vfio_ccw_chp.c
@@ -115,7 +115,7 @@ static ssize_t vfio_ccw_crw_region_read(struct vfio_ccw_private *private,
  
         /* Notify the guest if more CRWs are on our queue */
         if (!list_empty(&private->crw) && private->crw_trigger)
-               eventfd_signal(private->crw_trigger, 1);
+               eventfd_signal(private->crw_trigger);
  
         return ret;
  }
diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c

index 43601816ea4e4fdb3a45a7ce90096b375e9ca8d6..bfb35cfce1ef1a673426f6f73d66c8fe1db35001 100644 (file)
--- a/drivers/s390/cio/vfio_ccw_drv.c
+++ b/drivers/s390/cio/vfio_ccw_drv.c
@@ -112,7 +112,7 @@ void vfio_ccw_sch_io_todo(struct work_struct *work)
                 private->state = VFIO_CCW_STATE_IDLE;
  
         if (private->io_trigger)
-               eventfd_signal(private->io_trigger, 1);
+               eventfd_signal(private->io_trigger);
  }
  
  void vfio_ccw_crw_todo(struct work_struct *work)
@@ -122,7 +122,7 @@ void vfio_ccw_crw_todo(struct work_struct *work)
         private = container_of(work, struct vfio_ccw_private, crw_work);
  
         if (!list_empty(&private->crw) && private->crw_trigger)
-               eventfd_signal(private->crw_trigger, 1);
+               eventfd_signal(private->crw_trigger);
  }
  
  /*
diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c

index cba4971618ff6d0be24d1dca4b18695801fb61ab..ea532a8a4a0c2905a579d21b4ff633ac658f84f1 100644 (file)
--- a/drivers/s390/cio/vfio_ccw_ops.c
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -421,7 +421,7 @@ static int vfio_ccw_mdev_set_irqs(struct vfio_ccw_private *private,
         case VFIO_IRQ_SET_DATA_NONE:
         {
                 if (*ctx)
-                       eventfd_signal(*ctx, 1);
+                       eventfd_signal(*ctx);
                 return 0;
         }
         case VFIO_IRQ_SET_DATA_BOOL:
@@ -432,7 +432,7 @@ static int vfio_ccw_mdev_set_irqs(struct vfio_ccw_private *private,
                         return -EFAULT;
  
                 if (trigger && *ctx)
-                       eventfd_signal(*ctx, 1);
+                       eventfd_signal(*ctx);
                 return 0;
         }
         case VFIO_IRQ_SET_DATA_EVENTFD:
@@ -612,7 +612,7 @@ static void vfio_ccw_mdev_request(struct vfio_device *vdev, unsigned int count)
                                                "Relaying device request to user (#%u)\n",
                                                count);
  
-               eventfd_signal(private->req_trigger, 1);
+               eventfd_signal(private->req_trigger);
         } else if (count == 0) {
                 dev_notice(dev,
                            "No device request channel registered, blocked until released by user\n");
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c

index 4db538a55192556ce81a473944bc6d813484ca87..542b5be73a6a8cecacce3e2167fd0d1576a368ef 100644 (file)
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -1794,7 +1794,7 @@ static void vfio_ap_mdev_request(struct vfio_device *vdev, unsigned int count)
                                                "Relaying device request to user (#%u)\n",
                                                count);
  
-               eventfd_signal(matrix_mdev->req_trigger, 1);
+               eventfd_signal(matrix_mdev->req_trigger);
         } else if (count == 0) {
                 dev_notice(dev,
                            "No device request registered, blocked until released by user\n");
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c

index efe3e3b857695e6dc80e5c217294f3343153845c..fdd0fc7b8f259e18524f825229c6d5723a363c25 100644 (file)
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -831,7 +831,7 @@ static void ffs_user_copy_worker(struct work_struct *work)
         io_data->kiocb->ki_complete(io_data->kiocb, ret);
  
         if (io_data->ffs->ffs_eventfd && !kiocb_has_eventfd)
-               eventfd_signal(io_data->ffs->ffs_eventfd, 1);
+               eventfd_signal(io_data->ffs->ffs_eventfd);
  
         if (io_data->read)
                 kfree(io_data->to_free);
@@ -2738,7 +2738,7 @@ static void __ffs_event_add(struct ffs_data *ffs,
         ffs->ev.types[ffs->ev.count++] = type;
         wake_up_locked(&ffs->ev.waitq);
         if (ffs->ffs_eventfd)
-               eventfd_signal(ffs->ffs_eventfd, 1);
+               eventfd_signal(ffs->ffs_eventfd);
  }
  
  static void ffs_event_add(struct ffs_data *ffs,
diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c

index fafd4610b1857a9edae74acc6fea28de9b36b8b5..1d24da79c39956ae59400ac6ec4406494ba8c655 100644 (file)
--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -493,7 +493,7 @@ static void vduse_vq_kick(struct vduse_virtqueue *vq)
                 goto unlock;
  
         if (vq->kickfd)
-               eventfd_signal(vq->kickfd, 1);
+               eventfd_signal(vq->kickfd);
         else
                 vq->kicked = true;
  unlock:
@@ -911,7 +911,7 @@ static int vduse_kickfd_setup(struct vduse_dev *dev,
                 eventfd_ctx_put(vq->kickfd);
         vq->kickfd = ctx;
         if (vq->ready && vq->kicked && vq->kickfd) {
-               eventfd_signal(vq->kickfd, 1);
+               eventfd_signal(vq->kickfd);
                 vq->kicked = false;
         }
         spin_unlock(&vq->kick_lock);
@@ -960,7 +960,7 @@ static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq)
  
         spin_lock_irq(&vq->irq_lock);
         if (vq->ready && vq->cb.trigger) {
-               eventfd_signal(vq->cb.trigger, 1);
+               eventfd_signal(vq->cb.trigger);
                 signal = true;
         }
         spin_unlock_irq(&vq->irq_lock);
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c

index c51229fccbd6a8f24600ac95a9f93ab6da5b4aee..d62fbfff20b827936fae17a35321c63ba9183cb1 100644 (file)
--- a/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c
@@ -54,7 +54,7 @@ static irqreturn_t vfio_fsl_mc_irq_handler(int irq_num, void *arg)
  {
         struct vfio_fsl_mc_irq *mc_irq = (struct vfio_fsl_mc_irq *)arg;
  
-       eventfd_signal(mc_irq->trigger, 1);
+       eventfd_signal(mc_irq->trigger);
         return IRQ_HANDLED;
  }
  
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c

index 1929103ee59a3da894ae9d9a8ec040b26dd1a677..1cbc990d42e07cf41904dc726f83c08c72922b77 100644 (file)
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -443,7 +443,7 @@ static int vfio_pci_core_runtime_resume(struct device *dev)
          */
         down_write(&vdev->memory_lock);
         if (vdev->pm_wake_eventfd_ctx) {
-               eventfd_signal(vdev->pm_wake_eventfd_ctx, 1);
+               eventfd_signal(vdev->pm_wake_eventfd_ctx);
                 __vfio_pci_runtime_pm_exit(vdev);
         }
         up_write(&vdev->memory_lock);
@@ -1883,7 +1883,7 @@ void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count)
                         pci_notice_ratelimited(pdev,
                                 "Relaying device request to user (#%u)\n",
                                 count);
-               eventfd_signal(vdev->req_trigger, 1);
+               eventfd_signal(vdev->req_trigger);
         } else if (count == 0) {
                 pci_warn(pdev,
                         "No device request channel registered, blocked until released by user\n");
@@ -2302,7 +2302,7 @@ pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev,
         mutex_lock(&vdev->igate);
  
         if (vdev->err_trigger)
-               eventfd_signal(vdev->err_trigger, 1);
+               eventfd_signal(vdev->err_trigger);
  
         mutex_unlock(&vdev->igate);
  
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c

index cbb4bcbfbf83d98ec3e9c3a44a120c9fb74623f2..237beac8380975110231503e0d30f331ba64e2d5 100644 (file)
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -94,7 +94,7 @@ static void vfio_send_intx_eventfd(void *opaque, void *unused)
                 ctx = vfio_irq_ctx_get(vdev, 0);
                 if (WARN_ON_ONCE(!ctx))
                         return;
-               eventfd_signal(ctx->trigger, 1);
+               eventfd_signal(ctx->trigger);
         }
  }
  
@@ -342,7 +342,7 @@ static irqreturn_t vfio_msihandler(int irq, void *arg)
  {
         struct eventfd_ctx *trigger = arg;
  
-       eventfd_signal(trigger, 1);
+       eventfd_signal(trigger);
         return IRQ_HANDLED;
  }
  
@@ -689,11 +689,11 @@ static int vfio_pci_set_msi_trigger(struct vfio_pci_core_device *vdev,
                 if (!ctx)
                         continue;
                 if (flags & VFIO_IRQ_SET_DATA_NONE) {
-                       eventfd_signal(ctx->trigger, 1);
+                       eventfd_signal(ctx->trigger);
                 } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
                         uint8_t *bools = data;
                         if (bools[i - start])
-                               eventfd_signal(ctx->trigger, 1);
+                               eventfd_signal(ctx->trigger);
                 }
         }
         return 0;
@@ -707,7 +707,7 @@ static int vfio_pci_set_ctx_trigger_single(struct eventfd_ctx **ctx,
         if (flags & VFIO_IRQ_SET_DATA_NONE) {
                 if (*ctx) {
                         if (count) {
-                               eventfd_signal(*ctx, 1);
+                               eventfd_signal(*ctx);
                         } else {
                                 eventfd_ctx_put(*ctx);
                                 *ctx = NULL;
@@ -722,7 +722,7 @@ static int vfio_pci_set_ctx_trigger_single(struct eventfd_ctx **ctx,
  
                 trigger = *(uint8_t *)data;
                 if (trigger && *ctx)
-                       eventfd_signal(*ctx, 1);
+                       eventfd_signal(*ctx);
  
                 return 0;
         } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
diff --git a/drivers/vfio/platform/vfio_platform_irq.c b/drivers/vfio/platform/vfio_platform_irq.c

index 665197caed89e41ad042c87a897c411047561602..61a1bfb68ac7864b84c712c121e61190f230dcfa 100644 (file)
--- a/drivers/vfio/platform/vfio_platform_irq.c
+++ b/drivers/vfio/platform/vfio_platform_irq.c
@@ -155,7 +155,7 @@ static irqreturn_t vfio_automasked_irq_handler(int irq, void *dev_id)
         spin_unlock_irqrestore(&irq_ctx->lock, flags);
  
         if (ret == IRQ_HANDLED)
-               eventfd_signal(irq_ctx->trigger, 1);
+               eventfd_signal(irq_ctx->trigger);
  
         return ret;
  }
@@ -164,7 +164,7 @@ static irqreturn_t vfio_irq_handler(int irq, void *dev_id)
  {
         struct vfio_platform_irq *irq_ctx = dev_id;
  
-       eventfd_signal(irq_ctx->trigger, 1);
+       eventfd_signal(irq_ctx->trigger);
  
         return IRQ_HANDLED;
  }
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c

index 30df5c58db73a846dc3c58ad821201694869a348..8d9f958946d65cf1661fa1e4c9ac4ce9da7c4a98 100644 (file)
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -178,7 +178,7 @@ static irqreturn_t vhost_vdpa_virtqueue_cb(void *private)
         struct eventfd_ctx *call_ctx = vq->call_ctx.ctx;
  
         if (call_ctx)
-               eventfd_signal(call_ctx, 1);
+               eventfd_signal(call_ctx);
  
         return IRQ_HANDLED;
  }
@@ -189,7 +189,7 @@ static irqreturn_t vhost_vdpa_config_cb(void *private)
         struct eventfd_ctx *config_ctx = v->config_ctx;
  
         if (config_ctx)
-               eventfd_signal(config_ctx, 1);
+               eventfd_signal(config_ctx);
  
         return IRQ_HANDLED;
  }
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c

index e0c181ad17e3166f77b9bd3ecf55d4a90b6e30f0..045f666b4f12a2a6416c93dafc2189af03662668 100644 (file)
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -2248,7 +2248,7 @@ int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
                 len -= l;
                 if (!len) {
                         if (vq->log_ctx)
-                               eventfd_signal(vq->log_ctx, 1);
+                               eventfd_signal(vq->log_ctx);
                         return 0;
                 }
         }
@@ -2271,7 +2271,7 @@ static int vhost_update_used_flags(struct vhost_virtqueue *vq)
                 log_used(vq, (used - (void __user *)vq->used),
                          sizeof vq->used->flags);
                 if (vq->log_ctx)
-                       eventfd_signal(vq->log_ctx, 1);
+                       eventfd_signal(vq->log_ctx);
         }
         return 0;
  }
@@ -2289,7 +2289,7 @@ static int vhost_update_avail_event(struct vhost_virtqueue *vq)
                 log_used(vq, (used - (void __user *)vq->used),
                          sizeof *vhost_avail_event(vq));
                 if (vq->log_ctx)
-                       eventfd_signal(vq->log_ctx, 1);
+                       eventfd_signal(vq->log_ctx);
         }
         return 0;
  }
@@ -2715,7 +2715,7 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
                 log_used(vq, offsetof(struct vring_used, idx),
                          sizeof vq->used->idx);
                 if (vq->log_ctx)
-                       eventfd_signal(vq->log_ctx, 1);
+                       eventfd_signal(vq->log_ctx);
         }
         return r;
  }
@@ -2763,7 +2763,7 @@ void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
  {
         /* Signal the Guest tell them we used something up. */
         if (vq->call_ctx.ctx && vhost_notify(dev, vq))
-               eventfd_signal(vq->call_ctx.ctx, 1);
+               eventfd_signal(vq->call_ctx.ctx);
  }
  EXPORT_SYMBOL_GPL(vhost_signal);
  
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h

index f60d5f7bef944e2e965e54c83bb3925d3da56c5b..9e942fcda5c3f73ff39da74524120d455703de3a 100644 (file)
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -249,7 +249,7 @@ void vhost_iotlb_map_free(struct vhost_iotlb *iotlb,
  #define vq_err(vq, fmt, ...) do {                                  \
                 pr_debug(pr_fmt(fmt), ##__VA_ARGS__);       \
                 if ((vq)->error_ctx)                               \
-                               eventfd_signal((vq)->error_ctx, 1);\
+                               eventfd_signal((vq)->error_ctx);\
         } while (0)
  
  enum {
diff --git a/drivers/virt/acrn/ioeventfd.c b/drivers/virt/acrn/ioeventfd.c

index ac4037e9f947ec95c4395de56764b30d3fa14ce0..4e845c6ca0b5731fff8b2422b46cf41e86cbf299 100644 (file)
--- a/drivers/virt/acrn/ioeventfd.c
+++ b/drivers/virt/acrn/ioeventfd.c
@@ -223,7 +223,7 @@ static int acrn_ioeventfd_handler(struct acrn_ioreq_client *client,
         mutex_lock(&client->vm->ioeventfds_lock);
         p = hsm_ioeventfd_match(client->vm, addr, val, size, req->type);
         if (p)
-               eventfd_signal(p->eventfd, 1);
+               eventfd_signal(p->eventfd);
         mutex_unlock(&client->vm->ioeventfds_lock);
  
         return 0;
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c

index 1ce7f3c7a950931e0655df5f8587aee3adacd6be..7efc0c62e984c6766898d39a9ee09acb379d64c3 100644 (file)
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -1147,7 +1147,7 @@ static irqreturn_t ioeventfd_interrupt(int irq, void *dev_id)
                 if (ioreq->addr == kioeventfd->addr + VIRTIO_MMIO_QUEUE_NOTIFY &&
                     ioreq->size == kioeventfd->addr_len &&
                     (ioreq->data & QUEUE_NOTIFY_VQ_MASK) == kioeventfd->vq) {
-                       eventfd_signal(kioeventfd->eventfd, 1);
+                       eventfd_signal(kioeventfd->eventfd);
                         state = STATE_IORESP_READY;
                         break;
                 }
diff --git a/fs/aio.c b/fs/aio.c

index f8589caef9c10ec829bc6470cab5ce159915114c..918d80d4c5ad6b9d0c8ee8ec9bd4ddc595cb5c33 100644 (file)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -266,7 +266,7 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
                 return ERR_CAST(inode);
  
         inode->i_mapping->a_ops = &aio_ctx_aops;
-       inode->i_mapping->private_data = ctx;
+       inode->i_mapping->i_private_data = ctx;
         inode->i_size = PAGE_SIZE * nr_pages;
  
         file = alloc_file_pseudo(inode, aio_mnt, "[aio]",
@@ -316,10 +316,10 @@ static void put_aio_ring_file(struct kioctx *ctx)
  
                 /* Prevent further access to the kioctx from migratepages */
                 i_mapping = aio_ring_file->f_mapping;
-               spin_lock(&i_mapping->private_lock);
-               i_mapping->private_data = NULL;
+               spin_lock(&i_mapping->i_private_lock);
+               i_mapping->i_private_data = NULL;
                 ctx->aio_ring_file = NULL;
-               spin_unlock(&i_mapping->private_lock);
+               spin_unlock(&i_mapping->i_private_lock);
  
                 fput(aio_ring_file);
         }
@@ -422,9 +422,9 @@ static int aio_migrate_folio(struct address_space *mapping, struct folio *dst,
  
         rc = 0;
  
-       /* mapping->private_lock here protects against the kioctx teardown.  */
-       spin_lock(&mapping->private_lock);
-       ctx = mapping->private_data;
+       /* mapping->i_private_lock here protects against the kioctx teardown.  */
+       spin_lock(&mapping->i_private_lock);
+       ctx = mapping->i_private_data;
         if (!ctx) {
                 rc = -EINVAL;
                 goto out;
@@ -476,7 +476,7 @@ static int aio_migrate_folio(struct address_space *mapping, struct folio *dst,
  out_unlock:
         mutex_unlock(&ctx->ring_lock);
  out:
-       spin_unlock(&mapping->private_lock);
+       spin_unlock(&mapping->i_private_lock);
         return rc;
  }
  #else
@@ -1106,6 +1106,11 @@ static inline void iocb_destroy(struct aio_kiocb *iocb)
         kmem_cache_free(kiocb_cachep, iocb);
  }
  
+struct aio_waiter {
+       struct wait_queue_entry w;
+       size_t                  min_nr;
+};
+
  /* aio_complete
   *     Called when the io request on the given iocb is complete.
   */
@@ -1114,7 +1119,7 @@ static void aio_complete(struct aio_kiocb *iocb)
         struct kioctx   *ctx = iocb->ki_ctx;
         struct aio_ring *ring;
         struct io_event *ev_page, *event;
-       unsigned tail, pos, head;
+       unsigned tail, pos, head, avail;
         unsigned long   flags;
  
         /*
@@ -1156,6 +1161,10 @@ static void aio_complete(struct aio_kiocb *iocb)
         ctx->completed_events++;
         if (ctx->completed_events > 1)
                 refill_reqs_available(ctx, head, tail);
+
+       avail = tail > head
+               ? tail - head
+               : tail + ctx->nr_events - head;
         spin_unlock_irqrestore(&ctx->completion_lock, flags);
  
         pr_debug("added to ring %p at [%u]\n", iocb, tail);
@@ -1166,7 +1175,7 @@ static void aio_complete(struct aio_kiocb *iocb)
          * from IRQ context.
          */
         if (iocb->ki_eventfd)
-               eventfd_signal(iocb->ki_eventfd, 1);
+               eventfd_signal(iocb->ki_eventfd);
  
         /*
          * We have to order our ring_info tail store above and test
@@ -1176,8 +1185,18 @@ static void aio_complete(struct aio_kiocb *iocb)
          */
         smp_mb();
  
-       if (waitqueue_active(&ctx->wait))
-               wake_up(&ctx->wait);
+       if (waitqueue_active(&ctx->wait)) {
+               struct aio_waiter *curr, *next;
+               unsigned long flags;
+
+               spin_lock_irqsave(&ctx->wait.lock, flags);
+               list_for_each_entry_safe(curr, next, &ctx->wait.head, w.entry)
+                       if (avail >= curr->min_nr) {
+                               list_del_init_careful(&curr->w.entry);
+                               wake_up_process(curr->w.private);
+                       }
+               spin_unlock_irqrestore(&ctx->wait.lock, flags);
+       }
  }
  
  static inline void iocb_put(struct aio_kiocb *iocb)
@@ -1290,7 +1309,9 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
                         struct io_event __user *event,
                         ktime_t until)
  {
-       long ret = 0;
+       struct hrtimer_sleeper  t;
+       struct aio_waiter       w;
+       long ret = 0, ret2 = 0;
  
         /*
          * Note that aio_read_events() is being called as the conditional - i.e.
@@ -1306,12 +1327,38 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
          * the ringbuffer empty. So in practice we should be ok, but it's
          * something to be aware of when touching this code.
          */
-       if (until == 0)
-               aio_read_events(ctx, min_nr, nr, event, &ret);
-       else
-               wait_event_interruptible_hrtimeout(ctx->wait,
-                               aio_read_events(ctx, min_nr, nr, event, &ret),
-                               until);
+       aio_read_events(ctx, min_nr, nr, event, &ret);
+       if (until == 0 || ret < 0 || ret >= min_nr)
+               return ret;
+
+       hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+       if (until != KTIME_MAX) {
+               hrtimer_set_expires_range_ns(&t.timer, until, current->timer_slack_ns);
+               hrtimer_sleeper_start_expires(&t, HRTIMER_MODE_REL);
+       }
+
+       init_wait(&w.w);
+
+       while (1) {
+               unsigned long nr_got = ret;
+
+               w.min_nr = min_nr - ret;
+
+               ret2 = prepare_to_wait_event(&ctx->wait, &w.w, TASK_INTERRUPTIBLE);
+               if (!ret2 && !t.task)
+                       ret2 = -ETIME;
+
+               if (aio_read_events(ctx, min_nr, nr, event, &ret) || ret2)
+                       break;
+
+               if (nr_got == ret)
+                       schedule();
+       }
+
+       finish_wait(&ctx->wait, &w.w);
+       hrtimer_cancel(&t.timer);
+       destroy_hrtimer_on_stack(&t.timer);
+
         return ret;
  }
  
diff --git a/fs/attr.c b/fs/attr.c

index bdf5deb06ea97de76a52323eb7c0d46e280b1415..5a13f0c8495fde67df096d4501d32c47e86f056e 100644 (file)
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -157,7 +157,7 @@ static bool chgrp_ok(struct mnt_idmap *idmap,
   * the vfsmount must be passed through @idmap. This function will then
   * take care to map the inode according to @idmap before checking
   * permissions. On non-idmapped mounts or if permission checking is to be
- * performed on the raw inode simply passs @nop_mnt_idmap.
+ * performed on the raw inode simply pass @nop_mnt_idmap.
   *
   * Should be called as the first thing in ->setattr implementations,
   * possibly after taking additional locks.
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c

index 03cef28d9e3789d83e7037a7bcb4a818d00d4cbc..3431a53bf3fdf3454f1e2545ddbb175732c4836d 100644 (file)
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -870,7 +870,7 @@ static int attach_extent_buffer_page(struct extent_buffer *eb,
          * will not race with any other ebs.
          */
         if (page->mapping)
-               lockdep_assert_held(&page->mapping->private_lock);
+               lockdep_assert_held(&page->mapping->i_private_lock);
  
         if (fs_info->nodesize >= PAGE_SIZE) {
                 if (!PagePrivate(page))
@@ -1736,16 +1736,16 @@ static int submit_eb_subpage(struct page *page, struct writeback_control *wbc)
                  * Take private lock to ensure the subpage won't be detached
                  * in the meantime.
                  */
-               spin_lock(&page->mapping->private_lock);
+               spin_lock(&page->mapping->i_private_lock);
                 if (!PagePrivate(page)) {
-                       spin_unlock(&page->mapping->private_lock);
+                       spin_unlock(&page->mapping->i_private_lock);
                         break;
                 }
                 spin_lock_irqsave(&subpage->lock, flags);
                 if (!test_bit(bit_start + fs_info->subpage_info->dirty_offset,
                               subpage->bitmaps)) {
                         spin_unlock_irqrestore(&subpage->lock, flags);
-                       spin_unlock(&page->mapping->private_lock);
+                       spin_unlock(&page->mapping->i_private_lock);
                         bit_start++;
                         continue;
                 }
@@ -1759,7 +1759,7 @@ static int submit_eb_subpage(struct page *page, struct writeback_control *wbc)
                  */
                 eb = find_extent_buffer_nolock(fs_info, start);
                 spin_unlock_irqrestore(&subpage->lock, flags);
-               spin_unlock(&page->mapping->private_lock);
+               spin_unlock(&page->mapping->i_private_lock);
  
                 /*
                  * The eb has already reached 0 refs thus find_extent_buffer()
@@ -1811,9 +1811,9 @@ static int submit_eb_page(struct page *page, struct btrfs_eb_write_context *ctx)
         if (btrfs_sb(page->mapping->host->i_sb)->nodesize < PAGE_SIZE)
                 return submit_eb_subpage(page, wbc);
  
-       spin_lock(&mapping->private_lock);
+       spin_lock(&mapping->i_private_lock);
         if (!PagePrivate(page)) {
-               spin_unlock(&mapping->private_lock);
+               spin_unlock(&mapping->i_private_lock);
                 return 0;
         }
  
@@ -1824,16 +1824,16 @@ static int submit_eb_page(struct page *page, struct btrfs_eb_write_context *ctx)
          * crashing the machine for something we can survive anyway.
          */
         if (WARN_ON(!eb)) {
-               spin_unlock(&mapping->private_lock);
+               spin_unlock(&mapping->i_private_lock);
                 return 0;
         }
  
         if (eb == ctx->eb) {
-               spin_unlock(&mapping->private_lock);
+               spin_unlock(&mapping->i_private_lock);
                 return 0;
         }
         ret = atomic_inc_not_zero(&eb->refs);
-       spin_unlock(&mapping->private_lock);
+       spin_unlock(&mapping->i_private_lock);
         if (!ret)
                 return 0;
  
@@ -3056,7 +3056,7 @@ static bool page_range_has_eb(struct btrfs_fs_info *fs_info, struct page *page)
  {
         struct btrfs_subpage *subpage;
  
-       lockdep_assert_held(&page->mapping->private_lock);
+       lockdep_assert_held(&page->mapping->i_private_lock);
  
         if (PagePrivate(page)) {
                 subpage = (struct btrfs_subpage *)page->private;
@@ -3079,14 +3079,14 @@ static void detach_extent_buffer_page(struct extent_buffer *eb, struct page *pag
  
         /*
          * For mapped eb, we're going to change the page private, which should
-        * be done under the private_lock.
+        * be done under the i_private_lock.
          */
         if (mapped)
-               spin_lock(&page->mapping->private_lock);
+               spin_lock(&page->mapping->i_private_lock);
  
         if (!PagePrivate(page)) {
                 if (mapped)
-                       spin_unlock(&page->mapping->private_lock);
+                       spin_unlock(&page->mapping->i_private_lock);
                 return;
         }
  
@@ -3110,7 +3110,7 @@ static void detach_extent_buffer_page(struct extent_buffer *eb, struct page *pag
                         detach_page_private(page);
                 }
                 if (mapped)
-                       spin_unlock(&page->mapping->private_lock);
+                       spin_unlock(&page->mapping->i_private_lock);
                 return;
         }
  
@@ -3133,7 +3133,7 @@ static void detach_extent_buffer_page(struct extent_buffer *eb, struct page *pag
         if (!page_range_has_eb(fs_info, page))
                 btrfs_detach_subpage(fs_info, page);
  
-       spin_unlock(&page->mapping->private_lock);
+       spin_unlock(&page->mapping->i_private_lock);
  }
  
  /* Release all pages attached to the extent buffer */
@@ -3514,7 +3514,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
  
         /*
          * Preallocate page->private for subpage case, so that we won't
-        * allocate memory with private_lock nor page lock hold.
+        * allocate memory while holding i_private_lock or the page lock.
          *
          * The memory will be freed by attach_extent_buffer_page() or freed
          * manually if we exit earlier.
@@ -3535,10 +3535,10 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
                         goto free_eb;
                 }
  
-               spin_lock(&mapping->private_lock);
+               spin_lock(&mapping->i_private_lock);
                 exists = grab_extent_buffer(fs_info, p);
                 if (exists) {
-                       spin_unlock(&mapping->private_lock);
+                       spin_unlock(&mapping->i_private_lock);
                         unlock_page(p);
                         put_page(p);
                         mark_extent_buffer_accessed(exists, p);
@@ -3558,7 +3558,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
                  * Thus needs no special handling in error path.
                  */
                 btrfs_page_inc_eb_refs(fs_info, p);
-               spin_unlock(&mapping->private_lock);
+               spin_unlock(&mapping->i_private_lock);
  
                 WARN_ON(btrfs_page_test_dirty(fs_info, p, eb->start, eb->len));
                 eb->pages[i] = p;
@@ -4563,12 +4563,12 @@ static int try_release_subpage_extent_buffer(struct page *page)
          * Finally to check if we have cleared page private, as if we have
          * released all ebs in the page, the page private should be cleared now.
          */
-       spin_lock(&page->mapping->private_lock);
+       spin_lock(&page->mapping->i_private_lock);
         if (!PagePrivate(page))
                 ret = 1;
         else
                 ret = 0;
-       spin_unlock(&page->mapping->private_lock);
+       spin_unlock(&page->mapping->i_private_lock);
         return ret;
  
  }
@@ -4584,9 +4584,9 @@ int try_release_extent_buffer(struct page *page)
          * We need to make sure nobody is changing page->private, as we rely on
          * page->private as the pointer to extent buffer.
          */
-       spin_lock(&page->mapping->private_lock);
+       spin_lock(&page->mapping->i_private_lock);
         if (!PagePrivate(page)) {
-               spin_unlock(&page->mapping->private_lock);
+               spin_unlock(&page->mapping->i_private_lock);
                 return 1;
         }
  
@@ -4601,10 +4601,10 @@ int try_release_extent_buffer(struct page *page)
         spin_lock(&eb->refs_lock);
         if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
                 spin_unlock(&eb->refs_lock);
-               spin_unlock(&page->mapping->private_lock);
+               spin_unlock(&page->mapping->i_private_lock);
                 return 0;
         }
-       spin_unlock(&page->mapping->private_lock);
+       spin_unlock(&page->mapping->i_private_lock);
  
         /*
          * If tree ref isn't set then we know the ref on this eb is a real ref,
diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c

index 1b999c6e419307c80cd0cf9666bff21d0b2bcee1..2347cf15278be8e3790e6cb757a5a9ea18455c80 100644 (file)
--- a/fs/btrfs/subpage.c
+++ b/fs/btrfs/subpage.c
@@ -200,7 +200,7 @@ void btrfs_page_inc_eb_refs(const struct btrfs_fs_info *fs_info,
                 return;
  
         ASSERT(PagePrivate(page) && page->mapping);
-       lockdep_assert_held(&page->mapping->private_lock);
+       lockdep_assert_held(&page->mapping->i_private_lock);
  
         subpage = (struct btrfs_subpage *)page->private;
         atomic_inc(&subpage->eb_refs);
@@ -215,7 +215,7 @@ void btrfs_page_dec_eb_refs(const struct btrfs_fs_info *fs_info,
                 return;
  
         ASSERT(PagePrivate(page) && page->mapping);
-       lockdep_assert_held(&page->mapping->private_lock);
+       lockdep_assert_held(&page->mapping->i_private_lock);
  
         subpage = (struct btrfs_subpage *)page->private;
         ASSERT(atomic_read(&subpage->eb_refs));
diff --git a/fs/buffer.c b/fs/buffer.c

index 967f34b70aa8f73aa56beec71a93a08b20546698..5ffc44ab485410f748e9541d57230bfb2fc4f5cc 100644 (file)
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -180,11 +180,11 @@ EXPORT_SYMBOL(end_buffer_write_sync);
   * Various filesystems appear to want __find_get_block to be non-blocking.
   * But it's the page lock which protects the buffers.  To get around this,
   * we get exclusion from try_to_free_buffers with the blockdev mapping's
- * private_lock.
+ * i_private_lock.
   *
- * Hack idea: for the blockdev mapping, private_lock contention
+ * Hack idea: for the blockdev mapping, i_private_lock contention
   * may be quite high.  This code could TryLock the page, and if that
- * succeeds, there is no need to take private_lock.
+ * succeeds, there is no need to take i_private_lock.
   */
  static struct buffer_head *
  __find_get_block_slow(struct block_device *bdev, sector_t block)
@@ -204,7 +204,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
         if (IS_ERR(folio))
                 goto out;
  
-       spin_lock(&bd_mapping->private_lock);
+       spin_lock(&bd_mapping->i_private_lock);
         head = folio_buffers(folio);
         if (!head)
                 goto out_unlock;
@@ -236,7 +236,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
                        1 << bd_inode->i_blkbits);
         }
  out_unlock:
-       spin_unlock(&bd_mapping->private_lock);
+       spin_unlock(&bd_mapping->i_private_lock);
         folio_put(folio);
  out:
         return ret;
@@ -467,25 +467,25 @@ EXPORT_SYMBOL(mark_buffer_async_write);
   *
   * The functions mark_buffer_inode_dirty(), fsync_inode_buffers(),
   * inode_has_buffers() and invalidate_inode_buffers() are provided for the
- * management of a list of dependent buffers at ->i_mapping->private_list.
+ * management of a list of dependent buffers at ->i_mapping->i_private_list.
   *
   * Locking is a little subtle: try_to_free_buffers() will remove buffers
   * from their controlling inode's queue when they are being freed.  But
   * try_to_free_buffers() will be operating against the *blockdev* mapping
   * at the time, not against the S_ISREG file which depends on those buffers.
- * So the locking for private_list is via the private_lock in the address_space
+ * So the locking for i_private_list is via the i_private_lock in the address_space
   * which backs the buffers.  Which is different from the address_space 
   * against which the buffers are listed.  So for a particular address_space,
- * mapping->private_lock does *not* protect mapping->private_list!  In fact,
- * mapping->private_list will always be protected by the backing blockdev's
- * ->private_lock.
+ * mapping->i_private_lock does *not* protect mapping->i_private_list!  In fact,
+ * mapping->i_private_list will always be protected by the backing blockdev's
+ * ->i_private_lock.
   *
   * Which introduces a requirement: all buffers on an address_space's
- * ->private_list must be from the same address_space: the blockdev's.
+ * ->i_private_list must be from the same address_space: the blockdev's.
   *
- * address_spaces which do not place buffers at ->private_list via these
- * utility functions are free to use private_lock and private_list for
- * whatever they want.  The only requirement is that list_empty(private_list)
+ * address_spaces which do not place buffers at ->i_private_list via these
+ * utility functions are free to use i_private_lock and i_private_list for
+ * whatever they want.  The only requirement is that list_empty(i_private_list)
   * be true at clear_inode() time.
   *
   * FIXME: clear_inode should not call invalidate_inode_buffers().  The
@@ -508,7 +508,7 @@ EXPORT_SYMBOL(mark_buffer_async_write);
   */
  
  /*
- * The buffer's backing address_space's private_lock must be held
+ * The buffer's backing address_space's i_private_lock must be held
   */
  static void __remove_assoc_queue(struct buffer_head *bh)
  {
@@ -519,7 +519,7 @@ static void __remove_assoc_queue(struct buffer_head *bh)
  
  int inode_has_buffers(struct inode *inode)
  {
-       return !list_empty(&inode->i_data.private_list);
+       return !list_empty(&inode->i_data.i_private_list);
  }
  
  /*
@@ -561,7 +561,7 @@ repeat:
   * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
   * @mapping: the mapping which wants those buffers written
   *
- * Starts I/O against the buffers at mapping->private_list, and waits upon
+ * Starts I/O against the buffers at mapping->i_private_list, and waits upon
   * that I/O.
   *
   * Basically, this is a convenience function for fsync().
@@ -570,13 +570,13 @@ repeat:
   */
  int sync_mapping_buffers(struct address_space *mapping)
  {
-       struct address_space *buffer_mapping = mapping->private_data;
+       struct address_space *buffer_mapping = mapping->i_private_data;
  
-       if (buffer_mapping == NULL || list_empty(&mapping->private_list))
+       if (buffer_mapping == NULL || list_empty(&mapping->i_private_list))
                 return 0;
  
-       return fsync_buffers_list(&buffer_mapping->private_lock,
-                                       &mapping->private_list);
+       return fsync_buffers_list(&buffer_mapping->i_private_lock,
+                                       &mapping->i_private_list);
  }
  EXPORT_SYMBOL(sync_mapping_buffers);
  
@@ -673,17 +673,17 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
         struct address_space *buffer_mapping = bh->b_folio->mapping;
  
         mark_buffer_dirty(bh);
-       if (!mapping->private_data) {
-               mapping->private_data = buffer_mapping;
+       if (!mapping->i_private_data) {
+               mapping->i_private_data = buffer_mapping;
         } else {
-               BUG_ON(mapping->private_data != buffer_mapping);
+               BUG_ON(mapping->i_private_data != buffer_mapping);
         }
         if (!bh->b_assoc_map) {
-               spin_lock(&buffer_mapping->private_lock);
+               spin_lock(&buffer_mapping->i_private_lock);
                 list_move_tail(&bh->b_assoc_buffers,
-                               &mapping->private_list);
+                               &mapping->i_private_list);
                 bh->b_assoc_map = mapping;
-               spin_unlock(&buffer_mapping->private_lock);
+               spin_unlock(&buffer_mapping->i_private_lock);
         }
  }
  EXPORT_SYMBOL(mark_buffer_dirty_inode);
@@ -706,7 +706,7 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
   * bit, see a bunch of clean buffers and we'd end up with dirty buffers/clean
   * page on the dirty page list.
   *
- * We use private_lock to lock against try_to_free_buffers while using the
+ * We use i_private_lock to lock against try_to_free_buffers while using the
   * page's buffer list.  Also use this to protect against clean buffers being
   * added to the page after it was set dirty.
   *
@@ -718,7 +718,7 @@ bool block_dirty_folio(struct address_space *mapping, struct folio *folio)
         struct buffer_head *head;
         bool newly_dirty;
  
-       spin_lock(&mapping->private_lock);
+       spin_lock(&mapping->i_private_lock);
         head = folio_buffers(folio);
         if (head) {
                 struct buffer_head *bh = head;
@@ -734,7 +734,7 @@ bool block_dirty_folio(struct address_space *mapping, struct folio *folio)
          */
         folio_memcg_lock(folio);
         newly_dirty = !folio_test_set_dirty(folio);
-       spin_unlock(&mapping->private_lock);
+       spin_unlock(&mapping->i_private_lock);
  
         if (newly_dirty)
                 __folio_mark_dirty(folio, mapping, 1);
@@ -827,7 +827,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
                 smp_mb();
                 if (buffer_dirty(bh)) {
                         list_add(&bh->b_assoc_buffers,
-                                &mapping->private_list);
+                                &mapping->i_private_list);
                         bh->b_assoc_map = mapping;
                 }
                 spin_unlock(lock);
@@ -851,7 +851,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
   * probably unmounting the fs, but that doesn't mean we have already
   * done a sync().  Just drop the buffers from the inode list.
   *
- * NOTE: we take the inode's blockdev's mapping's private_lock.  Which
+ * NOTE: we take the inode's blockdev's mapping's i_private_lock.  Which
   * assumes that all the buffers are against the blockdev.  Not true
   * for reiserfs.
   */
@@ -859,13 +859,13 @@ void invalidate_inode_buffers(struct inode *inode)
  {
         if (inode_has_buffers(inode)) {
                 struct address_space *mapping = &inode->i_data;
-               struct list_head *list = &mapping->private_list;
-               struct address_space *buffer_mapping = mapping->private_data;
+               struct list_head *list = &mapping->i_private_list;
+               struct address_space *buffer_mapping = mapping->i_private_data;
  
-               spin_lock(&buffer_mapping->private_lock);
+               spin_lock(&buffer_mapping->i_private_lock);
                 while (!list_empty(list))
                         __remove_assoc_queue(BH_ENTRY(list->next));
-               spin_unlock(&buffer_mapping->private_lock);
+               spin_unlock(&buffer_mapping->i_private_lock);
         }
  }
  EXPORT_SYMBOL(invalidate_inode_buffers);
@@ -882,10 +882,10 @@ int remove_inode_buffers(struct inode *inode)
  
         if (inode_has_buffers(inode)) {
                 struct address_space *mapping = &inode->i_data;
-               struct list_head *list = &mapping->private_list;
-               struct address_space *buffer_mapping = mapping->private_data;
+               struct list_head *list = &mapping->i_private_list;
+               struct address_space *buffer_mapping = mapping->i_private_data;
  
-               spin_lock(&buffer_mapping->private_lock);
+               spin_lock(&buffer_mapping->i_private_lock);
                 while (!list_empty(list)) {
                         struct buffer_head *bh = BH_ENTRY(list->next);
                         if (buffer_dirty(bh)) {
@@ -894,7 +894,7 @@ int remove_inode_buffers(struct inode *inode)
                         }
                         __remove_assoc_queue(bh);
                 }
-               spin_unlock(&buffer_mapping->private_lock);
+               spin_unlock(&buffer_mapping->i_private_lock);
         }
         return ret;
  }
@@ -1064,11 +1064,11 @@ grow_dev_page(struct block_device *bdev, sector_t block,
          * lock to be atomic wrt __find_get_block(), which does not
          * run under the folio lock.
          */
-       spin_lock(&inode->i_mapping->private_lock);
+       spin_lock(&inode->i_mapping->i_private_lock);
         link_dev_buffers(folio, bh);
         end_block = folio_init_buffers(folio, bdev,
                         (sector_t)index << sizebits, size);
-       spin_unlock(&inode->i_mapping->private_lock);
+       spin_unlock(&inode->i_mapping->i_private_lock);
  done:
         ret = (block < end_block) ? 1 : -ENXIO;
  failed:
@@ -1168,7 +1168,7 @@ __getblk_slow(struct block_device *bdev, sector_t block,
   * and then attach the address_space's inode to its superblock's dirty
   * inode list.
   *
- * mark_buffer_dirty() is atomic.  It takes bh->b_folio->mapping->private_lock,
+ * mark_buffer_dirty() is atomic.  It takes bh->b_folio->mapping->i_private_lock,
   * i_pages lock and mapping->host->i_lock.
   */
  void mark_buffer_dirty(struct buffer_head *bh)
@@ -1246,10 +1246,10 @@ void __bforget(struct buffer_head *bh)
         if (bh->b_assoc_map) {
                 struct address_space *buffer_mapping = bh->b_folio->mapping;
  
-               spin_lock(&buffer_mapping->private_lock);
+               spin_lock(&buffer_mapping->i_private_lock);
                 list_del_init(&bh->b_assoc_buffers);
                 bh->b_assoc_map = NULL;
-               spin_unlock(&buffer_mapping->private_lock);
+               spin_unlock(&buffer_mapping->i_private_lock);
         }
         __brelse(bh);
  }
@@ -1638,7 +1638,7 @@ EXPORT_SYMBOL(block_invalidate_folio);
  
  /*
   * We attach and possibly dirty the buffers atomically wrt
- * block_dirty_folio() via private_lock.  try_to_free_buffers
+ * block_dirty_folio() via i_private_lock.  try_to_free_buffers
   * is already excluded via the folio lock.
   */
  struct buffer_head *create_empty_buffers(struct folio *folio,
@@ -1656,7 +1656,7 @@ struct buffer_head *create_empty_buffers(struct folio *folio,
         } while (bh);
         tail->b_this_page = head;
  
-       spin_lock(&folio->mapping->private_lock);
+       spin_lock(&folio->mapping->i_private_lock);
         if (folio_test_uptodate(folio) || folio_test_dirty(folio)) {
                 bh = head;
                 do {
@@ -1668,7 +1668,7 @@ struct buffer_head *create_empty_buffers(struct folio *folio,
                 } while (bh != head);
         }
         folio_attach_private(folio, head);
-       spin_unlock(&folio->mapping->private_lock);
+       spin_unlock(&folio->mapping->i_private_lock);
  
         return head;
  }
@@ -1715,7 +1715,7 @@ void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
                         if (!folio_buffers(folio))
                                 continue;
                         /*
-                        * We use folio lock instead of bd_mapping->private_lock
+                        * We use folio lock instead of bd_mapping->i_private_lock
                          * to pin buffers here since we can afford to sleep and
                          * it scales better than a global spinlock lock.
                          */
@@ -2883,7 +2883,7 @@ EXPORT_SYMBOL(sync_dirty_buffer);
   * are unused, and releases them if so.
   *
   * Exclusion against try_to_free_buffers may be obtained by either
- * locking the folio or by holding its mapping's private_lock.
+ * locking the folio or by holding its mapping's i_private_lock.
   *
   * If the folio is dirty but all the buffers are clean then we need to
   * be sure to mark the folio clean as well.  This is because the folio
@@ -2894,7 +2894,7 @@ EXPORT_SYMBOL(sync_dirty_buffer);
   * The same applies to regular filesystem folios: if all the buffers are
   * clean then we set the folio clean and proceed.  To do that, we require
   * total exclusion from block_dirty_folio().  That is obtained with
- * private_lock.
+ * i_private_lock.
   *
   * try_to_free_buffers() is non-blocking.
   */
@@ -2946,7 +2946,7 @@ bool try_to_free_buffers(struct folio *folio)
                 goto out;
         }
  
-       spin_lock(&mapping->private_lock);
+       spin_lock(&mapping->i_private_lock);
         ret = drop_buffers(folio, &buffers_to_free);
  
         /*
@@ -2959,13 +2959,13 @@ bool try_to_free_buffers(struct folio *folio)
          * the folio's buffers clean.  We discover that here and clean
          * the folio also.
          *
-        * private_lock must be held over this entire operation in order
+        * i_private_lock must be held over this entire operation in order
          * to synchronise against block_dirty_folio and prevent the
          * dirty bit from being lost.
          */
         if (ret)
                 folio_cancel_dirty(folio);
-       spin_unlock(&mapping->private_lock);
+       spin_unlock(&mapping->i_private_lock);
  out:
         if (buffers_to_free) {
                 struct buffer_head *bh = buffers_to_free;
diff --git a/fs/dax.c b/fs/dax.c

index 3380b43cb6bbbd1289901a3e29edc903abe6156e..423fc1607dfae5bfda9acb7042acb408e187823c 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1128,7 +1128,7 @@ static int dax_iomap_copy_around(loff_t pos, uint64_t length, size_t align_size,
         /* zero the edges if srcmap is a HOLE or IOMAP_UNWRITTEN */
         bool zero_edge = srcmap->flags & IOMAP_F_SHARED ||
                          srcmap->type == IOMAP_UNWRITTEN;
-       void *saddr = 0;
+       void *saddr = NULL;
         int ret = 0;
  
         if (!zero_edge) {
diff --git a/fs/direct-io.c b/fs/direct-io.c

index 20533266ade6e5b097b023182d8b59f5d6e82e19..60456263a338e018e6b4cb0f79ce26e0806ec4c9 100644 (file)
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1114,7 +1114,7 @@ ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
         loff_t offset = iocb->ki_pos;
         const loff_t end = offset + count;
         struct dio *dio;
-       struct dio_submit sdio = { 0, };
+       struct dio_submit sdio = { NULL, };
         struct buffer_head map_bh = { 0, };
         struct blk_plug plug;
         unsigned long align = offset | iov_iter_alignment(iter);
diff --git a/fs/eventfd.c b/fs/eventfd.c

index 33a918f9566c310c663776e6c8983695e7476d18..ad8186d47ba76062f1540835c9a5a0a64f560cd5 100644 (file)
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -43,7 +43,17 @@ struct eventfd_ctx {
         int id;
  };
  
-__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, __poll_t mask)
+/**
+ * eventfd_signal_mask - Increment the event counter
+ * @ctx: [in] Pointer to the eventfd context.
+ * @mask: [in] poll mask
+ *
+ * This function is supposed to be called by the kernel in paths that do not
+ * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX
+ * value, and we signal this as an overflow condition by returning EPOLLERR
+ * to poll(2).
+ */
+void eventfd_signal_mask(struct eventfd_ctx *ctx, __poll_t mask)
  {
         unsigned long flags;
  
@@ -56,45 +66,23 @@ __u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, __poll_t mask)
          * safe context.
          */
         if (WARN_ON_ONCE(current->in_eventfd))
-               return 0;
+               return;
  
         spin_lock_irqsave(&ctx->wqh.lock, flags);
         current->in_eventfd = 1;
-       if (ULLONG_MAX - ctx->count < n)
-               n = ULLONG_MAX - ctx->count;
-       ctx->count += n;
+       if (ctx->count < ULLONG_MAX)
+               ctx->count++;
         if (waitqueue_active(&ctx->wqh))
                 wake_up_locked_poll(&ctx->wqh, EPOLLIN | mask);
         current->in_eventfd = 0;
         spin_unlock_irqrestore(&ctx->wqh.lock, flags);
-
-       return n;
-}
-
-/**
- * eventfd_signal - Adds @n to the eventfd counter.
- * @ctx: [in] Pointer to the eventfd context.
- * @n: [in] Value of the counter to be added to the eventfd internal counter.
- *          The value cannot be negative.
- *
- * This function is supposed to be called by the kernel in paths that do not
- * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX
- * value, and we signal this as overflow condition by returning a EPOLLERR
- * to poll(2).
- *
- * Returns the amount by which the counter was incremented.  This will be less
- * than @n if the counter has overflowed.
- */
-__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
-{
-       return eventfd_signal_mask(ctx, n, 0);
  }
-EXPORT_SYMBOL_GPL(eventfd_signal);
+EXPORT_SYMBOL_GPL(eventfd_signal_mask);
  
  static void eventfd_free_ctx(struct eventfd_ctx *ctx)
  {
         if (ctx->id >= 0)
-               ida_simple_remove(&eventfd_ida, ctx->id);
+               ida_free(&eventfd_ida, ctx->id);
         kfree(ctx);
  }
  
@@ -407,7 +395,7 @@ static int do_eventfd(unsigned int count, int flags)
         init_waitqueue_head(&ctx->wqh);
         ctx->count = count;
         ctx->flags = flags;
-       ctx->id = ida_simple_get(&eventfd_ida, 0, 0, GFP_KERNEL);
+       ctx->id = ida_alloc(&eventfd_ida, GFP_KERNEL);
  
         flags &= EFD_SHARED_FCNTL_FLAGS;
         flags |= O_RDWR;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c

index 61277f7f87225a0a69701922ad84f75290a5a113..0558c8c986d4a1bdb88a9aaa54771c83a24163f2 100644 (file)
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1261,7 +1261,7 @@ static int write_end_fn(handle_t *handle, struct inode *inode,
   * We need to pick up the new inode size which generic_commit_write gave us
   * `file' can be NULL - eg, when called from page_symlink().
   *
- * ext4 never places buffers on inode->i_mapping->private_list.  metadata
+ * ext4 never places buffers on inode->i_mapping->i_private_list.  metadata
   * buffers are managed internally.
   */
  static int ext4_write_end(struct file *file,
@@ -3213,7 +3213,7 @@ static bool ext4_inode_datasync_dirty(struct inode *inode)
         }
  
         /* Any metadata buffers to write? */
-       if (!list_empty(&inode->i_mapping->private_list))
+       if (!list_empty(&inode->i_mapping->i_private_list))
                 return true;
         return inode->i_state & I_DIRTY_DATASYNC;
  }
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c

index d6bf1f8c25dcd19ecd62d3e819aa608126ae6ac3..d8b619ed2f1e6fd08367d046611b47d703207845 100644 (file)
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1213,7 +1213,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
                 mapping->host = s->s_bdev->bd_inode;
                 mapping->flags = 0;
                 mapping_set_gfp_mask(mapping, GFP_NOFS);
-               mapping->private_data = NULL;
+               mapping->i_private_data = NULL;
                 mapping->writeback_index = 0;
         }
  
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c

index b108c5d26839e1dc234c294df77fb557b4861a06..00ce89bdf32c122370d10a3b4aeb485ac7790b5f 100644 (file)
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -117,7 +117,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
         mapping->host = sb->s_bdev->bd_inode;
         mapping->flags = 0;
         mapping_set_gfp_mask(mapping, GFP_NOFS);
-       mapping->private_data = NULL;
+       mapping->i_private_data = NULL;
         mapping->writeback_index = 0;
  
         spin_lock_init(&sdp->sd_log_lock);
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c

index 0b791adf02e53d6498e5c36fc94b2402b77ad2a0..b0cb704009963c73d1021acdd70f65b01cab9911 100644 (file)
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -30,8 +30,7 @@ struct hfsplus_wd {
   * @sector: block to read or write, for blocks of HFSPLUS_SECTOR_SIZE bytes
   * @buf: buffer for I/O
   * @data: output pointer for location of requested data
- * @op: direction of I/O
- * @op_flags: request op flags
+ * @opf: request op flags
   *
   * The unit of I/O is hfsplus_min_io_size(sb), which may be bigger than
   * HFSPLUS_SECTOR_SIZE, and @buf must be sized accordingly. On reads
@@ -43,6 +42,8 @@ struct hfsplus_wd {
   * that starts at the rounded-down address. As long as the data was
   * read using hfsplus_submit_bio() and the same buffer is used things
   * will work correctly.
+ *
+ * Returns: %0 on success else -errno code
   */
  int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
                        void *buf, void **data, blk_opf_t opf)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c

index f757d4f7ad98a48542cb197fe8292ff91c868cd3..05609ab15cbcf43e1476032c38562b652e884a2d 100644 (file)
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -686,7 +686,7 @@ static void hugetlbfs_evict_inode(struct inode *inode)
          * at inode creation time.  If this is a device special inode,
          * i_mapping may not point to the original address space.
          */
-       resv_map = (struct resv_map *)(&inode->i_data)->private_data;
+       resv_map = (struct resv_map *)(&inode->i_data)->i_private_data;
         /* Only regular and link inodes have associated reserve maps */
         if (resv_map)
                 resv_map_release(&resv_map->refs);
@@ -1000,7 +1000,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
                                 &hugetlbfs_i_mmap_rwsem_key);
                 inode->i_mapping->a_ops = &hugetlbfs_aops;
                 simple_inode_init_ts(inode);
-               inode->i_mapping->private_data = resv_map;
+               inode->i_mapping->i_private_data = resv_map;
                 info->seals = F_SEAL_SEAL;
                 switch (mode & S_IFMT) {
                 default:
diff --git a/fs/inode.c b/fs/inode.c

index edcd8a61975f34c7a4cf467589848870430b3b8a..287c6269553d7938683f4ca30985813294d4ad24 100644 (file)
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -209,7 +209,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
         atomic_set(&mapping->nr_thps, 0);
  #endif
         mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
-       mapping->private_data = NULL;
+       mapping->i_private_data = NULL;
         mapping->writeback_index = 0;
         init_rwsem(&mapping->invalidate_lock);
         lockdep_set_class_and_name(&mapping->invalidate_lock,
@@ -396,8 +396,8 @@ static void __address_space_init_once(struct address_space *mapping)
  {
         xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT);
         init_rwsem(&mapping->i_mmap_rwsem);
-       INIT_LIST_HEAD(&mapping->private_list);
-       spin_lock_init(&mapping->private_lock);
+       INIT_LIST_HEAD(&mapping->i_private_list);
+       spin_lock_init(&mapping->i_private_lock);
         mapping->i_mmap = RB_ROOT_CACHED;
  }
  
@@ -618,7 +618,7 @@ void clear_inode(struct inode *inode)
          * nor even WARN_ON(!mapping_empty).
          */
         xa_unlock_irq(&inode->i_data.i_pages);
-       BUG_ON(!list_empty(&inode->i_data.private_list));
+       BUG_ON(!list_empty(&inode->i_data.i_private_list));
         BUG_ON(!(inode->i_state & I_FREEING));
         BUG_ON(inode->i_state & I_CLEAR);
         BUG_ON(!list_empty(&inode->i_wb_list));
@@ -1834,37 +1834,37 @@ EXPORT_SYMBOL(bmap);
   * earlier than or equal to either the ctime or mtime,
   * or if at least a day has passed since the last atime update.
   */
-static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
+static bool relatime_need_update(struct vfsmount *mnt, struct inode *inode,
                              struct timespec64 now)
  {
         struct timespec64 atime, mtime, ctime;
  
         if (!(mnt->mnt_flags & MNT_RELATIME))
-               return 1;
+               return true;
         /*
          * Is mtime younger than or equal to atime? If yes, update atime:
          */
         atime = inode_get_atime(inode);
         mtime = inode_get_mtime(inode);
         if (timespec64_compare(&mtime, &atime) >= 0)
-               return 1;
+               return true;
         /*
          * Is ctime younger than or equal to atime? If yes, update atime:
          */
         ctime = inode_get_ctime(inode);
         if (timespec64_compare(&ctime, &atime) >= 0)
-               return 1;
+               return true;
  
         /*
          * Is the previous atime value older than a day? If yes,
          * update atime:
          */
         if ((long)(now.tv_sec - atime.tv_sec) >= 24*60*60)
-               return 1;
+               return true;
         /*
          * Good, we can skip the atime update:
          */
-       return 0;
+       return false;
  }
  
  /**
@@ -2402,7 +2402,7 @@ EXPORT_SYMBOL(inode_init_owner);
   * the vfsmount must be passed through @idmap. This function will then take
   * care to map the inode according to @idmap before checking permissions.
   * On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs @nop_mnt_idmap.
+ * raw inode simply pass @nop_mnt_idmap.
   */
  bool inode_owner_or_capable(struct mnt_idmap *idmap,
                             const struct inode *inode)
diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c

index 57d1dedf3f8fa72fe7cb32316ee72961559a82c6..64c5205e2b5e7dac35c3ee8470d0f65d3254d4fb 100644 (file)
--- a/fs/mnt_idmapping.c
+++ b/fs/mnt_idmapping.c
@@ -9,8 +9,16 @@
  
  #include "internal.h"
  
+/*
+ * Outside of this file vfs{g,u}id_t are always created from k{g,u}id_t,
+ * never from raw values. These are just internal helpers.
+ */
+#define VFSUIDT_INIT_RAW(val) (vfsuid_t){ val }
+#define VFSGIDT_INIT_RAW(val) (vfsgid_t){ val }
+
  struct mnt_idmap {
-       struct user_namespace *owner;
+       struct uid_gid_map uid_map;
+       struct uid_gid_map gid_map;
         refcount_t count;
  };
  
@@ -20,24 +28,10 @@ struct mnt_idmap {
   * mapped to {g,u}id 1, [...], {g,u}id 1000 to {g,u}id 1000, [...].
   */
  struct mnt_idmap nop_mnt_idmap = {
-       .owner  = &init_user_ns,
         .count  = REFCOUNT_INIT(1),
  };
  EXPORT_SYMBOL_GPL(nop_mnt_idmap);
  
-/**
- * check_fsmapping - check whether an mount idmapping is allowed
- * @idmap: idmap of the relevent mount
- * @sb:    super block of the filesystem
- *
- * Return: true if @idmap is allowed, false if not.
- */
-bool check_fsmapping(const struct mnt_idmap *idmap,
-                    const struct super_block *sb)
-{
-       return idmap->owner != sb->s_user_ns;
-}
-
  /**
   * initial_idmapping - check whether this is the initial mapping
   * @ns: idmapping to check
@@ -52,26 +46,6 @@ static inline bool initial_idmapping(const struct user_namespace *ns)
         return ns == &init_user_ns;
  }
  
-/**
- * no_idmapping - check whether we can skip remapping a kuid/gid
- * @mnt_userns: the mount's idmapping
- * @fs_userns: the filesystem's idmapping
- *
- * This function can be used to check whether a remapping between two
- * idmappings is required.
- * An idmapped mount is a mount that has an idmapping attached to it that
- * is different from the filsystem's idmapping and the initial idmapping.
- * If the initial mapping is used or the idmapping of the mount and the
- * filesystem are identical no remapping is required.
- *
- * Return: true if remapping can be skipped, false if not.
- */
-static inline bool no_idmapping(const struct user_namespace *mnt_userns,
-                               const struct user_namespace *fs_userns)
-{
-       return initial_idmapping(mnt_userns) || mnt_userns == fs_userns;
-}
-
  /**
   * make_vfsuid - map a filesystem kuid according to an idmapping
   * @idmap: the mount's idmapping
@@ -81,8 +55,8 @@ static inline bool no_idmapping(const struct user_namespace *mnt_userns,
   * Take a @kuid and remap it from @fs_userns into @idmap. Use this
   * function when preparing a @kuid to be reported to userspace.
   *
- * If no_idmapping() determines that this is not an idmapped mount we can
- * simply return @kuid unchanged.
+ * If @idmap is &nop_mnt_idmap this is not an idmapped mount and we can
+ * simply return @kuid unchanged.
   * If initial_idmapping() tells us that the filesystem is not mounted with an
   * idmapping we know the value of @kuid won't change when calling
   * from_kuid() so we can simply retrieve the value via __kuid_val()
@@ -94,13 +68,12 @@ static inline bool no_idmapping(const struct user_namespace *mnt_userns,
   */
  
  vfsuid_t make_vfsuid(struct mnt_idmap *idmap,
-                                  struct user_namespace *fs_userns,
-                                  kuid_t kuid)
+                    struct user_namespace *fs_userns,
+                    kuid_t kuid)
  {
         uid_t uid;
-       struct user_namespace *mnt_userns = idmap->owner;
  
-       if (no_idmapping(mnt_userns, fs_userns))
+       if (idmap == &nop_mnt_idmap)
                 return VFSUIDT_INIT(kuid);
         if (initial_idmapping(fs_userns))
                 uid = __kuid_val(kuid);
@@ -108,7 +81,7 @@ vfsuid_t make_vfsuid(struct mnt_idmap *idmap,
                 uid = from_kuid(fs_userns, kuid);
         if (uid == (uid_t)-1)
                 return INVALID_VFSUID;
-       return VFSUIDT_INIT(make_kuid(mnt_userns, uid));
+       return VFSUIDT_INIT_RAW(map_id_down(&idmap->uid_map, uid));
  }
  EXPORT_SYMBOL_GPL(make_vfsuid);
  
@@ -121,8 +94,8 @@ EXPORT_SYMBOL_GPL(make_vfsuid);
   * Take a @kgid and remap it from @fs_userns into @idmap. Use this
   * function when preparing a @kgid to be reported to userspace.
   *
- * If no_idmapping() determines that this is not an idmapped mount we can
- * simply return @kgid unchanged.
+ * If @idmap is &nop_mnt_idmap this is not an idmapped mount and we can
+ * simply return @kgid unchanged.
   * If initial_idmapping() tells us that the filesystem is not mounted with an
   * idmapping we know the value of @kgid won't change when calling
   * from_kgid() so we can simply retrieve the value via __kgid_val()
@@ -136,9 +109,8 @@ vfsgid_t make_vfsgid(struct mnt_idmap *idmap,
                      struct user_namespace *fs_userns, kgid_t kgid)
  {
         gid_t gid;
-       struct user_namespace *mnt_userns = idmap->owner;
  
-       if (no_idmapping(mnt_userns, fs_userns))
+       if (idmap == &nop_mnt_idmap)
                 return VFSGIDT_INIT(kgid);
         if (initial_idmapping(fs_userns))
                 gid = __kgid_val(kgid);
@@ -146,7 +118,7 @@ vfsgid_t make_vfsgid(struct mnt_idmap *idmap,
                 gid = from_kgid(fs_userns, kgid);
         if (gid == (gid_t)-1)
                 return INVALID_VFSGID;
-       return VFSGIDT_INIT(make_kgid(mnt_userns, gid));
+       return VFSGIDT_INIT_RAW(map_id_down(&idmap->gid_map, gid));
  }
  EXPORT_SYMBOL_GPL(make_vfsgid);
  
@@ -165,11 +137,10 @@ kuid_t from_vfsuid(struct mnt_idmap *idmap,
                    struct user_namespace *fs_userns, vfsuid_t vfsuid)
  {
         uid_t uid;
-       struct user_namespace *mnt_userns = idmap->owner;
  
-       if (no_idmapping(mnt_userns, fs_userns))
+       if (idmap == &nop_mnt_idmap)
                 return AS_KUIDT(vfsuid);
-       uid = from_kuid(mnt_userns, AS_KUIDT(vfsuid));
+       uid = map_id_up(&idmap->uid_map, __vfsuid_val(vfsuid));
         if (uid == (uid_t)-1)
                 return INVALID_UID;
         if (initial_idmapping(fs_userns))
@@ -193,11 +164,10 @@ kgid_t from_vfsgid(struct mnt_idmap *idmap,
                    struct user_namespace *fs_userns, vfsgid_t vfsgid)
  {
         gid_t gid;
-       struct user_namespace *mnt_userns = idmap->owner;
  
-       if (no_idmapping(mnt_userns, fs_userns))
+       if (idmap == &nop_mnt_idmap)
                 return AS_KGIDT(vfsgid);
-       gid = from_kgid(mnt_userns, AS_KGIDT(vfsgid));
+       gid = map_id_up(&idmap->gid_map, __vfsgid_val(vfsgid));
         if (gid == (gid_t)-1)
                 return INVALID_GID;
         if (initial_idmapping(fs_userns))
@@ -228,16 +198,91 @@ int vfsgid_in_group_p(vfsgid_t vfsgid)
  #endif
  EXPORT_SYMBOL_GPL(vfsgid_in_group_p);
  
+static int copy_mnt_idmap(struct uid_gid_map *map_from,
+                         struct uid_gid_map *map_to)
+{
+       struct uid_gid_extent *forward, *reverse;
+       u32 nr_extents = READ_ONCE(map_from->nr_extents);
+       /* Pairs with smp_wmb() when writing the idmapping. */
+       smp_rmb();
+
+       /*
+        * Don't blindly copy @map_from into @map_to just because
+        * nr_extents is smaller or equal to UID_GID_MAP_MAX_BASE_EXTENTS.
+        * If it is zero, someone could have written an idmapping since
+        * we read @nr_extents and we might end up with inconsistent
+        * data. So just don't do anything at all.
+        */
+       if (nr_extents == 0)
+               return 0;
+
+       /*
+        * Here we know that nr_extents is greater than zero which means
+        * a map has been written. Since idmappings can't be changed
+        * once they have been written we know that we can safely copy
+        * from @map_from into @map_to.
+        */
+
+       if (nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) {
+               *map_to = *map_from;
+               return 0;
+       }
+
+       forward = kmemdup(map_from->forward,
+                         nr_extents * sizeof(struct uid_gid_extent),
+                         GFP_KERNEL_ACCOUNT);
+       if (!forward)
+               return -ENOMEM;
+
+       reverse = kmemdup(map_from->reverse,
+                         nr_extents * sizeof(struct uid_gid_extent),
+                         GFP_KERNEL_ACCOUNT);
+       if (!reverse) {
+               kfree(forward);
+               return -ENOMEM;
+       }
+
+       /*
+        * The idmapping isn't exposed anywhere so we don't need to care
+        * about ordering between extent pointers and @nr_extents
+        * initialization.
+        */
+       map_to->forward = forward;
+       map_to->reverse = reverse;
+       map_to->nr_extents = nr_extents;
+       return 0;
+}
+
+static void free_mnt_idmap(struct mnt_idmap *idmap)
+{
+       if (idmap->uid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
+               kfree(idmap->uid_map.forward);
+               kfree(idmap->uid_map.reverse);
+       }
+       if (idmap->gid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
+               kfree(idmap->gid_map.forward);
+               kfree(idmap->gid_map.reverse);
+       }
+       kfree(idmap);
+}
+
  struct mnt_idmap *alloc_mnt_idmap(struct user_namespace *mnt_userns)
  {
         struct mnt_idmap *idmap;
+       int ret;
  
         idmap = kzalloc(sizeof(struct mnt_idmap), GFP_KERNEL_ACCOUNT);
         if (!idmap)
                 return ERR_PTR(-ENOMEM);
  
-       idmap->owner = get_user_ns(mnt_userns);
         refcount_set(&idmap->count, 1);
+       ret = copy_mnt_idmap(&mnt_userns->uid_map, &idmap->uid_map);
+       if (!ret)
+               ret = copy_mnt_idmap(&mnt_userns->gid_map, &idmap->gid_map);
+       if (ret) {
+               free_mnt_idmap(idmap);
+               idmap = ERR_PTR(ret);
+       }
         return idmap;
  }
  
@@ -267,9 +312,7 @@ EXPORT_SYMBOL_GPL(mnt_idmap_get);
   */
  void mnt_idmap_put(struct mnt_idmap *idmap)
  {
-       if (idmap != &nop_mnt_idmap && refcount_dec_and_test(&idmap->count)) {
-               put_user_ns(idmap->owner);
-               kfree(idmap);
-       }
+       if (idmap != &nop_mnt_idmap && refcount_dec_and_test(&idmap->count))
+               free_mnt_idmap(idmap);
  }
  EXPORT_SYMBOL_GPL(mnt_idmap_put);
diff --git a/fs/namei.c b/fs/namei.c

index f0ead1858267cfbb0ac912e9d132511605fec9fd..faae721e4d637c7eb9de679740044862f4d997c8 100644 (file)
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -289,7 +289,7 @@ EXPORT_SYMBOL(putname);
   * the vfsmount must be passed through @idmap. This function will then take
   * care to map the inode according to @idmap before checking permissions.
   * On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs @nop_mnt_idmap.
+ * raw inode simply pass @nop_mnt_idmap.
   */
  static int check_acl(struct mnt_idmap *idmap,
                      struct inode *inode, int mask)
@@ -334,7 +334,7 @@ static int check_acl(struct mnt_idmap *idmap,
   * the vfsmount must be passed through @idmap. This function will then take
   * care to map the inode according to @idmap before checking permissions.
   * On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs @nop_mnt_idmap.
+ * raw inode simply pass @nop_mnt_idmap.
   */
  static int acl_permission_check(struct mnt_idmap *idmap,
                                 struct inode *inode, int mask)
@@ -395,7 +395,7 @@ static int acl_permission_check(struct mnt_idmap *idmap,
   * the vfsmount must be passed through @idmap. This function will then take
   * care to map the inode according to @idmap before checking permissions.
   * On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs @nop_mnt_idmap.
+ * raw inode simply pass @nop_mnt_idmap.
   */
  int generic_permission(struct mnt_idmap *idmap, struct inode *inode,
                        int mask)
@@ -2467,7 +2467,7 @@ static int handle_lookup_down(struct nameidata *nd)
         return PTR_ERR(step_into(nd, WALK_NOFOLLOW, nd->path.dentry));
  }
  
-/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
+/* Returns 0 and nd will be valid on success; Returns error, otherwise. */
  static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path)
  {
         const char *s = path_init(nd, flags);
@@ -2522,7 +2522,7 @@ int filename_lookup(int dfd, struct filename *name, unsigned flags,
         return retval;
  }
  
-/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
+/* Returns 0 and nd will be valid on success; Returns error, otherwise. */
  static int path_parentat(struct nameidata *nd, unsigned flags,
                                 struct path *parent)
  {
@@ -3158,7 +3158,7 @@ static inline umode_t vfs_prepare_mode(struct mnt_idmap *idmap,
   * the vfsmount must be passed through @idmap. This function will then take
   * care to map the inode according to @idmap before checking permissions.
   * On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs @nop_mnt_idmap.
+ * raw inode simply pass @nop_mnt_idmap.
   */
  int vfs_create(struct mnt_idmap *idmap, struct inode *dir,
                struct dentry *dentry, umode_t mode, bool want_excl)
@@ -3646,7 +3646,7 @@ static int do_open(struct nameidata *nd,
   * the vfsmount must be passed through @idmap. This function will then take
   * care to map the inode according to @idmap before checking permissions.
   * On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs @nop_mnt_idmap.
+ * raw inode simply pass @nop_mnt_idmap.
   */
  static int vfs_tmpfile(struct mnt_idmap *idmap,
                        const struct path *parentpath,
@@ -3951,7 +3951,7 @@ EXPORT_SYMBOL(user_path_create);
   * the vfsmount must be passed through @idmap. This function will then take
   * care to map the inode according to @idmap before checking permissions.
   * On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs @nop_mnt_idmap.
+ * raw inode simply pass @nop_mnt_idmap.
   */
  int vfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
               struct dentry *dentry, umode_t mode, dev_t dev)
@@ -4077,7 +4077,7 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d
   * the vfsmount must be passed through @idmap. This function will then take
   * care to map the inode according to @idmap before checking permissions.
   * On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs @nop_mnt_idmap.
+ * raw inode simply pass @nop_mnt_idmap.
   */
  int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
               struct dentry *dentry, umode_t mode)
@@ -4158,7 +4158,7 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
   * the vfsmount must be passed through @idmap. This function will then take
   * care to map the inode according to @idmap before checking permissions.
   * On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs @nop_mnt_idmap.
+ * raw inode simply pass @nop_mnt_idmap.
   */
  int vfs_rmdir(struct mnt_idmap *idmap, struct inode *dir,
                      struct dentry *dentry)
@@ -4287,7 +4287,7 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
   * the vfsmount must be passed through @idmap. This function will then take
   * care to map the inode according to @idmap before checking permissions.
   * On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs @nop_mnt_idmap.
+ * raw inode simply pass @nop_mnt_idmap.
   */
  int vfs_unlink(struct mnt_idmap *idmap, struct inode *dir,
                struct dentry *dentry, struct inode **delegated_inode)
@@ -4440,7 +4440,7 @@ SYSCALL_DEFINE1(unlink, const char __user *, pathname)
   * the vfsmount must be passed through @idmap. This function will then take
   * care to map the inode according to @idmap before checking permissions.
   * On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs @nop_mnt_idmap.
+ * raw inode simply pass @nop_mnt_idmap.
   */
  int vfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
                 struct dentry *dentry, const char *oldname)
@@ -4532,7 +4532,7 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn
   * the vfsmount must be passed through @idmap. This function will then take
   * care to map the inode according to @idmap before checking permissions.
   * On non-idmapped mounts or if permission checking is to be performed on the
- * raw inode simply passs @nop_mnt_idmap.
+ * raw inode simply pass @nop_mnt_idmap.
   */
  int vfs_link(struct dentry *old_dentry, struct mnt_idmap *idmap,
              struct inode *dir, struct dentry *new_dentry,
diff --git a/fs/namespace.c b/fs/namespace.c

index fbf0e596fcd30c9bae8d8cc1fbe09cf309f02334..78366f114515f5841b1a5ddbc679e68f75692c21 100644 (file)
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3026,6 +3026,7 @@ static inline bool path_overmounted(const struct path *path)
   * can_move_mount_beneath - check that we can mount beneath the top mount
   * @from: mount to mount beneath
   * @to:   mount under which to mount
+ * @mp:   mountpoint of @to
   *
   * - Make sure that @to->dentry is actually the root of a mount under
   *   which we can mount another mount.
@@ -4288,7 +4289,7 @@ static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
          * Creating an idmapped mount with the filesystem wide idmapping
          * doesn't make sense so block that. We don't allow mushy semantics.
          */
-       if (!check_fsmapping(kattr->mnt_idmap, m->mnt_sb))
+       if (kattr->mnt_userns == m->mnt_sb->s_user_ns)
                 return -EINVAL;
  
         /*
diff --git a/fs/nfs/write.c b/fs/nfs/write.c

index b664caea8b4e6704bc0b46e8388487999697b80b..7248705faef47cbf60b394d4921d2c92831e039f 100644 (file)
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -192,13 +192,13 @@ static struct nfs_page *nfs_folio_find_private_request(struct folio *folio)
  
         if (!folio_test_private(folio))
                 return NULL;
-       spin_lock(&mapping->private_lock);
+       spin_lock(&mapping->i_private_lock);
         req = nfs_folio_private_request(folio);
         if (req) {
                 WARN_ON_ONCE(req->wb_head != req);
                 kref_get(&req->wb_kref);
         }
-       spin_unlock(&mapping->private_lock);
+       spin_unlock(&mapping->i_private_lock);
         return req;
  }
  
@@ -769,13 +769,13 @@ static void nfs_inode_add_request(struct nfs_page *req)
          * Swap-space should not get truncated. Hence no need to plug the race
          * with invalidate/truncate.
          */
-       spin_lock(&mapping->private_lock);
+       spin_lock(&mapping->i_private_lock);
         if (likely(!folio_test_swapcache(folio))) {
                 set_bit(PG_MAPPED, &req->wb_flags);
                 folio_set_private(folio);
                 folio->private = req;
         }
-       spin_unlock(&mapping->private_lock);
+       spin_unlock(&mapping->i_private_lock);
         atomic_long_inc(&nfsi->nrequests);
         /* this a head request for a page group - mark it as having an
          * extra reference so sub groups can follow suit.
@@ -796,13 +796,13 @@ static void nfs_inode_remove_request(struct nfs_page *req)
                 struct folio *folio = nfs_page_to_folio(req->wb_head);
                 struct address_space *mapping = folio_file_mapping(folio);
  
-               spin_lock(&mapping->private_lock);
+               spin_lock(&mapping->i_private_lock);
                 if (likely(folio && !folio_test_swapcache(folio))) {
                         folio->private = NULL;
                         folio_clear_private(folio);
                         clear_bit(PG_MAPPED, &req->wb_head->wb_flags);
                 }
-               spin_unlock(&mapping->private_lock);
+               spin_unlock(&mapping->i_private_lock);
         }
  
         if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) {
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c

index f861f3a0bf5cf244c40a70b0d111d0b153a42ee9..2ead36dfa2a39cad898054a547665c580ef6e006 100644 (file)
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -214,7 +214,7 @@ static bool nilfs_dirty_folio(struct address_space *mapping,
         /*
          * The page may not be locked, eg if called from try_to_unmap_one()
          */
-       spin_lock(&mapping->private_lock);
+       spin_lock(&mapping->i_private_lock);
         head = folio_buffers(folio);
         if (head) {
                 struct buffer_head *bh = head;
@@ -230,7 +230,7 @@ static bool nilfs_dirty_folio(struct address_space *mapping,
         } else if (ret) {
                 nr_dirty = 1 << (folio_shift(folio) - inode->i_blkbits);
         }
-       spin_unlock(&mapping->private_lock);
+       spin_unlock(&mapping->i_private_lock);
  
         if (nr_dirty)
                 nilfs_set_file_dirty(inode, nr_dirty);
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c

index 71e31e789b292e0f3fa645219134423ebd647dbf..548f3b51aa5f464800f9632ec3a3f6e59c6b65a1 100644 (file)
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -1690,7 +1690,7 @@ const struct address_space_operations ntfs_mst_aops = {
   *
   * If the page does not have buffers, we create them and set them uptodate.
   * The page may not be locked which is why we need to handle the buffers under
- * the mapping->private_lock.  Once the buffers are marked dirty we no longer
+ * the mapping->i_private_lock.  Once the buffers are marked dirty we no longer
   * need the lock since try_to_free_buffers() does not free dirty buffers.
   */
  void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) {
@@ -1702,11 +1702,11 @@ void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) {
         BUG_ON(!PageUptodate(page));
         end = ofs + ni->itype.index.block_size;
         bh_size = VFS_I(ni)->i_sb->s_blocksize;
-       spin_lock(&mapping->private_lock);
+       spin_lock(&mapping->i_private_lock);
         if (unlikely(!page_has_buffers(page))) {
-               spin_unlock(&mapping->private_lock);
+               spin_unlock(&mapping->i_private_lock);
                 bh = head = alloc_page_buffers(page, bh_size, true);
-               spin_lock(&mapping->private_lock);
+               spin_lock(&mapping->i_private_lock);
                 if (likely(!page_has_buffers(page))) {
                         struct buffer_head *tail;
  
@@ -1730,7 +1730,7 @@ void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) {
                         break;
                 set_buffer_dirty(bh);
         } while ((bh = bh->b_this_page) != head);
-       spin_unlock(&mapping->private_lock);
+       spin_unlock(&mapping->i_private_lock);
         filemap_dirty_folio(mapping, page_folio(page));
         if (unlikely(buffers_to_free)) {
                 do {
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c

index 4596c90e7b7cf096e0c9b590dfc4ef59eb3fe9b8..629723a8d7125f12a04fc8c1a3dbf6afc9dc286f 100644 (file)
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1462,7 +1462,8 @@ static int ntfs_dir_open(struct inode *vi, struct file *filp)
  /**
   * ntfs_dir_fsync - sync a directory to disk
   * @filp:      directory to be synced
- * @dentry:    dentry describing the directory to sync
+ * @start:     offset in bytes of the beginning of data range to sync
+ * @end:       offset in bytes of the end of data range (inclusive)
   * @datasync:  if non-zero only flush user data and not metadata
   *
   * Data integrity sync of a directory to disk.  Used for fsync, fdatasync, and
diff --git a/fs/open.c b/fs/open.c

index 48775329f3f19abd69a2adf85515369b52b97d34..328dc6ef18836dad1f68e88789c684898d0403b0 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -442,7 +442,8 @@ static const struct cred *access_override_creds(void)
          * 'get_current_cred()' function), that will clear the
          * non_rcu field, because now that other user may be
          * expecting RCU freeing. But normal thread-synchronous
-        * cred accesses will keep things non-RCY.
+        * cred accesses will keep things non-RCU and thus avoid RCU
+        * freeing.
          */
         override_cred->non_rcu = 1;
  
diff --git a/fs/pipe.c b/fs/pipe.c

index 804a7d78945217efd3b5394a8a8e6e7d605a937a..8d9286a1f2e8506041bd53065495fdf4e19f889b 100644 (file)
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -446,6 +446,18 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
         bool was_empty = false;
         bool wake_next_writer = false;
  
+       /*
+        * Reject writing to watch queue pipes before the point where we lock
+        * the pipe.
+        * Otherwise, lockdep would be unhappy if the caller already has another
+        * pipe locked.
+        * If we had to support locking a normal pipe and a notification pipe at
+        * the same time, we could set up lockdep annotations for that, but
+        * since we don't actually need that, it's simpler to just bail here.
+        */
+       if (pipe_has_watch_queue(pipe))
+               return -EXDEV;
+
         /* Null write succeeds. */
         if (unlikely(total_len == 0))
                 return 0;
@@ -458,11 +470,6 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
                 goto out;
         }
  
-       if (pipe_has_watch_queue(pipe)) {
-               ret = -EXDEV;
-               goto out;
-       }
-
         /*
          * If it wasn't empty we try to merge new data into
          * the last buffer.
@@ -1317,6 +1324,11 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
         pipe->tail = tail;
         pipe->head = head;
  
+       if (!pipe_has_watch_queue(pipe)) {
+               pipe->max_usage = nr_slots;
+               pipe->nr_accounted = nr_slots;
+       }
+
         spin_unlock_irq(&pipe->rd_wait.lock);
  
         /* This might have made more room for writers */
@@ -1368,8 +1380,6 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned int arg)
         if (ret < 0)
                 goto out_revert_acct;
  
-       pipe->max_usage = nr_slots;
-       pipe->nr_accounted = nr_slots;
         return pipe->max_usage * PAGE_SIZE;
  
  out_revert_acct:
diff --git a/fs/posix_acl.c b/fs/posix_acl.c

index a05fe94970ce7872c4b155e3ab3102c5712006fc..e1af20893ebe1ed400acb7c7215377868f623bbf 100644 (file)
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -600,7 +600,7 @@ EXPORT_SYMBOL(__posix_acl_chmod);
   * the vfsmount must be passed through @idmap. This function will then
   * take care to map the inode according to @idmap before checking
   * permissions. On non-idmapped mounts or if permission checking is to be
- * performed on the raw inode simply passs @nop_mnt_idmap.
+ * performed on the raw inode simply pass @nop_mnt_idmap.
   */
  int
   posix_acl_chmod(struct mnt_idmap *idmap, struct dentry *dentry,
@@ -700,7 +700,7 @@ EXPORT_SYMBOL_GPL(posix_acl_create);
   * the vfsmount must be passed through @idmap. This function will then
   * take care to map the inode according to @idmap before checking
   * permissions. On non-idmapped mounts or if permission checking is to be
- * performed on the raw inode simply passs @nop_mnt_idmap.
+ * performed on the raw inode simply pass @nop_mnt_idmap.
   *
   * Called from set_acl inode operations.
   */
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c

index ef2eb12906da88c6fe3a227e82598020f0badc44..5de0f70e52ff4c0ba8296edb7e5e2f95b7045a25 100644 (file)
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -273,7 +273,8 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
         const char *name = NULL;
  
         if (file) {
-               struct inode *inode = file_inode(vma->vm_file);
+               const struct inode *inode = file_user_inode(vma->vm_file);
+
                 dev = inode->i_sb->s_dev;
                 ino = inode->i_ino;
                 pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
diff --git a/fs/stat.c b/fs/stat.c

index 24bb0209e4599f934af06f6c0a9f984880b2fb34..0ab525f80a4939aab2e257a1d7781469a3da5bae 100644 (file)
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -41,7 +41,7 @@
   * the vfsmount must be passed through @idmap. This function will then
   * take care to map the inode according to @idmap before filling in the
   * uid and gid filds. On non-idmapped mounts or if permission checking is to be
- * performed on the raw inode simply passs @nop_mnt_idmap.
+ * performed on the raw inode simply pass @nop_mnt_idmap.
   */
  void generic_fillattr(struct mnt_idmap *idmap, u32 request_mask,
                       struct inode *inode, struct kstat *stat)
diff --git a/fs/super.c b/fs/super.c

index 076392396e724e210d565e6f78b35d88613e471d..6fe482371633396e54b9c454cdc59b03ac9be7a7 100644 (file)
--- a/fs/super.c
+++ b/fs/super.c
@@ -323,7 +323,7 @@ static void destroy_unused_super(struct super_block *s)
  static struct super_block *alloc_super(struct file_system_type *type, int flags,
                                        struct user_namespace *user_ns)
  {
-       struct super_block *s = kzalloc(sizeof(struct super_block),  GFP_USER);
+       struct super_block *s = kzalloc(sizeof(struct super_block), GFP_KERNEL);
         static const struct super_operations default_op;
         int i;
  
diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h

index b9d83652c097abba51ea4f6863d7af375a3e1234..e32bee4345fb9fedfad3821fbbd1d8e3925fb97a 100644 (file)
--- a/include/linux/eventfd.h
+++ b/include/linux/eventfd.h
@@ -35,8 +35,7 @@ void eventfd_ctx_put(struct eventfd_ctx *ctx);
  struct file *eventfd_fget(int fd);
  struct eventfd_ctx *eventfd_ctx_fdget(int fd);
  struct eventfd_ctx *eventfd_ctx_fileget(struct file *file);
-__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n);
-__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, __poll_t mask);
+void eventfd_signal_mask(struct eventfd_ctx *ctx, __poll_t mask);
  int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait,
                                   __u64 *cnt);
  void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt);
@@ -58,15 +57,8 @@ static inline struct eventfd_ctx *eventfd_ctx_fdget(int fd)
         return ERR_PTR(-ENOSYS);
  }
  
-static inline int eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
+static inline void eventfd_signal_mask(struct eventfd_ctx *ctx, __poll_t mask)
  {
-       return -ENOSYS;
-}
-
-static inline int eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n,
-                                     unsigned mask)
-{
-       return -ENOSYS;
  }
  
  static inline void eventfd_ctx_put(struct eventfd_ctx *ctx)
@@ -92,5 +84,10 @@ static inline void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
  
  #endif
  
+/*
+ * eventfd_signal - signal @ctx without any additional poll mask bits.
+ *
+ * Convenience wrapper: forwards to eventfd_signal_mask() with a mask of 0.
+ */
+static inline void eventfd_signal(struct eventfd_ctx *ctx)
+{
+       eventfd_signal_mask(ctx, 0);
+}
+
  #endif /* _LINUX_EVENTFD_H */
  
diff --git a/include/linux/fs.h b/include/linux/fs.h

index 354fd02e0e111bc229d4c6e4aa805256cc19b7d4..3d58376ed39e2bcb01f1c1a0c200151c9fb30f92 100644 (file)
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -463,9 +463,9 @@ extern const struct address_space_operations empty_aops;
   * @a_ops: Methods.
   * @flags: Error bits and flags (AS_*).
   * @wb_err: The most recent error which has occurred.
- * @private_lock: For use by the owner of the address_space.
- * @private_list: For use by the owner of the address_space.
- * @private_data: For use by the owner of the address_space.
+ * @i_private_lock: For use by the owner of the address_space.
+ * @i_private_list: For use by the owner of the address_space.
+ * @i_private_data: For use by the owner of the address_space.
   */
  struct address_space {
         struct inode            *host;
@@ -484,9 +484,9 @@ struct address_space {
         unsigned long           flags;
         struct rw_semaphore     i_mmap_rwsem;
         errseq_t                wb_err;
-       spinlock_t              private_lock;
-       struct list_head        private_list;
-       void                    *private_data;
+       spinlock_t              i_private_lock;
+       struct list_head        i_private_list;
+       void *                  i_private_data;
  } __attribute__((aligned(sizeof(long)))) __randomize_layout;
         /*
          * On most architectures that alignment is already the case; but
@@ -2525,20 +2525,28 @@ struct file *backing_file_open(const struct path *user_path, int flags,
  struct path *backing_file_user_path(struct file *f);
  
  /*
- * file_user_path - get the path to display for memory mapped file
- *
   * When mmapping a file on a stackable filesystem (e.g., overlayfs), the file
   * stored in ->vm_file is a backing file whose f_inode is on the underlying
- * filesystem.  When the mapped file path is displayed to user (e.g. via
- * /proc/<pid>/maps), this helper should be used to get the path to display
- * to the user, which is the path of the fd that user has requested to map.
+ * filesystem.  When the mapped file path and inode number are displayed to
+ * user (e.g. via /proc/<pid>/maps), these helpers should be used to get the
+ * path and inode number to display to the user, which is the path of the fd
+ * that user has requested to map and the inode number that would be returned
+ * by fstat() on that same fd.
   */
+/* Get the path to display in /proc/<pid>/maps */
  static inline const struct path *file_user_path(struct file *f)
  {
         if (unlikely(f->f_mode & FMODE_BACKING))
                 return backing_file_user_path(f);
         return &f->f_path;
  }
+/* Inode whose number /proc/<pid>/maps reports: the user-visible one for backing files */
+static inline const struct inode *file_user_inode(struct file *f)
+{
+       if (likely(!(f->f_mode & FMODE_BACKING)))
+               return file_inode(f);
+       return d_inode(backing_file_user_path(f)->dentry);
+}
  
  static inline struct file *file_clone_open(struct file *file)
  {
diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h

index b8da2db4ecd295672ec512800ffc3b50b363c63d..cd4d5c8781f54e0531bf713fd2d2957c97e41f63 100644 (file)
--- a/include/linux/mnt_idmapping.h
+++ b/include/linux/mnt_idmapping.h
@@ -244,7 +244,4 @@ static inline kgid_t mapped_fsgid(struct mnt_idmap *idmap,
         return from_vfsgid(idmap, fs_userns, VFSGIDT_INIT(current_fsgid()));
  }
  
-bool check_fsmapping(const struct mnt_idmap *idmap,
-                    const struct super_block *sb);
-
  #endif /* _LINUX_MNT_IDMAPPING_H */
diff --git a/include/linux/uidgid.h b/include/linux/uidgid.h

index b0542cd11aeb09f5d0c979e1acf31ad8d550bb6d..415a7ca2b8829ce9ec2077a34d1ffc915d5eb559 100644 (file)
--- a/include/linux/uidgid.h
+++ b/include/linux/uidgid.h
@@ -17,6 +17,7 @@
  
  struct user_namespace;
  extern struct user_namespace init_user_ns;
+struct uid_gid_map;
  
  typedef struct {
         uid_t val;
@@ -138,6 +139,9 @@ static inline bool kgid_has_mapping(struct user_namespace *ns, kgid_t gid)
         return from_kgid(ns, gid) != (gid_t) -1;
  }
  
+u32 map_id_down(struct uid_gid_map *map, u32 id);
+u32 map_id_up(struct uid_gid_map *map, u32 id);
+
  #else
  
  static inline kuid_t make_kuid(struct user_namespace *from, uid_t uid)
@@ -186,6 +190,15 @@ static inline bool kgid_has_mapping(struct user_namespace *ns, kgid_t gid)
         return gid_valid(gid);
  }
  
+/* !CONFIG_USER_NS stub: @map is ignored and @id is returned unchanged. */
+static inline u32 map_id_down(struct uid_gid_map *map, u32 id)
+{
+       return id;
+}
+
+/* !CONFIG_USER_NS stub: @map is ignored and @id is returned unchanged. */
+static inline u32 map_id_up(struct uid_gid_map *map, u32 id)
+{
+       return id;
+}
  #endif /* CONFIG_USER_NS */
  
  #endif /* _LINUX_UIDGID_H */
diff --git a/include/linux/writeback.h b/include/linux/writeback.h

index 083387c00f0c8b8e070705c38da59c97c8170dcb..6d0a14f7019d1e7b76a1931be98ff4f7fa9f0493 100644 (file)
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -193,7 +193,6 @@ void inode_io_list_del(struct inode *inode);
  /* writeback.h requires fs.h; it, too, is not included from here. */
  static inline void wait_on_inode(struct inode *inode)
  {
-       might_sleep();
         wait_on_bit(&inode->i_state, __I_NEW, TASK_UNINTERRUPTIBLE);
  }
  
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c

index ed254076c723332c3e9338b65ea3505be1608e71..70170a41eac48f57ec903691e0dc746adea3df22 100644 (file)
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -558,7 +558,7 @@ static void io_eventfd_ops(struct rcu_head *rcu)
         int ops = atomic_xchg(&ev_fd->ops, 0);
  
         if (ops & BIT(IO_EVENTFD_OP_SIGNAL_BIT))
-               eventfd_signal_mask(ev_fd->cq_ev_fd, 1, EPOLL_URING_WAKE);
+               eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
  
         /* IO_EVENTFD_OP_FREE_BIT may not be set here depending on callback
          * ordering in a race but if references are 0 we know we have to free
@@ -594,7 +594,7 @@ static void io_eventfd_signal(struct io_ring_ctx *ctx)
                 goto out;
  
         if (likely(eventfd_signal_allowed())) {
-               eventfd_signal_mask(ev_fd->cq_ev_fd, 1, EPOLL_URING_WAKE);
+               eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
         } else {
                 atomic_inc(&ev_fd->refs);
                 if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops))
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c

index eabe8bcc70426519bdfef4b08e53a86451ba76c2..ce4d99df5f0eb4621ef32455eadfc57be314edee 100644 (file)
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -231,7 +231,7 @@ void __put_user_ns(struct user_namespace *ns)
  }
  EXPORT_SYMBOL(__put_user_ns);
  
-/**
+/*
   * struct idmap_key - holds the information necessary to find an idmapping in a
   * sorted idmap array. It is passed to cmp_map_id() as first argument.
   */
@@ -241,7 +241,7 @@ struct idmap_key {
         u32 count; /* == 0 unless used with map_id_range_down() */
  };
  
-/**
+/*
   * cmp_map_id - Function to be passed to bsearch() to find the requested
   * idmapping. Expects struct idmap_key to be passed via @k.
   */
@@ -271,7 +271,7 @@ static int cmp_map_id(const void *k, const void *e)
         return 1;
  }
  
-/**
+/*
   * map_id_range_down_max - Find idmap via binary search in ordered idmap array.
   * Can only be called if number of mappings exceeds UID_GID_MAP_MAX_BASE_EXTENTS.
   */
@@ -288,7 +288,7 @@ map_id_range_down_max(unsigned extents, struct uid_gid_map *map, u32 id, u32 cou
                        sizeof(struct uid_gid_extent), cmp_map_id);
  }
  
-/**
+/*
   * map_id_range_down_base - Find idmap via binary search in static extent array.
   * Can only be called if number of mappings is equal or less than
   * UID_GID_MAP_MAX_BASE_EXTENTS.
@@ -332,12 +332,12 @@ static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count)
         return id;
  }
  
-static u32 map_id_down(struct uid_gid_map *map, u32 id)
+u32 map_id_down(struct uid_gid_map *map, u32 id)
  {
         return map_id_range_down(map, id, 1);
  }
  
-/**
+/*
   * map_id_up_base - Find idmap via binary search in static extent array.
   * Can only be called if number of mappings is equal or less than
   * UID_GID_MAP_MAX_BASE_EXTENTS.
@@ -358,7 +358,7 @@ map_id_up_base(unsigned extents, struct uid_gid_map *map, u32 id)
         return NULL;
  }
  
-/**
+/*
   * map_id_up_max - Find idmap via binary search in ordered idmap array.
   * Can only be called if number of mappings exceeds UID_GID_MAP_MAX_BASE_EXTENTS.
   */
@@ -375,7 +375,7 @@ map_id_up_max(unsigned extents, struct uid_gid_map *map, u32 id)
                        sizeof(struct uid_gid_extent), cmp_map_id);
  }
  
-static u32 map_id_up(struct uid_gid_map *map, u32 id)
+u32 map_id_up(struct uid_gid_map *map, u32 id)
  {
         struct uid_gid_extent *extent;
         unsigned extents = map->nr_extents;
@@ -770,7 +770,7 @@ static bool mappings_overlap(struct uid_gid_map *new_map,
         return false;
  }
  
-/**
+/*
   * insert_extent - Safely insert a new idmap extent into struct uid_gid_map.
   * Takes care to allocate a 4K block of memory if the number of mappings exceeds
   * UID_GID_MAP_MAX_BASE_EXTENTS.
@@ -839,7 +839,7 @@ static int cmp_extents_reverse(const void *a, const void *b)
         return 0;
  }
  
-/**
+/*
   * sort_idmaps - Sorts an array of idmap entries.
   * Can only be called if number of mappings exceeds UID_GID_MAP_MAX_BASE_EXTENTS.
   */
diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c

index 778b4056700ff5703e6c5c984c469c173dfcaa69..03b90d7d217540549aa8482c9af726aee38771d1 100644 (file)
--- a/kernel/watch_queue.c
+++ b/kernel/watch_queue.c
@@ -270,7 +270,7 @@ long watch_queue_set_size(struct pipe_inode_info *pipe, unsigned int nr_notes)
                 goto error;
  
         ret = -ENOMEM;
-       pages = kcalloc(sizeof(struct page *), nr_pages, GFP_KERNEL);
+       pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
         if (!pages)
                 goto error;
  
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index 1169ef2f2176fa2cd1ca0bce159ce7d31abf0359..38c4477fda6a0a0714ee6cc659f5869e7f42ef26 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1141,7 +1141,7 @@ static inline struct resv_map *inode_resv_map(struct inode *inode)
          * The VERY common case is inode->mapping == &inode->i_data but,
          * this may not be true for device special inodes.
          */
-       return (struct resv_map *)(&inode->i_data)->private_data;
+       return (struct resv_map *)(&inode->i_data)->i_private_data;
  }
  
  static struct resv_map *vma_resv_map(struct vm_area_struct *vma)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c

index 774bd6e21e2788ac1ee094c84176b488543fd6c3..dfbb1d3b77b78d001cc3ee2887d67a9c6d7b3aa4 100644 (file)
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4378,7 +4378,7 @@ static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap)
          * only one element of the array here.
          */
         for (; i >= 0 && unlikely(t->entries[i].threshold > usage); i--)
-               eventfd_signal(t->entries[i].eventfd, 1);
+               eventfd_signal(t->entries[i].eventfd);
  
         /* i = current_threshold + 1 */
         i++;
@@ -4390,7 +4390,7 @@ static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap)
          * only one element of the array here.
          */
         for (; i < t->size && unlikely(t->entries[i].threshold <= usage); i++)
-               eventfd_signal(t->entries[i].eventfd, 1);
+               eventfd_signal(t->entries[i].eventfd);
  
         /* Update current_threshold */
         t->current_threshold = i - 1;
@@ -4430,7 +4430,7 @@ static int mem_cgroup_oom_notify_cb(struct mem_cgroup *memcg)
         spin_lock(&memcg_oom_lock);
  
         list_for_each_entry(ev, &memcg->oom_notify, list)
-               eventfd_signal(ev->eventfd, 1);
+               eventfd_signal(ev->eventfd);
  
         spin_unlock(&memcg_oom_lock);
         return 0;
@@ -4649,7 +4649,7 @@ static int mem_cgroup_oom_register_event(struct mem_cgroup *memcg,
  
         /* already in OOM ? */
         if (memcg->under_oom)
-               eventfd_signal(eventfd, 1);
+               eventfd_signal(eventfd);
         spin_unlock(&memcg_oom_lock);
  
         return 0;
@@ -4941,7 +4941,7 @@ static void memcg_event_remove(struct work_struct *work)
         event->unregister_event(memcg, event->eventfd);
  
         /* Notify userspace the event is going away. */
-       eventfd_signal(event->eventfd, 1);
+       eventfd_signal(event->eventfd);
  
         eventfd_ctx_put(event->eventfd);
         kfree(event);
diff --git a/mm/migrate.c b/mm/migrate.c

index 35a88334bb3c2ffa9b641f7fc8b9abe0ee53f04d..377f55ebf7f496f6bb8738efd3721f470fdbc6ff 100644 (file)
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -746,7 +746,7 @@ static int __buffer_migrate_folio(struct address_space *mapping,
  
  recheck_buffers:
                 busy = false;
-               spin_lock(&mapping->private_lock);
+               spin_lock(&mapping->i_private_lock);
                 bh = head;
                 do {
                         if (atomic_read(&bh->b_count)) {
@@ -760,7 +760,7 @@ recheck_buffers:
                                 rc = -EAGAIN;
                                 goto unlock_buffers;
                         }
-                       spin_unlock(&mapping->private_lock);
+                       spin_unlock(&mapping->i_private_lock);
                         invalidate_bh_lrus();
                         invalidated = true;
                         goto recheck_buffers;
@@ -787,7 +787,7 @@ recheck_buffers:
         rc = MIGRATEPAGE_SUCCESS;
  unlock_buffers:
         if (check_refs)
-               spin_unlock(&mapping->private_lock);
+               spin_unlock(&mapping->i_private_lock);
         bh = head;
         do {
                 unlock_buffer(bh);
diff --git a/mm/vmpressure.c b/mm/vmpressure.c

index 22c6689d930277365f20029991223d98b0b983cd..bd5183dfd8791fef6cbd8ae35dddf59ee88516fe 100644 (file)
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -169,7 +169,7 @@ static bool vmpressure_event(struct vmpressure *vmpr,
                         continue;
                 if (level < ev->level)
                         continue;
-               eventfd_signal(ev->efd, 1);
+               eventfd_signal(ev->efd);
                 ret = true;
         }
         mutex_unlock(&vmpr->events_lock);
diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c

index 69ba0281f9e0baed7e88d56aea0d5d4e6938d48e..2284b37512402b89b449a6d1af0b21c40af1c37d 100644 (file)
--- a/samples/vfio-mdev/mtty.c
+++ b/samples/vfio-mdev/mtty.c
@@ -234,10 +234,10 @@ static void mtty_trigger_interrupt(struct mdev_state *mdev_state)
  
         if (is_msi(mdev_state)) {
                 if (mdev_state->msi_evtfd)
-                       eventfd_signal(mdev_state->msi_evtfd, 1);
+                       eventfd_signal(mdev_state->msi_evtfd);
         } else if (is_intx(mdev_state)) {
                 if (mdev_state->intx_evtfd && !mdev_state->intx_mask) {
-                       eventfd_signal(mdev_state->intx_evtfd, 1);
+                       eventfd_signal(mdev_state->intx_evtfd);
                         mdev_state->intx_mask = true;
                 }
         }
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile

index 3b2061d1c1a527c9868d8a80c18a1a91781f27e4..0939a40abb28332e8f79fd10e3ee954bb828726d 100644 (file)
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -26,6 +26,7 @@ TARGETS += filesystems
  TARGETS += filesystems/binderfs
  TARGETS += filesystems/epoll
  TARGETS += filesystems/fat
+TARGETS += filesystems/overlayfs
  TARGETS += firmware
  TARGETS += fpu
  TARGETS += ftrace
diff --git a/tools/testing/selftests/filesystems/overlayfs/.gitignore b/tools/testing/selftests/filesystems/overlayfs/.gitignore

new file mode 100644 (file)

index 0000000..52ae618
--- /dev/null
+++ b/tools/testing/selftests/filesystems/overlayfs/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+dev_in_maps
diff --git a/tools/testing/selftests/filesystems/overlayfs/Makefile b/tools/testing/selftests/filesystems/overlayfs/Makefile

new file mode 100644 (file)

index 0000000..56b2b48
--- /dev/null
+++ b/tools/testing/selftests/filesystems/overlayfs/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_GEN_PROGS := dev_in_maps
+
+# Append rather than assign so flags passed in by the caller are preserved.
+CFLAGS += -Wall -Werror
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c

new file mode 100644 (file)

index 0000000..e19ab0e
--- /dev/null
+++ b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <inttypes.h>
+#include <unistd.h>
+#include <stdio.h>
+
+#include <linux/unistd.h>
+#include <linux/types.h>
+#include <linux/mount.h>
+#include <sys/syscall.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <sys/mman.h>
+#include <sched.h>
+#include <fcntl.h>
+
+#include "../../kselftest.h"
+#include "log.h"
+
+/* Raw fsopen() system call; the syscall() result is returned as-is. */
+static int sys_fsopen(const char *fsname, unsigned int flags)
+{
+       long ret = syscall(__NR_fsopen, fsname, flags);
+
+       return ret;
+}
+
+/* Raw fsconfig() system call; the syscall() result is returned as-is. */
+static int sys_fsconfig(int fd, unsigned int cmd, const char *key, const char *value, int aux)
+{
+       long ret = syscall(__NR_fsconfig, fd, cmd, key, value, aux);
+
+       return ret;
+}
+
+/* Raw fsmount() system call; the syscall() result is returned as-is. */
+static int sys_fsmount(int fd, unsigned int flags, unsigned int attr_flags)
+{
+       long ret = syscall(__NR_fsmount, fd, flags, attr_flags);
+
+       return ret;
+}
+
+/* Raw move_mount() system call; the syscall() result is returned as-is. */
+static int sys_move_mount(int from_dfd, const char *from_pathname,
+                         int to_dfd, const char *to_pathname,
+                         unsigned int flags)
+{
+       long ret = syscall(__NR_move_mount, from_dfd, from_pathname,
+                          to_dfd, to_pathname, flags);
+
+       return ret;
+}
+
+/*
+ * Find the mapping that starts at @addr in /proc/self/maps and copy its
+ * device major/minor and inode number into @stx.
+ *
+ * Returns 0 on success.  Returns -1, after reporting a ksft error, if the
+ * maps file cannot be opened, a line cannot be parsed, or no mapping starts
+ * at @addr.
+ */
+static long get_file_dev_and_inode(void *addr, struct statx *stx)
+{
+       char buf[4096];
+       long ret = -1;
+       FILE *mapf;
+
+       mapf = fopen("/proc/self/maps", "r");
+       if (mapf == NULL)
+               return pr_perror("fopen(/proc/self/maps)");
+
+       while (fgets(buf, sizeof(buf), mapf)) {
+               unsigned long start, end;
+               uint32_t maj, min;
+               /*
+                * %llu needs an unsigned long long; __u64 is a plain
+                * unsigned long on some 64-bit ABIs.
+                */
+               unsigned long long ino;
+
+               if (sscanf(buf, "%lx-%lx %*s %*s %x:%x %llu",
+                               &start, &end, &maj, &min, &ino) != 5) {
+                       /* A scan mismatch does not set errno: no %m here. */
+                       ret = pr_err("unable to parse: %s", buf);
+                       goto out;
+               }
+               if (start == (unsigned long)addr) {
+                       stx->stx_dev_major = maj;
+                       stx->stx_dev_minor = min;
+                       stx->stx_ino = ino;
+                       ret = 0;
+                       goto out;
+               }
+       }
+
+       ret = pr_err("unable to find the mapping");
+out:
+       /* Single exit so the stream is released on every path. */
+       fclose(mapf);
+       return ret;
+}
+
+/*
+ * Mount a fresh tmpfs on /tmp, create the l/u/w directories in it and build
+ * an overlay filesystem from them.
+ *
+ * Returns the file descriptor obtained from fsmount() for the overlay (the
+ * mount is not attached anywhere by this function), or -1 after reporting a
+ * ksft error.
+ */
+static int ovl_mount(void)
+{
+       int tmpfs, fsfd, ovl;
+
+       fsfd = sys_fsopen("tmpfs", 0);
+       if (fsfd == -1)
+               return pr_perror("fsopen(tmpfs)");
+
+       if (sys_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) == -1)
+               return pr_perror("FSCONFIG_CMD_CREATE");
+
+       tmpfs = sys_fsmount(fsfd, 0, 0);
+       if (tmpfs == -1)
+               return pr_perror("fsmount");
+
+       close(fsfd);
+
+       /* overlayfs can't be constructed on top of a detached mount. */
+       if (sys_move_mount(tmpfs, "", AT_FDCWD, "/tmp", MOVE_MOUNT_F_EMPTY_PATH))
+               return pr_perror("move_mount");
+       close(tmpfs);
+
+       if (mkdir("/tmp/w", 0755) == -1 ||
+           mkdir("/tmp/u", 0755) == -1 ||
+           mkdir("/tmp/l", 0755) == -1)
+               return pr_perror("mkdir");
+
+       fsfd = sys_fsopen("overlay", 0);
+       if (fsfd == -1)
+               return pr_perror("fsopen(overlay)");
+       if (sys_fsconfig(fsfd, FSCONFIG_SET_STRING, "source", "test", 0) == -1 ||
+           sys_fsconfig(fsfd, FSCONFIG_SET_STRING, "lowerdir", "/tmp/l", 0) == -1 ||
+           sys_fsconfig(fsfd, FSCONFIG_SET_STRING, "upperdir", "/tmp/u", 0) == -1 ||
+           sys_fsconfig(fsfd, FSCONFIG_SET_STRING, "workdir", "/tmp/w", 0) == -1)
+               return pr_perror("fsconfig");
+       if (sys_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) == -1)
+               return pr_perror("fsconfig");
+       ovl = sys_fsmount(fsfd, 0, 0);
+       if (ovl == -1)
+               return pr_perror("fsmount");
+
+       /* The context fd is no longer needed, same as for the tmpfs above. */
+       close(fsfd);
+
+       return ovl;
+}
+
+/*
+ * Check that the file device and inode shown in /proc/pid/maps match values
+ * returned by stat(2).
+ *
+ * Creates a file on the overlay mount, maps it, and compares the dev:ino
+ * parsed from /proc/self/maps with what statx() reports for the same fd.
+ * Returns 0 when they match and -1 (after reporting a ksft result) otherwise.
+ */
+static int test(void)
+{
+       struct statx stx, mstx;
+       int ovl, fd;
+       void *addr;
+
+       ovl = ovl_mount();
+       if (ovl == -1)
+               return -1;
+
+       fd = openat(ovl, "test", O_RDWR | O_CREAT, 0644);
+       if (fd == -1)
+               return pr_perror("openat");
+
+       addr = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_FILE | MAP_SHARED, fd, 0);
+       if (addr == MAP_FAILED)
+               return pr_perror("mmap");
+
+       if (get_file_dev_and_inode(addr, &mstx))
+               return -1;
+       if (statx(fd, "", AT_EMPTY_PATH | AT_STATX_SYNC_AS_STAT, STATX_INO, &stx))
+               return pr_perror("statx");
+
+       /*
+        * stx_ino is a __u64, which is not "unsigned long long" on every
+        * ABI; cast so the arguments always match %llx.
+        */
+       if (stx.stx_dev_major != mstx.stx_dev_major ||
+           stx.stx_dev_minor != mstx.stx_dev_minor ||
+           stx.stx_ino != mstx.stx_ino)
+               return pr_fail("unmatched dev:ino %x:%x:%llx (expected %x:%x:%llx)\n",
+                       mstx.stx_dev_major, mstx.stx_dev_minor,
+                       (unsigned long long)mstx.stx_ino,
+                       stx.stx_dev_major, stx.stx_dev_minor,
+                       (unsigned long long)stx.stx_ino);
+
+       ksft_test_result_pass("devices are matched\n");
+       return 0;
+}
+
+/*
+ * Entry point: skip when overlayfs or a private mount namespace is not
+ * available, otherwise run the single dev/inode test.
+ */
+int main(int argc, char **argv)
+{
+       int fsfd;
+
+       ksft_print_header();
+
+       /*
+        * "Cannot run here" must end the program as a skip, not with the
+        * failure status the previous "return 1" produced.
+        */
+       fsfd = sys_fsopen("overlay", 0);
+       if (fsfd == -1)
+               ksft_exit_skip("unable to create overlay mount\n");
+       close(fsfd);
+
+       /* Create a new mount namespace to not care about cleaning test mounts. */
+       if (unshare(CLONE_NEWNS) == -1)
+               ksft_exit_skip("unable to create a new mount namespace\n");
+
+       if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) == -1) {
+               pr_perror("mount");
+               return 1;
+       }
+
+       ksft_set_plan(1);
+
+       if (test())
+               return 1;
+
+       ksft_exit_pass();
+       return 0;
+}
diff --git a/tools/testing/selftests/filesystems/overlayfs/log.h b/tools/testing/selftests/filesystems/overlayfs/log.h

new file mode 100644 (file)

index 0000000..db64df2
--- /dev/null
+++ b/tools/testing/selftests/filesystems/overlayfs/log.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Guard named after this directory so it cannot collide with the timens
+ * selftest's log.h.
+ */
+#ifndef __SELFTEST_OVERLAYFS_LOG_H__
+#define __SELFTEST_OVERLAYFS_LOG_H__
+
+/* Print "[lvl] (file:line)<TAB>message" through ksft_print_msg(). */
+#define pr_msg(fmt, lvl, ...)                                          \
+       ksft_print_msg("[%s] (%s:%d)\t" fmt "\n",                       \
+                       lvl, __FILE__, __LINE__, ##__VA_ARGS__)
+
+/* Call @func with ": %m" appended to @fmt. */
+#define pr_p(func, fmt, ...)   func(fmt ": %m", ##__VA_ARGS__)
+
+/* Report a ksft error result (a newline is appended); evaluates to -1. */
+#define pr_err(fmt, ...)                                               \
+       ({                                                              \
+               ksft_test_result_error(fmt "\n", ##__VA_ARGS__);                \
+               -1;                                                     \
+       })
+
+/* Report a ksft fail result (no newline is appended); evaluates to -1. */
+#define pr_fail(fmt, ...)                                      \
+       ({                                                      \
+               ksft_test_result_fail(fmt, ##__VA_ARGS__);      \
+               -1;                                             \
+       })
+
+/* pr_err() with ": %m" appended to the message; evaluates to -1. */
+#define pr_perror(fmt, ...)    pr_p(pr_err, fmt, ##__VA_ARGS__)
+
+#endif
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c

index 89912a17f5d576da3a06d0020ff1ed2e2c6bee3d..c0e230f4c3e9318a8d949054b739518995f58ce3 100644 (file)
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -61,7 +61,7 @@ static void irqfd_resampler_notify(struct kvm_kernel_irqfd_resampler *resampler)
  
         list_for_each_entry_srcu(irqfd, &resampler->list, resampler_link,
                                  srcu_read_lock_held(&resampler->kvm->irq_srcu))
-               eventfd_signal(irqfd->resamplefd, 1);
+               eventfd_signal(irqfd->resamplefd);
  }
  
  /*
@@ -786,7 +786,7 @@ ioeventfd_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
         if (!ioeventfd_in_range(p, addr, len, val))
                 return -EOPNOTSUPP;
  
-       eventfd_signal(p->eventfd, 1);
+       eventfd_signal(p->eventfd);
         return 0;
  }
author	Christian Brauner <brauner@kernel.org>
	Thu, 21 Dec 2023 12:21:52 +0000 (13:21 +0100)
committer	Christian Brauner <brauner@kernel.org>
	Thu, 21 Dec 2023 12:21:52 +0000 (13:21 +0100)
MAINTAINERS		patch \| blob \| blame \| history
arch/x86/kvm/hyperv.c		patch \| blob \| blame \| history
arch/x86/kvm/xen.c		patch \| blob \| blame \| history
drivers/accel/habanalabs/common/device.c		patch \| blob \| blame \| history
drivers/fpga/dfl.c		patch \| blob \| blame \| history
drivers/gpu/drm/drm_syncobj.c		patch \| blob \| blame \| history
drivers/gpu/drm/i915/gvt/interrupt.c		patch \| blob \| blame \| history
drivers/infiniband/hw/mlx5/devx.c		patch \| blob \| blame \| history
drivers/misc/ocxl/file.c		patch \| blob \| blame \| history
drivers/s390/cio/vfio_ccw_chp.c		patch \| blob \| blame \| history
drivers/s390/cio/vfio_ccw_drv.c		patch \| blob \| blame \| history
drivers/s390/cio/vfio_ccw_ops.c		patch \| blob \| blame \| history
drivers/s390/crypto/vfio_ap_ops.c		patch \| blob \| blame \| history
drivers/usb/gadget/function/f_fs.c		patch \| blob \| blame \| history
drivers/vdpa/vdpa_user/vduse_dev.c		patch \| blob \| blame \| history
drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c		patch \| blob \| blame \| history
drivers/vfio/pci/vfio_pci_core.c		patch \| blob \| blame \| history
drivers/vfio/pci/vfio_pci_intrs.c		patch \| blob \| blame \| history
drivers/vfio/platform/vfio_platform_irq.c		patch \| blob \| blame \| history
drivers/vhost/vdpa.c		patch \| blob \| blame \| history
drivers/vhost/vhost.c		patch \| blob \| blame \| history
drivers/vhost/vhost.h		patch \| blob \| blame \| history
drivers/virt/acrn/ioeventfd.c		patch \| blob \| blame \| history
drivers/xen/privcmd.c		patch \| blob \| blame \| history
fs/aio.c		patch \| blob \| blame \| history
fs/attr.c		patch \| blob \| blame \| history
fs/btrfs/extent_io.c		patch \| blob \| blame \| history
fs/btrfs/subpage.c		patch \| blob \| blame \| history
fs/buffer.c		patch \| blob \| blame \| history
fs/dax.c		patch \| blob \| blame \| history
fs/direct-io.c		patch \| blob \| blame \| history
fs/eventfd.c		patch \| blob \| blame \| history
fs/ext4/inode.c		patch \| blob \| blame \| history
fs/gfs2/glock.c		patch \| blob \| blame \| history
fs/gfs2/ops_fstype.c		patch \| blob \| blame \| history
fs/hfsplus/wrapper.c		patch \| blob \| blame \| history
fs/hugetlbfs/inode.c		patch \| blob \| blame \| history
fs/inode.c		patch \| blob \| blame \| history
fs/mnt_idmapping.c		patch \| blob \| blame \| history
fs/namei.c		patch \| blob \| blame \| history
fs/namespace.c		patch \| blob \| blame \| history
fs/nfs/write.c		patch \| blob \| blame \| history
fs/nilfs2/inode.c		patch \| blob \| blame \| history
fs/ntfs/aops.c		patch \| blob \| blame \| history
fs/ntfs/dir.c		patch \| blob \| blame \| history
fs/open.c		patch \| blob \| blame \| history
fs/pipe.c		patch \| blob \| blame \| history
fs/posix_acl.c		patch \| blob \| blame \| history
fs/proc/task_mmu.c		patch \| blob \| blame \| history
fs/stat.c		patch \| blob \| blame \| history
fs/super.c		patch \| blob \| blame \| history
include/linux/eventfd.h		patch \| blob \| blame \| history
include/linux/fs.h		patch \| blob \| blame \| history
include/linux/mnt_idmapping.h		patch \| blob \| blame \| history
include/linux/uidgid.h		patch \| blob \| blame \| history
include/linux/writeback.h		patch \| blob \| blame \| history
io_uring/io_uring.c		patch \| blob \| blame \| history
kernel/user_namespace.c		patch \| blob \| blame \| history
kernel/watch_queue.c		patch \| blob \| blame \| history
mm/hugetlb.c		patch \| blob \| blame \| history
mm/memcontrol.c		patch \| blob \| blame \| history
mm/migrate.c		patch \| blob \| blame \| history
mm/vmpressure.c		patch \| blob \| blame \| history
samples/vfio-mdev/mtty.c		patch \| blob \| blame \| history
tools/testing/selftests/Makefile		patch \| blob \| blame \| history
tools/testing/selftests/filesystems/overlayfs/.gitignore	[new file with mode: 0644]	patch \| blob
tools/testing/selftests/filesystems/overlayfs/Makefile	[new file with mode: 0644]	patch \| blob
tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c	[new file with mode: 0644]	patch \| blob
tools/testing/selftests/filesystems/overlayfs/log.h	[new file with mode: 0644]	patch \| blob
virt/kvm/eventfd.c		patch \| blob \| blame \| history