Merge tag 'libnvdimm-for-4.19_dax-memory-failure' of gitolite.kernel.org:pub/scm...
index f76724139f80c50da39e37e6b95058b7095eabbc..f32d7125ad0f237d61173cd72383683ac380c4e4 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -226,8 +226,8 @@ static inline void *unlock_slot(struct address_space *mapping, void **slot)
  *
  * Must be called with the i_pages lock held.
  */
-static void *get_unlocked_mapping_entry(struct address_space *mapping,
-                                       pgoff_t index, void ***slotp)
+static void *__get_unlocked_mapping_entry(struct address_space *mapping,
+               pgoff_t index, void ***slotp, bool (*wait_fn)(void))
 {
        void *entry, **slot;
        struct wait_exceptional_entry_queue ewait;
@@ -237,6 +237,8 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
        ewait.wait.func = wake_exceptional_entry_func;
 
        for (;;) {
+               bool revalidate;
+
                entry = __radix_tree_lookup(&mapping->i_pages, index, NULL,
                                          &slot);
                if (!entry ||
@@ -251,14 +253,31 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
                prepare_to_wait_exclusive(wq, &ewait.wait,
                                          TASK_UNINTERRUPTIBLE);
                xa_unlock_irq(&mapping->i_pages);
-               schedule();
+               revalidate = wait_fn();
                finish_wait(wq, &ewait.wait);
                xa_lock_irq(&mapping->i_pages);
+               if (revalidate)
+                       return ERR_PTR(-EAGAIN);
        }
 }
 
-static void dax_unlock_mapping_entry(struct address_space *mapping,
-                                    pgoff_t index)
+static bool entry_wait(void)
+{
+       schedule();
+       /*
+        * Never return an ERR_PTR() from
+        * __get_unlocked_mapping_entry(), just keep looping.
+        */
+       return false;
+}
+
+static void *get_unlocked_mapping_entry(struct address_space *mapping,
+               pgoff_t index, void ***slotp)
+{
+       return __get_unlocked_mapping_entry(mapping, index, slotp, entry_wait);
+}
+
+static void unlock_mapping_entry(struct address_space *mapping, pgoff_t index)
 {
        void *entry, **slot;
 
@@ -277,7 +296,7 @@ static void dax_unlock_mapping_entry(struct address_space *mapping,
 static void put_locked_mapping_entry(struct address_space *mapping,
                pgoff_t index)
 {
-       dax_unlock_mapping_entry(mapping, index);
+       unlock_mapping_entry(mapping, index);
 }
 
 /*
@@ -319,18 +338,27 @@ static unsigned long dax_radix_end_pfn(void *entry)
        for (pfn = dax_radix_pfn(entry); \
                        pfn < dax_radix_end_pfn(entry); pfn++)
 
-static void dax_associate_entry(void *entry, struct address_space *mapping)
+/*
+ * TODO: for reflink+dax we need a way to associate a single page with
+ * multiple address_space instances at different linear_page_index()
+ * offsets.
+ */
+static void dax_associate_entry(void *entry, struct address_space *mapping,
+               struct vm_area_struct *vma, unsigned long address)
 {
-       unsigned long pfn;
+       unsigned long size = dax_entry_size(entry), pfn, index;
+       int i = 0;
 
        if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
                return;
 
+       index = linear_page_index(vma, address & ~(size - 1));
        for_each_mapped_pfn(entry, pfn) {
                struct page *page = pfn_to_page(pfn);
 
                WARN_ON_ONCE(page->mapping);
                page->mapping = mapping;
+               page->index = index + i++;
        }
 }
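
The index assignment in dax_associate_entry() above rounds the faulting address down to the entry size and then hands out consecutive page indices across the entry's pfns. A worked example of that arithmetic as a standalone userspace sketch; the VMA start, faulting address, 2MiB entry size, PAGE_SHIFT == 12 and vm_pgoff == 0 are all assumptions for illustration, not values from the patch:

#include <stdio.h>

int main(void)
{
	unsigned long vm_start = 0x7f3500000000UL;	/* assumed VMA start, vm_pgoff == 0 */
	unsigned long address  = 0x7f3500403000UL;	/* assumed faulting address */
	unsigned long size     = 0x200000UL;		/* assumed 2MiB PMD entry */

	/* round down to the entry boundary, as dax_associate_entry() does */
	unsigned long aligned = address & ~(size - 1);
	/* what linear_page_index() would return for the aligned address */
	unsigned long index = (aligned - vm_start) >> 12;

	/* each pfn backing the entry gets index, index + 1, ... */
	printf("first index 0x%lx, last index 0x%lx\n",
	       index, index + (size >> 12) - 1);
	return 0;
}

This prints "first index 0x400, last index 0x5ff", i.e. the 512 pages behind the 2MiB entry map to the file offsets from 4MiB up to 6MiB.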
 
@@ -348,6 +376,7 @@ static void dax_disassociate_entry(void *entry, struct address_space *mapping,
                WARN_ON_ONCE(trunc && page_ref_count(page) > 1);
                WARN_ON_ONCE(page->mapping && page->mapping != mapping);
                page->mapping = NULL;
+               page->index = 0;
        }
 }
 
@@ -364,6 +393,84 @@ static struct page *dax_busy_page(void *entry)
        return NULL;
 }
 
+static bool entry_wait_revalidate(void)
+{
+       rcu_read_unlock();
+       schedule();
+       rcu_read_lock();
+
+       /*
+        * Tell __get_unlocked_mapping_entry() to take a break, we need
+        * to revalidate page->mapping after dropping locks
+        */
+       return true;
+}
+
+bool dax_lock_mapping_entry(struct page *page)
+{
+       pgoff_t index;
+       struct inode *inode;
+       bool did_lock = false;
+       void *entry = NULL, **slot;
+       struct address_space *mapping;
+
+       rcu_read_lock();
+       for (;;) {
+               mapping = READ_ONCE(page->mapping);
+
+               if (!dax_mapping(mapping))
+                       break;
+
+               /*
+                * In the device-dax case there's no need to lock, a
+                * struct dev_pagemap pin is sufficient to keep the
+                * inode alive, and we assume we have dev_pagemap pin
+                * otherwise we would not have a valid pfn_to_page()
+                * translation.
+                */
+               inode = mapping->host;
+               if (S_ISCHR(inode->i_mode)) {
+                       did_lock = true;
+                       break;
+               }
+
+               xa_lock_irq(&mapping->i_pages);
+               if (mapping != page->mapping) {
+                       xa_unlock_irq(&mapping->i_pages);
+                       continue;
+               }
+               index = page->index;
+
+               entry = __get_unlocked_mapping_entry(mapping, index, &slot,
+                               entry_wait_revalidate);
+               if (!entry) {
+                       xa_unlock_irq(&mapping->i_pages);
+                       break;
+               } else if (IS_ERR(entry)) {
+                       WARN_ON_ONCE(PTR_ERR(entry) != -EAGAIN);
+                       continue;
+               }
+               lock_slot(mapping, slot);
+               did_lock = true;
+               xa_unlock_irq(&mapping->i_pages);
+               break;
+       }
+       rcu_read_unlock();
+
+       return did_lock;
+}
+
+void dax_unlock_mapping_entry(struct page *page)
+{
+       struct address_space *mapping = page->mapping;
+       struct inode *inode = mapping->host;
+
+       if (S_ISCHR(inode->i_mode))
+               return;
+
+       unlock_mapping_entry(mapping, page->index);
+}
+
 /*
  * Find radix tree entry at given index. If it points to an exceptional entry,
  * return it with the radix tree entry locked. If the radix tree doesn't
@@ -708,7 +815,7 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
        new_entry = dax_radix_locked_entry(pfn, flags);
        if (dax_entry_size(entry) != dax_entry_size(new_entry)) {
                dax_disassociate_entry(entry, mapping, false);
-               dax_associate_entry(new_entry, mapping);
+               dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address);
        }
 
        if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
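
The dax_lock_mapping_entry()/dax_unlock_mapping_entry() pair added above gives a caller that starts from a pfn-derived struct page (the memory-failure path this pull is named after) a way to hold page->mapping and page->index stable while it uses them. A minimal sketch of such a caller, assuming kernel context; handle_dax_pfn(), its return codes and the pr_info() message are illustrative, not part of this patch:

#include <linux/dax.h>
#include <linux/mm.h>
#include <linux/printk.h>

static int handle_dax_pfn(unsigned long pfn)
{
	struct page *page = pfn_to_page(pfn);
	pgoff_t pgoff;

	/* pin the radix tree entry so truncate cannot tear the mapping down under us */
	if (!dax_lock_mapping_entry(page))
		return -EBUSY;	/* mapping already went away */

	/* page->mapping and page->index are stable until the unlock below */
	pgoff = page->index;
	pr_info("dax page pinned at pgoff %lu\n", pgoff);

	dax_unlock_mapping_entry(page);
	return 0;
}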