diff --git a/mm/shmem.c b/mm/shmem.c
index 581e7baf04782264863429fb5603733ecd40a1be..24005c3b345ca80687f841e483bd2a8ad053b4fc 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2052,9 +2052,10 @@ unsigned long shmem_get_unmapped_area(struct file *file,
        /*
         * Our priority is to support MAP_SHARED mapped hugely;
         * and support MAP_PRIVATE mapped hugely too, until it is COWed.
-        * But if caller specified an address hint, respect that as before.
+        * But if caller specified an address hint and we allocated area there
+        * successfully, respect that as before.
         */
-       if (uaddr)
+       if (uaddr == addr)
                return addr;
 
        if (shmem_huge != SHMEM_HUGE_FORCE) {
@@ -2088,7 +2089,7 @@ unsigned long shmem_get_unmapped_area(struct file *file,
        if (inflated_len < len)
                return addr;
 
-       inflated_addr = get_area(NULL, 0, inflated_len, 0, flags);
+       inflated_addr = get_area(NULL, uaddr, inflated_len, 0, flags);
        if (IS_ERR_VALUE(inflated_addr))
                return addr;
        if (inflated_addr & ~PAGE_MASK)
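The two hunks above change how shmem_get_unmapped_area() treats a caller-supplied hint: the hint is honored only when the search actually granted it (uaddr == addr), and the inflated THP-sized retry now passes the original hint through instead of NULL. A minimal user-space sketch of the behavior being tuned, assuming a 64-bit system and a glibc with memfd_create() (2.27+); the hint value is arbitrary and purely illustrative:

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	/* A 4 MiB shmem object; memfd is tmpfs-backed, so mapping it
	 * goes through shmem_get_unmapped_area(). */
	size_t len = 4UL << 20;
	int fd = memfd_create("hint-demo", 0);
	if (fd < 0 || ftruncate(fd, len) < 0) {
		perror("memfd_create/ftruncate");
		return 1;
	}

	/* An arbitrary hint (not MAP_FIXED, so the kernel may ignore it). */
	void *hint = (void *)0x7f0000000000UL;
	void *addr = mmap(hint, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (addr == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* With the fix, the hint is "respected" only when the search
	 * really returned it; otherwise a THP-aligned address may win. */
	printf("hint=%p got=%p %s\n", hint, addr,
	       addr == hint ? "(hint honored)" : "(kernel chose elsewhere)");
	munmap(addr, len);
	return 0;
}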
@@ -2128,7 +2129,11 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
        struct shmem_inode_info *info = SHMEM_I(inode);
        int retval = -ENOMEM;
 
-       spin_lock_irq(&info->lock);
+       /*
+        * What serializes the accesses to info->flags?
+        * ipc_lock_object() when called from shmctl_do_lock(),
+        * no serialization needed when called from shm_destroy().
+        */
        if (lock && !(info->flags & VM_LOCKED)) {
                if (!user_shm_lock(inode->i_size, user))
                        goto out_nomem;
@@ -2143,7 +2148,6 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
        retval = 0;
 
 out_nomem:
-       spin_unlock_irq(&info->lock);
        return retval;
 }
 
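The comment added above explains that info->flags no longer needs info->lock here: shmctl(SHM_LOCK) reaches shmem_lock() under ipc_lock_object(), and shm_destroy() runs with no concurrent users. A minimal sketch of that caller path from user space, exercising shmem_lock() via SysV shared memory; error handling kept deliberately short:

#include <stdio.h>
#include <sys/ipc.h>
#include <sys/shm.h>

int main(void)
{
	/* Create a private 1 MiB SysV segment (tmpfs-backed internally). */
	int id = shmget(IPC_PRIVATE, 1 << 20, IPC_CREAT | 0600);
	if (id < 0) {
		perror("shmget");
		return 1;
	}

	/* shmctl(SHM_LOCK) calls shmem_lock() under ipc_lock_object(),
	 * which is what now serializes the VM_LOCKED bit in info->flags. */
	if (shmctl(id, SHM_LOCK, NULL) < 0)
		perror("shmctl(SHM_LOCK)");	/* may need CAP_IPC_LOCK or rlimit */

	if (shmctl(id, SHM_UNLOCK, NULL) < 0)
		perror("shmctl(SHM_UNLOCK)");

	shmctl(id, IPC_RMID, NULL);		/* clean up the segment */
	return 0;
}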
@@ -2305,6 +2309,16 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
        _dst_pte = mk_pte(page, dst_vma->vm_page_prot);
        if (dst_vma->vm_flags & VM_WRITE)
                _dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));
+       else {
+               /*
+                * We don't set the pte dirty if the vma has no
+                * VM_WRITE permission, so mark the page dirty or it
+                * could be freed from under us. We could do it
+                * unconditionally before unlock_page(), but doing it
+                * only if VM_WRITE is not set is faster.
+                */
+               set_page_dirty(page);
+       }
 
        dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
 
@@ -2319,11 +2333,11 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
 
        lru_cache_add_anon(page);
 
-       spin_lock(&info->lock);
+       spin_lock_irq(&info->lock);
        info->alloced++;
        inode->i_blocks += BLOCKS_PER_PAGE;
        shmem_recalc_inode(inode);
-       spin_unlock(&info->lock);
+       spin_unlock_irq(&info->lock);
 
        inc_mm_counter(dst_mm, mm_counter_file(page));
        page_add_file_rmap(page, false);
@@ -2338,6 +2352,7 @@ out:
        return ret;
 out_release_uncharge_unlock:
        pte_unmap_unlock(dst_pte, ptl);
+       ClearPageDirty(page);
        delete_from_page_cache(page);
 out_release_uncharge:
        mem_cgroup_cancel_charge(page, memcg, false);
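The three hunks above all touch the UFFDIO_COPY path into shmem: the new page is marked dirty when the destination VMA lacks VM_WRITE (and the dirty bit is cleared again on the error path), and the info->lock accounting switches to the irq-safe variants. A minimal user-space sketch of the sequence that reaches shmem_mfill_atomic_pte(), assuming a kernel with userfaultfd shmem support (4.11+) and memfd_create(); the destination is mapped PROT_READ so the VMA has no VM_WRITE, which is exactly the case the set_page_dirty() hunk covers. Not production error handling:

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/userfaultfd.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);

	int ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
	if (ufd < 0) { perror("userfaultfd"); return 1; }

	struct uffdio_api api = { .api = UFFD_API };
	if (ioctl(ufd, UFFDIO_API, &api)) { perror("UFFDIO_API"); return 1; }

	/* Read-only MAP_SHARED shmem mapping: missing faults here are
	 * filled by shmem_mfill_atomic_pte() in the kernel. */
	int fd = memfd_create("uffd-demo", 0);
	if (fd < 0 || ftruncate(fd, page)) { perror("memfd"); return 1; }
	char *dst = mmap(NULL, page, PROT_READ, MAP_SHARED, fd, 0);
	if (dst == MAP_FAILED) { perror("mmap"); return 1; }

	struct uffdio_register reg = {
		.range = { .start = (unsigned long)dst, .len = page },
		.mode  = UFFDIO_REGISTER_MODE_MISSING,
	};
	if (ioctl(ufd, UFFDIO_REGISTER, &reg)) { perror("UFFDIO_REGISTER"); return 1; }

	/* Populate the not-yet-faulted page; no fault needs to be pending. */
	char *src = mmap(NULL, page, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	memset(src, 'x', page);
	struct uffdio_copy copy = {
		.dst = (unsigned long)dst,
		.src = (unsigned long)src,
		.len = page,
	};
	if (ioctl(ufd, UFFDIO_COPY, &copy)) { perror("UFFDIO_COPY"); return 1; }

	printf("dst[0] = '%c' after UFFDIO_COPY\n", dst[0]);
	return 0;
}

Without the set_page_dirty() hunk, the page landing in this read-only mapping stays clean and can be reclaimed, losing the copied data; the ClearPageDirty() on the error path undoes the marking before the page is deleted from the page cache.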
@@ -2646,31 +2661,33 @@ static void shmem_tag_pins(struct address_space *mapping)
        void **slot;
        pgoff_t start;
        struct page *page;
+       unsigned int tagged = 0;
 
        lru_add_drain();
        start = 0;
-       rcu_read_lock();
 
+       spin_lock_irq(&mapping->tree_lock);
        radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
-               page = radix_tree_deref_slot(slot);
+               page = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
                if (!page || radix_tree_exception(page)) {
                        if (radix_tree_deref_retry(page)) {
                                slot = radix_tree_iter_retry(&iter);
                                continue;
                        }
                } else if (page_count(page) - page_mapcount(page) > 1) {
-                       spin_lock_irq(&mapping->tree_lock);
                        radix_tree_tag_set(&mapping->page_tree, iter.index,
                                           SHMEM_TAG_PINNED);
-                       spin_unlock_irq(&mapping->tree_lock);
                }
 
-               if (need_resched()) {
-                       slot = radix_tree_iter_resume(slot, &iter);
-                       cond_resched_rcu();
-               }
+               if (++tagged % 1024)
+                       continue;
+
+               slot = radix_tree_iter_resume(slot, &iter);
+               spin_unlock_irq(&mapping->tree_lock);
+               cond_resched();
+               spin_lock_irq(&mapping->tree_lock);
        }
-       rcu_read_unlock();
+       spin_unlock_irq(&mapping->tree_lock);
 }
 
 /*
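The rework above replaces per-slot lock acquisition under RCU with one tree_lock critical section that is dropped every 1024 slots, so that contending CPUs and preemption get a look in while every pinned page is still tagged reliably. A user-space analogue of that batching pattern, with a pthread mutex standing in for tree_lock and sched_yield() for cond_resched(); all names here are illustrative, not kernel API:

#include <pthread.h>
#include <sched.h>
#include <stdio.h>

#define NITEMS (1 << 20)

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int items[NITEMS];

/* Walk every item under the lock, but back off every 1024 items so
 * other threads (the "other CPUs") are not starved. Note the counter
 * advances on every slot visited, tagged or not, just as in the hunk. */
static unsigned long tag_pinned(void)
{
	unsigned long visited = 0;

	pthread_mutex_lock(&lock);
	for (int i = 0; i < NITEMS; i++) {
		if (items[i] > 1)
			items[i] = -1;		/* "tag" the pinned entry */

		if (++visited % 1024)
			continue;		/* fast path: keep the lock */

		/* every 1024th item: drop the lock, yield, retake it */
		pthread_mutex_unlock(&lock);
		sched_yield();
		pthread_mutex_lock(&lock);
	}
	pthread_mutex_unlock(&lock);
	return visited;
}

int main(void)
{
	for (int i = 0; i < NITEMS; i++)
		items[i] = i % 4;		/* fake "extra reference" counts */
	printf("walked %lu items\n", tag_pinned());
	return 0;
}

Build with cc -pthread; the point is the shape of the loop, not the data.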
@@ -2882,7 +2899,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
                }
 
                shmem_falloc.waitq = &shmem_falloc_waitq;
-               shmem_falloc.start = unmap_start >> PAGE_SHIFT;
+               shmem_falloc.start = (u64)unmap_start >> PAGE_SHIFT;
                shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
                spin_lock(&inode->i_lock);
                inode->i_private = &shmem_falloc;
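This hunk sits in the hole-punching branch of shmem_fallocate(), where the range about to be unmapped is recorded in shmem_falloc before faults are excluded. A minimal sketch of the operation that takes this path, assuming /dev/shm is a tmpfs mount:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* /dev/shm is assumed to be tmpfs, so this hits shmem_fallocate(). */
	int fd = open("/dev/shm/punch-demo", O_RDWR | O_CREAT | O_TRUNC, 0600);
	if (fd < 0) { perror("open"); return 1; }

	if (ftruncate(fd, 1 << 20) < 0) { perror("ftruncate"); return 1; }

	/* Punch a 256 KiB hole at offset 128 KiB: pages in the hole are
	 * unmapped and freed while the file size stays the same, which is
	 * when shmem_falloc.start/.next above are computed. */
	if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
		      128 << 10, 256 << 10) < 0)
		perror("fallocate(FALLOC_FL_PUNCH_HOLE)");

	unlink("/dev/shm/punch-demo");
	return 0;
}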
@@ -3085,16 +3102,20 @@ static int shmem_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
 {
        struct inode *inode = d_inode(old_dentry);
-       int ret;
+       int ret = 0;
 
        /*
         * No ordinary (disk based) filesystem counts links as inodes;
         * but each new link needs a new dentry, pinning lowmem, and
         * tmpfs dentries cannot be pruned until they are unlinked.
+        * But if an O_TMPFILE file is linked into the tmpfs, the
+        * first link must skip that, to get the accounting right.
         */
-       ret = shmem_reserve_inode(inode->i_sb);
-       if (ret)
-               goto out;
+       if (inode->i_nlink) {
+               ret = shmem_reserve_inode(inode->i_sb);
+               if (ret)
+                       goto out;
+       }
 
        dir->i_size += BOGO_DIRENT_SIZE;
        inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
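The change above makes the first link of an O_TMPFILE inode skip shmem_reserve_inode(): the unlinked temp file (i_nlink == 0) already holds its inode reservation, so reserving again would double-count. A minimal sketch of the user-visible sequence, again assuming tmpfs at /dev/shm; the /proc/self/fd re-link trick is the unprivileged variant documented in open(2):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* An unnamed tmpfs file: i_nlink starts at 0, exactly the case
	 * shmem_link() now special-cases. */
	int fd = open("/dev/shm", O_TMPFILE | O_RDWR, 0600);
	if (fd < 0) { perror("open(O_TMPFILE)"); return 1; }

	if (write(fd, "hello\n", 6) != 6)
		perror("write");

	/* Give the temp file a name: the "first link" of the comment. */
	char path[64];
	snprintf(path, sizeof(path), "/proc/self/fd/%d", fd);
	if (linkat(AT_FDCWD, path, AT_FDCWD, "/dev/shm/linked-demo",
		   AT_SYMLINK_FOLLOW) < 0)
		perror("linkat");

	unlink("/dev/shm/linked-demo");
	close(fd);
	return 0;
}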