From 1a42010cdc26bb7e5912984f3c91b8c6d55f089a Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Tue, 23 Apr 2019 10:53:21 +0200
Subject: s390/mm: convert to the generic get_user_pages_fast code

From: Martin Schwidefsky <schwidefsky@de.ibm.com>

commit 1a42010cdc26bb7e5912984f3c91b8c6d55f089a upstream.

Define gup_fast_permitted() to check against the asce_limit of the mm
attached to the current task, then replace the s390-specific gup code
with the generic implementation in mm/gup.c.

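As a rough illustration (not part of this change): the range check that
gup_fast_permitted() performs can be exercised in a small, self-contained
userspace sketch. PAGE_SHIFT, the helper name range_permitted() and the
sample asce_limit value below are assumptions chosen for the example, not
values taken from this patch.

  /*
   * Userspace sketch of the gup_fast_permitted() overflow/limit check.
   * The kernel reads the limit from current->mm->context.asce_limit;
   * here PAGE_SHIFT and asce_limit are assumed values (64-bit longs).
   */
  #include <stdbool.h>
  #include <stdio.h>

  #define PAGE_SHIFT 12

  static bool range_permitted(unsigned long start, int nr_pages,
                              unsigned long asce_limit)
  {
          unsigned long len = (unsigned long) nr_pages << PAGE_SHIFT;
          unsigned long end = start + len;

          if (end < start)                /* address range wrapped around */
                  return false;
          return end <= asce_limit;       /* must stay below the ASCE limit */
  }

  int main(void)
  {
          unsigned long asce_limit = 1UL << 53;   /* example: 4-level ASCE */

          /* range below the limit: permitted */
          printf("%d\n", range_permitted(0x1000, 16, asce_limit));
          /* range wraps past the top of the address space: rejected */
          printf("%d\n", range_permitted(~0UL - 0xfff, 16, asce_limit));
          return 0;
  }

Compiled with a plain C compiler, the first call reports 1 (range fits
below the limit) and the second reports 0 (the range wraps around).
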
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 arch/s390/Kconfig               |    1
 arch/s390/include/asm/pgtable.h |   12 +
 arch/s390/mm/Makefile           |    2
 arch/s390/mm/gup.c              |  291 ----------------------------------------
 4 files changed, 14 insertions(+), 292 deletions(-)

--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -148,6 +148,7 @@ config S390
         select HAVE_FUNCTION_TRACER
         select HAVE_FUTEX_CMPXCHG if FUTEX
         select HAVE_GCC_PLUGINS
+        select HAVE_GENERIC_GUP
         select HAVE_KERNEL_BZIP2
         select HAVE_KERNEL_GZIP
         select HAVE_KERNEL_LZ4
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1264,6 +1264,18 @@ static inline pte_t *pte_offset(pmd_t *p
 #define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
 #define pte_unmap(pte) do { } while (0)

+static inline bool gup_fast_permitted(unsigned long start, int nr_pages)
+{
+        unsigned long len, end;
+
+        len = (unsigned long) nr_pages << PAGE_SHIFT;
+        end = start + len;
+        if (end < start)
+                return false;
+        return end <= current->mm->context.asce_limit;
+}
+#define gup_fast_permitted gup_fast_permitted
+
 #define pfn_pte(pfn,pgprot) mk_pte_phys(__pa((pfn) << PAGE_SHIFT),(pgprot))
 #define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT)
 #define pte_page(x) pfn_to_page(pte_pfn(x))
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -4,7 +4,7 @@
 #

 obj-y := init.o fault.o extmem.o mmap.o vmem.o maccess.o
-obj-y += page-states.o gup.o pageattr.o pgtable.o pgalloc.o
+obj-y += page-states.o pageattr.o pgtable.o pgalloc.o

 obj-$(CONFIG_CMM) += cmm.o
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
--- a/arch/s390/mm/gup.c
+++ /dev/null
@@ -1,291 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Lockless get_user_pages_fast for s390
- *
- * Copyright IBM Corp. 2010
- * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
- */
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/hugetlb.h>
-#include <linux/vmstat.h>
-#include <linux/pagemap.h>
-#include <linux/rwsem.h>
-#include <asm/pgtable.h>
-
-/*
- * The performance critical leaf functions are made noinline otherwise gcc
- * inlines everything into a single function which results in too much
- * register pressure.
- */
-static inline int gup_pte_range(pmd_t pmd, unsigned long addr,
-                unsigned long end, int write, struct page **pages, int *nr)
-{
-        struct page *head, *page;
-        unsigned long mask;
-        pte_t *ptep, pte;
-
-        mask = (write ? _PAGE_PROTECT : 0) | _PAGE_INVALID | _PAGE_SPECIAL;
-
-        ptep = pte_offset_map(&pmd, addr);
-        do {
-                pte = *ptep;
-                barrier();
-                /* Similar to the PMD case, NUMA hinting must take slow path */
-                if (pte_protnone(pte))
-                        return 0;
-                if ((pte_val(pte) & mask) != 0)
-                        return 0;
-                VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
-                page = pte_page(pte);
-                head = compound_head(page);
-                if (!page_cache_get_speculative(head))
-                        return 0;
-                if (unlikely(pte_val(pte) != pte_val(*ptep))) {
-                        put_page(head);
-                        return 0;
-                }
-                VM_BUG_ON_PAGE(compound_head(page) != head, page);
-                pages[*nr] = page;
-                (*nr)++;
-
-        } while (ptep++, addr += PAGE_SIZE, addr != end);
-
-        return 1;
-}
-
-static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
-                unsigned long end, int write, struct page **pages, int *nr)
-{
-        struct page *head, *page;
-        unsigned long mask;
-        int refs;
-
-        mask = (write ? _SEGMENT_ENTRY_PROTECT : 0) | _SEGMENT_ENTRY_INVALID;
-        if ((pmd_val(pmd) & mask) != 0)
-                return 0;
-        VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT));
-
-        refs = 0;
-        head = pmd_page(pmd);
-        page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
-        do {
-                VM_BUG_ON(compound_head(page) != head);
-                pages[*nr] = page;
-                (*nr)++;
-                page++;
-                refs++;
-        } while (addr += PAGE_SIZE, addr != end);
-
-        if (!page_cache_add_speculative(head, refs)) {
-                *nr -= refs;
-                return 0;
-        }
-
-        if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) {
-                *nr -= refs;
-                while (refs--)
-                        put_page(head);
-                return 0;
-        }
-
-        return 1;
-}
-
-
-static inline int gup_pmd_range(pud_t pud, unsigned long addr,
-                unsigned long end, int write, struct page **pages, int *nr)
-{
-        unsigned long next;
-        pmd_t *pmdp, pmd;
-
-        pmdp = pmd_offset(&pud, addr);
-        do {
-                pmd = *pmdp;
-                barrier();
-                next = pmd_addr_end(addr, end);
-                if (pmd_none(pmd))
-                        return 0;
-                if (unlikely(pmd_large(pmd))) {
-                        /*
-                         * NUMA hinting faults need to be handled in the GUP
-                         * slowpath for accounting purposes and so that they
-                         * can be serialised against THP migration.
-                         */
-                        if (pmd_protnone(pmd))
-                                return 0;
-                        if (!gup_huge_pmd(pmdp, pmd, addr, next,
-                                          write, pages, nr))
-                                return 0;
-                } else if (!gup_pte_range(pmd, addr, next,
-                                          write, pages, nr))
-                        return 0;
-        } while (pmdp++, addr = next, addr != end);
-
-        return 1;
-}
-
-static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr,
-                unsigned long end, int write, struct page **pages, int *nr)
-{
-        struct page *head, *page;
-        unsigned long mask;
-        int refs;
-
-        mask = (write ? _REGION_ENTRY_PROTECT : 0) | _REGION_ENTRY_INVALID;
-        if ((pud_val(pud) & mask) != 0)
-                return 0;
-        VM_BUG_ON(!pfn_valid(pud_pfn(pud)));
-
-        refs = 0;
-        head = pud_page(pud);
-        page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
-        do {
-                VM_BUG_ON_PAGE(compound_head(page) != head, page);
-                pages[*nr] = page;
-                (*nr)++;
-                page++;
-                refs++;
-        } while (addr += PAGE_SIZE, addr != end);
-
-        if (!page_cache_add_speculative(head, refs)) {
-                *nr -= refs;
-                return 0;
-        }
-
-        if (unlikely(pud_val(pud) != pud_val(*pudp))) {
-                *nr -= refs;
-                while (refs--)
-                        put_page(head);
-                return 0;
-        }
-
-        return 1;
-}
-
-static inline int gup_pud_range(p4d_t p4d, unsigned long addr,
-                unsigned long end, int write, struct page **pages, int *nr)
-{
-        unsigned long next;
-        pud_t *pudp, pud;
-
-        pudp = pud_offset(&p4d, addr);
-        do {
-                pud = *pudp;
-                barrier();
-                next = pud_addr_end(addr, end);
-                if (pud_none(pud))
-                        return 0;
-                if (unlikely(pud_large(pud))) {
-                        if (!gup_huge_pud(pudp, pud, addr, next, write, pages,
-                                          nr))
-                                return 0;
-                } else if (!gup_pmd_range(pud, addr, next, write, pages,
-                                          nr))
-                        return 0;
-        } while (pudp++, addr = next, addr != end);
-
-        return 1;
-}
-
-static inline int gup_p4d_range(pgd_t pgd, unsigned long addr,
-                unsigned long end, int write, struct page **pages, int *nr)
-{
-        unsigned long next;
-        p4d_t *p4dp, p4d;
-
-        p4dp = p4d_offset(&pgd, addr);
-        do {
-                p4d = *p4dp;
-                barrier();
-                next = p4d_addr_end(addr, end);
-                if (p4d_none(p4d))
-                        return 0;
-                if (!gup_pud_range(p4d, addr, next, write, pages, nr))
-                        return 0;
-        } while (p4dp++, addr = next, addr != end);
-
-        return 1;
-}
-
-/*
- * Like get_user_pages_fast() except its IRQ-safe in that it won't fall
- * back to the regular GUP.
- * Note a difference with get_user_pages_fast: this always returns the
- * number of pages pinned, 0 if no pages were pinned.
- */
-int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
-                          struct page **pages)
-{
-        struct mm_struct *mm = current->mm;
-        unsigned long addr, len, end;
-        unsigned long next, flags;
-        pgd_t *pgdp, pgd;
-        int nr = 0;
-
-        start &= PAGE_MASK;
-        addr = start;
-        len = (unsigned long) nr_pages << PAGE_SHIFT;
-        end = start + len;
-        if ((end <= start) || (end > mm->context.asce_limit))
-                return 0;
-        /*
-         * local_irq_save() doesn't prevent pagetable teardown, but does
-         * prevent the pagetables from being freed on s390.
-         *
-         * So long as we atomically load page table pointers versus teardown,
-         * we can follow the address down to the the page and take a ref on it.
-         */
-        local_irq_save(flags);
-        pgdp = pgd_offset(mm, addr);
-        do {
-                pgd = *pgdp;
-                barrier();
-                next = pgd_addr_end(addr, end);
-                if (pgd_none(pgd))
-                        break;
-                if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
-                        break;
-        } while (pgdp++, addr = next, addr != end);
-        local_irq_restore(flags);
-
-        return nr;
-}
-
-/**
- * get_user_pages_fast() - pin user pages in memory
- * @start: starting user address
- * @nr_pages: number of pages from start to pin
- * @write: whether pages will be written to
- * @pages: array that receives pointers to the pages pinned.
- *         Should be at least nr_pages long.
- *
- * Attempt to pin user pages in memory without taking mm->mmap_sem.
- * If not successful, it will fall back to taking the lock and
- * calling get_user_pages().
- *
- * Returns number of pages pinned. This may be fewer than the number
- * requested. If nr_pages is 0 or negative, returns 0. If no pages
- * were pinned, returns -errno.
- */
-int get_user_pages_fast(unsigned long start, int nr_pages, int write,
-                        struct page **pages)
-{
-        int nr, ret;
-
-        might_sleep();
-        start &= PAGE_MASK;
-        nr = __get_user_pages_fast(start, nr_pages, write, pages);
-        if (nr == nr_pages)
-                return nr;
-
-        /* Try to get the remaining pages with get_user_pages */
-        start += nr << PAGE_SHIFT;
-        pages += nr;
-        ret = get_user_pages_unlocked(start, nr_pages - nr, pages,
-                                      write ? FOLL_WRITE : 0);
-        /* Have to be a bit careful with return values */
-        if (nr > 0)
-                ret = (ret < 0) ? nr : ret + nr;
-        return ret;
-}