From b969c4ab9f182a6e1b2a0848be349f99714947b0 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Thu, 12 Jan 2012 17:19:34 -0800
Subject: mm: compaction: determine if dirty pages can be migrated without blocking within ->migratepage

From: Mel Gorman <mgorman@suse.de>

commit b969c4ab9f182a6e1b2a0848be349f99714947b0 upstream.

Stable note: Not tracked in Bugzilla. A fix aimed at preserving page
aging information by reducing LRU list churning had the side-effect
of reducing THP allocation success rates. This was part of a series
to restore the success rates while preserving the reclaim fix.

Asynchronous compaction is used when allocating transparent hugepages to
avoid blocking for long periods of time. Due to reports of stalling,
there was a debate on disabling synchronous compaction, but this severely
impacted allocation success rates. Part of the reason was that many dirty
pages are skipped in asynchronous compaction by the following check:

	if (PageDirty(page) && !sync &&
	    mapping->a_ops->migratepage != migrate_page)
		rc = -EBUSY;

This skips over all mapping aops using buffer_migrate_page() even though
it is possible to migrate some of these pages without blocking. This
patch updates the ->migratepage callback with a "sync" parameter. It is
the responsibility of the callback to fail gracefully if migration would
block.
30
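For illustration only (this sketch is not part of the diff below, and the
filesystem name is hypothetical), a callback honouring the new parameter
could look like this:

	/* examplefs: hypothetical callback honouring the "sync" flag */
	static int examplefs_migratepage(struct address_space *mapping,
			struct page *newpage, struct page *page, bool sync)
	{
		/* Async compaction must not block; defer dirty pages. */
		if (PageDirty(page) && !sync)
			return -EBUSY;

		/* migrate_page() is nonblocking (used by swapcache/tmpfs). */
		return migrate_page(mapping, newpage, page, sync);
	}
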
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Dave Jones <davej@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Andy Isaacson <adi@hexapodia.org>
Cc: Nai Xia <nai.xia@gmail.com>
Cc: Johannes Weiner <jweiner@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 fs/btrfs/disk-io.c      |    4 -
 fs/hugetlbfs/inode.c    |    3 -
 fs/nfs/internal.h       |    2
 fs/nfs/write.c          |    4 -
 include/linux/fs.h      |    9 ++-
 include/linux/migrate.h |    2
 mm/migrate.c            |  129 ++++++++++++++++++++++++++++++++++--------------
 7 files changed, 106 insertions(+), 47 deletions(-)

--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -801,7 +801,7 @@ static int btree_submit_bio_hook(struct
 
 #ifdef CONFIG_MIGRATION
 static int btree_migratepage(struct address_space *mapping,
-			struct page *newpage, struct page *page)
+			struct page *newpage, struct page *page, bool sync)
 {
 	/*
 	 * we can't safely write a btree page from here,
@@ -816,7 +816,7 @@ static int btree_migratepage(struct addr
 	if (page_has_private(page) &&
 	    !try_to_release_page(page, GFP_KERNEL))
 		return -EAGAIN;
-	return migrate_page(mapping, newpage, page);
+	return migrate_page(mapping, newpage, page, sync);
 }
 #endif
 
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -568,7 +568,8 @@ static int hugetlbfs_set_page_dirty(stru
 }
 
 static int hugetlbfs_migrate_page(struct address_space *mapping,
-				struct page *newpage, struct page *page)
+				struct page *newpage, struct page *page,
+				bool sync)
 {
 	int rc;
 
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -315,7 +315,7 @@ void nfs_commit_release_pages(struct nfs
 
 #ifdef CONFIG_MIGRATION
 extern int nfs_migrate_page(struct address_space *,
-			struct page *, struct page *);
+			struct page *, struct page *, bool);
 #else
 #define nfs_migrate_page NULL
 #endif
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1662,7 +1662,7 @@ out_error:
 
 #ifdef CONFIG_MIGRATION
 int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
-		struct page *page)
+		struct page *page, bool sync)
 {
 	/*
 	 * If PagePrivate is set, then the page is currently associated with
@@ -1677,7 +1677,7 @@ int nfs_migrate_page(struct address_spac
 
 	nfs_fscache_release_page(page, GFP_KERNEL);
 
-	return migrate_page(mapping, newpage, page);
+	return migrate_page(mapping, newpage, page, sync);
 }
 #endif
 
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -607,9 +607,12 @@ struct address_space_operations {
 			loff_t offset, unsigned long nr_segs);
 	int (*get_xip_mem)(struct address_space *, pgoff_t, int,
 				void **, unsigned long *);
-	/* migrate the contents of a page to the specified target */
+	/*
+	 * migrate the contents of a page to the specified target. If sync
+	 * is false, it must not block.
+	 */
 	int (*migratepage) (struct address_space *,
-			struct page *, struct page *);
+			struct page *, struct page *, bool);
 	int (*launder_page) (struct page *);
 	int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
 					unsigned long);
@@ -2478,7 +2481,7 @@ extern int generic_check_addressable(uns
 
 #ifdef CONFIG_MIGRATION
 extern int buffer_migrate_page(struct address_space *,
-				struct page *, struct page *);
+				struct page *, struct page *, bool);
 #else
 #define buffer_migrate_page NULL
 #endif
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -11,7 +11,7 @@ typedef struct page *new_page_t(struct p
 
 extern void putback_lru_pages(struct list_head *l);
 extern int migrate_page(struct address_space *,
-			struct page *, struct page *);
+			struct page *, struct page *, bool);
 extern int migrate_pages(struct list_head *l, new_page_t x,
 			unsigned long private, bool offlining,
 			bool sync);
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -220,6 +220,55 @@ out:
 	pte_unmap_unlock(ptep, ptl);
 }
 
+#ifdef CONFIG_BLOCK
+/* Returns true if all buffers are successfully locked */
+static bool buffer_migrate_lock_buffers(struct buffer_head *head, bool sync)
+{
+	struct buffer_head *bh = head;
+
+	/* Simple case, sync compaction */
+	if (sync) {
+		do {
+			get_bh(bh);
+			lock_buffer(bh);
+			bh = bh->b_this_page;
+
+		} while (bh != head);
+
+		return true;
+	}
+
+	/* async case, we cannot block on lock_buffer so use trylock_buffer */
+	do {
+		get_bh(bh);
+		if (!trylock_buffer(bh)) {
+			/*
+			 * We failed to lock the buffer and cannot stall in
+			 * async migration. Release the taken locks
+			 */
+			struct buffer_head *failed_bh = bh;
+			put_bh(failed_bh);
+			bh = head;
+			while (bh != failed_bh) {
+				unlock_buffer(bh);
+				put_bh(bh);
+				bh = bh->b_this_page;
+			}
+			return false;
+		}
+
+		bh = bh->b_this_page;
+	} while (bh != head);
+	return true;
+}
+#else
+static inline bool buffer_migrate_lock_buffers(struct buffer_head *head,
+							bool sync)
+{
+	return true;
+}
+#endif /* CONFIG_BLOCK */
+
 /*
  * Replace the page in the mapping.
  *
@@ -229,7 +278,8 @@ out:
  * 3 for pages with a mapping and PagePrivate/PagePrivate2 set.
  */
 static int migrate_page_move_mapping(struct address_space *mapping,
-		struct page *newpage, struct page *page)
+		struct page *newpage, struct page *page,
+		struct buffer_head *head, bool sync)
 {
 	int expected_count;
 	void **pslot;
@@ -259,6 +309,19 @@ static int migrate_page_move_mapping(str
 	}
 
 	/*
+	 * In the async migration case of moving a page with buffers, lock the
+	 * buffers using trylock before the mapping is moved. If the mapping
+	 * was moved, we later failed to lock the buffers and could not move
+	 * the mapping back due to an elevated page count, we would have to
+	 * block waiting on other references to be dropped.
+	 */
+	if (!sync && head && !buffer_migrate_lock_buffers(head, sync)) {
+		page_unfreeze_refs(page, expected_count);
+		spin_unlock_irq(&mapping->tree_lock);
+		return -EAGAIN;
+	}
+
+	/*
 	 * Now we know that no one else is looking at the page.
 	 */
 	get_page(newpage);	/* add cache reference */
@@ -415,13 +478,13 @@ EXPORT_SYMBOL(fail_migrate_page);
  * Pages are locked upon entry and exit.
  */
 int migrate_page(struct address_space *mapping,
-		struct page *newpage, struct page *page)
+		struct page *newpage, struct page *page, bool sync)
 {
 	int rc;
 
 	BUG_ON(PageWriteback(page));	/* Writeback must be complete */
 
-	rc = migrate_page_move_mapping(mapping, newpage, page);
+	rc = migrate_page_move_mapping(mapping, newpage, page, NULL, sync);
 
 	if (rc)
 		return rc;
@@ -438,28 +501,28 @@ EXPORT_SYMBOL(migrate_page);
  * exist.
  */
 int buffer_migrate_page(struct address_space *mapping,
-		struct page *newpage, struct page *page)
+		struct page *newpage, struct page *page, bool sync)
 {
 	struct buffer_head *bh, *head;
 	int rc;
 
 	if (!page_has_buffers(page))
-		return migrate_page(mapping, newpage, page);
+		return migrate_page(mapping, newpage, page, sync);
 
 	head = page_buffers(page);
 
-	rc = migrate_page_move_mapping(mapping, newpage, page);
+	rc = migrate_page_move_mapping(mapping, newpage, page, head, sync);
 
 	if (rc)
 		return rc;
 
-	bh = head;
-	do {
-		get_bh(bh);
-		lock_buffer(bh);
-		bh = bh->b_this_page;
-
-	} while (bh != head);
+	/*
+	 * In the async case, migrate_page_move_mapping locked the buffers
+	 * with an IRQ-safe spinlock held. In the sync case, the buffers
+	 * need to be locked now
+	 */
+	if (sync)
+		BUG_ON(!buffer_migrate_lock_buffers(head, sync));
 
 	ClearPagePrivate(page);
 	set_page_private(newpage, page_private(page));
@@ -536,10 +599,13 @@ static int writeout(struct address_space
  * Default handling if a filesystem does not provide a migration function.
  */
 static int fallback_migrate_page(struct address_space *mapping,
-		struct page *newpage, struct page *page)
+		struct page *newpage, struct page *page, bool sync)
 {
-	if (PageDirty(page))
+	if (PageDirty(page)) {
+		if (!sync)
+			return -EBUSY;
 		return writeout(mapping, page);
+	}
 
 	/*
 	 * Buffers may be managed in a filesystem specific way.
@@ -549,7 +615,7 @@ static int fallback_migrate_page(struct
 	    !try_to_release_page(page, GFP_KERNEL))
 		return -EAGAIN;
 
-	return migrate_page(mapping, newpage, page);
+	return migrate_page(mapping, newpage, page, sync);
 }
 
 /*
@@ -585,29 +651,18 @@ static int move_to_new_page(struct page
 
 	mapping = page_mapping(page);
 	if (!mapping)
-		rc = migrate_page(mapping, newpage, page);
-	else {
+		rc = migrate_page(mapping, newpage, page, sync);
+	else if (mapping->a_ops->migratepage)
 		/*
-		 * Do not writeback pages if !sync and migratepage is
-		 * not pointing to migrate_page() which is nonblocking
-		 * (swapcache/tmpfs uses migratepage = migrate_page).
+		 * Most pages have a mapping and most filesystems provide a
+		 * migratepage callback. Anonymous pages are part of swap
+		 * space which also has its own migratepage callback. This
+		 * is the most common path for page migration.
 		 */
-		if (PageDirty(page) && !sync &&
-		    mapping->a_ops->migratepage != migrate_page)
-			rc = -EBUSY;
-		else if (mapping->a_ops->migratepage)
-			/*
-			 * Most pages have a mapping and most filesystems
-			 * should provide a migration function. Anonymous
-			 * pages are part of swap space which also has its
-			 * own migration function. This is the most common
-			 * path for page migration.
-			 */
-			rc = mapping->a_ops->migratepage(mapping,
-							newpage, page);
-		else
-			rc = fallback_migrate_page(mapping, newpage, page);
-	}
+		rc = mapping->a_ops->migratepage(mapping,
+						newpage, page, sync);
+	else
+		rc = fallback_migrate_page(mapping, newpage, page, sync);
 
 	if (rc) {
 		newpage->mapping = NULL;