From: Greg Kroah-Hartman Date: Mon, 23 Jan 2012 23:54:48 +0000 (-0800) Subject: 3.2-stable patches X-Git-Tag: v3.2.2~3 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=9e982a8f18367f68f639c7ca7a89b8ab45981613;p=thirdparty%2Fkernel%2Fstable-queue.git 3.2-stable patches added patches: shm_unlock-fix-long-unpreemptible-section.patch shm_unlock-fix-unevictable-pages-stranded-after-swap.patch --- diff --git a/queue-3.2/series b/queue-3.2/series index 529d53ce589..83351db99d6 100644 --- a/queue-3.2/series +++ b/queue-3.2/series @@ -125,3 +125,5 @@ proc-clear_refs-do-not-clear-reserved-pages.patch mm-fix-null-ptr-dereference-in-__count_immobile_pages.patch iwlagn-check-for-smps-mode.patch iwlegacy-3945-fix-hw-passive-scan-on-radar-channels.patch +shm_unlock-fix-long-unpreemptible-section.patch +shm_unlock-fix-unevictable-pages-stranded-after-swap.patch diff --git a/queue-3.2/shm_unlock-fix-long-unpreemptible-section.patch b/queue-3.2/shm_unlock-fix-long-unpreemptible-section.patch new file mode 100644 index 00000000000..f483caeff61 --- /dev/null +++ b/queue-3.2/shm_unlock-fix-long-unpreemptible-section.patch @@ -0,0 +1,180 @@ +From 85046579bde15e532983438f86b36856e358f417 Mon Sep 17 00:00:00 2001 +From: Hugh Dickins +Date: Fri, 20 Jan 2012 14:34:19 -0800 +Subject: SHM_UNLOCK: fix long unpreemptible section + +From: Hugh Dickins + +commit 85046579bde15e532983438f86b36856e358f417 upstream. + +scan_mapping_unevictable_pages() is used to make SysV SHM_LOCKed pages +evictable again once the shared memory is unlocked. It does this with +pagevec_lookup()s across the whole object (which might occupy most of +memory), and takes 300ms to unlock 7GB here. A cond_resched() every +PAGEVEC_SIZE pages would be good. + +However, KOSAKI-san points out that this is called under shmem.c's +info->lock, and it's also under shm.c's shm_lock(), both spinlocks. +There is no strong reason for that: we need to take these pages off the +unevictable list soonish, but those locks are not required for it. + +So move the call to scan_mapping_unevictable_pages() from shmem.c's +unlock handling up to shm.c's unlock handling. Remove the recently +added barrier, not needed now we have spin_unlock() before the scan. + +Use get_file(), with subsequent fput(), to make sure we have a reference +to mapping throughout scan_mapping_unevictable_pages(): that's something +that was previously guaranteed by the shm_lock(). + +Remove shmctl's lru_add_drain_all(): we don't fault in pages at SHM_LOCK +time, and we lazily discover them to be Unevictable later, so it serves +no purpose for SHM_LOCK; and serves no purpose for SHM_UNLOCK, since +pages still on pagevec are not marked Unevictable. + +The original code avoided redundant rescans by checking VM_LOCKED flag +at its level: now avoid them by checking shp's SHM_LOCKED. + +The original code called scan_mapping_unevictable_pages() on a locked +area at shm_destroy() time: perhaps we once had accounting cross-checks +which required that, but not now, so skip the overhead and just let +inode eviction deal with them. + +Put check_move_unevictable_page() and scan_mapping_unevictable_pages() +under CONFIG_SHMEM (with stub for the TINY case when ramfs is used), +more as comment than to save space; comment them used for SHM_UNLOCK. 
+ +Signed-off-by: Hugh Dickins +Reviewed-by: KOSAKI Motohiro +Cc: Minchan Kim +Cc: Rik van Riel +Cc: Shaohua Li +Cc: Eric Dumazet +Cc: Johannes Weiner +Cc: Michel Lespinasse +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + ipc/shm.c | 37 ++++++++++++++++++++++--------------- + mm/shmem.c | 7 ------- + mm/vmscan.c | 12 +++++++++++- + 3 files changed, 33 insertions(+), 23 deletions(-) + +--- a/ipc/shm.c ++++ b/ipc/shm.c +@@ -870,9 +870,7 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, + case SHM_LOCK: + case SHM_UNLOCK: + { +- struct file *uninitialized_var(shm_file); +- +- lru_add_drain_all(); /* drain pagevecs to lru lists */ ++ struct file *shm_file; + + shp = shm_lock_check(ns, shmid); + if (IS_ERR(shp)) { +@@ -895,22 +893,31 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, + err = security_shm_shmctl(shp, cmd); + if (err) + goto out_unlock; +- +- if(cmd==SHM_LOCK) { ++ ++ shm_file = shp->shm_file; ++ if (is_file_hugepages(shm_file)) ++ goto out_unlock; ++ ++ if (cmd == SHM_LOCK) { + struct user_struct *user = current_user(); +- if (!is_file_hugepages(shp->shm_file)) { +- err = shmem_lock(shp->shm_file, 1, user); +- if (!err && !(shp->shm_perm.mode & SHM_LOCKED)){ +- shp->shm_perm.mode |= SHM_LOCKED; +- shp->mlock_user = user; +- } ++ err = shmem_lock(shm_file, 1, user); ++ if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) { ++ shp->shm_perm.mode |= SHM_LOCKED; ++ shp->mlock_user = user; + } +- } else if (!is_file_hugepages(shp->shm_file)) { +- shmem_lock(shp->shm_file, 0, shp->mlock_user); +- shp->shm_perm.mode &= ~SHM_LOCKED; +- shp->mlock_user = NULL; ++ goto out_unlock; + } ++ ++ /* SHM_UNLOCK */ ++ if (!(shp->shm_perm.mode & SHM_LOCKED)) ++ goto out_unlock; ++ shmem_lock(shm_file, 0, shp->mlock_user); ++ shp->shm_perm.mode &= ~SHM_LOCKED; ++ shp->mlock_user = NULL; ++ get_file(shm_file); + shm_unlock(shp); ++ scan_mapping_unevictable_pages(shm_file->f_mapping); ++ fput(shm_file); + goto out; + } + case IPC_RMID: +--- a/mm/shmem.c ++++ b/mm/shmem.c +@@ -1068,13 +1068,6 @@ int shmem_lock(struct file *file, int lo + user_shm_unlock(inode->i_size, user); + info->flags &= ~VM_LOCKED; + mapping_clear_unevictable(file->f_mapping); +- /* +- * Ensure that a racing putback_lru_page() can see +- * the pages of this mapping are evictable when we +- * skip them due to !PageLRU during the scan. +- */ +- smp_mb__after_clear_bit(); +- scan_mapping_unevictable_pages(file->f_mapping); + } + retval = 0; + +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -3353,6 +3353,7 @@ int page_evictable(struct page *page, st + return 1; + } + ++#ifdef CONFIG_SHMEM + /** + * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list + * @page: page to check evictability and move to appropriate lru list +@@ -3363,6 +3364,8 @@ int page_evictable(struct page *page, st + * + * Restrictions: zone->lru_lock must be held, page must be on LRU and must + * have PageUnevictable set. ++ * ++ * This function is only used for SysV IPC SHM_UNLOCK. + */ + static void check_move_unevictable_page(struct page *page, struct zone *zone) + { +@@ -3396,6 +3399,8 @@ retry: + * + * Scan all pages in mapping. Check unevictable pages for + * evictability and move them to the appropriate zone lru list. ++ * ++ * This function is only used for SysV IPC SHM_UNLOCK. 
+ */ + void scan_mapping_unevictable_pages(struct address_space *mapping) + { +@@ -3441,9 +3446,14 @@ void scan_mapping_unevictable_pages(stru + pagevec_release(&pvec); + + count_vm_events(UNEVICTABLE_PGSCANNED, pg_scanned); ++ cond_resched(); + } +- + } ++#else ++void scan_mapping_unevictable_pages(struct address_space *mapping) ++{ ++} ++#endif /* CONFIG_SHMEM */ + + static void warn_scan_unevictable_pages(void) + { diff --git a/queue-3.2/shm_unlock-fix-unevictable-pages-stranded-after-swap.patch b/queue-3.2/shm_unlock-fix-unevictable-pages-stranded-after-swap.patch new file mode 100644 index 00000000000..43e5bcaea9a --- /dev/null +++ b/queue-3.2/shm_unlock-fix-unevictable-pages-stranded-after-swap.patch @@ -0,0 +1,333 @@ +From 245132643e1cfcd145bbc86a716c1818371fcb93 Mon Sep 17 00:00:00 2001 +From: Hugh Dickins +Date: Fri, 20 Jan 2012 14:34:21 -0800 +Subject: SHM_UNLOCK: fix Unevictable pages stranded after swap + +From: Hugh Dickins + +commit 245132643e1cfcd145bbc86a716c1818371fcb93 upstream. + +Commit cc39c6a9bbde ("mm: account skipped entries to avoid looping in +find_get_pages") correctly fixed an infinite loop; but left a problem +that find_get_pages() on shmem would return 0 (appearing to callers to +mean end of tree) when it meets a run of nr_pages swap entries. + +The only uses of find_get_pages() on shmem are via pagevec_lookup(), +called from invalidate_mapping_pages(), and from shmctl SHM_UNLOCK's +scan_mapping_unevictable_pages(). The first is already commented, and +not worth worrying about; but the second can leave pages on the +Unevictable list after an unusual sequence of swapping and locking. + +Fix that by using shmem_find_get_pages_and_swap() (then ignoring the +swap) instead of pagevec_lookup(). + +But I don't want to contaminate vmscan.c with shmem internals, nor +shmem.c with LRU locking. So move scan_mapping_unevictable_pages() into +shmem.c, renaming it shmem_unlock_mapping(); and rename +check_move_unevictable_page() to check_move_unevictable_pages(), looping +down an array of pages, oftentimes under the same lock. + +Leave out the "rotate unevictable list" block: that's a leftover from +when this was used for /proc/sys/vm/scan_unevictable_pages, whose flawed +handling involved looking at pages at tail of LRU. + +Was there significance to the sequence first ClearPageUnevictable, then +test page_evictable, then SetPageUnevictable here? I think not, we're +under LRU lock, and have no barriers between those. 
+ +Signed-off-by: Hugh Dickins +Reviewed-by: KOSAKI Motohiro +Cc: Minchan Kim +Cc: Rik van Riel +Cc: Shaohua Li +Cc: Eric Dumazet +Cc: Johannes Weiner +Cc: Michel Lespinasse +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + + +--- + include/linux/shmem_fs.h | 1 + include/linux/swap.h | 2 + ipc/shm.c | 2 + mm/shmem.c | 46 +++++++++++++++-- + mm/vmscan.c | 124 ++++++++++++++--------------------------------- + 5 files changed, 82 insertions(+), 93 deletions(-) + +--- a/include/linux/shmem_fs.h ++++ b/include/linux/shmem_fs.h +@@ -48,6 +48,7 @@ extern struct file *shmem_file_setup(con + loff_t size, unsigned long flags); + extern int shmem_zero_setup(struct vm_area_struct *); + extern int shmem_lock(struct file *file, int lock, struct user_struct *user); ++extern void shmem_unlock_mapping(struct address_space *mapping); + extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, + pgoff_t index, gfp_t gfp_mask); + extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); +--- a/include/linux/swap.h ++++ b/include/linux/swap.h +@@ -272,7 +272,7 @@ static inline int zone_reclaim(struct zo + #endif + + extern int page_evictable(struct page *page, struct vm_area_struct *vma); +-extern void scan_mapping_unevictable_pages(struct address_space *); ++extern void check_move_unevictable_pages(struct page **, int nr_pages); + + extern unsigned long scan_unevictable_pages; + extern int scan_unevictable_handler(struct ctl_table *, int, +--- a/ipc/shm.c ++++ b/ipc/shm.c +@@ -916,7 +916,7 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, + shp->mlock_user = NULL; + get_file(shm_file); + shm_unlock(shp); +- scan_mapping_unevictable_pages(shm_file->f_mapping); ++ shmem_unlock_mapping(shm_file->f_mapping); + fput(shm_file); + goto out; + } +--- a/mm/shmem.c ++++ b/mm/shmem.c +@@ -379,7 +379,7 @@ static int shmem_free_swap(struct addres + /* + * Pagevec may contain swap entries, so shuffle up pages before releasing. + */ +-static void shmem_pagevec_release(struct pagevec *pvec) ++static void shmem_deswap_pagevec(struct pagevec *pvec) + { + int i, j; + +@@ -389,7 +389,36 @@ static void shmem_pagevec_release(struct + pvec->pages[j++] = page; + } + pvec->nr = j; +- pagevec_release(pvec); ++} ++ ++/* ++ * SysV IPC SHM_UNLOCK restore Unevictable pages to their evictable lists. ++ */ ++void shmem_unlock_mapping(struct address_space *mapping) ++{ ++ struct pagevec pvec; ++ pgoff_t indices[PAGEVEC_SIZE]; ++ pgoff_t index = 0; ++ ++ pagevec_init(&pvec, 0); ++ /* ++ * Minor point, but we might as well stop if someone else SHM_LOCKs it. ++ */ ++ while (!mapping_unevictable(mapping)) { ++ /* ++ * Avoid pagevec_lookup(): find_get_pages() returns 0 as if it ++ * has finished, if it hits a row of PAGEVEC_SIZE swap entries. 
++ */ ++ pvec.nr = shmem_find_get_pages_and_swap(mapping, index, ++ PAGEVEC_SIZE, pvec.pages, indices); ++ if (!pvec.nr) ++ break; ++ index = indices[pvec.nr - 1] + 1; ++ shmem_deswap_pagevec(&pvec); ++ check_move_unevictable_pages(pvec.pages, pvec.nr); ++ pagevec_release(&pvec); ++ cond_resched(); ++ } + } + + /* +@@ -440,7 +469,8 @@ void shmem_truncate_range(struct inode * + } + unlock_page(page); + } +- shmem_pagevec_release(&pvec); ++ shmem_deswap_pagevec(&pvec); ++ pagevec_release(&pvec); + mem_cgroup_uncharge_end(); + cond_resched(); + index++; +@@ -470,7 +500,8 @@ void shmem_truncate_range(struct inode * + continue; + } + if (index == start && indices[0] > end) { +- shmem_pagevec_release(&pvec); ++ shmem_deswap_pagevec(&pvec); ++ pagevec_release(&pvec); + break; + } + mem_cgroup_uncharge_start(); +@@ -494,7 +525,8 @@ void shmem_truncate_range(struct inode * + } + unlock_page(page); + } +- shmem_pagevec_release(&pvec); ++ shmem_deswap_pagevec(&pvec); ++ pagevec_release(&pvec); + mem_cgroup_uncharge_end(); + index++; + } +@@ -2439,6 +2471,10 @@ int shmem_lock(struct file *file, int lo + return 0; + } + ++void shmem_unlock_mapping(struct address_space *mapping) ++{ ++} ++ + void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) + { + truncate_inode_pages_range(inode->i_mapping, lstart, lend); +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -636,7 +636,7 @@ redo: + * When racing with an mlock or AS_UNEVICTABLE clearing + * (page is unlocked) make sure that if the other thread + * does not observe our setting of PG_lru and fails +- * isolation/check_move_unevictable_page, ++ * isolation/check_move_unevictable_pages, + * we see PG_mlocked/AS_UNEVICTABLE cleared below and move + * the page back to the evictable list. + * +@@ -3355,104 +3355,56 @@ int page_evictable(struct page *page, st + + #ifdef CONFIG_SHMEM + /** +- * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list +- * @page: page to check evictability and move to appropriate lru list +- * @zone: zone page is in ++ * check_move_unevictable_pages - check pages for evictability and move to appropriate zone lru list ++ * @pages: array of pages to check ++ * @nr_pages: number of pages to check + * +- * Checks a page for evictability and moves the page to the appropriate +- * zone lru list. +- * +- * Restrictions: zone->lru_lock must be held, page must be on LRU and must +- * have PageUnevictable set. ++ * Checks pages for evictability and moves them to the appropriate lru list. + * + * This function is only used for SysV IPC SHM_UNLOCK. 
+ */ +-static void check_move_unevictable_page(struct page *page, struct zone *zone) ++void check_move_unevictable_pages(struct page **pages, int nr_pages) + { +- VM_BUG_ON(PageActive(page)); +- +-retry: +- ClearPageUnevictable(page); +- if (page_evictable(page, NULL)) { +- enum lru_list l = page_lru_base_type(page); +- +- __dec_zone_state(zone, NR_UNEVICTABLE); +- list_move(&page->lru, &zone->lru[l].list); +- mem_cgroup_move_lists(page, LRU_UNEVICTABLE, l); +- __inc_zone_state(zone, NR_INACTIVE_ANON + l); +- __count_vm_event(UNEVICTABLE_PGRESCUED); +- } else { +- /* +- * rotate unevictable list +- */ +- SetPageUnevictable(page); +- list_move(&page->lru, &zone->lru[LRU_UNEVICTABLE].list); +- mem_cgroup_rotate_lru_list(page, LRU_UNEVICTABLE); +- if (page_evictable(page, NULL)) +- goto retry; +- } +-} ++ struct zone *zone = NULL; ++ int pgscanned = 0; ++ int pgrescued = 0; ++ int i; + +-/** +- * scan_mapping_unevictable_pages - scan an address space for evictable pages +- * @mapping: struct address_space to scan for evictable pages +- * +- * Scan all pages in mapping. Check unevictable pages for +- * evictability and move them to the appropriate zone lru list. +- * +- * This function is only used for SysV IPC SHM_UNLOCK. +- */ +-void scan_mapping_unevictable_pages(struct address_space *mapping) +-{ +- pgoff_t next = 0; +- pgoff_t end = (i_size_read(mapping->host) + PAGE_CACHE_SIZE - 1) >> +- PAGE_CACHE_SHIFT; +- struct zone *zone; +- struct pagevec pvec; ++ for (i = 0; i < nr_pages; i++) { ++ struct page *page = pages[i]; ++ struct zone *pagezone; ++ ++ pgscanned++; ++ pagezone = page_zone(page); ++ if (pagezone != zone) { ++ if (zone) ++ spin_unlock_irq(&zone->lru_lock); ++ zone = pagezone; ++ spin_lock_irq(&zone->lru_lock); ++ } + +- if (mapping->nrpages == 0) +- return; ++ if (!PageLRU(page) || !PageUnevictable(page)) ++ continue; + +- pagevec_init(&pvec, 0); +- while (next < end && +- pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { +- int i; +- int pg_scanned = 0; +- +- zone = NULL; +- +- for (i = 0; i < pagevec_count(&pvec); i++) { +- struct page *page = pvec.pages[i]; +- pgoff_t page_index = page->index; +- struct zone *pagezone = page_zone(page); +- +- pg_scanned++; +- if (page_index > next) +- next = page_index; +- next++; +- +- if (pagezone != zone) { +- if (zone) +- spin_unlock_irq(&zone->lru_lock); +- zone = pagezone; +- spin_lock_irq(&zone->lru_lock); +- } ++ if (page_evictable(page, NULL)) { ++ enum lru_list lru = page_lru_base_type(page); + +- if (PageLRU(page) && PageUnevictable(page)) +- check_move_unevictable_page(page, zone); ++ VM_BUG_ON(PageActive(page)); ++ ClearPageUnevictable(page); ++ __dec_zone_state(zone, NR_UNEVICTABLE); ++ list_move(&page->lru, &zone->lru[lru].list); ++ mem_cgroup_move_lists(page, LRU_UNEVICTABLE, lru); ++ __inc_zone_state(zone, NR_INACTIVE_ANON + lru); ++ pgrescued++; + } +- if (zone) +- spin_unlock_irq(&zone->lru_lock); +- pagevec_release(&pvec); ++ } + +- count_vm_events(UNEVICTABLE_PGSCANNED, pg_scanned); +- cond_resched(); ++ if (zone) { ++ __count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued); ++ __count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned); ++ spin_unlock_irq(&zone->lru_lock); + } + } +-#else +-void scan_mapping_unevictable_pages(struct address_space *mapping) +-{ +-} + #endif /* CONFIG_SHMEM */ + + static void warn_scan_unevictable_pages(void)
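
For context, both patches above are driven from userspace through shmctl(SHM_LOCK) and shmctl(SHM_UNLOCK) on a SysV shared memory segment: the SHM_UNLOCK call is what ends up in shmem_unlock_mapping() and check_move_unevictable_pages(), where the pages have to be moved back off the unevictable LRU. The short test sketch below is illustrative only and is not part of either patch; the segment size and variable names are arbitrary choices, and SHM_LOCK may need CAP_IPC_LOCK or a sufficient RLIMIT_MEMLOCK to succeed.

/*
 * Illustrative sketch, not part of the patches above: exercises the
 * shmctl(SHM_LOCK)/shmctl(SHM_UNLOCK) path that shmem_unlock_mapping()
 * and check_move_unevictable_pages() service on SHM_UNLOCK.
 */
#include <stdio.h>
#include <string.h>
#include <sys/ipc.h>
#include <sys/shm.h>

#define SEG_SIZE (64UL << 20)	/* 64 MB segment; size is arbitrary */

int main(void)
{
	int id = shmget(IPC_PRIVATE, SEG_SIZE, IPC_CREAT | 0600);
	if (id < 0) {
		perror("shmget");
		return 1;
	}

	char *p = shmat(id, NULL, 0);
	if (p == (void *)-1) {
		perror("shmat");
		shmctl(id, IPC_RMID, NULL);
		return 1;
	}

	/* SHM_LOCK: pages become unevictable as they are faulted in. */
	if (shmctl(id, SHM_LOCK, NULL) < 0)
		perror("shmctl(SHM_LOCK)");

	memset(p, 0x5a, SEG_SIZE);	/* fault in every page of the segment */

	/*
	 * SHM_UNLOCK: the kernel must scan the mapping and return its pages
	 * to an evictable LRU list -- the scan the two patches above make
	 * preemptible and correct after pages have gone out to swap.
	 */
	if (shmctl(id, SHM_UNLOCK, NULL) < 0)
		perror("shmctl(SHM_UNLOCK)");

	shmdt(p);
	shmctl(id, IPC_RMID, NULL);	/* destroy the segment */
	return 0;
}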