From 3e6e59a4b47e060cb860957aadeb476e2194503a Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Fri, 30 Nov 2018 14:10:21 -0800
Subject: mm/huge_memory: fix lockdep complaint on 32-bit i_size_read()

commit 006d3ff27e884f80bd7d306b041afc415f63598f upstream.

Huge tmpfs testing, on 32-bit kernel with lockdep enabled, showed that
__split_huge_page() was using i_size_read() while holding the irq-safe
lru_lock and page tree lock, but the 32-bit i_size_read() uses an
irq-unsafe seqlock which should not be nested inside them.

Instead, read the i_size earlier in split_huge_page_to_list(), and pass
the end offset down to __split_huge_page(): all while holding head page
lock, which is enough to prevent truncation of that extent before the
page tree lock has been taken.

Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261520070.2275@eggly.anvils
Fixes: baa355fd33142 ("thp: file pages support for split_huge_page()")
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: <stable@vger.kernel.org> [4.8+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 mm/huge_memory.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index c12b441a99f9..15310f14c25e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2410,12 +2410,11 @@ static void __split_huge_page_tail(struct page *head, int tail,
 }
 
 static void __split_huge_page(struct page *page, struct list_head *list,
-		unsigned long flags)
+		pgoff_t end, unsigned long flags)
 {
 	struct page *head = compound_head(page);
 	struct zone *zone = page_zone(head);
 	struct lruvec *lruvec;
-	pgoff_t end = -1;
 	int i;
 
 	lruvec = mem_cgroup_page_lruvec(head, zone->zone_pgdat);
@@ -2423,9 +2422,6 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 	/* complete memcg works before add pages to LRU */
 	mem_cgroup_split_huge_fixup(head);
 
-	if (!PageAnon(page))
-		end = DIV_ROUND_UP(i_size_read(head->mapping->host), PAGE_SIZE);
-
 	for (i = HPAGE_PMD_NR - 1; i >= 1; i--) {
 		__split_huge_page_tail(head, i, lruvec, list);
 		/* Some pages can be beyond i_size: drop them from page cache */
@@ -2597,6 +2593,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 	int count, mapcount, extra_pins, ret;
 	bool mlocked;
 	unsigned long flags;
+	pgoff_t end;
 
 	VM_BUG_ON_PAGE(is_huge_zero_page(page), page);
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
@@ -2619,6 +2616,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 			ret = -EBUSY;
 			goto out;
 		}
+		end = -1;
 		mapping = NULL;
 		anon_vma_lock_write(anon_vma);
 	} else {
@@ -2632,6 +2630,15 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 
 		anon_vma = NULL;
 		i_mmap_lock_read(mapping);
+
+		/*
+		 *__split_huge_page() may need to trim off pages beyond EOF:
+		 * but on 32-bit, i_size_read() takes an irq-unsafe seqlock,
+		 * which cannot be nested inside the page tree lock. So note
+		 * end now: i_size itself may be changed at any moment, but
+		 * head page lock is good enough to serialize the trimming.
+		 */
+		end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
 	}
 
 	/*
@@ -2681,7 +2688,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 		if (mapping)
 			__dec_node_page_state(page, NR_SHMEM_THPS);
 		spin_unlock(&pgdata->split_queue_lock);
-		__split_huge_page(page, list, flags);
+		__split_huge_page(page, list, end, flags);
 		if (PageSwapCache(head)) {
 			swp_entry_t entry = { .val = page_private(head) };
 
-- 
2.17.1
