From a1c3bfb2f67ef766de03f1f56bdfff9c8595ab14 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Wed, 29 Jan 2014 14:05:41 -0800
Subject: mm/page-writeback.c: do not count anon pages as dirtyable memory

From: Johannes Weiner <hannes@cmpxchg.org>

commit a1c3bfb2f67ef766de03f1f56bdfff9c8595ab14 upstream.

The VM is currently heavily tuned to avoid swapping.  Whether that is
good or bad is a separate discussion, but as long as the VM won't swap
to make room for dirty cache, we cannot consider anonymous pages when
calculating the amount of dirtyable memory, the baseline to which
dirty_background_ratio and dirty_ratio are applied.
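
For reference, a rough sketch of how those two knobs become page
counts (simplified from the logic around global_dirty_limits() in
mm/page-writeback.c; the real code also honours the dirty_bytes and
dirty_background_bytes sysctls and per-task adjustments):

        unsigned long available = global_dirtyable_memory();
        unsigned long background_thresh, dirty_thresh;

        /* illustrative only: the ratios are percentages of dirtyable memory */
        dirty_thresh      = (vm_dirty_ratio * available) / 100;
        background_thresh = (dirty_background_ratio * available) / 100;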

A simple workload that occupies a significant share (40+%, depending
on memory layout, storage speeds, etc.) of memory with anon/tmpfs
pages and uses the remainder for a streaming writer demonstrates this
problem.  In that case, the actual cache pages are a small fraction of
what is considered dirtyable overall, which results in a relatively
large portion of the cache pages being dirtied.  As kswapd starts
rotating these, random tasks enter direct reclaim and stall on IO.
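
To put hypothetical numbers on that scenario (reserves and the highmem
correction ignored, assuming the default dirty_ratio of 20):

        total RAM                      10 GB
        anon/tmpfs                      4 GB
        file cache + free               6 GB

        before: dirtyable = 10 GB  ->  dirty limit = 2.0 GB  (~33% of cache)
        after:  dirtyable =  6 GB  ->  dirty limit = 1.2 GB  ( 20% of cache)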

Only consider free pages and file pages dirtyable.
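
Note the asymmetry this leaves behind (a sketch using the same
zone_page_state() accessors as the patch; the dirty_balance_reserve
subtraction is omitted for brevity): writeback no longer counts anon
pages at all, while reclaim still counts them when swap is available,
which is why zone_reclaimable_pages() moves into mm/vmscan.c as a
static helper below instead of being deleted outright:

        /* dirtyable: the baseline the dirty limits are scaled against */
        dirtyable  = zone_page_state(zone, NR_FREE_PAGES);
        dirtyable += zone_page_state(zone, NR_INACTIVE_FILE) +
                     zone_page_state(zone, NR_ACTIVE_FILE);

        /* reclaimable: what vmscan may still be able to free */
        reclaimable  = zone_page_state(zone, NR_INACTIVE_FILE) +
                       zone_page_state(zone, NR_ACTIVE_FILE);
        if (get_nr_swap_pages() > 0)  /* anon only counts if it can swap */
                reclaimable += zone_page_state(zone, NR_INACTIVE_ANON) +
                               zone_page_state(zone, NR_ACTIVE_ANON);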

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reported-by: Tejun Heo <tj@kernel.org>
Tested-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 include/linux/vmstat.h |    3 ---
 mm/page-writeback.c    |    6 ++++--
 mm/vmscan.c            |   49 ++++++++++++++-----------------------------------
 3 files changed, 18 insertions(+), 40 deletions(-)

--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -142,9 +142,6 @@ static inline unsigned long zone_page_st
         return x;
 }
 
-extern unsigned long global_reclaimable_pages(void);
-extern unsigned long zone_reclaimable_pages(struct zone *zone);
-
 #ifdef CONFIG_NUMA
 /*
  * Determine the per node value of a stat item. This function
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -202,7 +202,8 @@ static unsigned long zone_dirtyable_memo
         nr_pages = zone_page_state(zone, NR_FREE_PAGES);
         nr_pages -= min(nr_pages, zone->dirty_balance_reserve);
 
-        nr_pages += zone_reclaimable_pages(zone);
+        nr_pages += zone_page_state(zone, NR_INACTIVE_FILE);
+        nr_pages += zone_page_state(zone, NR_ACTIVE_FILE);
 
         return nr_pages;
 }
@@ -255,7 +256,8 @@ static unsigned long global_dirtyable_me
         x = global_page_state(NR_FREE_PAGES);
         x -= min(x, dirty_balance_reserve);
 
-        x += global_reclaimable_pages();
+        x += global_page_state(NR_INACTIVE_FILE);
+        x += global_page_state(NR_ACTIVE_FILE);
 
         if (!vm_highmem_is_dirtyable)
                 x -= highmem_dirtyable_memory(x);
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2117,6 +2117,20 @@ static bool shrink_zones(struct zonelist
         return aborted_reclaim;
 }
 
+static unsigned long zone_reclaimable_pages(struct zone *zone)
+{
+        int nr;
+
+        nr = zone_page_state(zone, NR_ACTIVE_FILE) +
+             zone_page_state(zone, NR_INACTIVE_FILE);
+
+        if (get_nr_swap_pages() > 0)
+                nr += zone_page_state(zone, NR_ACTIVE_ANON) +
+                      zone_page_state(zone, NR_INACTIVE_ANON);
+
+        return nr;
+}
+
 static bool zone_reclaimable(struct zone *zone)
 {
         return zone->pages_scanned < zone_reclaimable_pages(zone) * 6;
@@ -3075,41 +3089,6 @@ void wakeup_kswapd(struct zone *zone, in
         wake_up_interruptible(&pgdat->kswapd_wait);
 }
 
-/*
- * The reclaimable count would be mostly accurate.
- * The less reclaimable pages may be
- * - mlocked pages, which will be moved to unevictable list when encountered
- * - mapped pages, which may require several travels to be reclaimed
- * - dirty pages, which is not "instantly" reclaimable
- */
-unsigned long global_reclaimable_pages(void)
-{
-        int nr;
-
-        nr = global_page_state(NR_ACTIVE_FILE) +
-             global_page_state(NR_INACTIVE_FILE);
-
-        if (get_nr_swap_pages() > 0)
-                nr += global_page_state(NR_ACTIVE_ANON) +
-                      global_page_state(NR_INACTIVE_ANON);
-
-        return nr;
-}
-
-unsigned long zone_reclaimable_pages(struct zone *zone)
-{
-        int nr;
-
-        nr = zone_page_state(zone, NR_ACTIVE_FILE) +
-             zone_page_state(zone, NR_INACTIVE_FILE);
-
-        if (get_nr_swap_pages() > 0)
-                nr += zone_page_state(zone, NR_ACTIVE_ANON) +
-                      zone_page_state(zone, NR_INACTIVE_ANON);
-
-        return nr;
-}
-
 #ifdef CONFIG_HIBERNATION
 /*
  * Try to free `nr_to_reclaim' of memory, system-wide, and return the number of