From: Greg Kroah-Hartman
Date: Fri, 26 Jul 2019 13:55:52 +0000 (+0200)
Subject: 4.14-stable patches
X-Git-Tag: v5.2.4~8
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=6be722aa34c3d0235676646836cab56085ab3970;p=thirdparty%2Fkernel%2Fstable-queue.git

4.14-stable patches

added patches:
        mm-vmscan-scan-anonymous-pages-on-file-refaults.patch
---

diff --git a/queue-4.14/mm-vmscan-scan-anonymous-pages-on-file-refaults.patch b/queue-4.14/mm-vmscan-scan-anonymous-pages-on-file-refaults.patch
new file mode 100644
index 00000000000..5c3feeb6826
--- /dev/null
+++ b/queue-4.14/mm-vmscan-scan-anonymous-pages-on-file-refaults.patch
@@ -0,0 +1,241 @@
+From 2c012a4ad1a2cd3fb5a0f9307b9d219f84eda1fa Mon Sep 17 00:00:00 2001
+From: Kuo-Hsin Yang
+Date: Thu, 11 Jul 2019 20:52:04 -0700
+Subject: mm: vmscan: scan anonymous pages on file refaults
+
+From: Kuo-Hsin Yang
+
+commit 2c012a4ad1a2cd3fb5a0f9307b9d219f84eda1fa upstream.
+
+When file refaults are detected and there are many inactive file pages,
+the system never reclaims anonymous pages: file pages are dropped
+aggressively even when there are still a lot of cold anonymous pages,
+and the system thrashes.  This issue impacts the performance of
+applications with large executables, e.g. Chrome.
+
+With this patch, when a file refault is detected, inactive_list_is_low()
+always returns true for file pages in get_scan_count(), enabling the
+scanning of anonymous pages.
+
+The problem can be reproduced with the following test program.
+
+---8<---
+/* headers needed to build the test program */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+
+void fallocate_file(const char *filename, off_t size)
+{
+        struct stat st;
+        int fd;
+
+        if (!stat(filename, &st) && st.st_size >= size)
+                return;
+
+        fd = open(filename, O_WRONLY | O_CREAT, 0600);
+        if (fd < 0) {
+                perror("create file");
+                exit(1);
+        }
+        if (posix_fallocate(fd, 0, size)) {
+                perror("fallocate");
+                exit(1);
+        }
+        close(fd);
+}
+
+long *alloc_anon(long size)
+{
+        long *start = malloc(size);
+        memset(start, 1, size);
+        return start;
+}
+
+long access_file(const char *filename, long size, long rounds)
+{
+        int fd, i;
+        volatile char *start1, *end1, *start2;
+        const int page_size = getpagesize();
+        long sum = 0;
+
+        fd = open(filename, O_RDONLY);
+        if (fd == -1) {
+                perror("open");
+                exit(1);
+        }
+
+        /*
+         * Some applications, e.g. chrome, use a lot of executable file
+         * pages; map some of the pages with the PROT_EXEC flag to
+         * simulate that behavior.
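+         * The first half of the file is mapped with PROT_EXEC below and
+         * the second half with plain PROT_READ, so both executable and
+         * regular file pages are exercised.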
+         */
+        start1 = mmap(NULL, size / 2, PROT_READ | PROT_EXEC, MAP_SHARED,
+                      fd, 0);
+        if (start1 == MAP_FAILED) {
+                perror("mmap");
+                exit(1);
+        }
+        end1 = start1 + size / 2;
+
+        start2 = mmap(NULL, size / 2, PROT_READ, MAP_SHARED, fd, size / 2);
+        if (start2 == MAP_FAILED) {
+                perror("mmap");
+                exit(1);
+        }
+
+        for (i = 0; i < rounds; ++i) {
+                struct timeval before, after;
+                volatile char *ptr1 = start1, *ptr2 = start2;
+
+                gettimeofday(&before, NULL);
+                /* touch one executable and one regular file page per step */
+                for (; ptr1 < end1; ptr1 += page_size, ptr2 += page_size)
+                        sum += *ptr1 + *ptr2;
+                gettimeofday(&after, NULL);
+                printf("File access time, round %d: %f (sec)\n", i,
+                       (after.tv_sec - before.tv_sec) +
+                       (after.tv_usec - before.tv_usec) / 1000000.0);
+        }
+        return sum;
+}
+
+int main(int argc, char *argv[])
+{
+        const long MB = 1024 * 1024;
+        long anon_mb, file_mb, file_rounds;
+        const char filename[] = "large";
+        long *ret1;
+        long ret2;
+
+        if (argc != 4) {
+                printf("usage: thrash ANON_MB FILE_MB FILE_ROUNDS\n");
+                exit(0);
+        }
+        anon_mb = atoi(argv[1]);
+        file_mb = atoi(argv[2]);
+        file_rounds = atoi(argv[3]);
+
+        fallocate_file(filename, file_mb * MB);
+        printf("Allocate %ld MB anonymous pages\n", anon_mb);
+        ret1 = alloc_anon(anon_mb * MB);
+        printf("Access %ld MB file pages\n", file_mb);
+        ret2 = access_file(filename, file_mb * MB, file_rounds);
+        printf("Print result to prevent optimization: %ld\n",
+               *ret1 + ret2);
+        return 0;
+}
+---8<---
+
+Running the test program on a 2 GB RAM VM with kernel 5.2.0-rc5, the
+program fills RAM with 2048 MB of anonymous memory and then accesses a
+200 MB file 10 times.  Without this patch, the file cache is dropped
+aggressively and every access to the file is served from disk.
+
+  $ ./thrash 2048 200 10
+  Allocate 2048 MB anonymous pages
+  Access 200 MB file pages
+  File access time, round 0: 2.489316 (sec)
+  File access time, round 1: 2.581277 (sec)
+  File access time, round 2: 2.487624 (sec)
+  File access time, round 3: 2.449100 (sec)
+  File access time, round 4: 2.420423 (sec)
+  File access time, round 5: 2.343411 (sec)
+  File access time, round 6: 2.454833 (sec)
+  File access time, round 7: 2.483398 (sec)
+  File access time, round 8: 2.572701 (sec)
+  File access time, round 9: 2.493014 (sec)
+
+With this patch, these file pages can be cached.
+
+  $ ./thrash 2048 200 10
+  Allocate 2048 MB anonymous pages
+  Access 200 MB file pages
+  File access time, round 0: 2.475189 (sec)
+  File access time, round 1: 2.440777 (sec)
+  File access time, round 2: 2.411671 (sec)
+  File access time, round 3: 1.955267 (sec)
+  File access time, round 4: 0.029924 (sec)
+  File access time, round 5: 0.000808 (sec)
+  File access time, round 6: 0.000771 (sec)
+  File access time, round 7: 0.000746 (sec)
+  File access time, round 8: 0.000738 (sec)
+  File access time, round 9: 0.000747 (sec)
+
+Checking the swap-out stats during the test [1]: 19006 pages were
+swapped out with this patch and 3418 pages without it.  That is more
+swap-out, but I think it is within a reasonable range when the
+file-backed data set doesn't fit into memory.
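+
+A minimal sketch of a counter sampler for reproducing numbers like the
+ones below (illustrative only: the helper name read_vmstat is not from
+the original instrumented test, which lives in [1]):
+
+        static long read_vmstat(const char *name)
+        {
+                char line[128];
+                size_t len = strlen(name);
+                long val = -1;
+                FILE *f = fopen("/proc/vmstat", "r");
+
+                if (!f)
+                        return -1;
+                /* each line of /proc/vmstat looks like "pswpout 7972443" */
+                while (fgets(line, sizeof(line), f)) {
+                        if (!strncmp(line, name, len) && line[len] == ' ') {
+                                val = atol(line + len + 1);
+                                break;
+                        }
+                }
+                fclose(f);
+                return val;
+        }
+
+Calling read_vmstat("pswpout") before and after each round and printing
+the delta would give per-round figures like the pswpout/pgpgin numbers
+shown below.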
+
+$ ./thrash 2000 100 2100 5 1 # ANON_MB FILE_EXEC FILE_NOEXEC ROUNDS PROCESSES
+Allocate 2000 MB anonymous pages
+active_anon: 1613644, inactive_anon: 348656, active_file: 892, inactive_file: 1384 (kB)
+pswpout: 7972443, pgpgin: 478615246
+Access 100 MB executable file pages
+Access 2100 MB regular file pages
+File access time, round 0: 12.165, (sec)
+active_anon: 1433788, inactive_anon: 478116, active_file: 17896, inactive_file: 24328 (kB)
+File access time, round 1: 11.493, (sec)
+active_anon: 1430576, inactive_anon: 477144, active_file: 25440, inactive_file: 26172 (kB)
+File access time, round 2: 11.455, (sec)
+active_anon: 1427436, inactive_anon: 476060, active_file: 21112, inactive_file: 28808 (kB)
+File access time, round 3: 11.454, (sec)
+active_anon: 1420444, inactive_anon: 473632, active_file: 23216, inactive_file: 35036 (kB)
+File access time, round 4: 11.479, (sec)
+active_anon: 1413964, inactive_anon: 471460, active_file: 31728, inactive_file: 32224 (kB)
+pswpout: 7991449 (+ 19006), pgpgin: 489924366 (+ 11309120)
+
+With 4 processes accessing non-overlapping parts of a large file, 30316
+pages were swapped out with this patch and 5152 pages without it.  The
+swap-out number is small compared to pgpgin.
+
+[1]: https://github.com/vovo/testing/blob/master/mem_thrash.c
+
+Link: http://lkml.kernel.org/r/20190701081038.GA83398@google.com
+Fixes: e9868505987a ("mm,vmscan: only evict file pages when we have plenty")
+Fixes: 7c5bd705d8f9 ("mm: memcg: only evict file pages when we have plenty")
+Signed-off-by: Kuo-Hsin Yang
+Acked-by: Johannes Weiner
+Cc: Michal Hocko
+Cc: Sonny Rao
+Cc: Mel Gorman
+Cc: Rik van Riel
+Cc: Vladimir Davydov
+Cc: Minchan Kim
+Cc: [4.12+]
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+[backported to 4.14.y, 4.19.y, 5.1.y: adjust context]
+Signed-off-by: Kuo-Hsin Yang
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/vmscan.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -2120,7 +2120,7 @@ static void shrink_active_list(unsigned
+  * 10TB     320        32GB
+  */
+ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
+-                                 struct scan_control *sc, bool actual_reclaim)
++                                 struct scan_control *sc, bool trace)
+ {
+         enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE;
+         struct pglist_data *pgdat = lruvec_pgdat(lruvec);
+@@ -2146,7 +2146,7 @@ static bool inactive_list_is_low(struct
+          * rid of the stale workingset quickly.
+          */
+         refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE);
+-        if (file && actual_reclaim && lruvec->refaults != refaults) {
++        if (file && lruvec->refaults != refaults) {
+                 inactive_ratio = 0;
+         } else {
+                 gb = (inactive + active) >> (30 - PAGE_SHIFT);
+@@ -2156,7 +2156,7 @@ static bool inactive_list_is_low(struct
+                 inactive_ratio = 1;
+         }
+
+-        if (actual_reclaim)
++        if (trace)
+                 trace_mm_vmscan_inactive_list_is_low(pgdat->node_id, sc->reclaim_idx,
+                         lruvec_lru_size(lruvec, inactive_lru, MAX_NR_ZONES), inactive,
+                         lruvec_lru_size(lruvec, active_lru, MAX_NR_ZONES), active,
diff --git a/queue-4.14/net-stmmac-re-work-the-queue-selection-for-tso-packets.patch b/queue-4.14/net-stmmac-re-work-the-queue-selection-for-tso-packets.patch
deleted file mode 100644
index d680c536889..00000000000
--- a/queue-4.14/net-stmmac-re-work-the-queue-selection-for-tso-packets.patch
+++ /dev/null
@@ -1,81 +0,0 @@
-From foo@baz Fri 26 Jul 2019 12:22:46 PM CEST
-From: Jose Abreu
-Date: Mon, 8 Jul 2019 14:26:28 +0200
-Subject: net: stmmac: Re-work the queue selection for TSO packets
-
-From: Jose Abreu
-
-[ Upstream commit 4993e5b37e8bcb55ac90f76eb6d2432647273747 ]
-
-Ben Hutchings says:
-    "This is the wrong place to change the queue mapping.
-    stmmac_xmit() is called with a specific TX queue locked,
-    and accessing a different TX queue results in a data race
-    for all of that queue's state.
-
-    I think this commit should be reverted upstream and in all
-    stable branches.  Instead, the driver should implement the
-    ndo_select_queue operation and override the queue mapping there."
-
-Fixes: c5acdbee22a1 ("net: stmmac: Send TSO packets always from Queue 0")
-Suggested-by: Ben Hutchings
-Signed-off-by: Jose Abreu
-Signed-off-by: David S. Miller
-Signed-off-by: Greg Kroah-Hartman
----
- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |   29 ++++++++++++++--------
- 1 file changed, 19 insertions(+), 10 deletions(-)
-
---- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
-+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
-@@ -3010,17 +3010,8 @@ static netdev_tx_t stmmac_xmit(struct sk
- 
-         /* Manage oversized TCP frames for GMAC4 device */
-         if (skb_is_gso(skb) && priv->tso) {
--                if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) {
--                        /*
--                         * There is no way to determine the number of TSO
--                         * capable Queues. Let's use always the Queue 0
--                         * because if TSO is supported then at least this
--                         * one will be capable.
--                         */
--                        skb_set_queue_mapping(skb, 0);
--
-+                if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))
-                         return stmmac_tso_xmit(skb, dev);
--                }
-         }
- 
-         if (unlikely(stmmac_tx_avail(priv, queue) < nfrags + 1)) {
-@@ -3777,6 +3768,23 @@ static int stmmac_ioctl(struct net_devic
-         return ret;
- }
- 
-+static u16 stmmac_select_queue(struct net_device *dev, struct sk_buff *skb,
-+                               struct net_device *sb_dev,
-+                               select_queue_fallback_t fallback)
-+{
-+        if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) {
-+                /*
-+                 * There is no way to determine the number of TSO
-+                 * capable Queues. Let's use always the Queue 0
-+                 * because if TSO is supported then at least this
-+                 * one will be capable.
-+                 */
-+                return 0;
-+        }
-+
-+        return fallback(dev, skb, NULL) % dev->real_num_tx_queues;
-+}
-+
- static int stmmac_set_mac_address(struct net_device *ndev, void *addr)
- {
-         struct stmmac_priv *priv = netdev_priv(ndev);
-@@ -4018,6 +4026,7 @@ static const struct net_device_ops stmma
-         .ndo_set_rx_mode = stmmac_set_rx_mode,
-         .ndo_tx_timeout = stmmac_tx_timeout,
-         .ndo_do_ioctl = stmmac_ioctl,
-+        .ndo_select_queue = stmmac_select_queue,
- #ifdef CONFIG_NET_POLL_CONTROLLER
-         .ndo_poll_controller = stmmac_poll_controller,
- #endif
diff --git a/queue-4.14/series b/queue-4.14/series
index 80fb9c80db6..de8d35b4892 100644
--- a/queue-4.14/series
+++ b/queue-4.14/series
@@ -197,7 +197,6 @@ netrom-fix-a-memory-leak-in-nr_rx_frame.patch
 netrom-hold-sock-when-setting-skb-destructor.patch
 bonding-validate-ip-header-before-check-ipproto_igmp.patch
 net-make-skb_dst_force-return-true-when-dst-is-refcounted.patch
-net-stmmac-re-work-the-queue-selection-for-tso-packets.patch
 tcp-fix-tcp_set_congestion_control-use-from-bpf-hook.patch
 tcp-reset-bytes_acked-and-bytes_received-when-disconnecting.patch
 net-bridge-mcast-fix-stale-nsrcs-pointer-in-igmp3-mld2-report-handling.patch
@@ -213,3 +212,4 @@ mm-add-filemap_fdatawait_range_keep_errors.patch
 jbd2-introduce-jbd2_inode-dirty-range-scoping.patch
 ext4-use-jbd2_inode-dirty-range-scoping.patch
 ext4-allow-directory-holes.patch
+mm-vmscan-scan-anonymous-pages-on-file-refaults.patch