--- /dev/null
+From 88dddc11a8d6b09201b4db9d255b3394d9bc9e57 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Fri, 19 Jul 2019 18:41:10 +0200
+Subject: KVM: nVMX: do not use dangling shadow VMCS after guest reset
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 88dddc11a8d6b09201b4db9d255b3394d9bc9e57 upstream.
+
+If a KVM guest is reset while running a nested guest, free_nested will
+disable the shadow VMCS execution control in the vmcs01.  However, on
+the next KVM_RUN, vmx_vcpu_run would nevertheless try to sync the
+vmcs12 to the shadow VMCS, which has since been freed.
+
+This causes a vmptrld of a NULL pointer on my machine, while Jan
+reports that the host hangs altogether.  Fix this by clearing
+sync_shadow_vmcs at the point where the shadow VMCS is disabled, and
+by bailing out of the copy functions if the shadow VMCS is gone.
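+
+As a minimal user-space sketch of the pattern (simplified stand-ins
+for the KVM structures, not kernel code): a "sync on next run" flag
+has to be cleared at the same point the shadow VMCS is released,
+otherwise the next run dereferences a dangling pointer.
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+struct vmcs { int data; };
+
+struct nested_sketch {
+	struct vmcs *shadow_vmcs;	/* freed on guest reset */
+	bool sync_shadow_vmcs;		/* "sync vmcs12 on next run" */
+};
+
+/* Reset path: release the shadow VMCS and clear the sync flag in the
+ * same place, as this patch does in vmx_disable_shadow_vmcs. */
+void disable_shadow_vmcs(struct nested_sketch *n)
+{
+	free(n->shadow_vmcs);
+	n->shadow_vmcs = NULL;
+	n->sync_shadow_vmcs = false;	/* the fix */
+}
+
+/* Next run: the NULL check mirrors the WARN_ON guards added below. */
+void vcpu_run(struct nested_sketch *n)
+{
+	if (!n->sync_shadow_vmcs)
+		return;
+	if (!n->shadow_vmcs) {
+		fprintf(stderr, "dangling shadow VMCS\n");
+		return;
+	}
+	n->shadow_vmcs->data = 1;	/* stands in for the vmcs12 sync */
+}
+
+int main(void)
+{
+	struct nested_sketch n = { malloc(sizeof(struct vmcs)), true };
+
+	disable_shadow_vmcs(&n);	/* guest reset while nested */
+	vcpu_run(&n);			/* safe with both changes applied */
+	return 0;
+}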
+
+Reported-by: Jan Kiszka <jan.kiszka@siemens.com>
+Cc: Liran Alon <liran.alon@oracle.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/vmx.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -8457,6 +8457,7 @@ static void vmx_disable_shadow_vmcs(stru
+ {
+ vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS);
+ vmcs_write64(VMCS_LINK_POINTER, -1ull);
++ vmx->nested.sync_shadow_vmcs = false;
+ }
+
+ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
+@@ -8468,7 +8469,6 @@ static inline void nested_release_vmcs12
+ /* copy to memory all shadowed fields in case
+ they were modified */
+ copy_shadow_to_vmcs12(vmx);
+- vmx->nested.sync_shadow_vmcs = false;
+ vmx_disable_shadow_vmcs(vmx);
+ }
+ vmx->nested.posted_intr_nv = -1;
+@@ -8668,6 +8668,9 @@ static void copy_shadow_to_vmcs12(struct
+ u64 field_value;
+ struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
+
++ if (WARN_ON(!shadow_vmcs))
++ return;
++
+ preempt_disable();
+
+ vmcs_load(shadow_vmcs);
+@@ -8706,6 +8709,9 @@ static void copy_vmcs12_to_shadow(struct
+ u64 field_value = 0;
+ struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
+
++ if (WARN_ON(!shadow_vmcs))
++ return;
++
+ vmcs_load(shadow_vmcs);
+
+ for (q = 0; q < ARRAY_SIZE(fields); q++) {
--- /dev/null
+From 2c012a4ad1a2cd3fb5a0f9307b9d219f84eda1fa Mon Sep 17 00:00:00 2001
+From: Kuo-Hsin Yang <vovoy@chromium.org>
+Date: Thu, 11 Jul 2019 20:52:04 -0700
+Subject: mm: vmscan: scan anonymous pages on file refaults
+
+From: Kuo-Hsin Yang <vovoy@chromium.org>
+
+commit 2c012a4ad1a2cd3fb5a0f9307b9d219f84eda1fa upstream.
+
+When file refaults are detected and there are many inactive file pages,
+the system never reclaims anonymous pages: file pages are dropped
+aggressively even though there are still plenty of cold anonymous
+pages, and the system thrashes.  This issue hurts the performance of
+applications with large executables, e.g. chrome.
+
+With this patch, when a file refault is detected, inactive_list_is_low()
+always returns true for file pages in get_scan_count(), enabling the
+scanning of anonymous pages; see the sketch below.
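+
+A minimal sketch of the resulting check for the file LRU, with made-up
+field names standing in for the real counters (the actual change is in
+the mm/vmscan.c hunks below):
+
+#include <stdbool.h>
+
+struct lruvec_sketch {
+	unsigned long refaults;		/* snapshot from the last reclaim */
+	unsigned long workingset_activate;	/* current refault counter */
+	unsigned long inactive, active;		/* LRU list sizes */
+	unsigned long inactive_ratio;		/* size-based heuristic */
+};
+
+bool file_inactive_is_low(const struct lruvec_sketch *l)
+{
+	/* Refaults since the last snapshot: report the inactive file
+	 * list as low so that get_scan_count() also scans anon. */
+	if (l->workingset_activate != l->refaults)
+		return true;
+
+	/* No refaults: keep the usual size-ratio heuristic. */
+	return l->inactive * l->inactive_ratio < l->active;
+}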
+
+The problem can be reproduced by the following test program.
+
+---8<---
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+void fallocate_file(const char *filename, off_t size)
+{
+ struct stat st;
+ int fd;
+
+ if (!stat(filename, &st) && st.st_size >= size)
+ return;
+
+ fd = open(filename, O_WRONLY | O_CREAT, 0600);
+ if (fd < 0) {
+ perror("create file");
+ exit(1);
+ }
+ if (posix_fallocate(fd, 0, size)) {
+ perror("fallocate");
+ exit(1);
+ }
+ close(fd);
+}
+
+long *alloc_anon(long size)
+{
+ long *start = malloc(size);
+ memset(start, 1, size);
+ return start;
+}
+
+long access_file(const char *filename, long size, long rounds)
+{
+ int fd, i;
+ volatile char *start1, *end1, *start2;
+ const int page_size = getpagesize();
+ long sum = 0;
+
+ fd = open(filename, O_RDONLY);
+ if (fd == -1) {
+ perror("open");
+ exit(1);
+ }
+
+ /*
+ * Some applications, e.g. chrome, use a lot of executable file
+ * pages, map some of the pages with PROT_EXEC flag to simulate
+ * the behavior.
+ */
+ start1 = mmap(NULL, size / 2, PROT_READ | PROT_EXEC, MAP_SHARED,
+ fd, 0);
+ if (start1 == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+ end1 = start1 + size / 2;
+
+ start2 = mmap(NULL, size / 2, PROT_READ, MAP_SHARED, fd, size / 2);
+ if (start2 == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+
+ for (i = 0; i < rounds; ++i) {
+ struct timeval before, after;
+ volatile char *ptr1 = start1, *ptr2 = start2;
+ gettimeofday(&before, NULL);
+ for (; ptr1 < end1; ptr1 += page_size, ptr2 += page_size)
+ sum += *ptr1 + *ptr2;
+ gettimeofday(&after, NULL);
+ printf("File access time, round %d: %f (sec)
+", i,
+ (after.tv_sec - before.tv_sec) +
+ (after.tv_usec - before.tv_usec) / 1000000.0);
+ }
+ return sum;
+}
+
+int main(int argc, char *argv[])
+{
+ const long MB = 1024 * 1024;
+ long anon_mb, file_mb, file_rounds;
+ const char filename[] = "large";
+ long *ret1;
+ long ret2;
+
+ if (argc != 4) {
+ printf("usage: thrash ANON_MB FILE_MB FILE_ROUNDS
+");
+ exit(0);
+ }
+ anon_mb = atoi(argv[1]);
+ file_mb = atoi(argv[2]);
+ file_rounds = atoi(argv[3]);
+
+ fallocate_file(filename, file_mb * MB);
+ printf("Allocate %ld MB anonymous pages
+", anon_mb);
+ ret1 = alloc_anon(anon_mb * MB);
+ printf("Access %ld MB file pages
+", file_mb);
+ ret2 = access_file(filename, file_mb * MB, file_rounds);
+ printf("Print result to prevent optimization: %ld
+",
+ *ret1 + ret2);
+ return 0;
+}
+---8<---
+
+Running the test program on a 2 GB RAM VM with kernel 5.2.0-rc5, the
+program fills RAM with 2048 MB of anonymous pages and then accesses a
+200 MB file 10 times.  Without this patch, the file cache is dropped
+aggressively and every access to the file comes from disk.
+
+ $ ./thrash 2048 200 10
+ Allocate 2048 MB anonymous pages
+ Access 200 MB file pages
+ File access time, round 0: 2.489316 (sec)
+ File access time, round 1: 2.581277 (sec)
+ File access time, round 2: 2.487624 (sec)
+ File access time, round 3: 2.449100 (sec)
+ File access time, round 4: 2.420423 (sec)
+ File access time, round 5: 2.343411 (sec)
+ File access time, round 6: 2.454833 (sec)
+ File access time, round 7: 2.483398 (sec)
+ File access time, round 8: 2.572701 (sec)
+ File access time, round 9: 2.493014 (sec)
+
+With this patch, these file pages can be cached.
+
+ $ ./thrash 2048 200 10
+ Allocate 2048 MB anonymous pages
+ Access 200 MB file pages
+ File access time, round 0: 2.475189 (sec)
+ File access time, round 1: 2.440777 (sec)
+ File access time, round 2: 2.411671 (sec)
+ File access time, round 3: 1.955267 (sec)
+ File access time, round 4: 0.029924 (sec)
+ File access time, round 5: 0.000808 (sec)
+ File access time, round 6: 0.000771 (sec)
+ File access time, round 7: 0.000746 (sec)
+ File access time, round 8: 0.000738 (sec)
+ File access time, round 9: 0.000747 (sec)
+
+I checked the swap-out stats during the test [1]: 19006 pages were
+swapped out with this patch versus 3418 pages without it.  There is
+more swap-out, but I think it is within a reasonable range when the
+file-backed data set does not fit into memory.
+
+$ ./thrash 2000 100 2100 5 1 # ANON_MB FILE_EXEC FILE_NOEXEC ROUNDS PROCESSES
+Allocate 2000 MB anonymous pages
+active_anon: 1613644, inactive_anon: 348656, active_file: 892, inactive_file: 1384 (kB)
+pswpout: 7972443, pgpgin: 478615246
+Access 100 MB executable file pages
+Access 2100 MB regular file pages
+File access time, round 0: 12.165, (sec)
+active_anon: 1433788, inactive_anon: 478116, active_file: 17896, inactive_file: 24328 (kB)
+File access time, round 1: 11.493, (sec)
+active_anon: 1430576, inactive_anon: 477144, active_file: 25440, inactive_file: 26172 (kB)
+File access time, round 2: 11.455, (sec)
+active_anon: 1427436, inactive_anon: 476060, active_file: 21112, inactive_file: 28808 (kB)
+File access time, round 3: 11.454, (sec)
+active_anon: 1420444, inactive_anon: 473632, active_file: 23216, inactive_file: 35036 (kB)
+File access time, round 4: 11.479, (sec)
+active_anon: 1413964, inactive_anon: 471460, active_file: 31728, inactive_file: 32224 (kB)
+pswpout: 7991449 (+ 19006), pgpgin: 489924366 (+ 11309120)
+
+With 4 processes accessing non-overlapping parts of a large file, 30316
+pages were swapped out with this patch versus 5152 pages without it.
+The swap-out number is small compared to pgpgin.
+
+[1]: https://github.com/vovo/testing/blob/master/mem_thrash.c
+
+Link: http://lkml.kernel.org/r/20190701081038.GA83398@google.com
+Fixes: e9868505987a ("mm,vmscan: only evict file pages when we have plenty")
+Fixes: 7c5bd705d8f9 ("mm: memcg: only evict file pages when we have plenty")
+Signed-off-by: Kuo-Hsin Yang <vovoy@chromium.org>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Sonny Rao <sonnyrao@chromium.org>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: <stable@vger.kernel.org> [4.12+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+[backported to 4.14.y, 4.19.y, 5.1.y: adjust context]
+Signed-off-by: Kuo-Hsin Yang <vovoy@chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/vmscan.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -2190,7 +2190,7 @@ static void shrink_active_list(unsigned
+ * 10TB 320 32GB
+ */
+ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
+- struct scan_control *sc, bool actual_reclaim)
++ struct scan_control *sc, bool trace)
+ {
+ enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE;
+ struct pglist_data *pgdat = lruvec_pgdat(lruvec);
+@@ -2216,7 +2216,7 @@ static bool inactive_list_is_low(struct
+ * rid of the stale workingset quickly.
+ */
+ refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE);
+- if (file && actual_reclaim && lruvec->refaults != refaults) {
++ if (file && lruvec->refaults != refaults) {
+ inactive_ratio = 0;
+ } else {
+ gb = (inactive + active) >> (30 - PAGE_SHIFT);
+@@ -2226,7 +2226,7 @@ static bool inactive_list_is_low(struct
+ inactive_ratio = 1;
+ }
+
+- if (actual_reclaim)
++ if (trace)
+ trace_mm_vmscan_inactive_list_is_low(pgdat->node_id, sc->reclaim_idx,
+ lruvec_lru_size(lruvec, inactive_lru, MAX_NR_ZONES), inactive,
+ lruvec_lru_size(lruvec, active_lru, MAX_NR_ZONES), active,