From: Greg Kroah-Hartman Date: Wed, 13 Sep 2023 08:13:57 +0000 (+0200) Subject: 6.1-stable patches X-Git-Tag: v5.10.195~79 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=a5f43aeb1f8d2ce36eb275e18e325e426725b49b;p=thirdparty%2Fkernel%2Fstable-queue.git 6.1-stable patches added patches: mm-multi-gen-lru-rename-lrugen-lists-to-lrugen-folios.patch multi-gen-lru-fix-per-zone-reclaim.patch net-ipv6-skb-symmetric-hash-should-incorporate-transport-ports.patch series --- diff --git a/queue-6.1/mm-multi-gen-lru-rename-lrugen-lists-to-lrugen-folios.patch b/queue-6.1/mm-multi-gen-lru-rename-lrugen-lists-to-lrugen-folios.patch new file mode 100644 index 00000000000..c922082bfe5 --- /dev/null +++ b/queue-6.1/mm-multi-gen-lru-rename-lrugen-lists-to-lrugen-folios.patch @@ -0,0 +1,197 @@ +From 6df1b2212950aae2b2188c6645ea18e2a9e3fdd5 Mon Sep 17 00:00:00 2001 +From: Yu Zhao +Date: Wed, 21 Dec 2022 21:19:00 -0700 +Subject: mm: multi-gen LRU: rename lrugen->lists[] to lrugen->folios[] + +From: Yu Zhao + +commit 6df1b2212950aae2b2188c6645ea18e2a9e3fdd5 upstream. + +lru_gen_folio will be chained into per-node lists by the coming +lrugen->list. + +Link: https://lkml.kernel.org/r/20221222041905.2431096-3-yuzhao@google.com +Signed-off-by: Yu Zhao +Cc: Johannes Weiner +Cc: Jonathan Corbet +Cc: Michael Larabel +Cc: Michal Hocko +Cc: Mike Rapoport +Cc: Roman Gushchin +Cc: Suren Baghdasaryan +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/mm/multigen_lru.rst | 8 ++++---- + include/linux/mm_inline.h | 4 ++-- + include/linux/mmzone.h | 8 ++++---- + mm/vmscan.c | 20 ++++++++++---------- + 4 files changed, 20 insertions(+), 20 deletions(-) + +--- a/Documentation/mm/multigen_lru.rst ++++ b/Documentation/mm/multigen_lru.rst +@@ -89,15 +89,15 @@ variables are monotonically increasing. + + Generation numbers are truncated into ``order_base_2(MAX_NR_GENS+1)`` + bits in order to fit into the gen counter in ``folio->flags``. 
Each +-truncated generation number is an index to ``lrugen->lists[]``. The ++truncated generation number is an index to ``lrugen->folios[]``. The + sliding window technique is used to track at least ``MIN_NR_GENS`` and + at most ``MAX_NR_GENS`` generations. The gen counter stores a value + within ``[1, MAX_NR_GENS]`` while a page is on one of +-``lrugen->lists[]``; otherwise it stores zero. ++``lrugen->folios[]``; otherwise it stores zero. + + Each generation is divided into multiple tiers. A page accessed ``N`` + times through file descriptors is in tier ``order_base_2(N)``. Unlike +-generations, tiers do not have dedicated ``lrugen->lists[]``. In ++generations, tiers do not have dedicated ``lrugen->folios[]``. In + contrast to moving across generations, which requires the LRU lock, + moving across tiers only involves atomic operations on + ``folio->flags`` and therefore has a negligible cost. A feedback loop +@@ -127,7 +127,7 @@ page mapped by this PTE to ``(max_seq%MA + Eviction + -------- + The eviction consumes old generations. Given an ``lruvec``, it +-increments ``min_seq`` when ``lrugen->lists[]`` indexed by ++increments ``min_seq`` when ``lrugen->folios[]`` indexed by + ``min_seq%MAX_NR_GENS`` becomes empty. To select a type and a tier to + evict from, it first compares ``min_seq[]`` to select the older type. 
+ If both types are equally old, it selects the one whose first tier has +--- a/include/linux/mm_inline.h ++++ b/include/linux/mm_inline.h +@@ -256,9 +256,9 @@ static inline bool lru_gen_add_folio(str + lru_gen_update_size(lruvec, folio, -1, gen); + /* for folio_rotate_reclaimable() */ + if (reclaiming) +- list_add_tail(&folio->lru, &lrugen->lists[gen][type][zone]); ++ list_add_tail(&folio->lru, &lrugen->folios[gen][type][zone]); + else +- list_add(&folio->lru, &lrugen->lists[gen][type][zone]); ++ list_add(&folio->lru, &lrugen->folios[gen][type][zone]); + + return true; + } +--- a/include/linux/mmzone.h ++++ b/include/linux/mmzone.h +@@ -312,7 +312,7 @@ enum lruvec_flags { + * They form a sliding window of a variable size [MIN_NR_GENS, MAX_NR_GENS]. An + * offset within MAX_NR_GENS, i.e., gen, indexes the LRU list of the + * corresponding generation. The gen counter in folio->flags stores gen+1 while +- * a page is on one of lrugen->lists[]. Otherwise it stores 0. ++ * a page is on one of lrugen->folios[]. Otherwise it stores 0. + * + * A page is added to the youngest generation on faulting. The aging needs to + * check the accessed bit at least twice before handing this page over to the +@@ -324,8 +324,8 @@ enum lruvec_flags { + * rest of generations, if they exist, are considered inactive. See + * lru_gen_is_active(). + * +- * PG_active is always cleared while a page is on one of lrugen->lists[] so that +- * the aging needs not to worry about it. And it's set again when a page ++ * PG_active is always cleared while a page is on one of lrugen->folios[] so ++ * that the aging needs not to worry about it. And it's set again when a page + * considered active is isolated for non-reclaiming purposes, e.g., migration. + * See lru_gen_add_folio() and lru_gen_del_folio(). 
+ * +@@ -412,7 +412,7 @@ struct lru_gen_struct { + /* the birth time of each generation in jiffies */ + unsigned long timestamps[MAX_NR_GENS]; + /* the multi-gen LRU lists, lazily sorted on eviction */ +- struct list_head lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; ++ struct list_head folios[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; + /* the multi-gen LRU sizes, eventually consistent */ + long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; + /* the exponential moving average of refaulted */ +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -4258,7 +4258,7 @@ static bool inc_min_seq(struct lruvec *l + + /* prevent cold/hot inversion if force_scan is true */ + for (zone = 0; zone < MAX_NR_ZONES; zone++) { +- struct list_head *head = &lrugen->lists[old_gen][type][zone]; ++ struct list_head *head = &lrugen->folios[old_gen][type][zone]; + + while (!list_empty(head)) { + struct folio *folio = lru_to_folio(head); +@@ -4269,7 +4269,7 @@ static bool inc_min_seq(struct lruvec *l + VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio); + + new_gen = folio_inc_gen(lruvec, folio, false); +- list_move_tail(&folio->lru, &lrugen->lists[new_gen][type][zone]); ++ list_move_tail(&folio->lru, &lrugen->folios[new_gen][type][zone]); + + if (!--remaining) + return false; +@@ -4297,7 +4297,7 @@ static bool try_to_inc_min_seq(struct lr + gen = lru_gen_from_seq(min_seq[type]); + + for (zone = 0; zone < MAX_NR_ZONES; zone++) { +- if (!list_empty(&lrugen->lists[gen][type][zone])) ++ if (!list_empty(&lrugen->folios[gen][type][zone])) + goto next; + } + +@@ -4762,7 +4762,7 @@ static bool sort_folio(struct lruvec *lr + + /* promoted */ + if (gen != lru_gen_from_seq(lrugen->min_seq[type])) { +- list_move(&folio->lru, &lrugen->lists[gen][type][zone]); ++ list_move(&folio->lru, &lrugen->folios[gen][type][zone]); + return true; + } + +@@ -4771,7 +4771,7 @@ static bool sort_folio(struct lruvec *lr + int hist = lru_hist_from_seq(lrugen->min_seq[type]); + + gen = folio_inc_gen(lruvec, folio, 
false); +- list_move_tail(&folio->lru, &lrugen->lists[gen][type][zone]); ++ list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]); + + WRITE_ONCE(lrugen->protected[hist][type][tier - 1], + lrugen->protected[hist][type][tier - 1] + delta); +@@ -4783,7 +4783,7 @@ static bool sort_folio(struct lruvec *lr + if (folio_test_locked(folio) || folio_test_writeback(folio) || + (type == LRU_GEN_FILE && folio_test_dirty(folio))) { + gen = folio_inc_gen(lruvec, folio, true); +- list_move(&folio->lru, &lrugen->lists[gen][type][zone]); ++ list_move(&folio->lru, &lrugen->folios[gen][type][zone]); + return true; + } + +@@ -4850,7 +4850,7 @@ static int scan_folios(struct lruvec *lr + for (zone = sc->reclaim_idx; zone >= 0; zone--) { + LIST_HEAD(moved); + int skipped = 0; +- struct list_head *head = &lrugen->lists[gen][type][zone]; ++ struct list_head *head = &lrugen->folios[gen][type][zone]; + + while (!list_empty(head)) { + struct folio *folio = lru_to_folio(head); +@@ -5250,7 +5250,7 @@ static bool __maybe_unused state_is_vali + int gen, type, zone; + + for_each_gen_type_zone(gen, type, zone) { +- if (!list_empty(&lrugen->lists[gen][type][zone])) ++ if (!list_empty(&lrugen->folios[gen][type][zone])) + return false; + } + } +@@ -5295,7 +5295,7 @@ static bool drain_evictable(struct lruve + int remaining = MAX_LRU_BATCH; + + for_each_gen_type_zone(gen, type, zone) { +- struct list_head *head = &lruvec->lrugen.lists[gen][type][zone]; ++ struct list_head *head = &lruvec->lrugen.folios[gen][type][zone]; + + while (!list_empty(head)) { + bool success; +@@ -5832,7 +5832,7 @@ void lru_gen_init_lruvec(struct lruvec * + lrugen->timestamps[i] = jiffies; + + for_each_gen_type_zone(gen, type, zone) +- INIT_LIST_HEAD(&lrugen->lists[gen][type][zone]); ++ INIT_LIST_HEAD(&lrugen->folios[gen][type][zone]); + + lruvec->mm_state.seq = MIN_NR_GENS; + init_waitqueue_head(&lruvec->mm_state.wait); diff --git a/queue-6.1/multi-gen-lru-fix-per-zone-reclaim.patch 
b/queue-6.1/multi-gen-lru-fix-per-zone-reclaim.patch new file mode 100644 index 00000000000..6a65ec29fe6 --- /dev/null +++ b/queue-6.1/multi-gen-lru-fix-per-zone-reclaim.patch @@ -0,0 +1,154 @@ +From 669281ee7ef731fb5204df9d948669bf32a5e68d Mon Sep 17 00:00:00 2001 +From: Kalesh Singh +Date: Tue, 1 Aug 2023 19:56:02 -0700 +Subject: Multi-gen LRU: fix per-zone reclaim + +From: Kalesh Singh + +commit 669281ee7ef731fb5204df9d948669bf32a5e68d upstream. + +MGLRU has a LRU list for each zone for each type (anon/file) in each +generation: + + long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; + +The min_seq (oldest generation) can progress independently for each +type but the max_seq (youngest generation) is shared for both anon and +file. This is to maintain a common frame of reference. + +In order for eviction to advance the min_seq of a type, all the per-zone +lists in the oldest generation of that type must be empty. + +The eviction logic only considers pages from eligible zones for +eviction or promotion. + + scan_folios() { + ... + for (zone = sc->reclaim_idx; zone >= 0; zone--) { + ... + sort_folio(); // Promote + ... + isolate_folio(); // Evict + } + ... + } + +Consider the system has the movable zone configured and default 4 +generations. The current state of the system is as shown below +(only illustrating one type for simplicity): + +Type: ANON + + Zone DMA32 Normal Movable Device + + Gen 0 0 0 4GB 0 + + Gen 1 0 1GB 1MB 0 + + Gen 2 1MB 4GB 1MB 0 + + Gen 3 1MB 1MB 1MB 0 + +Now consider there is a GFP_KERNEL allocation request (eligible zone +index <= Normal), evict_folios() will return without doing any work +since there are no pages to scan in the eligible zones of the oldest +generation. Reclaim won't make progress until triggered from a ZONE_MOVABLE +allocation request; which may not happen soon if there is a lot of free +memory in the movable zone. 
This can lead to OOM kills, although there +are 1GB of pages in the Normal zone of Gen 1 that we have not yet tried to +reclaim. + +This issue is not seen in the conventional active/inactive LRU since +there are no per-zone lists. + +If there are no (not enough) folios to scan in the eligible zones, move +folios from ineligible zone (zone_index > reclaim_index) to the next +generation. This allows for the progression of min_seq and reclaiming +from the next generation (Gen 1). + +Qualcomm, Mediatek and raspberrypi [1] discovered this issue independently. + +[1] https://github.com/raspberrypi/linux/issues/5395 + +Link: https://lkml.kernel.org/r/20230802025606.346758-1-kaleshsingh@google.com +Fixes: ac35a4902374 ("mm: multi-gen LRU: minimal implementation") +Signed-off-by: Kalesh Singh +Reported-by: Charan Teja Kalla +Reported-by: Lecopzer Chen +Tested-by: AngeloGioacchino Del Regno [mediatek] +Tested-by: Charan Teja Kalla +Cc: Yu Zhao +Cc: Barry Song +Cc: Brian Geffon +Cc: Jan Alexander Steffens (heftig) +Cc: Matthias Brugger +Cc: Oleksandr Natalenko +Cc: Qi Zheng +Cc: Steven Barrett +Cc: Suleiman Souhlal +Cc: Suren Baghdasaryan +Cc: Aneesh Kumar K V +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/vmscan.c | 18 ++++++++++++++---- + 1 file changed, 14 insertions(+), 4 deletions(-) + +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -4728,7 +4728,8 @@ void lru_gen_look_around(struct page_vma + * the eviction + ******************************************************************************/ + +-static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx) ++static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_control *sc, ++ int tier_idx) + { + bool success; + int gen = folio_lru_gen(folio); +@@ -4779,6 +4780,13 @@ static bool sort_folio(struct lruvec *lr + return true; + } + ++ /* ineligible */ ++ if (zone > sc->reclaim_idx) { ++ gen = folio_inc_gen(lruvec, folio, false); ++ list_move_tail(&folio->lru, 
&lrugen->folios[gen][type][zone]); ++ return true; ++ } ++ + /* waiting for writeback */ + if (folio_test_locked(folio) || folio_test_writeback(folio) || + (type == LRU_GEN_FILE && folio_test_dirty(folio))) { +@@ -4831,7 +4839,8 @@ static bool isolate_folio(struct lruvec + static int scan_folios(struct lruvec *lruvec, struct scan_control *sc, + int type, int tier, struct list_head *list) + { +- int gen, zone; ++ int i; ++ int gen; + enum vm_event_item item; + int sorted = 0; + int scanned = 0; +@@ -4847,9 +4856,10 @@ static int scan_folios(struct lruvec *lr + + gen = lru_gen_from_seq(lrugen->min_seq[type]); + +- for (zone = sc->reclaim_idx; zone >= 0; zone--) { ++ for (i = MAX_NR_ZONES; i > 0; i--) { + LIST_HEAD(moved); + int skipped = 0; ++ int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES; + struct list_head *head = &lrugen->folios[gen][type][zone]; + + while (!list_empty(head)) { +@@ -4863,7 +4873,7 @@ static int scan_folios(struct lruvec *lr + + scanned += delta; + +- if (sort_folio(lruvec, folio, tier)) ++ if (sort_folio(lruvec, folio, sc, tier)) + sorted += delta; + else if (isolate_folio(lruvec, folio, sc)) { + list_add(&folio->lru, list); diff --git a/queue-6.1/net-ipv6-skb-symmetric-hash-should-incorporate-transport-ports.patch b/queue-6.1/net-ipv6-skb-symmetric-hash-should-incorporate-transport-ports.patch new file mode 100644 index 00000000000..9fe20c04a9b --- /dev/null +++ b/queue-6.1/net-ipv6-skb-symmetric-hash-should-incorporate-transport-ports.patch @@ -0,0 +1,51 @@ +From a5e2151ff9d5852d0ababbbcaeebd9646af9c8d9 Mon Sep 17 00:00:00 2001 +From: Quan Tian +Date: Tue, 5 Sep 2023 10:36:10 +0000 +Subject: net/ipv6: SKB symmetric hash should incorporate transport ports + +From: Quan Tian + +commit a5e2151ff9d5852d0ababbbcaeebd9646af9c8d9 upstream. + +__skb_get_hash_symmetric() was added to compute a symmetric hash over +the protocol, addresses and transport ports, by commit eb70db875671 +("packet: Use symmetric hash for PACKET_FANOUT_HASH."). 
It uses +flow_keys_dissector_symmetric_keys as the flow_dissector to incorporate +IPv4 addresses, IPv6 addresses and ports. However, it should not specify +the flag as FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL, which stops further +dissection when an IPv6 flow label is encountered, so transport +ports are not incorporated in such a case. + +As a consequence, the symmetric hash is based on 5-tuple for IPv4 but +3-tuple for IPv6 when flow label is present. It caused a few problems, +e.g. when nft symhash and openvswitch l4_sym rely on the symmetric hash +to perform load balancing as different L4 flows between two given IPv6 +addresses would always get the same symmetric hash, leading to uneven +traffic distribution. + +Removing the use of FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL makes sure the +symmetric hash is based on 5-tuple for both IPv4 and IPv6 consistently. + +Fixes: eb70db875671 ("packet: Use symmetric hash for PACKET_FANOUT_HASH.") +Reported-by: Lars Ekman +Closes: https://github.com/antrea-io/antrea/issues/5457 +Signed-off-by: Quan Tian +Reviewed-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/flow_dissector.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/net/core/flow_dissector.c ++++ b/net/core/flow_dissector.c +@@ -1738,8 +1738,7 @@ u32 __skb_get_hash_symmetric(const struc + + memset(&keys, 0, sizeof(keys)); + __skb_flow_dissect(NULL, skb, &flow_keys_dissector_symmetric, +- &keys, NULL, 0, 0, 0, +- FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); ++ &keys, NULL, 0, 0, 0, 0); + + return __flow_hash_from_keys(&keys, &hashrnd); + } diff --git a/queue-6.1/series b/queue-6.1/series new file mode 100644 index 00000000000..130c42dd899 --- /dev/null +++ b/queue-6.1/series @@ -0,0 +1,3 @@ +net-ipv6-skb-symmetric-hash-should-incorporate-transport-ports.patch +mm-multi-gen-lru-rename-lrugen-lists-to-lrugen-folios.patch +multi-gen-lru-fix-per-zone-reclaim.patch