--- /dev/null
+From f9bff0e31881d03badf191d3b0005839391f5f2b Mon Sep 17 00:00:00 2001
+From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
+Date: Wed, 2 Aug 2023 16:13:29 +0100
+Subject: minmax: add in_range() macro
+
+From: Matthew Wilcox (Oracle) <willy@infradead.org>
+
+commit f9bff0e31881d03badf191d3b0005839391f5f2b upstream.
+
+Patch series "New page table range API", v6.
+
+This patchset changes the API used by the MM to set up page table entries.
+The four APIs are:
+
+ set_ptes(mm, addr, ptep, pte, nr)
+ update_mmu_cache_range(vma, addr, ptep, nr)
+ flush_dcache_folio(folio)
+ flush_icache_pages(vma, page, nr)
+
+flush_dcache_folio() isn't technically new, but no architecture
+implemented it, so I've done that for them. The old APIs remain around
+but are mostly implemented by calling the new interfaces.
+
+The new APIs are based around setting up N page table entries at once.
+The N entries belong to the same PMD, the same folio and the same VMA, so
+ptep++ is a legitimate operation, and locking is taken care of for you.
+Some architectures can do a better job of it than just a loop, but I have
+hesitated to make too deep a change to architectures I don't understand
+well.
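+
+As a purely illustrative sketch (not the code added by this series;
+pte_pgprot() is assumed here as an arch-specific way to carry the
+permission bits forward while the PFN advances), a loop-based
+set_ptes() for an architecture with no specialised implementation
+might look roughly like this:
+
+    static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
+                                pte_t *ptep, pte_t pte, unsigned int nr)
+    {
+            unsigned int i;
+
+            /* All nr entries share one PMD, folio and VMA, so ptep++ is safe. */
+            for (i = 0; i < nr; i++) {
+                    set_pte(ptep + i, pte);
+                    /* Advance the PFN to the next page of the folio. */
+                    pte = pfn_pte(pte_pfn(pte) + 1, pte_pgprot(pte));
+            }
+    }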
+
+One thing I have changed in every architecture is that PG_arch_1 is now a
+per-folio bit instead of a per-page bit when used for dcache clean/dirty
+tracking. This was something that would have to happen eventually, and it
+makes sense to do it now rather than iterate over every page involved in a
+cache flush and figure out if it needs to happen.
+
+The point of all this is better performance, and Fengwei Yin has measured
+improvement on x86. I suspect you'll see improvement on your architecture
+too. Try the new will-it-scale test mentioned here:
+https://lore.kernel.org/linux-mm/20230206140639.538867-5-fengwei.yin@intel.com/
+You'll need to run it on an XFS filesystem and have
+CONFIG_TRANSPARENT_HUGEPAGE set.
+
+This patchset is the basis for much of the anonymous large folio work
+being done by Ryan, so it's received quite a lot of testing over the last
+few months.
+
+
+This patch (of 38):
+
+Determine if a value lies within a range more efficiently (subtraction +
+comparison vs two comparisons and an AND). It also has behaviour that is
+useful under some circumstances if the range exceeds the maximum value of the
+type. Convert all the conflicting definitions of in_range() within the
+kernel; some can use the generic definition while others need their own
+definition.
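+
+For illustration only (not part of this patch), a stand-alone snippet
+showing that the single unsigned subtraction agrees with the
+two-comparison form while start + len fits in the type, and how the
+two forms diverge once start + len wraps around:
+
+    #include <stdbool.h>
+    #include <stdint.h>
+    #include <stdio.h>
+
+    /* Two comparisons and an AND; start + len may wrap. */
+    static bool in_range_naive(uint32_t val, uint32_t start, uint32_t len)
+    {
+            return val >= start && val < start + len;
+    }
+
+    /* One subtraction and one comparison, as in the new helper. */
+    static bool in_range_sub(uint32_t val, uint32_t start, uint32_t len)
+    {
+            return (val - start) < len;
+    }
+
+    int main(void)
+    {
+            /* Both print 1: start + len does not wrap. */
+            printf("%d %d\n", in_range_naive(5, 4, 2), in_range_sub(5, 4, 2));
+            /* Prints 0 1: the range wraps past UINT32_MAX and only the
+             * subtraction form still treats val = 1 as inside it.
+             */
+            printf("%d %d\n", in_range_naive(1, 0xfffffff0u, 0x20),
+                              in_range_sub(1, 0xfffffff0u, 0x20));
+            return 0;
+    }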
+
+Link: https://lkml.kernel.org/r/20230802151406.3735276-1-willy@infradead.org
+Link: https://lkml.kernel.org/r/20230802151406.3735276-2-willy@infradead.org
+Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Eliav Farber <farbere@amazon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm/mm/pageattr.c | 6 +-
+ drivers/gpu/drm/arm/display/include/malidp_utils.h | 2
+ drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c | 24 +++++------
+ drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 6 --
+ drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c | 18 ++++----
+ drivers/virt/acrn/ioreq.c | 4 -
+ fs/btrfs/misc.h | 2
+ fs/ext2/balloc.c | 2
+ fs/ext4/ext4.h | 2
+ fs/ufs/util.h | 6 --
+ include/linux/minmax.h | 27 +++++++++++++
+ lib/logic_pio.c | 3 -
+ net/netfilter/nf_nat_core.c | 6 +-
+ net/tipc/core.h | 2
+ net/tipc/link.c | 10 ++--
+ 15 files changed, 63 insertions(+), 57 deletions(-)
+
+--- a/arch/arm/mm/pageattr.c
++++ b/arch/arm/mm/pageattr.c
+@@ -25,7 +25,7 @@ static int change_page_range(pte_t *ptep
+ return 0;
+ }
+
+-static bool in_range(unsigned long start, unsigned long size,
++static bool range_in_range(unsigned long start, unsigned long size,
+ unsigned long range_start, unsigned long range_end)
+ {
+ return start >= range_start && start < range_end &&
+@@ -46,8 +46,8 @@ static int change_memory_common(unsigned
+ if (!size)
+ return 0;
+
+- if (!in_range(start, size, MODULES_VADDR, MODULES_END) &&
+- !in_range(start, size, VMALLOC_START, VMALLOC_END))
++ if (!range_in_range(start, size, MODULES_VADDR, MODULES_END) &&
++ !range_in_range(start, size, VMALLOC_START, VMALLOC_END))
+ return -EINVAL;
+
+ data.set_mask = set_mask;
+--- a/drivers/gpu/drm/arm/display/include/malidp_utils.h
++++ b/drivers/gpu/drm/arm/display/include/malidp_utils.h
+@@ -35,7 +35,7 @@ static inline void set_range(struct mali
+ rg->end = end;
+ }
+
+-static inline bool in_range(struct malidp_range *rg, u32 v)
++static inline bool malidp_in_range(struct malidp_range *rg, u32 v)
+ {
+ return (v >= rg->start) && (v <= rg->end);
+ }
+--- a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c
++++ b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c
+@@ -305,12 +305,12 @@ komeda_layer_check_cfg(struct komeda_lay
+ if (komeda_fb_check_src_coords(kfb, src_x, src_y, src_w, src_h))
+ return -EINVAL;
+
+- if (!in_range(&layer->hsize_in, src_w)) {
++ if (!malidp_in_range(&layer->hsize_in, src_w)) {
+ DRM_DEBUG_ATOMIC("invalidate src_w %d.\n", src_w);
+ return -EINVAL;
+ }
+
+- if (!in_range(&layer->vsize_in, src_h)) {
++ if (!malidp_in_range(&layer->vsize_in, src_h)) {
+ DRM_DEBUG_ATOMIC("invalidate src_h %d.\n", src_h);
+ return -EINVAL;
+ }
+@@ -452,14 +452,14 @@ komeda_scaler_check_cfg(struct komeda_sc
+ hsize_out = dflow->out_w;
+ vsize_out = dflow->out_h;
+
+- if (!in_range(&scaler->hsize, hsize_in) ||
+- !in_range(&scaler->hsize, hsize_out)) {
++ if (!malidp_in_range(&scaler->hsize, hsize_in) ||
++ !malidp_in_range(&scaler->hsize, hsize_out)) {
+ DRM_DEBUG_ATOMIC("Invalid horizontal sizes");
+ return -EINVAL;
+ }
+
+- if (!in_range(&scaler->vsize, vsize_in) ||
+- !in_range(&scaler->vsize, vsize_out)) {
++ if (!malidp_in_range(&scaler->vsize, vsize_in) ||
++ !malidp_in_range(&scaler->vsize, vsize_out)) {
+ DRM_DEBUG_ATOMIC("Invalid vertical sizes");
+ return -EINVAL;
+ }
+@@ -574,13 +574,13 @@ komeda_splitter_validate(struct komeda_s
+ return -EINVAL;
+ }
+
+- if (!in_range(&splitter->hsize, dflow->in_w)) {
++ if (!malidp_in_range(&splitter->hsize, dflow->in_w)) {
+ DRM_DEBUG_ATOMIC("split in_w:%d is out of the acceptable range.\n",
+ dflow->in_w);
+ return -EINVAL;
+ }
+
+- if (!in_range(&splitter->vsize, dflow->in_h)) {
++ if (!malidp_in_range(&splitter->vsize, dflow->in_h)) {
+ DRM_DEBUG_ATOMIC("split in_h: %d exceeds the acceptable range.\n",
+ dflow->in_h);
+ return -EINVAL;
+@@ -624,13 +624,13 @@ komeda_merger_validate(struct komeda_mer
+ return -EINVAL;
+ }
+
+- if (!in_range(&merger->hsize_merged, output->out_w)) {
++ if (!malidp_in_range(&merger->hsize_merged, output->out_w)) {
+ DRM_DEBUG_ATOMIC("merged_w: %d is out of the accepted range.\n",
+ output->out_w);
+ return -EINVAL;
+ }
+
+- if (!in_range(&merger->vsize_merged, output->out_h)) {
++ if (!malidp_in_range(&merger->vsize_merged, output->out_h)) {
+ DRM_DEBUG_ATOMIC("merged_h: %d is out of the accepted range.\n",
+ output->out_h);
+ return -EINVAL;
+@@ -866,8 +866,8 @@ void komeda_complete_data_flow_cfg(struc
+ * input/output range.
+ */
+ if (dflow->en_scaling && scaler)
+- dflow->en_split = !in_range(&scaler->hsize, dflow->in_w) ||
+- !in_range(&scaler->hsize, dflow->out_w);
++ dflow->en_split = !malidp_in_range(&scaler->hsize, dflow->in_w) ||
++ !malidp_in_range(&scaler->hsize, dflow->out_w);
+ }
+
+ static bool merger_is_available(struct komeda_pipeline *pipe,
+--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
++++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+@@ -676,12 +676,6 @@ struct block_header {
+ u32 data[];
+ };
+
+-/* this should be a general kernel helper */
+-static int in_range(u32 addr, u32 start, u32 size)
+-{
+- return addr >= start && addr < start + size;
+-}
+-
+ static bool fw_block_mem(struct a6xx_gmu_bo *bo, const struct block_header *blk)
+ {
+ if (!in_range(blk->addr, bo->iova, bo->size))
+--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
++++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
+@@ -2135,7 +2135,7 @@ static const struct ethtool_ops cxgb_eth
+ .set_link_ksettings = set_link_ksettings,
+ };
+
+-static int in_range(int val, int lo, int hi)
++static int cxgb_in_range(int val, int lo, int hi)
+ {
+ return val < 0 || (val <= hi && val >= lo);
+ }
+@@ -2171,19 +2171,19 @@ static int cxgb_siocdevprivate(struct ne
+ return -EINVAL;
+ if (t.qset_idx >= SGE_QSETS)
+ return -EINVAL;
+- if (!in_range(t.intr_lat, 0, M_NEWTIMER) ||
+- !in_range(t.cong_thres, 0, 255) ||
+- !in_range(t.txq_size[0], MIN_TXQ_ENTRIES,
++ if (!cxgb_in_range(t.intr_lat, 0, M_NEWTIMER) ||
++ !cxgb_in_range(t.cong_thres, 0, 255) ||
++ !cxgb_in_range(t.txq_size[0], MIN_TXQ_ENTRIES,
+ MAX_TXQ_ENTRIES) ||
+- !in_range(t.txq_size[1], MIN_TXQ_ENTRIES,
++ !cxgb_in_range(t.txq_size[1], MIN_TXQ_ENTRIES,
+ MAX_TXQ_ENTRIES) ||
+- !in_range(t.txq_size[2], MIN_CTRL_TXQ_ENTRIES,
++ !cxgb_in_range(t.txq_size[2], MIN_CTRL_TXQ_ENTRIES,
+ MAX_CTRL_TXQ_ENTRIES) ||
+- !in_range(t.fl_size[0], MIN_FL_ENTRIES,
++ !cxgb_in_range(t.fl_size[0], MIN_FL_ENTRIES,
+ MAX_RX_BUFFERS) ||
+- !in_range(t.fl_size[1], MIN_FL_ENTRIES,
++ !cxgb_in_range(t.fl_size[1], MIN_FL_ENTRIES,
+ MAX_RX_JUMBO_BUFFERS) ||
+- !in_range(t.rspq_size, MIN_RSPQ_ENTRIES,
++ !cxgb_in_range(t.rspq_size, MIN_RSPQ_ENTRIES,
+ MAX_RSPQ_ENTRIES))
+ return -EINVAL;
+
+--- a/drivers/virt/acrn/ioreq.c
++++ b/drivers/virt/acrn/ioreq.c
+@@ -356,7 +356,7 @@ static bool handle_cf8cfc(struct acrn_vm
+ return is_handled;
+ }
+
+-static bool in_range(struct acrn_ioreq_range *range,
++static bool acrn_in_range(struct acrn_ioreq_range *range,
+ struct acrn_io_request *req)
+ {
+ bool ret = false;
+@@ -394,7 +394,7 @@ static struct acrn_ioreq_client *find_io
+ list_for_each_entry(client, &vm->ioreq_clients, list) {
+ read_lock_bh(&client->range_lock);
+ list_for_each_entry(range, &client->range_list, list) {
+- if (in_range(range, req)) {
++ if (acrn_in_range(range, req)) {
+ found = client;
+ break;
+ }
+--- a/fs/btrfs/misc.h
++++ b/fs/btrfs/misc.h
+@@ -8,8 +8,6 @@
+ #include <linux/math64.h>
+ #include <linux/rbtree.h>
+
+-#define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len))
+-
+ static inline void cond_wake_up(struct wait_queue_head *wq)
+ {
+ /*
+--- a/fs/ext2/balloc.c
++++ b/fs/ext2/balloc.c
+@@ -36,8 +36,6 @@
+ */
+
+
+-#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
+-
+ struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb,
+ unsigned int block_group,
+ struct buffer_head ** bh)
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -3830,8 +3830,6 @@ static inline void set_bitmap_uptodate(s
+ set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state);
+ }
+
+-#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
+-
+ /* For ioend & aio unwritten conversion wait queues */
+ #define EXT4_WQ_HASH_SZ 37
+ #define ext4_ioend_wq(v) (&ext4__ioend_wq[((unsigned long)(v)) %\
+--- a/fs/ufs/util.h
++++ b/fs/ufs/util.h
+@@ -11,12 +11,6 @@
+ #include <linux/fs.h>
+ #include "swab.h"
+
+-
+-/*
+- * some useful macros
+- */
+-#define in_range(b,first,len) ((b)>=(first)&&(b)<(first)+(len))
+-
+ /*
+ * functions used for retyping
+ */
+--- a/include/linux/minmax.h
++++ b/include/linux/minmax.h
+@@ -5,6 +5,7 @@
+ #include <linux/build_bug.h>
+ #include <linux/compiler.h>
+ #include <linux/const.h>
++#include <linux/types.h>
+
+ /*
+ * min()/max()/clamp() macros must accomplish three things:
+@@ -192,6 +193,32 @@
+ */
+ #define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi)
+
++static inline bool in_range64(u64 val, u64 start, u64 len)
++{
++ return (val - start) < len;
++}
++
++static inline bool in_range32(u32 val, u32 start, u32 len)
++{
++ return (val - start) < len;
++}
++
++/**
++ * in_range - Determine if a value lies within a range.
++ * @val: Value to test.
++ * @start: First value in range.
++ * @len: Number of values in range.
++ *
++ * This is more efficient than "if (start <= val && val < (start + len))".
++ * It also gives a different answer if @start + @len overflows the size of
++ * the type by a sufficient amount to encompass @val. Decide for yourself
++ * which behaviour you want, or prove that start + len never overflow.
++ * Do not blindly replace one form with the other.
++ */
++#define in_range(val, start, len) \
++ ((sizeof(start) | sizeof(len) | sizeof(val)) <= sizeof(u32) ? \
++ in_range32(val, start, len) : in_range64(val, start, len))
++
+ /**
+ * swap - swap values of @a and @b
+ * @a: first value
+--- a/lib/logic_pio.c
++++ b/lib/logic_pio.c
+@@ -20,9 +20,6 @@
+ static LIST_HEAD(io_range_list);
+ static DEFINE_MUTEX(io_range_mutex);
+
+-/* Consider a kernel general helper for this */
+-#define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len))
+-
+ /**
+ * logic_pio_register_range - register logical PIO range for a host
+ * @new_range: pointer to the IO range to be registered.
+--- a/net/netfilter/nf_nat_core.c
++++ b/net/netfilter/nf_nat_core.c
+@@ -242,7 +242,7 @@ static bool l4proto_in_range(const struc
+ /* If we source map this tuple so reply looks like reply_tuple, will
+ * that meet the constraints of range.
+ */
+-static int in_range(const struct nf_conntrack_tuple *tuple,
++static int nf_in_range(const struct nf_conntrack_tuple *tuple,
+ const struct nf_nat_range2 *range)
+ {
+ /* If we are supposed to map IPs, then we must be in the
+@@ -291,7 +291,7 @@ find_appropriate_src(struct net *net,
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ result->dst = tuple->dst;
+
+- if (in_range(result, range))
++ if (nf_in_range(result, range))
+ return 1;
+ }
+ }
+@@ -523,7 +523,7 @@ get_unique_tuple(struct nf_conntrack_tup
+ if (maniptype == NF_NAT_MANIP_SRC &&
+ !(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
+ /* try the original tuple first */
+- if (in_range(orig_tuple, range)) {
++ if (nf_in_range(orig_tuple, range)) {
+ if (!nf_nat_used_tuple(orig_tuple, ct)) {
+ *tuple = *orig_tuple;
+ return;
+--- a/net/tipc/core.h
++++ b/net/tipc/core.h
+@@ -197,7 +197,7 @@ static inline int less(u16 left, u16 rig
+ return less_eq(left, right) && (mod(right) != mod(left));
+ }
+
+-static inline int in_range(u16 val, u16 min, u16 max)
++static inline int tipc_in_range(u16 val, u16 min, u16 max)
+ {
+ return !less(val, min) && !more(val, max);
+ }
+--- a/net/tipc/link.c
++++ b/net/tipc/link.c
+@@ -1623,7 +1623,7 @@ next_gap_ack:
+ last_ga->bgack_cnt);
+ }
+ /* Check against the last Gap ACK block */
+- if (in_range(seqno, start, end))
++ if (tipc_in_range(seqno, start, end))
+ continue;
+ /* Update/release the packet peer is acking */
+ bc_has_acked = true;
+@@ -2251,12 +2251,12 @@ static int tipc_link_proto_rcv(struct ti
+ strncpy(if_name, data, TIPC_MAX_IF_NAME);
+
+ /* Update own tolerance if peer indicates a non-zero value */
+- if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) {
++ if (tipc_in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) {
+ l->tolerance = peers_tol;
+ l->bc_rcvlink->tolerance = peers_tol;
+ }
+ /* Update own priority if peer's priority is higher */
+- if (in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI))
++ if (tipc_in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI))
+ l->priority = peers_prio;
+
+ /* If peer is going down we want full re-establish cycle */
+@@ -2299,13 +2299,13 @@ static int tipc_link_proto_rcv(struct ti
+ l->rcv_nxt_state = msg_seqno(hdr) + 1;
+
+ /* Update own tolerance if peer indicates a non-zero value */
+- if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) {
++ if (tipc_in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) {
+ l->tolerance = peers_tol;
+ l->bc_rcvlink->tolerance = peers_tol;
+ }
+ /* Update own prio if peer indicates a different value */
+ if ((peers_prio != l->priority) &&
+- in_range(peers_prio, 1, TIPC_MAX_LINK_PRI)) {
++ tipc_in_range(peers_prio, 1, TIPC_MAX_LINK_PRI)) {
+ l->priority = peers_prio;
+ rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
+ }