5.15-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Sun, 12 Dec 2021 12:29:50 +0000 (13:29 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Sun, 12 Dec 2021 12:29:50 +0000 (13:29 +0100)
added patches:
kvm-x86-don-t-warn-if-userspace-mucks-with-rcx-during-string-i-o-exit.patch
kvm-x86-ignore-sparse-banks-size-for-an-all-cpus-non-sparse-ipi-req.patch
kvm-x86-wait-for-ipis-to-be-delivered-when-handling-hyper-v-tlb-flush-hypercall.patch
mm-bdi-initialize-bdi_min_ratio-when-bdi-is-unregistered.patch
mm-damon-core-fix-fake-load-reports-due-to-uninterruptible-sleeps.patch
mm-slub-fix-endianness-bug-for-alloc-free_traces-attributes.patch
timers-implement-usleep_idle_range.patch

queue-5.15/kvm-x86-don-t-warn-if-userspace-mucks-with-rcx-during-string-i-o-exit.patch [new file with mode: 0644]
queue-5.15/kvm-x86-ignore-sparse-banks-size-for-an-all-cpus-non-sparse-ipi-req.patch [new file with mode: 0644]
queue-5.15/kvm-x86-wait-for-ipis-to-be-delivered-when-handling-hyper-v-tlb-flush-hypercall.patch [new file with mode: 0644]
queue-5.15/mm-bdi-initialize-bdi_min_ratio-when-bdi-is-unregistered.patch [new file with mode: 0644]
queue-5.15/mm-damon-core-fix-fake-load-reports-due-to-uninterruptible-sleeps.patch [new file with mode: 0644]
queue-5.15/mm-slub-fix-endianness-bug-for-alloc-free_traces-attributes.patch [new file with mode: 0644]
queue-5.15/series
queue-5.15/timers-implement-usleep_idle_range.patch [new file with mode: 0644]

diff --git a/queue-5.15/kvm-x86-don-t-warn-if-userspace-mucks-with-rcx-during-string-i-o-exit.patch b/queue-5.15/kvm-x86-don-t-warn-if-userspace-mucks-with-rcx-during-string-i-o-exit.patch
new file mode 100644 (file)
index 0000000..4c0ee1e
--- /dev/null
@@ -0,0 +1,50 @@
+From d07898eaf39909806128caccb6ebd922ee3edd69 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Mon, 25 Oct 2021 13:13:10 -0700
+Subject: KVM: x86: Don't WARN if userspace mucks with RCX during string I/O exit
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit d07898eaf39909806128caccb6ebd922ee3edd69 upstream.
+
+Replace a WARN with a comment to call out that userspace can modify RCX
+during an exit to userspace to handle string I/O.  KVM doesn't actually
+support changing the rep count during an exit, i.e. the scenario can be
+ignored, but the WARN needs to go as it's trivial to trigger from
+userspace.
+
+Cc: stable@vger.kernel.org
+Fixes: 3b27de271839 ("KVM: x86: split the two parts of emulator_pio_in")
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20211025201311.1881846-2-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c |    9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -7021,7 +7021,13 @@ static int emulator_pio_in(struct kvm_vc
+                          unsigned short port, void *val, unsigned int count)
+ {
+       if (vcpu->arch.pio.count) {
+-              /* Complete previous iteration.  */
++              /*
++               * Complete a previous iteration that required userspace I/O.
++               * Note, @count isn't guaranteed to match pio.count as userspace
++               * can modify ECX before rerunning the vCPU.  Ignore any such
++               * shenanigans as KVM doesn't support modifying the rep count,
++               * and the emulator ensures @count doesn't overflow the buffer.
++               */
+       } else {
+               int r = __emulator_pio_in(vcpu, size, port, count);
+               if (!r)
+@@ -7030,7 +7036,6 @@ static int emulator_pio_in(struct kvm_vc
+               /* Results already available, fall through.  */
+       }
+-      WARN_ON(count != vcpu->arch.pio.count);
+       complete_emulator_pio_in(vcpu, val);
+       return 1;
+ }
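
The scenario the removed WARN guarded against is reachable from ordinary userspace: nothing prevents a VMM from rewriting RCX between the KVM_EXIT_IO exit and the next KVM_RUN. A minimal userspace sketch of that sequence, assuming vcpu_fd and the mmap'ed struct kvm_run have already been set up through the usual KVM_CREATE_VM/KVM_CREATE_VCPU path:

#include <linux/kvm.h>
#include <sys/ioctl.h>

static void clobber_rcx_after_string_io(int vcpu_fd, struct kvm_run *run)
{
        struct kvm_regs regs;

        if (run->exit_reason != KVM_EXIT_IO)
                return;

        /* Userspace may change the rep count while the exit is pending. */
        ioctl(vcpu_fd, KVM_GET_REGS, &regs);
        regs.rcx = 0;
        ioctl(vcpu_fd, KVM_SET_REGS, &regs);

        /* Re-entering the guest here is what used to trip the WARN. */
        ioctl(vcpu_fd, KVM_RUN, NULL);
}
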
diff --git a/queue-5.15/kvm-x86-ignore-sparse-banks-size-for-an-all-cpus-non-sparse-ipi-req.patch b/queue-5.15/kvm-x86-ignore-sparse-banks-size-for-an-all-cpus-non-sparse-ipi-req.patch
new file mode 100644 (file)
index 0000000..ba658fa
--- /dev/null
@@ -0,0 +1,60 @@
+From 3244867af8c065e51969f1bffe732d3ebfd9a7d2 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Tue, 7 Dec 2021 22:09:19 +0000
+Subject: KVM: x86: Ignore sparse banks size for an "all CPUs", non-sparse IPI req
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 3244867af8c065e51969f1bffe732d3ebfd9a7d2 upstream.
+
+Do not bail early if there are no bits set in the sparse banks for a
+non-sparse, a.k.a. "all CPUs", IPI request.  Per the Hyper-V spec, it is
+legal to have a variable length of '0', e.g. VP_SET's BankContents in
+this case, if the request can be serviced without the extra info.
+
+  It is possible that for a given invocation of a hypercall that does
+  accept variable sized input headers that all the header input fits
+  entirely within the fixed size header. In such cases the variable sized
+  input header is zero-sized and the corresponding bits in the hypercall
+  input should be set to zero.
+
+Bailing early results in KVM failing to send IPIs to all CPUs as expected
+by the guest.
+
+Fixes: 214ff83d4473 ("KVM: x86: hyperv: implement PV IPI send hypercalls")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Message-Id: <20211207220926.718794-2-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/hyperv.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/hyperv.c
++++ b/arch/x86/kvm/hyperv.c
+@@ -1922,11 +1922,13 @@ static u64 kvm_hv_send_ipi(struct kvm_vc
+               all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL;
++              if (all_cpus)
++                      goto check_and_send_ipi;
++
+               if (!sparse_banks_len)
+                       goto ret_success;
+-              if (!all_cpus &&
+-                  kvm_read_guest(kvm,
++              if (kvm_read_guest(kvm,
+                                  hc->ingpa + offsetof(struct hv_send_ipi_ex,
+                                                       vp_set.bank_contents),
+                                  sparse_banks,
+@@ -1934,6 +1936,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vc
+                       return HV_STATUS_INVALID_HYPERCALL_INPUT;
+       }
++check_and_send_ipi:
+       if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
+               return HV_STATUS_INVALID_HYPERCALL_INPUT;
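
For reference, the hypercall input that carries the zero-sized variable header is laid out roughly as below (after the definitions in arch/x86/include/asm/hyperv-tlfs.h). When format is HV_GENERIC_SET_ALL, bank_contents carries no entries, so there is nothing for kvm_read_guest() to fetch and bailing early on an empty variable header was wrong for that case.

/* Sketch of the HvCallSendSyntheticClusterIpiEx input layout. */
struct hv_vpset {
        u64 format;             /* HV_GENERIC_SET_SPARSE_4K or HV_GENERIC_SET_ALL */
        u64 valid_bank_mask;
        u64 bank_contents[];    /* variable part; may be empty for "all CPUs" */
};

struct hv_send_ipi_ex {
        u32 vector;
        u32 reserved;
        struct hv_vpset vp_set;
};
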
diff --git a/queue-5.15/kvm-x86-wait-for-ipis-to-be-delivered-when-handling-hyper-v-tlb-flush-hypercall.patch b/queue-5.15/kvm-x86-wait-for-ipis-to-be-delivered-when-handling-hyper-v-tlb-flush-hypercall.patch
new file mode 100644 (file)
index 0000000..5b1cc29
--- /dev/null
@@ -0,0 +1,50 @@
+From 1ebfaa11ebb5b603a3c3f54b2e84fcf1030f5a14 Mon Sep 17 00:00:00 2001
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+Date: Thu, 9 Dec 2021 11:29:37 +0100
+Subject: KVM: x86: Wait for IPIs to be delivered when handling Hyper-V TLB flush hypercall
+
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+
+commit 1ebfaa11ebb5b603a3c3f54b2e84fcf1030f5a14 upstream.
+
+Prior to commit 0baedd792713 ("KVM: x86: make Hyper-V PV TLB flush use
+tlb_flush_guest()"), kvm_hv_flush_tlb() was using 'KVM_REQ_TLB_FLUSH |
+KVM_REQUEST_NO_WAKEUP' when making a request to flush TLBs on other vCPUs
+and KVM_REQ_TLB_FLUSH is/was defined as:
+
+ (0 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+
+so KVM_REQUEST_WAIT was lost. Hyper-V TLFS, however, requires that
+"This call guarantees that by the time control returns back to the
+caller, the observable effects of all flushes on the specified virtual
+processors have occurred." and without KVM_REQUEST_WAIT there's a small
+chance that the vCPU making the TLB flush will resume running before
+all IPIs get delivered to other vCPUs and a stale mapping can get read
+there.
+
+Fix the issue by adding KVM_REQUEST_WAIT flag to KVM_REQ_TLB_FLUSH_GUEST:
+kvm_hv_flush_tlb() is the sole caller which uses it for
+kvm_make_all_cpus_request()/kvm_make_vcpus_request_mask() where
+KVM_REQUEST_WAIT makes a difference.
+
+Cc: stable@kernel.org
+Fixes: 0baedd792713 ("KVM: x86: make Hyper-V PV TLB flush use tlb_flush_guest()")
+Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Message-Id: <20211209102937.584397-1-vkuznets@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/kvm_host.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -98,7 +98,7 @@
+       KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+ #define KVM_REQ_TLB_FLUSH_CURRENT     KVM_ARCH_REQ(26)
+ #define KVM_REQ_TLB_FLUSH_GUEST \
+-      KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_NO_WAKEUP)
++      KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+ #define KVM_REQ_APF_READY             KVM_ARCH_REQ(28)
+ #define KVM_REQ_MSR_FILTER_CHANGED    KVM_ARCH_REQ(29)
+ #define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \
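
The practical effect of the added flag is in the kick path: kvm_make_vcpus_request_mask() derives a wait flag from KVM_REQUEST_WAIT and passes it to smp_call_function_many(), so the requesting vCPU blocks until every targeted CPU has taken the kick IPI. A condensed sketch of that relationship (structure and names simplified from virt/kvm/kvm_main.c):

#include <linux/smp.h>

static void ack_kick(void *info)
{
        /* Empty on purpose: the IPI itself forces the vCPU out of guest mode. */
}

static void kick_vcpus_sketch(const struct cpumask *cpus, unsigned int req)
{
        bool wait = req & KVM_REQUEST_WAIT;

        /*
         * With wait == true, this call only returns once every targeted CPU
         * has executed ack_kick(), i.e. after the flush request is visible
         * to all remote vCPUs -- the guarantee the Hyper-V TLFS requires.
         */
        smp_call_function_many(cpus, ack_kick, NULL, wait);
}
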
diff --git a/queue-5.15/mm-bdi-initialize-bdi_min_ratio-when-bdi-is-unregistered.patch b/queue-5.15/mm-bdi-initialize-bdi_min_ratio-when-bdi-is-unregistered.patch
new file mode 100644 (file)
index 0000000..e35ca9a
--- /dev/null
@@ -0,0 +1,61 @@
+From 3c376dfafbf7a8ea0dea212d095ddd83e93280bb Mon Sep 17 00:00:00 2001
+From: Manjong Lee <mj0123.lee@samsung.com>
+Date: Fri, 10 Dec 2021 14:47:11 -0800
+Subject: mm: bdi: initialize bdi_min_ratio when bdi is unregistered
+
+From: Manjong Lee <mj0123.lee@samsung.com>
+
+commit 3c376dfafbf7a8ea0dea212d095ddd83e93280bb upstream.
+
+Initialize min_ratio if it is set during bdi unregistration.  This can
+prevent problems that may occur when a bdi is removed without resetting
+min_ratio.
+
+For example:
+1) insert external sdcard
+2) set external sdcard's min_ratio 70
+3) remove external sdcard without setting min_ratio 0
+4) insert external sdcard
+5) set external sdcard's min_ratio 70 << error occurs (can't set)
+
+Because when an sdcard is removed, its bdi_min_ratio contribution
+remains.  Currently, the only way to reset bdi_min_ratio is to reboot.
+
+[akpm@linux-foundation.org: tweak comment and coding style]
+
+Link: https://lkml.kernel.org/r/20211021161942.5983-1-mj0123.lee@samsung.com
+Signed-off-by: Manjong Lee <mj0123.lee@samsung.com>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Changheun Lee <nanich.lee@samsung.com>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Christoph Hellwig <hch@infradead.org>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: <seunghwan.hyun@samsung.com>
+Cc: <sookwan7.kim@samsung.com>
+Cc: <yt0928.kim@samsung.com>
+Cc: <junho89.kim@samsung.com>
+Cc: <jisoo2146.oh@samsung.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/backing-dev.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/mm/backing-dev.c
++++ b/mm/backing-dev.c
+@@ -947,6 +947,13 @@ void bdi_unregister(struct backing_dev_i
+       wb_shutdown(&bdi->wb);
+       cgwb_bdi_unregister(bdi);
++      /*
++       * If this BDI's min ratio has been set, use bdi_set_min_ratio() to
++       * update the global bdi_min_ratio.
++       */
++      if (bdi->min_ratio)
++              bdi_set_min_ratio(bdi, 0);
++
+       if (bdi->dev) {
+               bdi_debug_unregister(bdi);
+               device_unregister(bdi->dev);
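
The reason a stale value blocks a later write is that bdi_min_ratio is a single global budget, capped at 100, that every device's min_ratio is carved out of. A simplified rendering of the accounting done by bdi_set_min_ratio() in mm/page-writeback.c (locking and the max_ratio check omitted):

static unsigned int bdi_min_ratio;      /* global share already claimed by all devices */

static int set_min_ratio_sketch(struct backing_dev_info *bdi,
                                unsigned int min_ratio)
{
        int delta = min_ratio - bdi->min_ratio;

        /*
         * If an unregistered device never returned its share, the global
         * budget stays inflated and a later request for the same 70 fails.
         */
        if (bdi_min_ratio + delta >= 100)
                return -EINVAL;

        bdi_min_ratio += delta;
        bdi->min_ratio += delta;
        return 0;
}
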
diff --git a/queue-5.15/mm-damon-core-fix-fake-load-reports-due-to-uninterruptible-sleeps.patch b/queue-5.15/mm-damon-core-fix-fake-load-reports-due-to-uninterruptible-sleeps.patch
new file mode 100644 (file)
index 0000000..cd11ef1
--- /dev/null
@@ -0,0 +1,68 @@
+From 70e9274805fccfd175d0431a947bfd11ee7df40e Mon Sep 17 00:00:00 2001
+From: SeongJae Park <sj@kernel.org>
+Date: Fri, 10 Dec 2021 14:46:25 -0800
+Subject: mm/damon/core: fix fake load reports due to uninterruptible sleeps
+
+From: SeongJae Park <sj@kernel.org>
+
+commit 70e9274805fccfd175d0431a947bfd11ee7df40e upstream.
+
+Because DAMON sleeps in uninterruptible mode, /proc/loadavg reports fake
+load while DAMON is turned on, even though it is doing nothing.  This can
+confuse users[1].  To avoid that, this commit makes DAMON sleep in idle
+mode.
+
+[1] https://lore.kernel.org/all/11868371.O9o76ZdvQC@natalenko.name/
+
+Link: https://lkml.kernel.org/r/20211126145015.15862-3-sj@kernel.org
+Fixes: 2224d8485492 ("mm: introduce Data Access MONitor (DAMON)")
+Reported-by: Oleksandr Natalenko <oleksandr@natalenko.name>
+Signed-off-by: SeongJae Park <sj@kernel.org>
+Tested-by: Oleksandr Natalenko <oleksandr@natalenko.name>
+Cc: John Stultz <john.stultz@linaro.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/damon/core.c |   14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+--- a/mm/damon/core.c
++++ b/mm/damon/core.c
+@@ -357,6 +357,15 @@ int damon_start(struct damon_ctx **ctxs,
+       return err;
+ }
++static void kdamond_usleep(unsigned long usecs)
++{
++      /* See Documentation/timers/timers-howto.rst for the thresholds */
++      if (usecs > 20 * 1000)
++              schedule_timeout_idle(usecs_to_jiffies(usecs));
++      else
++              usleep_idle_range(usecs, usecs + 1);
++}
++
+ /*
+  * __damon_stop() - Stops monitoring of given context.
+  * @ctx:      monitoring context
+@@ -370,8 +379,7 @@ static int __damon_stop(struct damon_ctx
+               ctx->kdamond_stop = true;
+               mutex_unlock(&ctx->kdamond_lock);
+               while (damon_kdamond_running(ctx))
+-                      usleep_range(ctx->sample_interval,
+-                                      ctx->sample_interval * 2);
++                      kdamond_usleep(ctx->sample_interval);
+               return 0;
+       }
+       mutex_unlock(&ctx->kdamond_lock);
+@@ -670,7 +678,7 @@ static int kdamond_fn(void *data)
+                               ctx->callback.after_sampling(ctx))
+                       set_kdamond_stop(ctx);
+-              usleep_range(ctx->sample_interval, ctx->sample_interval + 1);
++              kdamond_usleep(ctx->sample_interval);
+               if (ctx->primitive.check_accesses)
+                       max_nr_accesses = ctx->primitive.check_accesses(ctx);
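
The reason switching the sleep state fixes the report: the load average counts runnable tasks plus tasks in TASK_UNINTERRUPTIBLE, and TASK_IDLE is simply an uninterruptible sleep with the accounting opt-out bit set (from include/linux/sched.h). kdamond therefore still cannot be disturbed by signals, but no longer shows up as load.

/* An uninterruptible sleep that TASK_NOLOAD keeps out of the loadavg count. */
#define TASK_IDLE       (TASK_UNINTERRUPTIBLE | TASK_NOLOAD)
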
diff --git a/queue-5.15/mm-slub-fix-endianness-bug-for-alloc-free_traces-attributes.patch b/queue-5.15/mm-slub-fix-endianness-bug-for-alloc-free_traces-attributes.patch
new file mode 100644 (file)
index 0000000..954e607
--- /dev/null
@@ -0,0 +1,92 @@
+From 005a79e5c254c3f60ec269a459cc41b55028c798 Mon Sep 17 00:00:00 2001
+From: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
+Date: Fri, 10 Dec 2021 14:47:02 -0800
+Subject: mm/slub: fix endianness bug for alloc/free_traces attributes
+
+From: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
+
+commit 005a79e5c254c3f60ec269a459cc41b55028c798 upstream.
+
+On big-endian s390, the alloc/free_traces attributes produce endless
+output, because idx in slab_debugfs_show() is always 0.
+
+idx is de-referenced from *v, which points to a loff_t value, with
+
+    unsigned int idx = *(unsigned int *)v;
+
+This will only give the upper 32 bits on big-endian, which remain 0.
+
+Instead of only fixing this dereference, during discussion it seemed
+more appropriate to change the seq_ops so that they use an explicit
+iterator in the private loc_track struct.
+
+This patch adds idx to loc_track, which will also fix the endianness
+bug.
+
+Link: https://lore.kernel.org/r/20211117193932.4049412-1-gerald.schaefer@linux.ibm.com
+Link: https://lkml.kernel.org/r/20211126171848.17534-1-gerald.schaefer@linux.ibm.com
+Fixes: 64dd68497be7 ("mm: slub: move sysfs slab alloc/free interfaces to debugfs")
+Signed-off-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
+Reported-by: Steffen Maier <maier@linux.ibm.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Faiyaz Mohammed <faiyazm@codeaurora.org>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Christoph Lameter <cl@linux.com>
+Cc: Pekka Enberg <penberg@kernel.org>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/slub.c |   15 +++++++++------
+ 1 file changed, 9 insertions(+), 6 deletions(-)
+
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -5072,6 +5072,7 @@ struct loc_track {
+       unsigned long max;
+       unsigned long count;
+       struct location *loc;
++      loff_t idx;
+ };
+ static struct dentry *slab_debugfs_root;
+@@ -6035,11 +6036,11 @@ __initcall(slab_sysfs_init);
+ #if defined(CONFIG_SLUB_DEBUG) && defined(CONFIG_DEBUG_FS)
+ static int slab_debugfs_show(struct seq_file *seq, void *v)
+ {
+-
+-      struct location *l;
+-      unsigned int idx = *(unsigned int *)v;
+       struct loc_track *t = seq->private;
++      struct location *l;
++      unsigned long idx;
++      idx = (unsigned long) t->idx;
+       if (idx < t->count) {
+               l = &t->loc[idx];
+@@ -6088,16 +6089,18 @@ static void *slab_debugfs_next(struct se
+ {
+       struct loc_track *t = seq->private;
+-      v = ppos;
+-      ++*ppos;
++      t->idx = ++(*ppos);
+       if (*ppos <= t->count)
+-              return v;
++              return ppos;
+       return NULL;
+ }
+ static void *slab_debugfs_start(struct seq_file *seq, loff_t *ppos)
+ {
++      struct loc_track *t = seq->private;
++
++      t->idx = *ppos;
+       return ppos;
+ }
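
A standalone illustration of the truncating read the patch removes: on a 64-bit big-endian machine such as s390, the first four bytes of the loff_t hold the (zero) high half, so idx never advances past 0.

#include <stdio.h>

int main(void)
{
        long long pos = 5;      /* stands in for the loff_t that *ppos points to */
        unsigned int idx = *(unsigned int *)&pos;

        /* Prints 5 on little-endian, 0 on big-endian. */
        printf("idx = %u\n", idx);
        return 0;
}
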
diff --git a/queue-5.15/series b/queue-5.15/series
index f8f266c6dac7a54a26dd1044616dd5ee38e596de..598e80188034682e749be8e337e00dd10b6e4385 100644 (file)
--- a/queue-5.15/series
@@ -54,3 +54,10 @@ net-dsa-mv88e6xxx-error-handling-for-serdes_power-functions.patch
 net-dsa-felix-fix-memory-leak-in-felix_setup_mmio_filtering.patch
 net-sched-fq_pie-prevent-dismantle-issue.patch
 net-mvpp2-fix-xdp-rx-queues-registering.patch
+kvm-x86-don-t-warn-if-userspace-mucks-with-rcx-during-string-i-o-exit.patch
+kvm-x86-ignore-sparse-banks-size-for-an-all-cpus-non-sparse-ipi-req.patch
+kvm-x86-wait-for-ipis-to-be-delivered-when-handling-hyper-v-tlb-flush-hypercall.patch
+timers-implement-usleep_idle_range.patch
+mm-damon-core-fix-fake-load-reports-due-to-uninterruptible-sleeps.patch
+mm-slub-fix-endianness-bug-for-alloc-free_traces-attributes.patch
+mm-bdi-initialize-bdi_min_ratio-when-bdi-is-unregistered.patch
diff --git a/queue-5.15/timers-implement-usleep_idle_range.patch b/queue-5.15/timers-implement-usleep_idle_range.patch
new file mode 100644 (file)
index 0000000..4da2a70
--- /dev/null
@@ -0,0 +1,111 @@
+From e4779015fd5d2fb8390c258268addff24d6077c7 Mon Sep 17 00:00:00 2001
+From: SeongJae Park <sj@kernel.org>
+Date: Fri, 10 Dec 2021 14:46:22 -0800
+Subject: timers: implement usleep_idle_range()
+
+From: SeongJae Park <sj@kernel.org>
+
+commit e4779015fd5d2fb8390c258268addff24d6077c7 upstream.
+
+Patch series "mm/damon: Fix fake /proc/loadavg reports", v3.
+
+This patchset fixes DAMON's fake load report issue.  The first patch
+adds yet another variant of usleep_range() for this fix, and the second
+patch fixes the DAMON issue by making it use the newly introduced
+function.
+
+This patch (of 2):
+
+Some kernel threads such as DAMON could need to repeatedly sleep at
+microsecond granularity.  Because usleep_range() sleeps in uninterruptible
+state, however, such threads would make /proc/loadavg report fake load.
+
+To help such cases, this commit implements a variant of usleep_range()
+called usleep_idle_range().  It is the same as usleep_range() but sets the
+state of the current task to TASK_IDLE while sleeping.
+
+Link: https://lkml.kernel.org/r/20211126145015.15862-1-sj@kernel.org
+Link: https://lkml.kernel.org/r/20211126145015.15862-2-sj@kernel.org
+Signed-off-by: SeongJae Park <sj@kernel.org>
+Suggested-by: Andrew Morton <akpm@linux-foundation.org>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Oleksandr Natalenko <oleksandr@natalenko.name>
+Cc: John Stultz <john.stultz@linaro.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/delay.h |   14 +++++++++++++-
+ kernel/time/timer.c   |   16 +++++++++-------
+ 2 files changed, 22 insertions(+), 8 deletions(-)
+
+--- a/include/linux/delay.h
++++ b/include/linux/delay.h
+@@ -20,6 +20,7 @@
+  */
+ #include <linux/kernel.h>
++#include <linux/sched.h>
+ extern unsigned long loops_per_jiffy;
+@@ -58,7 +59,18 @@ void calibrate_delay(void);
+ void __attribute__((weak)) calibration_delay_done(void);
+ void msleep(unsigned int msecs);
+ unsigned long msleep_interruptible(unsigned int msecs);
+-void usleep_range(unsigned long min, unsigned long max);
++void usleep_range_state(unsigned long min, unsigned long max,
++                      unsigned int state);
++
++static inline void usleep_range(unsigned long min, unsigned long max)
++{
++      usleep_range_state(min, max, TASK_UNINTERRUPTIBLE);
++}
++
++static inline void usleep_idle_range(unsigned long min, unsigned long max)
++{
++      usleep_range_state(min, max, TASK_IDLE);
++}
+ static inline void ssleep(unsigned int seconds)
+ {
+--- a/kernel/time/timer.c
++++ b/kernel/time/timer.c
+@@ -2054,26 +2054,28 @@ unsigned long msleep_interruptible(unsig
+ EXPORT_SYMBOL(msleep_interruptible);
+ /**
+- * usleep_range - Sleep for an approximate time
+- * @min: Minimum time in usecs to sleep
+- * @max: Maximum time in usecs to sleep
++ * usleep_range_state - Sleep for an approximate time in a given state
++ * @min:      Minimum time in usecs to sleep
++ * @max:      Maximum time in usecs to sleep
++ * @state:    State of the current task that will be while sleeping
+  *
+  * In non-atomic context where the exact wakeup time is flexible, use
+- * usleep_range() instead of udelay().  The sleep improves responsiveness
++ * usleep_range_state() instead of udelay().  The sleep improves responsiveness
+  * by avoiding the CPU-hogging busy-wait of udelay(), and the range reduces
+  * power usage by allowing hrtimers to take advantage of an already-
+  * scheduled interrupt instead of scheduling a new one just for this sleep.
+  */
+-void __sched usleep_range(unsigned long min, unsigned long max)
++void __sched usleep_range_state(unsigned long min, unsigned long max,
++                              unsigned int state)
+ {
+       ktime_t exp = ktime_add_us(ktime_get(), min);
+       u64 delta = (u64)(max - min) * NSEC_PER_USEC;
+       for (;;) {
+-              __set_current_state(TASK_UNINTERRUPTIBLE);
++              __set_current_state(state);
+               /* Do not return before the requested sleep time has elapsed */
+               if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS))
+                       break;
+       }
+ }
+-EXPORT_SYMBOL(usleep_range);
++EXPORT_SYMBOL(usleep_range_state);
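
A hypothetical usage sketch for the new helper: a polling kthread that naps at microsecond granularity without inflating /proc/loadavg. poll_once() and the interval value are illustrative only, not part of the patch.

#include <linux/delay.h>
#include <linux/kthread.h>

static int poller_fn(void *data)
{
        unsigned long poll_interval_us = 100;   /* illustrative value */

        while (!kthread_should_stop()) {
                poll_once(data);                /* hypothetical work item */
                /* TASK_IDLE sleep: uninterruptible, but not counted as load. */
                usleep_idle_range(poll_interval_us, poll_interval_us + 1);
        }
        return 0;
}
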