]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.15
authorSasha Levin <sashal@kernel.org>
Sun, 15 Jan 2023 23:58:57 +0000 (18:58 -0500)
committerSasha Levin <sashal@kernel.org>
Sun, 15 Jan 2023 23:58:57 +0000 (18:58 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
38 files changed:
queue-5.15/af_unix-selftest-fix-the-size-of-the-parameter-to-co.patch [new file with mode: 0644]
queue-5.15/alsa-usb-audio-make-sure-to-stop-endpoints-before-cl.patch [new file with mode: 0644]
queue-5.15/alsa-usb-audio-relax-hw-constraints-for-implicit-fb-.patch [new file with mode: 0644]
queue-5.15/arm64-atomics-format-whitespace-consistently.patch [new file with mode: 0644]
queue-5.15/arm64-atomics-remove-ll-sc-trampolines.patch [new file with mode: 0644]
queue-5.15/arm64-cmpxchg_double-hazard-against-entire-exchange-.patch [new file with mode: 0644]
queue-5.15/asoc-wm8904-fix-wrong-outputs-volume-after-power-rea.patch [new file with mode: 0644]
queue-5.15/documentation-kvm-add-api-issues-section.patch [new file with mode: 0644]
queue-5.15/efi-fix-null-deref-in-init-error-path.patch [new file with mode: 0644]
queue-5.15/hvc-xen-lock-console-list-traversal.patch [new file with mode: 0644]
queue-5.15/igc-fix-pps-delta-between-two-synchronized-end-point.patch [new file with mode: 0644]
queue-5.15/io_uring-lock-overflowing-for-iopoll.patch [new file with mode: 0644]
queue-5.15/kvm-x86-do-not-return-host-topology-information-from.patch [new file with mode: 0644]
queue-5.15/mm-always-release-pages-to-the-buddy-allocator-in-me.patch [new file with mode: 0644]
queue-5.15/net-mlx5-fix-ptp-max-frequency-adjustment-range.patch [new file with mode: 0644]
queue-5.15/net-mlx5e-don-t-support-encap-rules-with-gbp-option.patch [new file with mode: 0644]
queue-5.15/net-sched-act_mpls-fix-warning-during-failed-attribu.patch [new file with mode: 0644]
queue-5.15/nfc-pn533-wait-for-out_urb-s-completion-in-pn533_usb.patch [new file with mode: 0644]
queue-5.15/octeontx2-af-fix-lmac-config-in-cgx_lmac_rx_tx_enabl.patch [new file with mode: 0644]
queue-5.15/octeontx2-pf-fix-resource-leakage-in-vf-driver-unbin.patch [new file with mode: 0644]
queue-5.15/perf-build-properly-guard-libbpf-includes.patch [new file with mode: 0644]
queue-5.15/platform-surface-aggregator-add-missing-call-to-ssam.patch [new file with mode: 0644]
queue-5.15/regulator-da9211-use-irq-handler-when-ready.patch [new file with mode: 0644]
queue-5.15/scsi-mpi3mr-refer-config_scsi_mpi3mr-in-makefile.patch [new file with mode: 0644]
queue-5.15/scsi-ufs-core-wlun-suspend-ssu-enter-hibern8-fail-re.patch [new file with mode: 0644]
queue-5.15/scsi-ufs-stop-using-the-clock-scaling-lock-in-the-er.patch [new file with mode: 0644]
queue-5.15/series
queue-5.15/tipc-fix-unexpected-link-reset-due-to-discovery-mess.patch [new file with mode: 0644]
queue-5.15/tools-nolibc-arch-mark-the-_start-symbol-as-weak.patch [new file with mode: 0644]
queue-5.15/tools-nolibc-arch-split-arch-specific-code-into-indi.patch [new file with mode: 0644]
queue-5.15/tools-nolibc-fix-the-o_-fcntl-open-macro-definitions.patch [new file with mode: 0644]
queue-5.15/tools-nolibc-remove-.global-_start-from-the-entry-po.patch [new file with mode: 0644]
queue-5.15/tools-nolibc-restore-mips-branch-ordering-in-the-_st.patch [new file with mode: 0644]
queue-5.15/tools-nolibc-std-move-the-standard-type-definitions-.patch [new file with mode: 0644]
queue-5.15/tools-nolibc-types-split-syscall-specific-definition.patch [new file with mode: 0644]
queue-5.15/tools-nolibc-use-pselect6-on-riscv.patch [new file with mode: 0644]
queue-5.15/tools-nolibc-x86-64-use-mov-60-eax-instead-of-mov-60.patch [new file with mode: 0644]
queue-5.15/tools-nolibc-x86-remove-r8-r9-and-r10-from-the-clobb.patch [new file with mode: 0644]

diff --git a/queue-5.15/af_unix-selftest-fix-the-size-of-the-parameter-to-co.patch b/queue-5.15/af_unix-selftest-fix-the-size-of-the-parameter-to-co.patch
new file mode 100644 (file)
index 0000000..026ef57
--- /dev/null
@@ -0,0 +1,58 @@
+From 88dc589c7a16d6678a8a8c7d4838e40e0b66667f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 7 Jan 2023 04:40:20 +0100
+Subject: af_unix: selftest: Fix the size of the parameter to connect()
+
+From: Mirsad Goran Todorovac <mirsad.todorovac@alu.unizg.hr>
+
+[ Upstream commit 7d6ceeb1875cc08dc3d1e558e191434d94840cd5 ]
+
+Adjust the size parameter in connect() to match the type of the parameter, to
+fix "No such file or directory" error in selftests/net/af_unix/
+test_unix_oob.c:127.
+
+The existing code happens to work provided that the autogenerated pathname
+is shorter than sizeof (struct sockaddr), which is why it hasn't been
+noticed earlier.
+
+Visible from the trace excerpt:
+
+bind(3, {sa_family=AF_UNIX, sun_path="unix_oob_453059"}, 110) = 0
+clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7fa6a6577a10) = 453060
+[pid <child>] connect(6, {sa_family=AF_UNIX, sun_path="unix_oob_45305"}, 16) = -1 ENOENT (No such file or directory)
+
+BUG: The filename is trimmed to sizeof (struct sockaddr).
+
+Cc: "David S. Miller" <davem@davemloft.net>
+Cc: Eric Dumazet <edumazet@google.com>
+Cc: Jakub Kicinski <kuba@kernel.org>
+Cc: Paolo Abeni <pabeni@redhat.com>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
+Cc: Florian Westphal <fw@strlen.de>
+Reviewed-by: Florian Westphal <fw@strlen.de>
+Fixes: 314001f0bf92 ("af_unix: Add OOB support")
+Signed-off-by: Mirsad Goran Todorovac <mirsad.todorovac@alu.unizg.hr>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/net/af_unix/test_unix_oob.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c
+index b57e91e1c3f2..532459a15067 100644
+--- a/tools/testing/selftests/net/af_unix/test_unix_oob.c
++++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c
+@@ -124,7 +124,7 @@ void producer(struct sockaddr_un *consumer_addr)
+       wait_for_signal(pipefd[0]);
+       if (connect(cfd, (struct sockaddr *)consumer_addr,
+-                   sizeof(struct sockaddr)) != 0) {
++                   sizeof(*consumer_addr)) != 0) {
+               perror("Connect failed");
+               kill(0, SIGTERM);
+               exit(1);
+-- 
+2.35.1
+
diff --git a/queue-5.15/alsa-usb-audio-make-sure-to-stop-endpoints-before-cl.patch b/queue-5.15/alsa-usb-audio-make-sure-to-stop-endpoints-before-cl.patch
new file mode 100644 (file)
index 0000000..60ebaf3
--- /dev/null
@@ -0,0 +1,45 @@
+From a1161e9bdc869b9208ba58751197d757ab3b1e39 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 Jan 2023 18:07:57 +0100
+Subject: ALSA: usb-audio: Make sure to stop endpoints before closing EPs
+
+From: Takashi Iwai <tiwai@suse.de>
+
+[ Upstream commit 0599313e26666e79f6e7fe1450588431b8cb25d5 ]
+
+At the PCM hw params, we may re-configure the endpoints and it's done
+by a temporary EP close followed by re-open.  A potential problem
+there is that the EP might be already running internally at the PCM
+prepare stage; it's seen typically in the playback stream with the
+implicit feedback sync.  As this stream start isn't tracked by the
+core PCM layer, we'd need to stop it explicitly, and that's the
+missing piece.
+
+This patch adds the stop_endpoints() call at snd_usb_hw_params() to
+ensure the stream is stopped before closing the EPs.
+
+Fixes: bf6313a0ff76 ("ALSA: usb-audio: Refactor endpoint management")
+Link: https://lore.kernel.org/r/4e509aea-e563-e592-e652-ba44af6733fe@veniogames.com
+Link: https://lore.kernel.org/r/20230102170759.29610-2-tiwai@suse.de
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/usb/pcm.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c
+index b6cd43c5ea3e..ef0c1baaefde 100644
+--- a/sound/usb/pcm.c
++++ b/sound/usb/pcm.c
+@@ -525,6 +525,8 @@ static int snd_usb_hw_params(struct snd_pcm_substream *substream,
+               if (snd_usb_endpoint_compatible(chip, subs->data_endpoint,
+                                               fmt, hw_params))
+                       goto unlock;
++              if (stop_endpoints(subs, false))
++                      sync_pending_stops(subs);
+               close_endpoints(chip, subs);
+       }
+-- 
+2.35.1
+
diff --git a/queue-5.15/alsa-usb-audio-relax-hw-constraints-for-implicit-fb-.patch b/queue-5.15/alsa-usb-audio-relax-hw-constraints-for-implicit-fb-.patch
new file mode 100644 (file)
index 0000000..0164341
--- /dev/null
@@ -0,0 +1,55 @@
+From 66479c042e90e6dcf70119d61ff69ab09c196669 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 Jan 2023 18:07:58 +0100
+Subject: ALSA: usb-audio: Relax hw constraints for implicit fb sync
+
+From: Takashi Iwai <tiwai@suse.de>
+
+[ Upstream commit d463ac1acb454fafed58f695cb3067fbf489f3a0 ]
+
+The fix commit e4ea77f8e53f ("ALSA: usb-audio: Always apply
+the hw constraints for implicit fb sync") tried to address the bug
+where an incorrect PCM parameter is chosen when two (implicit fb)
+streams are set up at the same time.  This change had, however, a
+side effect: once the sync endpoint is chosen and set up, this
+restriction is applied at the next hw params unless it's freed via hw
+free explicitly.
+
+This patch is a workaround for the problem by relaxing the hw
+constraints a bit for the implicit fb sync.  We still keep applying
+the hw constraints for implicit fb sync, but only when the matching
+sync EP is being used by other streams.
+
+Fixes: e4ea77f8e53f ("ALSA: usb-audio: Always apply the hw constraints for implicit fb sync")
+Reported-by: Ruud van Asseldonk <ruud@veniogames.com>
+Link: https://lore.kernel.org/r/4e509aea-e563-e592-e652-ba44af6733fe@veniogames.com
+Link: https://lore.kernel.org/r/20230102170759.29610-3-tiwai@suse.de
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/usb/pcm.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c
+index ef0c1baaefde..87a30be64324 100644
+--- a/sound/usb/pcm.c
++++ b/sound/usb/pcm.c
+@@ -909,8 +909,13 @@ get_sync_ep_from_substream(struct snd_usb_substream *subs)
+                       continue;
+               /* for the implicit fb, check the sync ep as well */
+               ep = snd_usb_get_endpoint(chip, fp->sync_ep);
+-              if (ep && ep->cur_audiofmt)
+-                      return ep;
++              if (ep && ep->cur_audiofmt) {
++                      /* ditto, if the sync (data) ep is used by others,
++                       * this stream is restricted by the sync ep
++                       */
++                      if (ep != subs->sync_endpoint || ep->opened > 1)
++                              return ep;
++              }
+       }
+       return NULL;
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.15/arm64-atomics-format-whitespace-consistently.patch b/queue-5.15/arm64-atomics-format-whitespace-consistently.patch
new file mode 100644 (file)
index 0000000..d4da7a0
--- /dev/null
@@ -0,0 +1,276 @@
+From f4b01789fdf318743f30f031a4860352f9c13b39 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 10 Dec 2021 15:14:06 +0000
+Subject: arm64: atomics: format whitespace consistently
+
+From: Mark Rutland <mark.rutland@arm.com>
+
+[ Upstream commit 8e6082e94aac6d0338883b5953631b662a5a9188 ]
+
+The code for the atomic ops is formatted inconsistently, and while this
+is not a functional problem it is rather distracting when working on
+them.
+
+Some ops have consistent indentation, e.g.
+
+| #define ATOMIC_OP_ADD_RETURN(name, mb, cl...)                           \
+| static inline int __lse_atomic_add_return##name(int i, atomic_t *v)     \
+| {                                                                       \
+|         u32 tmp;                                                        \
+|                                                                         \
+|         asm volatile(                                                   \
+|         __LSE_PREAMBLE                                                  \
+|         "       ldadd" #mb "    %w[i], %w[tmp], %[v]\n"                 \
+|         "       add     %w[i], %w[i], %w[tmp]"                          \
+|         : [i] "+r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp)        \
+|         : "r" (v)                                                       \
+|         : cl);                                                          \
+|                                                                         \
+|         return i;                                                       \
+| }
+
+While others have negative indentation for some lines, and/or have
+misaligned trailing backslashes, e.g.
+
+| static inline void __lse_atomic_##op(int i, atomic_t *v)                        \
+| {                                                                       \
+|         asm volatile(                                                   \
+|         __LSE_PREAMBLE                                                  \
+| "       " #asm_op "     %w[i], %[v]\n"                                  \
+|         : [i] "+r" (i), [v] "+Q" (v->counter)                           \
+|         : "r" (v));                                                     \
+| }
+
+This patch makes the indentation consistent and also aligns the trailing
+backslashes. This makes the code easier to read for those (like myself)
+who are easily distracted by these inconsistencies.
+
+This is intended as a cleanup.
+There should be no functional change as a result of this patch.
+
+Signed-off-by: Mark Rutland <mark.rutland@arm.com>
+Cc: Boqun Feng <boqun.feng@gmail.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will@kernel.org>
+Acked-by: Will Deacon <will@kernel.org>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20211210151410.2782645-2-mark.rutland@arm.com
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Stable-dep-of: 031af50045ea ("arm64: cmpxchg_double*: hazard against entire exchange variable")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm64/include/asm/atomic_ll_sc.h | 86 +++++++++++++--------------
+ arch/arm64/include/asm/atomic_lse.h   | 14 ++---
+ 2 files changed, 50 insertions(+), 50 deletions(-)
+
+diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
+index 13869b76b58c..fe0db8d416fb 100644
+--- a/arch/arm64/include/asm/atomic_ll_sc.h
++++ b/arch/arm64/include/asm/atomic_ll_sc.h
+@@ -44,11 +44,11 @@ __ll_sc_atomic_##op(int i, atomic_t *v)                                    \
+                                                                       \
+       asm volatile("// atomic_" #op "\n"                              \
+       __LL_SC_FALLBACK(                                               \
+-"     prfm    pstl1strm, %2\n"                                        \
+-"1:   ldxr    %w0, %2\n"                                              \
+-"     " #asm_op "     %w0, %w0, %w3\n"                                \
+-"     stxr    %w1, %w0, %2\n"                                         \
+-"     cbnz    %w1, 1b\n")                                             \
++      "       prfm    pstl1strm, %2\n"                                \
++      "1:     ldxr    %w0, %2\n"                                      \
++      "       " #asm_op "     %w0, %w0, %w3\n"                        \
++      "       stxr    %w1, %w0, %2\n"                                 \
++      "       cbnz    %w1, 1b\n")                                     \
+       : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)                \
+       : __stringify(constraint) "r" (i));                             \
+ }
+@@ -62,12 +62,12 @@ __ll_sc_atomic_##op##_return##name(int i, atomic_t *v)                     \
+                                                                       \
+       asm volatile("// atomic_" #op "_return" #name "\n"              \
+       __LL_SC_FALLBACK(                                               \
+-"     prfm    pstl1strm, %2\n"                                        \
+-"1:   ld" #acq "xr    %w0, %2\n"                                      \
+-"     " #asm_op "     %w0, %w0, %w3\n"                                \
+-"     st" #rel "xr    %w1, %w0, %2\n"                                 \
+-"     cbnz    %w1, 1b\n"                                              \
+-"     " #mb )                                                         \
++      "       prfm    pstl1strm, %2\n"                                \
++      "1:     ld" #acq "xr    %w0, %2\n"                              \
++      "       " #asm_op "     %w0, %w0, %w3\n"                        \
++      "       st" #rel "xr    %w1, %w0, %2\n"                         \
++      "       cbnz    %w1, 1b\n"                                      \
++      "       " #mb )                                                 \
+       : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)                \
+       : __stringify(constraint) "r" (i)                               \
+       : cl);                                                          \
+@@ -84,12 +84,12 @@ __ll_sc_atomic_fetch_##op##name(int i, atomic_t *v)                        \
+                                                                       \
+       asm volatile("// atomic_fetch_" #op #name "\n"                  \
+       __LL_SC_FALLBACK(                                               \
+-"     prfm    pstl1strm, %3\n"                                        \
+-"1:   ld" #acq "xr    %w0, %3\n"                                      \
+-"     " #asm_op "     %w1, %w0, %w4\n"                                \
+-"     st" #rel "xr    %w2, %w1, %3\n"                                 \
+-"     cbnz    %w2, 1b\n"                                              \
+-"     " #mb )                                                         \
++      "       prfm    pstl1strm, %3\n"                                \
++      "1:     ld" #acq "xr    %w0, %3\n"                              \
++      "       " #asm_op "     %w1, %w0, %w4\n"                        \
++      "       st" #rel "xr    %w2, %w1, %3\n"                         \
++      "       cbnz    %w2, 1b\n"                                      \
++      "       " #mb )                                                 \
+       : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter)   \
+       : __stringify(constraint) "r" (i)                               \
+       : cl);                                                          \
+@@ -143,11 +143,11 @@ __ll_sc_atomic64_##op(s64 i, atomic64_t *v)                              \
+                                                                       \
+       asm volatile("// atomic64_" #op "\n"                            \
+       __LL_SC_FALLBACK(                                               \
+-"     prfm    pstl1strm, %2\n"                                        \
+-"1:   ldxr    %0, %2\n"                                               \
+-"     " #asm_op "     %0, %0, %3\n"                                   \
+-"     stxr    %w1, %0, %2\n"                                          \
+-"     cbnz    %w1, 1b")                                               \
++      "       prfm    pstl1strm, %2\n"                                \
++      "1:     ldxr    %0, %2\n"                                       \
++      "       " #asm_op "     %0, %0, %3\n"                           \
++      "       stxr    %w1, %0, %2\n"                                  \
++      "       cbnz    %w1, 1b")                                       \
+       : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)                \
+       : __stringify(constraint) "r" (i));                             \
+ }
+@@ -161,12 +161,12 @@ __ll_sc_atomic64_##op##_return##name(s64 i, atomic64_t *v)               \
+                                                                       \
+       asm volatile("// atomic64_" #op "_return" #name "\n"            \
+       __LL_SC_FALLBACK(                                               \
+-"     prfm    pstl1strm, %2\n"                                        \
+-"1:   ld" #acq "xr    %0, %2\n"                                       \
+-"     " #asm_op "     %0, %0, %3\n"                                   \
+-"     st" #rel "xr    %w1, %0, %2\n"                                  \
+-"     cbnz    %w1, 1b\n"                                              \
+-"     " #mb )                                                         \
++      "       prfm    pstl1strm, %2\n"                                \
++      "1:     ld" #acq "xr    %0, %2\n"                               \
++      "       " #asm_op "     %0, %0, %3\n"                           \
++      "       st" #rel "xr    %w1, %0, %2\n"                          \
++      "       cbnz    %w1, 1b\n"                                      \
++      "       " #mb )                                                 \
+       : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)                \
+       : __stringify(constraint) "r" (i)                               \
+       : cl);                                                          \
+@@ -176,19 +176,19 @@ __ll_sc_atomic64_##op##_return##name(s64 i, atomic64_t *v)               \
+ #define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op, constraint)\
+ static inline long                                                    \
+-__ll_sc_atomic64_fetch_##op##name(s64 i, atomic64_t *v)               \
++__ll_sc_atomic64_fetch_##op##name(s64 i, atomic64_t *v)                       \
+ {                                                                     \
+       s64 result, val;                                                \
+       unsigned long tmp;                                              \
+                                                                       \
+       asm volatile("// atomic64_fetch_" #op #name "\n"                \
+       __LL_SC_FALLBACK(                                               \
+-"     prfm    pstl1strm, %3\n"                                        \
+-"1:   ld" #acq "xr    %0, %3\n"                                       \
+-"     " #asm_op "     %1, %0, %4\n"                                   \
+-"     st" #rel "xr    %w2, %1, %3\n"                                  \
+-"     cbnz    %w2, 1b\n"                                              \
+-"     " #mb )                                                         \
++      "       prfm    pstl1strm, %3\n"                                \
++      "1:     ld" #acq "xr    %0, %3\n"                               \
++      "       " #asm_op "     %1, %0, %4\n"                           \
++      "       st" #rel "xr    %w2, %1, %3\n"                          \
++      "       cbnz    %w2, 1b\n"                                      \
++      "       " #mb )                                                 \
+       : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter)   \
+       : __stringify(constraint) "r" (i)                               \
+       : cl);                                                          \
+@@ -241,14 +241,14 @@ __ll_sc_atomic64_dec_if_positive(atomic64_t *v)
+       asm volatile("// atomic64_dec_if_positive\n"
+       __LL_SC_FALLBACK(
+-"     prfm    pstl1strm, %2\n"
+-"1:   ldxr    %0, %2\n"
+-"     subs    %0, %0, #1\n"
+-"     b.lt    2f\n"
+-"     stlxr   %w1, %0, %2\n"
+-"     cbnz    %w1, 1b\n"
+-"     dmb     ish\n"
+-"2:")
++      "       prfm    pstl1strm, %2\n"
++      "1:     ldxr    %0, %2\n"
++      "       subs    %0, %0, #1\n"
++      "       b.lt    2f\n"
++      "       stlxr   %w1, %0, %2\n"
++      "       cbnz    %w1, 1b\n"
++      "       dmb     ish\n"
++      "2:")
+       : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
+       :
+       : "cc", "memory");
+diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
+index da3280f639cd..ab661375835e 100644
+--- a/arch/arm64/include/asm/atomic_lse.h
++++ b/arch/arm64/include/asm/atomic_lse.h
+@@ -11,11 +11,11 @@
+ #define __ASM_ATOMIC_LSE_H
+ #define ATOMIC_OP(op, asm_op)                                         \
+-static inline void __lse_atomic_##op(int i, atomic_t *v)                      \
++static inline void __lse_atomic_##op(int i, atomic_t *v)              \
+ {                                                                     \
+       asm volatile(                                                   \
+       __LSE_PREAMBLE                                                  \
+-"     " #asm_op "     %w[i], %[v]\n"                                  \
++      "       " #asm_op "     %w[i], %[v]\n"                          \
+       : [i] "+r" (i), [v] "+Q" (v->counter)                           \
+       : "r" (v));                                                     \
+ }
+@@ -32,7 +32,7 @@ static inline int __lse_atomic_fetch_##op##name(int i, atomic_t *v)  \
+ {                                                                     \
+       asm volatile(                                                   \
+       __LSE_PREAMBLE                                                  \
+-"     " #asm_op #mb " %w[i], %w[i], %[v]"                             \
++      "       " #asm_op #mb " %w[i], %w[i], %[v]"                     \
+       : [i] "+r" (i), [v] "+Q" (v->counter)                           \
+       : "r" (v)                                                       \
+       : cl);                                                          \
+@@ -130,7 +130,7 @@ static inline int __lse_atomic_sub_return##name(int i, atomic_t *v)        \
+       "       add     %w[i], %w[i], %w[tmp]"                          \
+       : [i] "+&r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp)       \
+       : "r" (v)                                                       \
+-      : cl);                                                  \
++      : cl);                                                          \
+                                                                       \
+       return i;                                                       \
+ }
+@@ -168,7 +168,7 @@ static inline void __lse_atomic64_##op(s64 i, atomic64_t *v)               \
+ {                                                                     \
+       asm volatile(                                                   \
+       __LSE_PREAMBLE                                                  \
+-"     " #asm_op "     %[i], %[v]\n"                                   \
++      "       " #asm_op "     %[i], %[v]\n"                           \
+       : [i] "+r" (i), [v] "+Q" (v->counter)                           \
+       : "r" (v));                                                     \
+ }
+@@ -185,7 +185,7 @@ static inline long __lse_atomic64_fetch_##op##name(s64 i, atomic64_t *v)\
+ {                                                                     \
+       asm volatile(                                                   \
+       __LSE_PREAMBLE                                                  \
+-"     " #asm_op #mb " %[i], %[i], %[v]"                               \
++      "       " #asm_op #mb " %[i], %[i], %[v]"                       \
+       : [i] "+r" (i), [v] "+Q" (v->counter)                           \
+       : "r" (v)                                                       \
+       : cl);                                                          \
+@@ -272,7 +272,7 @@ static inline void __lse_atomic64_sub(s64 i, atomic64_t *v)
+ }
+ #define ATOMIC64_OP_SUB_RETURN(name, mb, cl...)                               \
+-static inline long __lse_atomic64_sub_return##name(s64 i, atomic64_t *v)      \
++static inline long __lse_atomic64_sub_return##name(s64 i, atomic64_t *v)\
+ {                                                                     \
+       unsigned long tmp;                                              \
+                                                                       \
+-- 
+2.35.1
+
diff --git a/queue-5.15/arm64-atomics-remove-ll-sc-trampolines.patch b/queue-5.15/arm64-atomics-remove-ll-sc-trampolines.patch
new file mode 100644 (file)
index 0000000..8d246ca
--- /dev/null
@@ -0,0 +1,274 @@
+From ce4cd975cf8c2dd5e5f258ad5a84f88473b7271e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 17 Aug 2022 16:59:13 +0100
+Subject: arm64: atomics: remove LL/SC trampolines
+
+From: Mark Rutland <mark.rutland@arm.com>
+
+[ Upstream commit b2c3ccbd0011bb3b51d0fec24cb3a5812b1ec8ea ]
+
+When CONFIG_ARM64_LSE_ATOMICS=y, each use of an LL/SC atomic results in
+a fragment of code being generated in a subsection without a clear
+association with its caller. A trampoline in the caller branches to the
+LL/SC atomic with a direct branch, and the atomic directly branches
+back into its trampoline.
+
+This breaks backtracing, as any PC within the out-of-line fragment will
+be symbolized as an offset from the nearest prior symbol (which may not
+be the function using the atomic), and since the atomic returns with a
+direct branch, the caller's PC may be missing from the backtrace.
+
+For example, with secondary_start_kernel() hacked to contain
+atomic_inc(NULL), the resulting exception can be reported as being taken
+from cpus_are_stuck_in_kernel():
+
+| Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
+| Mem abort info:
+|   ESR = 0x0000000096000004
+|   EC = 0x25: DABT (current EL), IL = 32 bits
+|   SET = 0, FnV = 0
+|   EA = 0, S1PTW = 0
+|   FSC = 0x04: level 0 translation fault
+| Data abort info:
+|   ISV = 0, ISS = 0x00000004
+|   CM = 0, WnR = 0
+| [0000000000000000] user address but active_mm is swapper
+| Internal error: Oops: 96000004 [#1] PREEMPT SMP
+| Modules linked in:
+| CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.19.0-11219-geb555cb5b794-dirty #3
+| Hardware name: linux,dummy-virt (DT)
+| pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
+| pc : cpus_are_stuck_in_kernel+0xa4/0x120
+| lr : secondary_start_kernel+0x164/0x170
+| sp : ffff80000a4cbe90
+| x29: ffff80000a4cbe90 x28: 0000000000000000 x27: 0000000000000000
+| x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000
+| x23: 0000000000000000 x22: 0000000000000000 x21: 0000000000000000
+| x20: 0000000000000001 x19: 0000000000000001 x18: 0000000000000008
+| x17: 3030383832343030 x16: 3030303030307830 x15: ffff80000a4cbab0
+| x14: 0000000000000001 x13: 5d31666130663133 x12: 3478305b20313030
+| x11: 3030303030303078 x10: 3020726f73736563 x9 : 726f737365636f72
+| x8 : ffff800009ff2ef0 x7 : 0000000000000003 x6 : 0000000000000000
+| x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000100
+| x2 : 0000000000000000 x1 : ffff0000029bd880 x0 : 0000000000000000
+| Call trace:
+|  cpus_are_stuck_in_kernel+0xa4/0x120
+|  __secondary_switched+0xb0/0xb4
+| Code: 35ffffa3 17fffc6c d53cd040 f9800011 (885f7c01)
+| ---[ end trace 0000000000000000 ]---
+
+This is confusing and hinders debugging, and will be problematic for
+CONFIG_LIVEPATCH as these cases cannot be unwound reliably.
+
+This is very similar to recent issues with out-of-line exception fixups,
+which were removed in commits:
+
+  35d67794b8828333 ("arm64: lib: __arch_clear_user(): fold fixups into body")
+  4012e0e22739eef9 ("arm64: lib: __arch_copy_from_user(): fold fixups into body")
+  139f9ab73d60cf76 ("arm64: lib: __arch_copy_to_user(): fold fixups into body")
+
+When the trampolines were introduced in commit:
+
+  addfc38672c73efd ("arm64: atomics: avoid out-of-line ll/sc atomics")
+
+The rationale was to improve icache performance by grouping the LL/SC
+atomics together. This has never been measured, and this theoretical
+benefit is outweighed by other factors:
+
+* As the subsections are collapsed into sections at object file
+  granularity, these are spread out throughout the kernel and can share
+  cachelines with unrelated code regardless.
+
+* GCC 12.1.0 has been observed to place the trampoline out-of-line in
+  specialised __ll_sc_*() functions, introducing more branching than was
+  intended.
+
+* Removing the trampolines has been observed to shrink a defconfig
+  kernel Image by 64KiB when building with GCC 12.1.0.
+
+This patch removes the LL/SC trampolines, meaning that the LL/SC atomics
+will be inlined into their callers (or placed in out-of-line functions
+using regular BL/RET pairs). When CONFIG_ARM64_LSE_ATOMICS=y, the LL/SC
+atomics are always called in an unlikely branch, and will be placed in a
+cold portion of the function, so this should have minimal impact to the
+hot paths.
+
+Other than the improved backtracing, there should be no functional
+change as a result of this patch.
+
+Signed-off-by: Mark Rutland <mark.rutland@arm.com>
+Cc: Will Deacon <will@kernel.org>
+Link: https://lore.kernel.org/r/20220817155914.3975112-2-mark.rutland@arm.com
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Stable-dep-of: 031af50045ea ("arm64: cmpxchg_double*: hazard against entire exchange variable")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm64/include/asm/atomic_ll_sc.h | 40 ++++++---------------------
+ 1 file changed, 9 insertions(+), 31 deletions(-)
+
+diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
+index fe0db8d416fb..906e2d8c254c 100644
+--- a/arch/arm64/include/asm/atomic_ll_sc.h
++++ b/arch/arm64/include/asm/atomic_ll_sc.h
+@@ -12,19 +12,6 @@
+ #include <linux/stringify.h>
+-#ifdef CONFIG_ARM64_LSE_ATOMICS
+-#define __LL_SC_FALLBACK(asm_ops)                                     \
+-"     b       3f\n"                                                   \
+-"     .subsection     1\n"                                            \
+-"3:\n"                                                                        \
+-asm_ops "\n"                                                          \
+-"     b       4f\n"                                                   \
+-"     .previous\n"                                                    \
+-"4:\n"
+-#else
+-#define __LL_SC_FALLBACK(asm_ops) asm_ops
+-#endif
+-
+ #ifndef CONFIG_CC_HAS_K_CONSTRAINT
+ #define K
+ #endif
+@@ -43,12 +30,11 @@ __ll_sc_atomic_##op(int i, atomic_t *v)                                    \
+       int result;                                                     \
+                                                                       \
+       asm volatile("// atomic_" #op "\n"                              \
+-      __LL_SC_FALLBACK(                                               \
+       "       prfm    pstl1strm, %2\n"                                \
+       "1:     ldxr    %w0, %2\n"                                      \
+       "       " #asm_op "     %w0, %w0, %w3\n"                        \
+       "       stxr    %w1, %w0, %2\n"                                 \
+-      "       cbnz    %w1, 1b\n")                                     \
++      "       cbnz    %w1, 1b\n"                                      \
+       : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)                \
+       : __stringify(constraint) "r" (i));                             \
+ }
+@@ -61,13 +47,12 @@ __ll_sc_atomic_##op##_return##name(int i, atomic_t *v)                     \
+       int result;                                                     \
+                                                                       \
+       asm volatile("// atomic_" #op "_return" #name "\n"              \
+-      __LL_SC_FALLBACK(                                               \
+       "       prfm    pstl1strm, %2\n"                                \
+       "1:     ld" #acq "xr    %w0, %2\n"                              \
+       "       " #asm_op "     %w0, %w0, %w3\n"                        \
+       "       st" #rel "xr    %w1, %w0, %2\n"                         \
+       "       cbnz    %w1, 1b\n"                                      \
+-      "       " #mb )                                                 \
++      "       " #mb                                                   \
+       : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)                \
+       : __stringify(constraint) "r" (i)                               \
+       : cl);                                                          \
+@@ -83,13 +68,12 @@ __ll_sc_atomic_fetch_##op##name(int i, atomic_t *v)                        \
+       int val, result;                                                \
+                                                                       \
+       asm volatile("// atomic_fetch_" #op #name "\n"                  \
+-      __LL_SC_FALLBACK(                                               \
+       "       prfm    pstl1strm, %3\n"                                \
+       "1:     ld" #acq "xr    %w0, %3\n"                              \
+       "       " #asm_op "     %w1, %w0, %w4\n"                        \
+       "       st" #rel "xr    %w2, %w1, %3\n"                         \
+       "       cbnz    %w2, 1b\n"                                      \
+-      "       " #mb )                                                 \
++      "       " #mb                                                   \
+       : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter)   \
+       : __stringify(constraint) "r" (i)                               \
+       : cl);                                                          \
+@@ -142,12 +126,11 @@ __ll_sc_atomic64_##op(s64 i, atomic64_t *v)                              \
+       unsigned long tmp;                                              \
+                                                                       \
+       asm volatile("// atomic64_" #op "\n"                            \
+-      __LL_SC_FALLBACK(                                               \
+       "       prfm    pstl1strm, %2\n"                                \
+       "1:     ldxr    %0, %2\n"                                       \
+       "       " #asm_op "     %0, %0, %3\n"                           \
+       "       stxr    %w1, %0, %2\n"                                  \
+-      "       cbnz    %w1, 1b")                                       \
++      "       cbnz    %w1, 1b"                                        \
+       : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)                \
+       : __stringify(constraint) "r" (i));                             \
+ }
+@@ -160,13 +143,12 @@ __ll_sc_atomic64_##op##_return##name(s64 i, atomic64_t *v)               \
+       unsigned long tmp;                                              \
+                                                                       \
+       asm volatile("// atomic64_" #op "_return" #name "\n"            \
+-      __LL_SC_FALLBACK(                                               \
+       "       prfm    pstl1strm, %2\n"                                \
+       "1:     ld" #acq "xr    %0, %2\n"                               \
+       "       " #asm_op "     %0, %0, %3\n"                           \
+       "       st" #rel "xr    %w1, %0, %2\n"                          \
+       "       cbnz    %w1, 1b\n"                                      \
+-      "       " #mb )                                                 \
++      "       " #mb                                                   \
+       : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)                \
+       : __stringify(constraint) "r" (i)                               \
+       : cl);                                                          \
+@@ -182,13 +164,12 @@ __ll_sc_atomic64_fetch_##op##name(s64 i, atomic64_t *v)                  \
+       unsigned long tmp;                                              \
+                                                                       \
+       asm volatile("// atomic64_fetch_" #op #name "\n"                \
+-      __LL_SC_FALLBACK(                                               \
+       "       prfm    pstl1strm, %3\n"                                \
+       "1:     ld" #acq "xr    %0, %3\n"                               \
+       "       " #asm_op "     %1, %0, %4\n"                           \
+       "       st" #rel "xr    %w2, %1, %3\n"                          \
+       "       cbnz    %w2, 1b\n"                                      \
+-      "       " #mb )                                                 \
++      "       " #mb                                                   \
+       : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter)   \
+       : __stringify(constraint) "r" (i)                               \
+       : cl);                                                          \
+@@ -240,7 +221,6 @@ __ll_sc_atomic64_dec_if_positive(atomic64_t *v)
+       unsigned long tmp;
+       asm volatile("// atomic64_dec_if_positive\n"
+-      __LL_SC_FALLBACK(
+       "       prfm    pstl1strm, %2\n"
+       "1:     ldxr    %0, %2\n"
+       "       subs    %0, %0, #1\n"
+@@ -248,7 +228,7 @@ __ll_sc_atomic64_dec_if_positive(atomic64_t *v)
+       "       stlxr   %w1, %0, %2\n"
+       "       cbnz    %w1, 1b\n"
+       "       dmb     ish\n"
+-      "2:")
++      "2:"
+       : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
+       :
+       : "cc", "memory");
+@@ -274,7 +254,6 @@ __ll_sc__cmpxchg_case_##name##sz(volatile void *ptr,                       \
+               old = (u##sz)old;                                       \
+                                                                       \
+       asm volatile(                                                   \
+-      __LL_SC_FALLBACK(                                               \
+       "       prfm    pstl1strm, %[v]\n"                              \
+       "1:     ld" #acq "xr" #sfx "\t%" #w "[oldval], %[v]\n"          \
+       "       eor     %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n"  \
+@@ -282,7 +261,7 @@ __ll_sc__cmpxchg_case_##name##sz(volatile void *ptr,                       \
+       "       st" #rel "xr" #sfx "\t%w[tmp], %" #w "[new], %[v]\n"    \
+       "       cbnz    %w[tmp], 1b\n"                                  \
+       "       " #mb "\n"                                              \
+-      "2:")                                                           \
++      "2:"                                                            \
+       : [tmp] "=&r" (tmp), [oldval] "=&r" (oldval),                   \
+         [v] "+Q" (*(u##sz *)ptr)                                      \
+       : [old] __stringify(constraint) "r" (old), [new] "r" (new)      \
+@@ -326,7 +305,6 @@ __ll_sc__cmpxchg_double##name(unsigned long old1,                  \
+       unsigned long tmp, ret;                                         \
+                                                                       \
+       asm volatile("// __cmpxchg_double" #name "\n"                   \
+-      __LL_SC_FALLBACK(                                               \
+       "       prfm    pstl1strm, %2\n"                                \
+       "1:     ldxp    %0, %1, %2\n"                                   \
+       "       eor     %0, %0, %3\n"                                   \
+@@ -336,7 +314,7 @@ __ll_sc__cmpxchg_double##name(unsigned long old1,                  \
+       "       st" #rel "xp    %w0, %5, %6, %2\n"                      \
+       "       cbnz    %w0, 1b\n"                                      \
+       "       " #mb "\n"                                              \
+-      "2:")                                                           \
++      "2:"                                                            \
+       : "=&r" (tmp), "=&r" (ret), "+Q" (*(unsigned long *)ptr)        \
+       : "r" (old1), "r" (old2), "r" (new1), "r" (new2)                \
+       : cl);                                                          \
+-- 
+2.35.1
+
diff --git a/queue-5.15/arm64-cmpxchg_double-hazard-against-entire-exchange-.patch b/queue-5.15/arm64-cmpxchg_double-hazard-against-entire-exchange-.patch
new file mode 100644 (file)
index 0000000..09b6991
--- /dev/null
@@ -0,0 +1,185 @@
+From 99402850c3b026a25b9fcaa328aa4f154c60b1fb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 4 Jan 2023 15:16:26 +0000
+Subject: arm64: cmpxchg_double*: hazard against entire exchange variable
+
+From: Mark Rutland <mark.rutland@arm.com>
+
+[ Upstream commit 031af50045ea97ed4386eb3751ca2c134d0fc911 ]
+
+The inline assembly for arm64's cmpxchg_double*() implementations uses a
++Q constraint to hazard against other accesses to the memory location
+being exchanged. However, the pointer passed to the constraint is a
+pointer to unsigned long, and thus the hazard only applies to the first
+8 bytes of the location.
+
+GCC can take advantage of this, assuming that other portions of the
+location are unchanged, leading to a number of potential problems.
+
+This is similar to what we fixed back in commit:
+
+  fee960bed5e857eb ("arm64: xchg: hazard against entire exchange variable")
+
+... but we forgot to adjust cmpxchg_double*() similarly at the same
+time.
+
+The same problem applies, as demonstrated with the following test:
+
+| struct big {
+|         u64 lo, hi;
+| } __aligned(128);
+|
+| unsigned long foo(struct big *b)
+| {
+|         u64 hi_old, hi_new;
+|
+|         hi_old = b->hi;
+|         cmpxchg_double_local(&b->lo, &b->hi, 0x12, 0x34, 0x56, 0x78);
+|         hi_new = b->hi;
+|
+|         return hi_old ^ hi_new;
+| }
+
+... which GCC 12.1.0 compiles as:
+
+| 0000000000000000 <foo>:
+|    0:   d503233f        paciasp
+|    4:   aa0003e4        mov     x4, x0
+|    8:   1400000e        b       40 <foo+0x40>
+|    c:   d2800240        mov     x0, #0x12                       // #18
+|   10:   d2800681        mov     x1, #0x34                       // #52
+|   14:   aa0003e5        mov     x5, x0
+|   18:   aa0103e6        mov     x6, x1
+|   1c:   d2800ac2        mov     x2, #0x56                       // #86
+|   20:   d2800f03        mov     x3, #0x78                       // #120
+|   24:   48207c82        casp    x0, x1, x2, x3, [x4]
+|   28:   ca050000        eor     x0, x0, x5
+|   2c:   ca060021        eor     x1, x1, x6
+|   30:   aa010000        orr     x0, x0, x1
+|   34:   d2800000        mov     x0, #0x0                        // #0    <--- BANG
+|   38:   d50323bf        autiasp
+|   3c:   d65f03c0        ret
+|   40:   d2800240        mov     x0, #0x12                       // #18
+|   44:   d2800681        mov     x1, #0x34                       // #52
+|   48:   d2800ac2        mov     x2, #0x56                       // #86
+|   4c:   d2800f03        mov     x3, #0x78                       // #120
+|   50:   f9800091        prfm    pstl1strm, [x4]
+|   54:   c87f1885        ldxp    x5, x6, [x4]
+|   58:   ca0000a5        eor     x5, x5, x0
+|   5c:   ca0100c6        eor     x6, x6, x1
+|   60:   aa0600a6        orr     x6, x5, x6
+|   64:   b5000066        cbnz    x6, 70 <foo+0x70>
+|   68:   c8250c82        stxp    w5, x2, x3, [x4]
+|   6c:   35ffff45        cbnz    w5, 54 <foo+0x54>
+|   70:   d2800000        mov     x0, #0x0                        // #0     <--- BANG
+|   74:   d50323bf        autiasp
+|   78:   d65f03c0        ret
+
+Notice that at the lines with "BANG" comments, GCC has assumed that the
+higher 8 bytes are unchanged by the cmpxchg_double() call, and that
+`hi_old ^ hi_new` can be reduced to a constant zero, for both LSE and
+LL/SC versions of cmpxchg_double().
+
+This patch fixes the issue by passing a pointer to __uint128_t into the
++Q constraint, ensuring that the compiler hazards against the entire 16
+bytes being modified.
+
+With this change, GCC 12.1.0 compiles the above test as:
+
+| 0000000000000000 <foo>:
+|    0:   f9400407        ldr     x7, [x0, #8]
+|    4:   d503233f        paciasp
+|    8:   aa0003e4        mov     x4, x0
+|    c:   1400000f        b       48 <foo+0x48>
+|   10:   d2800240        mov     x0, #0x12                       // #18
+|   14:   d2800681        mov     x1, #0x34                       // #52
+|   18:   aa0003e5        mov     x5, x0
+|   1c:   aa0103e6        mov     x6, x1
+|   20:   d2800ac2        mov     x2, #0x56                       // #86
+|   24:   d2800f03        mov     x3, #0x78                       // #120
+|   28:   48207c82        casp    x0, x1, x2, x3, [x4]
+|   2c:   ca050000        eor     x0, x0, x5
+|   30:   ca060021        eor     x1, x1, x6
+|   34:   aa010000        orr     x0, x0, x1
+|   38:   f9400480        ldr     x0, [x4, #8]
+|   3c:   d50323bf        autiasp
+|   40:   ca0000e0        eor     x0, x7, x0
+|   44:   d65f03c0        ret
+|   48:   d2800240        mov     x0, #0x12                       // #18
+|   4c:   d2800681        mov     x1, #0x34                       // #52
+|   50:   d2800ac2        mov     x2, #0x56                       // #86
+|   54:   d2800f03        mov     x3, #0x78                       // #120
+|   58:   f9800091        prfm    pstl1strm, [x4]
+|   5c:   c87f1885        ldxp    x5, x6, [x4]
+|   60:   ca0000a5        eor     x5, x5, x0
+|   64:   ca0100c6        eor     x6, x6, x1
+|   68:   aa0600a6        orr     x6, x5, x6
+|   6c:   b5000066        cbnz    x6, 78 <foo+0x78>
+|   70:   c8250c82        stxp    w5, x2, x3, [x4]
+|   74:   35ffff45        cbnz    w5, 5c <foo+0x5c>
+|   78:   f9400480        ldr     x0, [x4, #8]
+|   7c:   d50323bf        autiasp
+|   80:   ca0000e0        eor     x0, x7, x0
+|   84:   d65f03c0        ret
+
+... sampling the high 8 bytes before and after the cmpxchg, and
+performing an EOR, as we'd expect.
+
+For backporting, I've tested this atop linux-4.9.y with GCC 5.5.0. Note
+that linux-4.9.y is the oldest currently supported stable release, and
+mandates GCC 5.1+. Unfortunately I couldn't get a GCC 5.1 binary to run
+on my machines due to library incompatibilities.
+
+I've also used a standalone test to check that we can use a __uint128_t
+pointer in a +Q constraint at least as far back as GCC 4.8.5 and LLVM
+3.9.1.
+
+Fixes: 5284e1b4bc8a ("arm64: xchg: Implement cmpxchg_double")
+Fixes: e9a4b795652f ("arm64: cmpxchg_dbl: patch in lse instructions when supported by the CPU")
+Reported-by: Boqun Feng <boqun.feng@gmail.com>
+Link: https://lore.kernel.org/lkml/Y6DEfQXymYVgL3oJ@boqun-archlinux/
+Reported-by: Peter Zijlstra <peterz@infradead.org>
+Link: https://lore.kernel.org/lkml/Y6GXoO4qmH9OIZ5Q@hirez.programming.kicks-ass.net/
+Signed-off-by: Mark Rutland <mark.rutland@arm.com>
+Cc: stable@vger.kernel.org
+Cc: Arnd Bergmann <arnd@arndb.de>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Steve Capper <steve.capper@arm.com>
+Cc: Will Deacon <will@kernel.org>
+Link: https://lore.kernel.org/r/20230104151626.3262137-1-mark.rutland@arm.com
+Signed-off-by: Will Deacon <will@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm64/include/asm/atomic_ll_sc.h | 2 +-
+ arch/arm64/include/asm/atomic_lse.h   | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
+index 906e2d8c254c..abd302e521c0 100644
+--- a/arch/arm64/include/asm/atomic_ll_sc.h
++++ b/arch/arm64/include/asm/atomic_ll_sc.h
+@@ -315,7 +315,7 @@ __ll_sc__cmpxchg_double##name(unsigned long old1,                  \
+       "       cbnz    %w0, 1b\n"                                      \
+       "       " #mb "\n"                                              \
+       "2:"                                                            \
+-      : "=&r" (tmp), "=&r" (ret), "+Q" (*(unsigned long *)ptr)        \
++      : "=&r" (tmp), "=&r" (ret), "+Q" (*(__uint128_t *)ptr)          \
+       : "r" (old1), "r" (old2), "r" (new1), "r" (new2)                \
+       : cl);                                                          \
+                                                                       \
+diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
+index ab661375835e..28e96118c1e5 100644
+--- a/arch/arm64/include/asm/atomic_lse.h
++++ b/arch/arm64/include/asm/atomic_lse.h
+@@ -403,7 +403,7 @@ __lse__cmpxchg_double##name(unsigned long old1,                            \
+       "       eor     %[old2], %[old2], %[oldval2]\n"                 \
+       "       orr     %[old1], %[old1], %[old2]"                      \
+       : [old1] "+&r" (x0), [old2] "+&r" (x1),                         \
+-        [v] "+Q" (*(unsigned long *)ptr)                              \
++        [v] "+Q" (*(__uint128_t *)ptr)                                \
+       : [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4),             \
+         [oldval1] "r" (oldval1), [oldval2] "r" (oldval2)              \
+       : cl);                                                          \
+-- 
+2.35.1
+
diff --git a/queue-5.15/asoc-wm8904-fix-wrong-outputs-volume-after-power-rea.patch b/queue-5.15/asoc-wm8904-fix-wrong-outputs-volume-after-power-rea.patch
new file mode 100644 (file)
index 0000000..e7c7504
--- /dev/null
@@ -0,0 +1,69 @@
+From 61db47b6a4b039b0cc80af217b9bf88244e8befa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 23 Dec 2022 09:02:47 +0100
+Subject: ASoC: wm8904: fix wrong outputs volume after power reactivation
+
+From: Emanuele Ghidoli <emanuele.ghidoli@toradex.com>
+
+[ Upstream commit 472a6309c6467af89dbf660a8310369cc9cb041f ]
+
+Restore volume after charge pump and PGA activation to ensure
+that volume settings are correctly applied when re-enabling codec
+from SND_SOC_BIAS_OFF state.
+CLASS_W, CHARGE_PUMP and POWER_MANAGEMENT_2 register configuration
+affect how the volume registers are applied and must be configured first.
+
+Fixes: a91eb199e4dc ("ASoC: Initial WM8904 CODEC driver")
+Link: https://lore.kernel.org/all/c7864c35-738c-a867-a6a6-ddf9f98df7e7@gmail.com/
+Signed-off-by: Emanuele Ghidoli <emanuele.ghidoli@toradex.com>
+Signed-off-by: Francesco Dolcini <francesco.dolcini@toradex.com>
+Acked-by: Charles Keepax <ckeepax@opensource.cirrus.com>
+Link: https://lore.kernel.org/r/20221223080247.7258-1-francesco@dolcini.it
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/codecs/wm8904.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/sound/soc/codecs/wm8904.c b/sound/soc/codecs/wm8904.c
+index a02a77fef360..6759ce7e09ff 100644
+--- a/sound/soc/codecs/wm8904.c
++++ b/sound/soc/codecs/wm8904.c
+@@ -697,6 +697,7 @@ static int out_pga_event(struct snd_soc_dapm_widget *w,
+       int dcs_mask;
+       int dcs_l, dcs_r;
+       int dcs_l_reg, dcs_r_reg;
++      int an_out_reg;
+       int timeout;
+       int pwr_reg;
+@@ -712,6 +713,7 @@ static int out_pga_event(struct snd_soc_dapm_widget *w,
+               dcs_mask = WM8904_DCS_ENA_CHAN_0 | WM8904_DCS_ENA_CHAN_1;
+               dcs_r_reg = WM8904_DC_SERVO_8;
+               dcs_l_reg = WM8904_DC_SERVO_9;
++              an_out_reg = WM8904_ANALOGUE_OUT1_LEFT;
+               dcs_l = 0;
+               dcs_r = 1;
+               break;
+@@ -720,6 +722,7 @@ static int out_pga_event(struct snd_soc_dapm_widget *w,
+               dcs_mask = WM8904_DCS_ENA_CHAN_2 | WM8904_DCS_ENA_CHAN_3;
+               dcs_r_reg = WM8904_DC_SERVO_6;
+               dcs_l_reg = WM8904_DC_SERVO_7;
++              an_out_reg = WM8904_ANALOGUE_OUT2_LEFT;
+               dcs_l = 2;
+               dcs_r = 3;
+               break;
+@@ -792,6 +795,10 @@ static int out_pga_event(struct snd_soc_dapm_widget *w,
+               snd_soc_component_update_bits(component, reg,
+                                   WM8904_HPL_ENA_OUTP | WM8904_HPR_ENA_OUTP,
+                                   WM8904_HPL_ENA_OUTP | WM8904_HPR_ENA_OUTP);
++
++              /* Update volume, requires PGA to be powered */
++              val = snd_soc_component_read(component, an_out_reg);
++              snd_soc_component_write(component, an_out_reg, val);
+               break;
+       case SND_SOC_DAPM_POST_PMU:
+-- 
+2.35.1
+
diff --git a/queue-5.15/documentation-kvm-add-api-issues-section.patch b/queue-5.15/documentation-kvm-add-api-issues-section.patch
new file mode 100644 (file)
index 0000000..60ed9d8
--- /dev/null
@@ -0,0 +1,79 @@
+From dad1005b3501947da0a8e85e31656b6824932e35 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Mar 2022 12:07:12 +0100
+Subject: Documentation: KVM: add API issues section
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+[ Upstream commit cde363ab7ca7aea7a853851cd6a6745a9e1aaf5e ]
+
+Add a section to document all the different ways in which the KVM API sucks.
+
+I am sure there are way more, give people a place to vent so that userspace
+authors are aware.
+
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Message-Id: <20220322110712.222449-4-pbonzini@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/virt/kvm/api.rst | 46 ++++++++++++++++++++++++++++++++++
+ 1 file changed, 46 insertions(+)
+
+diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
+index a6729c8cf063..8826f8023f06 100644
+--- a/Documentation/virt/kvm/api.rst
++++ b/Documentation/virt/kvm/api.rst
+@@ -7265,3 +7265,49 @@ The argument to KVM_ENABLE_CAP is also a bitmask, and must be a subset
+ of the result of KVM_CHECK_EXTENSION.  KVM will forward to userspace
+ the hypercalls whose corresponding bit is in the argument, and return
+ ENOSYS for the others.
++
++9. Known KVM API problems
++=========================
++
++In some cases, KVM's API has some inconsistencies or common pitfalls
++that userspace need to be aware of.  This section details some of
++these issues.
++
++Most of them are architecture specific, so the section is split by
++architecture.
++
++9.1. x86
++--------
++
++``KVM_GET_SUPPORTED_CPUID`` issues
++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
++
++In general, ``KVM_GET_SUPPORTED_CPUID`` is designed so that it is possible
++to take its result and pass it directly to ``KVM_SET_CPUID2``.  This section
++documents some cases in which that requires some care.
++
++Local APIC features
++~~~~~~~~~~~~~~~~~~~
++
++CPU[EAX=1]:ECX[21] (X2APIC) is reported by ``KVM_GET_SUPPORTED_CPUID``,
++but it can only be enabled if ``KVM_CREATE_IRQCHIP`` or
++``KVM_ENABLE_CAP(KVM_CAP_IRQCHIP_SPLIT)`` are used to enable in-kernel emulation of
++the local APIC.
++
++The same is true for the ``KVM_FEATURE_PV_UNHALT`` paravirtualized feature.
++
++CPU[EAX=1]:ECX[24] (TSC_DEADLINE) is not reported by ``KVM_GET_SUPPORTED_CPUID``.
++It can be enabled if ``KVM_CAP_TSC_DEADLINE_TIMER`` is present and the kernel
++has enabled in-kernel emulation of the local APIC.
++
++Obsolete ioctls and capabilities
++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
++
++KVM_CAP_DISABLE_QUIRKS does not let userspace know which quirks are actually
++available.  Use ``KVM_CHECK_EXTENSION(KVM_CAP_DISABLE_QUIRKS2)`` instead if
++available.
++
++Ordering of KVM_GET_*/KVM_SET_* ioctls
++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
++
++TBD
+-- 
+2.35.1
+
diff --git a/queue-5.15/efi-fix-null-deref-in-init-error-path.patch b/queue-5.15/efi-fix-null-deref-in-init-error-path.patch
new file mode 100644 (file)
index 0000000..1fa7e97
--- /dev/null
@@ -0,0 +1,56 @@
+From 28ce0d52c7d8a7ad603e0b95ba7651855198a611 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 19 Dec 2022 10:10:04 +0100
+Subject: efi: fix NULL-deref in init error path
+
+From: Johan Hovold <johan+linaro@kernel.org>
+
+[ Upstream commit 703c13fe3c9af557d312f5895ed6a5fda2711104 ]
+
+In cases where runtime services are not supported or have been disabled,
+the runtime services workqueue will never have been allocated.
+
+Do not try to destroy the workqueue unconditionally in the unlikely
+event that EFI initialisation fails to avoid dereferencing a NULL
+pointer.
+
+Fixes: 98086df8b70c ("efi: add missed destroy_workqueue when efisubsys_init fails")
+Cc: stable@vger.kernel.org
+Cc: Li Heng <liheng40@huawei.com>
+Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/firmware/efi/efi.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
+index ba03f5a4b30c..a2765d668856 100644
+--- a/drivers/firmware/efi/efi.c
++++ b/drivers/firmware/efi/efi.c
+@@ -385,8 +385,8 @@ static int __init efisubsys_init(void)
+       efi_kobj = kobject_create_and_add("efi", firmware_kobj);
+       if (!efi_kobj) {
+               pr_err("efi: Firmware registration failed.\n");
+-              destroy_workqueue(efi_rts_wq);
+-              return -ENOMEM;
++              error = -ENOMEM;
++              goto err_destroy_wq;
+       }
+       if (efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE |
+@@ -429,7 +429,10 @@ static int __init efisubsys_init(void)
+               generic_ops_unregister();
+ err_put:
+       kobject_put(efi_kobj);
+-      destroy_workqueue(efi_rts_wq);
++err_destroy_wq:
++      if (efi_rts_wq)
++              destroy_workqueue(efi_rts_wq);
++
+       return error;
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.15/hvc-xen-lock-console-list-traversal.patch b/queue-5.15/hvc-xen-lock-console-list-traversal.patch
new file mode 100644 (file)
index 0000000..aefbd02
--- /dev/null
@@ -0,0 +1,186 @@
+From b91f19c3e6dd88fc81c8ce90195d2dc5d4963680 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 30 Nov 2022 17:36:02 +0100
+Subject: hvc/xen: lock console list traversal
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Roger Pau Monne <roger.pau@citrix.com>
+
+[ Upstream commit c0dccad87cf68fc6012aec7567e354353097ec1a ]
+
+The currently lockless access to the xen console list in
+vtermno_to_xencons() is incorrect, as additions and removals from the
+list can happen anytime, and as such the traversal of the list to get
+the private console data for a given termno needs to happen with the
+lock held.  Note users that modify the list already do so with the
+lock taken.
+
+Adjust current lock takers to use the _irq{save,restore} helpers,
+since the context in which vtermno_to_xencons() is called can have
+interrupts disabled.  Use the _irq{save,restore} set of helpers to
+switch the current callers to disable interrupts in the locked region.
+I haven't checked if existing users could instead use the _irq
+variant, as I think it's safer to use _irq{save,restore} upfront.
+
+While there switch from using list_for_each_entry_safe to
+list_for_each_entry: the current entry cursor won't be removed as
+part of the code in the loop body, so using the _safe variant is
+pointless.
+
+Fixes: 02e19f9c7cac ('hvc_xen: implement multiconsole support')
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
+Link: https://lore.kernel.org/r/20221130163611.14686-1-roger.pau@citrix.com
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/hvc/hvc_xen.c | 46 ++++++++++++++++++++++++---------------
+ 1 file changed, 29 insertions(+), 17 deletions(-)
+
+diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c
+index 8ee7ce120692..609a51137e96 100644
+--- a/drivers/tty/hvc/hvc_xen.c
++++ b/drivers/tty/hvc/hvc_xen.c
+@@ -52,17 +52,22 @@ static DEFINE_SPINLOCK(xencons_lock);
+ static struct xencons_info *vtermno_to_xencons(int vtermno)
+ {
+-      struct xencons_info *entry, *n, *ret = NULL;
++      struct xencons_info *entry, *ret = NULL;
++      unsigned long flags;
+-      if (list_empty(&xenconsoles))
+-                      return NULL;
++      spin_lock_irqsave(&xencons_lock, flags);
++      if (list_empty(&xenconsoles)) {
++              spin_unlock_irqrestore(&xencons_lock, flags);
++              return NULL;
++      }
+-      list_for_each_entry_safe(entry, n, &xenconsoles, list) {
++      list_for_each_entry(entry, &xenconsoles, list) {
+               if (entry->vtermno == vtermno) {
+                       ret  = entry;
+                       break;
+               }
+       }
++      spin_unlock_irqrestore(&xencons_lock, flags);
+       return ret;
+ }
+@@ -223,7 +228,7 @@ static int xen_hvm_console_init(void)
+ {
+       int r;
+       uint64_t v = 0;
+-      unsigned long gfn;
++      unsigned long gfn, flags;
+       struct xencons_info *info;
+       if (!xen_hvm_domain())
+@@ -258,9 +263,9 @@ static int xen_hvm_console_init(void)
+               goto err;
+       info->vtermno = HVC_COOKIE;
+-      spin_lock(&xencons_lock);
++      spin_lock_irqsave(&xencons_lock, flags);
+       list_add_tail(&info->list, &xenconsoles);
+-      spin_unlock(&xencons_lock);
++      spin_unlock_irqrestore(&xencons_lock, flags);
+       return 0;
+ err:
+@@ -283,6 +288,7 @@ static int xencons_info_pv_init(struct xencons_info *info, int vtermno)
+ static int xen_pv_console_init(void)
+ {
+       struct xencons_info *info;
++      unsigned long flags;
+       if (!xen_pv_domain())
+               return -ENODEV;
+@@ -299,9 +305,9 @@ static int xen_pv_console_init(void)
+               /* already configured */
+               return 0;
+       }
+-      spin_lock(&xencons_lock);
++      spin_lock_irqsave(&xencons_lock, flags);
+       xencons_info_pv_init(info, HVC_COOKIE);
+-      spin_unlock(&xencons_lock);
++      spin_unlock_irqrestore(&xencons_lock, flags);
+       return 0;
+ }
+@@ -309,6 +315,7 @@ static int xen_pv_console_init(void)
+ static int xen_initial_domain_console_init(void)
+ {
+       struct xencons_info *info;
++      unsigned long flags;
+       if (!xen_initial_domain())
+               return -ENODEV;
+@@ -323,9 +330,9 @@ static int xen_initial_domain_console_init(void)
+       info->irq = bind_virq_to_irq(VIRQ_CONSOLE, 0, false);
+       info->vtermno = HVC_COOKIE;
+-      spin_lock(&xencons_lock);
++      spin_lock_irqsave(&xencons_lock, flags);
+       list_add_tail(&info->list, &xenconsoles);
+-      spin_unlock(&xencons_lock);
++      spin_unlock_irqrestore(&xencons_lock, flags);
+       return 0;
+ }
+@@ -380,10 +387,12 @@ static void xencons_free(struct xencons_info *info)
+ static int xen_console_remove(struct xencons_info *info)
+ {
++      unsigned long flags;
++
+       xencons_disconnect_backend(info);
+-      spin_lock(&xencons_lock);
++      spin_lock_irqsave(&xencons_lock, flags);
+       list_del(&info->list);
+-      spin_unlock(&xencons_lock);
++      spin_unlock_irqrestore(&xencons_lock, flags);
+       if (info->xbdev != NULL)
+               xencons_free(info);
+       else {
+@@ -464,6 +473,7 @@ static int xencons_probe(struct xenbus_device *dev,
+ {
+       int ret, devid;
+       struct xencons_info *info;
++      unsigned long flags;
+       devid = dev->nodename[strlen(dev->nodename) - 1] - '0';
+       if (devid == 0)
+@@ -482,9 +492,9 @@ static int xencons_probe(struct xenbus_device *dev,
+       ret = xencons_connect_backend(dev, info);
+       if (ret < 0)
+               goto error;
+-      spin_lock(&xencons_lock);
++      spin_lock_irqsave(&xencons_lock, flags);
+       list_add_tail(&info->list, &xenconsoles);
+-      spin_unlock(&xencons_lock);
++      spin_unlock_irqrestore(&xencons_lock, flags);
+       return 0;
+@@ -583,10 +593,12 @@ static int __init xen_hvc_init(void)
+       info->hvc = hvc_alloc(HVC_COOKIE, info->irq, ops, 256);
+       if (IS_ERR(info->hvc)) {
++              unsigned long flags;
++
+               r = PTR_ERR(info->hvc);
+-              spin_lock(&xencons_lock);
++              spin_lock_irqsave(&xencons_lock, flags);
+               list_del(&info->list);
+-              spin_unlock(&xencons_lock);
++              spin_unlock_irqrestore(&xencons_lock, flags);
+               if (info->irq)
+                       unbind_from_irqhandler(info->irq, NULL);
+               kfree(info);
+-- 
+2.35.1
+
diff --git a/queue-5.15/igc-fix-pps-delta-between-two-synchronized-end-point.patch b/queue-5.15/igc-fix-pps-delta-between-two-synchronized-end-point.patch
new file mode 100644 (file)
index 0000000..036c945
--- /dev/null
@@ -0,0 +1,97 @@
+From 4cc0563872522eed60adb0f3757bc1bb409a8ce2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 14 Dec 2022 16:10:38 +0800
+Subject: igc: Fix PPS delta between two synchronized end-points
+
+From: Christopher S Hall <christopher.s.hall@intel.com>
+
+[ Upstream commit 5e91c72e560cc85f7163bbe3d14197268de31383 ]
+
+This patch fixes the pulse per second output delta between
+two synchronized end-points.
+
+Based on Intel Discrete I225 Software User Manual Section
+4.2.15 TimeSync Auxiliary Control Register, ST0[Bit 4] and
+ST1[Bit 7] must be set to ensure that the clock output will be
+toggled based on the defined frequency value. This ensures
+that the PPS output is aligned with the clock.
+
+How to test:
+
+1) Running time synchronization on both end points.
+Ex: ptp4l --step_threshold=1 -m -f gPTP.cfg -i <interface name>
+
+2) Configure PPS output using below command for both end-points
+Ex: SDP0 on I225 REV4 SKU variant
+
+./testptp -d /dev/ptp0 -L 0,2
+./testptp -d /dev/ptp0 -p 1000000000
+
+3) Measure the output using analyzer for both end-points
+
+Fixes: 87938851b6ef ("igc: enable auxiliary PHC functions for the i225")
+Signed-off-by: Christopher S Hall <christopher.s.hall@intel.com>
+Signed-off-by: Muhammad Husaini Zulkifli <muhammad.husaini.zulkifli@intel.com>
+Acked-by: Sasha Neftin <sasha.neftin@intel.com>
+Tested-by: Naama Meir <naamax.meir@linux.intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igc/igc_defines.h |  2 ++
+ drivers/net/ethernet/intel/igc/igc_ptp.c     | 10 ++++++----
+ 2 files changed, 8 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
+index f171bc99e58c..60d0ca69ceca 100644
+--- a/drivers/net/ethernet/intel/igc/igc_defines.h
++++ b/drivers/net/ethernet/intel/igc/igc_defines.h
+@@ -469,7 +469,9 @@
+ #define IGC_TSAUXC_EN_TT0     BIT(0)  /* Enable target time 0. */
+ #define IGC_TSAUXC_EN_TT1     BIT(1)  /* Enable target time 1. */
+ #define IGC_TSAUXC_EN_CLK0    BIT(2)  /* Enable Configurable Frequency Clock 0. */
++#define IGC_TSAUXC_ST0                BIT(4)  /* Start Clock 0 Toggle on Target Time 0. */
+ #define IGC_TSAUXC_EN_CLK1    BIT(5)  /* Enable Configurable Frequency Clock 1. */
++#define IGC_TSAUXC_ST1                BIT(7)  /* Start Clock 1 Toggle on Target Time 1. */
+ #define IGC_TSAUXC_EN_TS0     BIT(8)  /* Enable hardware timestamp 0. */
+ #define IGC_TSAUXC_AUTT0      BIT(9)  /* Auxiliary Timestamp Taken. */
+ #define IGC_TSAUXC_EN_TS1     BIT(10) /* Enable hardware timestamp 0. */
+diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c
+index 8e521f99b80a..fbde7826927b 100644
+--- a/drivers/net/ethernet/intel/igc/igc_ptp.c
++++ b/drivers/net/ethernet/intel/igc/igc_ptp.c
+@@ -323,7 +323,7 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp,
+               ts = ns_to_timespec64(ns);
+               if (rq->perout.index == 1) {
+                       if (use_freq) {
+-                              tsauxc_mask = IGC_TSAUXC_EN_CLK1;
++                              tsauxc_mask = IGC_TSAUXC_EN_CLK1 | IGC_TSAUXC_ST1;
+                               tsim_mask = 0;
+                       } else {
+                               tsauxc_mask = IGC_TSAUXC_EN_TT1;
+@@ -334,7 +334,7 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp,
+                       freqout = IGC_FREQOUT1;
+               } else {
+                       if (use_freq) {
+-                              tsauxc_mask = IGC_TSAUXC_EN_CLK0;
++                              tsauxc_mask = IGC_TSAUXC_EN_CLK0 | IGC_TSAUXC_ST0;
+                               tsim_mask = 0;
+                       } else {
+                               tsauxc_mask = IGC_TSAUXC_EN_TT0;
+@@ -348,10 +348,12 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp,
+               tsauxc = rd32(IGC_TSAUXC);
+               tsim = rd32(IGC_TSIM);
+               if (rq->perout.index == 1) {
+-                      tsauxc &= ~(IGC_TSAUXC_EN_TT1 | IGC_TSAUXC_EN_CLK1);
++                      tsauxc &= ~(IGC_TSAUXC_EN_TT1 | IGC_TSAUXC_EN_CLK1 |
++                                  IGC_TSAUXC_ST1);
+                       tsim &= ~IGC_TSICR_TT1;
+               } else {
+-                      tsauxc &= ~(IGC_TSAUXC_EN_TT0 | IGC_TSAUXC_EN_CLK0);
++                      tsauxc &= ~(IGC_TSAUXC_EN_TT0 | IGC_TSAUXC_EN_CLK0 |
++                                  IGC_TSAUXC_ST0);
+                       tsim &= ~IGC_TSICR_TT0;
+               }
+               if (on) {
+-- 
+2.35.1
+
diff --git a/queue-5.15/io_uring-lock-overflowing-for-iopoll.patch b/queue-5.15/io_uring-lock-overflowing-for-iopoll.patch
new file mode 100644 (file)
index 0000000..a3f980c
--- /dev/null
@@ -0,0 +1,79 @@
+From a6f79d7306fd0eb440ebac0efed2a5beadf93456 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 14 Jan 2023 09:14:03 -0700
+Subject: io_uring: lock overflowing for IOPOLL
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit 544d163d659d45a206d8929370d5a2984e546cb7 upstream.
+
+syzbot reports an issue with overflow filling for IOPOLL:
+
+WARNING: CPU: 0 PID: 28 at io_uring/io_uring.c:734 io_cqring_event_overflow+0x1c0/0x230 io_uring/io_uring.c:734
+CPU: 0 PID: 28 Comm: kworker/u4:1 Not tainted 6.2.0-rc3-syzkaller-16369-g358a161a6a9e #0
+Workqueue: events_unbound io_ring_exit_work
+Call trace:
+ io_cqring_event_overflow+0x1c0/0x230 io_uring/io_uring.c:734
+ io_req_cqe_overflow+0x5c/0x70 io_uring/io_uring.c:773
+ io_fill_cqe_req io_uring/io_uring.h:168 [inline]
+ io_do_iopoll+0x474/0x62c io_uring/rw.c:1065
+ io_iopoll_try_reap_events+0x6c/0x108 io_uring/io_uring.c:1513
+ io_uring_try_cancel_requests+0x13c/0x258 io_uring/io_uring.c:3056
+ io_ring_exit_work+0xec/0x390 io_uring/io_uring.c:2869
+ process_one_work+0x2d8/0x504 kernel/workqueue.c:2289
+ worker_thread+0x340/0x610 kernel/workqueue.c:2436
+ kthread+0x12c/0x158 kernel/kthread.c:376
+ ret_from_fork+0x10/0x20 arch/arm64/kernel/entry.S:863
+
+There is no real problem for normal IOPOLL as flush is also called with
+uring_lock taken, but it's getting more complicated for IOPOLL|SQPOLL,
+for which __io_cqring_overflow_flush() happens from the CQ waiting path.
+
+Reported-and-tested-by: syzbot+6805087452d72929404e@syzkaller.appspotmail.com
+Cc: stable@vger.kernel.org # 5.10+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/io_uring.c | 18 ++++++++++++++++--
+ 1 file changed, 16 insertions(+), 2 deletions(-)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index c587221a289c..9a01188ff45a 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -2477,12 +2477,26 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
+       io_init_req_batch(&rb);
+       while (!list_empty(done)) {
++              struct io_uring_cqe *cqe;
++              unsigned cflags;
++
+               req = list_first_entry(done, struct io_kiocb, inflight_entry);
+               list_del(&req->inflight_entry);
+-
+-              io_fill_cqe_req(req, req->result, io_put_rw_kbuf(req));
++              cflags = io_put_rw_kbuf(req);
+               (*nr_events)++;
++              cqe = io_get_cqe(ctx);
++              if (cqe) {
++                      WRITE_ONCE(cqe->user_data, req->user_data);
++                      WRITE_ONCE(cqe->res, req->result);
++                      WRITE_ONCE(cqe->flags, cflags);
++              } else {
++                      spin_lock(&ctx->completion_lock);
++                      io_cqring_event_overflow(ctx, req->user_data,
++                                                      req->result, cflags);
++                      spin_unlock(&ctx->completion_lock);
++              }
++
+               if (req_ref_put_and_test(req))
+                       io_req_free_batch(&rb, req, &ctx->submit_state);
+       }
+-- 
+2.35.1
+
diff --git a/queue-5.15/kvm-x86-do-not-return-host-topology-information-from.patch b/queue-5.15/kvm-x86-do-not-return-host-topology-information-from.patch
new file mode 100644 (file)
index 0000000..939dbc3
--- /dev/null
@@ -0,0 +1,123 @@
+From c835668114e06b8b178b97e2a3370ff18e839ba6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 22 Oct 2022 04:17:53 -0400
+Subject: KVM: x86: Do not return host topology information from
+ KVM_GET_SUPPORTED_CPUID
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+[ Upstream commit 45e966fcca03ecdcccac7cb236e16eea38cc18af ]
+
+Passing the host topology to the guest is almost certainly wrong
+and will confuse the scheduler.  In addition, several fields of
+these CPUID leaves vary on each processor; it is simply impossible to
+return the right values from KVM_GET_SUPPORTED_CPUID in such a way that
+they can be passed to KVM_SET_CPUID2.
+
+The values that will most likely prevent confusion are all zeroes.
+Userspace will have to override it anyway if it wishes to present a
+specific topology to the guest.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/virt/kvm/api.rst | 14 ++++++++++++++
+ arch/x86/kvm/cpuid.c           | 32 ++++++++++++++++----------------
+ 2 files changed, 30 insertions(+), 16 deletions(-)
+
+diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
+index 8826f8023f06..b550f43214c7 100644
+--- a/Documentation/virt/kvm/api.rst
++++ b/Documentation/virt/kvm/api.rst
+@@ -7300,6 +7300,20 @@ CPU[EAX=1]:ECX[24] (TSC_DEADLINE) is not reported by ``KVM_GET_SUPPORTED_CPUID``
+ It can be enabled if ``KVM_CAP_TSC_DEADLINE_TIMER`` is present and the kernel
+ has enabled in-kernel emulation of the local APIC.
++CPU topology
++~~~~~~~~~~~~
++
++Several CPUID values include topology information for the host CPU:
++0x0b and 0x1f for Intel systems, 0x8000001e for AMD systems.  Different
++versions of KVM return different values for this information and userspace
++should not rely on it.  Currently they return all zeroes.
++
++If userspace wishes to set up a guest topology, it should be careful that
++the values of these three leaves differ for each CPU.  In particular,
++the APIC ID is found in EDX for all subleaves of 0x0b and 0x1f, and in EAX
++for 0x8000001e; the latter also encodes the core id and node id in bits
++7:0 of EBX and ECX respectively.
++
+ Obsolete ioctls and capabilities
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
+index 05b27b4a54c9..528437e3e2f3 100644
+--- a/arch/x86/kvm/cpuid.c
++++ b/arch/x86/kvm/cpuid.c
+@@ -567,16 +567,22 @@ struct kvm_cpuid_array {
+       int nent;
+ };
++static struct kvm_cpuid_entry2 *get_next_cpuid(struct kvm_cpuid_array *array)
++{
++      if (array->nent >= array->maxnent)
++              return NULL;
++
++      return &array->entries[array->nent++];
++}
++
+ static struct kvm_cpuid_entry2 *do_host_cpuid(struct kvm_cpuid_array *array,
+                                             u32 function, u32 index)
+ {
+-      struct kvm_cpuid_entry2 *entry;
++      struct kvm_cpuid_entry2 *entry = get_next_cpuid(array);
+-      if (array->nent >= array->maxnent)
++      if (!entry)
+               return NULL;
+-      entry = &array->entries[array->nent++];
+-
+       entry->function = function;
+       entry->index = index;
+       entry->flags = 0;
+@@ -755,22 +761,13 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
+               entry->edx = edx.full;
+               break;
+       }
+-      /*
+-       * Per Intel's SDM, the 0x1f is a superset of 0xb,
+-       * thus they can be handled by common code.
+-       */
+       case 0x1f:
+       case 0xb:
+               /*
+-               * Populate entries until the level type (ECX[15:8]) of the
+-               * previous entry is zero.  Note, CPUID EAX.{0x1f,0xb}.0 is
+-               * the starting entry, filled by the primary do_host_cpuid().
++               * No topology; a valid topology is indicated by the presence
++               * of subleaf 1.
+                */
+-              for (i = 1; entry->ecx & 0xff00; ++i) {
+-                      entry = do_host_cpuid(array, function, i);
+-                      if (!entry)
+-                              goto out;
+-              }
++              entry->eax = entry->ebx = entry->ecx = 0;
+               break;
+       case 0xd:
+               entry->eax &= supported_xcr0;
+@@ -962,6 +959,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
+               entry->ebx = entry->ecx = entry->edx = 0;
+               break;
+       case 0x8000001e:
++              /* Do not return host topology information.  */
++              entry->eax = entry->ebx = entry->ecx = 0;
++              entry->edx = 0; /* reserved */
+               break;
+       case 0x8000001F:
+               if (!kvm_cpu_cap_has(X86_FEATURE_SEV)) {
+-- 
+2.35.1
+
diff --git a/queue-5.15/mm-always-release-pages-to-the-buddy-allocator-in-me.patch b/queue-5.15/mm-always-release-pages-to-the-buddy-allocator-in-me.patch
new file mode 100644 (file)
index 0000000..1039469
--- /dev/null
@@ -0,0 +1,96 @@
+From a2d920d05bdbd98882e3a6f7d7b83d26e440a20d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Jan 2023 22:22:44 +0000
+Subject: mm: Always release pages to the buddy allocator in
+ memblock_free_late().
+
+From: Aaron Thompson <dev@aaront.org>
+
+[ Upstream commit 115d9d77bb0f9152c60b6e8646369fa7f6167593 ]
+
+If CONFIG_DEFERRED_STRUCT_PAGE_INIT is enabled, memblock_free_pages()
+only releases pages to the buddy allocator if they are not in the
+deferred range. This is correct for free pages (as defined by
+for_each_free_mem_pfn_range_in_zone()) because free pages in the
+deferred range will be initialized and released as part of the deferred
+init process. memblock_free_pages() is called by memblock_free_late(),
+which is used to free reserved ranges after memblock_free_all() has
+run. All pages in reserved ranges have been initialized at that point,
+and accordingly, those pages are not touched by the deferred init
+process. This means that currently, if the pages that
+memblock_free_late() intends to release are in the deferred range, they
+will never be released to the buddy allocator. They will forever be
+reserved.
+
+In addition, memblock_free_pages() calls kmsan_memblock_free_pages(),
+which is also correct for free pages but is not correct for reserved
+pages. KMSAN metadata for reserved pages is initialized by
+kmsan_init_shadow(), which runs shortly before memblock_free_all().
+
+For both of these reasons, memblock_free_pages() should only be called
+for free pages, and memblock_free_late() should call __free_pages_core()
+directly instead.
+
+One case where this issue can occur in the wild is EFI boot on
+x86_64. The x86 EFI code reserves all EFI boot services memory ranges
+via memblock_reserve() and frees them later via memblock_free_late()
+(efi_reserve_boot_services() and efi_free_boot_services(),
+respectively). If any of those ranges happens to fall within the
+deferred init range, the pages will not be released and that memory will
+be unavailable.
+
+For example, on an Amazon EC2 t3.micro VM (1 GB) booting via EFI:
+
+v6.2-rc2:
+  # grep -E 'Node|spanned|present|managed' /proc/zoneinfo
+  Node 0, zone      DMA
+          spanned  4095
+          present  3999
+          managed  3840
+  Node 0, zone    DMA32
+          spanned  246652
+          present  245868
+          managed  178867
+
+v6.2-rc2 + patch:
+  # grep -E 'Node|spanned|present|managed' /proc/zoneinfo
+  Node 0, zone      DMA
+          spanned  4095
+          present  3999
+          managed  3840
+  Node 0, zone    DMA32
+          spanned  246652
+          present  245868
+          managed  222816   # +43,949 pages
+
+Fixes: 3a80a7fa7989 ("mm: meminit: initialise a subset of struct pages if CONFIG_DEFERRED_STRUCT_PAGE_INIT is set")
+Signed-off-by: Aaron Thompson <dev@aaront.org>
+Link: https://lore.kernel.org/r/01010185892de53e-e379acfb-7044-4b24-b30a-e2657c1ba989-000000@us-west-2.amazonses.com
+Signed-off-by: Mike Rapoport (IBM) <rppt@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/memblock.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/mm/memblock.c b/mm/memblock.c
+index 2b7397781c99..838d59a74c65 100644
+--- a/mm/memblock.c
++++ b/mm/memblock.c
+@@ -1615,7 +1615,13 @@ void __init __memblock_free_late(phys_addr_t base, phys_addr_t size)
+       end = PFN_DOWN(base + size);
+       for (; cursor < end; cursor++) {
+-              memblock_free_pages(pfn_to_page(cursor), cursor, 0);
++              /*
++               * Reserved pages are always initialized by the end of
++               * memblock_free_all() (by memmap_init() and, if deferred
++               * initialization is enabled, memmap_init_reserved_pages()), so
++               * these pages can be released directly to the buddy allocator.
++               */
++              __free_pages_core(pfn_to_page(cursor), 0);
+               totalram_pages_inc();
+       }
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.15/net-mlx5-fix-ptp-max-frequency-adjustment-range.patch b/queue-5.15/net-mlx5-fix-ptp-max-frequency-adjustment-range.patch
new file mode 100644 (file)
index 0000000..8ca0d25
--- /dev/null
@@ -0,0 +1,41 @@
+From 078d298cff84a8993d1a35f6d682705fc8ae0684 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 5 Dec 2022 14:26:09 -0800
+Subject: net/mlx5: Fix ptp max frequency adjustment range
+
+From: Rahul Rameshbabu <rrameshbabu@nvidia.com>
+
+[ Upstream commit fe91d57277eef8bb4aca05acfa337b4a51d0bba4 ]
+
+.max_adj of ptp_clock_info acts as an absolute value for the amount in ppb
+that can be set for a single call of .adjfine. This means that a single
+call to .adjfine cannot request more than .max_adj or less than -(.max_adj).
+Provide the correct maximum frequency adjustment value supported by
+devices.
+
+Fixes: 3d8c38af1493 ("net/mlx5e: Add PTP Hardware Clock (PHC) support")
+Signed-off-by: Rahul Rameshbabu <rrameshbabu@nvidia.com>
+Reviewed-by: Gal Pressman <gal@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+index 91e806c1aa21..8490c0cf80a8 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+@@ -599,7 +599,7 @@ static int mlx5_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin,
+ static const struct ptp_clock_info mlx5_ptp_clock_info = {
+       .owner          = THIS_MODULE,
+       .name           = "mlx5_ptp",
+-      .max_adj        = 100000000,
++      .max_adj        = 50000000,
+       .n_alarm        = 0,
+       .n_ext_ts       = 0,
+       .n_per_out      = 0,
+-- 
+2.35.1
+
diff --git a/queue-5.15/net-mlx5e-don-t-support-encap-rules-with-gbp-option.patch b/queue-5.15/net-mlx5e-don-t-support-encap-rules-with-gbp-option.patch
new file mode 100644 (file)
index 0000000..4c9b151
--- /dev/null
@@ -0,0 +1,40 @@
+From 91344d58cf8043f235bd1aff5e51068dfa3a6c8b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 27 Dec 2022 04:54:09 +0200
+Subject: net/mlx5e: Don't support encap rules with gbp option
+
+From: Gavin Li <gavinl@nvidia.com>
+
+[ Upstream commit d515d63cae2cd186acf40deaa8ef33067bb7f637 ]
+
+Previously, encap rules with the gbp option would be offloaded by mistake,
+but the driver does not support gbp option offload.
+
+To fix this issue, check if the encap rule has the gbp option and don't
+offload the rule.
+
+Fixes: d8f9dfae49ce ("net: sched: allow flower to match vxlan options")
+Signed-off-by: Gavin Li <gavinl@nvidia.com>
+Reviewed-by: Maor Dickman <maord@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c
+index 4267f3a1059e..78b1a6ddd967 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c
+@@ -88,6 +88,8 @@ static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[],
+       struct udphdr *udp = (struct udphdr *)(buf);
+       struct vxlanhdr *vxh;
++      if (tun_key->tun_flags & TUNNEL_VXLAN_OPT)
++              return -EOPNOTSUPP;
+       vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
+       *ip_proto = IPPROTO_UDP;
+-- 
+2.35.1
+
diff --git a/queue-5.15/net-sched-act_mpls-fix-warning-during-failed-attribu.patch b/queue-5.15/net-sched-act_mpls-fix-warning-during-failed-attribu.patch
new file mode 100644 (file)
index 0000000..07966cb
--- /dev/null
@@ -0,0 +1,109 @@
+From 09a9e5a03438c7a954ddcae5858b6fb53a100b2d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 7 Jan 2023 19:10:04 +0200
+Subject: net/sched: act_mpls: Fix warning during failed attribute validation
+
+From: Ido Schimmel <idosch@nvidia.com>
+
+[ Upstream commit 9e17f99220d111ea031b44153fdfe364b0024ff2 ]
+
+The 'TCA_MPLS_LABEL' attribute is of 'NLA_U32' type, but has a
+validation type of 'NLA_VALIDATE_FUNCTION'. This is an invalid
+combination according to the comment above 'struct nla_policy':
+
+"
+Meaning of `validate' field, use via NLA_POLICY_VALIDATE_FN:
+   NLA_BINARY           Validation function called for the attribute.
+   All other            Unused - but note that it's a union
+"
+
+This can trigger the warning [1] in nla_get_range_unsigned() when
+validation of the attribute fails. Despite being of 'NLA_U32' type, the
+associated 'min'/'max' fields in the policy are negative as they are
+aliased by the 'validate' field.
+
+Fix by changing the attribute type to 'NLA_BINARY' which is consistent
+with the above comment and all other users of NLA_POLICY_VALIDATE_FN().
+As a result, move the length validation to the validation function.
+
+No regressions in MPLS tests:
+
+ # ./tdc.py -f tc-tests/actions/mpls.json
+ [...]
+ # echo $?
+ 0
+
+[1]
+WARNING: CPU: 0 PID: 17743 at lib/nlattr.c:118
+nla_get_range_unsigned+0x1d8/0x1e0 lib/nlattr.c:117
+Modules linked in:
+CPU: 0 PID: 17743 Comm: syz-executor.0 Not tainted 6.1.0-rc8 #3
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
+rel-1.13.0-48-gd9c812dda519-prebuilt.qemu.org 04/01/2014
+RIP: 0010:nla_get_range_unsigned+0x1d8/0x1e0 lib/nlattr.c:117
+[...]
+Call Trace:
+ <TASK>
+ __netlink_policy_dump_write_attr+0x23d/0x990 net/netlink/policy.c:310
+ netlink_policy_dump_write_attr+0x22/0x30 net/netlink/policy.c:411
+ netlink_ack_tlv_fill net/netlink/af_netlink.c:2454 [inline]
+ netlink_ack+0x546/0x760 net/netlink/af_netlink.c:2506
+ netlink_rcv_skb+0x1b7/0x240 net/netlink/af_netlink.c:2546
+ rtnetlink_rcv+0x18/0x20 net/core/rtnetlink.c:6109
+ netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline]
+ netlink_unicast+0x5e9/0x6b0 net/netlink/af_netlink.c:1345
+ netlink_sendmsg+0x739/0x860 net/netlink/af_netlink.c:1921
+ sock_sendmsg_nosec net/socket.c:714 [inline]
+ sock_sendmsg net/socket.c:734 [inline]
+ ____sys_sendmsg+0x38f/0x500 net/socket.c:2482
+ ___sys_sendmsg net/socket.c:2536 [inline]
+ __sys_sendmsg+0x197/0x230 net/socket.c:2565
+ __do_sys_sendmsg net/socket.c:2574 [inline]
+ __se_sys_sendmsg net/socket.c:2572 [inline]
+ __x64_sys_sendmsg+0x42/0x50 net/socket.c:2572
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+Link: https://lore.kernel.org/netdev/CAO4mrfdmjvRUNbDyP0R03_DrD_eFCLCguz6OxZ2TYRSv0K9gxA@mail.gmail.com/
+Fixes: 2a2ea50870ba ("net: sched: add mpls manipulation actions to TC")
+Reported-by: Wei Chen <harperchen1110@gmail.com>
+Tested-by: Wei Chen <harperchen1110@gmail.com>
+Signed-off-by: Ido Schimmel <idosch@nvidia.com>
+Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
+Link: https://lore.kernel.org/r/20230107171004.608436-1-idosch@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/act_mpls.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
+index e4529b428cf4..db0ef0486309 100644
+--- a/net/sched/act_mpls.c
++++ b/net/sched/act_mpls.c
+@@ -133,6 +133,11 @@ static int valid_label(const struct nlattr *attr,
+ {
+       const u32 *label = nla_data(attr);
++      if (nla_len(attr) != sizeof(*label)) {
++              NL_SET_ERR_MSG_MOD(extack, "Invalid MPLS label length");
++              return -EINVAL;
++      }
++
+       if (*label & ~MPLS_LABEL_MASK || *label == MPLS_LABEL_IMPLNULL) {
+               NL_SET_ERR_MSG_MOD(extack, "MPLS label out of range");
+               return -EINVAL;
+@@ -144,7 +149,8 @@ static int valid_label(const struct nlattr *attr,
+ static const struct nla_policy mpls_policy[TCA_MPLS_MAX + 1] = {
+       [TCA_MPLS_PARMS]        = NLA_POLICY_EXACT_LEN(sizeof(struct tc_mpls)),
+       [TCA_MPLS_PROTO]        = { .type = NLA_U16 },
+-      [TCA_MPLS_LABEL]        = NLA_POLICY_VALIDATE_FN(NLA_U32, valid_label),
++      [TCA_MPLS_LABEL]        = NLA_POLICY_VALIDATE_FN(NLA_BINARY,
++                                                       valid_label),
+       [TCA_MPLS_TC]           = NLA_POLICY_RANGE(NLA_U8, 0, 7),
+       [TCA_MPLS_TTL]          = NLA_POLICY_MIN(NLA_U8, 1),
+       [TCA_MPLS_BOS]          = NLA_POLICY_RANGE(NLA_U8, 0, 1),
+-- 
+2.35.1
+
diff --git a/queue-5.15/nfc-pn533-wait-for-out_urb-s-completion-in-pn533_usb.patch b/queue-5.15/nfc-pn533-wait-for-out_urb-s-completion-in-pn533_usb.patch
new file mode 100644 (file)
index 0000000..6e87aa9
--- /dev/null
@@ -0,0 +1,129 @@
+From f4e6193f7339e2544e68f83110dcba1b86e0c81f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Jan 2023 17:23:44 +0900
+Subject: nfc: pn533: Wait for out_urb's completion in pn533_usb_send_frame()
+
+From: Minsuk Kang <linuxlovemin@yonsei.ac.kr>
+
+[ Upstream commit 9dab880d675b9d0dd56c6428e4e8352a3339371d ]
+
+Fix a use-after-free that occurs in hcd when in_urb sent from
+pn533_usb_send_frame() is completed earlier than out_urb. Its callback
+frees the skb data in pn533_send_async_complete() that is used as a
+transfer buffer of out_urb. Wait before sending in_urb until the
+callback of out_urb is called. To modify the callback of out_urb alone,
+separate the complete function of out_urb and ack_urb.
+
+Found by a modified version of syzkaller.
+
+BUG: KASAN: use-after-free in dummy_timer
+Call Trace:
+ memcpy (mm/kasan/shadow.c:65)
+ dummy_perform_transfer (drivers/usb/gadget/udc/dummy_hcd.c:1352)
+ transfer (drivers/usb/gadget/udc/dummy_hcd.c:1453)
+ dummy_timer (drivers/usb/gadget/udc/dummy_hcd.c:1972)
+ arch_static_branch (arch/x86/include/asm/jump_label.h:27)
+ static_key_false (include/linux/jump_label.h:207)
+ timer_expire_exit (include/trace/events/timer.h:127)
+ call_timer_fn (kernel/time/timer.c:1475)
+ expire_timers (kernel/time/timer.c:1519)
+ __run_timers (kernel/time/timer.c:1790)
+ run_timer_softirq (kernel/time/timer.c:1803)
+
+Fixes: c46ee38620a2 ("NFC: pn533: add NXP pn533 nfc device driver")
+Signed-off-by: Minsuk Kang <linuxlovemin@yonsei.ac.kr>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nfc/pn533/usb.c | 44 ++++++++++++++++++++++++++++++++++++++---
+ 1 file changed, 41 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/nfc/pn533/usb.c b/drivers/nfc/pn533/usb.c
+index bd7f7478d189..62ad26e4299d 100644
+--- a/drivers/nfc/pn533/usb.c
++++ b/drivers/nfc/pn533/usb.c
+@@ -153,10 +153,17 @@ static int pn533_usb_send_ack(struct pn533 *dev, gfp_t flags)
+       return usb_submit_urb(phy->ack_urb, flags);
+ }
++struct pn533_out_arg {
++      struct pn533_usb_phy *phy;
++      struct completion done;
++};
++
+ static int pn533_usb_send_frame(struct pn533 *dev,
+                               struct sk_buff *out)
+ {
+       struct pn533_usb_phy *phy = dev->phy;
++      struct pn533_out_arg arg;
++      void *cntx;
+       int rc;
+       if (phy->priv == NULL)
+@@ -168,10 +175,17 @@ static int pn533_usb_send_frame(struct pn533 *dev,
+       print_hex_dump_debug("PN533 TX: ", DUMP_PREFIX_NONE, 16, 1,
+                            out->data, out->len, false);
++      init_completion(&arg.done);
++      cntx = phy->out_urb->context;
++      phy->out_urb->context = &arg;
++
+       rc = usb_submit_urb(phy->out_urb, GFP_KERNEL);
+       if (rc)
+               return rc;
++      wait_for_completion(&arg.done);
++      phy->out_urb->context = cntx;
++
+       if (dev->protocol_type == PN533_PROTO_REQ_RESP) {
+               /* request for response for sent packet directly */
+               rc = pn533_submit_urb_for_response(phy, GFP_KERNEL);
+@@ -408,7 +422,31 @@ static int pn533_acr122_poweron_rdr(struct pn533_usb_phy *phy)
+       return arg.rc;
+ }
+-static void pn533_send_complete(struct urb *urb)
++static void pn533_out_complete(struct urb *urb)
++{
++      struct pn533_out_arg *arg = urb->context;
++      struct pn533_usb_phy *phy = arg->phy;
++
++      switch (urb->status) {
++      case 0:
++              break; /* success */
++      case -ECONNRESET:
++      case -ENOENT:
++              dev_dbg(&phy->udev->dev,
++                      "The urb has been stopped (status %d)\n",
++                      urb->status);
++              break;
++      case -ESHUTDOWN:
++      default:
++              nfc_err(&phy->udev->dev,
++                      "Urb failure (status %d)\n",
++                      urb->status);
++      }
++
++      complete(&arg->done);
++}
++
++static void pn533_ack_complete(struct urb *urb)
+ {
+       struct pn533_usb_phy *phy = urb->context;
+@@ -496,10 +534,10 @@ static int pn533_usb_probe(struct usb_interface *interface,
+       usb_fill_bulk_urb(phy->out_urb, phy->udev,
+                         usb_sndbulkpipe(phy->udev, out_endpoint),
+-                        NULL, 0, pn533_send_complete, phy);
++                        NULL, 0, pn533_out_complete, phy);
+       usb_fill_bulk_urb(phy->ack_urb, phy->udev,
+                         usb_sndbulkpipe(phy->udev, out_endpoint),
+-                        NULL, 0, pn533_send_complete, phy);
++                        NULL, 0, pn533_ack_complete, phy);
+       switch (id->driver_info) {
+       case PN533_DEVICE_STD:
+-- 
+2.35.1
+
diff --git a/queue-5.15/octeontx2-af-fix-lmac-config-in-cgx_lmac_rx_tx_enabl.patch b/queue-5.15/octeontx2-af-fix-lmac-config-in-cgx_lmac_rx_tx_enabl.patch
new file mode 100644 (file)
index 0000000..4d5796a
--- /dev/null
@@ -0,0 +1,57 @@
+From 75ebc224bb6027bf0f342c120bdc59fc292bcbb8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Jan 2023 21:31:07 +0530
+Subject: octeontx2-af: Fix LMAC config in cgx_lmac_rx_tx_enable
+
+From: Angela Czubak <aczubak@marvell.com>
+
+[ Upstream commit b4e9b8763e417db31c7088103cc557d55cb7a8f5 ]
+
+PF netdev can request AF to enable or disable reception and transmission
+on assigned CGX::LMAC. The current code, instead of only disabling or
+enabling 'reception and transmission', also disables/enables the LMAC.
+This patch fixes this issue.
+
+Fixes: 1435f66a28b4 ("octeontx2-af: CGX Rx/Tx enable/disable mbox handlers")
+Signed-off-by: Angela Czubak <aczubak@marvell.com>
+Signed-off-by: Hariprasad Kelam <hkelam@marvell.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Link: https://lore.kernel.org/r/20230105160107.17638-1-hkelam@marvell.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 4 ++--
+ drivers/net/ethernet/marvell/octeontx2/af/cgx.h | 1 -
+ 2 files changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
+index 6b335139abe7..fd0a31bf94fe 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
+@@ -695,9 +695,9 @@ int cgx_lmac_rx_tx_enable(void *cgxd, int lmac_id, bool enable)
+       cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_CFG);
+       if (enable)
+-              cfg |= CMR_EN | DATA_PKT_RX_EN | DATA_PKT_TX_EN;
++              cfg |= DATA_PKT_RX_EN | DATA_PKT_TX_EN;
+       else
+-              cfg &= ~(CMR_EN | DATA_PKT_RX_EN | DATA_PKT_TX_EN);
++              cfg &= ~(DATA_PKT_RX_EN | DATA_PKT_TX_EN);
+       cgx_write(cgx, lmac_id, CGXX_CMRX_CFG, cfg);
+       return 0;
+ }
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
+index ab1e4abdea38..5714280a4252 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
++++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
+@@ -30,7 +30,6 @@
+ #define CMR_P2X_SEL_SHIFT             59ULL
+ #define CMR_P2X_SEL_NIX0              1ULL
+ #define CMR_P2X_SEL_NIX1              2ULL
+-#define CMR_EN                                BIT_ULL(55)
+ #define DATA_PKT_TX_EN                        BIT_ULL(53)
+ #define DATA_PKT_RX_EN                        BIT_ULL(54)
+ #define CGX_LMAC_TYPE_SHIFT           40
+-- 
+2.35.1
+
diff --git a/queue-5.15/octeontx2-pf-fix-resource-leakage-in-vf-driver-unbin.patch b/queue-5.15/octeontx2-pf-fix-resource-leakage-in-vf-driver-unbin.patch
new file mode 100644 (file)
index 0000000..06098e1
--- /dev/null
@@ -0,0 +1,39 @@
+From e422a25ea9385a41d9691223032679574f6664ce Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 9 Jan 2023 11:43:25 +0530
+Subject: octeontx2-pf: Fix resource leakage in VF driver unbind
+
+From: Hariprasad Kelam <hkelam@marvell.com>
+
+[ Upstream commit 53da7aec32982f5ee775b69dce06d63992ce4af3 ]
+
+Resources such as the mcam entries allocated to support the Ntuple
+feature and the hash tables for the tc feature are not freed on driver
+unbind. This patch fixes the issue.
+
+Fixes: 2da489432747 ("octeontx2-pf: devlink params support to set mcam entry count")
+Signed-off-by: Hariprasad Kelam <hkelam@marvell.com>
+Signed-off-by: Sunil Kovvuri Goutham <sgoutham@marvell.com>
+Link: https://lore.kernel.org/r/20230109061325.21395-1-hkelam@marvell.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
+index 03b4ec630432..1613638c69a1 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
+@@ -736,6 +736,8 @@ static void otx2vf_remove(struct pci_dev *pdev)
+       unregister_netdev(netdev);
+       if (vf->otx2_wq)
+               destroy_workqueue(vf->otx2_wq);
++      otx2_mcam_flow_del(vf);
++      otx2_shutdown_tc(vf);
+       otx2vf_disable_mbox_intr(vf);
+       otx2_detach_resources(&vf->mbox);
+       if (test_bit(CN10K_LMTST, &vf->hw.cap_flag))
+-- 
+2.35.1
+
diff --git a/queue-5.15/perf-build-properly-guard-libbpf-includes.patch b/queue-5.15/perf-build-properly-guard-libbpf-includes.patch
new file mode 100644 (file)
index 0000000..5af0dae
--- /dev/null
@@ -0,0 +1,80 @@
+From b22a604ae894c964dbca55001e314ce59932690a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Jan 2023 07:13:19 -0800
+Subject: perf build: Properly guard libbpf includes
+
+From: Ian Rogers <irogers@google.com>
+
+[ Upstream commit d891f2b724b39a2a41e3ad7b57110193993242ff ]
+
+Including libbpf header files should be guarded by HAVE_LIBBPF_SUPPORT.
+In bpf_counter.h, move the skeleton utilities under HAVE_BPF_SKEL.
+
+Fixes: d6a735ef3277c45f ("perf bpf_counter: Move common functions to bpf_counter.h")
+Reported-by: Mike Leach <mike.leach@linaro.org>
+Signed-off-by: Ian Rogers <irogers@google.com>
+Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Tested-by: Jiri Olsa <jolsa@kernel.org>
+Tested-by: Mike Leach <mike.leach@linaro.org>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Link: http://lore.kernel.org/lkml/20230105172243.7238-1-mike.leach@linaro.org
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/builtin-trace.c    | 2 ++
+ tools/perf/util/bpf_counter.h | 6 ++++++
+ 2 files changed, 8 insertions(+)
+
+diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
+index 2fea9952818f..d9ea546850cd 100644
+--- a/tools/perf/builtin-trace.c
++++ b/tools/perf/builtin-trace.c
+@@ -17,7 +17,9 @@
+ #include "util/record.h"
+ #include <traceevent/event-parse.h>
+ #include <api/fs/tracing_path.h>
++#ifdef HAVE_LIBBPF_SUPPORT
+ #include <bpf/bpf.h>
++#endif
+ #include "util/bpf_map.h"
+ #include "util/rlimit.h"
+ #include "builtin.h"
+diff --git a/tools/perf/util/bpf_counter.h b/tools/perf/util/bpf_counter.h
+index 65ebaa6694fb..4b5dda7530c4 100644
+--- a/tools/perf/util/bpf_counter.h
++++ b/tools/perf/util/bpf_counter.h
+@@ -4,9 +4,12 @@
+ #include <linux/list.h>
+ #include <sys/resource.h>
++
++#ifdef HAVE_LIBBPF_SUPPORT
+ #include <bpf/bpf.h>
+ #include <bpf/btf.h>
+ #include <bpf/libbpf.h>
++#endif
+ struct evsel;
+ struct target;
+@@ -87,6 +90,8 @@ static inline void set_max_rlimit(void)
+       setrlimit(RLIMIT_MEMLOCK, &rinf);
+ }
++#ifdef HAVE_BPF_SKEL
++
+ static inline __u32 bpf_link_get_id(int fd)
+ {
+       struct bpf_link_info link_info = { .id = 0, };
+@@ -127,5 +132,6 @@ static inline int bperf_trigger_reading(int prog_fd, int cpu)
+       return bpf_prog_test_run_opts(prog_fd, &opts);
+ }
++#endif /* HAVE_BPF_SKEL */
+ #endif /* __PERF_BPF_COUNTER_H */
+-- 
+2.35.1
+
diff --git a/queue-5.15/platform-surface-aggregator-add-missing-call-to-ssam.patch b/queue-5.15/platform-surface-aggregator-add-missing-call-to-ssam.patch
new file mode 100644 (file)
index 0000000..4d90a20
--- /dev/null
@@ -0,0 +1,43 @@
+From 304074d2f036c0bf41af15138534a94479b1bb81 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 20 Dec 2022 18:56:07 +0100
+Subject: platform/surface: aggregator: Add missing call to
+ ssam_request_sync_free()
+
+From: Maximilian Luz <luzmaximilian@gmail.com>
+
+[ Upstream commit c965daac370f08a9b71d573a71d13cda76f2a884 ]
+
+Although rare, ssam_request_sync_init() can fail. In that case, the
+request should be freed via ssam_request_sync_free(). Currently it is
+leaked instead. Fix this.
+
+Fixes: c167b9c7e3d6 ("platform/surface: Add Surface Aggregator subsystem")
+Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
+Link: https://lore.kernel.org/r/20221220175608.1436273-1-luzmaximilian@gmail.com
+Reviewed-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/platform/surface/aggregator/controller.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/platform/surface/aggregator/controller.c b/drivers/platform/surface/aggregator/controller.c
+index b8c377b3f932..f23f7128cf2b 100644
+--- a/drivers/platform/surface/aggregator/controller.c
++++ b/drivers/platform/surface/aggregator/controller.c
+@@ -1700,8 +1700,10 @@ int ssam_request_sync(struct ssam_controller *ctrl,
+               return status;
+       status = ssam_request_sync_init(rqst, spec->flags);
+-      if (status)
++      if (status) {
++              ssam_request_sync_free(rqst);
+               return status;
++      }
+       ssam_request_sync_set_resp(rqst, rsp);
+-- 
+2.35.1
+
diff --git a/queue-5.15/regulator-da9211-use-irq-handler-when-ready.patch b/queue-5.15/regulator-da9211-use-irq-handler-when-ready.patch
new file mode 100644 (file)
index 0000000..741b060
--- /dev/null
@@ -0,0 +1,66 @@
+From 763c3a824f2b8455b1b6ee80a448359060ea6199 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 27 Nov 2022 22:06:02 +0100
+Subject: regulator: da9211: Use irq handler when ready
+
+From: Ricardo Ribalda <ribalda@chromium.org>
+
+[ Upstream commit 02228f6aa6a64d588bc31e3267d05ff184d772eb ]
+
+If the system does not come from reset (e.g. when it was started via
+kexec()), the regulator might have an IRQ waiting for us.
+
+If we enable the IRQ handler before its structures are ready, we crash.
+
+This patch fixes:
+
+[    1.141839] Unable to handle kernel read from unreadable memory at virtual address 0000000000000078
+[    1.316096] Call trace:
+[    1.316101]  blocking_notifier_call_chain+0x20/0xa8
+[    1.322757] cpu cpu0: dummy supplies not allowed for exclusive requests
+[    1.327823]  regulator_notifier_call_chain+0x1c/0x2c
+[    1.327825]  da9211_irq_handler+0x68/0xf8
+[    1.327829]  irq_thread+0x11c/0x234
+[    1.327833]  kthread+0x13c/0x154
+
+Signed-off-by: Ricardo Ribalda <ribalda@chromium.org>
+Reviewed-by: Adam Ward <DLG-Adam.Ward.opensource@dm.renesas.com>
+Link: https://lore.kernel.org/r/20221124-da9211-v2-0-1779e3c5d491@chromium.org
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/regulator/da9211-regulator.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/regulator/da9211-regulator.c b/drivers/regulator/da9211-regulator.c
+index e01b32d1fa17..00828f5baa97 100644
+--- a/drivers/regulator/da9211-regulator.c
++++ b/drivers/regulator/da9211-regulator.c
+@@ -498,6 +498,12 @@ static int da9211_i2c_probe(struct i2c_client *i2c)
+       chip->chip_irq = i2c->irq;
++      ret = da9211_regulator_init(chip);
++      if (ret < 0) {
++              dev_err(chip->dev, "Failed to initialize regulator: %d\n", ret);
++              return ret;
++      }
++
+       if (chip->chip_irq != 0) {
+               ret = devm_request_threaded_irq(chip->dev, chip->chip_irq, NULL,
+                                       da9211_irq_handler,
+@@ -512,11 +518,6 @@ static int da9211_i2c_probe(struct i2c_client *i2c)
+               dev_warn(chip->dev, "No IRQ configured\n");
+       }
+-      ret = da9211_regulator_init(chip);
+-
+-      if (ret < 0)
+-              dev_err(chip->dev, "Failed to initialize regulator: %d\n", ret);
+-
+       return ret;
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.15/scsi-mpi3mr-refer-config_scsi_mpi3mr-in-makefile.patch b/queue-5.15/scsi-mpi3mr-refer-config_scsi_mpi3mr-in-makefile.patch
new file mode 100644 (file)
index 0000000..5256854
--- /dev/null
@@ -0,0 +1,43 @@
+From 3ccf1b64b2d981b238af024ba3d2eb38d6b99b07 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 7 Dec 2022 11:36:59 +0900
+Subject: scsi: mpi3mr: Refer CONFIG_SCSI_MPI3MR in Makefile
+
+From: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+
+[ Upstream commit f0a43ba6c66cc0688e2748d986a1459fdd3442ef ]
+
+When Kconfig item CONFIG_SCSI_MPI3MR was introduced for the mpi3mr driver,
+the Makefile of the driver was not modified to refer to the Kconfig item.
+
+As a result, mpi3mr.ko is built regardless of the Kconfig item value y or
+m. Also, if 'make localmodconfig' can not find the Kconfig item in the
+Makefile, then it does not generate CONFIG_SCSI_MPI3MR=m even when
+mpi3mr.ko is loaded on the system.
+
+Refer to the Kconfig item to avoid the issues.
+
+Fixes: c4f7ac64616e ("scsi: mpi3mr: Add mpi30 Rev-R headers and Kconfig")
+Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+Link: https://lore.kernel.org/r/20221207023659.2411785-1-shinichiro.kawasaki@wdc.com
+Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+Acked-by: Sathya Prakash Veerichetty <sathya.prakash@broadcom.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/mpi3mr/Makefile | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/scsi/mpi3mr/Makefile b/drivers/scsi/mpi3mr/Makefile
+index 7c2063e04c81..7ebca0ba538d 100644
+--- a/drivers/scsi/mpi3mr/Makefile
++++ b/drivers/scsi/mpi3mr/Makefile
+@@ -1,4 +1,4 @@
+ # mpi3mr makefile
+-obj-m += mpi3mr.o
++obj-$(CONFIG_SCSI_MPI3MR) += mpi3mr.o
+ mpi3mr-y +=  mpi3mr_os.o     \
+               mpi3mr_fw.o \
+-- 
+2.35.1
+
diff --git a/queue-5.15/scsi-ufs-core-wlun-suspend-ssu-enter-hibern8-fail-re.patch b/queue-5.15/scsi-ufs-core-wlun-suspend-ssu-enter-hibern8-fail-re.patch
new file mode 100644 (file)
index 0000000..98cc9a6
--- /dev/null
@@ -0,0 +1,79 @@
+From 865c0f3ed291f7343c106c1a486ff4820623ecf0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Dec 2022 15:25:20 +0800
+Subject: scsi: ufs: core: WLUN suspend SSU/enter hibern8 fail recovery
+
+From: Peter Wang <peter.wang@mediatek.com>
+
+[ Upstream commit 1a5665fc8d7a000671ebd3fe69c6f9acf1e0dcd9 ]
+
+When SSU or entering hibern8 fails in the WLUN suspend flow, trigger the
+error handler and return busy to break the suspend.  Otherwise the consumer
+will get stuck in runtime suspend status.
+
+Fixes: b294ff3e3449 ("scsi: ufs: core: Enable power management for wlun")
+Signed-off-by: Peter Wang <peter.wang@mediatek.com>
+Link: https://lore.kernel.org/r/20221208072520.26210-1-peter.wang@mediatek.com
+Reviewed-by: Stanley Chu <stanley.chu@mediatek.com>
+Reviewed-by: Bart Van Assche <bvanassche@acm.org>
+Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/ufs/ufshcd.c | 26 ++++++++++++++++++++++++++
+ 1 file changed, 26 insertions(+)
+
+diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
+index 6dd341674110..0b06223f5714 100644
+--- a/drivers/scsi/ufs/ufshcd.c
++++ b/drivers/scsi/ufs/ufshcd.c
+@@ -5909,6 +5909,14 @@ static inline void ufshcd_schedule_eh_work(struct ufs_hba *hba)
+       }
+ }
++static void ufshcd_force_error_recovery(struct ufs_hba *hba)
++{
++      spin_lock_irq(hba->host->host_lock);
++      hba->force_reset = true;
++      ufshcd_schedule_eh_work(hba);
++      spin_unlock_irq(hba->host->host_lock);
++}
++
+ static void ufshcd_clk_scaling_allow(struct ufs_hba *hba, bool allow)
+ {
+       down_write(&hba->clk_scaling_lock);
+@@ -8775,6 +8783,15 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
+               if (!hba->dev_info.b_rpm_dev_flush_capable) {
+                       ret = ufshcd_set_dev_pwr_mode(hba, req_dev_pwr_mode);
++                      if (ret && pm_op != UFS_SHUTDOWN_PM) {
++                              /*
++                               * If return err in suspend flow, IO will hang.
++                               * Trigger error handler and break suspend for
++                               * error recovery.
++                               */
++                              ufshcd_force_error_recovery(hba);
++                              ret = -EBUSY;
++                      }
+                       if (ret)
+                               goto enable_scaling;
+               }
+@@ -8786,6 +8803,15 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
+        */
+       check_for_bkops = !ufshcd_is_ufs_dev_deepsleep(hba);
+       ret = ufshcd_link_state_transition(hba, req_link_state, check_for_bkops);
++      if (ret && pm_op != UFS_SHUTDOWN_PM) {
++              /*
++               * If return err in suspend flow, IO will hang.
++               * Trigger error handler and break suspend for
++               * error recovery.
++               */
++              ufshcd_force_error_recovery(hba);
++              ret = -EBUSY;
++      }
+       if (ret)
+               goto set_dev_active;
+-- 
+2.35.1
+
diff --git a/queue-5.15/scsi-ufs-stop-using-the-clock-scaling-lock-in-the-er.patch b/queue-5.15/scsi-ufs-stop-using-the-clock-scaling-lock-in-the-er.patch
new file mode 100644 (file)
index 0000000..d4e3df7
--- /dev/null
@@ -0,0 +1,66 @@
+From 27b50d4b8710b7699b07650ac8a87bc590e814e0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 Dec 2021 15:19:48 -0800
+Subject: scsi: ufs: Stop using the clock scaling lock in the error handler
+
+From: Bart Van Assche <bvanassche@acm.org>
+
+[ Upstream commit 5675c381ea51360b4968b78f23aefda73e3de90d ]
+
+Instead of locking and unlocking the clock scaling lock, surround the
+command queueing code with an RCU reader lock and call synchronize_rcu().
+This patch prepares for removal of the clock scaling lock.
+
+Link: https://lore.kernel.org/r/20211203231950.193369-16-bvanassche@acm.org
+Tested-by: Bean Huo <beanhuo@micron.com>
+Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
+Reviewed-by: Bean Huo <beanhuo@micron.com>
+Signed-off-by: Bart Van Assche <bvanassche@acm.org>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Stable-dep-of: 1a5665fc8d7a ("scsi: ufs: core: WLUN suspend SSU/enter hibern8 fail recovery")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/ufs/ufshcd.c | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
+index a428b8145dcc..6dd341674110 100644
+--- a/drivers/scsi/ufs/ufshcd.c
++++ b/drivers/scsi/ufs/ufshcd.c
+@@ -2700,6 +2700,12 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
+       if (!down_read_trylock(&hba->clk_scaling_lock))
+               return SCSI_MLQUEUE_HOST_BUSY;
++      /*
++       * Allows the UFS error handler to wait for prior ufshcd_queuecommand()
++       * calls.
++       */
++      rcu_read_lock();
++
+       switch (hba->ufshcd_state) {
+       case UFSHCD_STATE_OPERATIONAL:
+       case UFSHCD_STATE_EH_SCHEDULED_NON_FATAL:
+@@ -2766,7 +2772,10 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
+       }
+       ufshcd_send_command(hba, tag);
++
+ out:
++      rcu_read_unlock();
++
+       up_read(&hba->clk_scaling_lock);
+       if (ufs_trigger_eh()) {
+@@ -5952,8 +5961,7 @@ static void ufshcd_err_handling_prepare(struct ufs_hba *hba)
+       }
+       ufshcd_scsi_block_requests(hba);
+       /* Drain ufshcd_queuecommand() */
+-      down_write(&hba->clk_scaling_lock);
+-      up_write(&hba->clk_scaling_lock);
++      synchronize_rcu();
+       cancel_work_sync(&hba->eeh_work);
+ }
+-- 
+2.35.1
+
index 8c81065c4f0b95ae7bcefd6ec0584d6ed8c64104..0588cd7618e08643ac06627af0ada94fe1718bbf 100644 (file)
@@ -42,3 +42,40 @@ powerpc-imc-pmu-fix-use-of-mutex-in-irqs-disabled-section.patch
 x86-boot-avoid-using-intel-mnemonics-in-at-t-syntax-asm.patch
 edac-device-fix-period-calculation-in-edac_device_reset_delay_period.patch
 x86-resctrl-fix-task-closid-rmid-update-race.patch
+regulator-da9211-use-irq-handler-when-ready.patch
+scsi-mpi3mr-refer-config_scsi_mpi3mr-in-makefile.patch
+scsi-ufs-stop-using-the-clock-scaling-lock-in-the-er.patch
+scsi-ufs-core-wlun-suspend-ssu-enter-hibern8-fail-re.patch
+asoc-wm8904-fix-wrong-outputs-volume-after-power-rea.patch
+alsa-usb-audio-make-sure-to-stop-endpoints-before-cl.patch
+alsa-usb-audio-relax-hw-constraints-for-implicit-fb-.patch
+tipc-fix-unexpected-link-reset-due-to-discovery-mess.patch
+octeontx2-af-fix-lmac-config-in-cgx_lmac_rx_tx_enabl.patch
+hvc-xen-lock-console-list-traversal.patch
+nfc-pn533-wait-for-out_urb-s-completion-in-pn533_usb.patch
+af_unix-selftest-fix-the-size-of-the-parameter-to-co.patch
+tools-nolibc-x86-remove-r8-r9-and-r10-from-the-clobb.patch
+tools-nolibc-x86-64-use-mov-60-eax-instead-of-mov-60.patch
+tools-nolibc-use-pselect6-on-riscv.patch
+tools-nolibc-std-move-the-standard-type-definitions-.patch
+tools-nolibc-types-split-syscall-specific-definition.patch
+tools-nolibc-arch-split-arch-specific-code-into-indi.patch
+tools-nolibc-arch-mark-the-_start-symbol-as-weak.patch
+tools-nolibc-remove-.global-_start-from-the-entry-po.patch
+tools-nolibc-restore-mips-branch-ordering-in-the-_st.patch
+tools-nolibc-fix-the-o_-fcntl-open-macro-definitions.patch
+net-sched-act_mpls-fix-warning-during-failed-attribu.patch
+net-mlx5-fix-ptp-max-frequency-adjustment-range.patch
+net-mlx5e-don-t-support-encap-rules-with-gbp-option.patch
+octeontx2-pf-fix-resource-leakage-in-vf-driver-unbin.patch
+perf-build-properly-guard-libbpf-includes.patch
+igc-fix-pps-delta-between-two-synchronized-end-point.patch
+platform-surface-aggregator-add-missing-call-to-ssam.patch
+mm-always-release-pages-to-the-buddy-allocator-in-me.patch
+documentation-kvm-add-api-issues-section.patch
+kvm-x86-do-not-return-host-topology-information-from.patch
+io_uring-lock-overflowing-for-iopoll.patch
+arm64-atomics-format-whitespace-consistently.patch
+arm64-atomics-remove-ll-sc-trampolines.patch
+arm64-cmpxchg_double-hazard-against-entire-exchange-.patch
+efi-fix-null-deref-in-init-error-path.patch
diff --git a/queue-5.15/tipc-fix-unexpected-link-reset-due-to-discovery-mess.patch b/queue-5.15/tipc-fix-unexpected-link-reset-due-to-discovery-mess.patch
new file mode 100644 (file)
index 0000000..73324bd
--- /dev/null
@@ -0,0 +1,111 @@
+From 3364f02fe9c1830005c21a21ab04cdeea05ff3a0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Jan 2023 06:02:51 +0000
+Subject: tipc: fix unexpected link reset due to discovery messages
+
+From: Tung Nguyen <tung.q.nguyen@dektech.com.au>
+
+[ Upstream commit c244c092f1ed2acfb5af3d3da81e22367d3dd733 ]
+
+This unexpected behavior is observed:
+
+node 1                    | node 2
+------                    | ------
+link is established       | link is established
+reboot                    | link is reset
+up                        | send discovery message
+receive discovery message |
+link is established       | link is established
+send discovery message    |
+                          | receive discovery message
+                          | link is reset (unexpected)
+                          | send reset message
+link is reset             |
+
+It is due to delayed re-discovery as described in function
+tipc_node_check_dest(): "this link endpoint has already reset
+and re-established contact with the peer, before receiving a
+discovery message from that node."
+
+However, commit 598411d70f85 changed the condition for calling
+tipc_node_link_down(), which used to be the acceptance of a new media address.
+
+This commit fixes this by restoring the old and correct behavior.
+
+Fixes: 598411d70f85 ("tipc: make resetting of links non-atomic")
+Acked-by: Jon Maloy <jmaloy@redhat.com>
+Signed-off-by: Tung Nguyen <tung.q.nguyen@dektech.com.au>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/tipc/node.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+diff --git a/net/tipc/node.c b/net/tipc/node.c
+index 49ddc484c4fe..5e000fde8067 100644
+--- a/net/tipc/node.c
++++ b/net/tipc/node.c
+@@ -1179,8 +1179,9 @@ void tipc_node_check_dest(struct net *net, u32 addr,
+       bool addr_match = false;
+       bool sign_match = false;
+       bool link_up = false;
++      bool link_is_reset = false;
+       bool accept_addr = false;
+-      bool reset = true;
++      bool reset = false;
+       char *if_name;
+       unsigned long intv;
+       u16 session;
+@@ -1200,14 +1201,14 @@ void tipc_node_check_dest(struct net *net, u32 addr,
+       /* Prepare to validate requesting node's signature and media address */
+       l = le->link;
+       link_up = l && tipc_link_is_up(l);
++      link_is_reset = l && tipc_link_is_reset(l);
+       addr_match = l && !memcmp(&le->maddr, maddr, sizeof(*maddr));
+       sign_match = (signature == n->signature);
+       /* These three flags give us eight permutations: */
+       if (sign_match && addr_match && link_up) {
+-              /* All is fine. Do nothing. */
+-              reset = false;
++              /* All is fine. Ignore requests. */
+               /* Peer node is not a container/local namespace */
+               if (!n->peer_hash_mix)
+                       n->peer_hash_mix = hash_mixes;
+@@ -1232,6 +1233,7 @@ void tipc_node_check_dest(struct net *net, u32 addr,
+                */
+               accept_addr = true;
+               *respond = true;
++              reset = true;
+       } else if (!sign_match && addr_match && link_up) {
+               /* Peer node rebooted. Two possibilities:
+                *  - Delayed re-discovery; this link endpoint has already
+@@ -1263,6 +1265,7 @@ void tipc_node_check_dest(struct net *net, u32 addr,
+               n->signature = signature;
+               accept_addr = true;
+               *respond = true;
++              reset = true;
+       }
+       if (!accept_addr)
+@@ -1291,6 +1294,7 @@ void tipc_node_check_dest(struct net *net, u32 addr,
+               tipc_link_fsm_evt(l, LINK_RESET_EVT);
+               if (n->state == NODE_FAILINGOVER)
+                       tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT);
++              link_is_reset = tipc_link_is_reset(l);
+               le->link = l;
+               n->link_cnt++;
+               tipc_node_calculate_timer(n, l);
+@@ -1303,7 +1307,7 @@ void tipc_node_check_dest(struct net *net, u32 addr,
+       memcpy(&le->maddr, maddr, sizeof(*maddr));
+ exit:
+       tipc_node_write_unlock(n);
+-      if (reset && l && !tipc_link_is_reset(l))
++      if (reset && !link_is_reset)
+               tipc_node_link_down(n, b->identity, false);
+       tipc_node_put(n);
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.15/tools-nolibc-arch-mark-the-_start-symbol-as-weak.patch b/queue-5.15/tools-nolibc-arch-mark-the-_start-symbol-as-weak.patch
new file mode 100644 (file)
index 0000000..2d60958
--- /dev/null
@@ -0,0 +1,100 @@
+From 747c3f52dc5c0f8df4203b93146b2365ec1dd1e7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 7 Feb 2022 17:23:45 +0100
+Subject: tools/nolibc/arch: mark the _start symbol as weak
+
+From: Willy Tarreau <w@1wt.eu>
+
+[ Upstream commit dffeb81af5fe5eedccf5ea4a8a120d8c3accd26e ]
+
+By doing so we can link together multiple C files that have been compiled
+with nolibc and which each have a _start symbol.
+
+Signed-off-by: Willy Tarreau <w@1wt.eu>
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Stable-dep-of: 184177c3d6e0 ("tools/nolibc: restore mips branch ordering in the _start block")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/include/nolibc/arch-aarch64.h | 1 +
+ tools/include/nolibc/arch-arm.h     | 1 +
+ tools/include/nolibc/arch-i386.h    | 1 +
+ tools/include/nolibc/arch-mips.h    | 1 +
+ tools/include/nolibc/arch-riscv.h   | 1 +
+ tools/include/nolibc/arch-x86_64.h  | 1 +
+ 6 files changed, 6 insertions(+)
+
+diff --git a/tools/include/nolibc/arch-aarch64.h b/tools/include/nolibc/arch-aarch64.h
+index 443de5fb7f54..87d9e434820c 100644
+--- a/tools/include/nolibc/arch-aarch64.h
++++ b/tools/include/nolibc/arch-aarch64.h
+@@ -183,6 +183,7 @@ struct sys_stat_struct {
+ /* startup code */
+ asm(".section .text\n"
++    ".weak _start\n"
+     ".global _start\n"
+     "_start:\n"
+     "ldr x0, [sp]\n"              // argc (x0) was in the stack
+diff --git a/tools/include/nolibc/arch-arm.h b/tools/include/nolibc/arch-arm.h
+index 66f687ad987f..001a3c8c9ad5 100644
+--- a/tools/include/nolibc/arch-arm.h
++++ b/tools/include/nolibc/arch-arm.h
+@@ -176,6 +176,7 @@ struct sys_stat_struct {
+ /* startup code */
+ asm(".section .text\n"
++    ".weak _start\n"
+     ".global _start\n"
+     "_start:\n"
+ #if defined(__THUMBEB__) || defined(__THUMBEL__)
+diff --git a/tools/include/nolibc/arch-i386.h b/tools/include/nolibc/arch-i386.h
+index 32f42e2cee26..d7e4d53325a3 100644
+--- a/tools/include/nolibc/arch-i386.h
++++ b/tools/include/nolibc/arch-i386.h
+@@ -175,6 +175,7 @@ struct sys_stat_struct {
+  *
+  */
+ asm(".section .text\n"
++    ".weak _start\n"
+     ".global _start\n"
+     "_start:\n"
+     "pop %eax\n"                // argc   (first arg, %eax)
+diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h
+index e330201dde6a..c9a6aac87c6d 100644
+--- a/tools/include/nolibc/arch-mips.h
++++ b/tools/include/nolibc/arch-mips.h
+@@ -190,6 +190,7 @@ struct sys_stat_struct {
+ /* startup code, note that it's called __start on MIPS */
+ asm(".section .text\n"
++    ".weak __start\n"
+     ".set nomips16\n"
+     ".global __start\n"
+     ".set    noreorder\n"
+diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h
+index 9d5ff78f606b..bc10b7b5706d 100644
+--- a/tools/include/nolibc/arch-riscv.h
++++ b/tools/include/nolibc/arch-riscv.h
+@@ -184,6 +184,7 @@ struct sys_stat_struct {
+ /* startup code */
+ asm(".section .text\n"
++    ".weak _start\n"
+     ".global _start\n"
+     "_start:\n"
+     ".option push\n"
+diff --git a/tools/include/nolibc/arch-x86_64.h b/tools/include/nolibc/arch-x86_64.h
+index 83c4b458ada7..fe517c16cd4d 100644
+--- a/tools/include/nolibc/arch-x86_64.h
++++ b/tools/include/nolibc/arch-x86_64.h
+@@ -198,6 +198,7 @@ struct sys_stat_struct {
+  *
+  */
+ asm(".section .text\n"
++    ".weak _start\n"
+     ".global _start\n"
+     "_start:\n"
+     "pop %rdi\n"                // argc   (first arg, %rdi)
+-- 
+2.35.1
+
diff --git a/queue-5.15/tools-nolibc-arch-split-arch-specific-code-into-indi.patch b/queue-5.15/tools-nolibc-arch-split-arch-specific-code-into-indi.patch
new file mode 100644 (file)
index 0000000..7c745ff
--- /dev/null
@@ -0,0 +1,2553 @@
+From 4e685afa3b111896a3d6b0bfd69ad09a8483a717 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 7 Feb 2022 17:23:17 +0100
+Subject: tools/nolibc/arch: split arch-specific code into individual files
+
+From: Willy Tarreau <w@1wt.eu>
+
+[ Upstream commit 271661c1cde5ff47eb7af9946866cd66b70dc328 ]
+
+In order to ease maintenance, this splits the arch-specific code into
+one file per architecture. A common file "arch.h" is used to include the
+right file among arch-* based on the detected architecture. Projects
+which are already split per architecture could simply rename these
+files to $arch/arch.h and get rid of the common arch.h. For this
+reason, include guards were placed into each arch-specific file.
+
+Signed-off-by: Willy Tarreau <w@1wt.eu>
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Stable-dep-of: 184177c3d6e0 ("tools/nolibc: restore mips branch ordering in the _start block")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/include/nolibc/arch-aarch64.h |  199 +++++
+ tools/include/nolibc/arch-arm.h     |  204 +++++
+ tools/include/nolibc/arch-i386.h    |  196 +++++
+ tools/include/nolibc/arch-mips.h    |  215 +++++
+ tools/include/nolibc/arch-riscv.h   |  204 +++++
+ tools/include/nolibc/arch-x86_64.h  |  215 +++++
+ tools/include/nolibc/arch.h         |   32 +
+ tools/include/nolibc/nolibc.h       | 1187 +--------------------------
+ 8 files changed, 1266 insertions(+), 1186 deletions(-)
+ create mode 100644 tools/include/nolibc/arch-aarch64.h
+ create mode 100644 tools/include/nolibc/arch-arm.h
+ create mode 100644 tools/include/nolibc/arch-i386.h
+ create mode 100644 tools/include/nolibc/arch-mips.h
+ create mode 100644 tools/include/nolibc/arch-riscv.h
+ create mode 100644 tools/include/nolibc/arch-x86_64.h
+ create mode 100644 tools/include/nolibc/arch.h
+
+diff --git a/tools/include/nolibc/arch-aarch64.h b/tools/include/nolibc/arch-aarch64.h
+new file mode 100644
+index 000000000000..443de5fb7f54
+--- /dev/null
++++ b/tools/include/nolibc/arch-aarch64.h
+@@ -0,0 +1,199 @@
++/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
++/*
++ * AARCH64 specific definitions for NOLIBC
++ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
++ */
++
++#ifndef _NOLIBC_ARCH_AARCH64_H
++#define _NOLIBC_ARCH_AARCH64_H
++
++/* O_* macros for fcntl/open are architecture-specific */
++#define O_RDONLY            0
++#define O_WRONLY            1
++#define O_RDWR              2
++#define O_CREAT          0x40
++#define O_EXCL           0x80
++#define O_NOCTTY        0x100
++#define O_TRUNC         0x200
++#define O_APPEND        0x400
++#define O_NONBLOCK      0x800
++#define O_DIRECTORY    0x4000
++
++/* The struct returned by the newfstatat() syscall. Differs slightly from the
++ * x86_64's stat one by field ordering, so be careful.
++ */
++struct sys_stat_struct {
++      unsigned long   st_dev;
++      unsigned long   st_ino;
++      unsigned int    st_mode;
++      unsigned int    st_nlink;
++      unsigned int    st_uid;
++      unsigned int    st_gid;
++
++      unsigned long   st_rdev;
++      unsigned long   __pad1;
++      long            st_size;
++      int             st_blksize;
++      int             __pad2;
++
++      long            st_blocks;
++      long            st_atime;
++      unsigned long   st_atime_nsec;
++      long            st_mtime;
++
++      unsigned long   st_mtime_nsec;
++      long            st_ctime;
++      unsigned long   st_ctime_nsec;
++      unsigned int    __unused[2];
++};
++
++/* Syscalls for AARCH64 :
++ *   - registers are 64-bit
++ *   - stack is 16-byte aligned
++ *   - syscall number is passed in x8
++ *   - arguments are in x0, x1, x2, x3, x4, x5
++ *   - the system call is performed by calling svc 0
++ *   - syscall return comes in x0.
++ *   - the arguments are cast to long and assigned into the target registers
++ *     which are then simply passed as registers to the asm code, so that we
++ *     don't have to experience issues with register constraints.
++ *
++ * On aarch64, select() is not implemented so we have to use pselect6().
++ */
++#define __ARCH_WANT_SYS_PSELECT6
++
++#define my_syscall0(num)                                                      \
++({                                                                            \
++      register long _num  asm("x8") = (num);                                \
++      register long _arg1 asm("x0");                                        \
++                                                                            \
++      asm volatile (                                                        \
++              "svc #0\n"                                                    \
++              : "=r"(_arg1)                                                 \
++              : "r"(_num)                                                   \
++              : "memory", "cc"                                              \
++      );                                                                    \
++      _arg1;                                                                \
++})
++
++#define my_syscall1(num, arg1)                                                \
++({                                                                            \
++      register long _num  asm("x8") = (num);                                \
++      register long _arg1 asm("x0") = (long)(arg1);                         \
++                                                                            \
++      asm volatile (                                                        \
++              "svc #0\n"                                                    \
++              : "=r"(_arg1)                                                 \
++              : "r"(_arg1),                                                 \
++                "r"(_num)                                                   \
++              : "memory", "cc"                                              \
++      );                                                                    \
++      _arg1;                                                                \
++})
++
++#define my_syscall2(num, arg1, arg2)                                          \
++({                                                                            \
++      register long _num  asm("x8") = (num);                                \
++      register long _arg1 asm("x0") = (long)(arg1);                         \
++      register long _arg2 asm("x1") = (long)(arg2);                         \
++                                                                            \
++      asm volatile (                                                        \
++              "svc #0\n"                                                    \
++              : "=r"(_arg1)                                                 \
++              : "r"(_arg1), "r"(_arg2),                                     \
++                "r"(_num)                                                   \
++              : "memory", "cc"                                              \
++      );                                                                    \
++      _arg1;                                                                \
++})
++
++#define my_syscall3(num, arg1, arg2, arg3)                                    \
++({                                                                            \
++      register long _num  asm("x8") = (num);                                \
++      register long _arg1 asm("x0") = (long)(arg1);                         \
++      register long _arg2 asm("x1") = (long)(arg2);                         \
++      register long _arg3 asm("x2") = (long)(arg3);                         \
++                                                                            \
++      asm volatile (                                                        \
++              "svc #0\n"                                                    \
++              : "=r"(_arg1)                                                 \
++              : "r"(_arg1), "r"(_arg2), "r"(_arg3),                         \
++                "r"(_num)                                                   \
++              : "memory", "cc"                                              \
++      );                                                                    \
++      _arg1;                                                                \
++})
++
++#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
++({                                                                            \
++      register long _num  asm("x8") = (num);                                \
++      register long _arg1 asm("x0") = (long)(arg1);                         \
++      register long _arg2 asm("x1") = (long)(arg2);                         \
++      register long _arg3 asm("x2") = (long)(arg3);                         \
++      register long _arg4 asm("x3") = (long)(arg4);                         \
++                                                                            \
++      asm volatile (                                                        \
++              "svc #0\n"                                                    \
++              : "=r"(_arg1)                                                 \
++              : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4),             \
++                "r"(_num)                                                   \
++              : "memory", "cc"                                              \
++      );                                                                    \
++      _arg1;                                                                \
++})
++
++#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
++({                                                                            \
++      register long _num  asm("x8") = (num);                                \
++      register long _arg1 asm("x0") = (long)(arg1);                         \
++      register long _arg2 asm("x1") = (long)(arg2);                         \
++      register long _arg3 asm("x2") = (long)(arg3);                         \
++      register long _arg4 asm("x3") = (long)(arg4);                         \
++      register long _arg5 asm("x4") = (long)(arg5);                         \
++                                                                            \
++      asm volatile (                                                        \
++              "svc #0\n"                                                    \
++              : "=r" (_arg1)                                                \
++              : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
++                "r"(_num)                                                   \
++              : "memory", "cc"                                              \
++      );                                                                    \
++      _arg1;                                                                \
++})
++
++#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6)                  \
++({                                                                            \
++      register long _num  asm("x8") = (num);                                \
++      register long _arg1 asm("x0") = (long)(arg1);                         \
++      register long _arg2 asm("x1") = (long)(arg2);                         \
++      register long _arg3 asm("x2") = (long)(arg3);                         \
++      register long _arg4 asm("x3") = (long)(arg4);                         \
++      register long _arg5 asm("x4") = (long)(arg5);                         \
++      register long _arg6 asm("x5") = (long)(arg6);                         \
++                                                                            \
++      asm volatile (                                                        \
++              "svc #0\n"                                                    \
++              : "=r" (_arg1)                                                \
++              : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
++                "r"(_arg6), "r"(_num)                                       \
++              : "memory", "cc"                                              \
++      );                                                                    \
++      _arg1;                                                                \
++})
++
++/* startup code */
++asm(".section .text\n"
++    ".global _start\n"
++    "_start:\n"
++    "ldr x0, [sp]\n"              // argc (x0) was in the stack
++    "add x1, sp, 8\n"             // argv (x1) = sp
++    "lsl x2, x0, 3\n"             // envp (x2) = 8*argc ...
++    "add x2, x2, 8\n"             //           + 8 (skip null)
++    "add x2, x2, x1\n"            //           + argv
++    "and sp, x1, -16\n"           // sp must be 16-byte aligned in the callee
++    "bl main\n"                   // main() returns the status code, we'll exit with it.
++    "mov x8, 93\n"                // NR_exit == 93
++    "svc #0\n"
++    "");
++
++#endif // _NOLIBC_ARCH_AARCH64_H
+diff --git a/tools/include/nolibc/arch-arm.h b/tools/include/nolibc/arch-arm.h
+new file mode 100644
+index 000000000000..66f687ad987f
+--- /dev/null
++++ b/tools/include/nolibc/arch-arm.h
+@@ -0,0 +1,204 @@
++/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
++/*
++ * ARM specific definitions for NOLIBC
++ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
++ */
++
++#ifndef _NOLIBC_ARCH_ARM_H
++#define _NOLIBC_ARCH_ARM_H
++
++/* O_* macros for fcntl/open are architecture-specific */
++#define O_RDONLY            0
++#define O_WRONLY            1
++#define O_RDWR              2
++#define O_CREAT          0x40
++#define O_EXCL           0x80
++#define O_NOCTTY        0x100
++#define O_TRUNC         0x200
++#define O_APPEND        0x400
++#define O_NONBLOCK      0x800
++#define O_DIRECTORY    0x4000
++
++/* The struct returned by the stat() syscall, 32-bit only, the syscall returns
++ * exactly 56 bytes (stops before the unused array). In big endian, the format
++ * differs as devices are returned as short only.
++ */
++struct sys_stat_struct {
++#if defined(__ARMEB__)
++      unsigned short st_dev;
++      unsigned short __pad1;
++#else
++      unsigned long  st_dev;
++#endif
++      unsigned long  st_ino;
++      unsigned short st_mode;
++      unsigned short st_nlink;
++      unsigned short st_uid;
++      unsigned short st_gid;
++
++#if defined(__ARMEB__)
++      unsigned short st_rdev;
++      unsigned short __pad2;
++#else
++      unsigned long  st_rdev;
++#endif
++      unsigned long  st_size;
++      unsigned long  st_blksize;
++      unsigned long  st_blocks;
++
++      unsigned long  st_atime;
++      unsigned long  st_atime_nsec;
++      unsigned long  st_mtime;
++      unsigned long  st_mtime_nsec;
++
++      unsigned long  st_ctime;
++      unsigned long  st_ctime_nsec;
++      unsigned long  __unused[2];
++};
++
++/* Syscalls for ARM in ARM or Thumb modes :
++ *   - registers are 32-bit
++ *   - stack is 8-byte aligned
++ *     ( http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka4127.html)
++ *   - syscall number is passed in r7
++ *   - arguments are in r0, r1, r2, r3, r4, r5
++ *   - the system call is performed by calling svc #0
++ *   - syscall return comes in r0.
++ *   - only lr is clobbered.
++ *   - the arguments are cast to long and assigned into the target registers
++ *     which are then simply passed as registers to the asm code, so that we
++ *     don't have to experience issues with register constraints.
++ *   - the syscall number is always specified last in order to allow to force
++ *     some registers before (gcc refuses a %-register at the last position).
++ *
++ * Also, ARM supports the old_select syscall if newselect is not available
++ */
++#define __ARCH_WANT_SYS_OLD_SELECT
++
++#define my_syscall0(num)                                                      \
++({                                                                            \
++      register long _num asm("r7") = (num);                                 \
++      register long _arg1 asm("r0");                                        \
++                                                                            \
++      asm volatile (                                                        \
++              "svc #0\n"                                                    \
++              : "=r"(_arg1)                                                 \
++              : "r"(_num)                                                   \
++              : "memory", "cc", "lr"                                        \
++      );                                                                    \
++      _arg1;                                                                \
++})
++
++#define my_syscall1(num, arg1)                                                \
++({                                                                            \
++      register long _num asm("r7") = (num);                                 \
++      register long _arg1 asm("r0") = (long)(arg1);                         \
++                                                                            \
++      asm volatile (                                                        \
++              "svc #0\n"                                                    \
++              : "=r"(_arg1)                                                 \
++              : "r"(_arg1),                                                 \
++                "r"(_num)                                                   \
++              : "memory", "cc", "lr"                                        \
++      );                                                                    \
++      _arg1;                                                                \
++})
++
++#define my_syscall2(num, arg1, arg2)                                          \
++({                                                                            \
++      register long _num asm("r7") = (num);                                 \
++      register long _arg1 asm("r0") = (long)(arg1);                         \
++      register long _arg2 asm("r1") = (long)(arg2);                         \
++                                                                            \
++      asm volatile (                                                        \
++              "svc #0\n"                                                    \
++              : "=r"(_arg1)                                                 \
++              : "r"(_arg1), "r"(_arg2),                                     \
++                "r"(_num)                                                   \
++              : "memory", "cc", "lr"                                        \
++      );                                                                    \
++      _arg1;                                                                \
++})
++
++#define my_syscall3(num, arg1, arg2, arg3)                                    \
++({                                                                            \
++      register long _num asm("r7") = (num);                                 \
++      register long _arg1 asm("r0") = (long)(arg1);                         \
++      register long _arg2 asm("r1") = (long)(arg2);                         \
++      register long _arg3 asm("r2") = (long)(arg3);                         \
++                                                                            \
++      asm volatile (                                                        \
++              "svc #0\n"                                                    \
++              : "=r"(_arg1)                                                 \
++              : "r"(_arg1), "r"(_arg2), "r"(_arg3),                         \
++                "r"(_num)                                                   \
++              : "memory", "cc", "lr"                                        \
++      );                                                                    \
++      _arg1;                                                                \
++})
++
++#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
++({                                                                            \
++      register long _num asm("r7") = (num);                                 \
++      register long _arg1 asm("r0") = (long)(arg1);                         \
++      register long _arg2 asm("r1") = (long)(arg2);                         \
++      register long _arg3 asm("r2") = (long)(arg3);                         \
++      register long _arg4 asm("r3") = (long)(arg4);                         \
++                                                                            \
++      asm volatile (                                                        \
++              "svc #0\n"                                                    \
++              : "=r"(_arg1)                                                 \
++              : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4),             \
++                "r"(_num)                                                   \
++              : "memory", "cc", "lr"                                        \
++      );                                                                    \
++      _arg1;                                                                \
++})
++
++#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
++({                                                                            \
++      register long _num asm("r7") = (num);                                 \
++      register long _arg1 asm("r0") = (long)(arg1);                         \
++      register long _arg2 asm("r1") = (long)(arg2);                         \
++      register long _arg3 asm("r2") = (long)(arg3);                         \
++      register long _arg4 asm("r3") = (long)(arg4);                         \
++      register long _arg5 asm("r4") = (long)(arg5);                         \
++                                                                            \
++      asm volatile (                                                        \
++              "svc #0\n"                                                    \
++              : "=r" (_arg1)                                                \
++              : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
++                "r"(_num)                                                   \
++              : "memory", "cc", "lr"                                        \
++      );                                                                    \
++      _arg1;                                                                \
++})
++
++/* startup code: hand argc/argv/envp to main(), then exit with its status */
++asm(".section .text\n"
++    ".global _start\n"
++    "_start:\n"
++#if defined(__THUMBEB__) || defined(__THUMBEL__)
++    /* We enter here in 32-bit mode but if some previous functions were in
++     * 16-bit mode, the assembler cannot know, so we need to tell it we're in
++     * 32-bit now, then switch to 16-bit (is there a better way to do it than
++     * adding 1 by hand ?) and tell the asm we're now in 16-bit mode so that
++     * it generates correct instructions. Note that we do not support thumb1.
++     */
++    ".code 32\n"
++    "add     r0, pc, #1\n"
++    "bx      r0\n"
++    ".code 16\n"
++#endif
++    "pop {%r0}\n"                 // argc was in the stack
++    "mov %r1, %sp\n"              // argv = sp
++    "add %r2, %r1, %r0, lsl #2\n" // envp = argv + 4*argc ...
++    "add %r2, %r2, $4\n"          //        ... + 4
++    "and %r3, %r1, $-8\n"         // AAPCS : sp must be 8-byte aligned in the
++    "mov %sp, %r3\n"              //         callee, and bl doesn't push (lr=pc)
++    "bl main\n"                   // main() returns the status code, we'll exit with it.
++    "movs r7, $1\n"               // NR_exit == 1
++    "svc $0x00\n"                 // exit with the status main() left in r0
++    "");
++
++#endif // _NOLIBC_ARCH_ARM_H
+diff --git a/tools/include/nolibc/arch-i386.h b/tools/include/nolibc/arch-i386.h
+new file mode 100644
+index 000000000000..32f42e2cee26
+--- /dev/null
++++ b/tools/include/nolibc/arch-i386.h
+@@ -0,0 +1,196 @@
++/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
++/*
++ * i386 specific definitions for NOLIBC
++ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
++ */
++
++#ifndef _NOLIBC_ARCH_I386_H
++#define _NOLIBC_ARCH_I386_H
++
++/* O_* macros for fcntl/open are architecture-specific */
++#define O_RDONLY            0
++#define O_WRONLY            1
++#define O_RDWR              2
++#define O_CREAT          0x40
++#define O_EXCL           0x80
++#define O_NOCTTY        0x100
++#define O_TRUNC         0x200
++#define O_APPEND        0x400
++#define O_NONBLOCK      0x800
++#define O_DIRECTORY   0x10000
++
++/* The struct returned by the stat() syscall, 32-bit only, the syscall returns
++ * exactly 56 bytes (stops before the unused array).
++ */
++struct sys_stat_struct {
++      unsigned long  st_dev;
++      unsigned long  st_ino;
++      unsigned short st_mode;
++      unsigned short st_nlink;
++      unsigned short st_uid;
++      unsigned short st_gid;
++
++      unsigned long  st_rdev;
++      unsigned long  st_size;
++      unsigned long  st_blksize;
++      unsigned long  st_blocks;
++
++      unsigned long  st_atime;
++      unsigned long  st_atime_nsec;
++      unsigned long  st_mtime;
++      unsigned long  st_mtime_nsec;
++
++      unsigned long  st_ctime;
++      unsigned long  st_ctime_nsec;
++      unsigned long  __unused[2];
++};
++
++/* Syscalls for i386 :
++ *   - mostly similar to x86_64
++ *   - registers are 32-bit
++ *   - syscall number is passed in eax
++ *   - arguments are in ebx, ecx, edx, esi, edi, ebp respectively
++ *   - all registers are preserved (except eax of course)
++ *   - the system call is performed by calling int $0x80
++ *   - syscall return comes in eax
++ *   - the arguments are cast to long and assigned into the target registers
++ *     which are then simply passed as registers to the asm code, so that we
++ *     don't have to experience issues with register constraints.
++ *   - the syscall number is always specified last in order to allow to force
++ *     some registers before (gcc refuses a %-register at the last position).
++ *
++ * Also, i386 supports the old_select syscall if newselect is not available
++ */
++#define __ARCH_WANT_SYS_OLD_SELECT
++
++#define my_syscall0(num)                                                      \
++({                                                                            \
++      long _ret;                                                            \
++      register long _num asm("eax") = (num);                                \
++      /* eax: syscall number in, result out ("0" ties it to "=a") */        \
++      asm volatile (                                                        \
++              "int $0x80\n"                                                 \
++              : "=a" (_ret)                                                 \
++              : "0"(_num)                                                   \
++              : "memory", "cc"                                              \
++      );                                                                    \
++      _ret;                                                                 \
++})
++
++#define my_syscall1(num, arg1)                                                \
++({                                                                            \
++      long _ret;                                                            \
++      register long _num asm("eax") = (num);                                \
++      register long _arg1 asm("ebx") = (long)(arg1);                        \
++      /* eax: syscall number in, result out; ebx = arg1 */                  \
++      asm volatile (                                                        \
++              "int $0x80\n"                                                 \
++              : "=a" (_ret)                                                 \
++              : "r"(_arg1),                                                 \
++                "0"(_num)                                                   \
++              : "memory", "cc"                                              \
++      );                                                                    \
++      _ret;                                                                 \
++})
++
++#define my_syscall2(num, arg1, arg2)                                          \
++({                                                                            \
++      long _ret;                                                            \
++      register long _num asm("eax") = (num);                                \
++      register long _arg1 asm("ebx") = (long)(arg1);                        \
++      register long _arg2 asm("ecx") = (long)(arg2);                        \
++      /* eax: syscall number in, result out; ebx, ecx = arg1, arg2 */       \
++      asm volatile (                                                        \
++              "int $0x80\n"                                                 \
++              : "=a" (_ret)                                                 \
++              : "r"(_arg1), "r"(_arg2),                                     \
++                "0"(_num)                                                   \
++              : "memory", "cc"                                              \
++      );                                                                    \
++      _ret;                                                                 \
++})
++
++#define my_syscall3(num, arg1, arg2, arg3)                                    \
++({                                                                            \
++      long _ret;                                                            \
++      register long _num asm("eax") = (num);                                \
++      register long _arg1 asm("ebx") = (long)(arg1);                        \
++      register long _arg2 asm("ecx") = (long)(arg2);                        \
++      register long _arg3 asm("edx") = (long)(arg3);                        \
++      /* eax: syscall number in, result out; ebx, ecx, edx = arg1..arg3 */  \
++      asm volatile (                                                        \
++              "int $0x80\n"                                                 \
++              : "=a" (_ret)                                                 \
++              : "r"(_arg1), "r"(_arg2), "r"(_arg3),                         \
++                "0"(_num)                                                   \
++              : "memory", "cc"                                              \
++      );                                                                    \
++      _ret;                                                                 \
++})
++
++#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
++({                                                                            \
++      long _ret;                                                            \
++      register long _num asm("eax") = (num);                                \
++      register long _arg1 asm("ebx") = (long)(arg1);                        \
++      register long _arg2 asm("ecx") = (long)(arg2);                        \
++      register long _arg3 asm("edx") = (long)(arg3);                        \
++      register long _arg4 asm("esi") = (long)(arg4);                        \
++      /* eax: number in, result out; ebx, ecx, edx, esi = arg1..arg4 */     \
++      asm volatile (                                                        \
++              "int $0x80\n"                                                 \
++              : "=a" (_ret)                                                 \
++              : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4),             \
++                "0"(_num)                                                   \
++              : "memory", "cc"                                              \
++      );                                                                    \
++      _ret;                                                                 \
++})
++
++#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
++({                                                                            \
++      long _ret;                                                            \
++      register long _num asm("eax") = (num);                                \
++      register long _arg1 asm("ebx") = (long)(arg1);                        \
++      register long _arg2 asm("ecx") = (long)(arg2);                        \
++      register long _arg3 asm("edx") = (long)(arg3);                        \
++      register long _arg4 asm("esi") = (long)(arg4);                        \
++      register long _arg5 asm("edi") = (long)(arg5);                        \
++      /* eax: number in, result out; ebx, ecx, edx, esi, edi = arg1..5 */   \
++      asm volatile (                                                        \
++              "int $0x80\n"                                                 \
++              : "=a" (_ret)                                                 \
++              : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
++                "0"(_num)                                                   \
++              : "memory", "cc"                                              \
++      );                                                                    \
++      _ret;                                                                 \
++})
++
++/* startup code: hand argc/argv/envp to main(), then exit with its status */
++/*
++ * i386 System V ABI mandates:
++ * 1) last pushed argument must be 16-byte aligned.
++ * 2) The deepest stack frame should be set to zero
++ *
++ */
++asm(".section .text\n"
++    ".global _start\n"
++    "_start:\n"
++    "pop %eax\n"                // argc   (first arg, %eax)
++    "mov %esp, %ebx\n"          // argv[] (second arg, %ebx)
++    "lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx)
++    "xor %ebp, %ebp\n"          // zero the stack frame
++    "and $-16, %esp\n"          // x86 ABI : esp must be 16-byte aligned before
++    "sub $4, %esp\n"            // the call instruction (args are aligned)
++    "push %ecx\n"               // push all registers on the stack so that we
++    "push %ebx\n"               // support both regparm and plain stack modes
++    "push %eax\n"
++    "call main\n"               // main() returns the status code in %eax
++    "mov %eax, %ebx\n"          // retrieve exit code (32-bit int)
++    "movl $1, %eax\n"           // NR_exit == 1
++    "int $0x80\n"               // exit now
++    "hlt\n"                     // ensure it does not return
++    "");
++
++#endif // _NOLIBC_ARCH_I386_H
+diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h
+new file mode 100644
+index 000000000000..e330201dde6a
+--- /dev/null
++++ b/tools/include/nolibc/arch-mips.h
+@@ -0,0 +1,215 @@
++/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
++/*
++ * MIPS specific definitions for NOLIBC
++ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
++ */
++
++#ifndef _NOLIBC_ARCH_MIPS_H
++#define _NOLIBC_ARCH_MIPS_H
++
++/* O_* macros for fcntl/open are architecture-specific */
++#define O_RDONLY            0
++#define O_WRONLY            1
++#define O_RDWR              2
++#define O_APPEND       0x0008
++#define O_NONBLOCK     0x0080
++#define O_CREAT        0x0100
++#define O_TRUNC        0x0200
++#define O_EXCL         0x0400
++#define O_NOCTTY       0x0800
++#define O_DIRECTORY   0x10000
++
++/* The struct returned by the stat() syscall. 88 bytes are returned by the
++ * syscall.
++ */
++struct sys_stat_struct {
++      unsigned int  st_dev;
++      long          st_pad1[3];
++      unsigned long st_ino;
++      unsigned int  st_mode;
++      unsigned int  st_nlink;
++      unsigned int  st_uid;
++      unsigned int  st_gid;
++      unsigned int  st_rdev;
++      long          st_pad2[2];
++      long          st_size;
++      long          st_pad3;
++
++      long          st_atime;
++      long          st_atime_nsec;
++      long          st_mtime;
++      long          st_mtime_nsec;
++
++      long          st_ctime;
++      long          st_ctime_nsec;
++      long          st_blksize;
++      long          st_blocks;
++      long          st_pad4[14];
++};
++
++/* Syscalls for MIPS ABI O32 :
++ *   - WARNING! there's always a delay slot!
++ *   - WARNING again, the syntax is different, registers take a '$' and numbers
++ *     do not.
++ *   - registers are 32-bit
++ *   - stack is 8-byte aligned
++ *   - syscall number is passed in v0 (starts at 0xfa0).
++ *   - arguments are in a0, a1, a2, a3, then the stack. The caller needs to
++ *     leave some room in the stack for the callee to save a0..a3 if needed.
++ *   - Many registers are clobbered, in fact only a0..a2 and s0..s8 are
++ *     preserved. See: https://www.linux-mips.org/wiki/Syscall as well as
++ *     scall32-o32.S in the kernel sources.
++ *   - the system call is performed by calling "syscall"
++ *   - syscall return comes in v0, and register a3 needs to be checked to know
++ *     if an error occurred, in which case errno is in v0.
++ *   - the arguments are cast to long and assigned into the target registers
++ *     which are then simply passed as registers to the asm code, so that we
++ *     don't have to experience issues with register constraints.
++ */
++
++#define my_syscall0(num)                                                      \
++({                                                                            \
++      register long _num asm("v0") = (num);                                 \
++      register long _arg4 asm("a3");                                        \
++      /* v0: number in, result out; a3 != 0 on return => error, negated */  \
++      asm volatile (                                                        \
++              "addiu $sp, $sp, -32\n"                                       \
++              "syscall\n"                                                   \
++              "addiu $sp, $sp, 32\n"                                        \
++              : "=r"(_num), "=r"(_arg4)                                     \
++              : "r"(_num)                                                   \
++              : "memory", "cc", "at", "v1", "hi", "lo",                     \
++                "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
++      );                                                                    \
++      _arg4 ? -_num : _num;                                                 \
++})
++
++#define my_syscall1(num, arg1)                                                \
++({                                                                            \
++      register long _num asm("v0") = (num);                                 \
++      register long _arg1 asm("a0") = (long)(arg1);                         \
++      register long _arg4 asm("a3");                                        \
++      /* v0: number in, result out; a3 != 0 on return => error, negated */  \
++      asm volatile (                                                        \
++              "addiu $sp, $sp, -32\n"                                       \
++              "syscall\n"                                                   \
++              "addiu $sp, $sp, 32\n"                                        \
++              : "=r"(_num), "=r"(_arg4)                                     \
++              : "0"(_num),                                                  \
++                "r"(_arg1)                                                  \
++              : "memory", "cc", "at", "v1", "hi", "lo",                     \
++                "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
++      );                                                                    \
++      _arg4 ? -_num : _num;                                                 \
++})
++
++#define my_syscall2(num, arg1, arg2)                                          \
++({                                                                            \
++      register long _num asm("v0") = (num);                                 \
++      register long _arg1 asm("a0") = (long)(arg1);                         \
++      register long _arg2 asm("a1") = (long)(arg2);                         \
++      register long _arg4 asm("a3");                                        \
++      /* v0: number in, result out; a3 != 0 on return => error, negated */  \
++      asm volatile (                                                        \
++              "addiu $sp, $sp, -32\n"                                       \
++              "syscall\n"                                                   \
++              "addiu $sp, $sp, 32\n"                                        \
++              : "=r"(_num), "=r"(_arg4)                                     \
++              : "0"(_num),                                                  \
++                "r"(_arg1), "r"(_arg2)                                      \
++              : "memory", "cc", "at", "v1", "hi", "lo",                     \
++                "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
++      );                                                                    \
++      _arg4 ? -_num : _num;                                                 \
++})
++
++#define my_syscall3(num, arg1, arg2, arg3)                                    \
++({                                                                            \
++      register long _num asm("v0")  = (num);                                \
++      register long _arg1 asm("a0") = (long)(arg1);                         \
++      register long _arg2 asm("a1") = (long)(arg2);                         \
++      register long _arg3 asm("a2") = (long)(arg3);                         \
++      register long _arg4 asm("a3");                                        \
++      /* v0: number in, result out; a3 != 0 on return => error, negated */  \
++      asm volatile (                                                        \
++              "addiu $sp, $sp, -32\n"                                       \
++              "syscall\n"                                                   \
++              "addiu $sp, $sp, 32\n"                                        \
++              : "=r"(_num), "=r"(_arg4)                                     \
++              : "0"(_num),                                                  \
++                "r"(_arg1), "r"(_arg2), "r"(_arg3)                          \
++              : "memory", "cc", "at", "v1", "hi", "lo",                     \
++                "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
++      );                                                                    \
++      _arg4 ? -_num : _num;                                                 \
++})
++
++#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
++({                                                                            \
++      register long _num asm("v0") = (num);                                 \
++      register long _arg1 asm("a0") = (long)(arg1);                         \
++      register long _arg2 asm("a1") = (long)(arg2);                         \
++      register long _arg3 asm("a2") = (long)(arg3);                         \
++      register long _arg4 asm("a3") = (long)(arg4);                         \
++      /* v0: number in, result out; a3: arg4 in, error flag out */          \
++      asm volatile (                                                        \
++              "addiu $sp, $sp, -32\n"                                       \
++              "syscall\n"                                                   \
++              "addiu $sp, $sp, 32\n"                                        \
++              : "=r" (_num), "=r"(_arg4)                                    \
++              : "0"(_num),                                                  \
++                "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4)              \
++              : "memory", "cc", "at", "v1", "hi", "lo",                     \
++                "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
++      );                                                                    \
++      _arg4 ? -_num : _num;                                                 \
++})
++
++#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
++({                                                                            \
++      register long _num asm("v0") = (num);                                 \
++      register long _arg1 asm("a0") = (long)(arg1);                         \
++      register long _arg2 asm("a1") = (long)(arg2);                         \
++      register long _arg3 asm("a2") = (long)(arg3);                         \
++      register long _arg4 asm("a3") = (long)(arg4);                         \
++      register long _arg5 = (long)(arg5);                                   \
++      /* v0: number in, result out; a3: arg4 in, error flag out */          \
++      asm volatile (                                                        \
++              "addiu $sp, $sp, -32\n"                                       \
++              "sw %7, 16($sp)\n"    /* %7 is _arg5: 5th arg goes on stack */ \
++              "syscall\n  "                                                 \
++              "addiu $sp, $sp, 32\n"                                        \
++              : "=r" (_num), "=r"(_arg4)                                    \
++              : "0"(_num),                                                  \
++                "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5)  \
++              : "memory", "cc", "at", "v1", "hi", "lo",                     \
++                "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
++      );                                                                    \
++      _arg4 ? -_num : _num;                                                 \
++})
++
++/* startup code, note that it's called __start on MIPS */
++asm(".section .text\n"
++    ".set nomips16\n"
++    ".global __start\n"
++    ".set    noreorder\n"
++    ".option pic0\n"
++    ".ent __start\n"
++    "__start:\n"
++    "lw $a0,($sp)\n"              // argc was in the stack
++    "addiu  $a1, $sp, 4\n"        // argv = sp + 4
++    "sll $a2, $a0, 2\n"           // a2 = argc * 4
++    "add   $a2, $a2, $a1\n"       // envp = argv + 4*argc ...
++    "addiu $a2, $a2, 4\n"         //        ... + 4
++    "li $t0, -8\n"                // mask clearing the low 3 bits
++    "and $sp, $sp, $t0\n"         // sp must be 8-byte aligned
++    "addiu $sp,$sp,-16\n"         // the callee expects to save a0..a3 there!
++    "jal main\n"                  // main() returns the status code, we'll exit with it.
++    "nop\n"                       // delay slot
++    "move $a0, $v0\n"             // retrieve 32-bit exit code from v0
++    "li $v0, 4001\n"              // NR_exit == 4001
++    "syscall\n"                   // exit now
++    ".end __start\n"
++    "");
++
++#endif // _NOLIBC_ARCH_MIPS_H
+diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h
+new file mode 100644
+index 000000000000..9d5ff78f606b
+--- /dev/null
++++ b/tools/include/nolibc/arch-riscv.h
+@@ -0,0 +1,204 @@
++/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
++/*
++ * RISCV (32 and 64) specific definitions for NOLIBC
++ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
++ */
++
++#ifndef _NOLIBC_ARCH_RISCV_H
++#define _NOLIBC_ARCH_RISCV_H
++
++/* O_* flags for fcntl/open: riscv uses the asm-generic values (O_CREAT == 0100 octal) */
++#define O_RDONLY            0
++#define O_WRONLY            1
++#define O_RDWR              2
++#define O_CREAT          0x40
++#define O_EXCL           0x80
++#define O_NOCTTY        0x100
++#define O_TRUNC         0x200
++#define O_APPEND        0x400
++#define O_NONBLOCK      0x800
++#define O_DIRECTORY   0x10000
++
++struct sys_stat_struct {
++      unsigned long   st_dev;         /* Device.  */
++      unsigned long   st_ino;         /* File serial number.  */
++      unsigned int    st_mode;        /* File mode.  */
++      unsigned int    st_nlink;       /* Link count.  */
++      unsigned int    st_uid;         /* User ID of the file's owner.  */
++      unsigned int    st_gid;         /* Group ID of the file's group. */
++      unsigned long   st_rdev;        /* Device number, if device.  */
++      unsigned long   __pad1;
++      long            st_size;        /* Size of file, in bytes.  */
++      int             st_blksize;     /* Optimal block size for I/O.  */
++      int             __pad2;
++      long            st_blocks;      /* Number 512-byte blocks allocated. */
++      long            st_atime;       /* Time of last access.  */
++      unsigned long   st_atime_nsec;
++      long            st_mtime;       /* Time of last modification.  */
++      unsigned long   st_mtime_nsec;
++      long            st_ctime;       /* Time of last status change.  */
++      unsigned long   st_ctime_nsec;
++      unsigned int    __unused4;
++      unsigned int    __unused5;
++};
++
++#if   __riscv_xlen == 64
++#define PTRLOG "3"
++#define SZREG  "8"
++#elif __riscv_xlen == 32
++#define PTRLOG "2"
++#define SZREG  "4"
++#endif
++
++/* Syscalls for RISCV :
++ *   - stack is 16-byte aligned
++ *   - syscall number is passed in a7
++ *   - arguments are in a0, a1, a2, a3, a4, a5
++ *   - the system call is performed by calling ecall
++ *   - syscall return comes in a0
++ *   - the arguments are cast to long and assigned into the target
++ *     registers which are then simply passed as registers to the asm code,
++ *     so that we don't have to experience issues with register constraints.
++ *
++ * On riscv, select() is not implemented so we have to use pselect6().
++ */
++#define __ARCH_WANT_SYS_PSELECT6
++
++#define my_syscall0(num)                                                      \
++({                                                                            \
++      register long _num  asm("a7") = (num);                                \
++      register long _arg1 asm("a0");                                        \
++      /* a7 = syscall number; the result is read from a0 */                 \
++      asm volatile (                                                        \
++              "ecall\n\t"                                                   \
++              : "=r"(_arg1)                                                 \
++              : "r"(_num)                                                   \
++              : "memory", "cc"                                              \
++      );                                                                    \
++      _arg1;                                                                \
++})
++
++#define my_syscall1(num, arg1)                                                \
++({                                                                            \
++      register long _num  asm("a7") = (num);                                \
++      register long _arg1 asm("a0") = (long)(arg1);                         \
++      /* a7 = syscall number; a0 = arg1 in and result out ("+r") */         \
++      asm volatile (                                                        \
++              "ecall\n"                                                     \
++              : "+r"(_arg1)                                                 \
++              : "r"(_num)                                                   \
++              : "memory", "cc"                                              \
++      );                                                                    \
++      _arg1;                                                                \
++})
++
++#define my_syscall2(num, arg1, arg2)                                          \
++({                                                                            \
++      register long _num  asm("a7") = (num);                                \
++      register long _arg1 asm("a0") = (long)(arg1);                         \
++      register long _arg2 asm("a1") = (long)(arg2);                         \
++      /* a7 = number; a0 = arg1 in, result out ("+r"); a1 = arg2 */         \
++      asm volatile (                                                        \
++              "ecall\n"                                                     \
++              : "+r"(_arg1)                                                 \
++              : "r"(_arg2),                                                 \
++                "r"(_num)                                                   \
++              : "memory", "cc"                                              \
++      );                                                                    \
++      _arg1;                                                                \
++})
++
++#define my_syscall3(num, arg1, arg2, arg3)                                    \
++({                                                                            \
++      register long _num  asm("a7") = (num);                                \
++      register long _arg1 asm("a0") = (long)(arg1);                         \
++      register long _arg2 asm("a1") = (long)(arg2);                         \
++      register long _arg3 asm("a2") = (long)(arg3);                         \
++      /* a7 = number; a0 = arg1 in, result out ("+r"); a1, a2 = arg2, arg3 */ \
++      asm volatile (                                                        \
++              "ecall\n\t"                                                   \
++              : "+r"(_arg1)                                                 \
++              : "r"(_arg2), "r"(_arg3),                                     \
++                "r"(_num)                                                   \
++              : "memory", "cc"                                              \
++      );                                                                    \
++      _arg1;                                                                \
++})
++
++#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
++({                                                                            \
++      register long _num  asm("a7") = (num);                                \
++      register long _arg1 asm("a0") = (long)(arg1);                         \
++      register long _arg2 asm("a1") = (long)(arg2);                         \
++      register long _arg3 asm("a2") = (long)(arg3);                         \
++      register long _arg4 asm("a3") = (long)(arg4);                         \
++      /* a7 = number; a0 = arg1 in, result out ("+r"); a1..a3 = arg2..4 */  \
++      asm volatile (                                                        \
++              "ecall\n"                                                     \
++              : "+r"(_arg1)                                                 \
++              : "r"(_arg2), "r"(_arg3), "r"(_arg4),                         \
++                "r"(_num)                                                   \
++              : "memory", "cc"                                              \
++      );                                                                    \
++      _arg1;                                                                \
++})
++
++#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
++({                                                                            \
++      register long _num  asm("a7") = (num);                                \
++      register long _arg1 asm("a0") = (long)(arg1);                         \
++      register long _arg2 asm("a1") = (long)(arg2);                         \
++      register long _arg3 asm("a2") = (long)(arg3);                         \
++      register long _arg4 asm("a3") = (long)(arg4);                         \
++      register long _arg5 asm("a4") = (long)(arg5);                         \
++      /* arg5 goes in a4; a0 is both arg1 input and result output */        \
++      asm volatile (                                                        \
++              "ecall\n"                                                     \
++              : "+r"(_arg1)                                                 \
++              : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5),             \
++                "r"(_num)                                                   \
++              : "memory", "cc"                                              \
++      );                                                                    \
++      _arg1;                                                                \
++})
++
++#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6)                  \
++({                                                                            \
++      register long _num  asm("a7") = (num);                                \
++      register long _arg1 asm("a0") = (long)(arg1);                         \
++      register long _arg2 asm("a1") = (long)(arg2);                         \
++      register long _arg3 asm("a2") = (long)(arg3);                         \
++      register long _arg4 asm("a3") = (long)(arg4);                         \
++      register long _arg5 asm("a4") = (long)(arg5);                         \
++      register long _arg6 asm("a5") = (long)(arg6);                         \
++      /* all six argument registers a0-a5 are used; a0 returns result */    \
++      asm volatile (                                                        \
++              "ecall\n"                                                     \
++              : "+r"(_arg1)                                                 \
++              : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \
++                "r"(_num)                                                   \
++              : "memory", "cc"                                              \
++      );                                                                    \
++      _arg1;                                                                \
++})
++
++/* startup code: set gp, fetch argc/argv/envp from the stack, call main, exit */
++asm(".section .text\n"
++    ".global _start\n"
++    "_start:\n"
++    ".option push\n"
++    ".option norelax\n"          // no linker relaxation while gp is being set
++    "lla   gp, __global_pointer$\n"
++    ".option pop\n"
++    "ld    a0, 0(sp)\n"          // argc (a0) was in the stack
++    "add   a1, sp, "SZREG"\n"    // argv (a1) = sp + SZREG (right after argc)
++    "slli  a2, a0, "PTRLOG"\n"   // envp (a2) = SZREG*argc ...
++    "add   a2, a2, "SZREG"\n"    //             + SZREG (skip null)
++    "add   a2,a2,a1\n"           //             + argv
++    "andi  sp,a1,-16\n"          // sp = a1 rounded down: must be 16-byte aligned
++    "call  main\n"               // main() returns the status code, we'll exit with it.
++    "li a7, 93\n"                // NR_exit == 93
++    "ecall\n"
++    "");
++
++#endif // _NOLIBC_ARCH_RISCV_H
+diff --git a/tools/include/nolibc/arch-x86_64.h b/tools/include/nolibc/arch-x86_64.h
+new file mode 100644
+index 000000000000..83c4b458ada7
+--- /dev/null
++++ b/tools/include/nolibc/arch-x86_64.h
+@@ -0,0 +1,215 @@
++/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
++/*
++ * x86_64 specific definitions for NOLIBC
++ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
++ */
++
++#ifndef _NOLIBC_ARCH_X86_64_H
++#define _NOLIBC_ARCH_X86_64_H
++
++/* O_* macros for fcntl/open are architecture-specific */
++#define O_RDONLY            0
++#define O_WRONLY            1
++#define O_RDWR              2
++#define O_CREAT          0x40
++#define O_EXCL           0x80
++#define O_NOCTTY        0x100
++#define O_TRUNC         0x200
++#define O_APPEND        0x400
++#define O_NONBLOCK      0x800
++#define O_DIRECTORY   0x10000
++
++/* The struct returned by the stat() syscall, equivalent to stat64(). The
++ * syscall returns 116 bytes and stops in the middle of __unused.
++ * NOTE(review): with 8-byte longs __unused starts at byte 120 - confirm 116. */
++struct sys_stat_struct {
++      unsigned long st_dev;
++      unsigned long st_ino;
++      unsigned long st_nlink;
++      unsigned int  st_mode;
++      unsigned int  st_uid;
++
++      unsigned int  st_gid;
++      unsigned int  __pad0;
++      unsigned long st_rdev;
++      long          st_size;
++      long          st_blksize;
++
++      long          st_blocks;
++      unsigned long st_atime;
++      unsigned long st_atime_nsec;
++      unsigned long st_mtime;
++
++      unsigned long st_mtime_nsec;
++      unsigned long st_ctime;
++      unsigned long st_ctime_nsec;
++      long          __unused[3];
++};
++
++/* Syscalls for x86_64 :
++ *   - registers are 64-bit
++ *   - syscall number is passed in rax
++ *   - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively
++ *   - the system call is performed by calling the syscall instruction
++ *   - syscall return comes in rax
++ *   - rcx and r11 are clobbered, others are preserved.
++ *   - the arguments are cast to long and assigned into the target registers
++ *     which are then simply passed as registers to the asm code, so that we
++ *     don't have to experience issues with register constraints.
++ *   - the syscall number is always specified last in order to allow to force
++ *     some registers before (gcc refuses a %-register at the last position).
++ *   - see also x86-64 ABI section A.2 AMD64 Linux Kernel Conventions, A.2.1
++ *     Calling Conventions.
++ *
++ * Link x86-64 ABI: https://gitlab.com/x86-psABIs/x86-64-ABI/-/wikis/x86-64-psABI
++ *
++ */
++
++#define my_syscall0(num)                                                      \
++({                                                                            \
++      long _ret;                                                            \
++      register long _num  asm("rax") = (num);                               \
++      /* _num uses matching constraint 0: rax is both input and output */   \
++      asm volatile (                                                        \
++              "syscall\n"                                                   \
++              : "=a"(_ret)                                                  \
++              : "0"(_num)                                                   \
++              : "rcx", "r11", "memory", "cc"                                \
++      );                                                                    \
++      _ret;                                                                 \
++})
++
++#define my_syscall1(num, arg1)                                                \
++({                                                                            \
++      long _ret;                                                            \
++      register long _num  asm("rax") = (num);                               \
++      register long _arg1 asm("rdi") = (long)(arg1);                        \
++      /* rdi carries arg1; rax holds the number, then the return value */   \
++      asm volatile (                                                        \
++              "syscall\n"                                                   \
++              : "=a"(_ret)                                                  \
++              : "r"(_arg1),                                                 \
++                "0"(_num)                                                   \
++              : "rcx", "r11", "memory", "cc"                                \
++      );                                                                    \
++      _ret;                                                                 \
++})
++
++#define my_syscall2(num, arg1, arg2)                                          \
++({                                                                            \
++      long _ret;                                                            \
++      register long _num  asm("rax") = (num);                               \
++      register long _arg1 asm("rdi") = (long)(arg1);                        \
++      register long _arg2 asm("rsi") = (long)(arg2);                        \
++      /* rdi, rsi carry the arguments; rcx and r11 are clobbered */         \
++      asm volatile (                                                        \
++              "syscall\n"                                                   \
++              : "=a"(_ret)                                                  \
++              : "r"(_arg1), "r"(_arg2),                                     \
++                "0"(_num)                                                   \
++              : "rcx", "r11", "memory", "cc"                                \
++      );                                                                    \
++      _ret;                                                                 \
++})
++
++#define my_syscall3(num, arg1, arg2, arg3)                                    \
++({                                                                            \
++      long _ret;                                                            \
++      register long _num  asm("rax") = (num);                               \
++      register long _arg1 asm("rdi") = (long)(arg1);                        \
++      register long _arg2 asm("rsi") = (long)(arg2);                        \
++      register long _arg3 asm("rdx") = (long)(arg3);                        \
++      /* rdi, rsi, rdx carry the arguments; the result is read from rax */  \
++      asm volatile (                                                        \
++              "syscall\n"                                                   \
++              : "=a"(_ret)                                                  \
++              : "r"(_arg1), "r"(_arg2), "r"(_arg3),                         \
++                "0"(_num)                                                   \
++              : "rcx", "r11", "memory", "cc"                                \
++      );                                                                    \
++      _ret;                                                                 \
++})
++
++#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
++({                                                                            \
++      long _ret;                                                            \
++      register long _num  asm("rax") = (num);                               \
++      register long _arg1 asm("rdi") = (long)(arg1);                        \
++      register long _arg2 asm("rsi") = (long)(arg2);                        \
++      register long _arg3 asm("rdx") = (long)(arg3);                        \
++      register long _arg4 asm("r10") = (long)(arg4);                        \
++      /* arg4 travels in r10; rcx and r11 are declared clobbered */         \
++      asm volatile (                                                        \
++              "syscall\n"                                                   \
++              : "=a"(_ret)                                                  \
++              : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4),             \
++                "0"(_num)                                                   \
++              : "rcx", "r11", "memory", "cc"                                \
++      );                                                                    \
++      _ret;                                                                 \
++})
++
++#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
++({                                                                            \
++      long _ret;                                                            \
++      register long _num  asm("rax") = (num);                               \
++      register long _arg1 asm("rdi") = (long)(arg1);                        \
++      register long _arg2 asm("rsi") = (long)(arg2);                        \
++      register long _arg3 asm("rdx") = (long)(arg3);                        \
++      register long _arg4 asm("r10") = (long)(arg4);                        \
++      register long _arg5 asm("r8")  = (long)(arg5);                        \
++      /* five arguments: rdi, rsi, rdx, r10, r8; the result is in rax */    \
++      asm volatile (                                                        \
++              "syscall\n"                                                   \
++              : "=a"(_ret)                                                  \
++              : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
++                "0"(_num)                                                   \
++              : "rcx", "r11", "memory", "cc"                                \
++      );                                                                    \
++      _ret;                                                                 \
++})
++
++#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6)                  \
++({                                                                            \
++      long _ret;                                                            \
++      register long _num  asm("rax") = (num);                               \
++      register long _arg1 asm("rdi") = (long)(arg1);                        \
++      register long _arg2 asm("rsi") = (long)(arg2);                        \
++      register long _arg3 asm("rdx") = (long)(arg3);                        \
++      register long _arg4 asm("r10") = (long)(arg4);                        \
++      register long _arg5 asm("r8")  = (long)(arg5);                        \
++      register long _arg6 asm("r9")  = (long)(arg6);                        \
++      /* all six argument registers are loaded; the result is in rax */     \
++      asm volatile (                                                        \
++              "syscall\n"                                                   \
++              : "=a"(_ret)                                                  \
++              : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
++                "r"(_arg6), "0"(_num)                                       \
++              : "rcx", "r11", "memory", "cc"                                \
++      );                                                                    \
++      _ret;                                                                 \
++})
++
++/* startup code: pass argc/argv/envp to main(), then exit with its return value */
++/*
++ * x86-64 System V ABI mandates:
++ * 1) %rsp must be 16-byte aligned right before the function call.
++ * 2) The deepest stack frame should be zero (the %rbp).
++ *
++ */
++asm(".section .text\n"
++    ".global _start\n"
++    "_start:\n"
++    "pop %rdi\n"                // argc   (first arg, %rdi); %rsp now points at argv[0]
++    "mov %rsp, %rsi\n"          // argv[] (second arg, %rsi)
++    "lea 8(%rsi,%rdi,8),%rdx\n" // envp = argv + 8*argc + 8, past the argv NULL (third arg, %rdx)
++    "xor %ebp, %ebp\n"          // zero %rbp to mark the deepest stack frame
++    "and $-16, %rsp\n"          // x86-64 ABI: %rsp must be 16-byte aligned before call
++    "call main\n"               // main() returns the status code, we'll exit with it.
++    "mov %eax, %edi\n"          // retrieve exit code (32 bit)
++    "mov $60, %eax\n"           // NR_exit == 60
++    "syscall\n"                 // really exit
++    "hlt\n"                     // ensure it does not return
++    "");
++
++#endif // _NOLIBC_ARCH_X86_64_H
+diff --git a/tools/include/nolibc/arch.h b/tools/include/nolibc/arch.h
+new file mode 100644
+index 000000000000..4c6992321b0d
+--- /dev/null
++++ b/tools/include/nolibc/arch.h
+@@ -0,0 +1,32 @@
++/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
++/*
++ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
++ */
++
++/* Below comes the architecture-specific code. For each architecture, we have
++ * the syscall declarations and the _start code definition. This is the only
++ * global part. On all architectures the kernel puts everything in the stack
++ * before jumping to _start just above us, without any return address (_start
++ * is not a function but an entry point). So at the stack pointer we find argc.
++ * Then argv[] begins, and ends at the first NULL. Then we have envp which
++ * starts and ends with a NULL as well. So envp=argv+argc+1.
++ */
++
++#ifndef _NOLIBC_ARCH_H
++#define _NOLIBC_ARCH_H
++
++#if defined(__x86_64__)
++#include "arch-x86_64.h"
++#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__)
++#include "arch-i386.h"
++#elif defined(__ARM_EABI__)
++#include "arch-arm.h"
++#elif defined(__aarch64__)
++#include "arch-aarch64.h"
++#elif defined(__mips__) && defined(_ABIO32)
++#include "arch-mips.h"
++#elif defined(__riscv)
++#include "arch-riscv.h"
++#endif
++
++#endif /* _NOLIBC_ARCH_H */
+diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
+index fbfc02aa99c3..d272b721dc51 100644
+--- a/tools/include/nolibc/nolibc.h
++++ b/tools/include/nolibc/nolibc.h
+@@ -92,6 +92,7 @@
+ #include <linux/fs.h>
+ #include <linux/loop.h>
+ #include <linux/time.h>
++#include "arch.h"
+ #include "types.h"
+ /* Used by programs to avoid std includes */
+@@ -111,1192 +112,6 @@ static int errno;
+  */
+ #define MAX_ERRNO 4095
+-/* Below comes the architecture-specific code. For each architecture, we have
+- * the syscall declarations and the _start code definition. This is the only
+- * global part. On all architectures the kernel puts everything in the stack
+- * before jumping to _start just above us, without any return address (_start
+- * is not a function but an entry pint). So at the stack pointer we find argc.
+- * Then argv[] begins, and ends at the first NULL. Then we have envp which
+- * starts and ends with a NULL as well. So envp=argv+argc+1.
+- */
+-
+-#if defined(__x86_64__)
+-/* Syscalls for x86_64 :
+- *   - registers are 64-bit
+- *   - syscall number is passed in rax
+- *   - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively
+- *   - the system call is performed by calling the syscall instruction
+- *   - syscall return comes in rax
+- *   - rcx and r11 are clobbered, others are preserved.
+- *   - the arguments are cast to long and assigned into the target registers
+- *     which are then simply passed as registers to the asm code, so that we
+- *     don't have to experience issues with register constraints.
+- *   - the syscall number is always specified last in order to allow to force
+- *     some registers before (gcc refuses a %-register at the last position).
+- *   - see also x86-64 ABI section A.2 AMD64 Linux Kernel Conventions, A.2.1
+- *     Calling Conventions.
+- *
+- * Link x86-64 ABI: https://gitlab.com/x86-psABIs/x86-64-ABI/-/wikis/x86-64-psABI
+- *
+- */
+-
+-#define my_syscall0(num)                                                      \
+-({                                                                            \
+-      long _ret;                                                            \
+-      register long _num  asm("rax") = (num);                               \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "syscall\n"                                                   \
+-              : "=a"(_ret)                                                  \
+-              : "0"(_num)                                                   \
+-              : "rcx", "r11", "memory", "cc"                                \
+-      );                                                                    \
+-      _ret;                                                                 \
+-})
+-
+-#define my_syscall1(num, arg1)                                                \
+-({                                                                            \
+-      long _ret;                                                            \
+-      register long _num  asm("rax") = (num);                               \
+-      register long _arg1 asm("rdi") = (long)(arg1);                        \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "syscall\n"                                                   \
+-              : "=a"(_ret)                                                  \
+-              : "r"(_arg1),                                                 \
+-                "0"(_num)                                                   \
+-              : "rcx", "r11", "memory", "cc"                                \
+-      );                                                                    \
+-      _ret;                                                                 \
+-})
+-
+-#define my_syscall2(num, arg1, arg2)                                          \
+-({                                                                            \
+-      long _ret;                                                            \
+-      register long _num  asm("rax") = (num);                               \
+-      register long _arg1 asm("rdi") = (long)(arg1);                        \
+-      register long _arg2 asm("rsi") = (long)(arg2);                        \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "syscall\n"                                                   \
+-              : "=a"(_ret)                                                  \
+-              : "r"(_arg1), "r"(_arg2),                                     \
+-                "0"(_num)                                                   \
+-              : "rcx", "r11", "memory", "cc"                                \
+-      );                                                                    \
+-      _ret;                                                                 \
+-})
+-
+-#define my_syscall3(num, arg1, arg2, arg3)                                    \
+-({                                                                            \
+-      long _ret;                                                            \
+-      register long _num  asm("rax") = (num);                               \
+-      register long _arg1 asm("rdi") = (long)(arg1);                        \
+-      register long _arg2 asm("rsi") = (long)(arg2);                        \
+-      register long _arg3 asm("rdx") = (long)(arg3);                        \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "syscall\n"                                                   \
+-              : "=a"(_ret)                                                  \
+-              : "r"(_arg1), "r"(_arg2), "r"(_arg3),                         \
+-                "0"(_num)                                                   \
+-              : "rcx", "r11", "memory", "cc"                                \
+-      );                                                                    \
+-      _ret;                                                                 \
+-})
+-
+-#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
+-({                                                                            \
+-      long _ret;                                                            \
+-      register long _num  asm("rax") = (num);                               \
+-      register long _arg1 asm("rdi") = (long)(arg1);                        \
+-      register long _arg2 asm("rsi") = (long)(arg2);                        \
+-      register long _arg3 asm("rdx") = (long)(arg3);                        \
+-      register long _arg4 asm("r10") = (long)(arg4);                        \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "syscall\n"                                                   \
+-              : "=a"(_ret)                                                  \
+-              : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4),             \
+-                "0"(_num)                                                   \
+-              : "rcx", "r11", "memory", "cc"                                \
+-      );                                                                    \
+-      _ret;                                                                 \
+-})
+-
+-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
+-({                                                                            \
+-      long _ret;                                                            \
+-      register long _num  asm("rax") = (num);                               \
+-      register long _arg1 asm("rdi") = (long)(arg1);                        \
+-      register long _arg2 asm("rsi") = (long)(arg2);                        \
+-      register long _arg3 asm("rdx") = (long)(arg3);                        \
+-      register long _arg4 asm("r10") = (long)(arg4);                        \
+-      register long _arg5 asm("r8")  = (long)(arg5);                        \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "syscall\n"                                                   \
+-              : "=a"(_ret)                                                  \
+-              : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+-                "0"(_num)                                                   \
+-              : "rcx", "r11", "memory", "cc"                                \
+-      );                                                                    \
+-      _ret;                                                                 \
+-})
+-
+-#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6)                  \
+-({                                                                            \
+-      long _ret;                                                            \
+-      register long _num  asm("rax") = (num);                               \
+-      register long _arg1 asm("rdi") = (long)(arg1);                        \
+-      register long _arg2 asm("rsi") = (long)(arg2);                        \
+-      register long _arg3 asm("rdx") = (long)(arg3);                        \
+-      register long _arg4 asm("r10") = (long)(arg4);                        \
+-      register long _arg5 asm("r8")  = (long)(arg5);                        \
+-      register long _arg6 asm("r9")  = (long)(arg6);                        \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "syscall\n"                                                   \
+-              : "=a"(_ret)                                                  \
+-              : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+-                "r"(_arg6), "0"(_num)                                       \
+-              : "rcx", "r11", "memory", "cc"                                \
+-      );                                                                    \
+-      _ret;                                                                 \
+-})
+-
+-/* startup code */
+-/*
+- * x86-64 System V ABI mandates:
+- * 1) %rsp must be 16-byte aligned right before the function call.
+- * 2) The deepest stack frame should be zero (the %rbp).
+- *
+- */
+-asm(".section .text\n"
+-    ".global _start\n"
+-    "_start:\n"
+-    "pop %rdi\n"                // argc   (first arg, %rdi)
+-    "mov %rsp, %rsi\n"          // argv[] (second arg, %rsi)
+-    "lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then envp (third arg, %rdx)
+-    "xor %ebp, %ebp\n"          // zero the stack frame
+-    "and $-16, %rsp\n"          // x86 ABI : esp must be 16-byte aligned before call
+-    "call main\n"               // main() returns the status code, we'll exit with it.
+-    "mov %eax, %edi\n"          // retrieve exit code (32 bit)
+-    "mov $60, %eax\n"           // NR_exit == 60
+-    "syscall\n"                 // really exit
+-    "hlt\n"                     // ensure it does not return
+-    "");
+-
+-/* fcntl / open */
+-#define O_RDONLY            0
+-#define O_WRONLY            1
+-#define O_RDWR              2
+-#define O_CREAT          0x40
+-#define O_EXCL           0x80
+-#define O_NOCTTY        0x100
+-#define O_TRUNC         0x200
+-#define O_APPEND        0x400
+-#define O_NONBLOCK      0x800
+-#define O_DIRECTORY   0x10000
+-
+-/* The struct returned by the stat() syscall, equivalent to stat64(). The
+- * syscall returns 116 bytes and stops in the middle of __unused.
+- */
+-struct sys_stat_struct {
+-      unsigned long st_dev;
+-      unsigned long st_ino;
+-      unsigned long st_nlink;
+-      unsigned int  st_mode;
+-      unsigned int  st_uid;
+-
+-      unsigned int  st_gid;
+-      unsigned int  __pad0;
+-      unsigned long st_rdev;
+-      long          st_size;
+-      long          st_blksize;
+-
+-      long          st_blocks;
+-      unsigned long st_atime;
+-      unsigned long st_atime_nsec;
+-      unsigned long st_mtime;
+-
+-      unsigned long st_mtime_nsec;
+-      unsigned long st_ctime;
+-      unsigned long st_ctime_nsec;
+-      long          __unused[3];
+-};
+-
+-#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__)
+-/* Syscalls for i386 :
+- *   - mostly similar to x86_64
+- *   - registers are 32-bit
+- *   - syscall number is passed in eax
+- *   - arguments are in ebx, ecx, edx, esi, edi, ebp respectively
+- *   - all registers are preserved (except eax of course)
+- *   - the system call is performed by calling int $0x80
+- *   - syscall return comes in eax
+- *   - the arguments are cast to long and assigned into the target registers
+- *     which are then simply passed as registers to the asm code, so that we
+- *     don't have to experience issues with register constraints.
+- *   - the syscall number is always specified last in order to allow to force
+- *     some registers before (gcc refuses a %-register at the last position).
+- *
+- * Also, i386 supports the old_select syscall if newselect is not available
+- */
+-#define __ARCH_WANT_SYS_OLD_SELECT
+-
+-#define my_syscall0(num)                                                      \
+-({                                                                            \
+-      long _ret;                                                            \
+-      register long _num asm("eax") = (num);                                \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "int $0x80\n"                                                 \
+-              : "=a" (_ret)                                                 \
+-              : "0"(_num)                                                   \
+-              : "memory", "cc"                                              \
+-      );                                                                    \
+-      _ret;                                                                 \
+-})
+-
+-#define my_syscall1(num, arg1)                                                \
+-({                                                                            \
+-      long _ret;                                                            \
+-      register long _num asm("eax") = (num);                                \
+-      register long _arg1 asm("ebx") = (long)(arg1);                        \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "int $0x80\n"                                                 \
+-              : "=a" (_ret)                                                 \
+-              : "r"(_arg1),                                                 \
+-                "0"(_num)                                                   \
+-              : "memory", "cc"                                              \
+-      );                                                                    \
+-      _ret;                                                                 \
+-})
+-
+-#define my_syscall2(num, arg1, arg2)                                          \
+-({                                                                            \
+-      long _ret;                                                            \
+-      register long _num asm("eax") = (num);                                \
+-      register long _arg1 asm("ebx") = (long)(arg1);                        \
+-      register long _arg2 asm("ecx") = (long)(arg2);                        \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "int $0x80\n"                                                 \
+-              : "=a" (_ret)                                                 \
+-              : "r"(_arg1), "r"(_arg2),                                     \
+-                "0"(_num)                                                   \
+-              : "memory", "cc"                                              \
+-      );                                                                    \
+-      _ret;                                                                 \
+-})
+-
+-#define my_syscall3(num, arg1, arg2, arg3)                                    \
+-({                                                                            \
+-      long _ret;                                                            \
+-      register long _num asm("eax") = (num);                                \
+-      register long _arg1 asm("ebx") = (long)(arg1);                        \
+-      register long _arg2 asm("ecx") = (long)(arg2);                        \
+-      register long _arg3 asm("edx") = (long)(arg3);                        \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "int $0x80\n"                                                 \
+-              : "=a" (_ret)                                                 \
+-              : "r"(_arg1), "r"(_arg2), "r"(_arg3),                         \
+-                "0"(_num)                                                   \
+-              : "memory", "cc"                                              \
+-      );                                                                    \
+-      _ret;                                                                 \
+-})
+-
+-#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
+-({                                                                            \
+-      long _ret;                                                            \
+-      register long _num asm("eax") = (num);                                \
+-      register long _arg1 asm("ebx") = (long)(arg1);                        \
+-      register long _arg2 asm("ecx") = (long)(arg2);                        \
+-      register long _arg3 asm("edx") = (long)(arg3);                        \
+-      register long _arg4 asm("esi") = (long)(arg4);                        \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "int $0x80\n"                                                 \
+-              : "=a" (_ret)                                                 \
+-              : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4),             \
+-                "0"(_num)                                                   \
+-              : "memory", "cc"                                              \
+-      );                                                                    \
+-      _ret;                                                                 \
+-})
+-
+-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
+-({                                                                            \
+-      long _ret;                                                            \
+-      register long _num asm("eax") = (num);                                \
+-      register long _arg1 asm("ebx") = (long)(arg1);                        \
+-      register long _arg2 asm("ecx") = (long)(arg2);                        \
+-      register long _arg3 asm("edx") = (long)(arg3);                        \
+-      register long _arg4 asm("esi") = (long)(arg4);                        \
+-      register long _arg5 asm("edi") = (long)(arg5);                        \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "int $0x80\n"                                                 \
+-              : "=a" (_ret)                                                 \
+-              : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+-                "0"(_num)                                                   \
+-              : "memory", "cc"                                              \
+-      );                                                                    \
+-      _ret;                                                                 \
+-})
+-
+-/* startup code */
+-/*
+- * i386 System V ABI mandates:
+- * 1) last pushed argument must be 16-byte aligned.
+- * 2) The deepest stack frame should be set to zero
+- *
+- */
+-asm(".section .text\n"
+-    ".global _start\n"
+-    "_start:\n"
+-    "pop %eax\n"                // argc   (first arg, %eax)
+-    "mov %esp, %ebx\n"          // argv[] (second arg, %ebx)
+-    "lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx)
+-    "xor %ebp, %ebp\n"          // zero the stack frame
+-    "and $-16, %esp\n"          // x86 ABI : esp must be 16-byte aligned before
+-    "sub $4, %esp\n"            // the call instruction (args are aligned)
+-    "push %ecx\n"               // push all registers on the stack so that we
+-    "push %ebx\n"               // support both regparm and plain stack modes
+-    "push %eax\n"
+-    "call main\n"               // main() returns the status code in %eax
+-    "mov %eax, %ebx\n"          // retrieve exit code (32-bit int)
+-    "movl $1, %eax\n"           // NR_exit == 1
+-    "int $0x80\n"               // exit now
+-    "hlt\n"                     // ensure it does not
+-    "");
+-
+-/* fcntl / open */
+-#define O_RDONLY            0
+-#define O_WRONLY            1
+-#define O_RDWR              2
+-#define O_CREAT          0x40
+-#define O_EXCL           0x80
+-#define O_NOCTTY        0x100
+-#define O_TRUNC         0x200
+-#define O_APPEND        0x400
+-#define O_NONBLOCK      0x800
+-#define O_DIRECTORY   0x10000
+-
+-/* The struct returned by the stat() syscall, 32-bit only, the syscall returns
+- * exactly 56 bytes (stops before the unused array).
+- */
+-struct sys_stat_struct {
+-      unsigned long  st_dev;
+-      unsigned long  st_ino;
+-      unsigned short st_mode;
+-      unsigned short st_nlink;
+-      unsigned short st_uid;
+-      unsigned short st_gid;
+-
+-      unsigned long  st_rdev;
+-      unsigned long  st_size;
+-      unsigned long  st_blksize;
+-      unsigned long  st_blocks;
+-
+-      unsigned long  st_atime;
+-      unsigned long  st_atime_nsec;
+-      unsigned long  st_mtime;
+-      unsigned long  st_mtime_nsec;
+-
+-      unsigned long  st_ctime;
+-      unsigned long  st_ctime_nsec;
+-      unsigned long  __unused[2];
+-};
+-
+-#elif defined(__ARM_EABI__)
+-/* Syscalls for ARM in ARM or Thumb modes :
+- *   - registers are 32-bit
+- *   - stack is 8-byte aligned
+- *     ( http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka4127.html)
+- *   - syscall number is passed in r7
+- *   - arguments are in r0, r1, r2, r3, r4, r5
+- *   - the system call is performed by calling svc #0
+- *   - syscall return comes in r0.
+- *   - only lr is clobbered.
+- *   - the arguments are cast to long and assigned into the target registers
+- *     which are then simply passed as registers to the asm code, so that we
+- *     don't have to experience issues with register constraints.
+- *   - the syscall number is always specified last in order to allow to force
+- *     some registers before (gcc refuses a %-register at the last position).
+- *
+- * Also, ARM supports the old_select syscall if newselect is not available
+- */
+-#define __ARCH_WANT_SYS_OLD_SELECT
+-
+-#define my_syscall0(num)                                                      \
+-({                                                                            \
+-      register long _num asm("r7") = (num);                                 \
+-      register long _arg1 asm("r0");                                        \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "svc #0\n"                                                    \
+-              : "=r"(_arg1)                                                 \
+-              : "r"(_num)                                                   \
+-              : "memory", "cc", "lr"                                        \
+-      );                                                                    \
+-      _arg1;                                                                \
+-})
+-
+-#define my_syscall1(num, arg1)                                                \
+-({                                                                            \
+-      register long _num asm("r7") = (num);                                 \
+-      register long _arg1 asm("r0") = (long)(arg1);                         \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "svc #0\n"                                                    \
+-              : "=r"(_arg1)                                                 \
+-              : "r"(_arg1),                                                 \
+-                "r"(_num)                                                   \
+-              : "memory", "cc", "lr"                                        \
+-      );                                                                    \
+-      _arg1;                                                                \
+-})
+-
+-#define my_syscall2(num, arg1, arg2)                                          \
+-({                                                                            \
+-      register long _num asm("r7") = (num);                                 \
+-      register long _arg1 asm("r0") = (long)(arg1);                         \
+-      register long _arg2 asm("r1") = (long)(arg2);                         \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "svc #0\n"                                                    \
+-              : "=r"(_arg1)                                                 \
+-              : "r"(_arg1), "r"(_arg2),                                     \
+-                "r"(_num)                                                   \
+-              : "memory", "cc", "lr"                                        \
+-      );                                                                    \
+-      _arg1;                                                                \
+-})
+-
+-#define my_syscall3(num, arg1, arg2, arg3)                                    \
+-({                                                                            \
+-      register long _num asm("r7") = (num);                                 \
+-      register long _arg1 asm("r0") = (long)(arg1);                         \
+-      register long _arg2 asm("r1") = (long)(arg2);                         \
+-      register long _arg3 asm("r2") = (long)(arg3);                         \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "svc #0\n"                                                    \
+-              : "=r"(_arg1)                                                 \
+-              : "r"(_arg1), "r"(_arg2), "r"(_arg3),                         \
+-                "r"(_num)                                                   \
+-              : "memory", "cc", "lr"                                        \
+-      );                                                                    \
+-      _arg1;                                                                \
+-})
+-
+-#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
+-({                                                                            \
+-      register long _num asm("r7") = (num);                                 \
+-      register long _arg1 asm("r0") = (long)(arg1);                         \
+-      register long _arg2 asm("r1") = (long)(arg2);                         \
+-      register long _arg3 asm("r2") = (long)(arg3);                         \
+-      register long _arg4 asm("r3") = (long)(arg4);                         \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "svc #0\n"                                                    \
+-              : "=r"(_arg1)                                                 \
+-              : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4),             \
+-                "r"(_num)                                                   \
+-              : "memory", "cc", "lr"                                        \
+-      );                                                                    \
+-      _arg1;                                                                \
+-})
+-
+-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
+-({                                                                            \
+-      register long _num asm("r7") = (num);                                 \
+-      register long _arg1 asm("r0") = (long)(arg1);                         \
+-      register long _arg2 asm("r1") = (long)(arg2);                         \
+-      register long _arg3 asm("r2") = (long)(arg3);                         \
+-      register long _arg4 asm("r3") = (long)(arg4);                         \
+-      register long _arg5 asm("r4") = (long)(arg5);                         \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "svc #0\n"                                                    \
+-              : "=r" (_arg1)                                                \
+-              : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+-                "r"(_num)                                                   \
+-              : "memory", "cc", "lr"                                        \
+-      );                                                                    \
+-      _arg1;                                                                \
+-})
+-
+-/* startup code */
+-asm(".section .text\n"
+-    ".global _start\n"
+-    "_start:\n"
+-#if defined(__THUMBEB__) || defined(__THUMBEL__)
+-    /* We enter here in 32-bit mode but if some previous functions were in
+-     * 16-bit mode, the assembler cannot know, so we need to tell it we're in
+-     * 32-bit now, then switch to 16-bit (is there a better way to do it than
+-     * adding 1 by hand ?) and tell the asm we're now in 16-bit mode so that
+-     * it generates correct instructions. Note that we do not support thumb1.
+-     */
+-    ".code 32\n"
+-    "add     r0, pc, #1\n"
+-    "bx      r0\n"
+-    ".code 16\n"
+-#endif
+-    "pop {%r0}\n"                 // argc was in the stack
+-    "mov %r1, %sp\n"              // argv = sp
+-    "add %r2, %r1, %r0, lsl #2\n" // envp = argv + 4*argc ...
+-    "add %r2, %r2, $4\n"          //        ... + 4
+-    "and %r3, %r1, $-8\n"         // AAPCS : sp must be 8-byte aligned in the
+-    "mov %sp, %r3\n"              //         callee, an bl doesn't push (lr=pc)
+-    "bl main\n"                   // main() returns the status code, we'll exit with it.
+-    "movs r7, $1\n"               // NR_exit == 1
+-    "svc $0x00\n"
+-    "");
+-
+-/* fcntl / open */
+-#define O_RDONLY            0
+-#define O_WRONLY            1
+-#define O_RDWR              2
+-#define O_CREAT          0x40
+-#define O_EXCL           0x80
+-#define O_NOCTTY        0x100
+-#define O_TRUNC         0x200
+-#define O_APPEND        0x400
+-#define O_NONBLOCK      0x800
+-#define O_DIRECTORY    0x4000
+-
+-/* The struct returned by the stat() syscall, 32-bit only, the syscall returns
+- * exactly 56 bytes (stops before the unused array). In big endian, the format
+- * differs as devices are returned as short only.
+- */
+-struct sys_stat_struct {
+-#if defined(__ARMEB__)
+-      unsigned short st_dev;
+-      unsigned short __pad1;
+-#else
+-      unsigned long  st_dev;
+-#endif
+-      unsigned long  st_ino;
+-      unsigned short st_mode;
+-      unsigned short st_nlink;
+-      unsigned short st_uid;
+-      unsigned short st_gid;
+-#if defined(__ARMEB__)
+-      unsigned short st_rdev;
+-      unsigned short __pad2;
+-#else
+-      unsigned long  st_rdev;
+-#endif
+-      unsigned long  st_size;
+-      unsigned long  st_blksize;
+-      unsigned long  st_blocks;
+-      unsigned long  st_atime;
+-      unsigned long  st_atime_nsec;
+-      unsigned long  st_mtime;
+-      unsigned long  st_mtime_nsec;
+-      unsigned long  st_ctime;
+-      unsigned long  st_ctime_nsec;
+-      unsigned long  __unused[2];
+-};
+-
+-#elif defined(__aarch64__)
+-/* Syscalls for AARCH64 :
+- *   - registers are 64-bit
+- *   - stack is 16-byte aligned
+- *   - syscall number is passed in x8
+- *   - arguments are in x0, x1, x2, x3, x4, x5
+- *   - the system call is performed by calling svc 0
+- *   - syscall return comes in x0.
+- *   - the arguments are cast to long and assigned into the target registers
+- *     which are then simply passed as registers to the asm code, so that we
+- *     don't have to experience issues with register constraints.
+- *
+- * On aarch64, select() is not implemented so we have to use pselect6().
+- */
+-#define __ARCH_WANT_SYS_PSELECT6
+-
+-#define my_syscall0(num)                                                      \
+-({                                                                            \
+-      register long _num  asm("x8") = (num);                                \
+-      register long _arg1 asm("x0");                                        \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "svc #0\n"                                                    \
+-              : "=r"(_arg1)                                                 \
+-              : "r"(_num)                                                   \
+-              : "memory", "cc"                                              \
+-      );                                                                    \
+-      _arg1;                                                                \
+-})
+-
+-#define my_syscall1(num, arg1)                                                \
+-({                                                                            \
+-      register long _num  asm("x8") = (num);                                \
+-      register long _arg1 asm("x0") = (long)(arg1);                         \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "svc #0\n"                                                    \
+-              : "=r"(_arg1)                                                 \
+-              : "r"(_arg1),                                                 \
+-                "r"(_num)                                                   \
+-              : "memory", "cc"                                              \
+-      );                                                                    \
+-      _arg1;                                                                \
+-})
+-
+-#define my_syscall2(num, arg1, arg2)                                          \
+-({                                                                            \
+-      register long _num  asm("x8") = (num);                                \
+-      register long _arg1 asm("x0") = (long)(arg1);                         \
+-      register long _arg2 asm("x1") = (long)(arg2);                         \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "svc #0\n"                                                    \
+-              : "=r"(_arg1)                                                 \
+-              : "r"(_arg1), "r"(_arg2),                                     \
+-                "r"(_num)                                                   \
+-              : "memory", "cc"                                              \
+-      );                                                                    \
+-      _arg1;                                                                \
+-})
+-
+-#define my_syscall3(num, arg1, arg2, arg3)                                    \
+-({                                                                            \
+-      register long _num  asm("x8") = (num);                                \
+-      register long _arg1 asm("x0") = (long)(arg1);                         \
+-      register long _arg2 asm("x1") = (long)(arg2);                         \
+-      register long _arg3 asm("x2") = (long)(arg3);                         \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "svc #0\n"                                                    \
+-              : "=r"(_arg1)                                                 \
+-              : "r"(_arg1), "r"(_arg2), "r"(_arg3),                         \
+-                "r"(_num)                                                   \
+-              : "memory", "cc"                                              \
+-      );                                                                    \
+-      _arg1;                                                                \
+-})
+-
+-#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
+-({                                                                            \
+-      register long _num  asm("x8") = (num);                                \
+-      register long _arg1 asm("x0") = (long)(arg1);                         \
+-      register long _arg2 asm("x1") = (long)(arg2);                         \
+-      register long _arg3 asm("x2") = (long)(arg3);                         \
+-      register long _arg4 asm("x3") = (long)(arg4);                         \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "svc #0\n"                                                    \
+-              : "=r"(_arg1)                                                 \
+-              : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4),             \
+-                "r"(_num)                                                   \
+-              : "memory", "cc"                                              \
+-      );                                                                    \
+-      _arg1;                                                                \
+-})
+-
+-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
+-({                                                                            \
+-      register long _num  asm("x8") = (num);                                \
+-      register long _arg1 asm("x0") = (long)(arg1);                         \
+-      register long _arg2 asm("x1") = (long)(arg2);                         \
+-      register long _arg3 asm("x2") = (long)(arg3);                         \
+-      register long _arg4 asm("x3") = (long)(arg4);                         \
+-      register long _arg5 asm("x4") = (long)(arg5);                         \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "svc #0\n"                                                    \
+-              : "=r" (_arg1)                                                \
+-              : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+-                "r"(_num)                                                   \
+-              : "memory", "cc"                                              \
+-      );                                                                    \
+-      _arg1;                                                                \
+-})
+-
+-#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6)                  \
+-({                                                                            \
+-      register long _num  asm("x8") = (num);                                \
+-      register long _arg1 asm("x0") = (long)(arg1);                         \
+-      register long _arg2 asm("x1") = (long)(arg2);                         \
+-      register long _arg3 asm("x2") = (long)(arg3);                         \
+-      register long _arg4 asm("x3") = (long)(arg4);                         \
+-      register long _arg5 asm("x4") = (long)(arg5);                         \
+-      register long _arg6 asm("x5") = (long)(arg6);                         \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "svc #0\n"                                                    \
+-              : "=r" (_arg1)                                                \
+-              : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+-                "r"(_arg6), "r"(_num)                                       \
+-              : "memory", "cc"                                              \
+-      );                                                                    \
+-      _arg1;                                                                \
+-})
+-
+-/* startup code */
+-asm(".section .text\n"
+-    ".global _start\n"
+-    "_start:\n"
+-    "ldr x0, [sp]\n"              // argc (x0) was in the stack
+-    "add x1, sp, 8\n"             // argv (x1) = sp
+-    "lsl x2, x0, 3\n"             // envp (x2) = 8*argc ...
+-    "add x2, x2, 8\n"             //           + 8 (skip null)
+-    "add x2, x2, x1\n"            //           + argv
+-    "and sp, x1, -16\n"           // sp must be 16-byte aligned in the callee
+-    "bl main\n"                   // main() returns the status code, we'll exit with it.
+-    "mov x8, 93\n"                // NR_exit == 93
+-    "svc #0\n"
+-    "");
+-
+-/* fcntl / open */
+-#define O_RDONLY            0
+-#define O_WRONLY            1
+-#define O_RDWR              2
+-#define O_CREAT          0x40
+-#define O_EXCL           0x80
+-#define O_NOCTTY        0x100
+-#define O_TRUNC         0x200
+-#define O_APPEND        0x400
+-#define O_NONBLOCK      0x800
+-#define O_DIRECTORY    0x4000
+-
+-/* The struct returned by the newfstatat() syscall. Differs slightly from the
+- * x86_64's stat one by field ordering, so be careful.
+- */
+-struct sys_stat_struct {
+-      unsigned long   st_dev;
+-      unsigned long   st_ino;
+-      unsigned int    st_mode;
+-      unsigned int    st_nlink;
+-      unsigned int    st_uid;
+-      unsigned int    st_gid;
+-
+-      unsigned long   st_rdev;
+-      unsigned long   __pad1;
+-      long            st_size;
+-      int             st_blksize;
+-      int             __pad2;
+-
+-      long            st_blocks;
+-      long            st_atime;
+-      unsigned long   st_atime_nsec;
+-      long            st_mtime;
+-
+-      unsigned long   st_mtime_nsec;
+-      long            st_ctime;
+-      unsigned long   st_ctime_nsec;
+-      unsigned int    __unused[2];
+-};
+-
+-#elif defined(__mips__) && defined(_ABIO32)
+-/* Syscalls for MIPS ABI O32 :
+- *   - WARNING! there's always a delayed slot!
+- *   - WARNING again, the syntax is different, registers take a '$' and numbers
+- *     do not.
+- *   - registers are 32-bit
+- *   - stack is 8-byte aligned
+- *   - syscall number is passed in v0 (starts at 0xfa0).
+- *   - arguments are in a0, a1, a2, a3, then the stack. The caller needs to
+- *     leave some room in the stack for the callee to save a0..a3 if needed.
+- *   - Many registers are clobbered, in fact only a0..a2 and s0..s8 are
+- *     preserved. See: https://www.linux-mips.org/wiki/Syscall as well as
+- *     scall32-o32.S in the kernel sources.
+- *   - the system call is performed by calling "syscall"
+- *   - syscall return comes in v0, and register a3 needs to be checked to know
+- *     if an error occurred, in which case errno is in v0.
+- *   - the arguments are cast to long and assigned into the target registers
+- *     which are then simply passed as registers to the asm code, so that we
+- *     don't have to experience issues with register constraints.
+- */
+-
+-#define my_syscall0(num)                                                      \
+-({                                                                            \
+-      register long _num asm("v0") = (num);                                 \
+-      register long _arg4 asm("a3");                                        \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "addiu $sp, $sp, -32\n"                                       \
+-              "syscall\n"                                                   \
+-              "addiu $sp, $sp, 32\n"                                        \
+-              : "=r"(_num), "=r"(_arg4)                                     \
+-              : "r"(_num)                                                   \
+-              : "memory", "cc", "at", "v1", "hi", "lo",                     \
+-                "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
+-      );                                                                    \
+-      _arg4 ? -_num : _num;                                                 \
+-})
+-
+-#define my_syscall1(num, arg1)                                                \
+-({                                                                            \
+-      register long _num asm("v0") = (num);                                 \
+-      register long _arg1 asm("a0") = (long)(arg1);                         \
+-      register long _arg4 asm("a3");                                        \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "addiu $sp, $sp, -32\n"                                       \
+-              "syscall\n"                                                   \
+-              "addiu $sp, $sp, 32\n"                                        \
+-              : "=r"(_num), "=r"(_arg4)                                     \
+-              : "0"(_num),                                                  \
+-                "r"(_arg1)                                                  \
+-              : "memory", "cc", "at", "v1", "hi", "lo",                     \
+-                "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
+-      );                                                                    \
+-      _arg4 ? -_num : _num;                                                 \
+-})
+-
+-#define my_syscall2(num, arg1, arg2)                                          \
+-({                                                                            \
+-      register long _num asm("v0") = (num);                                 \
+-      register long _arg1 asm("a0") = (long)(arg1);                         \
+-      register long _arg2 asm("a1") = (long)(arg2);                         \
+-      register long _arg4 asm("a3");                                        \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "addiu $sp, $sp, -32\n"                                       \
+-              "syscall\n"                                                   \
+-              "addiu $sp, $sp, 32\n"                                        \
+-              : "=r"(_num), "=r"(_arg4)                                     \
+-              : "0"(_num),                                                  \
+-                "r"(_arg1), "r"(_arg2)                                      \
+-              : "memory", "cc", "at", "v1", "hi", "lo",                     \
+-                "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
+-      );                                                                    \
+-      _arg4 ? -_num : _num;                                                 \
+-})
+-
+-#define my_syscall3(num, arg1, arg2, arg3)                                    \
+-({                                                                            \
+-      register long _num asm("v0")  = (num);                                \
+-      register long _arg1 asm("a0") = (long)(arg1);                         \
+-      register long _arg2 asm("a1") = (long)(arg2);                         \
+-      register long _arg3 asm("a2") = (long)(arg3);                         \
+-      register long _arg4 asm("a3");                                        \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "addiu $sp, $sp, -32\n"                                       \
+-              "syscall\n"                                                   \
+-              "addiu $sp, $sp, 32\n"                                        \
+-              : "=r"(_num), "=r"(_arg4)                                     \
+-              : "0"(_num),                                                  \
+-                "r"(_arg1), "r"(_arg2), "r"(_arg3)                          \
+-              : "memory", "cc", "at", "v1", "hi", "lo",                     \
+-                "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
+-      );                                                                    \
+-      _arg4 ? -_num : _num;                                                 \
+-})
+-
+-#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
+-({                                                                            \
+-      register long _num asm("v0") = (num);                                 \
+-      register long _arg1 asm("a0") = (long)(arg1);                         \
+-      register long _arg2 asm("a1") = (long)(arg2);                         \
+-      register long _arg3 asm("a2") = (long)(arg3);                         \
+-      register long _arg4 asm("a3") = (long)(arg4);                         \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "addiu $sp, $sp, -32\n"                                       \
+-              "syscall\n"                                                   \
+-              "addiu $sp, $sp, 32\n"                                        \
+-              : "=r" (_num), "=r"(_arg4)                                    \
+-              : "0"(_num),                                                  \
+-                "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4)              \
+-              : "memory", "cc", "at", "v1", "hi", "lo",                     \
+-                "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
+-      );                                                                    \
+-      _arg4 ? -_num : _num;                                                 \
+-})
+-
+-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
+-({                                                                            \
+-      register long _num asm("v0") = (num);                                 \
+-      register long _arg1 asm("a0") = (long)(arg1);                         \
+-      register long _arg2 asm("a1") = (long)(arg2);                         \
+-      register long _arg3 asm("a2") = (long)(arg3);                         \
+-      register long _arg4 asm("a3") = (long)(arg4);                         \
+-      register long _arg5 = (long)(arg5);                                   \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "addiu $sp, $sp, -32\n"                                       \
+-              "sw %7, 16($sp)\n"                                            \
+-              "syscall\n  "                                                 \
+-              "addiu $sp, $sp, 32\n"                                        \
+-              : "=r" (_num), "=r"(_arg4)                                    \
+-              : "0"(_num),                                                  \
+-                "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5)  \
+-              : "memory", "cc", "at", "v1", "hi", "lo",                     \
+-                "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
+-      );                                                                    \
+-      _arg4 ? -_num : _num;                                                 \
+-})
+-
+-/* startup code, note that it's called __start on MIPS */
+-asm(".section .text\n"
+-    ".set nomips16\n"
+-    ".global __start\n"
+-    ".set    noreorder\n"
+-    ".option pic0\n"
+-    ".ent __start\n"
+-    "__start:\n"
+-    "lw $a0,($sp)\n"              // argc was in the stack
+-    "addiu  $a1, $sp, 4\n"        // argv = sp + 4
+-    "sll $a2, $a0, 2\n"           // a2 = argc * 4
+-    "add   $a2, $a2, $a1\n"       // envp = argv + 4*argc ...
+-    "addiu $a2, $a2, 4\n"         //        ... + 4
+-    "li $t0, -8\n"
+-    "and $sp, $sp, $t0\n"         // sp must be 8-byte aligned
+-    "addiu $sp,$sp,-16\n"         // the callee expects to save a0..a3 there!
+-    "jal main\n"                  // main() returns the status code, we'll exit with it.
+-    "nop\n"                       // delayed slot
+-    "move $a0, $v0\n"             // retrieve 32-bit exit code from v0
+-    "li $v0, 4001\n"              // NR_exit == 4001
+-    "syscall\n"
+-    ".end __start\n"
+-    "");
+-
+-/* fcntl / open */
+-#define O_RDONLY            0
+-#define O_WRONLY            1
+-#define O_RDWR              2
+-#define O_APPEND       0x0008
+-#define O_NONBLOCK     0x0080
+-#define O_CREAT        0x0100
+-#define O_TRUNC        0x0200
+-#define O_EXCL         0x0400
+-#define O_NOCTTY       0x0800
+-#define O_DIRECTORY   0x10000
+-
+-/* The struct returned by the stat() syscall. 88 bytes are returned by the
+- * syscall.
+- */
+-struct sys_stat_struct {
+-      unsigned int  st_dev;
+-      long          st_pad1[3];
+-      unsigned long st_ino;
+-      unsigned int  st_mode;
+-      unsigned int  st_nlink;
+-      unsigned int  st_uid;
+-      unsigned int  st_gid;
+-      unsigned int  st_rdev;
+-      long          st_pad2[2];
+-      long          st_size;
+-      long          st_pad3;
+-      long          st_atime;
+-      long          st_atime_nsec;
+-      long          st_mtime;
+-      long          st_mtime_nsec;
+-      long          st_ctime;
+-      long          st_ctime_nsec;
+-      long          st_blksize;
+-      long          st_blocks;
+-      long          st_pad4[14];
+-};
+-
+-#elif defined(__riscv)
+-
+-#if   __riscv_xlen == 64
+-#define PTRLOG "3"
+-#define SZREG  "8"
+-#elif __riscv_xlen == 32
+-#define PTRLOG "2"
+-#define SZREG  "4"
+-#endif
+-
+-/* Syscalls for RISCV :
+- *   - stack is 16-byte aligned
+- *   - syscall number is passed in a7
+- *   - arguments are in a0, a1, a2, a3, a4, a5
+- *   - the system call is performed by calling ecall
+- *   - syscall return comes in a0
+- *   - the arguments are cast to long and assigned into the target
+- *     registers which are then simply passed as registers to the asm code,
+- *     so that we don't have to experience issues with register constraints.
+- *
+- * On riscv, select() is not implemented so we have to use pselect6().
+- */
+-#define __ARCH_WANT_SYS_PSELECT6
+-
+-#define my_syscall0(num)                                                      \
+-({                                                                            \
+-      register long _num  asm("a7") = (num);                                \
+-      register long _arg1 asm("a0");                                        \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "ecall\n\t"                                                   \
+-              : "=r"(_arg1)                                                 \
+-              : "r"(_num)                                                   \
+-              : "memory", "cc"                                              \
+-      );                                                                    \
+-      _arg1;                                                                \
+-})
+-
+-#define my_syscall1(num, arg1)                                                \
+-({                                                                            \
+-      register long _num  asm("a7") = (num);                                \
+-      register long _arg1 asm("a0") = (long)(arg1);                         \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "ecall\n"                                                     \
+-              : "+r"(_arg1)                                                 \
+-              : "r"(_num)                                                   \
+-              : "memory", "cc"                                              \
+-      );                                                                    \
+-      _arg1;                                                                \
+-})
+-
+-#define my_syscall2(num, arg1, arg2)                                          \
+-({                                                                            \
+-      register long _num  asm("a7") = (num);                                \
+-      register long _arg1 asm("a0") = (long)(arg1);                         \
+-      register long _arg2 asm("a1") = (long)(arg2);                         \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "ecall\n"                                                     \
+-              : "+r"(_arg1)                                                 \
+-              : "r"(_arg2),                                                 \
+-                "r"(_num)                                                   \
+-              : "memory", "cc"                                              \
+-      );                                                                    \
+-      _arg1;                                                                \
+-})
+-
+-#define my_syscall3(num, arg1, arg2, arg3)                                    \
+-({                                                                            \
+-      register long _num  asm("a7") = (num);                                \
+-      register long _arg1 asm("a0") = (long)(arg1);                         \
+-      register long _arg2 asm("a1") = (long)(arg2);                         \
+-      register long _arg3 asm("a2") = (long)(arg3);                         \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "ecall\n\t"                                                   \
+-              : "+r"(_arg1)                                                 \
+-              : "r"(_arg2), "r"(_arg3),                                     \
+-                "r"(_num)                                                   \
+-              : "memory", "cc"                                              \
+-      );                                                                    \
+-      _arg1;                                                                \
+-})
+-
+-#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
+-({                                                                            \
+-      register long _num  asm("a7") = (num);                                \
+-      register long _arg1 asm("a0") = (long)(arg1);                         \
+-      register long _arg2 asm("a1") = (long)(arg2);                         \
+-      register long _arg3 asm("a2") = (long)(arg3);                         \
+-      register long _arg4 asm("a3") = (long)(arg4);                         \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "ecall\n"                                                     \
+-              : "+r"(_arg1)                                                 \
+-              : "r"(_arg2), "r"(_arg3), "r"(_arg4),                         \
+-                "r"(_num)                                                   \
+-              : "memory", "cc"                                              \
+-      );                                                                    \
+-      _arg1;                                                                \
+-})
+-
+-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
+-({                                                                            \
+-      register long _num  asm("a7") = (num);                                \
+-      register long _arg1 asm("a0") = (long)(arg1);                         \
+-      register long _arg2 asm("a1") = (long)(arg2);                         \
+-      register long _arg3 asm("a2") = (long)(arg3);                         \
+-      register long _arg4 asm("a3") = (long)(arg4);                         \
+-      register long _arg5 asm("a4") = (long)(arg5);                         \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "ecall\n"                                                     \
+-              : "+r"(_arg1)                                                 \
+-              : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5),             \
+-                "r"(_num)                                                   \
+-              : "memory", "cc"                                              \
+-      );                                                                    \
+-      _arg1;                                                                \
+-})
+-
+-#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6)                  \
+-({                                                                            \
+-      register long _num  asm("a7") = (num);                                \
+-      register long _arg1 asm("a0") = (long)(arg1);                         \
+-      register long _arg2 asm("a1") = (long)(arg2);                         \
+-      register long _arg3 asm("a2") = (long)(arg3);                         \
+-      register long _arg4 asm("a3") = (long)(arg4);                         \
+-      register long _arg5 asm("a4") = (long)(arg5);                         \
+-      register long _arg6 asm("a5") = (long)(arg6);                         \
+-                                                                            \
+-      asm volatile (                                                        \
+-              "ecall\n"                                                     \
+-              : "+r"(_arg1)                                                 \
+-              : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \
+-                "r"(_num)                                                   \
+-              : "memory", "cc"                                              \
+-      );                                                                    \
+-      _arg1;                                                                \
+-})
+-
+-/* startup code */
+-asm(".section .text\n"
+-    ".global _start\n"
+-    "_start:\n"
+-    ".option push\n"
+-    ".option norelax\n"
+-    "lla   gp, __global_pointer$\n"
+-    ".option pop\n"
+-    "ld    a0, 0(sp)\n"          // argc (a0) was in the stack
+-    "add   a1, sp, "SZREG"\n"    // argv (a1) = sp
+-    "slli  a2, a0, "PTRLOG"\n"   // envp (a2) = SZREG*argc ...
+-    "add   a2, a2, "SZREG"\n"    //             + SZREG (skip null)
+-    "add   a2,a2,a1\n"           //             + argv
+-    "andi  sp,a1,-16\n"          // sp must be 16-byte aligned
+-    "call  main\n"               // main() returns the status code, we'll exit with it.
+-    "li a7, 93\n"                // NR_exit == 93
+-    "ecall\n"
+-    "");
+-
+-/* fcntl / open */
+-#define O_RDONLY            0
+-#define O_WRONLY            1
+-#define O_RDWR              2
+-#define O_CREAT         0x100
+-#define O_EXCL          0x200
+-#define O_NOCTTY        0x400
+-#define O_TRUNC        0x1000
+-#define O_APPEND       0x2000
+-#define O_NONBLOCK     0x4000
+-#define O_DIRECTORY  0x200000
+-
+-struct sys_stat_struct {
+-      unsigned long   st_dev;         /* Device.  */
+-      unsigned long   st_ino;         /* File serial number.  */
+-      unsigned int    st_mode;        /* File mode.  */
+-      unsigned int    st_nlink;       /* Link count.  */
+-      unsigned int    st_uid;         /* User ID of the file's owner.  */
+-      unsigned int    st_gid;         /* Group ID of the file's group. */
+-      unsigned long   st_rdev;        /* Device number, if device.  */
+-      unsigned long   __pad1;
+-      long            st_size;        /* Size of file, in bytes.  */
+-      int             st_blksize;     /* Optimal block size for I/O.  */
+-      int             __pad2;
+-      long            st_blocks;      /* Number 512-byte blocks allocated. */
+-      long            st_atime;       /* Time of last access.  */
+-      unsigned long   st_atime_nsec;
+-      long            st_mtime;       /* Time of last modification.  */
+-      unsigned long   st_mtime_nsec;
+-      long            st_ctime;       /* Time of last status change.  */
+-      unsigned long   st_ctime_nsec;
+-      unsigned int    __unused4;
+-      unsigned int    __unused5;
+-};
+-
+-#endif
+-
+ /* Below are the C functions used to declare the raw syscalls. They try to be
+  * architecture-agnostic, and return either a success or -errno. Declaring them
+-- 
+2.35.1
+
diff --git a/queue-5.15/tools-nolibc-fix-the-o_-fcntl-open-macro-definitions.patch b/queue-5.15/tools-nolibc-fix-the-o_-fcntl-open-macro-definitions.patch
new file mode 100644 (file)
index 0000000..461792e
--- /dev/null
@@ -0,0 +1,49 @@
+From 9bad5ae312a2df88a5267676bc6cd96da286726c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 9 Jan 2023 08:54:42 +0100
+Subject: tools/nolibc: fix the O_* fcntl/open macro definitions for riscv
+
+From: Willy Tarreau <w@1wt.eu>
+
+[ Upstream commit 00b18da4089330196906b9fe075c581c17eb726c ]
+
+When the RISCV port was imported in 5.2, the O_* macros were taken with
+their octal values and written as-is in hex, causing the getdents64()
+test to fail in nolibc-test.
+
+Fixes: 582e84f7b779 ("tool headers nolibc: add RISCV support") #5.2
+Signed-off-by: Willy Tarreau <w@1wt.eu>
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/include/nolibc/arch-riscv.h | 14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h
+index 511d67fc534e..8c0cb1abb29f 100644
+--- a/tools/include/nolibc/arch-riscv.h
++++ b/tools/include/nolibc/arch-riscv.h
+@@ -11,13 +11,13 @@
+ #define O_RDONLY            0
+ #define O_WRONLY            1
+ #define O_RDWR              2
+-#define O_CREAT         0x100
+-#define O_EXCL          0x200
+-#define O_NOCTTY        0x400
+-#define O_TRUNC        0x1000
+-#define O_APPEND       0x2000
+-#define O_NONBLOCK     0x4000
+-#define O_DIRECTORY  0x200000
++#define O_CREAT          0x40
++#define O_EXCL           0x80
++#define O_NOCTTY        0x100
++#define O_TRUNC         0x200
++#define O_APPEND        0x400
++#define O_NONBLOCK      0x800
++#define O_DIRECTORY   0x10000
+ struct sys_stat_struct {
+       unsigned long   st_dev;         /* Device.  */
+-- 
+2.35.1
+
diff --git a/queue-5.15/tools-nolibc-remove-.global-_start-from-the-entry-po.patch b/queue-5.15/tools-nolibc-remove-.global-_start-from-the-entry-po.patch
new file mode 100644 (file)
index 0000000..d0dca47
--- /dev/null
@@ -0,0 +1,110 @@
+From 75e976fd38710080fbef1752f9dc01a475498933 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 29 Mar 2022 17:17:31 +0700
+Subject: tools/nolibc: Remove .global _start from the entry point code
+
+From: Ammar Faizi <ammarfaizi2@gnuweeb.org>
+
+[ Upstream commit 1590c59836dace3a20945bad049fe8802c4e6f3f ]
+
+Building with clang yields the following error:
+```
+  <inline asm>:3:1: error: _start changed binding to STB_GLOBAL
+  .global _start
+  ^
+  1 error generated.
+```
+Make sure to specify only one of `.global _start` and `.weak _start`.
+Remove `.global _start`.
+
+Cc: llvm@lists.linux.dev
+Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
+Acked-by: Willy Tarreau <w@1wt.eu>
+Signed-off-by: Ammar Faizi <ammarfaizi2@gnuweeb.org>
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Stable-dep-of: 184177c3d6e0 ("tools/nolibc: restore mips branch ordering in the _start block")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/include/nolibc/arch-aarch64.h | 1 -
+ tools/include/nolibc/arch-arm.h     | 1 -
+ tools/include/nolibc/arch-i386.h    | 1 -
+ tools/include/nolibc/arch-mips.h    | 1 -
+ tools/include/nolibc/arch-riscv.h   | 1 -
+ tools/include/nolibc/arch-x86_64.h  | 1 -
+ 6 files changed, 6 deletions(-)
+
+diff --git a/tools/include/nolibc/arch-aarch64.h b/tools/include/nolibc/arch-aarch64.h
+index 87d9e434820c..2dbd80d633cb 100644
+--- a/tools/include/nolibc/arch-aarch64.h
++++ b/tools/include/nolibc/arch-aarch64.h
+@@ -184,7 +184,6 @@ struct sys_stat_struct {
+ /* startup code */
+ asm(".section .text\n"
+     ".weak _start\n"
+-    ".global _start\n"
+     "_start:\n"
+     "ldr x0, [sp]\n"              // argc (x0) was in the stack
+     "add x1, sp, 8\n"             // argv (x1) = sp
+diff --git a/tools/include/nolibc/arch-arm.h b/tools/include/nolibc/arch-arm.h
+index 001a3c8c9ad5..1191395b5acd 100644
+--- a/tools/include/nolibc/arch-arm.h
++++ b/tools/include/nolibc/arch-arm.h
+@@ -177,7 +177,6 @@ struct sys_stat_struct {
+ /* startup code */
+ asm(".section .text\n"
+     ".weak _start\n"
+-    ".global _start\n"
+     "_start:\n"
+ #if defined(__THUMBEB__) || defined(__THUMBEL__)
+     /* We enter here in 32-bit mode but if some previous functions were in
+diff --git a/tools/include/nolibc/arch-i386.h b/tools/include/nolibc/arch-i386.h
+index d7e4d53325a3..125a691fc631 100644
+--- a/tools/include/nolibc/arch-i386.h
++++ b/tools/include/nolibc/arch-i386.h
+@@ -176,7 +176,6 @@ struct sys_stat_struct {
+  */
+ asm(".section .text\n"
+     ".weak _start\n"
+-    ".global _start\n"
+     "_start:\n"
+     "pop %eax\n"                // argc   (first arg, %eax)
+     "mov %esp, %ebx\n"          // argv[] (second arg, %ebx)
+diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h
+index c9a6aac87c6d..1a124790c99f 100644
+--- a/tools/include/nolibc/arch-mips.h
++++ b/tools/include/nolibc/arch-mips.h
+@@ -192,7 +192,6 @@ struct sys_stat_struct {
+ asm(".section .text\n"
+     ".weak __start\n"
+     ".set nomips16\n"
+-    ".global __start\n"
+     ".set    noreorder\n"
+     ".option pic0\n"
+     ".ent __start\n"
+diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h
+index bc10b7b5706d..511d67fc534e 100644
+--- a/tools/include/nolibc/arch-riscv.h
++++ b/tools/include/nolibc/arch-riscv.h
+@@ -185,7 +185,6 @@ struct sys_stat_struct {
+ /* startup code */
+ asm(".section .text\n"
+     ".weak _start\n"
+-    ".global _start\n"
+     "_start:\n"
+     ".option push\n"
+     ".option norelax\n"
+diff --git a/tools/include/nolibc/arch-x86_64.h b/tools/include/nolibc/arch-x86_64.h
+index fe517c16cd4d..b1af63ce1cb0 100644
+--- a/tools/include/nolibc/arch-x86_64.h
++++ b/tools/include/nolibc/arch-x86_64.h
+@@ -199,7 +199,6 @@ struct sys_stat_struct {
+  */
+ asm(".section .text\n"
+     ".weak _start\n"
+-    ".global _start\n"
+     "_start:\n"
+     "pop %rdi\n"                // argc   (first arg, %rdi)
+     "mov %rsp, %rsi\n"          // argv[] (second arg, %rsi)
+-- 
+2.35.1
+
diff --git a/queue-5.15/tools-nolibc-restore-mips-branch-ordering-in-the-_st.patch b/queue-5.15/tools-nolibc-restore-mips-branch-ordering-in-the-_st.patch
new file mode 100644 (file)
index 0000000..bea202a
--- /dev/null
@@ -0,0 +1,80 @@
+From fde22a5f2a2580031a423136b1aa51a3b239ee90 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 9 Jan 2023 08:54:39 +0100
+Subject: tools/nolibc: restore mips branch ordering in the _start block
+
+From: Willy Tarreau <w@1wt.eu>
+
+[ Upstream commit 184177c3d6e023da934761e198c281344d7dd65b ]
+
+Depending on the compiler used and the optimization options, the sbrk()
+test was crashing, both on real hardware (mips-24kc) and in qemu. One
+such example is kernel.org toolchain in version 11.3 optimizing at -Os.
+
+Inspecting the sys_brk() call shows the following code:
+
+  0040047c <sys_brk>:
+    40047c:       24020fcd        li      v0,4045
+    400480:       27bdffe0        addiu   sp,sp,-32
+    400484:       0000000c        syscall
+    400488:       27bd0020        addiu   sp,sp,32
+    40048c:       10e00001        beqz    a3,400494 <sys_brk+0x18>
+    400490:       00021023        negu    v0,v0
+    400494:       03e00008        jr      ra
+
+It is obviously wrong: the "negu" instruction is placed in beqz's
+delayed slot, and worse, there's no nop nor instruction after the
+return, so the next function's first instruction (addiu sp,sp,-32)
+will also be executed as part of the delayed slot that follows the
+return.
+
+This is caused by the ".set noreorder" directive in the _start block,
+which applies to the whole program. The compiler emits code without the
+delayed slots and relies on the assembler to swap instructions when this
+option is not set. Removing the option would require changing the
+startup code in a way that wouldn't make it look like the resulting
+code, which would not be easy to debug. Instead let's just save the
+default ordering before changing it, and restore it at the end of the
+_start block. Now the code is correct:
+
+  0040047c <sys_brk>:
+    40047c:       24020fcd        li      v0,4045
+    400480:       27bdffe0        addiu   sp,sp,-32
+    400484:       0000000c        syscall
+    400488:       10e00002        beqz    a3,400494 <sys_brk+0x18>
+    40048c:       27bd0020        addiu   sp,sp,32
+    400490:       00021023        negu    v0,v0
+    400494:       03e00008        jr      ra
+    400498:       00000000        nop
+
+Fixes: 66b6f755ad45 ("rcutorture: Import a copy of nolibc") #5.0
+Signed-off-by: Willy Tarreau <w@1wt.eu>
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/include/nolibc/arch-mips.h | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h
+index 1a124790c99f..5d647afa42e6 100644
+--- a/tools/include/nolibc/arch-mips.h
++++ b/tools/include/nolibc/arch-mips.h
+@@ -192,6 +192,7 @@ struct sys_stat_struct {
+ asm(".section .text\n"
+     ".weak __start\n"
+     ".set nomips16\n"
++    ".set push\n"
+     ".set    noreorder\n"
+     ".option pic0\n"
+     ".ent __start\n"
+@@ -210,6 +211,7 @@ asm(".section .text\n"
+     "li $v0, 4001\n"              // NR_exit == 4001
+     "syscall\n"
+     ".end __start\n"
++    ".set pop\n"
+     "");
+ #endif // _NOLIBC_ARCH_MIPS_H
+-- 
+2.35.1
+
diff --git a/queue-5.15/tools-nolibc-std-move-the-standard-type-definitions-.patch b/queue-5.15/tools-nolibc-std-move-the-standard-type-definitions-.patch
new file mode 100644 (file)
index 0000000..aa0f20a
--- /dev/null
@@ -0,0 +1,154 @@
+From 0c09e37edd9a27791df708bb525202297199bfa8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 7 Feb 2022 17:23:15 +0100
+Subject: tools/nolibc/std: move the standard type definitions to std.h
+
+From: Willy Tarreau <w@1wt.eu>
+
+[ Upstream commit 967cce191f50090d5cbd3841ee2bbb7835afeae2 ]
+
+The ordering of includes and definitions for now is a bit of a mess, as
+for example asm/signal.h is included after int definitions, but plenty of
+structures are defined later as they rely on other includes.
+
+Let's move the standard type definitions to a dedicated file that is
+included first. We also move NULL there. This way all other includes
+are aware of it, and we can bring asm/signal.h back to the top of the
+file.
+
+Signed-off-by: Willy Tarreau <w@1wt.eu>
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Stable-dep-of: 184177c3d6e0 ("tools/nolibc: restore mips branch ordering in the _start block")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/include/nolibc/nolibc.h | 42 ++++--------------------------
+ tools/include/nolibc/std.h    | 49 +++++++++++++++++++++++++++++++++++
+ 2 files changed, 54 insertions(+), 37 deletions(-)
+ create mode 100644 tools/include/nolibc/std.h
+
+diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
+index 7ba180651b17..41eb7e647238 100644
+--- a/tools/include/nolibc/nolibc.h
++++ b/tools/include/nolibc/nolibc.h
+@@ -81,7 +81,12 @@
+  *
+  */
++/* standard type definitions */
++#include "std.h"
++
++/* system includes */
+ #include <asm/unistd.h>
++#include <asm/signal.h>  // for SIGCHLD
+ #include <asm/ioctls.h>
+ #include <asm/errno.h>
+ #include <linux/fs.h>
+@@ -104,40 +109,6 @@ static int errno;
+  */
+ #define MAX_ERRNO 4095
+-/* Declare a few quite common macros and types that usually are in stdlib.h,
+- * stdint.h, ctype.h, unistd.h and a few other common locations.
+- */
+-
+-#define NULL ((void *)0)
+-
+-/* stdint types */
+-typedef unsigned char       uint8_t;
+-typedef   signed char        int8_t;
+-typedef unsigned short     uint16_t;
+-typedef   signed short      int16_t;
+-typedef unsigned int       uint32_t;
+-typedef   signed int        int32_t;
+-typedef unsigned long long uint64_t;
+-typedef   signed long long  int64_t;
+-typedef unsigned long        size_t;
+-typedef   signed long       ssize_t;
+-typedef unsigned long     uintptr_t;
+-typedef   signed long      intptr_t;
+-typedef   signed long     ptrdiff_t;
+-
+-/* for stat() */
+-typedef unsigned int          dev_t;
+-typedef unsigned long         ino_t;
+-typedef unsigned int         mode_t;
+-typedef   signed int          pid_t;
+-typedef unsigned int          uid_t;
+-typedef unsigned int          gid_t;
+-typedef unsigned long       nlink_t;
+-typedef   signed long         off_t;
+-typedef   signed long     blksize_t;
+-typedef   signed long      blkcnt_t;
+-typedef   signed long        time_t;
+-
+ /* for poll() */
+ struct pollfd {
+       int fd;
+@@ -246,9 +217,6 @@ struct stat {
+ #define WEXITSTATUS(status)   (((status) & 0xff00) >> 8)
+ #define WIFEXITED(status)     (((status) & 0x7f) == 0)
+-/* for SIGCHLD */
+-#include <asm/signal.h>
+-
+ /* Below comes the architecture-specific code. For each architecture, we have
+  * the syscall declarations and the _start code definition. This is the only
+  * global part. On all architectures the kernel puts everything in the stack
+diff --git a/tools/include/nolibc/std.h b/tools/include/nolibc/std.h
+new file mode 100644
+index 000000000000..1747ae125392
+--- /dev/null
++++ b/tools/include/nolibc/std.h
+@@ -0,0 +1,49 @@
++/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
++/*
++ * Standard definitions and types for NOLIBC
++ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
++ */
++
++#ifndef _NOLIBC_STD_H
++#define _NOLIBC_STD_H
++
++/* Declare a few quite common macros and types that usually are in stdlib.h,
++ * stdint.h, ctype.h, unistd.h and a few other common locations. Please place
++ * integer type definitions and generic macros here, but avoid OS-specific and
++ * syscall-specific stuff, as this file is expected to be included very early.
++ */
++
++/* note: may already be defined */
++#ifndef NULL
++#define NULL ((void *)0)
++#endif
++
++/* stdint types */
++typedef unsigned char       uint8_t;
++typedef   signed char        int8_t;
++typedef unsigned short     uint16_t;
++typedef   signed short      int16_t;
++typedef unsigned int       uint32_t;
++typedef   signed int        int32_t;
++typedef unsigned long long uint64_t;
++typedef   signed long long  int64_t;
++typedef unsigned long        size_t;
++typedef   signed long       ssize_t;
++typedef unsigned long     uintptr_t;
++typedef   signed long      intptr_t;
++typedef   signed long     ptrdiff_t;
++
++/* those are commonly provided by sys/types.h */
++typedef unsigned int          dev_t;
++typedef unsigned long         ino_t;
++typedef unsigned int         mode_t;
++typedef   signed int          pid_t;
++typedef unsigned int          uid_t;
++typedef unsigned int          gid_t;
++typedef unsigned long       nlink_t;
++typedef   signed long         off_t;
++typedef   signed long     blksize_t;
++typedef   signed long      blkcnt_t;
++typedef   signed long        time_t;
++
++#endif /* _NOLIBC_STD_H */
+-- 
+2.35.1
+
diff --git a/queue-5.15/tools-nolibc-types-split-syscall-specific-definition.patch b/queue-5.15/tools-nolibc-types-split-syscall-specific-definition.patch
new file mode 100644 (file)
index 0000000..19ea286
--- /dev/null
@@ -0,0 +1,296 @@
+From f313d89cf19dc59fe1f081326442b003923d7ae7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 7 Feb 2022 17:23:16 +0100
+Subject: tools/nolibc/types: split syscall-specific definitions into their own
+ files
+
+From: Willy Tarreau <w@1wt.eu>
+
+[ Upstream commit cc7a492ad0a076dff5cb4281b1516676d7924fcf ]
+
+The macros and type definitions used by a number of syscalls were moved
+to types.h where they will be easier to maintain. A few of them
+are arch-specific and must not be moved there (e.g. O_*, sys_stat_struct).
+A warning about them was placed at the top of the file.
+
+Signed-off-by: Willy Tarreau <w@1wt.eu>
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Stable-dep-of: 184177c3d6e0 ("tools/nolibc: restore mips branch ordering in the _start block")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/include/nolibc/nolibc.h | 110 +---------------------------
+ tools/include/nolibc/types.h  | 133 ++++++++++++++++++++++++++++++++++
+ 2 files changed, 135 insertions(+), 108 deletions(-)
+ create mode 100644 tools/include/nolibc/types.h
+
+diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
+index 41eb7e647238..fbfc02aa99c3 100644
+--- a/tools/include/nolibc/nolibc.h
++++ b/tools/include/nolibc/nolibc.h
+@@ -92,7 +92,9 @@
+ #include <linux/fs.h>
+ #include <linux/loop.h>
+ #include <linux/time.h>
++#include "types.h"
++/* Used by programs to avoid std includes */
+ #define NOLIBC
+ /* this way it will be removed if unused */
+@@ -109,114 +111,6 @@ static int errno;
+  */
+ #define MAX_ERRNO 4095
+-/* for poll() */
+-struct pollfd {
+-      int fd;
+-      short int events;
+-      short int revents;
+-};
+-
+-/* for getdents64() */
+-struct linux_dirent64 {
+-      uint64_t       d_ino;
+-      int64_t        d_off;
+-      unsigned short d_reclen;
+-      unsigned char  d_type;
+-      char           d_name[];
+-};
+-
+-/* commonly an fd_set represents 256 FDs */
+-#define FD_SETSIZE 256
+-typedef struct { uint32_t fd32[FD_SETSIZE/32]; } fd_set;
+-
+-/* needed by wait4() */
+-struct rusage {
+-      struct timeval ru_utime;
+-      struct timeval ru_stime;
+-      long   ru_maxrss;
+-      long   ru_ixrss;
+-      long   ru_idrss;
+-      long   ru_isrss;
+-      long   ru_minflt;
+-      long   ru_majflt;
+-      long   ru_nswap;
+-      long   ru_inblock;
+-      long   ru_oublock;
+-      long   ru_msgsnd;
+-      long   ru_msgrcv;
+-      long   ru_nsignals;
+-      long   ru_nvcsw;
+-      long   ru_nivcsw;
+-};
+-
+-/* stat flags (WARNING, octal here) */
+-#define S_IFDIR       0040000
+-#define S_IFCHR       0020000
+-#define S_IFBLK       0060000
+-#define S_IFREG       0100000
+-#define S_IFIFO       0010000
+-#define S_IFLNK       0120000
+-#define S_IFSOCK      0140000
+-#define S_IFMT        0170000
+-
+-#define S_ISDIR(mode)  (((mode) & S_IFDIR) == S_IFDIR)
+-#define S_ISCHR(mode)  (((mode) & S_IFCHR) == S_IFCHR)
+-#define S_ISBLK(mode)  (((mode) & S_IFBLK) == S_IFBLK)
+-#define S_ISREG(mode)  (((mode) & S_IFREG) == S_IFREG)
+-#define S_ISFIFO(mode) (((mode) & S_IFIFO) == S_IFIFO)
+-#define S_ISLNK(mode)  (((mode) & S_IFLNK) == S_IFLNK)
+-#define S_ISSOCK(mode) (((mode) & S_IFSOCK) == S_IFSOCK)
+-
+-#define DT_UNKNOWN 0
+-#define DT_FIFO    1
+-#define DT_CHR     2
+-#define DT_DIR     4
+-#define DT_BLK     6
+-#define DT_REG     8
+-#define DT_LNK    10
+-#define DT_SOCK   12
+-
+-/* all the *at functions */
+-#ifndef AT_FDCWD
+-#define AT_FDCWD             -100
+-#endif
+-
+-/* lseek */
+-#define SEEK_SET        0
+-#define SEEK_CUR        1
+-#define SEEK_END        2
+-
+-/* reboot */
+-#define LINUX_REBOOT_MAGIC1         0xfee1dead
+-#define LINUX_REBOOT_MAGIC2         0x28121969
+-#define LINUX_REBOOT_CMD_HALT       0xcdef0123
+-#define LINUX_REBOOT_CMD_POWER_OFF  0x4321fedc
+-#define LINUX_REBOOT_CMD_RESTART    0x01234567
+-#define LINUX_REBOOT_CMD_SW_SUSPEND 0xd000fce2
+-
+-
+-/* The format of the struct as returned by the libc to the application, which
+- * significantly differs from the format returned by the stat() syscall flavours.
+- */
+-struct stat {
+-      dev_t     st_dev;     /* ID of device containing file */
+-      ino_t     st_ino;     /* inode number */
+-      mode_t    st_mode;    /* protection */
+-      nlink_t   st_nlink;   /* number of hard links */
+-      uid_t     st_uid;     /* user ID of owner */
+-      gid_t     st_gid;     /* group ID of owner */
+-      dev_t     st_rdev;    /* device ID (if special file) */
+-      off_t     st_size;    /* total size, in bytes */
+-      blksize_t st_blksize; /* blocksize for file system I/O */
+-      blkcnt_t  st_blocks;  /* number of 512B blocks allocated */
+-      time_t    st_atime;   /* time of last access */
+-      time_t    st_mtime;   /* time of last modification */
+-      time_t    st_ctime;   /* time of last status change */
+-};
+-
+-#define WEXITSTATUS(status)   (((status) & 0xff00) >> 8)
+-#define WIFEXITED(status)     (((status) & 0x7f) == 0)
+-
+ /* Below comes the architecture-specific code. For each architecture, we have
+  * the syscall declarations and the _start code definition. This is the only
+  * global part. On all architectures the kernel puts everything in the stack
+diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h
+new file mode 100644
+index 000000000000..2f09abaf95f1
+--- /dev/null
++++ b/tools/include/nolibc/types.h
+@@ -0,0 +1,133 @@
++/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
++/*
++ * Special types used by various syscalls for NOLIBC
++ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
++ */
++
++#ifndef _NOLIBC_TYPES_H
++#define _NOLIBC_TYPES_H
++
++#include "std.h"
++#include <linux/time.h>
++
++
++/* Only the generic macros and types may be defined here. The arch-specific
++ * ones such as the O_RDONLY and related macros used by fcntl() and open(), or
++ * the layout of sys_stat_struct must not be defined here.
++ */
++
++/* stat flags (WARNING, octal here) */
++#define S_IFDIR        0040000
++#define S_IFCHR        0020000
++#define S_IFBLK        0060000
++#define S_IFREG        0100000
++#define S_IFIFO        0010000
++#define S_IFLNK        0120000
++#define S_IFSOCK       0140000
++#define S_IFMT         0170000
++
++#define S_ISDIR(mode)  (((mode) & S_IFDIR)  == S_IFDIR)
++#define S_ISCHR(mode)  (((mode) & S_IFCHR)  == S_IFCHR)
++#define S_ISBLK(mode)  (((mode) & S_IFBLK)  == S_IFBLK)
++#define S_ISREG(mode)  (((mode) & S_IFREG)  == S_IFREG)
++#define S_ISFIFO(mode) (((mode) & S_IFIFO)  == S_IFIFO)
++#define S_ISLNK(mode)  (((mode) & S_IFLNK)  == S_IFLNK)
++#define S_ISSOCK(mode) (((mode) & S_IFSOCK) == S_IFSOCK)
++
++/* dirent types */
++#define DT_UNKNOWN     0x0
++#define DT_FIFO        0x1
++#define DT_CHR         0x2
++#define DT_DIR         0x4
++#define DT_BLK         0x6
++#define DT_REG         0x8
++#define DT_LNK         0xa
++#define DT_SOCK        0xc
++
++/* commonly an fd_set represents 256 FDs */
++#define FD_SETSIZE     256
++
++/* Special FD used by all the *at functions */
++#ifndef AT_FDCWD
++#define AT_FDCWD       (-100)
++#endif
++
++/* whence values for lseek() */
++#define SEEK_SET       0
++#define SEEK_CUR       1
++#define SEEK_END       2
++
++/* cmd for reboot() */
++#define LINUX_REBOOT_MAGIC1         0xfee1dead
++#define LINUX_REBOOT_MAGIC2         0x28121969
++#define LINUX_REBOOT_CMD_HALT       0xcdef0123
++#define LINUX_REBOOT_CMD_POWER_OFF  0x4321fedc
++#define LINUX_REBOOT_CMD_RESTART    0x01234567
++#define LINUX_REBOOT_CMD_SW_SUSPEND 0xd000fce2
++
++/* Macros used on waitpid()'s return status */
++#define WEXITSTATUS(status) (((status) & 0xff00) >> 8)
++#define WIFEXITED(status)   (((status) & 0x7f) == 0)
++
++
++/* for select() */
++typedef struct {
++      uint32_t fd32[FD_SETSIZE / 32];
++} fd_set;
++
++/* for poll() */
++struct pollfd {
++      int fd;
++      short int events;
++      short int revents;
++};
++
++/* for getdents64() */
++struct linux_dirent64 {
++      uint64_t       d_ino;
++      int64_t        d_off;
++      unsigned short d_reclen;
++      unsigned char  d_type;
++      char           d_name[];
++};
++
++/* needed by wait4() */
++struct rusage {
++      struct timeval ru_utime;
++      struct timeval ru_stime;
++      long   ru_maxrss;
++      long   ru_ixrss;
++      long   ru_idrss;
++      long   ru_isrss;
++      long   ru_minflt;
++      long   ru_majflt;
++      long   ru_nswap;
++      long   ru_inblock;
++      long   ru_oublock;
++      long   ru_msgsnd;
++      long   ru_msgrcv;
++      long   ru_nsignals;
++      long   ru_nvcsw;
++      long   ru_nivcsw;
++};
++
++/* The format of the struct as returned by the libc to the application, which
++ * significantly differs from the format returned by the stat() syscall flavours.
++ */
++struct stat {
++      dev_t     st_dev;     /* ID of device containing file */
++      ino_t     st_ino;     /* inode number */
++      mode_t    st_mode;    /* protection */
++      nlink_t   st_nlink;   /* number of hard links */
++      uid_t     st_uid;     /* user ID of owner */
++      gid_t     st_gid;     /* group ID of owner */
++      dev_t     st_rdev;    /* device ID (if special file) */
++      off_t     st_size;    /* total size, in bytes */
++      blksize_t st_blksize; /* blocksize for file system I/O */
++      blkcnt_t  st_blocks;  /* number of 512B blocks allocated */
++      time_t    st_atime;   /* time of last access */
++      time_t    st_mtime;   /* time of last modification */
++      time_t    st_ctime;   /* time of last status change */
++};
++
++#endif /* _NOLIBC_TYPES_H */
+-- 
+2.35.1
+
diff --git a/queue-5.15/tools-nolibc-use-pselect6-on-riscv.patch b/queue-5.15/tools-nolibc-use-pselect6-on-riscv.patch
new file mode 100644 (file)
index 0000000..8b77a8c
--- /dev/null
@@ -0,0 +1,38 @@
+From feb61ba67b9bbc403542a4cf18a94e27ae5a8bed Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 7 Feb 2022 17:23:13 +0100
+Subject: tools/nolibc: use pselect6 on RISCV
+
+From: Willy Tarreau <w@1wt.eu>
+
+[ Upstream commit 9c2970fbb425cca0256ecf0f96490e4f253fda24 ]
+
+This arch doesn't provide the old-style select() syscall, so we have to
+use pselect6() instead.
+
+Signed-off-by: Willy Tarreau <w@1wt.eu>
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Stable-dep-of: 184177c3d6e0 ("tools/nolibc: restore mips branch ordering in the _start block")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/include/nolibc/nolibc.h | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
+index 676fe5d92875..7ba180651b17 100644
+--- a/tools/include/nolibc/nolibc.h
++++ b/tools/include/nolibc/nolibc.h
+@@ -1256,7 +1256,10 @@ struct sys_stat_struct {
+  *   - the arguments are cast to long and assigned into the target
+  *     registers which are then simply passed as registers to the asm code,
+  *     so that we don't have to experience issues with register constraints.
++ *
++ * On riscv, select() is not implemented so we have to use pselect6().
+  */
++#define __ARCH_WANT_SYS_PSELECT6
+ #define my_syscall0(num)                                                      \
+ ({                                                                            \
+-- 
+2.35.1
+
diff --git a/queue-5.15/tools-nolibc-x86-64-use-mov-60-eax-instead-of-mov-60.patch b/queue-5.15/tools-nolibc-x86-64-use-mov-60-eax-instead-of-mov-60.patch
new file mode 100644 (file)
index 0000000..8742dc9
--- /dev/null
@@ -0,0 +1,43 @@
+From d74510b4743015fc50c3cda41dc632340f1905d7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 24 Oct 2021 19:43:22 +0200
+Subject: tools/nolibc: x86-64: Use `mov $60,%eax` instead of `mov $60,%rax`
+
+From: Ammar Faizi <ammar.faizi@students.amikom.ac.id>
+
+[ Upstream commit 7bdc0e7a390511cd3df8194003b908f15a6170a5 ]
+
+Note that a mov to a 32-bit register zero-extends into the full 64-bit
+register. Thus `mov $60,%eax` has the same effect as `mov $60,%rax`. Use
+the shorter encoding to achieve the same thing.
+```
+  b8 3c 00 00 00               mov    $60,%eax (5 bytes) [1]
+  48 c7 c0 3c 00 00 00         mov    $60,%rax (7 bytes) [2]
+```
+Currently, we use [2]. Change it to [1] for shorter code.
+
+Signed-off-by: Ammar Faizi <ammar.faizi@students.amikom.ac.id>
+Signed-off-by: Willy Tarreau <w@1wt.eu>
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Stable-dep-of: 184177c3d6e0 ("tools/nolibc: restore mips branch ordering in the _start block")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/include/nolibc/nolibc.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
+index ece7a70d8b39..676fe5d92875 100644
+--- a/tools/include/nolibc/nolibc.h
++++ b/tools/include/nolibc/nolibc.h
+@@ -420,7 +420,7 @@ asm(".section .text\n"
+     "and $-16, %rsp\n"          // x86 ABI : esp must be 16-byte aligned before call
+     "call main\n"               // main() returns the status code, we'll exit with it.
+     "mov %eax, %edi\n"          // retrieve exit code (32 bit)
+-    "mov $60, %rax\n"           // NR_exit == 60
++    "mov $60, %eax\n"           // NR_exit == 60
+     "syscall\n"                 // really exit
+     "hlt\n"                     // ensure it does not return
+     "");
+-- 
+2.35.1
+
diff --git a/queue-5.15/tools-nolibc-x86-remove-r8-r9-and-r10-from-the-clobb.patch b/queue-5.15/tools-nolibc-x86-remove-r8-r9-and-r10-from-the-clobb.patch
new file mode 100644 (file)
index 0000000..2b0c835
--- /dev/null
@@ -0,0 +1,245 @@
+From 865934f5ef389bedb7aa42d4ee29552c24c056d5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 24 Oct 2021 19:43:21 +0200
+Subject: tools/nolibc: x86: Remove `r8`, `r9` and `r10` from the clobber list
+
+From: Ammar Faizi <ammar.faizi@students.amikom.ac.id>
+
+[ Upstream commit bf91666959eeac44fb686e9359e37830944beef2 ]
+
+Linux x86-64 syscall only clobbers rax, rcx and r11 (and "memory").
+
+  - rax for the return value.
+  - rcx to save the return address.
+  - r11 to save the rflags.
+
+Other registers are preserved.
+
+Having r8, r9 and r10 in the syscall clobber list is harmless, but this
+results in a missed-optimization.
+
+As the syscall doesn't clobber r8-r10, GCC should be allowed to reuse
+their value after the syscall returns to userspace. But since they are
+in the clobber list, GCC will always miss this opportunity.
+
+Remove them from the x86-64 syscall clobber list to help GCC generate
+better code and fix the comment.
+
+See also the x86-64 ABI, section A.2 AMD64 Linux Kernel Conventions,
+A.2.1 Calling Conventions [1].
+
+Extra note:
+Some people may think there is no real benefit in removing r8, r9 and
+r10 from the syscall clobber list, because a syscall gives the impression
+of a C function call, and a function call always clobbers those three.
+
+However, that is not the case for nolibc.h, because the "syscall"
+instruction (whose opcode is "0f 05") can potentially be inlined into the
+user functions.
+
+All syscalls in nolibc.h are written as static functions with inline
+ASM and are likely to always be inlined when an optimization flag is
+used, so it is beneficial not to have r8, r9 and r10 in the clobber list.
+
+Here is the example where this matters.
+
+Consider the following C code:
+```
+  #include "tools/include/nolibc/nolibc.h"
+  #define read_abc(a, b, c) __asm__ volatile("nop"::"r"(a),"r"(b),"r"(c))
+
+  int main(void)
+  {
+       int a = 0xaa;
+       int b = 0xbb;
+       int c = 0xcc;
+
+       read_abc(a, b, c);
+       write(1, "test\n", 5);
+       read_abc(a, b, c);
+
+       return 0;
+  }
+```
+
+Compile with:
+    gcc -Os test.c -o test -nostdlib
+
+With r8, r9, r10 in the clobber list, GCC generates this:
+
+0000000000001000 <main>:
+    1000:      f3 0f 1e fa             endbr64
+    1004:      41 54                   push   %r12
+    1006:      41 bc cc 00 00 00       mov    $0xcc,%r12d
+    100c:      55                      push   %rbp
+    100d:      bd bb 00 00 00          mov    $0xbb,%ebp
+    1012:      53                      push   %rbx
+    1013:      bb aa 00 00 00          mov    $0xaa,%ebx
+    1018:      90                      nop
+    1019:      b8 01 00 00 00          mov    $0x1,%eax
+    101e:      bf 01 00 00 00          mov    $0x1,%edi
+    1023:      ba 05 00 00 00          mov    $0x5,%edx
+    1028:      48 8d 35 d1 0f 00 00    lea    0xfd1(%rip),%rsi
+    102f:      0f 05                   syscall
+    1031:      90                      nop
+    1032:      31 c0                   xor    %eax,%eax
+    1034:      5b                      pop    %rbx
+    1035:      5d                      pop    %rbp
+    1036:      41 5c                   pop    %r12
+    1038:      c3                      ret
+
+GCC thinks that syscall will clobber r8, r9, r10. So it keeps 0xaa,
+0xbb and 0xcc in callee-saved registers (rbx, rbp and r12). This clearly
+costs extra memory accesses and extra stack space to preserve them.
+
+But syscall does not actually clobber them, so this is a missed
+optimization.
+
+Now without r8, r9, r10 in the clobber list, GCC generates better code:
+
+0000000000001000 <main>:
+    1000:      f3 0f 1e fa             endbr64
+    1004:      41 b8 aa 00 00 00       mov    $0xaa,%r8d
+    100a:      41 b9 bb 00 00 00       mov    $0xbb,%r9d
+    1010:      41 ba cc 00 00 00       mov    $0xcc,%r10d
+    1016:      90                      nop
+    1017:      b8 01 00 00 00          mov    $0x1,%eax
+    101c:      bf 01 00 00 00          mov    $0x1,%edi
+    1021:      ba 05 00 00 00          mov    $0x5,%edx
+    1026:      48 8d 35 d3 0f 00 00    lea    0xfd3(%rip),%rsi
+    102d:      0f 05                   syscall
+    102f:      90                      nop
+    1030:      31 c0                   xor    %eax,%eax
+    1032:      c3                      ret
+
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: x86@kernel.org
+Cc: "H. Peter Anvin" <hpa@zytor.com>
+Cc: David Laight <David.Laight@ACULAB.COM>
+Acked-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Ammar Faizi <ammar.faizi@students.amikom.ac.id>
+Link: https://gitlab.com/x86-psABIs/x86-64-ABI/-/wikis/x86-64-psABI [1]
+Link: https://lore.kernel.org/lkml/20211011040344.437264-1-ammar.faizi@students.amikom.ac.id/
+Signed-off-by: Willy Tarreau <w@1wt.eu>
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Stable-dep-of: 184177c3d6e0 ("tools/nolibc: restore mips branch ordering in the _start block")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/include/nolibc/nolibc.h | 33 +++++++++++++++++++--------------
+ 1 file changed, 19 insertions(+), 14 deletions(-)
+
+diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
+index d64020c1922c..ece7a70d8b39 100644
+--- a/tools/include/nolibc/nolibc.h
++++ b/tools/include/nolibc/nolibc.h
+@@ -265,12 +265,17 @@ struct stat {
+  *   - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively
+  *   - the system call is performed by calling the syscall instruction
+  *   - syscall return comes in rax
+- *   - rcx and r8..r11 may be clobbered, others are preserved.
++ *   - rcx and r11 are clobbered, others are preserved.
+  *   - the arguments are cast to long and assigned into the target registers
+  *     which are then simply passed as registers to the asm code, so that we
+  *     don't have to experience issues with register constraints.
+  *   - the syscall number is always specified last in order to allow to force
+  *     some registers before (gcc refuses a %-register at the last position).
++ *   - see also x86-64 ABI section A.2 AMD64 Linux Kernel Conventions, A.2.1
++ *     Calling Conventions.
++ *
++ * Link x86-64 ABI: https://gitlab.com/x86-psABIs/x86-64-ABI/-/wikis/x86-64-psABI
++ *
+  */
+ #define my_syscall0(num)                                                      \
+@@ -280,9 +285,9 @@ struct stat {
+                                                                             \
+       asm volatile (                                                        \
+               "syscall\n"                                                   \
+-              : "=a" (_ret)                                                 \
++              : "=a"(_ret)                                                  \
+               : "0"(_num)                                                   \
+-              : "rcx", "r8", "r9", "r10", "r11", "memory", "cc"             \
++              : "rcx", "r11", "memory", "cc"                                \
+       );                                                                    \
+       _ret;                                                                 \
+ })
+@@ -295,10 +300,10 @@ struct stat {
+                                                                             \
+       asm volatile (                                                        \
+               "syscall\n"                                                   \
+-              : "=a" (_ret)                                                 \
++              : "=a"(_ret)                                                  \
+               : "r"(_arg1),                                                 \
+                 "0"(_num)                                                   \
+-              : "rcx", "r8", "r9", "r10", "r11", "memory", "cc"             \
++              : "rcx", "r11", "memory", "cc"                                \
+       );                                                                    \
+       _ret;                                                                 \
+ })
+@@ -312,10 +317,10 @@ struct stat {
+                                                                             \
+       asm volatile (                                                        \
+               "syscall\n"                                                   \
+-              : "=a" (_ret)                                                 \
++              : "=a"(_ret)                                                  \
+               : "r"(_arg1), "r"(_arg2),                                     \
+                 "0"(_num)                                                   \
+-              : "rcx", "r8", "r9", "r10", "r11", "memory", "cc"             \
++              : "rcx", "r11", "memory", "cc"                                \
+       );                                                                    \
+       _ret;                                                                 \
+ })
+@@ -330,10 +335,10 @@ struct stat {
+                                                                             \
+       asm volatile (                                                        \
+               "syscall\n"                                                   \
+-              : "=a" (_ret)                                                 \
++              : "=a"(_ret)                                                  \
+               : "r"(_arg1), "r"(_arg2), "r"(_arg3),                         \
+                 "0"(_num)                                                   \
+-              : "rcx", "r8", "r9", "r10", "r11", "memory", "cc"             \
++              : "rcx", "r11", "memory", "cc"                                \
+       );                                                                    \
+       _ret;                                                                 \
+ })
+@@ -349,10 +354,10 @@ struct stat {
+                                                                             \
+       asm volatile (                                                        \
+               "syscall\n"                                                   \
+-              : "=a" (_ret), "=r"(_arg4)                                    \
++              : "=a"(_ret)                                                  \
+               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4),             \
+                 "0"(_num)                                                   \
+-              : "rcx", "r8", "r9", "r11", "memory", "cc"                    \
++              : "rcx", "r11", "memory", "cc"                                \
+       );                                                                    \
+       _ret;                                                                 \
+ })
+@@ -369,10 +374,10 @@ struct stat {
+                                                                             \
+       asm volatile (                                                        \
+               "syscall\n"                                                   \
+-              : "=a" (_ret), "=r"(_arg4), "=r"(_arg5)                       \
++              : "=a"(_ret)                                                  \
+               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+                 "0"(_num)                                                   \
+-              : "rcx", "r9", "r11", "memory", "cc"                          \
++              : "rcx", "r11", "memory", "cc"                                \
+       );                                                                    \
+       _ret;                                                                 \
+ })
+@@ -390,7 +395,7 @@ struct stat {
+                                                                             \
+       asm volatile (                                                        \
+               "syscall\n"                                                   \
+-              : "=a" (_ret), "=r"(_arg4), "=r"(_arg5)                       \
++              : "=a"(_ret)                                                  \
+               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+                 "r"(_arg6), "0"(_num)                                       \
+               : "rcx", "r11", "memory", "cc"                                \
+-- 
+2.35.1
+