--- /dev/null
+From 9366c328518766d896155388726055624716c0af Mon Sep 17 00:00:00 2001
+From: Wilco Dijkstra <wilco.dijkstra@arm.com>
+Date: Tue, 10 Dec 2024 14:22:48 +0000
+Subject: [PATCH] arm: Fix LDRD register overlap [PR117675]
+
+The register indexed variants of LDRD have complex register overlap constraints
+which makes them hard to use without using output_move_double (which can't be
+used for atomics as it doesn't guarantee to emit atomic LDRD/STRD when required).
+Add a new predicate and constraint for plain LDRD/STRD with base or base+imm.
+This blocks register indexing and fixes PR117675.
+
+gcc:
+ PR target/117675
+ * config/arm/arm.cc (arm_ldrd_legitimate_address): New function.
+ * config/arm/arm-protos.h (arm_ldrd_legitimate_address): New prototype.
+ * config/arm/constraints.md: Add new Uo constraint.
+ * config/arm/predicates.md (arm_ldrd_memory_operand): Add new predicate.
+ * config/arm/sync.md (arm_atomic_loaddi2_ldrd): Use
+ arm_ldrd_memory_operand and Uo.
+
+gcc/testsuite:
+ PR target/117675
+ * gcc.target/arm/pr117675.c: Add new test.
+
+(cherry picked from commit 21fbfae2e55e1a153820acc6fbd922e66f67e65b)
+
+Upstream-Status: Backport [https://gcc.gnu.org/bugzilla/show_bug.cgi?id=117675]
+---
+ gcc/config/arm/arm-protos.h | 1 +
+ gcc/config/arm/arm.cc | 24 ++++++++++++++++++++++++
+ gcc/config/arm/constraints.md | 8 +++++++-
+ gcc/config/arm/predicates.md | 4 ++++
+ gcc/config/arm/sync.md | 2 +-
+ gcc/testsuite/gcc.target/arm/pr117675.c | 17 +++++++++++++++++
+ 6 files changed, 54 insertions(+), 2 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/arm/pr117675.c
+
+--- a/gcc/config/arm/arm-protos.h
++++ b/gcc/config/arm/arm-protos.h
+@@ -202,6 +202,7 @@ extern rtx arm_load_tp (rtx);
+ extern bool arm_coproc_builtin_available (enum unspecv);
+ extern bool arm_coproc_ldc_stc_legitimate_address (rtx);
+ extern rtx arm_stack_protect_tls_canary_mem (bool);
++extern bool arm_ldrd_legitimate_address (rtx);
+
+
+ #if defined TREE_CODE
+--- a/gcc/config/arm/arm.cc
++++ b/gcc/config/arm/arm.cc
+@@ -34523,6 +34523,30 @@ arm_coproc_ldc_stc_legitimate_address (r
+ return false;
+ }
+
++/* Return true if OP is a MEM that is valid for LDRD/STRD without any
++ register overlap restrictions. Only [base] and [base, imm] are allowed for now. */
++bool
++arm_ldrd_legitimate_address (rtx op)
++{
++ if (!MEM_P (op)) /* Only memory references qualify. */
++ return false;
++
++ op = XEXP (op, 0); /* From here on OP is the address, not the MEM. */
++ if (REG_P (op)) /* Plain [base]. */
++ return true;
++
++ if (GET_CODE (op) != PLUS) /* Anything else must be [base, imm]. */
++ return false;
++ if (!REG_P (XEXP (op, 0)) || !CONST_INT_P (XEXP (op, 1))) /* Rejects reg + reg. */
++ return false;
++
++ HOST_WIDE_INT val = INTVAL (XEXP (op, 1));
++
++ if (TARGET_ARM) /* Arm state: offset must lie within +/-255. */
++ return IN_RANGE (val, -255, 255);
++ return IN_RANGE (val, -1020, 1020) && (val & 3) == 0; /* Else +/-1020, multiple of 4. */
++}
++
+ /* Return the diagnostic message string if conversion from FROMTYPE to
+ TOTYPE is not allowed, NULL otherwise. */
+
+--- a/gcc/config/arm/constraints.md
++++ b/gcc/config/arm/constraints.md
+@@ -39,7 +39,7 @@
+ ;; in all states: Pg
+
+ ;; The following memory constraints have been used:
+-;; in ARM/Thumb-2 state: Uh, Ut, Uv, Uy, Un, Um, Us, Up, Uf, Ux, Ul
++;; in ARM/Thumb-2 state: Uh, Ut, Uv, Uy, Un, Um, Us, Uo, Up, Uf, Ux, Ul, Uz
+ ;; in ARM state: Uq
+ ;; in Thumb state: Uu, Uw
+ ;; in all states: Q
+@@ -585,6 +585,12 @@
+ (and (match_code "mem")
+ (match_test "arm_coproc_ldc_stc_legitimate_address (op)")))
+
++(define_memory_constraint "Uo"
++ "@internal
++ A memory operand for Arm/Thumb-2 LDRD/STRD"
++ (and (match_code "mem")
++ (match_test "arm_ldrd_legitimate_address (op)"))) ; [base] or [base, imm] only
++
+ ;; We used to have constraint letters for S and R in ARM state, but
+ ;; all uses of these now appear to have been removed.
+
+--- a/gcc/config/arm/predicates.md
++++ b/gcc/config/arm/predicates.md
+@@ -849,6 +849,10 @@
+ (and (match_operand 0 "memory_operand")
+ (match_code "reg" "0")))
+
++;; True if the operand is a memory reference suitable for an ldrd/strd.
++(define_predicate "arm_ldrd_memory_operand"
++ (match_test "arm_ldrd_legitimate_address (op)")) ; the helper also rejects non-MEM
++
+ ;; Predicates for parallel expanders based on mode.
+ (define_special_predicate "vect_par_constant_high"
+ (match_code "parallel")
+--- a/gcc/config/arm/sync.md
++++ b/gcc/config/arm/sync.md
+@@ -161,7 +161,7 @@
+ (define_insn "arm_atomic_loaddi2_ldrd"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec_volatile:DI
+- [(match_operand:DI 1 "memory_operand" "m")]
++ [(match_operand:DI 1 "arm_ldrd_memory_operand" "Uo")]
+ VUNSPEC_LDRD_ATOMIC))]
+ "ARM_DOUBLEWORD_ALIGN && TARGET_HAVE_LPAE"
+ "ldrd\t%0, %H0, %1"
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/arm/pr117675.c
+@@ -0,0 +1,17 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -marm" } */
++/* { dg-require-effective-target arm_arch_v7ve_neon_ok } */
++/* { dg-add-options arm_arch_v7ve_neon } */
++/* { dg-final { check-function-bodies "**" "" "" } } */
++
++/*
++** f1:
++** add r0, r0, r1
++** ldrd r0, r1, \[r0\]
++** bx lr
++*/
++long long f1 (char *p, int i) /* p + i is a base + register-index address. */
++{
++ return __atomic_load_n ((long long *)(p + i), __ATOMIC_RELAXED);
++}
++