From a50b4406e5d64adb6bb0ebde3710f6742a0bdbed Mon Sep 17 00:00:00 2001 From: Kishan Parmar Date: Tue, 4 Nov 2025 12:41:28 +0530 Subject: [PATCH] simplify-rtx: Canonicalize SUBREG and LSHIFTRT order for AND operations For a given rtx expression (and (lshiftrt (subreg X) shift) mask) the combine pass tries to simplify the RTL form to (and (subreg (lshiftrt X shift)) mask) where the SUBREG wraps the result of the shift. This leaves the AND and the shift in different modes, which complicates recognition. This patch instead canonicalizes it back to (and (lshiftrt (subreg X) shift) mask) where the SUBREG is inside the shift and both operations share the same mode. This form is easier to recognize across targets and enables cleaner pattern matching. This patch adds a check in simplify-rtx to perform this transformation when safe: the SUBREG must be a lowpart, the shift amount must be valid, and the precision of the operation must be preserved. Tested on powerpc64le-linux-gnu, powerpc64-linux-gnu, and x86_64-pc-linux-gnu with no regressions. On rs6000, the change reduces insn counts due to improved matching. 2025-11-04 Kishan Parmar gcc/ChangeLog: PR rtl-optimization/93738 * simplify-rtx.cc (simplify_binary_operation_1): Canonicalize SUBREG(LSHIFTRT) into LSHIFTRT(SUBREG) when valid. gcc/testsuite/ChangeLog: PR rtl-optimization/93738 * gcc.target/powerpc/rlwimi-2.c: Update expected rldicl count. --- gcc/simplify-rtx.cc | 40 +++++++++++++++++++++ gcc/testsuite/gcc.target/powerpc/rlwimi-2.c | 2 +- 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc index 53592d2a2f4d..59a86c6c6cd5 100644 --- a/gcc/simplify-rtx.cc +++ b/gcc/simplify-rtx.cc @@ -4184,6 +4184,46 @@ simplify_context::simplify_binary_operation_1 (rtx_code code, not do an AND. */ if ((nzop0 & ~val1) == 0) return op0; + + /* Canonicalize (and (subreg (lshiftrt X shift)) mask) into + (and (lshiftrt (subreg X) shift) mask). + + Keeps shift and AND in the same mode, improving recognition. 
+ Only applied when subreg is a lowpart, shift is valid, + and no precision is lost. */ + if (SUBREG_P (op0) && subreg_lowpart_p (op0) + && GET_CODE (XEXP (op0, 0)) == LSHIFTRT + && CONST_INT_P (XEXP (XEXP (op0, 0), 1)) + && INTVAL (XEXP (XEXP (op0, 0), 1)) >= 0 + && INTVAL (XEXP (XEXP (op0, 0), 1)) < HOST_BITS_PER_WIDE_INT + && ((INTVAL (XEXP (XEXP (op0, 0), 1)) + + floor_log2 (val1)) + < GET_MODE_PRECISION (as_a <scalar_int_mode> (mode)))) + { + tem = XEXP (XEXP (op0, 0), 0); + if (SUBREG_P (tem)) + { + if (subreg_lowpart_p (tem)) + tem = SUBREG_REG (tem); + else + tem = NULL_RTX; + } + if (tem != NULL_RTX) + { + offset = subreg_lowpart_offset (mode, GET_MODE (tem)); + tem = simplify_gen_subreg (mode, tem, GET_MODE (tem), + offset); + if (tem) + { + unsigned shiftamt = INTVAL (XEXP (XEXP (op0, 0), 1)); + rtx shiftamtrtx = gen_int_shift_amount (mode, + shiftamt); + op0 = simplify_gen_binary (LSHIFTRT, mode, tem, + shiftamtrtx); + return simplify_gen_binary (AND, mode, op0, op1); + } + } + } } nzop1 = nonzero_bits (trueop1, mode); /* If we are clearing all the nonzero bits, the result is zero. */ diff --git a/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c b/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c index bafa371db73f..afbde0e5fc60 100644 --- a/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c +++ b/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c @@ -6,7 +6,7 @@ /* { dg-final { scan-assembler-times {(?n)^\s+blr} 6750 } } */ /* { dg-final { scan-assembler-times {(?n)^\s+mr} 643 { target ilp32 } } } */ /* { dg-final { scan-assembler-times {(?n)^\s+mr} 11 { target lp64 } } } */ -/* { dg-final { scan-assembler-times {(?n)^\s+rldicl} 7790 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {(?n)^\s+rldicl} 6754 { target lp64 } } } */ /* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1692 { target ilp32 } } } */ /* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1666 { target lp64 } } } */ -- 2.47.3