From: Juergen Christ Date: Thu, 28 Aug 2025 13:48:31 +0000 (+0200) Subject: s390: Implement clz and ctz for SI mode X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f50cff9766c5e42e1f5d20fe3e6c135601341663;p=thirdparty%2Fgcc.git s390: Implement clz and ctz for SI mode To properly implement __builtin_ffs for SI mode, implement clz and (for >= z17) ctz for SI mode. Otherwise, gcc falls back to a libcall which causes problems for Linux kernel code. Also adjust the C?Z_DEFINED_VALUE_AT_ZERO macros to return 2. Since the optabs now return exactly the value set by these macros, return value 2 is more appropriate and leads to better code. gcc/ChangeLog: * config/s390/s390.h (CLZ_DEFINED_VALUE_AT_ZERO): Adjust and return 2. (CTZ_DEFINED_VALUE_AT_ZERO): Return 2. * config/s390/s390.md (clzsi2): Implement. (ctzsi2): Implement. gcc/testsuite/ChangeLog: * gcc.dg/vect/pr109011-2.c: Fix expected outcome. * gcc.dg/vect/pr109011-4.c: Fix expected outcome. * gcc.target/s390/ffs-1.c: New test. Signed-off-by: Juergen Christ --- diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h index 8b04bc9a755..6478be8c7ac 100644 --- a/gcc/config/s390/s390.h +++ b/gcc/config/s390/s390.h @@ -1001,10 +1001,10 @@ do { \ #define FUNCTION_MODE QImode /* Specify the value which is used when clz operand is zero. */ -#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 64, 1) +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = GET_MODE_PRECISION (MODE), 2) /* Specify the value which is used when ctz operand is zero. */ -#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 64, 1) +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 64, 2) /* Machine-specific symbol_ref flags. */ #define SYMBOL_FLAG_ALIGN_SHIFT SYMBOL_FLAG_MACH_DEP_SHIFT diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index 858387cd85c..06876a5563a 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -9738,6 +9738,19 @@ "flogr\t%0,%1" [(set_attr "op_type" "RRE")]) +(define_expand "clzsi2" + [(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "")] + "TARGET_EXTIMM && TARGET_ZARCH" +{ + rtx extreg = gen_reg_rtx (DImode); + rtx clzreg = gen_reg_rtx (DImode); + emit_insn (gen_zero_extendsidi2 (extreg, operands[1])); + emit_insn (gen_clzdi2 (clzreg, extreg)); + rtx truncreg = gen_lowpart (SImode, clzreg); + emit_insn (gen_addsi3 (operands[0], truncreg, GEN_INT (-32))); + DONE; +}) ;; ;; Count Trailing Zeros. @@ -9750,6 +9763,16 @@ "ctzg\t%0,%1" [(set_attr "op_type" "RRE")]) +(define_expand "ctzsi2" + [(set (match_dup 2) + (zero_extend:DI (match_operand:SI 1 "register_operand" ""))) + (set (match_dup 3) (ctz:DI (match_dup 2))) + (set (match_operand:SI 0 "register_operand" "") (subreg:SI (match_dup 3) 4))] + "TARGET_Z17 && TARGET_64BIT" +{ + operands[2] = gen_reg_rtx (DImode); + operands[3] = gen_reg_rtx (DImode); +}) ;; ;;- Rotate instructions. diff --git a/gcc/testsuite/gcc.dg/vect/pr109011-2.c b/gcc/testsuite/gcc.dg/vect/pr109011-2.c index 4c7e6ad07a4..dc62d01da5d 100644 --- a/gcc/testsuite/gcc.dg/vect/pr109011-2.c +++ b/gcc/testsuite/gcc.dg/vect/pr109011-2.c @@ -31,5 +31,4 @@ baz (int *p, int *q) /* { dg-final { scan-tree-dump-times " = \.CLZ \\\(vect" 3 "optimized" { target { { { { i?86-*-* x86_64-*-* } && avx512cd } && lzcnt } && bmi } } } } */ /* { dg-final { scan-tree-dump-times " = \.CTZ \\\(vect" 3 "optimized" { target powerpc_vsx } } } */ -/* { dg-final { scan-tree-dump-times " = \.CTZ \\\(vect" 2 "optimized" { target s390_vx } } } */ -/* { dg-final { scan-tree-dump-times " = \.POPCOUNT \\\(vect" 1 "optimized" { target s390_vx } } } */ +/* { dg-final { scan-tree-dump-times " = \.CTZ \\\(vect" 3 "optimized" { target s390_vx } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr109011-4.c b/gcc/testsuite/gcc.dg/vect/pr109011-4.c index 38b2ab4d511..8440ec73080 100644 --- a/gcc/testsuite/gcc.dg/vect/pr109011-4.c +++ b/gcc/testsuite/gcc.dg/vect/pr109011-4.c @@ -31,5 +31,4 @@ baz (long long *p, long long *q) /* { dg-final { scan-tree-dump-times " = \.CLZ \\\(vect" 3 "optimized" { target { { { { i?86-*-* x86_64-*-* } && avx512cd } && lzcnt } && bmi } } } } */ /* { dg-final { scan-tree-dump-times " = \.CTZ \\\(vect" 3 "optimized" { target powerpc_vsx } } } */ -/* { dg-final { scan-tree-dump-times " = \.CTZ \\\(vect" 2 "optimized" { target s390_vx } } } */ -/* { dg-final { scan-tree-dump-times " = \.POPCOUNT \\\(vect" 1 "optimized" { target s390_vx } } } */ +/* { dg-final { scan-tree-dump-times " = \.CTZ \\\(vect" 3 "optimized" { target s390_vx } } } */ diff --git a/gcc/testsuite/gcc.target/s390/ffs-1.c b/gcc/testsuite/gcc.target/s390/ffs-1.c new file mode 100644 index 00000000000..79774d29dda --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/ffs-1.c @@ -0,0 +1,18 @@ +/* Check that __builtin_ffs does not expand to libcall. This is required by + Linux kernel code since libcalls are not present there. */ +/* { dg-do compile } */ +/* { dg-options "-march=z10" } */ + +long +fool (long x) +{ + return __builtin_ffsl (x); +} + +int +foo (int x) +{ + return __builtin_ffs (x); +} + +/* { dg-final { scan-assembler-not "brasl" } } */