Some constants can be built up using LI+RORI instructions.
The current implementation requires one of the upper 32 bits
to be a zero bit, which is not necessary.
Let's drop this requirement in order to be able to synthesize
a constant like 0xffffffff00ffffffL.
The tests for LI+RORI are made stricter to detect regressions
in the calculation of the LI constant and the rotation amount.
Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>
gcc/ChangeLog:
* config/riscv/riscv.cc (riscv_build_integer_1): Don't
require one zero bit in the upper 32 bits for LI+RORI synthesis.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/xtheadbb-li-rotr.c: New tests.
* gcc.target/riscv/zbb-li-rotr.c: Likewise.
int trailing_ones = ctz_hwi (~value);
/* If all bits are one except a few that are zero, and the zero bits
- are within a range of 11 bits, and at least one of the upper 32-bits
- is a zero, then we can generate a constant by loading a small
- negative constant and rotating. */
- if (leading_ones < 32
+ are within a range of 11 bits, then we can synthesize a constant
+ by loading a small negative constant and rotating. */
+ if (leading_ones < 64
&& ((64 - leading_ones - trailing_ones) < 12))
{
codes[0].code = UNKNOWN;
/* { dg-do compile } */
/* { dg-options "-march=rv64gc_xtheadbb" } */
+/* { dg-skip-if "" { *-*-* } {"-O0" "-Os" "-Og" "-Oz" "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+/*
+**li_th_srri_1:
+** li a[0-9]+,-18
+** th.srri a[0-9]+,a[0-9]+,21
+** ret
+*/
long
-li_rori (void)
+li_th_srri_1 (void)
{
return 0xffff77ffffffffffL;
}
+/*
+**li_th_srri_2:
+** li a[0-9]+,-18
+** th.srri a[0-9]+,a[0-9]+,5
+** ret
+*/
long
-li_rori_2 (void)
+li_th_srri_2 (void)
{
return 0x77ffffffffffffffL;
}
+/*
+**li_th_srri_3:
+** li a[0-9]+,-18
+** th.srri a[0-9]+,a[0-9]+,36
+** ret
+*/
long
-li_rori_3 (void)
+li_th_srri_3 (void)
{
return 0xfffffffeefffffffL;
}
+/*
+**li_th_srri_4:
+** li a[0-9]+,-86
+** th.srri a[0-9]+,a[0-9]+,3
+** ret
+*/
long
-li_rori_4 (void)
+li_th_srri_4 (void)
{
return 0x5ffffffffffffff5L;
}
+/*
+**li_th_srri_5:
+** li a[0-9]+,-86
+** th.srri a[0-9]+,a[0-9]+,4
+** ret
+*/
long
-li_rori_5 (void)
+li_th_srri_5 (void)
{
return 0xaffffffffffffffaL;
}
-/* { dg-final { scan-assembler-times "th.srri\t" 5 } } */
+/*
+**li_th_srri_6:
+** li a[0-9]+,-256
+** th.srri a[0-9]+,a[0-9]+,40
+** ret
+*/
+long
+li_th_srri_6 (void)
+{
+ return 0xffffffff00ffffffL;
+}
+
+/*
+**li_th_srri_7:
+** li a[0-9]+,-2048
+** th.srri a[0-9]+,a[0-9]+,16
+** ret
+*/
+long
+li_th_srri_7 (void)
+{
+ return 0xf800ffffffffffffL;
+}
/* { dg-do compile } */
/* { dg-options "-march=rv64gc_zbb -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } {"-O0" "-Os" "-Og" "-Oz" "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+/*
+**li_rori_1:
+** li a[0-9]+,-18
+** rori a[0-9]+,a[0-9]+,21
+** ret
+*/
long
-li_rori (void)
+li_rori_1 (void)
{
return 0xffff77ffffffffffL;
}
+/*
+**li_rori_2:
+** li a[0-9]+,-18
+** rori a[0-9]+,a[0-9]+,5
+** ret
+*/
long
li_rori_2 (void)
{
return 0x77ffffffffffffffL;
}
+/*
+**li_rori_3:
+** li a[0-9]+,-18
+** rori a[0-9]+,a[0-9]+,36
+** ret
+*/
long
li_rori_3 (void)
{
return 0xfffffffeefffffffL;
}
+/*
+**li_rori_4:
+** li a[0-9]+,-86
+** rori a[0-9]+,a[0-9]+,3
+** ret
+*/
long
li_rori_4 (void)
{
return 0x5ffffffffffffff5L;
}
+/*
+**li_rori_5:
+** li a[0-9]+,-86
+** rori a[0-9]+,a[0-9]+,4
+** ret
+*/
long
li_rori_5 (void)
{
return 0xaffffffffffffffaL;
}
+/*
+**li_rori_6:
+** li a[0-9]+,-256
+** rori a[0-9]+,a[0-9]+,40
+** ret
+*/
+long
+li_rori_6 (void)
+{
+ return 0xffffffff00ffffffL;
+}
-/* { dg-final { scan-assembler-times "rori\t" 5 } } */
+/*
+**li_rori_7:
+** li a[0-9]+,-2048
+** rori a[0-9]+,a[0-9]+,16
+** ret
+*/
+long
+li_rori_7 (void)
+{
+ return 0xf800ffffffffffffL;
+}