extern enum rtx_code xtensa_shlrd_which_direction (rtx, rtx);
extern bool xtensa_split1_finished_p (void);
extern void xtensa_split_DI_reg_imm (rtx *);
+extern char *xtensa_bswapsi2_output (rtx_insn *, const char *);
#ifdef TREE_CODE
extern void init_cumulative_args (CUMULATIVE_ARGS *, int);
}
+/* Helper for building the asm output string of the bswapsi2_internal insn
+   pattern.  The caller scans the basic block backwards from the insn being
+   output and asks this function to classify each insn INSN it meets with
+   respect to the SAR register:
+
+     1   INSN is another bswapsi2_internal, which always sets SAR to 8, so
+         the instruction that sets SAR to 8 can be omitted;
+    -1   INSN is a CALL, an ASM, or some other insn that clobbers SAR, so
+         the instruction that sets SAR to 8 must be prepended as usual;
+     0   INSN has no effect on SAR and the scan should continue.  */
+
+static int
+xtensa_bswapsi2_output_1 (rtx_insn *insn)
+{
+  /* Calls do not preserve SAR.  Jumps only appear at the end of a basic
+     block, so they never need to be considered when scanning backwards.  */
+  if (CALL_P (insn))
+    return -1;
+
+  const int code = INSN_CODE (insn);
+
+  /* An earlier bswapsi2_internal always sets SAR to 8.  */
+  if (code == CODE_FOR_bswapsi2_internal)
+    return 1;
+
+  /* Rotate insns clobber SAR.  */
+  if (code == CODE_FOR_rotlsi3 || code == CODE_FOR_rotrsi3)
+    return -1;
+
+  /* Simple shift insns clobber SAR unless the shift amount is an
+     immediate.  */
+  if ((code == CODE_FOR_ashlsi3_internal
+       || code == CODE_FOR_ashrsi3
+       || code == CODE_FOR_lshrsi3)
+      && ! CONST_INT_P (XEXP (SET_SRC (PATTERN (insn)), 1)))
+    return -1;
+
+  /* The "*shift_per_byte" and "*shlrd_*" complex shift insns clobber SAR;
+     they are recognized by their pattern names.  */
+  if (code >= CODE_FOR_nothing)
+    {
+      const char *name = insn_data[code].name;
+
+      if (strcmp (name, "*shift_per_byte") == 0
+          || strncmp (name, "*shlrd_", 7) == 0)
+        return -1;
+    }
+
+  /* Anything that is not an ordinary insn is of no further interest.  */
+  if (! NONJUMP_INSN_P (insn))
+    return 0;
+
+  /* Asm statements may clobber SAR as well, so they are always treated
+     as clobbering it.  */
+  rtx body = PATTERN (insn);
+  switch (GET_CODE (body))
+    {
+    case ASM_OPERANDS:
+      return -1;
+
+    case SET:
+      if (GET_CODE (SET_SRC (body)) == ASM_OPERANDS)
+        return -1;
+      return 0;
+
+    case PARALLEL:
+      {
+        /* Only the first element of the PARALLEL is inspected.  */
+        rtx first = XVECEXP (body, 0, 0);
+
+        if (GET_CODE (first) == ASM_OPERANDS
+            || GET_CODE (first) == ASM_INPUT)
+          return -1;
+        if (GET_CODE (first) == SET
+            && GET_CODE (SET_SRC (first)) == ASM_OPERANDS)
+          return -1;
+        return 0;
+      }
+
+    default:
+      /* All other insns are not interested in SAR.  */
+      return 0;
+    }
+}
+
+/* Return the asm output string for a bswapsi2_internal insn.
+
+   INSN is the insn being output and OUTPUT is the template of the
+   byte-swapping instructions themselves.  The basic block is scanned
+   backwards from INSN: if another bswapsi2_internal is met before any insn
+   that clobbers SAR, OUTPUT is returned without a prefix; otherwise (a
+   clobbering insn is met first, or nothing is found) an instruction that
+   sets SAR to 8 is prepended.
+
+   The result is held in a static buffer that the next call overwrites.
+   A template that does not fit in the buffer aborts the compiler instead
+   of overrunning the buffer or emitting truncated assembly.  */
+
+char *
+xtensa_bswapsi2_output (rtx_insn *insn, const char *output)
+{
+  static char result[128];
+  const char *prefix = "ssai\t8\n\t";
+  int len;
+
+  while ((insn = prev_nonnote_nondebug_insn_bb (insn)) != NULL)
+    {
+      int sar_state = xtensa_bswapsi2_output_1 (insn);
+
+      /* SAR was clobbered: keep the instruction that sets it to 8.  */
+      if (sar_state < 0)
+        break;
+
+      /* SAR was already set to 8 by an earlier bswapsi2_internal.  */
+      if (sar_state > 0)
+        {
+          prefix = "";
+          break;
+        }
+    }
+
+  /* Bounded formatting: never write past the end of RESULT.  */
+  len = snprintf (result, sizeof result, "%s%s", prefix, output);
+  if (len < 0 || (size_t) len >= sizeof result)
+    abort ();
+
+  return result;
+}
+
+
/* Try to split an integer value into what are suitable for two consecutive
immediate addition instructions, ADDI or ADDMI. */
})
+;; Byte-swap a 32-bit register with an SRLI followed by three SRC
+;; instructions.  The output string is built by xtensa_bswapsi2_output,
+;; which prepends "ssai 8" unless an earlier bswapsi2_internal in the same
+;; basic block has already set SAR to 8 and nothing has clobbered it since.
+;; The first alternative ties the input to the output and works in the
+;; scratch operand 2; the second uses an early-clobber output and needs
+;; no scratch.
(define_insn "bswapsi2_internal"
- [(set (match_operand:SI 0 "register_operand" "=a,&a")
- (bswap:SI (match_operand:SI 1 "register_operand" "0,r")))
- (clobber (match_scratch:SI 2 "=&a,X"))]
+ [(set (match_operand:SI 0 "register_operand")
+ (bswap:SI (match_operand:SI 1 "register_operand")))
+ (clobber (match_scratch:SI 2))]
 "!optimize_debug && optimize > 1 && !optimize_size"
-{
- rtx_insn *prev_insn = prev_nonnote_nondebug_insn (insn);
- const char *init = "ssai\t8\;";
- static char result[128];
- if (prev_insn && NONJUMP_INSN_P (prev_insn))
- {
- rtx x = PATTERN (prev_insn);
- if (GET_CODE (x) == PARALLEL && XVECLEN (x, 0) == 2
- && GET_CODE (XVECEXP (x, 0, 0)) == SET
- && GET_CODE (XVECEXP (x, 0, 1)) == CLOBBER)
- {
- x = XEXP (XVECEXP (x, 0, 0), 1);
- if (GET_CODE (x) == BSWAP && GET_MODE (x) == SImode)
- init = "";
- }
- }
- sprintf (result,
- (which_alternative == 0)
- ? "%s" "srli\t%%2, %%1, 16\;src\t%%2, %%2, %%1\;src\t%%2, %%2, %%2\;src\t%%0, %%1, %%2"
- : "%s" "srli\t%%0, %%1, 16\;src\t%%0, %%0, %%1\;src\t%%0, %%0, %%0\;src\t%%0, %%1, %%0",
- init);
- return result;
-}
- [(set_attr "type" "arith,arith")
- (set_attr "mode" "SI")
- (set_attr "length" "15,15")])
+ {@ [cons: =0, 1, =2; attrs: type, length]
+ [ a, 0, &a; arith, 15] << xtensa_bswapsi2_output (insn, "srli\t%2, %1, 16\;src\t%2, %2, %1\;src\t%2, %2, %2\;src\t%0, %1, %2");
+ [&a, r, X; arith, 15] << xtensa_bswapsi2_output (insn, "srli\t%0, %1, 16\;src\t%0, %0, %1\;src\t%0, %0, %0\;src\t%0, %1, %0");
+ }
+ [(set_attr "mode" "SI")])
(define_expand "bswapdi2"
[(set (match_operand:DI 0 "register_operand" "")
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+extern void foo(void);
+
+/* Interleave 32-bit byte swaps with statements that either leave SAR alone
+   or are expected to clobber it.  The numbered notes mark the swaps that
+   are expected to need their own SAR setup; the accesses are volatile so
+   that every statement is kept.  */
+void test_0(volatile unsigned int a[], unsigned int b)
+{
+ a[0] = __builtin_bswap32(a[0]); /* (1) first swap: SAR setup expected */
+ a[1] = a[1] >> 9; /* constant shift amount: not expected to clobber SAR */
+ a[2] = __builtin_bswap32(a[2]); /* SAR setup expected to be omitted */
+ a[3] = a[3] << b; /* variable shift amount: clobbers SAR */
+ a[4] = __builtin_bswap32(a[4]); /* (2) SAR setup expected */
+ foo(); /* calls do not preserve SAR */
+ a[5] = __builtin_bswap32(a[5]); /* (3) SAR setup expected */
+ a[6] = __builtin_stdc_rotate_left (a[6], 13); /* rotate: clobbers SAR */
+ a[7] = __builtin_bswap32(a[7]); /* (4) SAR setup expected */
+ asm volatile ("# asm volatile"); /* asm is treated as clobbering SAR */
+ a[8] = __builtin_bswap32(a[8]); /* (5) SAR setup expected */
+ a[9] = (a[9] << 9) | (b >> 23); /* presumably a "*shlrd_*" funnel shift: clobbers SAR */
+ a[10] = __builtin_bswap32(a[10]); /* (6) SAR setup expected */
+}
+
+/* Two back-to-back 64-bit byte swaps.  NOTE(review): presumably each one
+   expands to a pair of 32-bit swaps, and all of them share the single SAR
+   setup that brings the expected total to 7 -- confirm against the
+   bswapdi2 expander.  */
+void test_1(volatile unsigned long long a[])
+{
+ a[0] = __builtin_bswap64(a[0]); /* (7) only SAR setup expected in this function */
+ a[1] = __builtin_bswap64(a[1]); /* no further SAR setup expected */
+}
+
+/* { dg-final { scan-assembler-times "ssai\t8" 7 } } */