(define_insn "sse4_1_phminposuw"
[(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
- (unspec:V8HI [(match_operand:V8HI 1 "vector_operand" "YrBm,*xBm,xm")]
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_operand" "Yrja,*xja,xjm")]
UNSPEC_PHMINPOSUW))]
"TARGET_SSE4_1"
"%vphminposuw\t{%1, %0|%0, %1}"
[(set_attr "isa" "noavx,noavx,avx")
+ (set_attr "gpr32" "0")
(set_attr "type" "sselog1")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "orig,orig,vex")
(define_insn "*<sse4_1>_ptest<mode>"
[(set (reg FLAGS_REG)
(unspec [(match_operand:V_AVX 0 "register_operand" "Yr, *x, x")
- (match_operand:V_AVX 1 "vector_operand" "YrBm, *xBm, xm")]
+ (match_operand:V_AVX 1 "vector_operand" "Yrja, *xja, xjm")]
UNSPEC_PTEST))]
"TARGET_SSE4_1 && ix86_match_ptest_ccmode (insn)"
"%vptest\t{%1, %0|%0, %1}"
[(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "ssecomi")
+ (set_attr "gpr32" "0")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "orig,orig,vex")
(set (attr "btver2_decode")
(define_insn "ptesttf2"
[(set (reg:CC FLAGS_REG)
(unspec:CC [(match_operand:TF 0 "register_operand" "Yr, *x, x")
- (match_operand:TF 1 "vector_operand" "YrBm, *xBm, xm")]
+ (match_operand:TF 1 "vector_operand" "Yrja, *xja, xjm")]
UNSPEC_PTEST))]
"TARGET_SSE4_1"
"%vptest\t{%1, %0|%0, %1}"
[(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "ssecomi")
+ (set_attr "gpr32" "0")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "orig,orig,vex")
(set_attr "mode" "TI")])
(define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
[(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
(unspec:VF_128_256
- [(match_operand:VF_128_256 1 "vector_operand" "YrBm,*xBm,xm")
+ [(match_operand:VF_128_256 1 "vector_operand" "Yrja,*xja,xjm")
(match_operand:SI 2 "const_0_to_15_operand")]
UNSPEC_ROUND))]
"TARGET_SSE4_1"
"%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "ssecvt")
+ (set_attr "gpr32" "0")
(set_attr "prefix_data16" "1,1,*")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
[(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
(vec_merge:VF_128
(unspec:VF_128
- [(match_operand:VF_128 2 "nonimmediate_operand" "Yrm,*xm,xm,vm")
+ [(match_operand:VF_128 2 "nonimmediate_operand" "Yrjm,*xjm,xjm,vm")
(match_operand:SI 3 "const_0_to_15_operand")]
UNSPEC_ROUND)
(match_operand:VF_128 1 "register_operand" "0,0,x,v")
(const_int 1)))]
"TARGET_SSE4_1"
- "@
- round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %3}
- round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %3}
- vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}
- vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
- [(set_attr "isa" "noavx,noavx,avx,avx512f")
+{
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ return "round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %3}";
+ case 2:
+ return "vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}";
+ case 3:
+ if (x86_evex_reg_mentioned_p (operands, 3))
+ return "vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}";
+ else
+ return "vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "isa" "noavx,noavx,noavx512f,avx512f")
(set_attr "type" "ssecvt")
+ (set_attr "gpr32" "0,0,0,1")
(set_attr "length_immediate" "1")
(set_attr "prefix_data16" "1,1,*,*")
(set_attr "prefix_extra" "1")
(vec_merge:VFH_128
(vec_duplicate:VFH_128
(unspec:<ssescalarmode>
- [(match_operand:<ssescalarmode> 2 "nonimmediate_operand" "Yrm,*xm,xm,vm")
+ [(match_operand:<ssescalarmode> 2 "nonimmediate_operand" "Yrjm,*xjm,xjm,vm")
(match_operand:SI 3 "const_0_to_15_operand")]
UNSPEC_ROUND))
(match_operand:VFH_128 1 "register_operand" "0,0,x,v")
(const_int 1)))]
"TARGET_SSE4_1"
- "@
- round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
- round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
- vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "isa" "noavx,noavx,avx,avx512f")
+{
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ return "round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}";
+ case 2:
+ return "vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+ case 3:
+ if (x86_evex_reg_mentioned_p (operands, 3) || <MODE>mode == V8HFmode)
+ return "vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+ else
+ return "vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "isa" "noavx,noavx,noavx512f,avx512f")
(set_attr "type" "ssecvt")
+ (set_attr "gpr32" "0,0,0,1")
(set_attr "length_immediate" "1")
(set_attr "prefix_data16" "1,1,*,*")
(set_attr "prefix_extra" "1")
(unspec:SI
[(match_operand:V16QI 1 "register_operand" "x,x")
(match_operand:SI 2 "register_operand" "a,a")
- (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
+ (match_operand:V16QI 3 "nonimmediate_operand" "x,jm")
(match_operand:SI 4 "register_operand" "d,d")
(match_operand:SI 5 "const_0_to_255_operand")]
UNSPEC_PCMPESTR))
"TARGET_SSE4_2"
"%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
[(set_attr "type" "sselog")
+ (set_attr "gpr32" "0")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "maybe_vex")
(set_attr "length_immediate" "1")
(unspec:V16QI
[(match_operand:V16QI 1 "register_operand" "x,x")
(match_operand:SI 2 "register_operand" "a,a")
- (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
+ (match_operand:V16QI 3 "nonimmediate_operand" "x,jm")
(match_operand:SI 4 "register_operand" "d,d")
(match_operand:SI 5 "const_0_to_255_operand")]
UNSPEC_PCMPESTR))
"TARGET_SSE4_2"
"%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
[(set_attr "type" "sselog")
+ (set_attr "gpr32" "0")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
(set_attr "prefix" "maybe_vex")
(unspec:CC
[(match_operand:V16QI 2 "register_operand" "x,x,x,x")
(match_operand:SI 3 "register_operand" "a,a,a,a")
- (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
+ (match_operand:V16QI 4 "nonimmediate_operand" "x,jm,x,jm")
(match_operand:SI 5 "register_operand" "d,d,d,d")
(match_operand:SI 6 "const_0_to_255_operand")]
UNSPEC_PCMPESTR))
%vpcmpestri\t{%6, %4, %2|%2, %4, %6}
%vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
[(set_attr "type" "sselog")
+ (set_attr "gpr32" "0")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
(set_attr "memory" "none,load,none,load")
[(set (match_operand:SI 0 "register_operand" "=c,c")
(unspec:SI
[(match_operand:V16QI 2 "register_operand" "x,x")
- (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
+ (match_operand:V16QI 3 "nonimmediate_operand" "x,jm")
(match_operand:SI 4 "const_0_to_255_operand")]
UNSPEC_PCMPISTR))
(set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
DONE;
}
[(set_attr "type" "sselog")
+ (set_attr "gpr32" "0")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
(set_attr "memory" "none,load")
[(set (match_operand:SI 0 "register_operand" "=c,c")
(unspec:SI
[(match_operand:V16QI 1 "register_operand" "x,x")
- (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
+ (match_operand:V16QI 2 "nonimmediate_operand" "x,jm")
(match_operand:SI 3 "const_0_to_255_operand")]
UNSPEC_PCMPISTR))
(set (reg:CC FLAGS_REG)
"TARGET_SSE4_2"
"%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
[(set_attr "type" "sselog")
+ (set_attr "gpr32" "0")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
(set_attr "prefix" "maybe_vex")
[(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
(unspec:V16QI
[(match_operand:V16QI 1 "register_operand" "x,x")
- (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
+ (match_operand:V16QI 2 "nonimmediate_operand" "x,jm")
(match_operand:SI 3 "const_0_to_255_operand")]
UNSPEC_PCMPISTR))
(set (reg:CC FLAGS_REG)
"TARGET_SSE4_2"
"%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
[(set_attr "type" "sselog")
+ (set_attr "gpr32" "0")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
(set_attr "prefix" "maybe_vex")
[(set (reg:CC FLAGS_REG)
(unspec:CC
[(match_operand:V16QI 2 "register_operand" "x,x,x,x")
- (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
+ (match_operand:V16QI 3 "nonimmediate_operand" "x,jm,x,jm")
(match_operand:SI 4 "const_0_to_255_operand")]
UNSPEC_PCMPISTR))
(clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
%vpcmpistri\t{%4, %3, %2|%2, %3, %4}
%vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
[(set_attr "type" "sselog")
+ (set_attr "gpr32" "0")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
(set_attr "memory" "none,load,none,load")
(define_insn "aesimc"
[(set (match_operand:V2DI 0 "register_operand" "=x")
- (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")]
+ (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xja")]
UNSPEC_AESIMC))]
"TARGET_AES"
"%vaesimc\t{%1, %0|%0, %1}"
[(set_attr "type" "sselog1")
+ (set_attr "gpr32" "0")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "aeskeygenassist"
[(set (match_operand:V2DI 0 "register_operand" "=x")
- (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")
+ (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xja")
(match_operand:SI 2 "const_0_to_255_operand")]
UNSPEC_AESKEYGENASSIST))]
"TARGET_AES"
"%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sselog1")
+ (set_attr "gpr32" "0")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
(set_attr "prefix" "maybe_vex")
DTYPE a[16];
} tmp_u;
-__attribute__((target("sse4.2")))
+__attribute__((target("sse4.2,aes")))
void sse_test ()
{
register tmp_u *tdst __asm__("%r16");
register tmp_u *src1 __asm__("%r17");
register tmp_u *src2 __asm__("%r18");
-
+
+ src1->xi[0] = _mm_minpos_epu16 (src1->xi[1]);
+ src1->a[2] = _mm_testc_si128 (src1->xi[3], src2->xi[4]);
+ src1->xf[3] = _mm_round_ss (src1->xf[5], src2->xf[6],
+ _MM_FROUND_CUR_DIRECTION);
+ src1->xf[4] = _mm_round_ps (src1->xf[7], _MM_FROUND_CUR_DIRECTION);
+ src1->xd[0] = _mm_round_sd (src1->xd[2], src2->xd[3],
+ _MM_FROUND_CUR_DIRECTION);
+ src1->xd[1] = _mm_round_pd (src1->xd[4], _MM_FROUND_CUR_DIRECTION);
+
src1->xi[0] = _mm_hadd_epi16 (tdst->xi[2], src2->xi[3]);
src1->xi[1] = _mm_hadd_epi32 (tdst->xi[0], src2->xi[1]);
tdst->xi[2] = _mm_hadds_epi16 (src1->xi[4], src2->xi[5]);
tdst->xi[1] = _mm_sign_epi8 (src1->xi[5], src2->xi[6]);
tdst->xi[2] = _mm_sign_epi16 (src1->xi[7], src2->xi[0]);
tdst->xi[3] = _mm_sign_epi32 (src1->xi[1], src2->xi[2]);
+
+ tdst->a[2] = _mm_cmpestri (src1->xi[3], 16, src2->xi[4], 16, 0x0c);
+ tdst->xi[4] = _mm_cmpestrm (src1->xi[3], 16, src2->xi[4], 16, 0x20);
+ tdst->a[5] = _mm_cmpistri (src1->xi[5], src2->xi[6], 0x30);
+ tdst->xi[6] = _mm_cmpistrm (src1->xi[5], src2->xi[6], 0x40);
+
+ tdst->xi[7] = _mm_aesimc_si128 (src1->xi[7]);
+ tdst->xi[0] = _mm_aeskeygenassist_si128 (src1->xi[1], 0x1b);
}
-__attribute__((target("avx2")))
+__attribute__((target("avx2,aes")))
void vex_test ()
{
register tmp_u *tdst __asm__("%r16");
register tmp_u *src1 __asm__("%r17");
register tmp_u *src2 __asm__("%r18");
-
+
+ src1->xi[0] = _mm_minpos_epu16 (src1->xi[1]);
+ src1->a[2] = _mm256_testc_si256 (src1->yi[2], src2->yi[3]);
+ src1->xf[3] = _mm_round_ss (src1->xf[5], src2->xf[6],
+ _MM_FROUND_CUR_DIRECTION);
+ src1->yf[4] = _mm256_round_ps (src1->yf[2], _MM_FROUND_CUR_DIRECTION);
+ src1->xd[0] = _mm_round_sd (src1->xd[2], src2->xd[3],
+ _MM_FROUND_CUR_DIRECTION);
+ src1->yd[1] = _mm256_round_pd (src1->yd[3], _MM_FROUND_CUR_DIRECTION);
+
src1->yi[1] = _mm256_hadd_epi16 (tdst->yi[2], src2->yi[3]);
src1->yi[2] = _mm256_hadd_epi32 (tdst->yi[0], src2->yi[1]);
tdst->yi[3] = _mm256_hadds_epi16 (src1->yi[1], src2->yi[2]);
src1->yi[1] = _mm256_cmpgt_epi64 (tdst->yi[3], src2->yi[0]);
tdst->yf[2] = _mm256_dp_ps (src1->yf[0], src2->yf[1], 0xbf);
- tdst->xd[3] = _mm_dp_pd (src1->xd[0], src2->xd[1], 0xbf);
tdst->yi[3] = _mm256_mpsadbw_epu8 (src1->yi[1], src2->yi[1], 0xc1);
tdst->yi[2] = _mm256_sign_epi8 (src1->yi[0], src2->yi[1]);
tdst->yi[3] = _mm256_sign_epi16 (src1->yi[2], src2->yi[3]);
tdst->yi[0] = _mm256_sign_epi32 (src1->yi[0], src2->yi[1]);
+
+ tdst->a[2] = _mm_cmpestri (src1->xi[3], 16, src2->xi[4], 16, 0x0c);
+ tdst->xi[4] = _mm_cmpestrm (src1->xi[3], 16, src2->xi[4], 16, 0x20);
+ tdst->a[5] = _mm_cmpistri (src1->xi[5], src2->xi[6], 0x30);
+ tdst->xi[6] = _mm_cmpistrm (src1->xi[5], src2->xi[6], 0x40);
+
+ tdst->xi[7] = _mm_aesimc_si128 (src1->xi[7]);
+ tdst->xi[0] = _mm_aeskeygenassist_si128 (src1->xi[1], 0x1b);
}
/* { dg-final { scan-assembler-not "v?pcmpeqq\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */
/* { dg-final { scan-assembler-not "v?psignb\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */
/* { dg-final { scan-assembler-not "v?psignw\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */
/* { dg-final { scan-assembler-not "v?psignd\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */
+/* Each check below fails if r16-r31 appears anywhere on the same assembly
+   line as the listed mnemonic.  "\[^\n\r\]*" keeps the match on one line;
+   the extended registers are used on purpose by other instructions.  */
+/* { dg-final { scan-assembler-not "v?phminposuw\[ \\t]+\[^\n\r\]*r\(1\[6-9\]\|2\[0-9\]\|30\|31\)" } } */
+/* { dg-final { scan-assembler-not "v?ptest\[ \\t]+\[^\n\r\]*r\(1\[6-9\]\|2\[0-9\]\|30\|31\)" } } */
+/* { dg-final { scan-assembler-not "v?roundss\[ \\t]+\[^\n\r\]*r\(1\[6-9\]\|2\[0-9\]\|30\|31\)" } } */
+/* { dg-final { scan-assembler-not "v?roundsd\[ \\t]+\[^\n\r\]*r\(1\[6-9\]\|2\[0-9\]\|30\|31\)" } } */
+/* { dg-final { scan-assembler-not "v?roundps\[ \\t]+\[^\n\r\]*r\(1\[6-9\]\|2\[0-9\]\|30\|31\)" } } */
+/* { dg-final { scan-assembler-not "v?roundpd\[ \\t]+\[^\n\r\]*r\(1\[6-9\]\|2\[0-9\]\|30\|31\)" } } */
+/* { dg-final { scan-assembler-not "v?pcmpestri\[ \\t]+\[^\n\r\]*r\(1\[6-9\]\|2\[0-9\]\|30\|31\)" } } */
+/* { dg-final { scan-assembler-not "v?pcmpistri\[ \\t]+\[^\n\r\]*r\(1\[6-9\]\|2\[0-9\]\|30\|31\)" } } */
+/* { dg-final { scan-assembler-not "v?pcmpestrm\[ \\t]+\[^\n\r\]*r\(1\[6-9\]\|2\[0-9\]\|30\|31\)" } } */
+/* { dg-final { scan-assembler-not "v?pcmpistrm\[ \\t]+\[^\n\r\]*r\(1\[6-9\]\|2\[0-9\]\|30\|31\)" } } */
+/* { dg-final { scan-assembler-not "v?aesimc\[ \\t]+\[^\n\r\]*r\(1\[6-9\]\|2\[0-9\]\|30\|31\)" } } */
+/* { dg-final { scan-assembler-not "v?aeskeygenassist\[ \\t]+\[^\n\r\]*r\(1\[6-9\]\|2\[0-9\]\|30\|31\)" } } */