(V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
(V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+(define_mode_iterator V48_AVX512VL_4
+  [(V4SF "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
+   (V4SI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL")])
+
+(define_mode_iterator VI48_AVX512VL_4
+  [(V4SI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL")])
+
+(define_mode_iterator V8_AVX512VL_2
+  [(V2DF "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
+
(define_mode_iterator VFH_AVX10_2
[(V32HF "TARGET_AVX10_2") V16HF V8HF
(V16SF "TARGET_AVX10_2") V8SF V4SF
(unspec:<V48H_AVX512VL:avx512fmaskmode>
[(match_operand:V48H_AVX512VL 1 "nonimmediate_operand" "v")
(match_operand:V48H_AVX512VL 2 "nonimmediate_operand" "vm")
- (match_operand:SI 3 "const_0_to_7_operand" "n")]
+ (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
UNSPEC_PCMP)))]
"TARGET_AVX512F
&& (!VALID_MASK_AVX512BW_MODE (<SWI248x:MODE>mode) || TARGET_AVX512BW)
(unspec:<V48H_AVX512VL:avx512fmaskmode>
[(match_operand:V48H_AVX512VL 1 "nonimmediate_operand")
(match_operand:V48H_AVX512VL 2 "nonimmediate_operand")
- (match_operand:SI 3 "const_0_to_7_operand")]
+ (match_operand:SI 3 "<cmp_imm_predicate>")]
UNSPEC_PCMP)))
(set (match_operand:<V48H_AVX512VL:avx512fmaskmode> 4 "register_operand")
(unspec:<V48H_AVX512VL:avx512fmaskmode>
(match_operand:V48H_AVX512VL 2 "nonimmediate_operand")
(match_operand:SI 3 "<cmp_imm_predicate>" "n")]
UNSPEC_PCMP)))]
- "TARGET_AVX512F && ix86_pre_reload_split ()"
+ "TARGET_AVX512F && GET_MODE_NUNITS (<MODE>mode) >= 8
+ && ix86_pre_reload_split ()"
"#"
"&& 1"
[(set (match_dup 0)
UNSPEC_PCMP))]
"operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);")
+(define_insn "*<avx512>_cmp<mode>3_and15"
+ [(set (match_operand:QI 0 "register_operand" "=k")
+ (and:QI
+ (unspec:QI
+ [(match_operand:V48_AVX512VL_4 1 "nonimmediate_operand" "v")
+ (match_operand:V48_AVX512VL_4 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
+ UNSPEC_PCMP)
+ (const_int 15)))]
+ "TARGET_AVX512F"
+ "v<ssecmpintprefix>cmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "*<avx512>_ucmp<mode>3_and15"
+ [(set (match_operand:QI 0 "register_operand" "=k")
+ (and:QI
+ (unspec:QI
+ [(match_operand:VI48_AVX512VL_4 1 "nonimmediate_operand" "v")
+ (match_operand:VI48_AVX512VL_4 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "const_0_to_7_operand" "n")]
+ UNSPEC_UNSIGNED_PCMP)
+ (const_int 15)))]
+ "TARGET_AVX512F"
+ "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "*<avx512>_cmp<mode>3_and3"
+ [(set (match_operand:QI 0 "register_operand" "=k")
+ (and:QI
+ (unspec:QI
+ [(match_operand:V8_AVX512VL_2 1 "nonimmediate_operand" "v")
+ (match_operand:V8_AVX512VL_2 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
+ UNSPEC_PCMP)
+ (const_int 3)))]
+ "TARGET_AVX512F"
+ "v<ssecmpintprefix>cmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "*avx512vl_ucmpv2di3_and3"
+ [(set (match_operand:QI 0 "register_operand" "=k")
+ (and:QI
+ (unspec:QI
+ [(match_operand:V2DI 1 "nonimmediate_operand" "v")
+ (match_operand:V2DI 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "const_0_to_7_operand" "n")]
+ UNSPEC_UNSIGNED_PCMP)
+ (const_int 3)))]
+ "TARGET_AVX512F"
+ "vpcmpuq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
(define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
(unspec:<avx512fmaskmode>
(match_operand:VI48_AVX512VL 2 "nonimmediate_operand")
(match_operand:SI 3 "const_0_to_7_operand")]
UNSPEC_UNSIGNED_PCMP)))]
- "TARGET_AVX512F && ix86_pre_reload_split ()"
+ "TARGET_AVX512F && ix86_pre_reload_split ()
+ && GET_MODE_NUNITS (<MODE>mode) >= 8"
"#"
"&& 1"
[(set (match_dup 0)
/* { dg-do compile } */
/* { dg-options "-mavx512vl -mavx512bw -mavx512dq -O2 -masm=att -mstv -mno-stackrealign" } */
/* { dg-final { scan-assembler-not {not[bwlqd]\]} } } */
-/* { dg-final { scan-assembler-times {(?n)vpcmp[bwdq][ \t]*\$5} 4} } */
-/* { dg-final { scan-assembler-times {(?n)vpcmp[bwdq][ \t]*\$6} 4} } */
+/* { dg-final { scan-assembler-times {(?n)vpcmp[bwdq][ \t]*\$5} 2} } */
+/* { dg-final { scan-assembler-times {(?n)vpcmp[bwdq][ \t]*\$6} 3} } */
/* { dg-final { scan-assembler-times {(?n)vpcmp[bwdq][ \t]*\$[37]} 4} } */
-/* { dg-final { scan-assembler-times {(?n)vcmpp[sd][ \t]*\$5} 2} } */
-/* { dg-final { scan-assembler-times {(?n)vcmpp[sd][ \t]*\$6} 2} } */
+/* { dg-final { scan-assembler-times {(?n)vcmpp[sd][ \t]*\$6} 1} } */
/* { dg-final { scan-assembler-times {(?n)vcmpp[sd][ \t]*\$7} 2} } */
#include<immintrin.h>
FOO (__m128i,, epi8, __mmask16, 128, 1);
FOO (__m128i,, epi16, __mmask8, 128, 1);
-FOO (__m128i,, epi32, __mmask8, 128, 1);
-FOO (__m128i,, epi64, __mmask8, 128, 1);
FOO (__m256i, 256, epi8, __mmask32, 256, 2);
FOO (__m256i, 256, epi16, __mmask16, 256, 2);
FOO (__m256i, 256, epi32, __mmask8, 256, 2);
-FOO (__m256i, 256, epi64, __mmask8, 256, 2);
FOO (__m512i, 512, epi8, __mmask64, 512, 3);
FOO (__m512i, 512, epi16, __mmask32, 512, 3);
FOO (__m512i, 512, epi32, __mmask16, 512, 3);
FOO (__m512i, 512, epi64, __mmask8, 512, 3);
-FOO (__m128,, ps, __mmask8, 128, 1);
-FOO (__m128d,, pd, __mmask8, 128, 1);
FOO (__m256, 256, ps, __mmask8, 256, 2);
-FOO (__m256d, 256, pd, __mmask8, 256, 2);
FOO (__m512, 512, ps, __mmask16, 512, 3);
FOO (__m512d, 512, pd, __mmask8, 512, 3);
avx512f_test (void)
{
m = _mm512_cmpgt_epu64_mask (x, x);
- m = _mm512_mask_cmpgt_epu64_mask (3, x, x);
+ m = _mm512_mask_cmpgt_epu64_mask (5, x, x);
}
--- /dev/null
+/* PR target/103750 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-not "and" } } */
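+/* Each mask below is ANDed with a constant that already covers every
+   meaningful bit of the 4- or 2-element compare result, so the AND
+   should be optimized away and no "and" instruction emitted.  */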
+
+#include <immintrin.h>
+extern __m128i* pi128;
+extern __m256i* pi256;
+
+extern __m128* ps128;
+extern __m256* ps256;
+
+extern __m128d* pd128;
+extern __m256d* pd256;
+
+extern char a;
+void
+foo ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epu32_mask (pi128[0], pi128[1]);
+ a = mask1 & 15;
+}
+
+void
+foo1 ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epu64_mask (pi128[0], pi128[1]);
+ a = mask1 & 3;
+}
+
+void
+foo2 ()
+{
+ __mmask8 mask1 = _mm256_cmpeq_epu64_mask (pi256[0], pi256[1]);
+ a = mask1 & 15;
+}
+
+void
+sign_foo ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epi32_mask (pi128[0], pi128[1]);
+ a = mask1 & 15;
+}
+
+void
+sign_foo1 ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epi64_mask (pi128[0], pi128[1]);
+ a = mask1 & 3;
+}
+
+void
+sign_foo2 ()
+{
+ __mmask8 mask1 = _mm256_cmpeq_epi64_mask (pi256[0], pi256[1]);
+ a = mask1 & 15;
+}
+
+void
+float_foo ()
+{
+ __mmask8 mask1 = _mm_cmp_ps_mask (ps128[0], ps128[1], 1);
+ a = mask1 & 15;
+}
+
+void
+double_foo ()
+{
+ __mmask8 mask1 = _mm_cmp_pd_mask (pd128[0], pd128[1], 1);
+ a = mask1 & 3;
+}
+
+void
+double_foo2 ()
+{
+ __mmask8 mask1 = _mm256_cmp_pd_mask (pd256[0], pd256[1], 1);
+ a = mask1 & 15;
+}
{
m = _mm_cmpeq_epi64_mask (x128, x128);
m = _mm256_cmpeq_epi64_mask (x256, x256);
- m = _mm_mask_cmpeq_epi64_mask (3, x128, x128);
- m = _mm256_mask_cmpeq_epi64_mask (3, x256, x256);
+ m = _mm_mask_cmpeq_epi64_mask (5, x128, x128);
+ m = _mm256_mask_cmpeq_epi64_mask (5, x256, x256);
}
{
m = _mm_cmpeq_epu64_mask (x128, x128);
m = _mm256_cmpeq_epu64_mask (x256, x256);
- m = _mm_mask_cmpeq_epu64_mask (3, x128, x128);
- m = _mm256_mask_cmpeq_epu64_mask (3, x256, x256);
+ m = _mm_mask_cmpeq_epu64_mask (5, x128, x128);
+ m = _mm256_mask_cmpeq_epu64_mask (5, x256, x256);
}
{
m = _mm_cmpge_epi64_mask (x128, x128);
m = _mm256_cmpge_epi64_mask (x256, x256);
- m = _mm_mask_cmpge_epi64_mask (3, x128, x128);
- m = _mm256_mask_cmpge_epi64_mask (3, x256, x256);
+ m = _mm_mask_cmpge_epi64_mask (5, x128, x128);
+ m = _mm256_mask_cmpge_epi64_mask (5, x256, x256);
}
{
m = _mm_cmpge_epu64_mask (x128, x128);
m = _mm256_cmpge_epu64_mask (x256, x256);
- m = _mm_mask_cmpge_epu64_mask (3, x128, x128);
- m = _mm256_mask_cmpge_epu64_mask (3, x256, x256);
+ m = _mm_mask_cmpge_epu64_mask (5, x128, x128);
+ m = _mm256_mask_cmpge_epu64_mask (5, x256, x256);
}
{
m = _mm_cmpgt_epi64_mask (x128, x128);
m = _mm256_cmpgt_epi64_mask (x256, x256);
- m = _mm_mask_cmpgt_epi64_mask (3, x128, x128);
- m = _mm256_mask_cmpgt_epi64_mask (3, x256, x256);
+ m = _mm_mask_cmpgt_epi64_mask (5, x128, x128);
+ m = _mm256_mask_cmpgt_epi64_mask (5, x256, x256);
}
{
m = _mm_cmpgt_epu64_mask (x128, x128);
m = _mm256_cmpgt_epu64_mask (x256, x256);
- m = _mm_mask_cmpgt_epu64_mask (3, x128, x128);
- m = _mm256_mask_cmpgt_epu64_mask (3, x256, x256);
+ m = _mm_mask_cmpgt_epu64_mask (5, x128, x128);
+ m = _mm256_mask_cmpgt_epu64_mask (5, x256, x256);
}
{
m = _mm_cmple_epi64_mask (x128, x128);
m = _mm256_cmple_epi64_mask (x256, x256);
- m = _mm_mask_cmple_epi64_mask (3, x128, x128);
- m = _mm256_mask_cmple_epi64_mask (3, x256, x256);
+ m = _mm_mask_cmple_epi64_mask (5, x128, x128);
+ m = _mm256_mask_cmple_epi64_mask (5, x256, x256);
}
{
m = _mm_cmple_epu64_mask (x128, x128);
m = _mm256_cmple_epu64_mask (x256, x256);
- m = _mm_mask_cmple_epu64_mask (3, x128, x128);
- m = _mm256_mask_cmple_epu64_mask (3, x256, x256);
+ m = _mm_mask_cmple_epu64_mask (5, x128, x128);
+ m = _mm256_mask_cmple_epu64_mask (5, x256, x256);
}
{
m = _mm_cmplt_epi64_mask (x128, x128);
m = _mm256_cmplt_epi64_mask (x256, x256);
- m = _mm_mask_cmplt_epi64_mask (3, x128, x128);
- m = _mm256_mask_cmplt_epi64_mask (3, x256, x256);
+ m = _mm_mask_cmplt_epi64_mask (5, x128, x128);
+ m = _mm256_mask_cmplt_epi64_mask (5, x256, x256);
}
{
m = _mm_cmplt_epu64_mask (x128, x128);
m = _mm256_cmplt_epu64_mask (x256, x256);
- m = _mm_mask_cmplt_epu64_mask (3, x128, x128);
- m = _mm256_mask_cmplt_epu64_mask (3, x256, x256);
+ m = _mm_mask_cmplt_epu64_mask (5, x128, x128);
+ m = _mm256_mask_cmplt_epu64_mask (5, x256, x256);
}
{
m = _mm_cmpneq_epi64_mask (x128, x128);
m = _mm256_cmpneq_epi64_mask (x256, x256);
- m = _mm_mask_cmpneq_epi64_mask (3, x128, x128);
- m = _mm256_mask_cmpneq_epi64_mask (3, x256, x256);
+ m = _mm_mask_cmpneq_epi64_mask (5, x128, x128);
+ m = _mm256_mask_cmpneq_epi64_mask (5, x256, x256);
}
{
m = _mm_cmpneq_epu64_mask (x128, x128);
m = _mm256_cmpneq_epu64_mask (x256, x256);
- m = _mm_mask_cmpneq_epu64_mask (3, x128, x128);
- m = _mm256_mask_cmpneq_epu64_mask (3, x256, x256);
+ m = _mm_mask_cmpneq_epu64_mask (5, x128, x128);
+ m = _mm256_mask_cmpneq_epu64_mask (5, x256, x256);
}