DONE;
})
+;; convert (truncate)(~x >> imm) into (truncate)(((u16)-1 - x) >> imm)
+;; because it will result in the 'not' being replaced with a constant load
+;; which allows for better loop optimization.
+;; We limit this to truncations that take the upper half and shift it to the
+;; lower half as we use subhn (patterns that would have generated an shrn
+;; otherwise).
+;; On some implementations the use of subhn also results in better throughput.
+;; Matches: op0 = truncate ((~op1) >> op2), with op2 restricted by the
+;; aarch64_simd_shift_imm_vec_exact_top predicate (defined elsewhere).
+;; The "#" output template means the insn is never printed as-is; the split
+;; condition "&& true" is appended to the insn condition, so every match is
+;; replaced by the sequence emitted in the C block below.  The
+;; [(const_int 0)] replacement is only a placeholder because that block
+;; emits the new insns itself and ends with DONE.
+;; Operand 0 is earlyclobber ("=&w"): when no pseudo can be created the
+;; all-ones scratch reuses operand 0's register and is written before
+;; operand 1 is read, so the two must not share a register.
+(define_insn_and_split "*shrn_to_subhn_<mode>"
+ [(set (match_operand:<VNARROWQ> 0 "register_operand" "=&w")
+ (truncate:<VNARROWQ>
+ (lshiftrt:VQN
+ (not:VQN (match_operand:VQN 1 "register_operand" "w"))
+ (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_exact_top"))))]
+ "TARGET_SIMD"
+ "#"
+ "&& true"
+ [(const_int 0)]
+{
+ /* Scratch that will hold the all-ones vector in the wide mode.  Use a
+    fresh pseudo when one can still be created; otherwise reuse operand 0's
+    register number, viewed in <MODE>mode.  */
+ rtx tmp;
+ if (can_create_pseudo_p ())
+ tmp = gen_reg_rtx (<MODE>mode);
+ else
+ tmp = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
+ /* Load -1 into every element: ~x == -1 - x, which is what lets the NOT be
+    expressed as a subtraction from a constant.  */
+ emit_move_insn (tmp, CONSTM1_RTX (<MODE>mode));
+ /* Emit the subhn with (dest, all-ones, x, shift amount).  The generator is
+    defined elsewhere; presumably it yields the narrowed high half of
+    tmp - operands[1] -- confirm against its definition.  */
+ emit_insn (gen_aarch64_subhn<mode>_insn (operands[0], tmp,
+ operands[1], operands[2]));
+ DONE;
+})
+
+
;; pmul.
(define_insn "aarch64_pmul<mode>"
--- /dev/null
+/* This test case checks that replacing a not+shift by a subtraction from -1 (subhn) works. */
+/* { dg-do compile } */
+/* { dg-additional-options "-O1" } */
+/* { dg-final { scan-assembler-times "\\tsubhn\\t" 6 } } */
+
+#include<arm_neon.h>
+
+/* not+shift form, 16-bit lanes: bitwise NOT of each lane of a, then a
+   narrowing right shift by 8 (half the lane width).  Contributes to the
+   subhn count checked by this file's scan-assembler-times directive.  */
+uint8x8_t neg_narrow_v8hi(uint16x8_t a) {
+ uint16x8_t b = vmvnq_u16(a);
+ return vshrn_n_u16(b, 8);
+}
+
+/* Explicit form, 16-bit lanes: vsubhn of an all-ones (0xffff) vector and a,
+   written directly with the intrinsic rather than as not+shift.  */
+uint8x8_t neg_narrow_vsubhn_v8hi(uint16x8_t a) {
+ uint16x8_t ones = vdupq_n_u16(0xffff);
+ return vsubhn_u16(ones, a);
+}
+
+/* not+shift form, 32-bit lanes: bitwise NOT of each lane of a, then a
+   narrowing right shift by 16 (half the lane width).  */
+uint16x4_t neg_narrow_v4si(uint32x4_t a) {
+ uint32x4_t b = vmvnq_u32(a);
+ return vshrn_n_u32(b, 16);
+}
+
+/* Explicit form, 32-bit lanes: vsubhn of an all-ones (0xffffffff) vector
+   and a, written directly with the intrinsic.  */
+uint16x4_t neg_narrow_vsubhn_v4si(uint32x4_t a) {
+ uint32x4_t ones = vdupq_n_u32(0xffffffff);
+ return vsubhn_u32(ones, a);
+}
+
+/* not+shift form, 64-bit lanes: the NOT is written with the C `~` operator
+   on the vector type instead of a vmvnq_* intrinsic, followed by a
+   narrowing right shift by 32 (half the lane width).  */
+uint32x2_t neg_narrow_v2di(uint64x2_t a) {
+ uint64x2_t b = ~a;
+ return vshrn_n_u64(b, 32);
+}
+
+/* Explicit form, 64-bit lanes: vsubhn of an all-ones (0xffffffffffffffff)
+   vector and a, written directly with the intrinsic.  */
+uint32x2_t neg_narrow_vsubhn_v2di(uint64x2_t a) {
+ uint64x2_t ones = vdupq_n_u64(0xffffffffffffffff);
+ return vsubhn_u64(ones, a);
+}