From: Julian Seward
Date: Sat, 22 Oct 2011 09:32:16 +0000 (+0000)
Subject: VEX side fixes to match r12190, which is a fix for #279698 (incorrect
X-Git-Tag: svn/VALGRIND_3_7_0^2~5
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=578a006a4aebbace309554b523f00a0041b34f2b;p=thirdparty%2Fvalgrind.git

VEX side fixes to match r12190, which is a fix for #279698 (incorrect
Memcheck handling of saturating narrowing operations.)

git-svn-id: svn://svn.valgrind.org/vex/trunk@2223
---
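Note on semantics: the NarrowBin* primops and helpers added here truncate
each lane (simply dropping the high bits), in contrast to the existing
QNarrowBin* ops, which saturate.  A minimal standalone sketch of the
per-lane difference (illustrative only; qnarrow32Sto16U's body is not
shown in this patch, so the saturating model below is an assumption):

   #include <stdint.h>

   /* Truncating, as in the new narrow32to16: keep the low 16 bits. */
   static uint16_t trunc32to16 ( uint32_t xx )
   {
      return (uint16_t)xx;
   }

   /* Saturating signed-to-unsigned, modelling qnarrow32Sto16U
      (assumed behaviour): clamp the signed lane into [0, 65535]. */
   static uint16_t sat32Sto16U ( uint32_t xx0 )
   {
      int32_t xx = (int32_t)xx0;
      if (xx < 0)     xx = 0;
      if (xx > 65535) xx = 65535;
      return (uint16_t)xx;
   }

   /* For the lane 0x00012345: truncating gives 0x2345, saturating
      gives 0xFFFF.  For 0xFFFF8000 (-32768 signed): truncating gives
      0x8000, saturating gives 0. */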
diff --git a/VEX/priv/host_amd64_isel.c b/VEX/priv/host_amd64_isel.c
index 86c089d1a3..ca8bdb4e64 100644
--- a/VEX/priv/host_amd64_isel.c
+++ b/VEX/priv/host_amd64_isel.c
@@ -1100,6 +1100,10 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
             fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; break;
          case Iop_QNarrowBin16Sto8Ux8:
             fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; break;
+         case Iop_NarrowBin16to8x8:
+            fn = (HWord)h_generic_calc_NarrowBin16to8x8; break;
+         case Iop_NarrowBin32to16x4:
+            fn = (HWord)h_generic_calc_NarrowBin32to16x4; break;
 
          case Iop_QSub8Sx8:
             fn = (HWord)h_generic_calc_QSub8Sx8; break;
@@ -3665,6 +3669,12 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
       case Iop_QNarrowBin32Sto16Ux8:
          fn = (HWord)h_generic_calc_QNarrowBin32Sto16Ux8;
          goto do_SseAssistedBinary;
+      case Iop_NarrowBin16to8x16:
+         fn = (HWord)h_generic_calc_NarrowBin16to8x16;
+         goto do_SseAssistedBinary;
+      case Iop_NarrowBin32to16x8:
+         fn = (HWord)h_generic_calc_NarrowBin32to16x8;
+         goto do_SseAssistedBinary;
       do_SseAssistedBinary: {
          /* RRRufff! RRRufff code is what we're generating here.  Oh
             well. */
diff --git a/VEX/priv/host_generic_simd128.c b/VEX/priv/host_generic_simd128.c
index e29a2eb554..e9d1437485 100644
--- a/VEX/priv/host_generic_simd128.c
+++ b/VEX/priv/host_generic_simd128.c
@@ -118,7 +118,19 @@ static inline UShort qnarrow32Sto16U ( UInt xx0 )
    return (UShort)xx;
 }
 
-void h_generic_calc_Mul32x4 ( /*OUT*/V128* res,
+static inline UShort narrow32to16 ( UInt xx )
+{
+   return (UShort)xx;
+}
+
+static inline UChar narrow16to8 ( UShort xx )
+{
+   return (UChar)xx;
+}
+
+
+void VEX_REGPARM(3)
+     h_generic_calc_Mul32x4 ( /*OUT*/V128* res,
                               V128* argL, V128* argR )
 {
    res->w32[0] = mul32(argL->w32[0], argR->w32[0]);
@@ -127,7 +139,8 @@ void h_generic_calc_Mul32x4 ( /*OUT*/V128* res,
    res->w32[3] = mul32(argL->w32[3], argR->w32[3]);
 }
 
-void h_generic_calc_Max32Sx4 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+     h_generic_calc_Max32Sx4 ( /*OUT*/V128* res,
                                V128* argL, V128* argR )
 {
    res->w32[0] = max32S(argL->w32[0], argR->w32[0]);
@@ -136,7 +149,8 @@ void h_generic_calc_Max32Sx4 ( /*OUT*/V128* res,
    res->w32[3] = max32S(argL->w32[3], argR->w32[3]);
 }
 
-void h_generic_calc_Min32Sx4 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+     h_generic_calc_Min32Sx4 ( /*OUT*/V128* res,
                                V128* argL, V128* argR )
 {
    res->w32[0] = min32S(argL->w32[0], argR->w32[0]);
@@ -145,7 +159,8 @@ void h_generic_calc_Min32Sx4 ( /*OUT*/V128* res,
    res->w32[3] = min32S(argL->w32[3], argR->w32[3]);
 }
 
-void h_generic_calc_Max32Ux4 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+     h_generic_calc_Max32Ux4 ( /*OUT*/V128* res,
                                V128* argL, V128* argR )
 {
    res->w32[0] = max32U(argL->w32[0], argR->w32[0]);
@@ -154,7 +169,8 @@ void h_generic_calc_Max32Ux4 ( /*OUT*/V128* res,
    res->w32[3] = max32U(argL->w32[3], argR->w32[3]);
 }
 
-void h_generic_calc_Min32Ux4 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+     h_generic_calc_Min32Ux4 ( /*OUT*/V128* res,
                                V128* argL, V128* argR )
 {
    res->w32[0] = min32U(argL->w32[0], argR->w32[0]);
@@ -163,7 +179,8 @@ void h_generic_calc_Min32Ux4 ( /*OUT*/V128* res,
    res->w32[3] = min32U(argL->w32[3], argR->w32[3]);
 }
 
-void h_generic_calc_Max16Ux8 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+     h_generic_calc_Max16Ux8 ( /*OUT*/V128* res,
                                V128* argL, V128* argR )
 {
    res->w16[0] = max16U(argL->w16[0], argR->w16[0]);
@@ -176,7 +193,8 @@ void h_generic_calc_Max16Ux8 ( /*OUT*/V128* res,
    res->w16[7] = max16U(argL->w16[7], argR->w16[7]);
 }
 
-void h_generic_calc_Min16Ux8 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+     h_generic_calc_Min16Ux8 ( /*OUT*/V128* res,
                                V128* argL, V128* argR )
 {
    res->w16[0] = min16U(argL->w16[0], argR->w16[0]);
@@ -189,7 +207,8 @@ void h_generic_calc_Min16Ux8 ( /*OUT*/V128* res,
    res->w16[7] = min16U(argL->w16[7], argR->w16[7]);
 }
 
-void h_generic_calc_Max8Sx16 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+     h_generic_calc_Max8Sx16 ( /*OUT*/V128* res,
                                V128* argL, V128* argR )
 {
    res->w8[ 0] = max8S(argL->w8[ 0], argR->w8[ 0]);
@@ -210,7 +229,8 @@ void h_generic_calc_Max8Sx16 ( /*OUT*/V128* res,
    res->w8[15] = max8S(argL->w8[15], argR->w8[15]);
 }
 
-void h_generic_calc_Min8Sx16 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+     h_generic_calc_Min8Sx16 ( /*OUT*/V128* res,
                                V128* argL, V128* argR )
 {
    res->w8[ 0] = min8S(argL->w8[ 0], argR->w8[ 0]);
@@ -231,14 +251,16 @@ void h_generic_calc_Min8Sx16 ( /*OUT*/V128* res,
    res->w8[15] = min8S(argL->w8[15], argR->w8[15]);
 }
 
-void h_generic_calc_CmpEQ64x2 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+     h_generic_calc_CmpEQ64x2 ( /*OUT*/V128* res,
                                 V128* argL, V128* argR )
 {
    res->w64[0] = cmpEQ64(argL->w64[0], argR->w64[0]);
    res->w64[1] = cmpEQ64(argL->w64[1], argR->w64[1]);
 }
 
-void h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+     h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128* res,
                                  V128* argL, V128* argR )
 {
    res->w64[0] = cmpGT64S(argL->w64[0], argR->w64[0]);
@@ -252,7 +274,8 @@ void h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128* res,
    semantics of these primops (Sar64x2, etc) it is an error if in
    fact we are ever given an out-of-range shift amount.
 */
-void h_generic_calc_SarN64x2 ( /*OUT*/V128* res,
+void /*not-regparm*/
+     h_generic_calc_SarN64x2 ( /*OUT*/V128* res,
                                V128* argL, UInt nn)
 {
    /* vassert(nn < 64); */
@@ -261,7 +284,8 @@ void h_generic_calc_SarN64x2 ( /*OUT*/V128* res,
    res->w64[1] = sar64(argL->w64[1], nn);
 }
 
-void h_generic_calc_SarN8x16 ( /*OUT*/V128* res,
+void /*not-regparm*/
+     h_generic_calc_SarN8x16 ( /*OUT*/V128* res,
                                V128* argL, UInt nn)
 {
    /* vassert(nn < 8); */
@@ -284,7 +308,8 @@ void h_generic_calc_SarN8x16 ( /*OUT*/V128* res,
    res->w8[15] = sar8(argL->w8[15], nn);
 }
 
-void h_generic_calc_QNarrowBin32Sto16Ux8 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+     h_generic_calc_QNarrowBin32Sto16Ux8 ( /*OUT*/V128* res,
                                            V128* argL, V128* argR )
 {
    res->w16[0] = qnarrow32Sto16U(argR->w32[0]);
@@ -297,6 +322,42 @@ void h_generic_calc_QNarrowBin32Sto16Ux8 ( /*OUT*/V128* res,
    res->w16[7] = qnarrow32Sto16U(argL->w32[3]);
 }
 
+void VEX_REGPARM(3)
+     h_generic_calc_NarrowBin16to8x16 ( /*OUT*/V128* res,
+                                        V128* argL, V128* argR )
+{
+   res->w8[ 0] = narrow16to8(argR->w16[0]);
+   res->w8[ 1] = narrow16to8(argR->w16[1]);
+   res->w8[ 2] = narrow16to8(argR->w16[2]);
+   res->w8[ 3] = narrow16to8(argR->w16[3]);
+   res->w8[ 4] = narrow16to8(argR->w16[4]);
+   res->w8[ 5] = narrow16to8(argR->w16[5]);
+   res->w8[ 6] = narrow16to8(argR->w16[6]);
+   res->w8[ 7] = narrow16to8(argR->w16[7]);
+   res->w8[ 8] = narrow16to8(argL->w16[0]);
+   res->w8[ 9] = narrow16to8(argL->w16[1]);
+   res->w8[10] = narrow16to8(argL->w16[2]);
+   res->w8[11] = narrow16to8(argL->w16[3]);
+   res->w8[12] = narrow16to8(argL->w16[4]);
+   res->w8[13] = narrow16to8(argL->w16[5]);
+   res->w8[14] = narrow16to8(argL->w16[6]);
+   res->w8[15] = narrow16to8(argL->w16[7]);
+}
+
+void VEX_REGPARM(3)
+     h_generic_calc_NarrowBin32to16x8 ( /*OUT*/V128* res,
+                                        V128* argL, V128* argR )
+{
+   res->w16[0] = narrow32to16(argR->w32[0]);
+   res->w16[1] = narrow32to16(argR->w32[1]);
+   res->w16[2] = narrow32to16(argR->w32[2]);
+   res->w16[3] = narrow32to16(argR->w32[3]);
+   res->w16[4] = narrow32to16(argL->w32[0]);
+   res->w16[5] = narrow32to16(argL->w32[1]);
+   res->w16[6] = narrow32to16(argL->w32[2]);
+   res->w16[7] = narrow32to16(argL->w32[3]);
+}
+
 
 /*---------------------------------------------------------------*/
 /*--- end host_generic_simd128.c ---*/
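(The VEX_REGPARM(3) annotations above put these helpers on a
register-parameter calling convention, matching the 3-regparm calls the
instruction selectors emit.  As a rough sketch of the intended effect --
the real definition lives elsewhere in VEX, so treat this as an
assumption -- on 32-bit x86 gcc builds it expands along the lines of

   /* assumed definition, for illustration only */
   #define VEX_REGPARM(_n)  __attribute__((regparm(_n)))

so the first three arguments travel in registers rather than on the
stack, and to nothing on targets whose ABI already passes arguments in
registers.  The SarN64x2/SarN8x16 helpers are deliberately left on the
default convention, as flagged by the /*not-regparm*/ comments.)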
diff --git a/VEX/priv/host_generic_simd128.h b/VEX/priv/host_generic_simd128.h
index 797617823c..6e37a870b3 100644
--- a/VEX/priv/host_generic_simd128.h
+++ b/VEX/priv/host_generic_simd128.h
@@ -45,26 +45,43 @@
 
 #include "libvex_basictypes.h"
 
-/* DO NOT MAKE THESE INTO REGPARM FNS!  THIS WILL BREAK CALLING
-   SEQUENCES GENERATED BY host-x86/isel.c.
-*/
-
-extern void h_generic_calc_Mul32x4    ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_Max32Sx4   ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_Min32Sx4   ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_Max32Ux4   ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_Min32Ux4   ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_Max16Ux8   ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_Min16Ux8   ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_Max8Sx16   ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_Min8Sx16   ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_CmpEQ64x2  ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_SarN64x2   ( /*OUT*/V128*, V128*, UInt );
-extern void h_generic_calc_SarN8x16   ( /*OUT*/V128*, V128*, UInt );
-
-extern void h_generic_calc_QNarrowBin32Sto16Ux8
+extern VEX_REGPARM(3)
+       void h_generic_calc_Mul32x4    ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_Max32Sx4   ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_Min32Sx4   ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_Max32Ux4   ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_Min32Ux4   ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_Max16Ux8   ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_Min16Ux8   ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_Max8Sx16   ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_Min8Sx16   ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_CmpEQ64x2  ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128*, V128*, V128* );
+
+extern /*not-regparm*/
+       void h_generic_calc_SarN64x2   ( /*OUT*/V128*, V128*, UInt );
+extern /*not-regparm*/
+       void h_generic_calc_SarN8x16   ( /*OUT*/V128*, V128*, UInt );
+
+extern VEX_REGPARM(3)
+       void h_generic_calc_QNarrowBin32Sto16Ux8
+              ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_NarrowBin16to8x16
+              ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_NarrowBin32to16x8
               ( /*OUT*/V128*, V128*, V128* );
-
 
 #endif /* ndef __VEX_HOST_GENERIC_SIMD128_H */
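The 64-bit helpers added below in host_generic_simd64.c follow the same
lane ordering as the 128-bit versions above: the first operand (aa/argL)
supplies the high half of the result and the second (bb/argR) the low
half.  A self-contained model of h_generic_calc_NarrowBin32to16x4 with
one worked value -- a sketch only, using plain stdint types in place of
VEX's ULong/UInt and assuming mk16x4 packs its first argument into the
highest lane:

   #include <stdio.h>
   #include <stdint.h>

   static uint64_t narrowBin32to16x4 ( uint64_t aa, uint64_t bb )
   {
      uint16_t d = (uint16_t)(aa >> 32);  /* sel32x2_1(aa), truncated */
      uint16_t c = (uint16_t)aa;          /* sel32x2_0(aa), truncated */
      uint16_t b = (uint16_t)(bb >> 32);  /* sel32x2_1(bb), truncated */
      uint16_t a = (uint16_t)bb;          /* sel32x2_0(bb), truncated */
      return ((uint64_t)d << 48) | ((uint64_t)c << 32)
             | ((uint64_t)b << 16) | (uint64_t)a;
   }

   int main ( void )
   {
      /* prints 1111222233334444: each 32-bit lane loses its high 16
         bits, and aa lands in the high half of the result */
      printf("%016llx\n",
             (unsigned long long)
                narrowBin32to16x4(0x1111111122222222ULL,
                                  0x3333333344444444ULL));
      return 0;
   }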
diff --git a/VEX/priv/host_generic_simd64.c b/VEX/priv/host_generic_simd64.c
index 61bdbd3e68..693d796994 100644
--- a/VEX/priv/host_generic_simd64.c
+++ b/VEX/priv/host_generic_simd64.c
@@ -296,6 +296,16 @@ static inline UChar qnarrow16Sto8U ( UShort xx0 )
    return (UChar)xx;
 }
 
+static inline UShort narrow32to16 ( UInt xx )
+{
+   return (UShort)xx;
+}
+
+static inline UChar narrow16to8 ( UShort xx )
+{
+   return (UChar)xx;
+}
+
 /* shifts: we don't care about out-of-range ones, since
    that is dealt with at a higher level.
 */
@@ -817,6 +827,44 @@ ULong h_generic_calc_QNarrowBin16Sto8Ux8 ( ULong aa, ULong bb )
           );
 }
 
+/* ------------ Truncating narrowing ------------ */
+
+ULong h_generic_calc_NarrowBin32to16x4 ( ULong aa, ULong bb )
+{
+   UInt d = sel32x2_1(aa);
+   UInt c = sel32x2_0(aa);
+   UInt b = sel32x2_1(bb);
+   UInt a = sel32x2_0(bb);
+   return mk16x4(
+             narrow32to16(d),
+             narrow32to16(c),
+             narrow32to16(b),
+             narrow32to16(a)
+          );
+}
+
+ULong h_generic_calc_NarrowBin16to8x8 ( ULong aa, ULong bb )
+{
+   UShort h = sel16x4_3(aa);
+   UShort g = sel16x4_2(aa);
+   UShort f = sel16x4_1(aa);
+   UShort e = sel16x4_0(aa);
+   UShort d = sel16x4_3(bb);
+   UShort c = sel16x4_2(bb);
+   UShort b = sel16x4_1(bb);
+   UShort a = sel16x4_0(bb);
+   return mk8x8(
+             narrow16to8(h),
+             narrow16to8(g),
+             narrow16to8(f),
+             narrow16to8(e),
+             narrow16to8(d),
+             narrow16to8(c),
+             narrow16to8(b),
+             narrow16to8(a)
+          );
+}
+
 /* ------------ Interleaving ------------ */
 
 ULong h_generic_calc_InterleaveHI8x8 ( ULong aa, ULong bb )
diff --git a/VEX/priv/host_generic_simd64.h b/VEX/priv/host_generic_simd64.h
index 1807ed7f68..6275480c72 100644
--- a/VEX/priv/host_generic_simd64.h
+++ b/VEX/priv/host_generic_simd64.h
@@ -90,6 +90,8 @@ extern ULong h_generic_calc_CmpNEZ8x8 ( ULong );
 extern ULong h_generic_calc_QNarrowBin32Sto16Sx4 ( ULong, ULong );
 extern ULong h_generic_calc_QNarrowBin16Sto8Sx8  ( ULong, ULong );
 extern ULong h_generic_calc_QNarrowBin16Sto8Ux8  ( ULong, ULong );
+extern ULong h_generic_calc_NarrowBin32to16x4 ( ULong, ULong );
+extern ULong h_generic_calc_NarrowBin16to8x8 ( ULong, ULong );
 
 extern ULong h_generic_calc_InterleaveHI8x8 ( ULong, ULong );
 extern ULong h_generic_calc_InterleaveLO8x8 ( ULong, ULong );
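The do_SseAssistedBinary case added to host_x86_isel.c below (copied
from the amd64 version, as its comment notes) amounts to the following
C-level pattern -- a sketch only: the real code emits instructions and
works on virtual registers, and the V128 typedef here merely stands in
for VEX's actual type:

   #include <string.h>
   #include <stdint.h>

   typedef struct { uint8_t b[16]; } V128;  /* stand-in for VEX's V128 */

   static void sseAssistedBinary ( void (*fn)(V128*, V128*, V128*),
                                   V128* dst, V128* argL, V128* argR )
   {
      /* subl $112, %esp: carve out scratch space on the stack */
      uint8_t space[112];
      /* leal 48(%esp),%r_argp ; andl $-16,%r_argp: derive a 16-aligned
         pointer into it, with room for three V128s */
      V128* argp = (V128*)((uintptr_t)(space + 48) & ~(uintptr_t)15);
      /* the two movupd stores: spill the args at 16(%r_argp)
         and 32(%r_argp) */
      memcpy(&argp[1], argL, sizeof(V128));
      memcpy(&argp[2], argR, sizeof(V128));
      /* the leal/call sequence: pass the three pointers in registers
         (3-regparm) and let the helper write the result at 0(%r_argp) */
      fn(&argp[0], &argp[1], &argp[2]);
      /* the final movupd load: fetch the result */
      memcpy(dst, &argp[0], sizeof(V128));
   }

The reload goes through argp rather than %esp because, as the comment in
the hunk says, the register allocator keeps argp alive across the call.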
diff --git a/VEX/priv/host_x86_isel.c b/VEX/priv/host_x86_isel.c
index 7ec6305705..3b0d995495 100644
--- a/VEX/priv/host_x86_isel.c
+++ b/VEX/priv/host_x86_isel.c
@@ -42,6 +42,7 @@
 #include "main_globals.h"
 #include "host_generic_regs.h"
 #include "host_generic_simd64.h"
+#include "host_generic_simd128.h"
 #include "host_x86_defs.h"
 
 /* TODO 21 Apr 2005:
@@ -2392,6 +2393,10 @@ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
             fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; goto binnish;
          case Iop_QNarrowBin16Sto8Ux8:
             fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; goto binnish;
+         case Iop_NarrowBin16to8x8:
+            fn = (HWord)h_generic_calc_NarrowBin16to8x8; goto binnish;
+         case Iop_NarrowBin32to16x4:
+            fn = (HWord)h_generic_calc_NarrowBin32to16x4; goto binnish;
 
          case Iop_QSub8Sx8:
            fn = (HWord)h_generic_calc_QSub8Sx8; goto binnish;
@@ -3135,6 +3140,7 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
 #  define SSE2_OR_ABOVE \
       (env->hwcaps & VEX_HWCAPS_X86_SSE2)
 
+   HWord     fn = 0; /* address of helper fn, if required */
    MatchInfo mi;
    Bool      arg1isEReg = False;
    X86SseOp  op = Xsse_INVALID;
@@ -3601,6 +3607,59 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
          return dst;
       }
 
+      case Iop_NarrowBin32to16x8:
+         fn = (HWord)h_generic_calc_NarrowBin32to16x8;
+         goto do_SseAssistedBinary;
+      case Iop_NarrowBin16to8x16:
+         fn = (HWord)h_generic_calc_NarrowBin16to8x16;
+         goto do_SseAssistedBinary;
+      do_SseAssistedBinary: {
+         /* As with the amd64 case (where this is copied from) we
+            generate pretty bad code.
+         */
+         vassert(fn != 0);
+         HReg dst = newVRegV(env);
+         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
+         HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
+         HReg argp = newVRegI(env);
+         /* subl $112, %esp         -- make a space */
+         sub_from_esp(env, 112);
+         /* leal 48(%esp), %r_argp  -- point into it */
+         addInstr(env, X86Instr_Lea32(X86AMode_IR(48, hregX86_ESP()),
+                                      argp));
+         /* andl $-16, %r_argp      -- 16-align the pointer */
+         addInstr(env, X86Instr_Alu32R(Xalu_AND,
+                                       X86RMI_Imm( ~(UInt)15 ),
+                                       argp));
+         /* Prepare 3 arg regs:
+            leal  0(%r_argp), %eax
+            leal 16(%r_argp), %edx
+            leal 32(%r_argp), %ecx
+         */
+         addInstr(env, X86Instr_Lea32(X86AMode_IR(0, argp),
+                                      hregX86_EAX()));
+         addInstr(env, X86Instr_Lea32(X86AMode_IR(16, argp),
+                                      hregX86_EDX()));
+         addInstr(env, X86Instr_Lea32(X86AMode_IR(32, argp),
+                                      hregX86_ECX()));
+         /* Store the two args, at (%edx) and (%ecx):
+            movupd  %argL, 0(%edx)
+            movupd  %argR, 0(%ecx)
+         */
+         addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argL,
+                                        X86AMode_IR(0, hregX86_EDX())));
+         addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argR,
+                                        X86AMode_IR(0, hregX86_ECX())));
+         /* call the helper */
+         addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn, 3 ));
+         /* fetch the result from memory, using %r_argp, which the
+            register allocator will keep alive across the call. */
+         addInstr(env, X86Instr_SseLdSt(True/*isLoad*/, dst,
+                                        X86AMode_IR(0, argp)));
+         /* and finally, clear the space */
+         add_to_esp(env, 112);
+         return dst;
+      }
+
       default:
          break;
    } /* switch (e->Iex.Binop.op) */
diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c
index 13667d7c79..21506497ff 100644
--- a/VEX/priv/ir_defs.c
+++ b/VEX/priv/ir_defs.c
@@ -514,6 +514,8 @@ void ppIROp ( IROp op )
       case Iop_QNarrowBin16Sto8Ux8: vex_printf("QNarrowBin16Sto8Ux8"); return;
       case Iop_QNarrowBin16Sto8Sx8: vex_printf("QNarrowBin16Sto8Sx8"); return;
       case Iop_QNarrowBin32Sto16Sx4: vex_printf("QNarrowBin32Sto16Sx4"); return;
+      case Iop_NarrowBin16to8x8: vex_printf("NarrowBin16to8x8"); return;
+      case Iop_NarrowBin32to16x4: vex_printf("NarrowBin32to16x4"); return;
       case Iop_InterleaveHI8x8: vex_printf("InterleaveHI8x8"); return;
       case Iop_InterleaveHI16x4: vex_printf("InterleaveHI16x4"); return;
       case Iop_InterleaveHI32x2: vex_printf("InterleaveHI32x2"); return;
@@ -2066,6 +2068,7 @@ void typeOfPrimop ( IROp op,
       case Iop_PwAdd32Fx2:
       case Iop_QNarrowBin32Sto16Sx4:
       case Iop_QNarrowBin16Sto8Sx8: case Iop_QNarrowBin16Sto8Ux8:
+      case Iop_NarrowBin16to8x8: case Iop_NarrowBin32to16x4:
       case Iop_Sub8x8: case Iop_Sub16x4: case Iop_Sub32x2:
       case Iop_QSub8Sx8: case Iop_QSub16Sx4:
       case Iop_QSub32Sx2: case Iop_QSub64Sx1:
diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h
index f8324697f0..0432af3953 100644
--- a/VEX/pub/libvex_ir.h
+++ b/VEX/pub/libvex_ir.h
@@ -923,6 +923,7 @@ typedef
       */
       Iop_QNarrowBin16Sto8Ux8,
       Iop_QNarrowBin16Sto8Sx8, Iop_QNarrowBin32Sto16Sx4,
+      Iop_NarrowBin16to8x8, Iop_NarrowBin32to16x4,
 
       /* INTERLEAVING */
       /* Interleave lanes from low or high halves of