From: Jan Hubicka
Date: Tue, 18 Jan 2000 15:25:05 +0000 (+0100)
Subject: i386.md (memstr): Do not use rep stosb for counts divisible by 4 when optimize_size.
X-Git-Tag: prereleases/libstdc++-2.92~8655
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=e2e52e1be7f47a95853b79d36c2e7c1b636e52f6;p=thirdparty%2Fgcc.git

i386.md (memstr): Do not use rep stosb for counts divisible by 4 when optimize_size.

	* i386.md (memstr): Do not use rep stosb for counts divisible by 4
	when optimize_size.
	(clrstrsi): Rewrite.
	(strsethi, strsetqi): New expanders.
	(strsethi_1, strsetqi_1, rep_stossi, rep_stosqi): New insn patterns.
	(cmpstrsi): Emit compare insn before cmpstrsi_1.
	(cmpstrsi_nz): Use flags; set type to str, length_prefix to 1.
	(strlensi_1): Likewise.
	(cmpstrsi_1): Likewise; do not output compare.
	(strlen expander): Do not unroll when optimizing for size.
	(*subsi3_carry): Rename to subsi3_carry.
	(addqi3_cc): New pattern.
	* i386.h (processor_costs): Add move_ratio field.
	(MOVE_RATIO): Use move_ratio field; set to 3 for optimize_size.
	* i386.c (*_cost): Set move_ratio.
	(x86_unroll_strlen): Enable for Athlon, PPro and K6 too.
	(x86_expand_strlensi_1): Rewrite the main loop.

From-SVN: r31488
---

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e1f1277bf96d..00ce0986c42c 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,23 @@
+Tue Jan 18 16:19:55 MET 2000  Jan Hubicka
+
+	* i386.md (memstr): Do not use rep stosb for counts divisible by 4
+	when optimize_size.
+	(clrstrsi): Rewrite.
+	(strsethi, strsetqi): New expanders.
+	(strsethi_1, strsetqi_1, rep_stossi, rep_stosqi): New insn patterns.
+	(cmpstrsi): Emit compare insn before cmpstrsi_1.
+	(cmpstrsi_nz): Use flags; set type to str, length_prefix to 1.
+	(strlensi_1): Likewise.
+	(cmpstrsi_1): Likewise; do not output compare.
+	(strlen expander): Do not unroll when optimizing for size.
+	(*subsi3_carry): Rename to subsi3_carry.
+	(addqi3_cc): New pattern.
+	* i386.h (processor_costs): Add move_ratio field.
+	(MOVE_RATIO): Use move_ratio field; set to 3 for optimize_size.
+	* i386.c (*_cost): Set move_ratio.
+	(x86_unroll_strlen): Enable for Athlon, PPro and K6 too.
+	(x86_expand_strlensi_1): Rewrite the main loop.
+
 2000-01-17  Richard Henderson
 
 	* combine.c (combine_simplify_rtx): Give FLOAT_STORE_FLAG_VALUE a mode.
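The memstr and clrstrsi entries above share one dispatch rule: a lone rep stosb (or rep movsb) is emitted only at -O0 or -Os and only when the constant count is not divisible by 4; otherwise the block is split into a dword-wide rep operation plus at most one stosw and one stosb for the remainder. The following is a minimal C model of that decomposition for a 15-byte clear on the speed path; the function name clear15 is illustrative only, since the expander really emits the rep_stossi, strsethi_1 and strsetqi_1 patterns added to i386.md below:

    #include <stdint.h>
    #include <string.h>

    /* Illustrative model of the new clrstrsi expansion for a constant
       count of 15 on the speed path: 15 = 3*4 + 2 + 1, so one
       "rep stosl" with %ecx = 15 >> 2, then "stosw" (count & 2) and
       "stosb" (count & 1).  */
    static void
    clear15 (unsigned char *p)
    {
      static const uint32_t zero = 0;
      int i;

      for (i = 0; i < 15 / 4; i++)	/* rep_stossi, %ecx = 3 */
        {
          memcpy (p, &zero, 4);
          p += 4;
        }
      memcpy (p, &zero, 2);		/* strsethi_1: stosw */
      p += 2;
      *p = 0;				/* strsetqi_1: stosb */
    }

At -Os the same 15-byte clear instead becomes a single rep stosb with %ecx = 15, trading the faster dword stores for the shortest encoding.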
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index c788229ae771..ded731783b59 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -64,6 +64,7 @@ struct processor_costs i386_cost = {	/* 386 specific costs */
   1,				/* cost of multiply per each bit set */
   23,				/* cost of a divide/mod */
   15,				/* "large" insn */
+  3,				/* MOVE_RATIO */
   4,				/* cost for loading QImode using movzbl */
   {2, 4, 2},			/* cost of loading integer registers
 				   in QImode, HImode and SImode.
@@ -84,6 +85,7 @@ struct processor_costs i486_cost = {	/* 486 specific costs */
   1,				/* cost of multiply per each bit set */
   40,				/* cost of a divide/mod */
   15,				/* "large" insn */
+  3,				/* MOVE_RATIO */
   4,				/* cost for loading QImode using movzbl */
   {2, 4, 2},			/* cost of loading integer registers
 				   in QImode, HImode and SImode.
@@ -104,6 +106,7 @@ struct processor_costs pentium_cost = {
   0,				/* cost of multiply per each bit set */
   25,				/* cost of a divide/mod */
   8,				/* "large" insn */
+  6,				/* MOVE_RATIO */
   6,				/* cost for loading QImode using movzbl */
   {2, 4, 2},			/* cost of loading integer registers
 				   in QImode, HImode and SImode.
@@ -124,6 +127,7 @@ struct processor_costs pentiumpro_cost = {
   0,				/* cost of multiply per each bit set */
   17,				/* cost of a divide/mod */
   8,				/* "large" insn */
+  6,				/* MOVE_RATIO */
   2,				/* cost for loading QImode using movzbl */
   {4, 4, 4},			/* cost of loading integer registers
 				   in QImode, HImode and SImode.
@@ -144,6 +148,7 @@ struct processor_costs k6_cost = {
   0,				/* cost of multiply per each bit set */
   18,				/* cost of a divide/mod */
   8,				/* "large" insn */
+  4,				/* MOVE_RATIO */
   3,				/* cost for loading QImode using movzbl */
   {4, 5, 4},			/* cost of loading integer registers
 				   in QImode, HImode and SImode.
@@ -164,6 +169,7 @@ struct processor_costs athlon_cost = {
   0,				/* cost of multiply per each bit set */
   19,				/* cost of a divide/mod */
   8,				/* "large" insn */
+  9,				/* MOVE_RATIO */
   4,				/* cost for loading QImode using movzbl */
   {4, 5, 4},			/* cost of loading integer registers
 				   in QImode, HImode and SImode.
@@ -191,7 +197,7 @@ const int x86_zero_extend_with_and = m_486 | m_PENT;
 const int x86_movx = m_ATHLON /* m_386 | m_PPRO | m_K6 */;
 const int x86_double_with_add = ~m_386;
 const int x86_use_bit_test = m_386;
-const int x86_unroll_strlen = m_486 | m_PENT;
+const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
 const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
 const int x86_use_any_reg = m_486;
 const int x86_cmove = m_PPRO | m_ATHLON;
@@ -5149,10 +5155,9 @@ ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
   rtx align_3_label = NULL_RTX;
   rtx align_4_label = gen_label_rtx ();
   rtx end_0_label = gen_label_rtx ();
-  rtx end_2_label = gen_label_rtx ();
-  rtx end_3_label = gen_label_rtx ();
   rtx mem;
   rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
+  rtx tmpreg = gen_reg_rtx (SImode);
 
   align = 0;
   if (GET_CODE (align_rtx) == CONST_INT)
@@ -5269,48 +5274,69 @@ ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
 
   mem = gen_rtx_MEM (SImode, out);
   emit_move_insn (scratch, mem);
-
-  /* Check first byte. */
-  emit_insn (gen_cmpqi_0 (gen_lowpart (QImode, scratch), const0_rtx));
-  tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
-  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
-			      gen_rtx_LABEL_REF (VOIDmode, end_0_label),
-			      pc_rtx);
-  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
-
-  /* Check second byte. */
-  emit_insn (gen_cmpqi_ext_3 (scratch, const0_rtx));
-  tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
-  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
-			      gen_rtx_LABEL_REF (VOIDmode, end_3_label),
-			      pc_rtx);
-  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
-
-  /* Check third byte. */
-  emit_insn (gen_testsi_1 (scratch, GEN_INT (0x00ff0000)));
-  tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
-  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
-			      gen_rtx_LABEL_REF (VOIDmode, end_2_label),
-			      pc_rtx);
-  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
-
-  /* Check fourth byte and increment address. */
   emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
-  emit_insn (gen_testsi_1 (scratch, GEN_INT (0xff000000)));
-  tmp = gen_rtx_NE (VOIDmode, flags, const0_rtx);
-  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
-			      gen_rtx_LABEL_REF (VOIDmode, align_4_label),
-			      pc_rtx);
-  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
-
-  /* Now generate fixups when the compare stops within a 4-byte word. */
-  emit_insn (gen_subsi3 (out, out, GEN_INT (3)));
-
-  emit_label (end_2_label);
-  emit_insn (gen_addsi3 (out, out, const1_rtx));
-
-  emit_label (end_3_label);
-  emit_insn (gen_addsi3 (out, out, const1_rtx));
+
+  /* This formula yields a nonzero result iff one of the bytes is zero.
+     This saves three branches inside loop and many cycles.  */
+
+  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
+  emit_insn (gen_one_cmplsi2 (scratch, scratch));
+  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
+  emit_insn (gen_andsi3 (tmpreg, tmpreg, GEN_INT (0x80808080)));
+  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, 0, align_4_label);
+
+  if (TARGET_CMOVE)
+    {
+       rtx reg = gen_reg_rtx (SImode);
+       emit_move_insn (reg, tmpreg);
+       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
+
+       /* If zero is not in the first two bytes, move two bytes forward.  */
+       emit_insn (gen_testsi_1 (tmpreg, GEN_INT (0x8080)));
+       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
+       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
+       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
+			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
+						     reg,
+						     tmpreg)));
+       /* Emit lea manually to avoid clobbering of flags.  */
+       emit_insn (gen_rtx_SET (SImode, reg,
+			       gen_rtx_PLUS (SImode, out, GEN_INT (2))));
+
+       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
+       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
+       emit_insn (gen_rtx_SET (VOIDmode, out,
+			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
+						     reg,
+						     out)));
+
+    }
+  else
+    {
+       rtx end_2_label = gen_label_rtx ();
+       /* Is zero in the first two bytes? */
+
+       emit_insn (gen_testsi_1 (tmpreg, GEN_INT (0x8080)));
+       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
+       tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
+       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+				   gen_rtx_LABEL_REF (VOIDmode, end_2_label),
+				   pc_rtx);
+       tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+       JUMP_LABEL (tmp) = end_2_label;
+
+       /* Not in the first two.  Move two bytes forward.  */
+       emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
+       emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
+
+       emit_label (end_2_label);
+
+    }
+
+  /* Avoid branch in fixing the byte.  */
+  tmpreg = gen_lowpart (QImode, tmpreg);
+  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
+  emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
 
   emit_label (end_0_label);
 }
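The rewritten loop above replaces four per-byte compare-and-branch sequences with the well-known zero-byte test: (x - 0x01010101) & ~x & 0x80808080 is nonzero exactly when some byte of x is zero, so a single test per word suffices. A self-contained C check of that property (illustrative code, not part of the patch):

    #include <assert.h>
    #include <stdint.h>

    /* Nonzero iff some byte of x is zero -- the value the emitted
       add/not/and sequence leaves in tmpreg before the conditional
       jump back to align_4_label.  */
    static int
    has_zero_byte (uint32_t x)
    {
      return ((x - 0x01010101u) & ~x & 0x80808080u) != 0;
    }

    int
    main (void)
    {
      assert (has_zero_byte (0x11002233));	/* contains a zero byte */
      assert (!has_zero_byte (0x11223344));	/* no zero byte */
      assert (!has_zero_byte (0x81808080));	/* 0x81 - 0x01 = 0x80, but
						   the & ~x factor masks it */
      return 0;
    }

Unlike the cheaper (x - 0x01010101) & 0x80808080 variant, the & ~x factor makes the test exact, as the third assert demonstrates.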
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 62883d73c9e6..e6111280a7c2 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -62,6 +62,8 @@ struct processor_costs {
   int mult_bit;			/* cost of multiply per each bit set */
   int divide;			/* cost of a divide/mod */
   int large_insn;		/* insns larger than this cost more */
+  int move_ratio;		/* The threshold of number of scalar
+				   memory-to-memory move insns.  */
   int movzbl_load;		/* cost of loading using movzbl */
   int int_load[3];		/* cost of loading integer registers
 				   in QImode, HImode and SImode relative
@@ -1709,13 +1711,9 @@ while (0)
    Increasing the value will always make code faster, but eventually
    incurs high cost in increased code size.
 
-   If you don't define this, a reasonable default is used.
+   If you don't define this, a reasonable default is used.  */
 
-   Make this large on i386, since the block move is very inefficient with small
-   blocks, and the hard register needs of the block move require much reload
-   work.  */
-
-#define MOVE_RATIO 5
+#define MOVE_RATIO (optimize_size ? 3 : ix86_cost->move_ratio)
 
 /* Define if shifts truncate the shift count
    which implies one can omit a sign-extension or zero-extension
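For context, MOVE_RATIO is the threshold the move-by-pieces machinery compares against: a constant-size block copy is expanded into individual scalar moves only while the estimated number of moves stays below the ratio, and otherwise falls back to the movstrsi pattern. A rough illustration (the struct is hypothetical and the exact cutoff depends on alignment and the move-by-pieces cost accounting):

    /* 24 bytes = six SImode moves.  With move_ratio 9 (Athlon) the copy
       below can stay inline as six movl instructions; with MOVE_RATIO 3
       under -Os it is emitted as a rep movs sequence instead.  */
    struct pt { int v[6]; };

    void
    copy_pt (struct pt *dst, const struct pt *src)
    {
      *dst = *src;	/* block move; expansion is governed by MOVE_RATIO */
    }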
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index c7b01819a434..0bd532416904 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -3235,6 +3235,15 @@
   "add{l}\\t{%2, %0|%0, %2}"
   [(set_attr "type" "alu")])
 
+(define_insn "addqi3_cc"
+  [(set (reg:CC 17) (plus:CC (match_operand:QI 1 "nonimmediate_operand" "%0,0")
+			     (match_operand:QI 2 "general_operand" "ri,rm")))
+   (set (match_operand:QI 0 "nonimmediate_operand" "=rm,r")
+	(plus:QI (match_dup 1) (match_dup 2)))]
+  "ix86_binary_operator_ok (PLUS, QImode, operands)"
+  "add{b}\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")])
+
 (define_insn "*addsi3_carry"
   [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
 	(plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
@@ -3736,7 +3745,7 @@
   "sub{l}\\t{%2, %0|%0, %2}"
   [(set_attr "type" "alu")])
 
-(define_insn "*subsi3_carry"
+(define_insn "subsi3_carry"
   [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
 	(minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
 	  (plus:SI (match_operand:SI 2 "general_operand" "ri,rm")
@@ -7841,8 +7850,9 @@
   srcreg = copy_to_mode_reg (Pmode, XEXP (operands[1], 0));
 
   emit_insn (gen_cld());
-  /* When optimizing for size emit simple rep ; movsb instruction.  */
-  if (!optimize || optimize_size)
+  /* When optimizing for size emit simple rep ; movsb instruction for
+     counts not divisible by 4.  */
+  if ((!optimize || optimize_size) && (INTVAL (operands[2]) & 0x03))
     {
       countreg = copy_to_mode_reg (SImode, operands[2]);
       emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
@@ -7983,84 +7993,143 @@
   (set_attr "memory" "both")])
 
 (define_expand "clrstrsi"
-  [(set (reg:SI 19) (const_int 0))
-   (set (match_dup 3) (const_int 0))
-   (parallel [(set (match_operand:BLK 0 "memory_operand" "")
-		   (const_int 0))
-	      (use (match_operand:SI 1 "const_int_operand" ""))
-	      (use (match_operand:SI 2 "const_int_operand" ""))
-	      (use (match_dup 3))
-	      (use (reg:SI 19))
-	      (clobber (match_scratch:SI 4 ""))
-	      (clobber (match_dup 5))])]
+  [(use (match_operand:BLK 0 "memory_operand" ""))
+   (use (match_operand:SI 1 "const_int_operand" ""))
+   (use (match_operand:SI 2 "const_int_operand" ""))]
  ""
  "
 {
-  rtx addr0;
+  rtx destreg, zeroreg, countreg;
 
   if (GET_CODE (operands[1]) != CONST_INT)
     FAIL;
 
-  addr0 = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
+  destreg = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
+
+  emit_insn (gen_cld());
+
+  /* When optimizing for size emit simple rep ; stosb instruction for
+     counts not divisible by 4.  */
+  if ((!optimize || optimize_size) && (INTVAL (operands[1]) & 0x03))
+    {
+      countreg = copy_to_mode_reg (SImode, operands[1]);
+      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
+      emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
+				 destreg, countreg));
+    }
+  else
+    {
+      zeroreg = copy_to_mode_reg (SImode, const0_rtx);
+      if (INTVAL (operands[1]) & ~0x03)
+	{
+	  countreg = copy_to_mode_reg (SImode,
+				       GEN_INT ((INTVAL (operands[1]) >> 2)
+						& 0x3fffffff));
+	  emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
+				     destreg, countreg));
+	}
+      if (INTVAL (operands[1]) & 0x02)
+	emit_insn (gen_strsethi (destreg,
+				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
+      if (INTVAL (operands[1]) & 0x01)
+	emit_insn (gen_strsetqi (destreg,
+				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
+    }
+  DONE;
+}")
+
+;; Most CPUs don't like single string operations.
+;; Handle this case here to simplify the previous expander.
 
-  operands[3] = gen_reg_rtx (SImode);
-  operands[5] = addr0;
+(define_expand "strsethi"
+  [(set (mem:HI (match_operand:SI 0 "register_operand" ""))
+	(match_operand:HI 1 "register_operand" ""))
+   (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 2)))
+	      (clobber (reg:CC 17))])]
+  ""
+  "
+{
+  if (TARGET_SINGLE_STRINGOP || optimize_size)
+    {
+      emit_insn (gen_strsethi_1 (operands[0], operands[0], operands[1]));
+      DONE;
+    }
+}")
 
-  operands[0] = gen_rtx_MEM (BLKmode, addr0);
+(define_expand "strsetqi"
+  [(set (mem:QI (match_operand:SI 0 "register_operand" ""))
+	(match_operand:QI 1 "register_operand" ""))
+   (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))
+	      (clobber (reg:CC 17))])]
+  ""
+  "
+{
+  if (TARGET_SINGLE_STRINGOP || optimize_size)
+    {
+      emit_insn (gen_strsetqi_1 (operands[0], operands[0], operands[1]));
+      DONE;
+    }
 }")
 
+(define_insn "strsethi_1"
+  [(set (mem:HI (match_operand:SI 1 "register_operand" "0"))
+	(match_operand:HI 2 "register_operand" "a"))
+   (set (match_operand:SI 0 "register_operand" "=D")
+	(plus:SI (match_dup 0)
+		 (const_int 2)))
+   (use (reg:SI 19))]
+  "TARGET_SINGLE_STRINGOP || optimize_size"
+  "stosw"
+  [(set_attr "type" "str")
+   (set_attr "memory" "store")
+   (set_attr "length_prefix" "1")])
+
+(define_insn "strsetqi_1"
+  [(set (mem:QI (match_operand:SI 1 "register_operand" "0"))
+	(match_operand:QI 2 "register_operand" "a"))
+   (set (match_operand:SI 0 "register_operand" "=D")
+	(plus:SI (match_dup 0)
+		 (const_int 1)))
+   (use (reg:SI 19))]
+  "TARGET_SINGLE_STRINGOP || optimize_size"
+  "stosb"
+  [(set_attr "type" "str")
+   (set_attr "memory" "store")])
+
 ;; It might seem that operand 0 could use predicate register_operand.
 ;; But strength reduction might offset the MEM expression.  So we let
 ;; reload put the address into %edi.
 
-(define_insn "*clrstrsi_1"
-  [(set (mem:BLK (match_operand:SI 0 "address_operand" "D"))
+(define_insn "rep_stossi"
+  [(set (match_operand:SI 1 "register_operand" "=c") (const_int 0))
+   (use (match_operand:SI 2 "register_operand" "a"))
+   (use (match_operand:SI 4 "register_operand" "1"))
+   (set (match_operand:SI 0 "register_operand" "=D")
+	(plus:SI (match_operand:SI 3 "address_operand" "0")
+		 (ashift:SI (match_dup 3) (const_int 2))))
+   (set (mem:BLK (match_dup 3))
 	(const_int 0))
-   (use (match_operand:SI 1 "const_int_operand" "n"))
-   (use (match_operand:SI 2 "immediate_operand" "i"))
-   (use (match_operand:SI 3 "register_operand" "a"))
-   (use (reg:SI 19))
-   (clobber (match_scratch:SI 4 "=&c"))
-   (clobber (match_dup 0))]
+   (use (reg:SI 19))]
  ""
-  "*
-{
-  rtx xops[2];
-
-  if (GET_CODE (operands[1]) == CONST_INT)
-    {
-      unsigned int count = INTVAL (operands[1]) & 0xffffffff;
-      if (count & ~0x03)
-	{
-	  xops[0] = GEN_INT (count / 4);
-	  xops[1] = operands[4];
-
-	  /* K6: stos takes 1 cycle, rep stos takes 8 + %ecx cycles.
-	     80386: 4/5+5n (+2 for set of ecx)
-	     80486: 5/7+5n (+1 for set of ecx)
-	   */
-	  if (count / 4 < ((int) ix86_cpu < (int)PROCESSOR_PENTIUM ? 4 : 6))
-	    {
-	      do
-		output_asm_insn (\"{stosl|stosd}\", xops);
-	      while ((count -= 4) > 3);
-	    }
-	  else
-	    {
-	      output_asm_insn (\"mov{l}\\t{%0, %1|%1, %0}\", xops);
-	      output_asm_insn (\"{rep\;stosl|rep stosd}\", xops);
-	    }
-	}
-      if (INTVAL (operands[1]) & 0x02)
-	output_asm_insn (\"stosw\", operands);
-      if (INTVAL (operands[1]) & 0x01)
-	output_asm_insn (\"stosb\", operands);
-    }
-  else
-    abort ();
-  RET;
-}"
-  [(set_attr "type" "multi")])
+  "{rep\;stosl|rep stosd}"
+  [(set_attr "type" "str")
+   (set_attr "length_prefix" "1")
+   (set_attr "memory" "store")])
 
+(define_insn "rep_stosqi"
+  [(set (match_operand:SI 1 "register_operand" "=c") (const_int 0))
+   (use (match_operand:QI 2 "register_operand" "a"))
+   (use (match_operand:SI 4 "register_operand" "1"))
+   (set (match_operand:SI 0 "register_operand" "=D")
+	(plus:SI (match_operand:SI 3 "address_operand" "0") (match_dup 3)))
+   (set (mem:BLK (match_dup 3))
+	(const_int 0))
+   (use (reg:SI 19))]
+  ""
+  "{rep\;stosb|rep stosb}"
+  [(set_attr "type" "str")
+   (set_attr "length_prefix" "1")
+   (set_attr "memory" "store")])
 
 (define_expand "cmpstrsi"
   [(set (match_operand:SI 0 "register_operand" "")
@@ -8099,7 +8168,10 @@
       emit_insn (gen_cmpstrsi_nz_1 (addr1, addr2, countreg, align));
     }
   else
-    emit_insn (gen_cmpstrsi_1 (addr1, addr2, countreg, align));
+    {
+      emit_insn (gen_cmpsi_1 (countreg, countreg));
+      emit_insn (gen_cmpstrsi_1 (addr1, addr2, countreg, align));
+    }
 
   outlow = gen_lowpart (QImode, out);
   emit_insn (gen_cmpintqi (outlow));
@@ -8145,8 +8217,8 @@
    (clobber (match_dup 2))]
  ""
  "repz{\;| }cmpsb"
-  [(set_attr "type" "multi")
-   (set_attr "length" "3")])
+  [(set_attr "type" "str")
+   (set_attr "length_prefix" "1")])
 
 ;; The same, but the count is not known to not be zero.
 
@@ -8158,15 +8230,15 @@
 	    (mem:BLK (match_operand:SI 1 "address_operand" "D")))
 	  (const_int 0)))
    (use (match_operand:SI 3 "immediate_operand" "i"))
+   (use (reg:CC 17))
    (use (reg:SI 19))
    (clobber (match_dup 0))
    (clobber (match_dup 1))
    (clobber (match_dup 2))]
  ""
-  ;; The initial compare sets the zero flag.
-  "cmp{l}\\t%2, %2\;repz{\;| }cmpsb"
-  [(set_attr "type" "multi")
-   (set_attr "length" "5")])
+  "repz{\;| }cmpsb"
+  [(set_attr "type" "str")
+   (set_attr "length_prefix" "1")])
 
 (define_expand "strlensi"
   [(set (match_operand:SI 0 "register_operand" "")
@@ -8184,7 +8256,8 @@
   align = operands[3];
   scratch1 = gen_reg_rtx (SImode);
 
-  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1)
+  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
+      && !optimize_size)
     {
      /* Well it seems that some optimizer does not combine a call like
         foo(strlen(bar), strlen(bar));
@@ -8236,8 +8309,8 @@
    (clobber (reg:CC 17))]
  ""
  "repnz{\;| }scasb"
-  [(set_attr "type" "multi")
-   (set_attr "length" "3")])
+  [(set_attr "type" "str")
+   (set_attr "length_prefix" "1")])
 
 ;; Conditional move instructions.
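Taken together, the i386.c loop and the new i386.md patterns make the unrolled strlen behave like the C sketch below: the add/not/and mask finds the first word containing a zero byte, a cmov (or a short branch without TARGET_CMOVE) skips past the low half-word, and the addqi3_cc/subsi3_carry pair turns bit 7 of the mask byte into a carry so the final pointer fixup needs no branch. The helper name and the word-aligned entry are assumptions of this sketch, not part of the patch:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    /* C model of the rewritten strlensi main loop; assumes p is already
       4-byte aligned (the real code reaches this point through an
       alignment prologue that checks leading bytes one at a time).  */
    static size_t
    strlen_model (const char *s)
    {
      const char *p = s;
      uint32_t word, mask;

      do
        {
          memcpy (&word, p, 4);	/* the SImode load through (mem:SI out) */
          p += 4;			/* the addsi3 (out, out, 4) */
          mask = (word - 0x01010101u) & ~word & 0x80808080u;
        }
      while (mask == 0);		/* no zero byte in this word: loop */

      if ((mask & 0x8080u) == 0)	/* zero not in the first two bytes? */
        {
          mask >>= 16;		/* examine the upper half instead */
          p += 2;
        }
      /* addqi3_cc adds the mask byte to itself, so the carry flag becomes
         its bit 7; subsi3_carry then computes p - 3 - carry, landing on
         the NUL without a branch.  */
      p -= 3 + ((mask & 0x80u) != 0);
      return (size_t) (p - s);
    }

    int
    main (void)
    {
      /* Word-aligned, zero-padded buffer keeps the 4-byte loads in bounds.  */
      union { uint32_t w[4]; char c[16]; } u = { { 0 } };

      memcpy (u.c, "example", sizeof "example");
      assert (strlen_model (u.c) == strlen (u.c));
      return 0;
    }

The carry trick works because, after the optional 16-bit shift, bit 7 of the low mask byte is set exactly when the zero byte sits at the even offset, where the pointer must step back 4 bytes rather than 3.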