Update copyright years.

[thirdparty/gcc.git] / gcc / optabs.c
diff --git a/gcc/optabs.c b/gcc/optabs.c

index cadf4676c986c8430baafab8ef5282e890d36308..5f759d5837e9a4abe23601d2f50637d0a02d4e2c 100644 (file)
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -1,5 +1,5 @@
  /* Expand the basic unary and binary arithmetic operations, for GNU compiler.
-   Copyright (C) 1987-2018 Free Software Foundation, Inc.
+   Copyright (C) 1987-2022 Free Software Foundation, Inc.
  
  This file is part of GCC.
  
@@ -28,8 +28,8 @@ along with GCC; see the file COPYING3.  If not see
  #include "memmodel.h"
  #include "predict.h"
  #include "tm_p.h"
-#include "expmed.h"
  #include "optabs.h"
+#include "expmed.h"
  #include "emit-rtl.h"
  #include "recog.h"
  #include "diagnostic-core.h"
@@ -44,6 +44,8 @@ along with GCC; see the file COPYING3.  If not see
  #include "expr.h"
  #include "optabs-tree.h"
  #include "libfuncs.h"
+#include "internal-fn.h"
+#include "langhooks.h"
  
  static void prepare_float_lib_cmp (rtx, rtx, enum rtx_code, rtx *,
                                    machine_mode *);
@@ -55,7 +57,7 @@ void debug_optab_libfuncs (void);
  \f
  /* Add a REG_EQUAL note to the last insn in INSNS.  TARGET is being set to
     the result of operation CODE applied to OP0 (and OP1 if it is a binary
-   operation).
+   operation).  OP0_MODE is OP0's mode.
  
     If the last insn does not set TARGET, don't do anything, but return 1.
  
@@ -64,7 +66,8 @@ void debug_optab_libfuncs (void);
     try again, ensuring that TARGET is not one of the operands.  */
  
  static int
-add_equal_note (rtx_insn *insns, rtx target, enum rtx_code code, rtx op0, rtx op1)
+add_equal_note (rtx_insn *insns, rtx target, enum rtx_code code, rtx op0,
+               rtx op1, machine_mode op0_mode)
  {
    rtx_insn *last_insn;
    rtx set;
@@ -136,16 +139,16 @@ add_equal_note (rtx_insn *insns, rtx target, enum rtx_code code, rtx op0, rtx op
        case POPCOUNT:
        case PARITY:
        case BSWAP:
-       if (GET_MODE (op0) != VOIDmode && GET_MODE (target) != GET_MODE (op0))
+       if (op0_mode != VOIDmode && GET_MODE (target) != op0_mode)
           {
-           note = gen_rtx_fmt_e (code, GET_MODE (op0), copy_rtx (op0));
-           if (GET_MODE_UNIT_SIZE (GET_MODE (op0))
+           note = gen_rtx_fmt_e (code, op0_mode, copy_rtx (op0));
+           if (GET_MODE_UNIT_SIZE (op0_mode)
                 > GET_MODE_UNIT_SIZE (GET_MODE (target)))
               note = simplify_gen_unary (TRUNCATE, GET_MODE (target),
-                                        note, GET_MODE (op0));
+                                        note, op0_mode);
             else
               note = simplify_gen_unary (ZERO_EXTEND, GET_MODE (target),
-                                        note, GET_MODE (op0));
+                                        note, op0_mode);
             break;
           }
         /* FALLTHRU */
@@ -249,15 +252,21 @@ rtx
  expand_widen_pattern_expr (sepops ops, rtx op0, rtx op1, rtx wide_op,
                            rtx target, int unsignedp)
  {
-  struct expand_operand eops[4];
+  class expand_operand eops[4];
    tree oprnd0, oprnd1, oprnd2;
    machine_mode wmode = VOIDmode, tmode0, tmode1 = VOIDmode;
    optab widen_pattern_optab;
    enum insn_code icode;
    int nops = TREE_CODE_LENGTH (ops->code);
    int op;
+  bool sbool = false;
  
    oprnd0 = ops->op0;
+  if (nops >= 2)
+    oprnd1 = ops->op1;
+  if (nops >= 3)
+    oprnd2 = ops->op2;
+
    tmode0 = TYPE_MODE (TREE_TYPE (oprnd0));
    if (ops->code == VEC_UNPACK_FIX_TRUNC_HI_EXPR
        || ops->code == VEC_UNPACK_FIX_TRUNC_LO_EXPR)
@@ -265,6 +274,43 @@ expand_widen_pattern_expr (sepops ops, rtx op0, rtx op1, rtx wide_op,
         for these ops.  */
      widen_pattern_optab
        = optab_for_tree_code (ops->code, ops->type, optab_default);
+  else if ((ops->code == VEC_UNPACK_HI_EXPR
+           || ops->code == VEC_UNPACK_LO_EXPR)
+          && VECTOR_BOOLEAN_TYPE_P (ops->type)
+          && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (oprnd0))
+          && TYPE_MODE (ops->type) == TYPE_MODE (TREE_TYPE (oprnd0))
+          && SCALAR_INT_MODE_P (TYPE_MODE (ops->type)))
+    {
+      /* For VEC_UNPACK_{LO,HI}_EXPR, if op0 and the result are both
+        VECTOR_BOOLEAN_TYPE_P vectors with the same scalar mode, use
+        vec_unpacks_sbool_{lo,hi}_optab, so that we can pass the number
+        of elements in the wider vector to the pattern.  */
+      widen_pattern_optab
+       = (ops->code == VEC_UNPACK_HI_EXPR
+          ? vec_unpacks_sbool_hi_optab : vec_unpacks_sbool_lo_optab);
+      sbool = true;
+    }
+  else if (ops->code == DOT_PROD_EXPR)
+    {
+      enum optab_subtype subtype = optab_default;
+      signop sign1 = TYPE_SIGN (TREE_TYPE (oprnd0));
+      signop sign2 = TYPE_SIGN (TREE_TYPE (oprnd1));
+      if (sign1 == sign2)
+       ;
+      else if (sign1 == SIGNED && sign2 == UNSIGNED)
+       {
+         subtype = optab_vector_mixed_sign;
+         /* Same as optab_vector_mixed_sign but flip the operands.  */
+         std::swap (op0, op1);
+       }
+      else if (sign1 == UNSIGNED && sign2 == SIGNED)
+       subtype = optab_vector_mixed_sign;
+      else
+       gcc_unreachable ();
+
+      widen_pattern_optab
+       = optab_for_tree_code (ops->code, TREE_TYPE (oprnd0), subtype);
+    }
    else
      widen_pattern_optab
        = optab_for_tree_code (ops->code, TREE_TYPE (oprnd0), optab_default);
@@ -278,9 +324,12 @@ expand_widen_pattern_expr (sepops ops, rtx op0, rtx op1, rtx wide_op,
    gcc_assert (icode != CODE_FOR_nothing);
  
    if (nops >= 2)
+    tmode1 = TYPE_MODE (TREE_TYPE (oprnd1));
+  else if (sbool)
      {
-      oprnd1 = ops->op1;
-      tmode1 = TYPE_MODE (TREE_TYPE (oprnd1));
+      nops = 2;
+      op1 = GEN_INT (TYPE_VECTOR_SUBPARTS (TREE_TYPE (oprnd0)).to_constant ());
+      tmode1 = tmode0;
      }
  
    /* The last operand is of a wider mode than the rest of the operands.  */
@@ -290,7 +339,6 @@ expand_widen_pattern_expr (sepops ops, rtx op0, rtx op1, rtx wide_op,
      {
        gcc_assert (tmode1 == tmode0);
        gcc_assert (op1);
-      oprnd2 = ops->op2;
        wmode = TYPE_MODE (TREE_TYPE (oprnd2));
      }
  
@@ -320,7 +368,7 @@ rtx
  expand_ternary_op (machine_mode mode, optab ternary_optab, rtx op0,
                    rtx op1, rtx op2, rtx target, int unsignedp)
  {
-  struct expand_operand ops[4];
+  class expand_operand ops[4];
    enum insn_code icode = optab_handler (ternary_optab, mode);
  
    gcc_assert (optab_handler (ternary_optab, mode) != CODE_FOR_nothing);
@@ -389,7 +437,7 @@ expand_vector_broadcast (machine_mode vmode, rtx op)
    insn_code icode = optab_handler (vec_duplicate_optab, vmode);
    if (icode != CODE_FOR_nothing)
      {
-      struct expand_operand ops[2];
+      class expand_operand ops[2];
        create_output_operand (&ops[0], NULL_RTX, vmode);
        create_input_operand (&ops[1], op, GET_MODE (op));
        expand_insn (icode, 2, ops);
@@ -902,6 +950,299 @@ expand_doubleword_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
    emit_move_insn (product_high, adjust);
    return product;
  }
+
+/* Subroutine of expand_binop.  Optimize unsigned double-word OP0 % OP1 for
+   constant OP1.  If for some bit in [BITS_PER_WORD / 2, BITS_PER_WORD] range
+   (prefer higher bits) ((1w << bit) % OP1) == 1, then with
+   mask = (1w << bit) - 1 the modulo can be computed in word-mode as
+   ((OP0 & mask) + ((OP0 >> bit) & mask) + (OP0 >> (2 * bit))) % OP1.
+   Whether we need to sum 2, 3 or 4 values depends on the bit value; if 2,
+   then the carry from the addition needs to be added too, i.e. like:
+   sum += __builtin_add_overflow (low, high, &sum)
+
+   Optimize signed double-word OP0 % OP1 similarly, just apply some correction
+   factor to the sum before doing unsigned remainder, in the form of
+   sum += (((signed) OP0 >> (2 * BITS_PER_WORD - 1)) & const);
+   then perform unsigned
+   remainder = sum % OP1;
+   and finally
+   remainder += ((signed) OP0 >> (2 * BITS_PER_WORD - 1)) & (1 - OP1);  */
+
+static rtx
+expand_doubleword_mod (machine_mode mode, rtx op0, rtx op1, bool unsignedp)
+{
+  if (INTVAL (op1) <= 1 || (INTVAL (op1) & 1) == 0)
+    return NULL_RTX;
+
+  rtx_insn *last = get_last_insn ();
+  for (int bit = BITS_PER_WORD; bit >= BITS_PER_WORD / 2; bit--)
+    {
+      wide_int w = wi::shifted_mask (bit, 1, false, 2 * BITS_PER_WORD);
+      if (wi::ne_p (wi::umod_trunc (w, INTVAL (op1)), 1))
+       continue;
+      rtx sum = NULL_RTX, mask = NULL_RTX;
+      if (bit == BITS_PER_WORD)
+       {
+         /* For signed modulo we would need to add a correction to the
+            sum and that might overflow again, so skip this bit if signed.  */
+         if (!unsignedp)
+           continue;
+         if (optab_handler (uaddv4_optab, word_mode) == CODE_FOR_nothing)
+           continue;
+         tree wtype = lang_hooks.types.type_for_mode (word_mode, 1);
+         if (wtype == NULL_TREE)
+           continue;
+         tree ctype = build_complex_type (wtype);
+         if (TYPE_MODE (ctype) != GET_MODE_COMPLEX_MODE (word_mode))
+           continue;
+         machine_mode cmode = TYPE_MODE (ctype);
+         rtx op00 = operand_subword_force (op0, 0, mode);
+         rtx op01 = operand_subword_force (op0, 1, mode);
+         rtx cres = gen_rtx_CONCAT (cmode, gen_reg_rtx (word_mode),
+                                    gen_reg_rtx (word_mode));
+         tree lhs = make_tree (ctype, cres);
+         tree arg0 = make_tree (wtype, op00);
+         tree arg1 = make_tree (wtype, op01);
+         expand_addsub_overflow (UNKNOWN_LOCATION, PLUS_EXPR, lhs, arg0,
+                                 arg1, true, true, true, false, NULL);
+         sum = expand_simple_binop (word_mode, PLUS, XEXP (cres, 0),
+                                    XEXP (cres, 1), NULL_RTX, 1,
+                                    OPTAB_DIRECT);
+         if (sum == NULL_RTX)
+           return NULL_RTX;
+       }
+      else
+       {
+         /* Code below uses GEN_INT, so we need the masks to be representable
+            in HOST_WIDE_INTs.  */
+         if (bit >= HOST_BITS_PER_WIDE_INT)
+           continue;
+         /* If op0 is e.g. -1 or -2 unsigned, then the 2 additions might
+            overflow.  Consider 64-bit -1ULL for word size 32, if we add
+            0x7fffffffU + 0x7fffffffU + 3U, it wraps around to 1.  */
+         if (bit == BITS_PER_WORD - 1)
+           continue;
+
+         int count = (2 * BITS_PER_WORD + bit - 1) / bit;
+         rtx sum_corr = NULL_RTX;
+
+         if (!unsignedp)
+           {
+             /* For signed modulo, compute it as unsigned modulo of
+                sum with a correction added to it if OP0 is negative,
+                such that the result can be computed as unsigned remainder
+                + (((signed) OP0 >> (2 * BITS_PER_WORD - 1)) & (1 - OP1)).  */
+             w = wi::min_value (2 * BITS_PER_WORD, SIGNED);
+             wide_int wmod1 = wi::umod_trunc (w, INTVAL (op1));
+             wide_int wmod2 = wi::smod_trunc (w, INTVAL (op1));
+             /* wmod2 == -wmod1.  */
+             wmod2 = wmod2 + (INTVAL (op1) - 1);
+             if (wi::ne_p (wmod1, wmod2))
+               {
+                 wide_int wcorr = wmod2 - wmod1;
+                 if (wi::neg_p (w))
+                   wcorr = wcorr + INTVAL (op1);
+                 /* Now verify if the count sums can't overflow, and punt
+                    if they could.  */
+                 w = wi::mask (bit, false, 2 * BITS_PER_WORD);
+                 w = w * (count - 1);
+                 w = w + wi::mask (2 * BITS_PER_WORD - (count - 1) * bit,
+                                   false, 2 * BITS_PER_WORD);
+                 w = w + wcorr;
+                 w = wi::lrshift (w, BITS_PER_WORD);
+                 if (wi::ne_p (w, 0))
+                   continue;
+
+                 mask = operand_subword_force (op0, WORDS_BIG_ENDIAN ? 0 : 1,
+                                               mode);
+                 mask = expand_simple_binop (word_mode, ASHIFTRT, mask,
+                                             GEN_INT (BITS_PER_WORD - 1),
+                                             NULL_RTX, 0, OPTAB_DIRECT);
+                 if (mask == NULL_RTX)
+                   return NULL_RTX;
+                 sum_corr = immed_wide_int_const (wcorr, word_mode);
+                 sum_corr = expand_simple_binop (word_mode, AND, mask,
+                                                 sum_corr, NULL_RTX, 1,
+                                                 OPTAB_DIRECT);
+                 if (sum_corr == NULL_RTX)
+                   return NULL_RTX;
+               }
+           }
+
+         for (int i = 0; i < count; i++)
+           {
+             rtx v = op0;
+             if (i)
+               v = expand_simple_binop (mode, LSHIFTRT, v, GEN_INT (i * bit),
+                                        NULL_RTX, 1, OPTAB_DIRECT);
+             if (v == NULL_RTX)
+               return NULL_RTX;
+             v = lowpart_subreg (word_mode, v, mode);
+             if (v == NULL_RTX)
+               return NULL_RTX;
+             if (i != count - 1)
+               v = expand_simple_binop (word_mode, AND, v,
+                                        GEN_INT ((HOST_WIDE_INT_1U << bit)
+                                                 - 1), NULL_RTX, 1,
+                                        OPTAB_DIRECT);
+             if (v == NULL_RTX)
+               return NULL_RTX;
+             if (sum == NULL_RTX)
+               sum = v;
+             else
+               sum = expand_simple_binop (word_mode, PLUS, sum, v, NULL_RTX,
+                                          1, OPTAB_DIRECT);
+             if (sum == NULL_RTX)
+               return NULL_RTX;
+           }
+         if (sum_corr)
+           {
+             sum = expand_simple_binop (word_mode, PLUS, sum, sum_corr,
+                                        NULL_RTX, 1, OPTAB_DIRECT);
+             if (sum == NULL_RTX)
+               return NULL_RTX;
+           }
+       }
+      rtx remainder = expand_divmod (1, TRUNC_MOD_EXPR, word_mode, sum,
+                                    gen_int_mode (INTVAL (op1), word_mode),
+                                    NULL_RTX, 1, OPTAB_DIRECT);
+      if (remainder == NULL_RTX)
+       return NULL_RTX;
+
+      if (!unsignedp)
+       {
+         if (mask == NULL_RTX)
+           {
+             mask = operand_subword_force (op0, WORDS_BIG_ENDIAN ? 0 : 1,
+                                           mode);
+             mask = expand_simple_binop (word_mode, ASHIFTRT, mask,
+                                         GEN_INT (BITS_PER_WORD - 1),
+                                         NULL_RTX, 0, OPTAB_DIRECT);
+             if (mask == NULL_RTX)
+               return NULL_RTX;
+           }
+         mask = expand_simple_binop (word_mode, AND, mask,
+                                     gen_int_mode (1 - INTVAL (op1),
+                                                   word_mode),
+                                     NULL_RTX, 1, OPTAB_DIRECT);
+         if (mask == NULL_RTX)
+           return NULL_RTX;
+         remainder = expand_simple_binop (word_mode, PLUS, remainder,
+                                          mask, NULL_RTX, 1, OPTAB_DIRECT);
+         if (remainder == NULL_RTX)
+           return NULL_RTX;
+       }
+
+      remainder = convert_modes (mode, word_mode, remainder, unsignedp);
+      /* Punt if any insn emitted since LAST is a call.  */
+      if (last)
+       last = NEXT_INSN (last);
+      else
+       last = get_insns ();
+      for (; last; last = NEXT_INSN (last))
+       if (CALL_P (last))
+         return NULL_RTX;
+      return remainder;
+    }
+  return NULL_RTX;
+}
+
+/* Similarly to the above function, but compute both quotient and remainder.
+   Quotient can be computed from the remainder as:
+   rem = op0 % op1;  // Handled using expand_doubleword_mod
+   quot = (op0 - rem) * inv; // inv is multiplicative inverse of op1 modulo
+                            // 2^(2 * BITS_PER_WORD)
+
+   We can also handle cases where op1 is the product of a power of two
+   constant and a constant handled by expand_doubleword_mod.
+   op11 = 1 << __builtin_ctz (op1);
+   op12 = op1 / op11;
+   rem1 = op0 % op12;  // Handled using expand_doubleword_mod
+   quot1 = (op0 - rem1) * inv; // inv is multiplicative inverse of op12 modulo
+                              // 2^(2 * BITS_PER_WORD)
+   rem = (quot1 % op11) * op12 + rem1;
+   quot = quot1 / op11;  */
+
+rtx
+expand_doubleword_divmod (machine_mode mode, rtx op0, rtx op1, rtx *rem,
+                         bool unsignedp)
+{
+  *rem = NULL_RTX;
+
+  /* Negative divisor should have been optimized into positive,
+     similarly modulo by 1 and modulo by power of two is optimized
+     differently too.  */
+  if (INTVAL (op1) <= 1 || pow2p_hwi (INTVAL (op1)))
+    return NULL_RTX;
+
+  rtx op11 = const1_rtx;
+  rtx op12 = op1;
+  if ((INTVAL (op1) & 1) == 0)
+    {
+      int bit = ctz_hwi (INTVAL (op1));
+      op11 = GEN_INT (HOST_WIDE_INT_1 << bit);
+      op12 = GEN_INT (INTVAL (op1) >> bit);
+    }
+
+  rtx rem1 = expand_doubleword_mod (mode, op0, op12, unsignedp);
+  if (rem1 == NULL_RTX)
+    return NULL_RTX;
+
+  int prec = 2 * BITS_PER_WORD;
+  wide_int a = wide_int::from (INTVAL (op12), prec + 1, UNSIGNED);
+  wide_int b = wi::shifted_mask (prec, 1, false, prec + 1);
+  wide_int m = wide_int::from (wi::mod_inv (a, b), prec, UNSIGNED);
+  rtx inv = immed_wide_int_const (m, mode);
+
+  rtx_insn *last = get_last_insn ();
+  rtx quot1 = expand_simple_binop (mode, MINUS, op0, rem1,
+                                  NULL_RTX, unsignedp, OPTAB_DIRECT);
+  if (quot1 == NULL_RTX)
+    return NULL_RTX;
+
+  quot1 = expand_simple_binop (mode, MULT, quot1, inv,
+                              NULL_RTX, unsignedp, OPTAB_DIRECT);
+  if (quot1 == NULL_RTX)
+    return NULL_RTX;
+
+  if (op11 != const1_rtx)
+    {
+      rtx rem2 = expand_divmod (1, TRUNC_MOD_EXPR, mode, quot1, op11,
+                               NULL_RTX, unsignedp, OPTAB_DIRECT);
+      if (rem2 == NULL_RTX)
+       return NULL_RTX;
+
+      rem2 = expand_simple_binop (mode, MULT, rem2, op12, NULL_RTX,
+                                 unsignedp, OPTAB_DIRECT);
+      if (rem2 == NULL_RTX)
+       return NULL_RTX;
+
+      rem2 = expand_simple_binop (mode, PLUS, rem2, rem1, NULL_RTX,
+                                 unsignedp, OPTAB_DIRECT);
+      if (rem2 == NULL_RTX)
+       return NULL_RTX;
+
+      rtx quot2 = expand_divmod (0, TRUNC_DIV_EXPR, mode, quot1, op11,
+                                NULL_RTX, unsignedp, OPTAB_DIRECT);
+      if (quot2 == NULL_RTX)
+       return NULL_RTX;
+
+      rem1 = rem2;
+      quot1 = quot2;
+    }
+
+  /* Punt if any insn emitted since LAST is a call.  */
+  if (last)
+    last = NEXT_INSN (last);
+  else
+    last = get_insns ();
+  for (; last; last = NEXT_INSN (last))
+    if (CALL_P (last))
+      return NULL_RTX;
+
+  *rem = rem1;
+  return quot1;
+}
  \f
  /* Wrapper around expand_binop which takes an rtx code to specify
     the operation to perform, not an optab pointer.  All other
@@ -1015,7 +1356,7 @@ expand_binop_directly (enum insn_code icode, machine_mode mode, optab binoptab,
    machine_mode xmode0 = insn_data[(int) icode].operand[1].mode;
    machine_mode xmode1 = insn_data[(int) icode].operand[2].mode;
    machine_mode mode0, mode1, tmp_mode;
-  struct expand_operand ops[3];
+  class expand_operand ops[3];
    bool commutative_p;
    rtx_insn *pat;
    rtx xop0 = op0, xop1 = op1;
@@ -1026,7 +1367,7 @@ expand_binop_directly (enum insn_code icode, machine_mode mode, optab binoptab,
    commutative_p = commutative_optab_p (binoptab);
    if (commutative_p
        && GET_MODE (xop0) != xmode0 && GET_MODE (xop1) != xmode1
-      && GET_MODE (xop0) == xmode1 && GET_MODE (xop1) == xmode1)
+      && GET_MODE (xop0) == xmode1 && GET_MODE (xop1) == xmode0)
      std::swap (xop0, xop1);
  
    /* If we are optimizing, force expensive constants into a register.  */
@@ -1104,7 +1445,7 @@ expand_binop_directly (enum insn_code icode, machine_mode mode, optab binoptab,
        if (INSN_P (pat) && NEXT_INSN (pat) != NULL_RTX
           && ! add_equal_note (pat, ops[0].value,
                                optab_to_code (binoptab),
-                              ops[1].value, ops[2].value))
+                              ops[1].value, ops[2].value, mode0))
         {
           delete_insns_since (last);
           return expand_binop (mode, binoptab, op0, op1, NULL_RTX,
@@ -1371,18 +1712,26 @@ expand_binop (machine_mode mode, optab binoptab, rtx op0, rtx op1,
        if (target == 0
           || target == op0
           || target == op1
+         || reg_overlap_mentioned_p (target, op0)
+         || reg_overlap_mentioned_p (target, op1)
           || !valid_multiword_target_p (target))
         target = gen_reg_rtx (int_mode);
  
        start_sequence ();
  
        /* Do the actual arithmetic.  */
+      machine_mode op0_mode = GET_MODE (op0);
+      machine_mode op1_mode = GET_MODE (op1);
+      if (op0_mode == VOIDmode)
+       op0_mode = int_mode;
+      if (op1_mode == VOIDmode)
+       op1_mode = int_mode;
        for (i = 0; i < GET_MODE_BITSIZE (int_mode) / BITS_PER_WORD; i++)
         {
           rtx target_piece = operand_subword (target, i, 1, int_mode);
           rtx x = expand_binop (word_mode, binoptab,
-                               operand_subword_force (op0, i, int_mode),
-                               operand_subword_force (op1, i, int_mode),
+                               operand_subword_force (op0, i, op0_mode),
+                               operand_subword_force (op1, i, op1_mode),
                                 target_piece, unsignedp, next_methods);
  
           if (x == 0)
@@ -1445,6 +1794,8 @@ expand_binop (machine_mode mode, optab binoptab, rtx op0, rtx op1,
           if (target == 0
               || target == op0
               || target == op1
+             || reg_overlap_mentioned_p (target, op0)
+             || reg_overlap_mentioned_p (target, op1)
               || !valid_multiword_target_p (target))
             target = gen_reg_rtx (int_mode);
  
@@ -1503,6 +1854,8 @@ expand_binop (machine_mode mode, optab binoptab, rtx op0, rtx op1,
           || target == op0
           || target == op1
           || !REG_P (target)
+         || reg_overlap_mentioned_p (target, op0)
+         || reg_overlap_mentioned_p (target, op1)
           || !valid_multiword_target_p (target))
         target = gen_reg_rtx (int_mode);
  
@@ -1770,6 +2123,54 @@ expand_binop (machine_mode mode, optab binoptab, rtx op0, rtx op1,
         }
      }
  
+  /* Attempt to synthesize double word division or modulo by a constant.  */
+  if ((binoptab == umod_optab
+       || binoptab == smod_optab
+       || binoptab == udiv_optab
+       || binoptab == sdiv_optab)
+      && optimize
+      && CONST_INT_P (op1)
+      && is_int_mode (mode, &int_mode)
+      && GET_MODE_SIZE (int_mode) == 2 * UNITS_PER_WORD
+      && optab_handler ((binoptab == umod_optab || binoptab == udiv_optab)
+                       ? udivmod_optab : sdivmod_optab,
+                       int_mode) == CODE_FOR_nothing
+      && optab_handler (and_optab, word_mode) != CODE_FOR_nothing
+      && optab_handler (add_optab, word_mode) != CODE_FOR_nothing
+      && optimize_insn_for_speed_p ())
+    {
+      rtx res = NULL_RTX;
+      if ((binoptab == umod_optab || binoptab == smod_optab)
+         && (INTVAL (op1) & 1) == 0)
+       res = expand_doubleword_mod (int_mode, op0, op1,
+                                    binoptab == umod_optab);
+      else
+       {
+         rtx quot = expand_doubleword_divmod (int_mode, op0, op1, &res,
+                                              binoptab == umod_optab
+                                              || binoptab == udiv_optab);
+         if (quot == NULL_RTX)
+           res = NULL_RTX;
+         else if (binoptab == udiv_optab || binoptab == sdiv_optab)
+           res = quot;
+       }
+      if (res != NULL_RTX)
+       {
+         if (optab_handler (mov_optab, int_mode) != CODE_FOR_nothing)
+           {
+             rtx_insn *move = emit_move_insn (target ? target : res,
+                                              res);
+             set_dst_reg_note (move, REG_EQUAL,
+                               gen_rtx_fmt_ee (optab_to_code (binoptab),
+                                               int_mode, copy_rtx (op0), op1),
+                               target ? target : res);
+           }
+         return res;
+       }
+      else
+       delete_insns_since (last);
+    }
+
    /* It can't be open-coded in this mode.
       Use a library call if one is available and caller says that's ok.  */
  
@@ -1982,7 +2383,7 @@ expand_twoval_unop (optab unoptab, rtx op0, rtx targ0, rtx targ1,
  
    if (optab_handler (unoptab, mode) != CODE_FOR_nothing)
      {
-      struct expand_operand ops[3];
+      class expand_operand ops[3];
        enum insn_code icode = optab_handler (unoptab, mode);
  
        create_fixed_operand (&ops[0], targ0);
@@ -2054,7 +2455,7 @@ expand_twoval_binop (optab binoptab, rtx op0, rtx op1, rtx targ0, rtx targ1,
  
    if (optab_handler (binoptab, mode) != CODE_FOR_nothing)
      {
-      struct expand_operand ops[4];
+      class expand_operand ops[4];
        enum insn_code icode = optab_handler (binoptab, mode);
        machine_mode mode0 = insn_data[icode].operand[1].mode;
        machine_mode mode1 = insn_data[icode].operand[2].mode;
@@ -2065,8 +2466,8 @@ expand_twoval_binop (optab binoptab, rtx op0, rtx op1, rtx targ0, rtx targ1,
        xop1 = avoid_expensive_constant (mode1, binoptab, 1, xop1, unsignedp);
  
        create_fixed_operand (&ops[0], targ0);
-      create_convert_operand_from (&ops[1], op0, mode, unsignedp);
-      create_convert_operand_from (&ops[2], op1, mode, unsignedp);
+      create_convert_operand_from (&ops[1], xop0, mode, unsignedp);
+      create_convert_operand_from (&ops[2], xop1, mode, unsignedp);
        create_fixed_operand (&ops[3], targ1);
        if (maybe_expand_insn (icode, 4, ops))
         return 1;
@@ -2207,6 +2608,82 @@ widen_leading (scalar_int_mode mode, rtx op0, rtx target, optab unoptab)
    return 0;
  }
  
+/* Attempt to emit (clrsb:mode op0) as
+   (plus:mode (clz:mode (xor:mode op0 (ashr:mode op0 (const_int prec-1))))
+             (const_int -1))
+   if CLZ_DEFINED_VALUE_AT_ZERO (mode, val) is 2 and val is prec, or as
+   (clz:mode (ior:mode (xor:mode (ashl:mode op0 (const_int 1))
+                                (ashr:mode op0 (const_int prec-1)))
+                      (const_int 1)))
+   otherwise.  Return the result, or NULL_RTX if optimizing for size, if
+   MODE has no clz pattern, or if any step cannot be expanded directly.  */
+
+static rtx
+expand_clrsb_using_clz (scalar_int_mode mode, rtx op0, rtx target)
+{
+  if (optimize_insn_for_size_p ()
+      || optab_handler (clz_optab, mode) == CODE_FOR_nothing)
+    return NULL_RTX;
+
+  start_sequence ();
+  HOST_WIDE_INT val = 0;
+  if (CLZ_DEFINED_VALUE_AT_ZERO (mode, val) != 2
+      || val != GET_MODE_PRECISION (mode))
+    val = 0;
+  else
+    val = 1;
+
+  rtx temp2 = op0;
+  if (!val)
+    {
+      temp2 = expand_binop (mode, ashl_optab, op0, const1_rtx,
+                           NULL_RTX, 0, OPTAB_DIRECT);
+      if (!temp2)
+       {
+       fail:
+         end_sequence ();
+         return NULL_RTX;
+       }
+    }
+
+  rtx temp = expand_binop (mode, ashr_optab, op0,
+                          GEN_INT (GET_MODE_PRECISION (mode) - 1),
+                          NULL_RTX, 0, OPTAB_DIRECT);
+  if (!temp)
+    goto fail;
+
+  temp = expand_binop (mode, xor_optab, temp2, temp, NULL_RTX, 0,
+                      OPTAB_DIRECT);
+  if (!temp)
+    goto fail;
+
+  if (!val)
+    {
+      temp = expand_binop (mode, ior_optab, temp, const1_rtx,
+                          NULL_RTX, 0, OPTAB_DIRECT);
+      if (!temp)
+       goto fail;
+    }
+  temp = expand_unop_direct (mode, clz_optab, temp, val ? NULL_RTX : target,
+                            true);
+  if (!temp)
+    goto fail;
+  if (val)
+    {
+      temp = expand_binop (mode, add_optab, temp, constm1_rtx,
+                          target, 0, OPTAB_DIRECT);
+      if (!temp)
+       goto fail;
+    }
+
+  rtx_insn *seq = get_insns ();
+  end_sequence ();
+
+  add_equal_note (seq, temp, CLRSB, op0, NULL_RTX, mode);
+  emit_insn (seq);
+  return temp;
+}
+
  /* Try calculating clz of a double-word quantity as two clz's of word-sized
     quantities, choosing which based on whether the high word is nonzero.  */
  static rtx
@@ -2269,7 +2746,7 @@ expand_doubleword_clz (scalar_int_mode mode, rtx op0, rtx target)
    seq = get_insns ();
    end_sequence ();
  
-  add_equal_note (seq, target, CLZ, xop0, 0);
+  add_equal_note (seq, target, CLZ, xop0, NULL_RTX, mode);
    emit_insn (seq);
    return target;
  
@@ -2311,7 +2788,7 @@ expand_doubleword_popcount (scalar_int_mode mode, rtx op0, rtx target)
    seq = get_insns ();
    end_sequence ();
  
-  add_equal_note (seq, t, POPCOUNT, op0, 0);
+  add_equal_note (seq, t, POPCOUNT, op0, NULL_RTX, mode);
    emit_insn (seq);
    return t;
  }
@@ -2482,7 +2959,7 @@ expand_ctz (scalar_int_mode mode, rtx op0, rtx target)
    seq = get_insns ();
    end_sequence ();
  
-  add_equal_note (seq, temp, CTZ, op0, 0);
+  add_equal_note (seq, temp, CTZ, op0, NULL_RTX, mode);
    emit_insn (seq);
    return temp;
  }
@@ -2560,7 +3037,7 @@ expand_ffs (scalar_int_mode mode, rtx op0, rtx target)
    seq = get_insns ();
    end_sequence ();
  
-  add_equal_note (seq, temp, FFS, op0, 0);
+  add_equal_note (seq, temp, FFS, op0, NULL_RTX, mode);
    emit_insn (seq);
    return temp;
  
@@ -2640,6 +3117,7 @@ expand_absneg_bit (enum rtx_code code, scalar_float_mode mode,
  
    if (target == 0
        || target == op0
+      || reg_overlap_mentioned_p (target, op0)
        || (nwords > 1 && !valid_multiword_target_p (target)))
      target = gen_reg_rtx (mode);
  
@@ -2694,7 +3172,7 @@ expand_unop_direct (machine_mode mode, optab unoptab, rtx op0, rtx target,
  {
    if (optab_handler (unoptab, mode) != CODE_FOR_nothing)
      {
-      struct expand_operand ops[2];
+      class expand_operand ops[2];
        enum insn_code icode = optab_handler (unoptab, mode);
        rtx_insn *last = get_last_insn ();
        rtx_insn *pat;
@@ -2707,7 +3185,7 @@ expand_unop_direct (machine_mode mode, optab unoptab, rtx op0, rtx target,
           if (INSN_P (pat) && NEXT_INSN (pat) != NULL_RTX
               && ! add_equal_note (pat, ops[0].value,
                                    optab_to_code (unoptab),
-                                  ops[1].value, NULL_RTX))
+                                  ops[1].value, NULL_RTX, mode))
             {
               delete_insns_since (last);
               return expand_unop (mode, unoptab, op0, NULL_RTX, unsignedp);
@@ -2777,6 +3255,9 @@ expand_unop (machine_mode mode, optab unoptab, rtx op0, rtx target,
           temp = widen_leading (int_mode, op0, target, unoptab);
           if (temp)
             return temp;
+         temp = expand_clrsb_using_clz (int_mode, op0, target);
+         if (temp)
+           return temp;
         }
        goto try_libcall;
      }
@@ -2859,8 +3340,11 @@ expand_unop (machine_mode mode, optab unoptab, rtx op0, rtx target,
           if (temp)
             return temp;
  
+         /* We do not provide a 128-bit bswap in libgcc so force the use of
+            a double bswap for 64-bit targets.  */
           if (GET_MODE_SIZE (int_mode) == 2 * UNITS_PER_WORD
-             && optab_handler (unoptab, word_mode) != CODE_FOR_nothing)
+             && (UNITS_PER_WORD == 8
+                 || optab_handler (unoptab, word_mode) != CODE_FOR_nothing))
             {
               temp = expand_doubleword_bswap (mode, op0, target);
               if (temp)
@@ -2918,7 +3402,10 @@ expand_unop (machine_mode mode, optab unoptab, rtx op0, rtx target,
        int i;
        rtx_insn *insns;
  
-      if (target == 0 || target == op0 || !valid_multiword_target_p (target))
+      if (target == 0
+         || target == op0
+         || reg_overlap_mentioned_p (target, op0)
+         || !valid_multiword_target_p (target))
         target = gen_reg_rtx (int_mode);
  
        start_sequence ();
@@ -2942,6 +3429,17 @@ expand_unop (machine_mode mode, optab unoptab, rtx op0, rtx target,
        return target;
      }
  
+  /* Emit ~op0 as op0 ^ -1.  */
+  if (unoptab == one_cmpl_optab
+      && (SCALAR_INT_MODE_P (mode) || GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+      && optab_handler (xor_optab, mode) != CODE_FOR_nothing)
+    {
+      temp = expand_binop (mode, xor_optab, op0, CONSTM1_RTX (mode),
+                          target, unsignedp, OPTAB_DIRECT);
+      if (temp)
+       return temp;
+    }
+
    if (optab_to_code (unoptab) == NEG)
      {
        /* Try negating floating point values by flipping the sign bit.  */
@@ -3428,6 +3926,8 @@ expand_copysign_bit (scalar_float_mode mode, rtx op0, rtx op1, rtx target,
    if (target == 0
        || target == op0
        || target == op1
+      || reg_overlap_mentioned_p (target, op0)
+      || reg_overlap_mentioned_p (target, op1)
        || (nwords > 1 && !valid_multiword_target_p (target)))
      target = gen_reg_rtx (mode);
  
@@ -3548,7 +4048,7 @@ bool
  maybe_emit_unop_insn (enum insn_code icode, rtx target, rtx op0,
                       enum rtx_code code)
  {
-  struct expand_operand ops[2];
+  class expand_operand ops[2];
    rtx_insn *pat;
  
    create_output_operand (&ops[0], target, GET_MODE (target));
@@ -3559,7 +4059,8 @@ maybe_emit_unop_insn (enum insn_code icode, rtx target, rtx op0,
  
    if (INSN_P (pat) && NEXT_INSN (pat) != NULL_RTX
        && code != UNKNOWN)
-    add_equal_note (pat, ops[0].value, code, ops[1].value, NULL_RTX);
+    add_equal_note (pat, ops[0].value, code, ops[1].value, NULL_RTX,
+                   GET_MODE (op0));
  
    emit_insn (pat);
  
@@ -3696,7 +4197,7 @@ emit_libcall_block_1 (rtx_insn *insns, rtx target, rtx result, rtx equiv,
           data.first = insns;
           data.insn = insn;
           data.must_stay = 0;
-         note_stores (PATTERN (insn), no_conflict_move_test, &data);
+         note_stores (insn, no_conflict_move_test, &data);
           if (! data.must_stay)
             {
               if (PREV_INSN (insn))
@@ -3777,6 +4278,82 @@ can_compare_p (enum rtx_code code, machine_mode mode,
    return 0;
  }
  
+/* Return true if CODE (LTU, LEU, GTU or GEU) selects an unsigned optab.  */
+
+static bool
+unsigned_optab_p (enum rtx_code code)
+{
+  return code == LTU || code == LEU || code == GTU || code == GEU;
+}
+
+/* Return true if operand OPNO of pattern ICODE accepts the rtx
+   (CODE:MASK_MODE reg1 reg2), where reg1 and reg2 are placeholder registers
+   of mode VALUE_MODE used only for this insn_operand_matches query.  */
+
+static bool
+insn_predicate_matches_p (enum insn_code icode, unsigned int opno,
+                         enum rtx_code code, machine_mode mask_mode,
+                         machine_mode value_mode)
+{
+  rtx reg1 = alloca_raw_REG (value_mode, LAST_VIRTUAL_REGISTER + 1);
+  rtx reg2 = alloca_raw_REG (value_mode, LAST_VIRTUAL_REGISTER + 2);
+  rtx test = alloca_rtx_fmt_ee (code, mask_mode, reg1, reg2);
+  return insn_operand_matches (icode, opno, test);
+}
+
+/* Return true if get_vec_cmp_icode supplies a pattern (vec_cmp/vec_cmpu) for
+   operands of mode VALUE_MODE and a result of mode MASK_MODE, and operand 1
+   of that pattern accepts a comparison with code CODE.  */
+
+bool
+can_vec_cmp_compare_p (enum rtx_code code, machine_mode value_mode,
+                      machine_mode mask_mode)
+{
+  enum insn_code icode
+      = get_vec_cmp_icode (value_mode, mask_mode, unsigned_optab_p (code));
+  if (icode == CODE_FOR_nothing)
+    return false;
+
+  return insn_predicate_matches_p (icode, 1, code, mask_mode, value_mode);
+}
+
+/* Return true if get_vcond_icode supplies a pattern (vcond/vcondu) for a
+   result of mode VALUE_MODE and compared operands of mode CMP_OP_MODE, and
+   operand 3 of that pattern accepts a comparison with code CODE.  */
+
+bool
+can_vcond_compare_p (enum rtx_code code, machine_mode value_mode,
+                    machine_mode cmp_op_mode)
+{
+  enum insn_code icode
+      = get_vcond_icode (value_mode, cmp_op_mode, unsigned_optab_p (code));
+  if (icode == CODE_FOR_nothing)
+    return false;
+
+  return insn_predicate_matches_p (icode, 3, code, value_mode, cmp_op_mode);
+}
+
+/* Return true if the backend's vec_set pattern for VEC_MODE can insert an
+   element into a vector at a variable (register) index position.  */
+
+bool
+can_vec_set_var_idx_p (machine_mode vec_mode)
+{
+  if (!VECTOR_MODE_P (vec_mode))
+    return false;
+  /* Probe registers for operands 0 (vector), 1 (element) and 2 (index).  */
+  machine_mode inner_mode = GET_MODE_INNER (vec_mode);
+  rtx reg1 = alloca_raw_REG (vec_mode, LAST_VIRTUAL_REGISTER + 1);
+  rtx reg2 = alloca_raw_REG (inner_mode, LAST_VIRTUAL_REGISTER + 2);
+  rtx reg3 = alloca_raw_REG (VOIDmode, LAST_VIRTUAL_REGISTER + 3);
+
+  enum insn_code icode = optab_handler (vec_set_optab, vec_mode);
+
+  return icode != CODE_FOR_nothing && insn_operand_matches (icode, 0, reg1)
+        && insn_operand_matches (icode, 1, reg2)
+        && insn_operand_matches (icode, 2, reg3);
+}
+
  /* This function is called when we are going to emit a compare instruction that
     compares the values found in X and Y, using the rtl operator COMPARISON.
  
@@ -3812,6 +4389,9 @@ prepare_cmp_insn (rtx x, rtx y, enum rtx_code comparison, rtx size,
    gcc_assert (methods == OPTAB_DIRECT || methods == OPTAB_WIDEN
               || methods == OPTAB_LIB_WIDEN);
  
+  if (CONST_SCALAR_INT_P (y))
+    canonicalize_comparison (mode, &comparison, &y);
+
    /* If we are optimizing, force expensive constants into a register.  */
    if (CONSTANT_P (x) && optimize
        && (rtx_cost (x, mode, COMPARE, 0, optimize_insn_for_speed_p ())
@@ -3823,13 +4403,6 @@ prepare_cmp_insn (rtx x, rtx y, enum rtx_code comparison, rtx size,
            > COSTS_N_INSNS (1)))
      y = force_reg (mode, y);
  
-#if HAVE_cc0
-  /* Make sure if we have a canonical comparison.  The RTL
-     documentation states that canonical comparisons are required only
-     for targets which have cc0.  */
-  gcc_assert (!CONSTANT_P (x) || CONSTANT_P (y));
-#endif
-
    /* Don't let both operands fail to indicate the mode.  */
    if (GET_MODE (x) == VOIDmode && GET_MODE (y) == VOIDmode)
      x = force_reg (mode, x);
@@ -3864,7 +4437,7 @@ prepare_cmp_insn (rtx x, rtx y, enum rtx_code comparison, rtx size,
  
           /* Must make sure the size fits the insn's mode.  */
           if (CONST_INT_P (size)
-             ? INTVAL (size) >= (1 << GET_MODE_BITSIZE (cmp_mode))
+             ? UINTVAL (size) > GET_MODE_MASK (cmp_mode)
               : (GET_MODE_BITSIZE (as_a <scalar_int_mode> (GET_MODE (size)))
                  > GET_MODE_BITSIZE (cmp_mode)))
             continue;
@@ -3883,7 +4456,7 @@ prepare_cmp_insn (rtx x, rtx y, enum rtx_code comparison, rtx size,
         goto fail;
  
        /* Otherwise call a library function.  */
-      result = emit_block_comp_via_libcall (XEXP (x, 0), XEXP (y, 0), size);
+      result = emit_block_comp_via_libcall (x, y, size);
  
        x = result;
        y = const0_rtx;
@@ -4255,7 +4828,7 @@ emit_indirect_jump (rtx loc)
      sorry ("indirect jumps are not available on this target");
    else
      {
-      struct expand_operand ops[1];
+      class expand_operand ops[1];
        create_address_operand (&ops[0], loc);
        expand_jump_insn (targetm.code_for_indirect_jump, 1, ops);
        emit_barrier ();
@@ -4360,7 +4933,7 @@ emit_conditional_move (rtx target, enum rtx_code code, rtx op0, rtx op1,
                             OPTAB_WIDEN, &comparison, &cmpmode);
           if (comparison)
             {
-             struct expand_operand ops[4];
+             class expand_operand ops[4];
  
               create_output_operand (&ops[0], target, mode);
               create_fixed_operand (&ops[1], comparison);
@@ -4426,7 +4999,7 @@ emit_conditional_neg_or_complement (rtx target, rtx_code code,
      target = gen_reg_rtx (mode);
  
    rtx_insn *last = get_last_insn ();
-  struct expand_operand ops[4];
+  class expand_operand ops[4];
  
    create_output_operand (&ops[0], target, mode);
    create_fixed_operand (&ops[1], cond);
@@ -4514,7 +5087,7 @@ emit_conditional_add (rtx target, enum rtx_code code, rtx op0, rtx op1,
                      &comparison, &cmode);
    if (comparison)
      {
-      struct expand_operand ops[4];
+      class expand_operand ops[4];
  
        create_output_operand (&ops[0], target, mode);
        create_fixed_operand (&ops[1], comparison);
@@ -4923,20 +5496,21 @@ expand_fix (rtx to, rtx from, int unsignedp)
         if (icode != CODE_FOR_nothing)
           {
             rtx_insn *last = get_last_insn ();
+           rtx from1 = from;
             if (fmode != GET_MODE (from))
-             from = convert_to_mode (fmode, from, 0);
+             from1 = convert_to_mode (fmode, from, 0);
  
             if (must_trunc)
               {
-               rtx temp = gen_reg_rtx (GET_MODE (from));
-               from = expand_unop (GET_MODE (from), ftrunc_optab, from,
-                                   temp, 0);
+               rtx temp = gen_reg_rtx (GET_MODE (from1));
+               from1 = expand_unop (GET_MODE (from1), ftrunc_optab, from1,
+                                    temp, 0);
               }
  
             if (imode != GET_MODE (to))
               target = gen_reg_rtx (imode);
  
-           if (maybe_emit_unop_insn (icode, target, from,
+           if (maybe_emit_unop_insn (icode, target, from1,
                                       doing_unsigned ? UNSIGNED_FIX : FIX))
               {
                 if (target != to)
@@ -5303,11 +5877,11 @@ gen_cond_trap (enum rtx_code code, rtx op1, rtx op2, rtx tcode)
    return insn;
  }
  
-/* Return rtx code for TCODE. Use UNSIGNEDP to select signed
+/* Return rtx code for TCODE or UNKNOWN.  Use UNSIGNEDP to select signed
     or unsigned operation code.  */
  
  enum rtx_code
-get_rtx_code (enum tree_code tcode, bool unsignedp)
+get_rtx_code_1 (enum tree_code tcode, bool unsignedp)
  {
    enum rtx_code code;
    switch (tcode)
@@ -5365,22 +5939,34 @@ get_rtx_code (enum tree_code tcode, bool unsignedp)
        break;
  
      default:
-      gcc_unreachable ();
+      code = UNKNOWN;
+      break;
      }
    return code;
  }
  
+/* Return the rtx code for TCODE as computed by get_rtx_code_1, asserting
+   that it is not UNKNOWN.  UNSIGNEDP selects signed or unsigned codes.  */
+
+enum rtx_code
+get_rtx_code (enum tree_code tcode, bool unsignedp)
+{
+  enum rtx_code code = get_rtx_code_1 (tcode, unsignedp);
+  gcc_assert (code != UNKNOWN);
+  return code;
+}
+
  /* Return a comparison rtx of mode CMP_MODE for COND.  Use UNSIGNEDP to
     select signed or unsigned operators.  OPNO holds the index of the
     first comparison operand for insn ICODE.  Do not generate the
     compare instruction itself.  */
  
-static rtx
+rtx
  vector_compare_rtx (machine_mode cmp_mode, enum tree_code tcode,
                     tree t_op0, tree t_op1, bool unsignedp,
                     enum insn_code icode, unsigned int opno)
  {
-  struct expand_operand ops[2];
+  class expand_operand ops[2];
    rtx rtx_op0, rtx_op1;
    machine_mode m0, m1;
    enum rtx_code rcode = get_rtx_code (tcode, unsignedp);
@@ -5410,19 +5996,45 @@ vector_compare_rtx (machine_mode cmp_mode, enum tree_code tcode,
  }
  
  /* Check if vec_perm mask SEL is a constant equivalent to a shift of
-   the first vec_perm operand, assuming the second operand is a constant
-   vector of zeros.  Return the shift distance in bits if so, or NULL_RTX
-   if the vec_perm is not a shift.  MODE is the mode of the value being
-   shifted.  */
+   a vec_perm operand: a right shift of the first operand when the second
+   is a constant vector of zeros, or a left shift of the second operand when
+   the first is.  Return the shift distance in bits if so, or NULL_RTX if
+   the vec_perm is not a shift.  MODE is the mode of the value being shifted.
+   SHIFT_OPTAB is vec_shr_optab for right shift or vec_shl_optab for left.  */
  static rtx
-shift_amt_for_vec_perm_mask (machine_mode mode, const vec_perm_indices &sel)
+shift_amt_for_vec_perm_mask (machine_mode mode, const vec_perm_indices &sel,
+                            optab shift_optab)
  {
    unsigned int bitsize = GET_MODE_UNIT_BITSIZE (mode);
    poly_int64 first = sel[0];
    if (maybe_ge (sel[0], GET_MODE_NUNITS (mode)))
      return NULL_RTX;
  
-  if (!sel.series_p (0, 1, first, 1))
+  if (shift_optab == vec_shl_optab)
+    {
+      unsigned int nelt;
+      if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
+       return NULL_RTX;
+      unsigned firstidx = 0;
+      for (unsigned int i = 0; i < nelt; i++)
+       {
+         if (known_eq (sel[i], nelt))
+           {
+             if (i == 0 || firstidx)
+               return NULL_RTX;
+             firstidx = i;
+           }
+         else if (firstidx
+                  ? maybe_ne (sel[i], nelt + i - firstidx)
+                  : maybe_ge (sel[i], nelt))
+           return NULL_RTX;
+       }
+
+      if (firstidx == 0)
+       return NULL_RTX;
+      first = firstidx;
+    }
+  else if (!sel.series_p (0, 1, first, 1))
      {
        unsigned int nelt;
        if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
@@ -5449,10 +6061,10 @@ expand_vec_perm_1 (enum insn_code icode, rtx target,
  {
    machine_mode tmode = GET_MODE (target);
    machine_mode smode = GET_MODE (sel);
-  struct expand_operand ops[4];
+  class expand_operand ops[4];
  
    gcc_assert (GET_MODE_CLASS (smode) == MODE_VECTOR_INT
-             || mode_for_int_vector (tmode).require () == smode);
+             || related_int_vector_mode (tmode).require () == smode);
    create_output_operand (&ops[0], target, tmode);
    create_input_operand (&ops[3], sel, smode);
  
@@ -5510,25 +6122,39 @@ expand_vec_perm_const (machine_mode mode, rtx v0, rtx v1,
       target instruction.  */
    vec_perm_indices indices (sel, 2, GET_MODE_NUNITS (mode));
  
-  /* See if this can be handled with a vec_shr.  We only do this if the
-     second vector is all zeroes.  */
-  insn_code shift_code = optab_handler (vec_shr_optab, mode);
-  insn_code shift_code_qi = ((qimode != VOIDmode && qimode != mode)
-                            ? optab_handler (vec_shr_optab, qimode)
-                            : CODE_FOR_nothing);
-
-  if (v1 == CONST0_RTX (GET_MODE (v1))
-      && (shift_code != CODE_FOR_nothing
-         || shift_code_qi != CODE_FOR_nothing))
+  /* See if this can be handled with a vec_shr or vec_shl.  We only do this
+     if the second (for vec_shr) or first (for vec_shl) vector is all
+     zeroes.  */
+  insn_code shift_code = CODE_FOR_nothing;
+  insn_code shift_code_qi = CODE_FOR_nothing;
+  optab shift_optab = unknown_optab;
+  rtx v2 = v0;
+  if (v1 == CONST0_RTX (GET_MODE (v1)))
+    shift_optab = vec_shr_optab;
+  else if (v0 == CONST0_RTX (GET_MODE (v0)))
+    {
+      shift_optab = vec_shl_optab;
+      v2 = v1;
+    }
+  if (shift_optab != unknown_optab)
      {
-      rtx shift_amt = shift_amt_for_vec_perm_mask (mode, indices);
+      shift_code = optab_handler (shift_optab, mode);
+      shift_code_qi = ((qimode != VOIDmode && qimode != mode)
+                      ? optab_handler (shift_optab, qimode)
+                      : CODE_FOR_nothing);
+    }
+  if (shift_code != CODE_FOR_nothing || shift_code_qi != CODE_FOR_nothing)
+    {
+      rtx shift_amt = shift_amt_for_vec_perm_mask (mode, indices, shift_optab);
        if (shift_amt)
         {
-         struct expand_operand ops[3];
+         class expand_operand ops[3];
+         if (shift_amt == const0_rtx)
+           return v2;
           if (shift_code != CODE_FOR_nothing)
             {
               create_output_operand (&ops[0], target, mode);
-             create_input_operand (&ops[1], v0, mode);
+             create_input_operand (&ops[1], v2, mode);
               create_convert_operand_from_type (&ops[2], shift_amt, sizetype);
               if (maybe_expand_insn (shift_code, 3, ops))
                 return ops[0].value;
@@ -5537,7 +6163,7 @@ expand_vec_perm_const (machine_mode mode, rtx v0, rtx v1,
             {
               rtx tmp = gen_reg_rtx (qimode);
               create_output_operand (&ops[0], tmp, qimode);
-             create_input_operand (&ops[1], gen_lowpart (qimode, v0), qimode);
+             create_input_operand (&ops[1], gen_lowpart (qimode, v2), qimode);
               create_convert_operand_from_type (&ops[2], shift_amt, sizetype);
               if (maybe_expand_insn (shift_code_qi, 3, ops))
                 return gen_lowpart (mode, ops[0].value);
@@ -5547,11 +6173,8 @@ expand_vec_perm_const (machine_mode mode, rtx v0, rtx v1,
  
    if (targetm.vectorize.vec_perm_const != NULL)
      {
-      v0 = force_reg (mode, v0);
        if (single_arg_p)
         v1 = v0;
-      else
-       v1 = force_reg (mode, v1);
  
        if (targetm.vectorize.vec_perm_const (mode, target, v0, v1, indices))
         return target;
@@ -5572,13 +6195,17 @@ expand_vec_perm_const (machine_mode mode, rtx v0, rtx v1,
         return gen_lowpart (mode, target_qi);
      }
  
+  v0 = force_reg (mode, v0);
+  if (single_arg_p)
+    v1 = v0;
+  v1 = force_reg (mode, v1);
+
    /* Otherwise expand as a fully variable permuation.  */
  
    /* The optabs are only defined for selectors with the same width
       as the values being permuted.  */
    machine_mode required_sel_mode;
-  if (!mode_for_int_vector (mode).exists (&required_sel_mode)
-      || !VECTOR_MODE_P (required_sel_mode))
+  if (!related_int_vector_mode (mode).exists (&required_sel_mode))
      {
        delete_insns_since (last);
        return NULL_RTX;
@@ -5703,116 +6330,13 @@ expand_vec_perm_var (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
    return tmp;
  }
  
-/* Generate insns for a VEC_COND_EXPR with mask, given its TYPE and its
-   three operands.  */
-
-rtx
-expand_vec_cond_mask_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
-                          rtx target)
-{
-  struct expand_operand ops[4];
-  machine_mode mode = TYPE_MODE (vec_cond_type);
-  machine_mode mask_mode = TYPE_MODE (TREE_TYPE (op0));
-  enum insn_code icode = get_vcond_mask_icode (mode, mask_mode);
-  rtx mask, rtx_op1, rtx_op2;
-
-  if (icode == CODE_FOR_nothing)
-    return 0;
-
-  mask = expand_normal (op0);
-  rtx_op1 = expand_normal (op1);
-  rtx_op2 = expand_normal (op2);
-
-  mask = force_reg (mask_mode, mask);
-  rtx_op1 = force_reg (GET_MODE (rtx_op1), rtx_op1);
-
-  create_output_operand (&ops[0], target, mode);
-  create_input_operand (&ops[1], rtx_op1, mode);
-  create_input_operand (&ops[2], rtx_op2, mode);
-  create_input_operand (&ops[3], mask, mask_mode);
-  expand_insn (icode, 4, ops);
-
-  return ops[0].value;
-}
-
-/* Generate insns for a VEC_COND_EXPR, given its TYPE and its
-   three operands.  */
-
-rtx
-expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
-                     rtx target)
-{
-  struct expand_operand ops[6];
-  enum insn_code icode;
-  rtx comparison, rtx_op1, rtx_op2;
-  machine_mode mode = TYPE_MODE (vec_cond_type);
-  machine_mode cmp_op_mode;
-  bool unsignedp;
-  tree op0a, op0b;
-  enum tree_code tcode;
-
-  if (COMPARISON_CLASS_P (op0))
-    {
-      op0a = TREE_OPERAND (op0, 0);
-      op0b = TREE_OPERAND (op0, 1);
-      tcode = TREE_CODE (op0);
-    }
-  else
-    {
-      gcc_assert (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (op0)));
-      if (get_vcond_mask_icode (mode, TYPE_MODE (TREE_TYPE (op0)))
-         != CODE_FOR_nothing)
-       return expand_vec_cond_mask_expr (vec_cond_type, op0, op1,
-                                         op2, target);
-      /* Fake op0 < 0.  */
-      else
-       {
-         gcc_assert (GET_MODE_CLASS (TYPE_MODE (TREE_TYPE (op0)))
-                     == MODE_VECTOR_INT);
-         op0a = op0;
-         op0b = build_zero_cst (TREE_TYPE (op0));
-         tcode = LT_EXPR;
-       }
-    }
-  cmp_op_mode = TYPE_MODE (TREE_TYPE (op0a));
-  unsignedp = TYPE_UNSIGNED (TREE_TYPE (op0a));
-
-
-  gcc_assert (known_eq (GET_MODE_SIZE (mode), GET_MODE_SIZE (cmp_op_mode))
-             && known_eq (GET_MODE_NUNITS (mode),
-                          GET_MODE_NUNITS (cmp_op_mode)));
-
-  icode = get_vcond_icode (mode, cmp_op_mode, unsignedp);
-  if (icode == CODE_FOR_nothing)
-    {
-      if (tcode == EQ_EXPR || tcode == NE_EXPR)
-       icode = get_vcond_eq_icode (mode, cmp_op_mode);
-      if (icode == CODE_FOR_nothing)
-       return 0;
-    }
-
-  comparison = vector_compare_rtx (VOIDmode, tcode, op0a, op0b, unsignedp,
-                                  icode, 4);
-  rtx_op1 = expand_normal (op1);
-  rtx_op2 = expand_normal (op2);
-
-  create_output_operand (&ops[0], target, mode);
-  create_input_operand (&ops[1], rtx_op1, mode);
-  create_input_operand (&ops[2], rtx_op2, mode);
-  create_fixed_operand (&ops[3], comparison);
-  create_fixed_operand (&ops[4], XEXP (comparison, 0));
-  create_fixed_operand (&ops[5], XEXP (comparison, 1));
-  expand_insn (icode, 6, ops);
-  return ops[0].value;
-}
-
  /* Generate VEC_SERIES_EXPR <OP0, OP1>, returning a value of mode VMODE.
     Use TARGET for the result if nonnull and convenient.  */
  
  rtx
  expand_vec_series_expr (machine_mode vmode, rtx op0, rtx op1, rtx target)
  {
-  struct expand_operand ops[3];
+  class expand_operand ops[3];
    enum insn_code icode;
    machine_mode emode = GET_MODE_INNER (vmode);
  
@@ -5832,7 +6356,7 @@ expand_vec_series_expr (machine_mode vmode, rtx op0, rtx op1, rtx target)
  rtx
  expand_vec_cmp_expr (tree type, tree exp, rtx target)
  {
-  struct expand_operand ops[4];
+  class expand_operand ops[4];
    enum insn_code icode;
    rtx comparison;
    machine_mode mask_mode = TYPE_MODE (type);
@@ -5873,7 +6397,7 @@ rtx
  expand_mult_highpart (machine_mode mode, rtx op0, rtx op1,
                       rtx target, bool uns_p)
  {
-  struct expand_operand eops[3];
+  class expand_operand eops[3];
    enum insn_code icode;
    int method, i;
    machine_mode wmode;
@@ -6026,7 +6550,7 @@ maybe_emit_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model)
    icode = direct_optab_handler (atomic_exchange_optab, mode);
    if (icode != CODE_FOR_nothing)
      {
-      struct expand_operand ops[4];
+      class expand_operand ops[4];
  
        create_output_operand (&ops[0], target, mode);
        create_fixed_operand (&ops[1], mem);
@@ -6064,7 +6588,7 @@ maybe_emit_sync_lock_test_and_set (rtx target, rtx mem, rtx val,
  
    if (icode != CODE_FOR_nothing)
      {
-      struct expand_operand ops[3];
+      class expand_operand ops[3];
        create_output_operand (&ops[0], target, mode);
        create_fixed_operand (&ops[1], mem);
        create_input_operand (&ops[2], val, mode);
@@ -6124,7 +6648,7 @@ static rtx
  maybe_emit_atomic_test_and_set (rtx target, rtx mem, enum memmodel model)
  {
    machine_mode pat_bool_mode;
-  struct expand_operand ops[3];
+  class expand_operand ops[3];
  
    if (!targetm.have_atomic_test_and_set ())
      return NULL_RTX;
@@ -6294,7 +6818,7 @@ expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval,
                                 enum memmodel fail_model)
  {
    machine_mode mode = GET_MODE (mem);
-  struct expand_operand ops[8];
+  class expand_operand ops[8];
    enum insn_code icode;
    rtx target_oval, target_bool = NULL_RTX;
    rtx libfunc;
@@ -6376,7 +6900,7 @@ expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval,
        /* Otherwise, work out if the compare-and-swap succeeded.  */
        cc_reg = NULL_RTX;
        if (have_insn_for (COMPARE, CCmode))
-       note_stores (PATTERN (get_last_insn ()), find_cc_set, &cc_reg);
+       note_stores (get_last_insn (), find_cc_set, &cc_reg);
        if (cc_reg)
         {
           target_bool = emit_store_flag_force (target_bool, EQ, cc_reg,
@@ -6448,6 +6972,48 @@ expand_memory_blockage (void)
      expand_asm_memory_blockage ();
  }
  
+/* Emit asm volatile ("" : : : "memory") as a memory blockage that also
+   clobbers every hard register set in REGS.  */
+
+void
+expand_asm_reg_clobber_mem_blockage (HARD_REG_SET regs)
+{
+  rtx asm_op, clob_mem;
+  /* Count the hard registers set in REGS to size the PARALLEL vector.  */
+  unsigned int num_of_regs = 0;
+  for (unsigned int i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+    if (TEST_HARD_REG_BIT (regs, i))
+      num_of_regs++;
+  /* Build the asm itself: empty strings, empty vectors, marked volatile.  */
+  asm_op = gen_rtx_ASM_OPERANDS (VOIDmode, "", "", 0,
+                                rtvec_alloc (0), rtvec_alloc (0),
+                                rtvec_alloc (0), UNKNOWN_LOCATION);
+  MEM_VOLATILE_P (asm_op) = 1;
+
+  rtvec v = rtvec_alloc (num_of_regs + 2);
+  /* (clobber (mem:BLK (scratch))) stands for the "memory" clobber.  */
+  clob_mem = gen_rtx_SCRATCH (VOIDmode);
+  clob_mem = gen_rtx_MEM (BLKmode, clob_mem);
+  clob_mem = gen_rtx_CLOBBER (VOIDmode, clob_mem);
+
+  RTVEC_ELT (v, 0) = asm_op;
+  RTVEC_ELT (v, 1) = clob_mem;
+  /* Slots 2 onwards: one CLOBBER per hard register set in REGS.  */
+  if (num_of_regs > 0)
+    {
+      unsigned int j = 2;
+      for (unsigned int i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+       if (TEST_HARD_REG_BIT (regs, i))
+         {
+           RTVEC_ELT (v, j) = gen_rtx_CLOBBER (VOIDmode, regno_reg_rtx[i]);
+           j++;
+         }
+      gcc_assert (j == (num_of_regs + 2));
+    }
+
+  emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
+}
+
  /* This routine will either emit the mem_thread_fence pattern or issue a 
     sync_synchronize to generate a fence for memory model MEMMODEL.  */
  
@@ -6496,7 +7062,7 @@ expand_atomic_load (rtx target, rtx mem, enum memmodel model)
    icode = direct_optab_handler (atomic_load_optab, mode);
    if (icode != CODE_FOR_nothing)
      {
-      struct expand_operand ops[3];
+      class expand_operand ops[3];
        rtx_insn *last = get_last_insn ();
        if (is_mm_seq_cst (model))
         expand_memory_blockage ();
@@ -6549,7 +7115,7 @@ expand_atomic_store (rtx mem, rtx val, enum memmodel model, bool use_release)
  {
    machine_mode mode = GET_MODE (mem);
    enum insn_code icode;
-  struct expand_operand ops[3];
+  class expand_operand ops[3];
  
    /* If the target supports the store directly, great.  */
    icode = direct_optab_handler (atomic_store_optab, mode);
@@ -6759,7 +7325,7 @@ maybe_emit_op (const struct atomic_op_functions *optab, rtx target, rtx mem,
                rtx val, bool use_memmodel, enum memmodel model, bool after)
  {
    machine_mode mode = GET_MODE (mem);
-  struct expand_operand ops[4];
+  class expand_operand ops[4];
    enum insn_code icode;
    int op_counter = 0;
    int num_ops;
@@ -7073,7 +7639,7 @@ valid_multiword_target_p (rtx target)
     of that rtx if so.  */
  
  void
-create_integer_operand (struct expand_operand *op, poly_int64 intval)
+create_integer_operand (class expand_operand *op, poly_int64 intval)
  {
    create_expand_operand (op, EXPAND_INTEGER,
                          gen_int_mode (intval, MAX_MODE_INT),
@@ -7085,7 +7651,7 @@ create_integer_operand (struct expand_operand *op, poly_int64 intval)
  
  static bool
  maybe_legitimize_operand_same_code (enum insn_code icode, unsigned int opno,
-                                   struct expand_operand *op)
+                                   class expand_operand *op)
  {
    /* See if the operand matches in its current form.  */
    if (insn_operand_matches (icode, opno, op->value))
@@ -7127,20 +7693,18 @@ maybe_legitimize_operand_same_code (enum insn_code icode, unsigned int opno,
  
  static bool
  maybe_legitimize_operand (enum insn_code icode, unsigned int opno,
-                         struct expand_operand *op)
+                         class expand_operand *op)
  {
-  machine_mode mode, imode;
-  bool old_volatile_ok, result;
+  machine_mode mode, imode, tmode;
  
    mode = op->mode;
    switch (op->type)
      {
      case EXPAND_FIXED:
-      old_volatile_ok = volatile_ok;
-      volatile_ok = true;
-      result = maybe_legitimize_operand_same_code (icode, opno, op);
-      volatile_ok = old_volatile_ok;
-      return result;
+      {
+       temporary_volatile_ok v (true);
+       return maybe_legitimize_operand_same_code (icode, opno, op);
+      }
  
      case EXPAND_OUTPUT:
        gcc_assert (mode != VOIDmode);
@@ -7178,9 +7742,17 @@ maybe_legitimize_operand (enum insn_code icode, unsigned int opno,
         gcc_assert (mode != VOIDmode);
  
        imode = insn_data[(int) icode].operand[opno].mode;
+      tmode = (VECTOR_MODE_P (imode) && !VECTOR_MODE_P (mode)
+              ? GET_MODE_INNER (imode) : imode);
+      if (tmode != VOIDmode && tmode != mode)
+       {
+         op->value = convert_modes (tmode, mode, op->value, op->unsigned_p);
+         mode = tmode;
+       }
        if (imode != VOIDmode && imode != mode)
         {
-         op->value = convert_modes (imode, mode, op->value, op->unsigned_p);
+         gcc_assert (VECTOR_MODE_P (imode) && !VECTOR_MODE_P (mode));
+         op->value = expand_vector_broadcast (imode, op->value);
           mode = imode;
         }
        goto input;
@@ -7209,7 +7781,7 @@ maybe_legitimize_operand (enum insn_code icode, unsigned int opno,
     TYPE is the type of VALUE.  */
  
  void
-create_convert_operand_from_type (struct expand_operand *op,
+create_convert_operand_from_type (class expand_operand *op,
                                   rtx value, tree type)
  {
    create_convert_operand_from (op, value, TYPE_MODE (type),
@@ -7224,8 +7796,8 @@ create_convert_operand_from_type (struct expand_operand *op,
  static inline bool
  can_reuse_operands_p (enum insn_code icode,
                       unsigned int opno1, unsigned int opno2,
-                     const struct expand_operand *op1,
-                     const struct expand_operand *op2)
+                     const class expand_operand *op1,
+                     const class expand_operand *op2)
  {
    /* Check requirements that are common to all types.  */
    if (op1->type != op2->type
@@ -7260,7 +7832,7 @@ can_reuse_operands_p (enum insn_code icode,
  
  bool
  maybe_legitimize_operands (enum insn_code icode, unsigned int opno,
-                          unsigned int nops, struct expand_operand *ops)
+                          unsigned int nops, class expand_operand *ops)
  {
    rtx_insn *last = get_last_insn ();
    rtx *orig_values = XALLOCAVEC (rtx, nops);
@@ -7302,7 +7874,7 @@ maybe_legitimize_operands (enum insn_code icode, unsigned int opno,
  
  rtx_insn *
  maybe_gen_insn (enum insn_code icode, unsigned int nops,
-               struct expand_operand *ops)
+               class expand_operand *ops)
  {
    gcc_assert (nops == (unsigned int) insn_data[(int) icode].n_generator_args);
    if (!maybe_legitimize_operands (icode, 0, nops, ops))
@@ -7346,7 +7918,7 @@ maybe_gen_insn (enum insn_code icode, unsigned int nops,
  
  bool
  maybe_expand_insn (enum insn_code icode, unsigned int nops,
-                  struct expand_operand *ops)
+                  class expand_operand *ops)
  {
    rtx_insn *pat = maybe_gen_insn (icode, nops, ops);
    if (pat)
@@ -7361,7 +7933,7 @@ maybe_expand_insn (enum insn_code icode, unsigned int nops,
  
  bool
  maybe_expand_jump_insn (enum insn_code icode, unsigned int nops,
-                       struct expand_operand *ops)
+                       class expand_operand *ops)
  {
    rtx_insn *pat = maybe_gen_insn (icode, nops, ops);
    if (pat)
@@ -7377,7 +7949,7 @@ maybe_expand_jump_insn (enum insn_code icode, unsigned int nops,
  
  void
  expand_insn (enum insn_code icode, unsigned int nops,
-            struct expand_operand *ops)
+            class expand_operand *ops)
  {
    if (!maybe_expand_insn (icode, nops, ops))
      gcc_unreachable ();
@@ -7387,7 +7959,7 @@ expand_insn (enum insn_code icode, unsigned int nops,
  
  void
  expand_jump_insn (enum insn_code icode, unsigned int nops,
-                 struct expand_operand *ops)
+                 class expand_operand *ops)
  {
    if (!maybe_expand_jump_insn (icode, nops, ops))
      gcc_unreachable ();