}
-/* Function vect_permute_store_chain.
-
- Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
- a power of 2 or equal to 3, generate interleave_high/low stmts to reorder
- the data correctly for the stores. Return the final references for stores
- in RESULT_CHAIN.
-
- E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
- The input is 4 vectors each containing 8 elements. We assign a number to
- each element, the input sequence is:
-
- 1st vec: 0 1 2 3 4 5 6 7
- 2nd vec: 8 9 10 11 12 13 14 15
- 3rd vec: 16 17 18 19 20 21 22 23
- 4th vec: 24 25 26 27 28 29 30 31
-
- The output sequence should be:
-
- 1st vec: 0 8 16 24 1 9 17 25
- 2nd vec: 2 10 18 26 3 11 19 27
- 3rd vec: 4 12 20 28 5 13 21 30
- 4th vec: 6 14 22 30 7 15 23 31
-
- i.e., we interleave the contents of the four vectors in their order.
-
- We use interleave_high/low instructions to create such output. The input of
- each interleave_high/low operation is two vectors:
- 1st vec 2nd vec
- 0 1 2 3 4 5 6 7
- the even elements of the result vector are obtained left-to-right from the
- high/low elements of the first vector. The odd elements of the result are
- obtained left-to-right from the high/low elements of the second vector.
- The output of interleave_high will be: 0 4 1 5
- and of interleave_low: 2 6 3 7
-
-
- The permutation is done in log LENGTH stages. In each stage interleave_high
- and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
- where the first argument is taken from the first half of DR_CHAIN and the
- second argument from it's second half.
- In our example,
-
- I1: interleave_high (1st vec, 3rd vec)
- I2: interleave_low (1st vec, 3rd vec)
- I3: interleave_high (2nd vec, 4th vec)
- I4: interleave_low (2nd vec, 4th vec)
-
- The output for the first stage is:
-
- I1: 0 16 1 17 2 18 3 19
- I2: 4 20 5 21 6 22 7 23
- I3: 8 24 9 25 10 26 11 27
- I4: 12 28 13 29 14 30 15 31
-
- The output of the second stage, i.e. the final result is:
-
- I1: 0 8 16 24 1 9 17 25
- I2: 2 10 18 26 3 11 19 27
- I3: 4 12 20 28 5 13 21 30
- I4: 6 14 22 30 7 15 23 31. */
-
-void
-vect_permute_store_chain (vec_info *vinfo, vec<tree> &dr_chain,
- unsigned int length,
- stmt_vec_info stmt_info,
- gimple_stmt_iterator *gsi,
- vec<tree> *result_chain)
-{
- tree vect1, vect2, high, low;
- gimple *perm_stmt;
- tree vectype = STMT_VINFO_VECTYPE (stmt_info);
- tree perm_mask_low, perm_mask_high;
- tree data_ref;
- tree perm3_mask_low, perm3_mask_high;
- unsigned int i, j, n, log_length = exact_log2 (length);
-
- result_chain->quick_grow (length);
- memcpy (result_chain->address (), dr_chain.address (),
- length * sizeof (tree));
-
- if (length == 3)
- {
- /* vect_grouped_store_supported ensures that this is constant. */
- unsigned int nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
- unsigned int j0 = 0, j1 = 0, j2 = 0;
-
- vec_perm_builder sel (nelt, nelt, 1);
- sel.quick_grow (nelt);
- vec_perm_indices indices;
- for (j = 0; j < 3; j++)
- {
- int nelt0 = ((3 - j) * nelt) % 3;
- int nelt1 = ((3 - j) * nelt + 1) % 3;
- int nelt2 = ((3 - j) * nelt + 2) % 3;
-
- for (i = 0; i < nelt; i++)
- {
- if (3 * i + nelt0 < nelt)
- sel[3 * i + nelt0] = j0++;
- if (3 * i + nelt1 < nelt)
- sel[3 * i + nelt1] = nelt + j1++;
- if (3 * i + nelt2 < nelt)
- sel[3 * i + nelt2] = 0;
- }
- indices.new_vector (sel, 2, nelt);
- perm3_mask_low = vect_gen_perm_mask_checked (vectype, indices);
-
- for (i = 0; i < nelt; i++)
- {
- if (3 * i + nelt0 < nelt)
- sel[3 * i + nelt0] = 3 * i + nelt0;
- if (3 * i + nelt1 < nelt)
- sel[3 * i + nelt1] = 3 * i + nelt1;
- if (3 * i + nelt2 < nelt)
- sel[3 * i + nelt2] = nelt + j2++;
- }
- indices.new_vector (sel, 2, nelt);
- perm3_mask_high = vect_gen_perm_mask_checked (vectype, indices);
-
- vect1 = dr_chain[0];
- vect2 = dr_chain[1];
-
- /* Create interleaving stmt:
- low = VEC_PERM_EXPR <vect1, vect2,
- {j, nelt, *, j + 1, nelt + j + 1, *,
- j + 2, nelt + j + 2, *, ...}> */
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low");
- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
- vect2, perm3_mask_low);
- vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
-
- vect1 = data_ref;
- vect2 = dr_chain[2];
- /* Create interleaving stmt:
- low = VEC_PERM_EXPR <vect1, vect2,
- {0, 1, nelt + j, 3, 4, nelt + j + 1,
- 6, 7, nelt + j + 2, ...}> */
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high");
- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
- vect2, perm3_mask_high);
- vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
- (*result_chain)[j] = data_ref;
- }
- }
- else
- {
- /* If length is not equal to 3 then only power of 2 is supported. */
- gcc_assert (pow2p_hwi (length));
-
- /* The encoding has 2 interleaved stepped patterns. */
- poly_uint64 nelt = TYPE_VECTOR_SUBPARTS (vectype);
- vec_perm_builder sel (nelt, 2, 3);
- sel.quick_grow (6);
- for (i = 0; i < 3; i++)
- {
- sel[i * 2] = i;
- sel[i * 2 + 1] = i + nelt;
- }
- vec_perm_indices indices (sel, 2, nelt);
- perm_mask_high = vect_gen_perm_mask_checked (vectype, indices);
-
- for (i = 0; i < 6; i++)
- sel[i] += exact_div (nelt, 2);
- indices.new_vector (sel, 2, nelt);
- perm_mask_low = vect_gen_perm_mask_checked (vectype, indices);
-
- for (i = 0, n = log_length; i < n; i++)
- {
- for (j = 0; j < length/2; j++)
- {
- vect1 = dr_chain[j];
- vect2 = dr_chain[j+length/2];
-
- /* Create interleaving stmt:
- high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1,
- ...}> */
- high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
- perm_stmt = gimple_build_assign (high, VEC_PERM_EXPR, vect1,
- vect2, perm_mask_high);
- vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
- (*result_chain)[2*j] = high;
-
- /* Create interleaving stmt:
- low = VEC_PERM_EXPR <vect1, vect2,
- {nelt/2, nelt*3/2, nelt/2+1, nelt*3/2+1,
- ...}> */
- low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
- perm_stmt = gimple_build_assign (low, VEC_PERM_EXPR, vect1,
- vect2, perm_mask_low);
- vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
- (*result_chain)[2*j+1] = low;
- }
- memcpy (dr_chain.address (), result_chain->address (),
- length * sizeof (tree));
- }
- }
-}
-
/* Function vect_setup_realignment
This function is called when vectorizing an unaligned load using