1 /* Matrix layout transformations.
2 Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
3 Contributed by Razya Ladelsky <razya@il.ibm.com>
4 Originally written by Revital Eres and Mustafa Hagog.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 /*
23    Matrix flattening optimization tries to replace an N-dimensional
24    matrix with an equivalent M-dimensional matrix, where M < N.
25 This first implementation focuses on global matrices defined dynamically.
26
27    When M==1, we flatten the whole matrix into a single dimension.
28    For instance, consider a two-dimensional array a [dim1] [dim2].
29 The code for allocating space for it usually looks like:
30
31 a = (int **) malloc(dim1 * sizeof(int *));
32 for (i=0; i<dim1; i++)
33 a[i] = (int *) malloc (dim2 * sizeof(int));
34
35 If the array "a" is found suitable for this optimization,
36 its allocation is replaced by:
37
38    a = (int *) malloc (dim1 * dim2 * sizeof(int));
39
40 and all the references to a[i][j] are replaced by a[i * dim2 + j].
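      For illustration: with dim1 == 4 and dim2 == 5, the element that was
      accessed as a[2][3] is found at a[2 * 5 + 3], i.e. a[13].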
41
42 The two main phases of the optimization are the analysis
43 and transformation.
44 The driver of the optimization is matrix_reorg ().
45
46
47
48 Analysis phase:
49 ===============
50
51 We'll number the dimensions outside-in, meaning the most external
52 is 0, then 1, and so on.
53 The analysis part of the optimization determines K, the escape
54    level of an N-dimensional matrix (K <= N), which allows flattening of
55 the external dimensions 0,1,..., K-1. Escape level 0 means that the
56 whole matrix escapes and no flattening is possible.
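
      As an illustrative sketch (an assumption about the shapes, not text
      from the sources): for a three-dimensional matrix with escape level
      K == 2, the two outer dimensions can be collapsed, so the allocation
      becomes a = malloc (dim1 * dim2 * sizeof (int *)), each a[i * dim2 + j]
      still points to a separately allocated row, and an access a[i][j][k]
      is rewritten as a[i * dim2 + j][k].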
57
58 The analysis part is implemented in analyze_matrix_allocation_site()
59 and analyze_matrix_accesses().
60
61 Transformation phase:
62 =====================
63 In this phase we define the new flattened matrices that replace the
64 original matrices in the code.
65 Implemented in transform_allocation_sites(),
66 transform_access_sites().
67
68 Matrix Transposing
69 ==================
70    The idea of matrix transposing is to organize the matrix in a
71    different layout such that the dimensions are reordered.
72    This can produce better cache behavior in some cases.
73
74    For example, let's look at the matrix accesses in the following loop:
75
76 for (i=0; i<N; i++)
77 for (j=0; j<M; j++)
78 access to a[i][j]
79
80 This loop can produce good cache behavior because the elements of
81 the inner dimension are accessed sequentially.
82
83 However, if the accesses of the matrix were of the following form:
84
85 for (i=0; i<N; i++)
86 for (j=0; j<M; j++)
87 access to a[j][i]
88
89    In this loop we iterate over the columns and not the rows.
90    Therefore, swapping the rows and columns
91    would yield a layout with better (cache) locality.
92    Reordering the dimensions of the matrix is called matrix transposing.
93
94    This example, of course, extends to matrices with more dimensions
95    as well.
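
      A sketch of the combined effect (illustrative only): if the dominant
      accesses look like a[j][i] as above, the transposed and flattened
      matrix is laid out so that the inner-loop index varies fastest, and
      the access becomes a[i * M + j], which walks memory sequentially.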
96
97    Since a program could include all kinds of accesses, there is a
98    decision mechanism, implemented in analyze_transpose(), which uses a
99    heuristic to determine whether to transpose the matrix or not,
100    according to the form of the dominant accesses.
101    This decision is passed on to the flattening mechanism; whether or
102    not the matrix is transposed, it is flattened (if possible).
103
104    This decision making is based on profiling and loop information.
105    If profiling information is available, the decision mechanism is
106    used; otherwise the matrix is only flattened (if possible).
107
108 Both optimizations are described in the paper "Matrix flattening and
109    transposing in GCC" which was presented at the GCC Summit 2006.
110 http://www.gccsummit.org/2006/2006-GCC-Summit-Proceedings.pdf. */
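
/* For reference, a minimal sketch of the kind of candidate this pass
   targets (illustrative only; whether it is actually transformed depends
   on the escape analysis performed below):

       int **a;    (a global, dynamically allocated matrix)

       void
       alloc_it (int dim1, int dim2)
       {
         int i;

         a = (int **) malloc (dim1 * sizeof (int *));
         for (i = 0; i < dim1; i++)
           a[i] = (int *) malloc (dim2 * sizeof (int));
       }

   After flattening, the pass allocates dim1 * dim2 ints at once and
   rewrites accesses a[i][j] into a[i * dim2 + j].  Per the GCC manual,
   the pass is enabled with -fipa-matrix-reorg and needs the
   -fwhole-program flag; transposing additionally needs profile data.  */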
111
112 #include "config.h"
113 #include "system.h"
114 #include "coretypes.h"
115 #include "tm.h"
116 #include "tree.h"
117 #include "rtl.h"
118 #include "tree-inline.h"
119 #include "tree-flow.h"
120 #include "tree-flow-inline.h"
121 #include "langhooks.h"
122 #include "hashtab.h"
123 #include "flags.h"
124 #include "ggc.h"
125 #include "debug.h"
126 #include "target.h"
127 #include "cgraph.h"
128 #include "diagnostic-core.h"
129 #include "params.h"
130 #include "intl.h"
131 #include "function.h"
132 #include "basic-block.h"
133 #include "cfgloop.h"
134 #include "tree-iterator.h"
135 #include "tree-pass.h"
136 #include "opts.h"
137 #include "tree-data-ref.h"
138 #include "tree-chrec.h"
139 #include "tree-scalar-evolution.h"
140 #include "tree-ssa-sccvn.h"
141
142 /* We need to collect a lot of data from the original malloc,
143 particularly as the gimplifier has converted:
144
145 orig_var = (struct_type *) malloc (x * sizeof (struct_type *));
146
147 into
148
149 T3 = <constant> ; ** <constant> is amount to malloc; precomputed **
150 T4 = malloc (T3);
151 T5 = (struct_type *) T4;
152 orig_var = T5;
153
154 The following struct fields allow us to collect all the necessary data from
155 the gimplified program. The comments in the struct below are all based
156 on the gimple example above. */
157
158 struct malloc_call_data
159 {
160 gimple call_stmt; /* Tree for "T4 = malloc (T3);" */
161 tree size_var; /* Var decl for T3. */
162 tree malloc_size; /* Tree for "<constant>", the rhs assigned to T3. */
163 };
164
165 static tree can_calculate_expr_before_stmt (tree, sbitmap);
166 static tree can_calculate_stmt_before_stmt (gimple, sbitmap);
167
168 /* The front end of the compiler, when parsing statements of the form:
169
170 var = (type_cast) malloc (sizeof (type));
171
172 always converts this single statement into the following statements
173 (GIMPLE form):
174
175 T.1 = sizeof (type);
176 T.2 = malloc (T.1);
177 T.3 = (type_cast) T.2;
178 var = T.3;
179
180 Since we need to create new malloc statements and modify the original
181 statements somewhat, we need to find all four of the above statements.
182 Currently record_call_1 (called for building cgraph edges) finds and
183 records the statements containing the actual call to malloc, but we
184 need to find the rest of the variables/statements on our own. That
185 is what the following function does. */
186 static void
187 collect_data_for_malloc_call (gimple stmt, struct malloc_call_data *m_data)
188 {
189 tree size_var = NULL;
190 tree malloc_fn_decl;
191 tree arg1;
192
193 gcc_assert (is_gimple_call (stmt));
194
195 malloc_fn_decl = gimple_call_fndecl (stmt);
196 if (malloc_fn_decl == NULL
197 || DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
198 return;
199
200 arg1 = gimple_call_arg (stmt, 0);
201 size_var = arg1;
202
203 m_data->call_stmt = stmt;
204 m_data->size_var = size_var;
205 if (TREE_CODE (size_var) != VAR_DECL)
206 m_data->malloc_size = size_var;
207 else
208 m_data->malloc_size = NULL_TREE;
209 }
210
211 /* Information about a matrix access site.
212 For example: if an access site of matrix arr is arr[i][j]
213 the ACCESS_SITE_INFO structure will have the address
214 of arr as its stmt. The INDEX_INFO will hold information about the
215 initial address and index of each dimension. */
216 struct access_site_info
217 {
218 /* The statement (MEM_REF or POINTER_PLUS_EXPR). */
219 gimple stmt;
220
221 /* In case of POINTER_PLUS_EXPR, what is the offset. */
222 tree offset;
223
224 /* The index which created the offset. */
225 tree index;
226
227 /* The indirection level of this statement. */
228 int level;
229
230   /* TRUE for an allocation site, FALSE for an access site.  */
231 bool is_alloc;
232
233 /* The function containing the access site. */
234 tree function_decl;
235
236   /* True if this access is iterated by the innermost loop.  */
237 bool iterated_by_inner_most_loop_p;
238 };
239
240 typedef struct access_site_info *access_site_info_p;
241 DEF_VEC_P (access_site_info_p);
242 DEF_VEC_ALLOC_P (access_site_info_p, heap);
243
244 /* Calls to free when flattening a matrix. */
245
246 struct free_info
247 {
248 gimple stmt;
249 tree func;
250 };
251
252 /* Information about matrix to flatten. */
253 struct matrix_info
254 {
255 /* Decl tree of this matrix. */
256 tree decl;
257 /* Number of dimensions; number
258 of "*" in the type declaration. */
259 int num_dims;
260
261   /* Minimum indirection level that escapes; 0 means that
262      the whole matrix escapes, k means that dimensions
263      0 to ACTUAL_DIM - k escape.  */
264 int min_indirect_level_escape;
265
266 gimple min_indirect_level_escape_stmt;
267
268 /* Hold the allocation site for each level (dimension).
269 We can use NUM_DIMS as the upper bound and allocate the array
270 once with this number of elements and no need to use realloc and
271 MAX_MALLOCED_LEVEL. */
272 gimple *malloc_for_level;
273
274 int max_malloced_level;
275
276 /* Is the matrix transposed. */
277 bool is_transposed_p;
278
279 /* The location of the allocation sites (they must be in one
280 function). */
281 tree allocation_function_decl;
282
283 /* The calls to free for each level of indirection. */
284 struct free_info *free_stmts;
285
286   /* An array which holds the size of each dimension, where
287      dimension 0 is the outermost (the one that contains all the others).
288 */
289 tree *dimension_size;
290
291   /* An array which holds for each dimension its original size
292 (before transposing and flattening take place). */
293 tree *dimension_size_orig;
294
295   /* An array which holds for each dimension the size of the type
296      of elements accessed at that level (in bytes).  */
297 HOST_WIDE_INT *dimension_type_size;
298
299 int dimension_type_size_len;
300
301 /* An array collecting the count of accesses for each dimension. */
302 gcov_type *dim_hot_level;
303
304 /* An array of the accesses to be flattened.
305 elements are of type "struct access_site_info *". */
306 VEC (access_site_info_p, heap) * access_l;
307
308 /* A map of how the dimensions will be organized at the end of
309 the analyses. */
310 int *dim_map;
311 };
312
313 /* In each phi node we want to record the indirection level we have when we
314 get to the phi node. Usually we will have phi nodes with more than two
315    arguments, so we must ensure that all of them reach the phi node with
316    the same indirection level; otherwise it is not safe to do the flattening.
317 So we record the information regarding the indirection level each time we
318 get to the phi node in this hash table. */
319
320 struct matrix_access_phi_node
321 {
322 gimple phi;
323 int indirection_level;
324 };
325
326 /* We use this structure to find whether the SSA variable is accessed
327    inside the tree, and to record the tree containing it.  */
328
329 struct ssa_acc_in_tree
330 {
331 /* The variable whose accesses in the tree we are looking for. */
332 tree ssa_var;
333   /* The tree in which the ssa_var is accessed, and its code; currently
334      it can be a MEM_REF or CALL_EXPR.  */
335 enum tree_code t_code;
336 tree t_tree;
337 /* The place in the containing tree. */
338 tree *tp;
339 tree second_op;
340 bool var_found;
341 };
342
343 static void analyze_matrix_accesses (struct matrix_info *, tree, int, bool,
344 sbitmap, bool);
345 static int transform_allocation_sites (void **, void *);
346 static int transform_access_sites (void **, void *);
347 static int analyze_transpose (void **, void *);
348 static int dump_matrix_reorg_analysis (void **, void *);
349
350 static bool check_transpose_p;
351
352 /* Hash function used for the phi nodes. */
353
354 static hashval_t
355 mat_acc_phi_hash (const void *p)
356 {
357 const struct matrix_access_phi_node *const ma_phi =
358 (const struct matrix_access_phi_node *) p;
359
360 return htab_hash_pointer (ma_phi->phi);
361 }
362
363 /* Equality means phi node pointers are the same. */
364
365 static int
366 mat_acc_phi_eq (const void *p1, const void *p2)
367 {
368 const struct matrix_access_phi_node *const phi1 =
369 (const struct matrix_access_phi_node *) p1;
370 const struct matrix_access_phi_node *const phi2 =
371 (const struct matrix_access_phi_node *) p2;
372
373 if (phi1->phi == phi2->phi)
374 return 1;
375
376 return 0;
377 }
378
379 /* Hold the PHI nodes we visit during the traversal for escaping
380 analysis. */
381 static htab_t htab_mat_acc_phi_nodes = NULL;
382
383 /* This hash-table holds the information about the matrices we are
384 going to handle. */
385 static htab_t matrices_to_reorg = NULL;
386
387 /* Return a hash for MTT, which is really a "matrix_info *". */
388 static hashval_t
389 mtt_info_hash (const void *mtt)
390 {
391 return htab_hash_pointer (((const struct matrix_info *) mtt)->decl);
392 }
393
394 /* Return true if MTT1 and MTT2 (which are really both of type
395 "matrix_info *") refer to the same decl. */
396 static int
397 mtt_info_eq (const void *mtt1, const void *mtt2)
398 {
399 const struct matrix_info *const i1 = (const struct matrix_info *) mtt1;
400 const struct matrix_info *const i2 = (const struct matrix_info *) mtt2;
401
402 if (i1->decl == i2->decl)
403 return true;
404
405 return false;
406 }
407
408 /* Return false if STMT may contain a vector expression.
409    In that situation, no matrix should be flattened.  */
410 static bool
411 may_flatten_matrices_1 (gimple stmt)
412 {
413 switch (gimple_code (stmt))
414 {
415 case GIMPLE_ASSIGN:
416 case GIMPLE_CALL:
417 if (!gimple_has_lhs (stmt))
418 return true;
419 if (TREE_CODE (TREE_TYPE (gimple_get_lhs (stmt))) == VECTOR_TYPE)
420 {
421 if (dump_file)
422 fprintf (dump_file,
423 "Found vector type, don't flatten matrix\n");
424 return false;
425 }
426 break;
427 case GIMPLE_ASM:
428 /* Asm code could contain vector operations. */
429 return false;
430 break;
431 default:
432 break;
433 }
434 return true;
435 }
436
437 /* Return false if there are hand-written vectors in the program.
438 We disable the flattening in such a case. */
439 static bool
440 may_flatten_matrices (struct cgraph_node *node)
441 {
442 tree decl;
443 struct function *func;
444 basic_block bb;
445 gimple_stmt_iterator gsi;
446
447 decl = node->symbol.decl;
448 if (node->analyzed)
449 {
450 func = DECL_STRUCT_FUNCTION (decl);
451 FOR_EACH_BB_FN (bb, func)
452 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
453 if (!may_flatten_matrices_1 (gsi_stmt (gsi)))
454 return false;
455 }
456 return true;
457 }
458
459 /* Given a VAR_DECL, check its type to determine whether it is
460    a definition of a dynamically allocated matrix and therefore is
461    a suitable candidate for the matrix flattening optimization.
462    Return NULL if VAR_DECL is not such a decl.  Otherwise, allocate
463    a MATRIX_INFO structure, fill it with the relevant information
464    and return a pointer to it.
465    TODO: also handle statically defined arrays.  */
466 static struct matrix_info *
467 analyze_matrix_decl (tree var_decl)
468 {
469 struct matrix_info *m_node, tmpmi, *mi;
470 tree var_type;
471 int dim_num = 0;
472
473 gcc_assert (matrices_to_reorg);
474
475 if (TREE_CODE (var_decl) == PARM_DECL)
476 var_type = DECL_ARG_TYPE (var_decl);
477 else if (TREE_CODE (var_decl) == VAR_DECL)
478 var_type = TREE_TYPE (var_decl);
479 else
480 return NULL;
481
482 if (!POINTER_TYPE_P (var_type))
483 return NULL;
484
485 while (POINTER_TYPE_P (var_type))
486 {
487 var_type = TREE_TYPE (var_type);
488 dim_num++;
489 }
490
491 if (dim_num <= 1)
492 return NULL;
493
494 if (!COMPLETE_TYPE_P (var_type)
495 || TREE_CODE (TYPE_SIZE_UNIT (var_type)) != INTEGER_CST)
496 return NULL;
497
498 /* Check to see if this pointer is already in there. */
499 tmpmi.decl = var_decl;
500 mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi);
501
502 if (mi)
503 return NULL;
504
505 /* Record the matrix. */
506
507 m_node = (struct matrix_info *) xcalloc (1, sizeof (struct matrix_info));
508 m_node->decl = var_decl;
509 m_node->num_dims = dim_num;
510 m_node->free_stmts
511 = (struct free_info *) xcalloc (dim_num, sizeof (struct free_info));
512
513 /* Init min_indirect_level_escape to -1 to indicate that no escape
514 analysis has been done yet. */
515 m_node->min_indirect_level_escape = -1;
516 m_node->is_transposed_p = false;
517
518 return m_node;
519 }
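
/* For example (following the checks above): a global declared as
   "int ***m" is recorded with num_dims == 3, while "int *v" is
   rejected because dim_num <= 1.  */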
520
521 /* Free matrix E. */
522 static void
523 mat_free (void *e)
524 {
525 struct matrix_info *mat = (struct matrix_info *) e;
526
527 if (!mat)
528 return;
529
530 free (mat->free_stmts);
531 free (mat->dim_hot_level);
532 free (mat->malloc_for_level);
533 }
534
535 /* Find all potential matrices.
536 TODO: currently we handle only multidimensional
537 dynamically allocated arrays. */
538 static void
539 find_matrices_decl (void)
540 {
541 struct matrix_info *tmp;
542 PTR *slot;
543 struct varpool_node *vnode;
544
545 gcc_assert (matrices_to_reorg);
546
547 /* For every global variable in the program:
548 Check to see if it's of a candidate type and record it. */
549 FOR_EACH_DEFINED_VARIABLE (vnode)
550 {
551 tree var_decl = vnode->symbol.decl;
552
553 if (!var_decl || TREE_CODE (var_decl) != VAR_DECL)
554 continue;
555
556 if (matrices_to_reorg)
557 if ((tmp = analyze_matrix_decl (var_decl)))
558 {
559 if (!TREE_ADDRESSABLE (var_decl))
560 {
561 slot = htab_find_slot (matrices_to_reorg, tmp, INSERT);
562 *slot = tmp;
563 }
564 }
565 }
566 return;
567 }
568
569 /* Mark that the matrix MI escapes at level L. */
570 static void
571 mark_min_matrix_escape_level (struct matrix_info *mi, int l, gimple s)
572 {
573 if (mi->min_indirect_level_escape == -1
574 || (mi->min_indirect_level_escape > l))
575 {
576 mi->min_indirect_level_escape = l;
577 mi->min_indirect_level_escape_stmt = s;
578 }
579 }
580
581 /* Find whether the SSA variable is accessed inside the
582    tree and record the tree containing it.
583    The only relevant uses are a bare SSA_NAME, or an SSA name inside
584    a MEM_REF, PLUS_EXPR, POINTER_PLUS_EXPR or MULT_EXPR.  */
585 static void
586 ssa_accessed_in_tree (tree t, struct ssa_acc_in_tree *a)
587 {
588 a->t_code = TREE_CODE (t);
589 switch (a->t_code)
590 {
591 case SSA_NAME:
592 if (t == a->ssa_var)
593 a->var_found = true;
594 break;
595 case MEM_REF:
596 if (SSA_VAR_P (TREE_OPERAND (t, 0))
597 && TREE_OPERAND (t, 0) == a->ssa_var)
598 a->var_found = true;
599 break;
600 default:
601 break;
602 }
603 }
604
605 /* Find if the SSA variable is accessed on the right hand side of
606 gimple call STMT. */
607
608 static void
609 ssa_accessed_in_call_rhs (gimple stmt, struct ssa_acc_in_tree *a)
610 {
611 tree decl;
612 tree arg;
613 size_t i;
614
615 a->t_code = CALL_EXPR;
616 for (i = 0; i < gimple_call_num_args (stmt); i++)
617 {
618 arg = gimple_call_arg (stmt, i);
619 if (arg == a->ssa_var)
620 {
621 a->var_found = true;
622 decl = gimple_call_fndecl (stmt);
623 a->t_tree = decl;
624 break;
625 }
626 }
627 }
628
629 /* Find if the SSA variable is accessed on the right hand side of
630 gimple assign STMT. */
631
632 static void
633 ssa_accessed_in_assign_rhs (gimple stmt, struct ssa_acc_in_tree *a)
634 {
635
636 a->t_code = gimple_assign_rhs_code (stmt);
637 switch (a->t_code)
638 {
639 tree op1, op2;
640
641 case SSA_NAME:
642 case MEM_REF:
643 CASE_CONVERT:
644 case VIEW_CONVERT_EXPR:
645 ssa_accessed_in_tree (gimple_assign_rhs1 (stmt), a);
646 break;
647 case POINTER_PLUS_EXPR:
648 case PLUS_EXPR:
649 case MULT_EXPR:
650 op1 = gimple_assign_rhs1 (stmt);
651 op2 = gimple_assign_rhs2 (stmt);
652
653 if (op1 == a->ssa_var)
654 {
655 a->var_found = true;
656 a->second_op = op2;
657 }
658 else if (op2 == a->ssa_var)
659 {
660 a->var_found = true;
661 a->second_op = op1;
662 }
663 break;
664 default:
665 break;
666 }
667 }
668
669 /* Record the access/allocation site information for matrix MI so we can
670 handle it later in transformation. */
671 static void
672 record_access_alloc_site_info (struct matrix_info *mi, gimple stmt, tree offset,
673 tree index, int level, bool is_alloc)
674 {
675 struct access_site_info *acc_info;
676
677 if (!mi->access_l)
678 mi->access_l = VEC_alloc (access_site_info_p, heap, 100);
679
680 acc_info
681 = (struct access_site_info *)
682 xcalloc (1, sizeof (struct access_site_info));
683 acc_info->stmt = stmt;
684 acc_info->offset = offset;
685 acc_info->index = index;
686 acc_info->function_decl = current_function_decl;
687 acc_info->level = level;
688 acc_info->is_alloc = is_alloc;
689
690 VEC_safe_push (access_site_info_p, heap, mi->access_l, acc_info);
691
692 }
693
694 /* Record the malloc as the allocation site of the given LEVEL. But
695    first we make sure that all the size parameters passed to malloc in
696    all the allocation sites can be pre-calculated before the call to
697 the malloc of level 0 (the main malloc call). */
698 static void
699 add_allocation_site (struct matrix_info *mi, gimple stmt, int level)
700 {
701 struct malloc_call_data mcd;
702
703 /* Make sure that the allocation sites are in the same function. */
704 if (!mi->allocation_function_decl)
705 mi->allocation_function_decl = current_function_decl;
706 else if (mi->allocation_function_decl != current_function_decl)
707 {
708 int min_malloc_level;
709
710 gcc_assert (mi->malloc_for_level);
711
712      /* Find the minimum malloc level that has already been seen;
713         we know its allocation function must be
714         MI->allocation_function_decl.  Since that is different from
715         CURRENT_FUNCTION_DECL, the escaping level should be
716         MIN (LEVEL, MIN_MALLOC_LEVEL) - 1, and the allocation function
717         must be set accordingly.  */
718 for (min_malloc_level = 0;
719 min_malloc_level < mi->max_malloced_level
720 && mi->malloc_for_level[min_malloc_level]; min_malloc_level++)
721 ;
722 if (level < min_malloc_level)
723 {
724 mi->allocation_function_decl = current_function_decl;
725 mark_min_matrix_escape_level (mi, min_malloc_level, stmt);
726 }
727 else
728 {
729 mark_min_matrix_escape_level (mi, level, stmt);
730          /* It cannot be that level == min_malloc_level;
731             we would have returned earlier.  */
732 return;
733 }
734 }
735
736 /* Find the correct malloc information. */
737 collect_data_for_malloc_call (stmt, &mcd);
738
739  /* We accept only calls to the malloc function; we do not accept
740 calls like calloc and realloc. */
741 if (!mi->malloc_for_level)
742 {
743 mi->malloc_for_level = XCNEWVEC (gimple, level + 1);
744 mi->max_malloced_level = level + 1;
745 }
746 else if (mi->max_malloced_level <= level)
747 {
748 mi->malloc_for_level
749 = XRESIZEVEC (gimple, mi->malloc_for_level, level + 1);
750
751      /* Zero the newly allocated items.  */
752      memset (&(mi->malloc_for_level[mi->max_malloced_level]),
753              0, (level + 1 - mi->max_malloced_level) * sizeof (gimple));
754
755 mi->max_malloced_level = level + 1;
756 }
757 mi->malloc_for_level[level] = stmt;
758 }
759
760 /* Given an assignment statement STMT whose left-hand side
761    is known to be the matrix MI variable, we traverse the immediate
762 uses backwards until we get to a malloc site. We make sure that
763 there is one and only one malloc site that sets this variable. When
764 we are performing the flattening we generate a new variable that
765 will hold the size for each dimension; each malloc that allocates a
766 dimension has the size parameter; we use that parameter to
767 initialize the dimension size variable so we can use it later in
768 the address calculations. LEVEL is the dimension we're inspecting.
769 Return if STMT is related to an allocation site. */
770
771 static void
772 analyze_matrix_allocation_site (struct matrix_info *mi, gimple stmt,
773 int level, sbitmap visited)
774 {
775 if (gimple_assign_copy_p (stmt) || gimple_assign_cast_p (stmt))
776 {
777 tree rhs = gimple_assign_rhs1 (stmt);
778
779 if (TREE_CODE (rhs) == SSA_NAME)
780 {
781 gimple def = SSA_NAME_DEF_STMT (rhs);
782
783 analyze_matrix_allocation_site (mi, def, level, visited);
784 return;
785 }
786 /* If we are back to the original matrix variable then we
787 are sure that this is analyzed as an access site. */
788 else if (rhs == mi->decl)
789 return;
790 }
791 /* A result of call to malloc. */
792 else if (is_gimple_call (stmt))
793 {
794 int call_flags = gimple_call_flags (stmt);
795
796 if (!(call_flags & ECF_MALLOC))
797 {
798 mark_min_matrix_escape_level (mi, level, stmt);
799 return;
800 }
801 else
802 {
803 tree malloc_fn_decl;
804
805 malloc_fn_decl = gimple_call_fndecl (stmt);
806 if (malloc_fn_decl == NULL_TREE)
807 {
808 mark_min_matrix_escape_level (mi, level, stmt);
809 return;
810 }
811 if (DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
812 {
813 if (dump_file)
814 fprintf (dump_file,
815 "Matrix %s is an argument to function %s\n",
816 get_name (mi->decl), get_name (malloc_fn_decl));
817 mark_min_matrix_escape_level (mi, level, stmt);
818 return;
819 }
820 }
821 /* This is a call to malloc of level 'level'.
822 mi->max_malloced_level-1 == level means that we've
823 seen a malloc statement of level 'level' before.
824 If the statement is not the same one that we've
825 seen before, then there's another malloc statement
826 for the same level, which means that we need to mark
827 it escaping. */
828 if (mi->malloc_for_level
829 && mi->max_malloced_level-1 == level
830 && mi->malloc_for_level[level] != stmt)
831 {
832 mark_min_matrix_escape_level (mi, level, stmt);
833 return;
834 }
835 else
836 add_allocation_site (mi, stmt, level);
837 return;
838 }
839 /* Looks like we don't know what is happening in this
840      statement, so be on the safe side and mark it as escaping.  */
841 mark_min_matrix_escape_level (mi, level, stmt);
842 }
843
844 /* The transposing decision making.
845 In order to calculate the profitability of transposing, we collect two
846 types of information regarding the accesses:
847 1. profiling information used to express the hotness of an access, that
848 is how often the matrix is accessed by this access site (count of the
849 access site).
850 2. which dimension in the access site is iterated by the inner
851 most loop containing this access.
852
853 The matrix will have a calculated value of weighted hotness for each
854 dimension.
855 Intuitively the hotness level of a dimension is a function of how
856 many times it was the most frequently accessed dimension in the
857 highly executed access sites of this matrix.
858
859    This is computed by the following equation, for each
860    dimension i = 1 .. n:
861
862      dim_hot_level[i] = sum over j = 1 .. m of
863                         acc[j]->dim[i]->iter_by_inner_loop * count(j)
864
865    where n is the number of dimensions and m is the number of matrix
866    access sites; acc[j]->dim[i]->iter_by_inner_loop is 1 if acc[j]
867    iterates over dim[i] in the innermost loop, and is 0 otherwise.
868    For example, a dimension iterated by the innermost loops of two
869    access sites executed 100 and 50 times gets hotness 150.
870
871    The organization of the new matrix should be according to the
872    hotness of each dimension.  The hotness of a dimension implies
873    the locality of its elements.  */
874 static int
875 analyze_transpose (void **slot, void *data ATTRIBUTE_UNUSED)
876 {
877 struct matrix_info *mi = (struct matrix_info *) *slot;
878 int min_escape_l = mi->min_indirect_level_escape;
879 struct loop *loop;
880 affine_iv iv;
881 struct access_site_info *acc_info;
882 int i;
883
884 if (min_escape_l < 2 || !mi->access_l)
885 {
886 if (mi->access_l)
887 {
888 FOR_EACH_VEC_ELT (access_site_info_p, mi->access_l, i, acc_info)
889 free (acc_info);
890 VEC_free (access_site_info_p, heap, mi->access_l);
891
892 }
893 return 1;
894 }
895 if (!mi->dim_hot_level)
896 mi->dim_hot_level =
897 (gcov_type *) xcalloc (min_escape_l, sizeof (gcov_type));
898
899
900 for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
901 i++)
902 {
903 if (gimple_assign_rhs_code (acc_info->stmt) == POINTER_PLUS_EXPR
904 && acc_info->level < min_escape_l)
905 {
906 loop = loop_containing_stmt (acc_info->stmt);
907 if (!loop || loop->inner)
908 {
909 free (acc_info);
910 continue;
911 }
912 if (simple_iv (loop, loop, acc_info->offset, &iv, true))
913 {
914 if (iv.step != NULL)
915 {
916 HOST_WIDE_INT istep;
917
918 istep = int_cst_value (iv.step);
919 if (istep != 0)
920 {
921 acc_info->iterated_by_inner_most_loop_p = 1;
922 mi->dim_hot_level[acc_info->level] +=
923 gimple_bb (acc_info->stmt)->count;
924 }
925
926 }
927 }
928 }
929 free (acc_info);
930 }
931 VEC_free (access_site_info_p, heap, mi->access_l);
932
933 return 1;
934 }
935
936 /* Find the index which defines the OFFSET from base.
937 We walk from use to def until we find how the offset was defined. */
938 static tree
939 get_index_from_offset (tree offset, gimple def_stmt)
940 {
941 tree op1, op2, index;
942
943 if (gimple_code (def_stmt) == GIMPLE_PHI)
944 return NULL;
945 if ((gimple_assign_copy_p (def_stmt) || gimple_assign_cast_p (def_stmt))
946 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
947 return get_index_from_offset (offset,
948 SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt)));
949 else if (is_gimple_assign (def_stmt)
950 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
951 {
952 op1 = gimple_assign_rhs1 (def_stmt);
953 op2 = gimple_assign_rhs2 (def_stmt);
954 if (TREE_CODE (op1) != INTEGER_CST && TREE_CODE (op2) != INTEGER_CST)
955 return NULL;
956 index = (TREE_CODE (op1) == INTEGER_CST) ? op2 : op1;
957 return index;
958 }
959 else
960 return NULL_TREE;
961 }
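
/* For example (illustrative): if OFFSET is defined by
   offset_7 = i_3 * 8, get_index_from_offset returns i_3, the
   non-constant operand of the MULT_EXPR.  */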
962
963 /* Update MI->dimension_type_size[CURRENT_INDIRECT_LEVEL] with the size
964    of the type related to the SSA_VAR, or the type related to the
965    lhs of STMT, in the case that it is a MEM_REF.  */
966 static void
967 update_type_size (struct matrix_info *mi, gimple stmt, tree ssa_var,
968 int current_indirect_level)
969 {
970 tree lhs;
971 HOST_WIDE_INT type_size;
972
973 /* Update type according to the type of the MEM_REF expr. */
974 if (is_gimple_assign (stmt)
975 && TREE_CODE (gimple_assign_lhs (stmt)) == MEM_REF)
976 {
977 lhs = gimple_assign_lhs (stmt);
978 gcc_assert (POINTER_TYPE_P
979 (TREE_TYPE (TREE_OPERAND (lhs, 0))));
980 type_size =
981 int_size_in_bytes (TREE_TYPE
982 (TREE_TYPE (TREE_OPERAND (lhs, 0))));
983 }
984 else
985 type_size = int_size_in_bytes (TREE_TYPE (ssa_var));
986
987 /* Record the size of elements accessed (as a whole)
988 in the current indirection level (dimension). If the size of
989 elements is not known at compile time, mark it as escaping. */
990 if (type_size <= 0)
991 mark_min_matrix_escape_level (mi, current_indirect_level, stmt);
992 else
993 {
994 int l = current_indirect_level;
995
996 if (!mi->dimension_type_size)
997 {
998 mi->dimension_type_size
999 = (HOST_WIDE_INT *) xcalloc (l + 1, sizeof (HOST_WIDE_INT));
1000 mi->dimension_type_size_len = l + 1;
1001 }
1002 else if (mi->dimension_type_size_len < l + 1)
1003 {
1004 mi->dimension_type_size
1005 = (HOST_WIDE_INT *) xrealloc (mi->dimension_type_size,
1006 (l + 1) * sizeof (HOST_WIDE_INT));
1007 memset (&mi->dimension_type_size[mi->dimension_type_size_len],
1008 0, (l + 1 - mi->dimension_type_size_len)
1009 * sizeof (HOST_WIDE_INT));
1010 mi->dimension_type_size_len = l + 1;
1011 }
1012 /* Make sure all the accesses in the same level have the same size
1013 of the type. */
1014 if (!mi->dimension_type_size[l])
1015 mi->dimension_type_size[l] = type_size;
1016 else if (mi->dimension_type_size[l] != type_size)
1017 mark_min_matrix_escape_level (mi, l, stmt);
1018 }
1019 }
1020
1021 /* USE_STMT represents a GIMPLE_CALL, where one of the arguments is the
1022 ssa var that we want to check because it came from some use of matrix
1023 MI. CURRENT_INDIRECT_LEVEL is the indirection level we reached so
1024 far. */
1025
1026 static int
1027 analyze_accesses_for_call_stmt (struct matrix_info *mi, tree ssa_var,
1028 gimple use_stmt, int current_indirect_level)
1029 {
1030 tree fndecl = gimple_call_fndecl (use_stmt);
1031
1032 if (gimple_call_lhs (use_stmt))
1033 {
1034 tree lhs = gimple_call_lhs (use_stmt);
1035 struct ssa_acc_in_tree lhs_acc, rhs_acc;
1036
1037 memset (&lhs_acc, 0, sizeof (lhs_acc));
1038 memset (&rhs_acc, 0, sizeof (rhs_acc));
1039
1040 lhs_acc.ssa_var = ssa_var;
1041 lhs_acc.t_code = ERROR_MARK;
1042 ssa_accessed_in_tree (lhs, &lhs_acc);
1043 rhs_acc.ssa_var = ssa_var;
1044 rhs_acc.t_code = ERROR_MARK;
1045 ssa_accessed_in_call_rhs (use_stmt, &rhs_acc);
1046
1047      /* The SSA name must appear on either the left side or the right side
1048         for us to understand what is happening.
1049         If the SSA_NAME is found on both sides, we should escape
1050         at this level, because then we cannot calculate the
1051         address correctly.  */
1052 if ((lhs_acc.var_found && rhs_acc.var_found
1053 && lhs_acc.t_code == MEM_REF)
1054 || (!rhs_acc.var_found && !lhs_acc.var_found))
1055 {
1056 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1057 return current_indirect_level;
1058 }
1059 gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);
1060
1061 /* If we are storing to the matrix at some level, then mark it as
1062 escaping at that level. */
1063 if (lhs_acc.var_found)
1064 {
1065 int l = current_indirect_level + 1;
1066
1067 gcc_assert (lhs_acc.t_code == MEM_REF);
1068 mark_min_matrix_escape_level (mi, l, use_stmt);
1069 return current_indirect_level;
1070 }
1071 }
1072
1073 if (fndecl)
1074 {
1075 if (DECL_FUNCTION_CODE (fndecl) != BUILT_IN_FREE)
1076 {
1077 if (dump_file)
1078 fprintf (dump_file,
1079 "Matrix %s: Function call %s, level %d escapes.\n",
1080 get_name (mi->decl), get_name (fndecl),
1081 current_indirect_level);
1082 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1083 }
1084 else if (mi->free_stmts[current_indirect_level].stmt != NULL
1085 && mi->free_stmts[current_indirect_level].stmt != use_stmt)
1086 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1087 else
1088 {
1089          /* Record the free statements so we can delete them
1090             later.  */
1091 int l = current_indirect_level;
1092
1093 mi->free_stmts[l].stmt = use_stmt;
1094 mi->free_stmts[l].func = current_function_decl;
1095 }
1096 }
1097 return current_indirect_level;
1098 }
1099
1100 /* USE_STMT represents a phi node of the ssa var that we want to
1101    check because it came from some use of matrix
1102    MI.
1103    We check all the escaping levels that reach the PHI node
1104    and make sure they all escape at the same level;
1105    if not (which is rare) we let the escaping level be the
1106    minimum level that reaches that PHI, because starting from
1107    that level we cannot predict the behavior of the indirections.
1108    CURRENT_INDIRECT_LEVEL is the indirection level we reached so far.  */
1109
1110 static void
1111 analyze_accesses_for_phi_node (struct matrix_info *mi, gimple use_stmt,
1112 int current_indirect_level, sbitmap visited,
1113 bool record_accesses)
1114 {
1115
1116 struct matrix_access_phi_node tmp_maphi, *maphi, **pmaphi;
1117
1118 tmp_maphi.phi = use_stmt;
1119 if ((maphi = (struct matrix_access_phi_node *)
1120 htab_find (htab_mat_acc_phi_nodes, &tmp_maphi)))
1121 {
1122 if (maphi->indirection_level == current_indirect_level)
1123 return;
1124 else
1125 {
1126 int level = MIN (maphi->indirection_level,
1127 current_indirect_level);
1128 size_t j;
1129 gimple stmt = NULL;
1130
1131 maphi->indirection_level = level;
1132 for (j = 0; j < gimple_phi_num_args (use_stmt); j++)
1133 {
1134 tree def = PHI_ARG_DEF (use_stmt, j);
1135
1136 if (gimple_code (SSA_NAME_DEF_STMT (def)) != GIMPLE_PHI)
1137 stmt = SSA_NAME_DEF_STMT (def);
1138 }
1139 mark_min_matrix_escape_level (mi, level, stmt);
1140 }
1141 return;
1142 }
1143 maphi = (struct matrix_access_phi_node *)
1144 xcalloc (1, sizeof (struct matrix_access_phi_node));
1145 maphi->phi = use_stmt;
1146 maphi->indirection_level = current_indirect_level;
1147
1148 /* Insert to hash table. */
1149 pmaphi = (struct matrix_access_phi_node **)
1150 htab_find_slot (htab_mat_acc_phi_nodes, maphi, INSERT);
1151 gcc_assert (pmaphi);
1152 *pmaphi = maphi;
1153
1154 if (!TEST_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt))))
1155 {
1156 SET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
1157 analyze_matrix_accesses (mi, PHI_RESULT (use_stmt),
1158 current_indirect_level, false, visited,
1159 record_accesses);
1160 RESET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
1161 }
1162 }
1163
1164 /* USE_STMT represents an assign statement (its rhs or lhs includes
1165    the ssa var that we want to check because it came from some use of matrix
1166    MI).  CURRENT_INDIRECT_LEVEL is the indirection level we reached so far.  */
1167
1168 static int
1169 analyze_accesses_for_assign_stmt (struct matrix_info *mi, tree ssa_var,
1170 gimple use_stmt, int current_indirect_level,
1171 bool last_op, sbitmap visited,
1172 bool record_accesses)
1173 {
1174 tree lhs = gimple_get_lhs (use_stmt);
1175 struct ssa_acc_in_tree lhs_acc, rhs_acc;
1176
1177 memset (&lhs_acc, 0, sizeof (lhs_acc));
1178 memset (&rhs_acc, 0, sizeof (rhs_acc));
1179
1180 lhs_acc.ssa_var = ssa_var;
1181 lhs_acc.t_code = ERROR_MARK;
1182 ssa_accessed_in_tree (lhs, &lhs_acc);
1183 rhs_acc.ssa_var = ssa_var;
1184 rhs_acc.t_code = ERROR_MARK;
1185 ssa_accessed_in_assign_rhs (use_stmt, &rhs_acc);
1186
1187  /* The SSA name must appear on either the left side or the right side
1188     for us to understand what is happening.
1189     If the SSA_NAME is found on both sides, we should escape
1190     at this level, because then we cannot calculate the
1191     address correctly.  */
1192 if ((lhs_acc.var_found && rhs_acc.var_found
1193 && lhs_acc.t_code == MEM_REF)
1194 || (!rhs_acc.var_found && !lhs_acc.var_found))
1195 {
1196 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1197 return current_indirect_level;
1198 }
1199 gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);
1200
1201 /* If we are storing to the matrix at some level, then mark it as
1202 escaping at that level. */
1203 if (lhs_acc.var_found)
1204 {
1205 int l = current_indirect_level + 1;
1206
1207 gcc_assert (lhs_acc.t_code == MEM_REF);
1208
1209 if (!(gimple_assign_copy_p (use_stmt)
1210 || gimple_assign_cast_p (use_stmt))
1211 || (TREE_CODE (gimple_assign_rhs1 (use_stmt)) != SSA_NAME))
1212 mark_min_matrix_escape_level (mi, l, use_stmt);
1213 else
1214 {
1215 gimple def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (use_stmt));
1216 analyze_matrix_allocation_site (mi, def_stmt, l, visited);
1217 if (record_accesses)
1218 record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
1219 NULL_TREE, l, true);
1220 update_type_size (mi, use_stmt, NULL, l);
1221 }
1222 return current_indirect_level;
1223 }
1224 /* Now, check the right-hand-side, to see how the SSA variable
1225 is used. */
1226 if (rhs_acc.var_found)
1227 {
1228 if (rhs_acc.t_code != MEM_REF
1229 && rhs_acc.t_code != POINTER_PLUS_EXPR && rhs_acc.t_code != SSA_NAME)
1230 {
1231 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1232 return current_indirect_level;
1233 }
1234 /* If the access in the RHS has an indirection increase the
1235 indirection level. */
1236 if (rhs_acc.t_code == MEM_REF)
1237 {
1238 if (record_accesses)
1239 record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
1240 NULL_TREE,
1241 current_indirect_level, true);
1242 current_indirect_level += 1;
1243 }
1244 else if (rhs_acc.t_code == POINTER_PLUS_EXPR)
1245 {
1246 gcc_assert (rhs_acc.second_op);
1247 if (last_op)
1248 /* Currently we support only one PLUS expression on the
1249 SSA_NAME that holds the base address of the current
1250 indirection level; to support more general case there
1251 is a need to hold a stack of expressions and regenerate
1252 the calculation later. */
1253 mark_min_matrix_escape_level (mi, current_indirect_level,
1254 use_stmt);
1255 else
1256 {
1257 tree index;
1258 tree op1, op2;
1259
1260 op1 = gimple_assign_rhs1 (use_stmt);
1261 op2 = gimple_assign_rhs2 (use_stmt);
1262
1263 op2 = (op1 == ssa_var) ? op2 : op1;
1264 if (TREE_CODE (op2) == INTEGER_CST)
1265 index =
1266 build_int_cst (TREE_TYPE (op1),
1267 TREE_INT_CST_LOW (op2) /
1268 int_size_in_bytes (TREE_TYPE (op1)));
1269 else
1270 {
1271 index =
1272 get_index_from_offset (op2, SSA_NAME_DEF_STMT (op2));
1273 if (index == NULL_TREE)
1274 {
1275 mark_min_matrix_escape_level (mi,
1276 current_indirect_level,
1277 use_stmt);
1278 return current_indirect_level;
1279 }
1280 }
1281 if (record_accesses)
1282 record_access_alloc_site_info (mi, use_stmt, op2,
1283 index,
1284 current_indirect_level, false);
1285 }
1286 }
1287 /* If we are storing this level of indirection mark it as
1288 escaping. */
1289 if (lhs_acc.t_code == MEM_REF || TREE_CODE (lhs) != SSA_NAME)
1290 {
1291 int l = current_indirect_level;
1292
1293 /* One exception is when we are storing to the matrix
1294             variable itself; this is the malloc case, and we must make
1295             sure that it is the one and only call to malloc, so
1296 we call analyze_matrix_allocation_site to check
1297 this out. */
1298 if (TREE_CODE (lhs) != VAR_DECL || lhs != mi->decl)
1299 mark_min_matrix_escape_level (mi, current_indirect_level,
1300 use_stmt);
1301 else
1302 {
1303 /* Also update the escaping level. */
1304 analyze_matrix_allocation_site (mi, use_stmt, l, visited);
1305 if (record_accesses)
1306 record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
1307 NULL_TREE, l, true);
1308 }
1309 }
1310 else
1311 {
1312 /* We are placing it in an SSA, follow that SSA. */
1313 analyze_matrix_accesses (mi, lhs,
1314 current_indirect_level,
1315 rhs_acc.t_code == POINTER_PLUS_EXPR,
1316 visited, record_accesses);
1317 }
1318 }
1319 return current_indirect_level;
1320 }
1321
1322 /* Given a SSA_VAR (coming from a use statement of the matrix MI),
1323 follow its uses and level of indirection and find out the minimum
1324 indirection level it escapes in (the highest dimension) and the maximum
1325 level it is accessed in (this will be the actual dimension of the
1326 matrix). The information is accumulated in MI.
1327 We look at the immediate uses, if one escapes we finish; if not,
1328 we make a recursive call for each one of the immediate uses of the
1329 resulting SSA name. */
1330 static void
1331 analyze_matrix_accesses (struct matrix_info *mi, tree ssa_var,
1332 int current_indirect_level, bool last_op,
1333 sbitmap visited, bool record_accesses)
1334 {
1335 imm_use_iterator imm_iter;
1336 use_operand_p use_p;
1337
1338 update_type_size (mi, SSA_NAME_DEF_STMT (ssa_var), ssa_var,
1339 current_indirect_level);
1340
1341 /* We don't go beyond the escaping level when we are performing the
1342 flattening. NOTE: we keep the last indirection level that doesn't
1343 escape. */
1344 if (mi->min_indirect_level_escape > -1
1345 && mi->min_indirect_level_escape <= current_indirect_level)
1346 return;
1347
1348 /* Now go over the uses of the SSA_NAME and check how it is used in
1349 each one of them. We are mainly looking for the pattern MEM_REF,
1350 then a POINTER_PLUS_EXPR, then MEM_REF etc. while in between there could
1351 be any number of copies and casts. */
1352 gcc_assert (TREE_CODE (ssa_var) == SSA_NAME);
1353
1354 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_var)
1355 {
1356 gimple use_stmt = USE_STMT (use_p);
1357 if (gimple_code (use_stmt) == GIMPLE_PHI)
1358        /* We check all the escaping levels that reach the PHI node
1359           and make sure they all escape at the same level;
1360           if not (which is rare) we let the escaping level be the
1361           minimum level that reaches that PHI, because starting from
1362           that level we cannot predict the behavior of the indirections.  */
1363
1364 analyze_accesses_for_phi_node (mi, use_stmt, current_indirect_level,
1365 visited, record_accesses);
1366
1367 else if (is_gimple_call (use_stmt))
1368 analyze_accesses_for_call_stmt (mi, ssa_var, use_stmt,
1369 current_indirect_level);
1370 else if (is_gimple_assign (use_stmt))
1371 current_indirect_level =
1372 analyze_accesses_for_assign_stmt (mi, ssa_var, use_stmt,
1373 current_indirect_level, last_op,
1374 visited, record_accesses);
1375 }
1376 }
1377
1378 typedef struct
1379 {
1380 tree fn;
1381 gimple stmt;
1382 } check_var_data;
1383
1384 /* A walk_tree function to go over the VAR_DECL, PARM_DECL nodes of
1385    the malloc size expression and check that they aren't changed
1386    within the function.  */
1387 static tree
1388 check_var_notmodified_p (tree * tp, int *walk_subtrees, void *data)
1389 {
1390 basic_block bb;
1391 tree t = *tp;
1392 check_var_data *callback_data = (check_var_data*) data;
1393 tree fn = callback_data->fn;
1394 gimple_stmt_iterator gsi;
1395 gimple stmt;
1396
1397 if (TREE_CODE (t) != VAR_DECL && TREE_CODE (t) != PARM_DECL)
1398 return NULL_TREE;
1399
1400 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (fn))
1401 {
1402 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1403 {
1404 stmt = gsi_stmt (gsi);
1405 if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
1406 continue;
1407 if (gimple_get_lhs (stmt) == t)
1408 {
1409 callback_data->stmt = stmt;
1410 return t;
1411 }
1412 }
1413 }
1414 *walk_subtrees = 1;
1415 return NULL_TREE;
1416 }
1417
1418 /* Go backwards in the use-def chains and find out the expression
1419 represented by the possible SSA name in STMT, until it is composed
1420    of only VAR_DECL, PARM_DECL and INTEGER_CST nodes.  In the case of phi nodes
1421 we make sure that all the arguments represent the same subexpression,
1422 otherwise we fail. */
1423
1424 static tree
1425 can_calculate_stmt_before_stmt (gimple stmt, sbitmap visited)
1426 {
1427 tree op1, op2, res;
1428 enum tree_code code;
1429
1430 switch (gimple_code (stmt))
1431 {
1432 case GIMPLE_ASSIGN:
1433 code = gimple_assign_rhs_code (stmt);
1434 op1 = gimple_assign_rhs1 (stmt);
1435
1436 switch (code)
1437 {
1438 case POINTER_PLUS_EXPR:
1439 case PLUS_EXPR:
1440 case MINUS_EXPR:
1441 case MULT_EXPR:
1442
1443 op2 = gimple_assign_rhs2 (stmt);
1444 op1 = can_calculate_expr_before_stmt (op1, visited);
1445 if (!op1)
1446 return NULL_TREE;
1447 op2 = can_calculate_expr_before_stmt (op2, visited);
1448 if (op2)
1449 return fold_build2 (code, gimple_expr_type (stmt), op1, op2);
1450 return NULL_TREE;
1451
1452 CASE_CONVERT:
1453 res = can_calculate_expr_before_stmt (op1, visited);
1454 if (res != NULL_TREE)
1455 return build1 (code, gimple_expr_type (stmt), res);
1456 else
1457 return NULL_TREE;
1458
1459 default:
1460 if (gimple_assign_single_p (stmt))
1461 return can_calculate_expr_before_stmt (op1, visited);
1462 else
1463 return NULL_TREE;
1464 }
1465
1466 case GIMPLE_PHI:
1467 {
1468 size_t j;
1469
1470 res = NULL_TREE;
1471 /* Make sure all the arguments represent the same value. */
1472 for (j = 0; j < gimple_phi_num_args (stmt); j++)
1473 {
1474 tree new_res;
1475 tree def = PHI_ARG_DEF (stmt, j);
1476
1477 new_res = can_calculate_expr_before_stmt (def, visited);
1478 if (res == NULL_TREE)
1479 res = new_res;
1480 else if (!new_res || !expressions_equal_p (res, new_res))
1481 return NULL_TREE;
1482 }
1483 return res;
1484 }
1485
1486 default:
1487 return NULL_TREE;
1488 }
1489 }
1490
1491 /* Go backwards in the use-def chains and find out the expression
1492 represented by the possible SSA name in EXPR, until it is composed
1493    of only VAR_DECL, PARM_DECL and INTEGER_CST nodes.  In the case of phi nodes
1494 we make sure that all the arguments represent the same subexpression,
1495 otherwise we fail. */
1496 static tree
1497 can_calculate_expr_before_stmt (tree expr, sbitmap visited)
1498 {
1499 gimple def_stmt;
1500 tree res;
1501
1502 switch (TREE_CODE (expr))
1503 {
1504 case SSA_NAME:
1505      /* In the case of a loop we don't know how to represent this expression.  */
1506 if (TEST_BIT (visited, SSA_NAME_VERSION (expr)))
1507 return NULL_TREE;
1508
1509 SET_BIT (visited, SSA_NAME_VERSION (expr));
1510 def_stmt = SSA_NAME_DEF_STMT (expr);
1511 res = can_calculate_stmt_before_stmt (def_stmt, visited);
1512 RESET_BIT (visited, SSA_NAME_VERSION (expr));
1513 return res;
1514 case VAR_DECL:
1515 case PARM_DECL:
1516 case INTEGER_CST:
1517 return expr;
1518
1519 default:
1520 return NULL_TREE;
1521 }
1522 }
1523
1524 /* There should be only one allocation function for the dimensions
1525 that don't escape. Here we check the allocation sites in this
1526 function. We must make sure that all the dimensions are allocated
1527    using malloc and that the malloc size parameter expression can be
1528 pre-calculated before the call to the malloc of dimension 0.
1529
1530 Given a candidate matrix for flattening -- MI -- check if it's
1531 appropriate for flattening -- we analyze the allocation
1532 sites that we recorded in the previous analysis. The result of the
1533 analysis is a level of indirection (matrix dimension) in which the
1534 flattening is safe. We check the following conditions:
1535 1. There is only one allocation site for each dimension.
1536 2. The allocation sites of all the dimensions are in the same
1537 function.
1538 (The above two are being taken care of during the analysis when
1539 we check the allocation site).
1540 3. All the dimensions that we flatten are allocated at once; thus
1541 the total size must be known before the allocation of the
1542 dimension 0 (top level) -- we must make sure we represent the
1543 size of the allocation as an expression of global parameters or
1544       constants, and that those don't change over the function.  */
1545
1546 static int
1547 check_allocation_function (void **slot, void *data ATTRIBUTE_UNUSED)
1548 {
1549 int level;
1550 struct matrix_info *mi = (struct matrix_info *) *slot;
1551 sbitmap visited;
1552
1553 if (!mi->malloc_for_level)
1554 return 1;
1555
1556 visited = sbitmap_alloc (num_ssa_names);
1557
1558 /* Do nothing if the current function is not the allocation
1559 function of MI. */
1560 if (mi->allocation_function_decl != current_function_decl
1561 /* We aren't in the main allocation function yet. */
1562 || !mi->malloc_for_level[0])
1563 return 1;
1564
1565 for (level = 1; level < mi->max_malloced_level; level++)
1566 if (!mi->malloc_for_level[level])
1567 break;
1568
1569 mark_min_matrix_escape_level (mi, level, NULL);
1570
1571 /* Check if the expression of the size passed to malloc could be
1572 pre-calculated before the malloc of level 0. */
1573 for (level = 1; level < mi->min_indirect_level_escape; level++)
1574 {
1575 gimple call_stmt;
1576 tree size;
1577 struct malloc_call_data mcd = {NULL, NULL_TREE, NULL_TREE};
1578
1579 call_stmt = mi->malloc_for_level[level];
1580
1581 /* Find the correct malloc information. */
1582 collect_data_for_malloc_call (call_stmt, &mcd);
1583
1584 /* No need to check anticipation for constants. */
1585 if (TREE_CODE (mcd.size_var) == INTEGER_CST)
1586 {
1587 if (!mi->dimension_size)
1588 {
1589 mi->dimension_size =
1590 (tree *) xcalloc (mi->min_indirect_level_escape,
1591 sizeof (tree));
1592 mi->dimension_size_orig =
1593 (tree *) xcalloc (mi->min_indirect_level_escape,
1594 sizeof (tree));
1595 }
1596 mi->dimension_size[level] = mcd.size_var;
1597 mi->dimension_size_orig[level] = mcd.size_var;
1598 continue;
1599 }
1600      /* ??? Here we should also add a way to calculate the size
1601         expression, not just know that it can be anticipated.  */
1602 sbitmap_zero (visited);
1603 size = can_calculate_expr_before_stmt (mcd.size_var, visited);
1604 if (size == NULL_TREE)
1605 {
1606 mark_min_matrix_escape_level (mi, level, call_stmt);
1607 if (dump_file)
1608 fprintf (dump_file,
1609 "Matrix %s: Cannot calculate the size of allocation, escaping at level %d\n",
1610 get_name (mi->decl), level);
1611 break;
1612 }
1613 if (!mi->dimension_size)
1614 {
1615 mi->dimension_size =
1616 (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
1617 mi->dimension_size_orig =
1618 (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
1619 }
1620 mi->dimension_size[level] = size;
1621 mi->dimension_size_orig[level] = size;
1622 }
1623
1624 /* We don't need those anymore. */
1625 for (level = mi->min_indirect_level_escape;
1626 level < mi->max_malloced_level; level++)
1627 mi->malloc_for_level[level] = NULL;
1628 return 1;
1629 }
1630
1631 /* Track all access and allocation sites. */
1632 static void
1633 find_sites_in_func (bool record)
1634 {
1635 sbitmap visited_stmts_1;
1636
1637 gimple_stmt_iterator gsi;
1638 gimple stmt;
1639 basic_block bb;
1640 struct matrix_info tmpmi, *mi;
1641
1642 visited_stmts_1 = sbitmap_alloc (num_ssa_names);
1643
1644 FOR_EACH_BB (bb)
1645 {
1646 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1647 {
1648 tree lhs;
1649
1650 stmt = gsi_stmt (gsi);
1651 lhs = gimple_get_lhs (stmt);
1652 if (lhs != NULL_TREE
1653 && TREE_CODE (lhs) == VAR_DECL)
1654 {
1655 tmpmi.decl = lhs;
1656 if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
1657 &tmpmi)))
1658 {
1659 sbitmap_zero (visited_stmts_1);
1660 analyze_matrix_allocation_site (mi, stmt, 0, visited_stmts_1);
1661 }
1662 }
1663 if (is_gimple_assign (stmt)
1664 && gimple_assign_single_p (stmt)
1665 && TREE_CODE (lhs) == SSA_NAME
1666 && TREE_CODE (gimple_assign_rhs1 (stmt)) == VAR_DECL)
1667 {
1668 tmpmi.decl = gimple_assign_rhs1 (stmt);
1669 if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
1670 &tmpmi)))
1671 {
1672 sbitmap_zero (visited_stmts_1);
1673 analyze_matrix_accesses (mi, lhs, 0,
1674 false, visited_stmts_1, record);
1675 }
1676 }
1677 }
1678 }
1679 sbitmap_free (visited_stmts_1);
1680 }
1681
1682 /* Traverse the use-def chains to see if there are matrices that
1683 are passed through pointers and we cannot know how they are accessed.
1684 For each SSA-name defined by a global variable of our interest,
1685 we traverse the use-def chains of the SSA and follow the indirections,
1686 and record in what level of indirection the use of the variable
1687 escapes. A use of a pointer escapes when it is passed to a function,
1688 stored into memory or assigned (except in malloc and free calls). */
1689
1690 static void
1691 record_all_accesses_in_func (void)
1692 {
1693 unsigned i;
1694 sbitmap visited_stmts_1;
1695
1696 visited_stmts_1 = sbitmap_alloc (num_ssa_names);
1697
1698 for (i = 0; i < num_ssa_names; i++)
1699 {
1700 struct matrix_info tmpmi, *mi;
1701 tree ssa_var = ssa_name (i);
1702 tree rhs, lhs;
1703
1704 if (!ssa_var
1705 || !is_gimple_assign (SSA_NAME_DEF_STMT (ssa_var))
1706 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (ssa_var)))
1707 continue;
1708 rhs = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (ssa_var));
1709 lhs = gimple_assign_lhs (SSA_NAME_DEF_STMT (ssa_var));
1710 if (TREE_CODE (rhs) != VAR_DECL && TREE_CODE (lhs) != VAR_DECL)
1711 continue;
1712
1713 /* If the RHS is a matrix that we want to analyze, follow the def-use
1714 chain for this SSA_VAR and check for escapes or apply the
1715 flattening. */
1716 tmpmi.decl = rhs;
1717 if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi)))
1718 {
1719 /* This variable will track the visited PHI nodes, so we can limit
1720 its size to the maximum number of SSA names. */
1721 sbitmap_zero (visited_stmts_1);
1722 analyze_matrix_accesses (mi, ssa_var,
1723 0, false, visited_stmts_1, true);
1724
1725 }
1726 }
1727 sbitmap_free (visited_stmts_1);
1728 }
1729
1730 /* Used when we want to convert the expression: RESULT = something *
1731    ORIG to RESULT = something * NEW_VAL.  If ORIG and NEW_VAL are powers
1732    of 2, shift operations can be used; otherwise division and
1733    multiplication are needed.  */
1734
1735 static tree
1736 compute_offset (HOST_WIDE_INT orig, HOST_WIDE_INT new_val, tree result)
1737 {
1738
1739 int x, y;
1740 tree result1, ratio, log, orig_tree, new_tree;
1741
1742 x = exact_log2 (orig);
1743 y = exact_log2 (new_val);
1744
1745 if (x != -1 && y != -1)
1746 {
1747 if (x == y)
1748 return result;
1749 else if (x > y)
1750 {
1751 log = build_int_cst (TREE_TYPE (result), x - y);
1752 result1 =
1753            fold_build2 (RSHIFT_EXPR, TREE_TYPE (result), result, log);
1754 return result1;
1755 }
1756 log = build_int_cst (TREE_TYPE (result), y - x);
1757      result1 = fold_build2 (LSHIFT_EXPR, TREE_TYPE (result), result, log);
1758
1759 return result1;
1760 }
1761 orig_tree = build_int_cst (TREE_TYPE (result), orig);
1762 new_tree = build_int_cst (TREE_TYPE (result), new_val);
1763 ratio = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (result), result, orig_tree);
1764 result1 = fold_build2 (MULT_EXPR, TREE_TYPE (result), ratio, new_tree);
1765
1766 return result1;
1767 }
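
/* For example (illustrative, following the contract above): with
   ORIG == 8 and NEW_VAL == 2 we have x - y == 2, so RESULT is shifted
   right by 2, i.e. divided by ORIG / NEW_VAL == 4; with ORIG == 3 and
   NEW_VAL == 5 the fallback computes (RESULT / 3) * 5.  */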
1768
1769
1770 /* We know that we are allowed to perform matrix flattening (according to the
1771 escape analysis), so we traverse the use-def chains of the SSA vars
1772 defined by the global variables pointing to the matrices of our interest.
1773    In each use of the SSA we calculate the offset from the base address
1774    according to the following equation:
1775
1776    a[I1][I2]...[Ik], where D1..Dk are the lengths of the dimensions, the
1777    escaping level is m <= k, and a' is the newly allocated matrix,
1778    will be translated to:
1779
1780      b[I(m+1)]...[Ik]
1781
1782    where
1783      b = a' + I1*D2*...*Dm + I2*D3*...*Dm + ... + Im
1784 */
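/* For illustration (not part of the pass): for  int ***a  with
   dimensions D1 x D2 x D3 and escape level m = 2, the access
   a[i][j][k] becomes b[k] with b = a' + i*D2 + j; with m = 3 it
   collapses entirely to a'[i*D2*D3 + j*D3 + k].  */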
1785
1786 static int
1787 transform_access_sites (void **slot, void *data ATTRIBUTE_UNUSED)
1788 {
1789 gimple_stmt_iterator gsi;
1790 struct matrix_info *mi = (struct matrix_info *) *slot;
1791 int min_escape_l = mi->min_indirect_level_escape;
1792 struct access_site_info *acc_info;
1793 enum tree_code code;
1794 int i;
1795
1796 if (min_escape_l < 2 || !mi->access_l)
1797 return 1;
1798 for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
1799 i++)
1800 {
1801 /* This is possible because we collect the access sites before
1802 we determine the final minimum indirection level. */
1803 if (acc_info->level >= min_escape_l)
1804 {
1805 free (acc_info);
1806 continue;
1807 }
1808 if (acc_info->is_alloc)
1809 {
1810 if (acc_info->level >= 0 && gimple_bb (acc_info->stmt))
1811 {
1812 gimple stmt = acc_info->stmt;
1813 tree lhs;
1814
1815 gsi = gsi_for_stmt (stmt);
1816 gcc_assert (is_gimple_assign (acc_info->stmt));
1817 lhs = gimple_assign_lhs (acc_info->stmt);
1818 if (TREE_CODE (lhs) == SSA_NAME
1819 && acc_info->level < min_escape_l - 1)
1820 {
1821 imm_use_iterator imm_iter;
1822 use_operand_p use_p;
1823 gimple use_stmt;
1824
1825 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
1826 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1827 {
1828 tree rhs, tmp;
1829 gimple new_stmt;
1830
1831 gcc_assert (gimple_assign_rhs_code (acc_info->stmt)
1832 == MEM_REF);
1833 /* Emit convert statement to convert to type of use. */
1834 tmp = create_tmp_var (TREE_TYPE (lhs), "new");
1835 rhs = gimple_assign_rhs1 (acc_info->stmt);
1836 rhs = fold_convert (TREE_TYPE (tmp),
1837 TREE_OPERAND (rhs, 0));
1838 new_stmt = gimple_build_assign (tmp, rhs);
1839 tmp = make_ssa_name (tmp, new_stmt);
1840 gimple_assign_set_lhs (new_stmt, tmp);
1841 gsi = gsi_for_stmt (acc_info->stmt);
1842 gsi_insert_after (&gsi, new_stmt, GSI_SAME_STMT);
1843 SET_USE (use_p, tmp);
1844 }
1845 }
1846 if (acc_info->level < min_escape_l - 1)
1847 gsi_remove (&gsi, true);
1848 }
1849 free (acc_info);
1850 continue;
1851 }
1852 code = gimple_assign_rhs_code (acc_info->stmt);
1853 if (code == MEM_REF
1854 && acc_info->level < min_escape_l - 1)
1855 {
1856 /* Replace the MEM_REF with a NOP_EXPR (cast); usually we are
1857 casting from "pointer to type" to "type". */
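/* E.g. (illustrative): an inner load such as  row_1 = MEM[a_2]
   becomes  row_1 = (int *) a_2 , since after flattening the former
   row pointer is just the base address itself; the remaining offsets
   are applied by the rewritten POINTER_PLUS statements below.  */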
1858 tree t =
1859 build1 (NOP_EXPR, TREE_TYPE (gimple_assign_rhs1 (acc_info->stmt)),
1860 TREE_OPERAND (gimple_assign_rhs1 (acc_info->stmt), 0));
1861 gimple_assign_set_rhs_code (acc_info->stmt, NOP_EXPR);
1862 gimple_assign_set_rhs1 (acc_info->stmt, t);
1863 }
1864 else if (code == POINTER_PLUS_EXPR
1865 && acc_info->level < (min_escape_l))
1866 {
1867 imm_use_iterator imm_iter;
1868 use_operand_p use_p;
1869
1870 tree offset;
1871 int k = acc_info->level;
1872 tree num_elements, total_elements;
1873 tree tmp1;
1874 tree d_size = mi->dimension_size[k];
1875
1876 /* We already made sure in the analysis that the first operand
1877 is the base and the second is the offset. */
1878 offset = acc_info->offset;
1879 if (mi->dim_map[k] == min_escape_l - 1)
1880 {
1881 if (!check_transpose_p || mi->is_transposed_p == false)
1882 tmp1 = offset;
1883 else
1884 {
1885 tree new_offset;
1886
1887 new_offset =
1888 compute_offset (mi->dimension_type_size[min_escape_l],
1889 mi->dimension_type_size[k + 1], offset);
1890
1891 total_elements = new_offset;
1892 if (new_offset != offset)
1893 {
1894 gsi = gsi_for_stmt (acc_info->stmt);
1895 tmp1 = force_gimple_operand_gsi (&gsi, total_elements,
1896 true, NULL,
1897 true, GSI_SAME_STMT);
1898 }
1899 else
1900 tmp1 = offset;
1901 }
1902 }
1903 else
1904 {
1905 d_size = mi->dimension_size[mi->dim_map[k] + 1];
1906 num_elements =
1907 fold_build2 (MULT_EXPR, sizetype, fold_convert (sizetype, acc_info->index),
1908 fold_convert (sizetype, d_size));
1909 gsi = gsi_for_stmt (acc_info->stmt);
1910 tmp1 = force_gimple_operand_gsi (&gsi, num_elements, true,
1911 NULL, true, GSI_SAME_STMT);
1912 }
1913 /* Replace the offset if needed. */
1914 if (tmp1 != offset)
1915 {
1916 if (TREE_CODE (offset) == SSA_NAME)
1917 {
1918 gimple use_stmt;
1919
1920 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, offset)
1921 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1922 if (use_stmt == acc_info->stmt)
1923 SET_USE (use_p, tmp1);
1924 }
1925 else
1926 {
1927 gcc_assert (TREE_CODE (offset) == INTEGER_CST);
1928 gimple_assign_set_rhs2 (acc_info->stmt, tmp1);
1929 update_stmt (acc_info->stmt);
1930 }
1931 }
1932 }
1933 /* ??? This happens because we record the same access
1934 site more than once; we should be using a hash table to
1935 avoid this and insert the STMT of the access site only
1936 once.
1937 else
1938 gcc_unreachable (); */
1939 free (acc_info);
1940 }
1941 VEC_free (access_site_info_p, heap, mi->access_l);
1942
1943 update_ssa (TODO_update_ssa);
1944 #ifdef ENABLE_CHECKING
1945 verify_ssa (true);
1946 #endif
1947 return 1;
1948 }
1949
1950 /* Sort the array A of counts in ascending order. Rearrange DIM_MAP to reflect the new order. */
1951
1952 static void
1953 sort_dim_hot_level (gcov_type * a, int *dim_map, int n)
1954 {
1955 int i, j, tmp1;
1956 gcov_type tmp;
1957
1958 for (i = 0; i < n - 1; i++)
1959 {
1960 for (j = 0; j < n - 1 - i; j++)
1961 {
1962 if (a[j + 1] < a[j])
1963 {
1964 tmp = a[j]; /* swap a[j] and a[j+1] */
1965 a[j] = a[j + 1];
1966 a[j + 1] = tmp;
1967 tmp1 = dim_map[j];
1968 dim_map[j] = dim_map[j + 1];
1969 dim_map[j + 1] = tmp1;
1970 }
1971 }
1972 }
1973 }
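/* For illustration (not part of the pass): with hotness counts
   a = {5, 9, 2} and dim_map = {0, 1, 2}, the bubble sort above
   yields a = {2, 5, 9} and dim_map = {2, 0, 1}, i.e. original
   dimension 2 was the coldest and original dimension 1 the
   hottest.  */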
1974
1975 /* Replace the multiple mallocs (one for each dimension) with one malloc
1976 of size DIM1*DIM2*...*DIMN*size_of_element.
1977 Make sure that we hold the size computed at the malloc site in a
1978 new global variable; this way we ensure that the size doesn't
1979 change and that it is accessible from all the other functions that
1980 use the matrix. Also, the original calls to free are deleted
1981 and replaced by a new call to free the flattened matrix. */
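/* For illustration (not part of the pass): the computed total size is
   kept in a fresh global variable (name made up here), e.g.

     static size_t a_size;

     a_size = <product of all flattened dimensions> * sizeof (element);
     ... the single remaining malloc takes a_size as its argument ...

   so that every function touching the matrix, in particular the one
   containing the call to free, sees the same, unchanging size.  */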
1982
1983 static int
1984 transform_allocation_sites (void **slot, void *data ATTRIBUTE_UNUSED)
1985 {
1986 int i;
1987 struct matrix_info *mi;
1988 tree type, oldfn, prev_dim_size;
1989 gimple call_stmt_0, use_stmt;
1990 struct cgraph_node *c_node;
1991 struct cgraph_edge *e;
1992 gimple_stmt_iterator gsi;
1993 struct malloc_call_data mcd = {NULL, NULL_TREE, NULL_TREE};
1994 HOST_WIDE_INT element_size;
1995
1996 imm_use_iterator imm_iter;
1997 use_operand_p use_p;
1998 tree old_size_0, tmp;
1999 int min_escape_l;
2000 int id;
2001
2002 mi = (struct matrix_info *) *slot;
2003
2004 min_escape_l = mi->min_indirect_level_escape;
2005
2006 if (!mi->malloc_for_level)
2007 mi->min_indirect_level_escape = 0;
2008
2009 if (mi->min_indirect_level_escape < 2)
2010 return 1;
2011
2012 mi->dim_map = (int *) xcalloc (mi->min_indirect_level_escape, sizeof (int));
2013 for (i = 0; i < mi->min_indirect_level_escape; i++)
2014 mi->dim_map[i] = i;
2015 if (check_transpose_p)
2016 {
2017 int i;
2018
2019 if (dump_file)
2020 {
2021 fprintf (dump_file, "Matrix %s:\n", get_name (mi->decl));
2022 for (i = 0; i < min_escape_l; i++)
2023 {
2024 fprintf (dump_file, "dim %d before sort ", i);
2025 if (mi->dim_hot_level)
2026 fprintf (dump_file,
2027 "count is " HOST_WIDEST_INT_PRINT_DEC " \n",
2028 mi->dim_hot_level[i]);
2029 }
2030 }
2031 sort_dim_hot_level (mi->dim_hot_level, mi->dim_map,
2032 mi->min_indirect_level_escape);
2033 if (dump_file)
2034 for (i = 0; i < min_escape_l; i++)
2035 {
2036 fprintf (dump_file, "dim %d after sort\n", i);
2037 if (mi->dim_hot_level)
2038 fprintf (dump_file, "count is " HOST_WIDE_INT_PRINT_DEC
2039 " \n", (HOST_WIDE_INT) mi->dim_hot_level[i]);
2040 }
2041 for (i = 0; i < mi->min_indirect_level_escape; i++)
2042 {
2043 if (dump_file)
2044 fprintf (dump_file, "dim_map[%d] after sort %d\n", i,
2045 mi->dim_map[i]);
2046 if (mi->dim_map[i] != i)
2047 {
2048 if (dump_file)
2049 fprintf (dump_file,
2050 "Transposed dimensions: dim %d is now dim %d\n",
2051 mi->dim_map[i], i);
2052 mi->is_transposed_p = true;
2053 }
2054 }
2055 }
2056 else
2057 {
2058 for (i = 0; i < mi->min_indirect_level_escape; i++)
2059 mi->dim_map[i] = i;
2060 }
2061 /* Call statement of allocation site of level 0. */
2062 call_stmt_0 = mi->malloc_for_level[0];
2063
2064 /* Find the correct malloc information. */
2065 collect_data_for_malloc_call (call_stmt_0, &mcd);
2066
2067 mi->dimension_size[0] = mcd.size_var;
2068 mi->dimension_size_orig[0] = mcd.size_var;
2069 /* Make sure that the variables in the size expression for
2070 all the dimensions (above level 0) aren't modified in
2071 the allocation function. */
2072 for (i = 1; i < mi->min_indirect_level_escape; i++)
2073 {
2074 tree t;
2075 check_var_data data;
2076
2077 /* mi->dimension_size must contain the expression of the size calculated
2078 in check_allocation_function. */
2079 gcc_assert (mi->dimension_size[i]);
2080
2081 data.fn = mi->allocation_function_decl;
2082 data.stmt = NULL;
2083 t = walk_tree_without_duplicates (&(mi->dimension_size[i]),
2084 check_var_notmodified_p,
2085 &data);
2086 if (t != NULL_TREE)
2087 {
2088 mark_min_matrix_escape_level (mi, i, data.stmt);
2089 break;
2090 }
2091 }
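/* For illustration (not part of the pass): if the allocation function
   modified a size variable between the per-dimension allocations,
   e.g.

     dim2 = dim2 + 1;      between the level-0 and level-1 mallocs

   the recorded level-1 size expression would no longer be reliable,
   and mark_min_matrix_escape_level caps the escape level at that
   dimension.  */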
2092
2093 if (mi->min_indirect_level_escape < 2)
2094 return 1;
2095
2096 /* Make sure that the size expression is available
2097 before the call to malloc of level 0. */
2098 gsi = gsi_for_stmt (call_stmt_0);
2099
2100 /* Find out the size of each dimension by looking at the malloc
2101 sites and create a global variable to hold it.
2102 We add the assignment to the global before the malloc of level 0. */
2103
2104 /* To be able to produce gimple temporaries. */
2105 oldfn = current_function_decl;
2106 current_function_decl = mi->allocation_function_decl;
2107 push_cfun (DECL_STRUCT_FUNCTION (mi->allocation_function_decl));
2108
2109 /* Set the dimension sizes as follows:
2110 DIM_SIZE[i] = DIM_SIZE[i] * DIM_SIZE[i+1] * ... * DIM_SIZE[n],
2111 where n is the maximum non-escaping level. */
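/* For illustration (not part of the pass): for  int **a  with 100
   rows of 50 ints on an LP64 target, the original level-0 malloc
   size is 100*8 (pointers) and the level-1 size is 50*4, so the
   loop below produces

     DIM_SIZE[1] = (50*4  / 4) * 4   = 200    bytes
     DIM_SIZE[0] = (100*8 / 8) * 200 = 20000  bytes

   and 20000 = 100 * 50 * sizeof (int) becomes the argument of the
   single remaining malloc.  */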
2112 element_size = mi->dimension_type_size[mi->min_indirect_level_escape];
2113 prev_dim_size = NULL_TREE;
2114
2115 for (i = mi->min_indirect_level_escape - 1; i >= 0; i--)
2116 {
2117 tree dim_size, dim_var;
2118 gimple stmt;
2119 tree d_type_size;
2120
2121 /* Put the size expression in a global variable and initialize it
2122 before the malloc of level 0. */
2123 dim_var =
2124 add_new_static_var (TREE_TYPE
2125 (mi->dimension_size_orig[mi->dim_map[i]]));
2126 type = TREE_TYPE (mi->dimension_size_orig[mi->dim_map[i]]);
2127
2128 /* DIM_SIZE = MALLOC_SIZE_PARAM / TYPE_SIZE. */
2129 /* Find which dim ID becomes dim I. */
2130 for (id = 0; id < mi->min_indirect_level_escape; id++)
2131 if (mi->dim_map[id] == i)
2132 break;
2133 d_type_size =
2134 build_int_cst (type, mi->dimension_type_size[id + 1]);
2135 if (!prev_dim_size)
2136 prev_dim_size = build_int_cst (type, element_size);
2137 if (!check_transpose_p && i == mi->min_indirect_level_escape - 1)
2138 {
2139 dim_size = mi->dimension_size_orig[id];
2140 }
2141 else
2142 {
2143 dim_size =
2144 fold_build2 (TRUNC_DIV_EXPR, type, mi->dimension_size_orig[id],
2145 d_type_size);
2146
2147 dim_size = fold_build2 (MULT_EXPR, type, dim_size, prev_dim_size);
2148 }
2149 dim_size = force_gimple_operand_gsi (&gsi, dim_size, true, NULL,
2150 true, GSI_SAME_STMT);
2151 /* GLOBAL_HOLDING_THE_SIZE = DIM_SIZE. */
2152 stmt = gimple_build_assign (dim_var, dim_size);
2153 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2154
2155 prev_dim_size = mi->dimension_size[i] = dim_var;
2156 }
2157 update_ssa (TODO_update_ssa);
2158 /* Replace the malloc size argument in the malloc of level 0 with
2159 the total size of all the dimensions. */
2160 c_node = cgraph_get_node (mi->allocation_function_decl);
2161 gcc_checking_assert (c_node);
2162 old_size_0 = gimple_call_arg (call_stmt_0, 0);
2163 tmp = force_gimple_operand_gsi (&gsi, mi->dimension_size[0], true,
2164 NULL, true, GSI_SAME_STMT);
2165 if (TREE_CODE (old_size_0) == SSA_NAME)
2166 {
2167 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, old_size_0)
2168 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
2169 if (use_stmt == call_stmt_0)
2170 SET_USE (use_p, tmp);
2171 }
2172 /* When deleting the calls to malloc we need also to remove the edge from
2173 the call graph to keep it consistent. Notice that cgraph_edge may
2174 create a new node in the call graph if there is no node for the given
2175 declaration; this shouldn't be the case but currently there is no way to
2176 check this outside of "cgraph.c". */
2177 for (i = 1; i < mi->min_indirect_level_escape; i++)
2178 {
2179 gimple_stmt_iterator gsi;
2180
2181 gimple call_stmt = mi->malloc_for_level[i];
2182 gcc_assert (is_gimple_call (call_stmt));
2183 e = cgraph_edge (c_node, call_stmt);
2184 gcc_assert (e);
2185 cgraph_remove_edge (e);
2186 gsi = gsi_for_stmt (call_stmt);
2187 /* Remove the call stmt. */
2188 gsi_remove (&gsi, true);
2189 /* Remove the assignment of the allocated area. */
2190 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
2191 gimple_call_lhs (call_stmt))
2192 {
2193 gsi = gsi_for_stmt (use_stmt);
2194 gsi_remove (&gsi, true);
2195 }
2196 }
2197 update_ssa (TODO_update_ssa);
2198 #ifdef ENABLE_CHECKING
2199 verify_ssa (true);
2200 #endif
2201 /* Delete the calls to free. */
2202 for (i = 1; i < mi->min_indirect_level_escape; i++)
2203 {
2204 gimple_stmt_iterator gsi;
2205
2206 /* ??? It is unclear why this case is possible, but we have failed on it once. */
2207 if (!mi->free_stmts[i].stmt)
2208 continue;
2209
2210 c_node = cgraph_get_node (mi->free_stmts[i].func);
2211 gcc_checking_assert (c_node);
2212 gcc_assert (is_gimple_call (mi->free_stmts[i].stmt));
2213 e = cgraph_edge (c_node, mi->free_stmts[i].stmt);
2214 gcc_assert (e);
2215 cgraph_remove_edge (e);
2216 current_function_decl = mi->free_stmts[i].func;
2217 set_cfun (DECL_STRUCT_FUNCTION (mi->free_stmts[i].func));
2218 gsi = gsi_for_stmt (mi->free_stmts[i].stmt);
2219 gsi_remove (&gsi, true);
2220 }
2221 /* Return to the previous situation. */
2222 current_function_decl = oldfn;
2223 pop_cfun ();
2224 return 1;
2225
2226 }
2227
2228
2229 /* Print out the results of the escape analysis. */
2230 static int
2231 dump_matrix_reorg_analysis (void **slot, void *data ATTRIBUTE_UNUSED)
2232 {
2233 struct matrix_info *mi = (struct matrix_info *) *slot;
2234
2235 if (!dump_file)
2236 return 1;
2237 fprintf (dump_file, "Matrix \"%s\"; Escaping Level: %d, Num Dims: %d,",
2238 get_name (mi->decl), mi->min_indirect_level_escape, mi->num_dims);
2239 fprintf (dump_file, " Malloc Dims: %d, ", mi->max_malloced_level);
2240 fprintf (dump_file, "\n");
2241 if (mi->min_indirect_level_escape >= 2)
2242 fprintf (dump_file, "Flattened %d dimensions \n",
2243 mi->min_indirect_level_escape);
2244 return 1;
2245 }
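/* For illustration (not real output): for a fully flattened
   three-dimensional matrix the fprintf calls above would produce

     Matrix "a"; Escaping Level: 3, Num Dims: 3, Malloc Dims: 3,
     Flattened 3 dimensions

   (matrix name and counts depend on the program being compiled).  */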
2246
2247 /* Perform matrix flattening. */
2248
2249 static unsigned int
2250 matrix_reorg (void)
2251 {
2252 struct cgraph_node *node;
2253
2254 if (profile_info)
2255 check_transpose_p = true;
2256 else
2257 check_transpose_p = false;
2258 /* If there are hand-written vectors, we skip this optimization. */
2259 FOR_EACH_FUNCTION (node)
2260 if (!may_flatten_matrices (node))
2261 return 0;
2262 matrices_to_reorg = htab_create (37, mtt_info_hash, mtt_info_eq, mat_free);
2263 /* Find and record all potential matrices in the program. */
2264 find_matrices_decl ();
2265 /* Analyze the accesses of the matrices (escaping analysis). */
2266 FOR_EACH_DEFINED_FUNCTION (node)
2267 {
2268 tree temp_fn;
2269
2270 temp_fn = current_function_decl;
2271 current_function_decl = node->symbol.decl;
2272 push_cfun (DECL_STRUCT_FUNCTION (node->symbol.decl));
2273 bitmap_obstack_initialize (NULL);
2274 gimple_register_cfg_hooks ();
2275
2276 if (!gimple_in_ssa_p (cfun))
2277 {
2278 free_dominance_info (CDI_DOMINATORS);
2279 free_dominance_info (CDI_POST_DOMINATORS);
2280 pop_cfun ();
2281 current_function_decl = temp_fn;
2282 bitmap_obstack_release (NULL);
2283
2284 return 0;
2285 }
2286
2287 #ifdef ENABLE_CHECKING
2288 verify_flow_info ();
2289 #endif
2290
2291 if (!matrices_to_reorg)
2292 {
2293 free_dominance_info (CDI_DOMINATORS);
2294 free_dominance_info (CDI_POST_DOMINATORS);
2295 pop_cfun ();
2296 current_function_decl = temp_fn;
2297 bitmap_obstack_release (NULL);
2298
2299 return 0;
2300 }
2301
2302 /* Create the hash table for the PHI nodes. */
2303 htab_mat_acc_phi_nodes = htab_create (37, mat_acc_phi_hash,
2304 mat_acc_phi_eq, free);
2305 if (!check_transpose_p)
2306 find_sites_in_func (false);
2307 else
2308 {
2309 find_sites_in_func (true);
2310 loop_optimizer_init (LOOPS_NORMAL);
2311 if (current_loops)
2312 scev_initialize ();
2313 htab_traverse (matrices_to_reorg, analyze_transpose, NULL);
2314 if (current_loops)
2315 {
2316 scev_finalize ();
2317 loop_optimizer_finalize ();
2318 current_loops = NULL;
2319 }
2320 }
2321 /* If the current function is the allocation function for any of
2322 the matrices we check its allocation and the escaping level. */
2323 htab_traverse (matrices_to_reorg, check_allocation_function, NULL);
2324 free_dominance_info (CDI_DOMINATORS);
2325 free_dominance_info (CDI_POST_DOMINATORS);
2326 pop_cfun ();
2327 current_function_decl = temp_fn;
2328 bitmap_obstack_release (NULL);
2329 }
2330 htab_traverse (matrices_to_reorg, transform_allocation_sites, NULL);
2331 /* Now transform the accesses. */
2332 FOR_EACH_DEFINED_FUNCTION (node)
2333 {
2334 /* Remember that allocation sites have been handled. */
2335 tree temp_fn;
2336
2337 temp_fn = current_function_decl;
2338 current_function_decl = node->symbol.decl;
2339 push_cfun (DECL_STRUCT_FUNCTION (node->symbol.decl));
2340 bitmap_obstack_initialize (NULL);
2341 gimple_register_cfg_hooks ();
2342 record_all_accesses_in_func ();
2343 htab_traverse (matrices_to_reorg, transform_access_sites, NULL);
2344 cgraph_rebuild_references ();
2345 free_dominance_info (CDI_DOMINATORS);
2346 free_dominance_info (CDI_POST_DOMINATORS);
2347 pop_cfun ();
2348 current_function_decl = temp_fn;
2349 bitmap_obstack_release (NULL);
2350 }
2351 htab_traverse (matrices_to_reorg, dump_matrix_reorg_analysis, NULL);
2352
2353 current_function_decl = NULL;
2354 set_cfun (NULL);
2355 matrices_to_reorg = NULL;
2356 return 0;
2357 }
2358
2359
2360 /* The condition for matrix flattening to be performed. */
2361 static bool
2362 gate_matrix_reorg (void)
2363 {
2364 return flag_ipa_matrix_reorg && flag_whole_program;
2365 }
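/* For reference (not in the original file): both conditions map to
   command-line flags, so the pass only runs when both are given
   explicitly, e.g.

     gcc -O2 -fipa-matrix-reorg -fwhole-program test.c

   where -fipa-matrix-reorg sets flag_ipa_matrix_reorg and
   -fwhole-program sets flag_whole_program.  */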
2366
2367 struct simple_ipa_opt_pass pass_ipa_matrix_reorg =
2368 {
2369 {
2370 SIMPLE_IPA_PASS,
2371 "matrix-reorg", /* name */
2372 gate_matrix_reorg, /* gate */
2373 matrix_reorg, /* execute */
2374 NULL, /* sub */
2375 NULL, /* next */
2376 0, /* static_pass_number */
2377 TV_NONE, /* tv_id */
2378 0, /* properties_required */
2379 0, /* properties_provided */
2380 0, /* properties_destroyed */
2381 0, /* todo_flags_start */
2382 TODO_dump_symtab /* todo_flags_finish */
2383 }
2384 };