1 /* Matrix layout transformations.
2 Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc.
3 Contributed by Razya Ladelsky <razya@il.ibm.com>
4 Originally written by Revital Eres and Mustafa Hagog.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 /*
23 Matrix flattening optimization tries to replace an N-dimensional
24 matrix with its equivalent M-dimensional matrix, where M < N.
25 This first implementation focuses on global matrices defined dynamically.
26
27 When M==1, we actually flatten the whole matrix.
28 For instance consider a two-dimensional array a [dim1] [dim2].
29 The code for allocating space for it usually looks like:
30
31 a = (int **) malloc(dim1 * sizeof(int *));
32 for (i=0; i<dim1; i++)
33 a[i] = (int *) malloc (dim2 * sizeof(int));
34
35 If the array "a" is found suitable for this optimization,
36 its allocation is replaced by:
37
38 a = (int *) malloc (dim1 * dim2 * sizeof(int));
39
40 and all the references to a[i][j] are replaced by a[i * dim2 + j].
41
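   As an illustrative sketch (source-level pseudocode; the pass itself
   rewrites GIMPLE, not C source), a loop nest over the matrix would
   effectively change from:

   for (i=0; i<dim1; i++)
     for (j=0; j<dim2; j++)
       sum += a[i][j];

   to:

   for (i=0; i<dim1; i++)
     for (j=0; j<dim2; j++)
       sum += a[i * dim2 + j];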
42 The two main phases of the optimization are the analysis
43 and transformation.
44 The driver of the optimization is matrix_reorg ().
45
46
47
48 Analysis phase:
49 ===============
50
51 We'll number the dimensions outside-in, meaning the most external
52 is 0, then 1, and so on.
53 The analysis part of the optimization determines K, the escape
54 level of an N-dimensional matrix (K <= N), that allows flattening of
55 the external dimensions 0,1,..., K-1. Escape level 0 means that the
56 whole matrix escapes and no flattening is possible.
57
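   For example (an illustrative reading of K, not code from this pass):
   for a matrix declared as "int ***a" (N == 3), K == 2 means the two
   external dimensions 0 and 1 may be flattened into one, while K == 0
   means the whole matrix escapes and nothing can be flattened.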
58 The analysis part is implemented in analyze_matrix_allocation_site()
59 and analyze_matrix_accesses().
60
61 Transformation phase:
62 =====================
63 In this phase we define the new flattened matrices that replace the
64 original matrices in the code.
65 Implemented in transform_allocation_sites(),
66 transform_access_sites().
67
68 Matrix Transposing
69 ==================
70 The idea of Matrix Transposing is organizing the matrix in a different
71 layout such that the dimensions are reordered.
72 This could produce better cache behavior in some cases.
73
74 For example, let's look at the matrix accesses in the following loop:
75
76 for (i=0; i<N; i++)
77 for (j=0; j<M; j++)
78 access to a[i][j]
79
80 This loop can produce good cache behavior because the elements of
81 the inner dimension are accessed sequentially.
82
83 However, if the accesses of the matrix were of the following form:
84
85 for (i=0; i<N; i++)
86 for (j=0; j<M; j++)
87 access to a[j][i]
88
89 In this loop we iterate over the columns and not the rows.
90 Therefore, swapping the roles of rows and columns
91 would yield an organization with better (cache) locality.
92 Reordering the dimensions of the matrix is called matrix transposing.
93
94 This example, of course, extends to matrices with more dimensions
95 as well.
96
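   As a hedged sketch (again source-level pseudocode rather than the GIMPLE
   the pass actually rewrites): if the column-wise accesses above dominate,
   transposing and then flattening the matrix lets the innermost loop walk
   contiguous memory, e.g.

   for (i=0; i<N; i++)
     for (j=0; j<M; j++)
       access to a[i * M + j]   (this element was a[j][i] in the old layout)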
97 Since a program could include all kinds of accesses, there is a decision
98 mechanism, implemented in analyze_transpose(), which applies a
99 heuristic to determine whether to transpose the matrix or not,
100 according to the form of the dominant accesses.
101 This decision is transferred to the flattening mechanism, and whether
102 the matrix was transposed or not, the matrix is flattened (if possible).
103
104 This decision making is based on profiling information and loop information.
105 If profiling information is available, the decision-making mechanism is
106 used; otherwise the matrix will only be flattened (if possible).
107
108 Both optimizations are described in the paper "Matrix flattening and
109 transposing in GCC" which was presented at the GCC Summit 2006.
110 http://www.gccsummit.org/2006/2006-GCC-Summit-Proceedings.pdf
111
112 */
113
114 #include "config.h"
115 #include "system.h"
116 #include "coretypes.h"
117 #include "tm.h"
118 #include "tree.h"
119 #include "rtl.h"
120 #include "c-tree.h"
121 #include "tree-inline.h"
122 #include "tree-flow.h"
123 #include "tree-flow-inline.h"
124 #include "langhooks.h"
125 #include "hashtab.h"
126 #include "toplev.h"
127 #include "flags.h"
128 #include "ggc.h"
129 #include "debug.h"
130 #include "target.h"
131 #include "cgraph.h"
132 #include "diagnostic.h"
133 #include "timevar.h"
134 #include "params.h"
135 #include "fibheap.h"
136 #include "c-common.h"
137 #include "intl.h"
138 #include "function.h"
139 #include "basic-block.h"
140 #include "cfgloop.h"
141 #include "tree-iterator.h"
142 #include "tree-pass.h"
143 #include "opts.h"
144 #include "tree-data-ref.h"
145 #include "tree-chrec.h"
146 #include "tree-scalar-evolution.h"
147
148 /*
149 We need to collect a lot of data from the original malloc,
150 particularly as the gimplifier has converted:
151
152 orig_var = (struct_type *) malloc (x * sizeof (struct_type *));
153
154 into
155
156 T3 = <constant> ; ** <constant> is amount to malloc; precomputed **
157 T4 = malloc (T3);
158 T5 = (struct_type *) T4;
159 orig_var = T5;
160
161 The following struct fields allow us to collect all the necessary data from
162 the gimplified program. The comments in the struct below are all based
163 on the gimple example above. */
164
165 struct malloc_call_data
166 {
167 tree call_stmt; /* Tree for "T4 = malloc (T3);" */
168 tree size_var; /* Var decl for T3. */
169 tree malloc_size; /* Tree for "<constant>", the rhs assigned to T3. */
170 };
171
172 /* The front end of the compiler, when parsing statements of the form:
173
174 var = (type_cast) malloc (sizeof (type));
175
176 always converts this single statement into the following statements
177 (GIMPLE form):
178
179 T.1 = sizeof (type);
180 T.2 = malloc (T.1);
181 T.3 = (type_cast) T.2;
182 var = T.3;
183
184 Since we need to create new malloc statements and modify the original
185 statements somewhat, we need to find all four of the above statements.
186 Currently record_call_1 (called for building cgraph edges) finds and
187 records the statements containing the actual call to malloc, but we
188 need to find the rest of the variables/statements on our own. That
189 is what the following function does. */
190 static void
191 collect_data_for_malloc_call (tree stmt, struct malloc_call_data *m_data)
192 {
193 tree size_var = NULL;
194 tree malloc_fn_decl;
195 tree tmp;
196 tree arg1;
197
198 gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
199
200 tmp = get_call_expr_in (stmt);
201 malloc_fn_decl = CALL_EXPR_FN (tmp);
202 if (TREE_CODE (malloc_fn_decl) != ADDR_EXPR
203 || TREE_CODE (TREE_OPERAND (malloc_fn_decl, 0)) != FUNCTION_DECL
204 || DECL_FUNCTION_CODE (TREE_OPERAND (malloc_fn_decl, 0)) !=
205 BUILT_IN_MALLOC)
206 return;
207
208 arg1 = CALL_EXPR_ARG (tmp, 0);
209 size_var = arg1;
210
211 m_data->call_stmt = stmt;
212 m_data->size_var = size_var;
213 if (TREE_CODE (size_var) != VAR_DECL)
214 m_data->malloc_size = size_var;
215 else
216 m_data->malloc_size = NULL_TREE;
217 }
218
219 /* Information about matrix access site.
220 For example: if an access site of matrix arr is arr[i][j]
221 the ACCESS_SITE_INFO structure will have the address
222 of arr as its stmt. The INDEX_INFO will hold information about the
223 initial address and index of each dimension. */
224 struct access_site_info
225 {
226 /* The statement (INDIRECT_REF or POINTER_PLUS_EXPR). */
227 tree stmt;
228
229 /* In case of POINTER_PLUS_EXPR, what is the offset. */
230 tree offset;
231
232 /* The index which created the offset. */
233 tree index;
234
235 /* The indirection level of this statement. */
236 int level;
237
238 /* TRUE for an allocation site, FALSE for an access site. */
239 bool is_alloc;
240
241 /* The function containing the access site. */
242 tree function_decl;
243
244 /* This access is iterated in the innermost loop. */
245 bool iterated_by_inner_most_loop_p;
246 };
247
248 typedef struct access_site_info *access_site_info_p;
249 DEF_VEC_P (access_site_info_p);
250 DEF_VEC_ALLOC_P (access_site_info_p, heap);
251
252 /* Information about matrix to flatten. */
253 struct matrix_info
254 {
255 /* Decl tree of this matrix. */
256 tree decl;
257 /* Number of dimensions; number
258 of "*" in the type declaration. */
259 int num_dims;
260
261 /* Minimum indirection level that escapes, 0 means that
262 the whole matrix escapes, k means that dimensions
263 0 to ACTUAL_DIM - k escape. */
264 int min_indirect_level_escape;
265
266 tree min_indirect_level_escape_stmt;
267
268 /* Is the matrix transposed. */
269 bool is_transposed_p;
270
271 /* Hold the allocation site for each level (dimension).
272 We can use NUM_DIMS as the upper bound and allocate the array
273 once with this number of elements, avoiding the need for realloc and
274 MAX_MALLOCED_LEVEL. */
275 tree *malloc_for_level;
276
277 int max_malloced_level;
278
279 /* The location of the allocation sites (they must be in one
280 function). */
281 tree allocation_function_decl;
282
283 /* The calls to free for each level of indirection. */
284 struct free_info
285 {
286 tree stmt;
287 tree func;
288 } *free_stmts;
289
290 /* An array which holds for each dimension its size, where
291 dimension 0 is the outermost (the one that contains all the others).
292 */
293 tree *dimension_size;
294
295 /* An array which holds for each dimension its original size
296 (before transposing and flattening take place). */
297 tree *dimension_size_orig;
298
299 /* An array which holds for each dimension the size of the type
300 of elements accessed in that level (in bytes). */
301 HOST_WIDE_INT *dimension_type_size;
302
303 int dimension_type_size_len;
304
305 /* An array collecting the count of accesses for each dimension. */
306 gcov_type *dim_hot_level;
307
308 /* An array of the accesses to be flattened.
309 elements are of type "struct access_site_info *". */
310 VEC (access_site_info_p, heap) * access_l;
311
312 /* A map of how the dimensions will be organized at the end of
313 the analyses. */
314 int *dim_map;
315 };
316
317 /* In each phi node we want to record the indirection level we have when we
318 get to the phi node. Usually we will have phi nodes with more than two
319 arguments, so we must ensure that all of them get to the phi node with
320 the same indirection level, otherwise it's not safe to do the flattening.
321 So we record the information regarding the indirection level each time we
322 get to the phi node in this hash table. */
323
324 struct matrix_access_phi_node
325 {
326 tree phi;
327 int indirection_level;
328 };
329
330 /* We use this structure to find if the SSA variable is accessed inside the
331 tree and record the tree containing it. */
332
333 struct ssa_acc_in_tree
334 {
335 /* The variable whose accesses in the tree we are looking for. */
336 tree ssa_var;
337 /* The tree (and its code) in which the ssa_var is accessed; currently
338 it could be an INDIRECT_REF or CALL_EXPR. */
339 enum tree_code t_code;
340 tree t_tree;
341 /* The place in the containing tree. */
342 tree *tp;
343 tree second_op;
344 bool var_found;
345 };
346
347 static void analyze_matrix_accesses (struct matrix_info *, tree, int, bool,
348 sbitmap, bool);
349 static int transform_allocation_sites (void **, void *);
350 static int transform_access_sites (void **, void *);
351 static int analyze_transpose (void **, void *);
352 static int dump_matrix_reorg_analysis (void **, void *);
353
354 static bool check_transpose_p;
355
356 /* Hash function used for the phi nodes. */
357
358 static hashval_t
359 mat_acc_phi_hash (const void *p)
360 {
361 const struct matrix_access_phi_node *ma_phi = p;
362
363 return htab_hash_pointer (ma_phi->phi);
364 }
365
366 /* Equality means phi node pointers are the same. */
367
368 static int
369 mat_acc_phi_eq (const void *p1, const void *p2)
370 {
371 const struct matrix_access_phi_node *phi1 = p1;
372 const struct matrix_access_phi_node *phi2 = p2;
373
374 if (phi1->phi == phi2->phi)
375 return 1;
376
377 return 0;
378 }
379
380 /* Hold the PHI nodes we visit during the traversal for escaping
381 analysis. */
382 static htab_t htab_mat_acc_phi_nodes = NULL;
383
384 /* This hash-table holds the information about the matrices we are
385 going to handle. */
386 static htab_t matrices_to_reorg = NULL;
387
388 /* Return a hash for MTT, which is really a "matrix_info *". */
389 static hashval_t
390 mtt_info_hash (const void *mtt)
391 {
392 return htab_hash_pointer (((const struct matrix_info *) mtt)->decl);
393 }
394
395 /* Return true if MTT1 and MTT2 (which are really both of type
396 "matrix_info *") refer to the same decl. */
397 static int
398 mtt_info_eq (const void *mtt1, const void *mtt2)
399 {
400 const struct matrix_info *i1 = mtt1;
401 const struct matrix_info *i2 = mtt2;
402
403 if (i1->decl == i2->decl)
404 return true;
405
406 return false;
407 }
408
409 /* Return the innermost tree that is not a cast. */
410 static tree
411 get_inner_of_cast_expr (tree t)
412 {
413 while (CONVERT_EXPR_P (t)
414 || TREE_CODE (t) == VIEW_CONVERT_EXPR)
415 t = TREE_OPERAND (t, 0);
416
417 return t;
418 }
419
420 /* Return false if STMT may contain a vector expression.
421 In that case, no matrices should be flattened. */
422 static bool
423 may_flatten_matrices_1 (tree stmt)
424 {
425 tree t;
426
427 switch (TREE_CODE (stmt))
428 {
429 case GIMPLE_MODIFY_STMT:
430 t = GIMPLE_STMT_OPERAND (stmt, 1);
431 while (CONVERT_EXPR_P (t))
432 {
433 if (TREE_TYPE (t) && POINTER_TYPE_P (TREE_TYPE (t)))
434 {
435 tree pointee;
436
437 pointee = TREE_TYPE (t);
438 while (POINTER_TYPE_P (pointee))
439 pointee = TREE_TYPE (pointee);
440 if (TREE_CODE (pointee) == VECTOR_TYPE)
441 {
442 if (dump_file)
443 fprintf (dump_file,
444 "Found vector type, don't flatten matrix\n");
445 return false;
446 }
447 }
448 t = TREE_OPERAND (t, 0);
449 }
450 break;
451 case ASM_EXPR:
452 /* Asm code could contain vector operations. */
453 return false;
454 break;
455 default:
456 break;
457 }
458 return true;
459 }
460
461 /* Return false if there are hand-written vectors in the program.
462 We disable the flattening in such a case. */
463 static bool
464 may_flatten_matrices (struct cgraph_node *node)
465 {
466 tree decl;
467 struct function *func;
468 basic_block bb;
469 block_stmt_iterator bsi;
470
471 decl = node->decl;
472 if (node->analyzed)
473 {
474 func = DECL_STRUCT_FUNCTION (decl);
475 FOR_EACH_BB_FN (bb, func)
476 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
477 if (!may_flatten_matrices_1 (bsi_stmt (bsi)))
478 return false;
479 }
480 return true;
481 }
482
483 /* Given a VAR_DECL, check its type to determine whether it is
484 a definition of a dynamically allocated matrix and therefore is
485 a suitable candidate for the matrix flattening optimization.
486 Return NULL if VAR_DECL is not such decl. Otherwise, allocate
487 a MATRIX_INFO structure, fill it with the relevant information
488 and return a pointer to it.
489 TODO: handle also statically defined arrays. */
490 static struct matrix_info *
491 analyze_matrix_decl (tree var_decl)
492 {
493 struct matrix_info *m_node, tmpmi, *mi;
494 tree var_type;
495 int dim_num = 0;
496
497 gcc_assert (matrices_to_reorg);
498
499 if (TREE_CODE (var_decl) == PARM_DECL)
500 var_type = DECL_ARG_TYPE (var_decl);
501 else if (TREE_CODE (var_decl) == VAR_DECL)
502 var_type = TREE_TYPE (var_decl);
503 else
504 return NULL;
505
506 if (!POINTER_TYPE_P (var_type))
507 return NULL;
508
509 while (POINTER_TYPE_P (var_type))
510 {
511 var_type = TREE_TYPE (var_type);
512 dim_num++;
513 }
514
515 if (dim_num <= 1)
516 return NULL;
517
518 if (!COMPLETE_TYPE_P (var_type)
519 || TREE_CODE (TYPE_SIZE_UNIT (var_type)) != INTEGER_CST)
520 return NULL;
521
522 /* Check to see if this pointer is already in there. */
523 tmpmi.decl = var_decl;
524 mi = htab_find (matrices_to_reorg, &tmpmi);
525
526 if (mi)
527 return NULL;
528
529 /* Record the matrix. */
530
531 m_node = (struct matrix_info *) xcalloc (1, sizeof (struct matrix_info));
532 m_node->decl = var_decl;
533 m_node->num_dims = dim_num;
534 m_node->free_stmts
535 = (struct free_info *) xcalloc (dim_num, sizeof (struct free_info));
536
537 /* Init min_indirect_level_escape to -1 to indicate that no escape
538 analysis has been done yet. */
539 m_node->min_indirect_level_escape = -1;
540 m_node->is_transposed_p = false;
541
542 return m_node;
543 }
544
545 /* Free matrix E. */
546 static void
547 mat_free (void *e)
548 {
549 struct matrix_info *mat = (struct matrix_info *) e;
550
551 if (!mat)
552 return;
553
554 if (mat->free_stmts)
555 free (mat->free_stmts);
556 if (mat->dim_hot_level)
557 free (mat->dim_hot_level);
558 if (mat->malloc_for_level)
559 free (mat->malloc_for_level);
560 }
561
562 /* Find all potential matrices.
563 TODO: currently we handle only multidimensional
564 dynamically allocated arrays. */
565 static void
566 find_matrices_decl (void)
567 {
568 struct matrix_info *tmp;
569 PTR *slot;
570 struct varpool_node *vnode;
571
572 gcc_assert (matrices_to_reorg);
573
574 /* For every global variable in the program:
575 Check to see if it's of a candidate type and record it. */
576 for (vnode = varpool_nodes_queue; vnode; vnode = vnode->next_needed)
577 {
578 tree var_decl = vnode->decl;
579
580 if (!var_decl || TREE_CODE (var_decl) != VAR_DECL)
581 continue;
582
583 if (matrices_to_reorg)
584 if ((tmp = analyze_matrix_decl (var_decl)))
585 {
586 if (!TREE_ADDRESSABLE (var_decl))
587 {
588 slot = htab_find_slot (matrices_to_reorg, tmp, INSERT);
589 *slot = tmp;
590 }
591 }
592 }
593 return;
594 }
595
596 /* Mark that the matrix MI escapes at level L. */
597 static void
598 mark_min_matrix_escape_level (struct matrix_info *mi, int l, tree s)
599 {
600 if (mi->min_indirect_level_escape == -1
601 || (mi->min_indirect_level_escape > l))
602 {
603 mi->min_indirect_level_escape = l;
604 mi->min_indirect_level_escape_stmt = s;
605 }
606 }
607
608 /* Find if the SSA variable is accessed inside the
609 tree and record the tree containing it.
610 The only relevant uses are the case of SSA_NAME, or SSA inside
611 INDIRECT_REF, CALL_EXPR, PLUS_EXPR, POINTER_PLUS_EXPR, MULT_EXPR. */
612 static void
613 ssa_accessed_in_tree (tree t, struct ssa_acc_in_tree *a)
614 {
615 tree call, decl;
616 tree arg;
617 call_expr_arg_iterator iter;
618
619 a->t_code = TREE_CODE (t);
620 switch (a->t_code)
621 {
622 tree op1, op2;
623
624 case SSA_NAME:
625 if (t == a->ssa_var)
626 a->var_found = true;
627 break;
628 case INDIRECT_REF:
629 if (SSA_VAR_P (TREE_OPERAND (t, 0))
630 && TREE_OPERAND (t, 0) == a->ssa_var)
631 a->var_found = true;
632 break;
633 case CALL_EXPR:
634 FOR_EACH_CALL_EXPR_ARG (arg, iter, t)
635 {
636 if (arg == a->ssa_var)
637 {
638 a->var_found = true;
639 call = get_call_expr_in (t);
640 if (call && (decl = get_callee_fndecl (call)))
641 a->t_tree = decl;
642 break;
643 }
644 }
645 break;
646 case POINTER_PLUS_EXPR:
647 case PLUS_EXPR:
648 case MULT_EXPR:
649 op1 = TREE_OPERAND (t, 0);
650 op2 = TREE_OPERAND (t, 1);
651
652 if (op1 == a->ssa_var)
653 {
654 a->var_found = true;
655 a->second_op = op2;
656 }
657 else if (op2 == a->ssa_var)
658 {
659 a->var_found = true;
660 a->second_op = op1;
661 }
662 break;
663 default:
664 break;
665 }
666 }
667
668 /* Record the access/allocation site information for matrix MI so we can
669 handle it later in transformation. */
670 static void
671 record_access_alloc_site_info (struct matrix_info *mi, tree stmt, tree offset,
672 tree index, int level, bool is_alloc)
673 {
674 struct access_site_info *acc_info;
675
676 if (!mi->access_l)
677 mi->access_l = VEC_alloc (access_site_info_p, heap, 100);
678
679 acc_info
680 = (struct access_site_info *)
681 xcalloc (1, sizeof (struct access_site_info));
682 acc_info->stmt = stmt;
683 acc_info->offset = offset;
684 acc_info->index = index;
685 acc_info->function_decl = current_function_decl;
686 acc_info->level = level;
687 acc_info->is_alloc = is_alloc;
688
689 VEC_safe_push (access_site_info_p, heap, mi->access_l, acc_info);
690
691 }
692
693 /* Record the malloc as the allocation site of the given LEVEL. But
694 first we make sure that all the size parameters passed to malloc in
695 all the allocation sites can be pre-calculated before the call to
696 the malloc of level 0 (the main malloc call). */
697 static void
698 add_allocation_site (struct matrix_info *mi, tree stmt, int level)
699 {
700 struct malloc_call_data mcd;
701
702 /* Make sure that the allocation sites are in the same function. */
703 if (!mi->allocation_function_decl)
704 mi->allocation_function_decl = current_function_decl;
705 else if (mi->allocation_function_decl != current_function_decl)
706 {
707 int min_malloc_level;
708
709 gcc_assert (mi->malloc_for_level);
710
711 /* Find the minimum malloc level that has already been seen;
712 we know its allocation function must be
713 MI->allocation_function_decl; since it's different from
714 CURRENT_FUNCTION_DECL, the escaping level should be
715 MIN (LEVEL, MIN_MALLOC_LEVEL) - 1, and the allocation function
716 must be set accordingly. */
717 for (min_malloc_level = 0;
718 min_malloc_level < mi->max_malloced_level
719 && mi->malloc_for_level[min_malloc_level]; min_malloc_level++);
720 if (level < min_malloc_level)
721 {
722 mi->allocation_function_decl = current_function_decl;
723 mark_min_matrix_escape_level (mi, min_malloc_level, stmt);
724 }
725 else
726 {
727 mark_min_matrix_escape_level (mi, level, stmt);
728 /* It cannot be that (level == min_malloc_level);
729 we would have returned earlier. */
730 return;
731 }
732 }
733
734 /* Find the correct malloc information. */
735 collect_data_for_malloc_call (stmt, &mcd);
736
737 /* We accept only calls to malloc function; we do not accept
738 calls like calloc and realloc. */
739 if (!mi->malloc_for_level)
740 {
741 mi->malloc_for_level = xcalloc (level + 1, sizeof (tree));
742 mi->max_malloced_level = level + 1;
743 }
744 else if (mi->max_malloced_level <= level)
745 {
746 mi->malloc_for_level
747 = xrealloc (mi->malloc_for_level, (level + 1) * sizeof (tree));
748
749 /* Zero the newly allocated items. */
750 memset (&(mi->malloc_for_level[mi->max_malloced_level + 1]),
751 0, (level - mi->max_malloced_level) * sizeof (tree));
752
753 mi->max_malloced_level = level + 1;
754 }
755 mi->malloc_for_level[level] = stmt;
756 }
757
758 /* Given an assignment statement STMT whose left-hand-side we know
759 is the matrix MI variable, we traverse the immediate
760 uses backwards until we get to a malloc site. We make sure that
761 there is one and only one malloc site that sets this variable. When
762 we are performing the flattening we generate a new variable that
763 will hold the size for each dimension; each malloc that allocates a
764 dimension has the size parameter; we use that parameter to
765 initialize the dimension size variable so we can use it later in
766 the address calculations. LEVEL is the dimension we're inspecting.
767 Return if STMT is related to an allocation site. */
768
769 static void
770 analyze_matrix_allocation_site (struct matrix_info *mi, tree stmt,
771 int level, sbitmap visited)
772 {
773 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
774 {
775 tree rhs = GIMPLE_STMT_OPERAND (stmt, 1);
776
777 rhs = get_inner_of_cast_expr (rhs);
778 if (TREE_CODE (rhs) == SSA_NAME)
779 {
780 tree def = SSA_NAME_DEF_STMT (rhs);
781
782 analyze_matrix_allocation_site (mi, def, level, visited);
783 return;
784 }
785
786 /* A result of call to malloc. */
787 else if (TREE_CODE (rhs) == CALL_EXPR)
788 {
789 int call_flags = call_expr_flags (rhs);
790
791 if (!(call_flags & ECF_MALLOC))
792 {
793 mark_min_matrix_escape_level (mi, level, stmt);
794 return;
795 }
796 else
797 {
798 tree malloc_fn_decl;
799 const char *malloc_fname;
800
801 malloc_fn_decl = CALL_EXPR_FN (rhs);
802 if (TREE_CODE (malloc_fn_decl) != ADDR_EXPR
803 || TREE_CODE (TREE_OPERAND (malloc_fn_decl, 0)) !=
804 FUNCTION_DECL)
805 {
806 mark_min_matrix_escape_level (mi, level, stmt);
807 return;
808 }
809 malloc_fn_decl = TREE_OPERAND (malloc_fn_decl, 0);
810 malloc_fname = IDENTIFIER_POINTER (DECL_NAME (malloc_fn_decl));
811 if (DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
812 {
813 if (dump_file)
814 fprintf (dump_file,
815 "Matrix %s is an argument to function %s\n",
816 get_name (mi->decl), get_name (malloc_fn_decl));
817 mark_min_matrix_escape_level (mi, level, stmt);
818 return;
819 }
820 }
821 /* This is a call to malloc of level 'level'.
822 mi->max_malloced_level-1 == level means that we've
823 seen a malloc statement of level 'level' before.
824 If the statement is not the same one that we've
825 seen before, then there's another malloc statement
826 for the same level, which means that we need to mark
827 it escaping. */
828 if (mi->malloc_for_level
829 && mi->max_malloced_level-1 == level
830 && mi->malloc_for_level[level] != stmt)
831 {
832 mark_min_matrix_escape_level (mi, level, stmt);
833 return;
834 }
835 else
836 add_allocation_site (mi, stmt, level);
837 return;
838 }
839 /* If we are back to the original matrix variable then we
840 are sure that this is analyzed as an access site. */
841 else if (rhs == mi->decl)
842 return;
843 }
844 /* Looks like we don't know what is happening in this
845 statement so be on the safe side and mark it as escaping. */
846 mark_min_matrix_escape_level (mi, level, stmt);
847 }
848
849 /* The transposing decision making.
850 In order to calculate the profitability of transposing, we collect two
851 types of information regarding the accesses:
852 1. profiling information used to express the hotness of an access, that
853 is how often the matrix is accessed by this access site (count of the
854 access site).
855 2. which dimension in the access site is iterated by the
856 innermost loop containing this access.
857
858 The matrix will have a calculated value of weighted hotness for each
859 dimension.
860 Intuitively the hotness level of a dimension is a function of how
861 many times it was the most frequently accessed dimension in the
862 highly executed access sites of this matrix.
863
864 As computed by the following equation (a double sum over the access
865 sites j = 0..m-1 and the dimensions i = 0..n-1):
866
867 dim_hot_level[i] += acc[j]->dim[i]->iter_by_inner_loop * count(j)
871
872 Where n is the number of dims and m is the number of the matrix
873 access sites. acc[j]->dim[i]->iter_by_inner_loop is 1 if acc[j]
874 iterates over dim[i] in the innermost loop, and is 0 otherwise.
875
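   A small worked illustration with hypothetical numbers (not produced by
   this pass): if one access site iterates dimension 1 in its innermost
   loop with an execution count of 1000, and another iterates dimension 0
   with a count of 10, then dim_hot_level[1] == 1000 and
   dim_hot_level[0] == 10, so dimension 1 is the hottest and is given the
   most contiguous placement in the new layout.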
876 The organization of the new matrix should be according to the
877 hotness of each dimension. The hotness of the dimension implies
878 the locality of the elements. */
879 static int
880 analyze_transpose (void **slot, void *data ATTRIBUTE_UNUSED)
881 {
882 struct matrix_info *mi = *slot;
883 int min_escape_l = mi->min_indirect_level_escape;
884 struct loop *loop;
885 affine_iv iv;
886 struct access_site_info *acc_info;
887 int i;
888
889 if (min_escape_l < 2 || !mi->access_l)
890 {
891 if (mi->access_l)
892 {
893 for (i = 0;
894 VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
895 i++)
896 free (acc_info);
897 VEC_free (access_site_info_p, heap, mi->access_l);
898
899 }
900 return 1;
901 }
902 if (!mi->dim_hot_level)
903 mi->dim_hot_level =
904 (gcov_type *) xcalloc (min_escape_l, sizeof (gcov_type));
905
906
907 for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
908 i++)
909 {
910 if (TREE_CODE (GIMPLE_STMT_OPERAND (acc_info->stmt, 1)) == POINTER_PLUS_EXPR
911 && acc_info->level < min_escape_l)
912 {
913 loop = loop_containing_stmt (acc_info->stmt);
914 if (!loop || loop->inner)
915 {
916 free (acc_info);
917 continue;
918 }
919 if (simple_iv (loop, acc_info->stmt, acc_info->offset, &iv, true))
920 {
921 if (iv.step != NULL)
922 {
923 HOST_WIDE_INT istep;
924
925 istep = int_cst_value (iv.step);
926 if (istep != 0)
927 {
928 acc_info->iterated_by_inner_most_loop_p = 1;
929 mi->dim_hot_level[acc_info->level] +=
930 bb_for_stmt (acc_info->stmt)->count;
931 }
932
933 }
934 }
935 }
936 free (acc_info);
937 }
938 VEC_free (access_site_info_p, heap, mi->access_l);
939
940 return 1;
941 }
942
943 /* Find the index which defines the OFFSET from base.
944 We walk from use to def until we find how the offset was defined. */
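/* For instance (a hypothetical GIMPLE fragment, for illustration only):
   if OFFSET is defined by "D.10_5 = i_2 * 4;", the walk below reaches that
   MULT_EXPR and returns i_2 as the index.  */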
945 static tree
946 get_index_from_offset (tree offset, tree def_stmt)
947 {
948 tree op1, op2, expr, index;
949
950 if (TREE_CODE (def_stmt) == PHI_NODE)
951 return NULL;
952 expr = get_inner_of_cast_expr (GIMPLE_STMT_OPERAND (def_stmt, 1));
953 if (TREE_CODE (expr) == SSA_NAME)
954 return get_index_from_offset (offset, SSA_NAME_DEF_STMT (expr));
955 else if (TREE_CODE (expr) == MULT_EXPR)
956 {
957 op1 = TREE_OPERAND (expr, 0);
958 op2 = TREE_OPERAND (expr, 1);
959 if (TREE_CODE (op1) != INTEGER_CST && TREE_CODE (op2) != INTEGER_CST)
960 return NULL;
961 index = (TREE_CODE (op1) == INTEGER_CST) ? op2 : op1;
962 return index;
963 }
964 else
965 return NULL_TREE;
966 }
967
968 /* Update MI->dimension_type_size[CURRENT_INDIRECT_LEVEL] with the size
969 of the type related to the SSA_VAR, or the type related to the
970 lhs of STMT, in the case that it is an INDIRECT_REF. */
971 static void
972 update_type_size (struct matrix_info *mi, tree stmt, tree ssa_var,
973 int current_indirect_level)
974 {
975 tree lhs;
976 HOST_WIDE_INT type_size;
977
978 /* Update type according to the type of the INDIRECT_REF expr. */
979 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
980 && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == INDIRECT_REF)
981 {
982 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
983 gcc_assert (POINTER_TYPE_P
984 (TREE_TYPE (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
985 type_size =
986 int_size_in_bytes (TREE_TYPE
987 (TREE_TYPE
988 (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
989 }
990 else
991 type_size = int_size_in_bytes (TREE_TYPE (ssa_var));
992
993 /* Record the size of elements accessed (as a whole)
994 in the current indirection level (dimension). If the size of
995 elements is not known at compile time, mark it as escaping. */
996 if (type_size <= 0)
997 mark_min_matrix_escape_level (mi, current_indirect_level, stmt);
998 else
999 {
1000 int l = current_indirect_level;
1001
1002 if (!mi->dimension_type_size)
1003 {
1004 mi->dimension_type_size
1005 = (HOST_WIDE_INT *) xcalloc (l + 1, sizeof (HOST_WIDE_INT));
1006 mi->dimension_type_size_len = l + 1;
1007 }
1008 else if (mi->dimension_type_size_len < l + 1)
1009 {
1010 mi->dimension_type_size
1011 = (HOST_WIDE_INT *) xrealloc (mi->dimension_type_size,
1012 (l + 1) * sizeof (HOST_WIDE_INT));
1013 memset (&mi->dimension_type_size[mi->dimension_type_size_len],
1014 0, (l + 1 - mi->dimension_type_size_len)
1015 * sizeof (HOST_WIDE_INT));
1016 mi->dimension_type_size_len = l + 1;
1017 }
1018 /* Make sure all the accesses in the same level have the same type
1019 size. */
1020 if (!mi->dimension_type_size[l])
1021 mi->dimension_type_size[l] = type_size;
1022 else if (mi->dimension_type_size[l] != type_size)
1023 mark_min_matrix_escape_level (mi, l, stmt);
1024 }
1025 }
1026
1027 /* USE_STMT represents a call_expr, where one of the arguments is the
1028 ssa var that we want to check because it came from some use of matrix
1029 MI. CURRENT_INDIRECT_LEVEL is the indirection level we reached so
1030 far. */
1031
1032 static void
1033 analyze_accesses_for_call_expr (struct matrix_info *mi, tree use_stmt,
1034 int current_indirect_level)
1035 {
1036 tree call = get_call_expr_in (use_stmt);
1037 if (call && get_callee_fndecl (call))
1038 {
1039 if (DECL_FUNCTION_CODE (get_callee_fndecl (call)) != BUILT_IN_FREE)
1040 {
1041 if (dump_file)
1042 fprintf (dump_file,
1043 "Matrix %s: Function call %s, level %d escapes.\n",
1044 get_name (mi->decl), get_name (get_callee_fndecl (call)),
1045 current_indirect_level);
1046 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1047 }
1048 else if (mi->free_stmts[current_indirect_level].stmt != NULL
1049 && mi->free_stmts[current_indirect_level].stmt != use_stmt)
1050 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1051 else
1052 {
1053 /* Record the free statements so we can delete them
1054 later. */
1055 int l = current_indirect_level;
1056
1057 mi->free_stmts[l].stmt = use_stmt;
1058 mi->free_stmts[l].func = current_function_decl;
1059 }
1060 }
1061 }
1062
1063 /* USE_STMT represents a phi node of the ssa var that we want to
1064 check because it came from some use of matrix
1065 MI.
1066 We check all the escaping levels that get to the PHI node
1067 and make sure they are all the same escaping;
1068 if not (which is rare) we let the escaping level be the
1069 minimum level that gets into that PHI because starting from
1070 that level we cannot expect the behavior of the indirections.
1071 CURRENT_INDIRECT_LEVEL is the indirection level we reached so far. */
1072
1073 static void
1074 analyze_accesses_for_phi_node (struct matrix_info *mi, tree use_stmt,
1075 int current_indirect_level, sbitmap visited,
1076 bool record_accesses)
1077 {
1078
1079 struct matrix_access_phi_node tmp_maphi, *maphi, **pmaphi;
1080
1081 tmp_maphi.phi = use_stmt;
1082 if ((maphi = htab_find (htab_mat_acc_phi_nodes, &tmp_maphi)))
1083 {
1084 if (maphi->indirection_level == current_indirect_level)
1085 return;
1086 else
1087 {
1088 int level = MIN (maphi->indirection_level,
1089 current_indirect_level);
1090 int j;
1091 tree t = NULL_TREE;
1092
1093 maphi->indirection_level = level;
1094 for (j = 0; j < PHI_NUM_ARGS (use_stmt); j++)
1095 {
1096 tree def = PHI_ARG_DEF (use_stmt, j);
1097
1098 if (TREE_CODE (SSA_NAME_DEF_STMT (def)) != PHI_NODE)
1099 t = SSA_NAME_DEF_STMT (def);
1100 }
1101 mark_min_matrix_escape_level (mi, level, t);
1102 }
1103 return;
1104 }
1105 maphi = (struct matrix_access_phi_node *)
1106 xcalloc (1, sizeof (struct matrix_access_phi_node));
1107 maphi->phi = use_stmt;
1108 maphi->indirection_level = current_indirect_level;
1109
1110 /* Insert to hash table. */
1111 pmaphi = (struct matrix_access_phi_node **)
1112 htab_find_slot (htab_mat_acc_phi_nodes, maphi, INSERT);
1113 gcc_assert (pmaphi);
1114 *pmaphi = maphi;
1115
1116 if (!TEST_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt))))
1117 {
1118 SET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
1119 analyze_matrix_accesses (mi, PHI_RESULT (use_stmt),
1120 current_indirect_level, false, visited,
1121 record_accesses);
1122 RESET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
1123 }
1124 }
1125
1126 /* USE_STMT represents a modify statement (the rhs or lhs includes
1127 the ssa var that we want to check because it came from some use of matrix
1128 MI).
1129 CURRENT_INDIRECT_LEVEL is the indirection level we reached so far. */
1130
1131 static int
1132 analyze_accesses_for_modify_stmt (struct matrix_info *mi, tree ssa_var,
1133 tree use_stmt, int current_indirect_level,
1134 bool last_op, sbitmap visited,
1135 bool record_accesses)
1136 {
1137
1138 tree lhs = GIMPLE_STMT_OPERAND (use_stmt, 0);
1139 tree rhs = GIMPLE_STMT_OPERAND (use_stmt, 1);
1140 struct ssa_acc_in_tree lhs_acc, rhs_acc;
1141
1142 memset (&lhs_acc, 0, sizeof (lhs_acc));
1143 memset (&rhs_acc, 0, sizeof (rhs_acc));
1144
1145 lhs_acc.ssa_var = ssa_var;
1146 lhs_acc.t_code = ERROR_MARK;
1147 ssa_accessed_in_tree (lhs, &lhs_acc);
1148 rhs_acc.ssa_var = ssa_var;
1149 rhs_acc.t_code = ERROR_MARK;
1150 ssa_accessed_in_tree (get_inner_of_cast_expr (rhs), &rhs_acc);
1151
1152 /* The SSA must be either in the left side or in the right side,
1153 to understand what is happening.
1154 In case the SSA_NAME is found in both sides we should be escaping
1155 at this level because in this case we cannot calculate the
1156 address correctly. */
1157 if ((lhs_acc.var_found && rhs_acc.var_found
1158 && lhs_acc.t_code == INDIRECT_REF)
1159 || (!rhs_acc.var_found && !lhs_acc.var_found))
1160 {
1161 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1162 return current_indirect_level;
1163 }
1164 gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);
1165
1166 /* If we are storing to the matrix at some level, then mark it as
1167 escaping at that level. */
1168 if (lhs_acc.var_found)
1169 {
1170 tree def;
1171 int l = current_indirect_level + 1;
1172
1173 gcc_assert (lhs_acc.t_code == INDIRECT_REF);
1174 def = get_inner_of_cast_expr (rhs);
1175 if (TREE_CODE (def) != SSA_NAME)
1176 mark_min_matrix_escape_level (mi, l, use_stmt);
1177 else
1178 {
1179 def = SSA_NAME_DEF_STMT (def);
1180 analyze_matrix_allocation_site (mi, def, l, visited);
1181 if (record_accesses)
1182 record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
1183 NULL_TREE, l, true);
1184 update_type_size (mi, use_stmt, NULL, l);
1185 }
1186 return current_indirect_level;
1187 }
1188 /* Now, check the right-hand-side, to see how the SSA variable
1189 is used. */
1190 if (rhs_acc.var_found)
1191 {
1192 /* If we are passing the ssa name to a function call and
1193 the pointer escapes when passed to the function
1194 (not the case of free), then we mark the matrix as
1195 escaping at this level. */
1196 if (rhs_acc.t_code == CALL_EXPR)
1197 {
1198 analyze_accesses_for_call_expr (mi, use_stmt,
1199 current_indirect_level);
1200
1201 return current_indirect_level;
1202 }
1203 if (rhs_acc.t_code != INDIRECT_REF
1204 && rhs_acc.t_code != POINTER_PLUS_EXPR && rhs_acc.t_code != SSA_NAME)
1205 {
1206 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1207 return current_indirect_level;
1208 }
1209 /* If the access in the RHS has an indirection increase the
1210 indirection level. */
1211 if (rhs_acc.t_code == INDIRECT_REF)
1212 {
1213 if (record_accesses)
1214 record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
1215 NULL_TREE,
1216 current_indirect_level, true);
1217 current_indirect_level += 1;
1218 }
1219 else if (rhs_acc.t_code == POINTER_PLUS_EXPR)
1220 {
1221 gcc_assert (rhs_acc.second_op);
1222 if (last_op)
1223 /* Currently we support only one PLUS expression on the
1224 SSA_NAME that holds the base address of the current
1225 indirection level; to support more general case there
1226 is a need to hold a stack of expressions and regenerate
1227 the calculation later. */
1228 mark_min_matrix_escape_level (mi, current_indirect_level,
1229 use_stmt);
1230 else
1231 {
1232 tree index;
1233 tree op1, op2;
1234
1235 op1 = TREE_OPERAND (rhs, 0);
1236 op2 = TREE_OPERAND (rhs, 1);
1237
1238 op2 = (op1 == ssa_var) ? op2 : op1;
1239 if (TREE_CODE (op2) == INTEGER_CST)
1240 index =
1241 build_int_cst (TREE_TYPE (op1),
1242 TREE_INT_CST_LOW (op2) /
1243 int_size_in_bytes (TREE_TYPE (op1)));
1244 else
1245 {
1246 index =
1247 get_index_from_offset (op2, SSA_NAME_DEF_STMT (op2));
1248 if (index == NULL_TREE)
1249 {
1250 mark_min_matrix_escape_level (mi,
1251 current_indirect_level,
1252 use_stmt);
1253 return current_indirect_level;
1254 }
1255 }
1256 if (record_accesses)
1257 record_access_alloc_site_info (mi, use_stmt, op2,
1258 index,
1259 current_indirect_level, false);
1260 }
1261 }
1262 /* If we are storing this level of indirection mark it as
1263 escaping. */
1264 if (lhs_acc.t_code == INDIRECT_REF || TREE_CODE (lhs) != SSA_NAME)
1265 {
1266 int l = current_indirect_level;
1267
1268 /* One exception is when we are storing to the matrix
1269 variable itself; this is the malloc case, and we must make
1270 sure that it's the one and only call to malloc, so
1271 we call analyze_matrix_allocation_site to check
1272 this out. */
1273 if (TREE_CODE (lhs) != VAR_DECL || lhs != mi->decl)
1274 mark_min_matrix_escape_level (mi, current_indirect_level,
1275 use_stmt);
1276 else
1277 {
1278 /* Also update the escaping level. */
1279 analyze_matrix_allocation_site (mi, use_stmt, l, visited);
1280 if (record_accesses)
1281 record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
1282 NULL_TREE, l, true);
1283 }
1284 }
1285 else
1286 {
1287 /* We are placing it in an SSA, follow that SSA. */
1288 analyze_matrix_accesses (mi, lhs,
1289 current_indirect_level,
1290 rhs_acc.t_code == POINTER_PLUS_EXPR,
1291 visited, record_accesses);
1292 }
1293 }
1294 return current_indirect_level;
1295 }
1296
1297 /* Given a SSA_VAR (coming from a use statement of the matrix MI),
1298 follow its uses and level of indirection and find out the minimum
1299 indirection level it escapes in (the highest dimension) and the maximum
1300 level it is accessed in (this will be the actual dimension of the
1301 matrix). The information is accumulated in MI.
1302 We look at the immediate uses, if one escapes we finish; if not,
1303 we make a recursive call for each one of the immediate uses of the
1304 resulting SSA name. */
1305 static void
1306 analyze_matrix_accesses (struct matrix_info *mi, tree ssa_var,
1307 int current_indirect_level, bool last_op,
1308 sbitmap visited, bool record_accesses)
1309 {
1310 imm_use_iterator imm_iter;
1311 use_operand_p use_p;
1312
1313 update_type_size (mi, SSA_NAME_DEF_STMT (ssa_var), ssa_var,
1314 current_indirect_level);
1315
1316 /* We don't go beyond the escaping level when we are performing the
1317 flattening. NOTE: we keep the last indirection level that doesn't
1318 escape. */
1319 if (mi->min_indirect_level_escape > -1
1320 && mi->min_indirect_level_escape <= current_indirect_level)
1321 return;
1322
1323 /* Now go over the uses of the SSA_NAME and check how it is used in
1324 each one of them. We are mainly looking for the pattern INDIRECT_REF,
1325 then a POINTER_PLUS_EXPR, then INDIRECT_REF etc. while in between there could
1326 be any number of copies and casts. */
1327 gcc_assert (TREE_CODE (ssa_var) == SSA_NAME);
1328
1329 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_var)
1330 {
1331 tree use_stmt = USE_STMT (use_p);
1332 if (TREE_CODE (use_stmt) == PHI_NODE)
1333 /* We check all the escaping levels that get to the PHI node
1334 and make sure they are all the same escaping;
1335 if not (which is rare) we let the escaping level be the
1336 minimum level that gets into that PHI because starting from
1337 that level we cannot expect the behavior of the indirections. */
1338
1339 analyze_accesses_for_phi_node (mi, use_stmt, current_indirect_level,
1340 visited, record_accesses);
1341
1342 else if (TREE_CODE (use_stmt) == CALL_EXPR)
1343 analyze_accesses_for_call_expr (mi, use_stmt, current_indirect_level);
1344 else if (TREE_CODE (use_stmt) == GIMPLE_MODIFY_STMT)
1345 current_indirect_level =
1346 analyze_accesses_for_modify_stmt (mi, ssa_var, use_stmt,
1347 current_indirect_level, last_op,
1348 visited, record_accesses);
1349 }
1350 }
1351
1352
1353 /* A walk_tree function to go over the VAR_DECL, PARM_DECL nodes of
1354 the malloc size expression and check that those aren't changed
1355 over the function. */
1356 static tree
1357 check_var_notmodified_p (tree * tp, int *walk_subtrees, void *data)
1358 {
1359 basic_block bb;
1360 tree t = *tp;
1361 tree fn = data;
1362 block_stmt_iterator bsi;
1363 tree stmt;
1364
1365 if (TREE_CODE (t) != VAR_DECL && TREE_CODE (t) != PARM_DECL)
1366 return NULL_TREE;
1367
1368 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (fn))
1369 {
1370 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1371 {
1372 stmt = bsi_stmt (bsi);
1373 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
1374 continue;
1375 if (GIMPLE_STMT_OPERAND (stmt, 0) == t)
1376 return stmt;
1377 }
1378 }
1379 *walk_subtrees = 1;
1380 return NULL_TREE;
1381 }
1382
1383 /* Go backwards in the use-def chains and find out the expression
1384 represented by the possible SSA name in EXPR, until it is composed
1385 of only VAR_DECL, PARM_DECL and INT_CST. In case of phi nodes
1386 we make sure that all the arguments represent the same subexpression,
1387 otherwise we fail. */
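/* A hedged illustration (hypothetical SSA names): if EXPR is n_3 defined by
   "n_3 = m.1_2 * 4;" and m.1_2 is loaded by "m.1_2 = m;" from a global
   VAR_DECL m, the recursion below returns the folded expression m * 4.  */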
1388 static tree
1389 can_calculate_expr_before_stmt (tree expr, sbitmap visited)
1390 {
1391 tree def_stmt, op1, op2, res;
1392
1393 switch (TREE_CODE (expr))
1394 {
1395 case SSA_NAME:
1396 /* In case of a loop, we don't know how to represent this expression. */
1397 if (TEST_BIT (visited, SSA_NAME_VERSION (expr)))
1398 return NULL_TREE;
1399
1400 SET_BIT (visited, SSA_NAME_VERSION (expr));
1401 def_stmt = SSA_NAME_DEF_STMT (expr);
1402 res = can_calculate_expr_before_stmt (def_stmt, visited);
1403 RESET_BIT (visited, SSA_NAME_VERSION (expr));
1404 return res;
1405 case VAR_DECL:
1406 case PARM_DECL:
1407 case INTEGER_CST:
1408 return expr;
1409 case POINTER_PLUS_EXPR:
1410 case PLUS_EXPR:
1411 case MINUS_EXPR:
1412 case MULT_EXPR:
1413 op1 = TREE_OPERAND (expr, 0);
1414 op2 = TREE_OPERAND (expr, 1);
1415
1416 op1 = can_calculate_expr_before_stmt (op1, visited);
1417 if (!op1)
1418 return NULL_TREE;
1419 op2 = can_calculate_expr_before_stmt (op2, visited);
1420 if (op2)
1421 return fold_build2 (TREE_CODE (expr), TREE_TYPE (expr), op1, op2);
1422 return NULL_TREE;
1423 case GIMPLE_MODIFY_STMT:
1424 return can_calculate_expr_before_stmt (GIMPLE_STMT_OPERAND (expr, 1),
1425 visited);
1426 case PHI_NODE:
1427 {
1428 int j;
1429
1430 res = NULL_TREE;
1431 /* Make sure all the arguments represent the same value. */
1432 for (j = 0; j < PHI_NUM_ARGS (expr); j++)
1433 {
1434 tree new_res;
1435 tree def = PHI_ARG_DEF (expr, j);
1436
1437 new_res = can_calculate_expr_before_stmt (def, visited);
1438 if (res == NULL_TREE)
1439 res = new_res;
1440 else if (!new_res || !expressions_equal_p (res, new_res))
1441 return NULL_TREE;
1442 }
1443 return res;
1444 }
1445 CASE_CONVERT:
1446 res = can_calculate_expr_before_stmt (TREE_OPERAND (expr, 0), visited);
1447 if (res != NULL_TREE)
1448 return build1 (TREE_CODE (expr), TREE_TYPE (expr), res);
1449 else
1450 return NULL_TREE;
1451
1452 default:
1453 return NULL_TREE;
1454 }
1455 }
1456
1457 /* There should be only one allocation function for the dimensions
1458 that don't escape. Here we check the allocation sites in this
1459 function. We must make sure that all the dimensions are allocated
1460 using malloc and that the malloc size parameter expression could be
1461 pre-calculated before the call to the malloc of dimension 0.
1462
1463 Given a candidate matrix for flattening -- MI -- check if it's
1464 appropriate for flattening -- we analyze the allocation
1465 sites that we recorded in the previous analysis. The result of the
1466 analysis is a level of indirection (matrix dimension) in which the
1467 flattening is safe. We check the following conditions:
1468 1. There is only one allocation site for each dimension.
1469 2. The allocation sites of all the dimensions are in the same
1470 function.
1471 (The above two are being taken care of during the analysis when
1472 we check the allocation site).
1473 3. All the dimensions that we flatten are allocated at once; thus
1474 the total size must be known before the allocation of the
1475 dimension 0 (top level) -- we must make sure we represent the
1476 size of the allocation as an expression of global parameters or
1477 constants and that those don't change over the function. */
1478
1479 static int
1480 check_allocation_function (void **slot, void *data ATTRIBUTE_UNUSED)
1481 {
1482 int level;
1483 block_stmt_iterator bsi;
1484 basic_block bb_level_0;
1485 struct matrix_info *mi = *slot;
1486 sbitmap visited;
1487
1488 if (!mi->malloc_for_level)
1489 return 1;
1490
1491 visited = sbitmap_alloc (num_ssa_names);
1492
1493 /* Do nothing if the current function is not the allocation
1494 function of MI. */
1495 if (mi->allocation_function_decl != current_function_decl
1496 /* We aren't in the main allocation function yet. */
1497 || !mi->malloc_for_level[0])
1498 return 1;
1499
1500 for (level = 1; level < mi->max_malloced_level; level++)
1501 if (!mi->malloc_for_level[level])
1502 break;
1503
1504 mark_min_matrix_escape_level (mi, level, NULL_TREE);
1505
1506 bsi = bsi_for_stmt (mi->malloc_for_level[0]);
1507 bb_level_0 = bsi.bb;
1508
1509 /* Check if the expression of the size passed to malloc could be
1510 pre-calculated before the malloc of level 0. */
1511 for (level = 1; level < mi->min_indirect_level_escape; level++)
1512 {
1513 tree call_stmt, size;
1514 struct malloc_call_data mcd;
1515
1516 call_stmt = mi->malloc_for_level[level];
1517
1518 /* Find the correct malloc information. */
1519 collect_data_for_malloc_call (call_stmt, &mcd);
1520
1521 /* No need to check anticipation for constants. */
1522 if (TREE_CODE (mcd.size_var) == INTEGER_CST)
1523 {
1524 if (!mi->dimension_size)
1525 {
1526 mi->dimension_size =
1527 (tree *) xcalloc (mi->min_indirect_level_escape,
1528 sizeof (tree));
1529 mi->dimension_size_orig =
1530 (tree *) xcalloc (mi->min_indirect_level_escape,
1531 sizeof (tree));
1532 }
1533 mi->dimension_size[level] = mcd.size_var;
1534 mi->dimension_size_orig[level] = mcd.size_var;
1535 continue;
1536 }
1537 /* ??? Here we should also add the way to calculate the size
1538 expression not only know that it is anticipated. */
1539 sbitmap_zero (visited);
1540 size = can_calculate_expr_before_stmt (mcd.size_var, visited);
1541 if (size == NULL_TREE)
1542 {
1543 mark_min_matrix_escape_level (mi, level, call_stmt);
1544 if (dump_file)
1545 fprintf (dump_file,
1546 "Matrix %s: Cannot calculate the size of allocation, escaping at level %d\n",
1547 get_name (mi->decl), level);
1548 break;
1549 }
1550 if (!mi->dimension_size)
1551 {
1552 mi->dimension_size =
1553 (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
1554 mi->dimension_size_orig =
1555 (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
1556 }
1557 mi->dimension_size[level] = size;
1558 mi->dimension_size_orig[level] = size;
1559 }
1560
1561 /* We don't need those anymore. */
1562 for (level = mi->min_indirect_level_escape;
1563 level < mi->max_malloced_level; level++)
1564 mi->malloc_for_level[level] = NULL;
1565 return 1;
1566 }
1567
1568 /* Track all access and allocation sites. */
1569 static void
1570 find_sites_in_func (bool record)
1571 {
1572 sbitmap visited_stmts_1;
1573
1574 block_stmt_iterator bsi;
1575 tree stmt;
1576 basic_block bb;
1577 struct matrix_info tmpmi, *mi;
1578
1579 visited_stmts_1 = sbitmap_alloc (num_ssa_names);
1580
1581 FOR_EACH_BB (bb)
1582 {
1583 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1584 {
1585 stmt = bsi_stmt (bsi);
1586 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
1587 && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == VAR_DECL)
1588 {
1589 tmpmi.decl = GIMPLE_STMT_OPERAND (stmt, 0);
1590 if ((mi = htab_find (matrices_to_reorg, &tmpmi)))
1591 {
1592 sbitmap_zero (visited_stmts_1);
1593 analyze_matrix_allocation_site (mi, stmt, 0, visited_stmts_1);
1594 }
1595 }
1596 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
1597 && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME
1598 && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1)) == VAR_DECL)
1599 {
1600 tmpmi.decl = GIMPLE_STMT_OPERAND (stmt, 1);
1601 if ((mi = htab_find (matrices_to_reorg, &tmpmi)))
1602 {
1603 sbitmap_zero (visited_stmts_1);
1604 analyze_matrix_accesses (mi,
1605 GIMPLE_STMT_OPERAND (stmt, 0), 0,
1606 false, visited_stmts_1, record);
1607 }
1608 }
1609 }
1610 }
1611 sbitmap_free (visited_stmts_1);
1612 }
1613
1614 /* Traverse the use-def chains to see if there are matrices that
1615 are passed through pointers and we cannot know how they are accessed.
1616 For each SSA-name defined by a global variable of our interest,
1617 we traverse the use-def chains of the SSA and follow the indirections,
1618 and record in what level of indirection the use of the variable
1619 escapes. A use of a pointer escapes when it is passed to a function,
1620 stored into memory or assigned (except in malloc and free calls). */
1621
1622 static void
1623 record_all_accesses_in_func (void)
1624 {
1625 unsigned i;
1626 sbitmap visited_stmts_1;
1627
1628 visited_stmts_1 = sbitmap_alloc (num_ssa_names);
1629
1630 for (i = 0; i < num_ssa_names; i++)
1631 {
1632 struct matrix_info tmpmi, *mi;
1633 tree ssa_var = ssa_name (i);
1634 tree rhs, lhs;
1635
1636 if (!ssa_var
1637 || TREE_CODE (SSA_NAME_DEF_STMT (ssa_var)) != GIMPLE_MODIFY_STMT)
1638 continue;
1639 rhs = GIMPLE_STMT_OPERAND (SSA_NAME_DEF_STMT (ssa_var), 1);
1640 lhs = GIMPLE_STMT_OPERAND (SSA_NAME_DEF_STMT (ssa_var), 0);
1641 if (TREE_CODE (rhs) != VAR_DECL && TREE_CODE (lhs) != VAR_DECL)
1642 continue;
1643
1644 /* If the RHS is a matrix that we want to analyze, follow the def-use
1645 chain for this SSA_VAR and check for escapes or apply the
1646 flattening. */
1647 tmpmi.decl = rhs;
1648 if ((mi = htab_find (matrices_to_reorg, &tmpmi)))
1649 {
1650 /* This variable will track the visited PHI nodes, so we can limit
1651 its size to the maximum number of SSA names. */
1652 sbitmap_zero (visited_stmts_1);
1653 analyze_matrix_accesses (mi, ssa_var,
1654 0, false, visited_stmts_1, true);
1655
1656 }
1657 }
1658 sbitmap_free (visited_stmts_1);
1659 }
1660
1661 /* Used when we want to convert the expression RESULT = something * ORIG into RESULT = something * NEW. If ORIG and NEW are powers of 2, shift operations can be used; otherwise division and multiplication. */
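/* A hedged note on the fallback path below: with ORIG == 12 and NEW == 3
   (not both powers of two), the rescaled result is built as
   (RESULT / 12) * 3; when both arguments are powers of two the rescaling
   is expressed with shift operations instead.  */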
1662 static tree
1663 compute_offset (HOST_WIDE_INT orig, HOST_WIDE_INT new, tree result)
1664 {
1665
1666 int x, y;
1667 tree result1, ratio, log, orig_tree, new_tree;
1668
1669 x = exact_log2 (orig);
1670 y = exact_log2 (new);
1671
1672 if (x != -1 && y != -1)
1673 {
1674 if (x == y)
1675 return result;
1676 else if (x > y)
1677 {
1678 log = build_int_cst (TREE_TYPE (result), x - y);
1679 result1 =
1680 fold_build2 (LSHIFT_EXPR, TREE_TYPE (result), result, log);
1681 return result1;
1682 }
1683 log = build_int_cst (TREE_TYPE (result), y - x);
1684 result1 = fold_build2 (RSHIFT_EXPR, TREE_TYPE (result), result, log);
1685
1686 return result1;
1687 }
1688 orig_tree = build_int_cst (TREE_TYPE (result), orig);
1689 new_tree = build_int_cst (TREE_TYPE (result), new);
1690 ratio = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (result), result, orig_tree);
1691 result1 = fold_build2 (MULT_EXPR, TREE_TYPE (result), ratio, new_tree);
1692
1693 return result1;
1694 }
1695
1696
1697 /* We know that we are allowed to perform matrix flattening (according to the
1698 escape analysis), so we traverse the use-def chains of the SSA vars
1699 defined by the global variables pointing to the matrices of our interest.
1700 In each use of the SSA we calculate the offset from the base address
1701 according to the following equation:
1702
1703 a[I1][I2]...[Ik], where D1..Dk are the lengths of each dimension and the
1704 escaping level is m <= k, and a' is the new allocated matrix,
1705 will be translated to:
1706
1707 b[I(m+1)]...[Ik]
1708
1709 where
1710 b = a' + I1*D2...*Dm + I2*D3...Dm + ... + Im
1711 */
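/* A hedged worked instance of the equation above (illustration only): for
   a three-dimensional matrix a[I1][I2][I3] with dimension lengths D1, D2,
   D3 and escaping level m == 3, the whole access collapses to the single
   flattened index
       a' + I1*D2*D3 + I2*D3 + I3
   following the formula for b with no remaining indices.  */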
1712
1713 static int
1714 transform_access_sites (void **slot, void *data ATTRIBUTE_UNUSED)
1715 {
1716 block_stmt_iterator bsi;
1717 struct matrix_info *mi = *slot;
1718 int min_escape_l = mi->min_indirect_level_escape;
1719 struct access_site_info *acc_info;
1720 int i;
1721
1722 if (min_escape_l < 2 || !mi->access_l)
1723 return 1;
1724 for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
1725 i++)
1726 {
1727 tree orig, type;
1728
1729 /* This is possible because we collect the access sites before
1730 we determine the final minimum indirection level. */
1731 if (acc_info->level >= min_escape_l)
1732 {
1733 free (acc_info);
1734 continue;
1735 }
1736 if (acc_info->is_alloc)
1737 {
1738 if (acc_info->level >= 0 && bb_for_stmt (acc_info->stmt))
1739 {
1740 ssa_op_iter iter;
1741 tree def;
1742 tree stmt = acc_info->stmt;
1743
1744 FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF)
1745 mark_sym_for_renaming (SSA_NAME_VAR (def));
1746 bsi = bsi_for_stmt (stmt);
1747 gcc_assert (TREE_CODE (acc_info->stmt) == GIMPLE_MODIFY_STMT);
1748 if (TREE_CODE (GIMPLE_STMT_OPERAND (acc_info->stmt, 0)) ==
1749 SSA_NAME && acc_info->level < min_escape_l - 1)
1750 {
1751 imm_use_iterator imm_iter;
1752 use_operand_p use_p;
1753 tree use_stmt;
1754
1755 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
1756 GIMPLE_STMT_OPERAND (acc_info->stmt,
1757 0))
1758 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1759 {
1760 tree conv, tmp, stmts;
1761
1762 /* Emit convert statement to convert to type of use. */
1763 conv =
1764 fold_build1 (CONVERT_EXPR,
1765 TREE_TYPE (GIMPLE_STMT_OPERAND
1766 (acc_info->stmt, 0)),
1767 TREE_OPERAND (GIMPLE_STMT_OPERAND
1768 (acc_info->stmt, 1), 0));
1769 tmp =
1770 create_tmp_var (TREE_TYPE
1771 (GIMPLE_STMT_OPERAND
1772 (acc_info->stmt, 0)), "new");
1773 add_referenced_var (tmp);
1774 stmts =
1775 fold_build2 (GIMPLE_MODIFY_STMT,
1776 TREE_TYPE (GIMPLE_STMT_OPERAND
1777 (acc_info->stmt, 0)), tmp,
1778 conv);
1779 tmp = make_ssa_name (tmp, stmts);
1780 GIMPLE_STMT_OPERAND (stmts, 0) = tmp;
1781 bsi = bsi_for_stmt (acc_info->stmt);
1782 bsi_insert_after (&bsi, stmts, BSI_SAME_STMT);
1783 SET_USE (use_p, tmp);
1784 }
1785 }
1786 if (acc_info->level < min_escape_l - 1)
1787 bsi_remove (&bsi, true);
1788 }
1789 free (acc_info);
1790 continue;
1791 }
1792 orig = GIMPLE_STMT_OPERAND (acc_info->stmt, 1);
1793 type = TREE_TYPE (orig);
1794 if (TREE_CODE (orig) == INDIRECT_REF
1795 && acc_info->level < min_escape_l - 1)
1796 {
1797 /* Replace the INDIRECT_REF with a NOP (cast); usually we are casting
1798 from "pointer to type" to "type". */
1799 orig =
1800 build1 (NOP_EXPR, TREE_TYPE (orig),
1801 GIMPLE_STMT_OPERAND (orig, 0));
1802 GIMPLE_STMT_OPERAND (acc_info->stmt, 1) = orig;
1803 }
1804 else if (TREE_CODE (orig) == POINTER_PLUS_EXPR
1805 && acc_info->level < (min_escape_l))
1806 {
1807 imm_use_iterator imm_iter;
1808 use_operand_p use_p;
1809
1810 tree offset;
1811 int k = acc_info->level;
1812 tree num_elements, total_elements;
1813 tree tmp1;
1814 tree d_size = mi->dimension_size[k];
1815
1816 /* We already made sure in the analysis that the first operand
1817 is the base and the second is the offset. */
1818 offset = acc_info->offset;
1819 if (mi->dim_map[k] == min_escape_l - 1)
1820 {
1821 if (!check_transpose_p || mi->is_transposed_p == false)
1822 tmp1 = offset;
1823 else
1824 {
1825 tree new_offset;
1826 tree d_type_size, d_type_size_k;
1827
1828 d_type_size = size_int (mi->dimension_type_size[min_escape_l]);
1829 d_type_size_k = size_int (mi->dimension_type_size[k + 1]);
1830
1831 new_offset =
1832 compute_offset (mi->dimension_type_size[min_escape_l],
1833 mi->dimension_type_size[k + 1], offset);
1834
1835 total_elements = new_offset;
1836 if (new_offset != offset)
1837 {
1838 bsi = bsi_for_stmt (acc_info->stmt);
1839 tmp1 = force_gimple_operand_bsi (&bsi, total_elements,
1840 true, NULL,
1841 true, BSI_SAME_STMT);
1842 }
1843 else
1844 tmp1 = offset;
1845 }
1846 }
1847 else
1848 {
1849 d_size = mi->dimension_size[mi->dim_map[k] + 1];
1850 num_elements =
1851 fold_build2 (MULT_EXPR, sizetype, fold_convert (sizetype, acc_info->index),
1852 fold_convert (sizetype, d_size));
1853 add_referenced_var (d_size);
1854 bsi = bsi_for_stmt (acc_info->stmt);
1855 tmp1 = force_gimple_operand_bsi (&bsi, num_elements, true,
1856 NULL, true, BSI_SAME_STMT);
1857 }
1858 /* Replace the offset if needed. */
1859 if (tmp1 != offset)
1860 {
1861 if (TREE_CODE (offset) == SSA_NAME)
1862 {
1863 tree use_stmt;
1864
1865 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, offset)
1866 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1867 if (use_stmt == acc_info->stmt)
1868 SET_USE (use_p, tmp1);
1869 }
1870 else
1871 {
1872 gcc_assert (TREE_CODE (offset) == INTEGER_CST);
1873 TREE_OPERAND (orig, 1) = tmp1;
1874 }
1875 }
1876 }
1877 /* ??? meanwhile this happens because we record the same access
1878 site more than once; we should be using a hash table to
1879 avoid this and insert the STMT of the access site only
1880 once.
1881 else
1882 gcc_unreachable (); */
1883 free (acc_info);
1884 }
1885 VEC_free (access_site_info_p, heap, mi->access_l);
1886
1887 update_ssa (TODO_update_ssa);
1888 #ifdef ENABLE_CHECKING
1889 verify_ssa (true);
1890 #endif
1891 return 1;
1892 }
1893
1894 /* Sort the array A of counts in ascending order and rearrange DIM_MAP to reflect the new order. */
1895
1896 static void
1897 sort_dim_hot_level (gcov_type * a, int *dim_map, int n)
1898 {
1899 int i, j, tmp1;
1900 gcov_type tmp;
1901
1902 for (i = 0; i < n - 1; i++)
1903 {
1904 for (j = 0; j < n - 1 - i; j++)
1905 {
1906 if (a[j + 1] < a[j])
1907 {
1908 tmp = a[j]; /* swap a[j] and a[j+1] */
1909 a[j] = a[j + 1];
1910 a[j + 1] = tmp;
1911 tmp1 = dim_map[j];
1912 dim_map[j] = dim_map[j + 1];
1913 dim_map[j + 1] = tmp1;
1914 }
1915 }
1916 }
1917 }
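
/* For example (illustrative counts only): called with A = {50, 200, 10}
   and DIM_MAP = {0, 1, 2}, the routine above returns with
   A = {10, 50, 200} and DIM_MAP = {2, 0, 1}; the dimension with the
   smallest count is moved to the front of the map.  */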
1918
1919 /* Replace the multiple mallocs (one for each dimension) with a single
1920 malloc of size DIM1*DIM2*...*DIMN*size_of_element.
1921 Make sure that we hold the size used at the malloc site inside a
1922 new global variable; this way we ensure that the size doesn't
1923 change and that it is accessible from all the other functions that
1924 use the matrix. Also, the original calls to free are deleted and
1925 replaced by a new call that frees the flattened matrix. */
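
/* A rough before/after sketch of the effect on deallocation (variable
   names are illustrative only): the per-row frees

     for (i = 0; i < dim1; i++)
       free (a[i]);
     free (a);

   are reduced to the single free (a) of the flattened block, while the
   total allocation size is kept in a compiler-generated static variable
   so every function that uses the matrix sees the same, unmodified
   size.  */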
1926
1927 static int
1928 transform_allocation_sites (void **slot, void *data ATTRIBUTE_UNUSED)
1929 {
1930 int i;
1931 struct matrix_info *mi;
1932 tree type, call_stmt_0, malloc_stmt, oldfn, prev_dim_size, use_stmt;
1933 struct cgraph_node *c_node;
1934 struct cgraph_edge *e;
1935 block_stmt_iterator bsi;
1936 struct malloc_call_data mcd;
1937 HOST_WIDE_INT element_size;
1938
1939 imm_use_iterator imm_iter;
1940 use_operand_p use_p;
1941 tree old_size_0, tmp;
1942 int min_escape_l;
1943 int id;
1944
1945 mi = *slot;
1946
1947 min_escape_l = mi->min_indirect_level_escape;
1948
1949 if (!mi->malloc_for_level)
1950 mi->min_indirect_level_escape = 0;
1951
1952 if (mi->min_indirect_level_escape < 2)
1953 return 1;
1954
1955 mi->dim_map = (int *) xcalloc (mi->min_indirect_level_escape, sizeof (int));
1956 for (i = 0; i < mi->min_indirect_level_escape; i++)
1957 mi->dim_map[i] = i;
1958 if (check_transpose_p)
1959 {
1960 int i;
1961
1962 if (dump_file)
1963 {
1964 fprintf (dump_file, "Matrix %s:\n", get_name (mi->decl));
1965 for (i = 0; i < min_escape_l; i++)
1966 {
1967 fprintf (dump_file, "dim %d before sort ", i);
1968 if (mi->dim_hot_level)
1969 fprintf (dump_file,
1970 "count is " HOST_WIDEST_INT_PRINT_DEC " \n",
1971 mi->dim_hot_level[i]);
1972 }
1973 }
1974 sort_dim_hot_level (mi->dim_hot_level, mi->dim_map,
1975 mi->min_indirect_level_escape);
1976 if (dump_file)
1977 for (i = 0; i < min_escape_l; i++)
1978 {
1979 fprintf (dump_file, "dim %d after sort\n", i);
1980 if (mi->dim_hot_level)
1981 fprintf (dump_file, "count is " HOST_WIDE_INT_PRINT_DEC
1982 " \n", (HOST_WIDE_INT) mi->dim_hot_level[i]);
1983 }
1984 for (i = 0; i < mi->min_indirect_level_escape; i++)
1985 {
1986 if (dump_file)
1987 fprintf (dump_file, "dim_map[%d] after sort %d\n", i,
1988 mi->dim_map[i]);
1989 if (mi->dim_map[i] != i)
1990 {
1991 if (dump_file)
1992 fprintf (dump_file,
1993 "Transposed dimensions: dim %d is now dim %d\n",
1994 mi->dim_map[i], i);
1995 mi->is_transposed_p = true;
1996 }
1997 }
1998 }
1999 else
2000 {
2001 for (i = 0; i < mi->min_indirect_level_escape; i++)
2002 mi->dim_map[i] = i;
2003 }
2004 /* Call statement of allocation site of level 0. */
2005 call_stmt_0 = mi->malloc_for_level[0];
2006
2007 /* Find the correct malloc information. */
2008 collect_data_for_malloc_call (call_stmt_0, &mcd);
2009
2010 mi->dimension_size[0] = mcd.size_var;
2011 mi->dimension_size_orig[0] = mcd.size_var;
2012 /* Make sure that the variables in the size expression for
2013 all the dimensions (above level 0) aren't modified in
2014 the allocation function. */
2015 for (i = 1; i < mi->min_indirect_level_escape; i++)
2016 {
2017 tree t;
2018
2019 /* mi->dimension_size must contain the expression of the size calculated
2020 in check_allocation_function. */
2021 gcc_assert (mi->dimension_size[i]);
2022
2023 t = walk_tree_without_duplicates (&(mi->dimension_size[i]),
2024 check_var_notmodified_p,
2025 mi->allocation_function_decl);
2026 if (t != NULL_TREE)
2027 {
2028 mark_min_matrix_escape_level (mi, i, t);
2029 break;
2030 }
2031 }
2032
2033 if (mi->min_indirect_level_escape < 2)
2034 return 1;
2035
2036 /* We have to make sure that the size expression is available
2037 before the call to malloc of level 0. */
2038 bsi = bsi_for_stmt (call_stmt_0);
2039
2040 /* Find out the size of each dimension by looking at the malloc
2041 sites and create a global variable to hold it.
2042 We add the assignment to the global before the malloc of level 0. */
2043
2044 /* To be able to produce gimple temporaries. */
2045 oldfn = current_function_decl;
2046 current_function_decl = mi->allocation_function_decl;
2047 push_cfun (DECL_STRUCT_FUNCTION (mi->allocation_function_decl));
2048
2049 /* Set the dimension sizes as follows:
2050 DIM_SIZE[i] = DIM_SIZE[n] * ... * DIM_SIZE[i]
2051 where n is the maximum non-escaping level. */
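/* For instance (illustrative, assuming no transposing and 4-byte
   elements): for a matrix flattened over three levels with original
   dimensions D0 x D1 x D2, the loop below computes, in bytes,
   DIM_SIZE[2] = D2*4, DIM_SIZE[1] = D1*D2*4 and
   DIM_SIZE[0] = D0*D1*D2*4; DIM_SIZE[0] then becomes the argument of
   the single remaining malloc.  */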
2052 element_size = mi->dimension_type_size[mi->min_indirect_level_escape];
2053 prev_dim_size = NULL_TREE;
2054
2055 for (i = mi->min_indirect_level_escape - 1; i >= 0; i--)
2056 {
2057 tree dim_size, dim_var, tmp;
2058 tree d_type_size;
2059
2060 /* Now put the size expression in a global variable and initialize it to
2061 the size expression before the malloc of level 0. */
2062 dim_var =
2063 add_new_static_var (TREE_TYPE
2064 (mi->dimension_size_orig[mi->dim_map[i]]));
2065 type = TREE_TYPE (mi->dimension_size_orig[mi->dim_map[i]]);
2066
2067 /* DIM_SIZE = MALLOC_SIZE_PARAM / TYPE_SIZE. */
2068 /* Find which dim ID becomes dim I. */
2069 for (id = 0; id < mi->min_indirect_level_escape; id++)
2070 if (mi->dim_map[id] == i)
2071 break;
2072 d_type_size =
2073 build_int_cst (type, mi->dimension_type_size[id + 1]);
2074 if (!prev_dim_size)
2075 prev_dim_size = build_int_cst (type, element_size);
2076 if (!check_transpose_p && i == mi->min_indirect_level_escape - 1)
2077 {
2078 dim_size = mi->dimension_size_orig[id];
2079 }
2080 else
2081 {
2082 dim_size =
2083 fold_build2 (TRUNC_DIV_EXPR, type, mi->dimension_size_orig[id],
2084 d_type_size);
2085
2086 dim_size = fold_build2 (MULT_EXPR, type, dim_size, prev_dim_size);
2087 }
2088 dim_size = force_gimple_operand_bsi (&bsi, dim_size, true, NULL,
2089 true, BSI_SAME_STMT);
2090 /* GLOBAL_HOLDING_THE_SIZE = DIM_SIZE. */
2091 tmp = fold_build2 (GIMPLE_MODIFY_STMT, type, dim_var, dim_size);
2092 GIMPLE_STMT_OPERAND (tmp, 0) = dim_var;
2093 mark_symbols_for_renaming (tmp);
2094 bsi_insert_before (&bsi, tmp, BSI_SAME_STMT);
2095
2096 prev_dim_size = mi->dimension_size[i] = dim_var;
2097 }
2098 update_ssa (TODO_update_ssa);
2099 /* Replace the malloc size argument in the malloc of level 0 with
2100 the total size of all the dimensions. */
2101 malloc_stmt = GIMPLE_STMT_OPERAND (call_stmt_0, 1);
2102 c_node = cgraph_node (mi->allocation_function_decl);
2103 old_size_0 = CALL_EXPR_ARG (malloc_stmt, 0);
2104 tmp = force_gimple_operand_bsi (&bsi, mi->dimension_size[0], true,
2105 NULL, true, BSI_SAME_STMT);
2106 if (TREE_CODE (old_size_0) == SSA_NAME)
2107 {
2108 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, old_size_0)
2109 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
2110 if (use_stmt == call_stmt_0)
2111 SET_USE (use_p, tmp);
2112 }
2113 /* When deleting the calls to malloc we also need to remove the edge from
2114 the call graph to keep it consistent. Notice that cgraph_edge may
2115 create a new node in the call graph if there is no node for the given
2116 declaration; this shouldn't be the case but currently there is no way to
2117 check this outside of "cgraph.c". */
2118 for (i = 1; i < mi->min_indirect_level_escape; i++)
2119 {
2120 block_stmt_iterator bsi;
2121 tree use_stmt1 = NULL;
2122 tree call;
2123
2124 tree call_stmt = mi->malloc_for_level[i];
2125 call = GIMPLE_STMT_OPERAND (call_stmt, 1);
2126 gcc_assert (TREE_CODE (call) == CALL_EXPR);
2127 e = cgraph_edge (c_node, call_stmt);
2128 gcc_assert (e);
2129 cgraph_remove_edge (e);
2130 bsi = bsi_for_stmt (call_stmt);
2131 /* Remove the call stmt. */
2132 bsi_remove (&bsi, true);
2133 /* Remove the type cast stmt. */
2134 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
2135 GIMPLE_STMT_OPERAND (call_stmt, 0))
2136 {
2137 use_stmt1 = use_stmt;
2138 bsi = bsi_for_stmt (use_stmt);
2139 bsi_remove (&bsi, true);
2140 }
2141 /* Remove the assignment of the allocated area. */
2142 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
2143 GIMPLE_STMT_OPERAND (use_stmt1, 0))
2144 {
2145 bsi = bsi_for_stmt (use_stmt);
2146 bsi_remove (&bsi, true);
2147 }
2148 }
2149 update_ssa (TODO_update_ssa);
2150 #ifdef ENABLE_CHECKING
2151 verify_ssa (true);
2152 #endif
2153 /* Delete the calls to free. */
2154 for (i = 1; i < mi->min_indirect_level_escape; i++)
2155 {
2156 block_stmt_iterator bsi;
2157 tree call;
2158
2159 /* ??? It is not clear why this case is possible, but we failed on it once. */
2160 if (!mi->free_stmts[i].stmt)
2161 continue;
2162
2163 call = TREE_OPERAND (mi->free_stmts[i].stmt, 1);
2164 c_node = cgraph_node (mi->free_stmts[i].func);
2165
2166 gcc_assert (TREE_CODE (mi->free_stmts[i].stmt) == CALL_EXPR);
2167 e = cgraph_edge (c_node, mi->free_stmts[i].stmt);
2168 gcc_assert (e);
2169 cgraph_remove_edge (e);
2170 current_function_decl = mi->free_stmts[i].func;
2171 set_cfun (DECL_STRUCT_FUNCTION (mi->free_stmts[i].func));
2172 bsi = bsi_for_stmt (mi->free_stmts[i].stmt);
2173 bsi_remove (&bsi, true);
2174 }
2175 /* Return to the previous situation. */
2176 current_function_decl = oldfn;
2177 pop_cfun ();
2178 return 1;
2179
2180 }
2181
2182
2183 /* Print out the results of the escape analysis. */
2184 static int
2185 dump_matrix_reorg_analysis (void **slot, void *data ATTRIBUTE_UNUSED)
2186 {
2187 struct matrix_info *mi = *slot;
2188
2189 if (!dump_file)
2190 return 1;
2191 fprintf (dump_file, "Matrix \"%s\"; Escaping Level: %d, Num Dims: %d,",
2192 get_name (mi->decl), mi->min_indirect_level_escape, mi->num_dims);
2193 fprintf (dump_file, " Malloc Dims: %d, ", mi->max_malloced_level);
2194 fprintf (dump_file, "\n");
2195 if (mi->min_indirect_level_escape >= 2)
2196 fprintf (dump_file, "Flattened %d dimensions \n",
2197 mi->min_indirect_level_escape);
2198 return 1;
2199 }
2200
2201
2202 /* Perform matrix flattening. */
2203
2204 static unsigned int
2205 matrix_reorg (void)
2206 {
2207 struct cgraph_node *node;
2208
2209 if (profile_info)
2210 check_transpose_p = true;
2211 else
2212 check_transpose_p = false;
2213 /* If there are hand-written vectors, we skip this optimization. */
2214 for (node = cgraph_nodes; node; node = node->next)
2215 if (!may_flatten_matrices (node))
2216 return 0;
2217 matrices_to_reorg = htab_create (37, mtt_info_hash, mtt_info_eq, mat_free);
2218 /* Find and record all potential matrices in the program. */
2219 find_matrices_decl ();
2220 /* Analyze the accesses of the matrices (escaping analysis). */
2221 for (node = cgraph_nodes; node; node = node->next)
2222 if (node->analyzed)
2223 {
2224 tree temp_fn;
2225
2226 temp_fn = current_function_decl;
2227 current_function_decl = node->decl;
2228 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2229 bitmap_obstack_initialize (NULL);
2230 tree_register_cfg_hooks ();
2231
2232 if (!gimple_in_ssa_p (cfun))
2233 {
2234 free_dominance_info (CDI_DOMINATORS);
2235 free_dominance_info (CDI_POST_DOMINATORS);
2236 pop_cfun ();
2237 current_function_decl = temp_fn;
2238 bitmap_obstack_release (NULL);
2239
2240 return 0;
2241 }
2242
2243 #ifdef ENABLE_CHECKING
2244 verify_flow_info ();
2245 #endif
2246
2247 if (!matrices_to_reorg)
2248 {
2249 free_dominance_info (CDI_DOMINATORS);
2250 free_dominance_info (CDI_POST_DOMINATORS);
2251 pop_cfun ();
2252 current_function_decl = temp_fn;
2253 bitmap_obstack_release (NULL);
2254
2255 return 0;
2256 }
2257
2258 /* Create htab for phi nodes. */
2259 htab_mat_acc_phi_nodes = htab_create (37, mat_acc_phi_hash,
2260 mat_acc_phi_eq, free);
2261 if (!check_transpose_p)
2262 find_sites_in_func (false);
2263 else
2264 {
2265 find_sites_in_func (true);
2266 loop_optimizer_init (LOOPS_NORMAL);
2267 if (current_loops)
2268 scev_initialize ();
2269 htab_traverse (matrices_to_reorg, analyze_transpose, NULL);
2270 if (current_loops)
2271 {
2272 scev_finalize ();
2273 loop_optimizer_finalize ();
2274 current_loops = NULL;
2275 }
2276 }
2277 /* If the current function is the allocation function for any of
2278 the matrices, we check its allocation and the escaping level. */
2279 htab_traverse (matrices_to_reorg, check_allocation_function, NULL);
2280 free_dominance_info (CDI_DOMINATORS);
2281 free_dominance_info (CDI_POST_DOMINATORS);
2282 pop_cfun ();
2283 current_function_decl = temp_fn;
2284 bitmap_obstack_release (NULL);
2285 }
2286 htab_traverse (matrices_to_reorg, transform_allocation_sites, NULL);
2287 /* Now transform the accesses. */
2288 for (node = cgraph_nodes; node; node = node->next)
2289 if (node->analyzed)
2290 {
2291 /* Remember that allocation sites have been handled. */
2292 tree temp_fn;
2293
2294 temp_fn = current_function_decl;
2295 current_function_decl = node->decl;
2296 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2297 bitmap_obstack_initialize (NULL);
2298 tree_register_cfg_hooks ();
2299 record_all_accesses_in_func ();
2300 htab_traverse (matrices_to_reorg, transform_access_sites, NULL);
2301 free_dominance_info (CDI_DOMINATORS);
2302 free_dominance_info (CDI_POST_DOMINATORS);
2303 pop_cfun ();
2304 current_function_decl = temp_fn;
2305 bitmap_obstack_release (NULL);
2306 }
2307 htab_traverse (matrices_to_reorg, dump_matrix_reorg_analysis, NULL);
2308
2309 current_function_decl = NULL;
2310 set_cfun (NULL);
2311 matrices_to_reorg = NULL;
2312 return 0;
2313 }
2314
2315
2316 /* The condition for matrix flattening to be performed. */
2317 static bool
2318 gate_matrix_reorg (void)
2319 {
2320 return flag_ipa_matrix_reorg && flag_whole_program;
2321 }
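
/* In practice this means the pass runs only when both -fipa-matrix-reorg
   and -fwhole-program are given on the command line.  */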
2322
2323 struct simple_ipa_opt_pass pass_ipa_matrix_reorg =
2324 {
2325 {
2326 SIMPLE_IPA_PASS,
2327 "matrix-reorg", /* name */
2328 gate_matrix_reorg, /* gate */
2329 matrix_reorg, /* execute */
2330 NULL, /* sub */
2331 NULL, /* next */
2332 0, /* static_pass_number */
2333 0, /* tv_id */
2334 0, /* properties_required */
2335 PROP_trees, /* properties_provided */
2336 0, /* properties_destroyed */
2337 0, /* todo_flags_start */
2338 TODO_dump_cgraph | TODO_dump_func /* todo_flags_finish */
2339 }
2340 };