1 /* Multiply table generator for tile.
2 Copyright (C) 2011-2022 Free Software Foundation, Inc.
3 Contributed by Walter Lee (walt@tilera.com)
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This program creates a table used to compile multiply by constant
24 This program should be compiled by a c++ compiler. If it's
25 compiled with -DTILEPRO, it generates the multiply table for
26 TILEPro; otherwise it generates the multiply table for TILE-Gx.
27 Running the program produces the table in stdout.
29 The program works by generating every possible combination of up to
30 MAX_INSTRUCTIONS linear operators (such as add, sub, s2a, left
31 shift) and computing the multiplier computed by those instructions.
39 There are usually multiple instruction sequences to multiply by a
40 given constant. This program keeps only the cheapest one.
41 "Cheapest" is defined first by the minimum theoretical schedule
42 length, and if those are equal then by the number of instructions,
43 and if those are equal then by which instructions we "prefer"
44 (e.g. because we think one might take infinitesimally less power
45 than another, or simply because we arbitrarily pick one to be more
48 Once this program has determined the best instruction sequence for
49 each multiplier, it emits them in a table sorted by the multiplier
50 so the user can binary-search it to look for a match. The table is
51 pruned based on various criteria to keep its sizes reasonable. */
57 #define __STDC_LIMIT_MACROS
64 /* The string representing the architecture. */
65 #define ARCH "tilepro"
67 /* The type for the multiplication. */
72 /* The string representing the architecture. */
75 /* The type for the multiplication. */
76 typedef long long MUL_TYPE
;
80 /* Longest instruction sequence this will produce. With the current
81 stupid algorithm runtime grows very quickly with this number. */
82 #define MAX_INSTRUCTIONS 4
84 /* Maximum number of subexpressions in the expression DAG being
85 generated. This is the same as the number of instructions, except
86 that zero and the original register we'd like to multiply by a
87 constant are also thrown into the mix. */
88 #define MAX_SUBEXPRS (2 + MAX_INSTRUCTIONS)
90 #define MIN(x, y) ((x) <= (y) ? (x) : (y))
91 #define MAX(x, y) ((x) >= (y) ? (x) : (y))
93 #define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))
95 /* For this program a unary op is one which has only one nonconstant
96 operand. So shift left by 5 is considered unary. */
97 typedef MUL_TYPE (*unary_op_func
) (MUL_TYPE
);
98 typedef MUL_TYPE (*binary_op_func
) (MUL_TYPE
, MUL_TYPE
);
100 /* This describes an operation like 'add two registers' or 'left-shift
103 We call something a 'unary' operator if it only takes in one
104 register as input, even though it might have an implicit second
105 constant operand. Currently this is used for left-shift by
110 /* Construct for a binary operator. */
111 Operator (const char *pattern
, const char *name
, binary_op_func func
,
113 : m_pattern (pattern
), m_name (name
), m_top_index (-1),
114 m_unary_func (0), m_binary_func (func
), m_cost (cost
),
119 /* Construct for a unary operator. */
120 Operator (const char *pattern
, const char *name
, unary_op_func func
,
121 int rhs_if_unary
, int cost
)
122 : m_pattern (pattern
), m_name (name
), m_top_index (-1),
123 m_unary_func (func
), m_binary_func (0), m_cost (cost
),
124 m_rhs_if_unary (rhs_if_unary
)
128 bool is_unary () const
130 return m_binary_func
== NULL
;
133 /* Name of the pattern for this operation, e.g. CODE_FOR_addsi3. */
134 const char *m_pattern
;
136 /* Name of the opcode for this operation, e.g. add. */
139 /* We don't have enough bits in our output representation to store
140 the original insn_code value, so we store a compressed form
141 instead. These values are decoded back into insn_code via the
142 machine-generated multiply_insn_seq_decode_opcode lookup
146 /* Unary operator to apply, or NULL if this is a binary operator. */
147 unary_op_func m_unary_func
;
149 /* Binary operator to apply, or NULL if this is a unary operator. */
150 binary_op_func m_binary_func
;
152 /* Function of how expensive we consider this operator. Higher is
156 /* the RHS value to write into the C file if unary; used for shift
162 /* An entry in an expression DAG. */
166 Expr () : m_op (NULL
), m_lhs (0), m_rhs (0), m_produced_val (0),
167 m_critical_path_length (0)
171 /* The operator being applied to the operands. */
172 const Operator
*m_op
;
174 /* The index of the left-hand operand in the array of subexpressions
178 /* For binary ops ,this is the index of the left-hand operand in the
179 array of subexpressions already computed. For unary ops, it is
180 specific to the op (e.g. it might hold a constant shift
184 /* The multiplier produced by this expression tree. For example, for
185 the tree ((x << 5) + x), the value would be 33. */
186 MUL_TYPE m_produced_val
;
188 /* How far is this expression from the root, i.e. how many cycles
189 minimum will it take to compute this? */
190 int m_critical_path_length
;
194 /* Make function pointers for the various linear operators we can
195 apply to compute a multiplicative value. */
198 add (MUL_TYPE n1
, MUL_TYPE n2
)
204 sub (MUL_TYPE n1
, MUL_TYPE n2
)
210 s1a (MUL_TYPE n1
, MUL_TYPE n2
)
216 s2a (MUL_TYPE n1
, MUL_TYPE n2
)
222 s3a (MUL_TYPE n1
, MUL_TYPE n2
)
227 #define SHIFT(count) \
229 shift##count(MUL_TYPE n) \
231 return n << (count); \
301 static Operator ops
[] = {
302 Operator ("CODE_FOR_addsi3", "add", add
, 1040),
303 Operator ("CODE_FOR_subsi3", "sub", sub
, 1041),
304 Operator ("CODE_FOR_insn_s1a", "s1a", s1a
, 1042),
305 Operator ("CODE_FOR_insn_s2a", "s2a", s2a
, 1043),
306 Operator ("CODE_FOR_insn_s3a", "s3a", s3a
, 1044),
307 /* Note: shl by 1 is not necessary, since adding a value to itself
308 produces the same result. But the architecture team thinks
309 left-shift may use slightly less power, so we might as well
311 Operator ("CODE_FOR_ashlsi3", "shli", shift1
, 1, 1001),
312 Operator ("CODE_FOR_ashlsi3", "shli", shift2
, 2, 1002),
313 Operator ("CODE_FOR_ashlsi3", "shli", shift3
, 3, 1003),
314 Operator ("CODE_FOR_ashlsi3", "shli", shift4
, 4, 1004),
315 Operator ("CODE_FOR_ashlsi3", "shli", shift5
, 5, 1005),
316 Operator ("CODE_FOR_ashlsi3", "shli", shift6
, 6, 1006),
317 Operator ("CODE_FOR_ashlsi3", "shli", shift7
, 7, 1007),
318 Operator ("CODE_FOR_ashlsi3", "shli", shift8
, 8, 1008),
319 Operator ("CODE_FOR_ashlsi3", "shli", shift9
, 9, 1009),
320 Operator ("CODE_FOR_ashlsi3", "shli", shift10
, 10, 1010),
321 Operator ("CODE_FOR_ashlsi3", "shli", shift11
, 11, 1011),
322 Operator ("CODE_FOR_ashlsi3", "shli", shift12
, 12, 1012),
323 Operator ("CODE_FOR_ashlsi3", "shli", shift13
, 13, 1013),
324 Operator ("CODE_FOR_ashlsi3", "shli", shift14
, 14, 1014),
325 Operator ("CODE_FOR_ashlsi3", "shli", shift15
, 15, 1015),
326 Operator ("CODE_FOR_ashlsi3", "shli", shift16
, 16, 1016),
327 Operator ("CODE_FOR_ashlsi3", "shli", shift17
, 17, 1017),
328 Operator ("CODE_FOR_ashlsi3", "shli", shift18
, 18, 1018),
329 Operator ("CODE_FOR_ashlsi3", "shli", shift19
, 19, 1019),
330 Operator ("CODE_FOR_ashlsi3", "shli", shift20
, 20, 1020),
331 Operator ("CODE_FOR_ashlsi3", "shli", shift21
, 21, 1021),
332 Operator ("CODE_FOR_ashlsi3", "shli", shift22
, 22, 1022),
333 Operator ("CODE_FOR_ashlsi3", "shli", shift23
, 23, 1023),
334 Operator ("CODE_FOR_ashlsi3", "shli", shift24
, 24, 1024),
335 Operator ("CODE_FOR_ashlsi3", "shli", shift25
, 25, 1025),
336 Operator ("CODE_FOR_ashlsi3", "shli", shift26
, 26, 1026),
337 Operator ("CODE_FOR_ashlsi3", "shli", shift27
, 27, 1027),
338 Operator ("CODE_FOR_ashlsi3", "shli", shift28
, 28, 1028),
339 Operator ("CODE_FOR_ashlsi3", "shli", shift29
, 29, 1029),
340 Operator ("CODE_FOR_ashlsi3", "shli", shift30
, 30, 1030),
341 Operator ("CODE_FOR_ashlsi3", "shli", shift31
, 31, 1031)
344 static Operator ops
[] = {
345 Operator ("CODE_FOR_adddi3", "add", add
, 1070),
346 Operator ("CODE_FOR_subdi3", "sub", sub
, 1071),
347 Operator ("CODE_FOR_insn_shl1add", "shl1add", s1a
, 1072),
348 Operator ("CODE_FOR_insn_shl2add", "shl2add", s2a
, 1073),
349 Operator ("CODE_FOR_insn_shl3add", "shl3add", s3a
, 1074),
350 // Note: shl by 1 is not necessary, since adding a value to itself
351 // produces the same result. But the architecture team thinks left-shift
352 // may use slightly less power, so we might as well prefer it.
353 Operator ("CODE_FOR_ashldi3", "shli", shift1
, 1, 1001),
354 Operator ("CODE_FOR_ashldi3", "shli", shift2
, 2, 1002),
355 Operator ("CODE_FOR_ashldi3", "shli", shift3
, 3, 1003),
356 Operator ("CODE_FOR_ashldi3", "shli", shift4
, 4, 1004),
357 Operator ("CODE_FOR_ashldi3", "shli", shift5
, 5, 1005),
358 Operator ("CODE_FOR_ashldi3", "shli", shift6
, 6, 1006),
359 Operator ("CODE_FOR_ashldi3", "shli", shift7
, 7, 1007),
360 Operator ("CODE_FOR_ashldi3", "shli", shift8
, 8, 1008),
361 Operator ("CODE_FOR_ashldi3", "shli", shift9
, 9, 1009),
362 Operator ("CODE_FOR_ashldi3", "shli", shift10
, 10, 1010),
363 Operator ("CODE_FOR_ashldi3", "shli", shift11
, 11, 1011),
364 Operator ("CODE_FOR_ashldi3", "shli", shift12
, 12, 1012),
365 Operator ("CODE_FOR_ashldi3", "shli", shift13
, 13, 1013),
366 Operator ("CODE_FOR_ashldi3", "shli", shift14
, 14, 1014),
367 Operator ("CODE_FOR_ashldi3", "shli", shift15
, 15, 1015),
368 Operator ("CODE_FOR_ashldi3", "shli", shift16
, 16, 1016),
369 Operator ("CODE_FOR_ashldi3", "shli", shift17
, 17, 1017),
370 Operator ("CODE_FOR_ashldi3", "shli", shift18
, 18, 1018),
371 Operator ("CODE_FOR_ashldi3", "shli", shift19
, 19, 1019),
372 Operator ("CODE_FOR_ashldi3", "shli", shift20
, 20, 1020),
373 Operator ("CODE_FOR_ashldi3", "shli", shift21
, 21, 1021),
374 Operator ("CODE_FOR_ashldi3", "shli", shift22
, 22, 1022),
375 Operator ("CODE_FOR_ashldi3", "shli", shift23
, 23, 1023),
376 Operator ("CODE_FOR_ashldi3", "shli", shift24
, 24, 1024),
377 Operator ("CODE_FOR_ashldi3", "shli", shift25
, 25, 1025),
378 Operator ("CODE_FOR_ashldi3", "shli", shift26
, 26, 1026),
379 Operator ("CODE_FOR_ashldi3", "shli", shift27
, 27, 1027),
380 Operator ("CODE_FOR_ashldi3", "shli", shift28
, 28, 1028),
381 Operator ("CODE_FOR_ashldi3", "shli", shift29
, 29, 1029),
382 Operator ("CODE_FOR_ashldi3", "shli", shift30
, 30, 1030),
383 Operator ("CODE_FOR_ashldi3", "shli", shift31
, 31, 1031),
384 Operator ("CODE_FOR_ashldi3", "shli", shift32
, 32, 1032),
385 Operator ("CODE_FOR_ashldi3", "shli", shift33
, 33, 1033),
386 Operator ("CODE_FOR_ashldi3", "shli", shift34
, 34, 1034),
387 Operator ("CODE_FOR_ashldi3", "shli", shift35
, 35, 1035),
388 Operator ("CODE_FOR_ashldi3", "shli", shift36
, 36, 1036),
389 Operator ("CODE_FOR_ashldi3", "shli", shift37
, 37, 1037),
390 Operator ("CODE_FOR_ashldi3", "shli", shift38
, 38, 1038),
391 Operator ("CODE_FOR_ashldi3", "shli", shift39
, 39, 1039),
392 Operator ("CODE_FOR_ashldi3", "shli", shift40
, 40, 1040),
393 Operator ("CODE_FOR_ashldi3", "shli", shift41
, 41, 1041),
394 Operator ("CODE_FOR_ashldi3", "shli", shift42
, 42, 1042),
395 Operator ("CODE_FOR_ashldi3", "shli", shift43
, 43, 1043),
396 Operator ("CODE_FOR_ashldi3", "shli", shift44
, 44, 1044),
397 Operator ("CODE_FOR_ashldi3", "shli", shift45
, 45, 1045),
398 Operator ("CODE_FOR_ashldi3", "shli", shift46
, 46, 1046),
399 Operator ("CODE_FOR_ashldi3", "shli", shift47
, 47, 1047),
400 Operator ("CODE_FOR_ashldi3", "shli", shift48
, 48, 1048),
401 Operator ("CODE_FOR_ashldi3", "shli", shift49
, 49, 1049),
402 Operator ("CODE_FOR_ashldi3", "shli", shift50
, 50, 1050),
403 Operator ("CODE_FOR_ashldi3", "shli", shift51
, 51, 1051),
404 Operator ("CODE_FOR_ashldi3", "shli", shift52
, 52, 1052),
405 Operator ("CODE_FOR_ashldi3", "shli", shift53
, 53, 1053),
406 Operator ("CODE_FOR_ashldi3", "shli", shift54
, 54, 1054),
407 Operator ("CODE_FOR_ashldi3", "shli", shift55
, 55, 1055),
408 Operator ("CODE_FOR_ashldi3", "shli", shift56
, 56, 1056),
409 Operator ("CODE_FOR_ashldi3", "shli", shift57
, 57, 1057),
410 Operator ("CODE_FOR_ashldi3", "shli", shift58
, 58, 1058),
411 Operator ("CODE_FOR_ashldi3", "shli", shift59
, 59, 1059),
412 Operator ("CODE_FOR_ashldi3", "shli", shift60
, 60, 1060),
413 Operator ("CODE_FOR_ashldi3", "shli", shift61
, 61, 1061),
414 Operator ("CODE_FOR_ashldi3", "shli", shift62
, 62, 1062),
415 Operator ("CODE_FOR_ashldi3", "shli", shift63
, 63, 1063)
419 /* An ExpressionTree is an expression DAG. */
423 ExpressionTree () : m_num_vals (2)
425 m_exprs
[0].m_produced_val
= 0;
426 m_exprs
[1].m_produced_val
= 1;
429 void copy_technique_from (ExpressionTree
* other
)
431 /* TODO: write this; other can compute the same products with less
432 cost. We update this ExpressionTree object because some int is
433 already mapped to it. */
437 Expr m_exprs
[MAX_SUBEXPRS
];
442 for (int j
= 2; j
< m_num_vals
; j
++)
443 cost
+= m_exprs
[j
].m_op
->m_cost
;
444 return cost
+ m_exprs
[m_num_vals
- 1].m_critical_path_length
* 1000000;
449 typedef std::map
<MUL_TYPE
, ExpressionTree
*> ExpressionTreeMap
;
453 find_sequences (ExpressionTree
&s
, ExpressionTreeMap
&best_solution
)
455 /* Don't look more if we can't add any new values to the expression
457 const int num_vals
= s
.m_num_vals
;
458 if (num_vals
== MAX_SUBEXPRS
)
461 /* Grow array to make room for new values added as we loop. */
462 s
.m_num_vals
= num_vals
+ 1;
464 const Operator
*const prev_op
= s
.m_exprs
[num_vals
- 1].m_op
;
465 const int prev_top_index
= (prev_op
!= NULL
) ? prev_op
->m_top_index
: -1;
467 for (size_t f
= 0; f
< ARRAY_SIZE (ops
); f
++)
469 const Operator
*const op
= &ops
[f
];
471 for (int i
= 0; i
< num_vals
; i
++)
473 /* Only allow zero as the first operand to sub, otherwise
475 if (i
== 0 && op
->m_binary_func
!= sub
)
478 /* Unary ops don't actually use the second operand, so as a
479 big hack we trick it into only looping once in the inner
481 const int j_end
= op
->is_unary () ? 2 : num_vals
;
483 /* We never allow zero as the second operand, as it is
485 for (int j
= 1; j
< j_end
; j
++)
487 /* Does this expression use the immediately previous
489 const bool uses_prev_value
=
491 || (!op
->is_unary () && j
== num_vals
- 1));
493 if (!uses_prev_value
)
495 /* For search efficiency, prune redundant
496 instruction orderings.
498 This op does not take the immediately preceding
499 value as input, which means we could have done it
500 in the previous slot. If its opcode is less than
501 the previous instruction's, we'll declare this
502 instruction order non-canonical and not pursue
503 this branch of the search. */
504 if (op
->m_top_index
< prev_top_index
)
511 n
= op
->m_unary_func (s
.m_exprs
[i
].m_produced_val
);
515 n
= op
->m_binary_func (s
.m_exprs
[i
].m_produced_val
,
516 s
.m_exprs
[j
].m_produced_val
);
519 bool duplicate
= false;
520 for (int k
= 0; k
< num_vals
; k
++)
521 if (n
== s
.m_exprs
[j
].m_produced_val
)
530 /* See if we found the best solution for n. */
531 Expr
*e
= &s
.m_exprs
[num_vals
];
534 e
->m_rhs
= op
->is_unary () ? op
->m_rhs_if_unary
: j
;
535 e
->m_produced_val
= n
;
536 e
->m_critical_path_length
=
537 1 + MAX (s
.m_exprs
[i
].m_critical_path_length
,
538 s
.m_exprs
[j
].m_critical_path_length
);
540 ExpressionTreeMap::iterator
best (best_solution
.find (n
));
541 if (best
== best_solution
.end ()
542 || (*best
).second
->cost () > s
.cost ())
543 best_solution
[n
] = new ExpressionTree (s
);
545 /* Recurse and find more. */
546 find_sequences (s
, best_solution
);
551 /* Restore old size. */
552 s
.m_num_vals
= num_vals
;
556 /* For each insn_code used by an operator, choose a compact number so
557 it takes less space in the output data structure. This prints out a
558 lookup table used to map the compactified number back to an
561 create_insn_code_compression_table ()
565 /* Entry 0 must hold CODE_FOR_nothing to mean "end of array". */
566 printf ("const enum insn_code %s_multiply_insn_seq_decode_opcode[] = {\n"
567 " CODE_FOR_nothing /* must be first */ ", ARCH
);
569 for (size_t i
= 0; i
< ARRAY_SIZE (ops
); i
++)
571 Operator
*op
= &ops
[i
];
574 /* See if some previous Operator was using the same insn_code.
575 If so, reuse its table entry. */
576 for (size_t j
= 0; j
< i
; j
++)
578 Operator
*old
= &ops
[j
];
579 if (strcmp (old
->m_pattern
, op
->m_pattern
) == 0)
581 index
= old
->m_top_index
;
588 /* We need to make a new entry in the table. */
589 printf (",\n %s", op
->m_pattern
);
590 index
= next_index
++;
593 op
->m_top_index
= index
;
600 /* These are constants we've seen in code, that we want to keep table
602 static int multiply_constants_used
[] = {
1064 const int num_mult_constants_used
=
1065 (int)(sizeof multiply_constants_used
1066 / sizeof multiply_constants_used
[0]);
1069 #define XSIZE (sizeof multiply_constants_used / \
1070 sizeof multiply_constants_used[0] + 32) / 32
1071 unsigned multiply_constants_avail
[XSIZE
];
1075 /* bsearch helper function. */
1077 compare_constants (const void *key
, const void *t
)
1079 return (*(int*)key
) - *((int*)t
);
1084 find_mult_constants_used (int multiplier
)
1086 return (int *) bsearch (&multiplier
, multiply_constants_used
,
1087 num_mult_constants_used
,
1088 sizeof multiply_constants_used
[0],
1093 int num_ops (ExpressionTree
*s
)
1096 for (int i
= 0; i
< s
->m_num_vals
; i
++)
1098 Expr
*e
= &s
->m_exprs
[i
];
1099 if (e
->m_op
!= NULL
)
1108 tilepro_emit (int multiplier
, int num_ops
)
1110 int abs_multiplier
= (multiplier
>= 0) ? multiplier
: -multiplier
;
1112 /* Keep constants in range [-1024, 1024]. */
1113 if (abs_multiplier
<= 1024)
1116 /* Keep constants near powers of two. */
1117 int prev_pow2
= 1 << (31 - __builtin_clz (abs_multiplier
));
1118 int next_pow2
= prev_pow2
<< 1;
1120 if ((abs_multiplier
- prev_pow2
<= 10)
1121 || (next_pow2
- abs_multiplier
<= 10))
1124 /* Keep constants near powers of ten. */
1127 long long prev_pow10
;
1128 long long next_pow10
;
1130 while (((j
* 10) < abs_multiplier
)
1135 next_pow10
= j
* 10;
1137 if ((abs_multiplier
- prev_pow10
<= 10)
1138 || (next_pow10
- abs_multiplier
<= 10))
1142 /* Keep short sequences that have two or fewer ops. */
1146 /* Keep constants that are mostly 0's or mostly 1's. */
1147 if (__builtin_popcount (multiplier
) <= 2 ||
1148 __builtin_popcount (multiplier
) >= 30)
1151 /* Keep constants seen in actual code. */
1152 if ((find_mult_constants_used (multiplier
)))
1159 tilegx_emit (long long multiplier
, int num_ops
)
1161 long long abs_multiplier
= (multiplier
>= 0) ? multiplier
: - multiplier
;
1163 /* Keep constants in range [-1024, 1024]. */
1164 if (abs_multiplier
<= 1024)
1167 /* Otherwise exclude sequences with four ops. */
1171 /* Keep constants near powers of two. */
1173 unsigned long long prev_pow2
=
1174 1LL << (63 - __builtin_clzll (abs_multiplier
));
1175 unsigned long long next_pow2
= prev_pow2
<< 1;
1177 /* handle overflow case. */
1180 if (prev_pow2
- abs_multiplier
<= 10)
1183 else if ((abs_multiplier
- prev_pow2
<= 10)
1184 || (next_pow2
- abs_multiplier
<= 10))
1188 /* Keep constants near powers of ten. */
1191 long long prev_pow10
;
1192 long long next_pow10
;
1194 while (((j
* 10) < abs_multiplier
)
1199 next_pow10
= j
* 10;
1201 if ((abs_multiplier
- prev_pow10
<= 100)
1203 && (next_pow10
- abs_multiplier
<= 100)))
1210 /* Keep constants that are mostly 0's or mostly 1's. */
1211 if (__builtin_popcountll (multiplier
) <= 2 ||
1212 __builtin_popcountll (multiplier
) >= 62)
1215 /* Keep constants seen in actual code. */
1216 if (find_mult_constants_used (multiplier
))
1227 ExpressionTreeMap best_solution
;
1231 printf ("/* Constant multiply table for TILEPro.\n");
1233 printf ("/* Constant multiply table for TILE-Gx.\n");
1235 printf (" Copyright (C) 2011-2022 Free Software Foundation, Inc.\n");
1236 printf (" Contributed by Walter Lee (walt@tilera.com)\n");
1238 printf (" This file is part of GCC.\n");
1240 printf (" GCC is free software; you can redistribute it and/or modify it\n");
1241 printf (" under the terms of the GNU General Public License as published\n");
1242 printf (" by the Free Software Foundation; either version 3, or (at your\n");
1243 printf (" option) any later version.\n");
1245 printf (" GCC is distributed in the hope that it will be useful, but WITHOUT\n");
1246 printf (" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n");
1247 printf (" or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public\n");
1248 printf (" License for more details.\n");
1250 printf (" You should have received a copy of the GNU General Public License\n");
1251 printf (" along with GCC; see the file COPYING3. If not see\n");
1252 printf (" <http://www.gnu.org/licenses/>. */\n");
1254 printf ("/* Note this file is auto-generated from gen-mul-tables.cc.\n");
1255 printf (" Make any required changes there. */\n");
1257 printf ("#define IN_TARGET_CODE 1\n");
1259 printf ("#include \"config.h\"\n");
1260 printf ("#include \"system.h\"\n");
1261 printf ("#include \"coretypes.h\"\n");
1262 printf ("#include \"backend.h\"\n");
1263 printf ("#include \"rtl.h\"\n");
1264 printf ("#include \"expmed.h\"\n");
1265 printf ("#include \"%s-multiply.h\"\n\n", ARCH
);
1266 create_insn_code_compression_table ();
1268 /* Try all combinations of operators and see what constants we
1269 produce. For each possible constant, record the most efficient
1270 way to generate it. */
1271 find_sequences (s
, best_solution
);
1273 printf ("const struct %s_multiply_insn_seq "
1274 "%s_multiply_insn_seq_table[] = {\n",
1277 const char *comma_separator
= "";
1279 ExpressionTreeMap::iterator
i (best_solution
.begin ());
1280 for (; i
!= best_solution
.end (); ++i
)
1282 ExpressionTree
*s
= (*i
).second
;
1283 const MUL_TYPE n
= (*i
).first
;
1285 if (n
== 0 || n
== 1)
1287 /* Both of these require zero operations, so these entries
1288 are bogus and should never be used. */
1292 /* Prune the list of entries to keep the table to a reasonable
1295 if (!tilepro_emit (n
, num_ops (s
)))
1298 if (!tilegx_emit (n
, num_ops (s
)))
1302 printf ("%s", comma_separator
);
1305 const MUL_TYPE int_min
= INT32_MIN
;
1307 const MUL_TYPE int_min
= INT64_MIN
;
1311 /* Handle C's woeful lack of unary negation. Without this,
1312 printing out INT_MIN in decimal will yield an unsigned
1313 int which could generate a compiler warning. */
1315 printf (" {%d - 1 /* 0x%x */ ,\n {", n
+ 1,
1318 printf (" {%lldll - 1 /* 0x%llx */ ,\n {", n
+ 1,
1319 (unsigned MUL_TYPE
) n
);
1325 printf (" {%d /* 0x%x */ ,\n {", n
, (unsigned) n
);
1327 printf (" {%lldll /* 0x%llx */ ,\n {", n
, (unsigned MUL_TYPE
) n
);
1332 for (int j
= 0; j
< s
->m_num_vals
; j
++)
1334 Expr
*e
= &s
->m_exprs
[j
];
1336 const Operator
*op
= e
->m_op
;
1341 snprintf (buf
, sizeof buf
, "%s{%d, %d, %d}%s",
1344 e
->m_lhs
, e
->m_rhs
, (j
+ 1) == s
->m_num_vals
? "}" : ",");
1348 snprintf (opnd0
, sizeof opnd0
, "r%d", e
->m_lhs
);
1350 snprintf (opnd0
, sizeof opnd0
, "zero");
1352 printf ("%s\t\t\t/* %s r%d, %s, %s%d */\n",
1353 buf
, op
->m_name
, j
, opnd0
,
1354 op
->is_unary () ? "" : "r", e
->m_rhs
);
1359 comma_separator
= ",\n";
1362 printf ("\n};\n\n");
1363 printf ("const int %s_multiply_insn_seq_table_size =\n"
1364 " (int) (sizeof %s_multiply_insn_seq_table\n"
1365 " / sizeof %s_multiply_insn_seq_table[0]);\n",
1368 return EXIT_SUCCESS
;