]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/tilepro/gen-mul-tables.cc
testsuite, Darwin: Fix darwin-comm-1.c error messages for Darwin <= 10.
[thirdparty/gcc.git] / gcc / config / tilepro / gen-mul-tables.cc
1 /* Multiply table generator for tile.
2 Copyright (C) 2011-2022 Free Software Foundation, Inc.
3 Contributed by Walter Lee (walt@tilera.com)
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* This program creates a table used to compile multiply by constant
22 efficiently.
23
24 This program should be compiled by a c++ compiler. If it's
25 compiled with -DTILEPRO, it generates the multiply table for
26 TILEPro; otherwise it generates the multiply table for TILE-Gx.
27 Running the program produces the table in stdout.
28
29 The program works by generating every possible combination of up to
30 MAX_INSTRUCTIONS linear operators (such as add, sub, s2a, left
31 shift) and computing the multiplier computed by those instructions.
32 For example,
33
34 s2a r2,r1,r1
35 s2a r3,r2,r2
36
37 multiplies r1 by 25.
38
39 There are usually multiple instruction sequences to multiply by a
40 given constant. This program keeps only the cheapest one.
41 "Cheapest" is defined first by the minimum theoretical schedule
42 length, and if those are equal then by the number of instructions,
43 and if those are equal then by which instructions we "prefer"
44 (e.g. because we think one might take infinitesimally less power
45 than another, or simply because we arbitrarily pick one to be more
46 canonical).
47
48 Once this program has determined the best instruction sequence for
49 each multiplier, it emits them in a table sorted by the multiplier
50 so the user can binary-search it to look for a match. The table is
51 pruned based on various criteria to keep its sizes reasonable. */
52
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <assert.h>
56 #include <string.h>
57 #define __STDC_LIMIT_MACROS
58 #include <stdint.h>
59
60 #include <map>
61
62 #ifdef TILEPRO
63
64 /* The string representing the architecture. */
65 #define ARCH "tilepro"
66
67 /* The type for the multiplication. */
68 typedef int MUL_TYPE;
69
70 #else
71
72 /* The string representing the architecture. */
73 #define ARCH "tilegx"
74
75 /* The type for the multiplication. */
76 typedef long long MUL_TYPE;
77
78 #endif
79
80 /* Longest instruction sequence this will produce. With the current
81 stupid algorithm runtime grows very quickly with this number. */
82 #define MAX_INSTRUCTIONS 4
83
84 /* Maximum number of subexpressions in the expression DAG being
85 generated. This is the same as the number of instructions, except
86 that zero and the original register we'd like to multiply by a
87 constant are also thrown into the mix. */
88 #define MAX_SUBEXPRS (2 + MAX_INSTRUCTIONS)
89
90 #define MIN(x, y) ((x) <= (y) ? (x) : (y))
91 #define MAX(x, y) ((x) >= (y) ? (x) : (y))
92
93 #define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))
94
95 /* For this program a unary op is one which has only one nonconstant
96 operand. So shift left by 5 is considered unary. */
97 typedef MUL_TYPE (*unary_op_func) (MUL_TYPE);
98 typedef MUL_TYPE (*binary_op_func) (MUL_TYPE, MUL_TYPE);
99
100 /* This describes an operation like 'add two registers' or 'left-shift
101 by 7'.
102
103 We call something a 'unary' operator if it only takes in one
104 register as input, even though it might have an implicit second
105 constant operand. Currently this is used for left-shift by
106 constant. */
107 class Operator
108 {
109 public:
110 /* Construct for a binary operator. */
111 Operator (const char *pattern, const char *name, binary_op_func func,
112 int cost)
113 : m_pattern (pattern), m_name (name), m_top_index (-1),
114 m_unary_func (0), m_binary_func (func), m_cost (cost),
115 m_rhs_if_unary (0)
116 {
117 }
118
119 /* Construct for a unary operator. */
120 Operator (const char *pattern, const char *name, unary_op_func func,
121 int rhs_if_unary, int cost)
122 : m_pattern (pattern), m_name (name), m_top_index (-1),
123 m_unary_func (func), m_binary_func (0), m_cost (cost),
124 m_rhs_if_unary (rhs_if_unary)
125 {
126 }
127
128 bool is_unary () const
129 {
130 return m_binary_func == NULL;
131 }
132
133 /* Name of the pattern for this operation, e.g. CODE_FOR_addsi3. */
134 const char *m_pattern;
135
136 /* Name of the opcode for this operation, e.g. add. */
137 const char *m_name;
138
139 /* We don't have enough bits in our output representation to store
140 the original insn_code value, so we store a compressed form
141 instead. These values are decoded back into insn_code via the
142 machine-generated multiply_insn_seq_decode_opcode lookup
143 table. */
144 int m_top_index;
145
146 /* Unary operator to apply, or NULL if this is a binary operator. */
147 unary_op_func m_unary_func;
148
149 /* Binary operator to apply, or NULL if this is a unary operator. */
150 binary_op_func m_binary_func;
151
152 /* Function of how expensive we consider this operator. Higher is
153 worse. */
154 int m_cost;
155
156 /* the RHS value to write into the C file if unary; used for shift
157 count. */
158 int m_rhs_if_unary;
159 };
160
161
162 /* An entry in an expression DAG. */
163 class Expr
164 {
165 public:
166 Expr () : m_op (NULL), m_lhs (0), m_rhs (0), m_produced_val (0),
167 m_critical_path_length (0)
168 {
169 }
170
171 /* The operator being applied to the operands. */
172 const Operator *m_op;
173
174 /* The index of the left-hand operand in the array of subexpressions
175 already computed. */
176 int m_lhs;
177
178 /* For binary ops ,this is the index of the left-hand operand in the
179 array of subexpressions already computed. For unary ops, it is
180 specific to the op (e.g. it might hold a constant shift
181 count). */
182 int m_rhs;
183
184 /* The multiplier produced by this expression tree. For example, for
185 the tree ((x << 5) + x), the value would be 33. */
186 MUL_TYPE m_produced_val;
187
188 /* How far is this expression from the root, i.e. how many cycles
189 minimum will it take to compute this? */
190 int m_critical_path_length;
191 };
192
193
194 /* Make function pointers for the various linear operators we can
195 apply to compute a multiplicative value. */
196
197 static MUL_TYPE
198 add (MUL_TYPE n1, MUL_TYPE n2)
199 {
200 return n1 + n2;
201 }
202
203 static MUL_TYPE
204 sub (MUL_TYPE n1, MUL_TYPE n2)
205 {
206 return n1 - n2;
207 }
208
209 static MUL_TYPE
210 s1a (MUL_TYPE n1, MUL_TYPE n2)
211 {
212 return n1 * 2 + n2;
213 }
214
215 static MUL_TYPE
216 s2a (MUL_TYPE n1, MUL_TYPE n2)
217 {
218 return n1 * 4 + n2;
219 }
220
221 static MUL_TYPE
222 s3a (MUL_TYPE n1, MUL_TYPE n2)
223 {
224 return n1 * 8 + n2;
225 }
226
227 #define SHIFT(count) \
228 static MUL_TYPE \
229 shift##count(MUL_TYPE n) \
230 { \
231 return n << (count); \
232 }
233
234 SHIFT (1);
235 SHIFT (2);
236 SHIFT (3);
237 SHIFT (4);
238 SHIFT (5);
239 SHIFT (6);
240 SHIFT (7);
241 SHIFT (8);
242 SHIFT (9);
243 SHIFT (10);
244 SHIFT (11);
245 SHIFT (12);
246 SHIFT (13);
247 SHIFT (14);
248 SHIFT (15);
249 SHIFT (16);
250 SHIFT (17);
251 SHIFT (18);
252 SHIFT (19);
253 SHIFT (20);
254 SHIFT (21);
255 SHIFT (22);
256 SHIFT (23);
257 SHIFT (24);
258 SHIFT (25);
259 SHIFT (26);
260 SHIFT (27);
261 SHIFT (28);
262 SHIFT (29);
263 SHIFT (30);
264 SHIFT (31);
265 #ifndef TILEPRO
266 SHIFT (32);
267 SHIFT (33);
268 SHIFT (34);
269 SHIFT (35);
270 SHIFT (36);
271 SHIFT (37);
272 SHIFT (38);
273 SHIFT (39);
274 SHIFT (40);
275 SHIFT (41);
276 SHIFT (42);
277 SHIFT (43);
278 SHIFT (44);
279 SHIFT (45);
280 SHIFT (46);
281 SHIFT (47);
282 SHIFT (48);
283 SHIFT (49);
284 SHIFT (50);
285 SHIFT (51);
286 SHIFT (52);
287 SHIFT (53);
288 SHIFT (54);
289 SHIFT (55);
290 SHIFT (56);
291 SHIFT (57);
292 SHIFT (58);
293 SHIFT (59);
294 SHIFT (60);
295 SHIFT (61);
296 SHIFT (62);
297 SHIFT (63);
298 #endif
299
300 #ifdef TILEPRO
301 static Operator ops[] = {
302 Operator ("CODE_FOR_addsi3", "add", add, 1040),
303 Operator ("CODE_FOR_subsi3", "sub", sub, 1041),
304 Operator ("CODE_FOR_insn_s1a", "s1a", s1a, 1042),
305 Operator ("CODE_FOR_insn_s2a", "s2a", s2a, 1043),
306 Operator ("CODE_FOR_insn_s3a", "s3a", s3a, 1044),
307 /* Note: shl by 1 is not necessary, since adding a value to itself
308 produces the same result. But the architecture team thinks
309 left-shift may use slightly less power, so we might as well
310 prefer it. */
311 Operator ("CODE_FOR_ashlsi3", "shli", shift1, 1, 1001),
312 Operator ("CODE_FOR_ashlsi3", "shli", shift2, 2, 1002),
313 Operator ("CODE_FOR_ashlsi3", "shli", shift3, 3, 1003),
314 Operator ("CODE_FOR_ashlsi3", "shli", shift4, 4, 1004),
315 Operator ("CODE_FOR_ashlsi3", "shli", shift5, 5, 1005),
316 Operator ("CODE_FOR_ashlsi3", "shli", shift6, 6, 1006),
317 Operator ("CODE_FOR_ashlsi3", "shli", shift7, 7, 1007),
318 Operator ("CODE_FOR_ashlsi3", "shli", shift8, 8, 1008),
319 Operator ("CODE_FOR_ashlsi3", "shli", shift9, 9, 1009),
320 Operator ("CODE_FOR_ashlsi3", "shli", shift10, 10, 1010),
321 Operator ("CODE_FOR_ashlsi3", "shli", shift11, 11, 1011),
322 Operator ("CODE_FOR_ashlsi3", "shli", shift12, 12, 1012),
323 Operator ("CODE_FOR_ashlsi3", "shli", shift13, 13, 1013),
324 Operator ("CODE_FOR_ashlsi3", "shli", shift14, 14, 1014),
325 Operator ("CODE_FOR_ashlsi3", "shli", shift15, 15, 1015),
326 Operator ("CODE_FOR_ashlsi3", "shli", shift16, 16, 1016),
327 Operator ("CODE_FOR_ashlsi3", "shli", shift17, 17, 1017),
328 Operator ("CODE_FOR_ashlsi3", "shli", shift18, 18, 1018),
329 Operator ("CODE_FOR_ashlsi3", "shli", shift19, 19, 1019),
330 Operator ("CODE_FOR_ashlsi3", "shli", shift20, 20, 1020),
331 Operator ("CODE_FOR_ashlsi3", "shli", shift21, 21, 1021),
332 Operator ("CODE_FOR_ashlsi3", "shli", shift22, 22, 1022),
333 Operator ("CODE_FOR_ashlsi3", "shli", shift23, 23, 1023),
334 Operator ("CODE_FOR_ashlsi3", "shli", shift24, 24, 1024),
335 Operator ("CODE_FOR_ashlsi3", "shli", shift25, 25, 1025),
336 Operator ("CODE_FOR_ashlsi3", "shli", shift26, 26, 1026),
337 Operator ("CODE_FOR_ashlsi3", "shli", shift27, 27, 1027),
338 Operator ("CODE_FOR_ashlsi3", "shli", shift28, 28, 1028),
339 Operator ("CODE_FOR_ashlsi3", "shli", shift29, 29, 1029),
340 Operator ("CODE_FOR_ashlsi3", "shli", shift30, 30, 1030),
341 Operator ("CODE_FOR_ashlsi3", "shli", shift31, 31, 1031)
342 };
343 #else
344 static Operator ops[] = {
345 Operator ("CODE_FOR_adddi3", "add", add, 1070),
346 Operator ("CODE_FOR_subdi3", "sub", sub, 1071),
347 Operator ("CODE_FOR_insn_shl1add", "shl1add", s1a, 1072),
348 Operator ("CODE_FOR_insn_shl2add", "shl2add", s2a, 1073),
349 Operator ("CODE_FOR_insn_shl3add", "shl3add", s3a, 1074),
350 // Note: shl by 1 is not necessary, since adding a value to itself
351 // produces the same result. But the architecture team thinks left-shift
352 // may use slightly less power, so we might as well prefer it.
353 Operator ("CODE_FOR_ashldi3", "shli", shift1, 1, 1001),
354 Operator ("CODE_FOR_ashldi3", "shli", shift2, 2, 1002),
355 Operator ("CODE_FOR_ashldi3", "shli", shift3, 3, 1003),
356 Operator ("CODE_FOR_ashldi3", "shli", shift4, 4, 1004),
357 Operator ("CODE_FOR_ashldi3", "shli", shift5, 5, 1005),
358 Operator ("CODE_FOR_ashldi3", "shli", shift6, 6, 1006),
359 Operator ("CODE_FOR_ashldi3", "shli", shift7, 7, 1007),
360 Operator ("CODE_FOR_ashldi3", "shli", shift8, 8, 1008),
361 Operator ("CODE_FOR_ashldi3", "shli", shift9, 9, 1009),
362 Operator ("CODE_FOR_ashldi3", "shli", shift10, 10, 1010),
363 Operator ("CODE_FOR_ashldi3", "shli", shift11, 11, 1011),
364 Operator ("CODE_FOR_ashldi3", "shli", shift12, 12, 1012),
365 Operator ("CODE_FOR_ashldi3", "shli", shift13, 13, 1013),
366 Operator ("CODE_FOR_ashldi3", "shli", shift14, 14, 1014),
367 Operator ("CODE_FOR_ashldi3", "shli", shift15, 15, 1015),
368 Operator ("CODE_FOR_ashldi3", "shli", shift16, 16, 1016),
369 Operator ("CODE_FOR_ashldi3", "shli", shift17, 17, 1017),
370 Operator ("CODE_FOR_ashldi3", "shli", shift18, 18, 1018),
371 Operator ("CODE_FOR_ashldi3", "shli", shift19, 19, 1019),
372 Operator ("CODE_FOR_ashldi3", "shli", shift20, 20, 1020),
373 Operator ("CODE_FOR_ashldi3", "shli", shift21, 21, 1021),
374 Operator ("CODE_FOR_ashldi3", "shli", shift22, 22, 1022),
375 Operator ("CODE_FOR_ashldi3", "shli", shift23, 23, 1023),
376 Operator ("CODE_FOR_ashldi3", "shli", shift24, 24, 1024),
377 Operator ("CODE_FOR_ashldi3", "shli", shift25, 25, 1025),
378 Operator ("CODE_FOR_ashldi3", "shli", shift26, 26, 1026),
379 Operator ("CODE_FOR_ashldi3", "shli", shift27, 27, 1027),
380 Operator ("CODE_FOR_ashldi3", "shli", shift28, 28, 1028),
381 Operator ("CODE_FOR_ashldi3", "shli", shift29, 29, 1029),
382 Operator ("CODE_FOR_ashldi3", "shli", shift30, 30, 1030),
383 Operator ("CODE_FOR_ashldi3", "shli", shift31, 31, 1031),
384 Operator ("CODE_FOR_ashldi3", "shli", shift32, 32, 1032),
385 Operator ("CODE_FOR_ashldi3", "shli", shift33, 33, 1033),
386 Operator ("CODE_FOR_ashldi3", "shli", shift34, 34, 1034),
387 Operator ("CODE_FOR_ashldi3", "shli", shift35, 35, 1035),
388 Operator ("CODE_FOR_ashldi3", "shli", shift36, 36, 1036),
389 Operator ("CODE_FOR_ashldi3", "shli", shift37, 37, 1037),
390 Operator ("CODE_FOR_ashldi3", "shli", shift38, 38, 1038),
391 Operator ("CODE_FOR_ashldi3", "shli", shift39, 39, 1039),
392 Operator ("CODE_FOR_ashldi3", "shli", shift40, 40, 1040),
393 Operator ("CODE_FOR_ashldi3", "shli", shift41, 41, 1041),
394 Operator ("CODE_FOR_ashldi3", "shli", shift42, 42, 1042),
395 Operator ("CODE_FOR_ashldi3", "shli", shift43, 43, 1043),
396 Operator ("CODE_FOR_ashldi3", "shli", shift44, 44, 1044),
397 Operator ("CODE_FOR_ashldi3", "shli", shift45, 45, 1045),
398 Operator ("CODE_FOR_ashldi3", "shli", shift46, 46, 1046),
399 Operator ("CODE_FOR_ashldi3", "shli", shift47, 47, 1047),
400 Operator ("CODE_FOR_ashldi3", "shli", shift48, 48, 1048),
401 Operator ("CODE_FOR_ashldi3", "shli", shift49, 49, 1049),
402 Operator ("CODE_FOR_ashldi3", "shli", shift50, 50, 1050),
403 Operator ("CODE_FOR_ashldi3", "shli", shift51, 51, 1051),
404 Operator ("CODE_FOR_ashldi3", "shli", shift52, 52, 1052),
405 Operator ("CODE_FOR_ashldi3", "shli", shift53, 53, 1053),
406 Operator ("CODE_FOR_ashldi3", "shli", shift54, 54, 1054),
407 Operator ("CODE_FOR_ashldi3", "shli", shift55, 55, 1055),
408 Operator ("CODE_FOR_ashldi3", "shli", shift56, 56, 1056),
409 Operator ("CODE_FOR_ashldi3", "shli", shift57, 57, 1057),
410 Operator ("CODE_FOR_ashldi3", "shli", shift58, 58, 1058),
411 Operator ("CODE_FOR_ashldi3", "shli", shift59, 59, 1059),
412 Operator ("CODE_FOR_ashldi3", "shli", shift60, 60, 1060),
413 Operator ("CODE_FOR_ashldi3", "shli", shift61, 61, 1061),
414 Operator ("CODE_FOR_ashldi3", "shli", shift62, 62, 1062),
415 Operator ("CODE_FOR_ashldi3", "shli", shift63, 63, 1063)
416 };
417 #endif
418
419 /* An ExpressionTree is an expression DAG. */
420 class ExpressionTree
421 {
422 public:
423 ExpressionTree () : m_num_vals (2)
424 {
425 m_exprs[0].m_produced_val = 0;
426 m_exprs[1].m_produced_val = 1;
427 }
428
429 void copy_technique_from (ExpressionTree * other)
430 {
431 /* TODO: write this; other can compute the same products with less
432 cost. We update this ExpressionTree object because some int is
433 already mapped to it. */
434 }
435
436 int m_num_vals;
437 Expr m_exprs[MAX_SUBEXPRS];
438
439 int cost () const
440 {
441 int cost = 0;
442 for (int j = 2; j < m_num_vals; j++)
443 cost += m_exprs[j].m_op->m_cost;
444 return cost + m_exprs[m_num_vals - 1].m_critical_path_length * 1000000;
445 }
446 };
447
448
449 typedef std::map<MUL_TYPE, ExpressionTree *> ExpressionTreeMap;
450
451
452 static void
453 find_sequences (ExpressionTree &s, ExpressionTreeMap &best_solution)
454 {
455 /* Don't look more if we can't add any new values to the expression
456 tree. */
457 const int num_vals = s.m_num_vals;
458 if (num_vals == MAX_SUBEXPRS)
459 return;
460
461 /* Grow array to make room for new values added as we loop. */
462 s.m_num_vals = num_vals + 1;
463
464 const Operator *const prev_op = s.m_exprs[num_vals - 1].m_op;
465 const int prev_top_index = (prev_op != NULL) ? prev_op->m_top_index : -1;
466
467 for (size_t f = 0; f < ARRAY_SIZE (ops); f++)
468 {
469 const Operator *const op = &ops[f];
470
471 for (int i = 0; i < num_vals; i++)
472 {
473 /* Only allow zero as the first operand to sub, otherwise
474 it is useless. */
475 if (i == 0 && op->m_binary_func != sub)
476 continue;
477
478 /* Unary ops don't actually use the second operand, so as a
479 big hack we trick it into only looping once in the inner
480 loop. */
481 const int j_end = op->is_unary () ? 2 : num_vals;
482
483 /* We never allow zero as the second operand, as it is
484 always useless. */
485 for (int j = 1; j < j_end; j++)
486 {
487 /* Does this expression use the immediately previous
488 expression? */
489 const bool uses_prev_value =
490 (i == num_vals - 1
491 || (!op->is_unary () && j == num_vals - 1));
492
493 if (!uses_prev_value)
494 {
495 /* For search efficiency, prune redundant
496 instruction orderings.
497
498 This op does not take the immediately preceding
499 value as input, which means we could have done it
500 in the previous slot. If its opcode is less than
501 the previous instruction's, we'll declare this
502 instruction order non-canonical and not pursue
503 this branch of the search. */
504 if (op->m_top_index < prev_top_index)
505 continue;
506 }
507
508 MUL_TYPE n;
509 if (op->is_unary ())
510 {
511 n = op->m_unary_func (s.m_exprs[i].m_produced_val);
512 }
513 else
514 {
515 n = op->m_binary_func (s.m_exprs[i].m_produced_val,
516 s.m_exprs[j].m_produced_val);
517 }
518
519 bool duplicate = false;
520 for (int k = 0; k < num_vals; k++)
521 if (n == s.m_exprs[j].m_produced_val)
522 {
523 duplicate = true;
524 break;
525 }
526
527 if (duplicate)
528 continue;
529
530 /* See if we found the best solution for n. */
531 Expr *e = &s.m_exprs[num_vals];
532 e->m_op = op;
533 e->m_lhs = i;
534 e->m_rhs = op->is_unary () ? op->m_rhs_if_unary : j;
535 e->m_produced_val = n;
536 e->m_critical_path_length =
537 1 + MAX (s.m_exprs[i].m_critical_path_length,
538 s.m_exprs[j].m_critical_path_length);
539
540 ExpressionTreeMap::iterator best (best_solution.find (n));
541 if (best == best_solution.end ()
542 || (*best).second->cost () > s.cost ())
543 best_solution[n] = new ExpressionTree (s);
544
545 /* Recurse and find more. */
546 find_sequences (s, best_solution);
547 }
548 }
549 }
550
551 /* Restore old size. */
552 s.m_num_vals = num_vals;
553 }
554
555
556 /* For each insn_code used by an operator, choose a compact number so
557 it takes less space in the output data structure. This prints out a
558 lookup table used to map the compactified number back to an
559 insn_code. */
560 static void
561 create_insn_code_compression_table ()
562 {
563 int next_index = 1;
564
565 /* Entry 0 must hold CODE_FOR_nothing to mean "end of array". */
566 printf ("const enum insn_code %s_multiply_insn_seq_decode_opcode[] = {\n"
567 " CODE_FOR_nothing /* must be first */ ", ARCH);
568
569 for (size_t i = 0; i < ARRAY_SIZE (ops); i++)
570 {
571 Operator *op = &ops[i];
572 int index = -1;
573
574 /* See if some previous Operator was using the same insn_code.
575 If so, reuse its table entry. */
576 for (size_t j = 0; j < i; j++)
577 {
578 Operator *old = &ops[j];
579 if (strcmp (old->m_pattern, op->m_pattern) == 0)
580 {
581 index = old->m_top_index;
582 break;
583 }
584 }
585
586 if (index == -1)
587 {
588 /* We need to make a new entry in the table. */
589 printf (",\n %s", op->m_pattern);
590 index = next_index++;
591 }
592
593 op->m_top_index = index;
594 }
595
596 printf ("\n};\n\n");
597 }
598
599
600 /* These are constants we've seen in code, that we want to keep table
601 entries for. */
602 static int multiply_constants_used[] = {
603 -11796480,
604 -27439,
605 -25148,
606 -22820,
607 -21709,
608 -20995,
609 -20284,
610 -20239,
611 -19595,
612 -19447,
613 -19183,
614 -19165,
615 -18730,
616 -17828,
617 -17799,
618 -17237,
619 -17036,
620 -16549,
621 -16423,
622 -16294,
623 -16244,
624 -16069,
625 -15137,
626 -15083,
627 -15038,
628 -14924,
629 -14905,
630 -14752,
631 -14731,
632 -14529,
633 -14273,
634 -14090,
635 -14084,
636 -14043,
637 -13850,
638 -13802,
639 -13631,
640 -13455,
641 -13275,
642 -12879,
643 -12700,
644 -12534,
645 -12399,
646 -12131,
647 -12112,
648 -12054,
649 -12019,
650 -11759,
651 -11585,
652 -11467,
653 -11395,
654 -11295,
655 -11248,
656 -11148,
657 -11116,
658 -11086,
659 -11059,
660 -11018,
661 -10811,
662 -10538,
663 -10258,
664 -10217,
665 -10033,
666 -9766,
667 -9754,
668 -9534,
669 -9527,
670 -9467,
671 -9262,
672 -9232,
673 -9222,
674 -9198,
675 -9191,
676 -9113,
677 -8825,
678 -8756,
679 -8697,
680 -8693,
681 -8565,
682 -8342,
683 -8208,
684 -8200,
685 -8170,
686 -8102,
687 -7770,
688 -7678,
689 -7562,
690 -7376,
691 -7373,
692 -7221,
693 -7121,
694 -6835,
695 -6810,
696 -6626,
697 -6581,
698 -6461,
699 -6278,
700 -6263,
701 -6163,
702 -6029,
703 -5816,
704 -5540,
705 -5461,
706 -5384,
707 -5329,
708 -4985,
709 -4926,
710 -4815,
711 -4788,
712 -4758,
713 -4433,
714 -4229,
715 -4209,
716 -4176,
717 -4104,
718 -4095,
719 -4078,
720 -3941,
721 -3818,
722 -3600,
723 -3474,
724 -3314,
725 -3264,
726 -3196,
727 -3072,
728 -2912,
729 -2836,
730 -2773,
731 -2269,
732 -2184,
733 -2100,
734 -1730,
735 -1512,
736 -1500,
737 -1396,
738 -1344,
739 -1312,
740 -1297,
741 -1059,
742 -1058,
743 1027,
744 1049,
745 1059,
746 1100,
747 1104,
748 1108,
749 1136,
750 1200,
751 1204,
752 1242,
753 1292,
754 1304,
755 1312,
756 1320,
757 1336,
758 1344,
759 1348,
760 1360,
761 1364,
762 1395,
763 1448,
764 1460,
765 1461,
766 1472,
767 1488,
768 1500,
769 1512,
770 1568,
771 1576,
772 1649,
773 1664,
774 1684,
775 1696,
776 1744,
777 1812,
778 1822,
779 1884,
780 1963,
781 1978,
782 2000,
783 2012,
784 2014,
785 2037,
786 2039,
787 2100,
788 2139,
789 2144,
790 2184,
791 2237,
792 2260,
793 2320,
794 2408,
795 2446,
796 2447,
797 2499,
798 2531,
799 2578,
800 2592,
801 2611,
802 2633,
803 2704,
804 2730,
805 2773,
806 2880,
807 2896,
808 2998,
809 3000,
810 3001,
811 3021,
812 3079,
813 3112,
814 3150,
815 3179,
816 3192,
817 3240,
818 3264,
819 3271,
820 3283,
821 3328,
822 3363,
823 3367,
824 3453,
825 3529,
826 3570,
827 3580,
828 3600,
829 3624,
830 3707,
831 3783,
832 3826,
833 3897,
834 3941,
835 3962,
836 3989,
837 4000,
838 4025,
839 4073,
840 4093,
841 4099,
842 4108,
843 4184,
844 4209,
845 4369,
846 4376,
847 4416,
848 4433,
849 4434,
850 4482,
851 4582,
852 4712,
853 4717,
854 4813,
855 4815,
856 4864,
857 5000,
858 5027,
859 5040,
860 5091,
861 5195,
862 5243,
863 5260,
864 5285,
865 5329,
866 5331,
867 5350,
868 5361,
869 5387,
870 5461,
871 5492,
872 5529,
873 5573,
874 5793,
875 5819,
876 5915,
877 5946,
878 5992,
879 6000,
880 6164,
881 6205,
882 6262,
883 6263,
884 6269,
885 6270,
886 6387,
887 6400,
888 6406,
889 6476,
890 6541,
891 6565,
892 6568,
893 6626,
894 6656,
895 6732,
896 6810,
897 6817,
898 6859,
899 7040,
900 7053,
901 7141,
902 7169,
903 7221,
904 7223,
905 7274,
906 7282,
907 7350,
908 7369,
909 7373,
910 7442,
911 7447,
912 7471,
913 7518,
914 7542,
915 7566,
916 7587,
917 7663,
918 7678,
919 7682,
920 7748,
921 7752,
922 7791,
923 8000,
924 8026,
925 8048,
926 8170,
927 8203,
928 8204,
929 8290,
930 8368,
931 8520,
932 8640,
933 8666,
934 8672,
935 8697,
936 8716,
937 8728,
938 8756,
939 8820,
940 8875,
941 8918,
942 8956,
943 9058,
944 9154,
945 9175,
946 9191,
947 9217,
948 9262,
949 9321,
950 9373,
951 9434,
952 9465,
953 9514,
954 9534,
955 9633,
956 9746,
957 9810,
958 9850,
959 9947,
960 9973,
961 10000,
962 10009,
963 10033,
964 10055,
965 10217,
966 10248,
967 10298,
968 10310,
969 10323,
970 10368,
971 10438,
972 10456,
973 10486,
974 10538,
975 10664,
976 10695,
977 10700,
978 10703,
979 10832,
980 10887,
981 10935,
982 10958,
983 11018,
984 11059,
985 11061,
986 11086,
987 11116,
988 11148,
989 11190,
990 11249,
991 11314,
992 11332,
993 11363,
994 11409,
995 11415,
996 11443,
997 11467,
998 11512,
999 11522,
1000 11529,
1001 11585,
1002 11759,
1003 11768,
1004 11795,
1005 11893,
1006 11997,
1007 12131,
1008 12299,
1009 12536,
1010 12543,
1011 12893,
1012 12945,
1013 12998,
1014 13109,
1015 13213,
1016 13685,
1017 13930,
1018 14023,
1019 14024,
1020 14271,
1021 14564,
1022 14647,
1023 15326,
1024 15850,
1025 15855,
1026 15929,
1027 16000,
1028 16154,
1029 16496,
1030 16807,
1031 16819,
1032 16984,
1033 17203,
1034 17223,
1035 17333,
1036 17760,
1037 17799,
1038 17837,
1039 18029,
1040 18068,
1041 18336,
1042 18515,
1043 19595,
1044 20017,
1045 20131,
1046 20862,
1047 20995,
1048 21709,
1049 22554,
1050 25000,
1051 25172,
1052 25600,
1053 25733,
1054 27439,
1055 38470,
1056 46802,
1057 50000,
1058 11796480,
1059 16843009,
1060 23592960,
1061 };
1062
1063
1064 const int num_mult_constants_used =
1065 (int)(sizeof multiply_constants_used
1066 / sizeof multiply_constants_used[0]);
1067
1068
1069 #define XSIZE (sizeof multiply_constants_used / \
1070 sizeof multiply_constants_used[0] + 32) / 32
1071 unsigned multiply_constants_avail[XSIZE];
1072 #undef XSIZE
1073
1074
1075 /* bsearch helper function. */
1076 static int
1077 compare_constants (const void *key, const void *t)
1078 {
1079 return (*(int*)key) - *((int*)t);
1080 }
1081
1082
1083 static int *
1084 find_mult_constants_used (int multiplier)
1085 {
1086 return (int *) bsearch (&multiplier, multiply_constants_used,
1087 num_mult_constants_used,
1088 sizeof multiply_constants_used[0],
1089 compare_constants);
1090 }
1091
1092
1093 int num_ops (ExpressionTree *s)
1094 {
1095 int n = 0;
1096 for (int i = 0; i < s->m_num_vals; i++)
1097 {
1098 Expr *e = &s->m_exprs[i];
1099 if (e->m_op != NULL)
1100 n++;
1101 }
1102 return n;
1103 }
1104
1105
1106 #ifdef TILEPRO
1107 bool
1108 tilepro_emit (int multiplier, int num_ops)
1109 {
1110 int abs_multiplier = (multiplier >= 0) ? multiplier : -multiplier;
1111
1112 /* Keep constants in range [-1024, 1024]. */
1113 if (abs_multiplier <= 1024)
1114 return true;
1115
1116 /* Keep constants near powers of two. */
1117 int prev_pow2 = 1 << (31 - __builtin_clz (abs_multiplier));
1118 int next_pow2 = prev_pow2 << 1;
1119
1120 if ((abs_multiplier - prev_pow2 <= 10)
1121 || (next_pow2 - abs_multiplier <= 10))
1122 return true;
1123
1124 /* Keep constants near powers of ten. */
1125 {
1126 long long j = 1;
1127 long long prev_pow10;
1128 long long next_pow10;
1129
1130 while (((j * 10) < abs_multiplier)
1131 && (j < (j * 10)))
1132 j = j * 10;
1133
1134 prev_pow10 = j;
1135 next_pow10 = j * 10;
1136
1137 if ((abs_multiplier - prev_pow10 <= 10)
1138 || (next_pow10 - abs_multiplier <= 10))
1139 return true;
1140 }
1141
1142 /* Keep short sequences that have two or fewer ops. */
1143 if (num_ops <= 2)
1144 return true;
1145
1146 /* Keep constants that are mostly 0's or mostly 1's. */
1147 if (__builtin_popcount (multiplier) <= 2 ||
1148 __builtin_popcount (multiplier) >= 30)
1149 return true;
1150
1151 /* Keep constants seen in actual code. */
1152 if ((find_mult_constants_used (multiplier)))
1153 return true;
1154
1155 return false;
1156 }
1157 #else
1158 bool
1159 tilegx_emit (long long multiplier, int num_ops)
1160 {
1161 long long abs_multiplier = (multiplier >= 0) ? multiplier : - multiplier;
1162
1163 /* Keep constants in range [-1024, 1024]. */
1164 if (abs_multiplier <= 1024)
1165 return true;
1166
1167 /* Otherwise exclude sequences with four ops. */
1168 if (num_ops > 3)
1169 return false;
1170
1171 /* Keep constants near powers of two. */
1172 {
1173 unsigned long long prev_pow2 =
1174 1LL << (63 - __builtin_clzll (abs_multiplier));
1175 unsigned long long next_pow2 = prev_pow2 << 1;
1176
1177 /* handle overflow case. */
1178 if (next_pow2 == 0)
1179 {
1180 if (prev_pow2 - abs_multiplier <= 10)
1181 return true;
1182 }
1183 else if ((abs_multiplier - prev_pow2 <= 10)
1184 || (next_pow2 - abs_multiplier <= 10))
1185 return true;
1186 }
1187
1188 /* Keep constants near powers of ten. */
1189 {
1190 long long j = 1;
1191 long long prev_pow10;
1192 long long next_pow10;
1193
1194 while (((j * 10) < abs_multiplier)
1195 && (j < (j * 10)))
1196 j = j * 10;
1197
1198 prev_pow10 = j;
1199 next_pow10 = j * 10;
1200
1201 if ((abs_multiplier - prev_pow10 <= 100)
1202 || (next_pow10
1203 && (next_pow10 - abs_multiplier <= 100)))
1204 return true;
1205 }
1206
1207 if (num_ops <= 2)
1208 return true;
1209
1210 /* Keep constants that are mostly 0's or mostly 1's. */
1211 if (__builtin_popcountll (multiplier) <= 2 ||
1212 __builtin_popcountll (multiplier) >= 62)
1213 return true;
1214
1215 /* Keep constants seen in actual code. */
1216 if (find_mult_constants_used (multiplier))
1217 return true;
1218
1219 return false;
1220 }
1221 #endif
1222
1223
1224 int
1225 main ()
1226 {
1227 ExpressionTreeMap best_solution;
1228 ExpressionTree s;
1229
1230 #ifdef TILEPRO
1231 printf ("/* Constant multiply table for TILEPro.\n");
1232 #else
1233 printf ("/* Constant multiply table for TILE-Gx.\n");
1234 #endif
1235 printf (" Copyright (C) 2011-2022 Free Software Foundation, Inc.\n");
1236 printf (" Contributed by Walter Lee (walt@tilera.com)\n");
1237 printf ("\n");
1238 printf (" This file is part of GCC.\n");
1239 printf ("\n");
1240 printf (" GCC is free software; you can redistribute it and/or modify it\n");
1241 printf (" under the terms of the GNU General Public License as published\n");
1242 printf (" by the Free Software Foundation; either version 3, or (at your\n");
1243 printf (" option) any later version.\n");
1244 printf ("\n");
1245 printf (" GCC is distributed in the hope that it will be useful, but WITHOUT\n");
1246 printf (" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n");
1247 printf (" or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public\n");
1248 printf (" License for more details.\n");
1249 printf ("\n");
1250 printf (" You should have received a copy of the GNU General Public License\n");
1251 printf (" along with GCC; see the file COPYING3. If not see\n");
1252 printf (" <http://www.gnu.org/licenses/>. */\n");
1253 printf ("\n");
1254 printf ("/* Note this file is auto-generated from gen-mul-tables.cc.\n");
1255 printf (" Make any required changes there. */\n");
1256 printf ("\n");
1257 printf ("#define IN_TARGET_CODE 1\n");
1258 printf ("\n");
1259 printf ("#include \"config.h\"\n");
1260 printf ("#include \"system.h\"\n");
1261 printf ("#include \"coretypes.h\"\n");
1262 printf ("#include \"backend.h\"\n");
1263 printf ("#include \"rtl.h\"\n");
1264 printf ("#include \"expmed.h\"\n");
1265 printf ("#include \"%s-multiply.h\"\n\n", ARCH);
1266 create_insn_code_compression_table ();
1267
1268 /* Try all combinations of operators and see what constants we
1269 produce. For each possible constant, record the most efficient
1270 way to generate it. */
1271 find_sequences (s, best_solution);
1272
1273 printf ("const struct %s_multiply_insn_seq "
1274 "%s_multiply_insn_seq_table[] = {\n",
1275 ARCH, ARCH);
1276
1277 const char *comma_separator = "";
1278
1279 ExpressionTreeMap::iterator i (best_solution.begin ());
1280 for (; i != best_solution.end (); ++i)
1281 {
1282 ExpressionTree *s = (*i).second;
1283 const MUL_TYPE n = (*i).first;
1284
1285 if (n == 0 || n == 1)
1286 {
1287 /* Both of these require zero operations, so these entries
1288 are bogus and should never be used. */
1289 continue;
1290 }
1291
1292 /* Prune the list of entries to keep the table to a reasonable
1293 size. */
1294 #ifdef TILEPRO
1295 if (!tilepro_emit (n, num_ops (s)))
1296 continue;
1297 #else
1298 if (!tilegx_emit (n, num_ops (s)))
1299 continue;
1300 #endif
1301
1302 printf ("%s", comma_separator);
1303
1304 #ifdef TILEPRO
1305 const MUL_TYPE int_min = INT32_MIN;
1306 #else
1307 const MUL_TYPE int_min = INT64_MIN;
1308 #endif
1309 if (n == int_min)
1310 {
1311 /* Handle C's woeful lack of unary negation. Without this,
1312 printing out INT_MIN in decimal will yield an unsigned
1313 int which could generate a compiler warning. */
1314 #ifdef TILEPRO
1315 printf (" {%d - 1 /* 0x%x */ ,\n {", n + 1,
1316 (unsigned) n);
1317 #else
1318 printf (" {%lldll - 1 /* 0x%llx */ ,\n {", n + 1,
1319 (unsigned MUL_TYPE) n);
1320 #endif
1321 }
1322 else
1323 {
1324 #ifdef TILEPRO
1325 printf (" {%d /* 0x%x */ ,\n {", n, (unsigned) n);
1326 #else
1327 printf (" {%lldll /* 0x%llx */ ,\n {", n, (unsigned MUL_TYPE) n);
1328 #endif
1329 }
1330
1331 bool first = true;
1332 for (int j = 0; j < s->m_num_vals; j++)
1333 {
1334 Expr *e = &s->m_exprs[j];
1335
1336 const Operator *op = e->m_op;
1337 if (op == NULL)
1338 continue;
1339
1340 char buf[1024];
1341 snprintf (buf, sizeof buf, "%s{%d, %d, %d}%s",
1342 first ? "" : " ",
1343 op->m_top_index,
1344 e->m_lhs, e->m_rhs, (j + 1) == s->m_num_vals ? "}" : ",");
1345
1346 char opnd0[10];
1347 if (e->m_lhs)
1348 snprintf (opnd0, sizeof opnd0, "r%d", e->m_lhs);
1349 else
1350 snprintf (opnd0, sizeof opnd0, "zero");
1351
1352 printf ("%s\t\t\t/* %s r%d, %s, %s%d */\n",
1353 buf, op->m_name, j, opnd0,
1354 op->is_unary () ? "" : "r", e->m_rhs);
1355
1356 first = false;
1357 }
1358 printf (" }");
1359 comma_separator = ",\n";
1360 }
1361
1362 printf ("\n};\n\n");
1363 printf ("const int %s_multiply_insn_seq_table_size =\n"
1364 " (int) (sizeof %s_multiply_insn_seq_table\n"
1365 " / sizeof %s_multiply_insn_seq_table[0]);\n",
1366 ARCH, ARCH, ARCH);
1367
1368 return EXIT_SUCCESS;
1369 }