]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/tilepro/gen-mul-tables.cc
Update copyright years.
[thirdparty/gcc.git] / gcc / config / tilepro / gen-mul-tables.cc
CommitLineData
dd552284 1/* Multiply table generator for tile.
7adcbafe 2 Copyright (C) 2011-2022 Free Software Foundation, Inc.
dd552284
WL
3 Contributed by Walter Lee (walt@tilera.com)
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21/* This program creates a table used to compile multiply by constant
22 efficiently.
23
24 This program should be compiled by a c++ compiler. If it's
026c3cfd 25 compiled with -DTILEPRO, it generates the multiply table for
dd552284
WL
26 TILEPro; otherwise it generates the multiply table for TILE-Gx.
27 Running the program produces the table in stdout.
28
29 The program works by generating every possible combination of up to
30 MAX_INSTRUCTIONS linear operators (such as add, sub, s2a, left
31 shift) and computing the multiplier computed by those instructions.
32 For example,
33
34 s2a r2,r1,r1
35 s2a r3,r2,r2
36
37 multiplies r1 by 25.
38
39 There are usually multiple instruction sequences to multiply by a
40 given constant. This program keeps only the cheapest one.
41 "Cheapest" is defined first by the minimum theoretical schedule
42 length, and if those are equal then by the number of instructions,
43 and if those are equal then by which instructions we "prefer"
44 (e.g. because we think one might take infinitesimally less power
45 than another, or simply because we arbitrarily pick one to be more
46 canonical).
47
48 Once this program has determined the best instruction sequence for
49 each multiplier, it emits them in a table sorted by the multiplier
50 so the user can binary-search it to look for a match. The table is
51 pruned based on various criteria to keep its sizes reasonable. */
52
53#include <stdio.h>
54#include <stdlib.h>
55#include <assert.h>
56#include <string.h>
57#define __STDC_LIMIT_MACROS
58#include <stdint.h>
59
60#include <map>
61
62#ifdef TILEPRO
63
64/* The string representing the architecture. */
65#define ARCH "tilepro"
66
67/* The type for the multiplication. */
68typedef int MUL_TYPE;
69
70#else
71
72/* The string representing the architecture. */
73#define ARCH "tilegx"
74
75/* The type for the multiplication. */
76typedef long long MUL_TYPE;
77
78#endif
79
80/* Longest instruction sequence this will produce. With the current
81 stupid algorithm runtime grows very quickly with this number. */
82#define MAX_INSTRUCTIONS 4
83
84/* Maximum number of subexpressions in the expression DAG being
85 generated. This is the same as the number of instructions, except
86 that zero and the original register we'd like to multiply by a
87 constant are also thrown into the mix. */
88#define MAX_SUBEXPRS (2 + MAX_INSTRUCTIONS)
89
90#define MIN(x, y) ((x) <= (y) ? (x) : (y))
91#define MAX(x, y) ((x) >= (y) ? (x) : (y))
92
93/* For this program a unary op is one which has only one nonconstant
94 operand. So shift left by 5 is considered unary. */
95typedef MUL_TYPE (*unary_op_func) (MUL_TYPE);
96typedef MUL_TYPE (*binary_op_func) (MUL_TYPE, MUL_TYPE);
97
98/* This describes an operation like 'add two registers' or 'left-shift
99 by 7'.
100
101 We call something a 'unary' operator if it only takes in one
102 register as input, even though it might have an implicit second
103 constant operand. Currently this is used for left-shift by
104 constant. */
105class Operator
106{
107public:
108 /* Construct for a binary operator. */
109 Operator (const char *pattern, const char *name, binary_op_func func,
110 int cost)
111 : m_pattern (pattern), m_name (name), m_top_index (-1),
112 m_unary_func (0), m_binary_func (func), m_cost (cost),
113 m_rhs_if_unary (0)
114 {
115 }
116
117 /* Construct for a unary operator. */
118 Operator (const char *pattern, const char *name, unary_op_func func,
119 int rhs_if_unary, int cost)
120 : m_pattern (pattern), m_name (name), m_top_index (-1),
121 m_unary_func (func), m_binary_func (0), m_cost (cost),
122 m_rhs_if_unary (rhs_if_unary)
123 {
124 }
125
126 bool is_unary () const
127 {
128 return m_binary_func == NULL;
129 }
130
131 /* Name of the pattern for this operation, e.g. CODE_FOR_addsi3. */
132 const char *m_pattern;
133
134 /* Name of the opcode for this operation, e.g. add. */
135 const char *m_name;
136
137 /* We don't have enough bits in our output representation to store
138 the original insn_code value, so we store a compressed form
139 instead. These values are decoded back into insn_code via the
140 machine-generated multiply_insn_seq_decode_opcode lookup
141 table. */
142 int m_top_index;
143
144 /* Unary operator to apply, or NULL if this is a binary operator. */
145 unary_op_func m_unary_func;
146
147 /* Binary operator to apply, or NULL if this is a unary operator. */
148 binary_op_func m_binary_func;
149
150 /* Function of how expensive we consider this operator. Higher is
151 worse. */
152 int m_cost;
153
154 /* the RHS value to write into the C file if unary; used for shift
155 count. */
156 int m_rhs_if_unary;
157};
158
159
160/* An entry in an expression DAG. */
161class Expr
162{
163public:
164 Expr () : m_op (NULL), m_lhs (0), m_rhs (0), m_produced_val (0),
165 m_critical_path_length (0)
166 {
167 }
168
169 /* The operator being applied to the operands. */
170 const Operator *m_op;
171
172 /* The index of the left-hand operand in the array of subexpressions
173 already computed. */
174 int m_lhs;
175
176 /* For binary ops ,this is the index of the left-hand operand in the
177 array of subexpressions already computed. For unary ops, it is
178 specific to the op (e.g. it might hold a constant shift
179 count). */
180 int m_rhs;
181
182 /* The multiplier produced by this expression tree. For example, for
183 the tree ((x << 5) + x), the value would be 33. */
184 MUL_TYPE m_produced_val;
185
186 /* How far is this expression from the root, i.e. how many cycles
187 minimum will it take to compute this? */
188 int m_critical_path_length;
189};
190
191
192/* Make function pointers for the various linear operators we can
193 apply to compute a multiplicative value. */
194
195static MUL_TYPE
196add (MUL_TYPE n1, MUL_TYPE n2)
197{
198 return n1 + n2;
199}
200
201static MUL_TYPE
202sub (MUL_TYPE n1, MUL_TYPE n2)
203{
204 return n1 - n2;
205}
206
207static MUL_TYPE
208s1a (MUL_TYPE n1, MUL_TYPE n2)
209{
210 return n1 * 2 + n2;
211}
212
213static MUL_TYPE
214s2a (MUL_TYPE n1, MUL_TYPE n2)
215{
216 return n1 * 4 + n2;
217}
218
219static MUL_TYPE
220s3a (MUL_TYPE n1, MUL_TYPE n2)
221{
222 return n1 * 8 + n2;
223}
224
225#define SHIFT(count) \
226static MUL_TYPE \
227shift##count(MUL_TYPE n) \
228{ \
229 return n << (count); \
230}
231
232SHIFT (1);
233SHIFT (2);
234SHIFT (3);
235SHIFT (4);
236SHIFT (5);
237SHIFT (6);
238SHIFT (7);
239SHIFT (8);
240SHIFT (9);
241SHIFT (10);
242SHIFT (11);
243SHIFT (12);
244SHIFT (13);
245SHIFT (14);
246SHIFT (15);
247SHIFT (16);
248SHIFT (17);
249SHIFT (18);
250SHIFT (19);
251SHIFT (20);
252SHIFT (21);
253SHIFT (22);
254SHIFT (23);
255SHIFT (24);
256SHIFT (25);
257SHIFT (26);
258SHIFT (27);
259SHIFT (28);
260SHIFT (29);
261SHIFT (30);
262SHIFT (31);
263#ifndef TILEPRO
264SHIFT (32);
265SHIFT (33);
266SHIFT (34);
267SHIFT (35);
268SHIFT (36);
269SHIFT (37);
270SHIFT (38);
271SHIFT (39);
272SHIFT (40);
273SHIFT (41);
274SHIFT (42);
275SHIFT (43);
276SHIFT (44);
277SHIFT (45);
278SHIFT (46);
279SHIFT (47);
280SHIFT (48);
281SHIFT (49);
282SHIFT (50);
283SHIFT (51);
284SHIFT (52);
285SHIFT (53);
286SHIFT (54);
287SHIFT (55);
288SHIFT (56);
289SHIFT (57);
290SHIFT (58);
291SHIFT (59);
292SHIFT (60);
293SHIFT (61);
294SHIFT (62);
295SHIFT (63);
296#endif
297
298#ifdef TILEPRO
299static Operator ops[] = {
300 Operator ("CODE_FOR_addsi3", "add", add, 1040),
301 Operator ("CODE_FOR_subsi3", "sub", sub, 1041),
302 Operator ("CODE_FOR_insn_s1a", "s1a", s1a, 1042),
303 Operator ("CODE_FOR_insn_s2a", "s2a", s2a, 1043),
304 Operator ("CODE_FOR_insn_s3a", "s3a", s3a, 1044),
305 /* Note: shl by 1 is not necessary, since adding a value to itself
306 produces the same result. But the architecture team thinks
307 left-shift may use slightly less power, so we might as well
308 prefer it. */
309 Operator ("CODE_FOR_ashlsi3", "shli", shift1, 1, 1001),
310 Operator ("CODE_FOR_ashlsi3", "shli", shift2, 2, 1002),
311 Operator ("CODE_FOR_ashlsi3", "shli", shift3, 3, 1003),
312 Operator ("CODE_FOR_ashlsi3", "shli", shift4, 4, 1004),
313 Operator ("CODE_FOR_ashlsi3", "shli", shift5, 5, 1005),
314 Operator ("CODE_FOR_ashlsi3", "shli", shift6, 6, 1006),
315 Operator ("CODE_FOR_ashlsi3", "shli", shift7, 7, 1007),
316 Operator ("CODE_FOR_ashlsi3", "shli", shift8, 8, 1008),
317 Operator ("CODE_FOR_ashlsi3", "shli", shift9, 9, 1009),
318 Operator ("CODE_FOR_ashlsi3", "shli", shift10, 10, 1010),
319 Operator ("CODE_FOR_ashlsi3", "shli", shift11, 11, 1011),
320 Operator ("CODE_FOR_ashlsi3", "shli", shift12, 12, 1012),
321 Operator ("CODE_FOR_ashlsi3", "shli", shift13, 13, 1013),
322 Operator ("CODE_FOR_ashlsi3", "shli", shift14, 14, 1014),
323 Operator ("CODE_FOR_ashlsi3", "shli", shift15, 15, 1015),
324 Operator ("CODE_FOR_ashlsi3", "shli", shift16, 16, 1016),
325 Operator ("CODE_FOR_ashlsi3", "shli", shift17, 17, 1017),
326 Operator ("CODE_FOR_ashlsi3", "shli", shift18, 18, 1018),
327 Operator ("CODE_FOR_ashlsi3", "shli", shift19, 19, 1019),
328 Operator ("CODE_FOR_ashlsi3", "shli", shift20, 20, 1020),
329 Operator ("CODE_FOR_ashlsi3", "shli", shift21, 21, 1021),
330 Operator ("CODE_FOR_ashlsi3", "shli", shift22, 22, 1022),
331 Operator ("CODE_FOR_ashlsi3", "shli", shift23, 23, 1023),
332 Operator ("CODE_FOR_ashlsi3", "shli", shift24, 24, 1024),
333 Operator ("CODE_FOR_ashlsi3", "shli", shift25, 25, 1025),
334 Operator ("CODE_FOR_ashlsi3", "shli", shift26, 26, 1026),
335 Operator ("CODE_FOR_ashlsi3", "shli", shift27, 27, 1027),
336 Operator ("CODE_FOR_ashlsi3", "shli", shift28, 28, 1028),
337 Operator ("CODE_FOR_ashlsi3", "shli", shift29, 29, 1029),
338 Operator ("CODE_FOR_ashlsi3", "shli", shift30, 30, 1030),
339 Operator ("CODE_FOR_ashlsi3", "shli", shift31, 31, 1031)
340};
341#else
342static Operator ops[] = {
343 Operator ("CODE_FOR_adddi3", "add", add, 1070),
344 Operator ("CODE_FOR_subdi3", "sub", sub, 1071),
345 Operator ("CODE_FOR_insn_shl1add", "shl1add", s1a, 1072),
346 Operator ("CODE_FOR_insn_shl2add", "shl2add", s2a, 1073),
347 Operator ("CODE_FOR_insn_shl3add", "shl3add", s3a, 1074),
348 // Note: shl by 1 is not necessary, since adding a value to itself
349 // produces the same result. But the architecture team thinks left-shift
350 // may use slightly less power, so we might as well prefer it.
351 Operator ("CODE_FOR_ashldi3", "shli", shift1, 1, 1001),
352 Operator ("CODE_FOR_ashldi3", "shli", shift2, 2, 1002),
353 Operator ("CODE_FOR_ashldi3", "shli", shift3, 3, 1003),
354 Operator ("CODE_FOR_ashldi3", "shli", shift4, 4, 1004),
355 Operator ("CODE_FOR_ashldi3", "shli", shift5, 5, 1005),
356 Operator ("CODE_FOR_ashldi3", "shli", shift6, 6, 1006),
357 Operator ("CODE_FOR_ashldi3", "shli", shift7, 7, 1007),
358 Operator ("CODE_FOR_ashldi3", "shli", shift8, 8, 1008),
359 Operator ("CODE_FOR_ashldi3", "shli", shift9, 9, 1009),
360 Operator ("CODE_FOR_ashldi3", "shli", shift10, 10, 1010),
361 Operator ("CODE_FOR_ashldi3", "shli", shift11, 11, 1011),
362 Operator ("CODE_FOR_ashldi3", "shli", shift12, 12, 1012),
363 Operator ("CODE_FOR_ashldi3", "shli", shift13, 13, 1013),
364 Operator ("CODE_FOR_ashldi3", "shli", shift14, 14, 1014),
365 Operator ("CODE_FOR_ashldi3", "shli", shift15, 15, 1015),
366 Operator ("CODE_FOR_ashldi3", "shli", shift16, 16, 1016),
367 Operator ("CODE_FOR_ashldi3", "shli", shift17, 17, 1017),
368 Operator ("CODE_FOR_ashldi3", "shli", shift18, 18, 1018),
369 Operator ("CODE_FOR_ashldi3", "shli", shift19, 19, 1019),
370 Operator ("CODE_FOR_ashldi3", "shli", shift20, 20, 1020),
371 Operator ("CODE_FOR_ashldi3", "shli", shift21, 21, 1021),
372 Operator ("CODE_FOR_ashldi3", "shli", shift22, 22, 1022),
373 Operator ("CODE_FOR_ashldi3", "shli", shift23, 23, 1023),
374 Operator ("CODE_FOR_ashldi3", "shli", shift24, 24, 1024),
375 Operator ("CODE_FOR_ashldi3", "shli", shift25, 25, 1025),
376 Operator ("CODE_FOR_ashldi3", "shli", shift26, 26, 1026),
377 Operator ("CODE_FOR_ashldi3", "shli", shift27, 27, 1027),
378 Operator ("CODE_FOR_ashldi3", "shli", shift28, 28, 1028),
379 Operator ("CODE_FOR_ashldi3", "shli", shift29, 29, 1029),
380 Operator ("CODE_FOR_ashldi3", "shli", shift30, 30, 1030),
381 Operator ("CODE_FOR_ashldi3", "shli", shift31, 31, 1031),
382 Operator ("CODE_FOR_ashldi3", "shli", shift32, 32, 1032),
383 Operator ("CODE_FOR_ashldi3", "shli", shift33, 33, 1033),
384 Operator ("CODE_FOR_ashldi3", "shli", shift34, 34, 1034),
385 Operator ("CODE_FOR_ashldi3", "shli", shift35, 35, 1035),
386 Operator ("CODE_FOR_ashldi3", "shli", shift36, 36, 1036),
387 Operator ("CODE_FOR_ashldi3", "shli", shift37, 37, 1037),
388 Operator ("CODE_FOR_ashldi3", "shli", shift38, 38, 1038),
389 Operator ("CODE_FOR_ashldi3", "shli", shift39, 39, 1039),
390 Operator ("CODE_FOR_ashldi3", "shli", shift40, 40, 1040),
391 Operator ("CODE_FOR_ashldi3", "shli", shift41, 41, 1041),
392 Operator ("CODE_FOR_ashldi3", "shli", shift42, 42, 1042),
393 Operator ("CODE_FOR_ashldi3", "shli", shift43, 43, 1043),
394 Operator ("CODE_FOR_ashldi3", "shli", shift44, 44, 1044),
395 Operator ("CODE_FOR_ashldi3", "shli", shift45, 45, 1045),
396 Operator ("CODE_FOR_ashldi3", "shli", shift46, 46, 1046),
397 Operator ("CODE_FOR_ashldi3", "shli", shift47, 47, 1047),
398 Operator ("CODE_FOR_ashldi3", "shli", shift48, 48, 1048),
399 Operator ("CODE_FOR_ashldi3", "shli", shift49, 49, 1049),
400 Operator ("CODE_FOR_ashldi3", "shli", shift50, 50, 1050),
401 Operator ("CODE_FOR_ashldi3", "shli", shift51, 51, 1051),
402 Operator ("CODE_FOR_ashldi3", "shli", shift52, 52, 1052),
403 Operator ("CODE_FOR_ashldi3", "shli", shift53, 53, 1053),
404 Operator ("CODE_FOR_ashldi3", "shli", shift54, 54, 1054),
405 Operator ("CODE_FOR_ashldi3", "shli", shift55, 55, 1055),
406 Operator ("CODE_FOR_ashldi3", "shli", shift56, 56, 1056),
407 Operator ("CODE_FOR_ashldi3", "shli", shift57, 57, 1057),
408 Operator ("CODE_FOR_ashldi3", "shli", shift58, 58, 1058),
409 Operator ("CODE_FOR_ashldi3", "shli", shift59, 59, 1059),
410 Operator ("CODE_FOR_ashldi3", "shli", shift60, 60, 1060),
411 Operator ("CODE_FOR_ashldi3", "shli", shift61, 61, 1061),
412 Operator ("CODE_FOR_ashldi3", "shli", shift62, 62, 1062),
413 Operator ("CODE_FOR_ashldi3", "shli", shift63, 63, 1063)
414};
415#endif
416
417/* An ExpressionTree is an expression DAG. */
418class ExpressionTree
419{
420public:
421 ExpressionTree () : m_num_vals (2)
422 {
423 m_exprs[0].m_produced_val = 0;
424 m_exprs[1].m_produced_val = 1;
425 }
426
427 void copy_technique_from (ExpressionTree * other)
428 {
429 /* TODO: write this; other can compute the same products with less
430 cost. We update this ExpressionTree object because some int is
431 already mapped to it. */
432 }
433
434 int m_num_vals;
435 Expr m_exprs[MAX_SUBEXPRS];
436
437 int cost () const
438 {
439 int cost = 0;
440 for (int j = 2; j < m_num_vals; j++)
441 cost += m_exprs[j].m_op->m_cost;
442 return cost + m_exprs[m_num_vals - 1].m_critical_path_length * 1000000;
443 }
444};
445
446
447typedef std::map<MUL_TYPE, ExpressionTree *> ExpressionTreeMap;
448
449
450static void
451find_sequences (ExpressionTree &s, ExpressionTreeMap &best_solution)
452{
453 /* Don't look more if we can't add any new values to the expression
454 tree. */
455 const int num_vals = s.m_num_vals;
456 if (num_vals == MAX_SUBEXPRS)
457 return;
458
459 /* Grow array to make room for new values added as we loop. */
460 s.m_num_vals = num_vals + 1;
461
462 const Operator *const prev_op = s.m_exprs[num_vals - 1].m_op;
463 const int prev_top_index = (prev_op != NULL) ? prev_op->m_top_index : -1;
464
465 for (size_t f = 0; f < sizeof ops / sizeof ops[0]; f++)
466 {
467 const Operator *const op = &ops[f];
468
469 for (int i = 0; i < num_vals; i++)
470 {
471 /* Only allow zero as the first operand to sub, otherwise
472 it is useless. */
473 if (i == 0 && op->m_binary_func != sub)
474 continue;
475
476 /* Unary ops don't actually use the second operand, so as a
477 big hack we trick it into only looping once in the inner
478 loop. */
479 const int j_end = op->is_unary () ? 2 : num_vals;
480
481 /* We never allow zero as the second operand, as it is
482 always useless. */
483 for (int j = 1; j < j_end; j++)
484 {
485 /* Does this expression use the immediately previous
486 expression? */
487 const bool uses_prev_value =
488 (i == num_vals - 1
489 || (!op->is_unary () && j == num_vals - 1));
490
491 if (!uses_prev_value)
492 {
493 /* For search efficiency, prune redundant
494 instruction orderings.
495
496 This op does not take the immediately preceding
497 value as input, which means we could have done it
498 in the previous slot. If its opcode is less than
499 the previous instruction's, we'll declare this
500 instruction order non-canonical and not pursue
501 this branch of the search. */
502 if (op->m_top_index < prev_top_index)
503 continue;
504 }
505
506 MUL_TYPE n;
507 if (op->is_unary ())
508 {
509 n = op->m_unary_func (s.m_exprs[i].m_produced_val);
510 }
511 else
512 {
513 n = op->m_binary_func (s.m_exprs[i].m_produced_val,
514 s.m_exprs[j].m_produced_val);
515 }
516
517 bool duplicate = false;
518 for (int k = 0; k < num_vals; k++)
519 if (n == s.m_exprs[j].m_produced_val)
520 {
521 duplicate = true;
522 break;
523 }
524
525 if (duplicate)
526 continue;
527
528 /* See if we found the best solution for n. */
529 Expr *e = &s.m_exprs[num_vals];
530 e->m_op = op;
531 e->m_lhs = i;
532 e->m_rhs = op->is_unary () ? op->m_rhs_if_unary : j;
533 e->m_produced_val = n;
534 e->m_critical_path_length =
535 1 + MAX (s.m_exprs[i].m_critical_path_length,
536 s.m_exprs[j].m_critical_path_length);
537
538 ExpressionTreeMap::iterator best (best_solution.find (n));
539 if (best == best_solution.end ()
540 || (*best).second->cost () > s.cost ())
541 best_solution[n] = new ExpressionTree (s);
542
543 /* Recurse and find more. */
544 find_sequences (s, best_solution);
545 }
546 }
547 }
548
549 /* Restore old size. */
550 s.m_num_vals = num_vals;
551}
552
553
554/* For each insn_code used by an operator, choose a compact number so
555 it takes less space in the output data structure. This prints out a
556 lookup table used to map the compactified number back to an
557 insn_code. */
558static void
559create_insn_code_compression_table ()
560{
561 int next_index = 1;
562
563 /* Entry 0 must hold CODE_FOR_nothing to mean "end of array". */
564 printf ("const enum insn_code %s_multiply_insn_seq_decode_opcode[] = {\n"
565 " CODE_FOR_nothing /* must be first */ ", ARCH);
566
567 for (size_t i = 0; i < sizeof ops / sizeof ops[0]; i++)
568 {
569 Operator *op = &ops[i];
570 int index = -1;
571
572 /* See if some previous Operator was using the same insn_code.
573 If so, reuse its table entry. */
574 for (size_t j = 0; j < i; j++)
575 {
576 Operator *old = &ops[j];
577 if (strcmp (old->m_pattern, op->m_pattern) == 0)
578 {
579 index = old->m_top_index;
580 break;
581 }
582 }
583
584 if (index == -1)
585 {
586 /* We need to make a new entry in the table. */
587 printf (",\n %s", op->m_pattern);
588 index = next_index++;
589 }
590
591 op->m_top_index = index;
592 }
593
594 printf ("\n};\n\n");
595}
596
597
598/* These are constants we've seen in code, that we want to keep table
599 entries for. */
600static int multiply_constants_used[] = {
601 -11796480,
602 -27439,
603 -25148,
604 -22820,
605 -21709,
606 -20995,
607 -20284,
608 -20239,
609 -19595,
610 -19447,
611 -19183,
612 -19165,
613 -18730,
614 -17828,
615 -17799,
616 -17237,
617 -17036,
618 -16549,
619 -16423,
620 -16294,
621 -16244,
622 -16069,
623 -15137,
624 -15083,
625 -15038,
626 -14924,
627 -14905,
628 -14752,
629 -14731,
630 -14529,
631 -14273,
632 -14090,
633 -14084,
634 -14043,
635 -13850,
636 -13802,
637 -13631,
638 -13455,
639 -13275,
640 -12879,
641 -12700,
642 -12534,
643 -12399,
644 -12131,
645 -12112,
646 -12054,
647 -12019,
648 -11759,
649 -11585,
650 -11467,
651 -11395,
652 -11295,
653 -11248,
654 -11148,
655 -11116,
656 -11086,
657 -11059,
658 -11018,
659 -10811,
660 -10538,
661 -10258,
662 -10217,
663 -10033,
664 -9766,
665 -9754,
666 -9534,
667 -9527,
668 -9467,
669 -9262,
670 -9232,
671 -9222,
672 -9198,
673 -9191,
674 -9113,
675 -8825,
676 -8756,
677 -8697,
678 -8693,
679 -8565,
680 -8342,
681 -8208,
682 -8200,
683 -8170,
684 -8102,
685 -7770,
686 -7678,
687 -7562,
688 -7376,
689 -7373,
690 -7221,
691 -7121,
692 -6835,
693 -6810,
694 -6626,
695 -6581,
696 -6461,
697 -6278,
698 -6263,
699 -6163,
700 -6029,
701 -5816,
702 -5540,
703 -5461,
704 -5384,
705 -5329,
706 -4985,
707 -4926,
708 -4815,
709 -4788,
710 -4758,
711 -4433,
712 -4229,
713 -4209,
714 -4176,
715 -4104,
716 -4095,
717 -4078,
718 -3941,
719 -3818,
720 -3600,
721 -3474,
722 -3314,
723 -3264,
724 -3196,
725 -3072,
726 -2912,
727 -2836,
728 -2773,
729 -2269,
730 -2184,
731 -2100,
732 -1730,
733 -1512,
734 -1500,
735 -1396,
736 -1344,
737 -1312,
738 -1297,
739 -1059,
740 -1058,
741 1027,
742 1049,
743 1059,
744 1100,
745 1104,
746 1108,
747 1136,
748 1200,
749 1204,
750 1242,
751 1292,
752 1304,
753 1312,
754 1320,
755 1336,
756 1344,
757 1348,
758 1360,
759 1364,
760 1395,
761 1448,
762 1460,
763 1461,
764 1472,
765 1488,
766 1500,
767 1512,
768 1568,
769 1576,
770 1649,
771 1664,
772 1684,
773 1696,
774 1744,
775 1812,
776 1822,
777 1884,
778 1963,
779 1978,
780 2000,
781 2012,
782 2014,
783 2037,
784 2039,
785 2100,
786 2139,
787 2144,
788 2184,
789 2237,
790 2260,
791 2320,
792 2408,
793 2446,
794 2447,
795 2499,
796 2531,
797 2578,
798 2592,
799 2611,
800 2633,
801 2704,
802 2730,
803 2773,
804 2880,
805 2896,
806 2998,
807 3000,
808 3001,
809 3021,
810 3079,
811 3112,
812 3150,
813 3179,
814 3192,
815 3240,
816 3264,
817 3271,
818 3283,
819 3328,
820 3363,
821 3367,
822 3453,
823 3529,
824 3570,
825 3580,
826 3600,
827 3624,
828 3707,
829 3783,
830 3826,
831 3897,
832 3941,
833 3962,
834 3989,
835 4000,
836 4025,
837 4073,
838 4093,
839 4099,
840 4108,
841 4184,
842 4209,
843 4369,
844 4376,
845 4416,
846 4433,
847 4434,
848 4482,
849 4582,
850 4712,
851 4717,
852 4813,
853 4815,
854 4864,
855 5000,
856 5027,
857 5040,
858 5091,
859 5195,
860 5243,
861 5260,
862 5285,
863 5329,
864 5331,
865 5350,
866 5361,
867 5387,
868 5461,
869 5492,
870 5529,
871 5573,
872 5793,
873 5819,
874 5915,
875 5946,
876 5992,
877 6000,
878 6164,
879 6205,
880 6262,
881 6263,
882 6269,
883 6270,
884 6387,
885 6400,
886 6406,
887 6476,
888 6541,
889 6565,
890 6568,
891 6626,
892 6656,
893 6732,
894 6810,
895 6817,
896 6859,
897 7040,
898 7053,
899 7141,
900 7169,
901 7221,
902 7223,
903 7274,
904 7282,
905 7350,
906 7369,
907 7373,
908 7442,
909 7447,
910 7471,
911 7518,
912 7542,
913 7566,
914 7587,
915 7663,
916 7678,
917 7682,
918 7748,
919 7752,
920 7791,
921 8000,
922 8026,
923 8048,
924 8170,
925 8203,
926 8204,
927 8290,
928 8368,
929 8520,
930 8640,
931 8666,
932 8672,
933 8697,
934 8716,
935 8728,
936 8756,
937 8820,
938 8875,
939 8918,
940 8956,
941 9058,
942 9154,
943 9175,
944 9191,
945 9217,
946 9262,
947 9321,
948 9373,
949 9434,
950 9465,
951 9514,
952 9534,
953 9633,
954 9746,
955 9810,
956 9850,
957 9947,
958 9973,
959 10000,
960 10009,
961 10033,
962 10055,
963 10217,
964 10248,
965 10298,
966 10310,
967 10323,
968 10368,
969 10438,
970 10456,
971 10486,
972 10538,
973 10664,
974 10695,
975 10700,
976 10703,
977 10832,
978 10887,
979 10935,
980 10958,
981 11018,
982 11059,
983 11061,
984 11086,
985 11116,
986 11148,
987 11190,
988 11249,
989 11314,
990 11332,
991 11363,
992 11409,
993 11415,
994 11443,
995 11467,
996 11512,
997 11522,
998 11529,
999 11585,
1000 11759,
1001 11768,
1002 11795,
1003 11893,
1004 11997,
1005 12131,
1006 12299,
1007 12536,
1008 12543,
1009 12893,
1010 12945,
1011 12998,
1012 13109,
1013 13213,
1014 13685,
1015 13930,
1016 14023,
1017 14024,
1018 14271,
1019 14564,
1020 14647,
1021 15326,
1022 15850,
1023 15855,
1024 15929,
1025 16000,
1026 16154,
1027 16496,
1028 16807,
1029 16819,
1030 16984,
1031 17203,
1032 17223,
1033 17333,
1034 17760,
1035 17799,
1036 17837,
1037 18029,
1038 18068,
1039 18336,
1040 18515,
1041 19595,
1042 20017,
1043 20131,
1044 20862,
1045 20995,
1046 21709,
1047 22554,
1048 25000,
1049 25172,
1050 25600,
1051 25733,
1052 27439,
1053 38470,
1054 46802,
1055 50000,
1056 11796480,
1057 16843009,
1058 23592960,
1059};
1060
1061
1062const int num_mult_constants_used =
1063 (int)(sizeof multiply_constants_used
1064 / sizeof multiply_constants_used[0]);
1065
1066
1067#define XSIZE (sizeof multiply_constants_used / \
1068 sizeof multiply_constants_used[0] + 32) / 32
1069unsigned multiply_constants_avail[XSIZE];
1070#undef XSIZE
1071
1072
1073/* bsearch helper function. */
1074static int
1075compare_constants (const void *key, const void *t)
1076{
1077 return (*(int*)key) - *((int*)t);
1078}
1079
1080
1081static int *
1082find_mult_constants_used (int multiplier)
1083{
1084 return (int *) bsearch (&multiplier, multiply_constants_used,
1085 num_mult_constants_used,
1086 sizeof multiply_constants_used[0],
1087 compare_constants);
1088}
1089
1090
1091int num_ops (ExpressionTree *s)
1092{
1093 int n = 0;
1094 for (int i = 0; i < s->m_num_vals; i++)
1095 {
1096 Expr *e = &s->m_exprs[i];
1097 if (e->m_op != NULL)
1098 n++;
1099 }
1100 return n;
1101}
1102
1103
1104#ifdef TILEPRO
1105bool
1106tilepro_emit (int multiplier, int num_ops)
1107{
1108 int abs_multiplier = (multiplier >= 0) ? multiplier : -multiplier;
1109
1110 /* Keep constants in range [-1024, 1024]. */
1111 if (abs_multiplier <= 1024)
1112 return true;
1113
1114 /* Keep constants near powers of two. */
1115 int prev_pow2 = 1 << (31 - __builtin_clz (abs_multiplier));
1116 int next_pow2 = prev_pow2 << 1;
1117
1118 if ((abs_multiplier - prev_pow2 <= 10)
1119 || (next_pow2 - abs_multiplier <= 10))
1120 return true;
1121
1122 /* Keep constants near powers of ten. */
1123 {
1124 long long j = 1;
1125 long long prev_pow10;
1126 long long next_pow10;
1127
1128 while (((j * 10) < abs_multiplier)
1129 && (j < (j * 10)))
1130 j = j * 10;
1131
1132 prev_pow10 = j;
1133 next_pow10 = j * 10;
1134
1135 if ((abs_multiplier - prev_pow10 <= 10)
1136 || (next_pow10 - abs_multiplier <= 10))
1137 return true;
1138 }
1139
1140 /* Keep short sequences that have two or fewer ops. */
1141 if (num_ops <= 2)
1142 return true;
1143
1144 /* Keep constants that are mostly 0's or mostly 1's. */
1145 if (__builtin_popcount (multiplier) <= 2 ||
1146 __builtin_popcount (multiplier) >= 30)
1147 return true;
1148
1149 /* Keep constants seen in actual code. */
1150 if ((find_mult_constants_used (multiplier)))
1151 return true;
1152
1153 return false;
1154}
1155#else
1156bool
1157tilegx_emit (long long multiplier, int num_ops)
1158{
1159 long long abs_multiplier = (multiplier >= 0) ? multiplier : - multiplier;
1160
1161 /* Keep constants in range [-1024, 1024]. */
1162 if (abs_multiplier <= 1024)
1163 return true;
1164
1165 /* Otherwise exclude sequences with four ops. */
1166 if (num_ops > 3)
1167 return false;
1168
1169 /* Keep constants near powers of two. */
1170 {
1171 unsigned long long prev_pow2 =
1172 1LL << (63 - __builtin_clzll (abs_multiplier));
1173 unsigned long long next_pow2 = prev_pow2 << 1;
1174
1175 /* handle overflow case. */
1176 if (next_pow2 == 0)
1177 {
1178 if (prev_pow2 - abs_multiplier <= 10)
1179 return true;
1180 }
1181 else if ((abs_multiplier - prev_pow2 <= 10)
1182 || (next_pow2 - abs_multiplier <= 10))
1183 return true;
1184 }
1185
1186 /* Keep constants near powers of ten. */
1187 {
1188 long long j = 1;
1189 long long prev_pow10;
1190 long long next_pow10;
1191
1192 while (((j * 10) < abs_multiplier)
1193 && (j < (INTMAX_MAX / 10)))
1194 j = j * 10;
1195
1196 prev_pow10 = j;
1197 next_pow10 = (j > (INTMAX_MAX / 10)) ? 0 : j * 10;
1198
1199 if ((abs_multiplier - prev_pow10 <= 100)
1200 || (next_pow10
1201 && (next_pow10 - abs_multiplier <= 100)))
1202 return true;
1203 }
1204
1205 if (num_ops <= 2)
1206 return true;
1207
1208 /* Keep constants that are mostly 0's or mostly 1's. */
1209 if (__builtin_popcountll (multiplier) <= 2 ||
1210 __builtin_popcountll (multiplier) >= 62)
1211 return true;
1212
1213 /* Keep constants seen in actual code. */
1214 if (find_mult_constants_used (multiplier))
1215 return true;
1216
1217 return false;
1218}
1219#endif
1220
1221
1222int
1223main ()
1224{
1225 ExpressionTreeMap best_solution;
1226 ExpressionTree s;
1227
1228#ifdef TILEPRO
1229 printf ("/* Constant multiply table for TILEPro.\n");
1230#else
1231 printf ("/* Constant multiply table for TILE-Gx.\n");
1232#endif
7adcbafe 1233 printf (" Copyright (C) 2011-2022 Free Software Foundation, Inc.\n");
dd552284
WL
1234 printf (" Contributed by Walter Lee (walt@tilera.com)\n");
1235 printf ("\n");
1236 printf (" This file is part of GCC.\n");
1237 printf ("\n");
1238 printf (" GCC is free software; you can redistribute it and/or modify it\n");
1239 printf (" under the terms of the GNU General Public License as published\n");
1240 printf (" by the Free Software Foundation; either version 3, or (at your\n");
1241 printf (" option) any later version.\n");
1242 printf ("\n");
1243 printf (" GCC is distributed in the hope that it will be useful, but WITHOUT\n");
1244 printf (" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n");
1245 printf (" or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public\n");
1246 printf (" License for more details.\n");
1247 printf ("\n");
1248 printf (" You should have received a copy of the GNU General Public License\n");
1249 printf (" along with GCC; see the file COPYING3. If not see\n");
1250 printf (" <http://www.gnu.org/licenses/>. */\n");
1251 printf ("\n");
b61d924d
AM
1252 printf ("/* Note this file is auto-generated from gen-mul-tables.cc.\n");
1253 printf (" Make any required changes there. */\n");
1254 printf ("\n");
27e5d7c7
IB
1255 printf ("#define IN_TARGET_CODE 1\n");
1256 printf ("\n");
dd552284
WL
1257 printf ("#include \"config.h\"\n");
1258 printf ("#include \"system.h\"\n");
1259 printf ("#include \"coretypes.h\"\n");
2f40fae2 1260 printf ("#include \"backend.h\"\n");
36566b39 1261 printf ("#include \"rtl.h\"\n");
36566b39 1262 printf ("#include \"expmed.h\"\n");
dd552284
WL
1263 printf ("#include \"%s-multiply.h\"\n\n", ARCH);
1264 create_insn_code_compression_table ();
1265
1266 /* Try all combinations of operators and see what constants we
1267 produce. For each possible constant, record the most efficient
1268 way to generate it. */
1269 find_sequences (s, best_solution);
1270
1271 printf ("const struct %s_multiply_insn_seq "
1272 "%s_multiply_insn_seq_table[] = {\n",
1273 ARCH, ARCH);
1274
1275 const char *comma_separator = "";
1276
1277 ExpressionTreeMap::iterator i (best_solution.begin ());
1278 for (; i != best_solution.end (); ++i)
1279 {
1280 ExpressionTree *s = (*i).second;
1281 const MUL_TYPE n = (*i).first;
1282
1283 if (n == 0 || n == 1)
1284 {
1285 /* Both of these require zero operations, so these entries
1286 are bogus and should never be used. */
1287 continue;
1288 }
1289
1290 /* Prune the list of entries to keep the table to a reasonable
1291 size. */
1292#ifdef TILEPRO
1293 if (!tilepro_emit (n, num_ops (s)))
1294 continue;
1295#else
1296 if (!tilegx_emit (n, num_ops (s)))
1297 continue;
1298#endif
1299
1300 printf ("%s", comma_separator);
1301
1302#ifdef TILEPRO
1303 const MUL_TYPE int_min = INT32_MIN;
1304#else
1305 const MUL_TYPE int_min = INT64_MIN;
1306#endif
1307 if (n == int_min)
1308 {
1309 /* Handle C's woeful lack of unary negation. Without this,
1310 printing out INT_MIN in decimal will yield an unsigned
1311 int which could generate a compiler warning. */
1312#ifdef TILEPRO
1313 printf (" {%d - 1 /* 0x%x */ ,\n {", n + 1,
1314 (unsigned) n);
1315#else
1316 printf (" {%lldll - 1 /* 0x%llx */ ,\n {", n + 1,
1317 (unsigned MUL_TYPE) n);
1318#endif
1319 }
1320 else
1321 {
1322#ifdef TILEPRO
1323 printf (" {%d /* 0x%x */ ,\n {", n, (unsigned) n);
1324#else
1325 printf (" {%lldll /* 0x%llx */ ,\n {", n, (unsigned MUL_TYPE) n);
1326#endif
1327 }
1328
1329 bool first = true;
1330 for (int j = 0; j < s->m_num_vals; j++)
1331 {
1332 Expr *e = &s->m_exprs[j];
1333
1334 const Operator *op = e->m_op;
1335 if (op == NULL)
1336 continue;
1337
1338 char buf[1024];
1339 snprintf (buf, sizeof buf, "%s{%d, %d, %d}%s",
1340 first ? "" : " ",
1341 op->m_top_index,
1342 e->m_lhs, e->m_rhs, (j + 1) == s->m_num_vals ? "}" : ",");
1343
1344 char opnd0[10];
1345 if (e->m_lhs)
1346 snprintf (opnd0, sizeof opnd0, "r%d", e->m_lhs);
1347 else
1348 snprintf (opnd0, sizeof opnd0, "zero");
1349
1350 printf ("%s\t\t\t/* %s r%d, %s, %s%d */\n",
1351 buf, op->m_name, j, opnd0,
1352 op->is_unary () ? "" : "r", e->m_rhs);
1353
1354 first = false;
1355 }
1356 printf (" }");
1357 comma_separator = ",\n";
1358 }
1359
1360 printf ("\n};\n\n");
1361 printf ("const int %s_multiply_insn_seq_table_size =\n"
1362 " (int) (sizeof %s_multiply_insn_seq_table\n"
1363 " / sizeof %s_multiply_insn_seq_table[0]);\n",
1364 ARCH, ARCH, ARCH);
1365
1366 return EXIT_SUCCESS;
1367}