]>
Commit | Line | Data |
---|---|---|
dd552284 | 1 | /* Multiply table generator for tile. |
7adcbafe | 2 | Copyright (C) 2011-2022 Free Software Foundation, Inc. |
dd552284 WL |
3 | Contributed by Walter Lee (walt@tilera.com) |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it | |
8 | under the terms of the GNU General Public License as published | |
9 | by the Free Software Foundation; either version 3, or (at your | |
10 | option) any later version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT | |
13 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
14 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public | |
15 | License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING3. If not see | |
19 | <http://www.gnu.org/licenses/>. */ | |
20 | ||
21 | /* This program creates a table used to compile multiply by constant | |
22 | efficiently. | |
23 | ||
24 | This program should be compiled by a c++ compiler. If it's | |
026c3cfd | 25 | compiled with -DTILEPRO, it generates the multiply table for |
dd552284 WL |
26 | TILEPro; otherwise it generates the multiply table for TILE-Gx. |
27 | Running the program produces the table in stdout. | |
28 | ||
29 | The program works by generating every possible combination of up to | |
30 | MAX_INSTRUCTIONS linear operators (such as add, sub, s2a, left | |
31 | shift) and computing the multiplier computed by those instructions. | |
32 | For example, | |
33 | ||
34 | s2a r2,r1,r1 | |
35 | s2a r3,r2,r2 | |
36 | ||
37 | multiplies r1 by 25. | |
38 | ||
39 | There are usually multiple instruction sequences to multiply by a | |
40 | given constant. This program keeps only the cheapest one. | |
41 | "Cheapest" is defined first by the minimum theoretical schedule | |
42 | length, and if those are equal then by the number of instructions, | |
43 | and if those are equal then by which instructions we "prefer" | |
44 | (e.g. because we think one might take infinitesimally less power | |
45 | than another, or simply because we arbitrarily pick one to be more | |
46 | canonical). | |
47 | ||
48 | Once this program has determined the best instruction sequence for | |
49 | each multiplier, it emits them in a table sorted by the multiplier | |
50 | so the user can binary-search it to look for a match. The table is | |
51 | pruned based on various criteria to keep its sizes reasonable. */ | |
52 | ||
53 | #include <stdio.h> | |
54 | #include <stdlib.h> | |
55 | #include <assert.h> | |
56 | #include <string.h> | |
57 | #define __STDC_LIMIT_MACROS | |
58 | #include <stdint.h> | |
59 | ||
60 | #include <map> | |
61 | ||
62 | #ifdef TILEPRO | |
63 | ||
64 | /* The string representing the architecture. */ | |
65 | #define ARCH "tilepro" | |
66 | ||
67 | /* The type for the multiplication. */ | |
68 | typedef int MUL_TYPE; | |
69 | ||
70 | #else | |
71 | ||
72 | /* The string representing the architecture. */ | |
73 | #define ARCH "tilegx" | |
74 | ||
75 | /* The type for the multiplication. */ | |
76 | typedef long long MUL_TYPE; | |
77 | ||
78 | #endif | |
79 | ||
80 | /* Longest instruction sequence this will produce. With the current | |
81 | stupid algorithm runtime grows very quickly with this number. */ | |
82 | #define MAX_INSTRUCTIONS 4 | |
83 | ||
84 | /* Maximum number of subexpressions in the expression DAG being | |
85 | generated. This is the same as the number of instructions, except | |
86 | that zero and the original register we'd like to multiply by a | |
87 | constant are also thrown into the mix. */ | |
88 | #define MAX_SUBEXPRS (2 + MAX_INSTRUCTIONS) | |
89 | ||
90 | #define MIN(x, y) ((x) <= (y) ? (x) : (y)) | |
91 | #define MAX(x, y) ((x) >= (y) ? (x) : (y)) | |
92 | ||
93 | /* For this program a unary op is one which has only one nonconstant | |
94 | operand. So shift left by 5 is considered unary. */ | |
95 | typedef MUL_TYPE (*unary_op_func) (MUL_TYPE); | |
96 | typedef MUL_TYPE (*binary_op_func) (MUL_TYPE, MUL_TYPE); | |
97 | ||
98 | /* This describes an operation like 'add two registers' or 'left-shift | |
99 | by 7'. | |
100 | ||
101 | We call something a 'unary' operator if it only takes in one | |
102 | register as input, even though it might have an implicit second | |
103 | constant operand. Currently this is used for left-shift by | |
104 | constant. */ | |
105 | class Operator | |
106 | { | |
107 | public: | |
108 | /* Construct for a binary operator. */ | |
109 | Operator (const char *pattern, const char *name, binary_op_func func, | |
110 | int cost) | |
111 | : m_pattern (pattern), m_name (name), m_top_index (-1), | |
112 | m_unary_func (0), m_binary_func (func), m_cost (cost), | |
113 | m_rhs_if_unary (0) | |
114 | { | |
115 | } | |
116 | ||
117 | /* Construct for a unary operator. */ | |
118 | Operator (const char *pattern, const char *name, unary_op_func func, | |
119 | int rhs_if_unary, int cost) | |
120 | : m_pattern (pattern), m_name (name), m_top_index (-1), | |
121 | m_unary_func (func), m_binary_func (0), m_cost (cost), | |
122 | m_rhs_if_unary (rhs_if_unary) | |
123 | { | |
124 | } | |
125 | ||
126 | bool is_unary () const | |
127 | { | |
128 | return m_binary_func == NULL; | |
129 | } | |
130 | ||
131 | /* Name of the pattern for this operation, e.g. CODE_FOR_addsi3. */ | |
132 | const char *m_pattern; | |
133 | ||
134 | /* Name of the opcode for this operation, e.g. add. */ | |
135 | const char *m_name; | |
136 | ||
137 | /* We don't have enough bits in our output representation to store | |
138 | the original insn_code value, so we store a compressed form | |
139 | instead. These values are decoded back into insn_code via the | |
140 | machine-generated multiply_insn_seq_decode_opcode lookup | |
141 | table. */ | |
142 | int m_top_index; | |
143 | ||
144 | /* Unary operator to apply, or NULL if this is a binary operator. */ | |
145 | unary_op_func m_unary_func; | |
146 | ||
147 | /* Binary operator to apply, or NULL if this is a unary operator. */ | |
148 | binary_op_func m_binary_func; | |
149 | ||
150 | /* Function of how expensive we consider this operator. Higher is | |
151 | worse. */ | |
152 | int m_cost; | |
153 | ||
154 | /* the RHS value to write into the C file if unary; used for shift | |
155 | count. */ | |
156 | int m_rhs_if_unary; | |
157 | }; | |
158 | ||
159 | ||
160 | /* An entry in an expression DAG. */ | |
161 | class Expr | |
162 | { | |
163 | public: | |
164 | Expr () : m_op (NULL), m_lhs (0), m_rhs (0), m_produced_val (0), | |
165 | m_critical_path_length (0) | |
166 | { | |
167 | } | |
168 | ||
169 | /* The operator being applied to the operands. */ | |
170 | const Operator *m_op; | |
171 | ||
172 | /* The index of the left-hand operand in the array of subexpressions | |
173 | already computed. */ | |
174 | int m_lhs; | |
175 | ||
176 | /* For binary ops ,this is the index of the left-hand operand in the | |
177 | array of subexpressions already computed. For unary ops, it is | |
178 | specific to the op (e.g. it might hold a constant shift | |
179 | count). */ | |
180 | int m_rhs; | |
181 | ||
182 | /* The multiplier produced by this expression tree. For example, for | |
183 | the tree ((x << 5) + x), the value would be 33. */ | |
184 | MUL_TYPE m_produced_val; | |
185 | ||
186 | /* How far is this expression from the root, i.e. how many cycles | |
187 | minimum will it take to compute this? */ | |
188 | int m_critical_path_length; | |
189 | }; | |
190 | ||
191 | ||
192 | /* Make function pointers for the various linear operators we can | |
193 | apply to compute a multiplicative value. */ | |
194 | ||
195 | static MUL_TYPE | |
196 | add (MUL_TYPE n1, MUL_TYPE n2) | |
197 | { | |
198 | return n1 + n2; | |
199 | } | |
200 | ||
201 | static MUL_TYPE | |
202 | sub (MUL_TYPE n1, MUL_TYPE n2) | |
203 | { | |
204 | return n1 - n2; | |
205 | } | |
206 | ||
207 | static MUL_TYPE | |
208 | s1a (MUL_TYPE n1, MUL_TYPE n2) | |
209 | { | |
210 | return n1 * 2 + n2; | |
211 | } | |
212 | ||
213 | static MUL_TYPE | |
214 | s2a (MUL_TYPE n1, MUL_TYPE n2) | |
215 | { | |
216 | return n1 * 4 + n2; | |
217 | } | |
218 | ||
219 | static MUL_TYPE | |
220 | s3a (MUL_TYPE n1, MUL_TYPE n2) | |
221 | { | |
222 | return n1 * 8 + n2; | |
223 | } | |
224 | ||
225 | #define SHIFT(count) \ | |
226 | static MUL_TYPE \ | |
227 | shift##count(MUL_TYPE n) \ | |
228 | { \ | |
229 | return n << (count); \ | |
230 | } | |
231 | ||
232 | SHIFT (1); | |
233 | SHIFT (2); | |
234 | SHIFT (3); | |
235 | SHIFT (4); | |
236 | SHIFT (5); | |
237 | SHIFT (6); | |
238 | SHIFT (7); | |
239 | SHIFT (8); | |
240 | SHIFT (9); | |
241 | SHIFT (10); | |
242 | SHIFT (11); | |
243 | SHIFT (12); | |
244 | SHIFT (13); | |
245 | SHIFT (14); | |
246 | SHIFT (15); | |
247 | SHIFT (16); | |
248 | SHIFT (17); | |
249 | SHIFT (18); | |
250 | SHIFT (19); | |
251 | SHIFT (20); | |
252 | SHIFT (21); | |
253 | SHIFT (22); | |
254 | SHIFT (23); | |
255 | SHIFT (24); | |
256 | SHIFT (25); | |
257 | SHIFT (26); | |
258 | SHIFT (27); | |
259 | SHIFT (28); | |
260 | SHIFT (29); | |
261 | SHIFT (30); | |
262 | SHIFT (31); | |
263 | #ifndef TILEPRO | |
264 | SHIFT (32); | |
265 | SHIFT (33); | |
266 | SHIFT (34); | |
267 | SHIFT (35); | |
268 | SHIFT (36); | |
269 | SHIFT (37); | |
270 | SHIFT (38); | |
271 | SHIFT (39); | |
272 | SHIFT (40); | |
273 | SHIFT (41); | |
274 | SHIFT (42); | |
275 | SHIFT (43); | |
276 | SHIFT (44); | |
277 | SHIFT (45); | |
278 | SHIFT (46); | |
279 | SHIFT (47); | |
280 | SHIFT (48); | |
281 | SHIFT (49); | |
282 | SHIFT (50); | |
283 | SHIFT (51); | |
284 | SHIFT (52); | |
285 | SHIFT (53); | |
286 | SHIFT (54); | |
287 | SHIFT (55); | |
288 | SHIFT (56); | |
289 | SHIFT (57); | |
290 | SHIFT (58); | |
291 | SHIFT (59); | |
292 | SHIFT (60); | |
293 | SHIFT (61); | |
294 | SHIFT (62); | |
295 | SHIFT (63); | |
296 | #endif | |
297 | ||
298 | #ifdef TILEPRO | |
299 | static Operator ops[] = { | |
300 | Operator ("CODE_FOR_addsi3", "add", add, 1040), | |
301 | Operator ("CODE_FOR_subsi3", "sub", sub, 1041), | |
302 | Operator ("CODE_FOR_insn_s1a", "s1a", s1a, 1042), | |
303 | Operator ("CODE_FOR_insn_s2a", "s2a", s2a, 1043), | |
304 | Operator ("CODE_FOR_insn_s3a", "s3a", s3a, 1044), | |
305 | /* Note: shl by 1 is not necessary, since adding a value to itself | |
306 | produces the same result. But the architecture team thinks | |
307 | left-shift may use slightly less power, so we might as well | |
308 | prefer it. */ | |
309 | Operator ("CODE_FOR_ashlsi3", "shli", shift1, 1, 1001), | |
310 | Operator ("CODE_FOR_ashlsi3", "shli", shift2, 2, 1002), | |
311 | Operator ("CODE_FOR_ashlsi3", "shli", shift3, 3, 1003), | |
312 | Operator ("CODE_FOR_ashlsi3", "shli", shift4, 4, 1004), | |
313 | Operator ("CODE_FOR_ashlsi3", "shli", shift5, 5, 1005), | |
314 | Operator ("CODE_FOR_ashlsi3", "shli", shift6, 6, 1006), | |
315 | Operator ("CODE_FOR_ashlsi3", "shli", shift7, 7, 1007), | |
316 | Operator ("CODE_FOR_ashlsi3", "shli", shift8, 8, 1008), | |
317 | Operator ("CODE_FOR_ashlsi3", "shli", shift9, 9, 1009), | |
318 | Operator ("CODE_FOR_ashlsi3", "shli", shift10, 10, 1010), | |
319 | Operator ("CODE_FOR_ashlsi3", "shli", shift11, 11, 1011), | |
320 | Operator ("CODE_FOR_ashlsi3", "shli", shift12, 12, 1012), | |
321 | Operator ("CODE_FOR_ashlsi3", "shli", shift13, 13, 1013), | |
322 | Operator ("CODE_FOR_ashlsi3", "shli", shift14, 14, 1014), | |
323 | Operator ("CODE_FOR_ashlsi3", "shli", shift15, 15, 1015), | |
324 | Operator ("CODE_FOR_ashlsi3", "shli", shift16, 16, 1016), | |
325 | Operator ("CODE_FOR_ashlsi3", "shli", shift17, 17, 1017), | |
326 | Operator ("CODE_FOR_ashlsi3", "shli", shift18, 18, 1018), | |
327 | Operator ("CODE_FOR_ashlsi3", "shli", shift19, 19, 1019), | |
328 | Operator ("CODE_FOR_ashlsi3", "shli", shift20, 20, 1020), | |
329 | Operator ("CODE_FOR_ashlsi3", "shli", shift21, 21, 1021), | |
330 | Operator ("CODE_FOR_ashlsi3", "shli", shift22, 22, 1022), | |
331 | Operator ("CODE_FOR_ashlsi3", "shli", shift23, 23, 1023), | |
332 | Operator ("CODE_FOR_ashlsi3", "shli", shift24, 24, 1024), | |
333 | Operator ("CODE_FOR_ashlsi3", "shli", shift25, 25, 1025), | |
334 | Operator ("CODE_FOR_ashlsi3", "shli", shift26, 26, 1026), | |
335 | Operator ("CODE_FOR_ashlsi3", "shli", shift27, 27, 1027), | |
336 | Operator ("CODE_FOR_ashlsi3", "shli", shift28, 28, 1028), | |
337 | Operator ("CODE_FOR_ashlsi3", "shli", shift29, 29, 1029), | |
338 | Operator ("CODE_FOR_ashlsi3", "shli", shift30, 30, 1030), | |
339 | Operator ("CODE_FOR_ashlsi3", "shli", shift31, 31, 1031) | |
340 | }; | |
341 | #else | |
342 | static Operator ops[] = { | |
343 | Operator ("CODE_FOR_adddi3", "add", add, 1070), | |
344 | Operator ("CODE_FOR_subdi3", "sub", sub, 1071), | |
345 | Operator ("CODE_FOR_insn_shl1add", "shl1add", s1a, 1072), | |
346 | Operator ("CODE_FOR_insn_shl2add", "shl2add", s2a, 1073), | |
347 | Operator ("CODE_FOR_insn_shl3add", "shl3add", s3a, 1074), | |
348 | // Note: shl by 1 is not necessary, since adding a value to itself | |
349 | // produces the same result. But the architecture team thinks left-shift | |
350 | // may use slightly less power, so we might as well prefer it. | |
351 | Operator ("CODE_FOR_ashldi3", "shli", shift1, 1, 1001), | |
352 | Operator ("CODE_FOR_ashldi3", "shli", shift2, 2, 1002), | |
353 | Operator ("CODE_FOR_ashldi3", "shli", shift3, 3, 1003), | |
354 | Operator ("CODE_FOR_ashldi3", "shli", shift4, 4, 1004), | |
355 | Operator ("CODE_FOR_ashldi3", "shli", shift5, 5, 1005), | |
356 | Operator ("CODE_FOR_ashldi3", "shli", shift6, 6, 1006), | |
357 | Operator ("CODE_FOR_ashldi3", "shli", shift7, 7, 1007), | |
358 | Operator ("CODE_FOR_ashldi3", "shli", shift8, 8, 1008), | |
359 | Operator ("CODE_FOR_ashldi3", "shli", shift9, 9, 1009), | |
360 | Operator ("CODE_FOR_ashldi3", "shli", shift10, 10, 1010), | |
361 | Operator ("CODE_FOR_ashldi3", "shli", shift11, 11, 1011), | |
362 | Operator ("CODE_FOR_ashldi3", "shli", shift12, 12, 1012), | |
363 | Operator ("CODE_FOR_ashldi3", "shli", shift13, 13, 1013), | |
364 | Operator ("CODE_FOR_ashldi3", "shli", shift14, 14, 1014), | |
365 | Operator ("CODE_FOR_ashldi3", "shli", shift15, 15, 1015), | |
366 | Operator ("CODE_FOR_ashldi3", "shli", shift16, 16, 1016), | |
367 | Operator ("CODE_FOR_ashldi3", "shli", shift17, 17, 1017), | |
368 | Operator ("CODE_FOR_ashldi3", "shli", shift18, 18, 1018), | |
369 | Operator ("CODE_FOR_ashldi3", "shli", shift19, 19, 1019), | |
370 | Operator ("CODE_FOR_ashldi3", "shli", shift20, 20, 1020), | |
371 | Operator ("CODE_FOR_ashldi3", "shli", shift21, 21, 1021), | |
372 | Operator ("CODE_FOR_ashldi3", "shli", shift22, 22, 1022), | |
373 | Operator ("CODE_FOR_ashldi3", "shli", shift23, 23, 1023), | |
374 | Operator ("CODE_FOR_ashldi3", "shli", shift24, 24, 1024), | |
375 | Operator ("CODE_FOR_ashldi3", "shli", shift25, 25, 1025), | |
376 | Operator ("CODE_FOR_ashldi3", "shli", shift26, 26, 1026), | |
377 | Operator ("CODE_FOR_ashldi3", "shli", shift27, 27, 1027), | |
378 | Operator ("CODE_FOR_ashldi3", "shli", shift28, 28, 1028), | |
379 | Operator ("CODE_FOR_ashldi3", "shli", shift29, 29, 1029), | |
380 | Operator ("CODE_FOR_ashldi3", "shli", shift30, 30, 1030), | |
381 | Operator ("CODE_FOR_ashldi3", "shli", shift31, 31, 1031), | |
382 | Operator ("CODE_FOR_ashldi3", "shli", shift32, 32, 1032), | |
383 | Operator ("CODE_FOR_ashldi3", "shli", shift33, 33, 1033), | |
384 | Operator ("CODE_FOR_ashldi3", "shli", shift34, 34, 1034), | |
385 | Operator ("CODE_FOR_ashldi3", "shli", shift35, 35, 1035), | |
386 | Operator ("CODE_FOR_ashldi3", "shli", shift36, 36, 1036), | |
387 | Operator ("CODE_FOR_ashldi3", "shli", shift37, 37, 1037), | |
388 | Operator ("CODE_FOR_ashldi3", "shli", shift38, 38, 1038), | |
389 | Operator ("CODE_FOR_ashldi3", "shli", shift39, 39, 1039), | |
390 | Operator ("CODE_FOR_ashldi3", "shli", shift40, 40, 1040), | |
391 | Operator ("CODE_FOR_ashldi3", "shli", shift41, 41, 1041), | |
392 | Operator ("CODE_FOR_ashldi3", "shli", shift42, 42, 1042), | |
393 | Operator ("CODE_FOR_ashldi3", "shli", shift43, 43, 1043), | |
394 | Operator ("CODE_FOR_ashldi3", "shli", shift44, 44, 1044), | |
395 | Operator ("CODE_FOR_ashldi3", "shli", shift45, 45, 1045), | |
396 | Operator ("CODE_FOR_ashldi3", "shli", shift46, 46, 1046), | |
397 | Operator ("CODE_FOR_ashldi3", "shli", shift47, 47, 1047), | |
398 | Operator ("CODE_FOR_ashldi3", "shli", shift48, 48, 1048), | |
399 | Operator ("CODE_FOR_ashldi3", "shli", shift49, 49, 1049), | |
400 | Operator ("CODE_FOR_ashldi3", "shli", shift50, 50, 1050), | |
401 | Operator ("CODE_FOR_ashldi3", "shli", shift51, 51, 1051), | |
402 | Operator ("CODE_FOR_ashldi3", "shli", shift52, 52, 1052), | |
403 | Operator ("CODE_FOR_ashldi3", "shli", shift53, 53, 1053), | |
404 | Operator ("CODE_FOR_ashldi3", "shli", shift54, 54, 1054), | |
405 | Operator ("CODE_FOR_ashldi3", "shli", shift55, 55, 1055), | |
406 | Operator ("CODE_FOR_ashldi3", "shli", shift56, 56, 1056), | |
407 | Operator ("CODE_FOR_ashldi3", "shli", shift57, 57, 1057), | |
408 | Operator ("CODE_FOR_ashldi3", "shli", shift58, 58, 1058), | |
409 | Operator ("CODE_FOR_ashldi3", "shli", shift59, 59, 1059), | |
410 | Operator ("CODE_FOR_ashldi3", "shli", shift60, 60, 1060), | |
411 | Operator ("CODE_FOR_ashldi3", "shli", shift61, 61, 1061), | |
412 | Operator ("CODE_FOR_ashldi3", "shli", shift62, 62, 1062), | |
413 | Operator ("CODE_FOR_ashldi3", "shli", shift63, 63, 1063) | |
414 | }; | |
415 | #endif | |
416 | ||
417 | /* An ExpressionTree is an expression DAG. */ | |
418 | class ExpressionTree | |
419 | { | |
420 | public: | |
421 | ExpressionTree () : m_num_vals (2) | |
422 | { | |
423 | m_exprs[0].m_produced_val = 0; | |
424 | m_exprs[1].m_produced_val = 1; | |
425 | } | |
426 | ||
427 | void copy_technique_from (ExpressionTree * other) | |
428 | { | |
429 | /* TODO: write this; other can compute the same products with less | |
430 | cost. We update this ExpressionTree object because some int is | |
431 | already mapped to it. */ | |
432 | } | |
433 | ||
434 | int m_num_vals; | |
435 | Expr m_exprs[MAX_SUBEXPRS]; | |
436 | ||
437 | int cost () const | |
438 | { | |
439 | int cost = 0; | |
440 | for (int j = 2; j < m_num_vals; j++) | |
441 | cost += m_exprs[j].m_op->m_cost; | |
442 | return cost + m_exprs[m_num_vals - 1].m_critical_path_length * 1000000; | |
443 | } | |
444 | }; | |
445 | ||
446 | ||
447 | typedef std::map<MUL_TYPE, ExpressionTree *> ExpressionTreeMap; | |
448 | ||
449 | ||
450 | static void | |
451 | find_sequences (ExpressionTree &s, ExpressionTreeMap &best_solution) | |
452 | { | |
453 | /* Don't look more if we can't add any new values to the expression | |
454 | tree. */ | |
455 | const int num_vals = s.m_num_vals; | |
456 | if (num_vals == MAX_SUBEXPRS) | |
457 | return; | |
458 | ||
459 | /* Grow array to make room for new values added as we loop. */ | |
460 | s.m_num_vals = num_vals + 1; | |
461 | ||
462 | const Operator *const prev_op = s.m_exprs[num_vals - 1].m_op; | |
463 | const int prev_top_index = (prev_op != NULL) ? prev_op->m_top_index : -1; | |
464 | ||
465 | for (size_t f = 0; f < sizeof ops / sizeof ops[0]; f++) | |
466 | { | |
467 | const Operator *const op = &ops[f]; | |
468 | ||
469 | for (int i = 0; i < num_vals; i++) | |
470 | { | |
471 | /* Only allow zero as the first operand to sub, otherwise | |
472 | it is useless. */ | |
473 | if (i == 0 && op->m_binary_func != sub) | |
474 | continue; | |
475 | ||
476 | /* Unary ops don't actually use the second operand, so as a | |
477 | big hack we trick it into only looping once in the inner | |
478 | loop. */ | |
479 | const int j_end = op->is_unary () ? 2 : num_vals; | |
480 | ||
481 | /* We never allow zero as the second operand, as it is | |
482 | always useless. */ | |
483 | for (int j = 1; j < j_end; j++) | |
484 | { | |
485 | /* Does this expression use the immediately previous | |
486 | expression? */ | |
487 | const bool uses_prev_value = | |
488 | (i == num_vals - 1 | |
489 | || (!op->is_unary () && j == num_vals - 1)); | |
490 | ||
491 | if (!uses_prev_value) | |
492 | { | |
493 | /* For search efficiency, prune redundant | |
494 | instruction orderings. | |
495 | ||
496 | This op does not take the immediately preceding | |
497 | value as input, which means we could have done it | |
498 | in the previous slot. If its opcode is less than | |
499 | the previous instruction's, we'll declare this | |
500 | instruction order non-canonical and not pursue | |
501 | this branch of the search. */ | |
502 | if (op->m_top_index < prev_top_index) | |
503 | continue; | |
504 | } | |
505 | ||
506 | MUL_TYPE n; | |
507 | if (op->is_unary ()) | |
508 | { | |
509 | n = op->m_unary_func (s.m_exprs[i].m_produced_val); | |
510 | } | |
511 | else | |
512 | { | |
513 | n = op->m_binary_func (s.m_exprs[i].m_produced_val, | |
514 | s.m_exprs[j].m_produced_val); | |
515 | } | |
516 | ||
517 | bool duplicate = false; | |
518 | for (int k = 0; k < num_vals; k++) | |
519 | if (n == s.m_exprs[j].m_produced_val) | |
520 | { | |
521 | duplicate = true; | |
522 | break; | |
523 | } | |
524 | ||
525 | if (duplicate) | |
526 | continue; | |
527 | ||
528 | /* See if we found the best solution for n. */ | |
529 | Expr *e = &s.m_exprs[num_vals]; | |
530 | e->m_op = op; | |
531 | e->m_lhs = i; | |
532 | e->m_rhs = op->is_unary () ? op->m_rhs_if_unary : j; | |
533 | e->m_produced_val = n; | |
534 | e->m_critical_path_length = | |
535 | 1 + MAX (s.m_exprs[i].m_critical_path_length, | |
536 | s.m_exprs[j].m_critical_path_length); | |
537 | ||
538 | ExpressionTreeMap::iterator best (best_solution.find (n)); | |
539 | if (best == best_solution.end () | |
540 | || (*best).second->cost () > s.cost ()) | |
541 | best_solution[n] = new ExpressionTree (s); | |
542 | ||
543 | /* Recurse and find more. */ | |
544 | find_sequences (s, best_solution); | |
545 | } | |
546 | } | |
547 | } | |
548 | ||
549 | /* Restore old size. */ | |
550 | s.m_num_vals = num_vals; | |
551 | } | |
552 | ||
553 | ||
554 | /* For each insn_code used by an operator, choose a compact number so | |
555 | it takes less space in the output data structure. This prints out a | |
556 | lookup table used to map the compactified number back to an | |
557 | insn_code. */ | |
558 | static void | |
559 | create_insn_code_compression_table () | |
560 | { | |
561 | int next_index = 1; | |
562 | ||
563 | /* Entry 0 must hold CODE_FOR_nothing to mean "end of array". */ | |
564 | printf ("const enum insn_code %s_multiply_insn_seq_decode_opcode[] = {\n" | |
565 | " CODE_FOR_nothing /* must be first */ ", ARCH); | |
566 | ||
567 | for (size_t i = 0; i < sizeof ops / sizeof ops[0]; i++) | |
568 | { | |
569 | Operator *op = &ops[i]; | |
570 | int index = -1; | |
571 | ||
572 | /* See if some previous Operator was using the same insn_code. | |
573 | If so, reuse its table entry. */ | |
574 | for (size_t j = 0; j < i; j++) | |
575 | { | |
576 | Operator *old = &ops[j]; | |
577 | if (strcmp (old->m_pattern, op->m_pattern) == 0) | |
578 | { | |
579 | index = old->m_top_index; | |
580 | break; | |
581 | } | |
582 | } | |
583 | ||
584 | if (index == -1) | |
585 | { | |
586 | /* We need to make a new entry in the table. */ | |
587 | printf (",\n %s", op->m_pattern); | |
588 | index = next_index++; | |
589 | } | |
590 | ||
591 | op->m_top_index = index; | |
592 | } | |
593 | ||
594 | printf ("\n};\n\n"); | |
595 | } | |
596 | ||
597 | ||
598 | /* These are constants we've seen in code, that we want to keep table | |
599 | entries for. */ | |
600 | static int multiply_constants_used[] = { | |
601 | -11796480, | |
602 | -27439, | |
603 | -25148, | |
604 | -22820, | |
605 | -21709, | |
606 | -20995, | |
607 | -20284, | |
608 | -20239, | |
609 | -19595, | |
610 | -19447, | |
611 | -19183, | |
612 | -19165, | |
613 | -18730, | |
614 | -17828, | |
615 | -17799, | |
616 | -17237, | |
617 | -17036, | |
618 | -16549, | |
619 | -16423, | |
620 | -16294, | |
621 | -16244, | |
622 | -16069, | |
623 | -15137, | |
624 | -15083, | |
625 | -15038, | |
626 | -14924, | |
627 | -14905, | |
628 | -14752, | |
629 | -14731, | |
630 | -14529, | |
631 | -14273, | |
632 | -14090, | |
633 | -14084, | |
634 | -14043, | |
635 | -13850, | |
636 | -13802, | |
637 | -13631, | |
638 | -13455, | |
639 | -13275, | |
640 | -12879, | |
641 | -12700, | |
642 | -12534, | |
643 | -12399, | |
644 | -12131, | |
645 | -12112, | |
646 | -12054, | |
647 | -12019, | |
648 | -11759, | |
649 | -11585, | |
650 | -11467, | |
651 | -11395, | |
652 | -11295, | |
653 | -11248, | |
654 | -11148, | |
655 | -11116, | |
656 | -11086, | |
657 | -11059, | |
658 | -11018, | |
659 | -10811, | |
660 | -10538, | |
661 | -10258, | |
662 | -10217, | |
663 | -10033, | |
664 | -9766, | |
665 | -9754, | |
666 | -9534, | |
667 | -9527, | |
668 | -9467, | |
669 | -9262, | |
670 | -9232, | |
671 | -9222, | |
672 | -9198, | |
673 | -9191, | |
674 | -9113, | |
675 | -8825, | |
676 | -8756, | |
677 | -8697, | |
678 | -8693, | |
679 | -8565, | |
680 | -8342, | |
681 | -8208, | |
682 | -8200, | |
683 | -8170, | |
684 | -8102, | |
685 | -7770, | |
686 | -7678, | |
687 | -7562, | |
688 | -7376, | |
689 | -7373, | |
690 | -7221, | |
691 | -7121, | |
692 | -6835, | |
693 | -6810, | |
694 | -6626, | |
695 | -6581, | |
696 | -6461, | |
697 | -6278, | |
698 | -6263, | |
699 | -6163, | |
700 | -6029, | |
701 | -5816, | |
702 | -5540, | |
703 | -5461, | |
704 | -5384, | |
705 | -5329, | |
706 | -4985, | |
707 | -4926, | |
708 | -4815, | |
709 | -4788, | |
710 | -4758, | |
711 | -4433, | |
712 | -4229, | |
713 | -4209, | |
714 | -4176, | |
715 | -4104, | |
716 | -4095, | |
717 | -4078, | |
718 | -3941, | |
719 | -3818, | |
720 | -3600, | |
721 | -3474, | |
722 | -3314, | |
723 | -3264, | |
724 | -3196, | |
725 | -3072, | |
726 | -2912, | |
727 | -2836, | |
728 | -2773, | |
729 | -2269, | |
730 | -2184, | |
731 | -2100, | |
732 | -1730, | |
733 | -1512, | |
734 | -1500, | |
735 | -1396, | |
736 | -1344, | |
737 | -1312, | |
738 | -1297, | |
739 | -1059, | |
740 | -1058, | |
741 | 1027, | |
742 | 1049, | |
743 | 1059, | |
744 | 1100, | |
745 | 1104, | |
746 | 1108, | |
747 | 1136, | |
748 | 1200, | |
749 | 1204, | |
750 | 1242, | |
751 | 1292, | |
752 | 1304, | |
753 | 1312, | |
754 | 1320, | |
755 | 1336, | |
756 | 1344, | |
757 | 1348, | |
758 | 1360, | |
759 | 1364, | |
760 | 1395, | |
761 | 1448, | |
762 | 1460, | |
763 | 1461, | |
764 | 1472, | |
765 | 1488, | |
766 | 1500, | |
767 | 1512, | |
768 | 1568, | |
769 | 1576, | |
770 | 1649, | |
771 | 1664, | |
772 | 1684, | |
773 | 1696, | |
774 | 1744, | |
775 | 1812, | |
776 | 1822, | |
777 | 1884, | |
778 | 1963, | |
779 | 1978, | |
780 | 2000, | |
781 | 2012, | |
782 | 2014, | |
783 | 2037, | |
784 | 2039, | |
785 | 2100, | |
786 | 2139, | |
787 | 2144, | |
788 | 2184, | |
789 | 2237, | |
790 | 2260, | |
791 | 2320, | |
792 | 2408, | |
793 | 2446, | |
794 | 2447, | |
795 | 2499, | |
796 | 2531, | |
797 | 2578, | |
798 | 2592, | |
799 | 2611, | |
800 | 2633, | |
801 | 2704, | |
802 | 2730, | |
803 | 2773, | |
804 | 2880, | |
805 | 2896, | |
806 | 2998, | |
807 | 3000, | |
808 | 3001, | |
809 | 3021, | |
810 | 3079, | |
811 | 3112, | |
812 | 3150, | |
813 | 3179, | |
814 | 3192, | |
815 | 3240, | |
816 | 3264, | |
817 | 3271, | |
818 | 3283, | |
819 | 3328, | |
820 | 3363, | |
821 | 3367, | |
822 | 3453, | |
823 | 3529, | |
824 | 3570, | |
825 | 3580, | |
826 | 3600, | |
827 | 3624, | |
828 | 3707, | |
829 | 3783, | |
830 | 3826, | |
831 | 3897, | |
832 | 3941, | |
833 | 3962, | |
834 | 3989, | |
835 | 4000, | |
836 | 4025, | |
837 | 4073, | |
838 | 4093, | |
839 | 4099, | |
840 | 4108, | |
841 | 4184, | |
842 | 4209, | |
843 | 4369, | |
844 | 4376, | |
845 | 4416, | |
846 | 4433, | |
847 | 4434, | |
848 | 4482, | |
849 | 4582, | |
850 | 4712, | |
851 | 4717, | |
852 | 4813, | |
853 | 4815, | |
854 | 4864, | |
855 | 5000, | |
856 | 5027, | |
857 | 5040, | |
858 | 5091, | |
859 | 5195, | |
860 | 5243, | |
861 | 5260, | |
862 | 5285, | |
863 | 5329, | |
864 | 5331, | |
865 | 5350, | |
866 | 5361, | |
867 | 5387, | |
868 | 5461, | |
869 | 5492, | |
870 | 5529, | |
871 | 5573, | |
872 | 5793, | |
873 | 5819, | |
874 | 5915, | |
875 | 5946, | |
876 | 5992, | |
877 | 6000, | |
878 | 6164, | |
879 | 6205, | |
880 | 6262, | |
881 | 6263, | |
882 | 6269, | |
883 | 6270, | |
884 | 6387, | |
885 | 6400, | |
886 | 6406, | |
887 | 6476, | |
888 | 6541, | |
889 | 6565, | |
890 | 6568, | |
891 | 6626, | |
892 | 6656, | |
893 | 6732, | |
894 | 6810, | |
895 | 6817, | |
896 | 6859, | |
897 | 7040, | |
898 | 7053, | |
899 | 7141, | |
900 | 7169, | |
901 | 7221, | |
902 | 7223, | |
903 | 7274, | |
904 | 7282, | |
905 | 7350, | |
906 | 7369, | |
907 | 7373, | |
908 | 7442, | |
909 | 7447, | |
910 | 7471, | |
911 | 7518, | |
912 | 7542, | |
913 | 7566, | |
914 | 7587, | |
915 | 7663, | |
916 | 7678, | |
917 | 7682, | |
918 | 7748, | |
919 | 7752, | |
920 | 7791, | |
921 | 8000, | |
922 | 8026, | |
923 | 8048, | |
924 | 8170, | |
925 | 8203, | |
926 | 8204, | |
927 | 8290, | |
928 | 8368, | |
929 | 8520, | |
930 | 8640, | |
931 | 8666, | |
932 | 8672, | |
933 | 8697, | |
934 | 8716, | |
935 | 8728, | |
936 | 8756, | |
937 | 8820, | |
938 | 8875, | |
939 | 8918, | |
940 | 8956, | |
941 | 9058, | |
942 | 9154, | |
943 | 9175, | |
944 | 9191, | |
945 | 9217, | |
946 | 9262, | |
947 | 9321, | |
948 | 9373, | |
949 | 9434, | |
950 | 9465, | |
951 | 9514, | |
952 | 9534, | |
953 | 9633, | |
954 | 9746, | |
955 | 9810, | |
956 | 9850, | |
957 | 9947, | |
958 | 9973, | |
959 | 10000, | |
960 | 10009, | |
961 | 10033, | |
962 | 10055, | |
963 | 10217, | |
964 | 10248, | |
965 | 10298, | |
966 | 10310, | |
967 | 10323, | |
968 | 10368, | |
969 | 10438, | |
970 | 10456, | |
971 | 10486, | |
972 | 10538, | |
973 | 10664, | |
974 | 10695, | |
975 | 10700, | |
976 | 10703, | |
977 | 10832, | |
978 | 10887, | |
979 | 10935, | |
980 | 10958, | |
981 | 11018, | |
982 | 11059, | |
983 | 11061, | |
984 | 11086, | |
985 | 11116, | |
986 | 11148, | |
987 | 11190, | |
988 | 11249, | |
989 | 11314, | |
990 | 11332, | |
991 | 11363, | |
992 | 11409, | |
993 | 11415, | |
994 | 11443, | |
995 | 11467, | |
996 | 11512, | |
997 | 11522, | |
998 | 11529, | |
999 | 11585, | |
1000 | 11759, | |
1001 | 11768, | |
1002 | 11795, | |
1003 | 11893, | |
1004 | 11997, | |
1005 | 12131, | |
1006 | 12299, | |
1007 | 12536, | |
1008 | 12543, | |
1009 | 12893, | |
1010 | 12945, | |
1011 | 12998, | |
1012 | 13109, | |
1013 | 13213, | |
1014 | 13685, | |
1015 | 13930, | |
1016 | 14023, | |
1017 | 14024, | |
1018 | 14271, | |
1019 | 14564, | |
1020 | 14647, | |
1021 | 15326, | |
1022 | 15850, | |
1023 | 15855, | |
1024 | 15929, | |
1025 | 16000, | |
1026 | 16154, | |
1027 | 16496, | |
1028 | 16807, | |
1029 | 16819, | |
1030 | 16984, | |
1031 | 17203, | |
1032 | 17223, | |
1033 | 17333, | |
1034 | 17760, | |
1035 | 17799, | |
1036 | 17837, | |
1037 | 18029, | |
1038 | 18068, | |
1039 | 18336, | |
1040 | 18515, | |
1041 | 19595, | |
1042 | 20017, | |
1043 | 20131, | |
1044 | 20862, | |
1045 | 20995, | |
1046 | 21709, | |
1047 | 22554, | |
1048 | 25000, | |
1049 | 25172, | |
1050 | 25600, | |
1051 | 25733, | |
1052 | 27439, | |
1053 | 38470, | |
1054 | 46802, | |
1055 | 50000, | |
1056 | 11796480, | |
1057 | 16843009, | |
1058 | 23592960, | |
1059 | }; | |
1060 | ||
1061 | ||
1062 | const int num_mult_constants_used = | |
1063 | (int)(sizeof multiply_constants_used | |
1064 | / sizeof multiply_constants_used[0]); | |
1065 | ||
1066 | ||
1067 | #define XSIZE (sizeof multiply_constants_used / \ | |
1068 | sizeof multiply_constants_used[0] + 32) / 32 | |
1069 | unsigned multiply_constants_avail[XSIZE]; | |
1070 | #undef XSIZE | |
1071 | ||
1072 | ||
1073 | /* bsearch helper function. */ | |
1074 | static int | |
1075 | compare_constants (const void *key, const void *t) | |
1076 | { | |
1077 | return (*(int*)key) - *((int*)t); | |
1078 | } | |
1079 | ||
1080 | ||
1081 | static int * | |
1082 | find_mult_constants_used (int multiplier) | |
1083 | { | |
1084 | return (int *) bsearch (&multiplier, multiply_constants_used, | |
1085 | num_mult_constants_used, | |
1086 | sizeof multiply_constants_used[0], | |
1087 | compare_constants); | |
1088 | } | |
1089 | ||
1090 | ||
1091 | int num_ops (ExpressionTree *s) | |
1092 | { | |
1093 | int n = 0; | |
1094 | for (int i = 0; i < s->m_num_vals; i++) | |
1095 | { | |
1096 | Expr *e = &s->m_exprs[i]; | |
1097 | if (e->m_op != NULL) | |
1098 | n++; | |
1099 | } | |
1100 | return n; | |
1101 | } | |
1102 | ||
1103 | ||
1104 | #ifdef TILEPRO | |
1105 | bool | |
1106 | tilepro_emit (int multiplier, int num_ops) | |
1107 | { | |
1108 | int abs_multiplier = (multiplier >= 0) ? multiplier : -multiplier; | |
1109 | ||
1110 | /* Keep constants in range [-1024, 1024]. */ | |
1111 | if (abs_multiplier <= 1024) | |
1112 | return true; | |
1113 | ||
1114 | /* Keep constants near powers of two. */ | |
1115 | int prev_pow2 = 1 << (31 - __builtin_clz (abs_multiplier)); | |
1116 | int next_pow2 = prev_pow2 << 1; | |
1117 | ||
1118 | if ((abs_multiplier - prev_pow2 <= 10) | |
1119 | || (next_pow2 - abs_multiplier <= 10)) | |
1120 | return true; | |
1121 | ||
1122 | /* Keep constants near powers of ten. */ | |
1123 | { | |
1124 | long long j = 1; | |
1125 | long long prev_pow10; | |
1126 | long long next_pow10; | |
1127 | ||
1128 | while (((j * 10) < abs_multiplier) | |
1129 | && (j < (j * 10))) | |
1130 | j = j * 10; | |
1131 | ||
1132 | prev_pow10 = j; | |
1133 | next_pow10 = j * 10; | |
1134 | ||
1135 | if ((abs_multiplier - prev_pow10 <= 10) | |
1136 | || (next_pow10 - abs_multiplier <= 10)) | |
1137 | return true; | |
1138 | } | |
1139 | ||
1140 | /* Keep short sequences that have two or fewer ops. */ | |
1141 | if (num_ops <= 2) | |
1142 | return true; | |
1143 | ||
1144 | /* Keep constants that are mostly 0's or mostly 1's. */ | |
1145 | if (__builtin_popcount (multiplier) <= 2 || | |
1146 | __builtin_popcount (multiplier) >= 30) | |
1147 | return true; | |
1148 | ||
1149 | /* Keep constants seen in actual code. */ | |
1150 | if ((find_mult_constants_used (multiplier))) | |
1151 | return true; | |
1152 | ||
1153 | return false; | |
1154 | } | |
1155 | #else | |
1156 | bool | |
1157 | tilegx_emit (long long multiplier, int num_ops) | |
1158 | { | |
1159 | long long abs_multiplier = (multiplier >= 0) ? multiplier : - multiplier; | |
1160 | ||
1161 | /* Keep constants in range [-1024, 1024]. */ | |
1162 | if (abs_multiplier <= 1024) | |
1163 | return true; | |
1164 | ||
1165 | /* Otherwise exclude sequences with four ops. */ | |
1166 | if (num_ops > 3) | |
1167 | return false; | |
1168 | ||
1169 | /* Keep constants near powers of two. */ | |
1170 | { | |
1171 | unsigned long long prev_pow2 = | |
1172 | 1LL << (63 - __builtin_clzll (abs_multiplier)); | |
1173 | unsigned long long next_pow2 = prev_pow2 << 1; | |
1174 | ||
1175 | /* handle overflow case. */ | |
1176 | if (next_pow2 == 0) | |
1177 | { | |
1178 | if (prev_pow2 - abs_multiplier <= 10) | |
1179 | return true; | |
1180 | } | |
1181 | else if ((abs_multiplier - prev_pow2 <= 10) | |
1182 | || (next_pow2 - abs_multiplier <= 10)) | |
1183 | return true; | |
1184 | } | |
1185 | ||
1186 | /* Keep constants near powers of ten. */ | |
1187 | { | |
1188 | long long j = 1; | |
1189 | long long prev_pow10; | |
1190 | long long next_pow10; | |
1191 | ||
1192 | while (((j * 10) < abs_multiplier) | |
1193 | && (j < (INTMAX_MAX / 10))) | |
1194 | j = j * 10; | |
1195 | ||
1196 | prev_pow10 = j; | |
1197 | next_pow10 = (j > (INTMAX_MAX / 10)) ? 0 : j * 10; | |
1198 | ||
1199 | if ((abs_multiplier - prev_pow10 <= 100) | |
1200 | || (next_pow10 | |
1201 | && (next_pow10 - abs_multiplier <= 100))) | |
1202 | return true; | |
1203 | } | |
1204 | ||
1205 | if (num_ops <= 2) | |
1206 | return true; | |
1207 | ||
1208 | /* Keep constants that are mostly 0's or mostly 1's. */ | |
1209 | if (__builtin_popcountll (multiplier) <= 2 || | |
1210 | __builtin_popcountll (multiplier) >= 62) | |
1211 | return true; | |
1212 | ||
1213 | /* Keep constants seen in actual code. */ | |
1214 | if (find_mult_constants_used (multiplier)) | |
1215 | return true; | |
1216 | ||
1217 | return false; | |
1218 | } | |
1219 | #endif | |
1220 | ||
1221 | ||
1222 | int | |
1223 | main () | |
1224 | { | |
1225 | ExpressionTreeMap best_solution; | |
1226 | ExpressionTree s; | |
1227 | ||
1228 | #ifdef TILEPRO | |
1229 | printf ("/* Constant multiply table for TILEPro.\n"); | |
1230 | #else | |
1231 | printf ("/* Constant multiply table for TILE-Gx.\n"); | |
1232 | #endif | |
7adcbafe | 1233 | printf (" Copyright (C) 2011-2022 Free Software Foundation, Inc.\n"); |
dd552284 WL |
1234 | printf (" Contributed by Walter Lee (walt@tilera.com)\n"); |
1235 | printf ("\n"); | |
1236 | printf (" This file is part of GCC.\n"); | |
1237 | printf ("\n"); | |
1238 | printf (" GCC is free software; you can redistribute it and/or modify it\n"); | |
1239 | printf (" under the terms of the GNU General Public License as published\n"); | |
1240 | printf (" by the Free Software Foundation; either version 3, or (at your\n"); | |
1241 | printf (" option) any later version.\n"); | |
1242 | printf ("\n"); | |
1243 | printf (" GCC is distributed in the hope that it will be useful, but WITHOUT\n"); | |
1244 | printf (" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n"); | |
1245 | printf (" or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public\n"); | |
1246 | printf (" License for more details.\n"); | |
1247 | printf ("\n"); | |
1248 | printf (" You should have received a copy of the GNU General Public License\n"); | |
1249 | printf (" along with GCC; see the file COPYING3. If not see\n"); | |
1250 | printf (" <http://www.gnu.org/licenses/>. */\n"); | |
1251 | printf ("\n"); | |
b61d924d AM |
1252 | printf ("/* Note this file is auto-generated from gen-mul-tables.cc.\n"); |
1253 | printf (" Make any required changes there. */\n"); | |
1254 | printf ("\n"); | |
27e5d7c7 IB |
1255 | printf ("#define IN_TARGET_CODE 1\n"); |
1256 | printf ("\n"); | |
dd552284 WL |
1257 | printf ("#include \"config.h\"\n"); |
1258 | printf ("#include \"system.h\"\n"); | |
1259 | printf ("#include \"coretypes.h\"\n"); | |
2f40fae2 | 1260 | printf ("#include \"backend.h\"\n"); |
36566b39 | 1261 | printf ("#include \"rtl.h\"\n"); |
36566b39 | 1262 | printf ("#include \"expmed.h\"\n"); |
dd552284 WL |
1263 | printf ("#include \"%s-multiply.h\"\n\n", ARCH); |
1264 | create_insn_code_compression_table (); | |
1265 | ||
1266 | /* Try all combinations of operators and see what constants we | |
1267 | produce. For each possible constant, record the most efficient | |
1268 | way to generate it. */ | |
1269 | find_sequences (s, best_solution); | |
1270 | ||
1271 | printf ("const struct %s_multiply_insn_seq " | |
1272 | "%s_multiply_insn_seq_table[] = {\n", | |
1273 | ARCH, ARCH); | |
1274 | ||
1275 | const char *comma_separator = ""; | |
1276 | ||
1277 | ExpressionTreeMap::iterator i (best_solution.begin ()); | |
1278 | for (; i != best_solution.end (); ++i) | |
1279 | { | |
1280 | ExpressionTree *s = (*i).second; | |
1281 | const MUL_TYPE n = (*i).first; | |
1282 | ||
1283 | if (n == 0 || n == 1) | |
1284 | { | |
1285 | /* Both of these require zero operations, so these entries | |
1286 | are bogus and should never be used. */ | |
1287 | continue; | |
1288 | } | |
1289 | ||
1290 | /* Prune the list of entries to keep the table to a reasonable | |
1291 | size. */ | |
1292 | #ifdef TILEPRO | |
1293 | if (!tilepro_emit (n, num_ops (s))) | |
1294 | continue; | |
1295 | #else | |
1296 | if (!tilegx_emit (n, num_ops (s))) | |
1297 | continue; | |
1298 | #endif | |
1299 | ||
1300 | printf ("%s", comma_separator); | |
1301 | ||
1302 | #ifdef TILEPRO | |
1303 | const MUL_TYPE int_min = INT32_MIN; | |
1304 | #else | |
1305 | const MUL_TYPE int_min = INT64_MIN; | |
1306 | #endif | |
1307 | if (n == int_min) | |
1308 | { | |
1309 | /* Handle C's woeful lack of unary negation. Without this, | |
1310 | printing out INT_MIN in decimal will yield an unsigned | |
1311 | int which could generate a compiler warning. */ | |
1312 | #ifdef TILEPRO | |
1313 | printf (" {%d - 1 /* 0x%x */ ,\n {", n + 1, | |
1314 | (unsigned) n); | |
1315 | #else | |
1316 | printf (" {%lldll - 1 /* 0x%llx */ ,\n {", n + 1, | |
1317 | (unsigned MUL_TYPE) n); | |
1318 | #endif | |
1319 | } | |
1320 | else | |
1321 | { | |
1322 | #ifdef TILEPRO | |
1323 | printf (" {%d /* 0x%x */ ,\n {", n, (unsigned) n); | |
1324 | #else | |
1325 | printf (" {%lldll /* 0x%llx */ ,\n {", n, (unsigned MUL_TYPE) n); | |
1326 | #endif | |
1327 | } | |
1328 | ||
1329 | bool first = true; | |
1330 | for (int j = 0; j < s->m_num_vals; j++) | |
1331 | { | |
1332 | Expr *e = &s->m_exprs[j]; | |
1333 | ||
1334 | const Operator *op = e->m_op; | |
1335 | if (op == NULL) | |
1336 | continue; | |
1337 | ||
1338 | char buf[1024]; | |
1339 | snprintf (buf, sizeof buf, "%s{%d, %d, %d}%s", | |
1340 | first ? "" : " ", | |
1341 | op->m_top_index, | |
1342 | e->m_lhs, e->m_rhs, (j + 1) == s->m_num_vals ? "}" : ","); | |
1343 | ||
1344 | char opnd0[10]; | |
1345 | if (e->m_lhs) | |
1346 | snprintf (opnd0, sizeof opnd0, "r%d", e->m_lhs); | |
1347 | else | |
1348 | snprintf (opnd0, sizeof opnd0, "zero"); | |
1349 | ||
1350 | printf ("%s\t\t\t/* %s r%d, %s, %s%d */\n", | |
1351 | buf, op->m_name, j, opnd0, | |
1352 | op->is_unary () ? "" : "r", e->m_rhs); | |
1353 | ||
1354 | first = false; | |
1355 | } | |
1356 | printf (" }"); | |
1357 | comma_separator = ",\n"; | |
1358 | } | |
1359 | ||
1360 | printf ("\n};\n\n"); | |
1361 | printf ("const int %s_multiply_insn_seq_table_size =\n" | |
1362 | " (int) (sizeof %s_multiply_insn_seq_table\n" | |
1363 | " / sizeof %s_multiply_insn_seq_table[0]);\n", | |
1364 | ARCH, ARCH, ARCH); | |
1365 | ||
1366 | return EXIT_SUCCESS; | |
1367 | } |