]>
Commit | Line | Data |
---|---|---|
5fd1486c | 1 | /* brig-function.cc -- declaration of brig_function class. |
a5544970 | 2 | Copyright (C) 2016-2019 Free Software Foundation, Inc. |
5fd1486c PJ |
3 | Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com> |
4 | for General Processor Tech. | |
5 | ||
6 | This file is part of GCC. | |
7 | ||
8 | GCC is free software; you can redistribute it and/or modify it under | |
9 | the terms of the GNU General Public License as published by the Free | |
10 | Software Foundation; either version 3, or (at your option) any later | |
11 | version. | |
12 | ||
13 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 | for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
19 | along with GCC; see the file COPYING3. If not see | |
20 | <http://www.gnu.org/licenses/>. */ | |
21 | ||
22 | #include <sstream> | |
23 | #include <iomanip> | |
24 | ||
25 | #include "brig-function.h" | |
26 | #include "stringpool.h" | |
27 | #include "tree-iterator.h" | |
28 | #include "toplev.h" | |
29 | #include "gimplify.h" | |
30 | #include "gimple-expr.h" | |
31 | #include "print-tree.h" | |
32 | #include "hsa-brig-format.h" | |
33 | #include "stor-layout.h" | |
34 | #include "diagnostic-core.h" | |
35 | #include "brig-code-entry-handler.h" | |
36 | #include "brig-machine.h" | |
37 | #include "brig-util.h" | |
38 | #include "phsa.h" | |
39 | #include "tree-pretty-print.h" | |
40 | #include "dumpfile.h" | |
f0622a50 | 41 | #include "profile-count.h" |
5fd1486c PJ |
42 | #include "tree-cfg.h" |
43 | #include "errors.h" | |
44 | #include "function.h" | |
45 | #include "brig-to-generic.h" | |
46 | #include "brig-builtins.h" | |
080dc243 PJ |
47 | #include "options.h" |
48 | #include "fold-const.h" | |
49 | #include "target.h" | |
50 | #include "builtins.h" | |
51 | ||
52 | brig_function::builtin_map brig_function::s_custom_builtins; | |
5fd1486c PJ |
53 | |
54 | brig_function::brig_function (const BrigDirectiveExecutable *exec, | |
55 | brig_to_generic *parent) | |
56 | : m_brig_def (exec), m_is_kernel (false), m_is_finished (false), m_name (""), | |
57 | m_current_bind_expr (NULL_TREE), m_func_decl (NULL_TREE), | |
58 | m_context_arg (NULL_TREE), m_group_base_arg (NULL_TREE), | |
59 | m_private_base_arg (NULL_TREE), m_ret_value (NULL_TREE), | |
60 | m_next_kernarg_offset (0), m_kernarg_max_align (0), | |
d4b7f2ee PJ |
61 | m_ret_value_brig_var (NULL), m_has_barriers (false), m_has_allocas (false), |
62 | m_has_function_calls_with_barriers (false), m_calls_analyzed (false), | |
63 | m_is_wg_function (false), m_has_unexpanded_dp_builtins (false), | |
64 | m_generating_arg_block (false), m_parent (parent) | |
5fd1486c PJ |
65 | { |
66 | memset (m_regs, 0, | |
67 | BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT * sizeof (BrigOperandRegister *)); | |
68 | memset (&m_descriptor, 0, sizeof (phsa_descriptor)); | |
080dc243 PJ |
69 | |
70 | if (s_custom_builtins.size () > 0) return; | |
71 | ||
72 | /* Populate the builtin index. */ | |
73 | #undef DEF_HSAIL_ATOMIC_BUILTIN | |
74 | #undef DEF_HSAIL_CVT_ZEROI_SAT_BUILTIN | |
75 | #undef DEF_HSAIL_INTR_BUILTIN | |
76 | #undef DEF_HSAIL_SAT_BUILTIN | |
77 | #undef DEF_HSAIL_BUILTIN | |
78 | #define DEF_HSAIL_BUILTIN(ENUM, HSAIL_OPCODE, HSAIL_TYPE, NAME, TYPE, ATTRS) \ | |
79 | s_custom_builtins[std::make_pair (HSAIL_OPCODE, HSAIL_TYPE)] \ | |
80 | = builtin_decl_explicit (ENUM); | |
81 | ||
82 | #include "brig-builtins.def" | |
5fd1486c PJ |
83 | } |
84 | ||
85 | brig_function::~brig_function () | |
86 | { | |
87 | for (size_t i = 0; i < BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT; ++i) | |
88 | { | |
89 | if (m_regs[i] != NULL) | |
90 | { | |
91 | delete m_regs[i]; | |
92 | m_regs[i] = NULL; | |
93 | } | |
94 | } | |
95 | } | |
96 | ||
97 | /* Returns a GENERIC label with the given name in the given function. | |
98 | Creates it, if not yet found. */ | |
99 | ||
100 | tree | |
101 | brig_function::label (const std::string &name) | |
102 | { | |
103 | label_index::const_iterator i = m_label_index.find (name); | |
104 | if (i == m_label_index.end ()) | |
105 | { | |
106 | tree name_identifier | |
107 | = get_identifier_with_length (name.c_str (), name.size ()); | |
108 | ||
109 | tree label_decl = build_decl (UNKNOWN_LOCATION, LABEL_DECL, | |
110 | name_identifier, void_type_node); | |
111 | ||
112 | DECL_CONTEXT (label_decl) = m_func_decl; | |
113 | DECL_ARTIFICIAL (label_decl) = 0; | |
114 | ||
115 | m_label_index[name] = label_decl; | |
116 | return label_decl; | |
117 | } | |
118 | else | |
119 | return (*i).second; | |
120 | } | |
121 | ||
122 | /* Record an argument variable for later use. This includes both local | |
123 | variables inside arg blocks and incoming function arguments. */ | |
124 | ||
125 | void | |
126 | brig_function::add_arg_variable (const BrigDirectiveVariable *brigVar, | |
127 | tree treeDecl) | |
128 | { | |
129 | m_arg_variables[brigVar] = treeDecl; | |
130 | } | |
131 | ||
132 | tree | |
133 | brig_function::arg_variable (const BrigDirectiveVariable *var) const | |
134 | { | |
135 | variable_index::const_iterator i = m_arg_variables.find (var); | |
136 | if (i == m_arg_variables.end ()) | |
137 | return NULL_TREE; | |
138 | else | |
139 | return (*i).second; | |
140 | } | |
141 | ||
142 | /* Appends a new kernel argument descriptor for the current kernel's | |
143 | arg space. */ | |
144 | ||
145 | void | |
146 | brig_function::append_kernel_arg (const BrigDirectiveVariable *var, size_t size, | |
147 | size_t alignment) | |
148 | { | |
149 | gcc_assert (m_func_decl != NULL_TREE); | |
150 | gcc_assert (m_is_kernel); | |
151 | ||
152 | size_t align_padding = m_next_kernarg_offset % alignment == 0 ? | |
153 | 0 : (alignment - m_next_kernarg_offset % alignment); | |
154 | m_next_kernarg_offset += align_padding; | |
155 | m_kernarg_offsets[var] = m_next_kernarg_offset; | |
156 | m_next_kernarg_offset += size; | |
157 | ||
158 | m_kernarg_max_align | |
159 | = m_kernarg_max_align < alignment ? alignment : m_kernarg_max_align; | |
160 | } | |
161 | ||
162 | size_t | |
163 | brig_function::kernel_arg_offset (const BrigDirectiveVariable *var) const | |
164 | { | |
165 | var_offset_table::const_iterator i = m_kernarg_offsets.find (var); | |
166 | gcc_assert (i != m_kernarg_offsets.end ()); | |
167 | return (*i).second; | |
168 | } | |
169 | ||
170 | /* Add work-item ID variables to the beginning of the kernel function | |
171 | which can be used for address computation as kernel dispatch packet | |
172 | instructions can be expanded to GENERIC nodes referring to them. */ | |
173 | ||
174 | void | |
175 | brig_function::add_id_variables () | |
176 | { | |
177 | tree bind_expr = m_current_bind_expr; | |
178 | tree stmts = BIND_EXPR_BODY (bind_expr); | |
179 | ||
180 | /* Initialize the WG limits and local ids. */ | |
080dc243 | 181 | m_kernel_entry = tsi_start (stmts); |
5fd1486c PJ |
182 | |
183 | for (int i = 0; i < 3; ++i) | |
184 | { | |
185 | char dim_char = (char) ((int) 'x' + i); | |
186 | ||
187 | /* The local sizes are limited to 16b values, but let's still use 32b | |
188 | to avoid unnecessary casts (the ID functions are 32b). */ | |
189 | m_local_id_vars[i] | |
190 | = add_local_variable (std::string ("__local_") + dim_char, | |
080dc243 | 191 | long_long_integer_type_node); |
5fd1486c PJ |
192 | |
193 | tree workitemid_call | |
194 | = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_WORKITEMID), 2, | |
195 | uint32_type_node, uint32_type_node, | |
196 | build_int_cst (uint32_type_node, i), ptr_type_node, | |
197 | m_context_arg); | |
198 | ||
199 | tree id_init = build2 (MODIFY_EXPR, TREE_TYPE (m_local_id_vars[i]), | |
080dc243 PJ |
200 | m_local_id_vars[i], |
201 | convert (TREE_TYPE (m_local_id_vars[i]), | |
202 | workitemid_call)); | |
5fd1486c | 203 | |
080dc243 | 204 | append_statement (id_init); |
5fd1486c PJ |
205 | |
206 | m_cur_wg_size_vars[i] | |
207 | = add_local_variable (std::string ("__cur_wg_size_") + dim_char, | |
080dc243 | 208 | long_long_integer_type_node); |
5fd1486c | 209 | |
080dc243 PJ |
210 | tree cwgz_call; |
211 | if (flag_assume_phsa) | |
212 | { | |
213 | tree_stl_vec operands | |
214 | = tree_stl_vec (1, build_int_cst (uint32_type_node, i)); | |
215 | cwgz_call | |
216 | = expand_or_call_builtin (BRIG_OPCODE_CURRENTWORKGROUPSIZE, | |
217 | BRIG_TYPE_U32, uint32_type_node, | |
218 | operands); | |
219 | } | |
220 | else | |
221 | cwgz_call = call_builtin | |
222 | (builtin_decl_explicit (BUILT_IN_HSAIL_CURRENTWORKGROUPSIZE), | |
223 | 2, uint32_type_node, uint32_type_node, | |
224 | build_int_cst (uint32_type_node, i), ptr_type_node, m_context_arg); | |
5fd1486c PJ |
225 | |
226 | tree limit_init = build2 (MODIFY_EXPR, TREE_TYPE (m_cur_wg_size_vars[i]), | |
080dc243 PJ |
227 | m_cur_wg_size_vars[i], |
228 | convert (TREE_TYPE (m_cur_wg_size_vars[i]), | |
229 | cwgz_call)); | |
5fd1486c | 230 | |
080dc243 | 231 | append_statement (limit_init); |
5fd1486c PJ |
232 | |
233 | m_wg_id_vars[i] | |
234 | = add_local_variable (std::string ("__workgroupid_") + dim_char, | |
235 | uint32_type_node); | |
236 | ||
080dc243 PJ |
237 | tree wgid_call; |
238 | if (flag_assume_phsa) | |
239 | { | |
240 | tree_stl_vec operands | |
241 | = tree_stl_vec (1, build_int_cst (uint32_type_node, i)); | |
242 | wgid_call | |
243 | = expand_or_call_builtin (BRIG_OPCODE_WORKGROUPID, BRIG_TYPE_U32, | |
244 | uint32_type_node, operands); | |
245 | } | |
246 | else | |
247 | wgid_call | |
248 | = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_WORKGROUPID), | |
249 | 2, uint32_type_node, uint32_type_node, | |
250 | build_int_cst (uint32_type_node, i), ptr_type_node, | |
251 | m_context_arg); | |
5fd1486c PJ |
252 | |
253 | tree wgid_init = build2 (MODIFY_EXPR, TREE_TYPE (m_wg_id_vars[i]), | |
254 | m_wg_id_vars[i], wgid_call); | |
255 | ||
080dc243 | 256 | append_statement (wgid_init); |
5fd1486c PJ |
257 | |
258 | m_wg_size_vars[i] | |
259 | = add_local_variable (std::string ("__workgroupsize_") + dim_char, | |
260 | uint32_type_node); | |
261 | ||
080dc243 PJ |
262 | tree wgsize_call; |
263 | if (flag_assume_phsa) | |
264 | { | |
265 | tree_stl_vec operands | |
266 | = tree_stl_vec (1, build_int_cst (uint32_type_node, i)); | |
267 | wgsize_call | |
268 | = expand_or_call_builtin (BRIG_OPCODE_WORKGROUPSIZE, BRIG_TYPE_U32, | |
269 | uint32_type_node, operands); | |
270 | } | |
271 | else | |
272 | wgsize_call | |
273 | = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_WORKGROUPSIZE), | |
274 | 2, uint32_type_node, uint32_type_node, | |
275 | build_int_cst (uint32_type_node, i), ptr_type_node, | |
276 | m_context_arg); | |
5fd1486c PJ |
277 | |
278 | tree wgsize_init = build2 (MODIFY_EXPR, TREE_TYPE (m_wg_size_vars[i]), | |
279 | m_wg_size_vars[i], wgsize_call); | |
280 | ||
080dc243 | 281 | append_statement (wgsize_init); |
5fd1486c PJ |
282 | |
283 | m_grid_size_vars[i] | |
284 | = add_local_variable (std::string ("__gridsize_") + dim_char, | |
285 | uint32_type_node); | |
286 | ||
287 | tree gridsize_call | |
288 | = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_GRIDSIZE), 2, | |
289 | uint32_type_node, uint32_type_node, | |
290 | build_int_cst (uint32_type_node, i), ptr_type_node, | |
291 | m_context_arg); | |
292 | ||
293 | tree gridsize_init = build2 (MODIFY_EXPR, TREE_TYPE (m_grid_size_vars[i]), | |
294 | m_grid_size_vars[i], gridsize_call); | |
295 | ||
080dc243 PJ |
296 | append_statement (gridsize_init); |
297 | ||
298 | m_abs_id_base_vars[i] | |
299 | = add_local_variable (std::string ("__abs_id_base_") + dim_char, | |
300 | long_long_integer_type_node); | |
301 | ||
302 | m_abs_id_vars[i] | |
303 | = add_local_variable (std::string ("__abs_id_") + dim_char, | |
304 | long_long_integer_type_node); | |
305 | ||
306 | tree abs_id_base | |
307 | = build2 (MULT_EXPR, long_long_integer_type_node, | |
308 | convert (long_long_integer_type_node, m_wg_id_vars[i]), | |
309 | convert (long_long_integer_type_node, m_wg_size_vars[i])); | |
310 | tree abs_id | |
311 | = build2 (PLUS_EXPR, long_long_integer_type_node, abs_id_base, | |
312 | convert (long_long_integer_type_node, m_local_id_vars[i])); | |
313 | ||
314 | tree abs_id_base_init | |
315 | = build2 (MODIFY_EXPR, TREE_TYPE (m_abs_id_base_vars[i]), | |
316 | m_abs_id_base_vars[i], abs_id_base); | |
317 | append_statement (abs_id_base_init); | |
318 | ||
319 | tree abs_id_init = build2 (MODIFY_EXPR, | |
320 | TREE_TYPE (m_abs_id_vars[i]), | |
321 | m_abs_id_vars[i], abs_id); | |
322 | append_statement (abs_id_init); | |
5fd1486c | 323 | } |
5fd1486c PJ |
324 | } |
325 | ||
326 | /* Creates a new local variable with the given NAME and given GENERIC | |
327 | TYPE. */ | |
328 | ||
329 | tree | |
330 | brig_function::add_local_variable (std::string name, tree type) | |
331 | { | |
332 | tree name_identifier | |
333 | = get_identifier_with_length (name.c_str (), name.size ()); | |
334 | tree variable | |
335 | = build_decl (UNKNOWN_LOCATION, VAR_DECL, name_identifier, type); | |
336 | ||
337 | DECL_NONLOCAL (variable) = 0; | |
338 | TREE_ADDRESSABLE (variable) = 0; | |
339 | TREE_STATIC (variable) = 0; | |
340 | TREE_USED (variable) = 1; | |
341 | DECL_ARTIFICIAL (variable) = 0; | |
342 | ||
343 | tree bind_expr = DECL_SAVED_TREE (m_func_decl); | |
344 | ||
345 | DECL_CONTEXT (variable) = m_func_decl; | |
346 | ||
347 | DECL_CHAIN (variable) = BIND_EXPR_VARS (bind_expr); | |
348 | BIND_EXPR_VARS (bind_expr) = variable; | |
349 | return variable; | |
350 | } | |
351 | ||
dc03239c HL |
352 | /* Return tree type for an HSA register. |
353 | ||
354 | The tree type can be anything (scalar, vector, int, float, etc.) | |
355 | but its size is guaranteed to match the HSA register size. | |
356 | ||
357 | HSA registers are untyped but we select a type based on their use | |
358 | to reduce (sometimes unoptimizable) VIEW_CONVERT_EXPR nodes (seems | |
359 | to occur when use or def reaches over current BB). */ | |
360 | ||
361 | tree | |
362 | brig_function::get_tree_type_for_hsa_reg (const BrigOperandRegister *reg) const | |
363 | { | |
364 | size_t reg_size = gccbrig_reg_size (reg); | |
365 | ||
366 | /* The default type. */ | |
367 | tree type = build_nonstandard_integer_type (reg_size, true); | |
368 | ||
369 | if (m_parent->m_fn_regs_use_index.count (m_name) == 0) | |
370 | return type; | |
371 | ||
372 | const regs_use_index &index = m_parent->m_fn_regs_use_index[m_name]; | |
373 | size_t reg_id = gccbrig_hsa_reg_id (*reg); | |
374 | if (index.count (reg_id) == 0) | |
375 | return type; | |
376 | ||
377 | const reg_use_info &info = index.find (reg_id)->second; | |
378 | std::vector<std::pair<tree, size_t> >::const_iterator it | |
379 | = info.m_type_refs.begin (); | |
380 | std::vector<std::pair<tree, size_t> >::const_iterator it_end | |
381 | = info.m_type_refs.end (); | |
382 | size_t max_refs_as_type_count = 0; | |
383 | for (; it != it_end; it++) | |
384 | { | |
385 | size_t type_bit_size = int_size_in_bytes (it->first) * BITS_PER_UNIT; | |
386 | if (type_bit_size != reg_size) continue; | |
387 | if (it->second > max_refs_as_type_count) | |
388 | { | |
389 | type = it->first; | |
390 | max_refs_as_type_count = it->second; | |
391 | } | |
392 | } | |
393 | ||
394 | return type; | |
395 | } | |
396 | ||
5fd1486c PJ |
397 | /* Returns a DECL_VAR for the given HSAIL operand register. |
398 | If it has not been created yet for the function being generated, | |
dc03239c | 399 | creates it as a type determined by analysis phase. */ |
5fd1486c PJ |
400 | |
401 | tree | |
402 | brig_function::get_m_var_declfor_reg (const BrigOperandRegister *reg) | |
403 | { | |
dc03239c | 404 | size_t offset = gccbrig_hsa_reg_id (*reg); |
5fd1486c PJ |
405 | |
406 | reg_decl_index_entry *regEntry = m_regs[offset]; | |
407 | if (regEntry == NULL) | |
408 | { | |
409 | size_t reg_size = gccbrig_reg_size (reg); | |
410 | tree type; | |
411 | if (reg_size > 1) | |
dc03239c | 412 | type = get_tree_type_for_hsa_reg (reg); |
5fd1486c PJ |
413 | else |
414 | type = boolean_type_node; | |
415 | ||
416 | /* Drop the const qualifier so we do not end up with a read only | |
417 | register variable which cannot be written to later. */ | |
418 | tree nonconst_type = build_type_variant (type, false, false); | |
419 | ||
420 | regEntry = new reg_decl_index_entry; | |
421 | ||
422 | regEntry->m_var_decl | |
423 | = add_local_variable (gccbrig_reg_name (reg), nonconst_type); | |
424 | m_regs[offset] = regEntry; | |
425 | } | |
426 | return regEntry->m_var_decl; | |
427 | } | |
428 | ||
429 | /* Builds a work-item do..while loop for a single DIM. HEADER_ENTRY is | |
430 | a statement after which the iteration variables should be initialized and | |
431 | the loop body starts. BRANCH_AFTER is the statement after which the loop | |
432 | predicate check and the back edge goto will be appended. */ | |
433 | ||
434 | void | |
435 | brig_function::add_wi_loop (int dim, tree_stmt_iterator *header_entry, | |
436 | tree_stmt_iterator *branch_after) | |
437 | { | |
438 | tree ivar = m_local_id_vars[dim]; | |
080dc243 PJ |
439 | tree abs_id_base_var = m_abs_id_base_vars[dim]; |
440 | tree abs_id_var = m_abs_id_vars[dim]; | |
5fd1486c PJ |
441 | tree ivar_max = m_cur_wg_size_vars[dim]; |
442 | tree_stmt_iterator entry = *header_entry; | |
443 | ||
444 | /* TODO: this is not a parallel loop as we share the "register variables" | |
445 | across work-items. Should create a copy of them per WI instance. That | |
446 | is, declare temporaries for new definitions inside the loop body, not at | |
447 | function scope. */ | |
448 | ||
449 | tree ivar_init = build2 (MODIFY_EXPR, TREE_TYPE (ivar), ivar, | |
450 | build_zero_cst (TREE_TYPE (ivar))); | |
451 | tsi_link_after (&entry, ivar_init, TSI_NEW_STMT); | |
452 | ||
080dc243 PJ |
453 | tree abs_id_var_init = build2 (MODIFY_EXPR, TREE_TYPE (abs_id_var), |
454 | abs_id_var, | |
455 | convert (TREE_TYPE (abs_id_var), | |
456 | abs_id_base_var)); | |
457 | tsi_link_after (&entry, abs_id_var_init, TSI_NEW_STMT); | |
458 | ||
5fd1486c PJ |
459 | tree loop_body_label |
460 | = label (std::string ("__wi_loop_") + (char) ((int) 'x' + dim)); | |
461 | tree loop_body_label_stmt = build_stmt (LABEL_EXPR, loop_body_label); | |
462 | ||
463 | tsi_link_after (&entry, loop_body_label_stmt, TSI_NEW_STMT); | |
464 | ||
465 | if (m_has_unexpanded_dp_builtins) | |
466 | { | |
080dc243 PJ |
467 | if (!flag_assume_phsa) |
468 | { | |
469 | tree id_set_builtin | |
470 | = builtin_decl_explicit (BUILT_IN_HSAIL_SETWORKITEMID); | |
471 | /* Set the local ID to the current wi-loop iteration variable value | |
472 | to ensure the builtins see the correct values. */ | |
473 | tree id_set_call | |
474 | = call_builtin (id_set_builtin, 3, | |
475 | void_type_node, uint32_type_node, | |
476 | build_int_cst (uint32_type_node, dim), | |
477 | uint32_type_node, convert (uint32_type_node, ivar), | |
478 | ptr_type_node, m_context_arg); | |
479 | tsi_link_after (&entry, id_set_call, TSI_NEW_STMT); | |
480 | } | |
481 | else | |
482 | { | |
483 | tree ptr_type = build_pointer_type (uint32_type_node); | |
484 | tree ctx = build2 (MEM_REF, uint32_type_node, m_context_arg, | |
485 | build_int_cst (ptr_type, dim * 4)); | |
486 | tree assign = build2 (MODIFY_EXPR, uint32_type_node, ctx, | |
487 | convert (uint32_type_node, ivar)); | |
488 | ||
489 | tsi_link_after (&entry, assign, TSI_NEW_STMT); | |
490 | } | |
5fd1486c PJ |
491 | } |
492 | ||
493 | /* Increment the WI iteration variable. */ | |
494 | tree incr = build2 (PREINCREMENT_EXPR, TREE_TYPE (ivar), ivar, | |
495 | build_one_cst (TREE_TYPE (ivar))); | |
496 | ||
497 | tsi_link_after (branch_after, incr, TSI_NEW_STMT); | |
498 | ||
080dc243 PJ |
499 | /* ...and the abs id variable. */ |
500 | tree abs_id_incr = build2 (PREINCREMENT_EXPR, TREE_TYPE (abs_id_var), | |
501 | abs_id_var, | |
502 | build_one_cst (TREE_TYPE (abs_id_var))); | |
503 | ||
504 | tsi_link_after (branch_after, abs_id_incr, TSI_NEW_STMT); | |
505 | ||
5fd1486c PJ |
506 | /* Append the predicate check with the back edge goto. */ |
507 | tree condition = build2 (LT_EXPR, TREE_TYPE (ivar), ivar, ivar_max); | |
508 | tree target_goto = build1 (GOTO_EXPR, void_type_node, loop_body_label); | |
509 | tree if_stmt | |
510 | = build3 (COND_EXPR, void_type_node, condition, target_goto, NULL_TREE); | |
511 | tsi_link_after (branch_after, if_stmt, TSI_NEW_STMT); | |
512 | } | |
513 | ||
514 | /* Recursively analyzes the function and its callees for barrier usage. */ | |
515 | ||
516 | void | |
517 | brig_function::analyze_calls () | |
518 | { | |
519 | if (m_calls_analyzed) | |
520 | return; | |
521 | ||
522 | /* Set this early to not get stuck in case of recursive call graphs. | |
523 | This is safe because if the function calls itself, either the function | |
524 | has barrier calls which implies a call to a function with barrier calls, | |
525 | or it doesn't in which case the result depends on the later called | |
526 | functions. */ | |
527 | m_calls_analyzed = true; | |
528 | ||
529 | for (size_t i = 0; i < m_called_functions.size (); ++i) | |
530 | { | |
531 | tree f = m_called_functions[i]; | |
532 | brig_function *called_f = m_parent->get_finished_function (f); | |
533 | if (called_f == NULL) | |
534 | { | |
535 | /* Unfinished function (only declaration within the set of BRIGs) | |
536 | found. Cannot finish the CG analysis. Have to assume it does have | |
537 | a barrier for safety. */ | |
538 | m_has_function_calls_with_barriers = true; | |
539 | m_has_unexpanded_dp_builtins = true; | |
540 | break; | |
541 | } | |
542 | called_f->analyze_calls (); | |
543 | /* We can assume m_has_barriers has been correctly set during the | |
544 | construction of the function decl. No need to reanalyze it. */ | |
545 | m_has_function_calls_with_barriers |= called_f->m_has_barriers; | |
546 | ||
547 | /* If the function or any of its called functions has dispatch | |
548 | packet builtin calls that require the local id, we need to | |
549 | set the local id to the context in the work item loop before | |
550 | the functions are called. If we analyze the opposite, these | |
551 | function calls can be omitted. */ | |
552 | m_has_unexpanded_dp_builtins |= called_f->m_has_unexpanded_dp_builtins; | |
553 | } | |
554 | } | |
555 | ||
556 | /* Tries to convert the current kernel to a work-group function that executes | |
557 | all work-items using loops. Returns true in case the conversion was | |
558 | successful. */ | |
559 | ||
560 | bool | |
561 | brig_function::convert_to_wg_function () | |
562 | { | |
563 | if (!m_calls_analyzed) | |
564 | analyze_calls (); | |
565 | ||
566 | if (m_has_barriers || m_has_function_calls_with_barriers) | |
567 | return false; | |
568 | ||
569 | /* The most trivial case: No barriers at all in the kernel. | |
570 | We can create one big work-item loop around the whole kernel. */ | |
571 | tree bind_expr = m_current_bind_expr; | |
572 | tree stmts = BIND_EXPR_BODY (bind_expr); | |
573 | ||
574 | for (int i = 0; i < 3; ++i) | |
575 | { | |
576 | /* The previous loop has added a new label to the end of the function, | |
577 | the next level loop should wrap around it also. */ | |
578 | tree_stmt_iterator function_exit = tsi_last (stmts); | |
579 | add_wi_loop (i, &m_kernel_entry, &function_exit); | |
580 | } | |
581 | ||
582 | m_is_wg_function = true; | |
583 | return false; | |
584 | } | |
585 | ||
586 | /* Emits a kernel description to a special ELF section so it can be | |
587 | utilized by an HSA runtime implementation. The assembly block | |
588 | must be emitted to a statement list of an function, which is given | |
589 | as an argument. Returns the assembly block used to emit the section. */ | |
590 | ||
591 | tree | |
592 | brig_function::emit_metadata (tree stmt_list) | |
593 | { | |
594 | /* Emit an ELF section via an assembly directive that generates a special | |
595 | ELF section for each kernel that contains raw bytes of a descriptor | |
596 | object. This is pretty disgusting, but life is never perfect ;) */ | |
597 | ||
598 | /* Use the original kernel name without the '_' prefix in the section name. */ | |
599 | std::string kern_name = m_is_kernel ? m_name.substr (1) : m_name; | |
600 | ||
601 | std::ostringstream strstr; | |
602 | strstr << std::endl | |
603 | << ".pushsection " << PHSA_DESC_SECTION_PREFIX << kern_name | |
604 | << std::endl | |
605 | << "\t.p2align 1, 1, 1" << std::endl | |
606 | << "\t.byte "; | |
607 | ||
608 | for (size_t i = 0; i < sizeof (phsa_descriptor); ++i) | |
609 | { | |
610 | strstr << "0x" << std::setw (2) << std::setfill ('0') << std::hex | |
611 | << (unsigned) *((unsigned char *) &m_descriptor + i); | |
612 | if (i + 1 < sizeof (phsa_descriptor)) | |
613 | strstr << ", "; | |
614 | } | |
615 | ||
616 | strstr << std::endl << ".popsection" << std::endl << std::endl; | |
617 | ||
618 | tree metadata_asm | |
619 | = build_stmt (ASM_EXPR, | |
620 | build_string (strstr.str ().size (), strstr.str ().c_str ()), | |
621 | NULL_TREE, NULL_TREE, NULL_TREE, NULL_TREE); | |
622 | ||
623 | append_to_statement_list_force (metadata_asm, &stmt_list); | |
624 | return metadata_asm; | |
625 | } | |
626 | ||
627 | /* Emits the kernel launcher function. Also emits the metadata section | |
628 | creation statements in it. | |
629 | ||
630 | The launcher function calls the device-side runtime | |
631 | that runs the kernel for all work-items. In C: | |
632 | ||
633 | void KernelName (void* context, void* group_base_addr) | |
634 | { | |
635 | __hsail_launch_kernel (_KernelName, context, group_base_addr); | |
636 | } | |
637 | ||
638 | or, in case of a successful conversion to a work-group function: | |
639 | ||
640 | void KernelName (void* context, void* group_base_addr) | |
641 | { | |
642 | __hsail_launch_wg_function (_KernelName, context, group_base_addr); | |
643 | } | |
644 | ||
645 | The user/host sees this function as the kernel to call from the | |
646 | outside. The actual kernel generated from HSAIL was named _KernelName. | |
647 | */ | |
648 | ||
649 | tree | |
650 | brig_function::emit_launcher_and_metadata () | |
651 | { | |
652 | /* The original kernel name without the '_' prefix. */ | |
653 | std::string kern_name = m_name.substr (1); | |
654 | ||
655 | tree name_identifier | |
656 | = get_identifier_with_length (kern_name.c_str (), kern_name.size ()); | |
657 | ||
080dc243 PJ |
658 | tree restrict_void_ptr |
659 | = build_qualified_type (build_pointer_type (void_type_node), | |
660 | TYPE_QUAL_RESTRICT); | |
661 | tree restrict_char_ptr | |
662 | = build_qualified_type (build_pointer_type (char_type_node), | |
663 | TYPE_QUAL_RESTRICT); | |
5fd1486c PJ |
664 | tree launcher |
665 | = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL, name_identifier, | |
080dc243 PJ |
666 | build_function_type_list (void_type_node, restrict_void_ptr, |
667 | restrict_char_ptr, NULL_TREE)); | |
5fd1486c PJ |
668 | |
669 | TREE_USED (launcher) = 1; | |
670 | DECL_ARTIFICIAL (launcher) = 1; | |
671 | ||
672 | tree context_arg = build_decl (UNKNOWN_LOCATION, PARM_DECL, | |
080dc243 PJ |
673 | get_identifier ("__context"), |
674 | restrict_void_ptr); | |
5fd1486c PJ |
675 | |
676 | DECL_ARGUMENTS (launcher) = context_arg; | |
080dc243 | 677 | DECL_ARG_TYPE (context_arg) = restrict_void_ptr; |
5fd1486c PJ |
678 | DECL_CONTEXT (context_arg) = launcher; |
679 | TREE_USED (context_arg) = 1; | |
680 | DECL_ARTIFICIAL (context_arg) = 1; | |
681 | ||
682 | tree group_base_addr_arg | |
683 | = build_decl (UNKNOWN_LOCATION, PARM_DECL, | |
080dc243 | 684 | get_identifier ("__group_base_addr"), restrict_char_ptr); |
5fd1486c PJ |
685 | |
686 | chainon (DECL_ARGUMENTS (launcher), group_base_addr_arg); | |
080dc243 | 687 | DECL_ARG_TYPE (group_base_addr_arg) = restrict_char_ptr; |
5fd1486c PJ |
688 | DECL_CONTEXT (group_base_addr_arg) = launcher; |
689 | TREE_USED (group_base_addr_arg) = 1; | |
690 | DECL_ARTIFICIAL (group_base_addr_arg) = 1; | |
691 | ||
692 | tree resdecl | |
693 | = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, void_type_node); | |
694 | ||
695 | DECL_RESULT (launcher) = resdecl; | |
696 | DECL_CONTEXT (resdecl) = launcher; | |
697 | ||
698 | DECL_INITIAL (launcher) = make_node (BLOCK); | |
699 | TREE_USED (DECL_INITIAL (launcher)) = 1; | |
700 | ||
701 | tree stmt_list = alloc_stmt_list (); | |
702 | ||
703 | tree bind_expr = build3 (BIND_EXPR, void_type_node, NULL, stmt_list, NULL); | |
704 | ||
637f3cde | 705 | TREE_STATIC (launcher) = 1; |
5fd1486c PJ |
706 | TREE_PUBLIC (launcher) = 1; |
707 | ||
708 | DECL_SAVED_TREE (launcher) = bind_expr; | |
709 | ||
710 | if (DECL_STRUCT_FUNCTION (launcher) == NULL) | |
711 | push_struct_function (launcher); | |
712 | else | |
713 | push_cfun (DECL_STRUCT_FUNCTION (launcher)); | |
714 | ||
715 | tree kernel_func_ptr = build1 (ADDR_EXPR, ptr_type_node, m_func_decl); | |
716 | ||
717 | tree phsail_launch_kernel_call; | |
718 | ||
d4b7f2ee PJ |
719 | /* Compute the local group segment frame start pointer. */ |
720 | tree group_local_offset_temp | |
721 | = create_tmp_var (uint32_type_node, "group_local_offset"); | |
722 | tree group_local_offset_arg | |
723 | = build2 (MODIFY_EXPR, uint32_type_node, | |
724 | group_local_offset_temp, | |
725 | build_int_cst (uint32_type_node, | |
726 | m_parent->m_module_group_variables.size())); | |
727 | ||
5fd1486c PJ |
728 | /* Emit a launcher depending whether we converted the kernel function to |
729 | a work group function or not. */ | |
730 | if (m_is_wg_function) | |
731 | phsail_launch_kernel_call | |
732 | = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_LAUNCH_WG_FUNC), | |
d4b7f2ee | 733 | 4, void_type_node, |
080dc243 PJ |
734 | ptr_type_node, kernel_func_ptr, restrict_void_ptr, |
735 | context_arg, restrict_char_ptr, group_base_addr_arg, | |
d4b7f2ee | 736 | uint32_type_node, group_local_offset_arg); |
5fd1486c PJ |
737 | else |
738 | phsail_launch_kernel_call | |
739 | = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_LAUNCH_KERNEL), | |
d4b7f2ee | 740 | 4, void_type_node, |
080dc243 PJ |
741 | ptr_type_node, kernel_func_ptr, restrict_void_ptr, |
742 | context_arg, restrict_char_ptr, group_base_addr_arg, | |
d4b7f2ee | 743 | uint32_type_node, group_local_offset_arg); |
5fd1486c PJ |
744 | |
745 | append_to_statement_list_force (phsail_launch_kernel_call, &stmt_list); | |
746 | ||
747 | emit_metadata (stmt_list); | |
748 | ||
637f3cde PJ |
749 | set_externally_visible (launcher); |
750 | ||
5fd1486c PJ |
751 | return launcher; |
752 | } | |
753 | ||
754 | tree | |
755 | brig_function::append_statement (tree stmt) | |
756 | { | |
757 | gcc_assert (m_func_decl != NULL); | |
758 | ||
759 | tree bind_expr = m_current_bind_expr; | |
760 | tree stmts = BIND_EXPR_BODY (bind_expr); | |
761 | ||
762 | append_to_statement_list_force (stmt, &stmts); | |
763 | return stmt; | |
764 | } | |
765 | ||
766 | /* Creates a new "alloca frame" for the current function by | |
767 | injecting an alloca frame push in the beginning of the function | |
768 | and an alloca frame pop before all function exit points. */ | |
769 | ||
770 | void | |
771 | brig_function::create_alloca_frame () | |
772 | { | |
773 | tree_stmt_iterator entry; | |
774 | ||
775 | /* Adds the alloca push only after the ids have been initialized | |
776 | in case of a kernel function. */ | |
777 | if (m_is_kernel) | |
778 | entry = m_kernel_entry; | |
779 | else | |
780 | { | |
781 | tree bind_expr = m_current_bind_expr; | |
782 | tree stmts = BIND_EXPR_BODY (bind_expr); | |
783 | entry = tsi_start (stmts); | |
784 | } | |
785 | ||
786 | tree push_frame_builtin = builtin_decl_explicit (BUILT_IN_HSAIL_PUSH_FRAME); | |
787 | tree push_frame_call | |
788 | = call_builtin (push_frame_builtin, 1, void_type_node, ptr_type_node, | |
789 | m_context_arg); | |
790 | ||
791 | tsi_link_before (&entry, push_frame_call, TSI_NEW_STMT); | |
792 | ||
793 | tree pop_frame_builtin = builtin_decl_explicit (BUILT_IN_HSAIL_POP_FRAME); | |
794 | ||
795 | do | |
796 | { | |
797 | tree stmt = tsi_stmt (entry); | |
798 | if (TREE_CODE (stmt) == RETURN_EXPR) | |
799 | { | |
800 | tree pop_frame_call | |
801 | = call_builtin (pop_frame_builtin, 1, void_type_node, | |
802 | ptr_type_node, m_context_arg); | |
803 | ||
804 | tsi_link_before (&entry, pop_frame_call, TSI_SAME_STMT); | |
805 | } | |
806 | tsi_next (&entry); | |
807 | } | |
808 | while (!tsi_end_p (entry)); | |
809 | } | |
810 | ||
811 | /* Finishes the currently built function. After calling this, no new | |
812 | statements should be appeneded to the function. */ | |
813 | void | |
814 | brig_function::finish () | |
815 | { | |
816 | append_return_stmt (); | |
817 | ||
818 | /* Currently assume single alloca frame per WG. */ | |
819 | if (m_has_allocas) | |
820 | create_alloca_frame (); | |
821 | } | |
822 | ||
823 | void | |
824 | brig_function::finish_kernel () | |
825 | { | |
826 | /* Kernel functions should have a single exit point. | |
827 | Let's create one. The return instructions should have | |
828 | been converted to branches to this label. */ | |
829 | append_statement (build_stmt (LABEL_EXPR, m_exit_label)); | |
830 | /* Attempt to convert the kernel to a work-group function that | |
831 | executes all work-items of the WG using a loop. */ | |
832 | convert_to_wg_function (); | |
833 | ||
834 | append_return_stmt (); | |
835 | ||
836 | /* Currently assume single alloca frame per WG. */ | |
837 | if (m_has_allocas) | |
838 | create_alloca_frame (); | |
839 | } | |
840 | ||
841 | void | |
842 | brig_function::append_return_stmt () | |
843 | { | |
844 | gcc_assert (m_current_bind_expr != NULL_TREE); | |
845 | tree stmts = BIND_EXPR_BODY (m_current_bind_expr); | |
846 | ||
847 | if (STATEMENT_LIST_TAIL (stmts) == NULL) | |
848 | return; /* Empty function. */ | |
849 | ||
850 | tree last_stmt = tsi_stmt (tsi_last (stmts)); | |
851 | ||
852 | if (TREE_CODE (last_stmt) == RETURN_EXPR) | |
853 | return; | |
854 | ||
855 | if (m_ret_value != NULL_TREE) | |
856 | { | |
857 | tree result_assign | |
858 | = build2 (MODIFY_EXPR, TREE_TYPE (m_ret_value), m_ret_value, | |
859 | m_ret_temp); | |
860 | ||
861 | tree return_expr | |
862 | = build1 (RETURN_EXPR, TREE_TYPE (result_assign), result_assign); | |
863 | append_to_statement_list_force (return_expr, &stmts); | |
864 | } | |
865 | else | |
866 | { | |
867 | tree return_stmt = build_stmt (RETURN_EXPR, NULL); | |
868 | append_to_statement_list_force (return_stmt, &stmts); | |
869 | } | |
870 | } | |
871 | ||
872 | bool | |
873 | brig_function::has_function_scope_var (const BrigBase* var) const | |
874 | { | |
875 | return m_function_scope_vars.find (var) != m_function_scope_vars.end (); | |
876 | } | |
d4b7f2ee PJ |
877 | |
878 | size_t | |
879 | brig_function::group_variable_segment_offset (const std::string &name) const | |
880 | { | |
881 | if (m_local_group_variables.has_variable (name)) | |
882 | return m_local_group_variables.segment_offset (name); | |
883 | ||
884 | gcc_assert (m_parent->m_module_group_variables.has_variable (name)); | |
885 | return m_parent->m_module_group_variables.segment_offset (name); | |
886 | } | |
080dc243 PJ |
887 | |
888 | /* Try to expand the given builtin call to reuse a previously generated | |
889 | variable, if possible. If not, just call the given builtin. | |
890 | BRIG_OPCODE and BRIG_TYPE identify the builtin's BRIG opcode/type, | |
891 | ARITH_TYPE its GENERIC type, and OPERANDS contains the builtin's | |
892 | input operands. */ | |
893 | ||
894 | tree | |
895 | brig_function::expand_or_call_builtin (BrigOpcode16_t brig_opcode, | |
896 | BrigType16_t brig_type, | |
897 | tree arith_type, | |
898 | tree_stl_vec &operands) | |
899 | { | |
900 | if (needs_workitem_context_data (brig_opcode)) | |
901 | m_has_unexpanded_dp_builtins = true; | |
902 | ||
903 | if (can_expand_builtin (brig_opcode)) | |
904 | return expand_builtin (brig_opcode, operands); | |
905 | ||
906 | tree built_in | |
907 | = get_builtin_for_hsa_opcode (arith_type, brig_opcode, brig_type); | |
908 | ||
909 | if (!VECTOR_TYPE_P (TREE_TYPE (TREE_TYPE (built_in))) | |
910 | && arith_type != NULL_TREE && VECTOR_TYPE_P (arith_type) | |
911 | && brig_opcode != BRIG_OPCODE_LERP | |
912 | && brig_opcode != BRIG_OPCODE_PACKCVT | |
913 | && brig_opcode != BRIG_OPCODE_SAD | |
914 | && brig_opcode != BRIG_OPCODE_SADHI) | |
915 | { | |
916 | /* Call the scalar built-in for all elements in the vector. */ | |
917 | tree_stl_vec operand0_elements; | |
918 | if (operands.size () > 0) | |
919 | unpack (operands[0], operand0_elements); | |
920 | ||
921 | tree_stl_vec operand1_elements; | |
922 | if (operands.size () > 1) | |
923 | unpack (operands[1], operand1_elements); | |
924 | ||
925 | tree_stl_vec result_elements; | |
926 | ||
927 | size_t element_count = gccbrig_type_vector_subparts (arith_type); | |
928 | for (size_t i = 0; i < element_count; ++i) | |
929 | { | |
930 | tree_stl_vec call_operands; | |
931 | if (operand0_elements.size () > 0) | |
932 | call_operands.push_back (operand0_elements.at (i)); | |
933 | ||
934 | if (operand1_elements.size () > 0) | |
935 | call_operands.push_back (operand1_elements.at (i)); | |
936 | ||
937 | result_elements.push_back | |
938 | (expand_or_call_builtin (brig_opcode, brig_type, | |
939 | TREE_TYPE (arith_type), | |
940 | call_operands)); | |
941 | } | |
942 | return pack (result_elements); | |
943 | } | |
944 | ||
945 | tree_stl_vec call_operands; | |
946 | tree_stl_vec operand_types; | |
947 | ||
948 | tree arg_type_chain = TYPE_ARG_TYPES (TREE_TYPE (built_in)); | |
949 | ||
950 | for (size_t i = 0; i < operands.size (); ++i) | |
951 | { | |
952 | tree operand_type = TREE_VALUE (arg_type_chain); | |
953 | call_operands.push_back (convert (operand_type, operands[i])); | |
954 | operand_types.push_back (operand_type); | |
955 | arg_type_chain = TREE_CHAIN (arg_type_chain); | |
956 | } | |
957 | ||
958 | if (needs_workitem_context_data (brig_opcode)) | |
959 | { | |
960 | call_operands.push_back (m_context_arg); | |
961 | operand_types.push_back (ptr_type_node); | |
962 | } | |
963 | ||
964 | size_t operand_count = call_operands.size (); | |
965 | ||
966 | call_operands.resize (4, NULL_TREE); | |
967 | operand_types.resize (4, NULL_TREE); | |
968 | for (size_t i = 0; i < operand_count; ++i) | |
969 | call_operands.at (i) = build_resize_convert_view (operand_types.at (i), | |
970 | call_operands.at (i)); | |
971 | ||
972 | tree fnptr = build_fold_addr_expr (built_in); | |
973 | return build_call_array (TREE_TYPE (TREE_TYPE (built_in)), fnptr, | |
974 | operand_count, &call_operands[0]); | |
975 | } | |
976 | ||
977 | /* Instead of calling a built-in function, use a more efficient mechanism | |
978 | such as reuse a previously returned value known to be still valid, or | |
979 | access the work-item context struct directly. This is beneficial especially | |
980 | for the work-item identification related builtins as not having them as | |
981 | unanalyzable black box calls can lead to more easily vectorizable parallel | |
982 | loops for multi work-item work-groups. BRIG_OPCODE identifies the builtin | |
983 | and OPERANDS store the operands. */ | |
984 | ||
985 | tree | |
986 | brig_function::expand_builtin (BrigOpcode16_t brig_opcode, | |
987 | tree_stl_vec &operands) | |
988 | { | |
989 | tree_stl_vec uint32_0 = tree_stl_vec (1, build_int_cst (uint32_type_node, 0)); | |
990 | ||
991 | tree_stl_vec uint32_1 = tree_stl_vec (1, build_int_cst (uint32_type_node, 1)); | |
992 | ||
993 | tree_stl_vec uint32_2 = tree_stl_vec (1, build_int_cst (uint32_type_node, 2)); | |
994 | ||
995 | if (brig_opcode == BRIG_OPCODE_WORKITEMFLATABSID) | |
996 | { | |
997 | tree id0 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_0); | |
998 | id0 = convert (uint64_type_node, id0); | |
999 | ||
1000 | tree id1 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_1); | |
1001 | id1 = convert (uint64_type_node, id1); | |
1002 | ||
1003 | tree id2 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_2); | |
1004 | id2 = convert (uint64_type_node, id2); | |
1005 | ||
1006 | tree max0 = convert (uint64_type_node, m_grid_size_vars[0]); | |
1007 | tree max1 = convert (uint64_type_node, m_grid_size_vars[1]); | |
1008 | ||
1009 | tree id2_x_max0_x_max1 = build2 (MULT_EXPR, uint64_type_node, id2, max0); | |
1010 | id2_x_max0_x_max1 | |
1011 | = build2 (MULT_EXPR, uint64_type_node, id2_x_max0_x_max1, max1); | |
1012 | ||
1013 | tree id1_x_max0 = build2 (MULT_EXPR, uint64_type_node, id1, max0); | |
1014 | ||
1015 | tree sum = build2 (PLUS_EXPR, uint64_type_node, id0, id1_x_max0); | |
1016 | sum = build2 (PLUS_EXPR, uint64_type_node, sum, id2_x_max0_x_max1); | |
1017 | ||
1018 | return add_temp_var ("workitemflatabsid", sum); | |
1019 | } | |
1020 | else if (brig_opcode == BRIG_OPCODE_WORKITEMABSID) | |
1021 | { | |
1022 | HOST_WIDE_INT dim = int_constant_value (operands[0]); | |
1023 | return m_abs_id_vars[dim]; | |
1024 | } | |
1025 | else if (brig_opcode == BRIG_OPCODE_WORKITEMFLATID) | |
1026 | { | |
1027 | ||
1028 | tree wg_size_x = expand_builtin (BRIG_OPCODE_WORKGROUPSIZE, uint32_0); | |
1029 | tree wg_size_y = expand_builtin (BRIG_OPCODE_WORKGROUPSIZE, uint32_1); | |
1030 | tree z_x_wgsx_wgsy | |
1031 | = build2 (MULT_EXPR, uint32_type_node, | |
1032 | convert (uint32_type_node, | |
1033 | expand_builtin (BRIG_OPCODE_WORKITEMID, uint32_2)), | |
1034 | wg_size_x); | |
1035 | z_x_wgsx_wgsy = build2 (MULT_EXPR, uint32_type_node, z_x_wgsx_wgsy, | |
1036 | wg_size_y); | |
1037 | ||
1038 | tree y_x_wgsx | |
1039 | = build2 (MULT_EXPR, uint32_type_node, | |
1040 | convert (uint32_type_node, | |
1041 | expand_builtin (BRIG_OPCODE_WORKITEMID, uint32_1)), | |
1042 | wg_size_x); | |
1043 | ||
1044 | tree sum = build2 (PLUS_EXPR, uint32_type_node, y_x_wgsx, z_x_wgsx_wgsy); | |
1045 | sum = build2 (PLUS_EXPR, uint32_type_node, | |
1046 | convert (uint32_type_node, | |
1047 | expand_builtin (BRIG_OPCODE_WORKITEMID, uint32_0)), | |
1048 | sum); | |
1049 | return add_temp_var ("workitemflatid", sum); | |
1050 | } | |
1051 | else if (brig_opcode == BRIG_OPCODE_WORKGROUPSIZE) | |
1052 | { | |
1053 | HOST_WIDE_INT dim = int_constant_value (operands[0]); | |
1054 | if (flag_assume_phsa) | |
1055 | { | |
1056 | tree ptr_type = build_pointer_type (uint32_type_node); | |
1057 | tree ctx = build2 (MEM_REF, uint32_type_node, m_context_arg, | |
1058 | build_int_cst (ptr_type, | |
1059 | PHSA_CONTEXT_WG_SIZES | |
1060 | + dim * 4)); | |
1061 | std::string name ("wgsize_x"); | |
1062 | name [name.length() - 1] += dim; | |
1063 | return add_temp_var (name.c_str(), ctx); | |
1064 | } | |
1065 | else if (m_is_kernel) | |
1066 | { | |
1067 | /* For kernels without phsa we generate certain temps before | |
1068 | the WI loop, which means we don't need to rely on LICM to get | |
1069 | them moved out. */ | |
1070 | return m_wg_size_vars[dim]; | |
1071 | } | |
1072 | else | |
1073 | gcc_unreachable (); | |
1074 | } | |
1075 | else if (brig_opcode == BRIG_OPCODE_WORKITEMID) | |
1076 | { | |
1077 | HOST_WIDE_INT dim = int_constant_value (operands[0]); | |
1078 | if (m_is_kernel) | |
1079 | { | |
1080 | return m_local_id_vars [dim]; | |
1081 | } | |
1082 | else if (flag_assume_phsa) | |
1083 | { | |
1084 | tree ptr_type = build_pointer_type (uint32_type_node); | |
1085 | tree ctx = build2 (MEM_REF, uint32_type_node, m_context_arg, | |
1086 | build_int_cst (ptr_type, | |
1087 | PHSA_CONTEXT_OFFS_WI_IDS | |
1088 | + dim * 4)); | |
1089 | std::string name ("wiid_x"); | |
1090 | name [name.length() - 1] += dim; | |
1091 | return add_temp_var (name.c_str(), ctx); | |
1092 | } | |
1093 | else | |
1094 | gcc_unreachable (); | |
1095 | } | |
1096 | else if (brig_opcode == BRIG_OPCODE_WORKGROUPID) | |
1097 | { | |
1098 | HOST_WIDE_INT dim = int_constant_value (operands[0]); | |
1099 | if (flag_assume_phsa) | |
1100 | { | |
1101 | tree ptr_type = build_pointer_type (uint32_type_node); | |
1102 | tree ctx = build2 (MEM_REF, uint32_type_node, m_context_arg, | |
1103 | build_int_cst (ptr_type, | |
1104 | PHSA_CONTEXT_OFFS_WG_IDS | |
1105 | + dim * 4)); | |
1106 | std::string name ("wgid_x"); | |
1107 | name [name.length() - 1] += dim; | |
1108 | return add_temp_var (name.c_str(), ctx); | |
1109 | } else if (m_is_kernel) | |
1110 | return m_wg_id_vars [dim]; | |
1111 | else | |
1112 | gcc_unreachable (); | |
1113 | } | |
1114 | else if (brig_opcode == BRIG_OPCODE_CURRENTWORKGROUPSIZE) | |
1115 | { | |
1116 | HOST_WIDE_INT dim = int_constant_value (operands[0]); | |
1117 | if (flag_assume_phsa) | |
1118 | { | |
1119 | tree ptr_type = build_pointer_type (uint32_type_node); | |
1120 | tree ctx = build2 (MEM_REF, uint32_type_node, m_context_arg, | |
1121 | build_int_cst (ptr_type, | |
1122 | PHSA_CONTEXT_CURRENT_WG_SIZES | |
1123 | + dim * 4)); | |
1124 | std::string name ("curwgsize_x"); | |
1125 | name [name.length() - 1] += dim; | |
1126 | return add_temp_var (name.c_str(), ctx); | |
1127 | } else if (m_is_kernel) | |
1128 | return m_cur_wg_size_vars[dim]; | |
1129 | else | |
1130 | gcc_unreachable (); | |
1131 | } | |
1132 | else | |
1133 | gcc_unreachable (); | |
1134 | ||
1135 | return NULL_TREE; | |
1136 | } | |
1137 | ||
1138 | /* Returns true in case the given opcode that would normally be generated | |
1139 | as a builtin call can be expanded to tree nodes. */ | |
1140 | ||
1141 | bool | |
1142 | brig_function::can_expand_builtin (BrigOpcode16_t brig_opcode) const | |
1143 | { | |
1144 | switch (brig_opcode) | |
1145 | { | |
1146 | case BRIG_OPCODE_CURRENTWORKGROUPSIZE: | |
1147 | case BRIG_OPCODE_WORKITEMFLATID: | |
1148 | case BRIG_OPCODE_WORKITEMID: | |
1149 | case BRIG_OPCODE_WORKGROUPID: | |
1150 | case BRIG_OPCODE_WORKGROUPSIZE: | |
1151 | return m_is_kernel || flag_assume_phsa; | |
1152 | case BRIG_OPCODE_WORKITEMFLATABSID: | |
1153 | case BRIG_OPCODE_WORKITEMABSID: | |
1154 | return m_is_kernel; | |
1155 | default: | |
1156 | return false; | |
1157 | }; | |
1158 | } | |
1159 | ||
1160 | /* In case the HSA instruction must be implemented using a builtin, | |
1161 | this function is called to get the correct builtin function. | |
1162 | TYPE is the instruction tree type, BRIG_OPCODE the opcode of the | |
1163 | brig instruction and BRIG_TYPE the brig instruction's type. */ | |
1164 | ||
1165 | tree | |
1166 | brig_function::get_builtin_for_hsa_opcode | |
1167 | (tree type, BrigOpcode16_t brig_opcode, BrigType16_t brig_type) const | |
1168 | { | |
1169 | tree builtin = NULL_TREE; | |
1170 | tree builtin_type = type; | |
1171 | ||
1172 | /* For vector types, first find the scalar version of the builtin. */ | |
1173 | if (type != NULL_TREE && VECTOR_TYPE_P (type)) | |
1174 | builtin_type = TREE_TYPE (type); | |
1175 | BrigType16_t brig_inner_type = brig_type & BRIG_TYPE_BASE_MASK; | |
1176 | ||
1177 | /* Some BRIG opcodes can use the same builtins for unsigned and | |
1178 | signed types. Force these cases to unsigned types. */ | |
1179 | ||
1180 | if (brig_opcode == BRIG_OPCODE_BORROW | |
1181 | || brig_opcode == BRIG_OPCODE_CARRY | |
1182 | || brig_opcode == BRIG_OPCODE_LASTBIT | |
1183 | || brig_opcode == BRIG_OPCODE_BITINSERT) | |
1184 | { | |
1185 | if (brig_type == BRIG_TYPE_S32) | |
1186 | brig_type = BRIG_TYPE_U32; | |
1187 | else if (brig_type == BRIG_TYPE_S64) | |
1188 | brig_type = BRIG_TYPE_U64; | |
1189 | } | |
1190 | ||
1191 | switch (brig_opcode) | |
1192 | { | |
1193 | case BRIG_OPCODE_FLOOR: | |
1194 | builtin = mathfn_built_in (builtin_type, BUILT_IN_FLOOR); | |
1195 | break; | |
1196 | case BRIG_OPCODE_CEIL: | |
1197 | builtin = mathfn_built_in (builtin_type, BUILT_IN_CEIL); | |
1198 | break; | |
1199 | case BRIG_OPCODE_SQRT: | |
1200 | case BRIG_OPCODE_NSQRT: | |
1201 | builtin = mathfn_built_in (builtin_type, BUILT_IN_SQRT); | |
1202 | break; | |
1203 | case BRIG_OPCODE_RINT: | |
1204 | builtin = mathfn_built_in (builtin_type, BUILT_IN_RINT); | |
1205 | break; | |
1206 | case BRIG_OPCODE_TRUNC: | |
1207 | builtin = mathfn_built_in (builtin_type, BUILT_IN_TRUNC); | |
1208 | break; | |
1209 | case BRIG_OPCODE_COPYSIGN: | |
1210 | builtin = mathfn_built_in (builtin_type, BUILT_IN_COPYSIGN); | |
1211 | break; | |
1212 | case BRIG_OPCODE_NSIN: | |
1213 | builtin = mathfn_built_in (builtin_type, BUILT_IN_SIN); | |
1214 | break; | |
1215 | case BRIG_OPCODE_NLOG2: | |
1216 | builtin = mathfn_built_in (builtin_type, BUILT_IN_LOG2); | |
1217 | break; | |
1218 | case BRIG_OPCODE_NEXP2: | |
1219 | builtin = mathfn_built_in (builtin_type, BUILT_IN_EXP2); | |
1220 | break; | |
c566cc9f | 1221 | case BRIG_OPCODE_FMA: |
080dc243 PJ |
1222 | case BRIG_OPCODE_NFMA: |
1223 | builtin = mathfn_built_in (builtin_type, BUILT_IN_FMA); | |
1224 | break; | |
1225 | case BRIG_OPCODE_NCOS: | |
1226 | builtin = mathfn_built_in (builtin_type, BUILT_IN_COS); | |
1227 | break; | |
1228 | case BRIG_OPCODE_POPCOUNT: | |
1229 | /* Popcount should be typed by its argument type (the return value | |
1230 | is always u32). Let's use a b64 version for also for b32 for now. */ | |
1231 | return builtin_decl_explicit (BUILT_IN_POPCOUNTL); | |
1232 | case BRIG_OPCODE_BORROW: | |
1233 | /* Borrow uses the same builtin for unsigned and signed types. */ | |
1234 | if (brig_type == BRIG_TYPE_S32 || brig_type == BRIG_TYPE_U32) | |
1235 | return builtin_decl_explicit (BUILT_IN_HSAIL_BORROW_U32); | |
1236 | else | |
1237 | return builtin_decl_explicit (BUILT_IN_HSAIL_BORROW_U64); | |
1238 | case BRIG_OPCODE_CARRY: | |
1239 | /* Carry also uses the same builtin for unsigned and signed types. */ | |
1240 | if (brig_type == BRIG_TYPE_S32 || brig_type == BRIG_TYPE_U32) | |
1241 | return builtin_decl_explicit (BUILT_IN_HSAIL_CARRY_U32); | |
1242 | else | |
1243 | return builtin_decl_explicit (BUILT_IN_HSAIL_CARRY_U64); | |
1244 | default: | |
1245 | ||
1246 | /* Use our builtin index for finding a proper builtin for the BRIG | |
1247 | opcode and BRIG type. This takes care most of the builtin cases, | |
1248 | the special cases are handled in the separate 'case' statements | |
1249 | above. */ | |
1250 | builtin_map::const_iterator i | |
1251 | = s_custom_builtins.find (std::make_pair (brig_opcode, brig_type)); | |
1252 | if (i != s_custom_builtins.end ()) | |
1253 | return (*i).second; | |
1254 | ||
1255 | if (brig_inner_type != brig_type) | |
1256 | { | |
1257 | /* Try to find a scalar built-in we could use. */ | |
1258 | i = s_custom_builtins.find | |
1259 | (std::make_pair (brig_opcode, brig_inner_type)); | |
1260 | if (i != s_custom_builtins.end ()) | |
1261 | return (*i).second; | |
1262 | } | |
1263 | ||
1264 | /* In case this is an fp16 operation that is promoted to fp32, | |
1265 | try to find a fp32 scalar built-in. */ | |
1266 | if (brig_inner_type == BRIG_TYPE_F16) | |
1267 | { | |
1268 | i = s_custom_builtins.find | |
1269 | (std::make_pair (brig_opcode, BRIG_TYPE_F32)); | |
1270 | if (i != s_custom_builtins.end ()) | |
1271 | return (*i).second; | |
1272 | } | |
1273 | gcc_unreachable (); | |
1274 | } | |
1275 | ||
1276 | if (VECTOR_TYPE_P (type) && builtin != NULL_TREE) | |
1277 | { | |
1278 | /* Try to find a vectorized version of the built-in. | |
1279 | TODO: properly assert that builtin is a mathfn builtin? */ | |
1280 | tree vec_builtin | |
1281 | = targetm.vectorize.builtin_vectorized_function | |
1282 | (builtin_mathfn_code (builtin), type, type); | |
1283 | if (vec_builtin != NULL_TREE) | |
1284 | return vec_builtin; | |
1285 | else | |
1286 | return builtin; | |
1287 | } | |
1288 | if (builtin == NULL_TREE) | |
1289 | gcc_unreachable (); | |
1290 | return builtin; | |
1291 | } | |
1292 | ||
1293 | /* Unpacks the elements of the vector in VALUE to scalars (bit field | |
1294 | references) in ELEMENTS. */ | |
1295 | ||
1296 | void | |
1297 | brig_function::unpack (tree value, tree_stl_vec &elements) | |
1298 | { | |
1299 | size_t vec_size = int_size_in_bytes (TREE_TYPE (value)); | |
1300 | size_t element_size | |
1301 | = int_size_in_bytes (TREE_TYPE (TREE_TYPE (value))) * BITS_PER_UNIT; | |
1302 | size_t element_count | |
1303 | = vec_size * BITS_PER_UNIT / element_size; | |
1304 | ||
1305 | tree input_element_type = TREE_TYPE (TREE_TYPE (value)); | |
1306 | ||
1307 | value = add_temp_var ("unpack_input", value); | |
1308 | ||
1309 | for (size_t i = 0; i < element_count; ++i) | |
1310 | { | |
1311 | tree element | |
1312 | = build3 (BIT_FIELD_REF, input_element_type, value, | |
1313 | TYPE_SIZE (input_element_type), | |
1314 | bitsize_int(i * element_size)); | |
1315 | ||
1316 | element = add_temp_var ("scalar", element); | |
1317 | elements.push_back (element); | |
1318 | } | |
1319 | } | |
1320 | ||
1321 | /* Pack the elements of the scalars in ELEMENTS to the returned vector. */ | |
1322 | ||
1323 | tree | |
1324 | brig_function::pack (tree_stl_vec &elements) | |
1325 | { | |
1326 | size_t element_count = elements.size (); | |
1327 | ||
1328 | gcc_assert (element_count > 1); | |
1329 | ||
1330 | tree output_element_type = TREE_TYPE (elements.at (0)); | |
1331 | ||
1332 | vec<constructor_elt, va_gc> *constructor_vals = NULL; | |
1333 | for (size_t i = 0; i < element_count; ++i) | |
1334 | CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, elements.at (i)); | |
1335 | ||
1336 | tree vec_type = build_vector_type (output_element_type, element_count); | |
1337 | ||
1338 | /* build_constructor creates a vector type which is not a vector_cst | |
1339 | that requires compile time constant elements. */ | |
1340 | tree vec = build_constructor (vec_type, constructor_vals); | |
1341 | ||
1342 | /* Add a temp variable for readability. */ | |
1343 | tree tmp_var = create_tmp_var (vec_type, "vec_out"); | |
1344 | tree vec_tmp_assign = build2 (MODIFY_EXPR, TREE_TYPE (tmp_var), tmp_var, vec); | |
1345 | append_statement (vec_tmp_assign); | |
1346 | return tmp_var; | |
1347 | } | |
1348 | ||
1349 | /* Returns true in case the given opcode needs to know about work-item context | |
1350 | data. In such case the context data is passed as a pointer to a work-item | |
1351 | context object, as the last argument in the builtin call. */ | |
1352 | ||
1353 | bool | |
1354 | brig_function::needs_workitem_context_data | |
1355 | (BrigOpcode16_t brig_opcode) | |
1356 | { | |
1357 | switch (brig_opcode) | |
1358 | { | |
1359 | case BRIG_OPCODE_WORKITEMABSID: | |
1360 | case BRIG_OPCODE_WORKITEMFLATABSID: | |
1361 | case BRIG_OPCODE_WORKITEMFLATID: | |
1362 | case BRIG_OPCODE_CURRENTWORKITEMFLATID: | |
1363 | case BRIG_OPCODE_WORKITEMID: | |
1364 | case BRIG_OPCODE_WORKGROUPID: | |
1365 | case BRIG_OPCODE_WORKGROUPSIZE: | |
1366 | case BRIG_OPCODE_CURRENTWORKGROUPSIZE: | |
1367 | case BRIG_OPCODE_GRIDGROUPS: | |
1368 | case BRIG_OPCODE_GRIDSIZE: | |
1369 | case BRIG_OPCODE_DIM: | |
1370 | case BRIG_OPCODE_PACKETID: | |
1371 | case BRIG_OPCODE_PACKETCOMPLETIONSIG: | |
1372 | case BRIG_OPCODE_BARRIER: | |
1373 | case BRIG_OPCODE_WAVEBARRIER: | |
1374 | case BRIG_OPCODE_ARRIVEFBAR: | |
1375 | case BRIG_OPCODE_INITFBAR: | |
1376 | case BRIG_OPCODE_JOINFBAR: | |
1377 | case BRIG_OPCODE_LEAVEFBAR: | |
1378 | case BRIG_OPCODE_RELEASEFBAR: | |
1379 | case BRIG_OPCODE_WAITFBAR: | |
1380 | case BRIG_OPCODE_CUID: | |
1381 | case BRIG_OPCODE_MAXCUID: | |
1382 | case BRIG_OPCODE_DEBUGTRAP: | |
1383 | case BRIG_OPCODE_GROUPBASEPTR: | |
1384 | case BRIG_OPCODE_KERNARGBASEPTR: | |
1385 | case BRIG_OPCODE_ALLOCA: | |
1386 | return true; | |
1387 | default: | |
1388 | return false; | |
1389 | }; | |
1390 | } | |
1391 | ||
1392 | /* Appends and returns a new temp variable and an accompanying assignment | |
1393 | statement that stores the value of the given EXPR and has the given NAME. */ | |
1394 | ||
1395 | tree | |
1396 | brig_function::add_temp_var (std::string name, tree expr) | |
1397 | { | |
1398 | tree temp_var = create_tmp_var (TREE_TYPE (expr), name.c_str ()); | |
1399 | tree assign = build2 (MODIFY_EXPR, TREE_TYPE (temp_var), temp_var, expr); | |
1400 | append_statement (assign); | |
1401 | return temp_var; | |
1402 | } | |
1403 | ||
1404 | /* Returns the integer constant value of the given node. | |
1405 | If it's a cast, looks into the source of the cast. */ | |
1406 | ||
1407 | HOST_WIDE_INT | |
1408 | brig_function::int_constant_value (tree node) | |
1409 | { | |
1410 | tree n = node; | |
1411 | if (TREE_CODE (n) == VIEW_CONVERT_EXPR) | |
1412 | n = TREE_OPERAND (n, 0); | |
1413 | return int_cst_value (n); | |
1414 | } | |
1415 | ||
1416 | /* Returns the tree code that should be used to implement the given | |
1417 | HSA instruction opcode (BRIG_OPCODE) for the given type of instruction | |
1418 | (BRIG_TYPE). In case the opcode cannot be mapped to a TREE node directly, | |
1419 | returns TREE_LIST (if it can be emulated with a simple chain of tree | |
1420 | nodes) or CALL_EXPR if the opcode should be implemented using a builtin | |
1421 | call. */ | |
1422 | ||
1423 | tree_code | |
1424 | brig_function::get_tree_code_for_hsa_opcode | |
1425 | (BrigOpcode16_t brig_opcode, BrigType16_t brig_type) | |
1426 | { | |
1427 | BrigType16_t brig_inner_type = brig_type & BRIG_TYPE_BASE_MASK; | |
1428 | switch (brig_opcode) | |
1429 | { | |
1430 | case BRIG_OPCODE_NOP: | |
1431 | return NOP_EXPR; | |
1432 | case BRIG_OPCODE_ADD: | |
1433 | return PLUS_EXPR; | |
1434 | case BRIG_OPCODE_CMOV: | |
1435 | if (brig_inner_type == brig_type) | |
1436 | return COND_EXPR; | |
1437 | else | |
1438 | return VEC_COND_EXPR; | |
1439 | case BRIG_OPCODE_SUB: | |
1440 | return MINUS_EXPR; | |
1441 | case BRIG_OPCODE_MUL: | |
1442 | case BRIG_OPCODE_MUL24: | |
1443 | return MULT_EXPR; | |
1444 | case BRIG_OPCODE_MULHI: | |
1445 | case BRIG_OPCODE_MUL24HI: | |
1446 | return MULT_HIGHPART_EXPR; | |
1447 | case BRIG_OPCODE_DIV: | |
1448 | if (gccbrig_is_float_type (brig_inner_type)) | |
1449 | return RDIV_EXPR; | |
1450 | else | |
1451 | return TRUNC_DIV_EXPR; | |
1452 | case BRIG_OPCODE_NEG: | |
1453 | return NEGATE_EXPR; | |
1454 | case BRIG_OPCODE_MIN: | |
1455 | if (gccbrig_is_float_type (brig_inner_type)) | |
1456 | return CALL_EXPR; | |
1457 | else | |
1458 | return MIN_EXPR; | |
1459 | case BRIG_OPCODE_MAX: | |
1460 | if (gccbrig_is_float_type (brig_inner_type)) | |
1461 | return CALL_EXPR; | |
1462 | else | |
1463 | return MAX_EXPR; | |
080dc243 PJ |
1464 | case BRIG_OPCODE_ABS: |
1465 | return ABS_EXPR; | |
1466 | case BRIG_OPCODE_SHL: | |
1467 | return LSHIFT_EXPR; | |
1468 | case BRIG_OPCODE_SHR: | |
1469 | return RSHIFT_EXPR; | |
1470 | case BRIG_OPCODE_OR: | |
1471 | return BIT_IOR_EXPR; | |
1472 | case BRIG_OPCODE_XOR: | |
1473 | return BIT_XOR_EXPR; | |
1474 | case BRIG_OPCODE_AND: | |
1475 | return BIT_AND_EXPR; | |
1476 | case BRIG_OPCODE_NOT: | |
1477 | return BIT_NOT_EXPR; | |
1478 | case BRIG_OPCODE_RET: | |
1479 | return RETURN_EXPR; | |
1480 | case BRIG_OPCODE_MOV: | |
1481 | case BRIG_OPCODE_LDF: | |
1482 | return MODIFY_EXPR; | |
1483 | case BRIG_OPCODE_LD: | |
1484 | case BRIG_OPCODE_ST: | |
1485 | return MEM_REF; | |
1486 | case BRIG_OPCODE_BR: | |
1487 | return GOTO_EXPR; | |
1488 | case BRIG_OPCODE_REM: | |
1489 | if (brig_type == BRIG_TYPE_U64 || brig_type == BRIG_TYPE_U32) | |
1490 | return TRUNC_MOD_EXPR; | |
1491 | else | |
1492 | return CALL_EXPR; | |
1493 | case BRIG_OPCODE_NRCP: | |
1494 | case BRIG_OPCODE_NRSQRT: | |
1495 | /* Implement as 1/f (x). gcc should pattern detect that and | |
1496 | use a native instruction, if available, for it. */ | |
1497 | return TREE_LIST; | |
c566cc9f | 1498 | case BRIG_OPCODE_FMA: |
080dc243 PJ |
1499 | case BRIG_OPCODE_FLOOR: |
1500 | case BRIG_OPCODE_CEIL: | |
1501 | case BRIG_OPCODE_SQRT: | |
1502 | case BRIG_OPCODE_NSQRT: | |
1503 | case BRIG_OPCODE_RINT: | |
1504 | case BRIG_OPCODE_TRUNC: | |
1505 | case BRIG_OPCODE_POPCOUNT: | |
1506 | case BRIG_OPCODE_COPYSIGN: | |
1507 | case BRIG_OPCODE_NCOS: | |
1508 | case BRIG_OPCODE_NSIN: | |
1509 | case BRIG_OPCODE_NLOG2: | |
1510 | case BRIG_OPCODE_NEXP2: | |
1511 | case BRIG_OPCODE_NFMA: | |
1512 | /* Class has type B1 regardless of the float type, thus | |
1513 | the below builtin map search cannot find it. */ | |
1514 | case BRIG_OPCODE_CLASS: | |
1515 | case BRIG_OPCODE_WORKITEMABSID: | |
1516 | return CALL_EXPR; | |
1517 | default: | |
1518 | ||
1519 | /* Some BRIG opcodes can use the same builtins for unsigned and | |
1520 | signed types. Force these cases to unsigned types. | |
1521 | */ | |
1522 | ||
1523 | if (brig_opcode == BRIG_OPCODE_BORROW | |
1524 | || brig_opcode == BRIG_OPCODE_CARRY | |
1525 | || brig_opcode == BRIG_OPCODE_LASTBIT | |
1526 | || brig_opcode == BRIG_OPCODE_BITINSERT) | |
1527 | { | |
1528 | if (brig_type == BRIG_TYPE_S32) | |
1529 | brig_type = BRIG_TYPE_U32; | |
1530 | else if (brig_type == BRIG_TYPE_S64) | |
1531 | brig_type = BRIG_TYPE_U64; | |
1532 | } | |
1533 | ||
1534 | ||
1535 | builtin_map::const_iterator i | |
1536 | = s_custom_builtins.find (std::make_pair (brig_opcode, brig_type)); | |
1537 | if (i != s_custom_builtins.end ()) | |
1538 | return CALL_EXPR; | |
1539 | else if (s_custom_builtins.find | |
1540 | (std::make_pair (brig_opcode, brig_inner_type)) | |
1541 | != s_custom_builtins.end ()) | |
1542 | return CALL_EXPR; | |
1543 | if (brig_inner_type == BRIG_TYPE_F16 | |
1544 | && s_custom_builtins.find | |
1545 | (std::make_pair (brig_opcode, BRIG_TYPE_F32)) | |
1546 | != s_custom_builtins.end ()) | |
1547 | return CALL_EXPR; | |
1548 | break; | |
1549 | } | |
1550 | return TREE_LIST; /* Emulate using a chain of nodes. */ | |
1551 | } | |
1552 | ||
1553 | /* Inform of an update to the REG_VAR. */ | |
1554 | ||
1555 | void | |
1556 | brig_function::add_reg_var_update (tree reg_var, tree var) | |
1557 | { | |
1558 | if (var == m_abs_id_vars[0] || var == m_abs_id_vars[1] | |
1559 | || var == m_abs_id_vars[2] || var == m_local_id_vars[0] | |
1560 | || var == m_local_id_vars[1] || var == m_local_id_vars[2]) | |
1561 | m_id_val_defs [reg_var] = var; | |
1562 | else | |
1563 | { | |
1564 | /* Possible overwrite of an ID value. */ | |
1565 | ||
1566 | id_val_map::iterator i = m_id_val_defs.find (reg_var); | |
1567 | if (i != m_id_val_defs.end()) | |
1568 | m_id_val_defs.erase (i); | |
1569 | } | |
1570 | } | |
1571 | ||
1572 | /* If the REG_VAR is known to contain an ID value at this point in | |
1573 | the basic block, return true. */ | |
1574 | ||
1575 | bool | |
1576 | brig_function::is_id_val (tree reg_var) | |
1577 | { | |
1578 | id_val_map::iterator i = m_id_val_defs.find (reg_var); | |
1579 | return i != m_id_val_defs.end(); | |
1580 | } | |
1581 | ||
1582 | /* Return an ID value for the given REG_VAR if its known to contain | |
1583 | one at this point in the BB, NULL_TREE otherwise. */ | |
1584 | ||
1585 | tree | |
1586 | brig_function::id_val (tree reg_var) | |
1587 | { | |
1588 | id_val_map::iterator i = m_id_val_defs.find (reg_var); | |
1589 | if (i != m_id_val_defs.end()) | |
1590 | return (*i).second; | |
1591 | else | |
1592 | return NULL_TREE; | |
1593 | } | |
1594 | ||
1595 | /* Informs of starting a new basic block. Called when generating | |
1596 | a label, a call, a jump, or a return. */ | |
1597 | ||
1598 | void | |
1599 | brig_function::start_new_bb () | |
1600 | { | |
1601 | m_id_val_defs.clear (); | |
1602 | } |