1603 lines
50 KiB
C++
1603 lines
50 KiB
C++
|
/* brig-function.cc -- implementation of brig_function class.
   Copyright (C) 2016-2021 Free Software Foundation, Inc.
   Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com>
   for General Processor Tech.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3, or (at your option) any later
   version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
|
||
|
|
||
|
#include <sstream>
|
||
|
#include <iomanip>
|
||
|
|
||
|
#include "brig-function.h"
|
||
|
#include "stringpool.h"
|
||
|
#include "tree-iterator.h"
|
||
|
#include "toplev.h"
|
||
|
#include "gimplify.h"
|
||
|
#include "gimple-expr.h"
|
||
|
#include "print-tree.h"
|
||
|
#include "hsa-brig-format.h"
|
||
|
#include "stor-layout.h"
|
||
|
#include "diagnostic-core.h"
|
||
|
#include "brig-code-entry-handler.h"
|
||
|
#include "brig-machine.h"
|
||
|
#include "brig-util.h"
|
||
|
#include "phsa.h"
|
||
|
#include "tree-pretty-print.h"
|
||
|
#include "dumpfile.h"
|
||
|
#include "profile-count.h"
|
||
|
#include "tree-cfg.h"
|
||
|
#include "errors.h"
|
||
|
#include "function.h"
|
||
|
#include "brig-to-generic.h"
|
||
|
#include "brig-builtins.h"
|
||
|
#include "options.h"
|
||
|
#include "fold-const.h"
|
||
|
#include "target.h"
|
||
|
#include "builtins.h"
|
||
|
|
||
|
/* Class-wide index of builtin declarations, keyed by the
   (HSAIL opcode, HSAIL type) pair.  It is filled by the brig_function
   constructor, which skips the work when the index already has entries.  */
brig_function::builtin_map brig_function::s_custom_builtins;
|
||
|
|
||
|
/* Sets up the code generation state for the BRIG executable EXEC, which
   belongs to the conversion context PARENT.  All flags start out false,
   all tree handles start as NULL_TREE and the kernarg bookkeeping starts
   at zero.  If the class-wide builtin index is still empty, it is filled
   from brig-builtins.def as well.  */

brig_function::brig_function (const BrigDirectiveExecutable *exec,
                              brig_to_generic *parent)
  : m_brig_def (exec),
    m_is_kernel (false),
    m_is_finished (false),
    m_name (""),
    m_current_bind_expr (NULL_TREE),
    m_func_decl (NULL_TREE),
    m_context_arg (NULL_TREE),
    m_group_base_arg (NULL_TREE),
    m_private_base_arg (NULL_TREE),
    m_ret_value (NULL_TREE),
    m_next_kernarg_offset (0),
    m_kernarg_max_align (0),
    m_ret_value_brig_var (NULL),
    m_has_barriers (false),
    m_has_allocas (false),
    m_has_function_calls_with_barriers (false),
    m_calls_analyzed (false),
    m_is_wg_function (false),
    m_has_unexpanded_dp_builtins (false),
    m_generating_arg_block (false),
    m_parent (parent)
{
  /* No register variables and an all-zero descriptor to begin with.  */
  memset (&m_descriptor, 0, sizeof (phsa_descriptor));
  memset (m_regs, 0,
          BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT * sizeof (BrigOperandRegister *));

  /* The index is shared by all instances; nothing to do when it has
     already been populated.  */
  if (s_custom_builtins.size () != 0)
    return;

  /* Populate the builtin index.  Only DEF_HSAIL_BUILTIN is given a
     definition here; the other DEF_HSAIL_* macros are just undefined
     before the .def file is included.  */
#undef DEF_HSAIL_ATOMIC_BUILTIN
#undef DEF_HSAIL_CVT_ZEROI_SAT_BUILTIN
#undef DEF_HSAIL_INTR_BUILTIN
#undef DEF_HSAIL_SAT_BUILTIN
#undef DEF_HSAIL_BUILTIN
#define DEF_HSAIL_BUILTIN(ENUM, HSAIL_OPCODE, HSAIL_TYPE, NAME, TYPE, ATTRS) \
  s_custom_builtins[std::make_pair (HSAIL_OPCODE, HSAIL_TYPE)] \
    = builtin_decl_explicit (ENUM);

#include "brig-builtins.def"
}
|
||
|
|
||
|
/* Releases the register variable entries that were allocated on demand
   and clears their slots.  */

brig_function::~brig_function ()
{
  for (size_t reg = 0; reg < BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT; ++reg)
    {
      /* Deleting a null pointer is a no-op, so unused slots need no
         separate check.  */
      delete m_regs[reg];
      m_regs[reg] = NULL;
    }
}
|
||
|
|
||
|
/* Returns a GENERIC label with the given name in the given function.
|
||
|
Creates it, if not yet found. */
|
||
|
|
||
|
tree
|
||
|
brig_function::label (const std::string &name)
|
||
|
{
|
||
|
label_index::const_iterator i = m_label_index.find (name);
|
||
|
if (i == m_label_index.end ())
|
||
|
{
|
||
|
tree name_identifier
|
||
|
= get_identifier_with_length (name.c_str (), name.size ());
|
||
|
|
||
|
tree label_decl = build_decl (UNKNOWN_LOCATION, LABEL_DECL,
|
||
|
name_identifier, void_type_node);
|
||
|
|
||
|
DECL_CONTEXT (label_decl) = m_func_decl;
|
||
|
DECL_ARTIFICIAL (label_decl) = 0;
|
||
|
|
||
|
m_label_index[name] = label_decl;
|
||
|
return label_decl;
|
||
|
}
|
||
|
else
|
||
|
return (*i).second;
|
||
|
}
|
||
|
|
||
|
/* Record an argument variable for later use. This includes both local
|
||
|
variables inside arg blocks and incoming function arguments. */
|
||
|
|
||
|
void
|
||
|
brig_function::add_arg_variable (const BrigDirectiveVariable *brigVar,
|
||
|
tree treeDecl)
|
||
|
{
|
||
|
m_arg_variables[brigVar] = treeDecl;
|
||
|
}
|
||
|
|
||
|
/* Returns the declaration recorded for the BRIG variable VAR, or
   NULL_TREE when nothing has been recorded for it.  */

tree
brig_function::arg_variable (const BrigDirectiveVariable *var) const
{
  variable_index::const_iterator pos = m_arg_variables.find (var);
  if (pos != m_arg_variables.end ())
    return pos->second;

  return NULL_TREE;
}
|
||
|
|
||
|
/* Appends a new kernel argument descriptor for the current kernel's
|
||
|
arg space. */
|
||
|
|
||
|
void
|
||
|
brig_function::append_kernel_arg (const BrigDirectiveVariable *var, size_t size,
|
||
|
size_t alignment)
|
||
|
{
|
||
|
gcc_assert (m_func_decl != NULL_TREE);
|
||
|
gcc_assert (m_is_kernel);
|
||
|
|
||
|
size_t align_padding = m_next_kernarg_offset % alignment == 0 ?
|
||
|
0 : (alignment - m_next_kernarg_offset % alignment);
|
||
|
m_next_kernarg_offset += align_padding;
|
||
|
m_kernarg_offsets[var] = m_next_kernarg_offset;
|
||
|
m_next_kernarg_offset += size;
|
||
|
|
||
|
m_kernarg_max_align
|
||
|
= m_kernarg_max_align < alignment ? alignment : m_kernarg_max_align;
|
||
|
}
|
||
|
|
||
|
size_t
|
||
|
brig_function::kernel_arg_offset (const BrigDirectiveVariable *var) const
|
||
|
{
|
||
|
var_offset_table::const_iterator i = m_kernarg_offsets.find (var);
|
||
|
gcc_assert (i != m_kernarg_offsets.end ());
|
||
|
return (*i).second;
|
||
|
}
|
||
|
|
||
|
/* Add work-item ID variables to the beginning of the kernel function
|
||
|
which can be used for address computation as kernel dispatch packet
|
||
|
instructions can be expanded to GENERIC nodes referring to them. */
|
||
|
|
||
|
void
|
||
|
brig_function::add_id_variables ()
|
||
|
{
|
||
|
tree bind_expr = m_current_bind_expr;
|
||
|
tree stmts = BIND_EXPR_BODY (bind_expr);
|
||
|
|
||
|
/* Initialize the WG limits and local ids. */
|
||
|
m_kernel_entry = tsi_start (stmts);
|
||
|
|
||
|
for (int i = 0; i < 3; ++i)
|
||
|
{
|
||
|
char dim_char = (char) ((int) 'x' + i);
|
||
|
|
||
|
/* The local sizes are limited to 16b values, but let's still use 32b
|
||
|
to avoid unnecessary casts (the ID functions are 32b). */
|
||
|
m_local_id_vars[i]
|
||
|
= add_local_variable (std::string ("__local_") + dim_char,
|
||
|
long_long_integer_type_node);
|
||
|
|
||
|
tree workitemid_call
|
||
|
= call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_WORKITEMID), 2,
|
||
|
uint32_type_node, uint32_type_node,
|
||
|
build_int_cst (uint32_type_node, i), ptr_type_node,
|
||
|
m_context_arg);
|
||
|
|
||
|
tree id_init = build2 (MODIFY_EXPR, TREE_TYPE (m_local_id_vars[i]),
|
||
|
m_local_id_vars[i],
|
||
|
convert (TREE_TYPE (m_local_id_vars[i]),
|
||
|
workitemid_call));
|
||
|
|
||
|
append_statement (id_init);
|
||
|
|
||
|
m_cur_wg_size_vars[i]
|
||
|
= add_local_variable (std::string ("__cur_wg_size_") + dim_char,
|
||
|
long_long_integer_type_node);
|
||
|
|
||
|
tree cwgz_call;
|
||
|
if (flag_assume_phsa)
|
||
|
{
|
||
|
tree_stl_vec operands
|
||
|
= tree_stl_vec (1, build_int_cst (uint32_type_node, i));
|
||
|
cwgz_call
|
||
|
= expand_or_call_builtin (BRIG_OPCODE_CURRENTWORKGROUPSIZE,
|
||
|
BRIG_TYPE_U32, uint32_type_node,
|
||
|
operands);
|
||
|
}
|
||
|
else
|
||
|
cwgz_call = call_builtin
|
||
|
(builtin_decl_explicit (BUILT_IN_HSAIL_CURRENTWORKGROUPSIZE),
|
||
|
2, uint32_type_node, uint32_type_node,
|
||
|
build_int_cst (uint32_type_node, i), ptr_type_node, m_context_arg);
|
||
|
|
||
|
tree limit_init = build2 (MODIFY_EXPR, TREE_TYPE (m_cur_wg_size_vars[i]),
|
||
|
m_cur_wg_size_vars[i],
|
||
|
convert (TREE_TYPE (m_cur_wg_size_vars[i]),
|
||
|
cwgz_call));
|
||
|
|
||
|
append_statement (limit_init);
|
||
|
|
||
|
m_wg_id_vars[i]
|
||
|
= add_local_variable (std::string ("__workgroupid_") + dim_char,
|
||
|
uint32_type_node);
|
||
|
|
||
|
tree wgid_call;
|
||
|
if (flag_assume_phsa)
|
||
|
{
|
||
|
tree_stl_vec operands
|
||
|
= tree_stl_vec (1, build_int_cst (uint32_type_node, i));
|
||
|
wgid_call
|
||
|
= expand_or_call_builtin (BRIG_OPCODE_WORKGROUPID, BRIG_TYPE_U32,
|
||
|
uint32_type_node, operands);
|
||
|
}
|
||
|
else
|
||
|
wgid_call
|
||
|
= call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_WORKGROUPID),
|
||
|
2, uint32_type_node, uint32_type_node,
|
||
|
build_int_cst (uint32_type_node, i), ptr_type_node,
|
||
|
m_context_arg);
|
||
|
|
||
|
tree wgid_init = build2 (MODIFY_EXPR, TREE_TYPE (m_wg_id_vars[i]),
|
||
|
m_wg_id_vars[i], wgid_call);
|
||
|
|
||
|
append_statement (wgid_init);
|
||
|
|
||
|
m_wg_size_vars[i]
|
||
|
= add_local_variable (std::string ("__workgroupsize_") + dim_char,
|
||
|
uint32_type_node);
|
||
|
|
||
|
tree wgsize_call;
|
||
|
if (flag_assume_phsa)
|
||
|
{
|
||
|
tree_stl_vec operands
|
||
|
= tree_stl_vec (1, build_int_cst (uint32_type_node, i));
|
||
|
wgsize_call
|
||
|
= expand_or_call_builtin (BRIG_OPCODE_WORKGROUPSIZE, BRIG_TYPE_U32,
|
||
|
uint32_type_node, operands);
|
||
|
}
|
||
|
else
|
||
|
wgsize_call
|
||
|
= call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_WORKGROUPSIZE),
|
||
|
2, uint32_type_node, uint32_type_node,
|
||
|
build_int_cst (uint32_type_node, i), ptr_type_node,
|
||
|
m_context_arg);
|
||
|
|
||
|
tree wgsize_init = build2 (MODIFY_EXPR, TREE_TYPE (m_wg_size_vars[i]),
|
||
|
m_wg_size_vars[i], wgsize_call);
|
||
|
|
||
|
append_statement (wgsize_init);
|
||
|
|
||
|
m_grid_size_vars[i]
|
||
|
= add_local_variable (std::string ("__gridsize_") + dim_char,
|
||
|
uint32_type_node);
|
||
|
|
||
|
tree gridsize_call
|
||
|
= call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_GRIDSIZE), 2,
|
||
|
uint32_type_node, uint32_type_node,
|
||
|
build_int_cst (uint32_type_node, i), ptr_type_node,
|
||
|
m_context_arg);
|
||
|
|
||
|
tree gridsize_init = build2 (MODIFY_EXPR, TREE_TYPE (m_grid_size_vars[i]),
|
||
|
m_grid_size_vars[i], gridsize_call);
|
||
|
|
||
|
append_statement (gridsize_init);
|
||
|
|
||
|
m_abs_id_base_vars[i]
|
||
|
= add_local_variable (std::string ("__abs_id_base_") + dim_char,
|
||
|
long_long_integer_type_node);
|
||
|
|
||
|
m_abs_id_vars[i]
|
||
|
= add_local_variable (std::string ("__abs_id_") + dim_char,
|
||
|
long_long_integer_type_node);
|
||
|
|
||
|
tree abs_id_base
|
||
|
= build2 (MULT_EXPR, long_long_integer_type_node,
|
||
|
convert (long_long_integer_type_node, m_wg_id_vars[i]),
|
||
|
convert (long_long_integer_type_node, m_wg_size_vars[i]));
|
||
|
tree abs_id
|
||
|
= build2 (PLUS_EXPR, long_long_integer_type_node, abs_id_base,
|
||
|
convert (long_long_integer_type_node, m_local_id_vars[i]));
|
||
|
|
||
|
tree abs_id_base_init
|
||
|
= build2 (MODIFY_EXPR, TREE_TYPE (m_abs_id_base_vars[i]),
|
||
|
m_abs_id_base_vars[i], abs_id_base);
|
||
|
append_statement (abs_id_base_init);
|
||
|
|
||
|
tree abs_id_init = build2 (MODIFY_EXPR,
|
||
|
TREE_TYPE (m_abs_id_vars[i]),
|
||
|
m_abs_id_vars[i], abs_id);
|
||
|
append_statement (abs_id_init);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Creates a new local variable with the given NAME and given GENERIC
|
||
|
TYPE. */
|
||
|
|
||
|
tree
|
||
|
brig_function::add_local_variable (std::string name, tree type)
|
||
|
{
|
||
|
tree name_identifier
|
||
|
= get_identifier_with_length (name.c_str (), name.size ());
|
||
|
tree variable
|
||
|
= build_decl (UNKNOWN_LOCATION, VAR_DECL, name_identifier, type);
|
||
|
|
||
|
DECL_NONLOCAL (variable) = 0;
|
||
|
TREE_ADDRESSABLE (variable) = 0;
|
||
|
TREE_STATIC (variable) = 0;
|
||
|
TREE_USED (variable) = 1;
|
||
|
DECL_ARTIFICIAL (variable) = 0;
|
||
|
|
||
|
tree bind_expr = DECL_SAVED_TREE (m_func_decl);
|
||
|
|
||
|
DECL_CONTEXT (variable) = m_func_decl;
|
||
|
|
||
|
DECL_CHAIN (variable) = BIND_EXPR_VARS (bind_expr);
|
||
|
BIND_EXPR_VARS (bind_expr) = variable;
|
||
|
return variable;
|
||
|
}
|
||
|
|
||
|
/* Return tree type for an HSA register.
|
||
|
|
||
|
The tree type can be anything (scalar, vector, int, float, etc.)
|
||
|
but its size is guaranteed to match the HSA register size.
|
||
|
|
||
|
HSA registers are untyped but we select a type based on their use
|
||
|
to reduce (sometimes unoptimizable) VIEW_CONVERT_EXPR nodes (seems
|
||
|
to occur when use or def reaches over current BB). */
|
||
|
|
||
|
tree
|
||
|
brig_function::get_tree_type_for_hsa_reg (const BrigOperandRegister *reg) const
|
||
|
{
|
||
|
size_t reg_size = gccbrig_reg_size (reg);
|
||
|
|
||
|
/* The default type. */
|
||
|
tree type = build_nonstandard_integer_type (reg_size, true);
|
||
|
|
||
|
if (m_parent->m_fn_regs_use_index.count (m_name) == 0)
|
||
|
return type;
|
||
|
|
||
|
const regs_use_index &index = m_parent->m_fn_regs_use_index[m_name];
|
||
|
size_t reg_id = gccbrig_hsa_reg_id (*reg);
|
||
|
if (index.count (reg_id) == 0)
|
||
|
return type;
|
||
|
|
||
|
const reg_use_info &info = index.find (reg_id)->second;
|
||
|
std::vector<std::pair<tree, size_t> >::const_iterator it
|
||
|
= info.m_type_refs.begin ();
|
||
|
std::vector<std::pair<tree, size_t> >::const_iterator it_end
|
||
|
= info.m_type_refs.end ();
|
||
|
size_t max_refs_as_type_count = 0;
|
||
|
for (; it != it_end; it++)
|
||
|
{
|
||
|
size_t type_bit_size = int_size_in_bytes (it->first) * BITS_PER_UNIT;
|
||
|
if (type_bit_size != reg_size) continue;
|
||
|
if (it->second > max_refs_as_type_count)
|
||
|
{
|
||
|
type = it->first;
|
||
|
max_refs_as_type_count = it->second;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return type;
|
||
|
}
|
||
|
|
||
|
/* Returns a DECL_VAR for the given HSAIL operand register.
|
||
|
If it has not been created yet for the function being generated,
|
||
|
creates it as a type determined by analysis phase. */
|
||
|
|
||
|
tree
|
||
|
brig_function::get_m_var_declfor_reg (const BrigOperandRegister *reg)
|
||
|
{
|
||
|
size_t offset = gccbrig_hsa_reg_id (*reg);
|
||
|
|
||
|
reg_decl_index_entry *regEntry = m_regs[offset];
|
||
|
if (regEntry == NULL)
|
||
|
{
|
||
|
size_t reg_size = gccbrig_reg_size (reg);
|
||
|
tree type;
|
||
|
if (reg_size > 1)
|
||
|
type = get_tree_type_for_hsa_reg (reg);
|
||
|
else
|
||
|
type = boolean_type_node;
|
||
|
|
||
|
/* Drop the const qualifier so we do not end up with a read only
|
||
|
register variable which cannot be written to later. */
|
||
|
tree nonconst_type = build_type_variant (type, false, false);
|
||
|
|
||
|
regEntry = new reg_decl_index_entry;
|
||
|
|
||
|
regEntry->m_var_decl
|
||
|
= add_local_variable (gccbrig_reg_name (reg), nonconst_type);
|
||
|
m_regs[offset] = regEntry;
|
||
|
}
|
||
|
return regEntry->m_var_decl;
|
||
|
}
|
||
|
|
||
|
/* Builds a work-item do..while loop for a single DIM. HEADER_ENTRY is
|
||
|
a statement after which the iteration variables should be initialized and
|
||
|
the loop body starts. BRANCH_AFTER is the statement after which the loop
|
||
|
predicate check and the back edge goto will be appended. */
|
||
|
|
||
|
void
|
||
|
brig_function::add_wi_loop (int dim, tree_stmt_iterator *header_entry,
|
||
|
tree_stmt_iterator *branch_after)
|
||
|
{
|
||
|
tree ivar = m_local_id_vars[dim];
|
||
|
tree abs_id_base_var = m_abs_id_base_vars[dim];
|
||
|
tree abs_id_var = m_abs_id_vars[dim];
|
||
|
tree ivar_max = m_cur_wg_size_vars[dim];
|
||
|
tree_stmt_iterator entry = *header_entry;
|
||
|
|
||
|
/* TODO: this is not a parallel loop as we share the "register variables"
|
||
|
across work-items. Should create a copy of them per WI instance. That
|
||
|
is, declare temporaries for new definitions inside the loop body, not at
|
||
|
function scope. */
|
||
|
|
||
|
tree ivar_init = build2 (MODIFY_EXPR, TREE_TYPE (ivar), ivar,
|
||
|
build_zero_cst (TREE_TYPE (ivar)));
|
||
|
tsi_link_after (&entry, ivar_init, TSI_NEW_STMT);
|
||
|
|
||
|
tree abs_id_var_init = build2 (MODIFY_EXPR, TREE_TYPE (abs_id_var),
|
||
|
abs_id_var,
|
||
|
convert (TREE_TYPE (abs_id_var),
|
||
|
abs_id_base_var));
|
||
|
tsi_link_after (&entry, abs_id_var_init, TSI_NEW_STMT);
|
||
|
|
||
|
tree loop_body_label
|
||
|
= label (std::string ("__wi_loop_") + (char) ((int) 'x' + dim));
|
||
|
tree loop_body_label_stmt = build_stmt (LABEL_EXPR, loop_body_label);
|
||
|
|
||
|
tsi_link_after (&entry, loop_body_label_stmt, TSI_NEW_STMT);
|
||
|
|
||
|
if (m_has_unexpanded_dp_builtins)
|
||
|
{
|
||
|
if (!flag_assume_phsa)
|
||
|
{
|
||
|
tree id_set_builtin
|
||
|
= builtin_decl_explicit (BUILT_IN_HSAIL_SETWORKITEMID);
|
||
|
/* Set the local ID to the current wi-loop iteration variable value
|
||
|
to ensure the builtins see the correct values. */
|
||
|
tree id_set_call
|
||
|
= call_builtin (id_set_builtin, 3,
|
||
|
void_type_node, uint32_type_node,
|
||
|
build_int_cst (uint32_type_node, dim),
|
||
|
uint32_type_node, convert (uint32_type_node, ivar),
|
||
|
ptr_type_node, m_context_arg);
|
||
|
tsi_link_after (&entry, id_set_call, TSI_NEW_STMT);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
tree ptr_type = build_pointer_type (uint32_type_node);
|
||
|
tree ctx = build2 (MEM_REF, uint32_type_node, m_context_arg,
|
||
|
build_int_cst (ptr_type, dim * 4));
|
||
|
tree assign = build2 (MODIFY_EXPR, uint32_type_node, ctx,
|
||
|
convert (uint32_type_node, ivar));
|
||
|
|
||
|
tsi_link_after (&entry, assign, TSI_NEW_STMT);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Increment the WI iteration variable. */
|
||
|
tree incr = build2 (PREINCREMENT_EXPR, TREE_TYPE (ivar), ivar,
|
||
|
build_one_cst (TREE_TYPE (ivar)));
|
||
|
|
||
|
tsi_link_after (branch_after, incr, TSI_NEW_STMT);
|
||
|
|
||
|
/* ...and the abs id variable. */
|
||
|
tree abs_id_incr = build2 (PREINCREMENT_EXPR, TREE_TYPE (abs_id_var),
|
||
|
abs_id_var,
|
||
|
build_one_cst (TREE_TYPE (abs_id_var)));
|
||
|
|
||
|
tsi_link_after (branch_after, abs_id_incr, TSI_NEW_STMT);
|
||
|
|
||
|
/* Append the predicate check with the back edge goto. */
|
||
|
tree condition = build2 (LT_EXPR, TREE_TYPE (ivar), ivar, ivar_max);
|
||
|
tree target_goto = build1 (GOTO_EXPR, void_type_node, loop_body_label);
|
||
|
tree if_stmt
|
||
|
= build3 (COND_EXPR, void_type_node, condition, target_goto, NULL_TREE);
|
||
|
tsi_link_after (branch_after, if_stmt, TSI_NEW_STMT);
|
||
|
}
|
||
|
|
||
|
/* Recursively analyzes the function and its callees for barrier usage. */
|
||
|
|
||
|
void
|
||
|
brig_function::analyze_calls ()
|
||
|
{
|
||
|
if (m_calls_analyzed)
|
||
|
return;
|
||
|
|
||
|
/* Set this early to not get stuck in case of recursive call graphs.
|
||
|
This is safe because if the function calls itself, either the function
|
||
|
has barrier calls which implies a call to a function with barrier calls,
|
||
|
or it doesn't in which case the result depends on the later called
|
||
|
functions. */
|
||
|
m_calls_analyzed = true;
|
||
|
|
||
|
for (size_t i = 0; i < m_called_functions.size (); ++i)
|
||
|
{
|
||
|
tree f = m_called_functions[i];
|
||
|
brig_function *called_f = m_parent->get_finished_function (f);
|
||
|
if (called_f == NULL)
|
||
|
{
|
||
|
/* Unfinished function (only declaration within the set of BRIGs)
|
||
|
found. Cannot finish the CG analysis. Have to assume it does have
|
||
|
a barrier for safety. */
|
||
|
m_has_function_calls_with_barriers = true;
|
||
|
m_has_unexpanded_dp_builtins = true;
|
||
|
break;
|
||
|
}
|
||
|
called_f->analyze_calls ();
|
||
|
/* We can assume m_has_barriers has been correctly set during the
|
||
|
construction of the function decl. No need to reanalyze it. */
|
||
|
m_has_function_calls_with_barriers |= called_f->m_has_barriers;
|
||
|
|
||
|
/* If the function or any of its called functions has dispatch
|
||
|
packet builtin calls that require the local id, we need to
|
||
|
set the local id to the context in the work item loop before
|
||
|
the functions are called. If we analyze the opposite, these
|
||
|
function calls can be omitted. */
|
||
|
m_has_unexpanded_dp_builtins |= called_f->m_has_unexpanded_dp_builtins;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Tries to convert the current kernel to a work-group function that executes
|
||
|
all work-items using loops. Returns true in case the conversion was
|
||
|
successful. */
|
||
|
|
||
|
bool
|
||
|
brig_function::convert_to_wg_function ()
|
||
|
{
|
||
|
if (!m_calls_analyzed)
|
||
|
analyze_calls ();
|
||
|
|
||
|
if (m_has_barriers || m_has_function_calls_with_barriers)
|
||
|
return false;
|
||
|
|
||
|
/* The most trivial case: No barriers at all in the kernel.
|
||
|
We can create one big work-item loop around the whole kernel. */
|
||
|
tree bind_expr = m_current_bind_expr;
|
||
|
tree stmts = BIND_EXPR_BODY (bind_expr);
|
||
|
|
||
|
for (int i = 0; i < 3; ++i)
|
||
|
{
|
||
|
/* The previous loop has added a new label to the end of the function,
|
||
|
the next level loop should wrap around it also. */
|
||
|
tree_stmt_iterator function_exit = tsi_last (stmts);
|
||
|
add_wi_loop (i, &m_kernel_entry, &function_exit);
|
||
|
}
|
||
|
|
||
|
m_is_wg_function = true;
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
/* Emits a kernel description to a special ELF section so it can be
|
||
|
utilized by an HSA runtime implementation. The assembly block
|
||
|
must be emitted to a statement list of an function, which is given
|
||
|
as an argument. Returns the assembly block used to emit the section. */
|
||
|
|
||
|
tree
|
||
|
brig_function::emit_metadata (tree stmt_list)
|
||
|
{
|
||
|
/* Emit an ELF section via an assembly directive that generates a special
|
||
|
ELF section for each kernel that contains raw bytes of a descriptor
|
||
|
object. This is pretty disgusting, but life is never perfect ;) */
|
||
|
|
||
|
/* Use the original kernel name without the '_' prefix in the section name. */
|
||
|
std::string kern_name = m_is_kernel ? m_name.substr (1) : m_name;
|
||
|
|
||
|
std::ostringstream strstr;
|
||
|
strstr << std::endl
|
||
|
<< ".pushsection " << PHSA_DESC_SECTION_PREFIX << kern_name
|
||
|
<< std::endl
|
||
|
<< "\t.p2align 1, 1, 1" << std::endl
|
||
|
<< "\t.byte ";
|
||
|
|
||
|
for (size_t i = 0; i < sizeof (phsa_descriptor); ++i)
|
||
|
{
|
||
|
strstr << "0x" << std::setw (2) << std::setfill ('0') << std::hex
|
||
|
<< (unsigned) *((unsigned char *) &m_descriptor + i);
|
||
|
if (i + 1 < sizeof (phsa_descriptor))
|
||
|
strstr << ", ";
|
||
|
}
|
||
|
|
||
|
strstr << std::endl << ".popsection" << std::endl << std::endl;
|
||
|
|
||
|
tree metadata_asm
|
||
|
= build_stmt (ASM_EXPR,
|
||
|
build_string (strstr.str ().size (), strstr.str ().c_str ()),
|
||
|
NULL_TREE, NULL_TREE, NULL_TREE, NULL_TREE);
|
||
|
|
||
|
append_to_statement_list_force (metadata_asm, &stmt_list);
|
||
|
return metadata_asm;
|
||
|
}
|
||
|
|
||
|
/* Emits the kernel launcher function. Also emits the metadata section
|
||
|
creation statements in it.
|
||
|
|
||
|
The launcher function calls the device-side runtime
|
||
|
that runs the kernel for all work-items. In C:
|
||
|
|
||
|
void KernelName (void* context, void* group_base_addr)
|
||
|
{
|
||
|
__hsail_launch_kernel (_KernelName, context, group_base_addr);
|
||
|
}
|
||
|
|
||
|
or, in case of a successful conversion to a work-group function:
|
||
|
|
||
|
void KernelName (void* context, void* group_base_addr)
|
||
|
{
|
||
|
__hsail_launch_wg_function (_KernelName, context, group_base_addr);
|
||
|
}
|
||
|
|
||
|
The user/host sees this function as the kernel to call from the
|
||
|
outside. The actual kernel generated from HSAIL was named _KernelName.
|
||
|
*/
|
||
|
|
||
|
tree
|
||
|
brig_function::emit_launcher_and_metadata ()
|
||
|
{
|
||
|
/* The original kernel name without the '_' prefix. */
|
||
|
std::string kern_name = m_name.substr (1);
|
||
|
|
||
|
tree name_identifier
|
||
|
= get_identifier_with_length (kern_name.c_str (), kern_name.size ());
|
||
|
|
||
|
tree restrict_void_ptr
|
||
|
= build_qualified_type (build_pointer_type (void_type_node),
|
||
|
TYPE_QUAL_RESTRICT);
|
||
|
tree restrict_char_ptr
|
||
|
= build_qualified_type (build_pointer_type (char_type_node),
|
||
|
TYPE_QUAL_RESTRICT);
|
||
|
tree launcher
|
||
|
= build_decl (UNKNOWN_LOCATION, FUNCTION_DECL, name_identifier,
|
||
|
build_function_type_list (void_type_node, restrict_void_ptr,
|
||
|
restrict_char_ptr, NULL_TREE));
|
||
|
|
||
|
TREE_USED (launcher) = 1;
|
||
|
DECL_ARTIFICIAL (launcher) = 1;
|
||
|
|
||
|
tree context_arg = build_decl (UNKNOWN_LOCATION, PARM_DECL,
|
||
|
get_identifier ("__context"),
|
||
|
restrict_void_ptr);
|
||
|
|
||
|
DECL_ARGUMENTS (launcher) = context_arg;
|
||
|
DECL_ARG_TYPE (context_arg) = restrict_void_ptr;
|
||
|
DECL_CONTEXT (context_arg) = launcher;
|
||
|
TREE_USED (context_arg) = 1;
|
||
|
DECL_ARTIFICIAL (context_arg) = 1;
|
||
|
|
||
|
tree group_base_addr_arg
|
||
|
= build_decl (UNKNOWN_LOCATION, PARM_DECL,
|
||
|
get_identifier ("__group_base_addr"), restrict_char_ptr);
|
||
|
|
||
|
chainon (DECL_ARGUMENTS (launcher), group_base_addr_arg);
|
||
|
DECL_ARG_TYPE (group_base_addr_arg) = restrict_char_ptr;
|
||
|
DECL_CONTEXT (group_base_addr_arg) = launcher;
|
||
|
TREE_USED (group_base_addr_arg) = 1;
|
||
|
DECL_ARTIFICIAL (group_base_addr_arg) = 1;
|
||
|
|
||
|
tree resdecl
|
||
|
= build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, void_type_node);
|
||
|
|
||
|
DECL_RESULT (launcher) = resdecl;
|
||
|
DECL_CONTEXT (resdecl) = launcher;
|
||
|
|
||
|
DECL_INITIAL (launcher) = make_node (BLOCK);
|
||
|
TREE_USED (DECL_INITIAL (launcher)) = 1;
|
||
|
|
||
|
tree stmt_list = alloc_stmt_list ();
|
||
|
|
||
|
tree bind_expr = build3 (BIND_EXPR, void_type_node, NULL, stmt_list, NULL);
|
||
|
|
||
|
TREE_STATIC (launcher) = 1;
|
||
|
TREE_PUBLIC (launcher) = 1;
|
||
|
|
||
|
DECL_SAVED_TREE (launcher) = bind_expr;
|
||
|
|
||
|
if (DECL_STRUCT_FUNCTION (launcher) == NULL)
|
||
|
push_struct_function (launcher);
|
||
|
else
|
||
|
push_cfun (DECL_STRUCT_FUNCTION (launcher));
|
||
|
|
||
|
tree kernel_func_ptr = build1 (ADDR_EXPR, ptr_type_node, m_func_decl);
|
||
|
|
||
|
tree phsail_launch_kernel_call;
|
||
|
|
||
|
/* Compute the local group segment frame start pointer. */
|
||
|
tree group_local_offset_temp
|
||
|
= create_tmp_var (uint32_type_node, "group_local_offset");
|
||
|
tree group_local_offset_arg
|
||
|
= build2 (MODIFY_EXPR, uint32_type_node,
|
||
|
group_local_offset_temp,
|
||
|
build_int_cst (uint32_type_node,
|
||
|
m_parent->m_module_group_variables.size()));
|
||
|
|
||
|
/* Emit a launcher depending whether we converted the kernel function to
|
||
|
a work group function or not. */
|
||
|
if (m_is_wg_function)
|
||
|
phsail_launch_kernel_call
|
||
|
= call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_LAUNCH_WG_FUNC),
|
||
|
4, void_type_node,
|
||
|
ptr_type_node, kernel_func_ptr, restrict_void_ptr,
|
||
|
context_arg, restrict_char_ptr, group_base_addr_arg,
|
||
|
uint32_type_node, group_local_offset_arg);
|
||
|
else
|
||
|
phsail_launch_kernel_call
|
||
|
= call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_LAUNCH_KERNEL),
|
||
|
4, void_type_node,
|
||
|
ptr_type_node, kernel_func_ptr, restrict_void_ptr,
|
||
|
context_arg, restrict_char_ptr, group_base_addr_arg,
|
||
|
uint32_type_node, group_local_offset_arg);
|
||
|
|
||
|
append_to_statement_list_force (phsail_launch_kernel_call, &stmt_list);
|
||
|
|
||
|
emit_metadata (stmt_list);
|
||
|
|
||
|
set_externally_visible (launcher);
|
||
|
|
||
|
return launcher;
|
||
|
}
|
||
|
|
||
|
/* Appends STMT to the end of the body of the bind expression currently
   being built and returns STMT.  Asserts that the function declaration
   has been created.  */

tree
brig_function::append_statement (tree stmt)
{
  gcc_assert (m_func_decl != NULL);

  /* Append through the bind expression's own body slot rather than a
     local copy of it: should the append have to create a new statement
     list, the new list is then stored in the bind expression and not
     lost.  */
  append_to_statement_list_force (stmt,
                                  &BIND_EXPR_BODY (m_current_bind_expr));
  return stmt;
}
|
||
|
|
||
|
/* Creates a new "alloca frame" for the current function by
|
||
|
injecting an alloca frame push in the beginning of the function
|
||
|
and an alloca frame pop before all function exit points. */
|
||
|
|
||
|
void
|
||
|
brig_function::create_alloca_frame ()
|
||
|
{
|
||
|
tree_stmt_iterator entry;
|
||
|
|
||
|
/* Adds the alloca push only after the ids have been initialized
|
||
|
in case of a kernel function. */
|
||
|
if (m_is_kernel)
|
||
|
entry = m_kernel_entry;
|
||
|
else
|
||
|
{
|
||
|
tree bind_expr = m_current_bind_expr;
|
||
|
tree stmts = BIND_EXPR_BODY (bind_expr);
|
||
|
entry = tsi_start (stmts);
|
||
|
}
|
||
|
|
||
|
tree push_frame_builtin = builtin_decl_explicit (BUILT_IN_HSAIL_PUSH_FRAME);
|
||
|
tree push_frame_call
|
||
|
= call_builtin (push_frame_builtin, 1, void_type_node, ptr_type_node,
|
||
|
m_context_arg);
|
||
|
|
||
|
tsi_link_before (&entry, push_frame_call, TSI_NEW_STMT);
|
||
|
|
||
|
tree pop_frame_builtin = builtin_decl_explicit (BUILT_IN_HSAIL_POP_FRAME);
|
||
|
|
||
|
do
|
||
|
{
|
||
|
tree stmt = tsi_stmt (entry);
|
||
|
if (TREE_CODE (stmt) == RETURN_EXPR)
|
||
|
{
|
||
|
tree pop_frame_call
|
||
|
= call_builtin (pop_frame_builtin, 1, void_type_node,
|
||
|
ptr_type_node, m_context_arg);
|
||
|
|
||
|
tsi_link_before (&entry, pop_frame_call, TSI_SAME_STMT);
|
||
|
}
|
||
|
tsi_next (&entry);
|
||
|
}
|
||
|
while (!tsi_end_p (entry));
|
||
|
}
|
||
|
|
||
|
/* Finishes the currently built function. After calling this, no new
|
||
|
statements should be appeneded to the function. */
|
||
|
void
|
||
|
brig_function::finish ()
|
||
|
{
|
||
|
append_return_stmt ();
|
||
|
|
||
|
/* Currently assume single alloca frame per WG. */
|
||
|
if (m_has_allocas)
|
||
|
create_alloca_frame ();
|
||
|
}
|
||
|
|
||
|
void
|
||
|
brig_function::finish_kernel ()
|
||
|
{
|
||
|
/* Kernel functions should have a single exit point.
|
||
|
Let's create one. The return instructions should have
|
||
|
been converted to branches to this label. */
|
||
|
append_statement (build_stmt (LABEL_EXPR, m_exit_label));
|
||
|
/* Attempt to convert the kernel to a work-group function that
|
||
|
executes all work-items of the WG using a loop. */
|
||
|
convert_to_wg_function ();
|
||
|
|
||
|
append_return_stmt ();
|
||
|
|
||
|
/* Currently assume single alloca frame per WG. */
|
||
|
if (m_has_allocas)
|
||
|
create_alloca_frame ();
|
||
|
}
|
||
|
|
||
|
void
|
||
|
brig_function::append_return_stmt ()
|
||
|
{
|
||
|
gcc_assert (m_current_bind_expr != NULL_TREE);
|
||
|
tree stmts = BIND_EXPR_BODY (m_current_bind_expr);
|
||
|
|
||
|
if (STATEMENT_LIST_TAIL (stmts) == NULL)
|
||
|
return; /* Empty function. */
|
||
|
|
||
|
tree last_stmt = tsi_stmt (tsi_last (stmts));
|
||
|
|
||
|
if (TREE_CODE (last_stmt) == RETURN_EXPR)
|
||
|
return;
|
||
|
|
||
|
if (m_ret_value != NULL_TREE)
|
||
|
{
|
||
|
tree result_assign
|
||
|
= build2 (MODIFY_EXPR, TREE_TYPE (m_ret_value), m_ret_value,
|
||
|
m_ret_temp);
|
||
|
|
||
|
tree return_expr
|
||
|
= build1 (RETURN_EXPR, TREE_TYPE (result_assign), result_assign);
|
||
|
append_to_statement_list_force (return_expr, &stmts);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
tree return_stmt = build_stmt (RETURN_EXPR, NULL);
|
||
|
append_to_statement_list_force (return_stmt, &stmts);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
bool
|
||
|
brig_function::has_function_scope_var (const BrigBase* var) const
|
||
|
{
|
||
|
return m_function_scope_vars.find (var) != m_function_scope_vars.end ();
|
||
|
}
|
||
|
|
||
|
size_t
|
||
|
brig_function::group_variable_segment_offset (const std::string &name) const
|
||
|
{
|
||
|
if (m_local_group_variables.has_variable (name))
|
||
|
return m_local_group_variables.segment_offset (name);
|
||
|
|
||
|
gcc_assert (m_parent->m_module_group_variables.has_variable (name));
|
||
|
return m_parent->m_module_group_variables.segment_offset (name);
|
||
|
}
|
||
|
|
||
|
/* Try to expand the given builtin call to reuse a previously generated
|
||
|
variable, if possible. If not, just call the given builtin.
|
||
|
BRIG_OPCODE and BRIG_TYPE identify the builtin's BRIG opcode/type,
|
||
|
ARITH_TYPE its GENERIC type, and OPERANDS contains the builtin's
|
||
|
input operands. */
|
||
|
|
||
|
tree
|
||
|
brig_function::expand_or_call_builtin (BrigOpcode16_t brig_opcode,
|
||
|
BrigType16_t brig_type,
|
||
|
tree arith_type,
|
||
|
tree_stl_vec &operands)
|
||
|
{
|
||
|
if (needs_workitem_context_data (brig_opcode))
|
||
|
m_has_unexpanded_dp_builtins = true;
|
||
|
|
||
|
if (can_expand_builtin (brig_opcode))
|
||
|
return expand_builtin (brig_opcode, operands);
|
||
|
|
||
|
tree built_in
|
||
|
= get_builtin_for_hsa_opcode (arith_type, brig_opcode, brig_type);
|
||
|
|
||
|
if (!VECTOR_TYPE_P (TREE_TYPE (TREE_TYPE (built_in)))
|
||
|
&& arith_type != NULL_TREE && VECTOR_TYPE_P (arith_type)
|
||
|
&& brig_opcode != BRIG_OPCODE_LERP
|
||
|
&& brig_opcode != BRIG_OPCODE_PACKCVT
|
||
|
&& brig_opcode != BRIG_OPCODE_SAD
|
||
|
&& brig_opcode != BRIG_OPCODE_SADHI)
|
||
|
{
|
||
|
/* Call the scalar built-in for all elements in the vector. */
|
||
|
tree_stl_vec operand0_elements;
|
||
|
if (operands.size () > 0)
|
||
|
unpack (operands[0], operand0_elements);
|
||
|
|
||
|
tree_stl_vec operand1_elements;
|
||
|
if (operands.size () > 1)
|
||
|
unpack (operands[1], operand1_elements);
|
||
|
|
||
|
tree_stl_vec result_elements;
|
||
|
|
||
|
size_t element_count = gccbrig_type_vector_subparts (arith_type);
|
||
|
for (size_t i = 0; i < element_count; ++i)
|
||
|
{
|
||
|
tree_stl_vec call_operands;
|
||
|
if (operand0_elements.size () > 0)
|
||
|
call_operands.push_back (operand0_elements.at (i));
|
||
|
|
||
|
if (operand1_elements.size () > 0)
|
||
|
call_operands.push_back (operand1_elements.at (i));
|
||
|
|
||
|
result_elements.push_back
|
||
|
(expand_or_call_builtin (brig_opcode, brig_type,
|
||
|
TREE_TYPE (arith_type),
|
||
|
call_operands));
|
||
|
}
|
||
|
return pack (result_elements);
|
||
|
}
|
||
|
|
||
|
tree_stl_vec call_operands;
|
||
|
tree_stl_vec operand_types;
|
||
|
|
||
|
tree arg_type_chain = TYPE_ARG_TYPES (TREE_TYPE (built_in));
|
||
|
|
||
|
for (size_t i = 0; i < operands.size (); ++i)
|
||
|
{
|
||
|
tree operand_type = TREE_VALUE (arg_type_chain);
|
||
|
call_operands.push_back (convert (operand_type, operands[i]));
|
||
|
operand_types.push_back (operand_type);
|
||
|
arg_type_chain = TREE_CHAIN (arg_type_chain);
|
||
|
}
|
||
|
|
||
|
if (needs_workitem_context_data (brig_opcode))
|
||
|
{
|
||
|
call_operands.push_back (m_context_arg);
|
||
|
operand_types.push_back (ptr_type_node);
|
||
|
}
|
||
|
|
||
|
size_t operand_count = call_operands.size ();
|
||
|
|
||
|
call_operands.resize (4, NULL_TREE);
|
||
|
operand_types.resize (4, NULL_TREE);
|
||
|
for (size_t i = 0; i < operand_count; ++i)
|
||
|
call_operands.at (i) = build_resize_convert_view (operand_types.at (i),
|
||
|
call_operands.at (i));
|
||
|
|
||
|
tree fnptr = build_fold_addr_expr (built_in);
|
||
|
return build_call_array (TREE_TYPE (TREE_TYPE (built_in)), fnptr,
|
||
|
operand_count, &call_operands[0]);
|
||
|
}
|
||
|
|
||
|
/* Instead of calling a built-in function, use a more efficient mechanism
|
||
|
such as reuse a previously returned value known to be still valid, or
|
||
|
access the work-item context struct directly. This is beneficial especially
|
||
|
for the work-item identification related builtins as not having them as
|
||
|
unanalyzable black box calls can lead to more easily vectorizable parallel
|
||
|
loops for multi work-item work-groups. BRIG_OPCODE identifies the builtin
|
||
|
and OPERANDS store the operands. */
|
||
|
|
||
|
tree
|
||
|
brig_function::expand_builtin (BrigOpcode16_t brig_opcode,
|
||
|
tree_stl_vec &operands)
|
||
|
{
|
||
|
tree_stl_vec uint32_0 = tree_stl_vec (1, build_int_cst (uint32_type_node, 0));
|
||
|
|
||
|
tree_stl_vec uint32_1 = tree_stl_vec (1, build_int_cst (uint32_type_node, 1));
|
||
|
|
||
|
tree_stl_vec uint32_2 = tree_stl_vec (1, build_int_cst (uint32_type_node, 2));
|
||
|
|
||
|
if (brig_opcode == BRIG_OPCODE_WORKITEMFLATABSID)
|
||
|
{
|
||
|
tree id0 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_0);
|
||
|
id0 = convert (uint64_type_node, id0);
|
||
|
|
||
|
tree id1 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_1);
|
||
|
id1 = convert (uint64_type_node, id1);
|
||
|
|
||
|
tree id2 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_2);
|
||
|
id2 = convert (uint64_type_node, id2);
|
||
|
|
||
|
tree max0 = convert (uint64_type_node, m_grid_size_vars[0]);
|
||
|
tree max1 = convert (uint64_type_node, m_grid_size_vars[1]);
|
||
|
|
||
|
tree id2_x_max0_x_max1 = build2 (MULT_EXPR, uint64_type_node, id2, max0);
|
||
|
id2_x_max0_x_max1
|
||
|
= build2 (MULT_EXPR, uint64_type_node, id2_x_max0_x_max1, max1);
|
||
|
|
||
|
tree id1_x_max0 = build2 (MULT_EXPR, uint64_type_node, id1, max0);
|
||
|
|
||
|
tree sum = build2 (PLUS_EXPR, uint64_type_node, id0, id1_x_max0);
|
||
|
sum = build2 (PLUS_EXPR, uint64_type_node, sum, id2_x_max0_x_max1);
|
||
|
|
||
|
return add_temp_var ("workitemflatabsid", sum);
|
||
|
}
|
||
|
else if (brig_opcode == BRIG_OPCODE_WORKITEMABSID)
|
||
|
{
|
||
|
HOST_WIDE_INT dim = int_constant_value (operands[0]);
|
||
|
return m_abs_id_vars[dim];
|
||
|
}
|
||
|
else if (brig_opcode == BRIG_OPCODE_WORKITEMFLATID)
|
||
|
{
|
||
|
|
||
|
tree wg_size_x = expand_builtin (BRIG_OPCODE_WORKGROUPSIZE, uint32_0);
|
||
|
tree wg_size_y = expand_builtin (BRIG_OPCODE_WORKGROUPSIZE, uint32_1);
|
||
|
tree z_x_wgsx_wgsy
|
||
|
= build2 (MULT_EXPR, uint32_type_node,
|
||
|
convert (uint32_type_node,
|
||
|
expand_builtin (BRIG_OPCODE_WORKITEMID, uint32_2)),
|
||
|
wg_size_x);
|
||
|
z_x_wgsx_wgsy = build2 (MULT_EXPR, uint32_type_node, z_x_wgsx_wgsy,
|
||
|
wg_size_y);
|
||
|
|
||
|
tree y_x_wgsx
|
||
|
= build2 (MULT_EXPR, uint32_type_node,
|
||
|
convert (uint32_type_node,
|
||
|
expand_builtin (BRIG_OPCODE_WORKITEMID, uint32_1)),
|
||
|
wg_size_x);
|
||
|
|
||
|
tree sum = build2 (PLUS_EXPR, uint32_type_node, y_x_wgsx, z_x_wgsx_wgsy);
|
||
|
sum = build2 (PLUS_EXPR, uint32_type_node,
|
||
|
convert (uint32_type_node,
|
||
|
expand_builtin (BRIG_OPCODE_WORKITEMID, uint32_0)),
|
||
|
sum);
|
||
|
return add_temp_var ("workitemflatid", sum);
|
||
|
}
|
||
|
else if (brig_opcode == BRIG_OPCODE_WORKGROUPSIZE)
|
||
|
{
|
||
|
HOST_WIDE_INT dim = int_constant_value (operands[0]);
|
||
|
if (flag_assume_phsa)
|
||
|
{
|
||
|
tree ptr_type = build_pointer_type (uint32_type_node);
|
||
|
tree ctx = build2 (MEM_REF, uint32_type_node, m_context_arg,
|
||
|
build_int_cst (ptr_type,
|
||
|
PHSA_CONTEXT_WG_SIZES
|
||
|
+ dim * 4));
|
||
|
std::string name ("wgsize_x");
|
||
|
name [name.length() - 1] += dim;
|
||
|
return add_temp_var (name.c_str(), ctx);
|
||
|
}
|
||
|
else if (m_is_kernel)
|
||
|
{
|
||
|
/* For kernels without phsa we generate certain temps before
|
||
|
the WI loop, which means we don't need to rely on LICM to get
|
||
|
them moved out. */
|
||
|
return m_wg_size_vars[dim];
|
||
|
}
|
||
|
else
|
||
|
gcc_unreachable ();
|
||
|
}
|
||
|
else if (brig_opcode == BRIG_OPCODE_WORKITEMID)
|
||
|
{
|
||
|
HOST_WIDE_INT dim = int_constant_value (operands[0]);
|
||
|
if (m_is_kernel)
|
||
|
{
|
||
|
return m_local_id_vars [dim];
|
||
|
}
|
||
|
else if (flag_assume_phsa)
|
||
|
{
|
||
|
tree ptr_type = build_pointer_type (uint32_type_node);
|
||
|
tree ctx = build2 (MEM_REF, uint32_type_node, m_context_arg,
|
||
|
build_int_cst (ptr_type,
|
||
|
PHSA_CONTEXT_OFFS_WI_IDS
|
||
|
+ dim * 4));
|
||
|
std::string name ("wiid_x");
|
||
|
name [name.length() - 1] += dim;
|
||
|
return add_temp_var (name.c_str(), ctx);
|
||
|
}
|
||
|
else
|
||
|
gcc_unreachable ();
|
||
|
}
|
||
|
else if (brig_opcode == BRIG_OPCODE_WORKGROUPID)
|
||
|
{
|
||
|
HOST_WIDE_INT dim = int_constant_value (operands[0]);
|
||
|
if (flag_assume_phsa)
|
||
|
{
|
||
|
tree ptr_type = build_pointer_type (uint32_type_node);
|
||
|
tree ctx = build2 (MEM_REF, uint32_type_node, m_context_arg,
|
||
|
build_int_cst (ptr_type,
|
||
|
PHSA_CONTEXT_OFFS_WG_IDS
|
||
|
+ dim * 4));
|
||
|
std::string name ("wgid_x");
|
||
|
name [name.length() - 1] += dim;
|
||
|
return add_temp_var (name.c_str(), ctx);
|
||
|
} else if (m_is_kernel)
|
||
|
return m_wg_id_vars [dim];
|
||
|
else
|
||
|
gcc_unreachable ();
|
||
|
}
|
||
|
else if (brig_opcode == BRIG_OPCODE_CURRENTWORKGROUPSIZE)
|
||
|
{
|
||
|
HOST_WIDE_INT dim = int_constant_value (operands[0]);
|
||
|
if (flag_assume_phsa)
|
||
|
{
|
||
|
tree ptr_type = build_pointer_type (uint32_type_node);
|
||
|
tree ctx = build2 (MEM_REF, uint32_type_node, m_context_arg,
|
||
|
build_int_cst (ptr_type,
|
||
|
PHSA_CONTEXT_CURRENT_WG_SIZES
|
||
|
+ dim * 4));
|
||
|
std::string name ("curwgsize_x");
|
||
|
name [name.length() - 1] += dim;
|
||
|
return add_temp_var (name.c_str(), ctx);
|
||
|
} else if (m_is_kernel)
|
||
|
return m_cur_wg_size_vars[dim];
|
||
|
else
|
||
|
gcc_unreachable ();
|
||
|
}
|
||
|
else
|
||
|
gcc_unreachable ();
|
||
|
|
||
|
return NULL_TREE;
|
||
|
}
|
||
|
|
||
|
/* Returns true in case the given opcode that would normally be generated
|
||
|
as a builtin call can be expanded to tree nodes. */
|
||
|
|
||
|
bool
|
||
|
brig_function::can_expand_builtin (BrigOpcode16_t brig_opcode) const
|
||
|
{
|
||
|
switch (brig_opcode)
|
||
|
{
|
||
|
case BRIG_OPCODE_CURRENTWORKGROUPSIZE:
|
||
|
case BRIG_OPCODE_WORKITEMFLATID:
|
||
|
case BRIG_OPCODE_WORKITEMID:
|
||
|
case BRIG_OPCODE_WORKGROUPID:
|
||
|
case BRIG_OPCODE_WORKGROUPSIZE:
|
||
|
return m_is_kernel || flag_assume_phsa;
|
||
|
case BRIG_OPCODE_WORKITEMFLATABSID:
|
||
|
case BRIG_OPCODE_WORKITEMABSID:
|
||
|
return m_is_kernel;
|
||
|
default:
|
||
|
return false;
|
||
|
};
|
||
|
}
|
||
|
|
||
|
/* In case the HSA instruction must be implemented using a builtin,
|
||
|
this function is called to get the correct builtin function.
|
||
|
TYPE is the instruction tree type, BRIG_OPCODE the opcode of the
|
||
|
brig instruction and BRIG_TYPE the brig instruction's type. */
|
||
|
|
||
|
tree
|
||
|
brig_function::get_builtin_for_hsa_opcode
|
||
|
(tree type, BrigOpcode16_t brig_opcode, BrigType16_t brig_type) const
|
||
|
{
|
||
|
tree builtin = NULL_TREE;
|
||
|
tree builtin_type = type;
|
||
|
|
||
|
/* For vector types, first find the scalar version of the builtin. */
|
||
|
if (type != NULL_TREE && VECTOR_TYPE_P (type))
|
||
|
builtin_type = TREE_TYPE (type);
|
||
|
BrigType16_t brig_inner_type = brig_type & BRIG_TYPE_BASE_MASK;
|
||
|
|
||
|
/* Some BRIG opcodes can use the same builtins for unsigned and
|
||
|
signed types. Force these cases to unsigned types. */
|
||
|
|
||
|
if (brig_opcode == BRIG_OPCODE_BORROW
|
||
|
|| brig_opcode == BRIG_OPCODE_CARRY
|
||
|
|| brig_opcode == BRIG_OPCODE_LASTBIT
|
||
|
|| brig_opcode == BRIG_OPCODE_BITINSERT)
|
||
|
{
|
||
|
if (brig_type == BRIG_TYPE_S32)
|
||
|
brig_type = BRIG_TYPE_U32;
|
||
|
else if (brig_type == BRIG_TYPE_S64)
|
||
|
brig_type = BRIG_TYPE_U64;
|
||
|
}
|
||
|
|
||
|
switch (brig_opcode)
|
||
|
{
|
||
|
case BRIG_OPCODE_FLOOR:
|
||
|
builtin = mathfn_built_in (builtin_type, BUILT_IN_FLOOR);
|
||
|
break;
|
||
|
case BRIG_OPCODE_CEIL:
|
||
|
builtin = mathfn_built_in (builtin_type, BUILT_IN_CEIL);
|
||
|
break;
|
||
|
case BRIG_OPCODE_SQRT:
|
||
|
case BRIG_OPCODE_NSQRT:
|
||
|
builtin = mathfn_built_in (builtin_type, BUILT_IN_SQRT);
|
||
|
break;
|
||
|
case BRIG_OPCODE_RINT:
|
||
|
builtin = mathfn_built_in (builtin_type, BUILT_IN_RINT);
|
||
|
break;
|
||
|
case BRIG_OPCODE_TRUNC:
|
||
|
builtin = mathfn_built_in (builtin_type, BUILT_IN_TRUNC);
|
||
|
break;
|
||
|
case BRIG_OPCODE_COPYSIGN:
|
||
|
builtin = mathfn_built_in (builtin_type, BUILT_IN_COPYSIGN);
|
||
|
break;
|
||
|
case BRIG_OPCODE_NSIN:
|
||
|
builtin = mathfn_built_in (builtin_type, BUILT_IN_SIN);
|
||
|
break;
|
||
|
case BRIG_OPCODE_NLOG2:
|
||
|
builtin = mathfn_built_in (builtin_type, BUILT_IN_LOG2);
|
||
|
break;
|
||
|
case BRIG_OPCODE_NEXP2:
|
||
|
builtin = mathfn_built_in (builtin_type, BUILT_IN_EXP2);
|
||
|
break;
|
||
|
case BRIG_OPCODE_FMA:
|
||
|
case BRIG_OPCODE_NFMA:
|
||
|
builtin = mathfn_built_in (builtin_type, BUILT_IN_FMA);
|
||
|
break;
|
||
|
case BRIG_OPCODE_NCOS:
|
||
|
builtin = mathfn_built_in (builtin_type, BUILT_IN_COS);
|
||
|
break;
|
||
|
case BRIG_OPCODE_POPCOUNT:
|
||
|
/* Popcount should be typed by its argument type (the return value
|
||
|
is always u32). Let's use a b64 version for also for b32 for now. */
|
||
|
return builtin_decl_explicit (BUILT_IN_POPCOUNTL);
|
||
|
case BRIG_OPCODE_BORROW:
|
||
|
/* Borrow uses the same builtin for unsigned and signed types. */
|
||
|
if (brig_type == BRIG_TYPE_S32 || brig_type == BRIG_TYPE_U32)
|
||
|
return builtin_decl_explicit (BUILT_IN_HSAIL_BORROW_U32);
|
||
|
else
|
||
|
return builtin_decl_explicit (BUILT_IN_HSAIL_BORROW_U64);
|
||
|
case BRIG_OPCODE_CARRY:
|
||
|
/* Carry also uses the same builtin for unsigned and signed types. */
|
||
|
if (brig_type == BRIG_TYPE_S32 || brig_type == BRIG_TYPE_U32)
|
||
|
return builtin_decl_explicit (BUILT_IN_HSAIL_CARRY_U32);
|
||
|
else
|
||
|
return builtin_decl_explicit (BUILT_IN_HSAIL_CARRY_U64);
|
||
|
default:
|
||
|
|
||
|
/* Use our builtin index for finding a proper builtin for the BRIG
|
||
|
opcode and BRIG type. This takes care most of the builtin cases,
|
||
|
the special cases are handled in the separate 'case' statements
|
||
|
above. */
|
||
|
builtin_map::const_iterator i
|
||
|
= s_custom_builtins.find (std::make_pair (brig_opcode, brig_type));
|
||
|
if (i != s_custom_builtins.end ())
|
||
|
return (*i).second;
|
||
|
|
||
|
if (brig_inner_type != brig_type)
|
||
|
{
|
||
|
/* Try to find a scalar built-in we could use. */
|
||
|
i = s_custom_builtins.find
|
||
|
(std::make_pair (brig_opcode, brig_inner_type));
|
||
|
if (i != s_custom_builtins.end ())
|
||
|
return (*i).second;
|
||
|
}
|
||
|
|
||
|
/* In case this is an fp16 operation that is promoted to fp32,
|
||
|
try to find a fp32 scalar built-in. */
|
||
|
if (brig_inner_type == BRIG_TYPE_F16)
|
||
|
{
|
||
|
i = s_custom_builtins.find
|
||
|
(std::make_pair (brig_opcode, BRIG_TYPE_F32));
|
||
|
if (i != s_custom_builtins.end ())
|
||
|
return (*i).second;
|
||
|
}
|
||
|
gcc_unreachable ();
|
||
|
}
|
||
|
|
||
|
if (VECTOR_TYPE_P (type) && builtin != NULL_TREE)
|
||
|
{
|
||
|
/* Try to find a vectorized version of the built-in.
|
||
|
TODO: properly assert that builtin is a mathfn builtin? */
|
||
|
tree vec_builtin
|
||
|
= targetm.vectorize.builtin_vectorized_function
|
||
|
(builtin_mathfn_code (builtin), type, type);
|
||
|
if (vec_builtin != NULL_TREE)
|
||
|
return vec_builtin;
|
||
|
else
|
||
|
return builtin;
|
||
|
}
|
||
|
if (builtin == NULL_TREE)
|
||
|
gcc_unreachable ();
|
||
|
return builtin;
|
||
|
}
|
||
|
|
||
|
/* Unpacks the elements of the vector in VALUE to scalars (bit field
|
||
|
references) in ELEMENTS. */
|
||
|
|
||
|
void
|
||
|
brig_function::unpack (tree value, tree_stl_vec &elements)
|
||
|
{
|
||
|
size_t vec_size = int_size_in_bytes (TREE_TYPE (value));
|
||
|
size_t element_size
|
||
|
= int_size_in_bytes (TREE_TYPE (TREE_TYPE (value))) * BITS_PER_UNIT;
|
||
|
size_t element_count
|
||
|
= vec_size * BITS_PER_UNIT / element_size;
|
||
|
|
||
|
tree input_element_type = TREE_TYPE (TREE_TYPE (value));
|
||
|
|
||
|
value = add_temp_var ("unpack_input", value);
|
||
|
|
||
|
for (size_t i = 0; i < element_count; ++i)
|
||
|
{
|
||
|
tree element
|
||
|
= build3 (BIT_FIELD_REF, input_element_type, value,
|
||
|
TYPE_SIZE (input_element_type),
|
||
|
bitsize_int(i * element_size));
|
||
|
|
||
|
element = add_temp_var ("scalar", element);
|
||
|
elements.push_back (element);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Pack the elements of the scalars in ELEMENTS to the returned vector. */
|
||
|
|
||
|
tree
|
||
|
brig_function::pack (tree_stl_vec &elements)
|
||
|
{
|
||
|
size_t element_count = elements.size ();
|
||
|
|
||
|
gcc_assert (element_count > 1);
|
||
|
|
||
|
tree output_element_type = TREE_TYPE (elements.at (0));
|
||
|
|
||
|
vec<constructor_elt, va_gc> *constructor_vals = NULL;
|
||
|
for (size_t i = 0; i < element_count; ++i)
|
||
|
CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, elements.at (i));
|
||
|
|
||
|
tree vec_type = build_vector_type (output_element_type, element_count);
|
||
|
|
||
|
/* build_constructor creates a vector type which is not a vector_cst
|
||
|
that requires compile time constant elements. */
|
||
|
tree vec = build_constructor (vec_type, constructor_vals);
|
||
|
|
||
|
/* Add a temp variable for readability. */
|
||
|
tree tmp_var = create_tmp_var (vec_type, "vec_out");
|
||
|
tree vec_tmp_assign = build2 (MODIFY_EXPR, TREE_TYPE (tmp_var), tmp_var, vec);
|
||
|
append_statement (vec_tmp_assign);
|
||
|
return tmp_var;
|
||
|
}
|
||
|
|
||
|
/* Returns true in case the given opcode needs to know about work-item context
|
||
|
data. In such case the context data is passed as a pointer to a work-item
|
||
|
context object, as the last argument in the builtin call. */
|
||
|
|
||
|
bool
|
||
|
brig_function::needs_workitem_context_data
|
||
|
(BrigOpcode16_t brig_opcode)
|
||
|
{
|
||
|
switch (brig_opcode)
|
||
|
{
|
||
|
case BRIG_OPCODE_WORKITEMABSID:
|
||
|
case BRIG_OPCODE_WORKITEMFLATABSID:
|
||
|
case BRIG_OPCODE_WORKITEMFLATID:
|
||
|
case BRIG_OPCODE_CURRENTWORKITEMFLATID:
|
||
|
case BRIG_OPCODE_WORKITEMID:
|
||
|
case BRIG_OPCODE_WORKGROUPID:
|
||
|
case BRIG_OPCODE_WORKGROUPSIZE:
|
||
|
case BRIG_OPCODE_CURRENTWORKGROUPSIZE:
|
||
|
case BRIG_OPCODE_GRIDGROUPS:
|
||
|
case BRIG_OPCODE_GRIDSIZE:
|
||
|
case BRIG_OPCODE_DIM:
|
||
|
case BRIG_OPCODE_PACKETID:
|
||
|
case BRIG_OPCODE_PACKETCOMPLETIONSIG:
|
||
|
case BRIG_OPCODE_BARRIER:
|
||
|
case BRIG_OPCODE_WAVEBARRIER:
|
||
|
case BRIG_OPCODE_ARRIVEFBAR:
|
||
|
case BRIG_OPCODE_INITFBAR:
|
||
|
case BRIG_OPCODE_JOINFBAR:
|
||
|
case BRIG_OPCODE_LEAVEFBAR:
|
||
|
case BRIG_OPCODE_RELEASEFBAR:
|
||
|
case BRIG_OPCODE_WAITFBAR:
|
||
|
case BRIG_OPCODE_CUID:
|
||
|
case BRIG_OPCODE_MAXCUID:
|
||
|
case BRIG_OPCODE_DEBUGTRAP:
|
||
|
case BRIG_OPCODE_GROUPBASEPTR:
|
||
|
case BRIG_OPCODE_KERNARGBASEPTR:
|
||
|
case BRIG_OPCODE_ALLOCA:
|
||
|
return true;
|
||
|
default:
|
||
|
return false;
|
||
|
};
|
||
|
}
|
||
|
|
||
|
/* Appends and returns a new temp variable and an accompanying assignment
|
||
|
statement that stores the value of the given EXPR and has the given NAME. */
|
||
|
|
||
|
tree
|
||
|
brig_function::add_temp_var (std::string name, tree expr)
|
||
|
{
|
||
|
tree temp_var = create_tmp_var (TREE_TYPE (expr), name.c_str ());
|
||
|
tree assign = build2 (MODIFY_EXPR, TREE_TYPE (temp_var), temp_var, expr);
|
||
|
append_statement (assign);
|
||
|
return temp_var;
|
||
|
}
|
||
|
|
||
|
/* Returns the integer constant value of the given node.
|
||
|
If it's a cast, looks into the source of the cast. */
|
||
|
|
||
|
HOST_WIDE_INT
|
||
|
brig_function::int_constant_value (tree node)
|
||
|
{
|
||
|
tree n = node;
|
||
|
if (TREE_CODE (n) == VIEW_CONVERT_EXPR)
|
||
|
n = TREE_OPERAND (n, 0);
|
||
|
return int_cst_value (n);
|
||
|
}
|
||
|
|
||
|
/* Returns the tree code that should be used to implement the given
|
||
|
HSA instruction opcode (BRIG_OPCODE) for the given type of instruction
|
||
|
(BRIG_TYPE). In case the opcode cannot be mapped to a TREE node directly,
|
||
|
returns TREE_LIST (if it can be emulated with a simple chain of tree
|
||
|
nodes) or CALL_EXPR if the opcode should be implemented using a builtin
|
||
|
call. */
|
||
|
|
||
|
tree_code
|
||
|
brig_function::get_tree_code_for_hsa_opcode
|
||
|
(BrigOpcode16_t brig_opcode, BrigType16_t brig_type)
|
||
|
{
|
||
|
BrigType16_t brig_inner_type = brig_type & BRIG_TYPE_BASE_MASK;
|
||
|
switch (brig_opcode)
|
||
|
{
|
||
|
case BRIG_OPCODE_NOP:
|
||
|
return NOP_EXPR;
|
||
|
case BRIG_OPCODE_ADD:
|
||
|
return PLUS_EXPR;
|
||
|
case BRIG_OPCODE_CMOV:
|
||
|
if (brig_inner_type == brig_type)
|
||
|
return COND_EXPR;
|
||
|
else
|
||
|
return VEC_COND_EXPR;
|
||
|
case BRIG_OPCODE_SUB:
|
||
|
return MINUS_EXPR;
|
||
|
case BRIG_OPCODE_MUL:
|
||
|
case BRIG_OPCODE_MUL24:
|
||
|
return MULT_EXPR;
|
||
|
case BRIG_OPCODE_MULHI:
|
||
|
case BRIG_OPCODE_MUL24HI:
|
||
|
return MULT_HIGHPART_EXPR;
|
||
|
case BRIG_OPCODE_DIV:
|
||
|
if (gccbrig_is_float_type (brig_inner_type))
|
||
|
return RDIV_EXPR;
|
||
|
else
|
||
|
return TRUNC_DIV_EXPR;
|
||
|
case BRIG_OPCODE_NEG:
|
||
|
return NEGATE_EXPR;
|
||
|
case BRIG_OPCODE_MIN:
|
||
|
if (gccbrig_is_float_type (brig_inner_type))
|
||
|
return CALL_EXPR;
|
||
|
else
|
||
|
return MIN_EXPR;
|
||
|
case BRIG_OPCODE_MAX:
|
||
|
if (gccbrig_is_float_type (brig_inner_type))
|
||
|
return CALL_EXPR;
|
||
|
else
|
||
|
return MAX_EXPR;
|
||
|
case BRIG_OPCODE_ABS:
|
||
|
return ABS_EXPR;
|
||
|
case BRIG_OPCODE_SHL:
|
||
|
return LSHIFT_EXPR;
|
||
|
case BRIG_OPCODE_SHR:
|
||
|
return RSHIFT_EXPR;
|
||
|
case BRIG_OPCODE_OR:
|
||
|
return BIT_IOR_EXPR;
|
||
|
case BRIG_OPCODE_XOR:
|
||
|
return BIT_XOR_EXPR;
|
||
|
case BRIG_OPCODE_AND:
|
||
|
return BIT_AND_EXPR;
|
||
|
case BRIG_OPCODE_NOT:
|
||
|
return BIT_NOT_EXPR;
|
||
|
case BRIG_OPCODE_RET:
|
||
|
return RETURN_EXPR;
|
||
|
case BRIG_OPCODE_MOV:
|
||
|
case BRIG_OPCODE_LDF:
|
||
|
return MODIFY_EXPR;
|
||
|
case BRIG_OPCODE_LD:
|
||
|
case BRIG_OPCODE_ST:
|
||
|
return MEM_REF;
|
||
|
case BRIG_OPCODE_BR:
|
||
|
return GOTO_EXPR;
|
||
|
case BRIG_OPCODE_REM:
|
||
|
if (brig_type == BRIG_TYPE_U64 || brig_type == BRIG_TYPE_U32)
|
||
|
return TRUNC_MOD_EXPR;
|
||
|
else
|
||
|
return CALL_EXPR;
|
||
|
case BRIG_OPCODE_NRCP:
|
||
|
case BRIG_OPCODE_NRSQRT:
|
||
|
/* Implement as 1/f (x). gcc should pattern detect that and
|
||
|
use a native instruction, if available, for it. */
|
||
|
return TREE_LIST;
|
||
|
case BRIG_OPCODE_FMA:
|
||
|
case BRIG_OPCODE_FLOOR:
|
||
|
case BRIG_OPCODE_CEIL:
|
||
|
case BRIG_OPCODE_SQRT:
|
||
|
case BRIG_OPCODE_NSQRT:
|
||
|
case BRIG_OPCODE_RINT:
|
||
|
case BRIG_OPCODE_TRUNC:
|
||
|
case BRIG_OPCODE_POPCOUNT:
|
||
|
case BRIG_OPCODE_COPYSIGN:
|
||
|
case BRIG_OPCODE_NCOS:
|
||
|
case BRIG_OPCODE_NSIN:
|
||
|
case BRIG_OPCODE_NLOG2:
|
||
|
case BRIG_OPCODE_NEXP2:
|
||
|
case BRIG_OPCODE_NFMA:
|
||
|
/* Class has type B1 regardless of the float type, thus
|
||
|
the below builtin map search cannot find it. */
|
||
|
case BRIG_OPCODE_CLASS:
|
||
|
case BRIG_OPCODE_WORKITEMABSID:
|
||
|
return CALL_EXPR;
|
||
|
default:
|
||
|
|
||
|
/* Some BRIG opcodes can use the same builtins for unsigned and
|
||
|
signed types. Force these cases to unsigned types.
|
||
|
*/
|
||
|
|
||
|
if (brig_opcode == BRIG_OPCODE_BORROW
|
||
|
|| brig_opcode == BRIG_OPCODE_CARRY
|
||
|
|| brig_opcode == BRIG_OPCODE_LASTBIT
|
||
|
|| brig_opcode == BRIG_OPCODE_BITINSERT)
|
||
|
{
|
||
|
if (brig_type == BRIG_TYPE_S32)
|
||
|
brig_type = BRIG_TYPE_U32;
|
||
|
else if (brig_type == BRIG_TYPE_S64)
|
||
|
brig_type = BRIG_TYPE_U64;
|
||
|
}
|
||
|
|
||
|
|
||
|
builtin_map::const_iterator i
|
||
|
= s_custom_builtins.find (std::make_pair (brig_opcode, brig_type));
|
||
|
if (i != s_custom_builtins.end ())
|
||
|
return CALL_EXPR;
|
||
|
else if (s_custom_builtins.find
|
||
|
(std::make_pair (brig_opcode, brig_inner_type))
|
||
|
!= s_custom_builtins.end ())
|
||
|
return CALL_EXPR;
|
||
|
if (brig_inner_type == BRIG_TYPE_F16
|
||
|
&& s_custom_builtins.find
|
||
|
(std::make_pair (brig_opcode, BRIG_TYPE_F32))
|
||
|
!= s_custom_builtins.end ())
|
||
|
return CALL_EXPR;
|
||
|
break;
|
||
|
}
|
||
|
return TREE_LIST; /* Emulate using a chain of nodes. */
|
||
|
}
|
||
|
|
||
|
/* Inform of an update to the REG_VAR. */
|
||
|
|
||
|
void
|
||
|
brig_function::add_reg_var_update (tree reg_var, tree var)
|
||
|
{
|
||
|
if (var == m_abs_id_vars[0] || var == m_abs_id_vars[1]
|
||
|
|| var == m_abs_id_vars[2] || var == m_local_id_vars[0]
|
||
|
|| var == m_local_id_vars[1] || var == m_local_id_vars[2])
|
||
|
m_id_val_defs [reg_var] = var;
|
||
|
else
|
||
|
{
|
||
|
/* Possible overwrite of an ID value. */
|
||
|
|
||
|
id_val_map::iterator i = m_id_val_defs.find (reg_var);
|
||
|
if (i != m_id_val_defs.end())
|
||
|
m_id_val_defs.erase (i);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* If the REG_VAR is known to contain an ID value at this point in
|
||
|
the basic block, return true. */
|
||
|
|
||
|
bool
|
||
|
brig_function::is_id_val (tree reg_var)
|
||
|
{
|
||
|
id_val_map::iterator i = m_id_val_defs.find (reg_var);
|
||
|
return i != m_id_val_defs.end();
|
||
|
}
|
||
|
|
||
|
/* Return an ID value for the given REG_VAR if its known to contain
|
||
|
one at this point in the BB, NULL_TREE otherwise. */
|
||
|
|
||
|
tree
|
||
|
brig_function::id_val (tree reg_var)
|
||
|
{
|
||
|
id_val_map::iterator i = m_id_val_defs.find (reg_var);
|
||
|
if (i != m_id_val_defs.end())
|
||
|
return (*i).second;
|
||
|
else
|
||
|
return NULL_TREE;
|
||
|
}
|
||
|
|
||
|
/* Informs of starting a new basic block. Called when generating
|
||
|
a label, a call, a jump, or a return. */
|
||
|
|
||
|
void
|
||
|
brig_function::start_new_bb ()
|
||
|
{
|
||
|
m_id_val_defs.clear ();
|
||
|
}
|