Discussion:
[Mesa-dev] [PATCH 00/38] radv,
Rhys Perry
2018-12-07 17:21:53 UTC
Permalink
This series adds support for:
- VK_KHR_shader_float16_int8
- VK_AMD_gpu_shader_half_float
- VK_AMD_gpu_shader_int16
- VK_KHR_8bit_storage
on VI+. Half floats are currently disabled on LLVM 7 because of a bug
causing large memory usage and long (or unbounded) compilation times with
some tests.

It depends on the following patch series:
- https://patchwork.freedesktop.org/series/53454/
- https://patchwork.freedesktop.org/series/53602/
- https://patchwork.freedesktop.org/series/53660/

An older version was tested on my Polaris card, but due to hardware issues
I currently can't test the latest version of the series.

deqp-vk has no regressions and none of the newly enabled tests fail.

Rhys Perry (38):
ac: add various helpers for float16/int16/int8
ac/nir: implement 8-bit push constant, ssbo and ubo loads
ac/nir: implement 8-bit ssbo stores
ac/nir: fix 16-bit ssbo stores
ac/nir: implement 8-bit nir_load_const_instr
ac/nir: implement 8-bit conversions
ac/nir: fix 64-bit nir_op_f2f16_rtz
ac/nir: make ac_build_clamp work on all bit sizes
ac/nir: make ac_build_fract work on all bit sizes
ac/nir: make ac_build_isign work on all bit sizes
ac/nir: make ac_build_fsign work on all bit sizes
ac/nir: make ac_build_fdiv support 16-bit floats
ac/nir: implement half-float nir_op_frcp
ac/nir: implement half-float nir_op_frsq
ac/nir: implement half-float nir_op_ldexp
radv: lower 16-bit flrp
ac/nir: support half floats in emit_b2f
ac/nir: make emit_b2i work on all bit sizes
ac/nir: implement 16-bit shifts
compiler/nir: add lowering option for 16-bit ffma
ac/nir: implement 16-bit ac_build_ddxy
ac/nir: implement 8 and 16 bit ac_build_readlane
nir: make bitfield_reverse and ifind_msb work with all integers
ac/nir: make ac_find_lsb work on all bit sizes
ac/nir: make ac_build_umsb work on all bit sizes
ac/nir: implement 8 and 16 bit ac_build_imsb
ac/nir: make ac_build_bit_count work on all bit sizes
ac/nir: make ac_build_bitfield_reverse work on all bit sizes
ac/nir: implement 16-bit pack/unpack opcodes
ac/nir: add 8-bit and 16-bit types to glsl_base_to_llvm_type
ac/nir,radv: create an array of varying output types
ac/nir: store all outputs as f32
radv: store all fragment shader inputs as f32
radv: handle all fragment output types
ac,radv: run LLVM's SLP vectorizer
ac/nir: generate better code for nir_op_f2f16_rtz
ac/nir: have nir_op_f2f16 round to zero
radv: expose float16, int16 and int8 features and extensions

src/amd/common/ac_llvm_build.c | 355 ++++++++++++++------------
src/amd/common/ac_llvm_build.h | 22 +-
src/amd/common/ac_llvm_util.c | 9 +-
src/amd/common/ac_llvm_util.h | 1 +
src/amd/common/ac_nir_to_llvm.c | 258 +++++++++++++++----
src/amd/common/ac_shader_abi.h | 1 +
src/amd/vulkan/radv_device.c | 17 ++
src/amd/vulkan/radv_extensions.py | 4 +
src/amd/vulkan/radv_nir_to_llvm.c | 92 ++++---
src/amd/vulkan/radv_shader.c | 7 +
src/broadcom/compiler/nir_to_vir.c | 1 +
src/compiler/nir/nir.h | 1 +
src/compiler/nir/nir_opcodes.py | 4 +-
src/compiler/nir/nir_opt_algebraic.py | 4 +-
src/gallium/drivers/radeonsi/si_get.c | 1 +
src/gallium/drivers/vc4/vc4_program.c | 1 +
16 files changed, 516 insertions(+), 262 deletions(-)
--
2.19.2
Rhys Perry
2018-12-07 17:21:54 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/common/ac_llvm_build.c | 123 ++++++++++++++++++++++++++++++--
src/amd/common/ac_llvm_build.h | 22 +++++-
src/amd/common/ac_nir_to_llvm.c | 30 ++++----
3 files changed, 154 insertions(+), 21 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 154cc696a2..cc7c6da5a4 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -87,12 +87,16 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
ctx->v8i32 = LLVMVectorType(ctx->i32, 8);

+ ctx->i8_0 = LLVMConstInt(ctx->i8, 0, false);
+ ctx->i8_1 = LLVMConstInt(ctx->i8, 1, false);
ctx->i16_0 = LLVMConstInt(ctx->i16, 0, false);
ctx->i16_1 = LLVMConstInt(ctx->i16, 1, false);
ctx->i32_0 = LLVMConstInt(ctx->i32, 0, false);
ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false);
ctx->i64_0 = LLVMConstInt(ctx->i64, 0, false);
ctx->i64_1 = LLVMConstInt(ctx->i64, 1, false);
+ ctx->f16_0 = LLVMConstReal(ctx->f16, 0.0);
+ ctx->f16_1 = LLVMConstReal(ctx->f16, 1.0);
ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0);
ctx->f32_1 = LLVMConstReal(ctx->f32, 1.0);
ctx->f64_0 = LLVMConstReal(ctx->f64, 0.0);
@@ -201,7 +205,9 @@ ac_get_type_size(LLVMTypeRef type)

static LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t)
{
- if (t == ctx->f16 || t == ctx->i16)
+ if (t == ctx->i8)
+ return ctx->i8;
+ else if (t == ctx->f16 || t == ctx->i16)
return ctx->i16;
else if (t == ctx->f32 || t == ctx->i32)
return ctx->i32;
@@ -268,6 +274,110 @@ ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v)
return LLVMBuildBitCast(ctx->builder, v, ac_to_float_type(ctx, type), "");
}

+LLVMValueRef ac_get_zerof(struct ac_llvm_context *ctx, LLVMTypeRef t)
+{
+ if (t == ctx->f16)
+ return ctx->f16_0;
+ else if (t == ctx->f32)
+ return ctx->f32_0;
+ else if (t == ctx->f64)
+ return ctx->f64_0;
+ else
+ unreachable("Unhandled float size");
+}
+
+LLVMValueRef ac_get_onef(struct ac_llvm_context *ctx, LLVMTypeRef t)
+{
+ if (t == ctx->f16)
+ return ctx->f16_1;
+ else if (t == ctx->f32)
+ return ctx->f32_1;
+ else if (t == ctx->f64)
+ return ctx->f64_1;
+ else
+ unreachable("Unhandled float size");
+}
+
+LLVMValueRef ac_get_zero(struct ac_llvm_context *ctx, LLVMTypeRef t)
+{
+ if (t == ctx->i8)
+ return ctx->i8_0;
+ else if (t == ctx->i16)
+ return ctx->i16_0;
+ else if (t == ctx->i32)
+ return ctx->i32_0;
+ else if (t == ctx->i64)
+ return ctx->i64_0;
+ else
+ unreachable("Unhandled bit size");
+}
+
+LLVMValueRef ac_get_one(struct ac_llvm_context *ctx, LLVMTypeRef t)
+{
+ if (t == ctx->i8)
+ return ctx->i8_1;
+ else if (t == ctx->i16)
+ return ctx->i16_1;
+ else if (t == ctx->i32)
+ return ctx->i32_1;
+ else if (t == ctx->i64)
+ return ctx->i64_1;
+ else
+ unreachable("Unhandled bit size");
+}
+
+LLVMTypeRef ac_float_of_size(struct ac_llvm_context *ctx, unsigned bit_size)
+{
+ switch (bit_size) {
+ case 16:
+ return ctx->f16;
+ case 32:
+ return ctx->f32;
+ case 64:
+ return ctx->f64;
+ default:
+ unreachable("Unhandled bit size");
+ }
+}
+
+LLVMTypeRef ac_int_of_size(struct ac_llvm_context *ctx, unsigned bit_size)
+{
+ switch (bit_size) {
+ case 8:
+ return ctx->i8;
+ case 16:
+ return ctx->i16;
+ case 32:
+ return ctx->i32;
+ case 64:
+ return ctx->i64;
+ default:
+ unreachable("Unhandled bit size");
+ }
+}
+
+LLVMValueRef ac_build_ui_cast(struct ac_llvm_context *ctx, LLVMValueRef v, LLVMTypeRef t)
+{
+ unsigned new_bit_size = ac_get_elem_bits(ctx, t);
+ unsigned old_bit_size = ac_get_elem_bits(ctx, LLVMTypeOf(v));
+ if (new_bit_size > old_bit_size)
+ return LLVMBuildZExt(ctx->builder, v, t, "");
+ else if (new_bit_size < old_bit_size)
+ return LLVMBuildTrunc(ctx->builder, v, t, "");
+ else
+ return v;
+}
+
+LLVMValueRef ac_build_reinterpret(struct ac_llvm_context *ctx, LLVMValueRef v, LLVMTypeRef t)
+{
+ if (LLVMTypeOf(v) == t)
+ return v;
+
+ v = ac_to_integer(ctx, v);
+ v = ac_build_ui_cast(ctx, v, ac_to_integer_type(ctx, t));
+ return LLVMBuildBitCast(ctx->builder, v, t, "");
+}
+

LLVMValueRef
ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
@@ -1309,15 +1419,18 @@ LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
}

LLVMValueRef
-ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
+ac_build_tbuffer_load_short_byte(struct ac_llvm_context *ctx,
LLVMValueRef rsrc,
LLVMValueRef vindex,
LLVMValueRef voffset,
LLVMValueRef soffset,
LLVMValueRef immoffset,
- LLVMValueRef glc)
+ LLVMValueRef glc,
+ unsigned size)
{
+ assert(size == 1 || size == 2);
const char *name = "llvm.amdgcn.tbuffer.load.i32";
+ int data_format = size == 1 ? V_008F0C_BUF_DATA_FORMAT_8 : V_008F0C_BUF_DATA_FORMAT_16;
LLVMTypeRef type = ctx->i32;
LLVMValueRef params[] = {
rsrc,
@@ -1325,13 +1438,13 @@ ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
voffset,
soffset,
immoffset,
- LLVMConstInt(ctx->i32, V_008F0C_BUF_DATA_FORMAT_16, false),
+ LLVMConstInt(ctx->i32, data_format, false),
LLVMConstInt(ctx->i32, V_008F0C_BUF_NUM_FORMAT_UINT, false),
glc,
ctx->i1false,
};
LLVMValueRef res = ac_build_intrinsic(ctx, name, type, params, 9, 0);
- return LLVMBuildTrunc(ctx->builder, res, ctx->i16, "");
+ return LLVMBuildTrunc(ctx->builder, res, ac_int_of_size(ctx, size * 8), "");
}

/**
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index e90c8c21ad..34622bda10 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -76,12 +76,16 @@ struct ac_llvm_context {
LLVMTypeRef v4f32;
LLVMTypeRef v8i32;

+ LLVMValueRef i8_0;
+ LLVMValueRef i8_1;
LLVMValueRef i16_0;
LLVMValueRef i16_1;
LLVMValueRef i32_0;
LLVMValueRef i32_1;
LLVMValueRef i64_0;
LLVMValueRef i64_1;
+ LLVMValueRef f16_0;
+ LLVMValueRef f16_1;
LLVMValueRef f32_0;
LLVMValueRef f32_1;
LLVMValueRef f64_0;
@@ -132,6 +136,19 @@ LLVMValueRef ac_to_integer_or_pointer(struct ac_llvm_context *ctx, LLVMValueRef
LLVMTypeRef ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t);
LLVMValueRef ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v);

+LLVMValueRef ac_get_zerof(struct ac_llvm_context *ctx, LLVMTypeRef t);
+LLVMValueRef ac_get_onef(struct ac_llvm_context *ctx, LLVMTypeRef t);
+
+LLVMValueRef ac_get_zero(struct ac_llvm_context *ctx, LLVMTypeRef t);
+LLVMValueRef ac_get_one(struct ac_llvm_context *ctx, LLVMTypeRef t);
+
+LLVMTypeRef ac_float_of_size(struct ac_llvm_context *ctx, unsigned bit_size);
+LLVMTypeRef ac_int_of_size(struct ac_llvm_context *ctx, unsigned bit_size);
+
+LLVMValueRef ac_build_ui_cast(struct ac_llvm_context *ctx, LLVMValueRef v, LLVMTypeRef t);
+
+LLVMValueRef ac_build_reinterpret(struct ac_llvm_context *ctx, LLVMValueRef v, LLVMTypeRef t);
+
LLVMValueRef
ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
LLVMTypeRef return_type, LLVMValueRef *params,
@@ -290,13 +307,14 @@ LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
bool can_speculate);

LLVMValueRef
-ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
+ac_build_tbuffer_load_short_byte(struct ac_llvm_context *ctx,
LLVMValueRef rsrc,
LLVMValueRef vindex,
LLVMValueRef voffset,
LLVMValueRef soffset,
LLVMValueRef immoffset,
- LLVMValueRef glc);
+ LLVMValueRef glc,
+ unsigned size);

LLVMValueRef
ac_get_thread_id(struct ac_llvm_context *ctx);
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 4a4c09cf5f..2e9fd7b689 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1652,13 +1652,14 @@ static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,

LLVMValueRef ret;
if (load_bytes == 2) {
- ret = ac_build_tbuffer_load_short(&ctx->ac,
- rsrc,
- vindex,
- offset,
- ctx->ac.i32_0,
- immoffset,
- glc);
+ ret = ac_build_tbuffer_load_short_byte(&ctx->ac,
+ rsrc,
+ vindex,
+ offset,
+ ctx->ac.i32_0,
+ immoffset,
+ glc,
+ 2);
} else {
const char *load_name;
LLVMTypeRef data_type;
@@ -1723,13 +1724,14 @@ static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx,
if (instr->dest.ssa.bit_size == 16) {
LLVMValueRef results[num_components];
for (unsigned i = 0; i < num_components; ++i) {
- results[i] = ac_build_tbuffer_load_short(&ctx->ac,
- rsrc,
- ctx->ac.i32_0,
- offset,
- ctx->ac.i32_0,
- LLVMConstInt(ctx->ac.i32, 2 * i, 0),
- ctx->ac.i1false);
+ results[i] = ac_build_tbuffer_load_short_byte(&ctx->ac,
+ rsrc,
+ ctx->ac.i32_0,
+ offset,
+ ctx->ac.i32_0,
+ LLVMConstInt(ctx->ac.i32, 2 * i, 0),
+ ctx->ac.i1false,
+ 2);
}
ret = ac_build_gather_values(&ctx->ac, results, num_components);
} else {
--
2.19.2
Rhys Perry
2018-12-07 17:21:56 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/common/ac_nir_to_llvm.c | 22 ++++++++++++++++++++--
1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 8910dabb3d..31fb77290c 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1497,7 +1497,7 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,

LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
get_src(ctx, instr->src[1]), true);
- LLVMValueRef base_data = ac_to_float(&ctx->ac, src_data);
+ LLVMValueRef base_data = src_data;
base_data = ac_trim_vector(&ctx->ac, base_data, instr->num_components);
LLVMValueRef base_offset = get_src(ctx, instr->src[2]);

@@ -1538,7 +1538,25 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
offset = LLVMBuildAdd(ctx->ac.builder, base_offset,
LLVMConstInt(ctx->ac.i32, start * elem_size_bytes, false), "");
}
- if (num_bytes == 2) {
+ if (num_bytes == 1) {
+ store_name = "llvm.amdgcn.tbuffer.store.i32";
+ data_type = ctx->ac.i32;
+ data = LLVMBuildZExt(ctx->ac.builder, data, data_type, "");
+ LLVMValueRef tbuffer_params[] = {
+ data,
+ rsrc,
+ ctx->ac.i32_0, /* vindex */
+ offset, /* voffset */
+ ctx->ac.i32_0,
+ ctx->ac.i32_0,
+ LLVMConstInt(ctx->ac.i32, 1, false), // dfmt (= 8bit)
+ LLVMConstInt(ctx->ac.i32, 4, false), // nfmt (= uint)
+ glc,
+ ctx->ac.i1false,
+ };
+ ac_build_intrinsic(&ctx->ac, store_name,
+ ctx->ac.voidt, tbuffer_params, 10, 0);
+ } else if (num_bytes == 2) {
store_name = "llvm.amdgcn.tbuffer.store.i32";
data_type = ctx->ac.i32;
LLVMValueRef tbuffer_params[] = {
--
2.19.2
Rhys Perry
2018-12-07 17:21:57 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/common/ac_nir_to_llvm.c | 2 ++
1 file changed, 2 insertions(+)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 31fb77290c..535a47d790 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1559,6 +1559,8 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
} else if (num_bytes == 2) {
store_name = "llvm.amdgcn.tbuffer.store.i32";
data_type = ctx->ac.i32;
+ data = LLVMBuildBitCast(ctx->ac.builder, data, ctx->ac.i16, "");
+ data = LLVMBuildZExt(ctx->ac.builder, data, data_type, "");
LLVMValueRef tbuffer_params[] = {
data,
rsrc,
--
2.19.2
Rhys Perry
2018-12-07 17:21:58 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/common/ac_nir_to_llvm.c | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 535a47d790..6d0d2cbd55 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1110,6 +1110,10 @@ static void visit_load_const(struct ac_nir_context *ctx,

for (unsigned i = 0; i < instr->def.num_components; ++i) {
switch (instr->def.bit_size) {
+ case 8:
+ values[i] = LLVMConstInt(element_type,
+ instr->value.u8[i], false);
+ break;
case 16:
values[i] = LLVMConstInt(element_type,
instr->value.u16[i], false);
--
2.19.2
Rhys Perry
2018-12-07 17:21:55 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/common/ac_nir_to_llvm.c | 37 +++++++++++++++++++++++++++------
1 file changed, 31 insertions(+), 6 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 2e9fd7b689..8910dabb3d 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1395,7 +1395,30 @@ static LLVMValueRef visit_load_push_constant(struct ac_nir_context *ctx,

ptr = ac_build_gep0(&ctx->ac, ctx->abi->push_constants, addr);

- if (instr->dest.ssa.bit_size == 16) {
+ if (instr->dest.ssa.bit_size == 8) {
+ unsigned load_dwords = instr->dest.ssa.num_components > 1 ? 2 : 1;
+ LLVMTypeRef vec_type = LLVMVectorType(LLVMInt8TypeInContext(ctx->ac.context), 4 * load_dwords);
+ ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
+ LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
+
+ LLVMValueRef params[3];
+ if (load_dwords > 1) {
+ LLVMValueRef res_vec = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(ctx->ac.i32, 2), "");
+ params[0] = LLVMBuildExtractElement(ctx->ac.builder, res_vec, LLVMConstInt(ctx->ac.i32, 1, false), "");
+ params[1] = LLVMBuildExtractElement(ctx->ac.builder, res_vec, LLVMConstInt(ctx->ac.i32, 0, false), "");
+ } else {
+ res = LLVMBuildBitCast(ctx->ac.builder, res, ctx->ac.i32, "");
+ params[0] = ctx->ac.i32_0;
+ params[1] = res;
+ }
+ params[2] = addr;
+ res = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.alignbyte", ctx->ac.i32, params, 3, 0);
+
+ res = LLVMBuildTrunc(ctx->ac.builder, res, LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.num_components * 8), "");
+ if (instr->dest.ssa.num_components > 1)
+ res = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(LLVMInt8TypeInContext(ctx->ac.context), instr->dest.ssa.num_components), "");
+ return res;
+ } else if (instr->dest.ssa.bit_size == 16) {
unsigned load_dwords = instr->dest.ssa.num_components / 2 + 1;
LLVMTypeRef vec_type = LLVMVectorType(LLVMInt16TypeInContext(ctx->ac.context), 2 * load_dwords);
ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
@@ -1651,7 +1674,7 @@ static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i * elem_size_bytes, false);

LLVMValueRef ret;
- if (load_bytes == 2) {
+ if (load_bytes <= 2) {
ret = ac_build_tbuffer_load_short_byte(&ctx->ac,
rsrc,
vindex,
@@ -1659,7 +1682,7 @@ static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
ctx->ac.i32_0,
immoffset,
glc,
- 2);
+ load_bytes);
} else {
const char *load_name;
LLVMTypeRef data_type;
@@ -1675,6 +1698,7 @@ static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
data_type = ctx->ac.v2f32;
break;
case 4:
+ case 3:
load_name = "llvm.amdgcn.buffer.load.f32";
data_type = ctx->ac.f32;
break;
@@ -1721,7 +1745,8 @@ static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx,
if (instr->dest.ssa.bit_size == 64)
num_components *= 2;

- if (instr->dest.ssa.bit_size == 16) {
+ if (instr->dest.ssa.bit_size == 16 || instr->dest.ssa.bit_size == 8) {
+ unsigned size = instr->dest.ssa.bit_size / 8;
LLVMValueRef results[num_components];
for (unsigned i = 0; i < num_components; ++i) {
results[i] = ac_build_tbuffer_load_short_byte(&ctx->ac,
@@ -1729,9 +1754,9 @@ static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx,
ctx->ac.i32_0,
offset,
ctx->ac.i32_0,
- LLVMConstInt(ctx->ac.i32, 2 * i, 0),
+ LLVMConstInt(ctx->ac.i32, size * i, 0),
ctx->ac.i1false,
- 2);
+ size);
}
ret = ac_build_gather_values(&ctx->ac, results, num_components);
} else {
--
2.19.2
Rhys Perry
2018-12-07 17:22:01 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/common/ac_llvm_build.c | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index cc7c6da5a4..1ef28323d1 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1707,16 +1707,20 @@ ac_build_umsb(struct ac_llvm_context *ctx,
LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a,
LLVMValueRef b)
{
+ char intr[64];
+ snprintf(intr, sizeof(intr), "llvm.minnum.f%d", ac_get_elem_bits(ctx, LLVMTypeOf(a)));
LLVMValueRef args[2] = {a, b};
- return ac_build_intrinsic(ctx, "llvm.minnum.f32", ctx->f32, args, 2,
+ return ac_build_intrinsic(ctx, intr, LLVMTypeOf(a), args, 2,
AC_FUNC_ATTR_READNONE);
}

LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a,
LLVMValueRef b)
{
+ char intr[64];
+ snprintf(intr, sizeof(intr), "llvm.maxnum.f%d", ac_get_elem_bits(ctx, LLVMTypeOf(a)));
LLVMValueRef args[2] = {a, b};
- return ac_build_intrinsic(ctx, "llvm.maxnum.f32", ctx->f32, args, 2,
+ return ac_build_intrinsic(ctx, intr, LLVMTypeOf(a), args, 2,
AC_FUNC_ATTR_READNONE);
}

@@ -1743,8 +1747,9 @@ LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a,

LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value)
{
- return ac_build_fmin(ctx, ac_build_fmax(ctx, value, ctx->f32_0),
- ctx->f32_1);
+ LLVMTypeRef t = LLVMTypeOf(value);
+ return ac_build_fmin(ctx, ac_build_fmax(ctx, value, ac_get_zerof(ctx, t)),
+ ac_get_onef(ctx, t));
}

void ac_build_export(struct ac_llvm_context *ctx, struct ac_export_args *a)
--
2.19.2
Rhys Perry
2018-12-07 17:22:00 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/common/ac_nir_to_llvm.c | 2 ++
1 file changed, 2 insertions(+)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 7c827b443b..ef850d6d22 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -886,6 +886,8 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
break;
case nir_op_f2f16_rtz:
src[0] = ac_to_float(&ctx->ac, src[0]);
+ if (LLVMTypeOf(src[0]) == ctx->ac.f64)
+ src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ctx->ac.f32, "");
LLVMValueRef param[2] = { src[0], ctx->ac.f32_0 };
result = ac_build_cvt_pkrtz_f16(&ctx->ac, param);
result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
--
2.19.2
Rhys Perry
2018-12-07 17:22:04 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/common/ac_llvm_build.c | 16 ++++------------
1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index fa5c68d1b6..c85f9a214e 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -2357,19 +2357,11 @@ LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0,
LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src0,
unsigned bitsize)
{
- LLVMValueRef cmp, val, zero, one;
- LLVMTypeRef type;
-
- if (bitsize == 32) {
- type = ctx->f32;
- zero = ctx->f32_0;
- one = ctx->f32_1;
- } else {
- type = ctx->f64;
- zero = ctx->f64_0;
- one = ctx->f64_1;
- }
+ LLVMTypeRef type = ac_float_of_size(ctx, bitsize);
+ LLVMValueRef zero = ac_get_zerof(ctx, type);
+ LLVMValueRef one = ac_get_onef(ctx, type);

+ LLVMValueRef cmp, val;
cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src0, zero, "");
val = LLVMBuildSelect(ctx->builder, cmp, one, src0, "");
cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGE, val, zero, "");
--
2.19.2
Rhys Perry
2018-12-07 17:22:02 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/common/ac_llvm_build.c | 13 +++----------
1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 1ef28323d1..0a1987c65b 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -2327,16 +2327,9 @@ void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16)
LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
unsigned bitsize)
{
- LLVMTypeRef type;
- char *intr;
-
- if (bitsize == 32) {
- intr = "llvm.floor.f32";
- type = ctx->f32;
- } else {
- intr = "llvm.floor.f64";
- type = ctx->f64;
- }
+ LLVMTypeRef type = ac_float_of_size(ctx, bitsize);
+ char intr[64];
+ snprintf(intr, sizeof(intr), "llvm.floor.f%d", bitsize);

LLVMValueRef params[] = {
src0,
--
2.19.2
Rhys Perry
2018-12-07 17:22:03 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/common/ac_llvm_build.c | 27 ++++-----------------------
1 file changed, 4 insertions(+), 23 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 0a1987c65b..fa5c68d1b6 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -2342,30 +2342,11 @@ LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0,
unsigned bitsize)
{
- LLVMValueRef cmp, val, zero, one;
- LLVMTypeRef type;
-
- switch (bitsize) {
- case 64:
- type = ctx->i64;
- zero = ctx->i64_0;
- one = ctx->i64_1;
- break;
- case 32:
- type = ctx->i32;
- zero = ctx->i32_0;
- one = ctx->i32_1;
- break;
- case 16:
- type = ctx->i16;
- zero = ctx->i16_0;
- one = ctx->i16_1;
- break;
- default:
- unreachable(!"invalid bitsize");
- break;
- }
+ LLVMTypeRef type = ac_int_of_size(ctx, bitsize);
+ LLVMValueRef zero = ac_get_zero(ctx, type);
+ LLVMValueRef one = ac_get_one(ctx, type);

+ LLVMValueRef cmp, val;
cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, src0, zero, "");
val = LLVMBuildSelect(ctx->builder, cmp, one, src0, "");
cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGE, val, zero, "");
--
2.19.2
Rhys Perry
2018-12-07 17:21:59 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/common/ac_nir_to_llvm.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 6d0d2cbd55..7c827b443b 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -858,12 +858,14 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
src[i] = ac_to_integer(&ctx->ac, src[i]);
result = ac_build_gather_values(&ctx->ac, src, num_components);
break;
+ case nir_op_f2i8:
case nir_op_f2i16:
case nir_op_f2i32:
case nir_op_f2i64:
src[0] = ac_to_float(&ctx->ac, src[0]);
result = LLVMBuildFPToSI(ctx->ac.builder, src[0], def_type, "");
break;
+ case nir_op_f2u8:
case nir_op_f2u16:
case nir_op_f2u32:
case nir_op_f2u64:
@@ -898,15 +900,14 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
else
result = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
break;
+ case nir_op_u2u8:
case nir_op_u2u16:
case nir_op_u2u32:
case nir_op_u2u64:
src[0] = ac_to_integer(&ctx->ac, src[0]);
- if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
- result = LLVMBuildZExt(ctx->ac.builder, src[0], def_type, "");
- else
- result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
+ result = ac_build_ui_cast(&ctx->ac, src[0], def_type);
break;
+ case nir_op_i2i8:
case nir_op_i2i16:
case nir_op_i2i32:
case nir_op_i2i64:
--
2.19.2
Rhys Perry
2018-12-07 17:22:07 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/common/ac_nir_to_llvm.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 7084b390d2..b447da092f 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -788,8 +788,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
case nir_op_frsq:
result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt",
ac_to_float_type(&ctx->ac, def_type), src[0]);
- result = ac_build_fdiv(&ctx->ac, instr->dest.dest.ssa.bit_size == 32 ? ctx->ac.f32_1 : ctx->ac.f64_1,
- result);
+ result = ac_build_fdiv(&ctx->ac, ac_get_onef(&ctx->ac, LLVMTypeOf(result)), result);
break;
case nir_op_frexp_exp:
src[0] = ac_to_float(&ctx->ac, src[0]);
--
2.19.2
Rhys Perry
2018-12-07 17:22:09 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/vulkan/radv_shader.c | 1 +
1 file changed, 1 insertion(+)

diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 456c462a23..9ba20ac72e 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -53,6 +53,7 @@
static const struct nir_shader_compiler_options nir_options = {
.vertex_id_zero_based = true,
.lower_scmp = true,
+ .lower_flrp16 = true,
.lower_flrp32 = true,
.lower_flrp64 = true,
.lower_device_index_to_zero = true,
--
2.19.2
Rhys Perry
2018-12-07 17:22:08 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/common/ac_nir_to_llvm.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index b447da092f..bb7c421606 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -829,8 +829,10 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
break;
case nir_op_ldexp:
src[0] = ac_to_float(&ctx->ac, src[0]);
- if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) == 32)
+ if (ac_get_elem_bits(&ctx->ac, def_type) == 32)
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f32", ctx->ac.f32, src, 2, AC_FUNC_ATTR_READNONE);
+ else if (ac_get_elem_bits(&ctx->ac, def_type) == 16)
+ result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f16", ctx->ac.f16, src, 2, AC_FUNC_ATTR_READNONE);
else
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f64", ctx->ac.f64, src, 2, AC_FUNC_ATTR_READNONE);
break;
--
2.19.2
Rhys Perry
2018-12-07 17:22:05 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/common/ac_llvm_build.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index c85f9a214e..e85c178f78 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -716,7 +716,7 @@ ac_build_fdiv(struct ac_llvm_context *ctx,
* If we do (num * (1 / den)), LLVM does:
* return num * v_rcp_f32(den);
*/
- LLVMValueRef one = LLVMTypeOf(num) == ctx->f64 ? ctx->f64_1 : ctx->f32_1;
+ LLVMValueRef one = ac_get_onef(ctx, LLVMTypeOf(num));
LLVMValueRef rcp = LLVMBuildFDiv(ctx->builder, one, den, "");
LLVMValueRef ret = LLVMBuildFMul(ctx->builder, num, rcp, "");
--
2.19.2
Rhys Perry
2018-12-07 17:22:06 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/common/ac_nir_to_llvm.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index ef850d6d22..7084b390d2 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -657,8 +657,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
break;
case nir_op_frcp:
src[0] = ac_to_float(&ctx->ac, src[0]);
- result = ac_build_fdiv(&ctx->ac, instr->dest.dest.ssa.bit_size == 32 ? ctx->ac.f32_1 : ctx->ac.f64_1,
- src[0]);
+ result = ac_build_fdiv(&ctx->ac, ac_get_onef(&ctx->ac, LLVMTypeOf(src[0])), src[0]);
break;
case nir_op_iand:
result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");
--
2.19.2
Rhys Perry
2018-12-07 17:22:11 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/common/ac_nir_to_llvm.c | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 459d9c119c..517da7ba9b 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -347,11 +347,7 @@ static LLVMValueRef emit_b2i(struct ac_llvm_context *ctx,
unsigned bitsize)
{
LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0, ctx->i32_1, "");
-
- if (bitsize == 32)
- return result;
-
- return LLVMBuildZExt(ctx->builder, result, ctx->i64, "");
+ return ac_build_ui_cast(ctx, result, ac_int_of_size(ctx, bitsize));
}

static LLVMValueRef emit_i2b(struct ac_llvm_context *ctx,
--
2.19.2
Rhys Perry
2018-12-07 17:22:12 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/common/ac_nir_to_llvm.c | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 517da7ba9b..aac3330c0d 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -672,20 +672,17 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
break;
case nir_op_ishl:
result = LLVMBuildShl(ctx->ac.builder, src[0],
- LLVMBuildZExt(ctx->ac.builder, src[1],
- LLVMTypeOf(src[0]), ""),
+ ac_build_ui_cast(&ctx->ac, src[1], LLVMTypeOf(src[0])),
"");
break;
case nir_op_ishr:
result = LLVMBuildAShr(ctx->ac.builder, src[0],
- LLVMBuildZExt(ctx->ac.builder, src[1],
- LLVMTypeOf(src[0]), ""),
+ ac_build_ui_cast(&ctx->ac, src[1], LLVMTypeOf(src[0])),
"");
break;
case nir_op_ushr:
result = LLVMBuildLShr(ctx->ac.builder, src[0],
- LLVMBuildZExt(ctx->ac.builder, src[1],
- LLVMTypeOf(src[0]), ""),
+ ac_build_ui_cast(&ctx->ac, src[1], LLVMTypeOf(src[0])),
"");
break;
case nir_op_ilt:
--
2.19.2
Rhys Perry
2018-12-07 17:22:10 UTC
Permalink
This seems to generate fine code, even though the IR is a bit ugly.

Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/common/ac_nir_to_llvm.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index bb7c421606..459d9c119c 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -316,14 +316,20 @@ static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx,
unsigned bitsize)
{
LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0,
- LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""),
+ LLVMBuildBitCast(ctx->builder, ctx->f32_1, ctx->i32, ""),
"");
result = LLVMBuildBitCast(ctx->builder, result, ctx->f32, "");

- if (bitsize == 32)
+ switch (bitsize) {
+ case 16:
+ return LLVMBuildFPTrunc(ctx->builder, result, ctx->f16, "");
+ case 32:
return result;
-
- return LLVMBuildFPExt(ctx->builder, result, ctx->f64, "");
+ case 64:
+ return LLVMBuildFPExt(ctx->builder, result, ctx->f64, "");
+ default:
+ unreachable("Unsupported bit size.");
+ }
}

static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx,
--
2.19.2
Rhys Perry
2018-12-07 17:22:26 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/vulkan/radv_nir_to_llvm.c | 14 ++++----------
1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index e5e4637f0d..3d367c1378 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -2093,7 +2093,6 @@ static void interp_fs_input(struct radv_shader_context *ctx,
LLVMValueRef attr_number;
unsigned chan;
LLVMValueRef i, j;
- bool interp = !LLVMIsUndef(interp_param);

attr_number = LLVMConstInt(ctx->ac.i32, attr, false);

@@ -2107,7 +2106,7 @@ static void interp_fs_input(struct radv_shader_context *ctx,
* fs.interp cannot be used on integers, because they can be equal
* to NaN.
*/
- if (interp) {
+ if (interp_param) {
interp_param = LLVMBuildBitCast(ctx->ac.builder, interp_param,
ctx->ac.v2f32, "");

@@ -2120,7 +2119,7 @@ static void interp_fs_input(struct radv_shader_context *ctx,
for (chan = 0; chan < 4; chan++) {
LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);

- if (interp) {
+ if (interp_param) {
result[chan] = ac_build_fs_interp(&ctx->ac,
llvm_chan,
attr_number,
@@ -2132,7 +2131,6 @@ static void interp_fs_input(struct radv_shader_context *ctx,
attr_number,
prim_mask);
result[chan] = LLVMBuildBitCast(ctx->ac.builder, result[chan], ctx->ac.i32, "");
- result[chan] = LLVMBuildTruncOrBitCast(ctx->ac.builder, result[chan], LLVMTypeOf(interp_param), "");
}
}
}
@@ -2160,10 +2158,6 @@ handle_fs_input_decl(struct radv_shader_context *ctx,

interp = lookup_interp_param(&ctx->abi, variable->data.interpolation, interp_type);
}
- bool is_16bit = glsl_type_is_16bit(variable->type);
- LLVMTypeRef type = is_16bit ? ctx->ac.i16 : ctx->ac.i32;
- if (interp == NULL)
- interp = LLVMGetUndef(type);

for (unsigned i = 0; i < attrib_count; ++i)
ctx->inputs[ac_llvm_reg_index_soa(idx + i, 0)] = interp;
@@ -2224,7 +2218,7 @@ handle_fs_inputs(struct radv_shader_context *ctx,
if (ctx->shader_info->info.ps.uses_input_attachments ||
ctx->shader_info->info.needs_multiview_view_index) {
ctx->input_mask |= 1ull << VARYING_SLOT_LAYER;
- ctx->inputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)] = LLVMGetUndef(ctx->ac.i32);
+ ctx->inputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)] = NULL;
}

for (unsigned i = 0; i < RADEON_LLVM_MAX_INPUTS; ++i) {
@@ -2240,7 +2234,7 @@ handle_fs_inputs(struct radv_shader_context *ctx,
interp_fs_input(ctx, index, interp_param, ctx->abi.prim_mask,
inputs);

- if (LLVMIsUndef(interp_param))
+ if (!interp_param)
ctx->shader_info->fs.flat_shaded_mask |= 1u << index;
++index;
} else if (i == VARYING_SLOT_CLIP_DIST0) {
--
2.19.2
Rhys Perry
2018-12-07 17:22:18 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/common/ac_llvm_build.c | 38 +++++++---------------------------
1 file changed, 7 insertions(+), 31 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 754ceda89b..0123f3e31d 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1664,36 +1664,12 @@ ac_build_umsb(struct ac_llvm_context *ctx,
LLVMValueRef arg,
LLVMTypeRef dst_type)
{
- const char *intrin_name;
- LLVMTypeRef type;
- LLVMValueRef highest_bit;
- LLVMValueRef zero;
- unsigned bitsize;
-
- bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(arg));
- switch (bitsize) {
- case 64:
- intrin_name = "llvm.ctlz.i64";
- type = ctx->i64;
- highest_bit = LLVMConstInt(ctx->i64, 63, false);
- zero = ctx->i64_0;
- break;
- case 32:
- intrin_name = "llvm.ctlz.i32";
- type = ctx->i32;
- highest_bit = LLVMConstInt(ctx->i32, 31, false);
- zero = ctx->i32_0;
- break;
- case 16:
- intrin_name = "llvm.ctlz.i16";
- type = ctx->i16;
- highest_bit = LLVMConstInt(ctx->i16, 15, false);
- zero = ctx->i16_0;
- break;
- default:
- unreachable(!"invalid bitsize");
- break;
- }
+ unsigned bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(arg));
+ LLVMTypeRef type = ac_int_of_size(ctx, bitsize);
+ LLVMValueRef highest_bit = LLVMConstInt(type, bitsize - 1, false);
+ LLVMValueRef zero = ac_get_zero(ctx, type);
+ char intrin_name[64];
+ snprintf(intrin_name, sizeof(intrin_name), "llvm.ctlz.i%d", bitsize);

LLVMValueRef params[2] = {
arg,
@@ -1707,7 +1683,7 @@ ac_build_umsb(struct ac_llvm_context *ctx,
/* The HW returns the last bit index from MSB, but TGSI/NIR wants
* the index from LSB. Invert it by doing "31 - msb". */
msb = LLVMBuildSub(ctx->builder, highest_bit, msb, "");
- msb = LLVMBuildTruncOrBitCast(ctx->builder, msb, ctx->i32, "");
+ msb = ac_build_ui_cast(ctx, msb, dst_type);

/* check for zero */
return LLVMBuildSelect(ctx->builder,
--
2.19.2
Rhys Perry
2018-12-07 17:22:25 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/common/ac_nir_to_llvm.c | 13 ++++++++-----
src/amd/vulkan/radv_nir_to_llvm.c | 22 +++++++++-------------
2 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index fa7b8c70f0..b4418af50a 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2051,7 +2051,7 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
unreachable("unhandle variable mode");
}
ret = ac_build_varying_gather_values(&ctx->ac, values, ve, comp);
- return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
+ return ac_build_reinterpret(&ctx->ac, ret, get_def_type(ctx, &instr->dest.ssa));
}

static void
@@ -2063,7 +2063,7 @@ visit_store_var(struct ac_nir_context *ctx,
LLVMValueRef temp_ptr, value;
int idx = var->data.driver_location;
unsigned comp = var->data.location_frac;
- LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[1]));
+ LLVMValueRef src = get_src(ctx, instr->src[1]);
int writemask = instr->const_index[0];
LLVMValueRef indir_index;
unsigned const_index;
@@ -2082,6 +2082,11 @@ visit_store_var(struct ac_nir_context *ctx,

writemask = writemask << comp;

+ LLVMTypeRef type = ctx->ac.f32;
+ if (LLVMGetTypeKind(LLVMTypeOf(src)) == LLVMVectorTypeKind)
+ type = LLVMVectorType(ctx->ac.f32, LLVMGetVectorSize(LLVMTypeOf(src)));
+ src = ac_build_reinterpret(&ctx->ac, src, type);
+
switch (var->data.mode) {
case nir_var_shader_out:

@@ -4008,12 +4013,10 @@ ac_handle_shader_output_decl(struct ac_llvm_context *ctx,
}
}

- bool is_16bit = glsl_type_is_16bit(variable->type);
- LLVMTypeRef type = is_16bit ? ctx->f16 : ctx->f32;
for (unsigned i = 0; i < attrib_count; ++i) {
for (unsigned chan = 0; chan < 4; chan++) {
abi->outputs[ac_llvm_reg_index_soa(output_loc + i, chan)] =
- ac_build_alloca_undef(ctx, type, "");
+ ac_build_alloca_undef(ctx, ctx->f32, "");
}
}

diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index f114a86018..e5e4637f0d 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -2340,6 +2340,7 @@ si_llvm_init_export_args(struct radv_shader_context *ctx,

bool is_16bit = ac_get_type_size(LLVMTypeOf(values[0])) == 2;
if (ctx->stage == MESA_SHADER_FRAGMENT) {
+ bool is_16bit = ac_get_type_size(LLVMTypeOf(values[0])) == 2;
unsigned index = target - V_008DFC_SQ_EXP_MRT;
unsigned col_format = (ctx->options->key.fs.col_format >> (4 * index)) & 0xf;
bool is_int8 = (ctx->options->key.fs.is_int8 >> index) & 1;
@@ -2456,16 +2457,8 @@ si_llvm_init_export_args(struct radv_shader_context *ctx,
return;
}

- if (is_16bit) {
- for (unsigned chan = 0; chan < 4; chan++) {
- values[chan] = LLVMBuildBitCast(ctx->ac.builder, values[chan], ctx->ac.i16, "");
- args->out[chan] = LLVMBuildZExt(ctx->ac.builder, values[chan], ctx->ac.i32, "");
- }
- } else
- memcpy(&args->out[0], values, sizeof(values[0]) * 4);
-
- for (unsigned i = 0; i < 4; ++i)
- args->out[i] = ac_to_float(&ctx->ac, args->out[i]);
+ for (unsigned chan = 0; chan < 4; chan++)
+ args->out[chan] = ac_build_reinterpret(&ctx->ac, values[chan], ctx->ac.f32);
}

static void
@@ -3172,9 +3165,12 @@ handle_fs_outputs_post(struct radv_shader_context *ctx)
if (i < FRAG_RESULT_DATA0)
continue;

- for (unsigned j = 0; j < 4; j++)
- values[j] = ac_to_float(&ctx->ac,
- radv_load_output(ctx, i, j));
+ for (unsigned j = 0; j < 4; j++) {
+ values[j] = radv_load_output(ctx, i, j);
+ unsigned index = ac_llvm_reg_index_soa(i, 0);
+ LLVMTypeRef new_type = ctx->abi.output_types[index];
+ values[j] = ac_build_reinterpret(&ctx->ac, values[j], new_type);
+ }

bool ret = si_export_mrt_color(ctx, values,
i - FRAG_RESULT_DATA0,
--
2.19.2
Rhys Perry
2018-12-07 17:22:14 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/common/ac_llvm_build.c | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index e85c178f78..f394d16bc9 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1519,6 +1519,11 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
LLVMValueRef tl, trbl, args[2];
LLVMValueRef result;

+ int size = ac_get_type_size(LLVMTypeOf(val));
+
+ if (size == 2)
+ val = LLVMBuildZExt(ctx->builder, val, ctx->i32, "");
+
if (HAVE_LLVM >= 0x0700) {
unsigned tl_lanes[4], trbl_lanes[4];

@@ -1600,13 +1605,19 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
AC_FUNC_ATTR_CONVERGENT);
}

- tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
- trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, "");
+ if (size == 2) {
+ tl = LLVMBuildTrunc(ctx->builder, tl, ctx->i16, "");
+ trbl = LLVMBuildTrunc(ctx->builder, trbl, ctx->i16, "");
+ }
+
+ LLVMTypeRef type = ac_float_of_size(ctx, size * 8);
+ tl = LLVMBuildBitCast(ctx->builder, tl, type, "");
+ trbl = LLVMBuildBitCast(ctx->builder, trbl, type, "");
result = LLVMBuildFSub(ctx->builder, trbl, tl, "");

if (HAVE_LLVM >= 0x0700) {
result = ac_build_intrinsic(ctx,
- "llvm.amdgcn.wqm.f32", ctx->f32,
+ LLVMTypeOf(val) == ctx->f32 ? "llvm.amdgcn.wqm.f32" : "llvm.amdgcn.wqm.f16", type,
&result, 1, 0);
}
--
2.19.2
Rhys Perry
2018-12-07 17:22:15 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/common/ac_llvm_build.c | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index f394d16bc9..6266058b77 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -3149,9 +3149,15 @@ ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef la
{
LLVMTypeRef src_type = LLVMTypeOf(src);
src = ac_to_integer(ctx, src);
- unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src));
+ unsigned src_bits = LLVMGetIntTypeWidth(LLVMTypeOf(src));
+ unsigned bits = src_bits;
LLVMValueRef ret;

+ if (bits < 32) {
+ src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
+ bits = 32;
+ }
+
if (bits == 32) {
ret = _ac_build_readlane(ctx, src, lane);
} else {
@@ -3168,6 +3174,10 @@ ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef la
LLVMConstInt(ctx->i32, i, 0), "");
}
}
+
+ if (src_bits < 32)
+ ret = LLVMBuildTrunc(ctx->builder, ret, ac_int_of_size(ctx, src_bits), "");
+
return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
}
--
2.19.2
Rhys Perry
2018-12-07 17:22:27 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/vulkan/radv_nir_to_llvm.c | 55 ++++++++++++++++++++-----------
1 file changed, 35 insertions(+), 20 deletions(-)

diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index 3d367c1378..342b79274a 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -2332,9 +2332,7 @@ si_llvm_init_export_args(struct radv_shader_context *ctx,
if (!values)
return;

- bool is_16bit = ac_get_type_size(LLVMTypeOf(values[0])) == 2;
if (ctx->stage == MESA_SHADER_FRAGMENT) {
- bool is_16bit = ac_get_type_size(LLVMTypeOf(values[0])) == 2;
unsigned index = target - V_008DFC_SQ_EXP_MRT;
unsigned col_format = (ctx->options->key.fs.col_format >> (4 * index)) & 0xf;
bool is_int8 = (ctx->options->key.fs.is_int8 >> index) & 1;
@@ -2345,6 +2343,28 @@ si_llvm_init_export_args(struct radv_shader_context *ctx,
LLVMValueRef (*packi)(struct ac_llvm_context *ctx, LLVMValueRef args[2],
unsigned bits, bool hi) = NULL;

+ if (LLVMTypeOf(values[0]) == ctx->ac.f16 &&
+ col_format != V_028714_SPI_SHADER_FP16_ABGR) {
+ for (unsigned chan = 0; chan < 4; chan++)
+ values[chan] = LLVMBuildFPExt(ctx->ac.builder,
+ values[chan],
+ ctx->ac.f32, "");
+ }
+
+ if (LLVMTypeOf(values[0]) == ctx->ac.i16 || LLVMTypeOf(values[0]) == ctx->ac.i8) {
+ if (col_format == V_028714_SPI_SHADER_SINT16_ABGR) {
+ for (unsigned chan = 0; chan < 4; chan++)
+ values[chan] = LLVMBuildSExt(ctx->ac.builder,
+ values[chan],
+ ctx->ac.i32, "");
+ } else {
+ for (unsigned chan = 0; chan < 4; chan++)
+ values[chan] = LLVMBuildZExt(ctx->ac.builder,
+ values[chan],
+ ctx->ac.i32, "");
+ }
+ }
+
switch(col_format) {
case V_028714_SPI_SHADER_ZERO:
args->enabled_channels = 0; /* writemask */
@@ -2370,12 +2390,16 @@ si_llvm_init_export_args(struct radv_shader_context *ctx,

case V_028714_SPI_SHADER_FP16_ABGR:
args->enabled_channels = 0x5;
- packf = ac_build_cvt_pkrtz_f16;
- if (is_16bit) {
- for (unsigned chan = 0; chan < 4; chan++)
- values[chan] = LLVMBuildFPExt(ctx->ac.builder,
- values[chan],
- ctx->ac.f32, "");
+ if (LLVMTypeOf(values[0]) == ctx->ac.f16) {
+ packi = ac_build_cvt_pk_u16;
+ for (unsigned chan = 0; chan < 4; chan++) {
+ values[chan] = ac_to_integer(&ctx->ac, values[chan]);
+ values[chan] = LLVMBuildZExt(ctx->ac.builder,
+ values[chan],
+ ctx->ac.i32, "");
+ }
+ } else {
+ packf = ac_build_cvt_pkrtz_f16;
}
break;

@@ -2392,23 +2416,11 @@ si_llvm_init_export_args(struct radv_shader_context *ctx,
case V_028714_SPI_SHADER_UINT16_ABGR:
args->enabled_channels = 0x5;
packi = ac_build_cvt_pk_u16;
- if (is_16bit) {
- for (unsigned chan = 0; chan < 4; chan++)
- values[chan] = LLVMBuildZExt(ctx->ac.builder,
- ac_to_integer(&ctx->ac, values[chan]),
- ctx->ac.i32, "");
- }
break;

case V_028714_SPI_SHADER_SINT16_ABGR:
args->enabled_channels = 0x5;
packi = ac_build_cvt_pk_i16;
- if (is_16bit) {
- for (unsigned chan = 0; chan < 4; chan++)
- values[chan] = LLVMBuildSExt(ctx->ac.builder,
- ac_to_integer(&ctx->ac, values[chan]),
- ctx->ac.i32, "");
- }
break;

default:
@@ -2448,6 +2460,9 @@ si_llvm_init_export_args(struct radv_shader_context *ctx,
}
args->compr = 1; /* COMPR flag */
}
+
+ for (unsigned i = 0; i < 4; ++i)
+ args->out[i] = ac_to_float(&ctx->ac, args->out[i]);
return;
}
--
2.19.2
Rhys Perry
2018-12-07 17:22:19 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/common/ac_llvm_build.c | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 0123f3e31d..2172d81f8b 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1640,6 +1640,10 @@ ac_build_imsb(struct ac_llvm_context *ctx,
LLVMValueRef arg,
LLVMTypeRef dst_type)
{
+ //TODO: support 64-bit integers
+ if (LLVMTypeOf(arg) != ctx->i32)
+ arg = LLVMBuildSExt(ctx->builder, arg, ctx->i32, "");
+
LLVMValueRef msb = ac_build_intrinsic(ctx, "llvm.amdgcn.sffbh.i32",
dst_type, &arg, 1,
AC_FUNC_ATTR_READNONE);
--
2.19.2
Rhys Perry
2018-12-07 17:22:20 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/common/ac_llvm_build.c | 33 +++++++--------------------------
1 file changed, 7 insertions(+), 26 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 2172d81f8b..3990a1f56d 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -2362,35 +2362,16 @@ LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src0,

LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0)
{
- LLVMValueRef result;
- unsigned bitsize;
+ unsigned bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));

- bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
+ char name[64];
+ snprintf(name, sizeof(name), "llvm.ctpop.i%d", bitsize);

- switch (bitsize) {
- case 64:
- result = ac_build_intrinsic(ctx, "llvm.ctpop.i64", ctx->i64,
- (LLVMValueRef []) { src0 }, 1,
- AC_FUNC_ATTR_READNONE);
-
- result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, "");
- break;
- case 32:
- result = ac_build_intrinsic(ctx, "llvm.ctpop.i32", ctx->i32,
- (LLVMValueRef []) { src0 }, 1,
- AC_FUNC_ATTR_READNONE);
- break;
- case 16:
- result = ac_build_intrinsic(ctx, "llvm.ctpop.i16", ctx->i16,
- (LLVMValueRef []) { src0 }, 1,
- AC_FUNC_ATTR_READNONE);
- break;
- default:
- unreachable(!"invalid bitsize");
- break;
- }
+ LLVMValueRef result = ac_build_intrinsic(ctx, name, LLVMTypeOf(src0),
+ (LLVMValueRef []) { src0 }, 1,
+ AC_FUNC_ATTR_READNONE);

- return result;
+ return ac_build_ui_cast(ctx, result, ctx->i32);
}

LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx,
--
2.19.2
Rhys Perry
2018-12-07 17:22:16 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/compiler/nir/nir_opcodes.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 4ef4ecc6f2..962971c650 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -318,7 +318,7 @@ unop_convert("unpack_64_2x32_split_y", tuint32, tuint64, "src0 >> 32")
# Bit operations, part of ARB_gpu_shader5.


-unop("bitfield_reverse", tuint32, """
+unop("bitfield_reverse", tuint, """
/* we're not winning any awards for speed here, but that's ok */
dst = 0;
for (unsigned bit = 0; bit < 32; bit++)
@@ -342,7 +342,7 @@ for (int bit = bit_size - 1; bit >= 0; bit--) {
}
""")

-unop("ifind_msb", tint32, """
+unop_convert("ifind_msb", tint32, tint, """
dst = -1;
for (int bit = 31; bit >= 0; bit--) {
/* If src0 < 0, we're looking for the first 0 bit.
--
2.19.2
Rhys Perry
2018-12-07 17:22:28 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/common/ac_llvm_util.c | 9 ++++++---
src/amd/common/ac_llvm_util.h | 1 +
src/amd/vulkan/radv_shader.c | 3 +++
3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
index dc9b684e9d..3219126188 100644
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -33,6 +33,7 @@
#if HAVE_LLVM >= 0x0700
#include <llvm-c/Transforms/Utils.h>
#endif
+#include <llvm-c/Transforms/Vectorize.h>
#include "c11/threads.h"
#include "gallivm/lp_bld_misc.h"
#include "util/u_math.h"
@@ -177,7 +178,7 @@ static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
}

static LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_library_info,
- bool check_ir)
+ enum ac_target_machine_options tm_options)
{
LLVMPassManagerRef passmgr = LLVMCreatePassManager();
if (!passmgr)
@@ -187,7 +188,7 @@ static LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_libr
LLVMAddTargetLibraryInfo(target_library_info,
passmgr);

- if (check_ir)
+ if (tm_options & AC_TM_CHECK_IR)
LLVMAddVerifierPass(passmgr);
LLVMAddAlwaysInlinerPass(passmgr);
/* Normally, the pass manager runs all passes on one function before
@@ -203,6 +204,8 @@ static LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_libr
LLVMAddLICMPass(passmgr);
LLVMAddAggressiveDCEPass(passmgr);
LLVMAddCFGSimplificationPass(passmgr);
+ if (tm_options & AC_TM_SLP_VECTORIZE)
+ LLVMAddSLPVectorizePass(passmgr);
/* This is recommended by the instruction combining pass. */
LLVMAddEarlyCSEMemSSAPass(passmgr);
LLVMAddInstructionCombiningPass(passmgr);
@@ -332,7 +335,7 @@ ac_init_llvm_compiler(struct ac_llvm_compiler *compiler,
}

compiler->passmgr = ac_create_passmgr(compiler->target_library_info,
- tm_options & AC_TM_CHECK_IR);
+ tm_options);
if (!compiler->passmgr)
goto fail;

diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
index eaf5f21876..35b12c6c6c 100644
--- a/src/amd/common/ac_llvm_util.h
+++ b/src/amd/common/ac_llvm_util.h
@@ -65,6 +65,7 @@ enum ac_target_machine_options {
AC_TM_CHECK_IR = (1 << 5),
AC_TM_ENABLE_GLOBAL_ISEL = (1 << 6),
AC_TM_CREATE_LOW_OPT = (1 << 7),
+ AC_TM_SLP_VECTORIZE = (1 << 8),
};

enum ac_float_mode {
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 9ba20ac72e..a2ddf17680 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -598,6 +598,9 @@ shader_variant_create(struct radv_device *device,
tm_options |= AC_TM_SISCHED;
if (options->check_ir)
tm_options |= AC_TM_CHECK_IR;
+ /* vectorization is disabled on pre-GFX9 because it's not very useful there */
+ if (device->physical_device->rad_info.chip_class >= GFX9)
+ tm_options |= AC_TM_SLP_VECTORIZE;

thread_compiler = !(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM);
radv_init_llvm_once();
--
2.19.2
Rhys Perry
2018-12-07 17:22:29 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/common/ac_nir_to_llvm.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index b4418af50a..92b773981b 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -889,7 +889,9 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ctx->ac.f32, "");
LLVMValueRef param[2] = { src[0], ctx->ac.f32_0 };
result = ac_build_cvt_pkrtz_f16(&ctx->ac, param);
- result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
+ // generates better code than an extractelement with slp vectorization
+ result = LLVMBuildBitCast(ctx->ac.builder, result, ctx->ac.i32, "");
+ result = LLVMBuildTrunc(ctx->ac.builder, result, ctx->ac.i16, "");
break;
case nir_op_f2f16_rtne:
case nir_op_f2f16:
--
2.19.2
Rhys Perry
2018-12-07 17:22:17 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/common/ac_llvm_build.c | 31 +++++--------------------------
1 file changed, 5 insertions(+), 26 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 6266058b77..754ceda89b 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -2752,29 +2752,10 @@ LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
LLVMValueRef src0)
{
unsigned src0_bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
- const char *intrin_name;
- LLVMTypeRef type;
- LLVMValueRef zero;
-
- switch (src0_bitsize) {
- case 64:
- intrin_name = "llvm.cttz.i64";
- type = ctx->i64;
- zero = ctx->i64_0;
- break;
- case 32:
- intrin_name = "llvm.cttz.i32";
- type = ctx->i32;
- zero = ctx->i32_0;
- break;
- case 16:
- intrin_name = "llvm.cttz.i16";
- type = ctx->i16;
- zero = ctx->i16_0;
- break;
- default:
- unreachable(!"invalid bitsize");
- }
+ char intrin_name[64];
+ LLVMTypeRef type = ac_int_of_size(ctx, src0_bitsize);
+ LLVMValueRef zero = ac_get_zero(ctx, type);
+ snprintf(intrin_name, sizeof(intrin_name), "llvm.cttz.i%d", src0_bitsize);

LLVMValueRef params[2] = {
src0,
@@ -2795,9 +2776,7 @@ LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
params, 2,
AC_FUNC_ATTR_READNONE);

- if (src0_bitsize == 64) {
- lsb = LLVMBuildTrunc(ctx->builder, lsb, ctx->i32, "");
- }
+ lsb = ac_build_ui_cast(ctx, lsb, ctx->i32);

/* TODO: We need an intrinsic to skip this conditional. */
/* Check for zero: */
--
2.19.2
Rhys Perry
2018-12-07 17:22:22 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/common/ac_nir_to_llvm.c | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index aac3330c0d..d69135cc25 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1011,6 +1011,30 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
break;
}

+ case nir_op_pack_32_2x16_split: {
+ LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, src, 2);
+ result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i32, "");
+ break;
+ }
+
+ case nir_op_unpack_32_2x16_split_x: {
+ LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
+ ctx->ac.v2i16,
+ "");
+ result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
+ ctx->ac.i32_0, "");
+ break;
+ }
+
+ case nir_op_unpack_32_2x16_split_y: {
+ LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
+ ctx->ac.v2i16,
+ "");
+ result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
+ ctx->ac.i32_1, "");
+ break;
+ }
+
case nir_op_cube_face_coord: {
src[0] = ac_to_float(&ctx->ac, src[0]);
LLVMValueRef results[2];
--
2.19.2
Rhys Perry
2018-12-07 17:22:30 UTC
Permalink
This change is made in the hope that LLVM will one day be able to generate
code with vectorized v_cvt_pkrtz_f16_f32 instructions.

Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/common/ac_nir_to_llvm.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 92b773981b..88b26e019f 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -884,6 +884,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
break;
case nir_op_f2f16_rtz:
+ case nir_op_f2f16:
src[0] = ac_to_float(&ctx->ac, src[0]);
if (LLVMTypeOf(src[0]) == ctx->ac.f64)
src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ctx->ac.f32, "");
@@ -894,7 +895,6 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
result = LLVMBuildTrunc(ctx->ac.builder, result, ctx->ac.i16, "");
break;
case nir_op_f2f16_rtne:
- case nir_op_f2f16:
case nir_op_f2f32:
case nir_op_f2f64:
src[0] = ac_to_float(&ctx->ac, src[0]);
--
2.19.2
Rhys Perry
2018-12-07 17:22:23 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/common/ac_nir_to_llvm.c | 8 ++++++++
1 file changed, 8 insertions(+)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index d69135cc25..e4ae85a1ae 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3961,11 +3961,19 @@ glsl_base_to_llvm_type(struct ac_llvm_context *ac,
enum glsl_base_type type)
{
switch (type) {
+ case GLSL_TYPE_INT8:
+ case GLSL_TYPE_UINT8:
+ return ac->i8;
+ case GLSL_TYPE_INT16:
+ case GLSL_TYPE_UINT16:
+ return ac->i16;
case GLSL_TYPE_INT:
case GLSL_TYPE_UINT:
case GLSL_TYPE_BOOL:
case GLSL_TYPE_SUBROUTINE:
return ac->i32;
+ case GLSL_TYPE_FLOAT16:
+ return ac->f16;
case GLSL_TYPE_FLOAT: /* TODO handle mediump */
return ac->f32;
case GLSL_TYPE_INT64:
--
2.19.2
Rhys Perry
2018-12-07 17:22:24 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/common/ac_nir_to_llvm.c | 68 +++++++++++++++++++++++++++++++
src/amd/common/ac_shader_abi.h | 1 +
src/amd/vulkan/radv_nir_to_llvm.c | 3 ++
3 files changed, 72 insertions(+)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index e4ae85a1ae..fa7b8c70f0 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3917,6 +3917,68 @@ static void visit_cf_list(struct ac_nir_context *ctx,
}
}

+static unsigned traverse_var_component_slots(struct ac_llvm_context *ctx, bool vs_in,
+ struct nir_variable *var, unsigned cur_offset,
+ const struct glsl_type *cur_type,
+ void (*cb)(struct ac_llvm_context *, unsigned, enum glsl_base_type, void *),
+ void *cbdata)
+{
+ if (glsl_type_is_struct(cur_type)) {
+ for (unsigned i = 0; i < glsl_get_length(cur_type); i++) {
+ const struct glsl_type *ft = glsl_get_struct_field(cur_type, i);
+ cur_offset = traverse_var_component_slots(ctx, vs_in, var, cur_offset, ft, cb, cbdata);
+ }
+ return (cur_offset + 3) / 4 * 4;
+ }
+
+ enum glsl_base_type base_type = glsl_get_base_type(glsl_without_array_or_matrix(cur_type));
+
+ unsigned stride = glsl_get_component_slots(glsl_without_array_or_matrix(cur_type));
+ if (!var->data.compact)
+ stride = (stride + 3) / 4 * 4;
+ unsigned arr_len = MAX2(glsl_get_matrix_columns(cur_type), 1);
+ if (glsl_type_is_array(cur_type))
+ arr_len *= glsl_get_aoa_size(cur_type);
+ for (unsigned i = 0; i < arr_len; i++) {
+ for (unsigned j = 0; j < glsl_get_component_slots(glsl_without_array_or_matrix(cur_type)); j++) {
+ cb(ctx, cur_offset + var->data.location_frac + j, base_type, cbdata);
+ }
+ cur_offset += stride;
+ }
+ return cur_offset;
+}
+
+static void setup_output_type(struct ac_llvm_context *ctx, unsigned index, enum glsl_base_type base, void *output_types)
+{
+ LLVMTypeRef type;
+ switch (base) {
+ case GLSL_TYPE_INT8:
+ case GLSL_TYPE_UINT8:
+ type = ctx->i8;
+ break;
+ case GLSL_TYPE_INT16:
+ case GLSL_TYPE_UINT16:
+ type = ctx->i16;
+ break;
+ case GLSL_TYPE_FLOAT16:
+ type = ctx->f16;
+ break;
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_BOOL:
+ case GLSL_TYPE_INT64:
+ case GLSL_TYPE_UINT64:
+ type = ctx->i32;
+ break;
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_DOUBLE:
+ default:
+ type = ctx->f32;
+ break;
+ }
+ ((LLVMTypeRef*)output_types)[index] = type;
+}
+
void
ac_handle_shader_output_decl(struct ac_llvm_context *ctx,
struct ac_shader_abi *abi,
@@ -3954,6 +4016,9 @@ ac_handle_shader_output_decl(struct ac_llvm_context *ctx,
ac_build_alloca_undef(ctx, type, "");
}
}
+
+ traverse_var_component_slots(ctx, false, variable, output_loc * 4,
+ variable->type, &setup_output_type, abi->output_types);
}

static LLVMTypeRef
@@ -4077,6 +4142,9 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,

ctx.main_function = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));

+ for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS * 4; i++)
+ ctx.abi->output_types[i] = ac->i32;
+
nir_foreach_variable(variable, &nir->outputs)
ac_handle_shader_output_decl(&ctx.ac, ctx.abi, nir, variable,
ctx.stage);
diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h
index 6b9a91c92a..1d078fc42d 100644
--- a/src/amd/common/ac_shader_abi.h
+++ b/src/amd/common/ac_shader_abi.h
@@ -69,6 +69,7 @@ struct ac_shader_abi {
LLVMValueRef view_index;

LLVMValueRef outputs[AC_LLVM_MAX_OUTPUTS * 4];
+ LLVMTypeRef output_types[AC_LLVM_MAX_OUTPUTS * 4];

/* For VS and PS: pre-loaded shader inputs.
*
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index 90bcc8dbfe..f114a86018 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -3945,6 +3945,9 @@ radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm,
ctx.gs_max_out_vertices = geom_shader->info.gs.vertices_out;
ac_setup_rings(&ctx);

+ for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS * 4; i++)
+ ctx.abi.output_types[i] = ctx.ac.i32;
+
nir_foreach_variable(variable, &geom_shader->outputs) {
scan_shader_output_decl(&ctx, variable, geom_shader, MESA_SHADER_VERTEX);
ac_handle_shader_output_decl(&ctx.ac, &ctx.abi, geom_shader,
--
2.19.2
Rhys Perry
2018-12-07 17:22:21 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/common/ac_llvm_build.c | 26 ++++++--------------------
1 file changed, 6 insertions(+), 20 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 3990a1f56d..68ea6078d3 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -2377,28 +2377,14 @@ LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0)
LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx,
LLVMValueRef src0)
{
- LLVMValueRef result;
- unsigned bitsize;
-
- bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
+ unsigned bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));

- switch (bitsize) {
- case 32:
- result = ac_build_intrinsic(ctx, "llvm.bitreverse.i32", ctx->i32,
- (LLVMValueRef []) { src0 }, 1,
- AC_FUNC_ATTR_READNONE);
- break;
- case 16:
- result = ac_build_intrinsic(ctx, "llvm.bitreverse.i16", ctx->i16,
- (LLVMValueRef []) { src0 }, 1,
- AC_FUNC_ATTR_READNONE);
- break;
- default:
- unreachable(!"invalid bitsize");
- break;
- }
+ char name[64];
+ snprintf(name, sizeof(name), "llvm.bitreverse.i%d", bitsize);

- return result;
+ return ac_build_intrinsic(ctx, name, LLVMTypeOf(src0),
+ (LLVMValueRef []) { src0 }, 1,
+ AC_FUNC_ATTR_READNONE);
}

#define AC_EXP_TARGET 0
--
2.19.2
Rhys Perry
2018-12-07 17:22:31 UTC
Permalink
Signed-off-by: Rhys Perry <***@gmail.com>
---
src/amd/vulkan/radv_device.c | 17 +++++++++++++++++
src/amd/vulkan/radv_extensions.py | 4 ++++
src/amd/vulkan/radv_shader.c | 3 +++
3 files changed, 24 insertions(+)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index ad057a8750..8444651a84 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -848,6 +848,23 @@ void radv_GetPhysicalDeviceFeatures2(
features->geometryStreams = true;
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR: {
+ VkPhysicalDeviceFloat16Int8FeaturesKHR *features =
+ (VkPhysicalDeviceFloat16Int8FeaturesKHR*)ext;
+ bool enabled = pdevice->rad_info.chip_class >= VI;
+ features->shaderFloat16 = enabled && HAVE_LLVM >= 0x0800;
+ features->shaderInt8 = enabled;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR: {
+ VkPhysicalDevice8BitStorageFeaturesKHR *features =
+ (VkPhysicalDevice8BitStorageFeaturesKHR*)ext;
+ bool enabled = pdevice->rad_info.chip_class >= VI;
+ features->storageBuffer8BitAccess = enabled;
+ features->uniformAndStorageBuffer8BitAccess = enabled;
+ features->storagePushConstant8 = enabled;
+ break;
+ }
default:
break;
}
diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py
index 6bdf988d11..62c58e98af 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -91,6 +91,8 @@ EXTENSIONS = [
Extension('VK_KHR_xlib_surface', 6, 'VK_USE_PLATFORM_XLIB_KHR'),
Extension('VK_KHR_multiview', 1, True),
Extension('VK_KHR_display', 23, 'VK_USE_PLATFORM_DISPLAY_KHR'),
+ Extension('VK_KHR_shader_float16_int8', 1, 'device->rad_info.chip_class >= VI'),
+ Extension('VK_KHR_8bit_storage', 1, 'device->rad_info.chip_class >= VI'),
Extension('VK_EXT_direct_mode_display', 1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
Extension('VK_EXT_acquire_xlib_display', 1, 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
Extension('VK_EXT_calibrated_timestamps', 1, True),
@@ -117,6 +119,8 @@ EXTENSIONS = [
Extension('VK_AMD_shader_core_properties', 1, True),
Extension('VK_AMD_shader_info', 1, True),
Extension('VK_AMD_shader_trinary_minmax', 1, True),
+ Extension('VK_AMD_gpu_shader_half_float', 1, 'device->rad_info.chip_class >= VI && HAVE_LLVM >= 0x0800'),
+ Extension('VK_AMD_gpu_shader_int16', 1, 'device->rad_info.chip_class >= VI'),
Extension('VK_GOOGLE_decorate_string', 1, True),
Extension('VK_GOOGLE_hlsl_functionality1', 1, True),
]
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index a2ddf17680..921b9669f0 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -246,6 +246,9 @@ radv_shader_compile_to_nir(struct radv_device *device,
.storage_16bit = true,
.geometry_streams = true,
.transform_feedback = true,
+ .float16 = true,
+ .storage_8bit = true,
+ .int8 = true,
},
};
entry_point = spirv_to_nir(spirv, module->size / 4,
--
2.19.2
Rhys Perry
2018-12-07 17:22:13 UTC
Permalink
The lowering needs to be disabled for sufficient precision to pass
deqp-vk's 16-bit fma test on radv.

Signed-off-by: Rhys Perry <***@gmail.com>
---
src/broadcom/compiler/nir_to_vir.c | 1 +
src/compiler/nir/nir.h | 1 +
src/compiler/nir/nir_opt_algebraic.py | 4 +++-
src/gallium/drivers/radeonsi/si_get.c | 1 +
src/gallium/drivers/vc4/vc4_program.c | 1 +
5 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index 57be43d724..ec73ed269d 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -1975,6 +1975,7 @@ const nir_shader_compiler_options v3d_nir_options = {
.lower_fdiv = true,
.lower_find_lsb = true,
.lower_ffma = true,
+ .lower_ffma16 = true,
.lower_flrp32 = true,
.lower_fpow = true,
.lower_fsat = true,
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index e731653afc..0e40d6a97d 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2072,6 +2072,7 @@ typedef struct nir_function {

typedef struct nir_shader_compiler_options {
bool lower_fdiv;
+ bool lower_ffma16;
bool lower_ffma;
bool fuse_ffma;
bool lower_flrp16;
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index e3821320e8..c2a8b61d92 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -133,7 +133,9 @@ optimizations = [
(('~fadd', a, ('fmul', ('b2f', c), ('fadd', b, ('fneg', a)))), ('bcsel', c, b, a), 'options->lower_flrp32'),
(('~fadd@32', a, ('fmul', c , ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp32'),
(('~fadd@64', a, ('fmul', c , ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp64'),
- (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
+ (('ffma@16', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma16'),
+ (('ffma@32', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
+ (('ffma@64', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
(('~fadd', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma'),

(('fdot4', ('vec4', a, b, c, 1.0), d), ('fdph', ('vec3', a, b, c), d)),
diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c
index 91f38329d5..d295821d65 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -497,6 +497,7 @@ static const struct nir_shader_compiler_options nir_options = {
.lower_fdiv = true,
.lower_sub = true,
.lower_ffma = true,
+ .lower_ffma16 = true,
.lower_pack_snorm_2x16 = true,
.lower_pack_snorm_4x8 = true,
.lower_pack_unorm_2x16 = true,
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index b98baca30c..9e8c6607cc 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -2234,6 +2234,7 @@ static const nir_shader_compiler_options nir_options = {
.lower_extract_word = true,
.lower_fdiv = true,
.lower_ffma = true,
+ .lower_ffma16 = true,
.lower_flrp32 = true,
.lower_fpow = true,
.lower_fsat = true,
--
2.19.2
Loading...