[Mesa-dev] [PATCH 07/11] intel/compiler: implement nir_instr_type_load

Discussion:

[Mesa-dev] [PATCH 07/11] intel/compiler: implement nir_instr_type_load_const for 16-bit constants

Iago Toral Quiroga

2018-04-11 07:20:30 UTC

From: Jose Maria Casanova Crespo <***@igalia.com>

---
src/intel/compiler/brw_fs_nir.cpp | 5 +++++
1 file changed, 5 insertions(+)

diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index ad31f7c82dc..822a1ac4227 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -1509,6 +1509,11 @@ fs_visitor::nir_emit_load_const(const fs_builder &bld,
fs_reg reg = bld.vgrf(reg_type, instr->def.num_components);

switch (instr->def.bit_size) {
+ case 16:
+ for (unsigned i = 0; i < instr->def.num_components; i++)
+ bld.MOV(offset(reg, bld, i), brw_imm_w(instr->value.i16[i]));
+ break;
+
case 32:
for (unsigned i = 0; i < instr->def.num_components; i++)
bld.MOV(offset(reg, bld, i), brw_imm_d(instr->value.i32[i]));

--
2.14.1

Iago Toral Quiroga

2018-04-11 07:20:26 UTC

Permalink

The lowering pass was specialized to act on 64-bit to 32-bit conversions only,
but the implementation is valid for other cases.
---
src/intel/compiler/brw_fs_lower_conversions.cpp | 5 ++++-
src/intel/compiler/brw_fs_nir.cpp | 14 +++-----------
2 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/src/intel/compiler/brw_fs_lower_conversions.cpp b/src/intel/compiler/brw_fs_lower_conversions.cpp
index 663c9674c49..f95b39d3e86 100644
--- a/src/intel/compiler/brw_fs_lower_conversions.cpp
+++ b/src/intel/compiler/brw_fs_lower_conversions.cpp
@@ -54,7 +54,7 @@ fs_visitor::lower_conversions()
bool saturate = inst->saturate;

if (supports_type_conversion(inst)) {
- if (get_exec_type_size(inst) == 8 && type_sz(inst->dst.type) < 8) {
+ if (type_sz(inst->dst.type) < get_exec_type_size(inst)) {
/* From the Broadwell PRM, 3D Media GPGPU, "Double Precision Float to
* Single Precision Float":
*
@@ -64,6 +64,9 @@ fs_visitor::lower_conversions()
* So we need to allocate a temporary that's two registers, and then do
* a strided MOV to get the lower DWord of every Qword that has the
* result.
+ *
+ * This restriction applies, in general, whenever we convert to
+ * a type with a smaller bit-size.
*/
fs_reg temp = ibld.vgrf(get_exec_type(inst));
fs_reg strided_temp = subscript(temp, dst.type, 0);
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index f40a3540e31..5e0dd37eefd 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -753,19 +753,9 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
*/

case nir_op_f2f16_undef:
- case nir_op_i2i16:
- case nir_op_u2u16: {
- /* TODO: Fixing aligment rules for conversions from 32-bits to
- * 16-bit types should be moved to lower_conversions
- */
- fs_reg tmp = bld.vgrf(op[0].type, 1);
- tmp = subscript(tmp, result.type, 0);
- inst = bld.MOV(tmp, op[0]);
- inst->saturate = instr->dest.saturate;
- inst = bld.MOV(result, tmp);
+ inst = bld.MOV(result, op[0]);
inst->saturate = instr->dest.saturate;
break;
- }

case nir_op_f2f64:
case nir_op_f2i64:
@@ -803,6 +793,8 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
case nir_op_f2u32:
case nir_op_i2i32:
case nir_op_u2u32:
+ case nir_op_i2i16:
+ case nir_op_u2u16:
inst = bld.MOV(result, op[0]);
inst->saturate = instr->dest.saturate;
break;

--
2.14.1

Jason Ekstrand

2018-04-24 14:58:30 UTC

Permalink

Post by Iago Toral Quiroga
The lowering pass was specialized to act on 64-bit to 32-bit conversions only,
but the implementation is valid for other cases.
---
src/intel/compiler/brw_fs_lower_conversions.cpp | 5 ++++-
src/intel/compiler/brw_fs_nir.cpp | 14 +++-----------
2 files changed, 7 insertions(+), 12 deletions(-)
diff --git a/src/intel/compiler/brw_fs_lower_conversions.cpp
b/src/intel/compiler/brw_fs_lower_conversions.cpp
index 663c9674c49..f95b39d3e86 100644
--- a/src/intel/compiler/brw_fs_lower_conversions.cpp
+++ b/src/intel/compiler/brw_fs_lower_conversions.cpp
@@ -54,7 +54,7 @@ fs_visitor::lower_conversions()
bool saturate = inst->saturate;
if (supports_type_conversion(inst)) {
- if (get_exec_type_size(inst) == 8 && type_sz(inst->dst.type) < 8) {
+ if (type_sz(inst->dst.type) < get_exec_type_size(inst)) {
/* From the Broadwell PRM, 3D Media GPGPU, "Double Precision Float to
*
@@ -64,6 +64,9 @@ fs_visitor::lower_conversions()
* So we need to allocate a temporary that's two registers, and then do
* a strided MOV to get the lower DWord of every Qword that has the
* result.
+ *
+ * This restriction applies, in general, whenever we convert to
+ * a type with a smaller bit-size.
*/
fs_reg temp = ibld.vgrf(get_exec_type(inst));
fs_reg strided_temp = subscript(temp, dst.type, 0);
diff --git a/src/intel/compiler/brw_fs_nir.cpp
b/src/intel/compiler/brw_fs_nir.cpp
index f40a3540e31..5e0dd37eefd 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -753,19 +753,9 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
*/
- case nir_op_u2u16: {
- /* TODO: Fixing aligment rules for conversions from 32-bits to
- * 16-bit types should be moved to lower_conversions
- */
- fs_reg tmp = bld.vgrf(op[0].type, 1);
- tmp = subscript(tmp, result.type, 0);
- inst = bld.MOV(tmp, op[0]);
- inst->saturate = instr->dest.saturate;
- inst = bld.MOV(result, tmp);
+ inst = bld.MOV(result, op[0]);
inst->saturate = instr->dest.saturate;
break;

It appears to me that we can move f2f16_undef to the block below as well.
Without or without that,

Post by Iago Toral Quiroga
- }
@@ -803,6 +793,8 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
inst = bld.MOV(result, op[0]);
inst->saturate = instr->dest.saturate;
break;
--
2.14.1
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Iago Toral

2018-04-25 06:04:02 UTC

Permalink

Post by Jason Ekstrand

Post by Iago Toral Quiroga
dst.type) < 8) {

+ if (type_sz(inst->dst.type) < get_exec_type_size(inst)) {
/* From the Broadwell PRM, 3D Media GPGPU, "Double Precision Float to
*
@@ -64,6 +64,9 @@ fs_visitor::lower_conversions()
* So we need to allocate a temporary that's two registers, and then do
* a strided MOV to get the lower DWord of every Qword that has the
* result.
+ *
+ * This restriction applies, in general, whenever we convert to
+ * a type with a smaller bit-size.
*/
fs_reg temp = ibld.vgrf(get_exec_type(inst));
fs_reg strided_temp = subscript(temp, dst.type, 0);
diff --git a/src/intel/compiler/brw_fs_nir.cpp
b/src/intel/compiler/brw_fs_nir.cpp
index f40a3540e31..5e0dd37eefd 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -753,19 +753,9 @@ fs_visitor::nir_emit_alu(const fs_builder
&bld, nir_alu_instr *instr)
*/
- case nir_op_u2u16: {
- /* TODO: Fixing aligment rules for conversions from 32-bits to
- * 16-bit types should be moved to lower_conversions
- */
- fs_reg tmp = bld.vgrf(op[0].type, 1);
- tmp = subscript(tmp, result.type, 0);
- inst = bld.MOV(tmp, op[0]);
- inst->saturate = instr->dest.saturate;
- inst = bld.MOV(result, tmp);
+ inst = bld.MOV(result, op[0]);
inst->saturate = instr->dest.saturate;
break;

It appears to me that we can move f2f16_undef to the block below as
well. Without or without that,

f2f16_undef is the fallthough for the other f2f16 cases (the ones that
handle rounding modes) and the cases we are grouping here are also
falltrough cases for other things, so if we moves it here we'd need to
replicate the code again for the other f2f16 cases anyway.

Post by Jason Ekstrand

Post by Iago Toral Quiroga
- }
@@ -803,6 +793,8 @@ fs_visitor::nir_emit_alu(const fs_builder &bld,
nir_alu_instr *instr)
inst = bld.MOV(result, op[0]);
inst->saturate = instr->dest.saturate;
break;
--
2.14.1
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Jason Ekstrand

2018-04-25 12:42:09 UTC

Permalink

On April 25, 2018 02:04:03 Iago Toral <***@igalia.com> wrote:
On Tue, 2018-04-24 at 07:58 -0700, Jason Ekstrand wrote:
On Wed, Apr 11, 2018 at 12:20 AM, Iago Toral Quiroga <***@igalia.com> wrote:

The lowering pass was specialized to act on 64-bit to 32-bit conversions only,
but the implementation is valid for other cases.
---
src/intel/compiler/brw_fs_lower_conversions.cpp | 5 ++++-
src/intel/compiler/brw_fs_nir.cpp | 14 +++-----------
2 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/src/intel/compiler/brw_fs_lower_conversions.cpp
b/src/intel/compiler/brw_fs_lower_conversions.cpp
index 663c9674c49..f95b39d3e86 100644
--- a/src/intel/compiler/brw_fs_lower_conversions.cpp
+++ b/src/intel/compiler/brw_fs_lower_conversions.cpp
@@ -54,7 +54,7 @@ fs_visitor::lower_conversions()
bool saturate = inst->saturate;

if (supports_type_conversion(inst)) {
- if (get_exec_type_size(inst) == 8 && type_sz(inst->dst.type) < 8) {
+ if (type_sz(inst->dst.type) < get_exec_type_size(inst)) {
/* From the Broadwell PRM, 3D Media GPGPU, "Double Precision Float to
* Single Precision Float":
*
@@ -64,6 +64,9 @@ fs_visitor::lower_conversions()
* So we need to allocate a temporary that's two registers, and then do
* a strided MOV to get the lower DWord of every Qword that has the
* result.
+ *
+ * This restriction applies, in general, whenever we convert to
+ * a type with a smaller bit-size.
*/
fs_reg temp = ibld.vgrf(get_exec_type(inst));
fs_reg strided_temp = subscript(temp, dst.type, 0);
diff --git a/src/intel/compiler/brw_fs_nir.cpp
b/src/intel/compiler/brw_fs_nir.cpp
index f40a3540e31..5e0dd37eefd 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -753,19 +753,9 @@ fs_visitor::nir_emit_alu(const fs_builder &bld,
nir_alu_instr *instr)
*/

case nir_op_f2f16_undef:
- case nir_op_i2i16:
- case nir_op_u2u16: {
- /* TODO: Fixing aligment rules for conversions from 32-bits to
- * 16-bit types should be moved to lower_conversions
- */
- fs_reg tmp = bld.vgrf(op[0].type, 1);
- tmp = subscript(tmp, result.type, 0);
- inst = bld.MOV(tmp, op[0]);
- inst->saturate = instr->dest.saturate;
- inst = bld.MOV(result, tmp);
+ inst = bld.MOV(result, op[0]);
inst->saturate = instr->dest.saturate;
break;

It appears to me that we can move f2f16_undef to the block below as well.
Without or without that,

f2f16_undef is the fallthough for the other f2f16 cases (the ones that
handle rounding modes) and the cases we are grouping here are also
falltrough cases for other things, so if we moves it here we'd need to
replicate the code again for the other f2f16 cases anyway.

Ok, that's reasonable.

Reviewed-by: Jason Ekstrand <***@jlekstrand.net>

- }

case nir_op_f2f64:
case nir_op_f2i64:
@@ -803,6 +793,8 @@ fs_visitor::nir_emit_alu(const fs_builder &bld,
nir_alu_instr *instr)
case nir_op_f2u32:
case nir_op_i2i32:
case nir_op_u2u32:
+ case nir_op_i2i16:
+ case nir_op_u2u16:
inst = bld.MOV(result, op[0]);
inst->saturate = instr->dest.saturate;
break;
--
2.14.1

Iago Toral Quiroga

2018-04-11 07:20:24 UTC

Permalink

The hardware doesn't support 16-bit integer types, so we need to implement
these using 32-bit integer instructions and then convert the result back
to 16-bit.
---
src/intel/Makefile.sources | 1 +
src/intel/compiler/brw_nir.c | 2 +
src/intel/compiler/brw_nir.h | 2 +
src/intel/compiler/brw_nir_lower_16bit_int_math.c | 108 ++++++++++++++++++++++
src/intel/compiler/meson.build | 1 +
5 files changed, 114 insertions(+)
create mode 100644 src/intel/compiler/brw_nir_lower_16bit_int_math.c

diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources
index 91c71a8dfaf..2cd76961ea4 100644
--- a/src/intel/Makefile.sources
+++ b/src/intel/Makefile.sources
@@ -79,6 +79,7 @@ COMPILER_FILES = \
compiler/brw_nir_analyze_boolean_resolves.c \
compiler/brw_nir_analyze_ubo_ranges.c \
compiler/brw_nir_attribute_workarounds.c \
+ compiler/brw_nir_lower_16bit_int_math.c \
compiler/brw_nir_lower_cs_intrinsics.c \
compiler/brw_nir_opt_peephole_ffma.c \
compiler/brw_nir_tcs_workarounds.c \
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 69ab162f888..2e5754076ed 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -638,6 +638,8 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir)
nir_lower_isign64 |
nir_lower_divmod64);

+ brw_nir_lower_16bit_int_math(nir);
+
nir = brw_nir_optimize(nir, compiler, is_scalar);

if (is_scalar) {
diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h
index 03f52da08e5..6ba1a8bc654 100644
--- a/src/intel/compiler/brw_nir.h
+++ b/src/intel/compiler/brw_nir.h
@@ -152,6 +152,8 @@ void brw_nir_analyze_ubo_ranges(const struct brw_compiler *compiler,

bool brw_nir_opt_peephole_ffma(nir_shader *shader);

+bool brw_nir_lower_16bit_int_math(nir_shader *shader);
+
nir_shader *brw_nir_optimize(nir_shader *nir,
const struct brw_compiler *compiler,
bool is_scalar);
diff --git a/src/intel/compiler/brw_nir_lower_16bit_int_math.c b/src/intel/compiler/brw_nir_lower_16bit_int_math.c
new file mode 100644
index 00000000000..6876309a822
--- /dev/null
+++ b/src/intel/compiler/brw_nir_lower_16bit_int_math.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_nir.h"
+#include "nir_builder.h"
+
+/**
+ * Intel hardware doesn't support 16-bit integer Math instructions so this
+ * pass implements them in 32-bit and then converts the result back to 16-bit.
+ */
+static void
+lower_math_instr(nir_builder *bld, nir_alu_instr *alu, bool is_signed)
+{
+ const nir_op op = alu->op;
+
+ bld->cursor = nir_before_instr(&alu->instr);
+
+ nir_ssa_def *srcs_32[4] = { NULL, NULL, NULL, NULL };
+ const uint32_t num_inputs = nir_op_infos[op].num_inputs;
+ for (uint32_t i = 0; i < num_inputs; i++) {
+ nir_ssa_def *src = nir_ssa_for_alu_src(bld, alu, i);
+ srcs_32[i] = is_signed ? nir_i2i32(bld, src) : nir_u2u32(bld, src);
+ }
+
+ nir_ssa_def *dst_32 =
+ nir_build_alu(bld, op, srcs_32[0], srcs_32[1], srcs_32[2], srcs_32[3]);
+
+ nir_ssa_def *dst_16 =
+ is_signed ? nir_i2i16(bld, dst_32) : nir_u2u16(bld, dst_32);
+
+ nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(dst_16));
+}
+
+static bool
+lower_instr(nir_builder *bld, nir_alu_instr *alu)
+{
+ assert(alu->dest.dest.is_ssa);
+ if (alu->dest.dest.ssa.bit_size != 16)
+ return false;
+
+ bool is_signed = false;
+ switch (alu->op) {
+ case nir_op_idiv:
+ case nir_op_imod:
+ is_signed = true;
+ /* Fallthrough */
+ case nir_op_udiv:
+ case nir_op_umod:
+ case nir_op_irem:
+ lower_math_instr(bld, alu, is_signed);
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool
+lower_impl(nir_function_impl *impl)
+{
+ nir_builder b;
+ nir_builder_init(&b, impl);
+ bool progress = false;
+
+ nir_foreach_block(block, impl) {
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type == nir_instr_type_alu)
+ progress |= lower_instr(&b, nir_instr_as_alu(instr));
+ }
+ }
+
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+
+ return progress;
+}
+
+bool
+brw_nir_lower_16bit_int_math(nir_shader *shader)
+{
+ bool progress = false;
+
+ nir_foreach_function(function, shader) {
+ if (function->impl)
+ progress |= lower_impl(function->impl);
+ }
+
+ return progress;
+}
diff --git a/src/intel/compiler/meson.build b/src/intel/compiler/meson.build
index 72b7a6796cb..d80fcd6e31b 100644
--- a/src/intel/compiler/meson.build
+++ b/src/intel/compiler/meson.build
@@ -76,6 +76,7 @@ libintel_compiler_files = files(
'brw_nir_analyze_boolean_resolves.c',
'brw_nir_analyze_ubo_ranges.c',
'brw_nir_attribute_workarounds.c',
+ 'brw_nir_lower_16bit_int_math.c',
'brw_nir_lower_cs_intrinsics.c',
'brw_nir_opt_peephole_ffma.c',
'brw_nir_tcs_workarounds.c',

--
2.14.1

Jason Ekstrand

2018-04-24 13:56:26 UTC

Permalink

It may be useful to just use nir_algebraic for this. We already do for
trig workarounds. It's more painful from a build-system perspective but,
in general, the fewer hand-rolled algebraic lowering passes we have, the
better.

Post by Iago Toral Quiroga
The hardware doesn't support 16-bit integer types, so we need to implement
these using 32-bit integer instructions and then convert the result back
to 16-bit.
---
src/intel/Makefile.sources | 1 +
src/intel/compiler/brw_nir.c | 2 +
src/intel/compiler/brw_nir.h | 2 +
src/intel/compiler/brw_nir_lower_16bit_int_math.c | 108
++++++++++++++++++++++
src/intel/compiler/meson.build | 1 +
5 files changed, 114 insertions(+)
create mode 100644 src/intel/compiler/brw_nir_lower_16bit_int_math.c
diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources
index 91c71a8dfaf..2cd76961ea4 100644
--- a/src/intel/Makefile.sources
+++ b/src/intel/Makefile.sources
@@ -79,6 +79,7 @@ COMPILER_FILES = \
compiler/brw_nir_analyze_boolean_resolves.c \
compiler/brw_nir_analyze_ubo_ranges.c \
compiler/brw_nir_attribute_workarounds.c \
+ compiler/brw_nir_lower_16bit_int_math.c \
compiler/brw_nir_lower_cs_intrinsics.c \
compiler/brw_nir_opt_peephole_ffma.c \
compiler/brw_nir_tcs_workarounds.c \
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 69ab162f888..2e5754076ed 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -638,6 +638,8 @@ brw_preprocess_nir(const struct brw_compiler
*compiler, nir_shader *nir)
nir_lower_isign64 |
nir_lower_divmod64);
+ brw_nir_lower_16bit_int_math(nir);
+
nir = brw_nir_optimize(nir, compiler, is_scalar);
if (is_scalar) {
diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h
index 03f52da08e5..6ba1a8bc654 100644
--- a/src/intel/compiler/brw_nir.h
+++ b/src/intel/compiler/brw_nir.h
@@ -152,6 +152,8 @@ void brw_nir_analyze_ubo_ranges(const struct brw_compiler *compiler,
bool brw_nir_opt_peephole_ffma(nir_shader *shader);
+bool brw_nir_lower_16bit_int_math(nir_shader *shader);
+
nir_shader *brw_nir_optimize(nir_shader *nir,
const struct brw_compiler *compiler,
bool is_scalar);
diff --git a/src/intel/compiler/brw_nir_lower_16bit_int_math.c
b/src/intel/compiler/brw_nir_lower_16bit_int_math.c
new file mode 100644
index 00000000000..6876309a822
--- /dev/null
+++ b/src/intel/compiler/brw_nir_lower_16bit_int_math.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright Â© 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_nir.h"
+#include "nir_builder.h"
+
+/**
+ * Intel hardware doesn't support 16-bit integer Math instructions so this
+ * pass implements them in 32-bit and then converts the result back to 16-bit.
+ */
+static void
+lower_math_instr(nir_builder *bld, nir_alu_instr *alu, bool is_signed)
+{
+ const nir_op op = alu->op;
+
+ bld->cursor = nir_before_instr(&alu->instr);
+
+ nir_ssa_def *srcs_32[4] = { NULL, NULL, NULL, NULL };
+ const uint32_t num_inputs = nir_op_infos[op].num_inputs;
+ for (uint32_t i = 0; i < num_inputs; i++) {
+ nir_ssa_def *src = nir_ssa_for_alu_src(bld, alu, i);
+ srcs_32[i] = is_signed ? nir_i2i32(bld, src) : nir_u2u32(bld, src);
+ }
+
+ nir_ssa_def *dst_32 =
+ nir_build_alu(bld, op, srcs_32[0], srcs_32[1], srcs_32[2], srcs_32[3]);
+
+ nir_ssa_def *dst_16 =
+ is_signed ? nir_i2i16(bld, dst_32) : nir_u2u16(bld, dst_32);
+
+ nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
nir_src_for_ssa(dst_16));
+}
+
+static bool
+lower_instr(nir_builder *bld, nir_alu_instr *alu)
+{
+ assert(alu->dest.dest.is_ssa);
+ if (alu->dest.dest.ssa.bit_size != 16)
+ return false;
+
+ bool is_signed = false;
+ switch (alu->op) {
+ is_signed = true;

You can get is_signed from nit_op_infos

Post by Iago Toral Quiroga
+ /* Fallthrough */

How is irem unsigned?

Post by Iago Toral Quiroga
+ lower_math_instr(bld, alu, is_signed);
+ return true;
+ return false;
+ }
+}
+
+static bool
+lower_impl(nir_function_impl *impl)
+{
+ nir_builder b;
+ nir_builder_init(&b, impl);
+ bool progress = false;
+
+ nir_foreach_block(block, impl) {
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type == nir_instr_type_alu)
+ progress |= lower_instr(&b, nir_instr_as_alu(instr));
+ }
+ }
+
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+
+ return progress;
+}
+
+bool
+brw_nir_lower_16bit_int_math(nir_shader *shader)
+{
+ bool progress = false;
+
+ nir_foreach_function(function, shader) {
+ if (function->impl)
+ progress |= lower_impl(function->impl);
+ }
+
+ return progress;
+}
diff --git a/src/intel/compiler/meson.build b/src/intel/compiler/meson.
build
index 72b7a6796cb..d80fcd6e31b 100644
--- a/src/intel/compiler/meson.build
+++ b/src/intel/compiler/meson.build
@@ -76,6 +76,7 @@ libintel_compiler_files = files(
'brw_nir_analyze_boolean_resolves.c',
'brw_nir_analyze_ubo_ranges.c',
'brw_nir_attribute_workarounds.c',
+ 'brw_nir_lower_16bit_int_math.c',
'brw_nir_lower_cs_intrinsics.c',
'brw_nir_opt_peephole_ffma.c',
'brw_nir_tcs_workarounds.c',
--
2.14.1
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Rob Clark

2018-04-24 14:38:09 UTC

Permalink

side-note, not sure if it really effects what you are doing here, but
karol ran into some cases, like 8bit signed imax, which needs to be
"lowered" to 16b (or 32b) and converted back for hw that doesn't
support smaller than 16b (or 32b). I think I have the same case with
ir3, which also has 16b but no 8b, (but he is a bit further along cl
cts than I am)..

I think there will be more of this sort of thing coming for more
instructions and for more than just 16b vs 32b. So not sure if
writing rules for each in nir_opt_algebraic.py will be so fun..

BR,
-R

It may be useful to just use nir_algebraic for this. We already do for trig
workarounds. It's more painful from a build-system perspective but, in
general, the fewer hand-rolled algebraic lowering passes we have, the
better.

Post by Iago Toral Quiroga
The hardware doesn't support 16-bit integer types, so we need to implement
these using 32-bit integer instructions and then convert the result back
to 16-bit.
---
src/intel/Makefile.sources | 1 +
src/intel/compiler/brw_nir.c | 2 +
src/intel/compiler/brw_nir.h | 2 +
src/intel/compiler/brw_nir_lower_16bit_int_math.c | 108
++++++++++++++++++++++
src/intel/compiler/meson.build | 1 +
5 files changed, 114 insertions(+)
create mode 100644 src/intel/compiler/brw_nir_lower_16bit_int_math.c
diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources
index 91c71a8dfaf..2cd76961ea4 100644
--- a/src/intel/Makefile.sources
+++ b/src/intel/Makefile.sources
@@ -79,6 +79,7 @@ COMPILER_FILES = \
compiler/brw_nir_analyze_boolean_resolves.c \
compiler/brw_nir_analyze_ubo_ranges.c \
compiler/brw_nir_attribute_workarounds.c \
+ compiler/brw_nir_lower_16bit_int_math.c \
compiler/brw_nir_lower_cs_intrinsics.c \
compiler/brw_nir_opt_peephole_ffma.c \
compiler/brw_nir_tcs_workarounds.c \
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 69ab162f888..2e5754076ed 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -638,6 +638,8 @@ brw_preprocess_nir(const struct brw_compiler
*compiler, nir_shader *nir)
nir_lower_isign64 |
nir_lower_divmod64);
+ brw_nir_lower_16bit_int_math(nir);
+
nir = brw_nir_optimize(nir, compiler, is_scalar);
if (is_scalar) {
diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h
index 03f52da08e5..6ba1a8bc654 100644
--- a/src/intel/compiler/brw_nir.h
+++ b/src/intel/compiler/brw_nir.h
@@ -152,6 +152,8 @@ void brw_nir_analyze_ubo_ranges(const struct brw_compiler *compiler,
bool brw_nir_opt_peephole_ffma(nir_shader *shader);
+bool brw_nir_lower_16bit_int_math(nir_shader *shader);
+
nir_shader *brw_nir_optimize(nir_shader *nir,
const struct brw_compiler *compiler,
bool is_scalar);
diff --git a/src/intel/compiler/brw_nir_lower_16bit_int_math.c
b/src/intel/compiler/brw_nir_lower_16bit_int_math.c
new file mode 100644
index 00000000000..6876309a822
--- /dev/null
+++ b/src/intel/compiler/brw_nir_lower_16bit_int_math.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_nir.h"
+#include "nir_builder.h"
+
+/**
+ * Intel hardware doesn't support 16-bit integer Math instructions so this
+ * pass implements them in 32-bit and then converts the result back to 16-bit.
+ */
+static void
+lower_math_instr(nir_builder *bld, nir_alu_instr *alu, bool is_signed)
+{
+ const nir_op op = alu->op;
+
+ bld->cursor = nir_before_instr(&alu->instr);
+
+ nir_ssa_def *srcs_32[4] = { NULL, NULL, NULL, NULL };
+ const uint32_t num_inputs = nir_op_infos[op].num_inputs;
+ for (uint32_t i = 0; i < num_inputs; i++) {
+ nir_ssa_def *src = nir_ssa_for_alu_src(bld, alu, i);
+ srcs_32[i] = is_signed ? nir_i2i32(bld, src) : nir_u2u32(bld, src);
+ }
+
+ nir_ssa_def *dst_32 =
+ nir_build_alu(bld, op, srcs_32[0], srcs_32[1], srcs_32[2], srcs_32[3]);
+
+ nir_ssa_def *dst_16 =
+ is_signed ? nir_i2i16(bld, dst_32) : nir_u2u16(bld, dst_32);
+
+ nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
nir_src_for_ssa(dst_16));
+}
+
+static bool
+lower_instr(nir_builder *bld, nir_alu_instr *alu)
+{
+ assert(alu->dest.dest.is_ssa);
+ if (alu->dest.dest.ssa.bit_size != 16)
+ return false;
+
+ bool is_signed = false;
+ switch (alu->op) {
+ is_signed = true;

You can get is_signed from nit_op_infos

Post by Iago Toral Quiroga
+ /* Fallthrough */

How is irem unsigned?

Post by Iago Toral Quiroga
+ lower_math_instr(bld, alu, is_signed);
+ return true;
+ return false;
+ }
+}
+
+static bool
+lower_impl(nir_function_impl *impl)
+{
+ nir_builder b;
+ nir_builder_init(&b, impl);
+ bool progress = false;
+
+ nir_foreach_block(block, impl) {
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type == nir_instr_type_alu)
+ progress |= lower_instr(&b, nir_instr_as_alu(instr));
+ }
+ }
+
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+
+ return progress;
+}
+
+bool
+brw_nir_lower_16bit_int_math(nir_shader *shader)
+{
+ bool progress = false;
+
+ nir_foreach_function(function, shader) {
+ if (function->impl)
+ progress |= lower_impl(function->impl);
+ }
+
+ return progress;
+}
diff --git a/src/intel/compiler/meson.build
b/src/intel/compiler/meson.build
index 72b7a6796cb..d80fcd6e31b 100644
--- a/src/intel/compiler/meson.build
+++ b/src/intel/compiler/meson.build
@@ -76,6 +76,7 @@ libintel_compiler_files = files(
'brw_nir_analyze_boolean_resolves.c',
'brw_nir_analyze_ubo_ranges.c',
'brw_nir_attribute_workarounds.c',
+ 'brw_nir_lower_16bit_int_math.c',
'brw_nir_lower_cs_intrinsics.c',
'brw_nir_opt_peephole_ffma.c',
'brw_nir_tcs_workarounds.c',
--
2.14.1
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Jason Ekstrand

2018-04-24 21:45:00 UTC

Permalink

Yeah, it may be that what we want is a generic "lower this to something
with more bits" pass. If this is a problem for the CL people, maybe we
just want some way to make it configurable and put it in core NIR. I don't
really have a huge preference. I'm just trying to make sure we explore the
solution space.

--Jason

BR,

Post by Rob Clark
-R

Post by Jason Ekstrand
It may be useful to just use nir_algebraic for this. We already do for

trig

Post by Jason Ekstrand
workarounds. It's more painful from a build-system perspective but, in
general, the fewer hand-rolled algebraic lowering passes we have, the
better.

Post by Iago Toral Quiroga
The hardware doesn't support 16-bit integer types, so we need to

implement

Post by Jason Ekstrand

Post by Iago Toral Quiroga
these using 32-bit integer instructions and then convert the result back
to 16-bit.
---
src/intel/Makefile.sources | 1 +
src/intel/compiler/brw_nir.c | 2 +
src/intel/compiler/brw_nir.h | 2 +
src/intel/compiler/brw_nir_lower_16bit_int_math.c | 108
++++++++++++++++++++++
src/intel/compiler/meson.build | 1 +
5 files changed, 114 insertions(+)
create mode 100644 src/intel/compiler/brw_nir_lower_16bit_int_math.c
diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources
index 91c71a8dfaf..2cd76961ea4 100644
--- a/src/intel/Makefile.sources
+++ b/src/intel/Makefile.sources
@@ -79,6 +79,7 @@ COMPILER_FILES = \
compiler/brw_nir_analyze_boolean_resolves.c \
compiler/brw_nir_analyze_ubo_ranges.c \
compiler/brw_nir_attribute_workarounds.c \
+ compiler/brw_nir_lower_16bit_int_math.c \
compiler/brw_nir_lower_cs_intrinsics.c \
compiler/brw_nir_opt_peephole_ffma.c \
compiler/brw_nir_tcs_workarounds.c \
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 69ab162f888..2e5754076ed 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -638,6 +638,8 @@ brw_preprocess_nir(const struct brw_compiler
*compiler, nir_shader *nir)
nir_lower_isign64 |
nir_lower_divmod64);
+ brw_nir_lower_16bit_int_math(nir);
+
nir = brw_nir_optimize(nir, compiler, is_scalar);
if (is_scalar) {
diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h
index 03f52da08e5..6ba1a8bc654 100644
--- a/src/intel/compiler/brw_nir.h
+++ b/src/intel/compiler/brw_nir.h
@@ -152,6 +152,8 @@ void brw_nir_analyze_ubo_ranges(const struct
brw_compiler *compiler,
bool brw_nir_opt_peephole_ffma(nir_shader *shader);
+bool brw_nir_lower_16bit_int_math(nir_shader *shader);
+
nir_shader *brw_nir_optimize(nir_shader *nir,
const struct brw_compiler *compiler,
bool is_scalar);
diff --git a/src/intel/compiler/brw_nir_lower_16bit_int_math.c
b/src/intel/compiler/brw_nir_lower_16bit_int_math.c
new file mode 100644
index 00000000000..6876309a822
--- /dev/null
+++ b/src/intel/compiler/brw_nir_lower_16bit_int_math.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright Â© 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person

obtaining

Post by Jason Ekstrand

Post by Iago Toral Quiroga
a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom

the

Post by Jason Ekstrand

Post by Iago Toral Quiroga
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions

Post by Jason Ekstrand

Post by Iago Toral Quiroga
the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_nir.h"
+#include "nir_builder.h"
+
+/**
+ * Intel hardware doesn't support 16-bit integer Math instructions so this
+ * pass implements them in 32-bit and then converts the result back to 16-bit.
+ */
+static void
+lower_math_instr(nir_builder *bld, nir_alu_instr *alu, bool is_signed)
+{
+ const nir_op op = alu->op;
+
+ bld->cursor = nir_before_instr(&alu->instr);
+
+ nir_ssa_def *srcs_32[4] = { NULL, NULL, NULL, NULL };
+ const uint32_t num_inputs = nir_op_infos[op].num_inputs;
+ for (uint32_t i = 0; i < num_inputs; i++) {
+ nir_ssa_def *src = nir_ssa_for_alu_src(bld, alu, i);
+ srcs_32[i] = is_signed ? nir_i2i32(bld, src) : nir_u2u32(bld,

src);

Post by Jason Ekstrand

Post by Iago Toral Quiroga
+ }
+
+ nir_ssa_def *dst_32 =
+ nir_build_alu(bld, op, srcs_32[0], srcs_32[1], srcs_32[2], srcs_32[3]);
+
+ nir_ssa_def *dst_16 =
+ is_signed ? nir_i2i16(bld, dst_32) : nir_u2u16(bld, dst_32);
+
+ nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
nir_src_for_ssa(dst_16));
+}
+
+static bool
+lower_instr(nir_builder *bld, nir_alu_instr *alu)
+{
+ assert(alu->dest.dest.is_ssa);
+ if (alu->dest.dest.ssa.bit_size != 16)
+ return false;
+
+ bool is_signed = false;
+ switch (alu->op) {
+ is_signed = true;

You can get is_signed from nit_op_infos

Post by Iago Toral Quiroga
+ /* Fallthrough */

How is irem unsigned?

Post by Iago Toral Quiroga
+ lower_math_instr(bld, alu, is_signed);
+ return true;
+ return false;
+ }
+}
+
+static bool
+lower_impl(nir_function_impl *impl)
+{
+ nir_builder b;
+ nir_builder_init(&b, impl);
+ bool progress = false;
+
+ nir_foreach_block(block, impl) {
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type == nir_instr_type_alu)
+ progress |= lower_instr(&b, nir_instr_as_alu(instr));
+ }
+ }
+
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+
+ return progress;
+}
+
+bool
+brw_nir_lower_16bit_int_math(nir_shader *shader)
+{
+ bool progress = false;
+
+ nir_foreach_function(function, shader) {
+ if (function->impl)
+ progress |= lower_impl(function->impl);
+ }
+
+ return progress;
+}
diff --git a/src/intel/compiler/meson.build
b/src/intel/compiler/meson.build
index 72b7a6796cb..d80fcd6e31b 100644
--- a/src/intel/compiler/meson.build
+++ b/src/intel/compiler/meson.build
@@ -76,6 +76,7 @@ libintel_compiler_files = files(
'brw_nir_analyze_boolean_resolves.c',
'brw_nir_analyze_ubo_ranges.c',
'brw_nir_attribute_workarounds.c',
+ 'brw_nir_lower_16bit_int_math.c',
'brw_nir_lower_cs_intrinsics.c',
'brw_nir_opt_peephole_ffma.c',
'brw_nir_tcs_workarounds.c',
--
2.14.1
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Rob Clark

2018-04-24 22:38:32 UTC

Permalink

Yeah, it may be that what we want is a generic "lower this to something with
more bits" pass. If this is a problem for the CL people, maybe we just want
some way to make it configurable and put it in core NIR. I don't really
have a huge preference. I'm just trying to make sure we explore the
solution space.

something generic/configurable in core nir seems more sane..

ofc, do what you need to do in the short term, I just mentioned this
to point out that more of this sort of "handle fewer bits in more
bits" stuff is coming down the pipe as we start seeing more 8b stuff..
if needed we can refactor..

BR,
-R

--Jason

Post by Rob Clark
BR,
-R

It may be useful to just use nir_algebraic for this. We already do for trig
workarounds. It's more painful from a build-system perspective but, in
general, the fewer hand-rolled algebraic lowering passes we have, the
better.

Post by Iago Toral Quiroga
The hardware doesn't support 16-bit integer types, so we need to implement
these using 32-bit integer instructions and then convert the result back
to 16-bit.
---
src/intel/Makefile.sources | 1 +
src/intel/compiler/brw_nir.c | 2 +
src/intel/compiler/brw_nir.h | 2 +
src/intel/compiler/brw_nir_lower_16bit_int_math.c | 108
++++++++++++++++++++++
src/intel/compiler/meson.build | 1 +
5 files changed, 114 insertions(+)
create mode 100644 src/intel/compiler/brw_nir_lower_16bit_int_math.c
diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources
index 91c71a8dfaf..2cd76961ea4 100644
--- a/src/intel/Makefile.sources
+++ b/src/intel/Makefile.sources
@@ -79,6 +79,7 @@ COMPILER_FILES = \
compiler/brw_nir_analyze_boolean_resolves.c \
compiler/brw_nir_analyze_ubo_ranges.c \
compiler/brw_nir_attribute_workarounds.c \
+ compiler/brw_nir_lower_16bit_int_math.c \
compiler/brw_nir_lower_cs_intrinsics.c \
compiler/brw_nir_opt_peephole_ffma.c \
compiler/brw_nir_tcs_workarounds.c \
diff --git a/src/intel/compiler/brw_nir.c
b/src/intel/compiler/brw_nir.c
index 69ab162f888..2e5754076ed 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -638,6 +638,8 @@ brw_preprocess_nir(const struct brw_compiler
*compiler, nir_shader *nir)
nir_lower_isign64 |
nir_lower_divmod64);
+ brw_nir_lower_16bit_int_math(nir);
+
nir = brw_nir_optimize(nir, compiler, is_scalar);
if (is_scalar) {
diff --git a/src/intel/compiler/brw_nir.h
b/src/intel/compiler/brw_nir.h
index 03f52da08e5..6ba1a8bc654 100644
--- a/src/intel/compiler/brw_nir.h
+++ b/src/intel/compiler/brw_nir.h
@@ -152,6 +152,8 @@ void brw_nir_analyze_ubo_ranges(const struct
brw_compiler *compiler,
bool brw_nir_opt_peephole_ffma(nir_shader *shader);
+bool brw_nir_lower_16bit_int_math(nir_shader *shader);
+
nir_shader *brw_nir_optimize(nir_shader *nir,
const struct brw_compiler *compiler,
bool is_scalar);
diff --git a/src/intel/compiler/brw_nir_lower_16bit_int_math.c
b/src/intel/compiler/brw_nir_lower_16bit_int_math.c
new file mode 100644
index 00000000000..6876309a822
--- /dev/null
+++ b/src/intel/compiler/brw_nir_lower_16bit_int_math.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person
obtaining
a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ *
+ * The above copyright notice and this permission notice (including
the
next
+ * paragraph) shall be included in all copies or substantial portions
of
the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
OR
OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_nir.h"
+#include "nir_builder.h"
+
+/**
+ * Intel hardware doesn't support 16-bit integer Math instructions so this
+ * pass implements them in 32-bit and then converts the result back to 16-bit.
+ */
+static void
+lower_math_instr(nir_builder *bld, nir_alu_instr *alu, bool is_signed)
+{
+ const nir_op op = alu->op;
+
+ bld->cursor = nir_before_instr(&alu->instr);
+
+ nir_ssa_def *srcs_32[4] = { NULL, NULL, NULL, NULL };
+ const uint32_t num_inputs = nir_op_infos[op].num_inputs;
+ for (uint32_t i = 0; i < num_inputs; i++) {
+ nir_ssa_def *src = nir_ssa_for_alu_src(bld, alu, i);
+ srcs_32[i] = is_signed ? nir_i2i32(bld, src) : nir_u2u32(bld, src);
+ }
+
+ nir_ssa_def *dst_32 =
+ nir_build_alu(bld, op, srcs_32[0], srcs_32[1], srcs_32[2], srcs_32[3]);
+
+ nir_ssa_def *dst_16 =
+ is_signed ? nir_i2i16(bld, dst_32) : nir_u2u16(bld, dst_32);
+
+ nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
nir_src_for_ssa(dst_16));
+}
+
+static bool
+lower_instr(nir_builder *bld, nir_alu_instr *alu)
+{
+ assert(alu->dest.dest.is_ssa);
+ if (alu->dest.dest.ssa.bit_size != 16)
+ return false;
+
+ bool is_signed = false;
+ switch (alu->op) {
+ is_signed = true;

You can get is_signed from nit_op_infos

Post by Iago Toral Quiroga
+ /* Fallthrough */

How is irem unsigned?

Post by Iago Toral Quiroga
+ lower_math_instr(bld, alu, is_signed);
+ return true;
+ return false;
+ }
+}
+
+static bool
+lower_impl(nir_function_impl *impl)
+{
+ nir_builder b;
+ nir_builder_init(&b, impl);
+ bool progress = false;
+
+ nir_foreach_block(block, impl) {
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type == nir_instr_type_alu)
+ progress |= lower_instr(&b, nir_instr_as_alu(instr));
+ }
+ }
+
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+
+ return progress;
+}
+
+bool
+brw_nir_lower_16bit_int_math(nir_shader *shader)
+{
+ bool progress = false;
+
+ nir_foreach_function(function, shader) {
+ if (function->impl)
+ progress |= lower_impl(function->impl);
+ }
+
+ return progress;
+}
diff --git a/src/intel/compiler/meson.build
b/src/intel/compiler/meson.build
index 72b7a6796cb..d80fcd6e31b 100644
--- a/src/intel/compiler/meson.build
+++ b/src/intel/compiler/meson.build
@@ -76,6 +76,7 @@ libintel_compiler_files = files(
'brw_nir_analyze_boolean_resolves.c',
'brw_nir_analyze_ubo_ranges.c',
'brw_nir_attribute_workarounds.c',
+ 'brw_nir_lower_16bit_int_math.c',
'brw_nir_lower_cs_intrinsics.c',
'brw_nir_opt_peephole_ffma.c',
'brw_nir_tcs_workarounds.c',
--
2.14.1
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Jason Ekstrand

2018-04-25 03:11:38 UTC

Permalink

On April 24, 2018 18:38:33 Rob Clark <***@gmail.com> wrote:

On Tue, Apr 24, 2018 at 5:45 PM, Jason Ekstrand <***@jlekstrand.net> wrote:
On Tue, Apr 24, 2018 at 7:38 AM, Rob Clark <***@gmail.com> wrote:

side-note, not sure if it really effects what you are doing here, but
karol ran into some cases, like 8bit signed imax, which needs to be
"lowered" to 16b (or 32b) and converted back for hw that doesn't
support smaller than 16b (or 32b). I think I have the same case with
ir3, which also has 16b but no 8b, (but he is a bit further along cl
cts than I am)..

I think there will be more of this sort of thing coming for more
instructions and for more than just 16b vs 32b. So not sure if
writing rules for each in nir_opt_algebraic.py will be so fun..

Yeah, it may be that what we want is a generic "lower this to something with
more bits" pass. If this is a problem for the CL people, maybe we just want
some way to make it configurable and put it in core NIR. I don't really
have a huge preference. I'm just trying to make sure we explore the
solution space.

something generic/configurable in core nir seems more sane..

ofc, do what you need to do in the short term, I just mentioned this
to point out that more of this sort of "handle fewer bits in more
bits" stuff is coming down the pipe as we start seeing more 8b stuff..
if needed we can refactor..

Yeah, maybe the best thing to do then would be to just leave this as is and
plan to pull it into core NIR and generalize it when the time comes. One
idea for generalizing would be to have a callback that returns the bit size
to lower to or zero for "leave it alone".

I suspect we'll need more stuff like this for 8-bit integers and 16-bit
floats. I'll review the original for real in the morning.

--Jason

BR,
-R

--Jason

BR,
-R

On Tue, Apr 24, 2018 at 9:56 AM, Jason Ekstrand <***@jlekstrand.net>
wrote:
It may be useful to just use nir_algebraic for this. We already do for
trig
workarounds. It's more painful from a build-system perspective but, in
general, the fewer hand-rolled algebraic lowering passes we have, the
better.

On Wed, Apr 11, 2018 at 12:20 AM, Iago Toral Quiroga <***@igalia.com>
wrote:

The hardware doesn't support 16-bit integer types, so we need to
implement
these using 32-bit integer instructions and then convert the result
back
to 16-bit.
---
src/intel/Makefile.sources | 1 +
src/intel/compiler/brw_nir.c | 2 +
src/intel/compiler/brw_nir.h | 2 +
src/intel/compiler/brw_nir_lower_16bit_int_math.c | 108
++++++++++++++++++++++
src/intel/compiler/meson.build | 1 +
5 files changed, 114 insertions(+)
create mode 100644 src/intel/compiler/brw_nir_lower_16bit_int_math.c

diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources
index 91c71a8dfaf..2cd76961ea4 100644
--- a/src/intel/Makefile.sources
+++ b/src/intel/Makefile.sources
@@ -79,6 +79,7 @@ COMPILER_FILES = \
compiler/brw_nir_analyze_boolean_resolves.c \
compiler/brw_nir_analyze_ubo_ranges.c \
compiler/brw_nir_attribute_workarounds.c \
+ compiler/brw_nir_lower_16bit_int_math.c \
compiler/brw_nir_lower_cs_intrinsics.c \
compiler/brw_nir_opt_peephole_ffma.c \
compiler/brw_nir_tcs_workarounds.c \
diff --git a/src/intel/compiler/brw_nir.c
b/src/intel/compiler/brw_nir.c
index 69ab162f888..2e5754076ed 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -638,6 +638,8 @@ brw_preprocess_nir(const struct brw_compiler
*compiler, nir_shader *nir)
nir_lower_isign64 |
nir_lower_divmod64);

+ brw_nir_lower_16bit_int_math(nir);
+
nir = brw_nir_optimize(nir, compiler, is_scalar);

if (is_scalar) {
diff --git a/src/intel/compiler/brw_nir.h
b/src/intel/compiler/brw_nir.h
index 03f52da08e5..6ba1a8bc654 100644
--- a/src/intel/compiler/brw_nir.h
+++ b/src/intel/compiler/brw_nir.h
@@ -152,6 +152,8 @@ void brw_nir_analyze_ubo_ranges(const struct
brw_compiler *compiler,

bool brw_nir_opt_peephole_ffma(nir_shader *shader);

+bool brw_nir_lower_16bit_int_math(nir_shader *shader);
+
nir_shader *brw_nir_optimize(nir_shader *nir,
const struct brw_compiler *compiler,
bool is_scalar);
diff --git a/src/intel/compiler/brw_nir_lower_16bit_int_math.c
b/src/intel/compiler/brw_nir_lower_16bit_int_math.c
new file mode 100644
index 00000000000..6876309a822
--- /dev/null
+++ b/src/intel/compiler/brw_nir_lower_16bit_int_math.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person
obtaining
a
+ * copy of this software and associated documentation files (the
"Software"),
+ * to deal in the Software without restriction, including without
limitation
+ * the rights to use, copy, modify, merge, publish, distribute,
sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom
the
+ * Software is furnished to do so, subject to the following
conditions:
+ *
+ * The above copyright notice and this permission notice (including
the
next
+ * paragraph) shall be included in all copies or substantial portions
of
the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
OR
OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_nir.h"
+#include "nir_builder.h"
+
+/**
+ * Intel hardware doesn't support 16-bit integer Math instructions so
this
+ * pass implements them in 32-bit and then converts the result back to
16-bit.
+ */
+static void
+lower_math_instr(nir_builder *bld, nir_alu_instr *alu, bool is_signed)
+{
+ const nir_op op = alu->op;
+
+ bld->cursor = nir_before_instr(&alu->instr);
+
+ nir_ssa_def *srcs_32[4] = { NULL, NULL, NULL, NULL };
+ const uint32_t num_inputs = nir_op_infos[op].num_inputs;
+ for (uint32_t i = 0; i < num_inputs; i++) {
+ nir_ssa_def *src = nir_ssa_for_alu_src(bld, alu, i);
+ srcs_32[i] = is_signed ? nir_i2i32(bld, src) : nir_u2u32(bld,
src);
+ }
+
+ nir_ssa_def *dst_32 =
+ nir_build_alu(bld, op, srcs_32[0], srcs_32[1], srcs_32[2],
srcs_32[3]);
+
+ nir_ssa_def *dst_16 =
+ is_signed ? nir_i2i16(bld, dst_32) : nir_u2u16(bld, dst_32);
+
+ nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
nir_src_for_ssa(dst_16));
+}
+
+static bool
+lower_instr(nir_builder *bld, nir_alu_instr *alu)
+{
+ assert(alu->dest.dest.is_ssa);
+ if (alu->dest.dest.ssa.bit_size != 16)
+ return false;
+
+ bool is_signed = false;
+ switch (alu->op) {
+ case nir_op_idiv:
+ case nir_op_imod:
+ is_signed = true;

You can get is_signed from nit_op_infos

+ /* Fallthrough */
+ case nir_op_udiv:
+ case nir_op_umod:
+ case nir_op_irem:

How is irem unsigned?

+ lower_math_instr(bld, alu, is_signed);
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool
+lower_impl(nir_function_impl *impl)
+{
+ nir_builder b;
+ nir_builder_init(&b, impl);
+ bool progress = false;
+
+ nir_foreach_block(block, impl) {
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type == nir_instr_type_alu)
+ progress |= lower_instr(&b, nir_instr_as_alu(instr));
+ }
+ }
+
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+
+ return progress;
+}
+
+bool
+brw_nir_lower_16bit_int_math(nir_shader *shader)
+{
+ bool progress = false;
+
+ nir_foreach_function(function, shader) {
+ if (function->impl)
+ progress |= lower_impl(function->impl);
+ }
+
+ return progress;
+}
diff --git a/src/intel/compiler/meson.build
b/src/intel/compiler/meson.build
index 72b7a6796cb..d80fcd6e31b 100644
--- a/src/intel/compiler/meson.build
+++ b/src/intel/compiler/meson.build
@@ -76,6 +76,7 @@ libintel_compiler_files = files(
'brw_nir_analyze_boolean_resolves.c',
'brw_nir_analyze_ubo_ranges.c',
'brw_nir_attribute_workarounds.c',
+ 'brw_nir_lower_16bit_int_math.c',
'brw_nir_lower_cs_intrinsics.c',
'brw_nir_opt_peephole_ffma.c',
'brw_nir_tcs_workarounds.c',
--
2.14.1

_______________________________________________
mesa-dev mailing list
mesa-***@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

_______________________________________________
mesa-dev mailing list
mesa-***@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Iago Toral

2018-04-25 05:59:16 UTC

Permalink

Post by Rob Clark
side-note, not sure if it really effects what you are doing here, but
karol ran into some cases, like 8bit signed imax, which needs to be
"lowered" to 16b (or 32b) and converted back for hw that doesn't
support smaller than 16b (or 32b). I think I have the same case with
ir3, which also has 16b but no 8b, (but he is a bit further along cl
cts than I am)..
I think there will be more of this sort of thing coming for more
instructions and for more than just 16b vs 32b. So not sure if
writing rules for each in nir_opt_algebraic.py will be so fun..
Yeah, it may be that what we want is a generic "lower this to
something with
more bits" pass. If this is a problem for the CL people, maybe we just want
some way to make it configurable and put it in core NIR. I don't really
have a huge preference. I'm just trying to make sure we explore the
solution space.
something generic/configurable in core nir seems more sane..
ofc, do what you need to do in the short term, I just mentioned this
to point out that more of this sort of "handle fewer bits in more
bits" stuff is coming down the pipe as we start seeing more 8b
stuff..
if needed we can refactor..
Yeah, maybe the best thing to do then would be to just leave this as is and
plan to pull it into core NIR and generalize it when the time
comes. One
idea for generalizing would be to have a callback that returns the bit size
to lower to or zero for "leave it alone".
I suspect we'll need more stuff like this for 8-bit integers and 16-
bit
floats. I'll review the original for real in the morning.

Yes, I confirm that. I have another pass that does the same for some
16-bt floating point operations, and in that case, some of the ops that
need to be lowered might depend on the hw gen.

Iago

Post by Rob Clark
--Jason
BR,
-R
--Jason
BR,
-R
It may be useful to just use nir_algebraic for this. We already do for
trig
workarounds. It's more painful from a build-system perspective but, in
general, the fewer hand-rolled algebraic lowering passes we have, the
better.
om>
The hardware doesn't support 16-bit integer types, so we need to implement
these using 32-bit integer instructions and then convert the result back
to 16-bit.
---
src/intel/Makefile.sources | 1 +
src/intel/compiler/brw_nir.c | 2 +
src/intel/compiler/brw_nir.h | 2 +
src/intel/compiler/brw_nir_lower_16bit_int_math.c | 108
++++++++++++++++++++++
src/intel/compiler/meson.build | 1 +
5 files changed, 114 insertions(+)
create mode 100644 src/intel/compiler/brw_nir_lower_16bit_int_math.c
diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources
index 91c71a8dfaf..2cd76961ea4 100644
--- a/src/intel/Makefile.sources
+++ b/src/intel/Makefile.sources
@@ -79,6 +79,7 @@ COMPILER_FILES = \
compiler/brw_nir_analyze_boolean_resolves.c \
compiler/brw_nir_analyze_ubo_ranges.c \
compiler/brw_nir_attribute_workarounds.c \
+ compiler/brw_nir_lower_16bit_int_math.c \
compiler/brw_nir_lower_cs_intrinsics.c \
compiler/brw_nir_opt_peephole_ffma.c \
compiler/brw_nir_tcs_workarounds.c \
diff --git a/src/intel/compiler/brw_nir.c
b/src/intel/compiler/brw_nir.c
index 69ab162f888..2e5754076ed 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -638,6 +638,8 @@ brw_preprocess_nir(const struct brw_compiler
*compiler, nir_shader *nir)
nir_lower_isign64 |
nir_lower_divmod64);
+ brw_nir_lower_16bit_int_math(nir);
+
nir = brw_nir_optimize(nir, compiler, is_scalar);
if (is_scalar) {
diff --git a/src/intel/compiler/brw_nir.h
b/src/intel/compiler/brw_nir.h
index 03f52da08e5..6ba1a8bc654 100644
--- a/src/intel/compiler/brw_nir.h
+++ b/src/intel/compiler/brw_nir.h
@@ -152,6 +152,8 @@ void brw_nir_analyze_ubo_ranges(const struct brw_compiler *compiler,
bool brw_nir_opt_peephole_ffma(nir_shader *shader);
+bool brw_nir_lower_16bit_int_math(nir_shader *shader);
+
nir_shader *brw_nir_optimize(nir_shader *nir,
const struct brw_compiler *compiler,
bool is_scalar);
diff --git a/src/intel/compiler/brw_nir_lower_16bit_int_math.c
b/src/intel/compiler/brw_nir_lower_16bit_int_math.c
new file mode 100644
index 00000000000..6876309a822
--- /dev/null
+++ b/src/intel/compiler/brw_nir_lower_16bit_int_math.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person
obtaining
a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ *
+ * The above copyright notice and this permission notice (including
the
next
+ * paragraph) shall be included in all copies or substantial
portions
of
the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
OR
OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_nir.h"
+#include "nir_builder.h"
+
+/**
+ * Intel hardware doesn't support 16-bit integer Math instructions so
this
+ * pass implements them in 32-bit and then converts the result back to
16-bit.
+ */
+static void
+lower_math_instr(nir_builder *bld, nir_alu_instr *alu, bool
is_signed)
+{
+ const nir_op op = alu->op;
+
+ bld->cursor = nir_before_instr(&alu->instr);
+
+ nir_ssa_def *srcs_32[4] = { NULL, NULL, NULL, NULL };
+ const uint32_t num_inputs = nir_op_infos[op].num_inputs;
+ for (uint32_t i = 0; i < num_inputs; i++) {
+ nir_ssa_def *src = nir_ssa_for_alu_src(bld, alu, i);
+ srcs_32[i] = is_signed ? nir_i2i32(bld, src) : nir_u2u32(bld, src);
+ }
+
+ nir_ssa_def *dst_32 =
+ nir_build_alu(bld, op, srcs_32[0], srcs_32[1], srcs_32[2], srcs_32[3]);
+
+ nir_ssa_def *dst_16 =
+ is_signed ? nir_i2i16(bld, dst_32) : nir_u2u16(bld, dst_32);
+
+ nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
nir_src_for_ssa(dst_16));
+}
+
+static bool
+lower_instr(nir_builder *bld, nir_alu_instr *alu)
+{
+ assert(alu->dest.dest.is_ssa);
+ if (alu->dest.dest.ssa.bit_size != 16)
+ return false;
+
+ bool is_signed = false;
+ switch (alu->op) {
+ is_signed = true;
You can get is_signed from nit_op_infos
+ /* Fallthrough */
How is irem unsigned?
+ lower_math_instr(bld, alu, is_signed);
+ return true;
+ return false;
+ }
+}
+
+static bool
+lower_impl(nir_function_impl *impl)
+{
+ nir_builder b;
+ nir_builder_init(&b, impl);
+ bool progress = false;
+
+ nir_foreach_block(block, impl) {
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type == nir_instr_type_alu)
+ progress |= lower_instr(&b, nir_instr_as_alu(instr));
+ }
+ }
+
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+
+ return progress;
+}
+
+bool
+brw_nir_lower_16bit_int_math(nir_shader *shader)
+{
+ bool progress = false;
+
+ nir_foreach_function(function, shader) {
+ if (function->impl)
+ progress |= lower_impl(function->impl);
+ }
+
+ return progress;
+}
diff --git a/src/intel/compiler/meson.build
b/src/intel/compiler/meson.build
index 72b7a6796cb..d80fcd6e31b 100644
--- a/src/intel/compiler/meson.build
+++ b/src/intel/compiler/meson.build
@@ -76,6 +76,7 @@ libintel_compiler_files = files(
'brw_nir_analyze_boolean_resolves.c',
'brw_nir_analyze_ubo_ranges.c',
'brw_nir_attribute_workarounds.c',
+ 'brw_nir_lower_16bit_int_math.c',
'brw_nir_lower_cs_intrinsics.c',
'brw_nir_opt_peephole_ffma.c',
'brw_nir_tcs_workarounds.c',
--
2.14.1
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Jason Ekstrand

2018-04-25 14:05:47 UTC

Permalink

Some of these comments may be duplicates of ones I made the first time
through.

For float16, we'll need f2f32. Also, is_signed can be derived from
nir_op_infos[op].input_types so it doesn't need to be passed in. If we
want to make it fully general, we probably also want to only do the
conversion if the source type is unsized.

Again, we can pull this from the destination type.

Post by Iago Toral Quiroga
+
+ nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
nir_src_for_ssa(dst_16));
+}
+
+static bool
+lower_instr(nir_builder *bld, nir_alu_instr *alu)
+{

As mentioned in previous discussions, we may want to have a control
function such as

unsigned (*lower_bit_size)(const nir_alu_instr *, void *)

where the void * is something you pass in when you call the optimization
pass. I'm usually not a huge fan of making a super-general thing before we
need it. However, we already have two different drivers that need it for
different things so let's just do it, make sure it works for all the
use-cases, and get it right the first time.

Post by Iago Toral Quiroga
+ assert(alu->dest.dest.is_ssa);
+ if (alu->dest.dest.ssa.bit_size != 16)
+ return false;
+
+ bool is_signed = false;
+ switch (alu->op) {
+ is_signed = true;
+ /* Fallthrough */

irem is sgned.

Probably only want to call this if (progress)

Post by Iago Toral Quiroga
+
+ return progress;
+}
+
+bool
+brw_nir_lower_16bit_int_math(nir_shader *shader)

If we want this to handle 8-bit things, maybe it needs a different name. :-)

Post by Iago Toral Quiroga
+{
+ bool progress = false;
+
+ nir_foreach_function(function, shader) {
+ if (function->impl)
+ progress |= lower_impl(function->impl);
+ }
+
+ return progress;
+}
diff --git a/src/intel/compiler/meson.build b/src/intel/compiler/meson.
build
index 72b7a6796cb..d80fcd6e31b 100644
--- a/src/intel/compiler/meson.build
+++ b/src/intel/compiler/meson.build
@@ -76,6 +76,7 @@ libintel_compiler_files = files(
'brw_nir_analyze_boolean_resolves.c',
'brw_nir_analyze_ubo_ranges.c',
'brw_nir_attribute_workarounds.c',
+ 'brw_nir_lower_16bit_int_math.c',
'brw_nir_lower_cs_intrinsics.c',
'brw_nir_opt_peephole_ffma.c',
'brw_nir_tcs_workarounds.c',
--
2.14.1
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Iago Toral

2018-04-26 06:01:42 UTC

Permalink

Post by Jason Ekstrand
Some of these comments may be duplicates of ones I made the first
time through.

Post by Iago Toral Quiroga
The hardware doesn't support 16-bit integer types, so we need to implement
these using 32-bit integer instructions and then convert the result back
to 16-bit.
---
src/intel/Makefile.sources | 1 +
src/intel/compiler/brw_nir.c | 2 +
src/intel/compiler/brw_nir.h | 2 +
src/intel/compiler/brw_nir_lower_16bit_int_math.c | 108
++++++++++++++++++++++
src/intel/compiler/meson.build | 1 +
5 files changed, 114 insertions(+)
create mode 100644
src/intel/compiler/brw_nir_lower_16bit_int_math.c
diff --git a/src/intel/Makefile.sources
b/src/intel/Makefile.sources
index 91c71a8dfaf..2cd76961ea4 100644
--- a/src/intel/Makefile.sources
+++ b/src/intel/Makefile.sources
@@ -79,6 +79,7 @@ COMPILER_FILES = \
compiler/brw_nir_analyze_boolean_resolves.c \
compiler/brw_nir_analyze_ubo_ranges.c \
compiler/brw_nir_attribute_workarounds.c \
+ compiler/brw_nir_lower_16bit_int_math.c \
compiler/brw_nir_lower_cs_intrinsics.c \
compiler/brw_nir_opt_peephole_ffma.c \
compiler/brw_nir_tcs_workarounds.c \
diff --git a/src/intel/compiler/brw_nir.c
b/src/intel/compiler/brw_nir.c
index 69ab162f888..2e5754076ed 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -638,6 +638,8 @@ brw_preprocess_nir(const struct brw_compiler
*compiler, nir_shader *nir)
nir_lower_isign64 |
nir_lower_divmod64);
+ brw_nir_lower_16bit_int_math(nir);
+
nir = brw_nir_optimize(nir, compiler, is_scalar);
if (is_scalar) {
diff --git a/src/intel/compiler/brw_nir.h
b/src/intel/compiler/brw_nir.h
index 03f52da08e5..6ba1a8bc654 100644
--- a/src/intel/compiler/brw_nir.h
+++ b/src/intel/compiler/brw_nir.h
@@ -152,6 +152,8 @@ void brw_nir_analyze_ubo_ranges(const struct
brw_compiler *compiler,
bool brw_nir_opt_peephole_ffma(nir_shader *shader);
+bool brw_nir_lower_16bit_int_math(nir_shader *shader);
+
nir_shader *brw_nir_optimize(nir_shader *nir,
const struct brw_compiler *compiler,
bool is_scalar);
diff --git a/src/intel/compiler/brw_nir_lower_16bit_int_math.c
b/src/intel/compiler/brw_nir_lower_16bit_int_math.c
new file mode 100644
index 00000000000..6876309a822
--- /dev/null
+++ b/src/intel/compiler/brw_nir_lower_16bit_int_math.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright Â© 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ *
+ * The above copyright notice and this permission notice
(including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_nir.h"
+#include "nir_builder.h"
+
+/**
+ * Intel hardware doesn't support 16-bit integer Math instructions so this
+ * pass implements them in 32-bit and then converts the result back to 16-bit.
+ */
+static void
+lower_math_instr(nir_builder *bld, nir_alu_instr *alu, bool
is_signed)
+{
+ const nir_op op = alu->op;
+
+ bld->cursor = nir_before_instr(&alu->instr);
+
+ nir_ssa_def *srcs_32[4] = { NULL, NULL, NULL, NULL };
+ const uint32_t num_inputs = nir_op_infos[op].num_inputs;
+ for (uint32_t i = 0; i < num_inputs; i++) {
+ nir_ssa_def *src = nir_ssa_for_alu_src(bld, alu, i);
nir_u2u32(bld, src);

For float16, we'll need f2f32.

Yes, I have that (in a separate pass for float16), I suppose merging
both makes more sense that having them be separate.

Post by Jason Ekstrand
Also, is_signed can be derived from nir_op_infos[op].input_types so
it doesn't need to be passed in. If we want to make it fully
general, we probably also want to only do the conversion if the
source type is unsized.

Good point.

Post by Jason Ekstrand

Again, we can pull this from the destination type.

Post by Iago Toral Quiroga
+
+ nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
nir_src_for_ssa(dst_16));
+}
+
+static bool
+lower_instr(nir_builder *bld, nir_alu_instr *alu)
+{

As mentioned in previous discussions, we may want to have a control
function such as
unsigned (*lower_bit_size)(const nir_alu_instr *, void *)
where the void * is something you pass in when you call the
optimization pass. I'm usually not a huge fan of making a super-
general thing before we need it. However, we already have two
different drivers that need it for different things so let's just do
it, make sure it works for all the use-cases, and get it right the
first time.

Yes, I agree.

Post by Jason Ekstrand

irem is sgned.

Oops, right.

Post by Jason Ekstrand

Probably only want to call this if (progress)

Ok.

Post by Jason Ekstrand

Post by Iago Toral Quiroga
+
+ return progress;
+}
+
+bool
+brw_nir_lower_16bit_int_math(nir_shader *shader)

If we want this to handle 8-bit things, maybe it needs a different name. :-)

Yes, and also if we want this to handle floats.
Iago

Post by Jason Ekstrand

Post by Iago Toral Quiroga
+{
+ bool progress = false;
+
+ nir_foreach_function(function, shader) {
+ if (function->impl)
+ progress |= lower_impl(function->impl);
+ }
+
+ return progress;
+}
diff --git a/src/intel/compiler/meson.build
b/src/intel/compiler/meson.build
index 72b7a6796cb..d80fcd6e31b 100644
--- a/src/intel/compiler/meson.build
+++ b/src/intel/compiler/meson.build
@@ -76,6 +76,7 @@ libintel_compiler_files = files(
'brw_nir_analyze_boolean_resolves.c',
'brw_nir_analyze_ubo_ranges.c',
'brw_nir_attribute_workarounds.c',
+ 'brw_nir_lower_16bit_int_math.c',
'brw_nir_lower_cs_intrinsics.c',
'brw_nir_opt_peephole_ffma.c',
'brw_nir_tcs_workarounds.c',
--
2.14.1
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Iago Toral Quiroga

2018-04-11 07:20:29 UTC

Permalink

From: Jose Maria Casanova Crespo <***@igalia.com>

---
src/compiler/nir/nir_opt_constant_folding.c | 2 ++
1 file changed, 2 insertions(+)

diff --git a/src/compiler/nir/nir_opt_constant_folding.c b/src/compiler/nir/nir_opt_constant_folding.c
index d6be807b3dc..b63660ea4da 100644
--- a/src/compiler/nir/nir_opt_constant_folding.c
+++ b/src/compiler/nir/nir_opt_constant_folding.c
@@ -78,6 +78,8 @@ constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
j++) {
if (load_const->def.bit_size == 64)
src[i].u64[j] = load_const->value.u64[instr->src[i].swizzle[j]];
+ else if (load_const->def.bit_size == 16)
+ src[i].u16[j] = load_const->value.u16[instr->src[i].swizzle[j]];
else
src[i].u32[j] = load_const->value.u32[instr->src[i].swizzle[j]];
}

--
2.14.1

Jason Ekstrand

2018-04-24 21:52:06 UTC

Permalink

Post by Iago Toral Quiroga
---
src/compiler/nir/nir_opt_constant_folding.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/compiler/nir/nir_opt_constant_folding.c
b/src/compiler/nir/nir_opt_constant_folding.c
index d6be807b3dc..b63660ea4da 100644
--- a/src/compiler/nir/nir_opt_constant_folding.c
+++ b/src/compiler/nir/nir_opt_constant_folding.c
@@ -78,6 +78,8 @@ constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
j++) {
if (load_const->def.bit_size == 64)
src[i].u64[j] = load_const->value.u64[instr->
src[i].swizzle[j]];
+ else if (load_const->def.bit_size == 16)
+ src[i].u16[j] = load_const->value.u16[instr->
src[i].swizzle[j]];
else
src[i].u32[j] = load_const->value.u32[instr->
src[i].swizzle[j]];

Let's make this a switch and support 8 while we're at it.

Post by Iago Toral Quiroga
}
--
2.14.1
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Chema Casanova

2018-04-25 12:00:09 UTC

Permalink

---
src/compiler/nir/nir_opt_constant_folding.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/compiler/nir/nir_opt_constant_folding.c
b/src/compiler/nir/nir_opt_constant_folding.c
index d6be807b3dc..b63660ea4da 100644
--- a/src/compiler/nir/nir_opt_constant_folding.c
+++ b/src/compiler/nir/nir_opt_constant_folding.c
@@ -78,6 +78,8 @@ constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
j++) {
if (load_const->def.bit_size == 64)
src[i].u64[j] =
load_const->value.u64[instr->src[i].swizzle[j]];
+ else if (load_const->def.bit_size == 16)
+ src[i].u16[j] =
load_const->value.u16[instr->src[i].swizzle[j]];
else
src[i].u32[j] =
load_const->value.u32[instr->src[i].swizzle[j]];
Let's make this a switch and support 8 while we're at it.

Karol Herbst has just sent just a patch with these changes done. So I've
reviewed it as I got to the same patch.

}
--
2.14.1
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
<https://lists.freedesktop.org/mailman/listinfo/mesa-dev>
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Iago Toral Quiroga

2018-04-11 07:20:27 UTC

Permalink

---
src/intel/compiler/brw_fs_nir.cpp | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 5e0dd37eefd..5c414e45b61 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -791,10 +791,14 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
case nir_op_f2f32:
case nir_op_f2i32:
case nir_op_f2u32:
+ case nir_op_f2i16:
+ case nir_op_f2u16:
case nir_op_i2i32:
case nir_op_u2u32:
case nir_op_i2i16:
case nir_op_u2u16:
+ case nir_op_i2f16:
+ case nir_op_u2f16:
inst = bld.MOV(result, op[0]);
inst->saturate = instr->dest.saturate;
break;

--
2.14.1

Jason Ekstrand

2018-04-24 21:46:50 UTC

Permalink

Post by Iago Toral Quiroga
---
src/intel/compiler/brw_fs_nir.cpp | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/src/intel/compiler/brw_fs_nir.cpp
b/src/intel/compiler/brw_fs_nir.cpp
index 5e0dd37eefd..5c414e45b61 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -791,10 +791,14 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
inst = bld.MOV(result, op[0]);
inst->saturate = instr->dest.saturate;
break;
--
2.14.1
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Iago Toral Quiroga

2018-04-11 07:20:33 UTC

Permalink

---
src/intel/vulkan/anv_pipeline.c | 1 +
1 file changed, 1 insertion(+)

diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index e64602d2844..be935363869 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -144,6 +144,7 @@ anv_shader_compile_to_nir(struct anv_pipeline *pipeline,
.multiview = true,
.variable_pointers = true,
.storage_16bit = device->instance->physicalDevice.info.gen >= 8,
+ .int16 = device->instance->physicalDevice.info.gen >= 8,
.subgroup_arithmetic = true,
.subgroup_basic = true,
.subgroup_ballot = true,

--
2.14.1

Iago Toral Quiroga

2018-04-11 07:20:32 UTC

Permalink

---
src/compiler/shader_info.h | 1 +
src/compiler/spirv/spirv_to_nir.c | 4 +++-
2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/compiler/shader_info.h b/src/compiler/shader_info.h
index ababe520b2d..15a8910c597 100644
--- a/src/compiler/shader_info.h
+++ b/src/compiler/shader_info.h
@@ -44,6 +44,7 @@ struct spirv_supported_capabilities {
bool multiview;
bool variable_pointers;
bool storage_16bit;
+ bool int16;
bool shader_viewport_index_layer;
bool subgroup_arithmetic;
bool subgroup_ballot;
diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c
index 78c1e9ff597..b9b32a11f5b 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -3281,7 +3281,6 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode,
case SpvCapabilityFloat16:
case SpvCapabilityInt64Atomics:
case SpvCapabilityAtomicStorage:
- case SpvCapabilityInt16:
case SpvCapabilityStorageImageMultisample:
case SpvCapabilityInt8:
case SpvCapabilitySparseResidency:
@@ -3297,6 +3296,9 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode,
case SpvCapabilityInt64:
spv_check_supported(int64, cap);
break;
+ case SpvCapabilityInt16:
+ spv_check_supported(int16, cap);
+ break;

case SpvCapabilityAddresses:
case SpvCapabilityKernel:

--
2.14.1

Iago Toral Quiroga

2018-04-11 07:20:28 UTC

Permalink

---
src/intel/compiler/brw_fs_nir.cpp | 16 +++++++++++-----
1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 5c414e45b61..ad31f7c82dc 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -1162,8 +1162,9 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
break;

case nir_op_i2b:
- case nir_op_f2b:
- if (nir_src_bit_size(instr->src[0].src) == 64) {
+ case nir_op_f2b: {
+ uint32_t bit_size = nir_src_bit_size(instr->src[0].src);
+ if (bit_size == 64) {
/* two-argument instructions can't take 64-bit immediates */
fs_reg zero;
fs_reg tmp;
@@ -1185,13 +1186,18 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
bld.CMP(tmp, op[0], zero, BRW_CONDITIONAL_NZ);
bld.MOV(result, subscript(tmp, BRW_REGISTER_TYPE_UD, 0));
} else {
- if (instr->op == nir_op_f2b) {
- bld.CMP(result, op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ);
+ fs_reg zero;
+ if (bit_size == 32) {
+ zero = instr->op == nir_op_f2b ? brw_imm_f(0.0f) : brw_imm_d(0);
} else {
- bld.CMP(result, op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ);
+ assert(bit_size == 16);
+ zero = instr->op == nir_op_f2b ?
+ retype(brw_imm_w(0), BRW_REGISTER_TYPE_HF) : brw_imm_w(0);
}
+ bld.CMP(result, op[0], zero, BRW_CONDITIONAL_NZ);
}
break;
+ }

case nir_op_ftrunc:
inst = bld.RNDZ(result, op[0]);

--
2.14.1

Jason Ekstrand

2018-04-24 21:50:52 UTC

Permalink

Post by Iago Toral Quiroga
---
src/intel/compiler/brw_fs_nir.cpp | 16 +++++++++++-----
1 file changed, 11 insertions(+), 5 deletions(-)
diff --git a/src/intel/compiler/brw_fs_nir.cpp
b/src/intel/compiler/brw_fs_nir.cpp
index 5c414e45b61..ad31f7c82dc 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -1162,8 +1162,9 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
break;
- if (nir_src_bit_size(instr->src[0].src) == 64) {
+ case nir_op_f2b: {
+ uint32_t bit_size = nir_src_bit_size(instr->src[0].src);
+ if (bit_size == 64) {
/* two-argument instructions can't take 64-bit immediates */
fs_reg zero;
fs_reg tmp;
@@ -1185,13 +1186,18 @@ fs_visitor::nir_emit_alu(const fs_builder &bld,
nir_alu_instr *instr)
bld.CMP(tmp, op[0], zero, BRW_CONDITIONAL_NZ);
bld.MOV(result, subscript(tmp, BRW_REGISTER_TYPE_UD, 0));
} else {
- if (instr->op == nir_op_f2b) {
- bld.CMP(result, op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ);
+ fs_reg zero;
+ if (bit_size == 32) {
+ zero = instr->op == nir_op_f2b ? brw_imm_f(0.0f) : brw_imm_d(0);
} else {
- bld.CMP(result, op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ);
+ assert(bit_size == 16);
+ zero = instr->op == nir_op_f2b ?
+ retype(brw_imm_w(0), BRW_REGISTER_TYPE_HF) : brw_imm_w(0);

I really wish there were some better way of building immediates. Oh, well,
we can fix that later.

Post by Iago Toral Quiroga
}
+ bld.CMP(result, op[0], zero, BRW_CONDITIONAL_NZ);
}
break;
+ }
inst = bld.RNDZ(result, op[0]);
--
2.14.1
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Iago Toral Quiroga

2018-04-11 07:20:31 UTC

Permalink

From: Jose Maria Casanova Crespo <***@igalia.com>

16-bit immediates are replicated in each word of a 32-bit value
so we need to negate both.
---
src/intel/compiler/brw_shader.cpp | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp
index 9cdf9fcb23d..c7edc60b63d 100644
--- a/src/intel/compiler/brw_shader.cpp
+++ b/src/intel/compiler/brw_shader.cpp
@@ -581,7 +581,8 @@ brw_negate_immediate(enum brw_reg_type type, struct brw_reg *reg)
return true;
case BRW_REGISTER_TYPE_W:
case BRW_REGISTER_TYPE_UW:
- reg->d = -(int16_t)reg->ud;
+ case BRW_REGISTER_TYPE_HF:
+ reg->ud ^= 0x80008000;
return true;
case BRW_REGISTER_TYPE_F:
reg->f = -reg->f;
@@ -602,8 +603,6 @@ brw_negate_immediate(enum brw_reg_type type, struct brw_reg *reg)
case BRW_REGISTER_TYPE_UV:
case BRW_REGISTER_TYPE_V:
assert(!"unimplemented: negate UV/V immediate");
- case BRW_REGISTER_TYPE_HF:
- assert(!"unimplemented: negate HF immediate");
case BRW_REGISTER_TYPE_NF:
unreachable("no NF immediates");
}

--
2.14.1

Jason Ekstrand

2018-04-24 21:55:20 UTC

Permalink

Post by Iago Toral Quiroga
16-bit immediates are replicated in each word of a 32-bit value
so we need to negate both.
---
src/intel/compiler/brw_shader.cpp | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_
shader.cpp
index 9cdf9fcb23d..c7edc60b63d 100644
--- a/src/intel/compiler/brw_shader.cpp
+++ b/src/intel/compiler/brw_shader.cpp
@@ -581,7 +581,8 @@ brw_negate_immediate(enum brw_reg_type type, struct brw_reg *reg)
return true;
- reg->d = -(int16_t)reg->ud;
+ reg->ud ^= 0x80008000;

This is not correct for integers. We need to keep two separate cases.

Chema Casanova

2018-04-26 12:36:42 UTC

Permalink

Post by Iago Toral Quiroga
16-bit immediates are replicated in each word of a 32-bit value
so we need to negate both.
---
src/intel/compiler/brw_shader.cpp | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/src/intel/compiler/brw_shader.cpp
b/src/intel/compiler/brw_shader.cpp
index 9cdf9fcb23d..c7edc60b63d 100644
--- a/src/intel/compiler/brw_shader.cpp
+++ b/src/intel/compiler/brw_shader.cpp
@@ -581,7 +581,8 @@ brw_negate_immediate(enum brw_reg_type type,
struct brw_reg *reg)
return true;
- reg->d = -(int16_t)reg->ud;
+ reg->ud ^= 0x80008000;
This is not correct for integers. We need to keep two separate cases.

That's true, I've forgotten about two's complement representation. For
this series v2 I will send:

case BRW_REGISTER_TYPE_UW:
- reg->d = -(int16_t)reg->ud;
+ uint16_t value = -(int16_t)reg->ud;
+ reg->ud = value | value << 16;
+ return true;
+ case BRW_REGISTER_TYPE_HF:
+ reg->ud ^= 0x80008000;

I'm including for v2 also a new patch for solving a problem with
negative values at brw_imm_w that is replicating the 16-bit value
wrongly because of sign extension.

Post by Iago Toral Quiroga

return true;
reg->f = -reg->f;
@@ -602,8 +603,6 @@ brw_negate_immediate(enum brw_reg_type type,
struct brw_reg *reg)
assert(!"unimplemented: negate UV/V immediate");
- assert(!"unimplemented: negate HF immediate");
unreachable("no NF immediates");
}
--
2.14.1
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
<https://lists.freedesktop.org/mailman/listinfo/mesa-dev>
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Iago Toral Quiroga

2018-04-11 07:20:25 UTC

Permalink

We need to use 16-bit constants with 16-bit instructions,
otherwise we get the following validation error:

"Destination stride must be equal to the ratio of the sizes of
the execution data type to the destination type"

Because the execution data type is 4B due to the 32-bit integer
constant.
---
src/intel/compiler/brw_fs_nir.cpp | 17 ++++++++++++-----
1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 6c4bcd1c113..f40a3540e31 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -863,17 +863,24 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
break;
}

- case nir_op_isign:
+ case nir_op_isign: {
/* ASR(val, 31) -> negative val generates 0xffffffff (signed -1).
* -> non-negative val generates 0x00000000.
* Predicated OR sets 1 if val is positive.
*/
- assert(nir_dest_bit_size(instr->dest.dest) < 64);
- bld.CMP(bld.null_reg_d(), op[0], brw_imm_d(0), BRW_CONDITIONAL_G);
- bld.ASR(result, op[0], brw_imm_d(31));
- inst = bld.OR(result, result, brw_imm_d(1));
+ uint32_t bit_size = nir_dest_bit_size(instr->dest.dest);
+ assert(bit_size == 32 || bit_size == 16);
+
+ fs_reg zero = bit_size == 32 ? brw_imm_d(0) : brw_imm_w(0);
+ fs_reg one = bit_size == 32 ? brw_imm_d(1) : brw_imm_w(1);
+ fs_reg shift = bit_size == 32 ? brw_imm_d(31) : brw_imm_w(15);
+
+ bld.CMP(bld.null_reg_d(), op[0], zero, BRW_CONDITIONAL_G);
+ bld.ASR(result, op[0], shift);
+ inst = bld.OR(result, result, one);
inst->predicate = BRW_PREDICATE_NORMAL;
break;
+ }

case nir_op_frcp:
inst = bld.emit(SHADER_OPCODE_RCP, result, op[0]);

--
2.14.1

Jason Ekstrand

2018-04-24 14:52:26 UTC

Permalink

Post by Iago Toral Quiroga
We need to use 16-bit constants with 16-bit instructions,
"Destination stride must be equal to the ratio of the sizes of
the execution data type to the destination type"
Because the execution data type is 4B due to the 32-bit integer
constant.
---
src/intel/compiler/brw_fs_nir.cpp | 17 ++++++++++++-----
1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/src/intel/compiler/brw_fs_nir.cpp
b/src/intel/compiler/brw_fs_nir.cpp
index 6c4bcd1c113..f40a3540e31 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -863,17 +863,24 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
break;
}
+ case nir_op_isign: {
/* ASR(val, 31) -> negative val generates 0xffffffff (signed -1).
* -> non-negative val generates 0x00000000.
* Predicated OR sets 1 if val is positive.
*/
- assert(nir_dest_bit_size(instr->dest.dest) < 64);
- bld.CMP(bld.null_reg_d(), op[0], brw_imm_d(0), BRW_CONDITIONAL_G);
- bld.ASR(result, op[0], brw_imm_d(31));
- inst = bld.OR(result, result, brw_imm_d(1));
+ uint32_t bit_size = nir_dest_bit_size(instr->dest.dest);
+ assert(bit_size == 32 || bit_size == 16);
+
+ fs_reg zero = bit_size == 32 ? brw_imm_d(0) : brw_imm_w(0);
+ fs_reg one = bit_size == 32 ? brw_imm_d(1) : brw_imm_w(1);
+ fs_reg shift = bit_size == 32 ? brw_imm_d(31) : brw_imm_w(15);
+
+ bld.CMP(bld.null_reg_d(), op[0], zero, BRW_CONDITIONAL_G);
+ bld.ASR(result, op[0], shift);
+ inst = bld.OR(result, result, one);
inst->predicate = BRW_PREDICATE_NORMAL;
break;
+ }
inst = bld.emit(SHADER_OPCODE_RCP, result, op[0]);
--
2.14.1
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Iago Toral Quiroga

2018-04-11 07:20:34 UTC

Permalink

---
src/intel/vulkan/anv_device.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 7522b7865c2..306d5beff7d 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -759,7 +759,7 @@ void anv_GetPhysicalDeviceFeatures(
.shaderCullDistance = true,
.shaderFloat64 = pdevice->info.gen >= 8,
.shaderInt64 = pdevice->info.gen >= 8,
- .shaderInt16 = false,
+ .shaderInt16 = pdevice->info.gen >= 8,
.shaderResourceMinLod = false,
.variableMultisampleRate = false,
.inheritedQueries = true,

--
2.14.1

Jason Ekstrand

2018-04-24 21:56:14 UTC

Permalink

9-11 are

Post by Iago Toral Quiroga
---
src/intel/vulkan/anv_device.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 7522b7865c2..306d5beff7d 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -759,7 +759,7 @@ void anv_GetPhysicalDeviceFeatures(
.shaderCullDistance = true,
.shaderFloat64 = pdevice->info.gen >= 8,
.shaderInt64 = pdevice->info.gen >= 8,
- .shaderInt16 = false,
+ .shaderInt16 = pdevice->info.gen >= 8,
.shaderResourceMinLod = false,
.variableMultisampleRate = false,
.inheritedQueries = true,
--
2.14.1
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Jason Ekstrand

2018-04-24 21:54:08 UTC

Permalink

Post by Iago Toral Quiroga
---
src/intel/compiler/brw_fs_nir.cpp | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/src/intel/compiler/brw_fs_nir.cpp
b/src/intel/compiler/brw_fs_nir.cpp
index ad31f7c82dc..822a1ac4227 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -1509,6 +1509,11 @@ fs_visitor::nir_emit_load_const(const fs_builder &bld,
fs_reg reg = bld.vgrf(reg_type, instr->def.num_components);
switch (instr->def.bit_size) {
+ for (unsigned i = 0; i < instr->def.num_components; i++)
+ bld.MOV(offset(reg, bld, i), brw_imm_w(instr->value.i16[i]));
+ break;
+
for (unsigned i = 0; i < instr->def.num_components; i++)
bld.MOV(offset(reg, bld, i), brw_imm_d(instr->value.i32[i]));
--
2.14.1
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev