Discussion:
[Mesa-dev] [PATCH] radv: export SampleMask from pixel shaders at full rate
Samuel Pitoiset
2017-12-12 17:08:09 UTC
Permalink
Use 16_ABGR instead of 32_ABGR if Z isn't written.

Ported from RadeonSI.

No CTS regressions on Polaris.

Signed-off-by: Samuel Pitoiset <***@gmail.com>
---
src/amd/common/ac_nir_to_llvm.c | 65 ++++++++++++++++++++++++++++++++++-------
src/amd/vulkan/radv_pipeline.c | 29 ++++++++++++++----
2 files changed, 78 insertions(+), 16 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 663b27d265..5916619e97 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -6166,6 +6166,26 @@ si_export_mrt_color(struct nir_to_llvm_context *ctx,
return true;
}

+static unsigned
+si_get_spi_shader_z_format(bool writes_z, bool writes_stencil,
+ bool writes_samplemask)
+{
+ if (writes_z) {
+ /* Z needs 32 bits. */
+ if (writes_samplemask)
+ return V_028710_SPI_SHADER_32_ABGR;
+ else if (writes_stencil)
+ return V_028710_SPI_SHADER_32_GR;
+ else
+ return V_028710_SPI_SHADER_32_R;
+ } else if (writes_stencil || writes_samplemask) {
+ /* Both stencil and sample mask need only 16 bits. */
+ return V_028710_SPI_SHADER_UINT16_ABGR;
+ } else {
+ return V_028710_SPI_SHADER_ZERO;
+ }
+}
+
static void
si_export_mrt_z(struct nir_to_llvm_context *ctx,
LLVMValueRef depth, LLVMValueRef stencil,
@@ -6184,19 +6204,42 @@ si_export_mrt_z(struct nir_to_llvm_context *ctx,
args.out[2] = LLVMGetUndef(ctx->ac.f32); /* B, sample mask */
args.out[3] = LLVMGetUndef(ctx->ac.f32); /* A, alpha to mask */

- if (depth) {
- args.out[0] = depth;
- args.enabled_channels |= 0x1;
- }
+ unsigned format = si_get_spi_shader_z_format(depth != NULL,
+ stencil != NULL,
+ samplemask != NULL);
+
+ if (format == V_028710_SPI_SHADER_UINT16_ABGR) {
+ assert(!depth);
+ args.compr = 1; /* COMPR flag */
+
+ if (stencil) {
+ /* Stencil should be in X[23:16]. */
+ stencil = ac_to_integer(&ctx->ac, stencil);
+ stencil = LLVMBuildShl(ctx->builder, stencil,
+ LLVMConstInt(ctx->ac.i32, 16, 0), "");
+ args.out[0] = ac_to_float(&ctx->ac, stencil);
+ args.enabled_channels |= 0x3;
+ }
+ if (samplemask) {
+ /* SampleMask should be in Y[15:0]. */
+ args.out[1] = samplemask;
+ args.enabled_channels |= 0xc;
+ }
+ } else {
+ if (depth) {
+ args.out[0] = depth;
+ args.enabled_channels |= 0x1;
+ }

- if (stencil) {
- args.out[1] = stencil;
- args.enabled_channels |= 0x2;
- }
+ if (stencil) {
+ args.out[1] = stencil;
+ args.enabled_channels |= 0x2;
+ }

- if (samplemask) {
- args.out[2] = samplemask;
- args.enabled_channels |= 0x4;
+ if (samplemask) {
+ args.out[2] = samplemask;
+ args.enabled_channels |= 0x4;
+ }
}

/* SI (except OLAND and HAINAN) has a bug that it only looks
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 0146d6935e..baaf5c4c77 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -2013,6 +2013,25 @@ radv_pipeline_stage_to_user_data_0(struct radv_pipeline *pipeline,
}
}

+static unsigned
+si_get_spi_shader_z_format(bool writes_z, bool writes_stencil,
+ bool writes_samplemask)
+{
+ if (writes_z) {
+ /* Z needs 32 bits. */
+ if (writes_samplemask)
+ return V_028710_SPI_SHADER_32_ABGR;
+ else if (writes_stencil)
+ return V_028710_SPI_SHADER_32_GR;
+ else
+ return V_028710_SPI_SHADER_32_R;
+ } else if (writes_stencil || writes_samplemask) {
+ /* Both stencil and sample mask need only 16 bits. */
+ return V_028710_SPI_SHADER_UINT16_ABGR;
+ } else {
+ return V_028710_SPI_SHADER_ZERO;
+ }
+}

static VkResult
radv_pipeline_init(struct radv_pipeline *pipeline,
@@ -2108,11 +2127,11 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
if (pipeline->device->physical_device->has_rbplus)
pipeline->graphics.db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);

- pipeline->graphics.shader_z_format =
- ps->info.fs.writes_sample_mask ? V_028710_SPI_SHADER_32_ABGR :
- ps->info.fs.writes_stencil ? V_028710_SPI_SHADER_32_GR :
- ps->info.fs.writes_z ? V_028710_SPI_SHADER_32_R :
- V_028710_SPI_SHADER_ZERO;
+ unsigned shader_z_format =
+ si_get_spi_shader_z_format(ps->info.fs.writes_z,
+ ps->info.fs.writes_stencil,
+ ps->info.fs.writes_sample_mask);
+ pipeline->graphics.shader_z_format = shader_z_format;

calculate_vgt_gs_mode(pipeline);
calculate_vs_outinfo(pipeline);
--
2.15.1
Bas Nieuwenhuizen
2017-12-13 20:21:41 UTC
Permalink
On Tue, Dec 12, 2017 at 6:08 PM, Samuel Pitoiset
Post by Samuel Pitoiset
Use 16_ABGR instead of 32_ABGR if Z isn't written.
Ported from RadeonSI.
No CTS regressions on Polaris.
---
src/amd/common/ac_nir_to_llvm.c | 65 ++++++++++++++++++++++++++++++++++-------
src/amd/vulkan/radv_pipeline.c | 29 ++++++++++++++----
2 files changed, 78 insertions(+), 16 deletions(-)
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 663b27d265..5916619e97 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -6166,6 +6166,26 @@ si_export_mrt_color(struct nir_to_llvm_context *ctx,
return true;
}
+static unsigned
+si_get_spi_shader_z_format(bool writes_z, bool writes_stencil,
+ bool writes_samplemask)
+{
+ if (writes_z) {
+ /* Z needs 32 bits. */
+ if (writes_samplemask)
+ return V_028710_SPI_SHADER_32_ABGR;
+ else if (writes_stencil)
+ return V_028710_SPI_SHADER_32_GR;
+ else
+ return V_028710_SPI_SHADER_32_R;
+ } else if (writes_stencil || writes_samplemask) {
+ /* Both stencil and sample mask need only 16 bits. */
+ return V_028710_SPI_SHADER_UINT16_ABGR;
+ } else {
+ return V_028710_SPI_SHADER_ZERO;
+ }
+}
I'm not a fan of having this function in two places. Can we export the
format from the compiler to radv, or the other way around?

Otherwise,
Post by Samuel Pitoiset
+
static void
si_export_mrt_z(struct nir_to_llvm_context *ctx,
LLVMValueRef depth, LLVMValueRef stencil,
@@ -6184,19 +6204,42 @@ si_export_mrt_z(struct nir_to_llvm_context *ctx,
args.out[2] = LLVMGetUndef(ctx->ac.f32); /* B, sample mask */
args.out[3] = LLVMGetUndef(ctx->ac.f32); /* A, alpha to mask */
- if (depth) {
- args.out[0] = depth;
- args.enabled_channels |= 0x1;
- }
+ unsigned format = si_get_spi_shader_z_format(depth != NULL,
+ stencil != NULL,
+ samplemask != NULL);
+
+ if (format == V_028710_SPI_SHADER_UINT16_ABGR) {
+ assert(!depth);
+ args.compr = 1; /* COMPR flag */
+
+ if (stencil) {
+ /* Stencil should be in X[23:16]. */
+ stencil = ac_to_integer(&ctx->ac, stencil);
+ stencil = LLVMBuildShl(ctx->builder, stencil,
+ LLVMConstInt(ctx->ac.i32, 16, 0), "");
+ args.out[0] = ac_to_float(&ctx->ac, stencil);
+ args.enabled_channels |= 0x3;
+ }
+ if (samplemask) {
+ /* SampleMask should be in Y[15:0]. */
+ args.out[1] = samplemask;
+ args.enabled_channels |= 0xc;
+ }
+ } else {
+ if (depth) {
+ args.out[0] = depth;
+ args.enabled_channels |= 0x1;
+ }
- if (stencil) {
- args.out[1] = stencil;
- args.enabled_channels |= 0x2;
- }
+ if (stencil) {
+ args.out[1] = stencil;
+ args.enabled_channels |= 0x2;
+ }
- if (samplemask) {
- args.out[2] = samplemask;
- args.enabled_channels |= 0x4;
+ if (samplemask) {
+ args.out[2] = samplemask;
+ args.enabled_channels |= 0x4;
+ }
}
/* SI (except OLAND and HAINAN) has a bug that it only looks
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 0146d6935e..baaf5c4c77 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -2013,6 +2013,25 @@ radv_pipeline_stage_to_user_data_0(struct radv_pipeline *pipeline,
}
}
+static unsigned
+si_get_spi_shader_z_format(bool writes_z, bool writes_stencil,
+ bool writes_samplemask)
+{
+ if (writes_z) {
+ /* Z needs 32 bits. */
+ if (writes_samplemask)
+ return V_028710_SPI_SHADER_32_ABGR;
+ else if (writes_stencil)
+ return V_028710_SPI_SHADER_32_GR;
+ else
+ return V_028710_SPI_SHADER_32_R;
+ } else if (writes_stencil || writes_samplemask) {
+ /* Both stencil and sample mask need only 16 bits. */
+ return V_028710_SPI_SHADER_UINT16_ABGR;
+ } else {
+ return V_028710_SPI_SHADER_ZERO;
+ }
+}
static VkResult
radv_pipeline_init(struct radv_pipeline *pipeline,
@@ -2108,11 +2127,11 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
if (pipeline->device->physical_device->has_rbplus)
pipeline->graphics.db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);
- pipeline->graphics.shader_z_format =
- V_028710_SPI_SHADER_ZERO;
+ unsigned shader_z_format =
+ si_get_spi_shader_z_format(ps->info.fs.writes_z,
+ ps->info.fs.writes_stencil,
+ ps->info.fs.writes_sample_mask);
+ pipeline->graphics.shader_z_format = shader_z_format;
calculate_vgt_gs_mode(pipeline);
calculate_vs_outinfo(pipeline);
--
2.15.1
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Samuel Pitoiset
2017-12-14 11:52:59 UTC
Permalink
Post by Bas Nieuwenhuizen
On Tue, Dec 12, 2017 at 6:08 PM, Samuel Pitoiset
Post by Samuel Pitoiset
Use 16_ABGR instead of 32_ABGR if Z isn't written.
Ported from RadeonSI.
No CTS regressions on Polaris.
---
src/amd/common/ac_nir_to_llvm.c | 65 ++++++++++++++++++++++++++++++++++-------
src/amd/vulkan/radv_pipeline.c | 29 ++++++++++++++----
2 files changed, 78 insertions(+), 16 deletions(-)
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 663b27d265..5916619e97 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -6166,6 +6166,26 @@ si_export_mrt_color(struct nir_to_llvm_context *ctx,
return true;
}
+static unsigned
+si_get_spi_shader_z_format(bool writes_z, bool writes_stencil,
+ bool writes_samplemask)
+{
+ if (writes_z) {
+ /* Z needs 32 bits. */
+ if (writes_samplemask)
+ return V_028710_SPI_SHADER_32_ABGR;
+ else if (writes_stencil)
+ return V_028710_SPI_SHADER_32_GR;
+ else
+ return V_028710_SPI_SHADER_32_R;
+ } else if (writes_stencil || writes_samplemask) {
+ /* Both stencil and sample mask need only 16 bits. */
+ return V_028710_SPI_SHADER_UINT16_ABGR;
+ } else {
+ return V_028710_SPI_SHADER_ZERO;
+ }
+}
I'm not a fan of having this function in two places. Can we export the
format from the compiler to radv, or the other way around?
Yeah, I'm not a big fan as well. Exporting the format from the compiler
to radv seems the best solution. I will update the patch.
Post by Bas Nieuwenhuizen
Otherwise,
Post by Samuel Pitoiset
+
static void
si_export_mrt_z(struct nir_to_llvm_context *ctx,
LLVMValueRef depth, LLVMValueRef stencil,
@@ -6184,19 +6204,42 @@ si_export_mrt_z(struct nir_to_llvm_context *ctx,
args.out[2] = LLVMGetUndef(ctx->ac.f32); /* B, sample mask */
args.out[3] = LLVMGetUndef(ctx->ac.f32); /* A, alpha to mask */
- if (depth) {
- args.out[0] = depth;
- args.enabled_channels |= 0x1;
- }
+ unsigned format = si_get_spi_shader_z_format(depth != NULL,
+ stencil != NULL,
+ samplemask != NULL);
+
+ if (format == V_028710_SPI_SHADER_UINT16_ABGR) {
+ assert(!depth);
+ args.compr = 1; /* COMPR flag */
+
+ if (stencil) {
+ /* Stencil should be in X[23:16]. */
+ stencil = ac_to_integer(&ctx->ac, stencil);
+ stencil = LLVMBuildShl(ctx->builder, stencil,
+ LLVMConstInt(ctx->ac.i32, 16, 0), "");
+ args.out[0] = ac_to_float(&ctx->ac, stencil);
+ args.enabled_channels |= 0x3;
+ }
+ if (samplemask) {
+ /* SampleMask should be in Y[15:0]. */
+ args.out[1] = samplemask;
+ args.enabled_channels |= 0xc;
+ }
+ } else {
+ if (depth) {
+ args.out[0] = depth;
+ args.enabled_channels |= 0x1;
+ }
- if (stencil) {
- args.out[1] = stencil;
- args.enabled_channels |= 0x2;
- }
+ if (stencil) {
+ args.out[1] = stencil;
+ args.enabled_channels |= 0x2;
+ }
- if (samplemask) {
- args.out[2] = samplemask;
- args.enabled_channels |= 0x4;
+ if (samplemask) {
+ args.out[2] = samplemask;
+ args.enabled_channels |= 0x4;
+ }
}
/* SI (except OLAND and HAINAN) has a bug that it only looks
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 0146d6935e..baaf5c4c77 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -2013,6 +2013,25 @@ radv_pipeline_stage_to_user_data_0(struct radv_pipeline *pipeline,
}
}
+static unsigned
+si_get_spi_shader_z_format(bool writes_z, bool writes_stencil,
+ bool writes_samplemask)
+{
+ if (writes_z) {
+ /* Z needs 32 bits. */
+ if (writes_samplemask)
+ return V_028710_SPI_SHADER_32_ABGR;
+ else if (writes_stencil)
+ return V_028710_SPI_SHADER_32_GR;
+ else
+ return V_028710_SPI_SHADER_32_R;
+ } else if (writes_stencil || writes_samplemask) {
+ /* Both stencil and sample mask need only 16 bits. */
+ return V_028710_SPI_SHADER_UINT16_ABGR;
+ } else {
+ return V_028710_SPI_SHADER_ZERO;
+ }
+}
static VkResult
radv_pipeline_init(struct radv_pipeline *pipeline,
@@ -2108,11 +2127,11 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
if (pipeline->device->physical_device->has_rbplus)
pipeline->graphics.db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);
- pipeline->graphics.shader_z_format =
- V_028710_SPI_SHADER_ZERO;
+ unsigned shader_z_format =
+ si_get_spi_shader_z_format(ps->info.fs.writes_z,
+ ps->info.fs.writes_stencil,
+ ps->info.fs.writes_sample_mask);
+ pipeline->graphics.shader_z_format = shader_z_format;
calculate_vgt_gs_mode(pipeline);
calculate_vs_outinfo(pipeline);
--
2.15.1
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Loading...