Timothy Arceri
2018-12-10 00:30:58 UTC
This adds support needed for marking the varyings as used but we
don't actually support packing patches in this patch.
---
src/compiler/nir/nir_linking_helpers.c | 73 ++++++++++++++++++--------
1 file changed, 51 insertions(+), 22 deletions(-)
diff --git a/src/compiler/nir/nir_linking_helpers.c b/src/compiler/nir/nir_linking_helpers.c
index a05890ada4..845aba5c87 100644
--- a/src/compiler/nir/nir_linking_helpers.c
+++ b/src/compiler/nir/nir_linking_helpers.c
@@ -289,15 +289,35 @@ struct varying_loc
uint32_t location;
};
+static void
+mark_all_slots_used(nir_variable *var, uint64_t *slots_used,
+ uint64_t slots_used_mask, unsigned num_slots)
+{
+ unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
+
+ slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
+ (((uint64_t)1 << num_slots) - 1) << (var->data.location - loc_offset);
+}
+
+static void
+mark_used_slots(nir_variable *var, uint64_t *slots_used, unsigned offset)
+{
+ unsigned loc_offset = offset - (var->data.patch ? VARYING_SLOT_PATCH0 : 0);
+
+ slots_used[var->data.patch ? 1 : 0] |= (uint64_t)1 << (var->data.location + loc_offset);
+}
+
static void
remap_slots_and_components(struct exec_list *var_list, gl_shader_stage stage,
struct varying_loc (*remap)[4],
- uint64_t *slots_used, uint64_t *out_slots_read)
+ uint64_t *slots_used, uint64_t *out_slots_read,
+ uint32_t *p_slots_used, uint32_t *p_out_slots_read)
{
- uint64_t out_slots_read_tmp = 0;
+ uint64_t out_slots_read_tmp[2] = {0};
+ uint64_t slots_used_tmp[2] = {0};
/* We don't touch builtins so just copy the bitmask */
- uint64_t slots_used_tmp =
+ slots_used_tmp[0] =
*slots_used & (((uint64_t)1 << (VARYING_SLOT_VAR0 - 1)) - 1);
nir_foreach_variable(var, var_list) {
@@ -305,8 +325,8 @@ remap_slots_and_components(struct exec_list *var_list, gl_shader_stage stage,
/* Only remap things that aren't built-ins */
if (var->data.location >= VARYING_SLOT_VAR0 &&
- var->data.location - VARYING_SLOT_VAR0 < 32) {
- assert(var->data.location - VARYING_SLOT_VAR0 < 32);
+ var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
+ assert(var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH);
const struct glsl_type *type = var->type;
if (nir_is_per_vertex_io(var, stage)) {
@@ -321,11 +341,17 @@ remap_slots_and_components(struct exec_list *var_list, gl_shader_stage stage,
unsigned location = var->data.location - VARYING_SLOT_VAR0;
struct varying_loc *new_loc = &remap[location][var->data.location_frac];
- uint64_t slots = (((uint64_t)1 << num_slots) - 1) << var->data.location;
- if (slots & *slots_used)
+ unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
+ uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
+ uint64_t outs_used =
+ var->data.patch ? *p_out_slots_read : *out_slots_read;
+ uint64_t slots =
+ (((uint64_t)1 << num_slots) - 1) << (var->data.location - loc_offset);
+
+ if (slots & used)
used_across_stages = true;
- if (slots & *out_slots_read)
+ if (slots & outs_used)
outputs_read = true;
if (new_loc->location) {
@@ -339,30 +365,29 @@ remap_slots_and_components(struct exec_list *var_list, gl_shader_stage stage,
* otherwise we will mess up the mask for things like partially
* marked arrays.
*/
- if (used_across_stages) {
- slots_used_tmp |=
- *slots_used & (((uint64_t)1 << num_slots) - 1) << var->data.location;
- }
+ if (used_across_stages)
+ mark_all_slots_used(var, slots_used_tmp, used, num_slots);
if (outputs_read) {
- out_slots_read_tmp |=
- *out_slots_read & (((uint64_t)1 << num_slots) - 1) << var->data.location;
+ mark_all_slots_used(var, out_slots_read_tmp, outs_used,
+ num_slots);
}
-
} else {
for (unsigned i = 0; i < num_slots; i++) {
if (used_across_stages)
- slots_used_tmp |= (uint64_t)1 << (var->data.location + i);
+ mark_used_slots(var, slots_used_tmp, i);
if (outputs_read)
- out_slots_read_tmp |= (uint64_t)1 << (var->data.location + i);
+ mark_used_slots(var, out_slots_read_tmp, i);
}
}
}
}
- *slots_used = slots_used_tmp;
- *out_slots_read = out_slots_read_tmp;
+ *slots_used = slots_used_tmp[0];
+ *out_slots_read = out_slots_read_tmp[0];
+ *p_slots_used = slots_used_tmp[1];
+ *p_out_slots_read = out_slots_read_tmp[1];
}
/* If there are empty components in the slot compact the remaining components
@@ -376,7 +401,7 @@ compact_components(nir_shader *producer, nir_shader *consumer, uint8_t *comps,
{
struct exec_list *input_list = &consumer->inputs;
struct exec_list *output_list = &producer->outputs;
- struct varying_loc remap[32][4] = {{{0}, {0}}};
+ struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = {{{0}, {0}}};
/* Create a cursor for each interpolation type */
unsigned cursor[4] = {0};
@@ -487,11 +512,15 @@ compact_components(nir_shader *producer, nir_shader *consumer, uint8_t *comps,
}
uint64_t zero = 0;
+ uint32_t zero32 = 0;
remap_slots_and_components(input_list, consumer->info.stage, remap,
- &consumer->info.inputs_read, &zero);
+ &consumer->info.inputs_read, &zero,
+ &consumer->info.patch_inputs_read, &zero32);
remap_slots_and_components(output_list, producer->info.stage, remap,
&producer->info.outputs_written,
- &producer->info.outputs_read);
+ &producer->info.outputs_read,
+ &producer->info.patch_outputs_written,
+ &producer->info.patch_outputs_read);
}
/* We assume that this has been called more-or-less directly after
don't actually support packing patches in this patch.
---
src/compiler/nir/nir_linking_helpers.c | 73 ++++++++++++++++++--------
1 file changed, 51 insertions(+), 22 deletions(-)
diff --git a/src/compiler/nir/nir_linking_helpers.c b/src/compiler/nir/nir_linking_helpers.c
index a05890ada4..845aba5c87 100644
--- a/src/compiler/nir/nir_linking_helpers.c
+++ b/src/compiler/nir/nir_linking_helpers.c
@@ -289,15 +289,35 @@ struct varying_loc
uint32_t location;
};
+static void
+mark_all_slots_used(nir_variable *var, uint64_t *slots_used,
+ uint64_t slots_used_mask, unsigned num_slots)
+{
+ unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
+
+ slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
+ (((uint64_t)1 << num_slots) - 1) << (var->data.location - loc_offset);
+}
+
+static void
+mark_used_slots(nir_variable *var, uint64_t *slots_used, unsigned offset)
+{
+ unsigned loc_offset = offset - (var->data.patch ? VARYING_SLOT_PATCH0 : 0);
+
+ slots_used[var->data.patch ? 1 : 0] |= (uint64_t)1 << (var->data.location + loc_offset);
+}
+
static void
remap_slots_and_components(struct exec_list *var_list, gl_shader_stage stage,
struct varying_loc (*remap)[4],
- uint64_t *slots_used, uint64_t *out_slots_read)
+ uint64_t *slots_used, uint64_t *out_slots_read,
+ uint32_t *p_slots_used, uint32_t *p_out_slots_read)
{
- uint64_t out_slots_read_tmp = 0;
+ uint64_t out_slots_read_tmp[2] = {0};
+ uint64_t slots_used_tmp[2] = {0};
/* We don't touch builtins so just copy the bitmask */
- uint64_t slots_used_tmp =
+ slots_used_tmp[0] =
*slots_used & (((uint64_t)1 << (VARYING_SLOT_VAR0 - 1)) - 1);
nir_foreach_variable(var, var_list) {
@@ -305,8 +325,8 @@ remap_slots_and_components(struct exec_list *var_list, gl_shader_stage stage,
/* Only remap things that aren't built-ins */
if (var->data.location >= VARYING_SLOT_VAR0 &&
- var->data.location - VARYING_SLOT_VAR0 < 32) {
- assert(var->data.location - VARYING_SLOT_VAR0 < 32);
+ var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
+ assert(var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH);
const struct glsl_type *type = var->type;
if (nir_is_per_vertex_io(var, stage)) {
@@ -321,11 +341,17 @@ remap_slots_and_components(struct exec_list *var_list, gl_shader_stage stage,
unsigned location = var->data.location - VARYING_SLOT_VAR0;
struct varying_loc *new_loc = &remap[location][var->data.location_frac];
- uint64_t slots = (((uint64_t)1 << num_slots) - 1) << var->data.location;
- if (slots & *slots_used)
+ unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
+ uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
+ uint64_t outs_used =
+ var->data.patch ? *p_out_slots_read : *out_slots_read;
+ uint64_t slots =
+ (((uint64_t)1 << num_slots) - 1) << (var->data.location - loc_offset);
+
+ if (slots & used)
used_across_stages = true;
- if (slots & *out_slots_read)
+ if (slots & outs_used)
outputs_read = true;
if (new_loc->location) {
@@ -339,30 +365,29 @@ remap_slots_and_components(struct exec_list *var_list, gl_shader_stage stage,
* otherwise we will mess up the mask for things like partially
* marked arrays.
*/
- if (used_across_stages) {
- slots_used_tmp |=
- *slots_used & (((uint64_t)1 << num_slots) - 1) << var->data.location;
- }
+ if (used_across_stages)
+ mark_all_slots_used(var, slots_used_tmp, used, num_slots);
if (outputs_read) {
- out_slots_read_tmp |=
- *out_slots_read & (((uint64_t)1 << num_slots) - 1) << var->data.location;
+ mark_all_slots_used(var, out_slots_read_tmp, outs_used,
+ num_slots);
}
-
} else {
for (unsigned i = 0; i < num_slots; i++) {
if (used_across_stages)
- slots_used_tmp |= (uint64_t)1 << (var->data.location + i);
+ mark_used_slots(var, slots_used_tmp, i);
if (outputs_read)
- out_slots_read_tmp |= (uint64_t)1 << (var->data.location + i);
+ mark_used_slots(var, out_slots_read_tmp, i);
}
}
}
}
- *slots_used = slots_used_tmp;
- *out_slots_read = out_slots_read_tmp;
+ *slots_used = slots_used_tmp[0];
+ *out_slots_read = out_slots_read_tmp[0];
+ *p_slots_used = slots_used_tmp[1];
+ *p_out_slots_read = out_slots_read_tmp[1];
}
/* If there are empty components in the slot compact the remaining components
@@ -376,7 +401,7 @@ compact_components(nir_shader *producer, nir_shader *consumer, uint8_t *comps,
{
struct exec_list *input_list = &consumer->inputs;
struct exec_list *output_list = &producer->outputs;
- struct varying_loc remap[32][4] = {{{0}, {0}}};
+ struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = {{{0}, {0}}};
/* Create a cursor for each interpolation type */
unsigned cursor[4] = {0};
@@ -487,11 +512,15 @@ compact_components(nir_shader *producer, nir_shader *consumer, uint8_t *comps,
}
uint64_t zero = 0;
+ uint32_t zero32 = 0;
remap_slots_and_components(input_list, consumer->info.stage, remap,
- &consumer->info.inputs_read, &zero);
+ &consumer->info.inputs_read, &zero,
+ &consumer->info.patch_inputs_read, &zero32);
remap_slots_and_components(output_list, producer->info.stage, remap,
&producer->info.outputs_written,
- &producer->info.outputs_read);
+ &producer->info.outputs_read,
+ &producer->info.patch_outputs_written,
+ &producer->info.patch_outputs_read);
}
/* We assume that this has been called more-or-less directly after
--
2.19.2
2.19.2