Discussion:
[Mesa-dev] More loop unrolling
Timothy Arceri
2018-12-07 03:08:00 UTC
Permalink
This is three series combined. I've sent the first two previously
(patch 1-11 & patch 12-15) and they have been partially reviewed
by Thomas. Please see the previous sends of those series for cover
letters.

There is a small bug fix in patch 11 that was discovered by some
new piglit tests [1]. Otherwise those series only contain small
changes suggested by Thomas during review.

Patches 16-20 are new and improve loop analysis so we can unroll
more loops with the unroll function introduced in patch 11.

[1] https://patchwork.freedesktop.org/series/53712/
Timothy Arceri
2018-12-07 03:08:01 UTC
Permalink
Reviewed-by: Thomas Helland <***@gmail.com>
---
src/compiler/nir/nir_loop_analyze.c | 31 ++++++++++-------------------
1 file changed, 10 insertions(+), 21 deletions(-)

diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c
index 9c3fd2f286..c779383b36 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -624,8 +624,7 @@ find_trip_count(loop_info_state *state)
}

static bool
-force_unroll_array_access(loop_info_state *state, nir_shader *ns,
- nir_deref_instr *deref)
+force_unroll_array_access(loop_info_state *state, nir_deref_instr *deref)
{
for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
if (d->deref_type != nir_deref_type_array)
@@ -640,23 +639,18 @@ force_unroll_array_access(loop_info_state *state, nir_shader *ns,
nir_deref_instr *parent = nir_deref_instr_parent(d);
assert(glsl_type_is_array(parent->type) ||
glsl_type_is_matrix(parent->type));
- if (glsl_get_length(parent->type) == state->loop->info->trip_count) {
- state->loop->info->force_unroll = true;
+ if (glsl_get_length(parent->type) == state->loop->info->trip_count)
return true;
- }

- if (deref->mode & state->indirect_mask) {
- state->loop->info->force_unroll = true;
+ if (deref->mode & state->indirect_mask)
return true;
- }
}

return false;
}

static bool
-force_unroll_heuristics(loop_info_state *state, nir_shader *ns,
- nir_block *block)
+force_unroll_heuristics(loop_info_state *state, nir_block *block)
{
nir_foreach_instr(instr, block) {
if (instr->type != nir_instr_type_intrinsic)
@@ -670,12 +664,12 @@ force_unroll_heuristics(loop_info_state *state, nir_shader *ns,
if (intrin->intrinsic == nir_intrinsic_load_deref ||
intrin->intrinsic == nir_intrinsic_store_deref ||
intrin->intrinsic == nir_intrinsic_copy_deref) {
- if (force_unroll_array_access(state, ns,
+ if (force_unroll_array_access(state,
nir_src_as_deref(intrin->src[0])))
return true;

if (intrin->intrinsic == nir_intrinsic_copy_deref &&
- force_unroll_array_access(state, ns,
+ force_unroll_array_access(state,
nir_src_as_deref(intrin->src[1])))
return true;
}
@@ -745,15 +739,10 @@ get_loop_info(loop_info_state *state, nir_function_impl *impl)
find_trip_count(state);

nir_shader *ns = impl->function->shader;
- foreach_list_typed_safe(nir_cf_node, node, node, &state->loop->body) {
- if (node->type == nir_cf_node_block) {
- if (force_unroll_heuristics(state, ns, nir_cf_node_as_block(node)))
- break;
- } else {
- nir_foreach_block_in_cf_node(block, node) {
- if (force_unroll_heuristics(state, ns, block))
- break;
- }
+ nir_foreach_block_in_cf_node(block, &state->loop->cf_node) {
+ if (force_unroll_heuristics(state, block)) {
+ state->loop->info->force_unroll = true;
+ break;
}
}
}
--
2.19.2
Jason Ekstrand
2018-12-07 21:31:54 UTC
Permalink
Post by Timothy Arceri
---
src/compiler/nir/nir_loop_analyze.c | 31 ++++++++++-------------------
1 file changed, 10 insertions(+), 21 deletions(-)
diff --git a/src/compiler/nir/nir_loop_analyze.c
b/src/compiler/nir/nir_loop_analyze.c
index 9c3fd2f286..c779383b36 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -624,8 +624,7 @@ find_trip_count(loop_info_state *state)
}
static bool
-force_unroll_array_access(loop_info_state *state, nir_shader *ns,
- nir_deref_instr *deref)
+force_unroll_array_access(loop_info_state *state, nir_deref_instr *deref)
{
for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
if (d->deref_type != nir_deref_type_array)
@@ -640,23 +639,18 @@ force_unroll_array_access(loop_info_state *state, nir_shader *ns,
nir_deref_instr *parent = nir_deref_instr_parent(d);
assert(glsl_type_is_array(parent->type) ||
glsl_type_is_matrix(parent->type));
- if (glsl_get_length(parent->type) == state->loop->info->trip_count) {
- state->loop->info->force_unroll = true;
+ if (glsl_get_length(parent->type) == state->loop->info->trip_count)
return true;
- }
- if (deref->mode & state->indirect_mask) {
- state->loop->info->force_unroll = true;
+ if (deref->mode & state->indirect_mask)
return true;
- }
}
return false;
}
static bool
-force_unroll_heuristics(loop_info_state *state, nir_shader *ns,
- nir_block *block)
+force_unroll_heuristics(loop_info_state *state, nir_block *block)
{
nir_foreach_instr(instr, block) {
if (instr->type != nir_instr_type_intrinsic)
@@ -670,12 +664,12 @@ force_unroll_heuristics(loop_info_state *state, nir_shader *ns,
if (intrin->intrinsic == nir_intrinsic_load_deref ||
intrin->intrinsic == nir_intrinsic_store_deref ||
intrin->intrinsic == nir_intrinsic_copy_deref) {
- if (force_unroll_array_access(state, ns,
+ if (force_unroll_array_access(state,
nir_src_as_deref(intrin->src[0])))
return true;
if (intrin->intrinsic == nir_intrinsic_copy_deref &&
- force_unroll_array_access(state, ns,
+ force_unroll_array_access(state,
nir_src_as_deref(intrin->src[1])))
return true;
}
@@ -745,15 +739,10 @@ get_loop_info(loop_info_state *state,
nir_function_impl *impl)
find_trip_count(state);
nir_shader *ns = impl->function->shader;
- foreach_list_typed_safe(nir_cf_node, node, node, &state->loop->body) {
- if (node->type == nir_cf_node_block) {
- if (force_unroll_heuristics(state, ns,
nir_cf_node_as_block(node)))
- break;
- } else {
- nir_foreach_block_in_cf_node(block, node) {
- if (force_unroll_heuristics(state, ns, block))
- break;
- }
+ nir_foreach_block_in_cf_node(block, &state->loop->cf_node) {
+ if (force_unroll_heuristics(state, block)) {
+ state->loop->info->force_unroll = true;
+ break;
}
}
}
--
2.19.2
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Timothy Arceri
2018-12-07 03:08:02 UTC
Permalink
Following commits will introduce additional fields such as
guessed_trip_count. Renaming these will help avoid confusion
as our unrolling feature set grows.

Reviewed-by: Thomas Helland <***@gmail.com>
---
src/compiler/nir/nir.h | 8 +++++---
src/compiler/nir/nir_loop_analyze.c | 14 +++++++-------
src/compiler/nir/nir_opt_loop_unroll.c | 14 +++++++-------
3 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index db935c8496..ce4a81fbe1 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1886,9 +1886,11 @@ typedef struct {
/* Number of instructions in the loop */
unsigned num_instructions;

- /* How many times the loop is run (if known) */
- unsigned trip_count;
- bool is_trip_count_known;
+ /* Maximum number of times the loop is run (if known) */
+ unsigned max_trip_count;
+
+ /* Do we know the exact number of times the loop will be run */
+ bool exact_trip_count_known;

/* Unroll the loop regardless of its size */
bool force_unroll;
diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c
index c779383b36..700d1fe552 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -527,7 +527,7 @@ find_trip_count(loop_info_state *state)
{
bool trip_count_known = true;
nir_loop_terminator *limiting_terminator = NULL;
- int min_trip_count = -1;
+ int max_trip_count = -1;

list_for_each_entry(nir_loop_terminator, terminator,
&state->loop->info->loop_terminator_list,
@@ -606,8 +606,8 @@ find_trip_count(loop_info_state *state)
* iterations than previously (we have identified a more limiting
* terminator) set the trip count and limiting terminator.
*/
- if (min_trip_count == -1 || iterations < min_trip_count) {
- min_trip_count = iterations;
+ if (max_trip_count == -1 || iterations < max_trip_count) {
+ max_trip_count = iterations;
limiting_terminator = terminator;
}
break;
@@ -617,9 +617,9 @@ find_trip_count(loop_info_state *state)
}
}

- state->loop->info->is_trip_count_known = trip_count_known;
- if (min_trip_count > -1)
- state->loop->info->trip_count = min_trip_count;
+ state->loop->info->exact_trip_count_known = trip_count_known;
+ if (max_trip_count > -1)
+ state->loop->info->max_trip_count = max_trip_count;
state->loop->info->limiting_terminator = limiting_terminator;
}

@@ -639,7 +639,7 @@ force_unroll_array_access(loop_info_state *state, nir_deref_instr *deref)
nir_deref_instr *parent = nir_deref_instr_parent(d);
assert(glsl_type_is_array(parent->type) ||
glsl_type_is_matrix(parent->type));
- if (glsl_get_length(parent->type) == state->loop->info->trip_count)
+ if (glsl_get_length(parent->type) == state->loop->info->max_trip_count)
return true;

if (deref->mode & state->indirect_mask)
diff --git a/src/compiler/nir/nir_opt_loop_unroll.c b/src/compiler/nir/nir_opt_loop_unroll.c
index ea2012e292..0e9966320b 100644
--- a/src/compiler/nir/nir_opt_loop_unroll.c
+++ b/src/compiler/nir/nir_opt_loop_unroll.c
@@ -181,7 +181,7 @@ simple_unroll(nir_loop *loop)
nir_cf_list unrolled_lp_body;

/* Clone loop header and append to the loop body */
- for (unsigned i = 0; i < loop->info->trip_count; i++) {
+ for (unsigned i = 0; i < loop->info->max_trip_count; i++) {
/* Clone loop body */
nir_cf_list_clone(&unrolled_lp_body, &loop_body, loop->cf_node.parent,
remap_table);
@@ -340,7 +340,7 @@ complex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term,
* trip count == 1 we execute the code above the break twice and the
* code below it once so we need clone things twice and so on.
*/
- num_times_to_clone = loop->info->trip_count + 1;
+ num_times_to_clone = loop->info->max_trip_count + 1;
} else {
/* Pluck out the loop header */
nir_cf_extract(&lp_header, nir_before_block(header_blk),
@@ -368,7 +368,7 @@ complex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term,

nir_cf_node_remove(&limiting_term->nif->cf_node);

- num_times_to_clone = loop->info->trip_count;
+ num_times_to_clone = loop->info->max_trip_count;
}

/* In the terminator that we have no trip count for move everything after
@@ -568,14 +568,14 @@ is_loop_small_enough_to_unroll(nir_shader *shader, nir_loop_info *li)
{
unsigned max_iter = shader->options->max_unroll_iterations;

- if (li->trip_count > max_iter)
+ if (li->max_trip_count > max_iter)
return false;

if (li->force_unroll)
return true;

bool loop_not_too_large =
- li->num_instructions * li->trip_count <= max_iter * LOOP_UNROLL_LIMIT;
+ li->num_instructions * li->max_trip_count <= max_iter * LOOP_UNROLL_LIMIT;

return loop_not_too_large;
}
@@ -641,7 +641,7 @@ process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out)
if (!is_loop_small_enough_to_unroll(sh, loop->info))
goto exit;

- if (loop->info->is_trip_count_known) {
+ if (loop->info->exact_trip_count_known) {
simple_unroll(loop);
progress = true;
} else {
@@ -665,7 +665,7 @@ process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out)
* limiting terminator just do a simple unroll as the second
* terminator can never be reached.
*/
- if (loop->info->trip_count == 0 && !limiting_term_second) {
+ if (loop->info->max_trip_count == 0 && !limiting_term_second) {
simple_unroll(loop);
} else {
complex_unroll(loop, terminator, limiting_term_second);
--
2.19.2
Jason Ekstrand
2018-12-07 23:10:36 UTC
Permalink
Replacing min with max without changing any real code always looks a biit
weird but it does make sense. :-)
Post by Timothy Arceri
Following commits will introduce additional fields such as
guessed_trip_count. Renaming these will help avoid confusion
as our unrolling feature set grows.
---
src/compiler/nir/nir.h | 8 +++++---
src/compiler/nir/nir_loop_analyze.c | 14 +++++++-------
src/compiler/nir/nir_opt_loop_unroll.c | 14 +++++++-------
3 files changed, 19 insertions(+), 17 deletions(-)
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index db935c8496..ce4a81fbe1 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1886,9 +1886,11 @@ typedef struct {
/* Number of instructions in the loop */
unsigned num_instructions;
- /* How many times the loop is run (if known) */
- unsigned trip_count;
- bool is_trip_count_known;
+ /* Maximum number of times the loop is run (if known) */
+ unsigned max_trip_count;
+
+ /* Do we know the exact number of times the loop will be run */
+ bool exact_trip_count_known;
/* Unroll the loop regardless of its size */
bool force_unroll;
diff --git a/src/compiler/nir/nir_loop_analyze.c
b/src/compiler/nir/nir_loop_analyze.c
index c779383b36..700d1fe552 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -527,7 +527,7 @@ find_trip_count(loop_info_state *state)
{
bool trip_count_known = true;
nir_loop_terminator *limiting_terminator = NULL;
- int min_trip_count = -1;
+ int max_trip_count = -1;
list_for_each_entry(nir_loop_terminator, terminator,
&state->loop->info->loop_terminator_list,
@@ -606,8 +606,8 @@ find_trip_count(loop_info_state *state)
* iterations than previously (we have identified a more limiting
* terminator) set the trip count and limiting terminator.
*/
- if (min_trip_count == -1 || iterations < min_trip_count) {
- min_trip_count = iterations;
+ if (max_trip_count == -1 || iterations < max_trip_count) {
+ max_trip_count = iterations;
limiting_terminator = terminator;
}
break;
@@ -617,9 +617,9 @@ find_trip_count(loop_info_state *state)
}
}
- state->loop->info->is_trip_count_known = trip_count_known;
- if (min_trip_count > -1)
- state->loop->info->trip_count = min_trip_count;
+ state->loop->info->exact_trip_count_known = trip_count_known;
+ if (max_trip_count > -1)
+ state->loop->info->max_trip_count = max_trip_count;
state->loop->info->limiting_terminator = limiting_terminator;
}
@@ -639,7 +639,7 @@ force_unroll_array_access(loop_info_state *state,
nir_deref_instr *deref)
nir_deref_instr *parent = nir_deref_instr_parent(d);
assert(glsl_type_is_array(parent->type) ||
glsl_type_is_matrix(parent->type));
- if (glsl_get_length(parent->type) == state->loop->info->trip_count)
+ if (glsl_get_length(parent->type) ==
state->loop->info->max_trip_count)
return true;
if (deref->mode & state->indirect_mask)
diff --git a/src/compiler/nir/nir_opt_loop_unroll.c
b/src/compiler/nir/nir_opt_loop_unroll.c
index ea2012e292..0e9966320b 100644
--- a/src/compiler/nir/nir_opt_loop_unroll.c
+++ b/src/compiler/nir/nir_opt_loop_unroll.c
@@ -181,7 +181,7 @@ simple_unroll(nir_loop *loop)
nir_cf_list unrolled_lp_body;
/* Clone loop header and append to the loop body */
- for (unsigned i = 0; i < loop->info->trip_count; i++) {
+ for (unsigned i = 0; i < loop->info->max_trip_count; i++) {
/* Clone loop body */
nir_cf_list_clone(&unrolled_lp_body, &loop_body,
loop->cf_node.parent,
remap_table);
@@ -340,7 +340,7 @@ complex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term,
* trip count == 1 we execute the code above the break twice and the
* code below it once so we need clone things twice and so on.
*/
- num_times_to_clone = loop->info->trip_count + 1;
+ num_times_to_clone = loop->info->max_trip_count + 1;
} else {
/* Pluck out the loop header */
nir_cf_extract(&lp_header, nir_before_block(header_blk),
@@ -368,7 +368,7 @@ complex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term,
nir_cf_node_remove(&limiting_term->nif->cf_node);
- num_times_to_clone = loop->info->trip_count;
+ num_times_to_clone = loop->info->max_trip_count;
}
/* In the terminator that we have no trip count for move everything after
@@ -568,14 +568,14 @@ is_loop_small_enough_to_unroll(nir_shader *shader, nir_loop_info *li)
{
unsigned max_iter = shader->options->max_unroll_iterations;
- if (li->trip_count > max_iter)
+ if (li->max_trip_count > max_iter)
return false;
if (li->force_unroll)
return true;
bool loop_not_too_large =
- li->num_instructions * li->trip_count <= max_iter *
LOOP_UNROLL_LIMIT;
+ li->num_instructions * li->max_trip_count <= max_iter *
LOOP_UNROLL_LIMIT;
return loop_not_too_large;
}
@@ -641,7 +641,7 @@ process_loops(nir_shader *sh, nir_cf_node *cf_node,
bool *has_nested_loop_out)
if (!is_loop_small_enough_to_unroll(sh, loop->info))
goto exit;
- if (loop->info->is_trip_count_known) {
+ if (loop->info->exact_trip_count_known) {
simple_unroll(loop);
progress = true;
} else {
@@ -665,7 +665,7 @@ process_loops(nir_shader *sh, nir_cf_node *cf_node,
bool *has_nested_loop_out)
* limiting terminator just do a simple unroll as the second
* terminator can never be reached.
*/
- if (loop->info->trip_count == 0 && !limiting_term_second) {
+ if (loop->info->max_trip_count == 0 && !limiting_term_second) {
simple_unroll(loop);
} else {
complex_unroll(loop, terminator, limiting_term_second);
--
2.19.2
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Nils Wallménius
2018-12-09 08:23:37 UTC
Permalink
Post by Jason Ekstrand
Replacing min with max without changing any real code always looks a biit
weird but it does make sense. :-)
Post by Timothy Arceri
Following commits will introduce additional fields such as
guessed_trip_count. Renaming these will help avoid confusion
as our unrolling feature set grows.
---
src/compiler/nir/nir.h | 8 +++++---
src/compiler/nir/nir_loop_analyze.c | 14 +++++++-------
src/compiler/nir/nir_opt_loop_unroll.c | 14 +++++++-------
3 files changed, 19 insertions(+), 17 deletions(-)
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index db935c8496..ce4a81fbe1 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1886,9 +1886,11 @@ typedef struct {
/* Number of instructions in the loop */
unsigned num_instructions;
- /* How many times the loop is run (if known) */
- unsigned trip_count;
- bool is_trip_count_known;
+ /* Maximum number of times the loop is run (if known) */
+ unsigned max_trip_count;
+
+ /* Do we know the exact number of times the loop will be run */
+ bool exact_trip_count_known;
/* Unroll the loop regardless of its size */
bool force_unroll;
diff --git a/src/compiler/nir/nir_loop_analyze.c
b/src/compiler/nir/nir_loop_analyze.c
index c779383b36..700d1fe552 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -527,7 +527,7 @@ find_trip_count(loop_info_state *state)
{
bool trip_count_known = true;
nir_loop_terminator *limiting_terminator = NULL;
- int min_trip_count = -1;
+ int max_trip_count = -1;
list_for_each_entry(nir_loop_terminator, terminator,
&state->loop->info->loop_terminator_list,
@@ -606,8 +606,8 @@ find_trip_count(loop_info_state *state)
* iterations than previously (we have identified a more limiting
* terminator) set the trip count and limiting terminator.
*/
- if (min_trip_count == -1 || iterations < min_trip_count) {
- min_trip_count = iterations;
+ if (max_trip_count == -1 || iterations < max_trip_count) {
+ max_trip_count = iterations;
limiting_terminator = terminator;
}
break;
@@ -617,9 +617,9 @@ find_trip_count(loop_info_state *state)
}
}
- state->loop->info->is_trip_count_known = trip_count_known;
- if (min_trip_count > -1)
- state->loop->info->trip_count = min_trip_count;
+ state->loop->info->exact_trip_count_known = trip_count_known;
+ if (max_trip_count > -1)
+ state->loop->info->max_trip_count = max_trip_count;
state->loop->info->limiting_terminator = limiting_terminator;
}
@@ -639,7 +639,7 @@ force_unroll_array_access(loop_info_state *state,
nir_deref_instr *deref)
nir_deref_instr *parent = nir_deref_instr_parent(d);
assert(glsl_type_is_array(parent->type) ||
glsl_type_is_matrix(parent->type));
- if (glsl_get_length(parent->type) == state->loop->info->trip_count)
+ if (glsl_get_length(parent->type) ==
state->loop->info->max_trip_count)
return true;
if (deref->mode & state->indirect_mask)
diff --git a/src/compiler/nir/nir_opt_loop_unroll.c
b/src/compiler/nir/nir_opt_loop_unroll.c
index ea2012e292..0e9966320b 100644
--- a/src/compiler/nir/nir_opt_loop_unroll.c
+++ b/src/compiler/nir/nir_opt_loop_unroll.c
@@ -181,7 +181,7 @@ simple_unroll(nir_loop *loop)
nir_cf_list unrolled_lp_body;
/* Clone loop header and append to the loop body */
- for (unsigned i = 0; i < loop->info->trip_count; i++) {
+ for (unsigned i = 0; i < loop->info->max_trip_count; i++) {
/* Clone loop body */
nir_cf_list_clone(&unrolled_lp_body, &loop_body,
loop->cf_node.parent,
remap_table);
@@ -340,7 +340,7 @@ complex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term,
* trip count == 1 we execute the code above the break twice and the
* code below it once so we need clone things twice and so on.
*/
- num_times_to_clone = loop->info->trip_count + 1;
+ num_times_to_clone = loop->info->max_trip_count + 1;
} else {
/* Pluck out the loop header */
nir_cf_extract(&lp_header, nir_before_block(header_blk),
@@ -368,7 +368,7 @@ complex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term,
nir_cf_node_remove(&limiting_term->nif->cf_node);
- num_times_to_clone = loop->info->trip_count;
+ num_times_to_clone = loop->info->max_trip_count;
}
/* In the terminator that we have no trip count for move everything after
@@ -568,14 +568,14 @@ is_loop_small_enough_to_unroll(nir_shader *shader,
nir_loop_info *li)
{
unsigned max_iter = shader->options->max_unroll_iterations;
- if (li->trip_count > max_iter)
+ if (li->max_trip_count > max_iter)
return false;
if (li->force_unroll)
return true;
bool loop_not_too_large =
- li->num_instructions * li->trip_count <= max_iter *
LOOP_UNROLL_LIMIT;
+ li->num_instructions * li->max_trip_count <= max_iter * LOOP_UNROLL_LIMIT;
return loop_not_too_large;
}
@@ -641,7 +641,7 @@ process_loops(nir_shader *sh, nir_cf_node *cf_node,
bool *has_nested_loop_out)
if (!is_loop_small_enough_to_unroll(sh, loop->info))
goto exit;
- if (loop->info->is_trip_count_known) {
+ if (loop->info->exact_trip_count_known) {
simple_unroll(loop);
progress = true;
} else {
@@ -665,7 +665,7 @@ process_loops(nir_shader *sh, nir_cf_node *cf_node,
bool *has_nested_loop_out)
* limiting terminator just do a simple unroll as the second
* terminator can never be reached.
*/
- if (loop->info->trip_count == 0 && !limiting_term_second) {
+ if (loop->info->max_trip_count == 0 &&
!limiting_term_second) {
simple_unroll(loop);
} else {
complex_unroll(loop, terminator, limiting_term_second);
--
2.19.2
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
s/nit/nir/ in subject?

BR
Nils

Timothy Arceri
2018-12-07 03:08:04 UTC
Permalink
Reviewed-by: Thomas Helland <***@gmail.com>
---
src/compiler/nir/nir_opt_loop_unroll.c | 76 ++++++++++----------------
1 file changed, 28 insertions(+), 48 deletions(-)

diff --git a/src/compiler/nir/nir_opt_loop_unroll.c b/src/compiler/nir/nir_opt_loop_unroll.c
index 0e9966320b..c267c185b6 100644
--- a/src/compiler/nir/nir_opt_loop_unroll.c
+++ b/src/compiler/nir/nir_opt_loop_unroll.c
@@ -169,32 +169,21 @@ simple_unroll(nir_loop *loop)
_mesa_hash_table_create(NULL, _mesa_hash_pointer,
_mesa_key_pointer_equal);

- /* Clone the loop header */
- nir_cf_list cloned_header;
- nir_cf_list_clone(&cloned_header, &lp_header, loop->cf_node.parent,
- remap_table);
+ /* Clone the loop header and insert before the loop */
+ nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
+ nir_before_cf_node(&loop->cf_node),
+ remap_table);

- /* Insert cloned loop header before the loop */
- nir_cf_reinsert(&cloned_header, nir_before_cf_node(&loop->cf_node));
-
- /* Temp list to store the cloned loop body as we unroll */
- nir_cf_list unrolled_lp_body;
-
- /* Clone loop header and append to the loop body */
for (unsigned i = 0; i < loop->info->max_trip_count; i++) {
- /* Clone loop body */
- nir_cf_list_clone(&unrolled_lp_body, &loop_body, loop->cf_node.parent,
- remap_table);
-
- /* Insert unrolled loop body before the loop */
- nir_cf_reinsert(&unrolled_lp_body, nir_before_cf_node(&loop->cf_node));
-
- /* Clone loop header */
- nir_cf_list_clone(&cloned_header, &lp_header, loop->cf_node.parent,
- remap_table);
-
- /* Insert loop header after loop body */
- nir_cf_reinsert(&cloned_header, nir_before_cf_node(&loop->cf_node));
+ /* Clone loop body and insert before the loop */
+ nir_cf_list_clone_and_reinsert(&loop_body, loop->cf_node.parent,
+ nir_before_cf_node(&loop->cf_node),
+ remap_table);
+
+ /* Clone loop header and insert after loop body */
+ nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
+ nir_before_cf_node(&loop->cf_node),
+ remap_table);
}

/* Remove the break from the loop terminator and add instructions from
@@ -207,11 +196,9 @@ simple_unroll(nir_loop *loop)
nir_after_block(limiting_term->break_block));

/* Clone so things get properly remapped */
- nir_cf_list cloned_break_list;
- nir_cf_list_clone(&cloned_break_list, &break_list, loop->cf_node.parent,
- remap_table);
-
- nir_cf_reinsert(&cloned_break_list, nir_before_cf_node(&loop->cf_node));
+ nir_cf_list_clone_and_reinsert(&break_list, loop->cf_node.parent,
+ nir_before_cf_node(&loop->cf_node),
+ remap_table);

/* Remove the loop */
nir_cf_node_remove(&loop->cf_node);
@@ -394,19 +381,16 @@ complex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term,

/* Temp lists to store the cloned loop as we unroll */
nir_cf_list unrolled_lp_body;
- nir_cf_list cloned_header;

for (unsigned i = 0; i < num_times_to_clone; i++) {
- /* Clone loop header */
- nir_cf_list_clone(&cloned_header, &lp_header, loop->cf_node.parent,
- remap_table);

nir_cursor cursor =
get_complex_unroll_insert_location(unroll_loc,
unlimit_term->continue_from_then);

- /* Insert cloned loop header */
- nir_cf_reinsert(&cloned_header, cursor);
+ /* Clone loop header and insert in if branch */
+ nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
+ cursor, remap_table);

cursor =
get_complex_unroll_insert_location(unroll_loc,
@@ -432,28 +416,24 @@ complex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term,
if (!limiting_term_second) {
assert(unroll_loc->type == nir_cf_node_if);

- nir_cf_list_clone(&cloned_header, &lp_header, loop->cf_node.parent,
- remap_table);
-
nir_cursor cursor =
get_complex_unroll_insert_location(unroll_loc,
unlimit_term->continue_from_then);

- /* Insert cloned loop header */
- nir_cf_reinsert(&cloned_header, cursor);
-
- /* Clone so things get properly remapped, and insert break block from
- * the limiting terminator.
- */
- nir_cf_list cloned_break_blk;
- nir_cf_list_clone(&cloned_break_blk, &limit_break_list,
- loop->cf_node.parent, remap_table);
+ /* Clone loop header and insert in if branch */
+ nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
+ cursor, remap_table);

cursor =
get_complex_unroll_insert_location(unroll_loc,
unlimit_term->continue_from_then);

- nir_cf_reinsert(&cloned_break_blk, cursor);
+ /* Clone so things get properly remapped, and insert break block from
+ * the limiting terminator.
+ */
+ nir_cf_list_clone_and_reinsert(&limit_break_list, loop->cf_node.parent,
+ cursor, remap_table);
+
nir_cf_delete(&limit_break_list);
}
--
2.19.2
Jason Ekstrand
2018-12-07 23:14:40 UTC
Permalink
3 and 4 are
Post by Timothy Arceri
---
src/compiler/nir/nir_opt_loop_unroll.c | 76 ++++++++++----------------
1 file changed, 28 insertions(+), 48 deletions(-)
diff --git a/src/compiler/nir/nir_opt_loop_unroll.c
b/src/compiler/nir/nir_opt_loop_unroll.c
index 0e9966320b..c267c185b6 100644
--- a/src/compiler/nir/nir_opt_loop_unroll.c
+++ b/src/compiler/nir/nir_opt_loop_unroll.c
@@ -169,32 +169,21 @@ simple_unroll(nir_loop *loop)
_mesa_hash_table_create(NULL, _mesa_hash_pointer,
_mesa_key_pointer_equal);
- /* Clone the loop header */
- nir_cf_list cloned_header;
- nir_cf_list_clone(&cloned_header, &lp_header, loop->cf_node.parent,
- remap_table);
+ /* Clone the loop header and insert before the loop */
+ nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
+ nir_before_cf_node(&loop->cf_node),
+ remap_table);
- /* Insert cloned loop header before the loop */
- nir_cf_reinsert(&cloned_header, nir_before_cf_node(&loop->cf_node));
-
- /* Temp list to store the cloned loop body as we unroll */
- nir_cf_list unrolled_lp_body;
-
- /* Clone loop header and append to the loop body */
for (unsigned i = 0; i < loop->info->max_trip_count; i++) {
- /* Clone loop body */
- nir_cf_list_clone(&unrolled_lp_body, &loop_body,
loop->cf_node.parent,
- remap_table);
-
- /* Insert unrolled loop body before the loop */
- nir_cf_reinsert(&unrolled_lp_body,
nir_before_cf_node(&loop->cf_node));
-
- /* Clone loop header */
- nir_cf_list_clone(&cloned_header, &lp_header, loop->cf_node.parent,
- remap_table);
-
- /* Insert loop header after loop body */
- nir_cf_reinsert(&cloned_header, nir_before_cf_node(&loop->cf_node));
+ /* Clone loop body and insert before the loop */
+ nir_cf_list_clone_and_reinsert(&loop_body, loop->cf_node.parent,
+ nir_before_cf_node(&loop->cf_node),
+ remap_table);
+
+ /* Clone loop header and insert after loop body */
+ nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
+ nir_before_cf_node(&loop->cf_node),
+ remap_table);
}
/* Remove the break from the loop terminator and add instructions from
@@ -207,11 +196,9 @@ simple_unroll(nir_loop *loop)
nir_after_block(limiting_term->break_block));
/* Clone so things get properly remapped */
- nir_cf_list cloned_break_list;
- nir_cf_list_clone(&cloned_break_list, &break_list,
loop->cf_node.parent,
- remap_table);
-
- nir_cf_reinsert(&cloned_break_list,
nir_before_cf_node(&loop->cf_node));
+ nir_cf_list_clone_and_reinsert(&break_list, loop->cf_node.parent,
+ nir_before_cf_node(&loop->cf_node),
+ remap_table);
/* Remove the loop */
nir_cf_node_remove(&loop->cf_node);
@@ -394,19 +381,16 @@ complex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term,
/* Temp lists to store the cloned loop as we unroll */
nir_cf_list unrolled_lp_body;
- nir_cf_list cloned_header;
for (unsigned i = 0; i < num_times_to_clone; i++) {
- /* Clone loop header */
- nir_cf_list_clone(&cloned_header, &lp_header, loop->cf_node.parent,
- remap_table);
nir_cursor cursor =
get_complex_unroll_insert_location(unroll_loc,
unlimit_term->continue_from_then);
- /* Insert cloned loop header */
- nir_cf_reinsert(&cloned_header, cursor);
+ /* Clone loop header and insert in if branch */
+ nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
+ cursor, remap_table);
cursor =
get_complex_unroll_insert_location(unroll_loc,
@@ -432,28 +416,24 @@ complex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term,
if (!limiting_term_second) {
assert(unroll_loc->type == nir_cf_node_if);
- nir_cf_list_clone(&cloned_header, &lp_header, loop->cf_node.parent,
- remap_table);
-
nir_cursor cursor =
get_complex_unroll_insert_location(unroll_loc,
unlimit_term->continue_from_then);
- /* Insert cloned loop header */
- nir_cf_reinsert(&cloned_header, cursor);
-
- /* Clone so things get properly remapped, and insert break block from
- * the limiting terminator.
- */
- nir_cf_list cloned_break_blk;
- nir_cf_list_clone(&cloned_break_blk, &limit_break_list,
- loop->cf_node.parent, remap_table);
+ /* Clone loop header and insert in if branch */
+ nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
+ cursor, remap_table);
cursor =
get_complex_unroll_insert_location(unroll_loc,
unlimit_term->continue_from_then);
- nir_cf_reinsert(&cloned_break_blk, cursor);
+ /* Clone so things get properly remapped, and insert break block from
+ * the limiting terminator.
+ */
+ nir_cf_list_clone_and_reinsert(&limit_break_list,
loop->cf_node.parent,
+ cursor, remap_table);
+
nir_cf_delete(&limit_break_list);
}
--
2.19.2
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Timothy Arceri
2018-12-07 03:08:03 UTC
Permalink
Reviewed-by: Thomas Helland <***@gmail.com>
---
src/compiler/nir/nir_control_flow.h | 10 ++++++++++
1 file changed, 10 insertions(+)

diff --git a/src/compiler/nir/nir_control_flow.h b/src/compiler/nir/nir_control_flow.h
index 2ea460e5df..9111b30a29 100644
--- a/src/compiler/nir/nir_control_flow.h
+++ b/src/compiler/nir/nir_control_flow.h
@@ -145,6 +145,16 @@ void nir_cf_delete(nir_cf_list *cf_list);
void nir_cf_list_clone(nir_cf_list *dst, nir_cf_list *src, nir_cf_node *parent,
struct hash_table *remap_table);

+static inline void
+nir_cf_list_clone_and_reinsert(nir_cf_list *src_list, nir_cf_node *parent,
+ nir_cursor cursor,
+ struct hash_table *remap_table)
+{
+ nir_cf_list list;
+ nir_cf_list_clone(&list, src_list, parent, remap_table);
+ nir_cf_reinsert(&list, cursor);
+}
+
static inline void
nir_cf_list_extract(nir_cf_list *extracted, struct exec_list *cf_list)
{
--
2.19.2
Timothy Arceri
2018-12-07 03:08:05 UTC
Permalink
Reviewed-by: Thomas Helland <***@gmail.com>
---
src/compiler/nir/nir_opt_loop_unroll.c | 115 ++++++++++++++-----------
1 file changed, 64 insertions(+), 51 deletions(-)

diff --git a/src/compiler/nir/nir_opt_loop_unroll.c b/src/compiler/nir/nir_opt_loop_unroll.c
index c267c185b6..8406880204 100644
--- a/src/compiler/nir/nir_opt_loop_unroll.c
+++ b/src/compiler/nir/nir_opt_loop_unroll.c
@@ -236,6 +236,65 @@ get_complex_unroll_insert_location(nir_cf_node *node, bool continue_from_then)
}
}

+static nir_cf_node *
+complex_unroll_loop_body(nir_loop *loop, nir_loop_terminator *unlimit_term,
+ nir_cf_list *lp_header, nir_cf_list *lp_body,
+ struct hash_table *remap_table,
+ unsigned num_times_to_clone)
+{
+ /* In the terminator that we have no trip count for move everything after
+ * the terminator into the continue from branch.
+ */
+ nir_cf_list loop_end;
+ nir_cf_extract(&loop_end, nir_after_cf_node(&unlimit_term->nif->cf_node),
+ nir_after_block(nir_loop_last_block(loop)));
+ move_cf_list_into_loop_term(&loop_end, unlimit_term);
+
+ /* Pluck out the loop body. */
+ nir_cf_extract(lp_body, nir_before_block(nir_loop_first_block(loop)),
+ nir_after_block(nir_loop_last_block(loop)));
+
+ /* Set unroll_loc to the loop as we will insert the unrolled loop before it
+ */
+ nir_cf_node *unroll_loc = &loop->cf_node;
+
+ /* Temp list to store the cloned loop as we unroll */
+ nir_cf_list unrolled_lp_body;
+
+ for (unsigned i = 0; i < num_times_to_clone; i++) {
+
+ nir_cursor cursor =
+ get_complex_unroll_insert_location(unroll_loc,
+ unlimit_term->continue_from_then);
+
+ /* Clone loop header and insert in if branch */
+ nir_cf_list_clone_and_reinsert(lp_header, loop->cf_node.parent,
+ cursor, remap_table);
+
+ cursor =
+ get_complex_unroll_insert_location(unroll_loc,
+ unlimit_term->continue_from_then);
+
+ /* Clone loop body */
+ nir_cf_list_clone(&unrolled_lp_body, lp_body, loop->cf_node.parent,
+ remap_table);
+
+ unroll_loc = exec_node_data(nir_cf_node,
+ exec_list_get_tail(&unrolled_lp_body.list),
+ node);
+ assert(unroll_loc->type == nir_cf_node_block &&
+ exec_list_is_empty(&nir_cf_node_as_block(unroll_loc)->instr_list));
+
+ /* Get the unrolled if node */
+ unroll_loc = nir_cf_node_prev(unroll_loc);
+
+ /* Insert unrolled loop body */
+ nir_cf_reinsert(&unrolled_lp_body, cursor);
+ }
+
+ return unroll_loc;
+}
+
/**
* Unroll a loop with two exists when the trip count of one of the exits is
* unknown. If continue_from_then is true, the loop is repeated only when the
@@ -358,60 +417,14 @@ complex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term,
num_times_to_clone = loop->info->max_trip_count;
}

- /* In the terminator that we have no trip count for move everything after
- * the terminator into the continue from branch.
- */
- nir_cf_list loop_end;
- nir_cf_extract(&loop_end, nir_after_cf_node(&unlimit_term->nif->cf_node),
- nir_after_block(nir_loop_last_block(loop)));
- move_cf_list_into_loop_term(&loop_end, unlimit_term);
-
- /* Pluck out the loop body. */
- nir_cf_list loop_body;
- nir_cf_extract(&loop_body, nir_before_block(nir_loop_first_block(loop)),
- nir_after_block(nir_loop_last_block(loop)));
-
struct hash_table *remap_table =
_mesa_hash_table_create(NULL, _mesa_hash_pointer,
_mesa_key_pointer_equal);

- /* Set unroll_loc to the loop as we will insert the unrolled loop before it
- */
- nir_cf_node *unroll_loc = &loop->cf_node;
-
- /* Temp lists to store the cloned loop as we unroll */
- nir_cf_list unrolled_lp_body;
-
- for (unsigned i = 0; i < num_times_to_clone; i++) {
-
- nir_cursor cursor =
- get_complex_unroll_insert_location(unroll_loc,
- unlimit_term->continue_from_then);
-
- /* Clone loop header and insert in if branch */
- nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
- cursor, remap_table);
-
- cursor =
- get_complex_unroll_insert_location(unroll_loc,
- unlimit_term->continue_from_then);
-
- /* Clone loop body */
- nir_cf_list_clone(&unrolled_lp_body, &loop_body, loop->cf_node.parent,
- remap_table);
-
- unroll_loc = exec_node_data(nir_cf_node,
- exec_list_get_tail(&unrolled_lp_body.list),
- node);
- assert(unroll_loc->type == nir_cf_node_block &&
- exec_list_is_empty(&nir_cf_node_as_block(unroll_loc)->instr_list));
-
- /* Get the unrolled if node */
- unroll_loc = nir_cf_node_prev(unroll_loc);
-
- /* Insert unrolled loop body */
- nir_cf_reinsert(&unrolled_lp_body, cursor);
- }
+ nir_cf_list lp_body;
+ nir_cf_node *unroll_loc =
+ complex_unroll_loop_body(loop, unlimit_term, &lp_header, &lp_body,
+ remap_table, num_times_to_clone);

if (!limiting_term_second) {
assert(unroll_loc->type == nir_cf_node_if);
@@ -442,7 +455,7 @@ complex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term,

/* Delete the original loop header and body */
nir_cf_delete(&lp_header);
- nir_cf_delete(&loop_body);
+ nir_cf_delete(&lp_body);

_mesa_hash_table_destroy(remap_table, NULL);
}
--
2.19.2
Jason Ekstrand
2018-12-07 23:18:48 UTC
Permalink
I haven't checked the details
Post by Timothy Arceri
---
src/compiler/nir/nir_opt_loop_unroll.c | 115 ++++++++++++++-----------
1 file changed, 64 insertions(+), 51 deletions(-)
diff --git a/src/compiler/nir/nir_opt_loop_unroll.c
b/src/compiler/nir/nir_opt_loop_unroll.c
index c267c185b6..8406880204 100644
--- a/src/compiler/nir/nir_opt_loop_unroll.c
+++ b/src/compiler/nir/nir_opt_loop_unroll.c
@@ -236,6 +236,65 @@ get_complex_unroll_insert_location(nir_cf_node *node,
bool continue_from_then)
}
}
+static nir_cf_node *
+complex_unroll_loop_body(nir_loop *loop, nir_loop_terminator
*unlimit_term,
+ nir_cf_list *lp_header, nir_cf_list *lp_body,
+ struct hash_table *remap_table,
+ unsigned num_times_to_clone)
+{
+ /* In the terminator that we have no trip count for move everything after
+ * the terminator into the continue from branch.
+ */
+ nir_cf_list loop_end;
+ nir_cf_extract(&loop_end,
nir_after_cf_node(&unlimit_term->nif->cf_node),
+ nir_after_block(nir_loop_last_block(loop)));
+ move_cf_list_into_loop_term(&loop_end, unlimit_term);
+
+ /* Pluck out the loop body. */
+ nir_cf_extract(lp_body, nir_before_block(nir_loop_first_block(loop)),
+ nir_after_block(nir_loop_last_block(loop)));
+
+ /* Set unroll_loc to the loop as we will insert the unrolled loop before it
+ */
+ nir_cf_node *unroll_loc = &loop->cf_node;
+
+ /* Temp list to store the cloned loop as we unroll */
+ nir_cf_list unrolled_lp_body;
+
+ for (unsigned i = 0; i < num_times_to_clone; i++) {
+
+ nir_cursor cursor =
+ get_complex_unroll_insert_location(unroll_loc,
+
unlimit_term->continue_from_then);
+
+ /* Clone loop header and insert in if branch */
+ nir_cf_list_clone_and_reinsert(lp_header, loop->cf_node.parent,
+ cursor, remap_table);
+
+ cursor =
+ get_complex_unroll_insert_location(unroll_loc,
+
unlimit_term->continue_from_then);
+
+ /* Clone loop body */
+ nir_cf_list_clone(&unrolled_lp_body, lp_body, loop->cf_node.parent,
+ remap_table);
+
+ unroll_loc = exec_node_data(nir_cf_node,
+
exec_list_get_tail(&unrolled_lp_body.list),
+ node);
+ assert(unroll_loc->type == nir_cf_node_block &&
+
exec_list_is_empty(&nir_cf_node_as_block(unroll_loc)->instr_list));
+
+ /* Get the unrolled if node */
+ unroll_loc = nir_cf_node_prev(unroll_loc);
+
+ /* Insert unrolled loop body */
+ nir_cf_reinsert(&unrolled_lp_body, cursor);
+ }
+
+ return unroll_loc;
+}
+
/**
* Unroll a loop with two exists when the trip count of one of the exits is
* unknown. If continue_from_then is true, the loop is repeated only when the
@@ -358,60 +417,14 @@ complex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term,
num_times_to_clone = loop->info->max_trip_count;
}
- /* In the terminator that we have no trip count for move everything after
- * the terminator into the continue from branch.
- */
- nir_cf_list loop_end;
- nir_cf_extract(&loop_end,
nir_after_cf_node(&unlimit_term->nif->cf_node),
- nir_after_block(nir_loop_last_block(loop)));
- move_cf_list_into_loop_term(&loop_end, unlimit_term);
-
- /* Pluck out the loop body. */
- nir_cf_list loop_body;
- nir_cf_extract(&loop_body,
nir_before_block(nir_loop_first_block(loop)),
- nir_after_block(nir_loop_last_block(loop)));
-
struct hash_table *remap_table =
_mesa_hash_table_create(NULL, _mesa_hash_pointer,
_mesa_key_pointer_equal);
- /* Set unroll_loc to the loop as we will insert the unrolled loop before it
- */
- nir_cf_node *unroll_loc = &loop->cf_node;
-
- /* Temp lists to store the cloned loop as we unroll */
- nir_cf_list unrolled_lp_body;
-
- for (unsigned i = 0; i < num_times_to_clone; i++) {
-
- nir_cursor cursor =
- get_complex_unroll_insert_location(unroll_loc,
-
unlimit_term->continue_from_then);
-
- /* Clone loop header and insert in if branch */
- nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
- cursor, remap_table);
-
- cursor =
- get_complex_unroll_insert_location(unroll_loc,
-
unlimit_term->continue_from_then);
-
- /* Clone loop body */
- nir_cf_list_clone(&unrolled_lp_body, &loop_body,
loop->cf_node.parent,
- remap_table);
-
- unroll_loc = exec_node_data(nir_cf_node,
-
exec_list_get_tail(&unrolled_lp_body.list),
- node);
- assert(unroll_loc->type == nir_cf_node_block &&
-
exec_list_is_empty(&nir_cf_node_as_block(unroll_loc)->instr_list));
-
- /* Get the unrolled if node */
- unroll_loc = nir_cf_node_prev(unroll_loc);
-
- /* Insert unrolled loop body */
- nir_cf_reinsert(&unrolled_lp_body, cursor);
- }
+ nir_cf_list lp_body;
+ nir_cf_node *unroll_loc =
+ complex_unroll_loop_body(loop, unlimit_term, &lp_header, &lp_body,
+ remap_table, num_times_to_clone);
if (!limiting_term_second) {
assert(unroll_loc->type == nir_cf_node_if);
@@ -442,7 +455,7 @@ complex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term,
/* Delete the original loop header and body */
nir_cf_delete(&lp_header);
- nir_cf_delete(&loop_body);
+ nir_cf_delete(&lp_body);
_mesa_hash_table_destroy(remap_table, NULL);
}
--
2.19.2
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Timothy Arceri
2018-12-07 03:08:07 UTC
Permalink
This detects an induction variable used as an array index to guess
the trip count of the loop. This enables us to do a partial
unroll of the loop, with can eventually result in the loop being
eliminated.
---
src/compiler/nir/nir.h | 4 ++
src/compiler/nir/nir_loop_analyze.c | 78 ++++++++++++++++++++++++++---
2 files changed, 76 insertions(+), 6 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index ce4a81fbe1..a40e5a1418 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1878,6 +1878,7 @@ typedef struct {
nir_block *continue_from_block;

bool continue_from_then;
+ bool induction_rhs;

struct list_head loop_terminator_link;
} nir_loop_terminator;
@@ -1886,6 +1887,9 @@ typedef struct {
/* Number of instructions in the loop */
unsigned num_instructions;

+ /* Guessed trip count based on array indexing */
+ unsigned guessed_trip_count;
+
/* Maximum number of times the loop is run (if known) */
unsigned max_trip_count;

diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c
index eef224e4d5..ffcf2a3c27 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -382,6 +382,50 @@ find_array_access_via_induction(loop_info_state *state,
return 0;
}

+static bool
+guess_loop_limit(loop_info_state *state, nir_const_value *limit_val,
+ nir_loop_variable *basic_ind)
+{
+ nir_foreach_block_in_cf_node(block, &state->loop->cf_node) {
+ nir_foreach_instr(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+ /* Check for arrays variably-indexed by a loop induction variable. */
+ if (intrin->intrinsic == nir_intrinsic_load_deref ||
+ intrin->intrinsic == nir_intrinsic_store_deref ||
+ intrin->intrinsic == nir_intrinsic_copy_deref) {
+
+ nir_loop_variable *array_idx = NULL;
+ unsigned array_size =
+ find_array_access_via_induction(state,
+ nir_src_as_deref(intrin->src[0]),
+ &array_idx);
+ if (basic_ind == array_idx) {
+ limit_val->i32[0] = array_size;
+ return true;
+ }
+
+ if (intrin->intrinsic != nir_intrinsic_copy_deref)
+ continue;
+
+ array_size =
+ find_array_access_via_induction(state,
+ nir_src_as_deref(intrin->src[1]),
+ &array_idx);
+ if (basic_ind == array_idx) {
+ limit_val->i32[0] = array_size;
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
static int32_t
get_iteration(nir_op cond_op, nir_const_value *initial, nir_const_value *step,
nir_const_value *limit)
@@ -558,6 +602,7 @@ static void
find_trip_count(loop_info_state *state)
{
bool trip_count_known = true;
+ bool guessed_trip_count = false;
nir_loop_terminator *limiting_terminator = NULL;
int max_trip_count = -1;

@@ -593,16 +638,33 @@ find_trip_count(loop_info_state *state)
basic_ind = get_loop_var(alu->src[1].src.ssa, state);
limit = get_loop_var(alu->src[0].src.ssa, state);
limit_rhs = false;
+ terminator->induction_rhs = true;
}

- /* The comparison has to have a basic induction variable
- * and a constant for us to be able to find trip counts
+ /* The comparison has to have a basic induction variable for us to be
+ * able to find trip counts.
*/
- if (basic_ind->type != basic_induction || !is_var_constant(limit)) {
+ if (basic_ind->type != basic_induction) {
trip_count_known = false;
continue;
}

+ /* Attempt to find a constant limit for the loop */
+ nir_const_value limit_val;
+ if (is_var_constant(limit)) {
+ limit_val =
+ nir_instr_as_load_const(limit->def->parent_instr)->value;
+ } else {
+ trip_count_known = false;
+
+ /* Guess loop limit based on array access */
+ if (!guess_loop_limit(state, &limit_val, basic_ind)) {
+ continue;
+ }
+
+ guessed_trip_count = true;
+ }
+
/* We have determined that we have the following constants:
* (With the typical int i = 0; i < x; i++; as an example)
* - Upper limit.
@@ -619,9 +681,6 @@ find_trip_count(loop_info_state *state)
nir_instr_as_load_const(basic_ind->ind->invariant->def->
parent_instr)->value;

- nir_const_value limit_val =
- nir_instr_as_load_const(limit->def->parent_instr)->value;
-
int iterations = calculate_iterations(&initial_val, &step_val,
&limit_val,
basic_ind->ind->alu_def, alu,
@@ -631,6 +690,13 @@ find_trip_count(loop_info_state *state)
/* Where we not able to calculate the iteration count */
if (iterations == -1) {
trip_count_known = false;
+ guessed_trip_count = false;
+ continue;
+ }
+
+ if (guessed_trip_count) {
+ guessed_trip_count = false;
+ state->loop->info->guessed_trip_count = iterations;
continue;
}
--
2.19.2
Jason Ekstrand
2018-12-08 00:16:42 UTC
Permalink
Post by Timothy Arceri
This detects an induction variable used as an array index to guess
the trip count of the loop. This enables us to do a partial
unroll of the loop, with can eventually result in the loop being
eliminated.
---
src/compiler/nir/nir.h | 4 ++
src/compiler/nir/nir_loop_analyze.c | 78 ++++++++++++++++++++++++++---
2 files changed, 76 insertions(+), 6 deletions(-)
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index ce4a81fbe1..a40e5a1418 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1878,6 +1878,7 @@ typedef struct {
nir_block *continue_from_block;
bool continue_from_then;
+ bool induction_rhs;
struct list_head loop_terminator_link;
} nir_loop_terminator;
@@ -1886,6 +1887,9 @@ typedef struct {
/* Number of instructions in the loop */
unsigned num_instructions;
+ /* Guessed trip count based on array indexing */
+ unsigned guessed_trip_count;
+
/* Maximum number of times the loop is run (if known) */
unsigned max_trip_count;
diff --git a/src/compiler/nir/nir_loop_analyze.c
b/src/compiler/nir/nir_loop_analyze.c
index eef224e4d5..ffcf2a3c27 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -382,6 +382,50 @@ find_array_access_via_induction(loop_info_state *state,
return 0;
}
+static bool
+guess_loop_limit(loop_info_state *state, nir_const_value *limit_val,
+ nir_loop_variable *basic_ind)
+{
+ nir_foreach_block_in_cf_node(block, &state->loop->cf_node) {
+ nir_foreach_instr(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+ /* Check for arrays variably-indexed by a loop induction variable. */
+ if (intrin->intrinsic == nir_intrinsic_load_deref ||
+ intrin->intrinsic == nir_intrinsic_store_deref ||
+ intrin->intrinsic == nir_intrinsic_copy_deref) {
+
+ nir_loop_variable *array_idx = NULL;
+ unsigned array_size =
+ find_array_access_via_induction(state,
+
nir_src_as_deref(intrin->src[0]),
+ &array_idx);
+ if (basic_ind == array_idx) {
+ limit_val->i32[0] = array_size;
+ return true;
What if it's used for multiple array accesses of different lengths? This
just takes the first one. It seems like we could be smarter.
Post by Timothy Arceri
+ }
+
+ if (intrin->intrinsic != nir_intrinsic_copy_deref)
+ continue;
+
+ array_size =
+ find_array_access_via_induction(state,
+
nir_src_as_deref(intrin->src[1]),
+ &array_idx);
+ if (basic_ind == array_idx) {
+ limit_val->i32[0] = array_size;
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
static int32_t
get_iteration(nir_op cond_op, nir_const_value *initial, nir_const_value *step,
nir_const_value *limit)
@@ -558,6 +602,7 @@ static void
find_trip_count(loop_info_state *state)
{
bool trip_count_known = true;
+ bool guessed_trip_count = false;
nir_loop_terminator *limiting_terminator = NULL;
int max_trip_count = -1;
@@ -593,16 +638,33 @@ find_trip_count(loop_info_state *state)
basic_ind = get_loop_var(alu->src[1].src.ssa, state);
limit = get_loop_var(alu->src[0].src.ssa, state);
limit_rhs = false;
+ terminator->induction_rhs = true;
}
- /* The comparison has to have a basic induction variable
- * and a constant for us to be able to find trip counts
+ /* The comparison has to have a basic induction variable for us to be
+ * able to find trip counts.
*/
- if (basic_ind->type != basic_induction ||
!is_var_constant(limit)) {
+ if (basic_ind->type != basic_induction) {
trip_count_known = false;
continue;
}
+ /* Attempt to find a constant limit for the loop */
+ nir_const_value limit_val;
+ if (is_var_constant(limit)) {
+ limit_val =
+ nir_instr_as_load_const(limit->def->parent_instr)->value;
+ } else {
+ trip_count_known = false;
+
+ /* Guess loop limit based on array access */
+ if (!guess_loop_limit(state, &limit_val, basic_ind)) {
+ continue;
+ }
+
+ guessed_trip_count = true;
+ }
+
* (With the typical int i = 0; i < x; i++; as an example)
* - Upper limit.
@@ -619,9 +681,6 @@ find_trip_count(loop_info_state *state)
nir_instr_as_load_const(basic_ind->ind->invariant->def->
parent_instr)->value;
- nir_const_value limit_val =
- nir_instr_as_load_const(limit->def->parent_instr)->value;
-
int iterations = calculate_iterations(&initial_val, &step_val,
&limit_val,
basic_ind->ind->alu_def, alu,
@@ -631,6 +690,13 @@ find_trip_count(loop_info_state *state)
/* Where we not able to calculate the iteration count */
if (iterations == -1) {
trip_count_known = false;
+ guessed_trip_count = false;
+ continue;
+ }
+
+ if (guessed_trip_count) {
+ guessed_trip_count = false;
This resetting confuses me. Why not just make it local to the loop?
Post by Timothy Arceri
+ state->loop->info->guessed_trip_count = iterations;
continue;
}
--
2.19.2
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Timothy Arceri
2018-12-08 04:10:03 UTC
Permalink
Post by Timothy Arceri
This detects an induction variable used as an array index to guess
the trip count of the loop. This enables us to do a partial
unroll of the loop, with can eventually result in the loop being
eliminated.
---
 src/compiler/nir/nir.h              |  4 ++
 src/compiler/nir/nir_loop_analyze.c | 78 ++++++++++++++++++++++++++---
 2 files changed, 76 insertions(+), 6 deletions(-)
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index ce4a81fbe1..a40e5a1418 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1878,6 +1878,7 @@ typedef struct {
    nir_block *continue_from_block;
    bool continue_from_then;
+   bool induction_rhs;
    struct list_head loop_terminator_link;
 } nir_loop_terminator;
@@ -1886,6 +1887,9 @@ typedef struct {
    /* Number of instructions in the loop */
    unsigned num_instructions;
+   /* Guessed trip count based on array indexing */
+   unsigned guessed_trip_count;
+
    /* Maximum number of times the loop is run (if known) */
    unsigned max_trip_count;
diff --git a/src/compiler/nir/nir_loop_analyze.c
b/src/compiler/nir/nir_loop_analyze.c
index eef224e4d5..ffcf2a3c27 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -382,6 +382,50 @@ find_array_access_via_induction(loop_info_state *state,
    return 0;
 }
+static bool
+guess_loop_limit(loop_info_state *state, nir_const_value *limit_val,
+                 nir_loop_variable *basic_ind)
+{
+   nir_foreach_block_in_cf_node(block, &state->loop->cf_node) {
+      nir_foreach_instr(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+         /* Check for arrays variably-indexed by a loop induction
variable. */
+         if (intrin->intrinsic == nir_intrinsic_load_deref ||
+             intrin->intrinsic == nir_intrinsic_store_deref ||
+             intrin->intrinsic == nir_intrinsic_copy_deref) {
+
+            nir_loop_variable *array_idx = NULL;
+            unsigned array_size =
+               find_array_access_via_induction(state,
+
 nir_src_as_deref(intrin->src[0]),
+                                               &array_idx);
+            if (basic_ind == array_idx) {
+               limit_val->i32[0] = array_size;
+               return true;
What if it's used for multiple array accesses of different lengths?
This just takes the first one.  It seems like we could be smarter.
Yeah I guess so. I'll have another go at this.
Post by Timothy Arceri
+            }
+
+            if (intrin->intrinsic != nir_intrinsic_copy_deref)
+               continue;
+
+            array_size =
+               find_array_access_via_induction(state,
+
 nir_src_as_deref(intrin->src[1]),
+                                               &array_idx);
+            if (basic_ind == array_idx) {
+               limit_val->i32[0] = array_size;
+               return true;
+            }
+         }
+      }
+   }
+
+   return false;
+}
+
 static int32_t
 get_iteration(nir_op cond_op, nir_const_value *initial,
nir_const_value *step,
               nir_const_value *limit)
@@ -558,6 +602,7 @@ static void
 find_trip_count(loop_info_state *state)
 {
    bool trip_count_known = true;
+   bool guessed_trip_count = false;
    nir_loop_terminator *limiting_terminator = NULL;
    int max_trip_count = -1;
@@ -593,16 +638,33 @@ find_trip_count(loop_info_state *state)
             basic_ind = get_loop_var(alu->src[1].src.ssa, state);
             limit = get_loop_var(alu->src[0].src.ssa, state);
             limit_rhs = false;
+            terminator->induction_rhs = true;
          }
-         /* The comparison has to have a basic induction variable
-          * and a constant for us to be able to find trip counts
+         /* The comparison has to have a basic induction variable
for us to be
+          * able to find trip counts.
           */
-         if (basic_ind->type != basic_induction ||
!is_var_constant(limit)) {
+         if (basic_ind->type != basic_induction) {
             trip_count_known = false;
             continue;
          }
+         /* Attempt to find a constant limit for the loop */
+         nir_const_value limit_val;
+         if (is_var_constant(limit)) {
+            limit_val =
+
 nir_instr_as_load_const(limit->def->parent_instr)->value;
+         } else {
+            trip_count_known = false;
+
+            /* Guess loop limit based on array access */
+            if (!guess_loop_limit(state, &limit_val, basic_ind)) {
+               continue;
+            }
+
+            guessed_trip_count = true;
+         }
+
           * (With the typical int i = 0; i < x; i++; as an example)
           *    - Upper limit.
@@ -619,9 +681,6 @@ find_trip_count(loop_info_state *state)
             nir_instr_as_load_const(basic_ind->ind->invariant->def->
                                        parent_instr)->value;
-         nir_const_value limit_val =
-            nir_instr_as_load_const(limit->def->parent_instr)->value;
-
          int iterations = calculate_iterations(&initial_val,
&step_val,
                                                &limit_val,
basic_ind->ind->alu_def, alu,
@@ -631,6 +690,13 @@ find_trip_count(loop_info_state *state)
          /* Where we not able to calculate the iteration count */
          if (iterations == -1) {
             trip_count_known = false;
+            guessed_trip_count = false;
+            continue;
+         }
+
+         if (guessed_trip_count) {
+            guessed_trip_count = false;
This resetting confuses me.  Why not just make it local to the loop?
Yeah it is a little confusing but since we can have multiple terminators
we need a way to only update guessed_trip_count when the trip count was
guessed for the current terminator. This is also why we have a continue
i.e. so we don't update the regular trip count variable.

For example we could have a guessed trip count of 10 but another
terminator could guarantee a known trip count of 4. In which case we can
do a simple unroll and ignore the guessed count.

I can add a comment here to make this more clear.

Also I think this code needs to be updated to handle multiple guessed
trip counts if we want to support that corner case as per your previous
comment.
Post by Timothy Arceri
+            state->loop->info->guessed_trip_count = iterations;
             continue;
          }
--
2.19.2
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Timothy Arceri
2018-12-07 03:08:08 UTC
Permalink
In order to stop continuously partially unrolling the same loop
we add the bool partialy_unrolled to nir_loop, we add it here
rather than in nir_loop_info because nir_loop_info is only set
via loop analysis and is intended to be cleared before each
analysis. Also nir_loop_info is never cloned.
---
src/compiler/nir/nir.h | 1 +
src/compiler/nir/nir_clone.c | 1 +
2 files changed, 2 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index a40e5a1418..bf015bb53a 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1917,6 +1917,7 @@ typedef struct {
struct exec_list body; /** < list of nir_cf_node */

nir_loop_info *info;
+ bool partially_unrolled;
} nir_loop;

/**
diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c
index 989c5051a5..b229094679 100644
--- a/src/compiler/nir/nir_clone.c
+++ b/src/compiler/nir/nir_clone.c
@@ -548,6 +548,7 @@ static nir_loop *
clone_loop(clone_state *state, struct exec_list *cf_list, const nir_loop *loop)
{
nir_loop *nloop = nir_loop_create(state->ns);
+ nloop->partially_unrolled = loop->partially_unrolled;

nir_cf_node_insert_end(cf_list, &nloop->cf_node);
--
2.19.2
Timothy Arceri
2018-12-07 03:08:06 UTC
Permalink
Here we rework force_unroll_array_access() so that we can reused
the induction variable detection in a following patch.

Reviewed-by: Thomas Helland <***@gmail.com>
---
src/compiler/nir/nir_loop_analyze.c | 49 ++++++++++++++++++++---------
1 file changed, 35 insertions(+), 14 deletions(-)

diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c
index 700d1fe552..eef224e4d5 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -350,6 +350,38 @@ find_loop_terminators(loop_info_state *state)
return success;
}

+/* This function looks for an array access within a loop that uses an
+ * induction variable for the array index. If found it returns the size of the
+ * array, otherwise 0 is returned. If we find an induction var we pass it back
+ * to the caller via array_index_out.
+ */
+static unsigned
+find_array_access_via_induction(loop_info_state *state,
+ nir_deref_instr *deref,
+ nir_loop_variable **array_index_out)
+{
+ for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
+ if (d->deref_type != nir_deref_type_array)
+ continue;
+
+ assert(d->arr.index.is_ssa);
+ nir_loop_variable *array_index = get_loop_var(d->arr.index.ssa, state);
+
+ if (array_index->type != basic_induction)
+ continue;
+
+ if (array_index_out)
+ *array_index_out = array_index;
+
+ nir_deref_instr *parent = nir_deref_instr_parent(d);
+ assert(glsl_type_is_array_or_matrix(parent->type));
+
+ return glsl_get_length(parent->type);
+ }
+
+ return 0;
+}
+
static int32_t
get_iteration(nir_op cond_op, nir_const_value *initial, nir_const_value *step,
nir_const_value *limit)
@@ -626,20 +658,9 @@ find_trip_count(loop_info_state *state)
static bool
force_unroll_array_access(loop_info_state *state, nir_deref_instr *deref)
{
- for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
- if (d->deref_type != nir_deref_type_array)
- continue;
-
- assert(d->arr.index.is_ssa);
- nir_loop_variable *array_index = get_loop_var(d->arr.index.ssa, state);
-
- if (array_index->type != basic_induction)
- continue;
-
- nir_deref_instr *parent = nir_deref_instr_parent(d);
- assert(glsl_type_is_array(parent->type) ||
- glsl_type_is_matrix(parent->type));
- if (glsl_get_length(parent->type) == state->loop->info->max_trip_count)
+ unsigned array_size = find_array_access_via_induction(state, deref, NULL);
+ if (array_size) {
+ if (array_size == state->loop->info->max_trip_count)
return true;

if (deref->mode & state->indirect_mask)
--
2.19.2
Jason Ekstrand
2018-12-07 23:21:47 UTC
Permalink
Post by Timothy Arceri
Here we rework force_unroll_array_access() so that we can reused
"reuse"
Post by Timothy Arceri
the induction variable detection in a following patch.
---
src/compiler/nir/nir_loop_analyze.c | 49 ++++++++++++++++++++---------
1 file changed, 35 insertions(+), 14 deletions(-)
diff --git a/src/compiler/nir/nir_loop_analyze.c
b/src/compiler/nir/nir_loop_analyze.c
index 700d1fe552..eef224e4d5 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -350,6 +350,38 @@ find_loop_terminators(loop_info_state *state)
return success;
}
+/* This function looks for an array access within a loop that uses an
+ * induction variable for the array index. If found it returns the size of the
+ * array, otherwise 0 is returned. If we find an induction var we pass it back
+ * to the caller via array_index_out.
+ */
+static unsigned
+find_array_access_via_induction(loop_info_state *state,
+ nir_deref_instr *deref,
+ nir_loop_variable **array_index_out)
+{
+ for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
+ if (d->deref_type != nir_deref_type_array)
+ continue;
+
+ assert(d->arr.index.is_ssa);
+ nir_loop_variable *array_index = get_loop_var(d->arr.index.ssa, state);
+
+ if (array_index->type != basic_induction)
+ continue;
+
+ if (array_index_out)
+ *array_index_out = array_index;
+
+ nir_deref_instr *parent = nir_deref_instr_parent(d);
+ assert(glsl_type_is_array_or_matrix(parent->type));
+
+ return glsl_get_length(parent->type);
+ }
+
+ return 0;
+}
+
static int32_t
get_iteration(nir_op cond_op, nir_const_value *initial, nir_const_value *step,
nir_const_value *limit)
@@ -626,20 +658,9 @@ find_trip_count(loop_info_state *state)
static bool
force_unroll_array_access(loop_info_state *state, nir_deref_instr *deref)
{
- for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
- if (d->deref_type != nir_deref_type_array)
- continue;
-
- assert(d->arr.index.is_ssa);
- nir_loop_variable *array_index = get_loop_var(d->arr.index.ssa, state);
-
- if (array_index->type != basic_induction)
- continue;
-
- nir_deref_instr *parent = nir_deref_instr_parent(d);
- assert(glsl_type_is_array(parent->type) ||
- glsl_type_is_matrix(parent->type));
- if (glsl_get_length(parent->type) ==
state->loop->info->max_trip_count)
+ unsigned array_size = find_array_access_via_induction(state, deref, NULL);
+ if (array_size) {
+ if (array_size == state->loop->info->max_trip_count)
return true;
if (deref->mode & state->indirect_mask)
--
2.19.2
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Timothy Arceri
2018-12-07 03:08:09 UTC
Permalink
This adds partial loop unrolling support and makes use of a
guessed trip count based on array access.

The code is written so that we could use partial unrolling
more generally, but for now it's only use when we have guessed
the trip count.

We use partial unrolling for this guessed trip count because its
possible any out of bounds array access doesn't otherwise affect
the shader e.g the stores/loads to/from the array are unused. So
we insert a copy of the loop in the innermost continue branch of
the unrolled loop. Later on its possible for nir_opt_dead_cf()
to then remove the loop in some cases.

A Renderdoc capture from the Rise of the Tomb Raider benchmark,
reports the following change in an affected compute shader:

GPU duration: 350 -> 325 microseconds

shader-db results radeonsi VEGA (NIR backend):

Totals from affected shaders:
SGPRS: 1120 -> 928 (-17.14 %)
VGPRS: 768 -> 516 (-32.81 %)
Spilled SGPRs: 666 -> 157 (-76.43 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 44072 -> 51880 (17.72 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 108 -> 147 (36.11 %)
Wait states: 0 -> 0 (0.00 %

shader-db results i965 SKL:

total instructions in shared programs: 13098265 -> 13103359 (0.04%)
instructions in affected programs: 5126 -> 10220 (99.38%)
helped: 0
HURT: 21

total cycles in shared programs: 332039949 -> 331985622 (-0.02%)
cycles in affected programs: 289252 -> 234925 (-18.78%)
helped: 12
HURT: 9

vkpipeline-db results VEGA:

Totals from affected shaders:
SGPRS: 184 -> 184 (0.00 %)
VGPRS: 448 -> 448 (0.00 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 26460 -> 25092 (-5.17 %) bytes
LDS: 6 -> 6 (0.00 %) blocks
Max Waves: 5 -> 5 (0.00 %)
Wait states: 0 -> 0 (0.00 %)
---
src/compiler/nir/nir_opt_loop_unroll.c | 206 ++++++++++++++++++++++++-
1 file changed, 198 insertions(+), 8 deletions(-)

diff --git a/src/compiler/nir/nir_opt_loop_unroll.c b/src/compiler/nir/nir_opt_loop_unroll.c
index 8406880204..d8df619b32 100644
--- a/src/compiler/nir/nir_opt_loop_unroll.c
+++ b/src/compiler/nir/nir_opt_loop_unroll.c
@@ -556,19 +556,200 @@ wrapper_unroll(nir_loop *loop)
return true;
}

+static bool
+is_access_out_of_bounds(nir_loop_terminator *term, nir_deref_instr *deref,
+ unsigned trip_count)
+{
+ for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
+ if (d->deref_type != nir_deref_type_array)
+ continue;
+
+ nir_alu_instr *alu = nir_instr_as_alu(term->conditional_instr);
+ nir_src src = term->induction_rhs ? alu->src[1].src : alu->src[0].src;
+ if (!nir_srcs_equal(d->arr.index, src))
+ continue;
+
+ nir_deref_instr *parent = nir_deref_instr_parent(d);
+ assert(glsl_type_is_array(parent->type) ||
+ glsl_type_is_matrix(parent->type));
+
+ /* We have already unrolled the loop and the new one will be imbedded in
+ * the innermost continue branch. So unless the array is greater than
+ * the trip count any iteration over the loop will be an out of bounds
+ * access of the array.
+ */
+ return glsl_get_length(parent->type) <= trip_count;
+ }
+
+ return false;
+}
+
+/* If we know an array access is going to be out of bounds remove or replace
+ * the access with an undef. This can later result in the entire loop being
+ * removed by nir_opt_dead_cf().
+ */
+static void
+remove_out_of_bounds_induction_use(nir_shader *shader, nir_loop *loop,
+ nir_loop_terminator *term,
+ nir_cf_list *lp_header,
+ nir_cf_list *lp_body,
+ unsigned trip_count)
+{
+ if (!loop->info->guessed_trip_count)
+ return;
+
+ /* Temporarily recreate the original loop so we can alter it */
+ nir_cf_reinsert(lp_header, nir_after_block(nir_loop_last_block(loop)));
+ nir_cf_reinsert(lp_body, nir_after_block(nir_loop_last_block(loop)));
+
+ nir_builder b;
+ nir_builder_init(&b, nir_cf_node_get_function(&loop->cf_node));
+
+ nir_foreach_block_in_cf_node(block, &loop->cf_node) {
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+ /* Check for arrays variably-indexed by a loop induction variable.
+ * If this access is out of bounds remove the instruction or replace
+ * its use with an undefined instruction.
+ * If the loop is no longer useful we leave if for the appropriate
+ * pass to clean it up for us.
+ */
+ if (intrin->intrinsic == nir_intrinsic_load_deref ||
+ intrin->intrinsic == nir_intrinsic_store_deref ||
+ intrin->intrinsic == nir_intrinsic_copy_deref) {
+
+ if (is_access_out_of_bounds(term, nir_src_as_deref(intrin->src[0]),
+ trip_count)) {
+ if (intrin->intrinsic == nir_intrinsic_load_deref) {
+ assert(intrin->src[0].is_ssa);
+ nir_ssa_def *a_ssa = intrin->src[0].ssa;
+ nir_ssa_def *undef =
+ nir_ssa_undef(&b, intrin->num_components,
+ a_ssa->bit_size);
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+ nir_src_for_ssa(undef));
+ } else {
+ nir_instr_remove(instr);
+ }
+ }
+
+ if (intrin->intrinsic == nir_intrinsic_copy_deref &&
+ is_access_out_of_bounds(term, nir_src_as_deref(intrin->src[1]),
+ trip_count)) {
+ assert(intrin->src[1].is_ssa);
+ nir_ssa_def *a_ssa = intrin->src[1].ssa;
+ nir_ssa_def *undef =
+ nir_ssa_undef(&b, intrin->num_components, a_ssa->bit_size);
+
+ /* Replace the copy with a store of the undefined value */
+ b.cursor = nir_before_instr(instr);
+ nir_store_deref(&b, nir_src_as_deref(intrin->src[0]), undef, ~0);
+ nir_instr_remove(instr);
+ }
+ }
+ }
+ }
+
+ /* Now that we are done extract the loop header and body again */
+ nir_cf_extract(lp_header, nir_before_block(nir_loop_first_block(loop)),
+ nir_before_cf_node(&term->nif->cf_node));
+ nir_cf_extract(lp_body, nir_before_block(nir_loop_first_block(loop)),
+ nir_after_block(nir_loop_last_block(loop)));
+}
+
+/* Partially unrolls loops that don't have a known trip count.
+ */
+static void
+partial_unroll(nir_shader *shader, nir_loop *loop, unsigned trip_count)
+{
+ assert(list_length(&loop->info->loop_terminator_list) == 1);
+
+ nir_loop_terminator *terminator =
+ list_first_entry(&loop->info->loop_terminator_list,
+ nir_loop_terminator, loop_terminator_link);
+
+ assert(nir_is_trivial_loop_if(terminator->nif, terminator->break_block));
+
+ loop_prepare_for_unroll(loop);
+
+ /* Pluck out the loop header */
+ nir_cf_list lp_header;
+ nir_cf_extract(&lp_header, nir_before_block(nir_loop_first_block(loop)),
+ nir_before_cf_node(&terminator->nif->cf_node));
+
+ struct hash_table *remap_table =
+ _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+
+ nir_cf_list lp_body;
+ nir_cf_node *unroll_loc =
+ complex_unroll_loop_body(loop, terminator, &lp_header, &lp_body,
+ remap_table, trip_count);
+
+ /* Attempt to remove out of bounds array access */
+ remove_out_of_bounds_induction_use(shader, loop, terminator, &lp_header,
+ &lp_body, trip_count);
+
+ nir_cursor cursor =
+ get_complex_unroll_insert_location(unroll_loc,
+ terminator->continue_from_then);
+
+ /* Reinsert the loop in the innermost nested continue branch of the unrolled
+ * loop.
+ */
+ nir_loop *new_loop = nir_loop_create(shader);
+ nir_cf_node_insert(cursor, &new_loop->cf_node);
+ new_loop->partially_unrolled = true;
+
+ /* Clone loop header and insert into new loop */
+ nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
+ nir_after_cf_list(&new_loop->body),
+ remap_table);
+
+ /* Clone loop body and insert into new loop */
+ nir_cf_list_clone_and_reinsert(&lp_body, loop->cf_node.parent,
+ nir_after_cf_list(&new_loop->body),
+ remap_table);
+
+ /* Insert break back into terminator */
+ nir_jump_instr *brk = nir_jump_instr_create(shader, nir_jump_break);
+ nir_if *nif = nir_block_get_following_if(nir_loop_first_block(new_loop));
+ if (terminator->continue_from_then) {
+ nir_instr_insert_after_block(nir_if_last_else_block(nif), &brk->instr);
+ } else {
+ nir_instr_insert_after_block(nir_if_last_then_block(nif), &brk->instr);
+ }
+
+ /* Delete the original loop header and body */
+ nir_cf_delete(&lp_header);
+ nir_cf_delete(&lp_body);
+
+ /* The original loop has been replaced so remove it. */
+ nir_cf_node_remove(&loop->cf_node);
+
+ _mesa_hash_table_destroy(remap_table, NULL);
+}
+
static bool
is_loop_small_enough_to_unroll(nir_shader *shader, nir_loop_info *li)
{
unsigned max_iter = shader->options->max_unroll_iterations;

- if (li->max_trip_count > max_iter)
+ unsigned trip_count =
+ li->max_trip_count ? li->max_trip_count : li->guessed_trip_count;
+
+ if (trip_count > max_iter)
return false;

- if (li->force_unroll)
+ if (li->force_unroll && !li->guessed_trip_count)
return true;

bool loop_not_too_large =
- li->num_instructions * li->max_trip_count <= max_iter * LOOP_UNROLL_LIMIT;
+ li->num_instructions * trip_count <= max_iter * LOOP_UNROLL_LIMIT;

return loop_not_too_large;
}
@@ -620,15 +801,24 @@ process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out)
!loop->info->complex_loop) {

nir_block *last_loop_blk = nir_loop_last_block(loop);
- if (!nir_block_ends_in_break(last_loop_blk))
+ if (nir_block_ends_in_break(last_loop_blk)) {
+ progress = wrapper_unroll(loop);
goto exit;
+ }

- progress = wrapper_unroll(loop);
-
- goto exit;
+ /* If we were able to guess the loop iteration based on array access
+ * then do a partial unroll.
+ */
+ unsigned num_lt = list_length(&loop->info->loop_terminator_list);
+ if (!has_nested_loop && num_lt == 1 && !loop->partially_unrolled &&
+ loop->info->guessed_trip_count &&
+ is_loop_small_enough_to_unroll(sh, loop->info)) {
+ partial_unroll(sh, loop, loop->info->guessed_trip_count);
+ progress = true;
+ }
}

- if (has_nested_loop || loop->info->limiting_terminator == NULL)
+ if (has_nested_loop || !loop->info->limiting_terminator)
goto exit;

if (!is_loop_small_enough_to_unroll(sh, loop->info))
--
2.19.2
Timothy Arceri
2018-12-07 03:08:11 UTC
Permalink
For some loops can have a single terminator but the exact trip
count is still unknown. For example:

for (int i = 0; i < imin(x, 4); i++)
...

Shader-db results radeonsi (all affected are from Tropico 5):

Totals from affected shaders:
SGPRS: 200 -> 208 (4.00 %)
VGPRS: 164 -> 148 (-9.76 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 7208 -> 8672 (20.31 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 23 -> 27 (17.39 %)
Wait states: 0 -> 0 (0.00 %)

vkpipeline-db results RADV (Unrolls some Skyrim VR shaders):

Totals from affected shaders:
SGPRS: 304 -> 304 (0.00 %)
VGPRS: 300 -> 292 (-2.67 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 15516 -> 26388 (70.07 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 29 -> 29 (0.00 %)
Wait states: 0 -> 0 (0.00 %)

v2: fix bug where last iteration would get optimised away by
mistake.
---
src/compiler/nir/nir_opt_loop_unroll.c | 55 ++++++++++++++++++++++++++
1 file changed, 55 insertions(+)

diff --git a/src/compiler/nir/nir_opt_loop_unroll.c b/src/compiler/nir/nir_opt_loop_unroll.c
index 9630e0738a..70e6c67bde 100644
--- a/src/compiler/nir/nir_opt_loop_unroll.c
+++ b/src/compiler/nir/nir_opt_loop_unroll.c
@@ -460,6 +460,55 @@ complex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term,
_mesa_hash_table_destroy(remap_table, NULL);
}

+/**
+ * Unroll loops where we only have a single terminator but the exact trip
+ * count is unknown. For example:
+ *
+ * for (int i = 0; i < imin(x, 4); i++)
+ * ...
+ */
+static void
+complex_unroll_single_terminator(nir_loop *loop)
+{
+ assert(list_length(&loop->info->loop_terminator_list) == 1);
+ assert(loop->info->limiting_terminator);
+ assert(nir_is_trivial_loop_if(loop->info->limiting_terminator->nif,
+ loop->info->limiting_terminator->break_block));
+
+ nir_loop_terminator *terminator = loop->info->limiting_terminator;
+
+ loop_prepare_for_unroll(loop);
+
+ /* Pluck out the loop header */
+ nir_cf_list lp_header;
+ nir_cf_extract(&lp_header, nir_before_block(nir_loop_first_block(loop)),
+ nir_before_cf_node(&terminator->nif->cf_node));
+
+ struct hash_table *remap_table =
+ _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+
+ /* We need to clone the loop one extra time in order to clone the lcssa
+ * vars for the last iteration (they are inside the following ifs break
+ * branch). We leave other passes to clean up this redundant if.
+ */
+ unsigned num_times_to_clone = loop->info->max_trip_count + 1;
+
+ nir_cf_list lp_body;
+ nir_cf_node *unroll_loc =
+ complex_unroll_loop_body(loop, terminator, &lp_header, &lp_body,
+ remap_table, num_times_to_clone);
+
+ /* Delete the original loop header and body */
+ nir_cf_delete(&lp_header);
+ nir_cf_delete(&lp_body);
+
+ /* The original loop has been replaced so remove it. */
+ nir_cf_node_remove(&loop->cf_node);
+
+ _mesa_hash_table_destroy(remap_table, NULL);
+}
+
/* Unrolls the classic wrapper loops e.g
*
* do {
@@ -856,6 +905,12 @@ process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out)
}
progress = true;
}
+
+ if (num_lt == 1) {
+ assert(loop->info->limiting_terminator->exact_trip_count_unknown);
+ complex_unroll_single_terminator(loop);
+ progress = true;
+ }
}
}
--
2.19.2
Timothy Arceri
2018-12-07 03:08:10 UTC
Permalink
This adds support to loop analysis for loops where the induction
variable is compared to the result of min(variable, constant).

For example:

for (int i = 0; i < imin(x, 4); i++)
...

We add a new bool to the loop terminator struct in order to
differentiate terminators with this exit condition.
---
src/compiler/nir/nir.h | 11 +++++++
src/compiler/nir/nir_loop_analyze.c | 41 ++++++++++++++++++++++----
src/compiler/nir/nir_opt_loop_unroll.c | 3 +-
3 files changed, 49 insertions(+), 6 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index bf015bb53a..f31e91a3c0 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1880,6 +1880,17 @@ typedef struct {
bool continue_from_then;
bool induction_rhs;

+ /* This is true if the terminators exact trip count is unknown. For
+ * example:
+ *
+ * for (int i = 0; i < imin(x, 4); i++)
+ * ...
+ *
+ * Here loop analysis would have set a max_trip_count of 4 however we dont
+ * know for sure that this is the exact trip count.
+ */
+ bool exact_trip_count_unknown;
+
struct list_head loop_terminator_link;
} nir_loop_terminator;

diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c
index ffcf2a3c27..b003b1f198 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -426,6 +426,35 @@ guess_loop_limit(loop_info_state *state, nir_const_value *limit_val,
return false;
}

+static bool
+try_find_limit_of_alu(nir_loop_variable *limit, nir_const_value *limit_val,
+ nir_loop_terminator *terminator, loop_info_state *state)
+{
+ if(!is_var_alu(limit))
+ return false;
+
+ nir_alu_instr *limit_alu = nir_instr_as_alu(limit->def->parent_instr);
+
+ if (limit_alu->op == nir_op_imin ||
+ limit_alu->op == nir_op_fmin) {
+ limit = get_loop_var(limit_alu->src[0].src.ssa, state);
+
+ if (!is_var_constant(limit))
+ limit = get_loop_var(limit_alu->src[1].src.ssa, state);
+
+ if (!is_var_constant(limit))
+ return false;
+
+ *limit_val = nir_instr_as_load_const(limit->def->parent_instr)->value;
+
+ terminator->exact_trip_count_unknown = true;
+
+ return true;
+ }
+
+ return false;
+}
+
static int32_t
get_iteration(nir_op cond_op, nir_const_value *initial, nir_const_value *step,
nir_const_value *limit)
@@ -657,12 +686,14 @@ find_trip_count(loop_info_state *state)
} else {
trip_count_known = false;

- /* Guess loop limit based on array access */
- if (!guess_loop_limit(state, &limit_val, basic_ind)) {
- continue;
- }
+ if (!try_find_limit_of_alu(limit, &limit_val, terminator, state)) {
+ /* Guess loop limit based on array access */
+ if (!guess_loop_limit(state, &limit_val, basic_ind)) {
+ continue;
+ }

- guessed_trip_count = true;
+ guessed_trip_count = true;
+ }
}

/* We have determined that we have the following constants:
diff --git a/src/compiler/nir/nir_opt_loop_unroll.c b/src/compiler/nir/nir_opt_loop_unroll.c
index d8df619b32..9630e0738a 100644
--- a/src/compiler/nir/nir_opt_loop_unroll.c
+++ b/src/compiler/nir/nir_opt_loop_unroll.c
@@ -830,7 +830,8 @@ process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out)
} else {
/* Attempt to unroll loops with two terminators. */
unsigned num_lt = list_length(&loop->info->loop_terminator_list);
- if (num_lt == 2) {
+ if (num_lt == 2 &&
+ !loop->info->limiting_terminator->exact_trip_count_unknown) {
bool limiting_term_second = true;
nir_loop_terminator *terminator =
list_first_entry(&loop->info->loop_terminator_list,
--
2.19.2
Timothy Arceri
2018-12-07 03:08:12 UTC
Permalink
From: Danylo Piliaiev <***@gmail.com>

Removing the last continue can allow more loops to unroll. Also
inserting code into the if branch can allow the various if opts
to progress further.

The insertion of some loops into the if branch also reduces VGPR
use in some shaders.

vkpipeline-db results (VEGA):

Totals from affected shaders:
SGPRS: 6552 -> 6576 (0.37 %)
VGPRS: 6544 -> 6532 (-0.18 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 481952 -> 478032 (-0.81 %) bytes
LDS: 13 -> 13 (0.00 %) blocks
Max Waves: 241 -> 242 (0.41 %)
Wait states: 0 -> 0 (0.00 %)

Shader-db results radeonsi (VEGA):

Totals from affected shaders:
SGPRS: 168 -> 168 (0.00 %)
VGPRS: 144 -> 140 (-2.78 %)
Spilled SGPRs: 157 -> 157 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 8524 -> 8488 (-0.42 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 7 -> 7 (0.00 %)
Wait states: 0 -> 0 (0.00 %)

v2: (Timothy Arceri):
- allow for continues in either branch
- move any trailing loops inside the if as well as blocks.
- leave nir_opt_trivial_continues() to actually remove the
continue.

Reviewed-by: Thomas Helland <***@gmail.com>
Signed-off-by: Timothy Arceri <***@itsqueeze.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=32211
---
src/compiler/nir/nir_opt_if.c | 95 +++++++++++++++++++++++++++++++++++
1 file changed, 95 insertions(+)

diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
index dd488b1787..4a9dffb782 100644
--- a/src/compiler/nir/nir_opt_if.c
+++ b/src/compiler/nir/nir_opt_if.c
@@ -263,6 +263,100 @@ rewrite_phi_predecessor_blocks(nir_if *nif,
}
}

+static bool
+nir_block_ends_in_continue(nir_block *block)
+{
+ if (exec_list_is_empty(&block->instr_list))
+ return false;
+
+ nir_instr *instr = nir_block_last_instr(block);
+ return instr->type == nir_instr_type_jump &&
+ nir_instr_as_jump(instr)->type == nir_jump_continue;
+}
+
+/**
+ * This optimization turns:
+ *
+ * loop {
+ * ...
+ * if (cond) {
+ * do_work_1();
+ * continue;
+ * } else {
+ * }
+ * do_work_2();
+ * }
+ *
+ * into:
+ *
+ * loop {
+ * ...
+ * if (cond) {
+ * do_work_1();
+ * continue;
+ * } else {
+ * do_work_2();
+ * }
+ * }
+ *
+ * The continue should then be removed by nir_opt_trivial_continues() and the
+ * loop can potentially be unrolled.
+ *
+ * Note: do_work_2() is only ever blocks and nested loops. We could also nest
+ * other if-statments in the branch which would allow further continues to
+ * be removed. However in practice this can result in increased register
+ * pressure.
+ */
+static bool
+opt_if_loop_last_continue(nir_loop *loop)
+{
+ /* Get the last if-stament in the loop */
+ nir_block *last_block = nir_loop_last_block(loop);
+ nir_cf_node *if_node = nir_cf_node_prev(&last_block->cf_node);
+ while (if_node) {
+ if (if_node->type == nir_cf_node_if)
+ break;
+
+ if_node = nir_cf_node_prev(if_node);
+ }
+
+ if (!if_node || if_node->type != nir_cf_node_if)
+ return false;
+
+ nir_if *nif = nir_cf_node_as_if(if_node);
+ nir_block *then_block = nir_if_last_then_block(nif);
+ nir_block *else_block = nir_if_last_else_block(nif);
+
+ bool then_ends_in_continue = nir_block_ends_in_continue(then_block);
+ bool else_ends_in_continue = nir_block_ends_in_continue(else_block);
+
+ /* If both branches end in a continue do nothing, this should be handled
+ * by nir_opt_dead_cf().
+ */
+ if (then_ends_in_continue && else_ends_in_continue)
+ return false;
+
+ if (!then_ends_in_continue && !else_ends_in_continue)
+ return false;
+
+ /* Move the last block of the loop inside the last if-statement */
+ nir_cf_list tmp;
+ nir_cf_extract(&tmp, nir_after_cf_node(if_node),
+ nir_after_block(last_block));
+ if (then_ends_in_continue) {
+ nir_cf_reinsert(&tmp, nir_after_cf_list(&nif->else_list));
+ } else {
+ nir_cf_reinsert(&tmp, nir_after_cf_list(&nif->then_list));
+ }
+
+ /* In order to avoid running nir_lower_regs_to_ssa_impl() every time an if
+ * opt makes progress we leave nir_opt_trivial_continues() to remove the
+ * continue now that the end of the loop has been simplified.
+ */
+
+ return true;
+}
+
/**
* This optimization turns:
*
@@ -700,6 +794,7 @@ opt_if_cf_list(nir_builder *b, struct exec_list *cf_list)
nir_loop *loop = nir_cf_node_as_loop(cf_node);
progress |= opt_if_cf_list(b, &loop->body);
progress |= opt_peel_loop_initial_if(loop);
+ progress |= opt_if_loop_last_continue(loop);
break;
}
--
2.19.2
Timothy Arceri
2018-12-07 03:08:14 UTC
Permalink
Reviewed-by: Thomas Helland <***@gmail.com>
---
src/compiler/nir/nir_loop_analyze.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c
index fbaa638884..ef69422c12 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -239,8 +239,8 @@ compute_induction_information(loop_info_state *state)
nir_foreach_phi_src(src, phi) {
nir_loop_variable *src_var = get_loop_var(src->src.ssa, state);

- /* If one of the sources is in a conditional or nested block then
- * panic.
+ /* If one of the sources is in an if branch or nested loop then don't
+ * attempt to go any further.
*/
if (src_var->in_if_branch || src_var->in_nested_loop)
break;
--
2.19.2
Timothy Arceri
2018-12-07 03:08:15 UTC
Permalink
This allows loop analysis to detect inductions variables that
are incremented in both branches of an if rather than in a main
loop block. For example:

loop {
block block_1:
/* preds: block_0 block_7 */
vec1 32 ssa_8 = phi block_0: ssa_4, block_7: ssa_20
vec1 32 ssa_9 = phi block_0: ssa_0, block_7: ssa_4
vec1 32 ssa_10 = phi block_0: ssa_1, block_7: ssa_4
vec1 32 ssa_11 = phi block_0: ssa_2, block_7: ssa_21
vec1 32 ssa_12 = phi block_0: ssa_3, block_7: ssa_22
vec4 32 ssa_13 = vec4 ssa_12, ssa_11, ssa_10, ssa_9
vec1 32 ssa_14 = ige ssa_8, ssa_5
/* succs: block_2 block_3 */
if ssa_14 {
block block_2:
/* preds: block_1 */
break
/* succs: block_8 */
} else {
block block_3:
/* preds: block_1 */
/* succs: block_4 */
}
block block_4:
/* preds: block_3 */
vec1 32 ssa_15 = ilt ssa_6, ssa_8
/* succs: block_5 block_6 */
if ssa_15 {
block block_5:
/* preds: block_4 */
vec1 32 ssa_16 = iadd ssa_8, ssa_7
vec1 32 ssa_17 = load_const (0x3f800000 /* 1.000000*/)
/* succs: block_7 */
} else {
block block_6:
/* preds: block_4 */
vec1 32 ssa_18 = iadd ssa_8, ssa_7
vec1 32 ssa_19 = load_const (0x3f800000 /* 1.000000*/)
/* succs: block_7 */
}
block block_7:
/* preds: block_5 block_6 */
vec1 32 ssa_20 = phi block_5: ssa_16, block_6: ssa_18
vec1 32 ssa_21 = phi block_5: ssa_17, block_6: ssa_4
vec1 32 ssa_22 = phi block_5: ssa_4, block_6: ssa_19
/* succs: block_1 */
}

Unfortunatly GCM could move the addition out of the if for us
(making this patch unrequired) but we still cannot enable the GCM
pass without regressions.

This unrolls a loop in Rise of The Tomb Raider.

vkpipeline-db results (VEGA):

Totals from affected shaders:
SGPRS: 88 -> 96 (9.09 %)
VGPRS: 56 -> 52 (-7.14 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 2168 -> 4560 (110.33 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 4 -> 4 (0.00 %)
Wait states: 0 -> 0 (0.00 %)

Reviewed-by: Thomas Helland <***@gmail.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=32211
---
src/compiler/nir/nir_loop_analyze.c | 36 +++++++++++++++++++++++++++++
1 file changed, 36 insertions(+)

diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c
index ef69422c12..be74105594 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -245,6 +245,42 @@ compute_induction_information(loop_info_state *state)
if (src_var->in_if_branch || src_var->in_nested_loop)
break;

+ /* Detect inductions variables that are incremented in both branches
+ * of an unnested if rather than in a loop block.
+ */
+ if (is_var_phi(src_var)) {
+ nir_phi_instr *src_phi =
+ nir_instr_as_phi(src_var->def->parent_instr);
+
+ nir_op alu_op;
+ nir_ssa_def *alu_srcs[2] = {0};
+ nir_foreach_phi_src(src2, src_phi) {
+ nir_loop_variable *src_var2 =
+ get_loop_var(src2->src.ssa, state);
+
+ if (!src_var2->in_if_branch || !is_var_alu(src_var2))
+ break;
+
+ nir_alu_instr *alu =
+ nir_instr_as_alu(src_var2->def->parent_instr);
+ if (nir_op_infos[alu->op].num_inputs != 2)
+ break;
+
+ if (alu->src[0].src.ssa == alu_srcs[0] &&
+ alu->src[1].src.ssa == alu_srcs[1] &&
+ alu->op == alu_op) {
+ /* Both branches perform the same calculation so we can use
+ * one of them to find the induction variable.
+ */
+ src_var = src_var2;
+ } else {
+ alu_srcs[0] = alu->src[0].src.ssa;
+ alu_srcs[1] = alu->src[1].src.ssa;
+ alu_op = alu->op;
+ }
+ }
+ }
+
if (!src_var->in_loop) {
biv->def_outside_loop = src_var;
} else if (is_var_alu(src_var)) {
--
2.19.2
Timothy Arceri
2018-12-07 03:08:13 UTC
Permalink
This will allow us to improve analysis to find more induction
variables.

Reviewed-by: Thomas Helland <***@gmail.com>
---
src/compiler/nir/nir_loop_analyze.c | 34 ++++++++++++++++++-----------
1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c
index b003b1f198..fbaa638884 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -49,8 +49,11 @@ typedef struct {
/* If this is of type basic_induction */
struct nir_basic_induction_var *ind;

- /* True if variable is in an if branch or a nested loop */
- bool in_control_flow;
+ /* True if variable is in an if branch */
+ bool in_if_branch;
+
+ /* True if variable is in a nested loop */
+ bool in_nested_loop;

} nir_loop_variable;

@@ -83,7 +86,8 @@ get_loop_var(nir_ssa_def *value, loop_info_state *state)

typedef struct {
loop_info_state *state;
- bool in_control_flow;
+ bool in_if_branch;
+ bool in_nested_loop;
} init_loop_state;

static bool
@@ -92,8 +96,10 @@ init_loop_def(nir_ssa_def *def, void *void_init_loop_state)
init_loop_state *loop_init_state = void_init_loop_state;
nir_loop_variable *var = get_loop_var(def, loop_init_state->state);

- if (loop_init_state->in_control_flow) {
- var->in_control_flow = true;
+ if (loop_init_state->in_nested_loop) {
+ var->in_nested_loop = true;
+ } else if (loop_init_state->in_if_branch) {
+ var->in_if_branch = true;
} else {
/* Add to the tail of the list. That way we start at the beginning of
* the defs in the loop instead of the end when walking the list. This
@@ -110,9 +116,10 @@ init_loop_def(nir_ssa_def *def, void *void_init_loop_state)

static bool
init_loop_block(nir_block *block, loop_info_state *state,
- bool in_control_flow)
+ bool in_if_branch, bool in_nested_loop)
{
- init_loop_state init_state = {.in_control_flow = in_control_flow,
+ init_loop_state init_state = {.in_if_branch = in_if_branch,
+ .in_nested_loop = in_nested_loop,
.state = state };

nir_foreach_instr(instr, block) {
@@ -198,7 +205,7 @@ compute_invariance_information(loop_info_state *state)
*/
list_for_each_entry_safe(nir_loop_variable, var, &state->process_list,
process_link) {
- assert(!var->in_control_flow);
+ assert(!var->in_if_branch && !var->in_nested_loop);

if (mark_invariant(var->def, state))
list_del(&var->process_link);
@@ -216,7 +223,8 @@ compute_induction_information(loop_info_state *state)
* things in nested loops or conditionals should have been removed from
* the list by compute_invariance_information().
*/
- assert(!var->in_control_flow && var->type != invariant);
+ assert(!var->in_if_branch && !var->in_nested_loop &&
+ var->type != invariant);

/* We are only interested in checking phis for the basic induction
* variable case as its simple to detect. All basic induction variables
@@ -234,7 +242,7 @@ compute_induction_information(loop_info_state *state)
/* If one of the sources is in a conditional or nested block then
* panic.
*/
- if (src_var->in_control_flow)
+ if (src_var->in_if_branch || src_var->in_nested_loop)
break;

if (!src_var->in_loop) {
@@ -814,17 +822,17 @@ get_loop_info(loop_info_state *state, nir_function_impl *impl)
switch (node->type) {

case nir_cf_node_block:
- init_loop_block(nir_cf_node_as_block(node), state, false);
+ init_loop_block(nir_cf_node_as_block(node), state, false, false);
break;

case nir_cf_node_if:
nir_foreach_block_in_cf_node(block, node)
- init_loop_block(block, state, true);
+ init_loop_block(block, state, true, false);
break;

case nir_cf_node_loop:
nir_foreach_block_in_cf_node(block, node) {
- init_loop_block(block, state, true);
+ init_loop_block(block, state, false, true);
}
break;
--
2.19.2
Timothy Arceri
2018-12-07 03:08:17 UTC
Permalink
This will be used to help find the trip count of loops that look
like the following:

while (a < x && i < 8) {
...
i++;
}

Where the NIR will end up looking something like this:

vec1 32 ssa_0 = load_const (0x00000000 /* 0.000000 */)
vec1 32 ssa_1 = load_const (0x00000008 /* 0.000000 */)
loop {
...
vec1 32 ssa_28 = ige ssa_26, ssa_3
vec1 32 ssa_29 = ige ssa_27, ssa_1
vec1 32 ssa_30 = iadd ssa_29, ssa_28
vec1 ssa_31 = ieq ssa_30, ssa_0
if ssa_31 {
...
break
} else {
...
}
...
}

So in order to find the trip count we need to find the inverse of
ige.
---
src/compiler/nir/nir_loop_analyze.c | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)

diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c
index 5446e7a120..2dd7dd7b20 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -665,6 +665,35 @@ calculate_iterations(nir_const_value *initial, nir_const_value *step,
return -1;
}

+static nir_op
+inverse_comparision(nir_alu_instr *alu)
+{
+ switch (alu->op) {
+ case nir_op_fge:
+ return nir_op_flt;
+ case nir_op_ige:
+ return nir_op_ilt;
+ case nir_op_uge:
+ return nir_op_ult;
+ case nir_op_flt:
+ return nir_op_fge;
+ case nir_op_ilt:
+ return nir_op_ige;
+ case nir_op_ult:
+ return nir_op_uge;
+ case nir_op_feq:
+ return nir_op_fne;
+ case nir_op_ieq:
+ return nir_op_ine;
+ case nir_op_fne:
+ return nir_op_feq;
+ case nir_op_ine:
+ return nir_op_ieq;
+ default:
+ unreachable("Unsuported comparision!");
+ }
+}
+
static bool
is_supported_terminator_condition(nir_alu_instr *alu)
{
--
2.19.2
Timothy Arceri
2018-12-07 03:08:16 UTC
Permalink
Here we create a helper is_supported_terminator_condition()
and use that rather than embedding all the trip count code
inside a switch.

The new helper will also be used in a following patch.
---
src/compiler/nir/nir_loop_analyze.c | 172 +++++++++++++++-------------
1 file changed, 93 insertions(+), 79 deletions(-)

diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c
index be74105594..5446e7a120 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -665,6 +665,26 @@ calculate_iterations(nir_const_value *initial, nir_const_value *step,
return -1;
}

+static bool
+is_supported_terminator_condition(nir_alu_instr *alu)
+{
+ switch (alu->op) {
+ case nir_op_fge:
+ case nir_op_ige:
+ case nir_op_uge:
+ case nir_op_flt:
+ case nir_op_ilt:
+ case nir_op_ult:
+ case nir_op_feq:
+ case nir_op_ieq:
+ case nir_op_fne:
+ case nir_op_ine:
+ return true;
+ default:
+ return false;
+ }
+}
+
/* Run through each of the terminators of the loop and try to infer a possible
* trip-count. We need to check them all, and set the lowest trip-count as the
* trip-count of our loop. If one of the terminators has an undecidable
@@ -696,97 +716,91 @@ find_trip_count(loop_info_state *state)
nir_loop_variable *limit = NULL;
bool limit_rhs = true;

- switch (alu->op) {
- case nir_op_fge: case nir_op_ige: case nir_op_uge:
- case nir_op_flt: case nir_op_ilt: case nir_op_ult:
- case nir_op_feq: case nir_op_ieq:
- case nir_op_fne: case nir_op_ine:
-
- /* We assume that the limit is the "right" operand */
- basic_ind = get_loop_var(alu->src[0].src.ssa, state);
- limit = get_loop_var(alu->src[1].src.ssa, state);
-
- if (basic_ind->type != basic_induction) {
- /* We had it the wrong way, flip things around */
- basic_ind = get_loop_var(alu->src[1].src.ssa, state);
- limit = get_loop_var(alu->src[0].src.ssa, state);
- limit_rhs = false;
- terminator->induction_rhs = true;
- }
+ if (!is_supported_terminator_condition(alu)) {
+ trip_count_known = false;
+ continue;
+ }

- /* The comparison has to have a basic induction variable for us to be
- * able to find trip counts.
- */
- if (basic_ind->type != basic_induction) {
- trip_count_known = false;
- continue;
- }
+ /* We assume that the limit is the "right" operand */
+ basic_ind = get_loop_var(alu->src[0].src.ssa, state);
+ limit = get_loop_var(alu->src[1].src.ssa, state);

- /* Attempt to find a constant limit for the loop */
- nir_const_value limit_val;
- if (is_var_constant(limit)) {
- limit_val =
- nir_instr_as_load_const(limit->def->parent_instr)->value;
- } else {
- trip_count_known = false;
-
- if (!try_find_limit_of_alu(limit, &limit_val, terminator, state)) {
- /* Guess loop limit based on array access */
- if (!guess_loop_limit(state, &limit_val, basic_ind)) {
- continue;
- }
+ if (basic_ind->type != basic_induction) {
+ /* We had it the wrong way, flip things around */
+ basic_ind = get_loop_var(alu->src[1].src.ssa, state);
+ limit = get_loop_var(alu->src[0].src.ssa, state);
+ limit_rhs = false;
+ terminator->induction_rhs = true;
+ }

- guessed_trip_count = true;
- }
- }
+ /* The comparison has to have a basic induction variable for us to be
+ * able to find trip counts.
+ */
+ if (basic_ind->type != basic_induction) {
+ trip_count_known = false;
+ continue;
+ }

- /* We have determined that we have the following constants:
- * (With the typical int i = 0; i < x; i++; as an example)
- * - Upper limit.
- * - Starting value
- * - Step / iteration size
- * Thats all thats needed to calculate the trip-count
- */
+ /* Attempt to find a constant limit for the loop */
+ nir_const_value limit_val;
+ if (is_var_constant(limit)) {
+ limit_val =
+ nir_instr_as_load_const(limit->def->parent_instr)->value;
+ } else {
+ trip_count_known = false;

- nir_const_value initial_val =
- nir_instr_as_load_const(basic_ind->ind->def_outside_loop->
- def->parent_instr)->value;
+ if (!try_find_limit_of_alu(limit, &limit_val, terminator, state)) {
+ /* Guess loop limit based on array access */
+ if (!guess_loop_limit(state, &limit_val, basic_ind)) {
+ continue;
+ }

- nir_const_value step_val =
- nir_instr_as_load_const(basic_ind->ind->invariant->def->
- parent_instr)->value;
+ guessed_trip_count = true;
+ }
+ }

- int iterations = calculate_iterations(&initial_val, &step_val,
- &limit_val,
- basic_ind->ind->alu_def, alu,
- limit_rhs,
- terminator->continue_from_then);
+ /* We have determined that we have the following constants:
+ * (With the typical int i = 0; i < x; i++; as an example)
+ * - Upper limit.
+ * - Starting value
+ * - Step / iteration size
+ * Thats all thats needed to calculate the trip-count
+ */

- /* Where we not able to calculate the iteration count */
- if (iterations == -1) {
- trip_count_known = false;
- guessed_trip_count = false;
- continue;
- }
+ nir_const_value initial_val =
+ nir_instr_as_load_const(basic_ind->ind->def_outside_loop->
+ def->parent_instr)->value;

- if (guessed_trip_count) {
- guessed_trip_count = false;
- state->loop->info->guessed_trip_count = iterations;
- continue;
- }
+ nir_const_value step_val =
+ nir_instr_as_load_const(basic_ind->ind->invariant->def->
+ parent_instr)->value;

- /* If this is the first run or we have found a smaller amount of
- * iterations than previously (we have identified a more limiting
- * terminator) set the trip count and limiting terminator.
- */
- if (max_trip_count == -1 || iterations < max_trip_count) {
- max_trip_count = iterations;
- limiting_terminator = terminator;
- }
- break;
+ int iterations = calculate_iterations(&initial_val, &step_val,
+ &limit_val,
+ basic_ind->ind->alu_def, alu,
+ limit_rhs,
+ terminator->continue_from_then);

- default:
+ /* Where we not able to calculate the iteration count */
+ if (iterations == -1) {
trip_count_known = false;
+ guessed_trip_count = false;
+ continue;
+ }
+
+ if (guessed_trip_count) {
+ guessed_trip_count = false;
+ state->loop->info->guessed_trip_count = iterations;
+ continue;
+ }
+
+ /* If this is the first run or we have found a smaller amount of
+ * iterations than previously (we have identified a more limiting
+ * terminator) set the trip count and limiting terminator.
+ */
+ if (max_trip_count == -1 || iterations < max_trip_count) {
+ max_trip_count = iterations;
+ limiting_terminator = terminator;
}
}
--
2.19.2
Timothy Arceri
2018-12-07 03:08:20 UTC
Permalink
This will be used to help find the trip count of loops that look
like the following:

while (a < x && i < 8) {
...
i++;
}

Where the NIR will end up looking something like this:

vec1 32 ssa_0 = load_const (0x00000000 /* 0.000000 */)
vec1 32 ssa_1 = load_const (0x00000008 /* 0.000000 */)
loop {
...
vec1 32 ssa_28 = ige ssa_26, ssa_3
vec1 32 ssa_29 = ige ssa_27, ssa_1
vec1 32 ssa_30 = iadd ssa_29, ssa_28
vec1 ssa_31 = ieq ssa_30, ssa_0
if ssa_31 {
...
break
} else {
...
}
...
}

On RADV this unrolls a bunch of loops in F1-2017 shaders.

Totals from affected shaders:
SGPRS: 4112 -> 4032 (-1.95 %)
VGPRS: 4076 -> 3996 (-1.96 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 510184 -> 589868 (15.62 %) bytes
LDS: 2 -> 2 (0.00 %) blocks
Max Waves: 200 -> 202 (1.00 %)
Wait states: 0 -> 0 (0.00 %)

It also unrolls a couple of loops in shader-db on radeonsi.

Totals from affected shaders:
SGPRS: 128 -> 128 (0.00 %)
VGPRS: 64 -> 64 (0.00 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 6880 -> 9504 (38.14 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 16 -> 16 (0.00 %)
Wait states: 0 -> 0 (0.00 %)
---
src/compiler/nir/nir_loop_analyze.c | 71 ++++++++++++++++++++++++++++-
1 file changed, 70 insertions(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c
index ea20db9dbf..27f4ee427c 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -736,6 +736,59 @@ get_induction_and_limit_vars(nir_alu_instr *alu, nir_loop_variable **ind,
return limit_rhs;
}

+static void
+try_find_trip_count_vars_in_iand(nir_alu_instr **alu,
+ nir_loop_variable **ind,
+ nir_loop_variable **limit,
+ bool *limit_rhs,
+ loop_info_state *state)
+{
+ assert((*alu)->op == nir_op_ieq);
+
+ nir_ssa_def *iand_def = (*alu)->src[0].src.ssa;
+ nir_ssa_def *zero_def = (*alu)->src[1].src.ssa;
+
+ if (iand_def->parent_instr->type != nir_instr_type_alu ||
+ zero_def->parent_instr->type != nir_instr_type_load_const) {
+
+ /* Maybe we had it the wrong way, flip things around */
+ iand_def = (*alu)->src[1].src.ssa;
+ zero_def = (*alu)->src[0].src.ssa;
+
+ /* If we still didn't find what we need then return */
+ if (iand_def->parent_instr->type != nir_instr_type_alu ||
+ zero_def->parent_instr->type != nir_instr_type_load_const)
+ return;
+ }
+
+ /* If the loop is not breaking on (x && y) == 0 then return */
+ nir_alu_instr *iand = nir_instr_as_alu(iand_def->parent_instr);
+ nir_const_value zero =
+ nir_instr_as_load_const(zero_def->parent_instr)->value;
+ if (iand->op != nir_op_iand || zero.i32[0] != 0)
+ return;
+
+ /* Check if iand src is a terminator condition and try get induction var
+ * and trip limit var.
+ */
+ nir_ssa_def *src = iand->src[0].src.ssa;
+ if (src->parent_instr->type == nir_instr_type_alu) {
+ *alu = nir_instr_as_alu(src->parent_instr);
+ if (is_supported_terminator_condition(*alu))
+ *limit_rhs = get_induction_and_limit_vars(*alu, ind, limit, state);
+ }
+
+ /* Try the other iand src if needed */
+ if ((*ind)->type != basic_induction) {
+ src = iand->src[1].src.ssa;
+ if (src->parent_instr->type == nir_instr_type_alu) {
+ *alu = nir_instr_as_alu(src->parent_instr);
+ if (is_supported_terminator_condition(*alu))
+ *limit_rhs = get_induction_and_limit_vars(*alu, ind, limit, state);
+ }
+ }
+}
+
/* Run through each of the terminators of the loop and try to infer a possible
* trip-count. We need to check them all, and set the lowest trip-count as the
* trip-count of our loop. If one of the terminators has an undecidable
@@ -774,7 +827,21 @@ find_trip_count(loop_info_state *state)
nir_loop_variable *limit;
bool limit_rhs = get_induction_and_limit_vars(alu, &basic_ind, &limit,
state);
- terminator->induction_rhs = !limit_rhs;
+
+ if (basic_ind->type != basic_induction && alu->op == nir_op_ieq) {
+ trip_count_known = false;
+ terminator->exact_trip_count_unknown = true;
+
+ try_find_trip_count_vars_in_iand(&alu, &basic_ind, &limit,
+ &limit_rhs, state);
+
+ /* The loop is exiting on (x && y) == 0 so we need to get the
+ * inverse of x or y (i.e. which ever contained the induction var) in
+ * order to compute the trip count.
+ */
+ if (basic_ind->type == basic_induction)
+ alu_op = inverse_comparision(alu);
+ }

/* The comparison has to have a basic induction variable for us to be
* able to find trip counts.
@@ -784,6 +851,8 @@ find_trip_count(loop_info_state *state)
continue;
}

+ terminator->induction_rhs = !limit_rhs;
+
/* Attempt to find a constant limit for the loop */
nir_const_value limit_val;
if (is_var_constant(limit)) {
--
2.19.2
Timothy Arceri
2018-12-07 03:08:18 UTC
Permalink
This helps make find_trip_count() a little easier to follow but
will also be used by a following patch.
---
src/compiler/nir/nir_loop_analyze.c | 41 ++++++++++++++++++-----------
1 file changed, 26 insertions(+), 15 deletions(-)

diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c
index 2dd7dd7b20..fab58144ea 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -714,6 +714,27 @@ is_supported_terminator_condition(nir_alu_instr *alu)
}
}

+static bool
+get_induction_and_limit_vars(nir_alu_instr *alu, nir_loop_variable **ind,
+ nir_loop_variable **limit,
+ loop_info_state *state)
+{
+ bool limit_rhs = true;
+
+ /* We assume that the limit is the "right" operand */
+ *ind = get_loop_var(alu->src[0].src.ssa, state);
+ *limit = get_loop_var(alu->src[1].src.ssa, state);
+
+ if ((*ind)->type != basic_induction) {
+ /* We had it the wrong way, flip things around */
+ *ind = get_loop_var(alu->src[1].src.ssa, state);
+ *limit = get_loop_var(alu->src[0].src.ssa, state);
+ limit_rhs = false;
+ }
+
+ return limit_rhs;
+}
+
/* Run through each of the terminators of the loop and try to infer a possible
* trip-count. We need to check them all, and set the lowest trip-count as the
* trip-count of our loop. If one of the terminators has an undecidable
@@ -741,26 +762,16 @@ find_trip_count(loop_info_state *state)
}

nir_alu_instr *alu = nir_instr_as_alu(terminator->conditional_instr);
- nir_loop_variable *basic_ind = NULL;
- nir_loop_variable *limit = NULL;
- bool limit_rhs = true;
-
if (!is_supported_terminator_condition(alu)) {
trip_count_known = false;
continue;
}

- /* We assume that the limit is the "right" operand */
- basic_ind = get_loop_var(alu->src[0].src.ssa, state);
- limit = get_loop_var(alu->src[1].src.ssa, state);
-
- if (basic_ind->type != basic_induction) {
- /* We had it the wrong way, flip things around */
- basic_ind = get_loop_var(alu->src[1].src.ssa, state);
- limit = get_loop_var(alu->src[0].src.ssa, state);
- limit_rhs = false;
- terminator->induction_rhs = true;
- }
+ nir_loop_variable *basic_ind;
+ nir_loop_variable *limit;
+ bool limit_rhs = get_induction_and_limit_vars(alu, &basic_ind, &limit,
+ state);
+ terminator->induction_rhs = !limit_rhs;

/* The comparison has to have a basic induction variable for us to be
* able to find trip counts.
--
2.19.2
Timothy Arceri
2018-12-07 03:08:19 UTC
Permalink
Rather than getting this from the alu instruction this allows us
some flexibility. In the following pass we instead pass the
inverse op.
---
src/compiler/nir/nir_loop_analyze.c | 17 ++++++++++-------
1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c
index fab58144ea..ea20db9dbf 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -590,7 +590,8 @@ test_iterations(int32_t iter_int, nir_const_value *step,
static int
calculate_iterations(nir_const_value *initial, nir_const_value *step,
nir_const_value *limit, nir_loop_variable *alu_def,
- nir_alu_instr *cond_alu, bool limit_rhs, bool invert_cond)
+ nir_alu_instr *cond_alu, nir_op alu_op, bool limit_rhs,
+ bool invert_cond)
{
assert(initial != NULL && step != NULL && limit != NULL);

@@ -605,10 +606,10 @@ calculate_iterations(nir_const_value *initial, nir_const_value *step,
nir_alu_type induction_base_type =
nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type);
if (induction_base_type == nir_type_int || induction_base_type == nir_type_uint) {
- assert(nir_alu_type_get_base_type(nir_op_infos[cond_alu->op].input_types[1]) == nir_type_int ||
- nir_alu_type_get_base_type(nir_op_infos[cond_alu->op].input_types[1]) == nir_type_uint);
+ assert(nir_alu_type_get_base_type(nir_op_infos[alu_op].input_types[1]) == nir_type_int ||
+ nir_alu_type_get_base_type(nir_op_infos[alu_op].input_types[1]) == nir_type_uint);
} else {
- assert(nir_alu_type_get_base_type(nir_op_infos[cond_alu->op].input_types[0]) ==
+ assert(nir_alu_type_get_base_type(nir_op_infos[alu_op].input_types[0]) ==
induction_base_type);
}

@@ -632,7 +633,7 @@ calculate_iterations(nir_const_value *initial, nir_const_value *step,
trip_offset = 1;
}

- int iter_int = get_iteration(cond_alu->op, initial, step, limit);
+ int iter_int = get_iteration(alu_op, initial, step, limit);

/* If iter_int is negative the loop is ill-formed or is the conditional is
* unsigned with a huge iteration count so don't bother going any further.
@@ -655,7 +656,7 @@ calculate_iterations(nir_const_value *initial, nir_const_value *step,
for (int bias = -1; bias <= 1; bias++) {
const int iter_bias = iter_int + bias;

- if (test_iterations(iter_bias, step, limit, cond_alu->op, bit_size,
+ if (test_iterations(iter_bias, step, limit, alu_op, bit_size,
induction_base_type, initial,
limit_rhs, invert_cond)) {
return iter_bias > 0 ? iter_bias - trip_offset : iter_bias;
@@ -762,6 +763,8 @@ find_trip_count(loop_info_state *state)
}

nir_alu_instr *alu = nir_instr_as_alu(terminator->conditional_instr);
+ nir_op alu_op = alu->op;
+
if (!is_supported_terminator_condition(alu)) {
trip_count_known = false;
continue;
@@ -818,7 +821,7 @@ find_trip_count(loop_info_state *state)
int iterations = calculate_iterations(&initial_val, &step_val,
&limit_val,
basic_ind->ind->alu_def, alu,
- limit_rhs,
+ alu_op, limit_rhs,
terminator->continue_from_then);

/* Where we not able to calculate the iteration count */
--
2.19.2
Loading...