Discussion:
[Mesa-dev] [PATCH 07/11] i965: Implement all_varyings_in_vbos in terms of Array._DrawVAO.
M***@gmx.net
2018-05-07 06:15:01 UTC
Permalink
From: Mathias Fröhlich <***@web.de>

Signed-off-by: Mathias Fröhlich <***@web.de>
---
src/mesa/drivers/dri/i965/brw_draw.c | 17 ++---------------
1 file changed, 2 insertions(+), 15 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 25ba372c44..c5f04264fa 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -25,6 +25,7 @@

#include <sys/errno.h>

+#include "main/arrayobj.h"
#include "main/blend.h"
#include "main/context.h"
#include "main/condrender.h"
@@ -917,20 +918,6 @@ retry:
}


-static bool
-all_varyings_in_vbos(const struct gl_vertex_array *arrays)
-{
- GLuint i;
-
- for (i = 0; i < VERT_ATTRIB_MAX; i++)
- if (arrays[i].BufferBinding->Stride &&
- arrays[i].BufferBinding->BufferObj->Name == 0)
- return false;
-
- return true;
-}
-
-

void
brw_draw_prims(struct gl_context *ctx,
@@ -982,7 +969,7 @@ brw_draw_prims(struct gl_context *ctx,
* get the minimum and maximum of their index buffer so we know what range
* to upload.
*/
- if (!index_bounds_valid && !all_varyings_in_vbos(arrays)) {
+ if (!index_bounds_valid && _mesa_draw_user_array_bits(ctx) != 0) {
perf_debug("Scanning index buffer to compute index buffer bounds. "
"Use glDrawRangeElements() to avoid this.\n");
vbo_get_minmax_indices(ctx, prims, ib, &min_index, &max_index, nr_prims);
--
2.14.3
M***@gmx.net
2018-05-07 06:14:56 UTC
Permalink
From: Mathias Fröhlich <***@web.de>

Signed-off-by: Mathias Fröhlich <***@web.de>
---
src/mesa/state_tracker/st_atom.c | 11 ++++-------
1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom.c b/src/mesa/state_tracker/st_atom.c
index 45a45960a3..df1a94e831 100644
--- a/src/mesa/state_tracker/st_atom.c
+++ b/src/mesa/state_tracker/st_atom.c
@@ -27,6 +27,7 @@


#include <stdio.h>
+#include "main/arrayobj.h"
#include "main/glheader.h"
#include "main/context.h"

@@ -138,19 +139,15 @@ static void check_program_state( struct st_context *st )

static void check_attrib_edgeflag(struct st_context *st)
{
- const struct gl_vertex_array *arrays = st->ctx->Array._DrawArrays;
- const struct gl_vertex_buffer_binding *binding;
GLboolean vertdata_edgeflags, edgeflag_culls_prims, edgeflags_enabled;
struct gl_program *vp = st->ctx->VertexProgram._Current;

- if (!arrays)
- return;
-
edgeflags_enabled = st->ctx->Polygon.FrontMode != GL_FILL ||
st->ctx->Polygon.BackMode != GL_FILL;

- binding = arrays[VERT_ATTRIB_EDGEFLAG].BufferBinding;
- vertdata_edgeflags = edgeflags_enabled && binding->Stride != 0;
+ vertdata_edgeflags = edgeflags_enabled &&
+ _mesa_draw_edge_flag_array_enabled(st->ctx);
+
if (vertdata_edgeflags != st->vertdata_edgeflags) {
st->vertdata_edgeflags = vertdata_edgeflags;
if (vp)
--
2.14.3
M***@gmx.net
2018-05-07 06:15:04 UTC
Permalink
From: Mathias Fröhlich <***@web.de>

Only tnl-based drivers still use Array._DrawArrays, so remove it
from core Mesa and use Array._DrawVAO instead.

Signed-off-by: Mathias Fröhlich <***@web.de>
---
src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c | 4 ++--
src/mesa/main/arrayobj.c | 1 -
src/mesa/main/attrib.c | 1 -
src/mesa/main/mtypes.h | 6 ------
src/mesa/main/varray.h | 14 --------------
src/mesa/tnl/t_draw.c | 11 +++++------
src/mesa/tnl/tnl.h | 2 +-
7 files changed, 8 insertions(+), 31 deletions(-)

diff --git a/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c b/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c
index 79b444cf55..d031ebeabd 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c
@@ -550,9 +550,9 @@ TAG(vbo_draw)(struct gl_context *ctx,
struct gl_buffer_object *indirect)
{
/* Borrow and update the inputs list from the tnl context */
- _tnl_bind_inputs(ctx);
+ const struct gl_vertex_array* arrays = _tnl_bind_inputs(ctx);

- TAG(vbo_check_render_prims)(ctx, ctx->Array._DrawArrays,
+ TAG(vbo_check_render_prims)(ctx, arrays,
prims, nr_prims, ib,
index_bounds_valid, min_index, max_index,
tfb_vertcount, stream, indirect);
diff --git a/src/mesa/main/arrayobj.c b/src/mesa/main/arrayobj.c
index 05af50ef40..5ee68cf9e9 100644
--- a/src/mesa/main/arrayobj.c
+++ b/src/mesa/main/arrayobj.c
@@ -970,7 +970,6 @@ bind_vertex_array(struct gl_context *ctx, GLuint id, bool no_error)
* or to prevent a crash if the VAO being unbound is going to be
* deleted.
*/
- _mesa_set_drawing_arrays(ctx, NULL);
_mesa_set_draw_vao(ctx, ctx->Array._EmptyVAO, 0);

ctx->NewState |= _NEW_ARRAY;
diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c
index e565750a89..cbe93ab6fa 100644
--- a/src/mesa/main/attrib.c
+++ b/src/mesa/main/attrib.c
@@ -1588,7 +1588,6 @@ copy_array_attrib(struct gl_context *ctx,
/* skip IndexBufferObj */

/* Invalidate array state. It will be updated during the next draw. */
- _mesa_set_drawing_arrays(ctx, NULL);
_mesa_set_draw_vao(ctx, ctx->Array._EmptyVAO, 0);
}

diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index dee917f2e4..1a6194ab4d 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1629,12 +1629,6 @@ struct gl_array_attrib
*/
struct gl_vertex_array_object *_EmptyVAO;

- /**
- * Vertex arrays as consumed by a driver.
- * The array pointer is set up only by the VBO module.
- */
- const struct gl_vertex_array *_DrawArrays; /**< 0..VERT_ATTRIB_MAX-1 */
-
/** Legal array datatypes and the API for which they have been computed */
GLbitfield LegalTypesMask;
gl_api LegalTypesMaskAPI;
diff --git a/src/mesa/main/varray.h b/src/mesa/main/varray.h
index 25d2a29e73..6ab55ed36a 100644
--- a/src/mesa/main/varray.h
+++ b/src/mesa/main/varray.h
@@ -53,20 +53,6 @@ _mesa_attr_zero_aliases_vertex(const struct gl_context *ctx)
}


-/**
- * This specifies the set of vertex arrays used by the driver for drawing.
- */
-static inline void
-_mesa_set_drawing_arrays(struct gl_context *ctx,
- const struct gl_vertex_array *arrays)
-{
- if (ctx->Array._DrawArrays != arrays) {
- ctx->Array._DrawArrays = arrays;
- ctx->NewDriverState |= ctx->DriverFlags.NewArray;
- }
-}
-
-
extern void
_mesa_update_array_format(struct gl_context *ctx,
struct gl_vertex_array_object *vao,
diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c
index a83b98eede..9814cdcec1 100644
--- a/src/mesa/tnl/t_draw.c
+++ b/src/mesa/tnl/t_draw.c
@@ -537,12 +537,12 @@ void _tnl_draw_prims(struct gl_context *ctx,
}


-void
+const struct gl_vertex_array*
_tnl_bind_inputs( struct gl_context *ctx )
{
TNLcontext *tnl = TNL_CONTEXT(ctx);
- _mesa_set_drawing_arrays(ctx, tnl->draw_arrays.inputs);
_vbo_update_inputs(ctx, &tnl->draw_arrays);
+ return tnl->draw_arrays.inputs;
}


@@ -558,12 +558,11 @@ _tnl_draw(struct gl_context *ctx,
struct gl_transform_feedback_object *tfb_vertcount,
unsigned stream, struct gl_buffer_object *indirect)
{
- /* Update TNLcontext::draw_arrays and set that pointer
- * into Array._DrawArrays.
+ /* Update TNLcontext::draw_arrays and return that pointer.
*/
- _tnl_bind_inputs(ctx);
+ const struct gl_vertex_array* arrays = _tnl_bind_inputs(ctx);

- _tnl_draw_prims(ctx, ctx->Array._DrawArrays, prim, nr_prims, ib,
+ _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib,
index_bounds_valid, min_index, max_index,
tfb_vertcount, stream, indirect);
}
diff --git a/src/mesa/tnl/tnl.h b/src/mesa/tnl/tnl.h
index e506aee6df..4b6d5ec391 100644
--- a/src/mesa/tnl/tnl.h
+++ b/src/mesa/tnl/tnl.h
@@ -66,7 +66,7 @@ _tnl_wakeup( struct gl_context *ctx );
extern void
_tnl_need_projected_coords( struct gl_context *ctx, GLboolean flag );

-extern void
+extern const struct gl_vertex_array*
_tnl_bind_inputs( struct gl_context *ctx );
--
2.14.3
M***@gmx.net
2018-05-07 06:15:00 UTC
Permalink
From: Mathias Fröhlich <***@web.de>

Remove st_context::draw_arrays; it was only meant to be temporary in gallium.

Signed-off-by: Mathias Fröhlich <***@web.de>
---
src/mesa/state_tracker/st_cb_feedback.c | 32 ++------------------------------
src/mesa/state_tracker/st_context.c | 3 ---
src/mesa/state_tracker/st_context.h | 3 ---
src/mesa/state_tracker/st_draw.c | 8 --------
4 files changed, 2 insertions(+), 44 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_feedback.c b/src/mesa/state_tracker/st_cb_feedback.c
index b7a082fca3..6e48be6f5d 100644
--- a/src/mesa/state_tracker/st_cb_feedback.c
+++ b/src/mesa/state_tracker/st_cb_feedback.c
@@ -272,34 +272,6 @@ draw_glselect_stage(struct gl_context *ctx, struct draw_context *draw)
}


-static void
-feedback_draw_vbo(struct gl_context *ctx,
- const struct _mesa_prim *prims,
- GLuint nr_prims,
- const struct _mesa_index_buffer *ib,
- GLboolean index_bounds_valid,
- GLuint min_index,
- GLuint max_index,
- struct gl_transform_feedback_object *tfb_vertcount,
- unsigned stream,
- struct gl_buffer_object *indirect)
-{
- struct st_context *st = st_context(ctx);
-
- /* The initial pushdown of the inputs array into the drivers */
- _mesa_set_drawing_arrays(ctx, st->draw_arrays.inputs);
- _vbo_update_inputs(ctx, &st->draw_arrays);
-
- /* The above needs to happen outside of st_feedback_draw_vbo,
- * since st_RasterPossets _DrawArrays and does not want that to be
- * overwritten by _mesa_set_drawing_arrays.
- */
- st_feedback_draw_vbo(ctx, prims, nr_prims, ib, index_bounds_valid,
- min_index, max_index, tfb_vertcount,
- stream, indirect);
-}
-
-
static void
st_RenderMode(struct gl_context *ctx, GLenum newMode )
{
@@ -318,7 +290,7 @@ st_RenderMode(struct gl_context *ctx, GLenum newMode )
st->selection_stage = draw_glselect_stage(ctx, draw);
draw_set_rasterize_stage(draw, st->selection_stage);
/* Plug in new vbo draw function */
- ctx->Driver.Draw = feedback_draw_vbo;
+ ctx->Driver.Draw = st_feedback_draw_vbo;
}
else {
struct gl_program *vp = st->ctx->VertexProgram._Current;
@@ -327,7 +299,7 @@ st_RenderMode(struct gl_context *ctx, GLenum newMode )
st->feedback_stage = draw_glfeedback_stage(ctx, draw);
draw_set_rasterize_stage(draw, st->feedback_stage);
/* Plug in new vbo draw function */
- ctx->Driver.Draw = feedback_draw_vbo;
+ ctx->Driver.Draw = st_feedback_draw_vbo;
/* need to generate/use a vertex program that emits pos/color/tex */
if (vp)
st->dirty |= ST_NEW_VERTEX_PROGRAM(st, st_vertex_program(vp));
diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c
index ee76e07a7d..061b8b9c4c 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -552,9 +552,6 @@ st_create_context_priv(struct gl_context *ctx, struct pipe_context *pipe,
/* Initialize context's winsys buffers list */
LIST_INITHEAD(&st->winsys_buffers);

- /* Keep our list of gl_vertex_array inputs */
- _vbo_init_inputs(&st->draw_arrays);
-
return st;
}

diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h
index 5125fc5839..9f5bfba3fd 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -295,9 +295,6 @@ struct st_context

/* Winsys buffers */
struct list_head winsys_buffers;
-
- /* For the initial pushdown, keep the list of vbo inputs. */
- struct vbo_inputs draw_arrays;
};


diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
index 6243659b50..12cae85f40 100644
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -146,10 +146,6 @@ st_draw_vbo(struct gl_context *ctx,
unsigned i;
unsigned start = 0;

- /* The initial pushdown of the inputs array into the drivers */
- _mesa_set_drawing_arrays(ctx, st->draw_arrays.inputs);
- _vbo_update_inputs(ctx, &st->draw_arrays);
-
prepare_draw(st, ctx);

if (st->vertex_array_out_of_memory)
@@ -255,10 +251,6 @@ st_indirect_draw_vbo(struct gl_context *ctx,
struct pipe_draw_info info;
struct pipe_draw_indirect_info indirect;

- /* The initial pushdown of the inputs array into the drivers */
- _mesa_set_drawing_arrays(ctx, st->draw_arrays.inputs);
- _vbo_update_inputs(ctx, &st->draw_arrays);
-
assert(stride);
prepare_draw(st, ctx);
--
2.14.3
M***@gmx.net
2018-05-07 06:14:59 UTC
Permalink
From: Mathias Fröhlich <***@web.de>

Instead of playing with Array._DrawArrays, make the feedback draw
path use Array._DrawVAO. Also st_RasterPos needs to use the VAO then.

v2: Use helper methods to get the offset values for array and binding.
Update comments.

Signed-off-by: Mathias Fröhlich <***@web.de>
---
src/mesa/state_tracker/st_cb_rasterpos.c | 43 ++++++++++-------------------
src/mesa/state_tracker/st_draw_feedback.c | 46 ++++++-------------------------
2 files changed, 24 insertions(+), 65 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_rasterpos.c b/src/mesa/state_tracker/st_cb_rasterpos.c
index b73d543653..cf4718f8cb 100644
--- a/src/mesa/state_tracker/st_cb_rasterpos.c
+++ b/src/mesa/state_tracker/st_cb_rasterpos.c
@@ -38,9 +38,11 @@

#include "main/imports.h"
#include "main/macros.h"
+#include "main/arrayobj.h"
#include "main/feedback.h"
#include "main/rastpos.h"
-#include "glformats.h"
+#include "main/state.h"
+#include "main/varray.h"

#include "st_context.h"
#include "st_atom.h"
@@ -61,9 +63,7 @@ struct rastpos_stage
struct gl_context *ctx; /**< Rendering context */

/* vertex attrib info we can setup once and re-use */
- struct gl_vertex_buffer_binding binding;
- struct gl_array_attributes attrib[VERT_ATTRIB_MAX];
- struct gl_vertex_array array[VERT_ATTRIB_MAX];
+ struct gl_vertex_array_object *VAO;
struct _mesa_prim prim;
};

@@ -103,6 +103,8 @@ rastpos_line( struct draw_stage *stage, struct prim_header *prim )
static void
rastpos_destroy(struct draw_stage *stage)
{
+ struct rastpos_stage *rstage = (struct rastpos_stage*)stage;
+ _mesa_reference_vao(rstage->ctx, &rstage->VAO, NULL);
free(stage);
}

@@ -182,8 +184,6 @@ static struct rastpos_stage *
new_draw_rastpos_stage(struct gl_context *ctx, struct draw_context *draw)
{
struct rastpos_stage *rs = ST_CALLOC_STRUCT(rastpos_stage);
- GLuint i;
- GLuint elementSize;

rs->stage.draw = draw;
rs->stage.next = NULL;
@@ -196,20 +196,11 @@ new_draw_rastpos_stage(struct gl_context *ctx, struct draw_context *draw)
rs->stage.destroy = rastpos_destroy;
rs->ctx = ctx;

- rs->binding.Stride = 0;
- rs->binding.BufferObj = NULL;
-
- elementSize = _mesa_bytes_per_vertex_attrib(4, GL_FLOAT);
- for (i = 0; i < ARRAY_SIZE(rs->array); i++) {
- rs->attrib[i].Size = 4;
- rs->attrib[i].Type = GL_FLOAT;
- rs->attrib[i].Format = GL_RGBA;
- rs->attrib[i].Ptr = (GLubyte *) ctx->Current.Attrib[i];
- rs->attrib[i].Normalized = GL_TRUE;
- rs->attrib[i]._ElementSize = elementSize;
- rs->array[i].BufferBinding = &rs->binding;
- rs->array[i].VertexAttrib = &rs->attrib[i];
- }
+ rs->VAO = _mesa_new_vao(ctx, ~((GLuint)0));
+ _mesa_vertex_attrib_binding(ctx, rs->VAO, VERT_ATTRIB_POS, 0, false);
+ _mesa_update_array_format(ctx, rs->VAO, VERT_ATTRIB_POS, 4, GL_FLOAT,
+ GL_RGBA, GL_FALSE, GL_FALSE, GL_FALSE, 0);
+ _mesa_enable_vertex_array_attrib(ctx, rs->VAO, 0, false);

rs->prim.mode = GL_POINTS;
rs->prim.indexed = 0;
@@ -229,7 +220,6 @@ st_RasterPos(struct gl_context *ctx, const GLfloat v[4])
struct st_context *st = st_context(ctx);
struct draw_context *draw = st_get_draw_context(st);
struct rastpos_stage *rs;
- const struct gl_vertex_array *saved_arrays = ctx->Array._DrawArrays;

if (!st->draw)
return;
@@ -265,16 +255,13 @@ st_RasterPos(struct gl_context *ctx, const GLfloat v[4])
/* All vertex attribs but position were previously initialized above.
* Just plug in position pointer now.
*/
- rs->attrib[0].Ptr = (GLubyte *) v;
+ rs->VAO->VertexAttrib[VERT_ATTRIB_POS].Ptr = (GLubyte *) v;
+ rs->VAO->NewArrays |= VERT_BIT_POS;
+ _mesa_set_draw_vao(ctx, rs->VAO, VERT_BIT_POS);

- /* Draw the point.
- *
- * Don't set DriverFlags.NewArray.
- * st_feedback_draw_vbo doesn't check for that flag. */
- ctx->Array._DrawArrays = rs->array;
+ /* Draw the point. */
st_feedback_draw_vbo(ctx, &rs->prim, 1, NULL, GL_TRUE, 0, 1,
NULL, 0, NULL);
- ctx->Array._DrawArrays = saved_arrays;

/* restore draw's rasterization stage depending on rendermode */
if (ctx->RenderMode == GL_FEEDBACK) {
diff --git a/src/mesa/state_tracker/st_draw_feedback.c b/src/mesa/state_tracker/st_draw_feedback.c
index fa96b4e2e2..eb05ac9669 100644
--- a/src/mesa/state_tracker/st_draw_feedback.c
+++ b/src/mesa/state_tracker/st_draw_feedback.c
@@ -26,6 +26,7 @@
**************************************************************************/

#include "main/imports.h"
+#include "main/arrayobj.h"
#include "main/image.h"
#include "main/macros.h"
#include "main/varray.h"
@@ -131,9 +132,7 @@ st_feedback_draw_vbo(struct gl_context *ctx,
struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {NULL};
struct pipe_transfer *ib_transfer = NULL;
- const struct gl_vertex_array *arrays = ctx->Array._DrawArrays;
GLuint attr, i;
- const GLubyte *low_addr = NULL;
const void *mapped_indices = NULL;

if (!draw)
@@ -168,56 +167,28 @@ st_feedback_draw_vbo(struct gl_context *ctx,
draw_bind_vertex_shader(draw, st->vp_variant->draw_shader);
set_feedback_vertex_format(ctx);

- /* Find the lowest address of the arrays we're drawing */
- if (vp->num_inputs) {
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
- array = &arrays[vp->index_to_input[0]];
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
-
- low_addr = _mesa_vertex_attrib_address(attrib, binding);
-
- for (attr = 1; attr < vp->num_inputs; attr++) {
- const GLubyte *start;
- array = &arrays[vp->index_to_input[attr]];
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- start = _mesa_vertex_attrib_address(attrib, binding);
- low_addr = MIN2(low_addr, start);
- }
- }
-
/* loop over TGSI shader inputs to determine vertex buffer
* and attribute info
*/
for (attr = 0; attr < vp->num_inputs; attr++) {
const GLuint mesaAttr = vp->index_to_input[attr];
- const struct gl_vertex_array *array = &arrays[mesaAttr];
const struct gl_vertex_buffer_binding *binding;
const struct gl_array_attributes *attrib;
- struct gl_buffer_object *bufobj;
void *map;

- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- bufobj = binding->BufferObj;
+ _mesa_draw_attrib_and_binding(ctx, mesaAttr, &attrib, &binding);

- if (bufobj && bufobj->Name) {
- /* Attribute data is in a VBO.
- * Recall that for VBOs, the gl_vertex_array->Ptr field is
- * really an offset from the start of the VBO, not a pointer.
- */
- struct st_buffer_object *stobj = st_buffer_object(bufobj);
+ if (_mesa_is_bufferobj(binding->BufferObj)) {
+ /* Attribute data is in a VBO. */
+ struct st_buffer_object *stobj = st_buffer_object(binding->BufferObj);
assert(stobj->buffer);

vbuffers[attr].buffer.resource = NULL;
vbuffers[attr].is_user_buffer = false;
pipe_resource_reference(&vbuffers[attr].buffer.resource, stobj->buffer);
- vbuffers[attr].buffer_offset = pointer_to_offset(low_addr);
- velements[attr].src_offset = binding->Offset
- + attrib->RelativeOffset - pointer_to_offset(low_addr);
+ vbuffers[attr].buffer_offset = _mesa_draw_binding_offset(binding);
+ velements[attr].src_offset =
+ _mesa_draw_attributes_relative_offset(attrib);

/* map the attrib buffer */
map = pipe_buffer_map(pipe, vbuffers[attr].buffer.resource,
@@ -227,6 +198,7 @@ st_feedback_draw_vbo(struct gl_context *ctx,
vbuffers[attr].buffer.resource->width0);
}
else {
+ /* Attribute data is in a user space array. */
vbuffers[attr].buffer.user = attrib->Ptr;
vbuffers[attr].is_user_buffer = true;
vbuffers[attr].buffer_offset = 0;
--
2.14.3
M***@gmx.net
2018-05-07 06:15:02 UTC
Permalink
From: Mathias Fröhlich <***@web.de>

For now, store the binding and attrib pointers in brw_vertex_element.
The i965 driver still has plenty of opportunity to make use of the
unique binding information in the VAO, which it currently does not
take from the VAO.

Signed-off-by: Mathias Fröhlich <***@web.de>
---
src/mesa/drivers/dri/i965/brw_context.h | 3 ++-
src/mesa/drivers/dri/i965/brw_draw.c | 25 +++++++++------------
src/mesa/drivers/dri/i965/brw_draw_upload.c | 31 +++++++++++++--------------
src/mesa/drivers/dri/i965/genX_state_upload.c | 12 ++++-------
4 files changed, 31 insertions(+), 40 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 23406816a9..02f3ad9772 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -440,7 +440,8 @@ struct brw_vertex_buffer {
GLuint step_rate;
};
struct brw_vertex_element {
- const struct gl_vertex_array *glarray;
+ const struct gl_array_attributes *glattrib;
+ const struct gl_vertex_buffer_binding *glbinding;

int buffer;
bool is_dual_slot;
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index c5f04264fa..3b47dc7b26 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -278,8 +278,7 @@ brw_emit_prim(struct brw_context *brw,


static void
-brw_merge_inputs(struct brw_context *brw,
- const struct gl_vertex_array *arrays)
+brw_merge_inputs(struct brw_context *brw)
{
const struct gen_device_info *devinfo = &brw->screen->devinfo;
const struct gl_context *ctx = &brw->ctx;
@@ -292,8 +291,10 @@ brw_merge_inputs(struct brw_context *brw,
brw->vb.nr_buffers = 0;

for (i = 0; i < VERT_ATTRIB_MAX; i++) {
- brw->vb.inputs[i].buffer = -1;
- brw->vb.inputs[i].glarray = &arrays[i];
+ struct brw_vertex_element *input = &brw->vb.inputs[i];
+ input->buffer = -1;
+ _mesa_draw_attrib_and_binding(ctx, i,
+ &input->glattrib, &input->glbinding);
}

if (devinfo->gen < 8 && !devinfo->is_haswell) {
@@ -306,7 +307,7 @@ brw_merge_inputs(struct brw_context *brw,
uint8_t wa_flags = 0;

i = u_bit_scan64(&mask);
- glattrib = brw->vb.inputs[i].glarray->VertexAttrib;
+ glattrib = brw->vb.inputs[i].glattrib;

switch (glattrib->Type) {

@@ -693,7 +694,6 @@ brw_postdraw_reconcile_align_wa_slices(struct brw_context *brw)

static void
brw_prepare_drawing(struct gl_context *ctx,
- const struct gl_vertex_array *arrays,
const struct _mesa_index_buffer *ib,
bool index_bounds_valid,
GLuint min_index,
@@ -746,7 +746,7 @@ brw_prepare_drawing(struct gl_context *ctx,

/* Bind all inputs, derive varying and size information:
*/
- brw_merge_inputs(brw, arrays);
+ brw_merge_inputs(brw);

brw->ib.ib = ib;
brw->ctx.NewDriverState |= BRW_NEW_INDICES;
@@ -780,7 +780,6 @@ brw_finish_drawing(struct gl_context *ctx)
*/
static void
brw_draw_single_prim(struct gl_context *ctx,
- const struct gl_vertex_array *arrays,
const struct _mesa_prim *prim,
unsigned prim_id,
struct brw_transform_feedback_object *xfb_obj,
@@ -811,7 +810,7 @@ brw_draw_single_prim(struct gl_context *ctx,
brw->baseinstance = prim->base_instance;
if (prim_id > 0) { /* For i == 0 we just did this before the loop */
brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
- brw_merge_inputs(brw, arrays);
+ brw_merge_inputs(brw);
}
}

@@ -933,14 +932,12 @@ brw_draw_prims(struct gl_context *ctx,
{
unsigned i;
struct brw_context *brw = brw_context(ctx);
- const struct gl_vertex_array *arrays;
int predicate_state = brw->predicate.state;
struct brw_transform_feedback_object *xfb_obj =
(struct brw_transform_feedback_object *) gl_xfb_obj;

/* The initial pushdown of the inputs array into the drivers */
_mesa_set_drawing_arrays(ctx, brw->vb.draw_arrays.inputs);
- arrays = ctx->Array._DrawArrays;
_vbo_update_inputs(ctx, &brw->vb.draw_arrays);

if (!brw_check_conditional_render(brw))
@@ -976,8 +973,7 @@ brw_draw_prims(struct gl_context *ctx,
index_bounds_valid = true;
}

- brw_prepare_drawing(ctx, arrays, ib, index_bounds_valid, min_index,
- max_index);
+ brw_prepare_drawing(ctx, ib, index_bounds_valid, min_index, max_index);
/* Try drawing with the hardware, but don't do anything else if we can't
* manage it. swrast doesn't support our featureset, so we can't fall back
* to it.
@@ -1014,8 +1010,7 @@ brw_draw_prims(struct gl_context *ctx,
brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT;
}

- brw_draw_single_prim(ctx, arrays, &prims[i], i, xfb_obj, stream,
- indirect);
+ brw_draw_single_prim(ctx, &prims[i], i, xfb_obj, stream, indirect);
}

brw_finish_drawing(ctx);
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index b1be269ca9..bc9b2566de 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -23,6 +23,7 @@
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

+#include "main/arrayobj.h"
#include "main/bufferobj.h"
#include "main/context.h"
#include "main/enums.h"
@@ -403,9 +404,8 @@ copy_array_to_vbo_array(struct brw_context *brw,
struct brw_vertex_buffer *buffer,
GLuint dst_stride)
{
- const struct gl_vertex_array *glarray = element->glarray;
- const struct gl_vertex_buffer_binding *glbinding = glarray->BufferBinding;
- const struct gl_array_attributes *glattrib = glarray->VertexAttrib;
+ const struct gl_vertex_buffer_binding *glbinding = element->glbinding;
+ const struct gl_array_attributes *glattrib = element->glattrib;
const int src_stride = glbinding->Stride;

/* If the source stride is zero, we just want to upload the current
@@ -512,15 +512,15 @@ brw_prepare_vertices(struct brw_context *brw)

for (i = j = 0; i < brw->vb.nr_enabled; i++) {
struct brw_vertex_element *input = brw->vb.enabled[i];
- const struct gl_vertex_array *glarray = input->glarray;
- const struct gl_vertex_buffer_binding *glbinding = glarray->BufferBinding;
- const struct gl_array_attributes *glattrib = glarray->VertexAttrib;
+ const struct gl_vertex_buffer_binding *glbinding = input->glbinding;
+ const struct gl_array_attributes *glattrib = input->glattrib;

if (_mesa_is_bufferobj(glbinding->BufferObj)) {
struct intel_buffer_object *intel_buffer =
intel_buffer_object(glbinding->BufferObj);

- const uint32_t offset = glbinding->Offset + glattrib->RelativeOffset;
+ const uint32_t offset = _mesa_draw_binding_offset(glbinding) +
+ _mesa_draw_attributes_relative_offset(glattrib);

/* Start with the worst case */
uint32_t start = 0;
@@ -546,10 +546,11 @@ brw_prepare_vertices(struct brw_context *brw)
*/
unsigned k;
for (k = 0; k < i; k++) {
- const struct gl_vertex_array *other = brw->vb.enabled[k]->glarray;
- const struct gl_vertex_buffer_binding *obind = other->BufferBinding;
- const struct gl_array_attributes *oattrib = other->VertexAttrib;
- const uint32_t ooffset = obind->Offset + oattrib->RelativeOffset;
+ struct brw_vertex_element *other = brw->vb.enabled[k];
+ const struct gl_vertex_buffer_binding *obind = other->glbinding;
+ const struct gl_array_attributes *oattrib = other->glattrib;
+ const uint32_t ooffset = _mesa_draw_binding_offset(obind) +
+ _mesa_draw_attributes_relative_offset(oattrib);
if (glbinding->BufferObj == obind->BufferObj &&
glbinding->Stride == obind->Stride &&
glbinding->InstanceDivisor == obind->InstanceDivisor &&
@@ -658,8 +659,7 @@ brw_prepare_vertices(struct brw_context *brw)
buffer->step_rate = 0;

for (i = 0; i < nr_uploads; i++) {
- const struct gl_vertex_array *glarray = upload[i]->glarray;
- const struct gl_array_attributes *glattrib = glarray->VertexAttrib;
+ const struct gl_array_attributes *glattrib = upload[i]->glattrib;
/* Then, just point upload[i] at upload[0]'s buffer. */
upload[i]->offset = ((const unsigned char *)glattrib->Ptr - ptr);
upload[i]->buffer = j;
@@ -672,9 +672,8 @@ brw_prepare_vertices(struct brw_context *brw)
/* Upload non-interleaved arrays */
for (i = 0; i < nr_uploads; i++) {
struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
- const struct gl_vertex_array *glarray = upload[i]->glarray;
- const struct gl_vertex_buffer_binding *glbinding = glarray->BufferBinding;
- const struct gl_array_attributes *glattrib = glarray->VertexAttrib;
+ const struct gl_vertex_buffer_binding *glbinding = upload[i]->glbinding;
+ const struct gl_array_attributes *glattrib = upload[i]->glattrib;
if (glbinding->InstanceDivisor == 0) {
copy_array_to_vbo_array(brw, upload[i], min_index, max_index,
buffer, glattrib->_ElementSize);
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
index b1867c1a1c..b65c95af51 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -560,8 +560,7 @@ genX(emit_vertices)(struct brw_context *brw)
*/
for (unsigned i = 0; i < brw->vb.nr_enabled; i++) {
struct brw_vertex_element *input = brw->vb.enabled[i];
- const struct gl_vertex_array *glarray = input->glarray;
- const struct gl_array_attributes *glattrib = glarray->VertexAttrib;
+ const struct gl_array_attributes *glattrib = input->glattrib;
uint32_t format = brw_get_vertex_surface_type(brw, glattrib);

if (uploads_needed(format, input->is_dual_slot) > 1)
@@ -652,8 +651,7 @@ genX(emit_vertices)(struct brw_context *brw)
unsigned i;
for (i = 0; i < brw->vb.nr_enabled; i++) {
const struct brw_vertex_element *input = brw->vb.enabled[i];
- const struct gl_vertex_array *glarray = input->glarray;
- const struct gl_array_attributes *glattrib = glarray->VertexAttrib;
+ const struct gl_array_attributes *glattrib = input->glattrib;
uint32_t format = brw_get_vertex_surface_type(brw, glattrib);
uint32_t comp0 = VFCOMP_STORE_SRC;
uint32_t comp1 = VFCOMP_STORE_SRC;
@@ -695,8 +693,7 @@ genX(emit_vertices)(struct brw_context *brw)
* entry. */
const unsigned offset = input->offset + c * 16;

- const struct gl_vertex_array *glarray = input->glarray;
- const struct gl_array_attributes *glattrib = glarray->VertexAttrib;
+ const struct gl_array_attributes *glattrib = input->glattrib;
const int size = (GEN_GEN < 8 && is_passthru_format(format)) ?
upload_format_size(upload_format) : glattrib->Size;

@@ -818,8 +815,7 @@ genX(emit_vertices)(struct brw_context *brw)

#if GEN_GEN >= 6
if (gen6_edgeflag_input) {
- const struct gl_vertex_array *glarray = gen6_edgeflag_input->glarray;
- const struct gl_array_attributes *glattrib = glarray->VertexAttrib;
+ const struct gl_array_attributes *glattrib = gen6_edgeflag_input->glattrib;
const uint32_t format = brw_get_vertex_surface_type(brw, glattrib);

struct GENX(VERTEX_ELEMENT_STATE) elem_state = {
--
2.14.3
M***@gmx.net
2018-05-07 06:15:03 UTC
Permalink
From: Mathias Fröhlich <***@web.de>

Remove brw_context::vb.draw_arrays; it was only meant to be temporary in i965.

Signed-off-by: Mathias Fröhlich <***@web.de>
---
src/mesa/drivers/dri/i965/brw_context.h | 4 ----
src/mesa/drivers/dri/i965/brw_draw.c | 7 -------
2 files changed, 11 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 02f3ad9772..fdc62eae5c 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -37,7 +37,6 @@
#include "main/macros.h"
#include "main/mtypes.h"
#include "main/errors.h"
-#include "vbo/vbo.h"
#include "brw_structs.h"
#include "brw_pipe_control.h"
#include "compiler/brw_compiler.h"
@@ -980,9 +979,6 @@ struct brw_context
* These bitfields indicate which workarounds are needed.
*/
uint8_t attrib_wa_flags[VERT_ATTRIB_MAX];
-
- /* For the initial pushdown, keep the list of vbo inputs. */
- struct vbo_inputs draw_arrays;
} vb;

struct {
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 3b47dc7b26..ae3b7be2dd 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -936,10 +936,6 @@ brw_draw_prims(struct gl_context *ctx,
struct brw_transform_feedback_object *xfb_obj =
(struct brw_transform_feedback_object *) gl_xfb_obj;

- /* The initial pushdown of the inputs array into the drivers */
- _mesa_set_drawing_arrays(ctx, brw->vb.draw_arrays.inputs);
- _vbo_update_inputs(ctx, &brw->vb.draw_arrays);
-
if (!brw_check_conditional_render(brw))
return;

@@ -1078,9 +1074,6 @@ brw_init_draw_functions(struct dd_function_table *functions)
void
brw_draw_init(struct brw_context *brw)
{
- /* Keep our list of gl_vertex_array inputs */
- _vbo_init_inputs(&brw->vb.draw_arrays);
-
for (int i = 0; i < VERT_ATTRIB_MAX; i++)
brw->vb.inputs[i].buffer = -1;
brw->vb.nr_buffers = 0;
--
2.14.3
M***@gmx.net
2018-05-07 06:14:55 UTC
Permalink
From: Mathias Fröhlich <***@web.de>

Compute VAO buffer binding information past the position/generic0 mapping.
Scan for duplicate buffer bindings and collapse them into derived
effective buffer binding index and effective attribute mask variables.
Provide a set of helper functions to access the distilled
information in the VAO. All of them prefixed with _mesa_draw_...
to indicate that they are meant to query draw information.

v2: Also group user space arrays containing interleaved arrays.
Add _Eff*Offset to be copied on attribute and binding copy.
Update comments.

Signed-off-by: Mathias Fröhlich <***@web.de>
---
src/mesa/main/arrayobj.c | 390 ++++++++++++++++++++++++++++++++++++++++++++-
src/mesa/main/arrayobj.h | 186 +++++++++++++++++++++
src/mesa/main/attrib.c | 1 +
src/mesa/main/mtypes.h | 64 ++++++++
src/mesa/main/varray.c | 9 ++
src/mesa/vbo/vbo.h | 8 +
src/mesa/vbo/vbo_context.c | 17 ++
src/mesa/vbo/vbo_private.h | 7 +-
8 files changed, 675 insertions(+), 7 deletions(-)

diff --git a/src/mesa/main/arrayobj.c b/src/mesa/main/arrayobj.c
index 899d4dec01..05af50ef40 100644
--- a/src/mesa/main/arrayobj.c
+++ b/src/mesa/main/arrayobj.c
@@ -451,8 +451,116 @@ _mesa_initialize_vao(struct gl_context *ctx,


/**
- * Updates the derived gl_vertex_arrays when a gl_array_attributes
- * or a gl_vertex_buffer_binding has changed.
+ * Compute the offset range for the provided binding.
+ *
+ * This is a helper function for the below.
+ */
+static void
+compute_vbo_offset_range(const struct gl_vertex_array_object *vao,
+ const struct gl_vertex_buffer_binding *binding,
+ GLsizeiptr* min, GLsizeiptr* max)
+{
+ /* The function is meant to work on VBO bindings */
+ assert(_mesa_is_bufferobj(binding->BufferObj));
+
+ /* Start with an inverted range of relative offsets. */
+ GLuint min_offset = ~(GLuint)0;
+ GLuint max_offset = 0;
+
+ /* We work on the unmapped original VAO array entries. */
+ GLbitfield mask = vao->_Enabled & binding->_BoundArrays;
+ /* The binding should be active somehow, not to return inverted ranges */
+ assert(mask);
+ while (mask) {
+ const int i = u_bit_scan(&mask);
+ const GLuint off = vao->VertexAttrib[i].RelativeOffset;
+ min_offset = MIN2(off, min_offset);
+ max_offset = MAX2(off, max_offset);
+ }
+
+ *min = binding->Offset + (GLsizeiptr)min_offset;
+ *max = binding->Offset + (GLsizeiptr)max_offset;
+}
+
+
+/**
+ * Update the unique binding and pos/generic0 map tracking in the vao.
+ *
+ * The idea is to build up information in the vao so that a consuming
+ * backend can execute the following to set up buffer and vertex element
+ * information:
+ *
+ * const GLbitfield inputs_read = VERT_BIT_ALL; // backend vp inputs
+ *
+ * // Attribute data is in a VBO.
+ * GLbitfield vbomask = inputs_read & _mesa_draw_vbo_array_bits(ctx);
+ * while (vbomask) {
+ * // The attribute index to start pulling a binding
+ * const gl_vert_attrib i = ffs(vbomask) - 1;
+ * const struct gl_vertex_buffer_binding *const binding
+ * = _mesa_draw_buffer_binding(vao, i);
+ *
+ * <insert code to handle the vertex buffer object at binding>
+ *
+ * const GLbitfield boundmask = _mesa_draw_bound_attrib_bits(binding);
+ * GLbitfield attrmask = vbomask & boundmask;
+ * assert(attrmask);
+ * // Walk attributes belonging to the binding
+ * while (attrmask) {
+ * const gl_vert_attrib attr = u_bit_scan(&attrmask);
+ * const struct gl_array_attributes *const attrib
+ * = _mesa_draw_array_attrib(vao, attr);
+ *
+ * <insert code to handle the vertex element referring to the binding>
+ * }
+ * vbomask &= ~boundmask;
+ * }
+ *
+ * // Process user space buffers
+ * GLbitfield usermask = inputs_read & _mesa_draw_user_array_bits(ctx);
+ * while (usermask) {
+ * // The attribute index to start pulling a binding
+ * const gl_vert_attrib i = ffs(usermask) - 1;
+ * const struct gl_vertex_buffer_binding *const binding
+ * = _mesa_draw_buffer_binding(vao, i);
+ *
+ * <insert code to handle a set of interleaved user space arrays at binding>
+ *
+ * const GLbitfield boundmask = _mesa_draw_bound_attrib_bits(binding);
+ * GLbitfield attrmask = usermask & boundmask;
+ * assert(attrmask);
+ * // Walk interleaved attributes with a common stride and instance divisor
+ * while (attrmask) {
+ * const gl_vert_attrib attr = u_bit_scan(&attrmask);
+ * const struct gl_array_attributes *const attrib
+ * = _mesa_draw_array_attrib(vao, attr);
+ *
+ * <insert code to handle non vbo vertex arrays>
+ * }
+ * usermask &= ~boundmask;
+ * }
+ *
+ * // Process values that should have better been uniforms in the application
+ * GLbitfield curmask = inputs_read & _mesa_draw_current_bits(ctx);
+ * while (curmask) {
+ * const gl_vert_attrib attr = u_bit_scan(&curmask);
+ * const struct gl_array_attributes *const attrib
+ * = _mesa_draw_current_attrib(ctx, attr);
+ *
+ * <insert code to handle current values>
+ * }
+ *
+ *
+ * Note that the scan below must not incorporate any context state.
+ * The rationale is that once a VAO is finalized it should not
+ * be touched anymore. That means, do not incorporate the
+ * gl_context::Array._DrawVAOEnabledAttribs bitmask into this scan.
+ * A backend driver may further reduce the handled vertex processing
+ * inputs based on their vertex shader inputs. But scanning for
+ * collapsible binding points to reduce relocs is done based on the
+ * enabled arrays.
+ * Also VAOs may be shared between contexts due to their use in dlists
+ * thus no context state should bleed into the VAO.
*/
void
_mesa_update_vao_derived_arrays(struct gl_context *ctx,
@@ -461,11 +569,281 @@ _mesa_update_vao_derived_arrays(struct gl_context *ctx,
/* Make sure we do not run into problems with shared objects */
assert(!vao->SharedAndImmutable || vao->NewArrays == 0);

- /*
- * Stay tuned, the next series scans for duplicate bindings in this
- * function. So that drivers can easily know the minimum unique set
- * of bindings.
+ /* Limit used for common binding scanning below. */
+ const GLsizeiptr MaxRelativeOffset =
+ ctx->Const.MaxVertexAttribRelativeOffset;
+
+ /* The gl_vertex_array_object::_AttributeMapMode denotes the way
+ * VERT_ATTRIB_{POS,GENERIC0} mapping is done.
+ *
+ * This mapping is used to map between the OpenGL api visible
+ * VERT_ATTRIB_* arrays to mesa driver arrayinputs or shader inputs.
+ * The mapping only depends on the enabled bits of the
+ * VERT_ATTRIB_{POS,GENERIC0} arrays and is tracked in the VAO.
+ *
+ * This map needs to be applied when finally translating to the bitmasks
+ * as consumed by the driver backends. The duplicate scanning here
+ * can as well be done in the OpenGL API numbering without this map.
*/
+ const gl_attribute_map_mode mode = vao->_AttributeMapMode;
+ /* Enabled array bits. */
+ const GLbitfield enabled = vao->_Enabled;
+ /* VBO array bits. */
+ const GLbitfield vbos = vao->VertexAttribBufferMask;
+
+ /* Compute and store effectively enabled and mapped vbo arrays */
+ vao->_EffEnabledVBO = _mesa_vao_enable_to_vp_inputs(mode, enabled & vbos);
+ /* Walk those enabled arrays that have a real vbo attached */
+ GLbitfield mask = enabled;
+ while (mask) {
+ /* Do not use u_bit_scan as we can walk multiple attrib arrays at once */
+ const int i = ffs(mask) - 1;
+ /* The binding from the first to be processed attribute. */
+ const GLuint bindex = vao->VertexAttrib[i].BufferBindingIndex;
+ struct gl_vertex_buffer_binding *binding = &vao->BufferBinding[bindex];
+
+ /* The scan goes different for user space arrays than vbos */
+ if (_mesa_is_bufferobj(binding->BufferObj)) {
+ /* The bound arrays. */
+ const GLbitfield bound = enabled & binding->_BoundArrays;
+
+ /* Start this current effective binding with the actual bound arrays */
+ GLbitfield eff_bound_arrays = bound;
+
+ /*
+ * If there is nothing left to scan just update the effective binding
+ * information. If the VAO is already only using a single binding point
+ * we end up here. So the overhead of this scan for an application
+ * carefully preparing the VAO for draw is low.
+ */
+
+ GLbitfield scanmask = mask & vbos & ~bound;
+ /* Is there something left to scan? */
+ if (scanmask == 0) {
+ /* Just update the back reference from the attrib to the binding and
+ * the effective offset.
+ */
+ GLbitfield attrmask = eff_bound_arrays;
+ while (attrmask) {
+ const int j = u_bit_scan(&attrmask);
+ struct gl_array_attributes *attrib2 = &vao->VertexAttrib[j];
+
+ /* Update the index into the common binding point and offset */
+ attrib2->_EffBufferBindingIndex = bindex;
+ attrib2->_EffRelativeOffset = attrib2->RelativeOffset;
+ assert(attrib2->_EffRelativeOffset <= MaxRelativeOffset);
+
+ /* Only enabled arrays shall appear in the unique bindings */
+ assert(attrib2->Enabled);
+ }
+ /* Finally this is the set of effectively bound arrays with the
+ * original binding offset.
+ */
+ binding->_EffOffset = binding->Offset;
+ /* The bound arrays past the VERT_ATTRIB_{POS,GENERIC0} mapping. */
+ binding->_EffBoundArrays =
+ _mesa_vao_enable_to_vp_inputs(mode, eff_bound_arrays);
+
+ } else {
+ /* In the VBO case, scan for attribute/binding
+ * combinations with relative bindings in the range of
+ * [0, ctx->Const.MaxVertexAttribRelativeOffset].
+ * Note that this does also go beyond just interleaved arrays
+ * as long as they use the same VBO, binding parameters and the
+ * offsets stay within bounds that the backend still can handle.
+ */
+
+ GLsizeiptr min_offset, max_offset;
+ compute_vbo_offset_range(vao, binding, &min_offset, &max_offset);
+ assert(max_offset <= min_offset + MaxRelativeOffset);
+
+ /* Now scan. */
+ while (scanmask) {
+ /* Do not use u_bit_scan as we can walk multiple
+ * attrib arrays at once
+ */
+ const int j = ffs(scanmask) - 1;
+ const struct gl_array_attributes *attrib2 =
+ &vao->VertexAttrib[j];
+ const struct gl_vertex_buffer_binding *binding2 =
+ &vao->BufferBinding[attrib2->BufferBindingIndex];
+
+ /* Remove those attrib bits from the mask that are bound to the
+ * same effective binding point.
+ */
+ const GLbitfield bound2 = enabled & binding2->_BoundArrays;
+ scanmask &= ~bound2;
+
+ /* Check if we have an identical binding */
+ if (binding->Stride != binding2->Stride)
+ continue;
+ if (binding->InstanceDivisor != binding2->InstanceDivisor)
+ continue;
+ if (binding->BufferObj != binding2->BufferObj)
+ continue;
+ /* Check if we can fold both bindings into a common binding */
+ GLsizeiptr min_offset2, max_offset2;
+ compute_vbo_offset_range(vao, binding2,
+ &min_offset2, &max_offset2);
+ /* If the relative offset is within the limits ... */
+ if (min_offset + MaxRelativeOffset < max_offset2)
+ continue;
+ if (min_offset2 + MaxRelativeOffset < max_offset)
+ continue;
+ /* ... add this array to the effective binding */
+ eff_bound_arrays |= bound2;
+ min_offset = MIN2(min_offset, min_offset2);
+ max_offset = MAX2(max_offset, max_offset2);
+ assert(max_offset <= min_offset + MaxRelativeOffset);
+ }
+
+ /* Update the back reference from the attrib to the binding */
+ GLbitfield attrmask = eff_bound_arrays;
+ while (attrmask) {
+ const int j = u_bit_scan(&attrmask);
+ struct gl_array_attributes *attrib2 = &vao->VertexAttrib[j];
+ const struct gl_vertex_buffer_binding *binding2 =
+ &vao->BufferBinding[attrib2->BufferBindingIndex];
+
+ /* Update the index into the common binding point and offset */
+ attrib2->_EffBufferBindingIndex = bindex;
+ attrib2->_EffRelativeOffset =
+ binding2->Offset + attrib2->RelativeOffset - min_offset;
+ assert(attrib2->_EffRelativeOffset <= MaxRelativeOffset);
+
+ /* Only enabled arrays shall appear in the unique bindings */
+ assert(attrib2->Enabled);
+ }
+ /* Finally this is the set of effectively bound arrays */
+ binding->_EffOffset = min_offset;
+ /* The bound arrays past the VERT_ATTRIB_{POS,GENERIC0} mapping. */
+ binding->_EffBoundArrays =
+ _mesa_vao_enable_to_vp_inputs(mode, eff_bound_arrays);
+ }
+
+ /* Mark all the effective bound arrays as processed. */
+ mask &= ~eff_bound_arrays;
+
+ } else {
+ /* Scanning of common bindings for user space arrays.
+ */
+
+ const struct gl_array_attributes *attrib = &vao->VertexAttrib[i];
+ const GLbitfield bound = VERT_BIT(i);
+
+ /* Note that user space array pointers can only happen using a one
+ * to one binding point to array mapping.
+ * The OpenGL 4.x/ARB_vertex_attrib_binding api does not support
+ * user space arrays collected at multiple binding points.
+ * The only provider of user space interleaved arrays with a single
+ * binding point is the mesa internal vbo module. But that one
+ * provides a perfect interleaved set of arrays.
+ *
+ * If this would not be true we would potentially get attribute arrays
+ * with user space pointers that may not lie within the
+ * MaxRelativeOffset range but still attached to a single binding.
+ * Then we would need to store the effective attribute and binding
+ * grouping information in a separate array beside
+ * gl_array_attributes/gl_vertex_buffer_binding.
+ */
+ assert(_mesa_bitcount(binding->_BoundArrays & vao->_Enabled) == 1
+ || (vao->_Enabled & ~binding->_BoundArrays) == 0);
+
+ /* Start this current effective binding with the array */
+ GLbitfield eff_bound_arrays = bound;
+
+ const GLubyte *ptr = attrib->Ptr;
+ unsigned vertex_end = attrib->_ElementSize;
+
+ /* Walk other user space arrays and see which are interleaved
+ * using the same binding parameters.
+ */
+ GLbitfield scanmask = mask & ~vbos & ~bound;
+ while (scanmask) {
+ const int j = u_bit_scan(&scanmask);
+ const struct gl_array_attributes *attrib2 = &vao->VertexAttrib[j];
+ const struct gl_vertex_buffer_binding *binding2 =
+ &vao->BufferBinding[attrib2->BufferBindingIndex];
+
+ /* See the comment at the same assert above. */
+ assert(_mesa_bitcount(binding2->_BoundArrays & vao->_Enabled) == 1
+ || (vao->_Enabled & ~binding->_BoundArrays) == 0);
+
+ /* Check if we have an identical binding */
+ if (binding->Stride != binding2->Stride)
+ continue;
+ if (binding->InstanceDivisor != binding2->InstanceDivisor)
+ continue;
+ if (ptr <= attrib2->Ptr) {
+ if (ptr + binding->Stride < attrib2->Ptr + attrib2->_ElementSize)
+ continue;
+ unsigned end = attrib2->Ptr + attrib2->_ElementSize - ptr;
+ vertex_end = MAX2(vertex_end, end);
+ } else {
+ if (attrib2->Ptr + binding->Stride < ptr + vertex_end)
+ continue;
+ vertex_end += (GLsizei)(ptr - attrib2->Ptr);
+ ptr = attrib2->Ptr;
+ }
+
+ /* User space buffer object */
+ assert(!_mesa_is_bufferobj(binding2->BufferObj));
+
+ eff_bound_arrays |= VERT_BIT(j);
+ }
+
+ /* Update the back reference from the attrib to the binding */
+ GLbitfield attrmask = eff_bound_arrays;
+ while (attrmask) {
+ const int j = u_bit_scan(&attrmask);
+ struct gl_array_attributes *attrib2 = &vao->VertexAttrib[j];
+
+ /* Update the index into the common binding point and the offset */
+ attrib2->_EffBufferBindingIndex = bindex;
+ attrib2->_EffRelativeOffset = attrib2->Ptr - ptr;
+ assert(attrib2->_EffRelativeOffset <= binding->Stride);
+
+ /* Only enabled arrays shall appear in the unique bindings */
+ assert(attrib2->Enabled);
+ }
+ /* Finally this is the set of effectively bound arrays */
+ binding->_EffOffset = (GLintptr)ptr;
+ /* The bound arrays past the VERT_ATTRIB_{POS,GENERIC0} mapping. */
+ binding->_EffBoundArrays =
+ _mesa_vao_enable_to_vp_inputs(mode, eff_bound_arrays);
+
+ /* Mark all the effective bound arrays as processed. */
+ mask &= ~eff_bound_arrays;
+ }
+ }
+
+#ifndef NDEBUG
+ /* Make sure the above code works as expected. */
+ for (gl_vert_attrib attr = 0; attr < VERT_ATTRIB_MAX; ++attr) {
+ /* Query the original api defined attrib/binding information ... */
+ const unsigned char *const map =_mesa_vao_attribute_map[mode];
+ const struct gl_array_attributes *attrib = &vao->VertexAttrib[map[attr]];
+ if (attrib->Enabled) {
+ const struct gl_vertex_buffer_binding *binding =
+ &vao->BufferBinding[attrib->BufferBindingIndex];
+ /* ... and compare that with the computed attrib/binding */
+ const struct gl_vertex_buffer_binding *binding2 =
+ &vao->BufferBinding[attrib->_EffBufferBindingIndex];
+ assert(binding->Stride == binding2->Stride);
+ assert(binding->InstanceDivisor == binding2->InstanceDivisor);
+ assert(binding->BufferObj == binding2->BufferObj);
+ if (_mesa_is_bufferobj(binding->BufferObj)) {
+ assert(attrib->_EffRelativeOffset <= MaxRelativeOffset);
+ assert(binding->Offset + attrib->RelativeOffset ==
+ binding2->_EffOffset + attrib->_EffRelativeOffset);
+ } else {
+ assert(attrib->_EffRelativeOffset < binding->Stride);
+ assert((GLintptr)attrib->Ptr ==
+ binding2->_EffOffset + attrib->_EffRelativeOffset);
+ }
+ }
+ }
+#endif
}


diff --git a/src/mesa/main/arrayobj.h b/src/mesa/main/arrayobj.h
index 8da5c9ffe0..8b11c79bdb 100644
--- a/src/mesa/main/arrayobj.h
+++ b/src/mesa/main/arrayobj.h
@@ -30,6 +30,7 @@
#include "glheader.h"
#include "mtypes.h"
#include "glformats.h"
+#include "vbo/vbo.h"

struct gl_context;

@@ -146,6 +147,191 @@ _mesa_get_vao_vp_inputs(const struct gl_vertex_array_object *vao)
}


+/**
+ * Helper functions for consuming backends to walk the
+ * ctx->Array._DrawVAO for driver side array setup.
+ * Note that mesa provides preprocessed minimal binding information
+ * in the VAO. See _mesa_update_vao_derived_arrays for documentation.
+ */
+
+/**
+ * Return enabled vertex attribute bits for draw.
+ */
+static inline GLbitfield
+_mesa_draw_array_bits(const struct gl_context *ctx)
+{
+ return ctx->Array._DrawVAOEnabledAttribs;
+}
+
+
+/**
+ * Return enabled buffer object vertex attribute bits for draw.
+ *
+ * Needs a fully updated VAO ready for draw.
+ */
+static inline GLbitfield
+_mesa_draw_vbo_array_bits(const struct gl_context *ctx)
+{
+ const struct gl_vertex_array_object *const vao = ctx->Array._DrawVAO;
+ assert(vao->NewArrays == 0);
+ return vao->_EffEnabledVBO & ctx->Array._DrawVAOEnabledAttribs;
+}
+
+
+/**
+ * Return enabled user space vertex attribute bits for draw.
+ *
+ * Needs a fully updated VAO ready for draw.
+ */
+static inline GLbitfield
+_mesa_draw_user_array_bits(const struct gl_context *ctx)
+{
+ const struct gl_vertex_array_object *const vao = ctx->Array._DrawVAO;
+ assert(vao->NewArrays == 0);
+ return ~vao->_EffEnabledVBO & ctx->Array._DrawVAOEnabledAttribs;
+}
+
+
+/**
+ * Return enabled current values attribute bits for draw.
+ */
+static inline GLbitfield
+_mesa_draw_current_bits(const struct gl_context *ctx)
+{
+ return ~ctx->Array._DrawVAOEnabledAttribs & VERT_BIT_ALL;
+}
+
+
+/**
+ * Return vertex buffer binding provided the attribute struct.
+ *
+ * Needs a fully updated VAO ready for draw.
+ */
+static inline const struct gl_vertex_buffer_binding*
+_mesa_draw_buffer_binding_from_attrib(const struct gl_vertex_array_object *vao,
+ const struct gl_array_attributes *attrib)
+{
+ assert(vao->NewArrays == 0);
+ return &vao->BufferBinding[attrib->_EffBufferBindingIndex];
+}
+
+
+/**
+ * Return vertex array attribute provided the attribute number.
+ */
+static inline const struct gl_array_attributes*
+_mesa_draw_array_attrib(const struct gl_vertex_array_object *vao,
+ gl_vert_attrib attr)
+{
+ assert(vao->NewArrays == 0);
+ const gl_attribute_map_mode map_mode = vao->_AttributeMapMode;
+ return &vao->VertexAttrib[_mesa_vao_attribute_map[map_mode][attr]];
+}
+
+
+/**
+ * Return vertex buffer binding provided an attribute number.
+ */
+static inline const struct gl_vertex_buffer_binding*
+_mesa_draw_buffer_binding(const struct gl_vertex_array_object *vao,
+ gl_vert_attrib attr)
+{
+ const struct gl_array_attributes *const attrib
+ = _mesa_draw_array_attrib(vao, attr);
+ return _mesa_draw_buffer_binding_from_attrib(vao, attrib);
+}
+
+
+/**
+ * Return vertex attribute bits bound at the provided binding.
+ *
+ * Needs a fully updated VAO ready for draw.
+ */
+static inline GLbitfield
+_mesa_draw_bound_attrib_bits(const struct gl_vertex_buffer_binding *binding)
+{
+ return binding->_EffBoundArrays;
+}
+
+
+/**
+ * Return the vertex offset bound at the provided binding.
+ *
+ * Needs a fully updated VAO ready for draw.
+ */
+static inline GLintptr
+_mesa_draw_binding_offset(const struct gl_vertex_buffer_binding *binding)
+{
+ return binding->_EffOffset;
+}
+
+
+/**
+ * Return the relative offset of the provided attrib.
+ *
+ * Needs a fully updated VAO ready for draw.
+ */
+static inline GLushort
+_mesa_draw_attributes_relative_offset(const struct gl_array_attributes *attrib)
+{
+ return attrib->_EffRelativeOffset;
+}
+
+
+/**
+ * Return a current value vertex array attribute provided the attribute number.
+ */
+static inline const struct gl_array_attributes*
+_mesa_draw_current_attrib(const struct gl_context *ctx, gl_vert_attrib attr)
+{
+ return _vbo_current_attrib(ctx, attr);
+}
+
+
+/**
+ * Return true if we have the VERT_ATTRIB_EDGEFLAG array enabled.
+ */
+static inline bool
+_mesa_draw_edge_flag_array_enabled(const struct gl_context *ctx)
+{
+ return ctx->Array._DrawVAOEnabledAttribs & VERT_BIT_EDGEFLAG;
+}
+
+
+/**
+ * Return the attrib for the given attribute.
+ */
+static inline const struct gl_array_attributes*
+_mesa_draw_attrib(const struct gl_context *ctx, gl_vert_attrib attr)
+{
+ if (ctx->Array._DrawVAOEnabledAttribs & VERT_BIT(attr)) {
+ const struct gl_vertex_array_object *vao = ctx->Array._DrawVAO;
+ return _mesa_draw_array_attrib(vao, attr);
+ } else {
+ return _vbo_current_attrib(ctx, attr);
+ }
+}
+
+
+/**
+ * Return the attrib, binding pair for the given attribute.
+ */
+static inline void
+_mesa_draw_attrib_and_binding(const struct gl_context *ctx, gl_vert_attrib attr,
+ const struct gl_array_attributes **attrib,
+ const struct gl_vertex_buffer_binding **binding)
+{
+ if (ctx->Array._DrawVAOEnabledAttribs & VERT_BIT(attr)) {
+ const struct gl_vertex_array_object *vao = ctx->Array._DrawVAO;
+ *attrib = _mesa_draw_array_attrib(vao, attr);
+ *binding = _mesa_draw_buffer_binding_from_attrib(vao, *attrib);
+ } else {
+ *attrib = _vbo_current_attrib(ctx, attr);
+ *binding = _vbo_current_binding(ctx);
+ }
+}
+
+
/*
* API functions
*/
diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c
index 6127a556d7..e565750a89 100644
--- a/src/mesa/main/attrib.c
+++ b/src/mesa/main/attrib.c
@@ -1552,6 +1552,7 @@ copy_array_object(struct gl_context *ctx,

/* _Enabled must be the same than on push */
dest->_Enabled = src->_Enabled;
+ dest->_EffEnabledVBO = src->_EffEnabledVBO;
/* The bitmask of bound VBOs needs to match the VertexBinding array */
dest->VertexAttribBufferMask = src->VertexAttribBufferMask;
dest->_AttributeMapMode = src->_AttributeMapMode;
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 2d3eb457f9..dee917f2e4 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1426,6 +1426,32 @@ struct gl_array_attributes
unsigned _ElementSize:8; /**< Size of each element in bytes */
/** Index into gl_vertex_array_object::BufferBinding[] array */
unsigned BufferBindingIndex:6;
+
+ /**
+ * Derived effective buffer binding index
+ *
+ * Index into the gl_vertex_buffer_binding array of the vao.
+ * Similar to BufferBindingIndex, but with the mapping of the
+ * position/generic0 attributes applied and with identical
+ * gl_vertex_buffer_binding entries collapsed to a single
+ * entry within the vao.
+ *
+ * The value is valid past calling _mesa_update_vao_derived_arrays.
+ * Note that _mesa_update_vao_derived_arrays is called when binding
+ * the VAO to Array._DrawVAO.
+ */
+ unsigned _EffBufferBindingIndex:6;
+ /**
+ * Derived effective relative offset.
+ *
+ * Relative offset to the effective buffers offset in
+ * gl_vertex_buffer_binding::_EffOffset.
+ *
+ * The value is valid past calling _mesa_update_vao_derived_arrays.
+ * Note that _mesa_update_vao_derived_arrays is called when binding
+ * the VAO to Array._DrawVAO.
+ */
+ GLushort _EffRelativeOffset;
};


@@ -1441,6 +1467,35 @@ struct gl_vertex_buffer_binding
GLuint InstanceDivisor; /**< GL_ARB_instanced_arrays */
struct gl_buffer_object *BufferObj; /**< GL_ARB_vertex_buffer_object */
GLbitfield _BoundArrays; /**< Arrays bound to this binding point */
+
+ /**
+ * Derived effective bound arrays.
+ *
+ * The effective binding handles enabled arrays past the
+ * position/generic0 attribute mapping and reduces the referred
+ * gl_vertex_buffer_binding entries to a unique subset.
+ *
+ * The value is valid past calling _mesa_update_vao_derived_arrays.
+ * Note that _mesa_update_vao_derived_arrays is called when binding
+ * the VAO to Array._DrawVAO.
+ */
+ GLbitfield _EffBoundArrays;
+ /**
+ * Derived offset.
+ *
+ * The absolute offset relative to which we can collapse some attributes
+ * to this unique effective binding.
+ * For user space array bindings this contains the smallest pointer value
+ * in the bound and interleaved arrays.
+ * For VBO bindings this contains an offset that lets the attributes
+ * _EffRelativeOffset stay positive and in bounds with
+ * Const.MaxVertexAttribRelativeOffset
+ *
+ * The value is valid past calling _mesa_update_vao_derived_arrays.
+ * Note that _mesa_update_vao_derived_arrays is called when binding
+ * the VAO to Array._DrawVAO.
+ */
+ GLintptr _EffOffset;
};


@@ -1495,6 +1550,15 @@ struct gl_vertex_array_object
/** Mask of VERT_BIT_* values indicating which arrays are enabled */
GLbitfield _Enabled;

+ /**
+ * Mask of VERT_BIT_* enabled VBO arrays past position/generic0 mapping
+ *
+ * The value is valid past calling _mesa_update_vao_derived_arrays.
+ * Note that _mesa_update_vao_derived_arrays is called when binding
+ * the VAO to Array._DrawVAO.
+ */
+ GLbitfield _EffEnabledVBO;
+
/** Denotes the way the position/generic0 attribute is mapped */
gl_attribute_map_mode _AttributeMapMode;

diff --git a/src/mesa/main/varray.c b/src/mesa/main/varray.c
index 5df38a14f0..d16807b406 100644
--- a/src/mesa/main/varray.c
+++ b/src/mesa/main/varray.c
@@ -604,6 +604,11 @@ update_array(struct gl_context *ctx,
/* The Stride and Ptr fields are not set by update_array_format() */
struct gl_array_attributes *array = &vao->VertexAttrib[attrib];
array->Stride = stride;
+ /* For updating the pointer we would need to add the vao->NewArrays flag
+ * to the VAO. But that is done already unconditionally in
+ * _mesa_update_array_format called above.
+ */
+ assert((vao->NewArrays | ~vao->_Enabled) & VERT_BIT(attrib));
array->Ptr = ptr;

/* Update the vertex buffer binding */
@@ -2868,6 +2873,8 @@ _mesa_copy_vertex_attrib_array(struct gl_context *ctx,
dst->Ptr = src->Ptr;
dst->Enabled = src->Enabled;
dst->_ElementSize = src->_ElementSize;
+ dst->_EffBufferBindingIndex = src->_EffBufferBindingIndex;
+ dst->_EffRelativeOffset = src->_EffRelativeOffset;
}

void
@@ -2879,6 +2886,8 @@ _mesa_copy_vertex_buffer_binding(struct gl_context *ctx,
dst->Stride = src->Stride;
dst->InstanceDivisor = src->InstanceDivisor;
dst->_BoundArrays = src->_BoundArrays;
+ dst->_EffBoundArrays = src->_EffBoundArrays;
+ dst->_EffOffset = src->_EffOffset;

_mesa_reference_buffer_object(ctx, &dst->BufferObj, src->BufferObj);
}
diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h
index 9b15066291..ca46f9baa7 100644
--- a/src/mesa/vbo/vbo.h
+++ b/src/mesa/vbo/vbo.h
@@ -186,6 +186,14 @@ void
_vbo_update_inputs(struct gl_context *ctx, struct vbo_inputs *inputs);


+const struct gl_array_attributes*
+_vbo_current_attrib(const struct gl_context *ctx, gl_vert_attrib attr);
+
+
+const struct gl_vertex_buffer_binding*
+_vbo_current_binding(const struct gl_context *ctx);
+
+
void GLAPIENTRY
_es_Color4f(GLfloat r, GLfloat g, GLfloat b, GLfloat a);

diff --git a/src/mesa/vbo/vbo_context.c b/src/mesa/vbo/vbo_context.c
index ada78ffd63..f201604de5 100644
--- a/src/mesa/vbo/vbo_context.c
+++ b/src/mesa/vbo/vbo_context.c
@@ -234,6 +234,23 @@ _vbo_DestroyContext(struct gl_context *ctx)
}


+const struct gl_array_attributes*
+_vbo_current_attrib(const struct gl_context *ctx, gl_vert_attrib attr)
+{
+ const struct vbo_context *vbo = vbo_context((struct gl_context *)ctx);
+ const gl_vertex_processing_mode vmp = ctx->VertexProgram._VPMode;
+ return &vbo->current[_vbo_attribute_alias_map[vmp][attr]];
+}
+
+
+const struct gl_vertex_buffer_binding*
+_vbo_current_binding(const struct gl_context *ctx)
+{
+ const struct vbo_context *vbo = vbo_context((struct gl_context *)ctx);
+ return &vbo->binding;
+}
+
+
/*
* Helper function for _vbo_draw_indirect below that additionally takes a zero
* initialized array of _mesa_prim scratch space memory as the last argument.
diff --git a/src/mesa/vbo/vbo_private.h b/src/mesa/vbo/vbo_private.h
index 589c61d675..161762c4d7 100644
--- a/src/mesa/vbo/vbo_private.h
+++ b/src/mesa/vbo/vbo_private.h
@@ -209,7 +209,12 @@ _vbo_set_attrib_format(struct gl_context *ctx,
const GLboolean doubles = vbo_attrtype_to_double_flag(type);
_mesa_update_array_format(ctx, vao, attr, size, type, GL_RGBA,
GL_FALSE, integer, doubles, offset);
- /* Ptr for userspace arrays */
+ /* Ptr for userspace arrays.
+ * For updating the pointer we would need to add the vao->NewArrays flag
+ * to the VAO. But that is done already unconditionally in
+ * _mesa_update_array_format called above.
+ */
+ assert((vao->NewArrays | ~vao->_Enabled) & VERT_BIT(attr));
vao->VertexAttrib[attr].Ptr = ADD_POINTERS(buffer_offset, offset);
}
--
2.14.3
Mathias Fröhlich
2018-05-09 05:36:09 UTC
Permalink
Hi Paul,
Minor nit-picks below and on patches 4 and 11. I skimmed the i965
changes, but otherwise everything looks OK AFAICT.
Nice work!
I will update the patches.
Thanks for the review!

best
Mathias
M***@gmx.net
2018-05-07 06:15:05 UTC
Permalink
From: Mathias Fröhlich <***@web.de>

The only remaining users of gl_vertex_array are tnl based
drivers. So move everything related to that into tnl and
rename it accordingly.

Signed-off-by: Mathias Fröhlich <***@web.de>
---
src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c | 38 +++++------
src/mesa/main/mtypes.h | 14 ----
src/mesa/main/varray.h | 12 ----
src/mesa/state_tracker/st_draw.h | 1 -
src/mesa/tnl/t_context.c | 4 +-
src/mesa/tnl/t_context.h | 41 +++++++++++-
src/mesa/tnl/t_draw.c | 95 ++++++++++++++++++++++++++--
src/mesa/tnl/t_rebase.c | 4 +-
src/mesa/tnl/t_rebase.h | 2 +-
src/mesa/tnl/t_split.c | 2 +-
src/mesa/tnl/t_split.h | 4 +-
src/mesa/tnl/t_split_copy.c | 34 ++++++----
src/mesa/tnl/t_split_inplace.c | 4 +-
src/mesa/tnl/tnl.h | 24 +++++--
src/mesa/vbo/vbo.h | 37 -----------
src/mesa/vbo/vbo_exec.c | 86 -------------------------
16 files changed, 199 insertions(+), 203 deletions(-)

diff --git a/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c b/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c
index d031ebeabd..3900c770cb 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c
@@ -41,7 +41,7 @@
* structures. */

static int
-get_array_stride(struct gl_context *ctx, const struct gl_vertex_array *a)
+get_array_stride(struct gl_context *ctx, const struct tnl_vertex_array *a)
{
struct nouveau_render_state *render = to_render_state(ctx);
const struct gl_vertex_buffer_binding *binding = a->BufferBinding;
@@ -57,7 +57,7 @@ get_array_stride(struct gl_context *ctx, const struct gl_vertex_array *a)

static void
vbo_init_arrays(struct gl_context *ctx, const struct _mesa_index_buffer *ib,
- const struct gl_vertex_array *arrays)
+ const struct tnl_vertex_array *arrays)
{
struct nouveau_render_state *render = to_render_state(ctx);
GLboolean imm = (render->mode == IMM);
@@ -78,7 +78,7 @@ vbo_init_arrays(struct gl_context *ctx, const struct _mesa_index_buffer *ib,
}

FOR_EACH_BOUND_ATTR(render, i, attr) {
- const struct gl_vertex_array *array = &arrays[attr];
+ const struct tnl_vertex_array *array = &arrays[attr];
const struct gl_vertex_buffer_binding *binding =
array->BufferBinding;
const struct gl_array_attributes *attrib = array->VertexAttrib;
@@ -94,7 +94,7 @@ vbo_init_arrays(struct gl_context *ctx, const struct _mesa_index_buffer *ib,

static void
vbo_deinit_arrays(struct gl_context *ctx, const struct _mesa_index_buffer *ib,
- const struct gl_vertex_array *arrays)
+ const struct tnl_vertex_array *arrays)
{
struct nouveau_render_state *render = to_render_state(ctx);
int i, attr;
@@ -118,7 +118,7 @@ vbo_deinit_arrays(struct gl_context *ctx, const struct _mesa_index_buffer *ib,
/* Make some rendering decisions from the GL context. */

static void
-vbo_choose_render_mode(struct gl_context *ctx, const struct gl_vertex_array *arrays)
+vbo_choose_render_mode(struct gl_context *ctx, const struct tnl_vertex_array *arrays)
{
struct nouveau_render_state *render = to_render_state(ctx);
int i;
@@ -136,12 +136,12 @@ vbo_choose_render_mode(struct gl_context *ctx, const struct gl_vertex_array *arr
}

static void
-vbo_emit_attr(struct gl_context *ctx, const struct gl_vertex_array *arrays,
+vbo_emit_attr(struct gl_context *ctx, const struct tnl_vertex_array *arrays,
int attr)
{
struct nouveau_pushbuf *push = context_push(ctx);
struct nouveau_render_state *render = to_render_state(ctx);
- const struct gl_vertex_array *array = &arrays[attr];
+ const struct tnl_vertex_array *array = &arrays[attr];
const struct gl_vertex_buffer_binding *binding = array->BufferBinding;
const struct gl_array_attributes *attrib = array->VertexAttrib;
const GLubyte *p = _mesa_vertex_attrib_address(attrib, binding);
@@ -179,7 +179,7 @@ vbo_emit_attr(struct gl_context *ctx, const struct gl_vertex_array *arrays,
#define MAT(a) VERT_ATTRIB_MAT(MAT_ATTRIB_##a)

static void
-vbo_choose_attrs(struct gl_context *ctx, const struct gl_vertex_array *arrays)
+vbo_choose_attrs(struct gl_context *ctx, const struct tnl_vertex_array *arrays)
{
struct nouveau_render_state *render = to_render_state(ctx);
int i;
@@ -222,13 +222,13 @@ vbo_choose_attrs(struct gl_context *ctx, const struct gl_vertex_array *arrays)
}

static int
-get_max_client_stride(struct gl_context *ctx, const struct gl_vertex_array *arrays)
+get_max_client_stride(struct gl_context *ctx, const struct tnl_vertex_array *arrays)
{
struct nouveau_render_state *render = to_render_state(ctx);
int i, attr, s = 0;

FOR_EACH_BOUND_ATTR(render, i, attr) {
- const struct gl_vertex_array *a = &arrays[attr];
+ const struct tnl_vertex_array *a = &arrays[attr];

if (!_mesa_is_bufferobj(a->BufferBinding->BufferObj))
s = MAX2(s, get_array_stride(ctx, a));
@@ -239,7 +239,7 @@ get_max_client_stride(struct gl_context *ctx, const struct gl_vertex_array *arra

static void
TAG(vbo_render_prims)(struct gl_context *ctx,
- const struct gl_vertex_array *arrays,
+ const struct tnl_vertex_array *arrays,
const struct _mesa_prim *prims, GLuint nr_prims,
const struct _mesa_index_buffer *ib,
GLboolean index_bounds_valid,
@@ -249,7 +249,7 @@ TAG(vbo_render_prims)(struct gl_context *ctx,
struct gl_buffer_object *indirect);

static GLboolean
-vbo_maybe_split(struct gl_context *ctx, const struct gl_vertex_array *arrays,
+vbo_maybe_split(struct gl_context *ctx, const struct tnl_vertex_array *arrays,
const struct _mesa_prim *prims, GLuint nr_prims,
const struct _mesa_index_buffer *ib,
GLuint min_index, GLuint max_index)
@@ -309,7 +309,7 @@ check_update_array(struct nouveau_array *a, unsigned offset,
}

static void
-vbo_bind_vertices(struct gl_context *ctx, const struct gl_vertex_array *arrays,
+vbo_bind_vertices(struct gl_context *ctx, const struct tnl_vertex_array *arrays,
int base, unsigned min_index, unsigned max_index, int *pdelta)
{
struct nouveau_render_state *render = to_render_state(ctx);
@@ -323,7 +323,7 @@ vbo_bind_vertices(struct gl_context *ctx, const struct gl_vertex_array *arrays,
*pdelta = -1;

FOR_EACH_BOUND_ATTR(render, i, attr) {
- const struct gl_vertex_array *array = &arrays[attr];
+ const struct tnl_vertex_array *array = &arrays[attr];
const struct gl_vertex_buffer_binding *binding =
array->BufferBinding;
const struct gl_array_attributes *attrib = array->VertexAttrib;
@@ -381,7 +381,7 @@ vbo_bind_vertices(struct gl_context *ctx, const struct gl_vertex_array *arrays,
}

static void
-vbo_draw_vbo(struct gl_context *ctx, const struct gl_vertex_array *arrays,
+vbo_draw_vbo(struct gl_context *ctx, const struct tnl_vertex_array *arrays,
const struct _mesa_prim *prims, GLuint nr_prims,
const struct _mesa_index_buffer *ib, GLuint min_index,
GLuint max_index)
@@ -431,7 +431,7 @@ extract_id(struct nouveau_array *a, int i, int j)
}

static void
-vbo_draw_imm(struct gl_context *ctx, const struct gl_vertex_array *arrays,
+vbo_draw_imm(struct gl_context *ctx, const struct tnl_vertex_array *arrays,
const struct _mesa_prim *prims, GLuint nr_prims,
const struct _mesa_index_buffer *ib, GLuint min_index,
GLuint max_index)
@@ -477,7 +477,7 @@ vbo_draw_imm(struct gl_context *ctx, const struct gl_vertex_array *arrays,

static void
TAG(vbo_render_prims)(struct gl_context *ctx,
- const struct gl_vertex_array *arrays,
+ const struct tnl_vertex_array *arrays,
const struct _mesa_prim *prims, GLuint nr_prims,
const struct _mesa_index_buffer *ib,
GLboolean index_bounds_valid,
@@ -515,7 +515,7 @@ TAG(vbo_render_prims)(struct gl_context *ctx,

static void
TAG(vbo_check_render_prims)(struct gl_context *ctx,
- const struct gl_vertex_array *arrays,
+ const struct tnl_vertex_array *arrays,
const struct _mesa_prim *prims, GLuint nr_prims,
const struct _mesa_index_buffer *ib,
GLboolean index_bounds_valid,
@@ -550,7 +550,7 @@ TAG(vbo_draw)(struct gl_context *ctx,
struct gl_buffer_object *indirect)
{
/* Borrow and update the inputs list from the tnl context */
- const struct gl_vertex_array* arrays = _tnl_bind_inputs(ctx);
+ const struct tnl_vertex_array* arrays = _tnl_bind_inputs(ctx);

TAG(vbo_check_render_prims)(ctx, arrays,
prims, nr_prims, ib,
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 1a6194ab4d..b136ca523b 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1499,20 +1499,6 @@ struct gl_vertex_buffer_binding
};


-/**
- * Vertex array information which is derived from gl_array_attributes
- * and gl_vertex_buffer_binding information. Used by the VBO module and
- * device drivers.
- */
-struct gl_vertex_array
-{
- /** Vertex attribute array */
- const struct gl_array_attributes *VertexAttrib;
- /** Vertex buffer binding */
- const struct gl_vertex_buffer_binding *BufferBinding;
-};
-
-
/**
* A representation of "Vertex Array Objects" (VAOs) from OpenGL 3.1+ /
* the GL_ARB_vertex_array_object extension.
diff --git a/src/mesa/main/varray.h b/src/mesa/main/varray.h
index 6ab55ed36a..1f01fdd5b1 100644
--- a/src/mesa/main/varray.h
+++ b/src/mesa/main/varray.h
@@ -449,18 +449,6 @@ _mesa_VertexArrayBindingDivisor(GLuint vaobj, GLuint bindingIndex,
GLuint divisor);


-/**
- * Shallow copy one vertex array to another.
- */
-static inline void
-_mesa_copy_vertex_array(struct gl_vertex_array *dst,
- const struct gl_vertex_array *src)
-{
- dst->VertexAttrib = src->VertexAttrib;
- dst->BufferBinding = src->BufferBinding;
-}
-
-
extern void
_mesa_copy_vertex_attrib_array(struct gl_context *ctx,
struct gl_array_attributes *dst,
diff --git a/src/mesa/state_tracker/st_draw.h b/src/mesa/state_tracker/st_draw.h
index c1ebcd9f74..5b897bda11 100644
--- a/src/mesa/state_tracker/st_draw.h
+++ b/src/mesa/state_tracker/st_draw.h
@@ -38,7 +38,6 @@

struct _mesa_index_buffer;
struct _mesa_prim;
-struct gl_vertex_array;
struct gl_context;
struct st_context;

diff --git a/src/mesa/tnl/t_context.c b/src/mesa/tnl/t_context.c
index 345f0bf858..3383b23525 100644
--- a/src/mesa/tnl/t_context.c
+++ b/src/mesa/tnl/t_context.c
@@ -99,8 +99,8 @@ _tnl_CreateContext( struct gl_context *ctx )
_math_init_transformation();
_math_init_translate();

- /* Keep our list of gl_vertex_array inputs */
- _vbo_init_inputs(&tnl->draw_arrays);
+ /* Keep our list of tnl_vertex_array inputs */
+ _tnl_init_inputs(&tnl->draw_arrays);

return GL_TRUE;
}
diff --git a/src/mesa/tnl/t_context.h b/src/mesa/tnl/t_context.h
index 4827480e1a..eca9f66037 100644
--- a/src/mesa/tnl/t_context.h
+++ b/src/mesa/tnl/t_context.h
@@ -57,6 +57,8 @@

#include "vbo/vbo.h"

+#include "tnl.h"
+
#define MAX_PIPELINE_STAGES 30

/*
@@ -496,6 +498,41 @@ struct tnl_device_driver
};


+/**
+ * Utility that tracks and updates the current array entries.
+ */
+struct tnl_inputs
+{
+ /**
+ * Array of inputs to be set to the _DrawArrays pointer.
+ * The array contains pointers into the _DrawVAO and to the vbo modules
+ * current values. The array of pointers is updated incrementally
+ * based on the current and vertex_processing_mode values below.
+ */
+ struct tnl_vertex_array inputs[VERT_ATTRIB_MAX];
+ /** Those VERT_BIT_'s where the inputs array point to current values. */
+ GLbitfield current;
+ /** Store which aliasing current values - generics or materials - are set. */
+ gl_vertex_processing_mode vertex_processing_mode;
+};
+
+
+/**
+ * Initialize inputs.
+ */
+void
+_tnl_init_inputs(struct tnl_inputs *inputs);
+
+
+/**
+ * Update the tnl_vertex_array array inside the tnl_inputs structure
+ * from the context's current _VPMode, its Array._DrawVAO and the
+ * enabled arrays given by Array._DrawVAOEnabledAttribs.
+ */
+void
+_tnl_update_inputs(struct gl_context *ctx, struct tnl_inputs *inputs);
+
+
/**
* Context state for T&L context.
*/
@@ -537,8 +574,8 @@ typedef struct
struct tnl_shine_tab *_ShineTabList; /**< MRU list of inactive shine tables */
/**@}*/

- /* The list of gl_vertex_array inputs. */
- struct vbo_inputs draw_arrays;
+ /* The list of tnl_vertex_array inputs. */
+ struct tnl_inputs draw_arrays;
} TNLcontext;


diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c
index 9814cdcec1..1fe2d405cb 100644
--- a/src/mesa/tnl/t_draw.c
+++ b/src/mesa/tnl/t_draw.c
@@ -28,6 +28,7 @@
#include <stdio.h>

#include "main/glheader.h"
+#include "main/arrayobj.h"
#include "main/bufferobj.h"
#include "main/condrender.h"
#include "main/context.h"
@@ -273,7 +274,7 @@ static GLboolean *_tnl_import_edgeflag( struct gl_context *ctx,


static void bind_inputs( struct gl_context *ctx,
- const struct gl_vertex_array *inputs,
+ const struct tnl_vertex_array *inputs,
GLint count,
struct gl_buffer_object **bo,
GLuint *nr_bo )
@@ -285,7 +286,7 @@ static void bind_inputs( struct gl_context *ctx,
/* Map all the VBOs
*/
for (i = 0; i < VERT_ATTRIB_MAX; i++) {
- const struct gl_vertex_array *array = &inputs[i];
+ const struct tnl_vertex_array *array = &inputs[i];
const struct gl_vertex_buffer_binding *binding = array->BufferBinding;
const struct gl_array_attributes *attrib = array->VertexAttrib;
const void *ptr;
@@ -426,7 +427,7 @@ static void unmap_vbos( struct gl_context *ctx,
/* This is the main workhorse doing all the rendering work.
*/
void _tnl_draw_prims(struct gl_context *ctx,
- const struct gl_vertex_array *arrays,
+ const struct tnl_vertex_array *arrays,
const struct _mesa_prim *prim,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
@@ -537,11 +538,93 @@ void _tnl_draw_prims(struct gl_context *ctx,
}


-const struct gl_vertex_array*
+void
+_tnl_init_inputs(struct tnl_inputs *inputs)
+{
+ inputs->current = 0;
+ inputs->vertex_processing_mode = VP_MODE_FF;
+}
+
+
+/**
+ * Update the tnl_inputs's arrays to point to the vao->_VertexArray arrays
+ * according to the 'enable' bitmask.
+ * \param enable bitfield of VERT_BIT_x flags.
+ */
+static inline void
+update_vao_inputs(struct gl_context *ctx,
+ struct tnl_inputs *inputs, GLbitfield enable)
+{
+ const struct gl_vertex_array_object *vao = ctx->Array._DrawVAO;
+
+ /* Make sure we process only arrays enabled in the VAO */
+ assert((enable & ~_mesa_get_vao_vp_inputs(vao)) == 0);
+
+ /* Fill in the client arrays from the VAO */
+ const struct gl_vertex_buffer_binding *bindings = &vao->BufferBinding[0];
+ while (enable) {
+ const int attr = u_bit_scan(&enable);
+ struct tnl_vertex_array *input = &inputs->inputs[attr];
+ const struct gl_array_attributes *attrib;
+ attrib = _mesa_draw_array_attrib(vao, attr);
+ input->VertexAttrib = attrib;
+ input->BufferBinding = &bindings[attrib->BufferBindingIndex];
+ }
+}
+
+
+/**
+ * Update the tnl_inputs's arrays to point to the vbo->currval arrays
+ * according to the 'current' bitmask.
+ * \param current bitfield of VERT_BIT_x flags.
+ */
+static inline void
+update_current_inputs(struct gl_context *ctx,
+ struct tnl_inputs *inputs, GLbitfield current)
+{
+ gl_vertex_processing_mode mode = ctx->VertexProgram._VPMode;
+
+ /* All previously non current array pointers need update. */
+ GLbitfield mask = current & ~inputs->current;
+ /* On mode change, the slots aliasing with materials need update too */
+ if (mode != inputs->vertex_processing_mode)
+ mask |= current & VERT_BIT_MAT_ALL;
+
+ while (mask) {
+ const int attr = u_bit_scan(&mask);
+ struct tnl_vertex_array *input = &inputs->inputs[attr];
+ input->VertexAttrib = _vbo_current_attrib(ctx, attr);
+ input->BufferBinding = _vbo_current_binding(ctx);
+ }
+
+ inputs->current = current;
+ inputs->vertex_processing_mode = mode;
+}
+
+
+/**
+ * Update the tnl_inputs's arrays to point to the vao->_VertexArray and
+ * vbo->currval arrays according to Array._DrawVAO and
+ * Array._DrawVAOEnabledAttribs.
+ */
+void
+_tnl_update_inputs(struct gl_context *ctx, struct tnl_inputs *inputs)
+{
+ const GLbitfield enable = ctx->Array._DrawVAOEnabledAttribs;
+
+ /* Update array input pointers */
+ update_vao_inputs(ctx, inputs, enable);
+
+ /* The rest must be current inputs. */
+ update_current_inputs(ctx, inputs, ~enable & VERT_BIT_ALL);
+}
+
+
+const struct tnl_vertex_array*
_tnl_bind_inputs( struct gl_context *ctx )
{
TNLcontext *tnl = TNL_CONTEXT(ctx);
- _vbo_update_inputs(ctx, &tnl->draw_arrays);
+ _tnl_update_inputs(ctx, &tnl->draw_arrays);
return tnl->draw_arrays.inputs;
}

@@ -560,7 +643,7 @@ _tnl_draw(struct gl_context *ctx,
{
/* Update TNLcontext::draw_arrays and return that pointer.
*/
- const struct gl_vertex_array* arrays = _tnl_bind_inputs(ctx);
+ const struct tnl_vertex_array* arrays = _tnl_bind_inputs(ctx);

_tnl_draw_prims(ctx, arrays, prim, nr_prims, ib,
index_bounds_valid, min_index, max_index,
diff --git a/src/mesa/tnl/t_rebase.c b/src/mesa/tnl/t_rebase.c
index 09a8a3da72..b6950e04fe 100644
--- a/src/mesa/tnl/t_rebase.c
+++ b/src/mesa/tnl/t_rebase.c
@@ -104,7 +104,7 @@ REBASE(GLubyte)
* all or nothing.
*/
void t_rebase_prims( struct gl_context *ctx,
- const struct gl_vertex_array *arrays,
+ const struct tnl_vertex_array *arrays,
const struct _mesa_prim *prim,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
@@ -113,7 +113,7 @@ void t_rebase_prims( struct gl_context *ctx,
tnl_draw_func draw )
{
struct gl_array_attributes tmp_attribs[VERT_ATTRIB_MAX];
- struct gl_vertex_array tmp_arrays[VERT_ATTRIB_MAX];
+ struct tnl_vertex_array tmp_arrays[VERT_ATTRIB_MAX];

struct _mesa_index_buffer tmp_ib;
struct _mesa_prim *tmp_prims = NULL;
diff --git a/src/mesa/tnl/t_rebase.h b/src/mesa/tnl/t_rebase.h
index ce2e8b0590..d0aa9e1890 100644
--- a/src/mesa/tnl/t_rebase.h
+++ b/src/mesa/tnl/t_rebase.h
@@ -28,7 +28,7 @@
#include "tnl.h"

void t_rebase_prims( struct gl_context *ctx,
- const struct gl_vertex_array *arrays,
+ const struct tnl_vertex_array *arrays,
const struct _mesa_prim *prim,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
diff --git a/src/mesa/tnl/t_split.c b/src/mesa/tnl/t_split.c
index b98bd404d5..d7aac10e4c 100644
--- a/src/mesa/tnl/t_split.c
+++ b/src/mesa/tnl/t_split.c
@@ -100,7 +100,7 @@ _tnl_split_prim_inplace(GLenum mode, GLuint *first, GLuint *incr)

void
_tnl_split_prims(struct gl_context *ctx,
- const struct gl_vertex_array arrays[],
+ const struct tnl_vertex_array arrays[],
const struct _mesa_prim *prim,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
diff --git a/src/mesa/tnl/t_split.h b/src/mesa/tnl/t_split.h
index ced7d30bdf..49017e5dfb 100644
--- a/src/mesa/tnl/t_split.h
+++ b/src/mesa/tnl/t_split.h
@@ -51,7 +51,7 @@ _tnl_split_prim_inplace(GLenum mode, GLuint *first, GLuint *incr);

void
_tnl_split_inplace(struct gl_context *ctx,
- const struct gl_vertex_array arrays[],
+ const struct tnl_vertex_array arrays[],
const struct _mesa_prim *prim,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
@@ -64,7 +64,7 @@ _tnl_split_inplace(struct gl_context *ctx,
*/
void
_tnl_split_copy(struct gl_context *ctx,
- const struct gl_vertex_array arrays[],
+ const struct tnl_vertex_array arrays[],
const struct _mesa_prim *prim,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
diff --git a/src/mesa/tnl/t_split_copy.c b/src/mesa/tnl/t_split_copy.c
index f76a470b5f..cbb7eb409f 100644
--- a/src/mesa/tnl/t_split_copy.c
+++ b/src/mesa/tnl/t_split_copy.c
@@ -53,7 +53,7 @@
*/
struct copy_context {
struct gl_context *ctx;
- const struct gl_vertex_array *array;
+ const struct tnl_vertex_array *array;
const struct _mesa_prim *prim;
GLuint nr_prims;
const struct _mesa_index_buffer *ib;
@@ -64,7 +64,7 @@ struct copy_context {
struct {
GLuint attr;
GLuint size;
- const struct gl_vertex_array *array;
+ const struct tnl_vertex_array *array;
const GLubyte *src_ptr;

struct gl_vertex_buffer_binding dstbinding;
@@ -73,7 +73,7 @@ struct copy_context {
} varying[VERT_ATTRIB_MAX];
GLuint nr_varying;

- struct gl_vertex_array dstarray[VERT_ATTRIB_MAX];
+ struct tnl_vertex_array dstarray[VERT_ATTRIB_MAX];
struct _mesa_index_buffer dstib;

GLuint *translated_elt_buf;
@@ -112,6 +112,18 @@ attr_size(const struct gl_array_attributes *attrib)
}


+/**
+ * Shallow copy one vertex array to another.
+ */
+static inline void
+copy_vertex_array(struct tnl_vertex_array *dst,
+ const struct tnl_vertex_array *src)
+{
+ dst->VertexAttrib = src->VertexAttrib;
+ dst->BufferBinding = src->BufferBinding;
+}
+
+
/**
* Starts returning true slightly before the buffer fills, to ensure
* that there is sufficient room for any remaining vertices to finish
@@ -142,7 +154,7 @@ check_flush(struct copy_context *copy)
*/
static void
dump_draw_info(struct gl_context *ctx,
- const struct gl_vertex_array *arrays,
+ const struct tnl_vertex_array *arrays,
const struct _mesa_prim *prims,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
@@ -157,7 +169,7 @@ dump_draw_info(struct gl_context *ctx,
printf(" Prim mode 0x%x\n", prims[i].mode);
printf(" IB: %p\n", (void*) ib);
for (j = 0; j < VERT_ATTRIB_MAX; j++) {
- const struct gl_vertex_array *array = &arrays[j];
+ const struct tnl_vertex_array *array = &arrays[j];
const struct gl_vertex_buffer_binding *binding
= array->BufferBinding;
const struct gl_array_attributes *attrib = array->VertexAttrib;
@@ -254,7 +266,7 @@ elt(struct copy_context *copy, GLuint elt_idx)
GLuint i;

for (i = 0; i < copy->nr_varying; i++) {
- const struct gl_vertex_array *srcarray = copy->varying[i].array;
+ const struct tnl_vertex_array *srcarray = copy->varying[i].array;
const struct gl_vertex_buffer_binding* srcbinding
= srcarray->BufferBinding;
const GLubyte *srcptr
@@ -432,11 +444,11 @@ replay_init(struct copy_context *copy)
*/
copy->vertex_size = 0;
for (i = 0; i < VERT_ATTRIB_MAX; i++) {
- const struct gl_vertex_array *array = &copy->array[i];
+ const struct tnl_vertex_array *array = &copy->array[i];
const struct gl_vertex_buffer_binding *binding = array->BufferBinding;

if (binding->Stride == 0) {
- _mesa_copy_vertex_array(&copy->dstarray[i], array);
+ copy_vertex_array(&copy->dstarray[i], array);
}
else {
const struct gl_array_attributes *attrib = array->VertexAttrib;
@@ -517,9 +529,9 @@ replay_init(struct copy_context *copy)
/* Setup new vertex arrays to point into the output buffer:
*/
for (offset = 0, i = 0; i < copy->nr_varying; i++) {
- const struct gl_vertex_array *src = copy->varying[i].array;
+ const struct tnl_vertex_array *src = copy->varying[i].array;
const struct gl_array_attributes *srcattr = src->VertexAttrib;
- struct gl_vertex_array *dst = &copy->dstarray[i];
+ struct tnl_vertex_array *dst = &copy->dstarray[i];
struct gl_vertex_buffer_binding *dstbind = &copy->varying[i].dstbinding;
struct gl_array_attributes *dstattr = &copy->varying[i].dstattribs;

@@ -591,7 +603,7 @@ replay_finish(struct copy_context *copy)
*/
void
_tnl_split_copy(struct gl_context *ctx,
- const struct gl_vertex_array *arrays,
+ const struct tnl_vertex_array *arrays,
const struct _mesa_prim *prim,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
diff --git a/src/mesa/tnl/t_split_inplace.c b/src/mesa/tnl/t_split_inplace.c
index 15a09861c7..8e9ecb7046 100644
--- a/src/mesa/tnl/t_split_inplace.c
+++ b/src/mesa/tnl/t_split_inplace.c
@@ -43,7 +43,7 @@
*/
struct split_context {
struct gl_context *ctx;
- const struct gl_vertex_array *array;
+ const struct tnl_vertex_array *array;
const struct _mesa_prim *prim;
GLuint nr_prims;
const struct _mesa_index_buffer *ib;
@@ -265,7 +265,7 @@ split_prims(struct split_context *split)

void
_tnl_split_inplace(struct gl_context *ctx,
- const struct gl_vertex_array *arrays,
+ const struct tnl_vertex_array *arrays,
const struct _mesa_prim *prim,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
diff --git a/src/mesa/tnl/tnl.h b/src/mesa/tnl/tnl.h
index 4b6d5ec391..42de7f4f29 100644
--- a/src/mesa/tnl/tnl.h
+++ b/src/mesa/tnl/tnl.h
@@ -30,7 +30,6 @@

#include "main/glheader.h"

-struct gl_vertex_array;
struct gl_context;
struct gl_program;
struct gl_buffer_object;
@@ -66,7 +65,22 @@ _tnl_wakeup( struct gl_context *ctx );
extern void
_tnl_need_projected_coords( struct gl_context *ctx, GLboolean flag );

-extern const struct gl_vertex_array*
+
+/**
+ * Vertex array information which is derived from gl_array_attributes
+ * and gl_vertex_buffer_binding information. Used by the VBO module and
+ * device drivers.
+ */
+struct tnl_vertex_array
+{
+ /** Vertex attribute array */
+ const struct gl_array_attributes *VertexAttrib;
+ /** Vertex buffer binding */
+ const struct gl_vertex_buffer_binding *BufferBinding;
+};
+
+
+extern const struct tnl_vertex_array*
_tnl_bind_inputs( struct gl_context *ctx );


@@ -86,7 +100,7 @@ struct _mesa_index_buffer;

void
_tnl_draw_prims(struct gl_context *ctx,
- const struct gl_vertex_array *arrays,
+ const struct tnl_vertex_array *arrays,
const struct _mesa_prim *prim,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
@@ -153,7 +167,7 @@ _tnl_validate_shine_tables( struct gl_context *ctx );
* This may be deprecated in the future
*/
typedef void (*tnl_draw_func)(struct gl_context *ctx,
- const struct gl_vertex_array* arrays,
+ const struct tnl_vertex_array* arrays,
const struct _mesa_prim *prims,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
@@ -181,7 +195,7 @@ struct split_limits

void
_tnl_split_prims(struct gl_context *ctx,
- const struct gl_vertex_array *arrays,
+ const struct tnl_vertex_array *arrays,
const struct _mesa_prim *prim,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h
index ca46f9baa7..4e3f15999c 100644
--- a/src/mesa/vbo/vbo.h
+++ b/src/mesa/vbo/vbo.h
@@ -38,9 +38,7 @@
extern "C" {
#endif

-struct gl_vertex_array;
struct gl_context;
-struct gl_transform_feedback_object;

struct _mesa_prim
{
@@ -151,41 +149,6 @@ vbo_sw_primitive_restart(struct gl_context *ctx,
struct gl_buffer_object *indirect);


-/**
- * Utility that tracks and updates the current array entries.
- */
-struct vbo_inputs
-{
- /**
- * Array of inputs to be set to the _DrawArrays pointer.
- * The array contains pointers into the _DrawVAO and to the vbo modules
- * current values. The array of pointers is updated incrementally
- * based on the current and vertex_processing_mode values below.
- */
- struct gl_vertex_array inputs[VERT_ATTRIB_MAX];
- /** Those VERT_BIT_'s where the inputs array point to current values. */
- GLbitfield current;
- /** Store which aliasing current values - generics or materials - are set. */
- gl_vertex_processing_mode vertex_processing_mode;
-};
-
-
-/**
- * Initialize inputs.
- */
-void
-_vbo_init_inputs(struct vbo_inputs *inputs);
-
-
-/**
- * Update the gl_vertex_array array inside the vbo_inputs structure
- * provided the current _VPMode, the provided vao and
- * the vao's enabled arrays filtered by the filter bitmask.
- */
-void
-_vbo_update_inputs(struct gl_context *ctx, struct vbo_inputs *inputs);
-
-
const struct gl_array_attributes*
_vbo_current_attrib(const struct gl_context *ctx, gl_vert_attrib attr);

diff --git a/src/mesa/vbo/vbo_exec.c b/src/mesa/vbo/vbo_exec.c
index 357ec1da76..34dbc001c9 100644
--- a/src/mesa/vbo/vbo_exec.c
+++ b/src/mesa/vbo/vbo_exec.c
@@ -239,89 +239,3 @@ vbo_merge_prims(struct _mesa_prim *p0, const struct _mesa_prim *p1)
p0->count += p1->count;
p0->end = p1->end;
}
-
-
-void
-_vbo_init_inputs(struct vbo_inputs *inputs)
-{
- inputs->current = 0;
- inputs->vertex_processing_mode = VP_MODE_FF;
-}
-
-
-/**
- * Update the vbo_inputs's arrays to point to the vao->_VertexArray arrays
- * according to the 'enable' bitmask.
- * \param enable bitfield of VERT_BIT_x flags.
- */
-static inline void
-update_vao_inputs(struct gl_context *ctx,
- struct vbo_inputs *inputs, GLbitfield enable)
-{
- const struct gl_vertex_array_object *vao = ctx->Array._DrawVAO;
-
- /* Make sure we process only arrays enabled in the VAO */
- assert((enable & ~_mesa_get_vao_vp_inputs(vao)) == 0);
-
- /* Fill in the client arrays from the VAO */
- const GLubyte *const map = _mesa_vao_attribute_map[vao->_AttributeMapMode];
- const struct gl_array_attributes *attribs = &vao->VertexAttrib[0];
- const struct gl_vertex_buffer_binding *bindings = &vao->BufferBinding[0];
- while (enable) {
- const int attr = u_bit_scan(&enable);
- struct gl_vertex_array *input = &inputs->inputs[attr];
- const struct gl_array_attributes *attrib = &attribs[map[attr]];
- input->VertexAttrib = attrib;
- input->BufferBinding = &bindings[attrib->BufferBindingIndex];
- }
-}
-
-
-/**
- * Update the vbo_inputs's arrays to point to the vbo->currval arrays
- * according to the 'current' bitmask.
- * \param current bitfield of VERT_BIT_x flags.
- */
-static inline void
-update_current_inputs(struct gl_context *ctx,
- struct vbo_inputs *inputs, GLbitfield current)
-{
- gl_vertex_processing_mode mode = ctx->VertexProgram._VPMode;
-
- /* All previously non current array pointers need update. */
- GLbitfield mask = current & ~inputs->current;
- /* On mode change, the slots aliasing with materials need update too */
- if (mode != inputs->vertex_processing_mode)
- mask |= current & VERT_BIT_MAT_ALL;
-
- struct vbo_context *vbo = vbo_context(ctx);
- const struct gl_array_attributes *const currval = &vbo->current[0];
- const GLubyte *const map = _vbo_attribute_alias_map[mode];
- while (mask) {
- const int attr = u_bit_scan(&mask);
- struct gl_vertex_array *input = &inputs->inputs[attr];
- input->VertexAttrib = &currval[map[attr]];
- input->BufferBinding = &vbo->binding;
- }
-
- inputs->current = current;
- inputs->vertex_processing_mode = mode;
-}
-
-
-/**
- * Update the vbo_inputs's arrays to point to the vao->_VertexArray and
- * vbo->currval arrays according to Array._DrawVAO and
- * Array._DrawVAOEnableAttribs.
- */
-void
-_vbo_update_inputs(struct gl_context *ctx, struct vbo_inputs *inputs)
-{
- const GLbitfield enable = ctx->Array._DrawVAOEnabledAttribs;
-
- /* Update array input pointers */
- update_vao_inputs(ctx, inputs, enable);
-
- /* The rest must be current inputs. */
- update_current_inputs(ctx, inputs, ~enable & VERT_BIT_ALL);
-}
--
2.14.3
Mathias Fröhlich
2018-05-09 05:37:24 UTC
Permalink
Hi,
One comment needs updating below.
[...]
Post by M***@gmx.net
-extern const struct gl_vertex_array*
+
+/**
+ * Vertex array information which is derived from gl_array_attributes
+ * and gl_vertex_buffer_binding information. Used by the VBO module and
+ * device drivers.
> I think that comment needs to be updated. The tnl_vertex_array is only
> used by the tnl module now, right?

Yes indeed! The comment did somehow miss the subject of the patch...
Thanks!

Mathias
M***@gmx.net
2018-05-07 06:14:58 UTC
Permalink
From: Mathias Fröhlich <***@web.de>

Finally make use of the binding information in the VAO when
setting up arrays for draw.

v2: Also emit fewer relocations for interleaved userspace arrays.

Signed-off-by: Mathias Fröhlich <***@web.de>
---
src/mesa/state_tracker/st_atom_array.c | 432 ++++++++-------------------------
1 file changed, 107 insertions(+), 325 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom_array.c b/src/mesa/state_tracker/st_atom_array.c
index 2fd67e8d84..6b39b4186d 100644
--- a/src/mesa/state_tracker/st_atom_array.c
+++ b/src/mesa/state_tracker/st_atom_array.c
@@ -48,6 +48,7 @@
#include "main/bufferobj.h"
#include "main/glformats.h"
#include "main/varray.h"
+#include "main/arrayobj.h"

/* vertex_formats[gltype - GL_BYTE][integer*2 + normalized][size - 1] */
static const uint16_t vertex_formats[][4][4] = {
@@ -306,79 +307,6 @@ st_pipe_vertex_format(const struct gl_array_attributes *attrib)
return vertex_formats[type - GL_BYTE][index][size-1];
}

-static const struct gl_vertex_array *
-get_client_array(const struct gl_vertex_array *arrays,
- unsigned mesaAttr)
-{
- /* st_program uses 0xffffffff to denote a double placeholder attribute */
- if (mesaAttr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
- return NULL;
- return &arrays[mesaAttr];
-}
-
-/**
- * Examine the active arrays to determine if we have interleaved
- * vertex arrays all living in one VBO, or all living in user space.
- */
-static GLboolean
-is_interleaved_arrays(const struct st_vertex_program *vp,
- const struct gl_vertex_array *arrays,
- unsigned num_inputs)
-{
- GLuint attr;
- const struct gl_buffer_object *firstBufObj = NULL;
- GLint firstStride = -1;
- const GLubyte *firstPtr = NULL;
- GLboolean userSpaceBuffer = GL_FALSE;
-
- for (attr = 0; attr < num_inputs; attr++) {
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
- const GLubyte *ptr;
- const struct gl_buffer_object *bufObj;
- GLsizei stride;
-
- array = get_client_array(arrays, vp->index_to_input[attr]);
- if (!array)
- continue;
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- stride = binding->Stride; /* in bytes */
- ptr = _mesa_vertex_attrib_address(attrib, binding);
-
- /* To keep things simple, don't allow interleaved zero-stride attribs. */
- if (stride == 0)
- return false;
-
- bufObj = binding->BufferObj;
- if (attr == 0) {
- /* save info about the first array */
- firstStride = stride;
- firstPtr = ptr;
- firstBufObj = bufObj;
- userSpaceBuffer = !_mesa_is_bufferobj(bufObj);
- }
- else {
- /* check if other arrays interleave with the first, in same buffer */
- if (stride != firstStride)
- return GL_FALSE; /* strides don't match */
-
- if (bufObj != firstBufObj)
- return GL_FALSE; /* arrays in different VBOs */
-
- if (llabs(ptr - firstPtr) > firstStride)
- return GL_FALSE; /* arrays start too far apart */
-
- if ((!_mesa_is_bufferobj(bufObj)) != userSpaceBuffer)
- return GL_FALSE; /* mix of VBO and user-space arrays */
- }
- }
-
- return GL_TRUE;
-}
-
static void init_velement(struct pipe_vertex_element *velement,
int src_offset, int format,
int instance_divisor, int vbo_index)
@@ -392,13 +320,14 @@ static void init_velement(struct pipe_vertex_element *velement,

static void init_velement_lowered(const struct st_vertex_program *vp,
struct pipe_vertex_element *velements,
- int src_offset, int format,
- int instance_divisor, int vbo_index,
- int nr_components, GLboolean doubles,
- GLuint *attr_idx)
+ const struct gl_array_attributes *attrib,
+ int src_offset, int instance_divisor,
+ int vbo_index, int idx)
{
- int idx = *attr_idx;
- if (doubles) {
+ const unsigned format = st_pipe_vertex_format(attrib);
+ const GLubyte nr_components = attrib->Size;
+
+ if (attrib->Doubles) {
int lower_format;

if (nr_components < 2)
@@ -427,15 +356,11 @@ static void init_velement_lowered(const struct st_vertex_program *vp,
init_velement(&velements[idx], src_offset, PIPE_FORMAT_R32G32_UINT,
instance_divisor, vbo_index);
}
-
- idx++;
}
} else {
init_velement(&velements[idx], src_offset,
format, instance_divisor, vbo_index);
- idx++;
}
- *attr_idx = idx;
}

static void
@@ -457,274 +382,131 @@ set_vertex_attribs(struct st_context *st,
cso_set_vertex_elements(cso, num_velements, velements);
}

-/**
- * Set up for drawing interleaved arrays that all live in one VBO
- * or all live in user space.
- * \param vbuffer returns vertex buffer info
- * \param velements returns vertex element info
- */
-static void
-setup_interleaved_attribs(struct st_context *st,
- const struct st_vertex_program *vp,
- const struct gl_vertex_array *arrays,
- unsigned num_inputs)
-{
- struct pipe_vertex_buffer vbuffer;
- struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS] = {{0}};
- GLuint attr;
- const GLubyte *low_addr = NULL;
- GLboolean usingVBO; /* all arrays in a VBO? */
- struct gl_buffer_object *bufobj;
- GLsizei stride;
-
- /* Find the lowest address of the arrays we're drawing,
- * Init bufobj and stride.
- */
- if (num_inputs) {
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
-
- array = get_client_array(arrays, vp->index_to_input[0]);
- assert(array);
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
-
- /* Since we're doing interleaved arrays, we know there'll be at most
- * one buffer object and the stride will be the same for all arrays.
- * Grab them now.
- */
- bufobj = binding->BufferObj;
- stride = binding->Stride;
-
- low_addr = _mesa_vertex_attrib_address(attrib, binding);
-
- for (attr = 1; attr < num_inputs; attr++) {
- const GLubyte *start;
- array = get_client_array(arrays, vp->index_to_input[attr]);
- if (!array)
- continue;
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- start = _mesa_vertex_attrib_address(attrib, binding);
- low_addr = MIN2(low_addr, start);
- }
- }
- else {
- /* not sure we'll ever have zero inputs, but play it safe */
- bufobj = NULL;
- stride = 0;
- low_addr = 0;
- }
-
- /* are the arrays in user space? */
- usingVBO = _mesa_is_bufferobj(bufobj);
-
- for (attr = 0; attr < num_inputs;) {
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
- const GLubyte *ptr;
- unsigned src_offset;
- unsigned src_format;
-
- array = get_client_array(arrays, vp->index_to_input[attr]);
- assert(array);
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- ptr = _mesa_vertex_attrib_address(attrib, binding);
-
- src_offset = (unsigned) (ptr - low_addr);
-
- src_format = st_pipe_vertex_format(attrib);
-
- init_velement_lowered(vp, velements, src_offset, src_format,
- binding->InstanceDivisor, 0,
- attrib->Size, attrib->Doubles, &attr);
- }
-
- /*
- * Return the vbuffer info and setup user-space attrib info, if needed.
- */
- if (num_inputs == 0) {
- /* just defensive coding here */
- vbuffer.buffer.resource = NULL;
- vbuffer.is_user_buffer = false;
- vbuffer.buffer_offset = 0;
- vbuffer.stride = 0;
- }
- else if (usingVBO) {
- /* all interleaved arrays in a VBO */
- struct st_buffer_object *stobj = st_buffer_object(bufobj);
-
- if (!stobj || !stobj->buffer) {
- st->vertex_array_out_of_memory = true;
- return; /* out-of-memory error probably */
- }
-
- vbuffer.buffer.resource = stobj->buffer;
- vbuffer.is_user_buffer = false;
- vbuffer.buffer_offset = pointer_to_offset(low_addr);
- vbuffer.stride = stride;
- }
- else {
- /* all interleaved arrays in user memory */
- vbuffer.buffer.user = low_addr;
- vbuffer.is_user_buffer = !!low_addr; /* if NULL, then unbind */
- vbuffer.buffer_offset = 0;
- vbuffer.stride = stride;
-
- if (low_addr)
- st->draw_needs_minmax_index = true;
- }
-
- set_vertex_attribs(st, &vbuffer, num_inputs ? 1 : 0,
- velements, num_inputs);
-}
-
-/**
- * Set up a separate pipe_vertex_buffer and pipe_vertex_element for each
- * vertex attribute.
- * \param vbuffer returns vertex buffer info
- * \param velements returns vertex element info
- */
-static void
-setup_non_interleaved_attribs(struct st_context *st,
- const struct st_vertex_program *vp,
- const struct gl_vertex_array *arrays,
- unsigned num_inputs)
+void st_update_array(struct st_context *st)
{
struct gl_context *ctx = st->ctx;
+ /* vertex program validation must be done before this */
+ const struct st_vertex_program *vp = st->vp;
+ /* _NEW_PROGRAM, ST_NEW_VS_STATE */
+ const GLbitfield inputs_read = st->vp_variant->vert_attrib_mask;
+ const struct gl_vertex_array_object *vao = ctx->Array._DrawVAO;
+ const ubyte *input_to_index = vp->input_to_index;
+
struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS];
- struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS] = {{0}};
+ struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
unsigned num_vbuffers = 0;
- unsigned unref_buffers = 0;
- GLuint attr;
-
- for (attr = 0; attr < num_inputs;) {
- const unsigned mesaAttr = vp->index_to_input[attr];
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
- struct gl_buffer_object *bufobj;
- GLsizei stride;
- unsigned src_format;
- unsigned bufidx;
-
- array = get_client_array(arrays, mesaAttr);
- assert(array);
-
- bufidx = num_vbuffers++;
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- stride = binding->Stride;
- bufobj = binding->BufferObj;
-
- if (_mesa_is_bufferobj(bufobj)) {
- /* Attribute data is in a VBO.
- * Recall that for VBOs, the gl_vertex_array->Ptr field is
- * really an offset from the start of the VBO, not a pointer.
- */
- struct st_buffer_object *stobj = st_buffer_object(bufobj);

+ st->vertex_array_out_of_memory = FALSE;
+ st->draw_needs_minmax_index = false;
+
+ /* _NEW_PROGRAM */
+ /* ST_NEW_VERTEX_ARRAYS alias ctx->DriverFlags.NewArray */
+ /* Process attribute array data. */
+ GLbitfield mask = inputs_read & _mesa_draw_array_bits(ctx);
+ while (mask) {
+ /* The attribute index to start pulling a binding */
+ const gl_vert_attrib i = ffs(mask) - 1;
+ const struct gl_vertex_buffer_binding *const binding
+ = _mesa_draw_buffer_binding(vao, i);
+ const unsigned bufidx = num_vbuffers++;
+
+ if (_mesa_is_bufferobj(binding->BufferObj)) {
+ struct st_buffer_object *stobj = st_buffer_object(binding->BufferObj);
if (!stobj || !stobj->buffer) {
st->vertex_array_out_of_memory = true;
return; /* out-of-memory error probably */
}

+ /* Set the binding */
vbuffer[bufidx].buffer.resource = stobj->buffer;
vbuffer[bufidx].is_user_buffer = false;
- vbuffer[bufidx].buffer_offset =
- binding->Offset + attrib->RelativeOffset;
+ vbuffer[bufidx].buffer_offset = _mesa_draw_binding_offset(binding);
+ } else {
+ /* Set the binding */
+ const void *ptr = (const void *)_mesa_draw_binding_offset(binding);
+ vbuffer[bufidx].buffer.user = ptr;
+ vbuffer[bufidx].is_user_buffer = true;
+ vbuffer[bufidx].buffer_offset = 0;
+
+ if (!binding->InstanceDivisor)
+ st->draw_needs_minmax_index = true;
}
- else {
- if (stride == 0) {
- unsigned size = attrib->_ElementSize;
- /* This is optimal for GPU cache line usage if the upload size
- * is <= cache line size.
- */
- unsigned alignment = util_next_power_of_two(size);
-
- assert(attrib->Ptr);
- vbuffer[bufidx].buffer.user = attrib->Ptr;
- void *ptr = attrib->Ptr ? (void*)attrib->Ptr :
- (void*)ctx->Current.Attrib[mesaAttr];
-
- vbuffer[bufidx].is_user_buffer = false;
- vbuffer[bufidx].buffer.resource = NULL;
-
- /* Use const_uploader for zero-stride vertex attributes, because
- * it may use a better memory placement than stream_uploader.
- * The reason is that zero-stride attributes can be fetched many
- * times (thousands of times), so a better placement is going to
- * perform better.
- *
- * Upload the maximum possible size, which is 4x GLdouble = 32.
- */
- u_upload_data(st->can_bind_const_buffer_as_vertex ?
- st->pipe->const_uploader :
- st->pipe->stream_uploader,
- 0, size, alignment, ptr,
- &vbuffer[bufidx].buffer_offset,
- &vbuffer[bufidx].buffer.resource);
- unref_buffers |= 1u << bufidx;
- } else {
- assert(attrib->Ptr);
- vbuffer[bufidx].buffer.user = attrib->Ptr;
- vbuffer[bufidx].is_user_buffer = true;
- vbuffer[bufidx].buffer_offset = 0;
-
- if (!binding->InstanceDivisor)
- st->draw_needs_minmax_index = true;
- }
+ vbuffer[bufidx].stride = binding->Stride; /* in bytes */
+
+ const GLbitfield boundmask = _mesa_draw_bound_attrib_bits(binding);
+ GLbitfield attrmask = mask & boundmask;
+ /* Mark the those attributes as processed */
+ mask &= ~boundmask;
+ /* We can assume that we have array for the binding */
+ assert(attrmask);
+ /* Walk attributes belonging to the binding */
+ while (attrmask) {
+ const gl_vert_attrib attr = u_bit_scan(&attrmask);
+ const struct gl_array_attributes *const attrib
+ = _mesa_draw_array_attrib(vao, attr);
+ const GLuint off = _mesa_draw_attributes_relative_offset(attrib);
+ init_velement_lowered(vp, velements, attrib, off,
+ binding->InstanceDivisor, bufidx,
+ input_to_index[attr]);
}
+ }

- /* common-case setup */
- vbuffer[bufidx].stride = stride; /* in bytes */
+ const unsigned first_current_vbuffer = num_vbuffers;
+ /* _NEW_PROGRAM | _NEW_CURRENT_ATTRIB */
+ /* Process values that should have better been uniforms in the application */
+ GLbitfield curmask = inputs_read & _mesa_draw_current_bits(ctx);
+ if (curmask) {
+ /* For each attribute, upload the maximum possible size. */
+ GLubyte data[VERT_ATTRIB_MAX*sizeof(GLdouble)*4];
+ GLubyte *cursor = data;
+ const unsigned bufidx = num_vbuffers++;
+ unsigned max_alignment = 1;
+
+ while (curmask) {
+ const gl_vert_attrib attr = u_bit_scan(&curmask);
+ const struct gl_array_attributes *const attrib
+ = _mesa_draw_current_attrib(ctx, attr);
+ const unsigned size = attrib->_ElementSize;
+ const unsigned alignment = util_next_power_of_two(size);
+ max_alignment = MAX2(max_alignment, alignment);
+ memcpy(cursor, attrib->Ptr, size);
+ if (alignment != size)
+ memset(cursor + size, 0, alignment - size);
+
+ init_velement_lowered(vp, velements, attrib, cursor - data, 0,
+ bufidx, input_to_index[attr]);
+
+ cursor += alignment;
+ }

- src_format = st_pipe_vertex_format(attrib);
+ vbuffer[bufidx].is_user_buffer = false;
+ vbuffer[bufidx].buffer.resource = NULL;
+ /* vbuffer[bufidx].buffer_offset is set below */
+ vbuffer[bufidx].stride = 0;

- init_velement_lowered(vp, velements, 0, src_format,
- binding->InstanceDivisor, bufidx,
- attrib->Size, attrib->Doubles, &attr);
+ /* Use const_uploader for zero-stride vertex attributes, because
+ * it may use a better memory placement than stream_uploader.
+ * The reason is that zero-stride attributes can be fetched many
+ * times (thousands of times), so a better placement is going to
+ * perform better.
+ */
+ u_upload_data(st->can_bind_const_buffer_as_vertex ?
+ st->pipe->const_uploader :
+ st->pipe->stream_uploader,
+ 0, cursor - data, max_alignment, data,
+ &vbuffer[bufidx].buffer_offset,
+ &vbuffer[bufidx].buffer.resource);
}

if (!ctx->Const.AllowMappedBuffersDuringExecution) {
u_upload_unmap(st->pipe->stream_uploader);
}

+ const unsigned num_inputs = st->vp_variant->num_inputs;
set_vertex_attribs(st, vbuffer, num_vbuffers, velements, num_inputs);

/* Unreference uploaded zero-stride vertex buffers. */
- while (unref_buffers) {
- unsigned i = u_bit_scan(&unref_buffers);
+ for (unsigned i = first_current_vbuffer; i < num_vbuffers; ++i) {
pipe_resource_reference(&vbuffer[i].buffer.resource, NULL);
}
}
-
-void st_update_array(struct st_context *st)
-{
- struct gl_context *ctx = st->ctx;
- const struct gl_vertex_array *arrays = ctx->Array._DrawArrays;
- const struct st_vertex_program *vp;
- unsigned num_inputs;
-
- st->vertex_array_out_of_memory = FALSE;
- st->draw_needs_minmax_index = false;
-
- /* No drawing has been done yet, so do nothing. */
- if (!arrays)
- return;
-
- /* vertex program validation must be done before this */
- vp = st->vp;
- num_inputs = st->vp_variant->num_inputs;
-
- if (is_interleaved_arrays(vp, arrays, num_inputs))
- setup_interleaved_attribs(st, vp, arrays, num_inputs);
- else
- setup_non_interleaved_attribs(st, vp, arrays, num_inputs);
-}
--
2.14.3
Brian Paul
2018-05-09 02:22:40 UTC
Permalink
Two minor nits below.

-Brian
Post by M***@gmx.net
Finally make use of the binding information in the VAO when
setting up arrays for draw.
v2: Emit less relocations also for interleaved userspace arrays.
---
src/mesa/state_tracker/st_atom_array.c | 432 ++++++++-------------------------
1 file changed, 107 insertions(+), 325 deletions(-)
diff --git a/src/mesa/state_tracker/st_atom_array.c b/src/mesa/state_tracker/st_atom_array.c
index 2fd67e8d84..6b39b4186d 100644
--- a/src/mesa/state_tracker/st_atom_array.c
+++ b/src/mesa/state_tracker/st_atom_array.c
@@ -48,6 +48,7 @@
#include "main/bufferobj.h"
#include "main/glformats.h"
#include "main/varray.h"
+#include "main/arrayobj.h"
/* vertex_formats[gltype - GL_BYTE][integer*2 + normalized][size - 1] */
static const uint16_t vertex_formats[][4][4] = {
@@ -306,79 +307,6 @@ st_pipe_vertex_format(const struct gl_array_attributes *attrib)
return vertex_formats[type - GL_BYTE][index][size-1];
}
-static const struct gl_vertex_array *
-get_client_array(const struct gl_vertex_array *arrays,
- unsigned mesaAttr)
-{
- /* st_program uses 0xffffffff to denote a double placeholder attribute */
- if (mesaAttr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
- return NULL;
- return &arrays[mesaAttr];
-}
-
-/**
- * Examine the active arrays to determine if we have interleaved
- * vertex arrays all living in one VBO, or all living in user space.
- */
-static GLboolean
-is_interleaved_arrays(const struct st_vertex_program *vp,
- const struct gl_vertex_array *arrays,
- unsigned num_inputs)
-{
- GLuint attr;
- const struct gl_buffer_object *firstBufObj = NULL;
- GLint firstStride = -1;
- const GLubyte *firstPtr = NULL;
- GLboolean userSpaceBuffer = GL_FALSE;
-
- for (attr = 0; attr < num_inputs; attr++) {
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
- const GLubyte *ptr;
- const struct gl_buffer_object *bufObj;
- GLsizei stride;
-
- array = get_client_array(arrays, vp->index_to_input[attr]);
- if (!array)
- continue;
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- stride = binding->Stride; /* in bytes */
- ptr = _mesa_vertex_attrib_address(attrib, binding);
-
- /* To keep things simple, don't allow interleaved zero-stride attribs. */
- if (stride == 0)
- return false;
-
- bufObj = binding->BufferObj;
- if (attr == 0) {
- /* save info about the first array */
- firstStride = stride;
- firstPtr = ptr;
- firstBufObj = bufObj;
- userSpaceBuffer = !_mesa_is_bufferobj(bufObj);
- }
- else {
- /* check if other arrays interleave with the first, in same buffer */
- if (stride != firstStride)
- return GL_FALSE; /* strides don't match */
-
- if (bufObj != firstBufObj)
- return GL_FALSE; /* arrays in different VBOs */
-
- if (llabs(ptr - firstPtr) > firstStride)
- return GL_FALSE; /* arrays start too far apart */
-
- if ((!_mesa_is_bufferobj(bufObj)) != userSpaceBuffer)
- return GL_FALSE; /* mix of VBO and user-space arrays */
- }
- }
-
- return GL_TRUE;
-}
-
static void init_velement(struct pipe_vertex_element *velement,
int src_offset, int format,
int instance_divisor, int vbo_index)
@@ -392,13 +320,14 @@ static void init_velement(struct pipe_vertex_element *velement,
static void init_velement_lowered(const struct st_vertex_program *vp,
struct pipe_vertex_element *velements,
- int src_offset, int format,
- int instance_divisor, int vbo_index,
- int nr_components, GLboolean doubles,
- GLuint *attr_idx)
+ const struct gl_array_attributes *attrib,
+ int src_offset, int instance_divisor,
+ int vbo_index, int idx)
{
- int idx = *attr_idx;
- if (doubles) {
+ const unsigned format = st_pipe_vertex_format(attrib);
+ const GLubyte nr_components = attrib->Size;
+
+ if (attrib->Doubles) {
int lower_format;
if (nr_components < 2)
@@ -427,15 +356,11 @@ static void init_velement_lowered(const struct st_vertex_program *vp,
init_velement(&velements[idx], src_offset, PIPE_FORMAT_R32G32_UINT,
instance_divisor, vbo_index);
}
-
- idx++;
}
} else {
init_velement(&velements[idx], src_offset,
format, instance_divisor, vbo_index);
- idx++;
}
- *attr_idx = idx;
}
static void
@@ -457,274 +382,131 @@ set_vertex_attribs(struct st_context *st,
cso_set_vertex_elements(cso, num_velements, velements);
}
-/**
- * Set up for drawing interleaved arrays that all live in one VBO
- * or all live in user space.
- * \param vbuffer returns vertex buffer info
- * \param velements returns vertex element info
- */
-static void
-setup_interleaved_attribs(struct st_context *st,
- const struct st_vertex_program *vp,
- const struct gl_vertex_array *arrays,
- unsigned num_inputs)
-{
- struct pipe_vertex_buffer vbuffer;
- struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS] = {{0}};
- GLuint attr;
- const GLubyte *low_addr = NULL;
- GLboolean usingVBO; /* all arrays in a VBO? */
- struct gl_buffer_object *bufobj;
- GLsizei stride;
-
- /* Find the lowest address of the arrays we're drawing,
- * Init bufobj and stride.
- */
- if (num_inputs) {
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
-
- array = get_client_array(arrays, vp->index_to_input[0]);
- assert(array);
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
-
- /* Since we're doing interleaved arrays, we know there'll be at most
- * one buffer object and the stride will be the same for all arrays.
- * Grab them now.
- */
- bufobj = binding->BufferObj;
- stride = binding->Stride;
-
- low_addr = _mesa_vertex_attrib_address(attrib, binding);
-
- for (attr = 1; attr < num_inputs; attr++) {
- const GLubyte *start;
- array = get_client_array(arrays, vp->index_to_input[attr]);
- if (!array)
- continue;
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- start = _mesa_vertex_attrib_address(attrib, binding);
- low_addr = MIN2(low_addr, start);
- }
- }
- else {
- /* not sure we'll ever have zero inputs, but play it safe */
- bufobj = NULL;
- stride = 0;
- low_addr = 0;
- }
-
- /* are the arrays in user space? */
- usingVBO = _mesa_is_bufferobj(bufobj);
-
- for (attr = 0; attr < num_inputs;) {
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
- const GLubyte *ptr;
- unsigned src_offset;
- unsigned src_format;
-
- array = get_client_array(arrays, vp->index_to_input[attr]);
- assert(array);
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- ptr = _mesa_vertex_attrib_address(attrib, binding);
-
- src_offset = (unsigned) (ptr - low_addr);
-
- src_format = st_pipe_vertex_format(attrib);
-
- init_velement_lowered(vp, velements, src_offset, src_format,
- binding->InstanceDivisor, 0,
- attrib->Size, attrib->Doubles, &attr);
- }
-
- /*
- * Return the vbuffer info and setup user-space attrib info, if needed.
- */
- if (num_inputs == 0) {
- /* just defensive coding here */
- vbuffer.buffer.resource = NULL;
- vbuffer.is_user_buffer = false;
- vbuffer.buffer_offset = 0;
- vbuffer.stride = 0;
- }
- else if (usingVBO) {
- /* all interleaved arrays in a VBO */
- struct st_buffer_object *stobj = st_buffer_object(bufobj);
-
- if (!stobj || !stobj->buffer) {
- st->vertex_array_out_of_memory = true;
- return; /* out-of-memory error probably */
- }
-
- vbuffer.buffer.resource = stobj->buffer;
- vbuffer.is_user_buffer = false;
- vbuffer.buffer_offset = pointer_to_offset(low_addr);
- vbuffer.stride = stride;
- }
- else {
- /* all interleaved arrays in user memory */
- vbuffer.buffer.user = low_addr;
- vbuffer.is_user_buffer = !!low_addr; /* if NULL, then unbind */
- vbuffer.buffer_offset = 0;
- vbuffer.stride = stride;
-
- if (low_addr)
- st->draw_needs_minmax_index = true;
- }
-
- set_vertex_attribs(st, &vbuffer, num_inputs ? 1 : 0,
- velements, num_inputs);
-}
-
-/**
- * Set up a separate pipe_vertex_buffer and pipe_vertex_element for each
- * vertex attribute.
- * \param vbuffer returns vertex buffer info
- * \param velements returns vertex element info
- */
-static void
-setup_non_interleaved_attribs(struct st_context *st,
- const struct st_vertex_program *vp,
- const struct gl_vertex_array *arrays,
- unsigned num_inputs)
+void st_update_array(struct st_context *st)
Nit: please put "void" and the function name on separate lines.
Post by M***@gmx.net
{
struct gl_context *ctx = st->ctx;
+ /* vertex program validation must be done before this */
+ const struct st_vertex_program *vp = st->vp;
+ /* _NEW_PROGRAM, ST_NEW_VS_STATE */
+ const GLbitfield inputs_read = st->vp_variant->vert_attrib_mask;
+ const struct gl_vertex_array_object *vao = ctx->Array._DrawVAO;
+ const ubyte *input_to_index = vp->input_to_index;
+
struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS];
- struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS] = {{0}};
+ struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
unsigned num_vbuffers = 0;
- unsigned unref_buffers = 0;
- GLuint attr;
-
- for (attr = 0; attr < num_inputs;) {
- const unsigned mesaAttr = vp->index_to_input[attr];
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
- struct gl_buffer_object *bufobj;
- GLsizei stride;
- unsigned src_format;
- unsigned bufidx;
-
- array = get_client_array(arrays, mesaAttr);
- assert(array);
-
- bufidx = num_vbuffers++;
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- stride = binding->Stride;
- bufobj = binding->BufferObj;
-
- if (_mesa_is_bufferobj(bufobj)) {
- /* Attribute data is in a VBO.
- * Recall that for VBOs, the gl_vertex_array->Ptr field is
- * really an offset from the start of the VBO, not a pointer.
- */
- struct st_buffer_object *stobj = st_buffer_object(bufobj);
+ st->vertex_array_out_of_memory = FALSE;
+ st->draw_needs_minmax_index = false;
+
+ /* _NEW_PROGRAM */
+ /* ST_NEW_VERTEX_ARRAYS alias ctx->DriverFlags.NewArray */
+ /* Process attribute array data. */
+ GLbitfield mask = inputs_read & _mesa_draw_array_bits(ctx);
+ while (mask) {
+ /* The attribute index to start pulling a binding */
+ const gl_vert_attrib i = ffs(mask) - 1;
+ const struct gl_vertex_buffer_binding *const binding
+ = _mesa_draw_buffer_binding(vao, i);
+ const unsigned bufidx = num_vbuffers++;
+
+ if (_mesa_is_bufferobj(binding->BufferObj)) {
+ struct st_buffer_object *stobj = st_buffer_object(binding->BufferObj);
if (!stobj || !stobj->buffer) {
st->vertex_array_out_of_memory = true;
return; /* out-of-memory error probably */
}
+ /* Set the binding */
vbuffer[bufidx].buffer.resource = stobj->buffer;
vbuffer[bufidx].is_user_buffer = false;
- vbuffer[bufidx].buffer_offset =
- binding->Offset + attrib->RelativeOffset;
+ vbuffer[bufidx].buffer_offset = _mesa_draw_binding_offset(binding);
+ } else {
+ /* Set the binding */
+ const void *ptr = (const void *)_mesa_draw_binding_offset(binding);
+ vbuffer[bufidx].buffer.user = ptr;
+ vbuffer[bufidx].is_user_buffer = true;
+ vbuffer[bufidx].buffer_offset = 0;
+
+ if (!binding->InstanceDivisor)
+ st->draw_needs_minmax_index = true;
}
- else {
- if (stride == 0) {
- unsigned size = attrib->_ElementSize;
- /* This is optimal for GPU cache line usage if the upload size
- * is <= cache line size.
- */
- unsigned alignment = util_next_power_of_two(size);
-
- assert(attrib->Ptr);
- vbuffer[bufidx].buffer.user = attrib->Ptr;
- void *ptr = attrib->Ptr ? (void*)attrib->Ptr :
- (void*)ctx->Current.Attrib[mesaAttr];
-
- vbuffer[bufidx].is_user_buffer = false;
- vbuffer[bufidx].buffer.resource = NULL;
-
- /* Use const_uploader for zero-stride vertex attributes, because
- * it may use a better memory placement than stream_uploader.
- * The reason is that zero-stride attributes can be fetched many
- * times (thousands of times), so a better placement is going to
- * perform better.
- *
- * Upload the maximum possible size, which is 4x GLdouble = 32.
- */
- u_upload_data(st->can_bind_const_buffer_as_vertex ?
- st->pipe->const_uploader :
- st->pipe->stream_uploader,
- 0, size, alignment, ptr,
- &vbuffer[bufidx].buffer_offset,
- &vbuffer[bufidx].buffer.resource);
- unref_buffers |= 1u << bufidx;
- } else {
- assert(attrib->Ptr);
- vbuffer[bufidx].buffer.user = attrib->Ptr;
- vbuffer[bufidx].is_user_buffer = true;
- vbuffer[bufidx].buffer_offset = 0;
-
- if (!binding->InstanceDivisor)
- st->draw_needs_minmax_index = true;
- }
+ vbuffer[bufidx].stride = binding->Stride; /* in bytes */
+
+ const GLbitfield boundmask = _mesa_draw_bound_attrib_bits(binding);
+ GLbitfield attrmask = mask & boundmask;
+ /* Mark the those attributes as processed */
+ mask &= ~boundmask;
+ /* We can assume that we have array for the binding */
+ assert(attrmask);
+ /* Walk attributes belonging to the binding */
+ while (attrmask) {
+ const gl_vert_attrib attr = u_bit_scan(&attrmask);
+ const struct gl_array_attributes *const attrib
+ = _mesa_draw_array_attrib(vao, attr);
+ const GLuint off = _mesa_draw_attributes_relative_offset(attrib);
+ init_velement_lowered(vp, velements, attrib, off,
+ binding->InstanceDivisor, bufidx,
+ input_to_index[attr]);
}
+ }
- /* common-case setup */
- vbuffer[bufidx].stride = stride; /* in bytes */
+ const unsigned first_current_vbuffer = num_vbuffers;
+ /* _NEW_PROGRAM | _NEW_CURRENT_ATTRIB */
+ /* Process values that should have better been uniforms in the application */
+ GLbitfield curmask = inputs_read & _mesa_draw_current_bits(ctx);
+ if (curmask) {
+ /* For each attribute, upload the maximum possible size. */
+ GLubyte data[VERT_ATTRIB_MAX*sizeof(GLdouble)*4];
Nit: please add spaces before and after each "*".
Post by M***@gmx.net
+ GLubyte *cursor = data;
+ const unsigned bufidx = num_vbuffers++;
+ unsigned max_alignment = 1;
+
+ while (curmask) {
+ const gl_vert_attrib attr = u_bit_scan(&curmask);
+ const struct gl_array_attributes *const attrib
+ = _mesa_draw_current_attrib(ctx, attr);
+ const unsigned size = attrib->_ElementSize;
+ const unsigned alignment = util_next_power_of_two(size);
+ max_alignment = MAX2(max_alignment, alignment);
+ memcpy(cursor, attrib->Ptr, size);
+ if (alignment != size)
+ memset(cursor + size, 0, alignment - size);
+
+ init_velement_lowered(vp, velements, attrib, cursor - data, 0,
+ bufidx, input_to_index[attr]);
+
+ cursor += alignment;
+ }
- src_format = st_pipe_vertex_format(attrib);
+ vbuffer[bufidx].is_user_buffer = false;
+ vbuffer[bufidx].buffer.resource = NULL;
+ /* vbuffer[bufidx].buffer_offset is set below */
+ vbuffer[bufidx].stride = 0;
- init_velement_lowered(vp, velements, 0, src_format,
- binding->InstanceDivisor, bufidx,
- attrib->Size, attrib->Doubles, &attr);
+ /* Use const_uploader for zero-stride vertex attributes, because
+ * it may use a better memory placement than stream_uploader.
+ * The reason is that zero-stride attributes can be fetched many
+ * times (thousands of times), so a better placement is going to
+ * perform better.
+ */
+ u_upload_data(st->can_bind_const_buffer_as_vertex ?
+ st->pipe->const_uploader :
+ st->pipe->stream_uploader,
+ 0, cursor - data, max_alignment, data,
+ &vbuffer[bufidx].buffer_offset,
+ &vbuffer[bufidx].buffer.resource);
}
if (!ctx->Const.AllowMappedBuffersDuringExecution) {
u_upload_unmap(st->pipe->stream_uploader);
}
+ const unsigned num_inputs = st->vp_variant->num_inputs;
set_vertex_attribs(st, vbuffer, num_vbuffers, velements, num_inputs);
/* Unreference uploaded zero-stride vertex buffers. */
- while (unref_buffers) {
- unsigned i = u_bit_scan(&unref_buffers);
+ for (unsigned i = first_current_vbuffer; i < num_vbuffers; ++i) {
pipe_resource_reference(&vbuffer[i].buffer.resource, NULL);
}
}
-
-void st_update_array(struct st_context *st)
-{
- struct gl_context *ctx = st->ctx;
- const struct gl_vertex_array *arrays = ctx->Array._DrawArrays;
- const struct st_vertex_program *vp;
- unsigned num_inputs;
-
- st->vertex_array_out_of_memory = FALSE;
- st->draw_needs_minmax_index = false;
-
- /* No drawing has been done yet, so do nothing. */
- if (!arrays)
- return;
-
- /* vertex program validation must be done before this */
- vp = st->vp;
- num_inputs = st->vp_variant->num_inputs;
-
- if (is_interleaved_arrays(vp, arrays, num_inputs))
- setup_interleaved_attribs(st, vp, arrays, num_inputs);
- else
- setup_non_interleaved_attribs(st, vp, arrays, num_inputs);
-}
Rob Clark
2018-05-15 17:41:17 UTC
Permalink
Post by M***@gmx.net
Finally make use of the binding information in the VAO when
setting up arrays for draw.
v2: Emit less relocations also for interleaved userspace arrays.
This appears to break glmark2 (and, well, probably other things too):

Thread 1 "glmark2" received signal SIGTRAP, Trace/breakpoint trap.
0x0000ffffbefc1c78 in kill () from /lib64/libc.so.6
(gdb) bt
#0 0x0000ffffbefc1c78 in kill () from /lib64/libc.so.6
#1 0x0000ffffbde5ff90 in _debug_assert_fail (expr=0xffffbe6b9320
"desc", file=0xffffbe6b92f0
"../src/gallium/auxiliary/util/u_format.h", line=767,
function=0xffffbe6b9830 <__func__.7922>
"util_format_get_blocksizebits") at
../src/gallium/auxiliary/util/u_debug.c:322
#2 0x0000ffffbdec4384 in util_format_get_blocksizebits (format=1531)
at ../src/gallium/auxiliary/util/u_format.h:767
#3 0x0000ffffbdec43bc in util_format_get_blocksize (format=1531) at
../src/gallium/auxiliary/util/u_format.h:781
#4 0x0000ffffbdec62a4 in u_vbuf_create_vertex_elements (mgr=0x7073b0,
count=2, attribs=0xffffffffea20) at
../src/gallium/auxiliary/util/u_vbuf.c:761
#5 0x0000ffffbdec4da4 in u_vbuf_set_vertex_elements_internal
(mgr=0x7073b0, count=2, states=0xffffffffea20) at
../src/gallium/auxiliary/util/u_vbuf.c:346
#6 0x0000ffffbdec4e94 in u_vbuf_set_vertex_elements (mgr=0x7073b0,
count=2, states=0xffffffffea20) at
../src/gallium/auxiliary/util/u_vbuf.c:367
#7 0x0000ffffbde804dc in cso_set_vertex_elements (ctx=0x6e8190,
count=2, states=0xffffffffea20) at
../src/gallium/auxiliary/cso_cache/cso_context.c:1067
#8 0x0000ffffbe3881d0 in set_vertex_attribs (st=0x6e2f20,
vbuffers=0xffffffffeb20, num_vbuffers=0, velements=0xffffffffea20,
num_velements=2) at ../src/mesa/state_tracker/st_atom_array.c:382
#9 0x0000ffffbe388708 in st_update_array (st=0x6e2f20) at
../src/mesa/state_tracker/st_atom_array.c:507
#10 0x0000ffffbe334634 in st_validate_state (st=0x6e2f20,
pipeline=ST_PIPELINE_RENDER) at
../src/mesa/state_tracker/st_atom.c:261
#11 0x0000ffffbe2802ec in prepare_draw (st=0x6e2f20, ctx=0x6c74d0) at
../src/mesa/state_tracker/st_draw.c:123
#12 0x0000ffffbe280364 in st_draw_vbo (ctx=0x6c74d0,
prims=0xffffffffef18, nr_prims=1, ib=0x0, index_bounds_valid=1 '\001',
min_index=0, max_index=21515, tfb_vertcount=0x0, stream=0,
indirect=0x0) at ../src/mesa/state_tracker/st_draw.c:153
#13 0x0000ffffbe270c48 in vbo_draw_arrays (ctx=0x6c74d0, mode=4,
start=0, count=21516, numInstances=1, baseInstance=0, drawID=0) at
../src/mesa/vbo/vbo_exec_array.c:391
#14 0x0000ffffbe2715a4 in vbo_exec_DrawArrays (mode=4, start=0,
count=21516) at ../src/mesa/vbo/vbo_exec_array.c:550
#15 0x0000ffffbebccc74 in shared_dispatch_stub_310 (mode=4, first=0,
count=21516) at
/home/robclark/src/mesa/debug/src/mapi/shared-glapi/glapi_mapi_tmp.h:19545
#16 0x00000000004731f8 in Mesh::render_array
(this=***@entry=0x4e7ee8) at ../src/mesh.cpp:554
#17 0x000000000041ba98 in SceneBuild::draw (this=0x4e7da0) at
../src/scene-build.cpp:244
#18 0x0000000000410d00 in MainLoop::draw (this=0x58b420) at
../src/main-loop.cpp:133
#19 0x0000000000411594 in MainLoop::step (this=***@entry=0x58b420) at
../src/main-loop.cpp:108
#20 0x0000000000409498 in do_benchmark (canvas=...) at ../src/main.cpp:117
#21 0x00000000004071b0 in main (argc=<optimized out>, argv=<optimized
out>) at ../src/main.cpp:210


It looks like it is getting a garbage value for 'format' (format=1531 in frames #2 and #3 above).

BR,
-R
Post by M***@gmx.net
---
src/mesa/state_tracker/st_atom_array.c | 432 ++++++++-------------------------
1 file changed, 107 insertions(+), 325 deletions(-)
diff --git a/src/mesa/state_tracker/st_atom_array.c b/src/mesa/state_tracker/st_atom_array.c
index 2fd67e8d84..6b39b4186d 100644
--- a/src/mesa/state_tracker/st_atom_array.c
+++ b/src/mesa/state_tracker/st_atom_array.c
@@ -48,6 +48,7 @@
#include "main/bufferobj.h"
#include "main/glformats.h"
#include "main/varray.h"
+#include "main/arrayobj.h"
/* vertex_formats[gltype - GL_BYTE][integer*2 + normalized][size - 1] */
static const uint16_t vertex_formats[][4][4] = {
@@ -306,79 +307,6 @@ st_pipe_vertex_format(const struct gl_array_attributes *attrib)
return vertex_formats[type - GL_BYTE][index][size-1];
}
-static const struct gl_vertex_array *
-get_client_array(const struct gl_vertex_array *arrays,
- unsigned mesaAttr)
-{
- /* st_program uses 0xffffffff to denote a double placeholder attribute */
- if (mesaAttr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
- return NULL;
- return &arrays[mesaAttr];
-}
-
-/**
- * Examine the active arrays to determine if we have interleaved
- * vertex arrays all living in one VBO, or all living in user space.
- */
-static GLboolean
-is_interleaved_arrays(const struct st_vertex_program *vp,
- const struct gl_vertex_array *arrays,
- unsigned num_inputs)
-{
- GLuint attr;
- const struct gl_buffer_object *firstBufObj = NULL;
- GLint firstStride = -1;
- const GLubyte *firstPtr = NULL;
- GLboolean userSpaceBuffer = GL_FALSE;
-
- for (attr = 0; attr < num_inputs; attr++) {
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
- const GLubyte *ptr;
- const struct gl_buffer_object *bufObj;
- GLsizei stride;
-
- array = get_client_array(arrays, vp->index_to_input[attr]);
- if (!array)
- continue;
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- stride = binding->Stride; /* in bytes */
- ptr = _mesa_vertex_attrib_address(attrib, binding);
-
- /* To keep things simple, don't allow interleaved zero-stride attribs. */
- if (stride == 0)
- return false;
-
- bufObj = binding->BufferObj;
- if (attr == 0) {
- /* save info about the first array */
- firstStride = stride;
- firstPtr = ptr;
- firstBufObj = bufObj;
- userSpaceBuffer = !_mesa_is_bufferobj(bufObj);
- }
- else {
- /* check if other arrays interleave with the first, in same buffer */
- if (stride != firstStride)
- return GL_FALSE; /* strides don't match */
-
- if (bufObj != firstBufObj)
- return GL_FALSE; /* arrays in different VBOs */
-
- if (llabs(ptr - firstPtr) > firstStride)
- return GL_FALSE; /* arrays start too far apart */
-
- if ((!_mesa_is_bufferobj(bufObj)) != userSpaceBuffer)
- return GL_FALSE; /* mix of VBO and user-space arrays */
- }
- }
-
- return GL_TRUE;
-}
-
static void init_velement(struct pipe_vertex_element *velement,
int src_offset, int format,
int instance_divisor, int vbo_index)
@@ -392,13 +320,14 @@ static void init_velement(struct pipe_vertex_element *velement,
static void init_velement_lowered(const struct st_vertex_program *vp,
struct pipe_vertex_element *velements,
- int src_offset, int format,
- int instance_divisor, int vbo_index,
- int nr_components, GLboolean doubles,
- GLuint *attr_idx)
+ const struct gl_array_attributes *attrib,
+ int src_offset, int instance_divisor,
+ int vbo_index, int idx)
{
- int idx = *attr_idx;
- if (doubles) {
+ const unsigned format = st_pipe_vertex_format(attrib);
+ const GLubyte nr_components = attrib->Size;
+
+ if (attrib->Doubles) {
int lower_format;
if (nr_components < 2)
@@ -427,15 +356,11 @@ static void init_velement_lowered(const struct st_vertex_program *vp,
init_velement(&velements[idx], src_offset, PIPE_FORMAT_R32G32_UINT,
instance_divisor, vbo_index);
}
-
- idx++;
}
} else {
init_velement(&velements[idx], src_offset,
format, instance_divisor, vbo_index);
- idx++;
}
- *attr_idx = idx;
}
static void
@@ -457,274 +382,131 @@ set_vertex_attribs(struct st_context *st,
cso_set_vertex_elements(cso, num_velements, velements);
}
-/**
- * Set up for drawing interleaved arrays that all live in one VBO
- * or all live in user space.
- * \param vbuffer returns vertex buffer info
- * \param velements returns vertex element info
- */
-static void
-setup_interleaved_attribs(struct st_context *st,
- const struct st_vertex_program *vp,
- const struct gl_vertex_array *arrays,
- unsigned num_inputs)
-{
- struct pipe_vertex_buffer vbuffer;
- struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS] = {{0}};
- GLuint attr;
- const GLubyte *low_addr = NULL;
- GLboolean usingVBO; /* all arrays in a VBO? */
- struct gl_buffer_object *bufobj;
- GLsizei stride;
-
- /* Find the lowest address of the arrays we're drawing,
- * Init bufobj and stride.
- */
- if (num_inputs) {
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
-
- array = get_client_array(arrays, vp->index_to_input[0]);
- assert(array);
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
-
- /* Since we're doing interleaved arrays, we know there'll be at most
- * one buffer object and the stride will be the same for all arrays.
- * Grab them now.
- */
- bufobj = binding->BufferObj;
- stride = binding->Stride;
-
- low_addr = _mesa_vertex_attrib_address(attrib, binding);
-
- for (attr = 1; attr < num_inputs; attr++) {
- const GLubyte *start;
- array = get_client_array(arrays, vp->index_to_input[attr]);
- if (!array)
- continue;
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- start = _mesa_vertex_attrib_address(attrib, binding);
- low_addr = MIN2(low_addr, start);
- }
- }
- else {
- /* not sure we'll ever have zero inputs, but play it safe */
- bufobj = NULL;
- stride = 0;
- low_addr = 0;
- }
-
- /* are the arrays in user space? */
- usingVBO = _mesa_is_bufferobj(bufobj);
-
- for (attr = 0; attr < num_inputs;) {
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
- const GLubyte *ptr;
- unsigned src_offset;
- unsigned src_format;
-
- array = get_client_array(arrays, vp->index_to_input[attr]);
- assert(array);
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- ptr = _mesa_vertex_attrib_address(attrib, binding);
-
- src_offset = (unsigned) (ptr - low_addr);
-
- src_format = st_pipe_vertex_format(attrib);
-
- init_velement_lowered(vp, velements, src_offset, src_format,
- binding->InstanceDivisor, 0,
- attrib->Size, attrib->Doubles, &attr);
- }
-
- /*
- * Return the vbuffer info and setup user-space attrib info, if needed.
- */
- if (num_inputs == 0) {
- /* just defensive coding here */
- vbuffer.buffer.resource = NULL;
- vbuffer.is_user_buffer = false;
- vbuffer.buffer_offset = 0;
- vbuffer.stride = 0;
- }
- else if (usingVBO) {
- /* all interleaved arrays in a VBO */
- struct st_buffer_object *stobj = st_buffer_object(bufobj);
-
- if (!stobj || !stobj->buffer) {
- st->vertex_array_out_of_memory = true;
- return; /* out-of-memory error probably */
- }
-
- vbuffer.buffer.resource = stobj->buffer;
- vbuffer.is_user_buffer = false;
- vbuffer.buffer_offset = pointer_to_offset(low_addr);
- vbuffer.stride = stride;
- }
- else {
- /* all interleaved arrays in user memory */
- vbuffer.buffer.user = low_addr;
- vbuffer.is_user_buffer = !!low_addr; /* if NULL, then unbind */
- vbuffer.buffer_offset = 0;
- vbuffer.stride = stride;
-
- if (low_addr)
- st->draw_needs_minmax_index = true;
- }
-
- set_vertex_attribs(st, &vbuffer, num_inputs ? 1 : 0,
- velements, num_inputs);
-}
-
-/**
- * Set up a separate pipe_vertex_buffer and pipe_vertex_element for each
- * vertex attribute.
- * \param vbuffer returns vertex buffer info
- * \param velements returns vertex element info
- */
-static void
-setup_non_interleaved_attribs(struct st_context *st,
- const struct st_vertex_program *vp,
- const struct gl_vertex_array *arrays,
- unsigned num_inputs)
+void st_update_array(struct st_context *st)
{
struct gl_context *ctx = st->ctx;
+ /* vertex program validation must be done before this */
+ const struct st_vertex_program *vp = st->vp;
+ /* _NEW_PROGRAM, ST_NEW_VS_STATE */
+ const GLbitfield inputs_read = st->vp_variant->vert_attrib_mask;
+ const struct gl_vertex_array_object *vao = ctx->Array._DrawVAO;
+ const ubyte *input_to_index = vp->input_to_index;
+
struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS];
- struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS] = {{0}};
+ struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
unsigned num_vbuffers = 0;
- unsigned unref_buffers = 0;
- GLuint attr;
-
- for (attr = 0; attr < num_inputs;) {
- const unsigned mesaAttr = vp->index_to_input[attr];
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
- struct gl_buffer_object *bufobj;
- GLsizei stride;
- unsigned src_format;
- unsigned bufidx;
-
- array = get_client_array(arrays, mesaAttr);
- assert(array);
-
- bufidx = num_vbuffers++;
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- stride = binding->Stride;
- bufobj = binding->BufferObj;
-
- if (_mesa_is_bufferobj(bufobj)) {
- /* Attribute data is in a VBO.
- * Recall that for VBOs, the gl_vertex_array->Ptr field is
- * really an offset from the start of the VBO, not a pointer.
- */
- struct st_buffer_object *stobj = st_buffer_object(bufobj);
+ st->vertex_array_out_of_memory = FALSE;
+ st->draw_needs_minmax_index = false;
+
+ /* _NEW_PROGRAM */
+ /* ST_NEW_VERTEX_ARRAYS alias ctx->DriverFlags.NewArray */
+ /* Process attribute array data. */
+ GLbitfield mask = inputs_read & _mesa_draw_array_bits(ctx);
+ while (mask) {
+ /* The attribute index to start pulling a binding */
+ const gl_vert_attrib i = ffs(mask) - 1;
+ const struct gl_vertex_buffer_binding *const binding
+ = _mesa_draw_buffer_binding(vao, i);
+ const unsigned bufidx = num_vbuffers++;
+
+ if (_mesa_is_bufferobj(binding->BufferObj)) {
+ struct st_buffer_object *stobj = st_buffer_object(binding->BufferObj);
if (!stobj || !stobj->buffer) {
st->vertex_array_out_of_memory = true;
return; /* out-of-memory error probably */
}
+ /* Set the binding */
vbuffer[bufidx].buffer.resource = stobj->buffer;
vbuffer[bufidx].is_user_buffer = false;
- vbuffer[bufidx].buffer_offset =
- binding->Offset + attrib->RelativeOffset;
+ vbuffer[bufidx].buffer_offset = _mesa_draw_binding_offset(binding);
+ } else {
+ /* Set the binding */
+ const void *ptr = (const void *)_mesa_draw_binding_offset(binding);
+ vbuffer[bufidx].buffer.user = ptr;
+ vbuffer[bufidx].is_user_buffer = true;
+ vbuffer[bufidx].buffer_offset = 0;
+
+ if (!binding->InstanceDivisor)
+ st->draw_needs_minmax_index = true;
}
- else {
- if (stride == 0) {
- unsigned size = attrib->_ElementSize;
- /* This is optimal for GPU cache line usage if the upload size
- * is <= cache line size.
- */
- unsigned alignment = util_next_power_of_two(size);
-
- assert(attrib->Ptr);
- vbuffer[bufidx].buffer.user = attrib->Ptr;
- void *ptr = attrib->Ptr ? (void*)attrib->Ptr :
- (void*)ctx->Current.Attrib[mesaAttr];
-
- vbuffer[bufidx].is_user_buffer = false;
- vbuffer[bufidx].buffer.resource = NULL;
-
- /* Use const_uploader for zero-stride vertex attributes, because
- * it may use a better memory placement than stream_uploader.
- * The reason is that zero-stride attributes can be fetched many
- * times (thousands of times), so a better placement is going to
- * perform better.
- *
- * Upload the maximum possible size, which is 4x GLdouble = 32.
- */
- u_upload_data(st->can_bind_const_buffer_as_vertex ?
- st->pipe->const_uploader :
- st->pipe->stream_uploader,
- 0, size, alignment, ptr,
- &vbuffer[bufidx].buffer_offset,
- &vbuffer[bufidx].buffer.resource);
- unref_buffers |= 1u << bufidx;
- } else {
- assert(attrib->Ptr);
- vbuffer[bufidx].buffer.user = attrib->Ptr;
- vbuffer[bufidx].is_user_buffer = true;
- vbuffer[bufidx].buffer_offset = 0;
-
- if (!binding->InstanceDivisor)
- st->draw_needs_minmax_index = true;
- }
+ vbuffer[bufidx].stride = binding->Stride; /* in bytes */
+
+ const GLbitfield boundmask = _mesa_draw_bound_attrib_bits(binding);
+ GLbitfield attrmask = mask & boundmask;
+ /* Mark those attributes as processed */
+ mask &= ~boundmask;
+ /* We can assume that we have array for the binding */
+ assert(attrmask);
+ /* Walk attributes belonging to the binding */
+ while (attrmask) {
+ const gl_vert_attrib attr = u_bit_scan(&attrmask);
+ const struct gl_array_attributes *const attrib
+ = _mesa_draw_array_attrib(vao, attr);
+ const GLuint off = _mesa_draw_attributes_relative_offset(attrib);
+ init_velement_lowered(vp, velements, attrib, off,
+ binding->InstanceDivisor, bufidx,
+ input_to_index[attr]);
}
+ }
- /* common-case setup */
- vbuffer[bufidx].stride = stride; /* in bytes */
+ const unsigned first_current_vbuffer = num_vbuffers;
+ /* _NEW_PROGRAM | _NEW_CURRENT_ATTRIB */
+ /* Process values that should have better been uniforms in the application */
+ GLbitfield curmask = inputs_read & _mesa_draw_current_bits(ctx);
+ if (curmask) {
+ /* For each attribute, upload the maximum possible size. */
+ GLubyte data[VERT_ATTRIB_MAX*sizeof(GLdouble)*4];
+ GLubyte *cursor = data;
+ const unsigned bufidx = num_vbuffers++;
+ unsigned max_alignment = 1;
+
+ while (curmask) {
+ const gl_vert_attrib attr = u_bit_scan(&curmask);
+ const struct gl_array_attributes *const attrib
+ = _mesa_draw_current_attrib(ctx, attr);
+ const unsigned size = attrib->_ElementSize;
+ const unsigned alignment = util_next_power_of_two(size);
+ max_alignment = MAX2(max_alignment, alignment);
+ memcpy(cursor, attrib->Ptr, size);
+ if (alignment != size)
+ memset(cursor + size, 0, alignment - size);
+
+ init_velement_lowered(vp, velements, attrib, cursor - data, 0,
+ bufidx, input_to_index[attr]);
+
+ cursor += alignment;
+ }
- src_format = st_pipe_vertex_format(attrib);
+ vbuffer[bufidx].is_user_buffer = false;
+ vbuffer[bufidx].buffer.resource = NULL;
+ /* vbuffer[bufidx].buffer_offset is set below */
+ vbuffer[bufidx].stride = 0;
- init_velement_lowered(vp, velements, 0, src_format,
- binding->InstanceDivisor, bufidx,
- attrib->Size, attrib->Doubles, &attr);
+ /* Use const_uploader for zero-stride vertex attributes, because
+ * it may use a better memory placement than stream_uploader.
+ * The reason is that zero-stride attributes can be fetched many
+ * times (thousands of times), so a better placement is going to
+ * perform better.
+ */
+ u_upload_data(st->can_bind_const_buffer_as_vertex ?
+ st->pipe->const_uploader :
+ st->pipe->stream_uploader,
+ 0, cursor - data, max_alignment, data,
+ &vbuffer[bufidx].buffer_offset,
+ &vbuffer[bufidx].buffer.resource);
}
if (!ctx->Const.AllowMappedBuffersDuringExecution) {
u_upload_unmap(st->pipe->stream_uploader);
}
+ const unsigned num_inputs = st->vp_variant->num_inputs;
set_vertex_attribs(st, vbuffer, num_vbuffers, velements, num_inputs);
/* Unreference uploaded zero-stride vertex buffers. */
- while (unref_buffers) {
- unsigned i = u_bit_scan(&unref_buffers);
+ for (unsigned i = first_current_vbuffer; i < num_vbuffers; ++i) {
pipe_resource_reference(&vbuffer[i].buffer.resource, NULL);
}
}
-
-void st_update_array(struct st_context *st)
-{
- struct gl_context *ctx = st->ctx;
- const struct gl_vertex_array *arrays = ctx->Array._DrawArrays;
- const struct st_vertex_program *vp;
- unsigned num_inputs;
-
- st->vertex_array_out_of_memory = FALSE;
- st->draw_needs_minmax_index = false;
-
- /* No drawing has been done yet, so do nothing. */
- if (!arrays)
- return;
-
- /* vertex program validation must be done before this */
- vp = st->vp;
- num_inputs = st->vp_variant->num_inputs;
-
- if (is_interleaved_arrays(vp, arrays, num_inputs))
- setup_interleaved_attribs(st, vp, arrays, num_inputs);
- else
- setup_non_interleaved_attribs(st, vp, arrays, num_inputs);
-}
--
2.14.3
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Rob Clark
2018-05-15 18:07:53 UTC
Permalink
Post by Rob Clark
Post by M***@gmx.net
Finally make use of the binding information in the VAO when
setting up arrays for draw.
v2: Emit less relocations also for interleaved userspace arrays.
This appears to break glmark2 (and, well, probably other things too)
Thread 1 "glmark2" received signal SIGTRAP, Trace/breakpoint trap.
0x0000ffffbefc1c78 in kill () from /lib64/libc.so.6
(gdb) bt
#0 0x0000ffffbefc1c78 in kill () from /lib64/libc.so.6
#1 0x0000ffffbde5ff90 in _debug_assert_fail (expr=0xffffbe6b9320
"desc", file=0xffffbe6b92f0
"../src/gallium/auxiliary/util/u_format.h", line=767,
function=0xffffbe6b9830 <__func__.7922>
"util_format_get_blocksizebits") at
../src/gallium/auxiliary/util/u_debug.c:322
#2 0x0000ffffbdec4384 in util_format_get_blocksizebits (format=1531)
at ../src/gallium/auxiliary/util/u_format.h:767
#3 0x0000ffffbdec43bc in util_format_get_blocksize (format=1531) at
../src/gallium/auxiliary/util/u_format.h:781
#4 0x0000ffffbdec62a4 in u_vbuf_create_vertex_elements (mgr=0x7073b0,
count=2, attribs=0xffffffffea20) at
../src/gallium/auxiliary/util/u_vbuf.c:761
#5 0x0000ffffbdec4da4 in u_vbuf_set_vertex_elements_internal
(mgr=0x7073b0, count=2, states=0xffffffffea20) at
../src/gallium/auxiliary/util/u_vbuf.c:346
#6 0x0000ffffbdec4e94 in u_vbuf_set_vertex_elements (mgr=0x7073b0,
count=2, states=0xffffffffea20) at
../src/gallium/auxiliary/util/u_vbuf.c:367
#7 0x0000ffffbde804dc in cso_set_vertex_elements (ctx=0x6e8190,
count=2, states=0xffffffffea20) at
../src/gallium/auxiliary/cso_cache/cso_context.c:1067
#8 0x0000ffffbe3881d0 in set_vertex_attribs (st=0x6e2f20,
vbuffers=0xffffffffeb20, num_vbuffers=0, velements=0xffffffffea20,
num_velements=2) at ../src/mesa/state_tracker/st_atom_array.c:382
#9 0x0000ffffbe388708 in st_update_array (st=0x6e2f20) at
../src/mesa/state_tracker/st_atom_array.c:507
#10 0x0000ffffbe334634 in st_validate_state (st=0x6e2f20,
pipeline=ST_PIPELINE_RENDER) at
../src/mesa/state_tracker/st_atom.c:261
#11 0x0000ffffbe2802ec in prepare_draw (st=0x6e2f20, ctx=0x6c74d0) at
../src/mesa/state_tracker/st_draw.c:123
#12 0x0000ffffbe280364 in st_draw_vbo (ctx=0x6c74d0,
prims=0xffffffffef18, nr_prims=1, ib=0x0, index_bounds_valid=1 '\001',
min_index=0, max_index=21515, tfb_vertcount=0x0, stream=0,
indirect=0x0) at ../src/mesa/state_tracker/st_draw.c:153
#13 0x0000ffffbe270c48 in vbo_draw_arrays (ctx=0x6c74d0, mode=4,
start=0, count=21516, numInstances=1, baseInstance=0, drawID=0) at
../src/mesa/vbo/vbo_exec_array.c:391
#14 0x0000ffffbe2715a4 in vbo_exec_DrawArrays (mode=4, start=0,
count=21516) at ../src/mesa/vbo/vbo_exec_array.c:550
#15 0x0000ffffbebccc74 in shared_dispatch_stub_310 (mode=4, first=0,
count=21516) at
/home/robclark/src/mesa/debug/src/mapi/shared-glapi/glapi_mapi_tmp.h:19545
#16 0x00000000004731f8 in Mesh::render_array
(this=***@entry=0x4e7ee8) at ../src/mesh.cpp:554
#17 0x000000000041ba98 in SceneBuild::draw (this=0x4e7da0) at
../src/scene-build.cpp:244
#18 0x0000000000410d00 in MainLoop::draw (this=0x58b420) at
../src/main-loop.cpp:133
#19 0x0000000000411594 in MainLoop::step (this=***@entry=0x58b420) at
../src/main-loop.cpp:108
#20 0x0000000000409498 in do_benchmark (canvas=...) at ../src/main.cpp:117
#21 0x00000000004071b0 in main (argc=<optimized out>, argv=<optimized
out>) at ../src/main.cpp:210
looks like it is getting some garbage value for 'format'..
fwiw, I guess I should mention this is w/ freedreno (a530).. not sure
if it is depending on some pipe cap somewhere, but good (before) and
bad (after) backtrace: https://hastebin.com/raw/xusudasuma

somehow in set_vertex_attribs() we get num_vbuffers=0, so we, I guess,
don't go thru either of the loops to populate velements (so it just
ends up garbage from the stack).. this patch is kinda large, still
trying to make sense of what was *supposed* to happen..

BR,
-R
Post by Rob Clark
BR,
-R
Post by M***@gmx.net
---
src/mesa/state_tracker/st_atom_array.c | 432 ++++++++-------------------------
1 file changed, 107 insertions(+), 325 deletions(-)
diff --git a/src/mesa/state_tracker/st_atom_array.c b/src/mesa/state_tracker/st_atom_array.c
index 2fd67e8d84..6b39b4186d 100644
--- a/src/mesa/state_tracker/st_atom_array.c
+++ b/src/mesa/state_tracker/st_atom_array.c
@@ -48,6 +48,7 @@
#include "main/bufferobj.h"
#include "main/glformats.h"
#include "main/varray.h"
+#include "main/arrayobj.h"
/* vertex_formats[gltype - GL_BYTE][integer*2 + normalized][size - 1] */
static const uint16_t vertex_formats[][4][4] = {
@@ -306,79 +307,6 @@ st_pipe_vertex_format(const struct gl_array_attributes *attrib)
return vertex_formats[type - GL_BYTE][index][size-1];
}
-static const struct gl_vertex_array *
-get_client_array(const struct gl_vertex_array *arrays,
- unsigned mesaAttr)
-{
- /* st_program uses 0xffffffff to denote a double placeholder attribute */
- if (mesaAttr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
- return NULL;
- return &arrays[mesaAttr];
-}
-
-/**
- * Examine the active arrays to determine if we have interleaved
- * vertex arrays all living in one VBO, or all living in user space.
- */
-static GLboolean
-is_interleaved_arrays(const struct st_vertex_program *vp,
- const struct gl_vertex_array *arrays,
- unsigned num_inputs)
-{
- GLuint attr;
- const struct gl_buffer_object *firstBufObj = NULL;
- GLint firstStride = -1;
- const GLubyte *firstPtr = NULL;
- GLboolean userSpaceBuffer = GL_FALSE;
-
- for (attr = 0; attr < num_inputs; attr++) {
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
- const GLubyte *ptr;
- const struct gl_buffer_object *bufObj;
- GLsizei stride;
-
- array = get_client_array(arrays, vp->index_to_input[attr]);
- if (!array)
- continue;
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- stride = binding->Stride; /* in bytes */
- ptr = _mesa_vertex_attrib_address(attrib, binding);
-
- /* To keep things simple, don't allow interleaved zero-stride attribs. */
- if (stride == 0)
- return false;
-
- bufObj = binding->BufferObj;
- if (attr == 0) {
- /* save info about the first array */
- firstStride = stride;
- firstPtr = ptr;
- firstBufObj = bufObj;
- userSpaceBuffer = !_mesa_is_bufferobj(bufObj);
- }
- else {
- /* check if other arrays interleave with the first, in same buffer */
- if (stride != firstStride)
- return GL_FALSE; /* strides don't match */
-
- if (bufObj != firstBufObj)
- return GL_FALSE; /* arrays in different VBOs */
-
- if (llabs(ptr - firstPtr) > firstStride)
- return GL_FALSE; /* arrays start too far apart */
-
- if ((!_mesa_is_bufferobj(bufObj)) != userSpaceBuffer)
- return GL_FALSE; /* mix of VBO and user-space arrays */
- }
- }
-
- return GL_TRUE;
-}
-
static void init_velement(struct pipe_vertex_element *velement,
int src_offset, int format,
int instance_divisor, int vbo_index)
@@ -392,13 +320,14 @@ static void init_velement(struct pipe_vertex_element *velement,
static void init_velement_lowered(const struct st_vertex_program *vp,
struct pipe_vertex_element *velements,
- int src_offset, int format,
- int instance_divisor, int vbo_index,
- int nr_components, GLboolean doubles,
- GLuint *attr_idx)
+ const struct gl_array_attributes *attrib,
+ int src_offset, int instance_divisor,
+ int vbo_index, int idx)
{
- int idx = *attr_idx;
- if (doubles) {
+ const unsigned format = st_pipe_vertex_format(attrib);
+ const GLubyte nr_components = attrib->Size;
+
+ if (attrib->Doubles) {
int lower_format;
if (nr_components < 2)
@@ -427,15 +356,11 @@ static void init_velement_lowered(const struct st_vertex_program *vp,
init_velement(&velements[idx], src_offset, PIPE_FORMAT_R32G32_UINT,
instance_divisor, vbo_index);
}
-
- idx++;
}
} else {
init_velement(&velements[idx], src_offset,
format, instance_divisor, vbo_index);
- idx++;
}
- *attr_idx = idx;
}
static void
@@ -457,274 +382,131 @@ set_vertex_attribs(struct st_context *st,
cso_set_vertex_elements(cso, num_velements, velements);
}
-/**
- * Set up for drawing interleaved arrays that all live in one VBO
- * or all live in user space.
- * \param vbuffer returns vertex buffer info
- * \param velements returns vertex element info
- */
-static void
-setup_interleaved_attribs(struct st_context *st,
- const struct st_vertex_program *vp,
- const struct gl_vertex_array *arrays,
- unsigned num_inputs)
-{
- struct pipe_vertex_buffer vbuffer;
- struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS] = {{0}};
- GLuint attr;
- const GLubyte *low_addr = NULL;
- GLboolean usingVBO; /* all arrays in a VBO? */
- struct gl_buffer_object *bufobj;
- GLsizei stride;
-
- /* Find the lowest address of the arrays we're drawing,
- * Init bufobj and stride.
- */
- if (num_inputs) {
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
-
- array = get_client_array(arrays, vp->index_to_input[0]);
- assert(array);
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
-
- /* Since we're doing interleaved arrays, we know there'll be at most
- * one buffer object and the stride will be the same for all arrays.
- * Grab them now.
- */
- bufobj = binding->BufferObj;
- stride = binding->Stride;
-
- low_addr = _mesa_vertex_attrib_address(attrib, binding);
-
- for (attr = 1; attr < num_inputs; attr++) {
- const GLubyte *start;
- array = get_client_array(arrays, vp->index_to_input[attr]);
- if (!array)
- continue;
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- start = _mesa_vertex_attrib_address(attrib, binding);
- low_addr = MIN2(low_addr, start);
- }
- }
- else {
- /* not sure we'll ever have zero inputs, but play it safe */
- bufobj = NULL;
- stride = 0;
- low_addr = 0;
- }
-
- /* are the arrays in user space? */
- usingVBO = _mesa_is_bufferobj(bufobj);
-
- for (attr = 0; attr < num_inputs;) {
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
- const GLubyte *ptr;
- unsigned src_offset;
- unsigned src_format;
-
- array = get_client_array(arrays, vp->index_to_input[attr]);
- assert(array);
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- ptr = _mesa_vertex_attrib_address(attrib, binding);
-
- src_offset = (unsigned) (ptr - low_addr);
-
- src_format = st_pipe_vertex_format(attrib);
-
- init_velement_lowered(vp, velements, src_offset, src_format,
- binding->InstanceDivisor, 0,
- attrib->Size, attrib->Doubles, &attr);
- }
-
- /*
- * Return the vbuffer info and setup user-space attrib info, if needed.
- */
- if (num_inputs == 0) {
- /* just defensive coding here */
- vbuffer.buffer.resource = NULL;
- vbuffer.is_user_buffer = false;
- vbuffer.buffer_offset = 0;
- vbuffer.stride = 0;
- }
- else if (usingVBO) {
- /* all interleaved arrays in a VBO */
- struct st_buffer_object *stobj = st_buffer_object(bufobj);
-
- if (!stobj || !stobj->buffer) {
- st->vertex_array_out_of_memory = true;
- return; /* out-of-memory error probably */
- }
-
- vbuffer.buffer.resource = stobj->buffer;
- vbuffer.is_user_buffer = false;
- vbuffer.buffer_offset = pointer_to_offset(low_addr);
- vbuffer.stride = stride;
- }
- else {
- /* all interleaved arrays in user memory */
- vbuffer.buffer.user = low_addr;
- vbuffer.is_user_buffer = !!low_addr; /* if NULL, then unbind */
- vbuffer.buffer_offset = 0;
- vbuffer.stride = stride;
-
- if (low_addr)
- st->draw_needs_minmax_index = true;
- }
-
- set_vertex_attribs(st, &vbuffer, num_inputs ? 1 : 0,
- velements, num_inputs);
-}
-
-/**
- * Set up a separate pipe_vertex_buffer and pipe_vertex_element for each
- * vertex attribute.
- * \param vbuffer returns vertex buffer info
- * \param velements returns vertex element info
- */
-static void
-setup_non_interleaved_attribs(struct st_context *st,
- const struct st_vertex_program *vp,
- const struct gl_vertex_array *arrays,
- unsigned num_inputs)
+void st_update_array(struct st_context *st)
{
struct gl_context *ctx = st->ctx;
+ /* vertex program validation must be done before this */
+ const struct st_vertex_program *vp = st->vp;
+ /* _NEW_PROGRAM, ST_NEW_VS_STATE */
+ const GLbitfield inputs_read = st->vp_variant->vert_attrib_mask;
+ const struct gl_vertex_array_object *vao = ctx->Array._DrawVAO;
+ const ubyte *input_to_index = vp->input_to_index;
+
struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS];
- struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS] = {{0}};
+ struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
unsigned num_vbuffers = 0;
- unsigned unref_buffers = 0;
- GLuint attr;
-
- for (attr = 0; attr < num_inputs;) {
- const unsigned mesaAttr = vp->index_to_input[attr];
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
- struct gl_buffer_object *bufobj;
- GLsizei stride;
- unsigned src_format;
- unsigned bufidx;
-
- array = get_client_array(arrays, mesaAttr);
- assert(array);
-
- bufidx = num_vbuffers++;
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- stride = binding->Stride;
- bufobj = binding->BufferObj;
-
- if (_mesa_is_bufferobj(bufobj)) {
- /* Attribute data is in a VBO.
- * Recall that for VBOs, the gl_vertex_array->Ptr field is
- * really an offset from the start of the VBO, not a pointer.
- */
- struct st_buffer_object *stobj = st_buffer_object(bufobj);
+ st->vertex_array_out_of_memory = FALSE;
+ st->draw_needs_minmax_index = false;
+
+ /* _NEW_PROGRAM */
+ /* ST_NEW_VERTEX_ARRAYS alias ctx->DriverFlags.NewArray */
+ /* Process attribute array data. */
+ GLbitfield mask = inputs_read & _mesa_draw_array_bits(ctx);
+ while (mask) {
+ /* The attribute index to start pulling a binding */
+ const gl_vert_attrib i = ffs(mask) - 1;
+ const struct gl_vertex_buffer_binding *const binding
+ = _mesa_draw_buffer_binding(vao, i);
+ const unsigned bufidx = num_vbuffers++;
+
+ if (_mesa_is_bufferobj(binding->BufferObj)) {
+ struct st_buffer_object *stobj = st_buffer_object(binding->BufferObj);
if (!stobj || !stobj->buffer) {
st->vertex_array_out_of_memory = true;
return; /* out-of-memory error probably */
}
+ /* Set the binding */
vbuffer[bufidx].buffer.resource = stobj->buffer;
vbuffer[bufidx].is_user_buffer = false;
- vbuffer[bufidx].buffer_offset =
- binding->Offset + attrib->RelativeOffset;
+ vbuffer[bufidx].buffer_offset = _mesa_draw_binding_offset(binding);
+ } else {
+ /* Set the binding */
+ const void *ptr = (const void *)_mesa_draw_binding_offset(binding);
+ vbuffer[bufidx].buffer.user = ptr;
+ vbuffer[bufidx].is_user_buffer = true;
+ vbuffer[bufidx].buffer_offset = 0;
+
+ if (!binding->InstanceDivisor)
+ st->draw_needs_minmax_index = true;
}
- else {
- if (stride == 0) {
- unsigned size = attrib->_ElementSize;
- /* This is optimal for GPU cache line usage if the upload size
- * is <= cache line size.
- */
- unsigned alignment = util_next_power_of_two(size);
-
- assert(attrib->Ptr);
- vbuffer[bufidx].buffer.user = attrib->Ptr;
- void *ptr = attrib->Ptr ? (void*)attrib->Ptr :
- (void*)ctx->Current.Attrib[mesaAttr];
-
- vbuffer[bufidx].is_user_buffer = false;
- vbuffer[bufidx].buffer.resource = NULL;
-
- /* Use const_uploader for zero-stride vertex attributes, because
- * it may use a better memory placement than stream_uploader.
- * The reason is that zero-stride attributes can be fetched many
- * times (thousands of times), so a better placement is going to
- * perform better.
- *
- * Upload the maximum possible size, which is 4x GLdouble = 32.
- */
- u_upload_data(st->can_bind_const_buffer_as_vertex ?
- st->pipe->const_uploader :
- st->pipe->stream_uploader,
- 0, size, alignment, ptr,
- &vbuffer[bufidx].buffer_offset,
- &vbuffer[bufidx].buffer.resource);
- unref_buffers |= 1u << bufidx;
- } else {
- assert(attrib->Ptr);
- vbuffer[bufidx].buffer.user = attrib->Ptr;
- vbuffer[bufidx].is_user_buffer = true;
- vbuffer[bufidx].buffer_offset = 0;
-
- if (!binding->InstanceDivisor)
- st->draw_needs_minmax_index = true;
- }
+ vbuffer[bufidx].stride = binding->Stride; /* in bytes */
+
+ const GLbitfield boundmask = _mesa_draw_bound_attrib_bits(binding);
+ GLbitfield attrmask = mask & boundmask;
+ /* Mark those attributes as processed */
+ mask &= ~boundmask;
+ /* We can assume that we have array for the binding */
+ assert(attrmask);
+ /* Walk attributes belonging to the binding */
+ while (attrmask) {
+ const gl_vert_attrib attr = u_bit_scan(&attrmask);
+ const struct gl_array_attributes *const attrib
+ = _mesa_draw_array_attrib(vao, attr);
+ const GLuint off = _mesa_draw_attributes_relative_offset(attrib);
+ init_velement_lowered(vp, velements, attrib, off,
+ binding->InstanceDivisor, bufidx,
+ input_to_index[attr]);
}
+ }
- /* common-case setup */
- vbuffer[bufidx].stride = stride; /* in bytes */
+ const unsigned first_current_vbuffer = num_vbuffers;
+ /* _NEW_PROGRAM | _NEW_CURRENT_ATTRIB */
+ /* Process values that should have better been uniforms in the application */
+ GLbitfield curmask = inputs_read & _mesa_draw_current_bits(ctx);
+ if (curmask) {
+ /* For each attribute, upload the maximum possible size. */
+ GLubyte data[VERT_ATTRIB_MAX*sizeof(GLdouble)*4];
+ GLubyte *cursor = data;
+ const unsigned bufidx = num_vbuffers++;
+ unsigned max_alignment = 1;
+
+ while (curmask) {
+ const gl_vert_attrib attr = u_bit_scan(&curmask);
+ const struct gl_array_attributes *const attrib
+ = _mesa_draw_current_attrib(ctx, attr);
+ const unsigned size = attrib->_ElementSize;
+ const unsigned alignment = util_next_power_of_two(size);
+ max_alignment = MAX2(max_alignment, alignment);
+ memcpy(cursor, attrib->Ptr, size);
+ if (alignment != size)
+ memset(cursor + size, 0, alignment - size);
+
+ init_velement_lowered(vp, velements, attrib, cursor - data, 0,
+ bufidx, input_to_index[attr]);
+
+ cursor += alignment;
+ }
- src_format = st_pipe_vertex_format(attrib);
+ vbuffer[bufidx].is_user_buffer = false;
+ vbuffer[bufidx].buffer.resource = NULL;
+ /* vbuffer[bufidx].buffer_offset is set below */
+ vbuffer[bufidx].stride = 0;
- init_velement_lowered(vp, velements, 0, src_format,
- binding->InstanceDivisor, bufidx,
- attrib->Size, attrib->Doubles, &attr);
+ /* Use const_uploader for zero-stride vertex attributes, because
+ * it may use a better memory placement than stream_uploader.
+ * The reason is that zero-stride attributes can be fetched many
+ * times (thousands of times), so a better placement is going to
+ * perform better.
+ */
+ u_upload_data(st->can_bind_const_buffer_as_vertex ?
+ st->pipe->const_uploader :
+ st->pipe->stream_uploader,
+ 0, cursor - data, max_alignment, data,
+ &vbuffer[bufidx].buffer_offset,
+ &vbuffer[bufidx].buffer.resource);
}
if (!ctx->Const.AllowMappedBuffersDuringExecution) {
u_upload_unmap(st->pipe->stream_uploader);
}
+ const unsigned num_inputs = st->vp_variant->num_inputs;
set_vertex_attribs(st, vbuffer, num_vbuffers, velements, num_inputs);
/* Unreference uploaded zero-stride vertex buffers. */
- while (unref_buffers) {
- unsigned i = u_bit_scan(&unref_buffers);
+ for (unsigned i = first_current_vbuffer; i < num_vbuffers; ++i) {
pipe_resource_reference(&vbuffer[i].buffer.resource, NULL);
}
}
-
-void st_update_array(struct st_context *st)
-{
- struct gl_context *ctx = st->ctx;
- const struct gl_vertex_array *arrays = ctx->Array._DrawArrays;
- const struct st_vertex_program *vp;
- unsigned num_inputs;
-
- st->vertex_array_out_of_memory = FALSE;
- st->draw_needs_minmax_index = false;
-
- /* No drawing has been done yet, so do nothing. */
- if (!arrays)
- return;
-
- /* vertex program validation must be done before this */
- vp = st->vp;
- num_inputs = st->vp_variant->num_inputs;
-
- if (is_interleaved_arrays(vp, arrays, num_inputs))
- setup_interleaved_attribs(st, vp, arrays, num_inputs);
- else
- setup_non_interleaved_attribs(st, vp, arrays, num_inputs);
-}
--
2.14.3
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Rob Clark
2018-05-15 18:12:47 UTC
Permalink
Post by Rob Clark
Post by Rob Clark
Post by M***@gmx.net
Finally make use of the binding information in the VAO when
setting up arrays for draw.
v2: Emit less relocations also for interleaved userspace arrays.
This appears to break glmark2 (and, well, probably other things too)
Thread 1 "glmark2" received signal SIGTRAP, Trace/breakpoint trap.
0x0000ffffbefc1c78 in kill () from /lib64/libc.so.6
(gdb) bt
#0 0x0000ffffbefc1c78 in kill () from /lib64/libc.so.6
#1 0x0000ffffbde5ff90 in _debug_assert_fail (expr=0xffffbe6b9320
"desc", file=0xffffbe6b92f0
"../src/gallium/auxiliary/util/u_format.h", line=767,
function=0xffffbe6b9830 <__func__.7922>
"util_format_get_blocksizebits") at
../src/gallium/auxiliary/util/u_debug.c:322
#2 0x0000ffffbdec4384 in util_format_get_blocksizebits (format=1531)
at ../src/gallium/auxiliary/util/u_format.h:767
#3 0x0000ffffbdec43bc in util_format_get_blocksize (format=1531) at
../src/gallium/auxiliary/util/u_format.h:781
#4 0x0000ffffbdec62a4 in u_vbuf_create_vertex_elements (mgr=0x7073b0,
count=2, attribs=0xffffffffea20) at
../src/gallium/auxiliary/util/u_vbuf.c:761
#5 0x0000ffffbdec4da4 in u_vbuf_set_vertex_elements_internal
(mgr=0x7073b0, count=2, states=0xffffffffea20) at
../src/gallium/auxiliary/util/u_vbuf.c:346
#6 0x0000ffffbdec4e94 in u_vbuf_set_vertex_elements (mgr=0x7073b0,
count=2, states=0xffffffffea20) at
../src/gallium/auxiliary/util/u_vbuf.c:367
#7 0x0000ffffbde804dc in cso_set_vertex_elements (ctx=0x6e8190,
count=2, states=0xffffffffea20) at
../src/gallium/auxiliary/cso_cache/cso_context.c:1067
#8 0x0000ffffbe3881d0 in set_vertex_attribs (st=0x6e2f20,
vbuffers=0xffffffffeb20, num_vbuffers=0, velements=0xffffffffea20,
num_velements=2) at ../src/mesa/state_tracker/st_atom_array.c:382
#9 0x0000ffffbe388708 in st_update_array (st=0x6e2f20) at
../src/mesa/state_tracker/st_atom_array.c:507
#10 0x0000ffffbe334634 in st_validate_state (st=0x6e2f20,
pipeline=ST_PIPELINE_RENDER) at
../src/mesa/state_tracker/st_atom.c:261
#11 0x0000ffffbe2802ec in prepare_draw (st=0x6e2f20, ctx=0x6c74d0) at
../src/mesa/state_tracker/st_draw.c:123
#12 0x0000ffffbe280364 in st_draw_vbo (ctx=0x6c74d0,
prims=0xffffffffef18, nr_prims=1, ib=0x0, index_bounds_valid=1 '\001',
min_index=0, max_index=21515, tfb_vertcount=0x0, stream=0,
indirect=0x0) at ../src/mesa/state_tracker/st_draw.c:153
#13 0x0000ffffbe270c48 in vbo_draw_arrays (ctx=0x6c74d0, mode=4,
start=0, count=21516, numInstances=1, baseInstance=0, drawID=0) at
../src/mesa/vbo/vbo_exec_array.c:391
#14 0x0000ffffbe2715a4 in vbo_exec_DrawArrays (mode=4, start=0,
count=21516) at ../src/mesa/vbo/vbo_exec_array.c:550
#15 0x0000ffffbebccc74 in shared_dispatch_stub_310 (mode=4, first=0,
count=21516) at
/home/robclark/src/mesa/debug/src/mapi/shared-glapi/glapi_mapi_tmp.h:19545
#16 0x00000000004731f8 in Mesh::render_array
#17 0x000000000041ba98 in SceneBuild::draw (this=0x4e7da0) at
../src/scene-build.cpp:244
#18 0x0000000000410d00 in MainLoop::draw (this=0x58b420) at
../src/main-loop.cpp:133
../src/main-loop.cpp:108
#20 0x0000000000409498 in do_benchmark (canvas=...) at ../src/main.cpp:117
#21 0x00000000004071b0 in main (argc=<optimized out>, argv=<optimized
out>) at ../src/main.cpp:210
looks like it is getting some garbage value for 'format'..
fwiw, I guess I should mention this is w/ freedreno (a530).. not sure
if it is depending on some pipe cap somewhere, but good (before) and
bad (after) backtrace: https://hastebin.com/raw/xusudasuma
somehow in set_vertex_attribs() we get num_vbuffers=0, so we, I guess,
don't go thru either of the loops to populate velements (so it just
ends up garbage from the stack).. this patch is kinda large, still
trying to make sense of what was *supposed* to happen..
oh, ok, "st/mesa: Make the input_to_index array available." isn't
handling the nir case ;-)

BR,
-R
Post by Rob Clark
BR,
-R
Post by Rob Clark
BR,
-R
Post by M***@gmx.net
---
src/mesa/state_tracker/st_atom_array.c | 432 ++++++++-------------------------
1 file changed, 107 insertions(+), 325 deletions(-)
diff --git a/src/mesa/state_tracker/st_atom_array.c b/src/mesa/state_tracker/st_atom_array.c
index 2fd67e8d84..6b39b4186d 100644
--- a/src/mesa/state_tracker/st_atom_array.c
+++ b/src/mesa/state_tracker/st_atom_array.c
@@ -48,6 +48,7 @@
#include "main/bufferobj.h"
#include "main/glformats.h"
#include "main/varray.h"
+#include "main/arrayobj.h"
/* vertex_formats[gltype - GL_BYTE][integer*2 + normalized][size - 1] */
static const uint16_t vertex_formats[][4][4] = {
@@ -306,79 +307,6 @@ st_pipe_vertex_format(const struct gl_array_attributes *attrib)
return vertex_formats[type - GL_BYTE][index][size-1];
}
-static const struct gl_vertex_array *
-get_client_array(const struct gl_vertex_array *arrays,
- unsigned mesaAttr)
-{
- /* st_program uses 0xffffffff to denote a double placeholder attribute */
- if (mesaAttr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
- return NULL;
- return &arrays[mesaAttr];
-}
-
-/**
- * Examine the active arrays to determine if we have interleaved
- * vertex arrays all living in one VBO, or all living in user space.
- */
-static GLboolean
-is_interleaved_arrays(const struct st_vertex_program *vp,
- const struct gl_vertex_array *arrays,
- unsigned num_inputs)
-{
- GLuint attr;
- const struct gl_buffer_object *firstBufObj = NULL;
- GLint firstStride = -1;
- const GLubyte *firstPtr = NULL;
- GLboolean userSpaceBuffer = GL_FALSE;
-
- for (attr = 0; attr < num_inputs; attr++) {
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
- const GLubyte *ptr;
- const struct gl_buffer_object *bufObj;
- GLsizei stride;
-
- array = get_client_array(arrays, vp->index_to_input[attr]);
- if (!array)
- continue;
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- stride = binding->Stride; /* in bytes */
- ptr = _mesa_vertex_attrib_address(attrib, binding);
-
- /* To keep things simple, don't allow interleaved zero-stride attribs. */
- if (stride == 0)
- return false;
-
- bufObj = binding->BufferObj;
- if (attr == 0) {
- /* save info about the first array */
- firstStride = stride;
- firstPtr = ptr;
- firstBufObj = bufObj;
- userSpaceBuffer = !_mesa_is_bufferobj(bufObj);
- }
- else {
- /* check if other arrays interleave with the first, in same buffer */
- if (stride != firstStride)
- return GL_FALSE; /* strides don't match */
-
- if (bufObj != firstBufObj)
- return GL_FALSE; /* arrays in different VBOs */
-
- if (llabs(ptr - firstPtr) > firstStride)
- return GL_FALSE; /* arrays start too far apart */
-
- if ((!_mesa_is_bufferobj(bufObj)) != userSpaceBuffer)
- return GL_FALSE; /* mix of VBO and user-space arrays */
- }
- }
-
- return GL_TRUE;
-}
-
static void init_velement(struct pipe_vertex_element *velement,
int src_offset, int format,
int instance_divisor, int vbo_index)
@@ -392,13 +320,14 @@ static void init_velement(struct pipe_vertex_element *velement,
static void init_velement_lowered(const struct st_vertex_program *vp,
struct pipe_vertex_element *velements,
- int src_offset, int format,
- int instance_divisor, int vbo_index,
- int nr_components, GLboolean doubles,
- GLuint *attr_idx)
+ const struct gl_array_attributes *attrib,
+ int src_offset, int instance_divisor,
+ int vbo_index, int idx)
{
- int idx = *attr_idx;
- if (doubles) {
+ const unsigned format = st_pipe_vertex_format(attrib);
+ const GLubyte nr_components = attrib->Size;
+
+ if (attrib->Doubles) {
int lower_format;
if (nr_components < 2)
@@ -427,15 +356,11 @@ static void init_velement_lowered(const struct st_vertex_program *vp,
init_velement(&velements[idx], src_offset, PIPE_FORMAT_R32G32_UINT,
instance_divisor, vbo_index);
}
-
- idx++;
}
} else {
init_velement(&velements[idx], src_offset,
format, instance_divisor, vbo_index);
- idx++;
}
- *attr_idx = idx;
}
static void
@@ -457,274 +382,131 @@ set_vertex_attribs(struct st_context *st,
cso_set_vertex_elements(cso, num_velements, velements);
}
-/**
- * Set up for drawing interleaved arrays that all live in one VBO
- * or all live in user space.
- * \param vbuffer returns vertex buffer info
- * \param velements returns vertex element info
- */
-static void
-setup_interleaved_attribs(struct st_context *st,
- const struct st_vertex_program *vp,
- const struct gl_vertex_array *arrays,
- unsigned num_inputs)
-{
- struct pipe_vertex_buffer vbuffer;
- struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS] = {{0}};
- GLuint attr;
- const GLubyte *low_addr = NULL;
- GLboolean usingVBO; /* all arrays in a VBO? */
- struct gl_buffer_object *bufobj;
- GLsizei stride;
-
- /* Find the lowest address of the arrays we're drawing,
- * Init bufobj and stride.
- */
- if (num_inputs) {
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
-
- array = get_client_array(arrays, vp->index_to_input[0]);
- assert(array);
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
-
- /* Since we're doing interleaved arrays, we know there'll be at most
- * one buffer object and the stride will be the same for all arrays.
- * Grab them now.
- */
- bufobj = binding->BufferObj;
- stride = binding->Stride;
-
- low_addr = _mesa_vertex_attrib_address(attrib, binding);
-
- for (attr = 1; attr < num_inputs; attr++) {
- const GLubyte *start;
- array = get_client_array(arrays, vp->index_to_input[attr]);
- if (!array)
- continue;
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- start = _mesa_vertex_attrib_address(attrib, binding);
- low_addr = MIN2(low_addr, start);
- }
- }
- else {
- /* not sure we'll ever have zero inputs, but play it safe */
- bufobj = NULL;
- stride = 0;
- low_addr = 0;
- }
-
- /* are the arrays in user space? */
- usingVBO = _mesa_is_bufferobj(bufobj);
-
- for (attr = 0; attr < num_inputs;) {
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
- const GLubyte *ptr;
- unsigned src_offset;
- unsigned src_format;
-
- array = get_client_array(arrays, vp->index_to_input[attr]);
- assert(array);
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- ptr = _mesa_vertex_attrib_address(attrib, binding);
-
- src_offset = (unsigned) (ptr - low_addr);
-
- src_format = st_pipe_vertex_format(attrib);
-
- init_velement_lowered(vp, velements, src_offset, src_format,
- binding->InstanceDivisor, 0,
- attrib->Size, attrib->Doubles, &attr);
- }
-
- /*
- * Return the vbuffer info and setup user-space attrib info, if needed.
- */
- if (num_inputs == 0) {
- /* just defensive coding here */
- vbuffer.buffer.resource = NULL;
- vbuffer.is_user_buffer = false;
- vbuffer.buffer_offset = 0;
- vbuffer.stride = 0;
- }
- else if (usingVBO) {
- /* all interleaved arrays in a VBO */
- struct st_buffer_object *stobj = st_buffer_object(bufobj);
-
- if (!stobj || !stobj->buffer) {
- st->vertex_array_out_of_memory = true;
- return; /* out-of-memory error probably */
- }
-
- vbuffer.buffer.resource = stobj->buffer;
- vbuffer.is_user_buffer = false;
- vbuffer.buffer_offset = pointer_to_offset(low_addr);
- vbuffer.stride = stride;
- }
- else {
- /* all interleaved arrays in user memory */
- vbuffer.buffer.user = low_addr;
- vbuffer.is_user_buffer = !!low_addr; /* if NULL, then unbind */
- vbuffer.buffer_offset = 0;
- vbuffer.stride = stride;
-
- if (low_addr)
- st->draw_needs_minmax_index = true;
- }
-
- set_vertex_attribs(st, &vbuffer, num_inputs ? 1 : 0,
- velements, num_inputs);
-}
-
-/**
- * Set up a separate pipe_vertex_buffer and pipe_vertex_element for each
- * vertex attribute.
- * \param vbuffer returns vertex buffer info
- * \param velements returns vertex element info
- */
-static void
-setup_non_interleaved_attribs(struct st_context *st,
- const struct st_vertex_program *vp,
- const struct gl_vertex_array *arrays,
- unsigned num_inputs)
+void st_update_array(struct st_context *st)
{
struct gl_context *ctx = st->ctx;
+ /* vertex program validation must be done before this */
+ const struct st_vertex_program *vp = st->vp;
+ /* _NEW_PROGRAM, ST_NEW_VS_STATE */
+ const GLbitfield inputs_read = st->vp_variant->vert_attrib_mask;
+ const struct gl_vertex_array_object *vao = ctx->Array._DrawVAO;
+ const ubyte *input_to_index = vp->input_to_index;
+
struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS];
- struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS] = {{0}};
+ struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
unsigned num_vbuffers = 0;
- unsigned unref_buffers = 0;
- GLuint attr;
-
- for (attr = 0; attr < num_inputs;) {
- const unsigned mesaAttr = vp->index_to_input[attr];
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
- struct gl_buffer_object *bufobj;
- GLsizei stride;
- unsigned src_format;
- unsigned bufidx;
-
- array = get_client_array(arrays, mesaAttr);
- assert(array);
-
- bufidx = num_vbuffers++;
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- stride = binding->Stride;
- bufobj = binding->BufferObj;
-
- if (_mesa_is_bufferobj(bufobj)) {
- /* Attribute data is in a VBO.
- * Recall that for VBOs, the gl_vertex_array->Ptr field is
- * really an offset from the start of the VBO, not a pointer.
- */
- struct st_buffer_object *stobj = st_buffer_object(bufobj);
+ st->vertex_array_out_of_memory = FALSE;
+ st->draw_needs_minmax_index = false;
+
+ /* _NEW_PROGRAM */
+ /* ST_NEW_VERTEX_ARRAYS alias ctx->DriverFlags.NewArray */
+ /* Process attribute array data. */
+ GLbitfield mask = inputs_read & _mesa_draw_array_bits(ctx);
+ while (mask) {
+ /* The attribute index to start pulling a binding */
+ const gl_vert_attrib i = ffs(mask) - 1;
+ const struct gl_vertex_buffer_binding *const binding
+ = _mesa_draw_buffer_binding(vao, i);
+ const unsigned bufidx = num_vbuffers++;
+
+ if (_mesa_is_bufferobj(binding->BufferObj)) {
+ struct st_buffer_object *stobj = st_buffer_object(binding->BufferObj);
if (!stobj || !stobj->buffer) {
st->vertex_array_out_of_memory = true;
return; /* out-of-memory error probably */
}
+ /* Set the binding */
vbuffer[bufidx].buffer.resource = stobj->buffer;
vbuffer[bufidx].is_user_buffer = false;
- vbuffer[bufidx].buffer_offset =
- binding->Offset + attrib->RelativeOffset;
+ vbuffer[bufidx].buffer_offset = _mesa_draw_binding_offset(binding);
+ } else {
+ /* Set the binding */
+ const void *ptr = (const void *)_mesa_draw_binding_offset(binding);
+ vbuffer[bufidx].buffer.user = ptr;
+ vbuffer[bufidx].is_user_buffer = true;
+ vbuffer[bufidx].buffer_offset = 0;
+
+ if (!binding->InstanceDivisor)
+ st->draw_needs_minmax_index = true;
}
- else {
- if (stride == 0) {
- unsigned size = attrib->_ElementSize;
- /* This is optimal for GPU cache line usage if the upload size
- * is <= cache line size.
- */
- unsigned alignment = util_next_power_of_two(size);
-
- assert(attrib->Ptr);
- vbuffer[bufidx].buffer.user = attrib->Ptr;
- (void*)ctx->Current.Attrib[mesaAttr];
-
- vbuffer[bufidx].is_user_buffer = false;
- vbuffer[bufidx].buffer.resource = NULL;
-
- /* Use const_uploader for zero-stride vertex attributes, because
- * it may use a better memory placement than stream_uploader.
- * The reason is that zero-stride attributes can be fetched many
- * times (thousands of times), so a better placement is going to
- * perform better.
- *
- * Upload the maximum possible size, which is 4x GLdouble = 32.
- */
- u_upload_data(st->can_bind_const_buffer_as_vertex ?
- st->pipe->const_uploader :
- st->pipe->stream_uploader,
- 0, size, alignment, ptr,
- &vbuffer[bufidx].buffer_offset,
- &vbuffer[bufidx].buffer.resource);
- unref_buffers |= 1u << bufidx;
- } else {
- assert(attrib->Ptr);
- vbuffer[bufidx].buffer.user = attrib->Ptr;
- vbuffer[bufidx].is_user_buffer = true;
- vbuffer[bufidx].buffer_offset = 0;
-
- if (!binding->InstanceDivisor)
- st->draw_needs_minmax_index = true;
- }
+ vbuffer[bufidx].stride = binding->Stride; /* in bytes */
+
+ const GLbitfield boundmask = _mesa_draw_bound_attrib_bits(binding);
+ GLbitfield attrmask = mask & boundmask;
+ /* Mark the those attributes as processed */
+ mask &= ~boundmask;
+ /* We can assume that we have array for the binding */
+ assert(attrmask);
+ /* Walk attributes belonging to the binding */
+ while (attrmask) {
+ const gl_vert_attrib attr = u_bit_scan(&attrmask);
+ const struct gl_array_attributes *const attrib
+ = _mesa_draw_array_attrib(vao, attr);
+ const GLuint off = _mesa_draw_attributes_relative_offset(attrib);
+ init_velement_lowered(vp, velements, attrib, off,
+ binding->InstanceDivisor, bufidx,
+ input_to_index[attr]);
}
+ }
- /* common-case setup */
- vbuffer[bufidx].stride = stride; /* in bytes */
+ const unsigned first_current_vbuffer = num_vbuffers;
+ /* _NEW_PROGRAM | _NEW_CURRENT_ATTRIB */
+ /* Process values that should have better been uniforms in the application */
+ GLbitfield curmask = inputs_read & _mesa_draw_current_bits(ctx);
+ if (curmask) {
+ /* For each attribute, upload the maximum possible size. */
+ GLubyte data[VERT_ATTRIB_MAX*sizeof(GLdouble)*4];
+ GLubyte *cursor = data;
+ const unsigned bufidx = num_vbuffers++;
+ unsigned max_alignment = 1;
+
+ while (curmask) {
+ const gl_vert_attrib attr = u_bit_scan(&curmask);
+ const struct gl_array_attributes *const attrib
+ = _mesa_draw_current_attrib(ctx, attr);
+ const unsigned size = attrib->_ElementSize;
+ const unsigned alignment = util_next_power_of_two(size);
+ max_alignment = MAX2(max_alignment, alignment);
+ memcpy(cursor, attrib->Ptr, size);
+ if (alignment != size)
+ memset(cursor + size, 0, alignment - size);
+
+ init_velement_lowered(vp, velements, attrib, cursor - data, 0,
+ bufidx, input_to_index[attr]);
+
+ cursor += alignment;
+ }
- src_format = st_pipe_vertex_format(attrib);
+ vbuffer[bufidx].is_user_buffer = false;
+ vbuffer[bufidx].buffer.resource = NULL;
+ /* vbuffer[bufidx].buffer_offset is set below */
+ vbuffer[bufidx].stride = 0;
- init_velement_lowered(vp, velements, 0, src_format,
- binding->InstanceDivisor, bufidx,
- attrib->Size, attrib->Doubles, &attr);
+ /* Use const_uploader for zero-stride vertex attributes, because
+ * it may use a better memory placement than stream_uploader.
+ * The reason is that zero-stride attributes can be fetched many
+ * times (thousands of times), so a better placement is going to
+ * perform better.
+ */
+ u_upload_data(st->can_bind_const_buffer_as_vertex ?
+ st->pipe->const_uploader :
+ st->pipe->stream_uploader,
+ 0, cursor - data, max_alignment, data,
+ &vbuffer[bufidx].buffer_offset,
+ &vbuffer[bufidx].buffer.resource);
}
if (!ctx->Const.AllowMappedBuffersDuringExecution) {
u_upload_unmap(st->pipe->stream_uploader);
}
+ const unsigned num_inputs = st->vp_variant->num_inputs;
set_vertex_attribs(st, vbuffer, num_vbuffers, velements, num_inputs);
/* Unreference uploaded zero-stride vertex buffers. */
- while (unref_buffers) {
- unsigned i = u_bit_scan(&unref_buffers);
+ for (unsigned i = first_current_vbuffer; i < num_vbuffers; ++i) {
pipe_resource_reference(&vbuffer[i].buffer.resource, NULL);
}
}
-
-void st_update_array(struct st_context *st)
-{
- struct gl_context *ctx = st->ctx;
- const struct gl_vertex_array *arrays = ctx->Array._DrawArrays;
- const struct st_vertex_program *vp;
- unsigned num_inputs;
-
- st->vertex_array_out_of_memory = FALSE;
- st->draw_needs_minmax_index = false;
-
- /* No drawing has been done yet, so do nothing. */
- if (!arrays)
- return;
-
- /* vertex program validation must be done before this */
- vp = st->vp;
- num_inputs = st->vp_variant->num_inputs;
-
- if (is_interleaved_arrays(vp, arrays, num_inputs))
- setup_interleaved_attribs(st, vp, arrays, num_inputs);
- else
- setup_non_interleaved_attribs(st, vp, arrays, num_inputs);
-}
--
2.14.3
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Mathias Fröhlich
2018-05-15 18:33:38 UTC
Permalink
Hi,
Post by Rob Clark
oh, ok, "st/mesa: Make the input_to_index array available." isn't
handling the nir case ;-)
Uh, ok.

So, you may want to look at something similar to

commit 9987a072cbc00306eb4d34409f6325ae29728a2f
Author: Mathias Fröhlich <***@web.de>
Date: Sun Apr 1 20:18:36 2018 +0200

st/mesa: Make the input_to_index array available.

for the nir case.
Or how can I switch on NIR for radeon to reproduce that here in a fast way?

best

Mathias
Rob Clark
2018-05-15 18:36:19 UTC
Permalink
On Tue, May 15, 2018 at 2:33 PM, Mathias Fröhlich
Post by Mathias Fröhlich
Hi,
Post by Rob Clark
oh, ok, "st/mesa: Make the input_to_index array available." isn't
handling the nir case ;-)
Uh, ok.
So, you may want to look at something similar to
commit 9987a072cbc00306eb4d34409f6325ae29728a2f
Date: Sun Apr 1 20:18:36 2018 +0200
st/mesa: Make the input_to_index array available.
for the nir case.
Or how can I switch on NIR for radeon to reproduce that here in a fast way?
no problem, I just sent a fix, but looks like R600_DEBUG=nir should
enable nir in radeonsi

BR,
-R
Post by Mathias Fröhlich
best
Mathias
Mathias Fröhlich
2018-05-15 18:39:19 UTC
Permalink
Post by Rob Clark
On Tue, May 15, 2018 at 2:33 PM, Mathias Fröhlich
Post by Mathias Fröhlich
Hi,
Post by Rob Clark
oh, ok, "st/mesa: Make the input_to_index array available." isn't
handling the nir case ;-)
Uh, ok.
So, you may want to look at something similar to
commit 9987a072cbc00306eb4d34409f6325ae29728a2f
Date: Sun Apr 1 20:18:36 2018 +0200
st/mesa: Make the input_to_index array available.
for the nir case.
Or how can I switch on NIR for radeon to reproduce that here in a fast way?
no problem, I just sent a fix, but looks like R600_DEBUG=nir should
enable nir in radeonsi
I remember that I put 'check for NIR' onto the TODO list, but that fell through
the cracks when heading for vacation and picking up the patch after that.

I will check with the environment variable.

best
Mathias
Erik Faye-Lund
2018-11-26 18:39:50 UTC
Permalink
Post by M***@gmx.net
Finally make use of the binding information in the VAO when
setting up arrays for draw.
v2: Emit less relocations also for interleaved userspace arrays.
I know this is *very* late notice, but this commit broke Super Tux Kart
on VirGL. Both the player models and the level data render
with gibberish vertex positions since this commit.

The fix that Rob Clark did on top does not fix the problem (and
shouldn't have; VirGL doesn't use NIR).
Post by M***@gmx.net
---
src/mesa/state_tracker/st_atom_array.c | 432 ++++++++---------------
----------
1 file changed, 107 insertions(+), 325 deletions(-)
diff --git a/src/mesa/state_tracker/st_atom_array.c
b/src/mesa/state_tracker/st_atom_array.c
index 2fd67e8d84..6b39b4186d 100644
--- a/src/mesa/state_tracker/st_atom_array.c
+++ b/src/mesa/state_tracker/st_atom_array.c
@@ -48,6 +48,7 @@
#include "main/bufferobj.h"
#include "main/glformats.h"
#include "main/varray.h"
+#include "main/arrayobj.h"
/* vertex_formats[gltype - GL_BYTE][integer*2 + normalized][size - 1] */
static const uint16_t vertex_formats[][4][4] = {
@@ -306,79 +307,6 @@ st_pipe_vertex_format(const struct
gl_array_attributes *attrib)
return vertex_formats[type - GL_BYTE][index][size-1];
}
-static const struct gl_vertex_array *
-get_client_array(const struct gl_vertex_array *arrays,
- unsigned mesaAttr)
-{
- /* st_program uses 0xffffffff to denote a double placeholder attribute */
- if (mesaAttr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
- return NULL;
- return &arrays[mesaAttr];
-}
-
-/**
- * Examine the active arrays to determine if we have interleaved
- * vertex arrays all living in one VBO, or all living in user space.
- */
-static GLboolean
-is_interleaved_arrays(const struct st_vertex_program *vp,
- const struct gl_vertex_array *arrays,
- unsigned num_inputs)
-{
- GLuint attr;
- const struct gl_buffer_object *firstBufObj = NULL;
- GLint firstStride = -1;
- const GLubyte *firstPtr = NULL;
- GLboolean userSpaceBuffer = GL_FALSE;
-
- for (attr = 0; attr < num_inputs; attr++) {
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
- const GLubyte *ptr;
- const struct gl_buffer_object *bufObj;
- GLsizei stride;
-
- array = get_client_array(arrays, vp->index_to_input[attr]);
- if (!array)
- continue;
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- stride = binding->Stride; /* in bytes */
- ptr = _mesa_vertex_attrib_address(attrib, binding);
-
- /* To keep things simple, don't allow interleaved zero-stride attribs. */
- if (stride == 0)
- return false;
-
- bufObj = binding->BufferObj;
- if (attr == 0) {
- /* save info about the first array */
- firstStride = stride;
- firstPtr = ptr;
- firstBufObj = bufObj;
- userSpaceBuffer = !_mesa_is_bufferobj(bufObj);
- }
- else {
- /* check if other arrays interleave with the first, in same buffer */
- if (stride != firstStride)
- return GL_FALSE; /* strides don't match */
-
- if (bufObj != firstBufObj)
- return GL_FALSE; /* arrays in different VBOs */
-
- if (llabs(ptr - firstPtr) > firstStride)
- return GL_FALSE; /* arrays start too far apart */
-
- if ((!_mesa_is_bufferobj(bufObj)) != userSpaceBuffer)
- return GL_FALSE; /* mix of VBO and user-space arrays */
- }
- }
-
- return GL_TRUE;
-}
-
static void init_velement(struct pipe_vertex_element *velement,
int src_offset, int format,
int instance_divisor, int vbo_index)
@@ -392,13 +320,14 @@ static void init_velement(struct
pipe_vertex_element *velement,
static void init_velement_lowered(const struct st_vertex_program *vp,
struct pipe_vertex_element
*velements,
- int src_offset, int format,
- int instance_divisor, int
vbo_index,
- int nr_components, GLboolean doubles,
- GLuint *attr_idx)
+ const struct gl_array_attributes *attrib,
+ int src_offset, int
instance_divisor,
+ int vbo_index, int idx)
{
- int idx = *attr_idx;
- if (doubles) {
+ const unsigned format = st_pipe_vertex_format(attrib);
+ const GLubyte nr_components = attrib->Size;
+
+ if (attrib->Doubles) {
int lower_format;
if (nr_components < 2)
@@ -427,15 +356,11 @@ static void init_velement_lowered(const struct st_vertex_program *vp,
init_velement(&velements[idx], src_offset,
PIPE_FORMAT_R32G32_UINT,
instance_divisor, vbo_index);
}
-
- idx++;
}
} else {
init_velement(&velements[idx], src_offset,
format, instance_divisor, vbo_index);
- idx++;
}
- *attr_idx = idx;
}
static void
@@ -457,274 +382,131 @@ set_vertex_attribs(struct st_context *st,
cso_set_vertex_elements(cso, num_velements, velements);
}
-/**
- * Set up for drawing interleaved arrays that all live in one VBO
- * or all live in user space.
- * \param vbuffer returns vertex buffer info
- * \param velements returns vertex element info
- */
-static void
-setup_interleaved_attribs(struct st_context *st,
- const struct st_vertex_program *vp,
- const struct gl_vertex_array *arrays,
- unsigned num_inputs)
-{
- struct pipe_vertex_buffer vbuffer;
- struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS] = {{0}};
- GLuint attr;
- const GLubyte *low_addr = NULL;
- GLboolean usingVBO; /* all arrays in a VBO? */
- struct gl_buffer_object *bufobj;
- GLsizei stride;
-
- /* Find the lowest address of the arrays we're drawing,
- * Init bufobj and stride.
- */
- if (num_inputs) {
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
-
- array = get_client_array(arrays, vp->index_to_input[0]);
- assert(array);
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
-
- /* Since we're doing interleaved arrays, we know there'll be at most
- * one buffer object and the stride will be the same for all arrays.
- * Grab them now.
- */
- bufobj = binding->BufferObj;
- stride = binding->Stride;
-
- low_addr = _mesa_vertex_attrib_address(attrib, binding);
-
- for (attr = 1; attr < num_inputs; attr++) {
- const GLubyte *start;
- array = get_client_array(arrays, vp->index_to_input[attr]);
- if (!array)
- continue;
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- start = _mesa_vertex_attrib_address(attrib, binding);
- low_addr = MIN2(low_addr, start);
- }
- }
- else {
- /* not sure we'll ever have zero inputs, but play it safe */
- bufobj = NULL;
- stride = 0;
- low_addr = 0;
- }
-
- /* are the arrays in user space? */
- usingVBO = _mesa_is_bufferobj(bufobj);
-
- for (attr = 0; attr < num_inputs;) {
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
- const GLubyte *ptr;
- unsigned src_offset;
- unsigned src_format;
-
- array = get_client_array(arrays, vp->index_to_input[attr]);
- assert(array);
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- ptr = _mesa_vertex_attrib_address(attrib, binding);
-
- src_offset = (unsigned) (ptr - low_addr);
-
- src_format = st_pipe_vertex_format(attrib);
-
- init_velement_lowered(vp, velements, src_offset, src_format,
- binding->InstanceDivisor, 0,
- attrib->Size, attrib->Doubles, &attr);
- }
-
- /*
- * Return the vbuffer info and setup user-space attrib info, if needed.
- */
- if (num_inputs == 0) {
- /* just defensive coding here */
- vbuffer.buffer.resource = NULL;
- vbuffer.is_user_buffer = false;
- vbuffer.buffer_offset = 0;
- vbuffer.stride = 0;
- }
- else if (usingVBO) {
- /* all interleaved arrays in a VBO */
- struct st_buffer_object *stobj = st_buffer_object(bufobj);
-
- if (!stobj || !stobj->buffer) {
- st->vertex_array_out_of_memory = true;
- return; /* out-of-memory error probably */
- }
-
- vbuffer.buffer.resource = stobj->buffer;
- vbuffer.is_user_buffer = false;
- vbuffer.buffer_offset = pointer_to_offset(low_addr);
- vbuffer.stride = stride;
- }
- else {
- /* all interleaved arrays in user memory */
- vbuffer.buffer.user = low_addr;
- vbuffer.is_user_buffer = !!low_addr; /* if NULL, then unbind */
- vbuffer.buffer_offset = 0;
- vbuffer.stride = stride;
-
- if (low_addr)
- st->draw_needs_minmax_index = true;
- }
-
- set_vertex_attribs(st, &vbuffer, num_inputs ? 1 : 0,
- velements, num_inputs);
-}
-
-/**
- * Set up a separate pipe_vertex_buffer and pipe_vertex_element for each
- * vertex attribute.
- * \param vbuffer returns vertex buffer info
- * \param velements returns vertex element info
- */
-static void
-setup_non_interleaved_attribs(struct st_context *st,
- const struct st_vertex_program *vp,
- const struct gl_vertex_array *arrays,
- unsigned num_inputs)
+void st_update_array(struct st_context *st)
{
struct gl_context *ctx = st->ctx;
+ /* vertex program validation must be done before this */
+ const struct st_vertex_program *vp = st->vp;
+ /* _NEW_PROGRAM, ST_NEW_VS_STATE */
+ const GLbitfield inputs_read = st->vp_variant->vert_attrib_mask;
+ const struct gl_vertex_array_object *vao = ctx->Array._DrawVAO;
+ const ubyte *input_to_index = vp->input_to_index;
+
struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS];
- struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS] = {{0}};
+ struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
unsigned num_vbuffers = 0;
- unsigned unref_buffers = 0;
- GLuint attr;
-
- for (attr = 0; attr < num_inputs;) {
- const unsigned mesaAttr = vp->index_to_input[attr];
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
- struct gl_buffer_object *bufobj;
- GLsizei stride;
- unsigned src_format;
- unsigned bufidx;
-
- array = get_client_array(arrays, mesaAttr);
- assert(array);
-
- bufidx = num_vbuffers++;
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- stride = binding->Stride;
- bufobj = binding->BufferObj;
-
- if (_mesa_is_bufferobj(bufobj)) {
- /* Attribute data is in a VBO.
- * Recall that for VBOs, the gl_vertex_array->Ptr field is
- * really an offset from the start of the VBO, not a pointer.
- */
- struct st_buffer_object *stobj = st_buffer_object(bufobj);
+ st->vertex_array_out_of_memory = FALSE;
+ st->draw_needs_minmax_index = false;
+
+ /* _NEW_PROGRAM */
+ /* ST_NEW_VERTEX_ARRAYS alias ctx->DriverFlags.NewArray */
+ /* Process attribute array data. */
+ GLbitfield mask = inputs_read & _mesa_draw_array_bits(ctx);
+ while (mask) {
+ /* The attribute index to start pulling a binding */
+ const gl_vert_attrib i = ffs(mask) - 1;
+ const struct gl_vertex_buffer_binding *const binding
+ = _mesa_draw_buffer_binding(vao, i);
+ const unsigned bufidx = num_vbuffers++;
+
+ if (_mesa_is_bufferobj(binding->BufferObj)) {
+ struct st_buffer_object *stobj = st_buffer_object(binding->BufferObj);
if (!stobj || !stobj->buffer) {
st->vertex_array_out_of_memory = true;
return; /* out-of-memory error probably */
}
+ /* Set the binding */
vbuffer[bufidx].buffer.resource = stobj->buffer;
vbuffer[bufidx].is_user_buffer = false;
- vbuffer[bufidx].buffer_offset =
- binding->Offset + attrib->RelativeOffset;
+ vbuffer[bufidx].buffer_offset = _mesa_draw_binding_offset(binding);
+ } else {
+ /* Set the binding */
+ const void *ptr = (const void *)_mesa_draw_binding_offset(binding);
+ vbuffer[bufidx].buffer.user = ptr;
+ vbuffer[bufidx].is_user_buffer = true;
+ vbuffer[bufidx].buffer_offset = 0;
+
+ if (!binding->InstanceDivisor)
+ st->draw_needs_minmax_index = true;
}
- else {
- if (stride == 0) {
- unsigned size = attrib->_ElementSize;
- /* This is optimal for GPU cache line usage if the upload size
- * is <= cache line size.
- */
- unsigned alignment = util_next_power_of_two(size);
-
- assert(attrib->Ptr);
- vbuffer[bufidx].buffer.user = attrib->Ptr;
- (void*)ctx->Current.Attrib[mesaAttr];
-
- vbuffer[bufidx].is_user_buffer = false;
- vbuffer[bufidx].buffer.resource = NULL;
-
- /* Use const_uploader for zero-stride vertex attributes, because
- * it may use a better memory placement than
stream_uploader.
- * The reason is that zero-stride attributes can be fetched many
- * times (thousands of times), so a better placement is going to
- * perform better.
- *
- * Upload the maximum possible size, which is 4x
GLdouble = 32.
- */
- u_upload_data(st->can_bind_const_buffer_as_vertex ?
- st->pipe->stream_uploader,
- 0, size, alignment, ptr,
- &vbuffer[bufidx].buffer_offset,
- &vbuffer[bufidx].buffer.resource);
- unref_buffers |= 1u << bufidx;
- } else {
- assert(attrib->Ptr);
- vbuffer[bufidx].buffer.user = attrib->Ptr;
- vbuffer[bufidx].is_user_buffer = true;
- vbuffer[bufidx].buffer_offset = 0;
-
- if (!binding->InstanceDivisor)
- st->draw_needs_minmax_index = true;
- }
+ vbuffer[bufidx].stride = binding->Stride; /* in bytes */
+
+ const GLbitfield boundmask = _mesa_draw_bound_attrib_bits(binding);
+ GLbitfield attrmask = mask & boundmask;
+ /* Mark the those attributes as processed */
+ mask &= ~boundmask;
+ /* We can assume that we have array for the binding */
+ assert(attrmask);
+ /* Walk attributes belonging to the binding */
+ while (attrmask) {
+ const gl_vert_attrib attr = u_bit_scan(&attrmask);
+ const struct gl_array_attributes *const attrib
+ = _mesa_draw_array_attrib(vao, attr);
+ const GLuint off = _mesa_draw_attributes_relative_offset(attrib);
+ init_velement_lowered(vp, velements, attrib, off,
+ binding->InstanceDivisor, bufidx,
+ input_to_index[attr]);
}
+ }
- /* common-case setup */
- vbuffer[bufidx].stride = stride; /* in bytes */
+ const unsigned first_current_vbuffer = num_vbuffers;
+ /* _NEW_PROGRAM | _NEW_CURRENT_ATTRIB */
+ /* Process values that should have better been uniforms in the application */
+ GLbitfield curmask = inputs_read & _mesa_draw_current_bits(ctx);
+ if (curmask) {
+ /* For each attribute, upload the maximum possible size. */
+ GLubyte data[VERT_ATTRIB_MAX*sizeof(GLdouble)*4];
+ GLubyte *cursor = data;
+ const unsigned bufidx = num_vbuffers++;
+ unsigned max_alignment = 1;
+
+ while (curmask) {
+ const gl_vert_attrib attr = u_bit_scan(&curmask);
+ const struct gl_array_attributes *const attrib
+ = _mesa_draw_current_attrib(ctx, attr);
+ const unsigned size = attrib->_ElementSize;
+ const unsigned alignment = util_next_power_of_two(size);
+ max_alignment = MAX2(max_alignment, alignment);
+ memcpy(cursor, attrib->Ptr, size);
+ if (alignment != size)
+ memset(cursor + size, 0, alignment - size);
+
+ init_velement_lowered(vp, velements, attrib, cursor - data, 0,
+ bufidx, input_to_index[attr]);
+
+ cursor += alignment;
+ }
- src_format = st_pipe_vertex_format(attrib);
+ vbuffer[bufidx].is_user_buffer = false;
+ vbuffer[bufidx].buffer.resource = NULL;
+ /* vbuffer[bufidx].buffer_offset is set below */
+ vbuffer[bufidx].stride = 0;
- init_velement_lowered(vp, velements, 0, src_format,
- binding->InstanceDivisor, bufidx,
- attrib->Size, attrib->Doubles, &attr);
+ /* Use const_uploader for zero-stride vertex attributes, because
+ * it may use a better memory placement than stream_uploader.
+ * The reason is that zero-stride attributes can be fetched many
+ * times (thousands of times), so a better placement is going to
+ * perform better.
+ */
+ u_upload_data(st->can_bind_const_buffer_as_vertex ?
+ st->pipe->const_uploader :
+ st->pipe->stream_uploader,
+ 0, cursor - data, max_alignment, data,
+ &vbuffer[bufidx].buffer_offset,
+ &vbuffer[bufidx].buffer.resource);
}
if (!ctx->Const.AllowMappedBuffersDuringExecution) {
u_upload_unmap(st->pipe->stream_uploader);
}
+ const unsigned num_inputs = st->vp_variant->num_inputs;
set_vertex_attribs(st, vbuffer, num_vbuffers, velements,
num_inputs);
/* Unreference uploaded zero-stride vertex buffers. */
- while (unref_buffers) {
- unsigned i = u_bit_scan(&unref_buffers);
+ for (unsigned i = first_current_vbuffer; i < num_vbuffers; ++i) {
pipe_resource_reference(&vbuffer[i].buffer.resource, NULL);
}
}
-
-void st_update_array(struct st_context *st)
-{
- struct gl_context *ctx = st->ctx;
- const struct gl_vertex_array *arrays = ctx->Array._DrawArrays;
- const struct st_vertex_program *vp;
- unsigned num_inputs;
-
- st->vertex_array_out_of_memory = FALSE;
- st->draw_needs_minmax_index = false;
-
- /* No drawing has been done yet, so do nothing. */
- if (!arrays)
- return;
-
- /* vertex program validation must be done before this */
- vp = st->vp;
- num_inputs = st->vp_variant->num_inputs;
-
- if (is_interleaved_arrays(vp, arrays, num_inputs))
- setup_interleaved_attribs(st, vp, arrays, num_inputs);
- else
- setup_non_interleaved_attribs(st, vp, arrays, num_inputs);
-}
Mathias Fröhlich
2018-11-26 20:33:00 UTC
Permalink
Hi,
Post by Erik Faye-Lund
I know this is *very* late notice, but this commit broke Super Tux Kart
on VirGL. Both the player-models as well as the level data render
with gibberish vertex positions since this commit.
The fix that Rob Clark did on top does not fix the problem (and
shouldn't have; VirGL doesn't use NIR).
Do you have any idea how I can reproduce that issue with the least
effort?

Can you tell me what the virgl driver sees at
Const.MaxVertexAttribRelativeOffset and
Const.MaxVertexAttribStride?

best
Mathias
Erik Faye-Lund
2018-11-26 21:10:45 UTC
Permalink
Post by Mathias Fröhlich
Hi,
Post by Erik Faye-Lund
I know this is *very* late notice, but this commit broke Super Tux Kart
on VirGL. Both the player-models as as well as the level data renders
with gibberish vertex positions since this commit.
The fix that Rob Clark did on top does not fix the problem (and
shouldn't have; VirGL doesn't use NIR).
Do you have any idea how I can reproduce that issue with the least
effort?
Can you tell me what the virgl driver sees at
Const.MaxVertexAttribRelativeOffset and
Const.MaxVertexAttribStride?
best
Mathias
This depends a bit on the version in question. Nothing seems to ever
set Const.MaxVertexAttribRelativeOffset away from its initial value of
2047... Directly on top of your commit, Const.MaxVertexAttribStride is
reported as zero. On master, it's reported as whatever the host
supports, which is 2048 in my case.
Erik Faye-Lund
2018-11-26 21:16:35 UTC
Permalink
(sorry, forgot to answer one of the questions)
Post by Mathias Fröhlich
Hi,
Post by Erik Faye-Lund
I know this is *very* late notice, but this commit broke Super Tux Kart
on VirGL. Both the player-models as as well as the level data renders
with gibberish vertex positions since this commit.
The fix that Rob Clark did on top does not fix the problem (and
shouldn't have; VirGL doesn't use NIR).
Do you have any idea how I can reproduce that issue with the least
effort?
Sadly, no. I run a qemu VM where I run super tux cart. It's a rather
convoluted setup, I'm afraid. If you're interested in that Robert Foss
has written an article about how to set something like this up here:
https://memcpy.io/virtualizing-gpu-access.html

...But I totally understand if this is asking a bit too much. I can
help out with any information you need...
Mathias Fröhlich
2018-11-27 06:11:06 UTC
Permalink
Hi Erik,
Post by Erik Faye-Lund
Post by Mathias Fröhlich
Post by Erik Faye-Lund
I know this is *very* late notice, but this commit broke Super Tux Kart
on VirGL. Both the player-models as as well as the level data renders
with gibberish vertex positions since this commit.
The fix that Rob Clark did on top does not fix the problem (and
shouldn't have; VirGL doesn't use NIR).
Do you have any idea how I can reproduce that issue with the least
effort?
Sadly, no. I run a qemu VM where I run super tux cart. It's a rather
convoluted setup, I'm afraid. If you're interested in that Robert Foss
has written an article about how to set something like this up here:
https://memcpy.io/virtualizing-gpu-access.html
...But I totally understand if this is asking a bit too much. I can
help out with any information you need...
Thanks!
That just means that looking into it has to wait at least until the weekend.
Probably even later.

And thanks for looking up the constants.
The effective binding computation depends on these and may change
the set up combined buffer objects. So these are interesting to know.

I have put a lot of internal verification into the code paths;
in particular, _mesa_update_vao_derived_arrays contains a larger
#ifndef NDEBUG part that may tell us if there is something unexpected.

I assume you also ran with asserts enabled in the build?

I can observe some flicker in supertuxkart on i965. The nvidia blob seems
not to flicker here. Also, when running through valgrind I don't get that flicker
on i965. Is that flashing - which initially looked to me like a lighting effect of the
game - what you observe too?

Also what are the game options? Are shaders enabled in some way?
What does change if you change the game settings?
Maybe that gives us some hints?

best
Mathias
Erik Faye-Lund
2018-11-27 09:17:07 UTC
Permalink
Post by Mathias Fröhlich
Hi Erik,
Post by Erik Faye-Lund
Post by Mathias Fröhlich
Post by Erik Faye-Lund
I know this is *very* late notice, but this commit broke Super
Tux
Kart
on VirGL. Both the player-models as as well as the level data renders
with gibberish vertex positions since this commit.
The fix that Rob Clark did on top does not fix the problem (and
shouldn't have; VirGL doesn't use NIR).
Do you have any idea how I can reproduce that issue with the least
effort?
Sadly, no. I run a qemu VM where I run super tux cart. It's a rather
convoluted setup, I'm afraid. If you're interested in that Robert Foss
https://memcpy.io/virtualizing-gpu-access.html
...But I totally understand if this is asking a bit too much. I can
help out with any information you need...
Thanks!
That, just means that looking into has to wait at least until the weekend.
Probably even later.
And thanks for looking up the constants.
The effective binding computation depends on these and may change
the set up combined buffer objects. So these are interesting to know.
I have been putting a lot of internal verification into the code paths
especially _mesa_update_vao_derived_arrays contains a larger
#ifndef NDEBUG part that may tell us if there is something
unexpected.
I assume you did run also with asserts enabled in the build?
Yes, I ran with asserts on, and none triggered.
Post by Mathias Fröhlich
I can observe some flicker in supertuxcart on i965. The nvidia blob seems
not to flicker here. Also when running through valgrind I don't get that flicker
on i965. Is that flashing - initially looked like a lighting effect of the game to
me - what you observe too?
No, the models are completely garbled. You can find some example
screenshots here:

https://gitlab.freedesktop.org/virgl/virglrenderer/issues/59
Post by Mathias Fröhlich
Also what are the game options? Are shaders enabled in some way?
I'm playing with the default settings. I'm not sure what you mean with
"are shaders enabled"; VirGL is a gallium-driver, everything uses
shaders.
Post by Mathias Fröhlich
What does change if you change the game settings?
May be that gives us some hints?
I've tried setting both the lowest and highest graphics settings in the
game, and I get the same problem. This seems to happen regardless of
graphics settings.
Mathias Fröhlich
2018-11-30 06:06:16 UTC
Permalink
Good Morning,
Post by Erik Faye-Lund
Post by Mathias Fröhlich
Hi Erik,
Post by Erik Faye-Lund
Post by Mathias Fröhlich
Post by Erik Faye-Lund
I know this is *very* late notice, but this commit broke Super
Tux
Kart
on VirGL. Both the player-models as as well as the level data renders
with gibberish vertex positions since this commit.
The fix that Rob Clark did on top does not fix the problem (and
shouldn't have; VirGL doesn't use NIR).
Do you have any idea how I can reproduce that issue with the least
effort?
Sadly, no. I run a qemu VM where I run super tux cart. It's a rather
convoluted setup, I'm afraid. If you're interested in that Robert Foss
https://memcpy.io/virtualizing-gpu-access.html
...But I totally understand if this is asking a bit too much. I can
help out with any information you need...
Thanks!
That, just means that looking into has to wait at least until the weekend.
Probably even later.
And thanks for looking up the constants.
The effective binding computation depends on these and may change
the set up combined buffer objects. So these are interesting to know.
I have been putting a lot of internal verification into the code paths
especially _mesa_update_vao_derived_arrays contains a larger
#ifndef NDEBUG part that may tell us if there is something
unexpected.
I assume you did run also with asserts enabled in the build?
Yes, I ran with asserts on, and none triggered.
Ok, there should not be a problem.
At least nothing that I anticipated goes wrong.
Post by Erik Faye-Lund
Post by Mathias Fröhlich
I can observe some flicker in supertuxcart on i965. The nvidia blob seems
not to flicker here. Also when running through valgrind I don't get that flicker
on i965. Is that flashing - initially looked like a lighting effect of the game to
me - what you observe too?
No, the models are completely garbled. You can find some example
https://gitlab.freedesktop.org/virgl/virglrenderer/issues/59
Post by Mathias Fröhlich
Also what are the game options? Are shaders enabled in some way?
I'm playing with the default settings. I'm not sure what you mean with
"are shaders enabled"; VirGL is a gallium-driver, everything uses
shaders.
Ok, that is about what I meant. Sometimes that goes back to a pre-shader
render engine behind the scenes.
Yes, gallium is always shaders, but depending on the higher-level render technique used
you may need different array setups with more or fewer vertex attributes. Think
of tangent space, which you do not need for a simple renderer. So, ultimately that may
influence the setup and trigger things. But what you report sounds pretty
fundamental.

The good side is that I set up at least what was easy to set up here, that is
a fedora 29 using virgl on a fedora 28 host using an Intel skylake type GPU.

Linux 4.19.4-300.fc29.x86_64 #1 SMP Fri Nov 23 13:03:11 UTC 2018 x86_64
[info ] IrrDriver: OpenGL version: 3.3
[info ] IrrDriver: OpenGL vendor: Red Hat
[info ] IrrDriver: OpenGL renderer: virgl
[info ] IrrDriver: OpenGL version string: 3.3 (Core Profile) Mesa 18.2.4

But, that just works. supertuxkart runs without any vertex corruption problems
on that combination. The installed default rpm of mesa is not patched in any way
that makes me suspicious regarding our problem. And git log mesa-18.2.4 tells
me that the patch you mention is included in 18.2.4.

Means either I do not yet reproduce the problem correctly on the application side.
Well, seems like already the initial screen to configure the track and that
should show problems, which run already fine on my combination.

Or we have a side effect somewhere in the complete chain down to the
host system, which is triggered by that patch.

So, looking at that game directly on the host system, that one has flaws
like the mentioned flashlight like frames that are probably missing
some geometry in one way or the other.

Means here:
game + mesa-18.0.5 + Skylake GT2 -> fail
game + mesa-18.2.4 + virgl + mesa-18.0.5 + Skylake GT2 -> works

Hmm, one question about the mentioned virgl setup: how does
glxgears render on that setup? Or alternatively, how do other OpenGL
applications besides supertuxkart behave on that setup?

best

Mathias
Erik Faye-Lund
2018-12-03 11:15:17 UTC
Permalink
Post by Mathias Fröhlich
Good Morning,
Post by Erik Faye-Lund
Post by Mathias Fröhlich
Hi Erik,
Post by Erik Faye-Lund
Post by Mathias Fröhlich
Post by Erik Faye-Lund
I know this is *very* late notice, but this commit broke Super
Tux
Kart
on VirGL. Both the player-models as as well as the level
data
renders
with gibberish vertex positions since this commit.
The fix that Rob Clark did on top does not fix the problem (and
shouldn't have; VirGL doesn't use NIR).
Do you have any idea how I can reproduce that issue with the least
effort?
Sadly, no. I run a qemu VM where I run super tux cart. It's a rather
convoluted setup, I'm afraid. If you're interested in that
Robert
Foss
https://memcpy.io/virtualizing-gpu-access.html
...But I totally understand if this is asking a bit too much. I can
help out with any information you need...
Thanks!
That, just means that looking into has to wait at least until the weekend.
Probably even later.
And thanks for looking up the constants.
The effective binding computation depends on these and may change
the set up combined buffer objects. So these are interesting to know.
I have been putting a lot of internal verification into the code paths
especially _mesa_update_vao_derived_arrays contains a larger
#ifndef NDEBUG part that may tell us if there is something
unexpected.
I assume you did run also with asserts enabled in the build?
Yes, I ran with asserts on, and none triggered.
Ok, there should not be a problem.
At least nothing that I anticipated goes wrong.
Post by Erik Faye-Lund
Post by Mathias Fröhlich
I can observe some flicker in supertuxcart on i965. The nvidia
blob
seems
not to flicker here. Also when running through valgrind I don't
get
that flicker
on i965. Is that flashing - initially looked like a lighting
effect
of the game to
me - what you observe too?
No, the models are completely garbled. You can find some example
https://gitlab.freedesktop.org/virgl/virglrenderer/issues/59
Post by Mathias Fröhlich
Also what are the game options? Are shaders enabled in some way?
I'm playing with the default settings. I'm not sure what you mean with
"are shaders enabled"; VirGL is a gallium-driver, everything uses
shaders.
Ok, that is about what I meant. Sometimes that goes back to a pre shader
render engine behind the scenens.
Yes, gallium is always shaders, but depending on the higher level render techinque used
you may need different array setups with more or less vertex
attributes. Thing
of tangent space that you do not need for a simple renderer. So, finally that may
influence the setup and trigger things. But what you report sounds pretty
fundamental.
Yeah. An important thing to note is that virgl is pretty widely tested
by now, and we don't see this pop up in other places... And that sounds
a bit strange to me.
Post by Mathias Fröhlich
The good side is that I set up at least what was easy to set up here, that is
a fedora 29 using virgl on a fedora 28 host using an Intel skylake type GPU.
Linux 4.19.4-300.fc29.x86_64 #1 SMP Fri Nov 23 13:03:11 UTC 2018 x86_64
[info ] IrrDriver: OpenGL version: 3.3
[info ] IrrDriver: OpenGL vendor: Red Hat
[info ] IrrDriver: OpenGL renderer: virgl
[info ] IrrDriver: OpenGL version string: 3.3 (Core Profile) Mesa 18.2.4
But, that just works. supertuxkart runs without any vertex corruption problems
on that combination. The installed default rpm of mesa is not patched in any way
that makes me suspicious regarding our problem. And git log mesa-
18.2.4 tells
me that the patch you mention is included in 18.2.4.
Means either I do not yet reproduce the problem correctly on the application side.
Well, seems like already the initial screen to configure the track and that
should show problems, which run already fine on my combination.
Or we have a side effect somewhere in the complete chain down to the
host system, which is triggered by that patch.
Right. This is super-strange to me; we (Collabora) have multiple people
reproducing it independently (CC'ed Gert). What version of
virglrenderer are you using? Perhaps we have some misbehavior in newer
versions of it that was just masked without this patch?
Post by Mathias Fröhlich
So, looking at that game directly on the host system, that one has flaws
like the mentioned flashlight like frames that are probably missing
some geometry in one way or the other.
game + mesa-18.0.5 + Skylake GT2 -> fail
game + mesa-18.2.4 + virgl + mesa-18.0.5 + Skylake GT2 -> works
Hmm, one question, on the mentioned setup on virgl. How does
glxgears render on that setup? Or alternatively how do other OpenGL
applications different from supertuxkart on that setup?
glxgears renders just fine. We're also passing pretty much all of the
OpenGL 4.3 CTS and most of piglit. Generally speaking, this doesn't
trigger.

I just got a notice that Serious Sam 3 has a similar problem (I haven't
tested this myself)... So perhaps there's some pattern that can be
found?
Post by Mathias Fröhlich
best
Mathias
Mathias Fröhlich
2018-12-04 06:54:17 UTC
Permalink
Hey,
Post by Erik Faye-Lund
Yeah. An important thing to note is that virgl is pretty widely tested
by now, and we don't see this pop up in other places... And that sounds
a bit strange to me.
Good to know, I don't actually know how wide virgl is already in use.
I was surprised to find a direct knob already in fedoras version of libvirt.
And I am now disappointed that after upgrading to the newest fedora on the
bottom side it just crashes somewhere. I thought maybe I can reproduce that
with something newer that I need anyhow in the not so distant future ...

So, I am currently again without a basic virgl system.
Post by Erik Faye-Lund
Post by Mathias Fröhlich
The good side is that I set up at least what was easy to set up here, that is
a fedora 29 using virgl on a fedora 28 host using an Intel skylake type GPU.
Linux 4.19.4-300.fc29.x86_64 #1 SMP Fri Nov 23 13:03:11 UTC 2018 x86_64
[info ] IrrDriver: OpenGL version: 3.3
[info ] IrrDriver: OpenGL vendor: Red Hat
[info ] IrrDriver: OpenGL renderer: virgl
[info ] IrrDriver: OpenGL version string: 3.3 (Core Profile) Mesa 18.2.4
But, that just works. supertuxkart runs without any vertex corruption problems
on that combination. The installed default rpm of mesa is not patched in any way
that makes me suspicious regarding our problem. And git log mesa-
18.2.4 tells
me that the patch you mention is included in 18.2.4.
Means either I do not yet reproduce the problem correctly on the application side.
Well, seems like already the initial screen to configure the track and that
should show problems, which run already fine on my combination.
Or we have a side effect somewhere in the complete chain down to the
host system, which is triggered by that patch.
Right. This is super-strange to me; we (Collabora) have multiple people
reproducing it independently (CC'ed Gert). What version of
virglrenderer are you using? Perhaps we have some misbehavior in newer
versions of it that was just masked without this patch?
Sounds to me like that, or even worse something with the supertuxkart.
I have not yet understood what they are doing in detail with the VAO's.
But I was slightly looking into the direction of mmapping the buffer objects
and not flushing them correctly. That could potentially also lead to
such failures. Especially since some people observe and some not.
Not finally finished with investigating, but up to now I did not see something
wrong there.
Post by Erik Faye-Lund
Post by Mathias Fröhlich
Hmm, one question, on the mentioned setup on virgl. How does
glxgears render on that setup? Or alternatively how do other OpenGL
applications different from supertuxkart on that setup?
glxgears renders just fine. We'er also passing pretty much all of the
OpenGL 4.3 CTS and most of piglit. Generally speaking, this doesn't
trigger.
That was my expectation as well as it took so long to find something.
But still, not impossible that something is wrong.
Post by Erik Faye-Lund
I just got a notice that Serious Sam 3 has a similar problem (I haven't
tested this myself)... So perhaps there's some pattern that can be
found?
One observation that I made with supertuxkart:
They really have a VAO that ends up with two effective bindings, used
by 3 and 2 vertex attributes respectively. That is, the old gallium array translation code
produced 5 vertex_element struct entries, each of which had its own
vertex_buffer struct assigned. The minimal pipe_vertex_buffer translation
only happened in the old code if it could be reduced to a single vertex buffer entry.
Now the code produces 3 pipe_vertex_elements referencing one
pipe_vertex_buffer and 2 pipe_vertex_elements referencing another
pipe_vertex_buffer. That should be more optimal now, but is it possible
that virgl somewhere down the road only handles the n-elements-to-one-buffer
and the n-elements-to-n-buffers cases? So the question is: is there a bug in the case of
n elements referencing m buffers with 1 < m < n?
Do you know what I mean by the effective binding?

Maybe you can observe the same type of VAO in Serious Sam 3?

Well I have confidence that this triggers something. At least debugging into
the upper parts in mesa then into i965 showed nothing unexpected.

best

Mathias
Erik Faye-Lund
2018-12-04 09:35:58 UTC
Permalink
Post by Mathias Fröhlich
Hey,
Post by Erik Faye-Lund
Yeah. An important thing to note is that virgl is pretty widely tested
by now, and we don't see this pop up in other places... And that sounds
a bit strange to me.
Good to know, I don't actually know how wide virgl is already in use.
I was surprised to find a direct knob already in fedoras version of libvirt.
And I am now disappointed that after upgrading to the newest fedora on the
bottom side it just crashes somewhere. I thought may be I can
reproduce that
with something newer that I need anyhow in the not so distant future ...
So, I am currently again without a basic virgl system.
Post by Erik Faye-Lund
Post by Mathias Fröhlich
The good side is that I set up at least what was easy to set up
here,
that is
a fedora 29 using virgl on a fedora 28 host using an Intel
skylake
type GPU.
Linux 4.19.4-300.fc29.x86_64 #1 SMP Fri Nov 23 13:03:11 UTC 2018 x86_64
[info ] IrrDriver: OpenGL version: 3.3
[info ] IrrDriver: OpenGL vendor: Red Hat
[info ] IrrDriver: OpenGL renderer: virgl
[info ] IrrDriver: OpenGL version string: 3.3 (Core Profile)
Mesa
18.2.4
But, that just works. supertuxkart runs without any vertex
corruption
problems
on that combination. The installed default rpm of mesa is not
patched
in any way
that makes me suspicious regarding our problem. And git log mesa-
18.2.4 tells
me that the patch you mention is included in 18.2.4.
Means either I do not yet reproduce the problem correctly on the application side.
Well, seems like already the initial screen to configure the
track
and that
should show problems, which run already fine on my combination.
Or we have a side effect somewhere in the complete chain down to the
host system, which is triggered by that patch.
Right. This is super-strange to me; we (Collabora) have multiple people
reproducing it independently (CC'ed Gert). What version of
virglrenderer are you using? Perhaps we have some misbehavior in newer
versions of it that was just masked without this patch?
Sounds to me like that, or even worse something with the
supertuxkart.
I have not yet understood what they are doing in detail with the VAO's.
But I was slightly looking into the direction of mmapping the buffer objects
and not flushing them correctly. That could potentially also lead to
such failures. Especially since some people observe and some not.
Not finally finished with investigating, but up to now I did not see something
wrong there.
One more breadcrumb:

Gert informed me (through other channels) that he had isolated this
issue to only trigger for indirect draws.

That might clear up a bit why this seems to happen in so few
applications; there's probably some combination of input layouts that
together with indirect draws become a very rare combination.
Post by Mathias Fröhlich
Post by Erik Faye-Lund
Post by Mathias Fröhlich
Hmm, one question, on the mentioned setup on virgl. How does
glxgears render on that setup? Or alternatively how do other OpenGL
applications different from supertuxkart on that setup?
glxgears renders just fine. We're also passing pretty much all of the
OpenGL 4.3 CTS and most of piglit. Generally speaking, this doesn't
trigger.
That was my expectation as well as it took so long to find something.
But still, not impossible that something is wrong.
Post by Erik Faye-Lund
I just got a notice that Serious Sam 3 has a similar problem (I haven't
tested this myself)... So perhaps there's some pattern that can be
found?
One observation that I saw with supertuxkart.
They really have a VAO that ends up with two effective bindings used
by 3 and 2 vertex attributes. That is the old gallium array
translation code
did produce 5 vertex_element struct entries and each of that has a
vertex_buffer struct assigned. The minimal pipe_vertex_buffer
translation
only happened in the old code if it could be reduced to a single vertex buffer entry.
Now the code produces that 3 pipe_vertex_element referencing 1
pipe_vertex_buffer and 2 pipe_vertex_element referencing an other
pipe_vertex_buffer. Which should be more optimal now but is it
possible
that virgl somewhere down the road only handles the n elements to one buffer
and the n element to n buffer case. So the question is is there a bug in the n elements
to 1 < m < n buffer case?
Do you know what I mean with the effective binding?
I'm not quite sure I follow here. What's n and m in this case (I seem
to see three definitions of n, where two are similar, and none of m)?

But looking through both virgl and virglrenderer, I can't spot anything
obviously wrong with the way inputs are being set up...
Post by Mathias Fröhlich
May be you can observe the same type of VAO in Serious Sam 3?
I don't have Serious Sam 3 running myself. Gert?
Post by Mathias Fröhlich
Well I have confidence that this triggers something. At least
debugging into
the upper parts in mesa then into i965 showed nothing unexpected.
best
Mathias
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Gert Wollny
2018-12-04 12:33:29 UTC
Permalink
Post by Erik Faye-Lund
But looking through both virgl and virglrenderer, I can't spot
anything obviously wrong with the way inputs are being set up...
Post by Mathias Fröhlich
May be you can observe the same type of VAO in Serious Sam 3?
I don't have Serious Sam 3 running myself. Gert?
I have not yet looked in detail, but disabling ARB_draw_indirect
doesn't help neither with SS3 nor with Shadow Warrior - at least the
latter renders fine on the r600 host, the SS3 doesn't properly start
currently, but the Talos Principle runs fine on the host (takes forever
to load in the guest so I didn't check with virgl) and this is more or
less the same engine.

Best,
Gert
Mathias Fröhlich
2018-12-10 06:56:40 UTC
Permalink
Good Morning,
Post by Gert Wollny
Post by Erik Faye-Lund
But looking through both virgl and virglrenderer, I can't spot
anything obviously wrong with the way inputs are being set up...
Post by Mathias Fröhlich
May be you can observe the same type of VAO in Serious Sam 3?
I don't have Serious Sam 3 running myself. Gert?
I have not yet looked in detail, but disabling ARB_draw_indirect
doesn't help neither with SS3 nor with Shadow Warrior - at least the
latter renders fine on the r600 host, the SS3 doesn't properly start
currently, but the Talos Principle runs fine on the host (takes forever
to load in the guest so I didn't check with virgl) and this is more or
less the same engine.
So, that means it's not just happening with indirect draws.
Hmm ...

best
Mathias

Mathias Fröhlich
2018-12-10 17:23:12 UTC
Permalink
Hi Erik,

Not sure if this is our problem as I think that I only saw simple
bindings with a zero instance divisor while debugging supertux kart.

But at least I think that this is a problem in virglrenderer. The
glVertexBindingDivisor is per binding and not per vertex attribute
in OpenGL.
... you probably want to solve that differently, but for now this should
be a quick band-aid to pinpoint the problem that we observe.

Does the attached patch to virglrenderer fix our problem?

best

Mathias
Mathias Fröhlich
2018-12-10 06:56:38 UTC
Permalink
Good Morning,

Again sorry, but since I only work here in the spare time, I did not find
enough time to respond earlier.
Post by Erik Faye-Lund
Post by Mathias Fröhlich
Sounds to me like that, or even worse something with the
supertuxkart.
I have not yet understood what they are doing in detail with the VAO's.
But I was slightly looking into the direction of mmapping the buffer objects
and not flushing them correctly. That could potentially also lead to
such failures. Especially since some people observe and some not.
Not finally finished with investigating, but up to now I did not see something
wrong there.
Gert informed me (through other channels) that he had isolated this
issue to only trigger for indirect draws.
That might clear up a bit why this seems to happen in so few
applications; there's probably some combination of input layouts that
together with indirect draws become a very rare combination.
That does not directly ring a bell for me, but at least that narrows down
the issue somehow.
Gert, do you know, indirect draws with the
struct Draw{Arrays,Elements}IndirectCommand
in user space memory or in a buffer object?
Post by Erik Faye-Lund
Post by Mathias Fröhlich
Post by Erik Faye-Lund
Post by Mathias Fröhlich
Hmm, one question, on the mentioned setup on virgl. How does
glxgears render on that setup? Or alternatively how do other OpenGL
applications different from supertuxkart on that setup?
glxgears renders just fine. We're also passing pretty much all of the
OpenGL 4.3 CTS and most of piglit. Generally speaking, this doesn't
trigger.
That was my expectation as well as it took so long to find something.
But still, not impossible that something is wrong.
Post by Erik Faye-Lund
I just got a notice that Serious Sam 3 has a similar problem (I haven't
tested this myself)... So perhaps there's some pattern that can be
found?
One observation that I saw with supertuxkart.
They really have a VAO that ends up with two effective bindings used
by 3 and 2 vertex attributes. That is the old gallium array
translation code
did produce 5 vertex_element struct entries and each of that has a
vertex_buffer struct assigned. The minimal pipe_vertex_buffer translation
only happened in the old code if it could be reduced to a single vertex buffer entry.
Now the code produces that 3 pipe_vertex_element referencing 1
pipe_vertex_buffer and 2 pipe_vertex_element referencing an other
pipe_vertex_buffer. Which should be more optimal now but is it possible
that virgl somewhere down the road only handles the n elements to one buffer
and the n element to n buffer case. So the question is is there a bug in the n elements
to 1 < m < n buffer case?
Do you know what I mean with the effective binding?
I'm not quite sure I follow here. What's n and m in this case (I seem
to see three definitions of n, where two are similar, and none of m)?
But looking through both virgl and virglrenderer, I can't spot anything
obviously wrong with the way inputs are being set up...
Ok, thanks for looking. I meant with M:N a layout something like

pipe_vertex_elements:
{ vertex_buffer_index = 0, src_offset = 0, ...},
{ vertex_buffer_index = 0, src_offset = 12, ...},
{ vertex_buffer_index = 1, src_offset = 0, ...},
{ vertex_buffer_index = 1, src_offset = 8, ...}

pipe_vertex_buffer:
{ buffer_offset = 0, buffer.resource = .... },
{ buffer_offset = 0, buffer.resource = .... }

Finally this is a 4:2 mapping.

The previous code did only produce either a N:1 mapping like

pipe_vertex_elements:
{ vertex_buffer_index = 0, src_offset = 0, ...},
{ vertex_buffer_index = 0, src_offset = 12, ...},
{ vertex_buffer_index = 0, src_offset = 24, ...},
{ vertex_buffer_index = 0, src_offset = 36, ...}

pipe_vertex_buffer:
{ buffer_offset = 0, buffer.resource = .... }

this one, if there was a single buffer object used that allows this kind of layout.
So there are *all* pipe_vertex_elements referring to a single pipe_vertex_buffer.

Or a N:N mapping like

pipe_vertex_elements:
{ vertex_buffer_index = 0, src_offset = 0, ...},
{ vertex_buffer_index = 1, src_offset = 0, ...},
{ vertex_buffer_index = 2, src_offset = 0, ...},
{ vertex_buffer_index = 3, src_offset = 0, ...}

pipe_vertex_buffer:
{ buffer_offset = 0, buffer.resource = .... },
{ buffer_offset = 12, buffer.resource = .... },
{ buffer_offset = 36, buffer.resource = .... },
{ buffer_offset = 24, buffer.resource = .... }

where you have one pipe_vertex_buffer per pipe_vertex_element.

So, if the backing driver somehow 'knew' that we could only get N:1
or N:N and not something like the 4:2 example above, we could easily
fail with the change you found.

best

Mathias
Marek Olšák
2018-11-26 20:42:16 UTC
Permalink
I have this issue on radeonsi as well, but the bad commit is:

commit f1998e15ffccf260552bf559abe1a733a8ce990e
Author: Erik Faye-Lund <erik.faye-***@collabora.com>
Date: Wed Nov 21 20:09:46 2018 +0100

mesa/main: remove ARB suffix from glGetnTexImage

This function has been core since OpenGL 4.3, so naming the
implementation and reporting erros using an ARB-suffix can be
confusing.

Signed-off-by: Erik Faye-Lund <erik.faye-***@collabora.com>
Reviewed-by: Juan A. Suarez <***@igalia.com>

Reverting the commit fixes supertuxkart.

Marek
Post by Erik Faye-Lund
Post by M***@gmx.net
Finally make use of the binding information in the VAO when
setting up arrays for draw.
v2: Emit less relocations also for interleaved userspace arrays.
I know this is *very* late notice, but this commit broke Super Tux Kart
on VirGL. Both the player-models as well as the level data render
with gibberish vertex positions since this commit.
The fix that Rob Clark did on top does not fix the problem (and
shouldn't have; VirGL doesn't use NIR).
Post by M***@gmx.net
---
src/mesa/state_tracker/st_atom_array.c | 432 ++++++++---------------
----------
1 file changed, 107 insertions(+), 325 deletions(-)
diff --git a/src/mesa/state_tracker/st_atom_array.c
b/src/mesa/state_tracker/st_atom_array.c
index 2fd67e8d84..6b39b4186d 100644
--- a/src/mesa/state_tracker/st_atom_array.c
+++ b/src/mesa/state_tracker/st_atom_array.c
@@ -48,6 +48,7 @@
#include "main/bufferobj.h"
#include "main/glformats.h"
#include "main/varray.h"
+#include "main/arrayobj.h"
/* vertex_formats[gltype - GL_BYTE][integer*2 + normalized][size - 1] */
static const uint16_t vertex_formats[][4][4] = {
@@ -306,79 +307,6 @@ st_pipe_vertex_format(const struct
gl_array_attributes *attrib)
return vertex_formats[type - GL_BYTE][index][size-1];
}
-static const struct gl_vertex_array *
-get_client_array(const struct gl_vertex_array *arrays,
- unsigned mesaAttr)
-{
- /* st_program uses 0xffffffff to denote a double placeholder attribute */
- if (mesaAttr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
- return NULL;
- return &arrays[mesaAttr];
-}
-
-/**
- * Examine the active arrays to determine if we have interleaved
- * vertex arrays all living in one VBO, or all living in user space.
- */
-static GLboolean
-is_interleaved_arrays(const struct st_vertex_program *vp,
- const struct gl_vertex_array *arrays,
- unsigned num_inputs)
-{
- GLuint attr;
- const struct gl_buffer_object *firstBufObj = NULL;
- GLint firstStride = -1;
- const GLubyte *firstPtr = NULL;
- GLboolean userSpaceBuffer = GL_FALSE;
-
- for (attr = 0; attr < num_inputs; attr++) {
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
- const GLubyte *ptr;
- const struct gl_buffer_object *bufObj;
- GLsizei stride;
-
- array = get_client_array(arrays, vp->index_to_input[attr]);
- if (!array)
- continue;
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- stride = binding->Stride; /* in bytes */
- ptr = _mesa_vertex_attrib_address(attrib, binding);
-
- /* To keep things simple, don't allow interleaved zero-stride attribs. */
- if (stride == 0)
- return false;
-
- bufObj = binding->BufferObj;
- if (attr == 0) {
- /* save info about the first array */
- firstStride = stride;
- firstPtr = ptr;
- firstBufObj = bufObj;
- userSpaceBuffer = !_mesa_is_bufferobj(bufObj);
- }
- else {
- /* check if other arrays interleave with the first, in same buffer */
- if (stride != firstStride)
- return GL_FALSE; /* strides don't match */
-
- if (bufObj != firstBufObj)
- return GL_FALSE; /* arrays in different VBOs */
-
- if (llabs(ptr - firstPtr) > firstStride)
- return GL_FALSE; /* arrays start too far apart */
-
- if ((!_mesa_is_bufferobj(bufObj)) != userSpaceBuffer)
- return GL_FALSE; /* mix of VBO and user-space arrays */
- }
- }
-
- return GL_TRUE;
-}
-
static void init_velement(struct pipe_vertex_element *velement,
int src_offset, int format,
int instance_divisor, int vbo_index)
@@ -392,13 +320,14 @@ static void init_velement(struct
pipe_vertex_element *velement,
static void init_velement_lowered(const struct st_vertex_program *vp,
struct pipe_vertex_element *velements,
- int src_offset, int format,
- int instance_divisor, int
vbo_index,
- int nr_components, GLboolean doubles,
- GLuint *attr_idx)
+ const struct gl_array_attributes *attrib,
+ int src_offset, int
instance_divisor,
+ int vbo_index, int idx)
{
- int idx = *attr_idx;
- if (doubles) {
+ const unsigned format = st_pipe_vertex_format(attrib);
+ const GLubyte nr_components = attrib->Size;
+
+ if (attrib->Doubles) {
int lower_format;
if (nr_components < 2)
@@ -427,15 +356,11 @@ static void init_velement_lowered(const struct
st_vertex_program *vp,
init_velement(&velements[idx], src_offset,
PIPE_FORMAT_R32G32_UINT,
instance_divisor, vbo_index);
}
-
- idx++;
}
} else {
init_velement(&velements[idx], src_offset,
format, instance_divisor, vbo_index);
- idx++;
}
- *attr_idx = idx;
}
static void
@@ -457,274 +382,131 @@ set_vertex_attribs(struct st_context *st,
cso_set_vertex_elements(cso, num_velements, velements);
}
-/**
- * Set up for drawing interleaved arrays that all live in one VBO
- * or all live in user space.
- * \param vbuffer returns vertex buffer info
- * \param velements returns vertex element info
- */
-static void
-setup_interleaved_attribs(struct st_context *st,
- const struct st_vertex_program *vp,
- const struct gl_vertex_array *arrays,
- unsigned num_inputs)
-{
- struct pipe_vertex_buffer vbuffer;
- struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS] = {{0}};
- GLuint attr;
- const GLubyte *low_addr = NULL;
- GLboolean usingVBO; /* all arrays in a VBO? */
- struct gl_buffer_object *bufobj;
- GLsizei stride;
-
- /* Find the lowest address of the arrays we're drawing,
- * Init bufobj and stride.
- */
- if (num_inputs) {
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
-
- array = get_client_array(arrays, vp->index_to_input[0]);
- assert(array);
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
-
- /* Since we're doing interleaved arrays, we know there'll be at most
- * one buffer object and the stride will be the same for all arrays.
- * Grab them now.
- */
- bufobj = binding->BufferObj;
- stride = binding->Stride;
-
- low_addr = _mesa_vertex_attrib_address(attrib, binding);
-
- for (attr = 1; attr < num_inputs; attr++) {
- const GLubyte *start;
- array = get_client_array(arrays, vp->index_to_input[attr]);
- if (!array)
- continue;
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- start = _mesa_vertex_attrib_address(attrib, binding);
- low_addr = MIN2(low_addr, start);
- }
- }
- else {
- /* not sure we'll ever have zero inputs, but play it safe */
- bufobj = NULL;
- stride = 0;
- low_addr = 0;
- }
-
- /* are the arrays in user space? */
- usingVBO = _mesa_is_bufferobj(bufobj);
-
- for (attr = 0; attr < num_inputs;) {
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
- const GLubyte *ptr;
- unsigned src_offset;
- unsigned src_format;
-
- array = get_client_array(arrays, vp->index_to_input[attr]);
- assert(array);
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- ptr = _mesa_vertex_attrib_address(attrib, binding);
-
- src_offset = (unsigned) (ptr - low_addr);
-
- src_format = st_pipe_vertex_format(attrib);
-
- init_velement_lowered(vp, velements, src_offset, src_format,
- binding->InstanceDivisor, 0,
- attrib->Size, attrib->Doubles, &attr);
- }
-
- /*
- * Return the vbuffer info and setup user-space attrib info, if needed.
- */
- if (num_inputs == 0) {
- /* just defensive coding here */
- vbuffer.buffer.resource = NULL;
- vbuffer.is_user_buffer = false;
- vbuffer.buffer_offset = 0;
- vbuffer.stride = 0;
- }
- else if (usingVBO) {
- /* all interleaved arrays in a VBO */
- struct st_buffer_object *stobj = st_buffer_object(bufobj);
-
- if (!stobj || !stobj->buffer) {
- st->vertex_array_out_of_memory = true;
- return; /* out-of-memory error probably */
- }
-
- vbuffer.buffer.resource = stobj->buffer;
- vbuffer.is_user_buffer = false;
- vbuffer.buffer_offset = pointer_to_offset(low_addr);
- vbuffer.stride = stride;
- }
- else {
- /* all interleaved arrays in user memory */
- vbuffer.buffer.user = low_addr;
- vbuffer.is_user_buffer = !!low_addr; /* if NULL, then unbind */
- vbuffer.buffer_offset = 0;
- vbuffer.stride = stride;
-
- if (low_addr)
- st->draw_needs_minmax_index = true;
- }
-
- set_vertex_attribs(st, &vbuffer, num_inputs ? 1 : 0,
- velements, num_inputs);
-}
-
-/**
- * Set up a separate pipe_vertex_buffer and pipe_vertex_element for each
- * vertex attribute.
- * \param vbuffer returns vertex buffer info
- * \param velements returns vertex element info
- */
-static void
-setup_non_interleaved_attribs(struct st_context *st,
- const struct st_vertex_program *vp,
- const struct gl_vertex_array *arrays,
- unsigned num_inputs)
+void st_update_array(struct st_context *st)
{
struct gl_context *ctx = st->ctx;
+ /* vertex program validation must be done before this */
+ const struct st_vertex_program *vp = st->vp;
+ /* _NEW_PROGRAM, ST_NEW_VS_STATE */
+ const GLbitfield inputs_read = st->vp_variant->vert_attrib_mask;
+ const struct gl_vertex_array_object *vao = ctx->Array._DrawVAO;
+ const ubyte *input_to_index = vp->input_to_index;
+
struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS];
- struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS] = {{0}};
+ struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
unsigned num_vbuffers = 0;
- unsigned unref_buffers = 0;
- GLuint attr;
-
- for (attr = 0; attr < num_inputs;) {
- const unsigned mesaAttr = vp->index_to_input[attr];
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
- struct gl_buffer_object *bufobj;
- GLsizei stride;
- unsigned src_format;
- unsigned bufidx;
-
- array = get_client_array(arrays, mesaAttr);
- assert(array);
-
- bufidx = num_vbuffers++;
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- stride = binding->Stride;
- bufobj = binding->BufferObj;
-
- if (_mesa_is_bufferobj(bufobj)) {
- /* Attribute data is in a VBO.
- * Recall that for VBOs, the gl_vertex_array->Ptr field is
- * really an offset from the start of the VBO, not a pointer.
- */
- struct st_buffer_object *stobj = st_buffer_object(bufobj);
+ st->vertex_array_out_of_memory = FALSE;
+ st->draw_needs_minmax_index = false;
+
+ /* _NEW_PROGRAM */
+ /* ST_NEW_VERTEX_ARRAYS alias ctx->DriverFlags.NewArray */
+ /* Process attribute array data. */
+ GLbitfield mask = inputs_read & _mesa_draw_array_bits(ctx);
+ while (mask) {
+ /* The attribute index to start pulling a binding */
+ const gl_vert_attrib i = ffs(mask) - 1;
+ const struct gl_vertex_buffer_binding *const binding
+ = _mesa_draw_buffer_binding(vao, i);
+ const unsigned bufidx = num_vbuffers++;
+
+ if (_mesa_is_bufferobj(binding->BufferObj)) {
+ struct st_buffer_object *stobj = st_buffer_object(binding-
Post by M***@gmx.net
BufferObj);
if (!stobj || !stobj->buffer) {
st->vertex_array_out_of_memory = true;
return; /* out-of-memory error probably */
}
+ /* Set the binding */
vbuffer[bufidx].buffer.resource = stobj->buffer;
vbuffer[bufidx].is_user_buffer = false;
- vbuffer[bufidx].buffer_offset =
- binding->Offset + attrib->RelativeOffset;
+ vbuffer[bufidx].buffer_offset =
_mesa_draw_binding_offset(binding);
+ } else {
+ /* Set the binding */
+ const void *ptr = (const void
*)_mesa_draw_binding_offset(binding);
+ vbuffer[bufidx].buffer.user = ptr;
+ vbuffer[bufidx].is_user_buffer = true;
+ vbuffer[bufidx].buffer_offset = 0;
+
+ if (!binding->InstanceDivisor)
+ st->draw_needs_minmax_index = true;
}
- else {
- if (stride == 0) {
- unsigned size = attrib->_ElementSize;
- /* This is optimal for GPU cache line usage if the upload size
- * is <= cache line size.
- */
- unsigned alignment = util_next_power_of_two(size);
-
- assert(attrib->Ptr);
- vbuffer[bufidx].buffer.user = attrib->Ptr;
- (void*)ctx-
Post by M***@gmx.net
Current.Attrib[mesaAttr];
-
- vbuffer[bufidx].is_user_buffer = false;
- vbuffer[bufidx].buffer.resource = NULL;
-
- /* Use const_uploader for zero-stride vertex attributes, because
- * it may use a better memory placement than
stream_uploader.
- * The reason is that zero-stride attributes can be fetched many
- * times (thousands of times), so a better placement is going to
- * perform better.
- *
- * Upload the maximum possible size, which is 4x GLdouble = 32.
- */
- u_upload_data(st->can_bind_const_buffer_as_vertex ?
- st->pipe->stream_uploader,
- 0, size, alignment, ptr,
- &vbuffer[bufidx].buffer_offset,
- &vbuffer[bufidx].buffer.resource);
- unref_buffers |= 1u << bufidx;
- } else {
- assert(attrib->Ptr);
- vbuffer[bufidx].buffer.user = attrib->Ptr;
- vbuffer[bufidx].is_user_buffer = true;
- vbuffer[bufidx].buffer_offset = 0;
-
- if (!binding->InstanceDivisor)
- st->draw_needs_minmax_index = true;
- }
+ vbuffer[bufidx].stride = binding->Stride; /* in bytes */
+
+ const GLbitfield boundmask =
_mesa_draw_bound_attrib_bits(binding);
+ GLbitfield attrmask = mask & boundmask;
+ /* Mark the those attributes as processed */
+ mask &= ~boundmask;
+ /* We can assume that we have array for the binding */
+ assert(attrmask);
+ /* Walk attributes belonging to the binding */
+ while (attrmask) {
+ const gl_vert_attrib attr = u_bit_scan(&attrmask);
+ const struct gl_array_attributes *const attrib
+ = _mesa_draw_array_attrib(vao, attr);
+ const GLuint off =
_mesa_draw_attributes_relative_offset(attrib);
+ init_velement_lowered(vp, velements, attrib, off,
+ binding->InstanceDivisor, bufidx,
+ input_to_index[attr]);
}
+ }
- /* common-case setup */
- vbuffer[bufidx].stride = stride; /* in bytes */
+ const unsigned first_current_vbuffer = num_vbuffers;
+ /* _NEW_PROGRAM | _NEW_CURRENT_ATTRIB */
+ /* Process values that should have better been uniforms in the application */
+ GLbitfield curmask = inputs_read & _mesa_draw_current_bits(ctx);
+ if (curmask) {
+ /* For each attribute, upload the maximum possible size. */
+ GLubyte data[VERT_ATTRIB_MAX*sizeof(GLdouble)*4];
+ GLubyte *cursor = data;
+ const unsigned bufidx = num_vbuffers++;
+ unsigned max_alignment = 1;
+
+ while (curmask) {
+ const gl_vert_attrib attr = u_bit_scan(&curmask);
+ const struct gl_array_attributes *const attrib
+ = _mesa_draw_current_attrib(ctx, attr);
+ const unsigned size = attrib->_ElementSize;
+ const unsigned alignment = util_next_power_of_two(size);
+ max_alignment = MAX2(max_alignment, alignment);
+ memcpy(cursor, attrib->Ptr, size);
+ if (alignment != size)
+ memset(cursor + size, 0, alignment - size);
+
+ init_velement_lowered(vp, velements, attrib, cursor - data, 0,
+ bufidx, input_to_index[attr]);
+
+ cursor += alignment;
+ }
- src_format = st_pipe_vertex_format(attrib);
+ vbuffer[bufidx].is_user_buffer = false;
+ vbuffer[bufidx].buffer.resource = NULL;
+ /* vbuffer[bufidx].buffer_offset is set below */
+ vbuffer[bufidx].stride = 0;
- init_velement_lowered(vp, velements, 0, src_format,
- binding->InstanceDivisor, bufidx,
- attrib->Size, attrib->Doubles, &attr);
+ /* Use const_uploader for zero-stride vertex attributes, because
+ * it may use a better memory placement than stream_uploader.
+ * The reason is that zero-stride attributes can be fetched many
+ * times (thousands of times), so a better placement is going to
+ * perform better.
+ */
+ u_upload_data(st->can_bind_const_buffer_as_vertex ?
+ st->pipe->stream_uploader,
+ 0, cursor - data, max_alignment, data,
+ &vbuffer[bufidx].buffer_offset,
+ &vbuffer[bufidx].buffer.resource);
}
if (!ctx->Const.AllowMappedBuffersDuringExecution) {
u_upload_unmap(st->pipe->stream_uploader);
}
+ const unsigned num_inputs = st->vp_variant->num_inputs;
set_vertex_attribs(st, vbuffer, num_vbuffers, velements,
num_inputs);
/* Unreference uploaded zero-stride vertex buffers. */
- while (unref_buffers) {
- unsigned i = u_bit_scan(&unref_buffers);
+ for (unsigned i = first_current_vbuffer; i < num_vbuffers; ++i) {
pipe_resource_reference(&vbuffer[i].buffer.resource, NULL);
}
}
-
-void st_update_array(struct st_context *st)
-{
- struct gl_context *ctx = st->ctx;
- const struct gl_vertex_array *arrays = ctx->Array._DrawArrays;
- const struct st_vertex_program *vp;
- unsigned num_inputs;
-
- st->vertex_array_out_of_memory = FALSE;
- st->draw_needs_minmax_index = false;
-
- /* No drawing has been done yet, so do nothing. */
- if (!arrays)
- return;
-
- /* vertex program validation must be done before this */
- vp = st->vp;
- num_inputs = st->vp_variant->num_inputs;
-
- if (is_interleaved_arrays(vp, arrays, num_inputs))
- setup_interleaved_attribs(st, vp, arrays, num_inputs);
- else
- setup_non_interleaved_attribs(st, vp, arrays, num_inputs);
-}
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Erik Faye-Lund
2018-11-26 20:56:22 UTC
Permalink
Uhm, what? Are you sure you're responding to the right email and
linking the right commit? Because, well, this problem has been present
here since Mesa 18.1, and that commit went in *today*... Also, it seems
very unlikely for a GetTexImage-related commit to change the rendered
*geometry*...
Post by Marek Olšák
commit f1998e15ffccf260552bf559abe1a733a8ce990e
Date: Wed Nov 21 20:09:46 2018 +0100
mesa/main: remove ARB suffix from glGetnTexImage
This function has been core since OpenGL 4.3, so naming the
implementation and reporting erros using an ARB-suffix can be
confusing.
Reverting the commit fixes supertuxkart.
Marek
On Mon, Nov 26, 2018 at 1:40 PM Erik Faye-Lund <
Post by Erik Faye-Lund
Post by M***@gmx.net
Finally make use of the binding information in the VAO when
setting up arrays for draw.
v2: Emit less relocations also for interleaved userspace arrays.
I know this is *very* late notice, but this commit broke Super Tux Kart
on VirGL. Both the player-models as well as the level data render
with gibberish vertex positions since this commit.
The fix that Rob Clark did on top does not fix the problem (and
shouldn't have; VirGL doesn't use NIR).
Post by M***@gmx.net
---
src/mesa/state_tracker/st_atom_array.c | 432 ++++++++-----------
----
Post by M***@gmx.net
----------
1 file changed, 107 insertions(+), 325 deletions(-)
diff --git a/src/mesa/state_tracker/st_atom_array.c
b/src/mesa/state_tracker/st_atom_array.c
index 2fd67e8d84..6b39b4186d 100644
--- a/src/mesa/state_tracker/st_atom_array.c
+++ b/src/mesa/state_tracker/st_atom_array.c
@@ -48,6 +48,7 @@
#include "main/bufferobj.h"
#include "main/glformats.h"
#include "main/varray.h"
+#include "main/arrayobj.h"
/* vertex_formats[gltype - GL_BYTE][integer*2 + normalized][size
-
Post by M***@gmx.net
1] */
static const uint16_t vertex_formats[][4][4] = {
@@ -306,79 +307,6 @@ st_pipe_vertex_format(const struct
gl_array_attributes *attrib)
return vertex_formats[type - GL_BYTE][index][size-1];
}
-static const struct gl_vertex_array *
-get_client_array(const struct gl_vertex_array *arrays,
- unsigned mesaAttr)
-{
- /* st_program uses 0xffffffff to denote a double placeholder attribute */
- if (mesaAttr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
- return NULL;
- return &arrays[mesaAttr];
-}
-
-/**
- * Examine the active arrays to determine if we have interleaved
- * vertex arrays all living in one VBO, or all living in user
space.
Post by M***@gmx.net
- */
-static GLboolean
-is_interleaved_arrays(const struct st_vertex_program *vp,
- const struct gl_vertex_array *arrays,
- unsigned num_inputs)
-{
- GLuint attr;
- const struct gl_buffer_object *firstBufObj = NULL;
- GLint firstStride = -1;
- const GLubyte *firstPtr = NULL;
- GLboolean userSpaceBuffer = GL_FALSE;
-
- for (attr = 0; attr < num_inputs; attr++) {
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
- const GLubyte *ptr;
- const struct gl_buffer_object *bufObj;
- GLsizei stride;
-
- array = get_client_array(arrays, vp-
index_to_input[attr]);
- if (!array)
- continue;
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- stride = binding->Stride; /* in bytes */
- ptr = _mesa_vertex_attrib_address(attrib, binding);
-
- /* To keep things simple, don't allow interleaved zero-
stride
Post by M***@gmx.net
attribs. */
- if (stride == 0)
- return false;
-
- bufObj = binding->BufferObj;
- if (attr == 0) {
- /* save info about the first array */
- firstStride = stride;
- firstPtr = ptr;
- firstBufObj = bufObj;
- userSpaceBuffer = !_mesa_is_bufferobj(bufObj);
- }
- else {
- /* check if other arrays interleave with the first, in
same
Post by M***@gmx.net
buffer */
- if (stride != firstStride)
- return GL_FALSE; /* strides don't match */
-
- if (bufObj != firstBufObj)
- return GL_FALSE; /* arrays in different VBOs */
-
- if (llabs(ptr - firstPtr) > firstStride)
- return GL_FALSE; /* arrays start too far apart */
-
- if ((!_mesa_is_bufferobj(bufObj)) != userSpaceBuffer)
- return GL_FALSE; /* mix of VBO and user-space arrays
*/
Post by M***@gmx.net
- }
- }
-
- return GL_TRUE;
-}
-
static void init_velement(struct pipe_vertex_element *velement,
int src_offset, int format,
int instance_divisor, int vbo_index)
@@ -392,13 +320,14 @@ static void init_velement(struct
pipe_vertex_element *velement,
static void init_velement_lowered(const struct st_vertex_program *vp,
struct pipe_vertex_element *velements,
- int src_offset, int format,
- int instance_divisor, int vbo_index,
- int nr_components, GLboolean doubles,
- GLuint *attr_idx)
+ const struct
gl_array_attributes
Post by M***@gmx.net
*attrib,
+ int src_offset, int
instance_divisor,
+ int vbo_index, int idx)
{
- int idx = *attr_idx;
- if (doubles) {
+ const unsigned format = st_pipe_vertex_format(attrib);
+ const GLubyte nr_components = attrib->Size;
+
+ if (attrib->Doubles) {
int lower_format;
if (nr_components < 2)
@@ -427,15 +356,11 @@ static void init_velement_lowered(const
struct
Post by M***@gmx.net
st_vertex_program *vp,
init_velement(&velements[idx], src_offset,
PIPE_FORMAT_R32G32_UINT,
instance_divisor, vbo_index);
}
-
- idx++;
}
} else {
init_velement(&velements[idx], src_offset,
format, instance_divisor, vbo_index);
- idx++;
}
- *attr_idx = idx;
}
static void
@@ -457,274 +382,131 @@ set_vertex_attribs(struct st_context *st,
cso_set_vertex_elements(cso, num_velements, velements);
}
-/**
- * Set up for drawing interleaved arrays that all live in one
VBO
Post by M***@gmx.net
- * or all live in user space.
- * \param vbuffer returns vertex buffer info
- * \param velements returns vertex element info
- */
-static void
-setup_interleaved_attribs(struct st_context *st,
- const struct st_vertex_program *vp,
- const struct gl_vertex_array *arrays,
- unsigned num_inputs)
-{
- struct pipe_vertex_buffer vbuffer;
- struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS] =
{{0}};
Post by M***@gmx.net
- GLuint attr;
- const GLubyte *low_addr = NULL;
- GLboolean usingVBO; /* all arrays in a VBO? */
- struct gl_buffer_object *bufobj;
- GLsizei stride;
-
- /* Find the lowest address of the arrays we're drawing,
- * Init bufobj and stride.
- */
- if (num_inputs) {
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
-
- array = get_client_array(arrays, vp->index_to_input[0]);
- assert(array);
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
-
- /* Since we're doing interleaved arrays, we know there'll
be
Post by M***@gmx.net
at most
- * one buffer object and the stride will be the same for
all
Post by M***@gmx.net
arrays.
- * Grab them now.
- */
- bufobj = binding->BufferObj;
- stride = binding->Stride;
-
- low_addr = _mesa_vertex_attrib_address(attrib, binding);
-
- for (attr = 1; attr < num_inputs; attr++) {
- const GLubyte *start;
- array = get_client_array(arrays, vp-
index_to_input[attr]);
- if (!array)
- continue;
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- start = _mesa_vertex_attrib_address(attrib, binding);
- low_addr = MIN2(low_addr, start);
- }
- }
- else {
- /* not sure we'll ever have zero inputs, but play it safe
*/
Post by M***@gmx.net
- bufobj = NULL;
- stride = 0;
- low_addr = 0;
- }
-
- /* are the arrays in user space? */
- usingVBO = _mesa_is_bufferobj(bufobj);
-
- for (attr = 0; attr < num_inputs;) {
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
- const GLubyte *ptr;
- unsigned src_offset;
- unsigned src_format;
-
- array = get_client_array(arrays, vp-
index_to_input[attr]);
- assert(array);
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- ptr = _mesa_vertex_attrib_address(attrib, binding);
-
- src_offset = (unsigned) (ptr - low_addr);
-
- src_format = st_pipe_vertex_format(attrib);
-
- init_velement_lowered(vp, velements, src_offset,
src_format,
Post by M***@gmx.net
- binding->InstanceDivisor, 0,
- attrib->Size, attrib->Doubles,
&attr);
Post by M***@gmx.net
- }
-
- /*
- * Return the vbuffer info and setup user-space attrib info,
if
Post by M***@gmx.net
needed.
- */
- if (num_inputs == 0) {
- /* just defensive coding here */
- vbuffer.buffer.resource = NULL;
- vbuffer.is_user_buffer = false;
- vbuffer.buffer_offset = 0;
- vbuffer.stride = 0;
- }
- else if (usingVBO) {
- /* all interleaved arrays in a VBO */
- struct st_buffer_object *stobj = st_buffer_object(bufobj);
-
- if (!stobj || !stobj->buffer) {
- st->vertex_array_out_of_memory = true;
- return; /* out-of-memory error probably */
- }
-
- vbuffer.buffer.resource = stobj->buffer;
- vbuffer.is_user_buffer = false;
- vbuffer.buffer_offset = pointer_to_offset(low_addr);
- vbuffer.stride = stride;
- }
- else {
- /* all interleaved arrays in user memory */
- vbuffer.buffer.user = low_addr;
- vbuffer.is_user_buffer = !!low_addr; /* if NULL, then
unbind
Post by M***@gmx.net
*/
- vbuffer.buffer_offset = 0;
- vbuffer.stride = stride;
-
- if (low_addr)
- st->draw_needs_minmax_index = true;
- }
-
- set_vertex_attribs(st, &vbuffer, num_inputs ? 1 : 0,
- velements, num_inputs);
-}
-
-/**
- * Set up a separate pipe_vertex_buffer and pipe_vertex_element
for
Post by M***@gmx.net
each
- * vertex attribute.
- * \param vbuffer returns vertex buffer info
- * \param velements returns vertex element info
- */
-static void
-setup_non_interleaved_attribs(struct st_context *st,
- const struct st_vertex_program
*vp,
Post by M***@gmx.net
- const struct gl_vertex_array
*arrays,
Post by M***@gmx.net
- unsigned num_inputs)
+void st_update_array(struct st_context *st)
{
struct gl_context *ctx = st->ctx;
+ /* vertex program validation must be done before this */
+ const struct st_vertex_program *vp = st->vp;
+ /* _NEW_PROGRAM, ST_NEW_VS_STATE */
+ const GLbitfield inputs_read = st->vp_variant-
vert_attrib_mask;
+ const struct gl_vertex_array_object *vao = ctx-
Array._DrawVAO;
+ const ubyte *input_to_index = vp->input_to_index;
+
struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS];
- struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS] =
{{0}};
Post by M***@gmx.net
+ struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
unsigned num_vbuffers = 0;
- unsigned unref_buffers = 0;
- GLuint attr;
-
- for (attr = 0; attr < num_inputs;) {
- const unsigned mesaAttr = vp->index_to_input[attr];
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
- struct gl_buffer_object *bufobj;
- GLsizei stride;
- unsigned src_format;
- unsigned bufidx;
-
- array = get_client_array(arrays, mesaAttr);
- assert(array);
-
- bufidx = num_vbuffers++;
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- stride = binding->Stride;
- bufobj = binding->BufferObj;
-
- if (_mesa_is_bufferobj(bufobj)) {
- /* Attribute data is in a VBO.
- * Recall that for VBOs, the gl_vertex_array->Ptr field
is
Post by M***@gmx.net
- * really an offset from the start of the VBO, not a pointer.
- */
- struct st_buffer_object *stobj =
st_buffer_object(bufobj);
Post by M***@gmx.net
+ st->vertex_array_out_of_memory = FALSE;
+ st->draw_needs_minmax_index = false;
+
+ /* _NEW_PROGRAM */
+ /* ST_NEW_VERTEX_ARRAYS alias ctx->DriverFlags.NewArray */
+ /* Process attribute array data. */
+ GLbitfield mask = inputs_read & _mesa_draw_array_bits(ctx);
+ while (mask) {
+ /* The attribute index to start pulling a binding */
+ const gl_vert_attrib i = ffs(mask) - 1;
+ const struct gl_vertex_buffer_binding *const binding
+ = _mesa_draw_buffer_binding(vao, i);
+ const unsigned bufidx = num_vbuffers++;
+
+ if (_mesa_is_bufferobj(binding->BufferObj)) {
+ struct st_buffer_object *stobj =
st_buffer_object(binding-
Post by M***@gmx.net
Post by M***@gmx.net
BufferObj);
if (!stobj || !stobj->buffer) {
st->vertex_array_out_of_memory = true;
return; /* out-of-memory error probably */
}
+ /* Set the binding */
vbuffer[bufidx].buffer.resource = stobj->buffer;
vbuffer[bufidx].is_user_buffer = false;
- vbuffer[bufidx].buffer_offset =
- binding->Offset + attrib->RelativeOffset;
+ vbuffer[bufidx].buffer_offset =
_mesa_draw_binding_offset(binding);
+ } else {
+ /* Set the binding */
+ const void *ptr = (const void
*)_mesa_draw_binding_offset(binding);
+ vbuffer[bufidx].buffer.user = ptr;
+ vbuffer[bufidx].is_user_buffer = true;
+ vbuffer[bufidx].buffer_offset = 0;
+
+ if (!binding->InstanceDivisor)
+ st->draw_needs_minmax_index = true;
}
- else {
- if (stride == 0) {
- unsigned size = attrib->_ElementSize;
- /* This is optimal for GPU cache line usage if the upload size
- * is <= cache line size.
- */
- unsigned alignment = util_next_power_of_two(size);
-
- assert(attrib->Ptr);
- vbuffer[bufidx].buffer.user = attrib->Ptr;
- (void*)ctx-
Post by M***@gmx.net
Current.Attrib[mesaAttr];
-
- vbuffer[bufidx].is_user_buffer = false;
- vbuffer[bufidx].buffer.resource = NULL;
-
- /* Use const_uploader for zero-stride vertex
attributes,
Post by M***@gmx.net
because
- * it may use a better memory placement than
stream_uploader.
- * The reason is that zero-stride attributes can be fetched many
- * times (thousands of times), so a better placement
is
Post by M***@gmx.net
going to
- * perform better.
- *
- * Upload the maximum possible size, which is 4x GLdouble = 32.
- */
- u_upload_data(st->can_bind_const_buffer_as_vertex ?
- st->pipe->stream_uploader,
- 0, size, alignment, ptr,
- &vbuffer[bufidx].buffer_offset,
- &vbuffer[bufidx].buffer.resource);
- unref_buffers |= 1u << bufidx;
- } else {
- assert(attrib->Ptr);
- vbuffer[bufidx].buffer.user = attrib->Ptr;
- vbuffer[bufidx].is_user_buffer = true;
- vbuffer[bufidx].buffer_offset = 0;
-
- if (!binding->InstanceDivisor)
- st->draw_needs_minmax_index = true;
- }
+ vbuffer[bufidx].stride = binding->Stride; /* in bytes */
+
+ const GLbitfield boundmask =
_mesa_draw_bound_attrib_bits(binding);
+ GLbitfield attrmask = mask & boundmask;
+ /* Mark the those attributes as processed */
+ mask &= ~boundmask;
+ /* We can assume that we have array for the binding */
+ assert(attrmask);
+ /* Walk attributes belonging to the binding */
+ while (attrmask) {
+ const gl_vert_attrib attr = u_bit_scan(&attrmask);
+ const struct gl_array_attributes *const attrib
+ = _mesa_draw_array_attrib(vao, attr);
+ const GLuint off =
_mesa_draw_attributes_relative_offset(attrib);
+ init_velement_lowered(vp, velements, attrib, off,
+ binding->InstanceDivisor, bufidx,
+ input_to_index[attr]);
}
+ }
- /* common-case setup */
- vbuffer[bufidx].stride = stride; /* in bytes */
+ const unsigned first_current_vbuffer = num_vbuffers;
+ /* _NEW_PROGRAM | _NEW_CURRENT_ATTRIB */
+ /* Process values that should have better been uniforms in
the
Post by M***@gmx.net
application */
+ GLbitfield curmask = inputs_read &
_mesa_draw_current_bits(ctx);
Post by M***@gmx.net
+ if (curmask) {
+ /* For each attribute, upload the maximum possible size.
*/
Post by M***@gmx.net
+ GLubyte data[VERT_ATTRIB_MAX*sizeof(GLdouble)*4];
+ GLubyte *cursor = data;
+ const unsigned bufidx = num_vbuffers++;
+ unsigned max_alignment = 1;
+
+ while (curmask) {
+ const gl_vert_attrib attr = u_bit_scan(&curmask);
+ const struct gl_array_attributes *const attrib
+ = _mesa_draw_current_attrib(ctx, attr);
+ const unsigned size = attrib->_ElementSize;
+ const unsigned alignment =
util_next_power_of_two(size);
Post by M***@gmx.net
+ max_alignment = MAX2(max_alignment, alignment);
+ memcpy(cursor, attrib->Ptr, size);
+ if (alignment != size)
+ memset(cursor + size, 0, alignment - size);
+
+ init_velement_lowered(vp, velements, attrib, cursor -
data,
Post by M***@gmx.net
0,
+ bufidx, input_to_index[attr]);
+
+ cursor += alignment;
+ }
- src_format = st_pipe_vertex_format(attrib);
+ vbuffer[bufidx].is_user_buffer = false;
+ vbuffer[bufidx].buffer.resource = NULL;
+ /* vbuffer[bufidx].buffer_offset is set below */
+ vbuffer[bufidx].stride = 0;
- init_velement_lowered(vp, velements, 0, src_format,
- binding->InstanceDivisor, bufidx,
- attrib->Size, attrib->Doubles,
&attr);
Post by M***@gmx.net
+ /* Use const_uploader for zero-stride vertex attributes, because
+ * it may use a better memory placement than
stream_uploader.
Post by M***@gmx.net
+ * The reason is that zero-stride attributes can be
fetched
Post by M***@gmx.net
many
+ * times (thousands of times), so a better placement is
going
Post by M***@gmx.net
to
+ * perform better.
+ */
+ u_upload_data(st->can_bind_const_buffer_as_vertex ?
+ st->pipe->stream_uploader,
+ 0, cursor - data, max_alignment, data,
+ &vbuffer[bufidx].buffer_offset,
+ &vbuffer[bufidx].buffer.resource);
}
if (!ctx->Const.AllowMappedBuffersDuringExecution) {
u_upload_unmap(st->pipe->stream_uploader);
}
+ const unsigned num_inputs = st->vp_variant->num_inputs;
set_vertex_attribs(st, vbuffer, num_vbuffers, velements, num_inputs);
/* Unreference uploaded zero-stride vertex buffers. */
- while (unref_buffers) {
- unsigned i = u_bit_scan(&unref_buffers);
+ for (unsigned i = first_current_vbuffer; i < num_vbuffers;
++i) {
Post by M***@gmx.net
pipe_resource_reference(&vbuffer[i].buffer.resource,
NULL);
Post by M***@gmx.net
}
}
-
-void st_update_array(struct st_context *st)
-{
- struct gl_context *ctx = st->ctx;
- const struct gl_vertex_array *arrays = ctx-
Array._DrawArrays;
- const struct st_vertex_program *vp;
- unsigned num_inputs;
-
- st->vertex_array_out_of_memory = FALSE;
- st->draw_needs_minmax_index = false;
-
- /* No drawing has been done yet, so do nothing. */
- if (!arrays)
- return;
-
- /* vertex program validation must be done before this */
- vp = st->vp;
- num_inputs = st->vp_variant->num_inputs;
-
- if (is_interleaved_arrays(vp, arrays, num_inputs))
- setup_interleaved_attribs(st, vp, arrays, num_inputs);
- else
- setup_non_interleaved_attribs(st, vp, arrays, num_inputs);
-}
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Marek Olšák
2018-11-26 21:21:55 UTC
Permalink
Yes, it's the right thread and the right commit for radeonsi. All geometry
is broken. I might revert it.

Marek
Post by Erik Faye-Lund
Uhm, what? Are you sure you're responding to the right email and
linking the right commit? Because, well, this problem has been present
here since Mesa 18.1, and that commit went in *today*... Also, it seems
very unlikely for a GetTexImage-related commit to change the rendered
*geometry*...
Post by Marek Olšák
commit f1998e15ffccf260552bf559abe1a733a8ce990e
Date: Wed Nov 21 20:09:46 2018 +0100
mesa/main: remove ARB suffix from glGetnTexImage
This function has been core since OpenGL 4.3, so naming the
implementation and reporting errors using an ARB-suffix can be
confusing.
Reverting the commit fixes supertuxkart.
Marek
On Mon, Nov 26, 2018 at 1:40 PM Erik Faye-Lund <
Post by Erik Faye-Lund
Post by M***@gmx.net
Finally make use of the binding information in the VAO when
setting up arrays for draw.
v2: Emit fewer relocations also for interleaved userspace arrays.
I know this is *very* late notice, but this commit broke Super Tux Kart
on VirGL. Both the player-models as well as the level data render
with gibberish vertex positions since this commit.
The fix that Rob Clark did on top does not fix the problem (and
shouldn't have; VirGL doesn't use NIR).
Post by M***@gmx.net
---
src/mesa/state_tracker/st_atom_array.c | 432 ++++++++-----------
----
Post by M***@gmx.net
----------
1 file changed, 107 insertions(+), 325 deletions(-)
diff --git a/src/mesa/state_tracker/st_atom_array.c
b/src/mesa/state_tracker/st_atom_array.c
index 2fd67e8d84..6b39b4186d 100644
--- a/src/mesa/state_tracker/st_atom_array.c
+++ b/src/mesa/state_tracker/st_atom_array.c
@@ -48,6 +48,7 @@
#include "main/bufferobj.h"
#include "main/glformats.h"
#include "main/varray.h"
+#include "main/arrayobj.h"
/* vertex_formats[gltype - GL_BYTE][integer*2 + normalized][size
-
Post by M***@gmx.net
1] */
static const uint16_t vertex_formats[][4][4] = {
@@ -306,79 +307,6 @@ st_pipe_vertex_format(const struct
gl_array_attributes *attrib)
return vertex_formats[type - GL_BYTE][index][size-1];
}
-static const struct gl_vertex_array *
-get_client_array(const struct gl_vertex_array *arrays,
- unsigned mesaAttr)
-{
- /* st_program uses 0xffffffff to denote a double placeholder attribute */
- if (mesaAttr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
- return NULL;
- return &arrays[mesaAttr];
-}
-
-/**
- * Examine the active arrays to determine if we have interleaved
- * vertex arrays all living in one VBO, or all living in user
space.
Post by M***@gmx.net
- */
-static GLboolean
-is_interleaved_arrays(const struct st_vertex_program *vp,
- const struct gl_vertex_array *arrays,
- unsigned num_inputs)
-{
- GLuint attr;
- const struct gl_buffer_object *firstBufObj = NULL;
- GLint firstStride = -1;
- const GLubyte *firstPtr = NULL;
- GLboolean userSpaceBuffer = GL_FALSE;
-
- for (attr = 0; attr < num_inputs; attr++) {
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
- const GLubyte *ptr;
- const struct gl_buffer_object *bufObj;
- GLsizei stride;
-
- array = get_client_array(arrays, vp-
index_to_input[attr]);
- if (!array)
- continue;
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- stride = binding->Stride; /* in bytes */
- ptr = _mesa_vertex_attrib_address(attrib, binding);
-
- /* To keep things simple, don't allow interleaved zero-
stride
Post by M***@gmx.net
attribs. */
- if (stride == 0)
- return false;
-
- bufObj = binding->BufferObj;
- if (attr == 0) {
- /* save info about the first array */
- firstStride = stride;
- firstPtr = ptr;
- firstBufObj = bufObj;
- userSpaceBuffer = !_mesa_is_bufferobj(bufObj);
- }
- else {
- /* check if other arrays interleave with the first, in
same
Post by M***@gmx.net
buffer */
- if (stride != firstStride)
- return GL_FALSE; /* strides don't match */
-
- if (bufObj != firstBufObj)
- return GL_FALSE; /* arrays in different VBOs */
-
- if (llabs(ptr - firstPtr) > firstStride)
- return GL_FALSE; /* arrays start too far apart */
-
- if ((!_mesa_is_bufferobj(bufObj)) != userSpaceBuffer)
- return GL_FALSE; /* mix of VBO and user-space arrays
*/
Post by M***@gmx.net
- }
- }
-
- return GL_TRUE;
-}
-
static void init_velement(struct pipe_vertex_element *velement,
int src_offset, int format,
int instance_divisor, int vbo_index)
@@ -392,13 +320,14 @@ static void init_velement(struct
pipe_vertex_element *velement,
static void init_velement_lowered(const struct st_vertex_program *vp,
struct pipe_vertex_element *velements,
- int src_offset, int format,
- int instance_divisor, int vbo_index,
- int nr_components, GLboolean doubles,
- GLuint *attr_idx)
+ const struct
gl_array_attributes
Post by M***@gmx.net
*attrib,
+ int src_offset, int
instance_divisor,
+ int vbo_index, int idx)
{
- int idx = *attr_idx;
- if (doubles) {
+ const unsigned format = st_pipe_vertex_format(attrib);
+ const GLubyte nr_components = attrib->Size;
+
+ if (attrib->Doubles) {
int lower_format;
if (nr_components < 2)
@@ -427,15 +356,11 @@ static void init_velement_lowered(const
struct
Post by M***@gmx.net
st_vertex_program *vp,
init_velement(&velements[idx], src_offset,
PIPE_FORMAT_R32G32_UINT,
instance_divisor, vbo_index);
}
-
- idx++;
}
} else {
init_velement(&velements[idx], src_offset,
format, instance_divisor, vbo_index);
- idx++;
}
- *attr_idx = idx;
}
static void
@@ -457,274 +382,131 @@ set_vertex_attribs(struct st_context *st,
cso_set_vertex_elements(cso, num_velements, velements);
}
-/**
- * Set up for drawing interleaved arrays that all live in one
VBO
Post by M***@gmx.net
- * or all live in user space.
- * \param vbuffer returns vertex buffer info
- * \param velements returns vertex element info
- */
-static void
-setup_interleaved_attribs(struct st_context *st,
- const struct st_vertex_program *vp,
- const struct gl_vertex_array *arrays,
- unsigned num_inputs)
-{
- struct pipe_vertex_buffer vbuffer;
- struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS] =
{{0}};
Post by M***@gmx.net
- GLuint attr;
- const GLubyte *low_addr = NULL;
- GLboolean usingVBO; /* all arrays in a VBO? */
- struct gl_buffer_object *bufobj;
- GLsizei stride;
-
- /* Find the lowest address of the arrays we're drawing,
- * Init bufobj and stride.
- */
- if (num_inputs) {
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
-
- array = get_client_array(arrays, vp->index_to_input[0]);
- assert(array);
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
-
- /* Since we're doing interleaved arrays, we know there'll
be
Post by M***@gmx.net
at most
- * one buffer object and the stride will be the same for
all
Post by M***@gmx.net
arrays.
- * Grab them now.
- */
- bufobj = binding->BufferObj;
- stride = binding->Stride;
-
- low_addr = _mesa_vertex_attrib_address(attrib, binding);
-
- for (attr = 1; attr < num_inputs; attr++) {
- const GLubyte *start;
- array = get_client_array(arrays, vp-
index_to_input[attr]);
- if (!array)
- continue;
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- start = _mesa_vertex_attrib_address(attrib, binding);
- low_addr = MIN2(low_addr, start);
- }
- }
- else {
- /* not sure we'll ever have zero inputs, but play it safe
*/
Post by M***@gmx.net
- bufobj = NULL;
- stride = 0;
- low_addr = 0;
- }
-
- /* are the arrays in user space? */
- usingVBO = _mesa_is_bufferobj(bufobj);
-
- for (attr = 0; attr < num_inputs;) {
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
- const GLubyte *ptr;
- unsigned src_offset;
- unsigned src_format;
-
- array = get_client_array(arrays, vp-
index_to_input[attr]);
- assert(array);
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- ptr = _mesa_vertex_attrib_address(attrib, binding);
-
- src_offset = (unsigned) (ptr - low_addr);
-
- src_format = st_pipe_vertex_format(attrib);
-
- init_velement_lowered(vp, velements, src_offset,
src_format,
Post by M***@gmx.net
- binding->InstanceDivisor, 0,
- attrib->Size, attrib->Doubles,
&attr);
Post by M***@gmx.net
- }
-
- /*
- * Return the vbuffer info and setup user-space attrib info,
if
Post by M***@gmx.net
needed.
- */
- if (num_inputs == 0) {
- /* just defensive coding here */
- vbuffer.buffer.resource = NULL;
- vbuffer.is_user_buffer = false;
- vbuffer.buffer_offset = 0;
- vbuffer.stride = 0;
- }
- else if (usingVBO) {
- /* all interleaved arrays in a VBO */
- struct st_buffer_object *stobj = st_buffer_object(bufobj);
-
- if (!stobj || !stobj->buffer) {
- st->vertex_array_out_of_memory = true;
- return; /* out-of-memory error probably */
- }
-
- vbuffer.buffer.resource = stobj->buffer;
- vbuffer.is_user_buffer = false;
- vbuffer.buffer_offset = pointer_to_offset(low_addr);
- vbuffer.stride = stride;
- }
- else {
- /* all interleaved arrays in user memory */
- vbuffer.buffer.user = low_addr;
- vbuffer.is_user_buffer = !!low_addr; /* if NULL, then
unbind
Post by M***@gmx.net
*/
- vbuffer.buffer_offset = 0;
- vbuffer.stride = stride;
-
- if (low_addr)
- st->draw_needs_minmax_index = true;
- }
-
- set_vertex_attribs(st, &vbuffer, num_inputs ? 1 : 0,
- velements, num_inputs);
-}
-
-/**
- * Set up a separate pipe_vertex_buffer and pipe_vertex_element
for
Post by M***@gmx.net
each
- * vertex attribute.
- * \param vbuffer returns vertex buffer info
- * \param velements returns vertex element info
- */
-static void
-setup_non_interleaved_attribs(struct st_context *st,
- const struct st_vertex_program
*vp,
Post by M***@gmx.net
- const struct gl_vertex_array
*arrays,
Post by M***@gmx.net
- unsigned num_inputs)
+void st_update_array(struct st_context *st)
{
struct gl_context *ctx = st->ctx;
+ /* vertex program validation must be done before this */
+ const struct st_vertex_program *vp = st->vp;
+ /* _NEW_PROGRAM, ST_NEW_VS_STATE */
+ const GLbitfield inputs_read = st->vp_variant-
vert_attrib_mask;
+ const struct gl_vertex_array_object *vao = ctx-
Array._DrawVAO;
+ const ubyte *input_to_index = vp->input_to_index;
+
struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS];
- struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS] =
{{0}};
Post by M***@gmx.net
+ struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
unsigned num_vbuffers = 0;
- unsigned unref_buffers = 0;
- GLuint attr;
-
- for (attr = 0; attr < num_inputs;) {
- const unsigned mesaAttr = vp->index_to_input[attr];
- const struct gl_vertex_array *array;
- const struct gl_vertex_buffer_binding *binding;
- const struct gl_array_attributes *attrib;
- struct gl_buffer_object *bufobj;
- GLsizei stride;
- unsigned src_format;
- unsigned bufidx;
-
- array = get_client_array(arrays, mesaAttr);
- assert(array);
-
- bufidx = num_vbuffers++;
-
- binding = array->BufferBinding;
- attrib = array->VertexAttrib;
- stride = binding->Stride;
- bufobj = binding->BufferObj;
-
- if (_mesa_is_bufferobj(bufobj)) {
- /* Attribute data is in a VBO.
- * Recall that for VBOs, the gl_vertex_array->Ptr field
is
Post by M***@gmx.net
- * really an offset from the start of the VBO, not a pointer.
- */
- struct st_buffer_object *stobj =
st_buffer_object(bufobj);
Post by M***@gmx.net
+ st->vertex_array_out_of_memory = FALSE;
+ st->draw_needs_minmax_index = false;
+
+ /* _NEW_PROGRAM */
+ /* ST_NEW_VERTEX_ARRAYS alias ctx->DriverFlags.NewArray */
+ /* Process attribute array data. */
+ GLbitfield mask = inputs_read & _mesa_draw_array_bits(ctx);
+ while (mask) {
+ /* The attribute index to start pulling a binding */
+ const gl_vert_attrib i = ffs(mask) - 1;
+ const struct gl_vertex_buffer_binding *const binding
+ = _mesa_draw_buffer_binding(vao, i);
+ const unsigned bufidx = num_vbuffers++;
+
+ if (_mesa_is_bufferobj(binding->BufferObj)) {
+ struct st_buffer_object *stobj =
st_buffer_object(binding-
Post by M***@gmx.net
Post by M***@gmx.net
BufferObj);
if (!stobj || !stobj->buffer) {
st->vertex_array_out_of_memory = true;
return; /* out-of-memory error probably */
}
+ /* Set the binding */
vbuffer[bufidx].buffer.resource = stobj->buffer;
vbuffer[bufidx].is_user_buffer = false;
- vbuffer[bufidx].buffer_offset =
- binding->Offset + attrib->RelativeOffset;
+ vbuffer[bufidx].buffer_offset =
_mesa_draw_binding_offset(binding);
+ } else {
+ /* Set the binding */
+ const void *ptr = (const void
*)_mesa_draw_binding_offset(binding);
+ vbuffer[bufidx].buffer.user = ptr;
+ vbuffer[bufidx].is_user_buffer = true;
+ vbuffer[bufidx].buffer_offset = 0;
+
+ if (!binding->InstanceDivisor)
+ st->draw_needs_minmax_index = true;
}
- else {
- if (stride == 0) {
- unsigned size = attrib->_ElementSize;
- /* This is optimal for GPU cache line usage if the upload size
- * is <= cache line size.
- */
- unsigned alignment = util_next_power_of_two(size);
-
- assert(attrib->Ptr);
- vbuffer[bufidx].buffer.user = attrib->Ptr;
- (void*)ctx-
Post by M***@gmx.net
Current.Attrib[mesaAttr];
-
- vbuffer[bufidx].is_user_buffer = false;
- vbuffer[bufidx].buffer.resource = NULL;
-
- /* Use const_uploader for zero-stride vertex
attributes,
Post by M***@gmx.net
because
- * it may use a better memory placement than stream_uploader.
- * The reason is that zero-stride attributes can be fetched many
- * times (thousands of times), so a better placement
is
Post by M***@gmx.net
going to
- * perform better.
- *
- * Upload the maximum possible size, which is 4x GLdouble = 32.
- */
- u_upload_data(st->can_bind_const_buffer_as_vertex ?
- st->pipe->stream_uploader,
- 0, size, alignment, ptr,
- &vbuffer[bufidx].buffer_offset,
- &vbuffer[bufidx].buffer.resource);
- unref_buffers |= 1u << bufidx;
- } else {
- assert(attrib->Ptr);
- vbuffer[bufidx].buffer.user = attrib->Ptr;
- vbuffer[bufidx].is_user_buffer = true;
- vbuffer[bufidx].buffer_offset = 0;
-
- if (!binding->InstanceDivisor)
- st->draw_needs_minmax_index = true;
- }
+ vbuffer[bufidx].stride = binding->Stride; /* in bytes */
+
+ const GLbitfield boundmask =
_mesa_draw_bound_attrib_bits(binding);
+ GLbitfield attrmask = mask & boundmask;
+ /* Mark the those attributes as processed */
+ mask &= ~boundmask;
+ /* We can assume that we have array for the binding */
+ assert(attrmask);
+ /* Walk attributes belonging to the binding */
+ while (attrmask) {
+ const gl_vert_attrib attr = u_bit_scan(&attrmask);
+ const struct gl_array_attributes *const attrib
+ = _mesa_draw_array_attrib(vao, attr);
+ const GLuint off =
_mesa_draw_attributes_relative_offset(attrib);
+ init_velement_lowered(vp, velements, attrib, off,
+ binding->InstanceDivisor, bufidx,
+ input_to_index[attr]);
}
+ }
- /* common-case setup */
- vbuffer[bufidx].stride = stride; /* in bytes */
+ const unsigned first_current_vbuffer = num_vbuffers;
+ /* _NEW_PROGRAM | _NEW_CURRENT_ATTRIB */
+ /* Process values that should have better been uniforms in
the
Post by M***@gmx.net
application */
+ GLbitfield curmask = inputs_read &
_mesa_draw_current_bits(ctx);
Post by M***@gmx.net
+ if (curmask) {
+ /* For each attribute, upload the maximum possible size.
*/
Post by M***@gmx.net
+ GLubyte data[VERT_ATTRIB_MAX*sizeof(GLdouble)*4];
+ GLubyte *cursor = data;
+ const unsigned bufidx = num_vbuffers++;
+ unsigned max_alignment = 1;
+
+ while (curmask) {
+ const gl_vert_attrib attr = u_bit_scan(&curmask);
+ const struct gl_array_attributes *const attrib
+ = _mesa_draw_current_attrib(ctx, attr);
+ const unsigned size = attrib->_ElementSize;
+ const unsigned alignment =
util_next_power_of_two(size);
Post by M***@gmx.net
+ max_alignment = MAX2(max_alignment, alignment);
+ memcpy(cursor, attrib->Ptr, size);
+ if (alignment != size)
+ memset(cursor + size, 0, alignment - size);
+
+ init_velement_lowered(vp, velements, attrib, cursor -
data,
Post by M***@gmx.net
0,
+ bufidx, input_to_index[attr]);
+
+ cursor += alignment;
+ }
- src_format = st_pipe_vertex_format(attrib);
+ vbuffer[bufidx].is_user_buffer = false;
+ vbuffer[bufidx].buffer.resource = NULL;
+ /* vbuffer[bufidx].buffer_offset is set below */
+ vbuffer[bufidx].stride = 0;
- init_velement_lowered(vp, velements, 0, src_format,
- binding->InstanceDivisor, bufidx,
- attrib->Size, attrib->Doubles,
&attr);
Post by M***@gmx.net
+ /* Use const_uploader for zero-stride vertex attributes, because
+ * it may use a better memory placement than
stream_uploader.
Post by M***@gmx.net
+ * The reason is that zero-stride attributes can be
fetched
Post by M***@gmx.net
many
+ * times (thousands of times), so a better placement is
going
Post by M***@gmx.net
to
+ * perform better.
+ */
+ u_upload_data(st->can_bind_const_buffer_as_vertex ?
+ st->pipe->stream_uploader,
+ 0, cursor - data, max_alignment, data,
+ &vbuffer[bufidx].buffer_offset,
+ &vbuffer[bufidx].buffer.resource);
}
if (!ctx->Const.AllowMappedBuffersDuringExecution) {
u_upload_unmap(st->pipe->stream_uploader);
}
+ const unsigned num_inputs = st->vp_variant->num_inputs;
set_vertex_attribs(st, vbuffer, num_vbuffers, velements, num_inputs);
/* Unreference uploaded zero-stride vertex buffers. */
- while (unref_buffers) {
- unsigned i = u_bit_scan(&unref_buffers);
+ for (unsigned i = first_current_vbuffer; i < num_vbuffers;
++i) {
Post by M***@gmx.net
pipe_resource_reference(&vbuffer[i].buffer.resource,
NULL);
Post by M***@gmx.net
}
}
-
-void st_update_array(struct st_context *st)
-{
- struct gl_context *ctx = st->ctx;
- const struct gl_vertex_array *arrays = ctx-
Array._DrawArrays;
- const struct st_vertex_program *vp;
- unsigned num_inputs;
-
- st->vertex_array_out_of_memory = FALSE;
- st->draw_needs_minmax_index = false;
-
- /* No drawing has been done yet, so do nothing. */
- if (!arrays)
- return;
-
- /* vertex program validation must be done before this */
- vp = st->vp;
- num_inputs = st->vp_variant->num_inputs;
-
- if (is_interleaved_arrays(vp, arrays, num_inputs))
- setup_interleaved_attribs(st, vp, arrays, num_inputs);
- else
- setup_non_interleaved_attribs(st, vp, arrays, num_inputs);
-}
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
M***@gmx.net
2018-05-07 06:14:57 UTC
Permalink
From: Mathias Fröhlich <***@web.de>

The input_to_index array is already available internally
when preparing vertex programs. Store the map in
struct st_vertex_program.
Also store the bitmask of mesa vertex processing inputs in
struct st_vp_variant.

Signed-off-by: Mathias Fröhlich <***@web.de>
---
src/mesa/state_tracker/st_program.c | 17 ++++++++++++-----
src/mesa/state_tracker/st_program.h | 5 +++++
src/mesa/state_tracker/st_shader_cache.c | 4 ++++
3 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index fe72ddaf2c..f256e2e862 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -388,11 +388,11 @@ st_translate_vertex_program(struct st_context *st,
enum pipe_error error;
unsigned num_outputs = 0;
unsigned attr;
- ubyte input_to_index[VERT_ATTRIB_MAX] = {0};
ubyte output_semantic_name[VARYING_SLOT_MAX] = {0};
ubyte output_semantic_index[VARYING_SLOT_MAX] = {0};

stvp->num_inputs = 0;
+ memset(stvp->input_to_index, ~0, sizeof(stvp->input_to_index));

if (stvp->Base.arb.IsPositionInvariant)
_mesa_insert_mvp_code(st->ctx, &stvp->Base);
@@ -403,7 +403,7 @@ st_translate_vertex_program(struct st_context *st,
*/
for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
if ((stvp->Base.info.inputs_read & BITFIELD64_BIT(attr)) != 0) {
- input_to_index[attr] = stvp->num_inputs;
+ stvp->input_to_index[attr] = stvp->num_inputs;
stvp->index_to_input[stvp->num_inputs] = attr;
stvp->num_inputs++;
if ((stvp->Base.info.vs.double_inputs_read &
@@ -415,7 +415,7 @@ st_translate_vertex_program(struct st_context *st,
}
}
/* bit of a hack, presetup potentially unused edgeflag input */
- input_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs;
+ stvp->input_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs;
stvp->index_to_input[stvp->num_inputs] = VERT_ATTRIB_EDGEFLAG;

/* Compute mapping of vertex program outputs to slots.
@@ -495,7 +495,7 @@ st_translate_vertex_program(struct st_context *st,
&stvp->Base,
/* inputs */
stvp->num_inputs,
- input_to_index,
+ stvp->input_to_index,
NULL, /* inputSlotToAttr */
NULL, /* input semantic name */
NULL, /* input semantic index */
@@ -518,7 +518,7 @@ st_translate_vertex_program(struct st_context *st,
&stvp->Base,
/* inputs */
stvp->num_inputs,
- input_to_index,
+ stvp->input_to_index,
NULL, /* input semantic name */
NULL, /* input semantic index */
NULL,
@@ -598,6 +598,13 @@ st_create_vp_variant(struct st_context *st,
fprintf(stderr, "mesa: cannot emulate deprecated features\n");
}

+ for (unsigned index = 0; index < vpv->num_inputs; ++index) {
+ unsigned attr = stvp->index_to_input[index];
+ if (attr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
+ continue;
+ vpv->vert_attrib_mask |= 1u << attr;
+ }
+
if (ST_DEBUG & DEBUG_TGSI) {
tgsi_dump(vpv->tgsi.tokens, 0);
debug_printf("\n");
diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h
index a520ffbecb..f67ea5eb20 100644
--- a/src/mesa/state_tracker/st_program.h
+++ b/src/mesa/state_tracker/st_program.h
@@ -196,6 +196,9 @@ struct st_vp_variant

/** similar to that in st_vertex_program, but with edgeflags info too */
GLuint num_inputs;
+
+ /** Bitfield of VERT_BIT_* bits of mesa vertex processing inputs */
+ GLbitfield vert_attrib_mask;
};


@@ -215,6 +218,8 @@ struct st_vertex_program
/** maps a TGSI input index back to a Mesa VERT_ATTRIB_x */
ubyte index_to_input[PIPE_MAX_ATTRIBS];
ubyte num_inputs;
+ /** Reverse mapping of the above */
+ ubyte input_to_index[VERT_ATTRIB_MAX];

/** Maps VARYING_SLOT_x to slot */
ubyte result_to_output[VARYING_SLOT_MAX];
diff --git a/src/mesa/state_tracker/st_shader_cache.c b/src/mesa/state_tracker/st_shader_cache.c
index 3ca3fef1df..17f84180ca 100644
--- a/src/mesa/state_tracker/st_shader_cache.c
+++ b/src/mesa/state_tracker/st_shader_cache.c
@@ -84,6 +84,8 @@ st_serialise_ir_program(struct gl_context *ctx, struct gl_program *prog,
blob_write_uint32(&blob, stvp->num_inputs);
blob_write_bytes(&blob, stvp->index_to_input,
sizeof(stvp->index_to_input));
+ blob_write_bytes(&blob, stvp->input_to_index,
+ sizeof(stvp->input_to_index));
blob_write_bytes(&blob, stvp->result_to_output,
sizeof(stvp->result_to_output));

@@ -206,6 +208,8 @@ st_deserialise_ir_program(struct gl_context *ctx,
stvp->num_inputs = blob_read_uint32(&blob_reader);
blob_copy_bytes(&blob_reader, (uint8_t *) stvp->index_to_input,
sizeof(stvp->index_to_input));
+ blob_copy_bytes(&blob_reader, (uint8_t *) stvp->input_to_index,
+ sizeof(stvp->input_to_index));
blob_copy_bytes(&blob_reader, (uint8_t *) stvp->result_to_output,
sizeof(stvp->result_to_output));
--
2.14.3
Loading...