Samuel Pitoiset
2018-12-07 16:21:17 UTC
Basically, this extension allows applications to use custom
sample locations. This only implements the bare minimum.
It doesn't support variable sample locations during a subpass.
Most of the dEQP-VK.pipeline.multisample.sample_locations_ext.*
CTS tests now pass.
Only enabled on VI+ because it's untested on older chips.
Signed-off-by: Samuel Pitoiset <***@gmail.com>
---
src/amd/vulkan/radv_cmd_buffer.c | 177 +++++++++++++++++++++++++++++-
src/amd/vulkan/radv_device.c | 27 +++++
src/amd/vulkan/radv_extensions.py | 1 +
src/amd/vulkan/radv_pipeline.c | 30 +++++
src/amd/vulkan/radv_private.h | 26 +++--
5 files changed, 253 insertions(+), 8 deletions(-)
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index b4aea5bc898..c4bebeda0ce 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -105,6 +105,7 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer,
dest->viewport.count = src->viewport.count;
dest->scissor.count = src->scissor.count;
dest->discard_rectangle.count = src->discard_rectangle.count;
+ dest->sample_location.count = src->sample_location.count;
if (copy_mask & RADV_DYNAMIC_VIEWPORT) {
if (memcmp(&dest->viewport.viewports, &src->viewport.viewports,
@@ -192,6 +193,22 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer,
}
}
+ if (copy_mask & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
+ if (dest->sample_location.per_pixel != src->sample_location.per_pixel ||
+ dest->sample_location.grid_size.width != src->sample_location.grid_size.width ||
+ dest->sample_location.grid_size.height != src->sample_location.grid_size.height ||
+ memcmp(&dest->sample_location.locations,
+ &src->sample_location.locations,
+ src->sample_location.count * sizeof(VkSampleLocationEXT))) {
+ dest->sample_location.per_pixel = src->sample_location.per_pixel;
+ dest->sample_location.grid_size = src->sample_location.grid_size;
+ typed_memcpy(dest->sample_location.locations,
+ src->sample_location.locations,
+ src->sample_location.count);
+ dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS;
+ }
+ }
+
cmd_buffer->state.dirty |= dest_mask;
}
@@ -634,6 +651,135 @@ radv_emit_descriptor_pointers(struct radv_cmd_buffer *cmd_buffer,
}
}
+/**
+ * Convert the user sample locations to hardware sample locations (the values
+ * that will be emitted by PA_SC_AA_SAMPLE_LOCS_PIXEL_*).
+ *
+ * The pixel (x, y) is wrapped into the user grid (state->grid_size) and the
+ * state->per_pixel locations stored for that grid cell are converted: each
+ * coordinate is shifted by -0.5, scaled by 16, floored and clamped to
+ * [-8, 7]. One entry per sample is written to sample_locs.
+ */
+static void
+radv_convert_user_sample_locs(struct radv_sample_locations_state *state,
+			      uint32_t x, uint32_t y, VkOffset2D *sample_locs)
+{
+	uint32_t num_samples = (uint32_t)state->per_pixel;
+	VkSampleLocationEXT *user_locs;
+	uint32_t x_offset, y_offset;
+	uint32_t pixel_offset;
+
+	/* A zero-sized grid would make the modulo below undefined. */
+	assert(state->grid_size.width > 0 && state->grid_size.height > 0);
+
+	x_offset = x % state->grid_size.width;
+	y_offset = y % state->grid_size.height;
+	pixel_offset = (x_offset + y_offset * state->grid_size.width) * num_samples;
+
+	/* All num_samples entries read below must lie inside locations[],
+	 * not only the first one.
+	 */
+	assert(pixel_offset + num_samples <= MAX_SAMPLE_LOCATIONS);
+	user_locs = &state->locations[pixel_offset];
+
+	for (uint32_t i = 0; i < num_samples; i++) {
+		/* Move the origin from the pixel corner to the pixel center. */
+		float shifted_pos_x = user_locs[i].x - 0.5;
+		float shifted_pos_y = user_locs[i].y - 0.5;
+
+		/* Scale to 1/16th of a pixel. */
+		int32_t scaled_pos_x = floor(shifted_pos_x * 16);
+		int32_t scaled_pos_y = floor(shifted_pos_y * 16);
+
+		/* Keep the result representable as a signed 4-bit value. */
+		sample_locs[i].x = CLAMP(scaled_pos_x, -8, 7);
+		sample_locs[i].y = CLAMP(scaled_pos_y, -8, 7);
+	}
+}
+
+/**
+ * Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask based on hardware sample
+ * locations.
+ *
+ * Four samples are packed per 32-bit register: sample i goes into
+ * sample_locs_pixel[i / 4], with its X in the low nibble and its Y in the
+ * high nibble of byte (i % 4). Values are OR'ed in, so the caller must pass
+ * zero-initialized registers.
+ */
+static void
+radv_compute_sample_locs_pixel(uint32_t num_samples, VkOffset2D *sample_locs,
+			       uint32_t *sample_locs_pixel)
+{
+	for (uint32_t i = 0; i < num_samples; ++i) {
+		uint32_t sample_reg_idx = i / 4;
+		uint32_t sample_loc_idx = i % 4;
+
+		/* Keep only the low nibble and shift as unsigned: shifting a
+		 * signed int into the sign bit (nibble >= 8 with a shift of
+		 * 28) is undefined behaviour.
+		 */
+		uint32_t pos_x = (uint32_t)sample_locs[i].x & 0xf;
+		uint32_t pos_y = (uint32_t)sample_locs[i].y & 0xf;
+
+		uint32_t shift_x = 8 * sample_loc_idx;
+		uint32_t shift_y = shift_x + 4;
+
+		sample_locs_pixel[sample_reg_idx] |= pos_x << shift_x;
+		sample_locs_pixel[sample_reg_idx] |= pos_y << shift_y;
+	}
+}
+
+/**
+ * Emit the sample locations that are specified with VK_EXT_sample_locations.
+ *
+ * The user locations of the current dynamic state are converted for the
+ * four pixels (0,0), (1,0), (0,1) and (1,1), packed into the
+ * PA_SC_AA_SAMPLE_LOCS_PIXEL_* registers and emitted. PA_SC_AA_CONFIG is
+ * re-emitted when the maximum sample distance of these locations differs
+ * from the one stored in the bound pipeline.
+ */
+static void
+radv_emit_sample_locations(struct radv_cmd_buffer *cmd_buffer)
+{
+	struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
+	struct radv_multisample_state *ms = &pipeline->graphics.ms;
+	struct radv_sample_locations_state *sample_location =
+		&cmd_buffer->state.dynamic.sample_location;
+	uint32_t num_samples = (uint32_t)sample_location->per_pixel;
+	struct radeon_cmdbuf *cs = cmd_buffer->cs;
+	uint32_t sample_locs_pixel[4][2] = {};
+	VkOffset2D sample_locs[4][8]; /* 8 is the max. sample count supported */
+	uint32_t max_sample_dist = 0;
+
+	/* Convert the user sample locations to hardware sample locations. */
+	radv_convert_user_sample_locs(sample_location, 0, 0, sample_locs[0]);
+	radv_convert_user_sample_locs(sample_location, 1, 0, sample_locs[1]);
+	radv_convert_user_sample_locs(sample_location, 0, 1, sample_locs[2]);
+	radv_convert_user_sample_locs(sample_location, 1, 1, sample_locs[3]);
+
+	/* Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask. */
+	for (uint32_t i = 0; i < 4; i++) {
+		radv_compute_sample_locs_pixel(num_samples, sample_locs[i],
+					       sample_locs_pixel[i]);
+	}
+
+	/* Emit the specified user sample locations. */
+	switch (num_samples) {
+	case 2:
+	case 4:
+		radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_pixel[0][0]);
+		radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_pixel[1][0]);
+		radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_pixel[2][0]);
+		radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_pixel[3][0]);
+		break;
+	case 8:
+		radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_pixel[0][0]);
+		radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_pixel[1][0]);
+		radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_pixel[2][0]);
+		radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_pixel[3][0]);
+		radeon_set_context_reg(cs, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, sample_locs_pixel[0][1]);
+		radeon_set_context_reg(cs, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, sample_locs_pixel[1][1]);
+		radeon_set_context_reg(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, sample_locs_pixel[2][1]);
+		radeon_set_context_reg(cs, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, sample_locs_pixel[3][1]);
+		break;
+	default:
+		unreachable("Unsupported number of samples!");
+	}
+
+	/* Compute the maximum sample distance from the specified locations.
+	 * Each of the four pixels has its own set of locations, so all of
+	 * them have to be considered, not only pixel (0,0).
+	 */
+	for (uint32_t i = 0; i < 4; i++) {
+		for (uint32_t j = 0; j < num_samples; j++) {
+			VkOffset2D offset = sample_locs[i][j];
+			max_sample_dist = MAX2(max_sample_dist,
+					       MAX2(abs(offset.x), abs(offset.y)));
+		}
+	}
+
+	/* Emit the maximum sample distance if different. */
+	if (G_028BE0_MAX_SAMPLE_DIST(ms->pa_sc_aa_config) != max_sample_dist) {
+		uint32_t pa_sc_aa_config = ms->pa_sc_aa_config;
+
+		/* Replace only the MAX_SAMPLE_DIST field of the pipeline value. */
+		pa_sc_aa_config &= C_028BE0_MAX_SAMPLE_DIST;
+		pa_sc_aa_config |= S_028BE0_MAX_SAMPLE_DIST(max_sample_dist);
+
+		radeon_set_context_reg_seq(cs, R_028BE0_PA_SC_AA_CONFIG, 1);
+		radeon_emit(cs, pa_sc_aa_config);
+
+		/* GFX9: Flush DFSM when the AA mode changes. */
+		if (cmd_buffer->device->dfsm_allowed) {
+			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+			radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
+		}
+	}
+}
+
static void
radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
struct radv_pipeline *pipeline)
@@ -645,7 +791,14 @@ radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.needs_sample_positions)
cmd_buffer->sample_positions_needed = true;
- if (old_pipeline && num_samples == old_pipeline->graphics.ms.num_samples)
+ /* Emit the multisample state (including sample locations) only if:
+ * - it's the first bound pipeline in the command buffer
+ * - the number of samples of this pipeline is different
+ * - the previous pipeline used custom sample locations
+ */
+ if (old_pipeline &&
+ num_samples == old_pipeline->graphics.ms.num_samples &&
+ !old_pipeline->dynamic_state.sample_location.count)
return;
radeon_set_context_reg_seq(cmd_buffer->cs, R_028BDC_PA_SC_LINE_CNTL, 2);
@@ -1711,6 +1864,9 @@ radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer)
if (states & RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE)
radv_emit_discard_rectangle(cmd_buffer);
+ if (states & RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS)
+ radv_emit_sample_locations(cmd_buffer);
+
cmd_buffer->state.dirty &= ~states;
}
@@ -3050,6 +3206,25 @@ void radv_CmdSetDiscardRectangleEXT(
state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE;
}
+/**
+ * Record new sample locations in the command buffer's dynamic state and
+ * flag them dirty so they get emitted when dynamic state is next flushed.
+ */
+void radv_CmdSetSampleLocationsEXT(
+	VkCommandBuffer                             commandBuffer,
+	const VkSampleLocationsInfoEXT*             pSampleLocationsInfo)
+{
+	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+	struct radv_sample_locations_state *dst =
+		&cmd_buffer->state.dynamic.sample_location;
+
+	/* The destination array has a fixed capacity. */
+	assert(pSampleLocationsInfo->sampleLocationsCount <= MAX_SAMPLE_LOCATIONS);
+
+	dst->count = pSampleLocationsInfo->sampleLocationsCount;
+	dst->grid_size = pSampleLocationsInfo->sampleLocationGridSize;
+	dst->per_pixel = pSampleLocationsInfo->sampleLocationsPerPixel;
+	typed_memcpy(&dst->locations[0],
+		     pSampleLocationsInfo->pSampleLocations,
+		     pSampleLocationsInfo->sampleLocationsCount);
+
+	cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS;
+}
+
void radv_CmdExecuteCommands(
VkCommandBuffer commandBuffer,
uint32_t commandBufferCount,
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index cb51ee44e58..6b19641f66d 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1238,6 +1238,19 @@ void radv_GetPhysicalDeviceProperties2(
properties->transformFeedbackDraw = true;
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
+ VkPhysicalDeviceSampleLocationsPropertiesEXT *properties =
+ (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
+ properties->sampleLocationSampleCounts = VK_SAMPLE_COUNT_2_BIT |
+ VK_SAMPLE_COUNT_4_BIT |
+ VK_SAMPLE_COUNT_8_BIT;
+ properties->maxSampleLocationGridSize = (VkExtent2D){ 2 , 2 };
+ properties->sampleLocationCoordinateRange[0] = 0.0f;
+ properties->sampleLocationCoordinateRange[1] = 1.0f;
+ properties->sampleLocationSubPixelBits = 4;
+ properties->variableSampleLocations = VK_FALSE;
+ break;
+ }
default:
break;
}
@@ -5111,3 +5124,17 @@ VkResult radv_GetCalibratedTimestampsEXT(
return VK_SUCCESS;
}
+
+/**
+ * Report the maximum sample location grid size for a sample count: 2x2 when
+ * the 2, 4 or 8 sample bit is set, 0x0 otherwise.
+ */
+void radv_GetPhysicalDeviceMultisamplePropertiesEXT(
+	VkPhysicalDevice                            physicalDevice,
+	VkSampleCountFlagBits                       samples,
+	VkMultisamplePropertiesEXT*                 pMultisampleProperties)
+{
+	/* Start from "unsupported" and upgrade for the handled sample counts. */
+	VkExtent2D grid_size = { 0, 0 };
+
+	if (samples & (VK_SAMPLE_COUNT_2_BIT |
+		       VK_SAMPLE_COUNT_4_BIT |
+		       VK_SAMPLE_COUNT_8_BIT))
+		grid_size = (VkExtent2D){ 2, 2 };
+
+	pMultisampleProperties->maxSampleLocationGridSize = grid_size;
+}
diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py
index d14169144f7..19b24ac4157 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -106,6 +106,7 @@ EXTENSIONS = [
Extension('VK_EXT_external_memory_host', 1, 'device->rad_info.has_userptr'),
Extension('VK_EXT_global_priority', 1, 'device->rad_info.has_ctx_priority'),
Extension('VK_EXT_pci_bus_info', 1, True),
+ Extension('VK_EXT_sample_locations', 1, 'device->rad_info.chip_class >= VI'),
Extension('VK_EXT_sampler_filter_minmax', 1, 'device->rad_info.chip_class >= CIK'),
Extension('VK_EXT_scalar_block_layout', 1, 'device->rad_info.chip_class >= CIK'),
Extension('VK_EXT_shader_viewport_index_layer', 1, True),
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 33076cc2bd2..266fdb43367 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -1276,6 +1276,8 @@ static unsigned radv_dynamic_state_mask(VkDynamicState state)
return RADV_DYNAMIC_STENCIL_REFERENCE;
case VK_DYNAMIC_STATE_DISCARD_RECTANGLE_EXT:
return RADV_DYNAMIC_DISCARD_RECTANGLE;
+ case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT:
+ return RADV_DYNAMIC_SAMPLE_LOCATIONS;
default:
unreachable("Unhandled dynamic state");
}
@@ -1306,6 +1308,11 @@ static uint32_t radv_pipeline_needed_dynamic_state(const VkGraphicsPipelineCreat
if (!vk_find_struct_const(pCreateInfo->pNext, PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT))
states &= ~RADV_DYNAMIC_DISCARD_RECTANGLE;
+ if (!pCreateInfo->pMultisampleState ||
+ !vk_find_struct_const(pCreateInfo->pMultisampleState->pNext,
+ PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT))
+ states &= ~RADV_DYNAMIC_SAMPLE_LOCATIONS;
+
/* TODO: blend constants & line width. */
return states;
@@ -1442,6 +1449,29 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
discard_rectangle_info->discardRectangleCount);
}
+ if (states & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
+ const VkPipelineSampleLocationsStateCreateInfoEXT *sample_location_info =
+ vk_find_struct_const(pCreateInfo->pMultisampleState->pNext,
+ PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT);
+ /* If sampleLocationsEnable is VK_FALSE, the default sample
+ * locations are used and the values specified in
+ * sampleLocationsInfo are ignored.
+ */
+ if (sample_location_info->sampleLocationsEnable) {
+ const VkSampleLocationsInfoEXT *pSampleLocationsInfo =
+ &sample_location_info->sampleLocationsInfo;
+
+ assert(pSampleLocationsInfo->sampleLocationsCount <= MAX_SAMPLE_LOCATIONS);
+
+ dynamic->sample_location.per_pixel = pSampleLocationsInfo->sampleLocationsPerPixel;
+ dynamic->sample_location.grid_size = pSampleLocationsInfo->sampleLocationGridSize;
+ dynamic->sample_location.count = pSampleLocationsInfo->sampleLocationsCount;
+ typed_memcpy(&dynamic->sample_location.locations[0],
+ pSampleLocationsInfo->pSampleLocations,
+ pSampleLocationsInfo->sampleLocationsCount);
+ }
+ }
+
pipeline->dynamic_state.mask = states;
}
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index e3dd301ee8f..4139a2911aa 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -90,6 +90,7 @@ typedef uint32_t xcb_window_t;
#define MAX_VIEWPORTS 16
#define MAX_SCISSORS 16
#define MAX_DISCARD_RECTANGLES 4
+#define MAX_SAMPLE_LOCATIONS 32
#define MAX_PUSH_CONSTANTS_SIZE 128
#define MAX_PUSH_DESCRIPTORS 32
#define MAX_DYNAMIC_UNIFORM_BUFFERS 16
@@ -829,7 +830,8 @@ enum radv_dynamic_state_bits {
RADV_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7,
RADV_DYNAMIC_STENCIL_REFERENCE = 1 << 8,
RADV_DYNAMIC_DISCARD_RECTANGLE = 1 << 9,
- RADV_DYNAMIC_ALL = (1 << 10) - 1,
+ RADV_DYNAMIC_SAMPLE_LOCATIONS = 1 << 10,
+ RADV_DYNAMIC_ALL = (1 << 11) - 1,
};
enum radv_cmd_dirty_bits {
@@ -845,12 +847,13 @@ enum radv_cmd_dirty_bits {
RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7,
RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1 << 8,
RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE = 1 << 9,
- RADV_CMD_DIRTY_DYNAMIC_ALL = (1 << 10) - 1,
- RADV_CMD_DIRTY_PIPELINE = 1 << 10,
- RADV_CMD_DIRTY_INDEX_BUFFER = 1 << 11,
- RADV_CMD_DIRTY_FRAMEBUFFER = 1 << 12,
- RADV_CMD_DIRTY_VERTEX_BUFFER = 1 << 13,
- RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1 << 14,
+ RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS = 1 << 10,
+ RADV_CMD_DIRTY_DYNAMIC_ALL = (1 << 11) - 1,
+ RADV_CMD_DIRTY_PIPELINE = 1 << 11,
+ RADV_CMD_DIRTY_INDEX_BUFFER = 1 << 12,
+ RADV_CMD_DIRTY_FRAMEBUFFER = 1 << 13,
+ RADV_CMD_DIRTY_VERTEX_BUFFER = 1 << 14,
+ RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1 << 15,
};
enum radv_cmd_flush_bits {
@@ -927,6 +930,13 @@ struct radv_discard_rectangle_state {
VkRect2D rectangles[MAX_DISCARD_RECTANGLES];
};
+struct radv_sample_locations_state { /* custom sample locations (VK_EXT_sample_locations) */
+ VkSampleCountFlagBits per_pixel; /* sampleLocationsPerPixel: samples per pixel */
+ VkExtent2D grid_size; /* sampleLocationGridSize: pixel grid the locations cover */
+ uint32_t count; /* sampleLocationsCount: valid entries in locations[] */
+ VkSampleLocationEXT locations[MAX_SAMPLE_LOCATIONS]; /* per_pixel entries for each grid pixel, row by row */
+};
+
struct radv_dynamic_state {
/**
* Bitmask of (1 << VK_DYNAMIC_STATE_*).
@@ -969,6 +979,8 @@ struct radv_dynamic_state {
} stencil_reference;
struct radv_discard_rectangle_state discard_rectangle;
+
+ struct radv_sample_locations_state sample_location;
};
extern const struct radv_dynamic_state default_dynamic_state;
sample locations. This only implements the bare minimum.
It doesn't support variable sample locations during a subpass.
Most of the dEQP-VK.pipeline.multisample.sample_locations_ext.*
CTS tests now pass.
Only enabled on VI+ because it's untested on older chips.
Signed-off-by: Samuel Pitoiset <***@gmail.com>
---
src/amd/vulkan/radv_cmd_buffer.c | 177 +++++++++++++++++++++++++++++-
src/amd/vulkan/radv_device.c | 27 +++++
src/amd/vulkan/radv_extensions.py | 1 +
src/amd/vulkan/radv_pipeline.c | 30 +++++
src/amd/vulkan/radv_private.h | 26 +++--
5 files changed, 253 insertions(+), 8 deletions(-)
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index b4aea5bc898..c4bebeda0ce 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -105,6 +105,7 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer,
dest->viewport.count = src->viewport.count;
dest->scissor.count = src->scissor.count;
dest->discard_rectangle.count = src->discard_rectangle.count;
+ dest->sample_location.count = src->sample_location.count;
if (copy_mask & RADV_DYNAMIC_VIEWPORT) {
if (memcmp(&dest->viewport.viewports, &src->viewport.viewports,
@@ -192,6 +193,22 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer,
}
}
+ if (copy_mask & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
+ if (dest->sample_location.per_pixel != src->sample_location.per_pixel ||
+ dest->sample_location.grid_size.width != src->sample_location.grid_size.width ||
+ dest->sample_location.grid_size.height != src->sample_location.grid_size.height ||
+ memcmp(&dest->sample_location.locations,
+ &src->sample_location.locations,
+ src->sample_location.count * sizeof(VkSampleLocationEXT))) {
+ dest->sample_location.per_pixel = src->sample_location.per_pixel;
+ dest->sample_location.grid_size = src->sample_location.grid_size;
+ typed_memcpy(dest->sample_location.locations,
+ src->sample_location.locations,
+ src->sample_location.count);
+ dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS;
+ }
+ }
+
cmd_buffer->state.dirty |= dest_mask;
}
@@ -634,6 +651,135 @@ radv_emit_descriptor_pointers(struct radv_cmd_buffer *cmd_buffer,
}
}
+/**
+ * Convert the user sample locations to hardware sample locations (the values
+ * that will be emitted by PA_SC_AA_SAMPLE_LOCS_PIXEL_*).
+ *
+ * The pixel (x, y) is wrapped into the user grid (state->grid_size) and the
+ * state->per_pixel locations stored for that grid cell are converted: each
+ * coordinate is shifted by -0.5, scaled by 16, floored and clamped to
+ * [-8, 7]. One entry per sample is written to sample_locs.
+ */
+static void
+radv_convert_user_sample_locs(struct radv_sample_locations_state *state,
+			      uint32_t x, uint32_t y, VkOffset2D *sample_locs)
+{
+	uint32_t num_samples = (uint32_t)state->per_pixel;
+	VkSampleLocationEXT *user_locs;
+	uint32_t x_offset, y_offset;
+	uint32_t pixel_offset;
+
+	/* A zero-sized grid would make the modulo below undefined. */
+	assert(state->grid_size.width > 0 && state->grid_size.height > 0);
+
+	x_offset = x % state->grid_size.width;
+	y_offset = y % state->grid_size.height;
+	pixel_offset = (x_offset + y_offset * state->grid_size.width) * num_samples;
+
+	/* All num_samples entries read below must lie inside locations[],
+	 * not only the first one.
+	 */
+	assert(pixel_offset + num_samples <= MAX_SAMPLE_LOCATIONS);
+	user_locs = &state->locations[pixel_offset];
+
+	for (uint32_t i = 0; i < num_samples; i++) {
+		/* Move the origin from the pixel corner to the pixel center. */
+		float shifted_pos_x = user_locs[i].x - 0.5;
+		float shifted_pos_y = user_locs[i].y - 0.5;
+
+		/* Scale to 1/16th of a pixel. */
+		int32_t scaled_pos_x = floor(shifted_pos_x * 16);
+		int32_t scaled_pos_y = floor(shifted_pos_y * 16);
+
+		/* Keep the result representable as a signed 4-bit value. */
+		sample_locs[i].x = CLAMP(scaled_pos_x, -8, 7);
+		sample_locs[i].y = CLAMP(scaled_pos_y, -8, 7);
+	}
+}
+
+/**
+ * Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask based on hardware sample
+ * locations.
+ *
+ * Four samples are packed per 32-bit register: sample i goes into
+ * sample_locs_pixel[i / 4], with its X in the low nibble and its Y in the
+ * high nibble of byte (i % 4). Values are OR'ed in, so the caller must pass
+ * zero-initialized registers.
+ */
+static void
+radv_compute_sample_locs_pixel(uint32_t num_samples, VkOffset2D *sample_locs,
+			       uint32_t *sample_locs_pixel)
+{
+	for (uint32_t i = 0; i < num_samples; ++i) {
+		uint32_t sample_reg_idx = i / 4;
+		uint32_t sample_loc_idx = i % 4;
+
+		/* Keep only the low nibble and shift as unsigned: shifting a
+		 * signed int into the sign bit (nibble >= 8 with a shift of
+		 * 28) is undefined behaviour.
+		 */
+		uint32_t pos_x = (uint32_t)sample_locs[i].x & 0xf;
+		uint32_t pos_y = (uint32_t)sample_locs[i].y & 0xf;
+
+		uint32_t shift_x = 8 * sample_loc_idx;
+		uint32_t shift_y = shift_x + 4;
+
+		sample_locs_pixel[sample_reg_idx] |= pos_x << shift_x;
+		sample_locs_pixel[sample_reg_idx] |= pos_y << shift_y;
+	}
+}
+
+/**
+ * Emit the sample locations that are specified with VK_EXT_sample_locations.
+ *
+ * The user locations of the current dynamic state are converted for the
+ * four pixels (0,0), (1,0), (0,1) and (1,1), packed into the
+ * PA_SC_AA_SAMPLE_LOCS_PIXEL_* registers and emitted. PA_SC_AA_CONFIG is
+ * re-emitted when the maximum sample distance of these locations differs
+ * from the one stored in the bound pipeline.
+ */
+static void
+radv_emit_sample_locations(struct radv_cmd_buffer *cmd_buffer)
+{
+	struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
+	struct radv_multisample_state *ms = &pipeline->graphics.ms;
+	struct radv_sample_locations_state *sample_location =
+		&cmd_buffer->state.dynamic.sample_location;
+	uint32_t num_samples = (uint32_t)sample_location->per_pixel;
+	struct radeon_cmdbuf *cs = cmd_buffer->cs;
+	uint32_t sample_locs_pixel[4][2] = {};
+	VkOffset2D sample_locs[4][8]; /* 8 is the max. sample count supported */
+	uint32_t max_sample_dist = 0;
+
+	/* Convert the user sample locations to hardware sample locations. */
+	radv_convert_user_sample_locs(sample_location, 0, 0, sample_locs[0]);
+	radv_convert_user_sample_locs(sample_location, 1, 0, sample_locs[1]);
+	radv_convert_user_sample_locs(sample_location, 0, 1, sample_locs[2]);
+	radv_convert_user_sample_locs(sample_location, 1, 1, sample_locs[3]);
+
+	/* Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask. */
+	for (uint32_t i = 0; i < 4; i++) {
+		radv_compute_sample_locs_pixel(num_samples, sample_locs[i],
+					       sample_locs_pixel[i]);
+	}
+
+	/* Emit the specified user sample locations. */
+	switch (num_samples) {
+	case 2:
+	case 4:
+		radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_pixel[0][0]);
+		radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_pixel[1][0]);
+		radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_pixel[2][0]);
+		radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_pixel[3][0]);
+		break;
+	case 8:
+		radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_pixel[0][0]);
+		radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_pixel[1][0]);
+		radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_pixel[2][0]);
+		radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_pixel[3][0]);
+		radeon_set_context_reg(cs, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, sample_locs_pixel[0][1]);
+		radeon_set_context_reg(cs, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, sample_locs_pixel[1][1]);
+		radeon_set_context_reg(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, sample_locs_pixel[2][1]);
+		radeon_set_context_reg(cs, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, sample_locs_pixel[3][1]);
+		break;
+	default:
+		unreachable("Unsupported number of samples!");
+	}
+
+	/* Compute the maximum sample distance from the specified locations.
+	 * Each of the four pixels has its own set of locations, so all of
+	 * them have to be considered, not only pixel (0,0).
+	 */
+	for (uint32_t i = 0; i < 4; i++) {
+		for (uint32_t j = 0; j < num_samples; j++) {
+			VkOffset2D offset = sample_locs[i][j];
+			max_sample_dist = MAX2(max_sample_dist,
+					       MAX2(abs(offset.x), abs(offset.y)));
+		}
+	}
+
+	/* Emit the maximum sample distance if different. */
+	if (G_028BE0_MAX_SAMPLE_DIST(ms->pa_sc_aa_config) != max_sample_dist) {
+		uint32_t pa_sc_aa_config = ms->pa_sc_aa_config;
+
+		/* Replace only the MAX_SAMPLE_DIST field of the pipeline value. */
+		pa_sc_aa_config &= C_028BE0_MAX_SAMPLE_DIST;
+		pa_sc_aa_config |= S_028BE0_MAX_SAMPLE_DIST(max_sample_dist);
+
+		radeon_set_context_reg_seq(cs, R_028BE0_PA_SC_AA_CONFIG, 1);
+		radeon_emit(cs, pa_sc_aa_config);
+
+		/* GFX9: Flush DFSM when the AA mode changes. */
+		if (cmd_buffer->device->dfsm_allowed) {
+			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+			radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
+		}
+	}
+}
+
static void
radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
struct radv_pipeline *pipeline)
@@ -645,7 +791,14 @@ radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.needs_sample_positions)
cmd_buffer->sample_positions_needed = true;
- if (old_pipeline && num_samples == old_pipeline->graphics.ms.num_samples)
+ /* Emit the multisample state (including sample locations) only if:
+ * - it's the first bound pipeline in the command buffer
+ * - the number of samples of this pipeline is different
+ * - the previous pipeline used custom sample locations
+ */
+ if (old_pipeline &&
+ num_samples == old_pipeline->graphics.ms.num_samples &&
+ !old_pipeline->dynamic_state.sample_location.count)
return;
radeon_set_context_reg_seq(cmd_buffer->cs, R_028BDC_PA_SC_LINE_CNTL, 2);
@@ -1711,6 +1864,9 @@ radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer)
if (states & RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE)
radv_emit_discard_rectangle(cmd_buffer);
+ if (states & RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS)
+ radv_emit_sample_locations(cmd_buffer);
+
cmd_buffer->state.dirty &= ~states;
}
@@ -3050,6 +3206,25 @@ void radv_CmdSetDiscardRectangleEXT(
state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE;
}
+/**
+ * Record new sample locations in the command buffer's dynamic state and
+ * flag them dirty so they get emitted when dynamic state is next flushed.
+ */
+void radv_CmdSetSampleLocationsEXT(
+	VkCommandBuffer                             commandBuffer,
+	const VkSampleLocationsInfoEXT*             pSampleLocationsInfo)
+{
+	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+	struct radv_sample_locations_state *dst =
+		&cmd_buffer->state.dynamic.sample_location;
+
+	/* The destination array has a fixed capacity. */
+	assert(pSampleLocationsInfo->sampleLocationsCount <= MAX_SAMPLE_LOCATIONS);
+
+	dst->count = pSampleLocationsInfo->sampleLocationsCount;
+	dst->grid_size = pSampleLocationsInfo->sampleLocationGridSize;
+	dst->per_pixel = pSampleLocationsInfo->sampleLocationsPerPixel;
+	typed_memcpy(&dst->locations[0],
+		     pSampleLocationsInfo->pSampleLocations,
+		     pSampleLocationsInfo->sampleLocationsCount);
+
+	cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS;
+}
+
void radv_CmdExecuteCommands(
VkCommandBuffer commandBuffer,
uint32_t commandBufferCount,
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index cb51ee44e58..6b19641f66d 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1238,6 +1238,19 @@ void radv_GetPhysicalDeviceProperties2(
properties->transformFeedbackDraw = true;
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
+ VkPhysicalDeviceSampleLocationsPropertiesEXT *properties =
+ (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
+ properties->sampleLocationSampleCounts = VK_SAMPLE_COUNT_2_BIT |
+ VK_SAMPLE_COUNT_4_BIT |
+ VK_SAMPLE_COUNT_8_BIT;
+ properties->maxSampleLocationGridSize = (VkExtent2D){ 2 , 2 };
+ properties->sampleLocationCoordinateRange[0] = 0.0f;
+ properties->sampleLocationCoordinateRange[1] = 1.0f;
+ properties->sampleLocationSubPixelBits = 4;
+ properties->variableSampleLocations = VK_FALSE;
+ break;
+ }
default:
break;
}
@@ -5111,3 +5124,17 @@ VkResult radv_GetCalibratedTimestampsEXT(
return VK_SUCCESS;
}
+
+/**
+ * Report the maximum sample location grid size for a sample count: 2x2 when
+ * the 2, 4 or 8 sample bit is set, 0x0 otherwise.
+ */
+void radv_GetPhysicalDeviceMultisamplePropertiesEXT(
+	VkPhysicalDevice                            physicalDevice,
+	VkSampleCountFlagBits                       samples,
+	VkMultisamplePropertiesEXT*                 pMultisampleProperties)
+{
+	/* Start from "unsupported" and upgrade for the handled sample counts. */
+	VkExtent2D grid_size = { 0, 0 };
+
+	if (samples & (VK_SAMPLE_COUNT_2_BIT |
+		       VK_SAMPLE_COUNT_4_BIT |
+		       VK_SAMPLE_COUNT_8_BIT))
+		grid_size = (VkExtent2D){ 2, 2 };
+
+	pMultisampleProperties->maxSampleLocationGridSize = grid_size;
+}
diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py
index d14169144f7..19b24ac4157 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -106,6 +106,7 @@ EXTENSIONS = [
Extension('VK_EXT_external_memory_host', 1, 'device->rad_info.has_userptr'),
Extension('VK_EXT_global_priority', 1, 'device->rad_info.has_ctx_priority'),
Extension('VK_EXT_pci_bus_info', 1, True),
+ Extension('VK_EXT_sample_locations', 1, 'device->rad_info.chip_class >= VI'),
Extension('VK_EXT_sampler_filter_minmax', 1, 'device->rad_info.chip_class >= CIK'),
Extension('VK_EXT_scalar_block_layout', 1, 'device->rad_info.chip_class >= CIK'),
Extension('VK_EXT_shader_viewport_index_layer', 1, True),
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 33076cc2bd2..266fdb43367 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -1276,6 +1276,8 @@ static unsigned radv_dynamic_state_mask(VkDynamicState state)
return RADV_DYNAMIC_STENCIL_REFERENCE;
case VK_DYNAMIC_STATE_DISCARD_RECTANGLE_EXT:
return RADV_DYNAMIC_DISCARD_RECTANGLE;
+ case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT:
+ return RADV_DYNAMIC_SAMPLE_LOCATIONS;
default:
unreachable("Unhandled dynamic state");
}
@@ -1306,6 +1308,11 @@ static uint32_t radv_pipeline_needed_dynamic_state(const VkGraphicsPipelineCreat
if (!vk_find_struct_const(pCreateInfo->pNext, PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT))
states &= ~RADV_DYNAMIC_DISCARD_RECTANGLE;
+ if (!pCreateInfo->pMultisampleState ||
+ !vk_find_struct_const(pCreateInfo->pMultisampleState->pNext,
+ PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT))
+ states &= ~RADV_DYNAMIC_SAMPLE_LOCATIONS;
+
/* TODO: blend constants & line width. */
return states;
@@ -1442,6 +1449,29 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
discard_rectangle_info->discardRectangleCount);
}
+ if (states & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
+ const VkPipelineSampleLocationsStateCreateInfoEXT *sample_location_info =
+ vk_find_struct_const(pCreateInfo->pMultisampleState->pNext,
+ PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT);
+ /* If sampleLocationsEnable is VK_FALSE, the default sample
+ * locations are used and the values specified in
+ * sampleLocationsInfo are ignored.
+ */
+ if (sample_location_info->sampleLocationsEnable) {
+ const VkSampleLocationsInfoEXT *pSampleLocationsInfo =
+ &sample_location_info->sampleLocationsInfo;
+
+ assert(pSampleLocationsInfo->sampleLocationsCount <= MAX_SAMPLE_LOCATIONS);
+
+ dynamic->sample_location.per_pixel = pSampleLocationsInfo->sampleLocationsPerPixel;
+ dynamic->sample_location.grid_size = pSampleLocationsInfo->sampleLocationGridSize;
+ dynamic->sample_location.count = pSampleLocationsInfo->sampleLocationsCount;
+ typed_memcpy(&dynamic->sample_location.locations[0],
+ pSampleLocationsInfo->pSampleLocations,
+ pSampleLocationsInfo->sampleLocationsCount);
+ }
+ }
+
pipeline->dynamic_state.mask = states;
}
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index e3dd301ee8f..4139a2911aa 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -90,6 +90,7 @@ typedef uint32_t xcb_window_t;
#define MAX_VIEWPORTS 16
#define MAX_SCISSORS 16
#define MAX_DISCARD_RECTANGLES 4
+#define MAX_SAMPLE_LOCATIONS 32
#define MAX_PUSH_CONSTANTS_SIZE 128
#define MAX_PUSH_DESCRIPTORS 32
#define MAX_DYNAMIC_UNIFORM_BUFFERS 16
@@ -829,7 +830,8 @@ enum radv_dynamic_state_bits {
RADV_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7,
RADV_DYNAMIC_STENCIL_REFERENCE = 1 << 8,
RADV_DYNAMIC_DISCARD_RECTANGLE = 1 << 9,
- RADV_DYNAMIC_ALL = (1 << 10) - 1,
+ RADV_DYNAMIC_SAMPLE_LOCATIONS = 1 << 10,
+ RADV_DYNAMIC_ALL = (1 << 11) - 1,
};
enum radv_cmd_dirty_bits {
@@ -845,12 +847,13 @@ enum radv_cmd_dirty_bits {
RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7,
RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1 << 8,
RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE = 1 << 9,
- RADV_CMD_DIRTY_DYNAMIC_ALL = (1 << 10) - 1,
- RADV_CMD_DIRTY_PIPELINE = 1 << 10,
- RADV_CMD_DIRTY_INDEX_BUFFER = 1 << 11,
- RADV_CMD_DIRTY_FRAMEBUFFER = 1 << 12,
- RADV_CMD_DIRTY_VERTEX_BUFFER = 1 << 13,
- RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1 << 14,
+ RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS = 1 << 10,
+ RADV_CMD_DIRTY_DYNAMIC_ALL = (1 << 11) - 1,
+ RADV_CMD_DIRTY_PIPELINE = 1 << 11,
+ RADV_CMD_DIRTY_INDEX_BUFFER = 1 << 12,
+ RADV_CMD_DIRTY_FRAMEBUFFER = 1 << 13,
+ RADV_CMD_DIRTY_VERTEX_BUFFER = 1 << 14,
+ RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1 << 15,
};
enum radv_cmd_flush_bits {
@@ -927,6 +930,13 @@ struct radv_discard_rectangle_state {
VkRect2D rectangles[MAX_DISCARD_RECTANGLES];
};
+struct radv_sample_locations_state { /* custom sample locations (VK_EXT_sample_locations) */
+ VkSampleCountFlagBits per_pixel; /* sampleLocationsPerPixel: samples per pixel */
+ VkExtent2D grid_size; /* sampleLocationGridSize: pixel grid the locations cover */
+ uint32_t count; /* sampleLocationsCount: valid entries in locations[] */
+ VkSampleLocationEXT locations[MAX_SAMPLE_LOCATIONS]; /* per_pixel entries for each grid pixel, row by row */
+};
+
struct radv_dynamic_state {
/**
* Bitmask of (1 << VK_DYNAMIC_STATE_*).
@@ -969,6 +979,8 @@ struct radv_dynamic_state {
} stencil_reference;
struct radv_discard_rectangle_state discard_rectangle;
+
+ struct radv_sample_locations_state sample_location;
};
extern const struct radv_dynamic_state default_dynamic_state;
--
2.19.2
2.19.2