Discussion:
[PATCH 2/6] radv: Add syncobj signal/reset/wait to winsys.
Add Reply
Bas Nieuwenhuizen
2017-12-17 00:11:51 UTC
Reply
Permalink
Raw Message
---
src/amd/vulkan/radv_radeon_winsys.h | 4 +++
src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 40 +++++++++++++++++++++++++++
2 files changed, 44 insertions(+)

diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h
index 2b815d9c5a9..e851c3edf86 100644
--- a/src/amd/vulkan/radv_radeon_winsys.h
+++ b/src/amd/vulkan/radv_radeon_winsys.h
@@ -274,6 +274,10 @@ struct radeon_winsys {
int (*create_syncobj)(struct radeon_winsys *ws, uint32_t *handle);
void (*destroy_syncobj)(struct radeon_winsys *ws, uint32_t handle);

+ void (*reset_syncobj)(struct radeon_winsys *ws, uint32_t handle);
+ void (*signal_syncobj)(struct radeon_winsys *ws, uint32_t handle);
+ bool (*wait_syncobj)(struct radeon_winsys *ws, uint32_t handle, uint64_t timeout);
+
int (*export_syncobj)(struct radeon_winsys *ws, uint32_t syncobj, int *fd);
int (*import_syncobj)(struct radeon_winsys *ws, int fd, uint32_t *syncobj);

diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
index e5ea312aeeb..e4d444b8524 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -1281,6 +1281,43 @@ static void radv_amdgpu_destroy_syncobj(struct radeon_winsys *_ws,
amdgpu_cs_destroy_syncobj(ws->dev, handle);
}

+static void radv_amdgpu_reset_syncobj(struct radeon_winsys *_ws,
+ uint32_t handle)
+{
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ amdgpu_cs_syncobj_reset(ws->dev, &handle, 1);
+}
+
+static void radv_amdgpu_signal_syncobj(struct radeon_winsys *_ws,
+ uint32_t handle)
+{
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ amdgpu_cs_syncobj_signal(ws->dev, &handle, 1);
+}
+
+static bool radv_amdgpu_wait_syncobj(struct radeon_winsys *_ws,
+ uint32_t handle, uint64_t timeout)
+{
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ uint32_t tmp;
+
+ /* The timeouts are signed, while vulkan timeouts are unsigned. */
+ timeout = MIN2(timeout, INT64_MAX);
+
+ int ret = amdgpu_cs_syncobj_wait(ws->dev, &handle, 1, timeout,
+ DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT |
+ DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL,
+ &tmp);
+ if (ret == 0) {
+ return true;
+ } else if (ret == -1 && errno == ETIME) {
+ return false;
+ } else {
+ fprintf(stderr, "amdgpu: radv_amdgpu_wait_syncobj failed!\nerrno: %d\n", errno);
+ return false;
+ }
+}
+
static int radv_amdgpu_export_syncobj(struct radeon_winsys *_ws,
uint32_t syncobj,
int *fd)
@@ -1319,6 +1356,9 @@ void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
ws->base.destroy_sem = radv_amdgpu_destroy_sem;
ws->base.create_syncobj = radv_amdgpu_create_syncobj;
ws->base.destroy_syncobj = radv_amdgpu_destroy_syncobj;
+ ws->base.reset_syncobj = radv_amdgpu_reset_syncobj;
+ ws->base.signal_syncobj = radv_amdgpu_signal_syncobj;
+ ws->base.wait_syncobj = radv_amdgpu_wait_syncobj;
ws->base.export_syncobj = radv_amdgpu_export_syncobj;
ws->base.import_syncobj = radv_amdgpu_import_syncobj;
ws->base.fence_wait = radv_amdgpu_fence_wait;
--
2.15.1
Bas Nieuwenhuizen
2017-12-17 00:11:52 UTC
Reply
Permalink
Raw Message
The first amdgpu DRM minor version bump after syncobj wait support was included was 20 (the bump itself was done for local BOs).
---
src/amd/common/ac_gpu_info.c | 1 +
src/amd/common/ac_gpu_info.h | 1 +
2 files changed, 2 insertions(+)

diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index 0576dd369cf..c042bb229ce 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -277,6 +277,7 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
vce.available_rings ? vce_version : 0;
info->has_userptr = true;
info->has_syncobj = has_syncobj(fd);
+ info->has_syncobj_wait = info->has_syncobj && info->drm_minor >= 20;
info->has_sync_file = info->has_syncobj && info->drm_minor >= 21;
info->has_ctx_priority = info->drm_minor >= 22;
info->num_render_backends = amdinfo->rb_pipes;
diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h
index 5b9e51658b0..04e17f91c59 100644
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -81,6 +81,7 @@ struct radeon_info {
uint32_t drm_patchlevel;
bool has_userptr;
bool has_syncobj;
+ bool has_syncobj_wait;
bool has_sync_file;
bool has_ctx_priority;
--
2.15.1
Marek Olšák
2017-12-26 17:29:56 UTC
Reply
Permalink
Raw Message
Does this mean that radeonsi shouldn't use amdgpu_cs_syncobj_wait on older DRM?

Does it make sense to have separate has_syncobj and has_syncobj_wait flags?

Marek

On Sun, Dec 17, 2017 at 1:11 AM, Bas Nieuwenhuizen
Post by Bas Nieuwenhuizen
First amdgpu bump after inclusion was 20 (which was done for local BOs).
---
src/amd/common/ac_gpu_info.c | 1 +
src/amd/common/ac_gpu_info.h | 1 +
2 files changed, 2 insertions(+)
diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index 0576dd369cf..c042bb229ce 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -277,6 +277,7 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
vce.available_rings ? vce_version : 0;
info->has_userptr = true;
info->has_syncobj = has_syncobj(fd);
+ info->has_syncobj_wait = info->has_syncobj && info->drm_minor >= 20;
info->has_sync_file = info->has_syncobj && info->drm_minor >= 21;
info->has_ctx_priority = info->drm_minor >= 22;
info->num_render_backends = amdinfo->rb_pipes;
diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h
index 5b9e51658b0..04e17f91c59 100644
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -81,6 +81,7 @@ struct radeon_info {
uint32_t drm_patchlevel;
bool has_userptr;
bool has_syncobj;
+ bool has_syncobj_wait;
bool has_sync_file;
bool has_ctx_priority;
--
2.15.1
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Bas Nieuwenhuizen
2017-12-27 00:18:56 UTC
Reply
Permalink
Raw Message
For Vulkan, I wanted this because of the following kernel commit:

drm/syncobj: Allow wait for submit and signal behavior (v5)

Vulkan VkFence semantics require that the application be able to perform
a CPU wait on work which may not yet have been submitted. This is
perfectly safe because the CPU wait has a timeout which will get
triggered eventually if no work is ever submitted. This behavior is
advantageous for multi-threaded workloads because, so long as all of the
threads agree on what fences to use up-front, you don't have the extra
cross-thread synchronization cost of thread A telling thread B that it
has submitted its dependent work and thread B is now free to wait.

Within a single process, this can be implemented in the userspace driver
by doing exactly the same kind of tracking the app would have to do
using posix condition variables or similar. However, in order for this
to work cross-process (as is required by VK_KHR_external_fence), we need
to handle this in the kernel.

This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which
instructs the IOCTL to wait for the syncobj to have a non-null fence and
then wait on the fence. Combined with DRM_IOCTL_SYNCOBJ_RESET, you can
easily get the Vulkan behavior.


I suppose you could use an earlier DRM version if you don't need
WAIT_FOR_SUBMIT. IMO we should keep the two flags separate, as semaphores
on radv don't need any wait functionality at all.
Post by Marek Olšák
Does this mean that radeonsi shouldn't use amdgpu_cs_syncobj_wait on older DRM?
Does it make sense to have separate has_syncobj and has_syncobj_wait flags?
Marek
On Sun, Dec 17, 2017 at 1:11 AM, Bas Nieuwenhuizen
Post by Bas Nieuwenhuizen
First amdgpu bump after inclusion was 20 (which was done for local BOs).
---
src/amd/common/ac_gpu_info.c | 1 +
src/amd/common/ac_gpu_info.h | 1 +
2 files changed, 2 insertions(+)
diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index 0576dd369cf..c042bb229ce 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -277,6 +277,7 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
vce.available_rings ? vce_version : 0;
info->has_userptr = true;
info->has_syncobj = has_syncobj(fd);
+ info->has_syncobj_wait = info->has_syncobj && info->drm_minor >= 20;
info->has_sync_file = info->has_syncobj && info->drm_minor >= 21;
info->has_ctx_priority = info->drm_minor >= 22;
info->num_render_backends = amdinfo->rb_pipes;
diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h
index 5b9e51658b0..04e17f91c59 100644
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -81,6 +81,7 @@ struct radeon_info {
uint32_t drm_patchlevel;
bool has_userptr;
bool has_syncobj;
+ bool has_syncobj_wait;
bool has_sync_file;
bool has_ctx_priority;
--
2.15.1
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Marek Olšák
2017-12-28 14:57:32 UTC
Reply
Permalink
Raw Message
OK. I was confused because the name has_syncobj_wait suggests that
it's about amdgpu_cs_syncobj_wait, not WAIT_FOR_SUBMIT.

Marek

On Wed, Dec 27, 2017 at 1:18 AM, Bas Nieuwenhuizen
Post by Bas Nieuwenhuizen
For vulkan, I wanted this because of
drm/syncobj: Allow wait for submit and signal behavior (v5)
Vulkan VkFence semantics require that the application be able to perform
a CPU wait on work which may not yet have been submitted. This is
perfectly safe because the CPU wait has a timeout which will get
triggered eventually if no work is ever submitted. This behavior is
advantageous for multi-threaded workloads because, so long as all of the
threads agree on what fences to use up-front, you don't have the extra
cross-thread synchronization cost of thread A telling thread B that it
has submitted its dependent work and thread B is now free to wait.
Within a single process, this can be implemented in the userspace driver
by doing exactly the same kind of tracking the app would have to do
using posix condition variables or similar. However, in order for this
to work cross-process (as is required by VK_KHR_external_fence), we need
to handle this in the kernel.
This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which
instructs the IOCTL to wait for the syncobj to have a non-null fence and
then wait on the fence. Combined with DRM_IOCTL_SYNCOBJ_RESET, you can
easily get the Vulkan behavior.
I suppose you could use an earlier DRM version if you don't need it.
IMO we should keep them separate, as on radv semaphores don't need any
wait functionality at all.
Post by Marek Olšák
Does this mean that radeonsi shouldn't use amdgpu_cs_syncobj_wait on older DRM?
Does it make sense to have separate has_syncobj and has_syncobj_wait flags?
Marek
On Sun, Dec 17, 2017 at 1:11 AM, Bas Nieuwenhuizen
Post by Bas Nieuwenhuizen
First amdgpu bump after inclusion was 20 (which was done for local BOs).
---
src/amd/common/ac_gpu_info.c | 1 +
src/amd/common/ac_gpu_info.h | 1 +
2 files changed, 2 insertions(+)
diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index 0576dd369cf..c042bb229ce 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -277,6 +277,7 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
vce.available_rings ? vce_version : 0;
info->has_userptr = true;
info->has_syncobj = has_syncobj(fd);
+ info->has_syncobj_wait = info->has_syncobj && info->drm_minor >= 20;
info->has_sync_file = info->has_syncobj && info->drm_minor >= 21;
info->has_ctx_priority = info->drm_minor >= 22;
info->num_render_backends = amdinfo->rb_pipes;
diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h
index 5b9e51658b0..04e17f91c59 100644
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -81,6 +81,7 @@ struct radeon_info {
uint32_t drm_patchlevel;
bool has_userptr;
bool has_syncobj;
+ bool has_syncobj_wait;
bool has_sync_file;
bool has_ctx_priority;
--
2.15.1
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Bas Nieuwenhuizen
2017-12-17 00:11:55 UTC
Reply
Permalink
Raw Message
---
src/amd/vulkan/radv_device.c | 20 ++++++++++++++++++++
src/amd/vulkan/radv_extensions.py | 2 ++
2 files changed, 22 insertions(+)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 94562fda875..a4ec912ff2c 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -3793,3 +3793,23 @@ VkResult radv_GetFenceFdKHR(VkDevice _device,
return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
return VK_SUCCESS;
}
+
+void radv_GetPhysicalDeviceExternalFencePropertiesKHR(
+ VkPhysicalDevice physicalDevice,
+ const VkPhysicalDeviceExternalFenceInfoKHR* pExternalFenceInfo,
+ VkExternalFencePropertiesKHR* pExternalFenceProperties)
+{
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
+
+ if (pdevice->rad_info.has_syncobj_wait &&
+ pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR) {
+ pExternalFenceProperties->exportFromImportedHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
+ pExternalFenceProperties->compatibleHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
+ pExternalFenceProperties->externalFenceFeatures = VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT_KHR |
+ VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
+ } else {
+ pExternalFenceProperties->exportFromImportedHandleTypes = 0;
+ pExternalFenceProperties->compatibleHandleTypes = 0;
+ pExternalFenceProperties->externalFenceFeatures = 0;
+ }
+}
diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py
index 3188735ea78..9af941fab35 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -53,6 +53,8 @@ EXTENSIONS = [
Extension('VK_KHR_bind_memory2', 1, True),
Extension('VK_KHR_dedicated_allocation', 1, True),
Extension('VK_KHR_descriptor_update_template', 1, True),
+ Extension('VK_KHR_external_fence', 1, 'device->rad_info.has_syncobj_wait'),
+ Extension('VK_KHR_external_fence_capabilities', 1, True),
Extension('VK_KHR_external_fence_fd', 1, 'device->rad_info.has_syncobj_wait'),
Extension('VK_KHR_external_memory', 1, True),
Extension('VK_KHR_external_memory_capabilities', 1, True),
--
2.15.1
Bas Nieuwenhuizen
2017-12-17 00:11:53 UTC
Reply
Permalink
Raw Message
---
src/amd/vulkan/radv_device.c | 113 ++++++++++++++++++++++++++++++++++++------
src/amd/vulkan/radv_private.h | 6 ++-
src/amd/vulkan/radv_wsi.c | 5 ++
3 files changed, 109 insertions(+), 15 deletions(-)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 7c0971d190d..fc9fb59f991 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1850,12 +1850,14 @@ fail:
static VkResult radv_alloc_sem_counts(struct radv_winsys_sem_counts *counts,
int num_sems,
const VkSemaphore *sems,
+ VkFence _fence,
bool reset_temp)
{
int syncobj_idx = 0, sem_idx = 0;

- if (num_sems == 0)
+ if (num_sems == 0 && _fence == VK_NULL_HANDLE)
return VK_SUCCESS;
+
for (uint32_t i = 0; i < num_sems; i++) {
RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);

@@ -1865,6 +1867,12 @@ static VkResult radv_alloc_sem_counts(struct radv_winsys_sem_counts *counts,
counts->sem_count++;
}

+ if (_fence != VK_NULL_HANDLE) {
+ RADV_FROM_HANDLE(radv_fence, fence, _fence);
+ if (fence->temp_syncobj || fence->syncobj)
+ counts->syncobj_count++;
+ }
+
if (counts->syncobj_count) {
counts->syncobj = (uint32_t *)malloc(sizeof(uint32_t) * counts->syncobj_count);
if (!counts->syncobj)
@@ -1893,6 +1901,14 @@ static VkResult radv_alloc_sem_counts(struct radv_winsys_sem_counts *counts,
}
}

+ if (_fence != VK_NULL_HANDLE) {
+ RADV_FROM_HANDLE(radv_fence, fence, _fence);
+ if (fence->temp_syncobj)
+ counts->syncobj[syncobj_idx++] = fence->temp_syncobj;
+ else if (fence->syncobj)
+ counts->syncobj[syncobj_idx++] = fence->syncobj;
+ }
+
return VK_SUCCESS;
}

@@ -1923,15 +1939,16 @@ VkResult radv_alloc_sem_info(struct radv_winsys_sem_info *sem_info,
int num_wait_sems,
const VkSemaphore *wait_sems,
int num_signal_sems,
- const VkSemaphore *signal_sems)
+ const VkSemaphore *signal_sems,
+ VkFence fence)
{
VkResult ret;
memset(sem_info, 0, sizeof(*sem_info));

- ret = radv_alloc_sem_counts(&sem_info->wait, num_wait_sems, wait_sems, true);
+ ret = radv_alloc_sem_counts(&sem_info->wait, num_wait_sems, wait_sems, VK_NULL_HANDLE, true);
if (ret)
return ret;
- ret = radv_alloc_sem_counts(&sem_info->signal, num_signal_sems, signal_sems, false);
+ ret = radv_alloc_sem_counts(&sem_info->signal, num_signal_sems, signal_sems, fence, false);
if (ret)
radv_free_sem_info(sem_info);

@@ -1997,7 +2014,8 @@ VkResult radv_QueueSubmit(
pSubmits[i].waitSemaphoreCount,
pSubmits[i].pWaitSemaphores,
pSubmits[i].signalSemaphoreCount,
- pSubmits[i].pSignalSemaphores);
+ pSubmits[i].pSignalSemaphores,
+ _fence);
if (result != VK_SUCCESS)
return result;

@@ -2068,11 +2086,18 @@ VkResult radv_QueueSubmit(

if (fence) {
if (!fence_emitted) {
- struct radv_winsys_sem_info sem_info = {0};
+ struct radv_winsys_sem_info sem_info;
+
+ result = radv_alloc_sem_info(&sem_info, 0, NULL, 0, NULL,
+ _fence);
+ if (result != VK_SUCCESS)
+ return result;
+
ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
&queue->device->empty_cs[queue->queue_family_index],
1, NULL, NULL, &sem_info,
false, base_fence);
+ radv_free_sem_info(&sem_info);
}
fence->submitted = true;
}
@@ -2573,7 +2598,8 @@ radv_sparse_image_opaque_bind_memory(struct radv_device *device,
pBindInfo[i].waitSemaphoreCount,
pBindInfo[i].pWaitSemaphores,
pBindInfo[i].signalSemaphoreCount,
- pBindInfo[i].pSignalSemaphores);
+ pBindInfo[i].pSignalSemaphores,
+ _fence);
if (result != VK_SUCCESS)
return result;

@@ -2606,6 +2632,11 @@ VkResult radv_CreateFence(
VkFence* pFence)
{
RADV_FROM_HANDLE(radv_device, device, _device);
+ const VkExportFenceCreateInfoKHR *export =
+ vk_find_struct_const(pCreateInfo->pNext, EXPORT_FENCE_CREATE_INFO_KHR);
+ VkExternalFenceHandleTypeFlagsKHR handleTypes =
+ export ? export->handleTypes : 0;
+
struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
sizeof(*fence), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
@@ -2615,10 +2646,24 @@ VkResult radv_CreateFence(

fence->submitted = false;
fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
- fence->fence = device->ws->create_fence();
- if (!fence->fence) {
- vk_free2(&device->alloc, pAllocator, fence);
- return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ fence->temp_syncobj = 0;
+ if (handleTypes) {
+ int ret = device->ws->create_syncobj(device->ws, &fence->syncobj);
+ if (ret) {
+ vk_free2(&device->alloc, pAllocator, fence);
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+ if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
+ device->ws->signal_syncobj(device->ws, fence->syncobj);
+ }
+ fence->fence = NULL;
+ } else {
+ fence->fence = device->ws->create_fence();
+ if (!fence->fence) {
+ vk_free2(&device->alloc, pAllocator, fence);
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+ fence->syncobj = 0;
}

*pFence = radv_fence_to_handle(fence);
@@ -2636,7 +2681,13 @@ void radv_DestroyFence(

if (!fence)
return;
- device->ws->destroy_fence(fence->fence);
+
+ if (fence->temp_syncobj)
+ device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
+ if (fence->syncobj)
+ device->ws->destroy_syncobj(device->ws, fence->syncobj);
+ if (fence->fence)
+ device->ws->destroy_fence(fence->fence);
vk_free2(&device->alloc, pAllocator, fence);
}

@@ -2671,6 +2722,18 @@ VkResult radv_WaitForFences(
RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
bool expired = false;

+ if (fence->temp_syncobj) {
+ if (!device->ws->wait_syncobj(device->ws, fence->temp_syncobj, timeout))
+ return VK_TIMEOUT;
+ continue;
+ }
+
+ if (fence->syncobj) {
+ if (!device->ws->wait_syncobj(device->ws, fence->syncobj, timeout))
+ return VK_TIMEOUT;
+ continue;
+ }
+
if (fence->signalled)
continue;

@@ -2687,13 +2750,26 @@ VkResult radv_WaitForFences(
return VK_SUCCESS;
}

-VkResult radv_ResetFences(VkDevice device,
+VkResult radv_ResetFences(VkDevice _device,
uint32_t fenceCount,
const VkFence *pFences)
{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+
for (unsigned i = 0; i < fenceCount; ++i) {
RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
fence->submitted = fence->signalled = false;
+
+ /* Per spec, we first restore the permanent payload, and then reset, so
+ * having a temp syncobj should not skip resetting the permanent syncobj. */
+ if (fence->temp_syncobj) {
+ device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
+ fence->temp_syncobj = 0;
+ }
+
+ if (fence->syncobj) {
+ device->ws->reset_syncobj(device->ws, fence->syncobj);
+ }
}

return VK_SUCCESS;
@@ -2704,11 +2780,20 @@ VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_fence, fence, _fence);

+ if (fence->temp_syncobj) {
+ bool success = device->ws->wait_syncobj(device->ws, fence->temp_syncobj, 0);
+ return success ? VK_SUCCESS : VK_NOT_READY;
+ }
+
+ if (fence->syncobj) {
+ bool success = device->ws->wait_syncobj(device->ws, fence->syncobj, 0);
+ return success ? VK_SUCCESS : VK_NOT_READY;
+ }
+
if (fence->signalled)
return VK_SUCCESS;
if (!fence->submitted)
return VK_NOT_READY;
-
if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
return VK_NOT_READY;

diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 2e1362c446d..1b231801583 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1558,7 +1558,8 @@ VkResult radv_alloc_sem_info(struct radv_winsys_sem_info *sem_info,
int num_wait_sems,
const VkSemaphore *wait_sems,
int num_signal_sems,
- const VkSemaphore *signal_sems);
+ const VkSemaphore *signal_sems,
+ VkFence fence);
void radv_free_sem_info(struct radv_winsys_sem_info *sem_info);

void radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
@@ -1597,6 +1598,9 @@ struct radv_fence {
struct radeon_winsys_fence *fence;
bool submitted;
bool signalled;
+
+ uint32_t syncobj;
+ uint32_t temp_syncobj;
};

struct radeon_winsys_sem;
diff --git a/src/amd/vulkan/radv_wsi.c b/src/amd/vulkan/radv_wsi.c
index 51e8ec666f7..e016e837102 100644
--- a/src/amd/vulkan/radv_wsi.c
+++ b/src/amd/vulkan/radv_wsi.c
@@ -214,6 +214,11 @@ VkResult radv_AcquireNextImageKHR(
if (fence && (result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR)) {
fence->submitted = true;
fence->signalled = true;
+ if (fence->temp_syncobj) {
+ device->ws->signal_syncobj(device->ws, fence->temp_syncobj);
+ } else if (fence->syncobj) {
+ device->ws->signal_syncobj(device->ws, fence->syncobj);
+ }
}
return result;
}
--
2.15.1
Bas Nieuwenhuizen
2017-12-17 00:11:54 UTC
Reply
Permalink
Raw Message
---
src/amd/vulkan/radv_device.c | 47 +++++++++++++++++++++++++++++++++++++++
src/amd/vulkan/radv_extensions.py | 1 +
2 files changed, 48 insertions(+)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index fc9fb59f991..94562fda875 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -3746,3 +3746,50 @@ void radv_GetPhysicalDeviceExternalSemaphorePropertiesKHR(
pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
}
}
+
+VkResult radv_ImportFenceFdKHR(VkDevice _device,
+ const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_fence, fence, pImportFenceFdInfo->fence);
+ uint32_t syncobj_handle = 0;
+ uint32_t *syncobj_dst = NULL;
+ assert(pImportFenceFdInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
+
+ int ret = device->ws->import_syncobj(device->ws, pImportFenceFdInfo->fd, &syncobj_handle);
+ if (ret != 0)
+ return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
+
+ if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT_KHR) {
+ syncobj_dst = &fence->temp_syncobj;
+ } else {
+ syncobj_dst = &fence->syncobj;
+ }
+
+ if (*syncobj_dst)
+ device->ws->destroy_syncobj(device->ws, *syncobj_dst);
+
+ *syncobj_dst = syncobj_handle;
+ close(pImportFenceFdInfo->fd);
+ return VK_SUCCESS;
+}
+
+VkResult radv_GetFenceFdKHR(VkDevice _device,
+ const VkFenceGetFdInfoKHR *pGetFdInfo,
+ int *pFd)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_fence, fence, pGetFdInfo->fence);
+ int ret;
+ uint32_t syncobj_handle;
+
+ assert(pGetFdInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
+ if (fence->temp_syncobj)
+ syncobj_handle = fence->temp_syncobj;
+ else
+ syncobj_handle = fence->syncobj;
+ ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
+ if (ret)
+ return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
+ return VK_SUCCESS;
+}
diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py
index 2c1c71ecdc7..3188735ea78 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -53,6 +53,7 @@ EXTENSIONS = [
Extension('VK_KHR_bind_memory2', 1, True),
Extension('VK_KHR_dedicated_allocation', 1, True),
Extension('VK_KHR_descriptor_update_template', 1, True),
+ Extension('VK_KHR_external_fence_fd', 1, 'device->rad_info.has_syncobj_wait'),
Extension('VK_KHR_external_memory', 1, True),
Extension('VK_KHR_external_memory_capabilities', 1, True),
Extension('VK_KHR_external_memory_fd', 1, True),
--
2.15.1
Dave Airlie
2017-12-18 02:30:13 UTC
Reply
Permalink
Raw Message
For the radv dependencies on syncobj signal/reset,
I've released libdrm 2.4.89 with those patches.

So, for the series,
Reviewed-by: Dave Airlie <***@redhat.com>

Dave.
Loading...